UMEP-dev · sunt05 · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026
diff --git a/scripts/profile_multi_grid.py b/scripts/profile_multi_grid.py
@@ -0,0 +1,106 @@
+"""Profile SUEWS multi-grid execution to identify bottlenecks.
+
+Usage:
+    python scripts/profile_multi_grid.py [--grids N] [--profile]
+
+Measures wall-clock time breakdown for N identical grid cells.
+With --profile, runs cProfile for detailed function-level analysis.
+"""
+
+import argparse
+import cProfile
+import io
+import pstats
+from time import perf_counter
+
+import pandas as pd
+
+import supy as sp
+
+
+def create_multi_grid_state(n_grids: int):
+    """Create N-grid initial state from sample data."""
+    df_state_init, df_forcing = sp.load_SampleData()
+
+    # Duplicate state for N grids with unique grid IDs
+    df_state_multi = pd.concat([df_state_init] * n_grids)
+    df_state_multi.index = pd.RangeIndex(n_grids, name="grid")
+
+    return df_state_multi, df_forcing
+
+
+def run_benchmark(n_grids: int, serial: bool = True):
+    """Run N-grid benchmark and report timing breakdown."""
+    mode = "serial" if serial else "parallel"
+    print(f"Setting up {n_grids} grids ({mode})...")
+    t0 = perf_counter()
+    df_state_multi, df_forcing = create_multi_grid_state(n_grids)
+    t_setup = perf_counter() - t0
+    print(f"  Setup: {t_setup:.2f}s")
+
+    # Use first 2 days of forcing to keep runs short
+    df_forcing_short = df_forcing.iloc[:576]  # 2 days at 5-min resolution
+    n_steps = len(df_forcing_short)
+    print(f"  Forcing: {n_steps} timesteps ({n_steps * 5 / 60:.1f} hours)")
+
+    print(f"Running {n_grids} grids x {n_steps} timesteps ({mode})...")
+    t0 = perf_counter()
+    df_output, df_state = sp.run_supy(
+        df_forcing_short,
+        df_state_multi,
+        serial_mode=serial,
+    )
+    t_run = perf_counter() - t0
+
+    total_steps = n_grids * n_steps
+    print(f"  Total time: {t_run:.2f}s")
+    print(f"  Per grid: {t_run / n_grids:.4f}s")
+    print(f"  Per timestep: {t_run / total_steps * 1000:.3f}ms")
+    print(f"  Throughput: {total_steps / t_run:.0f} grid-timesteps/s")
+
+    return t_run
+
+
+def run_with_profile(n_grids: int):
+    """Run with cProfile to get function-level breakdown."""
+    df_state_multi, df_forcing = create_multi_grid_state(n_grids)
+    df_forcing_short = df_forcing.iloc[:576]
+
+    pr = cProfile.Profile()
+    pr.enable()
+    df_output, df_state = sp.run_supy(
+        df_forcing_short,
+        df_state_multi,
+        serial_mode=True,
+    )
+    pr.disable()
+
+    # Print top 30 functions by cumulative time
+    s = io.StringIO()
+    ps = pstats.Stats(pr, stream=s).sort_stats("cumulative")
+    ps.print_stats(30)
+    print(s.getvalue())
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Profile SUEWS multi-grid execution")
+    parser.add_argument("--grids", type=int, default=4, help="Number of grid cells")
+    parser.add_argument("--profile", action="store_true", help="Run cProfile")
+    args = parser.parse_args()
+
+    if args.profile:
+        run_with_profile(args.grids)
+    else:
+        n = args.grids
+        print("=== Serial ===")
+        t_serial = run_benchmark(n, serial=True)
+        print()
+        if n > 1:
+            print("=== Parallel ===")
+            t_parallel = run_benchmark(n, serial=False)
+            print(f"\n  Speedup: {t_serial / t_parallel:.2f}x")
+            print()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/suews/Makefile.gfortran b/src/suews/Makefile.gfortran
@@ -10,7 +10,9 @@ CPPFLAGS = -cpp
 # Basic flags such as where to write module files, and an instruction
 #  to read Fortran unformatted data files as big endian
 # Also allow unlimited line length to avoid truncation errors
-BASICFLAGS = -J./mod -fconvert=big-endian -ffree-line-length-none
+# -frecursive: each subroutine call gets its own stack frame, enabling
+# thread-safe execution for Rayon-based multi-grid parallelism.
+BASICFLAGS = -J./mod -fconvert=big-endian -ffree-line-length-none -frecursive
 
 # OpenMP flag
 OMPFLAG = -fopenmp

diff --git a/src/suews/src/suews_ctrl_driver.f95 b/src/suews/src/suews_ctrl_driver.f95
@@ -1317,7 +1317,8 @@ SUBROUTINE SUEWS_cal_BiogenCO2( &
                         LAIMax, LAI_id, gsModel, Kmax, &
                         G_max, G_k, G_q_base, G_q_shape, G_t, G_sm, TH, TL, S1, S2, &
                         unused_gc1, unused_gc2, unused_gc3, unused_gc4, unused_gc5, & ! output: (unused conductances)
-                        gfunc_use, unused_gs, unused_rs) ! output:
+                        gfunc_use, unused_gs, unused_rs, & ! output:
+                        modState)
                   END IF
 
                   IF (gsmodel == 3 .OR. gsmodel == 4) THEN ! With modelled 2 meter temperature
@@ -1342,7 +1343,8 @@ SUBROUTINE SUEWS_cal_BiogenCO2( &
                         LAIMax, LAI_id, gsModel, Kmax, &
                         G_max, G_k, G_q_base, G_q_shape, G_t, G_sm, TH, TL, S1, S2, &
                         unused_gc1, unused_gc2, unused_gc3, unused_gc4, unused_gc5, & ! output: (unused conductances)
-                        gfunc2, unused_gs, unused_rs) ! output:
+                        gfunc2, unused_gs, unused_rs, & ! output:
+                        modState)
                   ELSEIF ((gsmodel == 1 .OR. gsmodel == 2) .AND. RSLLevel > 0) THEN
                      ! Use local temperature for gsmodel 1/2 with RSL diagnostics
                      t2 = Tair_local
@@ -3390,7 +3392,8 @@ SUBROUTINE SUEWS_cal_Resistance( &
                   AerodynamicResistanceMethod, &
                   StabilityMethod, &
                   RoughLenHeatMethod, &
-                  RA, z0v) ! output:
+                  RA, z0v, & ! output:
+                  modState)
 
                IF (SnowUse == 1) THEN
                   IF (Diagnose == 1) WRITE (*, *) 'Calling AerodynamicResistance for snow...'
@@ -3404,7 +3407,8 @@ SUBROUTINE SUEWS_cal_Resistance( &
                      AerodynamicResistanceMethod, &
                      StabilityMethod, &
                      3, &
-                     RASnow, z0vSnow) ! output:
+                     RASnow, z0vSnow, & ! output:
+                     modState)
                END IF
 
                IF (Diagnose == 1) WRITE (*, *) 'Calling SurfaceResistance...'
@@ -3416,7 +3420,8 @@ SUBROUTINE SUEWS_cal_Resistance( &
                   LAIMax, LAI_id, gsModel, Kmax, &
                   G_max, G_k, G_q_base, G_q_shape, G_t, G_sm, TH, TL, S1, S2, &
                   g_kdown, g_dq, g_ta, g_smd, g_lai, & ! output:
-                  gfunc, gsc, RS) ! output:
+                  gfunc, gsc, RS, & ! output:
+                  modState)
 
                IF (Diagnose == 1) WRITE (*, *) 'Calling BoundaryLayerResistance...'
                CALL BoundaryLayerResistance( &

diff --git a/src/suews/src/suews_ctrl_error.f95 b/src/suews/src/suews_ctrl_error.f95
@@ -11,29 +11,29 @@
 !   103: RSL - Interpolation bounds error in interp_z
 !   104: Build/ABI mismatch - output array size disagreement across compilation units
 !
-! Note: Error state uses SAVE variables, so is NOT thread-safe.
-!       Do not call SUEWS from multiple threads simultaneously.
+! Thread Safety:
+!   Fatal errors use module-level SAVE variables (supy_error_flag/code/message).
+!   These are acceptable because a fatal error terminates the simulation.
+!   Non-fatal warnings are routed through modState%errorstate (thread-safe).
+!   For multi-grid parallelism, use process-based isolation or ensure each
+!   thread has its own Fortran address space.
 !==================================================================================================
 MODULE module_ctrl_error_state
    IMPLICIT NONE
 
-   ! Error state variables exposed to Python via f90wrap
+   ! Error state variables for fatal errors — exposed to Python via Rust bridge.
+   ! These use SAVE because fatal errors terminate the run; concurrent writes
+   ! are not a concern in practice (only one fatal error matters).
    LOGICAL, SAVE :: supy_error_flag = .FALSE.
    INTEGER, SAVE :: supy_error_code = 0
    CHARACTER(LEN=512), SAVE :: supy_error_message = ''
 
-   ! Warning state variables for non-fatal issues (module-level fallback)
-   INTEGER, SAVE :: supy_warning_count = 0
-   CHARACTER(LEN=512), SAVE :: supy_last_warning_message = ''
-
 CONTAINS
 
    SUBROUTINE reset_supy_error()
       supy_error_flag = .FALSE.
       supy_error_code = 0
       supy_error_message = ''
-      supy_warning_count = 0
-      supy_last_warning_message = ''
    END SUBROUTINE reset_supy_error
 
    SUBROUTINE set_supy_error(code, message)
@@ -48,13 +48,12 @@ SUBROUTINE set_supy_error(code, message)
    END SUBROUTINE set_supy_error
 
    SUBROUTINE add_supy_warning(message)
-      !> Add a warning to the warning state (non-fatal, module-level fallback)
+      !> No-op stub: warnings should use modState%errorstate%report() instead.
+      !> Retained for backward compatibility with call sites that do not yet
+      !> have modState in scope. These warnings are silently dropped.
+      !> TODO: Thread modState through remaining callers and remove this stub.
       CHARACTER(LEN=*), INTENT(IN) :: message
-      INTEGER :: msg_len
-
-      supy_warning_count = supy_warning_count + 1
-      msg_len = MIN(LEN_TRIM(message), 512)
-      supy_last_warning_message = message(1:msg_len)
+      ! Intentionally empty — no module-level SAVE state for thread safety.
    END SUBROUTINE add_supy_warning
 
 END MODULE module_ctrl_error_state
@@ -71,8 +70,9 @@ SUBROUTINE ErrorHint(errh, ProblemFile, VALUE, value2, valueI, modState)
    !value       -- Error value (real number with correct type)
    !value2      -- Second error value (real number with correct type)
    !valueI      -- Error value (integer)
-   !modState    -- Optional SUEWS_STATE for state-based warning logging
+   !modState    -- Optional SUEWS_STATE for thread-safe warning logging
    ! Last modified -----------------------------------------------------
+   ! TS  03 Apr 2026: Remove module-level warning fallback for thread safety
    ! TS  17 Jan 2026: Add optional modState for state-based warning logging
    ! TS  17 Dec 2025: Remove legacy problems.txt/warnings.txt output (Python handles logging)
    ! MH  12 Apr 2017: Error code for stability added
@@ -84,14 +84,13 @@ SUBROUTINE ErrorHint(errh, ProblemFile, VALUE, value2, valueI, modState)
    ! LJ  08 Feb 2013
    !--------------------------------------------------------------------
    !
-   ! Thread Safety (GH#1042):
-   !   When modState is provided, warnings are logged to state%errorstate (thread-safe).
-   !   Otherwise, falls back to module-level warning state (NOT thread-safe).
-   !   Do not call the SUEWS kernel concurrently from multiple threads without state.
-   !   Use process-based parallelism or serialize calls with a lock in the caller.
+   ! Thread Safety:
+   !   Warnings are logged to modState%errorstate when provided (thread-safe).
+   !   If modState is absent, warnings are silently dropped (no module-level state).
+   !   Fatal errors still use module-level set_supy_error (acceptable: run terminates).
 
    USE module_ctrl_const_datain
-   USE module_ctrl_error_state, ONLY: set_supy_error, add_supy_warning
+   USE module_ctrl_error_state, ONLY: set_supy_error
    USE module_ctrl_type, ONLY: SUEWS_STATE
    ! USE module_ctrl_const_wherewhen
 
@@ -435,15 +434,13 @@ SUBROUTINE ErrorHint(errh, ProblemFile, VALUE, value2, valueI, modState)
          CALL wrf_debug(100, message)
          CALL wrf_debug(100, Errmessage)
 #else
-         ! SuPy: use state-based logging if available (thread-safe, full history)
+         ! SuPy: use state-based logging when available (thread-safe)
+         ! If modState is absent (e.g. dead code paths), warning is silently dropped.
          IF (PRESENT(modState)) THEN
             CALL modState%errorstate%report( &
                message=TRIM(text1)//': '//TRIM(ProblemFile), &
                location='ErrorHint', &
                is_fatal=.FALSE.)
-         ELSE
-            ! Fallback to module-level warning state (not thread-safe)
-            CALL add_supy_warning(TRIM(text1)//': '//TRIM(ProblemFile))
          END IF
 #endif
       END IF

diff --git a/src/suews/src/suews_ctrl_type.f95 b/src/suews/src/suews_ctrl_type.f95
@@ -352,13 +352,17 @@ FUNCTION has_error_state(self) RESULT(has_err)
    END FUNCTION has_error_state
 
    SUBROUTINE report_error_impl(self, message, location, is_fatal, timer)
-      !> Report an error/warning to the error log
+      !> Report an error/warning to the error log.
+      !> Non-fatal warnings are capped at MAX_WARNING_LOG entries to prevent
+      !> unbounded memory growth in long simulations.  Fatal entries are
+      !> always stored regardless of the cap.
       CLASS(error_state), INTENT(INOUT) :: self
       CHARACTER(LEN=*), INTENT(IN) :: message
       CHARACTER(LEN=*), INTENT(IN) :: location
       LOGICAL, INTENT(IN), OPTIONAL :: is_fatal
       TYPE(SUEWS_TIMER), INTENT(IN), OPTIONAL :: timer
 
+      INTEGER, PARAMETER :: MAX_WARNING_LOG = 512
       TYPE(error_entry), ALLOCATABLE :: temp(:)
       TYPE(SUEWS_TIMER) :: timer_use
       INTEGER :: new_size
@@ -367,6 +371,15 @@ SUBROUTINE report_error_impl(self, message, location, is_fatal, timer)
       fatal = .FALSE.
       IF (PRESENT(is_fatal)) fatal = is_fatal
 
+      IF (fatal) THEN
+         self%has_fatal = .TRUE.
+         self%flag = .TRUE.
+         self%message = message
+      END IF
+
+      ! Cap non-fatal entries to avoid unbounded allocation in year-long runs
+      IF (.NOT. fatal .AND. self%count >= MAX_WARNING_LOG) RETURN
+
       ! Use provided timer or default to zeros
       IF (PRESENT(timer)) THEN
          timer_use = timer
@@ -393,12 +406,6 @@ SUBROUTINE report_error_impl(self, message, location, is_fatal, timer)
       self%log(self%count)%message = message
       self%log(self%count)%location = location
       self%log(self%count)%is_fatal = fatal
-
-      IF (fatal) THEN
-         self%has_fatal = .TRUE.
-         self%flag = .TRUE.
-         self%message = message
-      END IF
    END SUBROUTINE report_error_impl
 
    SUBROUTINE clear_error_log(self)

diff --git a/src/suews/src/suews_phys_lumps.f95 b/src/suews/src/suews_phys_lumps.f95
@@ -174,7 +174,7 @@ SUBROUTINE LUMPS_cal_QHQE( &
 
                ! Calculate slope of the saturation vapour pressure vs air temp.
                s_hPa = slope_svp(Temp_C)
-               psyc_hPa = psyc_const(avcp, Press_hPa, lv_J_kg)
+               psyc_hPa = psyc_const(avcp, Press_hPa, lv_J_kg, modState)
                psyc_s = psyc_hPa/s_hPa
 
                !Calculate also sublimation ones if snow calculations are made.