Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions scripts/profile_multi_grid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""Profile SUEWS multi-grid execution to identify bottlenecks.

Usage:
python scripts/profile_multi_grid.py [--grids N] [--profile]

Measures wall-clock time breakdown for N identical grid cells.
With --profile, runs cProfile for detailed function-level analysis.
"""

import argparse
import cProfile
import io
import pstats
from time import perf_counter

import pandas as pd

import supy as sp


def create_multi_grid_state(n_grids: int):
"""Create N-grid initial state from sample data."""
df_state_init, df_forcing = sp.load_SampleData()

# Duplicate state for N grids with unique grid IDs
df_state_multi = pd.concat([df_state_init] * n_grids)
df_state_multi.index = pd.RangeIndex(n_grids, name="grid")

return df_state_multi, df_forcing


def run_benchmark(n_grids: int, serial: bool = True):
"""Run N-grid benchmark and report timing breakdown."""
mode = "serial" if serial else "parallel"
print(f"Setting up {n_grids} grids ({mode})...")
t0 = perf_counter()
df_state_multi, df_forcing = create_multi_grid_state(n_grids)
t_setup = perf_counter() - t0
print(f" Setup: {t_setup:.2f}s")

# Use first 2 days of forcing to keep runs short
df_forcing_short = df_forcing.iloc[:576] # 2 days at 5-min resolution
n_steps = len(df_forcing_short)
print(f" Forcing: {n_steps} timesteps ({n_steps * 5 / 60:.1f} hours)")

print(f"Running {n_grids} grids x {n_steps} timesteps ({mode})...")
t0 = perf_counter()
df_output, df_state = sp.run_supy(
df_forcing_short,
df_state_multi,
serial_mode=serial,
)
t_run = perf_counter() - t0

total_steps = n_grids * n_steps
print(f" Total time: {t_run:.2f}s")
print(f" Per grid: {t_run / n_grids:.4f}s")
print(f" Per timestep: {t_run / total_steps * 1000:.3f}ms")
print(f" Throughput: {total_steps / t_run:.0f} grid-timesteps/s")

return t_run


def run_with_profile(n_grids: int):
"""Run with cProfile to get function-level breakdown."""
df_state_multi, df_forcing = create_multi_grid_state(n_grids)
df_forcing_short = df_forcing.iloc[:576]

pr = cProfile.Profile()
pr.enable()
df_output, df_state = sp.run_supy(
df_forcing_short,
df_state_multi,
serial_mode=True,
)
pr.disable()

# Print top 30 functions by cumulative time
s = io.StringIO()
ps = pstats.Stats(pr, stream=s).sort_stats("cumulative")
ps.print_stats(30)
print(s.getvalue())


def main():
parser = argparse.ArgumentParser(description="Profile SUEWS multi-grid execution")
parser.add_argument("--grids", type=int, default=4, help="Number of grid cells")
parser.add_argument("--profile", action="store_true", help="Run cProfile")
args = parser.parse_args()

if args.profile:
run_with_profile(args.grids)
else:
n = args.grids
print("=== Serial ===")
t_serial = run_benchmark(n, serial=True)
print()
if n > 1:
print("=== Parallel ===")
t_parallel = run_benchmark(n, serial=False)
print(f"\n Speedup: {t_serial / t_parallel:.2f}x")
print()


if __name__ == "__main__":
main()
4 changes: 3 additions & 1 deletion src/suews/Makefile.gfortran
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ CPPFLAGS = -cpp
# Basic flags such as where to write module files, and an instruction
# to read Fortran unformatted data files as big endian
# Also allow unlimited line length to avoid truncation errors
BASICFLAGS = -J./mod -fconvert=big-endian -ffree-line-length-none
# -frecursive: each subroutine call gets its own stack frame, enabling
# thread-safe execution for Rayon-based multi-grid parallelism.
BASICFLAGS = -J./mod -fconvert=big-endian -ffree-line-length-none -frecursive

# OpenMP flag
OMPFLAG = -fopenmp
Expand Down
15 changes: 10 additions & 5 deletions src/suews/src/suews_ctrl_driver.f95
Original file line number Diff line number Diff line change
Expand Up @@ -1317,7 +1317,8 @@ SUBROUTINE SUEWS_cal_BiogenCO2( &
LAIMax, LAI_id, gsModel, Kmax, &
G_max, G_k, G_q_base, G_q_shape, G_t, G_sm, TH, TL, S1, S2, &
unused_gc1, unused_gc2, unused_gc3, unused_gc4, unused_gc5, & ! output: (unused conductances)
gfunc_use, unused_gs, unused_rs) ! output:
gfunc_use, unused_gs, unused_rs, & ! output:
modState)
END IF

IF (gsmodel == 3 .OR. gsmodel == 4) THEN ! With modelled 2 meter temperature
Expand All @@ -1342,7 +1343,8 @@ SUBROUTINE SUEWS_cal_BiogenCO2( &
LAIMax, LAI_id, gsModel, Kmax, &
G_max, G_k, G_q_base, G_q_shape, G_t, G_sm, TH, TL, S1, S2, &
unused_gc1, unused_gc2, unused_gc3, unused_gc4, unused_gc5, & ! output: (unused conductances)
gfunc2, unused_gs, unused_rs) ! output:
gfunc2, unused_gs, unused_rs, & ! output:
modState)
ELSEIF ((gsmodel == 1 .OR. gsmodel == 2) .AND. RSLLevel > 0) THEN
! Use local temperature for gsmodel 1/2 with RSL diagnostics
t2 = Tair_local
Expand Down Expand Up @@ -3390,7 +3392,8 @@ SUBROUTINE SUEWS_cal_Resistance( &
AerodynamicResistanceMethod, &
StabilityMethod, &
RoughLenHeatMethod, &
RA, z0v) ! output:
RA, z0v, & ! output:
modState)

IF (SnowUse == 1) THEN
IF (Diagnose == 1) WRITE (*, *) 'Calling AerodynamicResistance for snow...'
Expand All @@ -3404,7 +3407,8 @@ SUBROUTINE SUEWS_cal_Resistance( &
AerodynamicResistanceMethod, &
StabilityMethod, &
3, &
RASnow, z0vSnow) ! output:
RASnow, z0vSnow, & ! output:
modState)
END IF

IF (Diagnose == 1) WRITE (*, *) 'Calling SurfaceResistance...'
Expand All @@ -3416,7 +3420,8 @@ SUBROUTINE SUEWS_cal_Resistance( &
LAIMax, LAI_id, gsModel, Kmax, &
G_max, G_k, G_q_base, G_q_shape, G_t, G_sm, TH, TL, S1, S2, &
g_kdown, g_dq, g_ta, g_smd, g_lai, & ! output:
gfunc, gsc, RS) ! output:
gfunc, gsc, RS, & ! output:
modState)

IF (Diagnose == 1) WRITE (*, *) 'Calling BoundaryLayerResistance...'
CALL BoundaryLayerResistance( &
Expand Down
49 changes: 23 additions & 26 deletions src/suews/src/suews_ctrl_error.f95
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,29 @@
! 103: RSL - Interpolation bounds error in interp_z
! 104: Build/ABI mismatch - output array size disagreement across compilation units
!
! Note: Error state uses SAVE variables, so is NOT thread-safe.
! Do not call SUEWS from multiple threads simultaneously.
! Thread Safety:
! Fatal errors use module-level SAVE variables (supy_error_flag/code/message).
! These are acceptable because a fatal error terminates the simulation.
! Non-fatal warnings are routed through modState%errorstate (thread-safe).
! For multi-grid parallelism, use process-based isolation or ensure each
! thread has its own Fortran address space.
!==================================================================================================
MODULE module_ctrl_error_state
IMPLICIT NONE

! Error state variables exposed to Python via f90wrap
! Error state variables for fatal errors — exposed to Python via Rust bridge.
! These use SAVE because fatal errors terminate the run; concurrent writes
! are not a concern in practice (only one fatal error matters).
LOGICAL, SAVE :: supy_error_flag = .FALSE.
INTEGER, SAVE :: supy_error_code = 0
CHARACTER(LEN=512), SAVE :: supy_error_message = ''

! Warning state variables for non-fatal issues (module-level fallback)
INTEGER, SAVE :: supy_warning_count = 0
CHARACTER(LEN=512), SAVE :: supy_last_warning_message = ''

CONTAINS

SUBROUTINE reset_supy_error()
supy_error_flag = .FALSE.
supy_error_code = 0
supy_error_message = ''
supy_warning_count = 0
supy_last_warning_message = ''
END SUBROUTINE reset_supy_error

SUBROUTINE set_supy_error(code, message)
Expand All @@ -48,13 +48,12 @@ SUBROUTINE set_supy_error(code, message)
END SUBROUTINE set_supy_error

SUBROUTINE add_supy_warning(message)
!> Add a warning to the warning state (non-fatal, module-level fallback)
!> No-op stub: warnings should use modState%errorstate%report() instead.
!> Retained for backward compatibility with call sites that do not yet
!> have modState in scope. These warnings are silently dropped.
!> TODO: Thread modState through remaining callers and remove this stub.
CHARACTER(LEN=*), INTENT(IN) :: message
INTEGER :: msg_len

supy_warning_count = supy_warning_count + 1
msg_len = MIN(LEN_TRIM(message), 512)
supy_last_warning_message = message(1:msg_len)
! Intentionally empty — no module-level SAVE state for thread safety.
END SUBROUTINE add_supy_warning

END MODULE module_ctrl_error_state
Expand All @@ -71,8 +70,9 @@ SUBROUTINE ErrorHint(errh, ProblemFile, VALUE, value2, valueI, modState)
!value -- Error value (real number with correct type)
!value2 -- Second error value (real number with correct type)
!valueI -- Error value (integer)
!modState -- Optional SUEWS_STATE for state-based warning logging
!modState -- Optional SUEWS_STATE for thread-safe warning logging
! Last modified -----------------------------------------------------
! TS 03 Apr 2026: Remove module-level warning fallback for thread safety
! TS 17 Jan 2026: Add optional modState for state-based warning logging
! TS 17 Dec 2025: Remove legacy problems.txt/warnings.txt output (Python handles logging)
! MH 12 Apr 2017: Error code for stability added
Expand All @@ -84,14 +84,13 @@ SUBROUTINE ErrorHint(errh, ProblemFile, VALUE, value2, valueI, modState)
! LJ 08 Feb 2013
!--------------------------------------------------------------------
!
! Thread Safety (GH#1042):
! When modState is provided, warnings are logged to state%errorstate (thread-safe).
! Otherwise, falls back to module-level warning state (NOT thread-safe).
! Do not call the SUEWS kernel concurrently from multiple threads without state.
! Use process-based parallelism or serialize calls with a lock in the caller.
! Thread Safety:
! Warnings are logged to modState%errorstate when provided (thread-safe).
! If modState is absent, warnings are silently dropped (no module-level state).
! Fatal errors still use module-level set_supy_error (acceptable: run terminates).

USE module_ctrl_const_datain
USE module_ctrl_error_state, ONLY: set_supy_error, add_supy_warning
USE module_ctrl_error_state, ONLY: set_supy_error
USE module_ctrl_type, ONLY: SUEWS_STATE
! USE module_ctrl_const_wherewhen

Expand Down Expand Up @@ -435,15 +434,13 @@ SUBROUTINE ErrorHint(errh, ProblemFile, VALUE, value2, valueI, modState)
CALL wrf_debug(100, message)
CALL wrf_debug(100, Errmessage)
#else
! SuPy: use state-based logging if available (thread-safe, full history)
! SuPy: use state-based logging when available (thread-safe)
! If modState is absent (e.g. dead code paths), warning is silently dropped.
IF (PRESENT(modState)) THEN
CALL modState%errorstate%report( &
message=TRIM(text1)//': '//TRIM(ProblemFile), &
location='ErrorHint', &
is_fatal=.FALSE.)
ELSE
! Fallback to module-level warning state (not thread-safe)
CALL add_supy_warning(TRIM(text1)//': '//TRIM(ProblemFile))
END IF
#endif
END IF
Expand Down
21 changes: 14 additions & 7 deletions src/suews/src/suews_ctrl_type.f95
Original file line number Diff line number Diff line change
Expand Up @@ -352,13 +352,17 @@ FUNCTION has_error_state(self) RESULT(has_err)
END FUNCTION has_error_state

SUBROUTINE report_error_impl(self, message, location, is_fatal, timer)
!> Report an error/warning to the error log
!> Report an error/warning to the error log.
!> Non-fatal warnings are capped at MAX_WARNING_LOG entries to prevent
!> unbounded memory growth in long simulations. Fatal entries are
!> always stored regardless of the cap.
CLASS(error_state), INTENT(INOUT) :: self
CHARACTER(LEN=*), INTENT(IN) :: message
CHARACTER(LEN=*), INTENT(IN) :: location
LOGICAL, INTENT(IN), OPTIONAL :: is_fatal
TYPE(SUEWS_TIMER), INTENT(IN), OPTIONAL :: timer

INTEGER, PARAMETER :: MAX_WARNING_LOG = 512
TYPE(error_entry), ALLOCATABLE :: temp(:)
TYPE(SUEWS_TIMER) :: timer_use
INTEGER :: new_size
Expand All @@ -367,6 +371,15 @@ SUBROUTINE report_error_impl(self, message, location, is_fatal, timer)
fatal = .FALSE.
IF (PRESENT(is_fatal)) fatal = is_fatal

IF (fatal) THEN
self%has_fatal = .TRUE.
self%flag = .TRUE.
self%message = message
END IF

! Cap non-fatal entries to avoid unbounded allocation in year-long runs
IF (.NOT. fatal .AND. self%count >= MAX_WARNING_LOG) RETURN

! Use provided timer or default to zeros
IF (PRESENT(timer)) THEN
timer_use = timer
Expand All @@ -393,12 +406,6 @@ SUBROUTINE report_error_impl(self, message, location, is_fatal, timer)
self%log(self%count)%message = message
self%log(self%count)%location = location
self%log(self%count)%is_fatal = fatal

IF (fatal) THEN
self%has_fatal = .TRUE.
self%flag = .TRUE.
self%message = message
END IF
END SUBROUTINE report_error_impl

SUBROUTINE clear_error_log(self)
Expand Down
2 changes: 1 addition & 1 deletion src/suews/src/suews_phys_lumps.f95
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ SUBROUTINE LUMPS_cal_QHQE( &

! Calculate slope of the saturation vapour pressure vs air temp.
s_hPa = slope_svp(Temp_C)
psyc_hPa = psyc_const(avcp, Press_hPa, lv_J_kg)
psyc_hPa = psyc_const(avcp, Press_hPa, lv_J_kg, modState)
psyc_s = psyc_hPa/s_hPa

!Calculate also sublimation ones if snow calculations are made.
Expand Down
Loading
Loading