From 34b8034a4f22487c39afba42d70fd5398c5f6157 Mon Sep 17 00:00:00 2001
From: James Frost
Date: Tue, 10 Feb 2026 15:02:49 +0000
Subject: [PATCH] Add pickle backed load cache to fetch_fcst

This does the loading of the data into cubes (with all associated
callbacks) once in the fetch_fcst step, vastly speeding up the loading
during recipe baking.

The read operator is updated to use this cache when it is provided a
directory, and that directory contains a `loadcache.pickle` file.
Otherwise loading will continue as normal.
---
 .../app/fetch_fcst/bin/fetch_data.py          | 20 +++++++++++++++++++
 src/CSET/operators/read.py                    | 20 +++++++++++++------
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/src/CSET/cset_workflow/app/fetch_fcst/bin/fetch_data.py b/src/CSET/cset_workflow/app/fetch_fcst/bin/fetch_data.py
index 8bf13cfff..dff160de4 100644
--- a/src/CSET/cset_workflow/app/fetch_fcst/bin/fetch_data.py
+++ b/src/CSET/cset_workflow/app/fetch_fcst/bin/fetch_data.py
@@ -20,6 +20,7 @@
 import itertools
 import logging
 import os
+import pickle
 import ssl
 import urllib.parse
 import urllib.request
@@ -30,6 +31,8 @@
 
 import isodate
 
+from CSET.operators import read
+
 logging.basicConfig(
     level=os.getenv("LOGLEVEL", "INFO"), format="%(asctime)s %(levelname)s %(message)s"
 )
@@ -294,6 +297,23 @@ def fetch_data(file_retriever: FileRetrieverABC):
     if not any_files_found:
         raise FileNotFoundError("No files found for model!")
 
+    # Create the load cache for this model.
+    prime_load_cache(cycle_data_dir)
+
+
+def prime_load_cache(data_directory: str):
+    """Create the load cache for directory."""
+    # Load in the cubes, applying all the callbacks and such.
+    logging.info("Reading in cubes for caching.")
+    cubes = read.read_cubes(data_directory)
+    # Remove the added cset_comparison_base attribute, if present.
+    for cube in cubes:
+        cube.attributes.pop("cset_comparison_base", None)
+    logging.info("Writing cache file.")
+    # Pickle to a cache file.
+    with open(Path(data_directory, "loadcache.pickle"), "wb") as fp:
+        pickle.dump(cubes, fp)
+
 
 def fetch_obs(obs_retriever: FileRetrieverABC):
     """Fetch the observations corresponding to a model run.
diff --git a/src/CSET/operators/read.py b/src/CSET/operators/read.py
index 4d231c265..6133b30d1 100644
--- a/src/CSET/operators/read.py
+++ b/src/CSET/operators/read.py
@@ -20,6 +20,7 @@
 import glob
 import itertools
 import logging
+import pickle
 from pathlib import Path
 from typing import Literal
 
@@ -218,12 +219,19 @@ def _load_model(
     constraint: iris.Constraint | None,
 ) -> iris.cube.CubeList:
     """Load a single model's data into a CubeList."""
-    input_files = _check_input_files(paths)
-    # If unset, a constraint of None lets everything be loaded.
-    logging.debug("Constraint: %s", constraint)
-    cubes = iris.load(input_files, constraint, callback=_loading_callback)
-    # Make the UM's winds consistent with LFRic.
-    _fix_um_winds(cubes)
+    cache_file = Path(paths, "loadcache.pickle") if isinstance(paths, (str, Path)) else None
+    if cache_file and cache_file.is_file():
+        # Load from pickled cache.
+        with open(cache_file, "rb") as fp:
+            all_cubes = pickle.load(fp)
+        cubes = all_cubes.extract(constraint)
+    else:
+        input_files = _check_input_files(paths)
+        # If unset, a constraint of None lets everything be loaded.
+        logging.debug("Constraint: %s", constraint)
+        cubes = iris.load(input_files, constraint, callback=_loading_callback)
+        # Make the UM's winds consistent with LFRic.
+        _fix_um_winds(cubes)
 
     # Add model_name attribute to each cube to make it available at any further
     # step without needing to pass it as function parameter.