From 62fc920c8b172539145fc4dc48144e037c829c2c Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Tue, 24 Feb 2026 15:57:15 +1300 Subject: [PATCH 1/8] add check realisation script --- workflow/scripts/check_realisation.py | 353 ++++++++++++++++++++++++++ 1 file changed, 353 insertions(+) create mode 100644 workflow/scripts/check_realisation.py diff --git a/workflow/scripts/check_realisation.py b/workflow/scripts/check_realisation.py new file mode 100644 index 0000000..822e997 --- /dev/null +++ b/workflow/scripts/check_realisation.py @@ -0,0 +1,353 @@ +"""Check that realisation can be loaded, if it can't automatically trim extraneous tags and offer to fill in default values.""" + +import difflib +import inspect +import json +import re +from enum import Enum, auto +from pathlib import Path + +import schema +import typer +from rich.console import Console + +from workflow import realisations +from workflow.defaults import DefaultsVersion + +app = typer.Typer() +console = Console() + + +class Response(Enum): + YES = auto() + NO = auto() + AUTO = auto() # Always (!) + NEVER = auto() # Never (N) + + +def is_realisation_configuration(cls: type) -> bool: + return ( + cls != realisations.RealisationConfiguration + and inspect.isclass(cls) + and issubclass(cls, realisations.RealisationConfiguration) + ) + + +def realisation_configurations() -> list[type]: + return [ + cls + for name, cls in inspect.getmembers(realisations) + if is_realisation_configuration(cls) + ] + + +def loadable_defaults( + configurations: list[type], defaults: DefaultsVersion +) -> dict[type, realisations.RealisationConfiguration]: + config_defaults = {} + for config in configurations: + if not issubclass(config, realisations.RealisationConfiguration): + raise TypeError( + f"{config=} should be a subclass of realisations.RealisationConfiguration" + ) + else: + try: + default_config = config.read_from_defaults(defaults) + config_defaults[config] = default_config + except realisations.RealisationParseError: + continue + return config_defaults + + +def yes_no_always_prompt(raw_prompt: str) -> Response: + """Prompt user for a decision, handling y, n, !, and N.""" + prompt = f"[bold]{raw_prompt}[/bold] (y/n/!/N): " + while True: + # Use console.input to support rich markup in the prompt + raw_response = console.input(prompt).strip() + + # Exact match for case-sensitive 'N' (Never) + if raw_response == "N": + return Response.NEVER + elif raw_response == "!": + return Response.AUTO + elif raw_response.lower() == "y": + return Response.YES + elif raw_response.lower() == "n": + return Response.NO + + +def prompt_autofill( + realisation: Path, + config: realisations.RealisationConfiguration, + auto_state: Response | None, + dry_run: bool, +) -> Response: + response = auto_state + + if response not in (Response.AUTO, Response.NEVER): + response = yes_no_always_prompt( + f"Defaults are available for {config.__class__.__name__}, autofill?" + ) + + if response in (Response.YES, Response.AUTO): + if dry_run: + console.print(f"[magenta]DRY RUN: Would autofill {realisation}[/magenta]") + else: + config.write_to_realisation(realisation) + + return response + + +def extract_error( + name: str, schema: schema.Schema, e: schema.SchemaError +) -> tuple[str, list[str]]: + """Returns the formatted error string and a list of extraneous keys found.""" + path_segments = [str(a) for a in e.autos if isinstance(a, str)] + keys = [] + for segment in path_segments: + if match := re.match(r"^Key '(.*?)'", segment): + keys.append(match.group(1)) + + last_error = e.autos[-1] if e.autos else str(e) + table_path = f"[bold cyan]{name}[/bold cyan]" + extraneous_keys = [] + + # Handle multiple wrong keys: "Wrong keys 'dt', 'resolution' in..." + if "Wrong keys" in last_error: + # Extract everything between single quotes + extraneous_keys = re.findall(r"'(.*?)'", last_error.split(" in {")[0]) + error_msg = ( + f"Extraneous keys found: [bold red]{', '.join(extraneous_keys)}[/bold red]" + ) + return f"Error in {table_path}: {error_msg}", extraneous_keys + + # Fallback to existing logic for "Wrong key" (singular/typo) + if match := re.match(r"^Wrong key '(.*?)'", last_error): + unknown_key = match.group(1) + # ... (keep your existing fuzzy matching logic here) ... + return f"Error in {table_path}: Unknown key '{unknown_key}'", [unknown_key] + + return f"Error in {table_path}: {last_error}", [] + + +def prompt_migrate( + realisation: Path, + config: type, + error: schema.SchemaError, + defaults: realisations.RealisationConfiguration | None, + auto_state: Response | None, + dry_run: bool, +) -> Response: + assert issubclass(config, realisations.RealisationConfiguration) + name = config._config_key + + console.print(f"[red]Error loading {name}:[/red]") + error_msg, extraneous_keys = extract_error( + config._config_key, config._schema, error + ) + console.print(error_msg) + + response = auto_state + if extraneous_keys: + if response not in (Response.AUTO, Response.NEVER): + response = yes_no_always_prompt( + f"Remove extraneous keys {extraneous_keys}?" + ) + + if response in (Response.YES, Response.AUTO): + if dry_run: + console.print( + f"[magenta]DRY RUN: Would remove {extraneous_keys} from {realisation}[/magenta]" + ) + else: + # Load raw data, delete keys, save back + with open(realisation, "r") as f: + data = json.load(f) + + # Note: This logic assumes keys are at the root or you'd need + # to traverse based on the 'keys' list from pprint_error + config_data = data[config._config_key] + for k in extraneous_keys: + config_data.pop(k, None) + + with open(realisation, "w") as f: + json.dump(data, f, indent=4) + console.print(f"[green]Successfully trimmed {realisation}[/green]") + return response + if defaults: + if response not in (Response.AUTO, Response.NEVER): + response = yes_no_always_prompt( + "Defaults are available, replace with defaults?" + ) + + if response in (Response.YES, Response.AUTO): + if dry_run: + console.print( + f"[magenta]DRY RUN: Would migrate defaults to {realisation}[/magenta]" + ) + else: + defaults.write_to_realisation(realisation) + else: + return response or Response.NO + + return response + + +def print_diff(config_a: dict, config_b: dict) -> None: + config_a_str = json.dumps(config_a, indent=4) + config_b_str = json.dumps(config_b, indent=4) + + diff = difflib.context_diff( + config_a_str.splitlines(keepends=True), + config_b_str.splitlines(keepends=True), + fromfile="Current", + tofile="Defaults", + ) + + for line in diff: + if line.startswith("+ "): + console.print(f"[green]{line}[/green]", end="") + elif line.startswith("- "): + console.print(f"[red]{line}[/red]", end="") + elif line.startswith("!"): + console.print(f"[yellow]{line}[/yellow]", end="") + else: + console.print(line, end="") + + +def prompt_update( + realisation: Path, + loaded_config: realisations.RealisationConfiguration, + default_config: realisations.RealisationConfiguration, + auto_state: Response | None, + dry_run: bool, +) -> Response: + loaded_conf_dict = loaded_config.to_dict() + default_dict = default_config.to_dict() + response = auto_state + + if loaded_conf_dict != default_dict: + console.print("[yellow]Defaults differ from saved value:[/yellow]") + print_diff(loaded_conf_dict, default_dict) + + if response not in (Response.AUTO, Response.NEVER): + response = yes_no_always_prompt("Accept defaults?") + + if response in (Response.YES, Response.AUTO): + if dry_run: + console.print( + f"[magenta]DRY RUN: Would update {realisation} with defaults[/magenta]" + ) + else: + default_config.write_to_realisation(realisation) + else: + return response or Response.NO + + return response + + +def migrate( + realisation: Path, + defaults_version: DefaultsVersion, + check_configs: list[type], + defaults: dict[type, realisations.RealisationConfiguration], + auto_fill: dict[type, Response], + auto_migrate: dict[type, Response], + auto_update: dict[type, Response], + dry_run: bool, +) -> None: + metadata = realisations.RealisationMetadata.read_from_realisation(realisation) + if metadata.defaults_version != defaults_version: + console.print( + f"[magenta]Updating defaults in {realisation} from {metadata.defaults_version} to {defaults}[/magenta]" + ) + if not dry_run: + metadata.defaults_version = defaults_version + metadata.write_to_realisation(realisation) + + for config in check_configs: + if not issubclass(config, realisations.RealisationConfiguration): + raise TypeError( + f"{config=} should be a subclass of realisations.RealisationConfiguration" + ) + else: + try: + loaded_config = config.read_from_realisation(realisation) + if default_config := defaults.get(config): + response = prompt_update( + realisation, + loaded_config, + default_config, + auto_update.get(config), + dry_run, + ) + if response in (Response.AUTO, Response.NEVER): + auto_update[config] = response + + except realisations.RealisationParseError: + if default_config := defaults.get(config): + response = prompt_autofill( + realisation, + default_config, + auto_state=auto_fill.get(config), + dry_run=dry_run, + ) + if response in (Response.AUTO, Response.NEVER): + auto_fill[config] = response + + except schema.SchemaError as error: + default_config = defaults.get(config) + response = prompt_migrate( + realisation, + config, + error, + default_config, + auto_state=auto_migrate.get(config), + dry_run=dry_run, + ) + if response in (Response.AUTO, Response.NEVER): + auto_migrate[config] = response + + except Exception as e: # noqa: BLE001 + console.print( + f"[bold red]Could not load realisation {realisation} for unrecoverable reason:[/bold red]" + ) + console.print(str(e)) + console.print("[yellow]Skipping[/yellow]") + + +@app.command() +def migrate_all( + realiasation_directory: Path, + defaults_version: DefaultsVersion, + glob: str = "*.json", + dry_run: bool = False, +) -> None: + if dry_run: + console.print( + "[bold magenta]*** RUNNING IN DRY RUN MODE - NO FILES WILL BE MODIFIED ***[/bold magenta]" + ) + + auto_fill: dict[type, Response] = {} + auto_migrate: dict[type, Response] = {} + auto_update: dict[type, Response] = {} + + configs = realisation_configurations() + defaults = loadable_defaults(configs, defaults_version) + + for realisation in realiasation_directory.rglob(glob): + migrate( + realisation, + defaults_version, + configs, + defaults, + auto_fill, + auto_migrate, + auto_update, + dry_run, + ) + + +if __name__ == "__main__": + app() From 6a4dda4ed57d04ea572e7d4b61856bb3be56b5b1 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Tue, 24 Feb 2026 17:36:18 +1300 Subject: [PATCH 2/8] simplify migration script --- uv.lock | 2 - workflow/scripts/check_realisation.py | 290 +++++++++++--------------- 2 files changed, 117 insertions(+), 175 deletions(-) diff --git a/uv.lock b/uv.lock index 070aea5..713d442 100644 --- a/uv.lock +++ b/uv.lock @@ -3107,7 +3107,6 @@ dependencies = [ { name = "geopandas" }, { name = "im-calculation" }, { name = "nshmdb" }, - { name = "numba" }, { name = "numpy" }, { name = "oq-wrapper" }, { name = "pandas", extra = ["hdf5", "parquet"] }, @@ -3151,7 +3150,6 @@ requires-dist = [ { name = "hypothesis", extras = ["numpy"], marker = "extra == 'test'", specifier = ">=6.0.0" }, { name = "im-calculation", specifier = ">=2025.12.5" }, { name = "nshmdb", specifier = ">=2025.12.1" }, - { name = "numba", specifier = ">=0.63.0" }, { name = "numpy" }, { name = "numpydoc", marker = "extra == 'dev'" }, { name = "oq-wrapper", specifier = ">=2025.12.3" }, diff --git a/workflow/scripts/check_realisation.py b/workflow/scripts/check_realisation.py index 822e997..70a639d 100644 --- a/workflow/scripts/check_realisation.py +++ b/workflow/scripts/check_realisation.py @@ -4,8 +4,10 @@ import inspect import json import re +from collections import defaultdict from enum import Enum, auto from pathlib import Path +from typing import Mapping import schema import typer @@ -25,6 +27,13 @@ class Response(Enum): NEVER = auto() # Never (N) +class Action(Enum): + MIGRATE = auto() + TRIM = auto() + FILL = auto() + UPDATE = auto() + + def is_realisation_configuration(cls: type) -> bool: return ( cls != realisations.RealisationConfiguration @@ -61,42 +70,30 @@ def loadable_defaults( def yes_no_always_prompt(raw_prompt: str) -> Response: """Prompt user for a decision, handling y, n, !, and N.""" - prompt = f"[bold]{raw_prompt}[/bold] (y/n/!/N): " + prompt = f"{raw_prompt} (y/n/!/N): " + response_map = { + "N": Response.NEVER, + "!": Response.AUTO, + "y": Response.YES, + "n": Response.NO, + } while True: - # Use console.input to support rich markup in the prompt - raw_response = console.input(prompt).strip() + raw_response = input(prompt).strip() + if raw_response in response_map: + return response_map[raw_response] - # Exact match for case-sensitive 'N' (Never) - if raw_response == "N": - return Response.NEVER - elif raw_response == "!": - return Response.AUTO - elif raw_response.lower() == "y": - return Response.YES - elif raw_response.lower() == "n": - return Response.NO - -def prompt_autofill( +def autofill( realisation: Path, config: realisations.RealisationConfiguration, - auto_state: Response | None, dry_run: bool, -) -> Response: - response = auto_state - - if response not in (Response.AUTO, Response.NEVER): - response = yes_no_always_prompt( - f"Defaults are available for {config.__class__.__name__}, autofill?" +) -> None: + if dry_run: + console.print( + f"DRY RUN: Would merge with {config.__class__.__name__} defaults in {realisation}" ) - - if response in (Response.YES, Response.AUTO): - if dry_run: - console.print(f"[magenta]DRY RUN: Would autofill {realisation}[/magenta]") - else: - config.write_to_realisation(realisation) - - return response + else: + config.write_to_realisation(realisation) def extract_error( @@ -110,88 +107,52 @@ def extract_error( keys.append(match.group(1)) last_error = e.autos[-1] if e.autos else str(e) - table_path = f"[bold cyan]{name}[/bold cyan]" extraneous_keys = [] # Handle multiple wrong keys: "Wrong keys 'dt', 'resolution' in..." if "Wrong keys" in last_error: # Extract everything between single quotes extraneous_keys = re.findall(r"'(.*?)'", last_error.split(" in {")[0]) - error_msg = ( - f"Extraneous keys found: [bold red]{', '.join(extraneous_keys)}[/bold red]" - ) - return f"Error in {table_path}: {error_msg}", extraneous_keys + error_msg = f"Extraneous keys found: [red]{', '.join(extraneous_keys)}[/red]" + return f"Error in {name}: {error_msg}", extraneous_keys # Fallback to existing logic for "Wrong key" (singular/typo) if match := re.match(r"^Wrong key '(.*?)'", last_error): unknown_key = match.group(1) # ... (keep your existing fuzzy matching logic here) ... - return f"Error in {table_path}: Unknown key '{unknown_key}'", [unknown_key] + return f"Error in {name}: Unknown key '{unknown_key}'", [unknown_key] - return f"Error in {table_path}: {last_error}", [] + return f"Error in {name}: {last_error}", [] -def prompt_migrate( - realisation: Path, - config: type, - error: schema.SchemaError, - defaults: realisations.RealisationConfiguration | None, - auto_state: Response | None, - dry_run: bool, -) -> Response: - assert issubclass(config, realisations.RealisationConfiguration) - name = config._config_key +def should_trim_keys(config: type, extra_keys: list[str]) -> Response: + return yes_no_always_prompt(f"Remove extraneous keys {extra_keys}?") - console.print(f"[red]Error loading {name}:[/red]") - error_msg, extraneous_keys = extract_error( - config._config_key, config._schema, error - ) - console.print(error_msg) - response = auto_state - if extraneous_keys: - if response not in (Response.AUTO, Response.NEVER): - response = yes_no_always_prompt( - f"Remove extraneous keys {extraneous_keys}?" - ) +def should_update(config: type) -> Response: + return yes_no_always_prompt(f"Merge with defaults for {config._config_key}?") - if response in (Response.YES, Response.AUTO): - if dry_run: - console.print( - f"[magenta]DRY RUN: Would remove {extraneous_keys} from {realisation}[/magenta]" - ) - else: - # Load raw data, delete keys, save back - with open(realisation, "r") as f: - data = json.load(f) - - # Note: This logic assumes keys are at the root or you'd need - # to traverse based on the 'keys' list from pprint_error - config_data = data[config._config_key] - for k in extraneous_keys: - config_data.pop(k, None) - - with open(realisation, "w") as f: - json.dump(data, f, indent=4) - console.print(f"[green]Successfully trimmed {realisation}[/green]") - return response - if defaults: - if response not in (Response.AUTO, Response.NEVER): - response = yes_no_always_prompt( - "Defaults are available, replace with defaults?" - ) - if response in (Response.YES, Response.AUTO): - if dry_run: - console.print( - f"[magenta]DRY RUN: Would migrate defaults to {realisation}[/magenta]" - ) - else: - defaults.write_to_realisation(realisation) +def trim_keys( + realisation: Path, + config: type, + extra_keys: list[str], + dry_run: bool, +) -> None: + + if dry_run: + console.print(f"DRY RUN: Would remove {extra_keys} from {realisation}") else: - return response or Response.NO + # Load raw data, delete keys, save back + with open(realisation, "r") as f: + data = json.load(f) - return response + config_data = data[config._config_key] + for k in extra_keys: + config_data.pop(k, None) + + with open(realisation, "w") as f: + json.dump(data, f, indent=4) def print_diff(config_a: dict, config_b: dict) -> None: @@ -216,105 +177,97 @@ def print_diff(config_a: dict, config_b: dict) -> None: console.print(line, end="") -def prompt_update( - realisation: Path, - loaded_config: realisations.RealisationConfiguration, - default_config: realisations.RealisationConfiguration, - auto_state: Response | None, - dry_run: bool, -) -> Response: - loaded_conf_dict = loaded_config.to_dict() - default_dict = default_config.to_dict() - response = auto_state - - if loaded_conf_dict != default_dict: - console.print("[yellow]Defaults differ from saved value:[/yellow]") - print_diff(loaded_conf_dict, default_dict) - - if response not in (Response.AUTO, Response.NEVER): - response = yes_no_always_prompt("Accept defaults?") - - if response in (Response.YES, Response.AUTO): - if dry_run: - console.print( - f"[magenta]DRY RUN: Would update {realisation} with defaults[/magenta]" - ) - else: - default_config.write_to_realisation(realisation) - else: - return response or Response.NO - - return response - - def migrate( realisation: Path, defaults_version: DefaultsVersion, check_configs: list[type], defaults: dict[type, realisations.RealisationConfiguration], - auto_fill: dict[type, Response], - auto_migrate: dict[type, Response], - auto_update: dict[type, Response], + auto_response: Mapping[tuple[type, Action], Response], dry_run: bool, ) -> None: metadata = realisations.RealisationMetadata.read_from_realisation(realisation) if metadata.defaults_version != defaults_version: console.print( - f"[magenta]Updating defaults in {realisation} from {metadata.defaults_version} to {defaults}[/magenta]" + f"Updating defaults in {realisation} from {metadata.defaults_version} to {defaults_version}" ) if not dry_run: metadata.defaults_version = defaults_version metadata.write_to_realisation(realisation) + try: + with open(realisation, "r") as f: + json_data = json.load(f) + except json.JSONDecodeError: + console.print( + f"[bold red]Invalid JSON in {realisation}, skipping...[/bold red]" + ) + return for config in check_configs: if not issubclass(config, realisations.RealisationConfiguration): raise TypeError( f"{config=} should be a subclass of realisations.RealisationConfiguration" ) - else: - try: - loaded_config = config.read_from_realisation(realisation) - if default_config := defaults.get(config): - response = prompt_update( - realisation, - loaded_config, - default_config, - auto_update.get(config), - dry_run, - ) - if response in (Response.AUTO, Response.NEVER): - auto_update[config] = response + elif default_config := defaults.get(config): + default_config_dict = default_config.to_dict() + current_config = json_data.get(config._config_key, dict()) + if current_config != default_config_dict: + print_diff(current_config, default_config_dict) + print("") + response = auto_response.get((config, Action.UPDATE)) or should_update( + config + ) - except realisations.RealisationParseError: - if default_config := defaults.get(config): - response = prompt_autofill( + if response in (Response.AUTO, Response.NEVER): + auto_response[Action.UPDATE] = response + + if response in (response.AUTO, response.YES): + autofill( realisation, default_config, - auto_state=auto_fill.get(config), dry_run=dry_run, ) - if response in (Response.AUTO, Response.NEVER): - auto_fill[config] = response - - except schema.SchemaError as error: - default_config = defaults.get(config) - response = prompt_migrate( - realisation, - config, - error, - default_config, - auto_state=auto_migrate.get(config), - dry_run=dry_run, - ) - if response in (Response.AUTO, Response.NEVER): - auto_migrate[config] = response - except Exception as e: # noqa: BLE001 + # Basic validation complete, now try to resolve schema errors + try: + _ = config.read_from_realisation(realisation) + except realisations.RealisationParseError: + if config not in defaults and config != realisations.Seeds: console.print( - f"[bold red]Could not load realisation {realisation} for unrecoverable reason:[/bold red]" + f"[bold red]Missing required configuration {config.__class__.__name__}[/bold red]" + ) + except schema.SchemaError as error: + console.print(f"[red]Schema error for {realisation}[/red]") + + default_config = defaults.get(config) + error, extra_keys = extract_error(config._config_key, config._schema, error) + console.print(error) + if extra_keys: + response = auto_response.get(Action.TRIM) or should_trim_keys( + config, extra_keys ) - console.print(str(e)) - console.print("[yellow]Skipping[/yellow]") + + if response in (Response.AUTO, Response.NEVER): + auto_response[Action.TRIM] = response + + if response in (response.AUTO, response.YES): + trim_keys(realisation, config, extra_keys, dry_run) + # Try to read one more time + try: + _ = config.read_from_realisation(realisation) + except schema.SchemaError as error: + error, _ = extract_error( + config._config_key, config._schema, error + ) + console.print( + f"[bold red]Unrecoverable schema error for {realisation}[/bold red]" + ) + console.print(error) + + except Exception as e: # noqa: BLE001 + console.print( + f"[bold red]Could not load realisation {realisation} for unrecoverable reason:[/bold red]" + ) + console.print(str(e)) @app.command() @@ -324,15 +277,8 @@ def migrate_all( glob: str = "*.json", dry_run: bool = False, ) -> None: - if dry_run: - console.print( - "[bold magenta]*** RUNNING IN DRY RUN MODE - NO FILES WILL BE MODIFIED ***[/bold magenta]" - ) - - auto_fill: dict[type, Response] = {} - auto_migrate: dict[type, Response] = {} - auto_update: dict[type, Response] = {} + auto_response = dict() configs = realisation_configurations() defaults = loadable_defaults(configs, defaults_version) @@ -342,9 +288,7 @@ def migrate_all( defaults_version, configs, defaults, - auto_fill, - auto_migrate, - auto_update, + auto_response, dry_run, ) From d6f3754b3ddc6aec27b8ff4727d116d54003772b Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Tue, 3 Mar 2026 09:40:37 +1300 Subject: [PATCH 3/8] fix(check-realisation): correctly store auto response --- workflow/scripts/check_realisation.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/workflow/scripts/check_realisation.py b/workflow/scripts/check_realisation.py index 70a639d..f4a557f 100644 --- a/workflow/scripts/check_realisation.py +++ b/workflow/scripts/check_realisation.py @@ -1,4 +1,5 @@ """Check that realisation can be loaded, if it can't automatically trim extraneous tags and offer to fill in default values.""" +from collections.abc import MutableMapping import difflib import inspect @@ -182,7 +183,7 @@ def migrate( defaults_version: DefaultsVersion, check_configs: list[type], defaults: dict[type, realisations.RealisationConfiguration], - auto_response: Mapping[tuple[type, Action], Response], + auto_response: MutableMapping[tuple[type, Action], Response], dry_run: bool, ) -> None: metadata = realisations.RealisationMetadata.read_from_realisation(realisation) @@ -218,7 +219,7 @@ def migrate( ) if response in (Response.AUTO, Response.NEVER): - auto_response[Action.UPDATE] = response + auto_response[(config, Action.UPDATE)] = response if response in (response.AUTO, response.YES): autofill( @@ -242,12 +243,12 @@ def migrate( error, extra_keys = extract_error(config._config_key, config._schema, error) console.print(error) if extra_keys: - response = auto_response.get(Action.TRIM) or should_trim_keys( + response = auto_response.get((config, Action.TRIM)) or should_trim_keys( config, extra_keys ) if response in (Response.AUTO, Response.NEVER): - auto_response[Action.TRIM] = response + auto_response[(config, Action.TRIM)] = response if response in (response.AUTO, response.YES): trim_keys(realisation, config, extra_keys, dry_run) From 08627cc3b99826ed37f9e483077701ab371ad641 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Tue, 10 Mar 2026 09:18:16 +1300 Subject: [PATCH 4/8] refactor(check_script): rename to migrate --- workflow/scripts/{check_realisation.py => migrate.py} | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) rename workflow/scripts/{check_realisation.py => migrate.py} (99%) diff --git a/workflow/scripts/check_realisation.py b/workflow/scripts/migrate.py similarity index 99% rename from workflow/scripts/check_realisation.py rename to workflow/scripts/migrate.py index f4a557f..4a39b36 100644 --- a/workflow/scripts/check_realisation.py +++ b/workflow/scripts/migrate.py @@ -1,14 +1,12 @@ """Check that realisation can be loaded, if it can't automatically trim extraneous tags and offer to fill in default values.""" -from collections.abc import MutableMapping import difflib import inspect import json import re -from collections import defaultdict +from collections.abc import MutableMapping from enum import Enum, auto from pathlib import Path -from typing import Mapping import schema import typer From 64a705f0d205b6fc07ea9c49a1b260eb20e5fa96 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Tue, 10 Mar 2026 09:26:41 +1300 Subject: [PATCH 5/8] refactor(migrate): move utilities out of migrate script --- workflow/scripts/migrate.py | 38 ++---------------- workflow/utils.py | 80 +++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 34 deletions(-) diff --git a/workflow/scripts/migrate.py b/workflow/scripts/migrate.py index 4a39b36..6201052 100644 --- a/workflow/scripts/migrate.py +++ b/workflow/scripts/migrate.py @@ -20,6 +20,8 @@ class Response(Enum): + """Enum for response to prompts asked of user.""" + YES = auto() NO = auto() AUTO = auto() # Always (!) @@ -27,46 +29,14 @@ class Response(Enum): class Action(Enum): + """Migration actions that can be taken on realisation configuration.""" + MIGRATE = auto() TRIM = auto() FILL = auto() UPDATE = auto() -def is_realisation_configuration(cls: type) -> bool: - return ( - cls != realisations.RealisationConfiguration - and inspect.isclass(cls) - and issubclass(cls, realisations.RealisationConfiguration) - ) - - -def realisation_configurations() -> list[type]: - return [ - cls - for name, cls in inspect.getmembers(realisations) - if is_realisation_configuration(cls) - ] - - -def loadable_defaults( - configurations: list[type], defaults: DefaultsVersion -) -> dict[type, realisations.RealisationConfiguration]: - config_defaults = {} - for config in configurations: - if not issubclass(config, realisations.RealisationConfiguration): - raise TypeError( - f"{config=} should be a subclass of realisations.RealisationConfiguration" - ) - else: - try: - default_config = config.read_from_defaults(defaults) - config_defaults[config] = default_config - except realisations.RealisationParseError: - continue - return config_defaults - - def yes_no_always_prompt(raw_prompt: str) -> Response: """Prompt user for a decision, handling y, n, !, and N.""" prompt = f"{raw_prompt} (y/n/!/N): " diff --git a/workflow/utils.py b/workflow/utils.py index 3e3c627..6ba85fd 100644 --- a/workflow/utils.py +++ b/workflow/utils.py @@ -1,5 +1,6 @@ """Miscellaneous workflow utilities that couldn't go anywhere else.""" +import inspect import os import tempfile import urllib.request @@ -13,6 +14,7 @@ from shapely import Geometry, Polygon, geometry from qcore import coordinates +from workflow import defaults, realisations NZ_COASTLINE_URL = "https://www.dropbox.com/scl/fi/zkohh794y0s2189t7b1hi/NZ.gmt?rlkey=02011f4morc4toutt9nzojrw1&st=vpz2ri8x&dl=1" @@ -174,3 +176,81 @@ def dict_zip(*dicts: Mapping[K, Any], strict: bool = True) -> dict[K, tuple[Any, result = {key: tuple(d[key] for d in dicts) for key in list(keys)} return result + + +def is_realisation_configuration(cls: type) -> bool: + """Returns True if the class is a subclass of realisation configuration. + + Parameters + ---------- + cls : type + Type to check. + + Returns + ------- + bool + True if class is a realisation configuration. + """ + return ( + cls != realisations.RealisationConfiguration + and inspect.isclass(cls) + and issubclass(cls, realisations.RealisationConfiguration) + ) + + +def realisation_configurations() -> list[type]: + """Return a list of all realisation configurations. + + Returns + ------- + list[type] + A list of all realisation configuration types. + """ + return [ + cls + for name, cls in inspect.getmembers(realisations) + if is_realisation_configuration(cls) + ] + + +def loadable_defaults( + configurations: list[type], defaults: defaults.DefaultsVersion +) -> dict[type, realisations.RealisationConfiguration]: + """Filter a list of realisation configurations for those with loadable defaults. + + + + Parameters + ---------- + configurations : list[type] + Configurations to filter. + defaults : defaults.DefaultsVersion + Defaults to try and load. + + + Returns + ------- + dict[type, realisations.RealisationConfiguration] + A mapping from realisation configuration types to their + defaults specified by ``defaults``. + + Raises + ------ + TypeError + If ``configurations`` contains a type that is not a + realisation configuration. + + """ + config_defaults = {} + for config in configurations: + if not is_realisation_configuration(config): + raise TypeError( + f"{config=} should be a subclass of realisations.RealisationConfiguration" + ) + else: + try: + default_config = config.read_from_defaults(defaults) # type: ignore[unresolved-attribute] + config_defaults[config] = default_config + except realisations.RealisationParseError: + continue + return config_defaults From f6217a707799d8f5f4073b153b7ba540343c2d6f Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Tue, 10 Mar 2026 14:29:47 +1300 Subject: [PATCH 6/8] feat(migrate): add more refactoring utilities --- workflow/defaults.py | 28 +-- workflow/scripts/migrate.py | 399 ++++++++++++++++++++++++++++++++---- workflow/utils.py | 84 ++------ 3 files changed, 378 insertions(+), 133 deletions(-) diff --git a/workflow/defaults.py b/workflow/defaults.py index c651372..37a70fd 100644 --- a/workflow/defaults.py +++ b/workflow/defaults.py @@ -3,11 +3,11 @@ import importlib from enum import StrEnum from importlib import resources -from typing import Any import yaml import workflow.default_parameters.root as root +from workflow import utils class DefaultsVersion(StrEnum): @@ -19,30 +19,6 @@ class DefaultsVersion(StrEnum): develop = "develop" -def _merge_defaults(defaults_a: dict[str, Any], defaults_b: dict[str, Any]) -> None: - """Deep-merge dictionaries in place, updating the first with values from the second. - - Parameters - ---------- - defaults_a : dict[str, Any] - Base dictionary to be updated. This dictionary is modified in place with - merged values. - defaults_b : dict[str, Any] - Dictionary providing overriding values. Keys in this dictionary are - preferred when keys conflict. This dictionary is not modified. - """ - - for key, value in defaults_b.items(): - if ( - key in defaults_a - and isinstance(defaults_a[key], dict) - and isinstance(value, dict) - ): - _merge_defaults(defaults_a[key], defaults_b[key]) - else: - defaults_a[key] = value - - def load_defaults(version: DefaultsVersion) -> dict[str, int | float | str]: """Load default parameters for EMOD3D simulation from a YAML file. @@ -72,5 +48,5 @@ def load_defaults(version: DefaultsVersion) -> dict[str, int | float | str]: defaults_path = resources.files(defaults_package) / "defaults.yaml" with defaults_path.open(encoding="utf-8") as emod3d_defaults_file_handle: defaults = yaml.safe_load(emod3d_defaults_file_handle) - _merge_defaults(root_defaults, defaults) + utils.merge_dictionaries(root_defaults, defaults) return root_defaults diff --git a/workflow/scripts/migrate.py b/workflow/scripts/migrate.py index 6201052..48bee7d 100644 --- a/workflow/scripts/migrate.py +++ b/workflow/scripts/migrate.py @@ -4,21 +4,108 @@ import inspect import json import re +import shutil +from collections import defaultdict from collections.abc import MutableMapping from enum import Enum, auto from pathlib import Path +from typing import Annotated, TypeVar +import parse import schema import typer from rich.console import Console -from workflow import realisations +from qcore import cli +from workflow import realisations, utils from workflow.defaults import DefaultsVersion +from workflow.realisations import RealisationMetadata, Seeds app = typer.Typer() console = Console() +def is_realisation_configuration(cls: type) -> bool: + """Returns True if the class is a subclass of realisation configuration. + + Parameters + ---------- + cls : type + Type to check. + + Returns + ------- + bool + True if class is a realisation configuration. + """ + return ( + cls != realisations.RealisationConfiguration + and inspect.isclass(cls) + and issubclass(cls, realisations.RealisationConfiguration) + ) + + +ConfigType = TypeVar("ConfigType", bound=realisations.RealisationConfiguration) + + +def realisation_configurations() -> list[ConfigType]: + """Return a list of all realisation configurations. + + Returns + ------- + list[ConfigType] + A list of all realisation configuration types. + """ + return [ + cls + for name, cls in inspect.getmembers(realisations) + if is_realisation_configuration(cls) + ] + + +def loadable_defaults( + configurations: list[type], defaults: DefaultsVersion +) -> dict[ConfigType, realisations.RealisationConfiguration]: + """Filter a list of realisation configurations for those with loadable defaults. + + + + Parameters + ---------- + configurations : list[type] + Configurations to filter. + defaults : defaults.DefaultsVersion + Defaults to try and load. + + + Returns + ------- + dict[ConfigType, realisations.RealisationConfiguration] + A mapping from realisation configuration types to their + defaults specified by ``defaults``. + + Raises + ------ + TypeError + If ``configurations`` contains a type that is not a + realisation configuration. + + """ + config_defaults = {} + for config in configurations: + if not is_realisation_configuration(config): + raise TypeError( + f"{config=} should be a subclass of realisations.RealisationConfiguration" + ) + else: + try: + default_config = config.read_from_defaults(defaults) # type: ignore[unresolved-attribute] + config_defaults[config] = default_config + except realisations.RealisationParseError: + continue + return config_defaults + + class Response(Enum): """Enum for response to prompts asked of user.""" @@ -38,11 +125,26 @@ class Action(Enum): def yes_no_always_prompt(raw_prompt: str) -> Response: - """Prompt user for a decision, handling y, n, !, and N.""" + """Prompt user for a decision, handling y, n, !, and N. + + + Parameters + ---------- + raw_prompt : str + Prompt to prepend to options. + + + Returns + ------- + Response + Response from user. + """ + prompt = f"{raw_prompt} (y/n/!/N): " response_map = { "N": Response.NEVER, "!": Response.AUTO, + "A": Response.AUTO, "y": Response.YES, "n": Response.NO, } @@ -57,6 +159,17 @@ def autofill( config: realisations.RealisationConfiguration, dry_run: bool, ) -> None: + """Autofill realisation with defaults from config. + + Parameters + ---------- + realisation : Path + Realisation to write to. + config : realisations.RealisationConfiguration + Config to write. + dry_run : bool + If True, print to console instead of writing. + """ if dry_run: console.print( f"DRY RUN: Would merge with {config.__class__.__name__} defaults in {realisation}" @@ -68,7 +181,27 @@ def autofill( def extract_error( name: str, schema: schema.Schema, e: schema.SchemaError ) -> tuple[str, list[str]]: - """Returns the formatted error string and a list of extraneous keys found.""" + """Returns the formatted error string and a list of extraneous keys found. + + + Parameters + ---------- + name : str + Name of configuration to parse. + schema : schema.Schema + Schema to read. + e : schema.SchemaError + Schema error encountered. + + + Returns + ------- + str + Human readable error message. + list[str] + Unknown keys identified in error. + """ + path_segments = [str(a) for a in e.autos if isinstance(a, str)] keys = [] for segment in path_segments: @@ -77,7 +210,7 @@ def extract_error( last_error = e.autos[-1] if e.autos else str(e) extraneous_keys = [] - + assert isinstance(last_error, str) # Handle multiple wrong keys: "Wrong keys 'dt', 'resolution' in..." if "Wrong keys" in last_error: # Extract everything between single quotes @@ -94,21 +227,61 @@ def extract_error( return f"Error in {name}: {last_error}", [] -def should_trim_keys(config: type, extra_keys: list[str]) -> Response: - return yes_no_always_prompt(f"Remove extraneous keys {extra_keys}?") +def should_trim_keys(config: ConfigType, extra_keys: list[str]) -> Response: + """Prompts user if they want to trim extra keys. + + Parameters + ---------- + config : ConfigType + Config to trim keys from. + extra_keys : list[str] + Extra keys to trim. + + Returns + ------- + Response + Response from user to prompt. + """ + return yes_no_always_prompt( + f"Remove extraneous keys {extra_keys} from {config._config_key}?" + ) + + +def should_update(config: ConfigType) -> Response: + """Prompt user to merge config with default values. + Parameters + ---------- + config : ConfigType + Config to merge with. -def should_update(config: type) -> Response: + Returns + ------- + Response + Response from user to prompt. + """ return yes_no_always_prompt(f"Merge with defaults for {config._config_key}?") def trim_keys( realisation: Path, - config: type, + config: ConfigType, extra_keys: list[str], dry_run: bool, ) -> None: - + """Trim extra keys from realisation. + + Parameters + ---------- + realisation : Path + Path to realisation. + config : ConfigType + Config to trim from. + extra_keys : list[str] + Keys to trim. + dry_run : bool + If True, print instead of trimming. + """ if dry_run: console.print(f"DRY RUN: Would remove {extra_keys} from {realisation}") else: @@ -125,6 +298,15 @@ def trim_keys( def print_diff(config_a: dict, config_b: dict) -> None: + """Pretty print diff between two dictionaries. + + Parameters + ---------- + config_a : dict + Dictionary a. + config_b : dict + Dictionary b. + """ config_a_str = json.dumps(config_a, indent=4) config_b_str = json.dumps(config_b, indent=4) @@ -149,11 +331,28 @@ def print_diff(config_a: dict, config_b: dict) -> None: def migrate( realisation: Path, defaults_version: DefaultsVersion, - check_configs: list[type], - defaults: dict[type, realisations.RealisationConfiguration], - auto_response: MutableMapping[tuple[type, Action], Response], + check_configs: list[ConfigType], + defaults: dict[ConfigType, realisations.RealisationConfiguration], + auto_response: MutableMapping[tuple[ConfigType, Action], Response], dry_run: bool, ) -> None: + """Attempt to migrate realisation to new defaults set. + + Parameters + ---------- + realisation : Path + Path to realisation. + defaults_version : DefaultsVersion + Defaults to update to. + check_configs : list[ConfigType] + Configurations to check. + defaults : dict[ConfigType, realisations.RealisationConfiguration] + Defaults to use. + auto_response : MutableMapping[tuple[ConfigType, Action], Response] + Auto response map recording user's always and never requests. + dry_run : bool + If True, print instead of writing to realisations. + """ metadata = realisations.RealisationMetadata.read_from_realisation(realisation) if metadata.defaults_version != defaults_version: console.print( @@ -172,29 +371,27 @@ def migrate( return for config in check_configs: - if not issubclass(config, realisations.RealisationConfiguration): - raise TypeError( - f"{config=} should be a subclass of realisations.RealisationConfiguration" + default_config = defaults.get(config) + if not default_config: + continue + default_config_dict = default_config.to_dict() + current_config = json_data.get(config._config_key, dict()) + if current_config != default_config_dict: + print_diff(current_config, default_config_dict) + print("") + response = auto_response.get((config, Action.UPDATE)) or should_update( + config ) - elif default_config := defaults.get(config): - default_config_dict = default_config.to_dict() - current_config = json_data.get(config._config_key, dict()) - if current_config != default_config_dict: - print_diff(current_config, default_config_dict) - print("") - response = auto_response.get((config, Action.UPDATE)) or should_update( - config - ) - if response in (Response.AUTO, Response.NEVER): - auto_response[(config, Action.UPDATE)] = response + if response in (Response.AUTO, Response.NEVER): + auto_response[(config, Action.UPDATE)] = response - if response in (response.AUTO, response.YES): - autofill( - realisation, - default_config, - dry_run=dry_run, - ) + if response in (response.AUTO, response.YES): + autofill( + realisation, + default_config, + dry_run=dry_run, + ) # Basic validation complete, now try to resolve schema errors try: @@ -239,19 +436,44 @@ def migrate( console.print(str(e)) -@app.command() +@cli.from_docstring(app, name="migrate") # type: ignore[invalid-argument-type] def migrate_all( - realiasation_directory: Path, + realisation_directory: Annotated[ + Path, typer.Argument(exists=True, file_okay=False) + ], defaults_version: DefaultsVersion, glob: str = "*.json", + backup: str | None = None, dry_run: bool = False, ) -> None: - + """Migrate all realisations in a directory to the current workflow version. + + Parameters + ---------- + realisation_directory : Path + Path containing realisations. + defaults_version : DefaultsVersion + Defaults version to migrate to. + glob : str + Glob pattern to look for realisations. + backup : str | None + If given, backup the realisation file with named suffix before + running migration. Equivalent to the ``-iext`` flag used in + sed. Has no effect when combined with dry run. + dry_run : bool + If given, print instead of writing. Useful to check what would + be migrated. + """ auto_response = dict() configs = realisation_configurations() defaults = loadable_defaults(configs, defaults_version) - for realisation in realiasation_directory.rglob(glob): + for realisation in realisation_directory.rglob(glob): + if backup and not dry_run: # only make a copy if we actually modify the file. + shutil.copy( + realisation, realisation.with_suffix(realisation.suffix + backup) + ) + migrate( realisation, defaults_version, @@ -262,5 +484,110 @@ def migrate_all( ) +@cli.from_docstring(app) +def copy( + realisation_template: Annotated[Path, typer.Argument(exists=True, dir_okay=False)], + realisation_directory: Annotated[ + Path, typer.Argument(exists=True, file_okay=False) + ], + configs: list[str] | None = None, + backup: str | None = None, + glob: str = "*.json", +) -> None: + """Utility to copy blocks of configurations between a template and a directory of realisations. + + Realisation configurations can be partially specified, so that + some values can be replaced without replacing all of the others. + + Parameters + ---------- + realisation_template : Path + Template realisation to copy from. + realisation_directory : Path + Directory containing realisation files. + configs : list[str] + Configurations to copy. If None, will copy all configurations + in realisation file. + backup : str | None + If given, backup the realisation file with named suffix before + running migration. Equivalent to the ``-iext`` flag used in + sed. Has no effect when combined with dry run. + glob : str + Glob pattern to look for realisations. + """ + with open(realisation_template) as f: + template = json.load(f) + + configs = configs or list(template) + + for realisation_path in realisation_directory.rglob(glob): + if backup: + shutil.copy( + realisation_path, + realisation_path.with_suffix(realisation_path.suffix + backup), + ) + + with open(realisation_path) as f: + realisation = json.load(f) + + utils.merge_dictionaries(realisation, template) + + with open(realisation_path, "w") as f: + json.dump(realisation, f, indent=4) + + +@cli.from_docstring(app) +def clone( + realisation_directory: Annotated[ + Path, typer.Argument(exists=True, file_okay=False) + ], + num_realisations: int, + realisation_template: str = "{event}_R{realisation:d}", + regenerate_seeds: bool = True, +) -> None: + """Utility to clone realisations with updated seeds. + + Parameters + ---------- + realisation_directory : Path + Directory containing realisation files. + num_realisations : int + Number of realisations to copy. + realisation_template : str, optional + Template structure for realisation names + regenerate_seeds : bool, optional + If set, re-roll seeds configuration. + """ + + realisations = defaultdict(set) + for realisation in realisation_directory.iterdir(): + realisation_path = realisation / "realisation.json" + parsed_content = parse.parse(realisation_template, realisation.name) + if not (realisation.is_dir and realisation_path.exists() and parsed_content): + continue + assert isinstance(parsed_content, parse.Result) + event = parsed_content["event"] + realisation_number = int(parsed_content["realisation"]) + realisations[event].add(realisation_number) + + for event, existing_realisations in realisations.items(): + base_realisation = min(existing_realisations) + base_realisation_path = realisation_directory / realisation_template.format( + event=event, realisation=base_realisation + ) + for i in range(base_realisation + 1, num_realisations + 1): + # Handles cases like clarence_R1, clarence_R3 existing already. + if i in existing_realisations: + continue + realisation_path = realisation_directory / realisation_template.format( + event=event, realisation=i + ) + shutil.copytree(base_realisation_path, realisation_path) + if regenerate_seeds: + realisation_json = realisation_path / "realisation.json" + seeds = Seeds.random_seeds() + seeds.write_to_realisation(realisation_json) + + if __name__ == "__main__": app() diff --git a/workflow/utils.py b/workflow/utils.py index 6ba85fd..594b194 100644 --- a/workflow/utils.py +++ b/workflow/utils.py @@ -14,7 +14,7 @@ from shapely import Geometry, Polygon, geometry from qcore import coordinates -from workflow import defaults, realisations +from workflow import defaults NZ_COASTLINE_URL = "https://www.dropbox.com/scl/fi/zkohh794y0s2189t7b1hi/NZ.gmt?rlkey=02011f4morc4toutt9nzojrw1&st=vpz2ri8x&dl=1" @@ -178,79 +178,21 @@ def dict_zip(*dicts: Mapping[K, Any], strict: bool = True) -> dict[K, tuple[Any, return result -def is_realisation_configuration(cls: type) -> bool: - """Returns True if the class is a subclass of realisation configuration. +def merge_dictionaries(dict_a: dict[str, Any], dict_b: dict[str, Any]) -> None: + """Deep-merge dictionaries in place, updating the first with values from the second. Parameters ---------- - cls : type - Type to check. - - Returns - ------- - bool - True if class is a realisation configuration. + dict_a : dict[str, Any] + Base dictionary to be updated. This dictionary is modified in place with + merged values. + dict_b : dict[str, Any] + Dictionary providing overriding values. Keys in this dictionary are + preferred when keys conflict. This dictionary is not modified. """ - return ( - cls != realisations.RealisationConfiguration - and inspect.isclass(cls) - and issubclass(cls, realisations.RealisationConfiguration) - ) - - -def realisation_configurations() -> list[type]: - """Return a list of all realisation configurations. - - Returns - ------- - list[type] - A list of all realisation configuration types. - """ - return [ - cls - for name, cls in inspect.getmembers(realisations) - if is_realisation_configuration(cls) - ] - - -def loadable_defaults( - configurations: list[type], defaults: defaults.DefaultsVersion -) -> dict[type, realisations.RealisationConfiguration]: - """Filter a list of realisation configurations for those with loadable defaults. - - - Parameters - ---------- - configurations : list[type] - Configurations to filter. - defaults : defaults.DefaultsVersion - Defaults to try and load. - - - Returns - ------- - dict[type, realisations.RealisationConfiguration] - A mapping from realisation configuration types to their - defaults specified by ``defaults``. - - Raises - ------ - TypeError - If ``configurations`` contains a type that is not a - realisation configuration. - - """ - config_defaults = {} - for config in configurations: - if not is_realisation_configuration(config): - raise TypeError( - f"{config=} should be a subclass of realisations.RealisationConfiguration" - ) + for key, value in dict_b.items(): + if key in dict_a and isinstance(dict_a[key], dict) and isinstance(value, dict): + merge_dictionaries(dict_a[key], dict_b[key]) else: - try: - default_config = config.read_from_defaults(defaults) # type: ignore[unresolved-attribute] - config_defaults[config] = default_config - except realisations.RealisationParseError: - continue - return config_defaults + dict_a[key] = value From 276e6ec6d68a898d9662e17179183887556e2823 Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Mon, 23 Mar 2026 16:28:56 +1300 Subject: [PATCH 7/8] migration extras --- workflow/scripts/migrate.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/workflow/scripts/migrate.py b/workflow/scripts/migrate.py index 48bee7d..c8416e6 100644 --- a/workflow/scripts/migrate.py +++ b/workflow/scripts/migrate.py @@ -211,17 +211,13 @@ def extract_error( last_error = e.autos[-1] if e.autos else str(e) extraneous_keys = [] assert isinstance(last_error, str) - # Handle multiple wrong keys: "Wrong keys 'dt', 'resolution' in..." if "Wrong keys" in last_error: - # Extract everything between single quotes extraneous_keys = re.findall(r"'(.*?)'", last_error.split(" in {")[0]) error_msg = f"Extraneous keys found: [red]{', '.join(extraneous_keys)}[/red]" return f"Error in {name}: {error_msg}", extraneous_keys - # Fallback to existing logic for "Wrong key" (singular/typo) if match := re.match(r"^Wrong key '(.*?)'", last_error): unknown_key = match.group(1) - # ... (keep your existing fuzzy matching logic here) ... return f"Error in {name}: Unknown key '{unknown_key}'", [unknown_key] return f"Error in {name}: {last_error}", [] @@ -285,7 +281,6 @@ def trim_keys( if dry_run: console.print(f"DRY RUN: Would remove {extra_keys} from {realisation}") else: - # Load raw data, delete keys, save back with open(realisation, "r") as f: data = json.load(f) @@ -393,7 +388,6 @@ def migrate( dry_run=dry_run, ) - # Basic validation complete, now try to resolve schema errors try: _ = config.read_from_realisation(realisation) except realisations.RealisationParseError: From c673dc4f7591307e36fbbd0bdc97bb43bd30e63e Mon Sep 17 00:00:00 2001 From: Jake Faulkner Date: Mon, 23 Mar 2026 17:00:17 +1300 Subject: [PATCH 8/8] add entrypoint --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 346736f..fb970bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,7 @@ check-domain = "workflow.scripts.check_domain:app" gcmt-auto-simulate = "workflow.scripts.gcmt_auto_simulate:app" import-realisation = "workflow.scripts.import_realisation:app" lf-to-xarray = "workflow.scripts.lf_to_xarray:app" +migrate = "workflow.scripts.migrate:app" [tool.setuptools.package-dir] workflow = "workflow"