diff --git a/orchestrator/cli/commands/upgrade.py b/orchestrator/cli/commands/upgrade.py index b18db6f7c..cc1dbaab5 100644 --- a/orchestrator/cli/commands/upgrade.py +++ b/orchestrator/cli/commands/upgrade.py @@ -36,6 +36,21 @@ def upgrade_resource( click_type=HiddenPluralChoice(AdoUpgradeSupportedResourceTypes), ), ], + apply_legacy_migrator: Annotated[ + list[str] | None, + typer.Option( + "--apply-legacy-migrator", + help="Apply legacy migrators by identifier (e.g., 'samplestore_kind_entitysource_to_samplestore'). " + "Can be specified multiple times.", + ), + ] = None, + list_legacy_migrators: Annotated[ + bool, + typer.Option( + "--list-legacy-migrators", + help="List available legacy migrators for this resource type", + ), + ] = False, ) -> None: """ Upgrade resources and contexts. @@ -52,12 +67,22 @@ def upgrade_resource( # Upgrade all operations ado upgrade operations + + # List available legacy migrators for sample stores + + ado upgrade samplestores --list-legacy-migrators + + # Apply a legacy migrator during upgrade + + ado upgrade samplestores --apply-legacy-migrator samplestore_kind_entitysource_to_samplestore """ ado_configuration: AdoConfiguration = ctx.obj parameters = AdoUpgradeCommandParameters( ado_configuration=ado_configuration, + apply_legacy_migrator=apply_legacy_migrator, + list_legacy_migrators=list_legacy_migrators, ) method_mapping = { diff --git a/orchestrator/cli/models/parameters.py b/orchestrator/cli/models/parameters.py index 85fe86a55..5a519ed8d 100644 --- a/orchestrator/cli/models/parameters.py +++ b/orchestrator/cli/models/parameters.py @@ -136,3 +136,5 @@ class AdoTemplateCommandParameters(pydantic.BaseModel): class AdoUpgradeCommandParameters(pydantic.BaseModel): ado_configuration: AdoConfiguration + apply_legacy_migrator: list[str] | None = None + list_legacy_migrators: bool = False diff --git a/orchestrator/cli/utils/legacy/__init__.py b/orchestrator/cli/utils/legacy/__init__.py new file mode 100644 index 000000000..c268da66c 
--- /dev/null +++ b/orchestrator/cli/utils/legacy/__init__.py @@ -0,0 +1,10 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Utilities for working with legacy migrators""" + +from orchestrator.cli.utils.legacy.list import list_legacy_migrators + +__all__ = ["list_legacy_migrators"] + +# Made with Bob diff --git a/orchestrator/cli/utils/legacy/common.py b/orchestrator/cli/utils/legacy/common.py new file mode 100644 index 000000000..e3a67c907 --- /dev/null +++ b/orchestrator/cli/utils/legacy/common.py @@ -0,0 +1,178 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Common utilities for legacy migrator handling""" + +from typing import TYPE_CHECKING + +import pydantic + +from orchestrator.cli.utils.output.prints import ( + ERROR, + HINT, + INFO, + WARN, + console_print, + cyan, +) + +if TYPE_CHECKING: + from orchestrator.core.legacy.metadata import LegacyMigratorMetadata + from orchestrator.core.resources import CoreResourceKinds + + +def extract_deprecated_field_paths( + error: pydantic.ValidationError | ValueError, + resource_type: "CoreResourceKinds | None" = None, +) -> tuple[set[str], dict[str, list[str]]]: + """Extract field paths and error details from validation errors + + This function handles both pydantic ValidationError and ValueError types. + For ValueError, it attempts to extract an underlying pydantic ValidationError + from the error's __cause__. If that fails, it falls back to simple string + matching on the error message using known field paths from the legacy + migrator registry (requires resource_type parameter). 
+ + Args: + error: The validation error (pydantic.ValidationError or ValueError) + resource_type: The resource type to get field paths for (required for + ValueError fallback to string matching) + + Returns: + Tuple of (full field paths, field error details mapping) + - full field paths: Set of full dotted paths like 'config.specification.module.moduleType' + - field error details: Maps full field path to list of error messages + + Raises: + ValueError: Re-raises the original error if it is a ValueError without a + ValidationError cause and resource_type is not provided + """ + deprecated_field_paths: set[str] = set() + field_errors: dict[str, list[str]] = {} + + # Handle pydantic ValidationError directly + if isinstance(error, pydantic.ValidationError): + for err in error.errors(): + if err.get("loc"): + # Build the full dotted path from the location tuple + full_path = ".".join(str(loc) for loc in err["loc"]) + deprecated_field_paths.add(full_path) + + # Store the error message for this field path + if full_path not in field_errors: + field_errors[full_path] = [] + + # Build a descriptive error message + msg = err.get("msg", "") + if err.get("input"): + msg = f"{msg} (got: {err['input']})" + + field_errors[full_path].append(msg) + + return deprecated_field_paths, field_errors + + # Handle ValueError - try to extract pydantic ValidationError from __cause__ + if isinstance(error, ValueError): + if hasattr(error, "__cause__") and isinstance( + error.__cause__, pydantic.ValidationError + ): + # Recursively handle the underlying ValidationError + return extract_deprecated_field_paths(error.__cause__, resource_type) + + # Fallback to simple string matching on error message + if resource_type is None: + raise error + + from orchestrator.core.legacy.registry import LegacyMigratorRegistry + + error_msg = str(error) + + # Get all field paths from registered migrators for this resource type + migrators = LegacyMigratorRegistry.get_migrators_for_resource(resource_type) + 
known_deprecated_field_paths = { + path for migrator in migrators for path in migrator.deprecated_field_paths + } + + for field_path in known_deprecated_field_paths: + if field_path in error_msg: + deprecated_field_paths.add(field_path) + # For string matching fallback, we don't have detailed error messages + field_errors[field_path] = [ + "Field validation failed (details in error message)" + ] + + return deprecated_field_paths, field_errors + + # Should not reach here due to type hints, but handle gracefully + raise TypeError(f"Unsupported error type: {type(error)}") + + +def print_migrator_suggestions_with_dependencies( + migrators: list["LegacyMigratorMetadata"], resource_type: "CoreResourceKinds" +) -> None: + """Print legacy migrator suggestions with dependency information + + This enhanced version resolves dependencies and shows migrators in the + correct execution order, along with dependency information. + + Args: + migrators: List of applicable migrators + resource_type: The resource type + """ + from orchestrator.core.legacy.registry import LegacyMigratorRegistry + + # Get migrator identifiers + migrator_ids = [v.identifier for v in migrators] + missing_deps = [] + + # Resolve dependencies to get correct order + try: + migrator_ids, missing_deps = LegacyMigratorRegistry.resolve_dependencies( + migrator_ids + ) + except ValueError as e: + # Circular dependency detected + console_print(f"{ERROR}:{e}", stderr=True) + + # Get ordered migrators (filter out None values) + ordered_migrators: list[LegacyMigratorMetadata] = [] + for vid in migrator_ids: + migrator = LegacyMigratorRegistry.get_migrator(vid) + if migrator is not None: + ordered_migrators.append(migrator) + + console_print(f"{INFO}The following migrator(s) are a match:\n", stderr=True) + for i, migrator in enumerate(ordered_migrators, 1): + # Format and print migrator info using the method + console_print( + migrator.format_info( + index=i, show_dependencies=True, show_version_info=False + ) + ) + 
console_print() + + # Warn about missing dependencies + if missing_deps: + console_print( + f"{WARN}Some dependencies are missing: {', '.join(missing_deps)}\n" + ) + + # Build command with all migrators in correct order + migrator_args = " ".join( + f"--apply-legacy-migrator {v.identifier}" for v in ordered_migrators + ) + console_print( + f"{HINT}To attempt the upgrade using the suggested legacy migrator(s) run:\n" + f"\t{cyan(f'ado upgrade {resource_type.value} {migrator_args}')}\n" + ) + + # Show note about automatic dependency resolution + if len(ordered_migrators) > len(migrators): + console_print( + "[dim]Note: Additional migrators were included to satisfy dependencies[/dim]\n" + ) + + console_print( + f"{HINT}To list all legacy migrators run:\n" + f"\t{cyan(f'ado upgrade {resource_type.value} --list-legacy-migrators')}" + ) diff --git a/orchestrator/cli/utils/legacy/list.py b/orchestrator/cli/utils/legacy/list.py new file mode 100644 index 000000000..2255eed0b --- /dev/null +++ b/orchestrator/cli/utils/legacy/list.py @@ -0,0 +1,45 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Utilities for listing legacy migrators""" + +from orchestrator.cli.utils.output.prints import console_print +from orchestrator.core.legacy.registry import LegacyMigratorRegistry +from orchestrator.core.resources import CoreResourceKinds + + +def list_legacy_migrators(resource_type: CoreResourceKinds) -> None: + """List all available legacy migrators for a specific resource type + + Args: + resource_type: The resource type to list migrators for + """ + # Import migrators package to trigger registration via __init__.py + import orchestrator.core.legacy.migrators # noqa: F401 + + # Get migrators for this resource type + migrators = LegacyMigratorRegistry.get_migrators_for_resource(resource_type) + + if not migrators: + console_print( + f"\n[yellow]No legacy migrators available for {resource_type.value}[/yellow]\n" + ) + return + + # Resources can be 
referenced by their CoreResourceKinds value or by shorthands + # from cli_shorthands_to_cli_names in orchestrator/cli/utils/resources/mappings.py + resource_cli_name = resource_type.value + + console_print(f"Available legacy migrators for {resource_cli_name}s:\n") + + for i, migrator in enumerate(migrators, 1): + # Format and print migrator info with version information using the method + console_print( + migrator.format_info( + index=i, show_dependencies=True, show_version_info=False + ) + ) + console_print() # Add spacing between migrators + + +# Made with Bob diff --git a/orchestrator/cli/utils/resources/handlers.py b/orchestrator/cli/utils/resources/handlers.py index 4c9d0d9bd..2157c8b5a 100644 --- a/orchestrator/cli/utils/resources/handlers.py +++ b/orchestrator/cli/utils/resources/handlers.py @@ -1,7 +1,6 @@ # Copyright IBM Corporation 2025, 2026 # SPDX-License-Identifier: MIT - - +import logging import pathlib import typing @@ -23,6 +22,7 @@ ADO_SPINNER_QUERYING_DB, ADO_SPINNER_SAVING_TO_DB, ERROR, + INFO, SUCCESS, console_print, cyan, @@ -36,6 +36,8 @@ from orchestrator.metastore.base import ResourceDoesNotExistError from orchestrator.utilities.rich import dataframe_to_rich_table +logger = logging.getLogger(__name__) + if typing.TYPE_CHECKING: from orchestrator.cli.models.parameters import ( AdoGetCommandParameters, @@ -235,17 +237,269 @@ def handle_ado_upgrade( parameters: "AdoUpgradeCommandParameters", resource_type: "CoreResourceKinds", ) -> None: + """Upgrade resources, optionally applying legacy migrators + + Args: + parameters: Command parameters including legacy migrator options + resource_type: The type of resource to upgrade + """ + # Handle --list-legacy-migrators flag + if parameters.list_legacy_migrators: + from orchestrator.cli.utils.legacy.list import list_legacy_migrators + + list_legacy_migrators(resource_type) + return sql_store = get_sql_store( project_context=parameters.ado_configuration.project_context ) + + # Normal upgrade path without 
legacy migrators + if not parameters.apply_legacy_migrator: + + with Status(ADO_SPINNER_QUERYING_DB) as status: + try: + resources = sql_store.getResourcesOfKind( + kind=resource_type.value, ignore_validation_errors=False + ) + except ValueError as err: + status.stop() + # Validation error occurred - check if legacy migrators can help + _handle_upgrade_validation_error(err, resource_type, parameters) + raise typer.Exit(1) from err + + for idx, resource in enumerate(resources.values()): + status.update( + ADO_SPINNER_SAVING_TO_DB + f" ({idx + 1}/{len(resources)})" + ) + sql_store.updateResource(resource=resource) + + console_print(SUCCESS) + return + + # The user has requested legacy migrators + legacy_migrators = None + # Import migrators package to trigger registration via __init__.py + import orchestrator.core.legacy.migrators # noqa: F401 + from orchestrator.core.legacy.registry import LegacyMigratorRegistry + + # Validate all migrator IDs exist and match resource type + invalid_migrators = [] + mismatched_migrators = [] + for migrator_id in parameters.apply_legacy_migrator: + migrator = LegacyMigratorRegistry.get_migrator(migrator_id) + if migrator is None: + invalid_migrators.append(migrator_id) + elif migrator.resource_type != resource_type: + mismatched_migrators.append( + (migrator_id, migrator.resource_type, resource_type) + ) + + if invalid_migrators: + console_print( + f"{ERROR}Unknown legacy migrator(s): {', '.join(invalid_migrators)}", + stderr=True, + ) + raise typer.Exit(1) + + if mismatched_migrators: + for migrator_id, migrator_type, expected_type in mismatched_migrators: + console_print( + f"{ERROR}Validator '{migrator_id}' is for {migrator_type.value} resources, " + f"but you are upgrading {expected_type.value} resources", + stderr=True, + ) + raise typer.Exit(1) + + # Resolve dependencies and order migrators + try: + ordered_ids, missing_deps = LegacyMigratorRegistry.resolve_dependencies( + parameters.apply_legacy_migrator + ) + + if 
missing_deps: + console_print( + f"{ERROR}Missing migrator dependencies: {', '.join(missing_deps)}", + stderr=True, + ) + raise typer.Exit(1) + + # Get migrators in correct order + legacy_migrators = [] + for migrator_id in ordered_ids: + migrator = LegacyMigratorRegistry.get_migrator(migrator_id) + if migrator is not None: + legacy_migrators.append(migrator) + + # Log the ordering + if len(ordered_ids) > len(parameters.apply_legacy_migrator): + logger.info( + f"Auto-included dependencies: {[vid for vid in ordered_ids if vid not in parameters.apply_legacy_migrator]}" + ) + + if not legacy_migrators: + console_print( + f"{ERROR}No migrators were found using the provided identifiers" + ) + raise typer.Exit(1) + + logger.debug( + f"Validators in execution order: {[v.identifier for v in legacy_migrators]}" + ) + + except ValueError as e: + # Circular dependency detected + console_print(f"{ERROR}{e}", stderr=True) + raise typer.Exit(1) from e + + # Import resource class mapping for validation + from orchestrator.core import kindmap + + # When legacy migrators are specified, work with raw data with Status(ADO_SPINNER_QUERYING_DB) as status: - resources = sql_store.getResourcesOfKind( - kind=resource_type.value, + + identifiers = sql_store.getResourceIdentifiersOfKind(kind=resource_type.value) + + # Phase 1: Collect and validate all migrations (transaction safety) + # Validate all resources before saving any to ensure atomicity + migrations = [] + resource_class = kindmap[resource_type.value] + + for idx, identifier in enumerate(identifiers["IDENTIFIER"]): + status.update( + ADO_SPINNER_QUERYING_DB + + f" - Validating ({idx + 1}/{len(identifiers)})" + ) + + # Get raw data + resource_dict = sql_store.getResourceRaw(identifier) + if resource_dict is None: + continue + + # Apply legacy migrators + try: + for migrator in legacy_migrators: + logger.debug( + f"Applying migrator: {migrator.identifier} to {identifier}" + ) + resource_dict = 
migrator.migrator_function(resource_dict) + logger.debug( + f"Validator {migrator.identifier} completed for {identifier}" + ) + + # Validate the migrated resource (don't save yet) + resource = resource_class.model_validate(resource_dict) + migrations.append((identifier, resource)) + + except Exception as e: + logger.error(f"Migration failed for {identifier}: {e}") + console_print( + f"{ERROR}Migration validation failed for {identifier}: {e}", + stderr=True, + ) + console_print( + f"{INFO}No resources were modified (all-or-nothing transaction safety)", + stderr=True, + ) + raise typer.Exit(1) from e + + # Phase 2: All validations passed, now save all resources + logger.info( + f"All {len(migrations)} resources validated successfully, applying changes..." ) - for idx, resource in enumerate(resources.values()): - status.update(ADO_SPINNER_SAVING_TO_DB + f" ({idx +1 }/{len(resources)})") - sql_store.updateResource(resource=resource) + for idx, (identifier, migrated_resource) in enumerate(migrations): + status.update(ADO_SPINNER_SAVING_TO_DB + f" ({idx + 1}/{len(migrations)})") + + try: + sql_store.updateResource(resource=migrated_resource) + except Exception as e: + logger.error(f"Failed to save {identifier}: {e}") + console_print( + f"{ERROR}Failed to save {identifier}. Database may be in inconsistent state.", + stderr=True, + ) + console_print( + f"{ERROR}Manual intervention may be required to restore consistency.", + stderr=True, + ) + raise typer.Exit(1) from e console_print(SUCCESS) + + +def _handle_upgrade_validation_error( + error: ValueError, + resource_type: "CoreResourceKinds", + parameters: "AdoUpgradeCommandParameters", +) -> None: + """Handle validation errors during upgrade by suggesting legacy migrators + + Analyzes the validation error to extract deprecated field names, finds + applicable legacy migrators, and displays helpful suggestions to the user. 
+ + Args: + error: The ValueError containing validation error details + resource_type: The type of resource being upgraded + parameters: The upgrade command parameters + """ + from rich.console import Console + + from orchestrator.cli.utils.legacy.common import ( + extract_deprecated_field_paths, + print_migrator_suggestions_with_dependencies, + ) + from orchestrator.core.legacy.registry import LegacyMigratorRegistry + + console = Console() + + # Import migrators package to trigger registration via __init__.py + import orchestrator.core.legacy.migrators # noqa: F401 + + # Extract field paths and error details from the error + deprecated_field_paths, field_errors = extract_deprecated_field_paths( + error, resource_type + ) + + # Find applicable legacy migrators using full field paths for precise matching + migrators = [] + if deprecated_field_paths: + migrators = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths( + resource_type=resource_type, + deprecated_field_paths=deprecated_field_paths, + ) + + # If no migrators found by field path matching, get all migrators for this resource type + if not migrators: + migrators = LegacyMigratorRegistry.get_migrators_for_resource(resource_type) + + # Display error message + console.print( + f"\n[bold red]Validation Error[/bold red] while upgrading {resource_type.value} resources" + ) + console.print( + "\n[yellow]Some resources could not be loaded due to validation errors.[/yellow]" + ) + + if deprecated_field_paths: + console.print( + f"\n[bold]Fields with validation errors:[/bold] [yellow]{len(deprecated_field_paths)} field(s)[/yellow]" + ) + # Show detailed error messages for each field path + console.print("\n[bold]Error details:[/bold]") + for field_path in sorted(deprecated_field_paths): + console.print(f" • [cyan]{field_path}[/cyan]:") + for error_msg in field_errors.get(field_path, []): + console.print(f" - {error_msg}") + + if migrators: + print_migrator_suggestions_with_dependencies( + 
migrators=migrators, resource_type=resource_type + ) + else: + console.print( + "\n[yellow]No legacy migrators are available for this resource type.[/yellow]" + ) + console.print("The resources may be too old or require manual intervention.") + + console.print() diff --git a/orchestrator/core/legacy/__init__.py b/orchestrator/core/legacy/__init__.py new file mode 100644 index 000000000..ba94e666e --- /dev/null +++ b/orchestrator/core/legacy/__init__.py @@ -0,0 +1,18 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrator system for handling deprecated resource formats""" + +from orchestrator.core.legacy.metadata import LegacyMigratorMetadata +from orchestrator.core.legacy.registry import ( + LegacyMigratorRegistry, + legacy_migrator, +) + +__all__ = [ + "LegacyMigratorMetadata", + "LegacyMigratorRegistry", + "legacy_migrator", +] + +# Made with Bob diff --git a/orchestrator/core/legacy/metadata.py b/orchestrator/core/legacy/metadata.py new file mode 100644 index 000000000..831fec046 --- /dev/null +++ b/orchestrator/core/legacy/metadata.py @@ -0,0 +1,124 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Metadata models for legacy migrators""" + +from collections.abc import Callable +from typing import Annotated + +import pydantic + +from orchestrator.core.resources import CoreResourceKinds + + +class LegacyMigratorMetadata(pydantic.BaseModel): + """Metadata for a legacy migrator function""" + + identifier: Annotated[ + str, + pydantic.Field( + description="Unique identifier for this migrator (e.g., 'csv_constitutive_columns_migration')" + ), + ] + + resource_type: Annotated[ + CoreResourceKinds, + pydantic.Field(description="Resource type this migrator applies to"), + ] + + deprecated_from_version: Annotated[ + str, + pydantic.Field(description="ADO version when these fields were deprecated"), + ] + + removed_from_version: Annotated[ + str, + pydantic.Field(description="ADO version when 
automatic upgrade was removed"), + ] + + description: Annotated[ + str, + pydantic.Field( + description="Human-readable description of what this migrator does" + ), + ] + + migrator_function: Annotated[ + Callable[[dict], dict], + pydantic.Field( + description="The actual migration function", + exclude=True, # Don't serialize the function + ), + ] + + deprecated_field_paths: Annotated[ + list[str], + pydantic.Field( + description="Explicit paths to fields (e.g., 'config.properties', 'config.specification.moduleType')" + ), + ] + + dependencies: Annotated[ + list[str], + pydantic.Field( + default_factory=list, + description="List of migrator identifiers that must run before this migrator", + ), + ] + + model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) + + def format_info( + self, + index: int | None = None, + show_dependencies: bool = True, + show_version_info: bool = False, + ) -> str: + """Format migrator information as a string + + Args: + index: Optional index number to display (e.g., "1." for execution order) + show_dependencies: Whether to show dependency information + show_version_info: Whether to show version information + + Returns: + Formatted string with migrator information + """ + from orchestrator.core.legacy.registry import LegacyMigratorRegistry + + lines = [] + + # Migrator identifier with optional index + if index is not None: + lines.append(f" {index}. 
[green]{self.identifier}[/green]") + else: + lines.append(f"[green]{self.identifier}[/green]") + + # Description + lines.append(f" {self.description}") + + # Field paths + lines.append(f" Handles: {', '.join(self.deprecated_field_paths)}") + + # Dependencies + if show_dependencies and self.dependencies: + dep_names = [] + for dep_id in self.dependencies: + dep_migrator = LegacyMigratorRegistry.get_migrator(dep_id) + if dep_migrator: + dep_names.append(dep_migrator.identifier) + else: + dep_names.append(f"{dep_id} [red](missing)[/red]") + lines.append(f" Depends on: {', '.join(dep_names)}") + + # Version information + if show_version_info: + lines.append( + f" Deprecated from: [cyan]{self.deprecated_from_version}[/cyan]" + ) + lines.append(f" Removed from: [cyan]{self.removed_from_version}[/cyan]") + + return "\n".join(lines) + + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/__init__.py b/orchestrator/core/legacy/migrators/__init__.py new file mode 100644 index 000000000..a5d2ad412 --- /dev/null +++ b/orchestrator/core/legacy/migrators/__init__.py @@ -0,0 +1,16 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrators for deprecated resource formats""" + +# Import all migrator subpackages to trigger registration +from orchestrator.core.legacy.migrators import ( + discoveryspace, + operation, + resource, + samplestore, +) + +__all__ = ["discoveryspace", "operation", "resource", "samplestore"] + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/discoveryspace/__init__.py b/orchestrator/core/legacy/migrators/discoveryspace/__init__.py new file mode 100644 index 000000000..120370adb --- /dev/null +++ b/orchestrator/core/legacy/migrators/discoveryspace/__init__.py @@ -0,0 +1,13 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrators for discovery space migrations""" + +from orchestrator.core.legacy.migrators.discoveryspace import ( + 
entitysource_to_samplestore, + properties_field_removal, +) + +__all__ = ["entitysource_to_samplestore", "properties_field_removal"] + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/discoveryspace/entitysource_to_samplestore.py b/orchestrator/core/legacy/migrators/discoveryspace/entitysource_to_samplestore.py new file mode 100644 index 000000000..a48003a10 --- /dev/null +++ b/orchestrator/core/legacy/migrators/discoveryspace/entitysource_to_samplestore.py @@ -0,0 +1,60 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrator for renaming entitySourceIdentifier to sampleStoreIdentifier""" + +from orchestrator.core.legacy.registry import legacy_migrator +from orchestrator.core.legacy.utils import ( + get_nested_value, + remove_nested_field, + set_nested_value, +) +from orchestrator.core.resources import CoreResourceKinds + + +@legacy_migrator( + identifier="discoveryspace_entitysource_to_samplestore", + resource_type=CoreResourceKinds.DISCOVERYSPACE, + deprecated_field_paths=["config.entitySourceIdentifier"], + deprecated_from_version="0.9.6", + removed_from_version="1.0.0", + description="Renames 'entitySourceIdentifier' to 'sampleStoreIdentifier' in discovery space configurations", +) +def rename_entitysource_identifier(data: dict) -> dict: + """Rename entitySourceIdentifier to sampleStoreIdentifier + + The 'entitySourceIdentifier' field was renamed to 'sampleStoreIdentifier' in config. + This validator operates only on the config level, matching the original + pydantic validator behavior. 
+ + Old format: + config: + entitySourceIdentifier: "store-id" + + New format: + config: + sampleStoreIdentifier: "store-id" + + Args: + data: The resource data dictionary + + Returns: + The migrated resource data dictionary + """ + + if not isinstance(data, dict): + return data + + old_path = "config.entitySourceIdentifier" + new_path = "config.sampleStoreIdentifier" + + # Get the old value if it exists + old_value = get_nested_value(data, old_path) + if old_value is not None: + set_nested_value(data, new_path, old_value) + remove_nested_field(data, old_path) + + return data + + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/discoveryspace/properties_field_removal.py b/orchestrator/core/legacy/migrators/discoveryspace/properties_field_removal.py new file mode 100644 index 000000000..2c2b4a32b --- /dev/null +++ b/orchestrator/core/legacy/migrators/discoveryspace/properties_field_removal.py @@ -0,0 +1,46 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrator for removing deprecated properties field from discovery spaces""" + +from orchestrator.core.legacy.registry import legacy_migrator +from orchestrator.core.legacy.utils import remove_nested_field +from orchestrator.core.resources import CoreResourceKinds + + +@legacy_migrator( + identifier="discoveryspace_properties_field_removal", + resource_type=CoreResourceKinds.DISCOVERYSPACE, + deprecated_field_paths=["config.properties"], + deprecated_from_version="0.10.1", + removed_from_version="1.0.0", + description="Removes the deprecated 'properties' field from discovery space configurations", +) +def remove_properties_field(data: dict) -> dict: + """Remove deprecated properties field from discovery space configuration + + The 'properties' field was deprecated in config and should be removed. + This validator operates only on the config level, matching the original + pydantic validator behavior. + + Old format: + config: + properties: [...] 
+ + New format: + config: + # No properties field + + Args: + data: The resource data dictionary + + Returns: + The migrated resource data dictionary + """ + if isinstance(data, dict): + remove_nested_field(data, "config.properties") + + return data + + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/operation/__init__.py b/orchestrator/core/legacy/migrators/operation/__init__.py new file mode 100644 index 000000000..deddd87cf --- /dev/null +++ b/orchestrator/core/legacy/migrators/operation/__init__.py @@ -0,0 +1,13 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrators for operation migrations""" + +from orchestrator.core.legacy.migrators.operation import ( + actuators_field_removal, + randomwalk_mode_to_sampler_config, +) + +__all__ = ["actuators_field_removal", "randomwalk_mode_to_sampler_config"] + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/operation/actuators_field_removal.py b/orchestrator/core/legacy/migrators/operation/actuators_field_removal.py new file mode 100644 index 000000000..fdfaf04f4 --- /dev/null +++ b/orchestrator/core/legacy/migrators/operation/actuators_field_removal.py @@ -0,0 +1,46 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrator for removing deprecated actuators field from operations""" + +from orchestrator.core.legacy.registry import legacy_migrator +from orchestrator.core.legacy.utils import remove_nested_field +from orchestrator.core.resources import CoreResourceKinds + + +@legacy_migrator( + identifier="operation_actuators_field_removal", + resource_type=CoreResourceKinds.OPERATION, + deprecated_field_paths=["config.actuators"], + deprecated_from_version="0.9.6", + removed_from_version="1.0.0", + description="Removes the deprecated 'actuators' field from operation configurations. 
See https://ibm.github.io/ado/resources/operation/#the-operation-configuration-yaml", +) +def remove_actuators_field(data: dict) -> dict: + """Remove deprecated actuators field from operation configuration + + The 'actuators' field was deprecated in config and should be removed. + This validator operates only on the config level, matching the original + pydantic validator behavior. + + Old format: + config: + actuators: [...] + + New format: + config: + # No actuators field (use actuator configurations instead) + + Args: + data: The resource data dictionary + + Returns: + The migrated resource data dictionary + """ + if isinstance(data, dict): + remove_nested_field(data, "config.actuators") + + return data + + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/operation/randomwalk_mode_to_sampler_config.py b/orchestrator/core/legacy/migrators/operation/randomwalk_mode_to_sampler_config.py new file mode 100644 index 000000000..b5ee53293 --- /dev/null +++ b/orchestrator/core/legacy/migrators/operation/randomwalk_mode_to_sampler_config.py @@ -0,0 +1,68 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrator for migrating random_walk parameters to samplerConfig""" + +from orchestrator.core.legacy.registry import legacy_migrator +from orchestrator.core.legacy.utils import ( + get_nested_value, + has_nested_field, + remove_nested_field, + set_nested_value, +) +from orchestrator.core.resources import CoreResourceKinds + + +@legacy_migrator( + identifier="randomwalk_mode_to_sampler_config", + resource_type=CoreResourceKinds.OPERATION, + deprecated_field_paths=[ + "config.parameters.mode", + "config.parameters.grouping", + "config.parameters.samplerType", + ], + deprecated_from_version="1.0.1", + removed_from_version="1.2", + description="Migrates random_walk parameters from flat structure to nested 'samplerConfig'. 
See https://ibm.github.io/ado/operators/random-walk/#configuring-a-randomwalk", +) +def migrate_randomwalk_to_sampler_config(data: dict) -> dict: + """Migrate random_walk parameters to samplerConfig structure + + Old format: + - mode, grouping, samplerType at top level of parameters + + New format: + - These fields nested under samplerConfig + + Args: + data: The resource data dictionary + + Returns: + The migrated resource data dictionary + """ + + if not isinstance(data, dict): + return data + + # Fields to migrate from top-level parameters to samplerConfig + fields_to_migrate = ["mode", "grouping", "samplerType"] + + sampler_config = {} + + # Extract and migrate each field + for field_name in fields_to_migrate: + field_path = f"config.parameters.{field_name}" + if has_nested_field(data, field_path): + field_value = get_nested_value(data, field_path) + if field_value is not None: + sampler_config[field_name] = field_value + remove_nested_field(data, field_path) + + # Only set samplerConfig if we found any fields to migrate + if sampler_config: + set_nested_value(data, "config.parameters.samplerConfig", sampler_config) + + return data + + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/resource/__init__.py b/orchestrator/core/legacy/migrators/resource/__init__.py new file mode 100644 index 000000000..6f29bcc8a --- /dev/null +++ b/orchestrator/core/legacy/migrators/resource/__init__.py @@ -0,0 +1,10 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrators for generic resource migrations""" + +from orchestrator.core.legacy.migrators.resource import entitysource_to_samplestore + +__all__ = ["entitysource_to_samplestore"] + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/resource/entitysource_to_samplestore.py b/orchestrator/core/legacy/migrators/resource/entitysource_to_samplestore.py new file mode 100644 index 000000000..bc30f501a --- /dev/null +++ 
b/orchestrator/core/legacy/migrators/resource/entitysource_to_samplestore.py @@ -0,0 +1,50 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrator for migrating entitysource kind to samplestore kind""" + +from orchestrator.core.legacy.registry import legacy_migrator +from orchestrator.core.legacy.utils import has_nested_field, set_nested_value +from orchestrator.core.resources import CoreResourceKinds + + +@legacy_migrator( + identifier="samplestore_kind_entitysource_to_samplestore", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=["kind"], + deprecated_from_version="0.9.6", + removed_from_version="1.0.0", + description="Converts resource kind from 'entitysource' to 'samplestore'", + dependencies=[ + "samplestore_module_type_entitysource_to_samplestore", + "samplestore_module_class_entitysource_to_samplestore", + "samplestore_module_name_entitysource_to_samplestore", + ], +) +def migrate_entitysource_kind_to_samplestore(data: dict) -> dict: + """Migrate old entitysource kind to samplestore + + Old format: + kind: "entitysource" + + New format: + kind: "samplestore" + + Args: + data: The resource data dictionary + + Returns: + The migrated resource data dictionary + """ + + if not isinstance(data, dict): + return data + + # Check if this is an entitysource that needs migration + if has_nested_field(data, "kind") and data.get("kind") == "entitysource": + set_nested_value(data, "kind", "samplestore") + + return data + + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/samplestore/__init__.py b/orchestrator/core/legacy/migrators/samplestore/__init__.py new file mode 100644 index 000000000..5951620be --- /dev/null +++ b/orchestrator/core/legacy/migrators/samplestore/__init__.py @@ -0,0 +1,18 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrators for sample store migrations""" + +from orchestrator.core.legacy.migrators.samplestore import ( + 
entitysource_migrations, + gt4sd_transformer_migration, + v1_to_v2_csv_migration, +) + +__all__ = [ + "entitysource_migrations", + "gt4sd_transformer_migration", + "v1_to_v2_csv_migration", +] + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/samplestore/entitysource_migrations.py b/orchestrator/core/legacy/migrators/samplestore/entitysource_migrations.py new file mode 100644 index 000000000..92f06614e --- /dev/null +++ b/orchestrator/core/legacy/migrators/samplestore/entitysource_migrations.py @@ -0,0 +1,290 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrators for migrating entitysource to samplestore naming""" + +from orchestrator.core.legacy.registry import legacy_migrator +from orchestrator.core.legacy.utils import ( + get_nested_value, + has_nested_field, + set_nested_value, +) +from orchestrator.core.resources import CoreResourceKinds + + +@legacy_migrator( + identifier="samplestore_module_type_entitysource_to_samplestore", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=[ + "config.specification.module.moduleType", + "config.copyFrom.0.module.moduleType", + ], + deprecated_from_version="0.9.6", + removed_from_version="1.0.0", + description="Converts moduleType value from 'entity_source' to 'sample_store'", +) +def migrate_module_type(data: dict) -> dict: + """Convert moduleType from entity_source to sample_store + + This migrator checks for moduleType field within config.specification.module + and config.copyFrom[].module, converting them from 'entity_source' to 'sample_store'. 
+ + Old format: + config: + specification: + module: + moduleType: "entity_source" + copyFrom: + - module: + moduleType: "entity_source" + + New format: + config: + specification: + module: + moduleType: "sample_store" + copyFrom: + - module: + moduleType: "sample_store" + + Args: + data: The resource data dictionary + + Returns: + The migrated resource data dictionary + """ + + if not isinstance(data, dict): + return data + + # Update config.specification.module.moduleType + if has_nested_field(data, "config.specification.module.moduleType"): + module_type = get_nested_value(data, "config.specification.module.moduleType") + if module_type == "entity_source": + set_nested_value( + data, "config.specification.module.moduleType", "sample_store" + ) + + # Update config.copyFrom[].module.moduleType + if has_nested_field(data, "config.copyFrom"): + copy_from = get_nested_value(data, "config.copyFrom") + if isinstance(copy_from, list): + for item in copy_from: + if isinstance(item, dict) and has_nested_field( + item, "module.moduleType" + ): + module_type = get_nested_value(item, "module.moduleType") + if module_type == "entity_source": + set_nested_value(item, "module.moduleType", "sample_store") + + return data + + +@legacy_migrator( + identifier="samplestore_module_class_entitysource_to_samplestore", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=[ + "config.specification.module.moduleClass", + "config.copyFrom.0.module.moduleClass", + ], + deprecated_from_version="0.9.6", + removed_from_version="1.0.0", + description="Converts moduleClass values from EntitySource to SampleStore naming (CSVEntitySource -> CSVSampleStore, SQLEntitySource -> SQLSampleStore)", +) +def migrate_module_class(data: dict) -> dict: + """Convert moduleClass from EntitySource to SampleStore naming + + This validator checks for moduleClass field within config.specification.module + and config.copyFrom[].module, converting them from EntitySource to SampleStore naming. 
+ + Old format: + config: + specification: + module: + moduleClass: "CSVEntitySource" or "SQLEntitySource" + copyFrom: + - module: + moduleClass: "CSVEntitySource" + + New format: + config: + specification: + module: + moduleClass: "CSVSampleStore" or "SQLSampleStore" + copyFrom: + - module: + moduleClass: "CSVSampleStore" + + Args: + data: The resource data dictionary + + Returns: + The migrated resource data dictionary + """ + + if not isinstance(data, dict): + return data + + value_mappings = { + "CSVEntitySource": "CSVSampleStore", + "SQLEntitySource": "SQLSampleStore", + } + + # Update config.specification.module.moduleClass + if has_nested_field(data, "config.specification.module.moduleClass"): + module_class = get_nested_value(data, "config.specification.module.moduleClass") + if isinstance(module_class, str) and module_class in value_mappings: + set_nested_value( + data, + "config.specification.module.moduleClass", + value_mappings[module_class], + ) + + # Update config.copyFrom[].module.moduleClass + if has_nested_field(data, "config.copyFrom"): + copy_from = get_nested_value(data, "config.copyFrom") + if isinstance(copy_from, list): + for item in copy_from: + if isinstance(item, dict) and has_nested_field( + item, "module.moduleClass" + ): + module_class = get_nested_value(item, "module.moduleClass") + if isinstance(module_class, str) and module_class in value_mappings: + set_nested_value( + item, "module.moduleClass", value_mappings[module_class] + ) + + return data + + +@legacy_migrator( + identifier="samplestore_module_name_entitysource_to_samplestore", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=[ + "config.specification.module.moduleName", + "config.copyFrom.0.module.moduleName", + ], + deprecated_from_version="0.9.6", + removed_from_version="1.0.0", + description="Updates module paths from entitysource to samplestore (orchestrator.core.entitysource -> orchestrator.core.samplestore)", +) +def migrate_module_name(data: 
dict) -> dict: + """Convert moduleName paths from entitysource to samplestore + + This validator checks for moduleName field within config.specification.module + and config.copyFrom[].module, converting paths from entitysource to samplestore + using substring replacement. + + Migrates any path containing the old module names: + config: + specification: + module: + moduleName: "orchestrator.core.entitysource" + -> "orchestrator.core.samplestore" + + moduleName: "orchestrator.plugins.entitysources" + -> "orchestrator.plugins.samplestores" + + moduleName: "orchestrator.core.entitysource.csv" + -> "orchestrator.core.samplestore.csv" + copyFrom: + - module: + moduleName: "orchestrator.core.entitysource.sql" + -> "orchestrator.core.samplestore.sql" + + Args: + data: The resource data dictionary + + Returns: + The migrated resource data dictionary + """ + + if not isinstance(data, dict): + return data + + path_mappings = { + "orchestrator.core.entitysource": "orchestrator.core.samplestore", + "orchestrator.plugins.entitysources": "orchestrator.plugins.samplestores", + } + + # Update config.specification.module.moduleName + if has_nested_field(data, "config.specification.module.moduleName"): + module_name = get_nested_value(data, "config.specification.module.moduleName") + if isinstance(module_name, str): + for old_path, new_path in path_mappings.items(): + if old_path in module_name: + set_nested_value( + data, + "config.specification.module.moduleName", + module_name.replace(old_path, new_path), + ) + break + + # Update config.copyFrom[].module.moduleName + if has_nested_field(data, "config.copyFrom"): + copy_from = get_nested_value(data, "config.copyFrom") + if isinstance(copy_from, list): + for item in copy_from: + if isinstance(item, dict) and has_nested_field( + item, "module.moduleName" + ): + module_name = get_nested_value(item, "module.moduleName") + if isinstance(module_name, str): + for old_path, new_path in path_mappings.items(): + if old_path in module_name: + 
set_nested_value( + item, + "module.moduleName", + module_name.replace(old_path, new_path), + ) + break + + return data + + +@legacy_migrator( + identifier="samplestore_remove_specification_storage_location", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=[ + "config.specification.storageLocation", + "config.copyFrom.0.storageLocation", + ], + deprecated_from_version="0.9.6", + removed_from_version="1.0.0", + description="Removes deprecated config.specification.storageLocation field", +) +def remove_specification_storage_location(data: dict) -> dict: + """Remove deprecated config.specification.storageLocation field + + The storageLocation field was moved from config.specification to the top level + of the specification. This validator removes the old nested location. + + Old format: + config: + specification: + storageLocation: {...} + + New format: + config: + specification: + # storageLocation removed from here + + Args: + data: The resource data dictionary + + Returns: + The migrated resource data dictionary + """ + + if not isinstance(data, dict): + return data + + # Remove config.specification.storageLocation if it exists + from orchestrator.core.legacy.utils import remove_nested_field + + remove_nested_field(data, "config.specification.storageLocation") + + return data + + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/samplestore/gt4sd_transformer_migration.py b/orchestrator/core/legacy/migrators/samplestore/gt4sd_transformer_migration.py new file mode 100644 index 000000000..25947bfd6 --- /dev/null +++ b/orchestrator/core/legacy/migrators/samplestore/gt4sd_transformer_migration.py @@ -0,0 +1,135 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrator for migrating GT4SDTransformer to CSVSampleStore""" + +from orchestrator.core.legacy.registry import legacy_migrator +from orchestrator.core.legacy.utils import ( + get_nested_value, + has_nested_field, + set_nested_value, +) 
+from orchestrator.core.resources import CoreResourceKinds + + +@legacy_migrator( + identifier="samplestore_gt4sd_transformer_to_csv", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=[ + "config.copyFrom.0.module.moduleClass", + "config.copyFrom.0.module.moduleName", + ], + deprecated_from_version="1.3.5", + removed_from_version="1.6.0", + description="Converts GT4SDTransformer plugin to CSVSampleStore with explicit parameters", +) +def migrate_gt4sd_transformer_to_csv(data: dict) -> dict: + """Migrate GT4SDTransformer plugin usage to CSVSampleStore with explicit parameters + + The GT4SDTransformer class was a thin wrapper around CSVSampleStore that + automatically filled in parameters. This validator converts configurations + using GT4SDTransformer to use CSVSampleStore directly with explicit parameters. + + Old format: + config: + copyFrom: + - module: + moduleClass: GT4SDTransformer + moduleName: orchestrator.plugins.samplestores.gt4sd + parameters: + generatorIdentifier: 'gt4sd-pfas-transformer-model-one' + + New format: + config: + copyFrom: + - module: + moduleClass: CSVSampleStore + moduleName: orchestrator.core.samplestore.csv + parameters: + generatorIdentifier: 'gt4sd-pfas-transformer-model-one' + identifierColumn: 'smiles' + experiments: + - experimentIdentifier: 'transformer-toxicity-inference-experiment' + observedPropertyMap: + logws: GenLogws + logd: GenLogd + loghl: GenLoghl + pka: GenPka + "biodegradation halflife": GenBiodeg + bcf: GenBcf + ld50: GenLd50 + scscore: GenScscore + constitutivePropertyMap: [smiles] + + Args: + data: The resource data dictionary + + Returns: + The migrated resource data dictionary + """ + + if not isinstance(data, dict): + return data + + # Property map from the old GT4SDTransformer class + property_map = { + "logws": "GenLogws", + "logd": "GenLogd", + "loghl": "GenLoghl", + "pka": "GenPka", + "biodegradation halflife": "GenBiodeg", + "bcf": "GenBcf", + "ld50": "GenLd50", + "scscore": 
"GenScscore", + } + + # Check config.copyFrom array for GT4SDTransformer usage + if has_nested_field(data, "config.copyFrom"): + copy_from = get_nested_value(data, "config.copyFrom") + if isinstance(copy_from, list): + for item in copy_from: + if not isinstance(item, dict): + continue + + # Check if this is a GT4SDTransformer module + module_class = get_nested_value(item, "module.moduleClass") + module_name = get_nested_value(item, "module.moduleName") + + if ( + module_class == "GT4SDTransformer" + and module_name == "orchestrator.plugins.samplestores.gt4sd" + ): + # Update module class and name + set_nested_value(item, "module.moduleClass", "CSVSampleStore") + set_nested_value( + item, "module.moduleName", "orchestrator.core.samplestore.csv" + ) + + # Add explicit parameters that GT4SDTransformer provided automatically + if not has_nested_field(item, "parameters"): + set_nested_value(item, "parameters", {}) + + parameters = get_nested_value(item, "parameters") + + # Add identifierColumn if not present + if ( + isinstance(parameters, dict) + and "identifierColumn" not in parameters + ): + set_nested_value(item, "parameters.identifierColumn", "smiles") + + # Add experiments configuration if not present + if isinstance(parameters, dict) and "experiments" not in parameters: + experiment_config = { + "experimentIdentifier": "transformer-toxicity-inference-experiment", + "observedPropertyMap": property_map, + "constitutivePropertyMap": ["smiles"], + } + set_nested_value( + item, "parameters.experiments", [experiment_config] + ) + + return data + + +# Made with Bob diff --git a/orchestrator/core/legacy/migrators/samplestore/v1_to_v2_csv_migration.py b/orchestrator/core/legacy/migrators/samplestore/v1_to_v2_csv_migration.py new file mode 100644 index 000000000..884474829 --- /dev/null +++ b/orchestrator/core/legacy/migrators/samplestore/v1_to_v2_csv_migration.py @@ -0,0 +1,75 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Legacy migrator 
for migrating CSV sample stores from v1 to v2 format""" + +from orchestrator.core.legacy.registry import legacy_migrator +from orchestrator.core.legacy.utils import get_nested_value, has_nested_field +from orchestrator.core.resources import CoreResourceKinds + + +@legacy_migrator( + identifier="csv_constitutive_columns_migration", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=[ + "config.constitutivePropertyColumns", + "config.experiments", + ], + deprecated_from_version="1.3.5", + removed_from_version="1.6.0", + description="Migrates CSV sample stores from v1 format (constitutivePropertyColumns in config) to v2 format (per-experiment constitutivePropertyMap)", + dependencies=["samplestore_kind_entitysource_to_samplestore"], +) +def migrate_csv_v1_to_v2(data: dict) -> dict: + """Migrate old CSVSampleStoreDescription format to new format + + This migrator operates on the config section of the CSV sample store, + migrating from v1 to v2 format. It matches the original pydantic + validator behavior. + + Old format (in config): + constitutivePropertyColumns: [...] + experiments: + - propertyMap: {...} + + New format (in config): + # No constitutivePropertyColumns + experiments: + - observedPropertyMap: {...} + constitutivePropertyMap: [...] 
+ + Args: + data: The resource data dictionary + + Returns: + The migrated resource data dictionary + """ + + if not isinstance(data, dict): + return data + + # Check if this is old format (has constitutivePropertyColumns in config) + if not has_nested_field(data, "config.constitutivePropertyColumns"): + return data + + # Get config value + config = get_nested_value(data, "config") + if config is None or not isinstance(config, dict): + return data + + constitutive_columns = config.pop("constitutivePropertyColumns") + # Migrate experiments if present in config + experiments = config.get("experiments") + if isinstance(experiments, list): + for exp in experiments: + if isinstance(exp, dict): + # Rename propertyMap to observedPropertyMap + if "propertyMap" in exp: + exp["observedPropertyMap"] = exp.pop("propertyMap") + # Add constitutivePropertyMap from config-level constitutivePropertyColumns + exp["constitutivePropertyMap"] = constitutive_columns + + return data + + +# Made with Bob diff --git a/orchestrator/core/legacy/registry.py b/orchestrator/core/legacy/registry.py new file mode 100644 index 000000000..3e88b3091 --- /dev/null +++ b/orchestrator/core/legacy/registry.py @@ -0,0 +1,241 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Registry for legacy migrators that have been removed from active code""" + +from collections.abc import Callable +from functools import wraps +from typing import ClassVar + +from orchestrator.core.legacy.metadata import LegacyMigratorMetadata +from orchestrator.core.resources import CoreResourceKinds + + +class LegacyMigratorRegistry: + """Registry for legacy migrators that have been removed from active code""" + + _migrators: ClassVar[dict[str, LegacyMigratorMetadata]] = {} + + @classmethod + def register(cls, metadata: LegacyMigratorMetadata) -> None: + """Register a legacy migrator + + Args: + metadata: The migrator metadata to register + """ + cls._migrators[metadata.identifier] = metadata + + 
@classmethod + def get_migrator(cls, identifier: str) -> LegacyMigratorMetadata | None: + """Get a specific migrator by identifier + + Args: + identifier: The unique identifier of the migrator + + Returns: + The migrator metadata if found, None otherwise + """ + return cls._migrators.get(identifier) + + @classmethod + def get_migrators_for_resource( + cls, resource_type: CoreResourceKinds + ) -> list[LegacyMigratorMetadata]: + """Get all migrators for a specific resource type + + Args: + resource_type: The resource type to filter by + + Returns: + List of migrator metadata for the specified resource type + """ + return [v for v in cls._migrators.values() if v.resource_type == resource_type] + + @classmethod + def find_migrators_for_deprecated_field_paths( + cls, + resource_type: CoreResourceKinds, + deprecated_field_paths: set[str], + ) -> list[LegacyMigratorMetadata]: + """Find migrators that handle specific field paths + + Matches migrators based on their declared field_paths, providing + more precise matching than deprecated_fields (leaf names). + + Args: + resource_type: The resource type to filter by + deprecated_field_paths: Set of full dotted paths (e.g., 'config.properties') + + Returns: + List of migrator metadata that handle any of the specified paths + """ + return [ + v + for v in cls.get_migrators_for_resource(resource_type) + if any(path in v.deprecated_field_paths for path in deprecated_field_paths) + ] + + @classmethod + def list_all(cls) -> list[LegacyMigratorMetadata]: + """List all registered migrators + + Returns: + List of all registered migrator metadata + """ + return list(cls._migrators.values()) + + @classmethod + def resolve_dependencies( + cls, migrator_ids: list[str] + ) -> tuple[list[str], list[str]]: + """Resolve migrator dependencies and return ordered list + + Uses topological sort to order migrators based on their dependencies. + Detects circular dependencies. Automatically includes all transitive + dependencies. 
+ + Args: + migrator_ids: List of migrator identifiers to order + + Returns: + Tuple of (ordered_migrator_ids, missing_dependencies) + - ordered_migrator_ids: Migrators in dependency order (includes all dependencies) + - missing_dependencies: List of dependency IDs that don't exist + + Raises: + ValueError: If circular dependencies are detected + """ + # Build dependency graph - recursively add all dependencies + graph: dict[str, list[str]] = {} + in_degree: dict[str, int] = {} + missing_deps: set[str] = set() + to_process = list(migrator_ids) + processed = set() + + while to_process: + vid = to_process.pop(0) + if vid in processed: + continue + processed.add(vid) + + migrator = cls.get_migrator(vid) + if migrator is None: + continue + + # Initialize this migrator in the graph + if vid not in graph: + graph[vid] = [] + in_degree[vid] = 0 + + # Process dependencies + for dep_id in migrator.dependencies: + if dep_id not in cls._migrators: + missing_deps.add(dep_id) + continue + + # Add dependency to graph if not already there + if dep_id not in graph: + graph[dep_id] = [] + in_degree[dep_id] = 0 + # Add to processing queue to handle transitive dependencies + to_process.append(dep_id) + + # Add edge from dependency to dependent + if vid not in graph[dep_id]: + graph[dep_id].append(vid) + + # Calculate in-degrees + for vid in graph: + migrator = cls.get_migrator(vid) + if migrator: + for dep_id in migrator.dependencies: + if dep_id in graph: + in_degree[vid] += 1 + + # Topological sort using Kahn's algorithm + queue = [vid for vid in graph if in_degree[vid] == 0] + ordered = [] + + while queue: + # Sort queue for deterministic ordering + queue.sort() + current = queue.pop(0) + ordered.append(current) + + # Reduce in-degree for dependents + for dependent in graph[current]: + in_degree[dependent] -= 1 + if in_degree[dependent] == 0: + queue.append(dependent) + + # Check for circular dependencies + if len(ordered) != len(graph): + remaining = [vid for vid in graph if vid 
not in ordered] + raise ValueError( + f"Circular dependency detected among migrators: {', '.join(remaining)}" + ) + + return ordered, list(missing_deps) + + +def legacy_migrator( + identifier: str, + resource_type: CoreResourceKinds, + deprecated_field_paths: list[str], + deprecated_from_version: str, + removed_from_version: str, + description: str, + dependencies: list[str] | None = None, +) -> Callable[[Callable[[dict], dict]], Callable[[dict], dict]]: + """Decorator to register a legacy migrator function + + Args: + identifier: Unique identifier for this migrator + resource_type: Resource type this migrator applies to + deprecated_field_paths: Explicit paths to fields (e.g., 'config.properties', 'config.specification.moduleType') + deprecated_from_version: ADO version when these fields were deprecated + removed_from_version: ADO version when automatic upgrade was removed + description: Human-readable description of what this migrator does + dependencies: Optional list of migrator identifiers that must run before this one + + Returns: + Decorator function that registers the migrator + + Example: + @legacy_migrator( + identifier="csv_constitutive_columns_migration", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=["config.constitutivePropertyColumns", "config.experiments"], + deprecated_from_version="1.3.5", + removed_from_version="1.6.0", + description="Migrates CSV sample stores from v1 to v2 format", + dependencies=["samplestore_kind_entitysource_to_samplestore"] + ) + def migrate_csv_v1_to_v2(data: dict) -> dict: + # Migration logic here + return data + """ + + def decorator(func: Callable[[dict], dict]) -> Callable[[dict], dict]: + metadata = LegacyMigratorMetadata( + identifier=identifier, + resource_type=resource_type, + deprecated_from_version=deprecated_from_version, + removed_from_version=removed_from_version, + description=description, + migrator_function=func, + deprecated_field_paths=deprecated_field_paths, + 
dependencies=dependencies or [], + ) + LegacyMigratorRegistry.register(metadata) + + @wraps(func) + def wrapper(*args, **kwargs): # noqa: ANN002, ANN003, ANN202 + return func(*args, **kwargs) + + return wrapper + + return decorator + + +# Made with Bob diff --git a/orchestrator/core/legacy/utils.py b/orchestrator/core/legacy/utils.py new file mode 100644 index 000000000..e2d59d692 --- /dev/null +++ b/orchestrator/core/legacy/utils.py @@ -0,0 +1,126 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Utility functions for legacy migrators""" + + +def get_parent_dict_and_key(data: dict, path: str) -> tuple[dict | None, str | None]: + """Navigate to a nested field path and return parent dict and field name + + This is a low-level helper used by set_nested_value, remove_nested_field, + and has_nested_field. For reading values, use get_nested_value instead. + + Args: + data: The data dictionary + path: Dot-separated path (e.g., "config.specification.module.moduleType") + + Returns: + Tuple of (parent_dict, field_name) or (None, None) if path doesn't exist + + Example: + parent, field = get_parent_dict_and_key(data, "config.properties") + if parent and field: + parent.pop(field, None) + """ + parts = path.split(".") + current = data + + # Navigate to parent + for part in parts[:-1]: + if not isinstance(current, dict) or part not in current: + return None, None + current = current[part] + + # Return parent dict and final field name + if isinstance(current, dict): + return current, parts[-1] + + return None, None + + +def get_nested_value(data: dict, path: str) -> object | None: + """Get the value at a nested field path + + Args: + data: The data dictionary + path: Dot-separated path (e.g., "config.specification.module.moduleType") + + Returns: + The value at the specified path, or None if path doesn't exist + + Example: + value = get_nested_value(data, "config.moduleType") + if value == "sample_store": + # Do something + """ + parent, field = 
get_parent_dict_and_key(data, path) + if parent is not None and field is not None and field in parent: + return parent[field] + return None + + +def set_nested_value(data: dict, path: str, value: object) -> bool: + """Set a value at a nested field path + + Args: + data: The data dictionary + path: Dot-separated path + value: Value to set + + Returns: + True if successful, False if path doesn't exist + + Example: + data = {"config": {"specification": {"module": {}}}} + set_nested_value(data, "config.specification.module.type", "sample_store") + # data is now {"config": {"specification": {"module": {"type": "sample_store"}}}} + """ + parent, field = get_parent_dict_and_key(data, path) + if parent is not None and field is not None: + parent[field] = value + return True + return False + + +def remove_nested_field(data: dict, path: str) -> bool: + """Remove a field at a nested path + + Args: + data: The data dictionary + path: Dot-separated path + + Returns: + True if field was removed, False if path doesn't exist + + Example: + data = {"config": {"properties": ["a", "b"], "other": "value"}} + remove_nested_field(data, "config.properties") + # data is now {"config": {"other": "value"}} + """ + parent, field = get_parent_dict_and_key(data, path) + if parent is not None and field is not None and field in parent: + parent.pop(field) + return True + return False + + +def has_nested_field(data: dict, path: str) -> bool: + """Check if a nested field path exists + + Args: + data: The data dictionary + path: Dot-separated path + + Returns: + True if the field exists, False otherwise + + Example: + data = {"config": {"specification": {"module": {"moduleType": "test"}}}} + has_nested_field(data, "config.specification.module.moduleType") # Returns True + has_nested_field(data, "config.nonexistent") # Returns False + """ + parent, field = get_parent_dict_and_key(data, path) + return parent is not None and field is not None and field in parent + + +# Made with Bob diff --git 
a/orchestrator/core/samplestore/config.py b/orchestrator/core/samplestore/config.py index b284464d3..086bc30d2 100644 --- a/orchestrator/core/samplestore/config.py +++ b/orchestrator/core/samplestore/config.py @@ -76,7 +76,12 @@ def check_is_resource_location_subclass( def check_parameters_valid_for_sample_store_module( cls, parameters: dict, context: pydantic.ValidationInfo ) -> dict: - module = load_module_class_or_function(context.data["module"]) + + module_name = context.data.get("module") + if not module_name: + return parameters + + module = load_module_class_or_function(module_name) validated_parameters = module.validate_parameters(parameters=parameters) # Convert Pydantic model back to dict for serialization if isinstance(validated_parameters, pydantic.BaseModel): @@ -94,6 +99,11 @@ def set_correct_resource_location_class_for_sample_store_module( # However if None is passed explicitly, which would happen on a load of a module which had the "none" default # this method will be called if storageLocation is not None: + + module_name = context.data.get("module") + if not module_name: + return None + sample_store_class = load_module_class_or_function(context.data["module"]) storageLocationClass = sample_store_class.storage_location_class() # 24/04/2025 AP: diff --git a/orchestrator/metastore/base.py b/orchestrator/metastore/base.py index b9d7a9417..8a473b27d 100644 --- a/orchestrator/metastore/base.py +++ b/orchestrator/metastore/base.py @@ -342,6 +342,26 @@ def sample_store_load( storage_location: SQLiteStoreConfiguration | SQLStoreConfiguration, ) -> SampleStoreResource: """Adds storage location information to SQL sample stores""" + # Check for required keys in the nested structure + key_chain = ["config", "specification", "module", "moduleClass"] + current_dict = sample_store_resource_dict + + for i, key in enumerate(key_chain): + if not isinstance(current_dict, dict): + missing_path = ".".join(key_chain[:i]) + raise ValueError( + f"Invalid sample store 
resource structure: expected dictionary at '{missing_path}', " + f"but got {type(current_dict).__name__}" + ) + + if key not in current_dict: + missing_path = ".".join(key_chain[: i + 1]) + raise ValueError( + f"Invalid sample store resource structure: missing required key '{missing_path}'" + ) + + current_dict = current_dict[key] + if ( sample_store_resource_dict["config"]["specification"]["module"]["moduleClass"] == "SQLSampleStore" diff --git a/orchestrator/metastore/sql/statements.py b/orchestrator/metastore/sql/statements.py index 19a5e6689..ee6cfc560 100644 --- a/orchestrator/metastore/sql/statements.py +++ b/orchestrator/metastore/sql/statements.py @@ -352,7 +352,7 @@ def resource_upsert( r"ON DUPLICATE KEY UPDATE data = values(data)" ).bindparams( identifier=resource.identifier, - kind=resource.kind, + kind=resource.kind.value, version=resource.version, data=json_representation, ) diff --git a/tests/conftest.py b/tests/conftest.py index 124323d5a..01bb000a3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,8 @@ import pytest import ray +from orchestrator.core.legacy.registry import LegacyMigratorRegistry + from .fixtures.core.datacontainer import * from .fixtures.core.samplestore import * from .fixtures.core.generators import * @@ -51,3 +53,86 @@ def initialize_ray() -> Generator[None, None, None]: ) yield ray.shutdown() + + +@pytest.fixture(scope="session", autouse=True) +def session_legacy_migrators() -> dict: + """Load legacy migrators once per session and return a copy. + + This session-scoped fixture ensures migrators are loaded once at the start + of the test session and the registered migrators are saved. This copy can + then be used by test-scoped fixtures to reset the registry state. 
+ + Returns: + A dictionary copy of all registered migrators + """ + # Import to trigger registration - this happens once per test session + import orchestrator.core.legacy.migrators # noqa: F401 + + # Return a copy of the registered migrators + return LegacyMigratorRegistry._migrators.copy() + + +@pytest.fixture +def isolated_legacy_migrator_registry() -> Generator[None, None, None]: + """Isolate the LegacyMigratorRegistry for each test. + + This fixture ensures that modifications to the registry in one test + do not affect other tests, even when running with pytest -n auto. + + The fixture: + 1. Saves the current registry state before the test + 2. Clears the registry for the test + 3. Restores the original state after the test + + Usage: + def test_something(isolated_legacy_migrator_registry): + # Registry starts empty + # Register validators as needed for this test + # Changes won't affect other tests + """ + # Save the current state + original_migrators = LegacyMigratorRegistry._migrators.copy() + + # Clear for this test + LegacyMigratorRegistry._migrators.clear() + + try: + yield + finally: + # Restore original state + LegacyMigratorRegistry._migrators = original_migrators + + +@pytest.fixture +def legacy_migrators_loaded( + session_legacy_migrators: dict, +) -> Generator[None, None, None]: + """Ensure legacy migrators are loaded and isolated for the test. + + This fixture: + 1. Resets the registry to the session state (all validators loaded) + 2. Allows the test to run (potentially modifying the registry) + 3. Restores the registry to the session state after the test + + This ensures: + - All validators are available to the test + - Test modifications don't affect other tests + - Consistent behavior across pytest-xdist workers + + The session_legacy_migrators fixture loads validators once per test session, + and this fixture resets to that known-good state before and after each test. 
+ + Usage: + def test_with_real_migrators(legacy_migrators_loaded): + # All validators are registered and available + # Test can use them without affecting other tests + """ + # Reset registry to session state before test + LegacyMigratorRegistry._migrators = session_legacy_migrators.copy() + + try: + yield + finally: + # Restore registry to session state after test + LegacyMigratorRegistry._migrators = session_legacy_migrators.copy() diff --git a/tests/core/legacy/validators/samplestore/test_gt4sd_transformer_migration.py b/tests/core/legacy/validators/samplestore/test_gt4sd_transformer_migration.py new file mode 100644 index 000000000..7a1aef9ef --- /dev/null +++ b/tests/core/legacy/validators/samplestore/test_gt4sd_transformer_migration.py @@ -0,0 +1,289 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Tests for GT4SDTransformer to CSVSampleStore migration migrator""" + +from orchestrator.core.legacy.registry import LegacyMigratorRegistry + + +class TestGT4SDTransformerMigration: + """Test migrate_gt4sd_transformer_to_csv migrator""" + + def test_migrates_gt4sd_transformer_to_csv_sample_store( + self, legacy_migrators_loaded: None + ) -> None: + """Test that GT4SDTransformer is migrated to CSVSampleStore with explicit parameters""" + migrator = LegacyMigratorRegistry.get_migrator( + "samplestore_gt4sd_transformer_to_csv" + ) + assert migrator is not None + + data = { + "config": { + "copyFrom": [ + { + "module": { + "moduleClass": "GT4SDTransformer", + "moduleName": "orchestrator.plugins.samplestores.gt4sd", + }, + "storageLocation": { + "path": "data/GM_Comparison/Transfromer/Sample_0/test_generations.csv" + }, + "parameters": { + "generatorIdentifier": "gt4sd-pfas-transformer-model-one" + }, + } + ] + } + } + + result = migrator.migrator_function(data) + + # Check module class and name were updated + copy_from = result["config"]["copyFrom"][0] + assert copy_from["module"]["moduleClass"] == "CSVSampleStore" + assert 
copy_from["module"]["moduleName"] == "orchestrator.core.samplestore.csv" + + # Check identifierColumn was added + assert copy_from["parameters"]["identifierColumn"] == "smiles" + + # Check experiments configuration was added + assert "experiments" in copy_from["parameters"] + experiments = copy_from["parameters"]["experiments"] + assert len(experiments) == 1 + assert ( + experiments[0]["experimentIdentifier"] + == "transformer-toxicity-inference-experiment" + ) + assert "observedPropertyMap" in experiments[0] + assert "constitutivePropertyMap" in experiments[0] + assert experiments[0]["constitutivePropertyMap"] == ["smiles"] + + # Check property map was correctly added + property_map = experiments[0]["observedPropertyMap"] + assert property_map["logws"] == "GenLogws" + assert property_map["logd"] == "GenLogd" + assert property_map["loghl"] == "GenLoghl" + assert property_map["pka"] == "GenPka" + assert property_map["biodegradation halflife"] == "GenBiodeg" + assert property_map["bcf"] == "GenBcf" + assert property_map["ld50"] == "GenLd50" + assert property_map["scscore"] == "GenScscore" + + # Check original generatorIdentifier was preserved + assert ( + copy_from["parameters"]["generatorIdentifier"] + == "gt4sd-pfas-transformer-model-one" + ) + + def test_preserves_existing_identifier_column( + self, legacy_migrators_loaded: None + ) -> None: + """Test that existing identifierColumn is not overwritten""" + migrator = LegacyMigratorRegistry.get_migrator( + "samplestore_gt4sd_transformer_to_csv" + ) + assert migrator is not None + + data = { + "config": { + "copyFrom": [ + { + "module": { + "moduleClass": "GT4SDTransformer", + "moduleName": "orchestrator.plugins.samplestores.gt4sd", + }, + "parameters": { + "generatorIdentifier": "gt4sd-pfas-transformer-model-one", + "identifierColumn": "custom_id", + }, + } + ] + } + } + + result = migrator.migrator_function(data) + + # Check that custom identifierColumn was preserved + assert ( + 
result["config"]["copyFrom"][0]["parameters"]["identifierColumn"] + == "custom_id" + ) + + def test_preserves_existing_experiments( + self, legacy_migrators_loaded: None + ) -> None: + """Test that existing experiments configuration is not overwritten""" + migrator = LegacyMigratorRegistry.get_migrator( + "samplestore_gt4sd_transformer_to_csv" + ) + assert migrator is not None + + custom_experiments = [ + { + "experimentIdentifier": "custom-experiment", + "observedPropertyMap": {"prop1": "Prop1"}, + "constitutivePropertyMap": ["id"], + } + ] + + data = { + "config": { + "copyFrom": [ + { + "module": { + "moduleClass": "GT4SDTransformer", + "moduleName": "orchestrator.plugins.samplestores.gt4sd", + }, + "parameters": { + "generatorIdentifier": "gt4sd-pfas-transformer-model-one", + "experiments": custom_experiments, + }, + } + ] + } + } + + result = migrator.migrator_function(data) + + # Check that custom experiments were preserved + assert ( + result["config"]["copyFrom"][0]["parameters"]["experiments"] + == custom_experiments + ) + + def test_does_not_modify_other_module_classes( + self, legacy_migrators_loaded: None + ) -> None: + """Test that other module classes are not modified""" + migrator = LegacyMigratorRegistry.get_migrator( + "samplestore_gt4sd_transformer_to_csv" + ) + assert migrator is not None + + data = { + "config": { + "copyFrom": [ + { + "module": { + "moduleClass": "CSVSampleStore", + "moduleName": "orchestrator.core.samplestore.csv", + }, + "parameters": {"identifierColumn": "id"}, + } + ] + } + } + + result = migrator.migrator_function(data) + + # Check that nothing was changed + copy_from = result["config"]["copyFrom"][0] + assert copy_from["module"]["moduleClass"] == "CSVSampleStore" + assert copy_from["module"]["moduleName"] == "orchestrator.core.samplestore.csv" + assert copy_from["parameters"] == {"identifierColumn": "id"} + + def test_handles_multiple_copy_from_entries( + self, legacy_migrators_loaded: None + ) -> None: + """Test that 
migrator handles multiple copyFrom entries correctly""" + migrator = LegacyMigratorRegistry.get_migrator( + "samplestore_gt4sd_transformer_to_csv" + ) + assert migrator is not None + + data = { + "config": { + "copyFrom": [ + { + "module": { + "moduleClass": "GT4SDTransformer", + "moduleName": "orchestrator.plugins.samplestores.gt4sd", + }, + "parameters": {"generatorIdentifier": "model-one"}, + }, + { + "module": { + "moduleClass": "CSVSampleStore", + "moduleName": "orchestrator.core.samplestore.csv", + }, + "parameters": {"identifierColumn": "id"}, + }, + ] + } + } + + result = migrator.migrator_function(data) + + # Check first entry was migrated + first_entry = result["config"]["copyFrom"][0] + assert first_entry["module"]["moduleClass"] == "CSVSampleStore" + assert ( + first_entry["module"]["moduleName"] == "orchestrator.core.samplestore.csv" + ) + assert "identifierColumn" in first_entry["parameters"] + assert "experiments" in first_entry["parameters"] + + # Check second entry was not modified + second_entry = result["config"]["copyFrom"][1] + assert second_entry["module"]["moduleClass"] == "CSVSampleStore" + assert second_entry["parameters"] == {"identifierColumn": "id"} + + def test_handles_missing_copy_from(self, legacy_migrators_loaded: None) -> None: + """Test that migrator handles missing copyFrom field gracefully""" + migrator = LegacyMigratorRegistry.get_migrator( + "samplestore_gt4sd_transformer_to_csv" + ) + assert migrator is not None + + data = {"config": {"specification": {"module": {}}}} + + result = migrator.migrator_function(data) + + # Check that data was not modified + assert result == data + + def test_handles_empty_copy_from(self, legacy_migrators_loaded: None) -> None: + """Test that migrator handles empty copyFrom array gracefully""" + migrator = LegacyMigratorRegistry.get_migrator( + "samplestore_gt4sd_transformer_to_csv" + ) + assert migrator is not None + + data = {"config": {"copyFrom": []}} + + result = 
migrator.migrator_function(data) + + # Check that data was not modified + assert result == data + + def test_handles_missing_parameters(self, legacy_migrators_loaded: None) -> None: + """Test that migrator adds parameters if missing""" + migrator = LegacyMigratorRegistry.get_migrator( + "samplestore_gt4sd_transformer_to_csv" + ) + assert migrator is not None + + data = { + "config": { + "copyFrom": [ + { + "module": { + "moduleClass": "GT4SDTransformer", + "moduleName": "orchestrator.plugins.samplestores.gt4sd", + }, + } + ] + } + } + + result = migrator.migrator_function(data) + + # Check that parameters were added + copy_from = result["config"]["copyFrom"][0] + assert "parameters" in copy_from + assert "identifierColumn" in copy_from["parameters"] + assert "experiments" in copy_from["parameters"] + + +# Made with Bob diff --git a/tests/core/test_legacy_migrators.py b/tests/core/test_legacy_migrators.py new file mode 100644 index 000000000..dcf1a2a0d --- /dev/null +++ b/tests/core/test_legacy_migrators.py @@ -0,0 +1,531 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Integration tests for legacy migrators with pydantic models and upgrade process""" + +import sqlite3 +from collections.abc import Callable +from pathlib import Path + +import pydantic +import pytest + +from orchestrator.core.legacy.registry import LegacyMigratorRegistry, legacy_migrator +from orchestrator.core.resources import CoreResourceKinds +from orchestrator.metastore.project import ProjectContext + +sqlite3_version = sqlite3.sqlite_version_info + + +class TestLegacyMigratorWithPydantic: + """Test legacy migrators working with pydantic models""" + + def test_migrator_applied_during_model_validation( + self, isolated_legacy_migrator_registry: None + ) -> None: + """Test that a legacy migrator can be manually applied before pydantic validation""" + + # Define a simple pydantic model + class OldModel(pydantic.BaseModel): + new_field: str + + # Register a legacy 
migrator + @legacy_migrator( + identifier="old_to_new_field", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=["config.old_field"], + deprecated_from_version="1.0.0", + removed_from_version="2.0.0", + description="Migrate old_field to new_field", + ) + def migrate_old_to_new(data: dict) -> dict: + if "old_field" in data: + data["new_field"] = data.pop("old_field") + return data + + # Get the validator + migrator = LegacyMigratorRegistry.get_migrator("old_to_new_field") + assert migrator is not None + + # Old format data + old_data = {"old_field": "test_value"} + + # Apply legacy migrator + migrated_data = migrator.migrator_function(old_data) + + # Now validate with pydantic + model = OldModel.model_validate(migrated_data) + assert model.new_field == "test_value" + + def test_csv_sample_store_migration_migrator( + self, legacy_migrators_loaded: None + ) -> None: + """Test the CSV sample store migration migrator with realistic data""" + + # Get the migrator (should be registered from setup_method) + migrator = LegacyMigratorRegistry.get_migrator( + "csv_constitutive_columns_migration" + ) + assert migrator is not None + assert migrator.resource_type == CoreResourceKinds.SAMPLESTORE + + # Old format CSV sample store data (with config section) + old_csv_data = { + "kind": "samplestore", + "type": "csv", + "identifier": "test_store", + "config": { + "identifierColumn": "id", + "constitutivePropertyColumns": ["prop1", "prop2"], + "experiments": [ + { + "experimentIdentifier": "exp1", + "actuatorIdentifier": "act1", + "propertyMap": ["obs1", "obs2"], + } + ], + }, + } + + # Apply the validator + migrated_data = migrator.migrator_function(old_csv_data.copy()) + + # Verify migration - config.constitutivePropertyColumns removed + assert "constitutivePropertyColumns" not in migrated_data["config"] + assert len(migrated_data["config"]["experiments"]) == 1 + exp = migrated_data["config"]["experiments"][0] + assert "propertyMap" not in exp + assert 
"observedPropertyMap" in exp + assert exp["observedPropertyMap"] == ["obs1", "obs2"] + assert "constitutivePropertyMap" in exp + assert exp["constitutivePropertyMap"] == ["prop1", "prop2"] + + def test_entitysource_to_samplestore_migration( + self, legacy_migrators_loaded: None + ) -> None: + """Test the entitysource to samplestore kind migration""" + + # Import the migrator to register it + from orchestrator.core.legacy.migrators.resource.entitysource_to_samplestore import ( # noqa: F401 + migrate_entitysource_kind_to_samplestore, + ) + + # Get the migrator + migrator = LegacyMigratorRegistry.get_migrator( + "samplestore_kind_entitysource_to_samplestore" + ) + assert migrator is not None + assert migrator.resource_type == CoreResourceKinds.SAMPLESTORE + + # Old format with entitysource kind + old_data = { + "kind": "entitysource", + "type": "csv", + "identifier": "test_store", + } + + # Apply the migrator + migrated_data = migrator.migrator_function(old_data.copy()) + + # Verify migration + assert migrated_data["kind"] == "samplestore" + assert migrated_data["type"] == "csv" + assert migrated_data["identifier"] == "test_store" + + def test_chained_migrators(self, isolated_legacy_migrator_registry: None) -> None: + """Test applying multiple migrators in sequence""" + + # Register two migrators + @legacy_migrator( + identifier="step1_migrator", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=["config.old_field1"], + deprecated_from_version="1.0.0", + removed_from_version="2.0.0", + description="Step 1 migration", + ) + def step1(data: dict) -> dict: + if "old_field1" in data: + data["intermediate_field"] = data.pop("old_field1") + return data + + @legacy_migrator( + identifier="step2_migrator", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=["config.intermediate_field"], + deprecated_from_version="2.0.0", + removed_from_version="3.0.0", + description="Step 2 migration", + ) + def step2(data: dict) -> dict: + if 
"intermediate_field" in data: + data["new_field"] = data.pop("intermediate_field") + return data + + # Get validators + migrator1 = LegacyMigratorRegistry.get_migrator("step1_migrator") + migrator2 = LegacyMigratorRegistry.get_migrator("step2_migrator") + assert migrator1 is not None + assert migrator2 is not None + + # Old data + old_data = {"old_field1": "value"} + + # Apply migrators in sequence + data = migrator1.migrator_function(old_data) + data = migrator2.migrator_function(data) + + # Verify final state + assert "old_field1" not in data + assert "intermediate_field" not in data + assert data["new_field"] == "value" + + +class TestUpgradeHandlerIntegration: + """Integration tests for ado upgrade with legacy migrators via CLI""" + + @pytest.mark.parametrize("valid_ado_project_context", ["mysql"], indirect=True) + def test_upgrade_applies_legacy_migrator_via_cli( + self, + legacy_migrators_loaded: None, + tmp_path: Path, + valid_ado_project_context: ProjectContext, + create_active_ado_context: Callable, + ) -> None: + """Test that ado upgrade applies legacy migrators correctly via CLI""" + + from typer.testing import CliRunner + + runner = CliRunner() + + from orchestrator.cli.core.cli import app as ado + from orchestrator.cli.utils.generic.wrappers import get_sql_store + from orchestrator.core.samplestore.config import ( + SampleStoreConfiguration, + SampleStoreModuleConf, + SampleStoreSpecification, + ) + from orchestrator.core.samplestore.resource import SampleStoreResource + + # Step 1: Setup active context + create_active_ado_context(runner, tmp_path, valid_ado_project_context) + + # Step 2: Register a test validator + @legacy_migrator( + identifier="test_upgrade_migrator", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=["config.old_field"], + deprecated_from_version="1.0.0", + removed_from_version="2.0.0", + description="Test upgrade migrator", + ) + def test_migrator(data: dict) -> dict: + """Migrate old_field to new_field""" + 
if "config" in data and "old_field" in data["config"]: + data["config"]["new_field"] = data["config"].pop("old_field") + return data + + # Step 3: Create a sample store resource + test_resource = SampleStoreResource( + identifier="test_legacy_store", + config=SampleStoreConfiguration( + specification=SampleStoreSpecification( + module=SampleStoreModuleConf( + moduleClass="SQLSampleStore", + moduleName="orchestrator.core.samplestore.sql", + ), + storageLocation=valid_ado_project_context.metadataStore, + ) + ), + ) + + # Step 4: Save resource to database + sql_store = get_sql_store(project_context=valid_ado_project_context) + sql_store.updateResource(resource=test_resource) + + # Step 5: Execute upgrade via CLI + result = runner.invoke( + ado, + [ + "--override-ado-app-dir", + str(tmp_path), + "upgrade", + "samplestore", + "--apply-legacy-migrator", + "test_upgrade_migrator", + ], + ) + + # Step 6: Verify success + assert result.exit_code == 0 + assert "Success" in result.output or "✓" in result.output + + # Step 7: Verify the upgrade process completed successfully + # The CLI output "Success!" 
confirms the migrator was applied + # and the resource was upgraded in the database + + def test_upgrade_rejects_mismatched_migrator_type( + self, + legacy_migrators_loaded: None, + tmp_path: Path, + valid_ado_project_context: ProjectContext, + create_active_ado_context: Callable, + ) -> None: + """Test that upgrade rejects migrators for wrong resource type""" + + from typer.testing import CliRunner + + runner = CliRunner() + + from orchestrator.cli.core.cli import app as ado + + # Step 1: Setup active context + create_active_ado_context(runner, tmp_path, valid_ado_project_context) + + # Step 2: Register a migrator for OPERATION + @legacy_migrator( + identifier="operation_only_migrator", + resource_type=CoreResourceKinds.OPERATION, + deprecated_field_paths=["config.old_field"], + deprecated_from_version="1.0.0", + removed_from_version="2.0.0", + description="Operation-only migrator", + ) + def operation_migrator(data: dict) -> dict: + return data + + # Step 3: Try to use operation migrator on samplestore + result = runner.invoke( + ado, + [ + "--override-ado-app-dir", + str(tmp_path), + "upgrade", + "samplestore", + "--apply-legacy-migrator", + "operation_only_migrator", + ], + ) + + # Step 4: Verify failure with appropriate error message + assert result.exit_code == 1 + assert "ERROR" in result.output + assert "operation_only_migrator" in result.output + assert "operation" in result.output.lower() + assert "samplestore" in result.output.lower() + + def test_upgrade_rejects_unknown_migrator( + self, + tmp_path: Path, + valid_ado_project_context: ProjectContext, + create_active_ado_context: Callable, + ) -> None: + """Test that upgrade rejects unknown migrator identifiers""" + + from typer.testing import CliRunner + + runner = CliRunner() + + from orchestrator.cli.core.cli import app as ado + + # Step 1: Setup active context + create_active_ado_context(runner, tmp_path, valid_ado_project_context) + + # Step 2: Try to use non-existent migrator + result = 
runner.invoke( + ado, + [ + "--override-ado-app-dir", + str(tmp_path), + "upgrade", + "samplestore", + "--apply-legacy-migrator", + "nonexistent_migrator_xyz", + ], + ) + + # Step 3: Verify failure with appropriate error message + assert result.exit_code == 1 + assert "ERROR" in result.output + assert "nonexistent_migrator_xyz" in result.output + assert ( + "unknown" in result.output.lower() or "not found" in result.output.lower() + ) + + # AP: the -> and ->> syntax in SQLite is only supported from version 3.38.0 + # ref: https://sqlite.org/json1.html#jptr + @pytest.mark.skipif( + sqlite3_version < (3, 38, 0), + reason="SQLite version 3.38.0 or higher is required", + ) + def test_upgrade_auto_resolves_migrator_dependencies( + self, + legacy_migrators_loaded: None, + tmp_path: Path, + valid_ado_project_context: ProjectContext, + create_active_ado_context: Callable, + ) -> None: + """Test that upgrade automatically includes migrator dependencies""" + + from typer.testing import CliRunner + + runner = CliRunner() + + from orchestrator.cli.core.cli import app as ado + from orchestrator.cli.utils.generic.wrappers import get_sql_store + from orchestrator.core.samplestore.config import ( + SampleStoreConfiguration, + SampleStoreModuleConf, + SampleStoreSpecification, + ) + from orchestrator.core.samplestore.resource import SampleStoreResource + + # Step 1: Setup active context + create_active_ado_context(runner, tmp_path, valid_ado_project_context) + + # Step 2: Register migrators with dependencies + @legacy_migrator( + identifier="base_migrator", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=["config.field1"], + deprecated_from_version="1.0.0", + removed_from_version="2.0.0", + description="Base migrator", + ) + def base_migrator(data: dict) -> dict: + if "config" in data and "field1" in data["config"]: + data["config"]["field1_migrated"] = True + return data + + @legacy_migrator( + identifier="dependent_migrator", + 
resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=["config.field2"], + deprecated_from_version="1.0.0", + removed_from_version="2.0.0", + description="Dependent migrator", + dependencies=["base_migrator"], # Depends on base_migrator + ) + def dependent_migrator(data: dict) -> dict: + if "config" in data and "field2" in data["config"]: + data["config"]["field2_migrated"] = True + return data + + # Step 3: Create and save a sample store resource + test_resource = SampleStoreResource( + identifier="test_dependency_store", + config=SampleStoreConfiguration( + specification=SampleStoreSpecification( + module=SampleStoreModuleConf( + moduleClass="SQLSampleStore", + moduleName="orchestrator.core.samplestore.sql", + ), + storageLocation=valid_ado_project_context.metadataStore, + ) + ), + ) + + sql_store = get_sql_store(project_context=valid_ado_project_context) + sql_store.updateResource(resource=test_resource) + + # Step 4: Execute upgrade with only dependent_migrator + # Should auto-include base_migrator + result = runner.invoke( + ado, + [ + "--override-ado-app-dir", + str(tmp_path), + "upgrade", + "samplestore", + "--apply-legacy-migrator", + "dependent_migrator", + ], + ) + + # Step 5: Verify success + assert result.exit_code == 0 + assert "Success" in result.output or "✓" in result.output + + # The test verifies the CLI command completes successfully + # with automatic dependency resolution + + +class TestValidatorDataIntegrity: + """Test that migrators preserve data integrity""" + + def setup_method(self) -> None: + """Clear the registry before each test""" + LegacyMigratorRegistry._migrators = {} + + def test_migrator_preserves_unrelated_fields(self) -> None: + """Test that migrators don't modify unrelated fields""" + + @legacy_migrator( + identifier="selective_migrator", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=["config.old_field"], + deprecated_from_version="1.0.0", + removed_from_version="2.0.0", + 
description="Selective migrator", + ) + def selective(data: dict) -> dict: + if "old_field" in data: + data["new_field"] = data.pop("old_field") + return data + + migrator = LegacyMigratorRegistry.get_migrator("selective_migrator") + assert migrator is not None + + # Data with many fields + data = { + "old_field": "migrate_me", + "keep_field1": "value1", + "keep_field2": 42, + "keep_field3": ["list", "of", "items"], + "keep_field4": {"nested": "dict"}, + } + + result = migrator.migrator_function(data.copy()) + + # Verify migration happened + assert "old_field" not in result + assert result["new_field"] == "migrate_me" + + # Verify other fields preserved + assert result["keep_field1"] == "value1" + assert result["keep_field2"] == 42 + assert result["keep_field3"] == ["list", "of", "items"] + assert result["keep_field4"] == {"nested": "dict"} + + def test_migrator_handles_missing_fields_gracefully( + self, isolated_legacy_migrator_registry: None + ) -> None: + """Test that validators handle missing deprecated fields gracefully""" + + @legacy_migrator( + identifier="graceful_migrator", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=["config.optional_old_field"], + deprecated_from_version="1.0.0", + removed_from_version="2.0.0", + description="Graceful migrator", + ) + def graceful(data: dict) -> dict: + if "optional_old_field" in data: + data["new_field"] = data.pop("optional_old_field") + return data + + migrator = LegacyMigratorRegistry.get_migrator("graceful_migrator") + assert migrator is not None + + # Data without the deprecated field + data = {"other_field": "value"} + + result = migrator.migrator_function(data.copy()) + + # Should not crash and should preserve data + assert result == data + assert "new_field" not in result + + +# Made with Bob diff --git a/tests/core/test_legacy_registry.py b/tests/core/test_legacy_registry.py new file mode 100644 index 000000000..7afc659bd --- /dev/null +++ b/tests/core/test_legacy_registry.py @@ -0,0 
# Copyright IBM Corporation 2025, 2026
# SPDX-License-Identifier: MIT

"""Unit tests for the legacy migrator registry"""

from collections.abc import Callable

import pytest

from orchestrator.core.legacy.metadata import LegacyMigratorMetadata
from orchestrator.core.legacy.registry import (
    LegacyMigratorRegistry,
    legacy_migrator,
)
from orchestrator.core.resources import CoreResourceKinds


@pytest.fixture
def dummy_migrator() -> Callable[[dict], dict]:
    """Fixture providing a simple identity migrator function"""

    def migrator(data: dict) -> dict:
        return data

    return migrator


@pytest.fixture
def create_migrator_metadata(
    dummy_migrator: Callable[[dict], dict],
) -> Callable[..., LegacyMigratorMetadata]:
    """Fixture factory for creating LegacyMigratorMetadata instances"""

    def _create_metadata(
        identifier: str = "test_migrator",
        resource_type: CoreResourceKinds = CoreResourceKinds.SAMPLESTORE,
        deprecated_field_paths: list[str] | None = None,
        deprecated_from_version: str = "1.0.0",
        removed_from_version: str = "2.0.0",
        description: str = "Test migrator",
        migrator_function: Callable[[dict], dict] | None = None,
        dependencies: list[str] | None = None,
    ) -> LegacyMigratorMetadata:
        # Resolve mutable defaults lazily so no list is shared across calls.
        if deprecated_field_paths is None:
            deprecated_field_paths = ["config.field1"]
        if migrator_function is None:
            migrator_function = dummy_migrator
        if dependencies is None:
            dependencies = []

        return LegacyMigratorMetadata(
            identifier=identifier,
            resource_type=resource_type,
            deprecated_field_paths=deprecated_field_paths,
            deprecated_from_version=deprecated_from_version,
            removed_from_version=removed_from_version,
            description=description,
            migrator_function=migrator_function,
            dependencies=dependencies,
        )

    return _create_metadata


class TestLegacyMigratorMetadata:
    """Test the LegacyMigratorMetadata model"""

    def test_create_metadata(
        self,
        create_migrator_metadata: Callable[..., LegacyMigratorMetadata],
        dummy_migrator: Callable[[dict], dict],
    ) -> None:
        """Test creating migrator metadata"""
        metadata = create_migrator_metadata(
            identifier="test_migrator",
            deprecated_field_paths=["config.field1", "config.field2"],
        )

        assert metadata.identifier == "test_migrator"
        assert metadata.resource_type == CoreResourceKinds.SAMPLESTORE
        assert metadata.deprecated_field_paths == [
            "config.field1",
            "config.field2",
        ]
        assert metadata.deprecated_from_version == "1.0.0"
        assert metadata.removed_from_version == "2.0.0"
        assert metadata.description == "Test migrator"
        assert metadata.migrator_function == dummy_migrator

    def test_metadata_serialization(
        self, create_migrator_metadata: Callable[..., LegacyMigratorMetadata]
    ) -> None:
        """Test that the migrator function is excluded from serialization"""
        metadata = create_migrator_metadata()

        # Serialize to dict
        data = metadata.model_dump()

        # migrator_function should be excluded; plain metadata fields remain
        assert "migrator_function" not in data
        assert "identifier" in data
        assert "resource_type" in data


class TestLegacyMigratorRegistry:
    """Test the LegacyMigratorRegistry class"""

    def test_register_migrator(
        self,
        isolated_legacy_migrator_registry: None,
        create_migrator_metadata: Callable[..., LegacyMigratorMetadata],
    ) -> None:
        """Test registering a migrator"""
        metadata = create_migrator_metadata()

        LegacyMigratorRegistry.register(metadata)

        assert len(LegacyMigratorRegistry._migrators) == 1
        assert "test_migrator" in LegacyMigratorRegistry._migrators

    def test_get_migrator(
        self,
        isolated_legacy_migrator_registry: None,
        create_migrator_metadata: Callable[..., LegacyMigratorMetadata],
    ) -> None:
        """Test retrieving a migrator by identifier"""
        metadata = create_migrator_metadata()

        LegacyMigratorRegistry.register(metadata)

        retrieved = LegacyMigratorRegistry.get_migrator("test_migrator")
        assert retrieved is not None
        assert retrieved.identifier == "test_migrator"

    def test_get_nonexistent_migrator(
        self, isolated_legacy_migrator_registry: None
    ) -> None:
        """Test retrieving a migrator that doesn't exist"""
        retrieved = LegacyMigratorRegistry.get_migrator("nonexistent")
        assert retrieved is None

    def test_get_migrators_for_resource(
        self,
        isolated_legacy_migrator_registry: None,
        create_migrator_metadata: Callable[..., LegacyMigratorMetadata],
    ) -> None:
        """Test retrieving migrators for a specific resource type"""
        # Register migrators for different resource types
        metadata1 = create_migrator_metadata(
            identifier="samplestore_migrator",
            resource_type=CoreResourceKinds.SAMPLESTORE,
            description="Sample store migrator",
        )

        metadata2 = create_migrator_metadata(
            identifier="operation_migrator",
            resource_type=CoreResourceKinds.OPERATION,
            deprecated_field_paths=["config.field2"],
            description="Operation migrator",
        )

        LegacyMigratorRegistry.register(metadata1)
        LegacyMigratorRegistry.register(metadata2)

        # Get migrators for SAMPLESTORE
        samplestore_migrators = LegacyMigratorRegistry.get_migrators_for_resource(
            CoreResourceKinds.SAMPLESTORE
        )
        assert len(samplestore_migrators) == 1
        assert samplestore_migrators[0].identifier == "samplestore_migrator"

        # Get migrators for OPERATION
        operation_migrators = LegacyMigratorRegistry.get_migrators_for_resource(
            CoreResourceKinds.OPERATION
        )
        assert len(operation_migrators) == 1
        assert operation_migrators[0].identifier == "operation_migrator"

    def test_find_migrators_for_deprecated_field_paths(
        self,
        isolated_legacy_migrator_registry: None,
        create_migrator_metadata: Callable[..., LegacyMigratorMetadata],
    ) -> None:
        """Test finding migrators that handle specific field paths"""
        # Register migrators with different field paths
        metadata1 = create_migrator_metadata(
            identifier="migrator1",
            deprecated_field_paths=["config.field1", "config.field2"],
            description="Migrator 1",
        )

        metadata2 = create_migrator_metadata(
            identifier="migrator2",
            deprecated_field_paths=["config.specification.field3"],
            description="Migrator 2",
        )

        metadata3 = create_migrator_metadata(
            identifier="migrator3",
            resource_type=CoreResourceKinds.DISCOVERYSPACE,
            deprecated_field_paths=["config.properties"],
            description="Migrator 3",
        )

        LegacyMigratorRegistry.register(metadata1)
        LegacyMigratorRegistry.register(metadata2)
        LegacyMigratorRegistry.register(metadata3)

        # Find migrators for a single full path
        matches = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths(
            CoreResourceKinds.SAMPLESTORE, {"config.field1"}
        )
        assert len(matches) == 1
        assert matches[0].identifier == "migrator1"

        # Find migrators for a nested path
        matches = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths(
            CoreResourceKinds.SAMPLESTORE, {"config.specification.field3"}
        )
        assert len(matches) == 1
        assert matches[0].identifier == "migrator2"

        # Find migrators for multiple paths
        matches = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths(
            CoreResourceKinds.SAMPLESTORE,
            {"config.field1", "config.specification.field3"},
        )
        assert len(matches) == 2
        migrator_ids = {m.identifier for m in matches}
        assert migrator_ids == {"migrator1", "migrator2"}

        # Find migrators for a non-existent path
        matches = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths(
            CoreResourceKinds.SAMPLESTORE, {"config.nonexistent"}
        )
        assert len(matches) == 0

        # Verify it doesn't match on leaf names alone (more specific than find_migrators_for_fields)
        matches = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths(
            CoreResourceKinds.SAMPLESTORE, {"field1"}  # Just leaf name, not full path
        )
        assert len(matches) == 0

        # Verify resource type filtering works
        matches = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths(
            CoreResourceKinds.DISCOVERYSPACE, {"config.properties"}
        )
        assert len(matches) == 1
        assert matches[0].identifier == "migrator3"

    def test_list_all(
        self,
        isolated_legacy_migrator_registry: None,
        create_migrator_metadata: Callable[..., LegacyMigratorMetadata],
    ) -> None:
        """Test listing all registered migrators"""
        metadata1 = create_migrator_metadata(
            identifier="migrator1",
            description="Migrator 1",
        )

        metadata2 = create_migrator_metadata(
            identifier="migrator2",
            resource_type=CoreResourceKinds.OPERATION,
            deprecated_field_paths=["config.field2"],
            description="Migrator 2",
        )

        LegacyMigratorRegistry.register(metadata1)
        LegacyMigratorRegistry.register(metadata2)

        all_migrators = LegacyMigratorRegistry.list_all()
        assert len(all_migrators) == 2

    def test_field_path_matching_with_real_migrators(
        self, legacy_migrators_loaded: None
    ) -> None:
        """Integration test: verify field path matching works with the real migrators"""

        # Test 1: discoveryspace properties field should match the properties_field_removal migrator
        matches = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths(
            CoreResourceKinds.DISCOVERYSPACE, {"config.properties"}
        )
        assert len(matches) >= 1
        migrator_ids = {m.identifier for m in matches}
        assert "discoveryspace_properties_field_removal" in migrator_ids

        # Test 2: operation actuators field should match the actuators_field_removal migrator
        matches = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths(
            CoreResourceKinds.OPERATION, {"config.actuators"}
        )
        assert len(matches) >= 1
        migrator_ids = {m.identifier for m in matches}
        assert "operation_actuators_field_removal" in migrator_ids

        # Test 3: operation parameters.mode should match the randomwalk migrator
        matches = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths(
            CoreResourceKinds.OPERATION, {"config.parameters.mode"}
        )
        assert len(matches) >= 1
        migrator_ids = {m.identifier for m in matches}
        assert "randomwalk_mode_to_sampler_config" in migrator_ids

        # Test 4: samplestore config.specification.module.moduleType should match the module_type migrator
        matches = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths(
            CoreResourceKinds.SAMPLESTORE, {"config.specification.module.moduleType"}
        )
        assert len(matches) >= 1
        migrator_ids = {m.identifier for m in matches}
        assert "samplestore_module_type_entitysource_to_samplestore" in migrator_ids

        # Test 5: samplestore kind field should match the kind migrator
        matches = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths(
            CoreResourceKinds.SAMPLESTORE, {"kind"}
        )
        assert len(matches) >= 1
        migrator_ids = {m.identifier for m in matches}
        assert "samplestore_kind_entitysource_to_samplestore" in migrator_ids

        # Test 6: multiple paths should return multiple migrators
        matches = LegacyMigratorRegistry.find_migrators_for_deprecated_field_paths(
            CoreResourceKinds.SAMPLESTORE,
            {
                "config.specification.module.moduleType",
                "config.specification.module.moduleClass",
                "config.specification.module.moduleName",
            },
        )
        assert len(matches) >= 3
        migrator_ids = {m.identifier for m in matches}
        assert "samplestore_module_type_entitysource_to_samplestore" in migrator_ids
        assert "samplestore_module_class_entitysource_to_samplestore" in migrator_ids
        assert "samplestore_module_name_entitysource_to_samplestore" in migrator_ids


class TestLegacyMigratorDecorator:
    """Test the @legacy_migrator decorator"""

    def test_decorator_registers_migrator(
        self, isolated_legacy_migrator_registry: None
    ) -> None:
        """Test that the decorator registers the migrator"""

        @legacy_migrator(
            identifier="test_decorator_migrator",
            resource_type=CoreResourceKinds.SAMPLESTORE,
            deprecated_field_paths=["config.field1"],
            deprecated_from_version="1.0.0",
            removed_from_version="2.0.0",
            description="Test decorator migrator",
        )
        def my_migrator(data: dict) -> dict:
            return data

        # Check that the migrator was registered
        assert len(LegacyMigratorRegistry._migrators) == 1
        assert "test_decorator_migrator" in LegacyMigratorRegistry._migrators

        # Check that the function still works
        test_data = {"key": "value"}
        result = my_migrator(test_data)
        assert result == test_data

    def test_decorator_preserves_function_metadata(
        self, isolated_legacy_migrator_registry: None
    ) -> None:
        """Test that the decorator preserves function metadata"""

        @legacy_migrator(
            identifier="test_migrator",
            resource_type=CoreResourceKinds.SAMPLESTORE,
            deprecated_field_paths=["config.field1"],
            deprecated_from_version="1.0.0",
            removed_from_version="2.0.0",
            description="Test migrator",
        )
        def my_migrator(data: dict) -> dict:
            """My migrator docstring"""
            return data

        # Check that function name and docstring are preserved
        assert my_migrator.__name__ == "my_migrator"
        assert my_migrator.__doc__ == "My migrator docstring"

    def test_migrator_function_execution(
        self, isolated_legacy_migrator_registry: None
    ) -> None:
        """Test that the migrator function executes correctly"""
        # NOTE: the isolation fixture is required here — without it the
        # decorator below registers "transform_migrator" into the global
        # registry and leaks it into other tests.

        @legacy_migrator(
            identifier="transform_migrator",
            resource_type=CoreResourceKinds.SAMPLESTORE,
            deprecated_field_paths=["old_field"],
            deprecated_from_version="1.0.0",
            removed_from_version="2.0.0",
            description="Transform migrator",
        )
        def transform_migrator(data: dict) -> dict:
            if "old_field" in data:
                data["new_field"] = data.pop("old_field")
            return data

        # Test the migrator function
        test_data = {"old_field": "value"}
        result = transform_migrator(test_data)
        assert "old_field" not in result
        assert result["new_field"] == "value"

        # Verify it was registered correctly
        metadata = LegacyMigratorRegistry.get_migrator("transform_migrator")
        assert metadata is not None
        # The registered migrator function should be callable and work correctly
        test_data2 = {"old_field": "another_value"}
        result2 = metadata.migrator_function(test_data2)
        assert "old_field" not in result2
        assert result2["new_field"] == "another_value"


# --- tests/core/test_legacy_utils.py ---

# Copyright IBM Corporation 2025, 2026
# SPDX-License-Identifier: MIT

"""Tests for legacy migrator utility functions"""

from orchestrator.core.legacy.utils import (
    get_nested_value,
    get_parent_dict_and_key,
    has_nested_field,
    remove_nested_field,
    set_nested_value,
)


class TestGetParentDictAndKey:
    """Tests for get_parent_dict_and_key function"""

    def test_simple_path(self) -> None:
        """Test getting a simple top-level field"""
        data = {"config": {"properties": ["a", "b"]}}
        parent, field = get_parent_dict_and_key(data, "config")
        assert parent == data
        assert field == "config"

    def test_nested_path(self) -> None:
        """Test getting a nested field"""
        data = {"config": {"specification": {"module": {"moduleType": "test"}}}}
        parent, field = get_parent_dict_and_key(
            data, "config.specification.module.moduleType"
        )
        assert parent == {"moduleType": "test"}
        assert field == "moduleType"

    def test_nonexistent_path(self) -> None:
        """Test getting a path that doesn't exist"""
        data = {"config": {}}
        parent, field = get_parent_dict_and_key(data, "config.nonexistent.field")
        assert parent is None
        assert field is None

    def test_path_through_non_dict(self) -> None:
        """Test path that goes through a non-dict value"""
        data = {"config": "string_value"}
        parent, field = get_parent_dict_and_key(data, "config.field")
        assert parent is None
        assert field is None
class TestGetNestedValue:
    """Tests for the get_nested_value helper"""

    def test_simple_path(self) -> None:
        """A one-level dotted path resolves to the stored value"""
        doc = {"config": {"properties": ["a", "b"]}}
        assert get_nested_value(doc, "config.properties") == ["a", "b"]

    def test_nested_path(self) -> None:
        """A multi-level dotted path resolves through nested dicts"""
        doc = {"config": {"specification": {"module": {"moduleType": "test"}}}}
        assert get_nested_value(doc, "config.specification.module.moduleType") == "test"

    def test_nonexistent_path(self) -> None:
        """A path whose segments are absent yields None"""
        doc = {"config": {}}
        assert get_nested_value(doc, "config.nonexistent.field") is None

    def test_path_through_non_dict(self) -> None:
        """Traversal stops (yielding None) when a segment is not a dict"""
        doc = {"config": "string_value"}
        assert get_nested_value(doc, "config.field") is None

    def test_get_dict_value(self) -> None:
        """A path may resolve to a dict, returned as-is"""
        doc = {"config": {"nested": {"key": "value"}}}
        assert get_nested_value(doc, "config.nested") == {"key": "value"}

    def test_get_none_value(self) -> None:
        """A field that exists with a None value also yields None"""
        doc = {"config": {"test": None}}
        assert get_nested_value(doc, "config.test") is None


class TestSetNestedValue:
    """Tests for the set_nested_value helper"""

    def test_set_simple_value(self) -> None:
        """Setting a field under an existing parent succeeds and mutates in place"""
        doc = {"config": {}}
        ok = set_nested_value(doc, "config.test", "value")
        assert ok is True
        assert doc["config"]["test"] == "value"

    def test_set_deeply_nested_value(self) -> None:
        """Setting a field under a deep existing parent succeeds"""
        doc = {"config": {"specification": {"module": {}}}}
        ok = set_nested_value(doc, "config.specification.module.type", "new_type")
        assert ok is True
        assert doc["config"]["specification"]["module"]["type"] == "new_type"

    def test_set_nonexistent_path(self) -> None:
        """Missing intermediate segments are not created; the call reports failure"""
        doc = {"config": {}}
        ok = set_nested_value(doc, "config.nonexistent.field", "value")
        assert ok is False
        assert "nonexistent" not in doc["config"]

    def test_overwrite_existing_value(self) -> None:
        """An existing value is replaced by the new one"""
        doc = {"config": {"test": "old_value"}}
        ok = set_nested_value(doc, "config.test", "new_value")
        assert ok is True
        assert doc["config"]["test"] == "new_value"


class TestRemoveNestedField:
    """Tests for the remove_nested_field helper"""

    def test_remove_simple_field(self) -> None:
        """Removing a field deletes only that key, leaving siblings intact"""
        doc = {"config": {"properties": ["a", "b"], "other": "value"}}
        ok = remove_nested_field(doc, "config.properties")
        assert ok is True
        assert "properties" not in doc["config"]
        assert doc["config"]["other"] == "value"

    def test_remove_deeply_nested_field(self) -> None:
        """Removal works at depth without disturbing sibling keys"""
        doc = {
            "config": {
                "specification": {"module": {"moduleType": "old", "other": "value"}}
            }
        }
        ok = remove_nested_field(doc, "config.specification.module.moduleType")
        assert ok is True
        assert "moduleType" not in doc["config"]["specification"]["module"]
        assert doc["config"]["specification"]["module"]["other"] == "value"

    def test_remove_nonexistent_field(self) -> None:
        """Removing an absent field reports failure rather than raising"""
        doc = {"config": {}}
        assert remove_nested_field(doc, "config.nonexistent") is False

    def test_remove_field_idempotent(self) -> None:
        """A second removal of the same field is safe and reports failure"""
        doc = {"config": {"test": "value"}}
        assert remove_nested_field(doc, "config.test") is True
        assert remove_nested_field(doc, "config.test") is False


class TestHasNestedField:
    """Tests for the has_nested_field helper"""

    def test_has_simple_field(self) -> None:
        """A present one-level field is reported as existing"""
        doc = {"config": {"properties": ["a", "b"]}}
        assert has_nested_field(doc, "config.properties") is True

    def test_has_deeply_nested_field(self) -> None:
        """A present deep field is reported as existing"""
        doc = {"config": {"specification": {"module": {"moduleType": "test"}}}}
        assert has_nested_field(doc, "config.specification.module.moduleType") is True

    def test_has_nonexistent_field(self) -> None:
        """An absent field is reported as missing"""
        doc = {"config": {}}
        assert has_nested_field(doc, "config.nonexistent") is False

    def test_has_field_through_non_dict(self) -> None:
        """A path crossing a non-dict value is reported as missing"""
        doc = {"config": "string_value"}
        assert has_nested_field(doc, "config.field") is False


class TestIntegration:
    """Integration tests combining multiple utility functions"""

    def test_check_set_remove_workflow(self) -> None:
        """Exercise the full check → set → remove lifecycle on one field"""
        doc = {"config": {}}

        # Field starts out absent
        assert has_nested_field(doc, "config.test") is False

        # Set it, then observe it
        assert set_nested_value(doc, "config.test", "value") is True
        assert has_nested_field(doc, "config.test") is True
        assert doc["config"]["test"] == "value"

        # Remove it again
        assert remove_nested_field(doc, "config.test") is True
        assert has_nested_field(doc, "config.test") is False

    def test_complex_nested_structure(self) -> None:
        """Modify and prune a realistic nested resource document"""
        doc = {
            "metadata": {"name": "test"},
            "config": {
                "specification": {
                    "module": {"moduleType": "entity_source", "moduleName": "test"}
                }
            },
        }

        # Existing deep field is visible
        assert has_nested_field(doc, "config.specification.module.moduleType") is True

        # Rewrite it in place
        assert (
            set_nested_value(
                doc, "config.specification.module.moduleType", "sample_store"
            )
            is True
        )
        assert doc["config"]["specification"]["module"]["moduleType"] == "sample_store"

        # Drop a sibling field
        assert (
            remove_nested_field(doc, "config.specification.module.moduleName") is True
        )
        assert "moduleName" not in doc["config"]["specification"]["module"]

        # Unrelated parts of the document are untouched
        assert doc["metadata"]["name"] == "test"
# Copyright IBM Corporation 2025, 2026
# SPDX-License-Identifier: MIT

"""Tests for migrator dependency resolution and ordering"""

import pytest

from orchestrator.core.legacy.metadata import LegacyMigratorMetadata
from orchestrator.core.legacy.registry import LegacyMigratorRegistry
from orchestrator.core.resources import CoreResourceKinds


def _register_noop_migrator(
    identifier: str, dependencies: list[str] | None = None
) -> None:
    """Register an identity migrator with the given identifier and dependencies.

    Helper that factors out the registration boilerplate shared by every
    test below. The deprecated field path and description are derived from
    the identifier's trailing letter (e.g. ``migrator_a`` -> ``config.field_a``).
    """
    suffix = identifier.rsplit("_", 1)[-1]
    LegacyMigratorRegistry.register(
        LegacyMigratorMetadata(
            identifier=identifier,
            resource_type=CoreResourceKinds.DISCOVERYSPACE,
            deprecated_field_paths=[f"config.field_{suffix}"],
            deprecated_from_version="1.0.0",
            removed_from_version="2.0.0",
            description=f"Migrator {suffix.upper()}",
            migrator_function=lambda data: data,
            dependencies=dependencies if dependencies is not None else [],
        )
    )


def test_resolve_dependencies_no_dependencies(
    isolated_legacy_migrator_registry: None,
) -> None:
    """Test resolving migrators with no dependencies"""
    _register_noop_migrator("migrator_a")
    _register_noop_migrator("migrator_b")

    ordered, missing = LegacyMigratorRegistry.resolve_dependencies(
        ["migrator_a", "migrator_b"]
    )

    # Both migrators come back; with no edges there is no forced ordering
    assert len(ordered) == 2
    assert "migrator_a" in ordered
    assert "migrator_b" in ordered
    assert len(missing) == 0


def test_resolve_dependencies_simple_chain(
    isolated_legacy_migrator_registry: None,
) -> None:
    """Test resolving migrators with a simple dependency chain"""
    # NOTE: the isolation fixture is required (it was missing here while
    # present on every sibling test) — without it, registrations leak into
    # and out of the global registry across tests.

    # C depends on B, B depends on A
    _register_noop_migrator("migrator_a")
    _register_noop_migrator("migrator_b", dependencies=["migrator_a"])
    _register_noop_migrator("migrator_c", dependencies=["migrator_b"])

    # Requesting only C must pull in the whole chain, in order
    ordered, missing = LegacyMigratorRegistry.resolve_dependencies(["migrator_c"])

    assert ordered == ["migrator_a", "migrator_b", "migrator_c"]
    assert len(missing) == 0


def test_resolve_dependencies_diamond(
    isolated_legacy_migrator_registry: None,
) -> None:
    """Test resolving migrators with a diamond dependency pattern"""
    # D depends on B and C; both B and C depend on A
    _register_noop_migrator("migrator_a")
    _register_noop_migrator("migrator_b", dependencies=["migrator_a"])
    _register_noop_migrator("migrator_c", dependencies=["migrator_a"])
    _register_noop_migrator("migrator_d", dependencies=["migrator_b", "migrator_c"])

    ordered, missing = LegacyMigratorRegistry.resolve_dependencies(["migrator_d"])

    # A first, then B and C in either order, then D
    assert len(ordered) == 4
    assert ordered[0] == "migrator_a"
    assert ordered[3] == "migrator_d"
    assert "migrator_b" in ordered[1:3]
    assert "migrator_c" in ordered[1:3]
    assert len(missing) == 0


def test_resolve_dependencies_circular(
    isolated_legacy_migrator_registry: None,
) -> None:
    """Test that circular dependencies are detected"""
    # A depends on B, B depends on A
    _register_noop_migrator("migrator_a", dependencies=["migrator_b"])
    _register_noop_migrator("migrator_b", dependencies=["migrator_a"])

    with pytest.raises(ValueError, match="Circular dependency detected"):
        LegacyMigratorRegistry.resolve_dependencies(["migrator_a", "migrator_b"])


def test_resolve_dependencies_missing(
    isolated_legacy_migrator_registry: None,
) -> None:
    """Test handling of missing dependencies"""
    # Migrator with a dependency that was never registered
    _register_noop_migrator("migrator_a", dependencies=["nonexistent_migrator"])

    ordered, missing = LegacyMigratorRegistry.resolve_dependencies(["migrator_a"])

    # The requested migrator is still returned; the gap is reported
    assert ordered == ["migrator_a"]
    assert "nonexistent_migrator" in missing


def test_resolve_dependencies_multiple_roots(
    isolated_legacy_migrator_registry: None,
) -> None:
    """Test resolving migrators with multiple independent roots"""
    # C depends on A; D depends on B; the two chains are independent
    _register_noop_migrator("migrator_a")
    _register_noop_migrator("migrator_b")
    _register_noop_migrator("migrator_c", dependencies=["migrator_a"])
    _register_noop_migrator("migrator_d", dependencies=["migrator_b"])

    ordered, missing = LegacyMigratorRegistry.resolve_dependencies(
        ["migrator_c", "migrator_d"]
    )

    # All four are returned; each root precedes its dependent
    assert len(ordered) == 4
    assert ordered.index("migrator_a") < ordered.index("migrator_c")
    assert ordered.index("migrator_b") < ordered.index("migrator_d")
    assert len(missing) == 0
dependencies=["migrator_b"], + ) + ) + + # Resolve dependencies + ordered, missing = LegacyMigratorRegistry.resolve_dependencies( + ["migrator_c", "migrator_d"] + ) + + # Should return all four validators with correct ordering + assert len(ordered) == 4 + # A must come before C + assert ordered.index("migrator_a") < ordered.index("migrator_c") + # B must come before D + assert ordered.index("migrator_b") < ordered.index("migrator_d") + assert len(missing) == 0 + + +# Made with Bob diff --git a/tests/core/test_upgrade_transaction_safety.py b/tests/core/test_upgrade_transaction_safety.py new file mode 100644 index 000000000..df5643adc --- /dev/null +++ b/tests/core/test_upgrade_transaction_safety.py @@ -0,0 +1,304 @@ +# Copyright IBM Corporation 2025, 2026 +# SPDX-License-Identifier: MIT + +"""Integration tests for Phase 1 transaction safety in upgrade handler""" + +import json +import sqlite3 + +import pytest +import sqlalchemy +import typer + +from orchestrator.cli.core.config import AdoConfiguration +from orchestrator.cli.models.parameters import AdoUpgradeCommandParameters +from orchestrator.cli.utils.generic.wrappers import get_sql_store +from orchestrator.cli.utils.resources.handlers import handle_ado_upgrade +from orchestrator.core.legacy.registry import legacy_migrator +from orchestrator.core.resources import CoreResourceKinds +from orchestrator.core.samplestore.config import ( + SampleStoreConfiguration, + SampleStoreModuleConf, + SampleStoreSpecification, +) +from orchestrator.core.samplestore.resource import SampleStoreResource +from orchestrator.metastore.project import ProjectContext + +sqlite3_version = sqlite3.sqlite_version_info + + +class TestUpgradeTransactionSafety: + """Test transaction safety in upgrade handler - validate-all-before-save pattern""" + + @pytest.mark.parametrize("valid_ado_project_context", ["mysql"], indirect=True) + def test_all_resources_validated_before_any_saved( + self, + isolated_legacy_migrator_registry: None, + 
valid_ado_project_context: ProjectContext, + ) -> None: + """Test that all resources are validated before any are saved""" + + # Register a test migrator that transforms old_field -> new_field + @legacy_migrator( + identifier="test_transaction_migrator", + resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=["config.metadata.old_field"], + deprecated_from_version="1.0.0", + removed_from_version="2.0.0", + description="Test transaction migrator", + ) + def test_migrator(data: dict) -> dict: + if "config" in data and "metadata" in data["config"]: + metadata = data["config"]["metadata"] + if "old_field" in metadata: + metadata["new_field"] = metadata.pop("old_field") + return data + + # Create two sample store resources with old_field in metadata + resource1 = SampleStoreResource( + identifier="test_res1", + config=SampleStoreConfiguration( + specification=SampleStoreSpecification( + module=SampleStoreModuleConf( + moduleClass="SQLSampleStore", + moduleName="orchestrator.core.samplestore.sql", + ), + storageLocation=valid_ado_project_context.metadataStore, + ), + metadata={"old_field": "value1"}, + ), + ) + + resource2 = SampleStoreResource( + identifier="test_res2", + config=SampleStoreConfiguration( + specification=SampleStoreSpecification( + module=SampleStoreModuleConf( + moduleClass="SQLSampleStore", + moduleName="orchestrator.core.samplestore.sql", + ), + storageLocation=valid_ado_project_context.metadataStore, + ), + metadata={"old_field": "value2"}, + ), + ) + + # Save resources to database + sql_store = get_sql_store(project_context=valid_ado_project_context) + sql_store.updateResource(resource=resource1) + sql_store.updateResource(resource=resource2) + + # Now manually add the deprecated field to the raw data in the database + # We need to update the JSON directly in the database + with sql_store.engine.begin() as conn: + # Get current data + raw1 = sql_store.getResourceRaw("test_res1") + raw1["config"]["metadata"]["old_field"] = "value1" + 
+ # Update in database + update_stmt = sqlalchemy.text( + "UPDATE resources SET data = :data WHERE identifier = :identifier" + ).bindparams(data=json.dumps(raw1), identifier="test_res1") + conn.execute(update_stmt) + + # Same for resource2 + raw2 = sql_store.getResourceRaw("test_res2") + raw2["config"]["metadata"]["old_field"] = "value2" + + update_stmt = sqlalchemy.text( + "UPDATE resources SET data = :data WHERE identifier = :identifier" + ).bindparams(data=json.dumps(raw2), identifier="test_res2") + conn.execute(update_stmt) + + # Create parameters for upgrade + ado_config = AdoConfiguration() + ado_config._project_context = valid_ado_project_context + params = AdoUpgradeCommandParameters( + ado_configuration=ado_config, + apply_legacy_migrator=["test_transaction_migrator"], + list_legacy_migrators=False, + ) + + # Call the upgrade handler + handle_ado_upgrade( + parameters=params, + resource_type=CoreResourceKinds.SAMPLESTORE, + ) + + # Verify both resources were upgraded + upgraded_res1 = sql_store.getResourceRaw("test_res1") + upgraded_res2 = sql_store.getResourceRaw("test_res2") + + assert upgraded_res1 is not None + assert upgraded_res2 is not None + assert "new_field" in upgraded_res1["config"]["metadata"] + assert "new_field" in upgraded_res2["config"]["metadata"] + assert upgraded_res1["config"]["metadata"]["new_field"] == "value1" + assert upgraded_res2["config"]["metadata"]["new_field"] == "value2" + assert "old_field" not in upgraded_res1["config"]["metadata"] + assert "old_field" not in upgraded_res2["config"]["metadata"] + + @pytest.mark.parametrize("valid_ado_project_context", ["mysql"], indirect=True) + def test_validation_failure_prevents_all_saves( + self, + valid_ado_project_context: ProjectContext, + ) -> None: + """Test that if any validation fails, no resources are saved""" + + # Register a migrator that will cause validation failure + @legacy_migrator( + identifier="test_failing_migrator", + resource_type=CoreResourceKinds.SAMPLESTORE, + 
deprecated_field_paths=["config.metadata.old_field"], + deprecated_from_version="1.0.0", + removed_from_version="2.0.0", + description="Test failing migrator", + ) + def test_migrator(data: dict) -> dict: + # Transform the field + if "config" in data and "metadata" in data["config"]: + metadata = data["config"]["metadata"] + if "old_field" in metadata: + metadata["new_field"] = metadata.pop("old_field") + + # Introduce an invalid field that will fail pydantic validation + # for the second resource only + if data.get("identifier") == "test_res2": + data["config"]["invalid_field_that_breaks_validation"] = "bad_value" + + return data + + # Create two sample store resources + resource1 = SampleStoreResource( + identifier="test_res1", + config=SampleStoreConfiguration( + specification=SampleStoreSpecification( + module=SampleStoreModuleConf( + moduleClass="SQLSampleStore", + moduleName="orchestrator.core.samplestore.sql", + ), + storageLocation=valid_ado_project_context.metadataStore, + ), + metadata={"old_field": "value1"}, + ), + ) + + resource2 = SampleStoreResource( + identifier="test_res2", + config=SampleStoreConfiguration( + specification=SampleStoreSpecification( + module=SampleStoreModuleConf( + moduleClass="SQLSampleStore", + moduleName="orchestrator.core.samplestore.sql", + ), + storageLocation=valid_ado_project_context.metadataStore, + ), + metadata={"old_field": "value2"}, + ), + ) + + # Save resources to database + sql_store = get_sql_store(project_context=valid_ado_project_context) + sql_store.updateResource(resource=resource1) + sql_store.updateResource(resource=resource2) + + # Now manually add the deprecated field to the raw data in the database + with sql_store.engine.begin() as conn: + # Get current data + raw1 = sql_store.getResourceRaw("test_res1") + raw1["config"]["metadata"]["old_field"] = "value1" + + # Update in database + update_stmt = sqlalchemy.text( + "UPDATE resources SET data = :data WHERE identifier = :identifier" + 
).bindparams(data=json.dumps(raw1), identifier="test_res1") + conn.execute(update_stmt) + + # Same for resource2 + raw2 = sql_store.getResourceRaw("test_res2") + raw2["config"]["metadata"]["old_field"] = "value2" + + update_stmt = sqlalchemy.text( + "UPDATE resources SET data = :data WHERE identifier = :identifier" + ).bindparams(data=json.dumps(raw2), identifier="test_res2") + conn.execute(update_stmt) + + # Store original data for comparison + original_res1 = sql_store.getResourceRaw("test_res1") + original_res2 = sql_store.getResourceRaw("test_res2") + + # Create parameters for upgrade + ado_config = AdoConfiguration() + ado_config._project_context = valid_ado_project_context + params = AdoUpgradeCommandParameters( + ado_configuration=ado_config, + apply_legacy_migrator=["test_failing_migrator"], + list_legacy_migrators=False, + ) + + # Should raise typer.Exit due to validation failure + with pytest.raises(typer.Exit) as exc_info: + handle_ado_upgrade( + parameters=params, + resource_type=CoreResourceKinds.SAMPLESTORE, + ) + + assert exc_info.value.exit_code == 1 + + # Verify: NO resources were saved (transaction safety) + # Both resources should still have their original data + current_res1 = sql_store.getResourceRaw("test_res1") + current_res2 = sql_store.getResourceRaw("test_res2") + + assert current_res1 == original_res1 + assert current_res2 == original_res2 + assert "old_field" in current_res1["config"]["metadata"] + assert "old_field" in current_res2["config"]["metadata"] + assert "new_field" not in current_res1["config"]["metadata"] + assert "new_field" not in current_res2["config"]["metadata"] + + @pytest.mark.parametrize("valid_ado_project_context", ["mysql"], indirect=True) + def test_empty_resource_list_handled_gracefully( + self, + valid_ado_project_context: ProjectContext, + ) -> None: + """Test that empty resource list is handled without errors""" + + # Register a test validator + @legacy_migrator( + identifier="test_empty_migrator", + 
resource_type=CoreResourceKinds.SAMPLESTORE, + deprecated_field_paths=["config.metadata.old_field"], + deprecated_from_version="1.0.0", + removed_from_version="2.0.0", + description="Test empty migrator", + ) + def test_migrator(data: dict) -> dict: + return data + + # Don't create any resources - database starts empty for this test + + # Create parameters for upgrade + ado_config = AdoConfiguration() + ado_config._project_context = valid_ado_project_context + params = AdoUpgradeCommandParameters( + ado_configuration=ado_config, + apply_legacy_migrator=["test_empty_migrator"], + list_legacy_migrators=False, + ) + + # Should complete without error + handle_ado_upgrade( + parameters=params, + resource_type=CoreResourceKinds.SAMPLESTORE, + ) + + # Verify no samplestore resources exist + sql_store = get_sql_store(project_context=valid_ado_project_context) + resources = sql_store.getResourcesOfKind( + kind=CoreResourceKinds.SAMPLESTORE.value + ) + assert len(resources) == 0 + + +# Made with Bob diff --git a/website/docs/getting-started/ado.md b/website/docs/getting-started/ado.md index 3d82db393..8d6bae7ba 100644 --- a/website/docs/getting-started/ado.md +++ b/website/docs/getting-started/ado.md @@ -1,7 +1,8 @@ - + + !!! note This page provides documentation for the `ado` CLI tool, which needs to be @@ -97,6 +98,8 @@ Where: - `RESOURCE_TYPE` is one of the supported resource types for `ado create`, currently: + + - _actuator_ - _actuatorconfiguration_ (_ac_) - _context_ (_ctx_) @@ -104,6 +107,8 @@ Where: - _samplestore_ (_store_) - _discoveryspace_ (_space_) + + - `--file` or `-f` is a path to the resource configuration file in YAML format. It is mandatory in all scenarios, except when running `ado create samplestore --new-sample-store`. @@ -217,6 +222,8 @@ Where: - `RESOURCE_TYPE` is the type of resource you want to delete. 
Currently, the only supported types are: + + - _actuatorconfiguration_ (_ac_) - _context_ (_ctx_) - _datacontainer_ (_dcr_) @@ -224,10 +231,18 @@ Where: - _samplestore_ (_store_) - _discoveryspace_ (_space_) + + - `RESOURCE_ID` is the unique identifier of the resource to delete. - `--force` enables forced deletion of resources in the following cases: + + + - When attempting to delete operations while other operations are executing. - When attempting to delete sample stores that still contain data. + + + - When deleting a local context, users can specify the flags `--delete-local-db` or `--no-delete-local-db` to explicitly delete or preserve a local DB when deleting its related context. If neither of these flags are specified, the @@ -270,10 +285,14 @@ Where: - `RESOURCE_TYPE` is the type of resource you want to describe. Currently, the supported resource types are: + + - _experiment_ - _datacontainer_ (_dcr_) - _discoveryspace_ (_space_) + + - `RESOURCE_ID` is the unique identifier of the resource to describe. - The `--file` (or `-f`) flag is **currently only available for spaces** and allows getting a description of the space, given a space configuration file. @@ -306,20 +325,28 @@ Where: - `RESOURCE_TYPE` is the type of resource you want to edit. Supported types are: + + - _actuatorconfiguration_ (_ac_) - _datacontainer_ (_dcr_) - _operation_ (_op_) - _samplestore_ (_store_) - _discoveryspace_ (_space_) + + - `RESOURCE_ID` is the unique identifier of the resource to edit. - `--editor` is the name of the editor you want to use for editing metadata. It must be one of the supported ones, which currently are: + + - `vim` (_default_) - `vi` - `nano` + + Alternatively, you can also set the value for this flag by using the environment variable `ADO_EDITOR`. @@ -376,6 +403,8 @@ Where: - `RESOURCE_TYPE` is the type of resource you want to get. 
Currently, the only supported types are: + + - _actuatorconfiguration_ (_ac_) - _actuator_ - _context_ (_ctx_) @@ -386,9 +415,13 @@ Where: - _samplestore_ (_store_) - _discoveryspace_ (_space_) + + - `RESOURCE_ID` is the optional unique identifier of the resource to get. - `--output` or `-o` determine the type of output that will be displayed: + + - The `default` format shows the _identifier_, the _name_, and the _age_ of the matching resources. - The `yaml` format displays the full YAML document of the matching resources. @@ -397,6 +430,8 @@ Where: - The `raw` format displays the raw resource as stored in the database, performing no validation. + + - `--exclude-default` (set by default) allows excluding fields that use default values from the output. Alternatively, the `--no-exclude-default` flag can be used to show them. @@ -422,9 +457,9 @@ Where: - When using the `--details` flag with the `default` output format, additional columns with the _description_ and the _labels_ of the matching resources are printed. -- The `--show-deprecated` flag is available **only for - `ado get experiments`** and allows displaying experiments that have - been deprecated. They are otherwise hidden by default. +- The `--show-deprecated` flag is available **only for `ado get experiments`** + and allows displaying experiments that have been deprecated. They are + otherwise hidden by default. #### Searching and Filtering @@ -464,7 +499,9 @@ ado get spaces --details ``` + ##### Getting all Discovery Spaces that include granite-7b-base in the property domain + !!! info @@ -513,7 +550,9 @@ ado get space space-df8077-7535f9 -o yaml \ ``` + ##### Getting an actuator configuration and hiding the status for the "created" event + ```shell @@ -570,9 +609,13 @@ Where: - `RESOURCE_TYPE` is one of the supported resource types: + + - _operation_ (_op_) - _discoveryspace_ (_space_) + + - `RESOURCE_ID` is the unique identifier of the resource you want to see details for. 
- `--use-latest` will use the identifier of the latest (i.e. most recent) @@ -613,9 +656,13 @@ Where: - `RESOURCE_TYPE` is one of the supported resource types: + + - _operation_ (_op_) - _discoveryspace_ (_space_) + + - `RESOURCE_ID` is the unique identifier of the resource you want to see entities for. - `--use-latest` will use the identifier of the latest (i.e. most recent) @@ -627,22 +674,32 @@ Where: - `--property-format` defines the naming format used for measured properties in the output, one of: + + - `observed`: properties are named `$experimentid.$property_id`. There will be one row per entity. - `target`: properties are named `$property_id`. There will be one row per (entity, experiment) pair. + + - `--output-format` is the format in which to display the entity data. One of: + + - `console` (print to stdout) - `csv` (output as CSV file) - `json` (output as JSON file) + + - `--property` (can be specified multiple times) is used to filter what measured properties need to be output. - `--include` (**exclusive to spaces**) determines what type of entities to include. One of: + + - `sampled`: Entities that have been measured by explore operations on the `discoveryspace` - `unsampled`: Entities that have not been measured by an explore operation @@ -652,8 +709,13 @@ Where: - `missing`: Entities in the `discoveryspace` that are not in the `samplestore` the `discoveryspace` uses + + - `--aggregate` allows applying an aggregation to the result values in case multiple are present. 
One of: + + + - `mean` - `median` - `variance` @@ -661,6 +723,8 @@ Where: - `min` - `max` + + ##### Examples ###### Show matching entities in a Space with target format and output them as CSV @@ -671,8 +735,9 @@ Where: --output-format csv ``` - + ###### Show a subset of the properties of entities that are part of an operation and output them as JSON + ```shell ado show entities operation randomwalk-0.5.0-123abc --output-format json \ @@ -775,10 +840,14 @@ ado show related RESOURCE_TYPE [RESOURCE_ID] [--use-latest] - `RESOURCE_TYPE` is one of the supported resource types: + + - _operation_ (_op_) - _samplestore_ (_store_) - _discoveryspace_ (_space_) + + - `RESOURCE_ID` is the unique identifier of the resource you want to see related resources for. - `--use-latest` will use the identifier of the latest (i.e. most recent) @@ -832,10 +901,15 @@ Where: properties. Cannot be used when the output format is `md`. - `--format | -o` allows choosing the output format in which the information should be displayed. Can be one of either: + + + - `md` - for Markdown text. - `table` (**default**) - for Markdown tables. - `csv` - for a comma separated file. + + ##### Examples ###### Get the summary of a space as a Markdown table @@ -845,7 +919,9 @@ ado show summary space space-abc123-456def ``` + ###### Get the summary of a space as a Markdown table and include the constitutive property MY_PROPERTY + ```shell @@ -915,12 +991,16 @@ Where: - `RESOURCE_TYPE` is one of the supported resource types: + + - _actuator_ - _actuatorconfiguration_ (_ac_) - _context_ (_ctx_) - _operation_ (_op_) - _discoveryspace_ (_space_) + + - `--output` or `-o` can be used to point to a location where to save the template. By default, the template will be saved in the current directory with an autogenerated name. @@ -932,6 +1012,8 @@ Where: - `--operator-type` (**exclusive for operations**) is the type of operator to generate a template for. 
Must be one of the supported operator types: + + - `characterize` - `search` - `compare` @@ -940,6 +1022,8 @@ Where: - `fuse` - `learn` + + - `--actuator-configuration` (**exclusive for actuatorconfigurations**) is the identifier of the actuator to output. If unset, a generic actuator configuration will be output. @@ -968,8 +1052,9 @@ ado template context ado template space --from-experiment finetune-gptq-lora-dp-r-4-a-16-tm-default-v1.1.0 ``` - + ##### Creating a template for a space that uses a specific experiment from a specific actuator + ```shell ado template space --from-experiment SFTTrainer:finetune-gptq-lora-dp-r-4-a-16-tm-default-v1.1.0 @@ -1001,19 +1086,33 @@ When required, you can run this command to update all resources of a given kind in the database. ```shell -ado upgrade RESOURCE_TYPE +ado upgrade RESOURCE_TYPE [--apply-legacy-migrator ] \ + [--list-legacy-migrators] ``` Where: - `RESOURCE_TYPE` is one of the supported resource types: + + - _actuatorconfiguration_ (_ac_) - _datacontainer_ (_dcr_) - _operation_ (_op_) - _samplestore_ (_store_) - _discoveryspace_ (_space_) + + +- `--apply-legacy-migrator` applies a specific legacy migrator by identifier + during the upgrade process. This option can be specified multiple times to + apply multiple validators. Legacy validators handle deprecated field + migrations and schema transformations. + +- `--list-legacy-migrators` lists all available legacy migrators for the + specified resource type, showing their identifiers, descriptions, and + deprecated field paths. 
+ #### Examples ##### Upgrade all operation resources @@ -1022,6 +1121,18 @@ Where: ado upgrade operations ``` +##### List available legacy migrators for sample stores + +```shell +ado upgrade samplestores --list-legacy-migrators +``` + +##### Apply a legacy migrator during upgrade + +```shell +ado upgrade samplestores --apply-legacy-migrator samplestore_kind_entitysource_to_samplestore +``` + ### ado version When unsure about what ado version you are running, you can get this information @@ -1033,6 +1144,7 @@ ado version ## What's next +
@@ -1055,3 +1167,5 @@ ado version
+ + \ No newline at end of file