From 8ab9d712889b734e5628f329265ad52a86bfd292 Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Tue, 17 Mar 2026 12:09:32 -0400 Subject: [PATCH 01/10] docs(index-migrator): add planning workspace and repo guidance --- AGENTS.md | 165 +++++ CONTRIBUTING.md | 23 +- nitin_docs/index_migrator/00_index.md | 71 +++ nitin_docs/index_migrator/01_context.md | 100 +++ nitin_docs/index_migrator/02_architecture.md | 230 +++++++ nitin_docs/index_migrator/03_benchmarking.md | 248 ++++++++ .../10_v1_drop_recreate_spec.md | 306 +++++++++ .../11_v1_drop_recreate_tasks.md | 160 +++++ .../12_v1_drop_recreate_tests.md | 122 ++++ .../20_v2_iterative_shadow_spec.md | 213 +++++++ .../21_v2_iterative_shadow_tasks.md | 159 +++++ .../22_v2_iterative_shadow_tests.md | 102 +++ nitin_docs/index_migrator/90_prd.md | 581 ++++++++++++++++++ 13 files changed, 2479 insertions(+), 1 deletion(-) create mode 100644 AGENTS.md create mode 100644 nitin_docs/index_migrator/00_index.md create mode 100644 nitin_docs/index_migrator/01_context.md create mode 100644 nitin_docs/index_migrator/02_architecture.md create mode 100644 nitin_docs/index_migrator/03_benchmarking.md create mode 100644 nitin_docs/index_migrator/10_v1_drop_recreate_spec.md create mode 100644 nitin_docs/index_migrator/11_v1_drop_recreate_tasks.md create mode 100644 nitin_docs/index_migrator/12_v1_drop_recreate_tests.md create mode 100644 nitin_docs/index_migrator/20_v2_iterative_shadow_spec.md create mode 100644 nitin_docs/index_migrator/21_v2_iterative_shadow_tasks.md create mode 100644 nitin_docs/index_migrator/22_v2_iterative_shadow_tests.md create mode 100644 nitin_docs/index_migrator/90_prd.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..3004e051 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,165 @@ +# AGENTS.md - RedisVL Project Context + +## Frequently Used Commands + +```bash +# Development workflow +make install # Install dependencies +make format # Format code (black + isort) +make check-types # Run mypy type checking +make lint # Run all linting (format + types) +make test # Run tests (no external APIs) +make test-all # Run all tests (includes API tests) +make check # Full check (lint + test) + +# Redis setup +make redis-start # Start Redis container +make redis-stop # Stop Redis container + +# Documentation +make docs-build # Build documentation +make docs-serve # Serve docs locally +``` + +Pre-commit hooks are also configured, which you should +run before you commit: +```bash +pre-commit run --all-files +``` + +## Important Architectural Patterns + +### Async/Sync Dual Interfaces +- Most core classes have both sync and async versions (e.g., `SearchIndex` / `AsyncSearchIndex`) +- Follow existing patterns when adding new functionality + +### Schema-Driven Design +```python +# Index schemas define structure +schema = IndexSchema.from_yaml("schema.yaml") +index = SearchIndex(schema, redis_url="redis://localhost:6379") +``` + +## Critical Rules + +### Do Not Modify +- **CRITICAL**: Do not change this line unless explicitly asked: + ```python + token.strip().strip(",").replace(""", "").replace(""", "").lower() + ``` + +### Git Operations +**CRITICAL**: NEVER use `git push` or attempt to push to remote repositories. The user will handle all git push operations. + +### Branch and Commit Policy +**IMPORTANT**: Use conventional branch names and conventional commits. + +Branch naming: +- Human-created branches should use `/` +- Automation-created branches may use `codex//` +- Preferred branch types: `feat`, `fix`, `docs`, `refactor`, `test`, `chore`, `perf`, `build`, `ci` +- Examples: + - `feat/index-migrator` + - `fix/async-sentinel-pool` + - `docs/index-migrator-benchmarking` + - `codex/feat/index-migrator` + +Commit messages: +- Use Conventional Commits: `(optional-scope): ` +- Preferred commit types: `feat`, `fix`, `docs`, `refactor`, `test`, `chore`, `perf`, `build`, `ci` +- Examples: + - `feat(migrate): add drop recreate planning docs` + - `docs(index-migrator): add benchmarking guidance` + - `fix(cli): validate migrate plan inputs` + +### Code Quality +**IMPORTANT**: Always run `make format` before committing code to ensure proper formatting and linting compliance. + +### README.md Maintenance +**IMPORTANT**: DO NOT modify README.md unless explicitly requested. + +**If you need to document something, use these alternatives:** +- Development info → CONTRIBUTING.md +- API details → docs/ directory +- Examples → docs/examples/ +- Project memory (explicit preferences, directives, etc.) → AGENTS.md + +## Code Style Preferences + +### Import Organization +- **Prefer module-level imports** by default for clarity and standard Python conventions +- **Use local/inline imports only when necessary** for specific reasons: + - Avoiding circular import dependencies + - Improving startup time for heavy/optional dependencies + - Lazy loading for performance-critical paths +- When using local imports, add a brief comment explaining why (e.g., `# Local import to avoid circular dependency`) + +### Comments and Output +- **No emojis in code comments or print statements** +- Keep comments professional and focused on technical clarity +- Use emojis sparingly only in user-facing documentation (markdown files), not in Python code + +### General Guidelines +- Follow existing patterns in the RedisVL codebase +- Maintain consistency with the project's established conventions +- Run `make format` before committing to ensure code quality standards + +## Testing Notes +RedisVL uses `pytest` with `testcontainers` for testing. + +- `make test` - unit tests only (no external APIs) +- `make test-all` - run the full suite, including tests that call external APIs +- `pytest --run-api-tests` - explicitly run API-dependent tests (e.g., LangCache, + external vectorizer/reranker providers). These require the appropriate API + keys and environment variables to be set. + +## Project Structure + +``` +redisvl/ +├── cli/ # Command-line interface (rvl command) +├── extensions/ # AI extensions (cache, memory, routing) +│ ├── cache/ # Semantic caching for LLMs +│ ├── llmcache/ # LLM-specific caching +│ ├── message_history/ # Chat history management +│ ├── router/ # Semantic routing +│ └── session_manager/ # Session management +├── index/ # SearchIndex classes (sync/async) +├── query/ # Query builders (Vector, Range, Filter, Count) +├── redis/ # Redis client utilities +├── schema/ # Index schema definitions +└── utils/ # Utilities (vectorizers, rerankers, optimization) + ├── rerank/ # Result reranking + └── vectorize/ # Embedding providers integration +``` + +## Core Components + +### 1. Index Management +- `SearchIndex` / `AsyncSearchIndex` - Main interface for Redis vector indices +- `IndexSchema` - Define index structure with fields (text, tags, vectors, etc.) +- Support for JSON and Hash storage types + +### 2. Query System +- `VectorQuery` - Semantic similarity search +- `RangeQuery` - Vector search within distance range +- `FilterQuery` - Metadata filtering and full-text search +- `CountQuery` - Count matching records +- Etc. + +### 3. AI Extensions +- `SemanticCache` - LLM response caching with semantic similarity +- `EmbeddingsCache` - Cache for vector embeddings +- `MessageHistory` - Chat history with recency/relevancy retrieval +- `SemanticRouter` - Route queries to topics/intents + +### 4. Vectorizers (Optional Dependencies) +- OpenAI, Azure OpenAI, Cohere, HuggingFace, Mistral, VoyageAI +- Custom vectorizer support +- Batch processing capabilities + +## Documentation +- Main docs: https://docs.redisvl.com +- Built with Sphinx from `docs/` directory +- Includes API reference and user guides +- Example notebooks in documentation `docs/user_guide/...` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 54138d77..6f4afb7f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -251,12 +251,33 @@ Before suggesting a new feature: ## Pull Request Process -1. **Fork and create a branch**: Create a descriptive branch name (e.g., `fix-search-bug` or `add-vector-similarity`) +1. **Fork and create a branch**: Use a conventional branch name such as `feat/index-migrator`, `fix/search-bug`, or `docs/vectorizer-guide` 2. **Make your changes**: Follow our coding standards and include tests 3. **Test thoroughly**: Ensure your changes work and don't break existing functionality 4. **Update documentation**: Add or update documentation as needed 5. **Submit your PR**: Include a clear description of what your changes do +### Branch Naming and Commit Messages + +We use conventional branch names and Conventional Commits to keep history easy to scan and automate. + +Branch naming: + +- Use `/` +- Recommended types: `feat`, `fix`, `docs`, `refactor`, `test`, `chore`, `perf`, `build`, `ci` +- Examples: + - `feat/index-migrator` + - `fix/async-sentinel-pool` + - `docs/migration-benchmarking` + +Commit messages: + +- Use `(optional-scope): ` +- Examples: + - `feat(migrate): add drop recreate plan generation` + - `docs(index-migrator): add benchmark guidance` + - `fix(cli): reject unsupported migration diffs` + ### Review Process - The core team reviews Pull Requests regularly diff --git a/nitin_docs/index_migrator/00_index.md b/nitin_docs/index_migrator/00_index.md new file mode 100644 index 00000000..f04f30f8 --- /dev/null +++ b/nitin_docs/index_migrator/00_index.md @@ -0,0 +1,71 @@ +# Index Migrator Workspace + +## Overview + +This directory is the sole source of truth for RedisVL index migration planning. + +No implementation should start unless the corresponding task exists in a `*_tasks.md` file in this directory. + +This workspace is organized around two phases: + +- Phase 1 MVP: `drop_recreate` +- Phase 2: `iterative_shadow` + +The overall initiative covers both simple schema-only rebuilds and harder migrations that change vector dimensions, datatypes, precision, algorithms, or payload shape. Those advanced cases are intentionally delivered after the MVP rather than being treated as out of scope for the product. + +The planning goal is to make handoff simple. Another engineer or process should be able to open this directory, read the active spec and task list, and start implementation without needing to rediscover product decisions. + +## Guiding Principles + +- Prefer simple and safe over clever orchestration. +- Reuse existing RedisVL primitives before adding new abstractions. +- Migrate one index at a time. +- Keep cutover and platform scaling operator-owned. +- Fail closed on unsupported schema changes. +- Treat documentation artifacts as implementation inputs, not as narrative background. + +## Phase Status + +| Phase | Mode | Status | Implementation Target | +| --- | --- | --- | --- | +| Phase 1 | `drop_recreate` | Ready | Yes | +| Phase 2 | `iterative_shadow` | Planned | No | + +## Doc Map + +- [01_context.md](./01_context.md): customer problem, constraints, and why the work is phased +- [02_architecture.md](./02_architecture.md): shared architecture, responsibilities, capacity model, and diagrams +- [03_benchmarking.md](./03_benchmarking.md): migration benchmarking goals, metrics, scenarios, and output artifacts +- [90_prd.md](./90_prd.md): final product requirements document for team review +- [10_v1_drop_recreate_spec.md](./10_v1_drop_recreate_spec.md): decision-complete MVP spec +- [11_v1_drop_recreate_tasks.md](./11_v1_drop_recreate_tasks.md): implementable MVP task list +- [12_v1_drop_recreate_tests.md](./12_v1_drop_recreate_tests.md): MVP test plan +- [20_v2_iterative_shadow_spec.md](./20_v2_iterative_shadow_spec.md): future iterative shadow spec +- [21_v2_iterative_shadow_tasks.md](./21_v2_iterative_shadow_tasks.md): future iterative shadow tasks +- [22_v2_iterative_shadow_tests.md](./22_v2_iterative_shadow_tests.md): future iterative shadow test plan + +## Current Truth + +The active implementation target is Phase 1. + +- Spec: [10_v1_drop_recreate_spec.md](./10_v1_drop_recreate_spec.md) +- Tasks: [11_v1_drop_recreate_tasks.md](./11_v1_drop_recreate_tasks.md) +- Tests: [12_v1_drop_recreate_tests.md](./12_v1_drop_recreate_tests.md) + +## Next Actions + +- `V1-T01` +- `V1-T02` +- `V1-T03` + +## Locked Decisions + +- The planning workspace lives entirely under `nitin_docs/index_migrator/`. +- The top-level migration notes have been removed to avoid competing sources of truth. +- Phase 1 is documentation-backed implementation scope. +- Phase 2 stays planned until Phase 1 is implemented and learnings are folded back into this directory. +- The default artifact format for plans and reports is YAML. +- Benchmarking is required for migration duration, query impact, and resource-impact planning, but it should be implemented with simple structured outputs rather than a separate benchmarking framework. +- The default execution unit is a single index. +- The default operational model is operator-owned downtime, cutover, and scaling. +- Phase 2 owns advanced vector and payload-shape migrations, including datatype, precision, dimension, and algorithm changes. diff --git a/nitin_docs/index_migrator/01_context.md b/nitin_docs/index_migrator/01_context.md new file mode 100644 index 00000000..9bbdf9f7 --- /dev/null +++ b/nitin_docs/index_migrator/01_context.md @@ -0,0 +1,100 @@ +# Index Migrator Context + +## Problem Statement + +RedisVL does not currently provide a first-class migration workflow for search index changes. + +Today, teams can create indexes, delete indexes, inspect index info, and load documents, but they still need ad hoc scripts and operational runbooks to handle schema evolution. This becomes risky when the index is large, shared by multiple applications, or deployed on clustered Redis Cloud or Redis Software. + +The migration problem has three different shapes: + +- A simpler index rebuild that preserves existing documents and recreates the index definition in place. +- A shadow migration over the same documents when the target schema can still be built from the current stored payload. +- A shadow migration with transform or backfill when vector dimensions, datatypes, precision, algorithms, or payload shape change and a new target payload must be built. + +This workspace deliberately splits those shapes into phases instead of trying to solve everything in one design. Phase 1 proves the plan-first migration workflow. Phase 2 exists to take on the harder vector and payload-shape migrations safely. + +## Customer Requirements + +The planning baseline for this work is: + +- preserve existing documents during migration +- capture the previous index configuration before making changes +- apply only the requested schema changes +- preview the migration plan before execution +- support advanced vector migrations such as `HNSW -> FLAT`, `FP32 -> FP16`, vector dimension changes, and payload-shape-changing model or algorithm swaps +- estimate migration timing, memory impact, and operational impact using simple benchmark artifacts +- benchmark source-versus-target memory and size changes, including peak overlap footprint during shadow migrations +- support both guided and scripted workflows +- make downtime and disruption explicit +- support large datasets without defaulting to full-keyspace audits or fleet-wide orchestration +- keep the implementation understandable enough that another team can operate it safely + +## Current RedisVL Capabilities + +RedisVL already has useful primitives that should be reused instead of replaced: + +- `SearchIndex.from_existing()` can reconstruct schema from a live index. +- `SearchIndex.delete(drop=False)` can remove the index structure without deleting documents. +- `SearchIndex.info()` can retrieve index stats used for planning and validation. +- Existing CLI commands already establish the connection and index lookup patterns the migrator can follow. + +RedisVL does not yet have: + +- a migration planner +- a schema diff classifier +- a migration-specific CLI workflow +- a guided schema migration wizard +- structured migration reports +- capacity-aware orchestration across indexes +- transform or backfill planning for migrations that need new stored payloads + +## Why Phase 1 Comes First + +Phase 1 is intentionally narrow because it gives the team an MVP that is both useful and low-risk: + +- It preserves documents while changing only the index definition. +- It reuses current RedisVL primitives instead of introducing a separate migration runtime. +- It keeps operational ownership clear: RedisVL handles planning, execution, and validation for a single index, while the operator handles the migration window and downstream application expectations. +- It avoids the hardest problems for now: target-payload generation, shadow overlap estimation, cutover automation, and cluster-wide scheduling. + +Phase 1 does not define the full migration goal. The harder vector and payload-shape changes are the reason Phase 2 exists. + +The MVP should prove the planning model, CLI shape, plan artifact, and validation/reporting flow before more advanced orchestration is attempted. + +## Downtime and Disruption + +Phase 1 accepts downtime for the migrated index. + +Engineers need to plan for the following impacts: + +- Search on the target index is unavailable between index drop and recreated index readiness. +- Query results can be partial or unstable while the recreated index is still completing its initial indexing pass. +- Reindexing uses shared database resources and can increase CPU, memory, and indexing pressure on the deployment. +- Shadow migrations can temporarily duplicate index structures and sometimes duplicate payloads as well, increasing peak memory requirements. +- Downstream applications need either a maintenance window, a degraded mode, or a clear operational pause during the rebuild. + +The tooling should not hide these facts. The plan artifact and CLI output must force the user to acknowledge downtime before applying a `drop_recreate` migration. + +## Non-Goals + +The following are explicitly out of scope for Phase 1, not for the overall initiative: + +- a generic migration framework for every schema evolution case +- automatic platform scaling +- automatic traffic cutover +- full key manifest capture by default +- document transforms or backfills in the MVP execution path +- payload relocation to a new keyspace in the MVP execution path +- concurrent migration of multiple large indexes +- fully managed Redis Cloud or Redis Software integration +- automatic transform inference or automatic re-embedding + +The simplicity rules for this effort are: + +- use existing RedisVL index introspection and lifecycle primitives +- do not design a generic migration framework for the MVP +- do not automate platform scaling +- do not automate traffic cutover +- do not require full key manifests by default +- require an explicit transform or backfill plan before Phase 2 handles payload-shape-changing migrations diff --git a/nitin_docs/index_migrator/02_architecture.md b/nitin_docs/index_migrator/02_architecture.md new file mode 100644 index 00000000..e7e0fe9a --- /dev/null +++ b/nitin_docs/index_migrator/02_architecture.md @@ -0,0 +1,230 @@ +# Index Migrator Architecture + +## System Boundaries + +The migration system has three boundaries: + +- RedisVL migration workflow: snapshot, diff, plan, apply, validate, report +- Redis deployment: Redis Cloud or Redis Software database that stores documents and indexes +- Operator and application boundary: maintenance window, scaling decisions, transform inputs, and application-level cutover behavior + +The first implementation should add migration capabilities on top of existing RedisVL primitives instead of creating a separate control plane. + +```mermaid +flowchart LR + OP["Engineer / Operator"] --> CLI["RedisVL migrate CLI"] + APP["Applications"] --> DB["Redis database endpoint"] + + CLI --> SNAP["Snapshot + diff + mode selection"] + CLI --> EXEC["Executor"] + CLI --> VAL["Validator + report"] + + SNAP --> IDX["Live index metadata and stats"] + SNAP --> INV["Optional platform inventory"] + SNAP --> XFORM["Optional transform or backfill plan"] + + EXEC --> DB + VAL --> DB + EXEC --> TARGET["Optional target keyspace + shadow index"] + TARGET --> DB + + DB --> SHARDS["One logical index may span multiple shards"] + OP --> INV + OP --> XFORM +``` + +## Responsibilities + +RedisVL should own: + +- schema snapshot and source configuration capture +- schema diff classification +- migration mode selection +- migration plan generation +- guided wizard and scripted CLI entrypoints +- supported strategy execution +- validation and report generation + +The operator should own: + +- choosing the migration window +- accepting downtime or degraded behavior +- providing platform inventory when capacity planning matters +- providing transform or backfill inputs when payload shape changes +- scaling the Redis deployment +- application cutover and rollback decisions + +The platform should be treated as an external dependency, not as part of the MVP runtime. + +## Platform Model + +The migrator should reason about the deployment at the database level. + +For planning purposes: + +- treat the database endpoint as the unit of execution +- treat a search index as one logical index even if the deployment is sharded +- do not build logic that assumes an entire index lives on a single shard +- record where data lives in terms of database, prefixes, key separators, and target keyspace plans, not physical shard pinning + +This keeps the model compatible with both Redis Cloud and Redis Software without requiring the MVP to integrate directly with their platform APIs. + +## Migration Modes + +### `drop_recreate` + +This is the Phase 1 MVP. + +- Snapshot the current schema and index stats. +- Merge only the requested schema changes. +- Drop only the index structure, preserving documents. +- Recreate the index with the merged schema. +- Wait until indexing is complete. +- Validate and report. + +This mode is explicit about downtime and does not attempt to preserve uninterrupted query availability. + +### `iterative_shadow` + +This is the planned Phase 2 mode. + +- Work on one index at a time. +- Check database-level capacity before creating any shadow index. +- Choose between: + - `shadow_reindex` when the target schema can be built from the current stored payload. + - `shadow_rewrite` when vector datatype, precision, dimension, algorithm, or payload shape changes require a new target payload or keyspace. +- Create a shadow target for the current index only. +- Transform or backfill into a target keyspace when the migration changes payload shape. +- Validate the shadow target. +- Hand cutover to the operator. +- Retire the old index, and optionally the old target payload, only after cutover confirmation. + +This mode aims to reduce disruption without introducing automatic cutover or automatic scaling. This is the mode that should ultimately support migrations such as `HNSW -> FLAT`, `FP32 -> FP16`, vector dimension changes, and embedding-model-driven payload rewrites. + +## Capacity Model + +Phase 1 keeps capacity handling simple: + +- use source index stats for warnings and reports +- show expected downtime and indexing pressure +- do not block on a complex capacity estimator + +Phase 2 introduces a conservative capacity gate: + +- planner input is database-level, not shard-local +- one index at a time is the only supported execution unit +- estimate both source and target footprint +- separate document footprint from index footprint +- calculate peak overlap as the source footprint plus the target footprint that exists during migration +- capture memory savings or growth caused by algorithm, datatype, precision, dimension, and payload-shape changes +- the planner blocks if available headroom is below the estimated peak overlap plus reserve +- scaling stays operator-owned + +Default key-location capture is intentionally bounded: + +- store index name +- store storage type +- store prefixes +- store key separator +- store a bounded key sample + +Full key manifests are not part of the default path. + +## Benchmarking Model + +Benchmarking should be built into migration reporting, not treated as a separate system. + +The shared model is: + +- capture baseline metadata before migration +- capture timing and progress during migration +- capture validation and query-impact signals after migration +- persist simple YAML benchmark artifacts that can be compared across runs + +Benchmarking should focus on the operator questions that matter most: + +- total migration duration +- downtime or overlap duration +- document throughput +- query latency change during the migration window +- resource impact before, during, and after migration +- source-versus-target memory and size delta +- estimated versus actual peak overlap footprint + +The benchmark requirements are defined in [03_benchmarking.md](./03_benchmarking.md). + +## Failure Model + +The system should fail closed. + +- Unsupported schema diffs stop at `plan`. +- Missing transform inputs for a payload-shape-changing migration stop at `plan`. +- Missing source metadata stops at `plan`. +- `apply` never deletes documents in Phase 1. +- Validation failures produce a report and manual next steps. +- The tool does not attempt automatic rollback or automatic traffic switching. + +## `drop_recreate` Sequence + +```mermaid +sequenceDiagram + participant Op as Operator + participant CLI as RedisVL migrate CLI + participant Snap as Snapshot + diff + participant DB as Redis + participant Val as Validator + + Op->>CLI: migrate plan or wizard + CLI->>Snap: capture source schema and stats + Snap->>DB: read live index metadata + Snap-->>CLI: normalized source snapshot + CLI-->>Op: migration_plan.yaml with downtime warning + + Op->>CLI: migrate apply --allow-downtime + CLI->>DB: drop index only + CLI->>DB: recreate merged index + loop until indexing complete + CLI->>DB: poll index info + DB-->>CLI: indexing status and counts + end + CLI->>Val: run validation checks + Val->>DB: verify schema, counts, samples + Val-->>CLI: migration_report.yaml + CLI-->>Op: success or manual review +``` + +## `iterative_shadow` Sequence + +```mermaid +sequenceDiagram + participant Op as Operator + participant CLI as RedisVL migrate CLI + participant Plan as Shadow planner + participant DB as Redis + participant App as Application + + Op->>CLI: migrate plan --mode iterative_shadow + CLI->>Plan: capture source stats, target schema, transform plan, and platform inventory + Plan-->>CLI: READY, SCALE_REQUIRED, or MANUAL_REVIEW_REQUIRED + CLI-->>Op: plan and operator actions + + Op->>CLI: migrate apply --mode iterative_shadow + alt payload-compatible shadow + CLI->>DB: create shadow index for one source index + else payload rewrite required + CLI->>DB: create target keyspace and shadow index + loop backfill target payload + CLI->>DB: read source documents + CLI->>DB: write transformed target documents + end + end + loop until shadow ready + CLI->>DB: poll shadow index info + DB-->>CLI: readiness status + end + CLI->>DB: validate shadow index + CLI-->>Op: cutover runbook + Op->>App: switch to target index + Op->>CLI: confirm cutover + CLI->>DB: retire old index +``` diff --git a/nitin_docs/index_migrator/03_benchmarking.md b/nitin_docs/index_migrator/03_benchmarking.md new file mode 100644 index 00000000..67a637f1 --- /dev/null +++ b/nitin_docs/index_migrator/03_benchmarking.md @@ -0,0 +1,248 @@ +# Migration Benchmarking + +## Goals + +Migration benchmarking exists to answer practical operator questions: + +- how long will the migration take +- how long will search be degraded or unavailable +- how much shared Redis capacity will the migration consume +- how much the target schema or vector shape will increase or reduce memory usage +- how much query performance changes during the migration window +- whether future migrations can be estimated from previous runs + +The first benchmarking design should stay simple. It should collect structured measurements from real runs and manual rehearsals rather than introducing a separate performance framework before the migrator exists. + +## Core Benchmark Questions + +Every migration benchmark should answer: + +1. How long did planning take? +2. How long did `apply` take end-to-end? +3. How long was the index unavailable or in degraded indexing state? +4. What document throughput did the migration achieve? +5. What query latency and error-rate changes occurred during the migration? +6. How much memory, flash, or disk footprint changed before, during, and after migration? +7. How accurate was the peak-overlap estimate? +8. Did the final migrated index match the expected schema and document count? + +## Metrics + +### Timing Metrics + +- `plan_duration_seconds` +- `apply_duration_seconds` +- `validation_duration_seconds` +- `total_migration_duration_seconds` +- `drop_duration_seconds` +- `recreate_duration_seconds` +- `initial_indexing_duration_seconds` +- `downtime_duration_seconds` for `drop_recreate` +- `shadow_overlap_duration_seconds` for `iterative_shadow` +- `transform_duration_seconds` for payload rewrite work +- `backfill_duration_seconds` for target payload creation + +### Throughput Metrics + +- `source_num_docs` +- `target_num_docs` +- `documents_indexed_per_second` +- `documents_transformed_per_second` +- `bytes_rewritten_per_second` +- `progress_samples` captured during readiness polling + +### Query Impact Metrics + +- baseline query latency: `p50`, `p95`, `p99` +- during-migration query latency: `p50`, `p95`, `p99` +- post-migration query latency: `p50`, `p95`, `p99` +- query error rate during migration +- query result overlap or sample correctness checks + +### Resource Impact Metrics + +- source document footprint from live stats or sampling +- source index size from live stats +- target document footprint from live stats or sampling +- target index size from live stats +- total source footprint +- total target footprint +- footprint delta after migration +- estimated peak overlap footprint +- actual peak overlap footprint +- indexing failure delta +- memory headroom before migration +- memory headroom after migration +- peak memory headroom during overlap +- flash or disk footprint before and after when relevant +- source vector dimensions, datatype, precision, and algorithm +- target vector dimensions, datatype, precision, and algorithm +- source vector bytes per document +- target vector bytes per document + +### Correctness Metrics + +- schema match +- document count match +- indexing failure delta equals zero +- representative document fetch checks pass + +## Benchmark Inputs + +Each benchmark run should record the workload context, not just the raw timings. + +Required context: + +- migration mode +- dataset size +- storage type +- field mix +- whether vectors are present +- source and target vector configuration when vectors are present +- whether payload shape changes +- shard count +- replica count +- query load level during migration +- environment label such as `local`, `staging`, `redis_cloud`, or `redis_software` + +Useful optional context: + +- vector dimensions and datatype +- vector precision and algorithm +- auto-tiering enabled or disabled +- representative document size +- maintenance window target + +## Benchmark Scenarios + +Start with a small scenario matrix and expand only when needed. + +Minimum Phase 1 benchmark scenarios: + +- small index, low query load +- medium or large index, low query load +- medium or large index, representative read load + +Minimum Phase 2 benchmark scenarios: + +- one shadow migration on a sharded deployment with sufficient capacity +- one shadow migration that is blocked by the capacity gate +- one shadow migration under representative read load +- one algorithm migration such as `HNSW -> FLAT` +- one vector storage migration such as `FP32 -> FP16` +- one payload-shape-changing migration that requires transform or backfill + +Scale variables should be changed one at a time where possible: + +- document count +- shard count +- query concurrency +- storage type +- vector footprint + +## Benchmark Artifacts + +The benchmark workflow should use simple YAML artifacts. + +### `benchmark_manifest.yaml` + +```yaml +version: 1 +label: staging-large-read-load +mode: drop_recreate +environment: staging +dataset: + num_docs: 1000000 + storage_type: json + vector_fields_present: true +platform: + shard_count: 4 + replica_count: 1 +workload: + query_profile: representative-read + query_check_file: queries.yaml +notes: "" +``` + +### `benchmark_report.yaml` + +```yaml +version: 1 +label: staging-large-read-load +mode: drop_recreate +timings: + total_migration_duration_seconds: 540 + downtime_duration_seconds: 420 + validation_duration_seconds: 18 +throughput: + source_num_docs: 1000000 + documents_indexed_per_second: 2380.95 +query_impact: + baseline_p95_ms: 42 + during_migration_p95_ms: 95 + post_migration_p95_ms: 44 +resource_impact: + source_doc_footprint_mb: 6144 + source_index_size_mb: 8192 + target_doc_footprint_mb: 6144 + target_index_size_mb: 6144 + total_footprint_delta_mb: -2048 + estimated_peak_overlap_footprint_mb: 20480 + actual_peak_overlap_footprint_mb: 19840 + source_vector: + dimensions: 1536 + datatype: float32 + algorithm: hnsw + target_vector: + dimensions: 1536 + datatype: float16 + algorithm: flat +correctness: + schema_match: true + doc_count_match: true +``` + +These artifacts are planning and validation aids. They should not become a separate system before the migrator itself is implemented. + +## How Benchmarking Fits the Phases + +### Phase 1: `drop_recreate` + +Phase 1 should always record: + +- start time +- end time +- index downtime duration +- readiness wait duration +- source and target document counts +- source and target index stats +- observed source-versus-target index footprint delta + +Phase 1 should optionally record: + +- representative query latency before, during, and after migration +- query correctness checks using the same file as validation queries + +### Phase 2: `iterative_shadow` + +Phase 2 should always record: + +- source-to-shadow overlap duration +- planner estimate versus actual runtime +- capacity gate decision +- source and target document and index stats +- estimated versus actual peak overlap footprint +- observed memory savings or growth after the migration +- query impact during overlap + +Phase 2 should use benchmark history as advisory input for ETA and risk reporting, not as a hard execution dependency. + +## Exit Criteria + +Benchmarking is good enough for the first implementation when: + +- every migration report includes core timing and correctness metrics +- every shadow migration benchmark includes source-versus-target footprint deltas +- manual benchmark rehearsals can be run from a simple manifest +- the docs define what to collect before performance tuning begins +- benchmark requirements do not force a separate subsystem before the migrator ships diff --git a/nitin_docs/index_migrator/10_v1_drop_recreate_spec.md b/nitin_docs/index_migrator/10_v1_drop_recreate_spec.md new file mode 100644 index 00000000..fd108f8d --- /dev/null +++ b/nitin_docs/index_migrator/10_v1_drop_recreate_spec.md @@ -0,0 +1,306 @@ +# Phase 1 Spec: `drop_recreate` + +## Goal + +Build a simple RedisVL migration workflow that: + +- preserves existing documents +- captures the old index configuration before change +- applies only the user-requested schema changes +- generates a plan before any mutation +- supports both guided and scripted use +- explicitly accepts downtime for the migrated index + +This phase is intentionally smaller than the full product goal. Vector datatype, precision, dimension, algorithm, and payload-shape-changing migrations are still in scope for the overall initiative, but they are deferred to `iterative_shadow`. + +This is the only implementation target after the docs land. + +## Supported Changes + +The MVP supports schema changes that can be satisfied by rebuilding the index over the existing document set without rewriting or relocating stored documents. + +Supported categories: + +- add a new non-vector field that indexes data already present in stored documents +- remove an existing field from the index definition +- change index options on an existing non-vector field when the field name, field type, and storage path stay the same +- change index-level options that only affect index definition and do not relocate data + +Supported field types for MVP changes: + +- text +- tag +- numeric +- geo + +The MVP always recreates the same logical index name unless the user is only generating a plan. + +## Blocked Changes + +The CLI must classify the following changes as unsupported in the MVP and stop before `apply`: + +- key prefix changes +- key separator changes +- storage type changes +- JSON path remodels +- field renames +- vector dimension changes +- vector datatype changes +- vector precision changes +- any vector field algorithm change that depends on different stored payload shape +- any change that requires document rewrite or relocation +- any change that requires a new index name as part of the execution path + +These changes should be reported as candidates for the Phase 2 `iterative_shadow` path rather than presented as unsupported forever. + +## Inputs + +The workflow accepts: + +- Redis connection parameters +- source index name +- one of: + - `schema_patch.yaml` + - `target_schema.yaml` + - interactive wizard answers + +Recommended CLI surface: + +```text +rvl migrate plan --index --schema-patch +rvl migrate plan --index --target-schema +rvl migrate wizard --index --plan-out +rvl migrate apply --plan --allow-downtime +rvl migrate validate --plan +``` + +Default optional flags: + +- `--plan-out` +- `--report-out` +- `--key-sample-limit` +- `--query-check-file` +- `--non-interactive` + +### `schema_patch.yaml` + +This is the authoritative input model for requested changes. Unspecified source configuration is preserved by default. + +Example: + +```yaml +version: 1 +changes: + add_fields: + - name: category + type: tag + path: $.category + separator: "," + remove_fields: + - legacy_score + update_fields: + - name: title + options: + sortable: true +``` + +### `target_schema.yaml` + +This is a convenience input. The planner normalizes it into a schema patch by diffing it against the live source schema. + +## Outputs + +The workflow produces: + +- `migration_plan.yaml` +- `migration_report.yaml` +- optional `benchmark_report.yaml` +- console summaries for plan, apply, and validate + +### `migration_plan.yaml` + +Required fields: + +```yaml +version: 1 +mode: drop_recreate +source: + index_name: docs + schema_snapshot: {} + stats_snapshot: {} + keyspace: + storage_type: json + prefixes: ["docs"] + key_separator: ":" + key_sample: ["docs:1", "docs:2"] +requested_changes: {} +merged_target_schema: {} +diff_classification: + supported: true + blocked_reasons: [] +warnings: + - index downtime is required +validation: + require_doc_count_match: true + require_schema_match: true +``` + +### `migration_report.yaml` + +Required fields: + +```yaml +version: 1 +mode: drop_recreate +source_index: docs +result: succeeded +started_at: 2026-03-17T00:00:00Z +finished_at: 2026-03-17T00:05:00Z +timings: + total_migration_duration_seconds: 300 + drop_duration_seconds: 3 + recreate_duration_seconds: 12 + initial_indexing_duration_seconds: 270 + validation_duration_seconds: 15 + downtime_duration_seconds: 285 +validation: + schema_match: true + doc_count_match: true + indexing_failures_delta: 0 + query_checks: [] +benchmark_summary: + documents_indexed_per_second: 3703.7 + source_index_size_mb: 2048 + target_index_size_mb: 1984 + index_size_delta_mb: -64 + baseline_query_p95_ms: 42 + during_migration_query_p95_ms: 90 + post_migration_query_p95_ms: 44 +manual_actions: [] +``` + +## CLI UX + +### `plan` + +- Capture the source snapshot from the live index. +- Normalize requested changes. +- Classify the diff as supported or blocked. +- Emit `migration_plan.yaml`. +- Print a short risk summary that includes downtime. + +### `wizard` + +- Read the live source schema first. +- Walk the user through supported change categories only. +- Reject unsupported requests during the wizard instead of silently converting them. +- Explain when a blocked request belongs to a future `iterative_shadow` migration. +- Emit the same `migration_plan.yaml` shape as `plan`. + +### `apply` + +- Accept only `migration_plan.yaml` as input. +- Require `--allow-downtime`. +- Refuse to run if the plan contains blocked reasons. +- Refuse to run if the current live schema no longer matches the saved source snapshot. + +### `validate` + +- Re-run validation checks from the plan against the current live index. +- Emit `migration_report.yaml`. +- Emit `benchmark_report.yaml` when benchmark fields were collected. + +## Execution Flow + +1. Snapshot source state. + - Load the live index schema using existing RedisVL introspection. + - Capture live stats from index info. + - Record storage type, prefixes, key separator, and a bounded key sample. +2. Normalize requested changes. + - If the input is `target_schema.yaml`, diff it against the source schema and convert it to a patch. + - If the input is wizard answers, convert them to the same patch model. +3. Merge and classify. + - Apply only requested changes to the source schema. + - Classify each diff as supported or blocked. + - Stop if any blocked diff exists. +4. Generate the plan. + - Save source snapshot, requested changes, merged target schema, validation policy, and warnings. +5. Apply the migration. + - Confirm current live schema still matches the source snapshot. + - Drop only the index structure. + - Recreate the same index name using the merged target schema. +6. Wait for indexing completion. + - Poll live index info until `indexing` is false and `percent_indexed` is complete when those fields are available. + - If those fields are unavailable, poll `num_docs` and readiness twice in a row before continuing. + - Stop with timeout rather than waiting forever. +7. Validate. + - Compare live schema to merged target schema. + - Compare live doc count to source doc count. + - Check indexing failure delta. + - Run optional query checks. +8. Emit the report. +9. Emit benchmark artifacts when benchmark data was collected. + +## Validation + +Required validation checks: + +- exact schema match against `merged_target_schema` +- live doc count equals source `num_docs` +- `hash_indexing_failures` does not increase +- key sample records still exist + +Optional validation checks: + +- query checks loaded from `--query-check-file` +- bounded sample fetch checks for representative document ids + +Benchmark fields that should be collected during Phase 1: + +- migration start and end timestamps +- index downtime duration +- readiness polling duration +- source and target document counts +- documents indexed per second +- source and target index footprint +- observed index footprint delta after recreate +- optional representative query latency before, during, and after migration + +Validation is a hard failure for `apply`. + +## Failure Handling + +The MVP fails closed. + +- Unsupported diff: stop at `plan`. +- Source snapshot mismatch at apply time: stop and ask the operator to regenerate the plan. +- Drop succeeds but recreate fails: documents remain; emit a failure report and a manual recovery command using the saved merged schema. +- Validation fails after recreate: leave the recreated index in place, emit a failure report, and stop. +- Interrupted run: no checkpointing in MVP. The operator reruns `plan` or reuses the existing plan after confirming the live source state is still compatible. + +The MVP does not implement automatic rollback. + +## Operational Guidance + +This mode is downtime-accepting by design. + +Engineers should assume: + +- the index is unavailable between drop and recreated index readiness +- search quality can be degraded while initial indexing completes +- large indexes can place measurable pressure on shared Redis resources +- off-peak execution is preferred +- application-level maintenance handling is outside RedisVL +- blocked vector and payload-shape changes should be rerouted to Phase 2 planning instead of being forced into this path + +Default key capture is intentionally small: + +- keyspace definition is always recorded +- a bounded key sample is recorded +- a full key manifest is not part of the default MVP path + +Benchmarking for Phase 1 should stay simple: + +- capture timing and correctness metrics in structured reports +- support manual benchmark rehearsals using [03_benchmarking.md](./03_benchmarking.md) +- avoid introducing a dedicated benchmarking subsystem before the migrator exists diff --git a/nitin_docs/index_migrator/11_v1_drop_recreate_tasks.md b/nitin_docs/index_migrator/11_v1_drop_recreate_tasks.md new file mode 100644 index 00000000..d30ae839 --- /dev/null +++ b/nitin_docs/index_migrator/11_v1_drop_recreate_tasks.md @@ -0,0 +1,160 @@ +# Phase 1 Tasks: `drop_recreate` + +## Task Template + +Every Phase 1 task must document: + +- `ID` +- `Status` +- `Goal` +- `Inputs` +- `Outputs` +- `Touchpoints` +- `Dependencies` +- `Acceptance Criteria` +- `Non-Goals` +- `Handoff Notes` + +## V1-T01 + +- `ID`: `V1-T01` +- `Status`: `Ready` +- `Goal`: Build the source snapshot and artifact models used by `plan`, `apply`, and `validate`. +- `Inputs`: source index name, Redis connection settings, optional key sample limit +- `Outputs`: in-memory and YAML-serializable source snapshot, migration plan model, migration report model +- `Touchpoints`: `redisvl/index/index.py`, new `redisvl/migration/models.py`, new `redisvl/migration/snapshot.py` +- `Dependencies`: none +- `Acceptance Criteria`: + - source schema can be loaded from a live index + - source stats needed by the spec are captured + - storage type, prefixes, key separator, and bounded key sample are recorded + - models serialize to YAML without losing required fields +- `Non-Goals`: + - full key manifest generation + - capacity estimation + - schema diff logic +- `Handoff Notes`: preserve the raw source schema as faithfully as possible so later diffing does not invent defaults. + +## V1-T02 + +- `ID`: `V1-T02` +- `Status`: `Ready` +- `Goal`: Implement schema patch normalization, source-plus-patch merge, and supported-versus-blocked diff classification. +- `Inputs`: source schema snapshot, `schema_patch.yaml` or normalized target schema diff +- `Outputs`: merged target schema and diff classification result +- `Touchpoints`: new `redisvl/migration/patch.py`, new `redisvl/migration/diff.py`, `redisvl/schema` +- `Dependencies`: `V1-T01` +- `Acceptance Criteria`: + - unspecified source config is preserved by default + - blocked diff categories from the spec are rejected with actionable reasons + - supported changes produce a deterministic merged target schema + - `target_schema.yaml` input normalizes to the same patch model +- `Non-Goals`: + - document rewrite planning + - vector migration logic + - shadow migration planning +- `Handoff Notes`: prefer an explicit allowlist of supported diff categories over a generic schema merge engine. + +## V1-T03 + +- `ID`: `V1-T03` +- `Status`: `Ready` +- `Goal`: Add the `plan` command and plan artifact generation. +- `Inputs`: source index, connection settings, patch or target schema input +- `Outputs`: `migration_plan.yaml`, console summary +- `Touchpoints`: new `redisvl/cli/migrate.py`, `redisvl/cli/main.py`, `redisvl/migration/planner.py` +- `Dependencies`: `V1-T01`, `V1-T02` +- `Acceptance Criteria`: + - `plan` emits the required YAML shape + - blocked plans do not proceed to mutation + - the console summary includes downtime warnings + - the current plan format is stable enough for `apply` and `validate` +- `Non-Goals`: + - interactive wizard flow + - mutation against Redis + - advanced report rendering +- `Handoff Notes`: make the plan file human-readable so operators can review it before running `apply`. + +## V1-T04 + +- `ID`: `V1-T04` +- `Status`: `Ready` +- `Goal`: Add the guided `wizard` flow that emits the same plan artifact as `plan`. +- `Inputs`: source index, connection settings, interactive answers +- `Outputs`: normalized schema patch and `migration_plan.yaml` +- `Touchpoints`: new `redisvl/migration/wizard.py`, `redisvl/cli/migrate.py` +- `Dependencies`: `V1-T01`, `V1-T02`, `V1-T03` +- `Acceptance Criteria`: + - wizard starts from the live source schema + - wizard only offers supported MVP change categories + - wizard emits the same plan structure as `plan` + - unsupported requests are blocked during the flow +- `Non-Goals`: + - platform inventory collection + - free-form schema editing for blocked categories + - shadow migration support +- `Handoff Notes`: keep prompts simple and linear; this is a guided assistant, not a general schema builder. + +## V1-T05 + +- `ID`: `V1-T05` +- `Status`: `Ready` +- `Goal`: Implement `apply` for the `drop_recreate` strategy. +- `Inputs`: reviewed `migration_plan.yaml`, `--allow-downtime` +- `Outputs`: recreated index and execution status +- `Touchpoints`: new `redisvl/migration/executor.py`, `redisvl/cli/migrate.py`, `redisvl/index/index.py` +- `Dependencies`: `V1-T03` +- `Acceptance Criteria`: + - `apply` refuses to run without `--allow-downtime` + - source snapshot mismatch blocks execution + - index drop preserves documents + - recreated index uses the merged target schema + - readiness polling stops on success or timeout +- `Non-Goals`: + - automatic rollback + - checkpointing + - cutover orchestration +- `Handoff Notes`: use the simplest safe sequence possible; do not add background job machinery in the MVP. + +## V1-T06 + +- `ID`: `V1-T06` +- `Status`: `Ready` +- `Goal`: Implement `validate` and `migration_report.yaml`. +- `Inputs`: `migration_plan.yaml`, live index state, optional query checks +- `Outputs`: validation result, report artifact, console summary +- `Touchpoints`: new `redisvl/migration/validation.py`, new `redisvl/migration/reporting.py`, `redisvl/cli/migrate.py` +- `Dependencies`: `V1-T01`, `V1-T03`, `V1-T05` +- `Acceptance Criteria`: + - schema match is verified + - doc count match is verified + - indexing failure delta is captured + - core timing metrics are captured in the report + - optional query checks run deterministically + - report artifact is emitted for both success and failure +- `Non-Goals`: + - benchmark replay + - observability integrations + - automatic remediation +- `Handoff Notes`: keep the report format concise and stable so it can become the operator handoff artifact later. + +## V1-T07 + +- `ID`: `V1-T07` +- `Status`: `Ready` +- `Goal`: Add Phase 1 tests and user-facing documentation for the new CLI flow. +- `Inputs`: completed planner, wizard, executor, and validator behavior +- `Outputs`: passing tests and concise usage docs +- `Touchpoints`: `tests/`, `redisvl/cli`, `nitin_docs/index_migrator` +- `Dependencies`: `V1-T03`, `V1-T04`, `V1-T05`, `V1-T06` +- `Acceptance Criteria`: + - CI-friendly happy-path and failure-path tests exist + - manual benchmark rehearsal guidance exists + - manual smoke test instructions are captured in the test doc + - help text matches the Phase 1 spec + - the docs directory still points to the active truth +- `Non-Goals`: + - Phase 2 implementation + - platform API integrations + - performance tuning beyond smoke coverage +- `Handoff Notes`: keep test coverage focused on correctness and operator safety, not on simulating every future migration shape. diff --git a/nitin_docs/index_migrator/12_v1_drop_recreate_tests.md b/nitin_docs/index_migrator/12_v1_drop_recreate_tests.md new file mode 100644 index 00000000..e6852a6f --- /dev/null +++ b/nitin_docs/index_migrator/12_v1_drop_recreate_tests.md @@ -0,0 +1,122 @@ +# Phase 1 Tests: `drop_recreate` + +## Test Matrix + +| ID | Scenario | Type | Expected Result | +| --- | --- | --- | --- | +| V1-CI-01 | source snapshot loads live schema and stats | CI | snapshot matches live index metadata | +| V1-CI-02 | patch merge preserves unspecified config | CI | merged target schema is deterministic | +| V1-CI-03 | blocked diff categories stop at `plan` | CI | no mutation and actionable error | +| V1-CI-04 | `plan` emits valid YAML artifact | CI | plan file contains required fields | +| V1-CI-05 | `apply` requires `--allow-downtime` | CI | execution blocked without flag | +| V1-CI-06 | drop and recreate preserves documents | CI | doc count matches before and after | +| V1-CI-07 | readiness polling completes or times out | CI | executor exits deterministically | +| V1-CI-08 | `validate` emits a report on success | CI | report contains required fields | +| V1-CI-09 | `validate` emits a report on failure | CI | failure report includes manual actions | +| V1-CI-10 | timing metrics are captured in reports | CI | report contains stable timing fields | +| V1-MAN-01 | guided wizard produces the same plan model | Manual | plan matches scripted path | +| V1-MAN-02 | realistic rebuild on larger dataset | Manual | migration completes with expected downtime | +| V1-MAN-03 | benchmark rehearsal on representative workload | Manual | duration, throughput, and query impact are recorded | + +## Happy Path + +The minimum automated happy path should cover: + +- create a source index with existing documents +- generate `migration_plan.yaml` from `schema_patch.yaml` +- run `apply --allow-downtime` +- wait for recreated index readiness +- run `validate` +- confirm schema match, doc count match, and zero indexing failure delta + +Representative happy-path schema changes: + +- add a tag field backed by existing JSON data +- remove a legacy numeric field from the index +- make an existing text field sortable + +## Failure Paths + +CI should cover at least: + +- blocked diff because of vector change +- blocked diff because of prefix change +- source snapshot mismatch between `plan` and `apply` +- recreate failure after drop +- validation failure because doc counts diverge +- readiness timeout +- missing required plan fields + +Every failure path must prove: + +- documents are not intentionally deleted by the migrator +- an actionable error is surfaced +- blocked vector and payload-shape diffs point the user to the Phase 2 migration path +- a `migration_report.yaml` can still be produced when the failure happens after `apply` starts + +## Manual Smoke Test + +Run a manual smoke test on a non-production Redis deployment: + +1. Create an index with representative JSON documents. +2. Prepare a `schema_patch.yaml` that adds one non-vector field and removes one old field. +3. Run `rvl migrate plan`. +4. Confirm the plan includes the downtime warning and no blocked diffs. +5. Run `rvl migrate apply --allow-downtime`. +6. Wait until readiness completes. +7. Run `rvl migrate validate`. +8. Confirm search behavior has resumed and the new schema is active. + +Manual smoke test success means: + +- the operator can understand the plan without reading code +- the index rebuild completes without deleting documents +- the report is sufficient to hand back to another operator + +## Scale Sanity Check + +Phase 1 does not need a cluster-wide stress harness, but it does need a basic scale sanity check. + +Manual checks: + +- run the flow on an index large enough to make polling and downtime visible +- confirm default key capture stays bounded +- confirm the tool does not attempt a full key manifest by default +- confirm console output still stays readable for a larger index + +This is not a benchmark. The goal is to catch accidental implementation choices that make the MVP operationally unsafe on larger datasets. + +## Benchmark Rehearsal + +Phase 1 benchmarking should be lightweight and operationally useful. + +Use a simple rehearsal driven by [03_benchmarking.md](./03_benchmarking.md): + +1. Record a benchmark label and workload context. +2. Measure baseline query latency on a representative query set. +3. Run the migration on a realistic non-production index. +4. Record total migration duration, downtime duration, and readiness duration. +5. Record source and target document counts and index stats. +6. Record the observed source-versus-target index footprint delta. +7. Re-run the representative query set after migration. +8. Save a `benchmark_report.yaml`. + +The first benchmark questions to answer are: + +- how long does the rebuild take end-to-end +- how long is the index unavailable +- how many documents per second can the rebuild sustain +- how much query latency changes during and after the rebuild +- how much the recreated index footprint changes even for schema-only rebuilds +- whether the observed runtime is predictable enough for a maintenance window + +## Release Gate + +Phase 1 should not be considered ready until all of the following are true: + +- all CI scenarios in the test matrix pass +- at least one manual smoke test passes +- at least one benchmark rehearsal has been documented on a representative dataset +- help text matches the spec +- the docs in `nitin_docs/index_migrator/` still match the shipped CLI behavior +- the release notes or implementation summary clearly state that `drop_recreate` is downtime-accepting diff --git a/nitin_docs/index_migrator/20_v2_iterative_shadow_spec.md b/nitin_docs/index_migrator/20_v2_iterative_shadow_spec.md new file mode 100644 index 00000000..c5b893a0 --- /dev/null +++ b/nitin_docs/index_migrator/20_v2_iterative_shadow_spec.md @@ -0,0 +1,213 @@ +# Phase 2 Spec: `iterative_shadow` + +## Goal + +Add a conservative, capacity-aware shadow migration mode that works one index at a time and reduces disruption without attempting to automate cutover or platform scaling. + +This phase exists to support the migration cases that Phase 1 intentionally does not handle safely: + +- vector datatype changes such as `FP32 -> FP16` +- vector precision changes +- vector dimension changes +- vector algorithm changes such as `HNSW -> FLAT` +- payload-shape-changing model or algorithm migrations that require new stored fields or a new target keyspace + +The first Phase 2 implementation should stay simple in operation even though it handles harder migration shapes: + +- one index at a time +- capacity gate before each index +- operator-owned cutover +- no automatic scale-up or scale-down +- no multi-index concurrent shadowing +- explicit transform or backfill plan when the target payload shape changes + +## Why It Is Not MVP + +This mode is not the MVP because it introduces operational questions that Phase 1 does not need to solve: + +- database-level capacity inventory +- target-footprint estimation for old and new document and index shapes +- overlap estimation for old and new payloads +- transform or backfill planning for target payload creation +- operator handoff between validation and cutover +- cleanup sequencing after cutover +- larger-scale manual testing on clustered deployments + +Phase 1 should prove the core planning and reporting model first. + +## Planner Inputs + +The Phase 2 planner takes: + +- source index name +- Redis connection settings +- supported schema patch or target schema input +- `platform_inventory.yaml` +- optional `transform_plan.yaml` when the migration requires new target payloads + +### `platform_inventory.yaml` + +```yaml +version: 1 +platform: redis_cloud +database: + name: customer-a-prod + total_memory_mb: 131072 + available_memory_mb: 32768 + shard_count: 8 + replica_count: 1 + auto_tiering: false + notes: "" +policy: + reserve_percent: 15 +``` + +Required inventory fields: + +- platform +- total memory +- available memory +- shard count +- replica count +- reserve policy + +Optional inventory fields: + +- flash or disk notes +- environment labels +- operator comments +- benchmark history notes + +### `transform_plan.yaml` + +This file is required when the target schema cannot be built from the current stored payload. + +Example: + +```yaml +version: 1 +target_keyspace: + storage_type: json + prefixes: ["docs_v2"] + key_separator: ":" +transform: + mode: rewrite + vector_fields: + - name: embedding + source_path: $.embedding + target_path: $.embedding_v2 + source_dimensions: 1536 + target_dimensions: 1536 + source_datatype: float32 + target_datatype: float16 + source_algorithm: hnsw + target_algorithm: flat + payload_changes: + - source_path: $.body + target_path: $.body_v2 + strategy: copy +``` + +The first implementation should keep this model explicit and declarative. The migrator should not guess how to transform payloads. + +## Capacity Gate + +The first Phase 2 capacity gate should be intentionally conservative. + +Planner rules: + +1. Compute source document footprint from live stats or bounded sampling. +2. Compute source index footprint from live index stats. +3. Estimate target document footprint. + - For payload-compatible shadowing, this can be zero or near-zero additional document storage. + - For payload rewrite shadowing, this includes the duplicated target payload. +4. Estimate target index footprint. + - Use live source footprint as a baseline when the target is structurally similar. + - Adjust for vector dimension, datatype, precision, and algorithm changes when those are present. +5. Compute reserve headroom as `max(operator reserve, 15 percent of configured memory)` when no stricter operator value is provided. +6. Compute `estimated_peak_overlap_footprint` as: + - `source_docs + source_index + target_docs + target_index` +7. Return `READY` only if: + - the migration diff is supported for Phase 2 + - any required transform plan is present and valid + - available memory is greater than or equal to `estimated_peak_overlap_footprint + reserve` +8. Return `SCALE_REQUIRED` when the migration is supported but headroom is insufficient. +9. Return `MANUAL_REVIEW_REQUIRED` when the diff is ambiguous or live data is insufficient for a safe estimate. + +This keeps the first shadow planner understandable and safe. More sophisticated estimators can come later if Phase 1 and early Phase 2 learnings justify them. + +The planner should also report: + +- estimated migration window +- estimated peak overlap footprint +- expected source-versus-target footprint delta after cutover +- whether the migration is `shadow_reindex` or `shadow_rewrite` + +## Execution Flow + +1. Capture the source snapshot and normalize requested changes. +2. Classify the migration as either: + - `shadow_reindex` when the target schema can be built from the current payload + - `shadow_rewrite` when a transform or backfill is needed +3. Load `platform_inventory.yaml`. +4. Load `transform_plan.yaml` when `shadow_rewrite` is required. +5. Compute the capacity gate result. +6. Stop if the result is not `READY`. +7. Create the shadow target for the current index only. +8. If `shadow_rewrite` is selected: + - create the target keyspace + - transform or backfill source documents into the target keyspace +9. Wait until the shadow index is ready. +10. Validate the shadow target. +11. Emit an operator cutover runbook. +12. Wait for operator confirmation that cutover is complete. +13. Retire the old index. +14. Retire old source payloads only when the plan explicitly says they are no longer needed. +15. Move to the next index only after the current index is finished. + +The scheduler for Phase 2 is intentionally serial. + +## Operator Actions + +The operator is responsible for: + +- supplying platform inventory +- supplying the transform or backfill plan when payload shape changes +- choosing the migration window +- scaling the database if the plan returns `SCALE_REQUIRED` +- switching application traffic to the shadow target +- confirming cutover before old index retirement +- monitoring the deployment during overlap + +RedisVL should not attempt to perform these actions automatically in the first Phase 2 implementation. + +Phase 2 should still emit structured benchmark outputs so operators can compare: + +- estimated overlap duration versus actual overlap duration +- estimated capacity usage versus observed document and index stats +- memory savings or growth after algorithm, datatype, precision, dimension, or payload-shape changes +- query latency impact during shadow validation and overlap + +## Blocked Scenarios + +The initial Phase 2 plan still blocks: + +- automatic scaling +- automatic traffic switching +- concurrent shadowing of multiple large indexes +- in-place destructive rewrites without a shadow target +- payload-shape-changing migrations without an explicit transform or backfill plan +- transform plans that do not define a deterministic target keyspace +- Active-Active specific workflows +- platform API integrations as a hard requirement + +## Open Questions Deferred + +These questions should stay deferred until after Phase 1 implementation: + +- whether to add direct Redis Cloud or Redis Software API integrations +- whether to support checkpoint and resume across shadow runs +- whether alias-based cutover should be added later +- how transform hooks should be expressed beyond the initial declarative plan format +- whether re-embedding should be integrated directly or stay an operator-supplied preprocessing step +- how much historical benchmark data should influence ETA predictions diff --git a/nitin_docs/index_migrator/21_v2_iterative_shadow_tasks.md b/nitin_docs/index_migrator/21_v2_iterative_shadow_tasks.md new file mode 100644 index 00000000..4b00955e --- /dev/null +++ b/nitin_docs/index_migrator/21_v2_iterative_shadow_tasks.md @@ -0,0 +1,159 @@ +# Phase 2 Tasks: `iterative_shadow` + +## Task Template + +Every Phase 2 task must document: + +- `ID` +- `Status` +- `Goal` +- `Inputs` +- `Outputs` +- `Touchpoints` +- `Dependencies` +- `Acceptance Criteria` +- `Non-Goals` +- `Handoff Notes` + +Phase 2 tasks are planned work only. They should not start until Phase 1 implementation is complete and learnings are folded back into this workspace. + +## V2-T01 + +- `ID`: `V2-T01` +- `Status`: `Planned` +- `Goal`: Add the platform inventory model and parser used by the capacity-aware planner. +- `Inputs`: `platform_inventory.yaml` +- `Outputs`: validated inventory model +- `Touchpoints`: new `redisvl/migration/inventory.py`, `redisvl/migration/models.py`, `redisvl/cli/migrate.py` +- `Dependencies`: Phase 1 implementation complete +- `Acceptance Criteria`: + - required inventory fields are validated + - unsupported platform inventory shapes are rejected clearly + - inventory values are available to the planner without CLI-specific parsing logic +- `Non-Goals`: + - platform API calls + - capacity math + - shadow execution +- `Handoff Notes`: keep the inventory model platform-neutral enough to support both Redis Cloud and Redis Software. + +## V2-T02 + +- `ID`: `V2-T02` +- `Status`: `Planned` +- `Goal`: Add the transform or backfill plan model and classify whether a migration is `shadow_reindex` or `shadow_rewrite`. +- `Inputs`: normalized diff classification, optional `transform_plan.yaml` +- `Outputs`: validated transform model and execution-mode classification +- `Touchpoints`: new `redisvl/migration/transforms.py`, `redisvl/migration/models.py`, `redisvl/migration/planner.py` +- `Dependencies`: `V2-T01` +- `Acceptance Criteria`: + - payload-compatible migrations are classified as `shadow_reindex` + - vector or payload-shape-changing migrations require `shadow_rewrite` + - missing transform plans are rejected clearly when they are required + - transform plans remain declarative and deterministic +- `Non-Goals`: + - direct embedding generation + - platform API calls + - shadow execution +- `Handoff Notes`: keep the first transform model simple and explicit rather than inventing a generic transformation framework. + +## V2-T03 + +- `ID`: `V2-T03` +- `Status`: `Planned` +- `Goal`: Implement the conservative capacity estimator and gate result classification. +- `Inputs`: source index stats, source document footprint, inventory model, normalized diff classification, optional transform model +- `Outputs`: `READY`, `SCALE_REQUIRED`, or `MANUAL_REVIEW_REQUIRED` +- `Touchpoints`: new `redisvl/migration/capacity.py`, `redisvl/migration/planner.py` +- `Dependencies`: `V2-T01`, `V2-T02` +- `Acceptance Criteria`: + - source document and index footprint are computed consistently + - target footprint estimates account for vector datatype, precision, dimension, algorithm, and payload-shape changes when those are present + - reserve policy is applied consistently + - supported diffs can produce `READY` or `SCALE_REQUIRED` + - ambiguous inputs produce `MANUAL_REVIEW_REQUIRED` +- `Non-Goals`: + - fine-grained shard placement modeling + - automated scale actions + - performance benchmarking as a separate subsystem +- `Handoff Notes`: keep the first estimator intentionally conservative and easy to inspect. + +## V2-T04 + +- `ID`: `V2-T04` +- `Status`: `Planned` +- `Goal`: Extend the planner to support `iterative_shadow` for one index at a time. +- `Inputs`: source snapshot, normalized diff, inventory, transform model, capacity result +- `Outputs`: shadow migration plan and operator action list +- `Touchpoints`: `redisvl/migration/planner.py`, `redisvl/cli/migrate.py` +- `Dependencies`: `V2-T03` +- `Acceptance Criteria`: + - supported vector and payload-shape changes can produce a valid shadow plan + - non-`READY` capacity results block apply + - plan artifact clearly identifies source, shadow target, target keyspace when present, and operator actions + - plan artifact identifies whether the run is `shadow_reindex` or `shadow_rewrite` + - plan format stays readable and deterministic +- `Non-Goals`: + - multi-index concurrency + - automatic cleanup + - fleet scheduling +- `Handoff Notes`: preserve the same plan-first experience as Phase 1. + +## V2-T05 + +- `ID`: `V2-T05` +- `Status`: `Planned` +- `Goal`: Implement shadow target creation, optional transform or backfill execution, readiness waiting, and validation hooks. +- `Inputs`: approved shadow migration plan +- `Outputs`: ready shadow index and validation state +- `Touchpoints`: new `redisvl/migration/shadow.py`, `redisvl/migration/executor.py`, `redisvl/migration/validation.py` +- `Dependencies`: `V2-T04` +- `Acceptance Criteria`: + - only one index is processed at a time + - shadow target creation follows the plan artifact + - `shadow_rewrite` runs can build the target payload into the planned keyspace + - readiness polling behaves deterministically + - validation runs before cutover handoff +- `Non-Goals`: + - automatic cutover + - cross-index scheduling + - platform autoscaling +- `Handoff Notes`: do not generalize this into a fleet scheduler in the first Phase 2 implementation. + +## V2-T06 + +- `ID`: `V2-T06` +- `Status`: `Planned` +- `Goal`: Add validation reporting, benchmark reporting, operator handoff, cutover confirmation, and old-index retirement. +- `Inputs`: validated shadow plan and operator confirmation +- `Outputs`: post-cutover cleanup result and report +- `Touchpoints`: `redisvl/cli/migrate.py`, `redisvl/migration/reporting.py`, `redisvl/migration/executor.py` +- `Dependencies`: `V2-T05` +- `Acceptance Criteria`: + - cutover remains operator-owned + - cleanup does not run before operator confirmation + - report captures cutover handoff, cleanup outcome, and source-versus-target footprint deltas +- `Non-Goals`: + - alias management + - application config mutation + - rollback orchestration +- `Handoff Notes`: the CLI should guide the operator clearly, but it must not attempt to switch traffic itself. + +## V2-T07 + +- `ID`: `V2-T07` +- `Status`: `Planned` +- `Goal`: Add future-facing tests and docs for clustered shadow migration planning. +- `Inputs`: completed Phase 2 planner and executor behavior +- `Outputs`: test coverage, manual scale rehearsal instructions, and updated planning docs +- `Touchpoints`: `tests/`, `nitin_docs/index_migrator`, `redisvl/cli` +- `Dependencies`: `V2-T04`, `V2-T05`, `V2-T06` +- `Acceptance Criteria`: + - planner outcomes are covered in automated tests + - benchmark, ETA, and memory-delta guidance are documented for manual cluster rehearsals + - manual cluster rehearsal steps are documented + - docs still reflect the shipped Phase 2 behavior accurately +- `Non-Goals`: + - fleet-wide migration support + - performance tuning beyond safety validation + - platform-specific automation +- `Handoff Notes`: keep Phase 2 documentation grounded in the one-index-at-a-time rule. diff --git a/nitin_docs/index_migrator/22_v2_iterative_shadow_tests.md b/nitin_docs/index_migrator/22_v2_iterative_shadow_tests.md new file mode 100644 index 00000000..e7d2d5c2 --- /dev/null +++ b/nitin_docs/index_migrator/22_v2_iterative_shadow_tests.md @@ -0,0 +1,102 @@ +# Phase 2 Tests: `iterative_shadow` + +## Clustered Test Setup + +Phase 2 needs both automated planner coverage and manual clustered rehearsals. + +Minimum clustered rehearsal setup: + +- Redis Cloud or Redis Software deployment +- sharded database +- one source index large enough to make overlap meaningful +- representative application query set +- operator-supplied `platform_inventory.yaml` +- `transform_plan.yaml` for any vector or payload-shape-changing migration + +The first manual scale rehearsal should stay focused on a single index, not a fleet-wide migration. + +## Planner Acceptance + +Automated planner tests should cover: + +- supported shadow diff with sufficient headroom returns `READY` +- supported shadow diff with insufficient headroom returns `SCALE_REQUIRED` +- ambiguous or incomplete input returns `MANUAL_REVIEW_REQUIRED` +- vector datatype, precision, dimension, or algorithm changes require `shadow_rewrite` +- payload-shape-changing diffs stop before planning unless a valid transform plan is present + +Planner acceptance is successful when the result is deterministic and the operator action list is clear. + +## Unsafe Capacity Cases + +Manual and automated coverage should include: + +- insufficient available memory +- missing or invalid inventory fields +- conflicting operator reserve policy +- large source footprint with conservative reserve +- target footprint larger than source footprint because of dimension or payload expansion +- peak overlap estimate exceeds available headroom even when post-cutover memory would shrink + +Unsafe capacity handling is correct when: + +- the planner blocks the run +- no shadow index is created +- the report tells the operator what must change before retry + +## Shadow Validation + +Validation coverage should prove: + +- shadow target reaches readiness before handoff +- schema matches the planned target +- transformed payload fields match the declared target shape when `shadow_rewrite` is used +- query checks pass before cutover +- old index is not retired before operator confirmation + +This is the safety boundary for Phase 2. + +## Benchmark Rehearsal + +Phase 2 benchmarks should answer: + +- how accurate the planner ETA was +- how long the old and shadow indexes overlapped +- how much query latency changed during overlap +- whether the capacity reserve was conservative enough +- how much memory or size changed after datatype, precision, dimension, algorithm, or payload-shape changes +- whether estimated peak overlap footprint matched observed overlap closely enough + +Minimum manual benchmark coverage: + +- one run where the planner returns `READY` and the migration completes +- one run where the planner returns `SCALE_REQUIRED` +- one run with representative read traffic during overlap +- one vector-shape or algorithm change such as `HNSW -> FLAT` or `FP32 -> FP16` +- one payload-shape-changing migration that requires transform or backfill + +Every benchmark rehearsal should produce a structured benchmark report that can be compared against previous runs. + +## Resume/Retry + +The first Phase 2 implementation does not need fleet-grade checkpointing, but it does need basic retry behavior. + +Required checks: + +- planner can be rerun with the same inventory and produce the same decision +- failed shadow creation does not trigger cleanup of the old index +- operator can rerun the planned index only after fixing the blocking condition + +If stronger checkpointing is needed later, it should become its own scoped follow-up rather than being absorbed into the first shadow implementation. + +## Exit Criteria + +Phase 2 should not move from planned to ready until: + +- Phase 1 has been implemented and reviewed +- Phase 1 learnings have been written back into this workspace +- planner outcomes are covered by automated tests +- at least one manual clustered rehearsal has been designed in detail +- at least one benchmark rehearsal has been defined for a representative shadow migration +- at least one benchmark rehearsal has been defined for a vector or payload-shape-changing shadow migration +- the one-index-at-a-time execution rule is still preserved in the design diff --git a/nitin_docs/index_migrator/90_prd.md b/nitin_docs/index_migrator/90_prd.md new file mode 100644 index 00000000..6bfac766 --- /dev/null +++ b/nitin_docs/index_migrator/90_prd.md @@ -0,0 +1,581 @@ +# Product Requirements Document: RedisVL Index Migrator + +## Summary + +RedisVL needs a migration workflow for search index changes that is safer and more operationally predictable than ad hoc scripts, but still simple enough to build, review, and operate without introducing an orchestration-heavy subsystem. + +This PRD defines a phased migration product: + +- Phase 1 MVP: `drop_recreate` +- Phase 2: `iterative_shadow` + +The overall product goal is broader than the MVP. The migrator should eventually handle not only simple schema changes, but also vector datatype, precision, dimension, algorithm, and payload-shape-changing migrations such as: + +- `HNSW -> FLAT` +- `FP32 -> FP16` +- vector dimension changes +- embedding or payload-shape changes that require new stored fields or a new target keyspace + +Phase 1 stays intentionally narrow so the team can ship a plan-first, document-preserving migration tool quickly. Phase 2 is where those harder migrations are handled safely through one-index-at-a-time shadow migration and optional transform or backfill. + +This document is the review-oriented summary of the detailed planning docs in this workspace. + +## Problem + +RedisVL today gives users index lifecycle primitives, not a migration product. + +Users can: + +- create indexes +- delete indexes +- inspect index information +- load documents + +Users cannot yet: + +- preview a schema migration in a structured way +- preserve current index configuration before change in a standard workflow +- apply only a requested subset of schema changes +- generate a repeatable migration plan artifact +- validate a migration with a consistent report +- estimate migration duration, query impact, or memory impact from benchmarkable outputs +- safely handle vector and payload-shape changes that require a new target shape + +This gap is manageable for small experiments, but becomes painful for production workloads where: + +- indexes can contain millions of documents +- query latency matters during rebuild windows +- teams need predictable maintenance timelines +- migrations may change vector algorithms, datatypes, or payload shape +- Redis deployments may be clustered on Redis Cloud or Redis Software +- operators need a clean handoff between planning, execution, and validation + +## Users and Review Audience + +Primary users: + +- application engineers managing RedisVL-backed indexes +- platform engineers responsible for Redis operations +- support or solution engineers helping customers plan schema migrations + +Review audience: + +- RedisVL maintainers +- product and engineering stakeholders +- operators who will validate whether the workflow is practical in real environments + +## Goals + +- Provide a first-class migration workflow for RedisVL-managed indexes. +- Preserve existing documents during the Phase 1 path. +- Capture current schema and index configuration before any mutation. +- Apply only requested schema changes. +- Require a reviewed migration plan before execution. +- Support both scripted and guided user experiences. +- Make downtime and disruption explicit. +- Add structured reports and benchmarking outputs so migration windows become more predictable over time. +- Add benchmarking for memory and size deltas caused by schema, vector, and payload-shape changes. +- Keep the implementation simple enough that another engineer can understand and modify it quickly. + +## In Scope + +### Product-Wide Scope + +- schema-change migrations for RedisVL-managed indexes +- vector datatype, precision, dimension, and algorithm migrations +- payload-shape-changing migrations when the operator provides an explicit transform or backfill plan +- YAML artifacts for plans and reports +- operator-readable console output +- one-index-at-a-time execution +- benchmarking outputs for timing, query impact, and memory or size deltas + +### Phase 1 MVP + +- one index at a time +- document-preserving `drop_recreate` migrations +- source schema and stats snapshot +- patch-based schema change requests +- target-schema diff normalization into the same patch model +- guided wizard and scripted CLI flows +- plan generation before any mutation +- explicit downtime acknowledgment for `apply` +- readiness waiting after recreate +- validation and reporting +- benchmark-friendly timing, correctness, and index-footprint outputs + +### Phase 2 + +- one-index-at-a-time `iterative_shadow` migrations +- conservative capacity gating before each index +- optional platform inventory input +- shadow target creation and validation +- `shadow_reindex` for payload-compatible migrations +- `shadow_rewrite` for vector or payload-shape-changing migrations +- explicit transform or backfill plan input when payload shape changes +- operator handoff for cutover +- benchmark outputs for ETA, peak overlap, and source-versus-target size deltas + +## Out of Scope + +- automatic traffic cutover +- automatic platform scaling +- concurrent migration of multiple large indexes +- fully managed rollback orchestration +- full key manifest capture by default +- platform API integration as a hard requirement +- automatic transform inference +- automatic embedding generation or automatic re-embedding inside the migrator +- in-place destructive payload rewrites without a shadow target + +## Product Principles + +- Prefer simple and safe over fully automated. +- Reuse existing RedisVL primitives before adding new layers. +- Make the plan artifact the center of the workflow. +- Treat operator actions as first-class, not hidden implementation details. +- Fail closed when a migration request is ambiguous or unsupported for the selected phase. +- Measure migration behavior so future planning gets better with evidence. + +## Current State + +RedisVL already has building blocks that support a migration product: + +- `SearchIndex.from_existing()` for live schema reconstruction +- `SearchIndex.delete(drop=False)` for dropping the index structure while preserving documents +- `SearchIndex.info()` for live index stats that can inform planning, validation, and timing + +What is missing is the product layer on top: + +- a migration planner +- schema patch normalization and diff classification +- migration-specific CLI commands +- guided user flow +- structured migration and benchmark artifacts +- a capacity-aware future mode for larger production environments +- transform or backfill planning for migrations that change payload shape + +## Proposed Product + +### Phase 1 MVP: `drop_recreate` + +Scope: + +- one index at a time +- preserve documents +- snapshot source schema and stats +- accept `schema_patch.yaml`, `target_schema.yaml`, or wizard answers +- normalize all inputs into the same plan model +- classify requested changes as supported or blocked +- generate `migration_plan.yaml` +- require explicit downtime acknowledgment for `apply` +- drop only the index structure +- recreate the index using the merged schema +- wait for readiness +- validate and emit `migration_report.yaml` +- optionally emit `benchmark_report.yaml` + +Supported changes: + +- add non-vector fields backed by existing document data +- remove fields +- adjust supported non-vector index options where stored payload shape does not change +- adjust index-level options that do not relocate or rewrite data + +Blocked in Phase 1: + +- key prefix changes +- key separator changes +- storage type changes +- JSON path remodels +- field renames +- vector dimension, datatype, precision, or payload-shape-changing algorithm changes +- any change that requires document rewrite or relocation + +Blocked changes in Phase 1 should be reported as Phase 2 candidates, not as unsupported forever. + +### Phase 2: `iterative_shadow` + +Scope: + +- one index at a time +- conservative capacity gate before each index +- optional `platform_inventory.yaml` +- optional `transform_plan.yaml` when payload shape changes +- shadow target creation +- readiness waiting and validation +- operator-owned cutover +- old index retirement after operator confirmation +- optional old-payload retirement after operator confirmation +- structured benchmark outputs for overlap timing, ETA accuracy, and memory or size deltas + +Execution submodes: + +- `shadow_reindex` + - use when the new index can be built from the current stored payload + - still useful for lower-disruption rebuilds when the payload shape does not change +- `shadow_rewrite` + - use when vector datatype, precision, dimension, algorithm, or payload shape changes require a new target payload or keyspace + - examples: `HNSW -> FLAT`, `FP32 -> FP16`, dimension changes, new embedding schema + +Still intentionally excluded: + +- automatic cutover +- automatic scaling +- concurrent shadowing of multiple large indexes +- transform inference + +## Architecture + +The product should work as a plan-first migration workflow with explicit operator handoff and an optional transform path for harder migrations. + +```mermaid +flowchart LR + OP["Engineer / Operator"] --> CLI["RedisVL migrate CLI"] + APP["Applications"] --> DB["Redis database endpoint"] + + CLI --> PLAN["Planner
snapshot + diff + mode selection"] + CLI --> EXEC["Executor"] + CLI --> VAL["Validator + reports"] + + PLAN --> META["Live index schema + stats"] + PLAN --> INV["Optional platform inventory"] + PLAN --> XFORM["Optional transform or backfill plan"] + + EXEC --> DB + EXEC --> TARGET["Optional target keyspace + shadow index"] + TARGET --> DB + VAL --> DB + + OP --> CUT["Maintenance window / cutover decision"] + CUT --> APP +``` + +Architecture expectations: + +- RedisVL owns planning, execution, validation, and artifact generation. +- Redis remains the system of record for source documents and index state. +- The operator owns maintenance windows, scaling, transform inputs, and application cutover decisions. +- The product must stay compatible with single-node and clustered deployments without assuming the whole index lives on one shard. + +## Why the Work Is Phased + +The product is phased because the migration strategies solve different problems: + +- `drop_recreate` is the fastest path to a usable, understandable MVP +- `iterative_shadow` is the future path for tighter operational control and safer handling of vector or payload-shape changes + +Trying to ship everything as one fully mature product would push the team into: + +- premature capacity-estimation complexity +- premature transform-runtime design +- premature cutover abstractions +- premature platform-specific automation +- a larger QA and support surface before the MVP proves value + +Phase 1 is therefore the implementation target, while Phase 2 remains planned work informed by Phase 1 learnings. + +## User Experience + +### Scripted Flow + +```text +rvl migrate plan --index --schema-patch +rvl migrate plan --index --target-schema +rvl migrate apply --plan --allow-downtime +rvl migrate validate --plan +``` + +### Guided Flow + +```text +rvl migrate wizard --index --plan-out +``` + +User experience requirements: + +- `plan` must never mutate Redis +- `wizard` must emit the same plan artifact shape as `plan` +- `apply` must only accept a reviewed plan file +- `apply` must require explicit downtime acknowledgment in Phase 1 +- `validate` must be usable independently after `apply` +- console output must be concise and operator-readable +- blocked Phase 1 requests must tell the user when Phase 2 is the correct path + +## Usage + +### Phase 1: `drop_recreate` + +Review-first workflow: + +```text +rvl migrate plan --index products --schema-patch patch.yaml --plan-out migration_plan.yaml +rvl migrate apply --plan migration_plan.yaml --allow-downtime --report-out migration_report.yaml +rvl migrate validate --plan migration_plan.yaml --report-out migration_report.yaml +``` + +Guided workflow: + +```text +rvl migrate wizard --index products --plan-out migration_plan.yaml +rvl migrate apply --plan migration_plan.yaml --allow-downtime +``` + +Expected usage pattern: + +1. Generate a plan from a live source index. +2. Review blocked diffs, warnings, downtime notice, and merged target schema. +3. Run `apply` only after the operator accepts the maintenance window. +4. Run `validate` and retain the report as the handoff artifact. + +### Phase 2: `iterative_shadow` + +Payload-compatible shadow workflow: + +```text +rvl migrate plan --mode iterative_shadow --index products --schema-patch patch.yaml --platform-inventory platform_inventory.yaml --plan-out migration_plan.yaml +rvl migrate apply --plan migration_plan.yaml --report-out migration_report.yaml +``` + +Payload-rewrite shadow workflow: + +```text +rvl migrate plan --mode iterative_shadow --index products --target-schema target_schema.yaml --platform-inventory platform_inventory.yaml --transform-plan transform_plan.yaml --plan-out migration_plan.yaml +rvl migrate apply --plan migration_plan.yaml --report-out migration_report.yaml +``` + +Expected usage pattern: + +1. Provide the schema request and platform inventory. +2. Provide `transform_plan.yaml` when the target payload shape changes. +3. Review the capacity-gate outcome, estimated migration window, and estimated peak overlap footprint. +4. Run the shadow migration for one index only. +5. Hand cutover to the operator. +6. Confirm cutover before retiring the old index and any obsolete payloads. + +## Artifacts + +Required artifacts: + +- `migration_plan.yaml` +- `migration_report.yaml` + +Optional or phase-dependent artifacts: + +- `benchmark_report.yaml` +- `platform_inventory.yaml` +- `transform_plan.yaml` +- `benchmark_manifest.yaml` + +Artifact requirements: + +- YAML-based +- stable enough for handoff and review +- readable by humans first +- structured enough for future automation + +## Operational Model + +RedisVL owns: + +- source snapshot +- schema diffing +- plan generation +- supported strategy execution +- readiness waiting +- validation +- reporting + +Operators own: + +- maintenance windows +- application behavior during migration +- platform scaling +- transform inputs for payload-shape changes +- cutover +- final go or no-go decisions in production + +The product should not imply that RedisVL is a full migration control plane. It is a migration toolset with explicit operator handoff. + +## Capacity and Scale + +Phase 1 keeps capacity handling simple: + +- use source index stats for warnings +- capture timing and impact for later planning +- avoid a complex estimator in the MVP + +Phase 2 introduces a conservative planner: + +- reason at the database level, not as “an index lives on one shard” +- treat each index as one logical distributed index even on sharded deployments +- estimate source document footprint and source index footprint separately +- estimate target document footprint and target index footprint separately +- compute peak overlap as the source footprint plus the target footprint that exists during migration +- require reserve headroom before apply +- return `READY`, `SCALE_REQUIRED`, or `MANUAL_REVIEW_REQUIRED` + +The execution rule stays simple across both phases: + +- one index at a time + +This is the core design choice that keeps the system understandable at production scale. + +## Downtime and Disruption + +Phase 1 explicitly accepts downtime. + +Expected impacts: + +- search on the affected index is unavailable between drop and recreated index readiness +- query quality may be degraded while initial indexing completes +- shared Redis resources are consumed during rebuild +- large indexes need maintenance windows or application-level degraded mode handling + +Phase 2 aims to reduce disruption, but it still has operational costs: + +- old and new index structures overlap during migration +- payload-rewrite migrations may also duplicate payloads temporarily +- memory and size can either grow or shrink depending on datatype, precision, dimension, algorithm, and payload-shape changes + +These are product facts and must be visible in the plan and report artifacts. + +## Benchmarking and Success Metrics + +Benchmarking is a product requirement, not an afterthought. + +The product should help answer: + +- how long planning takes +- how long apply takes +- how long downtime or overlap lasts +- how much document throughput the migration achieves +- how query latency changes during the migration window +- how much memory and size change between source and target +- how accurate the peak-overlap estimate was + +Core success metrics: + +- migration plan generation succeeds for supported diffs +- unsupported diffs are blocked before mutation +- Phase 1 preserves documents +- Phase 2 produces deterministic shadow plans for supported vector and payload-shape migrations +- schema match and document count match succeed after migration +- reports include stable timing, correctness, and memory-delta metrics +- benchmark rehearsals are good enough to estimate future maintenance windows and scaling decisions with confidence + +## Functional Requirements + +- plan generation from live index plus requested schema changes +- schema patch normalization +- supported-versus-blocked diff classification +- guided wizard for supported Phase 1 changes +- explicit downtime acknowledgment in Phase 1 +- structured plan, report, and benchmark outputs +- validation of schema, counts, and indexing-failure deltas +- one-index-at-a-time execution +- Phase 2 capacity-gated shadow planning +- Phase 2 support for vector and payload-shape migrations through explicit shadow planning + +## Non-Functional Requirements + +- deterministic plan outputs +- human-readable YAML artifacts +- clear failure modes +- conservative defaults +- no document deletion by the Phase 1 migrator path +- reasonable operation on large indexes without default full-key manifests +- documentation detailed enough for implementation handoff + +## Risks + +- Users may assume unsupported Phase 1 schema changes should “just work” unless the diff classifier clearly routes them to Phase 2. +- Operators may underestimate downtime for large indexes unless benchmark outputs become part of the review flow. +- Phase 2 can grow too complex if transform logic or platform-specific automation is pulled in too early. +- Capacity estimation may be wrong unless benchmark data and observed footprint deltas are captured consistently. +- Validation may be treated as optional unless the CLI and reports make it central to the workflow. + +## Rollout Plan + +### Phase 1 + +- finalize docs and task list +- implement the planner, diff classifier, CLI flow, executor, and validator +- add CI coverage for supported and blocked migration paths +- run at least one benchmark rehearsal on a representative non-production dataset + +### Phase 1.5 + +- review real implementation learnings +- update the planning workspace with observed gaps and benchmark findings +- confirm whether Phase 2 assumptions still hold + +### Phase 2 + +- implement inventory parsing +- implement transform or backfill plan modeling +- implement conservative capacity gating +- implement one-index-at-a-time shadow planning and execution +- add benchmark rehearsals for overlap duration, ETA accuracy, and memory-delta accuracy + +## Review Questions for the Team + +- Is the Phase 1 boundary narrow enough to ship quickly, but useful enough to solve real user pain? +- Is Phase 2 scoped clearly enough to own vector datatype, precision, dimension, algorithm, and payload-shape changes? +- Is operator-owned cutover still the right long-term boundary? +- Is the benchmarking scope sufficient to make migration windows and scaling decisions predictable without overbuilding a measurement subsystem? +- Does the one-index-at-a-time rule provide the right balance of simplicity and scale? + +## Decision Summary + +- Build the migration product in phases. +- Implement Phase 1 first and keep it intentionally narrow. +- Treat vector and payload-shape migrations as a core product goal, delivered in Phase 2 rather than ignored. +- Keep the plan artifact central to the workflow. +- Keep the operational model explicit. +- Use evidence from benchmark outputs to shape later migration planning. + +## References + +Detailed supporting docs in this workspace: + +- [00_index.md](./00_index.md) +- [01_context.md](./01_context.md) +- [02_architecture.md](./02_architecture.md) +- [03_benchmarking.md](./03_benchmarking.md) +- [10_v1_drop_recreate_spec.md](./10_v1_drop_recreate_spec.md) +- [11_v1_drop_recreate_tasks.md](./11_v1_drop_recreate_tasks.md) +- [12_v1_drop_recreate_tests.md](./12_v1_drop_recreate_tests.md) +- [20_v2_iterative_shadow_spec.md](./20_v2_iterative_shadow_spec.md) +- [21_v2_iterative_shadow_tasks.md](./21_v2_iterative_shadow_tasks.md) +- [22_v2_iterative_shadow_tests.md](./22_v2_iterative_shadow_tests.md) + +## User Journeys + +### Journey 1: Application Engineer Running a Simple Schema Migration + +An application engineer needs to add a new filterable metadata field to an existing index without deleting documents. They run `plan`, review the merged target schema and downtime warning, schedule a maintenance window, run `apply`, then run `validate` and hand the migration report to the team. They do not need to understand Redis internals beyond the migration inputs and the reported downtime. + +### Journey 2: Platform Engineer Reviewing a Vector Precision Migration + +A platform engineer needs to review a planned `FP32 -> FP16` migration for a large production index. They supply platform inventory, review the planner’s peak-overlap estimate, compare the projected post-cutover memory savings to previous benchmark reports, and decide whether the current deployment can run the migration safely in the next window. + +### Journey 3: Engineer Migrating from `HNSW` to `FLAT` + +An engineer wants to switch vector search behavior from `HNSW` to `FLAT` to simplify runtime performance characteristics. The planner classifies the request as a Phase 2 shadow migration, estimates the target index footprint, and produces a one-index-at-a-time plan. The operator runs the migration, validates the shadow target, and cuts traffic over once the benchmark and validation reports look acceptable. + +### Journey 4: Solutions Engineer Validating a Payload-Shape Change + +A solutions engineer wants to understand how long a customer migration will take when a new embedding model changes the stored payload shape. They create a `transform_plan.yaml`, run a rehearsal in non-production, collect benchmark timing, throughput, query-latency, and source-versus-target memory outputs, and use those artifacts to advise on maintenance windows and scaling needs. + +## User Stories + +- As an application engineer, I want to generate a migration plan before any mutation so that I can review the exact schema changes and downtime implications. +- As an application engineer, I want the Phase 1 migrator to preserve documents so that I do not have to rebuild my dataset from another source. +- As an application engineer, I want blocked Phase 1 schema changes to fail early and point me to the correct Phase 2 path so that I do not start a migration the product cannot safely complete. +- As an operator, I want migration and validation reports in YAML so that I can review, archive, and share them with other teams. +- As an operator, I want the CLI to require explicit downtime acknowledgment in Phase 1 so that maintenance-window risk is never implicit. +- As a platform engineer, I want Phase 2 to process one index at a time so that capacity planning stays understandable and bounded. +- As a platform engineer, I want the planner to estimate peak overlap and post-cutover memory deltas so that I can decide whether a migration fits safely. +- As a platform engineer, I want the shadow planner to return `READY`, `SCALE_REQUIRED`, or `MANUAL_REVIEW_REQUIRED` so that I can make a clear operational decision before execution. +- As a solutions engineer, I want benchmark outputs for duration, throughput, query impact, and memory change so that I can estimate future migrations with real evidence. +- As a maintainer, I want the migration product to reuse existing RedisVL primitives so that implementation and long-term maintenance stay simple. From a3d534b9f808d2852ae93eea1019de2b735d6d6f Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Thu, 19 Mar 2026 18:22:44 -0400 Subject: [PATCH 02/10] feat(migrate): add index migration CLI and core engine - Add rvl migrate subcommand (helper, list, plan, apply, validate) - Implement MigrationPlanner for schema diff classification - Implement MigrationExecutor with drop_recreate mode - Support vector quantization (float32 <-> float16) during migration - Add MigrationValidator for post-migration validation - Show error messages prominently on migration failure - Add migration temp files to .gitignore --- .gitignore | 8 + redisvl/cli/main.py | 6 + redisvl/cli/migrate.py | 397 +++++++++++++++++++++++++++++ redisvl/cli/utils.py | 14 +- redisvl/migration/__init__.py | 15 ++ redisvl/migration/executor.py | 335 ++++++++++++++++++++++++ redisvl/migration/models.py | 120 +++++++++ redisvl/migration/planner.py | 439 ++++++++++++++++++++++++++++++++ redisvl/migration/utils.py | 156 ++++++++++++ redisvl/migration/validation.py | 108 ++++++++ 10 files changed, 1593 insertions(+), 5 deletions(-) create mode 100644 redisvl/cli/migrate.py create mode 100644 redisvl/migration/__init__.py create mode 100644 redisvl/migration/executor.py create mode 100644 redisvl/migration/models.py create mode 100644 redisvl/migration/planner.py create mode 100644 redisvl/migration/utils.py create mode 100644 redisvl/migration/validation.py diff --git a/.gitignore b/.gitignore index cd800581..ec91877a 100644 --- a/.gitignore +++ b/.gitignore @@ -234,3 +234,11 @@ tests/data # Local working directory (personal scripts, docs, tools) local/ + +# Local notebooks (kept for development, not committed) +docs/user_guide/13_index_migrations.ipynb + +# Migration temp files (generated by rvl migrate commands) +migration_plan.yaml +migration_report.yaml +schema_patch.yaml diff --git a/redisvl/cli/main.py b/redisvl/cli/main.py index 1353192f..e62c4f62 100644 --- a/redisvl/cli/main.py +++ b/redisvl/cli/main.py @@ -2,6 +2,7 @@ import sys from redisvl.cli.index import Index +from redisvl.cli.migrate import Migrate from redisvl.cli.stats import Stats from redisvl.cli.version import Version from redisvl.utils.log import get_logger @@ -14,6 +15,7 @@ def _usage(): "rvl []\n", "Commands:", "\tindex Index manipulation (create, delete, etc.)", + "\tmigrate Index migration planning and execution", "\tversion Obtain the version of RedisVL", "\tstats Obtain statistics about an index", ] @@ -49,3 +51,7 @@ def version(self): def stats(self): Stats() exit(0) + + def migrate(self): + Migrate() + exit(0) diff --git a/redisvl/cli/migrate.py b/redisvl/cli/migrate.py new file mode 100644 index 00000000..fe527ed0 --- /dev/null +++ b/redisvl/cli/migrate.py @@ -0,0 +1,397 @@ +import argparse +import sys +from argparse import Namespace +from typing import Optional + +from redisvl.cli.utils import add_redis_connection_options, create_redis_url +from redisvl.migration import MigrationExecutor, MigrationPlanner, MigrationValidator +from redisvl.migration.utils import ( + list_indexes, + load_migration_plan, + write_benchmark_report, + write_migration_report, +) +from redisvl.migration.wizard import MigrationWizard +from redisvl.utils.log import get_logger + +logger = get_logger("[RedisVL]") + + +class Migrate: + usage = "\n".join( + [ + "rvl migrate []\n", + "Commands:", + "\thelper Show migration guidance and supported capabilities", + "\tlist List all available indexes", + "\tplan Generate a migration plan for a document-preserving drop/recreate migration", + "\twizard Interactively build a migration plan and schema patch", + "\tapply Execute a reviewed drop/recreate migration plan", + "\tvalidate Validate a completed migration plan against the live index", + "\n", + ] + ) + + def __init__(self): + parser = argparse.ArgumentParser(usage=self.usage) + parser.add_argument("command", help="Subcommand to run") + + args = parser.parse_args(sys.argv[2:3]) + if not hasattr(self, args.command): + parser.print_help() + exit(0) + + try: + getattr(self, args.command)() + except Exception as e: + logger.error(e) + exit(1) + + def helper(self): + parser = argparse.ArgumentParser( + usage="rvl migrate helper [--host --port | --url ]" + ) + parser = add_redis_connection_options(parser) + args = parser.parse_args(sys.argv[3:]) + redis_url = create_redis_url(args) + indexes = list_indexes(redis_url=redis_url) + + print( + """RedisVL Index Migrator + +Available indexes:""" + ) + if indexes: + for position, index_name in enumerate(indexes, start=1): + print(f" {position}. {index_name}") + else: + print(" (none found)") + + print( + """ +Supported changes: + - Adding or removing non-vector fields (text, tag, numeric, geo) + - Changing field options (sortable, separator, weight) + - Changing vector algorithm (FLAT, HNSW, SVS_VAMANA) + - Changing distance metric (COSINE, L2, IP) + - Tuning algorithm parameters (M, EF_CONSTRUCTION) + - Quantizing vectors (float32 to float16) + +Not yet supported: + - Changing vector dimensions + - Changing key prefix or separator + - Changing storage type (hash to JSON) + - Renaming fields + +Commands: + rvl migrate list List all indexes + rvl migrate wizard --index Guided migration builder + rvl migrate plan --index --schema-patch + rvl migrate apply --plan --allow-downtime + rvl migrate validate --plan """ + ) + + def list(self): + parser = argparse.ArgumentParser( + usage="rvl migrate list [--host --port | --url ]" + ) + parser = add_redis_connection_options(parser) + args = parser.parse_args(sys.argv[3:]) + redis_url = create_redis_url(args) + indexes = list_indexes(redis_url=redis_url) + print("Available indexes:") + for position, index_name in enumerate(indexes, start=1): + print(f"{position}. {index_name}") + + def plan(self): + parser = argparse.ArgumentParser( + usage=( + "rvl migrate plan --index " + "(--schema-patch | --target-schema )" + ) + ) + parser.add_argument("-i", "--index", help="Source index name", required=True) + parser.add_argument("--schema-patch", help="Path to a schema patch file") + parser.add_argument("--target-schema", help="Path to a target schema file") + parser.add_argument( + "--plan-out", + help="Path to write migration_plan.yaml", + default="migration_plan.yaml", + ) + parser.add_argument( + "--key-sample-limit", + help="Maximum number of keys to sample from the index keyspace", + type=int, + default=10, + ) + parser = add_redis_connection_options(parser) + + args = parser.parse_args(sys.argv[3:]) + redis_url = create_redis_url(args) + planner = MigrationPlanner(key_sample_limit=args.key_sample_limit) + plan = planner.create_plan( + args.index, + redis_url=redis_url, + schema_patch_path=args.schema_patch, + target_schema_path=args.target_schema, + ) + planner.write_plan(plan, args.plan_out) + self._print_plan_summary(args.plan_out, plan) + + def wizard(self): + parser = argparse.ArgumentParser( + usage=( + "rvl migrate wizard [--index ] " + "[--patch ] " + "[--plan-out ] [--patch-out ]" + ) + ) + parser.add_argument("-i", "--index", help="Source index name", required=False) + parser.add_argument( + "--patch", + help="Load an existing schema patch to continue editing", + default=None, + ) + parser.add_argument( + "--plan-out", + help="Path to write migration_plan.yaml", + default="migration_plan.yaml", + ) + parser.add_argument( + "--patch-out", + help="Path to write schema_patch.yaml (for later editing)", + default="schema_patch.yaml", + ) + parser.add_argument( + "--target-schema-out", + help="Optional path to write the merged target schema", + default=None, + ) + parser.add_argument( + "--key-sample-limit", + help="Maximum number of keys to sample from the index keyspace", + type=int, + default=10, + ) + parser = add_redis_connection_options(parser) + args = parser.parse_args(sys.argv[3:]) + + redis_url = create_redis_url(args) + wizard = MigrationWizard( + planner=MigrationPlanner(key_sample_limit=args.key_sample_limit) + ) + plan = wizard.run( + index_name=args.index, + redis_url=redis_url, + existing_patch_path=args.patch, + plan_out=args.plan_out, + patch_out=args.patch_out, + target_schema_out=args.target_schema_out, + ) + self._print_plan_summary(args.plan_out, plan) + + def apply(self): + parser = argparse.ArgumentParser( + usage=( + "rvl migrate apply --plan --allow-downtime " + "[--report-out ]" + ) + ) + parser.add_argument("--plan", help="Path to migration_plan.yaml", required=True) + parser.add_argument( + "--allow-downtime", + help="Explicitly acknowledge downtime for drop_recreate", + action="store_true", + ) + parser.add_argument( + "--report-out", + help="Path to write migration_report.yaml", + default="migration_report.yaml", + ) + parser.add_argument( + "--benchmark-out", + help="Optional path to write benchmark_report.yaml", + default=None, + ) + parser.add_argument( + "--query-check-file", + help="Optional YAML file containing fetch_ids and keys_exist checks", + default=None, + ) + parser = add_redis_connection_options(parser) + args = parser.parse_args(sys.argv[3:]) + + if not args.allow_downtime: + raise ValueError( + "apply requires --allow-downtime for drop_recreate migrations" + ) + + redis_url = create_redis_url(args) + plan = load_migration_plan(args.plan) + executor = MigrationExecutor() + + print(f"\nApplying migration to '{plan.source.index_name}'...") + + def progress_callback(step: str, detail: str) -> None: + step_labels = { + "drop": "[1/5] Drop index", + "quantize": "[2/5] Quantize vectors", + "create": "[3/5] Create index", + "index": "[4/5] Re-indexing", + "validate": "[5/5] Validate", + } + label = step_labels.get(step, step) + # Use carriage return to update in place for progress + if detail and not detail.startswith("done"): + print(f" {label}: {detail} ", end="\r", flush=True) + else: + print(f" {label}: {detail} ") + + report = executor.apply( + plan, + redis_url=redis_url, + query_check_file=args.query_check_file, + progress_callback=progress_callback, + ) + + # Print completion summary + if report.result == "succeeded": + total_time = report.timings.total_migration_duration_seconds or 0 + downtime = report.timings.downtime_duration_seconds or 0 + print(f"\nMigration completed in {total_time}s (downtime: {downtime}s)") + else: + print(f"\nMigration {report.result}") + # Show errors immediately for visibility + if report.validation.errors: + for error in report.validation.errors: + print(f" ERROR: {error}") + + write_migration_report(report, args.report_out) + if args.benchmark_out: + write_benchmark_report(report, args.benchmark_out) + self._print_report_summary(args.report_out, report, args.benchmark_out) + + def validate(self): + parser = argparse.ArgumentParser( + usage=( + "rvl migrate validate --plan " + "[--report-out ]" + ) + ) + parser.add_argument("--plan", help="Path to migration_plan.yaml", required=True) + parser.add_argument( + "--report-out", + help="Path to write migration_report.yaml", + default="migration_report.yaml", + ) + parser.add_argument( + "--benchmark-out", + help="Optional path to write benchmark_report.yaml", + default=None, + ) + parser.add_argument( + "--query-check-file", + help="Optional YAML file containing fetch_ids and keys_exist checks", + default=None, + ) + parser = add_redis_connection_options(parser) + args = parser.parse_args(sys.argv[3:]) + + redis_url = create_redis_url(args) + plan = load_migration_plan(args.plan) + validator = MigrationValidator() + validation, target_info, validation_duration = validator.validate( + plan, + redis_url=redis_url, + query_check_file=args.query_check_file, + ) + + from redisvl.migration.models import ( + MigrationBenchmarkSummary, + MigrationReport, + MigrationTimings, + ) + from redisvl.migration.utils import timestamp_utc + + source_size = float( + plan.source.stats_snapshot.get("vector_index_sz_mb", 0) or 0 + ) + target_size = float(target_info.get("vector_index_sz_mb", 0) or 0) + + report = MigrationReport( + source_index=plan.source.index_name, + target_index=plan.merged_target_schema["index"]["name"], + result="succeeded" if not validation.errors else "failed", + started_at=timestamp_utc(), + finished_at=timestamp_utc(), + timings=MigrationTimings(validation_duration_seconds=validation_duration), + validation=validation, + benchmark_summary=MigrationBenchmarkSummary( + source_index_size_mb=round(source_size, 3), + target_index_size_mb=round(target_size, 3), + index_size_delta_mb=round(target_size - source_size, 3), + ), + warnings=list(plan.warnings), + manual_actions=( + ["Review validation errors before proceeding."] + if validation.errors + else [] + ), + ) + write_migration_report(report, args.report_out) + if args.benchmark_out: + write_benchmark_report(report, args.benchmark_out) + self._print_report_summary(args.report_out, report, args.benchmark_out) + + def _print_plan_summary(self, plan_out: str, plan) -> None: + import os + + abs_path = os.path.abspath(plan_out) + print(f"Migration plan written to {abs_path}") + print(f"Mode: {plan.mode}") + print(f"Supported: {plan.diff_classification.supported}") + if plan.warnings: + print("Warnings:") + for warning in plan.warnings: + print(f"- {warning}") + if plan.diff_classification.blocked_reasons: + print("Blocked reasons:") + for reason in plan.diff_classification.blocked_reasons: + print(f"- {reason}") + + print("\nNext steps:") + print(f" Review the plan: cat {plan_out}") + print( + f" Apply the migration: rvl migrate apply --plan {plan_out} --allow-downtime" + ) + print(f" Validate the result: rvl migrate validate --plan {plan_out}") + print( + f"\nTo add more changes: rvl migrate wizard --index {plan.source.index_name} --patch schema_patch.yaml" + ) + print( + f"To start over: rvl migrate wizard --index {plan.source.index_name}" + ) + print(f"To cancel: rm {plan_out}") + + def _print_report_summary( + self, + report_out: str, + report, + benchmark_out: Optional[str], + ) -> None: + print(f"Migration report written to {report_out}") + print(f"Result: {report.result}") + print(f"Schema match: {report.validation.schema_match}") + print(f"Doc count match: {report.validation.doc_count_match}") + print(f"Key sample exists: {report.validation.key_sample_exists}") + print(f"Indexing failures delta: {report.validation.indexing_failures_delta}") + if report.validation.errors: + print("Errors:") + for error in report.validation.errors: + print(f"- {error}") + if report.manual_actions: + print("Manual actions:") + for action in report.manual_actions: + print(f"- {action}") + if benchmark_out: + print(f"Benchmark report written to {benchmark_out}") diff --git a/redisvl/cli/utils.py b/redisvl/cli/utils.py index 5d76a184..8245b69a 100644 --- a/redisvl/cli/utils.py +++ b/redisvl/cli/utils.py @@ -26,11 +26,7 @@ def create_redis_url(args: Namespace) -> str: return url -def add_index_parsing_options(parser: ArgumentParser) -> ArgumentParser: - parser.add_argument("-i", "--index", help="Index name", type=str, required=False) - parser.add_argument( - "-s", "--schema", help="Path to schema file", type=str, required=False - ) +def add_redis_connection_options(parser: ArgumentParser) -> ArgumentParser: parser.add_argument("-u", "--url", help="Redis URL", type=str, required=False) parser.add_argument("--host", help="Redis host", type=str, default="localhost") parser.add_argument("-p", "--port", help="Redis port", type=int, default=6379) @@ -38,3 +34,11 @@ def add_index_parsing_options(parser: ArgumentParser) -> ArgumentParser: parser.add_argument("--ssl", help="Use SSL", action="store_true") parser.add_argument("-a", "--password", help="Redis password", type=str, default="") return parser + + +def add_index_parsing_options(parser: ArgumentParser) -> ArgumentParser: + parser.add_argument("-i", "--index", help="Index name", type=str, required=False) + parser.add_argument( + "-s", "--schema", help="Path to schema file", type=str, required=False + ) + return add_redis_connection_options(parser) diff --git a/redisvl/migration/__init__.py b/redisvl/migration/__init__.py new file mode 100644 index 00000000..6cedb500 --- /dev/null +++ b/redisvl/migration/__init__.py @@ -0,0 +1,15 @@ +from redisvl.migration.executor import MigrationExecutor +from redisvl.migration.models import MigrationPlan, MigrationReport, SchemaPatch +from redisvl.migration.planner import MigrationPlanner +from redisvl.migration.validation import MigrationValidator +from redisvl.migration.wizard import MigrationWizard + +__all__ = [ + "MigrationExecutor", + "MigrationPlan", + "MigrationPlanner", + "MigrationReport", + "MigrationValidator", + "MigrationWizard", + "SchemaPatch", +] diff --git a/redisvl/migration/executor.py b/redisvl/migration/executor.py new file mode 100644 index 00000000..e34b4b04 --- /dev/null +++ b/redisvl/migration/executor.py @@ -0,0 +1,335 @@ +from __future__ import annotations + +import logging +import time +from typing import Any, Callable, Dict, Optional + +from redisvl.index import SearchIndex +from redisvl.migration.models import ( + MigrationBenchmarkSummary, + MigrationPlan, + MigrationReport, + MigrationTimings, + MigrationValidation, +) +from redisvl.migration.planner import MigrationPlanner +from redisvl.migration.utils import ( + current_source_matches_snapshot, + timestamp_utc, + wait_for_index_ready, +) +from redisvl.migration.validation import MigrationValidator +from redisvl.redis.utils import array_to_buffer, buffer_to_array +from redisvl.schema import StorageType + +logger = logging.getLogger(__name__) + + +class MigrationExecutor: + def __init__(self, validator: Optional[MigrationValidator] = None): + self.validator = validator or MigrationValidator() + + def apply( + self, + plan: MigrationPlan, + *, + redis_url: Optional[str] = None, + redis_client: Optional[Any] = None, + query_check_file: Optional[str] = None, + progress_callback: Optional[Callable[[str, Optional[str]], None]] = None, + ) -> MigrationReport: + """Apply a migration plan. + + Args: + plan: The migration plan to apply. + redis_url: Redis connection URL. + redis_client: Optional existing Redis client. + query_check_file: Optional file with query checks. + progress_callback: Optional callback(step, detail) for progress updates. + step: Current step name (e.g., "drop", "quantize", "create", "index", "validate") + detail: Optional detail string (e.g., "1000/5000 docs (20%)") + """ + started_at = timestamp_utc() + started = time.perf_counter() + + report = MigrationReport( + source_index=plan.source.index_name, + target_index=plan.merged_target_schema["index"]["name"], + result="failed", + started_at=started_at, + finished_at=started_at, + warnings=list(plan.warnings), + ) + + if not plan.diff_classification.supported: + report.validation.errors.extend(plan.diff_classification.blocked_reasons) + report.manual_actions.append( + "This change requires document migration, which is not yet supported." + ) + report.finished_at = timestamp_utc() + return report + + if not current_source_matches_snapshot( + plan.source.index_name, + plan.source.schema_snapshot, + redis_url=redis_url, + redis_client=redis_client, + ): + report.validation.errors.append( + "The current live source schema no longer matches the saved source snapshot." + ) + report.manual_actions.append( + "Re-run `rvl migrate plan` to refresh the migration plan before applying." + ) + report.finished_at = timestamp_utc() + return report + + source_index = SearchIndex.from_existing( + plan.source.index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + target_index = SearchIndex.from_dict( + plan.merged_target_schema, + redis_url=redis_url, + redis_client=redis_client, + ) + + drop_duration = 0.0 + quantize_duration = 0.0 + recreate_duration = 0.0 + indexing_duration = 0.0 + target_info: Dict[str, Any] = {} + docs_quantized = 0 + + # Check if we need to re-encode vectors for datatype changes + datatype_changes = MigrationPlanner.get_vector_datatype_changes( + plan.source.schema_snapshot, plan.merged_target_schema + ) + + def _notify(step: str, detail: Optional[str] = None) -> None: + if progress_callback: + progress_callback(step, detail) + + try: + _notify("drop", "Dropping index definition...") + drop_started = time.perf_counter() + source_index.delete(drop=False) + drop_duration = round(time.perf_counter() - drop_started, 3) + _notify("drop", f"done ({drop_duration}s)") + + # Re-encode vectors if datatype changes are needed + if datatype_changes: + _notify("quantize", "Re-encoding vectors...") + quantize_started = time.perf_counter() + docs_quantized = self._quantize_vectors( + source_index, + datatype_changes, + plan, + progress_callback=lambda done, total: _notify( + "quantize", f"{done:,}/{total:,} docs" + ), + ) + quantize_duration = round(time.perf_counter() - quantize_started, 3) + _notify( + "quantize", + f"done ({docs_quantized:,} docs in {quantize_duration}s)", + ) + report.warnings.append( + f"Re-encoded {docs_quantized} documents for vector quantization: " + f"{datatype_changes}" + ) + + _notify("create", "Creating index with new schema...") + recreate_started = time.perf_counter() + target_index.create() + recreate_duration = round(time.perf_counter() - recreate_started, 3) + _notify("create", f"done ({recreate_duration}s)") + + _notify("index", "Waiting for re-indexing...") + + def _index_progress(indexed: int, total: int, pct: float) -> None: + _notify("index", f"{indexed:,}/{total:,} docs ({pct:.0f}%)") + + target_info, indexing_duration = wait_for_index_ready( + target_index, progress_callback=_index_progress + ) + _notify("index", f"done ({indexing_duration}s)") + + _notify("validate", "Validating migration...") + validation, target_info, validation_duration = self.validator.validate( + plan, + redis_url=redis_url, + redis_client=redis_client, + query_check_file=query_check_file, + ) + _notify("validate", f"done ({validation_duration}s)") + report.validation = validation + total_duration = round(time.perf_counter() - started, 3) + report.timings = MigrationTimings( + total_migration_duration_seconds=total_duration, + drop_duration_seconds=drop_duration, + quantize_duration_seconds=( + quantize_duration if quantize_duration else None + ), + recreate_duration_seconds=recreate_duration, + initial_indexing_duration_seconds=indexing_duration, + validation_duration_seconds=validation_duration, + downtime_duration_seconds=round( + drop_duration + + quantize_duration + + recreate_duration + + indexing_duration, + 3, + ), + ) + report.benchmark_summary = self._build_benchmark_summary( + plan, + target_info, + report.timings, + ) + report.result = "succeeded" if not validation.errors else "failed" + if validation.errors: + report.manual_actions.append( + "Review validation errors before treating the migration as complete." + ) + except Exception as exc: + total_duration = round(time.perf_counter() - started, 3) + report.timings = MigrationTimings( + total_migration_duration_seconds=total_duration, + drop_duration_seconds=drop_duration or None, + quantize_duration_seconds=quantize_duration or None, + recreate_duration_seconds=recreate_duration or None, + initial_indexing_duration_seconds=indexing_duration or None, + downtime_duration_seconds=( + round( + drop_duration + + quantize_duration + + recreate_duration + + indexing_duration, + 3, + ) + if drop_duration + or quantize_duration + or recreate_duration + or indexing_duration + else None + ), + ) + report.validation = MigrationValidation( + errors=[f"Migration execution failed: {exc}"] + ) + report.manual_actions.extend( + [ + "Inspect the Redis index state before retrying.", + "If the source index was dropped, recreate it from the saved migration plan.", + ] + ) + finally: + report.finished_at = timestamp_utc() + + return report + + def _quantize_vectors( + self, + source_index: SearchIndex, + datatype_changes: Dict[str, Dict[str, str]], + plan: MigrationPlan, + progress_callback: Optional[Callable[[int, int], None]] = None, + ) -> int: + """Re-encode vectors in documents for datatype changes (quantization). + + This iterates over all documents matching the index prefix and converts + vector fields from source datatype to target datatype. + + Args: + source_index: The source SearchIndex (already dropped but client available) + datatype_changes: Dict mapping field_name -> {"source": dtype, "target": dtype} + plan: The migration plan containing schema info + progress_callback: Optional callback(docs_done, total_docs) + + Returns: + Number of documents processed + """ + client = source_index._redis_client + prefix = plan.source.schema_snapshot["index"]["prefix"] + storage_type = ( + plan.source.schema_snapshot["index"].get("storage_type", "hash").lower() + ) + + # Get estimated total from source stats + estimated_total = int(plan.source.stats_snapshot.get("num_docs", 0) or 0) + + # Get vector field dimensions for validation + field_dims: Dict[str, int] = {} + for field in plan.source.schema_snapshot.get("fields", []): + if field.get("type") == "vector" and field["name"] in datatype_changes: + field_dims[field["name"]] = field.get("attrs", {}).get("dims", 0) + + docs_processed = 0 + batch_size = 500 + cursor = 0 + + while True: + cursor, keys = client.scan( + cursor=cursor, + match=f"{prefix}*", + count=batch_size, + ) + + if keys: + pipe = client.pipeline() + keys_to_update = [] + + for key in keys: + if storage_type == "hash": + # Read all vector fields that need conversion + for field_name, change in datatype_changes.items(): + field_data = client.hget(key, field_name) + if field_data: + # Convert: source dtype -> array -> target dtype -> bytes + array = buffer_to_array(field_data, change["source"]) + new_bytes = array_to_buffer(array, change["target"]) + pipe.hset(key, field_name, new_bytes) + keys_to_update.append(key) + else: + # JSON storage - vectors stored as arrays, need different handling + logger.warning( + f"JSON storage quantization for key {key} - " + "vectors stored as arrays may not need re-encoding" + ) + + if keys_to_update: + pipe.execute() + docs_processed += len(set(keys_to_update)) + if progress_callback: + progress_callback(docs_processed, estimated_total) + + if cursor == 0: + break + + logger.info(f"Quantized {docs_processed} documents: {datatype_changes}") + return docs_processed + + def _build_benchmark_summary( + self, + plan: MigrationPlan, + target_info: dict, + timings: MigrationTimings, + ) -> MigrationBenchmarkSummary: + source_index_size = float( + plan.source.stats_snapshot.get("vector_index_sz_mb", 0) or 0 + ) + target_index_size = float(target_info.get("vector_index_sz_mb", 0) or 0) + source_num_docs = int(plan.source.stats_snapshot.get("num_docs", 0) or 0) + indexed_per_second = None + indexing_time = timings.initial_indexing_duration_seconds + if indexing_time and indexing_time > 0: + indexed_per_second = round(source_num_docs / indexing_time, 3) + + return MigrationBenchmarkSummary( + documents_indexed_per_second=indexed_per_second, + source_index_size_mb=round(source_index_size, 3), + target_index_size_mb=round(target_index_size, 3), + index_size_delta_mb=round(target_index_size - source_index_size, 3), + ) diff --git a/redisvl/migration/models.py b/redisvl/migration/models.py new file mode 100644 index 00000000..9feda0c5 --- /dev/null +++ b/redisvl/migration/models.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field, model_validator + + +class FieldUpdate(BaseModel): + """Partial field update for schema patch inputs.""" + + name: str + type: Optional[str] = None + path: Optional[str] = None + attrs: Dict[str, Any] = Field(default_factory=dict) + options: Dict[str, Any] = Field(default_factory=dict) + + @model_validator(mode="after") + def merge_options_into_attrs(self) -> "FieldUpdate": + if self.options: + merged_attrs = dict(self.attrs) + merged_attrs.update(self.options) + self.attrs = merged_attrs + self.options = {} + return self + + +class SchemaPatchChanges(BaseModel): + add_fields: List[Dict[str, Any]] = Field(default_factory=list) + remove_fields: List[str] = Field(default_factory=list) + update_fields: List[FieldUpdate] = Field(default_factory=list) + index: Dict[str, Any] = Field(default_factory=dict) + + +class SchemaPatch(BaseModel): + version: int = 1 + changes: SchemaPatchChanges = Field(default_factory=SchemaPatchChanges) + + +class KeyspaceSnapshot(BaseModel): + storage_type: str + prefixes: List[str] + key_separator: str + key_sample: List[str] = Field(default_factory=list) + + +class SourceSnapshot(BaseModel): + index_name: str + schema_snapshot: Dict[str, Any] + stats_snapshot: Dict[str, Any] + keyspace: KeyspaceSnapshot + + +class DiffClassification(BaseModel): + supported: bool + blocked_reasons: List[str] = Field(default_factory=list) + + +class ValidationPolicy(BaseModel): + require_doc_count_match: bool = True + require_schema_match: bool = True + + +class MigrationPlan(BaseModel): + version: int = 1 + mode: str = "drop_recreate" + source: SourceSnapshot + requested_changes: Dict[str, Any] + merged_target_schema: Dict[str, Any] + diff_classification: DiffClassification + warnings: List[str] = Field(default_factory=list) + validation: ValidationPolicy = Field(default_factory=ValidationPolicy) + + +class QueryCheckResult(BaseModel): + name: str + passed: bool + details: Optional[str] = None + + +class MigrationValidation(BaseModel): + schema_match: bool = False + doc_count_match: bool = False + key_sample_exists: bool = False + indexing_failures_delta: int = 0 + query_checks: List[QueryCheckResult] = Field(default_factory=list) + errors: List[str] = Field(default_factory=list) + + +class MigrationTimings(BaseModel): + total_migration_duration_seconds: Optional[float] = None + drop_duration_seconds: Optional[float] = None + quantize_duration_seconds: Optional[float] = None + recreate_duration_seconds: Optional[float] = None + initial_indexing_duration_seconds: Optional[float] = None + validation_duration_seconds: Optional[float] = None + downtime_duration_seconds: Optional[float] = None + + +class MigrationBenchmarkSummary(BaseModel): + documents_indexed_per_second: Optional[float] = None + source_index_size_mb: Optional[float] = None + target_index_size_mb: Optional[float] = None + index_size_delta_mb: Optional[float] = None + + +class MigrationReport(BaseModel): + version: int = 1 + mode: str = "drop_recreate" + source_index: str + target_index: str + result: str + started_at: str + finished_at: str + timings: MigrationTimings = Field(default_factory=MigrationTimings) + validation: MigrationValidation = Field(default_factory=MigrationValidation) + benchmark_summary: MigrationBenchmarkSummary = Field( + default_factory=MigrationBenchmarkSummary + ) + warnings: List[str] = Field(default_factory=list) + manual_actions: List[str] = Field(default_factory=list) diff --git a/redisvl/migration/planner.py b/redisvl/migration/planner.py new file mode 100644 index 00000000..8f79db1b --- /dev/null +++ b/redisvl/migration/planner.py @@ -0,0 +1,439 @@ +from __future__ import annotations + +from copy import deepcopy +from pathlib import Path +from typing import Any, Dict, List, Optional + +import yaml + +from redisvl.index import SearchIndex +from redisvl.migration.models import ( + DiffClassification, + KeyspaceSnapshot, + MigrationPlan, + SchemaPatch, + SourceSnapshot, +) +from redisvl.schema.schema import IndexSchema + + +class MigrationPlanner: + """Migration planner for document-preserving drop/recreate flows. + + The `drop_recreate` mode drops the index definition and recreates it with + a new schema. Documents remain untouched in Redis. + + This means: + - Index-only changes work (algorithm, distance metric, tuning params) + - Document-dependent changes fail (the index expects data in a format + that doesn't match what's stored) + + Document-dependent changes (not supported): + - Vector dimensions: stored vectors have wrong number of dimensions + - Prefix/keyspace: documents are at keys the new index won't scan + - Field rename: documents store data under the old field name + - Storage type: documents are in hash format but index expects JSON + """ + + def __init__(self, key_sample_limit: int = 10): + self.key_sample_limit = key_sample_limit + + def create_plan( + self, + index_name: str, + *, + redis_url: Optional[str] = None, + schema_patch_path: Optional[str] = None, + target_schema_path: Optional[str] = None, + redis_client: Optional[Any] = None, + ) -> MigrationPlan: + if not schema_patch_path and not target_schema_path: + raise ValueError( + "Must provide either --schema-patch or --target-schema for migration planning" + ) + if schema_patch_path and target_schema_path: + raise ValueError( + "Provide only one of --schema-patch or --target-schema for migration planning" + ) + + snapshot = self.snapshot_source( + index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + source_schema = IndexSchema.from_dict(snapshot.schema_snapshot) + + if schema_patch_path: + schema_patch = self.load_schema_patch(schema_patch_path) + else: + schema_patch = self.normalize_target_schema_to_patch( + source_schema, target_schema_path + ) + + return self.create_plan_from_patch( + index_name, + schema_patch=schema_patch, + redis_url=redis_url, + redis_client=redis_client, + ) + + def create_plan_from_patch( + self, + index_name: str, + *, + schema_patch: SchemaPatch, + redis_url: Optional[str] = None, + redis_client: Optional[Any] = None, + ) -> MigrationPlan: + snapshot = self.snapshot_source( + index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + source_schema = IndexSchema.from_dict(snapshot.schema_snapshot) + merged_target_schema = self.merge_patch(source_schema, schema_patch) + diff_classification = self.classify_diff( + source_schema, schema_patch, merged_target_schema + ) + + return MigrationPlan( + source=snapshot, + requested_changes=schema_patch.model_dump(exclude_none=True), + merged_target_schema=merged_target_schema.to_dict(), + diff_classification=diff_classification, + warnings=["Index downtime is required"], + ) + + def snapshot_source( + self, + index_name: str, + *, + redis_url: Optional[str] = None, + redis_client: Optional[Any] = None, + ) -> SourceSnapshot: + index = SearchIndex.from_existing( + index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + schema_dict = index.schema.to_dict() + stats_snapshot = index.info() + prefixes = index.schema.index.prefix + prefix_list = prefixes if isinstance(prefixes, list) else [prefixes] + + return SourceSnapshot( + index_name=index_name, + schema_snapshot=schema_dict, + stats_snapshot=stats_snapshot, + keyspace=KeyspaceSnapshot( + storage_type=index.schema.index.storage_type.value, + prefixes=prefix_list, + key_separator=index.schema.index.key_separator, + key_sample=self._sample_keys( + client=index.client, + prefixes=prefix_list, + key_separator=index.schema.index.key_separator, + ), + ), + ) + + def load_schema_patch(self, schema_patch_path: str) -> SchemaPatch: + patch_path = Path(schema_patch_path).resolve() + if not patch_path.exists(): + raise FileNotFoundError( + f"Schema patch file {schema_patch_path} does not exist" + ) + + with open(patch_path, "r") as f: + patch_data = yaml.safe_load(f) or {} + return SchemaPatch.model_validate(patch_data) + + def normalize_target_schema_to_patch( + self, source_schema: IndexSchema, target_schema_path: str + ) -> SchemaPatch: + target_schema = IndexSchema.from_yaml(target_schema_path) + source_dict = source_schema.to_dict() + target_dict = target_schema.to_dict() + + changes: Dict[str, Any] = { + "add_fields": [], + "remove_fields": [], + "update_fields": [], + "index": {}, + } + + source_fields = {field["name"]: field for field in source_dict["fields"]} + target_fields = {field["name"]: field for field in target_dict["fields"]} + + for field_name, target_field in target_fields.items(): + if field_name not in source_fields: + changes["add_fields"].append(target_field) + elif source_fields[field_name] != target_field: + changes["update_fields"].append(target_field) + + for field_name in source_fields: + if field_name not in target_fields: + changes["remove_fields"].append(field_name) + + for index_key, target_value in target_dict["index"].items(): + source_value = source_dict["index"].get(index_key) + if source_value != target_value: + changes["index"][index_key] = target_value + + return SchemaPatch.model_validate({"version": 1, "changes": changes}) + + def merge_patch( + self, source_schema: IndexSchema, schema_patch: SchemaPatch + ) -> IndexSchema: + schema_dict = deepcopy(source_schema.to_dict()) + changes = schema_patch.changes + fields_by_name = { + field["name"]: deepcopy(field) for field in schema_dict["fields"] + } + + for field_name in changes.remove_fields: + fields_by_name.pop(field_name, None) + + for field_update in changes.update_fields: + if field_update.name not in fields_by_name: + raise ValueError( + f"Cannot update field '{field_update.name}' because it does not exist in the source schema" + ) + existing_field = fields_by_name[field_update.name] + if field_update.type is not None: + existing_field["type"] = field_update.type + if field_update.path is not None: + existing_field["path"] = field_update.path + if field_update.attrs: + merged_attrs = dict(existing_field.get("attrs", {})) + merged_attrs.update(field_update.attrs) + existing_field["attrs"] = merged_attrs + + for field in changes.add_fields: + field_name = field["name"] + if field_name in fields_by_name: + raise ValueError( + f"Cannot add field '{field_name}' because it already exists in the source schema" + ) + fields_by_name[field_name] = deepcopy(field) + + schema_dict["fields"] = list(fields_by_name.values()) + schema_dict["index"].update(changes.index) + return IndexSchema.from_dict(schema_dict) + + def classify_diff( + self, + source_schema: IndexSchema, + schema_patch: SchemaPatch, + merged_target_schema: IndexSchema, + ) -> DiffClassification: + blocked_reasons: List[str] = [] + changes = schema_patch.changes + source_dict = source_schema.to_dict() + target_dict = merged_target_schema.to_dict() + + for index_key, target_value in changes.index.items(): + source_value = source_dict["index"].get(index_key) + if source_value == target_value: + continue + if index_key == "name": + blocked_reasons.append( + "Changing the index name requires document migration (not yet supported)." + ) + elif index_key == "prefix": + blocked_reasons.append( + "Changing index prefixes requires document migration (not yet supported)." + ) + elif index_key == "key_separator": + blocked_reasons.append( + "Changing the key separator requires document migration (not yet supported)." + ) + elif index_key == "storage_type": + blocked_reasons.append( + "Changing the storage type requires document migration (not yet supported)." + ) + + source_fields = {field["name"]: field for field in source_dict["fields"]} + target_fields = {field["name"]: field for field in target_dict["fields"]} + + for field in changes.add_fields: + if field["type"] == "vector": + blocked_reasons.append( + f"Adding vector field '{field['name']}' requires document migration (not yet supported)." + ) + + for field_update in changes.update_fields: + source_field = source_fields[field_update.name] + target_field = target_fields[field_update.name] + source_type = source_field["type"] + target_type = target_field["type"] + + if source_type != target_type: + blocked_reasons.append( + f"Changing field '{field_update.name}' type from {source_type} to {target_type} is not supported by drop_recreate." + ) + continue + + source_path = source_field.get("path") + target_path = target_field.get("path") + if source_path != target_path: + blocked_reasons.append( + f"Changing field '{field_update.name}' path from {source_path} to {target_path} is not supported by drop_recreate." + ) + continue + + if target_type == "vector" and source_field != target_field: + # Check for document-dependent changes that are not yet supported + vector_blocked = self._classify_vector_field_change( + source_field, target_field + ) + blocked_reasons.extend(vector_blocked) + + blocked_reasons.extend( + self._detect_possible_field_renames(source_fields, target_fields) + ) + + return DiffClassification( + supported=len(blocked_reasons) == 0, + blocked_reasons=self._dedupe(blocked_reasons), + ) + + def write_plan(self, plan: MigrationPlan, plan_out: str) -> None: + plan_path = Path(plan_out).resolve() + with open(plan_path, "w") as f: + yaml.safe_dump(plan.model_dump(exclude_none=True), f, sort_keys=False) + + def _sample_keys( + self, *, client: Any, prefixes: List[str], key_separator: str + ) -> List[str]: + key_sample: List[str] = [] + if client is None or self.key_sample_limit <= 0: + return key_sample + + for prefix in prefixes: + if len(key_sample) >= self.key_sample_limit: + break + match_pattern = ( + f"{prefix}*" + if prefix.endswith(key_separator) + else f"{prefix}{key_separator}*" + ) + cursor = 0 + while True: + cursor, keys = client.scan( + cursor=cursor, + match=match_pattern, + count=max(self.key_sample_limit, 10), + ) + for key in keys: + decoded_key = key.decode() if isinstance(key, bytes) else str(key) + if decoded_key not in key_sample: + key_sample.append(decoded_key) + if len(key_sample) >= self.key_sample_limit: + return key_sample + if cursor == 0: + break + return key_sample + + def _detect_possible_field_renames( + self, + source_fields: Dict[str, Dict[str, Any]], + target_fields: Dict[str, Dict[str, Any]], + ) -> List[str]: + blocked_reasons: List[str] = [] + added_fields = [ + field for name, field in target_fields.items() if name not in source_fields + ] + removed_fields = [ + field for name, field in source_fields.items() if name not in target_fields + ] + + for removed_field in removed_fields: + for added_field in added_fields: + if self._fields_match_except_name(removed_field, added_field): + blocked_reasons.append( + f"Possible field rename from '{removed_field['name']}' to '{added_field['name']}' is not supported by drop_recreate." + ) + return blocked_reasons + + @staticmethod + def _classify_vector_field_change( + source_field: Dict[str, Any], target_field: Dict[str, Any] + ) -> List[str]: + """Classify vector field changes as supported or blocked for drop_recreate. + + Index-only changes (allowed with drop_recreate): + - algorithm (FLAT -> HNSW -> SVS-VAMANA) + - distance_metric (COSINE, L2, IP) + - initial_cap + - Algorithm tuning: m, ef_construction, ef_runtime, epsilon, block_size, + graph_max_degree, construction_window_size, search_window_size, etc. + + Quantization changes (allowed with drop_recreate, requires vector re-encoding): + - datatype (float32 -> float16, etc.) - executor will re-encode vectors + + Document-dependent changes (blocked, not yet supported): + - dims (vectors stored with wrong number of dimensions) + """ + blocked_reasons: List[str] = [] + field_name = source_field.get("name", "unknown") + source_attrs = source_field.get("attrs", {}) + target_attrs = target_field.get("attrs", {}) + + # Document-dependent properties (not yet supported) + if source_attrs.get("dims") != target_attrs.get("dims"): + blocked_reasons.append( + f"Changing vector field '{field_name}' dims from {source_attrs.get('dims')} " + f"to {target_attrs.get('dims')} requires document migration (not yet supported). " + "Vectors are stored with incompatible dimensions." + ) + + # Datatype changes are now ALLOWED - executor will re-encode vectors + # before recreating the index + + # All other vector changes are index-only and allowed + return blocked_reasons + + @staticmethod + def get_vector_datatype_changes( + source_schema: Dict[str, Any], target_schema: Dict[str, Any] + ) -> Dict[str, Dict[str, str]]: + """Identify vector fields that need datatype conversion (quantization). + + Returns: + Dict mapping field_name -> {"source": source_dtype, "target": target_dtype} + """ + changes: Dict[str, Dict[str, str]] = {} + source_fields = {f["name"]: f for f in source_schema.get("fields", [])} + target_fields = {f["name"]: f for f in target_schema.get("fields", [])} + + for name, source_field in source_fields.items(): + if source_field.get("type") != "vector": + continue + target_field = target_fields.get(name) + if not target_field or target_field.get("type") != "vector": + continue + + source_dtype = source_field.get("attrs", {}).get("datatype", "float32") + target_dtype = target_field.get("attrs", {}).get("datatype", "float32") + + if source_dtype != target_dtype: + changes[name] = {"source": source_dtype, "target": target_dtype} + + return changes + + @staticmethod + def _fields_match_except_name( + source_field: Dict[str, Any], target_field: Dict[str, Any] + ) -> bool: + comparable_source = {k: v for k, v in source_field.items() if k != "name"} + comparable_target = {k: v for k, v in target_field.items() if k != "name"} + return comparable_source == comparable_target + + @staticmethod + def _dedupe(values: List[str]) -> List[str]: + deduped: List[str] = [] + for value in values: + if value not in deduped: + deduped.append(value) + return deduped diff --git a/redisvl/migration/utils.py b/redisvl/migration/utils.py new file mode 100644 index 00000000..a5b12766 --- /dev/null +++ b/redisvl/migration/utils.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +import json +import time +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Tuple + +import yaml + +from redisvl.index import SearchIndex +from redisvl.migration.models import MigrationPlan, MigrationReport +from redisvl.redis.connection import RedisConnectionFactory +from redisvl.schema.schema import IndexSchema + + +def list_indexes( + *, redis_url: Optional[str] = None, redis_client: Optional[Any] = None +): + if redis_client is None: + if not redis_url: + raise ValueError("Must provide either redis_url or redis_client") + redis_client = RedisConnectionFactory.get_redis_connection(redis_url=redis_url) + index = SearchIndex.from_dict( + {"index": {"name": "__redisvl_migration_helper__"}, "fields": []}, + redis_client=redis_client, + ) + return index.listall() + + +def load_yaml(path: str) -> Dict[str, Any]: + resolved = Path(path).resolve() + with open(resolved, "r") as f: + return yaml.safe_load(f) or {} + + +def write_yaml(data: Dict[str, Any], path: str) -> None: + resolved = Path(path).resolve() + with open(resolved, "w") as f: + yaml.safe_dump(data, f, sort_keys=False) + + +def load_migration_plan(path: str) -> MigrationPlan: + return MigrationPlan.model_validate(load_yaml(path)) + + +def write_migration_report(report: MigrationReport, path: str) -> None: + write_yaml(report.model_dump(exclude_none=True), path) + + +def write_benchmark_report(report: MigrationReport, path: str) -> None: + benchmark_report = { + "version": report.version, + "mode": report.mode, + "source_index": report.source_index, + "target_index": report.target_index, + "result": report.result, + "timings": report.timings.model_dump(exclude_none=True), + "benchmark_summary": report.benchmark_summary.model_dump(exclude_none=True), + "validation": { + "schema_match": report.validation.schema_match, + "doc_count_match": report.validation.doc_count_match, + "indexing_failures_delta": report.validation.indexing_failures_delta, + "key_sample_exists": report.validation.key_sample_exists, + }, + } + write_yaml(benchmark_report, path) + + +def canonicalize_schema(schema_dict: Dict[str, Any]) -> Dict[str, Any]: + schema = IndexSchema.from_dict(schema_dict).to_dict() + schema["fields"] = sorted(schema.get("fields", []), key=lambda field: field["name"]) + prefixes = schema["index"].get("prefix") + if isinstance(prefixes, list): + schema["index"]["prefix"] = sorted(prefixes) + stopwords = schema["index"].get("stopwords") + if isinstance(stopwords, list): + schema["index"]["stopwords"] = list(stopwords) + return schema + + +def schemas_equal(left: Dict[str, Any], right: Dict[str, Any]) -> bool: + return json.dumps(canonicalize_schema(left), sort_keys=True) == json.dumps( + canonicalize_schema(right), sort_keys=True + ) + + +def wait_for_index_ready( + index: SearchIndex, + *, + timeout_seconds: int = 1800, + poll_interval_seconds: float = 0.5, + progress_callback: Optional[Callable[[int, int, float], None]] = None, +) -> Tuple[Dict[str, Any], float]: + """Wait for index to finish indexing all documents. + + Args: + index: The SearchIndex to monitor. + timeout_seconds: Maximum time to wait. + poll_interval_seconds: How often to check status. + progress_callback: Optional callback(indexed_docs, total_docs, percent). + """ + start = time.perf_counter() + deadline = start + timeout_seconds + latest_info = index.info() + + stable_ready_checks = 0 + while time.perf_counter() < deadline: + latest_info = index.info() + indexing = latest_info.get("indexing") + percent_indexed = latest_info.get("percent_indexed") + + if percent_indexed is not None or indexing is not None: + ready = float(percent_indexed or 0) >= 1.0 and not bool(indexing) + if progress_callback: + total_docs = int(latest_info.get("num_docs", 0)) + pct = float(percent_indexed or 0) + indexed_docs = int(total_docs * pct) + progress_callback(indexed_docs, total_docs, pct * 100) + else: + current_docs = latest_info.get("num_docs") + if current_docs is None: + ready = True + else: + if stable_ready_checks == 0: + stable_ready_checks = int(current_docs) + time.sleep(poll_interval_seconds) + continue + ready = int(current_docs) == stable_ready_checks + + if ready: + return latest_info, round(time.perf_counter() - start, 3) + + time.sleep(poll_interval_seconds) + + raise TimeoutError( + f"Index {index.schema.index.name} did not become ready within {timeout_seconds} seconds" + ) + + +def current_source_matches_snapshot( + index_name: str, + expected_schema: Dict[str, Any], + *, + redis_url: Optional[str] = None, + redis_client: Optional[Any] = None, +) -> bool: + current_index = SearchIndex.from_existing( + index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + return schemas_equal(current_index.schema.to_dict(), expected_schema) + + +def timestamp_utc() -> str: + return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) diff --git a/redisvl/migration/validation.py b/redisvl/migration/validation.py new file mode 100644 index 00000000..f5bc0ca7 --- /dev/null +++ b/redisvl/migration/validation.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import time +from typing import Any, Dict, Optional + +from redisvl.index import SearchIndex +from redisvl.migration.models import ( + MigrationPlan, + MigrationValidation, + QueryCheckResult, +) +from redisvl.migration.utils import load_yaml, schemas_equal + + +class MigrationValidator: + def validate( + self, + plan: MigrationPlan, + *, + redis_url: Optional[str] = None, + redis_client: Optional[Any] = None, + query_check_file: Optional[str] = None, + ) -> tuple[MigrationValidation, Dict[str, Any], float]: + started = time.perf_counter() + target_index = SearchIndex.from_existing( + plan.merged_target_schema["index"]["name"], + redis_url=redis_url, + redis_client=redis_client, + ) + target_info = target_index.info() + validation = MigrationValidation() + + live_schema = target_index.schema.to_dict() + validation.schema_match = schemas_equal(live_schema, plan.merged_target_schema) + + source_num_docs = int(plan.source.stats_snapshot.get("num_docs", 0) or 0) + target_num_docs = int(target_info.get("num_docs", 0) or 0) + validation.doc_count_match = source_num_docs == target_num_docs + + source_failures = int( + plan.source.stats_snapshot.get("hash_indexing_failures", 0) or 0 + ) + target_failures = int(target_info.get("hash_indexing_failures", 0) or 0) + validation.indexing_failures_delta = target_failures - source_failures + + key_sample = plan.source.keyspace.key_sample + if not key_sample: + validation.key_sample_exists = True + else: + existing_count = target_index.client.exists(*key_sample) + validation.key_sample_exists = existing_count == len(key_sample) + + if query_check_file: + validation.query_checks = self._run_query_checks( + target_index, + query_check_file, + ) + + if not validation.schema_match: + validation.errors.append("Live schema does not match merged_target_schema.") + if not validation.doc_count_match: + validation.errors.append( + "Live document count does not match source num_docs." + ) + if validation.indexing_failures_delta != 0: + validation.errors.append("Indexing failures increased during migration.") + if not validation.key_sample_exists: + validation.errors.append( + "One or more sampled source keys is missing after migration." + ) + if any(not query_check.passed for query_check in validation.query_checks): + validation.errors.append("One or more query checks failed.") + + return validation, target_info, round(time.perf_counter() - started, 3) + + def _run_query_checks( + self, + target_index: SearchIndex, + query_check_file: str, + ) -> list[QueryCheckResult]: + query_checks = load_yaml(query_check_file) + results: list[QueryCheckResult] = [] + + for doc_id in query_checks.get("fetch_ids", []): + fetched = target_index.fetch(doc_id) + results.append( + QueryCheckResult( + name=f"fetch:{doc_id}", + passed=fetched is not None, + details=( + "Document fetched successfully" + if fetched + else "Document not found" + ), + ) + ) + + for key in query_checks.get("keys_exist", []): + exists = bool(target_index.client.exists(key)) + results.append( + QueryCheckResult( + name=f"key:{key}", + passed=exists, + details="Key exists" if exists else "Key not found", + ) + ) + + return results From b06e949a4514a4776912a066733fd0d98c35b084 Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Thu, 19 Mar 2026 18:22:52 -0400 Subject: [PATCH 03/10] feat(migrate): add interactive migration wizard - Add MigrationWizard for guided schema changes - Support add/update/remove field operations - Algorithm-specific datatype prompts (SVS-VAMANA vs HNSW/FLAT) - SVS-VAMANA params: GRAPH_MAX_DEGREE, COMPRESSION - HNSW params: M, EF_CONSTRUCTION - Normalize SVS_VAMANA -> SVS-VAMANA input - Preview patch as YAML before finishing --- redisvl/migration/wizard.py | 502 ++++++++++++++++++++++++++++++++++++ 1 file changed, 502 insertions(+) create mode 100644 redisvl/migration/wizard.py diff --git a/redisvl/migration/wizard.py b/redisvl/migration/wizard.py new file mode 100644 index 00000000..feb4d3b0 --- /dev/null +++ b/redisvl/migration/wizard.py @@ -0,0 +1,502 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any, Dict, List, Optional + +import yaml + +from redisvl.migration.models import FieldUpdate, SchemaPatch, SchemaPatchChanges +from redisvl.migration.planner import MigrationPlanner +from redisvl.migration.utils import list_indexes, write_yaml +from redisvl.schema.schema import IndexSchema + +SUPPORTED_FIELD_TYPES = ["text", "tag", "numeric", "geo"] +UPDATABLE_FIELD_TYPES = ["text", "tag", "numeric", "geo", "vector"] + + +class MigrationWizard: + def __init__(self, planner: Optional[MigrationPlanner] = None): + self.planner = planner or MigrationPlanner() + + def run( + self, + *, + index_name: Optional[str] = None, + redis_url: Optional[str] = None, + redis_client: Optional[Any] = None, + existing_patch_path: Optional[str] = None, + plan_out: str = "migration_plan.yaml", + patch_out: Optional[str] = None, + target_schema_out: Optional[str] = None, + ): + resolved_index_name = self._resolve_index_name( + index_name=index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + snapshot = self.planner.snapshot_source( + resolved_index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + source_schema = IndexSchema.from_dict(snapshot.schema_snapshot) + + print(f"Building a migration plan for index '{resolved_index_name}'") + self._print_source_schema(source_schema.to_dict()) + + # Load existing patch if provided + existing_changes = None + if existing_patch_path: + existing_changes = self._load_existing_patch(existing_patch_path) + + schema_patch = self._build_patch( + source_schema.to_dict(), existing_changes=existing_changes + ) + plan = self.planner.create_plan_from_patch( + resolved_index_name, + schema_patch=schema_patch, + redis_url=redis_url, + redis_client=redis_client, + ) + self.planner.write_plan(plan, plan_out) + + if patch_out: + write_yaml(schema_patch.model_dump(exclude_none=True), patch_out) + if target_schema_out: + write_yaml(plan.merged_target_schema, target_schema_out) + + return plan + + def _load_existing_patch(self, patch_path: str) -> SchemaPatchChanges: + from redisvl.migration.utils import load_yaml + + data = load_yaml(patch_path) + patch = SchemaPatch.model_validate(data) + print(f"Loaded existing patch from {patch_path}") + print(f" Add fields: {len(patch.changes.add_fields)}") + print(f" Update fields: {len(patch.changes.update_fields)}") + print(f" Remove fields: {len(patch.changes.remove_fields)}") + return patch.changes + + def _resolve_index_name( + self, + *, + index_name: Optional[str], + redis_url: Optional[str], + redis_client: Optional[Any], + ) -> str: + if index_name: + return index_name + + indexes = list_indexes(redis_url=redis_url, redis_client=redis_client) + if not indexes: + raise ValueError("No indexes found in Redis") + + print("Available indexes:") + for position, name in enumerate(indexes, start=1): + print(f"{position}. {name}") + + while True: + choice = input("Select an index by number or name: ").strip() + if choice in indexes: + return choice + if choice.isdigit(): + offset = int(choice) - 1 + if 0 <= offset < len(indexes): + return indexes[offset] + print("Invalid selection. Please try again.") + + def _build_patch( + self, + source_schema: Dict[str, Any], + existing_changes: Optional[SchemaPatchChanges] = None, + ) -> SchemaPatch: + if existing_changes: + changes = existing_changes + else: + changes = SchemaPatchChanges() + done = False + while not done: + print("\nChoose an action:") + print("1. Add field (text, tag, numeric, geo)") + print("2. Update field (sortable, weight, separator, vector config)") + print("3. Remove field") + print("4. Preview patch (show pending changes as YAML)") + print("5. Finish") + action = input("Enter a number: ").strip() + + if action == "1": + field = self._prompt_add_field(source_schema) + if field: + changes.add_fields.append(field) + elif action == "2": + update = self._prompt_update_field(source_schema) + if update: + changes.update_fields.append(update) + elif action == "3": + field_name = self._prompt_remove_field(source_schema) + if field_name: + changes.remove_fields.append(field_name) + elif action == "4": + print( + yaml.safe_dump( + {"version": 1, "changes": changes.model_dump()}, sort_keys=False + ) + ) + elif action == "5": + done = True + else: + print("Invalid action. Please choose 1-5.") + + return SchemaPatch(version=1, changes=changes) + + def _prompt_add_field( + self, source_schema: Dict[str, Any] + ) -> Optional[Dict[str, Any]]: + field_name = input("Field name: ").strip() + existing_names = {field["name"] for field in source_schema["fields"]} + if not field_name: + print("Field name is required.") + return None + if field_name in existing_names: + print(f"Field '{field_name}' already exists in the source schema.") + return None + + field_type = self._prompt_from_choices( + "Field type", + SUPPORTED_FIELD_TYPES, + block_message="Vector fields cannot be added (requires embedding all documents). Only text, tag, numeric, and geo are supported.", + ) + if not field_type: + return None + + field: Dict[str, Any] = {"name": field_name, "type": field_type} + storage_type = source_schema["index"]["storage_type"] + if storage_type == "json": + print(" JSON path: location in document where this field is stored") + path = ( + input(f"JSON path [default $.{field_name}]: ").strip() + or f"$.{field_name}" + ) + field["path"] = path + + attrs = self._prompt_common_attrs(field_type) + if attrs: + field["attrs"] = attrs + return field + + def _prompt_update_field( + self, source_schema: Dict[str, Any] + ) -> Optional[FieldUpdate]: + fields = [ + field + for field in source_schema["fields"] + if field["type"] in UPDATABLE_FIELD_TYPES + ] + if not fields: + print("No updatable fields are available.") + return None + + print("Updatable fields:") + for position, field in enumerate(fields, start=1): + print(f"{position}. {field['name']} ({field['type']})") + + choice = input("Select a field to update by number or name: ").strip() + selected: Optional[Dict[str, Any]] = None + for position, field in enumerate(fields, start=1): + if choice == str(position) or choice == field["name"]: + selected = field + break + if not selected: + print("Invalid field selection.") + return None + + if selected["type"] == "vector": + attrs = self._prompt_vector_attrs(selected) + else: + attrs = self._prompt_common_attrs(selected["type"], allow_blank=True) + if not attrs: + print("No changes collected.") + return None + return FieldUpdate(name=selected["name"], attrs=attrs) + + def _prompt_remove_field(self, source_schema: Dict[str, Any]) -> Optional[str]: + removable_fields = [ + field["name"] + for field in source_schema["fields"] + if field["type"] != "vector" + ] + if not removable_fields: + print("No removable Phase 1 fields are available.") + return None + + print("Removable fields:") + for position, field_name in enumerate(removable_fields, start=1): + print(f"{position}. {field_name}") + + choice = input("Select a field to remove by number or name: ").strip() + if choice in removable_fields: + return choice + if choice.isdigit(): + offset = int(choice) - 1 + if 0 <= offset < len(removable_fields): + return removable_fields[offset] + print("Invalid field selection.") + return None + + def _prompt_common_attrs( + self, field_type: str, allow_blank: bool = False + ) -> Dict[str, Any]: + attrs: Dict[str, Any] = {} + + # Sortable - available for all non-vector types + print(" Sortable: enables sorting and aggregation on this field") + sortable = self._prompt_bool("Sortable", allow_blank=allow_blank) + if sortable is not None: + attrs["sortable"] = sortable + + # Index missing - available for all types (requires Redis Search 2.10+) + print( + " Index missing: enables ismissing() queries for documents without this field" + ) + index_missing = self._prompt_bool("Index missing", allow_blank=allow_blank) + if index_missing is not None: + attrs["index_missing"] = index_missing + + # Type-specific attributes + if field_type == "text": + self._prompt_text_attrs(attrs, allow_blank) + elif field_type == "tag": + self._prompt_tag_attrs(attrs, allow_blank) + elif field_type == "numeric": + self._prompt_numeric_attrs(attrs, allow_blank, sortable) + + # No index - only meaningful with sortable + if sortable or (allow_blank and attrs.get("sortable")): + print(" No index: store field for sorting only, not searchable") + no_index = self._prompt_bool("No index", allow_blank=allow_blank) + if no_index is not None: + attrs["no_index"] = no_index + + return attrs + + def _prompt_text_attrs(self, attrs: Dict[str, Any], allow_blank: bool) -> None: + """Prompt for text field specific attributes.""" + # No stem + print( + " Disable stemming: prevents word variations (running/runs) from matching" + ) + no_stem = self._prompt_bool("Disable stemming", allow_blank=allow_blank) + if no_stem is not None: + attrs["no_stem"] = no_stem + + # Weight + print(" Weight: relevance multiplier for full-text search (default: 1.0)") + weight_input = input("Weight [leave blank for default]: ").strip() + if weight_input: + try: + weight = float(weight_input) + if weight > 0: + attrs["weight"] = weight + else: + print("Weight must be positive.") + except ValueError: + print("Invalid weight value.") + + # Index empty (requires Redis Search 2.10+) + print(" Index empty: enables searching for empty string values") + index_empty = self._prompt_bool("Index empty", allow_blank=allow_blank) + if index_empty is not None: + attrs["index_empty"] = index_empty + + # UNF (only if sortable) + if attrs.get("sortable"): + print(" UNF: preserve original form (no lowercasing) for sorting") + unf = self._prompt_bool("UNF (un-normalized form)", allow_blank=allow_blank) + if unf is not None: + attrs["unf"] = unf + + def _prompt_tag_attrs(self, attrs: Dict[str, Any], allow_blank: bool) -> None: + """Prompt for tag field specific attributes.""" + # Separator + print(" Separator: character that splits multiple values (default: comma)") + separator = input("Separator [leave blank to keep existing/default]: ").strip() + if separator: + attrs["separator"] = separator + + # Case sensitive + print(" Case sensitive: match tags with exact case (default: false)") + case_sensitive = self._prompt_bool("Case sensitive", allow_blank=allow_blank) + if case_sensitive is not None: + attrs["case_sensitive"] = case_sensitive + + # Index empty (requires Redis Search 2.10+) + print(" Index empty: enables searching for empty tag values") + index_empty = self._prompt_bool("Index empty", allow_blank=allow_blank) + if index_empty is not None: + attrs["index_empty"] = index_empty + + def _prompt_numeric_attrs( + self, attrs: Dict[str, Any], allow_blank: bool, sortable: Optional[bool] + ) -> None: + """Prompt for numeric field specific attributes.""" + # UNF (only if sortable) + if sortable or attrs.get("sortable"): + print(" UNF: preserve exact numeric representation for sorting") + unf = self._prompt_bool("UNF (un-normalized form)", allow_blank=allow_blank) + if unf is not None: + attrs["unf"] = unf + + def _prompt_vector_attrs(self, field: Dict[str, Any]) -> Dict[str, Any]: + attrs: Dict[str, Any] = {} + current = field.get("attrs", {}) + field_name = field["name"] + + print(f"Current vector config for '{field_name}':") + print(f" algorithm: {current.get('algorithm', 'HNSW')}") + print(f" datatype: {current.get('datatype', 'float32')}") + print(f" distance_metric: {current.get('distance_metric', 'cosine')}") + print(f" dims: {current.get('dims')} (cannot be changed)") + if current.get("algorithm", "HNSW") == "HNSW": + print(f" m: {current.get('m', 16)}") + print(f" ef_construction: {current.get('ef_construction', 200)}") + + print("\nLeave blank to keep current value.") + + # Algorithm + print( + " Algorithm: vector search method (FLAT=brute force, HNSW=graph, SVS-VAMANA=compressed graph)" + ) + algo = ( + input(f"Algorithm [current: {current.get('algorithm', 'HNSW')}]: ") + .strip() + .upper() + .replace("_", "-") # Normalize SVS_VAMANA to SVS-VAMANA + ) + if algo and algo in ("FLAT", "HNSW", "SVS-VAMANA"): + attrs["algorithm"] = algo + + # Datatype (quantization) - show algorithm-specific options + effective_algo = attrs.get( + "algorithm", current.get("algorithm", "HNSW") + ).upper() + if effective_algo == "SVS-VAMANA": + # SVS-VAMANA only supports float16, float32 + print( + " Datatype for SVS-VAMANA: float16, float32 " + "(float16 reduces memory by ~50%)" + ) + valid_datatypes = ("float16", "float32") + else: + # FLAT/HNSW support: float16, float32, bfloat16, float64, int8, uint8 + print( + " Datatype: float16, float32, bfloat16, float64, int8, uint8\n" + " (float16 reduces memory ~50%, int8/uint8 reduce ~75%)" + ) + valid_datatypes = ( + "float16", + "float32", + "bfloat16", + "float64", + "int8", + "uint8", + ) + datatype = ( + input(f"Datatype [current: {current.get('datatype', 'float32')}]: ") + .strip() + .lower() + ) + if datatype and datatype in valid_datatypes: + attrs["datatype"] = datatype + + # Distance metric + print(" Distance metric: how similarity is measured (cosine, l2, ip)") + metric = ( + input( + f"Distance metric [current: {current.get('distance_metric', 'cosine')}]: " + ) + .strip() + .lower() + ) + if metric and metric in ("cosine", "l2", "ip"): + attrs["distance_metric"] = metric + + # Algorithm-specific params (effective_algo already computed above) + if effective_algo == "HNSW": + print( + " M: number of connections per node (higher=better recall, more memory)" + ) + m_input = input(f"M [current: {current.get('m', 16)}]: ").strip() + if m_input and m_input.isdigit(): + attrs["m"] = int(m_input) + + print( + " EF_CONSTRUCTION: build-time search depth (higher=better recall, slower build)" + ) + ef_input = input( + f"EF_CONSTRUCTION [current: {current.get('ef_construction', 200)}]: " + ).strip() + if ef_input and ef_input.isdigit(): + attrs["ef_construction"] = int(ef_input) + + elif effective_algo == "SVS-VAMANA": + print( + " GRAPH_MAX_DEGREE: max edges per node (higher=better recall, more memory)" + ) + gmd_input = input( + f"GRAPH_MAX_DEGREE [current: {current.get('graph_max_degree', 40)}]: " + ).strip() + if gmd_input and gmd_input.isdigit(): + attrs["graph_max_degree"] = int(gmd_input) + + print(" COMPRESSION: optional vector compression for memory savings") + print(" Options: LVQ4, LVQ8, LVQ4x4, LVQ4x8, LeanVec4x8, LeanVec8x8") + compression = input("COMPRESSION [leave blank for none]: ").strip().upper() + if compression and compression in ( + "LVQ4", + "LVQ8", + "LVQ4X4", + "LVQ4X8", + "LEANVEC4X8", + "LEANVEC8X8", + ): + attrs["compression"] = compression + + return attrs + + def _prompt_bool(self, label: str, allow_blank: bool = False) -> Optional[bool]: + suffix = " [y/n]" if not allow_blank else " [y/n/skip]" + while True: + value = input(f"{label}{suffix}: ").strip().lower() + if value in ("y", "yes"): + return True + if value in ("n", "no"): + return False + if allow_blank and value in ("", "skip", "s"): + return None + if not allow_blank and value == "": + return False + print("Please answer y, n, or skip.") + + def _prompt_from_choices( + self, + label: str, + choices: List[str], + *, + block_message: str, + ) -> Optional[str]: + print(f"{label} options: {', '.join(choices)}") + value = input(f"{label}: ").strip().lower() + if value not in choices: + print(block_message) + return None + return value + + def _print_source_schema(self, schema_dict: Dict[str, Any]) -> None: + print("Current schema:") + print(f"- Index name: {schema_dict['index']['name']}") + print(f"- Storage type: {schema_dict['index']['storage_type']}") + for field in schema_dict["fields"]: + path = field.get("path") + suffix = f" path={path}" if path else "" + print(f" - {field['name']} ({field['type']}){suffix}") From 725af07f4e5339b0175702f6552704a7d48ea841 Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Thu, 19 Mar 2026 18:23:04 -0400 Subject: [PATCH 04/10] docs(migrate): add migration guides and field attributes reference - Add conceptual guide: how migrations work (Diataxis explanation) - Add task guide: step-by-step migration walkthrough (Diataxis how-to) - Expand field-attributes.md with migration support matrix - Add vector datatypes table with algorithm compatibility - Update navigation indexes to include new guides - Normalize SVS-VAMANA naming throughout docs --- docs/concepts/field-attributes.md | 91 +++- docs/concepts/index-migrations.md | 145 +++++++ docs/concepts/index.md | 8 + docs/concepts/search-and-indexing.md | 11 +- docs/user_guide/cli.ipynb | 9 +- docs/user_guide/how_to_guides/index.md | 3 + .../how_to_guides/migrate-indexes.md | 394 ++++++++++++++++++ docs/user_guide/index.md | 4 +- 8 files changed, 657 insertions(+), 8 deletions(-) create mode 100644 docs/concepts/index-migrations.md create mode 100644 docs/user_guide/how_to_guides/migrate-indexes.md diff --git a/docs/concepts/field-attributes.md b/docs/concepts/field-attributes.md index c7764a4a..96060d2f 100644 --- a/docs/concepts/field-attributes.md +++ b/docs/concepts/field-attributes.md @@ -267,7 +267,7 @@ Key vector attributes: - `dims`: Vector dimensionality (required) - `algorithm`: `flat`, `hnsw`, or `svs-vamana` - `distance_metric`: `COSINE`, `L2`, or `IP` -- `datatype`: `float16`, `float32`, `float64`, or `bfloat16` +- `datatype`: Vector precision (see table below) - `index_missing`: Allow searching for documents without vectors ```yaml @@ -281,6 +281,48 @@ Key vector attributes: index_missing: true # Handle documents without embeddings ``` +### Vector Datatypes + +The `datatype` attribute controls how vector components are stored. Smaller datatypes reduce memory usage but may affect precision. + +| Datatype | Bits | Memory (768 dims) | Use Case | +|----------|------|-------------------|----------| +| `float32` | 32 | 3 KB | Default. Best precision for most applications. | +| `float16` | 16 | 1.5 KB | Good balance of memory and precision. Recommended for large-scale deployments. | +| `bfloat16` | 16 | 1.5 KB | Better dynamic range than float16. Useful when embeddings have large value ranges. | +| `float64` | 64 | 6 KB | Maximum precision. Rarely needed. | +| `int8` | 8 | 768 B | Integer quantization. Significant memory savings with some precision loss. | +| `uint8` | 8 | 768 B | Unsigned integer quantization. For embeddings with non-negative values. | + +**Algorithm Compatibility:** + +| Datatype | FLAT | HNSW | SVS-VAMANA | +|----------|------|------|------------| +| `float32` | Yes | Yes | Yes | +| `float16` | Yes | Yes | Yes | +| `bfloat16` | Yes | Yes | No | +| `float64` | Yes | Yes | No | +| `int8` | Yes | Yes | No | +| `uint8` | Yes | Yes | No | + +**Choosing a Datatype:** + +- **Start with `float32`** unless you have memory constraints +- **Use `float16`** for production systems with millions of vectors (50% memory savings, minimal precision loss) +- **Use `int8`/`uint8`** only after benchmarking recall on your specific dataset +- **SVS-VAMANA users**: Must use `float16` or `float32` + +**Quantization with the Migrator:** + +You can change vector datatypes on existing indexes using the migration wizard: + +```bash +rvl migrate wizard --index my_index --url redis://localhost:6379 +# Select "Update field" > choose vector field > change datatype +``` + +The migrator automatically re-encodes stored vectors to the new precision. See {doc}`/user_guide/how_to_guides/migrate-indexes` for details. + ## Redis-Specific Subtleties ### Modifier Ordering @@ -304,6 +346,53 @@ Not all attributes work with all field types: | `unf` | ✓ | ✗ | ✓ | ✗ | ✗ | | `withsuffixtrie` | ✓ | ✓ | ✗ | ✗ | ✗ | +### Migration Support + +The migration wizard (`rvl migrate wizard`) supports updating field attributes on existing indexes. The table below shows which attributes can be updated via the wizard vs requiring manual schema patch editing. + +**Wizard Prompts:** + +| Attribute | Text | Tag | Numeric | Geo | Vector | +|-----------|------|-----|---------|-----|--------| +| `sortable` | Wizard | Wizard | Wizard | Wizard | N/A | +| `index_missing` | Wizard | Wizard | Wizard | Wizard | N/A | +| `index_empty` | Wizard | Wizard | N/A | N/A | N/A | +| `no_index` | Wizard | Wizard | Wizard | Wizard | N/A | +| `unf` | Wizard* | N/A | Wizard* | N/A | N/A | +| `separator` | N/A | Wizard | N/A | N/A | N/A | +| `case_sensitive` | N/A | Wizard | N/A | N/A | N/A | +| `no_stem` | Wizard | N/A | N/A | N/A | N/A | +| `weight` | Wizard | N/A | N/A | N/A | N/A | +| `algorithm` | N/A | N/A | N/A | N/A | Wizard | +| `datatype` | N/A | N/A | N/A | N/A | Wizard | +| `distance_metric` | N/A | N/A | N/A | N/A | Wizard | +| `m`, `ef_construction` | N/A | N/A | N/A | N/A | Wizard | + +*\* `unf` is only prompted when `sortable` is enabled.* + +**Manual Schema Patch Required:** + +| Attribute | Notes | +|-----------|-------| +| `phonetic_matcher` | Enable phonetic search | +| `withsuffixtrie` | Suffix/contains search optimization | + +**Example manual patch** for adding `index_missing` to a field: + +```yaml +# schema_patch.yaml +version: 1 +changes: + update_fields: + - name: category + attrs: + index_missing: true +``` + +```bash +rvl migrate plan --index my_index --schema-patch schema_patch.yaml +``` + ### JSON Path for Nested Fields When using JSON storage, use the `path` attribute to index nested fields: diff --git a/docs/concepts/index-migrations.md b/docs/concepts/index-migrations.md new file mode 100644 index 00000000..a162f0ff --- /dev/null +++ b/docs/concepts/index-migrations.md @@ -0,0 +1,145 @@ +--- +myst: + html_meta: + "description lang=en": | + Learn how RedisVL index migrations work and which schema changes are supported. +--- + +# Index Migrations + +Redis Search indexes are immutable. To change an index schema, you must drop the existing index and create a new one. RedisVL provides a migration workflow that automates this process while preserving your data. + +This page explains how migrations work and which changes are supported. For step by step instructions, see the [migration guide](../user_guide/how_to_guides/migrate-indexes.md). + +## Supported and blocked changes + +The migrator classifies schema changes into two categories: + +| Change | Status | +|--------|--------| +| Add or remove a field | Supported | +| Change field options (sortable, separator) | Supported | +| Change vector algorithm (FLAT, HNSW, SVS-VAMANA) | Supported | +| Change distance metric (COSINE, L2, IP) | Supported | +| Tune algorithm parameters (M, EF_CONSTRUCTION) | Supported | +| Quantize vectors (float32 to float16) | Supported | +| Change vector dimensions | Blocked | +| Change key prefix | Blocked | +| Rename a field | Blocked | +| Change storage type (hash to JSON) | Blocked | +| Add a new vector field | Blocked | + +**Supported** changes can be applied automatically using `rvl migrate`. The migrator handles the index rebuild and any necessary data transformations. + +**Blocked** changes require manual intervention because they involve incompatible data formats or missing data. The migrator will reject these changes and explain why. + +## How the migrator works + +The migrator uses a plan first workflow: + +1. **Plan**: Capture the current schema, classify your changes, and generate a migration plan +2. **Review**: Inspect the plan before making any changes +3. **Apply**: Drop the index, transform data if needed, and recreate with the new schema +4. **Validate**: Verify the result matches expectations + +This separation ensures you always know what will happen before any changes are made. + +## Migration mode: drop_recreate + +The `drop_recreate` mode rebuilds the index in place while preserving your documents. + +The process: + +1. Drop only the index structure (documents remain in Redis) +2. For datatype changes, re-encode vectors to the target precision +3. Recreate the index with the new schema +4. Wait for Redis to re-index the existing documents +5. Validate the result + +**Tradeoff**: The index is unavailable during the rebuild. The migrator requires explicit acknowledgment of this downtime before proceeding. + +## Index only vs document dependent changes + +Schema changes fall into two categories based on whether they require modifying stored data. + +**Index only changes** affect how Redis Search indexes data, not the data itself: + +- Algorithm changes: The stored vector bytes are identical. Only the index structure differs. +- Distance metric changes: Same vectors, different similarity calculation. +- Adding or removing fields: The documents already contain the data. The index just starts or stops indexing it. + +These changes complete quickly because they only require rebuilding the index. + +**Document dependent changes** require modifying the stored data: + +- Datatype changes (float32 to float16): Stored vector bytes must be re-encoded. +- Field renames: Stored field names must be updated in every document. +- Dimension changes: Vectors must be re-embedded with a different model. + +The migrator handles datatype changes automatically. Other document dependent changes are blocked because they require application level logic or external services. + +## Vector quantization + +Changing vector precision from float32 to float16 reduces memory usage at the cost of slight precision loss. The migrator handles this automatically by: + +1. Reading all vectors from Redis +2. Converting to the target precision +3. Writing updated vectors back +4. Recreating the index with the new schema + +Typical reductions: + +| Metric | Value | +|--------|-------| +| Index size reduction | ~50% | +| Memory reduction | ~35% | + +Quantization time is proportional to document count. Plan for downtime accordingly. + +## Why some changes are blocked + +### Vector dimension changes + +Vector dimensions are determined by your embedding model. A 384 dimensional vector from one model is mathematically incompatible with a 768 dimensional index expecting vectors from a different model. There is no way to resize an embedding. + +**Resolution**: Re-embed your documents using the new model and load them into a new index. + +### Prefix changes + +Changing a prefix from `docs:` to `articles:` requires copying every document to a new key. This operation doubles storage temporarily and can leave orphaned keys if interrupted. + +**Resolution**: Create a new index with the new prefix and reload your data. + +### Field renames + +Field names are stored in the documents themselves as hash field names or JSON keys. Renaming requires iterating through every document and updating the field name. + +**Resolution**: Create a new index with the correct field name and reload your data. + +### Storage type changes + +Hash and JSON have different data layouts. Hash stores flat key value pairs. JSON stores nested structures. Converting between them requires understanding your schema and restructuring each document. + +**Resolution**: Export your data, transform it to the new format, and reload into a new index. + +### Adding a vector field + +Adding a vector field means all existing documents need vectors for that field. The migrator cannot generate these vectors because it does not know which embedding model to use or what content to embed. + +**Resolution**: Add vectors to your documents using your application, then run the migration. + +## Downtime considerations + +With `drop_recreate`, your index is unavailable between the drop and when re-indexing completes. Plan for: + +- Search unavailability during the migration window +- Partial results while indexing is in progress +- Resource usage from the re-indexing process +- Quantization time if changing vector datatypes + +The duration depends on document count, field count, and vector dimensions. For large indexes, consider running migrations during low traffic periods. + +## Learn more + +- [Migration guide](../user_guide/how_to_guides/migrate-indexes.md): Step by step instructions +- [Search and indexing](search-and-indexing.md): How Redis Search indexes work diff --git a/docs/concepts/index.md b/docs/concepts/index.md index 0e522b1a..02f4d8b0 100644 --- a/docs/concepts/index.md +++ b/docs/concepts/index.md @@ -26,6 +26,13 @@ How RedisVL components connect: schemas, indexes, queries, and extensions. Schemas, fields, documents, storage types, and query patterns. ::: +:::{grid-item-card} 🔄 Index Migrations +:link: index-migrations +:link-type: doc + +How RedisVL handles migration planning, rebuilds, and future shadow migration. +::: + :::{grid-item-card} 🏷️ Field Attributes :link: field-attributes :link-type: doc @@ -62,6 +69,7 @@ Pre-built patterns: caching, message history, and semantic routing. architecture search-and-indexing +index-migrations field-attributes queries utilities diff --git a/docs/concepts/search-and-indexing.md b/docs/concepts/search-and-indexing.md index b4fe6956..5312d7df 100644 --- a/docs/concepts/search-and-indexing.md +++ b/docs/concepts/search-and-indexing.md @@ -106,9 +106,14 @@ To change a schema, you create a new index with the updated configuration, reind Planning your schema carefully upfront reduces the need for migrations, but the capability exists when requirements evolve. ---- +RedisVL now includes a dedicated migration workflow for this lifecycle: + +- `drop_recreate` for document-preserving rebuilds, including vector quantization (`float32` → `float16`) -**Related concepts:** {doc}`field-attributes` explains how to configure field options like `sortable` and `index_missing`. {doc}`queries` covers the different query types available. +That means schema evolution is no longer only a manual operational pattern. It is also a product surface in RedisVL with a planner, CLI, and validation artifacts. + +--- -**Learn more:** {doc}`/user_guide/01_getting_started` walks through building your first index. {doc}`/user_guide/05_hash_vs_json` compares storage options in depth. {doc}`/user_guide/02_complex_filtering` covers query composition. +**Related concepts:** {doc}`field-attributes` explains how to configure field options like `sortable` and `index_missing`. {doc}`queries` covers the different query types available. {doc}`index-migrations` explains migration modes, supported changes, and architecture. +**Learn more:** {doc}`/user_guide/01_getting_started` walks through building your first index. {doc}`/user_guide/05_hash_vs_json` compares storage options in depth. {doc}`/user_guide/02_complex_filtering` covers query composition. {doc}`/user_guide/how_to_guides/migrate-indexes` shows how to use the migration CLI in practice. diff --git a/docs/user_guide/cli.ipynb b/docs/user_guide/cli.ipynb index ba9d645a..dc9377d4 100644 --- a/docs/user_guide/cli.ipynb +++ b/docs/user_guide/cli.ipynb @@ -6,7 +6,7 @@ "source": [ "# The RedisVL CLI\n", "\n", - "RedisVL is a Python library with a dedicated CLI to help load and create vector search indices within Redis.\n", + "RedisVL is a Python library with a dedicated CLI to help load, inspect, migrate, and create vector search indices within Redis.\n", "\n", "This notebook will walk through how to use the Redis Vector Library CLI (``rvl``).\n", "\n", @@ -50,7 +50,12 @@ "| `rvl index` | `delete --index` or `-i ` | remove the specified index, leaving the data still in Redis|\n", "| `rvl index` | `destroy --index` or `-i `| remove the specified index, as well as the associated data|\n", "| `rvl stats` | `--index` or `-i ` | display the index statistics, including number of docs, average bytes per record, indexing time, etc|\n", - "| `rvl stats` | `--schema` or `-s ` | display the index statistics of a schema defined in . The index must have already been created within Redis|" + "| `rvl stats` | `--schema` or `-s ` | display the index statistics of a schema defined in . The index must have already been created within Redis|\n", + "| `rvl migrate` | `helper` or `list` | show migration guidance and list indexes available for migration|\n", + "| `rvl migrate` | `wizard` | interactively build a migration plan and schema patch|\n", + "| `rvl migrate` | `plan` | generate `migration_plan.yaml` from a patch or target schema|\n", + "| `rvl migrate` | `apply --allow-downtime` | execute a reviewed `drop_recreate` migration|\n", + "| `rvl migrate` | `validate` | validate a completed migration and emit report artifacts|" ] }, { diff --git a/docs/user_guide/how_to_guides/index.md b/docs/user_guide/how_to_guides/index.md index c03d705d..f6511d54 100644 --- a/docs/user_guide/how_to_guides/index.md +++ b/docs/user_guide/how_to_guides/index.md @@ -34,6 +34,7 @@ How-to guides are **task-oriented** recipes that help you accomplish specific go :::{grid-item-card} 💾 Storage - [Choose a Storage Type](../05_hash_vs_json.ipynb) -- Hash vs JSON formats and nested data +- [Migrate an Index](migrate-indexes.md) -- use the migrator helper, wizard, plan, apply, and validate workflow ::: :::{grid-item-card} 💻 CLI Operations @@ -59,6 +60,7 @@ How-to guides are **task-oriented** recipes that help you accomplish specific go | Optimize index performance | [Optimize Indexes with SVS-VAMANA](../09_svs_vamana.ipynb) | | Decide on storage format | [Choose a Storage Type](../05_hash_vs_json.ipynb) | | Manage indices from terminal | [Manage Indices with the CLI](../cli.ipynb) | +| Plan and run a supported index migration | [Migrate an Index](migrate-indexes.md) | ```{toctree} :hidden: @@ -74,4 +76,5 @@ Optimize Indexes with SVS-VAMANA <../09_svs_vamana> Cache Embeddings <../10_embeddings_cache> Use Advanced Query Types <../11_advanced_queries> Write SQL Queries for Redis <../12_sql_to_redis_queries> +Migrate an Index ``` diff --git a/docs/user_guide/how_to_guides/migrate-indexes.md b/docs/user_guide/how_to_guides/migrate-indexes.md new file mode 100644 index 00000000..2942f09f --- /dev/null +++ b/docs/user_guide/how_to_guides/migrate-indexes.md @@ -0,0 +1,394 @@ +--- +myst: + html_meta: + "description lang=en": | + How to migrate a RedisVL index schema without losing data. +--- + +# Migrate an Index + +This guide shows how to safely change your index schema using the RedisVL migrator. + +## Quick Start + +Add a field to your index in 4 commands: + +```bash +# 1. See what indexes exist +rvl migrate list --url redis://localhost:6379 + +# 2. Use the wizard to build a migration plan +rvl migrate wizard --index myindex --url redis://localhost:6379 + +# 3. Apply the migration +rvl migrate apply --plan migration_plan.yaml --allow-downtime --url redis://localhost:6379 + +# 4. Verify the result +rvl migrate validate --plan migration_plan.yaml --url redis://localhost:6379 +``` + +## Prerequisites + +- Redis with the Search module (Redis Stack, Redis Cloud, or Redis Enterprise) +- An existing index to migrate +- `redisvl` installed (`pip install redisvl`) + +```bash +# Local development with Redis Stack +docker run -d --name redis -p 6379:6379 redis/redis-stack-server:latest +``` + +## Step 1: Discover Available Indexes + +```bash +rvl migrate helper --url redis://localhost:6379 +rvl migrate list --url redis://localhost:6379 +``` + +**Example output:** +``` +Index Migrator +============== +The migrator helps you safely change your index schema. + +Supported changes: + - Add, remove, or update text/tag/numeric/geo fields + - Change vector algorithm (FLAT, HNSW, SVS-VAMANA) + - Change distance metric (COSINE, L2, IP) + - Quantize vectors (float32 → float16) + +Commands: + rvl migrate list List all indexes + rvl migrate wizard Build a migration interactively + rvl migrate plan Generate a migration plan + rvl migrate apply Execute a migration + rvl migrate validate Verify a migration +``` + +## Step 2: Build Your Schema Change + +Choose one of these approaches: + +### Option A: Use the Wizard (Recommended) + +The wizard guides you through building a migration interactively. Run: + +```bash +rvl migrate wizard --index myindex --url redis://localhost:6379 +``` + +**Example wizard session (adding a field):** + +```text +Building a migration plan for index 'myindex' +Current schema: +- Index name: myindex +- Storage type: hash + - title (text) + - embedding (vector) + +Choose an action: +1. Add field (text, tag, numeric, geo) +2. Update field (sortable, weight, separator) +3. Remove field +4. Preview patch (show pending changes as YAML) +5. Finish +Enter a number: 1 + +Field name: category +Field type options: text, tag, numeric, geo +Field type: tag + Sortable: enables sorting and aggregation on this field +Sortable [y/n]: n + Separator: character that splits multiple values (default: comma) +Separator [leave blank to keep existing/default]: | + +Choose an action: +1. Add field (text, tag, numeric, geo) +2. Update field (sortable, weight, separator) +3. Remove field +4. Preview patch (show pending changes as YAML) +5. Finish +Enter a number: 5 + +Migration plan written to /path/to/migration_plan.yaml +Mode: drop_recreate +Supported: True +Warnings: +- Index downtime is required +``` + +**Example wizard session (quantizing vectors):** + +```text +Choose an action: +1. Add field (text, tag, numeric, geo) +2. Update field (sortable, weight, separator) +3. Remove field +4. Preview patch (show pending changes as YAML) +5. Finish +Enter a number: 2 + +Updatable fields: +1. title (text) +2. embedding (vector) +Select a field to update by number or name: 2 + +Current vector config for 'embedding': + algorithm: HNSW + datatype: float32 + distance_metric: cosine + dims: 384 (cannot be changed) + m: 16 + ef_construction: 200 + +Leave blank to keep current value. + Algorithm: vector search method (FLAT=brute force, HNSW=graph, SVS-VAMANA=compressed graph) +Algorithm [current: HNSW]: + Datatype: float16, float32, bfloat16, float64, int8, uint8 + (float16 reduces memory ~50%, int8/uint8 reduce ~75%) +Datatype [current: float32]: float16 + Distance metric: how similarity is measured (cosine, l2, ip) +Distance metric [current: cosine]: + M: number of connections per node (higher=better recall, more memory) +M [current: 16]: + EF_CONSTRUCTION: build-time search depth (higher=better recall, slower build) +EF_CONSTRUCTION [current: 200]: + +Choose an action: +... +5. Finish +Enter a number: 5 + +Migration plan written to /path/to/migration_plan.yaml +Mode: drop_recreate +Supported: True +``` + +### Option B: Write a Schema Patch (YAML) + +Create `schema_patch.yaml` manually: + +```yaml +version: 1 +changes: + add_fields: + - name: category + type: tag + path: $.category + attrs: + separator: "|" + remove_fields: + - legacy_field + update_fields: + - name: title + attrs: + sortable: true + - name: embedding + attrs: + datatype: float16 # quantize vectors + algorithm: HNSW + distance_metric: cosine +``` + +Then generate the plan: + +```bash +rvl migrate plan \ + --index myindex \ + --schema-patch schema_patch.yaml \ + --url redis://localhost:6379 \ + --plan-out migration_plan.yaml +``` + +### Option C: Provide a Target Schema + +If you have the complete target schema, use it directly: + +```bash +rvl migrate plan \ + --index myindex \ + --target-schema target_schema.yaml \ + --url redis://localhost:6379 \ + --plan-out migration_plan.yaml +``` + +## Step 3: Review the Migration Plan + +Before applying, review `migration_plan.yaml`: + +```yaml +# migration_plan.yaml (example) +version: 1 +index_name: myindex +migration_mode: drop_recreate + +source: + schema_snapshot: + index: + name: myindex + prefix: "doc:" + storage_type: json + fields: + - name: title + type: text + - name: embedding + type: vector + attrs: + dims: 384 + algorithm: HNSW + datatype: float32 + doc_count: 10000 + key_sample: + - "doc:1" + - "doc:2" + - "doc:3" + +diff_classification: + supported: true + mode: drop_recreate + warnings: + - "Index will be unavailable during migration" + blocked_reasons: [] + +changes: + add_fields: + - name: category + type: tag + +merged_target_schema: + index: + name: myindex + prefix: "doc:" + storage_type: json + fields: + - name: title + type: text + - name: category + type: tag + - name: embedding + type: vector + attrs: + dims: 384 + algorithm: HNSW + datatype: float32 +``` + +**Key fields to check:** +- `diff_classification.supported` - Must be `true` to proceed +- `diff_classification.blocked_reasons` - Must be empty +- `merged_target_schema` - The final schema after migration + +## Step 4: Apply the Migration + +The `apply` command requires `--allow-downtime` since the index will be temporarily unavailable. + +```bash +rvl migrate apply \ + --plan migration_plan.yaml \ + --allow-downtime \ + --url redis://localhost:6379 \ + --report-out migration_report.yaml \ + --benchmark-out benchmark_report.yaml +``` + +What `apply` does: + +1. checks that the live source schema still matches the saved source snapshot +2. drops only the index structure +3. preserves the existing documents +4. recreates the same index name with the merged target schema +5. waits for indexing readiness +6. validates the result +7. writes report artifacts + +## Step 5: Validate the Result + +Validation happens automatically during `apply`, but you can run it separately: + +```bash +rvl migrate validate \ + --plan migration_plan.yaml \ + --url redis://localhost:6379 \ + --report-out migration_report.yaml +``` + +**Validation checks:** +- Live schema matches `merged_target_schema` +- Document count matches the source snapshot +- Sampled keys still exist +- No increase in indexing failures + +## What's Supported + +| Change | Supported | Notes | +|--------|-----------|-------| +| Add text/tag/numeric/geo field | ✅ | | +| Remove a field | ✅ | | +| Make a field sortable | ✅ | | +| Change field options (separator, stemming) | ✅ | | +| Change vector algorithm (FLAT ↔ HNSW ↔ SVS-VAMANA) | ✅ | Index-only | +| Change distance metric (COSINE ↔ L2 ↔ IP) | ✅ | Index-only | +| Tune HNSW parameters (M, EF_CONSTRUCTION) | ✅ | Index-only | +| Quantize vectors (float32 → float16) | ✅ | Auto re-encode | + +## What's Blocked + +| Change | Why | Workaround | +|--------|-----|------------| +| Change vector dimensions | Requires re-embedding | Re-embed with new model, reload data | +| Change prefix/keyspace | Documents at wrong keys | Create new index, reload data | +| Rename a field | Stored data uses old name | Create new index, reload data | +| Change storage type (hash ↔ JSON) | Different data format | Export, transform, reload | +| Add a new vector field | Requires vectors for all docs | Add vectors first, then migrate | + +## CLI Reference + +| Command | Description | +|---------|-------------| +| `rvl migrate helper` | Show supported changes and usage tips | +| `rvl migrate list` | List all indexes | +| `rvl migrate wizard` | Build a migration interactively | +| `rvl migrate plan` | Generate a migration plan | +| `rvl migrate apply` | Execute a migration | +| `rvl migrate validate` | Verify a migration result | + +**Common flags:** +- `--url` : Redis connection URL +- `--index` : Index name to migrate +- `--plan` / `--plan-out` : Path to migration plan +- `--allow-downtime` : Acknowledge index unavailability (required for apply) +- `--report-out` : Path for validation report +- `--benchmark-out` : Path for performance metrics + +## Troubleshooting + +### Migration blocked: "unsupported change" + +The planner detected a change that requires data transformation. Check `diff_classification.blocked_reasons` in the plan for details. + +### Apply failed: "source schema mismatch" + +The live index schema changed since the plan was generated. Re-run `rvl migrate plan` to create a fresh plan. + +### Apply failed: "timeout waiting for index ready" + +The index is taking longer to rebuild than expected. This can happen with large datasets. Check Redis logs and consider increasing the timeout or running during lower traffic periods. + +### Validation failed: "document count mismatch" + +Documents were added or removed between plan and apply. This is expected if your application is actively writing. Re-run validation with `--skip-count-check` if acceptable. + +### How to recover from a failed migration + +If `apply` fails mid-migration: + +1. **Check if the index exists:** `rvl index info --index myindex` +2. **If the index exists but is wrong:** Re-run `apply` with the same plan +3. **If the index was dropped:** Recreate it from the plan's `merged_target_schema` + +The underlying documents are never deleted by `drop_recreate`. + +## Learn more + +- {doc}`/concepts/index-migrations`: How migrations work and which changes are supported diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index 5d2cf6df..d85177e7 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -39,7 +39,7 @@ Schema → Index → Load → Query **Solve specific problems.** Task-oriented recipes for LLM extensions, querying, embeddings, optimization, and storage. +++ -LLM Caching • Filtering • Vectorizers • Reranking +LLM Caching • Filtering • Vectorizers • Reranking • Migrations ::: :::{grid-item-card} 💻 CLI Reference @@ -49,7 +49,7 @@ LLM Caching • Filtering • Vectorizers • Reranking **Command-line tools.** Manage indices, inspect stats, and work with schemas using the `rvl` CLI. +++ -rvl index • rvl stats • Schema YAML +rvl index • rvl stats • rvl migrate • Schema YAML ::: :::{grid-item-card} 💡 Use Cases From 1657d2f771a4048cb87461d0b5449982a9dac10b Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Thu, 19 Mar 2026 18:23:12 -0400 Subject: [PATCH 05/10] test(migrate): add migration planner, wizard, and integration tests - Unit tests for MigrationPlanner diff classification - Unit tests for MigrationWizard (41 tests incl. adversarial inputs) - Integration test for drop_recreate flow - Field modifier ordering integration tests (INDEXEMPTY, INDEXMISSING, etc.) --- ...est_field_modifier_ordering_integration.py | 227 ++++ tests/integration/test_migration_v1.py | 127 ++ tests/unit/test_migration_planner.py | 889 +++++++++++++ tests/unit/test_migration_wizard.py | 1142 +++++++++++++++++ 4 files changed, 2385 insertions(+) create mode 100644 tests/integration/test_migration_v1.py create mode 100644 tests/unit/test_migration_planner.py create mode 100644 tests/unit/test_migration_wizard.py diff --git a/tests/integration/test_field_modifier_ordering_integration.py b/tests/integration/test_field_modifier_ordering_integration.py index b26463df..1eae743a 100644 --- a/tests/integration/test_field_modifier_ordering_integration.py +++ b/tests/integration/test_field_modifier_ordering_integration.py @@ -399,6 +399,233 @@ def test_indexmissing_enables_ismissing_query(self, client, redis_url, worker_id index.delete(drop=True) +class TestIndexEmptyIntegration: + """Integration tests for INDEXEMPTY functionality.""" + + def test_text_field_index_empty_creates_successfully( + self, client, redis_url, worker_id + ): + """Test that INDEXEMPTY on text field allows index creation.""" + skip_if_search_version_below_for_indexmissing(client) + schema_dict = { + "index": { + "name": f"test_text_empty_{worker_id}", + "prefix": f"textempty_{worker_id}:", + "storage_type": "hash", + }, + "fields": [ + { + "name": "description", + "type": "text", + "attrs": {"index_empty": True}, + } + ], + } + + schema = IndexSchema.from_dict(schema_dict) + index = SearchIndex(schema=schema, redis_url=redis_url) + index.create(overwrite=True) + + # Verify index was created + info = client.execute_command("FT.INFO", f"test_text_empty_{worker_id}") + assert info is not None + + # Create documents with empty and non-empty values + client.hset(f"textempty_{worker_id}:1", "description", "has content") + client.hset(f"textempty_{worker_id}:2", "description", "") + client.hset(f"textempty_{worker_id}:3", "description", "more content") + + # Search should work, empty string doc should be indexed + result = client.execute_command( + "FT.SEARCH", + f"test_text_empty_{worker_id}", + "*", + ) + # All 3 docs should be found + assert result[0] == 3 + + # Cleanup + client.delete( + f"textempty_{worker_id}:1", + f"textempty_{worker_id}:2", + f"textempty_{worker_id}:3", + ) + index.delete(drop=True) + + def test_tag_field_index_empty_creates_successfully( + self, client, redis_url, worker_id + ): + """Test that INDEXEMPTY on tag field allows index creation.""" + skip_if_search_version_below_for_indexmissing(client) + schema_dict = { + "index": { + "name": f"test_tag_empty_{worker_id}", + "prefix": f"tagempty_{worker_id}:", + "storage_type": "hash", + }, + "fields": [ + { + "name": "category", + "type": "tag", + "attrs": {"index_empty": True}, + } + ], + } + + schema = IndexSchema.from_dict(schema_dict) + index = SearchIndex(schema=schema, redis_url=redis_url) + index.create(overwrite=True) + + # Verify index was created + info = client.execute_command("FT.INFO", f"test_tag_empty_{worker_id}") + assert info is not None + + # Create documents with empty and non-empty values + client.hset(f"tagempty_{worker_id}:1", "category", "electronics") + client.hset(f"tagempty_{worker_id}:2", "category", "") + client.hset(f"tagempty_{worker_id}:3", "category", "books") + + # Search should work + result = client.execute_command( + "FT.SEARCH", + f"test_tag_empty_{worker_id}", + "*", + ) + # All 3 docs should be found + assert result[0] == 3 + + # Cleanup + client.delete( + f"tagempty_{worker_id}:1", + f"tagempty_{worker_id}:2", + f"tagempty_{worker_id}:3", + ) + index.delete(drop=True) + + +class TestUnfModifierIntegration: + """Integration tests for UNF (un-normalized form) modifier.""" + + def test_text_field_unf_requires_sortable(self, client, redis_url, worker_id): + """Test that UNF on text field works only when sortable is also True.""" + skip_if_search_version_below_for_indexmissing(client) + schema_dict = { + "index": { + "name": f"test_text_unf_{worker_id}", + "prefix": f"textunf_{worker_id}:", + "storage_type": "hash", + }, + "fields": [ + { + "name": "title", + "type": "text", + "attrs": {"sortable": True, "unf": True}, + } + ], + } + + schema = IndexSchema.from_dict(schema_dict) + index = SearchIndex(schema=schema, redis_url=redis_url) + + # Should create successfully + index.create(overwrite=True) + + info = client.execute_command("FT.INFO", f"test_text_unf_{worker_id}") + assert info is not None + + index.delete(drop=True) + + def test_numeric_field_unf_with_sortable(self, client, redis_url, worker_id): + """Test that UNF on numeric field works when sortable is True.""" + skip_if_search_version_below_for_indexmissing(client) + schema_dict = { + "index": { + "name": f"test_num_unf_{worker_id}", + "prefix": f"numunf_{worker_id}:", + "storage_type": "hash", + }, + "fields": [ + { + "name": "price", + "type": "numeric", + "attrs": {"sortable": True, "unf": True}, + } + ], + } + + schema = IndexSchema.from_dict(schema_dict) + index = SearchIndex(schema=schema, redis_url=redis_url) + + # Should create successfully + index.create(overwrite=True) + + info = client.execute_command("FT.INFO", f"test_num_unf_{worker_id}") + assert info is not None + + index.delete(drop=True) + + +class TestNoIndexModifierIntegration: + """Integration tests for NOINDEX modifier.""" + + def test_noindex_with_sortable_allows_sorting_not_searching( + self, client, redis_url, worker_id + ): + """Test that NOINDEX field can be sorted but not searched.""" + schema_dict = { + "index": { + "name": f"test_noindex_{worker_id}", + "prefix": f"noindex_{worker_id}:", + "storage_type": "hash", + }, + "fields": [ + { + "name": "searchable", + "type": "text", + }, + { + "name": "sort_only", + "type": "numeric", + "attrs": {"sortable": True, "no_index": True}, + }, + ], + } + + schema = IndexSchema.from_dict(schema_dict) + index = SearchIndex(schema=schema, redis_url=redis_url) + index.create(overwrite=True) + + # Add test documents + client.hset( + f"noindex_{worker_id}:1", mapping={"searchable": "hello", "sort_only": 10} + ) + client.hset( + f"noindex_{worker_id}:2", mapping={"searchable": "world", "sort_only": 5} + ) + client.hset( + f"noindex_{worker_id}:3", mapping={"searchable": "test", "sort_only": 15} + ) + + # Sorting by no_index field should work + result = client.execute_command( + "FT.SEARCH", + f"test_noindex_{worker_id}", + "*", + "SORTBY", + "sort_only", + "ASC", + ) + assert result[0] == 3 + + # Cleanup + client.delete( + f"noindex_{worker_id}:1", + f"noindex_{worker_id}:2", + f"noindex_{worker_id}:3", + ) + index.delete(drop=True) + + class TestFieldTypeModifierSupport: """Test that field types only support their documented modifiers.""" diff --git a/tests/integration/test_migration_v1.py b/tests/integration/test_migration_v1.py new file mode 100644 index 00000000..56a32d28 --- /dev/null +++ b/tests/integration/test_migration_v1.py @@ -0,0 +1,127 @@ +import uuid + +import yaml + +from redisvl.index import SearchIndex +from redisvl.migration import MigrationExecutor, MigrationPlanner, MigrationValidator +from redisvl.migration.utils import load_migration_plan, schemas_equal +from redisvl.redis.utils import array_to_buffer + + +def test_drop_recreate_plan_apply_validate_flow(redis_url, worker_id, tmp_path): + unique_id = str(uuid.uuid4())[:8] + index_name = f"migration_v1_{worker_id}_{unique_id}" + prefix = f"migration_v1:{worker_id}:{unique_id}" + + source_index = SearchIndex.from_dict( + { + "index": { + "name": index_name, + "prefix": prefix, + "storage_type": "hash", + }, + "fields": [ + {"name": "doc_id", "type": "tag"}, + {"name": "title", "type": "text"}, + {"name": "price", "type": "numeric"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "hnsw", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + }, + redis_url=redis_url, + ) + + docs = [ + { + "doc_id": "1", + "title": "alpha", + "price": 1, + "category": "news", + "embedding": array_to_buffer([0.1, 0.2, 0.3], "float32"), + }, + { + "doc_id": "2", + "title": "beta", + "price": 2, + "category": "sports", + "embedding": array_to_buffer([0.2, 0.1, 0.4], "float32"), + }, + ] + + source_index.create(overwrite=True) + source_index.load(docs, id_field="doc_id") + + patch_path = tmp_path / "schema_patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "add_fields": [ + { + "name": "category", + "type": "tag", + "attrs": {"separator": ","}, + } + ], + "remove_fields": ["price"], + "update_fields": [{"name": "title", "attrs": {"sortable": True}}], + }, + }, + sort_keys=False, + ) + ) + + plan_path = tmp_path / "migration_plan.yaml" + planner = MigrationPlanner() + plan = planner.create_plan( + index_name, + redis_url=redis_url, + schema_patch_path=str(patch_path), + ) + assert plan.diff_classification.supported is True + planner.write_plan(plan, str(plan_path)) + + query_check_path = tmp_path / "query_checks.yaml" + query_check_path.write_text( + yaml.safe_dump({"fetch_ids": ["1", "2"]}, sort_keys=False) + ) + + executor = MigrationExecutor() + report = executor.apply( + load_migration_plan(str(plan_path)), + redis_url=redis_url, + query_check_file=str(query_check_path), + ) + + assert report.result == "succeeded" + assert report.validation.schema_match is True + assert report.validation.doc_count_match is True + assert report.validation.key_sample_exists is True + assert report.validation.indexing_failures_delta == 0 + assert not report.validation.errors + assert report.benchmark_summary.documents_indexed_per_second is not None + + live_index = SearchIndex.from_existing(index_name, redis_url=redis_url) + assert schemas_equal(live_index.schema.to_dict(), plan.merged_target_schema) + + validator = MigrationValidator() + validation, _target_info, _duration = validator.validate( + load_migration_plan(str(plan_path)), + redis_url=redis_url, + query_check_file=str(query_check_path), + ) + assert validation.schema_match is True + assert validation.doc_count_match is True + assert validation.key_sample_exists is True + assert not validation.errors + + live_index.delete(drop=True) diff --git a/tests/unit/test_migration_planner.py b/tests/unit/test_migration_planner.py new file mode 100644 index 00000000..5464b5cc --- /dev/null +++ b/tests/unit/test_migration_planner.py @@ -0,0 +1,889 @@ +from fnmatch import fnmatch + +import yaml + +from redisvl.migration import MigrationPlanner +from redisvl.schema.schema import IndexSchema + + +class DummyClient: + def __init__(self, keys): + self.keys = keys + + def scan(self, cursor=0, match=None, count=None): + matched = [] + for key in self.keys: + decoded_key = key.decode() if isinstance(key, bytes) else str(key) + if match is None or fnmatch(decoded_key, match): + matched.append(key) + return 0, matched + + +class DummyIndex: + def __init__(self, schema, stats, keys): + self.schema = schema + self._stats = stats + self._client = DummyClient(keys) + + @property + def client(self): + return self._client + + def info(self): + return self._stats + + +def _make_source_schema(): + return IndexSchema.from_dict( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + { + "name": "title", + "type": "text", + "path": "$.title", + "attrs": {"sortable": False}, + }, + { + "name": "price", + "type": "numeric", + "path": "$.price", + "attrs": {"sortable": True}, + }, + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + } + ) + + +def test_create_plan_from_schema_patch_preserves_unspecified_config( + monkeypatch, tmp_path +): + source_schema = _make_source_schema() + dummy_index = DummyIndex( + source_schema, + {"num_docs": 2, "indexing": False}, + [b"docs:1", b"docs:2", b"docs:3"], + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + patch_path = tmp_path / "schema_patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "add_fields": [ + { + "name": "category", + "type": "tag", + "path": "$.category", + "attrs": {"separator": ","}, + } + ], + "remove_fields": ["price"], + "update_fields": [ + { + "name": "title", + "options": {"sortable": True}, + } + ], + }, + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner(key_sample_limit=2) + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + schema_patch_path=str(patch_path), + ) + + assert plan.diff_classification.supported is True + assert plan.source.index_name == "docs" + assert plan.source.keyspace.storage_type == "json" + assert plan.source.keyspace.prefixes == ["docs"] + assert plan.source.keyspace.key_separator == ":" + assert plan.source.keyspace.key_sample == ["docs:1", "docs:2"] + assert plan.warnings == ["Index downtime is required"] + + merged_fields = { + field["name"]: field for field in plan.merged_target_schema["fields"] + } + assert plan.merged_target_schema["index"]["prefix"] == "docs" + assert merged_fields["title"]["attrs"]["sortable"] is True + assert "price" not in merged_fields + assert merged_fields["category"]["type"] == "tag" + + plan_path = tmp_path / "migration_plan.yaml" + planner.write_plan(plan, str(plan_path)) + written_plan = yaml.safe_load(plan_path.read_text()) + assert written_plan["mode"] == "drop_recreate" + assert written_plan["validation"]["require_doc_count_match"] is True + assert written_plan["diff_classification"]["supported"] is True + + +def test_target_schema_vector_datatype_change_is_allowed(monkeypatch, tmp_path): + """Changing vector datatype (quantization) is allowed - executor will re-encode.""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + { + "name": "title", + "type": "text", + "path": "$.title", + "attrs": {"sortable": False}, + }, + { + "name": "price", + "type": "numeric", + "path": "$.price", + "attrs": {"sortable": True}, + }, + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "flat", # Same algorithm + "dims": 3, + "distance_metric": "cosine", + "datatype": "float16", # Changed from float32 + }, + }, + ], + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + # Datatype change (quantization) should now be ALLOWED + assert plan.diff_classification.supported is True + assert len(plan.diff_classification.blocked_reasons) == 0 + + # Verify datatype changes are detected for the executor + datatype_changes = MigrationPlanner.get_vector_datatype_changes( + plan.source.schema_snapshot, plan.merged_target_schema + ) + assert "embedding" in datatype_changes + assert datatype_changes["embedding"]["source"] == "float32" + assert datatype_changes["embedding"]["target"] == "float16" + + +def test_target_schema_vector_algorithm_change_is_allowed(monkeypatch, tmp_path): + """Changing vector algorithm is allowed (index-only change).""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + { + "name": "title", + "type": "text", + "path": "$.title", + "attrs": {"sortable": False}, + }, + { + "name": "price", + "type": "numeric", + "path": "$.price", + "attrs": {"sortable": True}, + }, + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "hnsw", # Changed from flat + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", # Same datatype + }, + }, + ], + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + # Algorithm change should be ALLOWED + assert plan.diff_classification.supported is True + assert len(plan.diff_classification.blocked_reasons) == 0 + + +# ============================================================================= +# BLOCKED CHANGES (Document-Dependent) - require iterative_shadow +# ============================================================================= + + +def test_target_schema_prefix_change_is_blocked(monkeypatch, tmp_path): + """Prefix change is blocked: documents are at wrong keys.""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs_v2", + "key_separator": ":", + "storage_type": "json", + }, + "fields": source_schema.to_dict()["fields"], + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + assert plan.diff_classification.supported is False + assert any( + "prefix" in reason.lower() and "iterative_shadow" in reason + for reason in plan.diff_classification.blocked_reasons + ) + + +def test_key_separator_change_is_blocked(monkeypatch, tmp_path): + """Key separator change is blocked: document keys don't match new pattern.""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": "/", # Changed from ":" + "storage_type": "json", + }, + "fields": source_schema.to_dict()["fields"], + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + assert plan.diff_classification.supported is False + assert any( + "key_separator" in reason.lower() or "separator" in reason.lower() + for reason in plan.diff_classification.blocked_reasons + ) + + +def test_storage_type_change_is_blocked(monkeypatch, tmp_path): + """Storage type change is blocked: documents are in wrong format.""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "hash", # Changed from "json" + }, + "fields": [ + {"name": "title", "type": "text", "attrs": {"sortable": False}}, + {"name": "price", "type": "numeric", "attrs": {"sortable": True}}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + assert plan.diff_classification.supported is False + assert any( + "storage" in reason.lower() + for reason in plan.diff_classification.blocked_reasons + ) + + +def test_vector_dimension_change_is_blocked(monkeypatch, tmp_path): + """Vector dimension change is blocked: stored vectors have wrong size.""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + { + "name": "title", + "type": "text", + "path": "$.title", + "attrs": {"sortable": False}, + }, + { + "name": "price", + "type": "numeric", + "path": "$.price", + "attrs": {"sortable": True}, + }, + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "flat", + "dims": 768, # Changed from 3 + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + assert plan.diff_classification.supported is False + assert any( + "dims" in reason and "iterative_shadow" in reason + for reason in plan.diff_classification.blocked_reasons + ) + + +def test_field_path_change_is_blocked(monkeypatch, tmp_path): + """JSON path change is blocked: stored data is at wrong path.""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + { + "name": "title", + "type": "text", + "path": "$.metadata.title", # Changed from $.title + "attrs": {"sortable": False}, + }, + { + "name": "price", + "type": "numeric", + "path": "$.price", + "attrs": {"sortable": True}, + }, + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + assert plan.diff_classification.supported is False + assert any( + "path" in reason.lower() for reason in plan.diff_classification.blocked_reasons + ) + + +def test_field_type_change_is_blocked(monkeypatch, tmp_path): + """Field type change is blocked: index expects different data format.""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + { + "name": "title", + "type": "tag", # Changed from text + "path": "$.title", + }, + { + "name": "price", + "type": "numeric", + "path": "$.price", + "attrs": {"sortable": True}, + }, + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + assert plan.diff_classification.supported is False + assert any( + "type" in reason.lower() for reason in plan.diff_classification.blocked_reasons + ) + + +def test_field_rename_is_detected_and_blocked(monkeypatch, tmp_path): + """Field rename is blocked: stored data uses old field name.""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + { + "name": "document_title", # Renamed from "title" + "type": "text", + "path": "$.title", + "attrs": {"sortable": False}, + }, + { + "name": "price", + "type": "numeric", + "path": "$.price", + "attrs": {"sortable": True}, + }, + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + assert plan.diff_classification.supported is False + assert any( + "rename" in reason.lower() + for reason in plan.diff_classification.blocked_reasons + ) + + +# ============================================================================= +# ALLOWED CHANGES (Index-Only) +# ============================================================================= + + +def test_add_non_vector_field_is_allowed(monkeypatch, tmp_path): + """Adding a non-vector field is allowed.""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + patch_path = tmp_path / "schema_patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "add_fields": [ + {"name": "category", "type": "tag", "path": "$.category"} + ] + }, + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + schema_patch_path=str(patch_path), + ) + + assert plan.diff_classification.supported is True + + +def test_remove_field_is_allowed(monkeypatch, tmp_path): + """Removing a field from the index is allowed.""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + patch_path = tmp_path / "schema_patch.yaml" + patch_path.write_text( + yaml.safe_dump( + {"version": 1, "changes": {"remove_fields": ["price"]}}, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + schema_patch_path=str(patch_path), + ) + + assert plan.diff_classification.supported is True + + +def test_change_field_sortable_is_allowed(monkeypatch, tmp_path): + """Changing field sortable option is allowed.""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + patch_path = tmp_path / "schema_patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [{"name": "title", "options": {"sortable": True}}] + }, + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + schema_patch_path=str(patch_path), + ) + + assert plan.diff_classification.supported is True + + +def test_change_vector_distance_metric_is_allowed(monkeypatch, tmp_path): + """Changing vector distance metric is allowed (index-only).""" + source_schema = _make_source_schema() + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + { + "name": "title", + "type": "text", + "path": "$.title", + "attrs": {"sortable": False}, + }, + { + "name": "price", + "type": "numeric", + "path": "$.price", + "attrs": {"sortable": True}, + }, + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "L2", # Changed from cosine + "datatype": "float32", + }, + }, + ], + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + assert plan.diff_classification.supported is True + assert len(plan.diff_classification.blocked_reasons) == 0 + + +def test_change_hnsw_tuning_params_is_allowed(monkeypatch, tmp_path): + """Changing HNSW tuning parameters is allowed (index-only).""" + source_schema = IndexSchema.from_dict( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "hnsw", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + "m": 16, + "ef_construction": 200, + }, + }, + ], + } + ) + dummy_index = DummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", + lambda *args, **kwargs: dummy_index, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "hnsw", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + "m": 32, # Changed from 16 + "ef_construction": 400, # Changed from 200 + }, + }, + ], + }, + sort_keys=False, + ) + ) + + planner = MigrationPlanner() + plan = planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + assert plan.diff_classification.supported is True + assert len(plan.diff_classification.blocked_reasons) == 0 diff --git a/tests/unit/test_migration_wizard.py b/tests/unit/test_migration_wizard.py new file mode 100644 index 00000000..56c3a6d3 --- /dev/null +++ b/tests/unit/test_migration_wizard.py @@ -0,0 +1,1142 @@ +import pytest + +from redisvl.migration.wizard import MigrationWizard + + +def _make_vector_source_schema(algorithm="hnsw", datatype="float32"): + """Helper to create a source schema with a vector field.""" + return { + "index": { + "name": "test_index", + "prefix": "test:", + "storage_type": "hash", + }, + "fields": [ + {"name": "title", "type": "text"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": algorithm, + "dims": 384, + "distance_metric": "cosine", + "datatype": datatype, + "m": 16, + "ef_construction": 200, + }, + }, + ], + } + + +def test_wizard_builds_patch_from_interactive_inputs(monkeypatch): + source_schema = { + "index": { + "name": "docs", + "prefix": "docs", + "storage_type": "json", + }, + "fields": [ + {"name": "title", "type": "text", "path": "$.title"}, + {"name": "category", "type": "tag", "path": "$.category"}, + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + } + + answers = iter( + [ + # Add field + "1", + "status", # field name + "tag", # field type + "$.status", # JSON path + "y", # sortable + "n", # index_missing + "|", # separator + "n", # case_sensitive + "n", # index_empty + "n", # no_index (prompted since sortable=y) + # Update field + "2", + "title", # select field + "y", # sortable + "n", # index_missing + "n", # no_stem + "", # weight (blank to skip) + "n", # index_empty + "n", # unf (prompted since sortable=y) + "n", # no_index (prompted since sortable=y) + # Remove field + "3", + "category", + # Finish + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) # noqa: SLF001 + + assert patch.changes.add_fields == [ + { + "name": "status", + "type": "tag", + "path": "$.status", + "attrs": { + "sortable": True, + "index_missing": False, + "separator": "|", + "case_sensitive": False, + "index_empty": False, + "no_index": False, + }, + } + ] + assert patch.changes.remove_fields == ["category"] + assert len(patch.changes.update_fields) == 1 + assert patch.changes.update_fields[0].name == "title" + assert patch.changes.update_fields[0].attrs["sortable"] is True + assert patch.changes.update_fields[0].attrs["no_stem"] is False + + +# ============================================================================= +# Vector Algorithm Tests +# ============================================================================= + + +class TestVectorAlgorithmChanges: + """Test wizard handling of vector algorithm changes.""" + + def test_hnsw_to_flat(self, monkeypatch): + """Test changing from HNSW to FLAT algorithm.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", # Update field + "embedding", # Select vector field + "FLAT", # Change to FLAT + "", # datatype (keep current) + "", # distance_metric (keep current) + # No HNSW params prompted for FLAT + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + assert len(patch.changes.update_fields) == 1 + update = patch.changes.update_fields[0] + assert update.name == "embedding" + assert update.attrs["algorithm"] == "FLAT" + + def test_flat_to_hnsw_with_params(self, monkeypatch): + """Test changing from FLAT to HNSW with custom M and EF_CONSTRUCTION.""" + source_schema = _make_vector_source_schema(algorithm="flat") + + answers = iter( + [ + "2", # Update field + "embedding", # Select vector field + "HNSW", # Change to HNSW + "", # datatype (keep current) + "", # distance_metric (keep current) + "32", # M + "400", # EF_CONSTRUCTION + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["algorithm"] == "HNSW" + assert update.attrs["m"] == 32 + assert update.attrs["ef_construction"] == 400 + + def test_hnsw_to_svs_vamana_with_underscore(self, monkeypatch): + """Test changing to SVS_VAMANA (underscore format) is normalized.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", # Update field + "embedding", # Select vector field + "SVS_VAMANA", # Underscore format (should be normalized) + "float16", # SVS only supports float16/float32 + "", # distance_metric (keep current) + "64", # GRAPH_MAX_DEGREE + "LVQ8", # COMPRESSION + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["algorithm"] == "SVS-VAMANA" # Normalized to hyphen + assert update.attrs["datatype"] == "float16" + assert update.attrs["graph_max_degree"] == 64 + assert update.attrs["compression"] == "LVQ8" + + def test_hnsw_to_svs_vamana_with_hyphen(self, monkeypatch): + """Test changing to SVS-VAMANA (hyphen format) works directly.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", # Update field + "embedding", # Select vector field + "SVS-VAMANA", # Hyphen format + "", # datatype (keep current) + "", # distance_metric (keep current) + "", # GRAPH_MAX_DEGREE (keep default) + "", # COMPRESSION (none) + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["algorithm"] == "SVS-VAMANA" + + def test_svs_vamana_with_leanvec_compression(self, monkeypatch): + """Test SVS-VAMANA with LeanVec compression type.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", # Update field + "embedding", # Select vector field + "SVS-VAMANA", + "float16", + "", # distance_metric + "48", # GRAPH_MAX_DEGREE + "LEANVEC8X8", # COMPRESSION + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["algorithm"] == "SVS-VAMANA" + assert update.attrs["compression"] == "LEANVEC8X8" + + +# ============================================================================= +# Vector Datatype (Quantization) Tests +# ============================================================================= + + +class TestVectorDatatypeChanges: + """Test wizard handling of vector datatype/quantization changes.""" + + def test_float32_to_float16(self, monkeypatch): + """Test quantization from float32 to float16.""" + source_schema = _make_vector_source_schema(datatype="float32") + + answers = iter( + [ + "2", # Update field + "embedding", + "", # algorithm (keep current) + "float16", # datatype + "", # distance_metric + "", # M (keep current) + "", # EF_CONSTRUCTION (keep current) + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["datatype"] == "float16" + + def test_float16_to_float32(self, monkeypatch): + """Test changing from float16 back to float32.""" + source_schema = _make_vector_source_schema(datatype="float16") + + answers = iter( + [ + "2", # Update field + "embedding", + "", # algorithm + "float32", # datatype + "", # distance_metric + "", # M + "", # EF_CONSTRUCTION + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["datatype"] == "float32" + + def test_int8_accepted_for_hnsw(self, monkeypatch): + """Test that int8 is accepted for HNSW/FLAT (but not SVS-VAMANA).""" + source_schema = _make_vector_source_schema(datatype="float32") + + answers = iter( + [ + "2", # Update field + "embedding", + "", # algorithm (keep HNSW) + "int8", # Valid for HNSW/FLAT + "", # distance_metric + "", # M + "", # EF_CONSTRUCTION + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + # int8 is now valid for HNSW/FLAT + update = patch.changes.update_fields[0] + assert update.attrs["datatype"] == "int8" + + +# ============================================================================= +# Distance Metric Tests +# ============================================================================= + + +class TestDistanceMetricChanges: + """Test wizard handling of distance metric changes.""" + + def test_cosine_to_l2(self, monkeypatch): + """Test changing distance metric from cosine to L2.""" + source_schema = _make_vector_source_schema() + + answers = iter( + [ + "2", # Update field + "embedding", + "", # algorithm + "", # datatype + "l2", # distance_metric + "", # M + "", # EF_CONSTRUCTION + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["distance_metric"] == "l2" + + def test_cosine_to_ip(self, monkeypatch): + """Test changing distance metric from cosine to inner product.""" + source_schema = _make_vector_source_schema() + + answers = iter( + [ + "2", # Update field + "embedding", + "", # algorithm + "", # datatype + "ip", # distance_metric (inner product) + "", # M + "", # EF_CONSTRUCTION + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["distance_metric"] == "ip" + + +# ============================================================================= +# Combined Changes Tests +# ============================================================================= + + +class TestCombinedVectorChanges: + """Test wizard handling of multiple vector attribute changes.""" + + def test_algorithm_datatype_and_metric_change(self, monkeypatch): + """Test changing algorithm, datatype, and distance metric together.""" + source_schema = _make_vector_source_schema(algorithm="flat", datatype="float32") + + answers = iter( + [ + "2", # Update field + "embedding", + "HNSW", # algorithm + "float16", # datatype + "l2", # distance_metric + "24", # M + "300", # EF_CONSTRUCTION + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["algorithm"] == "HNSW" + assert update.attrs["datatype"] == "float16" + assert update.attrs["distance_metric"] == "l2" + assert update.attrs["m"] == 24 + assert update.attrs["ef_construction"] == 300 + + def test_svs_vamana_full_config(self, monkeypatch): + """Test SVS-VAMANA with all parameters configured.""" + source_schema = _make_vector_source_schema(algorithm="hnsw", datatype="float32") + + answers = iter( + [ + "2", # Update field + "embedding", + "SVS-VAMANA", # algorithm + "float16", # datatype (required for SVS) + "ip", # distance_metric + "50", # GRAPH_MAX_DEGREE + "LVQ4X8", # COMPRESSION + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["algorithm"] == "SVS-VAMANA" + assert update.attrs["datatype"] == "float16" + assert update.attrs["distance_metric"] == "ip" + assert update.attrs["graph_max_degree"] == 50 + assert update.attrs["compression"] == "LVQ4X8" + + def test_no_changes_when_all_blank(self, monkeypatch): + """Test that blank inputs result in no changes.""" + source_schema = _make_vector_source_schema() + + answers = iter( + [ + "2", # Update field + "embedding", + "", # algorithm (keep current) + "", # datatype (keep current) + "", # distance_metric (keep current) + "", # M (keep current) + "", # EF_CONSTRUCTION (keep current) + "5", # Finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + # No changes collected means no update_fields + assert len(patch.changes.update_fields) == 0 + + +# ============================================================================= +# Adversarial / Edge Case Tests +# ============================================================================= + + +class TestWizardAdversarialInputs: + """Test wizard robustness against malformed, malicious, or edge case inputs.""" + + # ------------------------------------------------------------------------- + # Invalid Algorithm Inputs + # ------------------------------------------------------------------------- + + def test_typo_in_algorithm_ignored(self, monkeypatch): + """Test that typos in algorithm name are ignored.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "HNSW_TYPO", # Invalid algorithm + "", + "", + "", + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + # Invalid algorithm should be ignored, no changes + assert len(patch.changes.update_fields) == 0 + + def test_partial_algorithm_name_ignored(self, monkeypatch): + """Test that partial algorithm names are ignored.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "HNS", # Partial name + "", + "", + "", + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + assert len(patch.changes.update_fields) == 0 + + def test_algorithm_with_special_chars_ignored(self, monkeypatch): + """Test that algorithm with special characters is ignored.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "HNSW; DROP TABLE users;--", # SQL injection attempt + "", + "", + "", + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + assert len(patch.changes.update_fields) == 0 + + def test_algorithm_lowercase_works(self, monkeypatch): + """Test that lowercase algorithm names work (case insensitive).""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "flat", # lowercase + "", + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["algorithm"] == "FLAT" + + def test_algorithm_mixed_case_works(self, monkeypatch): + """Test that mixed case algorithm names work.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "SvS_VaMaNa", # Mixed case with underscore + "", + "", + "", + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["algorithm"] == "SVS-VAMANA" + + # ------------------------------------------------------------------------- + # Invalid Numeric Inputs + # ------------------------------------------------------------------------- + + def test_negative_m_ignored(self, monkeypatch): + """Test that negative M value is ignored.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "HNSW", + "", + "", + "-16", # Negative M + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert "m" not in update.attrs # Negative should be ignored + + def test_float_m_ignored(self, monkeypatch): + """Test that float M value is ignored.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "HNSW", + "", + "", + "16.5", # Float M + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert "m" not in update.attrs + + def test_string_m_ignored(self, monkeypatch): + """Test that string M value is ignored.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "HNSW", + "", + "", + "sixteen", # String M + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert "m" not in update.attrs + + def test_zero_m_accepted(self, monkeypatch): + """Test that zero M is accepted (validation happens at schema level).""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "HNSW", + "", + "", + "0", # Zero M + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + # Zero is a valid digit, wizard accepts it (validation at apply time) + # isdigit() returns False for "0" in some edge cases, let's check + update = patch.changes.update_fields[0] + # "0".isdigit() returns True, so it should be accepted + assert update.attrs.get("m") == 0 + + def test_very_large_ef_construction_accepted(self, monkeypatch): + """Test that very large EF_CONSTRUCTION is accepted by wizard.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "HNSW", + "", + "", + "", + "999999999", # Very large + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["ef_construction"] == 999999999 + + # ------------------------------------------------------------------------- + # Invalid Datatype Inputs + # ------------------------------------------------------------------------- + + def test_bfloat16_accepted_for_hnsw(self, monkeypatch): + """Test that bfloat16 is accepted for HNSW/FLAT.""" + source_schema = _make_vector_source_schema(datatype="float32") + + answers = iter( + [ + "2", + "embedding", + "", + "bfloat16", # Valid for HNSW/FLAT + "", + "", + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["datatype"] == "bfloat16" + + def test_uint8_accepted_for_hnsw(self, monkeypatch): + """Test that uint8 is accepted for HNSW/FLAT.""" + source_schema = _make_vector_source_schema(datatype="float32") + + answers = iter( + [ + "2", + "embedding", + "", + "uint8", # Valid for HNSW/FLAT + "", + "", + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["datatype"] == "uint8" + + def test_int8_rejected_for_svs_vamana(self, monkeypatch): + """Test that int8 is rejected for SVS-VAMANA (only float16/float32 allowed).""" + source_schema = _make_vector_source_schema(datatype="float32", algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "SVS-VAMANA", # Switch to SVS-VAMANA + "int8", # Invalid for SVS-VAMANA + "", + "", + "", # graph_max_degree + "", # compression + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + # Should have algorithm change but NOT datatype + update = patch.changes.update_fields[0] + assert update.attrs["algorithm"] == "SVS-VAMANA" + assert "datatype" not in update.attrs # int8 rejected + + # ------------------------------------------------------------------------- + # Invalid Distance Metric Inputs + # ------------------------------------------------------------------------- + + def test_invalid_distance_metric_ignored(self, monkeypatch): + """Test that invalid distance metric is ignored.""" + source_schema = _make_vector_source_schema() + + answers = iter( + [ + "2", + "embedding", + "", + "", + "euclidean", # Invalid (should be 'l2') + "", + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + assert len(patch.changes.update_fields) == 0 + + def test_distance_metric_uppercase_works(self, monkeypatch): + """Test that uppercase distance metric works.""" + source_schema = _make_vector_source_schema() + + answers = iter( + [ + "2", + "embedding", + "", + "", + "L2", # Uppercase + "", + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["distance_metric"] == "l2" + + # ------------------------------------------------------------------------- + # Invalid Compression Inputs + # ------------------------------------------------------------------------- + + def test_invalid_compression_ignored(self, monkeypatch): + """Test that invalid compression type is ignored.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "SVS-VAMANA", + "", + "", + "", + "INVALID_COMPRESSION", # Invalid + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert "compression" not in update.attrs + + def test_compression_lowercase_works(self, monkeypatch): + """Test that lowercase compression works.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "SVS-VAMANA", + "", + "", + "", + "lvq8", # lowercase + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["compression"] == "LVQ8" + + # ------------------------------------------------------------------------- + # Whitespace and Special Character Inputs + # ------------------------------------------------------------------------- + + def test_whitespace_only_treated_as_blank(self, monkeypatch): + """Test that whitespace-only input is treated as blank.""" + source_schema = _make_vector_source_schema() + + answers = iter( + [ + "2", + "embedding", + " ", # Whitespace only + " ", + " ", + " ", + " ", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + assert len(patch.changes.update_fields) == 0 + + def test_algorithm_with_leading_trailing_whitespace(self, monkeypatch): + """Test that algorithm with whitespace is trimmed and works.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + " FLAT ", # Whitespace around + "", + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert update.attrs["algorithm"] == "FLAT" + + def test_unicode_input_ignored(self, monkeypatch): + """Test that unicode/emoji inputs are ignored.""" + source_schema = _make_vector_source_schema() + + answers = iter( + [ + "2", + "embedding", + "HNSW🚀", # Unicode emoji + "", + "", + "", + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + assert len(patch.changes.update_fields) == 0 + + def test_very_long_input_ignored(self, monkeypatch): + """Test that very long inputs are ignored.""" + source_schema = _make_vector_source_schema() + + answers = iter( + [ + "2", + "embedding", + "A" * 10000, # Very long string + "", + "", + "", + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + assert len(patch.changes.update_fields) == 0 + + # ------------------------------------------------------------------------- + # Field Selection Edge Cases + # ------------------------------------------------------------------------- + + def test_nonexistent_field_selection(self, monkeypatch): + """Test selecting a nonexistent field.""" + source_schema = _make_vector_source_schema() + + answers = iter( + [ + "2", + "nonexistent_field", # Doesn't exist + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + # Should print "Invalid field selection" and continue + assert len(patch.changes.update_fields) == 0 + + def test_field_selection_by_number_out_of_range(self, monkeypatch): + """Test selecting a field by out-of-range number.""" + source_schema = _make_vector_source_schema() + + answers = iter( + [ + "2", + "99", # Out of range + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + assert len(patch.changes.update_fields) == 0 + + def test_field_selection_negative_number(self, monkeypatch): + """Test selecting a field with negative number.""" + source_schema = _make_vector_source_schema() + + answers = iter( + [ + "2", + "-1", # Negative + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + assert len(patch.changes.update_fields) == 0 + + # ------------------------------------------------------------------------- + # Menu Action Edge Cases + # ------------------------------------------------------------------------- + + def test_invalid_menu_action(self, monkeypatch): + """Test invalid menu action selection.""" + source_schema = _make_vector_source_schema() + + answers = iter( + [ + "99", # Invalid action + "abc", # Invalid action + "", # Empty + "5", # Finally finish + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + # Should handle invalid actions gracefully and eventually finish + assert patch is not None + + # ------------------------------------------------------------------------- + # SVS-VAMANA Specific Edge Cases + # ------------------------------------------------------------------------- + + def test_svs_vamana_negative_graph_max_degree_ignored(self, monkeypatch): + """Test that negative GRAPH_MAX_DEGREE is ignored.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "SVS-VAMANA", + "", + "", + "-40", # Negative + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert "graph_max_degree" not in update.attrs + + def test_svs_vamana_string_graph_max_degree_ignored(self, monkeypatch): + """Test that string GRAPH_MAX_DEGREE is ignored.""" + source_schema = _make_vector_source_schema(algorithm="hnsw") + + answers = iter( + [ + "2", + "embedding", + "SVS-VAMANA", + "", + "", + "forty", # String + "", + "5", + ] + ) + monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + + wizard = MigrationWizard() + patch = wizard._build_patch(source_schema) + + update = patch.changes.update_fields[0] + assert "graph_max_degree" not in update.attrs From 8fffbef256e23740e880ba2ee8863590de54a74b Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Thu, 19 Mar 2026 18:24:35 -0400 Subject: [PATCH 06/10] chore: add nitin_docs and nitin_scripts to gitignore --- .gitignore | 2 + nitin_docs/index_migrator/00_index.md | 71 --- nitin_docs/index_migrator/01_context.md | 100 --- nitin_docs/index_migrator/02_architecture.md | 230 ------- nitin_docs/index_migrator/03_benchmarking.md | 248 -------- .../10_v1_drop_recreate_spec.md | 306 --------- .../11_v1_drop_recreate_tasks.md | 160 ----- .../12_v1_drop_recreate_tests.md | 122 ---- .../20_v2_iterative_shadow_spec.md | 213 ------- .../21_v2_iterative_shadow_tasks.md | 159 ----- .../22_v2_iterative_shadow_tests.md | 102 --- nitin_docs/index_migrator/90_prd.md | 581 ------------------ 12 files changed, 2 insertions(+), 2292 deletions(-) delete mode 100644 nitin_docs/index_migrator/00_index.md delete mode 100644 nitin_docs/index_migrator/01_context.md delete mode 100644 nitin_docs/index_migrator/02_architecture.md delete mode 100644 nitin_docs/index_migrator/03_benchmarking.md delete mode 100644 nitin_docs/index_migrator/10_v1_drop_recreate_spec.md delete mode 100644 nitin_docs/index_migrator/11_v1_drop_recreate_tasks.md delete mode 100644 nitin_docs/index_migrator/12_v1_drop_recreate_tests.md delete mode 100644 nitin_docs/index_migrator/20_v2_iterative_shadow_spec.md delete mode 100644 nitin_docs/index_migrator/21_v2_iterative_shadow_tasks.md delete mode 100644 nitin_docs/index_migrator/22_v2_iterative_shadow_tests.md delete mode 100644 nitin_docs/index_migrator/90_prd.md diff --git a/.gitignore b/.gitignore index ec91877a..4c03a849 100644 --- a/.gitignore +++ b/.gitignore @@ -234,6 +234,8 @@ tests/data # Local working directory (personal scripts, docs, tools) local/ +nitin_docs/ +nitin_scripts/ # Local notebooks (kept for development, not committed) docs/user_guide/13_index_migrations.ipynb diff --git a/nitin_docs/index_migrator/00_index.md b/nitin_docs/index_migrator/00_index.md deleted file mode 100644 index f04f30f8..00000000 --- a/nitin_docs/index_migrator/00_index.md +++ /dev/null @@ -1,71 +0,0 @@ -# Index Migrator Workspace - -## Overview - -This directory is the sole source of truth for RedisVL index migration planning. - -No implementation should start unless the corresponding task exists in a `*_tasks.md` file in this directory. - -This workspace is organized around two phases: - -- Phase 1 MVP: `drop_recreate` -- Phase 2: `iterative_shadow` - -The overall initiative covers both simple schema-only rebuilds and harder migrations that change vector dimensions, datatypes, precision, algorithms, or payload shape. Those advanced cases are intentionally delivered after the MVP rather than being treated as out of scope for the product. - -The planning goal is to make handoff simple. Another engineer or process should be able to open this directory, read the active spec and task list, and start implementation without needing to rediscover product decisions. - -## Guiding Principles - -- Prefer simple and safe over clever orchestration. -- Reuse existing RedisVL primitives before adding new abstractions. -- Migrate one index at a time. -- Keep cutover and platform scaling operator-owned. -- Fail closed on unsupported schema changes. -- Treat documentation artifacts as implementation inputs, not as narrative background. - -## Phase Status - -| Phase | Mode | Status | Implementation Target | -| --- | --- | --- | --- | -| Phase 1 | `drop_recreate` | Ready | Yes | -| Phase 2 | `iterative_shadow` | Planned | No | - -## Doc Map - -- [01_context.md](./01_context.md): customer problem, constraints, and why the work is phased -- [02_architecture.md](./02_architecture.md): shared architecture, responsibilities, capacity model, and diagrams -- [03_benchmarking.md](./03_benchmarking.md): migration benchmarking goals, metrics, scenarios, and output artifacts -- [90_prd.md](./90_prd.md): final product requirements document for team review -- [10_v1_drop_recreate_spec.md](./10_v1_drop_recreate_spec.md): decision-complete MVP spec -- [11_v1_drop_recreate_tasks.md](./11_v1_drop_recreate_tasks.md): implementable MVP task list -- [12_v1_drop_recreate_tests.md](./12_v1_drop_recreate_tests.md): MVP test plan -- [20_v2_iterative_shadow_spec.md](./20_v2_iterative_shadow_spec.md): future iterative shadow spec -- [21_v2_iterative_shadow_tasks.md](./21_v2_iterative_shadow_tasks.md): future iterative shadow tasks -- [22_v2_iterative_shadow_tests.md](./22_v2_iterative_shadow_tests.md): future iterative shadow test plan - -## Current Truth - -The active implementation target is Phase 1. - -- Spec: [10_v1_drop_recreate_spec.md](./10_v1_drop_recreate_spec.md) -- Tasks: [11_v1_drop_recreate_tasks.md](./11_v1_drop_recreate_tasks.md) -- Tests: [12_v1_drop_recreate_tests.md](./12_v1_drop_recreate_tests.md) - -## Next Actions - -- `V1-T01` -- `V1-T02` -- `V1-T03` - -## Locked Decisions - -- The planning workspace lives entirely under `nitin_docs/index_migrator/`. -- The top-level migration notes have been removed to avoid competing sources of truth. -- Phase 1 is documentation-backed implementation scope. -- Phase 2 stays planned until Phase 1 is implemented and learnings are folded back into this directory. -- The default artifact format for plans and reports is YAML. -- Benchmarking is required for migration duration, query impact, and resource-impact planning, but it should be implemented with simple structured outputs rather than a separate benchmarking framework. -- The default execution unit is a single index. -- The default operational model is operator-owned downtime, cutover, and scaling. -- Phase 2 owns advanced vector and payload-shape migrations, including datatype, precision, dimension, and algorithm changes. diff --git a/nitin_docs/index_migrator/01_context.md b/nitin_docs/index_migrator/01_context.md deleted file mode 100644 index 9bbdf9f7..00000000 --- a/nitin_docs/index_migrator/01_context.md +++ /dev/null @@ -1,100 +0,0 @@ -# Index Migrator Context - -## Problem Statement - -RedisVL does not currently provide a first-class migration workflow for search index changes. - -Today, teams can create indexes, delete indexes, inspect index info, and load documents, but they still need ad hoc scripts and operational runbooks to handle schema evolution. This becomes risky when the index is large, shared by multiple applications, or deployed on clustered Redis Cloud or Redis Software. - -The migration problem has three different shapes: - -- A simpler index rebuild that preserves existing documents and recreates the index definition in place. -- A shadow migration over the same documents when the target schema can still be built from the current stored payload. -- A shadow migration with transform or backfill when vector dimensions, datatypes, precision, algorithms, or payload shape change and a new target payload must be built. - -This workspace deliberately splits those shapes into phases instead of trying to solve everything in one design. Phase 1 proves the plan-first migration workflow. Phase 2 exists to take on the harder vector and payload-shape migrations safely. - -## Customer Requirements - -The planning baseline for this work is: - -- preserve existing documents during migration -- capture the previous index configuration before making changes -- apply only the requested schema changes -- preview the migration plan before execution -- support advanced vector migrations such as `HNSW -> FLAT`, `FP32 -> FP16`, vector dimension changes, and payload-shape-changing model or algorithm swaps -- estimate migration timing, memory impact, and operational impact using simple benchmark artifacts -- benchmark source-versus-target memory and size changes, including peak overlap footprint during shadow migrations -- support both guided and scripted workflows -- make downtime and disruption explicit -- support large datasets without defaulting to full-keyspace audits or fleet-wide orchestration -- keep the implementation understandable enough that another team can operate it safely - -## Current RedisVL Capabilities - -RedisVL already has useful primitives that should be reused instead of replaced: - -- `SearchIndex.from_existing()` can reconstruct schema from a live index. -- `SearchIndex.delete(drop=False)` can remove the index structure without deleting documents. -- `SearchIndex.info()` can retrieve index stats used for planning and validation. -- Existing CLI commands already establish the connection and index lookup patterns the migrator can follow. - -RedisVL does not yet have: - -- a migration planner -- a schema diff classifier -- a migration-specific CLI workflow -- a guided schema migration wizard -- structured migration reports -- capacity-aware orchestration across indexes -- transform or backfill planning for migrations that need new stored payloads - -## Why Phase 1 Comes First - -Phase 1 is intentionally narrow because it gives the team an MVP that is both useful and low-risk: - -- It preserves documents while changing only the index definition. -- It reuses current RedisVL primitives instead of introducing a separate migration runtime. -- It keeps operational ownership clear: RedisVL handles planning, execution, and validation for a single index, while the operator handles the migration window and downstream application expectations. -- It avoids the hardest problems for now: target-payload generation, shadow overlap estimation, cutover automation, and cluster-wide scheduling. - -Phase 1 does not define the full migration goal. The harder vector and payload-shape changes are the reason Phase 2 exists. - -The MVP should prove the planning model, CLI shape, plan artifact, and validation/reporting flow before more advanced orchestration is attempted. - -## Downtime and Disruption - -Phase 1 accepts downtime for the migrated index. - -Engineers need to plan for the following impacts: - -- Search on the target index is unavailable between index drop and recreated index readiness. -- Query results can be partial or unstable while the recreated index is still completing its initial indexing pass. -- Reindexing uses shared database resources and can increase CPU, memory, and indexing pressure on the deployment. -- Shadow migrations can temporarily duplicate index structures and sometimes duplicate payloads as well, increasing peak memory requirements. -- Downstream applications need either a maintenance window, a degraded mode, or a clear operational pause during the rebuild. - -The tooling should not hide these facts. The plan artifact and CLI output must force the user to acknowledge downtime before applying a `drop_recreate` migration. - -## Non-Goals - -The following are explicitly out of scope for Phase 1, not for the overall initiative: - -- a generic migration framework for every schema evolution case -- automatic platform scaling -- automatic traffic cutover -- full key manifest capture by default -- document transforms or backfills in the MVP execution path -- payload relocation to a new keyspace in the MVP execution path -- concurrent migration of multiple large indexes -- fully managed Redis Cloud or Redis Software integration -- automatic transform inference or automatic re-embedding - -The simplicity rules for this effort are: - -- use existing RedisVL index introspection and lifecycle primitives -- do not design a generic migration framework for the MVP -- do not automate platform scaling -- do not automate traffic cutover -- do not require full key manifests by default -- require an explicit transform or backfill plan before Phase 2 handles payload-shape-changing migrations diff --git a/nitin_docs/index_migrator/02_architecture.md b/nitin_docs/index_migrator/02_architecture.md deleted file mode 100644 index e7e0fe9a..00000000 --- a/nitin_docs/index_migrator/02_architecture.md +++ /dev/null @@ -1,230 +0,0 @@ -# Index Migrator Architecture - -## System Boundaries - -The migration system has three boundaries: - -- RedisVL migration workflow: snapshot, diff, plan, apply, validate, report -- Redis deployment: Redis Cloud or Redis Software database that stores documents and indexes -- Operator and application boundary: maintenance window, scaling decisions, transform inputs, and application-level cutover behavior - -The first implementation should add migration capabilities on top of existing RedisVL primitives instead of creating a separate control plane. - -```mermaid -flowchart LR - OP["Engineer / Operator"] --> CLI["RedisVL migrate CLI"] - APP["Applications"] --> DB["Redis database endpoint"] - - CLI --> SNAP["Snapshot + diff + mode selection"] - CLI --> EXEC["Executor"] - CLI --> VAL["Validator + report"] - - SNAP --> IDX["Live index metadata and stats"] - SNAP --> INV["Optional platform inventory"] - SNAP --> XFORM["Optional transform or backfill plan"] - - EXEC --> DB - VAL --> DB - EXEC --> TARGET["Optional target keyspace + shadow index"] - TARGET --> DB - - DB --> SHARDS["One logical index may span multiple shards"] - OP --> INV - OP --> XFORM -``` - -## Responsibilities - -RedisVL should own: - -- schema snapshot and source configuration capture -- schema diff classification -- migration mode selection -- migration plan generation -- guided wizard and scripted CLI entrypoints -- supported strategy execution -- validation and report generation - -The operator should own: - -- choosing the migration window -- accepting downtime or degraded behavior -- providing platform inventory when capacity planning matters -- providing transform or backfill inputs when payload shape changes -- scaling the Redis deployment -- application cutover and rollback decisions - -The platform should be treated as an external dependency, not as part of the MVP runtime. - -## Platform Model - -The migrator should reason about the deployment at the database level. - -For planning purposes: - -- treat the database endpoint as the unit of execution -- treat a search index as one logical index even if the deployment is sharded -- do not build logic that assumes an entire index lives on a single shard -- record where data lives in terms of database, prefixes, key separators, and target keyspace plans, not physical shard pinning - -This keeps the model compatible with both Redis Cloud and Redis Software without requiring the MVP to integrate directly with their platform APIs. - -## Migration Modes - -### `drop_recreate` - -This is the Phase 1 MVP. - -- Snapshot the current schema and index stats. -- Merge only the requested schema changes. -- Drop only the index structure, preserving documents. -- Recreate the index with the merged schema. -- Wait until indexing is complete. -- Validate and report. - -This mode is explicit about downtime and does not attempt to preserve uninterrupted query availability. - -### `iterative_shadow` - -This is the planned Phase 2 mode. - -- Work on one index at a time. -- Check database-level capacity before creating any shadow index. -- Choose between: - - `shadow_reindex` when the target schema can be built from the current stored payload. - - `shadow_rewrite` when vector datatype, precision, dimension, algorithm, or payload shape changes require a new target payload or keyspace. -- Create a shadow target for the current index only. -- Transform or backfill into a target keyspace when the migration changes payload shape. -- Validate the shadow target. -- Hand cutover to the operator. -- Retire the old index, and optionally the old target payload, only after cutover confirmation. - -This mode aims to reduce disruption without introducing automatic cutover or automatic scaling. This is the mode that should ultimately support migrations such as `HNSW -> FLAT`, `FP32 -> FP16`, vector dimension changes, and embedding-model-driven payload rewrites. - -## Capacity Model - -Phase 1 keeps capacity handling simple: - -- use source index stats for warnings and reports -- show expected downtime and indexing pressure -- do not block on a complex capacity estimator - -Phase 2 introduces a conservative capacity gate: - -- planner input is database-level, not shard-local -- one index at a time is the only supported execution unit -- estimate both source and target footprint -- separate document footprint from index footprint -- calculate peak overlap as the source footprint plus the target footprint that exists during migration -- capture memory savings or growth caused by algorithm, datatype, precision, dimension, and payload-shape changes -- the planner blocks if available headroom is below the estimated peak overlap plus reserve -- scaling stays operator-owned - -Default key-location capture is intentionally bounded: - -- store index name -- store storage type -- store prefixes -- store key separator -- store a bounded key sample - -Full key manifests are not part of the default path. - -## Benchmarking Model - -Benchmarking should be built into migration reporting, not treated as a separate system. - -The shared model is: - -- capture baseline metadata before migration -- capture timing and progress during migration -- capture validation and query-impact signals after migration -- persist simple YAML benchmark artifacts that can be compared across runs - -Benchmarking should focus on the operator questions that matter most: - -- total migration duration -- downtime or overlap duration -- document throughput -- query latency change during the migration window -- resource impact before, during, and after migration -- source-versus-target memory and size delta -- estimated versus actual peak overlap footprint - -The benchmark requirements are defined in [03_benchmarking.md](./03_benchmarking.md). - -## Failure Model - -The system should fail closed. - -- Unsupported schema diffs stop at `plan`. -- Missing transform inputs for a payload-shape-changing migration stop at `plan`. -- Missing source metadata stops at `plan`. -- `apply` never deletes documents in Phase 1. -- Validation failures produce a report and manual next steps. -- The tool does not attempt automatic rollback or automatic traffic switching. - -## `drop_recreate` Sequence - -```mermaid -sequenceDiagram - participant Op as Operator - participant CLI as RedisVL migrate CLI - participant Snap as Snapshot + diff - participant DB as Redis - participant Val as Validator - - Op->>CLI: migrate plan or wizard - CLI->>Snap: capture source schema and stats - Snap->>DB: read live index metadata - Snap-->>CLI: normalized source snapshot - CLI-->>Op: migration_plan.yaml with downtime warning - - Op->>CLI: migrate apply --allow-downtime - CLI->>DB: drop index only - CLI->>DB: recreate merged index - loop until indexing complete - CLI->>DB: poll index info - DB-->>CLI: indexing status and counts - end - CLI->>Val: run validation checks - Val->>DB: verify schema, counts, samples - Val-->>CLI: migration_report.yaml - CLI-->>Op: success or manual review -``` - -## `iterative_shadow` Sequence - -```mermaid -sequenceDiagram - participant Op as Operator - participant CLI as RedisVL migrate CLI - participant Plan as Shadow planner - participant DB as Redis - participant App as Application - - Op->>CLI: migrate plan --mode iterative_shadow - CLI->>Plan: capture source stats, target schema, transform plan, and platform inventory - Plan-->>CLI: READY, SCALE_REQUIRED, or MANUAL_REVIEW_REQUIRED - CLI-->>Op: plan and operator actions - - Op->>CLI: migrate apply --mode iterative_shadow - alt payload-compatible shadow - CLI->>DB: create shadow index for one source index - else payload rewrite required - CLI->>DB: create target keyspace and shadow index - loop backfill target payload - CLI->>DB: read source documents - CLI->>DB: write transformed target documents - end - end - loop until shadow ready - CLI->>DB: poll shadow index info - DB-->>CLI: readiness status - end - CLI->>DB: validate shadow index - CLI-->>Op: cutover runbook - Op->>App: switch to target index - Op->>CLI: confirm cutover - CLI->>DB: retire old index -``` diff --git a/nitin_docs/index_migrator/03_benchmarking.md b/nitin_docs/index_migrator/03_benchmarking.md deleted file mode 100644 index 67a637f1..00000000 --- a/nitin_docs/index_migrator/03_benchmarking.md +++ /dev/null @@ -1,248 +0,0 @@ -# Migration Benchmarking - -## Goals - -Migration benchmarking exists to answer practical operator questions: - -- how long will the migration take -- how long will search be degraded or unavailable -- how much shared Redis capacity will the migration consume -- how much the target schema or vector shape will increase or reduce memory usage -- how much query performance changes during the migration window -- whether future migrations can be estimated from previous runs - -The first benchmarking design should stay simple. It should collect structured measurements from real runs and manual rehearsals rather than introducing a separate performance framework before the migrator exists. - -## Core Benchmark Questions - -Every migration benchmark should answer: - -1. How long did planning take? -2. How long did `apply` take end-to-end? -3. How long was the index unavailable or in degraded indexing state? -4. What document throughput did the migration achieve? -5. What query latency and error-rate changes occurred during the migration? -6. How much memory, flash, or disk footprint changed before, during, and after migration? -7. How accurate was the peak-overlap estimate? -8. Did the final migrated index match the expected schema and document count? - -## Metrics - -### Timing Metrics - -- `plan_duration_seconds` -- `apply_duration_seconds` -- `validation_duration_seconds` -- `total_migration_duration_seconds` -- `drop_duration_seconds` -- `recreate_duration_seconds` -- `initial_indexing_duration_seconds` -- `downtime_duration_seconds` for `drop_recreate` -- `shadow_overlap_duration_seconds` for `iterative_shadow` -- `transform_duration_seconds` for payload rewrite work -- `backfill_duration_seconds` for target payload creation - -### Throughput Metrics - -- `source_num_docs` -- `target_num_docs` -- `documents_indexed_per_second` -- `documents_transformed_per_second` -- `bytes_rewritten_per_second` -- `progress_samples` captured during readiness polling - -### Query Impact Metrics - -- baseline query latency: `p50`, `p95`, `p99` -- during-migration query latency: `p50`, `p95`, `p99` -- post-migration query latency: `p50`, `p95`, `p99` -- query error rate during migration -- query result overlap or sample correctness checks - -### Resource Impact Metrics - -- source document footprint from live stats or sampling -- source index size from live stats -- target document footprint from live stats or sampling -- target index size from live stats -- total source footprint -- total target footprint -- footprint delta after migration -- estimated peak overlap footprint -- actual peak overlap footprint -- indexing failure delta -- memory headroom before migration -- memory headroom after migration -- peak memory headroom during overlap -- flash or disk footprint before and after when relevant -- source vector dimensions, datatype, precision, and algorithm -- target vector dimensions, datatype, precision, and algorithm -- source vector bytes per document -- target vector bytes per document - -### Correctness Metrics - -- schema match -- document count match -- indexing failure delta equals zero -- representative document fetch checks pass - -## Benchmark Inputs - -Each benchmark run should record the workload context, not just the raw timings. - -Required context: - -- migration mode -- dataset size -- storage type -- field mix -- whether vectors are present -- source and target vector configuration when vectors are present -- whether payload shape changes -- shard count -- replica count -- query load level during migration -- environment label such as `local`, `staging`, `redis_cloud`, or `redis_software` - -Useful optional context: - -- vector dimensions and datatype -- vector precision and algorithm -- auto-tiering enabled or disabled -- representative document size -- maintenance window target - -## Benchmark Scenarios - -Start with a small scenario matrix and expand only when needed. - -Minimum Phase 1 benchmark scenarios: - -- small index, low query load -- medium or large index, low query load -- medium or large index, representative read load - -Minimum Phase 2 benchmark scenarios: - -- one shadow migration on a sharded deployment with sufficient capacity -- one shadow migration that is blocked by the capacity gate -- one shadow migration under representative read load -- one algorithm migration such as `HNSW -> FLAT` -- one vector storage migration such as `FP32 -> FP16` -- one payload-shape-changing migration that requires transform or backfill - -Scale variables should be changed one at a time where possible: - -- document count -- shard count -- query concurrency -- storage type -- vector footprint - -## Benchmark Artifacts - -The benchmark workflow should use simple YAML artifacts. - -### `benchmark_manifest.yaml` - -```yaml -version: 1 -label: staging-large-read-load -mode: drop_recreate -environment: staging -dataset: - num_docs: 1000000 - storage_type: json - vector_fields_present: true -platform: - shard_count: 4 - replica_count: 1 -workload: - query_profile: representative-read - query_check_file: queries.yaml -notes: "" -``` - -### `benchmark_report.yaml` - -```yaml -version: 1 -label: staging-large-read-load -mode: drop_recreate -timings: - total_migration_duration_seconds: 540 - downtime_duration_seconds: 420 - validation_duration_seconds: 18 -throughput: - source_num_docs: 1000000 - documents_indexed_per_second: 2380.95 -query_impact: - baseline_p95_ms: 42 - during_migration_p95_ms: 95 - post_migration_p95_ms: 44 -resource_impact: - source_doc_footprint_mb: 6144 - source_index_size_mb: 8192 - target_doc_footprint_mb: 6144 - target_index_size_mb: 6144 - total_footprint_delta_mb: -2048 - estimated_peak_overlap_footprint_mb: 20480 - actual_peak_overlap_footprint_mb: 19840 - source_vector: - dimensions: 1536 - datatype: float32 - algorithm: hnsw - target_vector: - dimensions: 1536 - datatype: float16 - algorithm: flat -correctness: - schema_match: true - doc_count_match: true -``` - -These artifacts are planning and validation aids. They should not become a separate system before the migrator itself is implemented. - -## How Benchmarking Fits the Phases - -### Phase 1: `drop_recreate` - -Phase 1 should always record: - -- start time -- end time -- index downtime duration -- readiness wait duration -- source and target document counts -- source and target index stats -- observed source-versus-target index footprint delta - -Phase 1 should optionally record: - -- representative query latency before, during, and after migration -- query correctness checks using the same file as validation queries - -### Phase 2: `iterative_shadow` - -Phase 2 should always record: - -- source-to-shadow overlap duration -- planner estimate versus actual runtime -- capacity gate decision -- source and target document and index stats -- estimated versus actual peak overlap footprint -- observed memory savings or growth after the migration -- query impact during overlap - -Phase 2 should use benchmark history as advisory input for ETA and risk reporting, not as a hard execution dependency. - -## Exit Criteria - -Benchmarking is good enough for the first implementation when: - -- every migration report includes core timing and correctness metrics -- every shadow migration benchmark includes source-versus-target footprint deltas -- manual benchmark rehearsals can be run from a simple manifest -- the docs define what to collect before performance tuning begins -- benchmark requirements do not force a separate subsystem before the migrator ships diff --git a/nitin_docs/index_migrator/10_v1_drop_recreate_spec.md b/nitin_docs/index_migrator/10_v1_drop_recreate_spec.md deleted file mode 100644 index fd108f8d..00000000 --- a/nitin_docs/index_migrator/10_v1_drop_recreate_spec.md +++ /dev/null @@ -1,306 +0,0 @@ -# Phase 1 Spec: `drop_recreate` - -## Goal - -Build a simple RedisVL migration workflow that: - -- preserves existing documents -- captures the old index configuration before change -- applies only the user-requested schema changes -- generates a plan before any mutation -- supports both guided and scripted use -- explicitly accepts downtime for the migrated index - -This phase is intentionally smaller than the full product goal. Vector datatype, precision, dimension, algorithm, and payload-shape-changing migrations are still in scope for the overall initiative, but they are deferred to `iterative_shadow`. - -This is the only implementation target after the docs land. - -## Supported Changes - -The MVP supports schema changes that can be satisfied by rebuilding the index over the existing document set without rewriting or relocating stored documents. - -Supported categories: - -- add a new non-vector field that indexes data already present in stored documents -- remove an existing field from the index definition -- change index options on an existing non-vector field when the field name, field type, and storage path stay the same -- change index-level options that only affect index definition and do not relocate data - -Supported field types for MVP changes: - -- text -- tag -- numeric -- geo - -The MVP always recreates the same logical index name unless the user is only generating a plan. - -## Blocked Changes - -The CLI must classify the following changes as unsupported in the MVP and stop before `apply`: - -- key prefix changes -- key separator changes -- storage type changes -- JSON path remodels -- field renames -- vector dimension changes -- vector datatype changes -- vector precision changes -- any vector field algorithm change that depends on different stored payload shape -- any change that requires document rewrite or relocation -- any change that requires a new index name as part of the execution path - -These changes should be reported as candidates for the Phase 2 `iterative_shadow` path rather than presented as unsupported forever. - -## Inputs - -The workflow accepts: - -- Redis connection parameters -- source index name -- one of: - - `schema_patch.yaml` - - `target_schema.yaml` - - interactive wizard answers - -Recommended CLI surface: - -```text -rvl migrate plan --index --schema-patch -rvl migrate plan --index --target-schema -rvl migrate wizard --index --plan-out -rvl migrate apply --plan --allow-downtime -rvl migrate validate --plan -``` - -Default optional flags: - -- `--plan-out` -- `--report-out` -- `--key-sample-limit` -- `--query-check-file` -- `--non-interactive` - -### `schema_patch.yaml` - -This is the authoritative input model for requested changes. Unspecified source configuration is preserved by default. - -Example: - -```yaml -version: 1 -changes: - add_fields: - - name: category - type: tag - path: $.category - separator: "," - remove_fields: - - legacy_score - update_fields: - - name: title - options: - sortable: true -``` - -### `target_schema.yaml` - -This is a convenience input. The planner normalizes it into a schema patch by diffing it against the live source schema. - -## Outputs - -The workflow produces: - -- `migration_plan.yaml` -- `migration_report.yaml` -- optional `benchmark_report.yaml` -- console summaries for plan, apply, and validate - -### `migration_plan.yaml` - -Required fields: - -```yaml -version: 1 -mode: drop_recreate -source: - index_name: docs - schema_snapshot: {} - stats_snapshot: {} - keyspace: - storage_type: json - prefixes: ["docs"] - key_separator: ":" - key_sample: ["docs:1", "docs:2"] -requested_changes: {} -merged_target_schema: {} -diff_classification: - supported: true - blocked_reasons: [] -warnings: - - index downtime is required -validation: - require_doc_count_match: true - require_schema_match: true -``` - -### `migration_report.yaml` - -Required fields: - -```yaml -version: 1 -mode: drop_recreate -source_index: docs -result: succeeded -started_at: 2026-03-17T00:00:00Z -finished_at: 2026-03-17T00:05:00Z -timings: - total_migration_duration_seconds: 300 - drop_duration_seconds: 3 - recreate_duration_seconds: 12 - initial_indexing_duration_seconds: 270 - validation_duration_seconds: 15 - downtime_duration_seconds: 285 -validation: - schema_match: true - doc_count_match: true - indexing_failures_delta: 0 - query_checks: [] -benchmark_summary: - documents_indexed_per_second: 3703.7 - source_index_size_mb: 2048 - target_index_size_mb: 1984 - index_size_delta_mb: -64 - baseline_query_p95_ms: 42 - during_migration_query_p95_ms: 90 - post_migration_query_p95_ms: 44 -manual_actions: [] -``` - -## CLI UX - -### `plan` - -- Capture the source snapshot from the live index. -- Normalize requested changes. -- Classify the diff as supported or blocked. -- Emit `migration_plan.yaml`. -- Print a short risk summary that includes downtime. - -### `wizard` - -- Read the live source schema first. -- Walk the user through supported change categories only. -- Reject unsupported requests during the wizard instead of silently converting them. -- Explain when a blocked request belongs to a future `iterative_shadow` migration. -- Emit the same `migration_plan.yaml` shape as `plan`. - -### `apply` - -- Accept only `migration_plan.yaml` as input. -- Require `--allow-downtime`. -- Refuse to run if the plan contains blocked reasons. -- Refuse to run if the current live schema no longer matches the saved source snapshot. - -### `validate` - -- Re-run validation checks from the plan against the current live index. -- Emit `migration_report.yaml`. -- Emit `benchmark_report.yaml` when benchmark fields were collected. - -## Execution Flow - -1. Snapshot source state. - - Load the live index schema using existing RedisVL introspection. - - Capture live stats from index info. - - Record storage type, prefixes, key separator, and a bounded key sample. -2. Normalize requested changes. - - If the input is `target_schema.yaml`, diff it against the source schema and convert it to a patch. - - If the input is wizard answers, convert them to the same patch model. -3. Merge and classify. - - Apply only requested changes to the source schema. - - Classify each diff as supported or blocked. - - Stop if any blocked diff exists. -4. Generate the plan. - - Save source snapshot, requested changes, merged target schema, validation policy, and warnings. -5. Apply the migration. - - Confirm current live schema still matches the source snapshot. - - Drop only the index structure. - - Recreate the same index name using the merged target schema. -6. Wait for indexing completion. - - Poll live index info until `indexing` is false and `percent_indexed` is complete when those fields are available. - - If those fields are unavailable, poll `num_docs` and readiness twice in a row before continuing. - - Stop with timeout rather than waiting forever. -7. Validate. - - Compare live schema to merged target schema. - - Compare live doc count to source doc count. - - Check indexing failure delta. - - Run optional query checks. -8. Emit the report. -9. Emit benchmark artifacts when benchmark data was collected. - -## Validation - -Required validation checks: - -- exact schema match against `merged_target_schema` -- live doc count equals source `num_docs` -- `hash_indexing_failures` does not increase -- key sample records still exist - -Optional validation checks: - -- query checks loaded from `--query-check-file` -- bounded sample fetch checks for representative document ids - -Benchmark fields that should be collected during Phase 1: - -- migration start and end timestamps -- index downtime duration -- readiness polling duration -- source and target document counts -- documents indexed per second -- source and target index footprint -- observed index footprint delta after recreate -- optional representative query latency before, during, and after migration - -Validation is a hard failure for `apply`. - -## Failure Handling - -The MVP fails closed. - -- Unsupported diff: stop at `plan`. -- Source snapshot mismatch at apply time: stop and ask the operator to regenerate the plan. -- Drop succeeds but recreate fails: documents remain; emit a failure report and a manual recovery command using the saved merged schema. -- Validation fails after recreate: leave the recreated index in place, emit a failure report, and stop. -- Interrupted run: no checkpointing in MVP. The operator reruns `plan` or reuses the existing plan after confirming the live source state is still compatible. - -The MVP does not implement automatic rollback. - -## Operational Guidance - -This mode is downtime-accepting by design. - -Engineers should assume: - -- the index is unavailable between drop and recreated index readiness -- search quality can be degraded while initial indexing completes -- large indexes can place measurable pressure on shared Redis resources -- off-peak execution is preferred -- application-level maintenance handling is outside RedisVL -- blocked vector and payload-shape changes should be rerouted to Phase 2 planning instead of being forced into this path - -Default key capture is intentionally small: - -- keyspace definition is always recorded -- a bounded key sample is recorded -- a full key manifest is not part of the default MVP path - -Benchmarking for Phase 1 should stay simple: - -- capture timing and correctness metrics in structured reports -- support manual benchmark rehearsals using [03_benchmarking.md](./03_benchmarking.md) -- avoid introducing a dedicated benchmarking subsystem before the migrator exists diff --git a/nitin_docs/index_migrator/11_v1_drop_recreate_tasks.md b/nitin_docs/index_migrator/11_v1_drop_recreate_tasks.md deleted file mode 100644 index d30ae839..00000000 --- a/nitin_docs/index_migrator/11_v1_drop_recreate_tasks.md +++ /dev/null @@ -1,160 +0,0 @@ -# Phase 1 Tasks: `drop_recreate` - -## Task Template - -Every Phase 1 task must document: - -- `ID` -- `Status` -- `Goal` -- `Inputs` -- `Outputs` -- `Touchpoints` -- `Dependencies` -- `Acceptance Criteria` -- `Non-Goals` -- `Handoff Notes` - -## V1-T01 - -- `ID`: `V1-T01` -- `Status`: `Ready` -- `Goal`: Build the source snapshot and artifact models used by `plan`, `apply`, and `validate`. -- `Inputs`: source index name, Redis connection settings, optional key sample limit -- `Outputs`: in-memory and YAML-serializable source snapshot, migration plan model, migration report model -- `Touchpoints`: `redisvl/index/index.py`, new `redisvl/migration/models.py`, new `redisvl/migration/snapshot.py` -- `Dependencies`: none -- `Acceptance Criteria`: - - source schema can be loaded from a live index - - source stats needed by the spec are captured - - storage type, prefixes, key separator, and bounded key sample are recorded - - models serialize to YAML without losing required fields -- `Non-Goals`: - - full key manifest generation - - capacity estimation - - schema diff logic -- `Handoff Notes`: preserve the raw source schema as faithfully as possible so later diffing does not invent defaults. - -## V1-T02 - -- `ID`: `V1-T02` -- `Status`: `Ready` -- `Goal`: Implement schema patch normalization, source-plus-patch merge, and supported-versus-blocked diff classification. -- `Inputs`: source schema snapshot, `schema_patch.yaml` or normalized target schema diff -- `Outputs`: merged target schema and diff classification result -- `Touchpoints`: new `redisvl/migration/patch.py`, new `redisvl/migration/diff.py`, `redisvl/schema` -- `Dependencies`: `V1-T01` -- `Acceptance Criteria`: - - unspecified source config is preserved by default - - blocked diff categories from the spec are rejected with actionable reasons - - supported changes produce a deterministic merged target schema - - `target_schema.yaml` input normalizes to the same patch model -- `Non-Goals`: - - document rewrite planning - - vector migration logic - - shadow migration planning -- `Handoff Notes`: prefer an explicit allowlist of supported diff categories over a generic schema merge engine. - -## V1-T03 - -- `ID`: `V1-T03` -- `Status`: `Ready` -- `Goal`: Add the `plan` command and plan artifact generation. -- `Inputs`: source index, connection settings, patch or target schema input -- `Outputs`: `migration_plan.yaml`, console summary -- `Touchpoints`: new `redisvl/cli/migrate.py`, `redisvl/cli/main.py`, `redisvl/migration/planner.py` -- `Dependencies`: `V1-T01`, `V1-T02` -- `Acceptance Criteria`: - - `plan` emits the required YAML shape - - blocked plans do not proceed to mutation - - the console summary includes downtime warnings - - the current plan format is stable enough for `apply` and `validate` -- `Non-Goals`: - - interactive wizard flow - - mutation against Redis - - advanced report rendering -- `Handoff Notes`: make the plan file human-readable so operators can review it before running `apply`. - -## V1-T04 - -- `ID`: `V1-T04` -- `Status`: `Ready` -- `Goal`: Add the guided `wizard` flow that emits the same plan artifact as `plan`. -- `Inputs`: source index, connection settings, interactive answers -- `Outputs`: normalized schema patch and `migration_plan.yaml` -- `Touchpoints`: new `redisvl/migration/wizard.py`, `redisvl/cli/migrate.py` -- `Dependencies`: `V1-T01`, `V1-T02`, `V1-T03` -- `Acceptance Criteria`: - - wizard starts from the live source schema - - wizard only offers supported MVP change categories - - wizard emits the same plan structure as `plan` - - unsupported requests are blocked during the flow -- `Non-Goals`: - - platform inventory collection - - free-form schema editing for blocked categories - - shadow migration support -- `Handoff Notes`: keep prompts simple and linear; this is a guided assistant, not a general schema builder. - -## V1-T05 - -- `ID`: `V1-T05` -- `Status`: `Ready` -- `Goal`: Implement `apply` for the `drop_recreate` strategy. -- `Inputs`: reviewed `migration_plan.yaml`, `--allow-downtime` -- `Outputs`: recreated index and execution status -- `Touchpoints`: new `redisvl/migration/executor.py`, `redisvl/cli/migrate.py`, `redisvl/index/index.py` -- `Dependencies`: `V1-T03` -- `Acceptance Criteria`: - - `apply` refuses to run without `--allow-downtime` - - source snapshot mismatch blocks execution - - index drop preserves documents - - recreated index uses the merged target schema - - readiness polling stops on success or timeout -- `Non-Goals`: - - automatic rollback - - checkpointing - - cutover orchestration -- `Handoff Notes`: use the simplest safe sequence possible; do not add background job machinery in the MVP. - -## V1-T06 - -- `ID`: `V1-T06` -- `Status`: `Ready` -- `Goal`: Implement `validate` and `migration_report.yaml`. -- `Inputs`: `migration_plan.yaml`, live index state, optional query checks -- `Outputs`: validation result, report artifact, console summary -- `Touchpoints`: new `redisvl/migration/validation.py`, new `redisvl/migration/reporting.py`, `redisvl/cli/migrate.py` -- `Dependencies`: `V1-T01`, `V1-T03`, `V1-T05` -- `Acceptance Criteria`: - - schema match is verified - - doc count match is verified - - indexing failure delta is captured - - core timing metrics are captured in the report - - optional query checks run deterministically - - report artifact is emitted for both success and failure -- `Non-Goals`: - - benchmark replay - - observability integrations - - automatic remediation -- `Handoff Notes`: keep the report format concise and stable so it can become the operator handoff artifact later. - -## V1-T07 - -- `ID`: `V1-T07` -- `Status`: `Ready` -- `Goal`: Add Phase 1 tests and user-facing documentation for the new CLI flow. -- `Inputs`: completed planner, wizard, executor, and validator behavior -- `Outputs`: passing tests and concise usage docs -- `Touchpoints`: `tests/`, `redisvl/cli`, `nitin_docs/index_migrator` -- `Dependencies`: `V1-T03`, `V1-T04`, `V1-T05`, `V1-T06` -- `Acceptance Criteria`: - - CI-friendly happy-path and failure-path tests exist - - manual benchmark rehearsal guidance exists - - manual smoke test instructions are captured in the test doc - - help text matches the Phase 1 spec - - the docs directory still points to the active truth -- `Non-Goals`: - - Phase 2 implementation - - platform API integrations - - performance tuning beyond smoke coverage -- `Handoff Notes`: keep test coverage focused on correctness and operator safety, not on simulating every future migration shape. diff --git a/nitin_docs/index_migrator/12_v1_drop_recreate_tests.md b/nitin_docs/index_migrator/12_v1_drop_recreate_tests.md deleted file mode 100644 index e6852a6f..00000000 --- a/nitin_docs/index_migrator/12_v1_drop_recreate_tests.md +++ /dev/null @@ -1,122 +0,0 @@ -# Phase 1 Tests: `drop_recreate` - -## Test Matrix - -| ID | Scenario | Type | Expected Result | -| --- | --- | --- | --- | -| V1-CI-01 | source snapshot loads live schema and stats | CI | snapshot matches live index metadata | -| V1-CI-02 | patch merge preserves unspecified config | CI | merged target schema is deterministic | -| V1-CI-03 | blocked diff categories stop at `plan` | CI | no mutation and actionable error | -| V1-CI-04 | `plan` emits valid YAML artifact | CI | plan file contains required fields | -| V1-CI-05 | `apply` requires `--allow-downtime` | CI | execution blocked without flag | -| V1-CI-06 | drop and recreate preserves documents | CI | doc count matches before and after | -| V1-CI-07 | readiness polling completes or times out | CI | executor exits deterministically | -| V1-CI-08 | `validate` emits a report on success | CI | report contains required fields | -| V1-CI-09 | `validate` emits a report on failure | CI | failure report includes manual actions | -| V1-CI-10 | timing metrics are captured in reports | CI | report contains stable timing fields | -| V1-MAN-01 | guided wizard produces the same plan model | Manual | plan matches scripted path | -| V1-MAN-02 | realistic rebuild on larger dataset | Manual | migration completes with expected downtime | -| V1-MAN-03 | benchmark rehearsal on representative workload | Manual | duration, throughput, and query impact are recorded | - -## Happy Path - -The minimum automated happy path should cover: - -- create a source index with existing documents -- generate `migration_plan.yaml` from `schema_patch.yaml` -- run `apply --allow-downtime` -- wait for recreated index readiness -- run `validate` -- confirm schema match, doc count match, and zero indexing failure delta - -Representative happy-path schema changes: - -- add a tag field backed by existing JSON data -- remove a legacy numeric field from the index -- make an existing text field sortable - -## Failure Paths - -CI should cover at least: - -- blocked diff because of vector change -- blocked diff because of prefix change -- source snapshot mismatch between `plan` and `apply` -- recreate failure after drop -- validation failure because doc counts diverge -- readiness timeout -- missing required plan fields - -Every failure path must prove: - -- documents are not intentionally deleted by the migrator -- an actionable error is surfaced -- blocked vector and payload-shape diffs point the user to the Phase 2 migration path -- a `migration_report.yaml` can still be produced when the failure happens after `apply` starts - -## Manual Smoke Test - -Run a manual smoke test on a non-production Redis deployment: - -1. Create an index with representative JSON documents. -2. Prepare a `schema_patch.yaml` that adds one non-vector field and removes one old field. -3. Run `rvl migrate plan`. -4. Confirm the plan includes the downtime warning and no blocked diffs. -5. Run `rvl migrate apply --allow-downtime`. -6. Wait until readiness completes. -7. Run `rvl migrate validate`. -8. Confirm search behavior has resumed and the new schema is active. - -Manual smoke test success means: - -- the operator can understand the plan without reading code -- the index rebuild completes without deleting documents -- the report is sufficient to hand back to another operator - -## Scale Sanity Check - -Phase 1 does not need a cluster-wide stress harness, but it does need a basic scale sanity check. - -Manual checks: - -- run the flow on an index large enough to make polling and downtime visible -- confirm default key capture stays bounded -- confirm the tool does not attempt a full key manifest by default -- confirm console output still stays readable for a larger index - -This is not a benchmark. The goal is to catch accidental implementation choices that make the MVP operationally unsafe on larger datasets. - -## Benchmark Rehearsal - -Phase 1 benchmarking should be lightweight and operationally useful. - -Use a simple rehearsal driven by [03_benchmarking.md](./03_benchmarking.md): - -1. Record a benchmark label and workload context. -2. Measure baseline query latency on a representative query set. -3. Run the migration on a realistic non-production index. -4. Record total migration duration, downtime duration, and readiness duration. -5. Record source and target document counts and index stats. -6. Record the observed source-versus-target index footprint delta. -7. Re-run the representative query set after migration. -8. Save a `benchmark_report.yaml`. - -The first benchmark questions to answer are: - -- how long does the rebuild take end-to-end -- how long is the index unavailable -- how many documents per second can the rebuild sustain -- how much query latency changes during and after the rebuild -- how much the recreated index footprint changes even for schema-only rebuilds -- whether the observed runtime is predictable enough for a maintenance window - -## Release Gate - -Phase 1 should not be considered ready until all of the following are true: - -- all CI scenarios in the test matrix pass -- at least one manual smoke test passes -- at least one benchmark rehearsal has been documented on a representative dataset -- help text matches the spec -- the docs in `nitin_docs/index_migrator/` still match the shipped CLI behavior -- the release notes or implementation summary clearly state that `drop_recreate` is downtime-accepting diff --git a/nitin_docs/index_migrator/20_v2_iterative_shadow_spec.md b/nitin_docs/index_migrator/20_v2_iterative_shadow_spec.md deleted file mode 100644 index c5b893a0..00000000 --- a/nitin_docs/index_migrator/20_v2_iterative_shadow_spec.md +++ /dev/null @@ -1,213 +0,0 @@ -# Phase 2 Spec: `iterative_shadow` - -## Goal - -Add a conservative, capacity-aware shadow migration mode that works one index at a time and reduces disruption without attempting to automate cutover or platform scaling. - -This phase exists to support the migration cases that Phase 1 intentionally does not handle safely: - -- vector datatype changes such as `FP32 -> FP16` -- vector precision changes -- vector dimension changes -- vector algorithm changes such as `HNSW -> FLAT` -- payload-shape-changing model or algorithm migrations that require new stored fields or a new target keyspace - -The first Phase 2 implementation should stay simple in operation even though it handles harder migration shapes: - -- one index at a time -- capacity gate before each index -- operator-owned cutover -- no automatic scale-up or scale-down -- no multi-index concurrent shadowing -- explicit transform or backfill plan when the target payload shape changes - -## Why It Is Not MVP - -This mode is not the MVP because it introduces operational questions that Phase 1 does not need to solve: - -- database-level capacity inventory -- target-footprint estimation for old and new document and index shapes -- overlap estimation for old and new payloads -- transform or backfill planning for target payload creation -- operator handoff between validation and cutover -- cleanup sequencing after cutover -- larger-scale manual testing on clustered deployments - -Phase 1 should prove the core planning and reporting model first. - -## Planner Inputs - -The Phase 2 planner takes: - -- source index name -- Redis connection settings -- supported schema patch or target schema input -- `platform_inventory.yaml` -- optional `transform_plan.yaml` when the migration requires new target payloads - -### `platform_inventory.yaml` - -```yaml -version: 1 -platform: redis_cloud -database: - name: customer-a-prod - total_memory_mb: 131072 - available_memory_mb: 32768 - shard_count: 8 - replica_count: 1 - auto_tiering: false - notes: "" -policy: - reserve_percent: 15 -``` - -Required inventory fields: - -- platform -- total memory -- available memory -- shard count -- replica count -- reserve policy - -Optional inventory fields: - -- flash or disk notes -- environment labels -- operator comments -- benchmark history notes - -### `transform_plan.yaml` - -This file is required when the target schema cannot be built from the current stored payload. - -Example: - -```yaml -version: 1 -target_keyspace: - storage_type: json - prefixes: ["docs_v2"] - key_separator: ":" -transform: - mode: rewrite - vector_fields: - - name: embedding - source_path: $.embedding - target_path: $.embedding_v2 - source_dimensions: 1536 - target_dimensions: 1536 - source_datatype: float32 - target_datatype: float16 - source_algorithm: hnsw - target_algorithm: flat - payload_changes: - - source_path: $.body - target_path: $.body_v2 - strategy: copy -``` - -The first implementation should keep this model explicit and declarative. The migrator should not guess how to transform payloads. - -## Capacity Gate - -The first Phase 2 capacity gate should be intentionally conservative. - -Planner rules: - -1. Compute source document footprint from live stats or bounded sampling. -2. Compute source index footprint from live index stats. -3. Estimate target document footprint. - - For payload-compatible shadowing, this can be zero or near-zero additional document storage. - - For payload rewrite shadowing, this includes the duplicated target payload. -4. Estimate target index footprint. - - Use live source footprint as a baseline when the target is structurally similar. - - Adjust for vector dimension, datatype, precision, and algorithm changes when those are present. -5. Compute reserve headroom as `max(operator reserve, 15 percent of configured memory)` when no stricter operator value is provided. -6. Compute `estimated_peak_overlap_footprint` as: - - `source_docs + source_index + target_docs + target_index` -7. Return `READY` only if: - - the migration diff is supported for Phase 2 - - any required transform plan is present and valid - - available memory is greater than or equal to `estimated_peak_overlap_footprint + reserve` -8. Return `SCALE_REQUIRED` when the migration is supported but headroom is insufficient. -9. Return `MANUAL_REVIEW_REQUIRED` when the diff is ambiguous or live data is insufficient for a safe estimate. - -This keeps the first shadow planner understandable and safe. More sophisticated estimators can come later if Phase 1 and early Phase 2 learnings justify them. - -The planner should also report: - -- estimated migration window -- estimated peak overlap footprint -- expected source-versus-target footprint delta after cutover -- whether the migration is `shadow_reindex` or `shadow_rewrite` - -## Execution Flow - -1. Capture the source snapshot and normalize requested changes. -2. Classify the migration as either: - - `shadow_reindex` when the target schema can be built from the current payload - - `shadow_rewrite` when a transform or backfill is needed -3. Load `platform_inventory.yaml`. -4. Load `transform_plan.yaml` when `shadow_rewrite` is required. -5. Compute the capacity gate result. -6. Stop if the result is not `READY`. -7. Create the shadow target for the current index only. -8. If `shadow_rewrite` is selected: - - create the target keyspace - - transform or backfill source documents into the target keyspace -9. Wait until the shadow index is ready. -10. Validate the shadow target. -11. Emit an operator cutover runbook. -12. Wait for operator confirmation that cutover is complete. -13. Retire the old index. -14. Retire old source payloads only when the plan explicitly says they are no longer needed. -15. Move to the next index only after the current index is finished. - -The scheduler for Phase 2 is intentionally serial. - -## Operator Actions - -The operator is responsible for: - -- supplying platform inventory -- supplying the transform or backfill plan when payload shape changes -- choosing the migration window -- scaling the database if the plan returns `SCALE_REQUIRED` -- switching application traffic to the shadow target -- confirming cutover before old index retirement -- monitoring the deployment during overlap - -RedisVL should not attempt to perform these actions automatically in the first Phase 2 implementation. - -Phase 2 should still emit structured benchmark outputs so operators can compare: - -- estimated overlap duration versus actual overlap duration -- estimated capacity usage versus observed document and index stats -- memory savings or growth after algorithm, datatype, precision, dimension, or payload-shape changes -- query latency impact during shadow validation and overlap - -## Blocked Scenarios - -The initial Phase 2 plan still blocks: - -- automatic scaling -- automatic traffic switching -- concurrent shadowing of multiple large indexes -- in-place destructive rewrites without a shadow target -- payload-shape-changing migrations without an explicit transform or backfill plan -- transform plans that do not define a deterministic target keyspace -- Active-Active specific workflows -- platform API integrations as a hard requirement - -## Open Questions Deferred - -These questions should stay deferred until after Phase 1 implementation: - -- whether to add direct Redis Cloud or Redis Software API integrations -- whether to support checkpoint and resume across shadow runs -- whether alias-based cutover should be added later -- how transform hooks should be expressed beyond the initial declarative plan format -- whether re-embedding should be integrated directly or stay an operator-supplied preprocessing step -- how much historical benchmark data should influence ETA predictions diff --git a/nitin_docs/index_migrator/21_v2_iterative_shadow_tasks.md b/nitin_docs/index_migrator/21_v2_iterative_shadow_tasks.md deleted file mode 100644 index 4b00955e..00000000 --- a/nitin_docs/index_migrator/21_v2_iterative_shadow_tasks.md +++ /dev/null @@ -1,159 +0,0 @@ -# Phase 2 Tasks: `iterative_shadow` - -## Task Template - -Every Phase 2 task must document: - -- `ID` -- `Status` -- `Goal` -- `Inputs` -- `Outputs` -- `Touchpoints` -- `Dependencies` -- `Acceptance Criteria` -- `Non-Goals` -- `Handoff Notes` - -Phase 2 tasks are planned work only. They should not start until Phase 1 implementation is complete and learnings are folded back into this workspace. - -## V2-T01 - -- `ID`: `V2-T01` -- `Status`: `Planned` -- `Goal`: Add the platform inventory model and parser used by the capacity-aware planner. -- `Inputs`: `platform_inventory.yaml` -- `Outputs`: validated inventory model -- `Touchpoints`: new `redisvl/migration/inventory.py`, `redisvl/migration/models.py`, `redisvl/cli/migrate.py` -- `Dependencies`: Phase 1 implementation complete -- `Acceptance Criteria`: - - required inventory fields are validated - - unsupported platform inventory shapes are rejected clearly - - inventory values are available to the planner without CLI-specific parsing logic -- `Non-Goals`: - - platform API calls - - capacity math - - shadow execution -- `Handoff Notes`: keep the inventory model platform-neutral enough to support both Redis Cloud and Redis Software. - -## V2-T02 - -- `ID`: `V2-T02` -- `Status`: `Planned` -- `Goal`: Add the transform or backfill plan model and classify whether a migration is `shadow_reindex` or `shadow_rewrite`. -- `Inputs`: normalized diff classification, optional `transform_plan.yaml` -- `Outputs`: validated transform model and execution-mode classification -- `Touchpoints`: new `redisvl/migration/transforms.py`, `redisvl/migration/models.py`, `redisvl/migration/planner.py` -- `Dependencies`: `V2-T01` -- `Acceptance Criteria`: - - payload-compatible migrations are classified as `shadow_reindex` - - vector or payload-shape-changing migrations require `shadow_rewrite` - - missing transform plans are rejected clearly when they are required - - transform plans remain declarative and deterministic -- `Non-Goals`: - - direct embedding generation - - platform API calls - - shadow execution -- `Handoff Notes`: keep the first transform model simple and explicit rather than inventing a generic transformation framework. - -## V2-T03 - -- `ID`: `V2-T03` -- `Status`: `Planned` -- `Goal`: Implement the conservative capacity estimator and gate result classification. -- `Inputs`: source index stats, source document footprint, inventory model, normalized diff classification, optional transform model -- `Outputs`: `READY`, `SCALE_REQUIRED`, or `MANUAL_REVIEW_REQUIRED` -- `Touchpoints`: new `redisvl/migration/capacity.py`, `redisvl/migration/planner.py` -- `Dependencies`: `V2-T01`, `V2-T02` -- `Acceptance Criteria`: - - source document and index footprint are computed consistently - - target footprint estimates account for vector datatype, precision, dimension, algorithm, and payload-shape changes when those are present - - reserve policy is applied consistently - - supported diffs can produce `READY` or `SCALE_REQUIRED` - - ambiguous inputs produce `MANUAL_REVIEW_REQUIRED` -- `Non-Goals`: - - fine-grained shard placement modeling - - automated scale actions - - performance benchmarking as a separate subsystem -- `Handoff Notes`: keep the first estimator intentionally conservative and easy to inspect. - -## V2-T04 - -- `ID`: `V2-T04` -- `Status`: `Planned` -- `Goal`: Extend the planner to support `iterative_shadow` for one index at a time. -- `Inputs`: source snapshot, normalized diff, inventory, transform model, capacity result -- `Outputs`: shadow migration plan and operator action list -- `Touchpoints`: `redisvl/migration/planner.py`, `redisvl/cli/migrate.py` -- `Dependencies`: `V2-T03` -- `Acceptance Criteria`: - - supported vector and payload-shape changes can produce a valid shadow plan - - non-`READY` capacity results block apply - - plan artifact clearly identifies source, shadow target, target keyspace when present, and operator actions - - plan artifact identifies whether the run is `shadow_reindex` or `shadow_rewrite` - - plan format stays readable and deterministic -- `Non-Goals`: - - multi-index concurrency - - automatic cleanup - - fleet scheduling -- `Handoff Notes`: preserve the same plan-first experience as Phase 1. - -## V2-T05 - -- `ID`: `V2-T05` -- `Status`: `Planned` -- `Goal`: Implement shadow target creation, optional transform or backfill execution, readiness waiting, and validation hooks. -- `Inputs`: approved shadow migration plan -- `Outputs`: ready shadow index and validation state -- `Touchpoints`: new `redisvl/migration/shadow.py`, `redisvl/migration/executor.py`, `redisvl/migration/validation.py` -- `Dependencies`: `V2-T04` -- `Acceptance Criteria`: - - only one index is processed at a time - - shadow target creation follows the plan artifact - - `shadow_rewrite` runs can build the target payload into the planned keyspace - - readiness polling behaves deterministically - - validation runs before cutover handoff -- `Non-Goals`: - - automatic cutover - - cross-index scheduling - - platform autoscaling -- `Handoff Notes`: do not generalize this into a fleet scheduler in the first Phase 2 implementation. - -## V2-T06 - -- `ID`: `V2-T06` -- `Status`: `Planned` -- `Goal`: Add validation reporting, benchmark reporting, operator handoff, cutover confirmation, and old-index retirement. -- `Inputs`: validated shadow plan and operator confirmation -- `Outputs`: post-cutover cleanup result and report -- `Touchpoints`: `redisvl/cli/migrate.py`, `redisvl/migration/reporting.py`, `redisvl/migration/executor.py` -- `Dependencies`: `V2-T05` -- `Acceptance Criteria`: - - cutover remains operator-owned - - cleanup does not run before operator confirmation - - report captures cutover handoff, cleanup outcome, and source-versus-target footprint deltas -- `Non-Goals`: - - alias management - - application config mutation - - rollback orchestration -- `Handoff Notes`: the CLI should guide the operator clearly, but it must not attempt to switch traffic itself. - -## V2-T07 - -- `ID`: `V2-T07` -- `Status`: `Planned` -- `Goal`: Add future-facing tests and docs for clustered shadow migration planning. -- `Inputs`: completed Phase 2 planner and executor behavior -- `Outputs`: test coverage, manual scale rehearsal instructions, and updated planning docs -- `Touchpoints`: `tests/`, `nitin_docs/index_migrator`, `redisvl/cli` -- `Dependencies`: `V2-T04`, `V2-T05`, `V2-T06` -- `Acceptance Criteria`: - - planner outcomes are covered in automated tests - - benchmark, ETA, and memory-delta guidance are documented for manual cluster rehearsals - - manual cluster rehearsal steps are documented - - docs still reflect the shipped Phase 2 behavior accurately -- `Non-Goals`: - - fleet-wide migration support - - performance tuning beyond safety validation - - platform-specific automation -- `Handoff Notes`: keep Phase 2 documentation grounded in the one-index-at-a-time rule. diff --git a/nitin_docs/index_migrator/22_v2_iterative_shadow_tests.md b/nitin_docs/index_migrator/22_v2_iterative_shadow_tests.md deleted file mode 100644 index e7d2d5c2..00000000 --- a/nitin_docs/index_migrator/22_v2_iterative_shadow_tests.md +++ /dev/null @@ -1,102 +0,0 @@ -# Phase 2 Tests: `iterative_shadow` - -## Clustered Test Setup - -Phase 2 needs both automated planner coverage and manual clustered rehearsals. - -Minimum clustered rehearsal setup: - -- Redis Cloud or Redis Software deployment -- sharded database -- one source index large enough to make overlap meaningful -- representative application query set -- operator-supplied `platform_inventory.yaml` -- `transform_plan.yaml` for any vector or payload-shape-changing migration - -The first manual scale rehearsal should stay focused on a single index, not a fleet-wide migration. - -## Planner Acceptance - -Automated planner tests should cover: - -- supported shadow diff with sufficient headroom returns `READY` -- supported shadow diff with insufficient headroom returns `SCALE_REQUIRED` -- ambiguous or incomplete input returns `MANUAL_REVIEW_REQUIRED` -- vector datatype, precision, dimension, or algorithm changes require `shadow_rewrite` -- payload-shape-changing diffs stop before planning unless a valid transform plan is present - -Planner acceptance is successful when the result is deterministic and the operator action list is clear. - -## Unsafe Capacity Cases - -Manual and automated coverage should include: - -- insufficient available memory -- missing or invalid inventory fields -- conflicting operator reserve policy -- large source footprint with conservative reserve -- target footprint larger than source footprint because of dimension or payload expansion -- peak overlap estimate exceeds available headroom even when post-cutover memory would shrink - -Unsafe capacity handling is correct when: - -- the planner blocks the run -- no shadow index is created -- the report tells the operator what must change before retry - -## Shadow Validation - -Validation coverage should prove: - -- shadow target reaches readiness before handoff -- schema matches the planned target -- transformed payload fields match the declared target shape when `shadow_rewrite` is used -- query checks pass before cutover -- old index is not retired before operator confirmation - -This is the safety boundary for Phase 2. - -## Benchmark Rehearsal - -Phase 2 benchmarks should answer: - -- how accurate the planner ETA was -- how long the old and shadow indexes overlapped -- how much query latency changed during overlap -- whether the capacity reserve was conservative enough -- how much memory or size changed after datatype, precision, dimension, algorithm, or payload-shape changes -- whether estimated peak overlap footprint matched observed overlap closely enough - -Minimum manual benchmark coverage: - -- one run where the planner returns `READY` and the migration completes -- one run where the planner returns `SCALE_REQUIRED` -- one run with representative read traffic during overlap -- one vector-shape or algorithm change such as `HNSW -> FLAT` or `FP32 -> FP16` -- one payload-shape-changing migration that requires transform or backfill - -Every benchmark rehearsal should produce a structured benchmark report that can be compared against previous runs. - -## Resume/Retry - -The first Phase 2 implementation does not need fleet-grade checkpointing, but it does need basic retry behavior. - -Required checks: - -- planner can be rerun with the same inventory and produce the same decision -- failed shadow creation does not trigger cleanup of the old index -- operator can rerun the planned index only after fixing the blocking condition - -If stronger checkpointing is needed later, it should become its own scoped follow-up rather than being absorbed into the first shadow implementation. - -## Exit Criteria - -Phase 2 should not move from planned to ready until: - -- Phase 1 has been implemented and reviewed -- Phase 1 learnings have been written back into this workspace -- planner outcomes are covered by automated tests -- at least one manual clustered rehearsal has been designed in detail -- at least one benchmark rehearsal has been defined for a representative shadow migration -- at least one benchmark rehearsal has been defined for a vector or payload-shape-changing shadow migration -- the one-index-at-a-time execution rule is still preserved in the design diff --git a/nitin_docs/index_migrator/90_prd.md b/nitin_docs/index_migrator/90_prd.md deleted file mode 100644 index 6bfac766..00000000 --- a/nitin_docs/index_migrator/90_prd.md +++ /dev/null @@ -1,581 +0,0 @@ -# Product Requirements Document: RedisVL Index Migrator - -## Summary - -RedisVL needs a migration workflow for search index changes that is safer and more operationally predictable than ad hoc scripts, but still simple enough to build, review, and operate without introducing an orchestration-heavy subsystem. - -This PRD defines a phased migration product: - -- Phase 1 MVP: `drop_recreate` -- Phase 2: `iterative_shadow` - -The overall product goal is broader than the MVP. The migrator should eventually handle not only simple schema changes, but also vector datatype, precision, dimension, algorithm, and payload-shape-changing migrations such as: - -- `HNSW -> FLAT` -- `FP32 -> FP16` -- vector dimension changes -- embedding or payload-shape changes that require new stored fields or a new target keyspace - -Phase 1 stays intentionally narrow so the team can ship a plan-first, document-preserving migration tool quickly. Phase 2 is where those harder migrations are handled safely through one-index-at-a-time shadow migration and optional transform or backfill. - -This document is the review-oriented summary of the detailed planning docs in this workspace. - -## Problem - -RedisVL today gives users index lifecycle primitives, not a migration product. - -Users can: - -- create indexes -- delete indexes -- inspect index information -- load documents - -Users cannot yet: - -- preview a schema migration in a structured way -- preserve current index configuration before change in a standard workflow -- apply only a requested subset of schema changes -- generate a repeatable migration plan artifact -- validate a migration with a consistent report -- estimate migration duration, query impact, or memory impact from benchmarkable outputs -- safely handle vector and payload-shape changes that require a new target shape - -This gap is manageable for small experiments, but becomes painful for production workloads where: - -- indexes can contain millions of documents -- query latency matters during rebuild windows -- teams need predictable maintenance timelines -- migrations may change vector algorithms, datatypes, or payload shape -- Redis deployments may be clustered on Redis Cloud or Redis Software -- operators need a clean handoff between planning, execution, and validation - -## Users and Review Audience - -Primary users: - -- application engineers managing RedisVL-backed indexes -- platform engineers responsible for Redis operations -- support or solution engineers helping customers plan schema migrations - -Review audience: - -- RedisVL maintainers -- product and engineering stakeholders -- operators who will validate whether the workflow is practical in real environments - -## Goals - -- Provide a first-class migration workflow for RedisVL-managed indexes. -- Preserve existing documents during the Phase 1 path. -- Capture current schema and index configuration before any mutation. -- Apply only requested schema changes. -- Require a reviewed migration plan before execution. -- Support both scripted and guided user experiences. -- Make downtime and disruption explicit. -- Add structured reports and benchmarking outputs so migration windows become more predictable over time. -- Add benchmarking for memory and size deltas caused by schema, vector, and payload-shape changes. -- Keep the implementation simple enough that another engineer can understand and modify it quickly. - -## In Scope - -### Product-Wide Scope - -- schema-change migrations for RedisVL-managed indexes -- vector datatype, precision, dimension, and algorithm migrations -- payload-shape-changing migrations when the operator provides an explicit transform or backfill plan -- YAML artifacts for plans and reports -- operator-readable console output -- one-index-at-a-time execution -- benchmarking outputs for timing, query impact, and memory or size deltas - -### Phase 1 MVP - -- one index at a time -- document-preserving `drop_recreate` migrations -- source schema and stats snapshot -- patch-based schema change requests -- target-schema diff normalization into the same patch model -- guided wizard and scripted CLI flows -- plan generation before any mutation -- explicit downtime acknowledgment for `apply` -- readiness waiting after recreate -- validation and reporting -- benchmark-friendly timing, correctness, and index-footprint outputs - -### Phase 2 - -- one-index-at-a-time `iterative_shadow` migrations -- conservative capacity gating before each index -- optional platform inventory input -- shadow target creation and validation -- `shadow_reindex` for payload-compatible migrations -- `shadow_rewrite` for vector or payload-shape-changing migrations -- explicit transform or backfill plan input when payload shape changes -- operator handoff for cutover -- benchmark outputs for ETA, peak overlap, and source-versus-target size deltas - -## Out of Scope - -- automatic traffic cutover -- automatic platform scaling -- concurrent migration of multiple large indexes -- fully managed rollback orchestration -- full key manifest capture by default -- platform API integration as a hard requirement -- automatic transform inference -- automatic embedding generation or automatic re-embedding inside the migrator -- in-place destructive payload rewrites without a shadow target - -## Product Principles - -- Prefer simple and safe over fully automated. -- Reuse existing RedisVL primitives before adding new layers. -- Make the plan artifact the center of the workflow. -- Treat operator actions as first-class, not hidden implementation details. -- Fail closed when a migration request is ambiguous or unsupported for the selected phase. -- Measure migration behavior so future planning gets better with evidence. - -## Current State - -RedisVL already has building blocks that support a migration product: - -- `SearchIndex.from_existing()` for live schema reconstruction -- `SearchIndex.delete(drop=False)` for dropping the index structure while preserving documents -- `SearchIndex.info()` for live index stats that can inform planning, validation, and timing - -What is missing is the product layer on top: - -- a migration planner -- schema patch normalization and diff classification -- migration-specific CLI commands -- guided user flow -- structured migration and benchmark artifacts -- a capacity-aware future mode for larger production environments -- transform or backfill planning for migrations that change payload shape - -## Proposed Product - -### Phase 1 MVP: `drop_recreate` - -Scope: - -- one index at a time -- preserve documents -- snapshot source schema and stats -- accept `schema_patch.yaml`, `target_schema.yaml`, or wizard answers -- normalize all inputs into the same plan model -- classify requested changes as supported or blocked -- generate `migration_plan.yaml` -- require explicit downtime acknowledgment for `apply` -- drop only the index structure -- recreate the index using the merged schema -- wait for readiness -- validate and emit `migration_report.yaml` -- optionally emit `benchmark_report.yaml` - -Supported changes: - -- add non-vector fields backed by existing document data -- remove fields -- adjust supported non-vector index options where stored payload shape does not change -- adjust index-level options that do not relocate or rewrite data - -Blocked in Phase 1: - -- key prefix changes -- key separator changes -- storage type changes -- JSON path remodels -- field renames -- vector dimension, datatype, precision, or payload-shape-changing algorithm changes -- any change that requires document rewrite or relocation - -Blocked changes in Phase 1 should be reported as Phase 2 candidates, not as unsupported forever. - -### Phase 2: `iterative_shadow` - -Scope: - -- one index at a time -- conservative capacity gate before each index -- optional `platform_inventory.yaml` -- optional `transform_plan.yaml` when payload shape changes -- shadow target creation -- readiness waiting and validation -- operator-owned cutover -- old index retirement after operator confirmation -- optional old-payload retirement after operator confirmation -- structured benchmark outputs for overlap timing, ETA accuracy, and memory or size deltas - -Execution submodes: - -- `shadow_reindex` - - use when the new index can be built from the current stored payload - - still useful for lower-disruption rebuilds when the payload shape does not change -- `shadow_rewrite` - - use when vector datatype, precision, dimension, algorithm, or payload shape changes require a new target payload or keyspace - - examples: `HNSW -> FLAT`, `FP32 -> FP16`, dimension changes, new embedding schema - -Still intentionally excluded: - -- automatic cutover -- automatic scaling -- concurrent shadowing of multiple large indexes -- transform inference - -## Architecture - -The product should work as a plan-first migration workflow with explicit operator handoff and an optional transform path for harder migrations. - -```mermaid -flowchart LR - OP["Engineer / Operator"] --> CLI["RedisVL migrate CLI"] - APP["Applications"] --> DB["Redis database endpoint"] - - CLI --> PLAN["Planner
snapshot + diff + mode selection"] - CLI --> EXEC["Executor"] - CLI --> VAL["Validator + reports"] - - PLAN --> META["Live index schema + stats"] - PLAN --> INV["Optional platform inventory"] - PLAN --> XFORM["Optional transform or backfill plan"] - - EXEC --> DB - EXEC --> TARGET["Optional target keyspace + shadow index"] - TARGET --> DB - VAL --> DB - - OP --> CUT["Maintenance window / cutover decision"] - CUT --> APP -``` - -Architecture expectations: - -- RedisVL owns planning, execution, validation, and artifact generation. -- Redis remains the system of record for source documents and index state. -- The operator owns maintenance windows, scaling, transform inputs, and application cutover decisions. -- The product must stay compatible with single-node and clustered deployments without assuming the whole index lives on one shard. - -## Why the Work Is Phased - -The product is phased because the migration strategies solve different problems: - -- `drop_recreate` is the fastest path to a usable, understandable MVP -- `iterative_shadow` is the future path for tighter operational control and safer handling of vector or payload-shape changes - -Trying to ship everything as one fully mature product would push the team into: - -- premature capacity-estimation complexity -- premature transform-runtime design -- premature cutover abstractions -- premature platform-specific automation -- a larger QA and support surface before the MVP proves value - -Phase 1 is therefore the implementation target, while Phase 2 remains planned work informed by Phase 1 learnings. - -## User Experience - -### Scripted Flow - -```text -rvl migrate plan --index --schema-patch -rvl migrate plan --index --target-schema -rvl migrate apply --plan --allow-downtime -rvl migrate validate --plan -``` - -### Guided Flow - -```text -rvl migrate wizard --index --plan-out -``` - -User experience requirements: - -- `plan` must never mutate Redis -- `wizard` must emit the same plan artifact shape as `plan` -- `apply` must only accept a reviewed plan file -- `apply` must require explicit downtime acknowledgment in Phase 1 -- `validate` must be usable independently after `apply` -- console output must be concise and operator-readable -- blocked Phase 1 requests must tell the user when Phase 2 is the correct path - -## Usage - -### Phase 1: `drop_recreate` - -Review-first workflow: - -```text -rvl migrate plan --index products --schema-patch patch.yaml --plan-out migration_plan.yaml -rvl migrate apply --plan migration_plan.yaml --allow-downtime --report-out migration_report.yaml -rvl migrate validate --plan migration_plan.yaml --report-out migration_report.yaml -``` - -Guided workflow: - -```text -rvl migrate wizard --index products --plan-out migration_plan.yaml -rvl migrate apply --plan migration_plan.yaml --allow-downtime -``` - -Expected usage pattern: - -1. Generate a plan from a live source index. -2. Review blocked diffs, warnings, downtime notice, and merged target schema. -3. Run `apply` only after the operator accepts the maintenance window. -4. Run `validate` and retain the report as the handoff artifact. - -### Phase 2: `iterative_shadow` - -Payload-compatible shadow workflow: - -```text -rvl migrate plan --mode iterative_shadow --index products --schema-patch patch.yaml --platform-inventory platform_inventory.yaml --plan-out migration_plan.yaml -rvl migrate apply --plan migration_plan.yaml --report-out migration_report.yaml -``` - -Payload-rewrite shadow workflow: - -```text -rvl migrate plan --mode iterative_shadow --index products --target-schema target_schema.yaml --platform-inventory platform_inventory.yaml --transform-plan transform_plan.yaml --plan-out migration_plan.yaml -rvl migrate apply --plan migration_plan.yaml --report-out migration_report.yaml -``` - -Expected usage pattern: - -1. Provide the schema request and platform inventory. -2. Provide `transform_plan.yaml` when the target payload shape changes. -3. Review the capacity-gate outcome, estimated migration window, and estimated peak overlap footprint. -4. Run the shadow migration for one index only. -5. Hand cutover to the operator. -6. Confirm cutover before retiring the old index and any obsolete payloads. - -## Artifacts - -Required artifacts: - -- `migration_plan.yaml` -- `migration_report.yaml` - -Optional or phase-dependent artifacts: - -- `benchmark_report.yaml` -- `platform_inventory.yaml` -- `transform_plan.yaml` -- `benchmark_manifest.yaml` - -Artifact requirements: - -- YAML-based -- stable enough for handoff and review -- readable by humans first -- structured enough for future automation - -## Operational Model - -RedisVL owns: - -- source snapshot -- schema diffing -- plan generation -- supported strategy execution -- readiness waiting -- validation -- reporting - -Operators own: - -- maintenance windows -- application behavior during migration -- platform scaling -- transform inputs for payload-shape changes -- cutover -- final go or no-go decisions in production - -The product should not imply that RedisVL is a full migration control plane. It is a migration toolset with explicit operator handoff. - -## Capacity and Scale - -Phase 1 keeps capacity handling simple: - -- use source index stats for warnings -- capture timing and impact for later planning -- avoid a complex estimator in the MVP - -Phase 2 introduces a conservative planner: - -- reason at the database level, not as “an index lives on one shard” -- treat each index as one logical distributed index even on sharded deployments -- estimate source document footprint and source index footprint separately -- estimate target document footprint and target index footprint separately -- compute peak overlap as the source footprint plus the target footprint that exists during migration -- require reserve headroom before apply -- return `READY`, `SCALE_REQUIRED`, or `MANUAL_REVIEW_REQUIRED` - -The execution rule stays simple across both phases: - -- one index at a time - -This is the core design choice that keeps the system understandable at production scale. - -## Downtime and Disruption - -Phase 1 explicitly accepts downtime. - -Expected impacts: - -- search on the affected index is unavailable between drop and recreated index readiness -- query quality may be degraded while initial indexing completes -- shared Redis resources are consumed during rebuild -- large indexes need maintenance windows or application-level degraded mode handling - -Phase 2 aims to reduce disruption, but it still has operational costs: - -- old and new index structures overlap during migration -- payload-rewrite migrations may also duplicate payloads temporarily -- memory and size can either grow or shrink depending on datatype, precision, dimension, algorithm, and payload-shape changes - -These are product facts and must be visible in the plan and report artifacts. - -## Benchmarking and Success Metrics - -Benchmarking is a product requirement, not an afterthought. - -The product should help answer: - -- how long planning takes -- how long apply takes -- how long downtime or overlap lasts -- how much document throughput the migration achieves -- how query latency changes during the migration window -- how much memory and size change between source and target -- how accurate the peak-overlap estimate was - -Core success metrics: - -- migration plan generation succeeds for supported diffs -- unsupported diffs are blocked before mutation -- Phase 1 preserves documents -- Phase 2 produces deterministic shadow plans for supported vector and payload-shape migrations -- schema match and document count match succeed after migration -- reports include stable timing, correctness, and memory-delta metrics -- benchmark rehearsals are good enough to estimate future maintenance windows and scaling decisions with confidence - -## Functional Requirements - -- plan generation from live index plus requested schema changes -- schema patch normalization -- supported-versus-blocked diff classification -- guided wizard for supported Phase 1 changes -- explicit downtime acknowledgment in Phase 1 -- structured plan, report, and benchmark outputs -- validation of schema, counts, and indexing-failure deltas -- one-index-at-a-time execution -- Phase 2 capacity-gated shadow planning -- Phase 2 support for vector and payload-shape migrations through explicit shadow planning - -## Non-Functional Requirements - -- deterministic plan outputs -- human-readable YAML artifacts -- clear failure modes -- conservative defaults -- no document deletion by the Phase 1 migrator path -- reasonable operation on large indexes without default full-key manifests -- documentation detailed enough for implementation handoff - -## Risks - -- Users may assume unsupported Phase 1 schema changes should “just work” unless the diff classifier clearly routes them to Phase 2. -- Operators may underestimate downtime for large indexes unless benchmark outputs become part of the review flow. -- Phase 2 can grow too complex if transform logic or platform-specific automation is pulled in too early. -- Capacity estimation may be wrong unless benchmark data and observed footprint deltas are captured consistently. -- Validation may be treated as optional unless the CLI and reports make it central to the workflow. - -## Rollout Plan - -### Phase 1 - -- finalize docs and task list -- implement the planner, diff classifier, CLI flow, executor, and validator -- add CI coverage for supported and blocked migration paths -- run at least one benchmark rehearsal on a representative non-production dataset - -### Phase 1.5 - -- review real implementation learnings -- update the planning workspace with observed gaps and benchmark findings -- confirm whether Phase 2 assumptions still hold - -### Phase 2 - -- implement inventory parsing -- implement transform or backfill plan modeling -- implement conservative capacity gating -- implement one-index-at-a-time shadow planning and execution -- add benchmark rehearsals for overlap duration, ETA accuracy, and memory-delta accuracy - -## Review Questions for the Team - -- Is the Phase 1 boundary narrow enough to ship quickly, but useful enough to solve real user pain? -- Is Phase 2 scoped clearly enough to own vector datatype, precision, dimension, algorithm, and payload-shape changes? -- Is operator-owned cutover still the right long-term boundary? -- Is the benchmarking scope sufficient to make migration windows and scaling decisions predictable without overbuilding a measurement subsystem? -- Does the one-index-at-a-time rule provide the right balance of simplicity and scale? - -## Decision Summary - -- Build the migration product in phases. -- Implement Phase 1 first and keep it intentionally narrow. -- Treat vector and payload-shape migrations as a core product goal, delivered in Phase 2 rather than ignored. -- Keep the plan artifact central to the workflow. -- Keep the operational model explicit. -- Use evidence from benchmark outputs to shape later migration planning. - -## References - -Detailed supporting docs in this workspace: - -- [00_index.md](./00_index.md) -- [01_context.md](./01_context.md) -- [02_architecture.md](./02_architecture.md) -- [03_benchmarking.md](./03_benchmarking.md) -- [10_v1_drop_recreate_spec.md](./10_v1_drop_recreate_spec.md) -- [11_v1_drop_recreate_tasks.md](./11_v1_drop_recreate_tasks.md) -- [12_v1_drop_recreate_tests.md](./12_v1_drop_recreate_tests.md) -- [20_v2_iterative_shadow_spec.md](./20_v2_iterative_shadow_spec.md) -- [21_v2_iterative_shadow_tasks.md](./21_v2_iterative_shadow_tasks.md) -- [22_v2_iterative_shadow_tests.md](./22_v2_iterative_shadow_tests.md) - -## User Journeys - -### Journey 1: Application Engineer Running a Simple Schema Migration - -An application engineer needs to add a new filterable metadata field to an existing index without deleting documents. They run `plan`, review the merged target schema and downtime warning, schedule a maintenance window, run `apply`, then run `validate` and hand the migration report to the team. They do not need to understand Redis internals beyond the migration inputs and the reported downtime. - -### Journey 2: Platform Engineer Reviewing a Vector Precision Migration - -A platform engineer needs to review a planned `FP32 -> FP16` migration for a large production index. They supply platform inventory, review the planner’s peak-overlap estimate, compare the projected post-cutover memory savings to previous benchmark reports, and decide whether the current deployment can run the migration safely in the next window. - -### Journey 3: Engineer Migrating from `HNSW` to `FLAT` - -An engineer wants to switch vector search behavior from `HNSW` to `FLAT` to simplify runtime performance characteristics. The planner classifies the request as a Phase 2 shadow migration, estimates the target index footprint, and produces a one-index-at-a-time plan. The operator runs the migration, validates the shadow target, and cuts traffic over once the benchmark and validation reports look acceptable. - -### Journey 4: Solutions Engineer Validating a Payload-Shape Change - -A solutions engineer wants to understand how long a customer migration will take when a new embedding model changes the stored payload shape. They create a `transform_plan.yaml`, run a rehearsal in non-production, collect benchmark timing, throughput, query-latency, and source-versus-target memory outputs, and use those artifacts to advise on maintenance windows and scaling needs. - -## User Stories - -- As an application engineer, I want to generate a migration plan before any mutation so that I can review the exact schema changes and downtime implications. -- As an application engineer, I want the Phase 1 migrator to preserve documents so that I do not have to rebuild my dataset from another source. -- As an application engineer, I want blocked Phase 1 schema changes to fail early and point me to the correct Phase 2 path so that I do not start a migration the product cannot safely complete. -- As an operator, I want migration and validation reports in YAML so that I can review, archive, and share them with other teams. -- As an operator, I want the CLI to require explicit downtime acknowledgment in Phase 1 so that maintenance-window risk is never implicit. -- As a platform engineer, I want Phase 2 to process one index at a time so that capacity planning stays understandable and bounded. -- As a platform engineer, I want the planner to estimate peak overlap and post-cutover memory deltas so that I can decide whether a migration fits safely. -- As a platform engineer, I want the shadow planner to return `READY`, `SCALE_REQUIRED`, or `MANUAL_REVIEW_REQUIRED` so that I can make a clear operational decision before execution. -- As a solutions engineer, I want benchmark outputs for duration, throughput, query impact, and memory change so that I can estimate future migrations with real evidence. -- As a maintainer, I want the migration product to reuse existing RedisVL primitives so that implementation and long-term maintenance stay simple. From b559215e00792d296765c515dd2075b27bdd90d4 Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Fri, 20 Mar 2026 10:10:22 -0400 Subject: [PATCH 07/10] feat(migrate): add async migration support Add async/await execution for index migrations, enabling non-blocking operation for large quantization jobs and async application integration. New functionality: - CLI: --async flag for rvl migrate apply - Python API: AsyncMigrationPlanner, AsyncMigrationExecutor, AsyncMigrationValidator - Batched quantization with pipelined HSET operations - Non-blocking readiness polling with asyncio.sleep() What becomes async: - SCAN operations (yields between batches of 500 keys) - Pipelined HSET writes (100-1000 ops per batch) - Index readiness polling (asyncio.sleep vs time.sleep) What stays sync: - CLI prompts (user interaction) - YAML file I/O (local filesystem) Documentation: - Sync vs async execution guidance in concepts/index-migrations.md - Async usage examples in how_to_guides/migrate-indexes.md Tests: - 4 unit tests for AsyncMigrationPlanner - 4 unit tests for AsyncMigrationExecutor - 1 integration test for full async flow --- docs/concepts/index-migrations.md | 103 +++++ .../how_to_guides/migrate-indexes.md | 78 ++++ redisvl/cli/migrate.py | 79 +++- redisvl/migration/__init__.py | 17 + redisvl/migration/async_executor.py | 388 ++++++++++++++++++ redisvl/migration/async_planner.py | 179 ++++++++ redisvl/migration/async_utils.py | 96 +++++ redisvl/migration/async_validation.py | 129 ++++++ tests/integration/test_async_migration_v1.py | 150 +++++++ tests/unit/test_async_migration_executor.py | 125 ++++++ tests/unit/test_async_migration_planner.py | 319 ++++++++++++++ 11 files changed, 1651 insertions(+), 12 deletions(-) create mode 100644 redisvl/migration/async_executor.py create mode 100644 redisvl/migration/async_planner.py create mode 100644 redisvl/migration/async_utils.py create mode 100644 redisvl/migration/async_validation.py create mode 100644 tests/integration/test_async_migration_v1.py create mode 100644 tests/unit/test_async_migration_executor.py create mode 100644 tests/unit/test_async_migration_planner.py diff --git a/docs/concepts/index-migrations.md b/docs/concepts/index-migrations.md index a162f0ff..dd9bc834 100644 --- a/docs/concepts/index-migrations.md +++ b/docs/concepts/index-migrations.md @@ -139,6 +139,109 @@ With `drop_recreate`, your index is unavailable between the drop and when re-ind The duration depends on document count, field count, and vector dimensions. For large indexes, consider running migrations during low traffic periods. +## Sync vs async execution + +The migrator provides both synchronous and asynchronous execution modes. + +### What becomes async and what stays sync + +The migration workflow has distinct phases. Here is what each mode affects: + +| Phase | Sync mode | Async mode | Notes | +|-------|-----------|------------|-------| +| **Plan generation** | `MigrationPlanner.create_plan()` | `AsyncMigrationPlanner.create_plan()` | Reads index metadata from Redis | +| **Schema snapshot** | Sync Redis calls | Async Redis calls | Single `FT.INFO` command | +| **Drop index** | `index.delete()` | `await index.delete()` | Single `FT.DROPINDEX` command | +| **Quantization** | Sequential SCAN + HSET | Pipelined SCAN + batched HSET | See below | +| **Create index** | `index.create()` | `await index.create()` | Single `FT.CREATE` command | +| **Readiness polling** | `time.sleep()` loop | `asyncio.sleep()` loop | Polls `FT.INFO` until indexed | +| **Validation** | Sync Redis calls | Async Redis calls | Schema and doc count checks | +| **CLI interaction** | Always sync | Always sync | User prompts, file I/O | +| **YAML read/write** | Always sync | Always sync | Local filesystem only | + +### When to use sync (default) + +Sync execution is simpler and sufficient for most migrations: + +- Small to medium indexes (under 100K documents) +- Index-only changes (algorithm, distance metric, field options) +- Interactive CLI usage where blocking is acceptable + +For migrations without quantization, the Redis operations are fast single commands. Sync mode adds no meaningful overhead. + +### When to use async + +Async execution (`--async` flag) provides benefits in specific scenarios: + +**Large quantization jobs (1M+ vectors)** + +Converting float32 to float16 requires reading every vector, converting it, and writing it back. The async executor: + +- Uses `SCAN` with `COUNT 500` to iterate keys without blocking Redis (per [Redis SCAN docs](https://redis.io/docs/latest/commands/scan/), SCAN is O(1) per call) +- Pipelines `HSET` operations in batches (100-1000 operations per pipeline is optimal for Redis) +- Yields to the event loop between batches so other tasks can proceed + +**Large keyspaces (40M+ keys)** + +When your Redis instance has many keys, `SCAN` iteration can take minutes. Async mode yields between batches. + +**Async application integration** + +If your application uses asyncio, you can integrate migration directly: + +```python +import asyncio +from redisvl.migration import AsyncMigrationPlanner, AsyncMigrationExecutor + +async def migrate(): + planner = AsyncMigrationPlanner() + plan = await planner.create_plan("myindex", redis_url="redis://localhost:6379") + + executor = AsyncMigrationExecutor() + report = await executor.apply(plan, redis_url="redis://localhost:6379") + +asyncio.run(migrate()) +``` + +### Why async helps with quantization + +The key difference is in the vector re-encoding loop: + +**Sync quantization:** +``` +for each batch of 500 keys: + SCAN (blocks) -> get keys + for each key: + HGET field (blocks) + convert array + pipeline.HSET(field, new_bytes) + pipeline.execute() (blocks) +``` + +**Async quantization:** +``` +for each batch of 500 keys: + await SCAN -> get keys (yields) + for each key: + await HGET field (yields) + convert array + pipeline.HSET(field, new_bytes) + await pipeline.execute() (yields) +``` + +Each `await` is a yield point where other coroutines can run. For millions of vectors, this prevents your application from freezing. + +### What async does NOT improve + +Async execution does not reduce: + +- **Total migration time**: Same work, different scheduling +- **Redis server load**: Same commands execute on the server +- **Downtime window**: Index remains unavailable during rebuild +- **Network round trips**: Same number of Redis calls + +The benefit is application responsiveness, not faster migration. + ## Learn more - [Migration guide](../user_guide/how_to_guides/migrate-indexes.md): Step by step instructions diff --git a/docs/user_guide/how_to_guides/migrate-indexes.md b/docs/user_guide/how_to_guides/migrate-indexes.md index 2942f09f..00326e51 100644 --- a/docs/user_guide/how_to_guides/migrate-indexes.md +++ b/docs/user_guide/how_to_guides/migrate-indexes.md @@ -302,6 +302,41 @@ What `apply` does: 6. validates the result 7. writes report artifacts +### Async execution for large migrations + +For large migrations (especially those involving vector quantization), use the `--async` flag: + +```bash +rvl migrate apply \ + --plan migration_plan.yaml \ + --allow-downtime \ + --async \ + --url redis://localhost:6379 +``` + +**What becomes async:** + +- Keyspace SCAN during quantization (yields between batches of 500 keys) +- Vector read/write operations (pipelined HGET/HSET) +- Index readiness polling (uses `asyncio.sleep()` instead of blocking) +- Validation checks + +**What stays sync:** + +- CLI prompts and user interaction +- YAML file reading/writing +- Progress display + +**When to use async:** + +- Quantizing millions of vectors (float32 to float16) +- Redis instance has 40M+ keys +- Integrating into an async application + +For most migrations (index-only changes, small datasets), sync mode is sufficient and simpler. + +See {doc}`/concepts/index-migrations` for detailed async vs sync guidance. + ## Step 5: Validate the Result Validation happens automatically during `apply`, but you can run it separately: @@ -358,6 +393,7 @@ rvl migrate validate \ - `--index` : Index name to migrate - `--plan` / `--plan-out` : Path to migration plan - `--allow-downtime` : Acknowledge index unavailability (required for apply) +- `--async` : Use async executor for large migrations (apply only) - `--report-out` : Path for validation report - `--benchmark-out` : Path for performance metrics @@ -389,6 +425,48 @@ If `apply` fails mid-migration: The underlying documents are never deleted by `drop_recreate`. +## Python API + +For programmatic migrations, use the migration classes directly: + +### Sync API + +```python +from redisvl.migration import MigrationPlanner, MigrationExecutor + +planner = MigrationPlanner() +plan = planner.create_plan( + "myindex", + redis_url="redis://localhost:6379", + schema_patch_path="schema_patch.yaml", +) + +executor = MigrationExecutor() +report = executor.apply(plan, redis_url="redis://localhost:6379") +print(f"Migration result: {report.result}") +``` + +### Async API + +```python +import asyncio +from redisvl.migration import AsyncMigrationPlanner, AsyncMigrationExecutor + +async def migrate(): + planner = AsyncMigrationPlanner() + plan = await planner.create_plan( + "myindex", + redis_url="redis://localhost:6379", + schema_patch_path="schema_patch.yaml", + ) + + executor = AsyncMigrationExecutor() + report = await executor.apply(plan, redis_url="redis://localhost:6379") + print(f"Migration result: {report.result}") + +asyncio.run(migrate()) +``` + ## Learn more - {doc}`/concepts/index-migrations`: How migrations work and which changes are supported diff --git a/redisvl/cli/migrate.py b/redisvl/cli/migrate.py index fe527ed0..5aff58aa 100644 --- a/redisvl/cli/migrate.py +++ b/redisvl/cli/migrate.py @@ -1,10 +1,17 @@ import argparse +import asyncio import sys from argparse import Namespace from typing import Optional from redisvl.cli.utils import add_redis_connection_options, create_redis_url -from redisvl.migration import MigrationExecutor, MigrationPlanner, MigrationValidator +from redisvl.migration import ( + AsyncMigrationExecutor, + AsyncMigrationValidator, + MigrationExecutor, + MigrationPlanner, + MigrationValidator, +) from redisvl.migration.utils import ( list_indexes, load_migration_plan, @@ -26,7 +33,7 @@ class Migrate: "\tlist List all available indexes", "\tplan Generate a migration plan for a document-preserving drop/recreate migration", "\twizard Interactively build a migration plan and schema patch", - "\tapply Execute a reviewed drop/recreate migration plan", + "\tapply Execute a reviewed drop/recreate migration plan (use --async for large migrations)", "\tvalidate Validate a completed migration plan against the live index", "\n", ] @@ -194,7 +201,7 @@ def apply(self): parser = argparse.ArgumentParser( usage=( "rvl migrate apply --plan --allow-downtime " - "[--report-out ]" + "[--async] [--report-out ]" ) ) parser.add_argument("--plan", help="Path to migration_plan.yaml", required=True) @@ -203,6 +210,12 @@ def apply(self): help="Explicitly acknowledge downtime for drop_recreate", action="store_true", ) + parser.add_argument( + "--async", + dest="use_async", + help="Use async executor (recommended for large migrations with quantization)", + action="store_true", + ) parser.add_argument( "--report-out", help="Path to write migration_report.yaml", @@ -228,6 +241,21 @@ def apply(self): redis_url = create_redis_url(args) plan = load_migration_plan(args.plan) + + if args.use_async: + report = asyncio.run( + self._apply_async(plan, redis_url, args.query_check_file) + ) + else: + report = self._apply_sync(plan, redis_url, args.query_check_file) + + write_migration_report(report, args.report_out) + if args.benchmark_out: + write_benchmark_report(report, args.benchmark_out) + self._print_report_summary(args.report_out, report, args.benchmark_out) + + def _apply_sync(self, plan, redis_url: str, query_check_file: Optional[str]): + """Execute migration synchronously.""" executor = MigrationExecutor() print(f"\nApplying migration to '{plan.source.index_name}'...") @@ -241,7 +269,6 @@ def progress_callback(step: str, detail: str) -> None: "validate": "[5/5] Validate", } label = step_labels.get(step, step) - # Use carriage return to update in place for progress if detail and not detail.startswith("done"): print(f" {label}: {detail} ", end="\r", flush=True) else: @@ -250,27 +277,55 @@ def progress_callback(step: str, detail: str) -> None: report = executor.apply( plan, redis_url=redis_url, - query_check_file=args.query_check_file, + query_check_file=query_check_file, progress_callback=progress_callback, ) - # Print completion summary + self._print_apply_result(report) + return report + + async def _apply_async(self, plan, redis_url: str, query_check_file: Optional[str]): + """Execute migration asynchronously (non-blocking for large quantization jobs).""" + executor = AsyncMigrationExecutor() + + print(f"\nApplying migration to '{plan.source.index_name}' (async mode)...") + + def progress_callback(step: str, detail: str) -> None: + step_labels = { + "drop": "[1/5] Drop index", + "quantize": "[2/5] Quantize vectors", + "create": "[3/5] Create index", + "index": "[4/5] Re-indexing", + "validate": "[5/5] Validate", + } + label = step_labels.get(step, step) + if detail and not detail.startswith("done"): + print(f" {label}: {detail} ", end="\r", flush=True) + else: + print(f" {label}: {detail} ") + + report = await executor.apply( + plan, + redis_url=redis_url, + query_check_file=query_check_file, + progress_callback=progress_callback, + ) + + self._print_apply_result(report) + return report + + def _print_apply_result(self, report) -> None: + """Print the result summary after migration apply.""" if report.result == "succeeded": total_time = report.timings.total_migration_duration_seconds or 0 downtime = report.timings.downtime_duration_seconds or 0 print(f"\nMigration completed in {total_time}s (downtime: {downtime}s)") else: print(f"\nMigration {report.result}") - # Show errors immediately for visibility if report.validation.errors: for error in report.validation.errors: print(f" ERROR: {error}") - write_migration_report(report, args.report_out) - if args.benchmark_out: - write_benchmark_report(report, args.benchmark_out) - self._print_report_summary(args.report_out, report, args.benchmark_out) - def validate(self): parser = argparse.ArgumentParser( usage=( diff --git a/redisvl/migration/__init__.py b/redisvl/migration/__init__.py index 6cedb500..4097d02c 100644 --- a/redisvl/migration/__init__.py +++ b/redisvl/migration/__init__.py @@ -1,3 +1,11 @@ +from redisvl.migration.async_executor import AsyncMigrationExecutor +from redisvl.migration.async_planner import AsyncMigrationPlanner +from redisvl.migration.async_utils import ( + async_current_source_matches_snapshot, + async_list_indexes, + async_wait_for_index_ready, +) +from redisvl.migration.async_validation import AsyncMigrationValidator from redisvl.migration.executor import MigrationExecutor from redisvl.migration.models import MigrationPlan, MigrationReport, SchemaPatch from redisvl.migration.planner import MigrationPlanner @@ -5,6 +13,7 @@ from redisvl.migration.wizard import MigrationWizard __all__ = [ + # Sync "MigrationExecutor", "MigrationPlan", "MigrationPlanner", @@ -12,4 +21,12 @@ "MigrationValidator", "MigrationWizard", "SchemaPatch", + # Async + "AsyncMigrationExecutor", + "AsyncMigrationPlanner", + "AsyncMigrationValidator", + # Async utilities + "async_current_source_matches_snapshot", + "async_list_indexes", + "async_wait_for_index_ready", ] diff --git a/redisvl/migration/async_executor.py b/redisvl/migration/async_executor.py new file mode 100644 index 00000000..835b9c81 --- /dev/null +++ b/redisvl/migration/async_executor.py @@ -0,0 +1,388 @@ +from __future__ import annotations + +import asyncio +import logging +import time +from typing import Any, Callable, Dict, Optional + +from redisvl.index import AsyncSearchIndex +from redisvl.migration.async_planner import AsyncMigrationPlanner +from redisvl.migration.async_validation import AsyncMigrationValidator +from redisvl.migration.models import ( + MigrationBenchmarkSummary, + MigrationPlan, + MigrationReport, + MigrationTimings, + MigrationValidation, +) +from redisvl.migration.utils import timestamp_utc +from redisvl.redis.utils import array_to_buffer, buffer_to_array +from redisvl.types import AsyncRedisClient + +logger = logging.getLogger(__name__) + + +class AsyncMigrationExecutor: + """Async migration executor for document-preserving drop/recreate flows. + + This is the async version of MigrationExecutor. It uses AsyncSearchIndex + and async Redis operations for better performance on large indexes, + especially during vector quantization. + """ + + def __init__(self, validator: Optional[AsyncMigrationValidator] = None): + self.validator = validator or AsyncMigrationValidator() + + async def apply( + self, + plan: MigrationPlan, + *, + redis_url: Optional[str] = None, + redis_client: Optional[AsyncRedisClient] = None, + query_check_file: Optional[str] = None, + progress_callback: Optional[Callable[[str, Optional[str]], None]] = None, + ) -> MigrationReport: + """Apply a migration plan asynchronously. + + Args: + plan: The migration plan to apply. + redis_url: Redis connection URL. + redis_client: Optional existing async Redis client. + query_check_file: Optional file with query checks. + progress_callback: Optional callback(step, detail) for progress updates. + """ + started_at = timestamp_utc() + started = time.perf_counter() + + report = MigrationReport( + source_index=plan.source.index_name, + target_index=plan.merged_target_schema["index"]["name"], + result="failed", + started_at=started_at, + finished_at=started_at, + warnings=list(plan.warnings), + ) + + if not plan.diff_classification.supported: + report.validation.errors.extend(plan.diff_classification.blocked_reasons) + report.manual_actions.append( + "This change requires document migration, which is not yet supported." + ) + report.finished_at = timestamp_utc() + return report + + if not await self._async_current_source_matches_snapshot( + plan.source.index_name, + plan.source.schema_snapshot, + redis_url=redis_url, + redis_client=redis_client, + ): + report.validation.errors.append( + "The current live source schema no longer matches the saved source snapshot." + ) + report.manual_actions.append( + "Re-run `rvl migrate plan` to refresh the migration plan before applying." + ) + report.finished_at = timestamp_utc() + return report + + source_index = await AsyncSearchIndex.from_existing( + plan.source.index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + target_index = AsyncSearchIndex.from_dict( + plan.merged_target_schema, + redis_url=redis_url, + redis_client=redis_client, + ) + + drop_duration = 0.0 + quantize_duration = 0.0 + recreate_duration = 0.0 + indexing_duration = 0.0 + target_info: Dict[str, Any] = {} + docs_quantized = 0 + + datatype_changes = AsyncMigrationPlanner.get_vector_datatype_changes( + plan.source.schema_snapshot, plan.merged_target_schema + ) + + def _notify(step: str, detail: Optional[str] = None) -> None: + if progress_callback: + progress_callback(step, detail) + + try: + _notify("drop", "Dropping index definition...") + drop_started = time.perf_counter() + await source_index.delete(drop=False) + drop_duration = round(time.perf_counter() - drop_started, 3) + _notify("drop", f"done ({drop_duration}s)") + + if datatype_changes: + _notify("quantize", "Re-encoding vectors...") + quantize_started = time.perf_counter() + docs_quantized = await self._async_quantize_vectors( + source_index, + datatype_changes, + plan, + progress_callback=lambda done, total: _notify( + "quantize", f"{done:,}/{total:,} docs" + ), + ) + quantize_duration = round(time.perf_counter() - quantize_started, 3) + _notify( + "quantize", + f"done ({docs_quantized:,} docs in {quantize_duration}s)", + ) + report.warnings.append( + f"Re-encoded {docs_quantized} documents for vector quantization: " + f"{datatype_changes}" + ) + + _notify("create", "Creating index with new schema...") + recreate_started = time.perf_counter() + await target_index.create() + recreate_duration = round(time.perf_counter() - recreate_started, 3) + _notify("create", f"done ({recreate_duration}s)") + + _notify("index", "Waiting for re-indexing...") + + def _index_progress(indexed: int, total: int, pct: float) -> None: + _notify("index", f"{indexed:,}/{total:,} docs ({pct:.0f}%)") + + target_info, indexing_duration = await self._async_wait_for_index_ready( + target_index, progress_callback=_index_progress + ) + _notify("index", f"done ({indexing_duration}s)") + + _notify("validate", "Validating migration...") + validation, target_info, validation_duration = ( + await self.validator.validate( + plan, + redis_url=redis_url, + redis_client=redis_client, + query_check_file=query_check_file, + ) + ) + _notify("validate", f"done ({validation_duration}s)") + report.validation = validation + total_duration = round(time.perf_counter() - started, 3) + report.timings = MigrationTimings( + total_migration_duration_seconds=total_duration, + drop_duration_seconds=drop_duration, + quantize_duration_seconds=( + quantize_duration if quantize_duration else None + ), + recreate_duration_seconds=recreate_duration, + initial_indexing_duration_seconds=indexing_duration, + validation_duration_seconds=validation_duration, + downtime_duration_seconds=round( + drop_duration + + quantize_duration + + recreate_duration + + indexing_duration, + 3, + ), + ) + report.benchmark_summary = self._build_benchmark_summary( + plan, + target_info, + report.timings, + ) + report.result = "succeeded" if not validation.errors else "failed" + if validation.errors: + report.manual_actions.append( + "Review validation errors before treating the migration as complete." + ) + except Exception as exc: + total_duration = round(time.perf_counter() - started, 3) + report.timings = MigrationTimings( + total_migration_duration_seconds=total_duration, + drop_duration_seconds=drop_duration or None, + quantize_duration_seconds=quantize_duration or None, + recreate_duration_seconds=recreate_duration or None, + initial_indexing_duration_seconds=indexing_duration or None, + downtime_duration_seconds=( + round( + drop_duration + + quantize_duration + + recreate_duration + + indexing_duration, + 3, + ) + if drop_duration + or quantize_duration + or recreate_duration + or indexing_duration + else None + ), + ) + report.validation = MigrationValidation( + errors=[f"Migration execution failed: {exc}"] + ) + report.manual_actions.extend( + [ + "Inspect the Redis index state before retrying.", + "If the source index was dropped, recreate it from the saved migration plan.", + ] + ) + finally: + report.finished_at = timestamp_utc() + + return report + + async def _async_quantize_vectors( + self, + source_index: AsyncSearchIndex, + datatype_changes: Dict[str, Dict[str, str]], + plan: MigrationPlan, + progress_callback: Optional[Callable[[int, int], None]] = None, + ) -> int: + """Re-encode vectors in documents for datatype changes (quantization). + + This is the async version that uses async pipeline operations for + better performance on large indexes. + """ + client = source_index._redis_client + if client is None: + raise ValueError("Failed to get Redis client from source index") + + prefix = plan.source.schema_snapshot["index"]["prefix"] + storage_type = ( + plan.source.schema_snapshot["index"].get("storage_type", "hash").lower() + ) + estimated_total = int(plan.source.stats_snapshot.get("num_docs", 0) or 0) + + docs_processed = 0 + batch_size = 500 + cursor: int = 0 + + while True: + cursor, keys = await client.scan( + cursor=cursor, + match=f"{prefix}*", + count=batch_size, + ) + + if keys: + pipe = client.pipeline() + keys_to_update = [] + + for key in keys: + if storage_type == "hash": + for field_name, change in datatype_changes.items(): + # hget returns bytes for binary data + field_data: bytes | None = await client.hget(key, field_name) # type: ignore[misc,assignment] + if field_data: + # field_data is bytes from Redis + array = buffer_to_array(field_data, change["source"]) + new_bytes = array_to_buffer(array, change["target"]) + pipe.hset( + key, field_name, new_bytes # type: ignore[arg-type] + ) + keys_to_update.append(key) + else: + logger.warning( + f"JSON storage quantization for key {key} - " + "vectors stored as arrays may not need re-encoding" + ) + + if keys_to_update: + await pipe.execute() + docs_processed += len(set(keys_to_update)) + if progress_callback: + progress_callback(docs_processed, estimated_total) + + if cursor == 0: + break + + logger.info(f"Quantized {docs_processed} documents: {datatype_changes}") + return docs_processed + + async def _async_wait_for_index_ready( + self, + index: AsyncSearchIndex, + *, + timeout_seconds: int = 1800, + poll_interval_seconds: float = 0.5, + progress_callback: Optional[Callable[[int, int, float], None]] = None, + ) -> tuple[Dict[str, Any], float]: + """Wait for index to finish indexing all documents (async version).""" + start = time.perf_counter() + deadline = start + timeout_seconds + latest_info = await index.info() + + stable_ready_checks = 0 + while time.perf_counter() < deadline: + latest_info = await index.info() + indexing = latest_info.get("indexing") + percent_indexed = latest_info.get("percent_indexed") + + if percent_indexed is not None or indexing is not None: + ready = float(percent_indexed or 0) >= 1.0 and not bool(indexing) + if progress_callback: + total_docs = int(latest_info.get("num_docs", 0)) + pct = float(percent_indexed or 0) + indexed_docs = int(total_docs * pct) + progress_callback(indexed_docs, total_docs, pct * 100) + else: + current_docs = latest_info.get("num_docs") + if current_docs is None: + ready = True + else: + if stable_ready_checks == 0: + stable_ready_checks = int(current_docs) + await asyncio.sleep(poll_interval_seconds) + continue + ready = int(current_docs) == stable_ready_checks + + if ready: + return latest_info, round(time.perf_counter() - start, 3) + + await asyncio.sleep(poll_interval_seconds) + + raise TimeoutError( + f"Index {index.schema.index.name} did not become ready within {timeout_seconds} seconds" + ) + + async def _async_current_source_matches_snapshot( + self, + index_name: str, + expected_schema: Dict[str, Any], + *, + redis_url: Optional[str] = None, + redis_client: Optional[AsyncRedisClient] = None, + ) -> bool: + """Check if current source schema matches the snapshot (async version).""" + from redisvl.migration.utils import schemas_equal + + current_index = await AsyncSearchIndex.from_existing( + index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + return schemas_equal(current_index.schema.to_dict(), expected_schema) + + def _build_benchmark_summary( + self, + plan: MigrationPlan, + target_info: dict, + timings: MigrationTimings, + ) -> MigrationBenchmarkSummary: + source_index_size = float( + plan.source.stats_snapshot.get("vector_index_sz_mb", 0) or 0 + ) + target_index_size = float(target_info.get("vector_index_sz_mb", 0) or 0) + source_num_docs = int(plan.source.stats_snapshot.get("num_docs", 0) or 0) + indexed_per_second = None + indexing_time = timings.initial_indexing_duration_seconds + if indexing_time and indexing_time > 0: + indexed_per_second = round(source_num_docs / indexing_time, 3) + + return MigrationBenchmarkSummary( + documents_indexed_per_second=indexed_per_second, + source_index_size_mb=round(source_index_size, 3), + target_index_size_mb=round(target_index_size, 3), + index_size_delta_mb=round(target_index_size - source_index_size, 3), + ) diff --git a/redisvl/migration/async_planner.py b/redisvl/migration/async_planner.py new file mode 100644 index 00000000..76b2e6e4 --- /dev/null +++ b/redisvl/migration/async_planner.py @@ -0,0 +1,179 @@ +from __future__ import annotations + +from typing import Any, List, Optional + +from redisvl.index import AsyncSearchIndex +from redisvl.migration.models import ( + KeyspaceSnapshot, + MigrationPlan, + SchemaPatch, + SourceSnapshot, +) +from redisvl.migration.planner import MigrationPlanner +from redisvl.schema.schema import IndexSchema +from redisvl.types import AsyncRedisClient + + +class AsyncMigrationPlanner: + """Async migration planner for document-preserving drop/recreate flows. + + This is the async version of MigrationPlanner. It uses AsyncSearchIndex + and async Redis operations for better performance on large indexes. + + The classification logic, schema merging, and diff analysis are delegated + to a sync MigrationPlanner instance (they are CPU-bound and don't need async). + """ + + def __init__(self, key_sample_limit: int = 10): + self.key_sample_limit = key_sample_limit + # Delegate to sync planner for CPU-bound operations + self._sync_planner = MigrationPlanner(key_sample_limit=key_sample_limit) + + # Expose static methods from MigrationPlanner for convenience + get_vector_datatype_changes = staticmethod( + MigrationPlanner.get_vector_datatype_changes + ) + + async def create_plan( + self, + index_name: str, + *, + redis_url: Optional[str] = None, + schema_patch_path: Optional[str] = None, + target_schema_path: Optional[str] = None, + redis_client: Optional[AsyncRedisClient] = None, + ) -> MigrationPlan: + if not schema_patch_path and not target_schema_path: + raise ValueError( + "Must provide either --schema-patch or --target-schema for migration planning" + ) + if schema_patch_path and target_schema_path: + raise ValueError( + "Provide only one of --schema-patch or --target-schema for migration planning" + ) + + snapshot = await self.snapshot_source( + index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + source_schema = IndexSchema.from_dict(snapshot.schema_snapshot) + + if schema_patch_path: + schema_patch = self._sync_planner.load_schema_patch(schema_patch_path) + else: + # target_schema_path is guaranteed to be not None here + assert target_schema_path is not None + schema_patch = self._sync_planner.normalize_target_schema_to_patch( + source_schema, target_schema_path + ) + + return await self.create_plan_from_patch( + index_name, + schema_patch=schema_patch, + redis_url=redis_url, + redis_client=redis_client, + ) + + async def create_plan_from_patch( + self, + index_name: str, + *, + schema_patch: SchemaPatch, + redis_url: Optional[str] = None, + redis_client: Optional[AsyncRedisClient] = None, + ) -> MigrationPlan: + snapshot = await self.snapshot_source( + index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + source_schema = IndexSchema.from_dict(snapshot.schema_snapshot) + merged_target_schema = self._sync_planner.merge_patch( + source_schema, schema_patch + ) + diff_classification = self._sync_planner.classify_diff( + source_schema, schema_patch, merged_target_schema + ) + + return MigrationPlan( + source=snapshot, + requested_changes=schema_patch.model_dump(exclude_none=True), + merged_target_schema=merged_target_schema.to_dict(), + diff_classification=diff_classification, + warnings=["Index downtime is required"], + ) + + async def snapshot_source( + self, + index_name: str, + *, + redis_url: Optional[str] = None, + redis_client: Optional[AsyncRedisClient] = None, + ) -> SourceSnapshot: + index = await AsyncSearchIndex.from_existing( + index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + schema_dict = index.schema.to_dict() + stats_snapshot = await index.info() + prefixes = index.schema.index.prefix + prefix_list = prefixes if isinstance(prefixes, list) else [prefixes] + + client = index.client + if client is None: + raise ValueError("Failed to get Redis client from index") + + return SourceSnapshot( + index_name=index_name, + schema_snapshot=schema_dict, + stats_snapshot=stats_snapshot, + keyspace=KeyspaceSnapshot( + storage_type=index.schema.index.storage_type.value, + prefixes=prefix_list, + key_separator=index.schema.index.key_separator, + key_sample=await self._async_sample_keys( + client=client, + prefixes=prefix_list, + key_separator=index.schema.index.key_separator, + ), + ), + ) + + async def _async_sample_keys( + self, *, client: AsyncRedisClient, prefixes: List[str], key_separator: str + ) -> List[str]: + """Async version of _sample_keys.""" + key_sample: List[str] = [] + if self.key_sample_limit <= 0: + return key_sample + + for prefix in prefixes: + if len(key_sample) >= self.key_sample_limit: + break + match_pattern = ( + f"{prefix}*" + if prefix.endswith(key_separator) + else f"{prefix}{key_separator}*" + ) + cursor: int = 0 + while True: + cursor, keys = await client.scan( + cursor=cursor, + match=match_pattern, + count=max(self.key_sample_limit, 10), + ) + for key in keys: + decoded_key = key.decode() if isinstance(key, bytes) else str(key) + if decoded_key not in key_sample: + key_sample.append(decoded_key) + if len(key_sample) >= self.key_sample_limit: + return key_sample + if cursor == 0: + break + return key_sample + + def write_plan(self, plan: MigrationPlan, plan_out: str) -> None: + """Delegate to sync planner for file I/O.""" + self._sync_planner.write_plan(plan, plan_out) diff --git a/redisvl/migration/async_utils.py b/redisvl/migration/async_utils.py new file mode 100644 index 00000000..158f790f --- /dev/null +++ b/redisvl/migration/async_utils.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +import asyncio +import time +from typing import Any, Callable, Dict, List, Optional, Tuple + +from redisvl.index import AsyncSearchIndex +from redisvl.migration.utils import schemas_equal +from redisvl.redis.connection import RedisConnectionFactory +from redisvl.types import AsyncRedisClient + + +async def async_list_indexes( + *, redis_url: Optional[str] = None, redis_client: Optional[AsyncRedisClient] = None +) -> List[str]: + """List all search indexes in Redis (async version).""" + if redis_client is None: + if not redis_url: + raise ValueError("Must provide either redis_url or redis_client") + redis_client = await RedisConnectionFactory._get_aredis_connection( + redis_url=redis_url + ) + index = AsyncSearchIndex.from_dict( + {"index": {"name": "__redisvl_migration_helper__"}, "fields": []}, + redis_client=redis_client, + ) + return await index.listall() + + +async def async_wait_for_index_ready( + index: AsyncSearchIndex, + *, + timeout_seconds: int = 1800, + poll_interval_seconds: float = 0.5, + progress_callback: Optional[Callable[[int, int, float], None]] = None, +) -> Tuple[Dict[str, Any], float]: + """Wait for index to finish indexing all documents (async version). + + Args: + index: The AsyncSearchIndex to monitor. + timeout_seconds: Maximum time to wait. + poll_interval_seconds: How often to check status. + progress_callback: Optional callback(indexed_docs, total_docs, percent). + """ + start = time.perf_counter() + deadline = start + timeout_seconds + latest_info = await index.info() + + stable_ready_checks = 0 + while time.perf_counter() < deadline: + latest_info = await index.info() + indexing = latest_info.get("indexing") + percent_indexed = latest_info.get("percent_indexed") + + if percent_indexed is not None or indexing is not None: + ready = float(percent_indexed or 0) >= 1.0 and not bool(indexing) + if progress_callback: + total_docs = int(latest_info.get("num_docs", 0)) + pct = float(percent_indexed or 0) + indexed_docs = int(total_docs * pct) + progress_callback(indexed_docs, total_docs, pct * 100) + else: + current_docs = latest_info.get("num_docs") + if current_docs is None: + ready = True + else: + if stable_ready_checks == 0: + stable_ready_checks = int(current_docs) + await asyncio.sleep(poll_interval_seconds) + continue + ready = int(current_docs) == stable_ready_checks + + if ready: + return latest_info, round(time.perf_counter() - start, 3) + + await asyncio.sleep(poll_interval_seconds) + + raise TimeoutError( + f"Index {index.schema.index.name} did not become ready within {timeout_seconds} seconds" + ) + + +async def async_current_source_matches_snapshot( + index_name: str, + expected_schema: Dict[str, Any], + *, + redis_url: Optional[str] = None, + redis_client: Optional[AsyncRedisClient] = None, +) -> bool: + """Check if current source schema matches the snapshot (async version).""" + current_index = await AsyncSearchIndex.from_existing( + index_name, + redis_url=redis_url, + redis_client=redis_client, + ) + return schemas_equal(current_index.schema.to_dict(), expected_schema) diff --git a/redisvl/migration/async_validation.py b/redisvl/migration/async_validation.py new file mode 100644 index 00000000..df1c14f5 --- /dev/null +++ b/redisvl/migration/async_validation.py @@ -0,0 +1,129 @@ +from __future__ import annotations + +import time +from typing import Any, Dict, Optional + +from redisvl.index import AsyncSearchIndex +from redisvl.migration.models import ( + MigrationPlan, + MigrationValidation, + QueryCheckResult, +) +from redisvl.migration.utils import load_yaml, schemas_equal +from redisvl.types import AsyncRedisClient + + +class AsyncMigrationValidator: + """Async migration validator for post-migration checks. + + This is the async version of MigrationValidator. It uses AsyncSearchIndex + and async Redis operations for better performance. + """ + + async def validate( + self, + plan: MigrationPlan, + *, + redis_url: Optional[str] = None, + redis_client: Optional[AsyncRedisClient] = None, + query_check_file: Optional[str] = None, + ) -> tuple[MigrationValidation, Dict[str, Any], float]: + started = time.perf_counter() + target_index = await AsyncSearchIndex.from_existing( + plan.merged_target_schema["index"]["name"], + redis_url=redis_url, + redis_client=redis_client, + ) + target_info = await target_index.info() + validation = MigrationValidation() + + live_schema = target_index.schema.to_dict() + validation.schema_match = schemas_equal(live_schema, plan.merged_target_schema) + + source_num_docs = int(plan.source.stats_snapshot.get("num_docs", 0) or 0) + target_num_docs = int(target_info.get("num_docs", 0) or 0) + validation.doc_count_match = source_num_docs == target_num_docs + + source_failures = int( + plan.source.stats_snapshot.get("hash_indexing_failures", 0) or 0 + ) + target_failures = int(target_info.get("hash_indexing_failures", 0) or 0) + validation.indexing_failures_delta = target_failures - source_failures + + key_sample = plan.source.keyspace.key_sample + client = target_index.client + if not key_sample: + validation.key_sample_exists = True + elif client is None: + validation.key_sample_exists = False + validation.errors.append("Failed to get Redis client for key sample check") + else: + existing_count = await client.exists(*key_sample) + validation.key_sample_exists = existing_count == len(key_sample) + + if query_check_file: + validation.query_checks = await self._run_query_checks( + target_index, + query_check_file, + ) + + if not validation.schema_match: + validation.errors.append("Live schema does not match merged_target_schema.") + if not validation.doc_count_match: + validation.errors.append( + "Live document count does not match source num_docs." + ) + if validation.indexing_failures_delta != 0: + validation.errors.append("Indexing failures increased during migration.") + if not validation.key_sample_exists: + validation.errors.append( + "One or more sampled source keys is missing after migration." + ) + if any(not query_check.passed for query_check in validation.query_checks): + validation.errors.append("One or more query checks failed.") + + return validation, target_info, round(time.perf_counter() - started, 3) + + async def _run_query_checks( + self, + target_index: AsyncSearchIndex, + query_check_file: str, + ) -> list[QueryCheckResult]: + query_checks = load_yaml(query_check_file) + results: list[QueryCheckResult] = [] + + for doc_id in query_checks.get("fetch_ids", []): + fetched = await target_index.fetch(doc_id) + results.append( + QueryCheckResult( + name=f"fetch:{doc_id}", + passed=fetched is not None, + details=( + "Document fetched successfully" + if fetched + else "Document not found" + ), + ) + ) + + client = target_index.client + for key in query_checks.get("keys_exist", []): + if client is None: + results.append( + QueryCheckResult( + name=f"key:{key}", + passed=False, + details="Failed to get Redis client", + ) + ) + else: + exists = bool(await client.exists(key)) + results.append( + QueryCheckResult( + name=f"key:{key}", + passed=exists, + details="Key exists" if exists else "Key not found", + ) + ) + + return results diff --git a/tests/integration/test_async_migration_v1.py b/tests/integration/test_async_migration_v1.py new file mode 100644 index 00000000..c50fdaf8 --- /dev/null +++ b/tests/integration/test_async_migration_v1.py @@ -0,0 +1,150 @@ +"""Integration tests for async migration (Phase 1.5). + +These tests verify the async migration components work correctly with a real +Redis instance, mirroring the sync tests in test_migration_v1.py. +""" + +import uuid + +import pytest +import yaml + +from redisvl.index import AsyncSearchIndex +from redisvl.migration import ( + AsyncMigrationExecutor, + AsyncMigrationPlanner, + AsyncMigrationValidator, +) +from redisvl.migration.utils import load_migration_plan, schemas_equal +from redisvl.redis.utils import array_to_buffer + + +@pytest.mark.asyncio +async def test_async_drop_recreate_plan_apply_validate_flow( + redis_url, worker_id, tmp_path +): + """Test full async migration flow: plan -> apply -> validate.""" + unique_id = str(uuid.uuid4())[:8] + index_name = f"async_migration_v1_{worker_id}_{unique_id}" + prefix = f"async_migration_v1:{worker_id}:{unique_id}" + + source_index = AsyncSearchIndex.from_dict( + { + "index": { + "name": index_name, + "prefix": prefix, + "storage_type": "hash", + }, + "fields": [ + {"name": "doc_id", "type": "tag"}, + {"name": "title", "type": "text"}, + {"name": "price", "type": "numeric"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "hnsw", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + }, + redis_url=redis_url, + ) + + docs = [ + { + "doc_id": "1", + "title": "alpha", + "price": 1, + "category": "news", + "embedding": array_to_buffer([0.1, 0.2, 0.3], "float32"), + }, + { + "doc_id": "2", + "title": "beta", + "price": 2, + "category": "sports", + "embedding": array_to_buffer([0.2, 0.1, 0.4], "float32"), + }, + ] + + await source_index.create(overwrite=True) + await source_index.load(docs, id_field="doc_id") + + # Create schema patch + patch_path = tmp_path / "schema_patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "add_fields": [ + { + "name": "category", + "type": "tag", + "attrs": {"separator": ","}, + } + ], + "remove_fields": ["price"], + "update_fields": [{"name": "title", "attrs": {"sortable": True}}], + }, + }, + sort_keys=False, + ) + ) + + # Create plan using async planner + plan_path = tmp_path / "migration_plan.yaml" + planner = AsyncMigrationPlanner() + plan = await planner.create_plan( + index_name, + redis_url=redis_url, + schema_patch_path=str(patch_path), + ) + assert plan.diff_classification.supported is True + planner.write_plan(plan, str(plan_path)) + + # Create query checks + query_check_path = tmp_path / "query_checks.yaml" + query_check_path.write_text( + yaml.safe_dump({"fetch_ids": ["1", "2"]}, sort_keys=False) + ) + + # Apply migration using async executor + executor = AsyncMigrationExecutor() + report = await executor.apply( + load_migration_plan(str(plan_path)), + redis_url=redis_url, + query_check_file=str(query_check_path), + ) + + # Verify migration succeeded + assert report.result == "succeeded" + assert report.validation.schema_match is True + assert report.validation.doc_count_match is True + assert report.validation.key_sample_exists is True + assert report.validation.indexing_failures_delta == 0 + assert not report.validation.errors + assert report.benchmark_summary.documents_indexed_per_second is not None + + # Verify schema matches target + live_index = await AsyncSearchIndex.from_existing(index_name, redis_url=redis_url) + assert schemas_equal(live_index.schema.to_dict(), plan.merged_target_schema) + + # Test standalone async validator + validator = AsyncMigrationValidator() + validation, _target_info, _duration = await validator.validate( + load_migration_plan(str(plan_path)), + redis_url=redis_url, + query_check_file=str(query_check_path), + ) + assert validation.schema_match is True + assert validation.doc_count_match is True + assert validation.key_sample_exists is True + assert not validation.errors + + # Cleanup + await live_index.delete(drop=True) diff --git a/tests/unit/test_async_migration_executor.py b/tests/unit/test_async_migration_executor.py new file mode 100644 index 00000000..da43ba2f --- /dev/null +++ b/tests/unit/test_async_migration_executor.py @@ -0,0 +1,125 @@ +"""Unit tests for AsyncMigrationExecutor. + +These tests mirror the sync MigrationExecutor patterns but use async/await. +""" + +import pytest + +from redisvl.migration import AsyncMigrationExecutor +from redisvl.migration.models import ( + DiffClassification, + KeyspaceSnapshot, + MigrationPlan, + SourceSnapshot, + ValidationPolicy, +) + + +def _make_basic_plan(): + """Create a basic migration plan for testing.""" + return MigrationPlan( + mode="drop_recreate", + source=SourceSnapshot( + index_name="test_index", + keyspace=KeyspaceSnapshot( + storage_type="hash", + prefixes=["test"], + key_separator=":", + key_sample=["test:1", "test:2"], + ), + schema_snapshot={ + "index": { + "name": "test_index", + "prefix": "test", + "storage_type": "hash", + }, + "fields": [ + {"name": "title", "type": "text"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + }, + stats_snapshot={"num_docs": 2}, + ), + requested_changes={}, + merged_target_schema={ + "index": { + "name": "test_index", + "prefix": "test", + "storage_type": "hash", + }, + "fields": [ + {"name": "title", "type": "text"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "hnsw", # Changed from flat + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + }, + diff_classification=DiffClassification( + supported=True, + blocked_reasons=[], + ), + validation=ValidationPolicy( + require_doc_count_match=True, + ), + warnings=["Index downtime is required"], + ) + + +def test_async_executor_instantiation(): + """Test AsyncMigrationExecutor can be instantiated.""" + executor = AsyncMigrationExecutor() + assert executor is not None + assert executor.validator is not None + + +def test_async_executor_with_validator(): + """Test AsyncMigrationExecutor with custom validator.""" + from redisvl.migration import AsyncMigrationValidator + + custom_validator = AsyncMigrationValidator() + executor = AsyncMigrationExecutor(validator=custom_validator) + assert executor.validator is custom_validator + + +@pytest.mark.asyncio +async def test_async_executor_handles_unsupported_plan(): + """Test executor returns error report for unsupported plan.""" + plan = _make_basic_plan() + plan.diff_classification.supported = False + plan.diff_classification.blocked_reasons = ["Test blocked reason"] + + executor = AsyncMigrationExecutor() + + # The executor doesn't raise an error - it returns a report with errors + report = await executor.apply(plan, redis_url="redis://localhost:6379") + assert report.result == "failed" + assert "Test blocked reason" in report.validation.errors + + +@pytest.mark.asyncio +async def test_async_executor_validates_redis_url(): + """Test executor requires redis_url or redis_client.""" + plan = _make_basic_plan() + executor = AsyncMigrationExecutor() + + # The executor should raise an error internally when trying to connect + # but let's verify it doesn't crash before it tries to apply + # For a proper test, we'd need to mock AsyncSearchIndex.from_existing + # For now, we just verify the executor is created + assert executor is not None diff --git a/tests/unit/test_async_migration_planner.py b/tests/unit/test_async_migration_planner.py new file mode 100644 index 00000000..1893a348 --- /dev/null +++ b/tests/unit/test_async_migration_planner.py @@ -0,0 +1,319 @@ +"""Unit tests for AsyncMigrationPlanner. + +These tests mirror the sync MigrationPlanner tests but use async/await patterns. +""" + +from fnmatch import fnmatch + +import pytest +import yaml + +from redisvl.migration import AsyncMigrationPlanner, MigrationPlanner +from redisvl.schema.schema import IndexSchema + + +class AsyncDummyClient: + """Async mock Redis client for testing.""" + + def __init__(self, keys): + self.keys = keys + + async def scan(self, cursor=0, match=None, count=None): + matched = [] + for key in self.keys: + decoded_key = key.decode() if isinstance(key, bytes) else str(key) + if match is None or fnmatch(decoded_key, match): + matched.append(key) + return 0, matched + + +class AsyncDummyIndex: + """Async mock SearchIndex for testing.""" + + def __init__(self, schema, stats, keys): + self.schema = schema + self._stats = stats + self._client = AsyncDummyClient(keys) + + @property + def client(self): + return self._client + + async def info(self): + return self._stats + + +def _make_source_schema(): + return IndexSchema.from_dict( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + { + "name": "title", + "type": "text", + "path": "$.title", + "attrs": {"sortable": False}, + }, + { + "name": "price", + "type": "numeric", + "path": "$.price", + "attrs": {"sortable": True}, + }, + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + } + ) + + +@pytest.mark.asyncio +async def test_async_create_plan_from_schema_patch(monkeypatch, tmp_path): + """Test async planner creates valid plan from schema patch.""" + source_schema = _make_source_schema() + dummy_index = AsyncDummyIndex( + source_schema, + {"num_docs": 2, "indexing": False}, + [b"docs:1", b"docs:2", b"docs:3"], + ) + + async def mock_from_existing(*args, **kwargs): + return dummy_index + + monkeypatch.setattr( + "redisvl.migration.async_planner.AsyncSearchIndex.from_existing", + mock_from_existing, + ) + + patch_path = tmp_path / "schema_patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "add_fields": [ + { + "name": "category", + "type": "tag", + "path": "$.category", + "attrs": {"separator": ","}, + } + ], + "remove_fields": ["price"], + "update_fields": [ + { + "name": "title", + "options": {"sortable": True}, + } + ], + }, + }, + sort_keys=False, + ) + ) + + planner = AsyncMigrationPlanner(key_sample_limit=2) + plan = await planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + schema_patch_path=str(patch_path), + ) + + assert plan.diff_classification.supported is True + assert plan.source.index_name == "docs" + assert plan.source.keyspace.storage_type == "json" + assert plan.source.keyspace.prefixes == ["docs"] + assert plan.source.keyspace.key_separator == ":" + assert plan.source.keyspace.key_sample == ["docs:1", "docs:2"] + assert plan.warnings == ["Index downtime is required"] + + merged_fields = { + field["name"]: field for field in plan.merged_target_schema["fields"] + } + assert plan.merged_target_schema["index"]["prefix"] == "docs" + assert merged_fields["title"]["attrs"]["sortable"] is True + assert "price" not in merged_fields + assert merged_fields["category"]["type"] == "tag" + + # Test write_plan works (delegates to sync) + plan_path = tmp_path / "migration_plan.yaml" + planner.write_plan(plan, str(plan_path)) + written_plan = yaml.safe_load(plan_path.read_text()) + assert written_plan["mode"] == "drop_recreate" + assert written_plan["diff_classification"]["supported"] is True + + +@pytest.mark.asyncio +async def test_async_planner_datatype_change_allowed(monkeypatch, tmp_path): + """Changing vector datatype (quantization) is allowed - executor will re-encode.""" + source_schema = _make_source_schema() + dummy_index = AsyncDummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + + async def mock_from_existing(*args, **kwargs): + return dummy_index + + monkeypatch.setattr( + "redisvl.migration.async_planner.AsyncSearchIndex.from_existing", + mock_from_existing, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + {"name": "title", "type": "text", "path": "$.title"}, + {"name": "price", "type": "numeric", "path": "$.price"}, + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float16", # Changed from float32 + }, + }, + ], + }, + sort_keys=False, + ) + ) + + planner = AsyncMigrationPlanner() + plan = await planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + assert plan.diff_classification.supported is True + assert len(plan.diff_classification.blocked_reasons) == 0 + + # Verify datatype changes are detected + datatype_changes = MigrationPlanner.get_vector_datatype_changes( + plan.source.schema_snapshot, plan.merged_target_schema + ) + assert "embedding" in datatype_changes + assert datatype_changes["embedding"]["source"] == "float32" + assert datatype_changes["embedding"]["target"] == "float16" + + +@pytest.mark.asyncio +async def test_async_planner_algorithm_change_allowed(monkeypatch, tmp_path): + """Changing vector algorithm is allowed (index-only change).""" + source_schema = _make_source_schema() + dummy_index = AsyncDummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + + async def mock_from_existing(*args, **kwargs): + return dummy_index + + monkeypatch.setattr( + "redisvl.migration.async_planner.AsyncSearchIndex.from_existing", + mock_from_existing, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + {"name": "title", "type": "text", "path": "$.title"}, + {"name": "price", "type": "numeric", "path": "$.price"}, + { + "name": "embedding", + "type": "vector", + "path": "$.embedding", + "attrs": { + "algorithm": "hnsw", # Changed from flat + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + }, + sort_keys=False, + ) + ) + + planner = AsyncMigrationPlanner() + plan = await planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + assert plan.diff_classification.supported is True + assert len(plan.diff_classification.blocked_reasons) == 0 + + +@pytest.mark.asyncio +async def test_async_planner_prefix_change_blocked(monkeypatch, tmp_path): + """Prefix change is blocked: documents are at wrong keys.""" + source_schema = _make_source_schema() + dummy_index = AsyncDummyIndex(source_schema, {"num_docs": 2}, [b"docs:1"]) + + async def mock_from_existing(*args, **kwargs): + return dummy_index + + monkeypatch.setattr( + "redisvl.migration.async_planner.AsyncSearchIndex.from_existing", + mock_from_existing, + ) + + target_schema_path = tmp_path / "target_schema.yaml" + target_schema_path.write_text( + yaml.safe_dump( + { + "index": { + "name": "docs", + "prefix": "docs_v2", # Changed prefix + "key_separator": ":", + "storage_type": "json", + }, + "fields": source_schema.to_dict()["fields"], + }, + sort_keys=False, + ) + ) + + planner = AsyncMigrationPlanner() + plan = await planner.create_plan( + "docs", + redis_url="redis://localhost:6379", + target_schema_path=str(target_schema_path), + ) + + assert plan.diff_classification.supported is False + assert any( + "prefix" in reason.lower() + for reason in plan.diff_classification.blocked_reasons + ) From 363569964fb585219d92320a2dda52a7197e4e3f Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Fri, 20 Mar 2026 10:14:54 -0400 Subject: [PATCH 08/10] refactor(migrate): remove unused imports --- redisvl/cli/migrate.py | 2 -- redisvl/migration/async_planner.py | 2 +- redisvl/migration/executor.py | 1 - redisvl/migration/utils.py | 2 +- redisvl/migration/wizard.py | 1 - 5 files changed, 2 insertions(+), 6 deletions(-) diff --git a/redisvl/cli/migrate.py b/redisvl/cli/migrate.py index 5aff58aa..130a1417 100644 --- a/redisvl/cli/migrate.py +++ b/redisvl/cli/migrate.py @@ -1,13 +1,11 @@ import argparse import asyncio import sys -from argparse import Namespace from typing import Optional from redisvl.cli.utils import add_redis_connection_options, create_redis_url from redisvl.migration import ( AsyncMigrationExecutor, - AsyncMigrationValidator, MigrationExecutor, MigrationPlanner, MigrationValidator, diff --git a/redisvl/migration/async_planner.py b/redisvl/migration/async_planner.py index 76b2e6e4..7e39faa8 100644 --- a/redisvl/migration/async_planner.py +++ b/redisvl/migration/async_planner.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, List, Optional +from typing import List, Optional from redisvl.index import AsyncSearchIndex from redisvl.migration.models import ( diff --git a/redisvl/migration/executor.py b/redisvl/migration/executor.py index e34b4b04..40e32319 100644 --- a/redisvl/migration/executor.py +++ b/redisvl/migration/executor.py @@ -20,7 +20,6 @@ ) from redisvl.migration.validation import MigrationValidator from redisvl.redis.utils import array_to_buffer, buffer_to_array -from redisvl.schema import StorageType logger = logging.getLogger(__name__) diff --git a/redisvl/migration/utils.py b/redisvl/migration/utils.py index a5b12766..b89263c0 100644 --- a/redisvl/migration/utils.py +++ b/redisvl/migration/utils.py @@ -3,7 +3,7 @@ import json import time from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Any, Callable, Dict, Optional, Tuple import yaml diff --git a/redisvl/migration/wizard.py b/redisvl/migration/wizard.py index feb4d3b0..b1a9edd5 100644 --- a/redisvl/migration/wizard.py +++ b/redisvl/migration/wizard.py @@ -1,6 +1,5 @@ from __future__ import annotations -from pathlib import Path from typing import Any, Dict, List, Optional import yaml From 61c6e80d6158c351b536d173c377a1728e37b367 Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Fri, 20 Mar 2026 15:55:07 -0400 Subject: [PATCH 09/10] feat:add batch indexing --- .../how_to_guides/migrate-indexes.md | 296 ++++++++++++++++-- redisvl/cli/migrate.py | 290 ++++++++++++++++- redisvl/migration/__init__.py | 17 +- redisvl/migration/models.py | 103 ++++++ 4 files changed, 678 insertions(+), 28 deletions(-) diff --git a/docs/user_guide/how_to_guides/migrate-indexes.md b/docs/user_guide/how_to_guides/migrate-indexes.md index 00326e51..54240cf8 100644 --- a/docs/user_guide/how_to_guides/migrate-indexes.md +++ b/docs/user_guide/how_to_guides/migrate-indexes.md @@ -41,28 +41,15 @@ docker run -d --name redis -p 6379:6379 redis/redis-stack-server:latest ## Step 1: Discover Available Indexes ```bash -rvl migrate helper --url redis://localhost:6379 rvl migrate list --url redis://localhost:6379 ``` **Example output:** ``` -Index Migrator -============== -The migrator helps you safely change your index schema. - -Supported changes: - - Add, remove, or update text/tag/numeric/geo fields - - Change vector algorithm (FLAT, HNSW, SVS-VAMANA) - - Change distance metric (COSINE, L2, IP) - - Quantize vectors (float32 → float16) - -Commands: - rvl migrate list List all indexes - rvl migrate wizard Build a migration interactively - rvl migrate plan Generate a migration plan - rvl migrate apply Execute a migration - rvl migrate validate Verify a migration +Available indexes: + 1. products_idx + 2. users_idx + 3. orders_idx ``` ## Step 2: Build Your Schema Change @@ -330,7 +317,6 @@ rvl migrate apply \ **When to use async:** - Quantizing millions of vectors (float32 to float16) -- Redis instance has 40M+ keys - Integrating into an async application For most migrations (index-only changes, small datasets), sync mode is sufficient and simpler. @@ -379,15 +365,25 @@ rvl migrate validate \ ## CLI Reference +### Single-Index Commands + | Command | Description | |---------|-------------| -| `rvl migrate helper` | Show supported changes and usage tips | | `rvl migrate list` | List all indexes | | `rvl migrate wizard` | Build a migration interactively | | `rvl migrate plan` | Generate a migration plan | | `rvl migrate apply` | Execute a migration | | `rvl migrate validate` | Verify a migration result | +### Batch Commands + +| Command | Description | +|---------|-------------| +| `rvl migrate batch-plan` | Create a batch migration plan | +| `rvl migrate batch-apply` | Execute a batch migration | +| `rvl migrate batch-resume` | Resume an interrupted batch | +| `rvl migrate batch-status` | Check batch progress | + **Common flags:** - `--url` : Redis connection URL - `--index` : Index name to migrate @@ -397,6 +393,16 @@ rvl migrate validate \ - `--report-out` : Path for validation report - `--benchmark-out` : Path for performance metrics +**Batch-specific flags:** +- `--pattern` : Glob pattern to match index names (e.g., `*_idx`) +- `--indexes` : Explicit list of index names +- `--indexes-file` : File containing index names (one per line) +- `--schema-patch` : Path to shared schema patch YAML +- `--state` : Path to checkpoint state file +- `--failure-policy` : `fail_fast` or `continue_on_error` +- `--accept-data-loss` : Required for quantization (lossy changes) +- `--retry-failed` : Retry previously failed indexes on resume + ## Troubleshooting ### Migration blocked: "unsupported change" @@ -467,6 +473,258 @@ async def migrate(): asyncio.run(migrate()) ``` +## Batch Migration + +When you need to apply the same schema change to multiple indexes, use batch migration. This is common for: + +- Quantizing all indexes from float32 → float16 +- Standardizing vector algorithms across indexes +- Coordinated migrations during maintenance windows + +### Quick Start: Batch Migration + +```bash +# 1. Create a shared patch (applies to any index with an 'embedding' field) +cat > quantize_patch.yaml << 'EOF' +version: 1 +changes: + update_fields: + - name: embedding + attrs: + datatype: float16 +EOF + +# 2. Create a batch plan for all indexes matching a pattern +rvl migrate batch-plan \ + --pattern "*_idx" \ + --schema-patch quantize_patch.yaml \ + --output batch_plan.yaml \ + --url redis://localhost:6379 + +# 3. Apply the batch plan +rvl migrate batch-apply \ + --plan batch_plan.yaml \ + --allow-downtime \ + --accept-data-loss \ + --url redis://localhost:6379 + +# 4. Check status +rvl migrate batch-status --state batch_state.yaml +``` + +### Batch Plan Options + +**Select indexes by pattern:** +```bash +rvl migrate batch-plan \ + --pattern "*_idx" \ + --schema-patch quantize_patch.yaml \ + --output batch_plan.yaml \ + --url redis://localhost:6379 +``` + +**Select indexes by explicit list:** +```bash +rvl migrate batch-plan \ + --indexes products_idx users_idx orders_idx \ + --schema-patch quantize_patch.yaml \ + --output batch_plan.yaml \ + --url redis://localhost:6379 +``` + +**Select indexes from a file (for 100+ indexes):** +```bash +# Create index list file +echo -e "products_idx\nusers_idx\norders_idx" > indexes.txt + +rvl migrate batch-plan \ + --indexes-file indexes.txt \ + --schema-patch quantize_patch.yaml \ + --output batch_plan.yaml \ + --url redis://localhost:6379 +``` + +### Batch Plan Review + +The generated `batch_plan.yaml` shows which indexes will be migrated: + +```yaml +version: 1 +batch_id: "batch_20260320_100000" +mode: drop_recreate +failure_policy: fail_fast +requires_quantization: true + +shared_patch: + version: 1 + changes: + update_fields: + - name: embedding + attrs: + datatype: float16 + +indexes: + - name: products_idx + applicable: true + skip_reason: null + - name: users_idx + applicable: true + skip_reason: null + - name: legacy_idx + applicable: false + skip_reason: "Field 'embedding' not found" + +created_at: "2026-03-20T10:00:00Z" +``` + +**Key fields:** +- `applicable: true` means the patch applies to this index +- `skip_reason` explains why an index will be skipped + +### Applying a Batch Plan + +```bash +# Apply with fail-fast (default: stop on first error) +rvl migrate batch-apply \ + --plan batch_plan.yaml \ + --allow-downtime \ + --accept-data-loss \ + --url redis://localhost:6379 + +# Apply with continue-on-error (process all possible indexes) +rvl migrate batch-apply \ + --plan batch_plan.yaml \ + --allow-downtime \ + --accept-data-loss \ + --failure-policy continue_on_error \ + --url redis://localhost:6379 +``` + +**Flags:** +- `--allow-downtime` : Required (each index is temporarily unavailable during migration) +- `--accept-data-loss` : Required when quantizing vectors (float32 → float16 is lossy) +- `--failure-policy` : `fail_fast` (default) or `continue_on_error` +- `--state` : Path to checkpoint file (default: `batch_state.yaml`) +- `--report-dir` : Directory for per-index reports (default: `./reports/`) + +### Resume After Failure + +Batch migration automatically checkpoints progress. If interrupted: + +```bash +# Resume from where it left off +rvl migrate batch-resume \ + --state batch_state.yaml \ + --allow-downtime \ + --url redis://localhost:6379 + +# Retry previously failed indexes +rvl migrate batch-resume \ + --state batch_state.yaml \ + --retry-failed \ + --allow-downtime \ + --url redis://localhost:6379 +``` + +### Checking Batch Status + +```bash +rvl migrate batch-status --state batch_state.yaml +``` + +**Example output:** +``` +Batch Migration Status +====================== +Batch ID: batch_20260320_100000 +Started: 2026-03-20T10:00:00Z +Updated: 2026-03-20T10:25:00Z + +Completed: 2 + - products_idx: succeeded (10:02:30) + - users_idx: failed - Redis connection timeout (10:05:45) + +In Progress: inventory_idx +Remaining: 1 (analytics_idx) +``` + +### Batch Report + +After completion, a `batch_report.yaml` is generated: + +```yaml +version: 1 +batch_id: "batch_20260320_100000" +status: completed # or partial_failure, failed +summary: + total_indexes: 3 + successful: 3 + failed: 0 + skipped: 0 + total_duration_seconds: 127.5 +indexes: + - name: products_idx + status: succeeded + duration_seconds: 45.2 + docs_migrated: 15000 + report_path: ./reports/products_idx_report.yaml + - name: users_idx + status: succeeded + duration_seconds: 38.1 + docs_migrated: 8500 + - name: orders_idx + status: succeeded + duration_seconds: 44.2 + docs_migrated: 22000 +completed_at: "2026-03-20T10:02:07Z" +``` + +### Python API for Batch Migration + +```python +from redisvl.migration import BatchMigrationPlanner, BatchMigrationExecutor + +# Create batch plan +planner = BatchMigrationPlanner() +batch_plan = planner.create_plan( + redis_url="redis://localhost:6379", + pattern="*_idx", + schema_patch_path="quantize_patch.yaml", +) + +# Review applicability +for idx in batch_plan.indexes: + if idx.applicable: + print(f"Will migrate: {idx.name}") + else: + print(f"Skipping {idx.name}: {idx.skip_reason}") + +# Execute batch +executor = BatchMigrationExecutor() +report = executor.apply( + batch_plan, + redis_url="redis://localhost:6379", + state_path="batch_state.yaml", + report_dir="./reports/", + progress_callback=lambda name, pos, total, status: print(f"[{pos}/{total}] {name}: {status}"), +) + +print(f"Batch status: {report.status}") +print(f"Successful: {report.summary.successful}/{report.summary.total_indexes}") +``` + +### Batch Migration Tips + +1. **Test on a single index first**: Run a single-index migration to verify the patch works before applying to a batch. + +2. **Use `continue_on_error` for large batches**: This ensures one failure doesn't block all remaining indexes. + +3. **Schedule during low-traffic periods**: Each index has downtime during migration. + +4. **Review skipped indexes**: The `skip_reason` often indicates schema differences that need attention. + +5. **Keep checkpoint files**: The `batch_state.yaml` is essential for resume. Don't delete it until the batch completes successfully. + ## Learn more - {doc}`/concepts/index-migrations`: How migrations work and which changes are supported diff --git a/redisvl/cli/migrate.py b/redisvl/cli/migrate.py index 130a1417..d65ecd05 100644 --- a/redisvl/cli/migrate.py +++ b/redisvl/cli/migrate.py @@ -1,11 +1,14 @@ import argparse import asyncio import sys +from pathlib import Path from typing import Optional from redisvl.cli.utils import add_redis_connection_options, create_redis_url from redisvl.migration import ( AsyncMigrationExecutor, + BatchMigrationExecutor, + BatchMigrationPlanner, MigrationExecutor, MigrationPlanner, MigrationValidator, @@ -13,8 +16,10 @@ from redisvl.migration.utils import ( list_indexes, load_migration_plan, + load_yaml, write_benchmark_report, write_migration_report, + write_yaml, ) from redisvl.migration.wizard import MigrationWizard from redisvl.utils.log import get_logger @@ -27,12 +32,18 @@ class Migrate: [ "rvl migrate []\n", "Commands:", - "\thelper Show migration guidance and supported capabilities", - "\tlist List all available indexes", - "\tplan Generate a migration plan for a document-preserving drop/recreate migration", - "\twizard Interactively build a migration plan and schema patch", - "\tapply Execute a reviewed drop/recreate migration plan (use --async for large migrations)", - "\tvalidate Validate a completed migration plan against the live index", + "\thelper Show migration guidance and supported capabilities", + "\tlist List all available indexes", + "\tplan Generate a migration plan for a document-preserving drop/recreate migration", + "\twizard Interactively build a migration plan and schema patch", + "\tapply Execute a reviewed drop/recreate migration plan (use --async for large migrations)", + "\tvalidate Validate a completed migration plan against the live index", + "", + "Batch Commands:", + "\tbatch-plan Generate a batch migration plan for multiple indexes", + "\tbatch-apply Execute a batch migration plan with checkpointing", + "\tbatch-resume Resume an interrupted batch migration", + "\tbatch-status Show status of an in-progress or completed batch migration", "\n", ] ) @@ -42,12 +53,14 @@ def __init__(self): parser.add_argument("command", help="Subcommand to run") args = parser.parse_args(sys.argv[2:3]) - if not hasattr(self, args.command): + # Convert dashes to underscores for method lookup (e.g., batch-plan -> batch_plan) + command = args.command.replace("-", "_") + if not hasattr(self, command): parser.print_help() exit(0) try: - getattr(self, args.command)() + getattr(self, command)() except Exception as e: logger.error(e) exit(1) @@ -448,3 +461,264 @@ def _print_report_summary( print(f"- {action}") if benchmark_out: print(f"Benchmark report written to {benchmark_out}") + + # ------------------------------------------------------------------------- + # Batch migration commands + # ------------------------------------------------------------------------- + + def batch_plan(self): + """Generate a batch migration plan for multiple indexes.""" + parser = argparse.ArgumentParser( + usage=( + "rvl migrate batch-plan --schema-patch " + "(--pattern | --indexes | --indexes-file )" + ) + ) + parser.add_argument( + "--schema-patch", help="Path to shared schema patch file", required=True + ) + parser.add_argument( + "--pattern", help="Glob pattern to match index names (e.g., '*_idx')" + ) + parser.add_argument("--indexes", help="Comma-separated list of index names") + parser.add_argument( + "--indexes-file", help="File with index names (one per line)" + ) + parser.add_argument( + "--failure-policy", + help="How to handle failures: fail_fast or continue_on_error", + choices=["fail_fast", "continue_on_error"], + default="fail_fast", + ) + parser.add_argument( + "--plan-out", + help="Path to write batch_plan.yaml", + default="batch_plan.yaml", + ) + parser = add_redis_connection_options(parser) + args = parser.parse_args(sys.argv[3:]) + + redis_url = create_redis_url(args) + indexes = args.indexes.split(",") if args.indexes else None + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=indexes, + pattern=args.pattern, + indexes_file=args.indexes_file, + schema_patch_path=args.schema_patch, + redis_url=redis_url, + failure_policy=args.failure_policy, + ) + + planner.write_batch_plan(batch_plan, args.plan_out) + self._print_batch_plan_summary(args.plan_out, batch_plan) + + def batch_apply(self): + """Execute a batch migration plan with checkpointing.""" + parser = argparse.ArgumentParser( + usage=( + "rvl migrate batch-apply --plan --allow-downtime " + "[--state ] [--report-dir <./reports>]" + ) + ) + parser.add_argument("--plan", help="Path to batch_plan.yaml", required=True) + parser.add_argument( + "--allow-downtime", + help="Explicitly acknowledge downtime for drop_recreate", + action="store_true", + ) + parser.add_argument( + "--accept-data-loss", + help="Acknowledge that quantization is lossy and cannot be reverted", + action="store_true", + ) + parser.add_argument( + "--state", + help="Path to checkpoint state file", + default="batch_state.yaml", + ) + parser.add_argument( + "--report-dir", + help="Directory for per-index migration reports", + default="./reports", + ) + parser = add_redis_connection_options(parser) + args = parser.parse_args(sys.argv[3:]) + + if not args.allow_downtime: + raise ValueError( + "batch-apply requires --allow-downtime for drop_recreate migrations" + ) + + # Load batch plan + from redisvl.migration.models import BatchPlan + + plan_data = load_yaml(args.plan) + batch_plan = BatchPlan.model_validate(plan_data) + + # Check for quantization warning + if batch_plan.requires_quantization and not args.accept_data_loss: + print( + """WARNING: This batch migration includes quantization (e.g., float32 -> float16). + Vector data will be modified. Original precision cannot be recovered. + To proceed, add --accept-data-loss flag. + + If you need to preserve original vectors, backup your data first: + redis-cli BGSAVE""" + ) + return + + redis_url = create_redis_url(args) + executor = BatchMigrationExecutor() + + def progress_callback( + index_name: str, position: int, total: int, status: str + ) -> None: + print(f"[{position}/{total}] {index_name}: {status}") + + report = executor.apply( + batch_plan, + state_path=args.state, + report_dir=args.report_dir, + redis_url=redis_url, + progress_callback=progress_callback, + ) + + self._print_batch_report_summary(report) + + def batch_resume(self): + """Resume an interrupted batch migration.""" + parser = argparse.ArgumentParser( + usage=( + "rvl migrate batch-resume --state " + "[--plan ] [--retry-failed]" + ) + ) + parser.add_argument( + "--state", help="Path to checkpoint state file", required=True + ) + parser.add_argument( + "--plan", help="Path to batch_plan.yaml (optional, uses state.plan_path)" + ) + parser.add_argument( + "--retry-failed", + help="Retry previously failed indexes", + action="store_true", + ) + parser.add_argument( + "--report-dir", + help="Directory for per-index migration reports", + default="./reports", + ) + parser = add_redis_connection_options(parser) + args = parser.parse_args(sys.argv[3:]) + + redis_url = create_redis_url(args) + executor = BatchMigrationExecutor() + + def progress_callback( + index_name: str, position: int, total: int, status: str + ) -> None: + print(f"[{position}/{total}] {index_name}: {status}") + + report = executor.resume( + args.state, + batch_plan_path=args.plan, + retry_failed=args.retry_failed, + report_dir=args.report_dir, + redis_url=redis_url, + progress_callback=progress_callback, + ) + + self._print_batch_report_summary(report) + + def batch_status(self): + """Show status of an in-progress or completed batch migration.""" + parser = argparse.ArgumentParser( + usage="rvl migrate batch-status --state " + ) + parser.add_argument( + "--state", help="Path to checkpoint state file", required=True + ) + args = parser.parse_args(sys.argv[3:]) + + state_path = Path(args.state).resolve() + if not state_path.exists(): + print(f"State file not found: {args.state}") + return + + from redisvl.migration.models import BatchState + + state_data = load_yaml(args.state) + state = BatchState.model_validate(state_data) + + print(f"Batch ID: {state.batch_id}") + print(f"Started at: {state.started_at}") + print(f"Updated at: {state.updated_at}") + print(f"Current index: {state.current_index or '(none)'}") + print(f"Remaining: {len(state.remaining)}") + print(f"Completed: {len(state.completed)}") + print(f" - Succeeded: {state.success_count}") + print(f" - Failed: {state.failed_count}") + + if state.completed: + print("\nCompleted indexes:") + for idx in state.completed: + status_icon = "[OK]" if idx.status == "succeeded" else "[FAIL]" + print(f" {status_icon} {idx.name}") + if idx.error: + print(f" Error: {idx.error}") + + if state.remaining: + print(f"\nRemaining indexes ({len(state.remaining)}):") + for name in state.remaining[:10]: + print(f" - {name}") + if len(state.remaining) > 10: + print(f" ... and {len(state.remaining) - 10} more") + + def _print_batch_plan_summary(self, plan_out: str, batch_plan) -> None: + """Print summary after generating batch plan.""" + import os + + abs_path = os.path.abspath(plan_out) + print(f"Batch plan written to {abs_path}") + print(f"Batch ID: {batch_plan.batch_id}") + print(f"Mode: {batch_plan.mode}") + print(f"Failure policy: {batch_plan.failure_policy}") + print(f"Requires quantization: {batch_plan.requires_quantization}") + print(f"Total indexes: {len(batch_plan.indexes)}") + print(f" - Applicable: {batch_plan.applicable_count}") + print(f" - Skipped: {batch_plan.skipped_count}") + + if batch_plan.skipped_count > 0: + print("\nSkipped indexes:") + for idx in batch_plan.indexes: + if not idx.applicable: + print(f" - {idx.name}: {idx.skip_reason}") + + print( + f""" +Next steps: + Review the plan: cat {plan_out} + Apply the migration: rvl migrate batch-apply --plan {plan_out} --allow-downtime""" + ) + + if batch_plan.requires_quantization: + print(" (add --accept-data-loss for quantization)") + + def _print_batch_report_summary(self, report) -> None: + """Print summary after batch migration completes.""" + print(f"\nBatch migration {report.status}") + print(f"Batch ID: {report.batch_id}") + print(f"Duration: {report.summary.total_duration_seconds}s") + print(f"Total: {report.summary.total_indexes}") + print(f" - Succeeded: {report.summary.successful}") + print(f" - Failed: {report.summary.failed}") + print(f" - Skipped: {report.summary.skipped}") + + if report.summary.failed > 0: + print("\nFailed indexes:") + for idx in report.indexes: + if idx.status == "failed": + print(f" - {idx.name}: {idx.error}") diff --git a/redisvl/migration/__init__.py b/redisvl/migration/__init__.py index 4097d02c..6f55bdf6 100644 --- a/redisvl/migration/__init__.py +++ b/redisvl/migration/__init__.py @@ -6,8 +6,17 @@ async_wait_for_index_ready, ) from redisvl.migration.async_validation import AsyncMigrationValidator +from redisvl.migration.batch_executor import BatchMigrationExecutor +from redisvl.migration.batch_planner import BatchMigrationPlanner from redisvl.migration.executor import MigrationExecutor -from redisvl.migration.models import MigrationPlan, MigrationReport, SchemaPatch +from redisvl.migration.models import ( + BatchPlan, + BatchReport, + BatchState, + MigrationPlan, + MigrationReport, + SchemaPatch, +) from redisvl.migration.planner import MigrationPlanner from redisvl.migration.validation import MigrationValidator from redisvl.migration.wizard import MigrationWizard @@ -21,6 +30,12 @@ "MigrationValidator", "MigrationWizard", "SchemaPatch", + # Batch + "BatchMigrationExecutor", + "BatchMigrationPlanner", + "BatchPlan", + "BatchReport", + "BatchState", # Async "AsyncMigrationExecutor", "AsyncMigrationPlanner", diff --git a/redisvl/migration/models.py b/redisvl/migration/models.py index 9feda0c5..0ce8c0a9 100644 --- a/redisvl/migration/models.py +++ b/redisvl/migration/models.py @@ -118,3 +118,106 @@ class MigrationReport(BaseModel): ) warnings: List[str] = Field(default_factory=list) manual_actions: List[str] = Field(default_factory=list) + + +# ----------------------------------------------------------------------------- +# Batch Migration Models +# ----------------------------------------------------------------------------- + + +class BatchIndexEntry(BaseModel): + """Entry for a single index in a batch migration plan.""" + + name: str + applicable: bool = True + skip_reason: Optional[str] = None + + +class BatchPlan(BaseModel): + """Plan for migrating multiple indexes with a shared patch.""" + + version: int = 1 + batch_id: str + mode: str = "drop_recreate" + failure_policy: str = "fail_fast" # or "continue_on_error" + requires_quantization: bool = False + shared_patch: SchemaPatch + indexes: List[BatchIndexEntry] = Field(default_factory=list) + created_at: str + + @property + def applicable_count(self) -> int: + return sum(1 for idx in self.indexes if idx.applicable) + + @property + def skipped_count(self) -> int: + return sum(1 for idx in self.indexes if not idx.applicable) + + +class BatchIndexState(BaseModel): + """State of a single index in batch execution.""" + + name: str + status: str # pending, in_progress, success, failed, skipped + started_at: Optional[str] = None + completed_at: Optional[str] = None + failed_at: Optional[str] = None + error: Optional[str] = None + report_path: Optional[str] = None + + +class BatchState(BaseModel): + """Checkpoint state for batch migration execution.""" + + batch_id: str + plan_path: str + started_at: str + updated_at: str + completed: List[BatchIndexState] = Field(default_factory=list) + current_index: Optional[str] = None + remaining: List[str] = Field(default_factory=list) + + @property + def success_count(self) -> int: + return sum(1 for idx in self.completed if idx.status == "success") + + @property + def failed_count(self) -> int: + return sum(1 for idx in self.completed if idx.status == "failed") + + @property + def is_complete(self) -> bool: + return len(self.remaining) == 0 and self.current_index is None + + +class BatchReportSummary(BaseModel): + """Summary statistics for batch migration.""" + + total_indexes: int = 0 + successful: int = 0 + failed: int = 0 + skipped: int = 0 + total_duration_seconds: float = 0.0 + + +class BatchIndexReport(BaseModel): + """Report for a single index in batch execution.""" + + name: str + status: str # success, failed, skipped + duration_seconds: Optional[float] = None + docs_migrated: Optional[int] = None + report_path: Optional[str] = None + error: Optional[str] = None + + +class BatchReport(BaseModel): + """Final report for batch migration execution.""" + + version: int = 1 + batch_id: str + status: str # completed, partial_failure, failed + summary: BatchReportSummary = Field(default_factory=BatchReportSummary) + indexes: List[BatchIndexReport] = Field(default_factory=list) + started_at: str + completed_at: str From 956109482964d916853f0bc6ae0693fa4038faff Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Fri, 20 Mar 2026 17:52:22 -0400 Subject: [PATCH 10/10] feat(migrate): optimize doc enumeration and simplify CLI Document Enumeration Optimization: - Use FT.AGGREGATE WITHCURSOR for efficient key enumeration - Falls back to SCAN only when index has hash_indexing_failures - Pre-enumerate keys before drop for reliable re-indexing CLI Simplification: - Remove redundant --allow-downtime flag from apply/batch-apply - Plan review is now the safety mechanism Batch Migration: - Add BatchMigrationExecutor and BatchMigrationPlanner - Support for multi-index migration with failure policies - Resumable batch operations with state persistence Bug Fixes: - Fix mypy type errors in planner, wizard, validation, and CLI Documentation: - Update concepts and how-to guides for new workflow - Remove --allow-downtime references from all docs --- docs/concepts/index-migrations.md | 32 +- docs/user_guide/cli.ipynb | 2 +- .../how_to_guides/migrate-indexes.md | 65 +- redisvl/cli/migrate.py | 37 +- redisvl/migration/async_executor.py | 255 ++- redisvl/migration/batch_executor.py | 340 ++++ redisvl/migration/batch_planner.py | 226 +++ redisvl/migration/executor.py | 271 +++- redisvl/migration/planner.py | 2 + redisvl/migration/validation.py | 5 +- redisvl/migration/wizard.py | 1 + .../test_batch_migration_integration.py | 486 ++++++ tests/unit/test_batch_migration.py | 1366 +++++++++++++++++ 13 files changed, 2920 insertions(+), 168 deletions(-) create mode 100644 redisvl/migration/batch_executor.py create mode 100644 redisvl/migration/batch_planner.py create mode 100644 tests/integration/test_batch_migration_integration.py create mode 100644 tests/unit/test_batch_migration.py diff --git a/docs/concepts/index-migrations.md b/docs/concepts/index-migrations.md index dd9bc834..1023246a 100644 --- a/docs/concepts/index-migrations.md +++ b/docs/concepts/index-migrations.md @@ -56,7 +56,7 @@ The process: 4. Wait for Redis to re-index the existing documents 5. Validate the result -**Tradeoff**: The index is unavailable during the rebuild. The migrator requires explicit acknowledgment of this downtime before proceeding. +**Tradeoff**: The index is unavailable during the rebuild. Review the migration plan carefully before applying. ## Index only vs document dependent changes @@ -130,7 +130,16 @@ Adding a vector field means all existing documents need vectors for that field. ## Downtime considerations -With `drop_recreate`, your index is unavailable between the drop and when re-indexing completes. Plan for: +c lWith `drop_recreate`, your index is unavailable between the drop and when re-indexing completes. + +**CRITICAL**: Downtime requires both reads AND writes to be paused: + +| Requirement | Reason | +|-------------|--------| +| **Pause reads** | Index is unavailable during migration | +| **Pause writes** | Redis updates indexes synchronously. Writes during migration may conflict with vector re-encoding or be missed | + +Plan for: - Search unavailability during the migration window - Partial results while indexing is in progress @@ -151,8 +160,9 @@ The migration workflow has distinct phases. Here is what each mode affects: |-------|-----------|------------|-------| | **Plan generation** | `MigrationPlanner.create_plan()` | `AsyncMigrationPlanner.create_plan()` | Reads index metadata from Redis | | **Schema snapshot** | Sync Redis calls | Async Redis calls | Single `FT.INFO` command | +| **Enumeration** | FT.AGGREGATE (or SCAN fallback) | FT.AGGREGATE (or SCAN fallback) | Before drop, only if quantization needed | | **Drop index** | `index.delete()` | `await index.delete()` | Single `FT.DROPINDEX` command | -| **Quantization** | Sequential SCAN + HSET | Pipelined SCAN + batched HSET | See below | +| **Quantization** | Sequential HGET + HSET | Pipelined HGET + batched HSET | Uses pre-enumerated keys | | **Create index** | `index.create()` | `await index.create()` | Single `FT.CREATE` command | | **Readiness polling** | `time.sleep()` loop | `asyncio.sleep()` loop | Polls `FT.INFO` until indexed | | **Validation** | Sync Redis calls | Async Redis calls | Schema and doc count checks | @@ -177,13 +187,13 @@ Async execution (`--async` flag) provides benefits in specific scenarios: Converting float32 to float16 requires reading every vector, converting it, and writing it back. The async executor: -- Uses `SCAN` with `COUNT 500` to iterate keys without blocking Redis (per [Redis SCAN docs](https://redis.io/docs/latest/commands/scan/), SCAN is O(1) per call) +- Enumerates documents using `FT.AGGREGATE WITHCURSOR` for index-specific enumeration (falls back to `SCAN` only if indexing failures exist) - Pipelines `HSET` operations in batches (100-1000 operations per pipeline is optimal for Redis) - Yields to the event loop between batches so other tasks can proceed **Large keyspaces (40M+ keys)** -When your Redis instance has many keys, `SCAN` iteration can take minutes. Async mode yields between batches. +When your Redis instance has many keys and the index has indexing failures (requiring SCAN fallback), async mode yields between batches. **Async application integration** @@ -205,12 +215,18 @@ asyncio.run(migrate()) ### Why async helps with quantization -The key difference is in the vector re-encoding loop: +The migrator uses an optimized enumeration strategy: + +1. **Index-based enumeration**: Uses `FT.AGGREGATE WITHCURSOR` to enumerate only indexed documents (not the entire keyspace) +2. **Fallback for safety**: If the index has indexing failures (`hash_indexing_failures > 0`), falls back to `SCAN` to ensure completeness +3. **Enumerate before drop**: Captures the document list while the index still exists, then drops and quantizes + +This optimization provides 10-1000x speedup for sparse indexes (where only a small fraction of prefix-matching keys are indexed). **Sync quantization:** ``` +enumerate keys (FT.AGGREGATE or SCAN) -> store list for each batch of 500 keys: - SCAN (blocks) -> get keys for each key: HGET field (blocks) convert array @@ -220,8 +236,8 @@ for each batch of 500 keys: **Async quantization:** ``` +enumerate keys (FT.AGGREGATE or SCAN) -> store list for each batch of 500 keys: - await SCAN -> get keys (yields) for each key: await HGET field (yields) convert array diff --git a/docs/user_guide/cli.ipynb b/docs/user_guide/cli.ipynb index dc9377d4..8c698fb8 100644 --- a/docs/user_guide/cli.ipynb +++ b/docs/user_guide/cli.ipynb @@ -54,7 +54,7 @@ "| `rvl migrate` | `helper` or `list` | show migration guidance and list indexes available for migration|\n", "| `rvl migrate` | `wizard` | interactively build a migration plan and schema patch|\n", "| `rvl migrate` | `plan` | generate `migration_plan.yaml` from a patch or target schema|\n", - "| `rvl migrate` | `apply --allow-downtime` | execute a reviewed `drop_recreate` migration|\n", + "| `rvl migrate` | `apply` | execute a reviewed `drop_recreate` migration|\n", "| `rvl migrate` | `validate` | validate a completed migration and emit report artifacts|" ] }, diff --git a/docs/user_guide/how_to_guides/migrate-indexes.md b/docs/user_guide/how_to_guides/migrate-indexes.md index 54240cf8..30605a85 100644 --- a/docs/user_guide/how_to_guides/migrate-indexes.md +++ b/docs/user_guide/how_to_guides/migrate-indexes.md @@ -21,7 +21,7 @@ rvl migrate list --url redis://localhost:6379 rvl migrate wizard --index myindex --url redis://localhost:6379 # 3. Apply the migration -rvl migrate apply --plan migration_plan.yaml --allow-downtime --url redis://localhost:6379 +rvl migrate apply --plan migration_plan.yaml --url redis://localhost:6379 # 4. Verify the result rvl migrate validate --plan migration_plan.yaml --url redis://localhost:6379 @@ -266,14 +266,45 @@ merged_target_schema: - `diff_classification.blocked_reasons` - Must be empty - `merged_target_schema` - The final schema after migration +## Understanding Downtime Requirements + +**CRITICAL**: During a `drop_recreate` migration, your application must: + +| Requirement | Description | +|-------------|-------------| +| **Pause reads** | Index is unavailable during migration | +| **Pause writes** | Writes during migration may be missed or cause conflicts | + +### Why Both Reads AND Writes Must Be Paused + +- **Reads**: The index definition is dropped and recreated. Any queries during this window will fail. +- **Writes**: Redis updates indexes synchronously on every write. If your app writes documents while the index is dropped, those writes are not indexed. Additionally, if you're quantizing vectors (float32 → float16), concurrent writes may conflict with the migration's re-encoding process. + +### What "Downtime" Means + +| Downtime Type | Reads | Writes | Safe? | +|---------------|-------|--------|-------| +| Full quiesce (recommended) | Stopped | Stopped | **YES** | +| Read-only pause | Stopped | Continuing | **NO** | +| Active | Active | Active | **NO** | + +### Recovery from Interrupted Migration + +| Interruption Point | Documents | Index | Recovery | +|--------------------|-----------|-------|----------| +| After drop, before quantize | Unchanged | **None** | Re-run apply | +| After quantization, before create | Quantized | **None** | Manual FT.CREATE or re-run apply | +| After create | Correct | Rebuilding | Wait for index ready | + +The underlying documents are **never deleted** by `drop_recreate` mode. + ## Step 4: Apply the Migration -The `apply` command requires `--allow-downtime` since the index will be temporarily unavailable. +The `apply` command executes the migration. The index will be temporarily unavailable during the drop-recreate process. ```bash rvl migrate apply \ --plan migration_plan.yaml \ - --allow-downtime \ --url redis://localhost:6379 \ --report-out migration_report.yaml \ --benchmark-out benchmark_report.yaml @@ -296,14 +327,13 @@ For large migrations (especially those involving vector quantization), use the ` ```bash rvl migrate apply \ --plan migration_plan.yaml \ - --allow-downtime \ --async \ --url redis://localhost:6379 ``` **What becomes async:** -- Keyspace SCAN during quantization (yields between batches of 500 keys) +- Document enumeration during quantization (uses `FT.AGGREGATE WITHCURSOR` for index-specific enumeration, falling back to SCAN only if indexing failures exist) - Vector read/write operations (pipelined HGET/HSET) - Index readiness polling (uses `asyncio.sleep()` instead of blocking) - Validation checks @@ -388,7 +418,6 @@ rvl migrate validate \ - `--url` : Redis connection URL - `--index` : Index name to migrate - `--plan` / `--plan-out` : Path to migration plan -- `--allow-downtime` : Acknowledge index unavailability (required for apply) - `--async` : Use async executor for large migrations (apply only) - `--report-out` : Path for validation report - `--benchmark-out` : Path for performance metrics @@ -504,7 +533,6 @@ rvl migrate batch-plan \ # 3. Apply the batch plan rvl migrate batch-apply \ --plan batch_plan.yaml \ - --allow-downtime \ --accept-data-loss \ --url redis://localhost:6379 @@ -587,26 +615,31 @@ created_at: "2026-03-20T10:00:00Z" # Apply with fail-fast (default: stop on first error) rvl migrate batch-apply \ --plan batch_plan.yaml \ - --allow-downtime \ --accept-data-loss \ --url redis://localhost:6379 -# Apply with continue-on-error (process all possible indexes) +# Apply with continue-on-error (set at batch-plan time) +# Note: failure_policy is set during batch-plan, not batch-apply +rvl migrate batch-plan \ + --pattern "*_idx" \ + --schema-patch quantize_patch.yaml \ + --failure-policy continue_on_error \ + --output batch_plan.yaml \ + --url redis://localhost:6379 + rvl migrate batch-apply \ --plan batch_plan.yaml \ - --allow-downtime \ --accept-data-loss \ - --failure-policy continue_on_error \ --url redis://localhost:6379 ``` -**Flags:** -- `--allow-downtime` : Required (each index is temporarily unavailable during migration) +**Flags for batch-apply:** - `--accept-data-loss` : Required when quantizing vectors (float32 → float16 is lossy) -- `--failure-policy` : `fail_fast` (default) or `continue_on_error` - `--state` : Path to checkpoint file (default: `batch_state.yaml`) - `--report-dir` : Directory for per-index reports (default: `./reports/`) +**Note:** `--failure-policy` is set during `batch-plan`, not `batch-apply`. The policy is stored in the batch plan file. + ### Resume After Failure Batch migration automatically checkpoints progress. If interrupted: @@ -615,14 +648,12 @@ Batch migration automatically checkpoints progress. If interrupted: # Resume from where it left off rvl migrate batch-resume \ --state batch_state.yaml \ - --allow-downtime \ --url redis://localhost:6379 # Retry previously failed indexes rvl migrate batch-resume \ --state batch_state.yaml \ --retry-failed \ - --allow-downtime \ --url redis://localhost:6379 ``` @@ -686,7 +717,7 @@ from redisvl.migration import BatchMigrationPlanner, BatchMigrationExecutor # Create batch plan planner = BatchMigrationPlanner() -batch_plan = planner.create_plan( +batch_plan = planner.create_batch_plan( redis_url="redis://localhost:6379", pattern="*_idx", schema_patch_path="quantize_patch.yaml", diff --git a/redisvl/cli/migrate.py b/redisvl/cli/migrate.py index d65ecd05..0f2aa52e 100644 --- a/redisvl/cli/migrate.py +++ b/redisvl/cli/migrate.py @@ -19,7 +19,6 @@ load_yaml, write_benchmark_report, write_migration_report, - write_yaml, ) from redisvl.migration.wizard import MigrationWizard from redisvl.utils.log import get_logger @@ -105,7 +104,7 @@ def helper(self): rvl migrate list List all indexes rvl migrate wizard --index Guided migration builder rvl migrate plan --index --schema-patch - rvl migrate apply --plan --allow-downtime + rvl migrate apply --plan rvl migrate validate --plan """ ) @@ -211,16 +210,11 @@ def wizard(self): def apply(self): parser = argparse.ArgumentParser( usage=( - "rvl migrate apply --plan --allow-downtime " + "rvl migrate apply --plan " "[--async] [--report-out ]" ) ) parser.add_argument("--plan", help="Path to migration_plan.yaml", required=True) - parser.add_argument( - "--allow-downtime", - help="Explicitly acknowledge downtime for drop_recreate", - action="store_true", - ) parser.add_argument( "--async", dest="use_async", @@ -245,11 +239,6 @@ def apply(self): parser = add_redis_connection_options(parser) args = parser.parse_args(sys.argv[3:]) - if not args.allow_downtime: - raise ValueError( - "apply requires --allow-downtime for drop_recreate migrations" - ) - redis_url = create_redis_url(args) plan = load_migration_plan(args.plan) @@ -271,7 +260,7 @@ def _apply_sync(self, plan, redis_url: str, query_check_file: Optional[str]): print(f"\nApplying migration to '{plan.source.index_name}'...") - def progress_callback(step: str, detail: str) -> None: + def progress_callback(step: str, detail: Optional[str]) -> None: step_labels = { "drop": "[1/5] Drop index", "quantize": "[2/5] Quantize vectors", @@ -301,7 +290,7 @@ async def _apply_async(self, plan, redis_url: str, query_check_file: Optional[st print(f"\nApplying migration to '{plan.source.index_name}' (async mode)...") - def progress_callback(step: str, detail: str) -> None: + def progress_callback(step: str, detail: Optional[str]) -> None: step_labels = { "drop": "[1/5] Drop index", "quantize": "[2/5] Quantize vectors", @@ -427,9 +416,7 @@ def _print_plan_summary(self, plan_out: str, plan) -> None: print("\nNext steps:") print(f" Review the plan: cat {plan_out}") - print( - f" Apply the migration: rvl migrate apply --plan {plan_out} --allow-downtime" - ) + print(f" Apply the migration: rvl migrate apply --plan {plan_out}") print(f" Validate the result: rvl migrate validate --plan {plan_out}") print( f"\nTo add more changes: rvl migrate wizard --index {plan.source.index_name} --patch schema_patch.yaml" @@ -518,16 +505,11 @@ def batch_apply(self): """Execute a batch migration plan with checkpointing.""" parser = argparse.ArgumentParser( usage=( - "rvl migrate batch-apply --plan --allow-downtime " + "rvl migrate batch-apply --plan " "[--state ] [--report-dir <./reports>]" ) ) parser.add_argument("--plan", help="Path to batch_plan.yaml", required=True) - parser.add_argument( - "--allow-downtime", - help="Explicitly acknowledge downtime for drop_recreate", - action="store_true", - ) parser.add_argument( "--accept-data-loss", help="Acknowledge that quantization is lossy and cannot be reverted", @@ -546,11 +528,6 @@ def batch_apply(self): parser = add_redis_connection_options(parser) args = parser.parse_args(sys.argv[3:]) - if not args.allow_downtime: - raise ValueError( - "batch-apply requires --allow-downtime for drop_recreate migrations" - ) - # Load batch plan from redisvl.migration.models import BatchPlan @@ -701,7 +678,7 @@ def _print_batch_plan_summary(self, plan_out: str, batch_plan) -> None: f""" Next steps: Review the plan: cat {plan_out} - Apply the migration: rvl migrate batch-apply --plan {plan_out} --allow-downtime""" + Apply the migration: rvl migrate batch-apply --plan {plan_out}""" ) if batch_plan.requires_quantization: diff --git a/redisvl/migration/async_executor.py b/redisvl/migration/async_executor.py index 835b9c81..fed67918 100644 --- a/redisvl/migration/async_executor.py +++ b/redisvl/migration/async_executor.py @@ -3,7 +3,9 @@ import asyncio import logging import time -from typing import Any, Callable, Dict, Optional +from typing import Any, AsyncGenerator, Callable, Dict, List, Optional + +from redis.exceptions import ResponseError from redisvl.index import AsyncSearchIndex from redisvl.migration.async_planner import AsyncMigrationPlanner @@ -33,6 +35,148 @@ class AsyncMigrationExecutor: def __init__(self, validator: Optional[AsyncMigrationValidator] = None): self.validator = validator or AsyncMigrationValidator() + async def _enumerate_indexed_keys( + self, + client: AsyncRedisClient, + index_name: str, + batch_size: int = 1000, + ) -> AsyncGenerator[str, None]: + """Async version: Enumerate document keys using FT.AGGREGATE with SCAN fallback. + + Uses FT.AGGREGATE WITHCURSOR for efficient enumeration when the index + has no indexing failures. Falls back to SCAN if: + - Index has hash_indexing_failures > 0 (would miss failed docs) + - FT.AGGREGATE command fails for any reason + """ + # Check for indexing failures - if any, fall back to SCAN + try: + info = await client.ft(index_name).info() + failures = int(info.get("hash_indexing_failures", 0) or 0) + if failures > 0: + logger.warning( + f"Index '{index_name}' has {failures} indexing failures. " + "Using SCAN for complete enumeration." + ) + async for key in self._enumerate_with_scan( + client, index_name, batch_size + ): + yield key + return + except Exception as e: + logger.warning(f"Failed to check index info: {e}. Using SCAN fallback.") + async for key in self._enumerate_with_scan(client, index_name, batch_size): + yield key + return + + # Try FT.AGGREGATE enumeration + try: + async for key in self._enumerate_with_aggregate( + client, index_name, batch_size + ): + yield key + except ResponseError as e: + logger.warning( + f"FT.AGGREGATE failed: {e}. Falling back to SCAN enumeration." + ) + async for key in self._enumerate_with_scan(client, index_name, batch_size): + yield key + + async def _enumerate_with_aggregate( + self, + client: AsyncRedisClient, + index_name: str, + batch_size: int = 1000, + ) -> AsyncGenerator[str, None]: + """Async version: Enumerate keys using FT.AGGREGATE WITHCURSOR.""" + cursor_id: Optional[int] = None + + try: + # Initial aggregate call with LOAD 1 __key + result = await client.execute_command( + "FT.AGGREGATE", + index_name, + "*", + "LOAD", + "1", + "__key", + "WITHCURSOR", + "COUNT", + str(batch_size), + ) + + while True: + results_data, cursor_id = result + + # Extract keys from results + for item in results_data[1:]: + if isinstance(item, (list, tuple)) and len(item) >= 2: + key = item[1] + yield key.decode() if isinstance(key, bytes) else str(key) + + if cursor_id == 0: + break + + result = await client.execute_command( + "FT.CURSOR", + "READ", + index_name, + str(cursor_id), + "COUNT", + str(batch_size), + ) + finally: + if cursor_id and cursor_id != 0: + try: + await client.execute_command( + "FT.CURSOR", "DEL", index_name, str(cursor_id) + ) + except Exception: + pass + + async def _enumerate_with_scan( + self, + client: AsyncRedisClient, + index_name: str, + batch_size: int = 1000, + ) -> AsyncGenerator[str, None]: + """Async version: Enumerate keys using SCAN with prefix matching.""" + # Get prefix from index info + try: + info = await client.ft(index_name).info() + if isinstance(info, dict): + prefixes = info.get("index_definition", {}).get("prefixes", []) + else: + prefixes = [] + for i, item in enumerate(info): + if item == b"index_definition" or item == "index_definition": + defn = info[i + 1] + if isinstance(defn, dict): + prefixes = defn.get("prefixes", []) + elif isinstance(defn, list): + for j, d in enumerate(defn): + if d in (b"prefixes", "prefixes") and j + 1 < len(defn): + prefixes = defn[j + 1] + break + prefix = prefixes[0] if prefixes else "" + if isinstance(prefix, bytes): + prefix = prefix.decode() + except Exception as e: + logger.warning(f"Failed to get prefix from index info: {e}") + prefix = "" + + cursor: int = 0 + while True: + cursor, keys = await client.scan( + cursor=cursor, + match=f"{prefix}*" if prefix else "*", + count=batch_size, + ) + for key in keys: + yield key.decode() if isinstance(key, bytes) else str(key) + + if cursor == 0: + break + async def apply( self, plan: MigrationPlan, @@ -97,12 +241,14 @@ async def apply( redis_client=redis_client, ) + enumerate_duration = 0.0 drop_duration = 0.0 quantize_duration = 0.0 recreate_duration = 0.0 indexing_duration = 0.0 target_info: Dict[str, Any] = {} docs_quantized = 0 + keys_to_process: List[str] = [] datatype_changes = AsyncMigrationPlanner.get_vector_datatype_changes( plan.source.schema_snapshot, plan.merged_target_schema @@ -113,19 +259,40 @@ def _notify(step: str, detail: Optional[str] = None) -> None: progress_callback(step, detail) try: + # STEP 1: Enumerate keys BEFORE dropping index (if quantization needed) + if datatype_changes: + _notify("enumerate", "Enumerating indexed documents...") + enumerate_started = time.perf_counter() + client = source_index._redis_client + if client is None: + raise ValueError("Failed to get Redis client from source index") + keys_to_process = [ + key + async for key in self._enumerate_indexed_keys( + client, plan.source.index_name, batch_size=1000 + ) + ] + enumerate_duration = round(time.perf_counter() - enumerate_started, 3) + _notify( + "enumerate", + f"found {len(keys_to_process):,} documents ({enumerate_duration}s)", + ) + + # STEP 2: Drop the index _notify("drop", "Dropping index definition...") drop_started = time.perf_counter() await source_index.delete(drop=False) drop_duration = round(time.perf_counter() - drop_started, 3) _notify("drop", f"done ({drop_duration}s)") - if datatype_changes: + # STEP 3: Re-encode vectors using pre-enumerated keys + if datatype_changes and keys_to_process: _notify("quantize", "Re-encoding vectors...") quantize_started = time.perf_counter() docs_quantized = await self._async_quantize_vectors( source_index, datatype_changes, - plan, + keys_to_process, progress_callback=lambda done, total: _notify( "quantize", f"{done:,}/{total:,} docs" ), @@ -236,66 +403,52 @@ async def _async_quantize_vectors( self, source_index: AsyncSearchIndex, datatype_changes: Dict[str, Dict[str, str]], - plan: MigrationPlan, + keys: List[str], progress_callback: Optional[Callable[[int, int], None]] = None, ) -> int: """Re-encode vectors in documents for datatype changes (quantization). - This is the async version that uses async pipeline operations for - better performance on large indexes. + Uses pre-enumerated keys (from _enumerate_indexed_keys) to process + only the documents that were in the index, avoiding full keyspace scan. + + Args: + source_index: The source AsyncSearchIndex (already dropped but client available) + datatype_changes: Dict mapping field_name -> {"source": dtype, "target": dtype} + keys: Pre-enumerated list of document keys to process + progress_callback: Optional callback(docs_done, total_docs) + + Returns: + Number of documents processed """ client = source_index._redis_client if client is None: raise ValueError("Failed to get Redis client from source index") - prefix = plan.source.schema_snapshot["index"]["prefix"] - storage_type = ( - plan.source.schema_snapshot["index"].get("storage_type", "hash").lower() - ) - estimated_total = int(plan.source.stats_snapshot.get("num_docs", 0) or 0) - + total_keys = len(keys) docs_processed = 0 batch_size = 500 - cursor: int = 0 - - while True: - cursor, keys = await client.scan( - cursor=cursor, - match=f"{prefix}*", - count=batch_size, - ) - - if keys: - pipe = client.pipeline() - keys_to_update = [] - - for key in keys: - if storage_type == "hash": - for field_name, change in datatype_changes.items(): - # hget returns bytes for binary data - field_data: bytes | None = await client.hget(key, field_name) # type: ignore[misc,assignment] - if field_data: - # field_data is bytes from Redis - array = buffer_to_array(field_data, change["source"]) - new_bytes = array_to_buffer(array, change["target"]) - pipe.hset( - key, field_name, new_bytes # type: ignore[arg-type] - ) - keys_to_update.append(key) - else: - logger.warning( - f"JSON storage quantization for key {key} - " - "vectors stored as arrays may not need re-encoding" - ) - - if keys_to_update: - await pipe.execute() - docs_processed += len(set(keys_to_update)) - if progress_callback: - progress_callback(docs_processed, estimated_total) - if cursor == 0: - break + for i in range(0, total_keys, batch_size): + batch = keys[i : i + batch_size] + pipe = client.pipeline() + keys_updated_in_batch: set[str] = set() + + for key in batch: + # Read all vector fields that need conversion + for field_name, change in datatype_changes.items(): + field_data: bytes | None = await client.hget(key, field_name) # type: ignore[misc,assignment] + if field_data: + # Convert: source dtype -> array -> target dtype -> bytes + array = buffer_to_array(field_data, change["source"]) + new_bytes = array_to_buffer(array, change["target"]) + pipe.hset(key, field_name, new_bytes) # type: ignore[arg-type] + keys_updated_in_batch.add(key) + + if keys_updated_in_batch: + await pipe.execute() + docs_processed += len(keys_updated_in_batch) + if progress_callback: + progress_callback(docs_processed, total_keys) logger.info(f"Quantized {docs_processed} documents: {datatype_changes}") return docs_processed diff --git a/redisvl/migration/batch_executor.py b/redisvl/migration/batch_executor.py new file mode 100644 index 00000000..b749caa4 --- /dev/null +++ b/redisvl/migration/batch_executor.py @@ -0,0 +1,340 @@ +"""Batch migration executor with checkpointing and resume support.""" + +from __future__ import annotations + +import time +from pathlib import Path +from typing import Any, Callable, Optional + +import yaml + +from redisvl.migration.executor import MigrationExecutor +from redisvl.migration.models import ( + BatchIndexReport, + BatchIndexState, + BatchPlan, + BatchReport, + BatchReportSummary, + BatchState, +) +from redisvl.migration.planner import MigrationPlanner +from redisvl.migration.utils import timestamp_utc, write_yaml +from redisvl.redis.connection import RedisConnectionFactory + + +class BatchMigrationExecutor: + """Executor for batch migration of multiple indexes. + + Supports: + - Sequential execution (one index at a time) + - Checkpointing for resume after failure + - Configurable failure policies (fail_fast, continue_on_error) + """ + + def __init__(self, executor: Optional[MigrationExecutor] = None): + self._single_executor = executor or MigrationExecutor() + self._planner = MigrationPlanner() + + def apply( + self, + batch_plan: BatchPlan, + *, + state_path: str = "batch_state.yaml", + report_dir: str = "./reports", + redis_url: Optional[str] = None, + redis_client: Optional[Any] = None, + progress_callback: Optional[Callable[[str, int, int, str], None]] = None, + ) -> BatchReport: + """Execute batch migration with checkpointing. + + Args: + batch_plan: The batch plan to execute. + state_path: Path to checkpoint state file. + report_dir: Directory for per-index reports. + redis_url: Redis connection URL. + redis_client: Existing Redis client. + progress_callback: Optional callback(index_name, position, total, status). + + Returns: + BatchReport with results for all indexes. + """ + # Get Redis client + client = redis_client + if client is None: + if not redis_url: + raise ValueError("Must provide either redis_url or redis_client") + client = RedisConnectionFactory.get_redis_connection(redis_url=redis_url) + + # Ensure report directory exists + report_path = Path(report_dir).resolve() + report_path.mkdir(parents=True, exist_ok=True) + + # Initialize or load state + state = self._init_or_load_state(batch_plan, state_path) + started_at = state.started_at + batch_start_time = time.perf_counter() + + # Get applicable indexes + applicable_indexes = [idx for idx in batch_plan.indexes if idx.applicable] + total = len(applicable_indexes) + + # Process each remaining index + for position, index_name in enumerate(state.remaining[:], start=1): + state.current_index = index_name + state.updated_at = timestamp_utc() + self._write_state(state, state_path) + + if progress_callback: + progress_callback(index_name, position, total, "starting") + + # Find the index entry + index_entry = next( + (idx for idx in batch_plan.indexes if idx.name == index_name), None + ) + if not index_entry or not index_entry.applicable: + # Skip non-applicable indexes + state.remaining.remove(index_name) + state.completed.append( + BatchIndexState( + name=index_name, + status="skipped", + completed_at=timestamp_utc(), + ) + ) + continue + + # Execute migration for this index + index_state = self._migrate_single_index( + index_name=index_name, + batch_plan=batch_plan, + report_dir=report_path, + redis_client=client, + ) + + # Update state + state.remaining.remove(index_name) + state.completed.append(index_state) + state.current_index = None + state.updated_at = timestamp_utc() + self._write_state(state, state_path) + + if progress_callback: + progress_callback(index_name, position, total, index_state.status) + + # Check failure policy + if ( + index_state.status == "failed" + and batch_plan.failure_policy == "fail_fast" + ): + # Mark remaining as skipped + for remaining_name in state.remaining[:]: + state.remaining.remove(remaining_name) + state.completed.append( + BatchIndexState( + name=remaining_name, + status="skipped", + completed_at=timestamp_utc(), + ) + ) + state.updated_at = timestamp_utc() + self._write_state(state, state_path) + break + + # Build final report + total_duration = time.perf_counter() - batch_start_time + return self._build_batch_report(batch_plan, state, started_at, total_duration) + + def resume( + self, + state_path: str, + *, + batch_plan_path: Optional[str] = None, + retry_failed: bool = False, + report_dir: str = "./reports", + redis_url: Optional[str] = None, + redis_client: Optional[Any] = None, + progress_callback: Optional[Callable[[str, int, int, str], None]] = None, + ) -> BatchReport: + """Resume batch migration from checkpoint. + + Args: + state_path: Path to checkpoint state file. + batch_plan_path: Path to batch plan (uses state.plan_path if not provided). + retry_failed: If True, retry previously failed indexes. + report_dir: Directory for per-index reports. + redis_url: Redis connection URL. + redis_client: Existing Redis client. + progress_callback: Optional callback(index_name, position, total, status). + """ + state = self._load_state(state_path) + plan_path = batch_plan_path or state.plan_path + batch_plan = self._load_batch_plan(plan_path) + + # Optionally retry failed indexes + if retry_failed: + failed_names = [ + idx.name for idx in state.completed if idx.status == "failed" + ] + state.remaining = failed_names + state.remaining + state.completed = [idx for idx in state.completed if idx.status != "failed"] + # Write updated state back to file so apply() picks up the changes + self._write_state(state, state_path) + + # Re-run apply with the updated state + return self.apply( + batch_plan, + state_path=state_path, + report_dir=report_dir, + redis_url=redis_url, + redis_client=redis_client, + progress_callback=progress_callback, + ) + + def _migrate_single_index( + self, + *, + index_name: str, + batch_plan: BatchPlan, + report_dir: Path, + redis_client: Any, + ) -> BatchIndexState: + """Execute migration for a single index.""" + try: + # Create migration plan for this index + plan = self._planner.create_plan_from_patch( + index_name, + schema_patch=batch_plan.shared_patch, + redis_client=redis_client, + ) + + # Execute migration + report = self._single_executor.apply( + plan, + redis_client=redis_client, + ) + + # Write individual report + report_file = report_dir / f"{index_name}_report.yaml" + write_yaml(report.model_dump(exclude_none=True), str(report_file)) + + return BatchIndexState( + name=index_name, + status="succeeded" if report.result == "succeeded" else "failed", + completed_at=timestamp_utc(), + report_path=str(report_file), + error=report.validation.errors[0] if report.validation.errors else None, + ) + + except Exception as e: + return BatchIndexState( + name=index_name, + status="failed", + completed_at=timestamp_utc(), + error=str(e), + ) + + def _init_or_load_state(self, batch_plan: BatchPlan, state_path: str) -> BatchState: + """Initialize new state or load existing checkpoint.""" + path = Path(state_path).resolve() + if path.exists(): + return self._load_state(state_path) + + # Create new state + applicable_names = [idx.name for idx in batch_plan.indexes if idx.applicable] + return BatchState( + batch_id=batch_plan.batch_id, + plan_path="", # Will be set by caller if needed + started_at=timestamp_utc(), + updated_at=timestamp_utc(), + remaining=applicable_names, + completed=[], + current_index=None, + ) + + def _write_state(self, state: BatchState, state_path: str) -> None: + """Write checkpoint state to file.""" + path = Path(state_path).resolve() + with open(path, "w") as f: + yaml.safe_dump(state.model_dump(exclude_none=True), f, sort_keys=False) + + def _load_state(self, state_path: str) -> BatchState: + """Load checkpoint state from file.""" + path = Path(state_path).resolve() + if not path.exists(): + raise FileNotFoundError(f"State file not found: {state_path}") + with open(path, "r") as f: + data = yaml.safe_load(f) or {} + return BatchState.model_validate(data) + + def _load_batch_plan(self, plan_path: str) -> BatchPlan: + """Load batch plan from file.""" + path = Path(plan_path).resolve() + if not path.exists(): + raise FileNotFoundError(f"Batch plan not found: {plan_path}") + with open(path, "r") as f: + data = yaml.safe_load(f) or {} + return BatchPlan.model_validate(data) + + def _build_batch_report( + self, + batch_plan: BatchPlan, + state: BatchState, + started_at: str, + total_duration: float, + ) -> BatchReport: + """Build final batch report from state.""" + index_reports = [] + succeeded = 0 + failed = 0 + skipped = 0 + + for idx_state in state.completed: + index_reports.append( + BatchIndexReport( + name=idx_state.name, + status=idx_state.status, + report_path=idx_state.report_path, + error=idx_state.error, + ) + ) + if idx_state.status == "succeeded": + succeeded += 1 + elif idx_state.status == "failed": + failed += 1 + else: + skipped += 1 + + # Add non-applicable indexes as skipped + for idx in batch_plan.indexes: + if not idx.applicable: + index_reports.append( + BatchIndexReport( + name=idx.name, + status="skipped", + error=idx.skip_reason, + ) + ) + skipped += 1 + + # Determine overall status + if failed == 0 and len(state.remaining) == 0: + status = "completed" + elif succeeded > 0: + status = "partial_failure" + else: + status = "failed" + + return BatchReport( + batch_id=batch_plan.batch_id, + status=status, + started_at=started_at, + completed_at=timestamp_utc(), + summary=BatchReportSummary( + total_indexes=len(batch_plan.indexes), + successful=succeeded, + failed=failed, + skipped=skipped, + total_duration_seconds=round(total_duration, 3), + ), + indexes=index_reports, + ) diff --git a/redisvl/migration/batch_planner.py b/redisvl/migration/batch_planner.py new file mode 100644 index 00000000..00a5d9c1 --- /dev/null +++ b/redisvl/migration/batch_planner.py @@ -0,0 +1,226 @@ +"""Batch migration planner for migrating multiple indexes with a shared patch.""" + +from __future__ import annotations + +import fnmatch +import uuid +from pathlib import Path +from typing import Any, List, Optional + +import yaml + +from redisvl.index import SearchIndex +from redisvl.migration.models import BatchIndexEntry, BatchPlan, SchemaPatch +from redisvl.migration.planner import MigrationPlanner +from redisvl.migration.utils import list_indexes, timestamp_utc +from redisvl.redis.connection import RedisConnectionFactory + + +class BatchMigrationPlanner: + """Planner for batch migration of multiple indexes with a shared patch. + + The batch planner applies a single SchemaPatch to multiple indexes, + checking applicability for each index based on field name matching. + """ + + def __init__(self): + self._single_planner = MigrationPlanner() + + def create_batch_plan( + self, + *, + indexes: Optional[List[str]] = None, + pattern: Optional[str] = None, + indexes_file: Optional[str] = None, + schema_patch_path: str, + redis_url: Optional[str] = None, + redis_client: Optional[Any] = None, + failure_policy: str = "fail_fast", + ) -> BatchPlan: + """Create a batch migration plan for multiple indexes. + + Args: + indexes: Explicit list of index names. + pattern: Glob pattern to match index names (e.g., "*_idx"). + indexes_file: Path to file with index names (one per line). + schema_patch_path: Path to shared schema patch YAML file. + redis_url: Redis connection URL. + redis_client: Existing Redis client. + failure_policy: "fail_fast" or "continue_on_error". + + Returns: + BatchPlan with shared patch and per-index applicability. + """ + # Get Redis client + client = redis_client + if client is None: + if not redis_url: + raise ValueError("Must provide either redis_url or redis_client") + client = RedisConnectionFactory.get_redis_connection(redis_url=redis_url) + + # Resolve index list + index_names = self._resolve_index_names( + indexes=indexes, + pattern=pattern, + indexes_file=indexes_file, + redis_client=client, + ) + + if not index_names: + raise ValueError("No indexes found matching the specified criteria") + + # Load shared patch + shared_patch = self._single_planner.load_schema_patch(schema_patch_path) + + # Check applicability for each index + batch_entries: List[BatchIndexEntry] = [] + requires_quantization = False + + for index_name in index_names: + entry = self._check_index_applicability( + index_name=index_name, + shared_patch=shared_patch, + redis_client=client, + ) + batch_entries.append(entry) + + # Check if any applicable index requires quantization + if entry.applicable: + try: + plan = self._single_planner.create_plan_from_patch( + index_name, + schema_patch=shared_patch, + redis_client=client, + ) + datatype_changes = MigrationPlanner.get_vector_datatype_changes( + plan.source.schema_snapshot, + plan.merged_target_schema, + ) + if datatype_changes: + requires_quantization = True + except Exception: + pass # Already handled in applicability check + + batch_id = f"batch_{uuid.uuid4().hex[:12]}" + + return BatchPlan( + batch_id=batch_id, + mode="drop_recreate", + failure_policy=failure_policy, + requires_quantization=requires_quantization, + shared_patch=shared_patch, + indexes=batch_entries, + created_at=timestamp_utc(), + ) + + def _resolve_index_names( + self, + *, + indexes: Optional[List[str]], + pattern: Optional[str], + indexes_file: Optional[str], + redis_client: Any, + ) -> List[str]: + """Resolve index names from explicit list, pattern, or file.""" + sources = sum([bool(indexes), bool(pattern), bool(indexes_file)]) + if sources == 0: + raise ValueError("Must provide one of: indexes, pattern, or indexes_file") + if sources > 1: + raise ValueError("Provide only one of: indexes, pattern, or indexes_file") + + if indexes: + return list(indexes) + + if indexes_file: + return self._load_indexes_from_file(indexes_file) + + # Pattern matching + all_indexes = list_indexes(redis_client=redis_client) + matched = [idx for idx in all_indexes if fnmatch.fnmatch(idx, pattern)] + return sorted(matched) + + def _load_indexes_from_file(self, file_path: str) -> List[str]: + """Load index names from a file (one per line).""" + path = Path(file_path).resolve() + if not path.exists(): + raise FileNotFoundError(f"Indexes file not found: {file_path}") + + with open(path, "r") as f: + lines = f.readlines() + + return [ + line.strip() for line in lines if line.strip() and not line.startswith("#") + ] + + def _check_index_applicability( + self, + *, + index_name: str, + shared_patch: SchemaPatch, + redis_client: Any, + ) -> BatchIndexEntry: + """Check if the shared patch can be applied to a specific index.""" + try: + index = SearchIndex.from_existing(index_name, redis_client=redis_client) + schema_dict = index.schema.to_dict() + field_names = {f["name"] for f in schema_dict.get("fields", [])} + + # Check that all update_fields exist in this index + missing_fields = [] + for field_update in shared_patch.changes.update_fields: + if field_update.name not in field_names: + missing_fields.append(field_update.name) + + if missing_fields: + return BatchIndexEntry( + name=index_name, + applicable=False, + skip_reason=f"Missing fields: {', '.join(missing_fields)}", + ) + + # Check that add_fields don't already exist + existing_adds: list[str] = [] + for field in shared_patch.changes.add_fields: + field_name = field.get("name") + if field_name and field_name in field_names: + existing_adds.append(field_name) + + if existing_adds: + return BatchIndexEntry( + name=index_name, + applicable=False, + skip_reason=f"Fields already exist: {', '.join(existing_adds)}", + ) + + # Try creating a plan to check for blocked changes + plan = self._single_planner.create_plan_from_patch( + index_name, + schema_patch=shared_patch, + redis_client=redis_client, + ) + + if not plan.diff_classification.supported: + return BatchIndexEntry( + name=index_name, + applicable=False, + skip_reason=( + plan.diff_classification.blocked_reasons[0] + if plan.diff_classification.blocked_reasons + else "Unsupported changes" + ), + ) + + return BatchIndexEntry(name=index_name, applicable=True) + + except Exception as e: + return BatchIndexEntry( + name=index_name, + applicable=False, + skip_reason=str(e), + ) + + def write_batch_plan(self, batch_plan: BatchPlan, path: str) -> None: + """Write batch plan to YAML file.""" + plan_path = Path(path).resolve() + with open(plan_path, "w") as f: + yaml.safe_dump(batch_plan.model_dump(exclude_none=True), f, sort_keys=False) diff --git a/redisvl/migration/executor.py b/redisvl/migration/executor.py index 40e32319..a8e77d80 100644 --- a/redisvl/migration/executor.py +++ b/redisvl/migration/executor.py @@ -2,7 +2,9 @@ import logging import time -from typing import Any, Callable, Dict, Optional +from typing import Any, Callable, Dict, Generator, List, Optional + +from redis.exceptions import ResponseError from redisvl.index import SearchIndex from redisvl.migration.models import ( @@ -20,6 +22,7 @@ ) from redisvl.migration.validation import MigrationValidator from redisvl.redis.utils import array_to_buffer, buffer_to_array +from redisvl.types import SyncRedisClient logger = logging.getLogger(__name__) @@ -28,6 +31,164 @@ class MigrationExecutor: def __init__(self, validator: Optional[MigrationValidator] = None): self.validator = validator or MigrationValidator() + def _enumerate_indexed_keys( + self, + client: SyncRedisClient, + index_name: str, + batch_size: int = 1000, + ) -> Generator[str, None, None]: + """Enumerate document keys using FT.AGGREGATE with SCAN fallback. + + Uses FT.AGGREGATE WITHCURSOR for efficient enumeration when the index + has no indexing failures. Falls back to SCAN if: + - Index has hash_indexing_failures > 0 (would miss failed docs) + - FT.AGGREGATE command fails for any reason + + Args: + client: Redis client + index_name: Name of the index to enumerate + batch_size: Number of keys per batch + + Yields: + Document keys as strings + """ + # Check for indexing failures - if any, fall back to SCAN + try: + info = client.ft(index_name).info() + failures = int(info.get("hash_indexing_failures", 0) or 0) + if failures > 0: + logger.warning( + f"Index '{index_name}' has {failures} indexing failures. " + "Using SCAN for complete enumeration." + ) + yield from self._enumerate_with_scan(client, index_name, batch_size) + return + except Exception as e: + logger.warning(f"Failed to check index info: {e}. Using SCAN fallback.") + yield from self._enumerate_with_scan(client, index_name, batch_size) + return + + # Try FT.AGGREGATE enumeration + try: + yield from self._enumerate_with_aggregate(client, index_name, batch_size) + except ResponseError as e: + logger.warning( + f"FT.AGGREGATE failed: {e}. Falling back to SCAN enumeration." + ) + yield from self._enumerate_with_scan(client, index_name, batch_size) + + def _enumerate_with_aggregate( + self, + client: SyncRedisClient, + index_name: str, + batch_size: int = 1000, + ) -> Generator[str, None, None]: + """Enumerate keys using FT.AGGREGATE WITHCURSOR. + + More efficient than SCAN for sparse indexes (only returns indexed docs). + Requires LOAD 1 __key to retrieve document keys. + """ + cursor_id: Optional[int] = None + + try: + # Initial aggregate call with LOAD 1 __key (not LOAD 0!) + result = client.execute_command( + "FT.AGGREGATE", + index_name, + "*", + "LOAD", + "1", + "__key", + "WITHCURSOR", + "COUNT", + str(batch_size), + ) + + while True: + results_data, cursor_id = result + + # Extract keys from results (skip first element which is count) + for item in results_data[1:]: + if isinstance(item, (list, tuple)) and len(item) >= 2: + key = item[1] + yield key.decode() if isinstance(key, bytes) else str(key) + + # Check if done (cursor_id == 0) + if cursor_id == 0: + break + + # Read next batch + result = client.execute_command( + "FT.CURSOR", + "READ", + index_name, + str(cursor_id), + "COUNT", + str(batch_size), + ) + finally: + # Clean up cursor if interrupted + if cursor_id and cursor_id != 0: + try: + client.execute_command( + "FT.CURSOR", "DEL", index_name, str(cursor_id) + ) + except Exception: + pass # Cursor may have expired + + def _enumerate_with_scan( + self, + client: SyncRedisClient, + index_name: str, + batch_size: int = 1000, + ) -> Generator[str, None, None]: + """Enumerate keys using SCAN with prefix matching. + + Fallback method that scans all keys matching the index prefix. + Less efficient but more complete (includes failed-to-index docs). + """ + # Get prefix from index info + try: + info = client.ft(index_name).info() + # Handle both dict and list formats from FT.INFO + if isinstance(info, dict): + prefixes = info.get("index_definition", {}).get("prefixes", []) + else: + # List format - find index_definition + prefixes = [] + for i, item in enumerate(info): + if item == b"index_definition" or item == "index_definition": + defn = info[i + 1] + if isinstance(defn, dict): + prefixes = defn.get("prefixes", []) + elif isinstance(defn, list): + for j, d in enumerate(defn): + if d in (b"prefixes", "prefixes") and j + 1 < len(defn): + prefixes = defn[j + 1] + break + prefix = prefixes[0] if prefixes else "" + if isinstance(prefix, bytes): + prefix = prefix.decode() + except Exception as e: + logger.warning(f"Failed to get prefix from index info: {e}") + prefix = "" + + if not prefix: + logger.warning("No prefix found for index, SCAN may return unexpected keys") + + cursor = 0 + while True: + cursor, keys = client.scan( # type: ignore[misc] + cursor=cursor, + match=f"{prefix}*" if prefix else "*", + count=batch_size, + ) + for key in keys: + yield key.decode() if isinstance(key, bytes) else str(key) + + if cursor == 0: + break + def apply( self, plan: MigrationPlan, @@ -94,12 +255,14 @@ def apply( redis_client=redis_client, ) + enumerate_duration = 0.0 drop_duration = 0.0 quantize_duration = 0.0 recreate_duration = 0.0 indexing_duration = 0.0 target_info: Dict[str, Any] = {} docs_quantized = 0 + keys_to_process: List[str] = [] # Check if we need to re-encode vectors for datatype changes datatype_changes = MigrationPlanner.get_vector_datatype_changes( @@ -111,20 +274,38 @@ def _notify(step: str, detail: Optional[str] = None) -> None: progress_callback(step, detail) try: + # STEP 1: Enumerate keys BEFORE dropping index (if quantization needed) + # This allows us to use FT.AGGREGATE for efficient enumeration + if datatype_changes: + _notify("enumerate", "Enumerating indexed documents...") + enumerate_started = time.perf_counter() + client = source_index._redis_client + keys_to_process = list( + self._enumerate_indexed_keys( + client, plan.source.index_name, batch_size=1000 + ) + ) + enumerate_duration = round(time.perf_counter() - enumerate_started, 3) + _notify( + "enumerate", + f"found {len(keys_to_process):,} documents ({enumerate_duration}s)", + ) + + # STEP 2: Drop the index _notify("drop", "Dropping index definition...") drop_started = time.perf_counter() source_index.delete(drop=False) drop_duration = round(time.perf_counter() - drop_started, 3) _notify("drop", f"done ({drop_duration}s)") - # Re-encode vectors if datatype changes are needed - if datatype_changes: + # STEP 3: Re-encode vectors using pre-enumerated keys + if datatype_changes and keys_to_process: _notify("quantize", "Re-encoding vectors...") quantize_started = time.perf_counter() docs_quantized = self._quantize_vectors( source_index, datatype_changes, - plan, + keys_to_process, progress_callback=lambda done, total: _notify( "quantize", f"{done:,}/{total:,} docs" ), @@ -233,79 +414,49 @@ def _quantize_vectors( self, source_index: SearchIndex, datatype_changes: Dict[str, Dict[str, str]], - plan: MigrationPlan, + keys: List[str], progress_callback: Optional[Callable[[int, int], None]] = None, ) -> int: """Re-encode vectors in documents for datatype changes (quantization). - This iterates over all documents matching the index prefix and converts - vector fields from source datatype to target datatype. + Uses pre-enumerated keys (from _enumerate_indexed_keys) to process + only the documents that were in the index, avoiding full keyspace scan. Args: source_index: The source SearchIndex (already dropped but client available) datatype_changes: Dict mapping field_name -> {"source": dtype, "target": dtype} - plan: The migration plan containing schema info + keys: Pre-enumerated list of document keys to process progress_callback: Optional callback(docs_done, total_docs) Returns: Number of documents processed """ client = source_index._redis_client - prefix = plan.source.schema_snapshot["index"]["prefix"] - storage_type = ( - plan.source.schema_snapshot["index"].get("storage_type", "hash").lower() - ) - - # Get estimated total from source stats - estimated_total = int(plan.source.stats_snapshot.get("num_docs", 0) or 0) - - # Get vector field dimensions for validation - field_dims: Dict[str, int] = {} - for field in plan.source.schema_snapshot.get("fields", []): - if field.get("type") == "vector" and field["name"] in datatype_changes: - field_dims[field["name"]] = field.get("attrs", {}).get("dims", 0) - + total_keys = len(keys) docs_processed = 0 batch_size = 500 - cursor = 0 - - while True: - cursor, keys = client.scan( - cursor=cursor, - match=f"{prefix}*", - count=batch_size, - ) - if keys: - pipe = client.pipeline() - keys_to_update = [] - - for key in keys: - if storage_type == "hash": - # Read all vector fields that need conversion - for field_name, change in datatype_changes.items(): - field_data = client.hget(key, field_name) - if field_data: - # Convert: source dtype -> array -> target dtype -> bytes - array = buffer_to_array(field_data, change["source"]) - new_bytes = array_to_buffer(array, change["target"]) - pipe.hset(key, field_name, new_bytes) - keys_to_update.append(key) - else: - # JSON storage - vectors stored as arrays, need different handling - logger.warning( - f"JSON storage quantization for key {key} - " - "vectors stored as arrays may not need re-encoding" - ) - - if keys_to_update: - pipe.execute() - docs_processed += len(set(keys_to_update)) - if progress_callback: - progress_callback(docs_processed, estimated_total) - - if cursor == 0: - break + for i in range(0, total_keys, batch_size): + batch = keys[i : i + batch_size] + pipe = client.pipeline() + keys_updated_in_batch = set() + + for key in batch: + # Read all vector fields that need conversion + for field_name, change in datatype_changes.items(): + field_data: bytes | None = client.hget(key, field_name) # type: ignore[misc,assignment] + if field_data: + # Convert: source dtype -> array -> target dtype -> bytes + array = buffer_to_array(field_data, change["source"]) + new_bytes = array_to_buffer(array, change["target"]) + pipe.hset(key, field_name, new_bytes) # type: ignore[arg-type] + keys_updated_in_batch.add(key) + + if keys_updated_in_batch: + pipe.execute() + docs_processed += len(keys_updated_in_batch) + if progress_callback: + progress_callback(docs_processed, total_keys) logger.info(f"Quantized {docs_processed} documents: {datatype_changes}") return docs_processed diff --git a/redisvl/migration/planner.py b/redisvl/migration/planner.py index 8f79db1b..181910e3 100644 --- a/redisvl/migration/planner.py +++ b/redisvl/migration/planner.py @@ -66,6 +66,8 @@ def create_plan( if schema_patch_path: schema_patch = self.load_schema_patch(schema_patch_path) else: + # target_schema_path is guaranteed non-None here due to validation above + assert target_schema_path is not None schema_patch = self.normalize_target_schema_to_patch( source_schema, target_schema_path ) diff --git a/redisvl/migration/validation.py b/redisvl/migration/validation.py index f5bc0ca7..06558b3d 100644 --- a/redisvl/migration/validation.py +++ b/redisvl/migration/validation.py @@ -96,7 +96,10 @@ def _run_query_checks( ) for key in query_checks.get("keys_exist", []): - exists = bool(target_index.client.exists(key)) + client = target_index.client + if client is None: + raise ValueError("Redis client not connected") + exists = bool(client.exists(key)) results.append( QueryCheckResult( name=f"key:{key}", diff --git a/redisvl/migration/wizard.py b/redisvl/migration/wizard.py index b1a9edd5..1554e257 100644 --- a/redisvl/migration/wizard.py +++ b/redisvl/migration/wizard.py @@ -379,6 +379,7 @@ def _prompt_vector_attrs(self, field: Dict[str, Any]) -> Dict[str, Any]: effective_algo = attrs.get( "algorithm", current.get("algorithm", "HNSW") ).upper() + valid_datatypes: tuple[str, ...] if effective_algo == "SVS-VAMANA": # SVS-VAMANA only supports float16, float32 print( diff --git a/tests/integration/test_batch_migration_integration.py b/tests/integration/test_batch_migration_integration.py new file mode 100644 index 00000000..cd581f72 --- /dev/null +++ b/tests/integration/test_batch_migration_integration.py @@ -0,0 +1,486 @@ +""" +Integration tests for batch migration. + +Tests the full batch migration flow with real Redis: +- Batch planning with patterns and explicit lists +- Batch apply with checkpointing +- Resume after interruption +- Failure policies (fail_fast, continue_on_error) +""" + +import uuid + +import pytest +import yaml + +from redisvl.index import SearchIndex +from redisvl.migration import BatchMigrationExecutor, BatchMigrationPlanner +from redisvl.migration.utils import list_indexes +from redisvl.redis.utils import array_to_buffer + + +def create_test_index(name: str, prefix: str, redis_url: str) -> SearchIndex: + """Helper to create a test index with standard schema.""" + index = SearchIndex.from_dict( + { + "index": { + "name": name, + "prefix": prefix, + "storage_type": "hash", + }, + "fields": [ + {"name": "doc_id", "type": "tag"}, + {"name": "title", "type": "text"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "hnsw", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + }, + redis_url=redis_url, + ) + return index + + +def load_test_data(index: SearchIndex) -> None: + """Load sample documents into an index.""" + docs = [ + { + "doc_id": "1", + "title": "alpha", + "embedding": array_to_buffer([0.1, 0.2, 0.3], "float32"), + }, + { + "doc_id": "2", + "title": "beta", + "embedding": array_to_buffer([0.2, 0.1, 0.4], "float32"), + }, + ] + index.load(docs, id_field="doc_id") + + +class TestBatchMigrationPlanIntegration: + """Test batch plan creation with real Redis.""" + + def test_batch_plan_with_pattern(self, redis_url, worker_id, tmp_path): + """Test creating a batch plan using pattern matching.""" + unique_id = str(uuid.uuid4())[:8] + prefix = f"batch_test:{worker_id}:{unique_id}" + indexes = [] + + # Create multiple indexes matching pattern + for i in range(3): + name = f"batch_{unique_id}_idx_{i}" + index = create_test_index(name, f"{prefix}_{i}", redis_url) + index.create(overwrite=True) + load_test_data(index) + indexes.append(index) + + # Create shared patch (add sortable to title) + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [ + {"name": "title", "attrs": {"sortable": True}} + ] + }, + }, + sort_keys=False, + ) + ) + + # Create batch plan + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + pattern=f"batch_{unique_id}_idx_*", + schema_patch_path=str(patch_path), + redis_url=redis_url, + ) + + # Verify batch plan + assert batch_plan.batch_id is not None + assert len(batch_plan.indexes) == 3 + for entry in batch_plan.indexes: + assert entry.applicable is True + assert entry.skip_reason is None + + # Cleanup + for index in indexes: + index.delete(drop=True) + + def test_batch_plan_with_explicit_list(self, redis_url, worker_id, tmp_path): + """Test creating a batch plan with explicit index list.""" + unique_id = str(uuid.uuid4())[:8] + prefix = f"batch_list_test:{worker_id}:{unique_id}" + index_names = [] + indexes = [] + + # Create indexes + for i in range(2): + name = f"list_batch_{unique_id}_{i}" + index = create_test_index(name, f"{prefix}_{i}", redis_url) + index.create(overwrite=True) + load_test_data(index) + indexes.append(index) + index_names.append(name) + + # Create shared patch + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [ + {"name": "title", "attrs": {"sortable": True}} + ] + }, + }, + sort_keys=False, + ) + ) + + # Create batch plan with explicit list + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=index_names, + schema_patch_path=str(patch_path), + redis_url=redis_url, + ) + + assert len(batch_plan.indexes) == 2 + assert all(idx.applicable for idx in batch_plan.indexes) + + # Cleanup + for index in indexes: + index.delete(drop=True) + + +class TestBatchMigrationApplyIntegration: + """Test batch apply with real Redis.""" + + def test_batch_apply_full_flow(self, redis_url, worker_id, tmp_path): + """Test complete batch apply flow: plan -> apply -> verify.""" + unique_id = str(uuid.uuid4())[:8] + prefix = f"batch_apply:{worker_id}:{unique_id}" + indexes = [] + index_names = [] + + # Create multiple indexes + for i in range(3): + name = f"apply_batch_{unique_id}_{i}" + index = create_test_index(name, f"{prefix}_{i}", redis_url) + index.create(overwrite=True) + load_test_data(index) + indexes.append(index) + index_names.append(name) + + # Create shared patch (make title sortable) + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [ + {"name": "title", "attrs": {"sortable": True}} + ] + }, + }, + sort_keys=False, + ) + ) + + # Create batch plan + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=index_names, + schema_patch_path=str(patch_path), + redis_url=redis_url, + ) + + # Save batch plan + plan_path = tmp_path / "batch_plan.yaml" + planner.write_batch_plan(batch_plan, str(plan_path)) + + # Apply batch migration + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + executor = BatchMigrationExecutor() + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_url=redis_url, + ) + + # Verify report + assert report.status == "completed" + assert report.summary.total_indexes == 3 + assert report.summary.successful == 3 + assert report.summary.failed == 0 + + # Verify all indexes were migrated (title is now sortable) + for name in index_names: + migrated = SearchIndex.from_existing(name, redis_url=redis_url) + title_field = migrated.schema.fields.get("title") + assert title_field is not None + assert title_field.attrs.sortable is True + + # Cleanup + for name in index_names: + idx = SearchIndex.from_existing(name, redis_url=redis_url) + idx.delete(drop=True) + + def test_batch_apply_with_inapplicable_indexes( + self, redis_url, worker_id, tmp_path + ): + """Test batch apply skips indexes that don't have matching fields.""" + unique_id = str(uuid.uuid4())[:8] + prefix = f"batch_skip:{worker_id}:{unique_id}" + indexes_to_cleanup = [] + + # Create an index WITH embedding field + with_embedding = f"with_emb_{unique_id}" + idx1 = create_test_index(with_embedding, f"{prefix}_1", redis_url) + idx1.create(overwrite=True) + load_test_data(idx1) + indexes_to_cleanup.append(with_embedding) + + # Create an index WITHOUT embedding field + without_embedding = f"no_emb_{unique_id}" + idx2 = SearchIndex.from_dict( + { + "index": { + "name": without_embedding, + "prefix": f"{prefix}_2", + "storage_type": "hash", + }, + "fields": [ + {"name": "doc_id", "type": "tag"}, + {"name": "content", "type": "text"}, + ], + }, + redis_url=redis_url, + ) + idx2.create(overwrite=True) + idx2.load([{"doc_id": "1", "content": "test"}], id_field="doc_id") + indexes_to_cleanup.append(without_embedding) + + # Create patch targeting embedding field (won't apply to idx2) + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [ + {"name": "embedding", "attrs": {"datatype": "float16"}} + ] + }, + }, + sort_keys=False, + ) + ) + + # Create batch plan + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=[with_embedding, without_embedding], + schema_patch_path=str(patch_path), + redis_url=redis_url, + ) + + # One should be applicable, one not + applicable = [idx for idx in batch_plan.indexes if idx.applicable] + not_applicable = [idx for idx in batch_plan.indexes if not idx.applicable] + assert len(applicable) == 1 + assert len(not_applicable) == 1 + assert "embedding" in not_applicable[0].skip_reason.lower() + + # Apply + executor = BatchMigrationExecutor() + report = executor.apply( + batch_plan, + state_path=str(tmp_path / "state.yaml"), + report_dir=str(tmp_path / "reports"), + redis_url=redis_url, + ) + + assert report.summary.successful == 1 + assert report.summary.skipped == 1 + + # Cleanup + for name in indexes_to_cleanup: + idx = SearchIndex.from_existing(name, redis_url=redis_url) + idx.delete(drop=True) + + +class TestBatchMigrationResumeIntegration: + """Test batch resume functionality with real Redis.""" + + def test_resume_from_checkpoint(self, redis_url, worker_id, tmp_path): + """Test resuming a batch migration from checkpoint state.""" + unique_id = str(uuid.uuid4())[:8] + prefix = f"batch_resume:{worker_id}:{unique_id}" + index_names = [] + indexes = [] + + # Create indexes + for i in range(3): + name = f"resume_batch_{unique_id}_{i}" + index = create_test_index(name, f"{prefix}_{i}", redis_url) + index.create(overwrite=True) + load_test_data(index) + indexes.append(index) + index_names.append(name) + + # Create patch + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [ + {"name": "title", "attrs": {"sortable": True}} + ] + }, + }, + sort_keys=False, + ) + ) + + # Create batch plan + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=index_names, + schema_patch_path=str(patch_path), + redis_url=redis_url, + ) + + # Save batch plan (needed for resume) + plan_path = tmp_path / "batch_plan.yaml" + planner.write_batch_plan(batch_plan, str(plan_path)) + + # Create a checkpoint state simulating partial completion + state_path = tmp_path / "batch_state.yaml" + partial_state = { + "batch_id": batch_plan.batch_id, + "plan_path": str(plan_path), + "started_at": "2026-03-20T10:00:00Z", + "updated_at": "2026-03-20T10:01:00Z", + "completed": [ + { + "name": index_names[0], + "status": "succeeded", + "completed_at": "2026-03-20T10:00:30Z", + } + ], + "remaining": index_names[1:], # Still need to process idx 1 and 2 + "current_index": None, + } + state_path.write_text(yaml.safe_dump(partial_state, sort_keys=False)) + + # Resume from checkpoint + executor = BatchMigrationExecutor() + report = executor.resume( + state_path=str(state_path), + batch_plan_path=str(plan_path), + report_dir=str(tmp_path / "reports"), + redis_url=redis_url, + ) + + # Should complete remaining 2 indexes + # Note: The first index was marked as succeeded in checkpoint but not actually + # migrated, so the report will show 2 successful (the ones actually processed) + assert report.summary.successful >= 2 + assert report.status == "completed" + + # Verify at least the resumed indexes were migrated + for name in index_names[1:]: + migrated = SearchIndex.from_existing(name, redis_url=redis_url) + title_field = migrated.schema.fields.get("title") + assert title_field is not None + assert title_field.attrs.sortable is True + + # Cleanup + for name in index_names: + idx = SearchIndex.from_existing(name, redis_url=redis_url) + idx.delete(drop=True) + + def test_progress_callback_called(self, redis_url, worker_id, tmp_path): + """Test that progress callback is invoked during batch apply.""" + unique_id = str(uuid.uuid4())[:8] + prefix = f"batch_progress:{worker_id}:{unique_id}" + index_names = [] + indexes = [] + + # Create indexes + for i in range(2): + name = f"progress_batch_{unique_id}_{i}" + index = create_test_index(name, f"{prefix}_{i}", redis_url) + index.create(overwrite=True) + load_test_data(index) + indexes.append(index) + index_names.append(name) + + # Create patch + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [ + {"name": "title", "attrs": {"sortable": True}} + ] + }, + }, + sort_keys=False, + ) + ) + + # Create batch plan + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=index_names, + schema_patch_path=str(patch_path), + redis_url=redis_url, + ) + + # Track progress callbacks + progress_calls = [] + + def progress_cb(name, pos, total, status): + progress_calls.append((name, pos, total, status)) + + # Apply with progress callback + executor = BatchMigrationExecutor() + executor.apply( + batch_plan, + state_path=str(tmp_path / "state.yaml"), + report_dir=str(tmp_path / "reports"), + redis_url=redis_url, + progress_callback=progress_cb, + ) + + # Verify progress was reported for each index + assert len(progress_calls) >= 2 # At least one call per index + reported_names = {call[0] for call in progress_calls} + for name in index_names: + assert name in reported_names + + # Cleanup + for name in index_names: + idx = SearchIndex.from_existing(name, redis_url=redis_url) + idx.delete(drop=True) diff --git a/tests/unit/test_batch_migration.py b/tests/unit/test_batch_migration.py new file mode 100644 index 00000000..31adecd1 --- /dev/null +++ b/tests/unit/test_batch_migration.py @@ -0,0 +1,1366 @@ +""" +Unit tests for BatchMigrationPlanner and BatchMigrationExecutor. + +Tests use mocked Redis clients to verify: +- Pattern matching and index selection +- Applicability checking +- Checkpoint persistence and resume +- Failure policies +- Progress callbacks +""" + +from fnmatch import fnmatch +from pathlib import Path +from typing import Any, Dict, List +from unittest.mock import MagicMock, Mock, patch + +import pytest +import yaml + +from redisvl.migration import ( + BatchMigrationExecutor, + BatchMigrationPlanner, + BatchPlan, + BatchState, + SchemaPatch, +) +from redisvl.migration.models import BatchIndexEntry, BatchIndexState +from redisvl.schema.schema import IndexSchema + +# ============================================================================= +# Test Fixtures and Mock Helpers +# ============================================================================= + + +class MockRedisClient: + """Mock Redis client for batch migration tests.""" + + def __init__(self, indexes: List[str] = None, keys: Dict[str, List[str]] = None): + self.indexes = indexes or [] + self.keys = keys or {} + self._data: Dict[str, Dict[str, bytes]] = {} + + def execute_command(self, *args, **kwargs): + if args[0] == "FT._LIST": + return [idx.encode() for idx in self.indexes] + raise NotImplementedError(f"Command not mocked: {args}") + + def scan(self, cursor=0, match=None, count=None): + matched = [] + all_keys = [] + for prefix_keys in self.keys.values(): + all_keys.extend(prefix_keys) + + for key in all_keys: + decoded_key = key.decode() if isinstance(key, bytes) else str(key) + if match is None or fnmatch(decoded_key, match): + matched.append(key if isinstance(key, bytes) else key.encode()) + return 0, matched + + def hget(self, key, field): + return self._data.get(key, {}).get(field) + + def hset(self, key, field, value): + if key not in self._data: + self._data[key] = {} + self._data[key][field] = value + + def pipeline(self): + return MockPipeline(self) + + +class MockPipeline: + """Mock Redis pipeline.""" + + def __init__(self, client: MockRedisClient): + self._client = client + self._commands: List[tuple] = [] + + def hset(self, key, field, value): + self._commands.append(("hset", key, field, value)) + return self + + def execute(self): + results = [] + for cmd in self._commands: + if cmd[0] == "hset": + self._client.hset(cmd[1], cmd[2], cmd[3]) + results.append(1) + self._commands = [] + return results + + +def make_dummy_index(name: str, schema_dict: Dict[str, Any], stats: Dict[str, Any]): + """Create a mock SearchIndex for testing.""" + mock_index = Mock() + mock_index.name = name + mock_index.schema = IndexSchema.from_dict(schema_dict) + mock_index._redis_client = MockRedisClient() + mock_index.client = mock_index._redis_client + mock_index.info = Mock(return_value=stats) + mock_index.delete = Mock() + mock_index.create = Mock() + mock_index.exists = Mock(return_value=True) + return mock_index + + +def make_test_schema(name: str, prefix: str = None, dims: int = 3) -> Dict[str, Any]: + """Create a test schema dictionary.""" + return { + "index": { + "name": name, + "prefix": prefix or name, + "key_separator": ":", + "storage_type": "hash", + }, + "fields": [ + {"name": "title", "type": "text"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "flat", + "dims": dims, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + } + + +def make_shared_patch( + update_fields: List[Dict] = None, + add_fields: List[Dict] = None, + remove_fields: List[str] = None, +) -> Dict[str, Any]: + """Create a test schema patch dictionary.""" + return { + "version": 1, + "changes": { + "update_fields": update_fields or [], + "add_fields": add_fields or [], + "remove_fields": remove_fields or [], + "index": {}, + }, + } + + +def make_batch_plan( + batch_id: str, + indexes: List[BatchIndexEntry], + failure_policy: str = "fail_fast", + requires_quantization: bool = False, +) -> BatchPlan: + """Create a BatchPlan with default values for testing.""" + return BatchPlan( + batch_id=batch_id, + shared_patch=SchemaPatch( + version=1, + changes={"update_fields": [], "add_fields": [], "remove_fields": []}, + ), + indexes=indexes, + requires_quantization=requires_quantization, + failure_policy=failure_policy, + created_at="2026-03-20T10:00:00Z", + ) + + +# ============================================================================= +# BatchMigrationPlanner Tests +# ============================================================================= + + +class TestBatchMigrationPlannerPatternMatching: + """Test pattern matching for index discovery.""" + + def test_pattern_matches_multiple_indexes(self, monkeypatch, tmp_path): + """Pattern should match multiple indexes.""" + mock_client = MockRedisClient( + indexes=["products_idx", "users_idx", "orders_idx", "logs_idx"] + ) + + def mock_list_indexes(**kwargs): + return ["products_idx", "users_idx", "orders_idx", "logs_idx"] + + monkeypatch.setattr( + "redisvl.migration.batch_planner.list_indexes", mock_list_indexes + ) + + # Mock from_existing for each index + def mock_from_existing(name, **kwargs): + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + make_shared_patch( + update_fields=[ + {"name": "embedding", "attrs": {"algorithm": "hnsw"}} + ] + ) + ) + ) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + pattern="*_idx", + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert len(batch_plan.indexes) == 4 + assert all(idx.name.endswith("_idx") for idx in batch_plan.indexes) + + def test_pattern_no_matches_raises_error(self, monkeypatch, tmp_path): + """Empty pattern results should raise ValueError.""" + mock_client = MockRedisClient(indexes=["products", "users"]) + + def mock_list_indexes(**kwargs): + return ["products", "users"] + + monkeypatch.setattr( + "redisvl.migration.batch_planner.list_indexes", mock_list_indexes + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + with pytest.raises(ValueError, match="No indexes found"): + planner.create_batch_plan( + pattern="*_idx", # Won't match anything + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + def test_pattern_with_special_characters(self, monkeypatch, tmp_path): + """Pattern matching with special characters in index names.""" + mock_client = MockRedisClient( + indexes=["app:prod:idx", "app:dev:idx", "app:staging:idx"] + ) + + def mock_list_indexes(**kwargs): + return ["app:prod:idx", "app:dev:idx", "app:staging:idx"] + + monkeypatch.setattr( + "redisvl.migration.batch_planner.list_indexes", mock_list_indexes + ) + + def mock_from_existing(name, **kwargs): + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 5, "indexing": False} + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + pattern="app:*:idx", + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert len(batch_plan.indexes) == 3 + + +class TestBatchMigrationPlannerIndexSelection: + """Test explicit index list selection.""" + + def test_explicit_index_list(self, monkeypatch, tmp_path): + """Explicit index list should be used directly.""" + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3", "idx4", "idx5"]) + + def mock_from_existing(name, **kwargs): + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=["idx1", "idx3", "idx5"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert len(batch_plan.indexes) == 3 + assert [idx.name for idx in batch_plan.indexes] == ["idx1", "idx3", "idx5"] + + def test_duplicate_index_names(self, monkeypatch, tmp_path): + """Duplicate index names in list should be preserved (user intent).""" + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + def mock_from_existing(name, **kwargs): + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + # Duplicates are preserved - user explicitly listed them twice + batch_plan = planner.create_batch_plan( + indexes=["idx1", "idx1", "idx2"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert len(batch_plan.indexes) == 3 + + def test_non_existent_index(self, monkeypatch, tmp_path): + """Non-existent index should be marked as not applicable.""" + mock_client = MockRedisClient(indexes=["idx1"]) + + def mock_from_existing(name, **kwargs): + if name == "idx1": + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + raise Exception(f"Index '{name}' not found") + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=["idx1", "nonexistent"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert len(batch_plan.indexes) == 2 + assert batch_plan.indexes[0].applicable is True + assert batch_plan.indexes[1].applicable is False + assert "not found" in batch_plan.indexes[1].skip_reason.lower() + + def test_indexes_from_file(self, monkeypatch, tmp_path): + """Load index names from file.""" + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + def mock_from_existing(name, **kwargs): + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + # Create indexes file + indexes_file = tmp_path / "indexes.txt" + indexes_file.write_text("idx1\n# comment\nidx2\n\nidx3\n") + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes_file=str(indexes_file), + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert len(batch_plan.indexes) == 3 + assert [idx.name for idx in batch_plan.indexes] == ["idx1", "idx2", "idx3"] + + +class TestBatchMigrationPlannerApplicability: + """Test applicability checking for shared patches.""" + + def test_missing_field_marks_not_applicable(self, monkeypatch, tmp_path): + """Index missing field in update_fields should be marked not applicable.""" + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + def mock_from_existing(name, **kwargs): + if name == "idx1": + # Has embedding field + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + # idx2 - no embedding field + schema = { + "index": {"name": name, "prefix": name, "storage_type": "hash"}, + "fields": [{"name": "title", "type": "text"}], + } + return make_dummy_index(name, schema, {"num_docs": 5, "indexing": False}) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + make_shared_patch( + update_fields=[ + {"name": "embedding", "attrs": {"algorithm": "hnsw"}} + ] + ) + ) + ) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=["idx1", "idx2"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + idx1_entry = next(e for e in batch_plan.indexes if e.name == "idx1") + idx2_entry = next(e for e in batch_plan.indexes if e.name == "idx2") + + assert idx1_entry.applicable is True + assert idx2_entry.applicable is False + assert "embedding" in idx2_entry.skip_reason.lower() + + def test_field_already_exists_marks_not_applicable(self, monkeypatch, tmp_path): + """Adding field that already exists should mark not applicable.""" + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + def mock_from_existing(name, **kwargs): + schema = make_test_schema(name) + # Add 'category' field to idx2 + if name == "idx2": + schema["fields"].append({"name": "category", "type": "tag"}) + return make_dummy_index(name, schema, {"num_docs": 10, "indexing": False}) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + make_shared_patch(add_fields=[{"name": "category", "type": "tag"}]) + ) + ) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=["idx1", "idx2"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + idx1_entry = next(e for e in batch_plan.indexes if e.name == "idx1") + idx2_entry = next(e for e in batch_plan.indexes if e.name == "idx2") + + assert idx1_entry.applicable is True + assert idx2_entry.applicable is False + assert "category" in idx2_entry.skip_reason.lower() + + def test_blocked_change_marks_not_applicable(self, monkeypatch, tmp_path): + """Blocked changes (e.g., dims change) should mark not applicable.""" + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + def mock_from_existing(name, **kwargs): + dims = 3 if name == "idx1" else 768 + return make_dummy_index( + name, + make_test_schema(name, dims=dims), + {"num_docs": 10, "indexing": False}, + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + make_shared_patch( + update_fields=[ + {"name": "embedding", "attrs": {"dims": 1536}} # Change dims + ] + ) + ) + ) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=["idx1", "idx2"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + # Both should be not applicable because dims change is blocked + for entry in batch_plan.indexes: + assert entry.applicable is False + assert "dims" in entry.skip_reason.lower() + + +class TestBatchMigrationPlannerQuantization: + """Test quantization detection in batch plans.""" + + def test_detects_quantization_required(self, monkeypatch, tmp_path): + """Batch plan should detect when quantization is required.""" + mock_client = MockRedisClient(indexes=["idx1"]) + + def mock_from_existing(name, **kwargs): + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + make_shared_patch( + update_fields=[ + {"name": "embedding", "attrs": {"datatype": "float16"}} + ] + ) + ) + ) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=["idx1"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert batch_plan.requires_quantization is True + + +class TestBatchMigrationPlannerEdgeCases: + """Test edge cases and error handling.""" + + def test_multiple_source_specification_error(self, tmp_path): + """Should error when multiple source types are specified.""" + mock_client = MockRedisClient(indexes=["idx1"]) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + with pytest.raises(ValueError, match="only one of"): + planner.create_batch_plan( + indexes=["idx1"], + pattern="*", # Can't specify both + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + def test_no_source_specification_error(self, tmp_path): + """Should error when no source is specified.""" + mock_client = MockRedisClient(indexes=["idx1"]) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + with pytest.raises(ValueError, match="Must provide one of"): + planner.create_batch_plan( + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + def test_missing_patch_file_error(self): + """Should error when patch file doesn't exist.""" + mock_client = MockRedisClient(indexes=["idx1"]) + + planner = BatchMigrationPlanner() + with pytest.raises(FileNotFoundError): + planner.create_batch_plan( + indexes=["idx1"], + schema_patch_path="/nonexistent/patch.yaml", + redis_client=mock_client, + ) + + def test_missing_indexes_file_error(self, tmp_path): + """Should error when indexes file doesn't exist.""" + mock_client = MockRedisClient(indexes=["idx1"]) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + with pytest.raises(FileNotFoundError): + planner.create_batch_plan( + indexes_file="/nonexistent/indexes.txt", + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + +# ============================================================================= +# BatchMigrationExecutor Tests +# ============================================================================= + + +class MockMigrationPlan: + """Mock migration plan for testing.""" + + def __init__(self, index_name: str): + self.source = Mock() + self.source.schema_snapshot = make_test_schema(index_name) + self.merged_target_schema = make_test_schema(index_name) + + +class MockMigrationReport: + """Mock migration report for testing.""" + + def __init__(self, result: str = "succeeded", errors: List[str] = None): + self.result = result + self.validation = Mock(errors=errors or []) + + def model_dump(self, **kwargs): + return {"result": self.result} + + +def create_mock_executor( + succeed_on: List[str] = None, + fail_on: List[str] = None, + track_calls: List[str] = None, +): + """Create a properly configured BatchMigrationExecutor with mocks. + + Args: + succeed_on: Index names that should succeed. + fail_on: Index names that should fail. + track_calls: List to append index names as they're migrated. + + Returns: + A BatchMigrationExecutor with mocked planner and executor. + """ + succeed_on = succeed_on or [] + fail_on = fail_on or [] + if track_calls is None: + track_calls = [] + + # Create mock planner + mock_planner = Mock() + + def create_plan_from_patch(index_name, **kwargs): + track_calls.append(index_name) + return MockMigrationPlan(index_name) + + mock_planner.create_plan_from_patch = create_plan_from_patch + + # Create mock executor + mock_single_executor = Mock() + + def apply(plan, **kwargs): + # Determine if this should succeed or fail based on tracked calls + if track_calls: + last_index = track_calls[-1] + if last_index in fail_on: + return MockMigrationReport( + result="failed", errors=["Simulated failure"] + ) + return MockMigrationReport(result="succeeded") + + mock_single_executor.apply = apply + + # Create the batch executor with injected mocks + batch_executor = BatchMigrationExecutor(executor=mock_single_executor) + batch_executor._planner = mock_planner + + return batch_executor, track_calls + + +class TestBatchMigrationExecutorCheckpointing: + """Test checkpoint persistence and state management.""" + + def test_checkpoint_created_at_start(self, tmp_path): + """Checkpoint state file should be created when migration starts.""" + batch_plan = make_batch_plan( + batch_id="test-batch-001", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + ], + failure_policy="fail_fast", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, _ = create_mock_executor(succeed_on=["idx1", "idx2"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # Verify checkpoint file was created + assert state_path.exists() + state_data = yaml.safe_load(state_path.read_text()) + assert state_data["batch_id"] == "test-batch-001" + + def test_checkpoint_updated_after_each_index(self, monkeypatch, tmp_path): + """Checkpoint should be updated after each index is processed.""" + batch_plan = make_batch_plan( + batch_id="test-batch-002", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + checkpoint_snapshots = [] + + # Capture checkpoints as they're written + original_write = BatchMigrationExecutor._write_state + + def capture_checkpoint(self, state, path): + checkpoint_snapshots.append( + {"remaining": list(state.remaining), "completed": len(state.completed)} + ) + return original_write(self, state, path) + + monkeypatch.setattr(BatchMigrationExecutor, "_write_state", capture_checkpoint) + + executor, _ = create_mock_executor(succeed_on=["idx1", "idx2", "idx3"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # Verify checkpoints were written progressively + # Each index should trigger 2 writes: start and end + assert len(checkpoint_snapshots) >= 6 # At least 2 per index + + def test_resume_from_checkpoint(self, tmp_path): + """Resume should continue from where migration left off.""" + # Create a checkpoint state simulating interrupted migration + batch_plan = make_batch_plan( + batch_id="test-batch-003", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="continue_on_error", + ) + + # Write the batch plan + plan_path = tmp_path / "batch_plan.yaml" + with open(plan_path, "w") as f: + yaml.safe_dump(batch_plan.model_dump(exclude_none=True), f, sort_keys=False) + + # Write a checkpoint state (idx1 completed, idx2 and idx3 remaining) + state_path = tmp_path / "batch_state.yaml" + checkpoint_state = BatchState( + batch_id="test-batch-003", + plan_path=str(plan_path), + started_at="2026-03-20T10:00:00Z", + updated_at="2026-03-20T10:05:00Z", + remaining=["idx2", "idx3"], + completed=[ + BatchIndexState( + name="idx1", + status="succeeded", + completed_at="2026-03-20T10:05:00Z", + ) + ], + current_index=None, + ) + with open(state_path, "w") as f: + yaml.safe_dump( + checkpoint_state.model_dump(exclude_none=True), f, sort_keys=False + ) + + report_dir = tmp_path / "reports" + migrated_indexes: List[str] = [] + + executor, migrated_indexes = create_mock_executor( + succeed_on=["idx2", "idx3"], + ) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + # Resume from checkpoint + report = executor.resume( + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # idx1 should NOT be migrated again (already completed) + assert "idx1" not in migrated_indexes + # Only idx2 and idx3 should be migrated + assert migrated_indexes == ["idx2", "idx3"] + # Report should show all 3 as succeeded + assert report.summary.successful == 3 + + +class TestBatchMigrationExecutorFailurePolicies: + """Test failure policy behavior (fail_fast vs continue_on_error).""" + + def test_fail_fast_stops_on_first_error(self, tmp_path): + """fail_fast policy should stop processing after first failure.""" + batch_plan = make_batch_plan( + batch_id="test-batch-fail-fast", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), # This will fail + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="fail_fast", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, migrated_indexes = create_mock_executor( + succeed_on=["idx1", "idx3"], + fail_on=["idx2"], + ) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # idx3 should NOT have been attempted due to fail_fast + assert "idx3" not in migrated_indexes + assert migrated_indexes == ["idx1", "idx2"] + + # Report should show partial results + assert report.summary.successful == 1 + assert report.summary.failed == 1 + assert report.summary.skipped == 1 # idx3 was skipped + + def test_continue_on_error_processes_all(self, tmp_path): + """continue_on_error policy should process all indexes.""" + batch_plan = make_batch_plan( + batch_id="test-batch-continue", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), # This will fail + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, migrated_indexes = create_mock_executor( + succeed_on=["idx1", "idx3"], + fail_on=["idx2"], + ) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # ALL indexes should have been attempted + assert migrated_indexes == ["idx1", "idx2", "idx3"] + + # Report should show mixed results + assert report.summary.successful == 2 # idx1 and idx3 + assert report.summary.failed == 1 # idx2 + assert report.summary.skipped == 0 + assert report.status == "partial_failure" + + def test_retry_failed_on_resume(self, tmp_path): + """retry_failed=True should retry previously failed indexes.""" + batch_plan = make_batch_plan( + batch_id="test-batch-retry", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + ], + failure_policy="continue_on_error", + ) + + plan_path = tmp_path / "batch_plan.yaml" + with open(plan_path, "w") as f: + yaml.safe_dump(batch_plan.model_dump(exclude_none=True), f, sort_keys=False) + + # Create checkpoint with idx1 failed + state_path = tmp_path / "batch_state.yaml" + checkpoint_state = BatchState( + batch_id="test-batch-retry", + plan_path=str(plan_path), + started_at="2026-03-20T10:00:00Z", + updated_at="2026-03-20T10:05:00Z", + remaining=[], # All "done" but idx1 failed + completed=[ + BatchIndexState( + name="idx1", status="failed", completed_at="2026-03-20T10:03:00Z" + ), + BatchIndexState( + name="idx2", status="succeeded", completed_at="2026-03-20T10:05:00Z" + ), + ], + current_index=None, + ) + with open(state_path, "w") as f: + yaml.safe_dump( + checkpoint_state.model_dump(exclude_none=True), f, sort_keys=False + ) + + report_dir = tmp_path / "reports" + + executor, migrated_indexes = create_mock_executor(succeed_on=["idx1", "idx2"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + report = executor.resume( + state_path=str(state_path), + retry_failed=True, + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # idx1 should be retried, idx2 should not (already succeeded) + assert "idx1" in migrated_indexes + assert "idx2" not in migrated_indexes + assert report.summary.successful == 2 + + +class TestBatchMigrationExecutorProgressCallback: + """Test progress callback functionality.""" + + def test_progress_callback_called_for_each_index(self, tmp_path): + """Progress callback should be invoked for each index.""" + batch_plan = make_batch_plan( + batch_id="test-batch-progress", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + progress_events = [] + + def progress_callback(index_name, position, total, status): + progress_events.append( + {"index": index_name, "pos": position, "total": total, "status": status} + ) + + executor, _ = create_mock_executor(succeed_on=["idx1", "idx2", "idx3"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + progress_callback=progress_callback, + ) + + # Should have 2 events per index (starting + final status) + assert len(progress_events) == 6 + # Check first index events + assert progress_events[0] == { + "index": "idx1", + "pos": 1, + "total": 3, + "status": "starting", + } + assert progress_events[1] == { + "index": "idx1", + "pos": 1, + "total": 3, + "status": "succeeded", + } + + +class TestBatchMigrationExecutorEdgeCases: + """Test edge cases and error scenarios.""" + + def test_exception_during_migration_captured(self, tmp_path): + """Exception during migration should be captured in state.""" + batch_plan = make_batch_plan( + batch_id="test-batch-exception", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + # Track calls and raise exception for idx1 + call_count = [0] + + # Create mock planner that raises on idx1 + mock_planner = Mock() + + def create_plan_from_patch(index_name, **kwargs): + call_count[0] += 1 + if index_name == "idx1": + raise RuntimeError("Connection lost to Redis") + return MockMigrationPlan(index_name) + + mock_planner.create_plan_from_patch = create_plan_from_patch + + # Create mock executor + mock_single_executor = Mock() + mock_single_executor.apply = Mock( + return_value=MockMigrationReport(result="succeeded") + ) + + # Create batch executor with mocks + executor = BatchMigrationExecutor(executor=mock_single_executor) + executor._planner = mock_planner + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # Both should have been attempted + assert call_count[0] == 2 + # idx1 failed with exception, idx2 succeeded + assert report.summary.failed == 1 + assert report.summary.successful == 1 + + # Check error message is captured + idx1_report = next(r for r in report.indexes if r.name == "idx1") + assert "Connection lost" in idx1_report.error + + def test_non_applicable_indexes_skipped(self, tmp_path): + """Non-applicable indexes should be skipped and reported.""" + batch_plan = make_batch_plan( + batch_id="test-batch-skip", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry( + name="idx2", + applicable=False, + skip_reason="Missing field: embedding", + ), + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, migrated_indexes = create_mock_executor(succeed_on=["idx1", "idx3"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # idx2 should NOT be migrated + assert "idx2" not in migrated_indexes + assert migrated_indexes == ["idx1", "idx3"] + + # Report should show idx2 as skipped + assert report.summary.successful == 2 + assert report.summary.skipped == 1 + + idx2_report = next(r for r in report.indexes if r.name == "idx2") + assert idx2_report.status == "skipped" + assert "Missing field" in idx2_report.error + + def test_empty_batch_plan(self, monkeypatch, tmp_path): + """Empty batch plan should complete immediately.""" + batch_plan = make_batch_plan( + batch_id="test-batch-empty", + indexes=[], # No indexes + failure_policy="fail_fast", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor = BatchMigrationExecutor() + mock_client = MockRedisClient(indexes=[]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + assert report.status == "completed" + assert report.summary.total_indexes == 0 + assert report.summary.successful == 0 + + def test_missing_redis_connection_error(self, tmp_path): + """Should error when no Redis connection is provided.""" + batch_plan = make_batch_plan( + batch_id="test-batch-no-redis", + indexes=[BatchIndexEntry(name="idx1", applicable=True)], + failure_policy="fail_fast", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor = BatchMigrationExecutor() + + with pytest.raises(ValueError, match="redis"): + executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + # No redis_url or redis_client provided + ) + + def test_resume_missing_state_file_error(self, tmp_path): + """Resume should error when state file doesn't exist.""" + executor = BatchMigrationExecutor() + mock_client = MockRedisClient(indexes=[]) + + with pytest.raises(FileNotFoundError, match="State file"): + executor.resume( + state_path=str(tmp_path / "nonexistent_state.yaml"), + report_dir=str(tmp_path / "reports"), + redis_client=mock_client, + ) + + def test_resume_missing_plan_file_error(self, tmp_path): + """Resume should error when plan file doesn't exist.""" + # Create state file pointing to nonexistent plan + state_path = tmp_path / "batch_state.yaml" + state = BatchState( + batch_id="test-batch", + plan_path="/nonexistent/plan.yaml", + started_at="2026-03-20T10:00:00Z", + updated_at="2026-03-20T10:05:00Z", + remaining=["idx1"], + completed=[], + current_index=None, + ) + with open(state_path, "w") as f: + yaml.safe_dump(state.model_dump(exclude_none=True), f) + + executor = BatchMigrationExecutor() + mock_client = MockRedisClient(indexes=["idx1"]) + + with pytest.raises(FileNotFoundError, match="Batch plan"): + executor.resume( + state_path=str(state_path), + report_dir=str(tmp_path / "reports"), + redis_client=mock_client, + ) + + +class TestBatchMigrationExecutorReportGeneration: + """Test batch report generation.""" + + def test_report_contains_all_indexes(self, tmp_path): + """Final report should contain entries for all indexes.""" + batch_plan = make_batch_plan( + batch_id="test-batch-report", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry( + name="idx2", applicable=False, skip_reason="Missing field" + ), + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, _ = create_mock_executor(succeed_on=["idx1", "idx3"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # All indexes should be in report + index_names = {r.name for r in report.indexes} + assert index_names == {"idx1", "idx2", "idx3"} + + # Verify totals + assert report.summary.total_indexes == 3 + assert report.summary.successful == 2 + assert report.summary.skipped == 1 + + def test_per_index_reports_written(self, tmp_path): + """Individual reports should be written for each migrated index.""" + batch_plan = make_batch_plan( + batch_id="test-batch-files", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, _ = create_mock_executor(succeed_on=["idx1", "idx2"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # Report files should exist + assert (report_dir / "idx1_report.yaml").exists() + assert (report_dir / "idx2_report.yaml").exists() + + def test_completed_status_when_all_succeed(self, tmp_path): + """Status should be 'completed' when all indexes succeed.""" + batch_plan = make_batch_plan( + batch_id="test-batch-complete", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, _ = create_mock_executor(succeed_on=["idx1", "idx2"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + assert report.status == "completed" + + def test_failed_status_when_all_fail(self, tmp_path): + """Status should be 'failed' when all indexes fail.""" + batch_plan = make_batch_plan( + batch_id="test-batch-all-fail", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + # Create a mock that raises exceptions for all indexes + mock_planner = Mock() + mock_planner.create_plan_from_patch = Mock( + side_effect=RuntimeError("All migrations fail") + ) + + mock_single_executor = Mock() + executor = BatchMigrationExecutor(executor=mock_single_executor) + executor._planner = mock_planner + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + assert report.status == "failed" + assert report.summary.failed == 2 + assert report.summary.successful == 0