From b1ac7772a3fdd806c145ad5eebb82ff74336110f Mon Sep 17 00:00:00 2001 From: clemsgrs Date: Sat, 11 Apr 2026 11:12:54 +0000 Subject: [PATCH 1/2] reflect hs2p change to supress cufile logging --- pyproject.toml | 4 ++-- slide2vec/inference.py | 32 ++++++++++++++++++-------------- slide2vec/utils/log_utils.py | 8 +++++++- tests/test_progress.py | 2 +- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5439f15..75fade3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] dependencies = [ - "hs2p[asap,cucim,openslide,vips]>=3.1.4", + "hs2p[asap,cucim,openslide,vips]>=3.1.5", "omegaconf", "matplotlib", "numpy<2", @@ -85,7 +85,7 @@ fm = [ "pandas", "pillow", "rich", - "hs2p[asap,cucim,openslide,vips]>=3.1.4", + "hs2p[asap,cucim,openslide,vips]>=3.1.5", "wandb", "torch>=2.3,<2.8", "torchvision>=0.18.0", diff --git a/slide2vec/inference.py b/slide2vec/inference.py index 28e8cf0..3c8ad79 100644 --- a/slide2vec/inference.py +++ b/slide2vec/inference.py @@ -17,7 +17,7 @@ import pandas as pd import torch from hs2p import SlideSpec, FilterConfig, PreviewConfig, SegmentationConfig, TilingConfig, load_tiling_result, tile_slides -from hs2p.utils.stderr import run_with_filtered_stderr +from hs2p.utils.stderr import run_with_filtered_stderr, run_with_filtered_stdio import numpy as np from transformers.image_processing_utils import BaseImageProcessor @@ -2383,19 +2383,23 @@ def _tile_slides( ) -> list[Any]: _preload_asap_wholeslidedata(preprocessing) tiling_cfg, segmentation_cfg, filtering_cfg, preview_cfg, read_coordinates_from, resume = _build_hs2p_configs(preprocessing) - return tile_slides( - slides, - tiling=tiling_cfg, - segmentation=segmentation_cfg, - filtering=filtering_cfg, - preview=preview_cfg, - output_dir=output_dir, - num_workers=num_workers, - read_coordinates_from=read_coordinates_from, - resume=resume, - save_tiles=not preprocessing.on_the_fly and preprocessing.read_tiles_from is None, - jpeg_backend=preprocessing.jpeg_backend, - ) + + def _run_tile_slides(): + return tile_slides( + slides, + tiling=tiling_cfg, + segmentation=segmentation_cfg, + filtering=filtering_cfg, + preview=preview_cfg, + output_dir=output_dir, + num_workers=num_workers, + read_coordinates_from=read_coordinates_from, + resume=resume, + save_tiles=not preprocessing.on_the_fly and preprocessing.read_tiles_from is None, + jpeg_backend=preprocessing.jpeg_backend, + ) + + return run_with_filtered_stdio(_run_tile_slides) def _preload_asap_wholeslidedata(preprocessing: PreprocessingConfig) -> None: diff --git a/slide2vec/utils/log_utils.py b/slide2vec/utils/log_utils.py index 331717c..a59be3e 100644 --- a/slide2vec/utils/log_utils.py +++ b/slide2vec/utils/log_utils.py @@ -4,10 +4,15 @@ import os import sys -import slide2vec.distributed as distributed from slide2vec.progress import emit_progress_log +def _distributed_module(): + import slide2vec.distributed as distributed + + return distributed + + @contextlib.contextmanager def suppress_c_stderr(): """Temporarily redirect C-level stderr to /dev/null. @@ -66,6 +71,7 @@ def _configure_logger( logger = logging.getLogger(name) logger.setLevel(level) logger.propagate = False + distributed = _distributed_module() # Loosely match Google glog format: # [IWEF]yyyymmdd hh:mm:ss.uuuuuu threadid file:line] msg diff --git a/tests/test_progress.py b/tests/test_progress.py index 8eebe49..06360ab 100644 --- a/tests/test_progress.py +++ b/tests/test_progress.py @@ -1,5 +1,6 @@ import io import json +import subprocess import sys import types from contextlib import nullcontext @@ -540,7 +541,6 @@ def test_progress_aware_log_handler_routes_logs_through_active_reporter(): assert reporter.log_lines == ["INFO hello from logger"] - def test_embedding_summary_rows_match_tiling_style(): import slide2vec.progress as progress From b9fc4e543984a5ed9f13ad98c9029a1251a4ed82 Mon Sep 17 00:00:00 2001 From: clemsgrs Date: Sat, 11 Apr 2026 12:34:19 +0000 Subject: [PATCH 2/2] bump hs2p and align with new variable terminology --- README.md | 8 +- docs/python-api.md | 50 +++---- pyproject.toml | 4 +- scripts/benchmark_embedding_throughput.py | 4 +- scripts/benchmark_end_to_end_paths.py | 8 +- scripts/benchmark_tile_read_strategies.py | 8 +- scripts/generate_gt.py | 4 +- slide2vec/api.py | 70 +++++----- slide2vec/configs/default.yaml | 12 +- slide2vec/data/tile_reader.py | 12 +- slide2vec/encoders/base.py | 2 +- slide2vec/encoders/models/gigapath.py | 4 +- slide2vec/encoders/registry.py | 4 +- slide2vec/encoders/validation.py | 16 +-- slide2vec/inference.py | 122 ++++++++++-------- slide2vec/utils/config.py | 16 +-- .../gt/test-wsi.coordinates.meta.json | 4 +- tests/test_batch_collator_timing.py | 10 +- tests/test_output_consistency.py | 4 +- tests/test_progress.py | 2 +- tests/test_regression_core.py | 92 ++++++------- tests/test_regression_inference.py | 103 +++++++++------ tests/test_regression_models.py | 14 +- 23 files changed, 301 insertions(+), 272 deletions(-) diff --git a/README.md b/README.md index 811ac66..ee84a2d 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,8 @@ from slide2vec import ExecutionOptions, Pipeline, PreprocessingConfig pipeline = Pipeline( model=model, preprocessing=PreprocessingConfig( - target_spacing_um=0.5, - target_tile_size_px=224, + requested_spacing_um=0.5, + requested_tile_size_px=224, tissue_threshold=0.1, ), execution=ExecutionOptions(output_dir="outputs/demo"), @@ -62,8 +62,8 @@ Tile embeddings can be spatially grouped into regions for downstream models that ```python preprocessing = PreprocessingConfig( - target_spacing_um=0.5, - target_tile_size_px=224, + requested_spacing_um=0.5, + requested_tile_size_px=224, region_tile_multiple=6, # 6x6 tiles per region ) embedded = model.embed_slide("/path/to/slide.svs", preprocessing=preprocessing) diff --git a/docs/python-api.md b/docs/python-api.md index ecce5bb..ec7b2c5 100644 --- a/docs/python-api.md +++ b/docs/python-api.md @@ -49,8 +49,8 @@ from slide2vec import Model, PreprocessingConfig model = Model.from_preset("virchow2") preprocessing = PreprocessingConfig( backend="auto", - target_spacing_um=0.5, - target_tile_size_px=224, + requested_spacing_um=0.5, + requested_tile_size_px=224, tissue_threshold=0.1, segmentation={"downsample": 64}, filtering={"ref_tile_size": 224}, @@ -65,15 +65,15 @@ embedded = model.embed_slide("/path/to/slide.svs", preprocessing=preprocessing) Common fields: -- `target_spacing_um` -- `target_tile_size_px` +- `requested_spacing_um` +- `requested_tile_size_px` - `tissue_threshold` -- `backend` — `"auto"`, `"cucim"`, `"openslide"`, `"vips"`, or `"asap"` -- `on_the_fly` — read tiles directly from WSI during embedding (default `True`) -- `use_supertiles` — group tiles into spatial blocks to reduce WSI read calls (default `True`) -- `read_coordinates_from` — reuse pre-extracted coordinates -- `read_tiles_from` — reuse pre-extracted tile tar archives -- `resume` — resume from a previous tiling run (default `False`) +- `backend` - `"auto"`, `"cucim"`, `"openslide"`, `"vips"`, or `"asap"` +- `on_the_fly` - read tiles directly from WSI during embedding (default `True`) +- `use_supertiles` - group tiles into spatial blocks to reduce WSI read calls (default `True`) +- `read_coordinates_from` - reuse pre-extracted coordinates +- `read_tiles_from` - reuse pre-extracted tile tar archives +- `resume` - resume from a previous tiling run (default `False`) - `preview` For hierarchical extraction, see the [dedicated section](#hierarchical-feature-extraction) below. @@ -100,15 +100,15 @@ Common fields: - `batch_size` - `num_gpus` -- `precision` — `"fp16"`, `"bf16"`, `"fp32"`, or `None` (auto-determined from model) -- `num_workers` — DataLoader workers (`None` means auto; this resolves to the job CPU budget, capped by SLURM and 64, except cuCIM on-the-fly mode derives `cpu_budget // num_cucim_workers`) -- `num_preprocessing_workers` — hs2p tiling workers (default: all CPUs available to the job, capped by SLURM when present and limited to 64) -- `prefetch_factor` — DataLoader prefetch factor (default `4`) -- `persistent_workers` — keep DataLoader workers alive across batches (default `True`) +- `precision` - `"fp16"`, `"bf16"`, `"fp32"`, or `None` (auto-determined from model) +- `num_workers` - DataLoader workers (`None` means auto; this resolves to the job CPU budget, capped by SLURM and 64, except cuCIM on-the-fly mode derives `cpu_budget // num_cucim_workers`) +- `num_preprocessing_workers` - hs2p tiling workers (default: all CPUs available to the job, capped by SLURM when present and limited to 64) +- `prefetch_factor` - DataLoader prefetch factor (default `4`) +- `persistent_workers` - keep DataLoader workers alive across batches (default `True`) - `output_dir` -- `output_format` — `"pt"` (default) or `"npz"` -- `save_tile_embeddings` — persist tile embeddings for slide-level models (default `False`) -- `save_latents` — persist latent representations when available (default `False`) +- `output_format` - `"pt"` (default) or `"npz"` +- `save_tile_embeddings` - persist tile embeddings for slide-level models (default `False`) +- `save_latents` - persist latent representations when available (default `False`) `num_gpus` defaults to all available GPUs. `embed_slide(...)` uses tile sharding for one slide, and `embed_slides(...)` balances whole slides across GPUs while preserving input order. @@ -125,8 +125,8 @@ from slide2vec import Model, PreprocessingConfig model = Model.from_preset("virchow2") preprocessing = PreprocessingConfig( - target_spacing_um=0.5, - target_tile_size_px=224, + requested_spacing_um=0.5, + requested_tile_size_px=224, region_tile_multiple=6, # 6x6 tiles per region ) embedded = model.embed_slide("/path/to/slide.svs", preprocessing=preprocessing) @@ -134,10 +134,10 @@ embedded = model.embed_slide("/path/to/slide.svs", preprocessing=preprocessing) Config fields: -- `region_tile_multiple` — region grid width/height in tiles (e.g., `6` means 6x6 = 36 tiles per region; must be >= 2) -- `target_region_size_px` — explicit parent region size in pixels; auto-derived from `target_tile_size_px * region_tile_multiple` if omitted +- `region_tile_multiple` - region grid width/height in tiles (e.g., `6` means 6x6 = 36 tiles per region; must be >= 2) +- `requested_region_size_px` - explicit parent region size in pixels; auto-derived from `requested_tile_size_px * region_tile_multiple` if omitted -When the selected read spacing differs from `target_spacing_um`, hierarchical extraction resolves effective geometry tile-first: it scales `target_tile_size_px` to the effective read spacing, then derives the effective parent region as `effective_tile_size_px * region_tile_multiple`. This keeps unrolled subtile geometry aligned with the model-facing tile size contract under spacing-driven rounding. +When the selected read spacing differs from `requested_spacing_um`, hierarchical extraction resolves geometry tile-first: it scales `requested_tile_size_px` to the read spacing, then derives the read parent region as `read_tile_size_px * region_tile_multiple`. This keeps unrolled subtile geometry aligned with the model-facing tile size contract under spacing-driven rounding. When persisted via `Pipeline`, hierarchical artifacts are written to `hierarchical_embeddings/` and `RunResult` includes a `hierarchical_artifacts` list. @@ -152,8 +152,8 @@ from slide2vec import ExecutionOptions, Model, Pipeline, PreprocessingConfig model = Model.from_preset("virchow2") preprocessing = PreprocessingConfig( - target_spacing_um=0.5, - target_tile_size_px=224, + requested_spacing_um=0.5, + requested_tile_size_px=224, tissue_threshold=0.1, ) pipeline = Pipeline( diff --git a/pyproject.toml b/pyproject.toml index 75fade3..4537c7d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] dependencies = [ - "hs2p[asap,cucim,openslide,vips]>=3.1.5", + "hs2p[asap,cucim,openslide,vips]>=3.2.0", "omegaconf", "matplotlib", "numpy<2", @@ -85,7 +85,7 @@ fm = [ "pandas", "pillow", "rich", - "hs2p[asap,cucim,openslide,vips]>=3.1.5", + "hs2p[asap,cucim,openslide,vips]>=3.2.0", "wandb", "torch>=2.3,<2.8", "torchvision>=0.18.0", diff --git a/scripts/benchmark_embedding_throughput.py b/scripts/benchmark_embedding_throughput.py index 6b4c15f..89f2080 100644 --- a/scripts/benchmark_embedding_throughput.py +++ b/scripts/benchmark_embedding_throughput.py @@ -787,8 +787,8 @@ def _build_model_pipeline_from_config(config: dict[str, Any]): preview = dict(tiling_cfg.get("preview", {})) preprocessing = PreprocessingConfig( backend=str(tiling_cfg.get("backend", "asap")), - target_spacing_um=float(params.get("target_spacing_um", 0.5)), - target_tile_size_px=int(params.get("target_tile_size_px", 224)), + requested_spacing_um=float(params.get("requested_spacing_um", 0.5)), + requested_tile_size_px=int(params.get("requested_tile_size_px", 224)), tolerance=float(params.get("tolerance", 0.05)), overlap=float(params.get("overlap", 0.0)), tissue_threshold=float(params.get("tissue_threshold", 0.01)), diff --git a/scripts/benchmark_end_to_end_paths.py b/scripts/benchmark_end_to_end_paths.py index d833178..cf1e8da 100644 --- a/scripts/benchmark_end_to_end_paths.py +++ b/scripts/benchmark_end_to_end_paths.py @@ -227,9 +227,9 @@ def _default_base_config( "read_coordinates_from": None, "read_tiles_from": None, "params": { - "target_spacing_um": 0.5, + "requested_spacing_um": 0.5, "tolerance": 0.05, - "target_tile_size_px": 256, + "requested_tile_size_px": 256, "overlap": 0.0, "tissue_threshold": 0.01, "drop_holes": False, @@ -440,8 +440,8 @@ def _build_pipeline_from_config_dict(config: dict[str, Any]): preprocessing = PreprocessingConfig( backend=str(tiling_cfg.get("backend", "cucim")), - target_spacing_um=float(params.get("target_spacing_um", 0.5)), - target_tile_size_px=int(params.get("target_tile_size_px", 256)), + requested_spacing_um=float(params.get("requested_spacing_um", 0.5)), + requested_tile_size_px=int(params.get("requested_tile_size_px", 256)), tolerance=float(params.get("tolerance", 0.05)), overlap=float(params.get("overlap", 0.0)), tissue_threshold=float(params.get("tissue_threshold", 0.01)), diff --git a/scripts/benchmark_tile_read_strategies.py b/scripts/benchmark_tile_read_strategies.py index 25bb36f..4058bb2 100644 --- a/scripts/benchmark_tile_read_strategies.py +++ b/scripts/benchmark_tile_read_strategies.py @@ -281,9 +281,9 @@ def _default_base_config( "read_coordinates_from": None, "read_tiles_from": None, "params": { - "target_spacing_um": 0.5, + "requested_spacing_um": 0.5, "tolerance": 0.05, - "target_tile_size_px": 224, + "requested_tile_size_px": 224, "overlap": 0.0, "tissue_threshold": 0.1, "drop_holes": False, @@ -510,8 +510,8 @@ def _build_pipeline_from_config_dict(config: dict[str, Any]): preprocessing = PreprocessingConfig( backend=str(tiling_cfg.get("backend", "cucim")), - target_spacing_um=float(params.get("target_spacing_um", 0.5)), - target_tile_size_px=int(params.get("target_tile_size_px", 256)), + requested_spacing_um=float(params.get("requested_spacing_um", 0.5)), + requested_tile_size_px=int(params.get("requested_tile_size_px", 256)), tolerance=float(params.get("tolerance", 0.05)), overlap=float(params.get("overlap", 0.0)), tissue_threshold=float(params.get("tissue_threshold", 0.01)), diff --git a/scripts/generate_gt.py b/scripts/generate_gt.py index c635690..c1407f2 100644 --- a/scripts/generate_gt.py +++ b/scripts/generate_gt.py @@ -27,9 +27,9 @@ # Must stay in sync with test_output_consistency.py TILING_PARAMS = dict( - target_spacing_um=0.5, + requested_spacing_um=0.5, tolerance=0.07, - target_tile_size_px=224, + requested_tile_size_px=224, overlap=0.0, tissue_threshold=0.1, drop_holes=False, diff --git a/slide2vec/api.py b/slide2vec/api.py index 7a787e9..e9978a4 100644 --- a/slide2vec/api.py +++ b/slide2vec/api.py @@ -42,9 +42,9 @@ class SlideLike(Protocol): @dataclass(frozen=True, kw_only=True) class PreprocessingConfig: backend: str = "auto" - target_spacing_um: float | None = None - target_tile_size_px: int | None = None - target_region_size_px: int | None = None + requested_spacing_um: float | None = None + requested_tile_size_px: int | None = None + requested_region_size_px: int | None = None region_tile_multiple: int | None = None tolerance: float = 0.05 overlap: float = 0.0 @@ -75,11 +75,11 @@ def from_config(cls, cfg: Any) -> "PreprocessingConfig": preview_downsample = int(preview_cfg.downsample) return cls( backend=tiling.backend, - target_spacing_um=float(tiling.params.target_spacing_um), - target_tile_size_px=int(tiling.params.target_tile_size_px), - target_region_size_px=( + requested_spacing_um=float(tiling.params.requested_spacing_um), + requested_tile_size_px=int(tiling.params.requested_tile_size_px), + requested_region_size_px=( int(v) - if (v := getattr(tiling.params, "target_region_size_px", None)) is not None + if (v := getattr(tiling.params, "requested_region_size_px", None)) is not None else None ), region_tile_multiple=( @@ -454,28 +454,28 @@ def ensure_defaults() -> tuple[int, float]: return defaults if preprocessing is None: - target_tile_size_px, target_spacing_um = ensure_defaults() + requested_tile_size_px, requested_spacing_um = ensure_defaults() return _resolve_hierarchical_preprocessing( PreprocessingConfig( backend="auto", - target_spacing_um=target_spacing_um, - target_tile_size_px=target_tile_size_px, + requested_spacing_um=requested_spacing_um, + requested_tile_size_px=requested_tile_size_px, ) ) - target_spacing_um = preprocessing.target_spacing_um - target_tile_size_px = preprocessing.target_tile_size_px - if target_spacing_um is None or target_tile_size_px is None: + requested_spacing_um = preprocessing.requested_spacing_um + requested_tile_size_px = preprocessing.requested_tile_size_px + if requested_spacing_um is None or requested_tile_size_px is None: default_tile_size_px, default_spacing_um = ensure_defaults() - if target_spacing_um is None: - target_spacing_um = default_spacing_um - if target_tile_size_px is None: - target_tile_size_px = default_tile_size_px + if requested_spacing_um is None: + requested_spacing_um = default_spacing_um + if requested_tile_size_px is None: + requested_tile_size_px = default_tile_size_px return _resolve_hierarchical_preprocessing( replace( preprocessing, - target_spacing_um=target_spacing_um, - target_tile_size_px=target_tile_size_px, + requested_spacing_um=requested_spacing_um, + requested_tile_size_px=requested_tile_size_px, ) ) @@ -484,7 +484,7 @@ def _default_preprocessing_from_registry(name: str | None) -> tuple[int, float]: if not name or name not in encoder_registry: raise ValueError( "Cannot infer preprocessing defaults without a registered model. " - "Pass preprocessing.target_spacing_um and preprocessing.target_tile_size_px explicitly." + "Pass preprocessing.requested_spacing_um and preprocessing.requested_tile_size_px explicitly." ) defaults = resolve_preprocessing_defaults(name) @@ -499,7 +499,7 @@ def _validate_model_config( name = model.name if name not in encoder_registry: return - if preprocessing.region_tile_multiple is not None or preprocessing.target_region_size_px is not None: + if preprocessing.region_tile_multiple is not None or preprocessing.requested_region_size_px is not None: info = encoder_registry.info(name) if info["level"] != "tile": raise ValueError("Hierarchical preprocessing is only supported for tile encoders") @@ -508,8 +508,8 @@ def _validate_model_config( precision = None if on_cpu or execution is None else execution.precision validate_encoder_config( name, - target_tile_size_px=preprocessing.target_tile_size_px, - target_spacing_um=preprocessing.target_spacing_um, + requested_tile_size_px=preprocessing.requested_tile_size_px, + requested_spacing_um=preprocessing.requested_spacing_um, precision=precision, output_variant=model._output_variant, allow_non_recommended=bool(model.allow_non_recommended_settings), @@ -518,32 +518,32 @@ def _validate_model_config( def _resolve_hierarchical_preprocessing(preprocessing: PreprocessingConfig) -> PreprocessingConfig: multiple = preprocessing.region_tile_multiple - target_region_size_px = preprocessing.target_region_size_px + requested_region_size_px = preprocessing.requested_region_size_px if multiple is not None: multiple = int(multiple) if multiple < 2: raise ValueError("region_tile_multiple must be at least 2") - if multiple is None and target_region_size_px is None: + if multiple is None and requested_region_size_px is None: return preprocessing - if preprocessing.target_tile_size_px is None: + if preprocessing.requested_tile_size_px is None: raise ValueError( - "target_tile_size_px must be resolved before deriving hierarchical region geometry" + "requested_tile_size_px must be resolved before deriving hierarchical region geometry" ) - if target_region_size_px is None: - target_region_size_px = int(preprocessing.target_tile_size_px) * int(multiple) + if requested_region_size_px is None: + requested_region_size_px = int(preprocessing.requested_tile_size_px) * int(multiple) elif multiple is None: - if int(target_region_size_px) % int(preprocessing.target_tile_size_px) != 0: + if int(requested_region_size_px) % int(preprocessing.requested_tile_size_px) != 0: raise ValueError( - "target_region_size_px must be an exact multiple of target_tile_size_px" + "requested_region_size_px must be an exact multiple of requested_tile_size_px" ) - multiple = int(target_region_size_px) // int(preprocessing.target_tile_size_px) - elif int(target_region_size_px) != int(preprocessing.target_tile_size_px) * int(multiple): + multiple = int(requested_region_size_px) // int(preprocessing.requested_tile_size_px) + elif int(requested_region_size_px) != int(preprocessing.requested_tile_size_px) * int(multiple): raise ValueError( - "target_region_size_px must match target_tile_size_px * region_tile_multiple" + "requested_region_size_px must match requested_tile_size_px * region_tile_multiple" ) return replace( preprocessing, - target_region_size_px=int(target_region_size_px), + requested_region_size_px=int(requested_region_size_px), region_tile_multiple=int(multiple), ) diff --git a/slide2vec/configs/default.yaml b/slide2vec/configs/default.yaml index 9fc9bd0..0aedce9 100644 --- a/slide2vec/configs/default.yaml +++ b/slide2vec/configs/default.yaml @@ -26,10 +26,10 @@ tiling: read_tiles_from: # path to an existing directory containing pre-extracted `.tiles.tar` tile stores to reuse instead of starting tiling from scratch backend: "auto" # backend to use for slide reading; "auto" lets hs2p resolve the best backend per slide, preferring cuCIM when available params: - target_spacing_um: # spacing at which to tile the slide, in microns per pixel; filled from a preset model when available + requested_spacing_um: # spacing at which to tile the slide, in microns per pixel; filled from a preset model when available tolerance: 0.05 # tolerance for matching the spacing (float between 0 and 1, deciding how much the spacing can deviate from the one specified in the slide metadata) - target_tile_size_px: # size of the tiles to extract, in pixels; filled from a preset model when available - target_region_size_px: # size of hierarchical parent regions in pixels; when unset and region_tile_multiple is set, derived from target_tile_size_px * region_tile_multiple + requested_tile_size_px: # size of the tiles to extract, in pixels; filled from a preset model when available + requested_region_size_px: # size of hierarchical parent regions in pixels; when unset and region_tile_multiple is set, derived from requested_tile_size_px * region_tile_multiple region_tile_multiple: # hierarchical region grid width/height in tiles; e.g. 6 means 6x6 tiles per region overlap: 0.0 # percentage of overlap between two consecutive tiles (float between 0 and 1) tissue_threshold: 0.1 # minimum fraction of pixels that must be tissue to keep a tile (float between 0 and 1) @@ -44,8 +44,8 @@ tiling: use_otsu: false # use otsu's method instead of simple binary thresholding use_hsv: true # use HSV thresholding instead of simple binary thresholding filter_params: - ref_tile_size: ${tiling.params.target_tile_size_px} # reference tile size at the target spacing - a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.params.target_spacing_um will be kept) + ref_tile_size: ${tiling.params.requested_tile_size_px} # reference tile size at the target spacing + a_t: 4 # area filter threshold for tissue (positive integer, the minimum size of detected foreground contours to consider, relative to the reference tile size ref_tile_size, e.g. a value 10 means only detected foreground contours of size greater than 10 [ref_tile_size, ref_tile_size] tiles at spacing tiling.params.requested_spacing_um will be kept) a_h: 2 # area filter threshold for holes (positive integer, the minimum size of detected holes/cavities in foreground contours to avoid, once again relative to the reference tile size ref_tile_size) filter_white: false # whether to filter out mostly white tiles filter_black: false # whether to filter out mostly black tiles @@ -78,7 +78,7 @@ wandb: project: "" # wandb project name username: "" # wandb username exp_name: "" # wandb experiment name - tags: ["features", "${model.name}", "${tiling.params.target_tile_size_px}"] # wandb tags + tags: ["features", "${model.name}", "${tiling.params.requested_tile_size_px}"] # wandb tags dir: "/home/user/" group: resume_id: "${resume_dirname}" diff --git a/slide2vec/data/tile_reader.py b/slide2vec/data/tile_reader.py index 61bd8e8..07a8836 100644 --- a/slide2vec/data/tile_reader.py +++ b/slide2vec/data/tile_reader.py @@ -89,7 +89,7 @@ def __init__( self._num_cucim_workers = num_cucim_workers self._gpu_decode = gpu_decode self._read_level = int(tiling_result.read_level) - self._tile_size_px = int(tiling_result.effective_tile_size_px) + self._tile_size_px = int(tiling_result.read_tile_size_px) self._x = tiling_result.x self._y = tiling_result.y self._reader = None @@ -215,7 +215,7 @@ def __init__( gpu_decode: bool = False, use_supertiles: bool = True, ): - self.tile_size = int(tiling_result.effective_tile_size_px) + self.tile_size = int(tiling_result.read_tile_size_px) self._reader = WSITileReader( image_path, tiling_result, @@ -354,8 +354,8 @@ def __init__( tiling_result: TilingResult, region_index: np.ndarray, subtile_index_within_region: np.ndarray, - effective_region_size_px: int, - effective_tile_size_px: int, + read_region_size_px: int, + read_tile_size_px: int, backend: str = "cucim", num_cucim_workers: int = 4, gpu_decode: bool = False, @@ -363,11 +363,11 @@ def __init__( self._region_index = np.asarray(region_index, dtype=np.int32) self._subtile_index_within_region = np.asarray(subtile_index_within_region, dtype=np.int32) self._tiles_per_region = int(self._subtile_index_within_region.max()) + 1 if len(self._subtile_index_within_region) else 0 - self._tile_size = int(effective_tile_size_px) + self._tile_size = int(read_tile_size_px) self._reader = WSIRegionReader( image_path, read_level=int(tiling_result.read_level), - region_size_px=int(effective_region_size_px), + region_size_px=int(read_region_size_px), backend=backend, num_cucim_workers=num_cucim_workers, gpu_decode=gpu_decode, diff --git a/slide2vec/encoders/base.py b/slide2vec/encoders/base.py index 2801e60..fd3cdd3 100644 --- a/slide2vec/encoders/base.py +++ b/slide2vec/encoders/base.py @@ -90,7 +90,7 @@ def prepare_coordinates( coordinates: Tensor, *, base_spacing_um: float, - target_spacing_um: float, + requested_spacing_um: float, ) -> Tensor: """Hook for model-specific coordinate normalization.""" return coordinates diff --git a/slide2vec/encoders/models/gigapath.py b/slide2vec/encoders/models/gigapath.py index f253c3d..e5eadf2 100644 --- a/slide2vec/encoders/models/gigapath.py +++ b/slide2vec/encoders/models/gigapath.py @@ -69,9 +69,9 @@ def prepare_coordinates( coordinates: torch.Tensor, *, base_spacing_um: float, - target_spacing_um: float, + requested_spacing_um: float, ) -> torch.Tensor: - scale = float(base_spacing_um) / float(target_spacing_um) + scale = float(base_spacing_um) / float(requested_spacing_um) return torch.floor(coordinates.to(torch.float32) * scale).to(torch.long) def encode_slide( diff --git a/slide2vec/encoders/registry.py b/slide2vec/encoders/registry.py index 51935d8..ef4ce10 100644 --- a/slide2vec/encoders/registry.py +++ b/slide2vec/encoders/registry.py @@ -131,8 +131,8 @@ def resolve_preprocessing_defaults( supported_text = ", ".join(f"{s:g}" for s in unique_spacings) raise ValueError( f"Encoder '{encoder_name}' supports multiple spacings [{supported_text}]; " - "cannot infer a default target_spacing_um. " - "Pass preprocessing.target_spacing_um explicitly." + "cannot infer a default requested_spacing_um. " + "Pass preprocessing.requested_spacing_um explicitly." ) spacing_um = unique_spacings[0] return { diff --git a/slide2vec/encoders/validation.py b/slide2vec/encoders/validation.py index 47d0fc5..bb5080e 100644 --- a/slide2vec/encoders/validation.py +++ b/slide2vec/encoders/validation.py @@ -17,8 +17,8 @@ def validate_encoder_config( encoder_name: str, *, info: dict[str, Any] | None = None, - target_tile_size_px: int | None = None, - target_spacing_um: float | None = None, + requested_tile_size_px: int | None = None, + requested_spacing_um: float | None = None, precision: str | None = None, output_variant: str | None = None, allow_non_recommended: bool = False, @@ -44,20 +44,20 @@ def validate_encoder_config( ) rec_spacing = info["supported_spacing_um"] if "supported_spacing_um" in info else None - if target_spacing_um is not None and rec_spacing is not None: + if requested_spacing_um is not None and rec_spacing is not None: valid_spacings = rec_spacing if isinstance(rec_spacing, list) else [rec_spacing] - if not any(abs(float(target_spacing_um) - float(s)) <= 1e-8 for s in valid_spacings): + if not any(abs(float(requested_spacing_um) - float(s)) <= 1e-8 for s in valid_spacings): supported_text = ", ".join(f"{s:g}" for s in valid_spacings) mismatches.append( - f"target_spacing_um={float(target_spacing_um):g} (recommended: [{supported_text}])" + f"requested_spacing_um={float(requested_spacing_um):g} (recommended: [{supported_text}])" ) - if target_tile_size_px is not None: + if requested_tile_size_px is not None: reqs = resolve_preprocessing_requirements(encoder_name, info) rec_tile_size = reqs["tile_size_px"] - if rec_tile_size is not None and int(target_tile_size_px) != int(rec_tile_size): + if rec_tile_size is not None and int(requested_tile_size_px) != int(rec_tile_size): mismatches.append( - f"target_tile_size_px={target_tile_size_px} (recommended: {rec_tile_size})" + f"requested_tile_size_px={requested_tile_size_px} (recommended: {rec_tile_size})" ) if not mismatches: diff --git a/slide2vec/inference.py b/slide2vec/inference.py index 3c8ad79..3e2300d 100644 --- a/slide2vec/inference.py +++ b/slide2vec/inference.py @@ -101,32 +101,40 @@ class HierarchicalIndex: def _is_hierarchical_preprocessing(preprocessing: PreprocessingConfig | None) -> bool: if preprocessing is None: return False - return preprocessing.region_tile_multiple is not None or preprocessing.target_region_size_px is not None + return preprocessing.region_tile_multiple is not None or preprocessing.requested_region_size_px is not None def _resolve_hierarchical_geometry(preprocessing: PreprocessingConfig, tiling_result) -> dict[str, int]: if preprocessing.region_tile_multiple is None: raise ValueError("Hierarchical preprocessing requires region_tile_multiple") - if preprocessing.target_region_size_px is None: - raise ValueError("Hierarchical preprocessing requires target_region_size_px") - target_tile_size_px = int(preprocessing.target_tile_size_px) - target_region_size_px = int(preprocessing.target_region_size_px) - target_spacing_um = float(preprocessing.target_spacing_um) + if preprocessing.requested_region_size_px is None: + raise ValueError("Hierarchical preprocessing requires requested_region_size_px") + requested_tile_size_px = int(preprocessing.requested_tile_size_px) + requested_region_size_px = int(preprocessing.requested_region_size_px) + requested_spacing_um = float(preprocessing.requested_spacing_um) multiple = int(preprocessing.region_tile_multiple) - if target_region_size_px % multiple != 0: - raise ValueError("target_region_size_px must be divisible by region_tile_multiple") - effective_spacing_um = float(getattr(tiling_result, "effective_spacing_um")) + if requested_region_size_px % multiple != 0: + raise ValueError("requested_region_size_px must be divisible by region_tile_multiple") + read_spacing_um = float(getattr(tiling_result, "read_spacing_um")) base_spacing_um = float(getattr(tiling_result, "base_spacing_um")) - effective_tile_size_px = int(round(target_tile_size_px * target_spacing_um / effective_spacing_um)) - effective_region_size_px = effective_tile_size_px * multiple - tile_size_lv0 = int(round(target_tile_size_px * target_spacing_um / base_spacing_um)) + if abs(read_spacing_um - requested_spacing_um) / requested_spacing_um <= float(preprocessing.tolerance): + read_tile_size_px = requested_tile_size_px + else: + read_tile_size_px = int( + round(requested_tile_size_px * requested_spacing_um / read_spacing_um) + ) + read_region_size_px = read_tile_size_px * multiple + # Use the actual read geometry that produced the tile crop. When the + # resolved spacing is considered equivalent to the requested spacing, + # this keeps the level-0 footprint aligned with the real crop size. + tile_size_lv0 = int(round(read_tile_size_px * read_spacing_um / base_spacing_um)) return { "region_tile_multiple": multiple, "tiles_per_region": multiple * multiple, - "target_tile_size_px": target_tile_size_px, - "effective_tile_size_px": effective_tile_size_px, - "target_region_size_px": target_region_size_px, - "effective_region_size_px": effective_region_size_px, + "requested_tile_size_px": requested_tile_size_px, + "read_tile_size_px": read_tile_size_px, + "requested_region_size_px": requested_region_size_px, + "read_region_size_px": read_region_size_px, "tile_size_lv0": tile_size_lv0, } @@ -562,7 +570,7 @@ def aggregate_tiles( coordinates = _scale_coordinates( coordinates, float(tiling_result.base_spacing_um), - float(tiling_result.target_spacing_um), + float(tiling_result.requested_spacing_um), ) coordinate_tensor = torch.tensor(coordinates, dtype=torch.int, device=loaded.device) tile_features = load_array(artifact.path) @@ -1298,8 +1306,8 @@ def _compute_hierarchical_embeddings_for_slide( tiling_result=tiling_result, region_index=index.region_index, subtile_index_within_region=index.subtile_index_within_region, - effective_region_size_px=int(geometry["effective_region_size_px"]), - effective_tile_size_px=int(geometry["effective_tile_size_px"]), + read_region_size_px=int(geometry["read_region_size_px"]), + read_tile_size_px=int(geometry["read_tile_size_px"]), backend=_resolve_slide_backend(preprocessing, tiling_result), num_cucim_workers=preprocessing.num_cucim_workers, gpu_decode=preprocessing.gpu_decode, @@ -1307,7 +1315,7 @@ def _compute_hierarchical_embeddings_for_slide( dataset = TileIndexDataset(resolved_indices) batch_preprocessor = _build_batch_preprocessor_for_tile_images( loaded, - target_tile_size_px=int(geometry["target_tile_size_px"]), + requested_tile_size_px=int(geometry["requested_tile_size_px"]), ) loader_kwargs = _embedding_dataloader_kwargs(loaded, execution) resolved_backend = _resolve_slide_backend(preprocessing, tiling_result) @@ -1387,8 +1395,8 @@ def _compute_hierarchical_embedding_shard_for_slide( tiling_result=tiling_result, region_index=index.region_index, subtile_index_within_region=index.subtile_index_within_region, - effective_region_size_px=int(geometry["effective_region_size_px"]), - effective_tile_size_px=int(geometry["effective_tile_size_px"]), + read_region_size_px=int(geometry["read_region_size_px"]), + read_tile_size_px=int(geometry["read_tile_size_px"]), backend=_resolve_slide_backend(preprocessing, tiling_result), num_cucim_workers=preprocessing.num_cucim_workers, gpu_decode=preprocessing.gpu_decode, @@ -1396,7 +1404,7 @@ def _compute_hierarchical_embedding_shard_for_slide( dataset = TileIndexDataset(resolved_indices) batch_preprocessor = _build_batch_preprocessor_for_tile_images( loaded, - target_tile_size_px=int(geometry["target_tile_size_px"]), + requested_tile_size_px=int(geometry["requested_tile_size_px"]), ) loader_kwargs = _embedding_dataloader_kwargs(loaded, execution) resolved_backend = _resolve_slide_backend(preprocessing, tiling_result) @@ -1459,7 +1467,7 @@ def _aggregate_tile_embeddings_for_slide( coordinates = _scale_coordinates( coordinates, float(tiling_result.base_spacing_um), - float(tiling_result.target_spacing_um), + float(tiling_result.requested_spacing_um), ) coordinate_tensor = torch.tensor(coordinates, dtype=torch.int, device=loaded.device) if not torch.is_tensor(tile_embeddings): @@ -1642,11 +1650,11 @@ def _build_hierarchical_embedding_metadata( "mask_path": str(mask_path) if mask_path is not None else None, "backend": backend, "region_tile_multiple": int(geometry["region_tile_multiple"]), - "target_tile_size_px": int(geometry["target_tile_size_px"]), - "effective_tile_size_px": int(geometry["effective_tile_size_px"]), - "target_region_size_px": int(geometry["target_region_size_px"]), - "effective_region_size_px": int(geometry["effective_region_size_px"]), - "target_spacing_um": float(preprocessing.target_spacing_um), + "requested_tile_size_px": int(geometry["requested_tile_size_px"]), + "read_tile_size_px": int(geometry["read_tile_size_px"]), + "requested_region_size_px": int(geometry["requested_region_size_px"]), + "read_region_size_px": int(geometry["read_region_size_px"]), + "requested_spacing_um": float(preprocessing.requested_spacing_um), "subtile_order": "row_major", } @@ -1728,14 +1736,14 @@ def _build_batch_preprocessor( ): return _build_batch_preprocessor_for_tile_images( loaded, - target_tile_size_px=int(getattr(tiling_result, "requested_tile_size_px")), + requested_tile_size_px=int(getattr(tiling_result, "requested_tile_size_px")), ) def _build_batch_preprocessor_for_tile_images( loaded: LoadedModel, *, - target_tile_size_px: int, + requested_tile_size_px: int, ): spec = _build_batch_transform_spec(loaded.transforms) if spec is None: @@ -1751,7 +1759,7 @@ def preprocess(batch): if spec.resize_size is None: image = _resize_image_batch( image, - (int(target_tile_size_px), int(target_tile_size_px)), + (int(requested_tile_size_px), int(requested_tile_size_px)), ) image = _apply_batch_transform_spec(image, spec) if image.device != loaded.device: @@ -2487,15 +2495,15 @@ def _resolve_path_str(value: Any) -> str | None: def _build_hs2p_configs(preprocessing: PreprocessingConfig): - target_tile_size_px = ( - preprocessing.target_region_size_px + requested_tile_size_px = ( + preprocessing.requested_region_size_px if _is_hierarchical_preprocessing(preprocessing) - else preprocessing.target_tile_size_px + else preprocessing.requested_tile_size_px ) tiling_cfg = TilingConfig( backend=_resolve_tiling_backend(preprocessing), - target_spacing_um=preprocessing.target_spacing_um, - target_tile_size_px=target_tile_size_px, + requested_spacing_um=preprocessing.requested_spacing_um, + requested_tile_size_px=requested_tile_size_px, tolerance=preprocessing.tolerance, overlap=preprocessing.overlap, tissue_threshold=preprocessing.tissue_threshold, @@ -2570,25 +2578,25 @@ def ensure_defaults() -> tuple[int, float]: return int(defaults["tile_size_px"]), float(defaults["spacing_um"]) if preprocessing is None: - target_tile_size_px, target_spacing_um = ensure_defaults() + requested_tile_size_px, requested_spacing_um = ensure_defaults() return _resolve_hierarchical_preprocessing(PreprocessingConfig( backend="auto", - target_spacing_um=target_spacing_um, - target_tile_size_px=target_tile_size_px, + requested_spacing_um=requested_spacing_um, + requested_tile_size_px=requested_tile_size_px, )) - target_spacing_um = preprocessing.target_spacing_um - target_tile_size_px = preprocessing.target_tile_size_px - if target_spacing_um is None or target_tile_size_px is None: + requested_spacing_um = preprocessing.requested_spacing_um + requested_tile_size_px = preprocessing.requested_tile_size_px + if requested_spacing_um is None or requested_tile_size_px is None: default_tile_size_px, default_spacing_um = ensure_defaults() - if target_spacing_um is None: - target_spacing_um = default_spacing_um - if target_tile_size_px is None: - target_tile_size_px = default_tile_size_px + if requested_spacing_um is None: + requested_spacing_um = default_spacing_um + if requested_tile_size_px is None: + requested_tile_size_px = default_tile_size_px return _resolve_hierarchical_preprocessing(replace( preprocessing, - target_spacing_um=target_spacing_um, - target_tile_size_px=target_tile_size_px, + requested_spacing_um=requested_spacing_um, + requested_tile_size_px=requested_tile_size_px, )) @@ -3004,9 +3012,9 @@ def _serialize_model(model) -> dict[str, Any]: def _serialize_preprocessing(preprocessing: PreprocessingConfig) -> dict[str, Any]: return { "backend": preprocessing.backend, - "target_spacing_um": preprocessing.target_spacing_um, - "target_tile_size_px": preprocessing.target_tile_size_px, - "target_region_size_px": preprocessing.target_region_size_px, + "requested_spacing_um": preprocessing.requested_spacing_um, + "requested_tile_size_px": preprocessing.requested_tile_size_px, + "requested_region_size_px": preprocessing.requested_region_size_px, "region_tile_multiple": preprocessing.region_tile_multiple, "tolerance": preprocessing.tolerance, "overlap": preprocessing.overlap, @@ -3056,11 +3064,11 @@ def deserialize_preprocessing(payload: dict[str, Any]) -> PreprocessingConfig: ) return PreprocessingConfig( backend=payload["backend"], - target_spacing_um=float(payload["target_spacing_um"]), - target_tile_size_px=int(payload["target_tile_size_px"]), - target_region_size_px=( - int(payload["target_region_size_px"]) - if "target_region_size_px" in payload and payload["target_region_size_px"] is not None + requested_spacing_um=float(payload["requested_spacing_um"]), + requested_tile_size_px=int(payload["requested_tile_size_px"]), + requested_region_size_px=( + int(payload["requested_region_size_px"]) + if "requested_region_size_px" in payload and payload["requested_region_size_px"] is not None else None ), region_tile_multiple=( diff --git a/slide2vec/utils/config.py b/slide2vec/utils/config.py index 8dcd82f..2c7b1c4 100644 --- a/slide2vec/utils/config.py +++ b/slide2vec/utils/config.py @@ -39,8 +39,8 @@ def _encoder_derived_cfg(model_name: str) -> dict: return { "tiling": { "params": { - "target_tile_size_px": reqs["tile_size_px"], - "target_spacing_um": float(reqs["spacing_um"]), + "requested_tile_size_px": reqs["tile_size_px"], + "requested_spacing_um": float(reqs["spacing_um"]), } }, "speed": { @@ -63,15 +63,15 @@ def validate_model_recommended_settings(cfg, *, run_on_cpu: bool = False) -> Non return tiling_params = cfg.tiling.params - target_spacing_um = tiling_params.target_spacing_um - target_tile_size_px = tiling_params.target_tile_size_px + requested_spacing_um = tiling_params.requested_spacing_um + requested_tile_size_px = tiling_params.requested_tile_size_px precision = None if run_on_cpu else cfg.speed.precision allow_non_recommended = bool(model_cfg.allow_non_recommended_settings) validate_encoder_config( canonical, - target_tile_size_px=target_tile_size_px, - target_spacing_um=target_spacing_um, + requested_tile_size_px=requested_tile_size_px, + requested_spacing_um=requested_spacing_um, precision=precision, allow_non_recommended=allow_non_recommended, ) @@ -96,8 +96,8 @@ def get_cfg_from_args(args): cli_cfg = OmegaConf.from_cli(args.opts) requested_cfg = OmegaConf.merge(user_cfg, cli_cfg) model_name = (OmegaConf.select(requested_cfg, "model.name") or "") - spacing_defined = OmegaConf.select(requested_cfg, "tiling.params.target_spacing_um") is not None - tile_size_defined = OmegaConf.select(requested_cfg, "tiling.params.target_tile_size_px") is not None + spacing_defined = OmegaConf.select(requested_cfg, "tiling.params.requested_spacing_um") is not None + tile_size_defined = OmegaConf.select(requested_cfg, "tiling.params.requested_tile_size_px") is not None if model_name and (not spacing_defined or not tile_size_defined): encoder_defaults = _encoder_derived_cfg(model_name) if encoder_defaults: diff --git a/tests/fixtures/gt/test-wsi.coordinates.meta.json b/tests/fixtures/gt/test-wsi.coordinates.meta.json index 9827733..0cc3196 100644 --- a/tests/fixtures/gt/test-wsi.coordinates.meta.json +++ b/tests/fixtures/gt/test-wsi.coordinates.meta.json @@ -51,8 +51,8 @@ "level_downsamples": [1.0] }, "tiling": { - "effective_spacing_um": 0.25200000393750005, - "effective_tile_size_px": 444, + "read_spacing_um": 0.25200000393750005, + "read_tile_size_px": 444, "is_within_tolerance": false, "min_tissue_fraction": 0.1, "n_tiles": 459, diff --git a/tests/test_batch_collator_timing.py b/tests/test_batch_collator_timing.py index 6c4e5e6..0bad9a7 100644 --- a/tests/test_batch_collator_timing.py +++ b/tests/test_batch_collator_timing.py @@ -47,7 +47,7 @@ class FakeReader: ordered_indices = None def __init__(self, image_path, tiling_result, *, backend: str, num_cucim_workers: int, gpu_decode: bool, use_supertiles: bool): - self.tile_size = int(tiling_result.effective_tile_size_px) + self.tile_size = int(tiling_result.read_tile_size_px) def read_batch_with_timing(self, tile_indices): tensor = torch.zeros((len(tile_indices), 3, self.tile_size, self.tile_size), dtype=torch.uint8) @@ -57,7 +57,7 @@ def read_batch_with_timing(self, tile_indices): collator = tile_reader.OnTheFlyBatchTileCollator( image_path=Path("/tmp/fake.svs"), - tiling_result=SimpleNamespace(effective_tile_size_px=4), + tiling_result=SimpleNamespace(read_tile_size_px=4), backend="cucim", num_cucim_workers=4, gpu_decode=False, @@ -99,7 +99,7 @@ def read_regions(self, locations, level, size, *, num_workers=None): reader = tile_reader.WSITileReader( Path("/tmp/fake.svs"), SimpleNamespace( - effective_tile_size_px=4, + read_tile_size_px=4, read_level=0, x=np.array([0, 4]), y=np.array([0, 0]), @@ -129,7 +129,7 @@ class FakeReader: _backend = "cucim" def __init__(self, image_path, tiling_result, *, backend: str, num_cucim_workers: int, gpu_decode: bool, use_supertiles: bool): - self.tile_size = int(tiling_result.effective_tile_size_px) + self.tile_size = int(tiling_result.read_tile_size_px) def read_batch_with_timing(self, tile_indices): tensor = torch.zeros((len(tile_indices), 3, self.tile_size, self.tile_size), dtype=torch.uint8) @@ -145,7 +145,7 @@ def _fake_run_with_filtered_stderr(func, *, suppress_patterns=()): collator = tile_reader.OnTheFlyBatchTileCollator( image_path=Path("/tmp/fake.svs"), - tiling_result=SimpleNamespace(effective_tile_size_px=4), + tiling_result=SimpleNamespace(read_tile_size_px=4), backend="cucim", num_cucim_workers=4, gpu_decode=False, diff --git a/tests/test_output_consistency.py b/tests/test_output_consistency.py index 2dffff7..8f47123 100644 --- a/tests/test_output_consistency.py +++ b/tests/test_output_consistency.py @@ -16,9 +16,9 @@ # -- tiling.params -- TILING_PARAMS = dict( - target_spacing_um=0.5, + requested_spacing_um=0.5, tolerance=0.07, # override (default: 0.05) - target_tile_size_px=224, # override (default: 256) + requested_tile_size_px=224, # override (default: 256) overlap=0.0, tissue_threshold=0.1, # override (default: 0.01) ) diff --git a/tests/test_progress.py b/tests/test_progress.py index 06360ab..aa7fab1 100644 --- a/tests/test_progress.py +++ b/tests/test_progress.py @@ -13,7 +13,7 @@ from slide2vec.api import PreprocessingConfig -DEFAULT_PREPROCESSING = PreprocessingConfig(target_spacing_um=0.5, target_tile_size_px=224) +DEFAULT_PREPROCESSING = PreprocessingConfig(requested_spacing_um=0.5, requested_tile_size_px=224) class RecordingReporter: diff --git a/tests/test_regression_core.py b/tests/test_regression_core.py index 5b8b9e6..8ed10d1 100644 --- a/tests/test_regression_core.py +++ b/tests/test_regression_core.py @@ -22,7 +22,7 @@ from slide2vec.resources import config_resource, load_config ROOT = Path(__file__).resolve().parents[1] -DEFAULT_PREPROCESSING = PreprocessingConfig(target_spacing_um=0.5, target_tile_size_px=224) +DEFAULT_PREPROCESSING = PreprocessingConfig(requested_spacing_um=0.5, requested_tile_size_px=224) def test_resource_loading_uses_packaged_configs(): pytest.importorskip("omegaconf") @@ -66,8 +66,8 @@ def test_get_cfg_from_args_fills_missing_preprocessing_from_single_spacing_model cfg = get_cfg_from_args(args) - assert cfg.tiling.params.target_spacing_um == pytest.approx(0.5) - assert cfg.tiling.params.target_tile_size_px == 448 + assert cfg.tiling.params.requested_spacing_um == pytest.approx(0.5) + assert cfg.tiling.params.requested_tile_size_px == 448 def test_get_cfg_from_args_rejects_models_with_ambiguous_spacing_defaults(tmp_path: Path): @@ -142,10 +142,10 @@ def test_hierarchical_npz_artifacts_round_trip(tmp_path: Path): output_format="npz", metadata={ "coordinates_npz_path": "/tmp/sample-h.coordinates.npz", - "target_tile_size_px": 224, - "effective_tile_size_px": 224, - "target_region_size_px": 672, - "effective_region_size_px": 672, + "requested_tile_size_px": 224, + "read_tile_size_px": 224, + "requested_region_size_px": 672, + "read_region_size_px": 672, "tiles_per_region": 3, }, ) @@ -159,24 +159,24 @@ def test_hierarchical_npz_artifacts_round_trip(tmp_path: Path): assert metadata["num_regions"] == 2 assert metadata["tiles_per_region"] == 3 assert metadata["feature_dim"] == 4 - assert metadata["target_region_size_px"] == 672 + assert metadata["requested_region_size_px"] == 672 -def test_resolve_direct_api_preprocessing_derives_target_region_size_from_multiple(): +def test_resolve_direct_api_preprocessing_derives_requested_region_size_from_multiple(): import slide2vec.api as api model = Model.from_preset("uni") resolved = api._resolve_direct_api_preprocessing( model, PreprocessingConfig( - target_spacing_um=0.5, - target_tile_size_px=224, + requested_spacing_um=0.5, + requested_tile_size_px=224, region_tile_multiple=6, ), ) - assert resolved.target_tile_size_px == 224 - assert resolved.target_region_size_px == 1344 + assert resolved.requested_tile_size_px == 224 + assert resolved.requested_region_size_px == 1344 def test_resolve_direct_api_preprocessing_uses_model_defaults_before_region_derivation(): @@ -190,9 +190,9 @@ def test_resolve_direct_api_preprocessing_uses_model_defaults_before_region_deri ), ) - assert resolved.target_spacing_um == pytest.approx(0.5) - assert resolved.target_tile_size_px == 448 - assert resolved.target_region_size_px == 2688 + assert resolved.requested_spacing_um == pytest.approx(0.5) + assert resolved.requested_tile_size_px == 448 + assert resolved.requested_region_size_px == 2688 def test_resolve_direct_api_preprocessing_rejects_mismatched_region_size_and_multiple(): @@ -200,13 +200,13 @@ def test_resolve_direct_api_preprocessing_rejects_mismatched_region_size_and_mul model = Model.from_preset("uni") - with pytest.raises(ValueError, match="target_region_size_px"): + with pytest.raises(ValueError, match="requested_region_size_px"): api._resolve_direct_api_preprocessing( model, PreprocessingConfig( - target_spacing_um=0.5, - target_tile_size_px=224, - target_region_size_px=1024, + requested_spacing_um=0.5, + requested_tile_size_px=224, + requested_region_size_px=1024, region_tile_multiple=6, ), ) @@ -230,8 +230,8 @@ def fake_run_pipeline(model_arg, **kwargs): assert captured["model"] is model assert captured["kwargs"]["manifest_path"] == "/tmp/slides.csv" assert captured["kwargs"]["preprocessing"].backend == preprocessing.backend - assert captured["kwargs"]["preprocessing"].target_spacing_um == preprocessing.target_spacing_um - assert captured["kwargs"]["preprocessing"].target_tile_size_px == preprocessing.target_tile_size_px + assert captured["kwargs"]["preprocessing"].requested_spacing_um == preprocessing.requested_spacing_um + assert captured["kwargs"]["preprocessing"].requested_tile_size_px == preprocessing.requested_tile_size_px def test_pipeline_run_requires_output_dir(): model = Model.from_preset("virchow2") @@ -479,8 +479,8 @@ def test_execution_options_from_config_forces_fp32_for_cpu_runs(monkeypatch, tmp def test_preprocessing_with_backend_preserves_other_fields(): base = PreprocessingConfig( backend="asap", - target_spacing_um=0.75, - target_tile_size_px=256, + requested_spacing_um=0.75, + requested_tile_size_px=256, tolerance=0.1, overlap=0.2, tissue_threshold=0.4, @@ -495,8 +495,8 @@ def test_preprocessing_with_backend_preserves_other_fields(): updated = base.with_backend("openslide") assert updated.backend == "openslide" - assert updated.target_spacing_um == base.target_spacing_um - assert updated.target_tile_size_px == base.target_tile_size_px + assert updated.requested_spacing_um == base.requested_spacing_um + assert updated.requested_tile_size_px == base.requested_tile_size_px assert updated.segmentation == base.segmentation assert updated.filtering == base.filtering assert updated.preview == base.preview @@ -513,8 +513,8 @@ def test_preprocessing_config_defaults_spacing_and_tile_size_to_none(): cfg = PreprocessingConfig(backend="asap") assert cfg.backend == "asap" - assert cfg.target_spacing_um is None - assert cfg.target_tile_size_px is None + assert cfg.requested_spacing_um is None + assert cfg.requested_tile_size_px is None def test_execution_options_with_output_dir_preserves_other_fields(tmp_path: Path): @@ -580,8 +580,8 @@ def test_cli_build_model_and_pipeline_delegates_to_public_api(monkeypatch, tmp_p use_supertiles=True, jpeg_backend="turbojpeg", params=SimpleNamespace( - target_spacing_um=0.5, - target_tile_size_px=224, + requested_spacing_um=0.5, + requested_tile_size_px=224, tolerance=0.05, overlap=0.0, tissue_threshold=0.1, @@ -635,8 +635,8 @@ def test_get_cfg_from_args_rejects_non_recommended_model_settings_by_default(tmp "output_dir: output", "tiling:", " params:", - " target_spacing_um: 1.0", - " target_tile_size_px: 256", + " requested_spacing_um: 1.0", + " requested_tile_size_px: 256", "model:", " name: virchow", ] @@ -665,8 +665,8 @@ def test_get_cfg_from_args_warns_when_non_recommended_model_settings_are_allowed "output_dir: output", "tiling:", " params:", - " target_spacing_um: 1.0", - " target_tile_size_px: 256", + " requested_spacing_um: 1.0", + " requested_tile_size_px: 256", "model:", " name: virchow", " allow_non_recommended_settings: true", @@ -697,8 +697,8 @@ def test_get_cfg_from_args_rejects_non_recommended_model_precision_by_default(tm "output_dir: output", "tiling:", " params:", - " target_spacing_um: 0.5", - " target_tile_size_px: 224", + " requested_spacing_um: 0.5", + " requested_tile_size_px: 224", "model:", " name: virchow", "speed:", @@ -729,8 +729,8 @@ def test_get_cfg_from_args_warns_when_non_recommended_model_precision_is_allowed "output_dir: output", "tiling:", " params:", - " target_spacing_um: 0.5", - " target_tile_size_px: 224", + " requested_spacing_um: 0.5", + " requested_tile_size_px: 224", "model:", " name: virchow", " allow_non_recommended_settings: true", @@ -762,8 +762,8 @@ def test_get_cfg_from_args_allows_cpu_runs_with_non_recommended_precision(tmp_pa "output_dir: output", "tiling:", " params:", - " target_spacing_um: 0.5", - " target_tile_size_px: 224", + " requested_spacing_um: 0.5", + " requested_tile_size_px: 224", "model:", " name: prism", "speed:", @@ -801,8 +801,8 @@ def test_preprocessing_config_from_config_preserves_tile_store_dir(): use_supertiles=True, jpeg_backend="turbojpeg", params=SimpleNamespace( - target_spacing_um=0.5, - target_tile_size_px=224, + requested_spacing_um=0.5, + requested_tile_size_px=224, tolerance=0.07, overlap=0.0, tissue_threshold=0.1, @@ -835,8 +835,8 @@ def test_preprocessing_config_from_config_uses_explicit_speed_num_cucim_workers( use_supertiles=True, jpeg_backend="turbojpeg", params=SimpleNamespace( - target_spacing_um=0.5, - target_tile_size_px=224, + requested_spacing_um=0.5, + requested_tile_size_px=224, tolerance=0.07, overlap=0.0, tissue_threshold=0.1, @@ -867,8 +867,8 @@ def test_preprocessing_config_from_config_disables_gpu_decode_by_default(): use_supertiles=True, jpeg_backend="turbojpeg", params=SimpleNamespace( - target_spacing_um=0.5, - target_tile_size_px=224, + requested_spacing_um=0.5, + requested_tile_size_px=224, tolerance=0.07, overlap=0.0, tissue_threshold=0.1, diff --git a/tests/test_regression_inference.py b/tests/test_regression_inference.py index ed02318..4b73def 100644 --- a/tests/test_regression_inference.py +++ b/tests/test_regression_inference.py @@ -30,8 +30,8 @@ def PreprocessingConfig(*args, **kwargs): - kwargs.setdefault("target_spacing_um", 0.5) - kwargs.setdefault("target_tile_size_px", 224) + kwargs.setdefault("requested_spacing_um", 0.5) + kwargs.setdefault("requested_tile_size_px", 224) return BasePreprocessingConfig(*args, **kwargs) @@ -251,7 +251,7 @@ def test_collect_distributed_pipeline_artifacts_uses_hierarchical_artifacts_for_ ) preprocessing = replace( DEFAULT_PREPROCESSING, - target_region_size_px=448, + requested_region_size_px=448, region_tile_multiple=2, ) execution = ExecutionOptions(output_dir=tmp_path, num_gpus=2, output_format="pt") @@ -980,8 +980,8 @@ def __init__(self, **kwargs): preprocessing = PreprocessingConfig( backend="asap", - target_spacing_um=0.5, - target_tile_size_px=224, + requested_spacing_um=0.5, + requested_tile_size_px=224, tolerance=0.05, overlap=0.0, tissue_threshold=0.1, @@ -1220,7 +1220,7 @@ def test_embed_single_slide_distributed_uses_shared_slide_aggregation_helper(mon x=np.array([0, 1]), y=np.array([2, 3]), tile_size_lv0=224, - target_spacing_um=0.5, + requested_spacing_um=0.5, ) @contextmanager @@ -1284,7 +1284,7 @@ def test_embed_single_slide_distributed_skips_parent_backend_load_for_tile_model x=np.array([0, 1]), y=np.array([2, 3]), tile_size_lv0=224, - target_spacing_um=0.5, + requested_spacing_um=0.5, ) @contextmanager @@ -2170,8 +2170,8 @@ def __call__(self, batch_indices): x=np.array([0, 10]), y=np.array([5, 15]), backend="asap", - target_spacing_um=0.5, - target_tile_size_px=4, + requested_spacing_um=0.5, + requested_tile_size_px=4, read_spacing_um=0.5, read_tile_size_px=4, tile_size_lv0=224, @@ -2233,10 +2233,10 @@ def encode_tiles(self, image): tiling_result = SimpleNamespace( x=np.array([0, 10]), y=np.array([5, 15]), - target_spacing_um=0.5, + requested_spacing_um=0.5, requested_tile_size_px=4, read_spacing_um=0.5, - effective_tile_size_px=4, + read_tile_size_px=4, tile_size_lv0=224, tiles_tar_path=Path("/tmp/slide-a.tiles.tar"), ) @@ -2316,10 +2316,10 @@ def encode_tiles(self, image): tiling_result = SimpleNamespace( x=np.array([0]), y=np.array([5]), - target_spacing_um=0.5, + requested_spacing_um=0.5, requested_tile_size_px=4, read_spacing_um=0.5, - effective_tile_size_px=4, + read_tile_size_px=4, tile_size_lv0=224, tiles_tar_path=Path("/tmp/current-run.tiles.tar"), ) @@ -2420,10 +2420,10 @@ def __call__(self, batch_indices): tiling_result = SimpleNamespace( x=np.array([0, 10]), y=np.array([5, 15]), - target_spacing_um=0.5, + requested_spacing_um=0.5, requested_tile_size_px=4, read_spacing_um=0.5, - effective_tile_size_px=4, + read_tile_size_px=4, tile_size_lv0=224, ) execution = ExecutionOptions( @@ -2513,10 +2513,10 @@ def _fake_run_with_filtered_stderr(func, **kwargs): tiling_result = SimpleNamespace( x=np.array([0, 10]), y=np.array([5, 15]), - target_spacing_um=0.5, + requested_spacing_um=0.5, requested_tile_size_px=4, read_spacing_um=0.5, - effective_tile_size_px=4, + read_tile_size_px=4, tile_size_lv0=224, ) execution = ExecutionOptions( @@ -2607,8 +2607,8 @@ def __call__(self, batch_indices): x=np.array([0, 10]), y=np.array([5, 15]), backend="cucim", - target_spacing_um=0.5, - target_tile_size_px=4, + requested_spacing_um=0.5, + requested_tile_size_px=4, read_spacing_um=0.5, read_tile_size_px=4, tile_size_lv0=224, @@ -2684,8 +2684,8 @@ def __call__(self, batch_indices): x=np.array([0, 10]), y=np.array([5, 15]), backend="asap", - target_spacing_um=0.5, - target_tile_size_px=4, + requested_spacing_um=0.5, + requested_tile_size_px=4, read_spacing_um=0.5, read_tile_size_px=4, tile_size_lv0=224, @@ -2759,8 +2759,8 @@ def test_compute_tile_embeddings_for_slide_requires_current_run_tile_store_witho SimpleNamespace( x=np.array([0]), y=np.array([1]), - target_spacing_um=0.5, - target_tile_size_px=4, + requested_spacing_um=0.5, + requested_tile_size_px=4, read_spacing_um=0.5, read_tile_size_px=4, tile_size_lv0=224, @@ -2778,10 +2778,10 @@ def test_build_hierarchical_index_is_region_major_and_row_major_within_region(): x=np.array([100, 1000], dtype=np.int64), y=np.array([200, 1200], dtype=np.int64), tile_size_lv0=672, - effective_region_size_px=672, - target_region_size_px=672, - effective_tile_size_px=224, - target_tile_size_px=224, + read_region_size_px=672, + requested_region_size_px=672, + read_tile_size_px=224, + requested_tile_size_px=224, ) index = inference._build_hierarchical_index( @@ -2812,26 +2812,48 @@ def test_resolve_hierarchical_geometry_scales_tile_first_under_spacing_mismatch( import slide2vec.inference as inference preprocessing = PreprocessingConfig( - target_spacing_um=0.5, - target_tile_size_px=224, - target_region_size_px=1792, + requested_spacing_um=0.5, + requested_tile_size_px=224, + requested_region_size_px=1792, region_tile_multiple=8, ) tiling_result = SimpleNamespace( - effective_tile_size_px=3319, - effective_spacing_um=0.27, + read_tile_size_px=3319, + read_spacing_um=0.27, tile_size_lv0=3319, base_spacing_um=0.27, ) geometry = inference._resolve_hierarchical_geometry(preprocessing, tiling_result) - assert geometry["effective_tile_size_px"] == 415 - assert geometry["effective_region_size_px"] == 3320 + assert geometry["read_tile_size_px"] == 415 + assert geometry["read_region_size_px"] == 3320 assert geometry["tile_size_lv0"] == 415 assert geometry["tiles_per_region"] == 64 +def test_resolve_hierarchical_geometry_keeps_level0_footprint_when_spacing_matches_base(): + import slide2vec.inference as inference + + preprocessing = PreprocessingConfig( + requested_spacing_um=0.5, + requested_tile_size_px=224, + requested_region_size_px=448, + region_tile_multiple=2, + ) + tiling_result = SimpleNamespace( + read_tile_size_px=224, + read_spacing_um=0.486187607049942, + tile_size_lv0=224, + base_spacing_um=0.486187607049942, + ) + + geometry = inference._resolve_hierarchical_geometry(preprocessing, tiling_result) + + assert geometry["read_tile_size_px"] == 224 + assert geometry["tile_size_lv0"] == 224 + + def test_build_hierarchical_index_uses_tile_first_level0_offsets_under_spacing_mismatch(): import slide2vec.inference as inference @@ -2968,13 +2990,12 @@ def build_batch_sampler(self, *, batch_size, dataset_indices): x=np.array([0, 100], dtype=np.int64), y=np.array([0, 100], dtype=np.int64), requested_tile_size_px=224, - effective_tile_size_px=224, - target_tile_size_px=224, - target_region_size_px=448, - effective_region_size_px=448, + read_tile_size_px=224, + requested_region_size_px=448, + read_region_size_px=448, tile_size_lv0=448, - target_spacing_um=0.5, - effective_spacing_um=0.5, + requested_spacing_um=0.5, + read_spacing_um=0.5, base_spacing_um=0.5, read_level=0, ) @@ -2983,7 +3004,7 @@ def build_batch_sampler(self, *, batch_size, dataset_indices): loaded, slide, tiling_result, - preprocessing=replace(DEFAULT_PREPROCESSING, region_tile_multiple=2, target_region_size_px=448), + preprocessing=replace(DEFAULT_PREPROCESSING, region_tile_multiple=2, requested_region_size_px=448), execution=ExecutionOptions(batch_size=4, num_workers=0, num_gpus=1), ) diff --git a/tests/test_regression_models.py b/tests/test_regression_models.py index 8b34f5a..0290e62 100644 --- a/tests/test_regression_models.py +++ b/tests/test_regression_models.py @@ -20,7 +20,7 @@ from slide2vec.resources import config_resource, load_config ROOT = Path(__file__).resolve().parents[1] -DEFAULT_PREPROCESSING = PreprocessingConfig(target_spacing_um=0.5, target_tile_size_px=224) +DEFAULT_PREPROCESSING = PreprocessingConfig(requested_spacing_um=0.5, requested_tile_size_px=224) def test_model_embed_slide_uses_direct_api_and_returns_first_result(monkeypatch): @@ -275,8 +275,8 @@ def fake_embed_slides(model_arg, slides, **kwargs): assert captured["slides"][0]["image_path"] == Path("/tmp/slide-a.svs") assert isinstance(captured["preprocessing"], PreprocessingConfig) assert captured["preprocessing"].backend == "auto" - assert captured["preprocessing"].target_tile_size_px == 224 - assert captured["preprocessing"].target_spacing_um == pytest.approx(0.5) + assert captured["preprocessing"].requested_tile_size_px == 224 + assert captured["preprocessing"].requested_spacing_um == pytest.approx(0.5) def test_model_embed_slide_infers_missing_values_from_explicit_backend_only_preprocessing( @@ -308,8 +308,8 @@ def fake_embed_slides(model_arg, slides, **kwargs): assert result is expected assert captured["preprocessing"].backend == "asap" - assert captured["preprocessing"].target_tile_size_px == 224 - assert captured["preprocessing"].target_spacing_um == pytest.approx(0.5) + assert captured["preprocessing"].requested_tile_size_px == 224 + assert captured["preprocessing"].requested_spacing_um == pytest.approx(0.5) def test_model_embed_slides_rejects_ambiguous_default_spacing( @@ -356,7 +356,7 @@ def test_model_embed_slides_rejects_non_recommended_preprocessing_by_default(): with pytest.raises(ValueError, match="allow_non_recommended_settings"): model.embed_slides( [{"sample_id": "slide-a", "image_path": "/tmp/slide-a.svs"}], - preprocessing=PreprocessingConfig(target_spacing_um=1.0, target_tile_size_px=256), + preprocessing=PreprocessingConfig(requested_spacing_um=1.0, requested_tile_size_px=256), ) @@ -383,7 +383,7 @@ def test_model_embed_slides_warns_when_non_recommended_settings_are_allowed( with caplog.at_level("WARNING", logger="slide2vec"): result = model.embed_slides( [{"sample_id": "slide-a", "image_path": "/tmp/slide-a.svs"}], - preprocessing=PreprocessingConfig(target_spacing_um=1.0, target_tile_size_px=256), + preprocessing=PreprocessingConfig(requested_spacing_um=1.0, requested_tile_size_px=256), ) assert result == expected