Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
dependencies = [
"hs2p[asap,cucim,openslide,vips]>=3.2.0",
"hs2p[asap,cucim,openslide,vips]>=3.2.1",
"omegaconf",
"matplotlib",
"numpy<2",
Expand Down Expand Up @@ -88,7 +88,7 @@ fm = [
"pandas",
"pillow",
"rich",
"hs2p[asap,cucim,openslide,vips]>=3.2.0",
"hs2p[asap,cucim,openslide,vips]>=3.2.1",
"wandb",
"torch>=2.3,<2.8",
"torchvision>=0.18.0",
Expand Down
80 changes: 64 additions & 16 deletions slide2vec/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import pandas as pd
import torch
from hs2p import SlideSpec, FilterConfig, PreviewConfig, SegmentationConfig, TilingConfig, load_tiling_result, tile_slides
from hs2p.wsi.backend import resolve_backend
from hs2p.utils.stderr import run_with_filtered_stderr, run_with_filtered_stdio
import numpy as np
from transformers.image_processing_utils import BaseImageProcessor
Expand Down Expand Up @@ -209,6 +210,25 @@ def _resolve_on_the_fly_num_workers(num_cucim_workers: int) -> tuple[int, str]:
return effective_num_workers, " // ".join(details)


def _log_on_the_fly_worker_override_once(
    preprocessing: PreprocessingConfig,
    execution: ExecutionOptions,
    tiling_results: Sequence[Any],
) -> None:
    """Log a single INFO line when on-the-fly mode will override the configured
    DataLoader worker count.

    Emits nothing unless on-the-fly preprocessing is active (and tiles are not
    read from disk), at least one tiling result resolves to the ``cucim``
    backend, and the resolved worker count differs from
    ``execution.num_workers``.
    """
    on_the_fly_active = preprocessing.on_the_fly and preprocessing.read_tiles_from is None
    if not on_the_fly_active:
        return
    uses_cucim = any(
        _resolve_slide_backend(preprocessing, result) == "cucim"
        for result in tiling_results
    )
    if not uses_cucim:
        return
    resolved_workers, worker_context = _resolve_on_the_fly_num_workers(
        preprocessing.num_cucim_workers
    )
    if resolved_workers == execution.num_workers:
        return
    logging.getLogger(__name__).info(
        f"on-the-fly mode: setting DataLoader num_workers={resolved_workers} "
        f"({worker_context}); "
        f"ignoring speed.num_dataloader_workers={execution.num_workers}"
    )


def _redirect_worker_output() -> None:
worker_log_path = os.path.join(
tempfile.gettempdir(),
Expand Down Expand Up @@ -358,6 +378,11 @@ def embed_slides(
prepared_slides,
tiling_results,
)
_log_on_the_fly_worker_override_once(
preprocessing,
execution,
embeddable_tiling_results,
)
_write_zero_tile_embedding_sidecars(
zero_tile_pairs,
model=model,
Expand Down Expand Up @@ -532,6 +557,11 @@ def embed_patients(
prepared_slides,
tiling_results,
)
_log_on_the_fly_worker_override_once(
preprocessing,
execution,
embeddable_tiling_results,
)
emit_progress("embedding.started", slide_count=len(embeddable_slides))
loaded = model._load_backend()

Expand Down Expand Up @@ -651,6 +681,11 @@ def embed_tiles(
resolved_tiling_results = _normalize_tiling_results(tiling_results, slide_records)
resolved_preprocessing = _resolve_model_preprocessing(model, preprocessing)
hierarchical_mode = _is_hierarchical_preprocessing(resolved_preprocessing)
_log_on_the_fly_worker_override_once(
resolved_preprocessing,
execution,
resolved_tiling_results,
)
artifacts: list[TileEmbeddingArtifact] | list[HierarchicalEmbeddingArtifact] = []
for slide, tiling_result in zip(slide_records, resolved_tiling_results):
if hierarchical_mode:
Expand Down Expand Up @@ -811,6 +846,11 @@ def run_pipeline(
successful_slides,
tiling_results,
)
_log_on_the_fly_worker_override_once(
resolved_preprocessing,
execution,
embeddable_tiling_results,
)

if tiling_only:
emit_progress(
Expand Down Expand Up @@ -1011,6 +1051,11 @@ def run_pipeline_with_coordinates(
slide_records,
tiling_results,
)
_log_on_the_fly_worker_override_once(
resolved_preprocessing,
execution,
embeddable_tiling_results,
)
_write_zero_tile_embedding_sidecars(
zero_tile_pairs,
model=model,
Expand Down Expand Up @@ -1535,13 +1580,7 @@ def _compute_tile_embeddings_for_slide(
loader_kwargs = _embedding_dataloader_kwargs(loaded, execution)
resolved_backend = _resolve_slide_backend(preprocessing, tiling_result)
if preprocessing.on_the_fly and preprocessing.read_tiles_from is None and resolved_backend == "cucim":
effective_num_workers, worker_context = _resolve_on_the_fly_num_workers(preprocessing.num_cucim_workers)
if effective_num_workers != execution.num_workers:
logging.getLogger(__name__).info(
f"on-the-fly mode: setting DataLoader num_workers={effective_num_workers} "
f"({worker_context}); "
f"ignoring speed.num_dataloader_workers={execution.num_workers}"
)
effective_num_workers, _ = _resolve_on_the_fly_num_workers(preprocessing.num_cucim_workers)
loader_kwargs["num_workers"] = effective_num_workers
if effective_num_workers == 0:
loader_kwargs.pop("persistent_workers", None)
Expand Down Expand Up @@ -1620,13 +1659,7 @@ def _compute_hierarchical_embeddings_for_slide(
loader_kwargs = _embedding_dataloader_kwargs(loaded, execution)
resolved_backend = _resolve_slide_backend(preprocessing, tiling_result)
if resolved_backend == "cucim":
effective_num_workers, worker_context = _resolve_on_the_fly_num_workers(preprocessing.num_cucim_workers)
if effective_num_workers != execution.num_workers:
logging.getLogger(__name__).info(
f"on-the-fly hierarchical mode: setting DataLoader num_workers={effective_num_workers} "
f"({worker_context}); "
f"ignoring speed.num_dataloader_workers={execution.num_workers}"
)
effective_num_workers, _ = _resolve_on_the_fly_num_workers(preprocessing.num_cucim_workers)
loader_kwargs["num_workers"] = effective_num_workers
if effective_num_workers == 0:
loader_kwargs.pop("persistent_workers", None)
Expand Down Expand Up @@ -2425,7 +2458,8 @@ def _resolve_device(device: str, default_device):


def _describe_device_mode(model, execution: ExecutionOptions) -> str:
if model._requested_device == "cpu":
requested_device = getattr(model, "_requested_device", None)
if requested_device == "cpu":
return "cpu"
if execution.num_gpus and execution.num_gpus > 1:
return f"{execution.num_gpus} gpus"
Expand Down Expand Up @@ -2722,6 +2756,19 @@ def _tile_slides(
) -> list[Any]:
_preload_asap_wholeslidedata(preprocessing)
tiling_cfg, segmentation_cfg, filtering_cfg, preview_cfg, read_coordinates_from, resume = _build_hs2p_configs(preprocessing)
for slide in slides:
backend_selection = resolve_backend(
tiling_cfg.requested_backend,
wsi_path=slide.image_path,
mask_path=slide.mask_path,
)
if backend_selection.reason is not None:
emit_progress(
"backend.selected",
sample_id=slide.sample_id,
backend=backend_selection.backend,
reason=backend_selection.reason,
)

def _run_tile_slides():
return tile_slides(
Expand Down Expand Up @@ -2932,7 +2979,8 @@ def ensure_defaults() -> tuple[int, float]:


def _validate_multi_gpu_execution(model, execution: ExecutionOptions) -> None:
if model._requested_device == "cpu":
requested_device = getattr(model, "_requested_device", None)
if requested_device == "cpu":
raise ValueError("ExecutionOptions.num_gpus > 1 is incompatible with device='cpu'")
if not torch.cuda.is_available():
raise RuntimeError("ExecutionOptions.num_gpus > 1 requires CUDA")
Expand Down
12 changes: 12 additions & 0 deletions slide2vec/progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@ def write_log(self, message: str, *, stream=None) -> None:
print(message, file=target, flush=True)


def _format_backend_selected_message(payload: dict[str, Any]) -> str:
reason = payload.get("reason")
if reason:
return f"[backend] {payload['sample_id']}: {reason}"
return f"[backend] {payload['sample_id']}: using {payload['backend']}"


class PlainTextCliProgressReporter:
def __init__(self, *, stream=None) -> None:
self.stream = stream or sys.stdout
Expand Down Expand Up @@ -133,6 +140,8 @@ def _format_line(self, kind: str, payload: dict[str, Any]) -> str | None:
f"Embedding finished: {payload['slides_completed']}/{payload['slide_count']} slides, "
f"{payload['tile_artifacts']} tile artifacts, {payload['slide_artifacts']} slide artifacts"
)
if kind == "backend.selected":
return _format_backend_selected_message(payload)
if kind == "run.finished":
return f"Run finished successfully. Logs: {payload['logs_dir']}"
if kind == "run.failed":
Expand Down Expand Up @@ -313,6 +322,9 @@ def emit(self, event: ProgressEvent) -> None:
_embedding_summary_rows(payload),
)
return
if kind == "backend.selected":
self.console.print(_format_backend_selected_message(payload))
return
if kind == "run.finished":
self._print_summary(
"Run Complete",
Expand Down
15 changes: 10 additions & 5 deletions slide2vec/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,14 +151,19 @@ def setup(args):
def hf_login():
from huggingface_hub import login

if "HF_TOKEN" not in os.environ and distributed.is_main_process():
hf_token = getpass.getpass(
token = os.environ.get("HF_TOKEN")
prompted = False
if token is None and distributed.is_main_process():
token = getpass.getpass(
"Enter your Hugging Face API token (input will not be visible): "
)
os.environ["HF_TOKEN"] = hf_token
os.environ["HF_TOKEN"] = token
prompted = True
if token is None:
return
if distributed.is_enabled_and_multiple_gpus():
import torch.distributed as dist

dist.barrier()
if distributed.is_main_process():
login(os.environ["HF_TOKEN"])
if distributed.is_main_process() and prompted:
login(token)
5 changes: 5 additions & 0 deletions tasks/lessons.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
## 2026-04-12

- When refactoring CLI parsing to support `parse_known_args()`, prefer updating the test double to match the real parser API instead of adding a production fallback for mocks. Keep the runtime code clean unless the fallback is genuinely needed by real callers.
- When a regression test stubs a helper that normally returns a structured object, keep the production code strict and make the stub return the expected attributes instead of broadening the runtime API to accept a placeholder value.
- When stdout noise comes from both a local wrapper and an upstream dependency, suppress the common-case message at the upstream boundary and keep the per-item local note at debug level instead of stacking multiple INFO lines.
- When a diagnostic line is still useful, move it to the run boundary and log it once per run instead of per slide or per worker.
- When a backend-selection reason is user-relevant, emit it as a structured progress event and let the reporter choose how to render it instead of filtering it out in the API.
- When a CLI depends on both slide2vec and hs2p progress systems, activate them with a shared reporter bridge so upstream events do not disappear into the default null reporter.

## 2026-04-10

Expand Down
4 changes: 4 additions & 0 deletions tests/test_progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def __init__(self, file=None, **kwargs):
def print(self, message, **kwargs):
self.lines.append((message, kwargs))

def log(self, message, **kwargs):
self.lines.append((message, kwargs))

class FakeProgress:
def __init__(self, *args, **kwargs):
self.tasks = {}
Expand Down Expand Up @@ -131,6 +134,7 @@ def parse_known_args(self, argv=None):
assert isinstance(progress.get_progress_reporter(), progress.NullProgressReporter)



def test_cli_entrypoint_returns_zero(monkeypatch):
import slide2vec.cli as cli

Expand Down
20 changes: 20 additions & 0 deletions tests/test_regression_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,26 @@ def test_execution_options_logs_resolved_auto_num_workers(monkeypatch, caplog):
assert "ExecutionOptions: num_workers=18 (requested=auto)" in caplog.text
assert "num_workers=auto" not in caplog.text


def test_hf_login_skips_hub_login_when_token_is_already_set(monkeypatch):
    """hf_login must not call huggingface_hub.login when HF_TOKEN is already in the environment."""
    import slide2vec.utils.config as config

    login_invocations = []

    def _record_login(*args, **kwargs):
        # Any call here means hf_login reached the hub login path, which it should not.
        del args, kwargs
        login_invocations.append(True)

    monkeypatch.setenv("HF_TOKEN", "token-from-env")
    monkeypatch.setattr(config.distributed, "is_main_process", lambda: True)
    monkeypatch.setattr(config.distributed, "is_enabled_and_multiple_gpus", lambda: False)
    monkeypatch.setattr("huggingface_hub.login", _record_login)

    config.hf_login()

    assert not login_invocations

def test_execution_options_from_config_maps_cli_fields(tmp_path: Path):
cfg = SimpleNamespace(
output_dir=str(tmp_path),
Expand Down
Loading
Loading