From 4974adfc0a5008387821e863c147b9e55c1407a2 Mon Sep 17 00:00:00 2001 From: mducducd Date: Wed, 11 Feb 2026 08:40:03 +0000 Subject: [PATCH 1/9] update comments --- src/stamp/encoding/encoder/eagle.py | 4 ++- src/stamp/preprocessing/extractor/ticon.py | 8 ++++-- src/stamp/statistics/survival.py | 31 +++++++++++++++------- uv.lock | 9 +++---- 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/stamp/encoding/encoder/eagle.py b/src/stamp/encoding/encoder/eagle.py index d966c84e..3d9baca4 100644 --- a/src/stamp/encoding/encoder/eagle.py +++ b/src/stamp/encoding/encoder/eagle.py @@ -263,10 +263,12 @@ def _align_vir2_to_ctp_by_coords( decimals: int = 5, ) -> tuple[torch.Tensor, np.ndarray]: """Align vir2 features to ctp features based on coordinates.""" + # round coordinates to avoid floating-point precision mismatches ref = np.round(np.asarray(ref_coords_um, dtype=np.float64), decimals) oth = np.round(np.asarray(other_coords_um, dtype=np.float64), decimals) - # coord -> queue(indices) + # build mapping: coordinate -> queue of indices + # using deque ensures stable matching when duplicates exist buckets = defaultdict(deque) for j, key in enumerate(map(tuple, oth)): buckets[key].append(j) diff --git a/src/stamp/preprocessing/extractor/ticon.py b/src/stamp/preprocessing/extractor/ticon.py index fb8f9b43..ab7eb829 100644 --- a/src/stamp/preprocessing/extractor/ticon.py +++ b/src/stamp/preprocessing/extractor/ticon.py @@ -1,3 +1,9 @@ +""" +This file contains code adapted from: +TICON: A Slide-Level Tile Contextualizer for Histopathology Representation Learning +https://github.com/cvlab-stonybrook/TICON +""" + import math from collections.abc import Callable, Mapping from functools import partial @@ -8,8 +14,6 @@ import torch.nn as nn from huggingface_hub import hf_hub_download from jaxtyping import Float - -# from omegaconf import OmegaConf from torch import Tensor from torchvision import transforms diff --git a/src/stamp/statistics/survival.py b/src/stamp/statistics/survival.py index 063793cf..46371022 100644 --- a/src/stamp/statistics/survival.py +++ b/src/stamp/statistics/survival.py @@ -136,16 +136,27 @@ def _plot_km( ) kmf_high.plot_survival_function(ax=ax, ci_show=False, color="red") - add_at_risk_counts(kmf_low, kmf_high, ax=ax) - - # --- log-rank and c-index --- - res = logrank_test( - low_df[time_label], - high_df[time_label], - event_observed_A=low_df[status_label], - event_observed_B=high_df[status_label], - ) - logrank_p = float(res.p_value) + # ---- add at-risk counts only for fitted curves ---- + fitters = [] + if len(low_df) > 0: + fitters.append(kmf_low) + if len(high_df) > 0: + fitters.append(kmf_high) + + if len(fitters) > 0: + add_at_risk_counts(*fitters, ax=ax) + + # ---- log-rank only if both groups exist ---- + if len(low_df) > 0 and len(high_df) > 0: + res = logrank_test( + low_df[time_label], + high_df[time_label], + event_observed_A=low_df[status_label], + event_observed_B=high_df[status_label], + ) + logrank_p = float(res.p_value) + else: + logrank_p = float("nan") c_used, used, *_ = _cindex(time, event, risk) ax.text( diff --git a/uv.lock b/uv.lock index c4015d9f..96b4b73a 100644 --- a/uv.lock +++ b/uv.lock @@ -3699,13 +3699,14 @@ wheels = [ [[package]] name = "stamp" -version = "2.3.0" +version = "2.4.0" source = { editable = "." } dependencies = [ { name = "beartype" }, { name = "einops" }, { name = "h5py" }, { name = "jaxtyping" }, + { name = "lifelines" }, { name = "lightning" }, { name = "matplotlib" }, { name = "numpy" }, @@ -3807,7 +3808,6 @@ gigapath = [ { name = "fvcore" }, { name = "gigapath" }, { name = "iopath" }, - { name = "lifelines" }, { name = "monai" }, { name = "scikit-image" }, { name = "scikit-survival" }, @@ -3828,7 +3828,6 @@ gpu = [ { name = "huggingface-hub" }, { name = "iopath" }, { name = "jinja2" }, - { name = "lifelines" }, { name = "madeleine" }, { name = "mamba-ssm" }, { name = "monai" }, @@ -3920,7 +3919,7 @@ requires-dist = [ { name = "iopath", marker = "extra == 'gigapath'" }, { name = "jaxtyping", specifier = ">=0.3.2" }, { name = "jinja2", marker = "extra == 'cobra'", specifier = ">=3.1.4" }, - { name = "lifelines", marker = "extra == 'gigapath'" }, + { name = "lifelines", specifier = ">=0.28.0" }, { name = "lightning", specifier = ">=2.5.2" }, { name = "madeleine", marker = "extra == 'madeleine'", git = "https://github.com/mahmoodlab/MADELEINE.git?rev=de7c85acc2bdad352e6df8eee5694f8b6f288012" }, { name = "mamba-ssm", marker = "extra == 'cobra'", specifier = ">=2.2.6.post3" }, @@ -4747,4 +4746,4 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/24/fd/725b8e73ac2a50e78a4534ac43c6addf5c1c2d65380dd48a9169cc6739a9/yarl-1.20.1-cp313-cp313t-win32.whl", hash = "sha256:b121ff6a7cbd4abc28985b6028235491941b9fe8fe226e6fdc539c977ea1739d", size = 86591, upload-time = "2025-06-10T00:45:25.793Z" }, { url = "https://files.pythonhosted.org/packages/94/c3/b2e9f38bc3e11191981d57ea08cab2166e74ea770024a646617c9cddd9f6/yarl-1.20.1-cp313-cp313t-win_amd64.whl", hash = "sha256:541d050a355bbbc27e55d906bc91cb6fe42f96c01413dd0f4ed5a5240513874f", size = 93003, upload-time = "2025-06-10T00:45:27.752Z" }, { url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542, upload-time = "2025-06-10T00:46:07.521Z" }, -] \ No newline at end of file +] From f6a4279cc3775db01431c19b37495413a8931963 Mon Sep 17 00:00:00 2001 From: mducducd Date: Wed, 11 Feb 2026 08:42:48 +0000 Subject: [PATCH 2/9] update comments --- src/stamp/statistics/survival.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stamp/statistics/survival.py b/src/stamp/statistics/survival.py index 46371022..d8f5ebf9 100644 --- a/src/stamp/statistics/survival.py +++ b/src/stamp/statistics/survival.py @@ -136,7 +136,7 @@ def _plot_km( ) kmf_high.plot_survival_function(ax=ax, ci_show=False, color="red") - # ---- add at-risk counts only for fitted curves ---- + # add at-risk counts only for fitted curves fitters = [] if len(low_df) > 0: fitters.append(kmf_low) @@ -146,7 +146,7 @@ def _plot_km( if len(fitters) > 0: add_at_risk_counts(*fitters, ax=ax) - # ---- log-rank only if both groups exist ---- + # log-rank only if both groups exist if len(low_df) > 0 and len(high_df) > 0: res = logrank_test( low_df[time_label], From 2aa37a5fbdbb4d4ad61ce0e46eeab3d2ced2e431 Mon Sep 17 00:00:00 2001 From: mducducd Date: Wed, 11 Feb 2026 08:45:54 +0000 Subject: [PATCH 3/9] update comments --- src/stamp/modeling/data.py | 14 +++----------- src/stamp/statistics/survival.py | 12 ++++++------ 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/stamp/modeling/data.py b/src/stamp/modeling/data.py index 2f121f27..3eb53b37 100755 --- a/src/stamp/modeling/data.py +++ b/src/stamp/modeling/data.py @@ -341,9 +341,9 @@ def load_patient_level_data( clini_table: Path, feature_dir: Path, patient_label: PandasLabel, - ground_truth_label: PandasLabel | None = None, # <- now optional - time_label: PandasLabel | None = None, # <- for survival - status_label: PandasLabel | None = None, # <- for survival + ground_truth_label: PandasLabel | None = None, + time_label: PandasLabel | None = None, + status_label: PandasLabel | None = None, feature_ext: str = ".h5", ) -> dict[PatientId, PatientData]: """ @@ -902,15 +902,7 @@ def _parse_survival_status(value) -> int | None: None, NaN, '' -> None """ - # Handle missing inputs gracefully - # if value is None: - # return 0 # treat empty/missing as censored - # if isinstance(value, float) and math.isnan(value): - # return 0 # treat empty/missing as censored - s = str(value).strip().lower() - # if s in {"", "nan", "none"}: - # return 0 # treat empty/missing as censored # Known mappings positives = {"1", "event", "dead", "deceased", "yes", "y", "True", "true"} diff --git a/src/stamp/statistics/survival.py b/src/stamp/statistics/survival.py index d8f5ebf9..78fb51cd 100644 --- a/src/stamp/statistics/survival.py +++ b/src/stamp/statistics/survival.py @@ -24,7 +24,7 @@ def _comparable_pairs_count(times: np.ndarray, events: np.ndarray) -> int: def _cindex( time: np.ndarray, event: np.ndarray, - risk: np.ndarray, # will be flipped in function + risk: np.ndarray, ) -> tuple[float, int]: """Compute C-index using Lifelines convention: higher risk → shorter survival (worse outcome). @@ -40,13 +40,13 @@ def _survival_stats_for_csv( time_label: str, status_label: str, risk_label: str | None = None, - cut_off: float | None = None, # will be flipped in function + cut_off: float | None = None, ) -> pd.Series: """Compute C-index and log-rank p for one CSV.""" if risk_label is None: risk_label = "pred_score" - # --- Clean NaNs and invalid events before computing stats --- + # Clean NaNs and invalid events before computing stats df = df.dropna(subset=[time_label, status_label, risk_label]).copy() df = df[df[status_label].isin([0, 1])] if len(df) == 0: @@ -56,10 +56,10 @@ def _survival_stats_for_csv( event = np.asarray(df[status_label], dtype=int) risk = np.asarray(df[risk_label], dtype=float) - # --- Concordance index --- + # Concordance index c_index, n_pairs = _cindex(time, event, risk) - # --- Log-rank test (median split) --- + # Log-rank test (median split) median_risk = float(cut_off) if cut_off is not None else float(np.nanmedian(risk)) low_mask = risk <= median_risk high_mask = risk > median_risk @@ -113,7 +113,7 @@ def _plot_km( event = np.asarray(df[status_label], dtype=int) risk = np.asarray(df[risk_label], dtype=float) - # --- split groups --- + # split groups median_risk = float(cut_off) if cut_off is not None else np.nanmedian(risk) low_mask = risk <= median_risk high_mask = risk > median_risk From 5f4f1bbe46a804ab85628204d4ce90f06b492f05 Mon Sep 17 00:00:00 2001 From: mducducd Date: Mon, 16 Feb 2026 11:38:14 +0000 Subject: [PATCH 4/9] add red-dino --- src/stamp/preprocessing/__init__.py | 5 ++ src/stamp/preprocessing/config.py | 1 + src/stamp/preprocessing/extractor/reddino.py | 67 ++++++++++++++++++++ src/stamp/preprocessing/extractor/uni2.py | 17 +++-- 4 files changed, 84 insertions(+), 6 deletions(-) create mode 100644 src/stamp/preprocessing/extractor/reddino.py diff --git a/src/stamp/preprocessing/__init__.py b/src/stamp/preprocessing/__init__.py index ab3ff0d2..3b747d67 100755 --- a/src/stamp/preprocessing/__init__.py +++ b/src/stamp/preprocessing/__init__.py @@ -177,6 +177,11 @@ def extract_( extractor = dino_bloom() + case ExtractorName.RED_DINO: + from stamp.preprocessing.extractor.reddino import red_dino + + extractor = red_dino() + case ExtractorName.VIRCHOW: from stamp.preprocessing.extractor.virchow import virchow diff --git a/src/stamp/preprocessing/config.py b/src/stamp/preprocessing/config.py index 5eca41dd..8931ca5f 100644 --- a/src/stamp/preprocessing/config.py +++ b/src/stamp/preprocessing/config.py @@ -19,6 +19,7 @@ class ExtractorName(StrEnum): UNI = "uni" UNI2 = "uni2" DINO_BLOOM = "dino-bloom" + RED_DINO = "red_dino" GIGAPATH = "gigapath" H_OPTIMUS_0 = "h-optimus-0" H_OPTIMUS_1 = "h-optimus-1" diff --git a/src/stamp/preprocessing/extractor/reddino.py b/src/stamp/preprocessing/extractor/reddino.py new file mode 100644 index 00000000..5f010b6e --- /dev/null +++ b/src/stamp/preprocessing/extractor/reddino.py @@ -0,0 +1,67 @@ +""" +Port from https://github.com/Snarci/RedDino +RedDino: A Foundation Model for Red Blood Cell Analysis +""" + +from typing import Callable, cast + +try: + import timm + import torch + from PIL import Image + from torchvision import transforms +except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "red_dino dependencies not installed." + " Please reinstall stamp using `pip install 'stamp[red_dino]'`" + ) from e + +from stamp.preprocessing.config import ExtractorName +from stamp.preprocessing.extractor import Extractor + +__author__ = "" +__copyright__ = "" +__license__ = "MIT" + + +class RedDinoClsOnly(torch.nn.Module): + def __init__(self, model) -> None: + super().__init__() + self.model = model + + def forward(self, batch: torch.Tensor) -> torch.Tensor: + out = self.model(batch) + if isinstance(out, tuple): + out = out[0] + # if model returns tokens, return class token + if getattr(out, "ndim", 0) >= 2 and out.shape[1] > 1: + return out[:, 0] + return out + + +def red_dino() -> Extractor[RedDinoClsOnly]: + """Extracts features from single image using RedDino encoder.""" + + model = timm.create_model( + "hf-hub:Snarcy/RedDino-large", + pretrained=True, + ) + + transform = cast( + Callable[[Image.Image], torch.Tensor], + transforms.Compose( + [ + transforms.Resize((224, 224)), + transforms.ToTensor(), + transforms.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ), + ] + ), + ) + + return Extractor( + model=RedDinoClsOnly(model), + transform=transform, + identifier=ExtractorName.RED_DINO, + ) diff --git a/src/stamp/preprocessing/extractor/uni2.py b/src/stamp/preprocessing/extractor/uni2.py index 459eff95..f187bf3c 100644 --- a/src/stamp/preprocessing/extractor/uni2.py +++ b/src/stamp/preprocessing/extractor/uni2.py @@ -1,8 +1,12 @@ +from typing import Callable, cast + try: import timm import torch - from timm.data import resolve_data_config # type: ignore + from PIL import Image + from timm.data.config import resolve_data_config from timm.data.transforms_factory import create_transform + from timm.layers.mlp import SwiGLUPacked except ModuleNotFoundError as e: raise ModuleNotFoundError( "uni2 dependencies not installed." @@ -13,7 +17,7 @@ from stamp.preprocessing.extractor import Extractor -def uni2() -> Extractor: +def uni2() -> Extractor[torch.nn.Module]: # pretrained=True needed to load UNI2-h weights (and download weights for the first time) timm_kwargs = { "img_size": 224, @@ -25,7 +29,7 @@ def uni2() -> Extractor: "mlp_ratio": 2.66667 * 2, "num_classes": 0, "no_embed_class": True, - "mlp_layer": timm.layers.SwiGLUPacked, + "mlp_layer": SwiGLUPacked, "act_layer": torch.nn.SiLU, "reg_tokens": 8, "dynamic_img_size": True, @@ -33,13 +37,14 @@ def uni2() -> Extractor: model = timm.create_model( "hf-hub:MahmoodLab/UNI2-h", pretrained=True, **timm_kwargs ) - transform = create_transform( - **resolve_data_config(model.pretrained_cfg, model=model) + transform = cast( + Callable[[Image.Image], torch.Tensor], + create_transform(**resolve_data_config(model.pretrained_cfg, model=model)), ) model.eval() return Extractor( model=model, transform=transform, - identifier=ExtractorName.UNI2, # type: ignore + identifier=ExtractorName.UNI2, ) From b951cf790d611558595ad3ccee35b20e74366e02 Mon Sep 17 00:00:00 2001 From: Minh Duc Nguyen <37109868+mducducd@users.noreply.github.com> Date: Wed, 18 Feb 2026 13:03:17 +0100 Subject: [PATCH 5/9] Add RedDino to the list of models --- getting-started.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/getting-started.md b/getting-started.md index 6d5bffec..3ad8890a 100644 --- a/getting-started.md +++ b/getting-started.md @@ -59,6 +59,7 @@ Stamp currently supports the following feature extractors: - [MUSK][musk] - [PLIP][plip] - [TICON][ticon] + - [RedDino][reddino] As some of the above require you to request access to the model on huggingface, @@ -514,4 +515,4 @@ STAMP automatically adapts its **model architecture**, **loss function**, and ** **Regression** tasks only require `ground_truth_label`. **Survival analysis** tasks require `time_label` (follow-up time) and `status_label` (event indicator). -These requirements apply consistently across cross-validation, training, deployment, and statistics. \ No newline at end of file +These requirements apply consistently across cross-validation, training, deployment, and statistics. From 5ed6575c09c6790ba0930c196ee3a8de7578df41 Mon Sep 17 00:00:00 2001 From: Minh Duc Nguyen <37109868+mducducd@users.noreply.github.com> Date: Wed, 18 Feb 2026 13:05:38 +0100 Subject: [PATCH 6/9] Add RedDino --- getting-started.md | 1 + 1 file changed, 1 insertion(+) diff --git a/getting-started.md b/getting-started.md index 3ad8890a..2bb81ee6 100644 --- a/getting-started.md +++ b/getting-started.md @@ -161,6 +161,7 @@ meaning ignored that it was ignored during feature extraction. [MADELEINE]: https://huggingface.co/MahmoodLab/madeleine [PRISM]: https://huggingface.co/paige-ai/Prism [TICON]: https://cvlab-stonybrook.github.io/TICON/ "TICON: A Slide-Level Tile Contextualizer for Histopathology Representation Learning" +[reddino]: https://github.com/Snarci/RedDino "RedDino: A Foundation Model for Red Blood Cell Analysis" From 8ee3a2906aa62544342550d950832dee3aede755 Mon Sep 17 00:00:00 2001 From: Minh Duc Nguyen <37109868+mducducd@users.noreply.github.com> Date: Thu, 19 Feb 2026 13:23:46 +0100 Subject: [PATCH 7/9] Update src/stamp/preprocessing/extractor/reddino.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/stamp/preprocessing/extractor/reddino.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/stamp/preprocessing/extractor/reddino.py b/src/stamp/preprocessing/extractor/reddino.py index 5f010b6e..b5d16ece 100644 --- a/src/stamp/preprocessing/extractor/reddino.py +++ b/src/stamp/preprocessing/extractor/reddino.py @@ -19,8 +19,6 @@ from stamp.preprocessing.config import ExtractorName from stamp.preprocessing.extractor import Extractor -__author__ = "" -__copyright__ = "" __license__ = "MIT" From 9ce4d28e525d40e287379359bdf7edd8875911d1 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Feb 2026 13:26:14 +0100 Subject: [PATCH 8/9] Initial plan (#153) Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3e61f9c4..eb834ee3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,6 +21,7 @@ jobs: "uni", "uni2", "dino-bloom", + "red_dino", "gigapath", "h-optimus-0", "h-optimus-1", From 3e625a3e0c5bffb2ba74afb7fd3dff4860ff9b6c Mon Sep 17 00:00:00 2001 From: mducducd Date: Fri, 20 Feb 2026 15:47:40 +0000 Subject: [PATCH 9/9] add red-dino --- src/stamp/config.yaml | 2 +- src/stamp/preprocessing/config.py | 2 +- src/stamp/preprocessing/extractor/reddino.py | 5 +---- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/stamp/config.yaml b/src/stamp/config.yaml index 8440560b..1c0965d2 100644 --- a/src/stamp/config.yaml +++ b/src/stamp/config.yaml @@ -4,7 +4,7 @@ preprocessing: # Extractor to use for feature extractor. Possible options are "ctranspath", # "uni", "conch", "chief-ctranspath", "conch1_5", "uni2", "dino-bloom", # "gigapath", "h-optimus-0", "h-optimus-1", "virchow2", "virchow", - # "virchow-full", "musk", "mstar", "plip" + # "virchow-full", "musk", "mstar", "plip", "ticon", "red-dino" # Some of them require requesting access to the respective authors beforehand. extractor: "chief-ctranspath" diff --git a/src/stamp/preprocessing/config.py b/src/stamp/preprocessing/config.py index 8931ca5f..072253e4 100644 --- a/src/stamp/preprocessing/config.py +++ b/src/stamp/preprocessing/config.py @@ -19,7 +19,7 @@ class ExtractorName(StrEnum): UNI = "uni" UNI2 = "uni2" DINO_BLOOM = "dino-bloom" - RED_DINO = "red_dino" + RED_DINO = "red-dino" GIGAPATH = "gigapath" H_OPTIMUS_0 = "h-optimus-0" H_OPTIMUS_1 = "h-optimus-1" diff --git a/src/stamp/preprocessing/extractor/reddino.py b/src/stamp/preprocessing/extractor/reddino.py index b5d16ece..ef92c551 100644 --- a/src/stamp/preprocessing/extractor/reddino.py +++ b/src/stamp/preprocessing/extractor/reddino.py @@ -11,10 +11,7 @@ from PIL import Image from torchvision import transforms except ModuleNotFoundError as e: - raise ModuleNotFoundError( - "red_dino dependencies not installed." - " Please reinstall stamp using `pip install 'stamp[red_dino]'`" - ) from e + raise ModuleNotFoundError("red-dino dependencies not installed.") from e from stamp.preprocessing.config import ExtractorName from stamp.preprocessing.extractor import Extractor