Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ STAMP is an **end‑to‑end, weakly‑supervised deep‑learning pipeline** tha
* 🎓 **Beginner‑friendly & expert‑ready**: Zero‑code CLI and YAML config for routine use; optional code‑level customization for advanced research.
* 🧩 **Model‑rich**: Out‑of‑the‑box support for **+20 foundation models** at [tile level](getting-started.md#feature-extraction) (e.g., *Virchow‑v2*, *UNI‑v2*) and [slide level](getting-started.md#slide-level-encoding) (e.g., *TITAN*, *COBRA*).
* 🔬 **Weakly‑supervised**: End‑to‑end MIL with Transformer aggregation for training, cross‑validation and external deployment; no pixel‑level labels required.
* 🧮 **Multi-task learning**: Unified framework for **classification**, **regression**, and **cox-based survival analysis**.
* 🧮 **Multi-task learning**: Unified framework for **classification**, **multi-target classification**, **regression**, and **Cox-based survival analysis**.
* 📊 **Stats & results**: Built‑in metrics and patient‑level predictions, ready for analysis and reporting.
* 🖼️ **Explainable**: Generates heatmaps and top‑tile exports out‑of‑the‑box for transparent model auditing and publication‑ready figures.
* 🤝 **Collaborative by design**: Clinicians drive hypothesis & interpretation while engineers handle compute; STAMP’s modular CLI mirrors real‑world workflows and tracks every step for full reproducibility.
Expand Down
4 changes: 4 additions & 0 deletions getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ Stamp currently supports the following feature extractors:
- [mSTAR][mstar]
- [MUSK][musk]
- [PLIP][plip]
- [KEEP][keep]
- [TICON][ticon]
- [RedDino][reddino]


As some of the above require you to request access to the model on huggingface,
Expand Down Expand Up @@ -154,12 +156,14 @@ meaning that it was ignored during feature extraction.
[mstar]: https://huggingface.co/Wangyh/mSTAR
[musk]: https://huggingface.co/xiangjx/musk
[plip]: https://github.com/PathologyFoundation/plip
[keep]: https://loiesun.github.io/keep/ "A Knowledge-enhanced Pathology Vision-language Foundation Model for Cancer Diagnosis"
[TITAN]: https://huggingface.co/MahmoodLab/TITAN
[COBRA2]: https://huggingface.co/KatherLab/COBRA
[EAGLE]: https://github.com/KatherLab/EAGLE
[MADELEINE]: https://huggingface.co/MahmoodLab/madeleine
[PRISM]: https://huggingface.co/paige-ai/Prism
[TICON]: https://cvlab-stonybrook.github.io/TICON/ "TICON: A Slide-Level Tile Contextualizer for Histopathology Representation Learning"
[reddino]: https://github.com/Snarci/RedDino "RedDino: A Foundation Model for Red Blood Cell Analysis"



Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "stamp"
version = "2.4.0"
version = "2.4.1"
authors = [
{ name = "Omar El Nahhas", email = "omar.el_nahhas@tu-dresden.de" },
{ name = "Marko van Treeck", email = "markovantreeck@gmail.com" },
Expand Down
42 changes: 25 additions & 17 deletions src/stamp/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,6 @@

import yaml

from stamp.config import StampConfig
from stamp.modeling.config import (
AdvancedConfig,
MlpModelParams,
ModelParams,
VitModelParams,
)
from stamp.seed import Seed

STAMP_FACTORY_SETTINGS = Path(__file__).with_name("config.yaml")

# Set up the logger
Expand All @@ -41,23 +32,38 @@ def _create_config_file(config_file: Path) -> None:


def _run_cli(args: argparse.Namespace) -> None:
# Handle init command
# Handle init command before any stamp-internal imports so that
# `stamp init` and `stamp --help` don't pay the full torch/pydantic
# import cost.
if args.command == "init":
_create_config_file(args.config_file_path)
return

# Deferred imports: only reached for real commands, not --help / init.
from stamp.modeling.config import (
AdvancedConfig,
MlpModelParams,
ModelParams,
VitModelParams,
)
from stamp.utils.config import StampConfig
from stamp.utils.seed import Seed

# Load YAML configuration
with open(args.config_file_path, "r") as config_yaml:
config = StampConfig.model_validate(yaml.safe_load(config_yaml))

# use default advanced config in case none is provided
if config.advanced_config is None:
config.advanced_config = AdvancedConfig(
model_params=ModelParams(vit=VitModelParams(), mlp=MlpModelParams()),
)
# Only build a default AdvancedConfig (with model-params) for commands
# that actually use it. Preprocess / encode / statistics / heatmaps
# never touch config.advanced_config, so don't pay the construction cost.
if args.command in {"train", "crossval"}:
if config.advanced_config is None:
config.advanced_config = AdvancedConfig(
model_params=ModelParams(vit=VitModelParams(), mlp=MlpModelParams()),
)

# Set global random seed
if config.advanced_config.seed is not None:
# Apply the global seed for any command that has one configured.
if config.advanced_config is not None and config.advanced_config.seed is not None:
Seed.set(config.advanced_config.seed)

match args.command:
Expand Down Expand Up @@ -153,6 +159,7 @@ def _run_cli(args: argparse.Namespace) -> None:
if config.training.task is None:
raise ValueError("task must be set in training configuration")

assert config.advanced_config is not None # guaranteed above for "train"
train_categorical_model_(
config=config.training, advanced=config.advanced_config
)
Expand Down Expand Up @@ -198,6 +205,7 @@ def _run_cli(args: argparse.Namespace) -> None:
f"{yaml.dump(config.crossval.model_dump(mode='json', exclude_none=True))}"
)

assert config.advanced_config is not None # guaranteed above for "crossval"
categorical_crossval_(
config=config.crossval,
advanced=config.advanced_config,
Expand Down
53 changes: 0 additions & 53 deletions src/stamp/cache.py

This file was deleted.

24 changes: 22 additions & 2 deletions src/stamp/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ preprocessing:
# Extractor to use for feature extraction. Possible options are "ctranspath",
# "uni", "conch", "chief-ctranspath", "conch1_5", "uni2", "dino-bloom",
# "gigapath", "h-optimus-0", "h-optimus-1", "virchow2", "virchow",
# "virchow-full", "musk", "mstar", "plip"
# "virchow-full", "musk", "mstar", "plip", "ticon", "red-dino", "keep"
# Some of them require requesting access to the respective authors beforehand.
extractor: "chief-ctranspath"

Expand Down Expand Up @@ -76,6 +76,8 @@ crossval:

# Name of the column from the clini table to train on.
ground_truth_label: "KRAS"
# For multi-target classification you may specify a list of columns,
# e.g. ground_truth_label: ["KRAS", "BRAF", "NRAS"]

# For survival (should be status and follow-up days columns in clini table)
# status_label: "event"
Expand Down Expand Up @@ -133,6 +135,8 @@ training:

# Name of the column from the clini table to train on.
ground_truth_label: "KRAS"
# For multi-target classification you may specify a list of columns,
# e.g. ground_truth_label: ["KRAS", "BRAF", "NRAS"]

# For survival (should be status and follow-up days columns in clini table)
# status_label: "event"
Expand Down Expand Up @@ -175,6 +179,8 @@ deployment:

# Name of the column from the clini to compare predictions to.
ground_truth_label: "KRAS"
# For multi-target classification you may specify a list of columns,
# e.g. ground_truth_label: ["KRAS", "BRAF", "NRAS"]

# For survival (should be status and follow-up days columns in clini table)
# status_label: "event"
Expand All @@ -200,6 +206,8 @@ statistics:

# Name of the target label.
ground_truth_label: "KRAS"
# For multi-target classification you may specify a list of columns,
# e.g. ground_truth_label: ["KRAS", "BRAF", "NRAS"]

# A lot of the statistics are computed "one-vs-all", i.e. there needs to be
# a positive class to calculate the statistics for.
Expand Down Expand Up @@ -319,7 +327,7 @@ advanced_config:
max_lr: 1e-4
div_factor: 25.
# Select a model regardless of task
model_name: "vit" # or mlp, trans_mil
model_name: "vit" # or mlp, trans_mil, barspoon

model_params:
vit: # Vision Transformer
Expand All @@ -338,3 +346,15 @@ advanced_config:
dim_hidden: 512
num_layers: 2
dropout: 0.25

# NOTE: Only the `barspoon` model supports multi-target classification
# (i.e. `ground_truth_label` can be a list of column names). Other
# models expect a single target column.
barspoon: # Encoder-Decoder Transformer for multi-target classification
d_model: 512
num_encoder_heads: 8
num_decoder_heads: 8
num_encoder_layers: 2
num_decoder_layers: 2
dim_feedforward: 2048
positional_encoding: true
2 changes: 1 addition & 1 deletion src/stamp/encoding/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def init_slide_encoder_(
selected_encoder = encoder

case _ as unreachable:
assert_never(unreachable) # type: ignore
assert_never(unreachable)

selected_encoder.encode_slides_(
output_dir=output_dir,
Expand Down
10 changes: 7 additions & 3 deletions src/stamp/encoding/encoder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from abc import ABC, abstractmethod
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import cast

import h5py
import numpy as np
Expand All @@ -12,11 +13,11 @@
from tqdm import tqdm

import stamp
from stamp.cache import get_processing_code_hash
from stamp.encoding.config import EncoderName
from stamp.modeling.data import CoordsInfo, get_coords, read_table
from stamp.preprocessing.config import ExtractorName
from stamp.types import DeviceLikeType, PandasLabel
from stamp.utils.cache import get_processing_code_hash

__author__ = "Juan Pablo Ricapito"
__copyright__ = "Copyright (C) 2025 Juan Pablo Ricapito"
Expand Down Expand Up @@ -61,7 +62,8 @@ def encode_slides_(
if self.precision == torch.float16:
self.model.half()

for tile_feats_filename in (progress := tqdm(os.listdir(feat_dir))):
h5_files = sorted(f for f in os.listdir(feat_dir) if f.endswith(".h5"))
for tile_feats_filename in (progress := tqdm(h5_files)):
h5_path = os.path.join(feat_dir, tile_feats_filename)
slide_name: str = Path(tile_feats_filename).stem
progress.set_description(slide_name)
Expand Down Expand Up @@ -183,7 +185,9 @@ def _read_h5(
elif not h5_path.endswith(".h5"):
raise ValueError(f"File is not of type .h5: {os.path.basename(h5_path)}")
with h5py.File(h5_path, "r") as f:
feats: Tensor = torch.tensor(f["feats"][:], dtype=self.precision) # type: ignore
feats_ds = cast(h5py.Dataset, f["feats"])
# torch.from_numpy avoids a redundant data copy vs torch.tensor(array)
feats: Tensor = torch.from_numpy(feats_ds[()]).to(dtype=self.precision)
coords: CoordsInfo = get_coords(f)
extractor: str = f.attrs.get("extractor", "")
if extractor == "":
Expand Down
2 changes: 1 addition & 1 deletion src/stamp/encoding/encoder/chief.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
from numpy import ndarray
from tqdm import tqdm

from stamp.cache import STAMP_CACHE_DIR, file_digest, get_processing_code_hash
from stamp.encoding.config import EncoderName
from stamp.encoding.encoder import Encoder
from stamp.preprocessing.config import ExtractorName
from stamp.types import DeviceLikeType, PandasLabel
from stamp.utils.cache import STAMP_CACHE_DIR, file_digest, get_processing_code_hash

__author__ = "Juan Pablo Ricapito"
__copyright__ = "Copyright (C) 2025 Juan Pablo Ricapito"
Expand Down
2 changes: 1 addition & 1 deletion src/stamp/encoding/encoder/eagle.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
from torch import Tensor
from tqdm import tqdm

from stamp.cache import get_processing_code_hash
from stamp.encoding.config import EncoderName
from stamp.encoding.encoder import Encoder
from stamp.encoding.encoder.chief import CHIEF
from stamp.modeling.data import CoordsInfo
from stamp.preprocessing.config import ExtractorName
from stamp.types import DeviceLikeType, PandasLabel
from stamp.utils.cache import get_processing_code_hash

__author__ = "Juan Pablo Ricapito"
__copyright__ = "Copyright (C) 2025 Juan Pablo Ricapito"
Expand Down
2 changes: 1 addition & 1 deletion src/stamp/encoding/encoder/gigapath.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
from gigapath import slide_encoder
from tqdm import tqdm

from stamp.cache import get_processing_code_hash
from stamp.encoding.config import EncoderName
from stamp.encoding.encoder import Encoder
from stamp.modeling.data import CoordsInfo
from stamp.preprocessing.config import ExtractorName
from stamp.types import PandasLabel, SlideMPP
from stamp.utils.cache import get_processing_code_hash

__author__ = "Juan Pablo Ricapito"
__copyright__ = "Copyright (C) 2025 Juan Pablo Ricapito"
Expand Down
2 changes: 1 addition & 1 deletion src/stamp/encoding/encoder/madeleine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import torch
from numpy import ndarray

from stamp.cache import STAMP_CACHE_DIR
from stamp.encoding.config import EncoderName
from stamp.encoding.encoder import Encoder
from stamp.preprocessing.config import ExtractorName
from stamp.utils.cache import STAMP_CACHE_DIR

try:
from madeleine.models.factory import create_model_from_pretrained
Expand Down
2 changes: 1 addition & 1 deletion src/stamp/encoding/encoder/titan.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
from tqdm import tqdm
from transformers import AutoModel

from stamp.cache import get_processing_code_hash
from stamp.encoding.config import EncoderName
from stamp.encoding.encoder import Encoder
from stamp.modeling.data import CoordsInfo
from stamp.preprocessing.config import ExtractorName
from stamp.types import DeviceLikeType, Microns, PandasLabel, SlideMPP
from stamp.utils.cache import get_processing_code_hash

__author__ = "Juan Pablo Ricapito"
__copyright__ = "Copyright (C) 2025 Juan Pablo Ricapito"
Expand Down
Loading