Skip to content

Commit cf732d0

Browse files
authored
Merge pull request #334 from BiomedicalMachineLearning/newmana/add_features
Add Leiden clustering and update documentation
2 parents 6f2bc93 + 502a69f commit cf732d0

File tree

13 files changed

+292
-163
lines changed

13 files changed

+292
-163
lines changed

HISTORY.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
History
33
=======
44

5+
1.1.2 (2025-09-17)
6+
------------------
7+
* Add Leiden clustering wrapper.
8+
* Fix documentation, refactor code in spatial.SME.
9+
510
1.1.1 (2025-07-07)
611
------------------
712
* Support Python 3.10.x

docs/release_notes/1.1.2.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
1.1.2 `2025-09-17`
2+
~~~~~~~~~~~~~~~~~~~~~~~~~
3+
4+
.. rubric:: Features
5+
* Add Leiden clustering wrapper.
6+
* Fix documentation, refactor code in spatial.SME.

docs/release_notes/index.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
Release Notes
22
===================================================
33

4+
.. include:: 1.1.2.rst
5+
46
.. include:: 1.1.1.rst
57

68
.. include:: 0.4.6.rst

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "stlearn"
7-
version = "1.1.1"
7+
version = "1.1.2"
88
authors = [
99
{name = "Genomics and Machine Learning lab", email = "andrew.newman@uq.edu.au"},
1010
]
@@ -14,7 +14,7 @@ license = {text = "BSD license"}
1414
requires-python = "~=3.10.0"
1515
keywords = ["stlearn"]
1616
classifiers = [
17-
"Development Status :: 2 - Pre-Alpha",
17+
"Development Status :: 5 - Production/Stable",
1818
"Intended Audience :: Developers",
1919
"License :: OSI Approved :: BSD License",
2020
"Natural Language :: English",

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ tensorflow==2.14.1
1111
keras==2.14.0
1212
types-tensorflow>=2.8.0
1313
imageio==2.37.0
14-
scipy==1.11.4
14+
scipy==1.11.4
15+
scikit-learn==1.7.0

stlearn/spatial/SME/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
from .impute import SME_impute0, pseudo_spot
2-
from .normalize import SME_normalize
1+
from .pseudo_spot import pseudo_spot
2+
from .sme_impute0 import SME_impute0
3+
from .sme_normalize import SME_normalize
34

45
__all__ = [
56
"SME_normalize",

stlearn/spatial/SME/_weighting_matrix.py

Lines changed: 60 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import math
12
from typing import Literal
23

34
import numpy as np
45
from anndata import AnnData
6+
from sklearn.linear_model import LinearRegression # type: ignore
57
from sklearn.metrics import pairwise_distances
68
from tqdm import tqdm
79

@@ -17,16 +19,9 @@
1719
]
1820

1921

20-
def calculate_weight_matrix(
21-
adata: AnnData,
22-
adata_imputed: AnnData | None = None,
23-
pseudo_spots: bool = False,
24-
platform: _PLATFORM = "Visium",
25-
) -> AnnData | None:
26-
import math
27-
28-
from sklearn.linear_model import LinearRegression
29-
22+
def row_col_by_platform(
23+
adata, platform
24+
) -> tuple[LinearRegression, LinearRegression, float]:
3025
rate: float
3126
if platform == "Visium":
3227
img_row = adata.obs["imagerow"]
@@ -46,64 +41,61 @@ def calculate_weight_matrix(
4641
{platform!r} does not support.
4742
"""
4843
)
49-
50-
reg_row = LinearRegression().fit(array_row.values.reshape(-1, 1), img_row)
51-
52-
reg_col = LinearRegression().fit(array_col.values.reshape(-1, 1), img_col)
53-
54-
if pseudo_spots and adata_imputed:
55-
pd = pairwise_distances(
56-
adata_imputed.obs[["imagecol", "imagerow"]],
57-
adata.obs[["imagecol", "imagerow"]],
58-
metric="euclidean",
59-
)
60-
unit = math.sqrt(reg_row.coef_**2 + reg_col.coef_**2)
61-
pd_norm = np.where(pd >= unit, 0, 1)
62-
63-
md = 1 - pairwise_distances(
64-
adata_imputed.obsm["X_morphology"],
65-
adata.obsm["X_morphology"],
66-
metric="cosine",
67-
)
68-
md[md < 0] = 0
69-
70-
adata_imputed.uns["physical_distance"] = pd_norm
71-
adata_imputed.uns["morphological_distance"] = md
72-
73-
adata_imputed.uns["weights_matrix_all"] = (
74-
adata_imputed.uns["physical_distance"]
75-
* adata_imputed.uns["morphological_distance"]
76-
)
77-
78-
else:
79-
pd = pairwise_distances(adata.obs[["imagecol", "imagerow"]], metric="euclidean")
80-
unit = math.sqrt(reg_row.coef_**2 + reg_col.coef_**2)
81-
pd_norm = np.where(pd >= rate * unit, 0, 1)
82-
83-
md = 1 - pairwise_distances(adata.obsm["X_morphology"], metric="cosine")
84-
md[md < 0] = 0
85-
86-
gd = 1 - pairwise_distances(adata.obsm["X_pca"], metric="correlation")
87-
adata.uns["gene_expression_correlation"] = gd
88-
adata.uns["physical_distance"] = pd_norm
89-
adata.uns["morphological_distance"] = md
90-
91-
adata.uns["weights_matrix_all"] = (
92-
adata.uns["physical_distance"]
93-
* adata.uns["morphological_distance"]
94-
* adata.uns["gene_expression_correlation"]
95-
)
96-
adata.uns["weights_matrix_pd_gd"] = (
97-
adata.uns["physical_distance"] * adata.uns["gene_expression_correlation"]
98-
)
99-
adata.uns["weights_matrix_pd_md"] = (
100-
adata.uns["physical_distance"] * adata.uns["morphological_distance"]
101-
)
102-
adata.uns["weights_matrix_gd_md"] = (
103-
adata.uns["gene_expression_correlation"]
104-
* adata.uns["morphological_distance"]
105-
)
106-
return adata
44+
regression = LinearRegression()
45+
reg_row: LinearRegression = regression.fit(array_row.values.reshape(-1, 1), img_row) # type: ignore
46+
reg_col: LinearRegression = regression.fit(array_col.values.reshape(-1, 1), img_col) # type: ignore
47+
return reg_col, reg_row, rate
48+
49+
50+
def weight_matrix(adata, platform):
    """Compute the SME weighting matrices for ``adata`` and store them in place.

    Three pairwise spot-by-spot matrices are built and written to
    ``adata.uns``:

    * ``"physical_distance"`` -- 0/1 mask; 1 where the Euclidean distance
      between the ``imagecol``/``imagerow`` coordinates of two spots is
      below ``rate * unit``, 0 otherwise.
    * ``"morphological_distance"`` -- ``1 - cosine distance`` of
      ``adata.obsm["X_morphology"]``, with negative values set to 0.
    * ``"gene_expression_correlation"`` -- ``1 - correlation distance`` of
      ``adata.obsm["X_pca"]``.

    Their element-wise products are stored as ``"weights_matrix_all"``,
    ``"weights_matrix_pd_gd"``, ``"weights_matrix_pd_md"`` and
    ``"weights_matrix_gd_md"``.

    Parameters
    ----------
    adata
        Annotated data matrix; must provide ``obs["imagecol"]``,
        ``obs["imagerow"]``, ``obsm["X_morphology"]`` and ``obsm["X_pca"]``.
    platform
        Platform name forwarded to ``row_col_by_platform``.

    Returns
    -------
    None
        All results are written to ``adata.uns``.
    """
    reg_col, reg_row, rate = row_col_by_platform(adata, platform)

    # NOTE(review): row_col_by_platform appears to fit a single
    # LinearRegression instance twice, in which case reg_row and reg_col are
    # the same fitted object and both coefficients below are equal -- confirm.
    spot_coords = adata.obs[["imagecol", "imagerow"]]
    pixel_dist = pairwise_distances(spot_coords, metric="euclidean")
    unit = math.sqrt(reg_row.coef_[0] ** 2 + reg_col.coef_[0] ** 2)
    neighbour_mask = np.where(pixel_dist >= rate * unit, 0, 1)

    # Cosine similarity of the morphology features, clipped at zero.
    morph_sim = 1 - pairwise_distances(adata.obsm["X_morphology"], metric="cosine")
    morph_sim[morph_sim < 0] = 0

    # Correlation similarity of the PCA embedding (not clipped).
    expr_sim = 1 - pairwise_distances(adata.obsm["X_pca"], metric="correlation")

    # Individual components.
    adata.uns["gene_expression_correlation"] = expr_sim
    adata.uns["physical_distance"] = neighbour_mask
    adata.uns["morphological_distance"] = morph_sim

    # Element-wise combinations of the components.
    adata.uns["weights_matrix_all"] = neighbour_mask * morph_sim * expr_sim
    adata.uns["weights_matrix_pd_gd"] = neighbour_mask * expr_sim
    adata.uns["weights_matrix_pd_md"] = neighbour_mask * morph_sim
    adata.uns["weights_matrix_gd_md"] = expr_sim * morph_sim
75+
76+
77+
def weight_matrix_imputed(adata, adata_imputed, platform):
    """Compute weights between imputed spots and original spots, in place.

    Rows of every matrix correspond to ``adata_imputed`` and columns to
    ``adata``. The following entries are written to ``adata_imputed.uns``:

    * ``"physical_distance"`` -- 0/1 mask; 1 where the Euclidean distance
      between ``imagecol``/``imagerow`` coordinates is below ``unit``.
      Unlike ``weight_matrix``, the platform ``rate`` is not applied.
    * ``"morphological_distance"`` -- ``1 - cosine distance`` between the
      ``X_morphology`` features of both objects, negative values set to 0.
    * ``"weights_matrix_all"`` -- element-wise product of the two above.

    Parameters
    ----------
    adata
        Original annotated data matrix; used for the regression fit and as
        the column side of the pairwise computations.
    adata_imputed
        Annotated data matrix of the imputed spots; receives the results.
    platform
        Platform name forwarded to ``row_col_by_platform``.

    Returns
    -------
    None
        All results are written to ``adata_imputed.uns``.
    """
    # The third return value (rate) is intentionally discarded here.
    reg_col, reg_row, _ = row_col_by_platform(adata, platform)

    coord_keys = ["imagecol", "imagerow"]
    pixel_dist = pairwise_distances(
        adata_imputed.obs[coord_keys],
        adata.obs[coord_keys],
        metric="euclidean",
    )
    # NOTE(review): row_col_by_platform appears to fit a single
    # LinearRegression instance twice, in which case reg_row and reg_col are
    # the same fitted object and both coefficients below are equal -- confirm.
    unit = math.sqrt(reg_row.coef_[0] ** 2 + reg_col.coef_[0] ** 2)
    neighbour_mask = np.where(pixel_dist >= unit, 0, 1)

    # Cosine similarity of the morphology features, clipped at zero.
    morph_sim = 1 - pairwise_distances(
        adata_imputed.obsm["X_morphology"],
        adata.obsm["X_morphology"],
        metric="cosine",
    )
    morph_sim[morph_sim < 0] = 0

    adata_imputed.uns["physical_distance"] = neighbour_mask
    adata_imputed.uns["morphological_distance"] = morph_sim
    adata_imputed.uns["weights_matrix_all"] = neighbour_mask * morph_sim
10799

108100

109101
def impute_neighbour(
Lines changed: 4 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -12,79 +12,10 @@
1212
from ._weighting_matrix import (
1313
_PLATFORM,
1414
_WEIGHTING_MATRIX,
15-
calculate_weight_matrix,
1615
impute_neighbour,
16+
weight_matrix_imputed,
1717
)
1818

19-
20-
def SME_impute0(
21-
adata: AnnData,
22-
use_data: str = "raw",
23-
weights: _WEIGHTING_MATRIX = "weights_matrix_all",
24-
platform: _PLATFORM = "Visium",
25-
copy: bool = False,
26-
) -> AnnData | None:
27-
"""\
28-
using spatial location (S), tissue morphological feature (M) and gene
29-
expression (E) information to impute missing values
30-
31-
Parameters
32-
----------
33-
adata
34-
Annotated data matrix.
35-
use_data
36-
input data, can be `raw` counts or log transformed data
37-
weights
38-
weighting matrix for imputation.
39-
if `weights_matrix_all`, matrix combined all information from spatial
40-
location (S), tissue morphological feature (M) and gene expression (E)
41-
if `weights_matrix_pd_md`, matrix combined information from spatial
42-
location (S), tissue morphological feature (M)
43-
platform
44-
`Visium` or `Old_ST`
45-
copy
46-
Return a copy instead of writing to adata.
47-
Returns
48-
-------
49-
Anndata
50-
"""
51-
adata = adata.copy() if copy else adata
52-
53-
if use_data == "raw":
54-
if isinstance(adata.X, csr_matrix):
55-
count_embed = adata.X.toarray()
56-
elif isinstance(adata.X, np.ndarray):
57-
count_embed = adata.X
58-
elif isinstance(adata.X, pd.Dataframe):
59-
count_embed = adata.X.values
60-
else:
61-
raise ValueError(
62-
f"""\
63-
{type(adata.X)} is not a valid type.
64-
"""
65-
)
66-
else:
67-
count_embed = adata.obsm[use_data]
68-
69-
calculate_weight_matrix(adata, platform=platform)
70-
71-
impute_neighbour(adata, count_embed=count_embed, weights=weights)
72-
73-
imputed_data = adata.obsm["imputed_data"].astype(float)
74-
mask = count_embed != 0
75-
count_embed_ = count_embed.astype(float)
76-
count_embed_[count_embed_ == 0] = np.nan
77-
adjusted_count_matrix = np.nanmean(np.array([count_embed_, imputed_data]), axis=0)
78-
adjusted_count_matrix[mask] = count_embed[mask]
79-
80-
key_added = use_data + "_SME_imputed"
81-
adata.obsm[key_added] = adjusted_count_matrix
82-
83-
print("The data adjusted by SME is added to adata.obsm['" + key_added + "']")
84-
85-
return adata if copy else None
86-
87-
8819
_COPY = Literal["pseudo_spot_adata", "combined_adata"]
8920

9021

@@ -98,9 +29,8 @@ def pseudo_spot(
9829
copy: _COPY = "pseudo_spot_adata",
9930
) -> AnnData | None:
10031
"""\
101-
using spatial location (S), tissue morphological feature (M) and gene
102-
expression (E) information to impute gap between spots and increase resolution
103-
for gene detection
32+
Improve spatial resolution by imputing (creating) new spots from existing ones
33+
using spatial, morphological, and expression (SME) information.
10434
10535
Parameters
10636
----------
@@ -306,9 +236,7 @@ def pseudo_spot(
306236
else:
307237
count_embed = adata.obsm[use_data]
308238

309-
calculate_weight_matrix(
310-
adata, pseudo_spot_adata, pseudo_spots=True, platform=platform
311-
)
239+
weight_matrix_imputed(adata, pseudo_spot_adata, platform=platform)
312240

313241
impute_neighbour(pseudo_spot_adata, count_embed=count_embed, weights=weights)
314242

0 commit comments

Comments
 (0)