From 77e62be13080c364eacd03174301523e860d30e7 Mon Sep 17 00:00:00 2001
From: Andreas Eisenbarth <andreas.eisenbarth@embl.de>
Date: Wed, 6 Nov 2024 22:43:53 +0100
Subject: [PATCH 1/9] Create test cases for corrupted SpatialData files

---
 tests/io/test_partial_read.py | 253 ++++++++++++++++++++++++++++++++++
 1 file changed, 253 insertions(+)
 create mode 100644 tests/io/test_partial_read.py

diff --git a/tests/io/test_partial_read.py b/tests/io/test_partial_read.py
new file mode 100644
index 000000000..b55d13497
--- /dev/null
+++ b/tests/io/test_partial_read.py
@@ -0,0 +1,253 @@
+from __future__ import annotations
+
+import json
+import re
+import shutil
+import tempfile
+from dataclasses import dataclass
+from json import JSONDecodeError
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import numpy as np
+import py
+import pytest
+import zarr
+from pyarrow import ArrowInvalid
+from zarr.errors import ArrayNotFoundError
+
+from spatialdata import SpatialData, read_zarr
+from spatialdata.datasets import blobs
+
+if TYPE_CHECKING:
+    import _pytest.fixtures
+
+
+@pytest.fixture(scope="module")
+def test_case(request: _pytest.fixtures.SubRequest):
+    """
+    Fixture that helps using fixtures as arguments in parametrize.
+
+    The fixture `test_case` takes up values from other fixture functions used as parameters.
+    """
+    fixture_function = request.param
+    fixture_name = fixture_function.__name__
+    return request.getfixturevalue(fixture_name)
+
+
+@dataclass
+class PartialReadTestCase:
+    path: Path
+    expected_elements: list[str]
+    expected_exceptions: type[Exception] | tuple[type[Exception], ...]
+    warnings_patterns: list[str]
+
+
+@pytest.fixture(scope="session")
+def session_tmp_path(request: _pytest.fixtures.SubRequest) -> Path:
+    """
+    Create a temporary directory as a fixture with session scope and deletes it afterward.
+
+    The default tmp_path fixture has function scope and cannot be used as input to session-scoped
+    fixtures.
+    """
+    directory = py.path.local(tempfile.mkdtemp())
+    request.addfinalizer(lambda: directory.remove(rec=1))
+    return Path(directory)
+
+
+@pytest.fixture(scope="module")
+def sdata_with_corrupted_zattrs(session_tmp_path: Path) -> PartialReadTestCase:
+    # .zattrs is a zero-byte file, aborted during write, or contains invalid JSON syntax
+    sdata = blobs()
+    sdata_path = session_tmp_path / "sdata_with_corrupted_zattrs.zarr"
+    sdata.write(sdata_path)
+
+    corrupted_elements = ["blobs_image", "blobs_labels", "blobs_points", "blobs_polygons", "table"]
+    warnings_patterns = []
+    for corrupted_element in corrupted_elements:
+        elem_path = sdata.locate_element(sdata[corrupted_element])[0]
+        (sdata_path / elem_path / ".zattrs").write_bytes(b"")
+        warnings_patterns.append(f"{elem_path}: JSONDecodeError")
+    not_corrupted = [name for _, name, _ in sdata.gen_elements() if name not in corrupted_elements]
+
+    return PartialReadTestCase(
+        path=sdata_path,
+        expected_elements=not_corrupted,
+        expected_exceptions=JSONDecodeError,
+        warnings_patterns=warnings_patterns,
+    )
+
+
+@pytest.fixture(scope="module")
+def sdata_with_corrupted_image_chunks(session_tmp_path: Path) -> PartialReadTestCase:
+    # images/blobs_image/0 is a zero-byte file or aborted during write
+    sdata = blobs()
+    sdata_path = session_tmp_path / "sdata_with_corrupted_image_chunks.zarr"
+    sdata.write(sdata_path)
+
+    corrupted = "blobs_image"
+    shutil.rmtree(sdata_path / "images" / corrupted / "0")
+    (sdata_path / "images" / corrupted / "0").touch()
+    not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted]
+
+    return PartialReadTestCase(
+        path=sdata_path,
+        expected_elements=not_corrupted,
+        expected_exceptions=ArrayNotFoundError,
+        warnings_patterns=[rf"images/{corrupted}: ArrayNotFoundError"],
+    )
+
+
+@pytest.fixture(scope="module")
+def sdata_with_corrupted_parquet(session_tmp_path: Path) -> PartialReadTestCase:
+    # points/blobs_points/0 is a zero-byte file or aborted during write
+    sdata = blobs()
+    sdata_path = session_tmp_path / "sdata_with_corrupted_parquet.zarr"
+    sdata.write(sdata_path)
+
+    corrupted = "blobs_points"
+    shutil.rmtree(sdata_path / "points" / corrupted / "points.parquet")
+    (sdata_path / "points" / corrupted / "points.parquet").touch()
+    not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted]
+
+    return PartialReadTestCase(
+        path=sdata_path,
+        expected_elements=not_corrupted,
+        expected_exceptions=ArrowInvalid,
+        warnings_patterns=[rf"points/{corrupted}: ArrowInvalid"],
+    )
+
+
+@pytest.fixture(scope="module")
+def sdata_with_missing_zattrs(session_tmp_path: Path) -> PartialReadTestCase:
+    # .zattrs is missing
+    sdata = blobs()
+    sdata_path = session_tmp_path / "sdata_with_missing_zattrs.zarr"
+    sdata.write(sdata_path)
+
+    corrupted = "blobs_image"
+    (sdata_path / "images" / corrupted / ".zattrs").unlink()
+    not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted]
+
+    return PartialReadTestCase(
+        path=sdata_path,
+        expected_elements=not_corrupted,
+        expected_exceptions=ValueError,
+        warnings_patterns=[rf"images/{corrupted}: .* Unable to read the NGFF file"],
+    )
+
+
+@pytest.fixture(scope="module")
+def sdata_with_missing_image_chunks(
+    session_tmp_path: Path,
+) -> PartialReadTestCase:
+    # .zattrs exists, but refers to binary array chunks that do not exist
+    sdata = blobs()
+    sdata_path = session_tmp_path / "sdata_with_missing_image_chunks.zarr"
+    sdata.write(sdata_path)
+
+    corrupted = "blobs_image"
+    shutil.rmtree(sdata_path / "images" / corrupted / "0")
+    not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted]
+
+    return PartialReadTestCase(
+        path=sdata_path,
+        expected_elements=not_corrupted,
+        expected_exceptions=ArrayNotFoundError,
+        warnings_patterns=[rf"images/{corrupted}: ArrayNotFoundError"],
+    )
+
+
+@pytest.fixture(scope="module")
+def sdata_with_invalid_zattrs_violating_spec(session_tmp_path: Path) -> PartialReadTestCase:
+    # .zattrs contains readable JSON which is not valid for SpatialData/NGFF specs
+    # for example due to a missing/misspelled/renamed key
+    sdata = blobs()
+    sdata_path = session_tmp_path / "sdata_with_invalid_zattrs_violating_spec.zarr"
+    sdata.write(sdata_path)
+
+    corrupted = "blobs_image"
+    json_dict = json.loads((sdata_path / "images" / corrupted / ".zattrs").read_text())
+    del json_dict["multiscales"][0]["coordinateTransformations"]
+    (sdata_path / "images" / corrupted / ".zattrs").write_text(json.dumps(json_dict, indent=4))
+    not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted]
+
+    return PartialReadTestCase(
+        path=sdata_path,
+        expected_elements=not_corrupted,
+        expected_exceptions=KeyError,
+        warnings_patterns=[rf"images/{corrupted}: KeyError: coordinateTransformations"],
+    )
+
+
+@pytest.fixture(scope="module")
+def sdata_with_invalid_zattrs_table_region_not_found(session_tmp_path: Path) -> PartialReadTestCase:
+    # table/table/.zarr referring to a region that is not found
+    # This has been emitting just a warning, but does not fail reading the table element.
+    sdata = blobs()
+    sdata_path = session_tmp_path / "sdata_with_invalid_zattrs_table_region_not_found.zarr"
+    sdata.write(sdata_path)
+
+    corrupted = "blobs_labels"
+    # The element data is missing
+    shutil.rmtree(sdata_path / "labels" / corrupted)
+    # But the labels element is referenced as a region in a table
+    regions = zarr.open_group(sdata_path / "tables" / "table" / "obs" / "region", mode="r")
+    assert corrupted in np.asarray(regions.categories)[regions.codes]
+    not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted]
+
+    return PartialReadTestCase(
+        path=sdata_path,
+        expected_elements=not_corrupted,
+        expected_exceptions=(),
+        warnings_patterns=[
+            rf"The table is annotating '{re.escape(corrupted)}', which is not present in the SpatialData object"
+        ],
+    )
+
+
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        sdata_with_corrupted_zattrs,
+        sdata_with_corrupted_image_chunks,
+        sdata_with_corrupted_parquet,
+        sdata_with_missing_zattrs,
+        sdata_with_missing_image_chunks,
+        sdata_with_invalid_zattrs_violating_spec,
+        sdata_with_invalid_zattrs_table_region_not_found,
+    ],
+    indirect=True,
+)
+def test_read_zarr_with_error(test_case: PartialReadTestCase):
+    # The specific type of exception depends on the read function for the SpatialData element
+    if test_case.expected_exceptions:
+        with pytest.raises(test_case.expected_exceptions):
+            read_zarr(test_case.path, on_bad_files="error")
+    else:
+        read_zarr(test_case.path, on_bad_files="error")
+
+
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        sdata_with_corrupted_zattrs,
+        sdata_with_corrupted_image_chunks,
+        sdata_with_corrupted_parquet,
+        sdata_with_missing_zattrs,
+        sdata_with_missing_image_chunks,
+        sdata_with_invalid_zattrs_violating_spec,
+        sdata_with_invalid_zattrs_table_region_not_found,
+    ],
+    indirect=True,
+)
+def test_read_zarr_with_warnings(test_case: PartialReadTestCase):
+    with pytest.warns(UserWarning) as all_warnings:
+        actual: SpatialData = read_zarr(test_case.path, on_bad_files="warn")
+
+    for warning_pattern, warning_record in zip(test_case.warnings_patterns, all_warnings):
+        assert re.search(warning_pattern, warning_record.message.args[0])
+
+    for elem in test_case.expected_elements:
+        assert elem in actual

From 23e3558ce289409df9c302b554137c103c1e3b7c Mon Sep 17 00:00:00 2001
From: Andreas Eisenbarth <andreas.eisenbarth@embl.de>
Date: Wed, 6 Nov 2024 23:04:08 +0100
Subject: [PATCH 2/9] Create context manager to conditionally convert errors to
 warnings

---
 src/spatialdata/_io/_utils.py | 61 ++++++++++++++++++++++++++++++++++-
 tests/io/test_utils.py        | 21 +++++++++++-
 2 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/src/spatialdata/_io/_utils.py b/src/spatialdata/_io/_utils.py
index 18b86f1a4..e3f4abdd2 100644
--- a/src/spatialdata/_io/_utils.py
+++ b/src/spatialdata/_io/_utils.py
@@ -4,13 +4,16 @@
 import logging
 import os.path
 import re
+import sys
 import tempfile
+import traceback
 import warnings
 from collections.abc import Generator, Mapping, Sequence
 from contextlib import contextmanager
+from enum import Enum
 from functools import singledispatch
 from pathlib import Path
-from typing import Any
+from typing import Any, Literal
 
 import zarr
 from anndata import AnnData
@@ -364,3 +367,59 @@ def save_transformations(sdata: SpatialData) -> None:
         stacklevel=2,
     )
     sdata.write_transformations()
+
+
+class BadFileHandleMethod(Enum):
+    ERROR = "error"
+    WARN = "warn"
+
+
+@contextmanager
+def handle_read_errors(
+    on_bad_files: Literal[BadFileHandleMethod.ERROR, BadFileHandleMethod.WARN],
+    location: str,
+    exc_types: tuple[type[Exception], ...],
+) -> Generator[None, None, None]:
+    """
+    Handle read errors according to parameter `on_bad_files`.
+
+    Parameters
+    ----------
+    on_bad_files
+        Specifies what to do upon encountering an exception.
+        Allowed values are :
+
+        - 'error', let the exception be raised.
+        - 'warn', convert the exception into a warning if it is one of the expected exception types.
+    location
+        String identifying the function call where the exception happened
+    exc_types
+        A tuple of expected exception classes that should be converted into warnings.
+
+    Raises
+    ------
+    If `on_bad_files="error"`, all encountered exceptions are raised.
+    If `on_bad_files="warn"`, any encountered exceptions not matching the `exc_types` are raised.
+    """
+    on_bad_files = BadFileHandleMethod(on_bad_files)  # str to enum
+    if on_bad_files == BadFileHandleMethod.WARN:
+        try:
+            yield
+        except exc_types as e:
+            # Extract the original filename and line number from the exception and
+            # create a warning from it.
+            exc_traceback = sys.exc_info()[-1]
+            last_frame, lineno = list(traceback.walk_tb(exc_traceback))[-1]
+            filename = last_frame.f_code.co_filename
+            # Include the location (element path) in the warning message.
+            message = f"{location}: {e.__class__.__name__}: {e.args[0]}"
+            warnings.warn_explicit(
+                message=message,
+                category=UserWarning,
+                filename=filename,
+                lineno=lineno,
+            )
+            # continue
+    else:  # on_bad_files == BadFileHandleMethod.ERROR
+        # Let it raise exceptions
+        yield
diff --git a/tests/io/test_utils.py b/tests/io/test_utils.py
index 31346528e..f9778f5c7 100644
--- a/tests/io/test_utils.py
+++ b/tests/io/test_utils.py
@@ -1,10 +1,12 @@
 import os
 import tempfile
+from contextlib import nullcontext
 
 import dask.dataframe as dd
+import pytest
 
 from spatialdata import read_zarr
-from spatialdata._io._utils import get_dask_backing_files
+from spatialdata._io._utils import get_dask_backing_files, handle_read_errors
 
 
 def test_backing_files_points(points):
@@ -118,3 +120,20 @@ def test_backing_files_combining_points_and_images(points, images):
             os.path.realpath(os.path.join(f1, "images/image2d")),
         ]
         assert set(files) == set(expected_zarr_locations_old) or set(files) == set(expected_zarr_locations_new)
+
+
+@pytest.mark.parametrize(
+    ("on_bad_files", "actual_error", "expectation"),
+    [
+        ("error", None, nullcontext()),
+        ("error", KeyError("key"), pytest.raises(KeyError)),
+        ("warn", None, nullcontext()),
+        ("warn", KeyError("key"), pytest.warns(UserWarning, match="location: KeyError")),
+        ("warn", RuntimeError("unhandled"), pytest.raises(RuntimeError)),
+    ],
+)
+def test_handle_read_errors(on_bad_files: str, actual_error: Exception, expectation):
+    with expectation:  # noqa: SIM117
+        with handle_read_errors(on_bad_files=on_bad_files, location="location", exc_types=KeyError):
+            if actual_error is not None:
+                raise actual_error

From 7dcb70b2b3c31f36e98095c0eaa5b8cc3dcd0064 Mon Sep 17 00:00:00 2001
From: Andreas Eisenbarth <andreas.eisenbarth@embl.de>
Date: Wed, 6 Nov 2024 23:06:18 +0100
Subject: [PATCH 3/9] Wrap element readers into context manager to continue
 reading despite errors

---
 src/spatialdata/_io/io_table.py | 65 +++++++++++++++++-----------
 src/spatialdata/_io/io_zarr.py  | 75 +++++++++++++++++++++++++++------
 2 files changed, 101 insertions(+), 39 deletions(-)

diff --git a/src/spatialdata/_io/io_table.py b/src/spatialdata/_io/io_table.py
index 55136da12..023129d50 100644
--- a/src/spatialdata/_io/io_table.py
+++ b/src/spatialdata/_io/io_table.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 import os
+from json import JSONDecodeError
+from typing import Literal
 
 import numpy as np
 import zarr
@@ -8,14 +10,20 @@
 from anndata import read_zarr as read_anndata_zarr
 from anndata._io.specs import write_elem as write_adata
 from ome_zarr.format import Format
+from zarr.errors import ArrayNotFoundError
 
+from spatialdata._io._utils import BadFileHandleMethod, handle_read_errors
 from spatialdata._io.format import CurrentTablesFormat, TablesFormats, _parse_version
 from spatialdata._logging import logger
 from spatialdata.models import TableModel
 
 
 def _read_table(
-    zarr_store_path: str, group: zarr.Group, subgroup: zarr.Group, tables: dict[str, AnnData]
+    zarr_store_path: str,
+    group: zarr.Group,
+    subgroup: zarr.Group,
+    tables: dict[str, AnnData],
+    on_bad_files: Literal[BadFileHandleMethod.ERROR, BadFileHandleMethod.WARN] = BadFileHandleMethod.ERROR,
 ) -> dict[str, AnnData]:
     """
     Read in tables in the tables Zarr.group of a SpatialData Zarr store.
@@ -30,6 +38,8 @@ def _read_table(
         The subgroup containing the tables.
     tables
         A dictionary of tables.
+    on_bad_files
+        Specifies what to do upon encountering a bad file, e.g. corrupted, invalid or missing files.
 
     Returns
     -------
@@ -40,33 +50,38 @@ def _read_table(
         f_elem = subgroup[table_name]
         f_elem_store = os.path.join(zarr_store_path, f_elem.path)
 
-        tables[table_name] = read_anndata_zarr(f_elem_store)
+        with handle_read_errors(
+            on_bad_files=on_bad_files,
+            location=f"{subgroup.path}/{table_name}",
+            exc_types=(JSONDecodeError, KeyError, ValueError, ArrayNotFoundError),
+        ):
+            tables[table_name] = read_anndata_zarr(f_elem_store)
 
-        f = zarr.open(f_elem_store, mode="r")
-        version = _parse_version(f, expect_attrs_key=False)
-        assert version is not None
-        # since have just one table format, we currently read it but do not use it; if we ever change the format
-        # we can rename the two _ to format and implement the per-format read logic (as we do for shapes)
-        _ = TablesFormats[version]
-        f.store.close()
+            f = zarr.open(f_elem_store, mode="r")
+            version = _parse_version(f, expect_attrs_key=False)
+            assert version is not None
+            # since have just one table format, we currently read it but do not use it; if we ever change the format
+            # we can rename the two _ to format and implement the per-format read logic (as we do for shapes)
+            _ = TablesFormats[version]
+            f.store.close()
 
-        # # replace with format from above
-        # version = "0.1"
-        # format = TablesFormats[version]
-        if TableModel.ATTRS_KEY in tables[table_name].uns:
-            # fill out eventual missing attributes that has been omitted because their value was None
-            attrs = tables[table_name].uns[TableModel.ATTRS_KEY]
-            if "region" not in attrs:
-                attrs["region"] = None
-            if "region_key" not in attrs:
-                attrs["region_key"] = None
-            if "instance_key" not in attrs:
-                attrs["instance_key"] = None
-            # fix type for region
-            if "region" in attrs and isinstance(attrs["region"], np.ndarray):
-                attrs["region"] = attrs["region"].tolist()
+            # # replace with format from above
+            # version = "0.1"
+            # format = TablesFormats[version]
+            if TableModel.ATTRS_KEY in tables[table_name].uns:
+                # fill out eventual missing attributes that has been omitted because their value was None
+                attrs = tables[table_name].uns[TableModel.ATTRS_KEY]
+                if "region" not in attrs:
+                    attrs["region"] = None
+                if "region_key" not in attrs:
+                    attrs["region_key"] = None
+                if "instance_key" not in attrs:
+                    attrs["instance_key"] = None
+                # fix type for region
+                if "region" in attrs and isinstance(attrs["region"], np.ndarray):
+                    attrs["region"] = attrs["region"].tolist()
 
-        count += 1
+            count += 1
 
     logger.debug(f"Found {count} elements in {subgroup}")
     return tables
diff --git a/src/spatialdata/_io/io_zarr.py b/src/spatialdata/_io/io_zarr.py
index bb14fbc83..9c55f8049 100644
--- a/src/spatialdata/_io/io_zarr.py
+++ b/src/spatialdata/_io/io_zarr.py
@@ -1,14 +1,17 @@
 import logging
 import os
 import warnings
+from json import JSONDecodeError
 from pathlib import Path
-from typing import Optional, Union
+from typing import Literal, Optional, Union
 
 import zarr
 from anndata import AnnData
+from pyarrow import ArrowInvalid
+from zarr.errors import ArrayNotFoundError
 
 from spatialdata._core.spatialdata import SpatialData
-from spatialdata._io._utils import ome_zarr_logger
+from spatialdata._io._utils import BadFileHandleMethod, handle_read_errors, ome_zarr_logger
 from spatialdata._io.io_points import _read_points
 from spatialdata._io.io_raster import _read_multiscale
 from spatialdata._io.io_shapes import _read_shapes
@@ -37,7 +40,11 @@ def _open_zarr_store(store: Union[str, Path, zarr.Group]) -> tuple[zarr.Group, s
     return f, f_store_path
 
 
-def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str]] = None) -> SpatialData:
+def read_zarr(
+    store: Union[str, Path, zarr.Group],
+    selection: Optional[tuple[str]] = None,
+    on_bad_files: Literal[BadFileHandleMethod.ERROR, BadFileHandleMethod.WARN] = BadFileHandleMethod.ERROR,
+) -> SpatialData:
     """
     Read a SpatialData dataset from a zarr store (on-disk or remote).
 
@@ -50,6 +57,16 @@ def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str
         List of elements to read from the zarr store (images, labels, points, shapes, table). If None, all elements are
         read.
 
+    on_bad_files
+        Specifies what to do upon encountering a bad file, e.g. corrupted, invalid or missing files.
+        Allowed values are :
+
+        - 'error', raise an exception when a bad file is encountered. Reading aborts immediately
+          with an error.
+        - 'warn', raise a warning when a bad file is encountered and skip that file. A SpatialData
+          object is returned containing only elements that could be read. Failures can only be
+          determined from the warnings.
+
     Returns
     -------
     A SpatialData object.
@@ -76,9 +93,19 @@ def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str
                 continue
             f_elem = group[subgroup_name]
             f_elem_store = os.path.join(f_store_path, f_elem.path)
-            element = _read_multiscale(f_elem_store, raster_type="image")
-            images[subgroup_name] = element
-            count += 1
+            with handle_read_errors(
+                on_bad_files,
+                location=f"{group.path}/{subgroup_name}",
+                exc_types=(
+                    JSONDecodeError,  # JSON parse error
+                    ValueError,  # ome_zarr: Unable to read the NGFF file
+                    KeyError,  # Missing JSON key
+                    ArrayNotFoundError,  # Image chunks missing
+                ),
+            ):
+                element = _read_multiscale(f_elem_store, raster_type="image")
+                images[subgroup_name] = element
+                count += 1
         logger.debug(f"Found {count} elements in {group}")
 
     # read multiscale labels
@@ -92,8 +119,13 @@ def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str
                     continue
                 f_elem = group[subgroup_name]
                 f_elem_store = os.path.join(f_store_path, f_elem.path)
-                labels[subgroup_name] = _read_multiscale(f_elem_store, raster_type="labels")
-                count += 1
+                with handle_read_errors(
+                    on_bad_files,
+                    location=f"{group.path}/{subgroup_name}",
+                    exc_types=(JSONDecodeError, KeyError, ValueError, ArrayNotFoundError),
+                ):
+                    labels[subgroup_name] = _read_multiscale(f_elem_store, raster_type="labels")
+                    count += 1
             logger.debug(f"Found {count} elements in {group}")
 
     # now read rest of the data
@@ -106,8 +138,13 @@ def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str
                 # skip hidden files like .zgroup or .zmetadata
                 continue
             f_elem_store = os.path.join(f_store_path, f_elem.path)
-            points[subgroup_name] = _read_points(f_elem_store)
-            count += 1
+            with handle_read_errors(
+                on_bad_files,
+                location=f"{group.path}/{subgroup_name}",
+                exc_types=(JSONDecodeError, KeyError, ArrowInvalid),
+            ):
+                points[subgroup_name] = _read_points(f_elem_store)
+                count += 1
         logger.debug(f"Found {count} elements in {group}")
 
     if "shapes" in selector and "shapes" in f:
@@ -119,12 +156,22 @@ def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str
                 continue
             f_elem = group[subgroup_name]
             f_elem_store = os.path.join(f_store_path, f_elem.path)
-            shapes[subgroup_name] = _read_shapes(f_elem_store)
-            count += 1
+            with handle_read_errors(
+                on_bad_files,
+                location=f"{group.path}/{subgroup_name}",
+                exc_types=(
+                    JSONDecodeError,
+                    ValueError,
+                    KeyError,
+                    ArrayNotFoundError,
+                ),
+            ):
+                shapes[subgroup_name] = _read_shapes(f_elem_store)
+                count += 1
         logger.debug(f"Found {count} elements in {group}")
     if "tables" in selector and "tables" in f:
         group = f["tables"]
-        tables = _read_table(f_store_path, f, group, tables)
+        tables = _read_table(f_store_path, f, group, tables, on_bad_files=on_bad_files)
 
     if "table" in selector and "table" in f:
         warnings.warn(
@@ -135,7 +182,7 @@ def read_zarr(store: Union[str, Path, zarr.Group], selection: Optional[tuple[str
         )
         subgroup_name = "table"
         group = f[subgroup_name]
-        tables = _read_table(f_store_path, f, group, tables)
+        tables = _read_table(f_store_path, f, group, tables, on_bad_files=on_bad_files)
 
         logger.debug(f"Found {count} elements in {group}")
 

From c2eac30fbbba8833308685ddb33ada908a90dac5 Mon Sep 17 00:00:00 2001
From: Andreas Eisenbarth <andreas.eisenbarth@embl.de>
Date: Wed, 6 Nov 2024 23:12:34 +0100
Subject: [PATCH 4/9] Update change log

---
 CHANGELOG.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b02d76702..f9edfacbc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,12 @@ and this project adheres to [Semantic Versioning][].
 [keep a changelog]: https://keepachangelog.com/en/1.0.0/
 [semantic versioning]: https://semver.org/spec/v2.0.0.html
 
+## [0.x.x] - xxxx-xx-xx
+
+### Minor
+
+-   Added option `on_bad_files="warn"` to `read_zarr` for partially reading corrupted datasets #457
+
 ## [0.2.5] - 2024-06-11
 
 ### Fixed

From 723b51734d7259ef1d2207c6da505ec393e9abbe Mon Sep 17 00:00:00 2001
From: Andreas Eisenbarth <andreas.eisenbarth@embl.de>
Date: Fri, 8 Nov 2024 15:29:45 +0100
Subject: [PATCH 5/9] Fix tests for Python 3.12

---
 src/spatialdata/_io/io_zarr.py |  3 ++-
 tests/io/test_partial_read.py  | 49 +++++++++++++++++++++++++++-------
 2 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/src/spatialdata/_io/io_zarr.py b/src/spatialdata/_io/io_zarr.py
index 9c55f8049..4420240d5 100644
--- a/src/spatialdata/_io/io_zarr.py
+++ b/src/spatialdata/_io/io_zarr.py
@@ -101,6 +101,7 @@ def read_zarr(
                     ValueError,  # ome_zarr: Unable to read the NGFF file
                     KeyError,  # Missing JSON key
                     ArrayNotFoundError,  # Image chunks missing
+                    TypeError,  # instead of ArrayNotFoundError, with dask>=2024.10.0 zarr<=2.18.3
                 ),
             ):
                 element = _read_multiscale(f_elem_store, raster_type="image")
@@ -122,7 +123,7 @@ def read_zarr(
                 with handle_read_errors(
                     on_bad_files,
                     location=f"{group.path}/{subgroup_name}",
-                    exc_types=(JSONDecodeError, KeyError, ValueError, ArrayNotFoundError),
+                    exc_types=(JSONDecodeError, KeyError, ValueError, ArrayNotFoundError, TypeError),
                 ):
                     labels[subgroup_name] = _read_multiscale(f_elem_store, raster_type="labels")
                     count += 1
diff --git a/tests/io/test_partial_read.py b/tests/io/test_partial_read.py
index b55d13497..64e57793b 100644
--- a/tests/io/test_partial_read.py
+++ b/tests/io/test_partial_read.py
@@ -4,6 +4,8 @@
 import re
 import shutil
 import tempfile
+from collections.abc import Generator, Iterable
+from contextlib import contextmanager
 from dataclasses import dataclass
 from json import JSONDecodeError
 from pathlib import Path
@@ -23,10 +25,36 @@
     import _pytest.fixtures
 
 
+@contextmanager
+def pytest_warns_multiple(
+    expected_warning: type[Warning] | tuple[type[Warning], ...] = Warning, matches: Iterable[str] = ()
+) -> Generator[None, None, None]:
+    """
+    Assert that code raises a warnings matching particular patterns.
+
+    Like `pytest.warns`, but with multiple patterns which each must match a warning.
+
+    Parameters
+    ----------
+    expected_warning
+        A warning class or a tuple of warning classes for which at least one matching warning must be found
+    matches
+        Regular expression patterns that of which each must be found in at least one warning message.
+    """
+    if not matches:
+        yield
+    else:
+        with (
+            pytest.warns(expected_warning, match=matches[0]),
+            pytest_warns_multiple(expected_warning, matches=matches[1:]),
+        ):
+            yield
+
+
 @pytest.fixture(scope="module")
 def test_case(request: _pytest.fixtures.SubRequest):
     """
-    Fixture that helps using fixtures as arguments in parametrize.
+    Fixture that helps to use fixtures as arguments in parametrize.
 
     The fixture `test_case` takes up values from other fixture functions used as parameters.
     """
@@ -94,8 +122,11 @@ def sdata_with_corrupted_image_chunks(session_tmp_path: Path) -> PartialReadTest
     return PartialReadTestCase(
         path=sdata_path,
         expected_elements=not_corrupted,
-        expected_exceptions=ArrayNotFoundError,
-        warnings_patterns=[rf"images/{corrupted}: ArrayNotFoundError"],
+        expected_exceptions=(
+            ArrayNotFoundError,
+            TypeError,  # instead of ArrayNotFoundError, with dask>=2024.10.0 zarr<=2.18.3
+        ),
+        warnings_patterns=[rf"images/{corrupted}: (ArrayNotFoundError|TypeError)"],
     )
 
 
@@ -154,8 +185,11 @@ def sdata_with_missing_image_chunks(
     return PartialReadTestCase(
         path=sdata_path,
         expected_elements=not_corrupted,
-        expected_exceptions=ArrayNotFoundError,
-        warnings_patterns=[rf"images/{corrupted}: ArrayNotFoundError"],
+        expected_exceptions=(
+            ArrayNotFoundError,
+            TypeError,  # instead of ArrayNotFoundError, with dask>=2024.10.0 zarr<=2.18.3
+        ),
+        warnings_patterns=[rf"images/{corrupted}: (ArrayNotFoundError|TypeError)"],
     )
 
 
@@ -243,11 +277,8 @@ def test_read_zarr_with_error(test_case: PartialReadTestCase):
     indirect=True,
 )
 def test_read_zarr_with_warnings(test_case: PartialReadTestCase):
-    with pytest.warns(UserWarning) as all_warnings:
+    with pytest_warns_multiple(UserWarning, matches=test_case.warnings_patterns):
         actual: SpatialData = read_zarr(test_case.path, on_bad_files="warn")
 
-    for warning_pattern, warning_record in zip(test_case.warnings_patterns, all_warnings):
-        assert re.search(warning_pattern, warning_record.message.args[0])
-
     for elem in test_case.expected_elements:
         assert elem in actual

From f30fdc2b583a0f99582c56b18af6c391ad057f16 Mon Sep 17 00:00:00 2001
From: Andreas Eisenbarth <andreas.eisenbarth@embl.de>
Date: Fri, 8 Nov 2024 15:36:31 +0100
Subject: [PATCH 6/9] Handle corruption of element type directories ("images/")

---
 src/spatialdata/_io/io_zarr.py | 174 +++++++++++++++++++--------------
 tests/io/test_partial_read.py  |  24 ++++-
 2 files changed, 125 insertions(+), 73 deletions(-)

diff --git a/src/spatialdata/_io/io_zarr.py b/src/spatialdata/_io/io_zarr.py
index 4420240d5..59f3e25ce 100644
--- a/src/spatialdata/_io/io_zarr.py
+++ b/src/spatialdata/_io/io_zarr.py
@@ -8,7 +8,7 @@
 import zarr
 from anndata import AnnData
 from pyarrow import ArrowInvalid
-from zarr.errors import ArrayNotFoundError
+from zarr.errors import ArrayNotFoundError, MetadataError
 
 from spatialdata._core.spatialdata import SpatialData
 from spatialdata._io._utils import BadFileHandleMethod, handle_read_errors, ome_zarr_logger
@@ -85,34 +85,12 @@ def read_zarr(
 
     # read multiscale images
     if "images" in selector and "images" in f:
-        group = f["images"]
-        count = 0
-        for subgroup_name in group:
-            if Path(subgroup_name).name.startswith("."):
-                # skip hidden files like .zgroup or .zmetadata
-                continue
-            f_elem = group[subgroup_name]
-            f_elem_store = os.path.join(f_store_path, f_elem.path)
-            with handle_read_errors(
-                on_bad_files,
-                location=f"{group.path}/{subgroup_name}",
-                exc_types=(
-                    JSONDecodeError,  # JSON parse error
-                    ValueError,  # ome_zarr: Unable to read the NGFF file
-                    KeyError,  # Missing JSON key
-                    ArrayNotFoundError,  # Image chunks missing
-                    TypeError,  # instead of ArrayNotFoundError, with dask>=2024.10.0 zarr<=2.18.3
-                ),
-            ):
-                element = _read_multiscale(f_elem_store, raster_type="image")
-                images[subgroup_name] = element
-                count += 1
-        logger.debug(f"Found {count} elements in {group}")
-
-    # read multiscale labels
-    with ome_zarr_logger(logging.ERROR):
-        if "labels" in selector and "labels" in f:
-            group = f["labels"]
+        with handle_read_errors(
+            on_bad_files,
+            location="images",
+            exc_types=(JSONDecodeError, MetadataError),
+        ):
+            group = f["images"]
             count = 0
             for subgroup_name in group:
                 if Path(subgroup_name).name.startswith("."):
@@ -123,56 +101,103 @@ def read_zarr(
                 with handle_read_errors(
                     on_bad_files,
                     location=f"{group.path}/{subgroup_name}",
-                    exc_types=(JSONDecodeError, KeyError, ValueError, ArrayNotFoundError, TypeError),
+                    exc_types=(
+                        JSONDecodeError,  # JSON parse error
+                        ValueError,  # ome_zarr: Unable to read the NGFF file
+                        KeyError,  # Missing JSON key
+                        ArrayNotFoundError,  # Image chunks missing
+                        TypeError,  # instead of ArrayNotFoundError, with dask>=2024.10.0 zarr<=2.18.3
+                    ),
                 ):
-                    labels[subgroup_name] = _read_multiscale(f_elem_store, raster_type="labels")
+                    element = _read_multiscale(f_elem_store, raster_type="image")
+                    images[subgroup_name] = element
                     count += 1
             logger.debug(f"Found {count} elements in {group}")
 
-    # now read rest of the data
-    if "points" in selector and "points" in f:
-        group = f["points"]
-        count = 0
-        for subgroup_name in group:
-            f_elem = group[subgroup_name]
-            if Path(subgroup_name).name.startswith("."):
-                # skip hidden files like .zgroup or .zmetadata
-                continue
-            f_elem_store = os.path.join(f_store_path, f_elem.path)
+    # read multiscale labels
+    with ome_zarr_logger(logging.ERROR):
+        if "labels" in selector and "labels" in f:
             with handle_read_errors(
                 on_bad_files,
-                location=f"{group.path}/{subgroup_name}",
-                exc_types=(JSONDecodeError, KeyError, ArrowInvalid),
+                location="labels",
+                exc_types=(JSONDecodeError, MetadataError),
             ):
-                points[subgroup_name] = _read_points(f_elem_store)
-                count += 1
-        logger.debug(f"Found {count} elements in {group}")
+                group = f["labels"]
+                count = 0
+                for subgroup_name in group:
+                    if Path(subgroup_name).name.startswith("."):
+                        # skip hidden files like .zgroup or .zmetadata
+                        continue
+                    f_elem = group[subgroup_name]
+                    f_elem_store = os.path.join(f_store_path, f_elem.path)
+                    with handle_read_errors(
+                        on_bad_files,
+                        location=f"{group.path}/{subgroup_name}",
+                        exc_types=(JSONDecodeError, KeyError, ValueError, ArrayNotFoundError, TypeError),
+                    ):
+                        labels[subgroup_name] = _read_multiscale(f_elem_store, raster_type="labels")
+                        count += 1
+                logger.debug(f"Found {count} elements in {group}")
+
+    # now read rest of the data
+    if "points" in selector and "points" in f:
+        with handle_read_errors(
+            on_bad_files,
+            location="points",
+            exc_types=(JSONDecodeError, MetadataError),
+        ):
+            group = f["points"]
+            count = 0
+            for subgroup_name in group:
+                f_elem = group[subgroup_name]
+                if Path(subgroup_name).name.startswith("."):
+                    # skip hidden files like .zgroup or .zmetadata
+                    continue
+                f_elem_store = os.path.join(f_store_path, f_elem.path)
+                with handle_read_errors(
+                    on_bad_files,
+                    location=f"{group.path}/{subgroup_name}",
+                    exc_types=(JSONDecodeError, KeyError, ArrowInvalid),
+                ):
+                    points[subgroup_name] = _read_points(f_elem_store)
+                    count += 1
+            logger.debug(f"Found {count} elements in {group}")
 
     if "shapes" in selector and "shapes" in f:
-        group = f["shapes"]
-        count = 0
-        for subgroup_name in group:
-            if Path(subgroup_name).name.startswith("."):
-                # skip hidden files like .zgroup or .zmetadata
-                continue
-            f_elem = group[subgroup_name]
-            f_elem_store = os.path.join(f_store_path, f_elem.path)
-            with handle_read_errors(
-                on_bad_files,
-                location=f"{group.path}/{subgroup_name}",
-                exc_types=(
-                    JSONDecodeError,
-                    ValueError,
-                    KeyError,
-                    ArrayNotFoundError,
-                ),
-            ):
-                shapes[subgroup_name] = _read_shapes(f_elem_store)
-                count += 1
-        logger.debug(f"Found {count} elements in {group}")
+        with handle_read_errors(
+            on_bad_files,
+            location="shapes",
+            exc_types=(JSONDecodeError, MetadataError),
+        ):
+            group = f["shapes"]
+            count = 0
+            for subgroup_name in group:
+                if Path(subgroup_name).name.startswith("."):
+                    # skip hidden files like .zgroup or .zmetadata
+                    continue
+                f_elem = group[subgroup_name]
+                f_elem_store = os.path.join(f_store_path, f_elem.path)
+                with handle_read_errors(
+                    on_bad_files,
+                    location=f"{group.path}/{subgroup_name}",
+                    exc_types=(
+                        JSONDecodeError,
+                        ValueError,
+                        KeyError,
+                        ArrayNotFoundError,
+                    ),
+                ):
+                    shapes[subgroup_name] = _read_shapes(f_elem_store)
+                    count += 1
+            logger.debug(f"Found {count} elements in {group}")
     if "tables" in selector and "tables" in f:
-        group = f["tables"]
-        tables = _read_table(f_store_path, f, group, tables, on_bad_files=on_bad_files)
+        with handle_read_errors(
+            on_bad_files,
+            location="tables",
+            exc_types=(JSONDecodeError, MetadataError),
+        ):
+            group = f["tables"]
+            tables = _read_table(f_store_path, f, group, tables, on_bad_files=on_bad_files)
 
     if "table" in selector and "table" in f:
         warnings.warn(
@@ -182,10 +207,15 @@ def read_zarr(
             stacklevel=2,
         )
         subgroup_name = "table"
-        group = f[subgroup_name]
-        tables = _read_table(f_store_path, f, group, tables, on_bad_files=on_bad_files)
+        with handle_read_errors(
+            on_bad_files,
+            location=subgroup_name,
+            exc_types=(JSONDecodeError, MetadataError),
+        ):
+            group = f[subgroup_name]
+            tables = _read_table(f_store_path, f, group, tables, on_bad_files=on_bad_files)
 
-        logger.debug(f"Found {count} elements in {group}")
+            logger.debug(f"Found {count} elements in {group}")
 
     sdata = SpatialData(
         images=images,
diff --git a/tests/io/test_partial_read.py b/tests/io/test_partial_read.py
index 64e57793b..a8386b565 100644
--- a/tests/io/test_partial_read.py
+++ b/tests/io/test_partial_read.py
@@ -16,7 +16,7 @@
 import pytest
 import zarr
 from pyarrow import ArrowInvalid
-from zarr.errors import ArrayNotFoundError
+from zarr.errors import ArrayNotFoundError, MetadataError
 
 from spatialdata import SpatialData, read_zarr
 from spatialdata.datasets import blobs
@@ -84,6 +84,26 @@ def session_tmp_path(request: _pytest.fixtures.SubRequest) -> Path:
     return Path(directory)
 
 
+@pytest.fixture(scope="module")
+def sdata_with_corrupted_elem_type_zgroup(session_tmp_path: Path) -> PartialReadTestCase:
+    # .zattrs is a zero-byte file, aborted during write, or contains invalid JSON syntax
+    sdata = blobs()
+    sdata_path = session_tmp_path / "sdata_with_corrupted_top_level_zgroup.zarr"
+    sdata.write(sdata_path)
+
+    (sdata_path / "images" / ".zgroup").unlink()  # missing, not detected by reader
+    (sdata_path / "labels" / ".zgroup").write_text("")  # corrupted
+    (sdata_path / "points" / ".zgroup").write_text("{}")  # invalid
+    not_corrupted = [name for t, name, _ in sdata.gen_elements() if t not in ("images", "labels", "points")]
+
+    return PartialReadTestCase(
+        path=sdata_path,
+        expected_elements=not_corrupted,
+        expected_exceptions=(JSONDecodeError, MetadataError),
+        warnings_patterns=["labels: JSONDecodeError", "points: MetadataError"],
+    )
+
+
 @pytest.fixture(scope="module")
 def sdata_with_corrupted_zattrs(session_tmp_path: Path) -> PartialReadTestCase:
     # .zattrs is a zero-byte file, aborted during write, or contains invalid JSON syntax
@@ -251,6 +271,7 @@ def sdata_with_invalid_zattrs_table_region_not_found(session_tmp_path: Path) ->
         sdata_with_missing_image_chunks,
         sdata_with_invalid_zattrs_violating_spec,
         sdata_with_invalid_zattrs_table_region_not_found,
+        sdata_with_corrupted_elem_type_zgroup,
     ],
     indirect=True,
 )
@@ -273,6 +294,7 @@ def test_read_zarr_with_error(test_case: PartialReadTestCase):
         sdata_with_missing_image_chunks,
         sdata_with_invalid_zattrs_violating_spec,
         sdata_with_invalid_zattrs_table_region_not_found,
+        sdata_with_corrupted_elem_type_zgroup,
     ],
     indirect=True,
 )

From 109af39128059d45005d45fd769e5cfedd2c47de Mon Sep 17 00:00:00 2001
From: Luca Marconato <m.lucalmer@gmail.com>
Date: Mon, 20 Jan 2025 16:42:53 +0100
Subject: [PATCH 7/9] update notebooks

---
 docs/tutorials/notebooks | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/tutorials/notebooks b/docs/tutorials/notebooks
index 3663b4bf0..9049252ec 160000
--- a/docs/tutorials/notebooks
+++ b/docs/tutorials/notebooks
@@ -1 +1 @@
-Subproject commit 3663b4bf05d8d2c79b77f2db226338922705def4
+Subproject commit 9049252ecdd48ad077e40f9f434db4c3fb078f22

From c357f9249172f4d428e0b07fa00cd282af112fef Mon Sep 17 00:00:00 2001
From: Luca Marconato <m.lucalmer@gmail.com>
Date: Fri, 31 Jan 2025 15:51:07 +0100
Subject: [PATCH 8/9] update notebooks

---
 docs/tutorials/notebooks | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/tutorials/notebooks b/docs/tutorials/notebooks
index 9049252ec..49ad4ef72 160000
--- a/docs/tutorials/notebooks
+++ b/docs/tutorials/notebooks
@@ -1 +1 @@
-Subproject commit 9049252ecdd48ad077e40f9f434db4c3fb078f22
+Subproject commit 49ad4ef723f65ed56886c14bea242332f9244d8a

From 56583db10ee7738d0bcad4003f6ac075d937bb58 Mon Sep 17 00:00:00 2001
From: Luca Marconato <m.lucalmer@gmail.com>
Date: Fri, 31 Jan 2025 15:52:47 +0100
Subject: [PATCH 9/9] minor adjustments after code review

---
 tests/io/test_partial_read.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/tests/io/test_partial_read.py b/tests/io/test_partial_read.py
index a8386b565..7c7cdbfa2 100644
--- a/tests/io/test_partial_read.py
+++ b/tests/io/test_partial_read.py
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import json
+import os
 import re
-import shutil
 import tempfile
 from collections.abc import Generator, Iterable
 from contextlib import contextmanager
@@ -91,7 +91,8 @@ def sdata_with_corrupted_elem_type_zgroup(session_tmp_path: Path) -> PartialRead
     sdata_path = session_tmp_path / "sdata_with_corrupted_top_level_zgroup.zarr"
     sdata.write(sdata_path)
 
-    (sdata_path / "images" / ".zgroup").unlink()  # missing, not detected by reader
+    (sdata_path / "images" / ".zgroup").unlink()  # missing, not detected by reader. So it doesn't raise an exception,
+    # but it will not be found in the read SpatialData object
     (sdata_path / "labels" / ".zgroup").write_text("")  # corrupted
     (sdata_path / "points" / ".zgroup").write_text("{}")  # invalid
     not_corrupted = [name for t, name, _ in sdata.gen_elements() if t not in ("images", "labels", "points")]
@@ -135,8 +136,10 @@ def sdata_with_corrupted_image_chunks(session_tmp_path: Path) -> PartialReadTest
     sdata.write(sdata_path)
 
     corrupted = "blobs_image"
-    shutil.rmtree(sdata_path / "images" / corrupted / "0")
+    os.unlink(sdata_path / "images" / corrupted / "0" / ".zarray")  # it will hide the "0" array from the Zarr reader
+    os.rename(sdata_path / "images" / corrupted / "0", sdata_path / "images" / corrupted / "0_corrupted")
     (sdata_path / "images" / corrupted / "0").touch()
+
     not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted]
 
     return PartialReadTestCase(
@@ -158,8 +161,12 @@ def sdata_with_corrupted_parquet(session_tmp_path: Path) -> PartialReadTestCase:
     sdata.write(sdata_path)
 
     corrupted = "blobs_points"
-    shutil.rmtree(sdata_path / "points" / corrupted / "points.parquet")
+    os.rename(
+        sdata_path / "points" / corrupted / "points.parquet",
+        sdata_path / "points" / corrupted / "points_corrupted.parquet",
+    )
     (sdata_path / "points" / corrupted / "points.parquet").touch()
+
     not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted]
 
     return PartialReadTestCase(
@@ -199,7 +206,9 @@ def sdata_with_missing_image_chunks(
     sdata.write(sdata_path)
 
     corrupted = "blobs_image"
-    shutil.rmtree(sdata_path / "images" / corrupted / "0")
+    os.unlink(sdata_path / "images" / corrupted / "0" / ".zarray")
+    os.rename(sdata_path / "images" / corrupted / "0", sdata_path / "images" / corrupted / "0_corrupted")
+
     not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted]
 
     return PartialReadTestCase(
@@ -245,7 +254,8 @@ def sdata_with_invalid_zattrs_table_region_not_found(session_tmp_path: Path) ->
 
     corrupted = "blobs_labels"
     # The element data is missing
-    shutil.rmtree(sdata_path / "labels" / corrupted)
+    os.unlink(sdata_path / "labels" / corrupted / ".zgroup")
+    os.rename(sdata_path / "labels" / corrupted, sdata_path / "labels" / f"{corrupted}_corrupted")
     # But the labels element is referenced as a region in a table
     regions = zarr.open_group(sdata_path / "tables" / "table" / "obs" / "region", mode="r")
     assert corrupted in np.asarray(regions.categories)[regions.codes]
@@ -302,5 +312,5 @@ def test_read_zarr_with_warnings(test_case: PartialReadTestCase):
     with pytest_warns_multiple(UserWarning, matches=test_case.warnings_patterns):
         actual: SpatialData = read_zarr(test_case.path, on_bad_files="warn")
 
-    for elem in test_case.expected_elements:
-        assert elem in actual
+    actual_elements = {name for _, name, _ in actual.gen_elements()}
+    assert set(test_case.expected_elements) == actual_elements