Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 151 additions & 1 deletion adit-client/adit_client/client.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import importlib.metadata
from typing import Iterator
import os
from io import BytesIO
from typing import Iterator, Union

from dicomweb_client import DICOMwebClient, session_utils
from pydicom import Dataset
Expand Down Expand Up @@ -189,6 +191,154 @@ def store_images(self, ae_title: str, images: list[Dataset]) -> Dataset:
"""Store images."""
return self._create_dicom_web_client(ae_title).store_instances(images)

def retrieve_nifti_study(self, ae_title: str, study_uid: str) -> list[tuple[str, BytesIO]]:
"""Retrieve NIfTI files for a study."""
url = f"{self.server_url}/api/dicom-web/{ae_title}/wadors/studies/{study_uid}/nifti"
dicomweb_client = self._create_dicom_web_client(ae_title)
response = dicomweb_client._http_get(
url,
headers={"Accept": "multipart/related; type=application/octet-stream"},
stream=True,
)
return list(self._iter_multipart_response(response, stream=False))

def iter_nifti_study(self, ae_title: str, study_uid: str) -> Iterator[tuple[str, BytesIO]]:
"""Iterate over NIfTI files for a study."""
url = f"{self.server_url}/api/dicom-web/{ae_title}/wadors/studies/{study_uid}/nifti"
dicomweb_client = self._create_dicom_web_client(ae_title)
response = dicomweb_client._http_get(
url,
headers={"Accept": "multipart/related; type=application/octet-stream"},
stream=True,
)
yield from self._iter_multipart_response(response, stream=True)

def retrieve_nifti_series(
self, ae_title: str, study_uid: str, series_uid: str
) -> list[tuple[str, BytesIO]]:
"""Retrieve NIfTI files for a series."""
url = (
f"{self.server_url}/api/dicom-web/{ae_title}/wadors/studies/{study_uid}/"
f"series/{series_uid}/nifti"
)
dicomweb_client = self._create_dicom_web_client(ae_title)
response = dicomweb_client._http_get(
url,
headers={"Accept": "multipart/related; type=application/octet-stream"},
stream=True,
)
return list(self._iter_multipart_response(response, stream=False))

def iter_nifti_series(
self, ae_title: str, study_uid: str, series_uid: str
) -> Iterator[tuple[str, BytesIO]]:
"""Iterate over NIfTI files for a series."""
url = (
f"{self.server_url}/api/dicom-web/{ae_title}/wadors/studies/{study_uid}/"
f"series/{series_uid}/nifti"
)
dicomweb_client = self._create_dicom_web_client(ae_title)
response = dicomweb_client._http_get(
url,
headers={"Accept": "multipart/related; type=application/octet-stream"},
stream=True,
)
yield from self._iter_multipart_response(response, stream=True)

def retrieve_nifti_image(
self, ae_title: str, study_uid: str, series_uid: str, image_uid: str
) -> list[tuple[str, BytesIO]]:
"""Retrieve NIfTI files for a single image."""
url = (
f"{self.server_url}/api/dicom-web/{ae_title}/wadors/studies/{study_uid}/"
f"series/{series_uid}/instances/{image_uid}/nifti"
)
dicomweb_client = self._create_dicom_web_client(ae_title)
response = dicomweb_client._http_get(
url,
headers={"Accept": "multipart/related; type=application/octet-stream"},
stream=True,
)
return list(self._iter_multipart_response(response, stream=False))

def iter_nifti_image(
self, ae_title: str, study_uid: str, series_uid: str, image_uid: str
) -> Iterator[tuple[str, BytesIO]]:
"""Iterate over NIfTI files for a single image."""
url = (
f"{self.server_url}/api/dicom-web/{ae_title}/wadors/studies/{study_uid}/"
f"series/{series_uid}/instances/{image_uid}/nifti"
)
dicomweb_client = self._create_dicom_web_client(ae_title)
response = dicomweb_client._http_get(
url,
headers={"Accept": "multipart/related; type=application/octet-stream"},
stream=True,
)
yield from self._iter_multipart_response(response, stream=True)
Comment on lines +194 to +278
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There is significant code duplication across the retrieve_nifti_* and iter_nifti_* methods for study, series, and image levels. The logic for URL construction, making the HTTP request, and processing the response is nearly identical in all six methods. This makes the code harder to maintain.

Consider refactoring this logic into one or two private helper methods. For example, you could have a method that constructs the URL and another that performs the request and yields the multipart content. The public methods would then just call these helpers with the appropriate parameters. This would reduce redundancy and improve maintainability.

Comment on lines +194 to +278
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

There is significant code duplication across the retrieve_nifti_* and iter_nifti_* methods. The logic for URL construction and making the HTTP request is repeated in each method. This can be refactored to improve maintainability and reduce redundancy.

I suggest introducing a private helper method to handle the common logic of making the request. This will make the public methods much cleaner and easier to manage.

    def _make_nifti_request(self, path: str):
        url = f"{self.server_url}{path}"
        dicomweb_client = self._create_dicom_web_client("")
        return dicomweb_client._http_get(
            url,
            headers={"Accept": "multipart/related; type=application/octet-stream"},
            stream=True,
        )

    def retrieve_nifti_study(self, ae_title: str, study_uid: str) -> list[tuple[str, BytesIO]]:
        """Retrieve NIfTI files for a study."""
        path = f"/api/dicom-web/{ae_title}/wadors/studies/{study_uid}/nifti"
        response = self._make_nifti_request(path)
        return list(self._iter_multipart_response(response, stream=False))

    def iter_nifti_study(self, ae_title: str, study_uid: str) -> Iterator[tuple[str, BytesIO]]:
        """Iterate over NIfTI files for a study."""
        path = f"/api/dicom-web/{ae_title}/wadors/studies/{study_uid}/nifti"
        response = self._make_nifti_request(path)
        yield from self._iter_multipart_response(response, stream=True)

    def retrieve_nifti_series(
        self, ae_title: str, study_uid: str, series_uid: str
    ) -> list[tuple[str, BytesIO]]:
        """Retrieve NIfTI files for a series."""
        path = (
            f"/api/dicom-web/{ae_title}/wadors/studies/{study_uid}/"
            f"series/{series_uid}/nifti"
        )
        response = self._make_nifti_request(path)
        return list(self._iter_multipart_response(response, stream=False))

    def iter_nifti_series(
        self, ae_title: str, study_uid: str, series_uid: str
    ) -> Iterator[tuple[str, BytesIO]]:
        """Iterate over NIfTI files for a series."""
        path = (
            f"/api/dicom-web/{ae_title}/wadors/studies/{study_uid}/"
            f"series/{series_uid}/nifti"
        )
        response = self._make_nifti_request(path)
        yield from self._iter_multipart_response(response, stream=True)

    def retrieve_nifti_image(
        self, ae_title: str, study_uid: str, series_uid: str, image_uid: str
    ) -> list[tuple[str, BytesIO]]:
        """Retrieve NIfTI files for a single image."""
        path = (
            f"/api/dicom-web/{ae_title}/wadors/studies/{study_uid}/"
            f"series/{series_uid}/instances/{image_uid}/nifti"
        )
        response = self._make_nifti_request(path)
        return list(self._iter_multipart_response(response, stream=False))

    def iter_nifti_image(
        self, ae_title: str, study_uid: str, series_uid: str, image_uid: str
    ) -> Iterator[tuple[str, BytesIO]]:
        """Iterate over NIfTI files for a single image."""
        path = (
            f"/api/dicom-web/{ae_title}/wadors/studies/{study_uid}/"
            f"series/{series_uid}/instances/{image_uid}/nifti"
        )
        response = self._make_nifti_request(path)
        yield from self._iter_multipart_response(response, stream=True)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion. I considered this but decided to keep the explicit methods as-is. The "duplication" is only ~3 lines of boilerplate per method (URL construction + _http_get call), and having six clearly-named methods with distinct type signatures provides better IDE support (autocomplete, go-to-definition) and makes each method's URL path immediately visible without indirection. The proposed helper doesn't meaningfully reduce complexity, it just moves it behind another layer


def _extract_filename(self, content_disposition: str | None) -> str:
"""Extract filename from Content-Disposition header."""
if not content_disposition or "filename=" not in content_disposition:
raise ValueError("No filename found in Content-Disposition header")
filename = content_disposition.split("filename=")[1].strip('"')
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-medium medium

The filename extraction from the Content-Disposition header is fragile and insecure. It fails to handle multiple parameters correctly and does not sanitize the resulting filename, which could lead to path traversal in applications using this library if they use the returned filename to save files without further validation. Please use a robust parsing method and sanitize the filename to remove any path components or dangerous characters.

filename = os.path.basename(filename)
if not filename:
raise ValueError("Content-Disposition filename resolved to empty string")
return filename

def _extract_part_content_with_headers(self, part: bytes) -> Union[bytes, None]:
"""Extract content from a multipart part, keeping headers intact.

Used to patch DICOMwebClient's _extract_part_content to allow access
to per-part headers (especially Content-Disposition for filenames).
"""
if part in (b"", b"--", b"\r\n") or part.startswith(b"--\r\n"):
return None
return part

# NOTE: This method monkey-patches DICOMwebClient._extract_part_content and
# uses DICOMwebClient._decode_multipart_message — both private APIs.
# These methods are not part of the public interface and may change without
# warning. If dicomweb-client is upgraded, verify that these private methods
# still exist and behave the same way.
def _iter_multipart_response(self, response, stream=False) -> Iterator[tuple[str, BytesIO]]:
"""Parse a multipart response, yielding (filename, content) tuples."""
dicomweb_client = self._create_dicom_web_client("")
original_extract_method = dicomweb_client._extract_part_content

try:
dicomweb_client._extract_part_content = self._extract_part_content_with_headers

for part in dicomweb_client._decode_multipart_message(response, stream=stream):
headers = {}
content = part

idx = part.find(b"\r\n\r\n")
if idx > -1:
headers_bytes = part[:idx]
content = part[idx + 4 :]

for header_line in headers_bytes.split(b"\r\n"):
if header_line and b":" in header_line:
name, value = header_line.split(b":", 1)
headers[name.decode("utf-8").strip()] = value.decode("utf-8").strip()

content_disposition = headers.get("Content-Disposition")
if content_disposition:
filename = self._extract_filename(content_disposition)
else:
for header, value in response.headers.items():
if header.lower() == "content-disposition":
filename = self._extract_filename(value)
break
else:
raise ValueError("No Content-Disposition header found in response")

yield (filename, BytesIO(content))
finally:
dicomweb_client._extract_part_content = original_extract_method
Comment on lines +305 to +340
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The _iter_multipart_response method relies on monkey-patching the private _extract_part_content method of dicomweb_client.DICOMwebClient. This is a fragile approach that can break with future updates to the dicomweb-client library. It would be more robust to handle the HTTP request and multipart parsing without relying on the client's internal implementation for this specific case.

A more robust approach would be to use the underlying requests.Session from the DICOMwebClient to make the GET request and then use a dedicated multipart parsing library (like requests_toolbelt.multipart.decoder) or parse the response stream manually. This would decouple your client from the internal implementation details of dicomweb-client.


def _create_dicom_web_client(self, ae_title: str) -> DICOMwebClient:
session = session_utils.create_session()

Expand Down
122 changes: 122 additions & 0 deletions adit-client/tests/test_nifti_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from unittest.mock import MagicMock, patch

import pytest
from adit_client.client import AditClient


class TestExtractFilename:
def test_valid_filename(self):
client = AditClient("http://localhost", "token")
result = client._extract_filename('attachment; filename="scan.nii.gz"')
assert result == "scan.nii.gz"

def test_filename_with_path(self):
client = AditClient("http://localhost", "token")
result = client._extract_filename('attachment; filename="path/to/scan.nii.gz"')
assert result == "scan.nii.gz"

def test_missing_header(self):
client = AditClient("http://localhost", "token")
with pytest.raises(ValueError, match="No filename found"):
client._extract_filename(None)

def test_no_filename_field(self):
client = AditClient("http://localhost", "token")
with pytest.raises(ValueError, match="No filename found"):
client._extract_filename("attachment")


class TestExtractPartContentWithHeaders:
def test_empty_bytes(self):
client = AditClient("http://localhost", "token")
assert client._extract_part_content_with_headers(b"") is None

def test_boundary_marker(self):
client = AditClient("http://localhost", "token")
assert client._extract_part_content_with_headers(b"--") is None

def test_crlf(self):
client = AditClient("http://localhost", "token")
assert client._extract_part_content_with_headers(b"\r\n") is None

def test_boundary_with_crlf(self):
client = AditClient("http://localhost", "token")
assert client._extract_part_content_with_headers(b"--\r\n") is None

def test_normal_content(self):
client = AditClient("http://localhost", "token")
part = b"Content-Type: application/octet-stream\r\n\r\ndata"
assert client._extract_part_content_with_headers(part) == part


class TestIterMultipartResponse:
def test_parses_parts_with_content_disposition(self):
client = AditClient("http://localhost", "token")

# Create a fake part with headers + content separated by \r\n\r\n
part = (
b"Content-Type: application/octet-stream\r\n"
b'Content-Disposition: attachment; filename="scan.nii.gz"\r\n'
b"\r\n"
b"nifti content"
)

fake_dicomweb_client = MagicMock()
fake_dicomweb_client._decode_multipart_message.return_value = [part]
# Let _extract_part_content return part as-is (our patched method)
fake_dicomweb_client._extract_part_content = client._extract_part_content_with_headers

response = MagicMock()
response.headers = {}

with patch.object(client, "_create_dicom_web_client", return_value=fake_dicomweb_client):
results = list(client._iter_multipart_response(response, stream=False))

assert len(results) == 1
assert results[0][0] == "scan.nii.gz"
assert results[0][1].read() == b"nifti content"

def test_falls_back_to_response_headers(self):
client = AditClient("http://localhost", "token")

# Part without Content-Disposition (no \r\n\r\n separator means no headers parsed)
part = b"just raw content without headers"

fake_dicomweb_client = MagicMock()
fake_dicomweb_client._decode_multipart_message.return_value = [part]
fake_dicomweb_client._extract_part_content = client._extract_part_content_with_headers

fake_headers = MagicMock()
fake_headers.items.return_value = [
("content-disposition", 'attachment; filename="fallback.nii.gz"')
]
fake_headers.get.return_value = None

response = MagicMock()
response.headers = fake_headers

with patch.object(client, "_create_dicom_web_client", return_value=fake_dicomweb_client):
results = list(client._iter_multipart_response(response, stream=False))

assert len(results) == 1
assert results[0][0] == "fallback.nii.gz"

def test_no_disposition_anywhere_raises(self):
client = AditClient("http://localhost", "token")

part = b"content without any disposition"

fake_dicomweb_client = MagicMock()
fake_dicomweb_client._decode_multipart_message.return_value = [part]
fake_dicomweb_client._extract_part_content = client._extract_part_content_with_headers

fake_headers = MagicMock()
fake_headers.items.return_value = []
fake_headers.get.return_value = None

response = MagicMock()
response.headers = fake_headers

with patch.object(client, "_create_dicom_web_client", return_value=fake_dicomweb_client):
with pytest.raises(ValueError, match="No Content-Disposition"):
list(client._iter_multipart_response(response, stream=False))
42 changes: 42 additions & 0 deletions adit/core/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,48 @@ def is_retriable_http_status(status_code: int) -> bool:
return status_code in retriable_status_codes


class DcmToNiftiConversionError(Exception):
"""Base exception for DICOM to NIfTI conversion errors."""

pass


class NoValidDicomError(DcmToNiftiConversionError):
"""Exception raised when no valid DICOM files are found."""

pass


class InvalidDicomError(DcmToNiftiConversionError):
"""Exception raised when DICOM files are invalid or corrupt."""

pass


class OutputDirectoryError(DcmToNiftiConversionError):
"""Exception raised when there are issues with the output directory."""

pass


class InputDirectoryError(DcmToNiftiConversionError):
"""Exception raised when there are issues with the input directory."""

pass


class ExternalToolError(DcmToNiftiConversionError):
"""Exception raised when there are issues with the external dcm2niix tool."""

pass


class NoSpatialDataError(DcmToNiftiConversionError):
"""Exception raised when DICOM data doesn't have spatial attributes."""

pass


class BatchFileSizeError(Exception):
def __init__(self, batch_tasks_count: int, max_batch_size: int) -> None:
super().__init__("Too many batch tasks.")
Expand Down
Loading
Loading