From bf3a45c2c96a0338e9b1d4ad7c7bfe986542b794 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Tue, 3 Feb 2026 15:20:07 +0100 Subject: [PATCH 001/103] Add mass transfer app and processing --- .../commands/cleanup_jobs_and_tasks.py | 12 +- adit/core/tasks.py | 56 ++- adit/core/templates/core/home.html | 6 + adit/mass_transfer/__init__.py | 0 adit/mass_transfer/admin.py | 22 + adit/mass_transfer/apps.py | 49 ++ adit/mass_transfer/filters.py | 13 + adit/mass_transfer/forms.py | 156 ++++++ adit/mass_transfer/migrations/0001_initial.py | 244 ++++++++++ adit/mass_transfer/migrations/__init__.py | 0 adit/mass_transfer/mixins.py | 9 + adit/mass_transfer/models.py | 173 +++++++ adit/mass_transfer/processors.py | 458 ++++++++++++++++++ .../static/mass_transfer/mass_transfer.js | 32 ++ adit/mass_transfer/tables.py | 13 + .../mass_transfer_filter_confirm_delete.html | 19 + .../mass_transfer_filter_form.html | 24 + .../mass_transfer_filter_list.html | 64 +++ .../mass_transfer_job_detail.html | 68 +++ .../mass_transfer/mass_transfer_job_form.html | 25 + .../mass_transfer/mass_transfer_job_list.html | 18 + .../mass_transfer/mass_transfer_layout.html | 6 + .../mass_transfer_task_detail.html | 30 ++ adit/mass_transfer/templatetags/__init__.py | 0 .../templatetags/mass_transfer_extras.py | 30 ++ adit/mass_transfer/tests/__init__.py | 1 + adit/mass_transfer/tests/test_partitions.py | 26 + adit/mass_transfer/tests/test_processor.py | 155 ++++++ adit/mass_transfer/urls.py | 95 ++++ adit/mass_transfer/utils/__init__.py | 0 adit/mass_transfer/utils/partitions.py | 50 ++ adit/mass_transfer/views.py | 160 ++++++ adit/settings/base.py | 13 + adit/urls.py | 1 + docker-compose.base.yml | 2 + docker-compose.dev.yml | 9 + docker-compose.prod.yml | 11 + example.env | 7 + 38 files changed, 2033 insertions(+), 24 deletions(-) create mode 100644 adit/mass_transfer/__init__.py create mode 100644 adit/mass_transfer/admin.py create mode 100644 adit/mass_transfer/apps.py create mode 100644 adit/mass_transfer/filters.py create mode 100644 adit/mass_transfer/forms.py create mode 100644 adit/mass_transfer/migrations/0001_initial.py create mode 100644 adit/mass_transfer/migrations/__init__.py create mode 100644 adit/mass_transfer/mixins.py create mode 100644 adit/mass_transfer/models.py create mode 100644 adit/mass_transfer/processors.py create mode 100644 adit/mass_transfer/static/mass_transfer/mass_transfer.js create mode 100644 adit/mass_transfer/tables.py create mode 100644 adit/mass_transfer/templates/mass_transfer/mass_transfer_filter_confirm_delete.html create mode 100644 adit/mass_transfer/templates/mass_transfer/mass_transfer_filter_form.html create mode 100644 adit/mass_transfer/templates/mass_transfer/mass_transfer_filter_list.html create mode 100644 adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html create mode 100644 adit/mass_transfer/templates/mass_transfer/mass_transfer_job_form.html create mode 100644 adit/mass_transfer/templates/mass_transfer/mass_transfer_job_list.html create mode 100644 adit/mass_transfer/templates/mass_transfer/mass_transfer_layout.html create mode 100644 adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html create mode 100644 adit/mass_transfer/templatetags/__init__.py create mode 100644 adit/mass_transfer/templatetags/mass_transfer_extras.py create mode 100644 adit/mass_transfer/tests/__init__.py create mode 100644 adit/mass_transfer/tests/test_partitions.py create mode 100644 adit/mass_transfer/tests/test_processor.py create mode 100644 
adit/mass_transfer/urls.py create mode 100644 adit/mass_transfer/utils/__init__.py create mode 100644 adit/mass_transfer/utils/partitions.py create mode 100644 adit/mass_transfer/views.py diff --git a/adit/core/management/commands/cleanup_jobs_and_tasks.py b/adit/core/management/commands/cleanup_jobs_and_tasks.py index 4f04d24c6..4c3b6a964 100644 --- a/adit/core/management/commands/cleanup_jobs_and_tasks.py +++ b/adit/core/management/commands/cleanup_jobs_and_tasks.py @@ -4,6 +4,7 @@ from adit.batch_query.models import BatchQueryJob, BatchQueryTask from adit.batch_transfer.models import BatchTransferJob, BatchTransferTask from adit.core.models import DicomJob, DicomTask +from adit.mass_transfer.models import MassTransferJob, MassTransferTask from adit.selective_transfer.models import SelectiveTransferJob, SelectiveTransferTask @@ -11,6 +12,7 @@ class Command(BaseCommand): help = "Cleanup all DICOM jobs and tasks that are stuck." def cleanup_tasks(self, model: type[DicomTask]): + job_model = model._meta.get_field("job").related_model job_ids = set() message = "Unexpected crash while processing this task." @@ -18,7 +20,7 @@ def cleanup_tasks(self, model: type[DicomTask]): tasks_in_progress = model.objects.filter(status=model.Status.IN_PROGRESS).all() for task in tasks_in_progress: - task.status = SelectiveTransferTask.Status.FAILURE + task.status = model.Status.FAILURE task.message = message task.log = task_log task.save() @@ -27,14 +29,14 @@ def cleanup_tasks(self, model: type[DicomTask]): tasks_pending = model.objects.filter(Q(status=model.Status.PENDING)).all() for task in tasks_pending: if task.queued_job_id is None: - task.status = SelectiveTransferTask.Status.FAILURE + task.status = model.Status.FAILURE task.message = message task.log = task_log task.save() job_ids.add(task.job_id) for job_id in job_ids: - job = SelectiveTransferJob.objects.get(id=job_id) + job = job_model.objects.get(id=job_id) job.post_process(suppress_email=True) def cleanup_jobs(self, model: type[DicomJob]): @@ -45,7 +47,7 @@ def cleanup_jobs(self, model: type[DicomJob]): ).all() for job in jobs: - job.status = SelectiveTransferJob.Status.FAILURE + job.status = model.Status.FAILURE job.message = message job.save() @@ -65,5 +67,7 @@ def handle(self, *args, **options): self.cleanup_jobs(BatchQueryJob) self.cleanup_tasks(BatchTransferTask) self.cleanup_jobs(BatchTransferJob) + self.cleanup_tasks(MassTransferTask) + self.cleanup_jobs(MassTransferJob) self.stdout.write("Done") diff --git a/adit/core/tasks.py b/adit/core/tasks.py index 0326504de..6b3330a49 100644 --- a/adit/core/tasks.py +++ b/adit/core/tasks.py @@ -53,27 +53,14 @@ def backup_db(*args, **kwargs): call_command("dbbackup", "--clean", "-v 2") -@app.task( - queue="dicom", - pass_context=True, - # TODO: Increase the priority slightly when it will be retried - # See https://github.com/procrastinate-org/procrastinate/issues/1096 - # - # Two-level retry strategy: - # 1. Network layer (Stamina): Fast retries for transient failures (5-10 attempts) - # - Applied at DIMSE/DICOMweb connector level - # - Handles: connection timeouts, HTTP 503, temporary server unavailability - # 2. 
Task layer (Procrastinate): Slow retries for complete operation failures - # - Applied here (max_attempts below) - # - Only triggers after network-level retries are exhausted - # - Retries the entire task - retry=RetryStrategy( - max_attempts=settings.DICOM_TASK_MAX_ATTEMPTS, - exponential_wait=settings.DICOM_TASK_EXPONENTIAL_WAIT, - retry_exceptions={RetriableDicomError}, - ), +DICOM_TASK_RETRY_STRATEGY = RetryStrategy( + max_attempts=settings.DICOM_TASK_MAX_ATTEMPTS, + exponential_wait=settings.DICOM_TASK_EXPONENTIAL_WAIT, + retry_exceptions={RetriableDicomError}, ) -def process_dicom_task(context: JobContext, model_label: str, task_id: int): + + +def _run_dicom_task(context: JobContext, model_label: str, task_id: int): assert context.job dicom_task = get_dicom_task(model_label, task_id) @@ -172,3 +159,32 @@ def _monitor_task(context: JobContext, future: ProcessFuture) -> None: # TODO: https://github.com/procrastinate-org/procrastinate/issues/1106 db.close_old_connections() + + +@app.task( + queue="dicom", + pass_context=True, + # TODO: Increase the priority slightly when it will be retried + # See https://github.com/procrastinate-org/procrastinate/issues/1096 + # + # Two-level retry strategy: + # 1. Network layer (Stamina): Fast retries for transient failures (5-10 attempts) + # - Applied at DIMSE/DICOMweb connector level + # - Handles: connection timeouts, HTTP 503, temporary server unavailability + # 2. Task layer (Procrastinate): Slow retries for complete operation failures + # - Applied here (max_attempts below) + # - Only triggers after network-level retries are exhausted + # - Retries the entire task + retry=DICOM_TASK_RETRY_STRATEGY, +) +def process_dicom_task(context: JobContext, model_label: str, task_id: int): + _run_dicom_task(context, model_label, task_id) + + +@app.task( + queue="mass_transfer", + pass_context=True, + retry=DICOM_TASK_RETRY_STRATEGY, +) +def process_mass_transfer_task(context: JobContext, model_label: str, task_id: int): + _run_dicom_task(context, model_label, task_id) diff --git a/adit/core/templates/core/home.html b/adit/core/templates/core/home.html index 91d3d7cd4..2534ecde5 100644 --- a/adit/core/templates/core/home.html +++ b/adit/core/templates/core/home.html @@ -46,6 +46,12 @@

Transfer or download multiple studies specified in a batch file.
+
+ Mass Transfer +
+
+ Transfer large volumes of imaging data over a time range using reusable filters. +
DICOM Explorer
diff --git a/adit/mass_transfer/__init__.py b/adit/mass_transfer/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/adit/mass_transfer/admin.py b/adit/mass_transfer/admin.py new file mode 100644 index 000000000..05ab53e5b --- /dev/null +++ b/adit/mass_transfer/admin.py @@ -0,0 +1,22 @@ +from django.contrib import admin + +from adit.core.admin import DicomJobAdmin, DicomTaskAdmin + +from .models import ( + MassTransferFilter, + MassTransferJob, + MassTransferSettings, + MassTransferTask, + MassTransferVolume, +) + + +class MassTransferJobAdmin(DicomJobAdmin): + exclude = ("urgent",) + + +admin.site.register(MassTransferJob, MassTransferJobAdmin) +admin.site.register(MassTransferTask, DicomTaskAdmin) +admin.site.register(MassTransferSettings, admin.ModelAdmin) +admin.site.register(MassTransferFilter, admin.ModelAdmin) +admin.site.register(MassTransferVolume, admin.ModelAdmin) diff --git a/adit/mass_transfer/apps.py b/adit/mass_transfer/apps.py new file mode 100644 index 000000000..679047239 --- /dev/null +++ b/adit/mass_transfer/apps.py @@ -0,0 +1,49 @@ +from django.apps import AppConfig +from django.db.models.signals import post_migrate + +from adit.core.utils.model_utils import get_model_label + +SECTION_NAME = "Mass Transfer" + + +class MassTransferConfig(AppConfig): + name = "adit.mass_transfer" + + def ready(self): + register_app() + + # Put calls to db stuff in this signal handler + post_migrate.connect(init_db, sender=self) + + +def register_app(): + from adit_radis_shared.common.site import MainMenuItem, register_main_menu_item + + from adit.core.site import JobStats, register_dicom_processor, register_job_stats_collector + + from .models import MassTransferJob, MassTransferTask + from .processors import MassTransferTaskProcessor + + register_main_menu_item( + MainMenuItem( + url_name="mass_transfer_job_create", + label=SECTION_NAME, + ) + ) + + register_dicom_processor(get_model_label(MassTransferTask), MassTransferTaskProcessor) + + def collect_job_stats() -> JobStats: + counts: dict[MassTransferJob.Status, int] = {} + for status in MassTransferJob.Status: + counts[status] = MassTransferJob.objects.filter(status=status).count() + return JobStats("Mass Transfer", "mass_transfer_job_list", counts) + + register_job_stats_collector(collect_job_stats) + + +def init_db(**kwargs): + from .models import MassTransferSettings + + if not MassTransferSettings.objects.exists(): + MassTransferSettings.objects.create() diff --git a/adit/mass_transfer/filters.py b/adit/mass_transfer/filters.py new file mode 100644 index 000000000..ef1851433 --- /dev/null +++ b/adit/mass_transfer/filters.py @@ -0,0 +1,13 @@ +from adit.core.filters import DicomJobFilter, DicomTaskFilter + +from .models import MassTransferJob, MassTransferTask + + +class MassTransferJobFilter(DicomJobFilter): + class Meta(DicomJobFilter.Meta): + model = MassTransferJob + + +class MassTransferTaskFilter(DicomTaskFilter): + class Meta(DicomTaskFilter.Meta): + model = MassTransferTask diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py new file mode 100644 index 000000000..1c09cd3f9 --- /dev/null +++ b/adit/mass_transfer/forms.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +from typing import cast + +from adit_radis_shared.accounts.models import User +from crispy_forms.helper import FormHelper +from crispy_forms.layout import Layout, Submit +from django import forms +from django.core.exceptions import ValidationError + +from adit.core.fields import DicomNodeChoiceField +from 
adit.core.models import DicomNode + +from .models import MassTransferFilter, MassTransferJob, MassTransferTask +from .utils.partitions import build_partitions + + +class MassTransferFilterForm(forms.ModelForm): + class Meta: + model = MassTransferFilter + fields = ( + "name", + "modality", + "institution_name", + "apply_institution_on_study", + "study_description", + "series_description", + "series_number", + ) + labels = { + "name": "Filter name", + "modality": "Modality", + "institution_name": "Institution name", + "apply_institution_on_study": "Apply institution filter on study", + "study_description": "Study description", + "series_description": "Series description", + "series_number": "Series number", + } + + +class MassTransferJobForm(forms.ModelForm): + filters = forms.ModelMultipleChoiceField( + queryset=MassTransferFilter.objects.all(), + required=True, + widget=forms.CheckboxSelectMultiple, + ) + + tasks: list[MassTransferTask] + + class Meta: + model = MassTransferJob + fields = ( + "source", + "destination", + "start_date", + "end_date", + "partition_granularity", + "filters", + "send_finished_mail", + ) + labels = { + "start_date": "Start date", + "end_date": "End date", + "partition_granularity": "Partition granularity", + "send_finished_mail": "Send Email when job is finished", + } + help_texts = { + "partition_granularity": "Daily or weekly partition windows.", + } + + def __init__(self, *args, **kwargs): + self.tasks = [] + self.save_tasks = None + self.user: User = kwargs.pop("user") + + super().__init__(*args, **kwargs) + + self.fields["source"] = DicomNodeChoiceField("source", self.user) + self.fields["source"].widget.attrs["@change"] = "onSourceChange($event)" + + self.fields["destination"] = DicomNodeChoiceField("destination", self.user) + self.fields["destination"].widget.attrs["@change"] = "onDestinationChange($event)" + + self.fields["partition_granularity"].widget.attrs["@change"] = ( + "onGranularityChange($event)" + ) + + self.fields["send_finished_mail"].widget.attrs["@change"] = ( + "onSendFinishedMailChange($event)" + ) + + self.helper = FormHelper(self) + self.helper.layout = Layout("source", "destination") + self.helper.render_unmentioned_fields = True + self.helper.attrs["x-data"] = "massTransferJobForm()" + self.helper.add_input(Submit("save", "Create Job")) + + def clean_source(self): + source = cast(DicomNode, self.cleaned_data["source"]) + if not source.is_accessible_by_user(self.user, "source"): + raise ValidationError("You do not have access to this source.") + if source.node_type != DicomNode.NodeType.SERVER: + raise ValidationError("Source must be a DICOM server.") + return source + + def clean_destination(self): + destination = cast(DicomNode, self.cleaned_data["destination"]) + if not destination.is_accessible_by_user(self.user, "destination"): + raise ValidationError("You do not have access to this destination.") + if destination.node_type != DicomNode.NodeType.FOLDER: + raise ValidationError("Destination must be a DICOM folder.") + return destination + + def clean(self): + cleaned = super().clean() + start_date = cleaned.get("start_date") + end_date = cleaned.get("end_date") + if start_date and end_date and end_date < start_date: + raise ValidationError("End date must be on or after the start date.") + return cleaned + + def _save_tasks(self, job: MassTransferJob) -> None: + partitions = build_partitions( + job.start_date, + job.end_date, + job.partition_granularity, + ) + + tasks: list[MassTransferTask] = [] + for partition in partitions: + 
tasks.append( + MassTransferTask( + job=job, + source=job.source, + partition_start=partition.start, + partition_end=partition.end, + partition_key=partition.key, + ) + ) + + MassTransferTask.objects.bulk_create(tasks) + + def save(self, commit: bool = True): + job = super().save(commit=False) + # Mass transfer always converts to NIfTI + job.convert_to_nifti = True + job.urgent = False + + if commit: + job.save() + self.save_m2m() + self._save_tasks(job) + else: + self.save_tasks = self._save_tasks + + return job diff --git a/adit/mass_transfer/migrations/0001_initial.py b/adit/mass_transfer/migrations/0001_initial.py new file mode 100644 index 000000000..d5246432a --- /dev/null +++ b/adit/mass_transfer/migrations/0001_initial.py @@ -0,0 +1,244 @@ +# Generated by Codex on 2026-02-03 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + +from adit_radis_shared.common.utils.migration_utils import procrastinate_on_delete_sql + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ("core", "0015_delete_queuedtask"), + ("procrastinate", "0028_add_cancel_states"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="MassTransferFilter", + fields=[ + ("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("name", models.CharField(blank=True, default="", max_length=150)), + ("modality", models.CharField(blank=True, default="", max_length=16)), + ("institution_name", models.CharField(blank=True, default="", max_length=128)), + ("apply_institution_on_study", models.BooleanField(default=True)), + ("study_description", models.CharField(blank=True, default="", max_length=256)), + ("series_description", models.CharField(blank=True, default="", max_length=256)), + ("series_number", models.PositiveIntegerField(blank=True, null=True)), + ], + options={ + "ordering": ("name", "id"), + }, + ), + migrations.CreateModel( + name="MassTransferSettings", + fields=[ + ("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("locked", models.BooleanField(default=False)), + ("suspended", models.BooleanField(default=False)), + ], + options={ + "verbose_name_plural": "Mass transfer settings", + }, + ), + migrations.CreateModel( + name="MassTransferJob", + fields=[ + ("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ( + "status", + models.CharField( + choices=[ + ("UV", "Unverified"), + ("PE", "Pending"), + ("IP", "In Progress"), + ("CI", "Canceling"), + ("CA", "Canceled"), + ("SU", "Success"), + ("WA", "Warning"), + ("FA", "Failure"), + ], + default="UV", + max_length=2, + ), + ), + ("urgent", models.BooleanField(default=False)), + ("message", models.TextField(blank=True, default="")), + ("send_finished_mail", models.BooleanField(default=False)), + ("convert_to_nifti", models.BooleanField(default=False)), + ("created", models.DateTimeField(auto_now_add=True)), + ("start", models.DateTimeField(blank=True, null=True)), + ("end", models.DateTimeField(blank=True, null=True)), + ("start_date", models.DateField()), + ("end_date", models.DateField()), + ( + "partition_granularity", + models.CharField( + choices=[("daily", "Daily"), ("weekly", "Weekly")], + default="daily", + max_length=16, + ), + ), + ( + "destination", + models.ForeignKey( + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="core.dicomnode", + ), 
+ ), + ( + "owner", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="mass_transfer_jobs", + to=settings.AUTH_USER_MODEL, + ), + ), + ( + "source", + models.ForeignKey( + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="core.dicomnode", + ), + ), + ( + "filters", + models.ManyToManyField(blank=True, related_name="jobs", to="mass_transfer.masstransferfilter"), + ), + ], + options={ + "abstract": False, + "permissions": [("can_process_urgently", "Can process urgently")], + }, + ), + migrations.CreateModel( + name="MassTransferTask", + fields=[ + ("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ( + "status", + models.CharField( + choices=[ + ("PE", "Pending"), + ("IP", "In Progress"), + ("CA", "Canceled"), + ("SU", "Success"), + ("WA", "Warning"), + ("FA", "Failure"), + ], + default="PE", + max_length=2, + ), + ), + ("attempts", models.PositiveSmallIntegerField(default=0)), + ("message", models.TextField(blank=True, default="")), + ("log", models.TextField(blank=True, default="")), + ("created", models.DateTimeField(auto_now_add=True)), + ("start", models.DateTimeField(blank=True, null=True)), + ("end", models.DateTimeField(blank=True, null=True)), + ("partition_start", models.DateTimeField()), + ("partition_end", models.DateTimeField()), + ("partition_key", models.CharField(max_length=64)), + ( + "job", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="tasks", + to="mass_transfer.masstransferjob", + ), + ), + ( + "source", + models.ForeignKey( + on_delete=django.db.models.deletion.PROTECT, + related_name="+", + to="core.dicomnode", + ), + ), + ( + "queued_job", + models.OneToOneField( + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="+", + to="procrastinate.procrastinatejob", + ), + ), + ], + options={ + "ordering": ("id",), + "abstract": False, + }, + ), + migrations.CreateModel( + name="MassTransferVolume", + fields=[ + ("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ( + "status", + models.CharField( + choices=[ + ("pending", "Pending"), + ("exported", "Exported"), + ("converted", "Converted"), + ("error", "Error"), + ], + default="pending", + max_length=16, + ), + ), + ("partition_key", models.CharField(max_length=64)), + ("pseudonym", models.CharField(blank=True, default="", max_length=64)), + ("patient_id", models.CharField(blank=True, default="", max_length=64)), + ("accession_number", models.CharField(blank=True, default="", max_length=64)), + ("study_instance_uid", models.CharField(max_length=64)), + ("series_instance_uid", models.CharField(max_length=64)), + ("modality", models.CharField(blank=True, default="", max_length=16)), + ("study_description", models.CharField(blank=True, default="", max_length=256)), + ("series_description", models.CharField(blank=True, default="", max_length=256)), + ("series_number", models.IntegerField(blank=True, null=True)), + ("study_datetime", models.DateTimeField()), + ("institution_name", models.CharField(blank=True, default="", max_length=128)), + ("number_of_images", models.PositiveIntegerField(default=0)), + ("exported_folder", models.TextField(blank=True, default="")), + ("converted_file", models.TextField(blank=True, default="")), + ("log", models.TextField(blank=True, default="")), + ("created", models.DateTimeField(auto_now_add=True)), + ("updated", models.DateTimeField(auto_now=True)), + ( + "job", + models.ForeignKey( + 
on_delete=django.db.models.deletion.CASCADE, + related_name="volumes", + to="mass_transfer.masstransferjob", + ), + ), + ], + options={ + "ordering": ("study_datetime", "series_instance_uid"), + }, + ), + migrations.AddIndex( + model_name="masstransferjob", + index=models.Index(fields=["owner", "status"], name="mass_trans_owner_i_2403f1_idx"), + ), + migrations.AddConstraint( + model_name="masstransfervolume", + constraint=models.UniqueConstraint( + fields=("job", "series_instance_uid"), + name="mass_transfer_unique_series_per_job", + ), + ), + migrations.RunSQL( + sql=procrastinate_on_delete_sql("mass_transfer", "masstransfertask"), + reverse_sql=procrastinate_on_delete_sql( + "mass_transfer", "masstransfertask", reverse=True + ), + ), + ] diff --git a/adit/mass_transfer/migrations/__init__.py b/adit/mass_transfer/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/adit/mass_transfer/mixins.py b/adit/mass_transfer/mixins.py new file mode 100644 index 000000000..5861656ae --- /dev/null +++ b/adit/mass_transfer/mixins.py @@ -0,0 +1,9 @@ +from adit_radis_shared.common.mixins import LockedMixin + +from .apps import SECTION_NAME +from .models import MassTransferSettings + + +class MassTransferLockedMixin(LockedMixin): + settings_model = MassTransferSettings + section_name = SECTION_NAME diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py new file mode 100644 index 000000000..73a209f1d --- /dev/null +++ b/adit/mass_transfer/models.py @@ -0,0 +1,173 @@ +from __future__ import annotations + +from django.conf import settings +from django.db import models +from django.urls import reverse +from procrastinate.contrib.django import app + +from adit.core.models import DicomAppSettings, DicomJob, DicomNode, DicomTask +from adit.core.utils.model_utils import get_model_label + + +class MassTransferSettings(DicomAppSettings): + class Meta: + verbose_name_plural = "Mass transfer settings" + + +class MassTransferFilter(models.Model): + name = models.CharField(max_length=150, blank=True, default="") + modality = models.CharField(max_length=16, blank=True, default="") + institution_name = models.CharField(max_length=128, blank=True, default="") + apply_institution_on_study = models.BooleanField(default=True) + study_description = models.CharField(max_length=256, blank=True, default="") + series_description = models.CharField(max_length=256, blank=True, default="") + series_number = models.PositiveIntegerField(null=True, blank=True) + + class Meta: + ordering = ("name", "id") + + def __str__(self) -> str: + if self.name: + return self.name + + parts: list[str] = [] + if self.modality: + parts.append(self.modality) + if self.institution_name: + parts.append(f"Institution={self.institution_name}") + if self.study_description: + parts.append(f"Study={self.study_description}") + if self.series_description: + parts.append(f"Series={self.series_description}") + if self.series_number is not None: + parts.append(f"SeriesNumber={self.series_number}") + + return "; ".join(parts) if parts else f"Filter #{self.pk}" + + +class MassTransferJob(DicomJob): + class PartitionGranularity(models.TextChoices): + DAILY = "daily", "Daily" + WEEKLY = "weekly", "Weekly" + + default_priority = settings.MASS_TRANSFER_DEFAULT_PRIORITY + urgent_priority = settings.MASS_TRANSFER_URGENT_PRIORITY + + source = models.ForeignKey(DicomNode, related_name="+", on_delete=models.PROTECT) + destination = models.ForeignKey(DicomNode, related_name="+", on_delete=models.PROTECT) + start_date = 
models.DateField() + end_date = models.DateField() + partition_granularity = models.CharField( + max_length=16, + choices=PartitionGranularity.choices, + default=PartitionGranularity.DAILY, + ) + + filters = models.ManyToManyField(MassTransferFilter, related_name="jobs", blank=True) + + tasks: models.QuerySet["MassTransferTask"] + + def get_absolute_url(self): + return reverse("mass_transfer_job_detail", args=[self.pk]) + + def queue_pending_tasks(self): + """Queues all pending tasks of this job in the mass_transfer queue.""" + assert self.status == DicomJob.Status.PENDING + + priority = self.default_priority + if self.urgent: + priority = self.urgent_priority + + for mass_task in self.tasks.filter(status=DicomTask.Status.PENDING): + assert mass_task.queued_job is None + + model_label = get_model_label(mass_task.__class__) + queued_job_id = app.configure_task( + "adit.core.tasks.process_mass_transfer_task", + allow_unknown=False, + priority=priority, + ).defer(model_label=model_label, task_id=mass_task.pk) + mass_task.queued_job_id = queued_job_id + mass_task.save() + + +class MassTransferTask(DicomTask): + job = models.ForeignKey( + MassTransferJob, + on_delete=models.CASCADE, + related_name="tasks", + ) + partition_start = models.DateTimeField() + partition_end = models.DateTimeField() + partition_key = models.CharField(max_length=64) + + def get_absolute_url(self): + return reverse("mass_transfer_task_detail", args=[self.pk]) + + def queue_pending_task(self) -> None: + """Queues a mass transfer task.""" + assert self.status == DicomTask.Status.PENDING + assert self.queued_job is None + + priority = self.job.default_priority + if self.job.urgent: + priority = self.job.urgent_priority + + model_label = get_model_label(self.__class__) + queued_job_id = app.configure_task( + "adit.core.tasks.process_mass_transfer_task", + allow_unknown=False, + priority=priority, + ).defer(model_label=model_label, task_id=self.pk) + self.queued_job_id = queued_job_id + self.save() + + +class MassTransferVolume(models.Model): + class Status(models.TextChoices): + PENDING = "pending", "Pending" + EXPORTED = "exported", "Exported" + CONVERTED = "converted", "Converted" + ERROR = "error", "Error" + + job = models.ForeignKey(MassTransferJob, on_delete=models.CASCADE, related_name="volumes") + partition_key = models.CharField(max_length=64) + + pseudonym = models.CharField(max_length=64, blank=True, default="") + patient_id = models.CharField(max_length=64, blank=True, default="") + accession_number = models.CharField(max_length=64, blank=True, default="") + study_instance_uid = models.CharField(max_length=64) + series_instance_uid = models.CharField(max_length=64) + modality = models.CharField(max_length=16, blank=True, default="") + study_description = models.CharField(max_length=256, blank=True, default="") + series_description = models.CharField(max_length=256, blank=True, default="") + series_number = models.IntegerField(null=True, blank=True) + study_datetime = models.DateTimeField() + institution_name = models.CharField(max_length=128, blank=True, default="") + number_of_images = models.PositiveIntegerField(default=0) + + exported_folder = models.TextField(blank=True, default="") + converted_file = models.TextField(blank=True, default="") + + status = models.CharField(max_length=16, choices=Status.choices, default=Status.PENDING) + log = models.TextField(blank=True, default="") + + created = models.DateTimeField(auto_now_add=True) + updated = models.DateTimeField(auto_now=True) + + class Meta: + ordering = 
("study_datetime", "series_instance_uid") + constraints = [ + models.UniqueConstraint( + fields=["job", "series_instance_uid"], + name="mass_transfer_unique_series_per_job", + ) + ] + + def __str__(self) -> str: + return f"MassTransferVolume {self.series_instance_uid}" + + def add_log(self, msg: str) -> None: + if self.log: + self.log += "\n" + self.log += msg diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py new file mode 100644 index 000000000..6fa159b67 --- /dev/null +++ b/adit/mass_transfer/processors.py @@ -0,0 +1,458 @@ +from __future__ import annotations + +import logging +import shutil +import subprocess +import uuid +from datetime import datetime, timedelta +from pathlib import Path + +from django.conf import settings +from django.utils import timezone +from pydicom import Dataset + +from adit.core.errors import DicomError +from adit.core.models import DicomNode, DicomTask +from adit.core.processors import DicomTaskProcessor +from adit.core.utils.dicom_dataset import QueryDataset, ResultDataset +from adit.core.utils.dicom_manipulator import DicomManipulator +from adit.core.utils.dicom_operator import DicomOperator +from adit.core.utils.dicom_utils import convert_to_python_regex, write_dataset +from adit.core.utils.sanitize import sanitize_filename + +from .models import MassTransferFilter, MassTransferSettings, MassTransferTask, MassTransferVolume + +logger = logging.getLogger(__name__) + +_MIN_SPLIT_WINDOW = timedelta(minutes=30) + + +def _dicom_match(pattern: str, value: str | None) -> bool: + if not pattern: + return True + if value is None: + return False + regex = convert_to_python_regex(pattern) + return bool(regex.search(str(value))) + + +def _parse_int(value: object, default: int | None = None) -> int | None: + try: + if value is None or value == "": + return default + return int(value) + except (TypeError, ValueError): + return default + + +def _series_folder_name( + series_number: int | None, series_description: str, series_uid: str +) -> str: + if series_number is None: + base = series_uid + else: + description = series_description or "Undefined" + base = f"{series_number}-{description}" + return sanitize_filename(str(base)) + + +def _study_datetime(study: ResultDataset) -> datetime: + study_date = study.StudyDate + study_time = study.StudyTime + if study_time is None: + study_time = datetime.min.time() + return datetime.combine(study_date, study_time) + + +def _export_base_dir() -> Path: + base = Path(settings.MASS_TRANSFER_EXPORT_BASE_DIR) + base.mkdir(parents=True, exist_ok=True) + return base + + +def _destination_base_dir(node: DicomNode) -> Path: + assert node.node_type == DicomNode.NodeType.FOLDER + path = Path(node.dicomfolder.path) + path.mkdir(parents=True, exist_ok=True) + return path + + +def _volume_export_path( + base_dir: Path, + study_dt: datetime, + pseudonym: str, + series_name: str, +) -> Path: + year_month = study_dt.strftime("%Y%m") + return base_dir / year_month / pseudonym / series_name + + +def _volume_output_path( + base_dir: Path, + study_dt: datetime, + pseudonym: str, + series_name: str, +) -> Path: + year_month = study_dt.strftime("%Y%m") + return base_dir / year_month / pseudonym / series_name + + +class MassTransferTaskProcessor(DicomTaskProcessor): + app_name = "mass_transfer" + dicom_task_class = MassTransferTask + app_settings_class = MassTransferSettings + + def __init__(self, dicom_task: DicomTask) -> None: + assert isinstance(dicom_task, MassTransferTask) + super().__init__(dicom_task) + self.mass_task 
= dicom_task + + def process(self): + if self.is_suspended(): + return { + "status": MassTransferTask.Status.WARNING, + "message": "Mass transfer is currently suspended.", + "log": "Task skipped because the mass transfer app is suspended.", + } + + job = self.mass_task.job + source_node = job.source + destination_node = job.destination + + if source_node.node_type != DicomNode.NodeType.SERVER: + raise DicomError("Mass transfer source must be a DICOM server.") + if destination_node.node_type != DicomNode.NodeType.FOLDER: + raise DicomError("Mass transfer destination must be a DICOM folder.") + + filters = list(job.filters.all()) + if not filters: + return { + "status": MassTransferTask.Status.FAILURE, + "message": "No filters configured for this job.", + "log": "Mass transfer requires at least one filter.", + } + + operator = DicomOperator(source_node.dicomserver) + volumes = self._find_volumes(operator, filters) + + export_base = _export_base_dir() + output_base = _destination_base_dir(destination_node) + + converted_count = 0 + failed_count = 0 + + volumes_by_study: dict[str, list[MassTransferVolume]] = {} + for volume in volumes: + volumes_by_study.setdefault(volume.study_instance_uid, []).append(volume) + + for _, study_volumes in volumes_by_study.items(): + existing_pseudonym = next((v.pseudonym for v in study_volumes if v.pseudonym), None) + pseudonym = existing_pseudonym or uuid.uuid4().hex + + for volume in study_volumes: + if volume.status == MassTransferVolume.Status.CONVERTED: + continue + + try: + self._export_volume(operator, volume, export_base, pseudonym) + self._convert_volume(volume, output_base, pseudonym) + converted_count += 1 + except Exception as err: + logger.exception( + "Mass transfer failed for volume %s", volume.series_instance_uid + ) + self._cleanup_export(volume) + volume.status = MassTransferVolume.Status.ERROR + volume.add_log(str(err)) + volume.save() + failed_count += 1 + + log_lines = [ + f"Partition {self.mass_task.partition_key}", + f"Volumes processed: {len(volumes)}", + f"Converted: {converted_count}", + f"Failed: {failed_count}", + ] + + if failed_count and converted_count: + status = MassTransferTask.Status.WARNING + message = "Some volumes failed during mass transfer." + elif failed_count and not converted_count: + status = MassTransferTask.Status.FAILURE + message = "All volumes failed during mass transfer." + else: + status = MassTransferTask.Status.SUCCESS + message = "Mass transfer task completed successfully." 
+ + return { + "status": status, + "message": message, + "log": "\n".join(log_lines), + } + + def _find_volumes( + self, + operator: DicomOperator, + filters: list[MassTransferFilter], + ) -> list[MassTransferVolume]: + start = self.mass_task.partition_start + end = self.mass_task.partition_end + job = self.mass_task.job + + found_series: dict[str, MassTransferVolume] = {} + + for mf in filters: + studies = self._find_studies(operator, mf, start, end) + + for study in studies: + if mf.modality and mf.modality not in study.ModalitiesInStudy: + continue + + if mf.study_description and not _dicom_match( + mf.study_description, study.StudyDescription + ): + continue + + if mf.institution_name and mf.apply_institution_on_study: + if not self._study_has_institution(operator, study, mf.institution_name): + continue + + series_query = QueryDataset.create( + PatientID=study.PatientID, + StudyInstanceUID=study.StudyInstanceUID, + ) + # Request institution name at series level when possible + series_query.dataset.InstitutionName = "" + + series_list = list(operator.find_series(series_query)) + + for series in series_list: + series_uid = series.SeriesInstanceUID + if not series_uid: + continue + + series_number = _parse_int(series.get("SeriesNumber"), default=None) + + if ( + mf.institution_name + and not mf.apply_institution_on_study + and not _dicom_match( + mf.institution_name, series.get("InstitutionName", None) + ) + ): + continue + + if mf.modality and mf.modality != series.Modality: + continue + + if mf.series_description and not _dicom_match( + mf.series_description, series.SeriesDescription + ): + continue + + if mf.series_number is not None: + try: + if series_number is None or mf.series_number != series_number: + continue + except (TypeError, ValueError): + continue + + if series_uid in found_series: + continue + + study_dt = _study_datetime(study) + volume, created = MassTransferVolume.objects.get_or_create( + job=job, + series_instance_uid=series_uid, + defaults={ + "partition_key": self.mass_task.partition_key, + "patient_id": str(study.PatientID), + "accession_number": str(study.get("AccessionNumber", "")), + "study_instance_uid": str(study.StudyInstanceUID), + "modality": str(series.Modality), + "study_description": str(study.get("StudyDescription", "")), + "series_description": str(series.get("SeriesDescription", "")), + "series_number": series_number, + "study_datetime": timezone.make_aware(study_dt), + "institution_name": str(series.get("InstitutionName", "")), + "number_of_images": _parse_int( + series.get("NumberOfSeriesRelatedInstances"), default=0 + ), + }, + ) + if not created: + volume.partition_key = self.mass_task.partition_key + volume.patient_id = str(study.PatientID) + volume.accession_number = str(study.get("AccessionNumber", "")) + volume.study_instance_uid = str(study.StudyInstanceUID) + volume.modality = str(series.Modality) + volume.study_description = str(study.get("StudyDescription", "")) + volume.series_description = str(series.get("SeriesDescription", "")) + volume.series_number = series_number + volume.study_datetime = timezone.make_aware(study_dt) + volume.institution_name = str(series.get("InstitutionName", "")) + volume.number_of_images = _parse_int( + series.get("NumberOfSeriesRelatedInstances"), default=0 + ) + volume.save() + + found_series[series_uid] = volume + + return list(found_series.values()) + + def _find_studies( + self, + operator: DicomOperator, + mf: MassTransferFilter, + start: datetime, + end: datetime, + ) -> list[ResultDataset]: + 
max_results = settings.MASS_TRANSFER_MAX_SEARCH_RESULTS + + query = QueryDataset.create( + StudyDate=(start.date(), end.date()), + StudyTime=(start.time(), end.time()), + ) + + if mf.modality: + query.dataset.ModalitiesInStudy = mf.modality + if mf.study_description: + query.dataset.StudyDescription = mf.study_description + + studies = list(operator.find_studies(query, limit_results=max_results + 1)) + + if len(studies) > max_results: + if end - start < _MIN_SPLIT_WINDOW: + raise DicomError( + f"Time window too small ({start} to {end}) for filter {mf}." + ) + + mid = start + (end - start) / 2 + return self._find_studies(operator, mf, start, mid) + self._find_studies( + operator, mf, mid, end + ) + + return studies + + def _study_has_institution( + self, + operator: DicomOperator, + study: ResultDataset, + institution_name: str, + ) -> bool: + series_query = QueryDataset.create( + PatientID=study.PatientID, + StudyInstanceUID=study.StudyInstanceUID, + ) + series_query.dataset.InstitutionName = "" + + for series in operator.find_series(series_query): + institution = series.get("InstitutionName", None) + if _dicom_match(institution_name, institution): + return True + + return False + + def _export_volume( + self, + operator: DicomOperator, + volume: MassTransferVolume, + export_base: Path, + pseudonym: str, + ) -> None: + if volume.status == MassTransferVolume.Status.EXPORTED and volume.exported_folder: + return + + study_dt = volume.study_datetime + series_name = _series_folder_name( + volume.series_number, + volume.series_description, + volume.series_instance_uid, + ) + + export_path = _volume_export_path(export_base, study_dt, pseudonym, series_name) + export_path.mkdir(parents=True, exist_ok=True) + volume.exported_folder = str(export_path) + + manipulator = DicomManipulator() + + def callback(ds: Dataset | None) -> None: + if ds is None: + return + manipulator.manipulate(ds, pseudonym=pseudonym) + file_name = sanitize_filename(f"{ds.SOPInstanceUID}.dcm") + write_dataset(ds, export_path / file_name) + + operator.fetch_series( + patient_id=volume.patient_id, + study_uid=volume.study_instance_uid, + series_uid=volume.series_instance_uid, + callback=callback, + ) + + volume.pseudonym = pseudonym + volume.status = MassTransferVolume.Status.EXPORTED + volume.save() + + def _convert_volume( + self, + volume: MassTransferVolume, + output_base: Path, + pseudonym: str, + ) -> None: + if volume.status == MassTransferVolume.Status.CONVERTED and volume.converted_file: + return + + if not volume.exported_folder: + raise DicomError("Missing exported folder for conversion.") + + study_dt = volume.study_datetime + volume.pseudonym = pseudonym + series_name = _series_folder_name( + volume.series_number, + volume.series_description, + volume.series_instance_uid, + ) + + output_path = _volume_output_path(output_base, study_dt, pseudonym, series_name) + output_path.mkdir(parents=True, exist_ok=True) + + cmd = [ + "dcm2niix", + "-z", + "y", + "-o", + str(output_path), + "-f", + series_name, + str(volume.exported_folder), + ] + + result = subprocess.run(cmd, check=False, capture_output=True, text=True) + if result.returncode != 0: + raise DicomError( + f"Conversion failed for series {volume.series_instance_uid}: {result.stderr}" + ) + + volume.converted_file = str(output_path / f"{series_name}.nii.gz") + volume.status = MassTransferVolume.Status.CONVERTED + volume.save() + + self._cleanup_export(volume) + + def _cleanup_export(self, volume: MassTransferVolume) -> None: + export_folder = volume.exported_folder 
+ if not export_folder or export_folder.endswith(" (cleaned)"): + return + + try: + shutil.rmtree(export_folder) + except FileNotFoundError: + pass + except Exception as err: + volume.add_log(f"Cleanup failed: {err}") + volume.save() + return + + volume.exported_folder = f"{export_folder} (cleaned)" + volume.save() diff --git a/adit/mass_transfer/static/mass_transfer/mass_transfer.js b/adit/mass_transfer/static/mass_transfer/mass_transfer.js new file mode 100644 index 000000000..470f11186 --- /dev/null +++ b/adit/mass_transfer/static/mass_transfer/mass_transfer.js @@ -0,0 +1,32 @@ +"use strict"; + +// Keep those variables in sync with the ones in the Django view +const MASS_TRANSFER_SOURCE = "mass_transfer_source"; +const MASS_TRANSFER_DESTINATION = "mass_transfer_destination"; +const MASS_TRANSFER_GRANULARITY = "mass_transfer_granularity"; +const MASS_TRANSFER_SEND_FINISHED_MAIL = "mass_transfer_send_finished_mail"; + +function massTransferJobForm() { + return { + onSourceChange: function (ev) { + updatePreferences("mass-transfer", { + [MASS_TRANSFER_SOURCE]: ev.target.value, + }); + }, + onDestinationChange: function (ev) { + updatePreferences("mass-transfer", { + [MASS_TRANSFER_DESTINATION]: ev.target.value, + }); + }, + onGranularityChange: function (ev) { + updatePreferences("mass-transfer", { + [MASS_TRANSFER_GRANULARITY]: ev.target.value, + }); + }, + onSendFinishedMailChange: function (ev) { + updatePreferences("mass-transfer", { + [MASS_TRANSFER_SEND_FINISHED_MAIL]: ev.target.checked, + }); + }, + }; +} diff --git a/adit/mass_transfer/tables.py b/adit/mass_transfer/tables.py new file mode 100644 index 000000000..f2ddb0440 --- /dev/null +++ b/adit/mass_transfer/tables.py @@ -0,0 +1,13 @@ +from adit.core.tables import DicomTaskTable, TransferJobTable + +from .models import MassTransferJob, MassTransferTask + + +class MassTransferJobTable(TransferJobTable): + class Meta(TransferJobTable.Meta): + model = MassTransferJob + + +class MassTransferTaskTable(DicomTaskTable): + class Meta(DicomTaskTable.Meta): + model = MassTransferTask diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_filter_confirm_delete.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_filter_confirm_delete.html new file mode 100644 index 000000000..1b1a8a434 --- /dev/null +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_filter_confirm_delete.html @@ -0,0 +1,19 @@ +{% extends "mass_transfer/mass_transfer_layout.html" %} +{% load bootstrap_icon from common_extras %} +{% block title %} + Delete Mass Transfer Filter +{% endblock title %} +{% block heading %} + +{% endblock heading %} +{% block content %} +

Are you sure you want to delete the filter "{{ object }}"?

+
+ {% csrf_token %} + + Cancel +
+{% endblock content %} diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_filter_form.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_filter_form.html new file mode 100644 index 000000000..ef07b1e93 --- /dev/null +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_filter_form.html @@ -0,0 +1,24 @@ +{% extends "mass_transfer/mass_transfer_layout.html" %} +{% load crispy from crispy_forms_tags %} +{% load bootstrap_icon from common_extras %} +{% block title %} + Mass Transfer Filter +{% endblock title %} +{% block heading %} + + + + {% bootstrap_icon "list" %} + Filter List + + + +{% endblock heading %} +{% block content %} +

+      Text fields accept DICOM wildcards like * and ?. The institution
+      scope decides whether the institution name is matched at the study level or at the series
+      level.

+ {% crispy form %} +{% endblock content %} diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_filter_list.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_filter_list.html new file mode 100644 index 000000000..eca7a5b26 --- /dev/null +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_filter_list.html @@ -0,0 +1,64 @@ +{% extends "mass_transfer/mass_transfer_layout.html" %} +{% load bootstrap_icon from common_extras %} +{% block title %} + Mass Transfer Filters +{% endblock title %} +{% block heading %} + + + + {% bootstrap_icon "plus-lg" %} + New Filter + + + +{% endblock heading %} +{% block content %} + + + + + + + + + + + + + + + {% for f in filters %} + + + + + + + + + + + {% empty %} + + + + {% endfor %} + +
<th>Name</th><th>Modality</th><th>Institution</th><th>Institution Scope</th><th>Study Description</th><th>Series Description</th><th>Series Number</th><th>Actions</th>
{{ f.name|default:"—" }}{{ f.modality|default:"—" }}{{ f.institution_name|default:"—" }} + {% if f.institution_name %} + {{ f.apply_institution_on_study|yesno:"Study,Series" }} + {% else %} + — + {% endif %} + {{ f.study_description|default:"—" }}{{ f.series_description|default:"—" }}{{ f.series_number|default:"—" }} + + {% bootstrap_icon "pencil" %} + + + {% bootstrap_icon "trash" %} + +
No filters defined.
+{% endblock content %} diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html new file mode 100644 index 000000000..6d5e8f7bc --- /dev/null +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html @@ -0,0 +1,68 @@ +{% extends "mass_transfer/mass_transfer_layout.html" %} +{% load crispy from crispy_forms_tags %} +{% load render_table from django_tables2 %} +{% load bootstrap_icon from common_extras %} +{% load dicom_job_status_css_class from core_extras %} +{% load job_control_panel from mass_transfer_extras %} +{% block title %} + Mass Transfer Job +{% endblock title %} +{% block heading %} + + + + {% bootstrap_icon "list" %} + Job List + + + +{% endblock heading %} +{% block content %} +
+
Job ID
+
{{ job.id }}
+
Created At
+
{{ job.created }}
+ {% if user.is_staff %} +
Created By
+
{{ job.owner }}
+ {% endif %} +
Source
+
{{ job.source }}
+
Destination
+
{{ job.destination }}
+
Start Date
+
{{ job.start_date }}
+
End Date
+
{{ job.end_date }}
+
Granularity
+
{{ job.get_partition_granularity_display }}
+
Filters
+
+ {% if job.filters.all %} + {% for f in job.filters.all %} +
{{ f }}
+ {% empty %} + — + {% endfor %} + {% endif %} +
+
Processed Tasks
+
{{ job.processed_tasks.count }} of {{ job.tasks.count }}
+
Status
+
+ + {{ job.get_status_display }} + +
+
Message
+
{{ job.message|default:"—" }}
+
+ + + {% crispy filter.form %} + + + {% render_table table %} + {% job_control_panel %} +{% endblock content %} diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_form.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_form.html new file mode 100644 index 000000000..785686e79 --- /dev/null +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_form.html @@ -0,0 +1,25 @@ +{% extends "mass_transfer/mass_transfer_layout.html" %} +{% load crispy from crispy_forms_tags %} +{% load bootstrap_icon from common_extras %} +{% block title %} + New Mass Transfer Job +{% endblock title %} +{% block heading %} + + + + Manage Filters + {% bootstrap_icon "funnel" %} + + + + + {% bootstrap_icon "list" %} + Previous Jobs + + + +{% endblock heading %} +{% block content %} + {% crispy form %} +{% endblock content %} diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_list.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_list.html new file mode 100644 index 000000000..897209fee --- /dev/null +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_list.html @@ -0,0 +1,18 @@ +{% extends "mass_transfer/mass_transfer_layout.html" %} +{% load bootstrap_icon from common_extras %} +{% block title %} + Mass Transfer Jobs +{% endblock title %} +{% block heading %} + + + + {% bootstrap_icon "plus-lg" %} + Create New Job + + + +{% endblock heading %} +{% block content %} + {% include "core/_dicom_job_table.html" %} +{% endblock content %} diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_layout.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_layout.html new file mode 100644 index 000000000..3bff24fe8 --- /dev/null +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_layout.html @@ -0,0 +1,6 @@ +{% extends "core/core_layout.html" %} +{% load static from static %} +{% block script %} + {{ block.super }} + +{% endblock script %} diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html new file mode 100644 index 000000000..7fc7ad869 --- /dev/null +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html @@ -0,0 +1,30 @@ +{% extends "mass_transfer/mass_transfer_layout.html" %} +{% load dicom_task_status_css_class from core_extras %} +{% load task_control_panel from mass_transfer_extras %} +{% block title %} + Mass Transfer Task +{% endblock title %} +{% block heading %} + +{% endblock heading %} +{% block content %} +
+
Task ID
+
{{ task.id }}
+
Partition
+
{{ task.partition_key }}
+
Window
+
{{ task.partition_start }} – {{ task.partition_end }}
+
Status
+
+ + {{ task.get_status_display }} + +
+
Message
+
{{ task.message|default:"—" }}
+
Log
+
{{ task.log|default:"" }}
+
+ {% task_control_panel %} +{% endblock content %} diff --git a/adit/mass_transfer/templatetags/__init__.py b/adit/mass_transfer/templatetags/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/adit/mass_transfer/templatetags/mass_transfer_extras.py b/adit/mass_transfer/templatetags/mass_transfer_extras.py new file mode 100644 index 000000000..559e93832 --- /dev/null +++ b/adit/mass_transfer/templatetags/mass_transfer_extras.py @@ -0,0 +1,30 @@ +from typing import Any + +from django.template import Library + +register = Library() + + +@register.inclusion_tag("core/_job_detail_control_panel.html", takes_context=True) +def job_control_panel(context: dict[str, Any]) -> dict[str, Any]: + return { + "job_delete_url": "mass_transfer_job_delete", + "job_verify_url": "mass_transfer_job_verify", + "job_cancel_url": "mass_transfer_job_cancel", + "job_resume_url": "mass_transfer_job_resume", + "job_retry_url": "mass_transfer_job_retry", + "job_restart_url": "mass_transfer_job_restart", + "user": context["user"], + "job": context["job"], + } + + +@register.inclusion_tag("core/_task_detail_control_panel.html", takes_context=True) +def task_control_panel(context: dict[str, Any]) -> dict[str, Any]: + return { + "task_delete_url": "mass_transfer_task_delete", + "task_reset_url": "mass_transfer_task_reset", + "task_kill_url": "mass_transfer_task_kill", + "user": context["user"], + "task": context["task"], + } diff --git a/adit/mass_transfer/tests/__init__.py b/adit/mass_transfer/tests/__init__.py new file mode 100644 index 000000000..9758d0118 --- /dev/null +++ b/adit/mass_transfer/tests/__init__.py @@ -0,0 +1 @@ +# Tests for mass_transfer app. diff --git a/adit/mass_transfer/tests/test_partitions.py b/adit/mass_transfer/tests/test_partitions.py new file mode 100644 index 000000000..f82198c55 --- /dev/null +++ b/adit/mass_transfer/tests/test_partitions.py @@ -0,0 +1,26 @@ +from datetime import date + +from adit.mass_transfer.utils.partitions import build_partitions + + +def test_build_partitions_daily(): + windows = build_partitions(date(2024, 1, 1), date(2024, 1, 3), "daily") + + assert len(windows) == 3 + assert [window.key for window in windows] == ["20240101", "20240102", "20240103"] + assert windows[0].start.hour == 0 + assert windows[0].start.minute == 0 + assert windows[0].end.hour == 23 + assert windows[0].end.minute == 59 + assert windows[0].end.second == 59 + + +def test_build_partitions_weekly(): + windows = build_partitions(date(2024, 1, 1), date(2024, 1, 10), "weekly") + + assert len(windows) == 2 + assert [window.key for window in windows] == ["20240101-20240107", "20240108-20240110"] + assert windows[0].start.date() == date(2024, 1, 1) + assert windows[0].end.date() == date(2024, 1, 7) + assert windows[1].start.date() == date(2024, 1, 8) + assert windows[1].end.date() == date(2024, 1, 10) diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py new file mode 100644 index 000000000..7638d172a --- /dev/null +++ b/adit/mass_transfer/tests/test_processor.py @@ -0,0 +1,155 @@ +import uuid +from datetime import date, datetime, timedelta +from pathlib import Path + +import pytest +from adit_radis_shared.accounts.factories import UserFactory +from django.utils import timezone +from pytest_mock import MockerFixture + +from adit.core.errors import DicomError +from adit.core.factories import DicomFolderFactory, DicomServerFactory +from adit.core.utils.dicom_operator import DicomOperator +from adit.mass_transfer.models import ( + 
MassTransferFilter, + MassTransferJob, + MassTransferSettings, + MassTransferTask, + MassTransferVolume, +) +from adit.mass_transfer.processors import MassTransferTaskProcessor, _volume_output_path + + +@pytest.mark.django_db +def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture, settings): + settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 1 + MassTransferSettings.objects.create() + + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create() + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 1), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + ) + mf = MassTransferFilter.objects.create(modality="CT") + job.filters.add(mf) + + start = timezone.now() + end = start + timedelta(minutes=10) + task = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=start, + partition_end=end, + partition_key="20240101", + ) + + processor = MassTransferTaskProcessor(task) + operator = mocker.create_autospec(DicomOperator) + operator.find_studies.return_value = [object(), object()] + + with pytest.raises(DicomError, match="Time window too small"): + processor._find_studies(operator, mf, start, end) + + +@pytest.mark.django_db +def test_process_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp_path: Path): + settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") + MassTransferSettings.objects.create() + + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 1), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + ) + mf = MassTransferFilter.objects.create(modality="CT") + job.filters.add(mf) + + task = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) + + volume1 = MassTransferVolume.objects.create( + job=job, + partition_key="20240101", + study_instance_uid="study-1", + series_instance_uid="series-1", + modality="CT", + study_description="", + series_description="A", + series_number=1, + study_datetime=timezone.now(), + ) + volume2 = MassTransferVolume.objects.create( + job=job, + partition_key="20240101", + study_instance_uid="study-1", + series_instance_uid="series-2", + modality="CT", + study_description="", + series_description="B", + series_number=2, + study_datetime=timezone.now(), + ) + volume3 = MassTransferVolume.objects.create( + job=job, + partition_key="20240101", + study_instance_uid="study-2", + series_instance_uid="series-3", + modality="CT", + study_description="", + series_description="C", + series_number=3, + study_datetime=timezone.now(), + ) + + processor = MassTransferTaskProcessor(task) + mocker.patch.object( + processor, + "_find_volumes", + return_value=[volume1, volume2, volume3], + ) + + export_calls: list[tuple[str, str]] = [] + + def fake_export(_, volume, __, pseudonym): + export_calls.append((volume.series_instance_uid, pseudonym)) + + mocker.patch.object(processor, "_export_volume", side_effect=fake_export) + mocker.patch.object(processor, "_convert_volume", return_value=None) + + uuid_side_effect = [ + uuid.UUID(int=1), + uuid.UUID(int=2), + ] + 
mocker.patch("adit.mass_transfer.processors.uuid.uuid4", side_effect=uuid_side_effect) + + result = processor.process() + + pseudonyms_by_series = {series_uid: pseudonym for series_uid, pseudonym in export_calls} + assert pseudonyms_by_series["series-1"] == pseudonyms_by_series["series-2"] + assert pseudonyms_by_series["series-1"] != pseudonyms_by_series["series-3"] + assert result["status"] == MassTransferTask.Status.SUCCESS + + +def test_volume_output_path_uses_year_month_and_pseudonym(): + base_dir = Path("/tmp/base") + study_dt = datetime(2024, 2, 15, 10, 30) + path = _volume_output_path(base_dir, study_dt, "pseudo", "1-Head") + + assert path == base_dir / "202402" / "pseudo" / "1-Head" diff --git a/adit/mass_transfer/urls.py b/adit/mass_transfer/urls.py new file mode 100644 index 000000000..45b29ddd7 --- /dev/null +++ b/adit/mass_transfer/urls.py @@ -0,0 +1,95 @@ +from django.urls import path + +from .views import ( + MassTransferFilterCreateView, + MassTransferFilterDeleteView, + MassTransferFilterListView, + MassTransferFilterUpdateView, + MassTransferJobCancelView, + MassTransferJobCreateView, + MassTransferJobDeleteView, + MassTransferJobDetailView, + MassTransferJobListView, + MassTransferJobRestartView, + MassTransferJobResumeView, + MassTransferJobRetryView, + MassTransferJobVerifyView, + MassTransferTaskDeleteView, + MassTransferTaskDetailView, + MassTransferTaskKillView, + MassTransferTaskResetView, + MassTransferUpdatePreferencesView, +) + +urlpatterns = [ + path("filters/", MassTransferFilterListView.as_view(), name="mass_transfer_filter_list"), + path( + "filters/new/", + MassTransferFilterCreateView.as_view(), + name="mass_transfer_filter_create", + ), + path( + "filters//edit/", + MassTransferFilterUpdateView.as_view(), + name="mass_transfer_filter_update", + ), + path( + "filters//delete/", + MassTransferFilterDeleteView.as_view(), + name="mass_transfer_filter_delete", + ), + path( + "preferences/", + MassTransferUpdatePreferencesView.as_view(), + name="mass_transfer_update_preferences", + ), + path("jobs/", MassTransferJobListView.as_view(), name="mass_transfer_job_list"), + path("jobs/new/", MassTransferJobCreateView.as_view(), name="mass_transfer_job_create"), + path("jobs//", MassTransferJobDetailView.as_view(), name="mass_transfer_job_detail"), + path( + "jobs//delete/", + MassTransferJobDeleteView.as_view(), + name="mass_transfer_job_delete", + ), + path( + "jobs//verify/", + MassTransferJobVerifyView.as_view(), + name="mass_transfer_job_verify", + ), + path( + "jobs//cancel/", + MassTransferJobCancelView.as_view(), + name="mass_transfer_job_cancel", + ), + path( + "jobs//resume/", + MassTransferJobResumeView.as_view(), + name="mass_transfer_job_resume", + ), + path( + "jobs//retry/", + MassTransferJobRetryView.as_view(), + name="mass_transfer_job_retry", + ), + path( + "jobs//restart/", + MassTransferJobRestartView.as_view(), + name="mass_transfer_job_restart", + ), + path("tasks//", MassTransferTaskDetailView.as_view(), name="mass_transfer_task_detail"), + path( + "tasks//delete/", + MassTransferTaskDeleteView.as_view(), + name="mass_transfer_task_delete", + ), + path( + "tasks//reset/", + MassTransferTaskResetView.as_view(), + name="mass_transfer_task_reset", + ), + path( + "tasks//kill/", + MassTransferTaskKillView.as_view(), + name="mass_transfer_task_kill", + ), +] diff --git a/adit/mass_transfer/utils/__init__.py b/adit/mass_transfer/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/adit/mass_transfer/utils/partitions.py 
b/adit/mass_transfer/utils/partitions.py new file mode 100644 index 000000000..97292bca5 --- /dev/null +++ b/adit/mass_transfer/utils/partitions.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from dataclasses import dataclass +from datetime import date, datetime, time, timedelta + +from django.utils import timezone + + +@dataclass(frozen=True) +class PartitionWindow: + start: datetime + end: datetime + key: str + + +def build_partitions( + start_date: date, + end_date: date, + granularity: str, +) -> list[PartitionWindow]: + if end_date < start_date: + raise ValueError("End date must be on or after the start date.") + + if granularity not in {"daily", "weekly"}: + raise ValueError(f"Invalid granularity: {granularity}") + + if granularity == "daily": + step = timedelta(days=1) + else: + step = timedelta(days=7) + + tz = timezone.get_current_timezone() + windows: list[PartitionWindow] = [] + + current = start_date + while current <= end_date: + window_end_date = min(current + step - timedelta(days=1), end_date) + + start_dt = timezone.make_aware(datetime.combine(current, time(0, 0, 0)), tz) + end_dt = timezone.make_aware(datetime.combine(window_end_date, time(23, 59, 59)), tz) + + if current == window_end_date: + key = current.strftime("%Y%m%d") + else: + key = f"{current:%Y%m%d}-{window_end_date:%Y%m%d}" + + windows.append(PartitionWindow(start=start_dt, end=end_dt, key=key)) + current = window_end_date + timedelta(days=1) + + return windows diff --git a/adit/mass_transfer/views.py b/adit/mass_transfer/views.py new file mode 100644 index 000000000..fc00bcb36 --- /dev/null +++ b/adit/mass_transfer/views.py @@ -0,0 +1,160 @@ +from typing import Any, cast + +from adit_radis_shared.common.views import BaseUpdatePreferencesView +from django.conf import settings +from django.contrib.auth.mixins import LoginRequiredMixin +from django.urls import reverse_lazy +from django.views.generic import CreateView, DeleteView, ListView, UpdateView + +from adit.core.views import ( + DicomJobCancelView, + DicomJobCreateView, + DicomJobDeleteView, + DicomJobDetailView, + DicomJobRestartView, + DicomJobResumeView, + DicomJobRetryView, + DicomJobVerifyView, + DicomTaskDeleteView, + DicomTaskDetailView, + DicomTaskKillView, + DicomTaskResetView, + TransferJobListView, +) + +from .filters import MassTransferJobFilter, MassTransferTaskFilter +from .forms import MassTransferFilterForm, MassTransferJobForm +from .mixins import MassTransferLockedMixin +from .models import MassTransferFilter, MassTransferJob, MassTransferTask +from .tables import MassTransferJobTable, MassTransferTaskTable + +MASS_TRANSFER_SOURCE = "mass_transfer_source" +MASS_TRANSFER_DESTINATION = "mass_transfer_destination" +MASS_TRANSFER_GRANULARITY = "mass_transfer_granularity" +MASS_TRANSFER_SEND_FINISHED_MAIL = "mass_transfer_send_finished_mail" + + +class MassTransferUpdatePreferencesView(MassTransferLockedMixin, BaseUpdatePreferencesView): + allowed_keys = [ + MASS_TRANSFER_SOURCE, + MASS_TRANSFER_DESTINATION, + MASS_TRANSFER_GRANULARITY, + MASS_TRANSFER_SEND_FINISHED_MAIL, + ] + + +class MassTransferJobListView(MassTransferLockedMixin, TransferJobListView): + model = MassTransferJob + table_class = MassTransferJobTable + filterset_class = MassTransferJobFilter + template_name = "mass_transfer/mass_transfer_job_list.html" + + +class MassTransferJobCreateView(MassTransferLockedMixin, DicomJobCreateView): + model = MassTransferJob + form_class = MassTransferJobForm + template_name = "mass_transfer/mass_transfer_job_form.html" + 
permission_required = "mass_transfer.add_masstransferjob" + object: MassTransferJob + + def get_initial(self) -> dict[str, Any]: + initial = super().get_initial() + preferences: dict[str, Any] = self.request.user.preferences + + source = preferences.get(MASS_TRANSFER_SOURCE) + if source is not None: + initial["source"] = source + + destination = preferences.get(MASS_TRANSFER_DESTINATION) + if destination is not None: + initial["destination"] = destination + + granularity = preferences.get(MASS_TRANSFER_GRANULARITY) + if granularity is not None: + initial["partition_granularity"] = granularity + + send_finished_mail = preferences.get(MASS_TRANSFER_SEND_FINISHED_MAIL) + if send_finished_mail is not None: + initial["send_finished_mail"] = send_finished_mail + + return initial + + def form_valid(self, form): + return super().form_valid(form, settings.START_MASS_TRANSFER_UNVERIFIED) + + +class MassTransferJobDetailView(MassTransferLockedMixin, DicomJobDetailView): + table_class = MassTransferTaskTable + filterset_class = MassTransferTaskFilter + model = MassTransferJob + context_object_name = "job" + template_name = "mass_transfer/mass_transfer_job_detail.html" + + +class MassTransferJobDeleteView(MassTransferLockedMixin, DicomJobDeleteView): + model = MassTransferJob + success_url = cast(str, reverse_lazy("mass_transfer_job_list")) + + +class MassTransferJobVerifyView(MassTransferLockedMixin, DicomJobVerifyView): + model = MassTransferJob + + +class MassTransferJobCancelView(MassTransferLockedMixin, DicomJobCancelView): + model = MassTransferJob + + +class MassTransferJobResumeView(MassTransferLockedMixin, DicomJobResumeView): + model = MassTransferJob + + +class MassTransferJobRetryView(MassTransferLockedMixin, DicomJobRetryView): + model = MassTransferJob + + +class MassTransferJobRestartView(MassTransferLockedMixin, DicomJobRestartView): + model = MassTransferJob + + +class MassTransferTaskDetailView(MassTransferLockedMixin, DicomTaskDetailView): + model = MassTransferTask + job_url_name = "mass_transfer_job_detail" + template_name = "mass_transfer/mass_transfer_task_detail.html" + + +class MassTransferTaskDeleteView(MassTransferLockedMixin, DicomTaskDeleteView): + model = MassTransferTask + + +class MassTransferTaskResetView(MassTransferLockedMixin, DicomTaskResetView): + model = MassTransferTask + + +class MassTransferTaskKillView(MassTransferLockedMixin, DicomTaskKillView): + model = MassTransferTask + + +class MassTransferFilterListView(LoginRequiredMixin, MassTransferLockedMixin, ListView): + model = MassTransferFilter + template_name = "mass_transfer/mass_transfer_filter_list.html" + context_object_name = "filters" + + +class MassTransferFilterCreateView(LoginRequiredMixin, MassTransferLockedMixin, CreateView): + model = MassTransferFilter + form_class = MassTransferFilterForm + template_name = "mass_transfer/mass_transfer_filter_form.html" + success_url = cast(str, reverse_lazy("mass_transfer_filter_list")) + + +class MassTransferFilterUpdateView(LoginRequiredMixin, MassTransferLockedMixin, UpdateView): + model = MassTransferFilter + form_class = MassTransferFilterForm + template_name = "mass_transfer/mass_transfer_filter_form.html" + success_url = cast(str, reverse_lazy("mass_transfer_filter_list")) + + +class MassTransferFilterDeleteView(LoginRequiredMixin, MassTransferLockedMixin, DeleteView): + model = MassTransferFilter + template_name = "mass_transfer/mass_transfer_filter_confirm_delete.html" + success_url = cast(str, reverse_lazy("mass_transfer_filter_list")) diff --git 
a/adit/settings/base.py b/adit/settings/base.py index f71f2b4e3..0d3ff53db 100644 --- a/adit/settings/base.py +++ b/adit/settings/base.py @@ -81,6 +81,7 @@ "adit.selective_transfer.apps.SelectiveTransferConfig", "adit.batch_query.apps.BatchQueryConfig", "adit.batch_transfer.apps.BatchTransferConfig", + "adit.mass_transfer.apps.MassTransferConfig", "adit.upload.apps.UploadConfig", "adit.dicom_explorer.apps.DicomExplorerConfig", "adit.dicom_web.apps.DicomWebConfig", @@ -353,6 +354,7 @@ START_SELECTIVE_TRANSFER_UNVERIFIED = True START_BATCH_QUERY_UNVERIFIED = True START_BATCH_TRANSFER_UNVERIFIED = True +START_MASS_TRANSFER_UNVERIFIED = True # Priorities of dicom tasks # Selective transfers have the highest priority as those are @@ -364,6 +366,8 @@ BATCH_TRANSFER_URGENT_PRIORITY = 6 BATCH_QUERY_DEFAULT_PRIORITY = 3 BATCH_QUERY_URGENT_PRIORITY = 7 +MASS_TRANSFER_DEFAULT_PRIORITY = 1 +MASS_TRANSFER_URGENT_PRIORITY = 5 # The priority for stalled jobs that are retried. STALLED_JOBS_RETRY_PRIORITY = 10 @@ -379,6 +383,15 @@ # The maximum number of results (patients or studies) in dicom_explorer DICOM_EXPLORER_RESULT_LIMIT = 101 +# Maximum number of C-FIND results for mass transfer before splitting time windows +MASS_TRANSFER_MAX_SEARCH_RESULTS = env.int("MASS_TRANSFER_MAX_SEARCH_RESULTS", default=200) + +# Base directory for temporary DICOM exports in mass transfer +MASS_TRANSFER_EXPORT_BASE_DIR = env.str( + "MASS_TRANSFER_EXPORT_BASE_DIR", + default="/mnt/mass_transfer_exports", +) + # The timeout in dicom_explorer a DICOM server must respond DICOM_EXPLORER_RESPONSE_TIMEOUT = 3 # seconds diff --git a/adit/urls.py b/adit/urls.py index 5f833509c..5d1b67ab7 100644 --- a/adit/urls.py +++ b/adit/urls.py @@ -27,6 +27,7 @@ path("selective-transfer/", include("adit.selective_transfer.urls")), path("batch-query/", include("adit.batch_query.urls")), path("batch-transfer/", include("adit.batch_transfer.urls")), + path("mass-transfer/", include("adit.mass_transfer.urls")), path("upload/", include("adit.upload.urls")), path("dicom-explorer/", include("adit.dicom_explorer.urls")), path("token-authentication/", include("adit_radis_shared.token_authentication.urls")), diff --git a/docker-compose.base.yml b/docker-compose.base.yml index 2a122f109..5c95ec02c 100644 --- a/docker-compose.base.yml +++ b/docker-compose.base.yml @@ -15,6 +15,8 @@ x-app: &default-app DJANGO_SECRET_KEY: ${DJANGO_SECRET_KEY:?} DJANGO_SERVER_EMAIL: ${DJANGO_SERVER_EMAIL:?} EXCLUDE_MODALITIES: ${EXCLUDE_MODALITIES:-} + MASS_TRANSFER_EXPORT_BASE_DIR: ${MASS_TRANSFER_EXPORT_BASE_DIR:-/mnt/mass_transfer_exports} + MASS_TRANSFER_MAX_SEARCH_RESULTS: ${MASS_TRANSFER_MAX_SEARCH_RESULTS:-200} IS_DOCKER_CONTAINER: 1 FILE_TRANSMIT_HOST: receiver.local FILE_TRANSMIT_PORT: 14638 diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index beb0511d7..a36da1aa0 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -64,6 +64,15 @@ services: ./manage.py bg_worker -l debug -q dicom --autoreload " + mass_transfer_worker: + <<: *default-app + image: adit_dev-mass_transfer_worker:latest + command: > + bash -c " + wait-for-it -s postgres.local:5432 -t 60 && + ./manage.py bg_worker -l debug -q mass_transfer --autoreload + " + receiver: <<: *default-app ports: diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index a1ed27bcb..fdc2acbe7 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -78,6 +78,17 @@ services: <<: *deploy replicas: ${DICOM_WORKER_REPLICAS:-3} + mass_transfer_worker: + <<: *default-app + command: 
> + bash -c " + wait-for-it -s postgres.local:5432 -t 60 && + ./manage.py bg_worker -q mass_transfer + " + deploy: + <<: *deploy + replicas: ${MASS_TRANSFER_WORKER_REPLICAS:-1} + receiver: <<: *default-app ports: diff --git a/example.env b/example.env index e17158350..78d0498a4 100644 --- a/example.env +++ b/example.env @@ -89,9 +89,16 @@ RECEIVER_AE_TITLE="ADIT1DEV" # This does not affect downloads using the ADIT client. EXCLUDE_MODALITIES="PR,SR" +# Mass transfer settings +# Maximum number of C-FIND results before a time window is split +MASS_TRANSFER_MAX_SEARCH_RESULTS=200 +# Base directory for temporary DICOM exports during mass transfer +MASS_TRANSFER_EXPORT_BASE_DIR="/mnt/mass_transfer_exports" + # Replicas of the services that can be scaled (production only). WEB_REPLICAS=5 DICOM_WORKER_REPLICAS=3 +MASS_TRANSFER_WORKER_REPLICAS=1 # The directory where download folders are mounted. MOUNT_DIR="/mnt" From ea4d78199a5be3b4ade04be05998487d4db0c33f Mon Sep 17 00:00:00 2001 From: Ritwik Date: Tue, 3 Feb 2026 15:37:16 +0100 Subject: [PATCH 002/103] Add mass transfer opt-out pseudonymization --- adit/mass_transfer/forms.py | 6 ++ .../0002_masstransferjob_pseudonymize.py | 18 +++++ adit/mass_transfer/models.py | 1 + adit/mass_transfer/processors.py | 23 ++++--- .../mass_transfer_job_detail.html | 2 + adit/mass_transfer/tests/test_processor.py | 68 ++++++++++++++++++- 6 files changed, 107 insertions(+), 11 deletions(-) create mode 100644 adit/mass_transfer/migrations/0002_masstransferjob_pseudonymize.py diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index 1c09cd3f9..a57c76190 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -56,16 +56,22 @@ class Meta: "end_date", "partition_granularity", "filters", + "pseudonymize", "send_finished_mail", ) labels = { "start_date": "Start date", "end_date": "End date", "partition_granularity": "Partition granularity", + "pseudonymize": "Pseudonymize data", "send_finished_mail": "Send Email when job is finished", } help_texts = { "partition_granularity": "Daily or weekly partition windows.", + "pseudonymize": ( + "When disabled, patient identifiers are preserved and output folders use " + "Patient ID." 
+ ), } def __init__(self, *args, **kwargs): diff --git a/adit/mass_transfer/migrations/0002_masstransferjob_pseudonymize.py b/adit/mass_transfer/migrations/0002_masstransferjob_pseudonymize.py new file mode 100644 index 000000000..e24dfd679 --- /dev/null +++ b/adit/mass_transfer/migrations/0002_masstransferjob_pseudonymize.py @@ -0,0 +1,18 @@ +# Generated by Codex on 2026-02-03 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("mass_transfer", "0001_initial"), + ] + + operations = [ + migrations.AddField( + model_name="masstransferjob", + name="pseudonymize", + field=models.BooleanField(default=True), + ), + ] diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index 73a209f1d..556008ed1 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -62,6 +62,7 @@ class PartitionGranularity(models.TextChoices): choices=PartitionGranularity.choices, default=PartitionGranularity.DAILY, ) + pseudonymize = models.BooleanField(default=True) filters = models.ManyToManyField(MassTransferFilter, related_name="jobs", blank=True) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 6fa159b67..249cff028 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -80,21 +80,21 @@ def _destination_base_dir(node: DicomNode) -> Path: def _volume_export_path( base_dir: Path, study_dt: datetime, - pseudonym: str, + subject_id: str, series_name: str, ) -> Path: year_month = study_dt.strftime("%Y%m") - return base_dir / year_month / pseudonym / series_name + return base_dir / year_month / subject_id / series_name def _volume_output_path( base_dir: Path, study_dt: datetime, - pseudonym: str, + subject_id: str, series_name: str, ) -> Path: year_month = study_dt.strftime("%Y%m") - return base_dir / year_month / pseudonym / series_name + return base_dir / year_month / subject_id / series_name class MassTransferTaskProcessor(DicomTaskProcessor): @@ -146,8 +146,13 @@ def process(self): volumes_by_study.setdefault(volume.study_instance_uid, []).append(volume) for _, study_volumes in volumes_by_study.items(): - existing_pseudonym = next((v.pseudonym for v in study_volumes if v.pseudonym), None) - pseudonym = existing_pseudonym or uuid.uuid4().hex + pseudonym = "" + if job.pseudonymize: + existing_pseudonym = next( + (v.pseudonym for v in study_volumes if v.pseudonym), + None, + ) + pseudonym = existing_pseudonym or uuid.uuid4().hex for volume in study_volumes: if volume.status == MassTransferVolume.Status.CONVERTED: @@ -370,7 +375,8 @@ def _export_volume( volume.series_instance_uid, ) - export_path = _volume_export_path(export_base, study_dt, pseudonym, series_name) + subject_id = sanitize_filename(pseudonym or volume.patient_id) + export_path = _volume_export_path(export_base, study_dt, subject_id, series_name) export_path.mkdir(parents=True, exist_ok=True) volume.exported_folder = str(export_path) @@ -414,7 +420,8 @@ def _convert_volume( volume.series_instance_uid, ) - output_path = _volume_output_path(output_base, study_dt, pseudonym, series_name) + subject_id = sanitize_filename(pseudonym or volume.patient_id) + output_path = _volume_output_path(output_base, study_dt, subject_id, series_name) output_path.mkdir(parents=True, exist_ok=True) cmd = [ diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html index 6d5e8f7bc..42291778d 100644 --- 
a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html @@ -37,6 +37,8 @@
{{ job.end_date }}
Granularity
{{ job.get_partition_granularity_display }}
+ Pseudonymization
+ {{ job.pseudonymize|yesno:"Enabled,Disabled" }}
Filters
{% if job.filters.all %} diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 7638d172a..75545d1fc 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -147,9 +147,71 @@ def fake_export(_, volume, __, pseudonym): assert result["status"] == MassTransferTask.Status.SUCCESS -def test_volume_output_path_uses_year_month_and_pseudonym(): +@pytest.mark.django_db +def test_process_opt_out_skips_pseudonymization( + mocker: MockerFixture, + settings, + tmp_path: Path, +): + settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") + MassTransferSettings.objects.create() + + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 1), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + pseudonymize=False, + ) + mf = MassTransferFilter.objects.create(modality="CT") + job.filters.add(mf) + + task = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) + + volume = MassTransferVolume.objects.create( + job=job, + partition_key="20240101", + patient_id="PATIENT-1", + study_instance_uid="study-1", + series_instance_uid="series-1", + modality="CT", + study_description="", + series_description="A", + series_number=1, + study_datetime=timezone.now(), + ) + + processor = MassTransferTaskProcessor(task) + mocker.patch.object(processor, "_find_volumes", return_value=[volume]) + + export_calls: list[str] = [] + + def fake_export(_, __, ___, pseudonym): + export_calls.append(pseudonym) + + mocker.patch.object(processor, "_export_volume", side_effect=fake_export) + mocker.patch.object(processor, "_convert_volume", return_value=None) + + result = processor.process() + + assert export_calls == [""] + assert result["status"] == MassTransferTask.Status.SUCCESS + + +def test_volume_output_path_uses_year_month_and_subject_id(): base_dir = Path("/tmp/base") study_dt = datetime(2024, 2, 15, 10, 30) - path = _volume_output_path(base_dir, study_dt, "pseudo", "1-Head") + path = _volume_output_path(base_dir, study_dt, "subject", "1-Head") - assert path == base_dir / "202402" / "pseudo" / "1-Head" + assert path == base_dir / "202402" / "subject" / "1-Head" From 1a283683d4f75fc6d362bf60ed7e9e474c9b1392 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Tue, 3 Feb 2026 15:52:45 +0100 Subject: [PATCH 003/103] Ensure mass transfer exports are cleaned on failure --- adit/core/tasks.py | 44 +++++++++++++++++ adit/mass_transfer/tests/test_cleanup.py | 63 ++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 adit/mass_transfer/tests/test_cleanup.py diff --git a/adit/core/tasks.py b/adit/core/tasks.py index 6b3330a49..447b60bcb 100644 --- a/adit/core/tasks.py +++ b/adit/core/tasks.py @@ -1,7 +1,9 @@ import logging +import shutil import subprocess import traceback from concurrent import futures +from pathlib import Path from time import sleep from typing import cast @@ -60,6 +62,45 @@ def backup_db(*args, **kwargs): ) +def _cleanup_mass_transfer_exports(dicom_task: DicomTask) -> None: + if dicom_task._meta.app_label != "mass_transfer": + return + + partition_key = getattr(dicom_task, "partition_key", None) + if not partition_key: + return + + try: + 
from adit.mass_transfer.models import MassTransferVolume + except Exception: + return + + volumes = MassTransferVolume.objects.filter( + job_id=dicom_task.job_id, + partition_key=partition_key, + ).exclude(exported_folder="") + + for volume in volumes: + if volume.status == MassTransferVolume.Status.CONVERTED: + continue + + export_folder = volume.exported_folder + if export_folder: + try: + shutil.rmtree(Path(export_folder)) + except FileNotFoundError: + pass + except Exception as err: + volume.add_log(f"Cleanup failed: {err}") + volume.save() + continue + + volume.exported_folder = "" + volume.status = MassTransferVolume.Status.ERROR + volume.add_log("Export cleaned up after task failure.") + volume.save() + + def _run_dicom_task(context: JobContext, model_label: str, task_id: int): assert context.job @@ -111,6 +152,7 @@ def _monitor_task(context: JobContext, future: ProcessFuture) -> None: dicom_task.message = "Task was aborted due to timeout." dicom_task.status = DicomTask.Status.FAILURE ensure_db_connection() + _cleanup_mass_transfer_exports(dicom_task) except RetriableDicomError as err: logger.exception("Retriable error occurred during %s.", dicom_task) @@ -129,6 +171,7 @@ def _monitor_task(context: JobContext, future: ProcessFuture) -> None: dicom_task.message = str(err) ensure_db_connection() + _cleanup_mass_transfer_exports(dicom_task) raise err except Exception as err: @@ -144,6 +187,7 @@ def _monitor_task(context: JobContext, future: ProcessFuture) -> None: dicom_task.log += traceback.format_exc() ensure_db_connection() + _cleanup_mass_transfer_exports(dicom_task) finally: dicom_task.end = timezone.now() diff --git a/adit/mass_transfer/tests/test_cleanup.py b/adit/mass_transfer/tests/test_cleanup.py new file mode 100644 index 000000000..f17a582bf --- /dev/null +++ b/adit/mass_transfer/tests/test_cleanup.py @@ -0,0 +1,63 @@ +from pathlib import Path + +import pytest +from adit_radis_shared.accounts.factories import UserFactory +from django.utils import timezone + +from adit.core.factories import DicomFolderFactory, DicomServerFactory +from adit.core.tasks import _cleanup_mass_transfer_exports +from adit.mass_transfer.models import ( + MassTransferJob, + MassTransferSettings, + MassTransferTask, + MassTransferVolume, +) + + +@pytest.mark.django_db +def test_cleanup_mass_transfer_exports_on_failure(tmp_path: Path): + MassTransferSettings.objects.create() + + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=timezone.now().date(), + end_date=timezone.now().date(), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + ) + task = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) + + export_dir = tmp_path / "exports" / "202401" / "PATIENT" / "1-Head" + export_dir.mkdir(parents=True, exist_ok=True) + + volume = MassTransferVolume.objects.create( + job=job, + partition_key="20240101", + patient_id="PATIENT", + study_instance_uid="study-1", + series_instance_uid="series-1", + modality="CT", + study_description="", + series_description="Head", + series_number=1, + study_datetime=timezone.now(), + exported_folder=str(export_dir), + status=MassTransferVolume.Status.EXPORTED, + ) + + _cleanup_mass_transfer_exports(task) + + volume.refresh_from_db() + assert not 
export_dir.exists() + assert volume.status == MassTransferVolume.Status.ERROR + assert volume.exported_folder == "" From 687dc298313f341501703ab8d4ff14bac78ef776 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Tue, 3 Feb 2026 16:07:17 +0100 Subject: [PATCH 004/103] Improve mass transfer filter and date inputs --- adit/mass_transfer/forms.py | 50 +++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index a57c76190..9a270caee 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -16,6 +16,46 @@ class MassTransferFilterForm(forms.ModelForm): + MODALITY_CHOICES = [ + ("", "Any modality"), + ("CT", "CT"), + ("MR", "MR"), + ("XR", "XR"), + ("US", "US"), + ("NM", "NM"), + ("PT", "PT"), + ("MG", "MG"), + ("CR", "CR"), + ("DX", "DX"), + ("RF", "RF"), + ("XA", "XA"), + ("OT", "OT"), + ("SR", "SR"), + ("PR", "PR"), + ("ECG", "ECG"), + ("SEG", "SEG"), + ("RTSTRUCT", "RTSTRUCT"), + ("RTPLAN", "RTPLAN"), + ("RTDOSE", "RTDOSE"), + ("RTIMAGE", "RTIMAGE"), + ("SM", "SM"), + ("IVUS", "IVUS"), + ("OCT", "OCT"), + ("ES", "ES"), + ("OP", "OP"), + ("IO", "IO"), + ("FA", "FA"), + ("RG", "RG"), + ("MS", "MS"), + ("DOC", "DOC"), + ] + + modality = forms.ChoiceField( + required=False, + choices=MODALITY_CHOICES, + help_text="Leave blank for any modality.", + ) + class Meta: model = MassTransferFilter fields = ( @@ -37,6 +77,12 @@ class Meta: "series_number": "Series number", } + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.helper = FormHelper(self) + self.helper.render_unmentioned_fields = True + self.helper.add_input(Submit("save", "Save Filter")) + class MassTransferJobForm(forms.ModelForm): filters = forms.ModelMultipleChoiceField( @@ -73,6 +119,10 @@ class Meta: "Patient ID." ), } + widgets = { + "start_date": forms.DateInput(attrs={"type": "date"}), + "end_date": forms.DateInput(attrs={"type": "date"}), + } def __init__(self, *args, **kwargs): self.tasks = [] From 4a5a0f1de080920a53251e59f28cf805cd3c5ad8 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Tue, 3 Feb 2026 16:24:57 +0100 Subject: [PATCH 005/103] Fix CI image build and refine mass transfer form layout --- adit/mass_transfer/forms.py | 45 ++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index 9a270caee..3a8a9ab14 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -4,7 +4,7 @@ from adit_radis_shared.accounts.models import User from crispy_forms.helper import FormHelper -from crispy_forms.layout import Layout, Submit +from crispy_forms.layout import Column, Div, Field, HTML, Layout, Row, Submit from django import forms from django.core.exceptions import ValidationError @@ -146,8 +146,47 @@ def __init__(self, *args, **kwargs): ) self.helper = FormHelper(self) - self.helper.layout = Layout("source", "destination") - self.helper.render_unmentioned_fields = True + self.helper.layout = Layout( + Div( + HTML("
Transfer scope
"), + Div( + Row( + Column(Field("source"), css_class="col-md-6"), + Column(Field("destination"), css_class="col-md-6"), + css_class="g-3", + ), + Row( + Column(Field("start_date"), css_class="col-md-6"), + Column(Field("end_date"), css_class="col-md-6"), + css_class="g-3", + ), + Row( + Column(Field("partition_granularity"), css_class="col-md-6"), + Column(Field("pseudonymize"), css_class="col-md-6"), + css_class="g-3", + ), + css_class="card-body", + ), + css_class="card mb-3", + ), + Div( + HTML("
Filters
"), + Div( + Field("filters"), + css_class="card-body", + ), + css_class="card mb-3", + ), + Div( + HTML("
Notifications
"), + Div( + Field("send_finished_mail"), + css_class="card-body", + ), + css_class="card mb-3", + ), + ) + self.helper.render_unmentioned_fields = False self.helper.attrs["x-data"] = "massTransferJobForm()" self.helper.add_input(Submit("save", "Create Job")) From b5fb532d5d15a1a8342e5907d957f63cfaa58275 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Tue, 3 Feb 2026 16:30:08 +0100 Subject: [PATCH 006/103] Document dcm2niix dependency and adjust mass transfer form layout --- adit/mass_transfer/forms.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index 3a8a9ab14..90bbcb842 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -165,6 +165,10 @@ def __init__(self, *args, **kwargs): Column(Field("pseudonymize"), css_class="col-md-6"), css_class="g-3", ), + Row( + Column(Field("send_finished_mail"), css_class="col-md-6"), + css_class="g-3", + ), css_class="card-body", ), css_class="card mb-3", @@ -177,14 +181,6 @@ def __init__(self, *args, **kwargs): ), css_class="card mb-3", ), - Div( - HTML("
Notifications
"), - Div( - Field("send_finished_mail"), - css_class="card-body", - ), - css_class="card mb-3", - ), ) self.helper.render_unmentioned_fields = False self.helper.attrs["x-data"] = "massTransferJobForm()" From c747e228fed191c577872e3b66cc7bb071b24abb Mon Sep 17 00:00:00 2001 From: Ritwik Date: Fri, 6 Feb 2026 13:49:45 +0100 Subject: [PATCH 007/103] Fix mass transfer study time range and worker env --- adit/mass_transfer/processors.py | 2 +- docker-compose.base.yml | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 249cff028..d92417804 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -316,7 +316,7 @@ def _find_studies( query = QueryDataset.create( StudyDate=(start.date(), end.date()), - StudyTime=(start.time(), end.time()), + StudyTime=(datetime.min.time(), datetime.max.time().replace(microsecond=0)), ) if mf.modality: diff --git a/docker-compose.base.yml b/docker-compose.base.yml index 5c95ec02c..78f14bfd5 100644 --- a/docker-compose.base.yml +++ b/docker-compose.base.yml @@ -66,6 +66,10 @@ services: <<: *default-app hostname: dicom_worker.local + mass_transfer_worker: + <<: *default-app + hostname: mass_transfer_worker.local + receiver: <<: *default-app hostname: receiver.local From 61a1b987c8f57f78b21bd323a513dc950c82a3ba Mon Sep 17 00:00:00 2001 From: Ritwik Date: Sun, 8 Feb 2026 18:41:26 +0100 Subject: [PATCH 008/103] Route mass transfer via dicom queue and scope filters --- adit/core/tasks.py | 2 +- adit/mass_transfer/forms.py | 12 ++++++++- .../0003_masstransferfilter_owner.py | 27 +++++++++++++++++++ adit/mass_transfer/models.py | 11 ++++++-- .../static/mass_transfer/mass_transfer.css | 9 +++++++ .../mass_transfer/mass_transfer_layout.html | 6 +++++ adit/mass_transfer/tests/test_processor.py | 6 ++--- adit/mass_transfer/views.py | 13 +++++++++ docker-compose.base.yml | 4 --- docker-compose.dev.yml | 9 ------- docker-compose.prod.yml | 11 -------- example.env | 1 - 12 files changed, 79 insertions(+), 32 deletions(-) create mode 100644 adit/mass_transfer/migrations/0003_masstransferfilter_owner.py create mode 100644 adit/mass_transfer/static/mass_transfer/mass_transfer.css diff --git a/adit/core/tasks.py b/adit/core/tasks.py index 447b60bcb..ba739621c 100644 --- a/adit/core/tasks.py +++ b/adit/core/tasks.py @@ -226,7 +226,7 @@ def process_dicom_task(context: JobContext, model_label: str, task_id: int): @app.task( - queue="mass_transfer", + queue="dicom", pass_context=True, retry=DICOM_TASK_RETRY_STRATEGY, ) diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index 90bbcb842..6a244d77e 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -88,7 +88,9 @@ class MassTransferJobForm(forms.ModelForm): filters = forms.ModelMultipleChoiceField( queryset=MassTransferFilter.objects.all(), required=True, - widget=forms.CheckboxSelectMultiple, + widget=forms.CheckboxSelectMultiple( + attrs={"class": "mass-transfer-filter-list"}, + ), ) tasks: list[MassTransferTask] @@ -131,6 +133,8 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self.fields["filters"].queryset = MassTransferFilter.objects.filter(owner=self.user) + self.fields["source"] = DicomNodeChoiceField("source", self.user) self.fields["source"].widget.attrs["@change"] = "onSourceChange($event)" @@ -210,6 +214,12 @@ def clean(self): raise ValidationError("End date must be on or after the start date.") return cleaned + def clean_filters(self): 
+ filters = self.cleaned_data["filters"] + if filters.exclude(owner=self.user).exists(): + raise ValidationError("Selected filters are not available to this user.") + return filters + def _save_tasks(self, job: MassTransferJob) -> None: partitions = build_partitions( job.start_date, diff --git a/adit/mass_transfer/migrations/0003_masstransferfilter_owner.py b/adit/mass_transfer/migrations/0003_masstransferfilter_owner.py new file mode 100644 index 000000000..fd9b0bd98 --- /dev/null +++ b/adit/mass_transfer/migrations/0003_masstransferfilter_owner.py @@ -0,0 +1,27 @@ +# Generated by Codex on 2026-02-08 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("mass_transfer", "0002_masstransferjob_pseudonymize"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.AddField( + model_name="masstransferfilter", + name="owner", + field=models.ForeignKey( + to=settings.AUTH_USER_MODEL, + on_delete=django.db.models.deletion.CASCADE, + related_name="mass_transfer_filters", + null=True, + blank=True, + ), + ), + ] diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index 556008ed1..a0cf2da70 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -15,6 +15,13 @@ class Meta: class MassTransferFilter(models.Model): + owner = models.ForeignKey( + settings.AUTH_USER_MODEL, + on_delete=models.CASCADE, + related_name="mass_transfer_filters", + null=True, + blank=True, + ) name = models.CharField(max_length=150, blank=True, default="") modality = models.CharField(max_length=16, blank=True, default="") institution_name = models.CharField(max_length=128, blank=True, default="") @@ -72,7 +79,7 @@ def get_absolute_url(self): return reverse("mass_transfer_job_detail", args=[self.pk]) def queue_pending_tasks(self): - """Queues all pending tasks of this job in the mass_transfer queue.""" + """Queues all pending tasks of this job in the dicom queue.""" assert self.status == DicomJob.Status.PENDING priority = self.default_priority @@ -106,7 +113,7 @@ def get_absolute_url(self): return reverse("mass_transfer_task_detail", args=[self.pk]) def queue_pending_task(self) -> None: - """Queues a mass transfer task.""" + """Queues a mass transfer task in the dicom queue.""" assert self.status == DicomTask.Status.PENDING assert self.queued_job is None diff --git a/adit/mass_transfer/static/mass_transfer/mass_transfer.css b/adit/mass_transfer/static/mass_transfer/mass_transfer.css new file mode 100644 index 000000000..0268f9acf --- /dev/null +++ b/adit/mass_transfer/static/mass_transfer/mass_transfer.css @@ -0,0 +1,9 @@ +.mass-transfer-filter-list { + display: flex; + flex-wrap: wrap; + gap: 0.5rem 1.25rem; +} + +.mass-transfer-filter-list .form-check { + margin: 0; +} diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_layout.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_layout.html index 3bff24fe8..ed8d0ed76 100644 --- a/adit/mass_transfer/templates/mass_transfer/mass_transfer_layout.html +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_layout.html @@ -1,5 +1,11 @@ {% extends "core/core_layout.html" %} {% load static from static %} +{% block css %} + {{ block.super }} + +{% endblock css %} {% block script %} {{ block.super }} diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 75545d1fc..a99e8d43e 100644 --- 
a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -36,7 +36,7 @@ def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture, s end_date=date(2024, 1, 1), partition_granularity=MassTransferJob.PartitionGranularity.DAILY, ) - mf = MassTransferFilter.objects.create(modality="CT") + mf = MassTransferFilter.objects.create(owner=user, modality="CT") job.filters.add(mf) start = timezone.now() @@ -73,7 +73,7 @@ def test_process_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp end_date=date(2024, 1, 1), partition_granularity=MassTransferJob.PartitionGranularity.DAILY, ) - mf = MassTransferFilter.objects.create(modality="CT") + mf = MassTransferFilter.objects.create(owner=user, modality="CT") job.filters.add(mf) task = MassTransferTask.objects.create( @@ -168,7 +168,7 @@ def test_process_opt_out_skips_pseudonymization( partition_granularity=MassTransferJob.PartitionGranularity.DAILY, pseudonymize=False, ) - mf = MassTransferFilter.objects.create(modality="CT") + mf = MassTransferFilter.objects.create(owner=user, modality="CT") job.filters.add(mf) task = MassTransferTask.objects.create( diff --git a/adit/mass_transfer/views.py b/adit/mass_transfer/views.py index fc00bcb36..2d391a399 100644 --- a/adit/mass_transfer/views.py +++ b/adit/mass_transfer/views.py @@ -139,6 +139,9 @@ class MassTransferFilterListView(LoginRequiredMixin, MassTransferLockedMixin, Li template_name = "mass_transfer/mass_transfer_filter_list.html" context_object_name = "filters" + def get_queryset(self): + return MassTransferFilter.objects.filter(owner=self.request.user) + class MassTransferFilterCreateView(LoginRequiredMixin, MassTransferLockedMixin, CreateView): model = MassTransferFilter @@ -146,6 +149,10 @@ class MassTransferFilterCreateView(LoginRequiredMixin, MassTransferLockedMixin, template_name = "mass_transfer/mass_transfer_filter_form.html" success_url = cast(str, reverse_lazy("mass_transfer_filter_list")) + def form_valid(self, form): + form.instance.owner = self.request.user + return super().form_valid(form) + class MassTransferFilterUpdateView(LoginRequiredMixin, MassTransferLockedMixin, UpdateView): model = MassTransferFilter @@ -153,8 +160,14 @@ class MassTransferFilterUpdateView(LoginRequiredMixin, MassTransferLockedMixin, template_name = "mass_transfer/mass_transfer_filter_form.html" success_url = cast(str, reverse_lazy("mass_transfer_filter_list")) + def get_queryset(self): + return MassTransferFilter.objects.filter(owner=self.request.user) + class MassTransferFilterDeleteView(LoginRequiredMixin, MassTransferLockedMixin, DeleteView): model = MassTransferFilter template_name = "mass_transfer/mass_transfer_filter_confirm_delete.html" success_url = cast(str, reverse_lazy("mass_transfer_filter_list")) + + def get_queryset(self): + return MassTransferFilter.objects.filter(owner=self.request.user) diff --git a/docker-compose.base.yml b/docker-compose.base.yml index 78f14bfd5..5c95ec02c 100644 --- a/docker-compose.base.yml +++ b/docker-compose.base.yml @@ -66,10 +66,6 @@ services: <<: *default-app hostname: dicom_worker.local - mass_transfer_worker: - <<: *default-app - hostname: mass_transfer_worker.local - receiver: <<: *default-app hostname: receiver.local diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index a36da1aa0..beb0511d7 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -64,15 +64,6 @@ services: ./manage.py bg_worker -l debug -q dicom --autoreload " - mass_transfer_worker: - <<: *default-app - 
image: adit_dev-mass_transfer_worker:latest - command: > - bash -c " - wait-for-it -s postgres.local:5432 -t 60 && - ./manage.py bg_worker -l debug -q mass_transfer --autoreload - " - receiver: <<: *default-app ports: diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index fdc2acbe7..a1ed27bcb 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -78,17 +78,6 @@ services: <<: *deploy replicas: ${DICOM_WORKER_REPLICAS:-3} - mass_transfer_worker: - <<: *default-app - command: > - bash -c " - wait-for-it -s postgres.local:5432 -t 60 && - ./manage.py bg_worker -q mass_transfer - " - deploy: - <<: *deploy - replicas: ${MASS_TRANSFER_WORKER_REPLICAS:-1} - receiver: <<: *default-app ports: diff --git a/example.env b/example.env index 78d0498a4..c7a9cf6d5 100644 --- a/example.env +++ b/example.env @@ -98,7 +98,6 @@ MASS_TRANSFER_EXPORT_BASE_DIR="/mnt/mass_transfer_exports" # Replicas of the services that can be scaled (production only). WEB_REPLICAS=5 DICOM_WORKER_REPLICAS=3 -MASS_TRANSFER_WORKER_REPLICAS=1 # The directory where download folders are mounted. MOUNT_DIR="/mnt" From 0efaa85c31e9cb7397cc3813bc3b5c89d8977b52 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Sun, 8 Feb 2026 19:29:27 +0100 Subject: [PATCH 009/103] Enforce filter names and tweak UI spacing --- adit/mass_transfer/forms.py | 21 ++++++-- .../0004_masstransferfilter_require_name.py | 50 +++++++++++++++++++ ...05_masstransferfilter_unique_owner_name.py | 20 ++++++++ adit/mass_transfer/models.py | 12 ++++- .../static/mass_transfer/mass_transfer.css | 11 ++-- adit/mass_transfer/tests/test_processor.py | 6 +-- adit/mass_transfer/views.py | 10 ++++ 7 files changed, 114 insertions(+), 16 deletions(-) create mode 100644 adit/mass_transfer/migrations/0004_masstransferfilter_require_name.py create mode 100644 adit/mass_transfer/migrations/0005_masstransferfilter_unique_owner_name.py diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index 6a244d77e..b362aed62 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -4,7 +4,7 @@ from adit_radis_shared.accounts.models import User from crispy_forms.helper import FormHelper -from crispy_forms.layout import Column, Div, Field, HTML, Layout, Row, Submit +from crispy_forms.layout import HTML, Column, Div, Field, Layout, Row, Submit from django import forms from django.core.exceptions import ValidationError @@ -78,19 +78,30 @@ class Meta: } def __init__(self, *args, **kwargs): + self.user: User | None = kwargs.pop("user", None) super().__init__(*args, **kwargs) self.helper = FormHelper(self) self.helper.render_unmentioned_fields = True self.helper.add_input(Submit("save", "Save Filter")) + def clean_name(self): + name = (self.cleaned_data.get("name") or "").strip() + if not name: + raise ValidationError("Name is required.") + if self.user is not None: + qs = MassTransferFilter.objects.filter(owner=self.user, name=name) + if self.instance.pk: + qs = qs.exclude(pk=self.instance.pk) + if qs.exists(): + raise ValidationError("You already have a filter with this name.") + return name + class MassTransferJobForm(forms.ModelForm): filters = forms.ModelMultipleChoiceField( queryset=MassTransferFilter.objects.all(), required=True, - widget=forms.CheckboxSelectMultiple( - attrs={"class": "mass-transfer-filter-list"}, - ), + widget=forms.CheckboxSelectMultiple, ) tasks: list[MassTransferTask] @@ -180,7 +191,7 @@ def __init__(self, *args, **kwargs): Div( HTML("
Filters
"), Div( - Field("filters"), + Field("filters", wrapper_class="mass-transfer-filter-list"), css_class="card-body", ), css_class="card mb-3", diff --git a/adit/mass_transfer/migrations/0004_masstransferfilter_require_name.py b/adit/mass_transfer/migrations/0004_masstransferfilter_require_name.py new file mode 100644 index 000000000..d05070451 --- /dev/null +++ b/adit/mass_transfer/migrations/0004_masstransferfilter_require_name.py @@ -0,0 +1,50 @@ +# Generated by Codex on 2026-02-08 + +from django.db import migrations, models + + +def fill_missing_filter_names(apps, schema_editor): + MassTransferFilter = apps.get_model("mass_transfer", "MassTransferFilter") + filters = MassTransferFilter.objects.filter(name="") + for mf in filters: + parts = [] + if getattr(mf, "modality", ""): + parts.append(mf.modality) + if getattr(mf, "institution_name", ""): + parts.append(f"Institution={mf.institution_name}") + if getattr(mf, "study_description", ""): + parts.append(f"Study={mf.study_description}") + if getattr(mf, "series_description", ""): + parts.append(f"Series={mf.series_description}") + series_number = getattr(mf, "series_number", None) + if series_number is not None: + parts.append(f"SeriesNumber={series_number}") + mf.name = "; ".join(parts) if parts else f"Filter {mf.pk}" + mf.save(update_fields=["name"]) + + +def noop_reverse(apps, schema_editor): + return None + + +class Migration(migrations.Migration): + + dependencies = [ + ("mass_transfer", "0003_masstransferfilter_owner"), + ] + + operations = [ + migrations.RunPython(fill_missing_filter_names, noop_reverse), + migrations.AlterField( + model_name="masstransferfilter", + name="name", + field=models.CharField(max_length=150), + ), + migrations.AddConstraint( + model_name="masstransferfilter", + constraint=models.CheckConstraint( + condition=~models.Q(name=""), + name="mass_transfer_filter_name_not_blank", + ), + ), + ] diff --git a/adit/mass_transfer/migrations/0005_masstransferfilter_unique_owner_name.py b/adit/mass_transfer/migrations/0005_masstransferfilter_unique_owner_name.py new file mode 100644 index 000000000..8289bfbba --- /dev/null +++ b/adit/mass_transfer/migrations/0005_masstransferfilter_unique_owner_name.py @@ -0,0 +1,20 @@ +# Generated by Codex on 2026-02-08 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("mass_transfer", "0004_masstransferfilter_require_name"), + ] + + operations = [ + migrations.AddConstraint( + model_name="masstransferfilter", + constraint=models.UniqueConstraint( + fields=("owner", "name"), + name="mass_transfer_filter_unique_owner_name", + ), + ), + ] diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index a0cf2da70..08920e4b2 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -22,7 +22,7 @@ class MassTransferFilter(models.Model): null=True, blank=True, ) - name = models.CharField(max_length=150, blank=True, default="") + name = models.CharField(max_length=150) modality = models.CharField(max_length=16, blank=True, default="") institution_name = models.CharField(max_length=128, blank=True, default="") apply_institution_on_study = models.BooleanField(default=True) @@ -32,6 +32,16 @@ class MassTransferFilter(models.Model): class Meta: ordering = ("name", "id") + constraints = [ + models.CheckConstraint( + condition=~models.Q(name=""), + name="mass_transfer_filter_name_not_blank", + ), + models.UniqueConstraint( + fields=["owner", "name"], + name="mass_transfer_filter_unique_owner_name", + 
), + ] def __str__(self) -> str: if self.name: diff --git a/adit/mass_transfer/static/mass_transfer/mass_transfer.css b/adit/mass_transfer/static/mass_transfer/mass_transfer.css index 0268f9acf..62bd94d98 100644 --- a/adit/mass_transfer/static/mass_transfer/mass_transfer.css +++ b/adit/mass_transfer/static/mass_transfer/mass_transfer.css @@ -1,9 +1,6 @@ -.mass-transfer-filter-list { - display: flex; - flex-wrap: wrap; - gap: 0.5rem 1.25rem; -} - .mass-transfer-filter-list .form-check { - margin: 0; + display: inline-flex; + align-items: center; + gap: 0.35rem; + margin: 0 1.25rem 0.5rem 0; } diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index a99e8d43e..f6c54de3f 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -36,7 +36,7 @@ def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture, s end_date=date(2024, 1, 1), partition_granularity=MassTransferJob.PartitionGranularity.DAILY, ) - mf = MassTransferFilter.objects.create(owner=user, modality="CT") + mf = MassTransferFilter.objects.create(owner=user, name="CT Filter", modality="CT") job.filters.add(mf) start = timezone.now() @@ -73,7 +73,7 @@ def test_process_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp end_date=date(2024, 1, 1), partition_granularity=MassTransferJob.PartitionGranularity.DAILY, ) - mf = MassTransferFilter.objects.create(owner=user, modality="CT") + mf = MassTransferFilter.objects.create(owner=user, name="CT Filter", modality="CT") job.filters.add(mf) task = MassTransferTask.objects.create( @@ -168,7 +168,7 @@ def test_process_opt_out_skips_pseudonymization( partition_granularity=MassTransferJob.PartitionGranularity.DAILY, pseudonymize=False, ) - mf = MassTransferFilter.objects.create(owner=user, modality="CT") + mf = MassTransferFilter.objects.create(owner=user, name="CT Filter", modality="CT") job.filters.add(mf) task = MassTransferTask.objects.create( diff --git a/adit/mass_transfer/views.py b/adit/mass_transfer/views.py index 2d391a399..308d41564 100644 --- a/adit/mass_transfer/views.py +++ b/adit/mass_transfer/views.py @@ -149,6 +149,11 @@ class MassTransferFilterCreateView(LoginRequiredMixin, MassTransferLockedMixin, template_name = "mass_transfer/mass_transfer_filter_form.html" success_url = cast(str, reverse_lazy("mass_transfer_filter_list")) + def get_form_kwargs(self) -> dict[str, Any]: + kwargs = super().get_form_kwargs() + kwargs["user"] = self.request.user + return kwargs + def form_valid(self, form): form.instance.owner = self.request.user return super().form_valid(form) @@ -160,6 +165,11 @@ class MassTransferFilterUpdateView(LoginRequiredMixin, MassTransferLockedMixin, template_name = "mass_transfer/mass_transfer_filter_form.html" success_url = cast(str, reverse_lazy("mass_transfer_filter_list")) + def get_form_kwargs(self) -> dict[str, Any]: + kwargs = super().get_form_kwargs() + kwargs["user"] = self.request.user + return kwargs + def get_queryset(self): return MassTransferFilter.objects.filter(owner=self.request.user) From 1086dbfb2e1ee4da48101437bed16f37ff49fd3e Mon Sep 17 00:00:00 2001 From: Ritwik Date: Mon, 16 Feb 2026 21:41:14 +0100 Subject: [PATCH 010/103] Fix duplicate studies in _find_studies recursive time-window split The recursive split used `mid` as the boundary for both halves, causing studies at the midpoint to appear in both. 
Additionally, since the DICOM query operates at date-level granularity, same-day splits produced identical queries returning the same results in both halves. Shift the right half to start at mid + 1s and deduplicate by StudyInstanceUID when merging. Co-Authored-By: Claude Opus 4.6 --- adit/mass_transfer/processors.py | 15 +- adit/mass_transfer/tests/test_processor.py | 180 +++++++++++++++++++++ 2 files changed, 192 insertions(+), 3 deletions(-) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index d92417804..61e933502 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -333,9 +333,18 @@ def _find_studies( ) mid = start + (end - start) / 2 - return self._find_studies(operator, mf, start, mid) + self._find_studies( - operator, mf, mid, end - ) + left = self._find_studies(operator, mf, start, mid) + right = self._find_studies(operator, mf, mid + timedelta(seconds=1), end) + + # Deduplicate: the date-level DICOM query can return the same study + # in both halves when the split falls within a single day. + seen: set[str] = {str(s.StudyInstanceUID) for s in left} + for study in right: + if str(study.StudyInstanceUID) not in seen: + left.append(study) + seen.add(str(study.StudyInstanceUID)) + + return left return studies diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index f6c54de3f..d7ea18a0a 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -5,10 +5,12 @@ import pytest from adit_radis_shared.accounts.factories import UserFactory from django.utils import timezone +from pydicom import Dataset from pytest_mock import MockerFixture from adit.core.errors import DicomError from adit.core.factories import DicomFolderFactory, DicomServerFactory +from adit.core.utils.dicom_dataset import ResultDataset from adit.core.utils.dicom_operator import DicomOperator from adit.mass_transfer.models import ( MassTransferFilter, @@ -20,6 +22,17 @@ from adit.mass_transfer.processors import MassTransferTaskProcessor, _volume_output_path +def _make_study(study_uid: str, study_date: str = "20240101") -> ResultDataset: + """Create a minimal ResultDataset for testing _find_studies.""" + ds = Dataset() + ds.StudyInstanceUID = study_uid + ds.StudyDate = study_date + ds.StudyTime = "120000" + ds.PatientID = "PAT1" + ds.ModalitiesInStudy = ["CT"] + return ResultDataset(ds) + + @pytest.mark.django_db def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture, settings): settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 1 @@ -215,3 +228,170 @@ def test_volume_output_path_uses_year_month_and_subject_id(): path = _volume_output_path(base_dir, study_dt, "subject", "1-Head") assert path == base_dir / "202402" / "subject" / "1-Head" + + +# --------------------------------------------------------------------------- +# _find_studies tests +# --------------------------------------------------------------------------- + + +def _make_processor(mocker: MockerFixture, settings) -> MassTransferTaskProcessor: + """Create a MassTransferTaskProcessor with a mocked task (no DB required).""" + settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = getattr( + settings, "MASS_TRANSFER_MAX_SEARCH_RESULTS", 200 + ) + mock_task = mocker.MagicMock(spec=MassTransferTask) + mock_task._meta = MassTransferTask._meta + # Bypass the isinstance assertion in __init__ + mocker.patch.object(MassTransferTaskProcessor, "__init__", return_value=None) + processor = 
MassTransferTaskProcessor.__new__(MassTransferTaskProcessor) + processor.dicom_task = mock_task + processor.mass_task = mock_task + return processor + + +def _make_filter(mocker: MockerFixture, **kwargs) -> MassTransferFilter: + """Create a mock MassTransferFilter (no DB required).""" + mf = mocker.MagicMock(spec=MassTransferFilter) + mf.modality = kwargs.get("modality", "CT") + mf.study_description = kwargs.get("study_description", "") + mf.institution_name = kwargs.get("institution_name", "") + mf.apply_institution_on_study = kwargs.get("apply_institution_on_study", True) + mf.series_description = kwargs.get("series_description", "") + mf.series_number = kwargs.get("series_number", None) + return mf + + +def test_find_studies_returns_all_when_under_limit(mocker: MockerFixture, settings): + """When the PACS returns fewer results than max, return them directly.""" + settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 10 + + processor = _make_processor(mocker, settings) + mf = _make_filter(mocker, modality="CT") + + start = datetime(2024, 1, 1, 0, 0, 0) + end = datetime(2024, 1, 1, 23, 59, 59) + + studies = [_make_study("1.2.3"), _make_study("1.2.4"), _make_study("1.2.5")] + + operator = mocker.create_autospec(DicomOperator) + operator.find_studies.return_value = studies + + result = processor._find_studies(operator, mf, start, end) + + assert len(result) == 3 + assert operator.find_studies.call_count == 1 + + +def test_find_studies_splits_and_deduplicates(mocker: MockerFixture, settings): + """When results exceed max, _find_studies splits the window and deduplicates + studies that appear in both halves (same-day split).""" + settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 2 + + processor = _make_processor(mocker, settings) + mf = _make_filter(mocker, modality="CT") + + start = datetime(2024, 1, 1, 0, 0, 0) + end = datetime(2024, 1, 2, 23, 59, 59) + + study_a = _make_study("1.2.100") + study_b = _make_study("1.2.200") + study_c = _make_study("1.2.300") + # A duplicate of study_a that would appear in the right half too + study_a_dup = _make_study("1.2.100") + + # First call: too many results (3 > max=2), triggers split + # Left half: returns [study_a, study_b] (under limit) + # Right half: returns [study_a_dup, study_c] (under limit) + operator = mocker.create_autospec(DicomOperator) + operator.find_studies.side_effect = [ + [study_a, study_b, study_c], # initial call — over limit + [study_a, study_b], # left half + [study_a_dup, study_c], # right half + ] + + result = processor._find_studies(operator, mf, start, end) + + result_uids = [str(s.StudyInstanceUID) for s in result] + assert len(result) == 3 + assert result_uids.count("1.2.100") == 1, "Duplicate study should be removed" + assert "1.2.200" in result_uids + assert "1.2.300" in result_uids + + +def test_find_studies_split_boundaries_dont_overlap(mocker: MockerFixture, settings): + """Verify that the left and right halves of a split use non-overlapping time ranges.""" + settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 1 + + processor = _make_processor(mocker, settings) + mf = _make_filter(mocker, modality="") + + start = datetime(2024, 1, 1, 0, 0, 0) + end = datetime(2024, 1, 3, 23, 59, 59) + + # Track all (start, end) pairs passed to _find_studies + call_ranges: list[tuple[datetime, datetime]] = [] + + original_find_studies = MassTransferTaskProcessor._find_studies + + def tracking_find_studies(self_inner, operator, mf, s, e): + call_ranges.append((s, e)) + return original_find_studies(self_inner, operator, mf, s, e) + + # First call: over limit, 
triggers split + # Sub-calls: under limit, return single study each + operator = mocker.create_autospec(DicomOperator) + operator.find_studies.side_effect = [ + [_make_study("1"), _make_study("2")], # initial — over limit + [_make_study("1")], # left half + [_make_study("2")], # right half + ] + + mocker.patch.object( + MassTransferTaskProcessor, + "_find_studies", + side_effect=lambda self_inner, op, mf, s, e: tracking_find_studies( + self_inner, op, mf, s, e + ), + autospec=True, + ) + + processor._find_studies(operator, mf, start, end) + + # We expect 3 calls: the original + 2 recursive halves + assert len(call_ranges) == 3 + _, _ = call_ranges[0] + left_start, left_end = call_ranges[1] + right_start, right_end = call_ranges[2] + + assert left_start == start + # The right half must start strictly after the left half ends + assert right_start > left_end + + +def test_find_studies_preserves_order_with_unique_studies(mocker: MockerFixture, settings): + """Left-half studies come first, then unique right-half studies are appended.""" + settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 2 + + processor = _make_processor(mocker, settings) + mf = _make_filter(mocker, modality="") + + start = datetime(2024, 1, 1, 0, 0, 0) + end = datetime(2024, 1, 3, 23, 59, 59) + + operator = mocker.create_autospec(DicomOperator) + operator.find_studies.side_effect = [ + # Initial: over limit (3 > 2) + [_make_study("1.2.1"), _make_study("1.2.2"), _make_study("1.2.3")], + # Left half: within limit + [_make_study("1.2.1"), _make_study("1.2.2")], + # Right half: 1.2.2 is duplicate, 1.2.3 is new + [_make_study("1.2.2"), _make_study("1.2.3")], + ] + + result = processor._find_studies(operator, mf, start, end) + + result_uids = [str(s.StudyInstanceUID) for s in result] + # Left-half results come first, then unique right-half additions + assert result_uids == ["1.2.1", "1.2.2", "1.2.3"] + assert len(result) == 3 From 281303de924b313bddee9948cd7040374ec42d1f Mon Sep 17 00:00:00 2001 From: Ritwik Date: Wed, 18 Feb 2026 13:24:26 +0100 Subject: [PATCH 011/103] Fix pyright type errors --- adit/core/management/commands/cleanup_jobs_and_tasks.py | 3 ++- adit/mass_transfer/forms.py | 3 ++- adit/mass_transfer/processors.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/adit/core/management/commands/cleanup_jobs_and_tasks.py b/adit/core/management/commands/cleanup_jobs_and_tasks.py index 4c3b6a964..dc8025814 100644 --- a/adit/core/management/commands/cleanup_jobs_and_tasks.py +++ b/adit/core/management/commands/cleanup_jobs_and_tasks.py @@ -12,7 +12,8 @@ class Command(BaseCommand): help = "Cleanup all DICOM jobs and tasks that are stuck." def cleanup_tasks(self, model: type[DicomTask]): - job_model = model._meta.get_field("job").related_model + job_model: type[DicomJob] | None = model._meta.get_field("job").related_model # type: ignore[assignment] + assert job_model is not None job_ids = set() message = "Unexpected crash while processing this task." 
diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index b362aed62..806a6cf2c 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -144,7 +144,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.fields["filters"].queryset = MassTransferFilter.objects.filter(owner=self.user) + self.fields["filters"].queryset = MassTransferFilter.objects.filter(owner=self.user) # type: ignore[union-attr] self.fields["source"] = DicomNodeChoiceField("source", self.user) self.fields["source"].widget.attrs["@change"] = "onSourceChange($event)" @@ -219,6 +219,7 @@ def clean_destination(self): def clean(self): cleaned = super().clean() + assert cleaned is not None start_date = cleaned.get("start_date") end_date = cleaned.get("end_date") if start_date and end_date and end_date < start_date: diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 61e933502..1d17f704c 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -6,6 +6,7 @@ import uuid from datetime import datetime, timedelta from pathlib import Path +from typing import cast from django.conf import settings from django.utils import timezone @@ -40,7 +41,7 @@ def _parse_int(value: object, default: int | None = None) -> int | None: try: if value is None or value == "": return default - return int(value) + return int(cast(str, value)) except (TypeError, ValueError): return default From f4925c2008b9c9e3e86a5e4f256b63ca66ed2798 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Wed, 18 Feb 2026 15:45:42 +0100 Subject: [PATCH 012/103] Address code review feedback for mass transfer PR --- adit/core/models.py | 4 ++ adit/core/tasks.py | 50 +++---------------- adit/mass_transfer/forms.py | 9 +++- adit/mass_transfer/migrations/0001_initial.py | 28 +++++++++-- .../0002_masstransferjob_pseudonymize.py | 18 ------- .../0003_masstransferfilter_owner.py | 27 ---------- .../0004_masstransferfilter_require_name.py | 50 ------------------- ...05_masstransferfilter_unique_owner_name.py | 20 -------- adit/mass_transfer/models.py | 36 ++++++++++++- adit/mass_transfer/processors.py | 36 ++++++------- adit/mass_transfer/tests/test_cleanup.py | 3 +- adit/mass_transfer/tests/test_processor.py | 6 +-- 12 files changed, 99 insertions(+), 188 deletions(-) delete mode 100644 adit/mass_transfer/migrations/0002_masstransferjob_pseudonymize.py delete mode 100644 adit/mass_transfer/migrations/0003_masstransferfilter_owner.py delete mode 100644 adit/mass_transfer/migrations/0004_masstransferfilter_require_name.py delete mode 100644 adit/mass_transfer/migrations/0005_masstransferfilter_unique_owner_name.py diff --git a/adit/core/models.py b/adit/core/models.py index 75e0d11ab..630027977 100644 --- a/adit/core/models.py +++ b/adit/core/models.py @@ -410,6 +410,10 @@ def __str__(self) -> str: def get_absolute_url(self) -> str: ... 
+ def cleanup_on_failure(self) -> None: + """Hook for subclasses to clean up resources after task failure or timeout.""" + pass + def queue_pending_task(self) -> None: """Queues a dicom task.""" assert self.status == DicomTask.Status.PENDING diff --git a/adit/core/tasks.py b/adit/core/tasks.py index ba739621c..9b393c9af 100644 --- a/adit/core/tasks.py +++ b/adit/core/tasks.py @@ -1,9 +1,7 @@ import logging -import shutil import subprocess import traceback from concurrent import futures -from pathlib import Path from time import sleep from typing import cast @@ -62,45 +60,6 @@ def backup_db(*args, **kwargs): ) -def _cleanup_mass_transfer_exports(dicom_task: DicomTask) -> None: - if dicom_task._meta.app_label != "mass_transfer": - return - - partition_key = getattr(dicom_task, "partition_key", None) - if not partition_key: - return - - try: - from adit.mass_transfer.models import MassTransferVolume - except Exception: - return - - volumes = MassTransferVolume.objects.filter( - job_id=dicom_task.job_id, - partition_key=partition_key, - ).exclude(exported_folder="") - - for volume in volumes: - if volume.status == MassTransferVolume.Status.CONVERTED: - continue - - export_folder = volume.exported_folder - if export_folder: - try: - shutil.rmtree(Path(export_folder)) - except FileNotFoundError: - pass - except Exception as err: - volume.add_log(f"Cleanup failed: {err}") - volume.save() - continue - - volume.exported_folder = "" - volume.status = MassTransferVolume.Status.ERROR - volume.add_log("Export cleaned up after task failure.") - volume.save() - - def _run_dicom_task(context: JobContext, model_label: str, task_id: int): assert context.job @@ -152,7 +111,7 @@ def _monitor_task(context: JobContext, future: ProcessFuture) -> None: dicom_task.message = "Task was aborted due to timeout." dicom_task.status = DicomTask.Status.FAILURE ensure_db_connection() - _cleanup_mass_transfer_exports(dicom_task) + dicom_task.cleanup_on_failure() except RetriableDicomError as err: logger.exception("Retriable error occurred during %s.", dicom_task) @@ -171,7 +130,7 @@ def _monitor_task(context: JobContext, future: ProcessFuture) -> None: dicom_task.message = str(err) ensure_db_connection() - _cleanup_mass_transfer_exports(dicom_task) + dicom_task.cleanup_on_failure() raise err except Exception as err: @@ -187,7 +146,7 @@ def _monitor_task(context: JobContext, future: ProcessFuture) -> None: dicom_task.log += traceback.format_exc() ensure_db_connection() - _cleanup_mass_transfer_exports(dicom_task) + dicom_task.cleanup_on_failure() finally: dicom_task.end = timezone.now() @@ -225,6 +184,9 @@ def process_dicom_task(context: JobContext, model_label: str, task_id: int): _run_dicom_task(context, model_label, task_id) +# Separate task function for mass transfer so Procrastinate can route it +# independently (e.g. to a different queue or with different retry/priority) +# without affecting other transfer types. 
@app.task( queue="dicom", pass_context=True, diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index 806a6cf2c..1507c6512 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -116,6 +116,7 @@ class Meta: "partition_granularity", "filters", "pseudonymize", + "convert_to_nifti", "send_finished_mail", ) labels = { @@ -123,6 +124,7 @@ class Meta: "end_date": "End date", "partition_granularity": "Partition granularity", "pseudonymize": "Pseudonymize data", + "convert_to_nifti": "Convert to NIfTI", "send_finished_mail": "Send Email when job is finished", } help_texts = { @@ -131,6 +133,10 @@ class Meta: "When disabled, patient identifiers are preserved and output folders use " "Patient ID." ), + "convert_to_nifti": ( + "When enabled, exported DICOM series are converted to NIfTI format " + "using dcm2niix." + ), } widgets = { "start_date": forms.DateInput(attrs={"type": "date"}), @@ -181,6 +187,7 @@ def __init__(self, *args, **kwargs): css_class="g-3", ), Row( + Column(Field("convert_to_nifti"), css_class="col-md-6"), Column(Field("send_finished_mail"), css_class="col-md-6"), css_class="g-3", ), @@ -255,8 +262,6 @@ def _save_tasks(self, job: MassTransferJob) -> None: def save(self, commit: bool = True): job = super().save(commit=False) - # Mass transfer always converts to NIfTI - job.convert_to_nifti = True job.urgent = False if commit: diff --git a/adit/mass_transfer/migrations/0001_initial.py b/adit/mass_transfer/migrations/0001_initial.py index d5246432a..eb9dda662 100644 --- a/adit/mass_transfer/migrations/0001_initial.py +++ b/adit/mass_transfer/migrations/0001_initial.py @@ -1,5 +1,3 @@ -# Generated by Codex on 2026-02-03 - from django.conf import settings from django.db import migrations, models import django.db.models.deletion @@ -22,13 +20,21 @@ class Migration(migrations.Migration): name="MassTransferFilter", fields=[ ("id", models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), - ("name", models.CharField(blank=True, default="", max_length=150)), + ("name", models.CharField(max_length=150)), ("modality", models.CharField(blank=True, default="", max_length=16)), ("institution_name", models.CharField(blank=True, default="", max_length=128)), ("apply_institution_on_study", models.BooleanField(default=True)), ("study_description", models.CharField(blank=True, default="", max_length=256)), ("series_description", models.CharField(blank=True, default="", max_length=256)), ("series_number", models.PositiveIntegerField(blank=True, null=True)), + ( + "owner", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="mass_transfer_filters", + to=settings.AUTH_USER_MODEL, + ), + ), ], options={ "ordering": ("name", "id"), @@ -83,6 +89,7 @@ class Migration(migrations.Migration): max_length=16, ), ), + ("pseudonymize", models.BooleanField(default=True)), ( "destination", models.ForeignKey( @@ -207,6 +214,7 @@ class Migration(migrations.Migration): ("institution_name", models.CharField(blank=True, default="", max_length=128)), ("number_of_images", models.PositiveIntegerField(default=0)), ("exported_folder", models.TextField(blank=True, default="")), + ("export_cleaned", models.BooleanField(default=False)), ("converted_file", models.TextField(blank=True, default="")), ("log", models.TextField(blank=True, default="")), ("created", models.DateTimeField(auto_now_add=True)), @@ -228,6 +236,20 @@ class Migration(migrations.Migration): model_name="masstransferjob", index=models.Index(fields=["owner", 
"status"], name="mass_trans_owner_i_2403f1_idx"), ), + migrations.AddConstraint( + model_name="masstransferfilter", + constraint=models.CheckConstraint( + condition=~models.Q(name=""), + name="mass_transfer_filter_name_not_blank", + ), + ), + migrations.AddConstraint( + model_name="masstransferfilter", + constraint=models.UniqueConstraint( + fields=("owner", "name"), + name="mass_transfer_filter_unique_owner_name", + ), + ), migrations.AddConstraint( model_name="masstransfervolume", constraint=models.UniqueConstraint( diff --git a/adit/mass_transfer/migrations/0002_masstransferjob_pseudonymize.py b/adit/mass_transfer/migrations/0002_masstransferjob_pseudonymize.py deleted file mode 100644 index e24dfd679..000000000 --- a/adit/mass_transfer/migrations/0002_masstransferjob_pseudonymize.py +++ /dev/null @@ -1,18 +0,0 @@ -# Generated by Codex on 2026-02-03 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("mass_transfer", "0001_initial"), - ] - - operations = [ - migrations.AddField( - model_name="masstransferjob", - name="pseudonymize", - field=models.BooleanField(default=True), - ), - ] diff --git a/adit/mass_transfer/migrations/0003_masstransferfilter_owner.py b/adit/mass_transfer/migrations/0003_masstransferfilter_owner.py deleted file mode 100644 index fd9b0bd98..000000000 --- a/adit/mass_transfer/migrations/0003_masstransferfilter_owner.py +++ /dev/null @@ -1,27 +0,0 @@ -# Generated by Codex on 2026-02-08 - -from django.conf import settings -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ("mass_transfer", "0002_masstransferjob_pseudonymize"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.AddField( - model_name="masstransferfilter", - name="owner", - field=models.ForeignKey( - to=settings.AUTH_USER_MODEL, - on_delete=django.db.models.deletion.CASCADE, - related_name="mass_transfer_filters", - null=True, - blank=True, - ), - ), - ] diff --git a/adit/mass_transfer/migrations/0004_masstransferfilter_require_name.py b/adit/mass_transfer/migrations/0004_masstransferfilter_require_name.py deleted file mode 100644 index d05070451..000000000 --- a/adit/mass_transfer/migrations/0004_masstransferfilter_require_name.py +++ /dev/null @@ -1,50 +0,0 @@ -# Generated by Codex on 2026-02-08 - -from django.db import migrations, models - - -def fill_missing_filter_names(apps, schema_editor): - MassTransferFilter = apps.get_model("mass_transfer", "MassTransferFilter") - filters = MassTransferFilter.objects.filter(name="") - for mf in filters: - parts = [] - if getattr(mf, "modality", ""): - parts.append(mf.modality) - if getattr(mf, "institution_name", ""): - parts.append(f"Institution={mf.institution_name}") - if getattr(mf, "study_description", ""): - parts.append(f"Study={mf.study_description}") - if getattr(mf, "series_description", ""): - parts.append(f"Series={mf.series_description}") - series_number = getattr(mf, "series_number", None) - if series_number is not None: - parts.append(f"SeriesNumber={series_number}") - mf.name = "; ".join(parts) if parts else f"Filter {mf.pk}" - mf.save(update_fields=["name"]) - - -def noop_reverse(apps, schema_editor): - return None - - -class Migration(migrations.Migration): - - dependencies = [ - ("mass_transfer", "0003_masstransferfilter_owner"), - ] - - operations = [ - migrations.RunPython(fill_missing_filter_names, noop_reverse), - migrations.AlterField( - 
model_name="masstransferfilter", - name="name", - field=models.CharField(max_length=150), - ), - migrations.AddConstraint( - model_name="masstransferfilter", - constraint=models.CheckConstraint( - condition=~models.Q(name=""), - name="mass_transfer_filter_name_not_blank", - ), - ), - ] diff --git a/adit/mass_transfer/migrations/0005_masstransferfilter_unique_owner_name.py b/adit/mass_transfer/migrations/0005_masstransferfilter_unique_owner_name.py deleted file mode 100644 index 8289bfbba..000000000 --- a/adit/mass_transfer/migrations/0005_masstransferfilter_unique_owner_name.py +++ /dev/null @@ -1,20 +0,0 @@ -# Generated by Codex on 2026-02-08 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("mass_transfer", "0004_masstransferfilter_require_name"), - ] - - operations = [ - migrations.AddConstraint( - model_name="masstransferfilter", - constraint=models.UniqueConstraint( - fields=("owner", "name"), - name="mass_transfer_filter_unique_owner_name", - ), - ), - ] diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index 08920e4b2..16d327256 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -1,5 +1,8 @@ from __future__ import annotations +import shutil +from pathlib import Path + from django.conf import settings from django.db import models from django.urls import reverse @@ -19,8 +22,6 @@ class MassTransferFilter(models.Model): settings.AUTH_USER_MODEL, on_delete=models.CASCADE, related_name="mass_transfer_filters", - null=True, - blank=True, ) name = models.CharField(max_length=150) modality = models.CharField(max_length=16, blank=True, default="") @@ -122,6 +123,36 @@ class MassTransferTask(DicomTask): def get_absolute_url(self): return reverse("mass_transfer_task_detail", args=[self.pk]) + def cleanup_on_failure(self) -> None: + """Clean up exported DICOM files when a mass transfer task fails or times out.""" + if not self.partition_key: + return + + volumes = MassTransferVolume.objects.filter( + job_id=self.job_id, + partition_key=self.partition_key, + ).exclude(exported_folder="") + + for volume in volumes: + if volume.status == MassTransferVolume.Status.CONVERTED: + continue + + export_folder = volume.exported_folder + if export_folder: + try: + shutil.rmtree(Path(export_folder)) + except FileNotFoundError: + pass + except Exception as err: + volume.add_log(f"Cleanup failed: {err}") + volume.save() + continue + + volume.exported_folder = "" + volume.status = MassTransferVolume.Status.ERROR + volume.add_log("Export cleaned up after task failure.") + volume.save() + def queue_pending_task(self) -> None: """Queues a mass transfer task in the dicom queue.""" assert self.status == DicomTask.Status.PENDING @@ -165,6 +196,7 @@ class Status(models.TextChoices): number_of_images = models.PositiveIntegerField(default=0) exported_folder = models.TextField(blank=True, default="") + export_cleaned = models.BooleanField(default=False) converted_file = models.TextField(blank=True, default="") status = models.CharField(max_length=16, choices=Status.choices, default=Status.PENDING) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 1d17f704c..91d8ab610 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -78,17 +78,7 @@ def _destination_base_dir(node: DicomNode) -> Path: return path -def _volume_export_path( - base_dir: Path, - study_dt: datetime, - subject_id: str, - series_name: str, -) -> Path: - year_month = 
study_dt.strftime("%Y%m") - return base_dir / year_month / subject_id / series_name - - -def _volume_output_path( +def _volume_path( base_dir: Path, study_dt: datetime, subject_id: str, @@ -267,6 +257,9 @@ def _find_volumes( continue study_dt = _study_datetime(study) + # Use get_or_create for resumability: if a task failed halfway + # and is retried, volumes that were already exported/converted + # are returned as-is and skipped later in the processing loop. volume, created = MassTransferVolume.objects.get_or_create( job=job, series_instance_uid=series_uid, @@ -286,7 +279,10 @@ def _find_volumes( ), }, ) - if not created: + # Refresh metadata from PACS in case it changed between runs, + # but only for volumes that haven't been processed yet to avoid + # clobbering partition_key on already exported/converted volumes. + if not created and volume.status == MassTransferVolume.Status.PENDING: volume.partition_key = self.mass_task.partition_key volume.patient_id = str(study.PatientID) volume.accession_number = str(study.get("AccessionNumber", "")) @@ -386,7 +382,7 @@ def _export_volume( ) subject_id = sanitize_filename(pseudonym or volume.patient_id) - export_path = _volume_export_path(export_base, study_dt, subject_id, series_name) + export_path = _volume_path(export_base, study_dt, subject_id, series_name) export_path.mkdir(parents=True, exist_ok=True) volume.exported_folder = str(export_path) @@ -431,7 +427,7 @@ def _convert_volume( ) subject_id = sanitize_filename(pseudonym or volume.patient_id) - output_path = _volume_output_path(output_base, study_dt, subject_id, series_name) + output_path = _volume_path(output_base, study_dt, subject_id, series_name) output_path.mkdir(parents=True, exist_ok=True) cmd = [ @@ -451,7 +447,13 @@ def _convert_volume( f"Conversion failed for series {volume.series_instance_uid}: {result.stderr}" ) - volume.converted_file = str(output_path / f"{series_name}.nii.gz") + nifti_files = sorted(output_path.glob("*.nii.gz")) + if not nifti_files: + raise DicomError( + f"dcm2niix produced no .nii.gz files for series {volume.series_instance_uid}" + ) + + volume.converted_file = "\n".join(str(f) for f in nifti_files) volume.status = MassTransferVolume.Status.CONVERTED volume.save() @@ -459,7 +461,7 @@ def _convert_volume( def _cleanup_export(self, volume: MassTransferVolume) -> None: export_folder = volume.exported_folder - if not export_folder or export_folder.endswith(" (cleaned)"): + if not export_folder or volume.export_cleaned: return try: @@ -471,5 +473,5 @@ def _cleanup_export(self, volume: MassTransferVolume) -> None: volume.save() return - volume.exported_folder = f"{export_folder} (cleaned)" + volume.export_cleaned = True volume.save() diff --git a/adit/mass_transfer/tests/test_cleanup.py b/adit/mass_transfer/tests/test_cleanup.py index f17a582bf..210cba075 100644 --- a/adit/mass_transfer/tests/test_cleanup.py +++ b/adit/mass_transfer/tests/test_cleanup.py @@ -5,7 +5,6 @@ from django.utils import timezone from adit.core.factories import DicomFolderFactory, DicomServerFactory -from adit.core.tasks import _cleanup_mass_transfer_exports from adit.mass_transfer.models import ( MassTransferJob, MassTransferSettings, @@ -55,7 +54,7 @@ def test_cleanup_mass_transfer_exports_on_failure(tmp_path: Path): status=MassTransferVolume.Status.EXPORTED, ) - _cleanup_mass_transfer_exports(task) + task.cleanup_on_failure() volume.refresh_from_db() assert not export_dir.exists() diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py 
index d7ea18a0a..bb897404e 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -19,7 +19,7 @@ MassTransferTask, MassTransferVolume, ) -from adit.mass_transfer.processors import MassTransferTaskProcessor, _volume_output_path +from adit.mass_transfer.processors import MassTransferTaskProcessor, _volume_path def _make_study(study_uid: str, study_date: str = "20240101") -> ResultDataset: @@ -222,10 +222,10 @@ def fake_export(_, __, ___, pseudonym): assert result["status"] == MassTransferTask.Status.SUCCESS -def test_volume_output_path_uses_year_month_and_subject_id(): +def test_volume_path_uses_year_month_and_subject_id(): base_dir = Path("/tmp/base") study_dt = datetime(2024, 2, 15, 10, 30) - path = _volume_output_path(base_dir, study_dt, "subject", "1-Head") + path = _volume_path(base_dir, study_dt, "subject", "1-Head") assert path == base_dir / "202402" / "subject" / "1-Head" From deccb114a2556abcdcc45678fe405340274820d1 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Thu, 19 Feb 2026 15:05:59 +0100 Subject: [PATCH 013/103] Use rslave mount propagation so containers see NAS mounts --- docker-compose.base.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docker-compose.base.yml b/docker-compose.base.yml index 5c95ec02c..20faab538 100644 --- a/docker-compose.base.yml +++ b/docker-compose.base.yml @@ -1,7 +1,11 @@ x-app: &default-app volumes: - ${BACKUP_DIR:?}:/backups - - ${MOUNT_DIR:?}:/mnt + - type: bind + source: ${MOUNT_DIR:?} + target: /mnt + bind: + propagation: rslave depends_on: - postgres environment: From a3c74137fbe006f44febbcddfbff333390fda476 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Thu, 19 Feb 2026 16:21:19 +0100 Subject: [PATCH 014/103] split up tasks into processing and querying to manage creation of tasks better --- adit/mass_transfer/forms.py | 1 + .../migrations/0002_two_phase_task_type.py | 71 +++++++++ adit/mass_transfer/models.py | 50 +++++- adit/mass_transfer/processors.py | 144 ++++++++++++++---- adit/mass_transfer/tests/test_cleanup.py | 4 + adit/mass_transfer/tests/test_processor.py | 59 +++---- 6 files changed, 257 insertions(+), 72 deletions(-) create mode 100644 adit/mass_transfer/migrations/0002_two_phase_task_type.py diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index 1507c6512..9cab2a58b 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -252,6 +252,7 @@ def _save_tasks(self, job: MassTransferJob) -> None: MassTransferTask( job=job, source=job.source, + task_type=MassTransferTask.TaskType.DISCOVERY, partition_start=partition.start, partition_end=partition.end, partition_key=partition.key, diff --git a/adit/mass_transfer/migrations/0002_two_phase_task_type.py b/adit/mass_transfer/migrations/0002_two_phase_task_type.py new file mode 100644 index 000000000..77821a579 --- /dev/null +++ b/adit/mass_transfer/migrations/0002_two_phase_task_type.py @@ -0,0 +1,71 @@ +# Generated by Django 5.2.8 on 2026-02-19 15:16 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('mass_transfer', '0001_initial'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.RenameIndex( + model_name='masstransferjob', + new_name='mass_transf_owner_i_c363b8_idx', + old_name='mass_trans_owner_i_2403f1_idx', + ), + migrations.AddField( + model_name='masstransfertask', + name='patient_id', + 
field=models.CharField(blank=True, default='', max_length=64), + ), + migrations.AddField( + model_name='masstransfertask', + name='study_instance_uid', + field=models.CharField(blank=True, default='', max_length=128), + ), + migrations.AddField( + model_name='masstransfertask', + name='task_type', + field=models.CharField(choices=[('discovery', 'Discovery'), ('processing', 'Processing')], default='discovery', max_length=16), + ), + migrations.AddField( + model_name='masstransfervolume', + name='task', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='volumes', to='mass_transfer.masstransfertask'), + ), + migrations.AlterField( + model_name='masstransferfilter', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + migrations.AlterField( + model_name='masstransferjob', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + migrations.AlterField( + model_name='masstransferjob', + name='owner', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='%(app_label)s_jobs', to=settings.AUTH_USER_MODEL), + ), + migrations.AlterField( + model_name='masstransfersettings', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + migrations.AlterField( + model_name='masstransfertask', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + migrations.AlterField( + model_name='masstransfervolume', + name='id', + field=models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID'), + ), + ] diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index 16d327256..231185294 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -90,14 +90,32 @@ def get_absolute_url(self): return reverse("mass_transfer_job_detail", args=[self.pk]) def queue_pending_tasks(self): - """Queues all pending tasks of this job in the dicom queue.""" + """Queues pending tasks using two-phase scheduling. + + When pending discovery tasks exist, only those are enqueued (they will + create and enqueue processing tasks when they run). Otherwise, pending + processing tasks are enqueued directly (e.g. on resume after cancel). 
+ """ assert self.status == DicomJob.Status.PENDING priority = self.default_priority if self.urgent: priority = self.urgent_priority - for mass_task in self.tasks.filter(status=DicomTask.Status.PENDING): + pending_discovery = self.tasks.filter( + status=DicomTask.Status.PENDING, + task_type=MassTransferTask.TaskType.DISCOVERY, + ) + + if pending_discovery.exists(): + tasks_to_enqueue = pending_discovery + else: + tasks_to_enqueue = self.tasks.filter( + status=DicomTask.Status.PENDING, + task_type=MassTransferTask.TaskType.PROCESSING, + ) + + for mass_task in tasks_to_enqueue: assert mass_task.queued_job is None model_label = get_model_label(mass_task.__class__) @@ -111,27 +129,37 @@ def queue_pending_tasks(self): class MassTransferTask(DicomTask): + class TaskType(models.TextChoices): + DISCOVERY = "discovery", "Discovery" + PROCESSING = "processing", "Processing" + job = models.ForeignKey( MassTransferJob, on_delete=models.CASCADE, related_name="tasks", ) + task_type = models.CharField( + max_length=16, + choices=TaskType.choices, + default=TaskType.DISCOVERY, + ) partition_start = models.DateTimeField() partition_end = models.DateTimeField() partition_key = models.CharField(max_length=64) + study_instance_uid = models.CharField(max_length=128, blank=True, default="") + patient_id = models.CharField(max_length=64, blank=True, default="") + + volumes: models.QuerySet["MassTransferVolume"] def get_absolute_url(self): return reverse("mass_transfer_task_detail", args=[self.pk]) def cleanup_on_failure(self) -> None: """Clean up exported DICOM files when a mass transfer task fails or times out.""" - if not self.partition_key: + if self.task_type == self.TaskType.DISCOVERY: return - volumes = MassTransferVolume.objects.filter( - job_id=self.job_id, - partition_key=self.partition_key, - ).exclude(exported_folder="") + volumes = self.volumes.exclude(exported_folder="") for volume in volumes: if volume.status == MassTransferVolume.Status.CONVERTED: @@ -180,6 +208,14 @@ class Status(models.TextChoices): ERROR = "error", "Error" job = models.ForeignKey(MassTransferJob, on_delete=models.CASCADE, related_name="volumes") + task_id: int | None + task = models.ForeignKey( + MassTransferTask, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="volumes", + ) partition_key = models.CharField(max_length=64) pseudonym = models.CharField(max_length=64, blank=True, default="") diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 91d8ab610..607a595c8 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -106,14 +106,18 @@ def process(self): "log": "Task skipped because the mass transfer app is suspended.", } + if self.mass_task.task_type == MassTransferTask.TaskType.DISCOVERY: + return self._process_discovery() + else: + return self._process_study() + + def _process_discovery(self): + """Phase 1: Query PACS, create volumes, spawn per-study processing tasks.""" job = self.mass_task.job source_node = job.source - destination_node = job.destination if source_node.node_type != DicomNode.NodeType.SERVER: raise DicomError("Mass transfer source must be a DICOM server.") - if destination_node.node_type != DicomNode.NodeType.FOLDER: - raise DicomError("Mass transfer destination must be a DICOM folder.") filters = list(job.filters.all()) if not filters: @@ -126,45 +130,123 @@ def process(self): operator = DicomOperator(source_node.dicomserver) volumes = self._find_volumes(operator, filters) - export_base = _export_base_dir() - 
output_base = _destination_base_dir(destination_node) - - converted_count = 0 - failed_count = 0 - + # Group volumes by study volumes_by_study: dict[str, list[MassTransferVolume]] = {} for volume in volumes: volumes_by_study.setdefault(volume.study_instance_uid, []).append(volume) - for _, study_volumes in volumes_by_study.items(): - pseudonym = "" - if job.pseudonymize: - existing_pseudonym = next( - (v.pseudonym for v in study_volumes if v.pseudonym), - None, + # Create one processing task per study and link volumes to it + tasks_created = 0 + tasks_reused = 0 + for study_uid, study_volumes in volumes_by_study.items(): + patient_id = study_volumes[0].patient_id + + # Check for an existing processing task for this study (e.g. on restart) + existing_task = MassTransferTask.objects.filter( + job=job, + task_type=MassTransferTask.TaskType.PROCESSING, + study_instance_uid=study_uid, + ).first() + + if existing_task is not None: + processing_task = existing_task + tasks_reused += 1 + else: + processing_task = MassTransferTask.objects.create( + job=job, + source=job.source, + task_type=MassTransferTask.TaskType.PROCESSING, + partition_start=self.mass_task.partition_start, + partition_end=self.mass_task.partition_end, + partition_key=self.mass_task.partition_key, + study_instance_uid=study_uid, + patient_id=patient_id, ) - pseudonym = existing_pseudonym or uuid.uuid4().hex + tasks_created += 1 + # Link volumes to the processing task for volume in study_volumes: - if volume.status == MassTransferVolume.Status.CONVERTED: - continue + if volume.task_id != processing_task.pk: + volume.task = processing_task + volume.save(update_fields=["task"]) - try: - self._export_volume(operator, volume, export_base, pseudonym) - self._convert_volume(volume, output_base, pseudonym) - converted_count += 1 - except Exception as err: - logger.exception( - "Mass transfer failed for volume %s", volume.series_instance_uid - ) - self._cleanup_export(volume) - volume.status = MassTransferVolume.Status.ERROR - volume.add_log(str(err)) - volume.save() - failed_count += 1 + # Enqueue the processing task if it's still pending + is_pending = processing_task.status == DicomTask.Status.PENDING + if is_pending and processing_task.queued_job is None: + processing_task.queue_pending_task() log_lines = [ f"Partition {self.mass_task.partition_key}", + f"Studies found: {len(volumes_by_study)}", + f"Volumes found: {len(volumes)}", + f"Processing tasks created: {tasks_created}", + f"Processing tasks reused: {tasks_reused}", + ] + + return { + "status": MassTransferTask.Status.SUCCESS, + "message": ( + f"Discovery complete: {len(volumes_by_study)} studies, " + f"{len(volumes)} volumes." 
+ ), + "log": "\n".join(log_lines), + } + + def _process_study(self): + """Phase 2: Export + convert all volumes for a single study.""" + job = self.mass_task.job + source_node = job.source + destination_node = job.destination + + if source_node.node_type != DicomNode.NodeType.SERVER: + raise DicomError("Mass transfer source must be a DICOM server.") + if destination_node.node_type != DicomNode.NodeType.FOLDER: + raise DicomError("Mass transfer destination must be a DICOM folder.") + + operator = DicomOperator(source_node.dicomserver) + volumes = list(self.mass_task.volumes.all()) + + if not volumes: + return { + "status": MassTransferTask.Status.SUCCESS, + "message": "No volumes to process.", + "log": f"Study {self.mass_task.study_instance_uid}: no volumes linked.", + } + + export_base = _export_base_dir() + output_base = _destination_base_dir(destination_node) + + pseudonym = "" + if job.pseudonymize: + existing_pseudonym = next( + (v.pseudonym for v in volumes if v.pseudonym), + None, + ) + pseudonym = existing_pseudonym or uuid.uuid4().hex + + converted_count = 0 + failed_count = 0 + + for volume in volumes: + if volume.status == MassTransferVolume.Status.CONVERTED: + continue + + try: + self._export_volume(operator, volume, export_base, pseudonym) + self._convert_volume(volume, output_base, pseudonym) + converted_count += 1 + except Exception as err: + logger.exception( + "Mass transfer failed for volume %s", volume.series_instance_uid + ) + self._cleanup_export(volume) + volume.status = MassTransferVolume.Status.ERROR + volume.add_log(str(err)) + volume.save() + failed_count += 1 + + log_lines = [ + f"Study {self.mass_task.study_instance_uid}", f"Volumes processed: {len(volumes)}", f"Converted: {converted_count}", f"Failed: {failed_count}", diff --git a/adit/mass_transfer/tests/test_cleanup.py b/adit/mass_transfer/tests/test_cleanup.py index 210cba075..5f11edbe6 100644 --- a/adit/mass_transfer/tests/test_cleanup.py +++ b/adit/mass_transfer/tests/test_cleanup.py @@ -31,9 +31,12 @@ def test_cleanup_mass_transfer_exports_on_failure(tmp_path: Path): task = MassTransferTask.objects.create( job=job, source=source, + task_type=MassTransferTask.TaskType.PROCESSING, partition_start=timezone.now(), partition_end=timezone.now(), partition_key="20240101", + study_instance_uid="study-1", + patient_id="PATIENT", ) export_dir = tmp_path / "exports" / "202401" / "PATIENT" / "1-Head" @@ -41,6 +44,7 @@ def test_cleanup_mass_transfer_exports_on_failure(tmp_path: Path): volume = MassTransferVolume.objects.create( job=job, + task=task, partition_key="20240101", patient_id="PATIENT", study_instance_uid="study-1", diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index bb897404e..581863ff1 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -71,7 +71,8 @@ def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture, s @pytest.mark.django_db -def test_process_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp_path: Path): +def test_process_study_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp_path: Path): + """Processing task generates the same pseudonym for all volumes in a study.""" settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") MassTransferSettings.objects.create() @@ -86,19 +87,22 @@ def test_process_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp end_date=date(2024, 1, 1), 
partition_granularity=MassTransferJob.PartitionGranularity.DAILY, ) - mf = MassTransferFilter.objects.create(owner=user, name="CT Filter", modality="CT") - job.filters.add(mf) - task = MassTransferTask.objects.create( + # Create a processing task for study-1 + task1 = MassTransferTask.objects.create( job=job, source=source, + task_type=MassTransferTask.TaskType.PROCESSING, partition_start=timezone.now(), partition_end=timezone.now(), partition_key="20240101", + study_instance_uid="study-1", + patient_id="PAT1", ) - volume1 = MassTransferVolume.objects.create( + MassTransferVolume.objects.create( job=job, + task=task1, partition_key="20240101", study_instance_uid="study-1", series_instance_uid="series-1", @@ -108,8 +112,9 @@ def test_process_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp series_number=1, study_datetime=timezone.now(), ) - volume2 = MassTransferVolume.objects.create( + MassTransferVolume.objects.create( job=job, + task=task1, partition_key="20240101", study_instance_uid="study-1", series_instance_uid="series-2", @@ -119,24 +124,8 @@ def test_process_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp series_number=2, study_datetime=timezone.now(), ) - volume3 = MassTransferVolume.objects.create( - job=job, - partition_key="20240101", - study_instance_uid="study-2", - series_instance_uid="series-3", - modality="CT", - study_description="", - series_description="C", - series_number=3, - study_datetime=timezone.now(), - ) - processor = MassTransferTaskProcessor(task) - mocker.patch.object( - processor, - "_find_volumes", - return_value=[volume1, volume2, volume3], - ) + processor = MassTransferTaskProcessor(task1) export_calls: list[tuple[str, str]] = [] @@ -146,26 +135,27 @@ def fake_export(_, volume, __, pseudonym): mocker.patch.object(processor, "_export_volume", side_effect=fake_export) mocker.patch.object(processor, "_convert_volume", return_value=None) - uuid_side_effect = [ - uuid.UUID(int=1), - uuid.UUID(int=2), - ] - mocker.patch("adit.mass_transfer.processors.uuid.uuid4", side_effect=uuid_side_effect) + mocker.patch( + "adit.mass_transfer.processors.uuid.uuid4", + return_value=uuid.UUID(int=1), + ) result = processor.process() pseudonyms_by_series = {series_uid: pseudonym for series_uid, pseudonym in export_calls} + # Both volumes in the same study should share a pseudonym assert pseudonyms_by_series["series-1"] == pseudonyms_by_series["series-2"] - assert pseudonyms_by_series["series-1"] != pseudonyms_by_series["series-3"] + assert pseudonyms_by_series["series-1"] != "" assert result["status"] == MassTransferTask.Status.SUCCESS @pytest.mark.django_db -def test_process_opt_out_skips_pseudonymization( +def test_process_study_opt_out_skips_pseudonymization( mocker: MockerFixture, settings, tmp_path: Path, ): + """When pseudonymize=False, processing task passes empty pseudonym.""" settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") MassTransferSettings.objects.create() @@ -181,19 +171,21 @@ def test_process_opt_out_skips_pseudonymization( partition_granularity=MassTransferJob.PartitionGranularity.DAILY, pseudonymize=False, ) - mf = MassTransferFilter.objects.create(owner=user, name="CT Filter", modality="CT") - job.filters.add(mf) task = MassTransferTask.objects.create( job=job, source=source, + task_type=MassTransferTask.TaskType.PROCESSING, partition_start=timezone.now(), partition_end=timezone.now(), partition_key="20240101", + study_instance_uid="study-1", + patient_id="PATIENT-1", ) - volume = 
MassTransferVolume.objects.create( + MassTransferVolume.objects.create( job=job, + task=task, partition_key="20240101", patient_id="PATIENT-1", study_instance_uid="study-1", @@ -206,7 +198,6 @@ def test_process_opt_out_skips_pseudonymization( ) processor = MassTransferTaskProcessor(task) - mocker.patch.object(processor, "_find_volumes", return_value=[volume]) export_calls: list[str] = [] From 8f5444af175207f908c3600e1de7f07f6d6db348 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Sat, 28 Feb 2026 18:44:16 +0100 Subject: [PATCH 015/103] Collapse two-phase mass transfer into single-phase with dedicated worker One task per partition handles discovery + export + convert on a separate mass_transfer queue. Includes review fixes and comprehensive test suite. --- adit/core/tasks.py | 26 +- adit/mass_transfer/forms.py | 1 - .../migrations/0003_collapse_single_phase.py | 25 + adit/mass_transfer/models.py | 61 +- adit/mass_transfer/processors.py | 183 ++-- adit/mass_transfer/tests/test_cleanup.py | 188 +++- adit/mass_transfer/tests/test_processor.py | 860 +++++++++++++++++- docker-compose.base.yml | 4 + docker-compose.dev.yml | 8 + docker-compose.prod.yml | 11 + 10 files changed, 1165 insertions(+), 202 deletions(-) create mode 100644 adit/mass_transfer/migrations/0003_collapse_single_phase.py diff --git a/adit/core/tasks.py b/adit/core/tasks.py index 9b393c9af..7a5777465 100644 --- a/adit/core/tasks.py +++ b/adit/core/tasks.py @@ -60,7 +60,13 @@ def backup_db(*args, **kwargs): ) -def _run_dicom_task(context: JobContext, model_label: str, task_id: int): +def _run_dicom_task( + context: JobContext, + model_label: str, + task_id: int, + *, + process_timeout: int | None = None, +): assert context.job dicom_task = get_dicom_task(model_label, task_id) @@ -82,7 +88,7 @@ def _run_dicom_task(context: JobContext, model_label: str, task_id: int): logger.info(f"Processing of {dicom_task} started.") - @concurrent.process(timeout=settings.DICOM_TASK_PROCESS_TIMEOUT, daemon=True) + @concurrent.process(timeout=process_timeout, daemon=True) def _process_dicom_task(model_label: str, task_id: int) -> ProcessingResult: dicom_task = get_dicom_task(model_label, task_id) processor = get_dicom_processor(dicom_task) @@ -181,16 +187,20 @@ def _monitor_task(context: JobContext, future: ProcessFuture) -> None: retry=DICOM_TASK_RETRY_STRATEGY, ) def process_dicom_task(context: JobContext, model_label: str, task_id: int): - _run_dicom_task(context, model_label, task_id) + _run_dicom_task( + context, model_label, task_id, process_timeout=settings.DICOM_TASK_PROCESS_TIMEOUT + ) -# Separate task function for mass transfer so Procrastinate can route it -# independently (e.g. to a different queue or with different retry/priority) -# without affecting other transfer types. +# Separate task function for mass transfer on a dedicated queue so it does not +# starve batch/selective transfers. Mass transfer tasks process an entire +# partition (discovery + export + convert) and can run for hours, so the +# pebble process timeout is disabled (process_timeout=None). Individual DICOM +# operations are still protected by Stamina / pynetdicom-level timeouts. 
@app.task( - queue="dicom", + queue="mass_transfer", pass_context=True, retry=DICOM_TASK_RETRY_STRATEGY, ) def process_mass_transfer_task(context: JobContext, model_label: str, task_id: int): - _run_dicom_task(context, model_label, task_id) + _run_dicom_task(context, model_label, task_id, process_timeout=None) diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index 9cab2a58b..1507c6512 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -252,7 +252,6 @@ def _save_tasks(self, job: MassTransferJob) -> None: MassTransferTask( job=job, source=job.source, - task_type=MassTransferTask.TaskType.DISCOVERY, partition_start=partition.start, partition_end=partition.end, partition_key=partition.key, diff --git a/adit/mass_transfer/migrations/0003_collapse_single_phase.py b/adit/mass_transfer/migrations/0003_collapse_single_phase.py new file mode 100644 index 000000000..9980aa186 --- /dev/null +++ b/adit/mass_transfer/migrations/0003_collapse_single_phase.py @@ -0,0 +1,25 @@ +# Generated by Django 5.2.8 on 2026-02-28 11:39 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('mass_transfer', '0002_two_phase_task_type'), + ] + + operations = [ + migrations.RemoveField( + model_name='masstransfertask', + name='patient_id', + ), + migrations.RemoveField( + model_name='masstransfertask', + name='study_instance_uid', + ), + migrations.RemoveField( + model_name='masstransfertask', + name='task_type', + ), + ] diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index 231185294..67de2bfb0 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -90,32 +90,14 @@ def get_absolute_url(self): return reverse("mass_transfer_job_detail", args=[self.pk]) def queue_pending_tasks(self): - """Queues pending tasks using two-phase scheduling. - - When pending discovery tasks exist, only those are enqueued (they will - create and enqueue processing tasks when they run). Otherwise, pending - processing tasks are enqueued directly (e.g. on resume after cancel). 
- """ + """Queues all pending mass transfer tasks.""" assert self.status == DicomJob.Status.PENDING priority = self.default_priority if self.urgent: priority = self.urgent_priority - pending_discovery = self.tasks.filter( - status=DicomTask.Status.PENDING, - task_type=MassTransferTask.TaskType.DISCOVERY, - ) - - if pending_discovery.exists(): - tasks_to_enqueue = pending_discovery - else: - tasks_to_enqueue = self.tasks.filter( - status=DicomTask.Status.PENDING, - task_type=MassTransferTask.TaskType.PROCESSING, - ) - - for mass_task in tasks_to_enqueue: + for mass_task in self.tasks.filter(status=DicomTask.Status.PENDING): assert mass_task.queued_job is None model_label = get_model_label(mass_task.__class__) @@ -129,25 +111,14 @@ def queue_pending_tasks(self): class MassTransferTask(DicomTask): - class TaskType(models.TextChoices): - DISCOVERY = "discovery", "Discovery" - PROCESSING = "processing", "Processing" - job = models.ForeignKey( MassTransferJob, on_delete=models.CASCADE, related_name="tasks", ) - task_type = models.CharField( - max_length=16, - choices=TaskType.choices, - default=TaskType.DISCOVERY, - ) partition_start = models.DateTimeField() partition_end = models.DateTimeField() partition_key = models.CharField(max_length=64) - study_instance_uid = models.CharField(max_length=128, blank=True, default="") - patient_id = models.CharField(max_length=64, blank=True, default="") volumes: models.QuerySet["MassTransferVolume"] @@ -156,14 +127,18 @@ def get_absolute_url(self): def cleanup_on_failure(self) -> None: """Clean up exported DICOM files when a mass transfer task fails or times out.""" - if self.task_type == self.TaskType.DISCOVERY: - return - volumes = self.volumes.exclude(exported_folder="") for volume in volumes: if volume.status == MassTransferVolume.Status.CONVERTED: continue + # When not converting to NIfTI, EXPORTED is the final state and + # the files live in the destination folder — don't delete them. 
+ if ( + not self.job.convert_to_nifti + and volume.status == MassTransferVolume.Status.EXPORTED + ): + continue export_folder = volume.exported_folder if export_folder: @@ -181,24 +156,6 @@ def cleanup_on_failure(self) -> None: volume.add_log("Export cleaned up after task failure.") volume.save() - def queue_pending_task(self) -> None: - """Queues a mass transfer task in the dicom queue.""" - assert self.status == DicomTask.Status.PENDING - assert self.queued_job is None - - priority = self.job.default_priority - if self.job.urgent: - priority = self.job.urgent_priority - - model_label = get_model_label(self.__class__) - queued_job_id = app.configure_task( - "adit.core.tasks.process_mass_transfer_task", - allow_unknown=False, - priority=priority, - ).defer(model_label=model_label, task_id=self.pk) - self.queued_job_id = queued_job_id - self.save() - class MassTransferVolume(models.Model): class Status(models.TextChoices): diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 607a595c8..86eac6fe2 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -12,7 +12,7 @@ from django.utils import timezone from pydicom import Dataset -from adit.core.errors import DicomError +from adit.core.errors import DicomError, RetriableDicomError from adit.core.models import DicomNode, DicomTask from adit.core.processors import DicomTaskProcessor from adit.core.utils.dicom_dataset import QueryDataset, ResultDataset @@ -106,18 +106,14 @@ def process(self): "log": "Task skipped because the mass transfer app is suspended.", } - if self.mass_task.task_type == MassTransferTask.TaskType.DISCOVERY: - return self._process_discovery() - else: - return self._process_study() - - def _process_discovery(self): - """Phase 1: Query PACS, create volumes, spawn per-study processing tasks.""" job = self.mass_task.job source_node = job.source + destination_node = job.destination if source_node.node_type != DicomNode.NodeType.SERVER: raise DicomError("Mass transfer source must be a DICOM server.") + if destination_node.node_type != DicomNode.NodeType.FOLDER: + raise DicomError("Mass transfer destination must be a DICOM folder.") filters = list(job.filters.all()) if not filters: @@ -127,140 +123,93 @@ def _process_discovery(self): "log": "Mass transfer requires at least one filter.", } + # Discovery: query PACS and create volume records operator = DicomOperator(source_node.dicomserver) volumes = self._find_volumes(operator, filters) - # Group volumes by study + # Link all discovered volumes to this task (for cleanup_on_failure) + for volume in volumes: + if volume.task_id != self.mass_task.pk: + volume.task = self.mass_task + volume.save(update_fields=["task"]) + + # Group volumes by study for pseudonymization — all series in a study + # must share the same pseudonym so the data stays linked. volumes_by_study: dict[str, list[MassTransferVolume]] = {} for volume in volumes: volumes_by_study.setdefault(volume.study_instance_uid, []).append(volume) - # Create one processing task per study and link volumes to it - tasks_created = 0 - tasks_reused = 0 + export_base = _export_base_dir() + output_base = _destination_base_dir(destination_node) + + # The "done" status depends on whether NIfTI conversion is enabled: + # CONVERTED when converting, EXPORTED when exporting DICOM only. 
+ done_status = ( + MassTransferVolume.Status.CONVERTED + if job.convert_to_nifti + else MassTransferVolume.Status.EXPORTED + ) + + total_processed = 0 + total_failed = 0 + for study_uid, study_volumes in volumes_by_study.items(): - patient_id = study_volumes[0].patient_id - - # Check for an existing processing task for this study (e.g. on restart) - existing_task = MassTransferTask.objects.filter( - job=job, - task_type=MassTransferTask.TaskType.PROCESSING, - study_instance_uid=study_uid, - ).first() - - if existing_task is not None: - processing_task = existing_task - tasks_reused += 1 - else: - processing_task = MassTransferTask.objects.create( - job=job, - source=job.source, - task_type=MassTransferTask.TaskType.PROCESSING, - partition_start=self.mass_task.partition_start, - partition_end=self.mass_task.partition_end, - partition_key=self.mass_task.partition_key, - study_instance_uid=study_uid, - patient_id=patient_id, + pseudonym = "" + if job.pseudonymize: + existing_pseudonym = next( + (v.pseudonym for v in study_volumes if v.pseudonym), + None, ) - tasks_created += 1 + pseudonym = existing_pseudonym or uuid.uuid4().hex - # Link volumes to the processing task for volume in study_volumes: - if volume.task_id != processing_task.pk: - volume.task = processing_task - volume.save(update_fields=["task"]) + if volume.status == done_status: + total_processed += 1 + continue - # Enqueue the processing task if it's still pending - is_pending = processing_task.status == DicomTask.Status.PENDING - if is_pending and processing_task.queued_job is None: - processing_task.queue_pending_task() + try: + if job.convert_to_nifti: + self._export_volume(operator, volume, export_base, pseudonym) + self._convert_volume(volume, output_base, pseudonym) + else: + # Export DICOM directly to the destination folder + self._export_volume(operator, volume, output_base, pseudonym) + total_processed += 1 + except RetriableDicomError: + raise # let Procrastinate retry the entire task + except Exception as err: + logger.exception( + "Mass transfer failed for volume %s", volume.series_instance_uid + ) + self._cleanup_export(volume) + volume.status = MassTransferVolume.Status.ERROR + volume.add_log(str(err)) + volume.save() + total_failed += 1 log_lines = [ f"Partition {self.mass_task.partition_key}", f"Studies found: {len(volumes_by_study)}", f"Volumes found: {len(volumes)}", - f"Processing tasks created: {tasks_created}", - f"Processing tasks reused: {tasks_reused}", + f"Processed: {total_processed}", + f"Failed: {total_failed}", ] - return { - "status": MassTransferTask.Status.SUCCESS, - "message": ( - f"Discovery complete: {len(volumes_by_study)} studies, " - f"{len(volumes)} volumes." 
- ), - "log": "\n".join(log_lines), - } - - def _process_study(self): - """Phase 2: Export + convert all volumes for a single study.""" - job = self.mass_task.job - source_node = job.source - destination_node = job.destination - - if source_node.node_type != DicomNode.NodeType.SERVER: - raise DicomError("Mass transfer source must be a DICOM server.") - if destination_node.node_type != DicomNode.NodeType.FOLDER: - raise DicomError("Mass transfer destination must be a DICOM folder.") - - operator = DicomOperator(source_node.dicomserver) - volumes = list(self.mass_task.volumes.all()) - - if not volumes: - return { - "status": MassTransferTask.Status.SUCCESS, - "message": "No volumes to process.", - "log": f"Study {self.mass_task.study_instance_uid}: no volumes linked.", - } - - export_base = _export_base_dir() - output_base = _destination_base_dir(destination_node) - - pseudonym = "" - if job.pseudonymize: - existing_pseudonym = next( - (v.pseudonym for v in volumes if v.pseudonym), - None, - ) - pseudonym = existing_pseudonym or uuid.uuid4().hex - - converted_count = 0 - failed_count = 0 - - for volume in volumes: - if volume.status == MassTransferVolume.Status.CONVERTED: - continue - - try: - self._export_volume(operator, volume, export_base, pseudonym) - self._convert_volume(volume, output_base, pseudonym) - converted_count += 1 - except Exception as err: - logger.exception( - "Mass transfer failed for volume %s", volume.series_instance_uid - ) - self._cleanup_export(volume) - volume.status = MassTransferVolume.Status.ERROR - volume.add_log(str(err)) - volume.save() - failed_count += 1 - - log_lines = [ - f"Study {self.mass_task.study_instance_uid}", - f"Volumes processed: {len(volumes)}", - f"Converted: {converted_count}", - f"Failed: {failed_count}", - ] - - if failed_count and converted_count: + if total_failed and total_processed: status = MassTransferTask.Status.WARNING message = "Some volumes failed during mass transfer." - elif failed_count and not converted_count: + elif total_failed and not total_processed: status = MassTransferTask.Status.FAILURE message = "All volumes failed during mass transfer." + elif not volumes: + status = MassTransferTask.Status.SUCCESS + message = "No volumes found for this partition." else: status = MassTransferTask.Status.SUCCESS - message = "Mass transfer task completed successfully." + message = ( + f"Mass transfer complete: {len(volumes_by_study)} studies, " + f"{total_processed} volumes processed." + ) return { "status": status, diff --git a/adit/mass_transfer/tests/test_cleanup.py b/adit/mass_transfer/tests/test_cleanup.py index 5f11edbe6..ea971e96d 100644 --- a/adit/mass_transfer/tests/test_cleanup.py +++ b/adit/mass_transfer/tests/test_cleanup.py @@ -14,7 +14,13 @@ @pytest.mark.django_db -def test_cleanup_mass_transfer_exports_on_failure(tmp_path: Path): +def test_cleanup_removes_intermediate_exports_when_converting(tmp_path: Path): + """When convert_to_nifti=True, EXPORTED volumes hold intermediate DICOM files + that should be cleaned up on failure. + + Proves: cleanup_on_failure deletes intermediate DICOM exports and marks + the volume as ERROR when convert_to_nifti is enabled (EXPORTED is not final). 
+ """ MassTransferSettings.objects.create() user = UserFactory.create() @@ -27,16 +33,14 @@ def test_cleanup_mass_transfer_exports_on_failure(tmp_path: Path): start_date=timezone.now().date(), end_date=timezone.now().date(), partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + convert_to_nifti=True, ) task = MassTransferTask.objects.create( job=job, source=source, - task_type=MassTransferTask.TaskType.PROCESSING, partition_start=timezone.now(), partition_end=timezone.now(), partition_key="20240101", - study_instance_uid="study-1", - patient_id="PATIENT", ) export_dir = tmp_path / "exports" / "202401" / "PATIENT" / "1-Head" @@ -64,3 +68,179 @@ def test_cleanup_mass_transfer_exports_on_failure(tmp_path: Path): assert not export_dir.exists() assert volume.status == MassTransferVolume.Status.ERROR assert volume.exported_folder == "" + + +@pytest.mark.django_db +def test_cleanup_preserves_exported_volumes_when_not_converting(tmp_path: Path): + """When convert_to_nifti=False, EXPORTED is the final state and the files + live in the destination folder — cleanup should not delete them. + + Proves: cleanup_on_failure preserves destination files and keeps the EXPORTED + status when convert_to_nifti is disabled (EXPORTED is the terminal state). + """ + MassTransferSettings.objects.create() + + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=timezone.now().date(), + end_date=timezone.now().date(), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + convert_to_nifti=False, + ) + task = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) + + export_dir = tmp_path / "output" / "202401" / "PATIENT" / "1-Head" + export_dir.mkdir(parents=True, exist_ok=True) + + volume = MassTransferVolume.objects.create( + job=job, + task=task, + partition_key="20240101", + patient_id="PATIENT", + study_instance_uid="study-1", + series_instance_uid="series-1", + modality="CT", + study_description="", + series_description="Head", + series_number=1, + study_datetime=timezone.now(), + exported_folder=str(export_dir), + status=MassTransferVolume.Status.EXPORTED, + ) + + task.cleanup_on_failure() + + volume.refresh_from_db() + assert export_dir.exists(), "Exported destination files should be preserved" + assert volume.status == MassTransferVolume.Status.EXPORTED + assert volume.exported_folder == str(export_dir) + + +@pytest.mark.django_db +def test_cleanup_skips_converted_volumes(tmp_path: Path): + """CONVERTED volumes represent fully-processed data in the destination. + + Proves: cleanup_on_failure never touches CONVERTED volumes — their status + stays CONVERTED and their destination files are preserved. 
+ """ + MassTransferSettings.objects.create() + + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=timezone.now().date(), + end_date=timezone.now().date(), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + convert_to_nifti=True, + ) + task = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) + + # Simulate a CONVERTED volume whose intermediate export folder still exists + export_dir = tmp_path / "exports" / "202401" / "PATIENT" / "1-Head" + export_dir.mkdir(parents=True, exist_ok=True) + + volume = MassTransferVolume.objects.create( + job=job, + task=task, + partition_key="20240101", + patient_id="PATIENT", + study_instance_uid="study-1", + series_instance_uid="series-1", + modality="CT", + study_description="", + series_description="Head", + series_number=1, + study_datetime=timezone.now(), + exported_folder=str(export_dir), + status=MassTransferVolume.Status.CONVERTED, + converted_file=str(tmp_path / "output" / "result.nii.gz"), + ) + + task.cleanup_on_failure() + + volume.refresh_from_db() + # CONVERTED volumes must be left untouched + assert volume.status == MassTransferVolume.Status.CONVERTED + assert volume.exported_folder == str(export_dir) + assert export_dir.exists(), "CONVERTED volume's export folder should not be deleted" + + +@pytest.mark.django_db +def test_cleanup_deletes_pending_volumes_with_partial_export(tmp_path: Path): + """PENDING volumes with an exported_folder represent a mid-export crash. + + Proves: cleanup_on_failure removes the partially-written export folder + and marks the volume as ERROR so it can be re-exported on retry. 
+ """ + MassTransferSettings.objects.create() + + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=timezone.now().date(), + end_date=timezone.now().date(), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + convert_to_nifti=True, + ) + task = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) + + # A PENDING volume that had its export folder created but fetch_series + # crashed before setting status to EXPORTED + partial_dir = tmp_path / "exports" / "202401" / "PATIENT" / "2-Body" + partial_dir.mkdir(parents=True, exist_ok=True) + # Write a partial file to simulate incomplete download + (partial_dir / "partial.dcm").write_bytes(b"incomplete") + + volume = MassTransferVolume.objects.create( + job=job, + task=task, + partition_key="20240101", + patient_id="PATIENT", + study_instance_uid="study-1", + series_instance_uid="series-2", + modality="CT", + study_description="", + series_description="Body", + series_number=2, + study_datetime=timezone.now(), + exported_folder=str(partial_dir), + status=MassTransferVolume.Status.PENDING, + ) + + task.cleanup_on_failure() + + volume.refresh_from_db() + assert not partial_dir.exists(), "Partial export should be deleted" + assert volume.status == MassTransferVolume.Status.ERROR + assert volume.exported_folder == "" diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 581863ff1..35febb159 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -8,7 +8,8 @@ from pydicom import Dataset from pytest_mock import MockerFixture -from adit.core.errors import DicomError +from adit.core.errors import DicomError, RetriableDicomError +from adit.core.models import DicomNode from adit.core.factories import DicomFolderFactory, DicomServerFactory from adit.core.utils.dicom_dataset import ResultDataset from adit.core.utils.dicom_operator import DicomOperator @@ -19,7 +20,14 @@ MassTransferTask, MassTransferVolume, ) -from adit.mass_transfer.processors import MassTransferTaskProcessor, _volume_path +from adit.mass_transfer.processors import ( + MassTransferTaskProcessor, + _dicom_match, + _parse_int, + _series_folder_name, + _study_datetime, + _volume_path, +) def _make_study(study_uid: str, study_date: str = "20240101") -> ResultDataset: @@ -71,8 +79,8 @@ def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture, s @pytest.mark.django_db -def test_process_study_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp_path: Path): - """Processing task generates the same pseudonym for all volumes in a study.""" +def test_process_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp_path: Path): + """All volumes in the same study receive the same pseudonym.""" settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") MassTransferSettings.objects.create() @@ -88,21 +96,17 @@ def test_process_study_groups_pseudonyms_by_study(mocker: MockerFixture, setting partition_granularity=MassTransferJob.PartitionGranularity.DAILY, ) - # Create a processing task for study-1 - task1 = MassTransferTask.objects.create( + task = MassTransferTask.objects.create( job=job, source=source, - 
task_type=MassTransferTask.TaskType.PROCESSING, partition_start=timezone.now(), partition_end=timezone.now(), partition_key="20240101", - study_instance_uid="study-1", - patient_id="PAT1", ) - MassTransferVolume.objects.create( + vol1 = MassTransferVolume.objects.create( job=job, - task=task1, + task=task, partition_key="20240101", study_instance_uid="study-1", series_instance_uid="series-1", @@ -112,9 +116,9 @@ def test_process_study_groups_pseudonyms_by_study(mocker: MockerFixture, setting series_number=1, study_datetime=timezone.now(), ) - MassTransferVolume.objects.create( + vol2 = MassTransferVolume.objects.create( job=job, - task=task1, + task=task, partition_key="20240101", study_instance_uid="study-1", series_instance_uid="series-2", @@ -125,7 +129,10 @@ def test_process_study_groups_pseudonyms_by_study(mocker: MockerFixture, setting study_datetime=timezone.now(), ) - processor = MassTransferTaskProcessor(task1) + processor = MassTransferTaskProcessor(task) + + # Mock _find_volumes to return pre-created volumes (skip PACS query) + mocker.patch.object(processor, "_find_volumes", return_value=[vol1, vol2]) export_calls: list[tuple[str, str]] = [] @@ -150,12 +157,12 @@ def fake_export(_, volume, __, pseudonym): @pytest.mark.django_db -def test_process_study_opt_out_skips_pseudonymization( +def test_process_opt_out_skips_pseudonymization( mocker: MockerFixture, settings, tmp_path: Path, ): - """When pseudonymize=False, processing task passes empty pseudonym.""" + """When pseudonymize=False, process passes empty pseudonym.""" settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") MassTransferSettings.objects.create() @@ -175,15 +182,12 @@ def test_process_study_opt_out_skips_pseudonymization( task = MassTransferTask.objects.create( job=job, source=source, - task_type=MassTransferTask.TaskType.PROCESSING, partition_start=timezone.now(), partition_end=timezone.now(), partition_key="20240101", - study_instance_uid="study-1", - patient_id="PATIENT-1", ) - MassTransferVolume.objects.create( + vol = MassTransferVolume.objects.create( job=job, task=task, partition_key="20240101", @@ -199,6 +203,9 @@ def test_process_study_opt_out_skips_pseudonymization( processor = MassTransferTaskProcessor(task) + # Mock _find_volumes to return pre-created volume (skip PACS query) + mocker.patch.object(processor, "_find_volumes", return_value=[vol]) + export_calls: list[str] = [] def fake_export(_, __, ___, pseudonym): @@ -386,3 +393,816 @@ def test_find_studies_preserves_order_with_unique_studies(mocker: MockerFixture, # Left-half results come first, then unique right-half additions assert result_uids == ["1.2.1", "1.2.2", "1.2.3"] assert len(result) == 3 + + +# --------------------------------------------------------------------------- +# process() tests (no DB required — fully mocked) +# --------------------------------------------------------------------------- + + +def _make_process_env( + mocker: MockerFixture, + settings, + tmp_path: Path, + *, + convert_to_nifti: bool = False, + pseudonymize: bool = True, +) -> MassTransferTaskProcessor: + """Create a processor with a fully mocked job for testing process().""" + settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") + + processor = _make_processor(mocker, settings) + + mock_job = processor.mass_task.job + mock_job.pseudonymize = pseudonymize + mock_job.convert_to_nifti = convert_to_nifti + mock_job.source.node_type = DicomNode.NodeType.SERVER + mock_job.source.dicomserver = mocker.MagicMock() + mock_job.destination.node_type = 
DicomNode.NodeType.FOLDER + mock_job.destination.dicomfolder.path = str(tmp_path / "output") + mock_job.filters.all.return_value = [_make_filter(mocker)] + + processor.mass_task.pk = 42 + processor.mass_task.partition_key = "20240101" + + mocker.patch.object(processor, "is_suspended", return_value=False) + mocker.patch("adit.mass_transfer.processors.DicomOperator") + + return processor + + +def _make_mock_volume( + mocker: MockerFixture, + *, + study_uid: str = "study-1", + series_uid: str = "series-1", + status: str | None = None, + pseudonym: str = "", + task_id: int | None = None, +) -> MassTransferVolume: + """Create a mock MassTransferVolume for testing process().""" + vol = mocker.MagicMock(spec=MassTransferVolume) + vol.status = status or MassTransferVolume.Status.PENDING + vol.study_instance_uid = study_uid + vol.series_instance_uid = series_uid + vol.pseudonym = pseudonym + vol.task_id = task_id + return vol + + +def test_process_reraises_retriable_dicom_error( + mocker: MockerFixture, settings, tmp_path: Path +): + """RetriableDicomError from _export_volume propagates for Procrastinate retry. + + Proves: RetriableDicomError is not swallowed by the broad except Exception + handler and propagates out of process() so Procrastinate can retry the task. + """ + processor = _make_process_env(mocker, settings, tmp_path) + vol = _make_mock_volume(mocker) + + mocker.patch.object(processor, "_find_volumes", return_value=[vol]) + mocker.patch.object( + processor, + "_export_volume", + side_effect=RetriableDicomError("PACS connection lost"), + ) + mocker.patch.object(processor, "_convert_volume") + + with pytest.raises(RetriableDicomError, match="PACS connection lost"): + processor.process() + + +def test_process_calls_convert_when_enabled( + mocker: MockerFixture, settings, tmp_path: Path +): + """With convert_to_nifti=True, both _export_volume and _convert_volume are called + and the export uses the intermediate export_base directory. + + Proves: When convert_to_nifti is enabled, both export and convert are called, + and the export writes to the intermediate directory (not the final destination). + """ + processor = _make_process_env(mocker, settings, tmp_path, convert_to_nifti=True) + vol = _make_mock_volume(mocker) + + mocker.patch.object(processor, "_find_volumes", return_value=[vol]) + mock_export = mocker.patch.object(processor, "_export_volume") + mock_convert = mocker.patch.object(processor, "_convert_volume") + + result = processor.process() + + assert mock_export.call_count == 1 + assert mock_convert.call_count == 1 + # Export should use export_base (intermediate dir), not output_base + export_call_base = mock_export.call_args[0][2] + assert "exports" in str(export_call_base) + assert result["status"] == MassTransferTask.Status.SUCCESS + + +def test_process_skips_convert_when_disabled( + mocker: MockerFixture, settings, tmp_path: Path +): + """With convert_to_nifti=False, _convert_volume is not called and export + goes directly to the destination folder. + + Proves: When convert_to_nifti is disabled, _convert_volume is never called + and the export writes directly to the destination folder. 
+ """ + processor = _make_process_env(mocker, settings, tmp_path, convert_to_nifti=False) + vol = _make_mock_volume(mocker) + + mocker.patch.object(processor, "_find_volumes", return_value=[vol]) + mock_export = mocker.patch.object(processor, "_export_volume") + mock_convert = mocker.patch.object(processor, "_convert_volume") + + result = processor.process() + + assert mock_export.call_count == 1 + assert mock_convert.call_count == 0 + # Export should go directly to output_base (destination) + export_call_base = mock_export.call_args[0][2] + assert "output" in str(export_call_base) + assert result["status"] == MassTransferTask.Status.SUCCESS + + +def test_process_counts_already_done_volumes( + mocker: MockerFixture, settings, tmp_path: Path +): + """Already-processed volumes are counted and skipped on retry. + + Proves: On retry, already-CONVERTED volumes are counted in total_processed + (not silently skipped) and are not re-exported or re-converted. + """ + processor = _make_process_env(mocker, settings, tmp_path, convert_to_nifti=True) + + vol_done = _make_mock_volume( + mocker, series_uid="s-done", status=MassTransferVolume.Status.CONVERTED + ) + vol_pending = _make_mock_volume(mocker, series_uid="s-pending") + + mocker.patch.object(processor, "_find_volumes", return_value=[vol_done, vol_pending]) + mock_export = mocker.patch.object(processor, "_export_volume") + mock_convert = mocker.patch.object(processor, "_convert_volume") + + result = processor.process() + + # Only the pending volume should be exported/converted + assert mock_export.call_count == 1 + assert mock_convert.call_count == 1 + # Both volumes should be counted as processed (1 already done + 1 new) + assert "Processed: 2" in result["log"] + assert result["status"] == MassTransferTask.Status.SUCCESS + + +def test_process_returns_warning_on_partial_failure( + mocker: MockerFixture, settings, tmp_path: Path +): + """When some volumes fail, the task status is WARNING. + + Proves: Mixed success/failure returns WARNING status with correct processed + and failed counts in the log. + """ + processor = _make_process_env(mocker, settings, tmp_path) + + vol1 = _make_mock_volume(mocker, series_uid="s-1") + vol2 = _make_mock_volume(mocker, series_uid="s-2") + + mocker.patch.object(processor, "_find_volumes", return_value=[vol1, vol2]) + + call_count = {"n": 0} + + def fake_export(op, volume, base, pseudo): + call_count["n"] += 1 + if call_count["n"] == 2: + raise DicomError("Export failed") + + mocker.patch.object(processor, "_export_volume", side_effect=fake_export) + mocker.patch.object(processor, "_convert_volume") + mocker.patch.object(processor, "_cleanup_export") + + result = processor.process() + + assert result["status"] == MassTransferTask.Status.WARNING + assert "Processed: 1" in result["log"] + assert "Failed: 1" in result["log"] + + +# --------------------------------------------------------------------------- +# Resumability tests — verify no re-download after outage +# --------------------------------------------------------------------------- + + +def test_export_volume_skips_fetch_when_already_exported( + mocker: MockerFixture, settings +): + """_export_volume returns immediately for EXPORTED volumes — no PACS fetch. + + Proves: _export_volume short-circuits when status=EXPORTED and exported_folder + is set, so operator.fetch_series is never called — no redundant PACS download. 
+ """ + processor = _make_processor(mocker, settings) + operator = mocker.create_autospec(DicomOperator) + + vol = mocker.MagicMock(spec=MassTransferVolume) + vol.status = MassTransferVolume.Status.EXPORTED + vol.exported_folder = "/tmp/already/exported" + + processor._export_volume(operator, vol, Path("/tmp/base"), "pseudo") + + operator.fetch_series.assert_not_called() + + +def test_convert_volume_skips_when_already_converted( + mocker: MockerFixture, settings +): + """_convert_volume returns immediately for CONVERTED volumes — no dcm2niix. + + Proves: _convert_volume short-circuits when status=CONVERTED and converted_file + is set, so subprocess.run (dcm2niix) is never called — no redundant conversion. + """ + processor = _make_processor(mocker, settings) + + vol = mocker.MagicMock(spec=MassTransferVolume) + vol.status = MassTransferVolume.Status.CONVERTED + vol.converted_file = "/tmp/output/result.nii.gz" + + mock_run = mocker.patch("adit.mass_transfer.processors.subprocess.run") + + processor._convert_volume(vol, Path("/tmp/output"), "pseudo") + + mock_run.assert_not_called() + + +def test_process_resumes_after_outage_without_refetch( + mocker: MockerFixture, settings, tmp_path: Path +): + """After an outage, only PENDING volumes trigger a PACS fetch. + + Simulates a crash-and-resume where the task has three volumes in different + states: + - PENDING: needs full processing (export + convert) + - EXPORTED: export finished before crash, needs conversion only + - CONVERTED: fully done, skip entirely + + Proves: Full integration — only PENDING triggers fetch_series (1 call). + EXPORTED skips re-download but still proceeds to conversion. CONVERTED is + fully skipped. All 3 volumes are counted as processed. + """ + processor = _make_process_env(mocker, settings, tmp_path, convert_to_nifti=True) + + vol_pending = _make_mock_volume(mocker, series_uid="s-pending") + vol_pending.study_datetime = datetime(2024, 1, 15, 10, 30) + vol_pending.series_number = 1 + vol_pending.series_description = "Head" + vol_pending.patient_id = "PATIENT1" + + vol_exported = _make_mock_volume( + mocker, series_uid="s-exported", status=MassTransferVolume.Status.EXPORTED + ) + vol_exported.exported_folder = str(tmp_path / "already_exported") + + vol_converted = _make_mock_volume( + mocker, series_uid="s-converted", status=MassTransferVolume.Status.CONVERTED + ) + + mocker.patch.object( + processor, + "_find_volumes", + return_value=[vol_pending, vol_exported, vol_converted], + ) + # Don't mock _export_volume — let the real early-return guard run + mocker.patch("adit.mass_transfer.processors.DicomManipulator") + mock_convert = mocker.patch.object(processor, "_convert_volume") + mocker.patch( + "adit.mass_transfer.processors.uuid.uuid4", + return_value=uuid.UUID(int=42), + ) + + result = processor.process() + + # Get the mock operator that process() instantiated + import adit.mass_transfer.processors as _proc + + mock_operator = _proc.DicomOperator.return_value + + # Only the PENDING volume should trigger a PACS fetch + assert mock_operator.fetch_series.call_count == 1 + assert mock_operator.fetch_series.call_args.kwargs["series_uid"] == "s-pending" + + # Conversion should run for PENDING + EXPORTED, not CONVERTED + assert mock_convert.call_count == 2 + + # All 3 volumes counted as processed + assert "Processed: 3" in result["log"] + assert result["status"] == MassTransferTask.Status.SUCCESS + + +# --------------------------------------------------------------------------- +# HIGH: Pseudonym reuse on retry +# 
--------------------------------------------------------------------------- + + +def test_process_reuses_existing_pseudonym_on_retry( + mocker: MockerFixture, settings, tmp_path: Path +): + """On retry, volumes that already have a pseudonym from a prior run are reused. + + Proves: When a study has a volume with an existing pseudonym (set during a + previous partial run), process() reuses that pseudonym instead of generating + a new one — preserving data linkage between series in the same study. + """ + processor = _make_process_env(mocker, settings, tmp_path, convert_to_nifti=False) + + # vol_done was exported in a prior run and has a pseudonym + vol_done = _make_mock_volume( + mocker, + series_uid="s-done", + status=MassTransferVolume.Status.EXPORTED, + pseudonym="existing-pseudo-abc", + ) + # vol_pending is in the same study but wasn't exported yet + vol_pending = _make_mock_volume(mocker, series_uid="s-pending", pseudonym="") + + mocker.patch.object( + processor, "_find_volumes", return_value=[vol_done, vol_pending] + ) + + export_calls: list[tuple[str, str]] = [] + + def fake_export(op, volume, base, pseudonym): + export_calls.append((volume.series_instance_uid, pseudonym)) + + mocker.patch.object(processor, "_export_volume", side_effect=fake_export) + + # Should NOT be called — uuid should never be generated + mock_uuid = mocker.patch( + "adit.mass_transfer.processors.uuid.uuid4", + return_value=uuid.UUID(int=99), + ) + + result = processor.process() + + # The pending volume should receive the existing pseudonym, not a new one + assert len(export_calls) == 1 + assert export_calls[0] == ("s-pending", "existing-pseudo-abc") + mock_uuid.assert_not_called() + assert result["status"] == MassTransferTask.Status.SUCCESS + + +# --------------------------------------------------------------------------- +# HIGH: done_status=EXPORTED when convert_to_nifti=False +# --------------------------------------------------------------------------- + + +def test_process_counts_exported_as_done_when_not_converting( + mocker: MockerFixture, settings, tmp_path: Path +): + """With convert_to_nifti=False, EXPORTED is the terminal state. + + Proves: Already-EXPORTED volumes are counted as done (not re-exported) + when convert_to_nifti is disabled. The done_status logic correctly uses + EXPORTED instead of CONVERTED. + """ + processor = _make_process_env(mocker, settings, tmp_path, convert_to_nifti=False) + + vol_done = _make_mock_volume( + mocker, series_uid="s-done", status=MassTransferVolume.Status.EXPORTED + ) + vol_pending = _make_mock_volume(mocker, series_uid="s-pending") + + mocker.patch.object( + processor, "_find_volumes", return_value=[vol_done, vol_pending] + ) + mock_export = mocker.patch.object(processor, "_export_volume") + mock_convert = mocker.patch.object(processor, "_convert_volume") + + result = processor.process() + + # Only the pending volume should be exported + assert mock_export.call_count == 1 + assert mock_convert.call_count == 0 + # Both volumes should be counted as processed + assert "Processed: 2" in result["log"] + assert result["status"] == MassTransferTask.Status.SUCCESS + + +# --------------------------------------------------------------------------- +# HIGH: All-fail → FAILURE +# --------------------------------------------------------------------------- + + +def test_process_returns_failure_when_all_volumes_fail( + mocker: MockerFixture, settings, tmp_path: Path +): + """When every volume fails, the task status is FAILURE. 
+ + Proves: The all-fail branch (total_failed > 0, total_processed == 0) + returns FAILURE status, distinguishing it from partial failure (WARNING). + """ + processor = _make_process_env(mocker, settings, tmp_path) + + vol1 = _make_mock_volume(mocker, series_uid="s-1") + vol2 = _make_mock_volume(mocker, series_uid="s-2") + + mocker.patch.object(processor, "_find_volumes", return_value=[vol1, vol2]) + mocker.patch.object( + processor, "_export_volume", side_effect=DicomError("PACS down") + ) + mocker.patch.object(processor, "_cleanup_export") + + result = processor.process() + + assert result["status"] == MassTransferTask.Status.FAILURE + assert "Processed: 0" in result["log"] + assert "Failed: 2" in result["log"] + + +# --------------------------------------------------------------------------- +# MEDIUM: process() early guards +# --------------------------------------------------------------------------- + + +def test_process_returns_warning_when_suspended( + mocker: MockerFixture, settings, tmp_path: Path +): + """When the mass transfer app is suspended, process() returns WARNING. + + Proves: The suspended guard fires before any PACS interaction and returns + a WARNING so the task can be retried later without being marked as failed. + """ + processor = _make_process_env(mocker, settings, tmp_path) + # Override the is_suspended mock from _make_process_env + mocker.patch.object(processor, "is_suspended", return_value=True) + + result = processor.process() + + assert result["status"] == MassTransferTask.Status.WARNING + assert "suspended" in result["log"].lower() + + +def test_process_raises_when_source_not_server( + mocker: MockerFixture, settings, tmp_path: Path +): + """Source must be a DICOM server. + + Proves: process() raises DicomError with a clear message when the source + node is not a SERVER, before any volumes are processed. + """ + processor = _make_process_env(mocker, settings, tmp_path) + processor.mass_task.job.source.node_type = DicomNode.NodeType.FOLDER + + with pytest.raises(DicomError, match="source must be a DICOM server"): + processor.process() + + +def test_process_raises_when_destination_not_folder( + mocker: MockerFixture, settings, tmp_path: Path +): + """Destination must be a DICOM folder. + + Proves: process() raises DicomError with a clear message when the destination + node is not a FOLDER, before any volumes are processed. + """ + processor = _make_process_env(mocker, settings, tmp_path) + processor.mass_task.job.destination.node_type = DicomNode.NodeType.SERVER + + with pytest.raises(DicomError, match="destination must be a DICOM folder"): + processor.process() + + +def test_process_returns_failure_when_no_filters( + mocker: MockerFixture, settings, tmp_path: Path +): + """When no filters are configured, process() returns FAILURE. + + Proves: The no-filters guard returns FAILURE with a clear message instead + of silently succeeding with zero volumes. + """ + processor = _make_process_env(mocker, settings, tmp_path) + processor.mass_task.job.filters.all.return_value = [] + + result = processor.process() + + assert result["status"] == MassTransferTask.Status.FAILURE + assert "filter" in result["log"].lower() + + +def test_process_returns_success_for_empty_partition( + mocker: MockerFixture, settings, tmp_path: Path +): + """When no volumes are found, process() returns SUCCESS. + + Proves: An empty partition is a legitimate outcome (not an error). The task + reports SUCCESS with a "No volumes found" message. 
+ """ + processor = _make_process_env(mocker, settings, tmp_path) + mocker.patch.object(processor, "_find_volumes", return_value=[]) + + result = processor.process() + + assert result["status"] == MassTransferTask.Status.SUCCESS + assert "No volumes found" in result["message"] + + +# --------------------------------------------------------------------------- +# MEDIUM: _convert_volume error cases +# --------------------------------------------------------------------------- + + +def test_convert_volume_raises_when_no_exported_folder( + mocker: MockerFixture, settings +): + """_convert_volume raises DicomError when exported_folder is empty. + + Proves: The guard at the top of _convert_volume catches a missing + exported_folder and raises a clear DicomError instead of passing garbage + to dcm2niix. + """ + processor = _make_processor(mocker, settings) + + vol = mocker.MagicMock(spec=MassTransferVolume) + vol.status = MassTransferVolume.Status.EXPORTED + vol.exported_folder = "" + vol.converted_file = "" + + with pytest.raises(DicomError, match="Missing exported folder"): + processor._convert_volume(vol, Path("/tmp/output"), "pseudo") + + +def test_convert_volume_raises_on_dcm2niix_failure( + mocker: MockerFixture, settings, tmp_path: Path +): + """_convert_volume raises DicomError when dcm2niix returns non-zero. + + Proves: A dcm2niix crash produces a clear DicomError with stderr content, + not a silent pass or uncaught exception. + """ + processor = _make_processor(mocker, settings) + + vol = mocker.MagicMock(spec=MassTransferVolume) + vol.status = MassTransferVolume.Status.EXPORTED + vol.exported_folder = str(tmp_path / "dicom_input") + vol.converted_file = "" + vol.series_instance_uid = "1.2.3" + vol.series_number = 1 + vol.series_description = "Head" + vol.pseudonym = "pseudo" + vol.patient_id = "PAT1" + vol.study_datetime = datetime(2024, 1, 15, 10, 30) + + mock_result = mocker.MagicMock() + mock_result.returncode = 1 + mock_result.stderr = "Segmentation fault" + mocker.patch( + "adit.mass_transfer.processors.subprocess.run", return_value=mock_result + ) + + with pytest.raises(DicomError, match="Conversion failed"): + processor._convert_volume(vol, tmp_path / "output", "pseudo") + + +def test_convert_volume_raises_when_no_nifti_output( + mocker: MockerFixture, settings, tmp_path: Path +): + """_convert_volume raises DicomError when dcm2niix produces no .nii.gz files. + + Proves: A successful dcm2niix run that produces no output files is caught + and raises a clear DicomError instead of silently writing empty metadata. 
+ """ + processor = _make_processor(mocker, settings) + + vol = mocker.MagicMock(spec=MassTransferVolume) + vol.status = MassTransferVolume.Status.EXPORTED + vol.exported_folder = str(tmp_path / "dicom_input") + vol.converted_file = "" + vol.series_instance_uid = "1.2.3" + vol.series_number = 1 + vol.series_description = "Head" + vol.pseudonym = "pseudo" + vol.patient_id = "PAT1" + vol.study_datetime = datetime(2024, 1, 15, 10, 30) + + mock_result = mocker.MagicMock() + mock_result.returncode = 0 + mock_result.stderr = "" + mocker.patch( + "adit.mass_transfer.processors.subprocess.run", return_value=mock_result + ) + + with pytest.raises(DicomError, match="no .nii.gz files"): + processor._convert_volume(vol, tmp_path / "output", "pseudo") + + +# --------------------------------------------------------------------------- +# MEDIUM: _cleanup_export tests +# --------------------------------------------------------------------------- + + +def test_cleanup_export_sets_export_cleaned_flag( + mocker: MockerFixture, settings, tmp_path: Path +): + """_cleanup_export removes the folder and sets export_cleaned=True. + + Proves: On success, the export folder is deleted and export_cleaned is + set so the cleanup is not attempted again on a subsequent call. + """ + processor = _make_processor(mocker, settings) + + export_dir = tmp_path / "exports" / "202401" / "PATIENT" / "1-Head" + export_dir.mkdir(parents=True) + + vol = mocker.MagicMock(spec=MassTransferVolume) + vol.exported_folder = str(export_dir) + vol.export_cleaned = False + + processor._cleanup_export(vol) + + assert not export_dir.exists() + assert vol.export_cleaned is True + vol.save.assert_called() + + +def test_cleanup_export_skips_when_already_cleaned( + mocker: MockerFixture, settings +): + """_cleanup_export is a no-op when export_cleaned is already True. + + Proves: The already-cleaned guard prevents redundant rmtree calls, + avoiding FileNotFoundError on repeated invocations. + """ + processor = _make_processor(mocker, settings) + + vol = mocker.MagicMock(spec=MassTransferVolume) + vol.exported_folder = "/tmp/some/path" + vol.export_cleaned = True + + mock_rmtree = mocker.patch("adit.mass_transfer.processors.shutil.rmtree") + + processor._cleanup_export(vol) + + mock_rmtree.assert_not_called() + vol.save.assert_not_called() + + +def test_cleanup_export_skips_when_no_folder(mocker: MockerFixture, settings): + """_cleanup_export is a no-op when exported_folder is empty. + + Proves: Volumes that were never exported (empty exported_folder) don't + trigger any filesystem operations. + """ + processor = _make_processor(mocker, settings) + + vol = mocker.MagicMock(spec=MassTransferVolume) + vol.exported_folder = "" + vol.export_cleaned = False + + mock_rmtree = mocker.patch("adit.mass_transfer.processors.shutil.rmtree") + + processor._cleanup_export(vol) + + mock_rmtree.assert_not_called() + vol.save.assert_not_called() + + +def test_cleanup_export_handles_file_not_found( + mocker: MockerFixture, settings +): + """_cleanup_export silently passes when the folder is already gone. + + Proves: FileNotFoundError (e.g., another process already cleaned up) is + caught and the volume is still marked as cleaned. 
+ """ + processor = _make_processor(mocker, settings) + + vol = mocker.MagicMock(spec=MassTransferVolume) + vol.exported_folder = "/tmp/already/gone" + vol.export_cleaned = False + + mocker.patch( + "adit.mass_transfer.processors.shutil.rmtree", + side_effect=FileNotFoundError, + ) + + processor._cleanup_export(vol) + + assert vol.export_cleaned is True + vol.save.assert_called() + + +def test_cleanup_export_handles_permission_error( + mocker: MockerFixture, settings +): + """_cleanup_export logs the error and does NOT set export_cleaned on PermissionError. + + Proves: When rmtree fails with a non-FileNotFoundError (e.g., permissions), + the error is logged but the task doesn't crash, and export_cleaned stays + False so cleanup can be reattempted. + """ + processor = _make_processor(mocker, settings) + + vol = mocker.MagicMock(spec=MassTransferVolume) + vol.exported_folder = "/tmp/locked/folder" + vol.export_cleaned = False + + mocker.patch( + "adit.mass_transfer.processors.shutil.rmtree", + side_effect=PermissionError("Access denied"), + ) + + processor._cleanup_export(vol) + + # export_cleaned should NOT be set — cleanup needs to be retried + assert vol.export_cleaned is False + vol.add_log.assert_called_once() + assert "Cleanup failed" in vol.add_log.call_args[0][0] + + +# --------------------------------------------------------------------------- +# LOW: Utility function tests +# --------------------------------------------------------------------------- + + +def test_series_folder_name_with_number_and_description(): + """Proves: Normal case produces '{number}-{description}' format.""" + assert _series_folder_name(1, "Head CT", "1.2.3") == "1-Head CT" + + +def test_series_folder_name_with_no_description(): + """Proves: Missing description falls back to 'Undefined'.""" + assert _series_folder_name(1, "", "1.2.3") == "1-Undefined" + + +def test_series_folder_name_with_no_number(): + """Proves: Missing series_number falls back to the series UID.""" + assert _series_folder_name(None, "Head CT", "1.2.3.4.5") == "1.2.3.4.5" + + +def test_parse_int_normal(): + """Proves: String integers are parsed correctly.""" + assert _parse_int("42") == 42 + + +def test_parse_int_none_returns_default(): + """Proves: None returns the specified default.""" + assert _parse_int(None, default=7) == 7 + + +def test_parse_int_empty_returns_default(): + """Proves: Empty string returns the specified default.""" + assert _parse_int("", default=0) == 0 + + +def test_parse_int_garbage_returns_default(): + """Proves: Non-numeric strings return the default instead of raising.""" + assert _parse_int("abc", default=None) is None + + +def test_study_datetime_with_time(): + """Proves: StudyDate + StudyTime are combined into a datetime.""" + ds = Dataset() + ds.StudyDate = "20240115" + ds.StudyTime = "103000" + result = _study_datetime(ResultDataset(ds)) + assert result == datetime(2024, 1, 15, 10, 30, 0) + + +def test_study_datetime_with_midnight(): + """Proves: StudyTime of "000000" (midnight) is correctly parsed. + + Note: The `if study_time is None` guard in _study_datetime is dead code — + ResultDataset.StudyTime always passes through convert_to_python_time() which + asserts on both None and empty string before the guard can fire. If PACS + returns a study with no StudyTime, the crash happens in the converter, not + in _study_datetime. Consider fixing convert_to_python_time to return + time(0,0) for None/empty, or catching it in _study_datetime. 
+ """ + ds = Dataset() + ds.StudyDate = "20240115" + ds.StudyTime = "000000" + result = _study_datetime(ResultDataset(ds)) + assert result == datetime(2024, 1, 15, 0, 0, 0) + + +def test_dicom_match_empty_pattern_matches_anything(): + """Proves: An empty pattern matches any value (wildcard behavior).""" + assert _dicom_match("", "anything") is True + assert _dicom_match("", None) is True + assert _dicom_match("", "") is True + + +def test_dicom_match_none_value_never_matches(): + """Proves: A non-empty pattern never matches None.""" + assert _dicom_match("CT", None) is False + + +def test_dicom_match_exact(): + """Proves: An exact pattern matches its value.""" + assert _dicom_match("CT", "CT") is True + assert _dicom_match("CT", "MR") is False + + +def test_dicom_match_wildcard(): + """Proves: DICOM wildcard patterns (converted to regex) work correctly.""" + # DICOM uses * as wildcard, which should be converted to regex .* + assert _dicom_match("Head*", "Head CT") is True + assert _dicom_match("Head*", "Foot CT") is False diff --git a/docker-compose.base.yml b/docker-compose.base.yml index 20faab538..3ca80114b 100644 --- a/docker-compose.base.yml +++ b/docker-compose.base.yml @@ -70,6 +70,10 @@ services: <<: *default-app hostname: dicom_worker.local + mass_transfer_worker: + <<: *default-app + hostname: mass_transfer_worker.local + receiver: <<: *default-app hostname: receiver.local diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index beb0511d7..433032ffd 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -64,6 +64,14 @@ services: ./manage.py bg_worker -l debug -q dicom --autoreload " + mass_transfer_worker: + <<: *default-app + command: > + bash -c " + wait-for-it -s postgres.local:5432 -t 60 && + ./manage.py bg_worker -l debug -q mass_transfer --autoreload + " + receiver: <<: *default-app ports: diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index a1ed27bcb..fdc2acbe7 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -78,6 +78,17 @@ services: <<: *deploy replicas: ${DICOM_WORKER_REPLICAS:-3} + mass_transfer_worker: + <<: *default-app + command: > + bash -c " + wait-for-it -s postgres.local:5432 -t 60 && + ./manage.py bg_worker -q mass_transfer + " + deploy: + <<: *deploy + replicas: ${MASS_TRANSFER_WORKER_REPLICAS:-1} + receiver: <<: *default-app ports: From 253a1d3bddd63e3c8157709527e68ec574e1656c Mon Sep 17 00:00:00 2001 From: Ritwik Date: Mon, 23 Feb 2026 13:56:25 +0100 Subject: [PATCH 016/103] Add migration for export_cleaned field on MassTransferVolume --- .../0003_masstransfervolume_export_cleaned.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 adit/mass_transfer/migrations/0003_masstransfervolume_export_cleaned.py diff --git a/adit/mass_transfer/migrations/0003_masstransfervolume_export_cleaned.py b/adit/mass_transfer/migrations/0003_masstransfervolume_export_cleaned.py new file mode 100644 index 000000000..ca635c764 --- /dev/null +++ b/adit/mass_transfer/migrations/0003_masstransfervolume_export_cleaned.py @@ -0,0 +1,16 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("mass_transfer", "0002_two_phase_task_type"), + ] + + operations = [ + migrations.AddField( + model_name="masstransfervolume", + name="export_cleaned", + field=models.BooleanField(default=False), + ), + ] From 268f078231baa967b4e9c4378be236eca26a3c75 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Mon, 23 Feb 2026 14:06:16 +0100 Subject: [PATCH 017/103] Include 
stdout in dcm2niix conversion error messages --- adit/mass_transfer/processors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 86eac6fe2..7de168c93 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -474,8 +474,9 @@ def _convert_volume( result = subprocess.run(cmd, check=False, capture_output=True, text=True) if result.returncode != 0: + output = result.stderr or result.stdout raise DicomError( - f"Conversion failed for series {volume.series_instance_uid}: {result.stderr}" + f"Conversion failed for series {volume.series_instance_uid}: {output}" ) nifti_files = sorted(output_path.glob("*.nii.gz")) From 3c028e857d6aaf592adab613ad234c9139630137 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Sat, 28 Feb 2026 20:41:47 +0100 Subject: [PATCH 018/103] Add three-mode anonymization with longitudinal linking --- adit/core/utils/dicom_manipulator.py | 12 +- adit/core/utils/pseudonymizer.py | 5 +- adit/mass_transfer/admin.py | 2 + adit/mass_transfer/forms.py | 13 +- .../0003_masstransfervolume_export_cleaned.py | 16 - .../migrations/0004_anonymization_mode.py | 85 +++ adit/mass_transfer/models.py | 56 +- adit/mass_transfer/processors.py | 84 ++- .../mass_transfer_job_detail.html | 10 +- adit/mass_transfer/tests/test_processor.py | 494 +++++++++++++++++- adit/mass_transfer/urls.py | 6 + adit/mass_transfer/views.py | 38 +- 12 files changed, 772 insertions(+), 49 deletions(-) delete mode 100644 adit/mass_transfer/migrations/0003_masstransfervolume_export_cleaned.py create mode 100644 adit/mass_transfer/migrations/0004_anonymization_mode.py diff --git a/adit/core/utils/dicom_manipulator.py b/adit/core/utils/dicom_manipulator.py index 34cf391e2..521d5dc77 100644 --- a/adit/core/utils/dicom_manipulator.py +++ b/adit/core/utils/dicom_manipulator.py @@ -1,20 +1,18 @@ -from typing import Optional - from pydicom.dataset import Dataset from adit.core.utils.pseudonymizer import Pseudonymizer class DicomManipulator: - def __init__(self): - self.pseudonymizer = Pseudonymizer() + def __init__(self, pseudonymizer: Pseudonymizer | None = None): + self.pseudonymizer = pseudonymizer or Pseudonymizer() def manipulate( self, ds: Dataset, - pseudonym: Optional[str] = None, - trial_protocol_id: Optional[str] = None, - trial_protocol_name: Optional[str] = None, + pseudonym: str | None = None, + trial_protocol_id: str | None = None, + trial_protocol_name: str | None = None, ) -> None: """ Manipulates the DICOM dataset by pseudonymizing and setting trial protocol details. diff --git a/adit/core/utils/pseudonymizer.py b/adit/core/utils/pseudonymizer.py index 3b0a2886b..82513731e 100644 --- a/adit/core/utils/pseudonymizer.py +++ b/adit/core/utils/pseudonymizer.py @@ -9,13 +9,14 @@ class Pseudonymizer: A utility class for pseudonymizing (or anonymizing) DICOM data. """ - def __init__(self) -> None: + def __init__(self, anonymizer: Anonymizer | None = None) -> None: """ Initialize the Pseudonymizer. Sets up the anonymizer instance and configures it to skip specific elements. + If an existing Anonymizer is provided, it will be used instead of creating a new one. 
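+
+        A minimal usage sketch (mirroring how the mass transfer processor in
+        this patch shares one instance per study, so the underlying Anonymizer
+        keeps replacement UIDs consistent across all series of that study):
+
+            study_pseudonymizer = Pseudonymizer()
+            manipulator = DicomManipulator(pseudonymizer=study_pseudonymizer)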
""" - self.anonymizer = self._setup_anonymizer() + self.anonymizer = anonymizer or self._setup_anonymizer() def _setup_anonymizer(self) -> Anonymizer: """ diff --git a/adit/mass_transfer/admin.py b/adit/mass_transfer/admin.py index 05ab53e5b..c73237348 100644 --- a/adit/mass_transfer/admin.py +++ b/adit/mass_transfer/admin.py @@ -3,6 +3,7 @@ from adit.core.admin import DicomJobAdmin, DicomTaskAdmin from .models import ( + MassTransferAssociation, MassTransferFilter, MassTransferJob, MassTransferSettings, @@ -20,3 +21,4 @@ class MassTransferJobAdmin(DicomJobAdmin): admin.site.register(MassTransferSettings, admin.ModelAdmin) admin.site.register(MassTransferFilter, admin.ModelAdmin) admin.site.register(MassTransferVolume, admin.ModelAdmin) +admin.site.register(MassTransferAssociation, admin.ModelAdmin) diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index 1507c6512..9e594bfa2 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -115,7 +115,7 @@ class Meta: "end_date", "partition_granularity", "filters", - "pseudonymize", + "anonymization_mode", "convert_to_nifti", "send_finished_mail", ) @@ -123,15 +123,15 @@ class Meta: "start_date": "Start date", "end_date": "End date", "partition_granularity": "Partition granularity", - "pseudonymize": "Pseudonymize data", + "anonymization_mode": "Anonymization", "convert_to_nifti": "Convert to NIfTI", "send_finished_mail": "Send Email when job is finished", } help_texts = { "partition_granularity": "Daily or weekly partition windows.", - "pseudonymize": ( - "When disabled, patient identifiers are preserved and output folders use " - "Patient ID." + "anonymization_mode": ( + "No anonymization preserves all identifiers. Pseudonymize replaces them. " + "Pseudonymize with linking also exports a mapping CSV." 
), "convert_to_nifti": ( "When enabled, exported DICOM series are converted to NIfTI format " @@ -141,6 +141,7 @@ class Meta: widgets = { "start_date": forms.DateInput(attrs={"type": "date"}), "end_date": forms.DateInput(attrs={"type": "date"}), + "anonymization_mode": forms.RadioSelect, } def __init__(self, *args, **kwargs): @@ -183,7 +184,7 @@ def __init__(self, *args, **kwargs): ), Row( Column(Field("partition_granularity"), css_class="col-md-6"), - Column(Field("pseudonymize"), css_class="col-md-6"), + Column(Field("anonymization_mode"), css_class="col-md-6"), css_class="g-3", ), Row( diff --git a/adit/mass_transfer/migrations/0003_masstransfervolume_export_cleaned.py b/adit/mass_transfer/migrations/0003_masstransfervolume_export_cleaned.py deleted file mode 100644 index ca635c764..000000000 --- a/adit/mass_transfer/migrations/0003_masstransfervolume_export_cleaned.py +++ /dev/null @@ -1,16 +0,0 @@ -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("mass_transfer", "0002_two_phase_task_type"), - ] - - operations = [ - migrations.AddField( - model_name="masstransfervolume", - name="export_cleaned", - field=models.BooleanField(default=False), - ), - ] diff --git a/adit/mass_transfer/migrations/0004_anonymization_mode.py b/adit/mass_transfer/migrations/0004_anonymization_mode.py new file mode 100644 index 000000000..fe247bf20 --- /dev/null +++ b/adit/mass_transfer/migrations/0004_anonymization_mode.py @@ -0,0 +1,85 @@ +import django.db.models.deletion +from django.db import migrations, models + + +def migrate_pseudonymize_to_anonymization_mode(apps, schema_editor): + MassTransferJob = apps.get_model("mass_transfer", "MassTransferJob") + MassTransferJob.objects.filter(pseudonymize=True).update(anonymization_mode="pseudonymize") + MassTransferJob.objects.filter(pseudonymize=False).update(anonymization_mode="none") + + +class Migration(migrations.Migration): + dependencies = [ + ("mass_transfer", "0003_collapse_single_phase"), + ] + + operations = [ + migrations.AddField( + model_name="masstransferjob", + name="anonymization_mode", + field=models.CharField( + choices=[ + ("none", "No anonymization"), + ("pseudonymize", "Pseudonymize"), + ("pseudonymize_with_linking", "Pseudonymize with linking"), + ], + default="pseudonymize", + max_length=32, + ), + ), + migrations.RunPython( + migrate_pseudonymize_to_anonymization_mode, + migrations.RunPython.noop, + ), + migrations.RemoveField( + model_name="masstransferjob", + name="pseudonymize", + ), + migrations.CreateModel( + name="MassTransferAssociation", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("pseudonym", models.CharField(max_length=64)), + ("patient_id", models.CharField(max_length=64)), + ("original_study_instance_uid", models.CharField(max_length=128)), + ("pseudonymized_study_instance_uid", models.CharField(max_length=128)), + ("created", models.DateTimeField(auto_now_add=True)), + ( + "job", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="associations", + to="mass_transfer.masstransferjob", + ), + ), + ( + "task", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="associations", + to="mass_transfer.masstransfertask", + ), + ), + ], + options={ + "ordering": ("id",), + }, + ), + migrations.AddConstraint( + model_name="masstransferassociation", + constraint=models.UniqueConstraint( + 
fields=("job", "original_study_instance_uid"), + name="mass_transfer_unique_association_per_study", + ), + ), + ] diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index 67de2bfb0..a3b3641ba 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -68,6 +68,11 @@ class PartitionGranularity(models.TextChoices): DAILY = "daily", "Daily" WEEKLY = "weekly", "Weekly" + class AnonymizationMode(models.TextChoices): + NONE = "none", "No anonymization" + PSEUDONYMIZE = "pseudonymize", "Pseudonymize" + PSEUDONYMIZE_WITH_LINKING = "pseudonymize_with_linking", "Pseudonymize with linking" + default_priority = settings.MASS_TRANSFER_DEFAULT_PRIORITY urgent_priority = settings.MASS_TRANSFER_URGENT_PRIORITY @@ -80,10 +85,22 @@ class PartitionGranularity(models.TextChoices): choices=PartitionGranularity.choices, default=PartitionGranularity.DAILY, ) - pseudonymize = models.BooleanField(default=True) + anonymization_mode = models.CharField( + max_length=32, + choices=AnonymizationMode.choices, + default=AnonymizationMode.PSEUDONYMIZE, + ) filters = models.ManyToManyField(MassTransferFilter, related_name="jobs", blank=True) + @property + def should_pseudonymize(self) -> bool: + return self.anonymization_mode != self.AnonymizationMode.NONE + + @property + def should_link(self) -> bool: + return self.anonymization_mode == self.AnonymizationMode.PSEUDONYMIZE_WITH_LINKING + tasks: models.QuerySet["MassTransferTask"] def get_absolute_url(self): @@ -214,3 +231,40 @@ def add_log(self, msg: str) -> None: if self.log: self.log += "\n" self.log += msg + + +class MassTransferAssociation(models.Model): + """Maps original DICOM UIDs to their pseudonymized counterparts for longitudinal linking.""" + + job = models.ForeignKey( + MassTransferJob, + on_delete=models.CASCADE, + related_name="associations", + ) + task = models.ForeignKey( + MassTransferTask, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="associations", + ) + pseudonym = models.CharField(max_length=64) + patient_id = models.CharField(max_length=64) + original_study_instance_uid = models.CharField(max_length=128) + pseudonymized_study_instance_uid = models.CharField(max_length=128) + created = models.DateTimeField(auto_now_add=True) + + class Meta: + ordering = ("id",) + constraints = [ + models.UniqueConstraint( + fields=["job", "original_study_instance_uid"], + name="mass_transfer_unique_association_per_study", + ) + ] + + def __str__(self) -> str: + return ( + f"MassTransferAssociation {self.original_study_instance_uid} " + f"-> {self.pseudonymized_study_instance_uid}" + ) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 7de168c93..743477f91 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -11,6 +11,7 @@ from django.conf import settings from django.utils import timezone from pydicom import Dataset +from pydicom.uid import ExplicitVRLittleEndian from adit.core.errors import DicomError, RetriableDicomError from adit.core.models import DicomNode, DicomTask @@ -19,9 +20,16 @@ from adit.core.utils.dicom_manipulator import DicomManipulator from adit.core.utils.dicom_operator import DicomOperator from adit.core.utils.dicom_utils import convert_to_python_regex, write_dataset +from adit.core.utils.pseudonymizer import Pseudonymizer from adit.core.utils.sanitize import sanitize_filename -from .models import MassTransferFilter, MassTransferSettings, MassTransferTask, MassTransferVolume +from .models import ( + 
MassTransferAssociation, + MassTransferFilter, + MassTransferSettings, + MassTransferTask, + MassTransferVolume, +) logger = logging.getLogger(__name__) @@ -155,12 +163,17 @@ def process(self): for study_uid, study_volumes in volumes_by_study.items(): pseudonym = "" - if job.pseudonymize: + study_pseudonymizer: Pseudonymizer | None = None + + if job.should_pseudonymize: existing_pseudonym = next( (v.pseudonym for v in study_volumes if v.pseudonym), None, ) pseudonym = existing_pseudonym or uuid.uuid4().hex + # One Anonymizer per study: all series in the same study share + # the same Anonymizer so UIDs stay consistent within the study. + study_pseudonymizer = Pseudonymizer() for volume in study_volumes: if volume.status == done_status: @@ -169,11 +182,16 @@ def process(self): try: if job.convert_to_nifti: - self._export_volume(operator, volume, export_base, pseudonym) + self._export_volume( + operator, volume, export_base, pseudonym, + study_pseudonymizer=study_pseudonymizer, + ) self._convert_volume(volume, output_base, pseudonym) else: - # Export DICOM directly to the destination folder - self._export_volume(operator, volume, output_base, pseudonym) + self._export_volume( + operator, volume, output_base, pseudonym, + study_pseudonymizer=study_pseudonymizer, + ) total_processed += 1 except RetriableDicomError: raise # let Procrastinate retry the entire task @@ -187,6 +205,16 @@ def process(self): volume.save() total_failed += 1 + # Record association for longitudinal linking, but only if at + # least one volume in the study was successfully processed. + study_has_success = any( + v.status != MassTransferVolume.Status.ERROR for v in study_volumes + ) + if job.should_link and pseudonym and study_pseudonymizer and study_has_success: + self._create_association( + study_uid, pseudonym, study_volumes, study_pseudonymizer, + ) + log_lines = [ f"Partition {self.mass_task.partition_key}", f"Studies found: {len(volumes_by_study)}", @@ -401,6 +429,8 @@ def _export_volume( volume: MassTransferVolume, export_base: Path, pseudonym: str, + *, + study_pseudonymizer: Pseudonymizer | None = None, ) -> None: if volume.status == MassTransferVolume.Status.EXPORTED and volume.exported_folder: return @@ -417,7 +447,9 @@ def _export_volume( export_path.mkdir(parents=True, exist_ok=True) volume.exported_folder = str(export_path) - manipulator = DicomManipulator() + # Share the study-level Pseudonymizer (and thus Anonymizer) across + # all volumes in the same study. + manipulator = DicomManipulator(pseudonymizer=study_pseudonymizer) def callback(ds: Dataset | None) -> None: if ds is None: @@ -491,6 +523,46 @@ def _convert_volume( self._cleanup_export(volume) + def _create_association( + self, + original_study_uid: str, + pseudonym: str, + study_volumes: list[MassTransferVolume], + study_pseudonymizer: Pseudonymizer, + ) -> None: + """Create a MassTransferAssociation record linking original to pseudonymized UIDs.""" + job = self.mass_task.job + + # Recover the pseudonymized StudyInstanceUID by running a probe dataset + # through the same Anonymizer instance that processed the real data. + # This is deterministic — same input UID always yields the same output. + # NOTE: dicognito's anonymize() walks file_meta, so the probe needs + # a minimal file_meta block to avoid AttributeError. 
+ probe = Dataset() + probe.file_meta = Dataset() + probe.file_meta.TransferSyntaxUID = ExplicitVRLittleEndian + probe.file_meta.MediaStorageSOPClassUID = "1.2.840.10008.5.1.4.1.1.2" + probe.file_meta.MediaStorageSOPInstanceUID = "1.2.3" + probe.StudyInstanceUID = original_study_uid + probe.SOPClassUID = "1.2.840.10008.5.1.4.1.1.2" # CT Image Storage + study_pseudonymizer.anonymizer.anonymize(probe) + pseudonymized_study_uid = str(probe.StudyInstanceUID) + + patient_id = next( + (v.patient_id for v in study_volumes if v.patient_id), "" + ) + + MassTransferAssociation.objects.update_or_create( + job=job, + original_study_instance_uid=original_study_uid, + defaults={ + "task": self.mass_task, + "pseudonym": pseudonym, + "patient_id": patient_id, + "pseudonymized_study_instance_uid": pseudonymized_study_uid, + }, + ) + def _cleanup_export(self, volume: MassTransferVolume) -> None: export_folder = volume.exported_folder if not export_folder or volume.export_cleaned: diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html index 42291778d..051ab5328 100644 --- a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html @@ -10,6 +10,12 @@ {% block heading %} + {% if job.should_link %} + + {% bootstrap_icon "download" %} + Associations CSV + + {% endif %} {% bootstrap_icon "list" %} Job List @@ -37,8 +43,8 @@
{{ job.end_date }}
Granularity
{{ job.get_partition_granularity_display }}
- Pseudonymization
- {{ job.pseudonymize|yesno:"Enabled,Disabled" }}
+ Anonymization
+ {{ job.get_anonymization_mode_display }}
Filters
{% if job.filters.all %} diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 35febb159..9d8f01ab4 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -95,6 +95,7 @@ def test_process_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp end_date=date(2024, 1, 1), partition_granularity=MassTransferJob.PartitionGranularity.DAILY, ) + job.filters.create(owner=user, name="CT Filter", modality="CT") task = MassTransferTask.objects.create( job=job, @@ -133,10 +134,11 @@ def test_process_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp # Mock _find_volumes to return pre-created volumes (skip PACS query) mocker.patch.object(processor, "_find_volumes", return_value=[vol1, vol2]) + mocker.patch("adit.mass_transfer.processors.DicomOperator") export_calls: list[tuple[str, str]] = [] - def fake_export(_, volume, __, pseudonym): + def fake_export(_, volume, __, pseudonym, **kwargs): export_calls.append((volume.series_instance_uid, pseudonym)) mocker.patch.object(processor, "_export_volume", side_effect=fake_export) @@ -162,7 +164,7 @@ def test_process_opt_out_skips_pseudonymization( settings, tmp_path: Path, ): - """When pseudonymize=False, process passes empty pseudonym.""" + """When anonymization_mode=NONE, process passes empty pseudonym.""" settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") MassTransferSettings.objects.create() @@ -176,8 +178,9 @@ def test_process_opt_out_skips_pseudonymization( start_date=date(2024, 1, 1), end_date=date(2024, 1, 1), partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - pseudonymize=False, + anonymization_mode=MassTransferJob.AnonymizationMode.NONE, ) + job.filters.create(owner=user, name="CT Filter", modality="CT") task = MassTransferTask.objects.create( job=job, @@ -205,10 +208,11 @@ def test_process_opt_out_skips_pseudonymization( # Mock _find_volumes to return pre-created volume (skip PACS query) mocker.patch.object(processor, "_find_volumes", return_value=[vol]) + mocker.patch("adit.mass_transfer.processors.DicomOperator") export_calls: list[str] = [] - def fake_export(_, __, ___, pseudonym): + def fake_export(_, __, ___, pseudonym, **kwargs): export_calls.append(pseudonym) mocker.patch.object(processor, "_export_volume", side_effect=fake_export) @@ -406,7 +410,7 @@ def _make_process_env( tmp_path: Path, *, convert_to_nifti: bool = False, - pseudonymize: bool = True, + anonymization_mode: str = "pseudonymize", ) -> MassTransferTaskProcessor: """Create a processor with a fully mocked job for testing process().""" settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") @@ -414,7 +418,9 @@ def _make_process_env( processor = _make_processor(mocker, settings) mock_job = processor.mass_task.job - mock_job.pseudonymize = pseudonymize + mock_job.anonymization_mode = anonymization_mode + mock_job.should_pseudonymize = anonymization_mode != "none" + mock_job.should_link = anonymization_mode == "pseudonymize_with_linking" mock_job.convert_to_nifti = convert_to_nifti mock_job.source.node_type = DicomNode.NodeType.SERVER mock_job.source.dicomserver = mocker.MagicMock() @@ -571,7 +577,7 @@ def test_process_returns_warning_on_partial_failure( call_count = {"n": 0} - def fake_export(op, volume, base, pseudo): + def fake_export(op, volume, base, pseudo, **kwargs): call_count["n"] += 1 if call_count["n"] == 2: raise DicomError("Export failed") @@ -729,7 +735,7 @@ def 
test_process_reuses_existing_pseudonym_on_retry( export_calls: list[tuple[str, str]] = [] - def fake_export(op, volume, base, pseudonym): + def fake_export(op, volume, base, pseudonym, **kwargs): export_calls.append((volume.series_instance_uid, pseudonym)) mocker.patch.object(processor, "_export_volume", side_effect=fake_export) @@ -1206,3 +1212,475 @@ def test_dicom_match_wildcard(): # DICOM uses * as wildcard, which should be converted to regex .* assert _dicom_match("Head*", "Head CT") is True assert _dicom_match("Head*", "Foot CT") is False + + +# --------------------------------------------------------------------------- +# Anonymization mode tests +# --------------------------------------------------------------------------- + + +@pytest.mark.django_db +def test_process_linking_mode_creates_associations( + mocker: MockerFixture, settings, tmp_path: Path +): + """In linking mode, MassTransferAssociation records are created per study.""" + from adit.mass_transfer.models import MassTransferAssociation + + settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") + MassTransferSettings.objects.create() + + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 1), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + anonymization_mode=MassTransferJob.AnonymizationMode.PSEUDONYMIZE_WITH_LINKING, + ) + job.filters.create(owner=user, name="CT Filter", modality="CT") + + task = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) + + vol = MassTransferVolume.objects.create( + job=job, + task=task, + partition_key="20240101", + patient_id="PAT1", + study_instance_uid="1.2.3.4", + series_instance_uid="1.2.3.4.5", + modality="CT", + study_description="", + series_description="Head", + series_number=1, + study_datetime=timezone.now(), + ) + + processor = MassTransferTaskProcessor(task) + mocker.patch.object(processor, "_find_volumes", return_value=[vol]) + mocker.patch("adit.mass_transfer.processors.DicomOperator") + + def fake_export(op, volume, base, pseudonym, **kwargs): + volume.status = MassTransferVolume.Status.EXPORTED + volume.pseudonym = pseudonym + volume.save() + + mocker.patch.object(processor, "_export_volume", side_effect=fake_export) + + mocker.patch( + "adit.mass_transfer.processors.uuid.uuid4", + return_value=uuid.UUID(int=1), + ) + + result = processor.process() + + assocs = MassTransferAssociation.objects.filter(job=job) + assert assocs.count() == 1 + assoc = assocs.first() + assert assoc.original_study_instance_uid == "1.2.3.4" + assert assoc.pseudonym == uuid.UUID(int=1).hex + assert assoc.patient_id == "PAT1" + assert assoc.pseudonymized_study_instance_uid != "" + assert result["status"] == MassTransferTask.Status.SUCCESS + + +@pytest.mark.django_db +def test_process_linking_mode_skips_association_for_failed_study( + mocker: MockerFixture, settings, tmp_path: Path +): + """In linking mode, no association is created if all volumes in a study failed.""" + from adit.mass_transfer.models import MassTransferAssociation + + settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") + MassTransferSettings.objects.create() + + user = UserFactory.create() + source = DicomServerFactory.create() + destination = 
DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 1), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + anonymization_mode=MassTransferJob.AnonymizationMode.PSEUDONYMIZE_WITH_LINKING, + ) + job.filters.create(owner=user, name="CT Filter", modality="CT") + + task = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) + + vol = MassTransferVolume.objects.create( + job=job, + task=task, + partition_key="20240101", + patient_id="PAT1", + study_instance_uid="1.2.3.4", + series_instance_uid="1.2.3.4.5", + modality="CT", + study_description="", + series_description="Head", + series_number=1, + study_datetime=timezone.now(), + ) + + processor = MassTransferTaskProcessor(task) + mocker.patch.object(processor, "_find_volumes", return_value=[vol]) + mocker.patch("adit.mass_transfer.processors.DicomOperator") + + def fake_export_failure(op, volume, base, pseudonym, **kwargs): + raise RuntimeError("DICOM export failed") + + mocker.patch.object(processor, "_export_volume", side_effect=fake_export_failure) + + mocker.patch( + "adit.mass_transfer.processors.uuid.uuid4", + return_value=uuid.UUID(int=1), + ) + + result = processor.process() + + assert MassTransferAssociation.objects.filter(job=job).count() == 0 + assert result["status"] == MassTransferTask.Status.FAILURE + + +@pytest.mark.django_db +def test_longitudinal_linking_across_partitions( + mocker: MockerFixture, settings, tmp_path: Path +): + """Prove that linking mode enables longitudinal tracking across an entire job. + + Scenario: + - Partition 1 (Jan 1): PAT1/Study-A (2 series), PAT2/Study-B (1 series) + - Partition 2 (Jan 2): PAT1/Study-C (1 series) + + After processing both partitions: + - 3 association records exist (one per study) + - PAT1 has 2 associations → linkable via patient_id + - PAT2 has 1 association + - The pseudonymized StudyInstanceUID in each association matches + what dicognito actually produced during export (probe-anonymize + consistency) + """ + from adit.mass_transfer.models import MassTransferAssociation + from adit.core.utils.pseudonymizer import Pseudonymizer + + settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") + MassTransferSettings.objects.create() + + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 2), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + anonymization_mode=MassTransferJob.AnonymizationMode.PSEUDONYMIZE_WITH_LINKING, + ) + job.filters.create(owner=user, name="CT Filter", modality="CT") + + # --- Partition 1: Jan 1 --- + task1 = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.make_aware(datetime(2024, 1, 1)), + partition_end=timezone.make_aware(datetime(2024, 1, 1, 23, 59, 59)), + partition_key="20240101", + ) + + # PAT1, Study-A, two series + vol_a1 = MassTransferVolume.objects.create( + job=job, task=task1, partition_key="20240101", + patient_id="PAT1", + study_instance_uid="1.2.840.10001.1.1", + series_instance_uid="1.2.840.10001.1.1.1", + modality="CT", study_description="Brain CT", + 
series_description="Axial", series_number=1, + study_datetime=timezone.make_aware(datetime(2024, 1, 1, 10, 0)), + ) + vol_a2 = MassTransferVolume.objects.create( + job=job, task=task1, partition_key="20240101", + patient_id="PAT1", + study_instance_uid="1.2.840.10001.1.1", + series_instance_uid="1.2.840.10001.1.1.2", + modality="CT", study_description="Brain CT", + series_description="Coronal", series_number=2, + study_datetime=timezone.make_aware(datetime(2024, 1, 1, 10, 0)), + ) + + # PAT2, Study-B, one series + vol_b = MassTransferVolume.objects.create( + job=job, task=task1, partition_key="20240101", + patient_id="PAT2", + study_instance_uid="1.2.840.10002.1.1", + series_instance_uid="1.2.840.10002.1.1.1", + modality="CT", study_description="Chest CT", + series_description="Axial", series_number=1, + study_datetime=timezone.make_aware(datetime(2024, 1, 1, 14, 0)), + ) + + # --- Partition 2: Jan 2 --- + task2 = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.make_aware(datetime(2024, 1, 2)), + partition_end=timezone.make_aware(datetime(2024, 1, 2, 23, 59, 59)), + partition_key="20240102", + ) + + # PAT1 again, Study-C (different study, same patient) + vol_c = MassTransferVolume.objects.create( + job=job, task=task2, partition_key="20240102", + patient_id="PAT1", + study_instance_uid="1.2.840.10001.1.2", + series_instance_uid="1.2.840.10001.1.2.1", + modality="CT", study_description="Follow-up Brain CT", + series_description="Axial", series_number=1, + study_datetime=timezone.make_aware(datetime(2024, 1, 2, 9, 0)), + ) + + # --- Capture pseudonymized UIDs produced during export --- + # Each study's export uses a Pseudonymizer instance. We capture the + # pseudonymized StudyInstanceUID that dicognito actually produces during + # the export callback, keyed by original StudyInstanceUID. 
+ pseudonymized_uids: dict[str, str] = {} + + def fake_export(op, volume, base, pseudonym, *, study_pseudonymizer=None): + """Fake export that actually runs dicognito on a realistic dataset, + so we can capture the pseudonymized StudyInstanceUID.""" + if study_pseudonymizer is not None: + # Build a realistic DICOM dataset and run the pseudonymizer + ds = Dataset() + ds.file_meta = Dataset() + ds.file_meta.TransferSyntaxUID = "1.2.840.10008.1.2.1" + ds.file_meta.MediaStorageSOPClassUID = "1.2.840.10008.5.1.4.1.1.2" + ds.file_meta.MediaStorageSOPInstanceUID = volume.series_instance_uid + ds.StudyInstanceUID = volume.study_instance_uid + ds.SeriesInstanceUID = volume.series_instance_uid + ds.SOPInstanceUID = volume.series_instance_uid + ".1" + ds.SOPClassUID = "1.2.840.10008.5.1.4.1.1.2" + ds.PatientID = volume.patient_id + ds.PatientName = volume.patient_id + ds.StudyDate = "20240101" + ds.StudyTime = "100000" + + study_pseudonymizer.pseudonymize(ds, pseudonym) + + # Capture the pseudonymized StudyInstanceUID (should be the same + # for all series in the same study sharing the same Anonymizer) + pseudonymized_uids[volume.study_instance_uid] = str(ds.StudyInstanceUID) + + volume.pseudonym = pseudonym + volume.status = MassTransferVolume.Status.EXPORTED + volume.save() + + mocker.patch("adit.mass_transfer.processors.DicomOperator") + + # --- Process partition 1 --- + processor1 = MassTransferTaskProcessor(task1) + mocker.patch.object(processor1, "_find_volumes", return_value=[vol_a1, vol_a2, vol_b]) + mocker.patch.object(processor1, "_export_volume", side_effect=fake_export) + + result1 = processor1.process() + assert result1["status"] == MassTransferTask.Status.SUCCESS + + # --- Process partition 2 --- + processor2 = MassTransferTaskProcessor(task2) + mocker.patch.object(processor2, "_find_volumes", return_value=[vol_c]) + mocker.patch.object(processor2, "_export_volume", side_effect=fake_export) + + result2 = processor2.process() + assert result2["status"] == MassTransferTask.Status.SUCCESS + + # --- Verify association records --- + assocs = MassTransferAssociation.objects.filter(job=job).order_by( + "original_study_instance_uid" + ) + assert assocs.count() == 3 # one per study + + assoc_map = {a.original_study_instance_uid: a for a in assocs} + assert set(assoc_map.keys()) == { + "1.2.840.10001.1.1", + "1.2.840.10002.1.1", + "1.2.840.10001.1.2", + } + + # Each association's pseudonymized UID differs from the original + for assoc in assocs: + assert assoc.pseudonymized_study_instance_uid != assoc.original_study_instance_uid + assert assoc.pseudonymized_study_instance_uid != "" + + # Probe-anonymize consistency: the UID in the association table must + # match what dicognito actually produced during export + for orig_uid, assoc in assoc_map.items(): + assert orig_uid in pseudonymized_uids, f"No export captured for {orig_uid}" + assert assoc.pseudonymized_study_instance_uid == pseudonymized_uids[orig_uid], ( + f"Probe UID mismatch for {orig_uid}: " + f"association={assoc.pseudonymized_study_instance_uid}, " + f"export={pseudonymized_uids[orig_uid]}" + ) + + # --- Longitudinal linking via patient_id --- + # PAT1 has studies A and C — we can link them through the association table + pat1_assocs = [a for a in assocs if a.patient_id == "PAT1"] + assert len(pat1_assocs) == 2 + pat1_studies = {a.original_study_instance_uid for a in pat1_assocs} + assert pat1_studies == {"1.2.840.10001.1.1", "1.2.840.10001.1.2"} + + # Their pseudonymized UIDs are different (different studies) + pat1_pseudo_uids = 
{a.pseudonymized_study_instance_uid for a in pat1_assocs} + assert len(pat1_pseudo_uids) == 2 + + # PAT2 has only study B + pat2_assocs = [a for a in assocs if a.patient_id == "PAT2"] + assert len(pat2_assocs) == 1 + assert pat2_assocs[0].original_study_instance_uid == "1.2.840.10002.1.1" + + # Associations are tied to the correct tasks + assoc_a = assoc_map["1.2.840.10001.1.1"] + assoc_b = assoc_map["1.2.840.10002.1.1"] + assoc_c = assoc_map["1.2.840.10001.1.2"] + assert assoc_a.task_id == task1.pk + assert assoc_b.task_id == task1.pk + assert assoc_c.task_id == task2.pk + + +@pytest.mark.django_db +def test_process_pseudonymize_mode_no_associations( + mocker: MockerFixture, settings, tmp_path: Path +): + """In pseudonymize mode (without linking), no associations are created.""" + from adit.mass_transfer.models import MassTransferAssociation + + settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") + MassTransferSettings.objects.create() + + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 1), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + anonymization_mode=MassTransferJob.AnonymizationMode.PSEUDONYMIZE, + ) + job.filters.create(owner=user, name="CT Filter", modality="CT") + + task = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) + + vol = MassTransferVolume.objects.create( + job=job, + task=task, + partition_key="20240101", + patient_id="PAT1", + study_instance_uid="1.2.3.4", + series_instance_uid="1.2.3.4.5", + modality="CT", + study_description="", + series_description="Head", + series_number=1, + study_datetime=timezone.now(), + ) + + processor = MassTransferTaskProcessor(task) + mocker.patch.object(processor, "_find_volumes", return_value=[vol]) + mocker.patch("adit.mass_transfer.processors.DicomOperator") + + def fake_export(op, volume, base, pseudonym, **kwargs): + volume.status = MassTransferVolume.Status.EXPORTED + volume.pseudonym = pseudonym + volume.save() + + mocker.patch.object(processor, "_export_volume", side_effect=fake_export) + + result = processor.process() + + assert MassTransferAssociation.objects.filter(job=job).count() == 0 + assert result["status"] == MassTransferTask.Status.SUCCESS + + +def test_process_none_mode_skips_pseudonymizer( + mocker: MockerFixture, settings, tmp_path: Path +): + """In 'none' anonymization mode, no pseudonym is generated and no pseudonymizer is created.""" + processor = _make_process_env( + mocker, settings, tmp_path, anonymization_mode="none" + ) + vol = _make_mock_volume(mocker, series_uid="s-1") + + mocker.patch.object(processor, "_find_volumes", return_value=[vol]) + + export_calls: list[tuple[str, object]] = [] + + def fake_export(op, volume, base, pseudonym, **kwargs): + export_calls.append((pseudonym, kwargs.get("study_pseudonymizer"))) + + mocker.patch.object(processor, "_export_volume", side_effect=fake_export) + + result = processor.process() + + assert len(export_calls) == 1 + pseudonym, study_pseudonymizer = export_calls[0] + assert pseudonym == "" + assert study_pseudonymizer is None + assert result["status"] == MassTransferTask.Status.SUCCESS + + +def test_process_pseudonymize_mode_creates_per_study_pseudonymizer( + mocker: 
MockerFixture, settings, tmp_path: Path +): + """In pseudonymize mode, a Pseudonymizer is created per study and shared across volumes.""" + from adit.core.utils.pseudonymizer import Pseudonymizer + + processor = _make_process_env(mocker, settings, tmp_path) + + vol1 = _make_mock_volume(mocker, study_uid="study-A", series_uid="s-1") + vol2 = _make_mock_volume(mocker, study_uid="study-A", series_uid="s-2") + vol3 = _make_mock_volume(mocker, study_uid="study-B", series_uid="s-3") + + mocker.patch.object(processor, "_find_volumes", return_value=[vol1, vol2, vol3]) + + pseudonymizer_ids: list[int | None] = [] + + def fake_export(op, volume, base, pseudonym, **kwargs): + ps = kwargs.get("study_pseudonymizer") + pseudonymizer_ids.append(id(ps) if ps else None) + + mocker.patch.object(processor, "_export_volume", side_effect=fake_export) + + result = processor.process() + + # Two volumes in study-A share the same Pseudonymizer instance + assert pseudonymizer_ids[0] is not None + assert pseudonymizer_ids[0] == pseudonymizer_ids[1] + # Volume in study-B gets a different Pseudonymizer instance + assert pseudonymizer_ids[2] is not None + assert pseudonymizer_ids[2] != pseudonymizer_ids[0] + assert result["status"] == MassTransferTask.Status.SUCCESS diff --git a/adit/mass_transfer/urls.py b/adit/mass_transfer/urls.py index 45b29ddd7..c4d5d6d7c 100644 --- a/adit/mass_transfer/urls.py +++ b/adit/mass_transfer/urls.py @@ -5,6 +5,7 @@ MassTransferFilterDeleteView, MassTransferFilterListView, MassTransferFilterUpdateView, + MassTransferJobAssociationsExportView, MassTransferJobCancelView, MassTransferJobCreateView, MassTransferJobDeleteView, @@ -46,6 +47,11 @@ path("jobs/", MassTransferJobListView.as_view(), name="mass_transfer_job_list"), path("jobs/new/", MassTransferJobCreateView.as_view(), name="mass_transfer_job_create"), path("jobs//", MassTransferJobDetailView.as_view(), name="mass_transfer_job_detail"), + path( + "jobs//associations/", + MassTransferJobAssociationsExportView.as_view(), + name="mass_transfer_job_associations_export", + ), path( "jobs//delete/", MassTransferJobDeleteView.as_view(), diff --git a/adit/mass_transfer/views.py b/adit/mass_transfer/views.py index 308d41564..0522ae25f 100644 --- a/adit/mass_transfer/views.py +++ b/adit/mass_transfer/views.py @@ -1,9 +1,12 @@ +import csv from typing import Any, cast from adit_radis_shared.common.views import BaseUpdatePreferencesView from django.conf import settings from django.contrib.auth.mixins import LoginRequiredMixin +from django.http import HttpResponse from django.urls import reverse_lazy +from django.views import View from django.views.generic import CreateView, DeleteView, ListView, UpdateView from adit.core.views import ( @@ -25,7 +28,7 @@ from .filters import MassTransferJobFilter, MassTransferTaskFilter from .forms import MassTransferFilterForm, MassTransferJobForm from .mixins import MassTransferLockedMixin -from .models import MassTransferFilter, MassTransferJob, MassTransferTask +from .models import MassTransferAssociation, MassTransferFilter, MassTransferJob, MassTransferTask from .tables import MassTransferJobTable, MassTransferTaskTable MASS_TRANSFER_SOURCE = "mass_transfer_source" @@ -91,6 +94,39 @@ class MassTransferJobDetailView(MassTransferLockedMixin, DicomJobDetailView): template_name = "mass_transfer/mass_transfer_job_detail.html" +class MassTransferJobAssociationsExportView(LoginRequiredMixin, MassTransferLockedMixin, View): + """Streams a CSV of pseudonymization associations for a linking-mode job.""" + + def 
get(self, request, pk): + if request.user.is_staff: + qs = MassTransferJob.objects.all() + else: + qs = MassTransferJob.objects.filter(owner=request.user) + + job = qs.get(pk=pk) + associations = MassTransferAssociation.objects.filter(job=job).order_by("id") + + response = HttpResponse(content_type="text/csv") + response["Content-Disposition"] = f'attachment; filename="associations_job_{job.pk}.csv"' + + writer = csv.writer(response) + writer.writerow([ + "pseudonym", + "patient_id", + "original_study_instance_uid", + "pseudonymized_study_instance_uid", + ]) + for assoc in associations.iterator(): + writer.writerow([ + assoc.pseudonym, + assoc.patient_id, + assoc.original_study_instance_uid, + assoc.pseudonymized_study_instance_uid, + ]) + + return response + + class MassTransferJobDeleteView(MassTransferLockedMixin, DicomJobDeleteView): model = MassTransferJob success_url = cast(str, reverse_lazy("mass_transfer_job_list")) From 866220777ffcb5a29f5c9ef94e64f2744674188a Mon Sep 17 00:00:00 2001 From: Ritwik Date: Sat, 28 Feb 2026 21:25:33 +0100 Subject: [PATCH 019/103] Fix DIMSE connection leak on abandoned generators and enable job cancellation of in-progress tasks --- adit/core/tasks.py | 5 +++++ adit/core/utils/dimse_connector.py | 8 ++++---- adit/core/views.py | 14 ++++++++++---- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/adit/core/tasks.py b/adit/core/tasks.py index 7a5777465..3bd8dcf38 100644 --- a/adit/core/tasks.py +++ b/adit/core/tasks.py @@ -113,6 +113,11 @@ def _monitor_task(context: JobContext, future: ProcessFuture) -> None: dicom_task.log = result["log"] ensure_db_connection() + except futures.CancelledError: + dicom_task.status = DicomTask.Status.CANCELED + dicom_task.message = "Task was canceled." + ensure_db_connection() + except futures.TimeoutError: dicom_task.message = "Task was aborted due to timeout." dicom_task.status = DicomTask.Status.FAILURE diff --git a/adit/core/utils/dimse_connector.py b/adit/core/utils/dimse_connector.py index 9f8ff9684..66dcab6bf 100644 --- a/adit/core/utils/dimse_connector.py +++ b/adit/core/utils/dimse_connector.py @@ -75,10 +75,9 @@ def gen_wrapper(self: "DimseConnector", *args, **kwargs): except Exception as err: self.abort_connection() raise err - - if opened_connection and self.auto_connect: - self.close_connection() - opened_connection = False + finally: + if opened_connection and self.auto_connect and self.assoc: + self.close_connection() @wraps(func) def func_wrapper(self: "DimseConnector", *args, **kwargs): @@ -205,6 +204,7 @@ def abort_connection(self): if self.assoc: logger.debug("Aborting connection to DICOM server %s.", self.server.ae_title) self.assoc.abort() + self.assoc = None @retry_dimse_find @connect_to_server("C-FIND") diff --git a/adit/core/views.py b/adit/core/views.py index 41bc32b72..934a3a644 100644 --- a/adit/core/views.py +++ b/adit/core/views.py @@ -226,14 +226,20 @@ def post(self, request: AuthenticatedHttpRequest, *args, **kwargs) -> HttpRespon f"Job with ID {job.pk} and status {job.get_status_display()} is not cancelable." 
) - tasks = job.tasks.filter(status=DicomTask.Status.PENDING) - for dicom_task in tasks: + pending_tasks = job.tasks.filter(status=DicomTask.Status.PENDING) + for dicom_task in pending_tasks: queued_job_id = dicom_task.queued_job_id if queued_job_id is not None: app.job_manager.cancel_job_by_id(queued_job_id, delete_job=True) - tasks.update(status=DicomTask.Status.CANCELED) + pending_tasks.update(status=DicomTask.Status.CANCELED) - if job.tasks.filter(status=DicomTask.Status.IN_PROGRESS).exists(): + in_progress_tasks = job.tasks.filter(status=DicomTask.Status.IN_PROGRESS) + for dicom_task in in_progress_tasks: + queued_job_id = dicom_task.queued_job_id + if queued_job_id is not None: + app.job_manager.cancel_job_by_id(queued_job_id, abort=True) + + if in_progress_tasks.exists(): job.status = DicomJob.Status.CANCELING else: job.status = DicomJob.Status.CANCELED From 31bd1e345e861e5bd9eb61ad1cc58b0fb66fc12d Mon Sep 17 00:00:00 2001 From: Ritwik Date: Sun, 1 Mar 2026 15:20:22 +0100 Subject: [PATCH 020/103] Remove MassTransferAssociation model, show user failed transfers with actual reason, improve status message and fix previous jobs from not being processed if the worker processing them was dead --- adit/core/tasks.py | 7 +- adit/core/utils/dimse_connector.py | 4 + adit/mass_transfer/admin.py | 2 - .../migrations/0005_skipped_volume_status.py | 18 ++ ..._replace_association_with_volume_fields.py | 26 +++ adit/mass_transfer/models.py | 44 +---- adit/mass_transfer/processors.py | 135 ++++++++------- .../mass_transfer_task_detail.html | 35 +++- .../templatetags/mass_transfer_extras.py | 14 ++ adit/mass_transfer/tests/test_processor.py | 163 ++++++------------ adit/mass_transfer/views.py | 42 ++++- 11 files changed, 270 insertions(+), 220 deletions(-) create mode 100644 adit/mass_transfer/migrations/0005_skipped_volume_status.py create mode 100644 adit/mass_transfer/migrations/0006_replace_association_with_volume_fields.py diff --git a/adit/core/tasks.py b/adit/core/tasks.py index 3bd8dcf38..d3a592f81 100644 --- a/adit/core/tasks.py +++ b/adit/core/tasks.py @@ -70,7 +70,12 @@ def _run_dicom_task( assert context.job dicom_task = get_dicom_task(model_label, task_id) - assert dicom_task.status == DicomTask.Status.PENDING + # The assertion status == PENDING assumed that tasks always arrive fresh, + # but in reality a retried task can arrive in a half-finished state. + # A task may still be IN_PROGRESS if the worker was killed before the + # finally block could update its status. Accept both PENDING and + # IN_PROGRESS so the retry can proceed. + assert dicom_task.status in (DicomTask.Status.PENDING, DicomTask.Status.IN_PROGRESS) # When the first DICOM task of a job is processed then the status of the # job switches from PENDING to IN_PROGRESS diff --git a/adit/core/utils/dimse_connector.py b/adit/core/utils/dimse_connector.py index 66dcab6bf..221874217 100644 --- a/adit/core/utils/dimse_connector.py +++ b/adit/core/utils/dimse_connector.py @@ -76,6 +76,10 @@ def gen_wrapper(self: "DimseConnector", *args, **kwargs): self.abort_connection() raise err finally: + # When a generator is abandoned mid-iteration (e.g. early return from + # a for loop), Python throws GeneratorExit — a BaseException, not an + # Exception. Without finally, close_connection() was skipped and the + # DIMSE association leaked on the PACS side. 
if opened_connection and self.auto_connect and self.assoc: self.close_connection() diff --git a/adit/mass_transfer/admin.py b/adit/mass_transfer/admin.py index c73237348..05ab53e5b 100644 --- a/adit/mass_transfer/admin.py +++ b/adit/mass_transfer/admin.py @@ -3,7 +3,6 @@ from adit.core.admin import DicomJobAdmin, DicomTaskAdmin from .models import ( - MassTransferAssociation, MassTransferFilter, MassTransferJob, MassTransferSettings, @@ -21,4 +20,3 @@ class MassTransferJobAdmin(DicomJobAdmin): admin.site.register(MassTransferSettings, admin.ModelAdmin) admin.site.register(MassTransferFilter, admin.ModelAdmin) admin.site.register(MassTransferVolume, admin.ModelAdmin) -admin.site.register(MassTransferAssociation, admin.ModelAdmin) diff --git a/adit/mass_transfer/migrations/0005_skipped_volume_status.py b/adit/mass_transfer/migrations/0005_skipped_volume_status.py new file mode 100644 index 000000000..3f23f3423 --- /dev/null +++ b/adit/mass_transfer/migrations/0005_skipped_volume_status.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.8 on 2026-03-01 12:37 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('mass_transfer', '0004_anonymization_mode'), + ] + + operations = [ + migrations.AlterField( + model_name='masstransfervolume', + name='status', + field=models.CharField(choices=[('pending', 'Pending'), ('exported', 'Exported'), ('converted', 'Converted'), ('skipped', 'Skipped'), ('error', 'Error')], default='pending', max_length=16), + ), + ] diff --git a/adit/mass_transfer/migrations/0006_replace_association_with_volume_fields.py b/adit/mass_transfer/migrations/0006_replace_association_with_volume_fields.py new file mode 100644 index 000000000..b0cddefa2 --- /dev/null +++ b/adit/mass_transfer/migrations/0006_replace_association_with_volume_fields.py @@ -0,0 +1,26 @@ +# Generated by Django 5.2.8 on 2026-03-01 13:30 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('mass_transfer', '0005_skipped_volume_status'), + ] + + operations = [ + migrations.AddField( + model_name='masstransfervolume', + name='series_instance_uid_pseudonymized', + field=models.CharField(blank=True, default='', max_length=128), + ), + migrations.AddField( + model_name='masstransfervolume', + name='study_instance_uid_pseudonymized', + field=models.CharField(blank=True, default='', max_length=128), + ), + migrations.DeleteModel( + name='MassTransferAssociation', + ), + ] diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index a3b3641ba..4a0da32c1 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -147,7 +147,10 @@ def cleanup_on_failure(self) -> None: volumes = self.volumes.exclude(exported_folder="") for volume in volumes: - if volume.status == MassTransferVolume.Status.CONVERTED: + if volume.status in ( + MassTransferVolume.Status.CONVERTED, + MassTransferVolume.Status.SKIPPED, + ): continue # When not converting to NIfTI, EXPORTED is the final state and # the files live in the destination folder — don't delete them. 
@@ -179,6 +182,7 @@ class Status(models.TextChoices): PENDING = "pending", "Pending" EXPORTED = "exported", "Exported" CONVERTED = "converted", "Converted" + SKIPPED = "skipped", "Skipped" ERROR = "error", "Error" job = models.ForeignKey(MassTransferJob, on_delete=models.CASCADE, related_name="volumes") @@ -209,6 +213,9 @@ class Status(models.TextChoices): export_cleaned = models.BooleanField(default=False) converted_file = models.TextField(blank=True, default="") + study_instance_uid_pseudonymized = models.CharField(max_length=128, blank=True, default="") + series_instance_uid_pseudonymized = models.CharField(max_length=128, blank=True, default="") + status = models.CharField(max_length=16, choices=Status.choices, default=Status.PENDING) log = models.TextField(blank=True, default="") @@ -233,38 +240,3 @@ def add_log(self, msg: str) -> None: self.log += msg -class MassTransferAssociation(models.Model): - """Maps original DICOM UIDs to their pseudonymized counterparts for longitudinal linking.""" - - job = models.ForeignKey( - MassTransferJob, - on_delete=models.CASCADE, - related_name="associations", - ) - task = models.ForeignKey( - MassTransferTask, - on_delete=models.SET_NULL, - null=True, - blank=True, - related_name="associations", - ) - pseudonym = models.CharField(max_length=64) - patient_id = models.CharField(max_length=64) - original_study_instance_uid = models.CharField(max_length=128) - pseudonymized_study_instance_uid = models.CharField(max_length=128) - created = models.DateTimeField(auto_now_add=True) - - class Meta: - ordering = ("id",) - constraints = [ - models.UniqueConstraint( - fields=["job", "original_study_instance_uid"], - name="mass_transfer_unique_association_per_study", - ) - ] - - def __str__(self) -> str: - return ( - f"MassTransferAssociation {self.original_study_instance_uid} " - f"-> {self.pseudonymized_study_instance_uid}" - ) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 743477f91..9b34836ec 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -11,7 +11,6 @@ from django.conf import settings from django.utils import timezone from pydicom import Dataset -from pydicom.uid import ExplicitVRLittleEndian from adit.core.errors import DicomError, RetriableDicomError from adit.core.models import DicomNode, DicomTask @@ -24,7 +23,6 @@ from adit.core.utils.sanitize import sanitize_filename from .models import ( - MassTransferAssociation, MassTransferFilter, MassTransferSettings, MassTransferTask, @@ -45,6 +43,14 @@ def _dicom_match(pattern: str, value: str | None) -> bool: return bool(regex.search(str(value))) +def _short_error_reason(error: str) -> str: + """Extract a short, groupable reason from a volume error message.""" + # Take the last non-empty line — for dcm2niix output this is the + # meaningful summary (e.g. "No valid DICOM images were found"). 
+ lines = [line.strip() for line in error.strip().splitlines() if line.strip()] + return lines[-1] if lines else error + + def _parse_int(value: object, default: int | None = None) -> int | None: try: if value is None or value == "": @@ -159,7 +165,9 @@ def process(self): ) total_processed = 0 + total_skipped = 0 total_failed = 0 + failed_reasons: dict[str, int] = {} for study_uid, study_volumes in volumes_by_study.items(): pseudonym = "" @@ -179,6 +187,9 @@ def process(self): if volume.status == done_status: total_processed += 1 continue + if volume.status == MassTransferVolume.Status.SKIPPED: + total_skipped += 1 + continue try: if job.convert_to_nifti: @@ -192,7 +203,12 @@ def process(self): operator, volume, output_base, pseudonym, study_pseudonymizer=study_pseudonymizer, ) - total_processed += 1 + + # _convert_volume may set SKIPPED for non-image DICOMs + if volume.status == MassTransferVolume.Status.SKIPPED: + total_skipped += 1 + else: + total_processed += 1 except RetriableDicomError: raise # let Procrastinate retry the entire task except Exception as err: @@ -204,39 +220,47 @@ def process(self): volume.add_log(str(err)) volume.save() total_failed += 1 - - # Record association for longitudinal linking, but only if at - # least one volume in the study was successfully processed. - study_has_success = any( - v.status != MassTransferVolume.Status.ERROR for v in study_volumes - ) - if job.should_link and pseudonym and study_pseudonymizer and study_has_success: - self._create_association( - study_uid, pseudonym, study_volumes, study_pseudonymizer, - ) + reason = _short_error_reason(str(err)) + failed_reasons[reason] = failed_reasons.get(reason, 0) + 1 log_lines = [ f"Partition {self.mass_task.partition_key}", f"Studies found: {len(volumes_by_study)}", f"Volumes found: {len(volumes)}", f"Processed: {total_processed}", - f"Failed: {total_failed}", ] - - if total_failed and total_processed: - status = MassTransferTask.Status.WARNING - message = "Some volumes failed during mass transfer." - elif total_failed and not total_processed: - status = MassTransferTask.Status.FAILURE - message = "All volumes failed during mass transfer." - elif not volumes: + if total_skipped: + log_lines.append(f"Skipped: {total_skipped}") + if total_failed: + log_lines.append(f"Failed: {total_failed}") + if failed_reasons: + log_lines.append("Failure reasons:") + for reason, count in failed_reasons.items(): + log_lines.append(f" {count}x {reason}") + + if not volumes: status = MassTransferTask.Status.SUCCESS message = "No volumes found for this partition." + elif total_failed and not total_processed: + status = MassTransferTask.Status.FAILURE + message = f"All {total_failed} volumes failed during mass transfer." else: - status = MassTransferTask.Status.SUCCESS + # Build a unified message: "x studies, y volumes processed (z skipped, w failed)" + parts = [] + if total_skipped: + parts.append(f"{total_skipped} skipped") + if total_failed: + parts.append(f"{total_failed} failed") + suffix = f" ({', '.join(parts)})" if parts else "" + + if total_failed: + status = MassTransferTask.Status.WARNING + else: + status = MassTransferTask.Status.SUCCESS + message = ( - f"Mass transfer complete: {len(volumes_by_study)} studies, " - f"{total_processed} volumes processed." + f"{len(volumes_by_study)} studies, " + f"{total_processed} volumes processed{suffix}." ) return { @@ -451,10 +475,18 @@ def _export_volume( # all volumes in the same study. 
manipulator = DicomManipulator(pseudonymizer=study_pseudonymizer) + # Capture pseudonymized UIDs from the first image after anonymization. + pseudonymized_study_uid = "" + pseudonymized_series_uid = "" + def callback(ds: Dataset | None) -> None: + nonlocal pseudonymized_study_uid, pseudonymized_series_uid if ds is None: return manipulator.manipulate(ds, pseudonym=pseudonym) + if pseudonym and not pseudonymized_study_uid: + pseudonymized_study_uid = str(ds.StudyInstanceUID) + pseudonymized_series_uid = str(ds.SeriesInstanceUID) file_name = sanitize_filename(f"{ds.SOPInstanceUID}.dcm") write_dataset(ds, export_path / file_name) @@ -466,6 +498,8 @@ def callback(ds: Dataset | None) -> None: ) volume.pseudonym = pseudonym + volume.study_instance_uid_pseudonymized = pseudonymized_study_uid + volume.series_instance_uid_pseudonymized = pseudonymized_series_uid volume.status = MassTransferVolume.Status.EXPORTED volume.save() @@ -505,6 +539,17 @@ def _convert_volume( ] result = subprocess.run(cmd, check=False, capture_output=True, text=True) + combined_output = (result.stdout or "") + (result.stderr or "") + + # dcm2niix returns non-zero when the input contains only non-image + # DICOM objects (structured reports, presentation states, etc.). + # This is not an error — there is simply nothing to convert. + if "No valid DICOM images" in combined_output: + volume.status = MassTransferVolume.Status.SKIPPED + volume.add_log("Non-image DICOM series (skipped by dcm2niix)") + volume.save() + return + if result.returncode != 0: output = result.stderr or result.stdout raise DicomError( @@ -523,46 +568,6 @@ def _convert_volume( self._cleanup_export(volume) - def _create_association( - self, - original_study_uid: str, - pseudonym: str, - study_volumes: list[MassTransferVolume], - study_pseudonymizer: Pseudonymizer, - ) -> None: - """Create a MassTransferAssociation record linking original to pseudonymized UIDs.""" - job = self.mass_task.job - - # Recover the pseudonymized StudyInstanceUID by running a probe dataset - # through the same Anonymizer instance that processed the real data. - # This is deterministic — same input UID always yields the same output. - # NOTE: dicognito's anonymize() walks file_meta, so the probe needs - # a minimal file_meta block to avoid AttributeError. 
- probe = Dataset() - probe.file_meta = Dataset() - probe.file_meta.TransferSyntaxUID = ExplicitVRLittleEndian - probe.file_meta.MediaStorageSOPClassUID = "1.2.840.10008.5.1.4.1.1.2" - probe.file_meta.MediaStorageSOPInstanceUID = "1.2.3" - probe.StudyInstanceUID = original_study_uid - probe.SOPClassUID = "1.2.840.10008.5.1.4.1.1.2" # CT Image Storage - study_pseudonymizer.anonymizer.anonymize(probe) - pseudonymized_study_uid = str(probe.StudyInstanceUID) - - patient_id = next( - (v.patient_id for v in study_volumes if v.patient_id), "" - ) - - MassTransferAssociation.objects.update_or_create( - job=job, - original_study_instance_uid=original_study_uid, - defaults={ - "task": self.mass_task, - "pseudonym": pseudonym, - "patient_id": patient_id, - "pseudonymized_study_instance_uid": pseudonymized_study_uid, - }, - ) - def _cleanup_export(self, volume: MassTransferVolume) -> None: export_folder = volume.exported_folder if not export_folder or volume.export_cleaned: diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html index 7fc7ad869..2f3b30fdc 100644 --- a/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html @@ -1,6 +1,6 @@ {% extends "mass_transfer/mass_transfer_layout.html" %} {% load dicom_task_status_css_class from core_extras %} -{% load task_control_panel from mass_transfer_extras %} +{% load task_control_panel volume_status_css_class from mass_transfer_extras %} {% block title %} Mass Transfer Task {% endblock title %} @@ -26,5 +26,38 @@
Log
{{ task.log|default:"" }}
+ {% if problem_volumes %}
+ <h5>Skipped & Failed Volumes</h5>
+ <table>
+ <thead>
+ <tr>
+ <th>Status</th>
+ <th>Modality</th>
+ <th>Series #</th>
+ <th>Series Description</th>
+ <th>Study Date</th>
+ <th>Reason</th>
+ </tr>
+ </thead>
+ <tbody>
+ {% for vol in problem_volumes %}
+ <tr>
+ <td><span class="{{ vol.status|volume_status_css_class }}">{{ vol.get_status_display }}</span></td>
+ <td>{{ vol.modality }}</td>
+ <td>{{ vol.series_number|default:"—" }}</td>
+ <td>{{ vol.series_description|default:"—" }}</td>
+ <td>{{ vol.study_datetime|date:"Y-m-d" }}</td>
+ <td>{{ vol.log|default:"—" }}</td>
+ </tr>
+ {% endfor %}
+ </tbody>
+ </table>
+ {% endif %} {% task_control_panel %} {% endblock content %} diff --git a/adit/mass_transfer/templatetags/mass_transfer_extras.py b/adit/mass_transfer/templatetags/mass_transfer_extras.py index 559e93832..fd4af6d7a 100644 --- a/adit/mass_transfer/templatetags/mass_transfer_extras.py +++ b/adit/mass_transfer/templatetags/mass_transfer_extras.py @@ -2,9 +2,23 @@ from django.template import Library +from ..models import MassTransferVolume + register = Library() +@register.filter +def volume_status_css_class(status: str) -> str: + css_classes = { + MassTransferVolume.Status.PENDING: "text-secondary", + MassTransferVolume.Status.EXPORTED: "text-info", + MassTransferVolume.Status.CONVERTED: "text-success", + MassTransferVolume.Status.SKIPPED: "text-muted", + MassTransferVolume.Status.ERROR: "text-danger", + } + return css_classes.get(status, "text-secondary") + + @register.inclusion_tag("core/_job_detail_control_panel.html", takes_context=True) def job_control_panel(context: dict[str, Any]) -> dict[str, Any]: return { diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 9d8f01ab4..1f53517a5 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -1220,12 +1220,10 @@ def test_dicom_match_wildcard(): @pytest.mark.django_db -def test_process_linking_mode_creates_associations( +def test_process_linking_mode_stores_pseudonymized_uids( mocker: MockerFixture, settings, tmp_path: Path ): - """In linking mode, MassTransferAssociation records are created per study.""" - from adit.mass_transfer.models import MassTransferAssociation - + """In linking mode, pseudonymized UIDs are stored on the volume.""" settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") MassTransferSettings.objects.create() @@ -1272,6 +1270,8 @@ def test_process_linking_mode_creates_associations( def fake_export(op, volume, base, pseudonym, **kwargs): volume.status = MassTransferVolume.Status.EXPORTED volume.pseudonym = pseudonym + volume.study_instance_uid_pseudonymized = "9.8.7.6" + volume.series_instance_uid_pseudonymized = "9.8.7.6.5" volume.save() mocker.patch.object(processor, "_export_volume", side_effect=fake_export) @@ -1283,23 +1283,18 @@ def fake_export(op, volume, base, pseudonym, **kwargs): result = processor.process() - assocs = MassTransferAssociation.objects.filter(job=job) - assert assocs.count() == 1 - assoc = assocs.first() - assert assoc.original_study_instance_uid == "1.2.3.4" - assert assoc.pseudonym == uuid.UUID(int=1).hex - assert assoc.patient_id == "PAT1" - assert assoc.pseudonymized_study_instance_uid != "" + vol.refresh_from_db() + assert vol.study_instance_uid_pseudonymized == "9.8.7.6" + assert vol.series_instance_uid_pseudonymized == "9.8.7.6.5" + assert vol.pseudonym == uuid.UUID(int=1).hex assert result["status"] == MassTransferTask.Status.SUCCESS @pytest.mark.django_db -def test_process_linking_mode_skips_association_for_failed_study( +def test_process_failed_volume_has_no_pseudonymized_uids( mocker: MockerFixture, settings, tmp_path: Path ): - """In linking mode, no association is created if all volumes in a study failed.""" - from adit.mass_transfer.models import MassTransferAssociation - + """Failed volumes have empty pseudonymized UID fields.""" settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") MassTransferSettings.objects.create() @@ -1355,7 +1350,9 @@ def fake_export_failure(op, volume, base, pseudonym, **kwargs): result = processor.process() - assert 
MassTransferAssociation.objects.filter(job=job).count() == 0 + vol.refresh_from_db() + assert vol.study_instance_uid_pseudonymized == "" + assert vol.series_instance_uid_pseudonymized == "" assert result["status"] == MassTransferTask.Status.FAILURE @@ -1363,23 +1360,17 @@ def fake_export_failure(op, volume, base, pseudonym, **kwargs): def test_longitudinal_linking_across_partitions( mocker: MockerFixture, settings, tmp_path: Path ): - """Prove that linking mode enables longitudinal tracking across an entire job. + """Prove that pseudonymized UIDs on volumes enable longitudinal tracking. Scenario: - Partition 1 (Jan 1): PAT1/Study-A (2 series), PAT2/Study-B (1 series) - Partition 2 (Jan 2): PAT1/Study-C (1 series) After processing both partitions: - - 3 association records exist (one per study) - - PAT1 has 2 associations → linkable via patient_id - - PAT2 has 1 association - - The pseudonymized StudyInstanceUID in each association matches - what dicognito actually produced during export (probe-anonymize - consistency) + - All 4 volumes have pseudonymized UIDs + - PAT1 volumes are linkable via patient_id + - Two series in the same study share the same pseudonymized study UID """ - from adit.mass_transfer.models import MassTransferAssociation - from adit.core.utils.pseudonymizer import Pseudonymizer - settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") MassTransferSettings.objects.create() @@ -1457,38 +1448,15 @@ def test_longitudinal_linking_across_partitions( study_datetime=timezone.make_aware(datetime(2024, 1, 2, 9, 0)), ) - # --- Capture pseudonymized UIDs produced during export --- - # Each study's export uses a Pseudonymizer instance. We capture the - # pseudonymized StudyInstanceUID that dicognito actually produces during - # the export callback, keyed by original StudyInstanceUID. 
- pseudonymized_uids: dict[str, str] = {} + # Use a counter to generate distinct but deterministic pseudonymized UIDs + export_counter = {"n": 0} def fake_export(op, volume, base, pseudonym, *, study_pseudonymizer=None): - """Fake export that actually runs dicognito on a realistic dataset, - so we can capture the pseudonymized StudyInstanceUID.""" - if study_pseudonymizer is not None: - # Build a realistic DICOM dataset and run the pseudonymizer - ds = Dataset() - ds.file_meta = Dataset() - ds.file_meta.TransferSyntaxUID = "1.2.840.10008.1.2.1" - ds.file_meta.MediaStorageSOPClassUID = "1.2.840.10008.5.1.4.1.1.2" - ds.file_meta.MediaStorageSOPInstanceUID = volume.series_instance_uid - ds.StudyInstanceUID = volume.study_instance_uid - ds.SeriesInstanceUID = volume.series_instance_uid - ds.SOPInstanceUID = volume.series_instance_uid + ".1" - ds.SOPClassUID = "1.2.840.10008.5.1.4.1.1.2" - ds.PatientID = volume.patient_id - ds.PatientName = volume.patient_id - ds.StudyDate = "20240101" - ds.StudyTime = "100000" - - study_pseudonymizer.pseudonymize(ds, pseudonym) - - # Capture the pseudonymized StudyInstanceUID (should be the same - # for all series in the same study sharing the same Anonymizer) - pseudonymized_uids[volume.study_instance_uid] = str(ds.StudyInstanceUID) - + export_counter["n"] += 1 + n = export_counter["n"] volume.pseudonym = pseudonym + volume.study_instance_uid_pseudonymized = f"2.16.{volume.study_instance_uid}" + volume.series_instance_uid_pseudonymized = f"2.16.{volume.series_instance_uid}" volume.status = MassTransferVolume.Status.EXPORTED volume.save() @@ -1510,66 +1478,45 @@ def fake_export(op, volume, base, pseudonym, *, study_pseudonymizer=None): result2 = processor2.process() assert result2["status"] == MassTransferTask.Status.SUCCESS - # --- Verify association records --- - assocs = MassTransferAssociation.objects.filter(job=job).order_by( - "original_study_instance_uid" + # --- Verify pseudonymized UIDs on volumes --- + for vol in [vol_a1, vol_a2, vol_b, vol_c]: + vol.refresh_from_db() + assert vol.study_instance_uid_pseudonymized != "" + assert vol.series_instance_uid_pseudonymized != "" + assert vol.study_instance_uid_pseudonymized != vol.study_instance_uid + + # Two series in the same study share the same pseudonymized study UID + assert ( + vol_a1.study_instance_uid_pseudonymized + == vol_a2.study_instance_uid_pseudonymized + ) + # But different pseudonymized series UIDs + assert ( + vol_a1.series_instance_uid_pseudonymized + != vol_a2.series_instance_uid_pseudonymized ) - assert assocs.count() == 3 # one per study - - assoc_map = {a.original_study_instance_uid: a for a in assocs} - assert set(assoc_map.keys()) == { - "1.2.840.10001.1.1", - "1.2.840.10002.1.1", - "1.2.840.10001.1.2", - } - - # Each association's pseudonymized UID differs from the original - for assoc in assocs: - assert assoc.pseudonymized_study_instance_uid != assoc.original_study_instance_uid - assert assoc.pseudonymized_study_instance_uid != "" - - # Probe-anonymize consistency: the UID in the association table must - # match what dicognito actually produced during export - for orig_uid, assoc in assoc_map.items(): - assert orig_uid in pseudonymized_uids, f"No export captured for {orig_uid}" - assert assoc.pseudonymized_study_instance_uid == pseudonymized_uids[orig_uid], ( - f"Probe UID mismatch for {orig_uid}: " - f"association={assoc.pseudonymized_study_instance_uid}, " - f"export={pseudonymized_uids[orig_uid]}" - ) # --- Longitudinal linking via patient_id --- - # PAT1 has studies A and C — 
we can link them through the association table - pat1_assocs = [a for a in assocs if a.patient_id == "PAT1"] - assert len(pat1_assocs) == 2 - pat1_studies = {a.original_study_instance_uid for a in pat1_assocs} - assert pat1_studies == {"1.2.840.10001.1.1", "1.2.840.10001.1.2"} - - # Their pseudonymized UIDs are different (different studies) - pat1_pseudo_uids = {a.pseudonymized_study_instance_uid for a in pat1_assocs} - assert len(pat1_pseudo_uids) == 2 + pat1_vols = MassTransferVolume.objects.filter( + job=job, patient_id="PAT1" + ).exclude(study_instance_uid_pseudonymized="") + assert pat1_vols.count() == 3 # vol_a1, vol_a2, vol_c - # PAT2 has only study B - pat2_assocs = [a for a in assocs if a.patient_id == "PAT2"] - assert len(pat2_assocs) == 1 - assert pat2_assocs[0].original_study_instance_uid == "1.2.840.10002.1.1" + pat1_studies = set(pat1_vols.values_list("study_instance_uid", flat=True)) + assert pat1_studies == {"1.2.840.10001.1.1", "1.2.840.10001.1.2"} - # Associations are tied to the correct tasks - assoc_a = assoc_map["1.2.840.10001.1.1"] - assoc_b = assoc_map["1.2.840.10002.1.1"] - assoc_c = assoc_map["1.2.840.10001.1.2"] - assert assoc_a.task_id == task1.pk - assert assoc_b.task_id == task1.pk - assert assoc_c.task_id == task2.pk + pat2_vols = MassTransferVolume.objects.filter( + job=job, patient_id="PAT2" + ).exclude(study_instance_uid_pseudonymized="") + assert pat2_vols.count() == 1 + assert pat2_vols.first().study_instance_uid == "1.2.840.10002.1.1" @pytest.mark.django_db -def test_process_pseudonymize_mode_no_associations( +def test_process_pseudonymize_mode_stores_pseudonymized_uids( mocker: MockerFixture, settings, tmp_path: Path ): - """In pseudonymize mode (without linking), no associations are created.""" - from adit.mass_transfer.models import MassTransferAssociation - + """In pseudonymize mode (without linking), pseudonymized UIDs are still stored on volumes.""" settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") MassTransferSettings.objects.create() @@ -1616,13 +1563,17 @@ def test_process_pseudonymize_mode_no_associations( def fake_export(op, volume, base, pseudonym, **kwargs): volume.status = MassTransferVolume.Status.EXPORTED volume.pseudonym = pseudonym + volume.study_instance_uid_pseudonymized = "9.8.7.6" + volume.series_instance_uid_pseudonymized = "9.8.7.6.5" volume.save() mocker.patch.object(processor, "_export_volume", side_effect=fake_export) result = processor.process() - assert MassTransferAssociation.objects.filter(job=job).count() == 0 + vol.refresh_from_db() + assert vol.study_instance_uid_pseudonymized == "9.8.7.6" + assert vol.series_instance_uid_pseudonymized == "9.8.7.6.5" assert result["status"] == MassTransferTask.Status.SUCCESS diff --git a/adit/mass_transfer/views.py b/adit/mass_transfer/views.py index 0522ae25f..b0c3f92fb 100644 --- a/adit/mass_transfer/views.py +++ b/adit/mass_transfer/views.py @@ -28,7 +28,12 @@ from .filters import MassTransferJobFilter, MassTransferTaskFilter from .forms import MassTransferFilterForm, MassTransferJobForm from .mixins import MassTransferLockedMixin -from .models import MassTransferAssociation, MassTransferFilter, MassTransferJob, MassTransferTask +from .models import ( + MassTransferFilter, + MassTransferJob, + MassTransferTask, + MassTransferVolume, +) from .tables import MassTransferJobTable, MassTransferTaskTable MASS_TRANSFER_SOURCE = "mass_transfer_source" @@ -104,7 +109,11 @@ def get(self, request, pk): qs = MassTransferJob.objects.filter(owner=request.user) job = qs.get(pk=pk) - 
associations = MassTransferAssociation.objects.filter(job=job).order_by("id") + volumes = ( + MassTransferVolume.objects.filter(job=job) + .exclude(study_instance_uid_pseudonymized="") + .order_by("study_datetime", "series_instance_uid") + ) response = HttpResponse(content_type="text/csv") response["Content-Disposition"] = f'attachment; filename="associations_job_{job.pk}.csv"' @@ -113,15 +122,19 @@ def get(self, request, pk): writer.writerow([ "pseudonym", "patient_id", - "original_study_instance_uid", - "pseudonymized_study_instance_uid", + "study_instance_uid", + "study_instance_uid_pseudonymized", + "series_instance_uid", + "series_instance_uid_pseudonymized", ]) - for assoc in associations.iterator(): + for vol in volumes.iterator(): writer.writerow([ - assoc.pseudonym, - assoc.patient_id, - assoc.original_study_instance_uid, - assoc.pseudonymized_study_instance_uid, + vol.pseudonym, + vol.patient_id, + vol.study_instance_uid, + vol.study_instance_uid_pseudonymized, + vol.series_instance_uid, + vol.series_instance_uid_pseudonymized, ]) return response @@ -157,6 +170,17 @@ class MassTransferTaskDetailView(MassTransferLockedMixin, DicomTaskDetailView): job_url_name = "mass_transfer_job_detail" template_name = "mass_transfer/mass_transfer_task_detail.html" + def get_context_data(self, **kwargs) -> dict[str, Any]: + context = super().get_context_data(**kwargs) + task = self.object + context["problem_volumes"] = task.volumes.filter( + status__in=[ + MassTransferVolume.Status.ERROR, + MassTransferVolume.Status.SKIPPED, + ] + ).order_by("status", "study_datetime") + return context + class MassTransferTaskDeleteView(MassTransferLockedMixin, DicomTaskDeleteView): model = MassTransferTask From d96a0367d8ebf2e4958b39aee0f61f6169e54487 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Wed, 4 Mar 2026 12:37:39 +0100 Subject: [PATCH 021/103] Add persistent DIMSE connection mode to avoid per-operation association overhead --- adit/core/utils/dicom_operator.py | 6 ++ adit/core/utils/dimse_connector.py | 18 +++- adit/mass_transfer/processors.py | 163 +++++++++++++++-------------- 3 files changed, 106 insertions(+), 81 deletions(-) diff --git a/adit/core/utils/dicom_operator.py b/adit/core/utils/dicom_operator.py index 67692b935..9ee2670b3 100644 --- a/adit/core/utils/dicom_operator.py +++ b/adit/core/utils/dicom_operator.py @@ -44,10 +44,12 @@ def __init__( self, server: DicomServer, dimse_timeout: int | None = 60, + persistent: bool = False, ): self.server = server self.dimse_connector = DimseConnector( server, + persistent=persistent, dimse_timeout=dimse_timeout, ) # TODO: also make retries and timeouts possible in DicomWebConnector @@ -58,6 +60,10 @@ def __init__( def get_logs(self) -> list[DicomLogEntry]: return self.dimse_connector.logs + self.dicom_web_connector.logs + self.logs + def close(self) -> None: + if self.dimse_connector.assoc: + self.dimse_connector.close_connection() + def abort(self) -> None: self.dimse_connector.abort_connection() self.dicom_web_connector.abort() diff --git a/adit/core/utils/dimse_connector.py b/adit/core/utils/dimse_connector.py index 221874217..0eef3d391 100644 --- a/adit/core/utils/dimse_connector.py +++ b/adit/core/utils/dimse_connector.py @@ -69,6 +69,10 @@ def gen_wrapper(self: "DimseConnector", *args, **kwargs): if self.auto_connect and not is_connected: self.open_connection(service) opened_connection = True + elif self.persistent and is_connected and self._current_service != service: + self.close_connection() + self.open_connection(service) + opened_connection = 
True try: yield from func(self, *args, **kwargs) @@ -80,7 +84,7 @@ def gen_wrapper(self: "DimseConnector", *args, **kwargs): # a for loop), Python throws GeneratorExit — a BaseException, not an # Exception. Without finally, close_connection() was skipped and the # DIMSE association leaked on the PACS side. - if opened_connection and self.auto_connect and self.assoc: + if opened_connection and self.auto_connect and not self.persistent and self.assoc: self.close_connection() @wraps(func) @@ -91,6 +95,10 @@ def func_wrapper(self: "DimseConnector", *args, **kwargs): if self.auto_connect and not is_connected: self.open_connection(service) opened_connection = True + elif self.persistent and is_connected and self._current_service != service: + self.close_connection() + self.open_connection(service) + opened_connection = True try: result = func(self, *args, **kwargs) @@ -98,7 +106,7 @@ def func_wrapper(self: "DimseConnector", *args, **kwargs): self.abort_connection() raise err - if opened_connection and self.auto_connect: + if opened_connection and self.auto_connect and not self.persistent: self.close_connection() opened_connection = False @@ -116,6 +124,7 @@ def __init__( self, server: DicomServer, auto_connect: bool = True, + persistent: bool = False, acse_timeout: int | None = 60, connection_timeout: int | None = None, dimse_timeout: int | None = 60, @@ -123,11 +132,13 @@ def __init__( ) -> None: self.server = server self.auto_connect = auto_connect + self.persistent = persistent self.acse_timeout = acse_timeout self.connection_timeout = connection_timeout self.dimse_timeout = dimse_timeout self.network_timeout = network_timeout self.logs: list[DicomLogEntry] = [] + self._current_service: DimseService | None = None if settings.ENABLE_DICOM_DEBUG_LOGGER: debug_logger() # Debug mode of pynetdicom @@ -141,6 +152,7 @@ def open_connection(self, service: DimseService): # Call _associate which is decorated with @retry_dimse_connect # Stamina will handle retries automatically (5 attempts with exponential backoff) self._associate(service) + self._current_service = service @retry_dimse_connect def _associate(self, service: DimseService): @@ -203,12 +215,14 @@ def close_connection(self): assert self.assoc self.assoc.release() self.assoc = None + self._current_service = None def abort_connection(self): if self.assoc: logger.debug("Aborting connection to DICOM server %s.", self.server.ae_title) self.assoc.abort() self.assoc = None + self._current_service = None @retry_dimse_find @connect_to_server("C-FIND") diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 9b34836ec..0bcb40b9f 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -137,91 +137,96 @@ def process(self): "log": "Mass transfer requires at least one filter.", } - # Discovery: query PACS and create volume records - operator = DicomOperator(source_node.dicomserver) - volumes = self._find_volumes(operator, filters) - - # Link all discovered volumes to this task (for cleanup_on_failure) - for volume in volumes: - if volume.task_id != self.mass_task.pk: - volume.task = self.mass_task - volume.save(update_fields=["task"]) - - # Group volumes by study for pseudonymization — all series in a study - # must share the same pseudonym so the data stays linked. 
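
In practice the persistent flag shifts connection ownership to the caller: the association opened for the first DIMSE operation is reused by later calls and must be released explicitly. A minimal usage sketch (illustrative only; `server` and `queries` are hypothetical placeholders, not part of the patch):

    from adit.core.utils.dicom_operator import DicomOperator

    # persistent=True keeps the association opened for the first C-FIND alive
    # for the following operations instead of releasing it after every call.
    operator = DicomOperator(server, persistent=True)  # server: a DicomServer
    try:
        for query in queries:  # hypothetical iterable of prepared queries
            list(operator.find_series(query))
    finally:
        operator.close()  # in persistent mode the caller owns the teardown
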
- volumes_by_study: dict[str, list[MassTransferVolume]] = {} - for volume in volumes: - volumes_by_study.setdefault(volume.study_instance_uid, []).append(volume) - - export_base = _export_base_dir() - output_base = _destination_base_dir(destination_node) - - # The "done" status depends on whether NIfTI conversion is enabled: - # CONVERTED when converting, EXPORTED when exporting DICOM only. - done_status = ( - MassTransferVolume.Status.CONVERTED - if job.convert_to_nifti - else MassTransferVolume.Status.EXPORTED - ) + # Discovery: query PACS and create volume records. + # persistent=True keeps the DIMSE association open across multiple + # C-FIND/C-GET calls instead of reconnecting for every operation. + operator = DicomOperator(source_node.dicomserver, persistent=True) + try: + volumes = self._find_volumes(operator, filters) + + # Link all discovered volumes to this task (for cleanup_on_failure) + for volume in volumes: + if volume.task_id != self.mass_task.pk: + volume.task = self.mass_task + volume.save(update_fields=["task"]) + + # Group volumes by study for pseudonymization — all series in a study + # must share the same pseudonym so the data stays linked. + volumes_by_study: dict[str, list[MassTransferVolume]] = {} + for volume in volumes: + volumes_by_study.setdefault(volume.study_instance_uid, []).append(volume) + + export_base = _export_base_dir() + output_base = _destination_base_dir(destination_node) + + # The "done" status depends on whether NIfTI conversion is enabled: + # CONVERTED when converting, EXPORTED when exporting DICOM only. + done_status = ( + MassTransferVolume.Status.CONVERTED + if job.convert_to_nifti + else MassTransferVolume.Status.EXPORTED + ) - total_processed = 0 - total_skipped = 0 - total_failed = 0 - failed_reasons: dict[str, int] = {} + total_processed = 0 + total_skipped = 0 + total_failed = 0 + failed_reasons: dict[str, int] = {} - for study_uid, study_volumes in volumes_by_study.items(): - pseudonym = "" - study_pseudonymizer: Pseudonymizer | None = None + for study_uid, study_volumes in volumes_by_study.items(): + pseudonym = "" + study_pseudonymizer: Pseudonymizer | None = None - if job.should_pseudonymize: - existing_pseudonym = next( - (v.pseudonym for v in study_volumes if v.pseudonym), - None, - ) - pseudonym = existing_pseudonym or uuid.uuid4().hex - # One Anonymizer per study: all series in the same study share - # the same Anonymizer so UIDs stay consistent within the study. - study_pseudonymizer = Pseudonymizer() - - for volume in study_volumes: - if volume.status == done_status: - total_processed += 1 - continue - if volume.status == MassTransferVolume.Status.SKIPPED: - total_skipped += 1 - continue - - try: - if job.convert_to_nifti: - self._export_volume( - operator, volume, export_base, pseudonym, - study_pseudonymizer=study_pseudonymizer, - ) - self._convert_volume(volume, output_base, pseudonym) - else: - self._export_volume( - operator, volume, output_base, pseudonym, - study_pseudonymizer=study_pseudonymizer, - ) + if job.should_pseudonymize: + existing_pseudonym = next( + (v.pseudonym for v in study_volumes if v.pseudonym), + None, + ) + pseudonym = existing_pseudonym or uuid.uuid4().hex + # One Anonymizer per study: all series in the same study share + # the same Anonymizer so UIDs stay consistent within the study. 
+ study_pseudonymizer = Pseudonymizer() - # _convert_volume may set SKIPPED for non-image DICOMs + for volume in study_volumes: + if volume.status == done_status: + total_processed += 1 + continue if volume.status == MassTransferVolume.Status.SKIPPED: total_skipped += 1 - else: - total_processed += 1 - except RetriableDicomError: - raise # let Procrastinate retry the entire task - except Exception as err: - logger.exception( - "Mass transfer failed for volume %s", volume.series_instance_uid - ) - self._cleanup_export(volume) - volume.status = MassTransferVolume.Status.ERROR - volume.add_log(str(err)) - volume.save() - total_failed += 1 - reason = _short_error_reason(str(err)) - failed_reasons[reason] = failed_reasons.get(reason, 0) + 1 + continue + + try: + if job.convert_to_nifti: + self._export_volume( + operator, volume, export_base, pseudonym, + study_pseudonymizer=study_pseudonymizer, + ) + self._convert_volume(volume, output_base, pseudonym) + else: + self._export_volume( + operator, volume, output_base, pseudonym, + study_pseudonymizer=study_pseudonymizer, + ) + + # _convert_volume may set SKIPPED for non-image DICOMs + if volume.status == MassTransferVolume.Status.SKIPPED: + total_skipped += 1 + else: + total_processed += 1 + except RetriableDicomError: + raise # let Procrastinate retry the entire task + except Exception as err: + logger.exception( + "Mass transfer failed for volume %s", volume.series_instance_uid + ) + self._cleanup_export(volume) + volume.status = MassTransferVolume.Status.ERROR + volume.add_log(str(err)) + volume.save() + total_failed += 1 + reason = _short_error_reason(str(err)) + failed_reasons[reason] = failed_reasons.get(reason, 0) + 1 + finally: + operator.close() log_lines = [ f"Partition {self.mass_task.partition_key}", From ff1d71f2bdadbef89790a1e97f96a7bda3e9f198 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Thu, 5 Mar 2026 13:10:40 +0100 Subject: [PATCH 022/103] Rewrite mass transfer processing with deferred insertion, patient-centric folder structure, and two-mode pseudonymization --- adit/core/utils/pseudonymizer.py | 33 +- .../migrations/0007_add_pseudonym_salt.py | 19 + .../0008_remove_exported_folder_fields.py | 21 + .../0009_remove_pseudonymized_uid_fields.py | 21 + adit/mass_transfer/models.py | 52 +- adit/mass_transfer/processors.py | 475 +++--- adit/mass_transfer/tests/test_cleanup.py | 204 +-- adit/mass_transfer/tests/test_processor.py | 1449 +++++------------ adit/mass_transfer/views.py | 31 +- adit/settings/base.py | 6 - 10 files changed, 751 insertions(+), 1560 deletions(-) create mode 100644 adit/mass_transfer/migrations/0007_add_pseudonym_salt.py create mode 100644 adit/mass_transfer/migrations/0008_remove_exported_folder_fields.py create mode 100644 adit/mass_transfer/migrations/0009_remove_pseudonymized_uid_fields.py diff --git a/adit/core/utils/pseudonymizer.py b/adit/core/utils/pseudonymizer.py index 82513731e..669bacc6d 100644 --- a/adit/core/utils/pseudonymizer.py +++ b/adit/core/utils/pseudonymizer.py @@ -1,4 +1,5 @@ from dicognito.anonymizer import Anonymizer +from dicognito.randomizer import Randomizer from dicognito.value_keeper import ValueKeeper from django.conf import settings from pydicom import Dataset @@ -9,26 +10,50 @@ class Pseudonymizer: A utility class for pseudonymizing (or anonymizing) DICOM data. 
""" - def __init__(self, anonymizer: Anonymizer | None = None) -> None: + _ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + _ID_LENGTH = 12 + + def __init__( + self, + anonymizer: Anonymizer | None = None, + seed: str | None = None, + ) -> None: """ Initialize the Pseudonymizer. Sets up the anonymizer instance and configures it to skip specific elements. If an existing Anonymizer is provided, it will be used instead of creating a new one. + When a seed is provided, the anonymizer produces deterministic results — + the same input always maps to the same output. """ - self.anonymizer = anonymizer or self._setup_anonymizer() + self._seed = seed + self.anonymizer = anonymizer or self._setup_anonymizer(seed=seed) - def _setup_anonymizer(self) -> Anonymizer: + def _setup_anonymizer(self, seed: str | None = None) -> Anonymizer: """ Set up the anonymizer instance and configure it to skip specific elements. :return: An instance of the Anonymizer class. """ - anonymizer = Anonymizer() + anonymizer = Anonymizer(seed=seed) for element in settings.SKIP_ELEMENTS_ANONYMIZATION: anonymizer.add_element_handler(ValueKeeper(element)) return anonymizer + def compute_pseudonym(self, patient_id: str) -> str: + """Pre-compute the pseudonym for a patient ID without a full DICOM dataset. + + Uses the same algorithm as dicognito's IDAnonymizer so the result + matches what anonymize() would produce for PatientID. + Requires that this Pseudonymizer was created with a seed. + """ + if self._seed is None: + raise ValueError("compute_pseudonym requires a seeded Pseudonymizer") + randomizer = Randomizer(self._seed) + ranges = [len(self._ALPHABET)] * self._ID_LENGTH + indices = randomizer.get_ints_from_ranges(patient_id, *ranges) + return "".join(self._ALPHABET[i] for i in indices) + def pseudonymize(self, ds: Dataset, pseudonym: str) -> None: """ Pseudonymize the given DICOM dataset using the anonymizer and the provided pseudonym. 
diff --git a/adit/mass_transfer/migrations/0007_add_pseudonym_salt.py b/adit/mass_transfer/migrations/0007_add_pseudonym_salt.py new file mode 100644 index 000000000..bb350a39b --- /dev/null +++ b/adit/mass_transfer/migrations/0007_add_pseudonym_salt.py @@ -0,0 +1,19 @@ +# Generated by Django 5.2.8 on 2026-03-05 11:19 + +import secrets +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('mass_transfer', '0006_replace_association_with_volume_fields'), + ] + + operations = [ + migrations.AddField( + model_name='masstransferjob', + name='pseudonym_salt', + field=models.CharField(default=secrets.token_hex, max_length=64), + ), + ] diff --git a/adit/mass_transfer/migrations/0008_remove_exported_folder_fields.py b/adit/mass_transfer/migrations/0008_remove_exported_folder_fields.py new file mode 100644 index 000000000..98b8e4470 --- /dev/null +++ b/adit/mass_transfer/migrations/0008_remove_exported_folder_fields.py @@ -0,0 +1,21 @@ +# Generated by Django 5.2.8 on 2026-03-05 11:28 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('mass_transfer', '0007_add_pseudonym_salt'), + ] + + operations = [ + migrations.RemoveField( + model_name='masstransfervolume', + name='export_cleaned', + ), + migrations.RemoveField( + model_name='masstransfervolume', + name='exported_folder', + ), + ] diff --git a/adit/mass_transfer/migrations/0009_remove_pseudonymized_uid_fields.py b/adit/mass_transfer/migrations/0009_remove_pseudonymized_uid_fields.py new file mode 100644 index 000000000..3499082da --- /dev/null +++ b/adit/mass_transfer/migrations/0009_remove_pseudonymized_uid_fields.py @@ -0,0 +1,21 @@ +# Generated by Django 5.2.8 on 2026-03-05 12:06 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('mass_transfer', '0008_remove_exported_folder_fields'), + ] + + operations = [ + migrations.RemoveField( + model_name='masstransfervolume', + name='series_instance_uid_pseudonymized', + ), + migrations.RemoveField( + model_name='masstransfervolume', + name='study_instance_uid_pseudonymized', + ), + ] diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index 4a0da32c1..d7af702f5 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -1,7 +1,6 @@ from __future__ import annotations -import shutil -from pathlib import Path +import secrets from django.conf import settings from django.db import models @@ -92,6 +91,10 @@ class AnonymizationMode(models.TextChoices): ) filters = models.ManyToManyField(MassTransferFilter, related_name="jobs", blank=True) + pseudonym_salt = models.CharField( + max_length=64, + default=secrets.token_hex, + ) @property def should_pseudonymize(self) -> bool: @@ -143,38 +146,14 @@ def get_absolute_url(self): return reverse("mass_transfer_task_detail", args=[self.pk]) def cleanup_on_failure(self) -> None: - """Clean up exported DICOM files when a mass transfer task fails or times out.""" - volumes = self.volumes.exclude(exported_folder="") - - for volume in volumes: - if volume.status in ( - MassTransferVolume.Status.CONVERTED, - MassTransferVolume.Status.SKIPPED, - ): - continue - # When not converting to NIfTI, EXPORTED is the final state and - # the files live in the destination folder — don't delete them. 
- if ( - not self.job.convert_to_nifti - and volume.status == MassTransferVolume.Status.EXPORTED - ): - continue - - export_folder = volume.exported_folder - if export_folder: - try: - shutil.rmtree(Path(export_folder)) - except FileNotFoundError: - pass - except Exception as err: - volume.add_log(f"Cleanup failed: {err}") - volume.save() - continue - - volume.exported_folder = "" - volume.status = MassTransferVolume.Status.ERROR - volume.add_log("Export cleaned up after task failure.") - volume.save() + """Mark ERROR volumes when a mass transfer task fails or times out. + + With deferred insertion, volumes only exist in the DB after successful + export/conversion — there are no intermediate files to clean up. + Temp directories used during NIfTI conversion are automatically removed + by the TemporaryDirectory context manager. + """ + pass class MassTransferVolume(models.Model): @@ -209,13 +188,8 @@ class Status(models.TextChoices): institution_name = models.CharField(max_length=128, blank=True, default="") number_of_images = models.PositiveIntegerField(default=0) - exported_folder = models.TextField(blank=True, default="") - export_cleaned = models.BooleanField(default=False) converted_file = models.TextField(blank=True, default="") - study_instance_uid_pseudonymized = models.CharField(max_length=128, blank=True, default="") - series_instance_uid_pseudonymized = models.CharField(max_length=128, blank=True, default="") - status = models.CharField(max_length=16, choices=Status.choices, default=Status.PENDING) log = models.TextField(blank=True, default="") diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 0bcb40b9f..09fed1ad4 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -1,9 +1,11 @@ from __future__ import annotations +import hashlib import logging -import shutil +import secrets import subprocess -import uuid +import tempfile +from dataclasses import dataclass from datetime import datetime, timedelta from pathlib import Path from typing import cast @@ -24,6 +26,7 @@ from .models import ( MassTransferFilter, + MassTransferJob, MassTransferSettings, MassTransferTask, MassTransferVolume, @@ -34,6 +37,21 @@ _MIN_SPLIT_WINDOW = timedelta(minutes=30) +@dataclass +class DiscoveredSeries: + patient_id: str + accession_number: str + study_instance_uid: str + series_instance_uid: str + modality: str + study_description: str + series_description: str + series_number: int | None + study_datetime: datetime + institution_name: str + number_of_images: int + + def _dicom_match(pattern: str, value: str | None) -> bool: if not pattern: return True @@ -44,9 +62,6 @@ def _dicom_match(pattern: str, value: str | None) -> bool: def _short_error_reason(error: str) -> str: - """Extract a short, groupable reason from a volume error message.""" - # Take the last non-empty line — for dcm2niix output this is the - # meaningful summary (e.g. "No valid DICOM images were found"). 
lines = [line.strip() for line in error.strip().splitlines() if line.strip()] return lines[-1] if lines else error @@ -60,17 +75,6 @@ def _parse_int(value: object, default: int | None = None) -> int | None: return default -def _series_folder_name( - series_number: int | None, series_description: str, series_uid: str -) -> str: - if series_number is None: - base = series_uid - else: - description = series_description or "Undefined" - base = f"{series_number}-{description}" - return sanitize_filename(str(base)) - - def _study_datetime(study: ResultDataset) -> datetime: study_date = study.StudyDate study_time = study.StudyTime @@ -79,10 +83,20 @@ def _study_datetime(study: ResultDataset) -> datetime: return datetime.combine(study_date, study_time) -def _export_base_dir() -> Path: - base = Path(settings.MASS_TRANSFER_EXPORT_BASE_DIR) - base.mkdir(parents=True, exist_ok=True) - return base +def _study_folder_name(study_description: str, study_dt: datetime, study_uid: str) -> str: + desc = sanitize_filename(study_description or "Undefined") + date_str = study_dt.strftime("%Y%m%d") + short_hash = hashlib.sha256(study_uid.encode()).hexdigest()[:4] + return f"{desc}_{date_str}_{short_hash}" + + +def _series_folder_name( + series_description: str, series_number: int | None, series_uid: str +) -> str: + if series_number is None: + return sanitize_filename(series_uid) + desc = sanitize_filename(series_description or "Undefined") + return f"{desc}_{series_number}" def _destination_base_dir(node: DicomNode) -> Path: @@ -92,16 +106,6 @@ def _destination_base_dir(node: DicomNode) -> Path: return path -def _volume_path( - base_dir: Path, - study_dt: datetime, - subject_id: str, - series_name: str, -) -> Path: - year_month = study_dt.strftime("%Y%m") - return base_dir / year_month / subject_id / series_name - - class MassTransferTaskProcessor(DicomTaskProcessor): app_name = "mass_transfer" dicom_task_class = MassTransferTask @@ -137,30 +141,36 @@ def process(self): "log": "Mass transfer requires at least one filter.", } - # Discovery: query PACS and create volume records. - # persistent=True keeps the DIMSE association open across multiple - # C-FIND/C-GET calls instead of reconnecting for every operation. + pseudonymizer: Pseudonymizer | None = None + if job.should_link: + pseudonymizer = Pseudonymizer(seed=job.pseudonym_salt) + elif job.should_pseudonymize: + pseudonymizer = Pseudonymizer() + operator = DicomOperator(source_node.dicomserver, persistent=True) try: - volumes = self._find_volumes(operator, filters) - - # Link all discovered volumes to this task (for cleanup_on_failure) - for volume in volumes: - if volume.task_id != self.mass_task.pk: - volume.task = self.mass_task - volume.save(update_fields=["task"]) + discovered = self._discover_series(operator, filters) + + # Filter out series already processed in a previous run + done_uids = set( + MassTransferVolume.objects.filter( + job=job, + status__in=[ + MassTransferVolume.Status.EXPORTED, + MassTransferVolume.Status.CONVERTED, + MassTransferVolume.Status.SKIPPED, + ], + ).values_list("series_instance_uid", flat=True) + ) + # Delete ERROR volumes so they can be retried cleanly + MassTransferVolume.objects.filter( + job=job, status=MassTransferVolume.Status.ERROR + ).delete() - # Group volumes by study for pseudonymization — all series in a study - # must share the same pseudonym so the data stays linked. 
- volumes_by_study: dict[str, list[MassTransferVolume]] = {} - for volume in volumes: - volumes_by_study.setdefault(volume.study_instance_uid, []).append(volume) + pending = [s for s in discovered if s.series_instance_uid not in done_uids] + total_skipped_prior = len(done_uids) - export_base = _export_base_dir() output_base = _destination_base_dir(destination_node) - - # The "done" status depends on whether NIfTI conversion is enabled: - # CONVERTED when converting, EXPORTED when exporting DICOM only. done_status = ( MassTransferVolume.Status.CONVERTED if job.convert_to_nifti @@ -172,68 +182,139 @@ def process(self): total_failed = 0 failed_reasons: dict[str, int] = {} - for study_uid, study_volumes in volumes_by_study.items(): - pseudonym = "" - study_pseudonymizer: Pseudonymizer | None = None + # Group by patient for folder structure + by_patient: dict[str, list[DiscoveredSeries]] = {} + for s in pending: + by_patient.setdefault(s.patient_id, []).append(s) - if job.should_pseudonymize: - existing_pseudonym = next( - (v.pseudonym for v in study_volumes if v.pseudonym), - None, + # For non-linking pseudonymize mode, generate random pseudonyms per patient + random_pseudonyms: dict[str, str] = {} + + for patient_id, series_list in by_patient.items(): + if job.should_link and pseudonymizer: + subject_id = pseudonymizer.compute_pseudonym(patient_id) + elif pseudonymizer: + if patient_id not in random_pseudonyms: + random_pseudonyms[patient_id] = secrets.token_hex(6).upper() + subject_id = random_pseudonyms[patient_id] + else: + subject_id = sanitize_filename(patient_id) + + for series in series_list: + study_folder = _study_folder_name( + series.study_description, + series.study_datetime, + series.study_instance_uid, + ) + series_folder = _series_folder_name( + series.series_description, + series.series_number, + series.series_instance_uid, ) - pseudonym = existing_pseudonym or uuid.uuid4().hex - # One Anonymizer per study: all series in the same study share - # the same Anonymizer so UIDs stay consistent within the study. 
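
The folder helpers above translate into the following patient-centric destination layout (an illustrative sketch; the input values and the angle-bracket placeholders are hypothetical, and the 4-character suffix is the truncated SHA-256 of the StudyInstanceUID):

    from datetime import datetime

    from adit.mass_transfer.processors import _series_folder_name, _study_folder_name

    study_dt = datetime(2024, 1, 1, 12, 0)
    study_folder = _study_folder_name("Brain CT", study_dt, "1.2.840.10001.1.1")
    series_folder = _series_folder_name("Axial", 2, "1.2.840.10001.1.1.1")

    # Resulting path shape:
    #   <output_base>/<partition_key>/<subject_id>/<study_desc>_20240101_<hash4>/<series_desc>_2/
    # <subject_id> is the deterministic pseudonym in linking mode, a random
    # per-patient pseudonym in plain pseudonymize mode, or the sanitized
    # PatientID when anonymization is disabled.
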
- study_pseudonymizer = Pseudonymizer() - - for volume in study_volumes: - if volume.status == done_status: - total_processed += 1 - continue - if volume.status == MassTransferVolume.Status.SKIPPED: - total_skipped += 1 - continue try: if job.convert_to_nifti: - self._export_volume( - operator, volume, export_base, pseudonym, - study_pseudonymizer=study_pseudonymizer, - ) - self._convert_volume(volume, output_base, pseudonym) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_path = Path(tmp_dir) + self._export_series( + operator, series, tmp_path, + subject_id, pseudonymizer, + ) + output_path = ( + output_base / self.mass_task.partition_key + / subject_id / study_folder / series_folder + ) + nifti_files = self._convert_series( + series, tmp_path, output_path, + ) else: - self._export_volume( - operator, volume, output_base, pseudonym, - study_pseudonymizer=study_pseudonymizer, + output_path = ( + output_base / self.mass_task.partition_key + / subject_id / study_folder / series_folder ) + self._export_series( + operator, series, output_path, + subject_id, pseudonymizer, + ) + nifti_files = [] + + converted_file = "" + if nifti_files: + converted_file = "\n".join(str(f) for f in nifti_files) + status = done_status + elif job.convert_to_nifti: + status = MassTransferVolume.Status.SKIPPED + else: + status = done_status + + MassTransferVolume.objects.create( + job=job, + task=self.mass_task, + partition_key=self.mass_task.partition_key, + patient_id=series.patient_id, + pseudonym=subject_id if pseudonymizer else "", + accession_number=series.accession_number, + study_instance_uid=series.study_instance_uid, + series_instance_uid=series.series_instance_uid, + modality=series.modality, + study_description=series.study_description, + series_description=series.series_description, + series_number=series.series_number, + study_datetime=timezone.make_aware(series.study_datetime), + institution_name=series.institution_name, + number_of_images=series.number_of_images, + converted_file=converted_file, + status=status, + ) - # _convert_volume may set SKIPPED for non-image DICOMs - if volume.status == MassTransferVolume.Status.SKIPPED: + if status == MassTransferVolume.Status.SKIPPED: total_skipped += 1 else: total_processed += 1 + except RetriableDicomError: - raise # let Procrastinate retry the entire task + raise except Exception as err: logger.exception( - "Mass transfer failed for volume %s", volume.series_instance_uid + "Mass transfer failed for series %s", + series.series_instance_uid, + ) + MassTransferVolume.objects.create( + job=job, + task=self.mass_task, + partition_key=self.mass_task.partition_key, + patient_id=series.patient_id, + pseudonym=subject_id if pseudonymizer else "", + accession_number=series.accession_number, + study_instance_uid=series.study_instance_uid, + series_instance_uid=series.series_instance_uid, + modality=series.modality, + study_description=series.study_description, + series_description=series.series_description, + series_number=series.series_number, + study_datetime=timezone.make_aware(series.study_datetime), + institution_name=series.institution_name, + number_of_images=series.number_of_images, + status=MassTransferVolume.Status.ERROR, + log=str(err), ) - self._cleanup_export(volume) - volume.status = MassTransferVolume.Status.ERROR - volume.add_log(str(err)) - volume.save() total_failed += 1 reason = _short_error_reason(str(err)) failed_reasons[reason] = failed_reasons.get(reason, 0) + 1 finally: operator.close() + # Count unique studies across all discovered 
series + study_uids = {s.study_instance_uid for s in discovered} + log_lines = [ f"Partition {self.mass_task.partition_key}", - f"Studies found: {len(volumes_by_study)}", - f"Volumes found: {len(volumes)}", + f"Studies found: {len(study_uids)}", + f"Series found: {len(discovered)}", f"Processed: {total_processed}", ] + if total_skipped_prior: + log_lines.append(f"Already done (prior run): {total_skipped_prior}") if total_skipped: log_lines.append(f"Skipped: {total_skipped}") if total_failed: @@ -243,14 +324,13 @@ def process(self): for reason, count in failed_reasons.items(): log_lines.append(f" {count}x {reason}") - if not volumes: + if not discovered: status = MassTransferTask.Status.SUCCESS - message = "No volumes found for this partition." + message = "No series found for this partition." elif total_failed and not total_processed: status = MassTransferTask.Status.FAILURE - message = f"All {total_failed} volumes failed during mass transfer." + message = f"All {total_failed} series failed during mass transfer." else: - # Build a unified message: "x studies, y volumes processed (z skipped, w failed)" parts = [] if total_skipped: parts.append(f"{total_skipped} skipped") @@ -258,14 +338,10 @@ def process(self): parts.append(f"{total_failed} failed") suffix = f" ({', '.join(parts)})" if parts else "" - if total_failed: - status = MassTransferTask.Status.WARNING - else: - status = MassTransferTask.Status.SUCCESS - + status = MassTransferTask.Status.WARNING if total_failed else MassTransferTask.Status.SUCCESS message = ( - f"{len(volumes_by_study)} studies, " - f"{total_processed} volumes processed{suffix}." + f"{len(study_uids)} studies, " + f"{total_processed} series processed{suffix}." ) return { @@ -274,16 +350,15 @@ def process(self): "log": "\n".join(log_lines), } - def _find_volumes( + def _discover_series( self, operator: DicomOperator, filters: list[MassTransferFilter], - ) -> list[MassTransferVolume]: + ) -> list[DiscoveredSeries]: start = self.mass_task.partition_start end = self.mass_task.partition_end - job = self.mass_task.job - found_series: dict[str, MassTransferVolume] = {} + found: dict[str, DiscoveredSeries] = {} for mf in filters: studies = self._find_studies(operator, mf, start, end) @@ -305,7 +380,6 @@ def _find_volumes( PatientID=study.PatientID, StudyInstanceUID=study.StudyInstanceUID, ) - # Request institution name at series level when possible series_query.dataset.InstitutionName = "" series_list = list(operator.find_series(series_query)) @@ -341,54 +415,27 @@ def _find_volumes( except (TypeError, ValueError): continue - if series_uid in found_series: + if series_uid in found: continue study_dt = _study_datetime(study) - # Use get_or_create for resumability: if a task failed halfway - # and is retried, volumes that were already exported/converted - # are returned as-is and skipped later in the processing loop. 
- volume, created = MassTransferVolume.objects.get_or_create( - job=job, - series_instance_uid=series_uid, - defaults={ - "partition_key": self.mass_task.partition_key, - "patient_id": str(study.PatientID), - "accession_number": str(study.get("AccessionNumber", "")), - "study_instance_uid": str(study.StudyInstanceUID), - "modality": str(series.Modality), - "study_description": str(study.get("StudyDescription", "")), - "series_description": str(series.get("SeriesDescription", "")), - "series_number": series_number, - "study_datetime": timezone.make_aware(study_dt), - "institution_name": str(series.get("InstitutionName", "")), - "number_of_images": _parse_int( - series.get("NumberOfSeriesRelatedInstances"), default=0 - ), - }, - ) - # Refresh metadata from PACS in case it changed between runs, - # but only for volumes that haven't been processed yet to avoid - # clobbering partition_key on already exported/converted volumes. - if not created and volume.status == MassTransferVolume.Status.PENDING: - volume.partition_key = self.mass_task.partition_key - volume.patient_id = str(study.PatientID) - volume.accession_number = str(study.get("AccessionNumber", "")) - volume.study_instance_uid = str(study.StudyInstanceUID) - volume.modality = str(series.Modality) - volume.study_description = str(study.get("StudyDescription", "")) - volume.series_description = str(series.get("SeriesDescription", "")) - volume.series_number = series_number - volume.study_datetime = timezone.make_aware(study_dt) - volume.institution_name = str(series.get("InstitutionName", "")) - volume.number_of_images = _parse_int( + found[series_uid] = DiscoveredSeries( + patient_id=str(study.PatientID), + accession_number=str(study.get("AccessionNumber", "")), + study_instance_uid=str(study.StudyInstanceUID), + series_instance_uid=str(series_uid), + modality=str(series.Modality), + study_description=str(study.get("StudyDescription", "")), + series_description=str(series.get("SeriesDescription", "")), + series_number=series_number, + study_datetime=study_dt, + institution_name=str(series.get("InstitutionName", "")), + number_of_images=_parse_int( series.get("NumberOfSeriesRelatedInstances"), default=0 - ) - volume.save() - - found_series[series_uid] = volume + ) or 0, + ) - return list(found_series.values()) + return list(found.values()) def _find_studies( self, @@ -421,8 +468,6 @@ def _find_studies( left = self._find_studies(operator, mf, start, mid) right = self._find_studies(operator, mf, mid + timedelta(seconds=1), end) - # Deduplicate: the date-level DICOM query can return the same study - # in both halves when the split falls within a single day. 
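
The window-splitting strategy carried over from the previous version can be sketched as follows (a simplified illustration under the assumption that the split point is the window midpoint; the real _find_studies additionally re-queries the PACS for each half and refuses to split below _MIN_SPLIT_WINDOW):

    from datetime import datetime, timedelta

    def split_window(start: datetime, end: datetime):
        # Assumed midpoint split; the +1 second keeps the two halves disjoint.
        mid = start + (end - start) / 2
        return (start, mid), (mid + timedelta(seconds=1), end)

    def merge_halves(left, right):
        # A date-level DICOM query can return the same study in both halves
        # when the split lands inside a single day, so deduplicate by UID.
        seen = {str(s.StudyInstanceUID) for s in left}
        return list(left) + [s for s in right if str(s.StudyInstanceUID) not in seen]
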
seen: set[str] = {str(s.StudyInstanceUID) for s in left} for study in right: if str(study.StudyInstanceUID) not in seen: @@ -452,140 +497,76 @@ def _study_has_institution( return False - def _export_volume( + def _export_series( self, operator: DicomOperator, - volume: MassTransferVolume, - export_base: Path, - pseudonym: str, - *, - study_pseudonymizer: Pseudonymizer | None = None, + series: DiscoveredSeries, + output_path: Path, + subject_id: str, + pseudonymizer: Pseudonymizer | None, ) -> None: - if volume.status == MassTransferVolume.Status.EXPORTED and volume.exported_folder: - return - - study_dt = volume.study_datetime - series_name = _series_folder_name( - volume.series_number, - volume.series_description, - volume.series_instance_uid, - ) - - subject_id = sanitize_filename(pseudonym or volume.patient_id) - export_path = _volume_path(export_base, study_dt, subject_id, series_name) - export_path.mkdir(parents=True, exist_ok=True) - volume.exported_folder = str(export_path) - - # Share the study-level Pseudonymizer (and thus Anonymizer) across - # all volumes in the same study. - manipulator = DicomManipulator(pseudonymizer=study_pseudonymizer) + output_path.mkdir(parents=True, exist_ok=True) - # Capture pseudonymized UIDs from the first image after anonymization. - pseudonymized_study_uid = "" - pseudonymized_series_uid = "" + manipulator = DicomManipulator(pseudonymizer=pseudonymizer) if pseudonymizer else None def callback(ds: Dataset | None) -> None: - nonlocal pseudonymized_study_uid, pseudonymized_series_uid if ds is None: return - manipulator.manipulate(ds, pseudonym=pseudonym) - if pseudonym and not pseudonymized_study_uid: - pseudonymized_study_uid = str(ds.StudyInstanceUID) - pseudonymized_series_uid = str(ds.SeriesInstanceUID) + if manipulator: + manipulator.manipulate(ds, pseudonym=subject_id) file_name = sanitize_filename(f"{ds.SOPInstanceUID}.dcm") - write_dataset(ds, export_path / file_name) + write_dataset(ds, output_path / file_name) operator.fetch_series( - patient_id=volume.patient_id, - study_uid=volume.study_instance_uid, - series_uid=volume.series_instance_uid, + patient_id=series.patient_id, + study_uid=series.study_instance_uid, + series_uid=series.series_instance_uid, callback=callback, ) - volume.pseudonym = pseudonym - volume.study_instance_uid_pseudonymized = pseudonymized_study_uid - volume.series_instance_uid_pseudonymized = pseudonymized_series_uid - volume.status = MassTransferVolume.Status.EXPORTED - volume.save() - - def _convert_volume( + def _convert_series( self, - volume: MassTransferVolume, - output_base: Path, - pseudonym: str, - ) -> None: - if volume.status == MassTransferVolume.Status.CONVERTED and volume.converted_file: - return - - if not volume.exported_folder: - raise DicomError("Missing exported folder for conversion.") + series: DiscoveredSeries, + dicom_dir: Path, + output_path: Path, + ) -> list[Path]: + """Convert DICOM to NIfTI. 
Returns list of produced .nii.gz files (empty for non-image).""" + output_path.mkdir(parents=True, exist_ok=True) - study_dt = volume.study_datetime - volume.pseudonym = pseudonym series_name = _series_folder_name( - volume.series_number, - volume.series_description, - volume.series_instance_uid, + series.series_description, + series.series_number, + series.series_instance_uid, ) - subject_id = sanitize_filename(pseudonym or volume.patient_id) - output_path = _volume_path(output_base, study_dt, subject_id, series_name) - output_path.mkdir(parents=True, exist_ok=True) - cmd = [ "dcm2niix", - "-z", - "y", - "-o", - str(output_path), - "-f", - series_name, - str(volume.exported_folder), + "-z", "y", + "-o", str(output_path), + "-f", series_name, + str(dicom_dir), ] result = subprocess.run(cmd, check=False, capture_output=True, text=True) combined_output = (result.stdout or "") + (result.stderr or "") - # dcm2niix returns non-zero when the input contains only non-image - # DICOM objects (structured reports, presentation states, etc.). - # This is not an error — there is simply nothing to convert. if "No valid DICOM images" in combined_output: - volume.status = MassTransferVolume.Status.SKIPPED - volume.add_log("Non-image DICOM series (skipped by dcm2niix)") - volume.save() - return + try: + if output_path.exists() and not any(output_path.iterdir()): + output_path.rmdir() + except OSError: + pass + return [] if result.returncode != 0: output = result.stderr or result.stdout raise DicomError( - f"Conversion failed for series {volume.series_instance_uid}: {output}" + f"Conversion failed for series {series.series_instance_uid}: {output}" ) nifti_files = sorted(output_path.glob("*.nii.gz")) if not nifti_files: raise DicomError( - f"dcm2niix produced no .nii.gz files for series {volume.series_instance_uid}" + f"dcm2niix produced no .nii.gz files for series {series.series_instance_uid}" ) - - volume.converted_file = "\n".join(str(f) for f in nifti_files) - volume.status = MassTransferVolume.Status.CONVERTED - volume.save() - - self._cleanup_export(volume) - - def _cleanup_export(self, volume: MassTransferVolume) -> None: - export_folder = volume.exported_folder - if not export_folder or volume.export_cleaned: - return - - try: - shutil.rmtree(export_folder) - except FileNotFoundError: - pass - except Exception as err: - volume.add_log(f"Cleanup failed: {err}") - volume.save() - return - - volume.export_cleaned = True - volume.save() + return nifti_files diff --git a/adit/mass_transfer/tests/test_cleanup.py b/adit/mass_transfer/tests/test_cleanup.py index ea971e96d..0c2ff672a 100644 --- a/adit/mass_transfer/tests/test_cleanup.py +++ b/adit/mass_transfer/tests/test_cleanup.py @@ -1,5 +1,3 @@ -from pathlib import Path - import pytest from adit_radis_shared.accounts.factories import UserFactory from django.utils import timezone @@ -14,75 +12,17 @@ @pytest.mark.django_db -def test_cleanup_removes_intermediate_exports_when_converting(tmp_path: Path): - """When convert_to_nifti=True, EXPORTED volumes hold intermediate DICOM files - that should be cleaned up on failure. - - Proves: cleanup_on_failure deletes intermediate DICOM exports and marks - the volume as ERROR when convert_to_nifti is enabled (EXPORTED is not final). 
- """ - MassTransferSettings.objects.create() - - user = UserFactory.create() - source = DicomServerFactory.create() - destination = DicomFolderFactory.create(path=str(tmp_path / "output")) - job = MassTransferJob.objects.create( - owner=user, - source=source, - destination=destination, - start_date=timezone.now().date(), - end_date=timezone.now().date(), - partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - convert_to_nifti=True, - ) - task = MassTransferTask.objects.create( - job=job, - source=source, - partition_start=timezone.now(), - partition_end=timezone.now(), - partition_key="20240101", - ) - - export_dir = tmp_path / "exports" / "202401" / "PATIENT" / "1-Head" - export_dir.mkdir(parents=True, exist_ok=True) - - volume = MassTransferVolume.objects.create( - job=job, - task=task, - partition_key="20240101", - patient_id="PATIENT", - study_instance_uid="study-1", - series_instance_uid="series-1", - modality="CT", - study_description="", - series_description="Head", - series_number=1, - study_datetime=timezone.now(), - exported_folder=str(export_dir), - status=MassTransferVolume.Status.EXPORTED, - ) - - task.cleanup_on_failure() - - volume.refresh_from_db() - assert not export_dir.exists() - assert volume.status == MassTransferVolume.Status.ERROR - assert volume.exported_folder == "" - - -@pytest.mark.django_db -def test_cleanup_preserves_exported_volumes_when_not_converting(tmp_path: Path): - """When convert_to_nifti=False, EXPORTED is the final state and the files - live in the destination folder — cleanup should not delete them. +def test_cleanup_on_failure_is_noop(): + """With deferred insertion, cleanup_on_failure has nothing to do. - Proves: cleanup_on_failure preserves destination files and keeps the EXPORTED - status when convert_to_nifti is disabled (EXPORTED is the terminal state). + Volumes are only created in the DB after successful export/conversion, + and temp directories are cleaned up by TemporaryDirectory context managers. 
""" MassTransferSettings.objects.create() user = UserFactory.create() source = DicomServerFactory.create() - destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + destination = DicomFolderFactory.create() job = MassTransferJob.objects.create( owner=user, source=source, @@ -90,7 +30,6 @@ def test_cleanup_preserves_exported_volumes_when_not_converting(tmp_path: Path): start_date=timezone.now().date(), end_date=timezone.now().date(), partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - convert_to_nifti=False, ) task = MassTransferTask.objects.create( job=job, @@ -100,10 +39,8 @@ def test_cleanup_preserves_exported_volumes_when_not_converting(tmp_path: Path): partition_key="20240101", ) - export_dir = tmp_path / "output" / "202401" / "PATIENT" / "1-Head" - export_dir.mkdir(parents=True, exist_ok=True) - - volume = MassTransferVolume.objects.create( + # Create some volumes in various states + MassTransferVolume.objects.create( job=job, task=task, partition_key="20240101", @@ -115,132 +52,11 @@ def test_cleanup_preserves_exported_volumes_when_not_converting(tmp_path: Path): series_description="Head", series_number=1, study_datetime=timezone.now(), - exported_folder=str(export_dir), status=MassTransferVolume.Status.EXPORTED, ) + # Should not raise or modify anything task.cleanup_on_failure() - volume.refresh_from_db() - assert export_dir.exists(), "Exported destination files should be preserved" - assert volume.status == MassTransferVolume.Status.EXPORTED - assert volume.exported_folder == str(export_dir) - - -@pytest.mark.django_db -def test_cleanup_skips_converted_volumes(tmp_path: Path): - """CONVERTED volumes represent fully-processed data in the destination. - - Proves: cleanup_on_failure never touches CONVERTED volumes — their status - stays CONVERTED and their destination files are preserved. 
- """ - MassTransferSettings.objects.create() - - user = UserFactory.create() - source = DicomServerFactory.create() - destination = DicomFolderFactory.create(path=str(tmp_path / "output")) - job = MassTransferJob.objects.create( - owner=user, - source=source, - destination=destination, - start_date=timezone.now().date(), - end_date=timezone.now().date(), - partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - convert_to_nifti=True, - ) - task = MassTransferTask.objects.create( - job=job, - source=source, - partition_start=timezone.now(), - partition_end=timezone.now(), - partition_key="20240101", - ) - - # Simulate a CONVERTED volume whose intermediate export folder still exists - export_dir = tmp_path / "exports" / "202401" / "PATIENT" / "1-Head" - export_dir.mkdir(parents=True, exist_ok=True) - - volume = MassTransferVolume.objects.create( - job=job, - task=task, - partition_key="20240101", - patient_id="PATIENT", - study_instance_uid="study-1", - series_instance_uid="series-1", - modality="CT", - study_description="", - series_description="Head", - series_number=1, - study_datetime=timezone.now(), - exported_folder=str(export_dir), - status=MassTransferVolume.Status.CONVERTED, - converted_file=str(tmp_path / "output" / "result.nii.gz"), - ) - - task.cleanup_on_failure() - - volume.refresh_from_db() - # CONVERTED volumes must be left untouched - assert volume.status == MassTransferVolume.Status.CONVERTED - assert volume.exported_folder == str(export_dir) - assert export_dir.exists(), "CONVERTED volume's export folder should not be deleted" - - -@pytest.mark.django_db -def test_cleanup_deletes_pending_volumes_with_partial_export(tmp_path: Path): - """PENDING volumes with an exported_folder represent a mid-export crash. - - Proves: cleanup_on_failure removes the partially-written export folder - and marks the volume as ERROR so it can be re-exported on retry. 
- """ - MassTransferSettings.objects.create() - - user = UserFactory.create() - source = DicomServerFactory.create() - destination = DicomFolderFactory.create(path=str(tmp_path / "output")) - job = MassTransferJob.objects.create( - owner=user, - source=source, - destination=destination, - start_date=timezone.now().date(), - end_date=timezone.now().date(), - partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - convert_to_nifti=True, - ) - task = MassTransferTask.objects.create( - job=job, - source=source, - partition_start=timezone.now(), - partition_end=timezone.now(), - partition_key="20240101", - ) - - # A PENDING volume that had its export folder created but fetch_series - # crashed before setting status to EXPORTED - partial_dir = tmp_path / "exports" / "202401" / "PATIENT" / "2-Body" - partial_dir.mkdir(parents=True, exist_ok=True) - # Write a partial file to simulate incomplete download - (partial_dir / "partial.dcm").write_bytes(b"incomplete") - - volume = MassTransferVolume.objects.create( - job=job, - task=task, - partition_key="20240101", - patient_id="PATIENT", - study_instance_uid="study-1", - series_instance_uid="series-2", - modality="CT", - study_description="", - series_description="Body", - series_number=2, - study_datetime=timezone.now(), - exported_folder=str(partial_dir), - status=MassTransferVolume.Status.PENDING, - ) - - task.cleanup_on_failure() - - volume.refresh_from_db() - assert not partial_dir.exists(), "Partial export should be deleted" - assert volume.status == MassTransferVolume.Status.ERROR - assert volume.exported_folder == "" + vol = MassTransferVolume.objects.get(series_instance_uid="series-1") + assert vol.status == MassTransferVolume.Status.EXPORTED diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 1f53517a5..951bc3354 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -1,4 +1,3 @@ -import uuid from datetime import date, datetime, timedelta from pathlib import Path @@ -21,17 +20,17 @@ MassTransferVolume, ) from adit.mass_transfer.processors import ( + DiscoveredSeries, MassTransferTaskProcessor, _dicom_match, _parse_int, _series_folder_name, _study_datetime, - _volume_path, + _study_folder_name, ) def _make_study(study_uid: str, study_date: str = "20240101") -> ResultDataset: - """Create a minimal ResultDataset for testing _find_studies.""" ds = Dataset() ds.StudyInstanceUID = study_uid ds.StudyDate = study_date @@ -41,6 +40,61 @@ def _make_study(study_uid: str, study_date: str = "20240101") -> ResultDataset: return ResultDataset(ds) +def _make_discovered( + *, + patient_id: str = "PAT1", + study_uid: str = "study-1", + series_uid: str = "series-1", + modality: str = "CT", + study_description: str = "Brain CT", + series_description: str = "Axial", + series_number: int | None = 1, + study_datetime: datetime | None = None, +) -> DiscoveredSeries: + return DiscoveredSeries( + patient_id=patient_id, + accession_number="ACC001", + study_instance_uid=study_uid, + series_instance_uid=series_uid, + modality=modality, + study_description=study_description, + series_description=series_description, + series_number=series_number, + study_datetime=study_datetime or datetime(2024, 1, 1, 12, 0), + institution_name="Radiology", + number_of_images=10, + ) + + +# --------------------------------------------------------------------------- +# _find_studies tests +# --------------------------------------------------------------------------- + + 
+def _make_processor(mocker: MockerFixture, settings) -> MassTransferTaskProcessor: + settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = getattr( + settings, "MASS_TRANSFER_MAX_SEARCH_RESULTS", 200 + ) + mock_task = mocker.MagicMock(spec=MassTransferTask) + mock_task._meta = MassTransferTask._meta + mocker.patch.object(MassTransferTaskProcessor, "__init__", return_value=None) + processor = MassTransferTaskProcessor.__new__(MassTransferTaskProcessor) + processor.dicom_task = mock_task + processor.mass_task = mock_task + return processor + + +def _make_filter(mocker: MockerFixture, **kwargs) -> MassTransferFilter: + mf = mocker.MagicMock(spec=MassTransferFilter) + mf.modality = kwargs.get("modality", "CT") + mf.study_description = kwargs.get("study_description", "") + mf.institution_name = kwargs.get("institution_name", "") + mf.apply_institution_on_study = kwargs.get("apply_institution_on_study", True) + mf.series_description = kwargs.get("series_description", "") + mf.series_number = kwargs.get("series_number", None) + return mf + + @pytest.mark.django_db def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture, settings): settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 1 @@ -78,194 +132,7 @@ def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture, s processor._find_studies(operator, mf, start, end) -@pytest.mark.django_db -def test_process_groups_pseudonyms_by_study(mocker: MockerFixture, settings, tmp_path: Path): - """All volumes in the same study receive the same pseudonym.""" - settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") - MassTransferSettings.objects.create() - - user = UserFactory.create() - source = DicomServerFactory.create() - destination = DicomFolderFactory.create(path=str(tmp_path / "output")) - job = MassTransferJob.objects.create( - owner=user, - source=source, - destination=destination, - start_date=date(2024, 1, 1), - end_date=date(2024, 1, 1), - partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - ) - job.filters.create(owner=user, name="CT Filter", modality="CT") - - task = MassTransferTask.objects.create( - job=job, - source=source, - partition_start=timezone.now(), - partition_end=timezone.now(), - partition_key="20240101", - ) - - vol1 = MassTransferVolume.objects.create( - job=job, - task=task, - partition_key="20240101", - study_instance_uid="study-1", - series_instance_uid="series-1", - modality="CT", - study_description="", - series_description="A", - series_number=1, - study_datetime=timezone.now(), - ) - vol2 = MassTransferVolume.objects.create( - job=job, - task=task, - partition_key="20240101", - study_instance_uid="study-1", - series_instance_uid="series-2", - modality="CT", - study_description="", - series_description="B", - series_number=2, - study_datetime=timezone.now(), - ) - - processor = MassTransferTaskProcessor(task) - - # Mock _find_volumes to return pre-created volumes (skip PACS query) - mocker.patch.object(processor, "_find_volumes", return_value=[vol1, vol2]) - mocker.patch("adit.mass_transfer.processors.DicomOperator") - - export_calls: list[tuple[str, str]] = [] - - def fake_export(_, volume, __, pseudonym, **kwargs): - export_calls.append((volume.series_instance_uid, pseudonym)) - - mocker.patch.object(processor, "_export_volume", side_effect=fake_export) - mocker.patch.object(processor, "_convert_volume", return_value=None) - - mocker.patch( - "adit.mass_transfer.processors.uuid.uuid4", - return_value=uuid.UUID(int=1), - ) - - result = processor.process() - - 
pseudonyms_by_series = {series_uid: pseudonym for series_uid, pseudonym in export_calls} - # Both volumes in the same study should share a pseudonym - assert pseudonyms_by_series["series-1"] == pseudonyms_by_series["series-2"] - assert pseudonyms_by_series["series-1"] != "" - assert result["status"] == MassTransferTask.Status.SUCCESS - - -@pytest.mark.django_db -def test_process_opt_out_skips_pseudonymization( - mocker: MockerFixture, - settings, - tmp_path: Path, -): - """When anonymization_mode=NONE, process passes empty pseudonym.""" - settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") - MassTransferSettings.objects.create() - - user = UserFactory.create() - source = DicomServerFactory.create() - destination = DicomFolderFactory.create(path=str(tmp_path / "output")) - job = MassTransferJob.objects.create( - owner=user, - source=source, - destination=destination, - start_date=date(2024, 1, 1), - end_date=date(2024, 1, 1), - partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - anonymization_mode=MassTransferJob.AnonymizationMode.NONE, - ) - job.filters.create(owner=user, name="CT Filter", modality="CT") - - task = MassTransferTask.objects.create( - job=job, - source=source, - partition_start=timezone.now(), - partition_end=timezone.now(), - partition_key="20240101", - ) - - vol = MassTransferVolume.objects.create( - job=job, - task=task, - partition_key="20240101", - patient_id="PATIENT-1", - study_instance_uid="study-1", - series_instance_uid="series-1", - modality="CT", - study_description="", - series_description="A", - series_number=1, - study_datetime=timezone.now(), - ) - - processor = MassTransferTaskProcessor(task) - - # Mock _find_volumes to return pre-created volume (skip PACS query) - mocker.patch.object(processor, "_find_volumes", return_value=[vol]) - mocker.patch("adit.mass_transfer.processors.DicomOperator") - - export_calls: list[str] = [] - - def fake_export(_, __, ___, pseudonym, **kwargs): - export_calls.append(pseudonym) - - mocker.patch.object(processor, "_export_volume", side_effect=fake_export) - mocker.patch.object(processor, "_convert_volume", return_value=None) - - result = processor.process() - - assert export_calls == [""] - assert result["status"] == MassTransferTask.Status.SUCCESS - - -def test_volume_path_uses_year_month_and_subject_id(): - base_dir = Path("/tmp/base") - study_dt = datetime(2024, 2, 15, 10, 30) - path = _volume_path(base_dir, study_dt, "subject", "1-Head") - - assert path == base_dir / "202402" / "subject" / "1-Head" - - -# --------------------------------------------------------------------------- -# _find_studies tests -# --------------------------------------------------------------------------- - - -def _make_processor(mocker: MockerFixture, settings) -> MassTransferTaskProcessor: - """Create a MassTransferTaskProcessor with a mocked task (no DB required).""" - settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = getattr( - settings, "MASS_TRANSFER_MAX_SEARCH_RESULTS", 200 - ) - mock_task = mocker.MagicMock(spec=MassTransferTask) - mock_task._meta = MassTransferTask._meta - # Bypass the isinstance assertion in __init__ - mocker.patch.object(MassTransferTaskProcessor, "__init__", return_value=None) - processor = MassTransferTaskProcessor.__new__(MassTransferTaskProcessor) - processor.dicom_task = mock_task - processor.mass_task = mock_task - return processor - - -def _make_filter(mocker: MockerFixture, **kwargs) -> MassTransferFilter: - """Create a mock MassTransferFilter (no DB required).""" - mf = 
mocker.MagicMock(spec=MassTransferFilter) - mf.modality = kwargs.get("modality", "CT") - mf.study_description = kwargs.get("study_description", "") - mf.institution_name = kwargs.get("institution_name", "") - mf.apply_institution_on_study = kwargs.get("apply_institution_on_study", True) - mf.series_description = kwargs.get("series_description", "") - mf.series_number = kwargs.get("series_number", None) - return mf - - def test_find_studies_returns_all_when_under_limit(mocker: MockerFixture, settings): - """When the PACS returns fewer results than max, return them directly.""" settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 10 processor = _make_processor(mocker, settings) @@ -286,8 +153,6 @@ def test_find_studies_returns_all_when_under_limit(mocker: MockerFixture, settin def test_find_studies_splits_and_deduplicates(mocker: MockerFixture, settings): - """When results exceed max, _find_studies splits the window and deduplicates - studies that appear in both halves (same-day split).""" settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 2 processor = _make_processor(mocker, settings) @@ -299,30 +164,25 @@ def test_find_studies_splits_and_deduplicates(mocker: MockerFixture, settings): study_a = _make_study("1.2.100") study_b = _make_study("1.2.200") study_c = _make_study("1.2.300") - # A duplicate of study_a that would appear in the right half too study_a_dup = _make_study("1.2.100") - # First call: too many results (3 > max=2), triggers split - # Left half: returns [study_a, study_b] (under limit) - # Right half: returns [study_a_dup, study_c] (under limit) operator = mocker.create_autospec(DicomOperator) operator.find_studies.side_effect = [ - [study_a, study_b, study_c], # initial call — over limit - [study_a, study_b], # left half - [study_a_dup, study_c], # right half + [study_a, study_b, study_c], + [study_a, study_b], + [study_a_dup, study_c], ] result = processor._find_studies(operator, mf, start, end) result_uids = [str(s.StudyInstanceUID) for s in result] assert len(result) == 3 - assert result_uids.count("1.2.100") == 1, "Duplicate study should be removed" + assert result_uids.count("1.2.100") == 1 assert "1.2.200" in result_uids assert "1.2.300" in result_uids def test_find_studies_split_boundaries_dont_overlap(mocker: MockerFixture, settings): - """Verify that the left and right halves of a split use non-overlapping time ranges.""" settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 1 processor = _make_processor(mocker, settings) @@ -331,22 +191,18 @@ def test_find_studies_split_boundaries_dont_overlap(mocker: MockerFixture, setti start = datetime(2024, 1, 1, 0, 0, 0) end = datetime(2024, 1, 3, 23, 59, 59) - # Track all (start, end) pairs passed to _find_studies call_ranges: list[tuple[datetime, datetime]] = [] - original_find_studies = MassTransferTaskProcessor._find_studies def tracking_find_studies(self_inner, operator, mf, s, e): call_ranges.append((s, e)) return original_find_studies(self_inner, operator, mf, s, e) - # First call: over limit, triggers split - # Sub-calls: under limit, return single study each operator = mocker.create_autospec(DicomOperator) operator.find_studies.side_effect = [ - [_make_study("1"), _make_study("2")], # initial — over limit - [_make_study("1")], # left half - [_make_study("2")], # right half + [_make_study("1"), _make_study("2")], + [_make_study("1")], + [_make_study("2")], ] mocker.patch.object( @@ -360,19 +216,15 @@ def tracking_find_studies(self_inner, operator, mf, s, e): processor._find_studies(operator, mf, start, end) - # We expect 3 calls: the original + 
2 recursive halves assert len(call_ranges) == 3 - _, _ = call_ranges[0] left_start, left_end = call_ranges[1] right_start, right_end = call_ranges[2] assert left_start == start - # The right half must start strictly after the left half ends assert right_start > left_end def test_find_studies_preserves_order_with_unique_studies(mocker: MockerFixture, settings): - """Left-half studies come first, then unique right-half studies are appended.""" settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 2 processor = _make_processor(mocker, settings) @@ -383,24 +235,19 @@ def test_find_studies_preserves_order_with_unique_studies(mocker: MockerFixture, operator = mocker.create_autospec(DicomOperator) operator.find_studies.side_effect = [ - # Initial: over limit (3 > 2) [_make_study("1.2.1"), _make_study("1.2.2"), _make_study("1.2.3")], - # Left half: within limit [_make_study("1.2.1"), _make_study("1.2.2")], - # Right half: 1.2.2 is duplicate, 1.2.3 is new [_make_study("1.2.2"), _make_study("1.2.3")], ] result = processor._find_studies(operator, mf, start, end) result_uids = [str(s.StudyInstanceUID) for s in result] - # Left-half results come first, then unique right-half additions assert result_uids == ["1.2.1", "1.2.2", "1.2.3"] - assert len(result) == 3 # --------------------------------------------------------------------------- -# process() tests (no DB required — fully mocked) +# process() tests — mocked environment # --------------------------------------------------------------------------- @@ -412,9 +259,6 @@ def _make_process_env( convert_to_nifti: bool = False, anonymization_mode: str = "pseudonymize", ) -> MassTransferTaskProcessor: - """Create a processor with a fully mocked job for testing process().""" - settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") - processor = _make_processor(mocker, settings) mock_job = processor.mass_task.job @@ -422,6 +266,7 @@ def _make_process_env( mock_job.should_pseudonymize = anonymization_mode != "none" mock_job.should_link = anonymization_mode == "pseudonymize_with_linking" mock_job.convert_to_nifti = convert_to_nifti + mock_job.pseudonym_salt = "test-salt-for-deterministic-pseudonyms" mock_job.source.node_type = DicomNode.NodeType.SERVER mock_job.source.dicomserver = mocker.MagicMock() mock_job.destination.node_type = DicomNode.NodeType.FOLDER @@ -434,410 +279,90 @@ def _make_process_env( mocker.patch.object(processor, "is_suspended", return_value=False) mocker.patch("adit.mass_transfer.processors.DicomOperator") - return processor - - -def _make_mock_volume( - mocker: MockerFixture, - *, - study_uid: str = "study-1", - series_uid: str = "series-1", - status: str | None = None, - pseudonym: str = "", - task_id: int | None = None, -) -> MassTransferVolume: - """Create a mock MassTransferVolume for testing process().""" - vol = mocker.MagicMock(spec=MassTransferVolume) - vol.status = status or MassTransferVolume.Status.PENDING - vol.study_instance_uid = study_uid - vol.series_instance_uid = series_uid - vol.pseudonym = pseudonym - vol.task_id = task_id - return vol - - -def test_process_reraises_retriable_dicom_error( - mocker: MockerFixture, settings, tmp_path: Path -): - """RetriableDicomError from _export_volume propagates for Procrastinate retry. - - Proves: RetriableDicomError is not swallowed by the broad except Exception - handler and propagates out of process() so Procrastinate can retry the task. 
- """ - processor = _make_process_env(mocker, settings, tmp_path) - vol = _make_mock_volume(mocker) - - mocker.patch.object(processor, "_find_volumes", return_value=[vol]) + # Mock DB queries for deferred insertion mocker.patch.object( - processor, - "_export_volume", - side_effect=RetriableDicomError("PACS connection lost"), - ) - mocker.patch.object(processor, "_convert_volume") - - with pytest.raises(RetriableDicomError, match="PACS connection lost"): - processor.process() - - -def test_process_calls_convert_when_enabled( - mocker: MockerFixture, settings, tmp_path: Path -): - """With convert_to_nifti=True, both _export_volume and _convert_volume are called - and the export uses the intermediate export_base directory. - - Proves: When convert_to_nifti is enabled, both export and convert are called, - and the export writes to the intermediate directory (not the final destination). - """ - processor = _make_process_env(mocker, settings, tmp_path, convert_to_nifti=True) - vol = _make_mock_volume(mocker) - - mocker.patch.object(processor, "_find_volumes", return_value=[vol]) - mock_export = mocker.patch.object(processor, "_export_volume") - mock_convert = mocker.patch.object(processor, "_convert_volume") - - result = processor.process() - - assert mock_export.call_count == 1 - assert mock_convert.call_count == 1 - # Export should use export_base (intermediate dir), not output_base - export_call_base = mock_export.call_args[0][2] - assert "exports" in str(export_call_base) - assert result["status"] == MassTransferTask.Status.SUCCESS - - -def test_process_skips_convert_when_disabled( - mocker: MockerFixture, settings, tmp_path: Path -): - """With convert_to_nifti=False, _convert_volume is not called and export - goes directly to the destination folder. - - Proves: When convert_to_nifti is disabled, _convert_volume is never called - and the export writes directly to the destination folder. - """ - processor = _make_process_env(mocker, settings, tmp_path, convert_to_nifti=False) - vol = _make_mock_volume(mocker) - - mocker.patch.object(processor, "_find_volumes", return_value=[vol]) - mock_export = mocker.patch.object(processor, "_export_volume") - mock_convert = mocker.patch.object(processor, "_convert_volume") - - result = processor.process() - - assert mock_export.call_count == 1 - assert mock_convert.call_count == 0 - # Export should go directly to output_base (destination) - export_call_base = mock_export.call_args[0][2] - assert "output" in str(export_call_base) - assert result["status"] == MassTransferTask.Status.SUCCESS - - -def test_process_counts_already_done_volumes( - mocker: MockerFixture, settings, tmp_path: Path -): - """Already-processed volumes are counted and skipped on retry. - - Proves: On retry, already-CONVERTED volumes are counted in total_processed - (not silently skipped) and are not re-exported or re-converted. 
- """ - processor = _make_process_env(mocker, settings, tmp_path, convert_to_nifti=True) - - vol_done = _make_mock_volume( - mocker, series_uid="s-done", status=MassTransferVolume.Status.CONVERTED + MassTransferVolume.objects, "filter", + return_value=mocker.MagicMock( + values_list=mocker.MagicMock(return_value=mocker.MagicMock( + __iter__=lambda self: iter([]), + )), + delete=mocker.MagicMock(), + ), ) - vol_pending = _make_mock_volume(mocker, series_uid="s-pending") - - mocker.patch.object(processor, "_find_volumes", return_value=[vol_done, vol_pending]) - mock_export = mocker.patch.object(processor, "_export_volume") - mock_convert = mocker.patch.object(processor, "_convert_volume") - result = processor.process() - - # Only the pending volume should be exported/converted - assert mock_export.call_count == 1 - assert mock_convert.call_count == 1 - # Both volumes should be counted as processed (1 already done + 1 new) - assert "Processed: 2" in result["log"] - assert result["status"] == MassTransferTask.Status.SUCCESS + return processor -def test_process_returns_warning_on_partial_failure( +def test_process_reraises_retriable_dicom_error( mocker: MockerFixture, settings, tmp_path: Path ): - """When some volumes fail, the task status is WARNING. - - Proves: Mixed success/failure returns WARNING status with correct processed - and failed counts in the log. - """ processor = _make_process_env(mocker, settings, tmp_path) + series = [_make_discovered(series_uid="s-1")] - vol1 = _make_mock_volume(mocker, series_uid="s-1") - vol2 = _make_mock_volume(mocker, series_uid="s-2") - - mocker.patch.object(processor, "_find_volumes", return_value=[vol1, vol2]) - - call_count = {"n": 0} - - def fake_export(op, volume, base, pseudo, **kwargs): - call_count["n"] += 1 - if call_count["n"] == 2: - raise DicomError("Export failed") - - mocker.patch.object(processor, "_export_volume", side_effect=fake_export) - mocker.patch.object(processor, "_convert_volume") - mocker.patch.object(processor, "_cleanup_export") - - result = processor.process() - - assert result["status"] == MassTransferTask.Status.WARNING - assert "Processed: 1" in result["log"] - assert "Failed: 1" in result["log"] - - -# --------------------------------------------------------------------------- -# Resumability tests — verify no re-download after outage -# --------------------------------------------------------------------------- - - -def test_export_volume_skips_fetch_when_already_exported( - mocker: MockerFixture, settings -): - """_export_volume returns immediately for EXPORTED volumes — no PACS fetch. - - Proves: _export_volume short-circuits when status=EXPORTED and exported_folder - is set, so operator.fetch_series is never called — no redundant PACS download. - """ - processor = _make_processor(mocker, settings) - operator = mocker.create_autospec(DicomOperator) - - vol = mocker.MagicMock(spec=MassTransferVolume) - vol.status = MassTransferVolume.Status.EXPORTED - vol.exported_folder = "/tmp/already/exported" - - processor._export_volume(operator, vol, Path("/tmp/base"), "pseudo") - - operator.fetch_series.assert_not_called() - - -def test_convert_volume_skips_when_already_converted( - mocker: MockerFixture, settings -): - """_convert_volume returns immediately for CONVERTED volumes — no dcm2niix. - - Proves: _convert_volume short-circuits when status=CONVERTED and converted_file - is set, so subprocess.run (dcm2niix) is never called — no redundant conversion. 
- """ - processor = _make_processor(mocker, settings) - - vol = mocker.MagicMock(spec=MassTransferVolume) - vol.status = MassTransferVolume.Status.CONVERTED - vol.converted_file = "/tmp/output/result.nii.gz" - - mock_run = mocker.patch("adit.mass_transfer.processors.subprocess.run") - - processor._convert_volume(vol, Path("/tmp/output"), "pseudo") - - mock_run.assert_not_called() - - -def test_process_resumes_after_outage_without_refetch( - mocker: MockerFixture, settings, tmp_path: Path -): - """After an outage, only PENDING volumes trigger a PACS fetch. - - Simulates a crash-and-resume where the task has three volumes in different - states: - - PENDING: needs full processing (export + convert) - - EXPORTED: export finished before crash, needs conversion only - - CONVERTED: fully done, skip entirely - - Proves: Full integration — only PENDING triggers fetch_series (1 call). - EXPORTED skips re-download but still proceeds to conversion. CONVERTED is - fully skipped. All 3 volumes are counted as processed. - """ - processor = _make_process_env(mocker, settings, tmp_path, convert_to_nifti=True) - - vol_pending = _make_mock_volume(mocker, series_uid="s-pending") - vol_pending.study_datetime = datetime(2024, 1, 15, 10, 30) - vol_pending.series_number = 1 - vol_pending.series_description = "Head" - vol_pending.patient_id = "PATIENT1" - - vol_exported = _make_mock_volume( - mocker, series_uid="s-exported", status=MassTransferVolume.Status.EXPORTED - ) - vol_exported.exported_folder = str(tmp_path / "already_exported") - - vol_converted = _make_mock_volume( - mocker, series_uid="s-converted", status=MassTransferVolume.Status.CONVERTED - ) - + mocker.patch.object(processor, "_discover_series", return_value=series) mocker.patch.object( processor, - "_find_volumes", - return_value=[vol_pending, vol_exported, vol_converted], - ) - # Don't mock _export_volume — let the real early-return guard run - mocker.patch("adit.mass_transfer.processors.DicomManipulator") - mock_convert = mocker.patch.object(processor, "_convert_volume") - mocker.patch( - "adit.mass_transfer.processors.uuid.uuid4", - return_value=uuid.UUID(int=42), - ) - - result = processor.process() - - # Get the mock operator that process() instantiated - import adit.mass_transfer.processors as _proc - - mock_operator = _proc.DicomOperator.return_value - - # Only the PENDING volume should trigger a PACS fetch - assert mock_operator.fetch_series.call_count == 1 - assert mock_operator.fetch_series.call_args.kwargs["series_uid"] == "s-pending" - - # Conversion should run for PENDING + EXPORTED, not CONVERTED - assert mock_convert.call_count == 2 - - # All 3 volumes counted as processed - assert "Processed: 3" in result["log"] - assert result["status"] == MassTransferTask.Status.SUCCESS - - -# --------------------------------------------------------------------------- -# HIGH: Pseudonym reuse on retry -# --------------------------------------------------------------------------- - - -def test_process_reuses_existing_pseudonym_on_retry( - mocker: MockerFixture, settings, tmp_path: Path -): - """On retry, volumes that already have a pseudonym from a prior run are reused. - - Proves: When a study has a volume with an existing pseudonym (set during a - previous partial run), process() reuses that pseudonym instead of generating - a new one — preserving data linkage between series in the same study. 
- """ - processor = _make_process_env(mocker, settings, tmp_path, convert_to_nifti=False) - - # vol_done was exported in a prior run and has a pseudonym - vol_done = _make_mock_volume( - mocker, - series_uid="s-done", - status=MassTransferVolume.Status.EXPORTED, - pseudonym="existing-pseudo-abc", - ) - # vol_pending is in the same study but wasn't exported yet - vol_pending = _make_mock_volume(mocker, series_uid="s-pending", pseudonym="") - - mocker.patch.object( - processor, "_find_volumes", return_value=[vol_done, vol_pending] - ) - - export_calls: list[tuple[str, str]] = [] - - def fake_export(op, volume, base, pseudonym, **kwargs): - export_calls.append((volume.series_instance_uid, pseudonym)) - - mocker.patch.object(processor, "_export_volume", side_effect=fake_export) - - # Should NOT be called — uuid should never be generated - mock_uuid = mocker.patch( - "adit.mass_transfer.processors.uuid.uuid4", - return_value=uuid.UUID(int=99), - ) - - result = processor.process() - - # The pending volume should receive the existing pseudonym, not a new one - assert len(export_calls) == 1 - assert export_calls[0] == ("s-pending", "existing-pseudo-abc") - mock_uuid.assert_not_called() - assert result["status"] == MassTransferTask.Status.SUCCESS - + "_export_series", + side_effect=RetriableDicomError("PACS connection lost"), + ) -# --------------------------------------------------------------------------- -# HIGH: done_status=EXPORTED when convert_to_nifti=False -# --------------------------------------------------------------------------- + with pytest.raises(RetriableDicomError, match="PACS connection lost"): + processor.process() -def test_process_counts_exported_as_done_when_not_converting( +def test_process_returns_warning_on_partial_failure( mocker: MockerFixture, settings, tmp_path: Path ): - """With convert_to_nifti=False, EXPORTED is the terminal state. - - Proves: Already-EXPORTED volumes are counted as done (not re-exported) - when convert_to_nifti is disabled. The done_status logic correctly uses - EXPORTED instead of CONVERTED. 
- """ - processor = _make_process_env(mocker, settings, tmp_path, convert_to_nifti=False) + processor = _make_process_env(mocker, settings, tmp_path) + series = [ + _make_discovered(series_uid="s-1"), + _make_discovered(series_uid="s-2"), + ] - vol_done = _make_mock_volume( - mocker, series_uid="s-done", status=MassTransferVolume.Status.EXPORTED - ) - vol_pending = _make_mock_volume(mocker, series_uid="s-pending") + mocker.patch.object(processor, "_discover_series", return_value=series) - mocker.patch.object( - processor, "_find_volumes", return_value=[vol_done, vol_pending] - ) - mock_export = mocker.patch.object(processor, "_export_volume") - mock_convert = mocker.patch.object(processor, "_convert_volume") + call_count = {"n": 0} - result = processor.process() + def fake_export(*args, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 2: + raise DicomError("Export failed") - # Only the pending volume should be exported - assert mock_export.call_count == 1 - assert mock_convert.call_count == 0 - # Both volumes should be counted as processed - assert "Processed: 2" in result["log"] - assert result["status"] == MassTransferTask.Status.SUCCESS + mocker.patch.object(processor, "_export_series", side_effect=fake_export) + mocker.patch.object(MassTransferVolume.objects, "create") + result = processor.process() -# --------------------------------------------------------------------------- -# HIGH: All-fail → FAILURE -# --------------------------------------------------------------------------- + assert result["status"] == MassTransferTask.Status.WARNING + assert "Processed: 1" in result["log"] + assert "Failed: 1" in result["log"] -def test_process_returns_failure_when_all_volumes_fail( +def test_process_returns_failure_when_all_fail( mocker: MockerFixture, settings, tmp_path: Path ): - """When every volume fails, the task status is FAILURE. - - Proves: The all-fail branch (total_failed > 0, total_processed == 0) - returns FAILURE status, distinguishing it from partial failure (WARNING). - """ processor = _make_process_env(mocker, settings, tmp_path) + series = [ + _make_discovered(series_uid="s-1"), + _make_discovered(series_uid="s-2"), + ] - vol1 = _make_mock_volume(mocker, series_uid="s-1") - vol2 = _make_mock_volume(mocker, series_uid="s-2") - - mocker.patch.object(processor, "_find_volumes", return_value=[vol1, vol2]) + mocker.patch.object(processor, "_discover_series", return_value=series) mocker.patch.object( - processor, "_export_volume", side_effect=DicomError("PACS down") + processor, "_export_series", side_effect=DicomError("PACS down") ) - mocker.patch.object(processor, "_cleanup_export") + mocker.patch.object(MassTransferVolume.objects, "create") result = processor.process() assert result["status"] == MassTransferTask.Status.FAILURE - assert "Processed: 0" in result["log"] assert "Failed: 2" in result["log"] -# --------------------------------------------------------------------------- -# MEDIUM: process() early guards -# --------------------------------------------------------------------------- - - def test_process_returns_warning_when_suspended( mocker: MockerFixture, settings, tmp_path: Path ): - """When the mass transfer app is suspended, process() returns WARNING. - - Proves: The suspended guard fires before any PACS interaction and returns - a WARNING so the task can be retried later without being marked as failed. 
- """ processor = _make_process_env(mocker, settings, tmp_path) - # Override the is_suspended mock from _make_process_env mocker.patch.object(processor, "is_suspended", return_value=True) result = processor.process() @@ -849,11 +374,6 @@ def test_process_returns_warning_when_suspended( def test_process_raises_when_source_not_server( mocker: MockerFixture, settings, tmp_path: Path ): - """Source must be a DICOM server. - - Proves: process() raises DicomError with a clear message when the source - node is not a SERVER, before any volumes are processed. - """ processor = _make_process_env(mocker, settings, tmp_path) processor.mass_task.job.source.node_type = DicomNode.NodeType.FOLDER @@ -864,11 +384,6 @@ def test_process_raises_when_source_not_server( def test_process_raises_when_destination_not_folder( mocker: MockerFixture, settings, tmp_path: Path ): - """Destination must be a DICOM folder. - - Proves: process() raises DicomError with a clear message when the destination - node is not a FOLDER, before any volumes are processed. - """ processor = _make_process_env(mocker, settings, tmp_path) processor.mass_task.job.destination.node_type = DicomNode.NodeType.SERVER @@ -879,11 +394,6 @@ def test_process_raises_when_destination_not_folder( def test_process_returns_failure_when_no_filters( mocker: MockerFixture, settings, tmp_path: Path ): - """When no filters are configured, process() returns FAILURE. - - Proves: The no-filters guard returns FAILURE with a clear message instead - of silently succeeding with zero volumes. - """ processor = _make_process_env(mocker, settings, tmp_path) processor.mass_task.job.filters.all.return_value = [] @@ -896,275 +406,264 @@ def test_process_returns_failure_when_no_filters( def test_process_returns_success_for_empty_partition( mocker: MockerFixture, settings, tmp_path: Path ): - """When no volumes are found, process() returns SUCCESS. - - Proves: An empty partition is a legitimate outcome (not an error). The task - reports SUCCESS with a "No volumes found" message. - """ processor = _make_process_env(mocker, settings, tmp_path) - mocker.patch.object(processor, "_find_volumes", return_value=[]) + mocker.patch.object(processor, "_discover_series", return_value=[]) result = processor.process() assert result["status"] == MassTransferTask.Status.SUCCESS - assert "No volumes found" in result["message"] - - -# --------------------------------------------------------------------------- -# MEDIUM: _convert_volume error cases -# --------------------------------------------------------------------------- + assert "No series found" in result["message"] -def test_convert_volume_raises_when_no_exported_folder( - mocker: MockerFixture, settings +def test_process_skips_already_done_series( + mocker: MockerFixture, settings, tmp_path: Path ): - """_convert_volume raises DicomError when exported_folder is empty. - - Proves: The guard at the top of _convert_volume catches a missing - exported_folder and raises a clear DicomError instead of passing garbage - to dcm2niix. 
- """ - processor = _make_processor(mocker, settings) - - vol = mocker.MagicMock(spec=MassTransferVolume) - vol.status = MassTransferVolume.Status.EXPORTED - vol.exported_folder = "" - vol.converted_file = "" + """Already-processed series (from prior runs) are skipped.""" + processor = _make_process_env(mocker, settings, tmp_path) + series = [ + _make_discovered(series_uid="s-done"), + _make_discovered(series_uid="s-new"), + ] - with pytest.raises(DicomError, match="Missing exported folder"): - processor._convert_volume(vol, Path("/tmp/output"), "pseudo") + mocker.patch.object(processor, "_discover_series", return_value=series) + # Mock the DB query to return s-done as already processed + mock_qs = mocker.MagicMock() + mock_qs.values_list.return_value = {"s-done"} + mock_delete_qs = mocker.MagicMock() -def test_convert_volume_raises_on_dcm2niix_failure( - mocker: MockerFixture, settings, tmp_path: Path -): - """_convert_volume raises DicomError when dcm2niix returns non-zero. + def filter_side_effect(**kwargs): + if "status__in" in kwargs: + return mock_qs + return mock_delete_qs - Proves: A dcm2niix crash produces a clear DicomError with stderr content, - not a silent pass or uncaught exception. - """ - processor = _make_processor(mocker, settings) + mocker.patch.object(MassTransferVolume.objects, "filter", side_effect=filter_side_effect) - vol = mocker.MagicMock(spec=MassTransferVolume) - vol.status = MassTransferVolume.Status.EXPORTED - vol.exported_folder = str(tmp_path / "dicom_input") - vol.converted_file = "" - vol.series_instance_uid = "1.2.3" - vol.series_number = 1 - vol.series_description = "Head" - vol.pseudonym = "pseudo" - vol.patient_id = "PAT1" - vol.study_datetime = datetime(2024, 1, 15, 10, 30) + export_calls = [] + def fake_export(*args, **kwargs): + export_calls.append(1) + mocker.patch.object(processor, "_export_series", side_effect=fake_export) + mocker.patch.object(MassTransferVolume.objects, "create") - mock_result = mocker.MagicMock() - mock_result.returncode = 1 - mock_result.stderr = "Segmentation fault" - mocker.patch( - "adit.mass_transfer.processors.subprocess.run", return_value=mock_result - ) + result = processor.process() - with pytest.raises(DicomError, match="Conversion failed"): - processor._convert_volume(vol, tmp_path / "output", "pseudo") + assert len(export_calls) == 1 # only s-new was exported + assert result["status"] == MassTransferTask.Status.SUCCESS -def test_convert_volume_raises_when_no_nifti_output( +def test_process_none_mode_uses_patient_id_as_subject( mocker: MockerFixture, settings, tmp_path: Path ): - """_convert_volume raises DicomError when dcm2niix produces no .nii.gz files. + """In 'none' anonymization mode, no pseudonymizer is used.""" + processor = _make_process_env( + mocker, settings, tmp_path, anonymization_mode="none" + ) + series = [_make_discovered(patient_id="REAL-PAT-1", series_uid="s-1")] - Proves: A successful dcm2niix run that produces no output files is caught - and raises a clear DicomError instead of silently writing empty metadata. 
- """ - processor = _make_processor(mocker, settings) + mocker.patch.object(processor, "_discover_series", return_value=series) - vol = mocker.MagicMock(spec=MassTransferVolume) - vol.status = MassTransferVolume.Status.EXPORTED - vol.exported_folder = str(tmp_path / "dicom_input") - vol.converted_file = "" - vol.series_instance_uid = "1.2.3" - vol.series_number = 1 - vol.series_description = "Head" - vol.pseudonym = "pseudo" - vol.patient_id = "PAT1" - vol.study_datetime = datetime(2024, 1, 15, 10, 30) + export_calls: list[tuple] = [] - mock_result = mocker.MagicMock() - mock_result.returncode = 0 - mock_result.stderr = "" - mocker.patch( - "adit.mass_transfer.processors.subprocess.run", return_value=mock_result - ) + def fake_export(op, s, path, subject_id, pseudonymizer): + export_calls.append((subject_id, pseudonymizer)) - with pytest.raises(DicomError, match="no .nii.gz files"): - processor._convert_volume(vol, tmp_path / "output", "pseudo") + mocker.patch.object(processor, "_export_series", side_effect=fake_export) + mocker.patch.object(MassTransferVolume.objects, "create") + result = processor.process() -# --------------------------------------------------------------------------- -# MEDIUM: _cleanup_export tests -# --------------------------------------------------------------------------- + assert len(export_calls) == 1 + subject_id, pseudonymizer = export_calls[0] + assert subject_id == "REAL-PAT-1" + assert pseudonymizer is None + assert result["status"] == MassTransferTask.Status.SUCCESS -def test_cleanup_export_sets_export_cleaned_flag( +def test_process_pseudonymize_mode_consistent_within_task( mocker: MockerFixture, settings, tmp_path: Path ): - """_cleanup_export removes the folder and sets export_cleaned=True. + """In pseudonymize mode (non-linking), same patient gets same pseudonym within a task.""" + processor = _make_process_env(mocker, settings, tmp_path) + # Two series, same patient, different studies + series = [ + _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-1"), + _make_discovered(patient_id="PAT1", study_uid="study-B", series_uid="s-2"), + ] - Proves: On success, the export folder is deleted and export_cleaned is - set so the cleanup is not attempted again on a subsequent call. - """ - processor = _make_processor(mocker, settings) + mocker.patch.object(processor, "_discover_series", return_value=series) + + subject_ids: list[str] = [] - export_dir = tmp_path / "exports" / "202401" / "PATIENT" / "1-Head" - export_dir.mkdir(parents=True) + def fake_export(op, s, path, subject_id, pseudonymizer): + subject_ids.append(subject_id) - vol = mocker.MagicMock(spec=MassTransferVolume) - vol.exported_folder = str(export_dir) - vol.export_cleaned = False + mocker.patch.object(processor, "_export_series", side_effect=fake_export) + mocker.patch.object(MassTransferVolume.objects, "create") - processor._cleanup_export(vol) + processor.process() - assert not export_dir.exists() - assert vol.export_cleaned is True - vol.save.assert_called() + # Both series for PAT1 get the same pseudonym within this task + assert subject_ids[0] == subject_ids[1] + assert subject_ids[0] != "" + assert subject_ids[0] != "PAT1" -def test_cleanup_export_skips_when_already_cleaned( - mocker: MockerFixture, settings +def test_process_linking_mode_uses_deterministic_pseudonym( + mocker: MockerFixture, settings, tmp_path: Path ): - """_cleanup_export is a no-op when export_cleaned is already True. 
+ """In linking mode, pseudonyms are deterministic (seeded).""" + processor = _make_process_env( + mocker, settings, tmp_path, anonymization_mode="pseudonymize_with_linking" + ) + series = [ + _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-1"), + ] - Proves: The already-cleaned guard prevents redundant rmtree calls, - avoiding FileNotFoundError on repeated invocations. - """ - processor = _make_processor(mocker, settings) + mocker.patch.object(processor, "_discover_series", return_value=series) + + subject_ids: list[str] = [] - vol = mocker.MagicMock(spec=MassTransferVolume) - vol.exported_folder = "/tmp/some/path" - vol.export_cleaned = True + def fake_export(op, s, path, subject_id, pseudonymizer): + subject_ids.append(subject_id) - mock_rmtree = mocker.patch("adit.mass_transfer.processors.shutil.rmtree") + mocker.patch.object(processor, "_export_series", side_effect=fake_export) + mocker.patch.object(MassTransferVolume.objects, "create") - processor._cleanup_export(vol) + processor.process() - mock_rmtree.assert_not_called() - vol.save.assert_not_called() + assert subject_ids[0] != "" + assert subject_ids[0] != "PAT1" + # Pseudonym should be deterministic — running again with same salt gives same result + from adit.core.utils.pseudonymizer import Pseudonymizer + expected = Pseudonymizer(seed="test-salt-for-deterministic-pseudonyms").compute_pseudonym("PAT1") + assert subject_ids[0] == expected -def test_cleanup_export_skips_when_no_folder(mocker: MockerFixture, settings): - """_cleanup_export is a no-op when exported_folder is empty. +# --------------------------------------------------------------------------- +# _convert_series tests +# --------------------------------------------------------------------------- - Proves: Volumes that were never exported (empty exported_folder) don't - trigger any filesystem operations. - """ - processor = _make_processor(mocker, settings) - vol = mocker.MagicMock(spec=MassTransferVolume) - vol.exported_folder = "" - vol.export_cleaned = False +def test_convert_series_raises_on_dcm2niix_failure( + mocker: MockerFixture, settings, tmp_path: Path +): + processor = _make_processor(mocker, settings) + series = _make_discovered(series_uid="1.2.3") - mock_rmtree = mocker.patch("adit.mass_transfer.processors.shutil.rmtree") + dicom_dir = tmp_path / "dicom_input" + dicom_dir.mkdir() + output_path = tmp_path / "output" - processor._cleanup_export(vol) + mock_result = mocker.MagicMock() + mock_result.returncode = 1 + mock_result.stderr = "Segmentation fault" + mock_result.stdout = "" + mocker.patch( + "adit.mass_transfer.processors.subprocess.run", return_value=mock_result + ) - mock_rmtree.assert_not_called() - vol.save.assert_not_called() + with pytest.raises(DicomError, match="Conversion failed"): + processor._convert_series(series, dicom_dir, output_path) -def test_cleanup_export_handles_file_not_found( - mocker: MockerFixture, settings +def test_convert_series_raises_when_no_nifti_output( + mocker: MockerFixture, settings, tmp_path: Path ): - """_cleanup_export silently passes when the folder is already gone. - - Proves: FileNotFoundError (e.g., another process already cleaned up) is - caught and the volume is still marked as cleaned. 
- """ processor = _make_processor(mocker, settings) + series = _make_discovered(series_uid="1.2.3") - vol = mocker.MagicMock(spec=MassTransferVolume) - vol.exported_folder = "/tmp/already/gone" - vol.export_cleaned = False + dicom_dir = tmp_path / "dicom_input" + dicom_dir.mkdir() + output_path = tmp_path / "output" + mock_result = mocker.MagicMock() + mock_result.returncode = 0 + mock_result.stderr = "" + mock_result.stdout = "" mocker.patch( - "adit.mass_transfer.processors.shutil.rmtree", - side_effect=FileNotFoundError, + "adit.mass_transfer.processors.subprocess.run", return_value=mock_result ) - processor._cleanup_export(vol) - - assert vol.export_cleaned is True - vol.save.assert_called() + with pytest.raises(DicomError, match="no .nii.gz files"): + processor._convert_series(series, dicom_dir, output_path) -def test_cleanup_export_handles_permission_error( - mocker: MockerFixture, settings +def test_convert_series_skips_non_image_dicom( + mocker: MockerFixture, settings, tmp_path: Path ): - """_cleanup_export logs the error and does NOT set export_cleaned on PermissionError. - - Proves: When rmtree fails with a non-FileNotFoundError (e.g., permissions), - the error is logged but the task doesn't crash, and export_cleaned stays - False so cleanup can be reattempted. - """ processor = _make_processor(mocker, settings) + series = _make_discovered(series_uid="1.2.3") - vol = mocker.MagicMock(spec=MassTransferVolume) - vol.exported_folder = "/tmp/locked/folder" - vol.export_cleaned = False + dicom_dir = tmp_path / "dicom_input" + dicom_dir.mkdir() + output_path = tmp_path / "output" + mock_result = mocker.MagicMock() + mock_result.returncode = 1 + mock_result.stderr = "No valid DICOM images were found" + mock_result.stdout = "" mocker.patch( - "adit.mass_transfer.processors.shutil.rmtree", - side_effect=PermissionError("Access denied"), + "adit.mass_transfer.processors.subprocess.run", return_value=mock_result ) - processor._cleanup_export(vol) - - # export_cleaned should NOT be set — cleanup needs to be retried - assert vol.export_cleaned is False - vol.add_log.assert_called_once() - assert "Cleanup failed" in vol.add_log.call_args[0][0] + # Should not raise — non-image DICOMs are silently skipped + processor._convert_series(series, dicom_dir, output_path) # --------------------------------------------------------------------------- -# LOW: Utility function tests +# Utility function tests # --------------------------------------------------------------------------- def test_series_folder_name_with_number_and_description(): - """Proves: Normal case produces '{number}-{description}' format.""" - assert _series_folder_name(1, "Head CT", "1.2.3") == "1-Head CT" + assert _series_folder_name("Head CT", 1, "1.2.3") == "Head CT_1" def test_series_folder_name_with_no_description(): - """Proves: Missing description falls back to 'Undefined'.""" - assert _series_folder_name(1, "", "1.2.3") == "1-Undefined" + assert _series_folder_name("", 1, "1.2.3") == "Undefined_1" def test_series_folder_name_with_no_number(): - """Proves: Missing series_number falls back to the series UID.""" - assert _series_folder_name(None, "Head CT", "1.2.3.4.5") == "1.2.3.4.5" + assert _series_folder_name("Head CT", None, "1.2.3.4.5") == "1.2.3.4.5" + + +def test_study_folder_name_includes_description_date_and_hash(): + name = _study_folder_name("Brain CT", datetime(2024, 1, 15, 10, 30), "1.2.3.4") + assert name.startswith("Brain CT_20240115_") + assert len(name.split("_")) == 3 + # Hash part is 4 chars + assert 
len(name.split("_")[2]) == 4 + + +def test_study_folder_name_deterministic(): + name1 = _study_folder_name("Brain CT", datetime(2024, 1, 15), "1.2.3.4") + name2 = _study_folder_name("Brain CT", datetime(2024, 1, 15), "1.2.3.4") + assert name1 == name2 + + +def test_study_folder_name_different_uid_different_hash(): + name1 = _study_folder_name("Brain CT", datetime(2024, 1, 15), "1.2.3.4") + name2 = _study_folder_name("Brain CT", datetime(2024, 1, 15), "1.2.3.5") + assert name1 != name2 def test_parse_int_normal(): - """Proves: String integers are parsed correctly.""" assert _parse_int("42") == 42 def test_parse_int_none_returns_default(): - """Proves: None returns the specified default.""" assert _parse_int(None, default=7) == 7 def test_parse_int_empty_returns_default(): - """Proves: Empty string returns the specified default.""" assert _parse_int("", default=0) == 0 def test_parse_int_garbage_returns_default(): - """Proves: Non-numeric strings return the default instead of raising.""" assert _parse_int("abc", default=None) is None def test_study_datetime_with_time(): - """Proves: StudyDate + StudyTime are combined into a datetime.""" ds = Dataset() ds.StudyDate = "20240115" ds.StudyTime = "103000" @@ -1173,15 +672,6 @@ def test_study_datetime_with_time(): def test_study_datetime_with_midnight(): - """Proves: StudyTime of "000000" (midnight) is correctly parsed. - - Note: The `if study_time is None` guard in _study_datetime is dead code — - ResultDataset.StudyTime always passes through convert_to_python_time() which - asserts on both None and empty string before the guard can fire. If PACS - returns a study with no StudyTime, the crash happens in the converter, not - in _study_datetime. Consider fixing convert_to_python_time to return - time(0,0) for None/empty, or catching it in _study_datetime. 
- """ ds = Dataset() ds.StudyDate = "20240115" ds.StudyTime = "000000" @@ -1190,41 +680,35 @@ def test_study_datetime_with_midnight(): def test_dicom_match_empty_pattern_matches_anything(): - """Proves: An empty pattern matches any value (wildcard behavior).""" assert _dicom_match("", "anything") is True assert _dicom_match("", None) is True assert _dicom_match("", "") is True def test_dicom_match_none_value_never_matches(): - """Proves: A non-empty pattern never matches None.""" assert _dicom_match("CT", None) is False def test_dicom_match_exact(): - """Proves: An exact pattern matches its value.""" assert _dicom_match("CT", "CT") is True assert _dicom_match("CT", "MR") is False def test_dicom_match_wildcard(): - """Proves: DICOM wildcard patterns (converted to regex) work correctly.""" - # DICOM uses * as wildcard, which should be converted to regex .* assert _dicom_match("Head*", "Head CT") is True assert _dicom_match("Head*", "Foot CT") is False # --------------------------------------------------------------------------- -# Anonymization mode tests +# DB integration tests # --------------------------------------------------------------------------- @pytest.mark.django_db -def test_process_linking_mode_stores_pseudonymized_uids( +def test_process_creates_volume_records_on_success( mocker: MockerFixture, settings, tmp_path: Path ): - """In linking mode, pseudonymized UIDs are stored on the volume.""" - settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") + """Deferred insertion: volumes are created in DB after successful export.""" MassTransferSettings.objects.create() user = UserFactory.create() @@ -1237,7 +721,7 @@ def test_process_linking_mode_stores_pseudonymized_uids( start_date=date(2024, 1, 1), end_date=date(2024, 1, 1), partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - anonymization_mode=MassTransferJob.AnonymizationMode.PSEUDONYMIZE_WITH_LINKING, + anonymization_mode=MassTransferJob.AnonymizationMode.NONE, ) job.filters.create(owner=user, name="CT Filter", modality="CT") @@ -1249,53 +733,75 @@ def test_process_linking_mode_stores_pseudonymized_uids( partition_key="20240101", ) - vol = MassTransferVolume.objects.create( - job=job, - task=task, - partition_key="20240101", - patient_id="PAT1", - study_instance_uid="1.2.3.4", - series_instance_uid="1.2.3.4.5", - modality="CT", - study_description="", - series_description="Head", - series_number=1, - study_datetime=timezone.now(), - ) + series = [_make_discovered(patient_id="PAT1", series_uid="1.2.3.4.5")] processor = MassTransferTaskProcessor(task) - mocker.patch.object(processor, "_find_volumes", return_value=[vol]) + mocker.patch.object(processor, "_discover_series", return_value=series) mocker.patch("adit.mass_transfer.processors.DicomOperator") + mocker.patch.object(processor, "_export_series") - def fake_export(op, volume, base, pseudonym, **kwargs): - volume.status = MassTransferVolume.Status.EXPORTED - volume.pseudonym = pseudonym - volume.study_instance_uid_pseudonymized = "9.8.7.6" - volume.series_instance_uid_pseudonymized = "9.8.7.6.5" - volume.save() + assert MassTransferVolume.objects.filter(job=job).count() == 0 - mocker.patch.object(processor, "_export_volume", side_effect=fake_export) + result = processor.process() - mocker.patch( - "adit.mass_transfer.processors.uuid.uuid4", - return_value=uuid.UUID(int=1), + assert result["status"] == MassTransferTask.Status.SUCCESS + vol = MassTransferVolume.objects.get(job=job, series_instance_uid="1.2.3.4.5") + assert vol.status == 
MassTransferVolume.Status.EXPORTED + assert vol.patient_id == "PAT1" + assert vol.task == task + + +@pytest.mark.django_db +def test_process_creates_error_volume_on_failure( + mocker: MockerFixture, settings, tmp_path: Path +): + """Failed exports still create a volume record with ERROR status.""" + MassTransferSettings.objects.create() + + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 1), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + anonymization_mode=MassTransferJob.AnonymizationMode.NONE, + ) + job.filters.create(owner=user, name="CT Filter", modality="CT") + + task = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) + + series = [_make_discovered(patient_id="PAT1", series_uid="1.2.3.4.5")] + + processor = MassTransferTaskProcessor(task) + mocker.patch.object(processor, "_discover_series", return_value=series) + mocker.patch("adit.mass_transfer.processors.DicomOperator") + mocker.patch.object( + processor, "_export_series", side_effect=DicomError("Export failed") ) result = processor.process() - vol.refresh_from_db() - assert vol.study_instance_uid_pseudonymized == "9.8.7.6" - assert vol.series_instance_uid_pseudonymized == "9.8.7.6.5" - assert vol.pseudonym == uuid.UUID(int=1).hex - assert result["status"] == MassTransferTask.Status.SUCCESS + assert result["status"] == MassTransferTask.Status.FAILURE + vol = MassTransferVolume.objects.get(job=job, series_instance_uid="1.2.3.4.5") + assert vol.status == MassTransferVolume.Status.ERROR + assert "Export failed" in vol.log @pytest.mark.django_db -def test_process_failed_volume_has_no_pseudonymized_uids( +def test_process_deletes_error_volumes_on_retry( mocker: MockerFixture, settings, tmp_path: Path ): - """Failed volumes have empty pseudonymized UID fields.""" - settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") + """On retry, ERROR volumes from prior runs are deleted so they can be reprocessed.""" MassTransferSettings.objects.create() user = UserFactory.create() @@ -1308,7 +814,7 @@ def test_process_failed_volume_has_no_pseudonymized_uids( start_date=date(2024, 1, 1), end_date=date(2024, 1, 1), partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - anonymization_mode=MassTransferJob.AnonymizationMode.PSEUDONYMIZE_WITH_LINKING, + anonymization_mode=MassTransferJob.AnonymizationMode.NONE, ) job.filters.create(owner=user, name="CT Filter", modality="CT") @@ -1320,58 +826,44 @@ def test_process_failed_volume_has_no_pseudonymized_uids( partition_key="20240101", ) - vol = MassTransferVolume.objects.create( + # Simulate a prior failed run that left an ERROR volume + MassTransferVolume.objects.create( job=job, task=task, partition_key="20240101", patient_id="PAT1", - study_instance_uid="1.2.3.4", + study_instance_uid="study-1", series_instance_uid="1.2.3.4.5", modality="CT", - study_description="", - series_description="Head", + study_description="Brain CT", + series_description="Axial", series_number=1, study_datetime=timezone.now(), + status=MassTransferVolume.Status.ERROR, + log="Previous failure", ) + series = [_make_discovered(patient_id="PAT1", series_uid="1.2.3.4.5")] + processor = MassTransferTaskProcessor(task) - 
mocker.patch.object(processor, "_find_volumes", return_value=[vol]) + mocker.patch.object(processor, "_discover_series", return_value=series) mocker.patch("adit.mass_transfer.processors.DicomOperator") - - def fake_export_failure(op, volume, base, pseudonym, **kwargs): - raise RuntimeError("DICOM export failed") - - mocker.patch.object(processor, "_export_volume", side_effect=fake_export_failure) - - mocker.patch( - "adit.mass_transfer.processors.uuid.uuid4", - return_value=uuid.UUID(int=1), - ) + mocker.patch.object(processor, "_export_series") result = processor.process() - vol.refresh_from_db() - assert vol.study_instance_uid_pseudonymized == "" - assert vol.series_instance_uid_pseudonymized == "" - assert result["status"] == MassTransferTask.Status.FAILURE + assert result["status"] == MassTransferTask.Status.SUCCESS + # Old ERROR volume deleted, new EXPORTED volume created + vols = MassTransferVolume.objects.filter(job=job, series_instance_uid="1.2.3.4.5") + assert vols.count() == 1 + assert vols.first().status == MassTransferVolume.Status.EXPORTED @pytest.mark.django_db -def test_longitudinal_linking_across_partitions( +def test_process_deterministic_pseudonyms_across_partitions( mocker: MockerFixture, settings, tmp_path: Path ): - """Prove that pseudonymized UIDs on volumes enable longitudinal tracking. - - Scenario: - - Partition 1 (Jan 1): PAT1/Study-A (2 series), PAT2/Study-B (1 series) - - Partition 2 (Jan 2): PAT1/Study-C (1 series) - - After processing both partitions: - - All 4 volumes have pseudonymized UIDs - - PAT1 volumes are linkable via patient_id - - Two series in the same study share the same pseudonymized study UID - """ - settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") + """Same patient gets the same pseudonym across different partitions (linking mode).""" MassTransferSettings.objects.create() user = UserFactory.create() @@ -1388,7 +880,6 @@ def test_longitudinal_linking_across_partitions( ) job.filters.create(owner=user, name="CT Filter", modality="CT") - # --- Partition 1: Jan 1 --- task1 = MassTransferTask.objects.create( job=job, source=source, @@ -1396,39 +887,6 @@ def test_longitudinal_linking_across_partitions( partition_end=timezone.make_aware(datetime(2024, 1, 1, 23, 59, 59)), partition_key="20240101", ) - - # PAT1, Study-A, two series - vol_a1 = MassTransferVolume.objects.create( - job=job, task=task1, partition_key="20240101", - patient_id="PAT1", - study_instance_uid="1.2.840.10001.1.1", - series_instance_uid="1.2.840.10001.1.1.1", - modality="CT", study_description="Brain CT", - series_description="Axial", series_number=1, - study_datetime=timezone.make_aware(datetime(2024, 1, 1, 10, 0)), - ) - vol_a2 = MassTransferVolume.objects.create( - job=job, task=task1, partition_key="20240101", - patient_id="PAT1", - study_instance_uid="1.2.840.10001.1.1", - series_instance_uid="1.2.840.10001.1.1.2", - modality="CT", study_description="Brain CT", - series_description="Coronal", series_number=2, - study_datetime=timezone.make_aware(datetime(2024, 1, 1, 10, 0)), - ) - - # PAT2, Study-B, one series - vol_b = MassTransferVolume.objects.create( - job=job, task=task1, partition_key="20240101", - patient_id="PAT2", - study_instance_uid="1.2.840.10002.1.1", - series_instance_uid="1.2.840.10002.1.1.1", - modality="CT", study_description="Chest CT", - series_description="Axial", series_number=1, - study_datetime=timezone.make_aware(datetime(2024, 1, 1, 14, 0)), - ) - - # --- Partition 2: Jan 2 --- task2 = MassTransferTask.objects.create( job=job, 
source=source, @@ -1437,87 +895,40 @@ def test_longitudinal_linking_across_partitions( partition_key="20240102", ) - # PAT1 again, Study-C (different study, same patient) - vol_c = MassTransferVolume.objects.create( - job=job, task=task2, partition_key="20240102", - patient_id="PAT1", - study_instance_uid="1.2.840.10001.1.2", - series_instance_uid="1.2.840.10001.1.2.1", - modality="CT", study_description="Follow-up Brain CT", - series_description="Axial", series_number=1, - study_datetime=timezone.make_aware(datetime(2024, 1, 2, 9, 0)), - ) - - # Use a counter to generate distinct but deterministic pseudonymized UIDs - export_counter = {"n": 0} - - def fake_export(op, volume, base, pseudonym, *, study_pseudonymizer=None): - export_counter["n"] += 1 - n = export_counter["n"] - volume.pseudonym = pseudonym - volume.study_instance_uid_pseudonymized = f"2.16.{volume.study_instance_uid}" - volume.series_instance_uid_pseudonymized = f"2.16.{volume.series_instance_uid}" - volume.status = MassTransferVolume.Status.EXPORTED - volume.save() - mocker.patch("adit.mass_transfer.processors.DicomOperator") - # --- Process partition 1 --- + # Partition 1: PAT1 + series1 = [_make_discovered( + patient_id="PAT1", study_uid="1.2.3.100", series_uid="1.2.3.100.1", + )] processor1 = MassTransferTaskProcessor(task1) - mocker.patch.object(processor1, "_find_volumes", return_value=[vol_a1, vol_a2, vol_b]) - mocker.patch.object(processor1, "_export_volume", side_effect=fake_export) - - result1 = processor1.process() - assert result1["status"] == MassTransferTask.Status.SUCCESS - - # --- Process partition 2 --- + mocker.patch.object(processor1, "_discover_series", return_value=series1) + mocker.patch.object(processor1, "_export_series") + processor1.process() + + # Partition 2: same PAT1 + series2 = [_make_discovered( + patient_id="PAT1", study_uid="1.2.3.200", series_uid="1.2.3.200.1", + )] processor2 = MassTransferTaskProcessor(task2) - mocker.patch.object(processor2, "_find_volumes", return_value=[vol_c]) - mocker.patch.object(processor2, "_export_volume", side_effect=fake_export) - - result2 = processor2.process() - assert result2["status"] == MassTransferTask.Status.SUCCESS - - # --- Verify pseudonymized UIDs on volumes --- - for vol in [vol_a1, vol_a2, vol_b, vol_c]: - vol.refresh_from_db() - assert vol.study_instance_uid_pseudonymized != "" - assert vol.series_instance_uid_pseudonymized != "" - assert vol.study_instance_uid_pseudonymized != vol.study_instance_uid - - # Two series in the same study share the same pseudonymized study UID - assert ( - vol_a1.study_instance_uid_pseudonymized - == vol_a2.study_instance_uid_pseudonymized - ) - # But different pseudonymized series UIDs - assert ( - vol_a1.series_instance_uid_pseudonymized - != vol_a2.series_instance_uid_pseudonymized - ) - - # --- Longitudinal linking via patient_id --- - pat1_vols = MassTransferVolume.objects.filter( - job=job, patient_id="PAT1" - ).exclude(study_instance_uid_pseudonymized="") - assert pat1_vols.count() == 3 # vol_a1, vol_a2, vol_c + mocker.patch.object(processor2, "_discover_series", return_value=series2) + mocker.patch.object(processor2, "_export_series") + processor2.process() - pat1_studies = set(pat1_vols.values_list("study_instance_uid", flat=True)) - assert pat1_studies == {"1.2.840.10001.1.1", "1.2.840.10001.1.2"} + vol1 = MassTransferVolume.objects.get(series_instance_uid="1.2.3.100.1") + vol2 = MassTransferVolume.objects.get(series_instance_uid="1.2.3.200.1") - pat2_vols = MassTransferVolume.objects.filter( - job=job, 
patient_id="PAT2" - ).exclude(study_instance_uid_pseudonymized="") - assert pat2_vols.count() == 1 - assert pat2_vols.first().study_instance_uid == "1.2.840.10002.1.1" + # Linking mode: same patient → same pseudonym across partitions + assert vol1.pseudonym == vol2.pseudonym + assert vol1.pseudonym != "" + assert vol1.pseudonym != "PAT1" @pytest.mark.django_db -def test_process_pseudonymize_mode_stores_pseudonymized_uids( +def test_process_pseudonymize_mode_not_linked_across_partitions( mocker: MockerFixture, settings, tmp_path: Path ): - """In pseudonymize mode (without linking), pseudonymized UIDs are still stored on volumes.""" - settings.MASS_TRANSFER_EXPORT_BASE_DIR = str(tmp_path / "exports") + """Non-linking pseudonymize mode: same patient gets different pseudonyms across partitions.""" MassTransferSettings.objects.create() user = UserFactory.create() @@ -1528,110 +939,50 @@ def test_process_pseudonymize_mode_stores_pseudonymized_uids( source=source, destination=destination, start_date=date(2024, 1, 1), - end_date=date(2024, 1, 1), + end_date=date(2024, 1, 2), partition_granularity=MassTransferJob.PartitionGranularity.DAILY, anonymization_mode=MassTransferJob.AnonymizationMode.PSEUDONYMIZE, ) job.filters.create(owner=user, name="CT Filter", modality="CT") - task = MassTransferTask.objects.create( + task1 = MassTransferTask.objects.create( job=job, source=source, - partition_start=timezone.now(), - partition_end=timezone.now(), + partition_start=timezone.make_aware(datetime(2024, 1, 1)), + partition_end=timezone.make_aware(datetime(2024, 1, 1, 23, 59, 59)), partition_key="20240101", ) - - vol = MassTransferVolume.objects.create( + task2 = MassTransferTask.objects.create( job=job, - task=task, - partition_key="20240101", - patient_id="PAT1", - study_instance_uid="1.2.3.4", - series_instance_uid="1.2.3.4.5", - modality="CT", - study_description="", - series_description="Head", - series_number=1, - study_datetime=timezone.now(), + source=source, + partition_start=timezone.make_aware(datetime(2024, 1, 2)), + partition_end=timezone.make_aware(datetime(2024, 1, 2, 23, 59, 59)), + partition_key="20240102", ) - processor = MassTransferTaskProcessor(task) - mocker.patch.object(processor, "_find_volumes", return_value=[vol]) mocker.patch("adit.mass_transfer.processors.DicomOperator") - def fake_export(op, volume, base, pseudonym, **kwargs): - volume.status = MassTransferVolume.Status.EXPORTED - volume.pseudonym = pseudonym - volume.study_instance_uid_pseudonymized = "9.8.7.6" - volume.series_instance_uid_pseudonymized = "9.8.7.6.5" - volume.save() - - mocker.patch.object(processor, "_export_volume", side_effect=fake_export) - - result = processor.process() - - vol.refresh_from_db() - assert vol.study_instance_uid_pseudonymized == "9.8.7.6" - assert vol.series_instance_uid_pseudonymized == "9.8.7.6.5" - assert result["status"] == MassTransferTask.Status.SUCCESS - - -def test_process_none_mode_skips_pseudonymizer( - mocker: MockerFixture, settings, tmp_path: Path -): - """In 'none' anonymization mode, no pseudonym is generated and no pseudonymizer is created.""" - processor = _make_process_env( - mocker, settings, tmp_path, anonymization_mode="none" - ) - vol = _make_mock_volume(mocker, series_uid="s-1") - - mocker.patch.object(processor, "_find_volumes", return_value=[vol]) - - export_calls: list[tuple[str, object]] = [] - - def fake_export(op, volume, base, pseudonym, **kwargs): - export_calls.append((pseudonym, kwargs.get("study_pseudonymizer"))) - - mocker.patch.object(processor, 
"_export_volume", side_effect=fake_export) - - result = processor.process() - - assert len(export_calls) == 1 - pseudonym, study_pseudonymizer = export_calls[0] - assert pseudonym == "" - assert study_pseudonymizer is None - assert result["status"] == MassTransferTask.Status.SUCCESS - - -def test_process_pseudonymize_mode_creates_per_study_pseudonymizer( - mocker: MockerFixture, settings, tmp_path: Path -): - """In pseudonymize mode, a Pseudonymizer is created per study and shared across volumes.""" - from adit.core.utils.pseudonymizer import Pseudonymizer - - processor = _make_process_env(mocker, settings, tmp_path) - - vol1 = _make_mock_volume(mocker, study_uid="study-A", series_uid="s-1") - vol2 = _make_mock_volume(mocker, study_uid="study-A", series_uid="s-2") - vol3 = _make_mock_volume(mocker, study_uid="study-B", series_uid="s-3") - - mocker.patch.object(processor, "_find_volumes", return_value=[vol1, vol2, vol3]) - - pseudonymizer_ids: list[int | None] = [] - - def fake_export(op, volume, base, pseudonym, **kwargs): - ps = kwargs.get("study_pseudonymizer") - pseudonymizer_ids.append(id(ps) if ps else None) - - mocker.patch.object(processor, "_export_volume", side_effect=fake_export) - - result = processor.process() + series1 = [_make_discovered( + patient_id="PAT1", study_uid="1.2.3.100", series_uid="1.2.3.100.1", + )] + processor1 = MassTransferTaskProcessor(task1) + mocker.patch.object(processor1, "_discover_series", return_value=series1) + mocker.patch.object(processor1, "_export_series") + processor1.process() - # Two volumes in study-A share the same Pseudonymizer instance - assert pseudonymizer_ids[0] is not None - assert pseudonymizer_ids[0] == pseudonymizer_ids[1] - # Volume in study-B gets a different Pseudonymizer instance - assert pseudonymizer_ids[2] is not None - assert pseudonymizer_ids[2] != pseudonymizer_ids[0] - assert result["status"] == MassTransferTask.Status.SUCCESS + series2 = [_make_discovered( + patient_id="PAT1", study_uid="1.2.3.200", series_uid="1.2.3.200.1", + )] + processor2 = MassTransferTaskProcessor(task2) + mocker.patch.object(processor2, "_discover_series", return_value=series2) + mocker.patch.object(processor2, "_export_series") + processor2.process() + + vol1 = MassTransferVolume.objects.get(series_instance_uid="1.2.3.100.1") + vol2 = MassTransferVolume.objects.get(series_instance_uid="1.2.3.200.1") + + # Non-linking mode: same patient should get DIFFERENT random pseudonyms + assert vol1.pseudonym != "" + assert vol2.pseudonym != "" + assert vol1.pseudonym != "PAT1" + assert vol1.pseudonym != vol2.pseudonym diff --git a/adit/mass_transfer/views.py b/adit/mass_transfer/views.py index b0c3f92fb..f7f632c8d 100644 --- a/adit/mass_transfer/views.py +++ b/adit/mass_transfer/views.py @@ -5,6 +5,7 @@ from django.conf import settings from django.contrib.auth.mixins import LoginRequiredMixin from django.http import HttpResponse +from django.shortcuts import get_object_or_404 from django.urls import reverse_lazy from django.views import View from django.views.generic import CreateView, DeleteView, ListView, UpdateView @@ -108,34 +109,22 @@ def get(self, request, pk): else: qs = MassTransferJob.objects.filter(owner=request.user) - job = qs.get(pk=pk) - volumes = ( + job = get_object_or_404(qs, pk=pk) + associations = ( MassTransferVolume.objects.filter(job=job) - .exclude(study_instance_uid_pseudonymized="") - .order_by("study_datetime", "series_instance_uid") + .exclude(pseudonym="") + .values_list("patient_id", "pseudonym") + .distinct() + 
.order_by("patient_id") ) response = HttpResponse(content_type="text/csv") response["Content-Disposition"] = f'attachment; filename="associations_job_{job.pk}.csv"' writer = csv.writer(response) - writer.writerow([ - "pseudonym", - "patient_id", - "study_instance_uid", - "study_instance_uid_pseudonymized", - "series_instance_uid", - "series_instance_uid_pseudonymized", - ]) - for vol in volumes.iterator(): - writer.writerow([ - vol.pseudonym, - vol.patient_id, - vol.study_instance_uid, - vol.study_instance_uid_pseudonymized, - vol.series_instance_uid, - vol.series_instance_uid_pseudonymized, - ]) + writer.writerow(["patient_id", "pseudonym"]) + for patient_id, pseudonym in associations.iterator(): + writer.writerow([patient_id, pseudonym]) return response diff --git a/adit/settings/base.py b/adit/settings/base.py index 0d3ff53db..aafbae23d 100644 --- a/adit/settings/base.py +++ b/adit/settings/base.py @@ -386,12 +386,6 @@ # Maximum number of C-FIND results for mass transfer before splitting time windows MASS_TRANSFER_MAX_SEARCH_RESULTS = env.int("MASS_TRANSFER_MAX_SEARCH_RESULTS", default=200) -# Base directory for temporary DICOM exports in mass transfer -MASS_TRANSFER_EXPORT_BASE_DIR = env.str( - "MASS_TRANSFER_EXPORT_BASE_DIR", - default="/mnt/mass_transfer_exports", -) - # The timeout in dicom_explorer a DICOM server must respond DICOM_EXPLORER_RESPONSE_TIMEOUT = 3 # seconds From 9a86256948ca0c857c654b4d75b6c28b48e8d30c Mon Sep 17 00:00:00 2001 From: Ritwik Date: Thu, 5 Mar 2026 13:49:28 +0100 Subject: [PATCH 023/103] Use per-study random pseudonyms in non-linking mode to prevent patient correlation --- adit/mass_transfer/processors.py | 19 +- adit/mass_transfer/tests/test_processor.py | 37 +- adit/mass_transfer/views.py | 4 + docs/mass_transfer_spec.md | 431 +++++++++++++++++++++ 4 files changed, 479 insertions(+), 12 deletions(-) create mode 100644 docs/mass_transfer_spec.md diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 09fed1ad4..7ee369cb6 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -182,25 +182,30 @@ def process(self): total_failed = 0 failed_reasons: dict[str, int] = {} - # Group by patient for folder structure + # Group by patient for folder structure (linking + no-anon modes) by_patient: dict[str, list[DiscoveredSeries]] = {} for s in pending: by_patient.setdefault(s.patient_id, []).append(s) - # For non-linking pseudonymize mode, generate random pseudonyms per patient + # Non-linking pseudonymize: random pseudonym per study so that + # studies for the same patient cannot be correlated. 
random_pseudonyms: dict[str, str] = {} for patient_id, series_list in by_patient.items(): if job.should_link and pseudonymizer: subject_id = pseudonymizer.compute_pseudonym(patient_id) - elif pseudonymizer: - if patient_id not in random_pseudonyms: - random_pseudonyms[patient_id] = secrets.token_hex(6).upper() - subject_id = random_pseudonyms[patient_id] - else: + elif not pseudonymizer: subject_id = sanitize_filename(patient_id) + else: + # subject_id set per-study below + subject_id = "" for series in series_list: + if pseudonymizer and not job.should_link: + study_uid = series.study_instance_uid + if study_uid not in random_pseudonyms: + random_pseudonyms[study_uid] = secrets.token_hex(6).upper() + subject_id = random_pseudonyms[study_uid] study_folder = _study_folder_name( series.study_description, series.study_datetime, diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 951bc3354..01381da67 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -479,15 +479,14 @@ def fake_export(op, s, path, subject_id, pseudonymizer): assert result["status"] == MassTransferTask.Status.SUCCESS -def test_process_pseudonymize_mode_consistent_within_task( +def test_process_pseudonymize_mode_same_study_same_pseudonym( mocker: MockerFixture, settings, tmp_path: Path ): - """In pseudonymize mode (non-linking), same patient gets same pseudonym within a task.""" + """In non-linking mode, series in the same study share a pseudonym.""" processor = _make_process_env(mocker, settings, tmp_path) - # Two series, same patient, different studies series = [ _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-1"), - _make_discovered(patient_id="PAT1", study_uid="study-B", series_uid="s-2"), + _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-2"), ] mocker.patch.object(processor, "_discover_series", return_value=series) @@ -502,12 +501,40 @@ def fake_export(op, s, path, subject_id, pseudonymizer): processor.process() - # Both series for PAT1 get the same pseudonym within this task + # Same study → same pseudonym assert subject_ids[0] == subject_ids[1] assert subject_ids[0] != "" assert subject_ids[0] != "PAT1" +def test_process_pseudonymize_mode_different_studies_different_pseudonyms( + mocker: MockerFixture, settings, tmp_path: Path +): + """In non-linking mode, different studies for the same patient get different pseudonyms.""" + processor = _make_process_env(mocker, settings, tmp_path) + series = [ + _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-1"), + _make_discovered(patient_id="PAT1", study_uid="study-B", series_uid="s-2"), + ] + + mocker.patch.object(processor, "_discover_series", return_value=series) + + subject_ids: list[str] = [] + + def fake_export(op, s, path, subject_id, pseudonymizer): + subject_ids.append(subject_id) + + mocker.patch.object(processor, "_export_series", side_effect=fake_export) + mocker.patch.object(MassTransferVolume.objects, "create") + + processor.process() + + # Different studies → different pseudonyms (non-linkable) + assert subject_ids[0] != subject_ids[1] + assert subject_ids[0] != "" + assert subject_ids[0] != "PAT1" + + def test_process_linking_mode_uses_deterministic_pseudonym( mocker: MockerFixture, settings, tmp_path: Path ): diff --git a/adit/mass_transfer/views.py b/adit/mass_transfer/views.py index f7f632c8d..76c30c152 100644 --- a/adit/mass_transfer/views.py +++ b/adit/mass_transfer/views.py @@ -110,6 
+110,10 @@ def get(self, request, pk): qs = MassTransferJob.objects.filter(owner=request.user) job = get_object_or_404(qs, pk=pk) + + if not job.should_link: + return HttpResponse("CSV export is only available for linking mode.", status=400) + associations = ( MassTransferVolume.objects.filter(job=job) .exclude(pseudonym="") diff --git a/docs/mass_transfer_spec.md b/docs/mass_transfer_spec.md new file mode 100644 index 000000000..1e2d1d627 --- /dev/null +++ b/docs/mass_transfer_spec.md @@ -0,0 +1,431 @@ +# Mass Transfer — Branch Specification + +## What Is It? + +Mass Transfer is a new ADIT module that bulk-exports DICOM data from a PACS +server to a network folder. It targets research use cases where you need to +pull large cohorts — e.g. "all CT head scans from Neuroradiologie in 2024" — +pseudonymize them, and optionally convert to NIfTI. + +``` +┌──────────┐ C-FIND ┌──────────┐ C-GET ┌──────────────────┐ +│ ADIT │──────────────>│ PACS │──────────────>│ Network Folder │ +│ Worker │ discover │ Server │ fetch + │ /mnt/data/... │ +│ │ studies & │ │ pseudonymize│ │ +│ │ series │ │ + write │ PartitionKey/ │ +│ │ │ │ │ Subject/ │ +│ │ │ │ │ Study/ │ +│ │ │ │ │ Series/ │ +└──────────┘ └──────────┘ └──────────────────┘ +``` + +--- + +## Core Concepts + +### Job, Task, Volume + +``` +MassTransferJob (one per user request) + ├── source: PACS Server + ├── destination: Network Folder + ├── date range: 2025-01-01 → 2025-06-30 + ├── granularity: weekly + ├── anonymization_mode: pseudonymize_with_linking + ├── filters: [CT + Neuroradiologie, MR + Neuroradiologie] + ├── pseudonym_salt: "a7f3..." (random per job, used for linking) + │ + ├── MassTransferTask (one per time partition) + │ ├── partition_key: "20250101-20250107" + │ ├── partition_start / partition_end + │ │ + │ ├── MassTransferVolume (one per exported series) + │ │ ├── patient_id, pseudonym + │ │ ├── study/series UIDs + │ │ ├── status: exported | converted | skipped | error + │ │ └── log (error reason if failed) + │ └── ... + └── ... +``` + +### Filters + +Reusable, user-owned filter presets. A job references one or more filters. +Each filter can specify: + +| Field | Example | Notes | +| ------------------ | ------------------ | ------------------------ | +| modality | `CT` | Exact match | +| institution_name | `Neuroradiologie*` | DICOM wildcard supported | +| study_description | `*Schädel*` | DICOM wildcard supported | +| series_description | `Axial*` | DICOM wildcard supported | +| series_number | `2` | Exact integer match | + +Institution can be checked at study level (one C-FIND per study to check any +series) or at series level (checked per series during enumeration). + +### Partitioning + +The date range is split into non-overlapping time windows: + +``` +Job: 2025-01-01 → 2025-01-21, granularity=weekly + +Task 1: 2025-01-01 → 2025-01-07 key="20250101-20250107" +Task 2: 2025-01-08 → 2025-01-14 key="20250108-20250114" +Task 3: 2025-01-15 → 2025-01-21 key="20250115-20250121" +``` + +Each task is an independent Procrastinate job. Tasks can run in parallel +across workers, but each task is guaranteed to run on exactly one worker +(`FOR UPDATE SKIP LOCKED`). + +--- + +## Processing Pipeline + +One task = one partition. Here is the full flow inside `MassTransferTaskProcessor.process()`: + +``` + ┌─────────────────────┐ + │ Start task │ + │ (one partition) │ + └──────────┬──────────┘ + │ + ┌──────────▼────────────┐ + │ Check: suspended? │──Yes──> return WARNING + │ Check: source/dest? │──Bad──> raise DicomError + │ Check: filters? 
│──None─> return FAILURE + └──────────┬────────────┘ + │ + ┌────────────────▼─────────────────┐ + │ Phase 1: DISCOVER │ + │ │ + │ For each filter: │ + │ C-FIND studies in time window │ + │ For each study: │ + │ C-FIND series │ + │ Apply modality/desc/inst │ + │ filters on each series │ + │ │ + │ Result: list[DiscoveredSeries] │ + │ (in-memory, no DB writes) │ + └────────────────┬─────────────────┘ + │ + ┌────────────────▼─────────────────┐ + │ Resumability check │ + │ │ + │ done_uids = DB volumes with │ + │ status in (EXPORTED, CONVERTED, │ + │ SKIPPED) │ + │ Delete any ERROR volumes (retry │ + │ pending = discovered - done_uid │ + └────────────────┬─────────────────┘ + │ + ┌────────────────▼─────────────────┐ + │ Group by patient_id │ + │ Compute subject_id (pseudonym │ + │ or raw patient_id) │ + └────────────────┬─────────────────┘ + │ + ┌──────────▼────────────┐ + │ For each series: │ + │ │ + ┌────────────┤ DICOM export path? │ + │ │ NIfTI conversion? │ + │ └──────────┬─────────────┘ + │ │ + ┌──────▼───────┐ ┌─────────▼──────────┐ + │ DICOM only │ │ NIfTI mode │ + │ │ │ │ + │ C-GET series │ │ C-GET to temp dir │ + │ pseudonymize │ │ pseudonymize │ + │ write .dcm │ │ dcm2niix → .nii.gz │ + │ to final dir │ │ write to final dir │ + │ │ │ temp dir auto- │ + │ │ │ cleaned │ + └──────┬───────┘ └─────────┬──────────┘ + │ │ + └──────────┬───────────┘ + │ + ┌──────────▼────────────┐ + │ Create DB volume │ + │ (deferred insertion) │ + │ status = EXPORTED │ + │ | CONVERTED │ + │ | SKIPPED │ + │ | ERROR │ + └──────────┬────────────┘ + │ + ┌──────────▼────────────┐ + │ Next series... │ + │ (RetriableDicomError │ + │ re-raised for │ + │ Procrastinate retry)│ + └──────────┬────────────┘ + │ + ┌──────────▼────────────┐ + │ Return task result │ + │ SUCCESS / WARNING / │ + │ FAILURE + summary │ + └───────────────────────┘ +``` + +### Output Folder Structure + +``` +/mnt/data/mass_transfer_exports/ +└── 20250101-20250107/ # partition key + ├── A7B3X9K2M1Q4/ # pseudonym (or raw PatientID) + │ ├── CT_Schaedel_20250103_f2a1/ # StudyDescription_Date_ShortHash + │ │ ├── Axial_1/ # SeriesDescription_SeriesNumber + │ │ │ ├── 1.2.3.4.5.6.7.dcm + │ │ │ ├── 1.2.3.4.5.6.8.dcm + │ │ │ └── ... + │ │ └── Sagittal_2/ + │ │ └── ... + │ └── MRT_Kopf_20250105_b8c2/ + │ └── T1_1/ + │ └── ... + └── R4T7Y2W8N3P1/ + └── ... +``` + +The study folder name includes a 4-char hash of the StudyInstanceUID to +prevent collisions when the same patient has multiple studies with the same +description on the same date. + +--- + +## Anonymization Modes + +| Mode | Folder name | DICOM tags | Cross-partition consistency | CSV export | +| ----------------------------- | -------------------------------- | ------------------ | --------------------------------------------- | ----------------------------- | +| **None** | Raw PatientID | Untouched | N/A | Not available | +| **Pseudonymize** | Random hex per study | dicognito (random) | No — each study gets a unique random folder | Not available | +| **Pseudonymize with Linking** | Deterministic pseudonym | dicognito (seeded) | Yes — same patient always gets same pseudonym | patient_id → pseudonym pairs | + +### How Linking Works + +``` +job.pseudonym_salt = "a7f3e2..." # random, generated once per job + + ┌──────────────────────────┐ + │ Pseudonymizer(seed=salt)│ + │ │ + "PATIENT_12345" ──┤ md5(salt + patient_id) ├──> "A7B3X9K2M1Q4" + │ → deterministic 12-char │ + └──────────────────────────┘ + +Same salt + same patient_id = same pseudonym, always. +No lookup table needed. 
Works across partitions. +Uses dicognito's Randomizer internally. +``` + +In non-linking mode, each study gets a fresh `secrets.token_hex(6)` — even +two studies from the same patient land in separate opaque folders, so there +is no way to correlate which studies belong to the same person. + +--- + +## Adaptive Study Discovery (recursive split) + +PACS servers often limit C-FIND results (e.g. 200 max). When a query returns +more results than the limit, the time window is recursively bisected: + +``` +Query: 2025-01-01 → 2025-01-07, limit=200 + → 250 results (over limit!) + → Split: + Left: 2025-01-01 → 2025-01-04 → 120 results (ok) + Right: 2025-01-05 → 2025-01-07 → 140 results (ok) + → Merge + deduplicate by StudyInstanceUID + → 245 unique studies +``` + +Recursion stops with an error if the window is smaller than 30 minutes +(safety valve against infinite recursion on a PACS that always returns +too many results). + +--- + +## Persistent DIMSE Connections + +DICOM network operations (C-FIND, C-GET) require a TCP association with +specific presentation contexts. By default, ADIT opens and closes an +association per operation. + +For mass transfer with hundreds of series, this is wasteful (~500ms overhead +per association). The `persistent=True` mode keeps the association open: + +``` +Default mode (persistent=False): + open → C-FIND study 1 → close + open → C-FIND study 2 → close + open → C-FIND study 3 → close + open → C-GET series 1 → close + open → C-GET series 2 → close + ... + ~700 associations for 100 studies × ~500ms = ~350s overhead + +Persistent mode (persistent=True): + open(C-FIND) → C-FIND study 1 → study 2 → study 3 → ... + close(C-FIND) + open(C-GET) → C-GET series 1 → series 2 → ... + close(C-GET) + 2-3 associations total × ~500ms = ~1s overhead +``` + +Service-type switching (C-FIND → C-GET) automatically closes and reopens +with the correct presentation contexts. After an abort (e.g. `limit_results` +in C-FIND), the next call auto-reconnects. + +Only mass transfer opts in. All existing code is unchanged (`persistent=False` +is the default). + +--- + +## Deferred Volume Insertion + +Volumes (DB records tracking each exported series) are only created **after** +successful export or conversion — not during discovery. + +``` +Old approach: + discover → create PENDING volumes in DB → export → update to EXPORTED + Problem: failed exports leave orphan PENDING records + +New approach: + discover → in-memory DiscoveredSeries list → export → create EXPORTED volume + No orphans. Resumability via: "skip series whose UID is already in DB" +``` + +On retry, ERROR volumes from prior runs are deleted first, then reprocessed. +This avoids UniqueConstraint violations on `(job, series_instance_uid)`. + +--- + +## Error Handling + +| Error type | Behavior | +| ------------------------------------------------------- | ------------------------------------------------------- | +| `RetriableDicomError` (PACS timeout, connection lost) | Re-raised → Procrastinate retries the whole task | +| `DicomError` / other exceptions (single series) | Caught → ERROR volume created → continue to next series | +| All series fail | Task status = FAILURE | +| Some series fail | Task status = WARNING, message shows count | +| Non-image DICOM (dcm2niix says "No valid DICOM images") | SKIPPED volume, no error | + +Task detail page shows a table of all skipped and failed volumes with the +specific error reason for each. 
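
For orientation, the error-handling rules above boil down to a small control-flow skeleton. The sketch below is illustrative only — the wrapper function is hypothetical, field lists are trimmed, and the import path of the exception class is assumed — but the per-series try/except mirrors what `MassTransferTaskProcessor.process()` does.

```python
from adit.core.errors import RetriableDicomError  # assumed import path
from adit.mass_transfer.models import MassTransferTask, MassTransferVolume


def run_partition(processor, operator, job, pending, subject_id, pseudonymizer, out_dir):
    """Hypothetical wrapper — only the per-series error handling mirrors the real processor."""
    exported = failed = 0
    for series in pending:
        try:
            # _export_series returns (image_count, pseudo_study_uid, pseudo_series_uid)
            image_count, _, _ = processor._export_series(
                operator, series, out_dir, subject_id, pseudonymizer
            )
        except RetriableDicomError:
            raise  # transient PACS failure → Procrastinate retries the whole task
        except Exception as err:
            failed += 1
            MassTransferVolume.objects.create(
                job=job,
                task=processor.mass_task,
                series_instance_uid=series.series_instance_uid,
                status=MassTransferVolume.Status.ERROR,
                log=str(err),
            )
            continue  # a single broken series must not abort the partition
        exported += 1

    if exported == 0 and failed > 0:
        return MassTransferTask.Status.FAILURE
    if failed > 0:
        return MassTransferTask.Status.WARNING
    return MassTransferTask.Status.SUCCESS
```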
+ +--- + +## Infrastructure Changes + +### Dedicated Worker + +Mass transfer runs on its own Procrastinate worker (`mass_transfer_worker`) +listening on the `mass_transfer` queue. This prevents long-running bulk +exports from blocking the normal DICOM transfer queue. + +### Mount Propagation + +Containers use `rslave` mount propagation so that NAS mounts made on the host +(e.g. `/mnt/nfs/ccinas01/adit`) are visible inside the container without +restart. + +### Job Cancellation + +In-progress tasks can be cancelled. The DIMSE connection leak fix ensures +that abandoned C-GET generators properly close their associations (via +`finally` blocks in the `connect_to_server` decorator). + +--- + +## Design Decisions + +1. **Partition-per-task, not study-per-task.** + One Procrastinate job per time window, not per study. Reduces job queue + overhead from thousands to dozens. Each task discovers and exports + everything in its window. + +2. **Filters are reusable objects, not inline fields.** + Users define filters once ("CT Neuroradiologie") and attach them to + multiple jobs. Filters support DICOM wildcards for fuzzy matching. + +3. **Deferred insertion over eager insertion.** + DB records only exist for successfully processed series. No cleanup + needed for partial failures. Resumability works by checking existing UIDs. + +4. **dicognito for pseudonymization, not a custom implementation.** + dicognito handles UIDs, dates, names, and all DICOM-specific anonymization + rules. We only add a seed parameter for deterministic (linking) mode. + +5. **Folder pseudonyms computed independently from DICOM pseudonyms.** + The folder name uses `compute_pseudonym()` (12-char alphanumeric from + the seed) while DICOM tags are pseudonymized by dicognito's full pipeline. + This means the folder name is stable and predictable while the internal + DICOM data gets proper anonymization. + +6. **Temp directories for NIfTI conversion.** + DICOM files are exported to a `tempfile.TemporaryDirectory()`, converted + with `dcm2niix`, and the temp dir is auto-cleaned. No persistent staging + area needed. + +7. **Persistent connections opt-in only.** + `persistent=False` is the default. Only mass transfer enables it. No + risk to existing transfer modules. 
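
To make decisions 4 and 5 concrete: the folder pseudonym is a pure function of the per-job salt and the PatientID, while dicognito separately rewrites the DICOM tags. The snippet below is only a toy stand-in for `Pseudonymizer.compute_pseudonym()` — the shipped version goes through dicognito's Randomizer — but it shows the contract described in "How Linking Works" (seeded md5 of salt + PatientID → short stable code) and, for contrast, the per-study token used in non-linking mode.

```python
import hashlib
import secrets
import string

_ALPHABET = string.ascii_uppercase + string.digits


def folder_pseudonym(salt: str, patient_id: str, length: int = 12) -> str:
    """Toy illustration only — not the dicognito-backed implementation."""
    digest = hashlib.md5((salt + patient_id).encode()).digest()
    return "".join(_ALPHABET[b % len(_ALPHABET)] for b in digest[:length])


# Linking mode: same salt + same PatientID → same folder name, in every partition.
assert folder_pseudonym("a7f3e2", "PATIENT_12345") == folder_pseudonym("a7f3e2", "PATIENT_12345")

# Non-linking mode: a fresh, unlinkable token per study (expression as used in
# processors.py), so two studies of one patient cannot be correlated.
study_token = secrets.token_hex(6).upper()
```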
+ +--- + +## Files Added/Modified + +### New: `adit/mass_transfer/` (entire app — 39 files) + +| File | Purpose | +| --------------------- | ----------------------------------------------------- | +| `models.py` | Job, Task, Volume, Filter, Settings models | +| `processors.py` | Discovery, export, NIfTI conversion, pseudonymization | +| `forms.py` | Job creation form with dynamic filter selection | +| `views.py` | CRUD views + CSV associations export | +| `urls.py` | 18 URL patterns | +| `utils/partitions.py` | Date range → partition windows | +| `apps.py` | App registration, menu item, processor registration | +| `templates/` | Job form, job detail, task detail, filter CRUD | +| `tests/` | 44 tests (processor, partitions, cleanup) | + +### Modified: `adit/core/` + +| File | Change | +| -------------------------- | ------------------------------------------------------------ | +| `utils/dimse_connector.py` | `persistent` mode, service-type tracking, generator leak fix | +| `utils/dicom_operator.py` | Pass-through `persistent` param, `close()` method | +| `utils/pseudonymizer.py` | `seed` parameter, `compute_pseudonym()` method | + +### Modified: Infrastructure + +| File | Change | +| ----------------------- | ------------------------------------------------------- | +| `docker-compose.*.yml` | `mass_transfer_worker` service, `rslave` propagation | +| `adit/settings/base.py` | Mass transfer settings (priorities, max search results) | + +--- + +## Test Coverage + +44 tests covering: + +- **Discovery**: recursive time-window splitting, deduplication, boundary correctness +- **Processing**: success, partial failure, total failure, suspension, bad source/dest, no filters, empty partition +- **Resumability**: skipping already-done series, deleting ERROR volumes on retry +- **Pseudonymization**: within-task consistency, cross-partition linking, cross-partition non-linking, no-anonymization mode +- **NIfTI conversion**: dcm2niix failure, no output, non-image DICOM skip +- **Utilities**: folder name generation, DICOM wildcard matching, integer parsing, datetime handling +- **Cleanup**: no-op verification (deferred insertion means nothing to clean up) + +Run with: + +```bash +DJANGO_SETTINGS_MODULE=adit.settings.development \ + python -m pytest adit/mass_transfer/tests/ -v +``` From 3e53c060b6be9aa2b6fb54b0bfef31a55ce9523f Mon Sep 17 00:00:00 2001 From: Ritwik Date: Thu, 5 Mar 2026 16:12:37 +0100 Subject: [PATCH 024/103] Prefer C-MOVE over C-GET for mass transfer --- adit/core/utils/dicom_operator.py | 10 ++++-- adit/mass_transfer/processors.py | 56 +++++++++++++++++++++++++------ 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/adit/core/utils/dicom_operator.py b/adit/core/utils/dicom_operator.py index 9ee2670b3..3f35c8c58 100644 --- a/adit/core/utils/dicom_operator.py +++ b/adit/core/utils/dicom_operator.py @@ -398,6 +398,7 @@ def fetch_series( study_uid: str, series_uid: str, callback: Callable[[Dataset], None], + force_move: bool = False, ) -> None: """Fetch a series. @@ -406,6 +407,7 @@ def fetch_series( study_uid: The study instance UID. series_uid: The series instance UID. callback: A callback function that is called for each fetched image. + force_move: If True, skip C-GET and use C-MOVE directly. 
""" query = QueryDataset.create( @@ -415,8 +417,12 @@ def fetch_series( SeriesInstanceUID=series_uid, ) - # We prefer WADO-RS over C-GET over C-MOVE - if self.server.dicomweb_wado_support: + if force_move: + if self.server.patient_root_move_support or self.server.study_root_move_support: + self._fetch_images_with_c_move(query, callback) + else: + raise DicomError("C-MOVE not supported by this server.") + elif self.server.dicomweb_wado_support: self._fetch_images_with_wado_rs(query, callback) elif self.server.patient_root_get_support or self.server.study_root_get_support: self._fetch_images_with_c_get(query, callback) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 7ee369cb6..b5564c01d 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -221,36 +221,45 @@ def process(self): if job.convert_to_nifti: with tempfile.TemporaryDirectory() as tmp_dir: tmp_path = Path(tmp_dir) - self._export_series( + image_count = self._export_series( operator, series, tmp_path, subject_id, pseudonymizer, ) - output_path = ( - output_base / self.mass_task.partition_key - / subject_id / study_folder / series_folder - ) - nifti_files = self._convert_series( - series, tmp_path, output_path, - ) + if image_count == 0: + nifti_files = [] + else: + output_path = ( + output_base / self.mass_task.partition_key + / subject_id / study_folder / series_folder + ) + nifti_files = self._convert_series( + series, tmp_path, output_path, + ) else: output_path = ( output_base / self.mass_task.partition_key / subject_id / study_folder / series_folder ) - self._export_series( + image_count = self._export_series( operator, series, output_path, subject_id, pseudonymizer, ) nifti_files = [] converted_file = "" - if nifti_files: + if image_count == 0: + status = MassTransferVolume.Status.SKIPPED + log_msg = "C-GET returned 0 images" + elif nifti_files: converted_file = "\n".join(str(f) for f in nifti_files) status = done_status + log_msg = "" elif job.convert_to_nifti: status = MassTransferVolume.Status.SKIPPED + log_msg = "No valid DICOM images for NIfTI conversion" else: status = done_status + log_msg = "" MassTransferVolume.objects.create( job=job, @@ -270,6 +279,7 @@ def process(self): number_of_images=series.number_of_images, converted_file=converted_file, status=status, + log=log_msg, ) if status == MassTransferVolume.Status.SKIPPED: @@ -509,26 +519,50 @@ def _export_series( output_path: Path, subject_id: str, pseudonymizer: Pseudonymizer | None, - ) -> None: + ) -> int: + """Export a series to output_path. Returns number of images written. + + If C-GET returns 0 images and the server supports C-MOVE, automatically + retries with C-MOVE (IMPAX and some other PACS have broken C-GET). + """ output_path.mkdir(parents=True, exist_ok=True) manipulator = DicomManipulator(pseudonymizer=pseudonymizer) if pseudonymizer else None + image_count = 0 def callback(ds: Dataset | None) -> None: + nonlocal image_count if ds is None: return if manipulator: manipulator.manipulate(ds, pseudonym=subject_id) file_name = sanitize_filename(f"{ds.SOPInstanceUID}.dcm") write_dataset(ds, output_path / file_name) + image_count += 1 + # Prefer C-MOVE for mass transfer — C-GET is unreliable on some PACS + # (e.g. IMPAX returns 0 images). Fall back to C-GET if C-MOVE is not supported. 
+ use_move = ( + operator.server.patient_root_move_support + or operator.server.study_root_move_support + ) operator.fetch_series( patient_id=series.patient_id, study_uid=series.study_instance_uid, series_uid=series.series_instance_uid, callback=callback, + force_move=use_move, ) + if image_count == 0: + try: + if output_path.exists() and not any(output_path.iterdir()): + output_path.rmdir() + except OSError: + pass + + return image_count + def _convert_series( self, series: DiscoveredSeries, From ccd8bd9dedc05e397a557d851d2ab9b4ef3cfa42 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Sat, 7 Mar 2026 23:58:05 +0100 Subject: [PATCH 025/103] Revert C-MOVE preference, keep pseudonymized UID fields, and detect empty retrievals Reverts the C-MOVE preference since C-MOVE requires PACS-side AE registration. Restores study/series pseudonymized UID fields on MassTransferVolume and populates them after DICOM anonymization. Tracks image count from C-GET to skip series with 0 retrieved images instead of passing empty directories to dcm2niix. --- adit/core/utils/dicom_operator.py | 10 ++----- .../0009_remove_pseudonymized_uid_fields.py | 21 -------------- adit/mass_transfer/models.py | 2 ++ adit/mass_transfer/processors.py | 29 +++++++++---------- adit/mass_transfer/tests/test_processor.py | 18 ++++++++---- 5 files changed, 30 insertions(+), 50 deletions(-) delete mode 100644 adit/mass_transfer/migrations/0009_remove_pseudonymized_uid_fields.py diff --git a/adit/core/utils/dicom_operator.py b/adit/core/utils/dicom_operator.py index 3f35c8c58..9ee2670b3 100644 --- a/adit/core/utils/dicom_operator.py +++ b/adit/core/utils/dicom_operator.py @@ -398,7 +398,6 @@ def fetch_series( study_uid: str, series_uid: str, callback: Callable[[Dataset], None], - force_move: bool = False, ) -> None: """Fetch a series. @@ -407,7 +406,6 @@ def fetch_series( study_uid: The study instance UID. series_uid: The series instance UID. callback: A callback function that is called for each fetched image. - force_move: If True, skip C-GET and use C-MOVE directly. 
""" query = QueryDataset.create( @@ -417,12 +415,8 @@ def fetch_series( SeriesInstanceUID=series_uid, ) - if force_move: - if self.server.patient_root_move_support or self.server.study_root_move_support: - self._fetch_images_with_c_move(query, callback) - else: - raise DicomError("C-MOVE not supported by this server.") - elif self.server.dicomweb_wado_support: + # We prefer WADO-RS over C-GET over C-MOVE + if self.server.dicomweb_wado_support: self._fetch_images_with_wado_rs(query, callback) elif self.server.patient_root_get_support or self.server.study_root_get_support: self._fetch_images_with_c_get(query, callback) diff --git a/adit/mass_transfer/migrations/0009_remove_pseudonymized_uid_fields.py b/adit/mass_transfer/migrations/0009_remove_pseudonymized_uid_fields.py deleted file mode 100644 index 3499082da..000000000 --- a/adit/mass_transfer/migrations/0009_remove_pseudonymized_uid_fields.py +++ /dev/null @@ -1,21 +0,0 @@ -# Generated by Django 5.2.8 on 2026-03-05 12:06 - -from django.db import migrations - - -class Migration(migrations.Migration): - - dependencies = [ - ('mass_transfer', '0008_remove_exported_folder_fields'), - ] - - operations = [ - migrations.RemoveField( - model_name='masstransfervolume', - name='series_instance_uid_pseudonymized', - ), - migrations.RemoveField( - model_name='masstransfervolume', - name='study_instance_uid_pseudonymized', - ), - ] diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index d7af702f5..414a4ae17 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -179,7 +179,9 @@ class Status(models.TextChoices): patient_id = models.CharField(max_length=64, blank=True, default="") accession_number = models.CharField(max_length=64, blank=True, default="") study_instance_uid = models.CharField(max_length=64) + study_instance_uid_pseudonymized = models.CharField(max_length=128, blank=True, default="") series_instance_uid = models.CharField(max_length=64) + series_instance_uid_pseudonymized = models.CharField(max_length=128, blank=True, default="") modality = models.CharField(max_length=16, blank=True, default="") study_description = models.CharField(max_length=256, blank=True, default="") series_description = models.CharField(max_length=256, blank=True, default="") diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index b5564c01d..8badc9705 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -221,7 +221,7 @@ def process(self): if job.convert_to_nifti: with tempfile.TemporaryDirectory() as tmp_dir: tmp_path = Path(tmp_dir) - image_count = self._export_series( + image_count, p_study_uid, p_series_uid = self._export_series( operator, series, tmp_path, subject_id, pseudonymizer, ) @@ -240,7 +240,7 @@ def process(self): output_base / self.mass_task.partition_key / subject_id / study_folder / series_folder ) - image_count = self._export_series( + image_count, p_study_uid, p_series_uid = self._export_series( operator, series, output_path, subject_id, pseudonymizer, ) @@ -269,7 +269,9 @@ def process(self): pseudonym=subject_id if pseudonymizer else "", accession_number=series.accession_number, study_instance_uid=series.study_instance_uid, + study_instance_uid_pseudonymized=p_study_uid, series_instance_uid=series.series_instance_uid, + series_instance_uid_pseudonymized=p_series_uid, modality=series.modality, study_description=series.study_description, series_description=series.series_description, @@ -519,39 +521,36 @@ def _export_series( output_path: 
Path, subject_id: str, pseudonymizer: Pseudonymizer | None, - ) -> int: - """Export a series to output_path. Returns number of images written. + ) -> tuple[int, str, str]: + """Export a series to output_path. - If C-GET returns 0 images and the server supports C-MOVE, automatically - retries with C-MOVE (IMPAX and some other PACS have broken C-GET). + Returns (image_count, pseudonymized_study_uid, pseudonymized_series_uid). """ output_path.mkdir(parents=True, exist_ok=True) manipulator = DicomManipulator(pseudonymizer=pseudonymizer) if pseudonymizer else None image_count = 0 + pseudo_study_uid = "" + pseudo_series_uid = "" def callback(ds: Dataset | None) -> None: - nonlocal image_count + nonlocal image_count, pseudo_study_uid, pseudo_series_uid if ds is None: return if manipulator: manipulator.manipulate(ds, pseudonym=subject_id) + if not pseudo_study_uid: + pseudo_study_uid = str(ds.StudyInstanceUID) + pseudo_series_uid = str(ds.SeriesInstanceUID) file_name = sanitize_filename(f"{ds.SOPInstanceUID}.dcm") write_dataset(ds, output_path / file_name) image_count += 1 - # Prefer C-MOVE for mass transfer — C-GET is unreliable on some PACS - # (e.g. IMPAX returns 0 images). Fall back to C-GET if C-MOVE is not supported. - use_move = ( - operator.server.patient_root_move_support - or operator.server.study_root_move_support - ) operator.fetch_series( patient_id=series.patient_id, study_uid=series.study_instance_uid, series_uid=series.series_instance_uid, callback=callback, - force_move=use_move, ) if image_count == 0: @@ -561,7 +560,7 @@ def callback(ds: Dataset | None) -> None: except OSError: pass - return image_count + return image_count, pseudo_study_uid, pseudo_series_uid def _convert_series( self, diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 01381da67..5e7668388 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -327,6 +327,7 @@ def fake_export(*args, **kwargs): call_count["n"] += 1 if call_count["n"] == 2: raise DicomError("Export failed") + return (1, "", "") mocker.patch.object(processor, "_export_series", side_effect=fake_export) mocker.patch.object(MassTransferVolume.objects, "create") @@ -442,6 +443,7 @@ def filter_side_effect(**kwargs): export_calls = [] def fake_export(*args, **kwargs): export_calls.append(1) + return (1, "", "") mocker.patch.object(processor, "_export_series", side_effect=fake_export) mocker.patch.object(MassTransferVolume.objects, "create") @@ -466,6 +468,7 @@ def test_process_none_mode_uses_patient_id_as_subject( def fake_export(op, s, path, subject_id, pseudonymizer): export_calls.append((subject_id, pseudonymizer)) + return (1, "", "") mocker.patch.object(processor, "_export_series", side_effect=fake_export) mocker.patch.object(MassTransferVolume.objects, "create") @@ -495,6 +498,7 @@ def test_process_pseudonymize_mode_same_study_same_pseudonym( def fake_export(op, s, path, subject_id, pseudonymizer): subject_ids.append(subject_id) + return (1, "", "") mocker.patch.object(processor, "_export_series", side_effect=fake_export) mocker.patch.object(MassTransferVolume.objects, "create") @@ -523,6 +527,7 @@ def test_process_pseudonymize_mode_different_studies_different_pseudonyms( def fake_export(op, s, path, subject_id, pseudonymizer): subject_ids.append(subject_id) + return (1, "", "") mocker.patch.object(processor, "_export_series", side_effect=fake_export) mocker.patch.object(MassTransferVolume.objects, "create") @@ -552,6 +557,7 @@ def 
test_process_linking_mode_uses_deterministic_pseudonym( def fake_export(op, s, path, subject_id, pseudonymizer): subject_ids.append(subject_id) + return (1, "", "") mocker.patch.object(processor, "_export_series", side_effect=fake_export) mocker.patch.object(MassTransferVolume.objects, "create") @@ -765,7 +771,7 @@ def test_process_creates_volume_records_on_success( processor = MassTransferTaskProcessor(task) mocker.patch.object(processor, "_discover_series", return_value=series) mocker.patch("adit.mass_transfer.processors.DicomOperator") - mocker.patch.object(processor, "_export_series") + mocker.patch.object(processor, "_export_series", return_value=(1, "", "")) assert MassTransferVolume.objects.filter(job=job).count() == 0 @@ -875,7 +881,7 @@ def test_process_deletes_error_volumes_on_retry( processor = MassTransferTaskProcessor(task) mocker.patch.object(processor, "_discover_series", return_value=series) mocker.patch("adit.mass_transfer.processors.DicomOperator") - mocker.patch.object(processor, "_export_series") + mocker.patch.object(processor, "_export_series", return_value=(1, "", "")) result = processor.process() @@ -930,7 +936,7 @@ def test_process_deterministic_pseudonyms_across_partitions( )] processor1 = MassTransferTaskProcessor(task1) mocker.patch.object(processor1, "_discover_series", return_value=series1) - mocker.patch.object(processor1, "_export_series") + mocker.patch.object(processor1, "_export_series", return_value=(1, "", "")) processor1.process() # Partition 2: same PAT1 @@ -939,7 +945,7 @@ def test_process_deterministic_pseudonyms_across_partitions( )] processor2 = MassTransferTaskProcessor(task2) mocker.patch.object(processor2, "_discover_series", return_value=series2) - mocker.patch.object(processor2, "_export_series") + mocker.patch.object(processor2, "_export_series", return_value=(1, "", "")) processor2.process() vol1 = MassTransferVolume.objects.get(series_instance_uid="1.2.3.100.1") @@ -994,7 +1000,7 @@ def test_process_pseudonymize_mode_not_linked_across_partitions( )] processor1 = MassTransferTaskProcessor(task1) mocker.patch.object(processor1, "_discover_series", return_value=series1) - mocker.patch.object(processor1, "_export_series") + mocker.patch.object(processor1, "_export_series", return_value=(1, "", "")) processor1.process() series2 = [_make_discovered( @@ -1002,7 +1008,7 @@ def test_process_pseudonymize_mode_not_linked_across_partitions( )] processor2 = MassTransferTaskProcessor(task2) mocker.patch.object(processor2, "_discover_series", return_value=series2) - mocker.patch.object(processor2, "_export_series") + mocker.patch.object(processor2, "_export_series", return_value=(1, "", "")) processor2.process() vol1 = MassTransferVolume.objects.get(series_instance_uid="1.2.3.100.1") From e9370fd140c2e8885e9b4b0465cb8128f05c1244 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Sun, 8 Mar 2026 00:21:02 +0100 Subject: [PATCH 026/103] Include study time in folder name --- adit/mass_transfer/processors.py | 4 ++-- adit/mass_transfer/tests/test_processor.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 8badc9705..7a4fd927c 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -85,9 +85,9 @@ def _study_datetime(study: ResultDataset) -> datetime: def _study_folder_name(study_description: str, study_dt: datetime, study_uid: str) -> str: desc = sanitize_filename(study_description or "Undefined") - date_str = study_dt.strftime("%Y%m%d") 
+ dt_str = study_dt.strftime("%Y%m%d_%H%M%S") short_hash = hashlib.sha256(study_uid.encode()).hexdigest()[:4] - return f"{desc}_{date_str}_{short_hash}" + return f"{desc}_{dt_str}_{short_hash}" def _series_folder_name( diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 5e7668388..7d617875a 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -662,10 +662,9 @@ def test_series_folder_name_with_no_number(): def test_study_folder_name_includes_description_date_and_hash(): name = _study_folder_name("Brain CT", datetime(2024, 1, 15, 10, 30), "1.2.3.4") - assert name.startswith("Brain CT_20240115_") - assert len(name.split("_")) == 3 + assert name.startswith("Brain CT_20240115_103000_") # Hash part is 4 chars - assert len(name.split("_")[2]) == 4 + assert len(name.split("_")[-1]) == 4 def test_study_folder_name_deterministic(): From 9e61dd5d022f39308ccc4cdcff60329e686c5c2a Mon Sep 17 00:00:00 2001 From: Ritwik Date: Sun, 8 Mar 2026 00:28:13 +0100 Subject: [PATCH 027/103] Update spec with pseudonymized UID fields, folder name format, and cleanup notes --- docs/mass_transfer_spec.md | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/docs/mass_transfer_spec.md b/docs/mass_transfer_spec.md index 1e2d1d627..1fde6b54d 100644 --- a/docs/mass_transfer_spec.md +++ b/docs/mass_transfer_spec.md @@ -8,7 +8,7 @@ pull large cohorts — e.g. "all CT head scans from Neuroradiologie in 2024" — pseudonymize them, and optionally convert to NIfTI. ``` -┌──────────┐ C-FIND ┌──────────┐ C-GET ┌──────────────────┐ +┌──────────┐ ┌──────────┐ ┌──────────────────┐ │ ADIT │──────────────>│ PACS │──────────────>│ Network Folder │ │ Worker │ discover │ Server │ fetch + │ /mnt/data/... │ │ │ studies & │ │ pseudonymize│ │ @@ -41,7 +41,8 @@ MassTransferJob (one per user request) │ │ │ ├── MassTransferVolume (one per exported series) │ │ ├── patient_id, pseudonym - │ │ ├── study/series UIDs + │ │ ├── study_instance_uid, study_instance_uid_pseudonymized + │ │ ├── series_instance_uid, series_instance_uid_pseudonymized │ │ ├── status: exported | converted | skipped | error │ │ └── log (error reason if failed) │ └── ... @@ -177,33 +178,29 @@ One task = one partition. Here is the full flow inside `MassTransferTaskProcesso /mnt/data/mass_transfer_exports/ └── 20250101-20250107/ # partition key ├── A7B3X9K2M1Q4/ # pseudonym (or raw PatientID) - │ ├── CT_Schaedel_20250103_f2a1/ # StudyDescription_Date_ShortHash + │ ├── CT_Schaedel_20250103_221030/ # Description_Date_Time │ │ ├── Axial_1/ # SeriesDescription_SeriesNumber │ │ │ ├── 1.2.3.4.5.6.7.dcm │ │ │ ├── 1.2.3.4.5.6.8.dcm │ │ │ └── ... │ │ └── Sagittal_2/ │ │ └── ... - │ └── MRT_Kopf_20250105_b8c2/ + │ └── MRT_Kopf_20250105_221030/ │ └── T1_1/ │ └── ... └── R4T7Y2W8N3P1/ └── ... ``` -The study folder name includes a 4-char hash of the StudyInstanceUID to -prevent collisions when the same patient has multiple studies with the same -description on the same date. 
- --- ## Anonymization Modes -| Mode | Folder name | DICOM tags | Cross-partition consistency | CSV export | -| ----------------------------- | -------------------------------- | ------------------ | --------------------------------------------- | ----------------------------- | -| **None** | Raw PatientID | Untouched | N/A | Not available | -| **Pseudonymize** | Random hex per study | dicognito (random) | No — each study gets a unique random folder | Not available | -| **Pseudonymize with Linking** | Deterministic pseudonym | dicognito (seeded) | Yes — same patient always gets same pseudonym | patient_id → pseudonym pairs | +| Mode | Folder name | DICOM tags | Cross-partition consistency | CSV export | +| ----------------------------- | ----------------------- | ------------------ | --------------------------------------------- | ---------------------------- | +| **None** | Raw PatientID | Untouched | N/A | Not available | +| **Pseudonymize** | Random hex per study | dicognito (random) | No — each study gets a unique random folder | Not available | +| **Pseudonymize with Linking** | Deterministic pseudonym | dicognito (seeded) | Yes — same patient always gets same pseudonym | patient_id → pseudonym pairs | ### How Linking Works @@ -392,7 +389,7 @@ that abandoned C-GET generators properly close their associations (via | `utils/partitions.py` | Date range → partition windows | | `apps.py` | App registration, menu item, processor registration | | `templates/` | Job form, job detail, task detail, filter CRUD | -| `tests/` | 44 tests (processor, partitions, cleanup) | +| `tests/` | 45 tests (processor, partitions, cleanup) | ### Modified: `adit/core/` @@ -413,7 +410,7 @@ that abandoned C-GET generators properly close their associations (via ## Test Coverage -44 tests covering: +45 tests covering: - **Discovery**: recursive time-window splitting, deduplication, boundary correctness - **Processing**: success, partial failure, total failure, suspension, bad source/dest, no filters, empty partition @@ -422,10 +419,3 @@ that abandoned C-GET generators properly close their associations (via - **NIfTI conversion**: dcm2niix failure, no output, non-image DICOM skip - **Utilities**: folder name generation, DICOM wildcard matching, integer parsing, datetime handling - **Cleanup**: no-op verification (deferred insertion means nothing to clean up) - -Run with: - -```bash -DJANGO_SETTINGS_MODULE=adit.settings.development \ - python -m pytest adit/mass_transfer/tests/ -v -``` From 05c5a9435a9e6e28679637f9651032f9a4756600 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Sun, 8 Mar 2026 00:41:20 +0100 Subject: [PATCH 028/103] Re-add pseudonymized UID columns via migration 0010 Restores 0009 (which removed the fields) and adds 0010 to re-add them, since 0009 already ran on existing databases. 
--- .../0009_remove_pseudonymized_uid_fields.py | 21 ++++++++++++++++ .../0010_re_add_pseudonymized_uid_fields.py | 25 +++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 adit/mass_transfer/migrations/0009_remove_pseudonymized_uid_fields.py create mode 100644 adit/mass_transfer/migrations/0010_re_add_pseudonymized_uid_fields.py diff --git a/adit/mass_transfer/migrations/0009_remove_pseudonymized_uid_fields.py b/adit/mass_transfer/migrations/0009_remove_pseudonymized_uid_fields.py new file mode 100644 index 000000000..3499082da --- /dev/null +++ b/adit/mass_transfer/migrations/0009_remove_pseudonymized_uid_fields.py @@ -0,0 +1,21 @@ +# Generated by Django 5.2.8 on 2026-03-05 12:06 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('mass_transfer', '0008_remove_exported_folder_fields'), + ] + + operations = [ + migrations.RemoveField( + model_name='masstransfervolume', + name='series_instance_uid_pseudonymized', + ), + migrations.RemoveField( + model_name='masstransfervolume', + name='study_instance_uid_pseudonymized', + ), + ] diff --git a/adit/mass_transfer/migrations/0010_re_add_pseudonymized_uid_fields.py b/adit/mass_transfer/migrations/0010_re_add_pseudonymized_uid_fields.py new file mode 100644 index 000000000..3cb87c2df --- /dev/null +++ b/adit/mass_transfer/migrations/0010_re_add_pseudonymized_uid_fields.py @@ -0,0 +1,25 @@ +# Re-add pseudonymized UID fields that were removed in 0009. +# Migration 0009 (remove_pseudonymized_uid_fields) removed these columns. +# This migration adds them back. + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('mass_transfer', '0009_remove_pseudonymized_uid_fields'), + ] + + operations = [ + migrations.AddField( + model_name='masstransfervolume', + name='study_instance_uid_pseudonymized', + field=models.CharField(blank=True, default='', max_length=128), + ), + migrations.AddField( + model_name='masstransfervolume', + name='series_instance_uid_pseudonymized', + field=models.CharField(blank=True, default='', max_length=128), + ), + ] From d6ac90e2bc3a1e73bf490054f6f16cf45697fe5d Mon Sep 17 00:00:00 2001 From: Ritwik Date: Sun, 8 Mar 2026 01:01:03 +0100 Subject: [PATCH 029/103] Add compressed transfer syntaxes to storage presentation contexts for C-GET IMPAX PACS stores most images in compressed formats (JPEG Lossless, JPEG 2000, etc.) but our storage contexts only offered uncompressed transfer syntaxes. During C-GET, the PACS could not send images back because no matching transfer syntax was negotiated, resulting in Success status with 0 images delivered. --- adit/core/utils/presentation_contexts.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/adit/core/utils/presentation_contexts.py b/adit/core/utils/presentation_contexts.py index 445e9c0cc..ed3067d53 100644 --- a/adit/core/utils/presentation_contexts.py +++ b/adit/core/utils/presentation_contexts.py @@ -2,6 +2,24 @@ build_context, ) +# Transfer syntaxes to offer for storage contexts. Includes both uncompressed +# and compressed syntaxes so that C-GET can receive images regardless of how +# the PACS stores them internally (IMPAX, for example, stores most images in +# JPEG Lossless and returns 0 images if we only offer uncompressed). 
+_transfer_syntaxes = [ + "1.2.840.10008.1.2", # Implicit VR Little Endian + "1.2.840.10008.1.2.1", # Explicit VR Little Endian + "1.2.840.10008.1.2.4.50", # JPEG Baseline + "1.2.840.10008.1.2.4.51", # JPEG Extended + "1.2.840.10008.1.2.4.57", # JPEG Lossless + "1.2.840.10008.1.2.4.70", # JPEG Lossless SV1 (default for most PACS) + "1.2.840.10008.1.2.4.80", # JPEG-LS Lossless + "1.2.840.10008.1.2.4.81", # JPEG-LS Near Lossless + "1.2.840.10008.1.2.4.90", # JPEG 2000 Lossless + "1.2.840.10008.1.2.4.91", # JPEG 2000 + "1.2.840.10008.1.2.5", # RLE Lossless +] + # Prebuilt context matching the DCMTK Implementation https://github.com/DCMTK/dcmtk/blob/d1fb197927fd4178b5a24e0f0dba6f8d785a8f93/dcmdata/libsrc/dcuid.cc#L895 _storage = [ "1.2.840.10008.5.1.4.1.1.9.1.3", # AmbulatoryECGWaveformStorage @@ -128,5 +146,5 @@ ] assert len(_storage) <= 120 -StoragePresentationContexts = [build_context(uid) for uid in sorted(_storage)] +StoragePresentationContexts = [build_context(uid, _transfer_syntaxes) for uid in sorted(_storage)] """Pre-built presentation contexts for :dcm:`Storage` containing 120 selected SOP Classes.""" # noqa: E501 From 3cdd3216d8c5fdd2bd482acd8abcfb51b13a6cad Mon Sep 17 00:00:00 2001 From: Ritwik Date: Mon, 9 Mar 2026 12:37:56 +0100 Subject: [PATCH 030/103] Pass compressed transfer syntaxes when adding C-GET storage contexts The previous commit added compressed transfer syntaxes to StoragePresentationContexts, but dimse_connector.py was only passing cx.abstract_syntax to add_requested_context (which defaults to uncompressed-only). Now passes cx.transfer_syntax so the PACS can actually send images in JPEG Lossless and other compressed formats. --- adit/core/utils/dimse_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adit/core/utils/dimse_connector.py b/adit/core/utils/dimse_connector.py index 0eef3d391..8fee0dd87 100644 --- a/adit/core/utils/dimse_connector.py +++ b/adit/core/utils/dimse_connector.py @@ -190,7 +190,7 @@ def _associate(self, service: DimseService): ae.add_requested_context(StudyRootQueryRetrieveInformationModelGet) for cx in StoragePresentationContexts: assert cx.abstract_syntax is not None - ae.add_requested_context(cx.abstract_syntax) + ae.add_requested_context(cx.abstract_syntax, cx.transfer_syntax) ext_neg.append(build_role(cx.abstract_syntax, scp_role=True)) elif service == "C-MOVE": ae.requested_contexts = QueryRetrievePresentationContexts From a23f2609246bbabfbc909071044ae28859ed5392 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Mon, 9 Mar 2026 17:16:08 +0100 Subject: [PATCH 031/103] Fix C-GET reliability: presentation context split, dead association cleanup, and IMAGE-level pre-check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Split storage presentation contexts into image (compressed+uncompressed TSes) and non-image (uncompressed only) to prevent 0xA702 errors when PACS selects JPEG Lossless for non-pixel-data SOP classes like RawDataStorage - Fix persistent connection crash ("A former connection was not closed properly") when PACS drops the association between calls — clean up dead associations instead of raising AssertionError - Add IMAGE-level C-FIND pre-check before C-GET to skip series with no retrievable instances, avoiding wasted C-GET attempts on stale PACS catalog entries - Move max_search_results from Django settings to DicomServer model field - Add store handler error logging for C-GET sub-operation failures - Fix _find_studies to use actual time components for same-day 
queries and handle cross-midnight splits correctly --- ...6_add_max_search_results_to_dicomserver.py | 18 ++ adit/core/models.py | 3 + adit/core/utils/dicom_operator.py | 17 ++ adit/core/utils/dimse_connector.py | 17 +- adit/core/utils/presentation_contexts.py | 281 ++++++++++-------- adit/mass_transfer/processors.py | 65 +++- adit/mass_transfer/tests/test_processor.py | 191 ++++++++---- adit/settings/base.py | 2 - docker-compose.base.yml | 1 - example.env | 2 - 10 files changed, 392 insertions(+), 205 deletions(-) create mode 100644 adit/core/migrations/0016_add_max_search_results_to_dicomserver.py diff --git a/adit/core/migrations/0016_add_max_search_results_to_dicomserver.py b/adit/core/migrations/0016_add_max_search_results_to_dicomserver.py new file mode 100644 index 000000000..dc2e0d732 --- /dev/null +++ b/adit/core/migrations/0016_add_max_search_results_to_dicomserver.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.8 on 2026-03-09 12:00 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0015_delete_queuedtask'), + ] + + operations = [ + migrations.AddField( + model_name='dicomserver', + name='max_search_results', + field=models.PositiveIntegerField(default=200), + ), + ] diff --git a/adit/core/models.py b/adit/core/models.py index 630027977..ad2b94813 100644 --- a/adit/core/models.py +++ b/adit/core/models.py @@ -148,6 +148,9 @@ class DicomServer(DicomNode): dicomweb_stow_prefix = models.CharField(blank=True, max_length=2000) dicomweb_authorization_header = models.CharField(blank=True, max_length=2000) + # C-FIND result limit before recursive time-window splitting + max_search_results = models.PositiveIntegerField(default=200) + objects: DicomNodeManager["DicomServer"] = DicomNodeManager["DicomServer"]() diff --git a/adit/core/utils/dicom_operator.py b/adit/core/utils/dicom_operator.py index 9ee2670b3..0006c3309 100644 --- a/adit/core/utils/dicom_operator.py +++ b/adit/core/utils/dicom_operator.py @@ -392,6 +392,22 @@ def fetch_study( logger.debug("Successfully downloaded study %s.", study_uid) + def series_has_instances( + self, + patient_id: str, + study_uid: str, + series_uid: str, + ) -> bool: + """Quick IMAGE-level C-FIND to check if a series has any retrievable instances.""" + query = QueryDataset.create( + PatientID=patient_id, + StudyInstanceUID=study_uid, + SeriesInstanceUID=series_uid, + ) + for _ in self.find_images(query, limit_results=1): + return True + return False + def fetch_series( self, patient_id: str, @@ -536,6 +552,7 @@ def store_handler(event: Event, store_errors: list[Exception]) -> int: try: self._handle_fetched_image(ds, callback) except Exception as err: + logger.error("Store handler failed for SOP %s: %s", ds.SOPInstanceUID, err, exc_info=True) store_errors.append(err) # Unfortunately not all PACS servers support or respect a C-CANCEL request, diff --git a/adit/core/utils/dimse_connector.py b/adit/core/utils/dimse_connector.py index 8fee0dd87..f4a754acf 100644 --- a/adit/core/utils/dimse_connector.py +++ b/adit/core/utils/dimse_connector.py @@ -145,7 +145,13 @@ def __init__( def open_connection(self, service: DimseService): if self.assoc: - raise AssertionError("A former connection was not closed properly.") + if not self.assoc.is_alive(): + # Association died (PACS dropped it, timeout, etc.) 
— clean up the stale reference + logger.debug("Cleaning up dead association to %s.", self.server.ae_title) + self.assoc = None + self._current_service = None + else: + raise AssertionError("A former connection was not closed properly.") logger.debug("Opening connection to DICOM server %s.", self.server.ae_title) @@ -209,6 +215,15 @@ def _associate(self, service: DimseService): if not self.assoc.is_established: raise RetriableDicomError(f"Could not connect to {self.server}.") + if service == "C-GET": + rejected = [] + for cx in self.assoc.rejected_contexts: + rejected.append(f"{cx.abstract_syntax}") + if rejected: + logger.warning("C-GET: %d presentation contexts rejected by SCP: %s", len(rejected), rejected) + accepted = [cx.abstract_syntax for cx in self.assoc.accepted_contexts] + logger.debug("C-GET: %d presentation contexts accepted", len(accepted)) + def close_connection(self): logger.debug("Closing connection to DICOM server %s.", self.server.ae_title) diff --git a/adit/core/utils/presentation_contexts.py b/adit/core/utils/presentation_contexts.py index ed3067d53..43c75fcd4 100644 --- a/adit/core/utils/presentation_contexts.py +++ b/adit/core/utils/presentation_contexts.py @@ -2,13 +2,12 @@ build_context, ) -# Transfer syntaxes to offer for storage contexts. Includes both uncompressed -# and compressed syntaxes so that C-GET can receive images regardless of how -# the PACS stores them internally (IMPAX, for example, stores most images in -# JPEG Lossless and returns 0 images if we only offer uncompressed). -_transfer_syntaxes = [ +_uncompressed_transfer_syntaxes = [ "1.2.840.10008.1.2", # Implicit VR Little Endian "1.2.840.10008.1.2.1", # Explicit VR Little Endian +] + +_compressed_transfer_syntaxes = [ "1.2.840.10008.1.2.4.50", # JPEG Baseline "1.2.840.10008.1.2.4.51", # JPEG Extended "1.2.840.10008.1.2.4.57", # JPEG Lossless @@ -20,131 +19,155 @@ "1.2.840.10008.1.2.5", # RLE Lossless ] -# Prebuilt context matching the DCMTK Implementation https://github.com/DCMTK/dcmtk/blob/d1fb197927fd4178b5a24e0f0dba6f8d785a8f93/dcmdata/libsrc/dcuid.cc#L895 -_storage = [ - "1.2.840.10008.5.1.4.1.1.9.1.3", # AmbulatoryECGWaveformStorage - "1.2.840.10008.5.1.4.1.1.9.5.1", # ArterialPulseWaveformStorage - "1.2.840.10008.5.1.4.1.1.78.2", # AutorefractionMeasurementsStorage - "1.2.840.10008.5.1.4.1.1.131", # BasicStructuredDisplayStorage - "1.2.840.10008.5.1.4.1.1.88.11", # BasicTextSRStorage - "1.2.840.10008.5.1.4.1.1.9.4.1", # BasicVoiceAudioWaveformStorage - "1.2.840.10008.5.1.4.1.1.11.4", # BlendingSoftcopyPresentationStateStorage +# Image SOP classes contain pixel data and may be stored in compressed transfer +# syntaxes. We offer both compressed and uncompressed so the SCP can pick +# whichever matches its internal storage. 
+_image_storage = [ "1.2.840.10008.5.1.4.1.1.13.1.3", # BreastTomosynthesisImageStorage - "1.2.840.10008.5.1.4.1.1.9.3.1", # CardiacElectrophysiologyWaveformStorage - "1.2.840.10008.5.1.4.1.1.88.65", # ChestCADSRStorage - "1.2.840.10008.5.1.4.1.1.88.69", # ColonCADSRStorage - "1.2.840.10008.5.1.4.1.1.11.2", # ColorSoftcopyPresentationStateStorage - "1.2.840.10008.5.1.4.1.1.88.34", # Comprehensive3DSRStorage - "1.2.840.10008.5.1.4.1.1.88.33", # ComprehensiveSRStorage - "1.2.840.10008.5.1.4.1.1.1", # ComputedRadiographyImageStorage - "1.2.840.10008.5.1.4.1.1.2", # CTImageStorage - "1.2.840.10008.5.1.4.1.1.66.3", # DeformableSpatialRegistrationStorage - "1.2.840.10008.5.1.4.1.1.1.3", # DigitalIntraOralXRayImageStorageForPresentation - "1.2.840.10008.5.1.4.1.1.1.3.1", # DigitalIntraOralXRayImageStorageForProcessing - "1.2.840.10008.5.1.4.1.1.1.2", # DigitalMammographyXRayImageStorageForPresentation - "1.2.840.10008.5.1.4.1.1.1.2.1", # DigitalMammographyXRayImageStorageForProcessing - "1.2.840.10008.5.1.4.1.1.1.1", # DigitalXRayImageStorageForPresentation - "1.2.840.10008.5.1.4.1.1.1.1.1", # DigitalXRayImageStorageForProcessing - "1.2.840.10008.5.1.4.1.1.104.2", # EncapsulatedCDAStorage - "1.2.840.10008.5.1.4.1.1.104.1", # EncapsulatedPDFStorage - "1.2.840.10008.5.1.4.1.1.2.1", # EnhancedCTImageStorage - "1.2.840.10008.5.1.4.1.1.4.3", # EnhancedMRColorImageStorage - "1.2.840.10008.5.1.4.1.1.4.1", # EnhancedMRImageStorage - "1.2.840.10008.5.1.4.1.1.130", # EnhancedPETImageStorage - "1.2.840.10008.5.1.4.1.1.88.22", # EnhancedSRStorage - "1.2.840.10008.5.1.4.1.1.6.2", # EnhancedUSVolumeStorage - "1.2.840.10008.5.1.4.1.1.12.1.1", # EnhancedXAImageStorage - "1.2.840.10008.5.1.4.1.1.12.2.1", # EnhancedXRFImageStorage - "1.2.840.10008.5.1.4.1.1.9.4.2", # GeneralAudioWaveformStorage - "1.2.840.10008.5.1.4.1.1.9.1.2", # GeneralECGWaveformStorage - "1.2.840.10008.5.1.4.1.1.11.1", # GrayscaleSoftcopyPresentationStateStorage - "1.2.840.10008.5.1.4.1.1.9.2.1", # HemodynamicWaveformStorage - "1.2.840.10008.5.1.4.1.1.88.70", # ImplantationPlanSRStorage - "1.2.840.10008.5.1.4.1.1.78.8", # IntraocularLensCalculationsStorage - "1.2.840.10008.5.1.4.1.1.14.1", # IntravascularOpticalCoherenceTomographyImageStorageForPresentation # noqa: E501 - "1.2.840.10008.5.1.4.1.1.14.2", # IntravascularOpticalCoherenceTomographyImageStorageForProcessing # noqa: E501 - "1.2.840.10008.5.1.4.1.1.78.3", # KeratometryMeasurementsStorage - "1.2.840.10008.5.1.4.1.1.88.59", # KeyObjectSelectionDocumentStorage - "1.2.840.10008.5.1.4.1.1.2.2", # LegacyConvertedEnhancedCTImageStorage - "1.2.840.10008.5.1.4.1.1.4.4", # LegacyConvertedEnhancedMRImageStorage - "1.2.840.10008.5.1.4.1.1.128.1", # LegacyConvertedEnhancedPETImageStorage - "1.2.840.10008.5.1.4.1.1.78.1", # LensometryMeasurementsStorage - "1.2.840.10008.5.1.4.1.1.79.1", # MacularGridThicknessAndVolumeReportStorage - "1.2.840.10008.5.1.4.1.1.88.50", # MammographyCADSRStorage - "1.2.840.10008.5.1.4.1.1.4", # MRImageStorage - "1.2.840.10008.5.1.4.1.1.4.2", # MRSpectroscopyStorage - "1.2.840.10008.5.1.4.1.1.7.2", # MultiframeGrayscaleByteSecondaryCaptureImageStorage - "1.2.840.10008.5.1.4.1.1.7.3", # MultiframeGrayscaleWordSecondaryCaptureImageStorage - "1.2.840.10008.5.1.4.1.1.7.1", # MultiframeSingleBitSecondaryCaptureImageStorage - "1.2.840.10008.5.1.4.1.1.7.4", # MultiframeTrueColorSecondaryCaptureImageStorage - "1.2.840.10008.5.1.4.1.1.20", # NuclearMedicineImageStorage - "1.2.840.10008.5.1.4.1.1.78.7", # OphthalmicAxialMeasurementsStorage - "1.2.840.10008.5.1.4.1.1.77.1.5.2", # 
OphthalmicPhotography16BitImageStorage - "1.2.840.10008.5.1.4.1.1.77.1.5.1", # OphthalmicPhotography8BitImageStorage - "1.2.840.10008.5.1.4.1.1.81.1", # OphthalmicThicknessMapStorage - "1.2.840.10008.5.1.4.1.1.77.1.5.4", # OphthalmicTomographyImageStorage - "1.2.840.10008.5.1.4.1.1.80.1", # OphthalmicVisualFieldStaticPerimetryMeasurementsStorage - "1.2.840.10008.5.1.4.1.1.128", # PositronEmissionTomographyImageStorage - "1.2.840.10008.5.1.4.1.1.88.40", # ProcedureLogStorage - "1.2.840.10008.5.1.4.1.1.11.3", # PseudoColorSoftcopyPresentationStateStorage - "1.2.840.10008.5.1.4.1.1.66", # RawDataStorage - "1.2.840.10008.5.1.4.1.1.67", # RealWorldValueMappingStorage - "1.2.840.10008.5.1.4.1.1.9.6.1", # RespiratoryWaveformStorage - "1.2.840.10008.5.1.4.34.7", # RTBeamsDeliveryInstructionStorage - "1.2.840.10008.5.1.4.1.1.481.4", # RTBeamsTreatmentRecordStorage - "1.2.840.10008.5.1.4.1.1.481.6", # RTBrachyTreatmentRecordStorage - "1.2.840.10008.5.1.4.1.1.481.2", # RTDoseStorage - "1.2.840.10008.5.1.4.1.1.481.1", # RTImageStorage - "1.2.840.10008.5.1.4.1.1.481.9", # RTIonBeamsTreatmentRecordStorage - "1.2.840.10008.5.1.4.1.1.481.8", # RTIonPlanStorage - "1.2.840.10008.5.1.4.1.1.481.5", # RTPlanStorage - "1.2.840.10008.5.1.4.1.1.481.3", # RTStructureSetStorage - "1.2.840.10008.5.1.4.1.1.481.7", # RTTreatmentSummaryRecordStorage - "1.2.840.10008.5.1.4.1.1.7", # SecondaryCaptureImageStorage - "1.2.840.10008.5.1.4.1.1.66.4", # SegmentationStorage - "1.2.840.10008.5.1.4.1.1.66.2", # SpatialFiducialsStorage - "1.2.840.10008.5.1.4.1.1.66.1", # SpatialRegistrationStorage - "1.2.840.10008.5.1.4.1.1.78.6", # SpectaclePrescriptionReportStorage - "1.2.840.10008.5.1.4.1.1.77.1.5.3", # StereometricRelationshipStorage - "1.2.840.10008.5.1.4.1.1.78.4", # SubjectiveRefractionMeasurementsStorage - "1.2.840.10008.5.1.4.1.1.68.1", # SurfaceScanMeshStorage - "1.2.840.10008.5.1.4.1.1.68.2", # SurfaceScanPointCloudStorage - "1.2.840.10008.5.1.4.1.1.66.5", # SurfaceSegmentationStorage - "1.2.840.10008.5.1.4.1.1.9.1.1", # TwelveLeadECGWaveformStorage - "1.2.840.10008.5.1.4.1.1.6.1", # UltrasoundImageStorage - "1.2.840.10008.5.1.4.1.1.3.1", # UltrasoundMultiframeImageStorage - "1.2.840.10008.5.1.4.1.1.77.1.1.1", # VideoEndoscopicImageStorage - "1.2.840.10008.5.1.4.1.1.77.1.2.1", # VideoMicroscopicImageStorage - "1.2.840.10008.5.1.4.1.1.77.1.4.1", # VideoPhotographicImageStorage - "1.2.840.10008.5.1.4.1.1.78.5", # VisualAcuityMeasurementsStorage - "1.2.840.10008.5.1.4.1.1.77.1.1", # VLEndoscopicImageStorage - "1.2.840.10008.5.1.4.1.1.77.1.2", # VLMicroscopicImageStorage - "1.2.840.10008.5.1.4.1.1.77.1.4", # VLPhotographicImageStorage - "1.2.840.10008.5.1.4.1.1.77.1.3", # VLSlideCoordinatesMicroscopicImageStorage - "1.2.840.10008.5.1.4.1.1.77.1.6", # VLWholeSlideMicroscopyImageStorage - "1.2.840.10008.5.1.4.1.1.11.5", # XAXRFGrayscaleSoftcopyPresentationStateStorage - "1.2.840.10008.5.1.4.1.1.13.1.1", # XRay3DAngiographicImageStorage - "1.2.840.10008.5.1.4.1.1.13.1.2", # XRay3DCraniofacialImageStorage - "1.2.840.10008.5.1.4.1.1.12.1", # XRayAngiographicImageStorage - "1.2.840.10008.5.1.4.1.1.88.67", # XRayRadiationDoseSRStorage - "1.2.840.10008.5.1.4.1.1.12.2", # XRayRadiofluoroscopicImageStorage + "1.2.840.10008.5.1.4.1.1.1", # ComputedRadiographyImageStorage + "1.2.840.10008.5.1.4.1.1.2", # CTImageStorage + "1.2.840.10008.5.1.4.1.1.1.3", # DigitalIntraOralXRayImageStorageForPresentation + "1.2.840.10008.5.1.4.1.1.1.3.1", # DigitalIntraOralXRayImageStorageForProcessing + "1.2.840.10008.5.1.4.1.1.1.2", # 
DigitalMammographyXRayImageStorageForPresentation + "1.2.840.10008.5.1.4.1.1.1.2.1", # DigitalMammographyXRayImageStorageForProcessing + "1.2.840.10008.5.1.4.1.1.1.1", # DigitalXRayImageStorageForPresentation + "1.2.840.10008.5.1.4.1.1.1.1.1", # DigitalXRayImageStorageForProcessing + "1.2.840.10008.5.1.4.1.1.2.1", # EnhancedCTImageStorage + "1.2.840.10008.5.1.4.1.1.4.3", # EnhancedMRColorImageStorage + "1.2.840.10008.5.1.4.1.1.4.1", # EnhancedMRImageStorage + "1.2.840.10008.5.1.4.1.1.130", # EnhancedPETImageStorage + "1.2.840.10008.5.1.4.1.1.6.2", # EnhancedUSVolumeStorage + "1.2.840.10008.5.1.4.1.1.12.1.1", # EnhancedXAImageStorage + "1.2.840.10008.5.1.4.1.1.12.2.1", # EnhancedXRFImageStorage + "1.2.840.10008.5.1.4.1.1.14.1", # IntravascularOpticalCoherenceTomographyImageStorageForPresentation # noqa: E501 + "1.2.840.10008.5.1.4.1.1.14.2", # IntravascularOpticalCoherenceTomographyImageStorageForProcessing # noqa: E501 + "1.2.840.10008.5.1.4.1.1.2.2", # LegacyConvertedEnhancedCTImageStorage + "1.2.840.10008.5.1.4.1.1.4.4", # LegacyConvertedEnhancedMRImageStorage + "1.2.840.10008.5.1.4.1.1.128.1", # LegacyConvertedEnhancedPETImageStorage + "1.2.840.10008.5.1.4.1.1.4", # MRImageStorage + "1.2.840.10008.5.1.4.1.1.7.2", # MultiframeGrayscaleByteSecondaryCaptureImageStorage + "1.2.840.10008.5.1.4.1.1.7.3", # MultiframeGrayscaleWordSecondaryCaptureImageStorage + "1.2.840.10008.5.1.4.1.1.7.1", # MultiframeSingleBitSecondaryCaptureImageStorage + "1.2.840.10008.5.1.4.1.1.7.4", # MultiframeTrueColorSecondaryCaptureImageStorage + "1.2.840.10008.5.1.4.1.1.20", # NuclearMedicineImageStorage + "1.2.840.10008.5.1.4.1.1.77.1.5.2", # OphthalmicPhotography16BitImageStorage + "1.2.840.10008.5.1.4.1.1.77.1.5.1", # OphthalmicPhotography8BitImageStorage + "1.2.840.10008.5.1.4.1.1.81.1", # OphthalmicThicknessMapStorage + "1.2.840.10008.5.1.4.1.1.77.1.5.4", # OphthalmicTomographyImageStorage + "1.2.840.10008.5.1.4.1.1.128", # PositronEmissionTomographyImageStorage + "1.2.840.10008.5.1.4.1.1.481.1", # RTImageStorage + "1.2.840.10008.5.1.4.1.1.7", # SecondaryCaptureImageStorage + "1.2.840.10008.5.1.4.1.1.6.1", # UltrasoundImageStorage + "1.2.840.10008.5.1.4.1.1.3.1", # UltrasoundMultiframeImageStorage + "1.2.840.10008.5.1.4.1.1.77.1.1.1", # VideoEndoscopicImageStorage + "1.2.840.10008.5.1.4.1.1.77.1.2.1", # VideoMicroscopicImageStorage + "1.2.840.10008.5.1.4.1.1.77.1.4.1", # VideoPhotographicImageStorage + "1.2.840.10008.5.1.4.1.1.77.1.1", # VLEndoscopicImageStorage + "1.2.840.10008.5.1.4.1.1.77.1.2", # VLMicroscopicImageStorage + "1.2.840.10008.5.1.4.1.1.77.1.4", # VLPhotographicImageStorage + "1.2.840.10008.5.1.4.1.1.77.1.3", # VLSlideCoordinatesMicroscopicImageStorage + "1.2.840.10008.5.1.4.1.1.77.1.6", # VLWholeSlideMicroscopyImageStorage + "1.2.840.10008.5.1.4.1.1.13.1.1", # XRay3DAngiographicImageStorage + "1.2.840.10008.5.1.4.1.1.13.1.2", # XRay3DCraniofacialImageStorage + "1.2.840.10008.5.1.4.1.1.12.1", # XRayAngiographicImageStorage + "1.2.840.10008.5.1.4.1.1.12.2", # XRayRadiofluoroscopicImageStorage + ## retired but still in use + "1.2.840.10008.5.1.1.30", # HardcopyColorImageStorage + "1.2.840.10008.5.1.1.29", # HardcopyGrayscaleImageStorage + "1.2.840.10008.5.1.4.1.1.5", # NuclearMedicineImageStorageRetired + "1.2.840.10008.5.1.4.1.1.6", # UltrasoundImageStorageRetired + "1.2.840.10008.5.1.4.1.1.3", # UltrasoundMultiframeImageStorageRetired + "1.2.840.10008.5.1.4.1.1.77.1", # VLImageStorage + "1.2.840.10008.5.1.4.1.1.77.2", # VLMultiframeImageStorage + "1.2.840.10008.5.1.4.1.1.12.3", # 
XRayAngiographicBiPlaneImageStorage +] + +# Non-image SOP classes: structured reports, waveforms, presentation states, +# raw data, registration, RT non-image, etc. These do NOT contain pixel data +# and must NOT be offered with compressed transfer syntaxes. If the SCP +# negotiates a compressed TS for these, it will fail when it tries to encode +# the non-pixel payload as JPEG. +_non_image_storage = [ + "1.2.840.10008.5.1.4.1.1.9.1.3", # AmbulatoryECGWaveformStorage + "1.2.840.10008.5.1.4.1.1.9.5.1", # ArterialPulseWaveformStorage + "1.2.840.10008.5.1.4.1.1.78.2", # AutorefractionMeasurementsStorage + "1.2.840.10008.5.1.4.1.1.131", # BasicStructuredDisplayStorage + "1.2.840.10008.5.1.4.1.1.88.11", # BasicTextSRStorage + "1.2.840.10008.5.1.4.1.1.9.4.1", # BasicVoiceAudioWaveformStorage + "1.2.840.10008.5.1.4.1.1.11.4", # BlendingSoftcopyPresentationStateStorage + "1.2.840.10008.5.1.4.1.1.9.3.1", # CardiacElectrophysiologyWaveformStorage + "1.2.840.10008.5.1.4.1.1.88.65", # ChestCADSRStorage + "1.2.840.10008.5.1.4.1.1.88.69", # ColonCADSRStorage + "1.2.840.10008.5.1.4.1.1.11.2", # ColorSoftcopyPresentationStateStorage + "1.2.840.10008.5.1.4.1.1.88.34", # Comprehensive3DSRStorage + "1.2.840.10008.5.1.4.1.1.88.33", # ComprehensiveSRStorage + "1.2.840.10008.5.1.4.1.1.66.3", # DeformableSpatialRegistrationStorage + "1.2.840.10008.5.1.4.1.1.104.2", # EncapsulatedCDAStorage + "1.2.840.10008.5.1.4.1.1.104.1", # EncapsulatedPDFStorage + "1.2.840.10008.5.1.4.1.1.88.22", # EnhancedSRStorage + "1.2.840.10008.5.1.4.1.1.9.4.2", # GeneralAudioWaveformStorage + "1.2.840.10008.5.1.4.1.1.9.1.2", # GeneralECGWaveformStorage + "1.2.840.10008.5.1.4.1.1.11.1", # GrayscaleSoftcopyPresentationStateStorage + "1.2.840.10008.5.1.4.1.1.9.2.1", # HemodynamicWaveformStorage + "1.2.840.10008.5.1.4.1.1.88.70", # ImplantationPlanSRStorage + "1.2.840.10008.5.1.4.1.1.78.8", # IntraocularLensCalculationsStorage + "1.2.840.10008.5.1.4.1.1.78.3", # KeratometryMeasurementsStorage + "1.2.840.10008.5.1.4.1.1.88.59", # KeyObjectSelectionDocumentStorage + "1.2.840.10008.5.1.4.1.1.78.1", # LensometryMeasurementsStorage + "1.2.840.10008.5.1.4.1.1.79.1", # MacularGridThicknessAndVolumeReportStorage + "1.2.840.10008.5.1.4.1.1.88.50", # MammographyCADSRStorage + "1.2.840.10008.5.1.4.1.1.4.2", # MRSpectroscopyStorage + "1.2.840.10008.5.1.4.1.1.78.7", # OphthalmicAxialMeasurementsStorage + "1.2.840.10008.5.1.4.1.1.80.1", # OphthalmicVisualFieldStaticPerimetryMeasurementsStorage + "1.2.840.10008.5.1.4.1.1.88.40", # ProcedureLogStorage + "1.2.840.10008.5.1.4.1.1.11.3", # PseudoColorSoftcopyPresentationStateStorage + "1.2.840.10008.5.1.4.1.1.66", # RawDataStorage + "1.2.840.10008.5.1.4.1.1.67", # RealWorldValueMappingStorage + "1.2.840.10008.5.1.4.1.1.9.6.1", # RespiratoryWaveformStorage + "1.2.840.10008.5.1.4.34.7", # RTBeamsDeliveryInstructionStorage + "1.2.840.10008.5.1.4.1.1.481.4", # RTBeamsTreatmentRecordStorage + "1.2.840.10008.5.1.4.1.1.481.6", # RTBrachyTreatmentRecordStorage + "1.2.840.10008.5.1.4.1.1.481.2", # RTDoseStorage + "1.2.840.10008.5.1.4.1.1.481.9", # RTIonBeamsTreatmentRecordStorage + "1.2.840.10008.5.1.4.1.1.481.8", # RTIonPlanStorage + "1.2.840.10008.5.1.4.1.1.481.5", # RTPlanStorage + "1.2.840.10008.5.1.4.1.1.481.3", # RTStructureSetStorage + "1.2.840.10008.5.1.4.1.1.481.7", # RTTreatmentSummaryRecordStorage + "1.2.840.10008.5.1.4.1.1.66.4", # SegmentationStorage + "1.2.840.10008.5.1.4.1.1.66.2", # SpatialFiducialsStorage + "1.2.840.10008.5.1.4.1.1.66.1", # SpatialRegistrationStorage + "1.2.840.10008.5.1.4.1.1.78.6", 
# SpectaclePrescriptionReportStorage + "1.2.840.10008.5.1.4.1.1.77.1.5.3", # StereometricRelationshipStorage + "1.2.840.10008.5.1.4.1.1.78.4", # SubjectiveRefractionMeasurementsStorage + "1.2.840.10008.5.1.4.1.1.68.1", # SurfaceScanMeshStorage + "1.2.840.10008.5.1.4.1.1.68.2", # SurfaceScanPointCloudStorage + "1.2.840.10008.5.1.4.1.1.66.5", # SurfaceSegmentationStorage + "1.2.840.10008.5.1.4.1.1.9.1.1", # TwelveLeadECGWaveformStorage + "1.2.840.10008.5.1.4.1.1.78.5", # VisualAcuityMeasurementsStorage + "1.2.840.10008.5.1.4.1.1.11.5", # XAXRFGrayscaleSoftcopyPresentationStateStorage + "1.2.840.10008.5.1.4.1.1.88.67", # XRayRadiationDoseSRStorage ## retired but still in use - "1.2.840.10008.5.1.1.30", # HardcopyColorImageStorage - "1.2.840.10008.5.1.1.29", # HardcopyGrayscaleImageStorage - "1.2.840.10008.5.1.4.1.1.5", # NuclearMedicineImageStorageRetired - "1.2.840.10008.5.1.4.1.1.9", # StandaloneCurveStorage - "1.2.840.10008.5.1.4.1.1.10", # StandaloneModalityLUTStorage - "1.2.840.10008.5.1.4.1.1.8", # StandaloneOverlayStorage - "1.2.840.10008.5.1.4.1.1.129", # StandalonePETCurveStorage - "1.2.840.10008.5.1.4.1.1.11", # StandaloneVOILUTStorage - "1.2.840.10008.5.1.1.27", # StoredPrintStorage - "1.2.840.10008.5.1.4.1.1.6", # UltrasoundImageStorageRetired - "1.2.840.10008.5.1.4.1.1.3", # UltrasoundMultiframeImageStorageRetired - "1.2.840.10008.5.1.4.1.1.77.1", # VLImageStorage - "1.2.840.10008.5.1.4.1.1.77.2", # VLMultiframeImageStorage - "1.2.840.10008.5.1.4.1.1.12.3", # XRayAngiographicBiPlaneImageStorage + "1.2.840.10008.5.1.4.1.1.9", # StandaloneCurveStorage + "1.2.840.10008.5.1.4.1.1.10", # StandaloneModalityLUTStorage + "1.2.840.10008.5.1.4.1.1.8", # StandaloneOverlayStorage + "1.2.840.10008.5.1.4.1.1.129", # StandalonePETCurveStorage + "1.2.840.10008.5.1.4.1.1.11", # StandaloneVOILUTStorage + "1.2.840.10008.5.1.1.27", # StoredPrintStorage ] -assert len(_storage) <= 120 -StoragePresentationContexts = [build_context(uid, _transfer_syntaxes) for uid in sorted(_storage)] -"""Pre-built presentation contexts for :dcm:`Storage` containing 120 selected SOP Classes.""" # noqa: E501 +_all_transfer_syntaxes = _uncompressed_transfer_syntaxes + _compressed_transfer_syntaxes + +StoragePresentationContexts = ( + [build_context(uid, _all_transfer_syntaxes) for uid in sorted(_image_storage)] + + [build_context(uid, _uncompressed_transfer_syntaxes) for uid in sorted(_non_image_storage)] +) +"""Pre-built presentation contexts for Storage SOP Classes. + +Image SOP classes are offered with both compressed and uncompressed transfer +syntaxes. Non-image SOP classes (SR, waveforms, raw data, presentation +states, etc.) are offered with only uncompressed transfer syntaxes to prevent +the SCP from negotiating a compressed TS it cannot actually use for non-pixel +data. +""" + +assert len(StoragePresentationContexts) <= 120 diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 7a4fd927c..4facce0b0 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -10,7 +10,6 @@ from pathlib import Path from typing import cast -from django.conf import settings from django.utils import timezone from pydicom import Dataset @@ -218,7 +217,22 @@ def process(self): ) try: - if job.convert_to_nifti: + # Quick IMAGE-level C-FIND to check if instances are + # actually retrievable before attempting the expensive + # C-GET. Many PACS (especially IMPAX) report instances + # at SERIES level that are archived/unavailable. 
+ has_instances = operator.series_has_instances( + patient_id=series.patient_id, + study_uid=series.study_instance_uid, + series_uid=series.series_instance_uid, + ) + + if not has_instances: + image_count = 0 + p_study_uid = "" + p_series_uid = "" + nifti_files = [] + elif job.convert_to_nifti: with tempfile.TemporaryDirectory() as tmp_dir: tmp_path = Path(tmp_dir) image_count, p_study_uid, p_series_uid = self._export_series( @@ -249,7 +263,15 @@ def process(self): converted_file = "" if image_count == 0: status = MassTransferVolume.Status.SKIPPED - log_msg = "C-GET returned 0 images" + if not has_instances: + log_msg = "No instances available in PACS" + elif series.number_of_images == 0: + log_msg = "Non-image series (0 instances in PACS)" + else: + log_msg = ( + f"C-GET returned 0 images" + f" (PACS reports {series.number_of_images} instances)" + ) elif nifti_files: converted_file = "\n".join(str(f) for f in nifti_files) status = done_status @@ -461,11 +483,44 @@ def _find_studies( start: datetime, end: datetime, ) -> list[ResultDataset]: - max_results = settings.MASS_TRANSFER_MAX_SEARCH_RESULTS + max_results = operator.server.max_search_results + + # DICOM applies StudyTime independently per day, so a cross-midnight + # range like Date=20250227-20250228 Time=234500-000730 does NOT mean + # "from Feb 27 23:45 to Feb 28 00:07". When the window is within a + # single day we can use precise time filtering. For multi-day ranges + # we use full-day times and rely on date-based splitting. But when + # the window has narrowed to just two consecutive days (i.e. a + # cross-midnight split), we split at midnight so each half becomes a + # single-day query with proper time filtering. + if start.date() != end.date(): + days_apart = (end.date() - start.date()).days + if days_apart <= 1: + # Cross-midnight: split at midnight boundary + midnight = datetime.combine( + end.date(), datetime.min.time(), tzinfo=end.tzinfo + ) + left = self._find_studies( + operator, mf, start, midnight - timedelta(seconds=1) + ) + right = self._find_studies(operator, mf, midnight, end) + + seen: set[str] = {str(s.StudyInstanceUID) for s in left} + for study in right: + if str(study.StudyInstanceUID) not in seen: + left.append(study) + seen.add(str(study.StudyInstanceUID)) + + return left + + # Multi-day: full-day times, splitting will narrow by date + study_time = (datetime.min.time(), datetime.max.time().replace(microsecond=0)) + else: + study_time = (start.time(), end.time()) query = QueryDataset.create( StudyDate=(start.date(), end.date()), - StudyTime=(datetime.min.time(), datetime.max.time().replace(microsecond=0)), + StudyTime=study_time, ) if mf.modality: diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 7d617875a..5e901a39f 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -10,7 +10,7 @@ from adit.core.errors import DicomError, RetriableDicomError from adit.core.models import DicomNode from adit.core.factories import DicomFolderFactory, DicomServerFactory -from adit.core.utils.dicom_dataset import ResultDataset +from adit.core.utils.dicom_dataset import QueryDataset, ResultDataset from adit.core.utils.dicom_operator import DicomOperator from adit.mass_transfer.models import ( MassTransferFilter, @@ -71,10 +71,7 @@ def _make_discovered( # --------------------------------------------------------------------------- -def _make_processor(mocker: MockerFixture, settings) -> MassTransferTaskProcessor: - 
settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = getattr( - settings, "MASS_TRANSFER_MAX_SEARCH_RESULTS", 200 - ) +def _make_processor(mocker: MockerFixture) -> MassTransferTaskProcessor: mock_task = mocker.MagicMock(spec=MassTransferTask) mock_task._meta = MassTransferTask._meta mocker.patch.object(MassTransferTaskProcessor, "__init__", return_value=None) @@ -96,12 +93,11 @@ def _make_filter(mocker: MockerFixture, **kwargs) -> MassTransferFilter: @pytest.mark.django_db -def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture, settings): - settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 1 +def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture): MassTransferSettings.objects.create() user = UserFactory.create() - source = DicomServerFactory.create() + source = DicomServerFactory.create(max_search_results=1) destination = DicomFolderFactory.create() job = MassTransferJob.objects.create( owner=user, @@ -126,16 +122,15 @@ def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture, s processor = MassTransferTaskProcessor(task) operator = mocker.create_autospec(DicomOperator) + operator.server = source operator.find_studies.return_value = [object(), object()] with pytest.raises(DicomError, match="Time window too small"): processor._find_studies(operator, mf, start, end) -def test_find_studies_returns_all_when_under_limit(mocker: MockerFixture, settings): - settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 10 - - processor = _make_processor(mocker, settings) +def test_find_studies_returns_all_when_under_limit(mocker: MockerFixture): + processor = _make_processor(mocker) mf = _make_filter(mocker, modality="CT") start = datetime(2024, 1, 1, 0, 0, 0) @@ -144,6 +139,7 @@ def test_find_studies_returns_all_when_under_limit(mocker: MockerFixture, settin studies = [_make_study("1.2.3"), _make_study("1.2.4"), _make_study("1.2.5")] operator = mocker.create_autospec(DicomOperator) + operator.server = mocker.MagicMock(max_search_results=10) operator.find_studies.return_value = studies result = processor._find_studies(operator, mf, start, end) @@ -152,14 +148,13 @@ def test_find_studies_returns_all_when_under_limit(mocker: MockerFixture, settin assert operator.find_studies.call_count == 1 -def test_find_studies_splits_and_deduplicates(mocker: MockerFixture, settings): - settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 2 - - processor = _make_processor(mocker, settings) +def test_find_studies_splits_and_deduplicates(mocker: MockerFixture): + processor = _make_processor(mocker) mf = _make_filter(mocker, modality="CT") + # Use a single-day range to test the time-based midpoint split start = datetime(2024, 1, 1, 0, 0, 0) - end = datetime(2024, 1, 2, 23, 59, 59) + end = datetime(2024, 1, 1, 23, 59, 59) study_a = _make_study("1.2.100") study_b = _make_study("1.2.200") @@ -167,6 +162,7 @@ def test_find_studies_splits_and_deduplicates(mocker: MockerFixture, settings): study_a_dup = _make_study("1.2.100") operator = mocker.create_autospec(DicomOperator) + operator.server = mocker.MagicMock(max_search_results=2) operator.find_studies.side_effect = [ [study_a, study_b, study_c], [study_a, study_b], @@ -182,14 +178,13 @@ def test_find_studies_splits_and_deduplicates(mocker: MockerFixture, settings): assert "1.2.300" in result_uids -def test_find_studies_split_boundaries_dont_overlap(mocker: MockerFixture, settings): - settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 1 - - processor = _make_processor(mocker, settings) +def test_find_studies_split_boundaries_dont_overlap(mocker: 
MockerFixture): + processor = _make_processor(mocker) mf = _make_filter(mocker, modality="") + # Use a single-day range so we test the time-based midpoint split start = datetime(2024, 1, 1, 0, 0, 0) - end = datetime(2024, 1, 3, 23, 59, 59) + end = datetime(2024, 1, 1, 23, 59, 59) call_ranges: list[tuple[datetime, datetime]] = [] original_find_studies = MassTransferTaskProcessor._find_studies @@ -199,6 +194,7 @@ def tracking_find_studies(self_inner, operator, mf, s, e): return original_find_studies(self_inner, operator, mf, s, e) operator = mocker.create_autospec(DicomOperator) + operator.server = mocker.MagicMock(max_search_results=1) operator.find_studies.side_effect = [ [_make_study("1"), _make_study("2")], [_make_study("1")], @@ -224,16 +220,82 @@ def tracking_find_studies(self_inner, operator, mf, s, e): assert right_start > left_end -def test_find_studies_preserves_order_with_unique_studies(mocker: MockerFixture, settings): - settings.MASS_TRANSFER_MAX_SEARCH_RESULTS = 2 +def test_find_studies_same_day_split_narrows_study_time(mocker: MockerFixture): + """When splitting within a single day, StudyTime must narrow to avoid infinite recursion.""" + processor = _make_processor(mocker) + mf = _make_filter(mocker, modality="CT") + + start = datetime(2024, 1, 1, 8, 0, 0) + end = datetime(2024, 1, 1, 20, 0, 0) + + operator = mocker.create_autospec(DicomOperator) + operator.server = mocker.MagicMock(max_search_results=1) + # First call returns too many results (triggers split), sub-calls return under limit + operator.find_studies.side_effect = [ + [_make_study("1"), _make_study("2")], + [_make_study("1")], + [_make_study("2")], + ] + + processor._find_studies(operator, mf, start, end) + + # 3 calls: initial + left half + right half + assert operator.find_studies.call_count == 3 + + queries = [call.args[0] for call in operator.find_studies.call_args_list] + initial_time = queries[0].dataset.StudyTime + left_time = queries[1].dataset.StudyTime + right_time = queries[2].dataset.StudyTime + + # Initial query should use the actual start/end times + assert "080000" in initial_time + assert "200000" in initial_time + + # Sub-queries should have narrower time ranges than the initial query + assert left_time != initial_time + assert right_time != initial_time + + +def test_find_studies_cross_midnight_splits_at_midnight(mocker: MockerFixture): + """A cross-midnight window must split at midnight, not at the midpoint.""" + processor = _make_processor(mocker) + mf = _make_filter(mocker, modality="CT") + + # Window spans midnight: Jan 1 23:45 to Jan 2 00:15 + start = datetime(2024, 1, 1, 23, 45, 0) + end = datetime(2024, 1, 2, 0, 15, 0) - processor = _make_processor(mocker, settings) + operator = mocker.create_autospec(DicomOperator) + operator.server = mocker.MagicMock(max_search_results=200) + # Two sub-queries: before midnight and after midnight + operator.find_studies.side_effect = [ + [_make_study("1")], + [_make_study("2")], + ] + + result = processor._find_studies(operator, mf, start, end) + + assert len(result) == 2 + assert operator.find_studies.call_count == 2 + + # Verify the queries use single-day ranges with proper times + q1 = operator.find_studies.call_args_list[0].args[0] + q2 = operator.find_studies.call_args_list[1].args[0] + assert "234500" in q1.dataset.StudyTime + assert "235959" in q1.dataset.StudyTime + assert "000000" in q2.dataset.StudyTime + assert "001500" in q2.dataset.StudyTime + + +def test_find_studies_preserves_order_with_unique_studies(mocker: MockerFixture): + processor = 
_make_processor(mocker) mf = _make_filter(mocker, modality="") start = datetime(2024, 1, 1, 0, 0, 0) - end = datetime(2024, 1, 3, 23, 59, 59) + end = datetime(2024, 1, 1, 23, 59, 59) operator = mocker.create_autospec(DicomOperator) + operator.server = mocker.MagicMock(max_search_results=2) operator.find_studies.side_effect = [ [_make_study("1.2.1"), _make_study("1.2.2"), _make_study("1.2.3")], [_make_study("1.2.1"), _make_study("1.2.2")], @@ -253,13 +315,12 @@ def test_find_studies_preserves_order_with_unique_studies(mocker: MockerFixture, def _make_process_env( mocker: MockerFixture, - settings, tmp_path: Path, *, convert_to_nifti: bool = False, anonymization_mode: str = "pseudonymize", ) -> MassTransferTaskProcessor: - processor = _make_processor(mocker, settings) + processor = _make_processor(mocker) mock_job = processor.mass_task.job mock_job.anonymization_mode = anonymization_mode @@ -294,9 +355,9 @@ def _make_process_env( def test_process_reraises_retriable_dicom_error( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): - processor = _make_process_env(mocker, settings, tmp_path) + processor = _make_process_env(mocker, tmp_path) series = [_make_discovered(series_uid="s-1")] mocker.patch.object(processor, "_discover_series", return_value=series) @@ -311,9 +372,9 @@ def test_process_reraises_retriable_dicom_error( def test_process_returns_warning_on_partial_failure( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): - processor = _make_process_env(mocker, settings, tmp_path) + processor = _make_process_env(mocker, tmp_path) series = [ _make_discovered(series_uid="s-1"), _make_discovered(series_uid="s-2"), @@ -340,9 +401,9 @@ def fake_export(*args, **kwargs): def test_process_returns_failure_when_all_fail( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): - processor = _make_process_env(mocker, settings, tmp_path) + processor = _make_process_env(mocker, tmp_path) series = [ _make_discovered(series_uid="s-1"), _make_discovered(series_uid="s-2"), @@ -361,9 +422,9 @@ def test_process_returns_failure_when_all_fail( def test_process_returns_warning_when_suspended( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): - processor = _make_process_env(mocker, settings, tmp_path) + processor = _make_process_env(mocker, tmp_path) mocker.patch.object(processor, "is_suspended", return_value=True) result = processor.process() @@ -373,9 +434,9 @@ def test_process_returns_warning_when_suspended( def test_process_raises_when_source_not_server( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): - processor = _make_process_env(mocker, settings, tmp_path) + processor = _make_process_env(mocker, tmp_path) processor.mass_task.job.source.node_type = DicomNode.NodeType.FOLDER with pytest.raises(DicomError, match="source must be a DICOM server"): @@ -383,9 +444,9 @@ def test_process_raises_when_source_not_server( def test_process_raises_when_destination_not_folder( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): - processor = _make_process_env(mocker, settings, tmp_path) + processor = _make_process_env(mocker, tmp_path) processor.mass_task.job.destination.node_type = DicomNode.NodeType.SERVER with pytest.raises(DicomError, match="destination must be a DICOM folder"): @@ -393,9 +454,9 @@ def test_process_raises_when_destination_not_folder( def 
test_process_returns_failure_when_no_filters( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): - processor = _make_process_env(mocker, settings, tmp_path) + processor = _make_process_env(mocker, tmp_path) processor.mass_task.job.filters.all.return_value = [] result = processor.process() @@ -405,9 +466,9 @@ def test_process_returns_failure_when_no_filters( def test_process_returns_success_for_empty_partition( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): - processor = _make_process_env(mocker, settings, tmp_path) + processor = _make_process_env(mocker, tmp_path) mocker.patch.object(processor, "_discover_series", return_value=[]) result = processor.process() @@ -417,10 +478,10 @@ def test_process_returns_success_for_empty_partition( def test_process_skips_already_done_series( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): """Already-processed series (from prior runs) are skipped.""" - processor = _make_process_env(mocker, settings, tmp_path) + processor = _make_process_env(mocker, tmp_path) series = [ _make_discovered(series_uid="s-done"), _make_discovered(series_uid="s-new"), @@ -454,11 +515,11 @@ def fake_export(*args, **kwargs): def test_process_none_mode_uses_patient_id_as_subject( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): """In 'none' anonymization mode, no pseudonymizer is used.""" processor = _make_process_env( - mocker, settings, tmp_path, anonymization_mode="none" + mocker, tmp_path, anonymization_mode="none" ) series = [_make_discovered(patient_id="REAL-PAT-1", series_uid="s-1")] @@ -483,10 +544,10 @@ def fake_export(op, s, path, subject_id, pseudonymizer): def test_process_pseudonymize_mode_same_study_same_pseudonym( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): """In non-linking mode, series in the same study share a pseudonym.""" - processor = _make_process_env(mocker, settings, tmp_path) + processor = _make_process_env(mocker, tmp_path) series = [ _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-1"), _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-2"), @@ -512,10 +573,10 @@ def fake_export(op, s, path, subject_id, pseudonymizer): def test_process_pseudonymize_mode_different_studies_different_pseudonyms( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): """In non-linking mode, different studies for the same patient get different pseudonyms.""" - processor = _make_process_env(mocker, settings, tmp_path) + processor = _make_process_env(mocker, tmp_path) series = [ _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-1"), _make_discovered(patient_id="PAT1", study_uid="study-B", series_uid="s-2"), @@ -541,11 +602,11 @@ def fake_export(op, s, path, subject_id, pseudonymizer): def test_process_linking_mode_uses_deterministic_pseudonym( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): """In linking mode, pseudonyms are deterministic (seeded).""" processor = _make_process_env( - mocker, settings, tmp_path, anonymization_mode="pseudonymize_with_linking" + mocker, tmp_path, anonymization_mode="pseudonymize_with_linking" ) series = [ _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-1"), @@ -578,9 +639,9 @@ def fake_export(op, s, path, subject_id, pseudonymizer): def 
test_convert_series_raises_on_dcm2niix_failure( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): - processor = _make_processor(mocker, settings) + processor = _make_processor(mocker) series = _make_discovered(series_uid="1.2.3") dicom_dir = tmp_path / "dicom_input" @@ -600,9 +661,9 @@ def test_convert_series_raises_on_dcm2niix_failure( def test_convert_series_raises_when_no_nifti_output( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): - processor = _make_processor(mocker, settings) + processor = _make_processor(mocker) series = _make_discovered(series_uid="1.2.3") dicom_dir = tmp_path / "dicom_input" @@ -622,9 +683,9 @@ def test_convert_series_raises_when_no_nifti_output( def test_convert_series_skips_non_image_dicom( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): - processor = _make_processor(mocker, settings) + processor = _make_processor(mocker) series = _make_discovered(series_uid="1.2.3") dicom_dir = tmp_path / "dicom_input" @@ -738,7 +799,7 @@ def test_dicom_match_wildcard(): @pytest.mark.django_db def test_process_creates_volume_records_on_success( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): """Deferred insertion: volumes are created in DB after successful export.""" MassTransferSettings.objects.create() @@ -785,7 +846,7 @@ def test_process_creates_volume_records_on_success( @pytest.mark.django_db def test_process_creates_error_volume_on_failure( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): """Failed exports still create a volume record with ERROR status.""" MassTransferSettings.objects.create() @@ -831,7 +892,7 @@ def test_process_creates_error_volume_on_failure( @pytest.mark.django_db def test_process_deletes_error_volumes_on_retry( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): """On retry, ERROR volumes from prior runs are deleted so they can be reprocessed.""" MassTransferSettings.objects.create() @@ -893,7 +954,7 @@ def test_process_deletes_error_volumes_on_retry( @pytest.mark.django_db def test_process_deterministic_pseudonyms_across_partitions( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): """Same patient gets the same pseudonym across different partitions (linking mode).""" MassTransferSettings.objects.create() @@ -958,7 +1019,7 @@ def test_process_deterministic_pseudonyms_across_partitions( @pytest.mark.django_db def test_process_pseudonymize_mode_not_linked_across_partitions( - mocker: MockerFixture, settings, tmp_path: Path + mocker: MockerFixture, tmp_path: Path ): """Non-linking pseudonymize mode: same patient gets different pseudonyms across partitions.""" MassTransferSettings.objects.create() diff --git a/adit/settings/base.py b/adit/settings/base.py index aafbae23d..f278e15d7 100644 --- a/adit/settings/base.py +++ b/adit/settings/base.py @@ -383,8 +383,6 @@ # The maximum number of results (patients or studies) in dicom_explorer DICOM_EXPLORER_RESULT_LIMIT = 101 -# Maximum number of C-FIND results for mass transfer before splitting time windows -MASS_TRANSFER_MAX_SEARCH_RESULTS = env.int("MASS_TRANSFER_MAX_SEARCH_RESULTS", default=200) # The timeout in dicom_explorer a DICOM server must respond DICOM_EXPLORER_RESPONSE_TIMEOUT = 3 # seconds diff --git a/docker-compose.base.yml b/docker-compose.base.yml index 3ca80114b..c8415b0c3 100644 --- 
a/docker-compose.base.yml +++ b/docker-compose.base.yml @@ -20,7 +20,6 @@ x-app: &default-app DJANGO_SERVER_EMAIL: ${DJANGO_SERVER_EMAIL:?} EXCLUDE_MODALITIES: ${EXCLUDE_MODALITIES:-} MASS_TRANSFER_EXPORT_BASE_DIR: ${MASS_TRANSFER_EXPORT_BASE_DIR:-/mnt/mass_transfer_exports} - MASS_TRANSFER_MAX_SEARCH_RESULTS: ${MASS_TRANSFER_MAX_SEARCH_RESULTS:-200} IS_DOCKER_CONTAINER: 1 FILE_TRANSMIT_HOST: receiver.local FILE_TRANSMIT_PORT: 14638 diff --git a/example.env b/example.env index c7a9cf6d5..6db34db97 100644 --- a/example.env +++ b/example.env @@ -90,8 +90,6 @@ RECEIVER_AE_TITLE="ADIT1DEV" EXCLUDE_MODALITIES="PR,SR" # Mass transfer settings -# Maximum number of C-FIND results before a time window is split -MASS_TRANSFER_MAX_SEARCH_RESULTS=200 # Base directory for temporary DICOM exports during mass transfer MASS_TRANSFER_EXPORT_BASE_DIR="/mnt/mass_transfer_exports" From 1dc05e1b46f955f13a77825f583df49581565aaf Mon Sep 17 00:00:00 2001 From: Ritwik Date: Mon, 9 Mar 2026 21:25:45 +0100 Subject: [PATCH 032/103] Pace C-GET requests and retry 0-image responses from IMPAX IMPAX returns "Success with 0 sub-operations" when overwhelmed by rapid-fire C-GET requests. Unlike selective transfer where each study is a separate procrastinate task (with natural inter-task delay), mass transfer processes hundreds of series in a tight loop with zero spacing. - Add 0.5s delay between C-GET requests to pace PACS load - Retry 0-image responses with exponential backoff (5-80s) + jitter - Mark 0-image failures as ERROR (not SKIPPED) so they are retried on subsequent task runs instead of being permanently lost - Remove series_has_instances preflight C-FIND (extra PACS load) - Log "silent empty" C-GET responses in dimse_connector - Persistent mode: check self.assoc before closing in func_wrapper --- adit/core/utils/dicom_operator.py | 16 ----- adit/core/utils/dimse_connector.py | 23 ++++++- adit/mass_transfer/processors.py | 97 +++++++++++++++++++++--------- 3 files changed, 89 insertions(+), 47 deletions(-) diff --git a/adit/core/utils/dicom_operator.py b/adit/core/utils/dicom_operator.py index 0006c3309..6bb6d0249 100644 --- a/adit/core/utils/dicom_operator.py +++ b/adit/core/utils/dicom_operator.py @@ -392,22 +392,6 @@ def fetch_study( logger.debug("Successfully downloaded study %s.", study_uid) - def series_has_instances( - self, - patient_id: str, - study_uid: str, - series_uid: str, - ) -> bool: - """Quick IMAGE-level C-FIND to check if a series has any retrievable instances.""" - query = QueryDataset.create( - PatientID=patient_id, - StudyInstanceUID=study_uid, - SeriesInstanceUID=series_uid, - ) - for _ in self.find_images(query, limit_results=1): - return True - return False - def fetch_series( self, patient_id: str, diff --git a/adit/core/utils/dimse_connector.py b/adit/core/utils/dimse_connector.py index f4a754acf..ba703e0a8 100644 --- a/adit/core/utils/dimse_connector.py +++ b/adit/core/utils/dimse_connector.py @@ -106,7 +106,11 @@ def func_wrapper(self: "DimseConnector", *args, **kwargs): self.abort_connection() raise err - if opened_connection and self.auto_connect and not self.persistent: + # Close the connection unless persistent mode is active. + # In persistent mode, the association is reused across calls of the + # same service type (e.g. multiple C-GETs), which avoids overwhelming + # the PACS with rapid association open/close cycles. 
+ if opened_connection and self.auto_connect and not self.persistent and self.assoc: self.close_connection() opened_connection = False @@ -485,6 +489,23 @@ def _handle_get_and_move_responses( ) logger.warn(message) + # Log "silent empty" responses: PACS returns Success with + # 0 completed, 0 failed, 0 warning. This happens on IMPAX + # when it is overwhelmed by rapid-fire association requests. + # We don't raise here — the caller handles retry to avoid + # aborting the entire task. + if ( + status_category == STATUS_SUCCESS + and completed_suboperations == 0 + and not failed_suboperations + and not warning_suboperations + ): + logger.warning( + "%s returned success with 0 sub-operations — " + "PACS may be busy.", + op, + ) + if status_category == STATUS_FAILURE: if identifier: failed_image_uids = identifier.get("FailedSOPInstanceUIDList", []) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 4facce0b0..7413b9d31 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -2,9 +2,11 @@ import hashlib import logging +import random import secrets import subprocess import tempfile +import time from dataclasses import dataclass from datetime import datetime, timedelta from pathlib import Path @@ -150,10 +152,11 @@ def process(self): try: discovered = self._discover_series(operator, filters) - # Filter out series already processed in a previous run + # Filter out series already processed in a previous run (same partition) done_uids = set( MassTransferVolume.objects.filter( job=job, + partition_key=self.mass_task.partition_key, status__in=[ MassTransferVolume.Status.EXPORTED, MassTransferVolume.Status.CONVERTED, @@ -163,11 +166,13 @@ def process(self): ) # Delete ERROR volumes so they can be retried cleanly MassTransferVolume.objects.filter( - job=job, status=MassTransferVolume.Status.ERROR + job=job, + partition_key=self.mass_task.partition_key, + status=MassTransferVolume.Status.ERROR, ).delete() pending = [s for s in discovered if s.series_instance_uid not in done_uids] - total_skipped_prior = len(done_uids) + total_skipped_prior = len(discovered) - len(pending) output_base = _destination_base_dir(destination_node) done_status = ( @@ -217,22 +222,15 @@ def process(self): ) try: - # Quick IMAGE-level C-FIND to check if instances are - # actually retrievable before attempting the expensive - # C-GET. Many PACS (especially IMPAX) report instances - # at SERIES level that are archived/unavailable. - has_instances = operator.series_has_instances( - patient_id=series.patient_id, - study_uid=series.study_instance_uid, - series_uid=series.series_instance_uid, - ) - - if not has_instances: - image_count = 0 - p_study_uid = "" - p_series_uid = "" - nifti_files = [] - elif job.convert_to_nifti: + # Small delay between C-GET requests to avoid overwhelming + # the PACS. In selective transfer each study is a separate + # procrastinate task with natural inter-task overhead (seconds). + # Here we process hundreds of series in a tight loop, so we + # add explicit pacing. 
+ if total_processed + total_failed + total_skipped > 0: + time.sleep(0.5) + + if job.convert_to_nifti: with tempfile.TemporaryDirectory() as tmp_dir: tmp_path = Path(tmp_dir) image_count, p_study_uid, p_series_uid = self._export_series( @@ -262,12 +260,14 @@ def process(self): converted_file = "" if image_count == 0: - status = MassTransferVolume.Status.SKIPPED - if not has_instances: - log_msg = "No instances available in PACS" - elif series.number_of_images == 0: + if series.number_of_images == 0: + status = MassTransferVolume.Status.SKIPPED log_msg = "Non-image series (0 instances in PACS)" else: + # PACS reports instances but C-GET returned 0. + # Mark as ERROR so it's retried on the next run + # (ERROR volumes are deleted before processing). + status = MassTransferVolume.Status.ERROR log_msg = ( f"C-GET returned 0 images" f" (PACS reports {series.number_of_images} instances)" @@ -306,7 +306,11 @@ def process(self): log=log_msg, ) - if status == MassTransferVolume.Status.SKIPPED: + if status == MassTransferVolume.Status.ERROR: + total_failed += 1 + reason = "C-GET returned 0 images" + failed_reasons[reason] = failed_reasons.get(reason, 0) + 1 + elif status == MassTransferVolume.Status.SKIPPED: total_skipped += 1 else: total_processed += 1 @@ -601,12 +605,45 @@ def callback(ds: Dataset | None) -> None: write_dataset(ds, output_path / file_name) image_count += 1 - operator.fetch_series( - patient_id=series.patient_id, - study_uid=series.study_instance_uid, - series_uid=series.series_instance_uid, - callback=callback, - ) + # IMPAX returns "Success with 0 sub-operations" when overwhelmed by + # concurrent C-GET associations. Retry with exponential backoff + jitter + # to give the PACS time to recover and desynchronize from other workers. + # Only retry when PACS reports instances — a genuine 0-instance series + # is skipped immediately. + max_retries = 5 + for attempt in range(max_retries + 1): + operator.fetch_series( + patient_id=series.patient_id, + study_uid=series.study_instance_uid, + series_uid=series.series_instance_uid, + callback=callback, + ) + if image_count > 0 or series.number_of_images == 0: + break + if attempt < max_retries: + # Exponential backoff: 5, 10, 20, 40, 80 base seconds + # with ±25% jitter to avoid thundering herd + base_delay = 5 * (2 ** attempt) + jitter = base_delay * 0.25 * (2 * random.random() - 1) + delay = base_delay + jitter + logger.warning( + "C-GET returned 0 images for %s (PACS reports %d) — " + "retrying in %.0fs (attempt %d/%d)", + series.series_instance_uid, + series.number_of_images, + delay, + attempt + 1, + max_retries, + ) + time.sleep(delay) + + if image_count == 0 and series.number_of_images > 0: + logger.error( + "C-GET returned 0 images for %s after %d attempts (PACS reports %d)", + series.series_instance_uid, + max_retries + 1, + series.number_of_images, + ) if image_count == 0: try: From 402b9044784cc8cd3a03e6e87684367407622619 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Tue, 10 Mar 2026 12:21:40 +0100 Subject: [PATCH 033/103] Reduce C-GET retry from 5 exponential to 1 quick retry Raw pynetdicom testing confirmed that IMPAX permanently refuses to serve certain series via C-GET (archived/offline storage), returning "Success with 0 sub-operations" regardless of load, timing, or transfer syntax. Five retries with exponential backoff (up to 80s) wasted ~40 minutes per affected study. One retry after 3-5s is enough to distinguish transient (PACS busy) from permanent (series unretrievable). 
Failed series are marked ERROR and retried on the next task run. --- adit/mass_transfer/processors.py | 51 +++++++++++++++----------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 7413b9d31..5377bb0d1 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -605,43 +605,40 @@ def callback(ds: Dataset | None) -> None: write_dataset(ds, output_path / file_name) image_count += 1 - # IMPAX returns "Success with 0 sub-operations" when overwhelmed by - # concurrent C-GET associations. Retry with exponential backoff + jitter - # to give the PACS time to recover and desynchronize from other workers. - # Only retry when PACS reports instances — a genuine 0-instance series - # is skipped immediately. - max_retries = 5 - for attempt in range(max_retries + 1): + # IMPAX returns "Success with 0 sub-operations" for two reasons: + # 1. Transient: PACS is overwhelmed by rapid requests (fixed by pacing) + # 2. Permanent: series is archived/offline and can't be served via C-GET + # One retry after a short delay distinguishes the two cases. If the + # second attempt also fails, the series is unretrievable — move on and + # let the ERROR status trigger a retry on the next task run. + operator.fetch_series( + patient_id=series.patient_id, + study_uid=series.study_instance_uid, + series_uid=series.series_instance_uid, + callback=callback, + ) + if image_count == 0 and series.number_of_images > 0: + delay = 3 + random.random() * 2 + logger.warning( + "C-GET returned 0 images for %s (PACS reports %d) — " + "retrying in %.0fs", + series.series_instance_uid, + series.number_of_images, + delay, + ) + time.sleep(delay) operator.fetch_series( patient_id=series.patient_id, study_uid=series.study_instance_uid, series_uid=series.series_instance_uid, callback=callback, ) - if image_count > 0 or series.number_of_images == 0: - break - if attempt < max_retries: - # Exponential backoff: 5, 10, 20, 40, 80 base seconds - # with ±25% jitter to avoid thundering herd - base_delay = 5 * (2 ** attempt) - jitter = base_delay * 0.25 * (2 * random.random() - 1) - delay = base_delay + jitter - logger.warning( - "C-GET returned 0 images for %s (PACS reports %d) — " - "retrying in %.0fs (attempt %d/%d)", - series.series_instance_uid, - series.number_of_images, - delay, - attempt + 1, - max_retries, - ) - time.sleep(delay) if image_count == 0 and series.number_of_images > 0: logger.error( - "C-GET returned 0 images for %s after %d attempts (PACS reports %d)", + "C-GET returned 0 images for %s (PACS reports %d) — " + "series may be archived/offline", series.series_instance_uid, - max_retries + 1, series.number_of_images, ) From 7362344b5a9e6320ac6297c490f9523a9823f2c6 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Thu, 12 Mar 2026 17:31:30 +0100 Subject: [PATCH 034/103] Fix stale C-FIND responses on persistent DIMSE connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a C-FIND generator was abandoned mid-iteration (e.g. early return in _study_has_institution), the persistent association kept unconsumed responses. The next C-FIND on the same association mixed stale results from the previous query, causing ADIT to discover phantom series with wrong UIDs and institution names — leading to ~94% C-GET failure rate. Abort the association when a generator is not fully consumed in persistent mode so the next request gets a clean connection. 
Also update the task summary message to show downloaded/failed/skipped breakdown. --- adit/core/utils/dimse_connector.py | 13 +++++++++---- adit/mass_transfer/processors.py | 10 +++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/adit/core/utils/dimse_connector.py b/adit/core/utils/dimse_connector.py index ba703e0a8..39e23148a 100644 --- a/adit/core/utils/dimse_connector.py +++ b/adit/core/utils/dimse_connector.py @@ -74,18 +74,23 @@ def gen_wrapper(self: "DimseConnector", *args, **kwargs): self.open_connection(service) opened_connection = True + completed = False try: yield from func(self, *args, **kwargs) + completed = True except Exception as err: self.abort_connection() raise err finally: - # When a generator is abandoned mid-iteration (e.g. early return from - # a for loop), Python throws GeneratorExit — a BaseException, not an - # Exception. Without finally, close_connection() was skipped and the - # DIMSE association leaked on the PACS side. if opened_connection and self.auto_connect and not self.persistent and self.assoc: self.close_connection() + elif self.persistent and not completed and self.assoc: + # Generator was abandoned mid-iteration (e.g. early return + # from a for loop). The PACS may still be sending pending + # responses on this association. Reusing it would mix stale + # responses into the next query. Abort so the next request + # opens a clean association. + self.abort_connection() @wraps(func) def func_wrapper(self: "DimseConnector", *args, **kwargs): diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 5377bb0d1..d0f6a77aa 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -374,17 +374,17 @@ def process(self): status = MassTransferTask.Status.FAILURE message = f"All {total_failed} series failed during mass transfer." else: - parts = [] - if total_skipped: - parts.append(f"{total_skipped} skipped") + total_series = total_processed + total_failed + total_skipped + parts = [f"{total_processed} downloaded"] if total_failed: parts.append(f"{total_failed} failed") - suffix = f" ({', '.join(parts)})" if parts else "" + if total_skipped: + parts.append(f"{total_skipped} skipped") status = MassTransferTask.Status.WARNING if total_failed else MassTransferTask.Status.SUCCESS message = ( f"{len(study_uids)} studies, " - f"{total_processed} series processed{suffix}." + f"{total_series} series ({', '.join(parts)})." ) return { From fd5c99b0a881207dd383e834d99f92bfc766c974 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 16 Mar 2026 11:18:01 +0000 Subject: [PATCH 035/103] Add job-identifying parent folder to mass transfer output path The output path was missing the parent folder (adit_{app}_{pk}_{date}_{owner}) that other transfer types use via _create_destination_name, making it hard to associate output directories with specific jobs. 
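With the parent folder in place the output tree looks roughly like this (all concrete names are illustrative; only the adit_{app}_{pk}_{date}_{owner} pattern and the partition/subject/study/series nesting come from the code):

    <dicom folder node path>/
        adit_mass_transfer_42_20260316_jdoe/    # job-identifying parent (new)
            20240101/                           # partition key
                SUBJ001/                        # subject id or pseudonym
                    Brain_MRI_..._a1b2c3d4/     # study folder
                        T1w_3D_101/             # series folder (DICOM or NIfTI output)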
Co-Authored-By: Claude Opus 4.6 --- adit/mass_transfer/processors.py | 87 +++++++++++++++++--------------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index d0f6a77aa..4604197f9 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -27,7 +27,6 @@ from .models import ( MassTransferFilter, - MassTransferJob, MassTransferSettings, MassTransferTask, MassTransferVolume, @@ -91,18 +90,19 @@ def _study_folder_name(study_description: str, study_dt: datetime, study_uid: st return f"{desc}_{dt_str}_{short_hash}" -def _series_folder_name( - series_description: str, series_number: int | None, series_uid: str -) -> str: +def _series_folder_name(series_description: str, series_number: int | None, series_uid: str) -> str: if series_number is None: return sanitize_filename(series_uid) desc = sanitize_filename(series_description or "Undefined") return f"{desc}_{series_number}" -def _destination_base_dir(node: DicomNode) -> Path: +def _destination_base_dir(node: DicomNode, job) -> Path: assert node.node_type == DicomNode.NodeType.FOLDER - path = Path(node.dicomfolder.path) + name = sanitize_filename( + f"adit_{job._meta.app_label}_{job.pk}_{job.created.strftime('%Y%m%d')}_{job.owner.username}" + ) + path = Path(node.dicomfolder.path) / name path.mkdir(parents=True, exist_ok=True) return path @@ -174,7 +174,7 @@ def process(self): pending = [s for s in discovered if s.series_instance_uid not in done_uids] total_skipped_prior = len(discovered) - len(pending) - output_base = _destination_base_dir(destination_node) + output_base = _destination_base_dir(destination_node, job) done_status = ( MassTransferVolume.Status.CONVERTED if job.convert_to_nifti @@ -234,27 +234,41 @@ def process(self): with tempfile.TemporaryDirectory() as tmp_dir: tmp_path = Path(tmp_dir) image_count, p_study_uid, p_series_uid = self._export_series( - operator, series, tmp_path, - subject_id, pseudonymizer, + operator, + series, + tmp_path, + subject_id, + pseudonymizer, ) if image_count == 0: nifti_files = [] else: output_path = ( - output_base / self.mass_task.partition_key - / subject_id / study_folder / series_folder + output_base + / self.mass_task.partition_key + / subject_id + / study_folder + / series_folder ) nifti_files = self._convert_series( - series, tmp_path, output_path, + series, + tmp_path, + output_path, ) else: output_path = ( - output_base / self.mass_task.partition_key - / subject_id / study_folder / series_folder + output_base + / self.mass_task.partition_key + / subject_id + / study_folder + / series_folder ) image_count, p_study_uid, p_series_uid = self._export_series( - operator, series, output_path, - subject_id, pseudonymizer, + operator, + series, + output_path, + subject_id, + pseudonymizer, ) nifti_files = [] @@ -381,11 +395,10 @@ def process(self): if total_skipped: parts.append(f"{total_skipped} skipped") - status = MassTransferTask.Status.WARNING if total_failed else MassTransferTask.Status.SUCCESS - message = ( - f"{len(study_uids)} studies, " - f"{total_series} series ({', '.join(parts)})." + status = ( + MassTransferTask.Status.WARNING if total_failed else MassTransferTask.Status.SUCCESS ) + message = f"{len(study_uids)} studies, {total_series} series ({', '.join(parts)})." 
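+ # Example result (illustrative): "3 studies, 42 series (40 downloaded, 1 failed, 1 skipped)."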
return { "status": status, @@ -475,7 +488,8 @@ def _discover_series( institution_name=str(series.get("InstitutionName", "")), number_of_images=_parse_int( series.get("NumberOfSeriesRelatedInstances"), default=0 - ) or 0, + ) + or 0, ) return list(found.values()) @@ -501,12 +515,8 @@ def _find_studies( days_apart = (end.date() - start.date()).days if days_apart <= 1: # Cross-midnight: split at midnight boundary - midnight = datetime.combine( - end.date(), datetime.min.time(), tzinfo=end.tzinfo - ) - left = self._find_studies( - operator, mf, start, midnight - timedelta(seconds=1) - ) + midnight = datetime.combine(end.date(), datetime.min.time(), tzinfo=end.tzinfo) + left = self._find_studies(operator, mf, start, midnight - timedelta(seconds=1)) right = self._find_studies(operator, mf, midnight, end) seen: set[str] = {str(s.StudyInstanceUID) for s in left} @@ -536,9 +546,7 @@ def _find_studies( if len(studies) > max_results: if end - start < _MIN_SPLIT_WINDOW: - raise DicomError( - f"Time window too small ({start} to {end}) for filter {mf}." - ) + raise DicomError(f"Time window too small ({start} to {end}) for filter {mf}.") mid = start + (end - start) / 2 left = self._find_studies(operator, mf, start, mid) @@ -620,8 +628,7 @@ def callback(ds: Dataset | None) -> None: if image_count == 0 and series.number_of_images > 0: delay = 3 + random.random() * 2 logger.warning( - "C-GET returned 0 images for %s (PACS reports %d) — " - "retrying in %.0fs", + "C-GET returned 0 images for %s (PACS reports %d) — retrying in %.0fs", series.series_instance_uid, series.number_of_images, delay, @@ -636,8 +643,7 @@ def callback(ds: Dataset | None) -> None: if image_count == 0 and series.number_of_images > 0: logger.error( - "C-GET returned 0 images for %s (PACS reports %d) — " - "series may be archived/offline", + "C-GET returned 0 images for %s (PACS reports %d) — series may be archived/offline", series.series_instance_uid, series.number_of_images, ) @@ -668,9 +674,12 @@ def _convert_series( cmd = [ "dcm2niix", - "-z", "y", - "-o", str(output_path), - "-f", series_name, + "-z", + "y", + "-o", + str(output_path), + "-f", + series_name, str(dicom_dir), ] @@ -687,9 +696,7 @@ def _convert_series( if result.returncode != 0: output = result.stderr or result.stdout - raise DicomError( - f"Conversion failed for series {series.series_instance_uid}: {output}" - ) + raise DicomError(f"Conversion failed for series {series.series_instance_uid}: {output}") nifti_files = sorted(output_path.glob("*.nii.gz")) if not nifti_files: From 71c0a0126b19fea3f654b2730df2788976bd948f Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 16 Mar 2026 11:40:08 +0000 Subject: [PATCH 036/103] Override rslave mount propagation in dev compose Co-Authored-By: Claude Opus 4.6 --- docker-compose.dev.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 6261c0c54..f5c168276 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -1,4 +1,6 @@ x-app: &default-app + volumes: + - ${MOUNT_DIR:?}:/mnt build: target: development pull_policy: build From b91af721fc9a0c1154e0b14ddcbe73a223911585 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Mon, 16 Mar 2026 13:12:48 +0100 Subject: [PATCH 037/103] Add inline JSON filters, volume table, DICOM sidecar, and UI improvements - Replace M2M filter relation with inline filters_json JSONField - Add FilterSpec dataclass and pydantic validation (FilterSchema) - Add age filtering (min_age/max_age) with birth date range C-FIND and exact client-side age 
check - Replace dcm2niix sidecar augmentation with independent DICOM tag sidecar system (_extract_dicom_sidecar + _write_dicom_sidecar) - Add MassTransferVolumeTable (django-tables2) with status filter, replacing hardcoded HTML table in task detail view - Add CodeMirror 5 JSON editor for filter input - Change anonymization default to None; salt field only shown for linking mode - Add tests for sidecar extraction, pseudonymization leak prevention, age filtering, and FilterSpec --- adit/mass_transfer/filters.py | 22 +- adit/mass_transfer/forms.py | 128 +++++++-- .../migrations/0011_add_filters_json.py | 18 ++ adit/mass_transfer/models.py | 16 +- adit/mass_transfer/processors.py | 188 ++++++++++++- .../static/mass_transfer/mass_transfer.js | 14 + adit/mass_transfer/tables.py | 53 +++- .../mass_transfer_job_detail.html | 8 +- .../mass_transfer/mass_transfer_job_form.html | 56 +++- .../mass_transfer_task_detail.html | 42 +-- adit/mass_transfer/tests/test_processor.py | 256 ++++++++++++++++-- adit/mass_transfer/views.py | 31 ++- pyproject.toml | 1 + uv.lock | 109 ++++++++ 14 files changed, 837 insertions(+), 105 deletions(-) create mode 100644 adit/mass_transfer/migrations/0011_add_filters_json.py diff --git a/adit/mass_transfer/filters.py b/adit/mass_transfer/filters.py index ef1851433..75e46abaf 100644 --- a/adit/mass_transfer/filters.py +++ b/adit/mass_transfer/filters.py @@ -1,6 +1,11 @@ +import django_filters +from adit_radis_shared.common.forms import SingleFilterFieldFormHelper +from adit_radis_shared.common.types import with_form_helper +from django.http import HttpRequest + from adit.core.filters import DicomJobFilter, DicomTaskFilter -from .models import MassTransferJob, MassTransferTask +from .models import MassTransferJob, MassTransferTask, MassTransferVolume class MassTransferJobFilter(DicomJobFilter): @@ -11,3 +16,18 @@ class Meta(DicomJobFilter.Meta): class MassTransferTaskFilter(DicomTaskFilter): class Meta(DicomTaskFilter.Meta): model = MassTransferTask + + +class MassTransferVolumeFilter(django_filters.FilterSet): + request: HttpRequest + + class Meta: + model = MassTransferVolume + fields = ("status",) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + with_form_helper(self.form).helper = SingleFilterFieldFormHelper( + self.request.GET, "status" + ) diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index 9e594bfa2..4b962d1e4 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -1,12 +1,15 @@ from __future__ import annotations -from typing import cast +import json +import secrets +from typing import Annotated, cast from adit_radis_shared.accounts.models import User from crispy_forms.helper import FormHelper from crispy_forms.layout import HTML, Column, Div, Field, Layout, Row, Submit from django import forms from django.core.exceptions import ValidationError +from pydantic import BaseModel, ValidationError as PydanticValidationError, model_validator from adit.core.fields import DicomNodeChoiceField from adit.core.models import DicomNode @@ -15,6 +18,52 @@ from .utils.partitions import build_partitions +class FilterSchema(BaseModel): + """Pydantic model for validating mass transfer filter JSON objects.""" + + modality: str = "" + institution_name: str = "" + apply_institution_on_study: bool = True + study_description: str = "" + series_description: str = "" + series_number: int | None = None + min_age: Annotated[int, "non-negative"] | None = None + max_age: Annotated[int, "non-negative"] | None = None + 
+ model_config = {"extra": "forbid"} + + @model_validator(mode="after") + def check_age_range(self): + if self.min_age is not None and self.min_age < 0: + raise ValueError("min_age must be non-negative") + if self.max_age is not None and self.max_age < 0: + raise ValueError("max_age must be non-negative") + if ( + self.min_age is not None + and self.max_age is not None + and self.min_age > self.max_age + ): + raise ValueError(f"min_age ({self.min_age}) cannot exceed max_age ({self.max_age})") + return self + + +FILTERS_JSON_EXAMPLE = json.dumps( + [ + { + "modality": "MR", + "institution_name": "Neuroradiologie", + "study_description": "", + "series_description": "", + "series_number": None, + "apply_institution_on_study": True, + "min_age": 18, + "max_age": 90, + } + ], + indent=2, +) + + class MassTransferFilterForm(forms.ModelForm): MODALITY_CHOICES = [ ("", "Any modality"), @@ -98,10 +147,27 @@ def clean_name(self): class MassTransferJobForm(forms.ModelForm): - filters = forms.ModelMultipleChoiceField( - queryset=MassTransferFilter.objects.all(), - required=True, - widget=forms.CheckboxSelectMultiple, + filters_json = forms.CharField( + label="Filters (JSON)", + initial=FILTERS_JSON_EXAMPLE, + widget=forms.Textarea(attrs={ + "id": "id_filters_json", + }), + help_text=( + "A JSON array of filter objects. Each filter can have: " + "modality, institution_name, apply_institution_on_study, " + "study_description, series_description, series_number, " + "min_age, max_age. A series matching ANY filter is included." + ), + ) + + pseudonym_salt = forms.CharField( + label="Pseudonym salt", + required=False, + help_text="Deterministic seed for pseudonymization. Same salt + same patient ID = same pseudonym.", + widget=forms.TextInput(attrs={ + "class": "form-control", + }), ) tasks: list[MassTransferTask] @@ -114,8 +180,8 @@ class Meta: "start_date", "end_date", "partition_granularity", - "filters", "anonymization_mode", + "pseudonym_salt", "convert_to_nifti", "send_finished_mail", ) @@ -130,8 +196,10 @@ class Meta: help_texts = { "partition_granularity": "Daily or weekly partition windows.", "anonymization_mode": ( - "No anonymization preserves all identifiers. Pseudonymize replaces them. " - "Pseudonymize with linking also exports a mapping CSV." + "None: all identifiers are preserved. " + "Pseudonymize: identifiers are replaced with random values. " + "Pseudonymize with linking: deterministic pseudonyms (same salt + patient = same pseudonym) " + "and a downloadable association CSV." 
), "convert_to_nifti": ( "When enabled, exported DICOM series are converted to NIfTI format " @@ -151,7 +219,9 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.fields["filters"].queryset = MassTransferFilter.objects.filter(owner=self.user) # type: ignore[union-attr] + # Auto-populate salt with a fresh random value + if not self.initial.get("pseudonym_salt"): + self.initial["pseudonym_salt"] = secrets.token_hex() self.fields["source"] = DicomNodeChoiceField("source", self.user) self.fields["source"].widget.attrs["@change"] = "onSourceChange($event)" @@ -167,6 +237,10 @@ def __init__(self, *args, **kwargs): "onSendFinishedMailChange($event)" ) + self.fields["anonymization_mode"].widget.attrs["@change"] = ( + "onAnonymizationModeChange($event)" + ) + self.helper = FormHelper(self) self.helper.layout = Layout( Div( @@ -192,6 +266,11 @@ def __init__(self, *args, **kwargs): Column(Field("send_finished_mail"), css_class="col-md-6"), css_class="g-3", ), + Div( + Field("pseudonym_salt"), + css_id="salt-wrapper", + **{"x-show": "showSalt"}, + ), css_class="card-body", ), css_class="card mb-3", @@ -199,7 +278,7 @@ def __init__(self, *args, **kwargs): Div( HTML("
Filters
"), Div( - Field("filters", wrapper_class="mass-transfer-filter-list"), + Field("filters_json"), css_class="card-body", ), css_class="card mb-3", @@ -234,11 +313,28 @@ def clean(self): raise ValidationError("End date must be on or after the start date.") return cleaned - def clean_filters(self): - filters = self.cleaned_data["filters"] - if filters.exclude(owner=self.user).exists(): - raise ValidationError("Selected filters are not available to this user.") - return filters + def clean_filters_json(self): + raw = self.cleaned_data["filters_json"].strip() + try: + data = json.loads(raw) + except json.JSONDecodeError as e: + raise ValidationError(f"Invalid JSON: {e}") + + if not isinstance(data, list) or not data: + raise ValidationError("Filters must be a non-empty JSON array.") + + validated: list[dict] = [] + for i, item in enumerate(data): + if not isinstance(item, dict): + raise ValidationError(f"Filter #{i + 1} must be a JSON object.") + try: + fs = FilterSchema(**item) + validated.append(fs.model_dump(exclude_none=True)) + except PydanticValidationError as e: + errors = "; ".join(err["msg"] for err in e.errors()) + raise ValidationError(f"Filter #{i + 1}: {errors}") + + return validated def _save_tasks(self, job: MassTransferJob) -> None: partitions = build_partitions( @@ -264,10 +360,10 @@ def _save_tasks(self, job: MassTransferJob) -> None: def save(self, commit: bool = True): job = super().save(commit=False) job.urgent = False + job.filters_json = self.cleaned_data["filters_json"] if commit: job.save() - self.save_m2m() self._save_tasks(job) else: self.save_tasks = self._save_tasks diff --git a/adit/mass_transfer/migrations/0011_add_filters_json.py b/adit/mass_transfer/migrations/0011_add_filters_json.py new file mode 100644 index 000000000..72c2946eb --- /dev/null +++ b/adit/mass_transfer/migrations/0011_add_filters_json.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.8 on 2026-03-13 13:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('mass_transfer', '0010_re_add_pseudonymized_uid_fields'), + ] + + operations = [ + migrations.AddField( + model_name='masstransferjob', + name='filters_json', + field=models.JSONField(blank=True, help_text='Inline filter configuration as a JSON list of filter objects.', null=True), + ), + ] diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index 414a4ae17..b18d59d93 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import secrets from django.conf import settings @@ -68,7 +69,7 @@ class PartitionGranularity(models.TextChoices): WEEKLY = "weekly", "Weekly" class AnonymizationMode(models.TextChoices): - NONE = "none", "No anonymization" + NONE = "none", "None" PSEUDONYMIZE = "pseudonymize", "Pseudonymize" PSEUDONYMIZE_WITH_LINKING = "pseudonymize_with_linking", "Pseudonymize with linking" @@ -87,15 +88,26 @@ class AnonymizationMode(models.TextChoices): anonymization_mode = models.CharField( max_length=32, choices=AnonymizationMode.choices, - default=AnonymizationMode.PSEUDONYMIZE, + default=AnonymizationMode.NONE, ) filters = models.ManyToManyField(MassTransferFilter, related_name="jobs", blank=True) + filters_json = models.JSONField( + blank=True, + null=True, + help_text="Inline filter configuration as a JSON list of filter objects.", + ) pseudonym_salt = models.CharField( max_length=64, default=secrets.token_hex, ) + @property + def filters_json_pretty(self) -> str: + if 
self.filters_json: + return json.dumps(self.filters_json, indent=2) + return "" + @property def should_pseudonymize(self) -> bool: return self.anonymization_mode != self.AnonymizationMode.NONE diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 4604197f9..be339a5b8 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -1,6 +1,7 @@ from __future__ import annotations import hashlib +import json import logging import random import secrets @@ -8,7 +9,7 @@ import tempfile import time from dataclasses import dataclass -from datetime import datetime, timedelta +from datetime import date, datetime, timedelta from pathlib import Path from typing import cast @@ -32,6 +33,48 @@ MassTransferVolume, ) + +@dataclass +class FilterSpec: + """Unified filter representation used by the processor. + + Built from either a MassTransferFilter model instance (old M2M path) + or a plain dict from the job's filters_json field. + """ + + modality: str = "" + institution_name: str = "" + apply_institution_on_study: bool = True + study_description: str = "" + series_description: str = "" + series_number: int | None = None + min_age: int | None = None + max_age: int | None = None + + @classmethod + def from_model(cls, mf: MassTransferFilter) -> "FilterSpec": + return cls( + modality=mf.modality, + institution_name=mf.institution_name, + apply_institution_on_study=mf.apply_institution_on_study, + study_description=mf.study_description, + series_description=mf.series_description, + series_number=mf.series_number, + ) + + @classmethod + def from_dict(cls, d: dict) -> "FilterSpec": + return cls( + modality=d.get("modality", ""), + institution_name=d.get("institution_name", ""), + apply_institution_on_study=d.get("apply_institution_on_study", True), + study_description=d.get("study_description", ""), + series_description=d.get("series_description", ""), + series_number=d.get("series_number"), + min_age=d.get("min_age"), + max_age=d.get("max_age"), + ) + logger = logging.getLogger(__name__) _MIN_SPLIT_WINDOW = timedelta(minutes=30) @@ -50,6 +93,7 @@ class DiscoveredSeries: study_datetime: datetime institution_name: str number_of_images: int + patient_birth_date: date | None = None def _dicom_match(pattern: str, value: str | None) -> bool: @@ -97,6 +141,109 @@ def _series_folder_name(series_description: str, series_number: int | None, seri return f"{desc}_{series_number}" +_SIDECAR_DICOM_TAGS = [ + "PatientBirthDate", + "PatientSex", + "PatientAge", + "PatientID", + "PatientName", + "StudyDate", + "StudyInstanceUID", + "SeriesInstanceUID", + "Modality", + "InstitutionName", + "StudyDescription", + "SeriesDescription", + "SeriesNumber", +] + + +def _extract_dicom_sidecar(dicom_dir: Path) -> dict[str, str]: + """Read the first DICOM file in *dicom_dir* and extract sidecar fields. + + These are post-pseudonymization values — shifted dates, replaced UIDs, + etc. The function also computes a ``PatientAgeAtStudy`` field from + PatientBirthDate and StudyDate when both are present. 
+ """ + import pydicom + + for dcm_path in sorted(dicom_dir.glob("*.dcm")): + try: + ds = pydicom.dcmread(dcm_path, stop_before_pixels=True) + except Exception: + continue + fields: dict[str, str] = {} + for tag in _SIDECAR_DICOM_TAGS: + val = ds.get(tag) + if val is not None: + fields[tag] = str(val) + + # Compute age at study from birth date and study date + birth_str = fields.get("PatientBirthDate", "") + study_str = fields.get("StudyDate", "") + if len(birth_str) == 8 and len(study_str) == 8: + try: + bd = date(int(birth_str[:4]), int(birth_str[4:6]), int(birth_str[6:8])) + sd = date(int(study_str[:4]), int(study_str[4:6]), int(study_str[6:8])) + fields["PatientAgeAtStudy"] = str(_age_at_study(bd, sd)) + except (ValueError, OverflowError): + pass + + return fields + return {} + + +def _write_dicom_sidecar(output_path: Path, sidecar_name: str, fields: dict[str, str]) -> None: + """Write a DICOM metadata sidecar JSON file alongside NIfTI outputs.""" + if not fields: + return + sidecar_path = output_path / f"{sidecar_name}_dicom.json" + try: + sidecar_path.write_text(json.dumps(fields, indent=2)) + except Exception: + logger.warning("Failed to write sidecar %s", sidecar_path, exc_info=True) + + +def _age_at_study(birth_date: date, study_date: date) -> int: + """Return the patient's age in whole years on the study date.""" + age = study_date.year - birth_date.year + if (study_date.month, study_date.day) < (birth_date.month, birth_date.day): + age -= 1 + return age + + +def _birth_date_range( + study_start: date, + study_end: date, + min_age: int | None, + max_age: int | None, +) -> tuple[date, date] | None: + """Compute a PatientBirthDate range for C-FIND from age bounds. + + Uses the widest possible range: someone who is max_age on the earliest + study date was born at the latest on study_start - max_age years, and + someone who is min_age on the latest study date was born at the earliest + on study_end - min_age years. We widen by 1 year on each side to account + for birthday boundary effects and let client-side filtering be exact. + """ + if min_age is None and max_age is None: + return None + + # Earliest possible birth date: max_age on the earliest study day + if max_age is not None: + earliest_birth = date(study_start.year - max_age - 1, 1, 1) + else: + earliest_birth = date(1900, 1, 1) + + # Latest possible birth date: min_age on the latest study day + if min_age is not None: + latest_birth = date(study_end.year - min_age + 1, 12, 31) + else: + latest_birth = study_end + + return (earliest_birth, latest_birth) + + def _destination_base_dir(node: DicomNode, job) -> Path: assert node.node_type == DicomNode.NodeType.FOLDER name = sanitize_filename( @@ -134,7 +281,13 @@ def process(self): if destination_node.node_type != DicomNode.NodeType.FOLDER: raise DicomError("Mass transfer destination must be a DICOM folder.") - filters = list(job.filters.all()) + # Build filter specs: prefer inline JSON, fall back to old M2M. + if job.filters_json: + filters = [FilterSpec.from_dict(d) for d in job.filters_json] + else: + legacy_filters = list(job.filters.all()) + filters = [FilterSpec.from_model(mf) for mf in legacy_filters] + if not filters: return { "status": MassTransferTask.Status.FAILURE, @@ -243,6 +396,11 @@ def process(self): if image_count == 0: nifti_files = [] else: + # Extract DICOM metadata before dcm2niix + # deletes the temp files. These are + # post-pseudonymization values. 
+ dicom_sidecar = _extract_dicom_sidecar(tmp_path) + output_path = ( output_base / self.mass_task.partition_key @@ -255,6 +413,10 @@ def process(self): tmp_path, output_path, ) + if nifti_files: + _write_dicom_sidecar( + output_path, series_folder, dicom_sidecar, + ) else: output_path = ( output_base @@ -409,7 +571,7 @@ def process(self): def _discover_series( self, operator: DicomOperator, - filters: list[MassTransferFilter], + filters: list[FilterSpec], ) -> list[DiscoveredSeries]: start = self.mass_task.partition_start end = self.mass_task.partition_end @@ -432,6 +594,16 @@ def _discover_series( if not self._study_has_institution(operator, study, mf.institution_name): continue + # Exact client-side age filtering using actual StudyDate and + # PatientBirthDate (the C-FIND birth date range is approximate). + birth_date = study.PatientBirthDate + if birth_date and study.StudyDate and (mf.min_age is not None or mf.max_age is not None): + age = _age_at_study(birth_date, study.StudyDate) + if mf.min_age is not None and age < mf.min_age: + continue + if mf.max_age is not None and age > mf.max_age: + continue + series_query = QueryDataset.create( PatientID=study.PatientID, StudyInstanceUID=study.StudyInstanceUID, @@ -488,8 +660,8 @@ def _discover_series( institution_name=str(series.get("InstitutionName", "")), number_of_images=_parse_int( series.get("NumberOfSeriesRelatedInstances"), default=0 - ) - or 0, + ) or 0, + patient_birth_date=birth_date, ) return list(found.values()) @@ -497,7 +669,7 @@ def _discover_series( def _find_studies( self, operator: DicomOperator, - mf: MassTransferFilter, + mf: FilterSpec, start: datetime, end: datetime, ) -> list[ResultDataset]: @@ -532,9 +704,13 @@ def _find_studies( else: study_time = (start.time(), end.time()) + birth_range = _birth_date_range( + start.date(), end.date(), mf.min_age, mf.max_age, + ) query = QueryDataset.create( StudyDate=(start.date(), end.date()), StudyTime=study_time, + **({"PatientBirthDate": birth_range} if birth_range else {}), ) if mf.modality: diff --git a/adit/mass_transfer/static/mass_transfer/mass_transfer.js b/adit/mass_transfer/static/mass_transfer/mass_transfer.js index 470f11186..54d4d627f 100644 --- a/adit/mass_transfer/static/mass_transfer/mass_transfer.js +++ b/adit/mass_transfer/static/mass_transfer/mass_transfer.js @@ -7,7 +7,19 @@ const MASS_TRANSFER_GRANULARITY = "mass_transfer_granularity"; const MASS_TRANSFER_SEND_FINISHED_MAIL = "mass_transfer_send_finished_mail"; function massTransferJobForm() { + var checked = document.querySelector( + 'input[name="anonymization_mode"]:checked' + ); + var mode = checked ? checked.value : "none"; + return { + anonymizationMode: mode, + get showSalt() { + return this.anonymizationMode === "pseudonymize_with_linking"; + }, + onAnonymizationModeChange: function (ev) { + this.anonymizationMode = ev.target.value; + }, onSourceChange: function (ev) { updatePreferences("mass-transfer", { [MASS_TRANSFER_SOURCE]: ev.target.value, @@ -30,3 +42,5 @@ function massTransferJobForm() { }, }; } + +// JSON editor is now handled by CodeMirror in the form template. 
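(Aside: a worked example for the age-range helpers added to processors.py above; the study window and age bounds are illustrative.)

    >>> _birth_date_range(date(2025, 3, 1), date(2025, 3, 31), min_age=18, max_age=65)
    (datetime.date(1959, 1, 1), datetime.date(2008, 12, 31))

The C-FIND PatientBirthDate range is deliberately a year wider on each side (study_start.year - max_age - 1 and study_end.year - min_age + 1); _age_at_study then applies the exact 18-65 check per study against the real StudyDate and PatientBirthDate.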
diff --git a/adit/mass_transfer/tables.py b/adit/mass_transfer/tables.py index f2ddb0440..198bc608e 100644 --- a/adit/mass_transfer/tables.py +++ b/adit/mass_transfer/tables.py @@ -1,6 +1,10 @@ +import django_tables2 as tables +from django.utils.html import format_html + from adit.core.tables import DicomTaskTable, TransferJobTable -from .models import MassTransferJob, MassTransferTask +from .models import MassTransferJob, MassTransferTask, MassTransferVolume +from .templatetags.mass_transfer_extras import volume_status_css_class class MassTransferJobTable(TransferJobTable): @@ -11,3 +15,50 @@ class Meta(TransferJobTable.Meta): class MassTransferTaskTable(DicomTaskTable): class Meta(DicomTaskTable.Meta): model = MassTransferTask + + +class MassTransferVolumeTable(tables.Table): + status = tables.Column(verbose_name="Status") + study_info = tables.Column(verbose_name="Study Info", empty_values=(), orderable=False) + modality = tables.Column(verbose_name="Modality") + series_number = tables.Column(verbose_name="Series #") + series_description = tables.Column(verbose_name="Series Description") + institution_name = tables.Column(verbose_name="Institution") + number_of_images = tables.Column(verbose_name="# Images") + log = tables.Column(verbose_name="Reason", attrs={"td": {"class": "small"}}) + + class Meta: + model = MassTransferVolume + fields = ( + "status", + "study_info", + "modality", + "series_number", + "series_description", + "institution_name", + "number_of_images", + "log", + ) + order_by = ("status", "study_datetime") + empty_text = "No volumes to show" + attrs = {"class": "table table-bordered table-hover table-sm"} + + def render_status(self, value, record): + css_class = volume_status_css_class(record.status) + return format_html( + '{}', css_class, record.get_status_display() + ) + + def render_study_info(self, record): + desc = record.study_description or "—" + dt = record.study_datetime.strftime("%Y-%m-%d") if record.study_datetime else "" + return format_html("{}
{}", desc, dt) + + def render_series_number(self, value): + return value if value is not None else "—" + + def render_series_description(self, value): + return value or "—" + + def render_log(self, value): + return value or "—" diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html index 051ab5328..4b804b302 100644 --- a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html @@ -47,12 +47,14 @@
{{ job.get_anonymization_mode_display }}
Filters
- {% if job.filters.all %} + {% if job.filters_json %} +
{{ job.filters_json_pretty }}
+ {% elif job.filters.all %} {% for f in job.filters.all %}
{{ f }}
- {% empty %} - — {% endfor %} + {% else %} + — {% endif %}
Processed Tasks
diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_form.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_form.html index 785686e79..f001889d6 100644 --- a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_form.html +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_form.html @@ -4,14 +4,26 @@ {% block title %} New Mass Transfer Job {% endblock title %} +{% block css %} + {{ block.super }} + {# CodeMirror 5: chosen over CodeMirror 6 because it needs no build step — a single #} + {# CSS + JS include gives us syntax highlighting, bracket matching, auto-indent, #} + {# and lint markers for JSON. CM6 is more modular but requires bundling ES modules. #} + + + +{% endblock css %} {% block heading %} - -
- Manage Filters - {% bootstrap_icon "funnel" %} - - {% bootstrap_icon "list" %} @@ -23,3 +35,35 @@ {% block content %} {% crispy form %} {% endblock content %} +{% block script %} + {{ block.super }} + + + + + + + + +{% endblock script %} diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html index 2f3b30fdc..e6d1a37b2 100644 --- a/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html @@ -1,6 +1,8 @@ {% extends "mass_transfer/mass_transfer_layout.html" %} {% load dicom_task_status_css_class from core_extras %} -{% load task_control_panel volume_status_css_class from mass_transfer_extras %} +{% load task_control_panel from mass_transfer_extras %} +{% load render_table from django_tables2 %} +{% load crispy from crispy_forms_tags %} {% block title %} Mass Transfer Task {% endblock title %} @@ -26,38 +28,10 @@
Log
{{ task.log|default:"" }}
- {% if problem_volumes %} -
Skipped & Failed Volumes
-
- - - - - - - - - - - - - {% for vol in problem_volumes %} - - - - - - - - - {% endfor %} - -
StatusModalitySeries #Series DescriptionStudy DateReason
- - {{ vol.get_status_display }} - - {{ vol.modality }}{{ vol.series_number|default:"—" }}{{ vol.series_description|default:"—" }}{{ vol.study_datetime|date:"Y-m-d" }}{{ vol.log|default:"—" }}
-
- {% endif %} +
Volumes
+ {% crispy filter.form %} +
+ {% render_table table %} +
{% task_control_panel %} {% endblock content %} diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 5e901a39f..1e633cb41 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -21,7 +21,10 @@ ) from adit.mass_transfer.processors import ( DiscoveredSeries, + FilterSpec, MassTransferTaskProcessor, + _age_at_study, + _birth_date_range, _dicom_match, _parse_int, _series_folder_name, @@ -81,15 +84,17 @@ def _make_processor(mocker: MockerFixture) -> MassTransferTaskProcessor: return processor -def _make_filter(mocker: MockerFixture, **kwargs) -> MassTransferFilter: - mf = mocker.MagicMock(spec=MassTransferFilter) - mf.modality = kwargs.get("modality", "CT") - mf.study_description = kwargs.get("study_description", "") - mf.institution_name = kwargs.get("institution_name", "") - mf.apply_institution_on_study = kwargs.get("apply_institution_on_study", True) - mf.series_description = kwargs.get("series_description", "") - mf.series_number = kwargs.get("series_number", None) - return mf +def _make_filter(**kwargs) -> FilterSpec: + return FilterSpec( + modality=kwargs.get("modality", "CT"), + study_description=kwargs.get("study_description", ""), + institution_name=kwargs.get("institution_name", ""), + apply_institution_on_study=kwargs.get("apply_institution_on_study", True), + series_description=kwargs.get("series_description", ""), + series_number=kwargs.get("series_number", None), + min_age=kwargs.get("min_age", None), + max_age=kwargs.get("max_age", None), + ) @pytest.mark.django_db @@ -107,8 +112,8 @@ def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture): end_date=date(2024, 1, 1), partition_granularity=MassTransferJob.PartitionGranularity.DAILY, ) - mf = MassTransferFilter.objects.create(owner=user, name="CT Filter", modality="CT") - job.filters.add(mf) + job.filters_json = [{"modality": "CT"}] + job.save(update_fields=["filters_json"]) start = timezone.now() end = start + timedelta(minutes=10) @@ -125,13 +130,14 @@ def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture): operator.server = source operator.find_studies.return_value = [object(), object()] + mf = FilterSpec(modality="CT") with pytest.raises(DicomError, match="Time window too small"): processor._find_studies(operator, mf, start, end) def test_find_studies_returns_all_when_under_limit(mocker: MockerFixture): processor = _make_processor(mocker) - mf = _make_filter(mocker, modality="CT") + mf = _make_filter( modality="CT") start = datetime(2024, 1, 1, 0, 0, 0) end = datetime(2024, 1, 1, 23, 59, 59) @@ -150,7 +156,7 @@ def test_find_studies_returns_all_when_under_limit(mocker: MockerFixture): def test_find_studies_splits_and_deduplicates(mocker: MockerFixture): processor = _make_processor(mocker) - mf = _make_filter(mocker, modality="CT") + mf = _make_filter( modality="CT") # Use a single-day range to test the time-based midpoint split start = datetime(2024, 1, 1, 0, 0, 0) @@ -180,7 +186,7 @@ def test_find_studies_splits_and_deduplicates(mocker: MockerFixture): def test_find_studies_split_boundaries_dont_overlap(mocker: MockerFixture): processor = _make_processor(mocker) - mf = _make_filter(mocker, modality="") + mf = _make_filter( modality="") # Use a single-day range so we test the time-based midpoint split start = datetime(2024, 1, 1, 0, 0, 0) @@ -223,7 +229,7 @@ def tracking_find_studies(self_inner, operator, mf, s, e): def 
test_find_studies_same_day_split_narrows_study_time(mocker: MockerFixture): """When splitting within a single day, StudyTime must narrow to avoid infinite recursion.""" processor = _make_processor(mocker) - mf = _make_filter(mocker, modality="CT") + mf = _make_filter( modality="CT") start = datetime(2024, 1, 1, 8, 0, 0) end = datetime(2024, 1, 1, 20, 0, 0) @@ -259,7 +265,7 @@ def test_find_studies_same_day_split_narrows_study_time(mocker: MockerFixture): def test_find_studies_cross_midnight_splits_at_midnight(mocker: MockerFixture): """A cross-midnight window must split at midnight, not at the midpoint.""" processor = _make_processor(mocker) - mf = _make_filter(mocker, modality="CT") + mf = _make_filter( modality="CT") # Window spans midnight: Jan 1 23:45 to Jan 2 00:15 start = datetime(2024, 1, 1, 23, 45, 0) @@ -289,7 +295,7 @@ def test_find_studies_cross_midnight_splits_at_midnight(mocker: MockerFixture): def test_find_studies_preserves_order_with_unique_studies(mocker: MockerFixture): processor = _make_processor(mocker) - mf = _make_filter(mocker, modality="") + mf = _make_filter( modality="") start = datetime(2024, 1, 1, 0, 0, 0) end = datetime(2024, 1, 1, 23, 59, 59) @@ -332,7 +338,8 @@ def _make_process_env( mock_job.source.dicomserver = mocker.MagicMock() mock_job.destination.node_type = DicomNode.NodeType.FOLDER mock_job.destination.dicomfolder.path = str(tmp_path / "output") - mock_job.filters.all.return_value = [_make_filter(mocker)] + mock_job.filters_json = [{"modality": "CT"}] + mock_job.filters.all.return_value = [] processor.mass_task.pk = 42 processor.mass_task.partition_key = "20240101" @@ -457,6 +464,7 @@ def test_process_returns_failure_when_no_filters( mocker: MockerFixture, tmp_path: Path ): processor = _make_process_env(mocker, tmp_path) + processor.mass_task.job.filters_json = [] processor.mass_task.job.filters.all.return_value = [] result = processor.process() @@ -816,7 +824,8 @@ def test_process_creates_volume_records_on_success( partition_granularity=MassTransferJob.PartitionGranularity.DAILY, anonymization_mode=MassTransferJob.AnonymizationMode.NONE, ) - job.filters.create(owner=user, name="CT Filter", modality="CT") + job.filters_json = [{"modality": "CT"}] + job.save(update_fields=["filters_json"]) task = MassTransferTask.objects.create( job=job, @@ -863,7 +872,8 @@ def test_process_creates_error_volume_on_failure( partition_granularity=MassTransferJob.PartitionGranularity.DAILY, anonymization_mode=MassTransferJob.AnonymizationMode.NONE, ) - job.filters.create(owner=user, name="CT Filter", modality="CT") + job.filters_json = [{"modality": "CT"}] + job.save(update_fields=["filters_json"]) task = MassTransferTask.objects.create( job=job, @@ -909,7 +919,8 @@ def test_process_deletes_error_volumes_on_retry( partition_granularity=MassTransferJob.PartitionGranularity.DAILY, anonymization_mode=MassTransferJob.AnonymizationMode.NONE, ) - job.filters.create(owner=user, name="CT Filter", modality="CT") + job.filters_json = [{"modality": "CT"}] + job.save(update_fields=["filters_json"]) task = MassTransferTask.objects.create( job=job, @@ -971,7 +982,8 @@ def test_process_deterministic_pseudonyms_across_partitions( partition_granularity=MassTransferJob.PartitionGranularity.DAILY, anonymization_mode=MassTransferJob.AnonymizationMode.PSEUDONYMIZE_WITH_LINKING, ) - job.filters.create(owner=user, name="CT Filter", modality="CT") + job.filters_json = [{"modality": "CT"}] + job.save(update_fields=["filters_json"]) task1 = MassTransferTask.objects.create( job=job, @@ -1036,7 
+1048,8 @@ def test_process_pseudonymize_mode_not_linked_across_partitions( partition_granularity=MassTransferJob.PartitionGranularity.DAILY, anonymization_mode=MassTransferJob.AnonymizationMode.PSEUDONYMIZE, ) - job.filters.create(owner=user, name="CT Filter", modality="CT") + job.filters_json = [{"modality": "CT"}] + job.save(update_fields=["filters_json"]) task1 = MassTransferTask.objects.create( job=job, @@ -1079,3 +1092,200 @@ def test_process_pseudonymize_mode_not_linked_across_partitions( assert vol2.pseudonym != "" assert vol1.pseudonym != "PAT1" assert vol1.pseudonym != vol2.pseudonym + + +# --------------------------------------------------------------------------- +# Age filtering tests +# --------------------------------------------------------------------------- + + +def test_age_at_study_basic(): + assert _age_at_study(date(1990, 6, 15), date(2025, 6, 15)) == 35 + assert _age_at_study(date(1990, 6, 15), date(2025, 6, 14)) == 34 + assert _age_at_study(date(1990, 6, 15), date(2025, 6, 16)) == 35 + + +def test_age_at_study_leap_year(): + assert _age_at_study(date(2000, 2, 29), date(2025, 2, 28)) == 24 + assert _age_at_study(date(2000, 2, 29), date(2025, 3, 1)) == 25 + + +def test_birth_date_range_no_age_limits(): + assert _birth_date_range(date(2025, 1, 1), date(2025, 1, 31), None, None) is None + + +def test_birth_date_range_min_only(): + result = _birth_date_range(date(2025, 3, 15), date(2025, 3, 15), 18, None) + assert result is not None + earliest, latest = result + # Latest birth: someone who is 18 on study date could be born up to end of year 2008 + assert latest.year >= 2007 + assert earliest == date(1900, 1, 1) + + +def test_birth_date_range_max_only(): + result = _birth_date_range(date(2025, 3, 15), date(2025, 3, 15), None, 65) + assert result is not None + earliest, latest = result + # Earliest birth: someone who is 65 on study date was born ~1959 + assert earliest.year <= 1960 + + +def test_birth_date_range_both(): + result = _birth_date_range(date(2025, 3, 15), date(2025, 3, 15), 18, 65) + assert result is not None + earliest, latest = result + assert earliest < latest + + +# --------------------------------------------------------------------------- +# FilterSpec tests +# --------------------------------------------------------------------------- + + +def test_filter_spec_from_dict(): + d = { + "modality": "MR", + "institution_name": "Neuroradiologie", + "min_age": 18, + "max_age": 90, + } + fs = FilterSpec.from_dict(d) + assert fs.modality == "MR" + assert fs.institution_name == "Neuroradiologie" + assert fs.min_age == 18 + assert fs.max_age == 90 + assert fs.study_description == "" + assert fs.apply_institution_on_study is True + + +def test_filter_spec_from_model(mocker: MockerFixture): + mf = mocker.MagicMock(spec=MassTransferFilter) + mf.modality = "CT" + mf.institution_name = "" + mf.apply_institution_on_study = True + mf.study_description = "Brain*" + mf.series_description = "" + mf.series_number = None + fs = FilterSpec.from_model(mf) + assert fs.modality == "CT" + assert fs.study_description == "Brain*" + assert fs.min_age is None + assert fs.max_age is None + + +# --------------------------------------------------------------------------- +# DICOM sidecar tests +# --------------------------------------------------------------------------- + + +def test_write_dicom_sidecar(tmp_path: Path): + from adit.mass_transfer.processors import _write_dicom_sidecar + + fields = { + "PatientBirthDate": "19900101", + "PatientSex": "M", + "PatientAgeAtStudy": "35", + 
"StudyDate": "20250315", + "StudyInstanceUID": "1.2.3.4.5", + "SeriesInstanceUID": "1.2.3.4.5.6", + "Modality": "MR", + } + + _write_dicom_sidecar(tmp_path, "T1w_3D_101", fields) + + import json + + sidecar = tmp_path / "T1w_3D_101_dicom.json" + assert sidecar.exists() + result = json.loads(sidecar.read_text()) + assert result["PatientBirthDate"] == "19900101" + assert result["PatientAgeAtStudy"] == "35" + assert result["StudyInstanceUID"] == "1.2.3.4.5" + assert result["Modality"] == "MR" + + +def test_write_dicom_sidecar_empty_fields(tmp_path: Path): + from adit.mass_transfer.processors import _write_dicom_sidecar + + _write_dicom_sidecar(tmp_path, "series_1", {}) + + # No file should be written when fields are empty + assert not list(tmp_path.glob("*.json")) + + +def _write_test_dicom(path: Path, **kwargs) -> None: + """Write a minimal valid DICOM file for testing.""" + import pydicom + + ds = pydicom.Dataset() + for k, v in kwargs.items(): + setattr(ds, k, v) + ds.SOPClassUID = kwargs.get("SOPClassUID", "1.2.840.10008.5.1.4.1.1.4") + ds.SOPInstanceUID = kwargs.get("SOPInstanceUID", "1.2.3.4.5") + ds.file_meta = pydicom.Dataset() + ds.file_meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian + ds.file_meta.MediaStorageSOPClassUID = ds.SOPClassUID + ds.file_meta.MediaStorageSOPInstanceUID = ds.SOPInstanceUID + pydicom.dcmwrite(str(path), ds, enforce_file_format=True) + + +def test_extract_dicom_sidecar_computes_age(tmp_path: Path): + """_extract_dicom_sidecar should compute PatientAgeAtStudy from birth date and study date.""" + from adit.mass_transfer.processors import _extract_dicom_sidecar + + _write_test_dicom( + tmp_path / "test.dcm", + PatientBirthDate="19900615", + PatientSex="M", + StudyDate="20250615", + StudyInstanceUID="1.2.3", + SeriesInstanceUID="1.2.3.4", + Modality="MR", + ) + + result = _extract_dicom_sidecar(tmp_path) + assert result["PatientAgeAtStudy"] == "35" + assert result["PatientBirthDate"] == "19900615" + assert result["PatientSex"] == "M" + assert result["StudyInstanceUID"] == "1.2.3" + + +def test_extract_dicom_sidecar_pseudonymized_has_no_real_data(tmp_path: Path): + """When pseudonymization is applied, sidecar should contain pseudonymized values, not originals. + + This test simulates the post-pseudonymization state: the DICOM files on disk have already + been anonymized by dicognito + Pseudonymizer before _extract_dicom_sidecar runs. + We verify the sidecar contains only the pseudonymized values. 
+ """ + from adit.mass_transfer.processors import _extract_dicom_sidecar + + _write_test_dicom( + tmp_path / "test.dcm", + PatientID="ABCDEF123456", + PatientName="ABCDEF123456", + PatientBirthDate="19920101", + PatientSex="M", + StudyDate="20260101", + StudyInstanceUID="2.25.999999999", + SeriesInstanceUID="2.25.888888888", + Modality="MR", + ) + + result = _extract_dicom_sidecar(tmp_path) + + # Sidecar must contain the pseudonymized values (what's on disk) + assert result["PatientID"] == "ABCDEF123456" + assert result["PatientBirthDate"] == "19920101" + assert result["StudyInstanceUID"] == "2.25.999999999" + assert result["SeriesInstanceUID"] == "2.25.888888888" + assert result["StudyDate"] == "20260101" + + # Real values must NOT appear anywhere + real_patient_id = "4654954" + real_birth_date = "19900615" + real_study_uid = "1.2.276.0.18.14.200.2.0.0.2.20250311.175028.78.91" + for val in result.values(): + assert real_patient_id not in val + assert real_birth_date not in val + assert real_study_uid not in val diff --git a/adit/mass_transfer/views.py b/adit/mass_transfer/views.py index 76c30c152..bf01bb569 100644 --- a/adit/mass_transfer/views.py +++ b/adit/mass_transfer/views.py @@ -1,14 +1,17 @@ import csv from typing import Any, cast +from adit_radis_shared.common.mixins import PageSizeSelectMixin, RelatedFilterMixin from adit_radis_shared.common.views import BaseUpdatePreferencesView from django.conf import settings from django.contrib.auth.mixins import LoginRequiredMixin +from django.db.models import QuerySet from django.http import HttpResponse from django.shortcuts import get_object_or_404 from django.urls import reverse_lazy from django.views import View from django.views.generic import CreateView, DeleteView, ListView, UpdateView +from django_tables2 import SingleTableMixin from adit.core.views import ( DicomJobCancelView, @@ -26,7 +29,7 @@ TransferJobListView, ) -from .filters import MassTransferJobFilter, MassTransferTaskFilter +from .filters import MassTransferJobFilter, MassTransferTaskFilter, MassTransferVolumeFilter from .forms import MassTransferFilterForm, MassTransferJobForm from .mixins import MassTransferLockedMixin from .models import ( @@ -35,7 +38,7 @@ MassTransferTask, MassTransferVolume, ) -from .tables import MassTransferJobTable, MassTransferTaskTable +from .tables import MassTransferJobTable, MassTransferTaskTable, MassTransferVolumeTable MASS_TRANSFER_SOURCE = "mass_transfer_source" MASS_TRANSFER_DESTINATION = "mass_transfer_destination" @@ -158,21 +161,23 @@ class MassTransferJobRestartView(MassTransferLockedMixin, DicomJobRestartView): model = MassTransferJob -class MassTransferTaskDetailView(MassTransferLockedMixin, DicomTaskDetailView): +class MassTransferTaskDetailView( + MassTransferLockedMixin, + SingleTableMixin, + RelatedFilterMixin, + PageSizeSelectMixin, + DicomTaskDetailView, +): model = MassTransferTask job_url_name = "mass_transfer_job_detail" template_name = "mass_transfer/mass_transfer_task_detail.html" + table_class = MassTransferVolumeTable + filterset_class = MassTransferVolumeFilter + table_pagination = {"per_page": 25} - def get_context_data(self, **kwargs) -> dict[str, Any]: - context = super().get_context_data(**kwargs) - task = self.object - context["problem_volumes"] = task.volumes.filter( - status__in=[ - MassTransferVolume.Status.ERROR, - MassTransferVolume.Status.SKIPPED, - ] - ).order_by("status", "study_datetime") - return context + def get_filter_queryset(self) -> QuerySet[MassTransferVolume]: + task = cast(MassTransferTask, 
self.get_object()) + return task.volumes class MassTransferTaskDeleteView(MassTransferLockedMixin, DicomTaskDeleteView): diff --git a/pyproject.toml b/pyproject.toml index dda104e76..a33245abe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ dependencies = [ "wait-for-it>=2.3.0", "watchfiles>=1.0.4", "whitenoise>=6.9.0", + "pydantic>=2.12.5", ] [dependency-groups] diff --git a/uv.lock b/uv.lock index 2f234cf14..ddba346ed 100644 --- a/uv.lock +++ b/uv.lock @@ -45,6 +45,7 @@ dependencies = [ { name = "procrastinate", extra = ["django"] }, { name = "psycopg", extra = ["binary"] }, { name = "pyarrow" }, + { name = "pydantic" }, { name = "pydicom" }, { name = "pynetdicom" }, { name = "stamina" }, @@ -137,6 +138,7 @@ requires-dist = [ { name = "procrastinate", extras = ["django"], specifier = ">=3.0.2" }, { name = "psycopg", extras = ["binary"], specifier = ">=3.2.5" }, { name = "pyarrow", specifier = ">=19.0.1" }, + { name = "pydantic", specifier = ">=2.12.5" }, { name = "pydicom", specifier = ">=2.4.4" }, { name = "pynetdicom", specifier = ">=2.1.1" }, { name = "stamina", specifier = ">=24.2.0" }, @@ -263,6 +265,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668, upload-time = "2025-10-09T20:51:03.174Z" }, ] +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + [[package]] name = "anyio" version = "4.12.1" @@ -2342,6 +2353,92 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/3d/f9441a0d798bf2b1e645adc3265e55706aead1255ccdad3856dbdcffec14/pycryptodome-3.23.0-cp37-abi3-win_arm64.whl", hash = "sha256:11eeeb6917903876f134b56ba11abe95c0b0fd5e3330def218083c7d98bbcb3c", size = 1703675, upload-time = "2025-05-17T17:21:13.146Z" }, ] +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = 
"typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = 
"sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = 
"https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = 
"2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = 
"sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, +] + [[package]] name = "pydicom" version = "3.0.1" @@ -3145,6 +3242,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = 
"sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + [[package]] name = "tzdata" version = "2025.3" From 70ca1ee9617c569a145bb512375fa273d0f792fc Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 16 Mar 2026 12:45:29 +0000 Subject: [PATCH 038/103] Fix CI failure by adding missing mass_transfer_worker Docker image tag The mass_transfer_worker service was missing its image tag in both the dev compose file and the CI workflow, causing "No such image" errors. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci.yml | 1 + docker-compose.dev.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 47e7daa23..b6fb26146 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,6 +35,7 @@ jobs: adit_dev-web:latest adit_dev-default_worker:latest adit_dev-dicom_worker:latest + adit_dev-mass_transfer_worker:latest adit_dev-receiver:latest cache-from: type=gha cache-to: type=gha,mode=max diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index f5c168276..9e7da4fd7 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -70,6 +70,7 @@ services: mass_transfer_worker: <<: *default-app + image: adit_dev-mass_transfer_worker:latest command: > bash -c " wait-for-it -s postgres.local:5432 -t 60 && From b33959828863d2d67b20aa5629981d1fc0070fac Mon Sep 17 00:00:00 2001 From: Ritwik Date: Mon, 16 Mar 2026 13:46:08 +0100 Subject: [PATCH 039/103] Fix CI: add missing mass_transfer_worker image tag, add type hint and tests for _destination_base_dir CI failed because the mass_transfer_worker service had no pre-built image tag in the GitHub Actions workflow, causing 'No such image' on startup. Also adds MassTransferJob type annotation to _destination_base_dir and 5 test cases covering the job-identifying output folder. 
--- adit/mass_transfer/processors.py | 3 +- adit/mass_transfer/tests/test_processor.py | 138 +++++++++++++++++++++ 2 files changed, 140 insertions(+), 1 deletion(-) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index be339a5b8..8f8641f4e 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -28,6 +28,7 @@ from .models import ( MassTransferFilter, + MassTransferJob, MassTransferSettings, MassTransferTask, MassTransferVolume, @@ -244,7 +245,7 @@ def _birth_date_range( return (earliest_birth, latest_birth) -def _destination_base_dir(node: DicomNode, job) -> Path: +def _destination_base_dir(node: DicomNode, job: MassTransferJob) -> Path: assert node.node_type == DicomNode.NodeType.FOLDER name = sanitize_filename( f"adit_{job._meta.app_label}_{job.pk}_{job.created.strftime('%Y%m%d')}_{job.owner.username}" diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 1e633cb41..827971ea6 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -25,6 +25,7 @@ MassTransferTaskProcessor, _age_at_study, _birth_date_range, + _destination_base_dir, _dicom_match, _parse_int, _series_folder_name, @@ -1289,3 +1290,140 @@ def test_extract_dicom_sidecar_pseudonymized_has_no_real_data(tmp_path: Path): assert real_patient_id not in val assert real_birth_date not in val assert real_study_uid not in val + + +# --------------------------------------------------------------------------- +# _destination_base_dir tests +# --------------------------------------------------------------------------- + + +@pytest.mark.django_db +def test_destination_base_dir_creates_job_folder(tmp_path: Path): + """Output dir should include adit_{app}_{pk}_{date}_{owner} parent folder.""" + user = UserFactory.create(username="rghosh") + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path)) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=date(2025, 3, 16), + end_date=date(2025, 3, 16), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + ) + + result = _destination_base_dir(destination, job) + + expected_name = f"adit_mass_transfer_{job.pk}_{job.created.strftime('%Y%m%d')}_rghosh" + assert result == tmp_path / expected_name + assert result.is_dir() + + +@pytest.mark.django_db +def test_destination_base_dir_is_idempotent(tmp_path: Path): + """Calling _destination_base_dir twice should not fail or create duplicates.""" + user = UserFactory.create(username="testuser") + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path)) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=date(2025, 1, 1), + end_date=date(2025, 1, 1), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + ) + + result1 = _destination_base_dir(destination, job) + result2 = _destination_base_dir(destination, job) + + assert result1 == result2 + assert result1.is_dir() + + +def test_destination_base_dir_asserts_on_server_node(mocker: MockerFixture): + """Should raise AssertionError when node is not a FOLDER.""" + node = mocker.MagicMock() + node.node_type = DicomNode.NodeType.SERVER + job = mocker.MagicMock() + + with pytest.raises(AssertionError): + _destination_base_dir(node, job) + + +@pytest.mark.django_db +def test_destination_base_dir_sanitizes_username(tmp_path: 
Path): + """Usernames with special chars should be sanitized in the folder name.""" + user = UserFactory.create(username="user/with:special") + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path)) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=date(2025, 1, 1), + end_date=date(2025, 1, 1), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + ) + + result = _destination_base_dir(destination, job) + + # Should not contain path separators + folder_name = result.name + assert "/" not in folder_name + assert "\\" not in folder_name + assert result.is_dir() + + +@pytest.mark.django_db +def test_process_output_path_includes_job_folder( + mocker: MockerFixture, tmp_path: Path +): + """End-to-end: the output path used during process() should include the job-identifying folder.""" + MassTransferSettings.objects.create() + + user = UserFactory.create(username="researcher") + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + source=source, + destination=destination, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 1), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + anonymization_mode=MassTransferJob.AnonymizationMode.NONE, + ) + job.filters_json = [{"modality": "CT"}] + job.save(update_fields=["filters_json"]) + + task = MassTransferTask.objects.create( + job=job, + source=source, + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) + + series = [_make_discovered(patient_id="PAT1", series_uid="1.2.3.4.5")] + + processor = MassTransferTaskProcessor(task) + mocker.patch.object(processor, "_discover_series", return_value=series) + mocker.patch("adit.mass_transfer.processors.DicomOperator") + + export_paths: list[Path] = [] + + def fake_export(op, s, path, subject_id, pseudonymizer): + export_paths.append(path) + return (1, "", "") + + mocker.patch.object(processor, "_export_series", side_effect=fake_export) + + result = processor.process() + + assert result["status"] == MassTransferTask.Status.SUCCESS + assert len(export_paths) == 1 + + # The path should contain the job-identifying folder + expected_prefix = f"adit_mass_transfer_{job.pk}_{job.created.strftime('%Y%m%d')}_researcher" + assert expected_prefix in str(export_paths[0]) From 77cdac8887c1accc05b1caefbda5d7cc4f06de8e Mon Sep 17 00:00:00 2001 From: Ritwik Date: Mon, 16 Mar 2026 17:05:31 +0100 Subject: [PATCH 040/103] Fix ruff lint errors: line length, import sorting, unused import --- adit/core/utils/dicom_operator.py | 5 ++++- adit/core/utils/dimse_connector.py | 5 ++++- adit/mass_transfer/forms.py | 11 ++++++++--- adit/mass_transfer/processors.py | 3 ++- adit/mass_transfer/tests/test_processor.py | 9 +++++---- 5 files changed, 23 insertions(+), 10 deletions(-) diff --git a/adit/core/utils/dicom_operator.py b/adit/core/utils/dicom_operator.py index 6bb6d0249..d3ab11805 100644 --- a/adit/core/utils/dicom_operator.py +++ b/adit/core/utils/dicom_operator.py @@ -536,7 +536,10 @@ def store_handler(event: Event, store_errors: list[Exception]) -> int: try: self._handle_fetched_image(ds, callback) except Exception as err: - logger.error("Store handler failed for SOP %s: %s", ds.SOPInstanceUID, err, exc_info=True) + logger.error( + "Store handler failed for SOP %s: %s", + ds.SOPInstanceUID, err, exc_info=True, + ) store_errors.append(err) # 
Unfortunately not all PACS servers support or respect a C-CANCEL request, diff --git a/adit/core/utils/dimse_connector.py b/adit/core/utils/dimse_connector.py index 39e23148a..b880156ca 100644 --- a/adit/core/utils/dimse_connector.py +++ b/adit/core/utils/dimse_connector.py @@ -229,7 +229,10 @@ def _associate(self, service: DimseService): for cx in self.assoc.rejected_contexts: rejected.append(f"{cx.abstract_syntax}") if rejected: - logger.warning("C-GET: %d presentation contexts rejected by SCP: %s", len(rejected), rejected) + logger.warning( + "C-GET: %d presentation contexts rejected by SCP: %s", + len(rejected), rejected, + ) accepted = [cx.abstract_syntax for cx in self.assoc.accepted_contexts] logger.debug("C-GET: %d presentation contexts accepted", len(accepted)) diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index 4b962d1e4..83437eb36 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -9,7 +9,8 @@ from crispy_forms.layout import HTML, Column, Div, Field, Layout, Row, Submit from django import forms from django.core.exceptions import ValidationError -from pydantic import BaseModel, ValidationError as PydanticValidationError, model_validator +from pydantic import BaseModel, model_validator +from pydantic import ValidationError as PydanticValidationError from adit.core.fields import DicomNodeChoiceField from adit.core.models import DicomNode @@ -164,7 +165,10 @@ class MassTransferJobForm(forms.ModelForm): pseudonym_salt = forms.CharField( label="Pseudonym salt", required=False, - help_text="Deterministic seed for pseudonymization. Same salt + same patient ID = same pseudonym.", + help_text=( + "Deterministic seed for pseudonymization." + " Same salt + same patient ID = same pseudonym." + ), widget=forms.TextInput(attrs={ "class": "form-control", }), @@ -198,7 +202,8 @@ class Meta: "anonymization_mode": ( "None: all identifiers are preserved. " "Pseudonymize: identifiers are replaced with random values. " - "Pseudonymize with linking: deterministic pseudonyms (same salt + patient = same pseudonym) " + "Pseudonymize with linking: deterministic pseudonyms " + "(same salt + patient = same pseudonym) " "and a downloadable association CSV." ), "convert_to_nifti": ( diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 8f8641f4e..1536df314 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -598,7 +598,8 @@ def _discover_series( # Exact client-side age filtering using actual StudyDate and # PatientBirthDate (the C-FIND birth date range is approximate). 
birth_date = study.PatientBirthDate - if birth_date and study.StudyDate and (mf.min_age is not None or mf.max_age is not None): + has_age_filter = mf.min_age is not None or mf.max_age is not None + if birth_date and study.StudyDate and has_age_filter: age = _age_at_study(birth_date, study.StudyDate) if mf.min_age is not None and age < mf.min_age: continue diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 827971ea6..224f4414b 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -8,9 +8,9 @@ from pytest_mock import MockerFixture from adit.core.errors import DicomError, RetriableDicomError -from adit.core.models import DicomNode from adit.core.factories import DicomFolderFactory, DicomServerFactory -from adit.core.utils.dicom_dataset import QueryDataset, ResultDataset +from adit.core.models import DicomNode +from adit.core.utils.dicom_dataset import ResultDataset from adit.core.utils.dicom_operator import DicomOperator from adit.mass_transfer.models import ( MassTransferFilter, @@ -638,7 +638,8 @@ def fake_export(op, s, path, subject_id, pseudonymizer): assert subject_ids[0] != "PAT1" # Pseudonym should be deterministic — running again with same salt gives same result from adit.core.utils.pseudonymizer import Pseudonymizer - expected = Pseudonymizer(seed="test-salt-for-deterministic-pseudonyms").compute_pseudonym("PAT1") + ps = Pseudonymizer(seed="test-salt-for-deterministic-pseudonyms") + expected = ps.compute_pseudonym("PAT1") assert subject_ids[0] == expected @@ -1379,7 +1380,7 @@ def test_destination_base_dir_sanitizes_username(tmp_path: Path): def test_process_output_path_includes_job_folder( mocker: MockerFixture, tmp_path: Path ): - """End-to-end: the output path used during process() should include the job-identifying folder.""" + """End-to-end: process() output path should include job-identifying folder.""" MassTransferSettings.objects.create() user = UserFactory.create(username="researcher") From 7c2dfcfa5f90e7de079ee1f26e928e0bd5572660 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Thu, 19 Mar 2026 11:48:21 +0100 Subject: [PATCH 041/103] Fix pyright type errors in mass transfer module --- adit/mass_transfer/processors.py | 5 ++++- .../templatetags/mass_transfer_extras.py | 2 +- adit/mass_transfer/tests/test_processor.py | 16 +++++++++++----- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 1536df314..0f1550959 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -709,10 +709,13 @@ def _find_studies( birth_range = _birth_date_range( start.date(), end.date(), mf.min_age, mf.max_age, ) + birth_date_kwarg: dict[str, tuple[date, date]] = {} + if birth_range: + birth_date_kwarg["PatientBirthDate"] = birth_range query = QueryDataset.create( StudyDate=(start.date(), end.date()), StudyTime=study_time, - **({"PatientBirthDate": birth_range} if birth_range else {}), + **birth_date_kwarg, # type: ignore[arg-type] ) if mf.modality: diff --git a/adit/mass_transfer/templatetags/mass_transfer_extras.py b/adit/mass_transfer/templatetags/mass_transfer_extras.py index fd4af6d7a..9d846645b 100644 --- a/adit/mass_transfer/templatetags/mass_transfer_extras.py +++ b/adit/mass_transfer/templatetags/mass_transfer_extras.py @@ -16,7 +16,7 @@ def volume_status_css_class(status: str) -> str: MassTransferVolume.Status.SKIPPED: "text-muted", MassTransferVolume.Status.ERROR: 
"text-danger", } - return css_classes.get(status, "text-secondary") + return css_classes.get(MassTransferVolume.Status(status), "text-secondary") @register.inclusion_tag("core/_job_detail_control_panel.html", takes_context=True) diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 224f4414b..3e2afcc1b 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -962,7 +962,9 @@ def test_process_deletes_error_volumes_on_retry( # Old ERROR volume deleted, new EXPORTED volume created vols = MassTransferVolume.objects.filter(job=job, series_instance_uid="1.2.3.4.5") assert vols.count() == 1 - assert vols.first().status == MassTransferVolume.Status.EXPORTED + vol = vols.first() + assert vol is not None + assert vol.status == MassTransferVolume.Status.EXPORTED @pytest.mark.django_db @@ -1225,10 +1227,14 @@ def _write_test_dicom(path: Path, **kwargs) -> None: setattr(ds, k, v) ds.SOPClassUID = kwargs.get("SOPClassUID", "1.2.840.10008.5.1.4.1.1.4") ds.SOPInstanceUID = kwargs.get("SOPInstanceUID", "1.2.3.4.5") - ds.file_meta = pydicom.Dataset() - ds.file_meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian - ds.file_meta.MediaStorageSOPClassUID = ds.SOPClassUID - ds.file_meta.MediaStorageSOPInstanceUID = ds.SOPInstanceUID + from pydicom.dataset import FileMetaDataset + from pydicom.uid import ExplicitVRLittleEndian + + file_meta = FileMetaDataset() + file_meta.TransferSyntaxUID = ExplicitVRLittleEndian + file_meta.MediaStorageSOPClassUID = ds.SOPClassUID + file_meta.MediaStorageSOPInstanceUID = ds.SOPInstanceUID + ds.file_meta = file_meta pydicom.dcmwrite(str(path), ds, enforce_file_format=True) From 2b43ab4ffabf13b61e1daadf58690e217fea56ae Mon Sep 17 00:00:00 2001 From: Ritwik Date: Thu, 19 Mar 2026 14:18:28 +0100 Subject: [PATCH 042/103] Delegate compute_pseudonym to dicognito's IDAnonymizer Instead of reimplementing dicognito's ID generation algorithm, use IDAnonymizer directly so future algorithm changes are picked up automatically. 
--- adit/core/tests/utils/test_pseudonymizer.py | 44 +++++++++++++++++++++ adit/core/utils/pseudonymizer.py | 17 ++++---- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/adit/core/tests/utils/test_pseudonymizer.py b/adit/core/tests/utils/test_pseudonymizer.py index b62bf5061..6d1d17391 100644 --- a/adit/core/tests/utils/test_pseudonymizer.py +++ b/adit/core/tests/utils/test_pseudonymizer.py @@ -97,3 +97,47 @@ def test_pseudonymize_preserves_acquisition_datetime(self, pseudonymizer: Pseudo pseudonymizer.pseudonymize(ds, pseudonym) assert ds.AcquisitionDateTime == "20230101120000" + + +class TestComputePseudonym: + def test_requires_seed(self): + ps = Pseudonymizer() + with pytest.raises(ValueError, match="requires a seeded Pseudonymizer"): + ps.compute_pseudonym("PAT1") + + def test_deterministic_same_seed(self): + """Same seed + same patient ID always produces the same pseudonym.""" + ps1 = Pseudonymizer(seed="fixed-seed") + ps2 = Pseudonymizer(seed="fixed-seed") + assert ps1.compute_pseudonym("PAT1") == ps2.compute_pseudonym("PAT1") + + def test_different_seeds_produce_different_pseudonyms(self): + ps1 = Pseudonymizer(seed="seed-a") + ps2 = Pseudonymizer(seed="seed-b") + assert ps1.compute_pseudonym("PAT1") != ps2.compute_pseudonym("PAT1") + + def test_different_patients_produce_different_pseudonyms(self): + ps = Pseudonymizer(seed="fixed-seed") + assert ps.compute_pseudonym("PAT1") != ps.compute_pseudonym("PAT2") + + def test_matches_dicognito_anonymize(self): + """compute_pseudonym must match what dicognito produces for PatientID.""" + seed = "test-consistency-seed" + ps = Pseudonymizer(seed=seed) + pseudonym = ps.compute_pseudonym("PATIENT_42") + + # Run the full anonymizer on a real dataset and check the PatientID + # before our pseudonymize() overwrites it. + from dicognito.anonymizer import Anonymizer + + anon = Anonymizer(seed=seed) + ds = create_base_dataset() + ds.PatientID = "PATIENT_42" + anon.anonymize(ds) + assert ds.PatientID == pseudonym + + def test_pseudonym_is_alphanumeric(self): + ps = Pseudonymizer(seed="alpha-seed") + result = ps.compute_pseudonym("SOME_PATIENT") + assert result.isalnum() + assert result == result.upper() diff --git a/adit/core/utils/pseudonymizer.py b/adit/core/utils/pseudonymizer.py index 669bacc6d..8b64c961f 100644 --- a/adit/core/utils/pseudonymizer.py +++ b/adit/core/utils/pseudonymizer.py @@ -1,4 +1,5 @@ from dicognito.anonymizer import Anonymizer +from dicognito.idanonymizer import IDAnonymizer from dicognito.randomizer import Randomizer from dicognito.value_keeper import ValueKeeper from django.conf import settings @@ -10,9 +11,6 @@ class Pseudonymizer: A utility class for pseudonymizing (or anonymizing) DICOM data. """ - _ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" - _ID_LENGTH = 12 - def __init__( self, anonymizer: Anonymizer | None = None, @@ -43,16 +41,19 @@ def _setup_anonymizer(self, seed: str | None = None) -> Anonymizer: def compute_pseudonym(self, patient_id: str) -> str: """Pre-compute the pseudonym for a patient ID without a full DICOM dataset. - Uses the same algorithm as dicognito's IDAnonymizer so the result - matches what anonymize() would produce for PatientID. + Delegates to dicognito's IDAnonymizer so the result always matches + what anonymize() would produce for PatientID, even if dicognito + changes its internal algorithm. Requires that this Pseudonymizer was created with a seed. 
""" if self._seed is None: raise ValueError("compute_pseudonym requires a seeded Pseudonymizer") randomizer = Randomizer(self._seed) - ranges = [len(self._ALPHABET)] * self._ID_LENGTH - indices = randomizer.get_ints_from_ranges(patient_id, *ranges) - return "".join(self._ALPHABET[i] for i in indices) + id_anon = IDAnonymizer(randomizer, "", "", "PatientID") + ds = Dataset() + ds.PatientID = patient_id + id_anon(ds, ds["PatientID"]) + return str(ds.PatientID) def pseudonymize(self, ds: Dataset, pseudonym: str) -> None: """ From 5e63ec3fd8da594770e490e0a593862eb143a5e2 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Thu, 19 Mar 2026 15:21:24 +0100 Subject: [PATCH 043/103] Vendor CodeMirror and jsonlint instead of loading from CDN Serves JS/CSS from Django static files so the app works in air-gapped hospital environments without internet access. --- .../addon/edit/closebrackets.min.js | 1 + .../addon/edit/matchbrackets.min.js | 1 + .../codemirror/addon/lint/json-lint.min.js | 1 + .../vendor/codemirror/addon/lint/lint.min.css | 1 + .../vendor/codemirror/addon/lint/lint.min.js | 1 + .../vendor/codemirror/codemirror.min.css | 1 + .../vendor/codemirror/codemirror.min.js | 1 + .../mode/javascript/javascript.min.js | 1 + .../vendor/jsonlint/jsonlint.min.js | 1 + .../mass_transfer/mass_transfer_job_form.html | 25 ++++++++++--------- 10 files changed, 22 insertions(+), 12 deletions(-) create mode 100644 adit/mass_transfer/static/mass_transfer/vendor/codemirror/addon/edit/closebrackets.min.js create mode 100644 adit/mass_transfer/static/mass_transfer/vendor/codemirror/addon/edit/matchbrackets.min.js create mode 100644 adit/mass_transfer/static/mass_transfer/vendor/codemirror/addon/lint/json-lint.min.js create mode 100644 adit/mass_transfer/static/mass_transfer/vendor/codemirror/addon/lint/lint.min.css create mode 100644 adit/mass_transfer/static/mass_transfer/vendor/codemirror/addon/lint/lint.min.js create mode 100644 adit/mass_transfer/static/mass_transfer/vendor/codemirror/codemirror.min.css create mode 100644 adit/mass_transfer/static/mass_transfer/vendor/codemirror/codemirror.min.js create mode 100644 adit/mass_transfer/static/mass_transfer/vendor/codemirror/mode/javascript/javascript.min.js create mode 100644 adit/mass_transfer/static/mass_transfer/vendor/jsonlint/jsonlint.min.js diff --git a/adit/mass_transfer/static/mass_transfer/vendor/codemirror/addon/edit/closebrackets.min.js b/adit/mass_transfer/static/mass_transfer/vendor/codemirror/addon/edit/closebrackets.min.js new file mode 100644 index 000000000..f5bb3ecd2 --- /dev/null +++ b/adit/mass_transfer/static/mass_transfer/vendor/codemirror/addon/edit/closebrackets.min.js @@ -0,0 +1 @@ +!function(e){"object"==typeof exports&&"object"==typeof module?e(require("../../lib/codemirror")):"function"==typeof define&&define.amd?define(["../../lib/codemirror"],e):e(CodeMirror)}(function(S){var n={pairs:"()[]{}''\"\"",closeBefore:")]}'\":;>",triples:"",explode:"[]{}"},k=S.Pos;function y(e,t){return"pairs"==t&&"string"==typeof e?e:("object"==typeof e&&null!=e[t]?e:n)[t]}S.defineOption("autoCloseBrackets",!1,function(e,t,n){n&&n!=S.Init&&(e.removeKeyMap(i),e.state.closeBrackets=null),t&&(r(y(t,"pairs")),e.state.closeBrackets=t,e.addKeyMap(i))});var i={Backspace:function(e){var t=O(e);if(!t||e.getOption("disableInput"))return S.Pass;for(var n=y(t,"pairs"),r=e.listSelections(),i=0;i",")":"(<","[":"]>","]":"[<","{":"}>","}":"{<","<":">>",">":"<<"};function y(t){return t&&t.bracketRegex||/[(){}[\]]/}function f(t,e,n){var 
r=t.getLineHandle(e.line),i=e.ch-1,c=n&&n.afterCursor,a=(null==c&&(c=/(^| )cm-fat-cursor($| )/.test(t.getWrapperElement().className)),y(n)),c=!c&&0<=i&&a.test(r.text.charAt(i))&&p[r.text.charAt(i)]||a.test(r.text.charAt(i+1))&&p[r.text.charAt(++i)];if(!c)return null;a=">"==c.charAt(1)?1:-1;if(n&&n.strict&&0c))for(s==e.line&&(f=e.ch-(n<0?1:0));f!=m;f+=n){var g=u.charAt(f);if(h.test(g)&&(void 0===r||(t.getTokenTypeAt(k(s,f+1))||"")==(r||""))){var d=p[g];if(d&&">"==d.charAt(1)==0span::selection,.cm-fat-cursor .CodeMirror-line>span>span::selection{background:0 0}.cm-fat-cursor .CodeMirror-line::-moz-selection,.cm-fat-cursor .CodeMirror-line>span::-moz-selection,.cm-fat-cursor .CodeMirror-line>span>span::-moz-selection{background:0 0}.cm-fat-cursor{caret-color:transparent}@-moz-keyframes blink{50%{background-color:transparent}}@-webkit-keyframes blink{50%{background-color:transparent}}@keyframes blink{50%{background-color:transparent}}.cm-tab{display:inline-block;text-decoration:inherit}.CodeMirror-rulers{position:absolute;left:0;right:0;top:-50px;bottom:0;overflow:hidden}.CodeMirror-ruler{border-left:1px solid #ccc;top:0;bottom:0;position:absolute}.cm-s-default .cm-header{color:#00f}.cm-s-default .cm-quote{color:#090}.cm-negative{color:#d44}.cm-positive{color:#292}.cm-header,.cm-strong{font-weight:700}.cm-em{font-style:italic}.cm-link{text-decoration:underline}.cm-strikethrough{text-decoration:line-through}.cm-s-default .cm-keyword{color:#708}.cm-s-default .cm-atom{color:#219}.cm-s-default .cm-number{color:#164}.cm-s-default .cm-def{color:#00f}.cm-s-default .cm-variable-2{color:#05a}.cm-s-default .cm-type,.cm-s-default .cm-variable-3{color:#085}.cm-s-default .cm-comment{color:#a50}.cm-s-default .cm-string{color:#a11}.cm-s-default .cm-string-2{color:#f50}.cm-s-default .cm-meta{color:#555}.cm-s-default .cm-qualifier{color:#555}.cm-s-default .cm-builtin{color:#30a}.cm-s-default .cm-bracket{color:#997}.cm-s-default .cm-tag{color:#170}.cm-s-default .cm-attribute{color:#00c}.cm-s-default .cm-hr{color:#999}.cm-s-default .cm-link{color:#00c}.cm-s-default .cm-error{color:red}.cm-invalidchar{color:red}.CodeMirror-composing{border-bottom:2px solid}div.CodeMirror span.CodeMirror-matchingbracket{color:#0b0}div.CodeMirror span.CodeMirror-nonmatchingbracket{color:#a22}.CodeMirror-matchingtag{background:rgba(255,150,0,.3)}.CodeMirror-activeline-background{background:#e8f2ff}.CodeMirror{position:relative;overflow:hidden;background:#fff}.CodeMirror-scroll{overflow:scroll!important;margin-bottom:-50px;margin-right:-50px;padding-bottom:50px;height:100%;outline:0;position:relative;z-index:0}.CodeMirror-sizer{position:relative;border-right:50px solid transparent}.CodeMirror-gutter-filler,.CodeMirror-hscrollbar,.CodeMirror-scrollbar-filler,.CodeMirror-vscrollbar{position:absolute;z-index:6;display:none;outline:0}.CodeMirror-vscrollbar{right:0;top:0;overflow-x:hidden;overflow-y:scroll}.CodeMirror-hscrollbar{bottom:0;left:0;overflow-y:hidden;overflow-x:scroll}.CodeMirror-scrollbar-filler{right:0;bottom:0}.CodeMirror-gutter-filler{left:0;bottom:0}.CodeMirror-gutters{position:absolute;left:0;top:0;min-height:100%;z-index:3}.CodeMirror-gutter{white-space:normal;height:100%;display:inline-block;vertical-align:top;margin-bottom:-50px}.CodeMirror-gutter-wrapper{position:absolute;z-index:4;background:0 0!important;border:none!important}.CodeMirror-gutter-background{position:absolute;top:0;bottom:0;z-index:4}.CodeMirror-gutter-elt{position:absolute;cursor:default;z-index:4}.CodeMirror-gutter-wrapper 
::selection{background-color:transparent}.CodeMirror-gutter-wrapper ::-moz-selection{background-color:transparent}.CodeMirror-lines{cursor:text;min-height:1px}.CodeMirror pre.CodeMirror-line,.CodeMirror pre.CodeMirror-line-like{-moz-border-radius:0;-webkit-border-radius:0;border-radius:0;border-width:0;background:0 0;font-family:inherit;font-size:inherit;margin:0;white-space:pre;word-wrap:normal;line-height:inherit;color:inherit;z-index:2;position:relative;overflow:visible;-webkit-tap-highlight-color:transparent;-webkit-font-variant-ligatures:contextual;font-variant-ligatures:contextual}.CodeMirror-wrap pre.CodeMirror-line,.CodeMirror-wrap pre.CodeMirror-line-like{word-wrap:break-word;white-space:pre-wrap;word-break:normal}.CodeMirror-linebackground{position:absolute;left:0;right:0;top:0;bottom:0;z-index:0}.CodeMirror-linewidget{position:relative;z-index:2;padding:.1px}.CodeMirror-rtl pre{direction:rtl}.CodeMirror-code{outline:0}.CodeMirror-gutter,.CodeMirror-gutters,.CodeMirror-linenumber,.CodeMirror-scroll,.CodeMirror-sizer{-moz-box-sizing:content-box;box-sizing:content-box}.CodeMirror-measure{position:absolute;width:100%;height:0;overflow:hidden;visibility:hidden}.CodeMirror-cursor{position:absolute;pointer-events:none}.CodeMirror-measure pre{position:static}div.CodeMirror-cursors{visibility:hidden;position:relative;z-index:3}div.CodeMirror-dragcursors{visibility:visible}.CodeMirror-focused div.CodeMirror-cursors{visibility:visible}.CodeMirror-selected{background:#d9d9d9}.CodeMirror-focused .CodeMirror-selected{background:#d7d4f0}.CodeMirror-crosshair{cursor:crosshair}.CodeMirror-line::selection,.CodeMirror-line>span::selection,.CodeMirror-line>span>span::selection{background:#d7d4f0}.CodeMirror-line::-moz-selection,.CodeMirror-line>span::-moz-selection,.CodeMirror-line>span>span::-moz-selection{background:#d7d4f0}.cm-searching{background-color:#ffa;background-color:rgba(255,255,0,.4)}.cm-force-border{padding-right:.1px}@media print{.CodeMirror div.CodeMirror-cursors{visibility:hidden}}.cm-tab-wrap-hack:after{content:''}span.CodeMirror-selectedtext{background:0 0} \ No newline at end of file diff --git a/adit/mass_transfer/static/mass_transfer/vendor/codemirror/codemirror.min.js b/adit/mass_transfer/static/mass_transfer/vendor/codemirror/codemirror.min.js new file mode 100644 index 000000000..b0c56d969 --- /dev/null +++ b/adit/mass_transfer/static/mass_transfer/vendor/codemirror/codemirror.min.js @@ -0,0 +1 @@ +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?module.exports=t():"function"==typeof define&&define.amd?define(t):(e=e||self).CodeMirror=t()}(this,function(){"use strict";var e=navigator.userAgent,l=navigator.platform,d=/gecko\/\d/i.test(e),s=/MSIE \d/.test(e),a=/Trident\/(?:[7-9]|\d{2,})\..*rv:(\d+)/.exec(e),u=/Edge\/(\d+)/.exec(e),w=s||a||u,v=w&&(s?document.documentMode||6:+(u||a)[1]),x=!u&&/WebKit\//.test(e),s=x&&/Qt\/\d+\.\d+/.test(e),m=!u&&/Chrome\/(\d+)/.exec(e),V=m&&+m[1],K=/Opera\//.test(e),j=/Apple Computer/.test(navigator.vendor),c=/Mac OS X 1\d\D([8-9]|\d\d)\D/.test(e),X=/PhantomJS/.test(e),Y=j&&(/Mobile\/\w+/.test(e)||2t)return i;o.to==t&&(o.from!=o.to&&"before"==n?r=i:Fe=i),o.from==t&&(o.from!=o.to&&"before"!=n?r=i:Fe=i)}return null!=r?r:Fe}Ee=/[\u0590-\u05f4\u0600-\u06ff\u0700-\u08ac]/,Re=/[stwN]/,ze=/[LRr]/,Ie=/[Lb1n]/,Be=/[1n]/;var Ee,Re,ze,Ie,Be,Ge=function(e,t){var n="ltr"==t?"L":"R";if(0==e.length||"ltr"==t&&!Ee.test(e))return!1;for(var r,i=e.length,o=[],l=0;l=e.size)throw new Error("There is no line "+(t+e.first)+" in the 
document.");for(var n=e;!n.lines;)for(var r=0;;++r){var i=n.children[r],o=i.chunkSize();if(t=e.first&&tn?F(n,W(e,n).text.length):(e=W(e,(n=t).line).text.length,null==(t=n.ch)||e=this.string.length},g.prototype.sol=function(){return this.pos==this.lineStart},g.prototype.peek=function(){return this.string.charAt(this.pos)||void 0},g.prototype.next=function(){if(this.post},g.prototype.eatSpace=function(){for(var e=this.pos;/[\s\u00a0]/.test(this.string.charAt(this.pos));)++this.pos;return this.pos>e},g.prototype.skipToEnd=function(){this.pos=this.string.length},g.prototype.skipTo=function(e){e=this.string.indexOf(e,this.pos);if(-1e.options.maxHighlightLength&&ft(e.doc.mode,r.state),o=At(e,t,r),i&&(r.state=i),t.stateAfter=r.save(!i),t.styles=o.styles,o.classes?t.styleClasses=o.classes:t.styleClasses&&(t.styleClasses=null),n===e.doc.highlightFrontier&&(e.doc.modeFrontier=Math.max(e.doc.modeFrontier,++e.doc.highlightFrontier))),t.styles}function Wt(n,r,e){var t=n.doc,i=n.display;if(!t.mode.startState)return new Ot(t,!0,r);var o=function(e,t,n){for(var r,i,o=e.doc,l=n?-1:t-(e.doc.mode.innerMode?1e3:100),s=t;lt.first&&W(t,o-1).stateAfter,s=l?Ot.fromSaved(t,l,o):new Ot(t,gt(t.mode),o);return t.iter(o,r,function(e){Ht(n,e.text,s);var t=s.line;e.stateAfter=t==r-1||t%5==0||t>=i.viewFrom&&tt.start)return o}throw new Error("Mode "+e.name+" failed to advance stream.")}Ot.prototype.lookAhead=function(e){var t=this.doc.getLine(this.line+e);return null!=t&&e>this.maxLookAhead&&(this.maxLookAhead=e),t},Ot.prototype.baseToken=function(e){if(!this.baseTokens)return null;for(;this.baseTokens[this.baseTokenPos]<=e;)this.baseTokenPos+=2;var t=this.baseTokens[this.baseTokenPos+1];return{type:t&&t.replace(/( |^)overlay .*/,""),size:this.baseTokens[this.baseTokenPos]-e}},Ot.prototype.nextLine=function(){this.line++,0e.options.maxHighlightLength?(s=!1,l&&Ht(e,t,r,c.pos),c.pos=t.length,null):zt(Pt(n,c,r.state,h),o);if(!h||(d=h[0].name)&&(f="m-"+(f?d+" "+f:d)),!s||u!=f){for(;a=t:l.to>t),(r=r||[]).push(new Ut(s,l.from,o?null:l.to)))}return r}(n,r,o),s=function(e,t,n){var r;if(e)for(var i=0;i=t:l.to>t))&&(l.from!=t||"bookmark"!=s.type||n&&!l.marker.insertLeft)||(o=null==l.from||(s.inclusiveLeft?l.from<=t:l.frome.lastLine())return t;var n,r=W(e,t);if(!on(e,r))return t;for(;n=Jt(r);)r=n.find(1,!0).line;return H(r)+1}function on(e,t){var n=Gt&&t.markedSpans;if(n)for(var r,i=0;in.maxLineLength&&(n.maxLineLength=t,n.maxLine=e)})}var un=function(e,t,n){this.text=e,Yt(this,t),this.height=n?n(this):1};un.prototype.lineNo=function(){return H(this)},$e(un);var cn={},hn={};function dn(e,t){if(!e||/^\s*$/.test(e))return null;t=t.addModeClass?hn:cn;return t[e]||(t[e]=e.replace(/\S+/g,"cm-$&"))}function fn(e,t){var n=ne("span",null,null,x?"padding-right: .1px":null),r={pre:ne("pre",[n],"CodeMirror-line"),content:n,col:0,pos:0,cm:e,trailingSpace:!1,splitSpaces:e.getOption("lineWrapping")};t.measure={};for(var i=0;i<=(t.rest?t.rest.length:0);i++){var o=i?t.rest[i-1]:t.line,l=void 0,l=(r.pos=0,r.addToken=gn,function(e){if(null!=tt)return tt;var t=y(e,document.createTextNode("AخA")),n=le(t,0,1).getBoundingClientRect(),t=le(t,1,2).getBoundingClientRect();return te(e),n&&n.left!=n.right&&(tt=t.right-n.right<3)}(e.display.measure)&&(l=Ve(o,e.doc.direction))&&(r.addToken=function(h,d){return function(e,t,n,r,i,o,l){n=n?n+" cm-force-border":"cm-force-border";for(var s=e.pos,a=s+t.length;;){for(var u=void 0,c=0;cs&&u.from<=s);c++);if(u.to>=a)return 
[Vendored minified third-party sources omitted: the remainder of the CodeMirror 5
editor bundle and its JavaScript mode, and the new single-line file
adit/mass_transfer/static/mass_transfer/vendor/jsonlint/jsonlint.min.js
(new file mode 100644, index 000000000..89b1b4a1e). The extracted text of these
vendor files is garbled and carries no reviewable content.]
diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_form.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_form.html
index f001889d6..6335a9de5 100644
--- a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_form.html
+++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_form.html
@@ -1,4 +1,5 @@
 {% extends "mass_transfer/mass_transfer_layout.html" %}
+{% load static %}
 {% load crispy from crispy_forms_tags %}
 {% load bootstrap_icon from common_extras %}
 {% block title %}
@@ -6,11 +7,11 @@
 {% endblock title %}
 {% block css %}
   {{ block.super }}
-  {# CodeMirror 5: chosen over CodeMirror 6 because it needs no build step — a single #}
-  {# CSS + JS include gives us syntax highlighting, bracket matching, auto-indent, #}
-  {# and lint markers for JSON. CM6 is more modular but requires bundling ES modules. #}
-
-
+  {# CodeMirror 5: vendored locally (no CDN dependency). Chosen over CM6 because #}
+  {# it needs no build step — single CSS + JS includes give us syntax highlighting, #}
+  {# bracket matching, auto-indent, and lint markers for JSON. #}
+
+
 {% endblock css %}
 {% block heading %}

From bfd9943a76e4777cbe028300ac93901c8cebe725 Mon Sep 17 00:00:00 2001
From: Kai Schlamp
Date: Sun, 29 Mar 2026 11:51:21 +0000
Subject: [PATCH 085/103] Refactor mass transfer processor to create PENDING volumes before transfer

Create MassTransferVolume records in PENDING state immediately after
discovery so they appear in the UI while the task is running. Each volume
is updated in place during transfer and saved via a finally block that
guarantees persistence even on RetriableDicomError.
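In outline, the new control flow looks roughly like the following condensed
sketch (illustrative only, not the literal diff below: transfer() is a
hypothetical stand-in for the per-series export helpers, the field list is
abbreviated, and the model and error names are the ones processors.py already
uses):

    # One PENDING row per discovered series, created up front so the UI can
    # show them while the task is still running.
    volumes = MassTransferVolume.objects.bulk_create(
        [
            MassTransferVolume(
                job_id=job.pk,
                task_id=task.pk,
                series_instance_uid=series.series_instance_uid,
                # ... further DICOM metadata fields ...
                status=MassTransferVolume.Status.PENDING,
            )
            for series in discovered
        ]
    )

    for volume in volumes:
        try:
            # Stand-in for the export/convert helpers; it sets volume.status
            # to EXPORTED, CONVERTED, SKIPPED or ERROR.
            transfer(volume)
        except RetriableDicomError:
            volume.status = MassTransferVolume.Status.ERROR
            volume.log = "Transfer interrupted by retriable error; task will be retried."
            raise  # re-raised so the task is retried; finally still saves the row first
        except Exception as err:
            volume.status = MassTransferVolume.Status.ERROR
            volume.log = str(err)
        finally:
            if volume.status == MassTransferVolume.Status.PENDING:
                # Defensive: never leave a volume stuck in PENDING.
                volume.status = MassTransferVolume.Status.ERROR
            volume.save(update_fields=["status", "log", "updated"])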
Key changes:
- Remove SeriesTransferResult dataclass; transfer methods now update
  MassTransferVolume fields directly instead of returning result objects
- Replace _group_series() with _create_pending_volumes() + _group_volumes()
- Remove _create_volume_record(); volumes are bulk-created upfront
- Add persistent parameter to DicomOperator to abstract over
  DimseConnector.auto_close
- Move destination setup (dest_operator/output_base) before try block with
  assert ensuring exactly one is set
- Add defensive PENDING check and safe volume.save() in finally block
- Replace protocol-specific comments (C-GET/C-FIND/DIMSE) with
  protocol-agnostic terms (fetch/query)
- Restyle task detail template with Back to Job button and pagination
- Add test fixture and _fake_export_success helper to reduce boilerplate

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 adit/core/utils/dicom_operator.py           |   2 +
 adit/mass_transfer/processors.py            | 542 +++++++++---------
 .../mass_transfer_task_detail.html          |  19 +-
 adit/mass_transfer/tests/test_processor.py  | 438 +++++++-------
 4 files changed, 523 insertions(+), 478 deletions(-)

diff --git a/adit/core/utils/dicom_operator.py b/adit/core/utils/dicom_operator.py
index 9713a7882..3b9e5e3f3 100644
--- a/adit/core/utils/dicom_operator.py
+++ b/adit/core/utils/dicom_operator.py
@@ -43,11 +43,13 @@ class DicomOperator:
     def __init__(
         self,
         server: DicomServer,
+        persistent: bool = False,
         dimse_timeout: int | None = 60,
     ):
         self.server = server
         self.dimse_connector = DimseConnector(
             server,
+            auto_close=not persistent,
             dimse_timeout=dimse_timeout,
         )
         # TODO: also make retries and timeouts possible in DicomWebConnector
diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py
index 1a68d0433..d19c4b9fb 100644
--- a/adit/mass_transfer/processors.py
+++ b/adit/mass_transfer/processors.py
@@ -3,7 +3,6 @@
 import hashlib
 import json
 import logging
-import random
 import secrets
 import shutil
 import tempfile
@@ -67,6 +66,8 @@ def from_dict(cls, d: dict) -> "FilterSpec":
 logger = logging.getLogger(__name__)
 
 _MIN_SPLIT_WINDOW = timedelta(minutes=30)
+_DELAY_BETWEEN_SERIES = 0.5  # seconds between fetch requests to avoid overwhelming the PACS
+_DELAY_RETRY_FETCH = 3  # seconds before retrying a fetch that returned 0 images
 
 
 @dataclass
@@ -85,19 +86,6 @@ class DiscoveredSeries:
     patient_birth_date: date | None = None
 
 
-@dataclass
-class SeriesTransferResult:
-    """Result of transferring a single series."""
-
-    image_count: int = 0
-    study_uid_pseudonymized: str = ""
-    series_uid_pseudonymized: str = ""
-    nifti_files: list[Path] | None = None
-    status: str = ""
-    log: str = ""
-    error: str | None = None
-
-
 def _dicom_match(pattern: str, value: str | None) -> bool:
     if not pattern:
         return True
@@ -220,7 +208,7 @@ def _birth_date_range(
     min_age: int | None,
     max_age: int | None,
 ) -> tuple[date, date] | None:
-    """Compute a PatientBirthDate range for C-FIND from age bounds.
+    """Compute a PatientBirthDate range for study queries from age bounds.
Uses the widest possible range: someone who is max_age on the earliest study date was born at the latest on study_start - max_age years, and @@ -284,8 +272,12 @@ def process(self): raise DicomError("Mass transfer destination must be a DICOM folder or server.") dest_operator: DicomOperator | None = None + output_base: Path | None = None if destination_node.node_type == DicomNode.NodeType.SERVER: dest_operator = DicomOperator(destination_node.dicomserver) + else: + assert destination_node.node_type == DicomNode.NodeType.FOLDER + output_base = _destination_base_dir(destination_node, job) try: filters = job.get_filters() @@ -297,151 +289,164 @@ def process(self): "log": "Mass transfer requires at least one filter.", } - pseudonymizer: Pseudonymizer | None = None - if job.pseudonymize and job.pseudonym_salt: - pseudonymizer = Pseudonymizer(seed=job.pseudonym_salt) - elif job.pseudonymize: - pseudonymizer = Pseudonymizer() - - output_base: Path | None = None - if destination_node.node_type == DicomNode.NodeType.FOLDER: - output_base = _destination_base_dir(destination_node, job) - - # Clean up partition on retry: delete the partition folder on disk + # Clean up on retry + if output_base: partition_path = output_base / self.mass_task.partition_key if partition_path.exists(): shutil.rmtree(partition_path) - # Always clean up existing DB volume records on retry MassTransferVolume.objects.filter( job=job, partition_key=self.mass_task.partition_key, ).delete() - operator = DicomOperator(source_node.dicomserver) - operator.dimse_connector.auto_close = False + pseudonymizer: Pseudonymizer | None = None + if job.pseudonymize and job.pseudonym_salt: + pseudonymizer = Pseudonymizer(seed=job.pseudonym_salt) + elif job.pseudonymize: + pseudonymizer = Pseudonymizer() - # Discovery: With DIMSE one C-FIND association for all queries + operator = DicomOperator(source_node.dicomserver, persistent=True) + + # Discovery: query the source server for all matching series discovered = self._discover_series(operator, filters) operator.close() - grouped, subject_ids = self._group_series(discovered, job, pseudonymizer) + # Create PENDING volumes so they appear in the UI immediately + volumes = self._create_pending_volumes(discovered, job, pseudonymizer) + grouped_volumes = self._group_volumes(volumes) - # Transfer: With DIMSE one C-GET association per study + # Transfer: fetch series grouped by study return self._transfer_grouped_series( operator, - grouped, - subject_ids, + grouped_volumes, job, pseudonymizer, output_base, - discovered, dest_operator, ) finally: if dest_operator: dest_operator.close() - def _group_series( + def _create_pending_volumes( self, discovered: list[DiscoveredSeries], job: MassTransferJob, pseudonymizer: Pseudonymizer | None, - ) -> tuple[ - dict[str, dict[str, list[DiscoveredSeries]]], - dict[str, str], - ]: - """Group discovered series by patient -> study. - - Returns: - grouped: {patient_id: {study_uid: [series, ...]}} - subject_ids: {patient_id: subject_id} mapping for folder names. - For deterministic (linked) mode, maps patient_id -> pseudonym. - For no-anonymization mode, maps patient_id -> sanitized patient_id. - For random mode, subject_ids is empty here — random pseudonyms - are assigned per-study in _transfer_grouped_series. + ) -> list[MassTransferVolume]: + """Bulk-create PENDING volumes for all discovered series. + + Handles all three pseudonym modes: + - Deterministic (linked): same patient always gets same pseudonym. + - Random: per-study random pseudonym. 
+ - No pseudonymization: pseudonym left empty. """ - subject_ids: dict[str, str] = {} - grouped: dict[str, dict[str, list[DiscoveredSeries]]] = {} + deterministic_ids: dict[str, str] = {} + random_pseudonyms: dict[str, str] = {} + volumes = [] for series in discovered: pid = series.patient_id study_uid = series.study_instance_uid - if pid not in subject_ids: - if pseudonymizer and job.pseudonym_salt: - # Deterministic (linked): same patient always gets same pseudonym - subject_ids[pid] = pseudonymizer.compute_pseudonym(pid) - elif not pseudonymizer: - # No anonymization: use raw patient ID as folder name - subject_ids[pid] = sanitize_filename(pid) - # else: random mode — subject_id assigned per-study during transfer - - grouped.setdefault(pid, {}).setdefault(study_uid, []).append(series) + if pseudonymizer and job.pseudonym_salt: + if pid not in deterministic_ids: + deterministic_ids[pid] = pseudonymizer.compute_pseudonym(pid) + pseudonym = deterministic_ids[pid] + elif pseudonymizer: + if study_uid not in random_pseudonyms: + random_pseudonyms[study_uid] = secrets.token_hex(6).upper() + pseudonym = random_pseudonyms[study_uid] + else: + pseudonym = "" + + volumes.append( + MassTransferVolume( + job_id=job.pk, + task_id=self.mass_task.pk, + partition_key=self.mass_task.partition_key, + patient_id=series.patient_id, + pseudonym=pseudonym, + accession_number=series.accession_number, + study_instance_uid=series.study_instance_uid, + series_instance_uid=series.series_instance_uid, + modality=series.modality, + study_description=series.study_description, + series_description=series.series_description, + series_number=series.series_number, + study_datetime=timezone.make_aware(series.study_datetime), + institution_name=series.institution_name, + number_of_images=series.number_of_images, + status=MassTransferVolume.Status.PENDING, + ) + ) - return grouped, subject_ids + return MassTransferVolume.objects.bulk_create(volumes) + + @staticmethod + def _group_volumes( + volumes: list[MassTransferVolume], + ) -> dict[str, dict[str, list[MassTransferVolume]]]: + """Group volumes by patient_id -> study_instance_uid.""" + grouped: dict[str, dict[str, list[MassTransferVolume]]] = {} + for vol in volumes: + grouped.setdefault(vol.patient_id, {}).setdefault(vol.study_instance_uid, []).append( + vol + ) + return grouped def _transfer_grouped_series( self, operator: DicomOperator, - grouped: dict[str, dict[str, list[DiscoveredSeries]]], - subject_ids: dict[str, str], + grouped_volumes: dict[str, dict[str, list[MassTransferVolume]]], job: MassTransferJob, pseudonymizer: Pseudonymizer | None, output_base: Path | None, - discovered: list[DiscoveredSeries], dest_operator: DicomOperator | None = None, ) -> dict: - """Transfer all grouped series via C-GET. + """Transfer all grouped series. - Iterates patients -> studies -> series. + Iterates patients -> studies -> volumes, updating each volume in place. 
""" total_processed = 0 total_skipped = 0 total_failed = 0 + total_volumes = 0 + study_count = 0 failed_reasons: dict[str, int] = {} - random_pseudonyms: dict[str, str] = {} - - for patient_id, studies in grouped.items(): - subject_id = subject_ids.get(patient_id, "") - for study_uid, series_list in studies.items(): - # For random pseudonymization, use per-study pseudonym - if pseudonymizer and not job.pseudonym_salt: - if study_uid not in random_pseudonyms: - random_pseudonyms[study_uid] = secrets.token_hex(6).upper() - subject_id = random_pseudonyms[study_uid] + for patient_id, studies in grouped_volumes.items(): + for study_uid, volumes_list in studies.items(): + study_count += 1 - # One C-GET association per study + # One fetch association per study try: - for series in series_list: - # Small delay between C-GET requests to avoid - # overwhelming the PACS. + for volume in volumes_list: + total_volumes += 1 + if total_processed + total_failed + total_skipped > 0: - time.sleep(0.5) + # TODO: Investigate why such a pacing delay is really needed. + # Such a timeout was never necessary with batch transfer where we also + # transfer series one by one. + time.sleep(_DELAY_BETWEEN_SERIES) - result = self._transfer_single_series( + subject_id = volume.pseudonym or sanitize_filename(volume.patient_id) + self._transfer_single_series( operator, - series, + volume, job, pseudonymizer, subject_id, output_base, dest_operator, ) - self._create_volume_record( - job, - series, - subject_id, - pseudonymizer, - result, - ) - if result.status == MassTransferVolume.Status.ERROR: + if volume.status == MassTransferVolume.Status.ERROR: total_failed += 1 - reason = result.error or "C-GET returned 0 images" + reason = _short_error_reason(volume.log) if volume.log else "Unknown" failed_reasons[reason] = failed_reasons.get(reason, 0) + 1 - elif result.status == MassTransferVolume.Status.SKIPPED: + elif volume.status == MassTransferVolume.Status.SKIPPED: total_skipped += 1 else: total_processed += 1 @@ -449,7 +454,8 @@ def _transfer_grouped_series( operator.close() return self._build_task_summary( - discovered, + total_volumes, + study_count, total_processed, total_skipped, total_failed, @@ -459,270 +465,245 @@ def _transfer_grouped_series( def _transfer_single_series( self, operator: DicomOperator, - series: DiscoveredSeries, + volume: MassTransferVolume, job: MassTransferJob, pseudonymizer: Pseudonymizer | None, subject_id: str, output_base: Path | None, dest_operator: DicomOperator | None = None, - ) -> SeriesTransferResult: + ) -> None: """Export (and optionally convert) a single series. - Returns a result object; never raises except for RetriableDicomError. + Updates volume fields in place and saves. Never raises except for + RetriableDicomError. """ try: if dest_operator: - return self._export_series_to_server( + self._export_series_to_server( operator, - series, + volume, pseudonymizer, subject_id, dest_operator, ) - - if output_base is None: - raise DicomError( - "output_base is None for folder destination; this should not happen." 
+ else: + assert output_base is not None + study_folder = _study_folder_name( + volume.study_description, + volume.study_datetime, + volume.study_instance_uid, ) - study_folder = _study_folder_name( - series.study_description, - series.study_datetime, - series.study_instance_uid, - ) - series_folder = _series_folder_name( - series.series_description, - series.series_number, - series.series_instance_uid, - ) - output_path = ( - output_base - / self.mass_task.partition_key - / subject_id - / study_folder - / series_folder - ) - - if job.convert_to_nifti: - return self._export_and_convert_series( - operator, - series, - pseudonymizer, - subject_id, - output_path, + series_folder = _series_folder_name( + volume.series_description, + volume.series_number, + volume.series_instance_uid, ) - return self._export_series_to_folder( - operator, - series, - pseudonymizer, - subject_id, - output_path, - ) + output_path = ( + output_base + / self.mass_task.partition_key + / subject_id + / study_folder + / series_folder + ) + + if job.convert_to_nifti: + self._export_and_convert_series( + operator, + volume, + pseudonymizer, + subject_id, + output_path, + ) + else: + self._export_series_to_folder( + operator, + volume, + pseudonymizer, + subject_id, + output_path, + ) except RetriableDicomError: + volume.status = MassTransferVolume.Status.ERROR + volume.log = "Transfer interrupted by retriable error; task will be retried." raise except Exception as err: logger.exception( "Mass transfer failed for series %s", - series.series_instance_uid, - ) - return SeriesTransferResult( - status=MassTransferVolume.Status.ERROR, - log=str(err), - error=_short_error_reason(str(err)), + volume.series_instance_uid, ) + volume.status = MassTransferVolume.Status.ERROR + volume.log = str(err) + finally: + if volume.status == MassTransferVolume.Status.PENDING: + logger.error( + "Volume %s still PENDING after transfer — setting to ERROR.", + volume.series_instance_uid, + ) + volume.status = MassTransferVolume.Status.ERROR + volume.log = "Internal error: volume status was not updated after transfer." + try: + volume.save( + update_fields=[ + "status", + "log", + "study_instance_uid_pseudonymized", + "series_instance_uid_pseudonymized", + "converted_file", + "updated", + ] + ) + except Exception: + logger.exception( + "Failed to save volume %s status to database", + volume.series_instance_uid, + ) def _export_and_convert_series( self, operator: DicomOperator, - series: DiscoveredSeries, + volume: MassTransferVolume, pseudonymizer: Pseudonymizer | None, subject_id: str, output_path: Path, - ) -> SeriesTransferResult: - """Export a series to a temp dir, then convert to NIfTI.""" + ) -> None: + """Export a series to a temp dir, then convert to NIfTI. + + Updates volume fields in place (status, pseudonymized UIDs, converted_file). 
+ """ with tempfile.TemporaryDirectory() as tmp_dir: tmp_path = Path(tmp_dir) image_count, study_uid_pseudonymized, series_uid_pseudonymized = self._export_series( operator, - series, + volume, tmp_path, subject_id, pseudonymizer, ) if image_count == 0: - return self._zero_image_result( - series, study_uid_pseudonymized, series_uid_pseudonymized + self._set_zero_image_status( + volume, study_uid_pseudonymized, series_uid_pseudonymized ) + return dicom_metadata = _extract_dicom_metadata(tmp_path) - nifti_files = self._convert_series(series, tmp_path, output_path) + nifti_files = self._convert_series(volume, tmp_path, output_path) + + volume.study_instance_uid_pseudonymized = study_uid_pseudonymized + volume.series_instance_uid_pseudonymized = series_uid_pseudonymized if nifti_files: series_folder = _series_folder_name( - series.series_description, - series.series_number, - series.series_instance_uid, + volume.series_description, + volume.series_number, + volume.series_instance_uid, ) _write_dicom_metadata(output_path, series_folder, dicom_metadata) - return SeriesTransferResult( - image_count=image_count, - study_uid_pseudonymized=study_uid_pseudonymized, - series_uid_pseudonymized=series_uid_pseudonymized, - nifti_files=nifti_files, - status=MassTransferVolume.Status.CONVERTED, - log="", - ) - - return SeriesTransferResult( - image_count=image_count, - study_uid_pseudonymized=study_uid_pseudonymized, - series_uid_pseudonymized=series_uid_pseudonymized, - status=MassTransferVolume.Status.SKIPPED, - log="No valid DICOM images for NIfTI conversion", - ) + volume.converted_file = "\n".join(str(f) for f in nifti_files) + volume.status = MassTransferVolume.Status.CONVERTED + else: + volume.status = MassTransferVolume.Status.SKIPPED + volume.log = "No valid DICOM images for NIfTI conversion" def _export_series_to_folder( self, operator: DicomOperator, - series: DiscoveredSeries, + volume: MassTransferVolume, pseudonymizer: Pseudonymizer | None, subject_id: str, output_path: Path, - ) -> SeriesTransferResult: - """Export a series directly to the output folder (no NIfTI conversion).""" + ) -> None: + """Export a series directly to the output folder (no NIfTI conversion). + + Updates volume fields in place (status, pseudonymized UIDs). + """ image_count, study_uid_pseudonymized, series_uid_pseudonymized = self._export_series( operator, - series, + volume, output_path, subject_id, pseudonymizer, ) if image_count == 0: - return self._zero_image_result( - series, study_uid_pseudonymized, series_uid_pseudonymized - ) + self._set_zero_image_status(volume, study_uid_pseudonymized, series_uid_pseudonymized) + return - return SeriesTransferResult( - image_count=image_count, - study_uid_pseudonymized=study_uid_pseudonymized, - series_uid_pseudonymized=series_uid_pseudonymized, - status=MassTransferVolume.Status.EXPORTED, - log="", - ) + volume.study_instance_uid_pseudonymized = study_uid_pseudonymized + volume.series_instance_uid_pseudonymized = series_uid_pseudonymized + volume.status = MassTransferVolume.Status.EXPORTED def _export_series_to_server( self, operator: DicomOperator, - series: DiscoveredSeries, + volume: MassTransferVolume, pseudonymizer: Pseudonymizer | None, subject_id: str, dest_operator: DicomOperator, - ) -> SeriesTransferResult: - """Export a series to a temp dir and upload to a destination server.""" + ) -> None: + """Export a series to a temp dir and upload to a destination server. + + Updates volume fields in place (status, pseudonymized UIDs). 
+ """ with tempfile.TemporaryDirectory(prefix="adit_") as tmpdir: tmp_path = Path(tmpdir) image_count, study_uid_pseudonymized, series_uid_pseudonymized = self._export_series( operator, - series, + volume, tmp_path, subject_id, pseudonymizer, ) if image_count == 0: - return self._zero_image_result( - series, study_uid_pseudonymized, series_uid_pseudonymized + self._set_zero_image_status( + volume, study_uid_pseudonymized, series_uid_pseudonymized ) + return logger.debug( "Uploading %d images for series %s to destination server", image_count, - series.series_instance_uid, + volume.series_instance_uid, ) dest_operator.upload_images(tmp_path) - return SeriesTransferResult( - image_count=image_count, - study_uid_pseudonymized=study_uid_pseudonymized, - series_uid_pseudonymized=series_uid_pseudonymized, - status=MassTransferVolume.Status.EXPORTED, - log="", - ) + volume.study_instance_uid_pseudonymized = study_uid_pseudonymized + volume.series_instance_uid_pseudonymized = series_uid_pseudonymized + volume.status = MassTransferVolume.Status.EXPORTED - def _zero_image_result( - self, - series: DiscoveredSeries, + @staticmethod + def _set_zero_image_status( + volume: MassTransferVolume, study_uid_pseudonymized: str, series_uid_pseudonymized: str, - ) -> SeriesTransferResult: - """Build a result for a series where C-GET returned 0 images.""" - if series.number_of_images == 0: - return SeriesTransferResult( - study_uid_pseudonymized=study_uid_pseudonymized, - series_uid_pseudonymized=series_uid_pseudonymized, - status=MassTransferVolume.Status.SKIPPED, - log="Non-image series (0 instances in PACS)", - ) - return SeriesTransferResult( - study_uid_pseudonymized=study_uid_pseudonymized, - series_uid_pseudonymized=series_uid_pseudonymized, - status=MassTransferVolume.Status.ERROR, - log=(f"C-GET returned 0 images (PACS reports {series.number_of_images} instances)"), - error="C-GET returned 0 images", - ) - - def _create_volume_record( - self, - job: MassTransferJob, - series: DiscoveredSeries, - subject_id: str, - pseudonymizer: Pseudonymizer | None, - result: SeriesTransferResult, ) -> None: - """Persist a MassTransferVolume for one series transfer result.""" - converted_file = "" - if result.nifti_files: - converted_file = "\n".join(str(f) for f in result.nifti_files) - - MassTransferVolume.objects.create( - job=job, - task=self.mass_task, - partition_key=self.mass_task.partition_key, - patient_id=series.patient_id, - pseudonym=subject_id if pseudonymizer else "", - accession_number=series.accession_number, - study_instance_uid=series.study_instance_uid, - study_instance_uid_pseudonymized=result.study_uid_pseudonymized, - series_instance_uid=series.series_instance_uid, - series_instance_uid_pseudonymized=result.series_uid_pseudonymized, - modality=series.modality, - study_description=series.study_description, - series_description=series.series_description, - series_number=series.series_number, - study_datetime=timezone.make_aware(series.study_datetime), - institution_name=series.institution_name, - number_of_images=series.number_of_images, - converted_file=converted_file, - status=result.status, - log=result.log, - ) + """Set status on a volume where the fetch returned 0 images.""" + volume.study_instance_uid_pseudonymized = study_uid_pseudonymized + volume.series_instance_uid_pseudonymized = series_uid_pseudonymized + if volume.number_of_images == 0: + volume.status = MassTransferVolume.Status.SKIPPED + volume.log = "Non-image series (0 instances in PACS)" + else: + volume.status = 
MassTransferVolume.Status.ERROR + volume.log = ( + f"Fetch returned 0 images (PACS reports {volume.number_of_images} instances)" + ) def _build_task_summary( self, - discovered: list[DiscoveredSeries], + total_volumes: int, + study_count: int, total_processed: int, total_skipped: int, total_failed: int, failed_reasons: dict[str, int], ) -> dict: """Build the final status dict returned to the task processor.""" - study_uids = {s.study_instance_uid for s in discovered} - log_lines = [ f"Partition {self.mass_task.partition_key}", - f"Studies found: {len(study_uids)}", - f"Series found: {len(discovered)}", + f"Studies found: {study_count}", + f"Series found: {total_volumes}", f"Processed: {total_processed}", ] if total_skipped: @@ -734,7 +715,7 @@ def _build_task_summary( for reason, count in failed_reasons.items(): log_lines.append(f" {count}x {reason}") - if not discovered: + if total_volumes == 0: status = MassTransferTask.Status.SUCCESS message = "No series found for this partition." elif total_failed and not total_processed: @@ -751,7 +732,7 @@ def _build_task_summary( status = ( MassTransferTask.Status.WARNING if total_failed else MassTransferTask.Status.SUCCESS ) - message = f"{len(study_uids)} studies, {total_series} series ({', '.join(parts)})." + message = f"{study_count} studies, {total_series} series ({', '.join(parts)})." return { "status": status, @@ -786,7 +767,7 @@ def _discover_series( continue # Exact client-side age filtering using actual StudyDate and - # PatientBirthDate (the C-FIND birth date range is approximate). + # PatientBirthDate (the query birth date range is approximate). birth_date = study.PatientBirthDate has_age_filter = mf.min_age is not None or mf.max_age is not None if birth_date and study.StudyDate and has_age_filter: @@ -958,7 +939,7 @@ def _study_has_institution( def _export_series( self, operator: DicomOperator, - series: DiscoveredSeries, + volume: MassTransferVolume, output_path: Path, subject_id: str, pseudonymizer: Pseudonymizer | None, @@ -993,39 +974,38 @@ def callback(ds: Dataset | None) -> None: write_dataset(ds, output_path / file_name) image_count += 1 - # IMPAX returns "Success with 0 sub-operations" for two reasons: + # Some PACS return 0 images for two reasons: # 1. Transient: PACS is overwhelmed by rapid requests (fixed by pacing) - # 2. Permanent: series is archived/offline and can't be served via C-GET + # 2. Permanent: series is archived/offline and can't be served # One retry after a short delay distinguishes the two cases. If the # second attempt also fails, the series is unretrievable — move on and # let the ERROR status trigger a retry on the next task run. 
operator.fetch_series( - patient_id=series.patient_id, - study_uid=series.study_instance_uid, - series_uid=series.series_instance_uid, + patient_id=volume.patient_id, + study_uid=volume.study_instance_uid, + series_uid=volume.series_instance_uid, callback=callback, ) - if image_count == 0 and series.number_of_images > 0: - delay = 3 + random.random() * 2 + if image_count == 0 and volume.number_of_images > 0: logger.warning( - "C-GET returned 0 images for %s (PACS reports %d) — retrying in %.0fs", - series.series_instance_uid, - series.number_of_images, - delay, + "Fetch returned 0 images for %s (PACS reports %d) — retrying in %ds", + volume.series_instance_uid, + volume.number_of_images, + _DELAY_RETRY_FETCH, ) - time.sleep(delay) + time.sleep(_DELAY_RETRY_FETCH) operator.fetch_series( - patient_id=series.patient_id, - study_uid=series.study_instance_uid, - series_uid=series.series_instance_uid, + patient_id=volume.patient_id, + study_uid=volume.study_instance_uid, + series_uid=volume.series_instance_uid, callback=callback, ) - if image_count == 0 and series.number_of_images > 0: + if image_count == 0 and volume.number_of_images > 0: logger.error( - "C-GET returned 0 images for %s (PACS reports %d) — series may be archived/offline", - series.series_instance_uid, - series.number_of_images, + "Fetch returned 0 images for %s (PACS reports %d) — may be archived/offline", + volume.series_instance_uid, + volume.number_of_images, ) if image_count == 0: @@ -1039,7 +1019,7 @@ def callback(ds: Dataset | None) -> None: def _convert_series( self, - series: DiscoveredSeries, + volume: MassTransferVolume, dicom_dir: Path, output_path: Path, ) -> list[Path]: @@ -1061,12 +1041,12 @@ def _convert_series( pass return [] raise DicomError( - f"Conversion failed for series {series.series_instance_uid}: {err_msg}" + f"Conversion failed for series {volume.series_instance_uid}: {err_msg}" ) nifti_files = sorted(output_path.glob("*.nii.gz")) if not nifti_files: raise DicomError( - f"dcm2niix produced no .nii.gz files for series {series.series_instance_uid}" + f"dcm2niix produced no .nii.gz files for series {volume.series_instance_uid}" ) return nifti_files diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html index e6d1a37b2..ed587f814 100644 --- a/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_task_detail.html @@ -3,11 +3,19 @@ {% load task_control_panel from mass_transfer_extras %} {% load render_table from django_tables2 %} {% load crispy from crispy_forms_tags %} +{% load bootstrap_icon from common_extras %} {% block title %} Mass Transfer Task {% endblock title %} {% block heading %} - + + +
+ {% bootstrap_icon "arrow-left" %} + Back to Job + +
+
{% endblock heading %} {% block content %}
@@ -28,10 +36,11 @@
Log
{{ task.log|default:"" }}
-
Volumes
+ + {% crispy filter.form %} -
- {% render_table table %} -
+
+
+ {% render_table table %} {% task_control_panel %} {% endblock content %} diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 1456c1675..a04d74b15 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -1,5 +1,6 @@ from datetime import date, datetime, timedelta from pathlib import Path +from types import SimpleNamespace from unittest.mock import MagicMock import pytest @@ -44,6 +45,11 @@ def _make_study(study_uid: str, study_date: str = "20240101") -> ResultDataset: return ResultDataset(ds) +def _fake_export_success(*args, **kwargs): + """Stub for _export_series that simulates a successful single-image export.""" + return (1, "", "") + + def _make_discovered( *, patient_id: str = "PAT1", @@ -98,6 +104,38 @@ def _make_filter(**kwargs) -> FilterSpec: ) +@pytest.fixture +def mass_transfer_env(tmp_path): + """Common setup for DB integration tests: settings, user, source, folder dest, job, task.""" + MassTransferSettings.objects.create() + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create(path=str(tmp_path / "output")) + job = MassTransferJob.objects.create( + owner=user, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 1), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + pseudonymize=False, + pseudonym_salt="", + ) + job.filters_json = [{"modality": "CT"}] + job.save(update_fields=["filters_json"]) + task = MassTransferTask.objects.create( + job=job, + source=source, + destination=destination, + patient_id="", + study_uid="", + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) + return SimpleNamespace( + job=job, task=task, source=source, destination=destination, user=user + ) + + @pytest.mark.django_db def test_find_studies_raises_when_time_window_too_small(mocker: MockerFixture): MassTransferSettings.objects.create() @@ -349,19 +387,18 @@ def _make_process_env( mocker.patch.object(processor, "is_suspended", return_value=False) mocker.patch("adit.mass_transfer.processors.DicomOperator") - # Mock DB queries for deferred insertion + # Mock DB operations used by the processor mocker.patch.object( MassTransferVolume.objects, "filter", - return_value=mocker.MagicMock( - values_list=mocker.MagicMock( - return_value=mocker.MagicMock( - __iter__=lambda self: iter([]), - ) - ), - delete=mocker.MagicMock(), - ), + return_value=mocker.MagicMock(delete=mocker.MagicMock()), + ) + mocker.patch.object( + MassTransferVolume.objects, + "bulk_create", + side_effect=lambda objs: objs, ) + mocker.patch.object(MassTransferVolume, "save") return processor @@ -405,19 +442,18 @@ def _make_process_env_server_dest( side_effect=[dest_operator, source_mock], ) - # Mock DB queries for deferred insertion + # Mock DB operations used by the processor mocker.patch.object( MassTransferVolume.objects, "filter", - return_value=mocker.MagicMock( - values_list=mocker.MagicMock( - return_value=mocker.MagicMock( - __iter__=lambda self: iter([]), - ) - ), - delete=mocker.MagicMock(), - ), + return_value=mocker.MagicMock(delete=mocker.MagicMock()), + ) + mocker.patch.object( + MassTransferVolume.objects, + "bulk_create", + side_effect=lambda objs: objs, ) + mocker.patch.object(MassTransferVolume, "save") return processor, dest_operator @@ -455,7 +491,6 @@ def fake_export(*args, **kwargs): return (1, "", "") mocker.patch.object(processor, "_export_series", side_effect=fake_export) - 
mocker.patch.object(MassTransferVolume.objects, "create") result = processor.process() @@ -473,7 +508,6 @@ def test_process_returns_failure_when_all_fail(mocker: MockerFixture, tmp_path: mocker.patch.object(processor, "_discover_series", return_value=series) mocker.patch.object(processor, "_export_series", side_effect=DicomError("PACS down")) - mocker.patch.object(MassTransferVolume.objects, "create") result = processor.process() @@ -541,7 +575,6 @@ def fake_export(*args, **kwargs): return (1, "", "") mocker.patch.object(processor, "_export_series", side_effect=fake_export) - mocker.patch.object(MassTransferVolume.objects, "create") result = processor.process() @@ -569,7 +602,6 @@ def fake_export(op, s, path, subject_id, pseudonymizer): return (1, "pseudo-study-uid", "pseudo-series-uid") mocker.patch.object(processor, "_export_series", side_effect=fake_export) - mocker.patch.object(MassTransferVolume.objects, "create") result = processor.process() @@ -587,11 +619,7 @@ def test_process_server_destination_cleans_volumes_on_retry(mocker: MockerFixtur mock_filter_qs = mocker.MagicMock() mocker.patch.object(MassTransferVolume.objects, "filter", return_value=mock_filter_qs) - def fake_export(*args, **kwargs): - return (1, "", "") - - mocker.patch.object(processor, "_export_series", side_effect=fake_export) - mocker.patch.object(MassTransferVolume.objects, "create") + mocker.patch.object(processor, "_export_series", side_effect=_fake_export_success) processor.process() @@ -605,7 +633,6 @@ def test_process_server_destination_closes_dest_operator(mocker: MockerFixture): mocker.patch.object(processor, "_discover_series", return_value=series) mocker.patch.object(processor, "_export_series", side_effect=DicomError("PACS down")) - mocker.patch.object(MassTransferVolume.objects, "create") processor.process() @@ -615,18 +642,47 @@ def test_process_server_destination_closes_dest_operator(mocker: MockerFixture): def test_export_series_to_server_skips_upload_on_zero_images(mocker: MockerFixture): """When _export_series returns 0 images, upload_images must NOT be called.""" processor = _make_processor(mocker) - series = _make_discovered(series_uid="s-1") + volume = MassTransferVolume( + series_instance_uid="s-1", + study_instance_uid="study-1", + patient_id="PAT1", + number_of_images=10, + study_datetime=timezone.now(), + ) mock_operator = mocker.MagicMock() mock_dest_operator = mocker.MagicMock() mocker.patch.object(processor, "_export_series", return_value=(0, "", "")) - result = processor._export_series_to_server( - mock_operator, series, None, "subject-1", mock_dest_operator + processor._export_series_to_server( + mock_operator, volume, None, "subject-1", mock_dest_operator ) mock_dest_operator.upload_images.assert_not_called() - assert result.status == MassTransferVolume.Status.ERROR + assert volume.status == MassTransferVolume.Status.ERROR + + +def test_export_series_to_server_skips_non_image_series(mocker: MockerFixture): + """Non-image series (0 instances in PACS) gets SKIPPED, not ERROR.""" + processor = _make_processor(mocker) + volume = MassTransferVolume( + series_instance_uid="s-1", + study_instance_uid="study-1", + patient_id="PAT1", + number_of_images=0, + study_datetime=timezone.now(), + ) + mock_operator = mocker.MagicMock() + mock_dest_operator = mocker.MagicMock() + + mocker.patch.object(processor, "_export_series", return_value=(0, "", "")) + + processor._export_series_to_server( + mock_operator, volume, None, "subject-1", mock_dest_operator + ) + + 
mock_dest_operator.upload_images.assert_not_called() + assert volume.status == MassTransferVolume.Status.SKIPPED def test_server_destination_upload_dicom_error_marks_failure(mocker: MockerFixture): @@ -641,7 +697,6 @@ def fake_export(op, s, path, subject_id, pseudonymizer): mocker.patch.object(processor, "_export_series", side_effect=fake_export) mock_dest_operator.upload_images.side_effect = DicomError("C-STORE rejected") - mocker.patch.object(MassTransferVolume.objects, "create") result = processor.process() @@ -660,7 +715,6 @@ def fake_export(op, s, path, subject_id, pseudonymizer): mocker.patch.object(processor, "_export_series", side_effect=fake_export) mock_dest_operator.upload_images.side_effect = RetriableDicomError("Connection reset") - mocker.patch.object(MassTransferVolume.objects, "create") with pytest.raises(RetriableDicomError, match="Connection reset"): processor.process() @@ -680,7 +734,6 @@ def fake_export(op, s, path, subject_id, pseudonymizer): return (1, "", "") mocker.patch.object(processor, "_export_series", side_effect=fake_export) - mocker.patch.object(MassTransferVolume.objects, "create") result = processor.process() @@ -708,7 +761,6 @@ def fake_export(op, s, path, subject_id, pseudonymizer): return (1, "", "") mocker.patch.object(processor, "_export_series", side_effect=fake_export) - mocker.patch.object(MassTransferVolume.objects, "create") processor.process() @@ -737,7 +789,6 @@ def fake_export(op, s, path, subject_id, pseudonymizer): return (1, "", "") mocker.patch.object(processor, "_export_series", side_effect=fake_export) - mocker.patch.object(MassTransferVolume.objects, "create") processor.process() @@ -765,7 +816,6 @@ def fake_export(op, s, path, subject_id, pseudonymizer): return (1, "", "") mocker.patch.object(processor, "_export_series", side_effect=fake_export) - mocker.patch.object(MassTransferVolume.objects, "create") processor.process() @@ -786,7 +836,7 @@ def fake_export(op, s, path, subject_id, pseudonymizer): def test_convert_series_raises_on_dcm2niix_failure(mocker: MockerFixture, tmp_path: Path): processor = _make_processor(mocker) - series = _make_discovered(series_uid="1.2.3") + volume = MassTransferVolume(series_instance_uid="1.2.3", study_datetime=timezone.now()) dicom_dir = tmp_path / "dicom_input" dicom_dir.mkdir() @@ -798,12 +848,12 @@ def test_convert_series_raises_on_dcm2niix_failure(mocker: MockerFixture, tmp_pa ) with pytest.raises(DicomError, match="Conversion failed"): - processor._convert_series(series, dicom_dir, output_path) + processor._convert_series(volume, dicom_dir, output_path) def test_convert_series_raises_when_no_nifti_output(mocker: MockerFixture, tmp_path: Path): processor = _make_processor(mocker) - series = _make_discovered(series_uid="1.2.3") + volume = MassTransferVolume(series_instance_uid="1.2.3", study_datetime=timezone.now()) dicom_dir = tmp_path / "dicom_input" dicom_dir.mkdir() @@ -815,12 +865,12 @@ def test_convert_series_raises_when_no_nifti_output(mocker: MockerFixture, tmp_p ) with pytest.raises(DicomError, match="no .nii.gz files"): - processor._convert_series(series, dicom_dir, output_path) + processor._convert_series(volume, dicom_dir, output_path) def test_convert_series_skips_non_image_dicom(mocker: MockerFixture, tmp_path: Path): processor = _make_processor(mocker) - series = _make_discovered(series_uid="1.2.3") + volume = MassTransferVolume(series_instance_uid="1.2.3", study_datetime=timezone.now()) dicom_dir = tmp_path / "dicom_input" dicom_dir.mkdir() @@ -832,7 +882,7 @@ def 
test_convert_series_skips_non_image_dicom(mocker: MockerFixture, tmp_path: P ) # Should not raise — non-image DICOMs are silently skipped - processor._convert_series(series, dicom_dir, output_path) + processor._convert_series(volume, dicom_dir, output_path) # --------------------------------------------------------------------------- @@ -929,86 +979,34 @@ def test_dicom_match_wildcard(): @pytest.mark.django_db -def test_process_creates_volume_records_on_success(mocker: MockerFixture, tmp_path: Path): - """Deferred insertion: volumes are created in DB after successful export.""" - MassTransferSettings.objects.create() - - user = UserFactory.create() - source = DicomServerFactory.create() - destination = DicomFolderFactory.create(path=str(tmp_path / "output")) - job = MassTransferJob.objects.create( - owner=user, - start_date=date(2024, 1, 1), - end_date=date(2024, 1, 1), - partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - pseudonymize=False, - pseudonym_salt="", - ) - job.filters_json = [{"modality": "CT"}] - job.save(update_fields=["filters_json"]) - - task = MassTransferTask.objects.create( - job=job, - source=source, - destination=destination, - patient_id="", - study_uid="", - partition_start=timezone.now(), - partition_end=timezone.now(), - partition_key="20240101", - ) - +def test_process_creates_volume_records_on_success(mocker: MockerFixture, mass_transfer_env): + """Volumes are created in PENDING then updated to EXPORTED after successful export.""" + env = mass_transfer_env series = [_make_discovered(patient_id="PAT1", series_uid="1.2.3.4.5")] - processor = MassTransferTaskProcessor(task) + processor = MassTransferTaskProcessor(env.task) mocker.patch.object(processor, "_discover_series", return_value=series) mocker.patch("adit.mass_transfer.processors.DicomOperator") - mocker.patch.object(processor, "_export_series", return_value=(1, "", "")) + mocker.patch.object(processor, "_export_series", side_effect=_fake_export_success) - assert MassTransferVolume.objects.filter(job=job).count() == 0 + assert MassTransferVolume.objects.filter(job=env.job).count() == 0 result = processor.process() assert result["status"] == MassTransferTask.Status.SUCCESS - vol = MassTransferVolume.objects.get(job=job, series_instance_uid="1.2.3.4.5") + vol = MassTransferVolume.objects.get(job=env.job, series_instance_uid="1.2.3.4.5") assert vol.status == MassTransferVolume.Status.EXPORTED assert vol.patient_id == "PAT1" - assert vol.task == task + assert vol.task == env.task @pytest.mark.django_db -def test_process_creates_error_volume_on_failure(mocker: MockerFixture, tmp_path: Path): +def test_process_creates_error_volume_on_failure(mocker: MockerFixture, mass_transfer_env): """Failed exports still create a volume record with ERROR status.""" - MassTransferSettings.objects.create() - - user = UserFactory.create() - source = DicomServerFactory.create() - destination = DicomFolderFactory.create(path=str(tmp_path / "output")) - job = MassTransferJob.objects.create( - owner=user, - start_date=date(2024, 1, 1), - end_date=date(2024, 1, 1), - partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - pseudonymize=False, - pseudonym_salt="", - ) - job.filters_json = [{"modality": "CT"}] - job.save(update_fields=["filters_json"]) - - task = MassTransferTask.objects.create( - job=job, - source=source, - destination=destination, - patient_id="", - study_uid="", - partition_start=timezone.now(), - partition_end=timezone.now(), - partition_key="20240101", - ) - + env = mass_transfer_env 
series = [_make_discovered(patient_id="PAT1", series_uid="1.2.3.4.5")] - processor = MassTransferTaskProcessor(task) + processor = MassTransferTaskProcessor(env.task) mocker.patch.object(processor, "_discover_series", return_value=series) mocker.patch("adit.mass_transfer.processors.DicomOperator") mocker.patch.object(processor, "_export_series", side_effect=DicomError("Export failed")) @@ -1016,40 +1014,16 @@ def test_process_creates_error_volume_on_failure(mocker: MockerFixture, tmp_path result = processor.process() assert result["status"] == MassTransferTask.Status.FAILURE - vol = MassTransferVolume.objects.get(job=job, series_instance_uid="1.2.3.4.5") + vol = MassTransferVolume.objects.get(job=env.job, series_instance_uid="1.2.3.4.5") assert vol.status == MassTransferVolume.Status.ERROR assert "Export failed" in vol.log @pytest.mark.django_db -def test_process_deletes_all_volumes_on_retry(mocker: MockerFixture, tmp_path: Path): +def test_process_deletes_all_volumes_on_retry(mocker: MockerFixture, mass_transfer_env): """On retry, ALL volumes from prior runs are deleted before rediscovery.""" - MassTransferSettings.objects.create() - - user = UserFactory.create() - source = DicomServerFactory.create() - destination = DicomFolderFactory.create(path=str(tmp_path / "output")) - job = MassTransferJob.objects.create( - owner=user, - start_date=date(2024, 1, 1), - end_date=date(2024, 1, 1), - partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - pseudonymize=False, - pseudonym_salt="", - ) - job.filters_json = [{"modality": "CT"}] - job.save(update_fields=["filters_json"]) - - task = MassTransferTask.objects.create( - job=job, - source=source, - destination=destination, - patient_id="", - study_uid="", - partition_start=timezone.now(), - partition_end=timezone.now(), - partition_key="20240101", - ) + env = mass_transfer_env + job, task = env.job, env.task # Simulate a prior failed run that left an ERROR volume MassTransferVolume.objects.create( @@ -1073,7 +1047,7 @@ def test_process_deletes_all_volumes_on_retry(mocker: MockerFixture, tmp_path: P processor = MassTransferTaskProcessor(task) mocker.patch.object(processor, "_discover_series", return_value=series) mocker.patch("adit.mass_transfer.processors.DicomOperator") - mocker.patch.object(processor, "_export_series", return_value=(1, "", "")) + mocker.patch.object(processor, "_export_series", side_effect=_fake_export_success) result = processor.process() @@ -1137,7 +1111,7 @@ def test_process_deterministic_pseudonyms_across_partitions(mocker: MockerFixtur ] processor1 = MassTransferTaskProcessor(task1) mocker.patch.object(processor1, "_discover_series", return_value=series1) - mocker.patch.object(processor1, "_export_series", return_value=(1, "", "")) + mocker.patch.object(processor1, "_export_series", side_effect=_fake_export_success) processor1.process() # Partition 2: same PAT1 @@ -1150,7 +1124,7 @@ def test_process_deterministic_pseudonyms_across_partitions(mocker: MockerFixtur ] processor2 = MassTransferTaskProcessor(task2) mocker.patch.object(processor2, "_discover_series", return_value=series2) - mocker.patch.object(processor2, "_export_series", return_value=(1, "", "")) + mocker.patch.object(processor2, "_export_series", side_effect=_fake_export_success) processor2.process() vol1 = MassTransferVolume.objects.get(series_instance_uid="1.2.3.100.1") @@ -1215,7 +1189,7 @@ def test_process_pseudonymize_mode_not_linked_across_partitions( ] processor1 = MassTransferTaskProcessor(task1) mocker.patch.object(processor1, 
"_discover_series", return_value=series1) - mocker.patch.object(processor1, "_export_series", return_value=(1, "", "")) + mocker.patch.object(processor1, "_export_series", side_effect=_fake_export_success) processor1.process() series2 = [ @@ -1227,7 +1201,7 @@ def test_process_pseudonymize_mode_not_linked_across_partitions( ] processor2 = MassTransferTaskProcessor(task2) mocker.patch.object(processor2, "_discover_series", return_value=series2) - mocker.patch.object(processor2, "_export_series", return_value=(1, "", "")) + mocker.patch.object(processor2, "_export_series", side_effect=_fake_export_success) processor2.process() vol1 = MassTransferVolume.objects.get(series_instance_uid="1.2.3.100.1") @@ -1428,106 +1402,186 @@ def test_extract_dicom_metadata_pseudonymized_has_no_real_data(tmp_path: Path): # --------------------------------------------------------------------------- -# _group_series tests +# _create_pending_volumes / _group_volumes tests # --------------------------------------------------------------------------- -def test_group_series_deterministic_pseudonym(mocker: MockerFixture): - """Seeded pseudonymizer with salt: subject_ids maps patient_id to deterministic pseudonym.""" +@pytest.mark.django_db +def test_create_pending_volumes_deterministic_pseudonym(): + """Seeded pseudonymizer with salt: volumes get deterministic pseudonyms.""" from adit.core.utils.pseudonymizer import Pseudonymizer - processor = _make_processor(mocker) - mock_job = mocker.MagicMock() - mock_job.pseudonym_salt = "test-seed-123" + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create() + job = MassTransferJob.objects.create( + owner=user, + start_date=date(2024, 1, 1), + end_date=date(2024, 1, 1), + partition_granularity=MassTransferJob.PartitionGranularity.DAILY, + pseudonym_salt="test-seed-123", + ) + job.filters_json = [{"modality": "CT"}] + job.save(update_fields=["filters_json"]) + + task = MassTransferTask.objects.create( + job=job, + source=source, + destination=destination, + patient_id="", + study_uid="", + partition_start=timezone.now(), + partition_end=timezone.now(), + partition_key="20240101", + ) ps = Pseudonymizer(seed="test-seed-123") - expected = ps.compute_pseudonym("PAT1") + expected_pat1 = ps.compute_pseudonym("PAT1") + expected_pat2 = ps.compute_pseudonym("PAT2") series = [ _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-1"), _make_discovered(patient_id="PAT2", study_uid="study-B", series_uid="s-2"), ] - grouped, subject_ids = processor._group_series(series, mock_job, ps) + processor = MassTransferTaskProcessor(task) + volumes = processor._create_pending_volumes(series, job, ps) - assert subject_ids["PAT1"] == expected - assert subject_ids["PAT2"] == ps.compute_pseudonym("PAT2") + assert len(volumes) == 2 + assert volumes[0].pseudonym == expected_pat1 + assert volumes[1].pseudonym == expected_pat2 + assert all(v.status == MassTransferVolume.Status.PENDING for v in volumes) + assert all(v.pk is not None for v in volumes) + + grouped = MassTransferTaskProcessor._group_volumes(volumes) assert "PAT1" in grouped assert "PAT2" in grouped -def test_group_series_no_anonymization(mocker: MockerFixture): - """Without pseudonymizer, subject_ids maps patient_id to sanitized patient_id.""" +def test_create_pending_volumes_no_anonymization(mocker: MockerFixture): + """Without pseudonymizer, volumes have empty pseudonym.""" processor = _make_processor(mocker) - mock_job = mocker.MagicMock() - mock_job.pseudonym_salt = "" 
+ mocker.patch.object( + MassTransferVolume.objects, + "bulk_create", + side_effect=lambda objs: objs, + ) series = [ _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-1"), _make_discovered(patient_id="PAT2", study_uid="study-B", series_uid="s-2"), ] - grouped, subject_ids = processor._group_series(series, mock_job, None) + mock_job = mocker.MagicMock() + mock_job.pseudonym_salt = "" - assert subject_ids["PAT1"] == "PAT1" - assert subject_ids["PAT2"] == "PAT2" + volumes = processor._create_pending_volumes(series, mock_job, None) + assert volumes[0].pseudonym == "" + assert volumes[1].pseudonym == "" -def test_group_series_random_leaves_subject_ids_empty(mocker: MockerFixture): - """With pseudonymizer but no salt, subject_ids is empty (random assigned during transfer).""" + +def test_create_pending_volumes_random_assigns_per_study(mocker: MockerFixture): + """With pseudonymizer but no salt, volumes get per-study random pseudonyms.""" from adit.core.utils.pseudonymizer import Pseudonymizer processor = _make_processor(mocker) - mock_job = mocker.MagicMock() - mock_job.pseudonym_salt = "" + mocker.patch.object( + MassTransferVolume.objects, + "bulk_create", + side_effect=lambda objs: objs, + ) ps = Pseudonymizer() + mock_job = mocker.MagicMock() + mock_job.pseudonym_salt = "" + series = [ _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-1"), + _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-2"), + _make_discovered(patient_id="PAT1", study_uid="study-B", series_uid="s-3"), ] - grouped, subject_ids = processor._group_series(series, mock_job, ps) + volumes = processor._create_pending_volumes(series, mock_job, ps) - assert subject_ids == {} - assert "PAT1" in grouped + # Same study → same pseudonym + assert volumes[0].pseudonym == volumes[1].pseudonym + assert volumes[0].pseudonym != "" + # Different study → different pseudonym + assert volumes[0].pseudonym != volumes[2].pseudonym + assert volumes[2].pseudonym != "" # --------------------------------------------------------------------------- -# Partition cleanup DB integration test +# _group_volumes tests # --------------------------------------------------------------------------- -@pytest.mark.django_db -def test_partition_cleanup_deletes_folder_and_volumes(mocker: MockerFixture, tmp_path: Path): - """process() deletes the partition folder on disk and all volumes for that partition.""" - MassTransferSettings.objects.create() - - user = UserFactory.create() - source = DicomServerFactory.create() - destination = DicomFolderFactory.create(path=str(tmp_path / "output")) - job = MassTransferJob.objects.create( - owner=user, - start_date=date(2024, 1, 1), - end_date=date(2024, 1, 1), - partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - pseudonymize=False, - pseudonym_salt="", +def test_group_volumes_multi_patient_multi_study(): + """Volumes are grouped by patient_id -> study_instance_uid.""" + now = timezone.now() + v1 = MassTransferVolume( + patient_id="PAT1", study_instance_uid="study-A", series_instance_uid="s-1", + study_datetime=now, + ) + v2 = MassTransferVolume( + patient_id="PAT1", study_instance_uid="study-A", series_instance_uid="s-2", + study_datetime=now, + ) + v3 = MassTransferVolume( + patient_id="PAT1", study_instance_uid="study-B", series_instance_uid="s-3", + study_datetime=now, + ) + v4 = MassTransferVolume( + patient_id="PAT2", study_instance_uid="study-C", series_instance_uid="s-4", + study_datetime=now, ) - job.filters_json = [{"modality": "CT"}] - 
job.save(update_fields=["filters_json"]) - task = MassTransferTask.objects.create( - job=job, - source=source, - destination=destination, - patient_id="", - study_uid="", - partition_start=timezone.now(), - partition_end=timezone.now(), - partition_key="20240101", + grouped = MassTransferTaskProcessor._group_volumes([v1, v2, v3, v4]) + + assert set(grouped.keys()) == {"PAT1", "PAT2"} + assert set(grouped["PAT1"].keys()) == {"study-A", "study-B"} + assert grouped["PAT1"]["study-A"] == [v1, v2] + assert grouped["PAT1"]["study-B"] == [v3] + assert grouped["PAT2"]["study-C"] == [v4] + + +# --------------------------------------------------------------------------- +# RetriableDicomError volume status tests +# --------------------------------------------------------------------------- + + +def test_retriable_error_saves_volume_as_error(mocker: MockerFixture, tmp_path: Path): + """RetriableDicomError should save the current volume as ERROR before propagating.""" + processor = _make_process_env(mocker, tmp_path) + series = [_make_discovered(series_uid="s-1")] + + mocker.patch.object(processor, "_discover_series", return_value=series) + mocker.patch.object( + processor, + "_export_series", + side_effect=RetriableDicomError("PACS connection lost"), ) + with pytest.raises(RetriableDicomError): + processor.process() + + # volume.save() should have been called (via the finally block) + MassTransferVolume.save.assert_called() + + +# --------------------------------------------------------------------------- +# Partition cleanup DB integration test +# --------------------------------------------------------------------------- + + +@pytest.mark.django_db +def test_partition_cleanup_deletes_folder_and_volumes(mocker: MockerFixture, mass_transfer_env): + """process() deletes the partition folder on disk and all volumes for that partition.""" + env = mass_transfer_env + job, task, destination = env.job, env.task, env.destination + # Create pre-existing volumes for uid in ["1.2.3.1", "1.2.3.2"]: MassTransferVolume.objects.create( @@ -1558,7 +1612,7 @@ def test_partition_cleanup_deletes_folder_and_volumes(mocker: MockerFixture, tmp processor = MassTransferTaskProcessor(task) mocker.patch.object(processor, "_discover_series", return_value=series) mocker.patch("adit.mass_transfer.processors.DicomOperator") - mocker.patch.object(processor, "_export_series", return_value=(1, "", "")) + mocker.patch.object(processor, "_export_series", side_effect=_fake_export_success) result = processor.process() From 0e4eaffd24f222f8ed1dd7e33b3a6d6df0802e7d Mon Sep 17 00:00:00 2001 From: Kai Schlamp Date: Sun, 29 Mar 2026 13:09:25 +0000 Subject: [PATCH 086/103] Move mass transfer task queueing to background job Defers individual task queuing to a background Procrastinate job on the default queue so that the HTTP view returns immediately instead of blocking on potentially thousands of defer() calls. Adds factories, a dedicated tasks module, and comprehensive tests. 
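In short, the control flow after this change looks roughly as follows. This is an
illustrative, condensed sketch of the code added below, not additional implementation;
all names (queue_mass_transfer_tasks, process_mass_transfer_task, get_model_label,
the priority fields) are taken from the diff in this patch, and the real task adds
further existence/status guards:

    # Model side (called from the HTTP view): one cheap defer per job.
    def queue_pending_tasks(self):
        assert self.status == DicomJob.Status.PENDING
        app.configure_task(
            "adit.mass_transfer.tasks.queue_mass_transfer_tasks",
            allow_unknown=False,
        ).defer(job_id=self.pk)

    # Worker side (default queue): the potentially slow per-task loop.
    def queue_mass_transfer_tasks(job_id: int):
        job = MassTransferJob.objects.get(pk=job_id)
        priority = job.urgent_priority if job.urgent else job.default_priority
        for mass_task in job.tasks.filter(
            status=DicomTask.Status.PENDING,
            queued_job__isnull=True,  # idempotency guard
        ):
            mass_task.queued_job_id = app.configure_task(
                "adit.mass_transfer.tasks.process_mass_transfer_task",
                allow_unknown=False,
                priority=priority,
            ).defer(
                model_label=get_model_label(mass_task.__class__),
                task_id=mass_task.pk,
            )
            mass_task.save()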
Co-Authored-By: Claude Opus 4.6 (1M context) --- adit/core/tasks.py | 14 -- adit/mass_transfer/factories.py | 27 +++ adit/mass_transfer/models.py | 22 +-- adit/mass_transfer/tasks.py | 65 +++++++ .../tests/test_queue_pending_tasks.py | 162 ++++++++++++++++++ 5 files changed, 259 insertions(+), 31 deletions(-) create mode 100644 adit/mass_transfer/factories.py create mode 100644 adit/mass_transfer/tasks.py create mode 100644 adit/mass_transfer/tests/test_queue_pending_tasks.py diff --git a/adit/core/tasks.py b/adit/core/tasks.py index 90bfa5c10..067a58580 100644 --- a/adit/core/tasks.py +++ b/adit/core/tasks.py @@ -204,17 +204,3 @@ def process_dicom_task(context: JobContext, model_label: str, task_id: int): _run_dicom_task( context, model_label, task_id, process_timeout=settings.DICOM_TASK_PROCESS_TIMEOUT ) - - -# Separate task function for mass transfer on a dedicated queue so it does not -# starve batch/selective transfers. Mass transfer tasks process an entire -# partition (discovery + export + convert) and can run for hours, so the -# pebble process timeout is disabled (process_timeout=None). Individual DICOM -# operations are still protected by Stamina / pynetdicom-level timeouts. -@app.task( - queue="mass_transfer", - pass_context=True, - retry=DICOM_TASK_RETRY_STRATEGY, -) -def process_mass_transfer_task(context: JobContext, model_label: str, task_id: int): - _run_dicom_task(context, model_label, task_id, process_timeout=None) diff --git a/adit/mass_transfer/factories.py b/adit/mass_transfer/factories.py new file mode 100644 index 000000000..a2bb14fc1 --- /dev/null +++ b/adit/mass_transfer/factories.py @@ -0,0 +1,27 @@ +import factory +from django.utils import timezone + +from adit.core.factories import ( + AbstractTransferJobFactory, + AbstractTransferTaskFactory, +) + +from .models import MassTransferJob, MassTransferTask + + +class MassTransferJobFactory(AbstractTransferJobFactory[MassTransferJob]): + class Meta: + model = MassTransferJob + + start_date = factory.LazyFunction(lambda: timezone.now().date()) + end_date = factory.LazyFunction(lambda: timezone.now().date()) + + +class MassTransferTaskFactory(AbstractTransferTaskFactory[MassTransferTask]): + class Meta: + model = MassTransferTask + + job = factory.SubFactory(MassTransferJobFactory) + partition_start = factory.LazyFunction(timezone.now) + partition_end = factory.LazyFunction(timezone.now) + partition_key = factory.Faker("date", pattern="%Y-%m-%d") diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index 9f900a252..8b58b224d 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -10,7 +10,6 @@ from procrastinate.contrib.django import app from adit.core.models import DicomAppSettings, DicomJob, TransferJob, TransferTask -from adit.core.utils.model_utils import get_model_label if TYPE_CHECKING: from .processors import FilterSpec @@ -69,24 +68,13 @@ def get_absolute_url(self): return reverse("mass_transfer_job_detail", args=[self.pk]) def queue_pending_tasks(self): - """Queues all pending mass transfer tasks.""" + """Queues all pending mass transfer tasks via a background job.""" assert self.status == DicomJob.Status.PENDING - priority = self.default_priority - if self.urgent: - priority = self.urgent_priority - - for mass_task in self.tasks.filter(status=TransferTask.Status.PENDING): - assert mass_task.queued_job is None - - model_label = get_model_label(mass_task.__class__) - queued_job_id = app.configure_task( - "adit.core.tasks.process_mass_transfer_task", - 
allow_unknown=False, - priority=priority, - ).defer(model_label=model_label, task_id=mass_task.pk) - mass_task.queued_job_id = queued_job_id - mass_task.save() + app.configure_task( + "adit.mass_transfer.tasks.queue_mass_transfer_tasks", + allow_unknown=False, + ).defer(job_id=self.pk) class MassTransferTask(TransferTask): diff --git a/adit/mass_transfer/tasks.py b/adit/mass_transfer/tasks.py new file mode 100644 index 000000000..f7ebd3993 --- /dev/null +++ b/adit/mass_transfer/tasks.py @@ -0,0 +1,65 @@ +import logging + +from procrastinate import JobContext, RetryStrategy +from procrastinate.contrib.django import app + +from adit.core.models import DicomJob, DicomTask +from adit.core.tasks import DICOM_TASK_RETRY_STRATEGY, _run_dicom_task +from adit.core.utils.model_utils import get_model_label + +logger = logging.getLogger(__name__) + + +# Separate task function for mass transfer on a dedicated queue so it does not +# starve batch/selective transfers. Mass transfer tasks process an entire +# partition (discovery + export + convert) and can run for hours, so the +# pebble process timeout is disabled (process_timeout=None). Individual DICOM +# operations are still protected by Stamina / pynetdicom-level timeouts. +@app.task( + queue="mass_transfer", + pass_context=True, + retry=DICOM_TASK_RETRY_STRATEGY, +) +def process_mass_transfer_task(context: JobContext, model_label: str, task_id: int): + _run_dicom_task(context, model_label, task_id, process_timeout=None) + + +@app.task(queue="default", retry=RetryStrategy(max_attempts=3, wait=10)) +def queue_mass_transfer_tasks(job_id: int): + """Queues all pending tasks for a mass transfer job. + + Runs on the default worker so that the HTTP view returns immediately + instead of blocking on thousands of individual defer() calls. 
+ """ + from .models import MassTransferJob + + try: + job = MassTransferJob.objects.get(pk=job_id) + except MassTransferJob.DoesNotExist: + logger.info("MassTransferJob %d no longer exists; skipping queue.", job_id) + return + + if job.status != DicomJob.Status.PENDING: + logger.warning( + "MassTransferJob %d has status %s (expected PENDING); skipping queue.", + job_id, + job.status, + ) + return + + priority = job.default_priority + if job.urgent: + priority = job.urgent_priority + + for mass_task in job.tasks.filter( + status=DicomTask.Status.PENDING, + queued_job__isnull=True, # Skip tasks already queued (idempotency guard) + ): + model_label = get_model_label(mass_task.__class__) + queued_job_id = app.configure_task( + "adit.mass_transfer.tasks.process_mass_transfer_task", + allow_unknown=False, + priority=priority, + ).defer(model_label=model_label, task_id=mass_task.pk) + mass_task.queued_job_id = queued_job_id + mass_task.save() diff --git a/adit/mass_transfer/tests/test_queue_pending_tasks.py b/adit/mass_transfer/tests/test_queue_pending_tasks.py new file mode 100644 index 000000000..20ac293d0 --- /dev/null +++ b/adit/mass_transfer/tests/test_queue_pending_tasks.py @@ -0,0 +1,162 @@ +import pytest +from adit_radis_shared.common.utils.testing_helpers import run_worker_once +from procrastinate.contrib.django.models import ProcrastinateJob + +from adit.core.models import DicomJob, DicomTask + +from ..factories import MassTransferJobFactory, MassTransferTaskFactory +from ..tasks import queue_mass_transfer_tasks + + +@pytest.mark.django_db(transaction=True) +def test_queue_pending_tasks_defers_background_job(): + """queue_pending_tasks() should defer a single job on the default queue + without queuing individual tasks inline.""" + job = MassTransferJobFactory.create(status=DicomJob.Status.PENDING) + MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) + MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) + + job.queue_pending_tasks() + + # A single queueing job should be deferred on the default queue + queueing_jobs = ProcrastinateJob.objects.filter( + task_name="adit.mass_transfer.tasks.queue_mass_transfer_tasks" + ) + assert queueing_jobs.count() == 1 + queueing_job = queueing_jobs.first() + assert queueing_job is not None + assert queueing_job.queue_name == "default" + + # Individual tasks should NOT have been queued yet + for task in job.tasks.all(): + assert task.queued_job is None + + +@pytest.mark.django_db(transaction=True) +def test_background_job_queues_all_pending_tasks(): + """After the background job runs, all pending tasks should have been + picked up by the worker (status progressed beyond PENDING).""" + job = MassTransferJobFactory.create(status=DicomJob.Status.PENDING) + task1 = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) + task2 = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) + + job.queue_pending_tasks() + run_worker_once() + + # run_worker_once processes all jobs (queueing + processing) and deletes + # ProcrastinateJob records. Verify that tasks were actually processed. 
+ task1.refresh_from_db() + task2.refresh_from_db() + assert task1.status != DicomTask.Status.PENDING + assert task2.status != DicomTask.Status.PENDING + + +@pytest.mark.django_db(transaction=True) +def test_background_job_skips_canceled_tasks(): + """Canceled tasks should not be queued.""" + job = MassTransferJobFactory.create(status=DicomJob.Status.PENDING) + pending_task = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) + canceled_task = MassTransferTaskFactory.create(status=DicomTask.Status.CANCELED, job=job) + + job.queue_pending_tasks() + run_worker_once() + + pending_task.refresh_from_db() + canceled_task.refresh_from_db() + assert pending_task.status != DicomTask.Status.PENDING + assert canceled_task.status == DicomTask.Status.CANCELED + + +@pytest.mark.django_db(transaction=True) +def test_background_job_is_idempotent(): + """Deferring queue_pending_tasks twice should not double-queue tasks.""" + job = MassTransferJobFactory.create(status=DicomJob.Status.PENDING) + task1 = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) + task2 = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) + + job.queue_pending_tasks() + run_worker_once() + + task1.refresh_from_db() + task2.refresh_from_db() + assert task1.attempts == 1 + assert task2.attempts == 1 + + # Reset job to PENDING and defer again + job.refresh_from_db() + job.status = DicomJob.Status.PENDING + job.save() + job.queue_pending_tasks() + run_worker_once() + + # Tasks should not have been processed again (status is no longer PENDING) + task1.refresh_from_db() + task2.refresh_from_db() + assert task1.attempts == 1 + assert task2.attempts == 1 + + +@pytest.mark.django_db(transaction=True) +def test_background_job_skips_deleted_job(): + """If the job is deleted before the background task runs, it should + gracefully skip.""" + job = MassTransferJobFactory.create(status=DicomJob.Status.PENDING) + MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) + + job.queue_pending_tasks() + job.delete() + + # Should not raise + run_worker_once() + + +@pytest.mark.django_db(transaction=True) +def test_background_job_skips_non_pending_job(): + """If the job status changes before the background task runs (e.g. 
cancel), + tasks should not be queued.""" + job = MassTransferJobFactory.create(status=DicomJob.Status.PENDING) + task = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) + + job.queue_pending_tasks() + + # Simulate cancel happening before the background job runs + job.status = DicomJob.Status.CANCELED + job.save() + + run_worker_once() + + task.refresh_from_db() + assert task.status == DicomTask.Status.PENDING + + +@pytest.mark.django_db(transaction=True) +def test_queue_mass_transfer_tasks_sets_queued_job(): + """Calling queue_mass_transfer_tasks directly should set queued_job + on each pending task and place them on the mass_transfer queue.""" + job = MassTransferJobFactory.create(status=DicomJob.Status.PENDING) + task1 = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) + task2 = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) + + queue_mass_transfer_tasks(job_id=job.pk) + + task1.refresh_from_db() + task2.refresh_from_db() + assert task1.queued_job is not None + assert task2.queued_job is not None + + for task in [task1, task2]: + procrastinate_job = ProcrastinateJob.objects.get(pk=task.queued_job_id) + assert procrastinate_job.queue_name == "mass_transfer" + + +@pytest.mark.django_db(transaction=True) +def test_queue_mass_transfer_tasks_uses_urgent_priority(): + """Urgent jobs should queue tasks with urgent priority.""" + job = MassTransferJobFactory.create(status=DicomJob.Status.PENDING, urgent=True) + task = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) + + queue_mass_transfer_tasks(job_id=job.pk) + + task.refresh_from_db() + procrastinate_job = ProcrastinateJob.objects.get(pk=task.queued_job_id) + assert procrastinate_job.priority == job.urgent_priority From 41bf368793a2c406ada49ce9e9d546daf6639332 Mon Sep 17 00:00:00 2001 From: Kai Schlamp Date: Sun, 29 Mar 2026 13:14:41 +0000 Subject: [PATCH 087/103] Add 24-hour safety timeout for mass transfer task processing Replace unlimited process timeout (None) with a 24-hour cap to prevent runaway tasks from blocking the mass_transfer queue indefinitely. Co-Authored-By: Claude Opus 4.6 (1M context) --- adit/mass_transfer/tasks.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/adit/mass_transfer/tasks.py b/adit/mass_transfer/tasks.py index f7ebd3993..4ff076f5b 100644 --- a/adit/mass_transfer/tasks.py +++ b/adit/mass_transfer/tasks.py @@ -13,15 +13,18 @@ # Separate task function for mass transfer on a dedicated queue so it does not # starve batch/selective transfers. Mass transfer tasks process an entire # partition (discovery + export + convert) and can run for hours, so the -# pebble process timeout is disabled (process_timeout=None). Individual DICOM +# pebble process timeout is set to 24 hours as a safety net. Individual DICOM # operations are still protected by Stamina / pynetdicom-level timeouts. 
+MASS_TRANSFER_PROCESS_TIMEOUT = 24 * 60 * 60 # 24 hours + + @app.task( queue="mass_transfer", pass_context=True, retry=DICOM_TASK_RETRY_STRATEGY, ) def process_mass_transfer_task(context: JobContext, model_label: str, task_id: int): - _run_dicom_task(context, model_label, task_id, process_timeout=None) + _run_dicom_task(context, model_label, task_id, process_timeout=MASS_TRANSFER_PROCESS_TIMEOUT) @app.task(queue="default", retry=RetryStrategy(max_attempts=3, wait=10)) From a1c4003ec6c06d1c03663e0680c293842a8c1f12 Mon Sep 17 00:00:00 2001 From: Kai Schlamp Date: Sun, 29 Mar 2026 15:15:55 +0000 Subject: [PATCH 088/103] Address PR review findings for mass transfer module - Fix task reset routing: override queue_pending_task() on MassTransferTask so single-task resets use the mass_transfer queue instead of the generic dicom queue with wrong timeout - Deduplicate queue logic: queue_mass_transfer_tasks now delegates to task.queue_pending_task() instead of duplicating the defer/save code - Add error handling to task queueing loop with logging and re-raise - Narrow bare except clauses in _extract_dicom_metadata and _write_dicom_metadata to specific exception types with logging - Move close() error guard from processor into DicomOperator.close() - Add debug logging to silent except OSError: pass blocks - Remove unused cleanup_on_failure hook from DicomTask and all call sites - Add model-level clean() to MassTransferJob for date range validation - Add CheckConstraint ensuring partition_start < partition_end - Add MinValueValidator(1) to DicomServer.max_search_results - Make FilterSpec and DiscoveredSeries frozen dataclasses - Fix dimse_connector docstring referencing non-existent auto_config - Add docstring to build_partitions(), expand filters_json help_text - Sync inherited TransferTask field validators in migration - Fix test fixtures to satisfy new partition constraint - Add 10 new tests: filter JSON validation, discover_series filtering, date range rejection, source authorization - Delete outdated mass_transfer_spec.md - Document assertion usage policy in CLAUDE.md Co-Authored-By: Claude Opus 4.6 (1M context) --- AGENTS.md | 4 + adit/core/migrations/0017_review_fixes.py | 21 + adit/core/models.py | 7 +- adit/core/tasks.py | 7 +- adit/core/utils/dicom_operator.py | 7 +- adit/core/utils/dimse_connector.py | 2 +- adit/mass_transfer/factories.py | 6 +- .../0004_sync_transfertask_fields.py | 96 ++++ .../0005_add_partition_constraint.py | 26 ++ adit/mass_transfer/models.py | 43 +- adit/mass_transfer/processors.py | 25 +- adit/mass_transfer/tasks.py | 22 +- adit/mass_transfer/tests/test_cleanup.py | 63 --- adit/mass_transfer/tests/test_forms.py | 147 ++++++ adit/mass_transfer/tests/test_processor.py | 107 ++++- adit/mass_transfer/utils/partitions.py | 7 + docs/mass_transfer_spec.md | 421 ------------------ 17 files changed, 486 insertions(+), 525 deletions(-) create mode 100644 adit/core/migrations/0017_review_fixes.py create mode 100644 adit/mass_transfer/migrations/0004_sync_transfertask_fields.py create mode 100644 adit/mass_transfer/migrations/0005_add_partition_constraint.py delete mode 100644 adit/mass_transfer/tests/test_cleanup.py delete mode 100644 docs/mass_transfer_spec.md diff --git a/AGENTS.md b/AGENTS.md index 55c6955d2..c72b15302 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -116,6 +116,10 @@ Key variables in `.env` (see `example.env`): - **Type Checking**: pyright in basic mode (migrations excluded) - **Linting**: Ruff with E, F, I, DJ rules +### Assertions + +- Use `assert` for internal 
programming error checks (preconditions, invariants). Do not replace with `ValueError` or similar — this app is never run with `python -O`. + ### Django Field Conventions - Text/char fields: use `blank=True` alone (not `null=True`) diff --git a/adit/core/migrations/0017_review_fixes.py b/adit/core/migrations/0017_review_fixes.py new file mode 100644 index 000000000..67cadba52 --- /dev/null +++ b/adit/core/migrations/0017_review_fixes.py @@ -0,0 +1,21 @@ +# Generated by Django 6.0.3 on 2026-03-29 13:52 + +import django.core.validators +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("core", "0016_add_max_search_results_to_dicomserver"), + ] + + operations = [ + migrations.AlterField( + model_name="dicomserver", + name="max_search_results", + field=models.PositiveIntegerField( + default=200, validators=[django.core.validators.MinValueValidator(1)] + ), + ), + ] diff --git a/adit/core/models.py b/adit/core/models.py index acbe50381..9135231e9 100644 --- a/adit/core/models.py +++ b/adit/core/models.py @@ -156,7 +156,9 @@ class DicomServer(DicomNode): dicomweb_authorization_header = models.CharField(blank=True, max_length=2000) # C-FIND result limit before recursive time-window splitting - max_search_results = models.PositiveIntegerField(default=200) + max_search_results = models.PositiveIntegerField( + default=200, validators=[MinValueValidator(1)] + ) objects: DicomNodeManager["DicomServer"] = DicomNodeManager["DicomServer"]() @@ -420,9 +422,6 @@ def __str__(self) -> str: def get_absolute_url(self) -> str: ... - def cleanup_on_failure(self) -> None: - """Hook for subclasses to clean up resources after task failure or timeout.""" - pass def queue_pending_task(self) -> None: """Queues a dicom task.""" diff --git a/adit/core/tasks.py b/adit/core/tasks.py index 067a58580..d5b45b544 100644 --- a/adit/core/tasks.py +++ b/adit/core/tasks.py @@ -124,11 +124,12 @@ def _monitor_task(context: JobContext, future: ProcessFuture) -> None: dicom_task.message = "Task was canceled." ensure_db_connection() + except futures.TimeoutError: dicom_task.message = "Task was aborted due to timeout." 
dicom_task.status = DicomTask.Status.FAILURE ensure_db_connection() - dicom_task.cleanup_on_failure() + except RetriableDicomError as err: logger.exception("Retriable error occurred during %s.", dicom_task) @@ -150,7 +151,7 @@ def _monitor_task(context: JobContext, future: ProcessFuture) -> None: dicom_task.message = str(err) ensure_db_connection() - dicom_task.cleanup_on_failure() + raise err except Exception as err: @@ -166,7 +167,7 @@ def _monitor_task(context: JobContext, future: ProcessFuture) -> None: dicom_task.log += traceback.format_exc() ensure_db_connection() - dicom_task.cleanup_on_failure() + finally: dicom_task.end = timezone.now() diff --git a/adit/core/utils/dicom_operator.py b/adit/core/utils/dicom_operator.py index 3b9e5e3f3..2d4241295 100644 --- a/adit/core/utils/dicom_operator.py +++ b/adit/core/utils/dicom_operator.py @@ -61,8 +61,11 @@ def get_logs(self) -> list[DicomLogEntry]: return self.dimse_connector.logs + self.dicom_web_connector.logs + self.logs def close(self) -> None: - if self.dimse_connector.assoc: - self.dimse_connector.close_connection() + try: + if self.dimse_connector.assoc: + self.dimse_connector.close_connection() + except Exception: + logger.debug("Error closing DIMSE association", exc_info=True) def abort(self) -> None: self.dimse_connector.abort_connection() diff --git a/adit/core/utils/dimse_connector.py b/adit/core/utils/dimse_connector.py index ba463c60b..4ed6b80e4 100644 --- a/adit/core/utils/dimse_connector.py +++ b/adit/core/utils/dimse_connector.py @@ -55,7 +55,7 @@ def connect_to_server(service: DimseService): - """Automatically handles the connection when `auto_config` option is set. + """Handles the DIMSE association lifecycle based on `auto_connect` and `auto_close`. TODO: Think about using a context manager instead of a decorator. """ diff --git a/adit/mass_transfer/factories.py b/adit/mass_transfer/factories.py index a2bb14fc1..6c04aaaeb 100644 --- a/adit/mass_transfer/factories.py +++ b/adit/mass_transfer/factories.py @@ -1,3 +1,5 @@ +from datetime import timedelta + import factory from django.utils import timezone @@ -23,5 +25,7 @@ class Meta: job = factory.SubFactory(MassTransferJobFactory) partition_start = factory.LazyFunction(timezone.now) - partition_end = factory.LazyFunction(timezone.now) + partition_end = factory.LazyFunction( + lambda: timezone.now() + timedelta(hours=23, minutes=59, seconds=59) + ) partition_key = factory.Faker("date", pattern="%Y-%m-%d") diff --git a/adit/mass_transfer/migrations/0004_sync_transfertask_fields.py b/adit/mass_transfer/migrations/0004_sync_transfertask_fields.py new file mode 100644 index 000000000..8894c31ad --- /dev/null +++ b/adit/mass_transfer/migrations/0004_sync_transfertask_fields.py @@ -0,0 +1,96 @@ +# Generated by Django 6.0.3 on 2026-03-29 15:10 + +import django.contrib.postgres.fields +import django.core.validators +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("mass_transfer", "0003_fix_queued_job_on_delete"), + ] + + operations = [ + migrations.AlterField( + model_name="masstransferjob", + name="filters_json", + field=models.JSONField( + blank=True, + help_text="JSON list of filter objects. 
Valid keys: modality, institution_name, apply_institution_on_study, study_description, series_description, series_number, min_age, max_age.", + null=True, + ), + ), + migrations.AlterField( + model_name="masstransfertask", + name="patient_id", + field=models.CharField( + max_length=64, + validators=[ + django.core.validators.RegexValidator( + inverse_match=True, + message="Contains invalid backslash character", + regex="\\\\", + ), + django.core.validators.RegexValidator( + inverse_match=True, + message="Contains invalid control characters.", + regex="[\\f\\n\\r]", + ), + django.core.validators.RegexValidator( + inverse_match=True, + message="Contains invalid wildcard characters.", + regex="[\\*\\?]", + ), + ], + ), + ), + migrations.AlterField( + model_name="masstransfertask", + name="pseudonym", + field=models.CharField( + blank=True, + max_length=64, + validators=[ + django.core.validators.RegexValidator( + inverse_match=True, + message="Contains invalid backslash character", + regex="\\\\", + ), + django.core.validators.RegexValidator( + inverse_match=True, + message="Contains invalid control characters.", + regex="[\\f\\n\\r]", + ), + ], + ), + ), + migrations.AlterField( + model_name="masstransfertask", + name="series_uids", + field=django.contrib.postgres.fields.ArrayField( + base_field=models.CharField( + max_length=64, + validators=[ + django.core.validators.RegexValidator( + message="Invalid character in UID.", regex="^[\\d\\.]+$" + ) + ], + ), + blank=True, + default=list, + ), + ), + migrations.AlterField( + model_name="masstransfertask", + name="study_uid", + field=models.CharField( + max_length=64, + validators=[ + django.core.validators.RegexValidator( + message="Invalid character in UID.", regex="^[\\d\\.]+$" + ) + ], + ), + ), + ] diff --git a/adit/mass_transfer/migrations/0005_add_partition_constraint.py b/adit/mass_transfer/migrations/0005_add_partition_constraint.py new file mode 100644 index 000000000..77580a51e --- /dev/null +++ b/adit/mass_transfer/migrations/0005_add_partition_constraint.py @@ -0,0 +1,26 @@ +# Generated by Django 6.0.3 on 2026-03-29 15:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("core", "0017_review_fixes"), + ("mass_transfer", "0004_sync_transfertask_fields"), + ("procrastinate", "0041_post_retry_failed_job"), + ] + + operations = [ + migrations.AlterModelOptions( + name="masstransfertask", + options={}, + ), + migrations.AddConstraint( + model_name="masstransfertask", + constraint=models.CheckConstraint( + condition=models.Q(("partition_start__lt", models.F("partition_end"))), + name="mass_transfer_partition_start_before_end", + ), + ), + ] diff --git a/adit/mass_transfer/models.py b/adit/mass_transfer/models.py index 8b58b224d..dd3c03832 100644 --- a/adit/mass_transfer/models.py +++ b/adit/mass_transfer/models.py @@ -5,11 +5,13 @@ from typing import TYPE_CHECKING from django.conf import settings +from django.core.exceptions import ValidationError from django.db import models from django.urls import reverse from procrastinate.contrib.django import app -from adit.core.models import DicomAppSettings, DicomJob, TransferJob, TransferTask +from adit.core.models import DicomAppSettings, DicomJob, DicomTask, TransferJob, TransferTask +from adit.core.utils.model_utils import get_model_label if TYPE_CHECKING: from .processors import FilterSpec @@ -46,7 +48,11 @@ class PartitionGranularity(models.TextChoices): filters_json = models.JSONField( blank=True, null=True, - help_text="Inline 
filter configuration as a JSON list of filter objects.", + help_text=( + "JSON list of filter objects. Valid keys: modality, institution_name, " + "apply_institution_on_study, study_description, series_description, " + "series_number, min_age, max_age." + ), ) @property @@ -67,6 +73,13 @@ def get_filters(self) -> list[FilterSpec]: def get_absolute_url(self): return reverse("mass_transfer_job_detail", args=[self.pk]) + def clean(self): + super().clean() + if self.start_date and self.end_date and self.end_date < self.start_date: + raise ValidationError("End date must be on or after the start date.") + if not self.pseudonymize: + self.pseudonym_salt = "" + def queue_pending_tasks(self): """Queues all pending mass transfer tasks via a background job.""" assert self.status == DicomJob.Status.PENDING @@ -89,9 +102,35 @@ class MassTransferTask(TransferTask): volumes: models.QuerySet["MassTransferVolume"] + class Meta: + constraints = [ + models.CheckConstraint( + condition=models.Q(partition_start__lt=models.F("partition_end")), + name="mass_transfer_partition_start_before_end", + ) + ] + def get_absolute_url(self): return reverse("mass_transfer_task_detail", args=[self.pk]) + def queue_pending_task(self) -> None: + """Queue this single task on the mass transfer queue.""" + assert self.status == DicomTask.Status.PENDING + assert self.queued_job is None + + priority = self.job.default_priority + if self.job.urgent: + priority = self.job.urgent_priority + + model_label = get_model_label(self.__class__) + queued_job_id = app.configure_task( + "adit.mass_transfer.tasks.process_mass_transfer_task", + allow_unknown=False, + priority=priority, + ).defer(model_label=model_label, task_id=self.pk) + self.queued_job_id = queued_job_id + self.save() + class MassTransferVolume(models.Model): class Status(models.TextChoices): diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index d19c4b9fb..9c6e34d98 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -14,6 +14,7 @@ from django.utils import timezone from pydicom import Dataset +from pydicom.errors import InvalidDicomError from adit.core.errors import DicomError, RetriableDicomError from adit.core.models import DicomNode, DicomTask @@ -33,7 +34,7 @@ ) -@dataclass +@dataclass(frozen=True) class FilterSpec: """Unified filter representation used by the processor. 
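# Why frozen matters here: a frozen dataclass is immutable and hashable, so its
# instances can be deduplicated in sets or used as dict keys. A minimal
# illustration of that property (SeriesKey is a hypothetical stand-in, not the
# actual FilterSpec/DiscoveredSeries definitions from this module):
from dataclasses import FrozenInstanceError, dataclass


@dataclass(frozen=True)
class SeriesKey:
    study_uid: str
    series_uid: str


a = SeriesKey("1.2.3.100", "1.2.3.201")
b = SeriesKey("1.2.3.100", "1.2.3.201")
assert a == b and len({a, b}) == 1  # equal values collapse when collected in a set

try:
    a.series_uid = "1.2.3.999"  # type: ignore[misc]
except FrozenInstanceError:
    pass  # attribute assignment is rejected on frozen instances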
@@ -70,7 +71,7 @@ def from_dict(cls, d: dict) -> "FilterSpec": _DELAY_RETRY_FETCH = 3 # seconds before retrying a fetch that returned 0 images -@dataclass +@dataclass(frozen=True) class DiscoveredSeries: patient_id: str accession_number: str @@ -160,7 +161,8 @@ def _extract_dicom_metadata(dicom_dir: Path) -> dict[str, str]: for dcm_path in sorted(dicom_dir.glob("*.dcm")): try: ds = pydicom.dcmread(dcm_path, stop_before_pixels=True) - except Exception: + except (InvalidDicomError, OSError) as exc: + logger.warning("Skipping unreadable DICOM file %s: %s", dcm_path, exc) continue fields: dict[str, str] = {} for tag in _DICOM_METADATA_TAGS: @@ -190,7 +192,7 @@ def _write_dicom_metadata(output_path: Path, metadata_name: str, fields: dict[st metadata_path = output_path / f"{metadata_name}_dicom.json" try: metadata_path.write_text(json.dumps(fields, indent=2)) - except Exception: + except (OSError, TypeError, ValueError): logger.warning("Failed to write metadata %s", metadata_path, exc_info=True) @@ -426,9 +428,14 @@ def _transfer_grouped_series( total_volumes += 1 if total_processed + total_failed + total_skipped > 0: - # TODO: Investigate why such a pacing delay is really needed. - # Such a timeout was never necessary with batch transfer where we also - # transfer series one by one. + # Pacing delay between consecutive C-GET/C-MOVE requests. + # Some PACS servers reject or drop associations under + # rapid-fire requests. Batch transfer does not need this + # because it processes fewer series per task. The 0.5s + # value was chosen empirically. + # TODO: Investigate if this is really needed and if the + # delay value is appropriate (was never necessary in mass + # transfer which also transfers series one by one). time.sleep(_DELAY_BETWEEN_SERIES) subject_id = volume.pseudonym or sanitize_filename(volume.patient_id) @@ -1013,7 +1020,7 @@ def callback(ds: Dataset | None) -> None: if output_path.exists() and not any(output_path.iterdir()): output_path.rmdir() except OSError: - pass + logger.debug("Failed to remove empty directory %s", output_path, exc_info=True) return image_count, study_uid_pseudonymized, series_uid_pseudonymized @@ -1038,7 +1045,7 @@ def _convert_series( if output_path.exists() and not any(output_path.iterdir()): output_path.rmdir() except OSError: - pass + logger.debug("Failed to remove empty directory %s", output_path, exc_info=True) return [] raise DicomError( f"Conversion failed for series {volume.series_instance_uid}: {err_msg}" diff --git a/adit/mass_transfer/tasks.py b/adit/mass_transfer/tasks.py index 4ff076f5b..af2a56c4c 100644 --- a/adit/mass_transfer/tasks.py +++ b/adit/mass_transfer/tasks.py @@ -5,7 +5,6 @@ from adit.core.models import DicomJob, DicomTask from adit.core.tasks import DICOM_TASK_RETRY_STRATEGY, _run_dicom_task -from adit.core.utils.model_utils import get_model_label logger = logging.getLogger(__name__) @@ -50,19 +49,16 @@ def queue_mass_transfer_tasks(job_id: int): ) return - priority = job.default_priority - if job.urgent: - priority = job.urgent_priority - for mass_task in job.tasks.filter( status=DicomTask.Status.PENDING, queued_job__isnull=True, # Skip tasks already queued (idempotency guard) ): - model_label = get_model_label(mass_task.__class__) - queued_job_id = app.configure_task( - "adit.mass_transfer.tasks.process_mass_transfer_task", - allow_unknown=False, - priority=priority, - ).defer(model_label=model_label, task_id=mass_task.pk) - mass_task.queued_job_id = queued_job_id - mass_task.save() + try: + mass_task.queue_pending_task() + except 
Exception: + logger.exception( + "Failed to queue MassTransferTask %d for job %d", + mass_task.pk, + job_id, + ) + raise diff --git a/adit/mass_transfer/tests/test_cleanup.py b/adit/mass_transfer/tests/test_cleanup.py deleted file mode 100644 index 13c27109f..000000000 --- a/adit/mass_transfer/tests/test_cleanup.py +++ /dev/null @@ -1,63 +0,0 @@ -import pytest -from adit_radis_shared.accounts.factories import UserFactory -from django.utils import timezone - -from adit.core.factories import DicomFolderFactory, DicomServerFactory -from adit.mass_transfer.models import ( - MassTransferJob, - MassTransferSettings, - MassTransferTask, - MassTransferVolume, -) - - -@pytest.mark.django_db -def test_cleanup_on_failure_is_noop(): - """With deferred insertion, cleanup_on_failure has nothing to do. - - Volumes are only created in the DB after successful export/conversion, - and temp directories are cleaned up by TemporaryDirectory context managers. - """ - MassTransferSettings.objects.create() - - user = UserFactory.create() - source = DicomServerFactory.create() - destination = DicomFolderFactory.create() - job = MassTransferJob.objects.create( - owner=user, - start_date=timezone.now().date(), - end_date=timezone.now().date(), - partition_granularity=MassTransferJob.PartitionGranularity.DAILY, - ) - task = MassTransferTask.objects.create( - job=job, - source=source, - destination=destination, - patient_id="", - study_uid="", - partition_start=timezone.now(), - partition_end=timezone.now(), - partition_key="20240101", - ) - - # Create some volumes in various states - MassTransferVolume.objects.create( - job=job, - task=task, - partition_key="20240101", - patient_id="PATIENT", - study_instance_uid="study-1", - series_instance_uid="series-1", - modality="CT", - study_description="", - series_description="Head", - series_number=1, - study_datetime=timezone.now(), - status=MassTransferVolume.Status.EXPORTED, - ) - - # Should not raise or modify anything - task.cleanup_on_failure() - - vol = MassTransferVolume.objects.get(series_instance_uid="series-1") - assert vol.status == MassTransferVolume.Status.EXPORTED diff --git a/adit/mass_transfer/tests/test_forms.py b/adit/mass_transfer/tests/test_forms.py index d27ff505d..fcca4808a 100644 --- a/adit/mass_transfer/tests/test_forms.py +++ b/adit/mass_transfer/tests/test_forms.py @@ -10,6 +10,35 @@ from ..forms import MassTransferJobForm +@pytest.fixture +def form_env(): + """Create a user, source server, destination folder, and grant access.""" + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create() + group = GroupFactory.create() + add_user_to_group(user, group) + grant_access(group, source, source=True) + grant_access(group, destination, destination=True) + return {"user": user, "source": source, "destination": destination} + + +def _make_form(form_env, **overrides): + """Build a MassTransferJobForm with sensible defaults, applying overrides.""" + data = { + "source": form_env["source"].pk, + "destination": form_env["destination"].pk, + "start_date": "2024-01-01", + "end_date": "2024-01-03", + "partition_granularity": "daily", + "pseudonymize": False, + "pseudonym_salt": "", + "filters_json": json.dumps([{"modality": "CT"}]), + } + data.update(overrides) + return MassTransferJobForm(data=data, user=form_env["user"]) + + @pytest.mark.django_db def test_clean_clears_salt_when_pseudonymize_unchecked(): """When pseudonymize is unchecked, the salt should be cleared.""" @@ -205,3 +234,121 @@ def 
test_clean_allows_nifti_with_folder_destination(): ) assert form.is_valid(), form.errors assert form.cleaned_data["convert_to_nifti"] is True + + +# --- clean_filters_json tests --- + + +@pytest.mark.django_db +def test_clean_filters_json_invalid_json(form_env): + form = _make_form(form_env, filters_json="{not valid json") + assert not form.is_valid() + assert "filters_json" in form.errors + + +@pytest.mark.django_db +def test_clean_filters_json_non_array(form_env): + form = _make_form(form_env, filters_json=json.dumps({"modality": "CT"})) + assert not form.is_valid() + assert "filters_json" in form.errors + + +@pytest.mark.django_db +def test_clean_filters_json_empty_array(form_env): + form = _make_form(form_env, filters_json=json.dumps([])) + assert not form.is_valid() + assert "filters_json" in form.errors + + +@pytest.mark.django_db +def test_clean_filters_json_non_dict_item(form_env): + form = _make_form(form_env, filters_json=json.dumps(["not a dict"])) + assert not form.is_valid() + assert "filters_json" in form.errors + + +@pytest.mark.django_db +def test_clean_filters_json_negative_age(form_env): + form = _make_form(form_env, filters_json=json.dumps([{"min_age": -5}])) + assert not form.is_valid() + assert "filters_json" in form.errors + + +@pytest.mark.django_db +def test_clean_filters_json_inverted_age_range(form_env): + form = _make_form(form_env, filters_json=json.dumps([{"min_age": 90, "max_age": 18}])) + assert not form.is_valid() + assert "filters_json" in form.errors + + +@pytest.mark.django_db +def test_clean_filters_json_extra_fields(form_env): + form = _make_form( + form_env, filters_json=json.dumps([{"modality": "CT", "unknown_field": "x"}]) + ) + assert not form.is_valid() + assert "filters_json" in form.errors + + +# --- clean / clean_source validation tests --- + + +@pytest.mark.django_db +def test_clean_rejects_end_date_before_start_date(form_env): + form = _make_form(form_env, start_date="2024-06-01", end_date="2024-01-01") + assert not form.is_valid() + assert "__all__" in form.errors + + +@pytest.mark.django_db +def test_clean_source_rejects_folder(): + user = UserFactory.create() + source_folder = DicomFolderFactory.create() + destination = DicomFolderFactory.create() + group = GroupFactory.create() + add_user_to_group(user, group) + grant_access(group, source_folder, source=True) + grant_access(group, destination, destination=True) + + form = MassTransferJobForm( + data={ + "source": source_folder.pk, + "destination": destination.pk, + "start_date": "2024-01-01", + "end_date": "2024-01-03", + "partition_granularity": "daily", + "pseudonymize": False, + "pseudonym_salt": "", + "filters_json": json.dumps([{"modality": "CT"}]), + }, + user=user, + ) + assert not form.is_valid() + assert "source" in form.errors + + +@pytest.mark.django_db +def test_clean_source_rejects_unauthorized_server(): + user = UserFactory.create() + source = DicomServerFactory.create() + destination = DicomFolderFactory.create() + group = GroupFactory.create() + add_user_to_group(user, group) + # Only grant destination access, not source + grant_access(group, destination, destination=True) + + form = MassTransferJobForm( + data={ + "source": source.pk, + "destination": destination.pk, + "start_date": "2024-01-01", + "end_date": "2024-01-03", + "partition_granularity": "daily", + "pseudonymize": False, + "pseudonym_salt": "", + "filters_json": json.dumps([{"modality": "CT"}]), + }, + user=user, + ) + assert not form.is_valid() + assert "source" in form.errors diff --git 
a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index a04d74b15..79722787a 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -121,14 +121,15 @@ def mass_transfer_env(tmp_path): ) job.filters_json = [{"modality": "CT"}] job.save(update_fields=["filters_json"]) + now = timezone.now() task = MassTransferTask.objects.create( job=job, source=source, destination=destination, patient_id="", study_uid="", - partition_start=timezone.now(), - partition_end=timezone.now(), + partition_start=now, + partition_end=now + timedelta(hours=23, minutes=59, seconds=59), partition_key="20240101", ) return SimpleNamespace( @@ -354,6 +355,98 @@ def test_find_studies_preserves_order_with_unique_studies(mocker: MockerFixture) assert result_uids == ["1.2.1", "1.2.2", "1.2.3"] +# --------------------------------------------------------------------------- +# _discover_series tests +# --------------------------------------------------------------------------- + + +def _make_series_result( + series_uid: str, + modality: str = "CT", + series_description: str = "Axial", + series_number: int = 1, + institution_name: str = "Radiology", + num_images: int = 10, +) -> ResultDataset: + ds = Dataset() + ds.SeriesInstanceUID = series_uid + ds.Modality = modality + ds.SeriesDescription = series_description + ds.SeriesNumber = series_number + ds.InstitutionName = institution_name + ds.NumberOfSeriesRelatedInstances = num_images + return ResultDataset(ds) + + +def test_discover_series_filters_by_modality(mocker: MockerFixture): + processor = _make_processor(mocker) + processor.mass_task.partition_start = datetime(2024, 1, 1, 0, 0) + processor.mass_task.partition_end = datetime(2024, 1, 1, 23, 59, 59) + + operator = mocker.create_autospec(DicomOperator) + operator.server = mocker.MagicMock(max_search_results=200) + + study = _make_study("study-1") + study.dataset.ModalitiesInStudy = ["CT", "MR"] + operator.find_studies.return_value = [study] + + ct_series = _make_series_result("series-ct", modality="CT") + mr_series = _make_series_result("series-mr", modality="MR") + operator.find_series.return_value = [ct_series, mr_series] + + # Filter for MR only + filters = [_make_filter(modality="MR")] + result = processor._discover_series(operator, filters) + + assert len(result) == 1 + assert result[0].series_instance_uid == "series-mr" + + +def test_discover_series_deduplicates_across_filters(mocker: MockerFixture): + processor = _make_processor(mocker) + processor.mass_task.partition_start = datetime(2024, 1, 1, 0, 0) + processor.mass_task.partition_end = datetime(2024, 1, 1, 23, 59, 59) + + operator = mocker.create_autospec(DicomOperator) + operator.server = mocker.MagicMock(max_search_results=200) + + study = _make_study("study-1") + study.dataset.ModalitiesInStudy = ["CT"] + operator.find_studies.return_value = [study] + + series = _make_series_result("series-1", modality="CT") + operator.find_series.return_value = [series] + + # Two filters that both match the same series + filters = [_make_filter(modality="CT"), _make_filter(modality="CT")] + result = processor._discover_series(operator, filters) + + assert len(result) == 1 + + +def test_discover_series_filters_by_series_description(mocker: MockerFixture): + processor = _make_processor(mocker) + processor.mass_task.partition_start = datetime(2024, 1, 1, 0, 0) + processor.mass_task.partition_end = datetime(2024, 1, 1, 23, 59, 59) + + operator = mocker.create_autospec(DicomOperator) + 
operator.server = mocker.MagicMock(max_search_results=200) + + study = _make_study("study-1") + study.dataset.ModalitiesInStudy = ["CT"] + operator.find_studies.return_value = [study] + + axial = _make_series_result("series-ax", series_description="Axial T1") + sagittal = _make_series_result("series-sag", series_description="Sagittal T2") + operator.find_series.return_value = [axial, sagittal] + + filters = [_make_filter(modality="CT", series_description="Axial*")] + result = processor._discover_series(operator, filters) + + assert len(result) == 1 + assert result[0].series_instance_uid == "series-ax" + + # --------------------------------------------------------------------------- # process() tests — mocked environment # --------------------------------------------------------------------------- @@ -1424,14 +1517,15 @@ def test_create_pending_volumes_deterministic_pseudonym(): job.filters_json = [{"modality": "CT"}] job.save(update_fields=["filters_json"]) + now = timezone.now() task = MassTransferTask.objects.create( job=job, source=source, destination=destination, patient_id="", study_uid="", - partition_start=timezone.now(), - partition_end=timezone.now(), + partition_start=now, + partition_end=now + timedelta(hours=23, minutes=59, seconds=59), partition_key="20240101", ) @@ -1772,14 +1866,15 @@ def test_process_output_path_includes_job_folder(mocker: MockerFixture, tmp_path job.filters_json = [{"modality": "CT"}] job.save(update_fields=["filters_json"]) + now = timezone.now() task = MassTransferTask.objects.create( job=job, source=source, destination=destination, patient_id="", study_uid="", - partition_start=timezone.now(), - partition_end=timezone.now(), + partition_start=now, + partition_end=now + timedelta(hours=23, minutes=59, seconds=59), partition_key="20240101", ) diff --git a/adit/mass_transfer/utils/partitions.py b/adit/mass_transfer/utils/partitions.py index 97292bca5..9ae415869 100644 --- a/adit/mass_transfer/utils/partitions.py +++ b/adit/mass_transfer/utils/partitions.py @@ -18,6 +18,13 @@ def build_partitions( end_date: date, granularity: str, ) -> list[PartitionWindow]: + """Split a date range into non-overlapping partition windows. + + Each partition covers one day (daily) or up to seven days (weekly). + Returns a list of PartitionWindow objects ordered chronologically. + The last partition may be shorter than the step if *end_date* does not + align with a full window. + """ if end_date < start_date: raise ValueError("End date must be on or after the start date.") diff --git a/docs/mass_transfer_spec.md b/docs/mass_transfer_spec.md deleted file mode 100644 index 1fde6b54d..000000000 --- a/docs/mass_transfer_spec.md +++ /dev/null @@ -1,421 +0,0 @@ -# Mass Transfer — Branch Specification - -## What Is It? - -Mass Transfer is a new ADIT module that bulk-exports DICOM data from a PACS -server to a network folder. It targets research use cases where you need to -pull large cohorts — e.g. "all CT head scans from Neuroradiologie in 2024" — -pseudonymize them, and optionally convert to NIfTI. - -``` -┌──────────┐ ┌──────────┐ ┌──────────────────┐ -│ ADIT │──────────────>│ PACS │──────────────>│ Network Folder │ -│ Worker │ discover │ Server │ fetch + │ /mnt/data/... 
│ -│ │ studies & │ │ pseudonymize│ │ -│ │ series │ │ + write │ PartitionKey/ │ -│ │ │ │ │ Subject/ │ -│ │ │ │ │ Study/ │ -│ │ │ │ │ Series/ │ -└──────────┘ └──────────┘ └──────────────────┘ -``` - ---- - -## Core Concepts - -### Job, Task, Volume - -``` -MassTransferJob (one per user request) - ├── source: PACS Server - ├── destination: Network Folder - ├── date range: 2025-01-01 → 2025-06-30 - ├── granularity: weekly - ├── anonymization_mode: pseudonymize_with_linking - ├── filters: [CT + Neuroradiologie, MR + Neuroradiologie] - ├── pseudonym_salt: "a7f3..." (random per job, used for linking) - │ - ├── MassTransferTask (one per time partition) - │ ├── partition_key: "20250101-20250107" - │ ├── partition_start / partition_end - │ │ - │ ├── MassTransferVolume (one per exported series) - │ │ ├── patient_id, pseudonym - │ │ ├── study_instance_uid, study_instance_uid_pseudonymized - │ │ ├── series_instance_uid, series_instance_uid_pseudonymized - │ │ ├── status: exported | converted | skipped | error - │ │ └── log (error reason if failed) - │ └── ... - └── ... -``` - -### Filters - -Reusable, user-owned filter presets. A job references one or more filters. -Each filter can specify: - -| Field | Example | Notes | -| ------------------ | ------------------ | ------------------------ | -| modality | `CT` | Exact match | -| institution_name | `Neuroradiologie*` | DICOM wildcard supported | -| study_description | `*Schädel*` | DICOM wildcard supported | -| series_description | `Axial*` | DICOM wildcard supported | -| series_number | `2` | Exact integer match | - -Institution can be checked at study level (one C-FIND per study to check any -series) or at series level (checked per series during enumeration). - -### Partitioning - -The date range is split into non-overlapping time windows: - -``` -Job: 2025-01-01 → 2025-01-21, granularity=weekly - -Task 1: 2025-01-01 → 2025-01-07 key="20250101-20250107" -Task 2: 2025-01-08 → 2025-01-14 key="20250108-20250114" -Task 3: 2025-01-15 → 2025-01-21 key="20250115-20250121" -``` - -Each task is an independent Procrastinate job. Tasks can run in parallel -across workers, but each task is guaranteed to run on exactly one worker -(`FOR UPDATE SKIP LOCKED`). - ---- - -## Processing Pipeline - -One task = one partition. Here is the full flow inside `MassTransferTaskProcessor.process()`: - -``` - ┌─────────────────────┐ - │ Start task │ - │ (one partition) │ - └──────────┬──────────┘ - │ - ┌──────────▼────────────┐ - │ Check: suspended? │──Yes──> return WARNING - │ Check: source/dest? │──Bad──> raise DicomError - │ Check: filters? │──None─> return FAILURE - └──────────┬────────────┘ - │ - ┌────────────────▼─────────────────┐ - │ Phase 1: DISCOVER │ - │ │ - │ For each filter: │ - │ C-FIND studies in time window │ - │ For each study: │ - │ C-FIND series │ - │ Apply modality/desc/inst │ - │ filters on each series │ - │ │ - │ Result: list[DiscoveredSeries] │ - │ (in-memory, no DB writes) │ - └────────────────┬─────────────────┘ - │ - ┌────────────────▼─────────────────┐ - │ Resumability check │ - │ │ - │ done_uids = DB volumes with │ - │ status in (EXPORTED, CONVERTED, │ - │ SKIPPED) │ - │ Delete any ERROR volumes (retry │ - │ pending = discovered - done_uid │ - └────────────────┬─────────────────┘ - │ - ┌────────────────▼─────────────────┐ - │ Group by patient_id │ - │ Compute subject_id (pseudonym │ - │ or raw patient_id) │ - └────────────────┬─────────────────┘ - │ - ┌──────────▼────────────┐ - │ For each series: │ - │ │ - ┌────────────┤ DICOM export path? 
│ - │ │ NIfTI conversion? │ - │ └──────────┬─────────────┘ - │ │ - ┌──────▼───────┐ ┌─────────▼──────────┐ - │ DICOM only │ │ NIfTI mode │ - │ │ │ │ - │ C-GET series │ │ C-GET to temp dir │ - │ pseudonymize │ │ pseudonymize │ - │ write .dcm │ │ dcm2niix → .nii.gz │ - │ to final dir │ │ write to final dir │ - │ │ │ temp dir auto- │ - │ │ │ cleaned │ - └──────┬───────┘ └─────────┬──────────┘ - │ │ - └──────────┬───────────┘ - │ - ┌──────────▼────────────┐ - │ Create DB volume │ - │ (deferred insertion) │ - │ status = EXPORTED │ - │ | CONVERTED │ - │ | SKIPPED │ - │ | ERROR │ - └──────────┬────────────┘ - │ - ┌──────────▼────────────┐ - │ Next series... │ - │ (RetriableDicomError │ - │ re-raised for │ - │ Procrastinate retry)│ - └──────────┬────────────┘ - │ - ┌──────────▼────────────┐ - │ Return task result │ - │ SUCCESS / WARNING / │ - │ FAILURE + summary │ - └───────────────────────┘ -``` - -### Output Folder Structure - -``` -/mnt/data/mass_transfer_exports/ -└── 20250101-20250107/ # partition key - ├── A7B3X9K2M1Q4/ # pseudonym (or raw PatientID) - │ ├── CT_Schaedel_20250103_221030/ # Description_Date_Time - │ │ ├── Axial_1/ # SeriesDescription_SeriesNumber - │ │ │ ├── 1.2.3.4.5.6.7.dcm - │ │ │ ├── 1.2.3.4.5.6.8.dcm - │ │ │ └── ... - │ │ └── Sagittal_2/ - │ │ └── ... - │ └── MRT_Kopf_20250105_221030/ - │ └── T1_1/ - │ └── ... - └── R4T7Y2W8N3P1/ - └── ... -``` - ---- - -## Anonymization Modes - -| Mode | Folder name | DICOM tags | Cross-partition consistency | CSV export | -| ----------------------------- | ----------------------- | ------------------ | --------------------------------------------- | ---------------------------- | -| **None** | Raw PatientID | Untouched | N/A | Not available | -| **Pseudonymize** | Random hex per study | dicognito (random) | No — each study gets a unique random folder | Not available | -| **Pseudonymize with Linking** | Deterministic pseudonym | dicognito (seeded) | Yes — same patient always gets same pseudonym | patient_id → pseudonym pairs | - -### How Linking Works - -``` -job.pseudonym_salt = "a7f3e2..." # random, generated once per job - - ┌──────────────────────────┐ - │ Pseudonymizer(seed=salt)│ - │ │ - "PATIENT_12345" ──┤ md5(salt + patient_id) ├──> "A7B3X9K2M1Q4" - │ → deterministic 12-char │ - └──────────────────────────┘ - -Same salt + same patient_id = same pseudonym, always. -No lookup table needed. Works across partitions. -Uses dicognito's Randomizer internally. -``` - -In non-linking mode, each study gets a fresh `secrets.token_hex(6)` — even -two studies from the same patient land in separate opaque folders, so there -is no way to correlate which studies belong to the same person. - ---- - -## Adaptive Study Discovery (recursive split) - -PACS servers often limit C-FIND results (e.g. 200 max). When a query returns -more results than the limit, the time window is recursively bisected: - -``` -Query: 2025-01-01 → 2025-01-07, limit=200 - → 250 results (over limit!) - → Split: - Left: 2025-01-01 → 2025-01-04 → 120 results (ok) - Right: 2025-01-05 → 2025-01-07 → 140 results (ok) - → Merge + deduplicate by StudyInstanceUID - → 245 unique studies -``` - -Recursion stops with an error if the window is smaller than 30 minutes -(safety valve against infinite recursion on a PACS that always returns -too many results). - ---- - -## Persistent DIMSE Connections - -DICOM network operations (C-FIND, C-GET) require a TCP association with -specific presentation contexts. By default, ADIT opens and closes an -association per operation. 
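The `persistent=True` mode introduced below keeps one association open across
calls. A rough sketch of the intended call pattern (assuming only the
`persistent` flag and `close()` method this spec describes; the query arguments
and other method signatures are illustrative, not the verified API):

```
operator = DicomOperator(server, persistent=True)  # opt-in; persistent=False stays the default
try:
    for study in operator.find_studies(query):     # first call opens one C-FIND association
        operator.find_series(study)                # later C-FINDs reuse that association
    # Switching to C-GET closes the C-FIND association and reopens one with
    # C-GET presentation contexts; after an abort the next call reconnects.
finally:
    operator.close()                               # release whatever association is still open
```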
- -For mass transfer with hundreds of series, this is wasteful (~500ms overhead -per association). The `persistent=True` mode keeps the association open: - -``` -Default mode (persistent=False): - open → C-FIND study 1 → close - open → C-FIND study 2 → close - open → C-FIND study 3 → close - open → C-GET series 1 → close - open → C-GET series 2 → close - ... - ~700 associations for 100 studies × ~500ms = ~350s overhead - -Persistent mode (persistent=True): - open(C-FIND) → C-FIND study 1 → study 2 → study 3 → ... - close(C-FIND) - open(C-GET) → C-GET series 1 → series 2 → ... - close(C-GET) - 2-3 associations total × ~500ms = ~1s overhead -``` - -Service-type switching (C-FIND → C-GET) automatically closes and reopens -with the correct presentation contexts. After an abort (e.g. `limit_results` -in C-FIND), the next call auto-reconnects. - -Only mass transfer opts in. All existing code is unchanged (`persistent=False` -is the default). - ---- - -## Deferred Volume Insertion - -Volumes (DB records tracking each exported series) are only created **after** -successful export or conversion — not during discovery. - -``` -Old approach: - discover → create PENDING volumes in DB → export → update to EXPORTED - Problem: failed exports leave orphan PENDING records - -New approach: - discover → in-memory DiscoveredSeries list → export → create EXPORTED volume - No orphans. Resumability via: "skip series whose UID is already in DB" -``` - -On retry, ERROR volumes from prior runs are deleted first, then reprocessed. -This avoids UniqueConstraint violations on `(job, series_instance_uid)`. - ---- - -## Error Handling - -| Error type | Behavior | -| ------------------------------------------------------- | ------------------------------------------------------- | -| `RetriableDicomError` (PACS timeout, connection lost) | Re-raised → Procrastinate retries the whole task | -| `DicomError` / other exceptions (single series) | Caught → ERROR volume created → continue to next series | -| All series fail | Task status = FAILURE | -| Some series fail | Task status = WARNING, message shows count | -| Non-image DICOM (dcm2niix says "No valid DICOM images") | SKIPPED volume, no error | - -Task detail page shows a table of all skipped and failed volumes with the -specific error reason for each. - ---- - -## Infrastructure Changes - -### Dedicated Worker - -Mass transfer runs on its own Procrastinate worker (`mass_transfer_worker`) -listening on the `mass_transfer` queue. This prevents long-running bulk -exports from blocking the normal DICOM transfer queue. - -### Mount Propagation - -Containers use `rslave` mount propagation so that NAS mounts made on the host -(e.g. `/mnt/nfs/ccinas01/adit`) are visible inside the container without -restart. - -### Job Cancellation - -In-progress tasks can be cancelled. The DIMSE connection leak fix ensures -that abandoned C-GET generators properly close their associations (via -`finally` blocks in the `connect_to_server` decorator). - ---- - -## Design Decisions - -1. **Partition-per-task, not study-per-task.** - One Procrastinate job per time window, not per study. Reduces job queue - overhead from thousands to dozens. Each task discovers and exports - everything in its window. - -2. **Filters are reusable objects, not inline fields.** - Users define filters once ("CT Neuroradiologie") and attach them to - multiple jobs. Filters support DICOM wildcards for fuzzy matching. - -3. 
**Deferred insertion over eager insertion.** - DB records only exist for successfully processed series. No cleanup - needed for partial failures. Resumability works by checking existing UIDs. - -4. **dicognito for pseudonymization, not a custom implementation.** - dicognito handles UIDs, dates, names, and all DICOM-specific anonymization - rules. We only add a seed parameter for deterministic (linking) mode. - -5. **Folder pseudonyms computed independently from DICOM pseudonyms.** - The folder name uses `compute_pseudonym()` (12-char alphanumeric from - the seed) while DICOM tags are pseudonymized by dicognito's full pipeline. - This means the folder name is stable and predictable while the internal - DICOM data gets proper anonymization. - -6. **Temp directories for NIfTI conversion.** - DICOM files are exported to a `tempfile.TemporaryDirectory()`, converted - with `dcm2niix`, and the temp dir is auto-cleaned. No persistent staging - area needed. - -7. **Persistent connections opt-in only.** - `persistent=False` is the default. Only mass transfer enables it. No - risk to existing transfer modules. - ---- - -## Files Added/Modified - -### New: `adit/mass_transfer/` (entire app — 39 files) - -| File | Purpose | -| --------------------- | ----------------------------------------------------- | -| `models.py` | Job, Task, Volume, Filter, Settings models | -| `processors.py` | Discovery, export, NIfTI conversion, pseudonymization | -| `forms.py` | Job creation form with dynamic filter selection | -| `views.py` | CRUD views + CSV associations export | -| `urls.py` | 18 URL patterns | -| `utils/partitions.py` | Date range → partition windows | -| `apps.py` | App registration, menu item, processor registration | -| `templates/` | Job form, job detail, task detail, filter CRUD | -| `tests/` | 45 tests (processor, partitions, cleanup) | - -### Modified: `adit/core/` - -| File | Change | -| -------------------------- | ------------------------------------------------------------ | -| `utils/dimse_connector.py` | `persistent` mode, service-type tracking, generator leak fix | -| `utils/dicom_operator.py` | Pass-through `persistent` param, `close()` method | -| `utils/pseudonymizer.py` | `seed` parameter, `compute_pseudonym()` method | - -### Modified: Infrastructure - -| File | Change | -| ----------------------- | ------------------------------------------------------- | -| `docker-compose.*.yml` | `mass_transfer_worker` service, `rslave` propagation | -| `adit/settings/base.py` | Mass transfer settings (priorities, max search results) | - ---- - -## Test Coverage - -45 tests covering: - -- **Discovery**: recursive time-window splitting, deduplication, boundary correctness -- **Processing**: success, partial failure, total failure, suspension, bad source/dest, no filters, empty partition -- **Resumability**: skipping already-done series, deleting ERROR volumes on retry -- **Pseudonymization**: within-task consistency, cross-partition linking, cross-partition non-linking, no-anonymization mode -- **NIfTI conversion**: dcm2niix failure, no output, non-image DICOM skip -- **Utilities**: folder name generation, DICOM wildcard matching, integer parsing, datetime handling -- **Cleanup**: no-op verification (deferred insertion means nothing to clean up) From 5a100f8b15a632a5cd175c9c57d7081e08802bed Mon Sep 17 00:00:00 2001 From: Kai Schlamp Date: Sun, 29 Mar 2026 15:29:10 +0000 Subject: [PATCH 089/103] Use DICOM-style UIDs in mass transfer processor test fixtures Replace plain string identifiers with 
realistic DICOM UID format in test helper calls for consistency with actual DICOM data. Co-Authored-By: Claude Opus 4.6 (1M context) --- adit/mass_transfer/tests/test_processor.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 79722787a..7c239c257 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -386,12 +386,12 @@ def test_discover_series_filters_by_modality(mocker: MockerFixture): operator = mocker.create_autospec(DicomOperator) operator.server = mocker.MagicMock(max_search_results=200) - study = _make_study("study-1") + study = _make_study("1.2.3.100") study.dataset.ModalitiesInStudy = ["CT", "MR"] operator.find_studies.return_value = [study] - ct_series = _make_series_result("series-ct", modality="CT") - mr_series = _make_series_result("series-mr", modality="MR") + ct_series = _make_series_result("1.2.3.201", modality="CT") + mr_series = _make_series_result("1.2.3.202", modality="MR") operator.find_series.return_value = [ct_series, mr_series] # Filter for MR only @@ -399,7 +399,7 @@ def test_discover_series_filters_by_modality(mocker: MockerFixture): result = processor._discover_series(operator, filters) assert len(result) == 1 - assert result[0].series_instance_uid == "series-mr" + assert result[0].series_instance_uid == "1.2.3.202" def test_discover_series_deduplicates_across_filters(mocker: MockerFixture): @@ -410,11 +410,11 @@ def test_discover_series_deduplicates_across_filters(mocker: MockerFixture): operator = mocker.create_autospec(DicomOperator) operator.server = mocker.MagicMock(max_search_results=200) - study = _make_study("study-1") + study = _make_study("1.2.3.100") study.dataset.ModalitiesInStudy = ["CT"] operator.find_studies.return_value = [study] - series = _make_series_result("series-1", modality="CT") + series = _make_series_result("1.2.3.301", modality="CT") operator.find_series.return_value = [series] # Two filters that both match the same series @@ -432,19 +432,19 @@ def test_discover_series_filters_by_series_description(mocker: MockerFixture): operator = mocker.create_autospec(DicomOperator) operator.server = mocker.MagicMock(max_search_results=200) - study = _make_study("study-1") + study = _make_study("1.2.3.100") study.dataset.ModalitiesInStudy = ["CT"] operator.find_studies.return_value = [study] - axial = _make_series_result("series-ax", series_description="Axial T1") - sagittal = _make_series_result("series-sag", series_description="Sagittal T2") + axial = _make_series_result("1.2.3.401", series_description="Axial T1") + sagittal = _make_series_result("1.2.3.402", series_description="Sagittal T2") operator.find_series.return_value = [axial, sagittal] filters = [_make_filter(modality="CT", series_description="Axial*")] result = processor._discover_series(operator, filters) assert len(result) == 1 - assert result[0].series_instance_uid == "series-ax" + assert result[0].series_instance_uid == "1.2.3.401" # --------------------------------------------------------------------------- From 21565280f4e17f687f8939d2d32d612f9a84e515 Mon Sep 17 00:00:00 2001 From: Kai Schlamp Date: Sun, 29 Mar 2026 20:44:58 +0000 Subject: [PATCH 090/103] Avoid pebble subprocess DB leaks in queue pending tasks tests Replace run_worker_once() with direct queue_mass_transfer_tasks() calls in the queue tests. 
The worker spawned pebble daemon subprocesses that left PostgreSQL connections open, causing a teardown warning when running the full test suite. The queueing logic is now tested directly while task processing is already covered by core/tests/test_tasks.py. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../tests/test_queue_pending_tasks.py | 59 ++++++++----------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/adit/mass_transfer/tests/test_queue_pending_tasks.py b/adit/mass_transfer/tests/test_queue_pending_tasks.py index 20ac293d0..2f00665b4 100644 --- a/adit/mass_transfer/tests/test_queue_pending_tasks.py +++ b/adit/mass_transfer/tests/test_queue_pending_tasks.py @@ -1,5 +1,4 @@ import pytest -from adit_radis_shared.common.utils.testing_helpers import run_worker_once from procrastinate.contrib.django.models import ProcrastinateJob from adit.core.models import DicomJob, DicomTask @@ -34,21 +33,22 @@ def test_queue_pending_tasks_defers_background_job(): @pytest.mark.django_db(transaction=True) def test_background_job_queues_all_pending_tasks(): - """After the background job runs, all pending tasks should have been - picked up by the worker (status progressed beyond PENDING).""" + """After queue_mass_transfer_tasks runs, all pending tasks should have + queued_job set and be placed on the mass_transfer queue.""" job = MassTransferJobFactory.create(status=DicomJob.Status.PENDING) task1 = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) task2 = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) - job.queue_pending_tasks() - run_worker_once() + queue_mass_transfer_tasks(job_id=job.pk) - # run_worker_once processes all jobs (queueing + processing) and deletes - # ProcrastinateJob records. Verify that tasks were actually processed. 
task1.refresh_from_db() task2.refresh_from_db() - assert task1.status != DicomTask.Status.PENDING - assert task2.status != DicomTask.Status.PENDING + assert task1.queued_job is not None + assert task2.queued_job is not None + + for task in [task1, task2]: + procrastinate_job = ProcrastinateJob.objects.get(pk=task.queued_job_id) + assert procrastinate_job.queue_name == "mass_transfer" @pytest.mark.django_db(transaction=True) @@ -58,42 +58,37 @@ def test_background_job_skips_canceled_tasks(): pending_task = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) canceled_task = MassTransferTaskFactory.create(status=DicomTask.Status.CANCELED, job=job) - job.queue_pending_tasks() - run_worker_once() + queue_mass_transfer_tasks(job_id=job.pk) pending_task.refresh_from_db() canceled_task.refresh_from_db() - assert pending_task.status != DicomTask.Status.PENDING - assert canceled_task.status == DicomTask.Status.CANCELED + assert pending_task.queued_job is not None + assert canceled_task.queued_job is None @pytest.mark.django_db(transaction=True) def test_background_job_is_idempotent(): - """Deferring queue_pending_tasks twice should not double-queue tasks.""" + """Calling queue_mass_transfer_tasks twice should not double-queue tasks.""" job = MassTransferJobFactory.create(status=DicomJob.Status.PENDING) task1 = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) task2 = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) - job.queue_pending_tasks() - run_worker_once() + queue_mass_transfer_tasks(job_id=job.pk) task1.refresh_from_db() task2.refresh_from_db() - assert task1.attempts == 1 - assert task2.attempts == 1 + first_queued_job_1 = task1.queued_job_id + first_queued_job_2 = task2.queued_job_id + assert first_queued_job_1 is not None + assert first_queued_job_2 is not None - # Reset job to PENDING and defer again - job.refresh_from_db() - job.status = DicomJob.Status.PENDING - job.save() - job.queue_pending_tasks() - run_worker_once() + # Call again — tasks already have queued_job set, so they should be skipped + queue_mass_transfer_tasks(job_id=job.pk) - # Tasks should not have been processed again (status is no longer PENDING) task1.refresh_from_db() task2.refresh_from_db() - assert task1.attempts == 1 - assert task2.attempts == 1 + assert task1.queued_job_id == first_queued_job_1 + assert task2.queued_job_id == first_queued_job_2 @pytest.mark.django_db(transaction=True) @@ -103,11 +98,11 @@ def test_background_job_skips_deleted_job(): job = MassTransferJobFactory.create(status=DicomJob.Status.PENDING) MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) - job.queue_pending_tasks() + job_id = job.pk job.delete() # Should not raise - run_worker_once() + queue_mass_transfer_tasks(job_id=job_id) @pytest.mark.django_db(transaction=True) @@ -117,16 +112,14 @@ def test_background_job_skips_non_pending_job(): job = MassTransferJobFactory.create(status=DicomJob.Status.PENDING) task = MassTransferTaskFactory.create(status=DicomTask.Status.PENDING, job=job) - job.queue_pending_tasks() - # Simulate cancel happening before the background job runs job.status = DicomJob.Status.CANCELED job.save() - run_worker_once() + queue_mass_transfer_tasks(job_id=job.pk) task.refresh_from_db() - assert task.status == DicomTask.Status.PENDING + assert task.queued_job is None @pytest.mark.django_db(transaction=True) From 4a67599e0747e5e4cf4a823d8d67a8c4a3bcfc39 Mon Sep 17 00:00:00 2001 From: Kai Schlamp Date: Sun, 29 Mar 2026 21:22:32 +0000 
Subject: [PATCH 091/103] Simplify mass transfer folder naming helpers Remove unnecessary SHA hash suffix from study folder names since studies are already nested under patient folders, and include series description in the fallback when series number is missing. Co-Authored-By: Claude Opus 4.6 (1M context) --- adit/mass_transfer/processors.py | 11 ++++------- adit/mass_transfer/tests/test_processor.py | 22 ++++------------------ 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 9c6e34d98..55ea9bc2f 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -1,6 +1,5 @@ from __future__ import annotations -import hashlib import json import logging import secrets @@ -118,17 +117,16 @@ def _study_datetime(study: ResultDataset) -> datetime: return datetime.combine(study_date, study_time) -def _study_folder_name(study_description: str, study_dt: datetime, study_uid: str) -> str: +def _study_folder_name(study_description: str, study_dt: datetime) -> str: desc = sanitize_filename(study_description or "Undefined") dt_str = study_dt.strftime("%Y%m%d_%H%M%S") - short_hash = hashlib.sha256(study_uid.encode()).hexdigest()[:4] - return f"{desc}_{dt_str}_{short_hash}" + return f"{desc}_{dt_str}" def _series_folder_name(series_description: str, series_number: int | None, series_uid: str) -> str: - if series_number is None: - return sanitize_filename(series_uid) desc = sanitize_filename(series_description or "Undefined") + if series_number is None: + return f"{desc}_{sanitize_filename(series_uid)}" return f"{desc}_{series_number}" @@ -498,7 +496,6 @@ def _transfer_single_series( study_folder = _study_folder_name( volume.study_description, volume.study_datetime, - volume.study_instance_uid, ) series_folder = _series_folder_name( volume.series_description, diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 7c239c257..6fa0ee8d1 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -992,26 +992,12 @@ def test_series_folder_name_with_no_description(): def test_series_folder_name_with_no_number(): - assert _series_folder_name("Head CT", None, "1.2.3.4.5") == "1.2.3.4.5" + assert _series_folder_name("Head CT", None, "1.2.3.4.5") == "Head CT_1.2.3.4.5" -def test_study_folder_name_includes_description_date_and_hash(): - name = _study_folder_name("Brain CT", datetime(2024, 1, 15, 10, 30), "1.2.3.4") - assert name.startswith("Brain CT_20240115_103000_") - # Hash part is 4 chars - assert len(name.split("_")[-1]) == 4 - - -def test_study_folder_name_deterministic(): - name1 = _study_folder_name("Brain CT", datetime(2024, 1, 15), "1.2.3.4") - name2 = _study_folder_name("Brain CT", datetime(2024, 1, 15), "1.2.3.4") - assert name1 == name2 - - -def test_study_folder_name_different_uid_different_hash(): - name1 = _study_folder_name("Brain CT", datetime(2024, 1, 15), "1.2.3.4") - name2 = _study_folder_name("Brain CT", datetime(2024, 1, 15), "1.2.3.5") - assert name1 != name2 +def test_study_folder_name_includes_description_and_date(): + name = _study_folder_name("Brain CT", datetime(2024, 1, 15, 10, 30)) + assert name == "Brain CT_20240115_103000" def test_parse_int_normal(): From cb5c0b3f95bd3882c8e5c1a1ec25a64e34032937 Mon Sep 17 00:00:00 2001 From: Kai Schlamp Date: Sun, 29 Mar 2026 22:22:17 +0000 Subject: [PATCH 092/103] Replace dicognito-based pseudonym generation with own SHA-256 algorithm 
Decouple pseudonym computation from dicognito's IDAnonymizer to ensure stability across library upgrades. The new standalone compute_pseudonym function uses SHA-256 with divmod extraction into A-Z0-9, matching dicognito's approach but with a stronger hash. Deterministic pseudonyms are 14 chars, random pseudonyms are 15 chars so the two modes can be distinguished by length. Co-Authored-By: Claude Opus 4.6 (1M context) --- adit/core/tests/utils/test_pseudonymizer.py | 56 ++++++++------------- adit/core/utils/pseudonymizer.py | 39 +++++++------- adit/mass_transfer/processors.py | 16 ++++-- adit/mass_transfer/tests/test_processor.py | 20 ++++++-- 4 files changed, 70 insertions(+), 61 deletions(-) diff --git a/adit/core/tests/utils/test_pseudonymizer.py b/adit/core/tests/utils/test_pseudonymizer.py index 735dd1cfe..6adcf2d24 100644 --- a/adit/core/tests/utils/test_pseudonymizer.py +++ b/adit/core/tests/utils/test_pseudonymizer.py @@ -3,7 +3,7 @@ from pydicom.dataset import FileMetaDataset from pydicom.uid import UID -from adit.core.utils.pseudonymizer import Pseudonymizer +from adit.core.utils.pseudonymizer import Pseudonymizer, compute_pseudonym @pytest.fixture @@ -101,44 +101,32 @@ def test_pseudonymize_preserves_acquisition_datetime(self, pseudonymizer: Pseudo class TestComputePseudonym: - def test_requires_seed(self): - ps = Pseudonymizer() - with pytest.raises(ValueError, match="requires a seeded Pseudonymizer"): - ps.compute_pseudonym("PAT1") - def test_deterministic_same_seed(self): - """Same seed + same patient ID always produces the same pseudonym.""" - ps1 = Pseudonymizer(seed="fixed-seed") - ps2 = Pseudonymizer(seed="fixed-seed") - assert ps1.compute_pseudonym("PAT1") == ps2.compute_pseudonym("PAT1") + """Same seed + same identifier always produces the same pseudonym.""" + result1 = compute_pseudonym("fixed-seed", "PAT1", 14) + result2 = compute_pseudonym("fixed-seed", "PAT1", 14) + assert result1 == result2 def test_different_seeds_produce_different_pseudonyms(self): - ps1 = Pseudonymizer(seed="seed-a") - ps2 = Pseudonymizer(seed="seed-b") - assert ps1.compute_pseudonym("PAT1") != ps2.compute_pseudonym("PAT1") - - def test_different_patients_produce_different_pseudonyms(self): - ps = Pseudonymizer(seed="fixed-seed") - assert ps.compute_pseudonym("PAT1") != ps.compute_pseudonym("PAT2") - - def test_matches_dicognito_anonymize(self): - """compute_pseudonym must match what dicognito produces for PatientID.""" - seed = "test-consistency-seed" - ps = Pseudonymizer(seed=seed) - pseudonym = ps.compute_pseudonym("PATIENT_42") + result1 = compute_pseudonym("seed-a", "PAT1", 14) + result2 = compute_pseudonym("seed-b", "PAT1", 14) + assert result1 != result2 - # Run the full anonymizer on a real dataset and check the PatientID - # before our pseudonymize() overwrites it. 
- from dicognito.anonymizer import Anonymizer + def test_different_identifiers_produce_different_pseudonyms(self): + result1 = compute_pseudonym("fixed-seed", "PAT1", 14) + result2 = compute_pseudonym("fixed-seed", "PAT2", 14) + assert result1 != result2 - anon = Anonymizer(seed=seed) - ds = create_base_dataset() - ds.PatientID = "PATIENT_42" - anon.anonymize(ds) - assert ds.PatientID == pseudonym + def test_length(self): + assert len(compute_pseudonym("seed", "PAT1", 14)) == 14 + assert len(compute_pseudonym("seed", "PAT1", 8)) == 8 - def test_pseudonym_is_alphanumeric(self): - ps = Pseudonymizer(seed="alpha-seed") - result = ps.compute_pseudonym("SOME_PATIENT") + def test_pseudonym_is_uppercase_alphanumeric(self): + result = compute_pseudonym("alpha-seed", "SOME_PATIENT", 14) assert result.isalnum() assert result == result.upper() + + def test_stable_output(self): + """Pseudonyms must not change across code updates (breaks cross-transfer linking).""" + assert compute_pseudonym("my-salt", "PAT1", 12) == "81T9LZGKTAM3" + assert compute_pseudonym("my-salt", "PAT1", 14) == "81T9LZGKTAM3UV" diff --git a/adit/core/utils/pseudonymizer.py b/adit/core/utils/pseudonymizer.py index 8b64c961f..b852d16aa 100644 --- a/adit/core/utils/pseudonymizer.py +++ b/adit/core/utils/pseudonymizer.py @@ -1,10 +1,28 @@ +import hashlib +import string + from dicognito.anonymizer import Anonymizer -from dicognito.idanonymizer import IDAnonymizer -from dicognito.randomizer import Randomizer from dicognito.value_keeper import ValueKeeper from django.conf import settings from pydicom import Dataset +_PSEUDONYM_ALPHABET = string.ascii_uppercase + string.digits # A-Z0-9 + + +def compute_pseudonym(seed: str, identifier: str, length: int) -> str: + """Derive a pseudonym from a seed and identifier using SHA-256. + + Uses the same divmod extraction approach as dicognito's IDAnonymizer + but with SHA-256 instead of MD5 for stability and security. + """ + digest = hashlib.sha256((seed + identifier).encode("utf8")).digest() + big_int = int.from_bytes(digest, "big") + chars = [] + for _ in range(length): + big_int, idx = divmod(big_int, len(_PSEUDONYM_ALPHABET)) + chars.append(_PSEUDONYM_ALPHABET[idx]) + return "".join(chars) + class Pseudonymizer: """ @@ -38,23 +56,6 @@ def _setup_anonymizer(self, seed: str | None = None) -> Anonymizer: anonymizer.add_element_handler(ValueKeeper(element)) return anonymizer - def compute_pseudonym(self, patient_id: str) -> str: - """Pre-compute the pseudonym for a patient ID without a full DICOM dataset. - - Delegates to dicognito's IDAnonymizer so the result always matches - what anonymize() would produce for PatientID, even if dicognito - changes its internal algorithm. - Requires that this Pseudonymizer was created with a seed. - """ - if self._seed is None: - raise ValueError("compute_pseudonym requires a seeded Pseudonymizer") - randomizer = Randomizer(self._seed) - id_anon = IDAnonymizer(randomizer, "", "", "PatientID") - ds = Dataset() - ds.PatientID = patient_id - id_anon(ds, ds["PatientID"]) - return str(ds.PatientID) - def pseudonymize(self, ds: Dataset, pseudonym: str) -> None: """ Pseudonymize the given DICOM dataset using the anonymizer and the provided pseudonym. 
diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index 55ea9bc2f..dcebdeaba 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -22,7 +22,7 @@ from adit.core.utils.dicom_manipulator import DicomManipulator from adit.core.utils.dicom_operator import DicomOperator from adit.core.utils.dicom_utils import convert_to_python_regex, write_dataset -from adit.core.utils.pseudonymizer import Pseudonymizer +from adit.core.utils.pseudonymizer import Pseudonymizer, compute_pseudonym from adit.core.utils.sanitize import sanitize_filename from .models import ( @@ -69,6 +69,11 @@ def from_dict(cls, d: dict) -> "FilterSpec": _DELAY_BETWEEN_SERIES = 0.5 # seconds between fetch requests to avoid overwhelming the PACS _DELAY_RETRY_FETCH = 3 # seconds before retrying a fetch that returned 0 images +# Deterministic pseudonyms use 14 characters. Random pseudonyms use 15 so the +# two modes can be distinguished by length. +_DETERMINISTIC_PSEUDONYM_LENGTH = 14 +_RANDOM_PSEUDONYM_LENGTH = 15 + @dataclass(frozen=True) class DiscoveredSeries: @@ -352,11 +357,16 @@ def _create_pending_volumes( if pseudonymizer and job.pseudonym_salt: if pid not in deterministic_ids: - deterministic_ids[pid] = pseudonymizer.compute_pseudonym(pid) + deterministic_ids[pid] = compute_pseudonym( + job.pseudonym_salt, pid, length=_DETERMINISTIC_PSEUDONYM_LENGTH + ) pseudonym = deterministic_ids[pid] elif pseudonymizer: if study_uid not in random_pseudonyms: - random_pseudonyms[study_uid] = secrets.token_hex(6).upper() + random_seed = secrets.token_hex(16) + random_pseudonyms[study_uid] = compute_pseudonym( + random_seed, pid, length=_RANDOM_PSEUDONYM_LENGTH + ) pseudonym = random_pseudonyms[study_uid] else: pseudonym = "" diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 6fa0ee8d1..4d9e21ab8 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -915,10 +915,13 @@ def fake_export(op, s, path, subject_id, pseudonymizer): assert subject_ids[0] != "" assert subject_ids[0] != "PAT1" # Pseudonym should be deterministic — running again with same salt gives same result - from adit.core.utils.pseudonymizer import Pseudonymizer + from adit.core.utils.pseudonymizer import compute_pseudonym + from adit.mass_transfer.processors import _DETERMINISTIC_PSEUDONYM_LENGTH - ps = Pseudonymizer(seed="test-salt-for-deterministic-pseudonyms") - expected = ps.compute_pseudonym("PAT1") + expected = compute_pseudonym( + "test-salt-for-deterministic-pseudonyms", "PAT1", + length=_DETERMINISTIC_PSEUDONYM_LENGTH, + ) assert subject_ids[0] == expected @@ -1515,9 +1518,16 @@ def test_create_pending_volumes_deterministic_pseudonym(): partition_key="20240101", ) + from adit.core.utils.pseudonymizer import compute_pseudonym + from adit.mass_transfer.processors import _DETERMINISTIC_PSEUDONYM_LENGTH + ps = Pseudonymizer(seed="test-seed-123") - expected_pat1 = ps.compute_pseudonym("PAT1") - expected_pat2 = ps.compute_pseudonym("PAT2") + expected_pat1 = compute_pseudonym( + "test-seed-123", "PAT1", length=_DETERMINISTIC_PSEUDONYM_LENGTH + ) + expected_pat2 = compute_pseudonym( + "test-seed-123", "PAT2", length=_DETERMINISTIC_PSEUDONYM_LENGTH + ) series = [ _make_discovered(patient_id="PAT1", study_uid="study-A", series_uid="s-1"), From 21178bda82cdf071f81455f0bd14faa9125816b6 Mon Sep 17 00:00:00 2001 From: Kai Schlamp Date: Mon, 30 Mar 2026 00:14:37 +0000 Subject: [PATCH 093/103] Auto-reconnect DIMSE 
on service switch and add mass transfer acceptance tests Instead of raising an error when switching DICOM services (e.g. C-FIND to C-GET) on a persistent connection, the DimseConnector now transparently closes the old association and opens a new one. This simplifies callers that need to chain different operations. Also adds Playwright acceptance tests covering pseudonymized, unpseudonymized, folder-destination, and NIfTI-conversion mass transfers across all three transfer protocols (C-MOVE, C-GET, DICOMweb). Co-Authored-By: Claude Opus 4.6 (1M context) --- adit/core/tests/utils/test_dimse_connector.py | 65 ++++++ adit/core/utils/dimse_connector.py | 16 +- .../tests/acceptance/__init__.py | 0 .../tests/acceptance/conftest.py | 5 + .../tests/acceptance/test_mass_transfer.py | 201 ++++++++++++++++++ adit/mass_transfer/tests/test_processor.py | 33 +-- adit/mass_transfer/utils/testing_helpers.py | 9 + 7 files changed, 304 insertions(+), 25 deletions(-) create mode 100644 adit/mass_transfer/tests/acceptance/__init__.py create mode 100644 adit/mass_transfer/tests/acceptance/conftest.py create mode 100644 adit/mass_transfer/tests/acceptance/test_mass_transfer.py create mode 100644 adit/mass_transfer/utils/testing_helpers.py diff --git a/adit/core/tests/utils/test_dimse_connector.py b/adit/core/tests/utils/test_dimse_connector.py index 535884889..d70a1f936 100644 --- a/adit/core/tests/utils/test_dimse_connector.py +++ b/adit/core/tests/utils/test_dimse_connector.py @@ -158,3 +158,68 @@ def test_abort_connection_with_no_connection(self): # Assert assert connector.assoc is None + + def test_service_switch_closes_and_reopens_connection(self, mocker): + """Test that switching services (e.g. C-FIND -> C-GET) closes the old connection + and opens a new one with the correct presentation contexts.""" + server = DicomServerFactory.create() + connector = DimseConnector(server, auto_connect=True) + + associate_mock = mocker.patch("adit.core.utils.dimse_connector.AE.associate") + + # First association for C-FIND + find_assoc = create_association_mock() + find_assoc.is_alive.return_value = True + find_assoc.send_c_find.return_value = DicomTestHelper.create_successful_c_find_responses( + [{"PatientID": "12345", "QueryRetrieveLevel": "STUDY"}] + ) + + # Second association for C-GET + get_assoc = create_association_mock() + get_assoc.is_alive.return_value = True + get_assoc.send_c_get.return_value = DicomTestHelper.create_successful_c_get_response() + + associate_mock.side_effect = [find_assoc, get_assoc] + + # Act: perform a C-FIND + query = QueryDataset.create( + PatientID="12345", + StudyInstanceUID="1.2.3.4.5", + QueryRetrieveLevel="STUDY", + ) + list(connector.send_c_find(query)) + + # After C-FIND with auto_close, connection is closed + assert connector.assoc is None + + # Now open a persistent connection for C-FIND, then switch to C-GET + connector.auto_close = False + + # Reset the mock for the persistent connections + find_assoc2 = create_association_mock() + find_assoc2.is_alive.return_value = True + find_assoc2.send_c_find.return_value = DicomTestHelper.create_successful_c_find_responses( + [{"PatientID": "12345", "QueryRetrieveLevel": "STUDY"}] + ) + + get_assoc2 = create_association_mock() + get_assoc2.is_alive.return_value = True + get_assoc2.send_c_get.return_value = DicomTestHelper.create_successful_c_get_response() + + associate_mock.side_effect = [find_assoc2, get_assoc2] + + # C-FIND with auto_close=False keeps connection open + list(connector.send_c_find(query)) + assert connector.assoc is 
find_assoc2 + assert connector._current_service == "C-FIND" + + # C-GET should close the C-FIND connection and open a new one + store_handler = MagicMock() + store_errors = [] + connector.send_c_get(query, store_handler, store_errors) + + # The C-FIND association should have been released (closed) + assert find_assoc2.release.called + # The connector should now be on the C-GET association + assert connector.assoc is get_assoc2 + assert connector._current_service == "C-GET" diff --git a/adit/core/utils/dimse_connector.py b/adit/core/utils/dimse_connector.py index 4ed6b80e4..3da93e4c6 100644 --- a/adit/core/utils/dimse_connector.py +++ b/adit/core/utils/dimse_connector.py @@ -66,14 +66,12 @@ def gen_wrapper(self: "DimseConnector", *args, **kwargs): opened_connection = False is_connected = self.assoc and self.assoc.is_alive() + if is_connected and self._current_service != service: + self.close_connection() + is_connected = False if self.auto_connect and not is_connected: self.open_connection(service) opened_connection = True - elif is_connected and self._current_service != service: - raise DicomError( - f"Cannot use {service} while a {self._current_service} " - f"association is open. Close the current association first." - ) try: yield from func(self, *args, **kwargs) @@ -102,14 +100,12 @@ def func_wrapper(self: "DimseConnector", *args, **kwargs): opened_connection = False is_connected = self.assoc and self.assoc.is_alive() + if is_connected and self._current_service != service: + self.close_connection() + is_connected = False if self.auto_connect and not is_connected: self.open_connection(service) opened_connection = True - elif is_connected and self._current_service != service: - raise DicomError( - f"Cannot use {service} while a {self._current_service} " - f"association is open. Close the current association first." 
- ) try: result = func(self, *args, **kwargs) diff --git a/adit/mass_transfer/tests/acceptance/__init__.py b/adit/mass_transfer/tests/acceptance/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/adit/mass_transfer/tests/acceptance/conftest.py b/adit/mass_transfer/tests/acceptance/conftest.py new file mode 100644 index 000000000..92e966529 --- /dev/null +++ b/adit/mass_transfer/tests/acceptance/conftest.py @@ -0,0 +1,5 @@ +import os + +# Workaround to make playwright work with Django +# see https://github.com/microsoft/playwright-pytest/issues/29#issuecomment-731515676 +os.environ.setdefault("DJANGO_ALLOW_ASYNC_UNSAFE", "true") diff --git a/adit/mass_transfer/tests/acceptance/test_mass_transfer.py b/adit/mass_transfer/tests/acceptance/test_mass_transfer.py new file mode 100644 index 000000000..97de29edd --- /dev/null +++ b/adit/mass_transfer/tests/acceptance/test_mass_transfer.py @@ -0,0 +1,201 @@ +import json +import tempfile +from pathlib import Path + +import nibabel as nib +import pytest +from adit_radis_shared.common.utils.testing_helpers import ( + add_permission, + add_user_to_group, + create_and_login_example_user, + run_worker_once, +) +from playwright.sync_api import Page, expect +from pytest_django.live_server_helper import LiveServer + +from adit.core.factories import DicomFolderFactory +from adit.core.utils.auth_utils import grant_access +from adit.core.utils.testing_helpers import setup_dicomweb_orthancs, setup_dimse_orthancs +from adit.mass_transfer.models import MassTransferJob +from adit.mass_transfer.utils.testing_helpers import create_mass_transfer_group + +FILTERS_CT_ONLY = json.dumps([{"modality": "CT"}]) + + +def _fill_mass_transfer_form( + page: Page, + *, + source_label: str = "DICOM Server Orthanc Test Server 1", + destination_label: str = "DICOM Server Orthanc Test Server 2", + start_date: str = "2018-08-20", + end_date: str = "2018-08-20", + pseudonymize: bool = True, + convert_to_nifti: bool = False, + filters_json: str = FILTERS_CT_ONLY, +): + page.get_by_label("Source").select_option(label=source_label) + page.get_by_label("Destination").select_option(label=destination_label) + page.get_by_label("Start date").fill(start_date) + page.get_by_label("End date").fill(end_date) + + pseudonymize_checkbox = page.get_by_label("Pseudonymize") + if pseudonymize and not pseudonymize_checkbox.is_checked(): + pseudonymize_checkbox.click(force=True) + elif not pseudonymize and pseudonymize_checkbox.is_checked(): + pseudonymize_checkbox.click(force=True) + + if convert_to_nifti: + page.get_by_label("Convert to NIfTI").click(force=True) + + # Set filters in CodeMirror editor + page.evaluate( + """(value) => { + const cm = document.querySelector('.CodeMirror').CodeMirror; + cm.setValue(value); + }""", + filters_json, + ) + + page.locator('input:has-text("Create Job")').click() + + +def _run_mass_transfer_workers(): + # First run: processes queue_mass_transfer_tasks on default queue + run_worker_once() + # Second run: processes process_mass_transfer_task on mass_transfer queue + run_worker_once() + + +def _setup_orthancs(transfer_protocol: str): + if transfer_protocol == "dicomweb": + return setup_dicomweb_orthancs() + elif transfer_protocol == "c-move": + return setup_dimse_orthancs(cget_enabled=False) + else: + return setup_dimse_orthancs() + + +@pytest.mark.acceptance +@pytest.mark.order("last") +@pytest.mark.django_db(transaction=True) +@pytest.mark.parametrize("transfer_protocol", ["c-move", "c-get", "dicomweb"]) +def 
test_unpseudonymized_mass_transfer_to_server( + page: Page, live_server: LiveServer, transfer_protocol: str +): + user = create_and_login_example_user(page, live_server.url) + group = create_mass_transfer_group() + add_user_to_group(user, group) + add_permission(group, MassTransferJob, "can_transfer_unpseudonymized") + + orthancs = _setup_orthancs(transfer_protocol) + grant_access(group, orthancs[0], source=True) + grant_access(group, orthancs[1], destination=True) + + page.goto(live_server.url + "/mass-transfer/jobs/new/") + _fill_mass_transfer_form(page, pseudonymize=False) + + _run_mass_transfer_workers() + page.reload() + + expect(page.locator('dl:has-text("Success")')).to_be_visible() + + +@pytest.mark.acceptance +@pytest.mark.order("last") +@pytest.mark.django_db(transaction=True) +@pytest.mark.parametrize("transfer_protocol", ["c-move", "c-get", "dicomweb"]) +def test_pseudonymized_mass_transfer_to_server( + page: Page, live_server: LiveServer, transfer_protocol: str +): + user = create_and_login_example_user(page, live_server.url) + group = create_mass_transfer_group() + add_user_to_group(user, group) + + orthancs = _setup_orthancs(transfer_protocol) + grant_access(group, orthancs[0], source=True) + grant_access(group, orthancs[1], destination=True) + + page.goto(live_server.url + "/mass-transfer/jobs/new/") + _fill_mass_transfer_form(page, pseudonymize=True) + + _run_mass_transfer_workers() + page.reload() + + expect(page.locator('dl:has-text("Success")')).to_be_visible() + + +@pytest.mark.acceptance +@pytest.mark.order("last") +@pytest.mark.django_db(transaction=True) +@pytest.mark.parametrize("transfer_protocol", ["c-move", "c-get", "dicomweb"]) +def test_mass_transfer_to_folder( + page: Page, live_server: LiveServer, transfer_protocol: str +): + user = create_and_login_example_user(page, live_server.url) + group = create_mass_transfer_group() + add_user_to_group(user, group) + add_permission(group, MassTransferJob, "can_transfer_unpseudonymized") + + orthancs = _setup_orthancs(transfer_protocol) + grant_access(group, orthancs[0], source=True) + + with tempfile.TemporaryDirectory() as temp_dir: + download_folder = DicomFolderFactory.create(name="Downloads", path=temp_dir) + grant_access(group, download_folder, destination=True) + + page.goto(live_server.url + "/mass-transfer/jobs/new/") + _fill_mass_transfer_form( + page, + destination_label="DICOM Folder Downloads", + pseudonymize=False, + ) + + _run_mass_transfer_workers() + page.reload() + + expect(page.locator('dl:has-text("Success")')).to_be_visible() + + # Verify DICOM files were written to disk + dcm_files = list(Path(temp_dir).glob("**/*.dcm")) + assert len(dcm_files) > 0, "No DICOM files were written to the output folder." 
+ + +@pytest.mark.acceptance +@pytest.mark.order("last") +@pytest.mark.django_db(transaction=True) +@pytest.mark.parametrize("transfer_protocol", ["c-move", "c-get", "dicomweb"]) +def test_mass_transfer_to_folder_with_nifti_conversion( + page: Page, live_server: LiveServer, transfer_protocol: str +): + user = create_and_login_example_user(page, live_server.url) + group = create_mass_transfer_group() + add_user_to_group(user, group) + add_permission(group, MassTransferJob, "can_transfer_unpseudonymized") + + orthancs = _setup_orthancs(transfer_protocol) + grant_access(group, orthancs[0], source=True) + + with tempfile.TemporaryDirectory() as temp_dir: + download_folder = DicomFolderFactory.create(name="Downloads", path=temp_dir) + grant_access(group, download_folder, destination=True) + + page.goto(live_server.url + "/mass-transfer/jobs/new/") + _fill_mass_transfer_form( + page, + destination_label="DICOM Folder Downloads", + pseudonymize=False, + convert_to_nifti=True, + ) + + _run_mass_transfer_workers() + page.reload() + + expect(page.locator('dl:has-text("Success")')).to_be_visible() + + # Verify NIfTI files were generated + nifti_files = list(Path(temp_dir).glob("**/*.nii*")) + assert len(nifti_files) > 0, "No NIfTI files were generated." + + for nifti_file in nifti_files: + img = nib.load(nifti_file) # type: ignore + assert img is not None, f"Invalid NIfTI file: {nifti_file}" diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 4d9e21ab8..7f660e1f4 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -132,9 +132,7 @@ def mass_transfer_env(tmp_path): partition_end=now + timedelta(hours=23, minutes=59, seconds=59), partition_key="20240101", ) - return SimpleNamespace( - job=job, task=task, source=source, destination=destination, user=user - ) + return SimpleNamespace(job=job, task=task, source=source, destination=destination, user=user) @pytest.mark.django_db @@ -529,7 +527,7 @@ def _make_process_env_server_dest( source_mock = mocker.MagicMock() if dest_operator is None: dest_operator = mocker.MagicMock() - # dest DicomOperator is created first (line 288), source second (line 320) + # dest DicomOperator is created first, source second mocker.patch( "adit.mass_transfer.processors.DicomOperator", side_effect=[dest_operator, source_mock], @@ -747,9 +745,7 @@ def test_export_series_to_server_skips_upload_on_zero_images(mocker: MockerFixtu mocker.patch.object(processor, "_export_series", return_value=(0, "", "")) - processor._export_series_to_server( - mock_operator, volume, None, "subject-1", mock_dest_operator - ) + processor._export_series_to_server(mock_operator, volume, None, "subject-1", mock_dest_operator) mock_dest_operator.upload_images.assert_not_called() assert volume.status == MassTransferVolume.Status.ERROR @@ -770,9 +766,7 @@ def test_export_series_to_server_skips_non_image_series(mocker: MockerFixture): mocker.patch.object(processor, "_export_series", return_value=(0, "", "")) - processor._export_series_to_server( - mock_operator, volume, None, "subject-1", mock_dest_operator - ) + processor._export_series_to_server(mock_operator, volume, None, "subject-1", mock_dest_operator) mock_dest_operator.upload_images.assert_not_called() assert volume.status == MassTransferVolume.Status.SKIPPED @@ -919,7 +913,8 @@ def fake_export(op, s, path, subject_id, pseudonymizer): from adit.mass_transfer.processors import _DETERMINISTIC_PSEUDONYM_LENGTH expected = compute_pseudonym( - 
"test-salt-for-deterministic-pseudonyms", "PAT1", + "test-salt-for-deterministic-pseudonyms", + "PAT1", length=_DETERMINISTIC_PSEUDONYM_LENGTH, ) assert subject_ids[0] == expected @@ -1612,19 +1607,27 @@ def test_group_volumes_multi_patient_multi_study(): """Volumes are grouped by patient_id -> study_instance_uid.""" now = timezone.now() v1 = MassTransferVolume( - patient_id="PAT1", study_instance_uid="study-A", series_instance_uid="s-1", + patient_id="PAT1", + study_instance_uid="study-A", + series_instance_uid="s-1", study_datetime=now, ) v2 = MassTransferVolume( - patient_id="PAT1", study_instance_uid="study-A", series_instance_uid="s-2", + patient_id="PAT1", + study_instance_uid="study-A", + series_instance_uid="s-2", study_datetime=now, ) v3 = MassTransferVolume( - patient_id="PAT1", study_instance_uid="study-B", series_instance_uid="s-3", + patient_id="PAT1", + study_instance_uid="study-B", + series_instance_uid="s-3", study_datetime=now, ) v4 = MassTransferVolume( - patient_id="PAT2", study_instance_uid="study-C", series_instance_uid="s-4", + patient_id="PAT2", + study_instance_uid="study-C", + series_instance_uid="s-4", study_datetime=now, ) diff --git a/adit/mass_transfer/utils/testing_helpers.py b/adit/mass_transfer/utils/testing_helpers.py new file mode 100644 index 000000000..a8edd4b45 --- /dev/null +++ b/adit/mass_transfer/utils/testing_helpers.py @@ -0,0 +1,9 @@ +from adit_radis_shared.accounts.factories import GroupFactory +from adit_radis_shared.common.utils.testing_helpers import add_permission + + +def create_mass_transfer_group(): + group = GroupFactory.create(name="Radiologists") + add_permission(group, "mass_transfer", "add_masstransferjob") + add_permission(group, "mass_transfer", "view_masstransferjob") + return group From d20cee26ba0af237b37351f16124af5226b15a3b Mon Sep 17 00:00:00 2001 From: Kai Schlamp Date: Mon, 30 Mar 2026 00:39:58 +0000 Subject: [PATCH 094/103] Close stale DB connection in queue_mass_transfer_tasks worker The procrastinate worker thread running queue_mass_transfer_tasks opens a Django DB connection (via MassTransferTask.save() in queue_pending_task) that was never closed, since worker threads don't go through Django's request/response lifecycle. This left an idle PostgreSQL session that blocked test database teardown, causing a PytestWarning. Add db.close_old_connections() in a finally block, matching the pattern already used in core/tasks.py _run_dicom_task. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- adit/mass_transfer/tasks.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/adit/mass_transfer/tasks.py b/adit/mass_transfer/tasks.py index af2a56c4c..cb59fa8db 100644 --- a/adit/mass_transfer/tasks.py +++ b/adit/mass_transfer/tasks.py @@ -1,5 +1,6 @@ import logging +from django import db from procrastinate import JobContext, RetryStrategy from procrastinate.contrib.django import app @@ -49,16 +50,19 @@ def queue_mass_transfer_tasks(job_id: int): ) return - for mass_task in job.tasks.filter( - status=DicomTask.Status.PENDING, - queued_job__isnull=True, # Skip tasks already queued (idempotency guard) - ): - try: - mass_task.queue_pending_task() - except Exception: - logger.exception( - "Failed to queue MassTransferTask %d for job %d", - mass_task.pk, - job_id, - ) - raise + try: + for mass_task in job.tasks.filter( + status=DicomTask.Status.PENDING, + queued_job__isnull=True, # Skip tasks already queued (idempotency guard) + ): + try: + mass_task.queue_pending_task() + except Exception: + logger.exception( + "Failed to queue MassTransferTask %d for job %d", + mass_task.pk, + job_id, + ) + raise + finally: + db.close_old_connections() From a70ff7f3a4685f36dfe8f7dd89fbadd971955229 Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 30 Mar 2026 08:53:48 +0000 Subject: [PATCH 095/103] Add environment variables for staging replica scaling Co-Authored-By: Claude Opus 4.6 --- docker-compose.staging.yml | 6 +++--- example.env | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docker-compose.staging.yml b/docker-compose.staging.yml index 92c9d9cad..d9780034e 100644 --- a/docker-compose.staging.yml +++ b/docker-compose.staging.yml @@ -57,7 +57,7 @@ services: test: ["CMD", "curl", "-f", "http://localhost:8000/health/"] deploy: <<: *deploy - replicas: 1 + replicas: ${WEB_STAGING_REPLICAS:-1} default_worker: <<: *default-app @@ -83,7 +83,7 @@ services: " deploy: <<: *deploy - replicas: 1 + replicas: ${DICOM_WORKER_STAGING_REPLICAS:-1} mass_transfer_worker: <<: *default-app @@ -96,7 +96,7 @@ services: " deploy: <<: *deploy - replicas: 3 # Start with 3 to test scaling + replicas: ${MASS_TRANSFER_WORKER_STAGING_REPLICAS:-3} receiver: <<: *default-app diff --git a/example.env b/example.env index f825b672e..797522544 100644 --- a/example.env +++ b/example.env @@ -97,9 +97,15 @@ RECEIVER_AE_TITLE="ADIT1DEV" # This does not affect downloads using the ADIT client. EXCLUDE_MODALITIES="PR,SR" -# Replicas of the services that can be scaled (production only). +# Replicas of the services that can be scaled (staging). +WEB_STAGING_REPLICAS=1 +DICOM_WORKER_STAGING_REPLICAS=1 +MASS_TRANSFER_WORKER_STAGING_REPLICAS=3 + +# Replicas of the services that can be scaled (production). WEB_REPLICAS=5 DICOM_WORKER_REPLICAS=3 +MASS_TRANSFER_WORKER_REPLICAS=1 # The directory where download folders are mounted. 
MOUNT_DIR="/mnt" From 3452466d488e4cf2c041cc98894a9c9c8a3fbba9 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Mon, 30 Mar 2026 13:32:16 +0200 Subject: [PATCH 096/103] Add scrollbar for long filters and fix cancel FK violation - Cap filters JSON display at 20 lines with overflow scroll - Clear queued_job_id before deleting procrastinate job on cancel --- adit/core/views.py | 2 ++ .../templates/mass_transfer/mass_transfer_job_detail.html | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/adit/core/views.py b/adit/core/views.py index acbb2bd1d..4feeccda3 100644 --- a/adit/core/views.py +++ b/adit/core/views.py @@ -229,6 +229,8 @@ def post(self, request: AuthenticatedHttpRequest, *args, **kwargs) -> HttpRespon for dicom_task in pending_tasks: queued_job_id = dicom_task.queued_job_id if queued_job_id is not None: + dicom_task.queued_job_id = None + dicom_task.save(update_fields=["queued_job_id"]) app.job_manager.cancel_job_by_id(queued_job_id, delete_job=True) pending_tasks.update(status=DicomTask.Status.CANCELED) diff --git a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html index b238ebb31..d233459ca 100644 --- a/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html +++ b/adit/mass_transfer/templates/mass_transfer/mass_transfer_job_detail.html @@ -50,7 +50,7 @@
Filters
{% if job.filters_json %} -
{{ job.filters_json_pretty }}
+
{{ job.filters_json_pretty }}
{% else %} — {% endif %} From ad6818b2bdd7bc3dfd0581c21ae1f99b50e0e964 Mon Sep 17 00:00:00 2001 From: Ritwik Date: Mon, 30 Mar 2026 14:02:16 +0200 Subject: [PATCH 097/103] Fix pyright error: queued_job_id type annotation allows None --- adit/core/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adit/core/models.py b/adit/core/models.py index 9135231e9..1a1e88d4c 100644 --- a/adit/core/models.py +++ b/adit/core/models.py @@ -396,7 +396,7 @@ class Status(models.TextChoices): job = models.ForeignKey(DicomJob, on_delete=models.CASCADE, related_name="tasks") source_id: int source = models.ForeignKey(DicomNode, related_name="+", on_delete=models.PROTECT) - queued_job_id: int + queued_job_id: int | None queued_job = models.OneToOneField( ProcrastinateJob, null=True, on_delete=models.SET_NULL, related_name="+" ) From 6e3a19181ff4e788d2c0ae5646ada4431b10eadb Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 30 Mar 2026 16:18:02 +0000 Subject: [PATCH 098/103] Add min_number_of_series_related_instances filter for mass transfer Add a new filter field that allows excluding series with fewer than a specified number of instances. Includes form validation (must be >= 1), processor filtering logic, and comprehensive tests for both. Co-Authored-By: Claude Opus 4.6 --- adit/mass_transfer/forms.py | 10 +++- adit/mass_transfer/processors.py | 14 +++++ adit/mass_transfer/tests/test_forms.py | 36 +++++++++++++ adit/mass_transfer/tests/test_processor.py | 60 ++++++++++++++++++++++ 4 files changed, 119 insertions(+), 1 deletion(-) diff --git a/adit/mass_transfer/forms.py b/adit/mass_transfer/forms.py index d81d7b8f9..9108d672b 100644 --- a/adit/mass_transfer/forms.py +++ b/adit/mass_transfer/forms.py @@ -31,6 +31,7 @@ class FilterSchema(BaseModel): series_number: int | None = None min_age: Annotated[int, "non-negative"] | None = None max_age: Annotated[int, "non-negative"] | None = None + min_number_of_series_related_instances: int | None = None model_config = {"extra": "forbid"} @@ -40,6 +41,11 @@ def check_age_range(self): raise ValueError("min_age must be non-negative") if self.max_age is not None and self.max_age < 0: raise ValueError("max_age must be non-negative") + if ( + self.min_number_of_series_related_instances is not None + and self.min_number_of_series_related_instances < 1 + ): + raise ValueError("min_number_of_series_related_instances must be >= 1") if ( self.min_age is not None and self.max_age is not None @@ -60,6 +66,7 @@ def check_age_range(self): "apply_institution_on_study": True, "min_age": 18, "max_age": 90, + "min_number_of_series_related_instances": None, } ], indent=2, @@ -75,7 +82,8 @@ class MassTransferJobForm(forms.ModelForm): "A JSON array of filter objects. Each filter can have: " "modality, institution_name, apply_institution_on_study, " "study_description, series_description, series_number, " - "min_age, max_age. A series matching ANY filter is included." + "min_age, max_age, min_number_of_series_related_instances. " + "A series matching ANY filter is included." 
), ) diff --git a/adit/mass_transfer/processors.py b/adit/mass_transfer/processors.py index dcebdeaba..d546e9803 100644 --- a/adit/mass_transfer/processors.py +++ b/adit/mass_transfer/processors.py @@ -48,6 +48,7 @@ class FilterSpec: series_number: int | None = None min_age: int | None = None max_age: int | None = None + min_number_of_series_related_instances: int | None = None @classmethod def from_dict(cls, d: dict) -> "FilterSpec": @@ -60,6 +61,9 @@ def from_dict(cls, d: dict) -> "FilterSpec": series_number=d.get("series_number"), min_age=d.get("min_age"), max_age=d.get("max_age"), + min_number_of_series_related_instances=d.get( + "min_number_of_series_related_instances" + ), ) @@ -830,6 +834,16 @@ def _discover_series( except (TypeError, ValueError): continue + if mf.min_number_of_series_related_instances is not None: + num_instances = ( + _parse_int( + series.get("NumberOfSeriesRelatedInstances"), default=0 + ) + or 0 + ) + if num_instances < mf.min_number_of_series_related_instances: + continue + if series_uid in found: continue diff --git a/adit/mass_transfer/tests/test_forms.py b/adit/mass_transfer/tests/test_forms.py index fcca4808a..d7db6f434 100644 --- a/adit/mass_transfer/tests/test_forms.py +++ b/adit/mass_transfer/tests/test_forms.py @@ -327,6 +327,42 @@ def test_clean_source_rejects_folder(): assert "source" in form.errors +@pytest.mark.django_db +def test_clean_filters_json_min_instances_valid(form_env): + form = _make_form( + form_env, + filters_json=json.dumps([{"modality": "CT", "min_number_of_series_related_instances": 5}]), + ) + assert form.is_valid(), form.errors + + +@pytest.mark.django_db +def test_clean_filters_json_min_instances_zero_rejected(form_env): + form = _make_form( + form_env, + filters_json=json.dumps([{"modality": "CT", "min_number_of_series_related_instances": 0}]), + ) + assert not form.is_valid() + assert "filters_json" in form.errors + + +@pytest.mark.django_db +def test_clean_filters_json_min_instances_null_accepted(form_env): + form = _make_form( + form_env, + filters_json=json.dumps( + [{"modality": "CT", "min_number_of_series_related_instances": None}] + ), + ) + assert form.is_valid(), form.errors + + +@pytest.mark.django_db +def test_clean_filters_json_min_instances_omitted_accepted(form_env): + form = _make_form(form_env, filters_json=json.dumps([{"modality": "CT"}])) + assert form.is_valid(), form.errors + + @pytest.mark.django_db def test_clean_source_rejects_unauthorized_server(): user = UserFactory.create() diff --git a/adit/mass_transfer/tests/test_processor.py b/adit/mass_transfer/tests/test_processor.py index 7f660e1f4..8af97febf 100644 --- a/adit/mass_transfer/tests/test_processor.py +++ b/adit/mass_transfer/tests/test_processor.py @@ -101,6 +101,9 @@ def _make_filter(**kwargs) -> FilterSpec: series_number=kwargs.get("series_number", None), min_age=kwargs.get("min_age", None), max_age=kwargs.get("max_age", None), + min_number_of_series_related_instances=kwargs.get( + "min_number_of_series_related_instances", None + ), ) @@ -445,6 +448,51 @@ def test_discover_series_filters_by_series_description(mocker: MockerFixture): assert result[0].series_instance_uid == "1.2.3.401" +def test_discover_series_filters_by_min_instances(mocker: MockerFixture): + processor = _make_processor(mocker) + processor.mass_task.partition_start = datetime(2024, 1, 1, 0, 0) + processor.mass_task.partition_end = datetime(2024, 1, 1, 23, 59, 59) + + operator = mocker.create_autospec(DicomOperator) + operator.server = mocker.MagicMock(max_search_results=200) + + 
study = _make_study("1.2.3.100") + study.dataset.ModalitiesInStudy = ["CT"] + operator.find_studies.return_value = [study] + + big_series = _make_series_result("1.2.3.501", num_images=10) + small_series = _make_series_result("1.2.3.502", num_images=2) + operator.find_series.return_value = [big_series, small_series] + + filters = [_make_filter(modality="CT", min_number_of_series_related_instances=5)] + result = processor._discover_series(operator, filters) + + assert len(result) == 1 + assert result[0].series_instance_uid == "1.2.3.501" + + +def test_discover_series_no_min_instances_filter_includes_all(mocker: MockerFixture): + processor = _make_processor(mocker) + processor.mass_task.partition_start = datetime(2024, 1, 1, 0, 0) + processor.mass_task.partition_end = datetime(2024, 1, 1, 23, 59, 59) + + operator = mocker.create_autospec(DicomOperator) + operator.server = mocker.MagicMock(max_search_results=200) + + study = _make_study("1.2.3.100") + study.dataset.ModalitiesInStudy = ["CT"] + operator.find_studies.return_value = [study] + + big_series = _make_series_result("1.2.3.501", num_images=10) + small_series = _make_series_result("1.2.3.502", num_images=2) + operator.find_series.return_value = [big_series, small_series] + + filters = [_make_filter(modality="CT")] # no min_number_of_series_related_instances + result = processor._discover_series(operator, filters) + + assert len(result) == 2 + + # --------------------------------------------------------------------------- # process() tests — mocked environment # --------------------------------------------------------------------------- @@ -1356,6 +1404,18 @@ def test_filter_spec_from_dict(): assert fs.apply_institution_on_study is True +def test_filter_spec_from_dict_with_min_instances(): + d = {"modality": "CT", "min_number_of_series_related_instances": 5} + fs = FilterSpec.from_dict(d) + assert fs.min_number_of_series_related_instances == 5 + + +def test_filter_spec_from_dict_without_min_instances(): + d = {"modality": "CT"} + fs = FilterSpec.from_dict(d) + assert fs.min_number_of_series_related_instances is None + + # --------------------------------------------------------------------------- # DICOM metadata tests # --------------------------------------------------------------------------- From 409053b76be76e4b74876917cbcf7c08ecbf5f5a Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 30 Mar 2026 16:28:07 +0000 Subject: [PATCH 099/103] Add --min-series-instances arg to csv_to_mass_transfer_filters script Co-Authored-By: Claude Opus 4.6 --- scripts/csv_to_mass_transfer_filters.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/scripts/csv_to_mass_transfer_filters.py b/scripts/csv_to_mass_transfer_filters.py index 6fd8d7fb0..b11f82ceb 100644 --- a/scripts/csv_to_mass_transfer_filters.py +++ b/scripts/csv_to_mass_transfer_filters.py @@ -9,6 +9,7 @@ python scripts/csv_to_mass_transfer_filters.py filters.csv python scripts/csv_to_mass_transfer_filters.py filters.csv --min-age 18 python scripts/csv_to_mass_transfer_filters.py filters.csv --min-age 18 --max-age 90 + python scripts/csv_to_mass_transfer_filters.py filters.csv --min-series-instances 5 python scripts/csv_to_mass_transfer_filters.py filters.csv -o output.json """ @@ -47,6 +48,12 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace: default=None, help="Set a constant max_age for every filter", ) + parser.add_argument( + "--min-series-instances", + type=int, + default=None, + help="Set a constant 
min_number_of_series_related_instances for every filter", + ) return parser.parse_args(argv) @@ -55,6 +62,7 @@ def csv_to_filters( *, min_age: int | None = None, max_age: int | None = None, + min_number_of_series_related_instances: int | None = None, ) -> list[dict]: with csv_path.open(newline="", encoding="utf-8-sig") as f: reader = csv.DictReader(f) @@ -83,6 +91,10 @@ def csv_to_filters( entry["min_age"] = min_age if max_age is not None: entry["max_age"] = max_age + if min_number_of_series_related_instances is not None: + entry["min_number_of_series_related_instances"] = ( + min_number_of_series_related_instances + ) if not entry: print(f"Warning: skipping empty row {row_num}", file=sys.stderr) @@ -111,8 +123,15 @@ def main(argv: list[str] | None = None) -> None: raise SystemExit( f"Error: --min-age ({args.min_age}) cannot exceed --max-age ({args.max_age})" ) - - filters = csv_to_filters(args.csv_file, min_age=args.min_age, max_age=args.max_age) + if args.min_series_instances is not None and args.min_series_instances < 1: + raise SystemExit("Error: --min-series-instances must be at least 1") + + filters = csv_to_filters( + args.csv_file, + min_age=args.min_age, + max_age=args.max_age, + min_number_of_series_related_instances=args.min_series_instances, + ) if not filters: raise SystemExit("Error: no valid filter rows found in CSV.") From 50adf2ccf0b3c9cdb10e05d39a0364d79d1baeae Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Mon, 30 Mar 2026 16:40:06 +0000 Subject: [PATCH 100/103] Add --delimiter arg to csv_to_mass_transfer_filters script Allows parsing semicolon-delimited CSVs (common in European Excel exports) by passing e.g. --delimiter ";". Defaults to comma for backwards compatibility. Co-Authored-By: Claude Opus 4.6 --- scripts/csv_to_mass_transfer_filters.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scripts/csv_to_mass_transfer_filters.py b/scripts/csv_to_mass_transfer_filters.py index b11f82ceb..3b395f2dc 100644 --- a/scripts/csv_to_mass_transfer_filters.py +++ b/scripts/csv_to_mass_transfer_filters.py @@ -7,6 +7,7 @@ Usage examples: python scripts/csv_to_mass_transfer_filters.py filters.csv + python scripts/csv_to_mass_transfer_filters.py filters.csv --delimiter ";" python scripts/csv_to_mass_transfer_filters.py filters.csv --min-age 18 python scripts/csv_to_mass_transfer_filters.py filters.csv --min-age 18 --max-age 90 python scripts/csv_to_mass_transfer_filters.py filters.csv --min-series-instances 5 @@ -54,18 +55,25 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace: default=None, help="Set a constant min_number_of_series_related_instances for every filter", ) + parser.add_argument( + "-d", + "--delimiter", + default=",", + help="CSV column delimiter (default: ',')", + ) return parser.parse_args(argv) def csv_to_filters( csv_path: Path, *, + delimiter: str = ",", min_age: int | None = None, max_age: int | None = None, min_number_of_series_related_instances: int | None = None, ) -> list[dict]: with csv_path.open(newline="", encoding="utf-8-sig") as f: - reader = csv.DictReader(f) + reader = csv.DictReader(f, delimiter=delimiter) if reader.fieldnames is None: raise SystemExit(f"Error: {csv_path} appears to be empty or has no header row.") @@ -128,6 +136,7 @@ def main(argv: list[str] | None = None) -> None: filters = csv_to_filters( args.csv_file, + delimiter=args.delimiter, min_age=args.min_age, max_age=args.max_age, min_number_of_series_related_instances=args.min_series_instances, From 
962904d8f37a11be2ed65671a4f3ffd6b6780f0c Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Tue, 31 Mar 2026 10:22:07 +0000 Subject: [PATCH 101/103] Add MASS_TRANSFER_WORKER_REPLICAS to example.env Co-Authored-By: Claude Opus 4.6 --- example.env | 1 + 1 file changed, 1 insertion(+) diff --git a/example.env b/example.env index e17158350..010c838e2 100644 --- a/example.env +++ b/example.env @@ -92,6 +92,7 @@ EXCLUDE_MODALITIES="PR,SR" # Replicas of the services that can be scaled (production only). WEB_REPLICAS=5 DICOM_WORKER_REPLICAS=3 +MASS_TRANSFER_WORKER_REPLICAS=5 # The directory where download folders are mounted. MOUNT_DIR="/mnt" From c93cf1f64fbca6c61d3059e0c692d55fb1a7679a Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Tue, 31 Mar 2026 10:24:35 +0000 Subject: [PATCH 102/103] Remove STAGING_DEPLOYMENT.md Co-Authored-By: Claude Opus 4.6 --- STAGING_DEPLOYMENT.md | 389 ------------------------------------------ 1 file changed, 389 deletions(-) delete mode 100644 STAGING_DEPLOYMENT.md diff --git a/STAGING_DEPLOYMENT.md b/STAGING_DEPLOYMENT.md deleted file mode 100644 index 54b3ce0d5..000000000 --- a/STAGING_DEPLOYMENT.md +++ /dev/null @@ -1,389 +0,0 @@ -# Staging Deployment - Dev Mode on Docker Swarm - -## What is Staging? - -Staging is **literally your dev environment running on Docker Swarm**. It's useful for: - -- ✅ Testing worker scaling (multiple mass_transfer_worker replicas) -- ✅ Testing Swarm deployment before production -- ✅ Testing auto-restart and self-healing features -- ✅ **Same as dev in every other way**: localhost, DEBUG=True, debug toolbar, plain HTTP - -## Key Differences from Dev - -| Feature | Dev (`compose up`) | Staging (Swarm) | -|---------|-------------------|-----------------| -| **Django settings** | development | development ✓ | -| **Access URL** | http://localhost:8000 | http://localhost:8001 | -| **DICOM receiver** | Port 11122 | Port 11123 | -| **PostgreSQL** | Port 5432 | Port 5433 | -| **Debug toolbar** | ✓ Yes | ✓ Yes | -| **SSL/HTTPS** | ✗ Plain HTTP | ✗ Plain HTTP ✓ | -| **Email** | Console logs | Console logs ✓ | -| **Worker scaling** | `--scale` flag | `deploy.replicas` | -| **Auto-restart** | Manual | Automatic | -| **Database password** | postgres | postgres ✓ | - -**Summary**: Staging = Dev + Swarm orchestration + Different ports - -## Quick Start - -### 1. Use Your Existing .env - -```bash -# Just use your existing .env - it already has staging ports defined! -# No changes needed! - -# Staging uses these ports from example.env: -# WEB_STAGING_PORT=8001 -# POSTGRES_STAGING_PORT=5433 -# RECEIVER_STAGING_PORT=11123 -# etc. -``` - -### 2. Deploy to Swarm - -```bash -# Initialize swarm (one-time setup) -docker swarm init - -# Deploy staging stack (uses .env automatically) -docker stack deploy \ - -c docker-compose.base.yml \ - -c docker-compose.staging.yml \ - adit-staging -``` - -### 3. Access Staging - -```bash -# Different port from dev! -http://localhost:8001 -``` - -## Scaling Workers - -This is the **main reason** to use staging instead of regular dev. 
- -### Default Replicas - -Staging starts with these replicas (defined in `docker-compose.staging.yml`): -- Web: 1 -- Default worker: 1 -- DICOM worker: 1 -- **Mass transfer worker: 3** (to test scaling) - -### Check Current Replicas - -```bash -docker service ls - -# NAME REPLICAS -# adit-staging_mass_transfer_worker 3/3 -# adit-staging_dicom_worker 1/1 -# adit-staging_web 1/1 -``` - -### Scale Workers Dynamically - -```bash -# Scale mass_transfer workers to 5 -docker service scale adit-staging_mass_transfer_worker=5 - -# Scale down to 1 -docker service scale adit-staging_mass_transfer_worker=1 - -# Verify -docker service ps adit-staging_mass_transfer_worker -``` - -### Change Default Replicas - -Edit `docker-compose.staging.yml` to change defaults: - -```yaml -mass_transfer_worker: - deploy: - replicas: 10 # Start with 10 workers -``` - -Then redeploy: -```bash -docker stack deploy -c docker-compose.base.yml -c docker-compose.staging.yml adit-staging -``` - -## Monitoring - -### View Logs - -```bash -# Web server logs -docker service logs -f adit-staging_web - -# Mass transfer worker logs (all 5 replicas) -docker service logs -f adit-staging_mass_transfer_worker - -# Single worker logs -docker logs -``` - -### Check Service Status - -```bash -# List all services -docker service ls - -# Detailed info on a service -docker service ps adit-staging_mass_transfer_worker - -# See which containers are running -docker ps | grep adit-staging -``` - -### Shell Access - -```bash -# Get shell in web container -docker exec -it $(docker ps -q -f name=adit-staging_web) bash - -# Run Django commands -docker exec $(docker ps -q -f name=adit-staging_web) \ - python manage.py shell -``` - -## Testing Worker Scaling - -### Create Test Jobs - -```bash -# Access Django shell -docker exec -it $(docker ps -q -f name=adit-staging_web) bash -./manage.py shell -``` - -```python -# Create a bunch of mass transfer tasks -from adit.batch_transfer.models import BatchTransferJob - -# Create test job -job = BatchTransferJob.objects.create(...) - -# Now scale workers and watch them process tasks -``` - -### Watch Workers Process Tasks - -```bash -# Scale to 5 workers -docker service scale adit-staging_mass_transfer_worker=5 - -# Watch logs from all 5 workers -docker service logs -f adit-staging_mass_transfer_worker - -# You'll see all 5 workers processing tasks concurrently! -``` - -## Running All Three Environments Simultaneously - -✅ **All three can run at the same time!** They use different ports: - -### Port Allocation - -| Service | Dev | Staging | Production | -|---------|-----|---------|------------| -| **Web UI** | 8000 | 8001 | 80, 443 | -| **PostgreSQL** | 5432 | 5433 | (internal) | -| **DICOM Receiver** | 11122 | 11123 | 11112 | -| **Orthanc1** | 7501 | 7503 | (internal) | -| **Orthanc2** | 7502 | 7504 | (internal) | - -### Run All Three Together - -```bash -# 1. Start dev (docker compose) -uv run cli compose-up - -# 2. Start production (docker swarm) -docker swarm init # if not already in swarm mode -export $(cat .env | grep -v '^#' | xargs) -docker stack deploy -c docker-compose.base.yml -c docker-compose.prod.yml adit-prod - -# 3. Start staging (docker swarm - uses same .env) -docker stack deploy -c docker-compose.base.yml -c docker-compose.staging.yml adit-staging - -# Now all three are running! 
-``` - -### Access Each Environment - -```bash -# Development -http://localhost:8000 # Web UI -# PostgreSQL: localhost:5432 -# DICOM: localhost:11122 - -# Staging -http://localhost:8001 # Web UI -# PostgreSQL: localhost:5433 -# DICOM: localhost:11123 - -# Production -http://localhost:80 # or your domain -https://localhost:443 # with SSL -# DICOM: localhost:11112 -``` - -### Why Run All Three? - -- **Dev**: Active development with hot reload -- **Staging**: Test worker scaling before pushing to production -- **Production**: Serve real users - -Each has its own database, so changes in dev/staging don't affect production. - -## Cleanup - -### Remove Staging Stack - -```bash -# Remove all services -docker stack rm adit-staging - -# Wait 30 seconds for graceful shutdown -sleep 30 - -# Check they're gone -docker service ls -``` - -### Remove Volumes (WARNING: deletes data!) - -```bash -# List volumes -docker volume ls | grep adit-staging - -# Remove (careful!) -docker volume prune -``` - -### Leave Swarm Mode - -```bash -# If you want to go back to regular docker compose -docker swarm leave --force -``` - -## Troubleshooting - -### Port Already in Use - -```bash -# Dev is probably still running -uv run cli compose-down - -# Or use different ports in .env.staging -``` - -### Workers Not Scaling - -```bash -# Check service status -docker service ps adit-staging_mass_transfer_worker - -# Common issue: not enough resources -# Swarm won't schedule if host is resource-constrained -``` - -### Can't Access localhost:8000 - -```bash -# Check if web service is running -docker service ps adit-staging_web - -# Check logs -docker service logs adit-staging_web - -# Verify port mapping -docker service inspect adit-staging_web --pretty -``` - -### Database Connection Issues - -```bash -# Each service needs time to start -# Check postgres is ready: -docker service ps adit-staging_postgres - -# Check logs for "database system is ready" -docker service logs adit-staging_postgres -``` - -## Comparison: Dev vs Staging vs Production - -``` -┌──────────────────────────────────────────────────────────────┐ -│ Development (docker compose up) │ -├──────────────────────────────────────────────────────────────┤ -│ Command: uv run cli compose-up -- --scale worker=3 │ -│ URL: http://localhost:8000 │ -│ Ports: 8000, 5432, 11122, 7501, 7502 │ -│ Settings: DEBUG=True, debug toolbar, console email │ -│ Workers: Manual scaling with --scale flag │ -│ Restart: Manual (container stops if it crashes) │ -│ Use for: Local development │ -└──────────────────────────────────────────────────────────────┘ - -┌──────────────────────────────────────────────────────────────┐ -│ Staging (docker swarm - dev settings) │ -├──────────────────────────────────────────────────────────────┤ -│ Command: docker stack deploy ... adit-staging │ -│ URL: http://localhost:8001 │ -│ Ports: 8001, 5433, 11123, 7503, 7504 │ -│ Settings: DEBUG=True, debug toolbar, console email │ -│ Workers: Swarm replicas, auto-scale, auto-restart │ -│ Restart: Automatic (Swarm reschedules failed containers) │ -│ Use for: Testing worker scaling, Swarm deployment │ -└──────────────────────────────────────────────────────────────┘ - -┌──────────────────────────────────────────────────────────────┐ -│ Production (docker swarm - production settings) │ -├──────────────────────────────────────────────────────────────┤ -│ Command: docker stack deploy ... 
adit-prod │ -│ URL: https://adit.example.com │ -│ Ports: 80, 443, 11112 │ -│ Settings: DEBUG=False, real SSL, SMTP email │ -│ Workers: Swarm replicas, multi-node, auto-restart │ -│ Restart: Automatic across multiple servers │ -│ Use for: Real clinical use │ -└──────────────────────────────────────────────────────────────┘ -``` - -## Summary - -**Staging is dev mode on Swarm with different ports** - that's it! - -```bash -# Just use your existing .env (ports already defined in example.env) -# No .env.staging needed! - -# Deploy to Swarm -docker swarm init -docker stack deploy -c docker-compose.base.yml -c docker-compose.staging.yml adit-staging - -# Starts with 3 mass_transfer_workers by default -# Scale if needed: -docker service scale adit-staging_mass_transfer_worker=5 - -# Access on different port from dev -http://localhost:8001 - -# Cleanup -docker stack rm adit-staging -``` - -**Port allocation** (all three can run simultaneously): -- Dev: 8000, 5432, 11122, 7501, 7502 -- Staging: 8001, 5433, 11123, 7503, 7504 -- Production: 80, 443, 11112 - -**No .env.staging, no SSL, no special config** - just use your .env and deploy! From 1279ecf71f2c229c34498ac64f1b8c12be91af4b Mon Sep 17 00:00:00 2001 From: Samuel Kwong Date: Thu, 2 Apr 2026 12:33:08 +0000 Subject: [PATCH 103/103] Add mass transfer worker to observability config example Co-Authored-By: Claude Opus 4.6 --- docker-compose.override.yml.example | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.override.yml.example b/docker-compose.override.yml.example index a129db932..7336fd80b 100644 --- a/docker-compose.override.yml.example +++ b/docker-compose.override.yml.example @@ -19,5 +19,7 @@ services: <<: *observability dicom_worker: <<: *observability + mass_transfer_worker: + <<: *observability receiver: <<: *observability