Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,14 @@
- `jupyter_kernel_info` is now optional in `RecordProperties`; workflow configs without a notebook URL no longer require this field.
- Removed redundant `hasattr` guard for `_last_generator` in `Publisher.publish()`.
- Added automated MkDocs GitHub Pages deployment on release via a dedicated `docs.yml` workflow.
- `osc_themes` values are now automatically lowercased, so `'Land'` and `'LAND'` are treated the same as `'land'`, preventing theme validation failures.
- `collection_id` is now validated to contain no spaces; a clear error is raised with a hint to use hyphens instead.
- `license_type` (dataset) and `properties.license` (workflow) are now mandatory fields; publishing fails immediately with a descriptive error if either is missing.
- Variable catalog `description` now falls back to the title-cased variable ID when neither `description` nor `long_name` attrs are present on the zarr variable, preventing `null` description validation failures.
- Pull requests opened by deep-code now include a "Generated with deep-code" note in the PR description.
- `stac_catalog_s3_root` is now a mandatory field in the dataset config; publishing fails immediately with a descriptive error if it is absent.
- STAC catalog links in the OSC collection now follow the OSC convention: a `via` link to the STAC browser URL and a `child` link to the direct HTTPS catalog URL. The `s3://` URL is converted to HTTPS (AWS virtual-hosted style) to satisfy the `uri-reference` format check in the OSC products schema.
- Added optional `visualisation_link` field to the dataset config; when provided, a `visualisation` link with title `"Dataset visualisation"` is added to the generated OSC collection.
- Added optional `description` field to the dataset config; overrides the `description` attribute from the Zarr store when set.
- Added optional `osc_project_title` field to the dataset config to correctly set the project link title (e.g. `"DeepESDL"`) instead of deriving it from the project ID.
- Fixed `workflow_id` not being normalised (slugified) when stored on `Publisher`, causing spaces in experiment link hrefs and failing `uri-reference` format validation.
204 changes: 192 additions & 12 deletions deep_code/tests/tools/test_publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,22 @@ def test_environment_repo_selection(self, mock_gp):
== "open-science-catalog-metadata-testing"
)

@patch.object(Publisher, "_write_stac_catalog_to_s3")
@patch.object(Publisher, "publish_dataset", return_value={"a": {}})
@patch.object(
Publisher, "generate_workflow_experiment_records", return_value={"b": {}}
)
def test_publish_mode_routing(self, mock_wf, mock_ds):
def test_publish_mode_routing(self, mock_wf, mock_ds, mock_s3):
mock_generator = MagicMock()
mock_generator.build_zarr_stac_catalog_file_dict.return_value = {}
self.publisher._last_generator = mock_generator
self.publisher.dataset_config = {
"stac_catalog_s3_root": "s3://bucket/stac/",
"collection_id": "test-collection",
"dataset_id": "test-dataset",
}
self.publisher.gh_publisher.publish_files.return_value = "PR_URL"

# dataset only
self.publisher.publish(write_to_file=True, mode="dataset")
mock_ds.assert_called()
Expand All @@ -142,28 +153,41 @@ def test_publish_mode_routing(self, mock_wf, mock_ds):
mock_ds.assert_not_called()
mock_wf.assert_called()

@patch.object(Publisher, "_write_stac_catalog_to_s3")
@patch.object(Publisher, "generate_workflow_experiment_records", return_value={})
@patch.object(Publisher, "publish_dataset", return_value={})
def test_publish_nothing_to_publish_raises(
self, mock_publish_dataset, mock_generate_workflow_experiment_records
self, mock_publish_dataset, mock_generate_workflow_experiment_records, mock_s3
):
mock_generator = MagicMock()
mock_generator.build_zarr_stac_catalog_file_dict.return_value = {}
self.publisher._last_generator = mock_generator
self.publisher.dataset_config = {"stac_catalog_s3_root": "s3://bucket/stac/"}

with pytest.raises(ValueError):
self.publisher.publish(write_to_file=False, mode="dataset")
mock_publish_dataset.assert_called_once()
mock_generate_workflow_experiment_records.assert_not_called()

@patch.object(Publisher, "_write_stac_catalog_to_s3")
@patch.object(Publisher, "publish_dataset", return_value={"x": {}})
@patch.object(
Publisher, "generate_workflow_experiment_records", return_value={"y": {}}
)
def test_publish_builds_pr_params(self, mock_wf, mock_ds):
def test_publish_builds_pr_params(self, mock_wf, mock_ds, mock_s3):
# Make PR creation return a fixed URL
self.publisher.gh_publisher.publish_files.return_value = "PR_URL"

# Provide IDs for commit/PR labels
self.publisher.collection_id = "col"
self.publisher.workflow_id = "wf"

# _last_generator is set by publish_dataset; since that's mocked, stub it
mock_generator = MagicMock()
mock_generator.build_zarr_stac_catalog_file_dict.return_value = {}
self.publisher._last_generator = mock_generator
self.publisher.dataset_config = {"stac_catalog_s3_root": "s3://bucket/stac/"}

url = self.publisher.publish(write_to_file=False, mode="all")
assert url == "PR_URL"

Expand Down Expand Up @@ -309,6 +333,7 @@ def test_publish_dataset_creates_project_collection_when_missing(
"dataset_id": "test-dataset",
"collection_id": "test-collection",
"license_type": "CC-BY-4.0",
"stac_catalog_s3_root": "s3://bucket/stac/test-collection/",
}
self.publisher.collection_id = "test-collection"

Expand Down Expand Up @@ -343,6 +368,7 @@ def test_publish_dataset_updates_project_collection_when_exists(
"dataset_id": "test-dataset",
"collection_id": "test-collection",
"license_type": "CC-BY-4.0",
"stac_catalog_s3_root": "s3://bucket/stac/test-collection/",
}
self.publisher.collection_id = "test-collection"

Expand All @@ -359,20 +385,174 @@ def test_publish_dataset_updates_project_collection_when_exists(
update_methods = [call.args[2] for call in mock_update.call_args_list]
self.assertIn(mock_gen.update_deepesdl_collection, update_methods)

@patch.object(Publisher, "publish_dataset", return_value={"github_file.json": {}})
def test_publish_skips_zarr_stac_when_not_configured(self, mock_publish_ds):
# No stac_catalog_s3_root in config
def test_publish_dataset_raises_when_stac_root_missing(self):
    # A dataset config that has IDs and a license but lacks
    # ``stac_catalog_s3_root`` must make publish_dataset fail fast.
    self.publisher.dataset_config = {
        "collection_id": "test-collection",
        "dataset_id": "test-dataset",
        "license_type": "CC-BY-4.0",
    }
    # NOTE(review): this stubbed PR URL should never be consumed — the
    # ValueError is expected before any GitHub interaction; confirm and
    # consider removing this line.
    self.publisher.gh_publisher.publish_files.return_value = "PR_URL"
    with pytest.raises(ValueError, match="stac_catalog_s3_root"):
        self.publisher.publish_dataset(write_to_file=False)

def test_publish_dataset_raises_when_no_dataset_config(self):
    """publish_dataset must reject a publisher with no dataset config loaded."""
    publisher = self.publisher
    publisher.dataset_config = None
    with pytest.raises(ValueError, match="No dataset config"):
        publisher.publish_dataset(write_to_file=False)

def test_publish_dataset_raises_when_ids_missing(self):
    """Empty dataset/collection IDs should trigger a descriptive error."""
    publisher = self.publisher
    publisher.dataset_config = {"collection_id": "", "dataset_id": ""}
    with pytest.raises(ValueError, match="Dataset ID or Collection ID missing"):
        publisher.publish_dataset(write_to_file=False)

def test_publish_dataset_raises_when_license_missing(self):
    """A dataset config lacking ``license_type`` must be rejected."""
    config = {
        "collection_id": "test-collection",
        "dataset_id": "test-dataset",
    }
    self.publisher.dataset_config = config
    with pytest.raises(ValueError, match="license_type is required"):
        self.publisher.publish_dataset(write_to_file=False)

with patch.object(
self.publisher, "_write_stac_catalog_to_s3"
) as mock_write:
self.publisher.publish(mode="dataset")
mock_write.assert_not_called()
def test_write_to_file_serializes_dict(self):
    """_write_to_file must dump a dict as JSON that round-trips via json.load.

    Fixes two weaknesses of the original: the name ``f`` was reused for
    both the temp-file handle and the reopened read handle (shadowing),
    and ``NamedTemporaryFile(delete=False)`` + manual ``os.unlink`` leaked
    the file if cleanup raised. ``TemporaryDirectory`` guarantees cleanup
    even when the assertion fails.
    """
    import json
    import os
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, "out.json")
        Publisher._write_to_file(path, {"a": 1})
        with open(path) as fp:
            result = json.load(fp)
        self.assertEqual(result, {"a": 1})

def test_update_and_add_to_file_dict(self):
    """The helper should invoke the update callable once and register the file."""
    self.publisher.gh_publisher.github_automation.local_clone_dir = "/tmp"
    updater = MagicMock(return_value={"key": "value"})
    files = {}

    self.publisher._update_and_add_to_file_dict(files, "some/catalog.json", updater)

    updater.assert_called_once()
    matching_keys = [k for k in files if "some/catalog.json" in str(k)]
    assert matching_keys

def test_update_variable_catalogs_creates_new_when_missing(self):
    """A new variable catalog is built when none exists in the repo clone."""
    generator = MagicMock()
    generator.variables_metadata = {"var1": {"variable_id": "var1"}}
    catalog = generator.build_variable_catalog.return_value
    catalog.to_dict.return_value = {"id": "var1"}
    # Pretend the catalog file is absent from the metadata repository.
    self.publisher.gh_publisher.github_automation.file_exists.return_value = False

    files = {}
    self.publisher._update_variable_catalogs(generator, files, ["var1"])

    generator.build_variable_catalog.assert_called_once()
    assert "variables/var1/catalog.json" in files

def test_update_variable_catalogs_updates_existing(self):
    """An existing variable catalog is updated in place rather than rebuilt."""
    generator = MagicMock()
    generator.update_existing_variable_catalog.return_value = {"id": "var1"}
    automation = self.publisher.gh_publisher.github_automation
    automation.file_exists.return_value = True
    automation.local_clone_dir = "/tmp"

    files = {}
    self.publisher._update_variable_catalogs(generator, files, ["var1"])

    generator.update_existing_variable_catalog.assert_called_once()
    assert "variables/var1/catalog.json" in files

# ------------------------------------------------------------------
# generate_workflow_experiment_records
# ------------------------------------------------------------------

def _setup_workflow_mocks(self):
    """Build the mocks used by generate_workflow_experiment_records tests.

    Returns a tuple ``(record_generator, properties, workflow_record,
    experiment_record)`` of MagicMocks wired with the return values the
    production code reads.
    """
    properties = MagicMock()
    properties.jupyter_kernel_info.to_dict.return_value = {}

    record_generator = MagicMock()
    record_generator.build_record_properties.return_value = properties

    workflow_record = MagicMock()
    workflow_record.to_dict.return_value = {"id": "wf", "properties": {}}

    experiment_record = MagicMock()
    experiment_record.to_dict.return_value = {
        "id": "wf",
        "properties": {},
        "jupyter_notebook_url": "url",
        "collection_id": "col",
    }
    return record_generator, properties, workflow_record, experiment_record

@patch("deep_code.tools.publish.WorkflowAsOgcRecord")
@patch("deep_code.tools.publish.LinksBuilder")
@patch("deep_code.tools.publish.OSCWorkflowOGCApiRecordGenerator")
def test_generate_workflow_records_mode_workflow(self, MockRG, MockLinks, MockWF):
    """mode='workflow' yields workflow files but no experiment catalog."""
    rg, _props, wf_record, _exp = self._setup_workflow_mocks()
    MockRG.return_value = rg
    MockWF.return_value = wf_record

    self.publisher.workflow_config = {
        "workflow_id": "my-workflow",
        "properties": {"title": "My WF", "license": "CC-BY-4.0"},
    }
    with patch.object(self.publisher, "_update_base_catalog", return_value={}):
        result = self.publisher.generate_workflow_experiment_records(
            write_to_file=False, mode="workflow"
        )

    for expected in ("workflows/my-workflow/record.json", "workflows/catalog.json"):
        self.assertIn(expected, result)
    self.assertNotIn("experiments/catalog.json", result)

@patch("deep_code.tools.publish.ExperimentAsOgcRecord")
@patch("deep_code.tools.publish.WorkflowAsOgcRecord")
@patch("deep_code.tools.publish.LinksBuilder")
@patch("deep_code.tools.publish.OSCWorkflowOGCApiRecordGenerator")
def test_generate_workflow_records_mode_all(self, MockRG, MockLinks, MockWF, MockExp):
    """mode='all' yields workflow records plus the experiment catalog."""
    rg, _props, wf_record, exp_record = self._setup_workflow_mocks()
    MockRG.return_value = rg
    MockWF.return_value = wf_record
    MockExp.return_value = exp_record

    self.publisher.workflow_config = {
        "workflow_id": "my-workflow",
        "properties": {"title": "My WF", "license": "CC-BY-4.0"},
    }
    self.publisher.collection_id = "my-collection"
    with patch.object(self.publisher, "_update_base_catalog", return_value={}):
        result = self.publisher.generate_workflow_experiment_records(
            write_to_file=False, mode="all"
        )

    for expected in (
        "workflows/my-workflow/record.json",
        "workflows/catalog.json",
        "experiments/catalog.json",
    ):
        self.assertIn(expected, result)

@patch("deep_code.tools.publish.OSCWorkflowOGCApiRecordGenerator")
def test_generate_workflow_records_raises_when_workflow_id_missing(self, MockRG):
    """A workflow config without ``workflow_id`` must be rejected."""
    config = {"properties": {"title": "My WF", "license": "CC-BY-4.0"}}
    self.publisher.workflow_config = config
    with pytest.raises(ValueError, match="workflow_id is missing"):
        self.publisher.generate_workflow_experiment_records(
            write_to_file=False, mode="workflow"
        )

@patch("deep_code.tools.publish.OSCWorkflowOGCApiRecordGenerator")
def test_generate_workflow_records_raises_when_license_missing(self, MockRG):
    """A workflow config whose properties lack ``license`` must be rejected."""
    config = {
        "workflow_id": "my-wf",
        "properties": {"title": "My WF"},
    }
    self.publisher.workflow_config = config
    with pytest.raises(ValueError, match="license is required"):
        self.publisher.generate_workflow_experiment_records(
            write_to_file=False, mode="workflow"
        )

def test_generate_workflow_records_returns_empty_for_dataset_mode(self):
    """Dataset-only mode produces no workflow or experiment records."""
    records = self.publisher.generate_workflow_experiment_records(
        write_to_file=False, mode="dataset"
    )
    assert records == {}


class TestParseGithubNotebookUrl:
Expand Down
Loading
Loading