From ff3bfd787659f4fe1d499c97462df1626f77534f Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 5 Feb 2025 12:42:17 +0100 Subject: [PATCH 1/5] introduced DataTreeNode; added _linter.validate._visit_datatree_node(); added rule.RuleOp.validate_datatree() --- tests/_linter/test_rulectx.py | 6 +- tests/cli/test_main.py | 3 + .../plugins/core/rules/test_access_latency.py | 8 +- tests/test_linter.py | 22 +++--- xrlint/_linter/apply.py | 73 ++++++++++++++----- xrlint/_linter/rulectx.py | 32 ++++++-- xrlint/_linter/validate.py | 24 +++--- xrlint/constants.py | 4 +- xrlint/linter.py | 11 +-- xrlint/node.py | 14 ++++ xrlint/rule.py | 13 +++- 11 files changed, 153 insertions(+), 57 deletions(-) diff --git a/tests/_linter/test_rulectx.py b/tests/_linter/test_rulectx.py index 58102ef..62f4a68 100644 --- a/tests/_linter/test_rulectx.py +++ b/tests/_linter/test_rulectx.py @@ -9,7 +9,7 @@ # noinspection PyProtectedMember from xrlint._linter.rulectx import RuleContextImpl from xrlint.config import ConfigObject -from xrlint.constants import NODE_ROOT_NAME +from xrlint.constants import DATASET_ROOT_NAME from xrlint.result import Message, Suggestion @@ -39,7 +39,7 @@ def test_report(self): [ Message( message="What the heck do you mean?", - node_path=NODE_ROOT_NAME, + node_path=DATASET_ROOT_NAME, rule_id="no-xxx", severity=2, suggestions=[ @@ -48,7 +48,7 @@ def test_report(self): ), Message( message="You said it.", - node_path=NODE_ROOT_NAME, + node_path=DATASET_ROOT_NAME, rule_id="no-xxx", severity=2, fatal=True, diff --git a/tests/cli/test_main.py b/tests/cli/test_main.py index 4f2898c..f2d6c41 100644 --- a/tests/cli/test_main.py +++ b/tests/cli/test_main.py @@ -66,6 +66,9 @@ def xrlint(self, *args: tuple[str, ...]) -> click.testing.Result: runner = CliRunner() result = runner.invoke(main, args) if not isinstance(result.exception, SystemExit): + import traceback + + traceback.print_exception(result.exception) self.assertIsNone(result.exception) return result diff --git a/tests/plugins/core/rules/test_access_latency.py b/tests/plugins/core/rules/test_access_latency.py index 57b9231..12e2881 100644 --- a/tests/plugins/core/rules/test_access_latency.py +++ b/tests/plugins/core/rules/test_access_latency.py @@ -10,6 +10,7 @@ # noinspection PyProtectedMember from xrlint._linter.rulectx import RuleContextImpl from xrlint.config import ConfigObject +from xrlint.constants import DATASET_ROOT_NAME from xrlint.node import DatasetNode from xrlint.plugins.core.rules.access_latency import AccessLatency from xrlint.result import Message @@ -33,8 +34,9 @@ def invoke_op( access_latency=access_latency, ) node = DatasetNode( - path="dataset", parent=None, + path=DATASET_ROOT_NAME, + name=DATASET_ROOT_NAME, dataset=ctx.dataset, ) rule_op = ( @@ -59,7 +61,7 @@ def test_invalid(self): [ Message( message="Access latency exceeds threshold: 3.2 > 2.5 seconds.", - node_path="dataset", + node_path=DATASET_ROOT_NAME, severity=2, ) ], @@ -71,7 +73,7 @@ def test_invalid(self): [ Message( message="Access latency exceeds threshold: 0.2 > 0.1 seconds.", - node_path="dataset", + node_path=DATASET_ROOT_NAME, severity=2, ) ], diff --git a/tests/test_linter.py b/tests/test_linter.py index 752d0f1..9f4b899 100644 --- a/tests/test_linter.py +++ b/tests/test_linter.py @@ -8,7 +8,7 @@ import xarray as xr from xrlint.config import Config, ConfigObject -from xrlint.constants import CORE_PLUGIN_NAME, NODE_ROOT_NAME +from xrlint.constants import CORE_PLUGIN_NAME, DATASET_ROOT_NAME from xrlint.linter import Linter, new_linter from xrlint.node import AttrNode, AttrsNode, DatasetNode, VariableNode from xrlint.plugin import new_plugin @@ -179,7 +179,7 @@ def test_linter_respects_rule_severity_error(self): messages=[ Message( message="Dataset does not have data variables", - node_path="dataset", + node_path=DATASET_ROOT_NAME, rule_id="test/dataset-without-data-vars", severity=2, ) @@ -204,7 +204,7 @@ def test_linter_respects_rule_severity_warn(self): messages=[ Message( message="Dataset does not have data variables", - node_path="dataset", + node_path=DATASET_ROOT_NAME, rule_id="test/dataset-without-data-vars", severity=1, ) @@ -238,7 +238,7 @@ def test_linter_recognized_unknown_rule(self): Message( message="unknown rule 'test/dataset-is-fast'", rule_id="test/dataset-is-fast", - node_path=NODE_ROOT_NAME, + node_path=DATASET_ROOT_NAME, severity=2, fatal=True, ) @@ -294,13 +294,13 @@ def test_linter_real_life_scenario(self): messages=[ Message( message="Attribute name with space: 'created at'", - node_path="dataset.attrs['created at']", + node_path=f"{DATASET_ROOT_NAME}.attrs['created at']", rule_id="test/no-space-in-attr-name", severity=2, ), Message( message="Empty attributes", - node_path="dataset.data_vars['tsm'].attrs", + node_path=f"{DATASET_ROOT_NAME}.data_vars['tsm'].attrs", rule_id="test/no-empty-attrs", severity=1, ), @@ -310,7 +310,7 @@ def test_linter_real_life_scenario(self): "variable 'chl' is missing a " "coordinate variable" ), - node_path="dataset.data_vars['chl']", + node_path=f"{DATASET_ROOT_NAME}.data_vars['chl']", rule_id="test/data-var-dim-must-have-coord", severity=2, ), @@ -320,7 +320,7 @@ def test_linter_real_life_scenario(self): "variable 'tsm' is missing a " "coordinate variable" ), - node_path="dataset.data_vars['tsm']", + node_path=f"{DATASET_ROOT_NAME}.data_vars['tsm']", rule_id="test/data-var-dim-must-have-coord", severity=2, ), @@ -342,13 +342,13 @@ def test_processor_ok(self): [ Message( message="Dataset does not have data variables", - node_path="dataset[0]", + node_path=f"{DATASET_ROOT_NAME}[0]", rule_id="test/dataset-without-data-vars", severity=1, ), Message( message="Dataset does not have data variables", - node_path="dataset[1]", + node_path=f"{DATASET_ROOT_NAME}[1]", rule_id="test/dataset-without-data-vars", severity=1, ), @@ -371,7 +371,7 @@ def test_processor_fail(self): message="bad checksum", severity=2, fatal=True, - node_path=NODE_ROOT_NAME, + node_path=DATASET_ROOT_NAME, ) ], result.messages, diff --git a/xrlint/_linter/apply.py b/xrlint/_linter/apply.py index ade4959..8ffda67 100644 --- a/xrlint/_linter/apply.py +++ b/xrlint/_linter/apply.py @@ -2,15 +2,15 @@ # This software is distributed under the terms and conditions of the # MIT license (https://mit-license.org/). -from xrlint.node import AttrNode, AttrsNode, DatasetNode, VariableNode +from xrlint.node import AttrNode, AttrsNode, DataTreeNode, DatasetNode, VariableNode from xrlint.rule import RuleConfig, RuleExit, RuleOp -from ..constants import NODE_ROOT_NAME +from ..constants import DATASET_ROOT_NAME, DATATREE_ROOT_NAME from .rulectx import RuleContextImpl def apply_rule( - context: RuleContextImpl, + ctx: RuleContextImpl, rule_id: str, rule_config: RuleConfig, ): @@ -18,40 +18,79 @@ def apply_rule( `context` using rule configuration `rule_config`. """ try: - rule = context.config.get_rule(rule_id) + rule = ctx.config.get_rule(rule_id) except ValueError as e: - context.report(f"{e}", fatal=True) + ctx.report(f"{e}", fatal=True) return if rule_config.severity == 0: # rule is off return - with context.use_state(severity=rule_config.severity): + with ctx.use_state(severity=rule_config.severity): # TODO: validate rule_config.args/kwargs against rule.meta.schema # noinspection PyArgumentList rule_op: RuleOp = rule.op_class(*rule_config.args, **rule_config.kwargs) try: + if ctx.datatree is not None: + name = ( + DATATREE_ROOT_NAME + if ctx.file_index is None + else f"{DATATREE_ROOT_NAME}[{ctx.file_index}]" + ) + _visit_datatree_node( + rule_op, + ctx, + DataTreeNode( + parent=None, path=name, name=name, datatree=ctx.datatree + ), + ) + else: + name = ( + DATASET_ROOT_NAME + if ctx.file_index is None + else f"{DATASET_ROOT_NAME}[{ctx.file_index}]" + ) + _visit_dataset_node( + rule_op, + ctx, + DatasetNode(parent=None, path=name, name=name, dataset=ctx.dataset), + ) + except RuleExit: + # This is ok, the rule requested it. + pass + + +def _visit_datatree_node(rule_op: RuleOp, context: RuleContextImpl, node: DataTreeNode): + with context.use_state(node=node): + rule_op.validate_datatree(context, node) + if node.datatree.is_leaf: _visit_dataset_node( rule_op, context, DatasetNode( - parent=None, - path=( - NODE_ROOT_NAME - if context.file_index is None - else f"{NODE_ROOT_NAME}[{context.file_index}]" - ), - dataset=context.dataset, + parent=node, + path=f"{node.path}/{node.datatree.name}", + name=node.datatree.name, + dataset=node.datatree.dataset, ), ) - except RuleExit: - # This is ok, the rule requested it. - pass + else: + for name, datatree in node.datatree.children.items(): + _visit_datatree_node( + rule_op, + context, + DataTreeNode( + parent=node, + path=f"{node.path}/{name}", + name=name, + datatree=datatree, + ), + ) def _visit_dataset_node(rule_op: RuleOp, context: RuleContextImpl, node: DatasetNode): - with context.use_state(node=node): + with context.use_state(dataset=node.dataset, node=node): rule_op.validate_dataset(context, node) _visit_attrs_node( rule_op, diff --git a/xrlint/_linter/rulectx.py b/xrlint/_linter/rulectx.py index 1a1df73..87c0bb6 100644 --- a/xrlint/_linter/rulectx.py +++ b/xrlint/_linter/rulectx.py @@ -8,7 +8,7 @@ import xarray as xr from xrlint.config import ConfigObject -from xrlint.constants import NODE_ROOT_NAME, SEVERITY_ERROR +from xrlint.constants import DATASET_ROOT_NAME, SEVERITY_ERROR from xrlint.node import Node from xrlint.result import Message, Suggestion from xrlint.rule import RuleContext @@ -18,16 +18,26 @@ class RuleContextImpl(RuleContext): def __init__( self, config: ConfigObject, - dataset: xr.Dataset, + dataset: xr.Dataset | xr.DataTree, file_path: str, file_index: int | None, access_latency: float | None, ): - assert config is not None - assert dataset is not None - assert file_path is not None + assert isinstance(config, ConfigObject) + assert isinstance(dataset, (xr.Dataset | xr.DataTree)) + assert isinstance(file_path, str) assert file_index is None or isinstance(file_index, int) + assert access_latency is None or isinstance(access_latency, float) + if isinstance(dataset, xr.DataTree): + datatree = dataset + dataset = None + if datatree.is_leaf: + dataset = datatree.dataset + datatree = None + else: + datatree = None self._config = config + self._datatree = datatree self._dataset = dataset self._file_path = file_path self._file_index = file_index @@ -46,9 +56,17 @@ def settings(self) -> dict[str, Any]: return self._config.settings or {} @property - def dataset(self) -> xr.Dataset: + def datatree(self) -> xr.DataTree | None: + return self._datatree + + @property + def dataset(self) -> xr.Dataset | None: return self._dataset + @dataset.setter + def dataset(self, value: xr.Dataset) -> None: + self._dataset = value + @property def file_path(self) -> str: return self._file_path @@ -76,7 +94,7 @@ def report( fatal=fatal, suggestions=suggestions, rule_id=self.rule_id, - node_path=self.node.path if self.node is not None else NODE_ROOT_NAME, + node_path=self.node.path if self.node is not None else DATASET_ROOT_NAME, severity=self.severity, ) self.messages.append(m) diff --git a/xrlint/_linter/validate.py b/xrlint/_linter/validate.py index 8c962ac..270b3ba 100644 --- a/xrlint/_linter/validate.py +++ b/xrlint/_linter/validate.py @@ -10,7 +10,7 @@ from xrlint.config import ConfigObject from xrlint.result import Message, Result -from ..constants import NODE_ROOT_NAME +from ..constants import DATASET_ROOT_NAME from .apply import apply_rule from .rulectx import RuleContextImpl @@ -28,13 +28,13 @@ def validate_dataset(config_obj: ConfigObject, dataset: Any, file_path: str): def _validate_dataset( config_obj: ConfigObject, - dataset: xr.Dataset, + dataset: xr.Dataset | xr.DataTree, file_path: str, file_index: int | None, access_latency: float | None, ) -> list[Message]: assert isinstance(config_obj, ConfigObject) - assert isinstance(dataset, xr.Dataset) + assert isinstance(dataset, (xr.Dataset, xr.DataTree)) assert isinstance(file_path, str) context = RuleContextImpl( @@ -70,12 +70,12 @@ def _open_and_validate_dataset( file_path, ) else: - t0 = time.time() try: - dataset = _open_dataset(ds_source, opener_options, file_path) + dataset, access_latency = _open_dataset( + ds_source, opener_options, file_path + ) except (OSError, ValueError, TypeError) as e: return [new_fatal_message(str(e))] - access_latency = time.time() - t0 with dataset: return _validate_dataset( config_obj, dataset, file_path, None, access_latency @@ -84,12 +84,18 @@ def _open_and_validate_dataset( def _open_dataset( ds_source: Any, opener_options: dict[str, Any] | None, file_path: str -) -> xr.Dataset: +) -> tuple[xr.Dataset | xr.DataTree, float]: """Open a dataset.""" engine = opener_options.pop("engine", None) if engine is None and (file_path.endswith(".zarr") or file_path.endswith(".zarr/")): engine = "zarr" - return xr.open_dataset(ds_source, engine=engine, **(opener_options or {})) + try: + t0 = time.time() + result = xr.open_datatree(ds_source, engine=engine, **(opener_options or {})) + except (OSError, ValueError, TypeError): + t0 = time.time() + result = xr.open_dataset(ds_source, engine=engine, **(opener_options or {})) + return result, time.time() - t0 def new_fatal_message(message: str) -> Message: @@ -97,5 +103,5 @@ def new_fatal_message(message: str) -> Message: message=message, fatal=True, severity=2, - node_path=NODE_ROOT_NAME, + node_path=DATASET_ROOT_NAME, ) diff --git a/xrlint/constants.py b/xrlint/constants.py index 5eff952..1043cea 100644 --- a/xrlint/constants.py +++ b/xrlint/constants.py @@ -7,7 +7,9 @@ CORE_PLUGIN_NAME: Final = "__core__" CORE_DOCS_URL = "https://bcdev.github.io/xrlint/rule-ref" -NODE_ROOT_NAME: Final = "dataset" +DATATREE_ROOT_NAME: Final = "dt" +DATASET_ROOT_NAME: Final = "ds" +MISSING_DATATREE_FILE_PATH: Final = "" MISSING_DATASET_FILE_PATH: Final = "" SEVERITY_ERROR: Final = 2 diff --git a/xrlint/linter.py b/xrlint/linter.py index e6e8d3c..67db80f 100644 --- a/xrlint/linter.py +++ b/xrlint/linter.py @@ -70,9 +70,10 @@ def validate( """Validate a dataset against applicable rules. Args: - dataset: The dataset. Can be a `xr.Dataset` instance - or a file path, or any dataset source that can be opened - using `xarray.open_dataset()`. + dataset: The dataset. Can be a `xr.Dataset` or `xr.DataTree` + instance or a file path, or any dataset source that can + be opened using `xarray.open_dataset()` + or `xarray.open_datatree()`. file_path: Optional file path used for formatting messages. Useful if `dataset` is not a file path. config: Optional configuration-like value. @@ -86,7 +87,7 @@ def validate( Result of the validation. """ if not file_path: - if isinstance(dataset, xr.Dataset): + if isinstance(dataset, (xr.Dataset, xr.DataTree)): file_path = file_path or _get_file_path_for_dataset(dataset) else: file_path = file_path or _get_file_path_for_source(dataset) @@ -107,7 +108,7 @@ def validate( return validate_dataset(config_obj, dataset, file_path) -def _get_file_path_for_dataset(dataset: xr.Dataset) -> str: +def _get_file_path_for_dataset(dataset: xr.Dataset | xr.DataTree) -> str: ds_source = dataset.encoding.get("source") return _get_file_path_for_source(ds_source) diff --git a/xrlint/node.py b/xrlint/node.py index 95ef739..9b190a4 100644 --- a/xrlint/node.py +++ b/xrlint/node.py @@ -38,10 +38,24 @@ def in_root(self) -> bool: return not self.in_coords() and not self.in_data_vars() +@dataclass(frozen=True, kw_only=True) +class DataTreeNode(XarrayNode): + """DataTree node.""" + + name: Hashable + """The name of the datatree.""" + + datatree: xr.DataTree + """The `xarray.DataTree` instance.""" + + @dataclass(frozen=True, kw_only=True) class DatasetNode(XarrayNode): """Dataset node.""" + name: Hashable + """The name of the dataset.""" + dataset: xr.Dataset """The `xarray.Dataset` instance.""" diff --git a/xrlint/rule.py b/xrlint/rule.py index 9906180..5317c5e 100644 --- a/xrlint/rule.py +++ b/xrlint/rule.py @@ -10,7 +10,7 @@ import xarray as xr from xrlint.constants import SEVERITY_ENUM, SEVERITY_ENUM_TEXT -from xrlint.node import AttrNode, AttrsNode, DatasetNode, VariableNode +from xrlint.node import AttrNode, AttrsNode, DataTreeNode, DatasetNode, VariableNode from xrlint.operation import Operation, OperationMeta from xrlint.result import Suggestion from xrlint.util.constructible import ValueConstructible @@ -86,6 +86,17 @@ class RuleExit(Exception): class RuleOp(ABC): """Define the specific rule validation operations.""" + def validate_datatree(self, ctx: RuleContext, node: DataTreeNode) -> None: + """Validate the given datatree node. + + Args: + ctx: The current rule context. + node: The datatree node. + + Raises: + RuleExit: to exit rule logic and further node traversal + """ + def validate_dataset(self, ctx: RuleContext, node: DatasetNode) -> None: """Validate the given dataset node. From e579f46584b3bd1663da8c5e08bf7a7c0b37a66c Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 5 Feb 2025 16:34:12 +0100 Subject: [PATCH 2/5] fix --- xrlint/_linter/validate.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/xrlint/_linter/validate.py b/xrlint/_linter/validate.py index 270b3ba..51284f2 100644 --- a/xrlint/_linter/validate.py +++ b/xrlint/_linter/validate.py @@ -92,7 +92,13 @@ def _open_dataset( try: t0 = time.time() result = xr.open_datatree(ds_source, engine=engine, **(opener_options or {})) - except (OSError, ValueError, TypeError): + # When opening no-group Zarr datasets we get with xarray 2025.1.2: + # + # File "<...>/site-packages/xarray/backends/zarr.py", line 741, in __init__ + # self._read_only = self.zarr_group.read_only + # ^^^^^^^^^^^^^^^^^^^^^^^^^ + # AttributeError: 'NoneType' object has no attribute 'read_only' + except (OSError, ValueError, TypeError, AttributeError): t0 = time.time() result = xr.open_dataset(ds_source, engine=engine, **(opener_options or {})) return result, time.time() - t0 From a0441ae03e877ca82da894d7d757627012d104fe Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 12 Feb 2025 18:10:09 +0100 Subject: [PATCH 3/5] fix --- docs/api.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api.md b/docs/api.md index c9da60e..0eb018d 100644 --- a/docs/api.md +++ b/docs/api.md @@ -26,7 +26,7 @@ This chapter provides a plain reference for the XRLint Python API. - The `node` module defines the nodes passed to [RuleOp][xrlint.rule.RuleOp]: base classes [None][xrlint.node.Node], [XarrayNode][xrlint.node.XarrayNode], and the specific [DatasetNode][xrlint.node.DatasetNode], - [DataArray][xrlint.node.DataArrayNode], [AttrsNode][xrlint.node.AttrsNode], + [VariableNode][xrlint.node.VariableNode], [AttrsNode][xrlint.node.AttrsNode], and [AttrNode][xrlint.node.AttrNode] nodes. - The `processor` module provides processor related classes and functions: [Processor][xrlint.processor.Processor] comprising processor metadata @@ -84,7 +84,7 @@ Note: ::: xrlint.node.DatasetNode -::: xrlint.node.DataArrayNode +::: xrlint.node.VariableNode ::: xrlint.node.AttrsNode From ac461f83ad196b3b0f0718c11a78dc9cbd5a133c Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 12 Feb 2025 20:26:57 +0100 Subject: [PATCH 4/5] Removed Result.new(), added test for linting data trees --- CHANGES.md | 5 ++ pyproject.toml | 2 +- tests/formatters/helpers.py | 8 +-- tests/formatters/test_simple.py | 8 +-- tests/test_linter.py | 104 +++++++++++++++++++++++++------- tests/test_result.py | 39 ++++++------ xrlint/_linter/validate.py | 4 +- xrlint/linter.py | 16 +++-- xrlint/result.py | 70 ++++++++------------- 9 files changed, 153 insertions(+), 103 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 840b0b1..41bcec6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,11 @@ ### Adjustments and Enhancements +- Added support for validating Zarr and HDF-5 groups and their items. + Rules can now validate `xarray.DataTree` objects originating + from `xarray.open_datatree()` by implementing + rule operation method `RuleOp.validate_datatree(ctx, node)`. (#54) + - Added a new core rule `access-latency` that can be used to check the time it takes to open a dataset. diff --git a/pyproject.toml b/pyproject.toml index 19869f2..c9d25c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] description = "A linter for xarray datasets." keywords = [ - "xarray" + "xarray", "data-science", "cf", "metadata" ] license = {text = "MIT"} requires-python = ">=3.10" diff --git a/tests/formatters/helpers.py b/tests/formatters/helpers.py index 70c46ef..02b6286 100644 --- a/tests/formatters/helpers.py +++ b/tests/formatters/helpers.py @@ -45,9 +45,9 @@ class Rule2(RuleOp): config_obj = ConfigObject(plugins={"test": plugin}) return [ - Result.new( - config_object=config_obj, + Result( file_path="test.nc", + config_object=config_obj, messages=[ Message( message="message-1", @@ -64,9 +64,9 @@ class Rule2(RuleOp): Message(message="message-3", fatal=True), ], ), - Result.new( + Result( + file_path="test.nc", config_object=config_obj, - file_path="test-2.nc", messages=[ Message(message="message-1", rule_id="test/rule-1", severity=1), Message(message="message-2", rule_id="test/rule-2", severity=2), diff --git a/tests/formatters/test_simple.py b/tests/formatters/test_simple.py index 70e9e07..1bf6d15 100644 --- a/tests/formatters/test_simple.py +++ b/tests/formatters/test_simple.py @@ -12,9 +12,9 @@ class SimpleTest(TestCase): errors_and_warnings = [ - Result.new( - config_object=ConfigObject(), + Result( file_path="test1.nc", + config_object=ConfigObject(), messages=[ Message(message="what", rule_id="rule-1", severity=2), Message(message="is", fatal=True), @@ -24,9 +24,9 @@ class SimpleTest(TestCase): ] warnings_only = [ - Result.new( - ConfigObject(), + Result( file_path="test2.nc", + config_object=ConfigObject(), messages=[ Message(message="what", rule_id="rule-1", severity=1), Message(message="happened?", rule_id="rule-2", severity=1), diff --git a/tests/test_linter.py b/tests/test_linter.py index 9f4b899..4042d8a 100644 --- a/tests/test_linter.py +++ b/tests/test_linter.py @@ -10,7 +10,7 @@ from xrlint.config import Config, ConfigObject from xrlint.constants import CORE_PLUGIN_NAME, DATASET_ROOT_NAME from xrlint.linter import Linter, new_linter -from xrlint.node import AttrNode, AttrsNode, DatasetNode, VariableNode +from xrlint.node import AttrNode, AttrsNode, DatasetNode, VariableNode, DataTreeNode from xrlint.plugin import new_plugin from xrlint.processor import ProcessorOp from xrlint.result import Message, Result @@ -97,6 +97,7 @@ def assert_result_ok(self, result: Result, expected_message: str): class LinterValidateTest(TestCase): + # noinspection PyUnusedLocal def setUp(self): plugin = new_plugin(name="test") @@ -113,7 +114,7 @@ def validate_attrs(self, ctx: RuleContext, node: AttrsNode): ctx.report("Empty attributes") @plugin.define_rule("data-var-dim-must-have-coord") - class DataArrayVer(RuleOp): + class VariableVer(RuleOp): def validate_variable(self, ctx: RuleContext, node: VariableNode): if node.in_data_vars(): for dim_name in node.array.dims: @@ -131,6 +132,12 @@ def validate_dataset(self, ctx: RuleContext, node: DatasetNode): ctx.report("Dataset does not have data variables") raise RuleExit # no need to traverse further + @plugin.define_rule("datatree-without-data-vars") + class DataTreeVer(RuleOp): + def validate_datatree(self, ctx: RuleContext, node: DataTreeNode): + if len(node.datatree.data_vars) == 0: + ctx.report("DataTree does not have data variables") + @plugin.define_processor("multi-level-dataset") class MultiLevelDataset(ProcessorOp): def preprocess( @@ -159,6 +166,7 @@ def test_rules_are_ok(self): "no-empty-attrs", "data-var-dim-must-have-coord", "dataset-without-data-vars", + "datatree-without-data-vars", ], list(self.linter.config.objects[0].plugins["test"].rules.keys()), ) @@ -171,11 +179,6 @@ def test_linter_respects_rule_severity_error(self): Result( config_object=result.config_object, file_path="", - warning_count=0, - error_count=1, - fatal_error_count=0, - fixable_warning_count=0, - fixable_error_count=0, messages=[ Message( message="Dataset does not have data variables", @@ -187,6 +190,9 @@ def test_linter_respects_rule_severity_error(self): ), result, ) + self.assertEqual(0, result.warning_count) + self.assertEqual(1, result.error_count) + self.assertEqual(0, result.fatal_error_count) def test_linter_respects_rule_severity_warn(self): result = self.linter.validate( @@ -196,11 +202,6 @@ def test_linter_respects_rule_severity_warn(self): Result( config_object=result.config_object, file_path="", - warning_count=1, - error_count=0, - fatal_error_count=0, - fixable_warning_count=0, - fixable_error_count=0, messages=[ Message( message="Dataset does not have data variables", @@ -212,6 +213,9 @@ def test_linter_respects_rule_severity_warn(self): ), result, ) + self.assertEqual(1, result.warning_count) + self.assertEqual(0, result.error_count) + self.assertEqual(0, result.fatal_error_count) def test_linter_respects_rule_severity_off(self): result = self.linter.validate( @@ -221,15 +225,13 @@ def test_linter_respects_rule_severity_off(self): Result( config_object=result.config_object, file_path="", - warning_count=0, - error_count=0, - fatal_error_count=0, - fixable_warning_count=0, - fixable_error_count=0, messages=[], ), result, ) + self.assertEqual(0, result.warning_count) + self.assertEqual(0, result.error_count) + self.assertEqual(0, result.fatal_error_count) def test_linter_recognized_unknown_rule(self): result = self.linter.validate(xr.Dataset(), rules={"test/dataset-is-fast": 2}) @@ -246,6 +248,66 @@ def test_linter_recognized_unknown_rule(self): result.messages, ) + def test_linter_recognized_datatree_rule(self): + result = self.linter.validate( + xr.DataTree( + children={ + "measurement": xr.DataTree( + children={ + "r10m": xr.DataTree(), + "r20m": xr.DataTree(), + "r60m": xr.DataTree(), + } + ) + } + ), + rules={"test/datatree-without-data-vars": 2}, + ) + self.assertEqual( + [ + Message( + message="DataTree does not have data variables", + node_path="dt", + rule_id="test/datatree-without-data-vars", + severity=2, + fatal=None, + fix=None, + suggestions=None, + ), + Message( + message="DataTree does not have data variables", + node_path="dt/measurement", + rule_id="test/datatree-without-data-vars", + severity=2, + fatal=None, + fix=None, + suggestions=None, + ), + Message( + message="DataTree does not have data variables", + node_path="dt/measurement/r10m", + rule_id="test/datatree-without-data-vars", + severity=2, + ), + Message( + message="DataTree does not have data variables", + node_path="dt/measurement/r20m", + rule_id="test/datatree-without-data-vars", + severity=2, + ), + Message( + message="DataTree does not have data variables", + node_path="dt/measurement/r60m", + rule_id="test/datatree-without-data-vars", + severity=2, + ), + ], + result.messages, + ) + self.assertEqual(0, result.warning_count) + self.assertEqual(5, result.error_count) + self.assertEqual(0, result.fatal_error_count) + def test_linter_real_life_scenario(self): dataset = xr.Dataset( attrs={ @@ -286,11 +348,6 @@ def test_linter_real_life_scenario(self): Result( config_object=result.config_object, file_path="chl-tsm.zarr", - warning_count=1, - error_count=3, - fatal_error_count=0, - fixable_warning_count=0, - fixable_error_count=0, messages=[ Message( message="Attribute name with space: 'created at'", @@ -328,6 +385,9 @@ def test_linter_real_life_scenario(self): ), result, ) + self.assertEqual(1, result.warning_count) + self.assertEqual(3, result.error_count) + self.assertEqual(0, result.fatal_error_count) def test_processor_ok(self): result = self.linter.validate( diff --git a/tests/test_result.py b/tests/test_result.py index 722f571..506cf3c 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -32,24 +32,24 @@ class MyRule2(RuleOp): config_obj = ConfigObject(plugins={"test": plugin}) rules_meta = get_rules_meta_for_results( results=[ - Result.new( - config_object=config_obj, + Result( file_path="test.zarr", + config_object=config_obj, messages=[Message(message="m 1", rule_id="test/my-rule-1")], ), - Result.new( - config_object=config_obj, + Result( file_path="test.zarr", + config_object=config_obj, messages=[Message(message="m 2", rule_id="test/my-rule-2")], ), - Result.new( - config_object=config_obj, + Result( file_path="test.zarr", + config_object=config_obj, messages=[Message(message="m 3", rule_id="test/my-rule-1")], ), - Result.new( - config_object=config_obj, + Result( file_path="test.zarr", + config_object=config_obj, messages=[Message(message="m 4", rule_id="test/my-rule-2")], ), ] @@ -66,9 +66,9 @@ class MyRule2(RuleOp): ) def test_repr_html(self): - result = Result.new( - config_object=ConfigObject(), + result = Result( file_path="test.zarr", + config_object=ConfigObject(), messages=[], ) html = result._repr_html_() @@ -76,9 +76,9 @@ def test_repr_html(self): self.assertIn("ok", html) self.assertIn(" "Linter": @@ -95,8 +95,7 @@ def validate( config = Config.from_config(self._config, config, config_props) config_obj = config.compute_config_object(file_path) if config_obj is None or not config_obj.rules: - return Result.new( - config_object=None, + return Result( file_path=file_path, messages=[ new_fatal_message( @@ -110,9 +109,14 @@ def validate( def _get_file_path_for_dataset(dataset: xr.Dataset | xr.DataTree) -> str: ds_source = dataset.encoding.get("source") - return _get_file_path_for_source(ds_source) + return _get_file_path_for_source( + ds_source, + MISSING_DATASET_FILE_PATH + if isinstance(dataset, xr.Dataset) + else MISSING_DATATREE_FILE_PATH, + ) -def _get_file_path_for_source(ds_source: Any) -> str: +def _get_file_path_for_source(ds_source: Any, default: str | None = None) -> str: file_path = str(ds_source) if isinstance(ds_source, (str, Path, PathLike)) else "" - return file_path or MISSING_DATASET_FILE_PATH + return file_path or (default or MISSING_DATASET_FILE_PATH) diff --git a/xrlint/result.py b/xrlint/result.py index b1fac09..2138398 100644 --- a/xrlint/result.py +++ b/xrlint/result.py @@ -4,12 +4,12 @@ from collections.abc import Iterable from dataclasses import dataclass, field +from functools import cached_property from typing import TYPE_CHECKING, Literal, Union from xrlint.constants import ( CORE_DOCS_URL, CORE_PLUGIN_NAME, - MISSING_DATASET_FILE_PATH, SEVERITY_ERROR, SEVERITY_WARN, ) @@ -86,63 +86,41 @@ class Message(JsonSerializable): """ -@dataclass(kw_only=True) +@dataclass(kw_only=True, frozen=True) class Result(JsonSerializable): """The aggregated information of linting a dataset.""" - config_object: Union["ConfigObject", None] = None - """The configuration object that produced this result - together with `file_path`. - """ - - file_path: str = MISSING_DATASET_FILE_PATH + file_path: str """The absolute path to the file of this result. This is the string "" if the file path is unknown (when you didn't pass the `file_path` option to the `xrlint.lint_dataset()` method). """ + config_object: Union["ConfigObject", None] = None + """The configuration object that produced this result + together with `file_path`. + """ + messages: list[Message] = field(default_factory=list) """The array of message objects.""" - fixable_error_count: int = 0 - """The number of errors that can be fixed automatically - by the fix constructor option. - """ - - fixable_warning_count: int = 0 - """The number of warnings that can be fixed automatically - by the fix constructor option. - """ - - error_count: int = 0 - """The number of errors. This includes fixable errors - and fatal errors. - """ - - fatal_error_count: int = 0 - """The number of fatal errors.""" - - warning_count: int = 0 - """The number of warnings. This includes fixable warnings.""" - - @classmethod - def new( - cls, - config_object: Union["ConfigObject", None] = None, - file_path: str | None = None, - messages: list[Message] | None = None, - ): - result = Result( - config_object=config_object, - file_path=file_path or MISSING_DATASET_FILE_PATH, - messages=messages or [], - ) - for m in messages: - result.warning_count += 1 if m.severity == SEVERITY_WARN else 0 - result.error_count += 1 if m.severity == SEVERITY_ERROR else 0 - result.fatal_error_count += 1 if m.fatal else 0 - return result + @cached_property + def warning_count(self) -> int: + """The number of warnings. This includes fixable warnings.""" + return sum(1 if m.severity == SEVERITY_WARN else 0 for m in self.messages) + + @cached_property + def error_count(self) -> int: + """The number of errors. This includes fixable errors + and fatal errors. + """ + return sum(1 if m.severity == SEVERITY_ERROR else 0 for m in self.messages) + + @cached_property + def fatal_error_count(self) -> int: + """The number of fatal errors.""" + return sum(1 if m.fatal else 0 for m in self.messages) def to_html(self) -> str: from xrlint.formatters.html import format_result From 978ec9285ebae8ad269a7e7ce81f039c38e378d2 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 12 Feb 2025 20:28:24 +0100 Subject: [PATCH 5/5] Update --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 41bcec6..a823e2b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -50,6 +50,8 @@ - added class method `from_config()` to `ConfigList`. - removed function `xrlint.config.merge_configs` as it was no longer used. +- Removed class method `Result.new()` as it was no longer used. + ### Other changes - XRLint now works with zarr >=2,<3 and zarr >=3.0.2