diff --git a/CHANGES.md b/CHANGES.md index 140ee62..e571bcf 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,11 @@ ### Adjustments and Enhancements +- Added support for validating Zarr and HDF-5 groups and their items. + Rules can now validate `xarray.DataTree` objects originating + from `xarray.open_datatree()` by implementing + rule operation method `RuleOp.validate_datatree(ctx, node)`. (#54) + - Added a new core rule `var-missing-data` that checks for the recommended use of a variable's missing data. @@ -48,6 +53,8 @@ - added class method `from_config()` to `ConfigList`. - removed function `xrlint.config.merge_configs` as it was no longer used. +- Removed class method `Result.new()` as it was no longer used. + ### Other changes - XRLint now works with zarr >=2,<3 and zarr >=3.0.2 diff --git a/docs/api.md b/docs/api.md index c9da60e..0eb018d 100644 --- a/docs/api.md +++ b/docs/api.md @@ -26,7 +26,7 @@ This chapter provides a plain reference for the XRLint Python API. - The `node` module defines the nodes passed to [RuleOp][xrlint.rule.RuleOp]: base classes [None][xrlint.node.Node], [XarrayNode][xrlint.node.XarrayNode], and the specific [DatasetNode][xrlint.node.DatasetNode], - [DataArray][xrlint.node.DataArrayNode], [AttrsNode][xrlint.node.AttrsNode], + [VariableNode][xrlint.node.VariableNode], [AttrsNode][xrlint.node.AttrsNode], and [AttrNode][xrlint.node.AttrNode] nodes. - The `processor` module provides processor related classes and functions: [Processor][xrlint.processor.Processor] comprising processor metadata @@ -84,7 +84,7 @@ Note: ::: xrlint.node.DatasetNode -::: xrlint.node.DataArrayNode +::: xrlint.node.VariableNode ::: xrlint.node.AttrsNode diff --git a/pyproject.toml b/pyproject.toml index 19869f2..c9d25c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] description = "A linter for xarray datasets." keywords = [ - "xarray" + "xarray", "data-science", "cf", "metadata" ] license = {text = "MIT"} requires-python = ">=3.10" diff --git a/tests/_linter/test_rulectx.py b/tests/_linter/test_rulectx.py index 58102ef..62f4a68 100644 --- a/tests/_linter/test_rulectx.py +++ b/tests/_linter/test_rulectx.py @@ -9,7 +9,7 @@ # noinspection PyProtectedMember from xrlint._linter.rulectx import RuleContextImpl from xrlint.config import ConfigObject -from xrlint.constants import NODE_ROOT_NAME +from xrlint.constants import DATASET_ROOT_NAME from xrlint.result import Message, Suggestion @@ -39,7 +39,7 @@ def test_report(self): [ Message( message="What the heck do you mean?", - node_path=NODE_ROOT_NAME, + node_path=DATASET_ROOT_NAME, rule_id="no-xxx", severity=2, suggestions=[ @@ -48,7 +48,7 @@ def test_report(self): ), Message( message="You said it.", - node_path=NODE_ROOT_NAME, + node_path=DATASET_ROOT_NAME, rule_id="no-xxx", severity=2, fatal=True, diff --git a/tests/cli/test_main.py b/tests/cli/test_main.py index 4f2898c..f2d6c41 100644 --- a/tests/cli/test_main.py +++ b/tests/cli/test_main.py @@ -66,6 +66,9 @@ def xrlint(self, *args: tuple[str, ...]) -> click.testing.Result: runner = CliRunner() result = runner.invoke(main, args) if not isinstance(result.exception, SystemExit): + import traceback + + traceback.print_exception(result.exception) self.assertIsNone(result.exception) return result diff --git a/tests/formatters/helpers.py b/tests/formatters/helpers.py index 70c46ef..02b6286 100644 --- a/tests/formatters/helpers.py +++ b/tests/formatters/helpers.py @@ -45,9 +45,9 @@ class Rule2(RuleOp): config_obj = ConfigObject(plugins={"test": plugin}) return [ - Result.new( - config_object=config_obj, + Result( file_path="test.nc", + config_object=config_obj, messages=[ Message( message="message-1", @@ -64,9 +64,9 @@ class Rule2(RuleOp): Message(message="message-3", fatal=True), ], ), - Result.new( + Result( + file_path="test.nc", config_object=config_obj, - file_path="test-2.nc", messages=[ Message(message="message-1", rule_id="test/rule-1", severity=1), Message(message="message-2", rule_id="test/rule-2", severity=2), diff --git a/tests/formatters/test_simple.py b/tests/formatters/test_simple.py index 70e9e07..1bf6d15 100644 --- a/tests/formatters/test_simple.py +++ b/tests/formatters/test_simple.py @@ -12,9 +12,9 @@ class SimpleTest(TestCase): errors_and_warnings = [ - Result.new( - config_object=ConfigObject(), + Result( file_path="test1.nc", + config_object=ConfigObject(), messages=[ Message(message="what", rule_id="rule-1", severity=2), Message(message="is", fatal=True), @@ -24,9 +24,9 @@ class SimpleTest(TestCase): ] warnings_only = [ - Result.new( - ConfigObject(), + Result( file_path="test2.nc", + config_object=ConfigObject(), messages=[ Message(message="what", rule_id="rule-1", severity=1), Message(message="happened?", rule_id="rule-2", severity=1), diff --git a/tests/plugins/core/rules/test_access_latency.py b/tests/plugins/core/rules/test_access_latency.py index 57b9231..12e2881 100644 --- a/tests/plugins/core/rules/test_access_latency.py +++ b/tests/plugins/core/rules/test_access_latency.py @@ -10,6 +10,7 @@ # noinspection PyProtectedMember from xrlint._linter.rulectx import RuleContextImpl from xrlint.config import ConfigObject +from xrlint.constants import DATASET_ROOT_NAME from xrlint.node import DatasetNode from xrlint.plugins.core.rules.access_latency import AccessLatency from xrlint.result import Message @@ -33,8 +34,9 @@ def invoke_op( access_latency=access_latency, ) node = DatasetNode( - path="dataset", parent=None, + path=DATASET_ROOT_NAME, + name=DATASET_ROOT_NAME, dataset=ctx.dataset, ) rule_op = ( @@ -59,7 +61,7 @@ def test_invalid(self): [ Message( message="Access latency exceeds threshold: 3.2 > 2.5 seconds.", - node_path="dataset", + node_path=DATASET_ROOT_NAME, severity=2, ) ], @@ -71,7 +73,7 @@ def test_invalid(self): [ Message( message="Access latency exceeds threshold: 0.2 > 0.1 seconds.", - node_path="dataset", + node_path=DATASET_ROOT_NAME, severity=2, ) ], diff --git a/tests/test_linter.py b/tests/test_linter.py index 752d0f1..4042d8a 100644 --- a/tests/test_linter.py +++ b/tests/test_linter.py @@ -8,9 +8,9 @@ import xarray as xr from xrlint.config import Config, ConfigObject -from xrlint.constants import CORE_PLUGIN_NAME, NODE_ROOT_NAME +from xrlint.constants import CORE_PLUGIN_NAME, DATASET_ROOT_NAME from xrlint.linter import Linter, new_linter -from xrlint.node import AttrNode, AttrsNode, DatasetNode, VariableNode +from xrlint.node import AttrNode, AttrsNode, DatasetNode, VariableNode, DataTreeNode from xrlint.plugin import new_plugin from xrlint.processor import ProcessorOp from xrlint.result import Message, Result @@ -97,6 +97,7 @@ def assert_result_ok(self, result: Result, expected_message: str): class LinterValidateTest(TestCase): + # noinspection PyUnusedLocal def setUp(self): plugin = new_plugin(name="test") @@ -113,7 +114,7 @@ def validate_attrs(self, ctx: RuleContext, node: AttrsNode): ctx.report("Empty attributes") @plugin.define_rule("data-var-dim-must-have-coord") - class DataArrayVer(RuleOp): + class VariableVer(RuleOp): def validate_variable(self, ctx: RuleContext, node: VariableNode): if node.in_data_vars(): for dim_name in node.array.dims: @@ -131,6 +132,12 @@ def validate_dataset(self, ctx: RuleContext, node: DatasetNode): ctx.report("Dataset does not have data variables") raise RuleExit # no need to traverse further + @plugin.define_rule("datatree-without-data-vars") + class DataTreeVer(RuleOp): + def validate_datatree(self, ctx: RuleContext, node: DataTreeNode): + if len(node.datatree.data_vars) == 0: + ctx.report("DataTree does not have data variables") + @plugin.define_processor("multi-level-dataset") class MultiLevelDataset(ProcessorOp): def preprocess( @@ -159,6 +166,7 @@ def test_rules_are_ok(self): "no-empty-attrs", "data-var-dim-must-have-coord", "dataset-without-data-vars", + "datatree-without-data-vars", ], list(self.linter.config.objects[0].plugins["test"].rules.keys()), ) @@ -171,15 +179,10 @@ def test_linter_respects_rule_severity_error(self): Result( config_object=result.config_object, file_path="", - warning_count=0, - error_count=1, - fatal_error_count=0, - fixable_warning_count=0, - fixable_error_count=0, messages=[ Message( message="Dataset does not have data variables", - node_path="dataset", + node_path=DATASET_ROOT_NAME, rule_id="test/dataset-without-data-vars", severity=2, ) @@ -187,6 +190,9 @@ def test_linter_respects_rule_severity_error(self): ), result, ) + self.assertEqual(0, result.warning_count) + self.assertEqual(1, result.error_count) + self.assertEqual(0, result.fatal_error_count) def test_linter_respects_rule_severity_warn(self): result = self.linter.validate( @@ -196,15 +202,10 @@ def test_linter_respects_rule_severity_warn(self): Result( config_object=result.config_object, file_path="", - warning_count=1, - error_count=0, - fatal_error_count=0, - fixable_warning_count=0, - fixable_error_count=0, messages=[ Message( message="Dataset does not have data variables", - node_path="dataset", + node_path=DATASET_ROOT_NAME, rule_id="test/dataset-without-data-vars", severity=1, ) @@ -212,6 +213,9 @@ def test_linter_respects_rule_severity_warn(self): ), result, ) + self.assertEqual(1, result.warning_count) + self.assertEqual(0, result.error_count) + self.assertEqual(0, result.fatal_error_count) def test_linter_respects_rule_severity_off(self): result = self.linter.validate( @@ -221,15 +225,13 @@ def test_linter_respects_rule_severity_off(self): Result( config_object=result.config_object, file_path="", - warning_count=0, - error_count=0, - fatal_error_count=0, - fixable_warning_count=0, - fixable_error_count=0, messages=[], ), result, ) + self.assertEqual(0, result.warning_count) + self.assertEqual(0, result.error_count) + self.assertEqual(0, result.fatal_error_count) def test_linter_recognized_unknown_rule(self): result = self.linter.validate(xr.Dataset(), rules={"test/dataset-is-fast": 2}) @@ -238,7 +240,7 @@ def test_linter_recognized_unknown_rule(self): Message( message="unknown rule 'test/dataset-is-fast'", rule_id="test/dataset-is-fast", - node_path=NODE_ROOT_NAME, + node_path=DATASET_ROOT_NAME, severity=2, fatal=True, ) @@ -246,6 +248,66 @@ def test_linter_recognized_unknown_rule(self): result.messages, ) + def test_linter_recognized_datatree_rule(self): + result = self.linter.validate( + xr.DataTree( + children={ + "measurement": xr.DataTree( + children={ + "r10m": xr.DataTree(), + "r20m": xr.DataTree(), + "r60m": xr.DataTree(), + } + ) + } + ), + rules={"test/datatree-without-data-vars": 2}, + ) + self.assertEqual( + [ + Message( + message="DataTree does not have data variables", + node_path="dt", + rule_id="test/datatree-without-data-vars", + severity=2, + fatal=None, + fix=None, + suggestions=None, + ), + Message( + message="DataTree does not have data variables", + node_path="dt/measurement", + rule_id="test/datatree-without-data-vars", + severity=2, + fatal=None, + fix=None, + suggestions=None, + ), + Message( + message="DataTree does not have data variables", + node_path="dt/measurement/r10m", + rule_id="test/datatree-without-data-vars", + severity=2, + ), + Message( + message="DataTree does not have data variables", + node_path="dt/measurement/r20m", + rule_id="test/datatree-without-data-vars", + severity=2, + ), + Message( + message="DataTree does not have data variables", + node_path="dt/measurement/r60m", + rule_id="test/datatree-without-data-vars", + severity=2, + ), + ], + result.messages, + ) + self.assertEqual(0, result.warning_count) + self.assertEqual(5, result.error_count) + self.assertEqual(0, result.fatal_error_count) + def test_linter_real_life_scenario(self): dataset = xr.Dataset( attrs={ @@ -286,21 +348,16 @@ def test_linter_real_life_scenario(self): Result( config_object=result.config_object, file_path="chl-tsm.zarr", - warning_count=1, - error_count=3, - fatal_error_count=0, - fixable_warning_count=0, - fixable_error_count=0, messages=[ Message( message="Attribute name with space: 'created at'", - node_path="dataset.attrs['created at']", + node_path=f"{DATASET_ROOT_NAME}.attrs['created at']", rule_id="test/no-space-in-attr-name", severity=2, ), Message( message="Empty attributes", - node_path="dataset.data_vars['tsm'].attrs", + node_path=f"{DATASET_ROOT_NAME}.data_vars['tsm'].attrs", rule_id="test/no-empty-attrs", severity=1, ), @@ -310,7 +367,7 @@ def test_linter_real_life_scenario(self): "variable 'chl' is missing a " "coordinate variable" ), - node_path="dataset.data_vars['chl']", + node_path=f"{DATASET_ROOT_NAME}.data_vars['chl']", rule_id="test/data-var-dim-must-have-coord", severity=2, ), @@ -320,7 +377,7 @@ def test_linter_real_life_scenario(self): "variable 'tsm' is missing a " "coordinate variable" ), - node_path="dataset.data_vars['tsm']", + node_path=f"{DATASET_ROOT_NAME}.data_vars['tsm']", rule_id="test/data-var-dim-must-have-coord", severity=2, ), @@ -328,6 +385,9 @@ def test_linter_real_life_scenario(self): ), result, ) + self.assertEqual(1, result.warning_count) + self.assertEqual(3, result.error_count) + self.assertEqual(0, result.fatal_error_count) def test_processor_ok(self): result = self.linter.validate( @@ -342,13 +402,13 @@ def test_processor_ok(self): [ Message( message="Dataset does not have data variables", - node_path="dataset[0]", + node_path=f"{DATASET_ROOT_NAME}[0]", rule_id="test/dataset-without-data-vars", severity=1, ), Message( message="Dataset does not have data variables", - node_path="dataset[1]", + node_path=f"{DATASET_ROOT_NAME}[1]", rule_id="test/dataset-without-data-vars", severity=1, ), @@ -371,7 +431,7 @@ def test_processor_fail(self): message="bad checksum", severity=2, fatal=True, - node_path=NODE_ROOT_NAME, + node_path=DATASET_ROOT_NAME, ) ], result.messages, diff --git a/tests/test_result.py b/tests/test_result.py index 722f571..506cf3c 100644 --- a/tests/test_result.py +++ b/tests/test_result.py @@ -32,24 +32,24 @@ class MyRule2(RuleOp): config_obj = ConfigObject(plugins={"test": plugin}) rules_meta = get_rules_meta_for_results( results=[ - Result.new( - config_object=config_obj, + Result( file_path="test.zarr", + config_object=config_obj, messages=[Message(message="m 1", rule_id="test/my-rule-1")], ), - Result.new( - config_object=config_obj, + Result( file_path="test.zarr", + config_object=config_obj, messages=[Message(message="m 2", rule_id="test/my-rule-2")], ), - Result.new( - config_object=config_obj, + Result( file_path="test.zarr", + config_object=config_obj, messages=[Message(message="m 3", rule_id="test/my-rule-1")], ), - Result.new( - config_object=config_obj, + Result( file_path="test.zarr", + config_object=config_obj, messages=[Message(message="m 4", rule_id="test/my-rule-2")], ), ] @@ -66,9 +66,9 @@ class MyRule2(RuleOp): ) def test_repr_html(self): - result = Result.new( - config_object=ConfigObject(), + result = Result( file_path="test.zarr", + config_object=ConfigObject(), messages=[], ) html = result._repr_html_() @@ -76,9 +76,9 @@ def test_repr_html(self): self.assertIn("ok", html) self.assertIn(" dict[str, Any]: return self._config.settings or {} @property - def dataset(self) -> xr.Dataset: + def datatree(self) -> xr.DataTree | None: + return self._datatree + + @property + def dataset(self) -> xr.Dataset | None: return self._dataset + @dataset.setter + def dataset(self, value: xr.Dataset) -> None: + self._dataset = value + @property def file_path(self) -> str: return self._file_path @@ -76,7 +94,7 @@ def report( fatal=fatal, suggestions=suggestions, rule_id=self.rule_id, - node_path=self.node.path if self.node is not None else NODE_ROOT_NAME, + node_path=self.node.path if self.node is not None else DATASET_ROOT_NAME, severity=self.severity, ) self.messages.append(m) diff --git a/xrlint/_linter/validate.py b/xrlint/_linter/validate.py index 8c962ac..42164d7 100644 --- a/xrlint/_linter/validate.py +++ b/xrlint/_linter/validate.py @@ -10,7 +10,7 @@ from xrlint.config import ConfigObject from xrlint.result import Message, Result -from ..constants import NODE_ROOT_NAME +from ..constants import DATASET_ROOT_NAME from .apply import apply_rule from .rulectx import RuleContextImpl @@ -19,22 +19,22 @@ def validate_dataset(config_obj: ConfigObject, dataset: Any, file_path: str): assert isinstance(config_obj, ConfigObject) assert dataset is not None assert isinstance(file_path, str) - if isinstance(dataset, xr.Dataset): + if isinstance(dataset, (xr.Dataset, xr.DataTree)): messages = _validate_dataset(config_obj, dataset, file_path, None, None) else: messages = _open_and_validate_dataset(config_obj, dataset, file_path) - return Result.new(config_object=config_obj, messages=messages, file_path=file_path) + return Result(file_path=file_path, config_object=config_obj, messages=messages) def _validate_dataset( config_obj: ConfigObject, - dataset: xr.Dataset, + dataset: xr.Dataset | xr.DataTree, file_path: str, file_index: int | None, access_latency: float | None, ) -> list[Message]: assert isinstance(config_obj, ConfigObject) - assert isinstance(dataset, xr.Dataset) + assert isinstance(dataset, (xr.Dataset, xr.DataTree)) assert isinstance(file_path, str) context = RuleContextImpl( @@ -70,12 +70,12 @@ def _open_and_validate_dataset( file_path, ) else: - t0 = time.time() try: - dataset = _open_dataset(ds_source, opener_options, file_path) + dataset, access_latency = _open_dataset( + ds_source, opener_options, file_path + ) except (OSError, ValueError, TypeError) as e: return [new_fatal_message(str(e))] - access_latency = time.time() - t0 with dataset: return _validate_dataset( config_obj, dataset, file_path, None, access_latency @@ -84,12 +84,24 @@ def _open_and_validate_dataset( def _open_dataset( ds_source: Any, opener_options: dict[str, Any] | None, file_path: str -) -> xr.Dataset: +) -> tuple[xr.Dataset | xr.DataTree, float]: """Open a dataset.""" engine = opener_options.pop("engine", None) if engine is None and (file_path.endswith(".zarr") or file_path.endswith(".zarr/")): engine = "zarr" - return xr.open_dataset(ds_source, engine=engine, **(opener_options or {})) + try: + t0 = time.time() + result = xr.open_datatree(ds_source, engine=engine, **(opener_options or {})) + # When opening no-group Zarr datasets we get with xarray 2025.1.2: + # + # File "<...>/site-packages/xarray/backends/zarr.py", line 741, in __init__ + # self._read_only = self.zarr_group.read_only + # ^^^^^^^^^^^^^^^^^^^^^^^^^ + # AttributeError: 'NoneType' object has no attribute 'read_only' + except (OSError, ValueError, TypeError, AttributeError): + t0 = time.time() + result = xr.open_dataset(ds_source, engine=engine, **(opener_options or {})) + return result, time.time() - t0 def new_fatal_message(message: str) -> Message: @@ -97,5 +109,5 @@ def new_fatal_message(message: str) -> Message: message=message, fatal=True, severity=2, - node_path=NODE_ROOT_NAME, + node_path=DATASET_ROOT_NAME, ) diff --git a/xrlint/constants.py b/xrlint/constants.py index 5eff952..1043cea 100644 --- a/xrlint/constants.py +++ b/xrlint/constants.py @@ -7,7 +7,9 @@ CORE_PLUGIN_NAME: Final = "__core__" CORE_DOCS_URL = "https://bcdev.github.io/xrlint/rule-ref" -NODE_ROOT_NAME: Final = "dataset" +DATATREE_ROOT_NAME: Final = "dt" +DATASET_ROOT_NAME: Final = "ds" +MISSING_DATATREE_FILE_PATH: Final = "" MISSING_DATASET_FILE_PATH: Final = "" SEVERITY_ERROR: Final = 2 diff --git a/xrlint/linter.py b/xrlint/linter.py index e6e8d3c..b7422b7 100644 --- a/xrlint/linter.py +++ b/xrlint/linter.py @@ -12,7 +12,7 @@ from xrlint.result import Result from ._linter.validate import new_fatal_message, validate_dataset -from .constants import MISSING_DATASET_FILE_PATH +from .constants import MISSING_DATASET_FILE_PATH, MISSING_DATATREE_FILE_PATH def new_linter(*configs: ConfigLike, **config_props: Any) -> "Linter": @@ -70,9 +70,10 @@ def validate( """Validate a dataset against applicable rules. Args: - dataset: The dataset. Can be a `xr.Dataset` instance - or a file path, or any dataset source that can be opened - using `xarray.open_dataset()`. + dataset: The dataset. Can be a `xr.Dataset` or `xr.DataTree` + instance or a file path, or any dataset source that can + be opened using `xarray.open_dataset()` + or `xarray.open_datatree()`. file_path: Optional file path used for formatting messages. Useful if `dataset` is not a file path. config: Optional configuration-like value. @@ -86,7 +87,7 @@ def validate( Result of the validation. """ if not file_path: - if isinstance(dataset, xr.Dataset): + if isinstance(dataset, (xr.Dataset, xr.DataTree)): file_path = file_path or _get_file_path_for_dataset(dataset) else: file_path = file_path or _get_file_path_for_source(dataset) @@ -94,8 +95,7 @@ def validate( config = Config.from_config(self._config, config, config_props) config_obj = config.compute_config_object(file_path) if config_obj is None or not config_obj.rules: - return Result.new( - config_object=None, + return Result( file_path=file_path, messages=[ new_fatal_message( @@ -107,11 +107,16 @@ def validate( return validate_dataset(config_obj, dataset, file_path) -def _get_file_path_for_dataset(dataset: xr.Dataset) -> str: +def _get_file_path_for_dataset(dataset: xr.Dataset | xr.DataTree) -> str: ds_source = dataset.encoding.get("source") - return _get_file_path_for_source(ds_source) + return _get_file_path_for_source( + ds_source, + MISSING_DATASET_FILE_PATH + if isinstance(dataset, xr.Dataset) + else MISSING_DATATREE_FILE_PATH, + ) -def _get_file_path_for_source(ds_source: Any) -> str: +def _get_file_path_for_source(ds_source: Any, default: str | None = None) -> str: file_path = str(ds_source) if isinstance(ds_source, (str, Path, PathLike)) else "" - return file_path or MISSING_DATASET_FILE_PATH + return file_path or (default or MISSING_DATASET_FILE_PATH) diff --git a/xrlint/node.py b/xrlint/node.py index 95ef739..9b190a4 100644 --- a/xrlint/node.py +++ b/xrlint/node.py @@ -38,10 +38,24 @@ def in_root(self) -> bool: return not self.in_coords() and not self.in_data_vars() +@dataclass(frozen=True, kw_only=True) +class DataTreeNode(XarrayNode): + """DataTree node.""" + + name: Hashable + """The name of the datatree.""" + + datatree: xr.DataTree + """The `xarray.DataTree` instance.""" + + @dataclass(frozen=True, kw_only=True) class DatasetNode(XarrayNode): """Dataset node.""" + name: Hashable + """The name of the dataset.""" + dataset: xr.Dataset """The `xarray.Dataset` instance.""" diff --git a/xrlint/result.py b/xrlint/result.py index b1fac09..2138398 100644 --- a/xrlint/result.py +++ b/xrlint/result.py @@ -4,12 +4,12 @@ from collections.abc import Iterable from dataclasses import dataclass, field +from functools import cached_property from typing import TYPE_CHECKING, Literal, Union from xrlint.constants import ( CORE_DOCS_URL, CORE_PLUGIN_NAME, - MISSING_DATASET_FILE_PATH, SEVERITY_ERROR, SEVERITY_WARN, ) @@ -86,63 +86,41 @@ class Message(JsonSerializable): """ -@dataclass(kw_only=True) +@dataclass(kw_only=True, frozen=True) class Result(JsonSerializable): """The aggregated information of linting a dataset.""" - config_object: Union["ConfigObject", None] = None - """The configuration object that produced this result - together with `file_path`. - """ - - file_path: str = MISSING_DATASET_FILE_PATH + file_path: str """The absolute path to the file of this result. This is the string "" if the file path is unknown (when you didn't pass the `file_path` option to the `xrlint.lint_dataset()` method). """ + config_object: Union["ConfigObject", None] = None + """The configuration object that produced this result + together with `file_path`. + """ + messages: list[Message] = field(default_factory=list) """The array of message objects.""" - fixable_error_count: int = 0 - """The number of errors that can be fixed automatically - by the fix constructor option. - """ - - fixable_warning_count: int = 0 - """The number of warnings that can be fixed automatically - by the fix constructor option. - """ - - error_count: int = 0 - """The number of errors. This includes fixable errors - and fatal errors. - """ - - fatal_error_count: int = 0 - """The number of fatal errors.""" - - warning_count: int = 0 - """The number of warnings. This includes fixable warnings.""" - - @classmethod - def new( - cls, - config_object: Union["ConfigObject", None] = None, - file_path: str | None = None, - messages: list[Message] | None = None, - ): - result = Result( - config_object=config_object, - file_path=file_path or MISSING_DATASET_FILE_PATH, - messages=messages or [], - ) - for m in messages: - result.warning_count += 1 if m.severity == SEVERITY_WARN else 0 - result.error_count += 1 if m.severity == SEVERITY_ERROR else 0 - result.fatal_error_count += 1 if m.fatal else 0 - return result + @cached_property + def warning_count(self) -> int: + """The number of warnings. This includes fixable warnings.""" + return sum(1 if m.severity == SEVERITY_WARN else 0 for m in self.messages) + + @cached_property + def error_count(self) -> int: + """The number of errors. This includes fixable errors + and fatal errors. + """ + return sum(1 if m.severity == SEVERITY_ERROR else 0 for m in self.messages) + + @cached_property + def fatal_error_count(self) -> int: + """The number of fatal errors.""" + return sum(1 if m.fatal else 0 for m in self.messages) def to_html(self) -> str: from xrlint.formatters.html import format_result diff --git a/xrlint/rule.py b/xrlint/rule.py index 9906180..5317c5e 100644 --- a/xrlint/rule.py +++ b/xrlint/rule.py @@ -10,7 +10,7 @@ import xarray as xr from xrlint.constants import SEVERITY_ENUM, SEVERITY_ENUM_TEXT -from xrlint.node import AttrNode, AttrsNode, DatasetNode, VariableNode +from xrlint.node import AttrNode, AttrsNode, DataTreeNode, DatasetNode, VariableNode from xrlint.operation import Operation, OperationMeta from xrlint.result import Suggestion from xrlint.util.constructible import ValueConstructible @@ -86,6 +86,17 @@ class RuleExit(Exception): class RuleOp(ABC): """Define the specific rule validation operations.""" + def validate_datatree(self, ctx: RuleContext, node: DataTreeNode) -> None: + """Validate the given datatree node. + + Args: + ctx: The current rule context. + node: The datatree node. + + Raises: + RuleExit: to exit rule logic and further node traversal + """ + def validate_dataset(self, ctx: RuleContext, node: DatasetNode) -> None: """Validate the given dataset node.