diff --git a/docs/src/common_links.inc b/docs/src/common_links.inc index 0f31261131..f84b8bf1ba 100644 --- a/docs/src/common_links.inc +++ b/docs/src/common_links.inc @@ -83,5 +83,6 @@ .. _@tkknight: https://github.com/tkknight .. _@trexfeathers: https://github.com/trexfeathers .. _@ukmo-ccbunney: https://github.com/ukmo-ccbunney +.. _@vsherratt: https://github.com/vsherratt .. _@wjbenfold: https://github.com/wjbenfold .. _@zklaus: https://github.com/zklaus diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index f1d8547c75..4988eca6e0 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -53,6 +53,10 @@ This document explains the changes made to Iris for this release #. `@HGWright`_ added to the Nimrod loader to expand the types of Nimrod files it can load. This includes selecting which Nimrod table to use the data entry headers from. (:issue:`4505`, :pull:`6763`) +#. `@stephenworsley`_ added the coordinate method :meth:`~iris.coords.Coord.as_string_arrays` + and the class :class:`~iris.coords.PointBoundStrings` to help represent points and + bounds data on coordinates. This adapts code suggestions by `@rcomer`_ and `@vsherratt`_. 
class PointBoundStrings:
    """Class for representing formatted string arrays of points and bounds.

    The string arrays are built the first time the ``points`` or ``bounds``
    property is accessed and are then cached. Once a string array has been
    built, the reference to the data it was built from is dropped.

    Note that ``fmt`` is only consulted when a string array is first built:
    changing it afterwards does not re-format an already cached array.
    """

    def __init__(self, core_points, core_bounds, units, fmt=None):
        """Construct an object for formatting points and bounds as string arrays.

        Parameters
        ----------
        core_points : array-like
            The points to format. May be real or lazy; they are only realised
            when ``points`` is accessed.
        core_bounds : array-like or None
            The bounds to format, or None if there are no bounds. May be real
            or lazy; they are only realised when ``bounds`` is accessed.
        units :
            Units object providing ``is_time_reference()`` and ``num2date()``.
            For time reference units, values are converted to datetimes
            before being formatted.
        fmt : str, optional
            Format specification applied to each value with :func:`format`.
            If omitted (or empty), values are converted with
            ``astype("str")`` instead.
        """
        self._core_points = core_points
        self._core_bounds = core_bounds
        self._units = units
        # Caches for the formatted arrays; None means "not yet built".
        self._points = None
        self._bounds = None
        self.fmt = fmt

    @staticmethod
    def _format_values(values, fmt):
        """Convert an array of concrete values into an array of strings."""
        if fmt:
            # ``otypes`` is passed explicitly so that zero-size arrays work:
            # without it np.vectorize must call the function on the first
            # element to discover the output type, and raises a ValueError
            # when there is no element to call it on.
            formatter = np.vectorize(lambda value: format(value, fmt), otypes=[str])
            return formatter(values)
        return values.astype("str")

    def _as_strings(self, core_data):
        """Realise ``core_data``, convert dates if needed, and format it."""
        values = _lazy.as_concrete_data(core_data)
        if self._units.is_time_reference():
            values = self._units.num2date(values)
        return self._format_values(values, self.fmt)

    @property
    def points(self):
        """Format the points as a string array."""
        if self._points is None:
            self._points = self._as_strings(self._core_points)
            # The source data is not needed again once the strings are cached.
            self._core_points = None
        return self._points

    @property
    def bounds(self):
        """Format the bounds as a string array, or None if there are no bounds."""
        if self._bounds is None and self._core_bounds is not None:
            self._bounds = self._as_strings(self._core_bounds)
            # The source data is not needed again once the strings are cached.
            self._core_bounds = None
        return self._bounds

    def __str__(self):
        """Format the points and bounds as a string."""
        points_text = np.array2string(self.points)
        bounds = self.bounds
        bounds_text = "None" if bounds is None else np.array2string(bounds)
        return "\n".join(["Points:", points_text, "Bounds:", bounds_text])
def as_string_arrays(self, fmt=None):
    """Return string representations of this coordinate's points and bounds.

    The result is a :class:`~iris.coords.PointBoundStrings`. It can either be
    converted directly to a string, or numpy string arrays for the points and
    bounds can be accessed via its ``points`` and ``bounds`` properties. Those
    properties are only calculated when they are accessed, and any lazy points
    and bounds on the coordinate will remain lazy.

    Parameters
    ----------
    fmt : str, optional
        The format string to be applied when converting to a string. If the
        coordinate contains datetime information, the points and bounds will
        be converted to datetimes before being formatted to strings.

    Returns
    -------
    :class:`~iris.coords.PointBoundStrings`
        Object built from this coordinate's core points, core bounds and
        units.
    """
    # Pass the core (possibly lazy) arrays so nothing is realised here.
    core_points = self.core_points()
    core_bounds = self.core_bounds()
    return PointBoundStrings(core_points, core_bounds, self.units, fmt=fmt)
"""Unit tests for the :class:`iris.coords.PointBoundStrings` class."""

from cf_units import Unit
import dask.array as da
import numpy as np

from iris._lazy_data import is_lazy_data
from iris.coords import AuxCoord, PointBoundStrings
from iris.tests._shared_utils import assert_array_equal


def test_PointBoundStrings_lazy():
    """Formatting the bounds leaves the points and the coordinate lazy."""
    points = da.arange(5, dtype=np.float64)
    bounds = da.arange(10, dtype=np.float64).reshape([5, 2])
    coord = AuxCoord(points, bounds=bounds, standard_name="latitude")

    strings = coord.as_string_arrays(".0f")

    # Nothing has been formatted yet.
    assert is_lazy_data(strings._core_bounds)
    assert strings._bounds is None

    # Accessing the bounds formats and caches them, dropping the source data.
    expected_bounds = "[['0' '1']\n ['2' '3']\n ['4' '5']\n ['6' '7']\n ['8' '9']]"
    assert np.array2string(strings.bounds) == expected_bounds
    assert strings._core_bounds is None
    assert strings._bounds is strings.bounds

    # The points have not been touched by formatting the bounds.
    assert is_lazy_data(strings._core_points)
    assert strings._points is None

    # The coordinate itself is still lazy.
    assert coord.has_lazy_points()
    assert coord.has_lazy_bounds()


def test_PointBoundStrings_no_bounds():
    """A coordinate without bounds reports ``None`` for its bounds."""
    coord = AuxCoord(np.arange(5, dtype=np.float64), standard_name="latitude")
    strings = coord.as_string_arrays()

    assert str(strings) == "Points:\n['0.0' '1.0' '2.0' '3.0' '4.0']\nBounds:\nNone"

    assert_array_equal(strings.points, np.array(["0.0", "1.0", "2.0", "3.0", "4.0"]))

    assert strings.bounds is None


def test_PointBoundStrings_time_coord():
    """Time reference units are converted to datetimes before formatting."""
    epoch_days = Unit("days since epoch")
    points = np.arange(5)
    bounds = np.arange(10).reshape([5, 2])

    # Without a format string the default datetime representation is used.
    default_strings = PointBoundStrings(points, bounds, epoch_days)
    expected_default = (
        "Points:\n"
        "['1970-01-01 00:00:00' '1970-01-02 00:00:00' '1970-01-03 00:00:00'\n"
        " '1970-01-04 00:00:00' '1970-01-05 00:00:00']\n"
        "Bounds:\n"
        "[['1970-01-01 00:00:00' '1970-01-02 00:00:00']\n"
        " ['1970-01-03 00:00:00' '1970-01-04 00:00:00']\n"
        " ['1970-01-05 00:00:00' '1970-01-06 00:00:00']\n"
        " ['1970-01-07 00:00:00' '1970-01-08 00:00:00']\n"
        " ['1970-01-09 00:00:00' '1970-01-10 00:00:00']]"
    )
    assert str(default_strings) == expected_default

    # A strftime-style format string is applied to each datetime.
    date_strings = PointBoundStrings(points, bounds, epoch_days, fmt="%Y-%m-%d")
    expected_dates = (
        "Points:\n"
        "['1970-01-01' '1970-01-02' '1970-01-03' '1970-01-04' '1970-01-05']\n"
        "Bounds:\n"
        "[['1970-01-01' '1970-01-02']\n"
        " ['1970-01-03' '1970-01-04']\n"
        " ['1970-01-05' '1970-01-06']\n"
        " ['1970-01-07' '1970-01-08']\n"
        " ['1970-01-09' '1970-01-10']]"
    )
    assert str(date_strings) == expected_dates