diff --git a/Changelog.rst b/Changelog.rst index aabccd21a..c7c1fe8a8 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -21,6 +21,9 @@ Version NEXTVERSION * New optional backend for netCDF-3 in `cfdm.read` that allows parallel reading: ``netcdf_file`` (https://github.com/NCAS-CMS/cfdm/issues/375) +* Fix bug in `cfdm.netcdf_indexer` that sometimes caused a failure + with a `np.newaxis` index + (https://github.com/NCAS-CMS/cfdm/issues/395) * Fix bug in `cfdm.read` that wouldn't read non-Zarr and Zarr datasets from the same directory (https://github.com/NCAS-CMS/cfdm/issues/391) diff --git a/cfdm/__init__.py b/cfdm/__init__.py index 2c1a636b3..54a282946 100644 --- a/cfdm/__init__.py +++ b/cfdm/__init__.py @@ -57,6 +57,7 @@ RTOL, abspath, atol, + axis_dropping_index, chunksize, configuration, dirname, diff --git a/cfdm/data/data.py b/cfdm/data/data.py index aedcf594b..b8eaea3b9 100644 --- a/cfdm/data/data.py +++ b/cfdm/data/data.py @@ -3,7 +3,6 @@ import operator from itertools import product, zip_longest from math import prod -from numbers import Integral from os.path import commonprefix import numpy as np @@ -19,6 +18,7 @@ _DEPRECATION_ERROR_KWARGS, _DEPRECATION_ERROR_METHOD, _numpy_allclose, + axis_dropping_index, display_data, is_log_level_info, parse_indices, @@ -783,7 +783,7 @@ def __getitem__(self, indices): new_axes = [ axis for axis, x in zip(self._axes, indices) - if not isinstance(x, Integral) and getattr(x, "shape", True) + if not axis_dropping_index(x) and getattr(x, "shape", True) ] new._axes = new_axes diff --git a/cfdm/data/mixin/indexmixin.py b/cfdm/data/mixin/indexmixin.py index 0807fec84..e7eb8f4e1 100644 --- a/cfdm/data/mixin/indexmixin.py +++ b/cfdm/data/mixin/indexmixin.py @@ -1,8 +1,6 @@ -from numbers import Integral - import numpy as np -from cfdm.functions import indices_shape, parse_indices +from cfdm.functions import axis_dropping_index, indices_shape, parse_indices class IndexMixin: @@ -127,7 +125,7 @@ def __getitem__(self, index): i = 0 j = 0 for ind0, reference_size in zip(index0, reference_shape[:]): - if isinstance(ind0, Integral): + if axis_dropping_index(ind0): # The previous call to __getitem__ resulted in a # dimension being removed (i.e. 'ind0' is # integer-valued). Therefore 'index1' must have fewer @@ -142,7 +140,7 @@ def __getitem__(self, index): i += 1 if ind0 is newaxis: - if isinstance(ind1, Integral): + if axis_dropping_index(ind1): # A previously introduced new axis is being # removed by an integer index if ind1 not in (0, -1): diff --git a/cfdm/data/netcdfindexer.py b/cfdm/data/netcdfindexer.py index fdd491ec1..57d1da857 100644 --- a/cfdm/data/netcdfindexer.py +++ b/cfdm/data/netcdfindexer.py @@ -22,10 +22,11 @@ import logging from math import prod -from numbers import Integral import numpy as np +from cfdm.functions import axis_dropping_index + logger = logging.getLogger(__name__) @@ -230,39 +231,51 @@ def __getitem__(self, index): # ------------------------------------------------------------ # Index the variable # ------------------------------------------------------------ - try: - data = self._index(index) - except (IndexError, AttributeError): - # Assume we are here because we have one or more - # np.newaxis values in 'index', and the variable doesn't - # support that type of indexing. It is known that - # `netCDF4` and `zarr` raise an IndexError and `h5netcdf` - # raises an AttributeError. - - # Subspace the variable with the np.newaxis elements - # removed + + # Create the index without any new-axis elements. We'll first + # subspace the variable without new axes (given that some + # variables don't like them, such as `h5py.Variable`), and + # reinstate them (if any) on the `numpy` array later. + # + # E.g. index : (1, np.newaxis, slice(1, 5)) + # => index1: (1, slice(1, 5)) + index1 = index + new_axes = False + if index1 is not Ellipsis: + if not isinstance(index, tuple): + index = (index,) + newaxis = np.newaxis - index1 = [i for i in index if i is not newaxis] - data = self._index(tuple(index1)) - - # Now subspace the result (which we're assuming is - # something that likes np.newaxis indices) with the - # np.newaxis elements reinstated. - index2 = [i if i is newaxis else slice(None) for i in index] - data = self._index(tuple(index2), data=data) - - # E.g. index : (1, np.newaxis, slice(1, 5)) - # => index1: (1, slice(1, 5)) - # and index2: (slice(None), np.newaxis, slice(None)) - except ValueError: - # Something went wrong, which is indicative of the - # variable not supporting the appropriate slicing method - # (e.g. `h5netcdf` might have returned "ValueError: Step - # must be >= 1 (got -2)"). Therefore we'll just get the - # entire array as a numpy array, and then try indexing - # that. + index1 = tuple([i for i in index if i is not newaxis]) + new_axes = len(index1) < len(index) + + try: + # Subspace with any new-axis elements removed + data = self._index(index1) + except Exception: + # Something went wrong. Therefore we'll just get the + # entire array as a numpy array, and try subspacing that. data = self._index(Ellipsis) data = self._index(index, data=data) + else: + if new_axes: + # There were new-axis elements in the original index, + # so apply them to the data. + # + # E.g. index : (1, np.newaxis, slice(1, 5)) + # => index1: (1, slice(1, 5)) + # => index2: (np.newaxis, slice(None)) + index2 = [] + for i in index: + if axis_dropping_index(i): + continue + + if i is not newaxis: + i = slice(None) + + index2.append(i) + + data = self._index(tuple(index2), data=data) # Reset a netCDF4 variable's scale and mask behaviour if netCDF4_scale: @@ -474,7 +487,7 @@ def _index(self, index, data=None): # so that their axes are not dropped yet (they will be dropped # later). index0 = [ - slice(i, i + 1) if isinstance(i, Integral) else i for i in index + slice(i, i + 1) if axis_dropping_index(i) else i for i in index ] if data_orthogonal_indexing or len(axes_with_list_indices) <= 1: @@ -532,7 +545,7 @@ def _index(self, index, data=None): data = data[tuple(index2)] # Apply any integer indices that will drop axes - index3 = [0 if isinstance(i, Integral) else slice(None) for i in index] + index3 = [0 if axis_dropping_index(i) else slice(None) for i in index] if index3: data = data[tuple(index3)] @@ -1013,7 +1026,7 @@ def index_shape(cls, index, shape): # List of int size = len(ind) else: - # Index is Integral + # Index is axis-dropping continue implied_shape.append(size) diff --git a/cfdm/data/pyfivearray.py b/cfdm/data/pyfivearray.py index e04134990..4d86d7f63 100644 --- a/cfdm/data/pyfivearray.py +++ b/cfdm/data/pyfivearray.py @@ -82,9 +82,6 @@ def _get_array(self, index=None): # Cache the variable self._set_component("variable", variable, copy=False) - self.close(dataset0) - del dataset, dataset0 - # Get the data, applying masking and scaling as required. array = netcdf_indexer( variable, diff --git a/cfdm/functions.py b/cfdm/functions.py index a00036d33..20af91084 100644 --- a/cfdm/functions.py +++ b/cfdm/functions.py @@ -2486,6 +2486,53 @@ def indices_shape(indices, full_shape, keepdims=True): return shape +def axis_dropping_index(index): + """Whether a `numpy` index is axis-dropping. + + An axis-dropping index is typically integer-like. + + .. versionadded:: (cfdm) NEXTVERSION + + :Parameters: + + index: + The `numpy` index. + + :Returns: + + `bool` + `True` if the index would drop an axis during `numpy` + slicing, otherwise `False`. + + **Examples** + + >>> axis_dropping_index(2) + True + >>> axis_dropping_index(np.array(2)) + True + >>> axis_dropping_index(np.int64(2)) + True + >>> axis_dropping_index([2]) + False + >>> axis_dropping_index(np.array([2])) + False + >>> axis_dropping_index(slice(None)) + False + >>> axis_dropping_index([True, False]) + False + + """ + # Standard Python integer and numpy integer scalar + if isinstance(index, (Integral, np.integer)): + return True + + # 0-d numpy array + if isinstance(index, np.ndarray) and not index.ndim: + return np.issubdtype(index.dtype, np.integer) + + return False + + def parse_indices(shape, indices, keepdims=True, newaxis=False): """Parse indices for array access and assignment. @@ -2581,12 +2628,13 @@ def parse_indices(shape, indices, keepdims=True, newaxis=False): "New axis indices are not allowed" ) - # Check that any integer indices are in range for the dimension sizes - # before integral indices are converted to slices below, for (one for) - # consistent behaviour between setitem and getitem. Note out-of-range - # slicing works in Python generally (slices are allowed to extend past - # end points with clipping applied) so we allow those. - integral_index = isinstance(index, Integral) + # Check that any integer indices are in range for the + # dimension sizes before integral indices are converted to + # slices below, for (one for) consistent behaviour between + # setitem and getitem. Note out-of-range slicing works in + # Python generally (slices are allowed to extend past end + # points with clipping applied) so we allow those. + integral_index = axis_dropping_index(index) if integral_index and not -size <= index < size: # could be negative raise IndexError( f"Index {index!r} is out of bounds for axis {i} with " diff --git a/cfdm/test/test_functions.py b/cfdm/test/test_functions.py index 8e2f94631..883b6eecd 100644 --- a/cfdm/test/test_functions.py +++ b/cfdm/test/test_functions.py @@ -780,6 +780,14 @@ def test_dirname(self): "/data", ) + def test_axis_dropping_index(self): + """Test cfdm.axis_dropping_index.""" + for i in (2, np.array(2), np.int64(2), np.int32(2)): + self.assertTrue(cfdm.axis_dropping_index(i)) + + for i in ([2], np.array([2]), slice(None), [True, False]): + self.assertFalse(cfdm.axis_dropping_index(i)) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cfdm/test/test_netcdf_indexer.py b/cfdm/test/test_netcdf_indexer.py index 4bbd209b3..c559fa02d 100644 --- a/cfdm/test/test_netcdf_indexer.py +++ b/cfdm/test/test_netcdf_indexer.py @@ -238,6 +238,27 @@ def test_netcdf_indexer_index_shape(self): ) self.assertEqual(x.index_shape((slice(1, 5, -3), 3), (10, 20)), [0]) + def test_netcdf_indexer_newaxis(self): + """Test netcdf_indexer with np.newaxis indices.""" + a = np.arange(12).reshape(3, 4) + v = cfdm.netcdf_indexer(a) + self.assertEqual(v[...].shape, (3, 4)) + self.assertEqual(v[:2, :3].shape, (2, 3)) + self.assertEqual(v[:2, np.newaxis, :3].shape, (2, 1, 3)) + self.assertEqual(v[1, :3].shape, (3,)) + self.assertEqual(v[1, np.newaxis, :3].shape, (1, 3)) + + # Test with netCDF backends + for klass in (cfdm.H5netcdfArray, cfdm.NetCDF4Array, cfdm.PyfiveArray): + k = klass("example_field_0.nc", "time", shape=()) + dataset, address = k.open() + variable = dataset[address] + v = cfdm.netcdf_indexer(variable) + a = v[...] + self.assertEqual(a.ndim, 0) + b = v[(np.newaxis,)] + self.assertEqual(b.ndim, 1) + if __name__ == "__main__": print("Run date:", datetime.datetime.now())