Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ Version NEXTVERSION

**2026-??-??**

* Read Kerchunk datasets with `cf.read`
(https://github.com/NCAS-CMS/cf-python/issues/936)
* Read open file handle datasets with `cf.read`
(https://github.com/NCAS-CMS/cf-python/issues/937)
* Support for HEALPix grids
(https://github.com/NCAS-CMS/cf-python/issues/909)
* New HEALPix methods: `cf.Field.healpix_info`,
Expand Down
7 changes: 4 additions & 3 deletions cf/read_write/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
class read(cfdm.read):
"""Read field or domain constructs from files.

The following file formats are supported: netCDF, CDL, Zarr, PP,
and UM fields file.
The following file formats are supported: netCDF, CDL, Zarr,
Kerchunk, PP, and UM fields file.

NetCDF and Zarr datasets may be on local disk, on an OPeNDAP
server, or in an S3 object store.
Expand Down Expand Up @@ -144,7 +144,7 @@ class read(cfdm.read):

:Parameters:

{{read datasets: (arbitrarily nested sequence of) `str`}}
{{read datasets:}}

{{read recursive: `bool`, optional}}

Expand All @@ -162,6 +162,7 @@ class read(cfdm.read):
``'netCDF'`` A netCDF-3 or netCDF-4 dataset
``'CDL'`` A text CDL file of a netCDF dataset
``'Zarr'`` A Zarr v2 (xarray) or Zarr v3 dataset
``'Kerchunk'`` A Kerchunked dataset
``'UM'`` A UM fields file or PP dataset
============== ==========================================

Expand Down
110 changes: 47 additions & 63 deletions cf/read_write/um/umread.py
Original file line number Diff line number Diff line change
Expand Up @@ -3549,6 +3549,13 @@ def read(
"(only Field constructs)"
)

representation = self.dataset_representation(dataset)
if representation != "path":
raise NotImplementedError(
"Can't yet read Field constructs from a UM or PP "
f"{representation!r} dataset: {dataset!r}"
)

if not _stash2standard_name:
# --------------------------------------------------------
# Create the STASH code to standard_name conversion
Expand Down Expand Up @@ -3605,75 +3612,17 @@ def read(
# Return now if there are valid file types
return []

# Parse the 'storage_options' keyword parameter
if storage_options is None:
storage_options = {}
elif filesystem is not None:
raise ValueError(
"Can't set both storage_options and filesystem keywords"
if storage_options is not None:
raise NotImplementedError(
"Can't yet open PP/UM files with file system storage options"
)

storage_protocol = None

if filesystem is not None:
# --------------------------------------------------------
# A pre-authenticated filesystem was provided: open the
# dataset as a file-like object and pass it to the backend.
# --------------------------------------------------------
raise NotImplementedError(
"Can't yet open PP/UM files from a remote file system"
"Can't yet open PP/UM files from a pre-defined file system"
)

try:
dataset = filesystem.open(dataset, "rb")
except AttributeError:
raise AttributeError(
f"The 'filesystem' object {filesystem!r} does not have "
"an 'open' method. Please provide a valid filesystem "
"object (e.g. an fsspec filesystem instance)."
)
except Exception as exc:
raise OSError(
f"Failed to open {dataset!r} using the provided "
f"'filesystem' object {filesystem!r}: {exc}"
) from exc

else:
from uritools import urisplit

u = urisplit(dataset)
if u.scheme == "s3":
# ----------------------------------------------------
# Dataset is an s3://... string.
# ----------------------------------------------------
raise NotImplementedError(
"Can't yet open PP/UM files from an s3 object store"
)

import fsspec

client_kwargs = storage_options.get("client_kwargs", {})
if (
"endpoint_url" not in storage_options
and "endpoint_url" not in client_kwargs
):
authority = u.authority
if not authority:
authority = ""

storage_options["endpoint_url"] = f"https://{authority}"

filesystem = fsspec.filesystem(
protocol=u.scheme, **storage_options
)
dataset = filesystem.open(u.path[1:], "rb")

if not storage_options:
storage_options = None

if filesystem is not None:
storage_protocol = filesystem.protocol
storage_options = filesystem.storage_options
storage_protocol = None

f = self.dataset_open(dataset, parse=True)

Expand Down Expand Up @@ -3835,6 +3784,41 @@ def dataset_open(self, filename, parse=True):
parse=parse,
)

@classmethod
def dataset_representation(cls, dataset):
    """Classify how the input dataset is represented.

    .. versionadded:: NEXTVERSION

    :Parameters:

        dataset:
            The dataset. May be a string-valued path or a
            file-like object.

    :Returns:

        `str`
            The dataset representation:

            * ``'path'``: A string-valued path.

            * ``'file_handle'``: An open file handle (such as
              returned by `fsspec.filesystem.open`)

            * ``'unknown'``: Anything else.

    """
    # A plain string is taken to be a path on disk (or a URI)
    if isinstance(dataset, str):
        return "path"

    # Duck-type check for an open binary stream: anything that
    # exposes both 'read' and 'seek' counts as a file handle
    is_stream = all(hasattr(dataset, attr) for attr in ("read", "seek"))
    return "file_handle" if is_stream else "unknown"


"""
Problems:
Expand Down
145 changes: 145 additions & 0 deletions cf/test/create_test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -2228,6 +2228,150 @@ def _make_ugrid_2(filename):
return filename


def _make_ugrid_3(filename):
    """Create a UGRID mesh topology and no fields/domains."""
    ds = netCDF4.Dataset(filename, "w")

    ds.Conventions = f"CF-{VN}"

    # Mesh dimensions: 7 nodes, 9 edges, 3 faces, plus the
    # fixed-size connectivity dimensions
    for dim, size in (
        ("nMesh3_node", 7),
        ("nMesh3_edge", 9),
        ("nMesh3_face", 3),
        ("connectivity2", 2),
        ("connectivity4", 4),
        ("connectivity5", 5),
    ):
        ds.createDimension(dim, size)

    # The mesh topology container variable
    mesh = ds.createVariable("Mesh3", "i4", ())
    mesh.cf_role = "mesh_topology"
    mesh.topology_dimension = 2
    mesh.node_coordinates = "Mesh3_node_x Mesh3_node_y"
    mesh.face_node_connectivity = "Mesh3_face_nodes"
    mesh.edge_node_connectivity = "Mesh3_edge_nodes"
    mesh.face_dimension = "nMesh3_face"
    mesh.edge_dimension = "nMesh3_edge"
    mesh.face_face_connectivity = "Mesh3_face_links"
    mesh.edge_edge_connectivity = "Mesh3_edge_links"

    # Node coordinates
    node_x = ds.createVariable("Mesh3_node_x", "f4", ("nMesh3_node",))
    node_x.standard_name = "longitude"
    node_x.units = "degrees_east"
    node_x[...] = [-45, -43, -45, -43, -45, -43, -40]

    node_y = ds.createVariable("Mesh3_node_y", "f4", ("nMesh3_node",))
    node_y.standard_name = "latitude"
    node_y.units = "degrees_north"
    node_y[...] = [35, 35, 33, 33, 31, 31, 34]

    # Edge-node connectivity
    edge_nodes = ds.createVariable(
        "Mesh3_edge_nodes", "i4", ("nMesh3_edge", "connectivity2")
    )
    edge_nodes.long_name = "Maps every edge to its two nodes"
    edge_nodes[...] = [
        [1, 6],
        [3, 6],
        [3, 1],
        [0, 1],
        [2, 0],
        [2, 3],
        [2, 4],
        [5, 4],
        [3, 5],
    ]

    # Face coordinates
    face_x = ds.createVariable(
        "Mesh3_face_x", "f8", ("nMesh3_face",), fill_value=-99
    )
    face_x.standard_name = "longitude"
    face_x.units = "degrees_east"
    face_x[...] = [-44, -44, -42]

    face_y = ds.createVariable(
        "Mesh3_face_y", "f8", ("nMesh3_face",), fill_value=-99
    )
    face_y.standard_name = "latitude"
    face_y.units = "degrees_north"
    face_y[...] = [34, 32, 34]

    # Face-node connectivity (-99 pads faces with fewer corners)
    face_nodes = ds.createVariable(
        "Mesh3_face_nodes",
        "i4",
        ("nMesh3_face", "connectivity4"),
        fill_value=-99,
    )
    face_nodes.long_name = "Maps every face to its corner nodes"
    face_nodes[...] = [[2, 3, 1, 0], [4, 5, 3, 2], [6, 1, 3, -99]]

    # Face-face connectivity
    face_links = ds.createVariable(
        "Mesh3_face_links",
        "i4",
        ("nMesh3_face", "connectivity4"),
        fill_value=-99,
    )
    face_links.long_name = "neighbour faces for faces"
    face_links[...] = [
        [1, 2, -99, -99],
        [0, -99, -99, -99],
        [0, -99, -99, -99],
    ]

    # Edge coordinates
    edge_x = ds.createVariable(
        "Mesh3_edge_x", "f8", ("nMesh3_edge",), fill_value=-99
    )
    edge_x.standard_name = "longitude"
    edge_x.units = "degrees_east"
    edge_x[...] = [-41.5, -41.5, -43, -44, -45, -44, -45, -44, -43]

    edge_y = ds.createVariable(
        "Mesh3_edge_y", "f8", ("nMesh3_edge",), fill_value=-99
    )
    edge_y.standard_name = "latitude"
    edge_y.units = "degrees_north"
    edge_y[...] = [34.5, 33.5, 34, 35, 34, 33, 32, 31, 32]

    # Edge-edge connectivity
    edge_links = ds.createVariable(
        "Mesh3_edge_links",
        "i4",
        ("nMesh3_edge", "connectivity5"),
        fill_value=-99,
    )
    edge_links.long_name = "neighbour edges for edges"
    edge_links[...] = [
        [1, 2, 3, -99, -99],
        [0, 2, 5, 8, -99],
        [3, 0, 1, 5, 8],
        [4, 2, 0, -99, -99],
        [3, 5, 6, -99, -99],
        [4, 6, 2, 1, 8],
        [4, 5, 7, -99, -99],
        [6, 8, -99, -99, -99],
        [7, 5, 2, 1, -99],
    ]

    ds.close()
    return filename


def _make_aggregation_value(filename):
"""Create an aggregation variable with 'unique_values'."""
n = netCDF4.Dataset(filename, "w")
Expand Down Expand Up @@ -2341,6 +2485,7 @@ def _make_aggregation_value(filename):

ugrid_1 = _make_ugrid_1("ugrid_1.nc")
ugrid_2 = _make_ugrid_2("ugrid_2.nc")
ugrid_3 = _make_ugrid_3("ugrid_3.nc")

aggregation_value = _make_aggregation_value("aggregation_value.nc")

Expand Down
1 change: 1 addition & 0 deletions cf/test/example_field_0.kerchunk
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"version":1,"refs":{"lat\/0":"base64:eF5jYMABDjQwNBwIcmNwCzpwgMHBAQAxqAWx","lat_bnds\/0.0":"base64:eF5jYCAVNMAZYX5+dkDg5xd2AAQcQAAAZ+II3Q==","lon\/0":"base64:eF5jYCAONDxQ2BDwwUPCLCAmOTOvqNQBCgB9ngjU","lon_bnds\/0.0":"base64:eF5jYKAyaACCBw8aGhQUHjzYsKGBwc0tLCwhIS0tJ6egoLi4jMEBFQAA\/noSOQ==","q\/0.0":["example_field_0.nc",17755,100],"q\/0.1":"base64:eF6z7v4lGfX4l4\/QxJO77ufUsaQsrTNvflvm6f88K8x6dlbngZp9PL2\/9\/DdP7LH8ZT9fKbpK0t0\/RVKEhaaSmnnfZy8qXfD5KeucfrtkxbNmLt8U8WGbbvWttkjAQAjCS4Q","q\/1.0":"base64:eF6TzNB+88v69hsGIGCZ2B5Yd\/9nIIgd9lZsR5Z\/3Q4Qm6\/mptye3iw5EFvX\/lxryfQ9rSD2JtPvryfnlbwGseeuW7yxYtHkhfOAwB4K3IEAAKyvI6g=","q\/1.1":"base64:eF6TtLaeY909x5IBCFju3z9yP+fIFhA7zN9fwf+5wgkQm6+3d0Pv7w1lILbu9OlF01cW7QOxN+XlZeR9zJgPYq\/d1DRx0qKFXfOAwB4K3IEAAGAXIr4=","time\/0":"\u0000\u0000\u0000\u0000\u0000\u0000?@",".zgroup":"{\"zarr_format\":2}",".zattrs":"{\"Conventions\":\"CF-1.12\"}","lat\/.zarray":"{\"shape\":[5],\"chunks\":[5],\"dtype\":\"<f8\",\"fill_value\":null,\"order\":\"C\",\"filters\":[{\"id\":\"shuffle\",\"elementsize\":8},{\"id\":\"zlib\",\"level\":4}],\"dimension_separator\":\".\",\"compressor\":null,\"zarr_format\":2}","lat\/.zattrs":"{\"_ARRAY_DIMENSIONS\":[\"lat\"],\"units\":\"degrees_north\",\"standard_name\":\"latitude\",\"bounds\":\"lat_bnds\"}","lat_bnds\/.zarray":"{\"shape\":[5,2],\"chunks\":[5,2],\"dtype\":\"<f8\",\"fill_value\":null,\"order\":\"C\",\"filters\":[{\"id\":\"shuffle\",\"elementsize\":8},{\"id\":\"zlib\",\"level\":4}],\"dimension_separator\":\".\",\"compressor\":null,\"zarr_format\":2}","lat_bnds\/.zattrs":"{\"_ARRAY_DIMENSIONS\":[\"lat\",\"bounds2\"]}","lon\/.zarray":"{\"shape\":[8],\"chunks\":[8],\"dtype\":\"<f8\",\"fill_value\":null,\"order\":\"C\",\"filters\":[{\"id\":\"shuffle\",\"elementsize\":8},{\"id\":\"zlib\",\"level\":4}],\"dimension_separator\":\".\",\"compressor\":null,\"zarr_format\":2}","lon\/.zattrs":"{\"_ARRAY_DIMENSIONS\":[\"lon\"],\"units\":\"degrees_east\",\"standard_name\":\"longitude\",\"bounds\":\"lon_bnds\"}","lon_bn
ds\/.zarray":"{\"shape\":[8,2],\"chunks\":[8,2],\"dtype\":\"<f8\",\"fill_value\":null,\"order\":\"C\",\"filters\":[{\"id\":\"shuffle\",\"elementsize\":8},{\"id\":\"zlib\",\"level\":4}],\"dimension_separator\":\".\",\"compressor\":null,\"zarr_format\":2}","lon_bnds\/.zattrs":"{\"_ARRAY_DIMENSIONS\":[\"lon\",\"bounds2\"]}","q\/.zarray":"{\"shape\":[5,8],\"chunks\":[3,4],\"dtype\":\"<f8\",\"fill_value\":null,\"order\":\"C\",\"filters\":[{\"id\":\"shuffle\",\"elementsize\":8},{\"id\":\"zlib\",\"level\":4}],\"dimension_separator\":\".\",\"compressor\":null,\"zarr_format\":2}","q\/.zattrs":"{\"_ARRAY_DIMENSIONS\":[\"lat\",\"lon\"],\"project\":\"research\",\"standard_name\":\"specific_humidity\",\"units\":\"1\",\"coordinates\":\"time\",\"cell_methods\":\"area: mean\"}","time\/.zarray":"{\"shape\":[],\"chunks\":[],\"dtype\":\"<f8\",\"fill_value\":null,\"order\":\"C\",\"filters\":null,\"dimension_separator\":\".\",\"compressor\":null,\"zarr_format\":2}","time\/.zattrs":"{\"_ARRAY_DIMENSIONS\":[],\"units\":\"days since 2018-12-01\",\"standard_name\":\"time\"}"}}
Loading