Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

h5netcdf read #307

Merged
merged 93 commits into from
Jul 25, 2024
Merged
Changes from 1 commit
Commits
Show all changes
93 commits
Select commit Hold shift + click to select a range
06cd592
h5
davidhassell Jan 18, 2024
20c3886
h5
davidhassell Jan 19, 2024
41e75ce
h5
davidhassell Jan 19, 2024
581ad89
h5
davidhassell Jan 19, 2024
6d2f5f5
h5
davidhassell Jan 22, 2024
8492d21
dev
davidhassell Jan 22, 2024
0bccae4
Merge branch 'h5-read' of github.com:davidhassell/cfdm into h5-read
davidhassell Jan 23, 2024
75a4c1c
dev
davidhassell Jan 23, 2024
b54d37e
dev
davidhassell Jan 23, 2024
21942d5
dev
davidhassell Jan 24, 2024
85d5d88
dev
davidhassell Jan 24, 2024
4427c83
dev
davidhassell Jan 24, 2024
3061454
dev
davidhassell Jan 24, 2024
5b2287a
dev
davidhassell Jan 25, 2024
cb4e5c2
h5py, h5netcdf, s3fs requirements
davidhassell Jan 25, 2024
6045ab5
dev
davidhassell Jan 25, 2024
45bcfab
dev
davidhassell Jan 26, 2024
03808df
dev
davidhassell Jan 26, 2024
c28e36d
dev
davidhassell Jan 27, 2024
03733fa
dev
davidhassell Jan 30, 2024
2e7f76f
dev
davidhassell Jan 30, 2024
f36c143
dev
davidhassell Jan 31, 2024
be74775
dev
davidhassell Jan 31, 2024
29faa9d
dev
davidhassell Feb 1, 2024
f811f93
dev
davidhassell Feb 2, 2024
8c3eb5c
dev
davidhassell Feb 2, 2024
a3aa8d8
dev
davidhassell Feb 3, 2024
cf0fd2c
dev
davidhassell Feb 3, 2024
1cff2d0
dev
davidhassell Feb 4, 2024
740f4cd
dev
davidhassell Feb 5, 2024
c2321be
dev
davidhassell Feb 6, 2024
0009c31
dev
davidhassell Feb 7, 2024
d19d6c3
dev
davidhassell Feb 8, 2024
92f59eb
dev
davidhassell Feb 9, 2024
59669a1
dev
davidhassell Feb 12, 2024
589dc13
dev
davidhassell Feb 13, 2024
6366990
client_kwargs endpoint_url
davidhassell Feb 22, 2024
0e7ef66
netcdf_flatten tidy and docs
davidhassell Feb 27, 2024
b00f4f5
netcdf_flatten tidy and docs
davidhassell Feb 27, 2024
0ae5bf6
dev
davidhassell Feb 28, 2024
23852a7
dev
davidhassell Mar 4, 2024
3374b31
fix upstream merge conflicts
davidhassell Mar 4, 2024
d0bb0ce
fix upstream merge conflicts
davidhassell Mar 4, 2024
123bd37
dev
davidhassell Mar 4, 2024
1c0eb30
dev
davidhassell Mar 4, 2024
0255859
dev
davidhassell Mar 4, 2024
dc00a05
dev
davidhassell Mar 4, 2024
00a15e9
dev
davidhassell Mar 7, 2024
17e67c3
dev
davidhassell Mar 7, 2024
2899d96
dev
davidhassell Mar 8, 2024
7312b53
dev
davidhassell Mar 15, 2024
fdab1c9
orthogonal indexing
davidhassell Mar 21, 2024
a715c23
dev
davidhassell Mar 21, 2024
6b2049a
dev
davidhassell Mar 22, 2024
995c285
Merge branch 'main' of github.com:NCAS-CMS/cfdm into h5-read
davidhassell Mar 26, 2024
3d90bef
dev
davidhassell Mar 26, 2024
01687fc
dev
davidhassell Mar 28, 2024
c68b7a0
dev
davidhassell Apr 5, 2024
4335e02
dev
davidhassell Apr 5, 2024
4283f44
dev
davidhassell Apr 5, 2024
889490a
dev
davidhassell Apr 8, 2024
d590fd9
dev
davidhassell May 13, 2024
3703495
upstream merge conflicts
davidhassell Jun 20, 2024
b3a1b56
dev
davidhassell Jun 20, 2024
a1e8bc8
dev
davidhassell Jun 20, 2024
283e9dd
Typos
davidhassell Jul 23, 2024
e1cba05
Update Changelog.rst
davidhassell Jul 23, 2024
d4fbbbd
netcdf_indexer
davidhassell Jul 23, 2024
cb84a9c
remove h5py dependency
davidhassell Jul 23, 2024
ea7e229
Merge branch 'h5-read' of github.com:davidhassell/cfdm into h5-read
davidhassell Jul 23, 2024
50c5b9f
Typo
davidhassell Jul 23, 2024
040e0a2
Remove incorrect logic
davidhassell Jul 23, 2024
d79d4b3
Typo
davidhassell Jul 23, 2024
c4945a1
Typo
davidhassell Jul 23, 2024
3fd8b43
Typo
davidhassell Jul 23, 2024
c687796
Typo
davidhassell Jul 23, 2024
de8402c
Typos
davidhassell Jul 23, 2024
82fc7dd
netcdf_flattener licence
davidhassell Jul 23, 2024
40db78c
Typos
davidhassell Jul 23, 2024
6ff5a4a
Improved docstrings
davidhassell Jul 23, 2024
4633880
Typos
davidhassell Jul 23, 2024
8cf9594
rename variables
davidhassell Jul 23, 2024
e97d14f
Improved docstrings
davidhassell Jul 23, 2024
66c711f
rename variables
davidhassell Jul 23, 2024
1642521
remove debugging code
davidhassell Jul 23, 2024
3b62cbf
rename variables for clarity
davidhassell Jul 23, 2024
fe41003
unskip opendap test
davidhassell Jul 23, 2024
e3ad717
Tidy docs
davidhassell Jul 23, 2024
dad52b9
No NetCDFIndexer class
davidhassell Jul 23, 2024
4874560
Non deprecated in new class docs
davidhassell Jul 23, 2024
065ccc6
Better docstring
davidhassell Jul 23, 2024
31919a9
remove TODO (all OK)
davidhassell Jul 24, 2024
6f1c397
tidy
davidhassell Jul 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
dev
davidhassell committed Feb 4, 2024
commit 1cff2d0d49fa7e6a488aad5dc610dea68816eca4
1 change: 1 addition & 0 deletions cfdm/data/mixin/filearraymixin.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from copy import deepcopy
from urllib.parse import urlparse

from s3fs import S3FileSystem
2 changes: 0 additions & 2 deletions cfdm/data/mixin/netcdffilemixin.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from copy import deepcopy

from ..numpyarray import NumpyArray


29 changes: 29 additions & 0 deletions cfdm/docstring/docstring.py
Original file line number Diff line number Diff line change
@@ -402,6 +402,35 @@
"{{init cell_dimension: `int`}}": """cell_dimension: `int`
The position of the *data* dimension that indexes the
cells, either ``0`` or ``1``.""",
# storage_options
"{{storage_options: `dict` or `None`, optional}}": """storage_options: `dict` or `None`, optional
Key/value pairs to be passed on to the `s3fs.S3FileSystem`
file-system backend to control the opening of files in an
S3 object store. By default, or if `None`, then a value of
``{'anon': True}`` is used. Ignored for file names that
don't start with ``s3:``.

If and only if *storage_options* has no ``'endpoint_url'`` key, then
one will be automatically derived from the file name and
included in the keyword parameters. For example, for a
file name of ``'s3://store/data/file.nc'``, an
``'endpoint_url'`` key with value ``'https://store'``
would be created. To disable this behaviour, assign `None`
to the ``'endpoint_url'`` key.

*Parameter example:*
``{'anon': True}``

*Parameter example:*
For a file name of ``'s3://store/data/file.nc'``, the
following are equivalent: ``{'anon': True}`` and
``{'anon': True, 'endpoint_url': 'https://store'}``.

*Parameter example:*
``{'key': 'jhsadf8756', 'secret': '862t3gyebh',
'client_kwargs': {'endpoint_url': 'http://some-s3.com'},
'config_kwargs': {'s3': {'addressing_style':
'virtual'}}}``""",
# ----------------------------------------------------------------
# Method description substitutions (4 levels of indentation)
# ----------------------------------------------------------------
2 changes: 1 addition & 1 deletion cfdm/read_write/netcdf/netcdfread.py
Original file line number Diff line number Diff line change
@@ -519,7 +519,7 @@ def file_open(self, filename, flatten=True, verbose=None):
storage_options = storage_options.copy()
storage_options["endpoint_url"] = f"https://{u.netloc}"

key = tuple(sorted(s3.items()))
key = tuple(sorted(storage_options.items()))
file_systems = g["file_systems"]
fs = file_systems.get(key)
if fs is None:
9 changes: 5 additions & 4 deletions cfdm/read_write/read.py
Original file line number Diff line number Diff line change
@@ -23,13 +23,14 @@ def read(
):
"""Read field or domain constructs from a dataset.

The dataset may be a netCDF file on disk or on an OPeNDAP server,
or a CDL file on disk (see below).
The following file formats are supported: netCDF and CDL.

NetCDF files may be on disk, on an OPeNDAP server, or in an S3
object store.

The returned constructs are sorted by the netCDF variable names of
their corresponding data or domain variables.


**CDL files**

A file is considered to be a CDL representation of a netCDF
@@ -288,7 +289,7 @@ def read(
``{'anon': True, 'endpoint_url': 'https://store'}``.

*Parameter example:*
``{'key": 'kjhsadf8756', 'secret': '862t3gyebh',
``{'key': 'kjhsadf8756', 'secret': '862t3gyebh',
'client_kwargs': {'endpoint_url': 'http://some-s3.com'},
'config_kwargs': {'s3': {'addressing_style':
'virtual'}}}``
4 changes: 2 additions & 2 deletions cfdm/test/test_VariableIndexer.py
Original file line number Diff line number Diff line change
@@ -69,8 +69,8 @@ def test_mask(self):

cfdm.write(fields, tempfile, warn_valid=False)

fh5 = cfdm.read(tempfile, library="h5netcdf")
fnc = cfdm.read(tempfile, library="netCDF4")
fh5 = cfdm.read(tempfile, netCDF_backend="h5netcdf")
fnc = cfdm.read(tempfile, netCDF_backend="netCDF4")
for h, n in zip(fh5, fnc):
self.assertTrue(h.data.mask.equals(n.data.mask))

16 changes: 9 additions & 7 deletions cfdm/test/test_groups.py
Original file line number Diff line number Diff line change
@@ -180,13 +180,15 @@ def test_groups(self):
)
nc.close()

h = cfdm.read(grouped_file, library="netCDF4", verbose="WARNING")
h = cfdm.read(
grouped_file, netCDF_backend="netCDF4", verbose="WARNING"
)
self.assertEqual(len(h), 1)
h = h[0]
self.assertTrue(f.equals(h, verbose=2))

# Check that h5netcdf reads the file correctly
h5 = cfdm.read(grouped_file, library="h5netcdf")
h5 = cfdm.read(grouped_file, netCDF_backend="h5netcdf")
self.assertEqual(len(h5), 1)
self._check_h5netcdf_groups(h5[0], h)

@@ -319,7 +321,7 @@ def test_groups_geometry(self):
self.assertTrue(f.equals(h, verbose=2))

# Check that h5netcdf reads the file correctly
h5 = cfdm.read(grouped_file, library="h5netcdf")
h5 = cfdm.read(grouped_file, netCDF_backend="h5netcdf")
self.assertEqual(len(h5), 1)
self._check_h5netcdf_groups(h5[0], h)

@@ -392,7 +394,7 @@ def test_groups_compression(self):
self.assertTrue(f.equals(h, verbose=2))

# Check that h5netcdf reads the file correctly
h5 = cfdm.read(grouped_file, library="h5netcdf")
h5 = cfdm.read(grouped_file, netCDF_backend="h5netcdf")
self.assertEqual(len(h5), 1)
self._check_h5netcdf_groups(h5[0], h)

@@ -465,7 +467,7 @@ def test_groups_dimension(self):
self.assertTrue(f.equals(h, verbose=3))

# Check that h5netcdf reads the file correctly
h5 = cfdm.read(grouped_file, library="h5netcdf")
h5 = cfdm.read(grouped_file, netCDF_backend="h5netcdf")
self.assertEqual(len(h5), 1)
self._check_h5netcdf_groups(h5[0], h)

@@ -502,13 +504,13 @@ def test_groups_unlimited_dimension(self):

cfdm.write(f, grouped_file5, verbose=1)

h = cfdm.read(grouped_file, library="netCDF4")
h = cfdm.read(grouped_file, netCDF_backend="netCDF4")
self.assertEqual(len(h), 1)
h = h[0]
self.assertTrue(f.equals(h))

# Check that h5netcdf reads the file correctly
h5 = cfdm.read(grouped_file, library="h5netcdf")
h5 = cfdm.read(grouped_file, netCDF_backend="h5netcdf")
self.assertEqual(len(h5), 1)
self._check_h5netcdf_groups(h5[0], h)

4 changes: 2 additions & 2 deletions cfdm/test/test_read_write.py
Original file line number Diff line number Diff line change
@@ -671,8 +671,8 @@ def test_read_CDL(self):

def test_read_write_string(self):
"""Test the `string` keyword argument to `read` and `write`."""
f = cfdm.read(self.string_filename, library="netCDF4")
fh = cfdm.read(self.string_filename, library="h5netcdf")
f = cfdm.read(self.string_filename, netCDF_backend="netCDF4")
fh = cfdm.read(self.string_filename, netCDF_backend="h5netcdf")

n = int(len(f) / 2)