From f5c8cded5eebf858b3a3ad6568ae108448e525a7 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Tue, 25 Feb 2025 16:05:39 +0100 Subject: [PATCH 1/5] fix kerchunk handler for ftp host could not check for lazy support with ftp protocol paths --- argopy/stores/kerchunker.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/argopy/stores/kerchunker.py b/argopy/stores/kerchunker.py index 7ffde4e6..0eba14fc 100644 --- a/argopy/stores/kerchunker.py +++ b/argopy/stores/kerchunker.py @@ -2,6 +2,7 @@ import xarray as xr from typing import List, Union, Dict, Literal from pathlib import Path +from urllib.parse import urlparse from fsspec.core import split_protocol import json import logging @@ -343,7 +344,13 @@ def _magic(self, ncfile: Union[str, Path]) -> str: ------ :class:`aiohttp.ClientResponseError` """ - fs = fsspec.filesystem(split_protocol(str(ncfile))[0]) + protocol = split_protocol(str(ncfile))[0] + if protocol == 'ftp': + opts = {'host': urlparse(ncfile).hostname, # host eg: ftp.ifremer.fr + 'port': 0 if urlparse(ncfile).port is None else urlparse(ncfile).port} + else: + opts = {} + fs = fsspec.filesystem(protocol, **opts) def is_read(fs, uri): try: From 17791f0e60bdbde2d89eb2c8a4f9ce999820d4fc Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Tue, 25 Feb 2025 16:06:33 +0100 Subject: [PATCH 2/5] fix Argofloat ls method to return absolute path, including protocol for ftp (hostname was missing) and s3 paths --- argopy/stores/float/spec.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/argopy/stores/float/spec.py b/argopy/stores/float/spec.py index 95e0a190..ff5629b4 100644 --- a/argopy/stores/float/spec.py +++ b/argopy/stores/float/spec.py @@ -1,4 +1,7 @@ from typing import Union + +import fsspec.core + import xarray as xr from pathlib import Path import pandas as pd @@ -195,6 +198,8 @@ def path(self) -> str: def ls(self) -> list: """Return the list of files in float path + Protocol is included + Examples -------- >>> ArgoFloat(4902640).ls() @@ -222,6 +227,14 @@ def ls(self) -> list: paths += self.fs.glob(self.host_sep.join([self.path.replace('dac', 'aux'), "*"])) paths = [p for p in paths if Path(p).suffix != ""] + + # Ensure the protocol is included for non-local files on FTP server: + for ip, p in enumerate(paths): + if self.host_protocol == 'ftp': + paths[ip] = "ftp://" + self.fs.fs.host + fsspec.core.split_protocol(p)[-1] + if self.host_protocol == 's3': + paths[ip] = "s3://" + fsspec.core.split_protocol(p)[-1] + paths.sort() return paths From e03043530e0803ea77c298ae880778c8971d7f6e Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Tue, 25 Feb 2025 16:14:46 +0100 Subject: [PATCH 3/5] Update kerchunker.py fix docstring --- argopy/stores/kerchunker.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/argopy/stores/kerchunker.py b/argopy/stores/kerchunker.py index 0eba14fc..253b5a17 100644 --- a/argopy/stores/kerchunker.py +++ b/argopy/stores/kerchunker.py @@ -372,8 +372,12 @@ def is_read(fs, uri): def supported(self, ncfile: Union[str, Path]) -> bool: """Check if a netcdf file can be accessed through byte ranges + The absolute path toward the netcdf file must include the file protocol to return a correct answer. + Argo GDAC supporting byte ranges: + - ftp://ftp.ifremer.fr/ifremer/argo - s3://argo-gdac-sandbox/pub + - https://usgodae.org/pub/outgoing/argo - https://argo-gdac-sandbox.s3-eu-west-3.amazonaws.com/pub Not supporting: @@ -382,5 +386,6 @@ def supported(self, ncfile: Union[str, Path]) -> bool: Parameters ---------- ncfile: str, Path + Absolute path toward the netcdf file to assess for lazy support. """ return self._magic(ncfile) is not None From 3de0fbf03f73bc48810f7ac6d218d2e4d35cb58d Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Tue, 25 Feb 2025 16:18:42 +0100 Subject: [PATCH 4/5] Update kerchunker.py --- argopy/stores/kerchunker.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/argopy/stores/kerchunker.py b/argopy/stores/kerchunker.py index 253b5a17..d7898b19 100644 --- a/argopy/stores/kerchunker.py +++ b/argopy/stores/kerchunker.py @@ -372,7 +372,8 @@ def is_read(fs, uri): def supported(self, ncfile: Union[str, Path]) -> bool: """Check if a netcdf file can be accessed through byte ranges - The absolute path toward the netcdf file must include the file protocol to return a correct answer. + For non-local files, the absolute path toward the netcdf file must include the file protocol to return + a correct answer. Argo GDAC supporting byte ranges: - ftp://ftp.ifremer.fr/ifremer/argo @@ -386,6 +387,6 @@ def supported(self, ncfile: Union[str, Path]) -> bool: Parameters ---------- ncfile: str, Path - Absolute path toward the netcdf file to assess for lazy support. + Absolute path toward the netcdf file to assess for lazy support, must include protocol for non-local files. """ return self._magic(ncfile) is not None From 4be42bb4a55697adf05bb1a84f37d3c0e7a8e0a9 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Tue, 25 Feb 2025 16:48:15 +0100 Subject: [PATCH 5/5] Update kerchunker.py Add `store_path` property to return the absolute path of the store toward kerchunk json data --- argopy/stores/kerchunker.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/argopy/stores/kerchunker.py b/argopy/stores/kerchunker.py index d7898b19..f248538b 100644 --- a/argopy/stores/kerchunker.py +++ b/argopy/stores/kerchunker.py @@ -164,6 +164,16 @@ def __repr__(self): summary.append("- %i reference%s loaded" % (n, "s" if n > 0 else "")) return "\n".join(summary) + @property + def store_path(self): + p = getattr(self.fs, 'path', str(Path('.').absolute())) + # Ensure the protocol is included for non-local files: + if self.fs.fs.protocol[0] == 'ftp': + p = "ftp://" + self.fs.fs.host + fsspec.core.split_protocol(p)[-1] + if self.fs.fs.protocol[0] == 's3': + p = "s3://" + fsspec.core.split_protocol(p)[-1] + return p + def _ncfile2jsfile(self, ncfile): return Path(ncfile).name.replace(".nc", ".json")