Skip to content

Commit

Permalink
Merge pull request #446 from euroargodev/fix-kerchunk
Browse files Browse the repository at this point in the history
Fix kerchunk handling of s3 and ftp paths
  • Loading branch information
gmaze authored Feb 26, 2025
2 parents 753f47e + 4be42bb commit 5309743
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 1 deletion.
13 changes: 13 additions & 0 deletions argopy/stores/float/spec.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from typing import Union

import fsspec.core

import xarray as xr
from pathlib import Path
import pandas as pd
Expand Down Expand Up @@ -195,6 +198,8 @@ def path(self) -> str:
def ls(self) -> list:
"""Return the list of files in float path
Protocol is included
Examples
--------
>>> ArgoFloat(4902640).ls()
Expand Down Expand Up @@ -222,6 +227,14 @@ def ls(self) -> list:
paths += self.fs.glob(self.host_sep.join([self.path.replace('dac', 'aux'), "*"]))

paths = [p for p in paths if Path(p).suffix != ""]

# Ensure the protocol is included for non-local files on FTP server:
for ip, p in enumerate(paths):
if self.host_protocol == 'ftp':
paths[ip] = "ftp://" + self.fs.fs.host + fsspec.core.split_protocol(p)[-1]
if self.host_protocol == 's3':
paths[ip] = "s3://" + fsspec.core.split_protocol(p)[-1]

paths.sort()
return paths

Expand Down
25 changes: 24 additions & 1 deletion argopy/stores/kerchunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import xarray as xr
from typing import List, Union, Dict, Literal
from pathlib import Path
from urllib.parse import urlparse
from fsspec.core import split_protocol
import json
import logging
Expand Down Expand Up @@ -163,6 +164,16 @@ def __repr__(self):
summary.append("- %i reference%s loaded" % (n, "s" if n > 0 else ""))
return "\n".join(summary)

@property
def store_path(self):
    """Return the path of the store, with protocol for ftp and s3 stores

    The path is read from the ``path`` attribute of ``self.fs`` when it exists, otherwise the absolute
    path of the current working directory is used.

    Returns
    -------
    str
        For a ``ftp`` store: ``ftp://<host><path>``, for a ``s3`` store: ``s3://<path>``, otherwise the
        path is returned unchanged.
    """
    p = getattr(self.fs, 'path', str(Path('.').absolute()))

    # A file system ``protocol`` may be declared as a single string (e.g. "ftp") or as a
    # tuple of aliases (e.g. ("s3", "s3a")). Indexing a plain string with [0] would only
    # return its first character, so normalise to the primary protocol name first.
    protocol = self.fs.fs.protocol
    if not isinstance(protocol, str):
        protocol = protocol[0]

    # Ensure the protocol is included for non-local files:
    if protocol == 'ftp':
        p = "ftp://" + self.fs.fs.host + fsspec.core.split_protocol(p)[-1]
    elif protocol == 's3':
        p = "s3://" + fsspec.core.split_protocol(p)[-1]
    return p

def _ncfile2jsfile(self, ncfile):
return Path(ncfile).name.replace(".nc", ".json")

Expand Down Expand Up @@ -343,7 +354,13 @@ def _magic(self, ncfile: Union[str, Path]) -> str:
------
:class:`aiohttp.ClientResponseError`
"""
fs = fsspec.filesystem(split_protocol(str(ncfile))[0])
protocol = split_protocol(str(ncfile))[0]
if protocol == 'ftp':
opts = {'host': urlparse(ncfile).hostname, # host eg: ftp.ifremer.fr
'port': 0 if urlparse(ncfile).port is None else urlparse(ncfile).port}
else:
opts = {}
fs = fsspec.filesystem(protocol, **opts)

def is_read(fs, uri):
try:
Expand All @@ -365,8 +382,13 @@ def is_read(fs, uri):
def supported(self, ncfile: Union[str, Path]) -> bool:
"""Check if a netcdf file can be accessed through byte ranges
For non-local files, the absolute path toward the netcdf file must include the file protocol to return
a correct answer.
Argo GDAC supporting byte ranges:
- ftp://ftp.ifremer.fr/ifremer/argo
- s3://argo-gdac-sandbox/pub
- https://usgodae.org/pub/outgoing/argo
- https://argo-gdac-sandbox.s3-eu-west-3.amazonaws.com/pub
Not supporting:
Expand All @@ -375,5 +397,6 @@ def supported(self, ncfile: Union[str, Path]) -> bool:
Parameters
----------
ncfile: str, Path
Absolute path toward the netcdf file to assess for lazy support, must include protocol for non-local files.
"""
return self._magic(ncfile) is not None

0 comments on commit 5309743

Please sign in to comment.