From 4328499f6f6b609d17a40f92b0925ff8c91c4a3f Mon Sep 17 00:00:00 2001
From: Anshul Singhvi
Date: Fri, 13 Sep 2024 00:21:00 -0700
Subject: [PATCH] basic CF correction interface

---
Reviewer note: src/astype_filter.jl was revised during review, so the blob id on
its "index" line below is the one from the original commit and is stale.

 src/astype_filter.jl  | 84 +++++++++++++++++++++++++++++++++++++
 src/cf_corrections.jl | 98 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 182 insertions(+)
 create mode 100644 src/astype_filter.jl
 create mode 100644 src/cf_corrections.jl

diff --git a/src/astype_filter.jl b/src/astype_filter.jl
new file mode 100644
index 0000000..66c69cc
--- /dev/null
+++ b/src/astype_filter.jl
@@ -0,0 +1,84 @@
+import Zarr: Filter, zencode, zdecode, getfilter, JSON, filterdict
+
+# We implement here some filters:
+# - AsTypeFilter from numcodecs, due to be upstreamed to Zarr
+# - CFMaskFilter (to apply before fixedscaleoffset), a translation of Xarray's CFMaskCoder as a Zarr filter
+# - CFTimeDeltaFilter
+# - CFDatetimeFilter
+
+"""
+    AstypeFilter{EncodedType, DecodedType}
+
+Zarr filter serialized under the id `"astype"`. `zencode` produces elements of
+`EncodedType` and `zdecode` produces elements of `DecodedType`. The struct has
+no fields; both element types are carried as type parameters.
+"""
+struct AstypeFilter{EncodedType, DecodedType} <: Filter{EncodedType, DecodedType}
+end
+# NOTE(review): `filterdict` is imported above but nothing in this file adds an
+# "astype" entry to it -- confirm the filter is registered elsewhere.
+# NOTE(review): Zarr.jl's `Filter{T, TENC}` appears to take (decoded, encoded);
+# the parameters are passed the other way round here -- confirm.
+
+"""
+    JSON.lower(filter::AstypeFilter)
+
+Return the metadata `Dict` for `filter`, with the keys `"id"`, `"encode_dtype"`
+and `"decode_dtype"`; the dtypes are rendered with `Zarr.typestr`.
+"""
+function JSON.lower(
+    ::AstypeFilter{EncodedType, DecodedType}
+) where {EncodedType, DecodedType}
+    return Dict(
+        "id" => "astype",
+        "encode_dtype" => Zarr.typestr(EncodedType),
+        "decode_dtype" => Zarr.typestr(DecodedType),
+    )
+end
+
+"""
+    Zarr.getfilter(::Type{<:AstypeFilter}, d::Dict)
+
+Build an `AstypeFilter` from the metadata dictionary `d`. `"encode_dtype"` is
+required (a missing key throws `KeyError`); `"decode_dtype"` falls back to
+`"encode_dtype"` when absent.
+"""
+function Zarr.getfilter(::Type{<: AstypeFilter}, d::Dict)
+    encode_dtype = d["encode_dtype"]
+    decode_dtype = get(d, "decode_dtype", encode_dtype)
+    return AstypeFilter{Zarr.typestr(encode_dtype), Zarr.typestr(decode_dtype)}()
+end
+
+# Cast the elements of `data` to `T`. Same-width integer types are reinterpreted
+# bit-for-bit (a wrapping cast); any other pair is converted by value, so that
+# e.g. Int32 -> Float32 yields 1.0f0 for 1 instead of the bit pattern of 1.
+function _astype(::Type{T}, data) where {T}
+    S = eltype(data)
+    S === T && return data
+    if S <: Integer && T <: Integer && sizeof(S) == sizeof(T)
+        return reinterpret(T, data)
+    end
+    return T.(data)
+end
+
+"""
+    zdecode(ain, filter::AstypeFilter)
+
+View `ain` as `EncodedType` elements and return them cast to `DecodedType`.
+"""
+function zdecode(
+    ain, ::AstypeFilter{EncodedType, DecodedType}
+) where {EncodedType, DecodedType}
+    return _astype(DecodedType, reinterpret(EncodedType, ain))
+end
+
+"""
+    zencode(ain, filter::AstypeFilter)
+
+View `ain` as `DecodedType` elements and return them cast to `EncodedType`.
+"""
+function zencode(
+    ain, ::AstypeFilter{EncodedType, DecodedType}
+) where {EncodedType, DecodedType}
+    return _astype(EncodedType, reinterpret(DecodedType, ain))
+end
diff --git a/src/cf_corrections.jl b/src/cf_corrections.jl
new file mode 100644 index 0000000..b1d7521 --- /dev/null +++ b/src/cf_corrections.jl @@ -0,0 +1,98 @@ +# TODO: don't use this function, since some CF convention tags +# don't work with it. +# Instead simply load via YAXArrays, which does the work correctly. +# For some reason - Rasters.jl doesn't apply the CF conventions +# (or maybe it's ZarrDatasets.jl's problem). + +function do_correction!(f!, store, path) + zarray = Zarr.JSON.parse(store[path * "/.zarray"]) + zattrs = Zarr.JSON.parse(store[path * "/.zattrs"]) + + f!(zarray, zattrs) + + # Cache always overrides raw data, so by embedding the new metadata + # in the cache, we can ensure it's always read instead of the old + # stuff, without modifying the file. + store.cache[path * "/.zarray"] = (Zarr.JSON.json(zarray),) + store.cache[path * "/.zattrs"] = (Zarr.JSON.json(zattrs),) + + return +end + +function add_scale_offset_filter_and_set_mask!(zarray::Dict, zattrs::Dict) + scale = get(zattrs, "scale_factor", 1.0) + offset = get(zattrs, "add_offset", 0.0) + if iszero(offset) && isone(scale) + return # we need neither scale nor offset + end + pop!(zattrs, "scale_factor") + pop!(zattrs, "add_offset") + filter_dict = Zarr.JSON.lower( + Zarr.FixedScaleOffsetFilter{ + Float64, + Zarr.typestr(zarray["dtype"]), + Zarr.typestr(get(zarray, "astype", " Zarr.JSON.lower) + end + zarray["filters"] = new_filters + zarray["dtype"] = Zarr.typestr(Float64) # TODO: should this be f32?? + if haskey(zarray, "fill_value") + zarray["fill_value"] = zattrs["_FillValue"] # TODO: this should be made obsolete! 
+ end +end + +function set_unsigned!(zarray::Dict{Symbol, <: Any}, zattrs::Dict{Symbol, <: Any}) + if haskey(zattrs, "_Unsigned") && zattrs["_Unsigned"] == "true" + #= + filter_dict = Zarr.JSON.lower(Zarr.AsTypeFilter{}()) + current_filters = zarray["filters"] + new_filters = if isnothing(current_filters) || isempty(current_filters) + [filter_dict] + else + pushfirst!(collect(current_filters), filter_dict) + end + zarray["filters"] = new_filters + =# + old_dtype = Zarr.typestr(zarray["dtype"]) + zarray["dtype"] = replace(zarray["dtype"], "i" => "u") + zarray["fillvalue"] = reinterpret(unsigned(old_dtype), old_dtype(zarray["fillvalue"])) + end +end + +function move_filter_to_compressors!(zarray::Dict{Symbol, <: Any}, zattrs::Dict{Symbol, <: Any}) +end + +function cache_group!(store::ReferenceStore, group::String) + mkpath(joinpath(store.cache_dir, group)) + for key in (group * "/" .* Zarr.subkeys(store, group)) + cached_file_path = joinpath(store.cache_dir, key) + touch(cached_file_path) + bytes = _get_file_bytes(store, store[key]) + write(cached_file_path, bytes) + store.cache[key] = (cached_file_path,) + end +end \ No newline at end of file