Skip to content

Commit

Permalink
basic CF correction interface
Browse files Browse the repository at this point in the history
  • Loading branch information
asinghvi17 committed Sep 13, 2024
1 parent f1e74b9 commit 4328499
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 0 deletions.
40 changes: 40 additions & 0 deletions src/astype_filter.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import Zarr: Filter, zencode, zdecode, getfilter, JSON, filterdict

# We implement here some filters:
# - AsTypeFilter from numcodecs, due to be upstreamed to Zarr
# - CFMaskFilter (to apply before fixedscaleoffset), a translation of Xarray's CFMaskCoder as a Zarr filter
# - CFTimeDeltaFilter
# - CFDatetimeFilter

struct AstypeFilter{EncodedType, DecodedType} <: Filter{EncodedType, DecodedType}
end

function JSON.lower(::AstypeFilter{EncodedType, DecodedType}) where {EncodedType, DecodedType}
Dict(
"id" => "astype",
"encode_dtype" => Zarr.typestr(EncodedType),
"decode_dtype" => Zarr.typestr(DecodedType)
)
end

function Zarr.getfilter(::Type{<: AstypeFilter}, d::Dict)
return AstypeFilter{Zarr.typestr(d["encode_dtype"]), Zarr.typestr(get(d, "decode_dtype", d["encode_dtype"]))}()
end

function zdecode(ain, ::AstypeFilter{EncodedType, DecodedType}) where {EncodedType, DecodedType}
data = reinterpret(EncodedType, ain)
if sizeof(EncodedType) == sizeof(DecodedType)
return reinterpret(DecodedType, data)
else
return DecodedType.(data)
end
end

function zencode(ain, ::AstypeFilter{EncodedType, DecodedType}) where {EncodedType, DecodedType}
data = reinterpret(DecodedType, ain)
if sizeof(EncodedType) == sizeof(DecodedType)
return reinterpret(EncodedType, data)
else
return EncodedType.(data)
end
end
98 changes: 98 additions & 0 deletions src/cf_corrections.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# TODO: don't use this function, since some CF convention tags
# don't work with it.
# Instead simply load via YAXArrays, which does the work correctly.
# For some reason - Rasters.jl doesn't apply the CF conventions
# (or maybe it's ZarrDatasets.jl's problem).

function do_correction!(f!, store, path)
zarray = Zarr.JSON.parse(store[path * "/.zarray"])
zattrs = Zarr.JSON.parse(store[path * "/.zattrs"])

f!(zarray, zattrs)

# Cache always overrides raw data, so by embedding the new metadata
# in the cache, we can ensure it's always read instead of the old
# stuff, without modifying the file.
store.cache[path * "/.zarray"] = (Zarr.JSON.json(zarray),)
store.cache[path * "/.zattrs"] = (Zarr.JSON.json(zattrs),)

return
end

function add_scale_offset_filter_and_set_mask!(zarray::Dict, zattrs::Dict)
scale = get(zattrs, "scale_factor", 1.0)
offset = get(zattrs, "add_offset", 0.0)
if iszero(offset) && isone(scale)
return # we need neither scale nor offset
end
pop!(zattrs, "scale_factor")
pop!(zattrs, "add_offset")
filter_dict = Zarr.JSON.lower(
Zarr.FixedScaleOffsetFilter{
Float64,
Zarr.typestr(zarray["dtype"]),
Zarr.typestr(get(zarray, "astype", "<f8"))
}(1/scale, offset) # Zarr filter is not the same as CF definition. Hopefully this doesn't get too unstable...
)
current_filters = zarray["filters"]
new_filters = if isnothing(current_filters) || isempty(current_filters)
[filter_dict]
else
pushfirst!(collect(current_filters), filter_dict)
end
if haskey(zattrs, "_Unsigned") && zattrs["_Unsigned"] == "true" # correct for unsigned values
old_dtype = Zarr.typestr(zarray["dtype"])
new_dtype = unsigned(old_dtype)
zattrs["_FillValue"] = reinterpret(new_dtype, old_dtype(zattrs["_FillValue"])) * scale + offset
pop!(zattrs, "_Unsigned")
if haskey(zattrs, "valid_range")
zattrs["valid_range"] = reinterpret(new_dtype, old_dtype.(zattrs["valid_range"])) .* scale .+ offset
end
if haskey(zattrs, "valid_min")
zattrs["valid_min"] = reinterpret(new_dtype, old_dtype(zattrs["valid_min"])) * scale + offset
end
if haskey(zattrs, "valid_min")
zattrs["valid_min"] = reinterpret(new_dtype, old_dtype(zattrs["valid_min"])) * scale + offset
end
# Add a type conversion filter before the scale/offset, to ensure that
# only unsigned values are used!
insert!(new_filters, 2, AstypeFilter{old_dtype, new_dtype}() |> Zarr.JSON.lower)
end
zarray["filters"] = new_filters
zarray["dtype"] = Zarr.typestr(Float64) # TODO: should this be f32??
if haskey(zarray, "fill_value")
zarray["fill_value"] = zattrs["_FillValue"] # TODO: this should be made obsolete!
end
end

function set_unsigned!(zarray::Dict{Symbol, <: Any}, zattrs::Dict{Symbol, <: Any})
if haskey(zattrs, "_Unsigned") && zattrs["_Unsigned"] == "true"
#=
filter_dict = Zarr.JSON.lower(Zarr.AsTypeFilter{}())
current_filters = zarray["filters"]
new_filters = if isnothing(current_filters) || isempty(current_filters)
[filter_dict]
else
pushfirst!(collect(current_filters), filter_dict)
end
zarray["filters"] = new_filters
=#
old_dtype = Zarr.typestr(zarray["dtype"])
zarray["dtype"] = replace(zarray["dtype"], "i" => "u")
zarray["fillvalue"] = reinterpret(unsigned(old_dtype), old_dtype(zarray["fillvalue"]))
end
end

function move_filter_to_compressors!(zarray::Dict{Symbol, <: Any}, zattrs::Dict{Symbol, <: Any})
end

function cache_group!(store::ReferenceStore, group::String)
mkpath(joinpath(store.cache_dir, group))
for key in (group * "/" .* Zarr.subkeys(store, group))
cached_file_path = joinpath(store.cache_dir, key)
touch(cached_file_path)
bytes = _get_file_bytes(store, store[key])
write(cached_file_path, bytes)
store.cache[key] = (cached_file_path,)
end
end

0 comments on commit 4328499

Please sign in to comment.