Skip to content

Commit

Permalink
Copy over the implementation from FSSpec.jl
Browse files Browse the repository at this point in the history
  • Loading branch information
asinghvi17 committed Sep 5, 2024
1 parent 1cc7842 commit dfa9605
Show file tree
Hide file tree
Showing 11 changed files with 488 additions and 4 deletions.
29 changes: 27 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,38 @@
name = "Kerchunk"
uuid = "12c09fd5-fe6a-4e79-8f42-b31f49215243"
authors = ["Anshul Singhvi <[email protected]> and contributors"]
version = "0.1.0-DEV"
version = "0.1.0"

[deps]
AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95"
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
Mustache = "ffc61752-8dc7-55ee-8c37-f3e9cdd09e70"
URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"

[compat]
AWSS3 = "0.10, 0.11"
FilePathsBase = "0.9"
HTTP = "1.10"
JSON3 = "1"
Mustache = "1"
URIs = "1.5"
Zarr = "0.9"
julia = "1.10"

[extras]
CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab"
PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab"
Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
YAXArrays = "c21b50f5-aa40-41ea-b809-c0f5e47bfa5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
test = ["CondaPkg", "PythonCall", "Dates", "JSON3", "NCDatasets", "Rasters", "YAXArrays", "Test"]
33 changes: 33 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,36 @@
[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaIO.github.io/Kerchunk.jl/stable/)
[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaIO.github.io/Kerchunk.jl/dev/)
[![Build Status](https://github.com/JuliaIO/Kerchunk.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/JuliaIO/Kerchunk.jl/actions/workflows/CI.yml?query=branch%3Amain)

Kerchunk.jl is a Julia package that enables loading Kerchunk reference catalogs as Zarr arrays via a storage backend.

## Installation

```julia
]
add Kerchunk
```

## Quick start

```julia
using Kerchunk, Zarr

za = Zarr.zopen(Kerchunk.ReferenceStore("path/to/kerchunk/catalog.json"))
# and treat it like any other Zarr array!
# You can even wrap it in YAXArrays.jl to get DimensionalData.jl accessors:
using YAXArrays
YAXArrays.open_dataset(za)
```

## Background

[`kerchunk`](https://github.com/fsspec/kerchunk) is a Python package that generates the reference catalogs.

## Alternatives and related packages

- You can always use Python's `xarray` directly via PythonCall.jl
- [FSSpec.jl](https://github.com/asinghvi17/FSSpec.jl) is an alternative storage backend for Zarr.jl that wraps the same [`fsspec`](https://github.com/fsspec/filesystem_spec) that `xarray` uses under the hood.

This package is of course built on top of [Zarr.jl](https://github.com/JuliaIO/Zarr.jl), which is a pure-Julia Zarr array library.
[YAXArrays.jl](https://github.com/JuliaDataCubes/YAXArrays.jl) is a Julia package that can wrap Zarr arrays in a DimensionalData-compatible interface.
17 changes: 16 additions & 1 deletion src/Kerchunk.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
module Kerchunk

# Dependencies, grouped by the role each one plays in the package.
using JSON3, Base64 # for decoding
using URIs, Mustache # to resolve paths
using FilePathsBase, AWSS3 # to access files
using Zarr # this is where the magic happens


# Utility functions
# `readbytes` has a method that dispatches on `Zarr.AWSS3.S3Path`, so this include has to
# come after `using Zarr`.
include("readbytes.jl")

# Reference store implementation
include("referencestore.jl")

# Materializing a reference store
# `materialize` takes a `ReferenceStore` argument, so this include has to come after the
# file that defines that type (presumably referencestore.jl — confirm).
include("materialize.jl")

# Only `ReferenceStore` is exported; the other functions defined by the included files
# (e.g. `materialize`, `readbytes`) are reached with the `Kerchunk.` prefix.
export ReferenceStore

end
16 changes: 16 additions & 0 deletions src/materialize.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Materialization: turn a Kerchunk catalog into an on-disk Zarr directory.

"""
    materialize(path, store::ReferenceStore)

Write every entry of the Kerchunk catalog held by `store` into the directory `path`, so
that the result can be opened with any Zarr reader.

For each key of `store.mapper`, the bytes obtained from `_get_file_bytes` for that entry
are written to `joinpath(path, string(key))`. Missing parent directories are created, and
one progress line per key is printed. Returns `path`.
"""
function materialize(path::Union{String, FilePathsBase.AbstractPath}, store::ReferenceStore)
    mkpath(path)
    for key in keys(store.mapper)
        println("Writing $key")
        # Keys may contain directory separators, so make sure the parent exists first.
        destination = joinpath(path, string(key))
        parent_dir = splitdir(destination)[1]
        mkpath(parent_dir)
        payload = _get_file_bytes(store, store.mapper[key])
        write(destination, payload)
    end
    return path
end
17 changes: 17 additions & 0 deletions src/readbytes.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""
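    readbytes(path, start::Integer, stop::Integer)::Vector{UInt8}

Read the bytes at zero-based offsets `start` through `stop` (inclusive) from the file at `path`, i.e. request `stop + 1 - start` bytes. `start` must be less than `stop`.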
"""
function readbytes(path, start::Integer, stop::Integer)
    # Validate with an explicit exception rather than `@assert`: assertions are meant for
    # internal invariants and may be disabled, whereas a bad byte range coming from the
    # caller must always be rejected. Throws `ArgumentError` when `start >= stop`.
    if start >= stop
        msg = "In `readbytes`, start ($(start)) must be less than stop ($(stop))."
        throw(ArgumentError(msg))
    end
    # `open(...) do` guarantees the handle is closed, even if `seek`/`read` throws.
    return open(path) do io
        # `start` is used directly as the zero-based offset to seek to.
        seek(io, start)
        # Request offsets `start` through `stop` inclusive, i.e. `stop + 1 - start` bytes.
        read(io, stop + 1 - start)
    end
end

"""
    readbytes(path::Zarr.AWSS3.S3Path, start::Integer, stop::Integer)

Read a byte range from the S3 object at `path` using a ranged `read`, without opening the
object as a stream.

The request is made with `byte_range = (start + 1):stop`, which spans `stop - start`
bytes.

NOTE(review): for the same `start`/`stop`, the generic `readbytes` method requests
`stop + 1 - start` bytes — confirm which length callers expect.

# Throws
- `ArgumentError` if `start >= stop`.
"""
function readbytes(path::Zarr.AWSS3.S3Path, start::Integer, stop::Integer)
    # Explicit exception rather than `@assert`: assertions are meant for internal
    # invariants and may be disabled, whereas a bad byte range must always be rejected.
    if start >= stop
        msg = "In `readbytes`, start ($(start)) must be less than stop ($(stop))."
        throw(ArgumentError(msg))
    end
    return read(path; byte_range = (start + 1):stop)
end
Loading

0 comments on commit dfa9605

Please sign in to comment.