-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b105af9
commit e3f5385
Showing
11 changed files
with
176 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
/Manifest.toml | ||
/docs/Manifest.toml | ||
/docs/build/ | ||
|
||
/test/ref.parquet/ | ||
/test/real_zarray.zarr/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,84 @@ | ||
using Kerchunk | ||
using Documenter | ||
using Documenter, DocumenterVitepress | ||
|
||
DocMeta.setdocmeta!(Kerchunk, :DocTestSetup, :(using Kerchunk); recursive=true) | ||
|
||
using Literate | ||
|
||
|
||
# Clean-up step: delete coverage (`.cov`) files that a CI run may have left behind.
# The walk starts at the parent of this script's directory and visits every
# subdirectory beneath it.
for (dir, _, filenames) in walkdir(dirname(@__DIR__))
    for name in filenames
        # Only files whose extension is exactly `.cov` are removed.
        last(splitext(name)) == ".cov" || continue
        rm(joinpath(dir, name))
    end
end
|
||
# Now, we convert the source code to markdown files using Literate.jl | ||
source_path = joinpath(dirname(@__DIR__), "src") | ||
output_path = joinpath(@__DIR__, "src", "source") | ||
mkpath(output_path) | ||
|
||
literate_pages = Any[] | ||
|
||
# Literate is asked for plain CommonMark rather than Documenter blocks, so the
# `@meta` block that Documenter uses to build the edit link is prepended by a
# custom postprocessor instead.
"""
    _add_meta_edit_link_generator(path)

Return a one-argument function that prepends a Documenter `@meta` block to its
input. The block sets `EditURL` to `path` with `.jl` appended.
"""
function _add_meta_edit_link_generator(path)
    # NOTE(review): the visible caller builds `path` from `relpath(path, source_path)`
    # of a file that already ends in `.jl`, so the URL may end in `.jl.jl` — confirm
    # whether the extra `.jl` suffix is really needed.
    function prepend_meta(input)
        header = string("```@meta\n", "EditURL = \"", path, ".jl\"\n", "```\n")
        return header * input
    end
    return prepend_meta
end
|
||
"""
    ucfirst(str::String)

Return a copy of `str` whose first character has been converted to uppercase.

An empty string is returned unchanged. The string is split by character rather
than by byte index, so a multi-byte first character (e.g. `"éa"`) is handled
instead of raising a `StringIndexError`.
"""
function ucfirst(str::String)
    isempty(str) && return str
    # `chop(...; head = 1, tail = 0)` drops exactly one leading *character*,
    # which is safe for any UTF-8 input (unlike `str[2:end]`).
    return string(uppercase(first(str)), chop(str; head = 1, tail = 0))
end
|
||
"""
    process_literate_recursive!(pages::Vector{Any}, path::String)

Append documentation page entries for `path` to `pages`.

- If `path` is a directory, each of its entries is processed recursively into a
  fresh list, and `ucfirst(<directory name>) => list` is pushed onto `pages`.
- If `path` is a file ending in `.jl`, it is converted with `Literate.markdown`
  into the matching subdirectory of the global `output_path`, and the resulting
  `source/<relative path>.md` entry is pushed onto `pages`.
- Anything else is ignored.

Reads the globals `source_path` and `output_path`.
"""
function process_literate_recursive!(pages::Vector{Any}, path::String)
    global source_path
    global output_path

    if isdir(path)
        children = []
        for entry in readdir(path; join = true)
            process_literate_recursive!(children, normpath(entry))
        end
        return push!(pages, ucfirst(splitdir(path)[2]) => children)
    end

    # Only Julia source files are converted; everything else is skipped.
    (isfile(path) && endswith(path, ".jl")) || return nothing

    rel = relpath(path, source_path)
    target_dir = joinpath(output_path, splitdir(rel)[1])
    # Path from the generated Markdown's directory back to the original source file.
    edit_target = joinpath(relpath(source_path, target_dir), rel)
    Literate.markdown(
        path, target_dir;
        flavor = Literate.CommonMarkFlavor(),
        postprocess = _add_meta_edit_link_generator(edit_target),
    )
    page = joinpath("source", splitext(rel)[1] * ".md")
    return push!(pages, page)
end
|
||
# Run the conversion with `JULIA_DEBUG=Literate` set, so that Literate's debug
# output is allowed to reach the terminal.
withenv("JULIA_DEBUG" => "Literate") do
    global literate_pages
    collected = []
    process_literate_recursive!(collected, source_path)
    # Hack: the only top-level entry is the `name => children` pair for the source
    # directory itself; keep just its children so the page tree has no leading
    # "src" folder.
    # TODO: We should probably fix the above in `process_literate_recursive!`.
    root_entry = collected[1]
    literate_pages = root_entry[2]
end
|
||
# Build the documentation site.
#
# The call previously listed `format` twice (a `Documenter.HTML(...)` entry with a
# `canonical` URL, followed by the `MarkdownVitepress(...)` entry) and was missing
# the comma before `pages`, so it could not parse. Only the Vitepress format is
# kept, matching the `DocumenterVitepress` import at the top of the file.
makedocs(;
    modules=[Kerchunk],
    authors="Anshul Singhvi <[email protected]> and contributors",
    sitename="Kerchunk.jl",
    format=MarkdownVitepress(; repo="https://github.com/JuliaIO/Kerchunk.jl"),
    pages=[
        "Home" => "index.md",
        "What is Kerchunk?" => "what_the_heck.md",
        "API" => "api.md",
        # Pages generated from the package source by Literate (see above).
        "Source code" => literate_pages,
    ],
)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
```@index | ||
``` | ||
|
||
```@autodocs | ||
Modules = [Kerchunk] | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# What is Kerchunk? | ||
|
||
## Available data sources | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#= | ||
Kerchunk has two file formats - JSON as discussed earlier, and Parquet. | ||
The Parquet format is a bit complicated - files are nested in | ||
a directory structure, and row indices are computable from | ||
the chunk index. | ||
The files are also paginated based on a parameter. | ||
Files might look something like this: | ||
``` | ||
ref.parquet/deep/name/refs.0.parq | ||
ref.parquet/name/refs.0.parq | ||
ref.parquet/.zmetadata | ||
``` | ||
One must first parse `.zmetadata`, a JSON file, which has two fields: | ||
- A `dict[str, str]` that encodes the zmetadata; this may also contain inlined files. | ||
- A field `record_size` that encodes how many records may be stored in a single Parquet file. | ||
=# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
using CondaPkg, Parquet | ||
|
||
# Generate the Parquet reference file `ref.parquet` by running an inline Python | ||
# script with the interpreter that `CondaPkg.which("python")` resolves, inside | ||
# `CondaPkg.withenv()`. The script opens a Zarr group for writing on top of an | ||
# fsspec `LazyReferenceMapper` and fills two datasets with random normal data: | ||
# `name` (10x10) at the root and `name` (15x15) inside the group `deep`. | ||
CondaPkg.withenv() do | ||
run(``` | ||
$(CondaPkg.which("python")) -c " | ||
import numpy as np | ||
import fsspec | ||
import fsspec.implementations.reference | ||
import zarr | ||
lz = fsspec.implementations.reference.LazyReferenceMapper.create(\"ref.parquet\") | ||
z = zarr.open_group(lz, mode=\"w\") | ||
d = z.create_dataset(\"name\", shape=(10,10)) | ||
d[:, :] = np.random.randn(10, 10) | ||
g2 = z.create_group(\"deep\") | ||
d = g2.create_dataset(\"name\", shape=(15, 15)) | ||
d[:, :] = np.random.randn(15, 15) | ||
" | ||
```) | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# One way to benchmark how much performance the Kerchunk implementation | ||
# is costing us is to use an actual Zarr file. | ||
# We can simulate a Kerchunk catalog but use an actual Zarr array, | ||
# so the difference in benchmark speeds between Kerchunk and Zarr | ||
# should provide useful data. | ||
|