Skip to content

Commit

Permalink
progress on rust docs (geoarrow#426)
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron authored Jan 11, 2024
1 parent ee29f20 commit 4e91882
Show file tree
Hide file tree
Showing 38 changed files with 245 additions and 211 deletions.
13 changes: 13 additions & 0 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,16 @@ To build website:
poetry install
poetry run mkdocs serve
```

To deploy: we need a couple of manual steps because `mkdocs gh-deploy` _replaces_
any existing content on the `gh-pages` branch, and we want an _upsert_ that
doesn't touch the `js/` or `python/` directories, which are deployed separately.

```
poetry run mkdocs build
git checkout gh-pages
mv site/* ../
git add 404.html assets index.html sitemap.xml sitemap.xml.gz search stylesheets
git commit -m "New revision of top-level docs site"
git push
```
2 changes: 1 addition & 1 deletion src/algorithm/native/downcast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@ impl Downcast for GeoTable {
.map(|(mut batch, geom_chunk)| {
batch.remove_column(geometry_column_index);
let mut columns = batch.columns().to_vec();
columns.push(geom_chunk.clone().as_ref().to_array_ref());
columns.push(geom_chunk.to_array_ref());
RecordBatch::try_new(new_schema.clone(), columns).unwrap()
})
.collect();
Expand Down
46 changes: 30 additions & 16 deletions src/chunked_array/chunked_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ use crate::datatypes::GeoDataType;
use crate::error::{GeoArrowError, Result};
use crate::GeometryArrayTrait;

/// A collection of Arrow arrays of the same type.
///
/// This can be thought of as a column in a table, as Table objects normally have internal batches.
#[derive(Debug, Clone, PartialEq)]
pub struct ChunkedArray<A: Array> {
pub(crate) chunks: Vec<A>,
Expand Down Expand Up @@ -91,6 +94,11 @@ impl<A: Array> TryFrom<Vec<A>> for ChunkedArray<A> {
}
}

/// A collection of GeoArrow geometry arrays of the same type.
///
/// This can be thought of as a geometry column in a table, as Table objects normally have internal
/// batches.
///
/// ## Invariants:
///
/// - Must have at least one chunk
Expand Down Expand Up @@ -187,15 +195,29 @@ pub type ChunkedWKBArray<O> = ChunkedGeometryArray<WKBArray<O>>;
pub type ChunkedRectArray = ChunkedGeometryArray<RectArray>;
pub type ChunkedUnknownGeometryArray = ChunkedGeometryArray<Arc<dyn GeometryArrayTrait>>;

/// A trait implemented by all chunked geometry arrays.
///
/// This trait is often used for downcasting. For example, the [`from_geoarrow_chunks`] function
/// returns a dynamically-typed `Arc<dyn ChunkedGeometryArrayTrait>`. To downcast into a
/// strongly-typed chunked array, use `as_any` with the `data_type` method to discern which chunked
/// array type to pass to `downcast_ref`.
pub trait ChunkedGeometryArrayTrait: std::fmt::Debug + Send + Sync {
    /// Returns the array as [`Any`] so that it can be downcast to a specific implementation.
    fn as_any(&self) -> &dyn Any;

    /// Returns a reference to the [`GeoDataType`] of this array.
    fn data_type(&self) -> &GeoDataType;

    /// Returns an Arrow [`Field`] describing this chunked array. This field will always have the
    /// `ARROW:extension:name` key of the field metadata set, signifying that it describes a
    /// GeoArrow extension type.
    fn extension_field(&self) -> Arc<Field>;

    /// Access the geometry chunks contained within this chunked array.
    ///
    /// Each returned element borrows from `self`.
    fn geometry_chunks(&self) -> Vec<&dyn GeometryArrayTrait>;

    /// The number of chunks in this chunked array.
    fn num_chunks(&self) -> usize;
}

Expand All @@ -214,11 +236,8 @@ impl ChunkedGeometryArrayTrait for ChunkedPointArray {
self.chunks.first().unwrap().extension_field()
}

fn geometry_chunks(&self) -> Vec<Arc<dyn GeometryArrayTrait>> {
self.chunks
.iter()
.map(|chunk| Arc::new(chunk.clone()) as Arc<dyn GeometryArrayTrait>)
.collect()
fn geometry_chunks(&self) -> Vec<&dyn GeometryArrayTrait> {
self.chunks.iter().map(|chunk| chunk.as_ref()).collect()
}

fn num_chunks(&self) -> usize {
Expand All @@ -243,11 +262,8 @@ macro_rules! impl_trait {
self.chunks.first().unwrap().extension_field()
}

fn geometry_chunks(&self) -> Vec<Arc<dyn GeometryArrayTrait>> {
self.chunks
.iter()
.map(|chunk| Arc::new(chunk.clone()) as Arc<dyn GeometryArrayTrait>)
.collect()
fn geometry_chunks(&self) -> Vec<&dyn GeometryArrayTrait> {
self.chunks.iter().map(|chunk| chunk.as_ref()).collect()
}

fn num_chunks(&self) -> usize {
Expand Down Expand Up @@ -281,18 +297,16 @@ impl ChunkedGeometryArrayTrait for ChunkedRectArray {
self.chunks.first().unwrap().extension_field()
}

fn geometry_chunks(&self) -> Vec<Arc<dyn GeometryArrayTrait>> {
self.chunks
.iter()
.map(|chunk| Arc::new(chunk.clone()) as Arc<dyn GeometryArrayTrait>)
.collect()
fn geometry_chunks(&self) -> Vec<&dyn GeometryArrayTrait> {
self.chunks.iter().map(|chunk| chunk.as_ref()).collect()
}

/// The number of chunks in this chunked array, i.e. the length of `self.chunks`.
fn num_chunks(&self) -> usize {
self.chunks.len()
}
}

/// Construct a chunked geometry array from Arrow array chunks.
///
/// Does **not** parse WKB. Will return a ChunkedWKBArray for WKB input.
pub fn from_arrow_chunks(
chunks: &[&dyn Array],
Expand Down
9 changes: 9 additions & 0 deletions src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
//! Contains implementations of _chunked_ GeoArrow arrays.
//!
//! In contrast to the structures in [array](../array), these data structures only have contiguous
//! memory within each individual _chunk_. These chunked arrays are essentially wrappers around a
//! `Vec` of geometry arrays.
//!
//! Additionally, if the `rayon` feature is active, operations on chunked arrays will automatically
//! be parallelized across each chunk.
#[allow(clippy::module_inception)]
pub mod chunked_array;

Expand Down
100 changes: 84 additions & 16 deletions src/datatypes.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
//! Contains the implementation of [`GeoDataType`], which defines all geometry arrays in this
//! crate.
use std::collections::{HashMap, HashSet};
use std::sync::Arc;

Expand All @@ -7,25 +10,91 @@ use arrow_schema::{DataType, Field, UnionFields, UnionMode};
use crate::array::CoordType;
use crate::error::{GeoArrowError, Result};

/// Identifies the concrete type of a geometry array.
///
/// The geometry type is designed to aid in downcasting from dynamically-typed geometry arrays by
/// uniquely identifying the physical buffer layout of each geometry array type.
///
/// It must always be possible to accurately downcast from a `&dyn GeometryArrayTrait` or `&dyn
/// ChunkedGeometryArrayTrait` to a unique concrete array type using this enum.
///
/// Variants prefixed with `Large` describe arrays with `i64` offsets; their unprefixed
/// counterparts describe the same array type with `i32` offsets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GeoDataType {
/// Represents a [PointArray][crate::array::PointArray] or
/// [ChunkedPointArray][crate::chunked_array::ChunkedPointArray].
Point(CoordType),

/// Represents a [LineStringArray][crate::array::LineStringArray] or
/// [ChunkedLineStringArray][crate::chunked_array::ChunkedLineStringArray] with `i32` offsets.
LineString(CoordType),

/// Represents a [LineStringArray][crate::array::LineStringArray] or
/// [ChunkedLineStringArray][crate::chunked_array::ChunkedLineStringArray] with `i64` offsets.
LargeLineString(CoordType),

/// Represents a [PolygonArray][crate::array::PolygonArray] or
/// [ChunkedPolygonArray][crate::chunked_array::ChunkedPolygonArray] with `i32` offsets.
Polygon(CoordType),

/// Represents a [PolygonArray][crate::array::PolygonArray] or
/// [ChunkedPolygonArray][crate::chunked_array::ChunkedPolygonArray] with `i64` offsets.
LargePolygon(CoordType),

/// Represents a [MultiPointArray][crate::array::MultiPointArray] or
/// [ChunkedMultiPointArray][crate::chunked_array::ChunkedMultiPointArray] with `i32` offsets.
MultiPoint(CoordType),

/// Represents a [MultiPointArray][crate::array::MultiPointArray] or
/// [ChunkedMultiPointArray][crate::chunked_array::ChunkedMultiPointArray] with `i64` offsets.
LargeMultiPoint(CoordType),

/// Represents a [MultiLineStringArray][crate::array::MultiLineStringArray] or
/// [ChunkedMultiLineStringArray][crate::chunked_array::ChunkedMultiLineStringArray] with `i32`
/// offsets.
MultiLineString(CoordType),

/// Represents a [MultiLineStringArray][crate::array::MultiLineStringArray] or
/// [ChunkedMultiLineStringArray][crate::chunked_array::ChunkedMultiLineStringArray] with `i64`
/// offsets.
LargeMultiLineString(CoordType),

/// Represents a [MultiPolygonArray][crate::array::MultiPolygonArray] or
/// [ChunkedMultiPolygonArray][crate::chunked_array::ChunkedMultiPolygonArray] with `i32`
/// offsets.
MultiPolygon(CoordType),

/// Represents a [MultiPolygonArray][crate::array::MultiPolygonArray] or
/// [ChunkedMultiPolygonArray][crate::chunked_array::ChunkedMultiPolygonArray] with `i64`
/// offsets.
LargeMultiPolygon(CoordType),

/// Represents a [MixedGeometryArray][crate::array::MixedGeometryArray] or
/// [ChunkedMixedGeometryArray][crate::chunked_array::ChunkedMixedGeometryArray] with `i32`
/// offsets.
Mixed(CoordType),

/// Represents a [MixedGeometryArray][crate::array::MixedGeometryArray] or
/// [ChunkedMixedGeometryArray][crate::chunked_array::ChunkedMixedGeometryArray] with `i64`
/// offsets.
LargeMixed(CoordType),

/// Represents a [GeometryCollectionArray][crate::array::GeometryCollectionArray] or
/// [ChunkedGeometryCollectionArray][crate::chunked_array::ChunkedGeometryCollectionArray] with
/// `i32` offsets.
GeometryCollection(CoordType),

/// Represents a [GeometryCollectionArray][crate::array::GeometryCollectionArray] or
/// [ChunkedGeometryCollectionArray][crate::chunked_array::ChunkedGeometryCollectionArray] with
/// `i64` offsets.
LargeGeometryCollection(CoordType),

/// Represents a [WKBArray][crate::array::WKBArray] or
/// [ChunkedWKBArray][crate::chunked_array::ChunkedWKBArray] with `i32` offsets.
WKB,

/// Represents a [WKBArray][crate::array::WKBArray] or
/// [ChunkedWKBArray][crate::chunked_array::ChunkedWKBArray] with `i64` offsets.
LargeWKB,

/// Represents a [RectArray][crate::array::RectArray] or
/// [ChunkedRectArray][crate::chunked_array::ChunkedRectArray].
Rect,
}

Expand Down Expand Up @@ -181,6 +250,10 @@ fn rect_data_type() -> DataType {
}

impl GeoDataType {
/// Convert a [`GeoDataType`] into the relevant arrow [`DataType`].
///
/// Note that an arrow [`DataType`] will lose the accompanying GeoArrow metadata if it is not
/// part of a [`Field`] with GeoArrow extension metadata in its field metadata.
pub fn to_data_type(&self) -> DataType {
use GeoDataType::*;
match self {
Expand All @@ -205,30 +278,25 @@ impl GeoDataType {
}
}

/// Get the GeoArrow extension name pertaining to this data type.
///
/// The `i32`- and `i64`-offset variants of a geometry type share the same extension name.
///
/// # Panics
///
/// Panics for [`GeoDataType::Rect`], for which no extension name is implemented.
pub fn extension_name(&self) -> &'static str {
    use GeoDataType::*;
    match self {
        Point(_) => "geoarrow.point",
        LineString(_) | LargeLineString(_) => "geoarrow.linestring",
        Polygon(_) | LargePolygon(_) => "geoarrow.polygon",
        MultiPoint(_) | LargeMultiPoint(_) => "geoarrow.multipoint",
        MultiLineString(_) | LargeMultiLineString(_) => "geoarrow.multilinestring",
        MultiPolygon(_) | LargeMultiPolygon(_) => "geoarrow.multipolygon",
        Mixed(_) | LargeMixed(_) => "geoarrow.geometry",
        GeometryCollection(_) | LargeGeometryCollection(_) => "geoarrow.geometrycollection",
        WKB | LargeWKB => "geoarrow.wkb",
        Rect => unimplemented!(),
    }
}

/// Convert this [`GeoDataType`] into an arrow [`Field`], maintaining GeoArrow extension
/// metadata.
pub fn to_field<N: Into<String>>(&self, name: N, nullable: bool) -> Field {
let extension_name = self.extension_name();
let mut metadata = HashMap::with_capacity(1);
Expand Down
2 changes: 2 additions & 0 deletions src/io/csv/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! Contains implementations of reading from and writing to CSV files.
pub use reader::{read_csv, CSVReaderOptions};
pub use writer::write_csv;

Expand Down
2 changes: 1 addition & 1 deletion src/io/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::io::Read;

use crate::array::CoordType;
use crate::error::Result;
use crate::io::geozero::array::mixed::MixedGeometryStreamBuilder;
use crate::io::geozero::array::MixedGeometryStreamBuilder;
use crate::io::geozero::table::builder::GeoTableBuilder;
use crate::io::geozero::table::GeoTableBuilderOptions;
use crate::table::GeoTable;
Expand Down
3 changes: 2 additions & 1 deletion src/io/flatgeobuf/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
//! Read the [FlatGeobuf](https://flatgeobuf.org/) format.
//! Contains implementations of reading from and writing to [FlatGeobuf](https://flatgeobuf.org/)
//! files.
mod reader;
mod writer;
Expand Down
2 changes: 1 addition & 1 deletion src/io/flatgeobuf/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
use crate::algorithm::native::Downcast;
use crate::array::*;
use crate::error::{GeoArrowError, Result};
use crate::io::geozero::array::mixed::MixedGeometryStreamBuilder;
use crate::io::geozero::array::MixedGeometryStreamBuilder;
use crate::io::geozero::table::{GeoTableBuilder, GeoTableBuilderOptions};
use crate::table::GeoTable;
use arrow_schema::{DataType, Field, SchemaBuilder, TimeUnit};
Expand Down
2 changes: 2 additions & 0 deletions src/io/geojson/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! Contains implementations of reading from and writing to [GeoJSON](https://geojson.org/) files.
pub use reader::read_geojson;
pub use writer::write_geojson;

Expand Down
2 changes: 1 addition & 1 deletion src/io/geojson/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::io::Read;

use crate::array::CoordType;
use crate::error::Result;
use crate::io::geozero::array::mixed::MixedGeometryStreamBuilder;
use crate::io::geozero::array::MixedGeometryStreamBuilder;
use crate::io::geozero::table::builder::{GeoTableBuilder, GeoTableBuilderOptions};
use crate::table::GeoTable;

Expand Down
3 changes: 3 additions & 0 deletions src/io/geojson_lines/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
//! Contains an implementation of reading from [newline-delimited
//! GeoJSON](https://stevage.github.io/ndgeojson/) files.
pub mod reader;

pub use reader::read_geojson_lines;
2 changes: 1 addition & 1 deletion src/io/geojson_lines/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::io::{BufRead, Cursor};

use crate::array::CoordType;
use crate::error::Result;
use crate::io::geozero::array::mixed::MixedGeometryStreamBuilder;
use crate::io::geozero::array::MixedGeometryStreamBuilder;
use crate::io::geozero::table::builder::{GeoTableBuilder, GeoTableBuilderOptions};
use crate::table::GeoTable;

Expand Down
Loading

0 comments on commit 4e91882

Please sign in to comment.