refactor: remove remaining uses of arrow_array and use reexport in arrow instead #14528

Merged
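The change applied throughout this PR is mechanical: direct imports from the `arrow_array` crate are replaced with the equivalent re-exports under `arrow::array` in the `arrow` facade crate, and the standalone `arrow-array` dependency is dropped from the affected `Cargo.toml` files. A minimal sketch of the pattern (illustrative only, not taken from the diff):

```rust
use std::sync::Arc;

// Previously these types were imported from the arrow-array crate directly:
// use arrow_array::{ArrayRef, Int32Array, RecordBatch};
//
// After this PR they come from the re-exports under arrow::array:
use arrow::array::{ArrayRef, Int32Array, RecordBatch};

fn main() {
    // Build a one-column batch using only the re-exported types.
    let col: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let batch = RecordBatch::try_from_iter([("a", col)]).unwrap();
    assert_eq!(batch.num_rows(), 3);
}
```

Only the import path changes; the items are the same types re-exported by `arrow`, so no behavior changes are expected.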
2 changes: 0 additions & 2 deletions Cargo.toml
@@ -80,8 +80,6 @@ ahash = { version = "0.8", default-features = false, features = [
] }
arrow = { version = "54.1.0", features = [
"prettyprint",
] }
arrow-array = { version = "54.1.0", default-features = false, features = [
"chrono-tz",
] }
arrow-buffer = { version = "54.1.0", default-features = false }
21 changes: 8 additions & 13 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default.

@@ -24,7 +24,6 @@ publish = false
[dependencies]
abi_stable = "0.11.3"
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-schema = { workspace = true }
datafusion = { workspace = true }
datafusion-ffi = { workspace = true }
@@ -18,12 +18,9 @@
use std::sync::Arc;

use abi_stable::{export_root_module, prefix_type::PrefixTypeTrait};
use arrow_array::RecordBatch;
use datafusion::{
arrow::datatypes::{DataType, Field, Schema},
common::record_batch,
datasource::MemTable,
};
use arrow::array::RecordBatch;
use arrow::datatypes::{DataType, Field, Schema};
use datafusion::{common::record_batch, datasource::MemTable};
use datafusion_ffi::table_provider::FFI_TableProvider;
use ffi_module_interface::{TableProviderModule, TableProviderModuleRef};

1 change: 0 additions & 1 deletion datafusion/common/Cargo.toml
@@ -51,7 +51,6 @@ apache-avro = { version = "0.17", default-features = false, features = [
"zstandard",
], optional = true }
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-ipc = { workspace = true }
arrow-schema = { workspace = true }
Contributor
Hmm, looks like there is one PR left -- arrow-schema 🤔

Any chance you want another fun cleanup PR @Chen-Yuan-Lai :)

Contributor Author
@alamb Sure 💪

base64 = "0.22.1"
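
For illustration, a sketch of what the arrow-schema follow-up suggested in the comments above might look like, assuming the same re-export pattern applies (`arrow_schema` types are also available under `arrow::datatypes`); this is not part of this PR:

```rust
use std::sync::Arc;

// Before (direct dependency on the arrow-schema crate):
// use arrow_schema::{DataType, Field, Schema, SchemaRef};
//
// After (hypothetical follow-up): the same types via the arrow facade.
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};

fn example_schema() -> SchemaRef {
    // Schema-building code is unchanged; only the import path moves.
    Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int64, false),
        Field::new("name", DataType::Utf8, true),
    ]))
}

fn main() {
    assert_eq!(example_schema().fields().len(), 2);
}
```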
8 changes: 4 additions & 4 deletions datafusion/common/src/cast.rs
@@ -21,6 +21,10 @@
//! kernels in arrow-rs such as `as_boolean_array` do.

use crate::{downcast_value, DataFusionError, Result};
use arrow::array::{
BinaryViewArray, Float16Array, Int16Array, Int8Array, LargeBinaryArray,
LargeStringArray, StringViewArray, UInt16Array,
};
use arrow::{
array::{
Array, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array,
@@ -36,10 +40,6 @@ use arrow::{
},
datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType},
};
use arrow_array::{
BinaryViewArray, Float16Array, Int16Array, Int8Array, LargeBinaryArray,
LargeStringArray, StringViewArray, UInt16Array,
};

// Downcast ArrayRef to Date32Array
pub fn as_date32_array(array: &dyn Array) -> Result<&Date32Array> {
3 changes: 1 addition & 2 deletions datafusion/common/src/pyarrow.rs
@@ -17,9 +17,8 @@

//! Conversions between PyArrow and DataFusion types

use arrow::array::ArrayData;
use arrow::array::{Array, ArrayData};
use arrow::pyarrow::{FromPyArrow, ToPyArrow};
use arrow_array::Array;
use pyo3::exceptions::PyException;
use pyo3::prelude::PyErr;
use pyo3::types::{PyAnyMethods, PyList};
34 changes: 18 additions & 16 deletions datafusion/common/src/scalar/mod.rs
@@ -40,22 +40,24 @@ use crate::cast::{
use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
use crate::hash_utils::create_hashes;
use crate::utils::SingleRowListArrayBuilder;
use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano};
use arrow::array::{
types::{IntervalDayTime, IntervalMonthDayNano},
*,
};
use arrow::buffer::ScalarBuffer;
use arrow::compute::kernels::numeric::*;
use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
use arrow::{
array::*,
compute::kernels::cast::{cast_with_options, CastOptions},
datatypes::{
i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
Date32Type, Date64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
IntervalYearMonthType, TimeUnit, TimestampMicrosecondType,
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
UInt16Type, UInt32Type, UInt64Type, UInt8Type, DECIMAL128_MAX_PRECISION,
},
use arrow::compute::kernels::{
cast::{cast_with_options, CastOptions},
numeric::*,
};
use arrow::datatypes::{
i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
Date32Type, Date64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
IntervalYearMonthType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type,
UInt8Type, DECIMAL128_MAX_PRECISION,
};
use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
use arrow_schema::{UnionFields, UnionMode};

use crate::format::DEFAULT_CAST_OPTIONS;
@@ -165,7 +167,7 @@ pub use struct_builder::ScalarStructBuilder;
/// ```
/// # use std::sync::Arc;
/// # use arrow::datatypes::{DataType, Field, Fields};
/// # use arrow_array::{ArrayRef, Int32Array, StructArray, StringArray};
/// # use arrow::array::{ArrayRef, Int32Array, StructArray, StringArray};
/// # use datafusion_common::ScalarValue;
/// // Build a struct like: {a: 1, b: "foo"}
/// // Field description
@@ -1674,7 +1676,7 @@ impl ScalarValue {
///
/// assert_eq!(&result, &expected);
/// ```
/// [`Datum`]: arrow_array::Datum
/// [`Datum`]: arrow::array::Datum
pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
Ok(Scalar::new(self.to_array_of_size(1)?))
}
6 changes: 3 additions & 3 deletions datafusion/common/src/test_util.rs
@@ -34,7 +34,7 @@ use std::{error::Error, path::PathBuf};
/// ```
/// # use std::sync::Arc;
/// # use arrow::record_batch::RecordBatch;
/// # use arrow_array::{ArrayRef, Int32Array};
/// # use arrow::array::{ArrayRef, Int32Array};
/// # use datafusion_common::assert_batches_eq;
/// let col: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
/// let batch = RecordBatch::try_from_iter([("column", col)]).unwrap();
@@ -344,7 +344,7 @@ macro_rules! record_batch {
)*
]));

let batch = arrow_array::RecordBatch::try_new(
let batch = arrow::array::RecordBatch::try_new(
schema,
vec![$(
$crate::create_array!($type, $values),
@@ -416,7 +416,7 @@ mod tests {

#[test]
fn test_create_record_batch() -> Result<()> {
use arrow_array::Array;
use arrow::array::Array;

let batch = record_batch!(
("a", Int32, vec![1, 2, 3, 4]),
13 changes: 6 additions & 7 deletions datafusion/common/src/utils/mod.rs
@@ -24,14 +24,13 @@ pub mod string_utils;

use crate::error::{_internal_datafusion_err, _internal_err};
use crate::{DataFusionError, Result, ScalarValue};
use arrow::array::ArrayRef;
use arrow::array::{
cast::AsArray, Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray,
OffsetSizeTrait,
};
use arrow::buffer::OffsetBuffer;
use arrow::compute::{partition, SortColumn, SortOptions};
use arrow::datatypes::{Field, SchemaRef};
use arrow_array::cast::AsArray;
use arrow_array::{
Array, FixedSizeListArray, LargeListArray, ListArray, OffsetSizeTrait,
};
use arrow_schema::DataType;
use sqlparser::ast::Ident;
use sqlparser::dialect::GenericDialect;
@@ -329,8 +328,8 @@ pub fn longest_consecutive_prefix<T: Borrow<usize>>(
/// # Example
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{Array, ListArray};
/// # use arrow_array::types::Int64Type;
/// # use arrow::array::{Array, ListArray};
/// # use arrow::array::types::Int64Type;
/// # use datafusion_common::utils::SingleRowListArrayBuilder;
/// // Array is [1, 2, 3]
/// let arr = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
4 changes: 2 additions & 2 deletions datafusion/common/src/utils/string_utils.rs
@@ -17,8 +17,8 @@

//! Utilities for working with strings

use arrow::{array::AsArray, datatypes::DataType};
use arrow_array::Array;
use arrow::array::{Array, AsArray};
use arrow::datatypes::DataType;

/// Convenient function to convert an Arrow string array to a vector of strings
pub fn string_array_to_vec(array: &dyn Array) -> Vec<Option<&str>> {
1 change: 0 additions & 1 deletion datafusion/core/Cargo.toml
@@ -85,7 +85,6 @@ extended_tests = []
[dependencies]
apache-avro = { version = "0.17", optional = true }
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-ipc = { workspace = true }
arrow-schema = { workspace = true }
async-compression = { version = "0.4.0", features = [
12 changes: 4 additions & 8 deletions datafusion/core/benches/data_utils/mod.rs
@@ -17,15 +17,11 @@

//! This module provides the in-memory table for more realistic benchmarking.

use arrow::{
array::Float32Array,
array::Float64Array,
array::StringArray,
array::UInt64Array,
datatypes::{DataType, Field, Schema, SchemaRef},
record_batch::RecordBatch,
use arrow::array::{
builder::{Int64Builder, StringBuilder},
Float32Array, Float64Array, RecordBatch, StringArray, UInt64Array,
};
use arrow_array::builder::{Int64Builder, StringBuilder};
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion_common::DataFusionError;
2 changes: 1 addition & 1 deletion datafusion/core/benches/map_query_sql.rs
@@ -17,7 +17,7 @@

use std::sync::Arc;

use arrow_array::{ArrayRef, Int32Array, RecordBatch};
use arrow::array::{ArrayRef, Int32Array, RecordBatch};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use parking_lot::Mutex;
use rand::prelude::ThreadRng;
2 changes: 1 addition & 1 deletion datafusion/core/benches/sql_planner.rs
@@ -23,8 +23,8 @@ extern crate datafusion;
mod data_utils;

use crate::criterion::Criterion;
use arrow::array::{ArrayRef, RecordBatch};
use arrow::datatypes::{DataType, Field, Fields, Schema};
use arrow_array::{ArrayRef, RecordBatch};
use criterion::Bencher;
use datafusion::datasource::MemTable;
use datafusion::execution::context::SessionContext;
3 changes: 1 addition & 2 deletions datafusion/core/benches/sql_query_with_io.rs
@@ -17,8 +17,7 @@

use std::{fmt::Write, sync::Arc, time::Duration};

use arrow::array::{Int64Builder, UInt64Builder};
use arrow_array::RecordBatch;
use arrow::array::{Int64Builder, RecordBatch, UInt64Builder};
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use bytes::Bytes;
use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/csv.rs
@@ -760,10 +760,10 @@ mod tests {
use crate::prelude::{CsvReadOptions, SessionConfig, SessionContext};
use crate::test_util::arrow_test_data;

use arrow::array::{BooleanArray, Float64Array, Int32Array, StringArray};
use arrow::compute::concat_batches;
use arrow::csv::ReaderBuilder;
use arrow::util::pretty::pretty_format_batches;
use arrow_array::{BooleanArray, Float64Array, Int32Array, StringArray};
use datafusion_common::cast::as_string_array;
use datafusion_common::internal_err;
use datafusion_common::stats::Precision;
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/json.rs
@@ -42,11 +42,11 @@ use crate::physical_plan::{
DisplayAs, DisplayFormatType, SendableRecordBatchStream, Statistics,
};

use arrow::array::RecordBatch;
use arrow::datatypes::Schema;
use arrow::datatypes::SchemaRef;
use arrow::json;
use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter};
use arrow_array::RecordBatch;
use arrow_schema::ArrowError;
use datafusion_catalog::Session;
use datafusion_common::config::{ConfigField, ConfigFileType, JsonOptions};
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/mod.rs
@@ -37,12 +37,12 @@ use std::fmt::{self, Debug, Display};
use std::sync::Arc;
use std::task::Poll;

use crate::arrow::array::RecordBatch;
use crate::arrow::datatypes::SchemaRef;
use crate::datasource::physical_plan::{FileScanConfig, FileSinkConfig};
use crate::error::Result;
use crate::physical_plan::{ExecutionPlan, Statistics};

use arrow_array::RecordBatch;
use arrow_schema::{ArrowError, DataType, Field, FieldRef, Schema};
use datafusion_catalog::Session;
use datafusion_common::file_options::file_type::FileType;
7 changes: 4 additions & 3 deletions datafusion/core/src/datasource/file_format/parquet.rs
@@ -1309,9 +1309,10 @@ mod tests {
use crate::datasource::file_format::parquet::test_util::store_parquet;
use crate::physical_plan::metrics::MetricValue;
use crate::prelude::{ParquetReadOptions, SessionConfig, SessionContext};
use arrow::array::{Array, ArrayRef, StringArray};
use arrow_array::types::Int32Type;
use arrow_array::{DictionaryArray, Int32Array, Int64Array};
use arrow::array::{
types::Int32Type, Array, ArrayRef, DictionaryArray, Int32Array, Int64Array,
StringArray,
};
use arrow_schema::{DataType, Field};
use async_trait::async_trait;
use datafusion_common::cast::{