Skip to content

Commit

Permalink
Support Utf8View for bit_length kernel (apache#6671)
Browse files Browse the repository at this point in the history
* Support `Utf8View` for string function `bit_length()`

Signed-off-by: Austin Liu <[email protected]>

* Add test & handle view bytes length counting

Signed-off-by: Austin Liu <[email protected]>

Add test & handle view bytes length counting

Signed-off-by: Austin Liu <[email protected]>

* Refine `string_view_array`

Signed-off-by: Austin Liu <[email protected]>

* Change length from `i32` to `u32` & check nullity

Signed-off-by: Austin Liu <[email protected]>

* Clean up

Signed-off-by: Austin Liu <[email protected]>

* Refine

Signed-off-by: Austin Liu <[email protected]>

* Use `from_unary` instead

Signed-off-by: Austin Liu <[email protected]>

* Avoid inspecting the string data

Signed-off-by: Austin Liu <[email protected]>

* Clean up

Signed-off-by: Austin Liu <[email protected]>

---------

Signed-off-by: Austin Liu <[email protected]>
  • Loading branch information
austin362667 authored Nov 5, 2024
1 parent b11b151 commit 350ea26
Showing 1 changed file with 38 additions and 0 deletions.
38 changes: 38 additions & 0 deletions arrow-string/src/length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,15 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
let list = array.as_string::<i64>();
Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
}
DataType::Utf8View => {
let list = array.as_string_view();
let values = list
.views()
.iter()
.map(|view| (*view as i32).wrapping_mul(8))
.collect();
Ok(Arc::new(Int32Array::new(values, array.nulls().cloned())))
}
DataType::Binary => {
let list = array.as_binary::<i32>();
Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
Expand Down Expand Up @@ -462,6 +471,35 @@ mod tests {
})
}

/// Runs every shared `bit_length_cases` fixture through `bit_length` using a
/// `StringViewArray` as input and compares each produced value with the
/// expected bit count.
#[test]
fn bit_length_test_utf8view() {
    for (input, len, expected) in bit_length_cases() {
        let view_array = StringViewArray::from(input);
        let output = bit_length(&view_array).unwrap();

        // The kernel must yield exactly one output slot per input slot.
        assert_eq!(len, output.len());

        let bits = output.as_any().downcast_ref::<Int32Array>().unwrap();
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(*want, bits.value(idx));
        }
    }
}

/// Checks that `bit_length` preserves nulls when given `Utf8View` input.
///
/// Each fixture from `bit_length_null_cases` is loaded into a
/// `StringViewArray`, so the `DataType::Utf8View` branch of `bit_length` is
/// the one being exercised. The whole result is compared against the expected
/// `Int32Array`, which covers validity as well as the values.
#[test]
fn bit_length_null_utf8view() {
    bit_length_null_cases()
        .into_iter()
        .for_each(|(input, len, expected)| {
            // This must be a view array: a plain `StringArray` would only
            // re-test the `Utf8` path and leave the view path uncovered.
            let array = StringViewArray::from(input);
            let result = bit_length(&array).unwrap();
            assert_eq!(len, result.len());
            let result = result.as_any().downcast_ref::<Int32Array>().unwrap();

            // Whole-array equality also verifies that null slots stay null.
            let expected: Int32Array = expected.into();
            assert_eq!(&expected, result);
        })
}
#[test]
fn bit_length_binary() {
let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];
Expand Down

0 comments on commit 350ea26

Please sign in to comment.