From f59b94fd7cae376e7398f31743bc732a746b25b3 Mon Sep 17 00:00:00 2001 From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com> Date: Sat, 25 Jan 2025 18:18:15 +0200 Subject: [PATCH] fix: use the values builder capacity for the hash map in `PrimitiveDictionaryBuilder::new_from_builders` (#7012) * feat: allow setting custom value data type in `PrimitiveDictionaryBuilder` Fixes #7011 * use the values capacity for the hash map * update new_from_empty_builders and not new_from_builders --- .../builder/primitive_dictionary_builder.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs b/arrow-array/src/builder/primitive_dictionary_builder.rs index 282f0ae9d5b1..f4a6662462e0 100644 --- a/arrow-array/src/builder/primitive_dictionary_builder.rs +++ b/arrow-array/src/builder/primitive_dictionary_builder.rs @@ -126,10 +126,11 @@ where keys_builder.is_empty() && values_builder.is_empty(), "keys and values builders must be empty" ); + let values_capacity = values_builder.capacity(); Self { keys_builder, values_builder, - map: HashMap::new(), + map: HashMap::with_capacity(values_capacity), } } @@ -633,4 +634,19 @@ mod tests { assert_eq!(values, [None, None]); } + + #[test] + fn creating_dictionary_from_builders_should_use_values_capacity_for_the_map() { + let builder = PrimitiveDictionaryBuilder::<Int32Type, crate::types::TimestampMicrosecondType>::new_from_empty_builders( + PrimitiveBuilder::with_capacity(1).with_data_type(DataType::Int32), + PrimitiveBuilder::with_capacity(2).with_data_type(DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, Some("+08:00".into()))), + ); + + assert!( + builder.map.capacity() >= builder.values_builder.capacity(), + "map capacity {} should be at least the values capacity {}", + builder.map.capacity(), + builder.values_builder.capacity() + ) + } }