Skip to content

Commit

Permalink
File writer preserve dict bug (apache#6711)
Browse files Browse the repository at this point in the history
* arrow-ipc: Add failing test for IPC file writer not preserving dict ID

* arrow-ipc: Fix footer schema in IPC file
  • Loading branch information
brancz authored Nov 15, 2024
1 parent 07e8e20 commit 5a86db3
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
37 changes: 36 additions & 1 deletion arrow-ipc/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1395,7 +1395,7 @@ impl<R: Read> RecordBatchReader for StreamReader<R> {

#[cfg(test)]
mod tests {
use crate::writer::{unslice_run_array, DictionaryTracker, IpcDataGenerator};
use crate::writer::{unslice_run_array, DictionaryTracker, IpcDataGenerator, IpcWriteOptions};

use super::*;

Expand Down Expand Up @@ -1702,6 +1702,41 @@ mod tests {
assert_eq!(batch, roundtrip_ipc(&batch));
}

#[test]
fn test_roundtrip_nested_dict_no_preserve_dict_id() {
let inner: DictionaryArray<Int32Type> = vec!["a", "b", "a"].into_iter().collect();

let array = Arc::new(inner) as ArrayRef;

let dctfield = Arc::new(Field::new("dict", array.data_type().clone(), false));

let s = StructArray::from(vec![(dctfield, array)]);
let struct_array = Arc::new(s) as ArrayRef;

let schema = Arc::new(Schema::new(vec![Field::new(
"struct",
struct_array.data_type().clone(),
false,
)]));

let batch = RecordBatch::try_new(schema, vec![struct_array]).unwrap();

let mut buf = Vec::new();
let mut writer = crate::writer::FileWriter::try_new_with_options(
&mut buf,
batch.schema_ref(),
IpcWriteOptions::default().with_preserve_dict_id(false),
)
.unwrap();
writer.write(&batch).unwrap();
writer.finish().unwrap();
drop(writer);

let mut reader = FileReader::try_new(std::io::Cursor::new(buf), None).unwrap();

assert_eq!(batch, reader.next().unwrap().unwrap());
}

fn check_union_with_builder(mut builder: UnionBuilder) {
builder.append::<Int32Type>("a", 1).unwrap();
builder.append_null::<Int32Type>("a").unwrap();
Expand Down
5 changes: 4 additions & 1 deletion arrow-ipc/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1012,8 +1012,11 @@ impl<W: Write> FileWriter<W> {
let mut fbb = FlatBufferBuilder::new();
let dictionaries = fbb.create_vector(&self.dictionary_blocks);
let record_batches = fbb.create_vector(&self.record_blocks);
let preserve_dict_id = self.write_options.preserve_dict_id;
let mut dictionary_tracker =
DictionaryTracker::new_with_preserve_dict_id(true, preserve_dict_id);
let schema = IpcSchemaEncoder::new()
.with_dictionary_tracker(&mut self.dictionary_tracker)
.with_dictionary_tracker(&mut dictionary_tracker)
.schema_to_fb_offset(&mut fbb, &self.schema);
let fb_custom_metadata = (!self.custom_metadata.is_empty())
.then(|| crate::convert::metadata_to_fb(&mut fbb, &self.custom_metadata));
Expand Down

0 comments on commit 5a86db3

Please sign in to comment.