Skip to content

Commit

Permalink
Buffer batches to use interleave
Browse files Browse the repository at this point in the history
  • Loading branch information
Dandandan committed Nov 30, 2023
1 parent 52fbb08 commit f71dd3f
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions datafusion/physical-plan/src/repartition/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use std::sync::Arc;
use std::task::{Context, Poll};
use std::{any::Any, vec};

use arrow::array::{ArrayRef, UInt64Builder};
use arrow::datatypes::SchemaRef;
use arrow::record_batch::RecordBatch;
use arrow_array::Array;
Expand Down Expand Up @@ -182,7 +181,7 @@ impl BatchPartitioner {
create_hashes(&arrays, random_state, hash_buffer)?;

for (index, hash) in hash_buffer.iter().enumerate() {
let p = (*hash % *partitions as u64);
let p = *hash % *partitions as u64;
indices[p as usize].push((i, index))
}
}
Expand Down Expand Up @@ -743,6 +742,8 @@ impl RepartitionExec {
timer.done();
}

batches_buffer.clear();

// If the input stream is endless, we may spin forever and
// never yield back to tokio. See
// https://github.com/apache/arrow-datafusion/issues/5278.
Expand Down

0 comments on commit f71dd3f

Please sign in to comment.