Client-side chunks 4: integrations (#6441)
Integrate the new chunk batcher in all SDKs, and get rid of the old one.

On the backend, we make sure to deserialize incoming chunks into the old
`DataTable`s, so business can continue as usual.
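
For reference, the legacy round-trip that keeps the backend in business looks roughly like this. It's a minimal sketch adapted from the doc-comment example this diff removes from `data_table.rs` (doc-test style, so no `fn main`; the entity path and timeline name are placeholders):

```rust
use re_log_types::{
    example_components::{MyColor, MyPoint},
    DataRow, DataTable, RowId, TableId, TimePoint, Timeline,
};

let table_id = TableId::new();
let timepoint = TimePoint::from([(Timeline::new_sequence("frame_nr"), 1i64)]);

// What an SDK produces: a row of component batches…
let points: &[MyPoint] = &[MyPoint { x: 10.0, y: 10.0 }, MyPoint { x: 20.0, y: 20.0 }];
let colors: &[MyColor] = &[MyColor(0xff7f7f7f)];
let row = DataRow::from_cells2(RowId::new(), "points", timepoint, (points, colors)).unwrap();

// …batched into a table and serialized into an Arrow schema + columns for the wire…
let table_in = DataTable::from_rows(table_id, [row]);
let (schema, columns) = table_in.serialize().unwrap();

// …which the backend deserializes right back into the old `DataTable`.
let table_out = DataTable::deserialize(table_id, &schema, &columns).unwrap();
assert_eq!(table_in, table_out);
```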


Although the new batcher has a much more complicated task with all these
sub-splits to manage, it is somehow already more performant than the old
one 🤷‍♂️:
```bash
# this branch
cargo b -p log_benchmark --release && hyperfine --runs 15 './target/release/log_benchmark --benchmarks points3d_many_individual'
Benchmark 1: ./target/release/log_benchmark --benchmarks points3d_many_individual
  Time (mean ± σ):      4.499 s ±  0.117 s    [User: 5.544 s, System: 1.836 s]
  Range (min … max):    4.226 s …  4.640 s    15 runs

# main
cargo b -p log_benchmark --release && hyperfine --runs 15 './target/release/log_benchmark --benchmarks points3d_many_individual'
Benchmark 1: ./target/release/log_benchmark --benchmarks points3d_many_individual
  Time (mean ± σ):      4.407 s ±  0.773 s    [User: 8.423 s, System: 0.880 s]
  Range (min … max):    2.997 s …  6.148 s    15 runs
```
Notice the massive difference in user time: ~5.5 s of CPU for the new batcher vs ~8.4 s for the old one, at roughly the same wall-clock mean, and with far less run-to-run variance (σ = 0.117 s vs 0.773 s).

---

Part of a PR series to implement our new chunk-based data model on the
client-side (SDKs):
- #6437
- #6438
- #6439
- #6440
- #6441
teh-cmc authored May 31, 2024 · 1 parent fde4a87 · commit 9a86ad5
Showing 15 changed files with 291 additions and 817 deletions.
Cargo.lock (2 changes: 2 additions & 0 deletions)

```diff
@@ -4765,6 +4765,7 @@ dependencies = [
  "rand",
  "re_build_info",
  "re_build_tools",
+ "re_chunk",
  "re_data_loader",
  "re_data_store",
  "re_log",
@@ -5525,6 +5526,7 @@ dependencies = [
  "re_arrow2",
  "re_build_info",
  "re_build_tools",
+ "re_chunk",
  "re_log",
  "re_log_types",
  "re_memory",
```
crates/re_log_types/src/data_table.rs (97 changes: 29 additions & 68 deletions)

````diff
@@ -272,67 +272,6 @@ re_types_core::delegate_arrow_tuid!(TableId as "rerun.controls.TableId");
 /// │ 2        ┆ 2023-04-05 09:36:47.188855872 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ c                 ┆ [hey]       ┆ -                                ┆ [4294967295]    │
 /// └──────────┴───────────────────────────────┴──────────────────────────────────┴───────────────────┴─────────────┴──────────────────────────────────┴─────────────────┘
 /// ```
-///
-/// ## Example
-///
-/// ```rust
-/// # use re_log_types::{
-/// #     example_components::{MyColor, MyLabel, MyPoint},
-/// #     DataRow, DataTable, RowId, TableId, Timeline, TimePoint,
-/// # };
-/// #
-/// # let table_id = TableId::new();
-/// #
-/// # let timepoint = |frame_nr: i64, clock: i64| {
-/// #     TimePoint::from([
-/// #         (Timeline::new_sequence("frame_nr"), frame_nr),
-/// #         (Timeline::new_sequence("clock"), clock),
-/// #     ])
-/// # };
-/// #
-/// let row0 = {
-///     let points: &[MyPoint] = &[MyPoint { x: 10.0, y: 10.0 }, MyPoint { x: 20.0, y: 20.0 }];
-///     let colors: &[_] = &[MyColor(0xff7f7f7f)];
-///     let labels: &[MyLabel] = &[];
-///
-///     DataRow::from_cells3(
-///         RowId::new(),
-///         "a",
-///         timepoint(1, 1),
-///         (points, colors, labels),
-///     ).unwrap()
-/// };
-///
-/// let row1 = {
-///     let colors: &[MyColor] = &[];
-///
-///     DataRow::from_cells1(RowId::new(), "b", timepoint(1, 2), colors).unwrap()
-/// };
-///
-/// let row2 = {
-///     let colors: &[_] = &[MyColor(0xff7f7f7f)];
-///     let labels: &[_] = &[MyLabel("hey".into())];
-///
-///     DataRow::from_cells2(
-///         RowId::new(),
-///         "c",
-///         timepoint(2, 1),
-///         (colors, labels),
-///     ).unwrap()
-/// };
-///
-/// let table_in = DataTable::from_rows(table_id, [row0, row1, row2]);
-/// eprintln!("Table in:\n{table_in}");
-///
-/// let (schema, columns) = table_in.serialize().unwrap();
-/// // eprintln!("{schema:#?}");
-/// eprintln!("Wired chunk:\n{columns:#?}");
-///
-/// let table_out = DataTable::deserialize(table_id, &schema, &columns).unwrap();
-/// eprintln!("Table out:\n{table_out}");
-/// #
-/// # assert_eq!(table_in, table_out);
-/// ```
 #[derive(Debug, Clone, PartialEq)]
 pub struct DataTable {
     /// Auto-generated `TUID`, uniquely identifying this batch of data and keeping track of the
@@ -582,6 +521,20 @@ impl DataTable {
         let mut schema = Schema::default();
         let mut columns = Vec::new();

+        // Temporary compatibility layer with Chunks.
+        if let Some(entity_path) = self.col_entity_path.front() {
+            /// The key used to identify a Rerun [`EntityPath`] in chunk-level [`ArrowSchema`] metadata.
+            //
+            // NOTE: Temporarily copied from `re_chunk` while we're transitioning away to the new data
+            // model.
+            const CHUNK_METADATA_KEY_ENTITY_PATH: &str = "rerun.entity_path";
+
+            schema.metadata.insert(
+                CHUNK_METADATA_KEY_ENTITY_PATH.to_owned(),
+                entity_path.to_string(),
+            );
+        }
+
         {
             let (control_schema, control_columns) = self.serialize_time_columns();
             schema.fields.extend(control_schema.fields);
@@ -873,6 +826,18 @@ impl DataTable {
     ) -> DataTableResult<Self> {
         re_tracing::profile_function!();

+        /// The key used to identify a Rerun [`EntityPath`] in chunk-level [`ArrowSchema`] metadata.
+        //
+        // NOTE: Temporarily copied from `re_chunk` while we're transitioning away to the new data
+        // model.
+        const CHUNK_METADATA_KEY_ENTITY_PATH: &str = "rerun.entity_path";
+
+        let entity_path = schema
+            .metadata
+            .get(CHUNK_METADATA_KEY_ENTITY_PATH)
+            .ok_or_else(|| DataTableError::MissingColumn("metadata:entity_path".to_owned()))?;
+        let entity_path = EntityPath::parse_forgiving(entity_path);
+
         // --- Time ---

         let col_timelines: DataTableResult<_> = schema
@@ -920,13 +885,9 @@ impl DataTable {
                 .unwrap()
                 .as_ref(),
         )?;
-        #[allow(clippy::unwrap_used)]
-        let col_entity_path = EntityPath::from_arrow(
-            chunk
-                .get(control_index(EntityPath::name().as_str())?)
-                .unwrap()
-                .as_ref(),
-        )?;
+        let col_entity_path = std::iter::repeat_with(|| entity_path.clone())
+            .take(col_row_id.len())
+            .collect_vec();

         // --- Components ---

````
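
For reference, the compatibility shim above boils down to a round-trip through chunk-level Arrow schema metadata: the serializer stamps the table's entity path under the `rerun.entity_path` key, and the deserializer reads it back and fans it out over every row. Here is a minimal, self-contained sketch of that idea, with a plain `BTreeMap` standing in for the arrow2 schema's metadata map (the helper names are illustrative, not the crate's API):

```rust
use std::collections::BTreeMap;

/// Stand-in for the metadata half of an arrow2 `Schema` (illustrative only).
type SchemaMetadata = BTreeMap<String, String>;

/// The chunk-level metadata key used in the diff above.
const CHUNK_METADATA_KEY_ENTITY_PATH: &str = "rerun.entity_path";

/// Sender side: stamp the chunk's entity path into the schema metadata.
fn write_entity_path(metadata: &mut SchemaMetadata, entity_path: &str) {
    metadata.insert(
        CHUNK_METADATA_KEY_ENTITY_PATH.to_owned(),
        entity_path.to_owned(),
    );
}

/// Receiver side: recover the entity path, erroring out if the chunk wasn't stamped.
fn read_entity_path(metadata: &SchemaMetadata) -> Result<&str, String> {
    metadata
        .get(CHUNK_METADATA_KEY_ENTITY_PATH)
        .map(String::as_str)
        .ok_or_else(|| "missing metadata: rerun.entity_path".to_owned())
}

fn main() {
    let mut metadata = SchemaMetadata::default();
    write_entity_path(&mut metadata, "world/points");

    // One entity path per chunk: fan it out over however many rows the chunk
    // carries, mirroring the `repeat_with(…).take(col_row_id.len())` in the diff.
    let entity_path = read_entity_path(&metadata).unwrap();
    let col_entity_path: Vec<_> = std::iter::repeat(entity_path).take(3).collect();
    assert_eq!(col_entity_path, ["world/points"; 3]);
}
```

Note that the serializer only keeps `col_entity_path.front()`: a chunk belongs to exactly one entity, so tables mixing several entity paths no longer survive the round-trip, which is presumably also why the old multi-entity doc example had to go.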
