From a61d9974c9f65fad29e698c3e981a28c70aff42b Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Wed, 5 Feb 2025 22:50:26 +0100 Subject: [PATCH 01/38] Add `RowIdColumnDescriptor` --- Cargo.lock | 1 + crates/store/re_sorbet/Cargo.toml | 1 + crates/store/re_sorbet/src/lib.rs | 2 + .../re_sorbet/src/row_id_column_schema.rs | 61 +++++++++++++++++++ 4 files changed, 65 insertions(+) create mode 100644 crates/store/re_sorbet/src/row_id_column_schema.rs diff --git a/Cargo.lock b/Cargo.lock index ee0ad1e76ede..e31e7a0b8f91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6588,6 +6588,7 @@ dependencies = [ "arrow", "re_log", "re_log_types", + "re_tuid", "re_types_core", "thiserror 1.0.65", ] diff --git a/crates/store/re_sorbet/Cargo.toml b/crates/store/re_sorbet/Cargo.toml index 9cbedebbf68e..f23822371aee 100644 --- a/crates/store/re_sorbet/Cargo.toml +++ b/crates/store/re_sorbet/Cargo.toml @@ -22,6 +22,7 @@ all-features = true [dependencies] re_log_types.workspace = true re_log.workspace = true +re_tuid.workspace = true re_types_core.workspace = true arrow.workspace = true diff --git a/crates/store/re_sorbet/src/lib.rs b/crates/store/re_sorbet/src/lib.rs index adfd5c216ac3..fcd4cd150958 100644 --- a/crates/store/re_sorbet/src/lib.rs +++ b/crates/store/re_sorbet/src/lib.rs @@ -7,6 +7,7 @@ mod data_column_schema; mod index_column_schema; mod ipc; mod metadata; +mod row_id_column_schema; pub use self::{ column_schema::{ColumnDescriptor, ColumnError}, @@ -17,4 +18,5 @@ pub use self::{ ArrowBatchMetadata, ArrowFieldMetadata, MetadataExt, MissingFieldMetadata, MissingMetadataKey, }, + row_id_column_schema::{RowIdColumnDescriptor, WrongDatatypeError}, }; diff --git a/crates/store/re_sorbet/src/row_id_column_schema.rs b/crates/store/re_sorbet/src/row_id_column_schema.rs new file mode 100644 index 000000000000..0346283ca213 --- /dev/null +++ b/crates/store/re_sorbet/src/row_id_column_schema.rs @@ -0,0 +1,61 @@ +use arrow::datatypes::{DataType as ArrowDatatype, Field as ArrowField}; +use re_types_core::{Component as _, Loggable as _, RowId}; + +#[derive(thiserror::Error, Debug)] +#[error("Wrong datatype. 
Expected {expected:?}, got {actual:?}")]
+pub struct WrongDatatypeError {
+    pub expected: ArrowDatatype,
+    pub actual: ArrowDatatype,
+}
+
+/// Describes the [`RowId`]
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct RowIdColumnDescriptor {}
+
+impl RowIdColumnDescriptor {
+    #[inline]
+    pub fn to_arrow_field(&self) -> ArrowField {
+        let Self {} = self;
+
+        let nullable = false; // All rows have an id
+
+        let metadata = [
+            Some(("rerun.kind".to_owned(), "control".to_owned())),
+            // This ensures the RowId/Tuid is formatted correctly:
+            Some((
+                "ARROW:extension:name".to_owned(),
+                re_tuid::Tuid::ARROW_EXTENSION_NAME.to_owned(),
+            )),
+        ]
+        .into_iter()
+        .flatten()
+        .collect();
+
+        ArrowField::new(
+            RowId::descriptor().to_string(),
+            RowId::arrow_datatype(),
+            nullable,
+        )
+        .with_metadata(metadata)
+    }
+
+    #[allow(clippy::unused_self)]
+    pub fn datatype(&self) -> ArrowDatatype {
+        RowId::arrow_datatype()
+    }
+}
+
+impl TryFrom<&ArrowField> for RowIdColumnDescriptor {
+    type Error = WrongDatatypeError;
+
+    fn try_from(field: &ArrowField) -> Result<Self, Self::Error> {
+        if field.data_type() == &RowId::arrow_datatype() {
+            Ok(Self {})
+        } else {
+            Err(WrongDatatypeError {
+                expected: RowId::arrow_datatype(),
+                actual: field.data_type().clone(),
+            })
+        }
+    }
+}

From 65a90df8bc5a97f88eab4a8dc8787c5a0aea1f6d Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Thu, 6 Feb 2025 08:24:36 +0100
Subject: [PATCH 02/38] Add `struct ChunkSchema`

---
 crates/store/re_sorbet/src/chunk_schema.rs | 59 ++++++++++++++++++++++
 crates/store/re_sorbet/src/lib.rs          |  2 +
 2 files changed, 61 insertions(+)
 create mode 100644 crates/store/re_sorbet/src/chunk_schema.rs

diff --git a/crates/store/re_sorbet/src/chunk_schema.rs b/crates/store/re_sorbet/src/chunk_schema.rs
new file mode 100644
index 000000000000..00d7a13775c9
--- /dev/null
+++ b/crates/store/re_sorbet/src/chunk_schema.rs
@@ -0,0 +1,59 @@
+use arrow::datatypes::{Field as ArrowField, Schema as ArrowSchema};
+
+use re_log_types::EntityPath;
+use re_types_core::ChunkId;
+
+use crate::{ArrowBatchMetadata, ComponentColumnDescriptor, RowIdColumnDescriptor};
+
+/// The parsed schema of a Rerun chunk, i.e. multiple columns of data for a single entity.
+///
+/// This does NOT preserve custom arrow metadata.
+/// It only contains the metadata used by Rerun.
+pub struct ChunkSchema {
+    /// The globally unique ID of this chunk.
+    chunk_id: ChunkId,
+
+    /// Which entity is this chunk for?
+    entity_path: EntityPath,
+
+    /// Are we sorted by the row id column?
+    is_sorted: bool,
+
+    /// The primary row id column.
+    row_id_column: RowIdColumnDescriptor,
+
+    /// All other columns (indices and data).
+    columns: Vec<ComponentColumnDescriptor>,
+}
+
+impl From<ChunkSchema> for ArrowSchema {
+    fn from(chunk_schema: ChunkSchema) -> Self {
+        let ChunkSchema {
+            chunk_id,
+            entity_path,
+            is_sorted,
+            row_id_column,
+            columns,
+        } = chunk_schema;
+
+        let mut metadata = ArrowBatchMetadata::from([
+            ("rerun.id".to_owned(), format!("{:X}", chunk_id.as_u128())),
+            ("rerun.entity_path".to_owned(), entity_path.to_string()),
+            // TODO: heap_size_bytes ?
+ ]); + if is_sorted { + metadata.insert("rerun.is_sorted".to_owned(), "true".to_owned()); + } + + let mut fields: Vec = Vec::with_capacity(1 + columns.len()); + fields.push(row_id_column.to_arrow_field()); + fields.extend(columns.iter().map(|column| column.to_arrow_field())); + + Self { + metadata, + fields: fields.into(), + } + } +} + +// impl TryFrom for ChunkSchema { } diff --git a/crates/store/re_sorbet/src/lib.rs b/crates/store/re_sorbet/src/lib.rs index fcd4cd150958..fc0cf0260a16 100644 --- a/crates/store/re_sorbet/src/lib.rs +++ b/crates/store/re_sorbet/src/lib.rs @@ -2,6 +2,7 @@ //! //! Handles the structure of arrow record batches and their meta data for different use cases for Rerun. +mod chunk_schema; mod column_schema; mod data_column_schema; mod index_column_schema; @@ -10,6 +11,7 @@ mod metadata; mod row_id_column_schema; pub use self::{ + chunk_schema::ChunkSchema, column_schema::{ColumnDescriptor, ColumnError}, data_column_schema::ComponentColumnDescriptor, index_column_schema::{TimeColumnDescriptor, UnsupportedTimeType}, From a5b597f01d3816a1f0360ecc4617e5a2eccd4fba Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Thu, 6 Feb 2025 10:17:25 +0100 Subject: [PATCH 03/38] Add converted from arrow schema --- crates/store/re_sorbet/src/chunk_schema.rs | 99 ++++++++++++++++++- .../re_sorbet/src/row_id_column_schema.rs | 1 + 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/crates/store/re_sorbet/src/chunk_schema.rs b/crates/store/re_sorbet/src/chunk_schema.rs index 00d7a13775c9..b9d889ef3036 100644 --- a/crates/store/re_sorbet/src/chunk_schema.rs +++ b/crates/store/re_sorbet/src/chunk_schema.rs @@ -3,7 +3,36 @@ use arrow::datatypes::{Field as ArrowField, Schema as ArrowSchema}; use re_log_types::EntityPath; use re_types_core::ChunkId; -use crate::{ArrowBatchMetadata, ComponentColumnDescriptor, RowIdColumnDescriptor}; +use crate::{ + ArrowBatchMetadata, ComponentColumnDescriptor, MetadataExt as _, MissingFieldMetadata, + MissingMetadataKey, RowIdColumnDescriptor, WrongDatatypeError, +}; + +#[derive(thiserror::Error, Debug)] +pub enum ChunkSchemaError { + #[error(transparent)] + MissingMetadataKey(#[from] MissingMetadataKey), + + #[error("Bad RowId columns: {0}")] + BadRowIdColumn(WrongDatatypeError), + + #[error("Bad column '{field_name}': {error}")] + BadColumn { + field_name: String, + error: MissingFieldMetadata, + }, + + #[error("Bad chunk schema: {reason}")] + Custom { reason: String }, +} + +impl ChunkSchemaError { + fn custom(reason: impl Into) -> Self { + Self::Custom { + reason: reason.into(), + } + } +} /// The parsed schema of a Rerun chunk, i.e. multiple columns of data for a single entity. /// @@ -16,6 +45,9 @@ pub struct ChunkSchema { /// Which entity is this chunk for? entity_path: EntityPath, + /// The heap size of this chunk in bytes, if known. + heap_size_bytes: Option, + /// Are we sorted by the row id column? is_sorted: bool, @@ -31,6 +63,7 @@ impl From for ArrowSchema { let ChunkSchema { chunk_id, entity_path, + heap_size_bytes, is_sorted, row_id_column, columns, @@ -39,8 +72,13 @@ impl From for ArrowSchema { let mut metadata = ArrowBatchMetadata::from([ ("rerun.id".to_owned(), format!("{:X}", chunk_id.as_u128())), ("rerun.entity_path".to_owned(), entity_path.to_string()), - // TODO: heap_size_bytes ? 
         ]);
+        if let Some(heap_size_bytes) = heap_size_bytes {
+            metadata.insert(
+                "rerun.heap_size_bytes".to_owned(),
+                heap_size_bytes.to_string(),
+            );
+        }
         if is_sorted {
             metadata.insert("rerun.is_sorted".to_owned(), "true".to_owned());
         }
@@ -56,4 +94,59 @@ impl From<ChunkSchema> for ArrowSchema {
     }
 }
 
-// impl TryFrom<ArrowSchema> for ChunkSchema { }
+impl TryFrom<ArrowSchema> for ChunkSchema {
+    type Error = ChunkSchemaError;
+
+    fn try_from(arrow_schema: ArrowSchema) -> Result<Self, Self::Error> {
+        let ArrowSchema { metadata, fields } = arrow_schema;
+
+        let chunk_id = {
+            let chunk_id = metadata.get_or_err("rerun.id")?;
+            let chunk_id = u128::from_str_radix(chunk_id, 16).map_err(|err| {
+                ChunkSchemaError::custom(format!(
+                    "Failed to deserialize chunk id {chunk_id:?}: {err}"
+                ))
+            })?;
+            ChunkId::from_u128(chunk_id)
+        };
+
+        let entity_path = EntityPath::parse_forgiving(metadata.get_or_err("rerun.entity_path")?);
+        let is_sorted = metadata.get_bool("rerun.is_sorted");
+        let heap_size_bytes = if let Some(heap_size_bytes) = metadata.get("rerun.heap_size_bytes") {
+            heap_size_bytes.parse().ok() // TODO: log error
+        } else {
+            None
+        };
+
+        // The first field must be the row id column:
+        let Some(first_field) = fields.first() else {
+            return Err(ChunkSchemaError::custom("No fields in schema"));
+        };
+
+        let row_id_column = RowIdColumnDescriptor::try_from(first_field.as_ref())
+            .map_err(ChunkSchemaError::BadRowIdColumn)?;
+
+        let columns: Result<Vec<_>, _> = fields
+            .iter()
+            .skip(1)
+            .map(|field| {
+                ComponentColumnDescriptor::try_from(field.as_ref()).map_err(|err| {
+                    ChunkSchemaError::BadColumn {
+                        field_name: field.name().to_owned(),
+                        error: err,
+                    }
+                })
+            })
+            .collect();
+        let columns = columns?;
+
+        Ok(Self {
+            chunk_id,
+            entity_path,
+            heap_size_bytes,
+            is_sorted,
+            row_id_column,
+            columns,
+        })
+    }
+}
diff --git a/crates/store/re_sorbet/src/row_id_column_schema.rs b/crates/store/re_sorbet/src/row_id_column_schema.rs
index 0346283ca213..3f8746434704 100644
--- a/crates/store/re_sorbet/src/row_id_column_schema.rs
+++ b/crates/store/re_sorbet/src/row_id_column_schema.rs
@@ -49,6 +49,7 @@ impl TryFrom<&ArrowField> for RowIdColumnDescriptor {
     type Error = WrongDatatypeError;
 
     fn try_from(field: &ArrowField) -> Result<Self, Self::Error> {
+        // TODO: check `rerun.kind`
         if field.data_type() == &RowId::arrow_datatype() {
             Ok(Self {})
         } else {

From b095d591079b901e3e9cfddffbe52dc42d606247 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Thu, 6 Feb 2025 16:25:52 +0100
Subject: [PATCH 04/38] Add `re_sorbet::ChunkBatch`

---
 Cargo.lock                                 |   1 +
 crates/store/re_sorbet/Cargo.toml          |   1 +
 crates/store/re_sorbet/src/chunk_batch.rs  | 128 +++++++++++----------
 crates/store/re_sorbet/src/chunk_schema.rs | 105 +++++++++++++----
 crates/store/re_sorbet/src/lib.rs          |   2 +
 5 files changed, 159 insertions(+), 78 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index e31e7a0b8f91..82477f8c0d33 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6586,6 +6586,7 @@ name = "re_sorbet"
 version = "0.23.0-alpha.1+dev"
 dependencies = [
  "arrow",
+ "re_format_arrow",
  "re_log",
  "re_log_types",
  "re_tuid",
diff --git a/crates/store/re_sorbet/Cargo.toml b/crates/store/re_sorbet/Cargo.toml
index f23822371aee..42d50270c915 100644
--- a/crates/store/re_sorbet/Cargo.toml
+++ b/crates/store/re_sorbet/Cargo.toml
@@ -20,6 +20,7 @@ all-features = true
 
 
 [dependencies]
+re_format_arrow.workspace = true
 re_log_types.workspace = true
 re_log.workspace = true
 re_tuid.workspace = true
 re_types_core.workspace = true
 
 arrow.workspace = true
diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs
index e91d7500b835..24fd116b5364 100644
---
a/crates/store/re_sorbet/src/chunk_batch.rs +++ b/crates/store/re_sorbet/src/chunk_batch.rs @@ -1,26 +1,67 @@ -use arrow::array::RecordBatch as ArrowRecordBatch; -use arrow::datatypes::Schema as ArrowSchema; +use arrow::{ + array::RecordBatch as ArrowRecordBatch, + datatypes::{Fields as ArrowFields, Schema as ArrowSchema}, +}; use re_log_types::EntityPath; +use re_types_core::ChunkId; -#[derive(thiserror::Error, Debug)] -pub enum ChunkBatchError { - #[error("Missing record batch metadata key `{0}`")] - MissingRecordBatchMetadataKey(&'static str), -} +use crate::{chunk_schema::InvalidChunkSchema, ArrowBatchMetadata, ChunkSchema}; /// The arrow [`ArrowRecordBatch`] representation of a Rerun chunk. /// /// This is a wrapper around a [`ArrowRecordBatch`]. /// /// Each [`ChunkBatch`] contains logging data for a single [`EntityPath`]. -/// It walwyas have a [`RowId`] column. +/// It always has a [`RowId`] column. #[derive(Debug, Clone)] pub struct ChunkBatch { - entity_path: EntityPath, + schema: ChunkSchema, batch: ArrowRecordBatch, } +impl ChunkBatch { + /// The parsed rerun schema of this chunk. + #[inline] + pub fn chunk_schema(&self) -> &ChunkSchema { + &self.schema + } + + /// The globally unique ID of this chunk. + #[inline] + pub fn chunk_id(&self) -> ChunkId { + self.schema.chunk_id() + } + + /// Which entity is this chunk for? + #[inline] + pub fn entity_path(&self) -> &EntityPath { + self.schema.entity_path() + } + + /// The heap size of this chunk in bytes, if known. + #[inline] + pub fn heap_size_bytes(&self) -> Option { + self.schema.heap_size_bytes() + } + + /// Are we sorted by the row id column? + #[inline] + pub fn is_sorted(&self) -> bool { + self.schema.is_sorted() + } + + #[inline] + pub fn fields(&self) -> &ArrowFields { + &self.schema_ref().fields + } + + #[inline] + pub fn arrow_bacth_metadata(&self) -> &ArrowBatchMetadata { + &self.batch.schema_ref().metadata + } +} + impl std::fmt::Display for ChunkBatch { #[inline] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -51,58 +92,29 @@ impl From for ArrowRecordBatch { } } -impl TryFrom for ChunkBatch { - type Error = ChunkBatchError; - - fn try_from(batch: ArrowRecordBatch) -> Result { - let mut schema = ArrowSchema::clone(&*batch.schema()); - let metadata = &mut schema.metadata; - - { - // Verify version - if let Some(batch_version) = metadata.get(Self::CHUNK_METADATA_KEY_VERSION) { - if batch_version != Self::CHUNK_METADATA_VERSION { - re_log::warn_once!( - "ChunkBatch version mismatch. Expected {:?}, got {batch_version:?}", - Self::CHUNK_METADATA_VERSION - ); - } - } - metadata.insert( - Self::CHUNK_METADATA_KEY_VERSION.to_owned(), - Self::CHUNK_METADATA_VERSION.to_owned(), - ); - } - - let entity_path = - if let Some(entity_path) = metadata.get(Self::CHUNK_METADATA_KEY_ENTITY_PATH) { - EntityPath::parse_forgiving(entity_path) - } else { - return Err(ChunkBatchError::MissingRecordBatchMetadataKey( - Self::CHUNK_METADATA_KEY_ENTITY_PATH, - )); - }; - - Ok(Self { entity_path, batch }) +impl From<&ChunkBatch> for ArrowRecordBatch { + #[inline] + fn from(chunk: &ChunkBatch) -> Self { + chunk.batch.clone() } } -/// ## Metadata keys for the record batch metadata -impl ChunkBatch { - /// The key used to identify the version of the Rerun schema. - const CHUNK_METADATA_KEY_VERSION: &'static str = "rerun.version"; - - /// The version of the Rerun schema. - const CHUNK_METADATA_VERSION: &'static str = "1"; - - /// The key used to identify a Rerun [`EntityPath`] in the record batch metadata. 
- const CHUNK_METADATA_KEY_ENTITY_PATH: &'static str = "rerun.entity_path"; -} +impl TryFrom for ChunkBatch { + type Error = InvalidChunkSchema; -impl ChunkBatch { - /// Returns the [`EntityPath`] of the batch. - #[inline] - pub fn entity_path(&self) -> &EntityPath { - &self.entity_path + fn try_from(batch: ArrowRecordBatch) -> Result { + let chunk_schema = ChunkSchema::try_from(batch.schema_ref().as_ref())?; + + // Extend with any metadata that might have been missing: + let mut arrow_schema = ArrowSchema::clone(batch.schema_ref().as_ref()); + arrow_schema + .metadata + .extend(chunk_schema.arrow_batch_metadata()); + let batch = batch.with_schema(arrow_schema.into()).expect("Can't fail"); + + Ok(Self { + schema: chunk_schema, + batch, + }) } } diff --git a/crates/store/re_sorbet/src/chunk_schema.rs b/crates/store/re_sorbet/src/chunk_schema.rs index b9d889ef3036..a10b40dcb6cd 100644 --- a/crates/store/re_sorbet/src/chunk_schema.rs +++ b/crates/store/re_sorbet/src/chunk_schema.rs @@ -9,7 +9,7 @@ use crate::{ }; #[derive(thiserror::Error, Debug)] -pub enum ChunkSchemaError { +pub enum InvalidChunkSchema { #[error(transparent)] MissingMetadataKey(#[from] MissingMetadataKey), @@ -26,7 +26,7 @@ pub enum ChunkSchemaError { Custom { reason: String }, } -impl ChunkSchemaError { +impl InvalidChunkSchema { fn custom(reason: impl Into) -> Self { Self::Custom { reason: reason.into(), @@ -38,6 +38,7 @@ impl ChunkSchemaError { /// /// This does NOT preserve custom arrow metadata. /// It only contains the metadata used by Rerun. +#[derive(Debug, Clone)] pub struct ChunkSchema { /// The globally unique ID of this chunk. chunk_id: ChunkId, @@ -58,31 +59,85 @@ pub struct ChunkSchema { columns: Vec, } -impl From for ArrowSchema { - fn from(chunk_schema: ChunkSchema) -> Self { - let ChunkSchema { +/// ## Metadata keys for the record batch metadata +impl ChunkSchema { + /// The key used to identify the version of the Rerun schema. + const CHUNK_METADATA_KEY_VERSION: &'static str = "rerun.version"; + + /// The version of the Rerun schema. + const CHUNK_METADATA_VERSION: &'static str = "1"; +} + +impl ChunkSchema { + /// The globally unique ID of this chunk. + + #[inline] + pub fn chunk_id(&self) -> ChunkId { + self.chunk_id + } + + /// Which entity is this chunk for? + + #[inline] + pub fn entity_path(&self) -> &EntityPath { + &self.entity_path + } + + /// The heap size of this chunk in bytes, if known. + + #[inline] + pub fn heap_size_bytes(&self) -> Option { + self.heap_size_bytes + } + + /// Are we sorted by the row id column? 
+ + #[inline] + pub fn is_sorted(&self) -> bool { + self.is_sorted + } + + pub fn arrow_batch_metadata(&self) -> ArrowBatchMetadata { + let Self { chunk_id, entity_path, heap_size_bytes, is_sorted, - row_id_column, - columns, - } = chunk_schema; - - let mut metadata = ArrowBatchMetadata::from([ + row_id_column: _, + columns: _, + } = self; + + let mut arrow_metadata = ArrowBatchMetadata::from([ + ( + Self::CHUNK_METADATA_KEY_VERSION.to_owned(), + Self::CHUNK_METADATA_VERSION.to_owned(), + ), ("rerun.id".to_owned(), format!("{:X}", chunk_id.as_u128())), ("rerun.entity_path".to_owned(), entity_path.to_string()), ]); if let Some(heap_size_bytes) = heap_size_bytes { - metadata.insert( + arrow_metadata.insert( "rerun.heap_size_bytes".to_owned(), heap_size_bytes.to_string(), ); } - if is_sorted { - metadata.insert("rerun.is_sorted".to_owned(), "true".to_owned()); + if *is_sorted { + arrow_metadata.insert("rerun.is_sorted".to_owned(), "true".to_owned()); } + arrow_metadata + } +} + +impl From for ArrowSchema { + fn from(chunk_schema: ChunkSchema) -> Self { + let metadata = chunk_schema.arrow_batch_metadata(); + + let ChunkSchema { + row_id_column, + columns, + .. + } = chunk_schema; let mut fields: Vec = Vec::with_capacity(1 + columns.len()); fields.push(row_id_column.to_arrow_field()); fields.extend(columns.iter().map(|column| column.to_arrow_field())); @@ -94,16 +149,16 @@ impl From for ArrowSchema { } } -impl TryFrom for ChunkSchema { - type Error = ChunkSchemaError; +impl TryFrom<&ArrowSchema> for ChunkSchema { + type Error = InvalidChunkSchema; - fn try_from(arrow_schema: ArrowSchema) -> Result { + fn try_from(arrow_schema: &ArrowSchema) -> Result { let ArrowSchema { metadata, fields } = arrow_schema; let chunk_id = { let chunk_id = metadata.get_or_err("rerun.id")?; let chunk_id = u128::from_str_radix(chunk_id, 16).map_err(|err| { - ChunkSchemaError::custom(format!( + InvalidChunkSchema::custom(format!( "Failed to deserialize chunk id {chunk_id:?}: {err}" )) })?; @@ -118,20 +173,30 @@ impl TryFrom for ChunkSchema { None }; + // Verify version + if let Some(batch_version) = metadata.get(Self::CHUNK_METADATA_KEY_VERSION) { + if batch_version != Self::CHUNK_METADATA_VERSION { + re_log::warn_once!( + "ChunkSchema version mismatch. Expected {:?}, got {batch_version:?}", + Self::CHUNK_METADATA_VERSION + ); + } + } + // The first field must be the row id column: let Some(first_field) = fields.first() else { - return Err(ChunkSchemaError::custom("No fields in schema")); + return Err(InvalidChunkSchema::custom("No fields in schema")); }; let row_id_column = RowIdColumnDescriptor::try_from(first_field.as_ref()) - .map_err(ChunkSchemaError::BadRowIdColumn)?; + .map_err(InvalidChunkSchema::BadRowIdColumn)?; let columns: Result, _> = fields .iter() .skip(1) .map(|field| { ComponentColumnDescriptor::try_from(field.as_ref()).map_err(|err| { - ChunkSchemaError::BadColumn { + InvalidChunkSchema::BadColumn { field_name: field.name().to_owned(), error: err, } diff --git a/crates/store/re_sorbet/src/lib.rs b/crates/store/re_sorbet/src/lib.rs index fc0cf0260a16..9544b32d6afe 100644 --- a/crates/store/re_sorbet/src/lib.rs +++ b/crates/store/re_sorbet/src/lib.rs @@ -2,6 +2,7 @@ //! //! Handles the structure of arrow record batches and their meta data for different use cases for Rerun. 
+mod chunk_batch; mod chunk_schema; mod column_schema; mod data_column_schema; @@ -11,6 +12,7 @@ mod metadata; mod row_id_column_schema; pub use self::{ + chunk_batch::ChunkBatch, chunk_schema::ChunkSchema, column_schema::{ColumnDescriptor, ColumnError}, data_column_schema::ComponentColumnDescriptor, From e90fdbeb55573ad96c212022515752d79c1b5413 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Fri, 7 Feb 2025 12:12:21 +0100 Subject: [PATCH 05/38] Convert Chunk to ChunkBatch --- Cargo.lock | 2 + crates/store/re_chunk/Cargo.toml | 5 +- crates/store/re_chunk/src/chunk.rs | 9 ++ crates/store/re_chunk/src/transport.rs | 143 +++++++++++++++++- crates/store/re_sorbet/Cargo.toml | 1 + crates/store/re_sorbet/src/chunk_batch.rs | 88 ++++++++++- crates/store/re_sorbet/src/chunk_schema.rs | 116 +++++++++++--- .../store/re_sorbet/src/data_column_schema.rs | 26 ++-- .../re_sorbet/src/index_column_schema.rs | 35 +++-- crates/store/re_sorbet/src/lib.rs | 2 +- .../re_sorbet/src/row_id_column_schema.rs | 43 +++++- 11 files changed, 412 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 82477f8c0d33..a21a840c791a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5830,6 +5830,7 @@ dependencies = [ "re_format_arrow", "re_log", "re_log_types", + "re_sorbet", "re_tracing", "re_tuid", "re_types_core", @@ -6586,6 +6587,7 @@ name = "re_sorbet" version = "0.23.0-alpha.1+dev" dependencies = [ "arrow", + "itertools 0.13.0", "re_format_arrow", "re_log", "re_log_types", diff --git a/crates/store/re_chunk/Cargo.toml b/crates/store/re_chunk/Cargo.toml index e8b617cac5fe..cc4c3873e0c4 100644 --- a/crates/store/re_chunk/Cargo.toml +++ b/crates/store/re_chunk/Cargo.toml @@ -32,10 +32,11 @@ serde = ["re_log_types/serde", "re_tuid/serde", "re_types_core/serde"] re_arrow_util.workspace = true re_byte_size.workspace = true re_error.workspace = true -re_format.workspace = true re_format_arrow.workspace = true -re_log.workspace = true +re_format.workspace = true re_log_types.workspace = true +re_log.workspace = true +re_sorbet.workspace = true re_tracing.workspace = true re_tuid.workspace = true re_types_core.workspace = true diff --git a/crates/store/re_chunk/src/chunk.rs b/crates/store/re_chunk/src/chunk.rs index 436914b46042..7d63e7e39f58 100644 --- a/crates/store/re_chunk/src/chunk.rs +++ b/crates/store/re_chunk/src/chunk.rs @@ -45,6 +45,15 @@ pub enum ChunkError { #[error("Deserialization: {0}")] Deserialization(#[from] DeserializationError), + + #[error(transparent)] + UnsupportedTimeType(#[from] re_sorbet::UnsupportedTimeType), + + #[error(transparent)] + WrongDatatypeError(#[from] re_sorbet::WrongDatatypeError), + + #[error(transparent)] + MismatchedChunkSchemaError(#[from] re_sorbet::MismatchedChunkSchemaError), } pub type ChunkResult = Result; diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs index 19cd375a72e3..8669e86d64ba 100644 --- a/crates/store/re_chunk/src/transport.rs +++ b/crates/store/re_chunk/src/transport.rs @@ -390,6 +390,116 @@ impl TransportChunk { } } +impl Chunk { + /// Prepare the [`Chunk`] for transport. + /// + /// It is probably a good idea to sort the chunk first. 
+ pub fn to_chunk_batch(&self) -> ChunkResult { + self.sanity_check()?; + + re_tracing::profile_function!(format!( + "num_columns={} num_rows={}", + self.num_columns(), + self.num_rows() + )); + + let heap_size_bytes = self.heap_size_bytes(); + let Self { + id, + entity_path, + heap_size_bytes: _, // use the method instead because of lazy initialization + is_sorted, + row_ids, + timelines, + components, + } = self; + + let row_id_schema = re_sorbet::RowIdColumnDescriptor::try_from(RowId::arrow_datatype())?; + + let (index_schemas, index_arrays): (Vec<_>, Vec<_>) = { + re_tracing::profile_scope!("timelines"); + + let mut timelines = timelines + .iter() + .map(|(timeline, info)| { + let TimeColumn { + timeline: _, + times: _, + is_sorted, + time_range: _, + } = info; + + let array = info.times_array(); + let schema = re_sorbet::TimeColumnDescriptor { + timeline: *timeline, + datatype: array.data_type().clone(), + is_sorted: *is_sorted, + }; + + (schema, into_arrow_ref(array)) + }) + .collect_vec(); + + timelines.sort_by(|(schema_a, _), (schema_b, _)| schema_a.cmp(schema_b)); + + timelines.into_iter().unzip() + }; + + let (data_schemas, data_arrays): (Vec<_>, Vec<_>) = { + re_tracing::profile_scope!("components"); + + let mut components = components + .values() + .flat_map(|per_desc| per_desc.iter()) + .map(|(component_desc, list_array)| { + let list_array = ArrowListArray::from(list_array.clone()); + let ComponentDescriptor { + archetype_name, + archetype_field_name, + component_name, + } = *component_desc; + + let schema = re_sorbet::ComponentColumnDescriptor { + store_datatype: list_array.data_type().clone(), + entity_path: entity_path.clone(), + + archetype_name, + archetype_field_name, + component_name, + + is_static: false, // TODO + is_indicator: false, // TODO + is_tombstone: false, // TODO + is_semantically_empty: false, // TODO + }; + (schema, into_arrow_ref(list_array)) + }) + .collect_vec(); + + components.sort_by(|(schema_a, _), (schema_b, _)| schema_a.cmp(schema_b)); + + components.into_iter().unzip() + }; + + let schema = re_sorbet::ChunkSchema::new( + *id, + entity_path.clone(), + row_id_schema, + index_schemas, + data_schemas, + ) + .with_heap_size_bytes(heap_size_bytes) + .with_sorted(*is_sorted); + + Ok(re_sorbet::ChunkBatch::try_new( + schema, + into_arrow_ref(row_ids.clone()), + index_arrays, + data_arrays, + )?) + } +} + impl Chunk { /// Prepare the [`Chunk`] for transport. 
/// @@ -735,7 +845,7 @@ mod tests { let entity_path = EntityPath::parse_forgiving("a/b/c"); let timeline1 = Timeline::new_temporal("log_time"); - let timelines1 = std::iter::once(( + let timelines1: IntMap<_, _> = std::iter::once(( timeline1, TimeColumn::new(Some(true), timeline1, vec![42, 43, 44, 45].into()), )) @@ -788,7 +898,7 @@ mod tests { let row_ids = vec![RowId::new(), RowId::new(), RowId::new(), RowId::new()]; - for timelines in [timelines1, timelines2] { + for timelines in [timelines1.clone(), timelines2.clone()] { let chunk_original = Chunk::from_native_row_ids( ChunkId::new(), entity_path.clone(), @@ -860,6 +970,35 @@ mod tests { } } + for timelines in [timelines1, timelines2] { + let chunk_before = Chunk::from_native_row_ids( + ChunkId::new(), + entity_path.clone(), + None, + &row_ids, + timelines.clone(), + components.clone().into_iter().collect(), + ) + .unwrap(); + + let chunk_batch = chunk_before.to_chunk_batch().unwrap(); + + assert_eq!(chunk_before.num_columns(), chunk_batch.num_columns()); + assert_eq!(chunk_before.num_rows(), chunk_batch.num_rows()); + + let chunk_after = Chunk::from_record_batch(chunk_batch.into()).unwrap(); + + assert_eq!(chunk_before.entity_path(), chunk_after.entity_path()); + assert_eq!( + chunk_before.heap_size_bytes(), + chunk_after.heap_size_bytes(), + ); + assert_eq!(chunk_before.num_columns(), chunk_after.num_columns()); + assert_eq!(chunk_before.num_rows(), chunk_after.num_rows()); + assert!(chunk_before.are_equal(&chunk_after)); + assert_eq!(chunk_before, chunk_after); + } + Ok(()) } } diff --git a/crates/store/re_sorbet/Cargo.toml b/crates/store/re_sorbet/Cargo.toml index 42d50270c915..929fcfa20188 100644 --- a/crates/store/re_sorbet/Cargo.toml +++ b/crates/store/re_sorbet/Cargo.toml @@ -27,6 +27,7 @@ re_tuid.workspace = true re_types_core.workspace = true arrow.workspace = true +itertools.workspace = true thiserror.workspace = true # Keep this crate simple, with few dependencies. diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs index 24fd116b5364..e6cc158a6fad 100644 --- a/crates/store/re_sorbet/src/chunk_batch.rs +++ b/crates/store/re_sorbet/src/chunk_batch.rs @@ -1,12 +1,29 @@ use arrow::{ - array::RecordBatch as ArrowRecordBatch, + array::{ArrayRef as ArrowArrayRef, RecordBatch as ArrowRecordBatch, RecordBatchOptions}, datatypes::{Fields as ArrowFields, Schema as ArrowSchema}, }; use re_log_types::EntityPath; use re_types_core::ChunkId; -use crate::{chunk_schema::InvalidChunkSchema, ArrowBatchMetadata, ChunkSchema}; +use crate::{ + chunk_schema::InvalidChunkSchema, ArrowBatchMetadata, ChunkSchema, WrongDatatypeError, +}; + +#[derive(thiserror::Error, Debug)] +pub enum MismatchedChunkSchemaError { + #[error("{0}")] + Custom(String), + + #[error(transparent)] + WrongDatatypeError(#[from] WrongDatatypeError), +} + +impl MismatchedChunkSchemaError { + pub fn custom(s: impl Into) -> Self { + Self::Custom(s.into()) + } +} /// The arrow [`ArrowRecordBatch`] representation of a Rerun chunk. /// @@ -17,9 +34,76 @@ use crate::{chunk_schema::InvalidChunkSchema, ArrowBatchMetadata, ChunkSchema}; #[derive(Debug, Clone)] pub struct ChunkBatch { schema: ChunkSchema, + + // TODO: should we store a record batch here, or just the parsed columns? 
     batch: ArrowRecordBatch,
 }
 
+impl ChunkBatch {
+    pub fn try_new(
+        schema: ChunkSchema,
+        row_ids: ArrowArrayRef,
+        index_arrays: Vec<ArrowArrayRef>,
+        data_arrays: Vec<ArrowArrayRef>,
+    ) -> Result<Self, MismatchedChunkSchemaError> {
+        let row_count = row_ids.len();
+
+        WrongDatatypeError::compare_expected_actual(
+            &schema.row_id_column.datatype(),
+            row_ids.data_type(),
+        )?;
+
+        if index_arrays.len() != schema.index_columns.len() {
+            return Err(MismatchedChunkSchemaError::custom(format!(
+                "Schema had {} index columns, but got {}",
+                schema.index_columns.len(),
+                index_arrays.len()
+            )));
+        }
+        for (schema, array) in itertools::izip!(&schema.index_columns, &index_arrays) {
+            WrongDatatypeError::compare_expected_actual(schema.datatype(), array.data_type())?;
+            if array.len() != row_count {
+                return Err(MismatchedChunkSchemaError::custom(format!(
+                    "Index column {:?} had {} rows, but we got {} row IDs",
+                    schema.name(),
+                    array.len(),
+                    row_count
+                )));
+            }
+        }
+
+        if data_arrays.len() != schema.data_columns.len() {
+            return Err(MismatchedChunkSchemaError::custom(format!(
+                "Schema had {} data columns, but got {}",
+                schema.data_columns.len(),
+                data_arrays.len()
+            )));
+        }
+        for (schema, array) in itertools::izip!(&schema.data_columns, &data_arrays) {
+            WrongDatatypeError::compare_expected_actual(&schema.store_datatype, array.data_type())?;
+            if array.len() != row_count {
+                return Err(MismatchedChunkSchemaError::custom(format!(
+                    "Data column {:?} had {} rows, but we got {} row IDs",
+                    schema.column_name(),
+                    array.len(),
+                    row_count
+                )));
+            }
+        }
+
+        let arrow_columns = itertools::chain!(Some(row_ids), index_arrays, data_arrays).collect();
+
+        let batch = ArrowRecordBatch::try_new_with_options(
+            std::sync::Arc::new(ArrowSchema::from(&schema)),
+            arrow_columns,
+            &RecordBatchOptions::default().with_row_count(Some(row_count)),
+        )
+        .unwrap();
+
+        Ok(Self { schema, batch })
+    }
+}
+
 impl ChunkBatch {
     /// The parsed rerun schema of this chunk.
     #[inline]
diff --git a/crates/store/re_sorbet/src/chunk_schema.rs b/crates/store/re_sorbet/src/chunk_schema.rs
index a10b40dcb6cd..b80784621244 100644
--- a/crates/store/re_sorbet/src/chunk_schema.rs
+++ b/crates/store/re_sorbet/src/chunk_schema.rs
@@ -1,11 +1,12 @@
 use arrow::datatypes::{Field as ArrowField, Schema as ArrowSchema};
+use itertools::Itertools as _;
 
 use re_log_types::EntityPath;
 use re_types_core::ChunkId;
 
 use crate::{
-    ArrowBatchMetadata, ComponentColumnDescriptor, MetadataExt as _, MissingFieldMetadata,
-    MissingMetadataKey, RowIdColumnDescriptor, WrongDatatypeError,
+    ArrowBatchMetadata, ColumnDescriptor, ColumnError, ComponentColumnDescriptor, MetadataExt as _,
+    MissingMetadataKey, RowIdColumnDescriptor, TimeColumnDescriptor, WrongDatatypeError,
 };
 
 #[derive(thiserror::Error, Debug)]
@@ -19,11 +20,14 @@ pub enum InvalidChunkSchema {
     #[error("Bad column '{field_name}': {error}")]
     BadColumn {
         field_name: String,
-        error: MissingFieldMetadata,
+        error: ColumnError,
     },
 
     #[error("Bad chunk schema: {reason}")]
     Custom { reason: String },
+
+    #[error("The data columns were not the last columns. Index columns must come before any data columns.")]
+    UnorderedIndexAndDataColumns,
 }
 
 impl InvalidChunkSchema {
@@ -41,22 +45,25 @@ impl InvalidChunkSchema {
 #[derive(Debug, Clone)]
 pub struct ChunkSchema {
     /// The globally unique ID of this chunk.
-    chunk_id: ChunkId,
+    pub chunk_id: ChunkId,
 
     /// Which entity is this chunk for?
-    entity_path: EntityPath,
+    pub entity_path: EntityPath,
 
     /// The heap size of this chunk in bytes, if known.
- heap_size_bytes: Option, + pub heap_size_bytes: Option, /// Are we sorted by the row id column? - is_sorted: bool, + pub is_sorted: bool, /// The primary row id column. - row_id_column: RowIdColumnDescriptor, + pub row_id_column: RowIdColumnDescriptor, + + /// Index columns (timelines). + pub index_columns: Vec, - /// All other columns (indices and data). - columns: Vec, + /// The actual component data + pub data_columns: Vec, } /// ## Metadata keys for the record batch metadata @@ -68,35 +75,72 @@ impl ChunkSchema { const CHUNK_METADATA_VERSION: &'static str = "1"; } +/// ## Builders impl ChunkSchema { - /// The globally unique ID of this chunk. + pub fn new( + chunk_id: ChunkId, + entity_path: EntityPath, + row_id_column: RowIdColumnDescriptor, + index_columns: Vec, + data_columns: Vec, + ) -> Self { + Self { + chunk_id, + entity_path, + heap_size_bytes: None, + is_sorted: false, // assume the worst + row_id_column, + index_columns, + data_columns, + } + } + + #[inline] + pub fn with_heap_size_bytes(mut self, heap_size_bytes: u64) -> Self { + self.heap_size_bytes = Some(heap_size_bytes); + self + } + + #[inline] + pub fn with_sorted(mut self, sorted: bool) -> Self { + self.is_sorted = sorted; + self + } +} +/// ## Accessors +impl ChunkSchema { + /// The globally unique ID of this chunk. #[inline] pub fn chunk_id(&self) -> ChunkId { self.chunk_id } /// Which entity is this chunk for? - #[inline] pub fn entity_path(&self) -> &EntityPath { &self.entity_path } /// The heap size of this chunk in bytes, if known. - #[inline] pub fn heap_size_bytes(&self) -> Option { self.heap_size_bytes } /// Are we sorted by the row id column? - #[inline] pub fn is_sorted(&self) -> bool { self.is_sorted } + /// Total number of columns in this chunk, + /// including the row id column, the index columns, + /// and the data columns. + pub fn num_columns(&self) -> usize { + 1 + self.index_columns.len() + self.data_columns.len() + } + pub fn arrow_batch_metadata(&self) -> ArrowBatchMetadata { let Self { chunk_id, @@ -104,7 +148,8 @@ impl ChunkSchema { heap_size_bytes, is_sorted, row_id_column: _, - columns: _, + index_columns: _, + data_columns: _, } = self; let mut arrow_metadata = ArrowBatchMetadata::from([ @@ -129,18 +174,22 @@ impl ChunkSchema { } } -impl From for ArrowSchema { - fn from(chunk_schema: ChunkSchema) -> Self { +impl From<&ChunkSchema> for ArrowSchema { + fn from(chunk_schema: &ChunkSchema) -> Self { let metadata = chunk_schema.arrow_batch_metadata(); + let num_columns = chunk_schema.num_columns(); let ChunkSchema { row_id_column, - columns, + index_columns, + data_columns, .. 
} = chunk_schema; - let mut fields: Vec = Vec::with_capacity(1 + columns.len()); + + let mut fields: Vec = Vec::with_capacity(num_columns); fields.push(row_id_column.to_arrow_field()); - fields.extend(columns.iter().map(|column| column.to_arrow_field())); + fields.extend(index_columns.iter().map(|column| column.to_arrow_field())); + fields.extend(data_columns.iter().map(|column| column.to_arrow_field())); Self { metadata, @@ -195,7 +244,7 @@ impl TryFrom<&ArrowSchema> for ChunkSchema { .iter() .skip(1) .map(|field| { - ComponentColumnDescriptor::try_from(field.as_ref()).map_err(|err| { + ColumnDescriptor::try_from(field.as_ref()).map_err(|err| { InvalidChunkSchema::BadColumn { field_name: field.name().to_owned(), error: err, @@ -205,13 +254,36 @@ impl TryFrom<&ArrowSchema> for ChunkSchema { .collect(); let columns = columns?; + // Index columns should always come first: + let num_index_columns = columns.partition_point(|p| matches!(p, ColumnDescriptor::Time(_))); + + let index_columns = columns[0..num_index_columns] + .iter() + .filter_map(|c| match c { + ColumnDescriptor::Time(column) => Some(column.clone()), + ColumnDescriptor::Component(_) => None, + }) + .collect_vec(); + let data_columns = columns[0..num_index_columns] + .iter() + .filter_map(|c| match c { + ColumnDescriptor::Time(_) => None, + ColumnDescriptor::Component(column) => Some(column.clone()), + }) + .collect_vec(); + + if index_columns.len() + data_columns.len() < columns.len() { + return Err(InvalidChunkSchema::UnorderedIndexAndDataColumns); + } + Ok(Self { chunk_id, entity_path, heap_size_bytes, is_sorted, row_id_column, - columns, + index_columns, + data_columns, }) } } diff --git a/crates/store/re_sorbet/src/data_column_schema.rs b/crates/store/re_sorbet/src/data_column_schema.rs index 3c24c9c61494..26659ec158f6 100644 --- a/crates/store/re_sorbet/src/data_column_schema.rs +++ b/crates/store/re_sorbet/src/data_column_schema.rs @@ -196,8 +196,7 @@ impl ComponentColumnDescriptor { self.store_datatype.clone() } - #[inline] - pub fn to_arrow_field(&self) -> ArrowField { + pub fn column_name(&self) -> String { let entity_path = &self.entity_path; let descriptor = ComponentDescriptor { archetype_name: self.archetype_name, @@ -205,16 +204,19 @@ impl ComponentColumnDescriptor { component_name: self.component_name, }; - ArrowField::new( - // NOTE: Uncomment this to expose fully-qualified names in the Dataframe APIs! - // I'm not doing that right now, to avoid breaking changes (and we need to talk about - // what the syntax for these fully-qualified paths need to look like first). - format!("{}:{}", entity_path, descriptor.component_name.short_name()), - // format!("{entity_path}@{}", descriptor.short_name()), - self.returned_datatype(), - true, /* nullable */ - ) - .with_metadata(self.metadata()) + format!("{}:{}", entity_path, descriptor.component_name.short_name()) + + // NOTE: Uncomment this to expose fully-qualified names in the Dataframe APIs! + // I'm not doing that right now, to avoid breaking changes (and we need to talk about + // what the syntax for these fully-qualified paths need to look like first). 
+ // format!("{entity_path}@{}", descriptor.short_name()) + } + + #[inline] + pub fn to_arrow_field(&self) -> ArrowField { + let nullable = true; + ArrowField::new(self.column_name(), self.returned_datatype(), nullable) + .with_metadata(self.metadata()) } } diff --git a/crates/store/re_sorbet/src/index_column_schema.rs b/crates/store/re_sorbet/src/index_column_schema.rs index c782edf0ecaf..29ca0db2a3c1 100644 --- a/crates/store/re_sorbet/src/index_column_schema.rs +++ b/crates/store/re_sorbet/src/index_column_schema.rs @@ -1,6 +1,9 @@ use arrow::datatypes::{DataType as ArrowDatatype, Field as ArrowField}; + use re_log_types::{Timeline, TimelineName}; +use crate::MetadataExt as _; + #[derive(thiserror::Error, Debug)] #[error("Unsupported time type: {datatype:?}")] pub struct UnsupportedTimeType { @@ -15,6 +18,9 @@ pub struct TimeColumnDescriptor { /// The Arrow datatype of the column. pub datatype: ArrowDatatype, + + /// Are the indices in this column sorted? + pub is_sorted: bool, } impl PartialOrd for TimeColumnDescriptor { @@ -30,6 +36,7 @@ impl Ord for TimeColumnDescriptor { let Self { timeline, datatype: _, + is_sorted: _, } = self; timeline.cmp(&other.timeline) } @@ -44,6 +51,7 @@ impl TimeColumnDescriptor { // It doesn't matter, only the name will remain in the Arrow schema anyhow. timeline: Timeline::new_sequence(name), datatype: ArrowDatatype::Null, + is_sorted: true, } } @@ -69,17 +77,21 @@ impl TimeColumnDescriptor { #[inline] pub fn to_arrow_field(&self) -> ArrowField { - let Self { timeline, datatype } = self; + let Self { + timeline, + datatype, + is_sorted, + } = self; let nullable = true; // Time column must be nullable since static data doesn't have a time. - let metadata = [ - Some(("rerun.kind".to_owned(), "index".to_owned())), - Some(("rerun.index_name".to_owned(), timeline.name().to_string())), - ] - .into_iter() - .flatten() - .collect(); + let mut metadata = std::collections::HashMap::from([ + ("rerun.kind".to_owned(), "index".to_owned()), + ("rerun.index_name".to_owned(), timeline.name().to_string()), + ]); + if *is_sorted { + metadata.insert("rerun.is_sorted".to_owned(), "true".to_owned()); + } ArrowField::new(timeline.name().to_string(), datatype.clone(), nullable) .with_metadata(metadata) @@ -91,6 +103,7 @@ impl From for TimeColumnDescriptor { Self { timeline, datatype: timeline.datatype(), + is_sorted: false, // assume the worst } } } @@ -114,6 +127,10 @@ impl TryFrom<&ArrowField> for TimeColumnDescriptor { let timeline = Timeline::new(name, time_type); - Ok(Self { timeline, datatype }) + Ok(Self { + timeline, + datatype, + is_sorted: field.metadata().get_bool("rerun.is_sorted"), + }) } } diff --git a/crates/store/re_sorbet/src/lib.rs b/crates/store/re_sorbet/src/lib.rs index 9544b32d6afe..fb1c754d9604 100644 --- a/crates/store/re_sorbet/src/lib.rs +++ b/crates/store/re_sorbet/src/lib.rs @@ -12,7 +12,7 @@ mod metadata; mod row_id_column_schema; pub use self::{ - chunk_batch::ChunkBatch, + chunk_batch::{ChunkBatch, MismatchedChunkSchemaError}, chunk_schema::ChunkSchema, column_schema::{ColumnDescriptor, ColumnError}, data_column_schema::ComponentColumnDescriptor, diff --git a/crates/store/re_sorbet/src/row_id_column_schema.rs b/crates/store/re_sorbet/src/row_id_column_schema.rs index 3f8746434704..fae2a3b84f11 100644 --- a/crates/store/re_sorbet/src/row_id_column_schema.rs +++ b/crates/store/re_sorbet/src/row_id_column_schema.rs @@ -8,6 +8,22 @@ pub struct WrongDatatypeError { pub actual: ArrowDatatype, } +impl WrongDatatypeError { + pub fn 
compare_expected_actual(
+        expected: &ArrowDatatype,
+        actual: &ArrowDatatype,
+    ) -> Result<(), Self> {
+        if expected == actual {
+            Ok(())
+        } else {
+            Err(Self {
+                expected: expected.clone(),
+                actual: actual.clone(),
+            })
+        }
+    }
+}
+
 /// Describes the [`RowId`]
 #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub struct RowIdColumnDescriptor {}
@@ -50,13 +66,24 @@ impl TryFrom<&ArrowField> for RowIdColumnDescriptor {
     type Error = WrongDatatypeError;
 
     fn try_from(field: &ArrowField) -> Result<Self, Self::Error> {
         // TODO: check `rerun.kind`
-        if field.data_type() == &RowId::arrow_datatype() {
-            Ok(Self {})
-        } else {
-            Err(WrongDatatypeError {
-                expected: RowId::arrow_datatype(),
-                actual: field.data_type().clone(),
-            })
-        }
+        Self::try_from(field.data_type())
+    }
+}
+
+impl TryFrom<&ArrowDatatype> for RowIdColumnDescriptor {
+    type Error = WrongDatatypeError;
+
+    fn try_from(data_type: &ArrowDatatype) -> Result<Self, Self::Error> {
+        WrongDatatypeError::compare_expected_actual(&RowId::arrow_datatype(), data_type)?;
+        Ok(Self {})
+    }
+}
+
+impl TryFrom<ArrowDatatype> for RowIdColumnDescriptor {
+    type Error = WrongDatatypeError;
+
+    fn try_from(data_type: ArrowDatatype) -> Result<Self, Self::Error> {
+        WrongDatatypeError::compare_expected_actual(&RowId::arrow_datatype(), &data_type)?;
+        Ok(Self {})
+    }
 }

From 42d3a207dd21c1a6efa306e403fe857c52721bc8 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Sat, 8 Feb 2025 13:06:04 +0100
Subject: [PATCH 06/38] Roundtrip it

---
 Cargo.lock                                 |   1 +
 crates/store/re_chunk/src/transport.rs     | 115 ++++++++++++++++++++-
 crates/store/re_sorbet/Cargo.toml          |   1 +
 crates/store/re_sorbet/src/chunk_batch.rs  |  41 +++++++-
 crates/store/re_sorbet/src/chunk_schema.rs |  15 +--
 5 files changed, 160 insertions(+), 13 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index a21a840c791a..8a2d9ab3cc65 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6588,6 +6588,7 @@ version = "0.23.0-alpha.1+dev"
 dependencies = [
  "arrow",
  "itertools 0.13.0",
+ "re_arrow_util",
  "re_format_arrow",
  "re_log",
  "re_log_types",
diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs
index 8669e86d64ba..a0ad43438a63 100644
--- a/crates/store/re_chunk/src/transport.rs
+++ b/crates/store/re_chunk/src/transport.rs
@@ -657,6 +657,102 @@ impl Chunk {
         Ok(TransportChunk::from(record_batch))
     }
 
+    pub fn from_chunk_batch(batch: &re_sorbet::ChunkBatch) -> ChunkResult<Self> {
+        re_tracing::profile_function!(format!(
+            "num_columns={} num_rows={}",
+            batch.num_columns(),
+            batch.num_rows()
+        ));
+
+        // Metadata
+        let (id, entity_path, is_sorted) = (
+            batch.chunk_id(),
+            batch.entity_path().clone(),
+            batch.is_sorted(),
+        );
+
+        let row_ids = batch.row_id_column().1.clone();
+
+        let timelines = {
+            re_tracing::profile_scope!("timelines");
+
+            let mut timelines = IntMap::default();
+
+            for (schema, column) in batch.index_columns() {
+                let timeline = schema.timeline();
+
+                let times =
+                    TimeColumn::read_array(&as_array_ref(column.clone())).map_err(|err| {
+                        ChunkError::Malformed {
+                            reason: format!("Bad time column '{}': {err}", schema.name()),
+                        }
+                    })?;
+
+                let time_column =
+                    TimeColumn::new(schema.is_sorted.then_some(true), timeline, times);
+                if timelines.insert(timeline, time_column).is_some() {
+                    return Err(ChunkError::Malformed {
+                        reason: format!(
+                            "time column '{}' was specified more than once",
+                            schema.name(),
+                        ),
+                    });
+                }
+            }
+
+            timelines
+        };
+
+        let components = {
+            let mut components = ChunkComponents::default();
+
+            for (schema, column) in batch.data_columns() {
+                let column = column
+                    .downcast_array_ref::<ArrowListArray>()
+                    .ok_or_else(|| ChunkError::Malformed {
+
reason: format!( + "The outer array in a chunked component batch must be a sparse list, got {:?}", + column.data_type(), + ), + })?; + + let component_desc = ComponentDescriptor { + archetype_name: schema.archetype_name, + archetype_field_name: schema.archetype_field_name, + component_name: schema.component_name, + }; + + if components + .insert_descriptor(component_desc, column.clone()) + .is_some() + { + return Err(ChunkError::Malformed { + reason: format!( + "component column '{schema:?}' was specified more than once" + ), + }); + } + } + + components + }; + + let mut res = Self::new( + id, + entity_path, + is_sorted.then_some(true), + row_ids, + timelines, + components, + )?; + + if let Some(heap_size_bytes) = batch.heap_size_bytes() { + res.heap_size_bytes = heap_size_bytes.into(); + } + + Ok(res) + } + pub fn from_record_batch(batch: ArrowRecordBatch) -> ChunkResult { Self::from_transport(&TransportChunk::from(batch)) } @@ -831,6 +927,7 @@ impl Chunk { #[cfg(test)] mod tests { use nohash_hasher::IntMap; + use similar_asserts::assert_eq; use re_arrow_util::arrow_util; use re_log_types::{ @@ -981,12 +1078,22 @@ mod tests { ) .unwrap(); - let chunk_batch = chunk_before.to_chunk_batch().unwrap(); + let chunk_batch_before = chunk_before.to_chunk_batch().unwrap(); + + assert_eq!(chunk_before.num_columns(), chunk_batch_before.num_columns()); + assert_eq!(chunk_before.num_rows(), chunk_batch_before.num_rows()); - assert_eq!(chunk_before.num_columns(), chunk_batch.num_columns()); - assert_eq!(chunk_before.num_rows(), chunk_batch.num_rows()); + let arrow_record_batch = ArrowRecordBatch::from(&chunk_batch_before); + + let chunk_batch_after = re_sorbet::ChunkBatch::try_from(arrow_record_batch).unwrap(); + + assert_eq!( + chunk_batch_before.chunk_schema(), + chunk_batch_after.chunk_schema() + ); + assert_eq!(chunk_batch_before.num_rows(), chunk_batch_after.num_rows()); - let chunk_after = Chunk::from_record_batch(chunk_batch.into()).unwrap(); + let chunk_after = Chunk::from_chunk_batch(&chunk_batch_after).unwrap(); assert_eq!(chunk_before.entity_path(), chunk_after.entity_path()); assert_eq!( diff --git a/crates/store/re_sorbet/Cargo.toml b/crates/store/re_sorbet/Cargo.toml index 929fcfa20188..be87b024fd93 100644 --- a/crates/store/re_sorbet/Cargo.toml +++ b/crates/store/re_sorbet/Cargo.toml @@ -20,6 +20,7 @@ all-features = true [dependencies] +re_arrow_util.workspace = true re_format_arrow.workspace = true re_log_types.workspace = true re_log.workspace = true diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs index e6cc158a6fad..791672b88ad1 100644 --- a/crates/store/re_sorbet/src/chunk_batch.rs +++ b/crates/store/re_sorbet/src/chunk_batch.rs @@ -1,5 +1,8 @@ use arrow::{ - array::{ArrayRef as ArrowArrayRef, RecordBatch as ArrowRecordBatch, RecordBatchOptions}, + array::{ + Array as _, ArrayRef as ArrowArrayRef, AsArray, RecordBatch as ArrowRecordBatch, + RecordBatchOptions, StructArray as ArrowStructArray, + }, datatypes::{Fields as ArrowFields, Schema as ArrowSchema}, }; @@ -7,7 +10,8 @@ use re_log_types::EntityPath; use re_types_core::ChunkId; use crate::{ - chunk_schema::InvalidChunkSchema, ArrowBatchMetadata, ChunkSchema, WrongDatatypeError, + chunk_schema::InvalidChunkSchema, ArrowBatchMetadata, ChunkSchema, ComponentColumnDescriptor, + RowIdColumnDescriptor, TimeColumnDescriptor, WrongDatatypeError, }; #[derive(thiserror::Error, Debug)] @@ -144,6 +148,37 @@ impl ChunkBatch { pub fn arrow_bacth_metadata(&self) -> &ArrowBatchMetadata { 
        &self.batch.schema_ref().metadata
     }
+
+    pub fn row_id_column(&self) -> (&RowIdColumnDescriptor, &ArrowStructArray) {
+        // The first column is always the row IDs.
+        (
+            &self.schema.row_id_column,
+            self.batch.columns()[0]
+                .as_struct_opt()
+                .expect("Row IDs should be encoded as struct"),
+        )
+    }
+
+    /// The columns of the indices (timelines).
+    pub fn index_columns(&self) -> impl Iterator<Item = (&TimeColumnDescriptor, &ArrowArrayRef)> {
+        itertools::izip!(
+            &self.schema.index_columns,
+            self.batch.columns().iter().skip(1) // skip row IDs
+        )
+    }
+
+    /// The columns of the component data.
+    pub fn data_columns(
+        &self,
+    ) -> impl Iterator<Item = (&ComponentColumnDescriptor, &ArrowArrayRef)> {
+        itertools::izip!(
+            &self.schema.data_columns,
+            self.batch
+                .columns()
+                .iter()
+                .skip(1 + self.schema.index_columns.len()) // skip row IDs and indices
+        )
+    }
 }
 
 impl std::fmt::Display for ChunkBatch {
@@ -189,6 +224,8 @@ impl TryFrom<ArrowRecordBatch> for ChunkBatch {
     fn try_from(batch: ArrowRecordBatch) -> Result<Self, Self::Error> {
         let chunk_schema = ChunkSchema::try_from(batch.schema_ref().as_ref())?;
 
+        // TODO: sanity check that the schema matches the columns
+
         // Extend with any metadata that might have been missing:
         let mut arrow_schema = ArrowSchema::clone(batch.schema_ref().as_ref());
         arrow_schema
diff --git a/crates/store/re_sorbet/src/chunk_schema.rs b/crates/store/re_sorbet/src/chunk_schema.rs
index b80784621244..3a73149d3f8e 100644
--- a/crates/store/re_sorbet/src/chunk_schema.rs
+++ b/crates/store/re_sorbet/src/chunk_schema.rs
@@ -42,7 +42,7 @@ impl InvalidChunkSchema {
 ///
 /// This does NOT preserve custom arrow metadata.
 /// It only contains the metadata used by Rerun.
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq)]
 pub struct ChunkSchema {
     /// The globally unique ID of this chunk.
     pub chunk_id: ChunkId,
@@ -240,7 +240,7 @@ impl TryFrom<&ArrowSchema> for ChunkSchema {
         let row_id_column = RowIdColumnDescriptor::try_from(first_field.as_ref())
             .map_err(InvalidChunkSchema::BadRowIdColumn)?;
 
-        let columns: Result<Vec<_>, _> = fields
+        let index_and_data_columns: Result<Vec<_>, _> = fields
             .iter()
             .skip(1)
             .map(|field| {
                 ColumnDescriptor::try_from(field.as_ref()).map_err(|err| {
                     InvalidChunkSchema::BadColumn {
                         field_name: field.name().to_owned(),
                         error: err,
                     }
                 })
             })
             .collect();
-        let columns = columns?;
+        let index_and_data_columns = index_and_data_columns?;
 
         // Index columns should always come first:
-        let num_index_columns = columns.partition_point(|p| matches!(p, ColumnDescriptor::Time(_)));
+        let num_index_columns =
+            index_and_data_columns.partition_point(|p| matches!(p, ColumnDescriptor::Time(_)));
 
-        let index_columns = columns[0..num_index_columns]
+        let index_columns = index_and_data_columns[0..num_index_columns]
             .iter()
             .filter_map(|c| match c {
                 ColumnDescriptor::Time(column) => Some(column.clone()),
                 ColumnDescriptor::Component(_) => None,
             })
             .collect_vec();
-        let data_columns = columns[0..num_index_columns]
+        let data_columns = index_and_data_columns[num_index_columns..]
.iter() .filter_map(|c| match c { ColumnDescriptor::Time(_) => None, @@ -272,7 +273,7 @@ impl TryFrom<&ArrowSchema> for ChunkSchema { }) .collect_vec(); - if index_columns.len() + data_columns.len() < columns.len() { + if index_columns.len() + data_columns.len() != index_and_data_columns.len() { return Err(InvalidChunkSchema::UnorderedIndexAndDataColumns); } From 40fd804336fac89d0968228ce269f9224a89a81a Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Mon, 10 Feb 2025 09:53:32 +0100 Subject: [PATCH 07/38] Less use of `TransportChunk` --- crates/viewer/re_chunk_store_ui/src/chunk_ui.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/viewer/re_chunk_store_ui/src/chunk_ui.rs b/crates/viewer/re_chunk_store_ui/src/chunk_ui.rs index b61273cf22e1..32fbab0d5e8e 100644 --- a/crates/viewer/re_chunk_store_ui/src/chunk_ui.rs +++ b/crates/viewer/re_chunk_store_ui/src/chunk_ui.rs @@ -1,12 +1,11 @@ use std::collections::BTreeMap; use std::sync::Arc; -use arrow::array::Array as _; +use arrow::array::{Array as _, RecordBatch as ArrowRecordBatch}; use egui_extras::{Column, TableRow}; use itertools::Itertools; use re_byte_size::SizeBytes; -use re_chunk_store::external::re_chunk::TransportChunk; use re_chunk_store::Chunk; use re_log_types::{TimeZone, Timeline}; use re_types::datatypes::TimeInt; @@ -204,7 +203,7 @@ impl ChunkUi { } }; - let fields_ui = |ui: &mut egui::Ui, transport: &TransportChunk| { + let fields_ui = |ui: &mut egui::Ui, transport: &ArrowRecordBatch| { for field in &transport.schema_ref().fields { ui.push_id(field.name().clone(), |ui| { ui.list_item_collapsible_noninteractive_label(field.name(), false, |ui| { From 8a4b5d6d3309737f2c44f6cbb58f478c1651abfd Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Mon, 10 Feb 2025 09:54:43 +0100 Subject: [PATCH 08/38] Remove unwrap; fix compilation of tests --- Cargo.lock | 1 - crates/store/re_sorbet/Cargo.toml | 1 - crates/store/re_sorbet/src/chunk_batch.rs | 6 +++++- crates/store/re_sorbet/src/column_schema.rs | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8a2d9ab3cc65..a21a840c791a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6588,7 +6588,6 @@ version = "0.23.0-alpha.1+dev" dependencies = [ "arrow", "itertools 0.13.0", - "re_arrow_util", "re_format_arrow", "re_log", "re_log_types", diff --git a/crates/store/re_sorbet/Cargo.toml b/crates/store/re_sorbet/Cargo.toml index be87b024fd93..929fcfa20188 100644 --- a/crates/store/re_sorbet/Cargo.toml +++ b/crates/store/re_sorbet/Cargo.toml @@ -20,7 +20,6 @@ all-features = true [dependencies] -re_arrow_util.workspace = true re_format_arrow.workspace = true re_log_types.workspace = true re_log.workspace = true diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs index 791672b88ad1..7071f2eb48b2 100644 --- a/crates/store/re_sorbet/src/chunk_batch.rs +++ b/crates/store/re_sorbet/src/chunk_batch.rs @@ -102,7 +102,11 @@ impl ChunkBatch { arrow_columns, &RecordBatchOptions::default().with_row_count(Some(row_count)), ) - .unwrap(); + .map_err(|err| { + MismatchedChunkSchemaError::custom(format!( + "Failed to create arrow record batch: {err}" + )) + })?; Ok(Self { schema, batch }) } diff --git a/crates/store/re_sorbet/src/column_schema.rs b/crates/store/re_sorbet/src/column_schema.rs index 9f7cf34ac02e..f22253cbdabe 100644 --- a/crates/store/re_sorbet/src/column_schema.rs +++ b/crates/store/re_sorbet/src/column_schema.rs @@ -114,6 +114,7 @@ fn test_schema_over_ipc() { 
arrow::datatypes::TimeUnit::Nanosecond, None, ), + is_sorted: true, }), ColumnDescriptor::Component(ComponentColumnDescriptor { entity_path: re_log_types::EntityPath::from("/some/path"), From 538b9b311e87fbfd5661bc5df17386883f27d304 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Mon, 10 Feb 2025 13:26:34 +0100 Subject: [PATCH 09/38] Remove one use of TransportChunk --- Cargo.lock | 1 + crates/top/rerun/Cargo.toml | 1 + crates/top/rerun/src/commands/rrd/filter.rs | 19 +++++++------------ 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a21a840c791a..3bd1059c03e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7502,6 +7502,7 @@ dependencies = [ "re_sdk", "re_sdk_comms", "re_smart_channel", + "re_sorbet", "re_tracing", "re_types", "re_video", diff --git a/crates/top/rerun/Cargo.toml b/crates/top/rerun/Cargo.toml index 974fddd12dcf..a6f1233c01f5 100644 --- a/crates/top/rerun/Cargo.toml +++ b/crates/top/rerun/Cargo.toml @@ -137,6 +137,7 @@ re_log_types.workspace = true re_log.workspace = true re_memory.workspace = true re_smart_channel.workspace = true +re_sorbet.workspace = true re_tracing.workspace = true re_video.workspace = true diff --git a/crates/top/rerun/src/commands/rrd/filter.rs b/crates/top/rerun/src/commands/rrd/filter.rs index 6b2256d4056a..6f8c84f8dd96 100644 --- a/crates/top/rerun/src/commands/rrd/filter.rs +++ b/crates/top/rerun/src/commands/rrd/filter.rs @@ -58,7 +58,7 @@ impl FilterCommand { let now = std::time::Instant::now(); re_log::info!(srcs = ?path_to_input_rrds, ?dropped_timelines, "filter started"); - let dropped_timelines: HashSet<_> = dropped_timelines.iter().collect(); + let dropped_timelines: HashSet<_> = dropped_timelines.iter().cloned().collect(); let dropped_entity_paths: HashSet = dropped_entity_paths .iter() .map(|s| EntityPath::parse_forgiving(s)) @@ -120,7 +120,7 @@ impl FilterCommand { msg.batch.columns() ) .filter(|(field, _col)| { - should_keep_timeline(&dropped_timelines, field) + !is_field_timeline_of(field, &dropped_timelines) }) .map(|(field, col)| (field.clone(), col.clone())) .unzip(); @@ -201,16 +201,11 @@ impl FilterCommand { // --- -fn should_keep_timeline(dropped_timelines: &HashSet<&String>, field: &ArrowField) -> bool { - let is_timeline = field - .metadata() - .get(TransportChunk::FIELD_METADATA_KEY_KIND) - .map(|s| s.as_str()) - == Some(TransportChunk::FIELD_METADATA_VALUE_KIND_TIME); - - let is_dropped = dropped_timelines.contains(field.name()); - - !is_timeline || !is_dropped +// Does the given field represent a timeline that is in the given set? 
+fn is_field_timeline_of(field: &ArrowField, dropped_timelines: &HashSet<String>) -> bool {
+    re_sorbet::TimeColumnDescriptor::try_from(field)
+        .ok()
+        .is_some_and(|schema| dropped_timelines.contains(schema.name().as_str()))
+}

 fn should_keep_entity_path(

From 851c7fed2add0463104b025e93158ea78405dd Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 13:30:57 +0100
Subject: [PATCH 10/38] Use re_sorbet in rerun cli filtering

---
 crates/top/rerun/src/commands/rrd/filter.rs | 77 +++++++++------------
 1 file changed, 34 insertions(+), 43 deletions(-)

diff --git a/crates/top/rerun/src/commands/rrd/filter.rs b/crates/top/rerun/src/commands/rrd/filter.rs
index 6f8c84f8dd96..fc9a30a3244e 100644
--- a/crates/top/rerun/src/commands/rrd/filter.rs
+++ b/crates/top/rerun/src/commands/rrd/filter.rs
@@ -8,7 +8,7 @@ use arrow::{
 use itertools::Either;

 use re_build_info::CrateVersion;
-use re_chunk::{external::crossbeam, TransportChunk};
+use re_chunk::external::crossbeam;
 use re_sdk::{external::arrow, EntityPath};

 use crate::commands::read_rrd_streams_from_file_or_stdin;
@@ -111,32 +111,40 @@ impl FilterCommand {
                 Ok(msg) => {
                     let msg = match msg {
                         re_log_types::LogMsg::ArrowMsg(store_id, mut msg) => {
-                            if !should_keep_entity_path(&dropped_entity_paths, &msg.batch.schema())
+                            match re_sorbet::ChunkSchema::try_from(msg.batch.schema_ref().as_ref())
                             {
-                                None
-                            } else {
-                                let (fields, columns): (Vec<_>, Vec<_>) = itertools::izip!(
-                                    &msg.batch.schema().fields,
-                                    msg.batch.columns()
-                                )
-                                .filter(|(field, _col)| {
-                                    !is_field_timeline_of(field, &dropped_timelines)
-                                })
-                                .map(|(field, col)| (field.clone(), col.clone()))
-                                .unzip();
-
-                                if let Ok(new_batch) = ArrowRecordBatch::try_new(
-                                    ArrowSchema::new_with_metadata(
-                                        fields,
-                                        msg.batch.schema().metadata().clone(),
-                                    )
-                                    .into(),
-                                    columns,
-                                ) {
-                                    msg.batch = new_batch;
-                                    Some(re_log_types::LogMsg::ArrowMsg(store_id, msg))
-                                } else {
-                                    None // Probably failed because we filtered out everything
+                                Ok(schema) => {
+                                    if dropped_entity_paths.contains(&schema.entity_path) {
+                                        None
+                                    } else {
+                                        let (fields, columns): (Vec<_>, Vec<_>) = itertools::izip!(
+                                            &msg.batch.schema().fields,
+                                            msg.batch.columns()
+                                        )
+                                        .filter(|(field, _col)| {
+                                            !is_field_timeline_of(field, &dropped_timelines)
+                                        })
+                                        .map(|(field, col)| (field.clone(), col.clone()))
+                                        .unzip();
+
+                                        if let Ok(new_batch) = ArrowRecordBatch::try_new(
+                                            ArrowSchema::new_with_metadata(
+                                                fields,
+                                                msg.batch.schema().metadata().clone(),
+                                            )
+                                            .into(),
+                                            columns,
+                                        ) {
+                                            msg.batch = new_batch;
+                                            Some(re_log_types::LogMsg::ArrowMsg(store_id, msg))
+                                        } else {
+                                            None // Probably failed because we filtered out everything
+                                        }
+                                    }
+                                }
+                                Err(err) => {
+                                    re_log::warn_once!("Failed to parse chunk schema: {err}");
+                                    None
                                 }
                             }
                         }
@@ -207,20 +215,3 @@ fn is_field_timeline_of(field: &ArrowField, dropped_timelines: &HashSet<String>
         .ok()
         .is_some_and(|schema| dropped_timelines.contains(schema.name().as_str()))
 }
-
-fn should_keep_entity_path(
-    dropped_entity_paths: &HashSet<EntityPath>,
-    schema: &ArrowSchema,
-) -> bool {
-    let Some(entity_path) = schema
-        .metadata
-        .get(TransportChunk::CHUNK_METADATA_KEY_ENTITY_PATH)
-        .map(|s| EntityPath::parse_forgiving(s))
-    else {
-        return true;
-    };
-
-    let is_dropped = dropped_entity_paths.contains(&entity_path);
-
-    !is_dropped
-}

From c42d842c7e9736402b3610f85d01b90d31ffa43e Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 13:45:43 +0100
Subject: [PATCH 11/38] No TransportChunk in re_grpc_client

---
 crates/store/re_grpc_client/src/lib.rs | 23 ++++++++++-------------
 rerun_py/src/remote.rs                 |  5 +----
 2 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/crates/store/re_grpc_client/src/lib.rs b/crates/store/re_grpc_client/src/lib.rs
index 9c063d094497..38df97ee48e7 100644
--- a/crates/store/re_grpc_client/src/lib.rs
+++ b/crates/store/re_grpc_client/src/lib.rs
@@ -223,8 +223,7 @@ async fn stream_recording_async(
         }));
     }

-    let store_info =
-        store_info_from_catalog_chunk(&TransportChunk::from(resp[0].clone()), &recording_id)?;
+    let store_info = store_info_from_catalog_chunk(&resp[0], &recording_id)?;
     let store_id = store_info.store_id.clone();

     re_log::debug!("Fetching {recording_id}…");
@@ -281,39 +280,37 @@
 }

 pub fn store_info_from_catalog_chunk(
-    tc: &TransportChunk,
+    record_batch: &ArrowRecordBatch,
     recording_id: &str,
 ) -> Result<StoreInfo, StreamError> {
     let store_id = StoreId::from_string(StoreKind::Recording, recording_id.to_owned());

-    let (_field, data) = tc
-        .components()
-        .find(|(f, _)| f.name() == CATALOG_APP_ID_FIELD_NAME)
+    let data = record_batch
+        .column_by_name(CATALOG_APP_ID_FIELD_NAME)
         .ok_or(StreamError::ChunkError(re_chunk::ChunkError::Malformed {
-            reason: "no {CATALOG_APP_ID_FIELD_NAME} field found".to_owned(),
+            reason: format!("no {CATALOG_APP_ID_FIELD_NAME} field found"),
         }))?;
     let app_id = data
         .downcast_array_ref::<arrow::array::StringArray>()
         .ok_or(StreamError::ChunkError(re_chunk::ChunkError::Malformed {
             reason: format!(
                 "{CATALOG_APP_ID_FIELD_NAME} must be a utf8 array: {:?}",
-                tc.schema_ref()
+                record_batch.schema_ref()
             ),
         }))?
         .value(0);

-    let (_field, data) = tc
-        .components()
-        .find(|(f, _)| f.name() == CATALOG_START_TIME_FIELD_NAME)
+    let data = record_batch
+        .column_by_name(CATALOG_START_TIME_FIELD_NAME)
         .ok_or(StreamError::ChunkError(re_chunk::ChunkError::Malformed {
-            reason: "no {CATALOG_START_TIME_FIELD_NAME}} field found".to_owned(),
+            reason: format!("no {CATALOG_START_TIME_FIELD_NAME} field found"),
         }))?;
     let start_time = data
         .downcast_array_ref::<arrow::array::TimestampNanosecondArray>()
         .ok_or(StreamError::ChunkError(re_chunk::ChunkError::Malformed {
             reason: format!(
                 "{CATALOG_START_TIME_FIELD_NAME} must be a Timestamp array: {:?}",
-                tc.schema_ref()
+                record_batch.schema_ref()
             ),
         }))?
.value(0);

diff --git a/rerun_py/src/remote.rs b/rerun_py/src/remote.rs
index c5b5afc0dc5a..48febc779f14 100644
--- a/rerun_py/src/remote.rs
+++ b/rerun_py/src/remote.rs
@@ -138,10 +138,7 @@ impl PyStorageNodeClient {
                 ));
             }

-            re_grpc_client::store_info_from_catalog_chunk(
-                &re_chunk::TransportChunk::from(resp[0].clone()),
-                id,
-            )
+            re_grpc_client::store_info_from_catalog_chunk(&resp[0], id)
         })
         .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;

From b4ed1e09cbf491f4913e572587f5da1ef61c1a1a Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 13:58:02 +0100
Subject: [PATCH 12/38] Remove `Chunk::from_transport`

---
 crates/store/re_chunk/src/chunk.rs     |   3 +
 crates/store/re_chunk/src/transport.rs | 148 +-----------------
 .../tests/arrow_encode_roundtrip.rs    |   4 +-
 crates/store/re_sorbet/src/lib.rs      |   2 +-
 4 files changed, 12 insertions(+), 145 deletions(-)

diff --git a/crates/store/re_chunk/src/chunk.rs b/crates/store/re_chunk/src/chunk.rs
index 7d63e7e39f58..14bc14e2ec6a 100644
--- a/crates/store/re_chunk/src/chunk.rs
+++ b/crates/store/re_chunk/src/chunk.rs
@@ -54,6 +54,9 @@ pub enum ChunkError {

     #[error(transparent)]
     MismatchedChunkSchemaError(#[from] re_sorbet::MismatchedChunkSchemaError),
+
+    #[error(transparent)]
+    InvalidChunkSchema(#[from] re_sorbet::InvalidChunkSchema),
 }

 pub type ChunkResult<T> = Result<T, ChunkError>;

diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs
index a0ad43438a63..7db98a534425 100644
--- a/crates/store/re_chunk/src/transport.rs
+++ b/crates/store/re_chunk/src/transport.rs
@@ -1,12 +1,9 @@
 use arrow::{
     array::{
         Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
-        RecordBatch as ArrowRecordBatch, StructArray as ArrowStructArray,
-    },
-    datatypes::{
-        DataType as ArrowDatatype, Field as ArrowField, Fields as ArrowFields,
-        Schema as ArrowSchema, TimeUnit as ArrowTimeUnit,
+        RecordBatch as ArrowRecordBatch,
     },
+    datatypes::{Field as ArrowField, Fields as ArrowFields, Schema as ArrowSchema},
 };
 use itertools::Itertools;
 use nohash_hasher::IntMap;
 use tap::Tap as _;

 use re_arrow_util::{arrow_util::into_arrow_ref, ArrowArrayDowncastRef as _};
 use re_byte_size::SizeBytes as _;
-use re_log_types::{EntityPath, Timeline};
+use re_log_types::EntityPath;
 use re_types_core::{
     arrow_helpers::as_array_ref, Component as _, ComponentDescriptor, Loggable as _,
 };
@@ -754,145 +751,12 @@ impl Chunk {
     }

     pub fn from_record_batch(batch: ArrowRecordBatch) -> ChunkResult<Self> {
-        Self::from_transport(&TransportChunk::from(batch))
-    }
-
-    pub fn from_transport(transport: &TransportChunk) -> ChunkResult<Self> {
         re_tracing::profile_function!(format!(
             "num_columns={} num_rows={}",
-            transport.num_columns(),
-            transport.num_rows()
+            batch.num_columns(),
+            batch.num_rows()
         ));
-
-        // Metadata
-        let (id, entity_path, is_sorted) = {
-            re_tracing::profile_scope!("metadata");
-            (
-                transport.id()?,
-                transport.entity_path()?,
-                transport.is_sorted(),
-            )
-        };
-
-        // Row IDs
-        let row_ids = {
-            re_tracing::profile_scope!("row ids");
-
-            let Some(row_ids) = transport.controls().find_map(|(field, column)| {
-                // TODO(cmc): disgusting, but good enough for now.
-                (field.name() == RowId::descriptor().component_name.as_str()).then_some(column)
-            }) else {
-                return Err(ChunkError::Malformed {
-                    reason: format!("missing row_id column ({:?})", transport.schema_ref()),
-                });
-            };
-
-            ArrowArrayRef::from(row_ids.clone())
-                .downcast_array_ref::<ArrowStructArray>()
-                .ok_or_else(|| ChunkError::Malformed {
-                    reason: format!(
-                        "RowId data has the wrong datatype: expected {:?} but got {:?} instead",
-                        RowId::arrow_datatype(),
-                        *row_ids.data_type(),
-                    ),
-                })?
-                .clone()
-        };
-
-        // Timelines
-        let timelines = {
-            re_tracing::profile_scope!("timelines");
-
-            let mut timelines = IntMap::default();
-
-            for (field, column) in transport.timelines() {
-                // See also [`Timeline::datatype`]
-                let timeline = match column.data_type() {
-                    ArrowDatatype::Int64 => Timeline::new_sequence(field.name().as_str()),
-                    ArrowDatatype::Timestamp(ArrowTimeUnit::Nanosecond, None) => {
-                        Timeline::new_temporal(field.name().as_str())
-                    }
-                    _ => {
-                        return Err(ChunkError::Malformed {
-                            reason: format!(
-                                "time column '{}' is not deserializable ({:?})",
-                                field.name(),
-                                column.data_type()
-                            ),
-                        });
-                    }
-                };
-
-                let times = TimeColumn::read_array(&ArrowArrayRef::from(column.clone())).map_err(
-                    |err| ChunkError::Malformed {
-                        reason: format!("Bad time column '{}': {err}", field.name()),
-                    },
-                )?;
-
-                let is_sorted = field
-                    .metadata()
-                    .contains_key(TransportChunk::FIELD_METADATA_MARKER_IS_SORTED_BY_TIME);
-
-                let time_column = TimeColumn::new(is_sorted.then_some(true), timeline, times);
-                if timelines.insert(timeline, time_column).is_some() {
-                    return Err(ChunkError::Malformed {
-                        reason: format!(
-                            "time column '{}' was specified more than once",
-                            field.name(),
-                        ),
-                    });
-                }
-            }
-
-            timelines
-        };
-
-        // Components
-        let components = {
-            let mut components = ChunkComponents::default();
-
-            for (field, column) in transport.components() {
-                let column = column
-                    .downcast_array_ref::<ArrowListArray>()
-                    .ok_or_else(|| ChunkError::Malformed {
-                        reason: format!(
-                            "The outer array in a chunked component batch must be a sparse list, got {:?}",
-                            column.data_type(),
-                        ),
-                    })?;
-
-                let component_desc = TransportChunk::component_descriptor_from_field(field);
-
-                if components
-                    .insert_descriptor(component_desc, column.clone())
-                    .is_some()
-                {
-                    return Err(ChunkError::Malformed {
-                        reason: format!(
-                            "component column '{}' was specified more than once",
-                            field.name(),
-                        ),
-                    });
-                }
-            }
-
-            components
-        };
-
-        let mut res = Self::new(
-            id,
-            entity_path,
-            is_sorted.then_some(true),
-            row_ids,
-            timelines,
-            components,
-        )?;
-
-        if let Some(heap_size_bytes) = transport.heap_size_bytes() {
-            res.heap_size_bytes = heap_size_bytes.into();
-        }
-
-        Ok(res)
+        Self::from_chunk_batch(&re_sorbet::ChunkBatch::try_from(batch)?)
    }
 }

diff --git a/crates/store/re_log_encoding/tests/arrow_encode_roundtrip.rs b/crates/store/re_log_encoding/tests/arrow_encode_roundtrip.rs
index de3fb567958b..2e80fc16a2f9 100644
--- a/crates/store/re_log_encoding/tests/arrow_encode_roundtrip.rs
+++ b/crates/store/re_log_encoding/tests/arrow_encode_roundtrip.rs
@@ -30,8 +30,8 @@ fn encode_roundtrip() {
         .build()
         .unwrap();

-    let transport = chunk.to_transport().unwrap();
-    assert_eq!(Chunk::from_transport(&transport).unwrap(), chunk);
+    let record_batch = chunk.to_record_batch().unwrap();
+    assert_eq!(Chunk::from_record_batch(record_batch).unwrap(), chunk);

     let arrow_msg = chunk.to_arrow_msg().unwrap();
     assert_eq!(Chunk::from_arrow_msg(&arrow_msg).unwrap(), chunk);

diff --git a/crates/store/re_sorbet/src/lib.rs b/crates/store/re_sorbet/src/lib.rs
index fb1c754d9604..4bff398ecc02 100644
--- a/crates/store/re_sorbet/src/lib.rs
+++ b/crates/store/re_sorbet/src/lib.rs
@@ -13,7 +13,7 @@ mod row_id_column_schema;

 pub use self::{
     chunk_batch::{ChunkBatch, MismatchedChunkSchemaError},
-    chunk_schema::ChunkSchema,
+    chunk_schema::{ChunkSchema, InvalidChunkSchema},
     column_schema::{ColumnDescriptor, ColumnError},
     data_column_schema::ComponentColumnDescriptor,
     index_column_schema::{TimeColumnDescriptor, UnsupportedTimeType},

From 7fc72935fb0bf0e2c0e83502e0e344520db4b243 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 14:24:18 +0100
Subject: [PATCH 13/38] More robust parsing of ComponentColumnDescriptor

---
 crates/store/re_sorbet/src/chunk_schema.rs    |  8 ++---
 crates/store/re_sorbet/src/column_schema.rs   | 29 ++++++++++++++-----
 .../store/re_sorbet/src/data_column_schema.rs | 29 +++++++++++++++----
 rerun_py/src/remote.rs                        | 17 +++++++++--
 4 files changed, 63 insertions(+), 20 deletions(-)

diff --git a/crates/store/re_sorbet/src/chunk_schema.rs b/crates/store/re_sorbet/src/chunk_schema.rs
index 3a73149d3f8e..b03abe34f6ca 100644
--- a/crates/store/re_sorbet/src/chunk_schema.rs
+++ b/crates/store/re_sorbet/src/chunk_schema.rs
@@ -244,12 +244,12 @@ impl TryFrom<&ArrowSchema> for ChunkSchema {
             .iter()
             .skip(1)
             .map(|field| {
-                ColumnDescriptor::try_from(field.as_ref()).map_err(|err| {
-                    InvalidChunkSchema::BadColumn {
+                ColumnDescriptor::try_from_arrow_field(Some(&entity_path), field.as_ref()).map_err(
+                    |err| InvalidChunkSchema::BadColumn {
                         field_name: field.name().to_owned(),
                         error: err,
-                    }
-                })
+                    },
+                )
             })
             .collect();
         let index_and_data_columns = index_and_data_columns?;

diff --git a/crates/store/re_sorbet/src/column_schema.rs b/crates/store/re_sorbet/src/column_schema.rs
index f22253cbdabe..68b99e71965b 100644
--- a/crates/store/re_sorbet/src/column_schema.rs
+++ b/crates/store/re_sorbet/src/column_schema.rs
@@ -80,22 +80,34 @@ impl ColumnDescriptor {
         columns.iter().map(|c| c.to_arrow_field()).collect()
     }

-    pub fn from_arrow_fields(fields: &[ArrowFieldRef]) -> Result<Vec<Self>, ColumnError> {
+    /// `chunk_entity_path`: if this column is part of a chunk batch,
+    /// what is its entity path (so we can set [`ComponentColumnDescriptor::entity_path`])?
+    pub fn from_arrow_fields(
+        chunk_entity_path: Option<&EntityPath>,
+        fields: &[ArrowFieldRef],
+    ) -> Result<Vec<Self>, ColumnError> {
         fields
             .iter()
-            .map(|field| Self::try_from(field.as_ref()))
+            .map(|field| Self::try_from_arrow_field(chunk_entity_path, field.as_ref()))
             .collect()
     }
 }

-impl TryFrom<&ArrowField> for ColumnDescriptor {
-    type Error = ColumnError;
-
-    fn try_from(field: &ArrowField) -> Result<Self, Self::Error> {
+impl ColumnDescriptor {
+    /// `chunk_entity_path`: if this column is part of a chunk batch,
+    /// what is its entity path (so we can set [`ComponentColumnDescriptor::entity_path`])?
+    pub fn try_from_arrow_field(
+        chunk_entity_path: Option<&EntityPath>,
+        field: &ArrowField,
+    ) -> Result<Self, ColumnError> {
         let kind = field.get_or_err("rerun.kind")?;
         match kind {
             "index" | "time" => Ok(Self::Time(TimeColumnDescriptor::try_from(field)?)),
-            "data" => Ok(Self::Component(ComponentColumnDescriptor::try_from(field)?)),
+
+            "data" => Ok(Self::Component(
+                ComponentColumnDescriptor::try_from_arrow_field(chunk_entity_path, field)?,
+            )),
+
             _ => Err(ColumnError::UnsupportedColumnKind {
                 kind: kind.to_owned(),
             }),
@@ -136,6 +148,7 @@ fn test_schema_over_ipc() {
     let recovered_schema = crate::schema_from_ipc(&ipc_bytes).unwrap();
     assert_eq!(recovered_schema.as_ref(), &original_schema);

-    let recovered_columns = ColumnDescriptor::from_arrow_fields(&recovered_schema.fields).unwrap();
+    let recovered_columns =
+        ColumnDescriptor::from_arrow_fields(None, &recovered_schema.fields).unwrap();
     assert_eq!(recovered_columns, original_columns);
 }

diff --git a/crates/store/re_sorbet/src/data_column_schema.rs b/crates/store/re_sorbet/src/data_column_schema.rs
index 26659ec158f6..46fc97dbdeca 100644
--- a/crates/store/re_sorbet/src/data_column_schema.rs
+++ b/crates/store/re_sorbet/src/data_column_schema.rs
@@ -21,6 +21,10 @@ pub struct ComponentColumnDescriptor {
     pub store_datatype: ArrowDatatype,

     /// The path of the entity.
+    ///
+    /// If this column is part of a chunk batch,
+    /// this is the same for all columns in the batch,
+    /// and will also be set in the schema for the whole chunk.
     pub entity_path: EntityPath,

     /// Optional name of the `Archetype` associated with this data.
@@ -220,16 +224,31 @@ impl ComponentColumnDescriptor {
     }
 }

-impl TryFrom<&ArrowField> for ComponentColumnDescriptor {
-    type Error = MissingFieldMetadata;
+impl ComponentColumnDescriptor {
+    /// `chunk_entity_path`: if this column is part of a chunk batch,
+    /// what is its entity path (so we can set [`ComponentColumnDescriptor::entity_path`])?
+    pub fn try_from_arrow_field(
+        chunk_entity_path: Option<&EntityPath>,
+        field: &ArrowField,
+    ) -> Result<Self, MissingFieldMetadata> {
+        let entity_path = if let Some(chunk_entity_path) = chunk_entity_path {
+            chunk_entity_path.clone()
+        } else {
+            EntityPath::parse_forgiving(field.get_or_err("rerun.entity_path")?)
+        };
+
+        let component_name = if let Some(component_name) = field.get_opt("rerun.component") {
+            ComponentName::from(component_name)
+        } else {
+            ComponentName::new(field.name()) // Legacy fallback
+        };

-    fn try_from(field: &ArrowField) -> Result<Self, Self::Error> {
         Ok(Self {
             store_datatype: field.data_type().clone(),
-            entity_path: EntityPath::parse_forgiving(field.get_or_err("rerun.entity_path")?),
+            entity_path,
             archetype_name: field.get_opt("rerun.archetype").map(|x| x.into()),
             archetype_field_name: field.get_opt("rerun.archetype_field").map(|x| x.into()),
-            component_name: field.get_or_err("rerun.component")?.into(),
+            component_name,
             is_static: field.get_bool("rerun.is_static"),
             is_indicator: field.get_bool("rerun.is_indicator"),
             is_tombstone: field.get_bool("rerun.is_tombstone"),

diff --git a/rerun_py/src/remote.rs b/rerun_py/src/remote.rs
index 48febc779f14..1b6db9ad1fd1 100644
--- a/rerun_py/src/remote.rs
+++ b/rerun_py/src/remote.rs
@@ -289,9 +289,20 @@ impl PyStorageNodeClient {
             let arrow_schema = ArrowSchema::try_from(&schema)
                 .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;

-            let column_descriptors =
-                re_sorbet::ColumnDescriptor::from_arrow_fields(&arrow_schema.fields)
-                    .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
+            let chunk_schema = re_sorbet::ChunkSchema::try_from(&arrow_schema)
+                .map_err(|err| PyRuntimeError::new_err(err.to_string()))?;
+
+            let column_descriptors = itertools::chain!(
+                chunk_schema
+                    .index_columns
+                    .into_iter()
+                    .map(re_sorbet::ColumnDescriptor::Time),
+                chunk_schema
+                    .data_columns
+                    .into_iter()
+                    .map(re_sorbet::ColumnDescriptor::Component),
+            )
+            .collect();

             Ok(PySchema {
                 schema: column_descriptors,

From d0b92c9ee2d4b7c5ce3ce288c5c28dff6cfee12d Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 14:24:27 +0100
Subject: [PATCH 14/38] Fix roundtripping

---
 crates/store/re_sorbet/src/chunk_batch.rs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs
index 7071f2eb48b2..54377d75d5e7 100644
--- a/crates/store/re_sorbet/src/chunk_batch.rs
+++ b/crates/store/re_sorbet/src/chunk_batch.rs
@@ -235,7 +235,13 @@ impl TryFrom<ArrowRecordBatch> for ChunkBatch {
         arrow_schema
             .metadata
             .extend(chunk_schema.arrow_batch_metadata());
-        let batch = batch.with_schema(arrow_schema.into()).expect("Can't fail");
+
+        let batch = ArrowRecordBatch::try_new_with_options(
+            arrow_schema.into(),
+            batch.columns().to_vec(),
+            &RecordBatchOptions::default().with_row_count(Some(batch.num_rows())),
+        )
+        .expect("Can't fail");

         Ok(Self {
             schema: chunk_schema,

From ab2ad6f0c056de38d9f58b29dea9444e1b352ec0 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 14:31:21 +0100
Subject: [PATCH 15/38] Use "true" instead of "yes", to make it more JSON-like

---
 ...e_dataframe__query__tests__async_barebones_static.snap | 4 ++--
 ...dataframe__query__tests__async_barebones_temporal.snap | 4 ++--
 .../re_dataframe__query__tests__barebones-2.snap          | 4 ++--
 .../snapshots/re_dataframe__query__tests__barebones.snap  | 4 ++--
 .../snapshots/re_dataframe__query__tests__clears-2.snap   | 4 ++--
 .../src/snapshots/re_dataframe__query__tests__clears.snap | 4 ++--
 .../re_dataframe__query__tests__filtered_index_range.snap | 4 ++--
 ...re_dataframe__query__tests__filtered_index_values.snap | 4 ++--
 ...e_dataframe__query__tests__filtered_is_not_null-2.snap | 4 ++--
 ...e_dataframe__query__tests__filtered_is_not_null-3.snap | 4 ++--
...e_dataframe__query__tests__filtered_is_not_null-4.snap | 4 ++-- .../re_dataframe__query__tests__filtered_is_not_null.snap | 4 ++-- .../re_dataframe__query__tests__selection-2.snap | 5 +++-- .../re_dataframe__query__tests__selection-4.snap | 4 ++-- ...query__tests__sparse_fill_strategy_latestatglobal.snap | 4 ++-- .../re_dataframe__query__tests__using_index_values-2.snap | 4 ++-- .../re_dataframe__query__tests__using_index_values.snap | 4 ++-- .../re_dataframe__query__tests__view_contents-2.snap | 4 ++-- ...aframe__query__tests__view_contents_and_selection.snap | 6 +++--- crates/store/re_sorbet/src/data_column_schema.rs | 8 ++++---- 20 files changed, 44 insertions(+), 43 deletions(-) diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_static.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_static.snap index db957933033a..271d97e97da8 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_static.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_static.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ null ┆ null ┆ null ┆ [c] ┆ null │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_temporal.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_temporal.snap index 7ce18178b1a0..e0f9c9d8b04b 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_temporal.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__async_barebones_temporal.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ 
kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 10 ┆ 1970-01-01T00:00:00.000000010 ┆ null ┆ [c] ┆ [{x: 0.0, y: 0.0}] │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones-2.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones-2.snap index 7ce18178b1a0..e0f9c9d8b04b 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones-2.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones-2.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 10 ┆ 1970-01-01T00:00:00.000000010 ┆ null ┆ [c] ┆ [{x: 0.0, y: 0.0}] │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones.snap index db957933033a..271d97e97da8 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__barebones.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ null ┆ null ┆ null ┆ [c] ┆ null │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears-2.snap 
b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears-2.snap index 99889208a5c1..f145b121fe1b 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears-2.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears-2.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 10 ┆ 1970-01-01T00:00:00.000000010 ┆ null ┆ [c] ┆ [{x: 0.0, y: 0.0}] │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears.snap index 99889208a5c1..f145b121fe1b 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__clears.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 10 ┆ 1970-01-01T00:00:00.000000010 ┆ null ┆ [c] ┆ [{x: 0.0, y: 0.0}] │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_range.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_range.snap index b1b085b65851..a2ef8ebd40c1 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_range.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_range.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: 
"TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 30 ┆ null ┆ [2] ┆ [c] ┆ [{x: 2.0, y: 2.0}] │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_values.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_values.snap index 14ff3c194dd9..cf59831c6c90 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_values.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_index_values.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 30 ┆ null ┆ [2] ┆ [c] ┆ [{x: 2.0, y: 2.0}] │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-2.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-2.snap index 961f4883fa04..ebb8736328a9 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-2.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-2.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌────────────────────────┬────────────────────────┬──────────────────────────────┬──────────────────────────────┬──────────────────────────────┐ │ frame_nr ┆ log_time ┆ /this/that:example.MyColor ┆ /this/that:example.MyLabel ┆ /this/that:example.MyPoint │ @@ -8,7 +8,7 @@ expression: "TransportChunk::from(dataframe)" │ type: "i64" ┆ 
type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ -│ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ +│ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ ┆ ┆ ┆ kind: "data" ┆ │ ╞════════════════════════╪════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ └────────────────────────┴────────────────────────┴──────────────────────────────┴──────────────────────────────┴──────────────────────────────┘ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-3.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-3.snap index 7ce18178b1a0..e0f9c9d8b04b 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-3.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-3.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 10 ┆ 1970-01-01T00:00:00.000000010 ┆ null ┆ [c] ┆ [{x: 0.0, y: 0.0}] │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-4.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-4.snap index f764102f654c..3a99fcafeee9 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-4.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null-4.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" 
┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 30 ┆ null ┆ [2] ┆ [c] ┆ [{x: 2.0, y: 2.0}] │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null.snap index 961f4883fa04..ebb8736328a9 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__filtered_is_not_null.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌────────────────────────┬────────────────────────┬──────────────────────────────┬──────────────────────────────┬──────────────────────────────┐ │ frame_nr ┆ log_time ┆ /this/that:example.MyColor ┆ /this/that:example.MyLabel ┆ /this/that:example.MyPoint │ @@ -8,7 +8,7 @@ expression: "TransportChunk::from(dataframe)" │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ -│ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ +│ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ ┆ ┆ ┆ kind: "data" ┆ │ ╞════════════════════════╪════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ └────────────────────────┴────────────────────────┴──────────────────────────────┴──────────────────────────────┴──────────────────────────────┘ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-2.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-2.snap index fc9abb00c569..4c40256a40bd 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-2.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-2.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -11,7 +11,8 @@ expression: "TransportChunk::from(dataframe)" │ │ --- ┆ --- ┆ --- │ │ │ │ type: "i64" ┆ type: "i64" ┆ type: "null" │ │ │ │ index_name: "frame_nr" ┆ index_name: "frame_nr" ┆ index_name: "ATimeColumnThatDoesntExist" │ │ -│ │ kind: "index" ┆ kind: "index" ┆ kind: "index" │ │ +│ │ kind: "index" ┆ kind: "index" ┆ is_sorted: "true" │ │ +│ │ ┆ ┆ kind: "index" │ │ │ ╞════════════════════════╪════════════════════════╪══════════════════════════════════════════╡ │ │ │ 10 ┆ 10 ┆ null │ │ │ ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-4.snap 
b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-4.snap index ad3cdcde7a7b..4b65d98f4a7e 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-4.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__selection-4.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -17,7 +17,7 @@ expression: "TransportChunk::from(dataframe)" │ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ entity_path: │ │ │ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ "/this/that" │ │ │ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ is_static: │ │ -│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ "yes" │ │ +│ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ "true" │ │ │ │ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ kind: "data" │ │ │ ╞════════════════╪════════════════╪════════════════╪════════════════╪════════════════╪════════════════╪════════════════╪════════════════╪════════════════╪════════════════╪════════════════╡ │ │ │ 10 ┆ 10 ┆ 10 ┆ 10 ┆ 10 ┆ 10 ┆ 10 ┆ 10 ┆ 10 ┆ 10 ┆ [c] │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__sparse_fill_strategy_latestatglobal.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__sparse_fill_strategy_latestatglobal.snap index 611b3867fcb0..76f2eff49bb4 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__sparse_fill_strategy_latestatglobal.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__sparse_fill_strategy_latestatglobal.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 10 ┆ 1970-01-01T00:00:00.000000010 ┆ null ┆ [c] ┆ [{x: 0.0, y: 0.0}] │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values-2.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values-2.snap index 0c0c7ebc3a62..932dc1b111fb 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values-2.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values-2.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- 
┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 0 ┆ null ┆ null ┆ [c] ┆ null │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values.snap index bad2cb956e13..f0d9abdc6deb 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__using_index_values.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" ┆ type: "List[Struct[2]]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" ┆ component: "example.MyPoint" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" ┆ kind: "data" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" ┆ kind: "data" │ │ │ │ ┆ ┆ ┆ kind: "data" ┆ │ │ │ ╞════════════════════════╪════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 0 ┆ null ┆ null ┆ [c] ┆ null │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents-2.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents-2.snap index a81573d92315..3e96d3b4e3e9 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents-2.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents-2.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -12,7 +12,7 @@ expression: "TransportChunk::from(dataframe)" │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[Utf8]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" │ │ │ │ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ 
entity_path: "/this/that" │ │ -│ │ ┆ ┆ kind: "data" ┆ is_static: "yes" │ │ +│ │ ┆ ┆ kind: "data" ┆ is_static: "true" │ │ │ │ ┆ ┆ ┆ kind: "data" │ │ │ ╞════════════════════════╪════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 30 ┆ null ┆ [2] ┆ [c] │ │ diff --git a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents_and_selection.snap b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents_and_selection.snap index 88cb601328a1..32c026cfed50 100644 --- a/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents_and_selection.snap +++ b/crates/store/re_dataframe/src/snapshots/re_dataframe__query__tests__view_contents_and_selection.snap @@ -1,6 +1,6 @@ --- source: crates/store/re_dataframe/src/query.rs -expression: "TransportChunk::from(dataframe)" +expression: DisplayRB(dataframe) --- ┌───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ CHUNK METADATA: │ @@ -11,8 +11,8 @@ expression: "TransportChunk::from(dataframe)" │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ │ │ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "null" ┆ type: "null" ┆ type: "List[u32]" ┆ type: "List[Utf8]" │ │ │ │ index_name: "frame_nr" ┆ index_name: "log_time" ┆ index_name: "log_tick" ┆ component: "example.MyPoint" ┆ component: "example.MyColor" ┆ component: "example.MyLabel" │ │ -│ │ kind: "index" ┆ kind: "index" ┆ kind: "index" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ -│ │ ┆ ┆ ┆ kind: "data" ┆ kind: "data" ┆ is_static: "yes" │ │ +│ │ kind: "index" ┆ kind: "index" ┆ is_sorted: "true" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │ +│ │ ┆ ┆ kind: "index" ┆ kind: "data" ┆ kind: "data" ┆ is_static: "true" │ │ │ │ ┆ ┆ ┆ ┆ ┆ kind: "data" │ │ │ ╞════════════════════════╪════════════════════════╪════════════════════════╪══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │ │ │ 30 ┆ null ┆ null ┆ null ┆ [2] ┆ [c] │ │ diff --git a/crates/store/re_sorbet/src/data_column_schema.rs b/crates/store/re_sorbet/src/data_column_schema.rs index 46fc97dbdeca..45f886886448 100644 --- a/crates/store/re_sorbet/src/data_column_schema.rs +++ b/crates/store/re_sorbet/src/data_column_schema.rs @@ -184,11 +184,11 @@ impl ComponentColumnDescriptor { "rerun.component".to_owned(), component_name.short_name().to_owned(), )), - (*is_static).then_some(("rerun.is_static".to_owned(), "yes".to_owned())), - (*is_indicator).then_some(("rerun.is_indicator".to_owned(), "yes".to_owned())), - (*is_tombstone).then_some(("rerun.is_tombstone".to_owned(), "yes".to_owned())), + (*is_static).then_some(("rerun.is_static".to_owned(), "true".to_owned())), + (*is_indicator).then_some(("rerun.is_indicator".to_owned(), "true".to_owned())), + (*is_tombstone).then_some(("rerun.is_tombstone".to_owned(), "true".to_owned())), (*is_semantically_empty) - .then_some(("rerun.is_semantically_empty".to_owned(), "yes".to_owned())), + .then_some(("rerun.is_semantically_empty".to_owned(), "true".to_owned())), ] .into_iter() .flatten() From f79bdc26894d3a4999cfb7ea0ca9608c412f23f3 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Mon, 10 Feb 2025 14:44:33 +0100 Subject: [PATCH 16/38] Use similar_asserts --- Cargo.lock | 1 + crates/store/re_grpc_server/Cargo.toml | 3 +++ crates/store/re_grpc_server/src/lib.rs | 3 ++- 3 files 
 changed, 6 insertions(+), 1 deletion(-)

diff --git a/Cargo.lock b/Cargo.lock
index 3bd1059c03e1..fea1d86f2af6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6184,6 +6184,7 @@ dependencies = [
 "re_protos",
 "re_tracing",
 "re_types",
+ "similar-asserts",
 "tokio",
 "tokio-stream",
 "tokio-util",
diff --git a/crates/store/re_grpc_server/Cargo.toml b/crates/store/re_grpc_server/Cargo.toml
index 538b56257975..22dcf8553d31 100644
--- a/crates/store/re_grpc_server/Cargo.toml
+++ b/crates/store/re_grpc_server/Cargo.toml
@@ -39,3 +39,6 @@ tokio-util.workspace = true
 tonic = { workspace = true, default-features = false, features = ["transport"] }
 tonic-web.workspace = true
 tower-http = { workspace = true, features = ["cors"] }
+
+[dev-dependencies]
+similar-asserts.workspace = true
diff --git a/crates/store/re_grpc_server/src/lib.rs b/crates/store/re_grpc_server/src/lib.rs
index 020ba2a4559e..7df9f98aa939 100644
--- a/crates/store/re_grpc_server/src/lib.rs
+++ b/crates/store/re_grpc_server/src/lib.rs
@@ -273,6 +273,7 @@ mod tests {
     use re_protos::sdk_comms::v0::{
         message_proxy_client::MessageProxyClient, message_proxy_server::MessageProxyServer,
     };
+    use similar_asserts::assert_eq;
     use std::net::SocketAddr;
     use std::sync::Arc;
     use std::time::Duration;
@@ -541,7 +542,7 @@ mod tests {

         for log_stream in &mut log_streams {
             let actual = read_log_stream(log_stream, expected.len()).await;
-            assert_eq!(expected, actual);
+            assert_eq!(actual, expected);
         }

         completion.finish();

From 52be0a46ffe0ee66491d92e254ef16338806ae56 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 14:49:25 +0100
Subject: [PATCH 17/38] Less TransportChunk

---
 Cargo.lock | 7 -------
 crates/store/re_chunk/Cargo.toml | 1 -
 crates/store/re_chunk/src/chunk.rs | 4 ++--
 crates/store/re_chunk/src/transport.rs | 3 +--
 crates/store/re_log_encoding/src/codec/wire/mod.rs | 2 +-
 crates/viewer/re_chunk_store_ui/src/chunk_ui.rs | 12 ++++++------
 6 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index fea1d86f2af6..cf9781718cca 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5835,7 +5835,6 @@ dependencies = [
 "re_tuid",
 "re_types_core",
 "similar-asserts",
- "tap",
 "thiserror 1.0.65",
 ]
@@ -8692,12 +8691,6 @@ dependencies = [
 "libc",
 ]
-[[package]]
-name = "tap"
-version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
-
 [[package]]
 name = "target-lexicon"
 version = "0.12.16"
diff --git a/crates/store/re_chunk/Cargo.toml b/crates/store/re_chunk/Cargo.toml
index cc4c3873e0c4..1407b1ffcb18 100644
--- a/crates/store/re_chunk/Cargo.toml
+++ b/crates/store/re_chunk/Cargo.toml
@@ -52,7 +52,6 @@ itertools.workspace = true
 nohash-hasher.workspace = true
 rand = { workspace = true, features = ["std_rng"] }
 similar-asserts.workspace = true
-tap.workspace = true
 thiserror.workspace = true

 # Native dependencies:
diff --git a/crates/store/re_chunk/src/chunk.rs b/crates/store/re_chunk/src/chunk.rs
index 14bc14e2ec6a..93105f2c19bd 100644
--- a/crates/store/re_chunk/src/chunk.rs
+++ b/crates/store/re_chunk/src/chunk.rs
@@ -1274,11 +1274,11 @@ impl Chunk {
 impl std::fmt::Display for Chunk {
     #[inline]
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let transport_chunk = self.to_transport().map_err(|err| {
+        let batch = self.to_record_batch().map_err(|err| {
             re_log::error_once!("couldn't display Chunk: {err}");
             std::fmt::Error
         })?;
-        transport_chunk.fmt(f)
+        re_format_arrow::format_record_batch_with_width(&batch, f.width()).fmt(f)
     }
 }
diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs
index 7db98a534425..3a32e69a8ed8 100644
--- a/crates/store/re_chunk/src/transport.rs
+++ b/crates/store/re_chunk/src/transport.rs
@@ -778,11 +778,10 @@ impl Chunk {
         re_tracing::profile_function!();
         self.sanity_check()?;
-        let transport = self.to_transport()?;
         Ok(re_log_types::ArrowMsg {
             chunk_id: re_tuid::Tuid::from_u128(self.id().as_u128()),
             timepoint_max: self.timepoint_max(),
-            batch: transport.into(),
+            batch: self.to_record_batch()?,
             on_release: None,
         })
     }
diff --git a/crates/store/re_log_encoding/src/codec/wire/mod.rs b/crates/store/re_log_encoding/src/codec/wire/mod.rs
index 7f2ea2624100..6f511a93b67a 100644
--- a/crates/store/re_log_encoding/src/codec/wire/mod.rs
+++ b/crates/store/re_log_encoding/src/codec/wire/mod.rs
@@ -56,7 +56,7 @@ mod tests {
         let encoded: DataframePart = expected_chunk
             .clone()
-            .to_transport()
+            .to_record_batch()
             .unwrap()
             .encode()
             .unwrap();
diff --git a/crates/viewer/re_chunk_store_ui/src/chunk_ui.rs b/crates/viewer/re_chunk_store_ui/src/chunk_ui.rs
index 32fbab0d5e8e..efa7ec106bf9 100644
--- a/crates/viewer/re_chunk_store_ui/src/chunk_ui.rs
+++ b/crates/viewer/re_chunk_store_ui/src/chunk_ui.rs
@@ -203,8 +203,8 @@ impl ChunkUi {
             }
         };

-        let fields_ui = |ui: &mut egui::Ui, transport: &ArrowRecordBatch| {
-            for field in &transport.schema_ref().fields {
+        let fields_ui = |ui: &mut egui::Ui, batch: &ArrowRecordBatch| {
+            for field in &batch.schema_ref().fields {
                 ui.push_id(field.name().clone(), |ui| {
                     ui.list_item_collapsible_noninteractive_label(field.name(), false, |ui| {
                         ui.list_item_collapsible_noninteractive_label("Data type", false, |ui| {
@@ -276,14 +276,14 @@ impl ChunkUi {
         list_item::list_item_scope(ui, "chunk_stats", |ui| {
             ui.list_item_collapsible_noninteractive_label("Stats", false, chunk_stats_ui);

-            match self.chunk.to_transport() {
-                Ok(transport) => {
+            match self.chunk.to_record_batch() {
+                Ok(batch) => {
                     ui.list_item_collapsible_noninteractive_label("Transport", false, |ui| {
                         ui.list_item_collapsible_noninteractive_label("Metadata", false, |ui| {
-                            metadata_ui(ui, &transport.schema_ref().metadata);
+                            metadata_ui(ui, &batch.schema_ref().metadata);
                         });
                         ui.list_item_collapsible_noninteractive_label("Fields", false, |ui| {
-                            fields_ui(ui, &transport);
+                            fields_ui(ui, &batch);
                         });
                     });
                 }

From 3519a747c078f953411a1e6ac527f9afc0bc2a7e Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 15:10:54 +0100
Subject: [PATCH 18/38] Remove some use of TransportChunk

---
 Cargo.lock | 2 +
 crates/store/re_chunk/src/transport.rs | 231 +------------------
 crates/store/re_grpc_client/Cargo.toml | 2 +
 crates/store/re_grpc_client/src/lib.rs | 113 ++++++------
 4 files changed, 72 insertions(+), 276 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index cf9781718cca..c7dfecccad65 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6150,6 +6150,7 @@ version = "0.23.0-alpha.1+dev"
 dependencies = [
 "arrow",
 "async-stream",
+ "itertools 0.13.0",
 "re_arrow_util",
 "re_chunk",
 "re_error",
@@ -6158,6 +6159,7 @@ dependencies = [
 "re_log_types",
 "re_protos",
 "re_smart_channel",
+ "re_sorbet",
 "re_types",
 "thiserror 1.0.65",
 "tokio",
diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs
index 3a32e69a8ed8..a931c5c46b73 100644
--- a/crates/store/re_chunk/src/transport.rs
+++ b/crates/store/re_chunk/src/transport.rs
@@ -3,18 +3,15 @@ use arrow::{
     array::{
         Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
         RecordBatch as ArrowRecordBatch,
     },
-    datatypes::{Field as ArrowField, Fields as ArrowFields, Schema as ArrowSchema},
+    datatypes::{Field as ArrowField, Fields as ArrowFields},
 };
 use itertools::Itertools;
 use nohash_hasher::IntMap;
-use tap::Tap as _;

 use re_arrow_util::{arrow_util::into_arrow_ref, ArrowArrayDowncastRef as _};
 use re_byte_size::SizeBytes as _;
 use re_log_types::EntityPath;
-use re_types_core::{
-    arrow_helpers::as_array_ref, Component as _, ComponentDescriptor, Loggable as _,
-};
+use re_types_core::{arrow_helpers::as_array_ref, ComponentDescriptor, Loggable as _};

 use crate::{chunk::ChunkComponents, Chunk, ChunkError, ChunkId, ChunkResult, RowId, TimeColumn};

@@ -502,156 +499,7 @@ impl Chunk {
     ///
     /// It is probably a good idea to sort the chunk first.
     pub fn to_record_batch(&self) -> ChunkResult<ArrowRecordBatch> {
-        self.sanity_check()?;
-
-        re_tracing::profile_function!(format!(
-            "num_columns={} num_rows={}",
-            self.num_columns(),
-            self.num_rows()
-        ));
-
-        let Self {
-            id,
-            entity_path,
-            heap_size_bytes: _, // use the method instead because of lazy initialization
-            is_sorted,
-            row_ids,
-            timelines,
-            components,
-        } = self;
-
-        let mut fields: Vec<ArrowField> = vec![];
-        let mut metadata = std::collections::HashMap::default();
-        let mut columns: Vec<ArrowArrayRef> =
-            Vec::with_capacity(1 /* row_ids */ + timelines.len() + components.len());
-
-        // Chunk-level metadata
-        {
-            re_tracing::profile_scope!("metadata");
-
-            metadata.extend(TransportChunk::chunk_metadata_id(*id));
-
-            metadata.extend(TransportChunk::chunk_metadata_entity_path(entity_path));
-
-            metadata.extend(TransportChunk::chunk_metadata_heap_size_bytes(
-                self.heap_size_bytes(),
-            ));
-
-            if *is_sorted {
-                metadata.extend(TransportChunk::chunk_metadata_is_sorted());
-            }
-        }
-
-        // Row IDs
-        {
-            re_tracing::profile_scope!("row ids");
-
-            fields.push(
-                ArrowField::new(
-                    RowId::descriptor().to_string(),
-                    RowId::arrow_datatype().clone(),
-                    false,
-                )
-                .with_metadata(
-                    TransportChunk::field_metadata_control_column().tap_mut(|metadata| {
-                        // This ensures the RowId/Tuid is formatted correctly
-                        metadata.insert(
-                            "ARROW:extension:name".to_owned(),
-                            re_tuid::Tuid::ARROW_EXTENSION_NAME.to_owned(),
-                        );
-                    }),
-                ),
-            );
-            columns.push(into_arrow_ref(row_ids.clone()));
-        }
-
-        // Timelines
-        {
-            re_tracing::profile_scope!("timelines");
-
-            let mut timelines = timelines
-                .iter()
-                .map(|(timeline, info)| {
-                    let TimeColumn {
-                        timeline: _,
-                        times: _,
-                        is_sorted,
-                        time_range: _,
-                    } = info;
-
-                    let nullable = false; // timelines within a single chunk are always dense
-                    let field =
-                        ArrowField::new(timeline.name().to_string(), timeline.datatype(), nullable)
-                            .with_metadata({
-                                let mut metadata = TransportChunk::field_metadata_time_column();
-                                if *is_sorted {
-                                    metadata.extend(TransportChunk::field_metadata_is_sorted());
-                                }
-                                metadata
-                            });
-
-                    let times = info.times_array();
-
-                    (field, times)
-                })
-                .collect_vec();
-
-            timelines
-                .sort_by(|(field1, _times1), (field2, _times2)| field1.name().cmp(field2.name()));
-
-            for (field, times) in timelines {
-                fields.push(field);
-                columns.push(times);
-            }
-        }
-
-        // Components
-        {
-            re_tracing::profile_scope!("components");
-
-            let mut components = components
-                .values()
-                .flat_map(|per_desc| per_desc.iter())
-                .map(|(component_desc, list_array)| {
-                    let list_array = ArrowListArray::from(list_array.clone());
-
-                    let field = ArrowField::new(
-                        component_desc.component_name.to_string(),
-                        list_array.data_type().clone(),
-                        true,
-                    )
-                    .with_metadata({
-                        let mut metadata = TransportChunk::field_metadata_data_column();
-                        metadata.extend(TransportChunk::field_metadata_component_descriptor(
-                            component_desc,
-                        ));
-                        metadata
-                    });
-
-                    (field, as_array_ref(list_array))
-                })
-                .collect_vec();
-
-            components
-                .sort_by(|(field1, _data1), (field2, _data2)| field1.name().cmp(field2.name()));
-
-            for (field, data) in components {
-                fields.push(field);
-                columns.push(data);
-            }
-        }
-
-        let schema = ArrowSchema::new_with_metadata(fields, metadata);
-
-        Ok(ArrowRecordBatch::try_new(schema.into(), columns)?)
-    }
-
-    /// Prepare the [`Chunk`] for transport.
-    ///
-    /// It is probably a good idea to sort the chunk first.
-    pub fn to_transport(&self) -> ChunkResult<TransportChunk> {
-        let record_batch = self.to_record_batch()?;
-        Ok(TransportChunk::from(record_batch))
+        Ok(self.to_chunk_batch()?.into())
     }

     pub fn from_chunk_batch(batch: &re_sorbet::ChunkBatch) -> ChunkResult<Self> {
@@ -797,6 +645,7 @@ mod tests {
         example_components::{MyColor, MyPoint},
         Timeline,
     };
+    use re_types_core::Component as _;

     use super::*;

@@ -858,78 +707,6 @@ mod tests {
         let row_ids = vec![RowId::new(), RowId::new(), RowId::new(), RowId::new()];

-        for timelines in [timelines1.clone(), timelines2.clone()] {
-            let chunk_original = Chunk::from_native_row_ids(
-                ChunkId::new(),
-                entity_path.clone(),
-                None,
-                &row_ids,
-                timelines.clone(),
-                components.clone().into_iter().collect(),
-            )?;
-            let mut chunk_before = chunk_original.clone();
-
-            for _ in 0..3 {
-                let chunk_in_transport = chunk_before.to_transport()?;
-                let chunk_after = Chunk::from_record_batch(chunk_in_transport.clone().into())?;
-
-                assert_eq!(
-                    chunk_in_transport.entity_path()?,
-                    *chunk_original.entity_path()
-                );
-                assert_eq!(
-                    chunk_in_transport.entity_path()?,
-                    *chunk_after.entity_path()
-                );
-                assert_eq!(
-                    chunk_in_transport.heap_size_bytes(),
-                    Some(chunk_after.heap_size_bytes()),
-                );
-                assert_eq!(
-                    chunk_in_transport.num_columns(),
-                    chunk_original.num_columns()
-                );
-                assert_eq!(chunk_in_transport.num_columns(), chunk_after.num_columns());
-                assert_eq!(chunk_in_transport.num_rows(), chunk_original.num_rows());
-                assert_eq!(chunk_in_transport.num_rows(), chunk_after.num_rows());
-
-                assert_eq!(
-                    chunk_in_transport.num_controls(),
-                    chunk_original.num_controls()
-                );
-                assert_eq!(
-                    chunk_in_transport.num_controls(),
-                    chunk_after.num_controls()
-                );
-                assert_eq!(
-                    chunk_in_transport.num_timelines(),
-                    chunk_original.num_timelines()
-                );
-                assert_eq!(
-                    chunk_in_transport.num_timelines(),
-                    chunk_after.num_timelines()
-                );
-                assert_eq!(
-                    chunk_in_transport.num_components(),
-                    chunk_original.num_components()
-                );
-                assert_eq!(
-                    chunk_in_transport.num_components(),
-                    chunk_after.num_components()
-                );
-
-                eprintln!("{chunk_before}");
-                eprintln!("{chunk_in_transport}");
-                eprintln!("{chunk_after}");
-
-                assert_eq!(chunk_before, chunk_after);
-
-                assert!(chunk_before.are_equal(&chunk_after));
-
-                chunk_before = chunk_after;
-            }
-        }
-
         for timelines in [timelines1, timelines2] {
             let chunk_before = Chunk::from_native_row_ids(
                 ChunkId::new(),
diff --git a/crates/store/re_grpc_client/Cargo.toml b/crates/store/re_grpc_client/Cargo.toml
index 56a65f476ad4..256a29fc1c00 100644
--- a/crates/store/re_grpc_client/Cargo.toml
+++ b/crates/store/re_grpc_client/Cargo.toml
@@ -28,10 +28,12 @@ re_log_encoding = { workspace = true, features = ["encoder", "decoder"] }
 re_log_types.workspace = true
 re_protos.workspace = true
 re_smart_channel.workspace = true
+re_sorbet.workspace = true
 re_types.workspace = true

 arrow.workspace = true
 async-stream.workspace = true
+itertools.workspace = true
 thiserror.workspace = true
 tokio-stream.workspace = true
 url.workspace = true
diff --git a/crates/store/re_grpc_client/src/lib.rs b/crates/store/re_grpc_client/src/lib.rs
index 38df97ee48e7..f456cdbd8f1b 100644
--- a/crates/store/re_grpc_client/src/lib.rs
+++ b/crates/store/re_grpc_client/src/lib.rs
@@ -399,7 +399,7 @@ async fn stream_catalog_async(
     re_log::info!("Starting to read...");
     while let Some(result) = resp.next().await {
-        let input = TransportChunk::from(result.map_err(TonicStatusError)?);
+        let record_batch = result.map_err(TonicStatusError)?;

         // Catalog received from the ReDap server isn't suitable for direct conversion to a Rerun Chunk:
         // - conversion expects "data" columns to be ListArrays, hence we need to convert any individual row column data to ListArray
@@ -407,57 +407,72 @@ async fn stream_catalog_async(
         let mut fields: Vec<ArrowField> = Vec::new();
         let mut columns: Vec<ArrowArrayRef> = Vec::new();

-        // add the (row id) control field
-        let (row_id_field, row_id_data) = input.controls().next().ok_or(
-            StreamError::ChunkError(re_chunk::ChunkError::Malformed {
-                reason: "no control field found".to_owned(),
-            }),
-        )?;
-
-        fields.push(
-            ArrowField::new(
-                RowId::name().to_string(), // need to rename to Rerun Chunk expected control field
-                row_id_field.data_type().clone(),
-                false, /* not nullable */
-            )
-            .with_metadata(TransportChunk::field_metadata_control_column()),
-        );
-        columns.push(row_id_data.clone());

-        // next add any timeline field
-        for (field, data) in input.timelines() {
-            fields.push(field.clone());
-            columns.push(data.clone());
-        }
+        for (field, data) in
+            itertools::izip!(record_batch.schema().fields(), record_batch.columns())
+        {
+            // TODO: move this switch to re_sorbet
+            match field.metadata().get("rerun.kind").map(|s| s.as_str()) {
+                Some("control") => {
+                    fields.push(
+                        ArrowField::new(
+                            RowId::name().to_string(), // need to rename to Rerun Chunk expected control field
+                            field.data_type().clone(),
+                            false, /* not nullable */
+                        )
+                        .with_metadata(TransportChunk::field_metadata_control_column()),
+                    );

-        // now add all the 'data' fields - we slice each column array into individual arrays and then convert the whole lot into a ListArray
-        for (field, data) in input.components() {
-            let data_field_inner =
-                ArrowField::new("item", field.data_type().clone(), true /* nullable */);
-
-            let data_field = ArrowField::new(
-                field.name().clone(),
-                ArrowDataType::List(Arc::new(data_field_inner.clone())),
-                false, /* not nullable */
-            )
-            .with_metadata(TransportChunk::field_metadata_data_column());
-
-            let mut sliced: Vec<ArrowArrayRef> = Vec::new();
-            for idx in 0..data.len() {
-                sliced.push(data.clone().slice(idx, 1));
-            }
+                    columns.push(data.clone());
+                }
+                Some("index" | "time") => {
+                    fields.push(ArrowField::clone(field.as_ref()));
+                    columns.push(data.clone());
+                }
+                Some("data" | "component") => {
+                    // We slice each column array into individual arrays and then convert the whole lot into a ListArray
+
+                    let data_field_inner = ArrowField::new(
+                        "item",
+                        field.data_type().clone(),
+                        true, /* nullable */
+                    );

-            let data_arrays = sliced.iter().map(|e| Some(e.as_ref())).collect::<Vec<_>>();
-            #[allow(clippy::unwrap_used)] // we know we've given the right field type
-            let data_field_array: arrow::array::ListArray =
-                re_arrow_util::arrow_util::arrays_to_list_array(
-                    data_field_inner.data_type().clone(),
-                    &data_arrays,
-                )
-                .unwrap();
-
-            fields.push(data_field);
-            columns.push(as_array_ref(data_field_array));
+                    let data_field = ArrowField::new(
+                        field.name().clone(),
+                        ArrowDataType::List(Arc::new(data_field_inner.clone())),
+                        false, /* not nullable */
+                    )
+                    .with_metadata(TransportChunk::field_metadata_data_column());
+
+                    let mut sliced: Vec<ArrowArrayRef> = Vec::new();
+                    for idx in 0..data.len() {
+                        sliced.push(data.clone().slice(idx, 1));
+                    }
+
+                    let data_arrays = sliced.iter().map(|e| Some(e.as_ref())).collect::<Vec<_>>();
+                    #[allow(clippy::unwrap_used)] // we know we've given the right field type
+                    let data_field_array: arrow::array::ListArray =
+                        re_arrow_util::arrow_util::arrays_to_list_array(
+                            data_field_inner.data_type().clone(),
+                            &data_arrays,
+                        )
+                        .unwrap();
+
+                    fields.push(data_field);
+                    columns.push(as_array_ref(data_field_array));
+                }
+                Some(kind) => {
+                    return Err(StreamError::ChunkError(re_chunk::ChunkError::Malformed {
+                        reason: format!("Unknown rerun.kind {kind:?}"),
+                    }));
+                }
+                None => {
+                    return Err(StreamError::ChunkError(re_chunk::ChunkError::Malformed {
+                        reason: "Missing or unknown rerun.kind".to_owned(),
+                    }));
+                }
+            }
         }

         let schema = {

From 3a51b621fece7c89b3c88f1b6dc2e0e3920ffd8c Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 15:12:19 +0100
Subject: [PATCH 19/38] Remove `TransportChunk` as a concrete type

---
 crates/store/re_chunk/src/transport.rs | 169 +-----------------
 .../formatting__format_chunk_store.snap | 36 ++--
 2 files changed, 23 insertions(+), 182 deletions(-)

diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs
index a931c5c46b73..a38c2dbeb4b7 100644
--- a/crates/store/re_chunk/src/transport.rs
+++ b/crates/store/re_chunk/src/transport.rs
@@ -1,9 +1,6 @@
 use arrow::{
-    array::{
-        Array as ArrowArray, ArrayRef as ArrowArrayRef, ListArray as ArrowListArray,
-        RecordBatch as ArrowRecordBatch,
-    },
-    datatypes::{Field as ArrowField, Fields as ArrowFields, Schema as ArrowSchema},
+    array::{Array as ArrowArray, ListArray as ArrowListArray, RecordBatch as ArrowRecordBatch},
+    datatypes::Field as ArrowField,
 };
 use itertools::Itertools;
 use nohash_hasher::IntMap;
@@ -19,7 +16,7 @@ pub type ArrowMetadata = std::collections::HashMap<String, String>;

 // ---

-/// A [`Chunk`] that is ready for transport. Obtained by calling [`Chunk::to_transport`].
+/// A [`Chunk`] that is ready for transport.
 ///
 /// It contains a schema with a matching number of columns, all of the same length.
 ///
 /// This is just a wrapper around an [`ArrowRecordBatch`], with some helper functions on top.
 /// It can be converted to and from [`ArrowRecordBatch`] without overhead.
 ///
 /// Use the `Display` implementation to dump the chunk as a nicely formatted table.
 ///
 /// This has a stable ABI! The entire point of this type is to allow users to send raw arrow data
 /// into Rerun.
 ///
 /// This means we have to be very careful when checking the validity of the data: slipping corrupt
 /// data into the store could silently break all the index search logic (e.g. think of a chunk
 /// claiming to be sorted while it is in fact not).
 // TODO(emilk): remove this, and replace it with a trait extension type for `ArrowRecordBatch`.
 #[derive(Debug, Clone)]
-pub struct TransportChunk {
-    batch: ArrowRecordBatch,
-}
-
-impl std::fmt::Display for TransportChunk {
-    #[inline]
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        re_format_arrow::format_record_batch_with_width(self, f.width()).fmt(f)
-    }
-}
-
-impl AsRef<ArrowRecordBatch> for TransportChunk {
-    #[inline]
-    fn as_ref(&self) -> &ArrowRecordBatch {
-        &self.batch
-    }
-}
-
-impl std::ops::Deref for TransportChunk {
-    type Target = ArrowRecordBatch;
-
-    #[inline]
-    fn deref(&self) -> &ArrowRecordBatch {
-        &self.batch
-    }
-}
-
-impl From<ArrowRecordBatch> for TransportChunk {
-    #[inline]
-    fn from(batch: ArrowRecordBatch) -> Self {
-        Self { batch }
-    }
-}
-
-impl From<TransportChunk> for ArrowRecordBatch {
-    #[inline]
-    fn from(chunk: TransportChunk) -> Self {
-        chunk.batch
-    }
-}
+pub struct TransportChunk {}

 // TODO(#6572): Relying on Arrow's native schema metadata feature is bound to fail, we need to
 // switch to something more powerful asap.
@@ -265,125 +223,6 @@ impl TransportChunk {
     }
 }

-impl TransportChunk {
-    #[inline]
-    pub fn id(&self) -> ChunkResult<ChunkId> {
-        if let Some(id) = self.metadata().get(Self::CHUNK_METADATA_KEY_ID) {
-            let id = u128::from_str_radix(id, 16).map_err(|err| ChunkError::Malformed {
-                reason: format!("cannot deserialize chunk id: {err}"),
-            })?;
-            Ok(ChunkId::from_u128(id))
-        } else {
-            Err(crate::ChunkError::Malformed {
-                reason: format!("chunk id missing from metadata ({:?})", self.metadata()),
-            })
-        }
-    }
-
-    #[inline]
-    pub fn entity_path(&self) -> ChunkResult<EntityPath> {
-        match self
-            .schema_ref()
-            .metadata
-            .get(Self::CHUNK_METADATA_KEY_ENTITY_PATH)
-        {
-            Some(entity_path) => Ok(EntityPath::parse_forgiving(entity_path)),
-            None => Err(crate::ChunkError::Malformed {
-                reason: format!(
-                    "entity path missing from metadata ({:?})",
-                    self.schema_ref().metadata
-                ),
-            }),
-        }
-    }
-
-    /// The size in bytes of the data, once loaded in memory, in chunk-level.
-    ///
-    /// This is stored in the metadata. Returns `None` if that key is not set.
-    #[inline]
-    pub fn heap_size_bytes(&self) -> Option<u64> {
-        self.metadata()
-            .get(Self::CHUNK_METADATA_KEY_HEAP_SIZE_BYTES)
-            .and_then(|s| s.parse::<u64>().ok())
-    }
-
-    #[inline]
-    pub fn fields(&self) -> &ArrowFields {
-        &self.schema_ref().fields
-    }
-
-    pub fn metadata(&self) -> &std::collections::HashMap<String, String> {
-        &self.batch.schema_ref().metadata
-    }
-
-    /// Looks in the chunk metadata for the `IS_SORTED` marker.
-    ///
-    /// It is possible that a chunk is sorted but didn't set that marker.
-    /// This is fine, although wasteful.
-    #[inline]
-    pub fn is_sorted(&self) -> bool {
-        self.metadata()
-            .contains_key(Self::CHUNK_METADATA_MARKER_IS_SORTED_BY_ROW_ID)
-    }
-
-    /// Iterates all columns of the specified `kind`.
-    ///
-    /// See:
-    /// * [`Self::FIELD_METADATA_VALUE_KIND_TIME`]
-    /// * [`Self::FIELD_METADATA_VALUE_KIND_CONTROL`]
-    /// * [`Self::FIELD_METADATA_VALUE_KIND_DATA`]
-    #[inline]
-    fn columns_of_kind<'a>(
-        &'a self,
-        kind: &'a str,
-    ) -> impl Iterator<Item = (&ArrowField, &ArrowArrayRef)> + 'a {
-        self.fields().iter().enumerate().filter_map(|(i, field)| {
-            let actual_kind = field.metadata().get(Self::FIELD_METADATA_KEY_KIND);
-            (actual_kind.map(|s| s.as_str()) == Some(kind))
-                .then(|| {
-                    self.batch
-                        .columns()
-                        .get(i)
-                        .map(|column| (field.as_ref(), column))
-                })
-                .flatten()
-        })
-    }
-
-    /// Iterates all control columns present in this chunk.
-    #[inline]
-    pub fn controls(&self) -> impl Iterator<Item = (&ArrowField, &ArrowArrayRef)> {
-        self.columns_of_kind(Self::FIELD_METADATA_VALUE_KIND_CONTROL)
-    }
-
-    /// Iterates all data columns present in this chunk.
-    #[inline]
-    pub fn components(&self) -> impl Iterator<Item = (&ArrowField, &ArrowArrayRef)> {
-        self.columns_of_kind(Self::FIELD_METADATA_VALUE_KIND_DATA)
-    }
-
-    /// Iterates all timeline columns present in this chunk.
-    #[inline]
-    pub fn timelines(&self) -> impl Iterator<Item = (&ArrowField, &ArrowArrayRef)> {
-        self.columns_of_kind(Self::FIELD_METADATA_VALUE_KIND_TIME)
-    }
-
-    #[inline]
-    pub fn num_controls(&self) -> usize {
-        self.controls().count()
-    }
-
-    #[inline]
-    pub fn num_timelines(&self) -> usize {
-        self.timelines().count()
-    }
-
-    #[inline]
-    pub fn num_components(&self) -> usize {
-        self.components().count()
-    }
-}
-
 impl Chunk {
     /// Prepare the [`Chunk`] for transport.
     ///
diff --git a/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap b/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap
index 229e882a9439..690ec61f453b 100644
--- a/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap
+++ b/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap
@@ -12,22 +12,24 @@ ChunkStore {
     num_events: 2
 }
 chunks: [
-    ┌──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
-    │ CHUNK METADATA: │
-    │ * entity_path: "/this/that" │
-    │ * heap_size_bytes: "1072" │
-    │ * id: "661EFDF2E3B19F7C045F15" │
-    │ * is_sorted: "" │
-    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
-    │ ┌──────────────────────────────────┬───────────────┬───────────────────────────────┬───────────────────┬───────────────────┐ │
-    │ │ RowId ┆ frame_nr ┆ log_time ┆ example.MyColor ┆ example.MyIndex │ │
-    │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │
-    │ │ type: "Struct[2]" ┆ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[u64]" │ │
-    │ │ ARROW:extension:name: "TUID" ┆ is_sorted: "" ┆ is_sorted: "" ┆ kind: "data" ┆ kind: "data" │ │
-    │ │ kind: "control" ┆ kind: "time" ┆ kind: "time" ┆ ┆ │ │
-    │ ╞══════════════════════════════════╪═══════════════╪═══════════════════════════════╪═══════════════════╪═══════════════════╡ │
-    │ │ 0000000067816A6BB4B8C1254D40007B ┆ 1 ┆ 2025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │
-    │ └──────────────────────────────────┴───────────────┴───────────────────────────────┴───────────────────┴───────────────────┘ │
-    └──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+    ┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
+    │ CHUNK METADATA: │
+    │ * entity_path: "/this/that" │
+    │ * heap_size_bytes: "1072" │
+    │ * id: "661EFDF2E3B19F7C045F15" │
+    │ * is_sorted: "true" │
+    │ * version: "1" │
+    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    │ ┌──────────────────────────────────┬────────────────────────┬───────────────────────────────┬──────────────────────────────┬──────────────────────────────┐ │
+    │ │ RowId ┆ frame_nr ┆ log_time ┆ /this/that:example.MyColor ┆ /this/that:example.MyIndex │ │
+    │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │
+    │ │ type: "Struct[2]" ┆ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[u64]" │ │
+    │ │ ARROW:extension:name: "TUID" ┆ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyIndex" │ │
+    │ │ kind: "control" ┆ is_sorted: "true" ┆ is_sorted: "true" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │
+    │ │ ┆ kind: "index" ┆ kind: "index" ┆ kind: "data" ┆ kind: "data" │ │
+    │ ╞══════════════════════════════════╪════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │
+    │ │ 0000000067816A6BB4B8C1254D40007B ┆ 1 ┆ 2025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │
+    │ └──────────────────────────────────┴────────────────────────┴───────────────────────────────┴──────────────────────────────┴──────────────────────────────┘ │
+    └─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
 ]
 }

From f01efda8e19cd9af8f42b285794e76793d1a98e3 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 15:56:20 +0100
Subject: [PATCH 20/38] Remove the last of TransportChunk

---
 Cargo.lock | 3 +
 crates/store/re_chunk/src/chunk.rs | 2 +-
 crates/store/re_chunk/src/lib.rs | 1 -
 crates/store/re_chunk/src/transport.rs | 207 ------------------
 crates/store/re_dataframe/src/lib.rs | 2 -
 crates/store/re_grpc_client/Cargo.toml | 1 +
 crates/store/re_grpc_client/src/lib.rs | 111 ++--------
 .../tests/arrow_encode_roundtrip.rs | 2 +
 crates/store/re_sorbet/Cargo.toml | 2 +
 crates/store/re_sorbet/src/chunk_batch.rs | 78 ++++++-
 crates/store/re_sorbet/src/chunk_schema.rs | 12 +-
 .../store/re_sorbet/src/data_column_schema.rs | 2 +
 .../re_sorbet/src/row_id_column_schema.rs | 7 +-
 crates/top/re_sdk/src/lib.rs | 1 -
 14 files changed, 126 insertions(+), 305 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index c7dfecccad65..bcdcb0c7d891 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6154,6 +6154,7 @@ dependencies = [
 "re_arrow_util",
 "re_chunk",
 "re_error",
+ "re_format_arrow",
 "re_log",
 "re_log_encoding",
 "re_log_types",
@@ -6590,9 +6591,11 @@ version = "0.23.0-alpha.1+dev"
 dependencies = [
 "arrow",
 "itertools 0.13.0",
+ "re_arrow_util",
 "re_format_arrow",
 "re_log",
 "re_log_types",
+ "re_tracing",
 "re_tuid",
 "re_types_core",
 "thiserror 1.0.65",
 ]
diff --git a/crates/store/re_chunk/src/chunk.rs b/crates/store/re_chunk/src/chunk.rs
index 93105f2c19bd..287649b4ee65 100644
--- a/crates/store/re_chunk/src/chunk.rs
+++ b/crates/store/re_chunk/src/chunk.rs
@@ -182,7 +182,7 @@ impl FromIterator<(ComponentName, ArrowListArray)> for ChunkComponents {
 /// Its time columns might or might not be ascendingly sorted, depending on how the data was logged.
 ///
 /// This is the in-memory representation of a chunk, optimized for efficient manipulation of the
-/// data within. For transport, see [`crate::TransportChunk`] instead.
+/// data within. For transport, see [`re_sorbet::ChunkBatch`] instead.
 #[derive(Debug)]
 pub struct Chunk {
     pub(crate) id: ChunkId,
diff --git a/crates/store/re_chunk/src/lib.rs b/crates/store/re_chunk/src/lib.rs
index f0792e930cec..38e155e68ff7 100644
--- a/crates/store/re_chunk/src/lib.rs
+++ b/crates/store/re_chunk/src/lib.rs
@@ -29,7 +29,6 @@ pub use self::iter::{
 };
 pub use self::latest_at::LatestAtQuery;
 pub use self::range::{RangeQuery, RangeQueryOptions};
-pub use self::transport::TransportChunk;

 #[cfg(not(target_arch = "wasm32"))]
 pub use self::batcher::{
diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs
index a38c2dbeb4b7..ae0acdd98bdd 100644
--- a/crates/store/re_chunk/src/transport.rs
+++ b/crates/store/re_chunk/src/transport.rs
@@ -16,213 +16,6 @@ pub type ArrowMetadata = std::collections::HashMap<String, String>;

 // ---

-/// A [`Chunk`] that is ready for transport.
-///
-/// It contains a schema with a matching number of columns, all of the same length.
-///
-/// This is just a wrapper around an [`ArrowRecordBatch`], with some helper functions on top.
-/// It can be converted to and from [`ArrowRecordBatch`] without overhead.
-///
-/// Use the `Display` implementation to dump the chunk as a nicely formatted table.
-///
-/// This has a stable ABI! The entire point of this type is to allow users to send raw arrow data
-/// into Rerun.
-/// This means we have to be very careful when checking the validity of the data: slipping corrupt
-/// data into the store could silently break all the index search logic (e.g. think of a chunk
-/// claiming to be sorted while it is in fact not).
-// TODO(emilk): remove this, and replace it with a trait extension type for `ArrowRecordBatch`.
-#[derive(Debug, Clone)]
-pub struct TransportChunk {}
-
-// TODO(#6572): Relying on Arrow's native schema metadata feature is bound to fail, we need to
-// switch to something more powerful asap.
-impl TransportChunk {
-    /// The key used to identify a Rerun [`ChunkId`] in chunk-level [`ArrowSchema`] metadata.
-    pub const CHUNK_METADATA_KEY_ID: &'static str = "rerun.id";
-
-    /// The key used to identify a Rerun [`EntityPath`] in chunk-level [`ArrowSchema`] metadata.
-    pub const CHUNK_METADATA_KEY_ENTITY_PATH: &'static str = "rerun.entity_path";
-
-    /// The key used to identify the size in bytes of the data, once loaded in memory, in chunk-level
-    /// [`ArrowSchema`] metadata.
-    pub const CHUNK_METADATA_KEY_HEAP_SIZE_BYTES: &'static str = "rerun.heap_size_bytes";
-
-    /// The marker used to identify whether a chunk is sorted in chunk-level [`ArrowSchema`] metadata.
-    ///
-    /// The associated value is irrelevant -- if this marker is present, then it is true.
-    ///
-    /// Chunks are ascendingly sorted by their `RowId` column.
-    pub const CHUNK_METADATA_MARKER_IS_SORTED_BY_ROW_ID: &'static str = "rerun.is_sorted";
-
-    /// The key used to identify the kind of a Rerun column in field-level [`ArrowSchema`] metadata.
-    ///
-    /// That is: control columns (e.g. `row_id`), time columns or component columns.
-    pub const FIELD_METADATA_KEY_KIND: &'static str = "rerun.kind";
-
-    /// The value used to identify a Rerun time column in field-level [`ArrowSchema`] metadata.
-    pub const FIELD_METADATA_VALUE_KIND_TIME: &'static str = "time";
-
-    /// The value used to identify a Rerun control column in field-level [`ArrowSchema`] metadata.
-    pub const FIELD_METADATA_VALUE_KIND_CONTROL: &'static str = "control";
-
-    /// The value used to identify a Rerun data column in field-level [`ArrowSchema`] metadata.
-    pub const FIELD_METADATA_VALUE_KIND_DATA: &'static str = "data";
-
-    /// The key used to identify the [`crate::ArchetypeName`] in field-level [`ArrowSchema`] metadata.
-    pub const FIELD_METADATA_KEY_ARCHETYPE_NAME: &'static str = "rerun.archetype_name";
-
-    /// The key used to identify the [`crate::ArchetypeFieldName`] in field-level [`ArrowSchema`] metadata.
-    pub const FIELD_METADATA_KEY_ARCHETYPE_FIELD_NAME: &'static str = "rerun.archetype_field_name";
-
-    /// The marker used to identify whether a column is sorted in field-level [`ArrowSchema`] metadata.
-    ///
-    /// The associated value is irrelevant -- if this marker is present, then it is true.
-    ///
-    /// Chunks are ascendingly sorted by their `RowId` column but, depending on whether the data
-    /// was logged out of order or not for a given time column, that column might follow the global
-    /// `RowId` while still being unsorted relative to its own time order.
-    pub const FIELD_METADATA_MARKER_IS_SORTED_BY_TIME: &'static str =
-        Self::CHUNK_METADATA_MARKER_IS_SORTED_BY_ROW_ID;
-
-    /// Returns the appropriate chunk-level [`ArrowSchema`] metadata for a Rerun [`ChunkId`].
-    #[inline]
-    pub fn chunk_metadata_id(id: ChunkId) -> ArrowMetadata {
-        [
-            (
-                Self::CHUNK_METADATA_KEY_ID.to_owned(),
-                format!("{:X}", id.as_u128()),
-            ), //
-        ]
-        .into()
-    }
-
-    /// Returns the appropriate chunk-level [`ArrowSchema`] metadata for the in-memory size in bytes.
-    #[inline]
-    pub fn chunk_metadata_heap_size_bytes(heap_size_bytes: u64) -> ArrowMetadata {
-        [
-            (
-                Self::CHUNK_METADATA_KEY_HEAP_SIZE_BYTES.to_owned(),
-                heap_size_bytes.to_string(),
-            ), //
-        ]
-        .into()
-    }
-
-    /// Returns the appropriate chunk-level [`ArrowSchema`] metadata for a Rerun [`EntityPath`].
-    #[inline]
-    pub fn chunk_metadata_entity_path(entity_path: &EntityPath) -> ArrowMetadata {
-        [
-            (
-                Self::CHUNK_METADATA_KEY_ENTITY_PATH.to_owned(),
-                entity_path.to_string(),
-            ), //
-        ]
-        .into()
-    }
-
-    /// Returns the appropriate chunk-level [`ArrowSchema`] metadata for an `IS_SORTED` marker.
-    #[inline]
-    pub fn chunk_metadata_is_sorted() -> ArrowMetadata {
-        [
-            (
-                Self::CHUNK_METADATA_MARKER_IS_SORTED_BY_ROW_ID.to_owned(),
-                String::new(),
-            ), //
-        ]
-        .into()
-    }
-
-    /// Returns the appropriate field-level [`ArrowSchema`] metadata for a Rerun time column.
-    #[inline]
-    pub fn field_metadata_time_column() -> ArrowMetadata {
-        [
-            (
-                Self::FIELD_METADATA_KEY_KIND.to_owned(),
-                Self::FIELD_METADATA_VALUE_KIND_TIME.to_owned(),
-            ), //
-        ]
-        .into()
-    }
-
-    /// Returns the appropriate field-level [`ArrowSchema`] metadata for a Rerun control column.
-    #[inline]
-    pub fn field_metadata_control_column() -> ArrowMetadata {
-        [
-            (
-                Self::FIELD_METADATA_KEY_KIND.to_owned(),
-                Self::FIELD_METADATA_VALUE_KIND_CONTROL.to_owned(),
-            ), //
-        ]
-        .into()
-    }
-
-    /// Returns the appropriate field-level [`ArrowSchema`] metadata for a Rerun data column.
-    #[inline]
-    pub fn field_metadata_data_column() -> ArrowMetadata {
-        [
-            (
-                Self::FIELD_METADATA_KEY_KIND.to_owned(),
-                Self::FIELD_METADATA_VALUE_KIND_DATA.to_owned(),
-            ), //
-        ]
-        .into()
-    }
-
-    /// Returns the appropriate field-level [`ArrowSchema`] metadata for an `IS_SORTED` marker.
-    #[inline]
-    pub fn field_metadata_is_sorted() -> ArrowMetadata {
-        [
-            (
-                Self::FIELD_METADATA_MARKER_IS_SORTED_BY_TIME.to_owned(),
-                String::new(),
-            ), //
-        ]
-        .into()
-    }
-
-    #[inline]
-    pub fn field_metadata_component_descriptor(
-        component_desc: &ComponentDescriptor,
-    ) -> ArrowMetadata {
-        component_desc
-            .archetype_name
-            .iter()
-            .copied()
-            .map(|archetype_name| {
-                (
-                    Self::FIELD_METADATA_KEY_ARCHETYPE_NAME.to_owned(),
-                    archetype_name.to_string(),
-                )
-            })
-            .chain(component_desc.archetype_field_name.iter().copied().map(
-                |archetype_field_name| {
-                    (
-                        Self::FIELD_METADATA_KEY_ARCHETYPE_FIELD_NAME.to_owned(),
-                        archetype_field_name.to_string(),
-                    )
-                },
-            ))
-            .collect()
-    }
-
-    #[inline]
-    pub fn component_descriptor_from_field(field: &ArrowField) -> ComponentDescriptor {
-        ComponentDescriptor {
-            archetype_name: field
-                .metadata()
-                .get(Self::FIELD_METADATA_KEY_ARCHETYPE_NAME)
-                .cloned()
-                .map(Into::into),
-            component_name: field.name().clone().into(),
-            archetype_field_name: field
-                .metadata()
-                .get(Self::FIELD_METADATA_KEY_ARCHETYPE_FIELD_NAME)
-                .cloned()
-                .map(Into::into),
-        }
-    }
-}
-
 impl Chunk {
     /// Prepare the [`Chunk`] for transport.
     ///
diff --git a/crates/store/re_dataframe/src/lib.rs b/crates/store/re_dataframe/src/lib.rs
index ab7a5b2c60ce..44f1e891dcd7 100644
--- a/crates/store/re_dataframe/src/lib.rs
+++ b/crates/store/re_dataframe/src/lib.rs
@@ -6,8 +6,6 @@ mod query;
 pub use self::engine::QueryEngine;
 pub use self::query::QueryHandle;

-#[doc(no_inline)]
-pub use self::external::re_chunk::TransportChunk;
 #[doc(no_inline)]
 pub use self::external::re_chunk_store::{
     ChunkStoreConfig, ChunkStoreHandle, ColumnSelector, ComponentColumnSelector, Index, IndexRange,
diff --git a/crates/store/re_grpc_client/Cargo.toml b/crates/store/re_grpc_client/Cargo.toml
index 256a29fc1c00..106b79cbf653 100644
--- a/crates/store/re_grpc_client/Cargo.toml
+++ b/crates/store/re_grpc_client/Cargo.toml
@@ -23,6 +23,7 @@ all-features = true
 re_arrow_util.workspace = true
 re_chunk.workspace = true
 re_error.workspace = true
+re_format_arrow.workspace = true
 re_log.workspace = true
 re_log_encoding = { workspace = true, features = ["encoder", "decoder"] }
 re_log_types.workspace = true
diff --git a/crates/store/re_grpc_client/src/lib.rs b/crates/store/re_grpc_client/src/lib.rs
index f456cdbd8f1b..f222510b7bed 100644
--- a/crates/store/re_grpc_client/src/lib.rs
+++ b/crates/store/re_grpc_client/src/lib.rs
@@ -14,7 +14,7 @@ use arrow::{
     array::{
         ArrayRef as ArrowArrayRef, RecordBatch as ArrowRecordBatch, StringArray as ArrowStringArray,
     },
     datatypes::{DataType as ArrowDataType, Field as ArrowField},
 };
 use url::Url;

 use re_arrow_util::ArrowArrayDowncastRef as _;
-use re_chunk::{Chunk, ChunkBuilder, ChunkId, EntityPath, RowId, Timeline, TransportChunk};
+use re_chunk::{Chunk, ChunkBuilder, ChunkId, EntityPath, RowId, Timeline};
 use re_log_encoding::codec::{wire::decoder::Decode, CodecError};
 use re_log_types::{
     external::re_types_core::ComponentDescriptor, ApplicationId, BlueprintActivationCommand,
@@ -96,6 +96,9 @@ pub enum StreamError {

     #[error("Invalid URI: {0}")]
     InvalidUri(String),
+
+    #[error(transparent)]
+    InvalidChunkSchema(#[from] re_sorbet::InvalidChunkSchema),
 }

 // ----------------------------------------------------------------------------

@@ -399,99 +402,31 @@ async fn stream_catalog_async(
     re_log::info!("Starting to read...");
     while let Some(result) = resp.next().await {
-        let record_batch = result.map_err(TonicStatusError)?;
-
-        // Catalog received from the ReDap server isn't suitable for direct conversion to a Rerun Chunk:
-        // - conversion expects "data" columns to be ListArrays, hence we need to convert any individual row column data to ListArray
-        // - conversion expects the input TransportChunk to have a ChunkId so we need to add that piece of metadata
-
-        let mut fields: Vec<ArrowField> = Vec::new();
-        let mut columns: Vec<ArrowArrayRef> = Vec::new();
+        let entity_path = EntityPath::parse_forgiving("catalog");

-        for (field, data) in
-            itertools::izip!(record_batch.schema().fields(), record_batch.columns())
-        {
-            // TODO: move this switch to re_sorbet
-            match field.metadata().get("rerun.kind").map(|s| s.as_str()) {
-                Some("control") => {
-                    fields.push(
-                        ArrowField::new(
-                            RowId::name().to_string(), // need to rename to Rerun Chunk expected control field
-                            field.data_type().clone(),
-                            false, /* not nullable */
-                        )
-                        .with_metadata(TransportChunk::field_metadata_control_column()),
-                    );
+        let record_batch = result.map_err(TonicStatusError)?;

-                    columns.push(data.clone());
-                }
-                Some("index" | "time") => {
-                    fields.push(ArrowField::clone(field.as_ref()));
-                    columns.push(data.clone());
-                }
-                Some("data" | "component") => {
-                    // We slice each column array into individual arrays and then convert the whole lot into a ListArray
+        let mut metadata = record_batch.schema_ref().metadata.clone();

-                    let data_field_inner = ArrowField::new(
-                        "item",
-                        field.data_type().clone(),
-                        true, /* nullable */
-                    );
+        for (key, value) in [
+            re_sorbet::ChunkSchema::chunk_id_metadata(&ChunkId::new()),
+            re_sorbet::ChunkSchema::entity_path_metadata(&entity_path),
+        ] {
+            if !metadata.contains_key(&key) {
+                metadata.insert(key, value);
            }
        }

-                    let data_field = ArrowField::new(
-                        field.name().clone(),
-                        ArrowDataType::List(Arc::new(data_field_inner.clone())),
-                        false, /* not nullable */
-                    )
-                    .with_metadata(TransportChunk::field_metadata_data_column());
-
-                    let mut sliced: Vec<ArrowArrayRef> = Vec::new();
-                    for idx in 0..data.len() {
-                        sliced.push(data.clone().slice(idx, 1));
-                    }
-
-                    let data_arrays = sliced.iter().map(|e| Some(e.as_ref())).collect::<Vec<_>>();
-                    #[allow(clippy::unwrap_used)] // we know we've given the right field type
-                    let data_field_array: arrow::array::ListArray =
-                        re_arrow_util::arrow_util::arrays_to_list_array(
-                            data_field_inner.data_type().clone(),
-                            &data_arrays,
-                        )
-                        .unwrap();
-
-                    fields.push(data_field);
-                    columns.push(as_array_ref(data_field_array));
-                }
-                Some(kind) => {
-                    return Err(StreamError::ChunkError(re_chunk::ChunkError::Malformed {
-                        reason: format!("Unknown rerun.kind {kind:?}"),
-                    }));
-                }
-                None => {
-                    return Err(StreamError::ChunkError(re_chunk::ChunkError::Malformed {
-                        reason: "Missing or unknown rerun.kind".to_owned(),
-                    }));
-                }
-            }
-        }
-
-        let schema = {
-            let metadata = HashMap::from_iter([
-                (
-                    TransportChunk::CHUNK_METADATA_KEY_ENTITY_PATH.to_owned(),
-                    "catalog".to_owned(),
-                ),
-                (
-                    TransportChunk::CHUNK_METADATA_KEY_ID.to_owned(),
-                    ChunkId::new().to_string(),
-                ),
-            ]);
-            arrow::datatypes::Schema::new_with_metadata(fields, metadata)
-        };
-
-        let record_batch = ArrowRecordBatch::try_new(schema.into(), columns)
-            .map_err(re_chunk::ChunkError::from)?;
-        let mut chunk = Chunk::from_record_batch(record_batch)?;
+        let schema_with_more_metadata = arrow::datatypes::Schema::clone(record_batch.schema_ref())
+            .with_metadata(metadata)
+            .into();
+        let record_batch = record_batch
+            .with_schema(schema_with_more_metadata)
+            .expect("Can't fail, because we only added metadata");
+
+        let chunk_batch = re_sorbet::ChunkBatch::try_from(record_batch.clone())?;
+
+        let mut chunk = Chunk::from_chunk_batch(&chunk_batch)?;

         // finally, enrich catalog data with RecordingUri that's based on the ReDap endpoint (that we know)
         // and the recording id (that we have in the catalog data)
diff --git a/crates/store/re_log_encoding/tests/arrow_encode_roundtrip.rs b/crates/store/re_log_encoding/tests/arrow_encode_roundtrip.rs
index 2e80fc16a2f9..80eb680b915f 100644
--- a/crates/store/re_log_encoding/tests/arrow_encode_roundtrip.rs
+++ b/crates/store/re_log_encoding/tests/arrow_encode_roundtrip.rs
@@ -1,3 +1,5 @@
+use similar_asserts::assert_eq;
+
 use re_build_info::CrateVersion;
 use re_chunk::{Chunk, RowId, TimePoint, Timeline};
 use re_log_encoding::{
diff --git a/crates/store/re_sorbet/Cargo.toml b/crates/store/re_sorbet/Cargo.toml
index 929fcfa20188..136f3f5282b0 100644
--- a/crates/store/re_sorbet/Cargo.toml
+++ b/crates/store/re_sorbet/Cargo.toml
@@ -20,9 +20,11 @@ all-features = true

 [dependencies]
+re_arrow_util.workspace = true
 re_format_arrow.workspace = true
 re_log_types.workspace = true
 re_log.workspace = true
+re_tracing.workspace = true
 re_tuid.workspace = true
 re_types_core.workspace = true

diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs
index 54377d75d5e7..e9b6a4fcbb3f 100644
--- a/crates/store/re_sorbet/src/chunk_batch.rs
+++ b/crates/store/re_sorbet/src/chunk_batch.rs
@@ -1,11 +1,17 @@
+use std::sync::Arc;
+
 use arrow::{
     array::{
         Array as _, ArrayRef as ArrowArrayRef, AsArray, RecordBatch as ArrowRecordBatch,
         RecordBatchOptions, StructArray as ArrowStructArray,
     },
-    datatypes::{Fields as ArrowFields, Schema as ArrowSchema},
+    datatypes::{
+        DataType as ArrowDataType, Field as ArrowField, FieldRef as ArrowFieldRef,
+        Fields as ArrowFields, Schema as ArrowSchema,
+    },
 };

+use re_arrow_util::{arrow_util::into_arrow_ref, ArrowArrayDowncastRef};
 use re_log_types::EntityPath;
 use re_types_core::ChunkId;

@@ -225,7 +231,12 @@ impl From<&ChunkBatch> for ArrowRecordBatch {
 impl TryFrom<ArrowRecordBatch> for ChunkBatch {
     type Error = InvalidChunkSchema;

+    /// Will automatically wrap data columns in `ListArrays` if they are not already.
     fn try_from(batch: ArrowRecordBatch) -> Result<Self, Self::Error> {
+        re_tracing::profile_function!();
+
+        let batch = make_all_data_columns_list_arrays(&batch);
+
         let chunk_schema = ChunkSchema::try_from(batch.schema_ref().as_ref())?;

         // TODO: sanity check that the schema matches the columns
@@ -249,3 +260,68 @@ impl TryFrom<ArrowRecordBatch> for ChunkBatch {
         })
     }
 }
+
+/// Make sure all data columns are `ListArrays`.
+fn make_all_data_columns_list_arrays(batch: &ArrowRecordBatch) -> ArrowRecordBatch {
+    re_tracing::profile_function!();
+
+    let num_columns = batch.num_columns();
+    let mut fields: Vec<ArrowFieldRef> = Vec::with_capacity(num_columns);
+    let mut columns: Vec<ArrowArrayRef> = Vec::with_capacity(num_columns);
+
+    for (field, data) in itertools::izip!(batch.schema().fields(), batch.columns()) {
+        if data
+            .downcast_array_ref::<arrow::array::ListArray>()
+            .is_some()
+        {
+            // Already fine
+            fields.push(field.clone());
+            columns.push(data.clone());
+        } else if field
+            .metadata()
+            .get("rerun.kind")
+            .is_some_and(|kind| kind == "data")
+        {
+            // We slice each column array into individual arrays and then convert the whole lot into a ListArray
+
+            let data_field_inner =
+                ArrowField::new("item", field.data_type().clone(), true /* nullable */);
+
+            let data_field = ArrowField::new(
+                field.name().clone(),
+                ArrowDataType::List(Arc::new(data_field_inner.clone())),
+                false, /* not nullable */
+            )
+            .with_metadata(field.metadata().clone());
+
+            let mut sliced: Vec<ArrowArrayRef> = Vec::new();
+            for idx in 0..data.len() {
+                sliced.push(data.clone().slice(idx, 1));
+            }
+
+            let data_arrays = sliced.iter().map(|e| Some(e.as_ref())).collect::<Vec<_>>();
+            #[allow(clippy::unwrap_used)] // we know we've given the right field type
+            let list_array: arrow::array::ListArray =
+                re_arrow_util::arrow_util::arrays_to_list_array(
+                    data_field_inner.data_type().clone(),
+                    &data_arrays,
+                )
+                .unwrap();
+
+            fields.push(data_field.into());
+            columns.push(into_arrow_ref(list_array));
+        } else {
+            fields.push(field.clone());
+            columns.push(data.clone());
+        }
+    }
+
+    let schema = ArrowSchema::new_with_metadata(fields, batch.schema().metadata.clone());
+
+    ArrowRecordBatch::try_new_with_options(
+        schema.into(),
+        columns,
+        &RecordBatchOptions::default().with_row_count(Some(batch.num_rows())),
+    )
+    .unwrap()
+}
diff --git a/crates/store/re_sorbet/src/chunk_schema.rs b/crates/store/re_sorbet/src/chunk_schema.rs
index b03abe34f6ca..46e4c7a6efe0 100644
--- a/crates/store/re_sorbet/src/chunk_schema.rs
+++ b/crates/store/re_sorbet/src/chunk_schema.rs
@@ -141,6 +141,14 @@ impl ChunkSchema {
         1 + self.index_columns.len() + self.data_columns.len()
     }

+    pub fn chunk_id_metadata(chunk_id: &ChunkId) -> (String, String) {
+        ("rerun.id".to_owned(), format!("{:X}", chunk_id.as_u128()))
+    }
+
+    pub fn entity_path_metadata(entity_path: &EntityPath) -> (String, String) {
+        ("rerun.entity_path".to_owned(), entity_path.to_string())
+    }
+
     pub fn arrow_batch_metadata(&self) -> ArrowBatchMetadata {
         let Self {
             chunk_id,
@@ -157,8 +165,8 @@ impl ChunkSchema {
                 Self::CHUNK_METADATA_KEY_VERSION.to_owned(),
                 Self::CHUNK_METADATA_VERSION.to_owned(),
             ),
-            ("rerun.id".to_owned(), format!("{:X}", chunk_id.as_u128())),
-            ("rerun.entity_path".to_owned(), entity_path.to_string()),
+            Self::chunk_id_metadata(chunk_id),
+            Self::entity_path_metadata(entity_path),
         ]);
         if let Some(heap_size_bytes) = heap_size_bytes {
             arrow_metadata.insert(
diff --git a/crates/store/re_sorbet/src/data_column_schema.rs b/crates/store/re_sorbet/src/data_column_schema.rs
index 45f886886448..b2bc95df19d2 100644
--- a/crates/store/re_sorbet/src/data_column_schema.rs
+++ b/crates/store/re_sorbet/src/data_column_schema.rs
@@ -243,6 +243,8 @@ impl ComponentColumnDescriptor {
             ComponentName::new(field.name()) // Legacy fallback
         };

+        // TODO: check that the data type is a list array and at least warn if it is not
+
         Ok(Self {
             store_datatype: field.data_type().clone(),
             entity_path,
diff --git a/crates/store/re_sorbet/src/row_id_column_schema.rs b/crates/store/re_sorbet/src/row_id_column_schema.rs
index fae2a3b84f11..5a9007f0b458 100644
--- a/crates/store/re_sorbet/src/row_id_column_schema.rs
+++ b/crates/store/re_sorbet/src/row_id_column_schema.rs
@@ -29,12 +29,14 @@ impl WrongDatatypeError {
 pub struct RowIdColumnDescriptor {}

 impl RowIdColumnDescriptor {
+    pub fn new() -> Self {
+        Self {}
+    }
+
     #[inline]
     pub fn to_arrow_field(&self) -> ArrowField {
         let Self {} = self;

-        let nullable = false; // All rows has an id
-
         let metadata = [
             Some(("rerun.kind".to_owned(), "control".to_owned())),
             // This ensures the RowId/Tuid is formatted correctly:
@@ -47,6 +49,7 @@ impl RowIdColumnDescriptor {
         .flatten()
         .collect();

+        let nullable = false; // All rows have an id
         ArrowField::new(
             RowId::descriptor().to_string(),
             RowId::arrow_datatype(),
diff --git a/crates/top/re_sdk/src/lib.rs b/crates/top/re_sdk/src/lib.rs
index 0a2d442ac185..6397d4008793 100644
--- a/crates/top/re_sdk/src/lib.rs
+++ b/crates/top/re_sdk/src/lib.rs
@@ -82,7 +82,6 @@ pub mod log {
     pub use re_chunk::{
         Chunk, ChunkBatcher, ChunkBatcherConfig, ChunkBatcherError, ChunkBatcherResult,
         ChunkComponents, ChunkError, ChunkId, ChunkResult, PendingRow, RowId, TimeColumn,
-        TransportChunk,
     };
     pub use re_log_types::LogMsg;
 }

From 8326fae19aba2ecea809a6d6ae2e8d926ce67761 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 16:12:09 +0100
Subject: [PATCH 21/38] Cleanup

---
 crates/store/re_chunk/src/transport.rs | 20 ++++------
 crates/store/re_grpc_client/src/lib.rs | 40 +++++++++----------
 .../re_log_encoding/src/codec/file/decoder.rs | 2 +-
 .../re_log_encoding/src/codec/wire/mod.rs | 2 +-
 .../src/protobuf_conversions.rs | 2 +-
 .../tests/arrow_encode_roundtrip.rs | 2 +-
 crates/store/re_sorbet/src/chunk_batch.rs | 8 ++-
 .../re_sorbet/src/row_id_column_schema.rs | 3 +-
 rerun_py/src/remote.rs | 2 +-
 9 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs
index ae0acdd98bdd..b55c5b3b02d5 100644
--- a/crates/store/re_chunk/src/transport.rs
+++ b/crates/store/re_chunk/src/transport.rs
@@ -1,18 +1,14 @@
-use arrow::{
-    array::{Array as ArrowArray, ListArray as ArrowListArray, RecordBatch as ArrowRecordBatch},
-    datatypes::Field as ArrowField,
+use arrow::array::{
+    Array as ArrowArray, ListArray as ArrowListArray, RecordBatch as ArrowRecordBatch,
 };
 use itertools::Itertools;
 use nohash_hasher::IntMap;

 use re_arrow_util::{arrow_util::into_arrow_ref, ArrowArrayDowncastRef as _};
 use re_byte_size::SizeBytes as _;
-use re_log_types::EntityPath;
 use re_types_core::{arrow_helpers::as_array_ref, ComponentDescriptor, Loggable as _};

-use crate::{chunk::ChunkComponents, Chunk, ChunkError, ChunkId, ChunkResult, RowId, TimeColumn};
-
-pub type ArrowMetadata = std::collections::HashMap<String, String>;
+use crate::{chunk::ChunkComponents, Chunk, ChunkError, ChunkResult, RowId, TimeColumn};

 // ---

@@ -230,7 +226,7 @@ impl Chunk {
         Ok(res)
     }

-    pub fn from_record_batch(batch: ArrowRecordBatch) -> ChunkResult<Self> {
+    pub fn from_record_batch(batch: &ArrowRecordBatch) -> ChunkResult<Self> {
         re_tracing::profile_function!(format!(
             "num_columns={} num_rows={}",
             batch.num_columns(),
             batch.num_rows()
@@ -250,7 +246,7 @@ impl Chunk {
             on_release: _,
         } = msg;

-        Self::from_record_batch(batch.clone())
+        Self::from_record_batch(batch)
     }

     #[inline]
@@ -275,9 +271,9 @@ mod tests {
     use re_arrow_util::arrow_util;
     use re_log_types::{
         example_components::{MyColor, MyPoint},
-        Timeline,
+        EntityPath, Timeline,
     };
-    use re_types_core::Component as _;
+    use re_types_core::{ChunkId, Component as _};

     use super::*;

@@ -357,7 +353,7 @@ mod tests {
         let arrow_record_batch = ArrowRecordBatch::from(&chunk_batch_before);

-        let chunk_batch_after = re_sorbet::ChunkBatch::try_from(arrow_record_batch).unwrap();
+        let chunk_batch_after = re_sorbet::ChunkBatch::try_from(&arrow_record_batch).unwrap();

         assert_eq!(
             chunk_batch_before.chunk_schema(),
diff --git a/crates/store/re_grpc_client/src/lib.rs b/crates/store/re_grpc_client/src/lib.rs
index f222510b7bed..05d92ea1777b 100644
--- a/crates/store/re_grpc_client/src/lib.rs
+++ b/crates/store/re_grpc_client/src/lib.rs
@@ -2,12 +2,11 @@
 pub mod message_proxy;

-use std::{collections::HashMap, error::Error, sync::Arc};
+use std::error::Error;

 use arrow::{
     array::{
-        Array as ArrowArray, ArrayRef as ArrowArrayRef, RecordBatch as ArrowRecordBatch,
-        StringArray as ArrowStringArray,
+        ArrayRef as ArrowArrayRef, RecordBatch as ArrowRecordBatch, StringArray as ArrowStringArray,
     },
     datatypes::{DataType as ArrowDataType, Field as ArrowField},
 };
@@ -264,7 +263,7 @@ async fn stream_recording_async(
     re_log::info!("Starting to read...");
     while let Some(result) = resp.next().await {
         let batch = result.map_err(TonicStatusError)?;
-        let chunk = Chunk::from_record_batch(batch)?;
+        let chunk = Chunk::from_record_batch(&batch)?;

         if tx
             .send(LogMsg::ArrowMsg(store_id.clone(), chunk.to_arrow_msg()?))
@@ -404,27 +403,28 @@ async fn stream_catalog_async(
     while let Some(result) = resp.next().await {
         let entity_path = EntityPath::parse_forgiving("catalog");

-        let record_batch = result.map_err(TonicStatusError)?;
+        let mut record_batch = result.map_err(TonicStatusError)?;

-        let mut metadata = record_batch.schema_ref().metadata.clone();
+        {
+            let mut metadata = record_batch.schema_ref().metadata.clone();

-        for (key, value) in [
-            re_sorbet::ChunkSchema::chunk_id_metadata(&ChunkId::new()),
-            re_sorbet::ChunkSchema::entity_path_metadata(&entity_path),
-        ] {
-            if !metadata.contains_key(&key) {
-                metadata.insert(key, value);
+            for (key, value) in [
+                re_sorbet::ChunkSchema::chunk_id_metadata(&ChunkId::new()),
+                re_sorbet::ChunkSchema::entity_path_metadata(&entity_path),
+            ] {
+                metadata.entry(key).or_insert(value);
             }
-        }

-        let schema_with_more_metadata = arrow::datatypes::Schema::clone(record_batch.schema_ref())
-            .with_metadata(metadata)
-            .into();
-        let record_batch = record_batch
-            .with_schema(schema_with_more_metadata)
-            .expect("Can't fail, because we only added metadata");
+            let schema_with_more_metadata =
+                arrow::datatypes::Schema::clone(record_batch.schema_ref())
+                    .with_metadata(metadata)
+                    .into();
+            record_batch = record_batch
+                .with_schema(schema_with_more_metadata)
+                .expect("Can't fail, because we only added metadata");
+        }

-        let chunk_batch = re_sorbet::ChunkBatch::try_from(record_batch.clone())?;
+        let chunk_batch = re_sorbet::ChunkBatch::try_from(&record_batch)?;

         let mut chunk = Chunk::from_chunk_batch(&chunk_batch)?;

diff --git a/crates/store/re_log_encoding/src/codec/file/decoder.rs b/crates/store/re_log_encoding/src/codec/file/decoder.rs
index a8721fd4882d..224e95a02d97 100644
--- a/crates/store/re_log_encoding/src/codec/file/decoder.rs
+++ b/crates/store/re_log_encoding/src/codec/file/decoder.rs
@@ -44,7 +44,7 @@ pub fn decode_bytes(message_kind: MessageKind, buf: &[u8]) -> Result
+    ///
+    /// It is probably a good idea to sort the chunk first.
+    pub fn to_record_batch(&self) -> ChunkResult<ArrowRecordBatch> {
+        re_tracing::profile_function!();
+        Ok(self.to_chunk_batch()?.into())
+    }
+
     /// Prepare the [`Chunk`] for transport.
     ///
     /// It is probably a good idea to sort the chunk first.
     pub fn to_chunk_batch(&self) -> ChunkResult<re_sorbet::ChunkBatch> {
+        re_tracing::profile_function!();
         self.sanity_check()?;

         re_tracing::profile_function!(format!(
             "num_columns={} num_rows={}",
             self.num_columns(),
             self.num_rows()
         ));
@@ -120,14 +129,14 @@ impl Chunk {
             data_arrays,
         )?)
     }
-}

-impl Chunk {
-    /// Prepare the [`Chunk`] for transport.
-    ///
-    /// It is probably a good idea to sort the chunk first.
-    pub fn to_record_batch(&self) -> ChunkResult<ArrowRecordBatch> {
-        Ok(self.to_chunk_batch()?.into())
+    pub fn from_record_batch(batch: &ArrowRecordBatch) -> ChunkResult<Self> {
+        re_tracing::profile_function!(format!(
+            "num_columns={} num_rows={}",
+            batch.num_columns(),
+            batch.num_rows()
+        ));
+        Self::from_chunk_batch(&re_sorbet::ChunkBatch::try_from(batch)?)
     }

     pub fn from_chunk_batch(batch: &re_sorbet::ChunkBatch) -> ChunkResult<Self> {
@@ -225,15 +234,6 @@ impl Chunk {
         Ok(res)
     }
-
-    pub fn from_record_batch(batch: &ArrowRecordBatch) -> ChunkResult<Self> {
-        re_tracing::profile_function!(format!(
-            "num_columns={} num_rows={}",
-            batch.num_columns(),
-            batch.num_rows()
-        ));
-        Self::from_chunk_batch(&re_sorbet::ChunkBatch::try_from(batch)?)
-    }
 }

 impl Chunk {
diff --git a/crates/store/re_sorbet/src/data_column_schema.rs b/crates/store/re_sorbet/src/data_column_schema.rs
index b2bc95df19d2..377eb772e008 100644
--- a/crates/store/re_sorbet/src/data_column_schema.rs
+++ b/crates/store/re_sorbet/src/data_column_schema.rs
@@ -173,16 +173,17 @@ impl ComponentColumnDescriptor {
         } = self;

         // TODO(#6889): This needs some proper sorbetization -- I just threw these names randomly.
+        // We use the long names for the archetype and component names so that they roundtrip properly!
[ Some(("rerun.kind".to_owned(), "data".to_owned())), Some(("rerun.entity_path".to_owned(), entity_path.to_string())), - archetype_name.map(|name| ("rerun.archetype".to_owned(), name.short_name().to_owned())), + archetype_name.map(|name| ("rerun.archetype".to_owned(), name.full_name().to_owned())), archetype_field_name .as_ref() .map(|name| ("rerun.archetype_field".to_owned(), name.to_string())), Some(( "rerun.component".to_owned(), - component_name.short_name().to_owned(), + component_name.full_name().to_owned(), )), (*is_static).then_some(("rerun.is_static".to_owned(), "true".to_owned())), (*is_indicator).then_some(("rerun.is_indicator".to_owned(), "true".to_owned())), From 2058129df9adb677baf7361d331ef61aa2a204ed Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Mon, 10 Feb 2025 16:45:09 +0100 Subject: [PATCH 23/38] Fix TODOs --- crates/store/re_chunk/src/transport.rs | 8 ++--- crates/store/re_sorbet/src/chunk_batch.rs | 6 ++-- crates/store/re_sorbet/src/chunk_schema.rs | 32 +++++++++++-------- .../store/re_sorbet/src/data_column_schema.rs | 2 -- .../re_sorbet/src/row_id_column_schema.rs | 1 - 5 files changed, 26 insertions(+), 23 deletions(-) diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs index da688f72df31..bf10014d10de 100644 --- a/crates/store/re_chunk/src/transport.rs +++ b/crates/store/re_chunk/src/transport.rs @@ -98,10 +98,10 @@ impl Chunk { archetype_field_name, component_name, - is_static: false, // TODO - is_indicator: false, // TODO - is_tombstone: false, // TODO - is_semantically_empty: false, // TODO + is_static: false, // TODO(#8744): figure out what to do here + is_indicator: false, // TODO(#8744): figure out what to do here + is_tombstone: false, // TODO(#8744): figure out what to do here + is_semantically_empty: false, // TODO(#8744): figure out what to do here }; (schema, into_arrow_ref(list_array)) }) diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs index a1d34647c1da..5b16155ba250 100644 --- a/crates/store/re_sorbet/src/chunk_batch.rs +++ b/crates/store/re_sorbet/src/chunk_batch.rs @@ -44,8 +44,6 @@ impl MismatchedChunkSchemaError { #[derive(Debug, Clone)] pub struct ChunkBatch { schema: ChunkSchema, - - // TODO: should we store a record batch here, or just the parsed columns? batch: ArrowRecordBatch, } @@ -239,7 +237,9 @@ impl TryFrom<&ArrowRecordBatch> for ChunkBatch { let chunk_schema = ChunkSchema::try_from(batch.schema_ref().as_ref())?; - // TODO: sanity check that the schema matches the columns + for (field, column) in itertools::izip!(chunk_schema.arrow_fields(), batch.columns()) { + debug_assert_eq!(field.data_type(), column.data_type()); + } // Extend with any metadata that might have been missing: let mut arrow_schema = ArrowSchema::clone(batch.schema_ref().as_ref()); diff --git a/crates/store/re_sorbet/src/chunk_schema.rs b/crates/store/re_sorbet/src/chunk_schema.rs index 46e4c7a6efe0..9f528afefab8 100644 --- a/crates/store/re_sorbet/src/chunk_schema.rs +++ b/crates/store/re_sorbet/src/chunk_schema.rs @@ -180,28 +180,27 @@ impl ChunkSchema { arrow_metadata } -} - -impl From<&ChunkSchema> for ArrowSchema { - fn from(chunk_schema: &ChunkSchema) -> Self { - let metadata = chunk_schema.arrow_batch_metadata(); - let num_columns = chunk_schema.num_columns(); - let ChunkSchema { + pub fn arrow_fields(&self) -> Vec { + let Self { row_id_column, index_columns, data_columns, .. 
- } = chunk_schema;
-
- let mut fields: Vec<ArrowField> = Vec::with_capacity(num_columns);
+ } = self;
+ let mut fields: Vec<ArrowField> = Vec::with_capacity(self.num_columns());
 fields.push(row_id_column.to_arrow_field());
 fields.extend(index_columns.iter().map(|column| column.to_arrow_field()));
 fields.extend(data_columns.iter().map(|column| column.to_arrow_field()));
+ fields
+ }
+}
 
+impl From<&ChunkSchema> for ArrowSchema {
+ fn from(chunk_schema: &ChunkSchema) -> Self {
 Self {
- metadata,
- fields: fields.into(),
+ metadata: chunk_schema.arrow_batch_metadata(),
+ fields: chunk_schema.arrow_fields().into(),
 }
 }
 }
@@ -225,7 +224,14 @@ impl TryFrom<&ArrowSchema> for ChunkSchema {
 let entity_path = EntityPath::parse_forgiving(metadata.get_or_err("rerun.entity_path")?);
 let is_sorted = metadata.get_bool("rerun.is_sorted");
 let heap_size_bytes = if let Some(heap_size_bytes) = metadata.get("rerun.heap_size_bytes") {
- heap_size_bytes.parse().ok() // TODO: log error
+ heap_size_bytes
+ .parse()
+ .map_err(|err| {
+ re_log::warn_once!(
+ "Failed to parse heap_size_bytes {heap_size_bytes:?} in chunk: {err}"
+ );
+ })
+ .ok()
 } else {
 None
 };
diff --git a/crates/store/re_sorbet/src/data_column_schema.rs b/crates/store/re_sorbet/src/data_column_schema.rs
index 377eb772e008..c1642b7a5957 100644
--- a/crates/store/re_sorbet/src/data_column_schema.rs
+++ b/crates/store/re_sorbet/src/data_column_schema.rs
@@ -244,8 +244,6 @@ impl ComponentColumnDescriptor {
 ComponentName::new(field.name()) // Legacy fallback
 };
 
- // TODO: check that the data type is a list array and at least warn if it is not
-
 Ok(Self {
 store_datatype: field.data_type().clone(),
 entity_path,
diff --git a/crates/store/re_sorbet/src/row_id_column_schema.rs b/crates/store/re_sorbet/src/row_id_column_schema.rs
index 028753218e3a..18b99403a337 100644
--- a/crates/store/re_sorbet/src/row_id_column_schema.rs
+++ b/crates/store/re_sorbet/src/row_id_column_schema.rs
@@ -69,7 +69,6 @@ impl TryFrom<&ArrowField> for RowIdColumnDescriptor {
 type Error = WrongDatatypeError;
 
 fn try_from(field: &ArrowField) -> Result<Self, Self::Error> {
- // TODO: check `rerun.kind`
 Self::try_from(field.data_type())
 }
 }

From cb4c498c55946f08cb329f9e68669984a3247cf8 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 17:23:05 +0100
Subject: [PATCH 24/38] Fix doclink

---
 crates/store/re_sorbet/src/chunk_batch.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs
index 5b16155ba250..e200ddd405bd 100644
--- a/crates/store/re_sorbet/src/chunk_batch.rs
+++ b/crates/store/re_sorbet/src/chunk_batch.rs
@@ -40,7 +40,7 @@ impl MismatchedChunkSchemaError {
 /// This is a wrapper around a [`ArrowRecordBatch`].
 ///
 /// Each [`ChunkBatch`] contains logging data for a single [`EntityPath`].
-/// It always has a [`RowId`] column.
+/// It always has a [`re_types_core::RowId`] column. 
#[derive(Debug, Clone)]
 pub struct ChunkBatch {
 schema: ChunkSchema,

From 66a6013f4d4b97807895713ec4b7c14baf0bfedc Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 17:35:23 +0100
Subject: [PATCH 25/38] Only include entity path in column name in dataframe batches

---
 Cargo.lock | 1 +
 .../formatting__format_chunk_store.snap | 2 +-
 crates/store/re_dataframe/Cargo.toml | 1 +
 crates/store/re_dataframe/src/query.rs | 2 +-
 crates/store/re_sorbet/src/chunk_batch.rs | 2 +-
 crates/store/re_sorbet/src/chunk_schema.rs | 6 ++-
 crates/store/re_sorbet/src/column_schema.rs | 17 ++++---
 .../store/re_sorbet/src/data_column_schema.rs | 45 +++++++++++--------
 crates/store/re_sorbet/src/lib.rs | 9 ++++
 9 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index bcdcb0c7d891..a4b741fce4d0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6048,6 +6048,7 @@ dependencies = [
 "re_log_encoding",
 "re_log_types",
 "re_query",
+ "re_sorbet",
 "re_tracing",
 "re_types",
 "re_types_core",
diff --git a/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap b/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap
index 690ec61f453b..124630814753 100644
--- a/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap
+++ b/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap
@@ -21,7 +21,7 @@ ChunkStore {
 │ * version: "1" │
 ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
 │ ┌──────────────────────────────────┬────────────────────────┬───────────────────────────────┬──────────────────────────────┬──────────────────────────────┐ │
- │ │ RowId ┆ frame_nr ┆ log_time ┆ /this/that:example.MyColor ┆ /this/that:example.MyIndex │ │
+ │ │ RowId ┆ frame_nr ┆ log_time ┆ example.MyColor ┆ example.MyIndex │ │
 │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │
 │ │ type: "Struct[2]" ┆ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[u64]" │ │
 │ │ ARROW:extension:name: "TUID" ┆ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyIndex" │ │
diff --git a/crates/store/re_dataframe/Cargo.toml b/crates/store/re_dataframe/Cargo.toml
index 36f5247dc317..3f1873ca4292 100644
--- a/crates/store/re_dataframe/Cargo.toml
+++ b/crates/store/re_dataframe/Cargo.toml
@@ -34,6 +34,7 @@ re_log.workspace = true
 re_log_encoding.workspace = true
 re_log_types.workspace = true
 re_query.workspace = true
+re_sorbet.workspace = true
 re_tracing.workspace = true
 re_types_core.workspace = true

diff --git a/crates/store/re_dataframe/src/query.rs b/crates/store/re_dataframe/src/query.rs
index 016ce25fb147..054b6259cd3b 100644
--- a/crates/store/re_dataframe/src/query.rs
+++ b/crates/store/re_dataframe/src/query.rs
@@ -193,7 +193,7 @@ impl QueryHandle {
 let arrow_schema = ArrowSchemaRef::from(ArrowSchema::new_with_metadata(
 selected_contents
 .iter()
- .map(|(_, descr)| descr.to_arrow_field())
+ .map(|(_, descr)| descr.to_arrow_field(re_sorbet::BatchType::Dataframe))
 .collect::<ArrowFields>(),
 Default::default(),
 ));
diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs
index e200ddd405bd..1eb820cb9160 100644
--- a/crates/store/re_sorbet/src/chunk_batch.rs
+++ b/crates/store/re_sorbet/src/chunk_batch.rs
@@ -92,7 +92,7 @@ impl ChunkBatch {
 if array.len() != row_count {
 return Err(MismatchedChunkSchemaError::custom(format!(
 "Data column {:?} had {} rows, but we got {} row IDs",
- schema.column_name(),
+ schema.column_name(crate::BatchType::Chunk),
 array.len(),
 row_count
 )));
diff --git a/crates/store/re_sorbet/src/chunk_schema.rs b/crates/store/re_sorbet/src/chunk_schema.rs
index 9f528afefab8..b090e76bc6bd 100644
--- a/crates/store/re_sorbet/src/chunk_schema.rs
+++ b/crates/store/re_sorbet/src/chunk_schema.rs
@@ -191,7 +191,11 @@ impl ChunkSchema {
 let mut fields: Vec<ArrowField> = Vec::with_capacity(self.num_columns());
 fields.push(row_id_column.to_arrow_field());
 fields.extend(index_columns.iter().map(|column| column.to_arrow_field()));
- fields.extend(data_columns.iter().map(|column| column.to_arrow_field()));
+ fields.extend(
+ data_columns
+ .iter()
+ .map(|column| column.to_arrow_field(crate::BatchType::Chunk)),
+ );
 fields
 }
}
diff --git a/crates/store/re_sorbet/src/column_schema.rs b/crates/store/re_sorbet/src/column_schema.rs
index 68b99e71965b..14b2b937cfb0 100644
--- a/crates/store/re_sorbet/src/column_schema.rs
+++ b/crates/store/re_sorbet/src/column_schema.rs
@@ -68,16 +68,19 @@ impl ColumnDescriptor {
 }
 
 #[inline]
- pub fn to_arrow_field(&self) -> ArrowField {
+ pub fn to_arrow_field(&self, batch_type: crate::BatchType) -> ArrowField {
 match self {
 Self::Time(descr) => descr.to_arrow_field(),
- Self::Component(descr) => descr.to_arrow_field(),
+ Self::Component(descr) => descr.to_arrow_field(batch_type),
 }
 }
 
 #[inline]
- pub fn to_arrow_fields(columns: &[Self]) -> ArrowFields {
- columns.iter().map(|c| c.to_arrow_field()).collect()
+ pub fn to_arrow_fields(columns: &[Self], batch_type: crate::BatchType) -> ArrowFields {
+ columns
+ .iter()
+ .map(|c| c.to_arrow_field(batch_type))
+ .collect()
 }
 
 /// `chunk_entity_path`: if this column is part of a chunk batch,
@@ -141,8 +144,10 @@ fn test_schema_over_ipc() {
 }),
 ];
 
- let original_schema =
- arrow::datatypes::Schema::new(ColumnDescriptor::to_arrow_fields(&original_columns));
+ let original_schema = arrow::datatypes::Schema::new(ColumnDescriptor::to_arrow_fields(
+ &original_columns,
+ crate::BatchType::Dataframe,
+ ));
 let ipc_bytes = crate::ipc_from_schema(&original_schema).unwrap();
 
 let recovered_schema = crate::schema_from_ipc(&ipc_bytes).unwrap();
diff --git a/crates/store/re_sorbet/src/data_column_schema.rs b/crates/store/re_sorbet/src/data_column_schema.rs
index c1642b7a5957..dbcc83b2ea1d 100644
--- a/crates/store/re_sorbet/src/data_column_schema.rs
+++ b/crates/store/re_sorbet/src/data_column_schema.rs
@@ -3,7 +3,7 @@ use arrow::datatypes::{DataType as ArrowDatatype, Field as ArrowField};
 use re_log_types::{ComponentPath, EntityPath};
 use re_types_core::{ArchetypeFieldName, ArchetypeName, ComponentDescriptor, ComponentName};
 
-use crate::{ArrowFieldMetadata, MetadataExt as _, MissingFieldMetadata};
+use crate::{ArrowFieldMetadata, BatchType, MetadataExt as _, MissingFieldMetadata};
 
 /// Describes a data/component column, such as `Position3D`, in a dataframe.
 ///
@@ -201,27 +201,36 @@ impl ComponentColumnDescriptor {
 self.store_datatype.clone()
 }
 
- pub fn column_name(&self) -> String {
- let entity_path = &self.entity_path;
- let descriptor = ComponentDescriptor {
- archetype_name: self.archetype_name,
- archetype_field_name: self.archetype_field_name,
- component_name: self.component_name,
- };
-
- format!("{}:{}", entity_path, descriptor.component_name.short_name())
-
- // NOTE: Uncomment this to expose fully-qualified names in the Dataframe APIs! 
- // I'm not doing that right now, to avoid breaking changes (and we need to talk about
- // what the syntax for these fully-qualified paths need to look like first).
- // format!("{entity_path}@{}", descriptor.short_name())
+ pub fn column_name(&self, batch_type: BatchType) -> String {
+ match batch_type {
+ BatchType::Chunk => self.component_name.short_name().to_owned(),
+ BatchType::Dataframe => {
+ let entity_path = &self.entity_path;
+ let descriptor = ComponentDescriptor {
+ archetype_name: self.archetype_name,
+ archetype_field_name: self.archetype_field_name,
+ component_name: self.component_name,
+ };
+
+ format!("{}:{}", entity_path, descriptor.component_name.short_name())
+
+ // NOTE: Uncomment this to expose fully-qualified names in the Dataframe APIs!
+ // I'm not doing that right now, to avoid breaking changes (and we need to talk about
+ // what the syntax for these fully-qualified paths need to look like first).
+ // format!("{entity_path}@{}", descriptor.short_name())
+ }
+ }
 }
 
 #[inline]
- pub fn to_arrow_field(&self) -> ArrowField {
+ pub fn to_arrow_field(&self, batch_type: BatchType) -> ArrowField {
 let nullable = true;
- ArrowField::new(self.column_name(), self.returned_datatype(), nullable)
- .with_metadata(self.metadata())
+ ArrowField::new(
+ self.column_name(batch_type),
+ self.returned_datatype(),
+ nullable,
+ )
+ .with_metadata(self.metadata())
 }
}
diff --git a/crates/store/re_sorbet/src/lib.rs b/crates/store/re_sorbet/src/lib.rs
index 4bff398ecc02..b9bf625980c1 100644
--- a/crates/store/re_sorbet/src/lib.rs
+++ b/crates/store/re_sorbet/src/lib.rs
@@ -24,3 +24,12 @@ pub use self::{
 },
 row_id_column_schema::{RowIdColumnDescriptor, WrongDatatypeError},
 };
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum BatchType {
+ /// Data for one entity
+ Chunk,
+
+ /// Potentially multiple entities
+ Dataframe,
+}

From 0216e84e8589d2c50145423149bf890f388c13ca Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 17:42:39 +0100
Subject: [PATCH 26/38] Only set entity_path-metadata per column in dataframes

---
 .../formatting__format_chunk_store.snap | 4 +-
 .../store/re_sorbet/src/data_column_schema.rs | 68 +++++++++++++------
 2 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap b/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap
index 124630814753..eed4b3c5ee97 100644
--- a/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap
+++ b/crates/store/re_chunk_store/tests/snapshots/formatting__format_chunk_store.snap
@@ -25,8 +25,8 @@ ChunkStore {
 │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │
 │ │ type: "Struct[2]" ┆ type: "i64" ┆ type: "Timestamp(ns)" ┆ type: "List[u32]" ┆ type: "List[u64]" │ │
 │ │ ARROW:extension:name: "TUID" ┆ index_name: "frame_nr" ┆ index_name: "log_time" ┆ component: "example.MyColor" ┆ component: "example.MyIndex" │ │
- │ │ kind: "control" ┆ is_sorted: "true" ┆ is_sorted: "true" ┆ entity_path: "/this/that" ┆ entity_path: "/this/that" │ │
- │ │ ┆ kind: "index" ┆ kind: "index" ┆ kind: "data" ┆ kind: "data" │ │
+ │ │ kind: "control" ┆ is_sorted: "true" ┆ is_sorted: "true" ┆ kind: "data" ┆ kind: "data" │ │
+ │ │ ┆ kind: "index" ┆ kind: "index" ┆ ┆ │ │
 │ ╞══════════════════════════════════╪════════════════════════╪═══════════════════════════════╪══════════════════════════════╪══════════════════════════════╡ │
 │ │ 0000000067816A6BB4B8C1254D40007B ┆ 1 ┆ 2025-01-10T18:43:42.123456789 ┆ [0, 1, 2] ┆ [0, 1, 2] │ │ 
│ └──────────────────────────────────┴────────────────────────┴───────────────────────────────┴──────────────────────────────┴──────────────────────────────┘ │
diff --git a/crates/store/re_sorbet/src/data_column_schema.rs b/crates/store/re_sorbet/src/data_column_schema.rs
index dbcc83b2ea1d..7f9e4c5fbce0 100644
--- a/crates/store/re_sorbet/src/data_column_schema.rs
+++ b/crates/store/re_sorbet/src/data_column_schema.rs
@@ -159,7 +159,7 @@ impl ComponentColumnDescriptor {
 &self.entity_path == entity_path && self.component_name.matches(component_name)
 }
 
- fn metadata(&self) -> ArrowFieldMetadata {
+ fn metadata(&self, batch_type: BatchType) -> ArrowFieldMetadata {
 let Self {
 entity_path,
 archetype_name,
@@ -173,27 +173,53 @@ impl ComponentColumnDescriptor {
 } = self;
 
 // TODO(#6889): This needs some proper sorbetization -- I just threw these names randomly.
- // We use the long namesa for the archetype and component names so that they roundtrip properly!
- [
- Some(("rerun.kind".to_owned(), "data".to_owned())),
- Some(("rerun.entity_path".to_owned(), entity_path.to_string())),
- archetype_name.map(|name| ("rerun.archetype".to_owned(), name.full_name().to_owned())),
- archetype_field_name
- .as_ref()
- .map(|name| ("rerun.archetype_field".to_owned(), name.to_string())),
- Some((
+ // We use the long names for the archetype and component names so that they roundtrip properly!
+ let mut metadata = std::collections::HashMap::from([
+ ("rerun.kind".to_owned(), "data".to_owned()),
+ (
 "rerun.component".to_owned(),
 component_name.full_name().to_owned(),
- )),
- (*is_static).then_some(("rerun.is_static".to_owned(), "true".to_owned())),
- (*is_indicator).then_some(("rerun.is_indicator".to_owned(), "true".to_owned())),
- (*is_tombstone).then_some(("rerun.is_tombstone".to_owned(), "true".to_owned())),
- (*is_semantically_empty)
- .then_some(("rerun.is_semantically_empty".to_owned(), "true".to_owned())),
- ]
- .into_iter()
- .flatten()
- .collect()
+ ),
+ ]);
+
+ match batch_type {
+ BatchType::Dataframe => {
+ metadata.insert("rerun.entity_path".to_owned(), entity_path.to_string());
+ }
+ BatchType::Chunk => {
+ // The whole chunk is for the same entity, which is set in the record batch metadata.
+ // No need to repeat it here. 
+ }
+ }
+
+ if let Some(archetype_name) = archetype_name {
+ metadata.insert(
+ "rerun.archetype".to_owned(),
+ archetype_name.full_name().to_owned(),
+ );
+ }
+
+ if let Some(archetype_field_name) = archetype_field_name {
+ metadata.insert(
+ "rerun.archetype_field".to_owned(),
+ archetype_field_name.to_string(),
+ );
+ }
+
+ if *is_static {
+ metadata.insert("rerun.is_static".to_owned(), "true".to_owned());
+ }
+ if *is_indicator {
+ metadata.insert("rerun.is_indicator".to_owned(), "true".to_owned());
+ }
+ if *is_tombstone {
+ metadata.insert("rerun.is_tombstone".to_owned(), "true".to_owned());
+ }
+ if *is_semantically_empty {
+ metadata.insert("rerun.is_semantically_empty".to_owned(), "true".to_owned());
+ }
+
+ metadata
 }
 
 #[inline]
@@ -230,7 +256,7 @@ impl ComponentColumnDescriptor {
 self.returned_datatype(),
 nullable,
 )
- .with_metadata(self.metadata())
+ .with_metadata(self.metadata(batch_type))
 }
 }

From 3e73478740b3c2c1db4f11c52e94e55462a40dfb Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 17:44:21 +0100
Subject: [PATCH 27/38] Remove unused dependencies

---
 Cargo.lock | 2 --
 crates/store/re_grpc_client/Cargo.toml | 2 --
 2 files changed, 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index a4b741fce4d0..f838463b2061 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6151,11 +6151,9 @@ version = "0.23.0-alpha.1+dev"
 dependencies = [
 "arrow",
 "async-stream",
- "itertools 0.13.0",
 "re_arrow_util",
 "re_chunk",
 "re_error",
- "re_format_arrow",
 "re_log",
 "re_log_encoding",
 "re_log_types",
diff --git a/crates/store/re_grpc_client/Cargo.toml b/crates/store/re_grpc_client/Cargo.toml
index 106b79cbf653..c7cf18683615 100644
--- a/crates/store/re_grpc_client/Cargo.toml
+++ b/crates/store/re_grpc_client/Cargo.toml
@@ -23,7 +23,6 @@ all-features = true
 re_arrow_util.workspace = true
 re_chunk.workspace = true
 re_error.workspace = true
-re_format_arrow.workspace = true
 re_log.workspace = true
 re_log_encoding = { workspace = true, features = ["encoder", "decoder"] }
 re_log_types.workspace = true
@@ -34,7 +33,6 @@ re_types.workspace = true
 
 arrow.workspace = true
 async-stream.workspace = true
-itertools.workspace = true
 thiserror.workspace = true
 tokio-stream.workspace = true
 url.workspace = true

From 11c1e7240bdc9bcdc705896815921660431323c9 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 17:59:27 +0100
Subject: [PATCH 28/38] Improve docstrings

---
 crates/store/re_sorbet/src/chunk_batch.rs | 4 ++--
 crates/store/re_sorbet/src/lib.rs | 4 ++++
 crates/store/re_sorbet/src/row_id_column_schema.rs | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs
index 1eb820cb9160..14bafe07c046 100644
--- a/crates/store/re_sorbet/src/chunk_batch.rs
+++ b/crates/store/re_sorbet/src/chunk_batch.rs
@@ -35,9 +35,9 @@ impl MismatchedChunkSchemaError {
 }
 }
 
-/// The arrow [`ArrowRecordBatch`] representation of a Rerun chunk.
+/// The [`ArrowRecordBatch`] representation of a Rerun chunk.
 ///
-/// This is a wrapper around a [`ArrowRecordBatch`].
+/// This is a wrapper around a [`ChunkSchema`] and an [`ArrowRecordBatch`].
 ///
 /// Each [`ChunkBatch`] contains logging data for a single [`EntityPath`].
 /// It always has a [`re_types_core::RowId`] column.
diff --git a/crates/store/re_sorbet/src/lib.rs b/crates/store/re_sorbet/src/lib.rs
index b9bf625980c1..18e20c9b937f 100644
--- a/crates/store/re_sorbet/src/lib.rs
+++ b/crates/store/re_sorbet/src/lib.rs
@@ -1,6 +1,10 @@
 //! Rerun arrow metadata and record batch definitions.
 //!
 //! Handles the structure of arrow record batches and their meta data for different use cases for Rerun.
+//!
+//! An arrow record batch needs to follow a specific schema to be be compatible with Rerun,
+//! and that schema is defined in [`ChunkSchema`].
+//! If a record batch matches the schema, it can be converted to a [`ChunkBatch`].
 
 mod chunk_batch;
 mod chunk_schema;
diff --git a/crates/store/re_sorbet/src/row_id_column_schema.rs b/crates/store/re_sorbet/src/row_id_column_schema.rs
index 18b99403a337..d772d03b87fe 100644
--- a/crates/store/re_sorbet/src/row_id_column_schema.rs
+++ b/crates/store/re_sorbet/src/row_id_column_schema.rs
@@ -24,7 +24,7 @@ impl WrongDatatypeError {
 }
 }
 
-/// Describes the [`RowId`]
+/// Describes the schema of the primary [`RowId`] column.
 #[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub struct RowIdColumnDescriptor {}

From ef6f1cdc276d2131c4b06fcde4508a36a941a04c Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Mon, 10 Feb 2025 18:04:32 +0100
Subject: [PATCH 29/38] typos

---
 crates/store/re_sorbet/src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/store/re_sorbet/src/lib.rs b/crates/store/re_sorbet/src/lib.rs
index 18e20c9b937f..a9934c5bd9eb 100644
--- a/crates/store/re_sorbet/src/lib.rs
+++ b/crates/store/re_sorbet/src/lib.rs
@@ -2,7 +2,7 @@
 //!
 //! Handles the structure of arrow record batches and their meta data for different use cases for Rerun.
 //!
-//! An arrow record batch needs to follow a specific schema to be be compatible with Rerun,
+//! An arrow record batch needs to follow a specific schema to be compatible with Rerun,
 //! and that schema is defined in [`ChunkSchema`].
 //! If a record batch matches the schema, it can be converted to a [`ChunkBatch`].

From 3d9147c25f7e89808833b757ddf122aa7f70ccd4 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Tue, 11 Feb 2025 09:15:58 +0100
Subject: [PATCH 30/38] Add sanity checks to find source of doubly-prefixed `rerun.components.`

---
 crates/store/re_chunk/src/chunk.rs | 6 +++-
 crates/store/re_chunk/src/transport.rs | 2 ++
 crates/store/re_chunk_store/src/dataframe.rs | 4 +++
 crates/store/re_dataframe/src/query.rs | 30 +++++++++++++++----
 crates/store/re_sorbet/src/column_schema.rs | 10 +++++++
 .../store/re_sorbet/src/data_column_schema.rs | 24 +++++++++++++--
 crates/store/re_types_core/src/archetype.rs | 13 ++++++++
 .../re_types_core/src/component_descriptor.rs | 17 +++++++++--
 crates/store/re_types_core/src/loggable.rs | 15 ++++++++++
 rerun_py/rerun_sdk/rerun/_baseclasses.py | 4 +++
 rerun_py/rerun_sdk/rerun/_log.py | 1 +
 rerun_py/src/arrow.rs | 6 ++--
 12 files changed, 119 insertions(+), 13 deletions(-)

diff --git a/crates/store/re_chunk/src/chunk.rs b/crates/store/re_chunk/src/chunk.rs
index 287649b4ee65..85e234eb0d89 100644
--- a/crates/store/re_chunk/src/chunk.rs
+++ b/crates/store/re_chunk/src/chunk.rs
@@ -1447,6 +1447,7 @@ impl Chunk {
 /// Returns an error if the Chunk's invariants are not upheld.
 ///
 /// Costly checks are only run in debug builds. 
+ #[track_caller] pub fn sanity_check(&self) -> ChunkResult<()> { re_tracing::profile_function!(); @@ -1516,8 +1517,10 @@ impl Chunk { } // Components - for (_component_name, per_desc) in components.iter() { + for (component_name, per_desc) in components.iter() { + component_name.sanity_check(); for (component_desc, list_array) in per_desc { + component_desc.component_name.sanity_check(); if !matches!(list_array.data_type(), arrow::datatypes::DataType::List(_)) { return Err(ChunkError::Malformed { reason: format!( @@ -1568,6 +1571,7 @@ impl TimeColumn { /// Returns an error if the Chunk's invariants are not upheld. /// /// Costly checks are only run in debug builds. + #[track_caller] pub fn sanity_check(&self) -> ChunkResult<()> { let Self { timeline: _, diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs index bf10014d10de..0dda84d32656 100644 --- a/crates/store/re_chunk/src/transport.rs +++ b/crates/store/re_chunk/src/transport.rs @@ -90,6 +90,8 @@ impl Chunk { component_name, } = *component_desc; + component_name.sanity_check(); + let schema = re_sorbet::ComponentColumnDescriptor { store_datatype: list_array.data_type().clone(), entity_path: entity_path.clone(), diff --git a/crates/store/re_chunk_store/src/dataframe.rs b/crates/store/re_chunk_store/src/dataframe.rs index ab0a82c56f65..ab4183412a63 100644 --- a/crates/store/re_chunk_store/src/dataframe.rs +++ b/crates/store/re_chunk_store/src/dataframe.rs @@ -405,6 +405,8 @@ impl ChunkStore { is_semantically_empty, } = metadata; + component_descr.component_name.sanity_check(); + ComponentColumnDescriptor { // NOTE: The data is always a at least a list, whether it's latest-at or range. // It might be wrapped further in e.g. a dict, but at the very least @@ -477,6 +479,8 @@ impl ChunkStore { let component_name = component_descr.map_or(selected_component_name, |descr| descr.component_name); + component_name.sanity_check(); + let ColumnMetadata { is_static, is_indicator, diff --git a/crates/store/re_dataframe/src/query.rs b/crates/store/re_dataframe/src/query.rs index 054b6259cd3b..4219a1a41a04 100644 --- a/crates/store/re_dataframe/src/query.rs +++ b/crates/store/re_dataframe/src/query.rs @@ -234,6 +234,8 @@ impl QueryHandle { continue; }; + descr.sanity_check(); + // NOTE: It would be tempting to concatenate all these individual clear chunks into one // single big chunk, but that'd be a mistake: 1) it's costly to do so but more // importantly 2) that would lead to likely very large chunk overlap, which is very bad @@ -333,6 +335,8 @@ impl QueryHandle { .map(|(_view_idx, descr)| match descr { ColumnDescriptor::Time(_) => None, ColumnDescriptor::Component(descr) => { + descr.sanity_check(); + let query = re_chunk::LatestAtQuery::new( Timeline::new_sequence(""), TimeInt::STATIC, @@ -350,6 +354,14 @@ impl QueryHandle { .collect_vec() }; + for descr in &view_contents { + descr.sanity_check(); + } + + for (_, descr) in &selected_contents { + descr.sanity_check(); + } + QueryHandleState { view_contents, selected_contents, @@ -403,6 +415,11 @@ impl QueryHandle { component_name: selected_component_name, } = selected_column; + debug_assert!( + !selected_component_name.starts_with("rerun.components.rerun.components."), + "Found component with full name {selected_component_name:?}. Maybe some bad round-tripping?" 
+ );
+
 view_contents
 .iter()
 .enumerate()
@@ -1166,11 +1183,14 @@ impl QueryHandle {
 StreamingJoinState::Retrofilled(unit) => {
 let component_desc =
 state.view_contents.get(view_idx).and_then(|col| match col {
- ColumnDescriptor::Component(descr) => Some(re_types_core::ComponentDescriptor {
- component_name: descr.component_name,
- archetype_name: descr.archetype_name,
- archetype_field_name: descr.archetype_field_name,
- }),
+ ColumnDescriptor::Component(descr) => {
+ descr.component_name.sanity_check();
+ Some(re_types_core::ComponentDescriptor {
+ component_name: descr.component_name,
+ archetype_name: descr.archetype_name,
+ archetype_field_name: descr.archetype_field_name,
+ })
+ },
 ColumnDescriptor::Time(_) => None,
 })?;
 unit.components().get_by_descriptor(&component_desc).cloned()
diff --git a/crates/store/re_sorbet/src/column_schema.rs b/crates/store/re_sorbet/src/column_schema.rs
index 14b2b937cfb0..5294dc6e032e 100644
--- a/crates/store/re_sorbet/src/column_schema.rs
+++ b/crates/store/re_sorbet/src/column_schema.rs
@@ -35,6 +35,16 @@ pub enum ColumnDescriptor {
 }
 
 impl ColumnDescriptor {
+ /// Debug-only sanity check.
+ #[inline]
+ #[track_caller]
+ pub fn sanity_check(&self) {
+ match self {
+ Self::Time(_) => {}
+ Self::Component(descr) => descr.sanity_check(),
+ }
+ }
+
 #[inline]
 pub fn entity_path(&self) -> Option<&EntityPath> {
 match self {
diff --git a/crates/store/re_sorbet/src/data_column_schema.rs b/crates/store/re_sorbet/src/data_column_schema.rs
index 7f9e4c5fbce0..d77dc4add0be 100644
--- a/crates/store/re_sorbet/src/data_column_schema.rs
+++ b/crates/store/re_sorbet/src/data_column_schema.rs
@@ -127,6 +127,7 @@ impl std::fmt::Display for ComponentColumnDescriptor {
 impl From<ComponentColumnDescriptor> for re_types_core::ComponentDescriptor {
 #[inline]
 fn from(descr: ComponentColumnDescriptor) -> Self {
+ descr.sanity_check();
 Self {
 archetype_name: descr.archetype_name,
 archetype_field_name: descr.archetype_field_name,
@@ -138,6 +139,7 @@ impl From<&ComponentColumnDescriptor> for re_types_core::ComponentDescriptor {
 #[inline]
 fn from(descr: &ComponentColumnDescriptor) -> Self {
+ descr.sanity_check();
 Self {
 archetype_name: descr.archetype_name,
 archetype_field_name: descr.archetype_field_name,
@@ -147,6 +149,16 @@ impl From<&ComponentColumnDescriptor> for re_types_core::ComponentDescriptor {
 }
 
 impl ComponentColumnDescriptor {
+ /// Debug-only sanity check. 
+ #[inline] + #[track_caller] + pub fn sanity_check(&self) { + self.component_name.sanity_check(); + if let Some(archetype_name) = &self.archetype_name { + archetype_name.sanity_check(); + } + } + pub fn component_path(&self) -> ComponentPath { ComponentPath { entity_path: self.entity_path.clone(), @@ -160,6 +172,8 @@ impl ComponentColumnDescriptor { } fn metadata(&self, batch_type: BatchType) -> ArrowFieldMetadata { + self.sanity_check(); + let Self { entity_path, archetype_name, @@ -228,6 +242,8 @@ impl ComponentColumnDescriptor { } pub fn column_name(&self, batch_type: BatchType) -> String { + self.sanity_check(); + match batch_type { BatchType::Chunk => self.component_name.short_name().to_owned(), BatchType::Dataframe => { @@ -279,7 +295,7 @@ impl ComponentColumnDescriptor { ComponentName::new(field.name()) // Legacy fallback }; - Ok(Self { + let schema = Self { store_datatype: field.data_type().clone(), entity_path, archetype_name: field.get_opt("rerun.archetype").map(|x| x.into()), @@ -289,6 +305,10 @@ impl ComponentColumnDescriptor { is_indicator: field.get_bool("rerun.is_indicator"), is_tombstone: field.get_bool("rerun.is_tombstone"), is_semantically_empty: field.get_bool("rerun.is_semantically_empty"), - }) + }; + + schema.sanity_check(); + + Ok(schema) } } diff --git a/crates/store/re_types_core/src/archetype.rs b/crates/store/re_types_core/src/archetype.rs index 356fc62af86f..a6f39d8a6ef2 100644 --- a/crates/store/re_types_core/src/archetype.rs +++ b/crates/store/re_types_core/src/archetype.rs @@ -139,11 +139,23 @@ re_string_interner::declare_new_type!( ); impl ArchetypeName { + /// Runs some asserts in debug mode to make sure the name is not weird. + #[inline] + #[track_caller] + pub fn sanity_check(&self) { + let full_name = self.0.as_str(); + debug_assert!( + !full_name.starts_with("rerun.archetypes.rerun.archetypes.") || full_name.contains(':'), + "DEBUG ASSERT: Found archetype with full name {full_name:?}. Maybe some bad round-tripping?" + ); + } + /// Returns the fully-qualified name, e.g. `rerun.archetypes.Points3D`. /// /// This is the default `Display` implementation for [`ArchetypeName`]. #[inline] pub fn full_name(&self) -> &'static str { + self.sanity_check(); self.0.as_str() } @@ -157,6 +169,7 @@ impl ArchetypeName { /// ``` #[inline] pub fn short_name(&self) -> &'static str { + self.sanity_check(); let full_name = self.0.as_str(); if let Some(short_name) = full_name.strip_prefix("rerun.archetypes.") { short_name diff --git a/crates/store/re_types_core/src/component_descriptor.rs b/crates/store/re_types_core/src/component_descriptor.rs index 3d0b95b136e2..0d2574c0e302 100644 --- a/crates/store/re_types_core/src/component_descriptor.rs +++ b/crates/store/re_types_core/src/component_descriptor.rs @@ -102,17 +102,25 @@ impl ComponentDescriptor { } } + #[inline] + #[track_caller] + pub fn sanity_check(&self) { + self.component_name.sanity_check(); + } + /// Returns the fully-qualified name, e.g. `rerun.archetypes.Points3D:rerun.components.Position3D#positions`. /// /// This is the default `Display` implementation for [`ComponentDescriptor`]. #[inline] pub fn full_name(&self) -> String { + self.sanity_check(); self.to_string() } /// Returns the unqualified name, e.g. `Points3D:Position3D#positions`. 
#[inline]
 pub fn short_name(&self) -> String {
+ self.sanity_check();
 self.to_any_string(true)
 }
 }
@@ -135,6 +143,7 @@ impl ComponentDescriptor {
 #[inline]
 pub fn new(component_name: impl Into<ComponentName>) -> Self {
 let component_name = component_name.into();
+ component_name.sanity_check();
 Self {
 archetype_name: None,
 archetype_field_name: None,
@@ -197,7 +206,7 @@ impl From<arrow::datatypes::Field> for ComponentDescriptor {
 fn from(field: arrow::datatypes::Field) -> Self {
 let md = field.metadata();
 
- Self {
+ let descr = Self {
 archetype_name: md
 .get(FIELD_METADATA_KEY_ARCHETYPE_NAME)
 .cloned()
@@ -206,7 +215,9 @@ impl From<arrow::datatypes::Field> for ComponentDescriptor {
 .get(FIELD_METADATA_KEY_ARCHETYPE_FIELD_NAME)
 .cloned()
 .map(Into::into),
- component_name: field.name().clone().into(),
- }
+ component_name: field.name().to_string().into(),
+ };
+ descr.sanity_check();
+ descr
 }
 }
diff --git a/crates/store/re_types_core/src/loggable.rs b/crates/store/re_types_core/src/loggable.rs
index 38a9f7d2df7b..11b2d59f4afa 100644
--- a/crates/store/re_types_core/src/loggable.rs
+++ b/crates/store/re_types_core/src/loggable.rs
@@ -131,6 +131,7 @@ re_string_interner::declare_new_type!(
 impl From<ComponentName> for Cow<'static, ComponentDescriptor> {
 #[inline]
 fn from(name: ComponentName) -> Self {
+ name.sanity_check();
 Cow::Owned(ComponentDescriptor::new(name))
 }
 }
@@ -141,16 +142,29 @@ impl From<ComponentName> for Cow<'static, ComponentDescriptor> {
 impl From<&ComponentName> for Cow<'static, ComponentDescriptor> {
 #[inline]
 fn from(name: &ComponentName) -> Self {
+ name.sanity_check();
 Cow::Owned(ComponentDescriptor::new(*name))
 }
 }
 
 impl ComponentName {
+ /// Runs some asserts in debug mode to make sure the name is not weird.
+ #[inline]
+ #[track_caller]
+ pub fn sanity_check(&self) {
+ let full_name = self.0.as_str();
+ debug_assert!(
+ !full_name.starts_with("rerun.components.rerun.components.") || full_name.contains(':'),
+ "DEBUG ASSERT: Found component with full name {full_name:?}. Maybe some bad round-tripping?"
+ );
+ }
+
 /// Returns the fully-qualified name, e.g. `rerun.components.Position2D`.
 ///
 /// This is the default `Display` implementation for [`ComponentName`]. 
#[inline]
 pub fn full_name(&self) -> &'static str {
+ self.sanity_check();
 self.0.as_str()
 }
 
@@ -164,6 +178,7 @@ impl ComponentName {
 /// ```
 #[inline]
 pub fn short_name(&self) -> &'static str {
+ self.sanity_check();
 let full_name = self.0.as_str();
 if let Some(short_name) = full_name.strip_prefix("rerun.blueprint.components.") {
 short_name
diff --git a/rerun_py/rerun_sdk/rerun/_baseclasses.py b/rerun_py/rerun_sdk/rerun/_baseclasses.py
index 891b3f75bd40..43c10db70552 100644
--- a/rerun_py/rerun_sdk/rerun/_baseclasses.py
+++ b/rerun_py/rerun_sdk/rerun/_baseclasses.py
@@ -54,6 +54,10 @@ def __init__(
 archetype_name: str | None = None,
 archetype_field_name: str | None = None,
 ) -> None:
+ assert not component_name.startswith("rerun.components.rerun.components."), f"Bad component name: {component_name}'"
+ if archetype_name is not None:
+ assert not archetype_name.startswith("rerun.archetypes.rerun.archetypes."), f"Bad archetype name '{archetype_name}'"
+
 self.archetype_name = archetype_name
 self.archetype_field_name = archetype_field_name
 self.component_name = component_name
diff --git a/rerun_py/rerun_sdk/rerun/_log.py b/rerun_py/rerun_sdk/rerun/_log.py
index bac403522eb6..4c364b1fa679 100644
--- a/rerun_py/rerun_sdk/rerun/_log.py
+++ b/rerun_py/rerun_sdk/rerun/_log.py
@@ -36,6 +36,7 @@ def __init__(self, archetype_name: str) -> None:
 """
 self.data = pa.nulls(1, type=pa.null())
+ assert not archetype_name.startswith("rerun.archetypes.rerun.archetypes."), f"Bad archetype name '{archetype_name}' in IndicatorComponentBatch"
 self._archetype_name = archetype_name
 
 def component_name(self) -> str:
diff --git a/rerun_py/src/arrow.rs b/rerun_py/src/arrow.rs
index 271beda23cec..c920044ef42d 100644
--- a/rerun_py/src/arrow.rs
+++ b/rerun_py/src/arrow.rs
@@ -44,11 +44,13 @@ pub fn descriptor_to_rust(component_descr: &Bound<'_, PyAny>) -> PyResult<ComponentDescriptor> {
 let component_name: Cow<'_, str> = component_name.extract()?;
 
- Ok(ComponentDescriptor {
+ let descr = ComponentDescriptor {
 archetype_name: archetype_name.map(|s| s.as_ref().into()),
 archetype_field_name: archetype_field_name.map(|s| s.as_ref().into()),
 component_name: component_name.as_ref().into(),
- })
+ };
+ descr.sanity_check();
+ Ok(descr)
 }
 
 /// Perform conversion between a pyarrow array to arrow types. 
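Every assert added in this patch looks for the same signature: a name that is already fully qualified getting qualified a second time somewhere on a round-trip through arrow metadata. A minimal sketch of that failure mode, using a hypothetical `qualify` helper that is not part of this series:

    fn qualify(name: &str) -> String {
        // Buggy: assumes `name` is always a short name such as "Position3D".
        format!("rerun.components.{name}")
    }

    fn main() {
        let already_qualified = "rerun.components.Position3D";
        let round_tripped = qualify(already_qualified);
        // Exactly the prefix pattern the new sanity checks reject:
        assert_eq!(
            round_tripped,
            "rerun.components.rerun.components.Position3D"
        );
    }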
From 0842f842a6881a5abbaedac52af41b781565c38d Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Tue, 11 Feb 2025 09:48:21 +0100
Subject: [PATCH 31/38] Fix full/short name errors

---
 crates/store/re_chunk_store/src/dataframe.rs | 6 +++---
 .../store/re_sorbet/src/data_column_schema.rs | 20 ++++++++++---------
 rerun_py/rerun_sdk/rerun/dataframe.py | 6 +++---
 rerun_py/src/dataframe.rs | 2 +-
 4 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/crates/store/re_chunk_store/src/dataframe.rs b/crates/store/re_chunk_store/src/dataframe.rs
index ab4183412a63..a9b729f1ca6b 100644
--- a/crates/store/re_chunk_store/src/dataframe.rs
+++ b/crates/store/re_chunk_store/src/dataframe.rs
@@ -95,7 +95,7 @@ impl From<ComponentColumnDescriptor> for ComponentColumnSelector {
 fn from(desc: ComponentColumnDescriptor) -> Self {
 Self {
 entity_path: desc.entity_path.clone(),
- component_name: desc.component_name.to_string(),
+ component_name: desc.component_name.short_name().to_owned(),
 }
 }
 }
@@ -106,7 +106,7 @@ impl ComponentColumnSelector {
 pub fn new<C: Component>(entity_path: EntityPath) -> Self {
 Self {
 entity_path,
- component_name: C::name().to_string(),
+ component_name: C::name().short_name().to_owned(),
 }
 }
 
@@ -115,7 +115,7 @@ impl ComponentColumnSelector {
 pub fn new_for_component_name(entity_path: EntityPath, component_name: ComponentName) -> Self {
 Self {
 entity_path,
- component_name: component_name.to_string(),
+ component_name: component_name.short_name().to_owned(),
 }
 }
 }
diff --git a/crates/store/re_sorbet/src/data_column_schema.rs b/crates/store/re_sorbet/src/data_column_schema.rs
index d77dc4add0be..5c7120d50a8d 100644
--- a/crates/store/re_sorbet/src/data_column_schema.rs
+++ b/crates/store/re_sorbet/src/data_column_schema.rs
@@ -245,20 +245,22 @@ impl ComponentColumnDescriptor {
 self.sanity_check();
 
 match batch_type {
- BatchType::Chunk => self.component_name.short_name().to_owned(),
+ BatchType::Chunk => {
+ // All columns are of the same entity
+ self.component_name.short_name().to_owned()
+ }
 BatchType::Dataframe => {
- let entity_path = &self.entity_path;
- let descriptor = ComponentDescriptor {
- archetype_name: self.archetype_name,
- archetype_field_name: self.archetype_field_name,
- component_name: self.component_name,
- };
-
- format!("{}:{}", entity_path, descriptor.component_name.short_name())
+ // Each column can be of a different entity
+ format!("{}:{}", self.entity_path, self.component_name.short_name())
 
 // NOTE: Uncomment this to expose fully-qualified names in the Dataframe APIs!
 // I'm not doing that right now, to avoid breaking changes (and we need to talk about
 // what the syntax for these fully-qualified paths need to look like first). 
+ // let descriptor = ComponentDescriptor { + // archetype_name: self.archetype_name, + // archetype_field_name: self.archetype_field_name, + // component_name: self.component_name, + // }; // format!("{entity_path}@{}", descriptor.short_name()) } } diff --git a/rerun_py/rerun_sdk/rerun/dataframe.py b/rerun_py/rerun_sdk/rerun/dataframe.py index 89c72647e21d..7e16b27b2832 100644 --- a/rerun_py/rerun_sdk/rerun/dataframe.py +++ b/rerun_py/rerun_sdk/rerun/dataframe.py @@ -61,9 +61,9 @@ def __init__(self, metadata: dict[bytes, bytes], col: pa.Array): def component_descriptor(self) -> ComponentDescriptor: kwargs = {} if SORBET_ARCHETYPE_NAME in self.metadata: - kwargs["archetype_name"] = "rerun.archetypes" + self.metadata[SORBET_ARCHETYPE_NAME].decode("utf-8") + kwargs["archetype_name"] = self.metadata[SORBET_ARCHETYPE_NAME].decode("utf-8") if SORBET_COMPONENT_NAME in self.metadata: - kwargs["component_name"] = "rerun.components." + self.metadata[SORBET_COMPONENT_NAME].decode("utf-8") + kwargs["component_name"] = self.metadata[SORBET_COMPONENT_NAME].decode("utf-8") if SORBET_ARCHETYPE_FIELD in self.metadata: kwargs["archetype_field_name"] = self.metadata[SORBET_ARCHETYPE_FIELD].decode("utf-8") @@ -105,7 +105,7 @@ def send_record_batch(batch: pa.RecordBatch, rec: Optional[RecordingStream] = No archetypes[entity_path].add(metadata[SORBET_ARCHETYPE_NAME].decode("utf-8")) for entity_path, archetype_set in archetypes.items(): for archetype in archetype_set: - data[entity_path].append(IndicatorComponentBatch("rerun.archetypes." + archetype)) + data[entity_path].append(IndicatorComponentBatch(archetype)) for entity_path, columns in data.items(): send_columns( diff --git a/rerun_py/src/dataframe.rs b/rerun_py/src/dataframe.rs index 9d03268e5519..5b8f4c94d792 100644 --- a/rerun_py/src/dataframe.rs +++ b/rerun_py/src/dataframe.rs @@ -188,7 +188,7 @@ impl PyComponentColumnDescriptor { /// This property is read-only. #[getter] fn component_name(&self) -> &str { - &self.0.component_name + self.0.component_name.full_name() } /// Whether the column is static. From 861e496287de4a80046e64adcb2eaa589df4c262 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 11 Feb 2025 09:48:38 +0100 Subject: [PATCH 32/38] Add `#[track_caller]` to sanity checks --- crates/store/re_types/src/archetypes/mesh3d_ext.rs | 1 + crates/store/re_types/src/components/view_coordinates_ext.rs | 1 + crates/viewer/re_renderer/src/mesh.rs | 1 + 3 files changed, 3 insertions(+) diff --git a/crates/store/re_types/src/archetypes/mesh3d_ext.rs b/crates/store/re_types/src/archetypes/mesh3d_ext.rs index 853a8e3676d7..70c58c392c0b 100644 --- a/crates/store/re_types/src/archetypes/mesh3d_ext.rs +++ b/crates/store/re_types/src/archetypes/mesh3d_ext.rs @@ -52,6 +52,7 @@ impl Mesh3D { /// and that we have the same number of positions and normals (if any). /// /// Only use this when logging a whole new mesh. Not meaningful for field updates! + #[track_caller] pub fn sanity_check(&self) -> Result<(), Mesh3DError> { let num_vertices = self.num_vertices(); diff --git a/crates/store/re_types/src/components/view_coordinates_ext.rs b/crates/store/re_types/src/components/view_coordinates_ext.rs index f5eaa0e1a3e0..eb8629208b23 100644 --- a/crates/store/re_types/src/components/view_coordinates_ext.rs +++ b/crates/store/re_types/src/components/view_coordinates_ext.rs @@ -37,6 +37,7 @@ impl ViewCoordinates { } /// Returns an error if this does not span all three dimensions. 
+ #[track_caller] pub fn sanity_check(&self) -> Result<(), String> { let mut dims = [false; 3]; for dir in *self.0 { diff --git a/crates/viewer/re_renderer/src/mesh.rs b/crates/viewer/re_renderer/src/mesh.rs index fb61b7d8ca3c..e2e78f086154 100644 --- a/crates/viewer/re_renderer/src/mesh.rs +++ b/crates/viewer/re_renderer/src/mesh.rs @@ -71,6 +71,7 @@ pub struct CpuMesh { } impl CpuMesh { + #[track_caller] pub fn sanity_check(&self) -> Result<(), MeshError> { re_tracing::profile_function!(); From b094db79731ec969684e1ce3280f3fdc5ca061b7 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 11 Feb 2025 09:50:35 +0100 Subject: [PATCH 33/38] py-fmt --- rerun_py/rerun_sdk/rerun/_baseclasses.py | 8 ++++++-- rerun_py/rerun_sdk/rerun/_log.py | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/rerun_py/rerun_sdk/rerun/_baseclasses.py b/rerun_py/rerun_sdk/rerun/_baseclasses.py index 43c10db70552..030038781bd3 100644 --- a/rerun_py/rerun_sdk/rerun/_baseclasses.py +++ b/rerun_py/rerun_sdk/rerun/_baseclasses.py @@ -54,9 +54,13 @@ def __init__( archetype_name: str | None = None, archetype_field_name: str | None = None, ) -> None: - assert not component_name.startswith("rerun.components.rerun.components."), f"Bad component name: {component_name}'" + assert not component_name.startswith( + "rerun.components.rerun.components." + ), f"Bad component name: {component_name}'" if archetype_name is not None: - assert not archetype_name.startswith("rerun.archetypes.rerun.archetypes."), f"Bad archetype name '{archetype_name}'" + assert not archetype_name.startswith( + "rerun.archetypes.rerun.archetypes." + ), f"Bad archetype name '{archetype_name}'" self.archetype_name = archetype_name self.archetype_field_name = archetype_field_name diff --git a/rerun_py/rerun_sdk/rerun/_log.py b/rerun_py/rerun_sdk/rerun/_log.py index 4c364b1fa679..aef830373cb9 100644 --- a/rerun_py/rerun_sdk/rerun/_log.py +++ b/rerun_py/rerun_sdk/rerun/_log.py @@ -36,7 +36,9 @@ def __init__(self, archetype_name: str) -> None: """ self.data = pa.nulls(1, type=pa.null()) - assert not archetype_name.startswith("rerun.archetypes.rerun.archetypes."), f"Bad archetype name '{archetype_name}' in IndicatorComponentBatch" + assert not archetype_name.startswith( + "rerun.archetypes.rerun.archetypes." + ), f"Bad archetype name '{archetype_name}' in IndicatorComponentBatch" self._archetype_name = archetype_name def component_name(self) -> str: From f9df88e6bffca29a01520586533dddb5cdc7e923 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 11 Feb 2025 10:06:39 +0100 Subject: [PATCH 34/38] Fix bad sanity checks --- crates/store/re_types_core/src/archetype.rs | 2 +- crates/store/re_types_core/src/loggable.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/store/re_types_core/src/archetype.rs b/crates/store/re_types_core/src/archetype.rs index a6f39d8a6ef2..429d7fa69ff9 100644 --- a/crates/store/re_types_core/src/archetype.rs +++ b/crates/store/re_types_core/src/archetype.rs @@ -145,7 +145,7 @@ impl ArchetypeName { pub fn sanity_check(&self) { let full_name = self.0.as_str(); debug_assert!( - !full_name.starts_with("rerun.archetypes.rerun.archetypes.") || full_name.contains(':'), + !full_name.starts_with("rerun.archetypes.rerun.archetypes.") && !full_name.contains(':'), "DEBUG ASSERT: Found archetype with full name {full_name:?}. Maybe some bad round-tripping?" 
);
 }
 
diff --git a/crates/store/re_types_core/src/loggable.rs b/crates/store/re_types_core/src/loggable.rs
index 11b2d59f4afa..d4be8a487f92 100644
--- a/crates/store/re_types_core/src/loggable.rs
+++ b/crates/store/re_types_core/src/loggable.rs
@@ -154,7 +154,7 @@ impl ComponentName {
 pub fn sanity_check(&self) {
 let full_name = self.0.as_str();
 debug_assert!(
- !full_name.starts_with("rerun.components.rerun.components.") || full_name.contains(':'),
+ !full_name.starts_with("rerun.components.rerun.components.") && !full_name.contains(':'),
 "DEBUG ASSERT: Found component with full name {full_name:?}. Maybe some bad round-tripping?"
 );
 }

From 39ecc473fb1960b9cabdac25bfcffa8fa4ea1d34 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Tue, 11 Feb 2025 10:06:50 +0100
Subject: [PATCH 35/38] Explain some TODOs

---
 crates/store/re_chunk/src/transport.rs | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs
index 0dda84d32656..9cc3623329d5 100644
--- a/crates/store/re_chunk/src/transport.rs
+++ b/crates/store/re_chunk/src/transport.rs
@@ -100,10 +100,13 @@ impl Chunk {
 archetype_field_name,
 component_name,
 
- is_static: false, // TODO(#8744): figure out what to do here
- is_indicator: false, // TODO(#8744): figure out what to do here
- is_tombstone: false, // TODO(#8744): figure out what to do here
- is_semantically_empty: false, // TODO(#8744): figure out what to do here
+ // These are a consequence of using `ComponentColumnDescriptor` both for chunk batches and dataframe batches.
+ // Setting them all to `false` at least ensures they aren't written to the arrow metadata:
+ // TODO(#8744): figure out what to do here
+ is_static: false,
+ is_indicator: false,
+ is_tombstone: false,
+ is_semantically_empty: false,
 };
 (schema, into_arrow_ref(list_array))
 })

From 1c13ee4a2706e34d6375cddb53b23f087f4aa390 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Tue, 11 Feb 2025 14:14:21 +0100
Subject: [PATCH 36/38] Add a TODO

---
 crates/store/re_sorbet/src/chunk_batch.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs
index 14bafe07c046..f3943ab45329 100644
--- a/crates/store/re_sorbet/src/chunk_batch.rs
+++ b/crates/store/re_sorbet/src/chunk_batch.rs
@@ -299,6 +299,7 @@ fn make_all_data_columns_list_arrays(batch: &ArrowRecordBatch) -> ArrowRecordBat
 sliced.push(data.clone().slice(idx, 1));
 }
 
+ // TODO(teh-cmc): we can do something faster/simpler here; see https://github.com/rerun-io/rerun/pull/8945#discussion_r1950689060
 let data_arrays = sliced.iter().map(|e| Some(e.as_ref())).collect::<Vec<_>>();
 #[allow(clippy::unwrap_used)] // we know we've given the right field type
 let list_array: arrow::array::ListArray =

From 163f7fb373da9c76507c2a5194471661747f84d3 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Tue, 11 Feb 2025 14:28:16 +0100
Subject: [PATCH 37/38] Clean up the list-array-wrapping code

---
 crates/store/re_sorbet/src/chunk_batch.rs | 85 +++++++++++------------
 1 file changed, 42 insertions(+), 43 deletions(-)

diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs
index f3943ab45329..05a322ba9481 100644
--- a/crates/store/re_sorbet/src/chunk_batch.rs
+++ b/crates/store/re_sorbet/src/chunk_batch.rs
@@ -2,8 +2,8 @@ use std::sync::Arc;
 
 use arrow::{
 array::{
- Array as _, ArrayRef as ArrowArrayRef, AsArray, RecordBatch as ArrowRecordBatch,
- RecordBatchOptions, StructArray as ArrowStructArray,
+ Array as ArrowArray, ArrayRef as ArrowArrayRef, AsArray, ListArray as ArrowListArray,
+ RecordBatch as ArrowRecordBatch, RecordBatchOptions, StructArray as ArrowStructArray,
 },
 datatypes::{
 DataType as ArrowDataType, Field as ArrowField, FieldRef as ArrowFieldRef,
@@ -269,51 +269,19 @@ fn make_all_data_columns_list_arrays(batch: &ArrowRecordBatch) -> ArrowRecordBat
 let mut fields: Vec<ArrowFieldRef> = Vec::with_capacity(num_columns);
 let mut columns: Vec<ArrowArrayRef> = Vec::with_capacity(num_columns);
 
- for (field, data) in itertools::izip!(batch.schema().fields(), batch.columns()) {
- if data
- .downcast_array_ref::<arrow::array::ListArray>()
- .is_some()
- {
- // Already fine
- fields.push(field.clone());
- columns.push(data.clone());
- } else if field
+ for (field, array) in itertools::izip!(batch.schema().fields(), batch.columns()) {
+ let is_list_array = array.downcast_array_ref::<ArrowListArray>().is_some();
+ let is_data_column = field
 .metadata()
 .get("rerun.kind")
- .is_some_and(|kind| kind == "data")
- {
- // We slice each column array into individual arrays and then convert the whole lot into a ListArray
-
- let data_field_inner =
- ArrowField::new("item", field.data_type().clone(), true /* nullable */);
-
- let data_field = ArrowField::new(
- field.name().clone(),
- ArrowDataType::List(Arc::new(data_field_inner.clone())),
- false, /* not nullable */
- )
- .with_metadata(field.metadata().clone());
-
- let mut sliced: Vec<ArrowArrayRef> = Vec::new();
- for idx in 0..data.len() {
- sliced.push(data.clone().slice(idx, 1));
- }
-
- // TODO(teh-cmc): we can do something faster/simpler here; see https://github.com/rerun-io/rerun/pull/8945#discussion_r1950689060
- let data_arrays = sliced.iter().map(|e| Some(e.as_ref())).collect::<Vec<_>>();
- #[allow(clippy::unwrap_used)] // we know we've given the right field type
- let list_array: arrow::array::ListArray =
- re_arrow_util::arrow_util::arrays_to_list_array(
- data_field_inner.data_type().clone(),
- &data_arrays,
- )
- .unwrap();
-
- fields.push(data_field.into());
- columns.push(into_arrow_ref(list_array));
+ .is_some_and(|kind| kind == "data");
+
+ if is_data_column && !is_list_array {
+ let (field, array) = wrap_in_list_array(field, array);
+ fields.push(field.into());
+ columns.push(into_arrow_ref(array));
 } else {
 fields.push(field.clone());
 columns.push(array.clone());
 }
 }
 
@@ -326,3 +294,34 @@ fn make_all_data_columns_list_arrays(batch: &ArrowRecordBatch) -> ArrowRecordBat
 .expect("Can't fail")
 }
+
+// TODO(teh-cmc): we can do something faster/simpler here; see https://github.com/rerun-io/rerun/pull/8945#discussion_r1950689060
+fn wrap_in_list_array(field: &ArrowField, data: &dyn ArrowArray) -> (ArrowField, ArrowListArray) {
+ re_tracing::profile_function!();
+
+ // We slice each column array into individual arrays and then convert the whole lot into a ListArray
+
+ let data_field_inner =
+ ArrowField::new("item", field.data_type().clone(), true /* nullable */);
+
+ let data_field = ArrowField::new(
+ field.name().clone(),
+ ArrowDataType::List(Arc::new(data_field_inner.clone())),
+ false, /* not nullable */
+ )
+ .with_metadata(field.metadata().clone());
+
+ let mut sliced: Vec<ArrowArrayRef> = Vec::new();
+ for idx in 0..data.len() {
+ sliced.push(data.slice(idx, 1));
+ }
+
+ let data_arrays = sliced.iter().map(|e| Some(e.as_ref())).collect::<Vec<_>>();
+ #[allow(clippy::unwrap_used)] // we know we've given the right field type
+ let list_array: ArrowListArray = re_arrow_util::arrow_util::arrays_to_list_array(
+ data_field_inner.data_type().clone(),
+ &data_arrays,
+ )
+ .unwrap();
+
+ (data_field, list_array)
+}

From 8282fc56be042d50dd048421418efca396d87270 Mon Sep 17 00:00:00 2001
From: Emil Ernerfeldt
Date: Tue, 11 Feb 2025 14:33:45 +0100
Subject: [PATCH 38/38] Fix TODO-formatting

---
 crates/store/re_sorbet/src/chunk_batch.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/store/re_sorbet/src/chunk_batch.rs b/crates/store/re_sorbet/src/chunk_batch.rs
index 05a322ba9481..47a8feb71d3b 100644
--- a/crates/store/re_sorbet/src/chunk_batch.rs
+++ b/crates/store/re_sorbet/src/chunk_batch.rs
@@ -295,7 +295,7 @@ fn make_all_data_columns_list_arrays(batch: &ArrowRecordBatch) -> ArrowRecordBat
 .expect("Can't fail")
 }
 
-// TODO(teh-cmc): we can do something faster/simpler here; see https://github.com/rerun-io/rerun/pull/8945#discussion_r1950689060
+// TODO(cmc): we can do something faster/simpler here; see https://github.com/rerun-io/rerun/pull/8945#discussion_r1950689060
 fn wrap_in_list_array(field: &ArrowField, data: &dyn ArrowArray) -> (ArrowField, ArrowListArray) {
 re_tracing::profile_function!();
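The TODO above points at a faster/simpler alternative to the per-row slicing in `wrap_in_list_array`: since every row becomes a single-element list, a `ListArray` can be built directly over the original array with a unit-length offset buffer. A rough sketch of that idea against plain arrow-rs APIs; this is an assumption about the direction of the linked discussion, not the suggestion itself:

    use std::sync::Arc;

    use arrow::array::{Array, ArrayRef, Int32Array, ListArray};
    use arrow::buffer::OffsetBuffer;
    use arrow::datatypes::Field;

    /// Wraps every row of `values` in its own single-element list,
    /// without slicing the input row by row.
    fn wrap_each_row_in_list(values: ArrayRef) -> ListArray {
        let field = Arc::new(Field::new("item", values.data_type().clone(), true));
        // Offsets [0, 1, 2, ..., len] yield exactly one child value per list entry.
        let offsets = OffsetBuffer::from_lengths(std::iter::repeat(1).take(values.len()));
        ListArray::new(field, offsets, values, None)
    }

    fn main() {
        let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let list = wrap_each_row_in_list(values);
        assert_eq!(list.len(), 3); // three lists, one element each
    }

The child values buffer is shared rather than copied, so this stays O(1) in the data size instead of O(rows) allocations.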