From 0d6a74c640f96f8efe27c66dada34ae0af96fe6f Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Mon, 11 Dec 2023 19:29:38 +0100 Subject: [PATCH 01/13] Switch to the new entity path format --- crates/re_data_store/src/lib.rs | 2 +- .../benches/msg_encode_benchmark.rs | 8 +- crates/re_log_types/src/index.rs | 138 ------- crates/re_log_types/src/lib.rs | 2 - crates/re_log_types/src/path/entity_path.rs | 17 +- crates/re_log_types/src/path/mod.rs | 90 +++-- crates/re_log_types/src/path/parse_path.rs | 341 ++++++------------ crates/re_query/benches/query_benchmark.rs | 6 +- crates/re_viewer_context/src/blueprint_id.rs | 5 +- .../src/space_view_entity_picker.rs | 2 +- docs/content/concepts/entity-component.md | 2 + docs/content/concepts/entity-path.md | 27 +- .../content/concepts/spaces-and-transforms.md | 14 +- 13 files changed, 231 insertions(+), 423 deletions(-) delete mode 100644 crates/re_log_types/src/index.rs diff --git a/crates/re_data_store/src/lib.rs b/crates/re_data_store/src/lib.rs index 8afcb48e50e0..9d13882580bf 100644 --- a/crates/re_data_store/src/lib.rs +++ b/crates/re_data_store/src/lib.rs @@ -28,7 +28,7 @@ pub use self::versioned_instance_path::{VersionedInstancePath, VersionedInstance pub(crate) use self::entity_tree::{ClearCascade, CompactedStoreEvents}; use re_log_types::DataTableError; -pub use re_log_types::{EntityPath, EntityPathPart, Index, TimeInt, Timeline}; +pub use re_log_types::{EntityPath, EntityPathPart, TimeInt, Timeline}; #[cfg(feature = "serde")] pub use blueprint::EntityPropertiesComponent; diff --git a/crates/re_log_encoding/benches/msg_encode_benchmark.rs b/crates/re_log_encoding/benches/msg_encode_benchmark.rs index e2e093ecfb8f..0a617069bd12 100644 --- a/crates/re_log_encoding/benches/msg_encode_benchmark.rs +++ b/crates/re_log_encoding/benches/msg_encode_benchmark.rs @@ -5,8 +5,8 @@ compile_error!("msg_encode_benchmark requires 'decoder' and 'encoder' features." static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; use re_log_types::{ - entity_path, DataRow, DataTable, Index, LogMsg, RowId, StoreId, StoreKind, TableId, TimeInt, - TimeType, Timeline, + entity_path, DataRow, DataTable, LogMsg, RowId, StoreId, StoreKind, TableId, TimeInt, TimeType, + Timeline, }; use re_types::datagen::{build_some_colors, build_some_positions2d}; @@ -73,7 +73,7 @@ fn mono_points_arrow(c: &mut Criterion) { TableId::ZERO, [DataRow::from_cells2( RowId::ZERO, - entity_path!("points", Index::Sequence(i as _)), + entity_path!("points", i.to_string()), [build_frame_nr(0.into())], 1, (build_some_positions2d(1), build_some_colors(1)), @@ -131,7 +131,7 @@ fn mono_points_arrow_batched(c: &mut Criterion) { (0..NUM_POINTS).map(|i| { DataRow::from_cells2( RowId::ZERO, - entity_path!("points", Index::Sequence(i as _)), + entity_path!("points", i.to_string()), [build_frame_nr(0.into())], 1, (build_some_positions2d(1), build_some_colors(1)), diff --git a/crates/re_log_types/src/index.rs b/crates/re_log_types/src/index.rs deleted file mode 100644 index 5cf7a2d7340d..000000000000 --- a/crates/re_log_types/src/index.rs +++ /dev/null @@ -1,138 +0,0 @@ -use crate::hash::Hash128; - -// ---------------------------------------------------------------------------- - -/// The key of a table, or an array index. -/// -/// This is a variant of [`EntityPathPart`][crate::EntityPathPart] which makes up [`EntityPath`][crate::EntityPath]. -#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -pub enum Index { - /// For arrays, assumed to be dense (0, 1, 2, …). - Sequence(u64), - - /// Any integer, e.g. a hash or an arbitrary identifier. - Integer(i128), - - /// UUID/GUID - Uuid(uuid::Uuid), - - /// Anything goes. - String(String), -} - -#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -pub enum BatchIndex { - /// Many batches use implicit sequential-indexing - SequentialIndex(usize), - - /// Some batches want to provide explicit indices - FullIndex(Vec), -} - -impl BatchIndex { - pub fn len(&self) -> usize { - match &self { - BatchIndex::SequentialIndex(sz) => *sz, - BatchIndex::FullIndex(vec) => vec.len(), - } - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } -} - -impl Index { - #[inline] - pub fn hash(&self) -> IndexHash { - IndexHash::hash(self) - } -} - -impl std::fmt::Display for Index { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Sequence(seq) => format!("#{seq}").fmt(f), - Self::Integer(value) => value.fmt(f), - Self::Uuid(value) => value.fmt(f), - Self::String(value) => format!("{value:?}").fmt(f), // put it in quotes - } - } -} - -crate::impl_into_enum!(String, Index, String); - -impl From<&str> for Index { - #[inline] - fn from(s: &str) -> Self { - Self::String(s.into()) - } -} - -// ---------------------------------------------------------------------------- - -/// A 128 bit hash of [`Index`] with negligible risk of collision. -#[derive(Copy, Clone, Eq)] -pub struct IndexHash(Hash128); - -impl IndexHash { - pub const NONE: IndexHash = Self(Hash128::ZERO); - - #[inline] - pub fn hash(index: &Index) -> Self { - Self(Hash128::hash(index)) - } - - #[inline] - pub fn is_none(&self) -> bool { - self.0 == Hash128::ZERO - } - - #[inline] - pub fn is_some(&self) -> bool { - self.0 != Hash128::ZERO - } - - #[inline] - pub fn hash64(&self) -> u64 { - self.0.hash64() - } - - #[inline] - pub fn first64(&self) -> u64 { - self.0.first64() - } - - #[inline] - pub fn second64(&self) -> u64 { - self.0.second64() - } -} - -impl std::hash::Hash for IndexHash { - #[inline] - fn hash(&self, state: &mut H) { - state.write_u64(self.0.hash64()); - } -} - -impl std::cmp::PartialEq for IndexHash { - #[inline] - fn eq(&self, other: &Self) -> bool { - self.0 == other.0 - } -} - -impl nohash_hasher::IsEnabled for IndexHash {} - -impl std::fmt::Debug for IndexHash { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(&format!( - "IndexHash({:016X}{:016X})", - self.0.first64(), - self.0.second64() - )) - } -} diff --git a/crates/re_log_types/src/lib.rs b/crates/re_log_types/src/lib.rs index 5f15bc123e40..329a4683fd2d 100644 --- a/crates/re_log_types/src/lib.rs +++ b/crates/re_log_types/src/lib.rs @@ -24,7 +24,6 @@ mod data_table; #[cfg(feature = "testing")] pub mod example_components; pub mod hash; -mod index; mod num_instances; pub mod path; mod time; @@ -49,7 +48,6 @@ pub use self::data_table::{ ErasedTimeVec, NumInstancesVec, RowIdVec, TableId, TimePointVec, METADATA_KIND, METADATA_KIND_CONTROL, METADATA_KIND_DATA, }; -pub use self::index::*; pub use self::num_instances::NumInstances; pub use self::path::*; pub use self::time::{Duration, Time, TimeZone}; diff --git a/crates/re_log_types/src/path/entity_path.rs b/crates/re_log_types/src/path/entity_path.rs index cebff10ba8bb..61612bd731c9 100644 --- a/crates/re_log_types/src/path/entity_path.rs +++ b/crates/re_log_types/src/path/entity_path.rs @@ -82,14 +82,14 @@ impl std::fmt::Debug for EntityPathHash { /// Implements [`nohash_hasher::IsEnabled`]. /// /// ``` -/// # use re_log_types::{EntityPath, EntityPathPart, Index}; +/// # use re_log_types::{EntityPath, EntityPathPart}; /// assert_eq!( -/// EntityPath::parse_strict(r#"camera/"ACME Örnöga"/points/#42"#).unwrap(), +/// EntityPath::parse_strict(r#"camera/ACME\ Örnöga/points/42"#).unwrap(), /// EntityPath::new(vec![ -/// EntityPathPart::Name("camera".into()), -/// EntityPathPart::Index(Index::String("ACME Örnöga".into())), -/// EntityPathPart::Name("points".into()), -/// EntityPathPart::Index(Index::Sequence(42)) +/// "camera".into(), +/// "ACME Örnöga".into(), +/// "points".into(), +/// "42".into(), /// ]) /// ); /// ``` @@ -124,8 +124,8 @@ impl EntityPath { } /// Treat the string as one opaque string, NOT splitting on any slashes. - pub fn from_single_string(string: String) -> Self { - Self::new(vec![EntityPathPart::Index(crate::Index::String(string))]) + pub fn from_single_string(string: impl Into) -> Self { + Self::new(vec![EntityPathPart::new(string)]) } #[inline] @@ -133,6 +133,7 @@ impl EntityPath { self.path.iter() } + #[inline] pub fn last(&self) -> Option<&EntityPathPart> { self.path.last() } diff --git a/crates/re_log_types/src/path/mod.rs b/crates/re_log_types/src/path/mod.rs index 226ca5ad9dee..ed1df3be5df7 100644 --- a/crates/re_log_types/src/path/mod.rs +++ b/crates/re_log_types/src/path/mod.rs @@ -20,52 +20,96 @@ pub use entity_path_expr::EntityPathExpr; pub use entity_path_impl::EntityPathImpl; pub use parse_path::PathParseError; -use re_string_interner::InternedString; - -use crate::Index; - // ---------------------------------------------------------------------------- /// The different parts that make up an [`EntityPath`]. -#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +/// +/// In the file system analogy, this is the name of a folder. +#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] // TODO(#4464): ordering #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -pub enum EntityPathPart { - /// Corresponds to the name of a struct field. - /// - /// Names must match the regex: `[a-zA-z0-9_-]+` - Name(InternedString), - - /// Array/table/map member. - Index(Index), +pub struct EntityPathPart( + // TODO(emilk): consider other string types; e.g. interned strings, `Arc`, … + String, +); + +impl EntityPathPart { + #[inline] + pub fn new(string: impl Into) -> Self { + Self(string.into()) + } + + #[inline] + pub fn as_str(&self) -> &str { + &self.0 + } } impl std::fmt::Display for EntityPathPart { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Name(name) => f.write_str(name), - Self::Index(index) => index.fmt(f), + let mut is_first_character = true; + + for c in self.0.chars() { + let print_as_is = if is_first_character { + // Escape punctutation if it is the first character, + // so that we can use `-` for negation, and `.` to mean the current entity. + c.is_alphanumeric() + } else { + c.is_alphanumeric() || matches!(c, '_' | '-' | '.') + }; + + if print_as_is { + c.fmt(f)?; + } else { + match c { + '\n' => "\\n".fmt(f), + '\r' => "\\r".fmt(f), + '\t' => "\\t".fmt(f), + c if c.is_ascii_punctuation() || c == ' ' || c.is_alphanumeric() => { + write!(f, "\\{c}") + } + c => write!(f, "\\u{{{:x}}}", c as u32), + }?; + } + + is_first_character = false; } + Ok(()) } } impl From<&str> for EntityPathPart { #[inline] fn from(part: &str) -> Self { - Self::Name(part.into()) + Self(part.into()) } } impl From for EntityPathPart { #[inline] fn from(part: String) -> Self { - Self::Name(part.into()) + Self(part) + } +} + +impl AsRef for EntityPathPart { + #[inline] + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl std::borrow::Borrow for EntityPathPart { + #[inline] + fn borrow(&self) -> &str { + self.as_str() } } -impl From for EntityPathPart { +impl std::ops::Deref for EntityPathPart { + type Target = str; #[inline] - fn from(part: Index) -> Self { - Self::Index(part) + fn deref(&self) -> &str { + self.as_str() } } @@ -74,7 +118,7 @@ impl From for EntityPathPart { /// Build a `Vec`: /// ``` /// # use re_log_types::*; -/// let parts: Vec = entity_path_vec!("foo", Index::Sequence(123)); +/// let parts: Vec = entity_path_vec!("foo", "bar"); /// ``` #[macro_export] macro_rules! entity_path_vec { @@ -90,7 +134,7 @@ macro_rules! entity_path_vec { /// Build a `EntityPath`: /// ``` /// # use re_log_types::*; -/// let path: EntityPath = entity_path!("foo", Index::Sequence(123)); +/// let path: EntityPath = entity_path!("foo", "bar"); /// ``` #[macro_export] macro_rules! entity_path { diff --git a/crates/re_log_types/src/path/parse_path.rs b/crates/re_log_types/src/path/parse_path.rs index 95a6d7304fae..31737357390e 100644 --- a/crates/re_log_types/src/path/parse_path.rs +++ b/crates/re_log_types/src/path/parse_path.rs @@ -2,7 +2,7 @@ use std::str::FromStr; use re_types_core::{components::InstanceKey, ComponentName}; -use crate::{ComponentPath, DataPath, EntityPath, EntityPathPart, Index}; +use crate::{ComponentPath, DataPath, EntityPath, EntityPathPart}; #[derive(thiserror::Error, Debug, PartialEq, Eq)] pub enum PathParseError { @@ -15,18 +15,9 @@ pub enum PathParseError { #[error("Path had leading slash")] LeadingSlash, - #[error("Missing closing quote (\")")] - UnterminatedString, - - #[error("Bad escape sequence: {details}")] - BadEscape { details: &'static str }, - #[error("Double-slashes with no part between")] DoubleSlash, - #[error("Invalid sequence: {0:?} (expected positive integer)")] - InvalidSequence(String), - #[error("Missing slash (/)")] MissingSlash, @@ -36,9 +27,6 @@ pub enum PathParseError { #[error("Empty part")] EmptyPart, - #[error("Invalid character: {character:?} in entity path identifier {part:?}. Only ASCII characters, numbers, underscore, and dash are allowed. To put arbitrary text in an entity path, surround it with double-quotes. See https://www.rerun.io/docs/concepts/entity-path for more.")] - InvalidCharacterInPart { part: String, character: char }, - #[error("Invalid instance key: {0:?} (expected '[#1234]')")] BadInstanceKey(String), @@ -53,6 +41,16 @@ pub enum PathParseError { #[error("Found trailing colon (:)")] TrailingColon, + + // Escaping: + #[error("Unknown escape sequence: \\{0}")] + UnknownEscapeSequence(char), + + #[error("String ended in backslash")] + TrailingBackslash, + + #[error("{0:?} needs to be escaped as `\\{0}`")] + MissingEscape(char), } type Result = std::result::Result; @@ -71,9 +69,7 @@ impl std::str::FromStr for DataPath { return Err(PathParseError::EmptyString); } - // Start by looking for a component - - let mut tokens = tokenize(path)?; + let mut tokens = tokenize_data_path(path); let mut component_name = None; let mut instance_key = None; @@ -119,14 +115,7 @@ impl std::str::FromStr for DataPath { // The remaining tokens should all be separated with `/`: - let parts = entity_path_parts_from_tokens(&tokens)?; - - // Validate names: - for part in &parts { - if let EntityPathPart::Name(name) = part { - validate_name(name)?; - } - } + let parts = entity_path_parts_from_tokens_strict(&tokens)?; let entity_path = EntityPath::from(parts); @@ -154,12 +143,12 @@ impl EntityPath { /// Parses anything that `ent_path.to_string()` outputs. /// /// For a forgiving parse that accepts anything, use [`Self::parse_forgiving`]. - pub fn parse_strict(s: &str) -> Result { + pub fn parse_strict(input: &str) -> Result { let DataPath { entity_path, instance_key, component_name, - } = DataPath::from_str(s)?; + } = DataPath::from_str(input)?; if let Some(instance_key) = instance_key { return Err(PathParseError::UnexpectedInstanceKey(instance_key)); @@ -167,31 +156,28 @@ impl EntityPath { if let Some(component_name) = component_name { return Err(PathParseError::UnexpectedComponentName(component_name)); } + + let normalized = entity_path.to_string(); + if normalized != input { + re_log::warn_once!("Found an entity path '{input}' that was not in the normalized form. Please write it as '{normalized}' instead. See https://www.rerun.io/docs/concepts/entity-path for more"); + } + Ok(entity_path) } - /// Parses an entity path, handling any malformed input with a logged warning. + /// Parses an entity path, ha ndling any malformed input with a logged warning. /// /// For a strict parses, use [`Self::parse_strict`] instead. - pub fn parse_forgiving(s: &str) -> Self { - let mut parts = parse_entity_path_forgiving(s); - - for part in &mut parts { - if let EntityPathPart::Name(name) = part { - if validate_name(name).is_err() { - // Quote this, e.g. `foo/invalid.name` -> `foo/"invalid.name"` - *part = EntityPathPart::Index(Index::String(name.to_string())); - } - } - } - - let path = EntityPath::from(parts); + pub fn parse_forgiving(input: &str) -> Self { + let parts = parse_entity_path_forgiving(input); + let entity_path = EntityPath::from(parts); - if path.to_string() != s { - re_log::warn_once!("Found an entity path '{s}' that was not in the normalized form. Please write it as '{path}' instead. Only ASCII characters, numbers, underscore, and dash are allowed in identifiers. To put arbitrary text in an entity path, surround it with double-quotes. See https://www.rerun.io/docs/concepts/entity-path for more"); + let normalized = entity_path.to_string(); + if normalized != input { + re_log::warn_once!("Found an entity path '{input}' that was not in the normalized form. Please write it as '{normalized}' instead. See https://www.rerun.io/docs/concepts/entity-path for more"); } - path + entity_path } } @@ -223,57 +209,16 @@ impl FromStr for ComponentPath { /// A very forgiving parsing of the given entity path. /// /// Things like `foo/Hallå Där!` will be accepted, and transformed into -/// the path `foo/"Hallå Där!"`. +/// the path `foo/Hallå\ Där\!`. fn parse_entity_path_forgiving(path: &str) -> Vec { - #![allow(clippy::unwrap_used)] - // We parse on bytes, and take care to only split on either side of a one-byte ASCII character, - // making the `from_utf8(…).unwrap()`s below safe. - let mut bytes = path.as_bytes(); - - let mut parts = vec![]; - - while let Some(c) = bytes.first() { - if *c == b'/' { - bytes = &bytes[1..]; // skip the / - } else if *c == b'"' { - // Look for the terminating quote ignoring escaped quotes (\"): - let mut i = 1; - loop { - if i == bytes.len() { - // Unterminated string - let's do our best: - let unescaped = - unescape_string_forgiving(std::str::from_utf8(&bytes[1..i]).unwrap()); - - parts.push(EntityPathPart::Index(Index::String(unescaped))); - bytes = &bytes[i..]; - break; - } else if bytes[i] == b'\\' && i + 1 < bytes.len() { - i += 2; // consume escape and what was escaped - } else if bytes[i] == b'"' { - break; - } else { - i += 1; - } - } - - let unescaped = unescape_string_forgiving(std::str::from_utf8(&bytes[1..i]).unwrap()); - parts.push(EntityPathPart::Index(Index::String(unescaped))); - bytes = &bytes[i + 1..]; // skip the closing quote - } else { - let end = bytes.iter().position(|&b| b == b'/').unwrap_or(bytes.len()); - parts.push(parse_part(std::str::from_utf8(&bytes[0..end]).unwrap())); - if end == bytes.len() { - break; - } else { - bytes = &bytes[end + 1..]; // skip the / - } - } - } - - parts + tokenize_entity_path(path) + .into_iter() + .filter(|&part| part != "/") // ignore duplicate slashes + .map(parse_part_forgiving) + .collect() } -fn entity_path_parts_from_tokens(mut tokens: &[&str]) -> Result> { +fn entity_path_parts_from_tokens_strict(mut tokens: &[&str]) -> Result> { if tokens.is_empty() { return Err(PathParseError::MissingPath); } @@ -294,13 +239,8 @@ fn entity_path_parts_from_tokens(mut tokens: &[&str]) -> Result String { s } -fn tokenize(path: &str) -> Result> { +/// "/foo/bar" -> ["/", "foo", "/", "bar"] +fn tokenize_entity_path(path: &str) -> Vec<&str> { + tokenize_by(path, &[b'/']) +} + +/// "/foo/bar[#42]:Color" -> ["/", "foo", "/", "bar", "[", "#42:", "]", ":", "Color"] +fn tokenize_data_path(path: &str) -> Vec<&str> { + tokenize_by(path, &[b'/', b'[', b']', b':']) +} + +fn tokenize_by<'s>(path: &'s str, special_chars: &[u8]) -> Vec<&'s str> { #![allow(clippy::unwrap_used)] + // We parse on bytes, and take care to only split on either side of a one-byte ASCII, // making the `from_utf8(…)`s below safe to unwrap. let mut bytes = path.as_bytes(); - fn is_special_character(c: u8) -> bool { - matches!(c, b'[' | b']' | b':' | b'/') - } - let mut tokens = vec![]; - while let Some(c) = bytes.first() { - if *c == b'"' { - // Look for the terminating quote ignoring escaped quotes (\"): - let mut i = 1; - loop { - if i == bytes.len() { - return Err(PathParseError::UnterminatedString); - } else if bytes[i] == b'\\' && i + 1 < bytes.len() { - i += 2; // consume escape and what was escaped - } else if bytes[i] == b'"' { - break; - } else { - i += 1; - } - } - - let token = &bytes[..i + 1]; // Include the closing quote - tokens.push(token); - bytes = &bytes[i + 1..]; // skip the closing quote - } else if is_special_character(*c) { - tokens.push(&bytes[..1]); - bytes = &bytes[1..]; - } else { - let mut i = 0; - while i < bytes.len() { - if bytes[i] == b'"' || is_special_character(bytes[i]) { - break; - } - i += 1; + while !bytes.is_empty() { + let mut i = 0; + let mut is_in_escape = false; + while i < bytes.len() { + if !is_in_escape && special_chars.contains(&bytes[i]) { + break; } - assert!(0 < i); - tokens.push(&bytes[..i]); - bytes = &bytes[i..]; + is_in_escape = bytes[i] == b'\\'; + i += 1; + } + if i == 0 { + // The first character was a special character, so we need to put it in its own token: + i = 1; } + tokens.push(&bytes[..i]); + bytes = &bytes[i..]; } // Safety: we split at proper character boundaries - Ok(tokens + tokens .iter() .map(|token| std::str::from_utf8(token).unwrap()) - .collect()) -} - -fn parse_part(s: &str) -> EntityPathPart { - use std::str::FromStr as _; - - if let Some(s) = s.strip_prefix('#') { - if let Ok(sequence) = u64::from_str(s) { - return EntityPathPart::Index(Index::Sequence(sequence)); - } - } - - if let Ok(integer) = i128::from_str(s) { - EntityPathPart::Index(Index::Integer(integer)) - } else if let Ok(uuid) = uuid::Uuid::parse_str(s) { - EntityPathPart::Index(Index::Uuid(uuid)) - } else { - EntityPathPart::Name(s.into()) - } + .collect() } -fn validate_name(name: &str) -> Result<()> { - if name.is_empty() { - return Err(PathParseError::EmptyPart); - } - - if name.starts_with('#') { - return Err(PathParseError::InvalidSequence(name.to_owned())); - } - - for c in name.chars() { - if !c.is_ascii_alphanumeric() && c != '_' && c != '-' { - return Err(PathParseError::InvalidCharacterInPart { - part: name.to_owned(), - character: c, - }); - } - } - - Ok(()) -} - -fn unescape_string(input: &str) -> Result { +/// Unescape the string +fn parse_part_forgiving(input: &str) -> EntityPathPart { let mut output = String::with_capacity(input.len()); let mut chars = input.chars(); while let Some(c) = chars.next() { @@ -432,36 +323,60 @@ fn unescape_string(input: &str) -> Result { 'n' => '\n', 'r' => '\r', 't' => '\t', - '\"' | '\\' => c, - _ => { - return Err("Unknown escape sequence (\\)"); - } + _ => c, }); } else { - return Err("Trailing escape (\\)"); + // Trailing escape: treat it as a (escaped) backslash + output.push('\\'); } } else { output.push(c); } } - Ok(output) + EntityPathPart::from(output) } -fn unescape_string_forgiving(input: &str) -> String { - match unescape_string(input) { - Ok(s) => s, - Err(err) => { - re_log::warn_once!("Bad escape sequence in entity path string {input:?}: {err}"); - input.to_owned() +/// Unescape the string +fn parse_part_strict(input: &str) -> Result { + let mut output = String::with_capacity(input.len()); + let mut chars = input.chars(); + while let Some(c) = chars.next() { + if c == '\\' { + if let Some(c) = chars.next() { + output.push(match c { + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + c if c.is_ascii_alphanumeric() => { + return Err(PathParseError::UnknownEscapeSequence(c)) + } + c => c, + }); + } else { + return Err(PathParseError::TrailingBackslash); + } + } else if c.is_alphanumeric() || matches!(c, '_' | '-' | '.') { + output.push(c); + } else { + return Err(PathParseError::MissingEscape(c)); } } + Ok(EntityPathPart::from(output)) } #[test] fn test_unescape_string() { - let input = r#"Hello \"World\" / \\ \n\r\t"#; - let unescaped = unescape_string(input).unwrap(); - assert_eq!(unescaped, "Hello \"World\" / \\ \n\r\t"); + for (input, expected) in [ + (r"Hello\ world!", "Hello world!"), + (r"Hello\", "Hello\\"), + ( + r#"Hello \"World\" / \\ \n\r\t"#, + "Hello \"World\" / \\ \n\r\t", + ), + ] { + let unescaped = parse_part_forgiving(input); + assert_eq!(unescaped.as_str(), expected); + } } #[test] @@ -481,24 +396,20 @@ fn test_parse_entity_path_forgiving() { assert_eq!(parse("foo"), entity_path_vec!("foo")); assert_eq!(parse("foo/bar"), entity_path_vec!("foo", "bar")); assert_eq!( - parse(r#"foo/"bar"/#123/-1234/6d046bf4-e5d3-4599-9153-85dd97218cb3"#), - entity_path_vec!( - "foo", - Index::String("bar".into()), - Index::Sequence(123), - Index::Integer(-1234), - Index::Uuid(uuid::Uuid::parse_str("6d046bf4-e5d3-4599-9153-85dd97218cb3").unwrap()) - ) + parse(r#"foo/bar :/."#), + entity_path_vec!("foo", "bar :", ".",) ); + assert_eq!(parse("hallådär"), entity_path_vec!("hallådär")); assert_eq!(normalize(""), "/"); assert_eq!(normalize("/"), "/"); assert_eq!(normalize("//"), "/"); assert_eq!(normalize("/foo/bar/"), "foo/bar"); - assert_eq!(normalize("foo/bar.baz"), r#"foo/"bar.baz""#); - assert_eq!(normalize("foo/#42"), "foo/#42"); - assert_eq!(normalize("foo/#bar/baz"), r##"foo/"#bar"/baz"##); - assert_eq!(normalize("foo/Hallå Där!"), r#"foo/"Hallå Där!""#); + assert_eq!(normalize("/foo///bar//"), "foo/bar"); + assert_eq!(normalize("foo/bar:baz"), r#"foo/bar\:baz"#); + assert_eq!(normalize("foo/42"), "foo/42"); + assert_eq!(normalize("foo/#bar/baz"), r##"foo/\#bar/baz"##); + assert_eq!(normalize("foo/Hallå Där!"), r#"foo/Hallå\ Där\!"#); } #[test] @@ -515,20 +426,6 @@ fn test_parse_entity_path_strict() { assert_eq!(parse("/foo"), Err(PathParseError::LeadingSlash)); assert_eq!(parse("foo/bar"), Ok(entity_path_vec!("foo", "bar"))); assert_eq!(parse("foo//bar"), Err(PathParseError::DoubleSlash)); - assert_eq!( - parse(r#"foo/"bar"/#123/-1234/6d046bf4-e5d3-4599-9153-85dd97218cb3"#), - Ok(entity_path_vec!( - "foo", - Index::String("bar".into()), - Index::Sequence(123), - Index::Integer(-1234), - Index::Uuid(uuid::Uuid::parse_str("6d046bf4-e5d3-4599-9153-85dd97218cb3").unwrap()) - )) - ); - assert_eq!( - parse(r#"foo/"bar""baz""#), - Err(PathParseError::MissingSlash) - ); assert_eq!(parse("foo/bar/"), Err(PathParseError::TrailingSlash)); assert!(matches!( @@ -539,6 +436,8 @@ fn test_parse_entity_path_strict() { parse(r#"entity[#123]"#), Err(PathParseError::UnexpectedInstanceKey(InstanceKey(123))) )); + + assert_eq!(parse("hallådär"), Ok(entity_path_vec!("hallådär"))); } #[test] @@ -616,12 +515,10 @@ fn test_parse_data_path() { // Check that we catch invalid characters in identifiers/names: assert!(matches!( DataPath::from_str(r#"hello there"#), - Err(PathParseError::InvalidCharacterInPart { .. }) - )); - assert!(matches!( - DataPath::from_str(r#"hallådär"#), - Err(PathParseError::InvalidCharacterInPart { .. }) + Err(PathParseError::MissingEscape(' ')) )); assert!(DataPath::from_str(r#"hello_there"#).is_ok()); assert!(DataPath::from_str(r#"hello-there"#).is_ok()); + assert!(DataPath::from_str(r#"hello.there"#).is_ok()); + assert!(DataPath::from_str(r#"hallådär"#).is_ok()); } diff --git a/crates/re_query/benches/query_benchmark.rs b/crates/re_query/benches/query_benchmark.rs index bb01848f9ac9..3b6d3f893830 100644 --- a/crates/re_query/benches/query_benchmark.rs +++ b/crates/re_query/benches/query_benchmark.rs @@ -5,7 +5,7 @@ use criterion::{criterion_group, criterion_main, Criterion}; use itertools::Itertools; use re_arrow_store::{DataStore, LatestAtQuery}; -use re_log_types::{entity_path, DataRow, EntityPath, Index, RowId, TimeInt, TimeType, Timeline}; +use re_log_types::{entity_path, DataRow, EntityPath, RowId, TimeInt, TimeType, Timeline}; use re_query::query_archetype; use re_types::{ archetypes::Points2D, @@ -84,7 +84,7 @@ pub fn build_some_strings(len: usize) -> Vec { fn mono_points(c: &mut Criterion) { // Each mono point gets logged at a different path let paths = (0..NUM_POINTS) - .map(move |point_idx| entity_path!("points", Index::Sequence(point_idx as _))) + .map(move |point_idx| entity_path!("points", point_idx.to_string())) .collect_vec(); let msgs = build_points_rows(&paths, 1); @@ -113,7 +113,7 @@ fn mono_points(c: &mut Criterion) { fn mono_strings(c: &mut Criterion) { // Each mono string gets logged at a different path let paths = (0..NUM_STRINGS) - .map(move |string_idx| entity_path!("strings", Index::Sequence(string_idx as _))) + .map(move |string_idx| entity_path!("strings", string_idx.to_string())) .collect_vec(); let msgs = build_strings_rows(&paths, 1); diff --git a/crates/re_viewer_context/src/blueprint_id.rs b/crates/re_viewer_context/src/blueprint_id.rs index 718f2a215188..9b79e882f989 100644 --- a/crates/re_viewer_context/src/blueprint_id.rs +++ b/crates/re_viewer_context/src/blueprint_id.rs @@ -1,7 +1,8 @@ use std::hash::BuildHasher; use once_cell::sync::Lazy; -use re_log_types::{EntityPath, EntityPathPart, Index}; + +use re_log_types::{EntityPath, EntityPathPart}; pub trait BlueprintIdRegistry { fn registry() -> &'static EntityPath; @@ -79,7 +80,7 @@ impl BlueprintId { T::registry() .iter() .cloned() - .chain(std::iter::once(EntityPathPart::Index(Index::Uuid(self.id)))) + .chain(std::iter::once(EntityPathPart::new(self.id.to_string()))) .collect() } diff --git a/crates/re_viewport/src/space_view_entity_picker.rs b/crates/re_viewport/src/space_view_entity_picker.rs index 2c6adb715c34..03b076be9d9b 100644 --- a/crates/re_viewport/src/space_view_entity_picker.rs +++ b/crates/re_viewport/src/space_view_entity_picker.rs @@ -174,7 +174,7 @@ fn add_entities_tree_ui( add_entities_tree_ui( ctx, ui, - &path_comp.to_string(), + path_comp, child_tree, space_view, query_result, diff --git a/docs/content/concepts/entity-component.md b/docs/content/concepts/entity-component.md index cc2703fe565f..ede3a1c7e691 100644 --- a/docs/content/concepts/entity-component.md +++ b/docs/content/concepts/entity-component.md @@ -14,6 +14,8 @@ data associated with those entities. * *Components*, however, are what contains the data that is associated with those "things". For example, position, color, pixel data, etc. +Entities are like folders, and components are like files. + Additionally, the Rerun SDKs expose two additional concepts: * *Archetypes* are coherent set of components corresponding to primitive such as 2D points or 3D boxes. In the Rerun SDKs, archetypes take the form of builder objects that assist with the creation of such component sets. They are meant as high-level, convenience helpers that can be bypassed entirely if/when required by advanced use-cases. * *Datatypes* are regular data structures that components occasionally rely on when fundamental data types (`float`, `uint32`, etc.) are not sufficient. diff --git a/docs/content/concepts/entity-path.md b/docs/content/concepts/entity-path.md index 000acf157198..ac299b3235a9 100644 --- a/docs/content/concepts/entity-path.md +++ b/docs/content/concepts/entity-path.md @@ -17,32 +17,35 @@ elements. The conventional path semantics including concepts of *root* and *pare When writing paths in logging APIs the leading `/` is omitted. -Note that there is no path-level distinction between "file-like" and "directory-like" concepts. Any path may be an -entity, and entities may be direct children of other entities. For example (this uses the Python SDK but the same applies for all supported languages): +In the file path analogy, each entity is a folder, and a component is a file. +This implies that any entity in a hierarchy can contain components. + +For example (this uses the Python SDK but the same applies for all supported languages): + ```python rr.log("image", rr.Image(img)) rr.log("image/points", rr.Points2D(points)) ``` -However, it is also acceptable to leave implicitly "empty" elements in your paths as well. +It is also acceptable to leave implicitly "empty" entities in your paths as well. ```python rr.log("camera/image", rr.Image(img)) rr.log("camera/image/detections/points", rr.Points2D(points)) ``` + Nothing needs to be explicitly logged to `"camera"` or `"camera/image/detection"` to make the above valid. +In other words, the `log` call is akin to creating a folder with `mkdir -p` and then writing files (components) to it. +Existing components of the same name will be overwritten. ### Path parts -A path can look like this: `camera/"Left"/detection/#42/bbox`. Each part (between the slashes) can either be: - -* An identifier (e.g. `camera`), intended for hard-coded names. Only ASCII characters, numbers, underscore, and dash are allowed in identifiers (`[a-zA-Z0-9_-]+`). -* A `"quoted string"`, intended for arbitrary strings, like file names and serials numbers. -* An integer, intended for hashes or similar. -* A number sequence, prefixed by `#`, intended for indices. -* A UUID. +Each "part" of a path must be a non-empty string. Any character is allower, but special characters need to be escaped using `\`. +Characters that need NOT be escaped are letters, numbers, and underscore, dash, and dot (`_`, `-`, `.`). +Any other character should be escaped, including symbols (`\:`, `\$`, …) and whitespace (`\ `, `\n`, `\t`, …). -So for instance, `foo/bar/#42/5678/"CA426571"/a6a5e96c-fd52-4d21-a394-ffbb6e5def1d` is a valid path. +So for instance, `world/3d/My\ Image.jpg/detection` is a valid path (note the escaped space!). +⚠️ NOTE: even though entity paths are somewhat analogous to file paths, they are NOT the same. `..` does not mean "parent folder", and you are NOT intended to pass a file path as an entity path (especially not on Windows, which use `\` as a path separator). ### Path Hierarchy Functions Path hierarchy plays an important role in a number of different functions within Rerun: @@ -57,5 +60,5 @@ the relationship between that entity and its direct parent. ### Reserved Paths -The path prefix "rerun/" is considered reserved for use by the Rerun SDK itself and should not be used for logging +The path prefix `rerun/` is considered reserved for use by the Rerun SDK itself and should not be used for logging user data. This is where Rerun will log additional information such as warnings. diff --git a/docs/content/concepts/spaces-and-transforms.md b/docs/content/concepts/spaces-and-transforms.md index e37376dd0b4a..5cb64b34111e 100644 --- a/docs/content/concepts/spaces-and-transforms.md +++ b/docs/content/concepts/spaces-and-transforms.md @@ -90,19 +90,19 @@ Say you have a 3D world with two cameras with known extrinsics (pose) and intrin rr.log("world/points", rr.Points3D(…)) # Log first camera: -rr.log("world/camera/#0", rr.TranslationAndMat3(translation=cam0_pose.pos, matrix=cam0_pose.rot)) -rr.log("world/camera/#0/image", rr.Pinhole(…)) +rr.log("world/camera/0", rr.TranslationAndMat3(translation=cam0_pose.pos, matrix=cam0_pose.rot)) +rr.log("world/camera/0/image", rr.Pinhole(…)) # Log second camera: -rr.log("world/camera/#1", rr.TranslationAndMat3(translation=cam1_pose.pos, matrix=cam1_pose.rot)) -rr.log("world/camera/#1/image", rr.Pinhole(…)) +rr.log("world/camera/1", rr.TranslationAndMat3(translation=cam1_pose.pos, matrix=cam1_pose.rot)) +rr.log("world/camera/1/image", rr.Pinhole(…)) # Log some data to the image spaces of the first camera: -rr.log("world/camera/#0/image", rr.Image(…)) -rr.log("world/camera/#0/image/detection", rr.Boxes2D(…)) +rr.log("world/camera/0/image", rr.Image(…)) +rr.log("world/camera/0/image/detection", rr.Boxes2D(…)) ``` -Rerun will from this understand how the `world` space and the two image spaces (`world/camera/#0/image` and `world/camera/#1/image`) relate to each other, which allows you to explore their relationship in the Rerun Viewer. In the 3D view you will see the two cameras show up with their respective camera frustums (based on the intrinsics). If you hover your mouse in one of the image spaces, a corresponding ray will be shot through the 3D space. +Rerun will from this understand how the `world` space and the two image spaces (`world/camera/0/image` and `world/camera/1/image`) relate to each other, which allows you to explore their relationship in the Rerun Viewer. In the 3D view you will see the two cameras show up with their respective camera frustums (based on the intrinsics). If you hover your mouse in one of the image spaces, a corresponding ray will be shot through the 3D space. Note that none of the names in the paths are special. From 5f6281fb5bd1c130150587071f0ad59576499887 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Mon, 11 Dec 2023 20:15:21 +0100 Subject: [PATCH 02/13] Implement natural ordering of entity paths --- Cargo.lock | 7 +++ Cargo.toml | 1 + crates/re_log_types/Cargo.toml | 1 + crates/re_log_types/src/path/mod.rs | 19 +++++- .../re_log_types/src/path/natural_ordering.rs | 63 +++++++++++++++++++ 5 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 crates/re_log_types/src/path/natural_ordering.rs diff --git a/Cargo.lock b/Cargo.lock index 436f1e62c8dd..88f2adf07fb4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3332,6 +3332,12 @@ dependencies = [ "getrandom", ] +[[package]] +name = "natord" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308d96db8debc727c3fd9744aac51751243420e46edf401010908da7f8d5e57c" + [[package]] name = "ndarray" version = "0.15.6" @@ -4593,6 +4599,7 @@ dependencies = [ "half 2.3.1", "itertools 0.11.0", "mimalloc", + "natord", "nohash-hasher", "num-derive", "num-traits", diff --git a/Cargo.toml b/Cargo.toml index a0aa614c79d9..7e7cacd1399d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -143,6 +143,7 @@ memory-stats = "1.1" mimalloc = "0.1.29" mime = "0.3" mime_guess = "2.0" +natord = "1.0.9" ndarray = "0.15" ndarray-rand = "0.14" never = "0.1" diff --git a/crates/re_log_types/Cargo.toml b/crates/re_log_types/Cargo.toml index 3f8b908b28dc..1ad6c3d28c1e 100644 --- a/crates/re_log_types/Cargo.toml +++ b/crates/re_log_types/Cargo.toml @@ -56,6 +56,7 @@ fixed = { version = "1.17", default-features = false, features = ["serde"] } # we keep it as a direct dependency to ensure it stays pinned on `2.2.1` half.workspace = true itertools.workspace = true +natord.workspace = true nohash-hasher.workspace = true num-derive.workspace = true num-traits.workspace = true diff --git a/crates/re_log_types/src/path/mod.rs b/crates/re_log_types/src/path/mod.rs index ed1df3be5df7..b1996bbc3616 100644 --- a/crates/re_log_types/src/path/mod.rs +++ b/crates/re_log_types/src/path/mod.rs @@ -11,6 +11,7 @@ mod data_path; mod entity_path; mod entity_path_expr; mod entity_path_impl; +mod natural_ordering; mod parse_path; pub use component_path::ComponentPath; @@ -25,7 +26,7 @@ pub use parse_path::PathParseError; /// The different parts that make up an [`EntityPath`]. /// /// In the file system analogy, this is the name of a folder. -#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] // TODO(#4464): ordering +#[derive(Clone, Debug, Hash, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct EntityPathPart( // TODO(emilk): consider other string types; e.g. interned strings, `Arc`, … @@ -107,12 +108,28 @@ impl std::borrow::Borrow for EntityPathPart { impl std::ops::Deref for EntityPathPart { type Target = str; + #[inline] fn deref(&self) -> &str { self.as_str() } } +impl std::cmp::Ord for EntityPathPart { + #[inline] + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + // Use natural ordering of strings, so that "image2" comes before "image10". + natural_ordering::compare(self.as_str(), other.as_str()) + } +} + +impl std::cmp::PartialOrd for EntityPathPart { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + // ---------------------------------------------------------------------------- /// Build a `Vec`: diff --git a/crates/re_log_types/src/path/natural_ordering.rs b/crates/re_log_types/src/path/natural_ordering.rs new file mode 100644 index 000000000000..f1880dedf96d --- /dev/null +++ b/crates/re_log_types/src/path/natural_ordering.rs @@ -0,0 +1,63 @@ +//! Implement natural ordering for strings, so that "file5" < "file10". +//! +//! Crates considered: +//! * `human-sort`: - overflows on large integers +//! * `lexical-sort`: - comes with a huge unicode->ascii table +//! * `natord`: - the one we're using + +use std::cmp::Ordering; + +/// Natural ordering for strings, so that "file5" < "file10". +pub fn compare(a: &str, b: &str) -> Ordering { + natord::compare_iter( + a.chars(), + b.chars(), + |_| false, + |&l, &r| compare_chars(l, r), + |&c| c.to_digit(10).map(|v| v as isize), + ) +} + +// Ignore case when ordering, so that `a < B < b < c` +fn compare_chars(a: char, b: char) -> Ordering { + let al = a.to_ascii_lowercase(); + let bl = b.to_ascii_lowercase(); + + if al == bl { + a.cmp(&b) + } else { + al.cmp(&bl) + } +} + +#[test] +fn test_natural_ordering() { + fn check_total_order(strs: &[&str]) { + fn ordering_str(ord: Ordering) -> &'static str { + match ord { + Ordering::Greater => ">", + Ordering::Equal => "=", + Ordering::Less => "<", + } + } + + for (i, &x) in strs.iter().enumerate() { + for (j, &y) in strs.iter().enumerate() { + assert!( + compare(x, y) == i.cmp(&j), + "Got {x:?} {} {y:?}; expected {x:?} {} {y:?}", + ordering_str(i.cmp(&j)), + ordering_str(compare(x, y)) + ); + } + } + } + + check_total_order(&["10", "a", "aa", "b", "c"]); + check_total_order(&["a", "a0", "a1", "a1a", "a1b", "a2", "a10", "a20"]); + check_total_order(&["1.001", "1.002", "1.010", "1.02", "1.1", "1.3"]); + check_total_order(&["a 2", "a2"]); + check_total_order(&["a", "B", "b", "c"]); + assert!(compare("a", "a") == Ordering::Equal); + assert!(compare("a", "A") != Ordering::Equal); +} From 2389aed246f429f7279bdc59a46726366b96b3c9 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Mon, 11 Dec 2023 21:11:02 +0100 Subject: [PATCH 03/13] Use `Display` format for `Debug` entity paths too --- crates/re_log_types/src/path/entity_path_impl.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/re_log_types/src/path/entity_path_impl.rs b/crates/re_log_types/src/path/entity_path_impl.rs index d07eefde4af8..afbf69e4ac6d 100644 --- a/crates/re_log_types/src/path/entity_path_impl.rs +++ b/crates/re_log_types/src/path/entity_path_impl.rs @@ -83,9 +83,8 @@ where impl std::fmt::Debug for EntityPathImpl { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // Show the nicely formatted entity path, but surround with quotes and escape - // troublesome characters such as back-slashes and newlines. - write!(f, "{:?}", self.to_string()) + // Like `Display`, but with quotes + write!(f, "'{self}'") } } From 720162365426c37947ba23dbdf37a4da6f76684e Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 12 Dec 2023 13:10:18 +0100 Subject: [PATCH 04/13] Improve the docstring of `EntityPath` --- crates/re_log_types/src/path/entity_path.rs | 19 +++++++++---------- .../re_log_types/src/path/entity_path_impl.rs | 2 +- crates/re_log_types/src/path/mod.rs | 2 ++ 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/crates/re_log_types/src/path/entity_path.rs b/crates/re_log_types/src/path/entity_path.rs index 61612bd731c9..b21275a4e87f 100644 --- a/crates/re_log_types/src/path/entity_path.rs +++ b/crates/re_log_types/src/path/entity_path.rs @@ -67,22 +67,21 @@ impl std::fmt::Debug for EntityPathHash { // ---------------------------------------------------------------------------- -/// The unique identifier of an entity, e.g. `camera/"ACME Örnöga"/points` +/// The unique identifier of an entity, e.g. `camera/3/points` /// /// The entity path is a list of [parts][EntityPathPart] separated by slashes. +/// Each part is a non-empty string, that can contain any character. +/// When written as a string, some characters in the parts need to be escaped with a `\` +/// (only character, numbers, `.`, `-`, `_` does not need escaping). /// -/// Each part is either a [_name_][EntityPathPart::Name] of a limited set of characters, -/// or an [`Index`][crate::Index]. -/// Names are like idenitifers in code, and must match the regex: `[a-zA-z0-9_-]+` -/// Indices are like array indices or keys in a map or table, and can be any string, -/// uuid, or number. +/// See for more on entity paths. /// -/// Reference-counted internally, so this is cheap to clone. -/// -/// Implements [`nohash_hasher::IsEnabled`]. +/// `EntityPath` is reference-counted internally, so it is cheap to clone. +/// It also has a precomputed hash and implemented [`nohash_hasher::IsEnabled`], +/// so it is very cheap to use in a [`nohash_hasher::IntMap`] and [`nohash_hasher::IntSet`]. /// /// ``` -/// # use re_log_types::{EntityPath, EntityPathPart}; +/// # use re_log_types::EntityPath; /// assert_eq!( /// EntityPath::parse_strict(r#"camera/ACME\ Örnöga/points/42"#).unwrap(), /// EntityPath::new(vec![ diff --git a/crates/re_log_types/src/path/entity_path_impl.rs b/crates/re_log_types/src/path/entity_path_impl.rs index afbf69e4ac6d..1ee1cb608877 100644 --- a/crates/re_log_types/src/path/entity_path_impl.rs +++ b/crates/re_log_types/src/path/entity_path_impl.rs @@ -1,6 +1,6 @@ use crate::path::EntityPathPart; -/// `camera / "left" / points / #42` +/// `camera/left/points/42` /// /// Wrapped by [`crate::EntityPath`] together with a hash. #[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] diff --git a/crates/re_log_types/src/path/mod.rs b/crates/re_log_types/src/path/mod.rs index b1996bbc3616..8bb7fbf96d0d 100644 --- a/crates/re_log_types/src/path/mod.rs +++ b/crates/re_log_types/src/path/mod.rs @@ -25,6 +25,8 @@ pub use parse_path::PathParseError; /// The different parts that make up an [`EntityPath`]. /// +/// A non-empty string. +/// /// In the file system analogy, this is the name of a folder. #[derive(Clone, Debug, Hash, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] From 9fe5d0b6ed54de23e9e203d0f4053acb569bcada Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 12 Dec 2023 16:44:28 +0100 Subject: [PATCH 05/13] Better EntityPathPart code --- crates/re_log_types/src/path/entity_path.rs | 2 + crates/re_log_types/src/path/mod.rs | 132 +++++++++++------- crates/re_log_types/src/path/parse_path.rs | 41 +----- .../src/space_view_entity_picker.rs | 2 +- 4 files changed, 87 insertions(+), 90 deletions(-) diff --git a/crates/re_log_types/src/path/entity_path.rs b/crates/re_log_types/src/path/entity_path.rs index b21275a4e87f..edc036e1773c 100644 --- a/crates/re_log_types/src/path/entity_path.rs +++ b/crates/re_log_types/src/path/entity_path.rs @@ -123,6 +123,8 @@ impl EntityPath { } /// Treat the string as one opaque string, NOT splitting on any slashes. + /// + /// The given string is expected to be unescaped, i.e. any `\` is treated as a normal character. pub fn from_single_string(string: impl Into) -> Self { Self::new(vec![EntityPathPart::new(string)]) } diff --git a/crates/re_log_types/src/path/mod.rs b/crates/re_log_types/src/path/mod.rs index 8bb7fbf96d0d..ff864ccdb525 100644 --- a/crates/re_log_types/src/path/mod.rs +++ b/crates/re_log_types/src/path/mod.rs @@ -27,6 +27,12 @@ pub use parse_path::PathParseError; /// /// A non-empty string. /// +/// Note that the contents of the string is NOT escaped, +/// so escaping needs to be done when printing this +/// (done by the `Display` impl). +/// +/// Because of this, `EntityPathPart` does NOT implement `AsRef` etc. +/// /// In the file system analogy, this is the name of a folder. #[derive(Clone, Debug, Hash, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] @@ -36,47 +42,83 @@ pub struct EntityPathPart( ); impl EntityPathPart { + /// The given string is expected to be unescaped, i.e. any `\` is treated as a normal character. #[inline] - pub fn new(string: impl Into) -> Self { - Self(string.into()) + pub fn new(unescaped_string: impl Into) -> Self { + Self(unescaped_string.into()) } + /// The unescaped string. + /// + /// Use [`Self::escaped_string`] or `to_string` to escape it. #[inline] - pub fn as_str(&self) -> &str { + pub fn unescaped_str(&self) -> &str { &self.0 } -} - -impl std::fmt::Display for EntityPathPart { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut is_first_character = true; + #[inline] + pub fn escaped_string(&self) -> String { + let mut s = String::with_capacity(self.0.len()); for c in self.0.chars() { - let print_as_is = if is_first_character { - // Escape punctutation if it is the first character, - // so that we can use `-` for negation, and `.` to mean the current entity. - c.is_alphanumeric() - } else { - c.is_alphanumeric() || matches!(c, '_' | '-' | '.') - }; + // Note: we print all unicode character (e.g. `åäö`) as is. + let print_as_is = c.is_alphanumeric() || matches!(c, '_' | '-' | '.'); if print_as_is { - c.fmt(f)?; + s.push(c); } else { match c { - '\n' => "\\n".fmt(f), - '\r' => "\\r".fmt(f), - '\t' => "\\t".fmt(f), - c if c.is_ascii_punctuation() || c == ' ' || c.is_alphanumeric() => { - write!(f, "\\{c}") + '\n' => { + s.push_str("\\n"); + } + '\r' => { + s.push_str("\\r"); + } + '\t' => { + s.push_str("\\t"); } - c => write!(f, "\\u{{{:x}}}", c as u32), - }?; + c if c.is_ascii_punctuation() || c == ' ' => { + s.push('\\'); + s.push(c); + } + c => { + // Rust-style unicode escape, e.g. `\u{2009}`. + s.push_str(&format!("\\u{{{:x}}}", c as u32)); + } + }; } + } + s + } - is_first_character = false; + /// Unescape the string, forgiving any syntax error with a best-effort approach. + pub fn parse_forgiving(input: &str) -> Self { + let mut output = String::with_capacity(input.len()); + let mut chars = input.chars(); + while let Some(c) = chars.next() { + if c == '\\' { + if let Some(c) = chars.next() { + output.push(match c { + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + _ => c, + }); + } else { + // Trailing escape: treat it as a (escaped) backslash + output.push('\\'); + } + } else { + output.push(c); + } } - Ok(()) + + Self::new(output) + } +} + +impl std::fmt::Display for EntityPathPart { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.escaped_string().fmt(f) } } @@ -94,34 +136,11 @@ impl From for EntityPathPart { } } -impl AsRef for EntityPathPart { - #[inline] - fn as_ref(&self) -> &str { - self.as_str() - } -} - -impl std::borrow::Borrow for EntityPathPart { - #[inline] - fn borrow(&self) -> &str { - self.as_str() - } -} - -impl std::ops::Deref for EntityPathPart { - type Target = str; - - #[inline] - fn deref(&self) -> &str { - self.as_str() - } -} - impl std::cmp::Ord for EntityPathPart { #[inline] fn cmp(&self, other: &Self) -> std::cmp::Ordering { // Use natural ordering of strings, so that "image2" comes before "image10". - natural_ordering::compare(self.as_str(), other.as_str()) + natural_ordering::compare(self.unescaped_str(), other.unescaped_str()) } } @@ -172,4 +191,19 @@ mod tests { assert_eq!(entity_path_vec!(), vec![]); assert_eq!(entity_path!(), EntityPath::from(vec![])); } + + #[test] + fn test_unescape_string() { + for (input, expected) in [ + (r"Hello\ world!", "Hello world!"), + (r"Hello\", "Hello\\"), + ( + r#"Hello \"World\" / \\ \n\r\t"#, + "Hello \"World\" / \\ \n\r\t", + ), + ] { + let part = EntityPathPart::parse_forgiving(input); + assert_eq!(part.unescaped_str(), expected); + } + } } diff --git a/crates/re_log_types/src/path/parse_path.rs b/crates/re_log_types/src/path/parse_path.rs index 31737357390e..373290f766ce 100644 --- a/crates/re_log_types/src/path/parse_path.rs +++ b/crates/re_log_types/src/path/parse_path.rs @@ -214,7 +214,7 @@ fn parse_entity_path_forgiving(path: &str) -> Vec { tokenize_entity_path(path) .into_iter() .filter(|&part| part != "/") // ignore duplicate slashes - .map(parse_part_forgiving) + .map(EntityPathPart::parse_forgiving) .collect() } @@ -312,30 +312,6 @@ fn tokenize_by<'s>(path: &'s str, special_chars: &[u8]) -> Vec<&'s str> { .collect() } -/// Unescape the string -fn parse_part_forgiving(input: &str) -> EntityPathPart { - let mut output = String::with_capacity(input.len()); - let mut chars = input.chars(); - while let Some(c) = chars.next() { - if c == '\\' { - if let Some(c) = chars.next() { - output.push(match c { - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - _ => c, - }); - } else { - // Trailing escape: treat it as a (escaped) backslash - output.push('\\'); - } - } else { - output.push(c); - } - } - EntityPathPart::from(output) -} - /// Unescape the string fn parse_part_strict(input: &str) -> Result { let mut output = String::with_capacity(input.len()); @@ -364,21 +340,6 @@ fn parse_part_strict(input: &str) -> Result { Ok(EntityPathPart::from(output)) } -#[test] -fn test_unescape_string() { - for (input, expected) in [ - (r"Hello\ world!", "Hello world!"), - (r"Hello\", "Hello\\"), - ( - r#"Hello \"World\" / \\ \n\r\t"#, - "Hello \"World\" / \\ \n\r\t", - ), - ] { - let unescaped = parse_part_forgiving(input); - assert_eq!(unescaped.as_str(), expected); - } -} - #[test] fn test_parse_entity_path_forgiving() { use crate::entity_path_vec; diff --git a/crates/re_viewport/src/space_view_entity_picker.rs b/crates/re_viewport/src/space_view_entity_picker.rs index 03b076be9d9b..2c6adb715c34 100644 --- a/crates/re_viewport/src/space_view_entity_picker.rs +++ b/crates/re_viewport/src/space_view_entity_picker.rs @@ -174,7 +174,7 @@ fn add_entities_tree_ui( add_entities_tree_ui( ctx, ui, - path_comp, + &path_comp.to_string(), child_tree, space_view, query_result, From 4ebb86735a9d91c3062ee16ec72be607b4af9aa6 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 12 Dec 2023 16:45:30 +0100 Subject: [PATCH 06/13] Move EntityPathPart to own file --- .../re_log_types/src/path/entity_path_part.rs | 142 +++++++++++++++++ crates/re_log_types/src/path/mod.rs | 147 +----------------- 2 files changed, 144 insertions(+), 145 deletions(-) create mode 100644 crates/re_log_types/src/path/entity_path_part.rs diff --git a/crates/re_log_types/src/path/entity_path_part.rs b/crates/re_log_types/src/path/entity_path_part.rs new file mode 100644 index 000000000000..30edcf120605 --- /dev/null +++ b/crates/re_log_types/src/path/entity_path_part.rs @@ -0,0 +1,142 @@ +/// The different parts that make up an [`EntityPath`]. +/// +/// A non-empty string. +/// +/// Note that the contents of the string is NOT escaped, +/// so escaping needs to be done when printing this +/// (done by the `Display` impl). +/// +/// Because of this, `EntityPathPart` does NOT implement `AsRef` etc. +/// +/// In the file system analogy, this is the name of a folder. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] +pub struct EntityPathPart( + // TODO(emilk): consider other string types; e.g. interned strings, `Arc`, … + String, +); + +impl EntityPathPart { + /// The given string is expected to be unescaped, i.e. any `\` is treated as a normal character. + #[inline] + pub fn new(unescaped_string: impl Into) -> Self { + Self(unescaped_string.into()) + } + + /// The unescaped string. + /// + /// Use [`Self::escaped_string`] or `to_string` to escape it. + #[inline] + pub fn unescaped_str(&self) -> &str { + &self.0 + } + + #[inline] + pub fn escaped_string(&self) -> String { + let mut s = String::with_capacity(self.0.len()); + for c in self.0.chars() { + // Note: we print all unicode character (e.g. `åäö`) as is. + let print_as_is = c.is_alphanumeric() || matches!(c, '_' | '-' | '.'); + + if print_as_is { + s.push(c); + } else { + match c { + '\n' => { + s.push_str("\\n"); + } + '\r' => { + s.push_str("\\r"); + } + '\t' => { + s.push_str("\\t"); + } + c if c.is_ascii_punctuation() || c == ' ' => { + s.push('\\'); + s.push(c); + } + c => { + // Rust-style unicode escape, e.g. `\u{2009}`. + s.push_str(&format!("\\u{{{:x}}}", c as u32)); + } + }; + } + } + s + } + + /// Unescape the string, forgiving any syntax error with a best-effort approach. + pub fn parse_forgiving(input: &str) -> Self { + let mut output = String::with_capacity(input.len()); + let mut chars = input.chars(); + while let Some(c) = chars.next() { + if c == '\\' { + if let Some(c) = chars.next() { + output.push(match c { + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + _ => c, + }); + } else { + // Trailing escape: treat it as a (escaped) backslash + output.push('\\'); + } + } else { + output.push(c); + } + } + + Self::new(output) + } +} + +impl std::fmt::Display for EntityPathPart { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.escaped_string().fmt(f) + } +} + +impl From<&str> for EntityPathPart { + #[inline] + fn from(part: &str) -> Self { + Self(part.into()) + } +} + +impl From for EntityPathPart { + #[inline] + fn from(part: String) -> Self { + Self(part) + } +} + +impl std::cmp::Ord for EntityPathPart { + #[inline] + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + // Use natural ordering of strings, so that "image2" comes before "image10". + super::natural_ordering::compare(self.unescaped_str(), other.unescaped_str()) + } +} + +impl std::cmp::PartialOrd for EntityPathPart { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +#[test] +fn test_unescape_string() { + for (input, expected) in [ + (r"Hello\ world!", "Hello world!"), + (r"Hello\", "Hello\\"), + ( + r#"Hello \"World\" / \\ \n\r\t"#, + "Hello \"World\" / \\ \n\r\t", + ), + ] { + let part = EntityPathPart::parse_forgiving(input); + assert_eq!(part.unescaped_str(), expected); + } +} diff --git a/crates/re_log_types/src/path/mod.rs b/crates/re_log_types/src/path/mod.rs index ff864ccdb525..9b9b7db9c140 100644 --- a/crates/re_log_types/src/path/mod.rs +++ b/crates/re_log_types/src/path/mod.rs @@ -11,6 +11,7 @@ mod data_path; mod entity_path; mod entity_path_expr; mod entity_path_impl; +mod entity_path_part; mod natural_ordering; mod parse_path; @@ -19,140 +20,11 @@ pub use data_path::DataPath; pub use entity_path::{EntityPath, EntityPathHash}; pub use entity_path_expr::EntityPathExpr; pub use entity_path_impl::EntityPathImpl; +pub use entity_path_part::EntityPathPart; pub use parse_path::PathParseError; // ---------------------------------------------------------------------------- -/// The different parts that make up an [`EntityPath`]. -/// -/// A non-empty string. -/// -/// Note that the contents of the string is NOT escaped, -/// so escaping needs to be done when printing this -/// (done by the `Display` impl). -/// -/// Because of this, `EntityPathPart` does NOT implement `AsRef` etc. -/// -/// In the file system analogy, this is the name of a folder. -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -pub struct EntityPathPart( - // TODO(emilk): consider other string types; e.g. interned strings, `Arc`, … - String, -); - -impl EntityPathPart { - /// The given string is expected to be unescaped, i.e. any `\` is treated as a normal character. - #[inline] - pub fn new(unescaped_string: impl Into) -> Self { - Self(unescaped_string.into()) - } - - /// The unescaped string. - /// - /// Use [`Self::escaped_string`] or `to_string` to escape it. - #[inline] - pub fn unescaped_str(&self) -> &str { - &self.0 - } - - #[inline] - pub fn escaped_string(&self) -> String { - let mut s = String::with_capacity(self.0.len()); - for c in self.0.chars() { - // Note: we print all unicode character (e.g. `åäö`) as is. - let print_as_is = c.is_alphanumeric() || matches!(c, '_' | '-' | '.'); - - if print_as_is { - s.push(c); - } else { - match c { - '\n' => { - s.push_str("\\n"); - } - '\r' => { - s.push_str("\\r"); - } - '\t' => { - s.push_str("\\t"); - } - c if c.is_ascii_punctuation() || c == ' ' => { - s.push('\\'); - s.push(c); - } - c => { - // Rust-style unicode escape, e.g. `\u{2009}`. - s.push_str(&format!("\\u{{{:x}}}", c as u32)); - } - }; - } - } - s - } - - /// Unescape the string, forgiving any syntax error with a best-effort approach. - pub fn parse_forgiving(input: &str) -> Self { - let mut output = String::with_capacity(input.len()); - let mut chars = input.chars(); - while let Some(c) = chars.next() { - if c == '\\' { - if let Some(c) = chars.next() { - output.push(match c { - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - _ => c, - }); - } else { - // Trailing escape: treat it as a (escaped) backslash - output.push('\\'); - } - } else { - output.push(c); - } - } - - Self::new(output) - } -} - -impl std::fmt::Display for EntityPathPart { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.escaped_string().fmt(f) - } -} - -impl From<&str> for EntityPathPart { - #[inline] - fn from(part: &str) -> Self { - Self(part.into()) - } -} - -impl From for EntityPathPart { - #[inline] - fn from(part: String) -> Self { - Self(part) - } -} - -impl std::cmp::Ord for EntityPathPart { - #[inline] - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - // Use natural ordering of strings, so that "image2" comes before "image10". - natural_ordering::compare(self.unescaped_str(), other.unescaped_str()) - } -} - -impl std::cmp::PartialOrd for EntityPathPart { - #[inline] - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -// ---------------------------------------------------------------------------- - /// Build a `Vec`: /// ``` /// # use re_log_types::*; @@ -191,19 +63,4 @@ mod tests { assert_eq!(entity_path_vec!(), vec![]); assert_eq!(entity_path!(), EntityPath::from(vec![])); } - - #[test] - fn test_unescape_string() { - for (input, expected) in [ - (r"Hello\ world!", "Hello world!"), - (r"Hello\", "Hello\\"), - ( - r#"Hello \"World\" / \\ \n\r\t"#, - "Hello \"World\" / \\ \n\r\t", - ), - ] { - let part = EntityPathPart::parse_forgiving(input); - assert_eq!(part.unescaped_str(), expected); - } - } } From 33eef3ade0e62ddc16429ecd6596ac34d1c77746 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 12 Dec 2023 17:19:02 +0100 Subject: [PATCH 07/13] Support unicode sequences --- .../re_log_types/src/path/entity_path_part.rs | 165 +++++++++++++++--- crates/re_log_types/src/path/parse_path.rs | 33 +--- docs/content/concepts/entity-path.md | 2 + 3 files changed, 146 insertions(+), 54 deletions(-) diff --git a/crates/re_log_types/src/path/entity_path_part.rs b/crates/re_log_types/src/path/entity_path_part.rs index 30edcf120605..7ff0772c4f98 100644 --- a/crates/re_log_types/src/path/entity_path_part.rs +++ b/crates/re_log_types/src/path/entity_path_part.rs @@ -1,3 +1,5 @@ +use crate::PathParseError; + /// The different parts that make up an [`EntityPath`]. /// /// A non-empty string. @@ -23,6 +25,90 @@ impl EntityPathPart { Self(unescaped_string.into()) } + /// Unescape the string, forgiving any syntax error with a best-effort approach. + pub fn parse_forgiving(input: &str) -> Self { + let mut output = String::with_capacity(input.len()); + let mut chars = input.chars(); + while let Some(c) = chars.next() { + if c == '\\' { + if let Some(c) = chars.next() { + match c { + 'n' => { + output.push('\n'); + } + 'r' => { + output.push('\r'); + } + 't' => { + output.push('\t'); + } + 'u' => { + match parse_unicode_escape(&mut chars) { + Ok(c) => { + output.push(c); + } + Err(s) => { + // Invalid unicode escape: treat it as a (escaped) backslash + output.push('\\'); + output.push('u'); + output.push_str(&s); + } + }; + } + _ => output.push(c), + } + } else { + // Trailing escape: treat it as a (escaped) backslash + output.push('\\'); + } + } else { + output.push(c); + } + } + + Self::new(output) + } + + /// Unescape the string, returning errors on wrongly escaped input. + pub fn parse_strict(input: &str) -> Result { + let mut output = String::with_capacity(input.len()); + let mut chars = input.chars(); + while let Some(c) = chars.next() { + if c == '\\' { + if let Some(c) = chars.next() { + match c { + 'n' => { + output.push('\n'); + } + 'r' => { + output.push('\r'); + } + 't' => { + output.push('\t'); + } + 'u' => match parse_unicode_escape(&mut chars) { + Ok(c) => { + output.push(c); + } + Err(s) => return Err(PathParseError::InvalidUnicodeEscape(s)), + }, + c if c.is_ascii_punctuation() || c == ' ' => { + output.push(c); + } + c => return Err(PathParseError::UnknownEscapeSequence(c)), + }; + } else { + return Err(PathParseError::TrailingBackslash); + } + } else if c.is_alphanumeric() || matches!(c, '_' | '-' | '.') { + output.push(c); + } else { + return Err(PathParseError::MissingEscape(c)); + } + } + Ok(Self::from(output)) + } + /// The unescaped string. /// /// Use [`Self::escaped_string`] or `to_string` to escape it. @@ -56,7 +142,7 @@ impl EntityPathPart { s.push(c); } c => { - // Rust-style unicode escape, e.g. `\u{2009}`. + // Rust-style unicode escape, e.g. `\u{262E}`. s.push_str(&format!("\\u{{{:x}}}", c as u32)); } }; @@ -64,31 +150,38 @@ impl EntityPathPart { } s } +} - /// Unescape the string, forgiving any syntax error with a best-effort approach. - pub fn parse_forgiving(input: &str) -> Self { - let mut output = String::with_capacity(input.len()); - let mut chars = input.chars(); - while let Some(c) = chars.next() { - if c == '\\' { - if let Some(c) = chars.next() { - output.push(match c { - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - _ => c, - }); - } else { - // Trailing escape: treat it as a (escaped) backslash - output.push('\\'); - } - } else { - output.push(c); - } +/// Parses e.g. `{262E}`. +/// +/// Returns the consumed input characters on fail. +fn parse_unicode_escape(input: &mut impl Iterator) -> Result { + let mut all_chars = String::new(); + for c in input { + all_chars.push(c); + if c == '}' || all_chars.len() == 6 { + break; } + } - Self::new(output) + let chars = all_chars.as_str(); + + let Some(chars) = chars.strip_prefix('{') else { + return Err(all_chars); + }; + let Some(chars) = chars.strip_suffix('}') else { + return Err(all_chars); + }; + + if chars.len() != 4 { + return Err(all_chars); } + + let Ok(num) = u32::from_str_radix(chars, 16) else { + return Err(all_chars); + }; + + char::from_u32(num).ok_or(all_chars) } impl std::fmt::Display for EntityPathPart { @@ -128,12 +221,34 @@ impl std::cmp::PartialOrd for EntityPathPart { #[test] fn test_unescape_string() { + // strict: + for (input, expected) in [ + (r"Hallå", "Hallå"), + (r"Hall\u{00E5}\n\r\t", "Hallå\n\r\t"), + (r"Hello\ world\!", "Hello world!"), + ] { + let part = EntityPathPart::parse_strict(input).unwrap(); + assert_eq!(part.unescaped_str(), expected); + } + + assert_eq!( + EntityPathPart::parse_strict(r"\u{262E}"), + Ok(EntityPathPart::from("☮")) + ); + assert_eq!( + EntityPathPart::parse_strict(r"\u{apa}! :D") + .unwrap_err() + .to_string(), + r"Expected e.g. '\u{262E}', found: '\u{apa}'" + ); + + // forgiving: for (input, expected) in [ - (r"Hello\ world!", "Hello world!"), (r"Hello\", "Hello\\"), + (r"\u{apa}\u{262E}", r"\u{apa}☮"), ( - r#"Hello \"World\" / \\ \n\r\t"#, - "Hello \"World\" / \\ \n\r\t", + r#"Hello \"World\" / \\ \n\r\t \u{00E5}"#, + "Hello \"World\" / \\ \n\r\t å", ), ] { let part = EntityPathPart::parse_forgiving(input); diff --git a/crates/re_log_types/src/path/parse_path.rs b/crates/re_log_types/src/path/parse_path.rs index 373290f766ce..3232a879b49f 100644 --- a/crates/re_log_types/src/path/parse_path.rs +++ b/crates/re_log_types/src/path/parse_path.rs @@ -51,6 +51,9 @@ pub enum PathParseError { #[error("{0:?} needs to be escaped as `\\{0}`")] MissingEscape(char), + + #[error("Expected e.g. '\\u{{262E}}', found: '\\u{0}'")] + InvalidUnicodeEscape(String), } type Result = std::result::Result; @@ -240,7 +243,7 @@ fn entity_path_parts_from_tokens_strict(mut tokens: &[&str]) -> Result(path: &'s str, special_chars: &[u8]) -> Vec<&'s str> { .collect() } -/// Unescape the string -fn parse_part_strict(input: &str) -> Result { - let mut output = String::with_capacity(input.len()); - let mut chars = input.chars(); - while let Some(c) = chars.next() { - if c == '\\' { - if let Some(c) = chars.next() { - output.push(match c { - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - c if c.is_ascii_alphanumeric() => { - return Err(PathParseError::UnknownEscapeSequence(c)) - } - c => c, - }); - } else { - return Err(PathParseError::TrailingBackslash); - } - } else if c.is_alphanumeric() || matches!(c, '_' | '-' | '.') { - output.push(c); - } else { - return Err(PathParseError::MissingEscape(c)); - } - } - Ok(EntityPathPart::from(output)) -} - #[test] fn test_parse_entity_path_forgiving() { use crate::entity_path_vec; diff --git a/docs/content/concepts/entity-path.md b/docs/content/concepts/entity-path.md index ac299b3235a9..0b621cb3f2df 100644 --- a/docs/content/concepts/entity-path.md +++ b/docs/content/concepts/entity-path.md @@ -43,6 +43,8 @@ Each "part" of a path must be a non-empty string. Any character is allower, but Characters that need NOT be escaped are letters, numbers, and underscore, dash, and dot (`_`, `-`, `.`). Any other character should be escaped, including symbols (`\:`, `\$`, …) and whitespace (`\ `, `\n`, `\t`, …). +You can an arbitrary unicode codepoint into an entity path using `\u{262E}`. + So for instance, `world/3d/My\ Image.jpg/detection` is a valid path (note the escaped space!). ⚠️ NOTE: even though entity paths are somewhat analogous to file paths, they are NOT the same. `..` does not mean "parent folder", and you are NOT intended to pass a file path as an entity path (especially not on Windows, which use `\` as a path separator). From 5bb79b792794c4dba7be8fbaec140cae4192395e Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 12 Dec 2023 17:40:51 +0100 Subject: [PATCH 08/13] Fix doclink --- crates/re_log_types/src/path/entity_path_part.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/re_log_types/src/path/entity_path_part.rs b/crates/re_log_types/src/path/entity_path_part.rs index 7ff0772c4f98..67d64559bf02 100644 --- a/crates/re_log_types/src/path/entity_path_part.rs +++ b/crates/re_log_types/src/path/entity_path_part.rs @@ -1,6 +1,6 @@ use crate::PathParseError; -/// The different parts that make up an [`EntityPath`]. +/// The different parts that make up an [`EntityPath`][crate::EntityPath]. /// /// A non-empty string. /// From ef404605d32072d71c0893918c4ef3b53d1f51fb Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 12 Dec 2023 17:56:49 +0100 Subject: [PATCH 09/13] Final tweaks --- .../re_log_types/src/path/entity_path_part.rs | 23 +++++++++++++++++-- crates/re_log_types/src/path/parse_path.rs | 6 ++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/crates/re_log_types/src/path/entity_path_part.rs b/crates/re_log_types/src/path/entity_path_part.rs index 67d64559bf02..dba1f0936f09 100644 --- a/crates/re_log_types/src/path/entity_path_part.rs +++ b/crates/re_log_types/src/path/entity_path_part.rs @@ -143,7 +143,7 @@ impl EntityPathPart { } c => { // Rust-style unicode escape, e.g. `\u{262E}`. - s.push_str(&format!("\\u{{{:x}}}", c as u32)); + s.push_str(&format!("\\u{{{:04X}}}", c as u32)); } }; } @@ -220,7 +220,7 @@ impl std::cmp::PartialOrd for EntityPathPart { } #[test] -fn test_unescape_string() { +fn test_parse_entity_path_part() { // strict: for (input, expected) in [ (r"Hallå", "Hallå"), @@ -242,6 +242,17 @@ fn test_unescape_string() { r"Expected e.g. '\u{262E}', found: '\u{apa}'" ); + assert_eq!( + EntityPathPart::parse_strict(r"\u{0001}") + .unwrap() + .unescaped_str(), + "\u{0001}" + ); + assert_eq!( + EntityPathPart::parse_forgiving("☮").escaped_string(), + r"\u{262E}" + ); + // forgiving: for (input, expected) in [ (r"Hello\", "Hello\\"), @@ -254,4 +265,12 @@ fn test_unescape_string() { let part = EntityPathPart::parse_forgiving(input); assert_eq!(part.unescaped_str(), expected); } + + // roundtripping: + for str in [r"\u{0001}", r"Hello\ world\!\ \u{262E}"] { + assert_eq!( + EntityPathPart::parse_strict(str).unwrap().escaped_string(), + str + ); + } } diff --git a/crates/re_log_types/src/path/parse_path.rs b/crates/re_log_types/src/path/parse_path.rs index 3232a879b49f..b4eb99a76085 100644 --- a/crates/re_log_types/src/path/parse_path.rs +++ b/crates/re_log_types/src/path/parse_path.rs @@ -162,13 +162,13 @@ impl EntityPath { let normalized = entity_path.to_string(); if normalized != input { - re_log::warn_once!("Found an entity path '{input}' that was not in the normalized form. Please write it as '{normalized}' instead. See https://www.rerun.io/docs/concepts/entity-path for more"); + re_log::warn_once!("The entity path '{input}' that was not in the normalized form. It will be interpreted as '{normalized}'. See https://www.rerun.io/docs/concepts/entity-path for more"); } Ok(entity_path) } - /// Parses an entity path, ha ndling any malformed input with a logged warning. + /// Parses an entity path, handling any malformed input with a logged warning. /// /// For a strict parses, use [`Self::parse_strict`] instead. pub fn parse_forgiving(input: &str) -> Self { @@ -177,7 +177,7 @@ impl EntityPath { let normalized = entity_path.to_string(); if normalized != input { - re_log::warn_once!("Found an entity path '{input}' that was not in the normalized form. Please write it as '{normalized}' instead. See https://www.rerun.io/docs/concepts/entity-path for more"); + re_log::warn_once!("The entity path '{input}' that was not in the normalized form. It will be interpreted as '{normalized}'. See https://www.rerun.io/docs/concepts/entity-path for more"); } entity_path From c0b60ec39ee7f46ccff05b386a0c0b38f5ce31c8 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 12 Dec 2023 17:59:21 +0100 Subject: [PATCH 10/13] Typos --- docs/content/concepts/entity-path.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/content/concepts/entity-path.md b/docs/content/concepts/entity-path.md index 0b621cb3f2df..9a5d9adcab25 100644 --- a/docs/content/concepts/entity-path.md +++ b/docs/content/concepts/entity-path.md @@ -39,11 +39,11 @@ Existing components of the same name will be overwritten. ### Path parts -Each "part" of a path must be a non-empty string. Any character is allower, but special characters need to be escaped using `\`. +Each "part" of a path must be a non-empty string. Any character is allowed, but special characters need to be escaped using `\`. Characters that need NOT be escaped are letters, numbers, and underscore, dash, and dot (`_`, `-`, `.`). Any other character should be escaped, including symbols (`\:`, `\$`, …) and whitespace (`\ `, `\n`, `\t`, …). -You can an arbitrary unicode codepoint into an entity path using `\u{262E}`. +You can an arbitrary unicode code point into an entity path using `\u{262E}`. So for instance, `world/3d/My\ Image.jpg/detection` is a valid path (note the escaped space!). From acae12c07ae8ada745d83562dbbb3e6452f18b91 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Wed, 13 Dec 2023 10:53:56 +0100 Subject: [PATCH 11/13] better wording Co-authored-by: Jeremy Leibs --- crates/re_log_types/src/path/parse_path.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/re_log_types/src/path/parse_path.rs b/crates/re_log_types/src/path/parse_path.rs index b4eb99a76085..b83c11490c50 100644 --- a/crates/re_log_types/src/path/parse_path.rs +++ b/crates/re_log_types/src/path/parse_path.rs @@ -162,7 +162,7 @@ impl EntityPath { let normalized = entity_path.to_string(); if normalized != input { - re_log::warn_once!("The entity path '{input}' that was not in the normalized form. It will be interpreted as '{normalized}'. See https://www.rerun.io/docs/concepts/entity-path for more"); + re_log::warn_once!("The entity path '{input}' was not in the normalized form. It will be interpreted as '{normalized}'. See https://www.rerun.io/docs/concepts/entity-path for more"); } Ok(entity_path) From d9da309b95ddac8d6185062609d998bc49d0f3f3 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Wed, 13 Dec 2023 10:54:01 +0100 Subject: [PATCH 12/13] better wording Co-authored-by: Jeremy Leibs --- crates/re_log_types/src/path/parse_path.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/re_log_types/src/path/parse_path.rs b/crates/re_log_types/src/path/parse_path.rs index b83c11490c50..27a42898f29a 100644 --- a/crates/re_log_types/src/path/parse_path.rs +++ b/crates/re_log_types/src/path/parse_path.rs @@ -177,7 +177,7 @@ impl EntityPath { let normalized = entity_path.to_string(); if normalized != input { - re_log::warn_once!("The entity path '{input}' that was not in the normalized form. It will be interpreted as '{normalized}'. See https://www.rerun.io/docs/concepts/entity-path for more"); + re_log::warn_once!("The entity path '{input}' was not in the normalized form. It will be interpreted as '{normalized}'. See https://www.rerun.io/docs/concepts/entity-path for more"); } entity_path From 2c7017ea73627cfec74d4aedb0aaa680344624d8 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Wed, 13 Dec 2023 11:22:36 +0100 Subject: [PATCH 13/13] Fix a few more doclinks --- crates/re_log_types/src/path/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/re_log_types/src/path/mod.rs b/crates/re_log_types/src/path/mod.rs index 9b9b7db9c140..6256fdedbc52 100644 --- a/crates/re_log_types/src/path/mod.rs +++ b/crates/re_log_types/src/path/mod.rs @@ -1,10 +1,7 @@ //! Every logged entity in Rerun is logged to an [`EntityPath`]. //! //! The path is made up out of several [`EntityPathPart`]s, -//! each of which is either a name ([`EntityPathPart::Name`]) -//! or an [`Index`]. -//! -//! The [`Index`]es are for tables, arrays etc. +//! which are just non-empty strings. mod component_path; mod data_path;