Skip to content

Commit

Permalink
CanonicalizationScheme enum type
Browse files Browse the repository at this point in the history
  • Loading branch information
hoxxep committed Nov 26, 2024
1 parent 5689cfc commit 83dd4aa
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 32 deletions.
101 changes: 70 additions & 31 deletions ciborium/src/ser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,40 @@ use ciborium_io::Write;
use ciborium_ll::*;
use serde::{ser, Serialize as _};

/// Which canonicalization scheme to use for CBOR serialization.
///
/// Selects whether map keys are sorted in the serialized output and, if so,
/// which ordering is applied to them.
#[cfg(feature = "std")]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CanonicalizationScheme {
    /// No canonicalization, don't sort map keys. Faster and reduces allocations.
    None,

    /// Sort map keys in output according to [RFC 7049]'s deterministic encoding spec.
    ///
    /// Also aligns with [RFC 8949 4.2.3]'s backwards compatibility sort order.
    ///
    /// Uses length-first map key ordering. Eg. `["a", "b", "aa"]`.
    ///
    /// [RFC 7049]: https://www.rfc-editor.org/rfc/rfc7049.html#section-3.9
    /// [RFC 8949 4.2.3]: https://www.rfc-editor.org/rfc/rfc8949.html#section-4.2.3
    Rfc7049,

    /// Sort map keys in output according to [RFC 8949]'s deterministic encoding spec.
    ///
    /// Uses bytewise lexicographic map key ordering. Eg. `["a", "aa", "b"]`.
    ///
    /// NOTE(review): the variant is spelled `Rfc8049` although the specification
    /// it refers to is RFC 8949. This looks like a digit transposition; the name
    /// is kept as-is here because renaming it would break existing callers.
    ///
    /// [RFC 8949]: https://www.rfc-editor.org/rfc/rfc8949.html#name-deterministically-encoded-c
    Rfc8049,
}

#[cfg(feature = "std")]
impl CanonicalizationScheme {
    /// Returns `true` if this canonicalization scheme requires map keys to be sorted.
    ///
    /// Only [`Self::None`] reports `false`; both RFC-based schemes sort keys.
    #[must_use]
    pub fn is_sorting(&self) -> bool {
        // Deliberately exhaustive (no `_` arm): adding a new scheme must fail to
        // compile here until its sorting behaviour has been decided.
        match self {
            Self::None => false,
            Self::Rfc7049 | Self::Rfc8049 => true,
        }
    }
}

/// A serializer for CBOR.
pub struct Serializer<W> {
encoder: Encoder<W>,
Expand All @@ -20,7 +54,7 @@ pub struct Serializer<W> {
///
/// [RFC 8949]: https://www.rfc-editor.org/rfc/rfc8949.html#name-deterministically-encoded-c
#[cfg(feature = "std")]
canonical: bool,
canonicalization: CanonicalizationScheme,
}

impl<W: Write> Serializer<W> {
Expand All @@ -31,10 +65,10 @@ impl<W: Write> Serializer<W> {
///
/// [RFC 8949]: https://www.rfc-editor.org/rfc/rfc8949.html#name-deterministically-encoded-c
#[cfg(feature = "std")]
pub fn new(encoder: impl Into<Encoder<W>>, canonical: bool) -> Self {
pub fn new(encoder: impl Into<Encoder<W>>, canonicalization: CanonicalizationScheme) -> Self {
Self {
encoder: encoder.into(),
canonical
canonicalization
}
}
}
Expand All @@ -45,7 +79,7 @@ impl<W: Write> From<W> for Serializer<W> {
Self {
encoder: writer.into(),
#[cfg(feature = "std")]
canonical: false,
canonicalization: CanonicalizationScheme::None,
}
}
}
Expand All @@ -56,7 +90,7 @@ impl<W: Write> From<Encoder<W>> for Serializer<W> {
Self {
encoder: writer,
#[cfg(feature = "std")]
canonical: false,
canonicalization: CanonicalizationScheme::None,
}
}
}
Expand Down Expand Up @@ -335,7 +369,7 @@ macro_rules! end {
() => {
#[inline]
fn end(self) -> Result<(), Self::Error> {
if self.ending {
if self.indefinite {
self.serializer.encoder.push(Header::Break)?;
}

Expand All @@ -350,27 +384,30 @@ macro_rules! end_map {
#[inline]
fn end(self) -> Result<(), Self::Error> {
#[cfg(feature = "std")]
if self.serializer.canonical {
if matches!(self.serializer.canonicalization, CanonicalizationScheme::Rfc8049) {
// keys get sorted in lexicographical byte order
let keys = self.cache_keys;
let values = self.cache_values;

debug_assert_eq!(
keys.len(), values.len(),
"ciborium error: canonicalization failed, keys and values must have same length.");
"ciborium error: canonicalization failed, different number of keys and values?");

let mut pairs = std::collections::BTreeMap::new();
for (key, value) in keys.iter().zip(values.iter()) {
pairs.insert(key, value);
}
let pairs: std::collections::BTreeMap<_, _> =
keys.iter().zip(values.iter()).collect();

for (key, value) in pairs {
for (key, value) in pairs.iter() {
self.serializer.encoder.write_all(&key)?;
self.serializer.encoder.write_all(&value)?;
}
}

if self.ending {
#[cfg(feature = "std")]
if matches!(self.serializer.canonicalization, CanonicalizationScheme::Rfc7049) {
unimplemented!("rfc7049 canonicalization not yet implemented");
}

if self.indefinite {
self.serializer.encoder.push(Header::Break)?;
}

Expand All @@ -385,7 +422,9 @@ macro_rules! end_map {
#[doc(hidden)]
pub struct CollectionSerializer<'a, W> {
serializer: &'a mut Serializer<W>,
ending: bool,

/// Whether the collection is indefinite length. Cannot be used with canonical serialization.
indefinite: bool,
tag: bool,

#[cfg(feature = "std")]
Expand All @@ -395,21 +434,20 @@ pub struct CollectionSerializer<'a, W> {
}

impl<'a, W> CollectionSerializer<'a, W> {
pub fn new(serializer: &'a mut Serializer<W>, ending: bool, tag: bool) -> Self {
pub fn new(serializer: &'a mut Serializer<W>, indefinite: bool, tag: bool) -> Self {
#[cfg(feature = "std")]
let capacity = match serializer.canonical {
true => 4,
false => 0,
};
assert!(
!(serializer.canonicalization.is_sorting() && indefinite),
"ciborium error: canonical mode cannot be used with indefinite length collections");

Self {
serializer,
ending,
indefinite,
tag,
#[cfg(feature = "std")]
cache_keys: Vec::with_capacity(capacity),
cache_keys: Vec::with_capacity(0),
#[cfg(feature = "std")]
cache_values: Vec::with_capacity(capacity),
cache_values: Vec::with_capacity(0),
}
}
}
Expand Down Expand Up @@ -504,7 +542,7 @@ where
#[inline]
fn serialize_key<U: ?Sized + ser::Serialize>(&mut self, key: &U) -> Result<(), Self::Error> {
#[cfg(feature = "std")]
if self.serializer.canonical {
if self.serializer.canonicalization.is_sorting() {
let key_bytes = to_vec(key).map_err(|e| Error::Value(e.to_string()))?;
self.cache_keys.push(key_bytes);
return Ok(());
Expand All @@ -519,7 +557,7 @@ where
value: &U,
) -> Result<(), Self::Error> {
#[cfg(feature = "std")]
if self.serializer.canonical {
if self.serializer.canonicalization.is_sorting() {
let value_bytes = to_vec(value).map_err(|e| Error::Value(e.to_string()))?;
self.cache_values.push(value_bytes);
return Ok(());
Expand All @@ -545,7 +583,7 @@ where
value: &U,
) -> Result<(), Self::Error> {
#[cfg(feature = "std")]
if self.serializer.canonical {
if self.serializer.canonicalization.is_sorting() {
let key_bytes = to_vec(key).map_err(|e| Error::Value(e.to_string()))?;
self.cache_keys.push(key_bytes);
let value_bytes = to_vec(value).map_err(|e| Error::Value(e.to_string()))?;
Expand Down Expand Up @@ -575,7 +613,7 @@ where
value: &U,
) -> Result<(), Self::Error> {
#[cfg(feature = "std")]
if self.serializer.canonical {
if self.serializer.canonicalization.is_sorting() {
let key_bytes = to_vec(key).map_err(|e| Error::Value(e.to_string()))?;
self.cache_keys.push(key_bytes);
let value_bytes = to_vec(value).map_err(|e| Error::Value(e.to_string()))?;
Expand All @@ -595,7 +633,7 @@ where
#[inline]
pub fn to_vec<T: ?Sized + ser::Serialize>(value: &T) -> Result<Vec<u8>, Error<std::io::Error>> {
let mut buffer = std::vec::Vec::with_capacity(1024);
let mut serializer = Serializer::new(&mut buffer, false);
let mut serializer = Serializer::new(&mut buffer, CanonicalizationScheme::None);
value.serialize(&mut serializer)?;
Ok(buffer)
}
Expand All @@ -607,9 +645,9 @@ pub fn to_vec<T: ?Sized + ser::Serialize>(value: &T) -> Result<Vec<u8>, Error<st
/// [RFC 8949]: https://www.rfc-editor.org/rfc/rfc8949.html#name-deterministically-encoded-c
#[cfg(feature = "std")]
#[inline]
pub fn to_vec_canonical<T: ?Sized + ser::Serialize>(value: &T) -> Result<Vec<u8>, Error<std::io::Error>> {
pub fn to_vec_canonical<T: ?Sized + ser::Serialize>(value: &T, scheme: CanonicalizationScheme) -> Result<Vec<u8>, Error<std::io::Error>> {
let mut buffer = std::vec::Vec::with_capacity(1024);
let mut serializer = Serializer::new(&mut buffer, true);
let mut serializer = Serializer::new(&mut buffer, scheme);
value.serialize(&mut serializer)?;
Ok(buffer)
}
Expand Down Expand Up @@ -637,10 +675,11 @@ where
pub fn into_writer_canonical<T: ?Sized + ser::Serialize, W: Write>(
value: &T,
writer: W,
scheme: CanonicalizationScheme,
) -> Result<(), Error<W::Error>>
where
W::Error: core::fmt::Debug,
{
let mut encoder = Serializer::new(writer, true);
let mut encoder = Serializer::new(writer, scheme);
value.serialize(&mut encoder)
}
3 changes: 2 additions & 1 deletion ciborium/tests/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use ciborium::tag::Required;
use ciborium::value::CanonicalValue;
use rand::prelude::*;
use std::collections::BTreeMap;
use ciborium::ser::CanonicalizationScheme;

macro_rules! cval {
($x:expr) => {
Expand Down Expand Up @@ -81,7 +82,7 @@ fn map_canonical() {
map.insert(cval!("z"), val!(4));
map.insert(cval!("aa"), val!(6));

let bytes1 = ciborium::ser::to_vec_canonical(&map).unwrap();
let bytes1 = ciborium::ser::to_vec_canonical(&map, CanonicalizationScheme::Rfc8049).unwrap();

assert_eq!(
hex::encode(&bytes1),
Expand Down

0 comments on commit 83dd4aa

Please sign in to comment.