From fff0931d24f74edea3d704bfb7c52c22b5f63bc3 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 20 Aug 2024 21:52:20 -0400 Subject: [PATCH 001/181] `chia-datalayer` --- Cargo.lock | 4 +++ crates/chia-datalayer/Cargo.toml | 15 +++++++++++ crates/chia-datalayer/src/lib.rs | 45 ++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 crates/chia-datalayer/Cargo.toml create mode 100644 crates/chia-datalayer/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index b52c5664c..24b4425b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -369,6 +369,10 @@ dependencies = [ "thiserror", ] +[[package]] +name = "chia-datalayer" +version = "0.1.0" + [[package]] name = "chia-fuzz" version = "0.12.0" diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml new file mode 100644 index 000000000..e84a3d2a0 --- /dev/null +++ b/crates/chia-datalayer/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "chia-datalayer" +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" +description = "DataLayer modules for Chia blockchain" +authors = ["Chia Network, Inc. "] +homepage = "https://github.com/Chia-Network/chia_rs" +repository = "https://github.com/Chia-Network/chia_rs" + +[lints] +workspace = true + +[lib] +crate-type = ["rlib"] diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs new file mode 100644 index 000000000..7d9e635a7 --- /dev/null +++ b/crates/chia-datalayer/src/lib.rs @@ -0,0 +1,45 @@ +use std::collections::HashMap; + +type Index = usize; +type Key = Vec; + +pub enum NodeType { + Internal, + Leaf, +} + +#[derive(Debug)] +pub struct MerkleBlob { + // TODO: shouldn't really all be pub + pub blob: Vec, + pub kv_to_index: HashMap, + pub free_indexes: Vec, + pub last_allocated_index: Index, +} + +pub struct NodeMetadata { + pub node_type: NodeType, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_well_something() { + let _index = 0; + let _key = Key::new(); + let _node_type = NodeType::Internal; + let merkle_blob = MerkleBlob { + blob: Vec::new(), + kv_to_index: HashMap::new(), + free_indexes: Vec::new(), + last_allocated_index: 0, + }; + + assert_eq!( + merkle_blob.blob, + Vec::new(), + ); + } +} From 0b2034744cecfebd016340d73ff1f9e7192cb969 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 20 Aug 2024 21:58:06 -0400 Subject: [PATCH 002/181] fmt --- crates/chia-datalayer/src/lib.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 7d9e635a7..0c79c2230 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -27,7 +27,7 @@ mod tests { #[test] fn test_well_something() { - let _index = 0; + let _index: Index = 0; let _key = Key::new(); let _node_type = NodeType::Internal; let merkle_blob = MerkleBlob { @@ -37,9 +37,6 @@ mod tests { last_allocated_index: 0, }; - assert_eq!( - merkle_blob.blob, - Vec::new(), - ); + assert_eq!(merkle_blob.blob, Vec::new()); } } From d6a7483d78ed802da22115e7a817fdfba8d243fb Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 21 Aug 2024 16:18:55 -0400 Subject: [PATCH 003/181] parse. horribly. 
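A sketch of the node layout that the parsing below assumes, reconstructed from NodeMetadata::load and RawMerkleNode::load in this diff. The helper name parse_internal_fields and the exact slice bounds are illustrative, not part of the patch; sizes are still in flux at this point (DATA_SIZE is still a TODO).

    // metadata: [node_type: u8][dirty: u8]
    // internal: [parent: u32 BE][left: u32 BE][right: u32 BE][hash: 32 bytes]
    // leaf:     [parent: u32 BE][key_value: 32 bytes][hash: 32 bytes]
    fn parse_internal_fields(blob: &[u8]) -> (u32, u32, u32, [u8; 32]) {
        let parent = u32::from_be_bytes(blob[0..4].try_into().unwrap());
        let left = u32::from_be_bytes(blob[4..8].try_into().unwrap());
        let right = u32::from_be_bytes(blob[8..12].try_into().unwrap());
        // the Internal branch in the diff slices blob[12..46] (34 bytes), which
        // cannot convert into a 32-byte hash; 12..44 is used in this sketch
        let hash: [u8; 32] = blob[12..44].try_into().unwrap();
        (parent, left, right, hash)
    }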
--- crates/chia-datalayer/src/lib.rs | 149 +++++++++++++++++++++++++++---- 1 file changed, 131 insertions(+), 18 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 0c79c2230..fd8dc714a 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1,24 +1,140 @@ -use std::collections::HashMap; +// use std::collections::HashMap; -type Index = usize; -type Key = Vec; +type TreeIndex = u32; +// type Key = Vec; +type Hash = [u8; 32]; +type KVId = Hash; + +#[derive(Debug, Hash, Eq, PartialEq)] pub enum NodeType { Internal, Leaf, } +impl NodeType { + pub fn load(&value: &u8) -> Self { + // TODO: identify some useful structured serialization tooling we use + // TODO: find a better way to tie serialization values to enumerators + match value { + 0 => NodeType::Internal, + 1 => NodeType::Leaf, + other => panic!("unknown NodeType value: {}", other), + } + } +} + +// impl NodeType { +// const TYPE_TO_VALUE: HashMap = HashMap::from([ +// (NodeType::Internal, 0), +// (NodeType::Leaf, 1), +// ]); +// +// fn value(&self) -> u8 { +// let map = Self::TYPE_TO_VALUE; +// // TODO: this seems pretty clearly the wrong way, probably +// let value = map.get(self); +// if value.is_some() { +// return 3; +// } +// panic!("no value for NodeType: {self:?}"); +// } +// } + #[derive(Debug)] pub struct MerkleBlob { // TODO: shouldn't really all be pub pub blob: Vec, - pub kv_to_index: HashMap, - pub free_indexes: Vec, - pub last_allocated_index: Index, } +// TODO: fill out related to the serializations +const METADATA_SIZE: u32 = 2; +const DATA_SIZE: u32 = 0; +const SPACING: u32 = METADATA_SIZE + DATA_SIZE; + +impl MerkleBlob { + pub fn get_raw_node(&self, index: TreeIndex) -> RawMerkleNode { + // TODO: handle invalid indexes? + // TODO: handle overflows? + let metadata_start = index * SPACING; + let data_start = metadata_start + METADATA_SIZE; + let end = data_start + DATA_SIZE; + + let metadata_blob = &self.blob[metadata_start as usize..data_start as usize]; + let data_blob = &self.blob[data_start as usize..end as usize]; + let metadata = NodeMetadata::load(metadata_blob); + RawMerkleNode::load(metadata, 0, data_blob) + } +} + +pub enum RawMerkleNode { + Root { + left: TreeIndex, + right: TreeIndex, + hash: Hash, + // TODO: kinda feels questionable having it be aware of its own location + // TODO: just always at zero? + index: TreeIndex, + }, + Internal { + parent: TreeIndex, + left: TreeIndex, + right: TreeIndex, + hash: Hash, + // TODO: kinda feels questionable having it be aware of its own location + index: TreeIndex, + }, + Leaf { + parent: TreeIndex, + key_value: KVId, + hash: Hash, + // TODO: kinda feels questionable having it be aware of its own location + index: TreeIndex, + }, +} + +impl RawMerkleNode { + // TODO: how do i say what i'm passing in is length two if i [u8; 2] here + pub fn load(metadata: NodeMetadata, index: TreeIndex, blob: &[u8]) -> Self { + match metadata.node_type { + NodeType::Internal => RawMerkleNode::Internal { + // TODO: get these right + parent: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[0..4]).unwrap()), + left: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[4..8]).unwrap()), + right: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[8..12]).unwrap()), + hash: <[u8; 32]>::try_from(&blob[12..46]).unwrap(), + index, + }, + NodeType::Leaf => RawMerkleNode::Leaf { + // TODO: this try from really right? 
+ parent: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[0..4]).unwrap()), + key_value: KVId::try_from(&blob[4..36]).unwrap(), + hash: Hash::try_from(&blob[36..68]).unwrap(), + index, + }, + } + } +} + +#[derive(Debug, PartialEq)] pub struct NodeMetadata { pub node_type: NodeType, + pub dirty: bool, +} + +impl NodeMetadata { + // TODO: how do i say what i'm passing in is length two if i [u8; 2] here + pub fn load(blob: &[u8]) -> Self { + // TODO: identify some useful structured serialization tooling we use + Self { + node_type: NodeType::load(&blob[0]), + dirty: match blob[1] { + 0 => false, + 1 => true, + other => panic!("invalid dirty value: {}", other), + }, + } + } } #[cfg(test)] @@ -26,17 +142,14 @@ mod tests { use super::*; #[test] - fn test_well_something() { - let _index: Index = 0; - let _key = Key::new(); - let _node_type = NodeType::Internal; - let merkle_blob = MerkleBlob { - blob: Vec::new(), - kv_to_index: HashMap::new(), - free_indexes: Vec::new(), - last_allocated_index: 0, - }; - - assert_eq!(merkle_blob.blob, Vec::new()); + fn test_something() { + let a: [u8; 2] = [0, 1]; + assert_eq!( + NodeMetadata::load(&a), + NodeMetadata { + node_type: NodeType::Internal, + dirty: true + } + ); } } From 05fc45126ba2a8a5f37ea9c17ddf26de86959c86 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 22 Aug 2024 08:23:56 -0400 Subject: [PATCH 004/181] tidy --- crates/chia-datalayer/src/lib.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index fd8dc714a..91a6d8ed7 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -48,20 +48,22 @@ pub struct MerkleBlob { } // TODO: fill out related to the serializations -const METADATA_SIZE: u32 = 2; -const DATA_SIZE: u32 = 0; -const SPACING: u32 = METADATA_SIZE + DATA_SIZE; +const METADATA_SIZE: usize = 2; +const DATA_SIZE: usize = 0; +const SPACING: usize = METADATA_SIZE + DATA_SIZE; impl MerkleBlob { pub fn get_raw_node(&self, index: TreeIndex) -> RawMerkleNode { // TODO: handle invalid indexes? // TODO: handle overflows? 
- let metadata_start = index * SPACING; + let metadata_start = index as usize * SPACING; let data_start = metadata_start + METADATA_SIZE; let end = data_start + DATA_SIZE; - let metadata_blob = &self.blob[metadata_start as usize..data_start as usize]; - let data_blob = &self.blob[data_start as usize..end as usize]; + let metadata_blob: [u8; METADATA_SIZE] = self.blob[metadata_start..data_start] + .try_into() + .expect("better handling"); + let data_blob = &self.blob[data_start..end]; let metadata = NodeMetadata::load(metadata_blob); RawMerkleNode::load(metadata, 0, data_blob) } @@ -94,7 +96,6 @@ pub enum RawMerkleNode { } impl RawMerkleNode { - // TODO: how do i say what i'm passing in is length two if i [u8; 2] here pub fn load(metadata: NodeMetadata, index: TreeIndex, blob: &[u8]) -> Self { match metadata.node_type { NodeType::Internal => RawMerkleNode::Internal { @@ -123,8 +124,7 @@ pub struct NodeMetadata { } impl NodeMetadata { - // TODO: how do i say what i'm passing in is length two if i [u8; 2] here - pub fn load(blob: &[u8]) -> Self { + pub fn load(blob: [u8; METADATA_SIZE]) -> Self { // TODO: identify some useful structured serialization tooling we use Self { node_type: NodeType::load(&blob[0]), @@ -145,7 +145,7 @@ mod tests { fn test_something() { let a: [u8; 2] = [0, 1]; assert_eq!( - NodeMetadata::load(&a), + NodeMetadata::load(a), NodeMetadata { node_type: NodeType::Internal, dirty: true From fb1d6c31febb29a26f68dbd2befafb13ada28c2c Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 22 Aug 2024 08:33:32 -0400 Subject: [PATCH 005/181] `KvId` --- crates/chia-datalayer/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 91a6d8ed7..ff14facff 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -3,7 +3,7 @@ type TreeIndex = u32; // type Key = Vec; type Hash = [u8; 32]; -type KVId = Hash; +type KvId = Hash; #[derive(Debug, Hash, Eq, PartialEq)] @@ -88,7 +88,7 @@ pub enum RawMerkleNode { }, Leaf { parent: TreeIndex, - key_value: KVId, + key_value: KvId, hash: Hash, // TODO: kinda feels questionable having it be aware of its own location index: TreeIndex, @@ -109,7 +109,7 @@ impl RawMerkleNode { NodeType::Leaf => RawMerkleNode::Leaf { // TODO: this try from really right? 
parent: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[0..4]).unwrap()), - key_value: KVId::try_from(&blob[4..36]).unwrap(), + key_value: KvId::try_from(&blob[4..36]).unwrap(), hash: Hash::try_from(&blob[36..68]).unwrap(), index, }, From 735244cb823cfc27e218ec16e994912401f539ad Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 22 Aug 2024 11:35:09 -0400 Subject: [PATCH 006/181] misc --- crates/chia-datalayer/src/lib.rs | 58 +++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index ff14facff..bf2e795e9 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -47,25 +47,40 @@ pub struct MerkleBlob { pub blob: Vec, } -// TODO: fill out related to the serializations +// TODO: clearly shouldnt' be hard coded const METADATA_SIZE: usize = 2; -const DATA_SIZE: usize = 0; +// TODO: clearly shouldnt' be hard coded +const DATA_SIZE: usize = 68; const SPACING: usize = METADATA_SIZE + DATA_SIZE; impl MerkleBlob { - pub fn get_raw_node(&self, index: TreeIndex) -> RawMerkleNode { + pub fn get_raw_node(&self, index: TreeIndex) -> Result { // TODO: handle invalid indexes? // TODO: handle overflows? let metadata_start = index as usize * SPACING; let data_start = metadata_start + METADATA_SIZE; let end = data_start + DATA_SIZE; - let metadata_blob: [u8; METADATA_SIZE] = self.blob[metadata_start..data_start] + let metadata_blob: [u8; METADATA_SIZE] = self + .blob + .get(metadata_start..data_start) + .ok_or(String::from("metadata blob out of bounds"))? .try_into() - .expect("better handling"); - let data_blob = &self.blob[data_start..end]; - let metadata = NodeMetadata::load(metadata_blob); - RawMerkleNode::load(metadata, 0, data_blob) + .map_err(|e| format!("metadata blob wrong size: {e}"))?; + let data_blob: [u8; DATA_SIZE] = self + .blob + .get(data_start..end) + .ok_or(String::from("data blob out of bounds"))? + .try_into() + .map_err(|e| format!("data blob wrong size: {e}"))?; + let metadata = match NodeMetadata::load(metadata_blob) { + Ok(metadata) => metadata, + Err(message) => return Err(format!("failed loading metadata: {message})")), + }; + Ok(match RawMerkleNode::load(metadata, 0, data_blob) { + Ok(node) => node, + Err(message) => return Err(format!("failed loading raw node: {message}")), + }) } } @@ -96,23 +111,28 @@ pub enum RawMerkleNode { } impl RawMerkleNode { - pub fn load(metadata: NodeMetadata, index: TreeIndex, blob: &[u8]) -> Self { + pub fn load( + metadata: NodeMetadata, + index: TreeIndex, + blob: [u8; DATA_SIZE], + ) -> Result { + // TODO: add Err results match metadata.node_type { - NodeType::Internal => RawMerkleNode::Internal { + NodeType::Internal => Ok(RawMerkleNode::Internal { // TODO: get these right parent: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[0..4]).unwrap()), left: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[4..8]).unwrap()), right: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[8..12]).unwrap()), hash: <[u8; 32]>::try_from(&blob[12..46]).unwrap(), index, - }, - NodeType::Leaf => RawMerkleNode::Leaf { + }), + NodeType::Leaf => Ok(RawMerkleNode::Leaf { // TODO: this try from really right? 
parent: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[0..4]).unwrap()), key_value: KvId::try_from(&blob[4..36]).unwrap(), hash: Hash::try_from(&blob[36..68]).unwrap(), index, - }, + }), } } } @@ -124,16 +144,16 @@ pub struct NodeMetadata { } impl NodeMetadata { - pub fn load(blob: [u8; METADATA_SIZE]) -> Self { + pub fn load(blob: [u8; METADATA_SIZE]) -> Result { // TODO: identify some useful structured serialization tooling we use - Self { + Ok(Self { node_type: NodeType::load(&blob[0]), dirty: match blob[1] { 0 => false, 1 => true, - other => panic!("invalid dirty value: {}", other), + other => return Err(format!("invalid dirty value: {other}")), }, - } + }) } } @@ -146,10 +166,10 @@ mod tests { let a: [u8; 2] = [0, 1]; assert_eq!( NodeMetadata::load(a), - NodeMetadata { + Ok(NodeMetadata { node_type: NodeType::Internal, dirty: true - } + }) ); } } From f679fa7da6f6432465872be1199dd4cdc1262e34 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 22 Aug 2024 14:13:30 -0400 Subject: [PATCH 007/181] more --- crates/chia-datalayer/src/lib.rs | 46 ++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index bf2e795e9..2f690d973 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -6,22 +6,30 @@ type Hash = [u8; 32]; type KvId = Hash; #[derive(Debug, Hash, Eq, PartialEq)] - +#[repr(u8)] pub enum NodeType { - Internal, - Leaf, + Internal = 0, + Leaf = 1, } impl NodeType { - pub fn load(&value: &u8) -> Self { + pub fn load(value: u8) -> Result { // TODO: identify some useful structured serialization tooling we use // TODO: find a better way to tie serialization values to enumerators match value { - 0 => NodeType::Internal, - 1 => NodeType::Leaf, + // ha! feel free to laugh at this + x if (NodeType::Internal as u8 == x) => Ok(NodeType::Internal), + x if (NodeType::Leaf as u8 == x) => Ok(NodeType::Leaf), other => panic!("unknown NodeType value: {}", other), } } + + pub fn dump(self) -> u8 { + match self { + NodeType::Internal => NodeType::Internal as u8, + NodeType::Leaf => NodeType::Leaf as u8, + } + } } // impl NodeType { @@ -64,13 +72,13 @@ impl MerkleBlob { let metadata_blob: [u8; METADATA_SIZE] = self .blob .get(metadata_start..data_start) - .ok_or(String::from("metadata blob out of bounds"))? + .ok_or("metadata blob out of bounds".to_string())? .try_into() .map_err(|e| format!("metadata blob wrong size: {e}"))?; let data_blob: [u8; DATA_SIZE] = self .blob .get(data_start..end) - .ok_or(String::from("data blob out of bounds"))? + .ok_or("data blob out of bounds".to_string())? .try_into() .map_err(|e| format!("data blob wrong size: {e}"))?; let metadata = match NodeMetadata::load(metadata_blob) { @@ -85,14 +93,14 @@ impl MerkleBlob { } pub enum RawMerkleNode { - Root { - left: TreeIndex, - right: TreeIndex, - hash: Hash, - // TODO: kinda feels questionable having it be aware of its own location - // TODO: just always at zero? - index: TreeIndex, - }, + // Root { + // left: TreeIndex, + // right: TreeIndex, + // hash: Hash, + // // TODO: kinda feels questionable having it be aware of its own location + // // TODO: just always at zero? 
+ // index: TreeIndex, + // }, Internal { parent: TreeIndex, left: TreeIndex, @@ -111,6 +119,10 @@ pub enum RawMerkleNode { } impl RawMerkleNode { + // fn discriminant(&self) -> u8 { + // unsafe { *(self as *const Self as *const u8) } + // } + pub fn load( metadata: NodeMetadata, index: TreeIndex, @@ -147,7 +159,7 @@ impl NodeMetadata { pub fn load(blob: [u8; METADATA_SIZE]) -> Result { // TODO: identify some useful structured serialization tooling we use Ok(Self { - node_type: NodeType::load(&blob[0]), + node_type: NodeType::load(blob[0])?, dirty: match blob[1] { 0 => false, 1 => true, From f57e7fe1a3e7990507cf559ea9a1aa0d14a53bef Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 23 Aug 2024 13:25:28 -0400 Subject: [PATCH 008/181] dumps --- crates/chia-datalayer/src/lib.rs | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 2f690d973..da35af61b 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -24,7 +24,7 @@ impl NodeType { } } - pub fn dump(self) -> u8 { + pub fn dump(&self) -> u8 { match self { NodeType::Internal => NodeType::Internal as u8, NodeType::Leaf => NodeType::Leaf as u8, @@ -167,6 +167,16 @@ impl NodeMetadata { }, }) } + + pub fn dump(&self) -> [u8; METADATA_SIZE] { + [ + self.node_type.dump(), + match self.dirty { + false => 0, + true => 1, + }, + ] + } } #[cfg(test)] @@ -174,14 +184,16 @@ mod tests { use super::*; #[test] - fn test_something() { - let a: [u8; 2] = [0, 1]; + fn test_node_metadata_load_dump() { + let dumped: [u8; 2] = [0, 1]; + let loaded = NodeMetadata::load(dumped).unwrap(); assert_eq!( - NodeMetadata::load(a), - Ok(NodeMetadata { + loaded, + NodeMetadata { node_type: NodeType::Internal, dirty: true - }) + }, ); + assert_eq!(loaded.dump(), dumped); } } From 5ccd2cb19189d2528df8a0683a10d4b26aedbf49 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 23 Aug 2024 13:28:51 -0400 Subject: [PATCH 009/181] from to --- crates/chia-datalayer/src/lib.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index da35af61b..11b46098b 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -13,7 +13,7 @@ pub enum NodeType { } impl NodeType { - pub fn load(value: u8) -> Result { + pub fn from_u8(value: u8) -> Result { // TODO: identify some useful structured serialization tooling we use // TODO: find a better way to tie serialization values to enumerators match value { @@ -24,7 +24,7 @@ impl NodeType { } } - pub fn dump(&self) -> u8 { + pub fn to_u8(&self) -> u8 { match self { NodeType::Internal => NodeType::Internal as u8, NodeType::Leaf => NodeType::Leaf as u8, @@ -81,11 +81,11 @@ impl MerkleBlob { .ok_or("data blob out of bounds".to_string())? 
.try_into() .map_err(|e| format!("data blob wrong size: {e}"))?; - let metadata = match NodeMetadata::load(metadata_blob) { + let metadata = match NodeMetadata::from_bytes(metadata_blob) { Ok(metadata) => metadata, Err(message) => return Err(format!("failed loading metadata: {message})")), }; - Ok(match RawMerkleNode::load(metadata, 0, data_blob) { + Ok(match RawMerkleNode::from_bytes(metadata, 0, data_blob) { Ok(node) => node, Err(message) => return Err(format!("failed loading raw node: {message}")), }) @@ -123,7 +123,7 @@ impl RawMerkleNode { // unsafe { *(self as *const Self as *const u8) } // } - pub fn load( + pub fn from_bytes( metadata: NodeMetadata, index: TreeIndex, blob: [u8; DATA_SIZE], @@ -156,10 +156,10 @@ pub struct NodeMetadata { } impl NodeMetadata { - pub fn load(blob: [u8; METADATA_SIZE]) -> Result { + pub fn from_bytes(blob: [u8; METADATA_SIZE]) -> Result { // TODO: identify some useful structured serialization tooling we use Ok(Self { - node_type: NodeType::load(blob[0])?, + node_type: NodeType::from_u8(blob[0])?, dirty: match blob[1] { 0 => false, 1 => true, @@ -168,9 +168,9 @@ impl NodeMetadata { }) } - pub fn dump(&self) -> [u8; METADATA_SIZE] { + pub fn to_bytes(&self) -> [u8; METADATA_SIZE] { [ - self.node_type.dump(), + self.node_type.to_u8(), match self.dirty { false => 0, true => 1, @@ -184,16 +184,16 @@ mod tests { use super::*; #[test] - fn test_node_metadata_load_dump() { - let dumped: [u8; 2] = [0, 1]; - let loaded = NodeMetadata::load(dumped).unwrap(); + fn test_node_metadata_from_to() { + let bytes: [u8; 2] = [0, 1]; + let object = NodeMetadata::from_bytes(bytes).unwrap(); assert_eq!( - loaded, + object, NodeMetadata { node_type: NodeType::Internal, dirty: true }, ); - assert_eq!(loaded.dump(), dumped); + assert_eq!(object.to_bytes(), bytes); } } From b9ae428845e9a9cfb6d828be6a7c6e14cd9e03c3 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 26 Aug 2024 15:23:07 -0400 Subject: [PATCH 010/181] can load a simple python tree --- Cargo.lock | 3 + crates/chia-datalayer/Cargo.toml | 4 + crates/chia-datalayer/src/lib.rs | 121 +++++++++++++++++++++++++++---- 3 files changed, 114 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 24b4425b6..9dcb73299 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -372,6 +372,9 @@ dependencies = [ [[package]] name = "chia-datalayer" version = "0.1.0" +dependencies = [ + "hex-literal", +] [[package]] name = "chia-fuzz" diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index e84a3d2a0..cd1c27df8 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -13,3 +13,7 @@ workspace = true [lib] crate-type = ["rlib"] + +[dependencies] +# dev? 
+hex-literal = { workspace = true } diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 11b46098b..88b4bc35b 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -3,7 +3,7 @@ type TreeIndex = u32; // type Key = Vec; type Hash = [u8; 32]; -type KvId = Hash; +type KvId = u64; #[derive(Debug, Hash, Eq, PartialEq)] #[repr(u8)] @@ -51,17 +51,53 @@ impl NodeType { #[derive(Debug)] pub struct MerkleBlob { - // TODO: shouldn't really all be pub - pub blob: Vec, + blob: Vec, + free_indexes: Vec, } -// TODO: clearly shouldnt' be hard coded +// TODO: clearly shouldn't be hard coded const METADATA_SIZE: usize = 2; -// TODO: clearly shouldnt' be hard coded -const DATA_SIZE: usize = 68; +// TODO: clearly shouldn't be hard coded +const DATA_SIZE: usize = 44; const SPACING: usize = METADATA_SIZE + DATA_SIZE; +// TODO: probably bogus and overflowing or somesuch +const NULL_PARENT: TreeIndex = 0xffffffff; // 1 << (4 * 8) - 1; + impl MerkleBlob { + pub fn insert(&mut self) -> Result<(), String> { + // TODO: garbage just to use stuff + let index = self.get_new_index(); + self.insert_entry_to_blob(index, [0; SPACING])?; + + Ok(()) + } + + fn get_new_index(&mut self) -> TreeIndex { + match self.free_indexes.pop() { + None => (self.blob.len() / SPACING) as TreeIndex, + Some(new_index) => new_index, + } + } + + fn insert_entry_to_blob( + &mut self, + index: TreeIndex, + entry: [u8; SPACING], + ) -> Result<(), String> { + let extend_index = (self.blob.len() / SPACING) as TreeIndex; + if index > extend_index { + return Err(format!("index out of range: {index}")); + } else if index == extend_index { + self.blob.extend_from_slice(&entry); + } else { + let start = index as usize * SPACING; + self.blob[start..start + SPACING].copy_from_slice(&entry); + } + + Ok(()) + } + pub fn get_raw_node(&self, index: TreeIndex) -> Result { // TODO: handle invalid indexes? // TODO: handle overflows? @@ -72,7 +108,12 @@ impl MerkleBlob { let metadata_blob: [u8; METADATA_SIZE] = self .blob .get(metadata_start..data_start) - .ok_or("metadata blob out of bounds".to_string())? + .ok_or(format!( + "metadata blob out of bounds: {} {} {}", + self.blob.len(), + metadata_start, + data_start + ))? 
.try_into() .map_err(|e| format!("metadata blob wrong size: {e}"))?; let data_blob: [u8; DATA_SIZE] = self @@ -85,13 +126,30 @@ impl MerkleBlob { Ok(metadata) => metadata, Err(message) => return Err(format!("failed loading metadata: {message})")), }; - Ok(match RawMerkleNode::from_bytes(metadata, 0, data_blob) { - Ok(node) => node, - Err(message) => return Err(format!("failed loading raw node: {message}")), - }) + Ok( + match RawMerkleNode::from_bytes(metadata, index, data_blob) { + Ok(node) => node, + Err(message) => return Err(format!("failed loading raw node: {message}")), + }, + ) + } + + pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { + let mut next_index = index; + let mut lineage = vec![]; + loop { + let node = self.get_raw_node(next_index)?; + next_index = node.parent(); + lineage.push(node); + + if next_index == NULL_PARENT { + return Ok(lineage); + } + } } } +#[derive(Debug)] pub enum RawMerkleNode { // Root { // left: TreeIndex, @@ -135,18 +193,25 @@ impl RawMerkleNode { parent: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[0..4]).unwrap()), left: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[4..8]).unwrap()), right: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[8..12]).unwrap()), - hash: <[u8; 32]>::try_from(&blob[12..46]).unwrap(), + hash: <[u8; 32]>::try_from(&blob[12..44]).unwrap(), index, }), NodeType::Leaf => Ok(RawMerkleNode::Leaf { // TODO: this try from really right? parent: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[0..4]).unwrap()), - key_value: KvId::try_from(&blob[4..36]).unwrap(), - hash: Hash::try_from(&blob[36..68]).unwrap(), + key_value: KvId::from_be_bytes(<[u8; 8]>::try_from(&blob[4..12]).unwrap()), + hash: Hash::try_from(&blob[12..44]).unwrap(), index, }), } } + + pub fn parent(&self) -> TreeIndex { + match self { + RawMerkleNode::Internal { parent, .. } => *parent, + RawMerkleNode::Leaf { parent, .. 
} => *parent, + } + } } #[derive(Debug, PartialEq)] @@ -181,8 +246,17 @@ impl NodeMetadata { #[cfg(test)] mod tests { + use hex_literal::hex; + use super::*; + fn example_blob() -> MerkleBlob { + let something = hex!("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b"); + MerkleBlob { + blob: Vec::from(something), + free_indexes: vec![], + } + } #[test] fn test_node_metadata_from_to() { let bytes: [u8; 2] = [0, 1]; @@ -196,4 +270,23 @@ mod tests { ); assert_eq!(object.to_bytes(), bytes); } + + #[test] + fn test_load_a_python_dump() { + // let kv_id = 0x1415161718191A1B; + let merkle_blob = example_blob(); + merkle_blob.get_raw_node(0).unwrap(); + } + + #[test] + fn test_get_lineage() { + let merkle_blob = example_blob(); + let lineage = merkle_blob.get_lineage(2).unwrap(); + for node in &lineage { + println!("{node:?}"); + } + assert_eq!(lineage.len(), 2); + let last_node = lineage.last().unwrap(); + assert_eq!(last_node.parent(), NULL_PARENT); + } } From 2a39dbde8305d196f54520cc6eb278f66cbf8c46 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 26 Aug 2024 16:19:43 -0400 Subject: [PATCH 011/181] get random leaf node --- crates/chia-datalayer/src/lib.rs | 34 ++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 88b4bc35b..cdc587c35 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -69,6 +69,7 @@ impl MerkleBlob { // TODO: garbage just to use stuff let index = self.get_new_index(); self.insert_entry_to_blob(index, [0; SPACING])?; + self.get_random_leaf_node(vec![0, 1, 2, 3, 4, 5, 6, 7])?; Ok(()) } @@ -80,6 +81,26 @@ impl MerkleBlob { } } + fn get_random_leaf_node(&self, seed: Vec) -> Result { + let mut node = self.get_raw_node(0)?; + for byte in seed { + for bit in 0..8 { + match node { + RawMerkleNode::Leaf { .. } => return Ok(node), + RawMerkleNode::Internal { left, right, .. } => { + if byte & (1 << bit) != 0 { + node = self.get_raw_node(left)?; + } else { + node = self.get_raw_node(right)?; + } + } + } + } + } + + Err(format!("failed to find a node")) + } + fn insert_entry_to_blob( &mut self, index: TreeIndex, @@ -289,4 +310,17 @@ mod tests { let last_node = lineage.last().unwrap(); assert_eq!(last_node.parent(), NULL_PARENT); } + + #[test] + fn test_get_random_leaf_node() { + let merkle_blob = example_blob(); + let leaf = merkle_blob.get_random_leaf_node(vec![0; 8]).unwrap(); + assert_eq!( + match leaf { + RawMerkleNode::Leaf { index, .. } => index, + RawMerkleNode::Internal { index, .. 
} => index, + }, + 2, + ) + } } From 0f3838f056db37707606200f932f6457cc5ec0bb Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 27 Aug 2024 11:48:01 -0400 Subject: [PATCH 012/181] preliminary python exposure --- Cargo.lock | 4 ++++ Cargo.toml | 4 ++++ crates/chia-datalayer/Cargo.toml | 7 ++++++- crates/chia-datalayer/src/lib.rs | 25 +++++++++++++++++++++++++ tests/test_merkle_blob.py | 7 +++++++ wheel/Cargo.toml | 1 + wheel/src/api.rs | 5 +++++ 7 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 tests/test_merkle_blob.py diff --git a/Cargo.lock b/Cargo.lock index 9dcb73299..85674096f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -276,6 +276,7 @@ dependencies = [ "chia-bls 0.11.0", "chia-client", "chia-consensus", + "chia-datalayer", "chia-protocol", "chia-puzzles", "chia-ssl", @@ -373,7 +374,9 @@ dependencies = [ name = "chia-datalayer" version = "0.1.0" dependencies = [ + "chia-traits 0.11.0", "hex-literal", + "pyo3", ] [[package]] @@ -515,6 +518,7 @@ version = "0.12.0" dependencies = [ "chia-bls 0.11.0", "chia-consensus", + "chia-datalayer", "chia-protocol", "chia-traits 0.11.0", "chia_py_streamable_macro", diff --git a/Cargo.toml b/Cargo.toml index 063713ba2..2e010dd0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,6 +54,7 @@ implicit_hasher = "allow" chia-bls = { workspace = true, optional = true } chia-client = { workspace = true, optional = true } chia-consensus = { workspace = true, optional = true } +chia-datalayer = { workspace = true, optional = true } chia-protocol = { workspace = true, optional = true } chia-ssl = { workspace = true, optional = true } chia-traits = { workspace = true, optional = true } @@ -67,6 +68,7 @@ default = [ "bls", "client", "consensus", + "datalayer", "protocol", "ssl", "traits", @@ -78,6 +80,7 @@ default = [ bls = ["dep:chia-bls"] client = ["dep:chia-client"] consensus = ["dep:chia-consensus"] +datalayer = ["dep:chia-datalayer"] protocol = ["dep:chia-protocol"] ssl = ["dep:chia-ssl"] traits = ["dep:chia-traits"] @@ -96,6 +99,7 @@ chia_streamable_macro = { path = "./crates/chia_streamable_macro", version = "0. chia-bls = { path = "./crates/chia-bls", version = "0.11.0" } chia-client = { path = "./crates/chia-client", version = "0.11.0" } chia-consensus = { path = "./crates/chia-consensus", version = "0.12.0" } +chia-datalayer = { path = "./crates/chia-datalayer", version = "0.1.0" } chia-protocol = { path = "./crates/chia-protocol", version = "0.11.0" } chia-ssl = { path = "./crates/chia-ssl", version = "0.11.0" } chia-traits = { path = "./crates/chia-traits", version = "0.11.0" } diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index cd1c27df8..a36f72817 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -11,9 +11,14 @@ repository = "https://github.com/Chia-Network/chia_rs" [lints] workspace = true +[features] +py-bindings = ["dep:pyo3", "chia-traits/py-bindings"] + [lib] crate-type = ["rlib"] [dependencies] -# dev? +chia-traits = { workspace = true } +# TODO: dev? 
hex-literal = { workspace = true } +pyo3 = { workspace = true, optional = true } diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index cdc587c35..956d9c730 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1,5 +1,8 @@ // use std::collections::HashMap; +#[cfg(feature = "py-bindings")] +use pyo3::{prelude::PyBytesMethods, pyclass, pymethods, types::PyBytes, Bound, PyResult}; + type TreeIndex = u32; // type Key = Vec; type Hash = [u8; 32]; @@ -50,6 +53,7 @@ impl NodeType { // } #[derive(Debug)] +#[cfg_attr(feature = "py-bindings", pyclass(frozen, name = "MerkleBlob"))] pub struct MerkleBlob { blob: Vec, free_indexes: Vec, @@ -170,6 +174,27 @@ impl MerkleBlob { } } +#[cfg(feature = "py-bindings")] +#[pymethods] +impl MerkleBlob { + #[new] + pub fn init(blob: &Bound<'_, PyBytes>) -> PyResult { + Ok(Self { + blob: blob.as_bytes().to_vec(), + free_indexes: vec![], + }) + } + + // #[pyo3(name = "get_root")] + // pub fn py_get_root<'a>(&self, py: Python<'a>) -> PyResult> { + // ChiaToPython::to_python(&Bytes32::new(self.get_root()), py) + // } + + pub fn __len__(&self) -> PyResult { + Ok(self.blob.len()) + } +} + #[derive(Debug)] pub enum RawMerkleNode { // Root { diff --git a/tests/test_merkle_blob.py b/tests/test_merkle_blob.py new file mode 100644 index 000000000..65caa4554 --- /dev/null +++ b/tests/test_merkle_blob.py @@ -0,0 +1,7 @@ +from chia_rs import MerkleBlob + +def test_merkle_blob(): + blob = bytes.fromhex("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b") + merkle_blob = MerkleBlob(blob) + print(merkle_blob) + assert len(merkle_blob) == len(blob) diff --git a/wheel/Cargo.toml b/wheel/Cargo.toml index f3d602ddd..426d40e66 100644 --- a/wheel/Cargo.toml +++ b/wheel/Cargo.toml @@ -27,6 +27,7 @@ hex = { workspace = true } pyo3 = { workspace = true, features = ["multiple-pymethods"] } chia-consensus = { workspace = true, features = ["py-bindings"] } chia-bls = { workspace = true, features = ["py-bindings"] } +chia-datalayer = { workspace = true, features = ["py-bindings"] } chia-protocol = { workspace = true, features = ["py-bindings"] } chia-traits = { workspace = true, features = ["py-bindings"] } clvm-traits = { workspace = true, features = ["derive", "py-bindings"] } diff --git a/wheel/src/api.rs b/wheel/src/api.rs index ebf03094b..2dd9fe980 100644 --- a/wheel/src/api.rs +++ b/wheel/src/api.rs @@ -75,6 +75,8 @@ use chia_bls::{ Signature, }; +use chia_datalayer::MerkleBlob; + #[pyfunction] pub fn compute_merkle_set_root<'p>( py: Python<'p>, @@ -485,6 +487,9 @@ pub fn chia_rs(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { // constants m.add_class::()?; + // datalayer + m.add_class::()?; + // merkle tree m.add_class::()?; m.add_function(wrap_pyfunction!(confirm_included_already_hashed, m)?)?; From ae67c65cded8b4a58f8f1abe4bb8d54ec1eb659a Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 27 Aug 2024 12:11:45 -0400 Subject: [PATCH 013/181] add hints --- wheel/generate_type_stubs.py | 8 ++++++++ wheel/python/chia_rs/chia_rs.pyi | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index 089e388ed..ed8a093e9 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -381,6 +381,14 @@ def 
derive_child_sk_unhardened(pk: PrivateKey, index: int) -> PrivateKey: ... @staticmethod def derive_child_pk_unhardened(pk: G1Element, index: int) -> G1Element: ... +class MerkleBlob: + def __init__( + self, + blob: bytes, + ) -> None: ... + + def __len__(self) -> int: ... + class MerkleSet: def get_root(self) -> bytes32: ... def is_included_already_hashed(self, to_check: bytes) -> Tuple[bool, bytes]: ... diff --git a/wheel/python/chia_rs/chia_rs.pyi b/wheel/python/chia_rs/chia_rs.pyi index c6d6b1e57..3be1068ae 100644 --- a/wheel/python/chia_rs/chia_rs.pyi +++ b/wheel/python/chia_rs/chia_rs.pyi @@ -125,6 +125,14 @@ class AugSchemeMPL: @staticmethod def derive_child_pk_unhardened(pk: G1Element, index: int) -> G1Element: ... +class MerkleBlob: + def __init__( + self, + blob: bytes, + ) -> None: ... + + def __len__(self) -> int: ... + class MerkleSet: def get_root(self) -> bytes32: ... def is_included_already_hashed(self, to_check: bytes) -> Tuple[bool, bytes]: ... From e6937516e7abfeb052793bdabf15696c45ab2ae2 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 27 Aug 2024 12:32:20 -0400 Subject: [PATCH 014/181] clippy --- crates/chia-datalayer/src/lib.rs | 37 ++++++++++++++------------------ 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 956d9c730..ac1785634 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -3,6 +3,8 @@ #[cfg(feature = "py-bindings")] use pyo3::{prelude::PyBytesMethods, pyclass, pymethods, types::PyBytes, Bound, PyResult}; +use std::cmp::Ordering; + type TreeIndex = u32; // type Key = Vec; type Hash = [u8; 32]; @@ -23,7 +25,7 @@ impl NodeType { // ha! feel free to laugh at this x if (NodeType::Internal as u8 == x) => Ok(NodeType::Internal), x if (NodeType::Leaf as u8 == x) => Ok(NodeType::Leaf), - other => panic!("unknown NodeType value: {}", other), + other => panic!("unknown NodeType value: {other}"), } } @@ -66,7 +68,7 @@ const DATA_SIZE: usize = 44; const SPACING: usize = METADATA_SIZE + DATA_SIZE; // TODO: probably bogus and overflowing or somesuch -const NULL_PARENT: TreeIndex = 0xffffffff; // 1 << (4 * 8) - 1; +const NULL_PARENT: TreeIndex = 0xffff_ffffu32; // 1 << (4 * 8) - 1; impl MerkleBlob { pub fn insert(&mut self) -> Result<(), String> { @@ -102,7 +104,7 @@ impl MerkleBlob { } } - Err(format!("failed to find a node")) + Err("failed to find a node".to_string()) } fn insert_entry_to_blob( @@ -111,13 +113,13 @@ impl MerkleBlob { entry: [u8; SPACING], ) -> Result<(), String> { let extend_index = (self.blob.len() / SPACING) as TreeIndex; - if index > extend_index { - return Err(format!("index out of range: {index}")); - } else if index == extend_index { - self.blob.extend_from_slice(&entry); - } else { - let start = index as usize * SPACING; - self.blob[start..start + SPACING].copy_from_slice(&entry); + match index.cmp(&extend_index) { + Ordering::Greater => return Err(format!("index out of range: {index}")), + Ordering::Equal => self.blob.extend_from_slice(&entry), + Ordering::Less => { + let start = index as usize * SPACING; + self.blob[start..start + SPACING].copy_from_slice(&entry); + } } Ok(()) @@ -152,7 +154,7 @@ impl MerkleBlob { Err(message) => return Err(format!("failed loading metadata: {message})")), }; Ok( - match RawMerkleNode::from_bytes(metadata, index, data_blob) { + match RawMerkleNode::from_bytes(&metadata, index, data_blob) { Ok(node) => node, Err(message) => return Err(format!("failed loading raw node: {message}")), 
}, @@ -228,7 +230,7 @@ impl RawMerkleNode { // } pub fn from_bytes( - metadata: NodeMetadata, + metadata: &NodeMetadata, index: TreeIndex, blob: [u8; DATA_SIZE], ) -> Result { @@ -254,8 +256,7 @@ impl RawMerkleNode { pub fn parent(&self) -> TreeIndex { match self { - RawMerkleNode::Internal { parent, .. } => *parent, - RawMerkleNode::Leaf { parent, .. } => *parent, + RawMerkleNode::Internal { parent, .. } | RawMerkleNode::Leaf { parent, .. } => *parent, } } } @@ -280,13 +281,7 @@ impl NodeMetadata { } pub fn to_bytes(&self) -> [u8; METADATA_SIZE] { - [ - self.node_type.to_u8(), - match self.dirty { - false => 0, - true => 1, - }, - ] + [self.node_type.to_u8(), u8::from(self.dirty)] } } From b6f90946d2cc788cb68ee5c2062c0edb06ce5b1d Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 27 Aug 2024 12:39:16 -0400 Subject: [PATCH 015/181] black --- tests/test_merkle_blob.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_merkle_blob.py b/tests/test_merkle_blob.py index 65caa4554..787ff67e0 100644 --- a/tests/test_merkle_blob.py +++ b/tests/test_merkle_blob.py @@ -1,7 +1,10 @@ from chia_rs import MerkleBlob + def test_merkle_blob(): - blob = bytes.fromhex("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b") + blob = bytes.fromhex( + "0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b" + ) merkle_blob = MerkleBlob(blob) print(merkle_blob) assert len(merkle_blob) == len(blob) From 682fe07b4a3404fb0e2dd65d585248b2cd7de61a Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 28 Aug 2024 12:14:55 -0400 Subject: [PATCH 016/181] misc --- crates/chia-datalayer/src/lib.rs | 232 +++++++++++++++++++++++++------ tests/test_merkle_blob.py | 1 + 2 files changed, 187 insertions(+), 46 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index ac1785634..4fc989398 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -5,9 +5,16 @@ use pyo3::{prelude::PyBytesMethods, pyclass, pymethods, types::PyBytes, Bound, P use std::cmp::Ordering; +// TODO: clearly shouldn't be hard coded +const METADATA_SIZE: usize = 2; +// TODO: clearly shouldn't be hard coded +const DATA_SIZE: usize = 44; +const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; + type TreeIndex = u32; // type Key = Vec; type Hash = [u8; 32]; +type Block = [u8; BLOCK_SIZE]; type KvId = u64; #[derive(Debug, Hash, Eq, PartialEq)] @@ -61,12 +68,6 @@ pub struct MerkleBlob { free_indexes: Vec, } -// TODO: clearly shouldn't be hard coded -const METADATA_SIZE: usize = 2; -// TODO: clearly shouldn't be hard coded -const DATA_SIZE: usize = 44; -const SPACING: usize = METADATA_SIZE + DATA_SIZE; - // TODO: probably bogus and overflowing or somesuch const NULL_PARENT: TreeIndex = 0xffff_ffffu32; // 1 << (4 * 8) - 1; @@ -74,7 +75,7 @@ impl MerkleBlob { pub fn insert(&mut self) -> Result<(), String> { // TODO: garbage just to use stuff let index = self.get_new_index(); - self.insert_entry_to_blob(index, [0; SPACING])?; + self.insert_entry_to_blob(index, [0; BLOCK_SIZE])?; self.get_random_leaf_node(vec![0, 1, 2, 3, 
4, 5, 6, 7])?; Ok(()) @@ -82,7 +83,7 @@ impl MerkleBlob { fn get_new_index(&mut self) -> TreeIndex { match self.free_indexes.pop() { - None => (self.blob.len() / SPACING) as TreeIndex, + None => (self.blob.len() / BLOCK_SIZE) as TreeIndex, Some(new_index) => new_index, } } @@ -107,45 +108,43 @@ impl MerkleBlob { Err("failed to find a node".to_string()) } - fn insert_entry_to_blob( - &mut self, - index: TreeIndex, - entry: [u8; SPACING], - ) -> Result<(), String> { - let extend_index = (self.blob.len() / SPACING) as TreeIndex; + fn insert_entry_to_blob(&mut self, index: TreeIndex, block: Block) -> Result<(), String> { + let extend_index = (self.blob.len() / BLOCK_SIZE) as TreeIndex; match index.cmp(&extend_index) { Ordering::Greater => return Err(format!("index out of range: {index}")), - Ordering::Equal => self.blob.extend_from_slice(&entry), + Ordering::Equal => self.blob.extend_from_slice(&block), Ordering::Less => { - let start = index as usize * SPACING; - self.blob[start..start + SPACING].copy_from_slice(&entry); + let start = index as usize * BLOCK_SIZE; + self.blob[start..start + BLOCK_SIZE].copy_from_slice(&block); } } Ok(()) } - pub fn get_raw_node(&self, index: TreeIndex) -> Result { - // TODO: handle invalid indexes? - // TODO: handle overflows? - let metadata_start = index as usize * SPACING; + fn get_block(&self, index: TreeIndex) -> Result { + let metadata_start = index as usize * BLOCK_SIZE; let data_start = metadata_start + METADATA_SIZE; let end = data_start + DATA_SIZE; - let metadata_blob: [u8; METADATA_SIZE] = self - .blob - .get(metadata_start..data_start) - .ok_or(format!( - "metadata blob out of bounds: {} {} {}", - self.blob.len(), - metadata_start, - data_start - ))? + self.blob + .get(metadata_start..end) + .ok_or(format!("index out of bounds: {index}"))? + .try_into() + .map_err(|e| format!("failed getting block {index}: {e}")) + } + + pub fn get_raw_node(&self, index: TreeIndex) -> Result { + // TODO: handle invalid indexes? + // TODO: handle overflows? + let block = self.get_block(index)?; + let metadata_blob: [u8; METADATA_SIZE] = block + .get(..METADATA_SIZE) + .ok_or(format!("metadata blob out of bounds: {}", block.len(),))? .try_into() .map_err(|e| format!("metadata blob wrong size: {e}"))?; - let data_blob: [u8; DATA_SIZE] = self - .blob - .get(data_start..end) + let data_blob: [u8; DATA_SIZE] = block + .get(METADATA_SIZE..) .ok_or("data blob out of bounds".to_string())? 
.try_into() .map_err(|e| format!("data blob wrong size: {e}"))?; @@ -161,6 +160,14 @@ impl MerkleBlob { ) } + pub fn get_parent_index(&self, index: TreeIndex) -> Result { + let block = self.get_block(index).unwrap(); + let node_type = + NodeMetadata::node_type_from_bytes(block[..METADATA_SIZE].try_into().unwrap())?; + + RawMerkleNode::parent_from_bytes(&node_type, block[METADATA_SIZE..].try_into().unwrap()) + } + pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { let mut next_index = index; let mut lineage = vec![]; @@ -174,6 +181,26 @@ impl MerkleBlob { } } } + + pub fn get_lineage_indexes(&self, index: TreeIndex) -> Result, String> { + // TODO: yep, this 'optimization' might be overkill, and should be speed compared regardless + let mut next_index = index; + let mut lineage = vec![]; + loop { + lineage.push(next_index); + let block = self.get_block(next_index)?; + let node_type = + NodeMetadata::node_type_from_bytes(block[..METADATA_SIZE].try_into().unwrap())?; + next_index = RawMerkleNode::parent_from_bytes( + &node_type, + block[METADATA_SIZE..].try_into().unwrap(), + )?; + + if next_index == NULL_PARENT { + return Ok(lineage); + } + } + } } #[cfg(feature = "py-bindings")] @@ -197,7 +224,7 @@ impl MerkleBlob { } } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum RawMerkleNode { // Root { // left: TreeIndex, @@ -235,10 +262,11 @@ impl RawMerkleNode { blob: [u8; DATA_SIZE], ) -> Result { // TODO: add Err results + let parent = Self::parent_from_bytes(&metadata.node_type, &blob)?; match metadata.node_type { NodeType::Internal => Ok(RawMerkleNode::Internal { // TODO: get these right - parent: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[0..4]).unwrap()), + parent, left: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[4..8]).unwrap()), right: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[8..12]).unwrap()), hash: <[u8; 32]>::try_from(&blob[12..44]).unwrap(), @@ -246,7 +274,7 @@ impl RawMerkleNode { }), NodeType::Leaf => Ok(RawMerkleNode::Leaf { // TODO: this try from really right? - parent: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[0..4]).unwrap()), + parent, key_value: KvId::from_be_bytes(<[u8; 8]>::try_from(&blob[4..12]).unwrap()), hash: Hash::try_from(&blob[12..44]).unwrap(), index, @@ -254,6 +282,50 @@ impl RawMerkleNode { } } + fn parent_from_bytes( + node_type: &NodeType, + blob: &[u8; DATA_SIZE], + ) -> Result { + // TODO: a little setup here for pre-optimization to allow walking parents without processing entire nodes + match node_type { + NodeType::Internal => Ok(TreeIndex::from_be_bytes( + <[u8; 4]>::try_from(&blob[0..4]).unwrap(), + )), + NodeType::Leaf => Ok(TreeIndex::from_be_bytes( + <[u8; 4]>::try_from(&blob[0..4]).unwrap(), + )), + } + } + pub fn to_bytes(&self) -> Vec { + let mut blob: Vec = Vec::new(); + match self { + RawMerkleNode::Internal { + parent, + left, + right, + hash, + index: _, + } => { + blob.extend(parent.to_be_bytes()); + blob.extend(left.to_be_bytes()); + blob.extend(right.to_be_bytes()); + blob.extend(hash); + } + RawMerkleNode::Leaf { + parent, + key_value, + hash, + index: _, + } => { + blob.extend(parent.to_be_bytes()); + blob.extend(key_value.to_be_bytes()); + blob.extend(hash); + } + } + + blob + } + pub fn parent(&self) -> TreeIndex { match self { RawMerkleNode::Internal { parent, .. } | RawMerkleNode::Leaf { parent, .. 
} => *parent, @@ -271,7 +343,7 @@ impl NodeMetadata { pub fn from_bytes(blob: [u8; METADATA_SIZE]) -> Result { // TODO: identify some useful structured serialization tooling we use Ok(Self { - node_type: NodeType::from_u8(blob[0])?, + node_type: Self::node_type_from_bytes(blob)?, dirty: match blob[1] { 0 => false, 1 => true, @@ -283,21 +355,29 @@ impl NodeMetadata { pub fn to_bytes(&self) -> [u8; METADATA_SIZE] { [self.node_type.to_u8(), u8::from(self.dirty)] } + + pub fn node_type_from_bytes(blob: [u8; METADATA_SIZE]) -> Result { + NodeType::from_u8(blob[0]) + } } #[cfg(test)] mod tests { + use core::array; + use hex_literal::hex; use super::*; - fn example_blob() -> MerkleBlob { - let something = hex!("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b"); + const EXAMPLE_BLOB: [u8; 138] = hex!("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b"); + + fn example_merkle_blob() -> MerkleBlob { MerkleBlob { - blob: Vec::from(something), + blob: Vec::from(EXAMPLE_BLOB), free_indexes: vec![], } } + #[test] fn test_node_metadata_from_to() { let bytes: [u8; 2] = [0, 1]; @@ -315,13 +395,13 @@ mod tests { #[test] fn test_load_a_python_dump() { // let kv_id = 0x1415161718191A1B; - let merkle_blob = example_blob(); + let merkle_blob = example_merkle_blob(); merkle_blob.get_raw_node(0).unwrap(); } #[test] fn test_get_lineage() { - let merkle_blob = example_blob(); + let merkle_blob = example_merkle_blob(); let lineage = merkle_blob.get_lineage(2).unwrap(); for node in &lineage { println!("{node:?}"); @@ -333,14 +413,74 @@ mod tests { #[test] fn test_get_random_leaf_node() { - let merkle_blob = example_blob(); + let merkle_blob = example_merkle_blob(); let leaf = merkle_blob.get_random_leaf_node(vec![0; 8]).unwrap(); assert_eq!( match leaf { - RawMerkleNode::Leaf { index, .. } => index, - RawMerkleNode::Internal { index, .. } => index, + RawMerkleNode::Internal { index, .. } | RawMerkleNode::Leaf { index, .. 
} => index, }, 2, - ) + ); + } + + #[test] + fn test_build() { + let hash: Hash = array::from_fn(|i| i as u8 + 12); + let root = RawMerkleNode::Internal { + parent: NULL_PARENT, + left: 1, + right: 2, + hash, + index: 0, + }; + let left_leaf = RawMerkleNode::Leaf { + parent: 0, + key_value: 0x0405_0607_0809_0A0B, + hash, + index: 1, + }; + let right_leaf = RawMerkleNode::Leaf { + parent: 0, + key_value: 0x1415_1617_1819_1A1B, + hash, + index: 2, + }; + + let mut blob: Vec = Vec::new(); + + blob.extend( + NodeMetadata { + node_type: NodeType::Internal, + dirty: true, + } + .to_bytes(), + ); + blob.extend(root.to_bytes()); + blob.extend( + NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + } + .to_bytes(), + ); + blob.extend(left_leaf.to_bytes()); + blob.extend( + NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + } + .to_bytes(), + ); + blob.extend(right_leaf.to_bytes()); + + assert_eq!(blob, Vec::from(EXAMPLE_BLOB)); + + let merkle_blob = MerkleBlob { + blob, + free_indexes: vec![], + }; + assert_eq!(merkle_blob.get_raw_node(0).unwrap(), root); + assert_eq!(merkle_blob.get_raw_node(1).unwrap(), left_leaf); + assert_eq!(merkle_blob.get_raw_node(2).unwrap(), right_leaf); } } diff --git a/tests/test_merkle_blob.py b/tests/test_merkle_blob.py index 787ff67e0..dff0f15e7 100644 --- a/tests/test_merkle_blob.py +++ b/tests/test_merkle_blob.py @@ -7,4 +7,5 @@ def test_merkle_blob(): ) merkle_blob = MerkleBlob(blob) print(merkle_blob) + print(dir(merkle_blob)) assert len(merkle_blob) == len(blob) From 46480403800df41c2764ce139959d771abecd14b Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 29 Aug 2024 13:03:11 -0400 Subject: [PATCH 017/181] insert --- Cargo.lock | 1 + crates/chia-datalayer/Cargo.toml | 1 + crates/chia-datalayer/src/lib.rs | 489 ++++++++++++++++++++++++++----- 3 files changed, 416 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 85674096f..d51254a29 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -375,6 +375,7 @@ name = "chia-datalayer" version = "0.1.0" dependencies = [ "chia-traits 0.11.0", + "clvmr", "hex-literal", "pyo3", ] diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index a36f72817..c1ee04c48 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -18,6 +18,7 @@ py-bindings = ["dep:pyo3", "chia-traits/py-bindings"] crate-type = ["rlib"] [dependencies] +clvmr = { workspace = true } chia-traits = { workspace = true } # TODO: dev? 
hex-literal = { workspace = true } diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 4fc989398..c24deb0f2 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -3,7 +3,9 @@ #[cfg(feature = "py-bindings")] use pyo3::{prelude::PyBytesMethods, pyclass, pymethods, types::PyBytes, Bound, PyResult}; +use clvmr::sha2::Sha256; use std::cmp::Ordering; +use std::collections::HashMap; // TODO: clearly shouldn't be hard coded const METADATA_SIZE: usize = 2; @@ -61,29 +63,326 @@ impl NodeType { // } // } -#[derive(Debug)] +fn internal_hash(left_hash: Hash, right_hash: Hash) -> Hash { + // TODO: verify against original reference in blockchain + let mut hasher = Sha256::new(); + hasher.update(b"\x02"); + hasher.update(left_hash); + hasher.update(right_hash); + + hasher.finalize() +} + +// TODO: probably bogus and overflowing or somesuch +const NULL_PARENT: TreeIndex = 0xffff_ffffu32; // 1 << (4 * 8) - 1; + +// TODO: does not enforce matching metadata node type and node enumeration type +struct ParsedBlock { + metadata: NodeMetadata, + node: RawMerkleNode, +} + +impl ParsedBlock { + pub fn to_bytes(&self) -> [u8; BLOCK_SIZE] { + let mut blob: [u8; BLOCK_SIZE] = [0; BLOCK_SIZE]; + blob[..METADATA_SIZE].copy_from_slice(&self.metadata.to_bytes()); + blob[METADATA_SIZE..].copy_from_slice(&self.node.to_bytes()); + + blob + } + + pub fn from_bytes(blob: [u8; BLOCK_SIZE], index: TreeIndex) -> Result { + // TODO: handle invalid indexes? + // TODO: handle overflows? + let metadata_blob: [u8; METADATA_SIZE] = blob + .get(..METADATA_SIZE) + .ok_or(format!("metadata blob out of bounds: {}", blob.len(),))? + .try_into() + .map_err(|e| format!("metadata blob wrong size: {e}"))?; + let data_blob: [u8; DATA_SIZE] = blob + .get(METADATA_SIZE..) + .ok_or("data blob out of bounds".to_string())? + .try_into() + .map_err(|e| format!("data blob wrong size: {e}"))?; + let metadata = match NodeMetadata::from_bytes(metadata_blob) { + Ok(metadata) => metadata, + Err(message) => return Err(format!("failed loading metadata: {message})")), + }; + Ok( + match RawMerkleNode::from_bytes(&metadata, index, data_blob) { + Ok(node) => ParsedBlock { metadata, node }, + Err(message) => return Err(format!("failed loading raw node: {message}")), + }, + ) + } +} +fn get_free_indexes(blob: &Vec) -> Result, String> { + let index_count = blob.len() / BLOCK_SIZE; + + if index_count == 0 { + return Ok(vec![]); + } + + let mut seen_indexes: Vec = vec![false; index_count]; + let mut queue: Vec = vec![0]; + + while queue.len() > 0 { + let index: TreeIndex = queue.pop().unwrap(); + let offset = index as usize * BLOCK_SIZE; + let block = + ParsedBlock::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; + seen_indexes[index as usize] = true; + match block.node { + RawMerkleNode::Internal { left, right, .. } => { + queue.push(left); + queue.push(right); + } + RawMerkleNode::Leaf { .. 
} => (), + } + } + + let mut free_indexes: Vec = vec![]; + for (index, seen) in seen_indexes.iter().enumerate() { + if !seen { + free_indexes.push(index as TreeIndex) + } + } + + Ok(free_indexes) +} + +fn get_keys_values_indexes(blob: &Vec) -> Result, String> { + let index_count = blob.len() / BLOCK_SIZE; + + let mut kv_to_index: HashMap = HashMap::default(); + + if index_count == 0 { + return Ok(kv_to_index); + } + + let mut queue: Vec = vec![0]; + + while queue.len() > 0 { + let index: TreeIndex = queue.pop().unwrap(); + let offset = index as usize * BLOCK_SIZE; + let block = + ParsedBlock::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; + match block.node { + RawMerkleNode::Leaf { key_value, .. } => { + kv_to_index.insert(key_value, index); + } + RawMerkleNode::Internal { .. } => (), + } + } + + Ok(kv_to_index) +} + #[cfg_attr(feature = "py-bindings", pyclass(frozen, name = "MerkleBlob"))] pub struct MerkleBlob { blob: Vec, free_indexes: Vec, + kv_to_index: HashMap, + // TODO: maybe name it next_index_to_allocate + last_allocated_index: TreeIndex, } -// TODO: probably bogus and overflowing or somesuch -const NULL_PARENT: TreeIndex = 0xffff_ffffu32; // 1 << (4 * 8) - 1; - impl MerkleBlob { - pub fn insert(&mut self) -> Result<(), String> { - // TODO: garbage just to use stuff - let index = self.get_new_index(); - self.insert_entry_to_blob(index, [0; BLOCK_SIZE])?; - self.get_random_leaf_node(vec![0, 1, 2, 3, 4, 5, 6, 7])?; + pub fn new(blob: Vec) -> Result { + let length = blob.len(); + let block_count = length / BLOCK_SIZE; + let remainder = length % BLOCK_SIZE; + if remainder != 0 { + return Err(format!( + "blob length must be a multiple of block count, found extra bytes: {remainder}" + )); + } + + // TODO: stop double tree traversals here + let free_indexes = get_free_indexes(&blob).unwrap(); + let kv_to_index = get_keys_values_indexes(&blob).unwrap(); + + Ok(Self { + blob, + free_indexes, + kv_to_index, + last_allocated_index: block_count as TreeIndex, + }) + } + + pub fn insert(&mut self, key_value: KvId, hash: Hash) -> Result<(), String> { + if self.blob.len() == 0 { + let metadata = NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }; + let raw_merkle_node = RawMerkleNode::Leaf { + parent: NULL_PARENT, + key_value, + hash, + index: 0, + }; + self.blob.extend(metadata.to_bytes()); + self.blob.extend(raw_merkle_node.to_bytes()); + self.kv_to_index.insert(key_value, 0); + self.last_allocated_index = 1; + return Ok(()); + } + let mut hasher = Sha256::new(); + hasher.update(key_value.to_be_bytes()); + let seed: Hash = hasher.finalize(); + let old_leaf = self.get_random_leaf_node(Vec::from(seed))?; + let internal_node_hash = internal_hash(old_leaf.hash(), hash); + + if self.kv_to_index.len() == 1 { + self.blob.clear(); + + let new_internal_block = ParsedBlock { + metadata: NodeMetadata { + node_type: NodeType::Internal, + dirty: false, + }, + node: RawMerkleNode::Internal { + parent: NULL_PARENT, + left: 1, + right: 2, + hash: internal_node_hash, + index: 0, + }, + }; + + self.blob.extend(new_internal_block.to_bytes()); + + let left_leaf_block = ParsedBlock { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: RawMerkleNode::Leaf { + parent: 0, + key_value: old_leaf.key_value(), + hash: old_leaf.hash(), + index: 1, + }, + }; + self.blob.extend(left_leaf_block.to_bytes()); + + let right_leaf_block = ParsedBlock { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: RawMerkleNode::Leaf 
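// Aside: the reachability idea behind get_free_indexes/get_keys_values_indexes above, in
// miniature. Toy child pointers stand in for parsed blocks here; any index that cannot be
// reached from the root at index 0 is treated as free. Illustrative only.
#[test]
fn sketch_unreachable_blocks_are_free() {
    // children[i] is Some((left, right)) for an internal node, None for a leaf
    let children: Vec<Option<(u32, u32)>> = vec![Some((1, 3)), None, None, None];

    let mut seen = vec![false; children.len()];
    let mut queue = vec![0u32];
    while let Some(index) = queue.pop() {
        seen[index as usize] = true;
        if let Some((left, right)) = children[index as usize] {
            queue.push(left);
            queue.push(right);
        }
    }

    let free: Vec<u32> = (0..children.len() as u32)
        .filter(|index| !seen[*index as usize])
        .collect();
    assert_eq!(free, vec![2u32]); // index 2 is referenced by nothing, so it can be reused
}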
{ + parent: 0, + key_value: key_value, + hash: hash, + index: 2, + }, + }; + self.blob.extend(right_leaf_block.to_bytes()); + + self.free_indexes.clear(); + self.last_allocated_index = 3; + + return Ok(()); + } + + let new_leaf_index = self.get_new_index(); + let new_internal_node_index = self.get_new_index(); + + let new_leaf_block = ParsedBlock { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: RawMerkleNode::Leaf { + parent: new_internal_node_index, + key_value, + hash, + index: new_leaf_index, + }, + }; + self.insert_entry_to_blob(new_leaf_index, new_leaf_block.to_bytes())?; + + let new_internal_block = ParsedBlock { + metadata: NodeMetadata { + node_type: NodeType::Internal, + dirty: false, + }, + node: RawMerkleNode::Internal { + parent: old_leaf.parent(), + left: old_leaf.index(), + right: new_leaf_index, + hash: internal_node_hash, + index: new_internal_node_index, + }, + }; + self.insert_entry_to_blob(new_internal_node_index, new_internal_block.to_bytes())?; + + let old_parent_index = old_leaf.parent(); + assert!( + old_parent_index != NULL_PARENT, + "{}", + format!("{key_value:?} {hash:?}") + ); + + let mut old_leaf_block = + ParsedBlock::from_bytes(self.get_block(old_leaf.index())?, old_leaf.index())?; + old_leaf_block.node.set_parent(new_internal_node_index); + self.blob.copy_from_slice(&old_leaf_block.to_bytes()); + + let mut old_parent_block = + ParsedBlock::from_bytes(self.get_block(old_leaf.index())?, old_leaf.index())?; + match old_parent_block.node { + RawMerkleNode::Internal { + ref mut left, + ref mut right, + .. + } => { + if old_leaf.index() == *left { + *left = new_internal_node_index; + } else if old_leaf.index() == *right { + *right = new_internal_node_index; + } else { + panic!(); + } + } + RawMerkleNode::Leaf { .. } => panic!(), + } + self.blob.copy_from_slice(&old_parent_block.to_bytes()); + + self.mark_lineage_as_dirty(old_parent_index)?; + self.kv_to_index.insert(key_value, new_internal_node_index); + + Ok(()) + } + + fn mark_lineage_as_dirty(&self, index: TreeIndex) -> Result<(), String> { + let mut index = index; + + while index != NULL_PARENT { + let mut block = ParsedBlock::from_bytes(self.get_block(index)?, index)?; + block.metadata.dirty = true; + index = block.node.parent(); + } Ok(()) } + // fn update_entry( + // index: TreeIndex, + // parent: Option[TreeIndex], + // left: Option[TreeIndex], + // right: Option[TreeIndex], + // hash: Option[Hash], + // key_value: Option[KvId], + // ) fn get_new_index(&mut self) -> TreeIndex { match self.free_indexes.pop() { - None => (self.blob.len() / BLOCK_SIZE) as TreeIndex, + None => { + self.last_allocated_index += 1; + self.last_allocated_index + } Some(new_index) => new_index, } } @@ -135,6 +434,7 @@ impl MerkleBlob { } pub fn get_raw_node(&self, index: TreeIndex) -> Result { + // TODO: use ParsedBlock::from_bytes() // TODO: handle invalid indexes? // TODO: handle overflows? 
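// Aside: the allocation rule get_new_index is aiming for, reduced to a toy: pop a freed
// slot if one exists, otherwise hand out the next never-used index. (The exact
// off-by-one in the bump path gets corrected again in a later commit in this series.)
#[test]
fn sketch_index_allocation() {
    fn new_index(free_indexes: &mut Vec<u32>, next_to_allocate: &mut u32) -> u32 {
        match free_indexes.pop() {
            Some(index) => index,
            None => {
                let index = *next_to_allocate;
                *next_to_allocate += 1;
                index
            }
        }
    }

    let mut free_indexes: Vec<u32> = vec![];
    let mut next_to_allocate = 3u32;
    assert_eq!(new_index(&mut free_indexes, &mut next_to_allocate), 3); // nothing free: extend the blob
    free_indexes.push(1);
    assert_eq!(new_index(&mut free_indexes, &mut next_to_allocate), 1); // a freed block is reused first
}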
let block = self.get_block(index)?; @@ -208,10 +508,7 @@ impl MerkleBlob { impl MerkleBlob { #[new] pub fn init(blob: &Bound<'_, PyBytes>) -> PyResult { - Ok(Self { - blob: blob.as_bytes().to_vec(), - free_indexes: vec![], - }) + Ok(Self::new(blob.as_bytes().to_vec()).unwrap()) } // #[pyo3(name = "get_root")] @@ -296,7 +593,7 @@ impl RawMerkleNode { )), } } - pub fn to_bytes(&self) -> Vec { + pub fn to_bytes(&self) -> [u8; DATA_SIZE] { let mut blob: Vec = Vec::new(); match self { RawMerkleNode::Internal { @@ -323,7 +620,7 @@ impl RawMerkleNode { } } - blob + blob.try_into().unwrap() } pub fn parent(&self) -> TreeIndex { @@ -331,6 +628,33 @@ impl RawMerkleNode { RawMerkleNode::Internal { parent, .. } | RawMerkleNode::Leaf { parent, .. } => *parent, } } + + pub fn hash(&self) -> Hash { + match self { + RawMerkleNode::Internal { hash, .. } | RawMerkleNode::Leaf { hash, .. } => *hash, + } + } + + pub fn index(&self) -> TreeIndex { + match self { + RawMerkleNode::Internal { index, .. } | RawMerkleNode::Leaf { index, .. } => *index, + } + } + + pub fn set_parent(&mut self, p: TreeIndex) { + match self { + &mut RawMerkleNode::Internal { ref mut parent, .. } + | RawMerkleNode::Leaf { ref mut parent, .. } => *parent = p, + } + } + + // TODO: yes i know i'm trying to write this code in a non-rusty way and i need to stop that + pub fn key_value(&self) -> KvId { + match self { + RawMerkleNode::Leaf { key_value, .. } => *key_value, + _ => panic!(), + } + } } #[derive(Debug, PartialEq)] @@ -363,19 +687,50 @@ impl NodeMetadata { #[cfg(test)] mod tests { - use core::array; - use hex_literal::hex; use super::*; const EXAMPLE_BLOB: [u8; 138] = hex!("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b"); + const HASH: Hash = [ + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, + ]; + + const EXAMPLE_ROOT: RawMerkleNode = RawMerkleNode::Internal { + parent: NULL_PARENT, + left: 1, + right: 2, + hash: HASH, + index: 0, + }; + const EXAMPLE_ROOT_METADATA: NodeMetadata = NodeMetadata { + node_type: NodeType::Internal, + dirty: true, + }; + const EXAMPLE_LEFT_LEAF: RawMerkleNode = RawMerkleNode::Leaf { + parent: 0, + key_value: 0x0405_0607_0809_0A0B, + hash: HASH, + index: 1, + }; + const EXAMPLE_LEFT_LEAF_METADATA: NodeMetadata = NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }; + const EXAMPLE_RIGHT_LEAF: RawMerkleNode = RawMerkleNode::Leaf { + parent: 0, + key_value: 0x1415_1617_1819_1A1B, + hash: HASH, + index: 2, + }; + const EXAMPLE_RIGHT_LEAF_METADATA: NodeMetadata = NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }; fn example_merkle_blob() -> MerkleBlob { - MerkleBlob { - blob: Vec::from(EXAMPLE_BLOB), - free_indexes: vec![], - } + MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap() } #[test] @@ -424,63 +779,47 @@ mod tests { } #[test] - fn test_build() { - let hash: Hash = array::from_fn(|i| i as u8 + 12); - let root = RawMerkleNode::Internal { - parent: NULL_PARENT, - left: 1, - right: 2, - hash, - index: 0, - }; - let left_leaf = RawMerkleNode::Leaf { - parent: 0, - key_value: 0x0405_0607_0809_0A0B, - hash, - index: 1, - }; - let right_leaf = RawMerkleNode::Leaf { - parent: 0, - key_value: 0x1415_1617_1819_1A1B, - hash, - index: 2, - }; - + fn test_build_blob_and_read() { 
let mut blob: Vec = Vec::new(); - blob.extend( - NodeMetadata { - node_type: NodeType::Internal, - dirty: true, - } - .to_bytes(), - ); - blob.extend(root.to_bytes()); - blob.extend( - NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, - } - .to_bytes(), - ); - blob.extend(left_leaf.to_bytes()); - blob.extend( - NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, - } - .to_bytes(), - ); - blob.extend(right_leaf.to_bytes()); + blob.extend(EXAMPLE_ROOT_METADATA.to_bytes()); + blob.extend(EXAMPLE_ROOT.to_bytes()); + blob.extend(EXAMPLE_LEFT_LEAF_METADATA.to_bytes()); + blob.extend(EXAMPLE_LEFT_LEAF.to_bytes()); + blob.extend(EXAMPLE_RIGHT_LEAF_METADATA.to_bytes()); + blob.extend(EXAMPLE_RIGHT_LEAF.to_bytes()); assert_eq!(blob, Vec::from(EXAMPLE_BLOB)); - let merkle_blob = MerkleBlob { - blob, - free_indexes: vec![], - }; - assert_eq!(merkle_blob.get_raw_node(0).unwrap(), root); - assert_eq!(merkle_blob.get_raw_node(1).unwrap(), left_leaf); - assert_eq!(merkle_blob.get_raw_node(2).unwrap(), right_leaf); + let merkle_blob = MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap(); + + assert_eq!(merkle_blob.get_raw_node(0).unwrap(), EXAMPLE_ROOT); + assert_eq!(merkle_blob.get_raw_node(1).unwrap(), EXAMPLE_LEFT_LEAF); + assert_eq!(merkle_blob.get_raw_node(2).unwrap(), EXAMPLE_RIGHT_LEAF); + } + + #[test] + fn test_build_merkle() { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + merkle_blob + .insert(EXAMPLE_LEFT_LEAF.key_value(), EXAMPLE_LEFT_LEAF.hash()) + .unwrap(); + merkle_blob + .insert(EXAMPLE_RIGHT_LEAF.key_value(), EXAMPLE_RIGHT_LEAF.hash()) + .unwrap(); + + // TODO: just hacking here to compare with the ~wrong~ simplified reference + let mut root = ParsedBlock::from_bytes(merkle_blob.get_block(0).unwrap(), 0).unwrap(); + root.metadata.dirty = true; + match root.node { + RawMerkleNode::Internal { ref mut hash, .. } => { + *hash = HASH; + } + RawMerkleNode::Leaf { .. 
} => panic!(), + } + merkle_blob.blob[..BLOCK_SIZE].copy_from_slice(&root.to_bytes()); + + assert_eq!(merkle_blob.blob, Vec::from(EXAMPLE_BLOB)); } } From 444b8cdb57391875349d60b0ed88609ff4e0f602 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 3 Sep 2024 09:00:45 -0400 Subject: [PATCH 018/181] fixup --- crates/chia-datalayer/src/lib.rs | 129 +++++++++++++++++++++++++------ tests/test_merkle_blob.py | 38 +++++++++ 2 files changed, 142 insertions(+), 25 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index c24deb0f2..6cdc390a5 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1,7 +1,8 @@ // use std::collections::HashMap; +use pyo3::buffer::PyBuffer; #[cfg(feature = "py-bindings")] -use pyo3::{prelude::PyBytesMethods, pyclass, pymethods, types::PyBytes, Bound, PyResult}; +use pyo3::{pyclass, pymethods, PyResult}; use clvmr::sha2::Sha256; use std::cmp::Ordering; @@ -178,7 +179,7 @@ fn get_keys_values_indexes(blob: &Vec) -> Result, S Ok(kv_to_index) } -#[cfg_attr(feature = "py-bindings", pyclass(frozen, name = "MerkleBlob"))] +#[cfg_attr(feature = "py-bindings", pyclass(name = "MerkleBlob"))] pub struct MerkleBlob { blob: Vec, free_indexes: Vec, @@ -212,19 +213,23 @@ impl MerkleBlob { pub fn insert(&mut self, key_value: KvId, hash: Hash) -> Result<(), String> { if self.blob.len() == 0 { - let metadata = NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, - }; - let raw_merkle_node = RawMerkleNode::Leaf { - parent: NULL_PARENT, - key_value, - hash, - index: 0, + let new_leaf_block = ParsedBlock { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: RawMerkleNode::Leaf { + parent: NULL_PARENT, + key_value, + hash, + index: 0, + }, }; - self.blob.extend(metadata.to_bytes()); - self.blob.extend(raw_merkle_node.to_bytes()); + + self.blob.extend(new_leaf_block.to_bytes()); + self.kv_to_index.insert(key_value, 0); + self.free_indexes.clear(); self.last_allocated_index = 1; return Ok(()); } @@ -266,6 +271,10 @@ impl MerkleBlob { }, }; self.blob.extend(left_leaf_block.to_bytes()); + self.kv_to_index.insert( + left_leaf_block.node.key_value(), + left_leaf_block.node.index(), + ); let right_leaf_block = ParsedBlock { metadata: NodeMetadata { @@ -274,12 +283,16 @@ impl MerkleBlob { }, node: RawMerkleNode::Leaf { parent: 0, - key_value: key_value, - hash: hash, + key_value, + hash, index: 2, }, }; self.blob.extend(right_leaf_block.to_bytes()); + self.kv_to_index.insert( + right_leaf_block.node.key_value(), + right_leaf_block.node.index(), + ); self.free_indexes.clear(); self.last_allocated_index = 3; @@ -329,10 +342,11 @@ impl MerkleBlob { let mut old_leaf_block = ParsedBlock::from_bytes(self.get_block(old_leaf.index())?, old_leaf.index())?; old_leaf_block.node.set_parent(new_internal_node_index); - self.blob.copy_from_slice(&old_leaf_block.to_bytes()); + let offset = old_leaf_block.node.index() as usize * BLOCK_SIZE; + self.blob[offset..offset + BLOCK_SIZE].copy_from_slice(&old_leaf_block.to_bytes()); let mut old_parent_block = - ParsedBlock::from_bytes(self.get_block(old_leaf.index())?, old_leaf.index())?; + ParsedBlock::from_bytes(self.get_block(old_parent_index)?, old_parent_index)?; match old_parent_block.node { RawMerkleNode::Internal { ref mut left, @@ -349,7 +363,8 @@ impl MerkleBlob { } RawMerkleNode::Leaf { .. 
} => panic!(), } - self.blob.copy_from_slice(&old_parent_block.to_bytes()); + let offset = old_parent_index as usize * BLOCK_SIZE; + self.blob[offset..offset + BLOCK_SIZE].copy_from_slice(&old_parent_block.to_bytes()); self.mark_lineage_as_dirty(old_parent_index)?; self.kv_to_index.insert(key_value, new_internal_node_index); @@ -357,12 +372,14 @@ impl MerkleBlob { Ok(()) } - fn mark_lineage_as_dirty(&self, index: TreeIndex) -> Result<(), String> { + fn mark_lineage_as_dirty(&mut self, index: TreeIndex) -> Result<(), String> { let mut index = index; while index != NULL_PARENT { let mut block = ParsedBlock::from_bytes(self.get_block(index)?, index)?; block.metadata.dirty = true; + let offset = index as usize * BLOCK_SIZE; + self.blob[offset..offset + BLOCK_SIZE].copy_from_slice(&block.to_bytes()); index = block.node.parent(); } @@ -381,7 +398,7 @@ impl MerkleBlob { match self.free_indexes.pop() { None => { self.last_allocated_index += 1; - self.last_allocated_index + self.last_allocated_index - 1 } Some(new_index) => new_index, } @@ -507,8 +524,15 @@ impl MerkleBlob { #[pymethods] impl MerkleBlob { #[new] - pub fn init(blob: &Bound<'_, PyBytes>) -> PyResult { - Ok(Self::new(blob.as_bytes().to_vec()).unwrap()) + pub fn py_init(blob: PyBuffer) -> PyResult { + if !blob.is_c_contiguous() { + panic!("from_bytes() must be called with a contiguous buffer"); + } + #[allow(unsafe_code)] + let slice = + unsafe { std::slice::from_raw_parts(blob.buf_ptr() as *const u8, blob.len_bytes()) }; + + Ok(Self::new(Vec::from(slice)).unwrap()) } // #[pyo3(name = "get_root")] @@ -516,7 +540,18 @@ impl MerkleBlob { // ChiaToPython::to_python(&Bytes32::new(self.get_root()), py) // } - pub fn __len__(&self) -> PyResult { + #[pyo3(name = "insert")] + pub fn py_insert(&mut self, key_value: KvId, hash: Hash) -> PyResult<()> { + // TODO: consider the error + // self.insert(key_value, hash).map_err(|_| PyValueError::new_err("yeppers")) + self.insert(key_value, hash).unwrap(); + // self.insert(key_value, hash).map_err(|_| PyValueError::new_err("invalid key"))?; + + Ok(()) + } + + #[pyo3(name = "__len__")] + pub fn py_len(&self) -> PyResult { Ok(self.blob.len()) } } @@ -687,9 +722,12 @@ impl NodeMetadata { #[cfg(test)] mod tests { - use hex_literal::hex; - use super::*; + use chia_traits::Streamable; + use hex_literal::hex; + use std::fs; + use std::io; + use std::io::Write; const EXAMPLE_BLOB: [u8; 138] = hex!("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b"); const HASH: Hash = [ @@ -822,4 +860,45 @@ mod tests { assert_eq!(merkle_blob.blob, Vec::from(EXAMPLE_BLOB)); } + + #[test] + fn test_just_insert_a_bunch() { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + use std::time::{Duration, Instant}; + let mut total_time = Duration::new(0, 0); + + for i in 0..100000 { + let start = Instant::now(); + merkle_blob + // TODO: yeah this hash is garbage + .insert(i as KvId, HASH) + .unwrap(); + let end = Instant::now(); + total_time += end.duration_since(start); + + // match i + 1 { + // 2 => assert_eq!(merkle_blob.blob.len(), 3 * BLOCK_SIZE), + // 3 => assert_eq!(merkle_blob.blob.len(), 5 * BLOCK_SIZE), + // _ => (), + // } + + // let file = fs::File::create(format!("/home/altendky/tmp/mbt/rs/{i:0>4}")).unwrap(); + // let mut file = io::LineWriter::new(file); + // for block in 
merkle_blob.blob.chunks(BLOCK_SIZE) { + // let mut s = String::new(); + // for byte in block { + // s.push_str(&format!("{:02x}", byte)); + // } + // s.push_str("\n"); + // file.write_all(s.as_bytes()).unwrap(); + // } + + // fs::write(format!("/home/altendky/tmp/mbt/rs/{i:0>4}"), &merkle_blob.blob).unwrap(); + } + // println!("{:?}", merkle_blob.blob) + + println!("total time: {total_time:?}") + // TODO: check, well... something + } } diff --git a/tests/test_merkle_blob.py b/tests/test_merkle_blob.py index dff0f15e7..d0666afe6 100644 --- a/tests/test_merkle_blob.py +++ b/tests/test_merkle_blob.py @@ -9,3 +9,41 @@ def test_merkle_blob(): print(merkle_blob) print(dir(merkle_blob)) assert len(merkle_blob) == len(blob) + + +def test_just_insert_a_bunch() -> None: + HASH = bytes([ + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, 43, + ]) + + import pathlib + path = pathlib.Path("~/tmp/mbt/").expanduser() + path.joinpath("py").mkdir(parents=True, exist_ok=True) + path.joinpath("rs").mkdir(parents=True, exist_ok=True) + + merkle_blob = MerkleBlob(blob=bytearray()) + import time + total_time = 0 + for i in range(100000): + start = time.monotonic() + merkle_blob.insert(i, HASH) + end = time.monotonic() + total_time += end - start + + # kv_count = i + 1 + # if kv_count == 2: + # assert len(merkle_blob.blob) == 3 * spacing + # elif kv_count == 3: + # assert len(merkle_blob.blob) == 5 * spacing + # + # with path.joinpath("py", f"{i:04}").open(mode="w") as file: + # for offset in range(0, len(merkle_blob.blob), spacing): + # file.write(merkle_blob.blob[offset:offset + spacing].hex()) + # file.write("\n") + # path.joinpath("py", f"{i:04}").write_bytes(merkle_blob.blob) + + # rs = pathlib.Path("~/repos/chia_rs/crates/chia-datalayer/src/test_just_insert_a_bunch_reference").expanduser().read_bytes() + # b = bytes(merkle_blob.blob) + # assert b == rs, 'not the same' + assert False, f"total time: {total_time}" From 88bd27108ef63b07dfcdb8ef417270fce126382e Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 5 Sep 2024 13:02:22 -0400 Subject: [PATCH 019/181] split up insert --- crates/chia-datalayer/src/lib.rs | 199 +++++++++++++++++-------------- 1 file changed, 111 insertions(+), 88 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 6cdc390a5..abf13f66d 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1,8 +1,7 @@ // use std::collections::HashMap; -use pyo3::buffer::PyBuffer; #[cfg(feature = "py-bindings")] -use pyo3::{pyclass, pymethods, PyResult}; +use pyo3::{buffer::PyBuffer, pyclass, pymethods, PyResult}; use clvmr::sha2::Sha256; use std::cmp::Ordering; @@ -212,27 +211,11 @@ impl MerkleBlob { } pub fn insert(&mut self, key_value: KvId, hash: Hash) -> Result<(), String> { + // TODO: what about only unused providing a blob length? 
if self.blob.len() == 0 { - let new_leaf_block = ParsedBlock { - metadata: NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, - }, - node: RawMerkleNode::Leaf { - parent: NULL_PARENT, - key_value, - hash, - index: 0, - }, - }; - - self.blob.extend(new_leaf_block.to_bytes()); - - self.kv_to_index.insert(key_value, 0); - self.free_indexes.clear(); - self.last_allocated_index = 1; - return Ok(()); + return self.insert_first(key_value, hash); } + let mut hasher = Sha256::new(); hasher.update(key_value.to_be_bytes()); let seed: Hash = hasher.finalize(); @@ -240,66 +223,109 @@ impl MerkleBlob { let internal_node_hash = internal_hash(old_leaf.hash(), hash); if self.kv_to_index.len() == 1 { - self.blob.clear(); - - let new_internal_block = ParsedBlock { - metadata: NodeMetadata { - node_type: NodeType::Internal, - dirty: false, - }, - node: RawMerkleNode::Internal { - parent: NULL_PARENT, - left: 1, - right: 2, - hash: internal_node_hash, - index: 0, - }, - }; - - self.blob.extend(new_internal_block.to_bytes()); - - let left_leaf_block = ParsedBlock { - metadata: NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, - }, - node: RawMerkleNode::Leaf { - parent: 0, - key_value: old_leaf.key_value(), - hash: old_leaf.hash(), - index: 1, - }, - }; - self.blob.extend(left_leaf_block.to_bytes()); - self.kv_to_index.insert( - left_leaf_block.node.key_value(), - left_leaf_block.node.index(), - ); - - let right_leaf_block = ParsedBlock { - metadata: NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, - }, - node: RawMerkleNode::Leaf { - parent: 0, - key_value, - hash, - index: 2, - }, - }; - self.blob.extend(right_leaf_block.to_bytes()); - self.kv_to_index.insert( - right_leaf_block.node.key_value(), - right_leaf_block.node.index(), - ); - - self.free_indexes.clear(); - self.last_allocated_index = 3; - - return Ok(()); + return self.insert_second(key_value, hash, old_leaf, internal_node_hash); } + self.insert_third_or_later(key_value, hash, old_leaf, internal_node_hash) + } + + fn insert_first(&mut self, key_value: KvId, hash: Hash) -> Result<(), String> { + let new_leaf_block = ParsedBlock { + metadata: crate::NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: crate::RawMerkleNode::Leaf { + parent: NULL_PARENT, + key_value, + hash, + index: 0, + }, + }; + + self.blob.extend(new_leaf_block.to_bytes()); + + self.kv_to_index.insert(key_value, 0); + self.free_indexes.clear(); + self.last_allocated_index = 1; + + Ok(()) + } + + fn insert_second( + &mut self, + key_value: KvId, + hash: Hash, + old_leaf: RawMerkleNode, + internal_node_hash: Hash, + ) -> Result<(), String> { + self.blob.clear(); + + let new_internal_block = ParsedBlock { + metadata: NodeMetadata { + node_type: NodeType::Internal, + dirty: false, + }, + node: RawMerkleNode::Internal { + parent: NULL_PARENT, + left: 1, + right: 2, + hash: internal_node_hash, + index: 0, + }, + }; + + self.blob.extend(new_internal_block.to_bytes()); + + let left_leaf_block = ParsedBlock { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: RawMerkleNode::Leaf { + parent: 0, + key_value: old_leaf.key_value(), + hash: old_leaf.hash(), + index: 1, + }, + }; + self.blob.extend(left_leaf_block.to_bytes()); + self.kv_to_index.insert( + left_leaf_block.node.key_value(), + left_leaf_block.node.index(), + ); + + let right_leaf_block = ParsedBlock { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: RawMerkleNode::Leaf { + parent: 0, + key_value, + hash, + index: 2, 
+ }, + }; + self.blob.extend(right_leaf_block.to_bytes()); + self.kv_to_index.insert( + right_leaf_block.node.key_value(), + right_leaf_block.node.index(), + ); + + self.free_indexes.clear(); + self.last_allocated_index = 3; + + Ok(()) + } + + fn insert_third_or_later( + &mut self, + key_value: KvId, + hash: Hash, + old_leaf: RawMerkleNode, + internal_node_hash: Hash, + ) -> Result<(), String> { let new_leaf_index = self.get_new_index(); let new_internal_node_index = self.get_new_index(); @@ -339,11 +365,10 @@ impl MerkleBlob { format!("{key_value:?} {hash:?}") ); - let mut old_leaf_block = - ParsedBlock::from_bytes(self.get_block(old_leaf.index())?, old_leaf.index())?; - old_leaf_block.node.set_parent(new_internal_node_index); - let offset = old_leaf_block.node.index() as usize * BLOCK_SIZE; - self.blob[offset..offset + BLOCK_SIZE].copy_from_slice(&old_leaf_block.to_bytes()); + let mut block = + ParsedBlock::from_bytes(self.get_block(old_leaf.index())?, new_internal_node_index)?; + block.node.set_parent(new_internal_node_index); + self.insert_entry_to_blob(old_leaf.index(), block.to_bytes())?; let mut old_parent_block = ParsedBlock::from_bytes(self.get_block(old_parent_index)?, old_parent_index)?; @@ -363,8 +388,7 @@ impl MerkleBlob { } RawMerkleNode::Leaf { .. } => panic!(), } - let offset = old_parent_index as usize * BLOCK_SIZE; - self.blob[offset..offset + BLOCK_SIZE].copy_from_slice(&old_parent_block.to_bytes()); + self.insert_entry_to_blob(old_parent_index, old_parent_block.to_bytes())?; self.mark_lineage_as_dirty(old_parent_index)?; self.kv_to_index.insert(key_value, new_internal_node_index); @@ -378,8 +402,7 @@ impl MerkleBlob { while index != NULL_PARENT { let mut block = ParsedBlock::from_bytes(self.get_block(index)?, index)?; block.metadata.dirty = true; - let offset = index as usize * BLOCK_SIZE; - self.blob[offset..offset + BLOCK_SIZE].copy_from_slice(&block.to_bytes()); + self.insert_entry_to_blob(index, block.to_bytes())?; index = block.node.parent(); } From 7b99213c7589d403f72f405873fd7ade7869fb2f Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 5 Sep 2024 13:08:41 -0400 Subject: [PATCH 020/181] make it `.get_random_leaf_node_from_bytes()` --- crates/chia-datalayer/src/lib.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index abf13f66d..203ceb4a4 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -216,10 +216,7 @@ impl MerkleBlob { return self.insert_first(key_value, hash); } - let mut hasher = Sha256::new(); - hasher.update(key_value.to_be_bytes()); - let seed: Hash = hasher.finalize(); - let old_leaf = self.get_random_leaf_node(Vec::from(seed))?; + let old_leaf = self.get_random_leaf_node_from_bytes(Vec::from(key_value.to_be_bytes()))?; let internal_node_hash = internal_hash(old_leaf.hash(), hash); if self.kv_to_index.len() == 1 { @@ -427,7 +424,14 @@ impl MerkleBlob { } } - fn get_random_leaf_node(&self, seed: Vec) -> Result { + fn get_random_leaf_node_from_bytes( + &self, + seed_bytes: Vec, + ) -> Result { + let mut hasher = Sha256::new(); + hasher.update(seed_bytes); + let seed: Hash = hasher.finalize(); + let mut node = self.get_raw_node(0)?; for byte in seed { for bit in 0..8 { @@ -830,12 +834,14 @@ mod tests { #[test] fn test_get_random_leaf_node() { let merkle_blob = example_merkle_blob(); - let leaf = merkle_blob.get_random_leaf_node(vec![0; 8]).unwrap(); + let leaf = merkle_blob + 
.get_random_leaf_node_from_bytes(vec![0; 8]) + .unwrap(); assert_eq!( match leaf { RawMerkleNode::Internal { index, .. } | RawMerkleNode::Leaf { index, .. } => index, }, - 2, + 1, ); } From 7efc3ff80f0ee817e9eb17ee70d17d0c4198ca7f Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 5 Sep 2024 15:26:44 -0400 Subject: [PATCH 021/181] more testing --- Cargo.lock | 2 + crates/chia-datalayer/Cargo.toml | 7 +- crates/chia-datalayer/src/lib.rs | 396 ++++++++++++++++++------------- 3 files changed, 232 insertions(+), 173 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5581899f5..9295ef186 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -375,9 +375,11 @@ name = "chia-datalayer" version = "0.1.0" dependencies = [ "chia-traits 0.11.0", + "clvm-utils", "clvmr", "hex-literal", "pyo3", + "rstest", ] [[package]] diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index c1ee04c48..ee3235f3b 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -20,6 +20,9 @@ crate-type = ["rlib"] [dependencies] clvmr = { workspace = true } chia-traits = { workspace = true } -# TODO: dev? -hex-literal = { workspace = true } pyo3 = { workspace = true, optional = true } + +[dev-dependencies] +clvm-utils = { workspace = true } +hex-literal = { workspace = true } +rstest = { workspace = true } diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 203ceb4a4..caab724a1 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -19,7 +19,7 @@ type Hash = [u8; 32]; type Block = [u8; BLOCK_SIZE]; type KvId = u64; -#[derive(Debug, Hash, Eq, PartialEq)] +#[derive(Clone, Debug, Hash, Eq, PartialEq)] #[repr(u8)] pub enum NodeType { Internal = 0, @@ -73,8 +73,176 @@ fn internal_hash(left_hash: Hash, right_hash: Hash) -> Hash { hasher.finalize() } -// TODO: probably bogus and overflowing or somesuch -const NULL_PARENT: TreeIndex = 0xffff_ffffu32; // 1 << (4 * 8) - 1; +const NULL_PARENT: TreeIndex = 0xffff_ffffu32; + +#[derive(Debug, PartialEq)] +pub struct NodeMetadata { + pub node_type: NodeType, + pub dirty: bool, +} + +impl NodeMetadata { + pub fn from_bytes(blob: [u8; METADATA_SIZE]) -> Result { + // TODO: could save 1-2% of tree space by packing (and maybe don't do that) + // TODO: identify some useful structured serialization tooling we use + Ok(Self { + node_type: Self::node_type_from_bytes(blob)?, + dirty: Self::dirty_from_bytes(blob)?, + }) + } + + pub fn to_bytes(&self) -> [u8; METADATA_SIZE] { + [self.node_type.to_u8(), u8::from(self.dirty)] + } + + pub fn node_type_from_bytes(blob: [u8; METADATA_SIZE]) -> Result { + NodeType::from_u8(blob[0]) + } + + pub fn dirty_from_bytes(blob: [u8; METADATA_SIZE]) -> Result { + match blob[1] { + 0 => Ok(false), + 1 => Ok(true), + other => return Err(format!("invalid dirty value: {other}")), + } + } +} + +#[derive(Debug, PartialEq)] +pub enum RawMerkleNode { + // Root { + // left: TreeIndex, + // right: TreeIndex, + // hash: Hash, + // // TODO: kinda feels questionable having it be aware of its own location + // // TODO: just always at zero? 
+ // index: TreeIndex, + // }, + Internal { + parent: TreeIndex, + left: TreeIndex, + right: TreeIndex, + hash: Hash, + // TODO: kinda feels questionable having it be aware of its own location + index: TreeIndex, + }, + Leaf { + parent: TreeIndex, + key_value: KvId, + hash: Hash, + // TODO: kinda feels questionable having it be aware of its own location + index: TreeIndex, + }, +} + +impl RawMerkleNode { + // fn discriminant(&self) -> u8 { + // unsafe { *(self as *const Self as *const u8) } + // } + + pub fn from_bytes( + metadata: &NodeMetadata, + index: TreeIndex, + blob: [u8; DATA_SIZE], + ) -> Result { + // TODO: add Err results + let parent = Self::parent_from_bytes(&metadata.node_type, &blob)?; + match metadata.node_type { + NodeType::Internal => Ok(RawMerkleNode::Internal { + // TODO: get these right + parent, + left: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[4..8]).unwrap()), + right: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[8..12]).unwrap()), + hash: <[u8; 32]>::try_from(&blob[12..44]).unwrap(), + index, + }), + NodeType::Leaf => Ok(RawMerkleNode::Leaf { + // TODO: this try from really right? + parent, + key_value: KvId::from_be_bytes(<[u8; 8]>::try_from(&blob[4..12]).unwrap()), + hash: Hash::try_from(&blob[12..44]).unwrap(), + index, + }), + } + } + + fn parent_from_bytes( + node_type: &NodeType, + blob: &[u8; DATA_SIZE], + ) -> Result { + // TODO: a little setup here for pre-optimization to allow walking parents without processing entire nodes + match node_type { + NodeType::Internal => Ok(TreeIndex::from_be_bytes( + <[u8; 4]>::try_from(&blob[0..4]).unwrap(), + )), + NodeType::Leaf => Ok(TreeIndex::from_be_bytes( + <[u8; 4]>::try_from(&blob[0..4]).unwrap(), + )), + } + } + pub fn to_bytes(&self) -> [u8; DATA_SIZE] { + let mut blob: Vec = Vec::new(); + match self { + RawMerkleNode::Internal { + parent, + left, + right, + hash, + index: _, + } => { + blob.extend(parent.to_be_bytes()); + blob.extend(left.to_be_bytes()); + blob.extend(right.to_be_bytes()); + blob.extend(hash); + } + RawMerkleNode::Leaf { + parent, + key_value, + hash, + index: _, + } => { + blob.extend(parent.to_be_bytes()); + blob.extend(key_value.to_be_bytes()); + blob.extend(hash); + } + } + + blob.try_into().unwrap() + } + + pub fn parent(&self) -> TreeIndex { + match self { + RawMerkleNode::Internal { parent, .. } | RawMerkleNode::Leaf { parent, .. } => *parent, + } + } + + pub fn hash(&self) -> Hash { + match self { + RawMerkleNode::Internal { hash, .. } | RawMerkleNode::Leaf { hash, .. } => *hash, + } + } + + pub fn index(&self) -> TreeIndex { + match self { + RawMerkleNode::Internal { index, .. } | RawMerkleNode::Leaf { index, .. } => *index, + } + } + + pub fn set_parent(&mut self, p: TreeIndex) { + match self { + &mut RawMerkleNode::Internal { ref mut parent, .. } + | RawMerkleNode::Leaf { ref mut parent, .. } => *parent = p, + } + } + + // TODO: yes i know i'm trying to write this code in a non-rusty way and i need to stop that + pub fn key_value(&self) -> KvId { + match self { + RawMerkleNode::Leaf { key_value, .. 
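// Aside: the fixed field offsets that from_bytes/to_bytes above agree on, checked against a
// hand-built internal-node data blob shaped like the example root from the tests. The
// offsets are read off the parsing code, so treat this as a summary rather than a spec.
#[test]
fn sketch_internal_node_field_offsets() {
    // [0..4) parent | [4..8) left | [8..12) right | [12..44) hash  (leaf: [4..12) is the key instead)
    let mut data = [0u8; 44];
    data[0..4].copy_from_slice(&0xffff_ffffu32.to_be_bytes()); // parent sentinel for the root
    data[4..8].copy_from_slice(&1u32.to_be_bytes()); // left child index
    data[8..12].copy_from_slice(&2u32.to_be_bytes()); // right child index
    for (offset, byte) in data[12..44].iter_mut().enumerate() {
        *byte = (offset + 12) as u8; // the 12..=43 hash used by the tests
    }

    let left = u32::from_be_bytes(data[4..8].try_into().unwrap());
    let right = u32::from_be_bytes(data[8..12].try_into().unwrap());
    assert_eq!((left, right), (1, 2));
}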
} => *key_value, + _ => panic!(), + } + } +} // TODO: does not enforce matching metadata node type and node enumeration type struct ParsedBlock { @@ -116,6 +284,7 @@ impl ParsedBlock { ) } } + fn get_free_indexes(blob: &Vec) -> Result, String> { let index_count = blob.len() / BLOCK_SIZE; @@ -583,175 +752,13 @@ impl MerkleBlob { } } -#[derive(Debug, PartialEq)] -pub enum RawMerkleNode { - // Root { - // left: TreeIndex, - // right: TreeIndex, - // hash: Hash, - // // TODO: kinda feels questionable having it be aware of its own location - // // TODO: just always at zero? - // index: TreeIndex, - // }, - Internal { - parent: TreeIndex, - left: TreeIndex, - right: TreeIndex, - hash: Hash, - // TODO: kinda feels questionable having it be aware of its own location - index: TreeIndex, - }, - Leaf { - parent: TreeIndex, - key_value: KvId, - hash: Hash, - // TODO: kinda feels questionable having it be aware of its own location - index: TreeIndex, - }, -} - -impl RawMerkleNode { - // fn discriminant(&self) -> u8 { - // unsafe { *(self as *const Self as *const u8) } - // } - - pub fn from_bytes( - metadata: &NodeMetadata, - index: TreeIndex, - blob: [u8; DATA_SIZE], - ) -> Result { - // TODO: add Err results - let parent = Self::parent_from_bytes(&metadata.node_type, &blob)?; - match metadata.node_type { - NodeType::Internal => Ok(RawMerkleNode::Internal { - // TODO: get these right - parent, - left: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[4..8]).unwrap()), - right: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[8..12]).unwrap()), - hash: <[u8; 32]>::try_from(&blob[12..44]).unwrap(), - index, - }), - NodeType::Leaf => Ok(RawMerkleNode::Leaf { - // TODO: this try from really right? - parent, - key_value: KvId::from_be_bytes(<[u8; 8]>::try_from(&blob[4..12]).unwrap()), - hash: Hash::try_from(&blob[12..44]).unwrap(), - index, - }), - } - } - - fn parent_from_bytes( - node_type: &NodeType, - blob: &[u8; DATA_SIZE], - ) -> Result { - // TODO: a little setup here for pre-optimization to allow walking parents without processing entire nodes - match node_type { - NodeType::Internal => Ok(TreeIndex::from_be_bytes( - <[u8; 4]>::try_from(&blob[0..4]).unwrap(), - )), - NodeType::Leaf => Ok(TreeIndex::from_be_bytes( - <[u8; 4]>::try_from(&blob[0..4]).unwrap(), - )), - } - } - pub fn to_bytes(&self) -> [u8; DATA_SIZE] { - let mut blob: Vec = Vec::new(); - match self { - RawMerkleNode::Internal { - parent, - left, - right, - hash, - index: _, - } => { - blob.extend(parent.to_be_bytes()); - blob.extend(left.to_be_bytes()); - blob.extend(right.to_be_bytes()); - blob.extend(hash); - } - RawMerkleNode::Leaf { - parent, - key_value, - hash, - index: _, - } => { - blob.extend(parent.to_be_bytes()); - blob.extend(key_value.to_be_bytes()); - blob.extend(hash); - } - } - - blob.try_into().unwrap() - } - - pub fn parent(&self) -> TreeIndex { - match self { - RawMerkleNode::Internal { parent, .. } | RawMerkleNode::Leaf { parent, .. } => *parent, - } - } - - pub fn hash(&self) -> Hash { - match self { - RawMerkleNode::Internal { hash, .. } | RawMerkleNode::Leaf { hash, .. } => *hash, - } - } - - pub fn index(&self) -> TreeIndex { - match self { - RawMerkleNode::Internal { index, .. } | RawMerkleNode::Leaf { index, .. } => *index, - } - } - - pub fn set_parent(&mut self, p: TreeIndex) { - match self { - &mut RawMerkleNode::Internal { ref mut parent, .. } - | RawMerkleNode::Leaf { ref mut parent, .. 
} => *parent = p, - } - } - - // TODO: yes i know i'm trying to write this code in a non-rusty way and i need to stop that - pub fn key_value(&self) -> KvId { - match self { - RawMerkleNode::Leaf { key_value, .. } => *key_value, - _ => panic!(), - } - } -} - -#[derive(Debug, PartialEq)] -pub struct NodeMetadata { - pub node_type: NodeType, - pub dirty: bool, -} - -impl NodeMetadata { - pub fn from_bytes(blob: [u8; METADATA_SIZE]) -> Result { - // TODO: identify some useful structured serialization tooling we use - Ok(Self { - node_type: Self::node_type_from_bytes(blob)?, - dirty: match blob[1] { - 0 => false, - 1 => true, - other => return Err(format!("invalid dirty value: {other}")), - }, - }) - } - - pub fn to_bytes(&self) -> [u8; METADATA_SIZE] { - [self.node_type.to_u8(), u8::from(self.dirty)] - } - - pub fn node_type_from_bytes(blob: [u8; METADATA_SIZE]) -> Result { - NodeType::from_u8(blob[0]) - } -} - #[cfg(test)] mod tests { use super::*; use chia_traits::Streamable; + use clvm_utils; use hex_literal::hex; + use rstest::rstest; use std::fs; use std::io; use std::io::Write; @@ -799,17 +806,64 @@ mod tests { } #[test] - fn test_node_metadata_from_to() { - let bytes: [u8; 2] = [0, 1]; + fn test_node_type_serialized_values() { + // TODO: can i make sure we cover all variants? + assert_eq!(NodeType::Internal as u8, 0); + assert_eq!(NodeType::Leaf as u8, 1); + + for node_type in [NodeType::Internal, NodeType::Leaf] { + assert_eq!(node_type.to_u8(), node_type.clone() as u8,); + assert_eq!( + NodeType::from_u8(node_type.clone() as u8).unwrap(), + node_type, + ) + } + } + + #[test] + fn test_internal_hash() { + // TODO: yeah, various questions around this and how to express 'this is dl internal hash' + // without silly repetition. maybe just a use as. 
+ // in Python: Program.to((left_hash, right_hash)).get_tree_hash_precalc(left_hash, right_hash) + let left: Hash = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ]; + let right: Hash = [ + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + ]; + assert_eq!( + internal_hash(left, right), + clvm_utils::tree_hash_pair( + clvm_utils::TreeHash::new(left), + clvm_utils::TreeHash::new(right) + ) + .to_bytes(), + ); + } + + #[rstest] + fn test_node_metadata_from_to( + #[values(false, true)] dirty: bool, + // TODO: can we make sure we cover all variants + #[values(NodeType::Internal, NodeType::Leaf)] node_type: NodeType, + ) { + let bytes: [u8; 2] = [node_type.to_u8(), dirty as u8]; let object = NodeMetadata::from_bytes(bytes).unwrap(); assert_eq!( object, NodeMetadata { - node_type: NodeType::Internal, - dirty: true + node_type: node_type, + dirty: dirty }, ); assert_eq!(object.to_bytes(), bytes); + assert_eq!( + NodeMetadata::node_type_from_bytes(bytes).unwrap(), + object.node_type + ); + assert_eq!(NodeMetadata::dirty_from_bytes(bytes).unwrap(), object.dirty); } #[test] From d00dda3bb456b4796642a40249c1eda23a371025 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 10:25:54 -0400 Subject: [PATCH 022/181] tidy --- crates/chia-datalayer/src/lib.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index caab724a1..50bc1c83d 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -755,13 +755,9 @@ impl MerkleBlob { #[cfg(test)] mod tests { use super::*; - use chia_traits::Streamable; use clvm_utils; use hex_literal::hex; use rstest::rstest; - use std::fs; - use std::io; - use std::io::Write; const EXAMPLE_BLOB: [u8; 138] = hex!("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b"); const HASH: Hash = [ From cc97f1a22a5433ced6ef21cae9b654f63ac53105 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 10:57:09 -0400 Subject: [PATCH 023/181] less raw in the name --- crates/chia-datalayer/src/lib.rs | 108 ++++++++++++++----------------- 1 file changed, 50 insertions(+), 58 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 50bc1c83d..69e9f1fff 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -109,7 +109,7 @@ impl NodeMetadata { } #[derive(Debug, PartialEq)] -pub enum RawMerkleNode { +pub enum Node { // Root { // left: TreeIndex, // right: TreeIndex, @@ -135,7 +135,7 @@ pub enum RawMerkleNode { }, } -impl RawMerkleNode { +impl Node { // fn discriminant(&self) -> u8 { // unsafe { *(self as *const Self as *const u8) } // } @@ -148,7 +148,7 @@ impl RawMerkleNode { // TODO: add Err results let parent = Self::parent_from_bytes(&metadata.node_type, &blob)?; match metadata.node_type { - NodeType::Internal => Ok(RawMerkleNode::Internal { + NodeType::Internal => Ok(Node::Internal { // TODO: get these right parent, left: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[4..8]).unwrap()), @@ -156,7 +156,7 @@ impl RawMerkleNode { hash: <[u8; 32]>::try_from(&blob[12..44]).unwrap(), index, }), - NodeType::Leaf => 
Ok(RawMerkleNode::Leaf { + NodeType::Leaf => Ok(Node::Leaf { // TODO: this try from really right? parent, key_value: KvId::from_be_bytes(<[u8; 8]>::try_from(&blob[4..12]).unwrap()), @@ -183,7 +183,7 @@ impl RawMerkleNode { pub fn to_bytes(&self) -> [u8; DATA_SIZE] { let mut blob: Vec = Vec::new(); match self { - RawMerkleNode::Internal { + Node::Internal { parent, left, right, @@ -195,7 +195,7 @@ impl RawMerkleNode { blob.extend(right.to_be_bytes()); blob.extend(hash); } - RawMerkleNode::Leaf { + Node::Leaf { parent, key_value, hash, @@ -212,33 +212,34 @@ impl RawMerkleNode { pub fn parent(&self) -> TreeIndex { match self { - RawMerkleNode::Internal { parent, .. } | RawMerkleNode::Leaf { parent, .. } => *parent, + Node::Internal { parent, .. } | Node::Leaf { parent, .. } => *parent, } } pub fn hash(&self) -> Hash { match self { - RawMerkleNode::Internal { hash, .. } | RawMerkleNode::Leaf { hash, .. } => *hash, + Node::Internal { hash, .. } | Node::Leaf { hash, .. } => *hash, } } pub fn index(&self) -> TreeIndex { match self { - RawMerkleNode::Internal { index, .. } | RawMerkleNode::Leaf { index, .. } => *index, + Node::Internal { index, .. } | Node::Leaf { index, .. } => *index, } } pub fn set_parent(&mut self, p: TreeIndex) { match self { - &mut RawMerkleNode::Internal { ref mut parent, .. } - | RawMerkleNode::Leaf { ref mut parent, .. } => *parent = p, + &mut Node::Internal { ref mut parent, .. } | Node::Leaf { ref mut parent, .. } => { + *parent = p + } } } // TODO: yes i know i'm trying to write this code in a non-rusty way and i need to stop that pub fn key_value(&self) -> KvId { match self { - RawMerkleNode::Leaf { key_value, .. } => *key_value, + Node::Leaf { key_value, .. } => *key_value, _ => panic!(), } } @@ -247,7 +248,7 @@ impl RawMerkleNode { // TODO: does not enforce matching metadata node type and node enumeration type struct ParsedBlock { metadata: NodeMetadata, - node: RawMerkleNode, + node: Node, } impl ParsedBlock { @@ -276,12 +277,10 @@ impl ParsedBlock { Ok(metadata) => metadata, Err(message) => return Err(format!("failed loading metadata: {message})")), }; - Ok( - match RawMerkleNode::from_bytes(&metadata, index, data_blob) { - Ok(node) => ParsedBlock { metadata, node }, - Err(message) => return Err(format!("failed loading raw node: {message}")), - }, - ) + Ok(match Node::from_bytes(&metadata, index, data_blob) { + Ok(node) => ParsedBlock { metadata, node }, + Err(message) => return Err(format!("failed loading raw node: {message}")), + }) } } @@ -302,11 +301,11 @@ fn get_free_indexes(blob: &Vec) -> Result, String> { ParsedBlock::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; seen_indexes[index as usize] = true; match block.node { - RawMerkleNode::Internal { left, right, .. } => { + Node::Internal { left, right, .. } => { queue.push(left); queue.push(right); } - RawMerkleNode::Leaf { .. } => (), + Node::Leaf { .. } => (), } } @@ -337,10 +336,10 @@ fn get_keys_values_indexes(blob: &Vec) -> Result, S let block = ParsedBlock::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; match block.node { - RawMerkleNode::Leaf { key_value, .. } => { + Node::Leaf { key_value, .. } => { kv_to_index.insert(key_value, index); } - RawMerkleNode::Internal { .. } => (), + Node::Internal { .. 
} => (), } } @@ -401,7 +400,7 @@ impl MerkleBlob { node_type: NodeType::Leaf, dirty: false, }, - node: crate::RawMerkleNode::Leaf { + node: crate::Node::Leaf { parent: NULL_PARENT, key_value, hash, @@ -422,7 +421,7 @@ impl MerkleBlob { &mut self, key_value: KvId, hash: Hash, - old_leaf: RawMerkleNode, + old_leaf: Node, internal_node_hash: Hash, ) -> Result<(), String> { self.blob.clear(); @@ -432,7 +431,7 @@ impl MerkleBlob { node_type: NodeType::Internal, dirty: false, }, - node: RawMerkleNode::Internal { + node: Node::Internal { parent: NULL_PARENT, left: 1, right: 2, @@ -448,7 +447,7 @@ impl MerkleBlob { node_type: NodeType::Leaf, dirty: false, }, - node: RawMerkleNode::Leaf { + node: Node::Leaf { parent: 0, key_value: old_leaf.key_value(), hash: old_leaf.hash(), @@ -466,7 +465,7 @@ impl MerkleBlob { node_type: NodeType::Leaf, dirty: false, }, - node: RawMerkleNode::Leaf { + node: Node::Leaf { parent: 0, key_value, hash, @@ -489,7 +488,7 @@ impl MerkleBlob { &mut self, key_value: KvId, hash: Hash, - old_leaf: RawMerkleNode, + old_leaf: Node, internal_node_hash: Hash, ) -> Result<(), String> { let new_leaf_index = self.get_new_index(); @@ -500,7 +499,7 @@ impl MerkleBlob { node_type: NodeType::Leaf, dirty: false, }, - node: RawMerkleNode::Leaf { + node: Node::Leaf { parent: new_internal_node_index, key_value, hash, @@ -514,7 +513,7 @@ impl MerkleBlob { node_type: NodeType::Internal, dirty: false, }, - node: RawMerkleNode::Internal { + node: Node::Internal { parent: old_leaf.parent(), left: old_leaf.index(), right: new_leaf_index, @@ -539,7 +538,7 @@ impl MerkleBlob { let mut old_parent_block = ParsedBlock::from_bytes(self.get_block(old_parent_index)?, old_parent_index)?; match old_parent_block.node { - RawMerkleNode::Internal { + Node::Internal { ref mut left, ref mut right, .. @@ -552,7 +551,7 @@ impl MerkleBlob { panic!(); } } - RawMerkleNode::Leaf { .. } => panic!(), + Node::Leaf { .. } => panic!(), } self.insert_entry_to_blob(old_parent_index, old_parent_block.to_bytes())?; @@ -593,10 +592,7 @@ impl MerkleBlob { } } - fn get_random_leaf_node_from_bytes( - &self, - seed_bytes: Vec, - ) -> Result { + fn get_random_leaf_node_from_bytes(&self, seed_bytes: Vec) -> Result { let mut hasher = Sha256::new(); hasher.update(seed_bytes); let seed: Hash = hasher.finalize(); @@ -605,8 +601,8 @@ impl MerkleBlob { for byte in seed { for bit in 0..8 { match node { - RawMerkleNode::Leaf { .. } => return Ok(node), - RawMerkleNode::Internal { left, right, .. } => { + Node::Leaf { .. } => return Ok(node), + Node::Internal { left, right, .. } => { if byte & (1 << bit) != 0 { node = self.get_raw_node(left)?; } else { @@ -646,7 +642,7 @@ impl MerkleBlob { .map_err(|e| format!("failed getting block {index}: {e}")) } - pub fn get_raw_node(&self, index: TreeIndex) -> Result { + pub fn get_raw_node(&self, index: TreeIndex) -> Result { // TODO: use ParsedBlock::from_bytes() // TODO: handle invalid indexes? // TODO: handle overflows? 
@@ -665,12 +661,10 @@ impl MerkleBlob { Ok(metadata) => metadata, Err(message) => return Err(format!("failed loading metadata: {message})")), }; - Ok( - match RawMerkleNode::from_bytes(&metadata, index, data_blob) { - Ok(node) => node, - Err(message) => return Err(format!("failed loading raw node: {message}")), - }, - ) + Ok(match Node::from_bytes(&metadata, index, data_blob) { + Ok(node) => node, + Err(message) => return Err(format!("failed loading raw node: {message}")), + }) } pub fn get_parent_index(&self, index: TreeIndex) -> Result { @@ -678,10 +672,10 @@ impl MerkleBlob { let node_type = NodeMetadata::node_type_from_bytes(block[..METADATA_SIZE].try_into().unwrap())?; - RawMerkleNode::parent_from_bytes(&node_type, block[METADATA_SIZE..].try_into().unwrap()) + Node::parent_from_bytes(&node_type, block[METADATA_SIZE..].try_into().unwrap()) } - pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { + pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { let mut next_index = index; let mut lineage = vec![]; loop { @@ -704,10 +698,8 @@ impl MerkleBlob { let block = self.get_block(next_index)?; let node_type = NodeMetadata::node_type_from_bytes(block[..METADATA_SIZE].try_into().unwrap())?; - next_index = RawMerkleNode::parent_from_bytes( - &node_type, - block[METADATA_SIZE..].try_into().unwrap(), - )?; + next_index = + Node::parent_from_bytes(&node_type, block[METADATA_SIZE..].try_into().unwrap())?; if next_index == NULL_PARENT { return Ok(lineage); @@ -765,7 +757,7 @@ mod tests { 35, 36, 37, 38, 39, 40, 41, 42, 43, ]; - const EXAMPLE_ROOT: RawMerkleNode = RawMerkleNode::Internal { + const EXAMPLE_ROOT: Node = Node::Internal { parent: NULL_PARENT, left: 1, right: 2, @@ -776,7 +768,7 @@ mod tests { node_type: NodeType::Internal, dirty: true, }; - const EXAMPLE_LEFT_LEAF: RawMerkleNode = RawMerkleNode::Leaf { + const EXAMPLE_LEFT_LEAF: Node = Node::Leaf { parent: 0, key_value: 0x0405_0607_0809_0A0B, hash: HASH, @@ -786,7 +778,7 @@ mod tests { node_type: NodeType::Leaf, dirty: false, }; - const EXAMPLE_RIGHT_LEAF: RawMerkleNode = RawMerkleNode::Leaf { + const EXAMPLE_RIGHT_LEAF: Node = Node::Leaf { parent: 0, key_value: 0x1415_1617_1819_1A1B, hash: HASH, @@ -889,7 +881,7 @@ mod tests { .unwrap(); assert_eq!( match leaf { - RawMerkleNode::Internal { index, .. } | RawMerkleNode::Leaf { index, .. } => index, + Node::Internal { index, .. } | Node::Leaf { index, .. } => index, }, 1, ); @@ -930,10 +922,10 @@ mod tests { let mut root = ParsedBlock::from_bytes(merkle_blob.get_block(0).unwrap(), 0).unwrap(); root.metadata.dirty = true; match root.node { - RawMerkleNode::Internal { ref mut hash, .. } => { + Node::Internal { ref mut hash, .. } => { *hash = HASH; } - RawMerkleNode::Leaf { .. } => panic!(), + Node::Leaf { .. 
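// Aside: one way to picture the parent walk that get_lineage and get_parent_index above
// build on: follow parent indexes until the null-parent sentinel. Toy parent table only;
// whether the real lineage includes the starting node is up to the code above.
#[test]
fn sketch_parent_index_walk() {
    const NULL_PARENT: u32 = 0xffff_ffff;

    // parents[i] is the parent index of node i; the root points at the sentinel
    let parents = [NULL_PARENT, 0, 0];

    let mut next = 2u32; // start from the right-hand leaf
    let mut lineage = vec![];
    while next != NULL_PARENT {
        lineage.push(next);
        next = parents[next as usize];
    }

    assert_eq!(lineage, vec![2u32, 0]); // leaf, then root
}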
} => panic!(), } merkle_blob.blob[..BLOCK_SIZE].copy_from_slice(&root.to_bytes()); From 99568d94277fc2103cd4ab9b9f6db336b7384398 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 11:03:27 -0400 Subject: [PATCH 024/181] drop a little commented code --- crates/chia-datalayer/src/lib.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 69e9f1fff..75b5f67fe 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -110,14 +110,6 @@ impl NodeMetadata { #[derive(Debug, PartialEq)] pub enum Node { - // Root { - // left: TreeIndex, - // right: TreeIndex, - // hash: Hash, - // // TODO: kinda feels questionable having it be aware of its own location - // // TODO: just always at zero? - // index: TreeIndex, - // }, Internal { parent: TreeIndex, left: TreeIndex, From eabb7c01b4b2b217ac40e9525dcf9cceff13925d Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 11:19:59 -0400 Subject: [PATCH 025/181] drop some `crate::` --- crates/chia-datalayer/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 75b5f67fe..5e09205ba 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -388,11 +388,11 @@ impl MerkleBlob { fn insert_first(&mut self, key_value: KvId, hash: Hash) -> Result<(), String> { let new_leaf_block = ParsedBlock { - metadata: crate::NodeMetadata { + metadata: NodeMetadata { node_type: NodeType::Leaf, dirty: false, }, - node: crate::Node::Leaf { + node: Node::Leaf { parent: NULL_PARENT, key_value, hash, From 17c4a86a7d8fb3a41de3c68c75e835d3b7b9563d Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 12:54:47 -0400 Subject: [PATCH 026/181] tidy --- crates/chia-datalayer/src/lib.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 5e09205ba..efdf66bbd 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -595,11 +595,13 @@ impl MerkleBlob { match node { Node::Leaf { .. } => return Ok(node), Node::Internal { left, right, .. 
} => { + let next: TreeIndex; if byte & (1 << bit) != 0 { - node = self.get_raw_node(left)?; + next = left; } else { - node = self.get_raw_node(right)?; + next = right; } + node = self.get_raw_node(next)?; } } } From d2c835024decddbccb63525da6485f7f9c4433fc Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 14:05:00 -0400 Subject: [PATCH 027/181] refactor node common attributes to a new non-enum layer --- crates/chia-datalayer/src/lib.rs | 224 +++++++++++++------------------ 1 file changed, 94 insertions(+), 130 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index efdf66bbd..a9f71c649 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -109,22 +109,19 @@ impl NodeMetadata { } #[derive(Debug, PartialEq)] -pub enum Node { - Internal { - parent: TreeIndex, - left: TreeIndex, - right: TreeIndex, - hash: Hash, - // TODO: kinda feels questionable having it be aware of its own location - index: TreeIndex, - }, - Leaf { - parent: TreeIndex, - key_value: KvId, - hash: Hash, - // TODO: kinda feels questionable having it be aware of its own location - index: TreeIndex, - }, +pub struct Node { + // TODO: can this be an Option to avoid dealing with the magic number outside of serialization? + parent: TreeIndex, + hash: Hash, + specific: NodeSpecific, + // TODO: kinda feels questionable having it be aware of its own location + index: TreeIndex, +} + +#[derive(Debug, PartialEq)] +pub enum NodeSpecific { + Internal { left: TreeIndex, right: TreeIndex }, + Leaf { key_value: KvId }, } impl Node { @@ -139,23 +136,22 @@ impl Node { ) -> Result { // TODO: add Err results let parent = Self::parent_from_bytes(&metadata.node_type, &blob)?; - match metadata.node_type { - NodeType::Internal => Ok(Node::Internal { - // TODO: get these right - parent, - left: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[4..8]).unwrap()), - right: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[8..12]).unwrap()), - hash: <[u8; 32]>::try_from(&blob[12..44]).unwrap(), - index, - }), - NodeType::Leaf => Ok(Node::Leaf { - // TODO: this try from really right? - parent, - key_value: KvId::from_be_bytes(<[u8; 8]>::try_from(&blob[4..12]).unwrap()), - hash: Hash::try_from(&blob[12..44]).unwrap(), - index, - }), - } + Ok(Self { + parent, + index, + // TODO: move the common parts to the beginning of the serialization? + hash: <[u8; 32]>::try_from(&blob[12..44]).unwrap(), + specific: match metadata.node_type { + NodeType::Internal => NodeSpecific::Internal { + left: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[4..8]).unwrap()), + right: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[8..12]).unwrap()), + }, + NodeType::Leaf => NodeSpecific::Leaf { + // TODO: this try from really right? + key_value: KvId::from_be_bytes(<[u8; 8]>::try_from(&blob[4..12]).unwrap()), + }, + }, + }) } fn parent_from_bytes( @@ -175,10 +171,9 @@ impl Node { pub fn to_bytes(&self) -> [u8; DATA_SIZE] { let mut blob: Vec = Vec::new(); match self { - Node::Internal { + Node { parent, - left, - right, + specific: NodeSpecific::Internal { left, right }, hash, index: _, } => { @@ -187,9 +182,9 @@ impl Node { blob.extend(right.to_be_bytes()); blob.extend(hash); } - Node::Leaf { + Node { parent, - key_value, + specific: NodeSpecific::Leaf { key_value }, hash, index: _, } => { @@ -202,36 +197,10 @@ impl Node { blob.try_into().unwrap() } - pub fn parent(&self) -> TreeIndex { - match self { - Node::Internal { parent, .. } | Node::Leaf { parent, .. 
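// Aside: the shape of the refactor above in miniature, with hypothetical names: fields every
// node shares (parent, hash, index) move onto the struct, and only the variant-specific data
// stays in the enum, so shared accessors no longer need per-variant match arms.
#[allow(dead_code)]
#[test]
fn sketch_struct_plus_enum_split() {
    struct TinyNode {
        parent: u32,
        specific: TinySpecific,
    }

    enum TinySpecific {
        Internal { left: u32, right: u32 },
        Leaf { key: u64 },
    }

    impl TinyNode {
        // no match needed for a field every variant shares
        fn parent(&self) -> u32 {
            self.parent
        }
    }

    let leaf = TinyNode {
        parent: 0,
        specific: TinySpecific::Leaf { key: 7 },
    };
    assert_eq!(leaf.parent(), 0);
}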
} => *parent, - } - } - - pub fn hash(&self) -> Hash { - match self { - Node::Internal { hash, .. } | Node::Leaf { hash, .. } => *hash, - } - } - - pub fn index(&self) -> TreeIndex { - match self { - Node::Internal { index, .. } | Node::Leaf { index, .. } => *index, - } - } - - pub fn set_parent(&mut self, p: TreeIndex) { - match self { - &mut Node::Internal { ref mut parent, .. } | Node::Leaf { ref mut parent, .. } => { - *parent = p - } - } - } - // TODO: yes i know i'm trying to write this code in a non-rusty way and i need to stop that pub fn key_value(&self) -> KvId { - match self { - Node::Leaf { key_value, .. } => *key_value, + match self.specific { + NodeSpecific::Leaf { key_value } => key_value, _ => panic!(), } } @@ -292,12 +261,12 @@ fn get_free_indexes(blob: &Vec) -> Result, String> { let block = ParsedBlock::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; seen_indexes[index as usize] = true; - match block.node { - Node::Internal { left, right, .. } => { + match block.node.specific { + NodeSpecific::Internal { left, right } => { queue.push(left); queue.push(right); } - Node::Leaf { .. } => (), + NodeSpecific::Leaf { .. } => (), } } @@ -327,11 +296,11 @@ fn get_keys_values_indexes(blob: &Vec) -> Result, S let offset = index as usize * BLOCK_SIZE; let block = ParsedBlock::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; - match block.node { - Node::Leaf { key_value, .. } => { + match block.node.specific { + NodeSpecific::Leaf { key_value } => { kv_to_index.insert(key_value, index); } - Node::Internal { .. } => (), + NodeSpecific::Internal { .. } => (), } } @@ -377,7 +346,7 @@ impl MerkleBlob { } let old_leaf = self.get_random_leaf_node_from_bytes(Vec::from(key_value.to_be_bytes()))?; - let internal_node_hash = internal_hash(old_leaf.hash(), hash); + let internal_node_hash = internal_hash(old_leaf.hash, hash); if self.kv_to_index.len() == 1 { return self.insert_second(key_value, hash, old_leaf, internal_node_hash); @@ -392,9 +361,9 @@ impl MerkleBlob { node_type: NodeType::Leaf, dirty: false, }, - node: Node::Leaf { + node: Node { parent: NULL_PARENT, - key_value, + specific: NodeSpecific::Leaf { key_value }, hash, index: 0, }, @@ -423,10 +392,9 @@ impl MerkleBlob { node_type: NodeType::Internal, dirty: false, }, - node: Node::Internal { + node: Node { parent: NULL_PARENT, - left: 1, - right: 2, + specific: NodeSpecific::Internal { left: 1, right: 2 }, hash: internal_node_hash, index: 0, }, @@ -439,27 +407,27 @@ impl MerkleBlob { node_type: NodeType::Leaf, dirty: false, }, - node: Node::Leaf { + node: Node { parent: 0, - key_value: old_leaf.key_value(), - hash: old_leaf.hash(), + specific: NodeSpecific::Leaf { + key_value: old_leaf.key_value(), + }, + hash: old_leaf.hash, index: 1, }, }; self.blob.extend(left_leaf_block.to_bytes()); - self.kv_to_index.insert( - left_leaf_block.node.key_value(), - left_leaf_block.node.index(), - ); + self.kv_to_index + .insert(left_leaf_block.node.key_value(), left_leaf_block.node.index); let right_leaf_block = ParsedBlock { metadata: NodeMetadata { node_type: NodeType::Leaf, dirty: false, }, - node: Node::Leaf { + node: Node { parent: 0, - key_value, + specific: NodeSpecific::Leaf { key_value }, hash, index: 2, }, @@ -467,7 +435,7 @@ impl MerkleBlob { self.blob.extend(right_leaf_block.to_bytes()); self.kv_to_index.insert( right_leaf_block.node.key_value(), - right_leaf_block.node.index(), + right_leaf_block.node.index, ); self.free_indexes.clear(); @@ -491,9 +459,9 @@ impl MerkleBlob { 
node_type: NodeType::Leaf, dirty: false, }, - node: Node::Leaf { + node: Node { parent: new_internal_node_index, - key_value, + specific: NodeSpecific::Leaf { key_value }, hash, index: new_leaf_index, }, @@ -505,17 +473,19 @@ impl MerkleBlob { node_type: NodeType::Internal, dirty: false, }, - node: Node::Internal { - parent: old_leaf.parent(), - left: old_leaf.index(), - right: new_leaf_index, + node: Node { + parent: old_leaf.parent, + specific: NodeSpecific::Internal { + left: old_leaf.index, + right: new_leaf_index, + }, hash: internal_node_hash, index: new_internal_node_index, }, }; self.insert_entry_to_blob(new_internal_node_index, new_internal_block.to_bytes())?; - let old_parent_index = old_leaf.parent(); + let old_parent_index = old_leaf.parent; assert!( old_parent_index != NULL_PARENT, "{}", @@ -523,27 +493,27 @@ impl MerkleBlob { ); let mut block = - ParsedBlock::from_bytes(self.get_block(old_leaf.index())?, new_internal_node_index)?; - block.node.set_parent(new_internal_node_index); - self.insert_entry_to_blob(old_leaf.index(), block.to_bytes())?; + ParsedBlock::from_bytes(self.get_block(old_leaf.index)?, new_internal_node_index)?; + block.node.parent = new_internal_node_index; + self.insert_entry_to_blob(old_leaf.index, block.to_bytes())?; let mut old_parent_block = ParsedBlock::from_bytes(self.get_block(old_parent_index)?, old_parent_index)?; - match old_parent_block.node { - Node::Internal { + match old_parent_block.node.specific { + NodeSpecific::Internal { ref mut left, ref mut right, .. } => { - if old_leaf.index() == *left { + if old_leaf.index == *left { *left = new_internal_node_index; - } else if old_leaf.index() == *right { + } else if old_leaf.index == *right { *right = new_internal_node_index; } else { panic!(); } } - Node::Leaf { .. } => panic!(), + NodeSpecific::Leaf { .. } => panic!(), } self.insert_entry_to_blob(old_parent_index, old_parent_block.to_bytes())?; @@ -560,7 +530,7 @@ impl MerkleBlob { let mut block = ParsedBlock::from_bytes(self.get_block(index)?, index)?; block.metadata.dirty = true; self.insert_entry_to_blob(index, block.to_bytes())?; - index = block.node.parent(); + index = block.node.parent; } Ok(()) @@ -592,9 +562,9 @@ impl MerkleBlob { let mut node = self.get_raw_node(0)?; for byte in seed { for bit in 0..8 { - match node { - Node::Leaf { .. } => return Ok(node), - Node::Internal { left, right, .. } => { + match node.specific { + NodeSpecific::Leaf { .. } => return Ok(node), + NodeSpecific::Internal { left, right, .. 
} => { let next: TreeIndex; if byte & (1 << bit) != 0 { next = left; @@ -674,7 +644,7 @@ impl MerkleBlob { let mut lineage = vec![]; loop { let node = self.get_raw_node(next_index)?; - next_index = node.parent(); + next_index = node.parent; lineage.push(node); if next_index == NULL_PARENT { @@ -751,10 +721,9 @@ mod tests { 35, 36, 37, 38, 39, 40, 41, 42, 43, ]; - const EXAMPLE_ROOT: Node = Node::Internal { + const EXAMPLE_ROOT: Node = Node { parent: NULL_PARENT, - left: 1, - right: 2, + specific: NodeSpecific::Internal { left: 1, right: 2 }, hash: HASH, index: 0, }; @@ -762,9 +731,11 @@ mod tests { node_type: NodeType::Internal, dirty: true, }; - const EXAMPLE_LEFT_LEAF: Node = Node::Leaf { + const EXAMPLE_LEFT_LEAF: Node = Node { parent: 0, - key_value: 0x0405_0607_0809_0A0B, + specific: NodeSpecific::Leaf { + key_value: 0x0405_0607_0809_0A0B, + }, hash: HASH, index: 1, }; @@ -772,9 +743,11 @@ mod tests { node_type: NodeType::Leaf, dirty: false, }; - const EXAMPLE_RIGHT_LEAF: Node = Node::Leaf { + const EXAMPLE_RIGHT_LEAF: Node = Node { parent: 0, - key_value: 0x1415_1617_1819_1A1B, + specific: NodeSpecific::Leaf { + key_value: 0x1415_1617_1819_1A1B, + }, hash: HASH, index: 2, }; @@ -864,7 +837,7 @@ mod tests { } assert_eq!(lineage.len(), 2); let last_node = lineage.last().unwrap(); - assert_eq!(last_node.parent(), NULL_PARENT); + assert_eq!(last_node.parent, NULL_PARENT); } #[test] @@ -873,12 +846,7 @@ mod tests { let leaf = merkle_blob .get_random_leaf_node_from_bytes(vec![0; 8]) .unwrap(); - assert_eq!( - match leaf { - Node::Internal { index, .. } | Node::Leaf { index, .. } => index, - }, - 1, - ); + assert_eq!(leaf.index, 1); } #[test] @@ -906,21 +874,17 @@ mod tests { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); merkle_blob - .insert(EXAMPLE_LEFT_LEAF.key_value(), EXAMPLE_LEFT_LEAF.hash()) + .insert(EXAMPLE_LEFT_LEAF.key_value(), EXAMPLE_LEFT_LEAF.hash) .unwrap(); merkle_blob - .insert(EXAMPLE_RIGHT_LEAF.key_value(), EXAMPLE_RIGHT_LEAF.hash()) + .insert(EXAMPLE_RIGHT_LEAF.key_value(), EXAMPLE_RIGHT_LEAF.hash) .unwrap(); // TODO: just hacking here to compare with the ~wrong~ simplified reference let mut root = ParsedBlock::from_bytes(merkle_blob.get_block(0).unwrap(), 0).unwrap(); root.metadata.dirty = true; - match root.node { - Node::Internal { ref mut hash, .. } => { - *hash = HASH; - } - Node::Leaf { .. 
} => panic!(), - } + root.node.hash = HASH; + assert_eq!(root.metadata.node_type, NodeType::Internal); merkle_blob.blob[..BLOCK_SIZE].copy_from_slice(&root.to_bytes()); assert_eq!(merkle_blob.blob, Vec::from(EXAMPLE_BLOB)); From 2ed20b6fc2aca7111bde1f7071fb21569285ec53 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 14:53:40 -0400 Subject: [PATCH 028/181] misc --- crates/chia-datalayer/src/lib.rs | 113 +++++++++++++++---------------- 1 file changed, 56 insertions(+), 57 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index a9f71c649..a0af14e25 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -6,6 +6,7 @@ use pyo3::{buffer::PyBuffer, pyclass, pymethods, PyResult}; use clvmr::sha2::Sha256; use std::cmp::Ordering; use std::collections::HashMap; +use std::ops::Range; // TODO: clearly shouldn't be hard coded const METADATA_SIZE: usize = 2; @@ -16,7 +17,7 @@ const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; type TreeIndex = u32; // type Key = Vec; type Hash = [u8; 32]; -type Block = [u8; BLOCK_SIZE]; +type BlockBytes = [u8; BLOCK_SIZE]; type KvId = u64; #[derive(Clone, Debug, Hash, Eq, PartialEq)] @@ -124,6 +125,13 @@ pub enum NodeSpecific { Leaf { key_value: KvId }, } +const PARENT_RANGE: Range = 0..4; +const LEFT_RANGE: Range = 4..8; +const RIGHT_RANGE: Range = 8..12; +const KEY_VALUE_RANGE: Range = 4..12; +// TODO: move the common parts to the beginning of the serialization? +const HASH_RANGE: Range = 12..44; + impl Node { // fn discriminant(&self) -> u8 { // unsafe { *(self as *const Self as *const u8) } @@ -135,38 +143,33 @@ impl Node { blob: [u8; DATA_SIZE], ) -> Result { // TODO: add Err results - let parent = Self::parent_from_bytes(&metadata.node_type, &blob)?; + let parent = Self::parent_from_bytes(&blob)?; Ok(Self { parent, index, - // TODO: move the common parts to the beginning of the serialization? - hash: <[u8; 32]>::try_from(&blob[12..44]).unwrap(), + hash: <[u8; 32]>::try_from(&blob[HASH_RANGE]).unwrap(), specific: match metadata.node_type { NodeType::Internal => NodeSpecific::Internal { - left: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[4..8]).unwrap()), - right: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[8..12]).unwrap()), + left: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[LEFT_RANGE]).unwrap()), + right: TreeIndex::from_be_bytes( + <[u8; 4]>::try_from(&blob[RIGHT_RANGE]).unwrap(), + ), }, NodeType::Leaf => NodeSpecific::Leaf { // TODO: this try from really right? 
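                    // hedged aside, not part of this commit: with KEY_VALUE_RANGE fixed at
                    // eight bytes the conversion below can only fail if the range constants
                    // drift out of sync with DATA_SIZE, so an equivalent sketch is:
                    //     let raw: [u8; 8] = blob[KEY_VALUE_RANGE].try_into().expect("KEY_VALUE_RANGE is 8 bytes");
                    //     let key_value = KvId::from_be_bytes(raw);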
- key_value: KvId::from_be_bytes(<[u8; 8]>::try_from(&blob[4..12]).unwrap()), + key_value: KvId::from_be_bytes( + <[u8; 8]>::try_from(&blob[KEY_VALUE_RANGE]).unwrap(), + ), }, }, }) } - fn parent_from_bytes( - node_type: &NodeType, - blob: &[u8; DATA_SIZE], - ) -> Result { + fn parent_from_bytes(blob: &[u8; DATA_SIZE]) -> Result { // TODO: a little setup here for pre-optimization to allow walking parents without processing entire nodes - match node_type { - NodeType::Internal => Ok(TreeIndex::from_be_bytes( - <[u8; 4]>::try_from(&blob[0..4]).unwrap(), - )), - NodeType::Leaf => Ok(TreeIndex::from_be_bytes( - <[u8; 4]>::try_from(&blob[0..4]).unwrap(), - )), - } + Ok(TreeIndex::from_be_bytes( + <[u8; 4]>::try_from(&blob[PARENT_RANGE]).unwrap(), + )) } pub fn to_bytes(&self) -> [u8; DATA_SIZE] { let mut blob: Vec = Vec::new(); @@ -207,12 +210,12 @@ impl Node { } // TODO: does not enforce matching metadata node type and node enumeration type -struct ParsedBlock { +struct Block { metadata: NodeMetadata, node: Node, } -impl ParsedBlock { +impl Block { pub fn to_bytes(&self) -> [u8; BLOCK_SIZE] { let mut blob: [u8; BLOCK_SIZE] = [0; BLOCK_SIZE]; blob[..METADATA_SIZE].copy_from_slice(&self.metadata.to_bytes()); @@ -221,7 +224,7 @@ impl ParsedBlock { blob } - pub fn from_bytes(blob: [u8; BLOCK_SIZE], index: TreeIndex) -> Result { + pub fn from_bytes(blob: [u8; BLOCK_SIZE], index: TreeIndex) -> Result { // TODO: handle invalid indexes? // TODO: handle overflows? let metadata_blob: [u8; METADATA_SIZE] = blob @@ -239,8 +242,8 @@ impl ParsedBlock { Err(message) => return Err(format!("failed loading metadata: {message})")), }; Ok(match Node::from_bytes(&metadata, index, data_blob) { - Ok(node) => ParsedBlock { metadata, node }, - Err(message) => return Err(format!("failed loading raw node: {message}")), + Ok(node) => Block { metadata, node }, + Err(message) => return Err(format!("failed loading node: {message}")), }) } } @@ -259,7 +262,7 @@ fn get_free_indexes(blob: &Vec) -> Result, String> { let index: TreeIndex = queue.pop().unwrap(); let offset = index as usize * BLOCK_SIZE; let block = - ParsedBlock::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; + Block::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; seen_indexes[index as usize] = true; match block.node.specific { NodeSpecific::Internal { left, right } => { @@ -295,7 +298,7 @@ fn get_keys_values_indexes(blob: &Vec) -> Result, S let index: TreeIndex = queue.pop().unwrap(); let offset = index as usize * BLOCK_SIZE; let block = - ParsedBlock::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; + Block::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; match block.node.specific { NodeSpecific::Leaf { key_value } => { kv_to_index.insert(key_value, index); @@ -356,7 +359,7 @@ impl MerkleBlob { } fn insert_first(&mut self, key_value: KvId, hash: Hash) -> Result<(), String> { - let new_leaf_block = ParsedBlock { + let new_leaf_block = Block { metadata: NodeMetadata { node_type: NodeType::Leaf, dirty: false, @@ -387,7 +390,7 @@ impl MerkleBlob { ) -> Result<(), String> { self.blob.clear(); - let new_internal_block = ParsedBlock { + let new_internal_block = Block { metadata: NodeMetadata { node_type: NodeType::Internal, dirty: false, @@ -402,7 +405,7 @@ impl MerkleBlob { self.blob.extend(new_internal_block.to_bytes()); - let left_leaf_block = ParsedBlock { + let left_leaf_block = Block { metadata: NodeMetadata { node_type: NodeType::Leaf, dirty: false, @@ 
-420,7 +423,7 @@ impl MerkleBlob { self.kv_to_index .insert(left_leaf_block.node.key_value(), left_leaf_block.node.index); - let right_leaf_block = ParsedBlock { + let right_leaf_block = Block { metadata: NodeMetadata { node_type: NodeType::Leaf, dirty: false, @@ -454,7 +457,7 @@ impl MerkleBlob { let new_leaf_index = self.get_new_index(); let new_internal_node_index = self.get_new_index(); - let new_leaf_block = ParsedBlock { + let new_leaf_block = Block { metadata: NodeMetadata { node_type: NodeType::Leaf, dirty: false, @@ -468,7 +471,7 @@ impl MerkleBlob { }; self.insert_entry_to_blob(new_leaf_index, new_leaf_block.to_bytes())?; - let new_internal_block = ParsedBlock { + let new_internal_block = Block { metadata: NodeMetadata { node_type: NodeType::Internal, dirty: false, @@ -486,19 +489,20 @@ impl MerkleBlob { self.insert_entry_to_blob(new_internal_node_index, new_internal_block.to_bytes())?; let old_parent_index = old_leaf.parent; - assert!( - old_parent_index != NULL_PARENT, + assert_ne!( + old_parent_index, + NULL_PARENT, "{}", format!("{key_value:?} {hash:?}") ); let mut block = - ParsedBlock::from_bytes(self.get_block(old_leaf.index)?, new_internal_node_index)?; + Block::from_bytes(self.get_block(old_leaf.index)?, new_internal_node_index)?; block.node.parent = new_internal_node_index; self.insert_entry_to_blob(old_leaf.index, block.to_bytes())?; let mut old_parent_block = - ParsedBlock::from_bytes(self.get_block(old_parent_index)?, old_parent_index)?; + Block::from_bytes(self.get_block(old_parent_index)?, old_parent_index)?; match old_parent_block.node.specific { NodeSpecific::Internal { ref mut left, @@ -527,7 +531,7 @@ impl MerkleBlob { let mut index = index; while index != NULL_PARENT { - let mut block = ParsedBlock::from_bytes(self.get_block(index)?, index)?; + let mut block = Block::from_bytes(self.get_block(index)?, index)?; block.metadata.dirty = true; self.insert_entry_to_blob(index, block.to_bytes())?; index = block.node.parent; @@ -559,7 +563,7 @@ impl MerkleBlob { hasher.update(seed_bytes); let seed: Hash = hasher.finalize(); - let mut node = self.get_raw_node(0)?; + let mut node = self.get_node(0)?; for byte in seed { for bit in 0..8 { match node.specific { @@ -571,7 +575,7 @@ impl MerkleBlob { } else { next = right; } - node = self.get_raw_node(next)?; + node = self.get_node(next)?; } } } @@ -580,7 +584,7 @@ impl MerkleBlob { Err("failed to find a node".to_string()) } - fn insert_entry_to_blob(&mut self, index: TreeIndex, block: Block) -> Result<(), String> { + fn insert_entry_to_blob(&mut self, index: TreeIndex, block: BlockBytes) -> Result<(), String> { let extend_index = (self.blob.len() / BLOCK_SIZE) as TreeIndex; match index.cmp(&extend_index) { Ordering::Greater => return Err(format!("index out of range: {index}")), @@ -594,7 +598,7 @@ impl MerkleBlob { Ok(()) } - fn get_block(&self, index: TreeIndex) -> Result { + fn get_block(&self, index: TreeIndex) -> Result { let metadata_start = index as usize * BLOCK_SIZE; let data_start = metadata_start + METADATA_SIZE; let end = data_start + DATA_SIZE; @@ -606,8 +610,8 @@ impl MerkleBlob { .map_err(|e| format!("failed getting block {index}: {e}")) } - pub fn get_raw_node(&self, index: TreeIndex) -> Result { - // TODO: use ParsedBlock::from_bytes() + pub fn get_node(&self, index: TreeIndex) -> Result { + // TODO: use Block::from_bytes() // TODO: handle invalid indexes? // TODO: handle overflows? 
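        // hedged sketch, not part of this commit: one way to answer the two TODOs above
        // is to bounds-check before slicing, assuming the blob length stays a multiple
        // of BLOCK_SIZE:
        //     let block_count = self.blob.len() / BLOCK_SIZE;
        //     if index as usize >= block_count {
        //         return Err(format!("index out of range: {index} >= {block_count}"));
        //     }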
let block = self.get_block(index)?; @@ -627,23 +631,21 @@ impl MerkleBlob { }; Ok(match Node::from_bytes(&metadata, index, data_blob) { Ok(node) => node, - Err(message) => return Err(format!("failed loading raw node: {message}")), + Err(message) => return Err(format!("failed loading node: {message}")), }) } pub fn get_parent_index(&self, index: TreeIndex) -> Result { let block = self.get_block(index).unwrap(); - let node_type = - NodeMetadata::node_type_from_bytes(block[..METADATA_SIZE].try_into().unwrap())?; - Node::parent_from_bytes(&node_type, block[METADATA_SIZE..].try_into().unwrap()) + Node::parent_from_bytes(block[METADATA_SIZE..].try_into().unwrap()) } pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { let mut next_index = index; let mut lineage = vec![]; loop { - let node = self.get_raw_node(next_index)?; + let node = self.get_node(next_index)?; next_index = node.parent; lineage.push(node); @@ -660,10 +662,7 @@ impl MerkleBlob { loop { lineage.push(next_index); let block = self.get_block(next_index)?; - let node_type = - NodeMetadata::node_type_from_bytes(block[..METADATA_SIZE].try_into().unwrap())?; - next_index = - Node::parent_from_bytes(&node_type, block[METADATA_SIZE..].try_into().unwrap())?; + next_index = Node::parent_from_bytes(block[METADATA_SIZE..].try_into().unwrap())?; if next_index == NULL_PARENT { return Ok(lineage); @@ -825,7 +824,7 @@ mod tests { fn test_load_a_python_dump() { // let kv_id = 0x1415161718191A1B; let merkle_blob = example_merkle_blob(); - merkle_blob.get_raw_node(0).unwrap(); + merkle_blob.get_node(0).unwrap(); } #[test] @@ -864,9 +863,9 @@ mod tests { let merkle_blob = MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap(); - assert_eq!(merkle_blob.get_raw_node(0).unwrap(), EXAMPLE_ROOT); - assert_eq!(merkle_blob.get_raw_node(1).unwrap(), EXAMPLE_LEFT_LEAF); - assert_eq!(merkle_blob.get_raw_node(2).unwrap(), EXAMPLE_RIGHT_LEAF); + assert_eq!(merkle_blob.get_node(0).unwrap(), EXAMPLE_ROOT); + assert_eq!(merkle_blob.get_node(1).unwrap(), EXAMPLE_LEFT_LEAF); + assert_eq!(merkle_blob.get_node(2).unwrap(), EXAMPLE_RIGHT_LEAF); } #[test] @@ -881,7 +880,7 @@ mod tests { .unwrap(); // TODO: just hacking here to compare with the ~wrong~ simplified reference - let mut root = ParsedBlock::from_bytes(merkle_blob.get_block(0).unwrap(), 0).unwrap(); + let mut root = Block::from_bytes(merkle_blob.get_block(0).unwrap(), 0).unwrap(); root.metadata.dirty = true; root.node.hash = HASH; assert_eq!(root.metadata.node_type, NodeType::Internal); From 2e40365a52e6b8e0fecaa6be3d615ef4e444de7e Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 15:46:34 -0400 Subject: [PATCH 029/181] constrain the magic null parent value to serialized data --- crates/chia-datalayer/src/lib.rs | 100 +++++++++++++++++-------------- 1 file changed, 54 insertions(+), 46 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index a0af14e25..cfeb18985 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -15,6 +15,7 @@ const DATA_SIZE: usize = 44; const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; type TreeIndex = u32; +type Parent = Option; // type Key = Vec; type Hash = [u8; 32]; type BlockBytes = [u8; BLOCK_SIZE]; @@ -112,7 +113,7 @@ impl NodeMetadata { #[derive(Debug, PartialEq)] pub struct Node { // TODO: can this be an Option to avoid dealing with the magic number outside of serialization? 
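    // hedged aside, not part of this commit: this patch answers the question above by
    // keeping an Option<TreeIndex> in memory and confining the 0xffff_ffff sentinel to
    // serialization, roughly (helper name illustrative only):
    //     fn parent_to_bytes(parent: Parent) -> [u8; 4] {
    //         parent.unwrap_or(NULL_PARENT).to_be_bytes()
    //     }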
- parent: TreeIndex, + parent: Parent, hash: Hash, specific: NodeSpecific, // TODO: kinda feels questionable having it be aware of its own location @@ -165,11 +166,14 @@ impl Node { }) } - fn parent_from_bytes(blob: &[u8; DATA_SIZE]) -> Result { + fn parent_from_bytes(blob: &[u8; DATA_SIZE]) -> Result { // TODO: a little setup here for pre-optimization to allow walking parents without processing entire nodes - Ok(TreeIndex::from_be_bytes( - <[u8; 4]>::try_from(&blob[PARENT_RANGE]).unwrap(), - )) + let parent_integer = + TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[PARENT_RANGE]).unwrap()); + match parent_integer { + NULL_PARENT => Ok(None), + _ => Ok(Some(parent_integer)), + } } pub fn to_bytes(&self) -> [u8; DATA_SIZE] { let mut blob: Vec = Vec::new(); @@ -180,7 +184,11 @@ impl Node { hash, index: _, } => { - blob.extend(parent.to_be_bytes()); + let parent_integer = match parent { + None => NULL_PARENT, + Some(parent) => *parent, + }; + blob.extend(parent_integer.to_be_bytes()); blob.extend(left.to_be_bytes()); blob.extend(right.to_be_bytes()); blob.extend(hash); @@ -191,7 +199,11 @@ impl Node { hash, index: _, } => { - blob.extend(parent.to_be_bytes()); + let parent_integer = match parent { + None => NULL_PARENT, + Some(parent) => *parent, + }; + blob.extend(parent_integer.to_be_bytes()); blob.extend(key_value.to_be_bytes()); blob.extend(hash); } @@ -365,7 +377,7 @@ impl MerkleBlob { dirty: false, }, node: Node { - parent: NULL_PARENT, + parent: None, specific: NodeSpecific::Leaf { key_value }, hash, index: 0, @@ -396,7 +408,7 @@ impl MerkleBlob { dirty: false, }, node: Node { - parent: NULL_PARENT, + parent: None, specific: NodeSpecific::Internal { left: 1, right: 2 }, hash: internal_node_hash, index: 0, @@ -411,7 +423,7 @@ impl MerkleBlob { dirty: false, }, node: Node { - parent: 0, + parent: Some(0), specific: NodeSpecific::Leaf { key_value: old_leaf.key_value(), }, @@ -429,7 +441,7 @@ impl MerkleBlob { dirty: false, }, node: Node { - parent: 0, + parent: Some(0), specific: NodeSpecific::Leaf { key_value }, hash, index: 2, @@ -463,7 +475,7 @@ impl MerkleBlob { dirty: false, }, node: Node { - parent: new_internal_node_index, + parent: Some(new_internal_node_index), specific: NodeSpecific::Leaf { key_value }, hash, index: new_leaf_index, @@ -488,17 +500,13 @@ impl MerkleBlob { }; self.insert_entry_to_blob(new_internal_node_index, new_internal_block.to_bytes())?; - let old_parent_index = old_leaf.parent; - assert_ne!( - old_parent_index, - NULL_PARENT, - "{}", - format!("{key_value:?} {hash:?}") - ); + let Some(old_parent_index) = old_leaf.parent else { + panic!("{key_value:?} {hash:?}") + }; let mut block = Block::from_bytes(self.get_block(old_leaf.index)?, new_internal_node_index)?; - block.node.parent = new_internal_node_index; + block.node.parent = Some(new_internal_node_index); self.insert_entry_to_blob(old_leaf.index, block.to_bytes())?; let mut old_parent_block = @@ -528,13 +536,13 @@ impl MerkleBlob { } fn mark_lineage_as_dirty(&mut self, index: TreeIndex) -> Result<(), String> { - let mut index = index; + let mut next_index = Some(index); - while index != NULL_PARENT { - let mut block = Block::from_bytes(self.get_block(index)?, index)?; + while let Some(this_index) = next_index { + let mut block = Block::from_bytes(self.get_block(this_index)?, this_index)?; block.metadata.dirty = true; - self.insert_entry_to_blob(index, block.to_bytes())?; - index = block.node.parent; + self.insert_entry_to_blob(this_index, block.to_bytes())?; + next_index = block.node.parent; } Ok(()) @@ 
-635,39 +643,39 @@ impl MerkleBlob { }) } - pub fn get_parent_index(&self, index: TreeIndex) -> Result { + pub fn get_parent_index(&self, index: TreeIndex) -> Result { let block = self.get_block(index).unwrap(); Node::parent_from_bytes(block[METADATA_SIZE..].try_into().unwrap()) } pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { - let mut next_index = index; + // TODO: what about an index that happens to be the null index? a question for everywhere i guess + let mut next_index = Some(index); let mut lineage = vec![]; - loop { - let node = self.get_node(next_index)?; + + while let Some(this_index) = next_index { + let node = self.get_node(this_index)?; next_index = node.parent; lineage.push(node); - - if next_index == NULL_PARENT { - return Ok(lineage); - } } + + Ok(lineage) } pub fn get_lineage_indexes(&self, index: TreeIndex) -> Result, String> { // TODO: yep, this 'optimization' might be overkill, and should be speed compared regardless - let mut next_index = index; - let mut lineage = vec![]; - loop { - lineage.push(next_index); - let block = self.get_block(next_index)?; - next_index = Node::parent_from_bytes(block[METADATA_SIZE..].try_into().unwrap())?; + // TODO: what about an index that happens to be the null index? a question for everywhere i guess + let mut next_index = Some(index); + let mut lineage: Vec = vec![]; - if next_index == NULL_PARENT { - return Ok(lineage); - } + while let Some(this_index) = next_index { + lineage.push(this_index); + let block = self.get_block(this_index)?; + next_index = Node::parent_from_bytes(block[METADATA_SIZE..].try_into().unwrap())?; } + + Ok(lineage) } } @@ -721,7 +729,7 @@ mod tests { ]; const EXAMPLE_ROOT: Node = Node { - parent: NULL_PARENT, + parent: None, specific: NodeSpecific::Internal { left: 1, right: 2 }, hash: HASH, index: 0, @@ -731,7 +739,7 @@ mod tests { dirty: true, }; const EXAMPLE_LEFT_LEAF: Node = Node { - parent: 0, + parent: Some(0), specific: NodeSpecific::Leaf { key_value: 0x0405_0607_0809_0A0B, }, @@ -743,7 +751,7 @@ mod tests { dirty: false, }; const EXAMPLE_RIGHT_LEAF: Node = Node { - parent: 0, + parent: Some(0), specific: NodeSpecific::Leaf { key_value: 0x1415_1617_1819_1A1B, }, @@ -836,7 +844,7 @@ mod tests { } assert_eq!(lineage.len(), 2); let last_node = lineage.last().unwrap(); - assert_eq!(last_node.parent, NULL_PARENT); + assert_eq!(last_node.parent, None); } #[test] From e45299121e4ac9ad412d94a8d3cf44b1726a9b02 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 16:13:02 -0400 Subject: [PATCH 030/181] yup --- crates/chia-datalayer/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index cfeb18985..0c7d5ae77 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -112,7 +112,6 @@ impl NodeMetadata { #[derive(Debug, PartialEq)] pub struct Node { - // TODO: can this be an Option to avoid dealing with the magic number outside of serialization? 
parent: Parent, hash: Hash, specific: NodeSpecific, From bf43e9f9b96f23f595c35f3f8a3d2f091e8022da Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 16:14:30 -0400 Subject: [PATCH 031/181] tidy --- crates/chia-datalayer/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 0c7d5ae77..87694a8f4 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -143,9 +143,8 @@ impl Node { blob: [u8; DATA_SIZE], ) -> Result { // TODO: add Err results - let parent = Self::parent_from_bytes(&blob)?; Ok(Self { - parent, + parent: Self::parent_from_bytes(&blob)?, index, hash: <[u8; 32]>::try_from(&blob[HASH_RANGE]).unwrap(), specific: match metadata.node_type { From 32984bb30e5090eaf0fbc289a0ee4c35935efa3b Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 16:50:44 -0400 Subject: [PATCH 032/181] oops --- tests/test_merkle_blob.py | 9 ++++++--- wheel/generate_type_stubs.py | 1 + wheel/python/chia_rs/chia_rs.pyi | 1 + 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/test_merkle_blob.py b/tests/test_merkle_blob.py index d0666afe6..9ff370a27 100644 --- a/tests/test_merkle_blob.py +++ b/tests/test_merkle_blob.py @@ -1,4 +1,7 @@ + from chia_rs import MerkleBlob +from chia_rs.sized_bytes import bytes32 +from chia_rs.sized_ints import uint64 def test_merkle_blob(): @@ -12,7 +15,7 @@ def test_merkle_blob(): def test_just_insert_a_bunch() -> None: - HASH = bytes([ + HASH = bytes32([ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, ]) @@ -24,10 +27,10 @@ def test_just_insert_a_bunch() -> None: merkle_blob = MerkleBlob(blob=bytearray()) import time - total_time = 0 + total_time = 0.0 for i in range(100000): start = time.monotonic() - merkle_blob.insert(i, HASH) + merkle_blob.insert(uint64(i), HASH) end = time.monotonic() total_time += end - start diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index 7324b61bf..670f81f80 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -382,6 +382,7 @@ def __init__( blob: bytes, ) -> None: ... + def insert(self, key: uint64, value: bytes32) -> None: ... def __len__(self) -> int: ... class MerkleSet: diff --git a/wheel/python/chia_rs/chia_rs.pyi b/wheel/python/chia_rs/chia_rs.pyi index ff887eee7..9586ba5ff 100644 --- a/wheel/python/chia_rs/chia_rs.pyi +++ b/wheel/python/chia_rs/chia_rs.pyi @@ -131,6 +131,7 @@ class MerkleBlob: blob: bytes, ) -> None: ... + def insert(self, key: uint64, value: bytes32) -> None: ... def __len__(self) -> int: ... 
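    # hedged usage sketch, not part of the generated stub; it mirrors the calls made in
    # tests/test_merkle_blob.py above:
    #     mb = MerkleBlob(blob=bytearray())
    #     mb.insert(uint64(1), bytes32(bytes(32)))
    #     assert len(mb) > 0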
class MerkleSet: From 0de8a05f42967f3a390de34afc6d2522504379c5 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 17:18:05 -0400 Subject: [PATCH 033/181] clippy --- crates/chia-datalayer/src/lib.rs | 50 +++++++++++++++----------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 87694a8f4..326e1657e 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -105,7 +105,7 @@ impl NodeMetadata { match blob[1] { 0 => Ok(false), 1 => Ok(true), - other => return Err(format!("invalid dirty value: {other}")), + other => Err(format!("invalid dirty value: {other}")), } } } @@ -166,8 +166,10 @@ impl Node { fn parent_from_bytes(blob: &[u8; DATA_SIZE]) -> Result { // TODO: a little setup here for pre-optimization to allow walking parents without processing entire nodes - let parent_integer = - TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[PARENT_RANGE]).unwrap()); + let parent_integer = TreeIndex::from_be_bytes( + <[u8; 4]>::try_from(&blob[PARENT_RANGE]) + .map_err(|e| format!("data blob wrong size: {e}"))?, + ); match parent_integer { NULL_PARENT => Ok(None), _ => Ok(Some(parent_integer)), @@ -212,10 +214,11 @@ impl Node { // TODO: yes i know i'm trying to write this code in a non-rusty way and i need to stop that pub fn key_value(&self) -> KvId { - match self.specific { - NodeSpecific::Leaf { key_value } => key_value, - _ => panic!(), - } + let NodeSpecific::Leaf { key_value } = self.specific else { + panic!() + }; + + key_value } } @@ -258,7 +261,7 @@ impl Block { } } -fn get_free_indexes(blob: &Vec) -> Result, String> { +fn get_free_indexes(blob: &[u8]) -> Result, String> { let index_count = blob.len() / BLOCK_SIZE; if index_count == 0 { @@ -268,8 +271,7 @@ fn get_free_indexes(blob: &Vec) -> Result, String> { let mut seen_indexes: Vec = vec![false; index_count]; let mut queue: Vec = vec![0]; - while queue.len() > 0 { - let index: TreeIndex = queue.pop().unwrap(); + while let Some(index) = queue.pop() { let offset = index as usize * BLOCK_SIZE; let block = Block::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; @@ -286,14 +288,14 @@ fn get_free_indexes(blob: &Vec) -> Result, String> { let mut free_indexes: Vec = vec![]; for (index, seen) in seen_indexes.iter().enumerate() { if !seen { - free_indexes.push(index as TreeIndex) + free_indexes.push(index as TreeIndex); } } Ok(free_indexes) } -fn get_keys_values_indexes(blob: &Vec) -> Result, String> { +fn get_keys_values_indexes(blob: &[u8]) -> Result, String> { let index_count = blob.len() / BLOCK_SIZE; let mut kv_to_index: HashMap = HashMap::default(); @@ -304,8 +306,7 @@ fn get_keys_values_indexes(blob: &Vec) -> Result, S let mut queue: Vec = vec![0]; - while queue.len() > 0 { - let index: TreeIndex = queue.pop().unwrap(); + while let Some(index) = queue.pop() { let offset = index as usize * BLOCK_SIZE; let block = Block::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; @@ -354,8 +355,8 @@ impl MerkleBlob { pub fn insert(&mut self, key_value: KvId, hash: Hash) -> Result<(), String> { // TODO: what about only unused providing a blob length? 
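        // hedged summary, not part of this commit: an empty blob goes to insert_first, a
        // tree holding a single leaf goes to insert_second (which rebuilds the three-node
        // tree from scratch), and anything larger goes to insert_third_or_later, which
        // grafts a new internal node above a leaf chosen pseudo-randomly from the key
        // bytes. note that after this clippy pass the empty-blob branch no longer returns
        // early, so control currently falls through to the lookup below.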
- if self.blob.len() == 0 { - return self.insert_first(key_value, hash); + if self.blob.is_empty() { + self.insert_first(key_value, hash); } let old_leaf = self.get_random_leaf_node_from_bytes(Vec::from(key_value.to_be_bytes()))?; @@ -368,7 +369,7 @@ impl MerkleBlob { self.insert_third_or_later(key_value, hash, old_leaf, internal_node_hash) } - fn insert_first(&mut self, key_value: KvId, hash: Hash) -> Result<(), String> { + fn insert_first(&mut self, key_value: KvId, hash: Hash) { let new_leaf_block = Block { metadata: NodeMetadata { node_type: NodeType::Leaf, @@ -387,8 +388,6 @@ impl MerkleBlob { self.kv_to_index.insert(key_value, 0); self.free_indexes.clear(); self.last_allocated_index = 1; - - Ok(()) } fn insert_second( @@ -575,12 +574,7 @@ impl MerkleBlob { match node.specific { NodeSpecific::Leaf { .. } => return Ok(node), NodeSpecific::Internal { left, right, .. } => { - let next: TreeIndex; - if byte & (1 << bit) != 0 { - next = left; - } else { - next = right; - } + let next: TreeIndex = if byte & (1 << bit) != 0 { left } else { right }; node = self.get_node(next)?; } } @@ -644,7 +638,11 @@ impl MerkleBlob { pub fn get_parent_index(&self, index: TreeIndex) -> Result { let block = self.get_block(index).unwrap(); - Node::parent_from_bytes(block[METADATA_SIZE..].try_into().unwrap()) + Node::parent_from_bytes( + block[METADATA_SIZE..] + .try_into() + .map_err(|e| format!("data blob wrong size: {e}"))?, + ) } pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { From 03856ffe34809381ab9166712a6b34637adc943a Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 17:21:50 -0400 Subject: [PATCH 034/181] black --- tests/test_merkle_blob.py | 43 ++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/tests/test_merkle_blob.py b/tests/test_merkle_blob.py index 9ff370a27..ce4dc6458 100644 --- a/tests/test_merkle_blob.py +++ b/tests/test_merkle_blob.py @@ -1,4 +1,3 @@ - from chia_rs import MerkleBlob from chia_rs.sized_bytes import bytes32 from chia_rs.sized_ints import uint64 @@ -15,18 +14,52 @@ def test_merkle_blob(): def test_just_insert_a_bunch() -> None: - HASH = bytes32([ - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 42, 43, - ]) + HASH = bytes32( + [ + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + ] + ) import pathlib + path = pathlib.Path("~/tmp/mbt/").expanduser() path.joinpath("py").mkdir(parents=True, exist_ok=True) path.joinpath("rs").mkdir(parents=True, exist_ok=True) merkle_blob = MerkleBlob(blob=bytearray()) import time + total_time = 0.0 for i in range(100000): start = time.monotonic() From 7a567d9d33848e3b36d179f6d15a5afb7e25b63b Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 17:29:21 -0400 Subject: [PATCH 035/181] clippy --- crates/chia-datalayer/src/lib.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 326e1657e..61da06231 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -363,10 +363,11 @@ impl MerkleBlob { let internal_node_hash = internal_hash(old_leaf.hash, hash); if self.kv_to_index.len() == 1 { - return self.insert_second(key_value, hash, old_leaf, internal_node_hash); + self.insert_second(key_value, hash, &old_leaf, 
internal_node_hash); + return Ok(()); } - self.insert_third_or_later(key_value, hash, old_leaf, internal_node_hash) + self.insert_third_or_later(key_value, hash, &old_leaf, internal_node_hash) } fn insert_first(&mut self, key_value: KvId, hash: Hash) { @@ -394,9 +395,9 @@ impl MerkleBlob { &mut self, key_value: KvId, hash: Hash, - old_leaf: Node, + old_leaf: &Node, internal_node_hash: Hash, - ) -> Result<(), String> { + ) { self.blob.clear(); let new_internal_block = Block { @@ -452,15 +453,13 @@ impl MerkleBlob { self.free_indexes.clear(); self.last_allocated_index = 3; - - Ok(()) } fn insert_third_or_later( &mut self, key_value: KvId, hash: Hash, - old_leaf: Node, + old_leaf: &Node, internal_node_hash: Hash, ) -> Result<(), String> { let new_leaf_index = self.get_new_index(); From 778fa01d723b348ae753c20a39838af1b27d4449 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 17:49:30 -0400 Subject: [PATCH 036/181] clippy --- crates/chia-datalayer/src/lib.rs | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 61da06231..e20107a0f 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -677,11 +677,13 @@ impl MerkleBlob { #[cfg(feature = "py-bindings")] #[pymethods] impl MerkleBlob { + #[allow(clippy::needless_pass_by_value)] #[new] pub fn py_init(blob: PyBuffer) -> PyResult { - if !blob.is_c_contiguous() { - panic!("from_bytes() must be called with a contiguous buffer"); - } + assert!( + blob.is_c_contiguous(), + "from_bytes() must be called with a contiguous buffer" + ); #[allow(unsafe_code)] let slice = unsafe { std::slice::from_raw_parts(blob.buf_ptr() as *const u8, blob.len_bytes()) }; @@ -713,9 +715,9 @@ impl MerkleBlob { #[cfg(test)] mod tests { use super::*; - use clvm_utils; use hex_literal::hex; use rstest::rstest; + use std::time::{Duration, Instant}; const EXAMPLE_BLOB: [u8; 138] = hex!("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b"); const HASH: Hash = [ @@ -773,7 +775,7 @@ mod tests { assert_eq!( NodeType::from_u8(node_type.clone() as u8).unwrap(), node_type, - ) + ); } } @@ -808,13 +810,7 @@ mod tests { ) { let bytes: [u8; 2] = [node_type.to_u8(), dirty as u8]; let object = NodeMetadata::from_bytes(bytes).unwrap(); - assert_eq!( - object, - NodeMetadata { - node_type: node_type, - dirty: dirty - }, - ); + assert_eq!(object, NodeMetadata { node_type, dirty },); assert_eq!(object.to_bytes(), bytes); assert_eq!( NodeMetadata::node_type_from_bytes(bytes).unwrap(), @@ -896,10 +892,9 @@ mod tests { fn test_just_insert_a_bunch() { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - use std::time::{Duration, Instant}; let mut total_time = Duration::new(0, 0); - for i in 0..100000 { + for i in 0..100_000 { let start = Instant::now(); merkle_blob // TODO: yeah this hash is garbage @@ -929,7 +924,7 @@ mod tests { } // println!("{:?}", merkle_blob.blob) - println!("total time: {total_time:?}") + println!("total time: {total_time:?}"); // TODO: check, well... 
something } } From 67f544bfcf4bb40152b7a2642013a160e600494e Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 18:02:51 -0400 Subject: [PATCH 037/181] unassert --- tests/test_merkle_blob.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_merkle_blob.py b/tests/test_merkle_blob.py index ce4dc6458..cd5631fd0 100644 --- a/tests/test_merkle_blob.py +++ b/tests/test_merkle_blob.py @@ -82,4 +82,5 @@ def test_just_insert_a_bunch() -> None: # rs = pathlib.Path("~/repos/chia_rs/crates/chia-datalayer/src/test_just_insert_a_bunch_reference").expanduser().read_bytes() # b = bytes(merkle_blob.blob) # assert b == rs, 'not the same' - assert False, f"total time: {total_time}" + + # assert False, f"total time: {total_time}" From ca09a9153b7c5a523b5e46a48dde48d4e94d4ded Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Sep 2024 18:42:26 -0400 Subject: [PATCH 038/181] empty From d83b3df7541d9f3e82fa3034af2794bd3bfaab03 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Sep 2024 11:36:03 -0400 Subject: [PATCH 039/181] remove some commented out code --- crates/chia-datalayer/src/lib.rs | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index e20107a0f..869d33046 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1,5 +1,3 @@ -// use std::collections::HashMap; - #[cfg(feature = "py-bindings")] use pyo3::{buffer::PyBuffer, pyclass, pymethods, PyResult}; @@ -16,7 +14,6 @@ const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; type TreeIndex = u32; type Parent = Option; -// type Key = Vec; type Hash = [u8; 32]; type BlockBytes = [u8; BLOCK_SIZE]; type KvId = u64; @@ -544,14 +541,6 @@ impl MerkleBlob { Ok(()) } - // fn update_entry( - // index: TreeIndex, - // parent: Option[TreeIndex], - // left: Option[TreeIndex], - // right: Option[TreeIndex], - // hash: Option[Hash], - // key_value: Option[KvId], - // ) fn get_new_index(&mut self) -> TreeIndex { match self.free_indexes.pop() { None => { @@ -691,17 +680,10 @@ impl MerkleBlob { Ok(Self::new(Vec::from(slice)).unwrap()) } - // #[pyo3(name = "get_root")] - // pub fn py_get_root<'a>(&self, py: Python<'a>) -> PyResult> { - // ChiaToPython::to_python(&Bytes32::new(self.get_root()), py) - // } - #[pyo3(name = "insert")] pub fn py_insert(&mut self, key_value: KvId, hash: Hash) -> PyResult<()> { // TODO: consider the error - // self.insert(key_value, hash).map_err(|_| PyValueError::new_err("yeppers")) self.insert(key_value, hash).unwrap(); - // self.insert(key_value, hash).map_err(|_| PyValueError::new_err("invalid key"))?; Ok(()) } @@ -821,7 +803,6 @@ mod tests { #[test] fn test_load_a_python_dump() { - // let kv_id = 0x1415161718191A1B; let merkle_blob = example_merkle_blob(); merkle_blob.get_node(0).unwrap(); } From 45ebc57a9cf01dd0d64d78dc3fab27498eb6aa73 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Sep 2024 13:30:53 -0400 Subject: [PATCH 040/181] .get_block_bytes() --- crates/chia-datalayer/src/lib.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 869d33046..0e55c7bff 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -497,13 +497,15 @@ impl MerkleBlob { panic!("{key_value:?} {hash:?}") }; - let mut block = - Block::from_bytes(self.get_block(old_leaf.index)?, new_internal_node_index)?; + let mut block = 
Block::from_bytes( + self.get_block_bytes(old_leaf.index)?, + new_internal_node_index, + )?; block.node.parent = Some(new_internal_node_index); self.insert_entry_to_blob(old_leaf.index, block.to_bytes())?; let mut old_parent_block = - Block::from_bytes(self.get_block(old_parent_index)?, old_parent_index)?; + Block::from_bytes(self.get_block_bytes(old_parent_index)?, old_parent_index)?; match old_parent_block.node.specific { NodeSpecific::Internal { ref mut left, @@ -532,7 +534,7 @@ impl MerkleBlob { let mut next_index = Some(index); while let Some(this_index) = next_index { - let mut block = Block::from_bytes(self.get_block(this_index)?, this_index)?; + let mut block = Block::from_bytes(self.get_block_bytes(this_index)?, this_index)?; block.metadata.dirty = true; self.insert_entry_to_blob(this_index, block.to_bytes())?; next_index = block.node.parent; @@ -586,7 +588,7 @@ impl MerkleBlob { Ok(()) } - fn get_block(&self, index: TreeIndex) -> Result { + fn get_block_bytes(&self, index: TreeIndex) -> Result { let metadata_start = index as usize * BLOCK_SIZE; let data_start = metadata_start + METADATA_SIZE; let end = data_start + DATA_SIZE; @@ -602,7 +604,7 @@ impl MerkleBlob { // TODO: use Block::from_bytes() // TODO: handle invalid indexes? // TODO: handle overflows? - let block = self.get_block(index)?; + let block = self.get_block_bytes(index)?; let metadata_blob: [u8; METADATA_SIZE] = block .get(..METADATA_SIZE) .ok_or(format!("metadata blob out of bounds: {}", block.len(),))? @@ -624,7 +626,7 @@ impl MerkleBlob { } pub fn get_parent_index(&self, index: TreeIndex) -> Result { - let block = self.get_block(index).unwrap(); + let block = self.get_block_bytes(index).unwrap(); Node::parent_from_bytes( block[METADATA_SIZE..] @@ -655,7 +657,7 @@ impl MerkleBlob { while let Some(this_index) = next_index { lineage.push(this_index); - let block = self.get_block(this_index)?; + let block = self.get_block_bytes(this_index)?; next_index = Node::parent_from_bytes(block[METADATA_SIZE..].try_into().unwrap())?; } @@ -860,7 +862,7 @@ mod tests { .unwrap(); // TODO: just hacking here to compare with the ~wrong~ simplified reference - let mut root = Block::from_bytes(merkle_blob.get_block(0).unwrap(), 0).unwrap(); + let mut root = Block::from_bytes(merkle_blob.get_block_bytes(0).unwrap(), 0).unwrap(); root.metadata.dirty = true; root.node.hash = HASH; assert_eq!(root.metadata.node_type, NodeType::Internal); From b22ce1741e2dde07c55e8f6b78eb4f73d55f419b Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 10 Sep 2024 15:57:42 -0400 Subject: [PATCH 041/181] implement delete, iterable merkle blob, and to dot --- crates/chia-datalayer/src/lib.rs | 348 ++++++++++++++++++++++++++++++- 1 file changed, 343 insertions(+), 5 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 0e55c7bff..a7904c936 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -3,7 +3,8 @@ use pyo3::{buffer::PyBuffer, pyclass, pymethods, PyResult}; use clvmr::sha2::Sha256; use std::cmp::Ordering; -use std::collections::HashMap; +use std::collections::{HashMap, VecDeque}; +use std::iter::IntoIterator; use std::ops::Range; // TODO: clearly shouldn't be hard coded @@ -63,7 +64,6 @@ impl NodeType { // } fn internal_hash(left_hash: Hash, right_hash: Hash) -> Hash { - // TODO: verify against original reference in blockchain let mut hasher = Sha256::new(); hasher.update(b"\x02"); hasher.update(left_hash); @@ -72,6 +72,39 @@ fn internal_hash(left_hash: Hash, right_hash: 
Hash) -> Hash { hasher.finalize() } +pub struct DotLines { + nodes: Vec, + connections: Vec, + pair_boxes: Vec, +} + +impl DotLines { + pub fn new() -> Self { + Self { + nodes: vec![], + connections: vec![], + pair_boxes: vec![], + } + } + + pub fn push(&mut self, mut other: DotLines) { + self.nodes.append(&mut other.nodes); + self.connections.append(&mut other.connections); + self.pair_boxes.append(&mut other.pair_boxes); + } + + pub fn dump(&mut self) -> String { + // TODO: consuming itself, secretly + let mut result = vec!["digraph {".to_string()]; + result.append(&mut self.nodes); + result.append(&mut self.connections); + result.append(&mut self.pair_boxes); + result.push("}".to_string()); + + result.join("\n") + } +} + const NULL_PARENT: TreeIndex = 0xffff_ffffu32; #[derive(Debug, PartialEq)] @@ -122,6 +155,20 @@ pub enum NodeSpecific { Leaf { key_value: KvId }, } +impl NodeSpecific { + pub fn sibling_index(&self, index: TreeIndex) -> TreeIndex { + let NodeSpecific::Internal { right, left } = self else { + panic!() + }; + + match index { + x if (x == *right) => *left, + x if (x == *left) => *right, + _ => panic!(), + } + } +} + const PARENT_RANGE: Range = 0..4; const LEFT_RANGE: Range = 4..8; const RIGHT_RANGE: Range = 8..12; @@ -217,6 +264,42 @@ impl Node { key_value } + + pub fn to_dot(&self) -> DotLines { + let index = self.index; + match self.specific { + NodeSpecific::Internal {left, right} => DotLines{ + nodes: vec![ + format!("node_{index} [label=\"{index}\"]"), + ], + connections: vec![ + format!("node_{index} -> node_{left};"), + format!("node_{index} -> node_{right};"), + // TODO: can this be done without introducing a blank line? + match self.parent{ + Some(parent) => format!("node_{index} -> node_{parent};"), + None => "".to_string(), + }, + ], + pair_boxes: vec![ + format!("node [shape = box]; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}}"), + ] + }, + NodeSpecific::Leaf {key_value} => DotLines{ + nodes: vec![ + format!("node_{index} [shape=box, label=\"{index}\\nkey_value: {key_value}\"];"), + ], + connections: vec![ + // TODO: dedupe with above + match self.parent{ + Some(parent) => format!("node_{index} -> node_{parent};"), + None => "".to_string(), + }, + ], + pair_boxes: vec![], + }, + } + } } // TODO: does not enforce matching metadata node type and node enumeration type @@ -525,11 +608,106 @@ impl MerkleBlob { self.insert_entry_to_blob(old_parent_index, old_parent_block.to_bytes())?; self.mark_lineage_as_dirty(old_parent_index)?; - self.kv_to_index.insert(key_value, new_internal_node_index); + self.kv_to_index.insert(key_value, new_leaf_index); + + Ok(()) + } + + pub fn delete(&mut self, key_value: KvId) -> Result<(), String> { + let leaf_index = *self.kv_to_index.get(&key_value).unwrap(); + let leaf = self.get_node(leaf_index).unwrap(); + + match leaf.specific { + // TODO: blech + NodeSpecific::Leaf { .. } => (), + NodeSpecific::Internal { .. 
} => panic!(), + }; + self.kv_to_index.remove(&key_value); + + let Some(parent_index) = leaf.parent else { + self.free_indexes.clear(); + self.last_allocated_index = 0; + self.blob.clear(); + return Ok(()); + }; + + self.free_indexes.push(leaf_index); + let parent = self.get_node(parent_index).unwrap(); + // TODO: kinda implicit that we 'check' that parent is internal inside .sibling_index() + let sibling_index = parent.specific.sibling_index(leaf_index); + let mut sibling_block = self.get_block(sibling_index)?; + + let Some(grandparent_index) = parent.parent else { + sibling_block.metadata.dirty = true; + sibling_block.node.parent = None; + let range = self.get_block_range(0); + self.blob[range].copy_from_slice(&sibling_block.to_bytes()); + + match sibling_block.node.specific { + NodeSpecific::Leaf { key_value } => { + self.kv_to_index.insert(key_value, 0); + } + NodeSpecific::Internal { left, right } => { + for child_index in [left, right] { + let mut block = self.get_block(child_index)?; + block.node.parent = Some(0); + self.insert_entry_to_blob(child_index, block.to_bytes())?; + } + } + }; + + self.free_indexes.push(sibling_index); + + return Ok(()); + }; + + self.free_indexes.push(parent_index); + let mut grandparent_block = self.get_block(grandparent_index).unwrap(); + + sibling_block.node.parent = Some(grandparent_index); + let range = self.get_block_range(sibling_index); + self.blob[range].copy_from_slice(&sibling_block.to_bytes()); + + match grandparent_block.node.specific { + NodeSpecific::Internal { + ref mut left, + ref mut right, + .. + } => match parent_index { + x if x == *left => *left = sibling_index, + x if x == *right => *right = sibling_index, + _ => panic!(), + }, + NodeSpecific::Leaf { .. } => panic!(), + }; + let range = self.get_block_range(grandparent_index); + self.blob[range].copy_from_slice(&grandparent_block.to_bytes()); + + self.mark_lineage_as_dirty(grandparent_index)?; Ok(()) } + // fn update_parent(&mut self, index: TreeIndex, parent: Option) -> Result<(), String> { + // let range = self.get_block_range(index); + // + // let mut node = self.get_node(index)?; + // node.parent = parent; + // self.blob[range].copy_from_slice(&node.to_bytes()); + // + // Ok(()) + // } + + // fn update_left(&mut self, index: TreeIndex, left: Option) -> Result<(), String> { + // let range = self.get_block_range(index); + // + // let mut node = self.get_node(index)?; + // node.left = left; + // self.blob[range].copy_from_slice(&node.to_bytes()); + // + // Ok(()) + // } + fn mark_lineage_as_dirty(&mut self, index: TreeIndex) -> Result<(), String> { let mut next_index = Some(index); @@ -588,13 +766,37 @@ impl MerkleBlob { Ok(()) } - fn get_block_bytes(&self, index: TreeIndex) -> Result { + fn get_block(&self, index: TreeIndex) -> Result { + Block::from_bytes(self.get_block_bytes(index)?, index) + } + + fn get_block_range(&self, index: TreeIndex) -> Range { let metadata_start = index as usize * BLOCK_SIZE; let data_start = metadata_start + METADATA_SIZE; let end = data_start + DATA_SIZE; + let range = metadata_start..end; + // checking range validity + self.blob.get(range.clone()).unwrap(); + + range + } + + // fn get_block_slice(&self, index: TreeIndex) -> Result<&mut BlockBytes, String> { + // let metadata_start = index as usize * BLOCK_SIZE; + // let data_start = metadata_start + METADATA_SIZE; + // let end = data_start + DATA_SIZE; + // + // self.blob + // .get(metadata_start..end) + // .ok_or(format!("index out of bounds: {index}"))? 
+ // .try_into() + // .map_err(|e| format!("failed getting block {index}: {e}")) + // } + + fn get_block_bytes(&self, index: TreeIndex) -> Result { self.blob - .get(metadata_start..end) + .get(self.get_block_range(index)) .ok_or(format!("index out of bounds: {index}"))? .try_into() .map_err(|e| format!("failed getting block {index}: {e}")) @@ -663,6 +865,24 @@ impl MerkleBlob { Ok(lineage) } + + pub fn to_dot(&self) -> DotLines { + let mut result = DotLines::new(); + for node in self { + result.push(node.to_dot()); + } + + result + } +} + +impl<'a> IntoIterator for &'a MerkleBlob { + type Item = Node; + type IntoIter = MerkleBlobIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + MerkleBlobIterator::new(self) + } } #[cfg(feature = "py-bindings")] @@ -690,12 +910,66 @@ impl MerkleBlob { Ok(()) } + #[pyo3(name = "delete")] + pub fn py_delete(&mut self, key_value: KvId) -> PyResult<()> { + // TODO: consider the error + self.delete(key_value).unwrap(); + + Ok(()) + } + #[pyo3(name = "__len__")] pub fn py_len(&self) -> PyResult { Ok(self.blob.len()) } } +pub struct MerkleBlobIterator<'a> { + merkle_blob: &'a MerkleBlob, + deque: VecDeque, + index_count: usize, +} + +impl<'a> MerkleBlobIterator<'a> { + fn new(merkle_blob: &'a MerkleBlob) -> Self { + let index_count = merkle_blob.blob.len() / BLOCK_SIZE; + let mut deque = VecDeque::new(); + deque.push_back(0); + + Self { + merkle_blob, + deque: deque, + index_count, + } + } +} + +impl Iterator for MerkleBlobIterator<'_> { + type Item = Node; + + fn next(&mut self) -> Option { + // left depth first + + if self.index_count == 0 { + return None; + } + + let Some(index) = self.deque.pop_front() else { + return None; + }; + let block = self.merkle_blob.get_block(index).unwrap(); + match block.node.specific { + NodeSpecific::Internal { left, right } => { + self.deque.push_front(right); + self.deque.push_front(left); + } + NodeSpecific::Leaf { .. } => (), + } + + Some(block.node) + } +} + #[cfg(test)] mod tests { use super::*; @@ -910,4 +1184,68 @@ mod tests { println!("total time: {total_time:?}"); // TODO: check, well... 
something } + + #[test] + fn test_delete() { + const COUNT: usize = 10; + let mut dots = vec![]; + + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + let key_value_ids: [KvId; COUNT] = core::array::from_fn(|i| i as KvId); + + for key_value_id in key_value_ids { + let mut hasher = Sha256::new(); + hasher.update(key_value_id.to_be_bytes()); + let hash: Hash = hasher.finalize(); + + println!("inserting: {key_value_id}"); + merkle_blob.insert(key_value_id, hash).unwrap(); + dots.push(merkle_blob.to_dot().dump()); + } + + for key_value_id in key_value_ids { + println!("deleting: {key_value_id}"); + merkle_blob.delete(key_value_id).unwrap(); + dots.push(merkle_blob.to_dot().dump()); + } + + // let mut key_value_ids: Vec = vec![0; COUNT]; + // + // for (i, key_value_id) in key_value_ids.iter_mut().enumerate() { + // *key_value_id = i as KvId; + // } + // for i in 0..100_000 { + // let start = Instant::now(); + // merkle_blob + // // TODO: yeah this hash is garbage + // .insert(i as KvId, HASH) + // .unwrap(); + // let end = Instant::now(); + // total_time += end.duration_since(start); + // + // // match i + 1 { + // // 2 => assert_eq!(merkle_blob.blob.len(), 3 * BLOCK_SIZE), + // // 3 => assert_eq!(merkle_blob.blob.len(), 5 * BLOCK_SIZE), + // // _ => (), + // // } + // + // // let file = fs::File::create(format!("/home/altendky/tmp/mbt/rs/{i:0>4}")).unwrap(); + // // let mut file = io::LineWriter::new(file); + // // for block in merkle_blob.blob.chunks(BLOCK_SIZE) { + // // let mut s = String::new(); + // // for byte in block { + // // s.push_str(&format!("{:02x}", byte)); + // // } + // // s.push_str("\n"); + // // file.write_all(s.as_bytes()).unwrap(); + // // } + // + // // fs::write(format!("/home/altendky/tmp/mbt/rs/{i:0>4}"), &merkle_blob.blob).unwrap(); + // } + // // println!("{:?}", merkle_blob.blob) + // + // println!("total time: {total_time:?}"); + // // TODO: check, well... something + } } From ea5f8c88235a505d820ae6979eb7f68ad7d125d5 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 10 Sep 2024 16:01:47 -0400 Subject: [PATCH 042/181] clippy --- crates/chia-datalayer/src/lib.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index a7904c936..37d1b429d 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -78,6 +78,12 @@ pub struct DotLines { pair_boxes: Vec, } +impl Default for DotLines { + fn default() -> Self { + Self::new() + } +} + impl DotLines { pub fn new() -> Self { Self { @@ -278,7 +284,7 @@ impl Node { // TODO: can this be done without introducing a blank line? 
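                // hedged aside, not part of this commit: one way to avoid pushing an
                // empty string at all is to build the Vec first and then append the
                // optional parent edge, e.g.:
                //     connections.extend(self.parent.map(|parent| format!("node_{index} -> node_{parent};")));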
match self.parent{ Some(parent) => format!("node_{index} -> node_{parent};"), - None => "".to_string(), + None => String::new(), }, ], pair_boxes: vec![ @@ -293,7 +299,7 @@ impl Node { // TODO: dedupe with above match self.parent{ Some(parent) => format!("node_{index} -> node_{parent};"), - None => "".to_string(), + None => String::new(), }, ], pair_boxes: vec![], @@ -874,6 +880,10 @@ impl MerkleBlob { result } + + pub fn iter(&self) -> MerkleBlobIterator<'_> { + <&Self as IntoIterator>::into_iter(self) + } } impl<'a> IntoIterator for &'a MerkleBlob { @@ -938,7 +948,7 @@ impl<'a> MerkleBlobIterator<'a> { Self { merkle_blob, - deque: deque, + deque, index_count, } } @@ -954,9 +964,7 @@ impl Iterator for MerkleBlobIterator<'_> { return None; } - let Some(index) = self.deque.pop_front() else { - return None; - }; + let index = self.deque.pop_front()?; let block = self.merkle_blob.get_block(index).unwrap(); match block.node.specific { NodeSpecific::Internal { left, right } => { From e548e2335aa4601a123807be6cea6f2862c05865 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 11 Sep 2024 08:23:03 -0400 Subject: [PATCH 043/181] comments --- crates/chia-datalayer/src/lib.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 37d1b429d..26daa6262 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -445,6 +445,7 @@ impl MerkleBlob { self.insert_first(key_value, hash); } + // TODO: make this a parameter so we have one insert call where you specify the location let old_leaf = self.get_random_leaf_node_from_bytes(Vec::from(key_value.to_be_bytes()))?; let internal_node_hash = internal_hash(old_leaf.hash, hash); @@ -694,6 +695,10 @@ impl MerkleBlob { Ok(()) } + // fn upsert(&self, old_key_value: KvId, new_key_value: KvId, new_hash: Hash) -> Result<(), String> { + // if old_key_value + // } + // fn update_parent(&mut self, index: TreeIndex, parent: Option) -> Result<(), String> { // let range = self.get_block_range(index); // From a2ce26d5855fa801f2de47cc5858a6716db1527d Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 11 Sep 2024 08:59:31 -0400 Subject: [PATCH 044/181] reuse iteration --- crates/chia-datalayer/src/lib.rs | 83 ++++++++++++++------------------ 1 file changed, 37 insertions(+), 46 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 26daa6262..a4ed4e381 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -315,15 +315,15 @@ struct Block { } impl Block { - pub fn to_bytes(&self) -> [u8; BLOCK_SIZE] { - let mut blob: [u8; BLOCK_SIZE] = [0; BLOCK_SIZE]; + pub fn to_bytes(&self) -> BlockBytes { + let mut blob: BlockBytes = [0; BLOCK_SIZE]; blob[..METADATA_SIZE].copy_from_slice(&self.metadata.to_bytes()); blob[METADATA_SIZE..].copy_from_slice(&self.node.to_bytes()); blob } - pub fn from_bytes(blob: [u8; BLOCK_SIZE], index: TreeIndex) -> Result { + pub fn from_bytes(blob: BlockBytes, index: TreeIndex) -> Result { // TODO: handle invalid indexes? // TODO: handle overflows? 
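// Aside (not from this patch): each block is BLOCK_SIZE bytes, METADATA_SIZE
// bytes of metadata followed by DATA_SIZE bytes of node data, so splitting one
// block is straightforward; a sketch assuming the crate's constants:
fn split_block(block: &[u8; BLOCK_SIZE]) -> (&[u8], &[u8]) {
    block.split_at(METADATA_SIZE)
}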
let metadata_blob: [u8; METADATA_SIZE] = blob @@ -345,8 +345,23 @@ impl Block { Err(message) => return Err(format!("failed loading node: {message}")), }) } + + fn range(index: TreeIndex) -> Range { + let metadata_start = index as usize * BLOCK_SIZE; + let data_start = metadata_start + METADATA_SIZE; + let end = data_start + DATA_SIZE; + + // let range = metadata_start..end; + // // checking range validity + // self.blob.get(range.clone()).unwrap(); + // + // range + metadata_start..end + } } +// TODO: once error handling is well defined, remove allow and handle warning +#[allow(clippy::unnecessary_wraps)] fn get_free_indexes(blob: &[u8]) -> Result, String> { let index_count = blob.len() / BLOCK_SIZE; @@ -355,20 +370,9 @@ fn get_free_indexes(blob: &[u8]) -> Result, String> { } let mut seen_indexes: Vec = vec![false; index_count]; - let mut queue: Vec = vec![0]; - while let Some(index) = queue.pop() { - let offset = index as usize * BLOCK_SIZE; - let block = - Block::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; - seen_indexes[index as usize] = true; - match block.node.specific { - NodeSpecific::Internal { left, right } => { - queue.push(left); - queue.push(right); - } - NodeSpecific::Leaf { .. } => (), - } + for node in MerkleBlobIterator::new(blob) { + seen_indexes[node.index as usize] = true; } let mut free_indexes: Vec = vec![]; @@ -381,6 +385,8 @@ fn get_free_indexes(blob: &[u8]) -> Result, String> { Ok(free_indexes) } +// TODO: once error handling is well defined, remove allow and handle warning +#[allow(clippy::unnecessary_wraps)] fn get_keys_values_indexes(blob: &[u8]) -> Result, String> { let index_count = blob.len() / BLOCK_SIZE; @@ -390,15 +396,10 @@ fn get_keys_values_indexes(blob: &[u8]) -> Result, Stri return Ok(kv_to_index); } - let mut queue: Vec = vec![0]; - - while let Some(index) = queue.pop() { - let offset = index as usize * BLOCK_SIZE; - let block = - Block::from_bytes(blob[offset..offset + BLOCK_SIZE].try_into().unwrap(), index)?; - match block.node.specific { + for node in MerkleBlobIterator::new(blob) { + match node.specific { NodeSpecific::Leaf { key_value } => { - kv_to_index.insert(key_value, index); + kv_to_index.insert(key_value, node.index); } NodeSpecific::Internal { .. } => (), } @@ -647,7 +648,7 @@ impl MerkleBlob { let Some(grandparent_index) = parent.parent else { sibling_block.metadata.dirty = true; sibling_block.node.parent = None; - let range = self.get_block_range(0); + let range = Block::range(0); self.blob[range].copy_from_slice(&sibling_block.to_bytes()); match sibling_block.node.specific { @@ -672,7 +673,7 @@ impl MerkleBlob { let mut grandparent_block = self.get_block(grandparent_index).unwrap(); sibling_block.node.parent = Some(grandparent_index); - let range = self.get_block_range(sibling_index); + let range = Block::range(sibling_index); self.blob[range].copy_from_slice(&sibling_block.to_bytes()); match grandparent_block.node.specific { @@ -687,7 +688,7 @@ impl MerkleBlob { }, NodeSpecific::Leaf { .. 
} => panic!(), }; - let range = self.get_block_range(grandparent_index); + let range = Block::range(grandparent_index); self.blob[range].copy_from_slice(&grandparent_block.to_bytes()); self.mark_lineage_as_dirty(grandparent_index)?; @@ -781,18 +782,6 @@ impl MerkleBlob { Block::from_bytes(self.get_block_bytes(index)?, index) } - fn get_block_range(&self, index: TreeIndex) -> Range { - let metadata_start = index as usize * BLOCK_SIZE; - let data_start = metadata_start + METADATA_SIZE; - let end = data_start + DATA_SIZE; - - let range = metadata_start..end; - // checking range validity - self.blob.get(range.clone()).unwrap(); - - range - } - // fn get_block_slice(&self, index: TreeIndex) -> Result<&mut BlockBytes, String> { // let metadata_start = index as usize * BLOCK_SIZE; // let data_start = metadata_start + METADATA_SIZE; @@ -807,7 +796,7 @@ impl MerkleBlob { fn get_block_bytes(&self, index: TreeIndex) -> Result { self.blob - .get(self.get_block_range(index)) + .get(Block::range(index)) .ok_or(format!("index out of bounds: {index}"))? .try_into() .map_err(|e| format!("failed getting block {index}: {e}")) @@ -896,7 +885,8 @@ impl<'a> IntoIterator for &'a MerkleBlob { type IntoIter = MerkleBlobIterator<'a>; fn into_iter(self) -> Self::IntoIter { - MerkleBlobIterator::new(self) + // TODO: review types around this to avoid copying + MerkleBlobIterator::new(&self.blob[..]) } } @@ -940,19 +930,19 @@ impl MerkleBlob { } pub struct MerkleBlobIterator<'a> { - merkle_blob: &'a MerkleBlob, + blob: &'a [u8], deque: VecDeque, index_count: usize, } impl<'a> MerkleBlobIterator<'a> { - fn new(merkle_blob: &'a MerkleBlob) -> Self { - let index_count = merkle_blob.blob.len() / BLOCK_SIZE; + fn new(blob: &'a [u8]) -> Self { + let index_count = blob.len() / BLOCK_SIZE; let mut deque = VecDeque::new(); deque.push_back(0); Self { - merkle_blob, + blob, deque, index_count, } @@ -970,7 +960,8 @@ impl Iterator for MerkleBlobIterator<'_> { } let index = self.deque.pop_front()?; - let block = self.merkle_blob.get_block(index).unwrap(); + let block_bytes: BlockBytes = self.blob[Block::range(index)].try_into().unwrap(); + let block = Block::from_bytes(block_bytes, index).unwrap(); match block.node.specific { NodeSpecific::Internal { left, right } => { self.deque.push_front(right); From 60baf0a37d1856567a06785047415862b87185de Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 11 Sep 2024 11:18:02 -0400 Subject: [PATCH 045/181] add `.calculate_lazy_hashes()` --- crates/chia-datalayer/src/lib.rs | 120 +++++++++++++++++++++---------- 1 file changed, 83 insertions(+), 37 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index a4ed4e381..bada1428d 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -309,7 +309,7 @@ impl Node { } // TODO: does not enforce matching metadata node type and node enumeration type -struct Block { +pub struct Block { metadata: NodeMetadata, node: Node, } @@ -371,8 +371,8 @@ fn get_free_indexes(blob: &[u8]) -> Result, String> { let mut seen_indexes: Vec = vec![false; index_count]; - for node in MerkleBlobIterator::new(blob) { - seen_indexes[node.index as usize] = true; + for block in MerkleBlobIterator::new(blob) { + seen_indexes[block.node.index as usize] = true; } let mut free_indexes: Vec = vec![]; @@ -396,10 +396,10 @@ fn get_keys_values_indexes(blob: &[u8]) -> Result, Stri return Ok(kv_to_index); } - for node in MerkleBlobIterator::new(blob) { - match node.specific { + for block in 
MerkleBlobIterator::new(blob) { + match block.node.specific { NodeSpecific::Leaf { key_value } => { - kv_to_index.insert(key_value, node.index); + kv_to_index.insert(key_value, block.node.index); } NodeSpecific::Internal { .. } => (), } @@ -764,14 +764,17 @@ impl MerkleBlob { Err("failed to find a node".to_string()) } - fn insert_entry_to_blob(&mut self, index: TreeIndex, block: BlockBytes) -> Result<(), String> { + fn insert_entry_to_blob( + &mut self, + index: TreeIndex, + block_bytes: BlockBytes, + ) -> Result<(), String> { let extend_index = (self.blob.len() / BLOCK_SIZE) as TreeIndex; match index.cmp(&extend_index) { Ordering::Greater => return Err(format!("index out of range: {index}")), - Ordering::Equal => self.blob.extend_from_slice(&block), + Ordering::Equal => self.blob.extend_from_slice(&block_bytes), Ordering::Less => { - let start = index as usize * BLOCK_SIZE; - self.blob[start..start + BLOCK_SIZE].copy_from_slice(&block); + self.blob[Block::range(index)].copy_from_slice(&block_bytes); } } @@ -868,8 +871,8 @@ impl MerkleBlob { pub fn to_dot(&self) -> DotLines { let mut result = DotLines::new(); - for node in self { - result.push(node.to_dot()); + for block in self { + result.push(block.node.to_dot()); } result @@ -878,10 +881,36 @@ impl MerkleBlob { pub fn iter(&self) -> MerkleBlobIterator<'_> { <&Self as IntoIterator>::into_iter(self) } + + pub fn calculate_lazy_hashes(&mut self) { + // TODO: really want a truncated traversal, not filter + // TODO: yeah, storing the whole set of blocks via collect is not great + for mut block in self + .iter() + .filter(|block| block.metadata.dirty) + .collect::>() + { + match block.node.specific { + NodeSpecific::Leaf { .. } => panic!("leaves should not be dirty"), + NodeSpecific::Internal { left, right, .. } => { + // TODO: obviously inefficient to re-get/deserialize these blocks inside + // an iteration that's already doing that + let left = self.get_block(left).unwrap(); + let right = self.get_block(right).unwrap(); + // TODO: wrap this up in Block maybe? 
just to have 'control' of dirty being 'accurate' + block.node.hash = internal_hash(left.node.hash, right.node.hash); + block.metadata.dirty = false; + self.insert_entry_to_blob(block.node.index, block.to_bytes()) + .unwrap(); + } + } + } + } } impl<'a> IntoIterator for &'a MerkleBlob { - type Item = Node; + // TODO: review efficiency in whatever use cases we end up with, vs Item = Node etc + type Item = Block; type IntoIter = MerkleBlobIterator<'a>; fn into_iter(self) -> Self::IntoIter { @@ -929,48 +958,63 @@ impl MerkleBlob { } } +struct MerkleBlobIteratorItem { + visited: bool, + index: TreeIndex, +} + pub struct MerkleBlobIterator<'a> { blob: &'a [u8], - deque: VecDeque, - index_count: usize, + deque: VecDeque, } impl<'a> MerkleBlobIterator<'a> { fn new(blob: &'a [u8]) -> Self { - let index_count = blob.len() / BLOCK_SIZE; let mut deque = VecDeque::new(); - deque.push_back(0); - - Self { - blob, - deque, - index_count, + if blob.len() / BLOCK_SIZE > 0 { + deque.push_back(MerkleBlobIteratorItem { + visited: false, + index: 0, + }); } + + Self { blob, deque } } } impl Iterator for MerkleBlobIterator<'_> { - type Item = Node; + type Item = Block; fn next(&mut self) -> Option { - // left depth first + // left sibling first, children before parents - if self.index_count == 0 { - return None; - } + loop { + let item = self.deque.pop_front()?; + let block_bytes: BlockBytes = self.blob[Block::range(item.index)].try_into().unwrap(); + let block = Block::from_bytes(block_bytes, item.index).unwrap(); - let index = self.deque.pop_front()?; - let block_bytes: BlockBytes = self.blob[Block::range(index)].try_into().unwrap(); - let block = Block::from_bytes(block_bytes, index).unwrap(); - match block.node.specific { - NodeSpecific::Internal { left, right } => { - self.deque.push_front(right); - self.deque.push_front(left); + match block.node.specific { + NodeSpecific::Leaf { .. } => return Some(block), + NodeSpecific::Internal { left, right } => { + if item.visited { + return Some(block); + }; + + self.deque.push_front(MerkleBlobIteratorItem { + visited: true, + index: item.index, + }); + self.deque.push_front(MerkleBlobIteratorItem { + visited: false, + index: right, + }); + self.deque.push_front(MerkleBlobIteratorItem { + visited: false, + index: left, + }); + } } - NodeSpecific::Leaf { .. } => (), } - - Some(block.node) } } @@ -1187,6 +1231,8 @@ mod tests { println!("total time: {total_time:?}"); // TODO: check, well... 
something + + merkle_blob.calculate_lazy_hashes(); } #[test] From 502b9e69e47fe62c0bbe1b10c69ff9becec356ce Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 11 Sep 2024 11:55:09 -0400 Subject: [PATCH 046/181] consolidate actual blob modification --- crates/chia-datalayer/src/lib.rs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index bada1428d..c9e798862 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -648,8 +648,7 @@ impl MerkleBlob { let Some(grandparent_index) = parent.parent else { sibling_block.metadata.dirty = true; sibling_block.node.parent = None; - let range = Block::range(0); - self.blob[range].copy_from_slice(&sibling_block.to_bytes()); + self.insert_entry_to_blob(0, sibling_block.to_bytes())?; match sibling_block.node.specific { NodeSpecific::Leaf { key_value } => { @@ -673,8 +672,7 @@ impl MerkleBlob { let mut grandparent_block = self.get_block(grandparent_index).unwrap(); sibling_block.node.parent = Some(grandparent_index); - let range = Block::range(sibling_index); - self.blob[range].copy_from_slice(&sibling_block.to_bytes()); + self.insert_entry_to_blob(sibling_index, sibling_block.to_bytes())?; match grandparent_block.node.specific { NodeSpecific::Internal { @@ -688,8 +686,7 @@ impl MerkleBlob { }, NodeSpecific::Leaf { .. } => panic!(), }; - let range = Block::range(grandparent_index); - self.blob[range].copy_from_slice(&grandparent_block.to_bytes()); + self.insert_entry_to_blob(grandparent_index, grandparent_block.to_bytes())?; self.mark_lineage_as_dirty(grandparent_index)?; @@ -725,6 +722,11 @@ impl MerkleBlob { while let Some(this_index) = next_index { let mut block = Block::from_bytes(self.get_block_bytes(this_index)?, this_index)?; + + if block.metadata.dirty { + return Ok(()); + } + block.metadata.dirty = true; self.insert_entry_to_blob(this_index, block.to_bytes())?; next_index = block.node.parent; @@ -769,6 +771,7 @@ impl MerkleBlob { index: TreeIndex, block_bytes: BlockBytes, ) -> Result<(), String> { + assert_eq!(self.blob.len() % BLOCK_SIZE, 0); let extend_index = (self.blob.len() / BLOCK_SIZE) as TreeIndex; match index.cmp(&extend_index) { Ordering::Greater => return Err(format!("index out of range: {index}")), @@ -1188,7 +1191,9 @@ mod tests { root.metadata.dirty = true; root.node.hash = HASH; assert_eq!(root.metadata.node_type, NodeType::Internal); - merkle_blob.blob[..BLOCK_SIZE].copy_from_slice(&root.to_bytes()); + merkle_blob + .insert_entry_to_blob(0, root.to_bytes()) + .unwrap(); assert_eq!(merkle_blob.blob, Vec::from(EXAMPLE_BLOB)); } From ae0087fda4efb26a985fe6f25eb9a9ee4a72f2a3 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 12 Sep 2024 15:26:36 -0400 Subject: [PATCH 047/181] misc incomplete --- crates/chia-datalayer/src/lib.rs | 181 ++++++++++++++++++++++++++++--- 1 file changed, 165 insertions(+), 16 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index c9e798862..31088e61a 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -371,7 +371,7 @@ fn get_free_indexes(blob: &[u8]) -> Result, String> { let mut seen_indexes: Vec = vec![false; index_count]; - for block in MerkleBlobIterator::new(blob) { + for block in MerkleBlobLeftChildFirstIterator::new(blob) { seen_indexes[block.node.index as usize] = true; } @@ -396,7 +396,7 @@ fn get_keys_values_indexes(blob: &[u8]) -> Result, Stri return Ok(kv_to_index); } - for 
block in MerkleBlobIterator::new(blob) { + for block in MerkleBlobLeftChildFirstIterator::new(blob) { match block.node.specific { NodeSpecific::Leaf { key_value } => { kv_to_index.insert(key_value, block.node.index); @@ -766,13 +766,18 @@ impl MerkleBlob { Err("failed to find a node".to_string()) } + fn extend_index(&self) -> TreeIndex { + assert_eq!(self.blob.len() % BLOCK_SIZE, 0); + + (self.blob.len() / BLOCK_SIZE) as TreeIndex + } + fn insert_entry_to_blob( &mut self, index: TreeIndex, block_bytes: BlockBytes, ) -> Result<(), String> { - assert_eq!(self.blob.len() % BLOCK_SIZE, 0); - let extend_index = (self.blob.len() / BLOCK_SIZE) as TreeIndex; + let extend_index = self.extend_index(); match index.cmp(&extend_index) { Ordering::Greater => return Err(format!("index out of range: {index}")), Ordering::Equal => self.blob.extend_from_slice(&block_bytes), @@ -881,7 +886,7 @@ impl MerkleBlob { result } - pub fn iter(&self) -> MerkleBlobIterator<'_> { + pub fn iter(&self) -> MerkleBlobLeftChildFirstIterator<'_> { <&Self as IntoIterator>::into_iter(self) } @@ -909,16 +914,73 @@ impl MerkleBlob { } } } + + pub fn relocate_node(&mut self, source: TreeIndex, destination: TreeIndex) { + let extend_index = self.extend_index(); + assert_ne!(source, 0); + assert!(source < extend_index); + assert!(!self.free_indexes.contains(&source)); + assert!(destination <= extend_index); + assert!(destination == extend_index || self.free_indexes.contains(&destination)); + + let source_block = self.get_block(source).unwrap(); + if let Some(parent) = source_block.node.parent { + let mut parent_block = self.get_block(parent).unwrap(); + let NodeSpecific::Internal { + ref mut left, + ref mut right, + } = parent_block.node.specific + else { + panic!(); + }; + match source { + x if x == *left => *left = destination, + x if x == *right => *right = destination, + _ => panic!(), + } + self.insert_entry_to_blob(parent, parent_block.to_bytes()) + .unwrap(); + } + + match source_block.node.specific { + NodeSpecific::Leaf { key_value } => { + self.kv_to_index.insert(key_value, destination); + } + NodeSpecific::Internal { left, right, .. } => { + for child in [left, right] { + let mut block = self.get_block(child).unwrap(); + block.node.parent = Some(destination); + self.insert_entry_to_blob(child, block.to_bytes()).unwrap(); + } + } + } + + self.free_indexes.push(source); + } + + #[allow(unused)] + fn rebuild(&mut self) -> Result<(), String> { + panic!(); + // TODO: could make insert_entry_to_blob a free function and not need to make + // a merkle blob here? maybe? + let mut new = Self::new(Vec::new())?; + for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob).enumerate() { + // new.insert_entry_to_blob(index, )? 
+ } + self.blob = new.blob; + + Ok(()) + } } impl<'a> IntoIterator for &'a MerkleBlob { // TODO: review efficiency in whatever use cases we end up with, vs Item = Node etc type Item = Block; - type IntoIter = MerkleBlobIterator<'a>; + type IntoIter = MerkleBlobLeftChildFirstIterator<'a>; fn into_iter(self) -> Self::IntoIter { // TODO: review types around this to avoid copying - MerkleBlobIterator::new(&self.blob[..]) + MerkleBlobLeftChildFirstIterator::new(&self.blob[..]) } } @@ -961,21 +1023,21 @@ impl MerkleBlob { } } -struct MerkleBlobIteratorItem { +struct MerkleBlobLeftChildFirstIteratorItem { visited: bool, index: TreeIndex, } -pub struct MerkleBlobIterator<'a> { +pub struct MerkleBlobLeftChildFirstIterator<'a> { blob: &'a [u8], - deque: VecDeque, + deque: VecDeque, } -impl<'a> MerkleBlobIterator<'a> { +impl<'a> MerkleBlobLeftChildFirstIterator<'a> { fn new(blob: &'a [u8]) -> Self { let mut deque = VecDeque::new(); if blob.len() / BLOCK_SIZE > 0 { - deque.push_back(MerkleBlobIteratorItem { + deque.push_back(MerkleBlobLeftChildFirstIteratorItem { visited: false, index: 0, }); @@ -985,7 +1047,7 @@ impl<'a> MerkleBlobIterator<'a> { } } -impl Iterator for MerkleBlobIterator<'_> { +impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { type Item = Block; fn next(&mut self) -> Option { @@ -1003,15 +1065,15 @@ impl Iterator for MerkleBlobIterator<'_> { return Some(block); }; - self.deque.push_front(MerkleBlobIteratorItem { + self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { visited: true, index: item.index, }); - self.deque.push_front(MerkleBlobIteratorItem { + self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { visited: false, index: right, }); - self.deque.push_front(MerkleBlobIteratorItem { + self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { visited: false, index: left, }); @@ -1021,6 +1083,83 @@ impl Iterator for MerkleBlobIterator<'_> { } } +pub struct MerkleBlobParentFirstIterator<'a> { + blob: &'a [u8], + deque: VecDeque, +} + +impl<'a> MerkleBlobParentFirstIterator<'a> { + fn new(blob: &'a [u8]) -> Self { + let mut deque = VecDeque::new(); + if blob.len() / BLOCK_SIZE > 0 { + deque.push_back(0); + } + + Self { blob, deque } + } +} + +impl Iterator for MerkleBlobParentFirstIterator<'_> { + type Item = Block; + + fn next(&mut self) -> Option { + // left sibling first, parents before children + + loop { + let index = self.deque.pop_front()?; + let block_bytes: BlockBytes = self.blob[Block::range(index)].try_into().unwrap(); + let block = Block::from_bytes(block_bytes, index).unwrap(); + + match block.node.specific { + NodeSpecific::Leaf { .. } => return Some(block), + NodeSpecific::Internal { left, right } => { + self.deque.push_front(right); + self.deque.push_front(left); + } + } + } + } +} + +pub struct MerkleBlobBreadthFirstIterator<'a> { + blob: &'a [u8], + deque: VecDeque, +} + +impl<'a> MerkleBlobBreadthFirstIterator<'a> { + #[allow(unused)] + fn new(blob: &'a [u8]) -> Self { + let mut deque = VecDeque::new(); + if blob.len() / BLOCK_SIZE > 0 { + deque.push_back(0); + } + + Self { blob, deque } + } +} + +impl Iterator for MerkleBlobBreadthFirstIterator<'_> { + type Item = Block; + + fn next(&mut self) -> Option { + // left sibling first, parent depth before child depth + + loop { + let index = self.deque.pop_front()?; + let block_bytes: BlockBytes = self.blob[Block::range(index)].try_into().unwrap(); + let block = Block::from_bytes(block_bytes, index).unwrap(); + + match block.node.specific { + NodeSpecific::Leaf { .. 
} => return Some(block), + NodeSpecific::Internal { left, right } => { + self.deque.push_back(left); + self.deque.push_back(right); + } + } + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -1073,6 +1212,16 @@ mod tests { MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap() } + #[allow(unused)] + fn normalized_blob(merkle_blob: &MerkleBlob) -> Vec { + let mut new = MerkleBlob::new(merkle_blob.blob.clone()).unwrap(); + + new.calculate_lazy_hashes(); + new.rebuild(); + + new.blob + } + #[test] fn test_node_type_serialized_values() { // TODO: can i make sure we cover all variants? From 3935ac8178af9ded9b19967e69dea519c3b7907a Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 13 Sep 2024 10:31:33 -0400 Subject: [PATCH 048/181] check that deleting in reverse of insertion recreates original tree --- crates/chia-datalayer/src/lib.rs | 68 ++++++++++++-------------------- 1 file changed, 25 insertions(+), 43 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 31088e61a..6ffb4032b 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -4,7 +4,7 @@ use pyo3::{buffer::PyBuffer, pyclass, pymethods, PyResult}; use clvmr::sha2::Sha256; use std::cmp::Ordering; use std::collections::{HashMap, VecDeque}; -use std::iter::IntoIterator; +use std::iter::{zip, IntoIterator}; use std::ops::Range; // TODO: clearly shouldn't be hard coded @@ -409,6 +409,7 @@ fn get_keys_values_indexes(blob: &[u8]) -> Result, Stri } #[cfg_attr(feature = "py-bindings", pyclass(name = "MerkleBlob"))] +#[derive(Debug)] pub struct MerkleBlob { blob: Vec, free_indexes: Vec, @@ -646,7 +647,6 @@ impl MerkleBlob { let mut sibling_block = self.get_block(sibling_index)?; let Some(grandparent_index) = parent.parent else { - sibling_block.metadata.dirty = true; sibling_block.node.parent = None; self.insert_entry_to_blob(0, sibling_block.to_bytes())?; @@ -973,6 +973,21 @@ impl MerkleBlob { } } +impl PartialEq for MerkleBlob { + fn eq(&self, other: &Self) -> bool { + for (self_block, other_block) in zip(self, other) { + if (self_block.metadata.dirty || other_block.metadata.dirty) + || self_block.node.hash != other_block.node.hash + || self_block.node.specific != other_block.node.specific + { + return false; + } + } + + true + } +} + impl<'a> IntoIterator for &'a MerkleBlob { // TODO: review efficiency in whatever use cases we end up with, vs Item = Node etc type Item = Block; @@ -1390,11 +1405,12 @@ mod tests { } #[test] - fn test_delete() { + fn test_delete_in_reverse_creates_matching_trees() { const COUNT: usize = 10; let mut dots = vec![]; let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + let mut reference_blobs = vec![]; let key_value_ids: [KvId; COUNT] = core::array::from_fn(|i| i as KvId); @@ -1404,52 +1420,18 @@ mod tests { let hash: Hash = hasher.finalize(); println!("inserting: {key_value_id}"); + merkle_blob.calculate_lazy_hashes(); + reference_blobs.push(MerkleBlob::new(merkle_blob.blob.clone()).unwrap()); merkle_blob.insert(key_value_id, hash).unwrap(); dots.push(merkle_blob.to_dot().dump()); } - for key_value_id in key_value_ids { + for key_value_id in key_value_ids.iter().rev() { println!("deleting: {key_value_id}"); - merkle_blob.delete(key_value_id).unwrap(); + merkle_blob.delete(*key_value_id).unwrap(); + merkle_blob.calculate_lazy_hashes(); + assert_eq!(merkle_blob, reference_blobs[*key_value_id as usize]); dots.push(merkle_blob.to_dot().dump()); } - - // let mut key_value_ids: Vec = vec![0; COUNT]; - // - // for (i, key_value_id) 
in key_value_ids.iter_mut().enumerate() { - // *key_value_id = i as KvId; - // } - // for i in 0..100_000 { - // let start = Instant::now(); - // merkle_blob - // // TODO: yeah this hash is garbage - // .insert(i as KvId, HASH) - // .unwrap(); - // let end = Instant::now(); - // total_time += end.duration_since(start); - // - // // match i + 1 { - // // 2 => assert_eq!(merkle_blob.blob.len(), 3 * BLOCK_SIZE), - // // 3 => assert_eq!(merkle_blob.blob.len(), 5 * BLOCK_SIZE), - // // _ => (), - // // } - // - // // let file = fs::File::create(format!("/home/altendky/tmp/mbt/rs/{i:0>4}")).unwrap(); - // // let mut file = io::LineWriter::new(file); - // // for block in merkle_blob.blob.chunks(BLOCK_SIZE) { - // // let mut s = String::new(); - // // for byte in block { - // // s.push_str(&format!("{:02x}", byte)); - // // } - // // s.push_str("\n"); - // // file.write_all(s.as_bytes()).unwrap(); - // // } - // - // // fs::write(format!("/home/altendky/tmp/mbt/rs/{i:0>4}"), &merkle_blob.blob).unwrap(); - // } - // // println!("{:?}", merkle_blob.blob) - // - // println!("total time: {total_time:?}"); - // // TODO: check, well... something } } From 85aefbc4d4e827d5ad541d758e6c8df973c1c2b7 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 13 Sep 2024 11:15:18 -0400 Subject: [PATCH 049/181] make the serialization definition more clear --- crates/chia-datalayer/src/lib.rs | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 6ffb4032b..1284de32f 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -5,6 +5,7 @@ use clvmr::sha2::Sha256; use std::cmp::Ordering; use std::collections::{HashMap, VecDeque}; use std::iter::{zip, IntoIterator}; +use std::mem::size_of; use std::ops::Range; // TODO: clearly shouldn't be hard coded @@ -17,7 +18,7 @@ type TreeIndex = u32; type Parent = Option; type Hash = [u8; 32]; type BlockBytes = [u8; BLOCK_SIZE]; -type KvId = u64; +type KvId = i64; #[derive(Clone, Debug, Hash, Eq, PartialEq)] #[repr(u8)] @@ -175,12 +176,24 @@ impl NodeSpecific { } } -const PARENT_RANGE: Range = 0..4; -const LEFT_RANGE: Range = 4..8; -const RIGHT_RANGE: Range = 8..12; -const KEY_VALUE_RANGE: Range = 4..12; -// TODO: move the common parts to the beginning of the serialization? -const HASH_RANGE: Range = 12..44; +const fn range_by_length(start: usize, length: usize) -> Range { + start..start + length +} + +// define the serialized block format +// TODO: consider in more detail other serialization tools such as serde and streamable +// common fields +const PARENT_RANGE: Range = range_by_length(0, size_of::()); +// internal specific fields +const LEFT_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); +const RIGHT_RANGE: Range = range_by_length(LEFT_RANGE.end, size_of::()); +// leaf specific fields +const KEY_VALUE_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); +// and back to common fields +// TODO: move the common parts to the beginning of the serialization +// TODO: better way to pick the max of key value and right range, until we move hash first +// NOTE: they happen to be the same location right now... 
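// Aside (not from this patch): with TreeIndex = u32, KvId = 8 bytes, and
// Hash = [u8; 32], the ranges in this block (including HASH_RANGE just below)
// work out to these byte offsets inside a node's data:
//   parent     0..4    (all nodes)
//   left       4..8    (internal nodes)
//   right      8..12   (internal nodes)
//   key_value  4..12   (leaf nodes)
//   hash      12..44   (all nodes)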
+const HASH_RANGE: Range = range_by_length(KEY_VALUE_RANGE.end, size_of::()); impl Node { // fn discriminant(&self) -> u8 { From 28548288a3a4894be7d09f99f8ee584ae2508ca4 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 13 Sep 2024 11:42:44 -0400 Subject: [PATCH 050/181] simplify away from match some places --- crates/chia-datalayer/src/lib.rs | 94 ++++++++++++++++---------------- 1 file changed, 46 insertions(+), 48 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 1284de32f..11f6b9e44 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -410,11 +410,8 @@ fn get_keys_values_indexes(blob: &[u8]) -> Result, Stri } for block in MerkleBlobLeftChildFirstIterator::new(blob) { - match block.node.specific { - NodeSpecific::Leaf { key_value } => { - kv_to_index.insert(key_value, block.node.index); - } - NodeSpecific::Internal { .. } => (), + if let NodeSpecific::Leaf { key_value } = block.node.specific { + kv_to_index.insert(key_value, block.node.index); } } @@ -611,22 +608,23 @@ impl MerkleBlob { let mut old_parent_block = Block::from_bytes(self.get_block_bytes(old_parent_index)?, old_parent_index)?; - match old_parent_block.node.specific { - NodeSpecific::Internal { - ref mut left, - ref mut right, - .. - } => { - if old_leaf.index == *left { - *left = new_internal_node_index; - } else if old_leaf.index == *right { - *right = new_internal_node_index; - } else { - panic!(); - } + if let NodeSpecific::Internal { + ref mut left, + ref mut right, + .. + } = old_parent_block.node.specific + { + if old_leaf.index == *left { + *left = new_internal_node_index; + } else if old_leaf.index == *right { + *right = new_internal_node_index; + } else { + panic!(); } - NodeSpecific::Leaf { .. } => panic!(), - } + } else { + panic!(); + }; + self.insert_entry_to_blob(old_parent_index, old_parent_block.to_bytes())?; self.mark_lineage_as_dirty(old_parent_index)?; @@ -639,11 +637,11 @@ impl MerkleBlob { let leaf_index = *self.kv_to_index.get(&key_value).unwrap(); let leaf = self.get_node(leaf_index).unwrap(); - match leaf.specific { - // TODO: blech - NodeSpecific::Leaf { .. } => (), - NodeSpecific::Internal { .. } => panic!(), - }; + // TODO: blech + if let NodeSpecific::Leaf { .. } = leaf.specific { + } else { + panic!() + } self.kv_to_index.remove(&key_value); let Some(parent_index) = leaf.parent else { @@ -687,18 +685,20 @@ impl MerkleBlob { sibling_block.node.parent = Some(grandparent_index); self.insert_entry_to_blob(sibling_index, sibling_block.to_bytes())?; - match grandparent_block.node.specific { - NodeSpecific::Internal { - ref mut left, - ref mut right, - .. - } => match parent_index { + if let NodeSpecific::Internal { + ref mut left, + ref mut right, + .. + } = grandparent_block.node.specific + { + match parent_index { x if x == *left => *left = sibling_index, x if x == *right => *right = sibling_index, _ => panic!(), - }, - NodeSpecific::Leaf { .. } => panic!(), - }; + } + } else { + panic!() + } self.insert_entry_to_blob(grandparent_index, grandparent_block.to_bytes())?; self.mark_lineage_as_dirty(grandparent_index)?; @@ -911,20 +911,18 @@ impl MerkleBlob { .filter(|block| block.metadata.dirty) .collect::>() { - match block.node.specific { - NodeSpecific::Leaf { .. } => panic!("leaves should not be dirty"), - NodeSpecific::Internal { left, right, .. 
} => { - // TODO: obviously inefficient to re-get/deserialize these blocks inside - // an iteration that's already doing that - let left = self.get_block(left).unwrap(); - let right = self.get_block(right).unwrap(); - // TODO: wrap this up in Block maybe? just to have 'control' of dirty being 'accurate' - block.node.hash = internal_hash(left.node.hash, right.node.hash); - block.metadata.dirty = false; - self.insert_entry_to_blob(block.node.index, block.to_bytes()) - .unwrap(); - } - } + let NodeSpecific::Internal { left, right } = block.node.specific else { + panic!("leaves should not be dirty") + }; + // TODO: obviously inefficient to re-get/deserialize these blocks inside + // an iteration that's already doing that + let left = self.get_block(left).unwrap(); + let right = self.get_block(right).unwrap(); + // TODO: wrap this up in Block maybe? just to have 'control' of dirty being 'accurate' + block.node.hash = internal_hash(left.node.hash, right.node.hash); + block.metadata.dirty = false; + self.insert_entry_to_blob(block.node.index, block.to_bytes()) + .unwrap(); } } From cb90651430f0c2946e018c8e4646070419602de8 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 13 Sep 2024 12:05:51 -0400 Subject: [PATCH 051/181] pass hashes by reference --- crates/chia-datalayer/src/lib.rs | 46 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 11f6b9e44..c42b7924e 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -64,7 +64,7 @@ impl NodeType { // } // } -fn internal_hash(left_hash: Hash, right_hash: Hash) -> Hash { +fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { let mut hasher = Sha256::new(); hasher.update(b"\x02"); hasher.update(left_hash); @@ -451,7 +451,7 @@ impl MerkleBlob { }) } - pub fn insert(&mut self, key_value: KvId, hash: Hash) -> Result<(), String> { + pub fn insert(&mut self, key_value: KvId, hash: &Hash) -> Result<(), String> { // TODO: what about only unused providing a blob length? 
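// Aside (not from this patch): a usage sketch of the new by-reference
// signature, mirroring the crate's own tests; `insert_with_hash` is a
// hypothetical helper name:
fn insert_with_hash(merkle_blob: &mut MerkleBlob, key_value: KvId) -> Result<(), String> {
    let mut hasher = clvmr::sha2::Sha256::new();
    hasher.update(key_value.to_be_bytes());
    let hash: Hash = hasher.finalize();
    merkle_blob.insert(key_value, &hash)
}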
if self.blob.is_empty() { self.insert_first(key_value, hash); @@ -459,17 +459,17 @@ impl MerkleBlob { // TODO: make this a parameter so we have one insert call where you specify the location let old_leaf = self.get_random_leaf_node_from_bytes(Vec::from(key_value.to_be_bytes()))?; - let internal_node_hash = internal_hash(old_leaf.hash, hash); + let internal_node_hash = internal_hash(&old_leaf.hash, hash); if self.kv_to_index.len() == 1 { - self.insert_second(key_value, hash, &old_leaf, internal_node_hash); + self.insert_second(key_value, hash, &old_leaf, &internal_node_hash); return Ok(()); } - self.insert_third_or_later(key_value, hash, &old_leaf, internal_node_hash) + self.insert_third_or_later(key_value, hash, &old_leaf, &internal_node_hash) } - fn insert_first(&mut self, key_value: KvId, hash: Hash) { + fn insert_first(&mut self, key_value: KvId, hash: &Hash) { let new_leaf_block = Block { metadata: NodeMetadata { node_type: NodeType::Leaf, @@ -478,7 +478,7 @@ impl MerkleBlob { node: Node { parent: None, specific: NodeSpecific::Leaf { key_value }, - hash, + hash: *hash, index: 0, }, }; @@ -493,9 +493,9 @@ impl MerkleBlob { fn insert_second( &mut self, key_value: KvId, - hash: Hash, + hash: &Hash, old_leaf: &Node, - internal_node_hash: Hash, + internal_node_hash: &Hash, ) { self.blob.clear(); @@ -507,7 +507,7 @@ impl MerkleBlob { node: Node { parent: None, specific: NodeSpecific::Internal { left: 1, right: 2 }, - hash: internal_node_hash, + hash: *internal_node_hash, index: 0, }, }; @@ -540,7 +540,7 @@ impl MerkleBlob { node: Node { parent: Some(0), specific: NodeSpecific::Leaf { key_value }, - hash, + hash: *hash, index: 2, }, }; @@ -557,9 +557,9 @@ impl MerkleBlob { fn insert_third_or_later( &mut self, key_value: KvId, - hash: Hash, + hash: &Hash, old_leaf: &Node, - internal_node_hash: Hash, + internal_node_hash: &Hash, ) -> Result<(), String> { let new_leaf_index = self.get_new_index(); let new_internal_node_index = self.get_new_index(); @@ -572,7 +572,7 @@ impl MerkleBlob { node: Node { parent: Some(new_internal_node_index), specific: NodeSpecific::Leaf { key_value }, - hash, + hash: *hash, index: new_leaf_index, }, }; @@ -589,7 +589,7 @@ impl MerkleBlob { left: old_leaf.index, right: new_leaf_index, }, - hash: internal_node_hash, + hash: *internal_node_hash, index: new_internal_node_index, }, }; @@ -706,7 +706,7 @@ impl MerkleBlob { Ok(()) } - // fn upsert(&self, old_key_value: KvId, new_key_value: KvId, new_hash: Hash) -> Result<(), String> { + // fn upsert(&self, old_key_value: KvId, new_key_value: KvId, new_hash: &Hash) -> Result<(), String> { // if old_key_value // } @@ -919,7 +919,7 @@ impl MerkleBlob { let left = self.get_block(left).unwrap(); let right = self.get_block(right).unwrap(); // TODO: wrap this up in Block maybe? 
just to have 'control' of dirty being 'accurate' - block.node.hash = internal_hash(left.node.hash, right.node.hash); + block.node.hash = internal_hash(&left.node.hash, &right.node.hash); block.metadata.dirty = false; self.insert_entry_to_blob(block.node.index, block.to_bytes()) .unwrap(); @@ -1030,7 +1030,7 @@ impl MerkleBlob { #[pyo3(name = "insert")] pub fn py_insert(&mut self, key_value: KvId, hash: Hash) -> PyResult<()> { // TODO: consider the error - self.insert(key_value, hash).unwrap(); + self.insert(key_value, &hash).unwrap(); Ok(()) } @@ -1277,7 +1277,7 @@ mod tests { 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, ]; assert_eq!( - internal_hash(left, right), + internal_hash(&left, &right), clvm_utils::tree_hash_pair( clvm_utils::TreeHash::new(left), clvm_utils::TreeHash::new(right) @@ -1355,10 +1355,10 @@ mod tests { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); merkle_blob - .insert(EXAMPLE_LEFT_LEAF.key_value(), EXAMPLE_LEFT_LEAF.hash) + .insert(EXAMPLE_LEFT_LEAF.key_value(), &EXAMPLE_LEFT_LEAF.hash) .unwrap(); merkle_blob - .insert(EXAMPLE_RIGHT_LEAF.key_value(), EXAMPLE_RIGHT_LEAF.hash) + .insert(EXAMPLE_RIGHT_LEAF.key_value(), &EXAMPLE_RIGHT_LEAF.hash) .unwrap(); // TODO: just hacking here to compare with the ~wrong~ simplified reference @@ -1383,7 +1383,7 @@ mod tests { let start = Instant::now(); merkle_blob // TODO: yeah this hash is garbage - .insert(i as KvId, HASH) + .insert(i as KvId, &HASH) .unwrap(); let end = Instant::now(); total_time += end.duration_since(start); @@ -1433,7 +1433,7 @@ mod tests { println!("inserting: {key_value_id}"); merkle_blob.calculate_lazy_hashes(); reference_blobs.push(MerkleBlob::new(merkle_blob.blob.clone()).unwrap()); - merkle_blob.insert(key_value_id, hash).unwrap(); + merkle_blob.insert(key_value_id, &hash).unwrap(); dots.push(merkle_blob.to_dot().dump()); } From e5a916e5f0090f2fe69453066ac249a54c44d056 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 17 Sep 2024 12:20:55 -0400 Subject: [PATCH 052/181] fix insert, and always more --- Cargo.lock | 39 +++++++++ Cargo.toml | 3 + crates/chia-datalayer/Cargo.toml | 5 +- crates/chia-datalayer/src/lib.rs | 137 +++++++++++++++++++++++++++++-- 4 files changed, 178 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 40a1d5cc9..f9f1f9c33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -378,8 +378,11 @@ dependencies = [ "clvm-utils", "clvmr", "hex-literal", + "open", + "percent-encoding", "pyo3", "rstest", + "url", ] [[package]] @@ -1267,6 +1270,15 @@ version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f958d3d68f4167080a18141e10381e7634563984a537f2a49a30fd8e53ac5767" +[[package]] +name = "is-docker" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928bae27f42bc99b60d9ac7334e3a21d10ad8f1835a4e12ec3ec0464765ed1b3" +dependencies = [ + "once_cell", +] + [[package]] name = "is-terminal" version = "0.4.12" @@ -1278,6 +1290,16 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "is-wsl" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "173609498df190136aa7dea1a91db051746d339e18476eed5ca40521f02d7aa5" +dependencies = [ + "is-docker", + "once_cell", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -1564,6 +1586,17 @@ version = "11.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" +[[package]] +name = "open" +version = 
"5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a877bf6abd716642a53ef1b89fb498923a4afca5c754f9050b4d081c05c4b3" +dependencies = [ + "is-wsl", + "libc", + "pathdiff", +] + [[package]] name = "openssl" version = "0.10.66" @@ -1647,6 +1680,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "pathdiff" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" + [[package]] name = "pem" version = "3.0.4" diff --git a/Cargo.toml b/Cargo.toml index 3584e4442..468b3ce69 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -143,3 +143,6 @@ zstd = "0.13.2" blocking-threadpool = "1.0.1" libfuzzer-sys = "0.4" wasm-bindgen = "0.2.93" +open = "5.3.0" +url = "2.5.2" +percent-encoding = "2.3.1" diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index ee3235f3b..b907ca32a 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -18,11 +18,14 @@ py-bindings = ["dep:pyo3", "chia-traits/py-bindings"] crate-type = ["rlib"] [dependencies] -clvmr = { workspace = true } chia-traits = { workspace = true } +clvmr = { workspace = true } pyo3 = { workspace = true, optional = true } [dev-dependencies] clvm-utils = { workspace = true } hex-literal = { workspace = true } +open = { workspace = true } +percent-encoding = { workspace = true } rstest = { workspace = true } +url = { workspace = true } diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index c42b7924e..67f9c3bfd 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -77,6 +77,7 @@ pub struct DotLines { nodes: Vec, connections: Vec, pair_boxes: Vec, + note: String, } impl Default for DotLines { @@ -91,6 +92,7 @@ impl DotLines { nodes: vec![], connections: vec![], pair_boxes: vec![], + note: "".to_string(), } } @@ -102,7 +104,8 @@ impl DotLines { pub fn dump(&mut self) -> String { // TODO: consuming itself, secretly - let mut result = vec!["digraph {".to_string()]; + let note = &self.note; + let mut result = vec![format!("# {note}"), "".to_string(), "digraph {".to_string()]; result.append(&mut self.nodes); result.append(&mut self.connections); result.append(&mut self.pair_boxes); @@ -110,6 +113,12 @@ impl DotLines { result.join("\n") } + + pub fn set_note(&mut self, note: &str) -> &mut Self { + self.note = String::from(note); + + self + } } const NULL_PARENT: TreeIndex = 0xffff_ffffu32; @@ -302,7 +311,8 @@ impl Node { ], pair_boxes: vec![ format!("node [shape = box]; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}}"), - ] + ], + note: "".to_string(), }, NodeSpecific::Leaf {key_value} => DotLines{ nodes: vec![ @@ -316,6 +326,7 @@ impl Node { }, ], pair_boxes: vec![], + note: "".to_string(), }, } } @@ -455,6 +466,7 @@ impl MerkleBlob { // TODO: what about only unused providing a blob length? 
if self.blob.is_empty() { self.insert_first(key_value, hash); + return Ok(()); } // TODO: make this a parameter so we have one insert call where you specify the location @@ -706,9 +718,57 @@ impl MerkleBlob { Ok(()) } - // fn upsert(&self, old_key_value: KvId, new_key_value: KvId, new_hash: &Hash) -> Result<(), String> { - // if old_key_value - // } + pub fn upsert( + &mut self, + old_key_value: KvId, + new_key_value: KvId, + new_hash: &Hash, + ) -> Result<(), String> { + let Some(leaf_index) = self.kv_to_index.get(&old_key_value) else { + self.insert(new_key_value, new_hash)?; + return Ok(()); + }; + + let mut block = self.get_block(*leaf_index).unwrap(); + if let NodeSpecific::Leaf { ref mut key_value } = block.node.specific { + block.node.hash.clone_from(new_hash); + *key_value = new_key_value; + } else { + panic!() + } + self.insert_entry_to_blob(*leaf_index, block.to_bytes())?; + + if let Some(parent) = block.node.parent { + self.mark_lineage_as_dirty(parent)?; + } + + Ok(()) + } + + pub fn check(&self) { + let mut leaf_count: usize = 0; + let mut internal_count: usize = 0; + + for block in self { + match block.node.specific { + NodeSpecific::Internal { .. } => internal_count += 1, + NodeSpecific::Leaf { key_value } => { + leaf_count += 1; + assert!(self.kv_to_index.contains_key(&key_value)); + // TODO: consider what type free indexes should be + assert!(!self.free_indexes.contains(&block.node.index)); + } + } + } + + assert_eq!(leaf_count, self.kv_to_index.len()); + assert_eq!( + leaf_count + internal_count + self.free_indexes.len(), + self.extend_index() as usize, + ); + + // TODO: check parent/child bidirectional accuracy + } // fn update_parent(&mut self, index: TreeIndex, parent: Option) -> Result<(), String> { // let range = self.get_block_range(index); @@ -1307,6 +1367,8 @@ mod tests { fn test_load_a_python_dump() { let merkle_blob = example_merkle_blob(); merkle_blob.get_node(0).unwrap(); + + merkle_blob.check(); } #[test] @@ -1319,6 +1381,8 @@ mod tests { assert_eq!(lineage.len(), 2); let last_node = lineage.last().unwrap(); assert_eq!(last_node.parent, None); + + merkle_blob.check(); } #[test] @@ -1328,6 +1392,8 @@ mod tests { .get_random_leaf_node_from_bytes(vec![0; 8]) .unwrap(); assert_eq!(leaf.index, 1); + + merkle_blob.check(); } #[test] @@ -1348,6 +1414,8 @@ mod tests { assert_eq!(merkle_blob.get_node(0).unwrap(), EXAMPLE_ROOT); assert_eq!(merkle_blob.get_node(1).unwrap(), EXAMPLE_LEFT_LEAF); assert_eq!(merkle_blob.get_node(2).unwrap(), EXAMPLE_RIGHT_LEAF); + + merkle_blob.check(); } #[test] @@ -1371,6 +1439,8 @@ mod tests { .unwrap(); assert_eq!(merkle_blob.blob, Vec::from(EXAMPLE_BLOB)); + + merkle_blob.check(); } #[test] @@ -1413,6 +1483,8 @@ mod tests { // TODO: check, well... 
something merkle_blob.calculate_lazy_hashes(); + + merkle_blob.check(); } #[test] @@ -1437,6 +1509,8 @@ mod tests { dots.push(merkle_blob.to_dot().dump()); } + merkle_blob.check(); + for key_value_id in key_value_ids.iter().rev() { println!("deleting: {key_value_id}"); merkle_blob.delete(*key_value_id).unwrap(); @@ -1444,5 +1518,58 @@ mod tests { assert_eq!(merkle_blob, reference_blobs[*key_value_id as usize]); dots.push(merkle_blob.to_dot().dump()); } + + merkle_blob.check(); + } + + // TODO: better conditional execution than the commenting i'm doing now + #[allow(dead_code)] + fn open_dot(lines: &mut DotLines) { + use open; + use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; + use url::Url; + + let mut url = Url::parse("http://edotor.net").unwrap(); + // https://edotor.net/?engine=dot#graph%20%7B%7D%0A -> graph {} + url.query_pairs_mut().append_pair("engine", "dot"); + url.set_fragment(Some( + &utf8_percent_encode(&lines.dump(), NON_ALPHANUMERIC).to_string(), + )); + open::that(url.as_str()).unwrap(); + } + + #[test] + fn test_insert_first() { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + let key_value_id: KvId = 1; + // open_dot(&mut merkle_blob.to_dot().set_note("empty")); + merkle_blob.insert(key_value_id, &HASH).unwrap(); + // open_dot(&mut merkle_blob.to_dot().set_note("first after")); + + merkle_blob.check(); + + assert_eq!(merkle_blob.free_indexes.len(), 0); + assert_eq!(merkle_blob.kv_to_index.len(), 1); + } + + #[test] + fn test_delete_root() { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + let key_value_id: KvId = 1; + // open_dot(&mut merkle_blob.to_dot().set_note("empty")); + merkle_blob.insert(key_value_id, &HASH).unwrap(); + // open_dot(&mut merkle_blob.to_dot().set_note("first after")); + merkle_blob.check(); + + merkle_blob.delete(key_value_id).unwrap(); + + merkle_blob.check(); + assert_eq!( + merkle_blob.free_indexes.len(), + merkle_blob.extend_index() as usize + ); + assert_eq!(merkle_blob.kv_to_index.len(), 0); } } From d6a19920374727f19d61ab32c373963a10272272 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 17 Sep 2024 12:24:43 -0400 Subject: [PATCH 053/181] clippy and tweaks --- crates/chia-datalayer/src/lib.rs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 67f9c3bfd..dd15222aa 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -92,7 +92,7 @@ impl DotLines { nodes: vec![], connections: vec![], pair_boxes: vec![], - note: "".to_string(), + note: String::new(), } } @@ -105,7 +105,7 @@ impl DotLines { pub fn dump(&mut self) -> String { // TODO: consuming itself, secretly let note = &self.note; - let mut result = vec![format!("# {note}"), "".to_string(), "digraph {".to_string()]; + let mut result = vec![format!("# {note}"), String::new(), "digraph {".to_string()]; result.append(&mut self.nodes); result.append(&mut self.connections); result.append(&mut self.pair_boxes); @@ -312,7 +312,7 @@ impl Node { pair_boxes: vec![ format!("node [shape = box]; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}}"), ], - note: "".to_string(), + note: String::new(), }, NodeSpecific::Leaf {key_value} => DotLines{ nodes: vec![ @@ -326,7 +326,7 @@ impl Node { }, ], pair_boxes: vec![], - note: "".to_string(), + note: String::new(), }, } } @@ -466,19 +466,20 @@ impl MerkleBlob { // TODO: what about only unused providing a blob length? 
if self.blob.is_empty() { self.insert_first(key_value, hash); - return Ok(()); - } - - // TODO: make this a parameter so we have one insert call where you specify the location - let old_leaf = self.get_random_leaf_node_from_bytes(Vec::from(key_value.to_be_bytes()))?; - let internal_node_hash = internal_hash(&old_leaf.hash, hash); + } else { + // TODO: make this a parameter so we have one insert call where you specify the location + let old_leaf = + self.get_random_leaf_node_from_bytes(Vec::from(key_value.to_be_bytes()))?; + let internal_node_hash = internal_hash(&old_leaf.hash, hash); - if self.kv_to_index.len() == 1 { - self.insert_second(key_value, hash, &old_leaf, &internal_node_hash); - return Ok(()); + if self.kv_to_index.len() == 1 { + self.insert_second(key_value, hash, &old_leaf, &internal_node_hash); + } else { + self.insert_third_or_later(key_value, hash, &old_leaf, &internal_node_hash)?; + } } - self.insert_third_or_later(key_value, hash, &old_leaf, &internal_node_hash) + Ok(()) } fn insert_first(&mut self, key_value: KvId, hash: &Hash) { From 40bc2085d2e20a912bfe38f721d4272b6ca403f4 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 20 Sep 2024 11:44:18 -0400 Subject: [PATCH 054/181] remove redundant assertions --- crates/chia-datalayer/src/lib.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index dd15222aa..04f1c6c62 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1549,8 +1549,6 @@ mod tests { // open_dot(&mut merkle_blob.to_dot().set_note("first after")); merkle_blob.check(); - - assert_eq!(merkle_blob.free_indexes.len(), 0); assert_eq!(merkle_blob.kv_to_index.len(), 1); } @@ -1567,10 +1565,6 @@ mod tests { merkle_blob.delete(key_value_id).unwrap(); merkle_blob.check(); - assert_eq!( - merkle_blob.free_indexes.len(), - merkle_blob.extend_index() as usize - ); assert_eq!(merkle_blob.kv_to_index.len(), 0); } } From 838c3c2c0e67b11e863a4979aadc68e0631ffb19 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 1 Oct 2024 11:00:52 -0400 Subject: [PATCH 055/181] fixup hints --- wheel/generate_type_stubs.py | 4 +++- wheel/python/chia_rs/chia_rs.pyi | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index 3f9f52e2b..903432cbe 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -378,13 +378,15 @@ def derive_child_sk_unhardened(sk: PrivateKey, index: int) -> PrivateKey: ... @staticmethod def derive_child_pk_unhardened(pk: G1Element, index: int) -> G1Element: ... +@final class MerkleBlob: def __init__( self, blob: bytes, ) -> None: ... - def insert(self, key: uint64, value: bytes32) -> None: ... + def insert(self, key_value: uint64, hash: bytes32) -> None: ... + def delete(self, key_value: uint64) -> None: ... def __len__(self) -> int: ... @final diff --git a/wheel/python/chia_rs/chia_rs.pyi b/wheel/python/chia_rs/chia_rs.pyi index f94eaf5f2..c653772a6 100644 --- a/wheel/python/chia_rs/chia_rs.pyi +++ b/wheel/python/chia_rs/chia_rs.pyi @@ -120,13 +120,15 @@ class AugSchemeMPL: @staticmethod def derive_child_pk_unhardened(pk: G1Element, index: int) -> G1Element: ... +@final class MerkleBlob: def __init__( self, blob: bytes, ) -> None: ... - def insert(self, key: uint64, value: bytes32) -> None: ... + def insert(self, key_value: uint64, hash: bytes32) -> None: ... + def delete(self, key_value: uint64) -> None: ... def __len__(self) -> int: ... 
@final From 744e41f251bf068a8e63d2a38bf014daa3850689 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 1 Oct 2024 12:53:16 -0400 Subject: [PATCH 056/181] test name tweak --- crates/chia-datalayer/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 04f1c6c62..1bf3363e7 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1553,7 +1553,7 @@ mod tests { } #[test] - fn test_delete_root() { + fn test_delete_last() { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); let key_value_id: KvId = 1; From e8b51a9b0f3d3dad751baa0b5243f0396d3ac94c Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 1 Oct 2024 15:32:32 -0400 Subject: [PATCH 057/181] catch up a bit --- Cargo.lock | 1 + crates/chia-datalayer/Cargo.toml | 1 + crates/chia-datalayer/src/lib.rs | 459 +++++++++++++++++-------------- wheel/generate_type_stubs.py | 4 +- wheel/python/chia_rs/chia_rs.pyi | 4 +- 5 files changed, 258 insertions(+), 211 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2cf3864be..fae875918 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -377,6 +377,7 @@ dependencies = [ "clvm-utils", "clvmr", "hex-literal", + "num-traits", "open", "percent-encoding", "pyo3", diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index 1455c0f4f..46ad85af2 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -24,6 +24,7 @@ pyo3 = { workspace = true, optional = true } [dev-dependencies] clvm-utils = { workspace = true } hex-literal = { workspace = true } +num-traits = { workspace = true } open = { workspace = true } percent-encoding = { workspace = true } rstest = { workspace = true } diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 1bf3363e7..33e822eb3 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -8,17 +8,35 @@ use std::iter::{zip, IntoIterator}; use std::mem::size_of; use std::ops::Range; +type TreeIndex = u32; +type Parent = Option; +type Hash = [u8; 32]; +type KvId = i64; + +const fn range_by_length(start: usize, length: usize) -> Range { + start..start + length +} + +// define the serialized block format +// TODO: consider in more detail other serialization tools such as serde and streamable +// common fields +// TODO: better way to pick the max of key value and right range, until we move hash first +const HASH_RANGE: Range = range_by_length(0, size_of::()); +const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); +// internal specific fields +const LEFT_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); +const RIGHT_RANGE: Range = range_by_length(LEFT_RANGE.end, size_of::()); +// leaf specific fields +const KEY_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); +const VALUE_RANGE: Range = range_by_length(KEY_RANGE.end, size_of::()); + // TODO: clearly shouldn't be hard coded const METADATA_SIZE: usize = 2; // TODO: clearly shouldn't be hard coded -const DATA_SIZE: usize = 44; +// TODO: max of RIGHT_RANGE.end and VALUE_RANGE.end +const DATA_SIZE: usize = VALUE_RANGE.end; const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; - -type TreeIndex = u32; -type Parent = Option; -type Hash = [u8; 32]; type BlockBytes = [u8; BLOCK_SIZE]; -type KvId = i64; #[derive(Clone, Debug, Hash, Eq, PartialEq)] #[repr(u8)] @@ -168,7 +186,7 @@ pub struct Node { #[derive(Debug, PartialEq)] pub enum NodeSpecific { Internal { 
left: TreeIndex, right: TreeIndex }, - Leaf { key_value: KvId }, + Leaf { key: KvId, value: KvId }, } impl NodeSpecific { @@ -185,25 +203,6 @@ impl NodeSpecific { } } -const fn range_by_length(start: usize, length: usize) -> Range { - start..start + length -} - -// define the serialized block format -// TODO: consider in more detail other serialization tools such as serde and streamable -// common fields -const PARENT_RANGE: Range = range_by_length(0, size_of::()); -// internal specific fields -const LEFT_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); -const RIGHT_RANGE: Range = range_by_length(LEFT_RANGE.end, size_of::()); -// leaf specific fields -const KEY_VALUE_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); -// and back to common fields -// TODO: move the common parts to the beginning of the serialization -// TODO: better way to pick the max of key value and right range, until we move hash first -// NOTE: they happen to be the same location right now... -const HASH_RANGE: Range = range_by_length(KEY_VALUE_RANGE.end, size_of::()); - impl Node { // fn discriminant(&self) -> u8 { // unsafe { *(self as *const Self as *const u8) } @@ -228,9 +227,8 @@ impl Node { }, NodeType::Leaf => NodeSpecific::Leaf { // TODO: this try from really right? - key_value: KvId::from_be_bytes( - <[u8; 8]>::try_from(&blob[KEY_VALUE_RANGE]).unwrap(), - ), + key: KvId::from_be_bytes(<[u8; 8]>::try_from(&blob[KEY_RANGE]).unwrap()), + value: KvId::from_be_bytes(<[u8; 8]>::try_from(&blob[VALUE_RANGE]).unwrap()), }, }, }) @@ -260,14 +258,17 @@ impl Node { None => NULL_PARENT, Some(parent) => *parent, }; + // TODO: insert per ranges + blob.extend(hash); blob.extend(parent_integer.to_be_bytes()); blob.extend(left.to_be_bytes()); blob.extend(right.to_be_bytes()); - blob.extend(hash); + // TODO: not-yucky padding + blob.extend([0; DATA_SIZE - RIGHT_RANGE.end]); } Node { parent, - specific: NodeSpecific::Leaf { key_value }, + specific: NodeSpecific::Leaf { key, value }, hash, index: _, } => { @@ -275,9 +276,13 @@ impl Node { None => NULL_PARENT, Some(parent) => *parent, }; - blob.extend(parent_integer.to_be_bytes()); - blob.extend(key_value.to_be_bytes()); + // TODO: insert per ranges blob.extend(hash); + blob.extend(parent_integer.to_be_bytes()); + blob.extend(key.to_be_bytes()); + blob.extend(value.to_be_bytes()); + // TODO: not-yucky padding + blob.extend([0; DATA_SIZE - VALUE_RANGE.end]); } } @@ -285,12 +290,12 @@ impl Node { } // TODO: yes i know i'm trying to write this code in a non-rusty way and i need to stop that - pub fn key_value(&self) -> KvId { - let NodeSpecific::Leaf { key_value } = self.specific else { + pub fn key_value(&self) -> (KvId, KvId) { + let NodeSpecific::Leaf { key, value } = self.specific else { panic!() }; - key_value + (key, value) } pub fn to_dot(&self) -> DotLines { @@ -314,9 +319,9 @@ impl Node { ], note: String::new(), }, - NodeSpecific::Leaf {key_value} => DotLines{ + NodeSpecific::Leaf {key, value} => DotLines{ nodes: vec![ - format!("node_{index} [shape=box, label=\"{index}\\nkey_value: {key_value}\"];"), + format!("node_{index} [shape=box, label=\"{index}\\nvalue: {key}\\nvalue: {value}\"];"), ], connections: vec![ // TODO: dedupe with above @@ -414,19 +419,19 @@ fn get_free_indexes(blob: &[u8]) -> Result, String> { fn get_keys_values_indexes(blob: &[u8]) -> Result, String> { let index_count = blob.len() / BLOCK_SIZE; - let mut kv_to_index: HashMap = HashMap::default(); + let mut key_to_index: HashMap = HashMap::default(); if index_count == 0 { - return 
Ok(kv_to_index); + return Ok(key_to_index); } for block in MerkleBlobLeftChildFirstIterator::new(blob) { - if let NodeSpecific::Leaf { key_value } = block.node.specific { - kv_to_index.insert(key_value, block.node.index); + if let NodeSpecific::Leaf { key, .. } = block.node.specific { + key_to_index.insert(key, block.node.index); } } - Ok(kv_to_index) + Ok(key_to_index) } #[cfg_attr(feature = "py-bindings", pyclass(name = "MerkleBlob"))] @@ -434,7 +439,7 @@ fn get_keys_values_indexes(blob: &[u8]) -> Result, Stri pub struct MerkleBlob { blob: Vec, free_indexes: Vec, - kv_to_index: HashMap, + key_to_index: HashMap, // TODO: maybe name it next_index_to_allocate last_allocated_index: TreeIndex, } @@ -452,37 +457,39 @@ impl MerkleBlob { // TODO: stop double tree traversals here let free_indexes = get_free_indexes(&blob).unwrap(); - let kv_to_index = get_keys_values_indexes(&blob).unwrap(); + let key_to_index = get_keys_values_indexes(&blob).unwrap(); Ok(Self { blob, free_indexes, - kv_to_index, + key_to_index, last_allocated_index: block_count as TreeIndex, }) } - pub fn insert(&mut self, key_value: KvId, hash: &Hash) -> Result<(), String> { + pub fn insert(&mut self, key: KvId, value: KvId, hash: &Hash) -> Result<(), String> { // TODO: what about only unused providing a blob length? if self.blob.is_empty() { - self.insert_first(key_value, hash); + self.insert_first(key, value, hash); } else { + let mut hasher = Sha256::new(); + hasher.update(key.to_be_bytes()); + let seed: Hash = hasher.finalize(); // TODO: make this a parameter so we have one insert call where you specify the location - let old_leaf = - self.get_random_leaf_node_from_bytes(Vec::from(key_value.to_be_bytes()))?; + let old_leaf = self.get_random_leaf_node(seed)?; let internal_node_hash = internal_hash(&old_leaf.hash, hash); - if self.kv_to_index.len() == 1 { - self.insert_second(key_value, hash, &old_leaf, &internal_node_hash); + if self.key_to_index.len() == 1 { + self.insert_second(key, value, hash, &old_leaf, &internal_node_hash); } else { - self.insert_third_or_later(key_value, hash, &old_leaf, &internal_node_hash)?; + self.insert_third_or_later(key, value, hash, &old_leaf, &internal_node_hash)?; } } Ok(()) } - fn insert_first(&mut self, key_value: KvId, hash: &Hash) { + fn insert_first(&mut self, key: KvId, value: KvId, hash: &Hash) { let new_leaf_block = Block { metadata: NodeMetadata { node_type: NodeType::Leaf, @@ -490,7 +497,7 @@ impl MerkleBlob { }, node: Node { parent: None, - specific: NodeSpecific::Leaf { key_value }, + specific: NodeSpecific::Leaf { key, value }, hash: *hash, index: 0, }, @@ -498,14 +505,15 @@ impl MerkleBlob { self.blob.extend(new_leaf_block.to_bytes()); - self.kv_to_index.insert(key_value, 0); + self.key_to_index.insert(key, 0); self.free_indexes.clear(); self.last_allocated_index = 1; } fn insert_second( &mut self, - key_value: KvId, + key: KvId, + value: KvId, hash: &Hash, old_leaf: &Node, internal_node_hash: &Hash, @@ -527,6 +535,8 @@ impl MerkleBlob { self.blob.extend(new_internal_block.to_bytes()); + let (old_leaf_key, old_leaf_value) = old_leaf.key_value(); + let left_leaf_block = Block { metadata: NodeMetadata { node_type: NodeType::Leaf, @@ -535,15 +545,17 @@ impl MerkleBlob { node: Node { parent: Some(0), specific: NodeSpecific::Leaf { - key_value: old_leaf.key_value(), + key: old_leaf_key, + value: old_leaf_value, }, hash: old_leaf.hash, index: 1, }, }; self.blob.extend(left_leaf_block.to_bytes()); - self.kv_to_index - .insert(left_leaf_block.node.key_value(), 
left_leaf_block.node.index); + let (left_leaf_key, _) = left_leaf_block.node.key_value(); + self.key_to_index + .insert(left_leaf_key, left_leaf_block.node.index); let right_leaf_block = Block { metadata: NodeMetadata { @@ -552,14 +564,14 @@ impl MerkleBlob { }, node: Node { parent: Some(0), - specific: NodeSpecific::Leaf { key_value }, + specific: NodeSpecific::Leaf { key, value }, hash: *hash, index: 2, }, }; self.blob.extend(right_leaf_block.to_bytes()); - self.kv_to_index.insert( - right_leaf_block.node.key_value(), + self.key_to_index.insert( + right_leaf_block.node.key_value().0, right_leaf_block.node.index, ); @@ -569,7 +581,8 @@ impl MerkleBlob { fn insert_third_or_later( &mut self, - key_value: KvId, + key: KvId, + value: KvId, hash: &Hash, old_leaf: &Node, internal_node_hash: &Hash, @@ -584,7 +597,7 @@ impl MerkleBlob { }, node: Node { parent: Some(new_internal_node_index), - specific: NodeSpecific::Leaf { key_value }, + specific: NodeSpecific::Leaf { key, value }, hash: *hash, index: new_leaf_index, }, @@ -609,7 +622,7 @@ impl MerkleBlob { self.insert_entry_to_blob(new_internal_node_index, new_internal_block.to_bytes())?; let Some(old_parent_index) = old_leaf.parent else { - panic!("{key_value:?} {hash:?}") + panic!("{key:?} {value:?} {hash:?}") }; let mut block = Block::from_bytes( @@ -641,13 +654,13 @@ impl MerkleBlob { self.insert_entry_to_blob(old_parent_index, old_parent_block.to_bytes())?; self.mark_lineage_as_dirty(old_parent_index)?; - self.kv_to_index.insert(key_value, new_leaf_index); + self.key_to_index.insert(key, new_leaf_index); Ok(()) } - pub fn delete(&mut self, key_value: KvId) -> Result<(), String> { - let leaf_index = *self.kv_to_index.get(&key_value).unwrap(); + pub fn delete(&mut self, key: KvId) -> Result<(), String> { + let leaf_index = *self.key_to_index.get(&key).unwrap(); let leaf = self.get_node(leaf_index).unwrap(); // TODO: blech @@ -655,7 +668,7 @@ impl MerkleBlob { } else { panic!() } - self.kv_to_index.remove(&key_value); + self.key_to_index.remove(&key); let Some(parent_index) = leaf.parent else { self.free_indexes.clear(); @@ -675,8 +688,8 @@ impl MerkleBlob { self.insert_entry_to_blob(0, sibling_block.to_bytes())?; match sibling_block.node.specific { - NodeSpecific::Leaf { key_value } => { - self.kv_to_index.insert(key_value, 0); + NodeSpecific::Leaf { key, .. } => { + self.key_to_index.insert(key, 0); } NodeSpecific::Internal { left, right } => { for child_index in [left, right] { @@ -719,21 +732,20 @@ impl MerkleBlob { Ok(()) } - pub fn upsert( - &mut self, - old_key_value: KvId, - new_key_value: KvId, - new_hash: &Hash, - ) -> Result<(), String> { - let Some(leaf_index) = self.kv_to_index.get(&old_key_value) else { - self.insert(new_key_value, new_hash)?; + pub fn upsert(&mut self, key: KvId, value: KvId, new_hash: &Hash) -> Result<(), String> { + let Some(leaf_index) = self.key_to_index.get(&key) else { + self.insert(key, value, new_hash)?; return Ok(()); }; let mut block = self.get_block(*leaf_index).unwrap(); - if let NodeSpecific::Leaf { ref mut key_value } = block.node.specific { + if let NodeSpecific::Leaf { + value: ref mut inplace_value, + .. + } = block.node.specific + { block.node.hash.clone_from(new_hash); - *key_value = new_key_value; + *inplace_value = value; } else { panic!() } @@ -753,16 +765,16 @@ impl MerkleBlob { for block in self { match block.node.specific { NodeSpecific::Internal { .. } => internal_count += 1, - NodeSpecific::Leaf { key_value } => { + NodeSpecific::Leaf { key, .. 
} => { leaf_count += 1; - assert!(self.kv_to_index.contains_key(&key_value)); + assert!(self.key_to_index.contains_key(&key)); // TODO: consider what type free indexes should be assert!(!self.free_indexes.contains(&block.node.index)); } } } - assert_eq!(leaf_count, self.kv_to_index.len()); + assert_eq!(leaf_count, self.key_to_index.len()); assert_eq!( leaf_count + internal_count + self.free_indexes.len(), self.extend_index() as usize, @@ -819,7 +831,7 @@ impl MerkleBlob { } } - fn get_random_leaf_node_from_bytes(&self, seed_bytes: Vec) -> Result { + fn get_random_leaf_node(&self, seed_bytes: Hash) -> Result { let mut hasher = Sha256::new(); hasher.update(seed_bytes); let seed: Hash = hasher.finalize(); @@ -1015,8 +1027,8 @@ impl MerkleBlob { } match source_block.node.specific { - NodeSpecific::Leaf { key_value } => { - self.kv_to_index.insert(key_value, destination); + NodeSpecific::Leaf { key, .. } => { + self.key_to_index.insert(key, destination); } NodeSpecific::Internal { left, right, .. } => { for child in [left, right] { @@ -1089,17 +1101,17 @@ impl MerkleBlob { } #[pyo3(name = "insert")] - pub fn py_insert(&mut self, key_value: KvId, hash: Hash) -> PyResult<()> { + pub fn py_insert(&mut self, key: KvId, value: KvId, hash: Hash) -> PyResult<()> { // TODO: consider the error - self.insert(key_value, &hash).unwrap(); + self.insert(key, value, &hash).unwrap(); Ok(()) } #[pyo3(name = "delete")] - pub fn py_delete(&mut self, key_value: KvId) -> PyResult<()> { + pub fn py_delete(&mut self, key: KvId) -> PyResult<()> { // TODO: consider the error - self.delete(key_value).unwrap(); + self.delete(key).unwrap(); Ok(()) } @@ -1250,54 +1262,57 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { #[cfg(test)] mod tests { use super::*; - use hex_literal::hex; - use rstest::rstest; + // use hex_literal::hex; + use num_traits; + use rstest::{fixture, rstest}; use std::time::{Duration, Instant}; - const EXAMPLE_BLOB: [u8; 138] = hex!("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b"); - const HASH: Hash = [ - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 42, 43, - ]; - - const EXAMPLE_ROOT: Node = Node { - parent: None, - specific: NodeSpecific::Internal { left: 1, right: 2 }, - hash: HASH, - index: 0, - }; - const EXAMPLE_ROOT_METADATA: NodeMetadata = NodeMetadata { - node_type: NodeType::Internal, - dirty: true, - }; - const EXAMPLE_LEFT_LEAF: Node = Node { - parent: Some(0), - specific: NodeSpecific::Leaf { - key_value: 0x0405_0607_0809_0A0B, - }, - hash: HASH, - index: 1, - }; - const EXAMPLE_LEFT_LEAF_METADATA: NodeMetadata = NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, - }; - const EXAMPLE_RIGHT_LEAF: Node = Node { - parent: Some(0), - specific: NodeSpecific::Leaf { - key_value: 0x1415_1617_1819_1A1B, - }, - hash: HASH, - index: 2, - }; - const EXAMPLE_RIGHT_LEAF_METADATA: NodeMetadata = NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, - }; - - fn example_merkle_blob() -> MerkleBlob { - MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap() - } + // const EXAMPLE_BLOB: [u8; 138] = 
hex!("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b"); + // const HASH: Hash = [ + // 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + // 35, 36, 37, 38, 39, 40, 41, 42, 43, + // ]; + // + // const EXAMPLE_ROOT: Node = Node { + // parent: None, + // specific: NodeSpecific::Internal { left: 1, right: 2 }, + // hash: HASH, + // index: 0, + // }; + // const EXAMPLE_ROOT_METADATA: NodeMetadata = NodeMetadata { + // node_type: NodeType::Internal, + // dirty: true, + // }; + // const EXAMPLE_LEFT_LEAF: Node = Node { + // parent: Some(0), + // specific: NodeSpecific::Leaf { + // key: 0x0405_0607_0809_0A0B, + // value: 0x1415_1617_1819_1A1B, + // }, + // hash: HASH, + // index: 1, + // }; + // const EXAMPLE_LEFT_LEAF_METADATA: NodeMetadata = NodeMetadata { + // node_type: NodeType::Leaf, + // dirty: false, + // }; + // const EXAMPLE_RIGHT_LEAF: Node = Node { + // parent: Some(0), + // specific: NodeSpecific::Leaf { + // key: 0x2425_2627_2829_2A2B, + // value: 0x3435_3637_3839_3A3B, + // }, + // hash: HASH, + // index: 2, + // }; + // const EXAMPLE_RIGHT_LEAF_METADATA: NodeMetadata = NodeMetadata { + // node_type: NodeType::Leaf, + // dirty: false, + // }; + + // fn example_merkle_blob() -> MerkleBlob { + // MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap() + // } #[allow(unused)] fn normalized_blob(merkle_blob: &MerkleBlob) -> Vec { @@ -1364,18 +1379,36 @@ mod tests { assert_eq!(NodeMetadata::dirty_from_bytes(bytes).unwrap(), object.dirty); } - #[test] - fn test_load_a_python_dump() { - let merkle_blob = example_merkle_blob(); - merkle_blob.get_node(0).unwrap(); + // #[test] + // fn test_load_a_python_dump() { + // let merkle_blob = example_merkle_blob(); + // merkle_blob.get_node(0).unwrap(); + // + // merkle_blob.check(); + // } + fn hash(i: T) -> Hash { + let mut hasher = Sha256::new(); + hasher.update(i.to_be_bytes()); - merkle_blob.check(); + hasher.finalize() } - #[test] - fn test_get_lineage() { - let merkle_blob = example_merkle_blob(); - let lineage = merkle_blob.get_lineage(2).unwrap(); + #[fixture] + fn small_blob() -> MerkleBlob { + let mut blob = MerkleBlob::new(vec![]).unwrap(); + + blob.insert(0x0001_0203_0405_0607, 0x1011_1213_1415_1617, &hash(0x1020)) + .unwrap(); + + blob.insert(0x2021_2223_2425_2627, 0x3031_3233_3435_3637, &hash(0x2030)) + .unwrap(); + + blob + } + + #[rstest] + fn test_get_lineage(small_blob: MerkleBlob) { + let lineage = small_blob.get_lineage(2).unwrap(); for node in &lineage { println!("{node:?}"); } @@ -1383,66 +1416,72 @@ mod tests { let last_node = lineage.last().unwrap(); assert_eq!(last_node.parent, None); - merkle_blob.check(); - } - - #[test] - fn test_get_random_leaf_node() { - let merkle_blob = example_merkle_blob(); - let leaf = merkle_blob - .get_random_leaf_node_from_bytes(vec![0; 8]) - .unwrap(); - assert_eq!(leaf.index, 1); - - merkle_blob.check(); + small_blob.check(); } - #[test] - fn test_build_blob_and_read() { - let mut blob: Vec = Vec::new(); - - blob.extend(EXAMPLE_ROOT_METADATA.to_bytes()); - blob.extend(EXAMPLE_ROOT.to_bytes()); - blob.extend(EXAMPLE_LEFT_LEAF_METADATA.to_bytes()); - blob.extend(EXAMPLE_LEFT_LEAF.to_bytes()); - blob.extend(EXAMPLE_RIGHT_LEAF_METADATA.to_bytes()); - blob.extend(EXAMPLE_RIGHT_LEAF.to_bytes()); - - assert_eq!(blob, 
Vec::from(EXAMPLE_BLOB)); - - let merkle_blob = MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap(); - - assert_eq!(merkle_blob.get_node(0).unwrap(), EXAMPLE_ROOT); - assert_eq!(merkle_blob.get_node(1).unwrap(), EXAMPLE_LEFT_LEAF); - assert_eq!(merkle_blob.get_node(2).unwrap(), EXAMPLE_RIGHT_LEAF); + #[rstest] + #[case::right(0, 2)] + #[case::left(0xff, 1)] + fn test_get_random_leaf_node( + #[case] seed: u8, + #[case] index: TreeIndex, + // #[values((0, 2), (0xff, 1))] (seed, index): (u8, TreeIndex), + small_blob: MerkleBlob, + ) { + let leaf = small_blob.get_random_leaf_node([seed; 32]).unwrap(); + assert_eq!(leaf.index, index); - merkle_blob.check(); + small_blob.check(); } - #[test] - fn test_build_merkle() { - let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - - merkle_blob - .insert(EXAMPLE_LEFT_LEAF.key_value(), &EXAMPLE_LEFT_LEAF.hash) - .unwrap(); - merkle_blob - .insert(EXAMPLE_RIGHT_LEAF.key_value(), &EXAMPLE_RIGHT_LEAF.hash) - .unwrap(); - - // TODO: just hacking here to compare with the ~wrong~ simplified reference - let mut root = Block::from_bytes(merkle_blob.get_block_bytes(0).unwrap(), 0).unwrap(); - root.metadata.dirty = true; - root.node.hash = HASH; - assert_eq!(root.metadata.node_type, NodeType::Internal); - merkle_blob - .insert_entry_to_blob(0, root.to_bytes()) - .unwrap(); - - assert_eq!(merkle_blob.blob, Vec::from(EXAMPLE_BLOB)); + // #[test] + // fn test_build_blob_and_read() { + // let mut blob: Vec = Vec::new(); + // + // blob.extend(EXAMPLE_ROOT_METADATA.to_bytes()); + // blob.extend(EXAMPLE_ROOT.to_bytes()); + // blob.extend(EXAMPLE_LEFT_LEAF_METADATA.to_bytes()); + // blob.extend(EXAMPLE_LEFT_LEAF.to_bytes()); + // blob.extend(EXAMPLE_RIGHT_LEAF_METADATA.to_bytes()); + // blob.extend(EXAMPLE_RIGHT_LEAF.to_bytes()); + // + // assert_eq!(blob, Vec::from(EXAMPLE_BLOB)); + // + // let merkle_blob = MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap(); + // + // assert_eq!(merkle_blob.get_node(0).unwrap(), EXAMPLE_ROOT); + // assert_eq!(merkle_blob.get_node(1).unwrap(), EXAMPLE_LEFT_LEAF); + // assert_eq!(merkle_blob.get_node(2).unwrap(), EXAMPLE_RIGHT_LEAF); + // + // merkle_blob.check(); + // } - merkle_blob.check(); - } + // #[test] + // fn test_build_merkle() { + // let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + // + // let (key, value) = EXAMPLE_LEFT_LEAF.key_value(); + // merkle_blob + // .insert(key, value, &EXAMPLE_LEFT_LEAF.hash) + // .unwrap(); + // let (key, value) = EXAMPLE_RIGHT_LEAF.key_value(); + // merkle_blob + // .insert(key, value, &EXAMPLE_RIGHT_LEAF.hash) + // .unwrap(); + // + // // TODO: just hacking here to compare with the ~wrong~ simplified reference + // let mut root = Block::from_bytes(merkle_blob.get_block_bytes(0).unwrap(), 0).unwrap(); + // root.metadata.dirty = true; + // root.node.hash = HASH; + // assert_eq!(root.metadata.node_type, NodeType::Internal); + // merkle_blob + // .insert_entry_to_blob(0, root.to_bytes()) + // .unwrap(); + // + // assert_eq!(merkle_blob.blob, Vec::from(EXAMPLE_BLOB)); + // + // merkle_blob.check(); + // } #[test] fn test_just_insert_a_bunch() { @@ -1454,7 +1493,7 @@ mod tests { let start = Instant::now(); merkle_blob // TODO: yeah this hash is garbage - .insert(i as KvId, &HASH) + .insert(i as KvId, i as KvId, &hash(i)) .unwrap(); let end = Instant::now(); total_time += end.duration_since(start); @@ -1506,7 +1545,9 @@ mod tests { println!("inserting: {key_value_id}"); merkle_blob.calculate_lazy_hashes(); reference_blobs.push(MerkleBlob::new(merkle_blob.blob.clone()).unwrap()); - 
merkle_blob.insert(key_value_id, &hash).unwrap(); + merkle_blob + .insert(key_value_id, key_value_id, &hash) + .unwrap(); dots.push(merkle_blob.to_dot().dump()); } @@ -1545,11 +1586,13 @@ mod tests { let key_value_id: KvId = 1; // open_dot(&mut merkle_blob.to_dot().set_note("empty")); - merkle_blob.insert(key_value_id, &HASH).unwrap(); + merkle_blob + .insert(key_value_id, key_value_id, &hash(key_value_id)) + .unwrap(); // open_dot(&mut merkle_blob.to_dot().set_note("first after")); merkle_blob.check(); - assert_eq!(merkle_blob.kv_to_index.len(), 1); + assert_eq!(merkle_blob.key_to_index.len(), 1); } #[test] @@ -1558,13 +1601,15 @@ mod tests { let key_value_id: KvId = 1; // open_dot(&mut merkle_blob.to_dot().set_note("empty")); - merkle_blob.insert(key_value_id, &HASH).unwrap(); + merkle_blob + .insert(key_value_id, key_value_id, &hash(key_value_id)) + .unwrap(); // open_dot(&mut merkle_blob.to_dot().set_note("first after")); merkle_blob.check(); merkle_blob.delete(key_value_id).unwrap(); merkle_blob.check(); - assert_eq!(merkle_blob.kv_to_index.len(), 0); + assert_eq!(merkle_blob.key_to_index.len(), 0); } } diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index 903432cbe..87e93718a 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -385,8 +385,8 @@ def __init__( blob: bytes, ) -> None: ... - def insert(self, key_value: uint64, hash: bytes32) -> None: ... - def delete(self, key_value: uint64) -> None: ... + def insert(self, key: uint64, value: uint64, hash: bytes32) -> None: ... + def delete(self, key: uint64) -> None: ... def __len__(self) -> int: ... @final diff --git a/wheel/python/chia_rs/chia_rs.pyi b/wheel/python/chia_rs/chia_rs.pyi index c653772a6..3ba9f63e6 100644 --- a/wheel/python/chia_rs/chia_rs.pyi +++ b/wheel/python/chia_rs/chia_rs.pyi @@ -127,8 +127,8 @@ class MerkleBlob: blob: bytes, ) -> None: ... - def insert(self, key_value: uint64, hash: bytes32) -> None: ... - def delete(self, key_value: uint64) -> None: ... + def insert(self, key: uint64, value: uint64, hash: bytes32) -> None: ... + def delete(self, key: uint64) -> None: ... def __len__(self) -> int: ... @final From eda9afc2fa58787e1bd997ad1f2d028eb5acbbdc Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 2 Oct 2024 11:47:18 -0400 Subject: [PATCH 058/181] add insert location --- crates/chia-datalayer/src/lib.rs | 253 ++++++++++++++++++++++--------- 1 file changed, 180 insertions(+), 73 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 33e822eb3..a1ddf7ef2 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -91,6 +91,20 @@ fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { hasher.finalize() } +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +pub enum Side { + Left, + Right, +} + +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +pub enum InsertLocation { + Auto, + AsRoot, + Leaf { index: TreeIndex, side: Side }, +} + +// TODO: this should probably be test code? pub struct DotLines { nodes: Vec, connections: Vec, @@ -467,22 +481,53 @@ impl MerkleBlob { }) } - pub fn insert(&mut self, key: KvId, value: KvId, hash: &Hash) -> Result<(), String> { - // TODO: what about only unused providing a blob length? 
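// A minimal usage sketch for the reworked `insert` below (key/value/hash here are
// placeholders): callers now pass an explicit `InsertLocation` instead of always
// getting the seeded placement.
//
//     blob.insert(key, value, &hash, InsertLocation::Auto)?;  // leaf picked from sha256 of the key
//     blob.insert(key, value, &hash, InsertLocation::Leaf { index, side: Side::Left })?;
//
// `InsertLocation::AsRoot` only applies to an empty blob and routes to `insert_first`.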
- if self.blob.is_empty() { - self.insert_first(key, value, hash); - } else { - let mut hasher = Sha256::new(); - hasher.update(key.to_be_bytes()); - let seed: Hash = hasher.finalize(); - // TODO: make this a parameter so we have one insert call where you specify the location - let old_leaf = self.get_random_leaf_node(seed)?; - let internal_node_hash = internal_hash(&old_leaf.hash, hash); - - if self.key_to_index.len() == 1 { - self.insert_second(key, value, hash, &old_leaf, &internal_node_hash); - } else { - self.insert_third_or_later(key, value, hash, &old_leaf, &internal_node_hash)?; + pub fn insert( + &mut self, + key: KvId, + value: KvId, + hash: &Hash, + insert_location: InsertLocation, + ) -> Result<(), String> { + let insert_location = match insert_location { + InsertLocation::Auto => self.get_random_insert_location_by_kvid(key)?, + _ => insert_location, + }; + + match insert_location { + InsertLocation::Auto => { + panic!("this should have been caught and processed above") + } + InsertLocation::AsRoot => { + // TODO: what about only unused blocks resulting in a non-empty blob? + assert!(self.blob.is_empty()); + self.insert_first(key, value, hash); + } + InsertLocation::Leaf { index, side } => { + // TODO: what about only unused blocks resulting ia blob length? + assert!(!self.blob.is_empty()); + let old_leaf = self.get_node(index)?; + match old_leaf.specific { + NodeSpecific::Leaf { .. } => {} + NodeSpecific::Internal { .. } => panic!(), + } + // let NodeSpecific::Leaf ( .. ) = old_leaf.specific else { panic!() }; + let internal_node_hash = match side { + Side::Left => internal_hash(hash, &old_leaf.hash), + Side::Right => internal_hash(&old_leaf.hash, hash), + }; + + if self.key_to_index.len() == 1 { + self.insert_second(key, value, hash, &old_leaf, &internal_node_hash, &side)?; + } else { + self.insert_third_or_later( + key, + value, + hash, + &old_leaf, + &internal_node_hash, + &side, + )?; + } } } @@ -517,8 +562,11 @@ impl MerkleBlob { hash: &Hash, old_leaf: &Node, internal_node_hash: &Hash, - ) { + side: &Side, + ) -> Result<(), String> { self.blob.clear(); + self.blob.resize(BLOCK_SIZE * 3, 0); + self.free_indexes.clear(); let new_internal_block = Block { metadata: NodeMetadata { @@ -533,50 +581,50 @@ impl MerkleBlob { }, }; - self.blob.extend(new_internal_block.to_bytes()); + self.insert_entry_to_blob(0, new_internal_block.to_bytes())?; let (old_leaf_key, old_leaf_value) = old_leaf.key_value(); - - let left_leaf_block = Block { - metadata: NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, - }, - node: Node { + let nodes = [ + Node { parent: Some(0), specific: NodeSpecific::Leaf { key: old_leaf_key, value: old_leaf_value, }, hash: old_leaf.hash, - index: 1, - }, - }; - self.blob.extend(left_leaf_block.to_bytes()); - let (left_leaf_key, _) = left_leaf_block.node.key_value(); - self.key_to_index - .insert(left_leaf_key, left_leaf_block.node.index); - - let right_leaf_block = Block { - metadata: NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, + index: match side { + Side::Left => 2, + Side::Right => 1, + }, }, - node: Node { + Node { parent: Some(0), specific: NodeSpecific::Leaf { key, value }, hash: *hash, - index: 2, + index: match side { + Side::Left => 1, + Side::Right => 2, + }, }, - }; - self.blob.extend(right_leaf_block.to_bytes()); - self.key_to_index.insert( - right_leaf_block.node.key_value().0, - right_leaf_block.node.index, - ); + ]; + + for node in nodes { + let block = Block { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: 
false, + }, + node, + }; + + self.insert_entry_to_blob(block.node.index, block.to_bytes())?; + self.key_to_index + .insert(block.node.key_value().0, block.node.index); + } - self.free_indexes.clear(); self.last_allocated_index = 3; + + Ok(()) } fn insert_third_or_later( @@ -586,6 +634,7 @@ impl MerkleBlob { hash: &Hash, old_leaf: &Node, internal_node_hash: &Hash, + side: &Side, ) -> Result<(), String> { let new_leaf_index = self.get_new_index(); let new_internal_node_index = self.get_new_index(); @@ -604,6 +653,10 @@ impl MerkleBlob { }; self.insert_entry_to_blob(new_leaf_index, new_leaf_block.to_bytes())?; + let (left_leaf_node, right_leaf_node) = match side { + Side::Left => (&new_leaf_block.node, old_leaf), + Side::Right => (old_leaf, &new_leaf_block.node), + }; let new_internal_block = Block { metadata: NodeMetadata { node_type: NodeType::Internal, @@ -612,8 +665,8 @@ impl MerkleBlob { node: Node { parent: old_leaf.parent, specific: NodeSpecific::Internal { - left: old_leaf.index, - right: new_leaf_index, + left: left_leaf_node.index, + right: right_leaf_node.index, }, hash: *internal_node_hash, index: new_internal_node_index, @@ -734,7 +787,7 @@ impl MerkleBlob { pub fn upsert(&mut self, key: KvId, value: KvId, new_hash: &Hash) -> Result<(), String> { let Some(leaf_index) = self.key_to_index.get(&key) else { - self.insert(key, value, new_hash)?; + self.insert(key, value, new_hash, InsertLocation::Auto)?; return Ok(()); }; @@ -831,16 +884,31 @@ impl MerkleBlob { } } - fn get_random_leaf_node(&self, seed_bytes: Hash) -> Result { - let mut hasher = Sha256::new(); - hasher.update(seed_bytes); - let seed: Hash = hasher.finalize(); + fn get_random_insert_location_by_seed( + &self, + seed_bytes: &[u8], + ) -> Result { + if self.blob.is_empty() { + return Ok(InsertLocation::AsRoot); + } + let side = if (seed_bytes.last().unwrap() & 1 << 7) == 0 { + Side::Left + } else { + Side::Right + }; let mut node = self.get_node(0)?; - for byte in seed { + + // TODO: handle deeper depths than the seed + for byte in seed_bytes { for bit in 0..8 { match node.specific { - NodeSpecific::Leaf { .. } => return Ok(node), + NodeSpecific::Leaf { .. } => { + return Ok(InsertLocation::Leaf { + index: node.index, + side, + }) + } NodeSpecific::Internal { left, right, .. 
} => { let next: TreeIndex = if byte & (1 << bit) != 0 { left } else { right }; node = self.get_node(next)?; @@ -852,6 +920,14 @@ impl MerkleBlob { Err("failed to find a node".to_string()) } + fn get_random_insert_location_by_kvid(&self, seed: KvId) -> Result { + let mut hasher = Sha256::new(); + hasher.update(seed.to_be_bytes()); + let seed: Hash = hasher.finalize(); + + self.get_random_insert_location_by_seed(&seed) + } + fn extend_index(&self) -> TreeIndex { assert_eq!(self.blob.len() % BLOCK_SIZE, 0); @@ -1103,7 +1179,9 @@ impl MerkleBlob { #[pyo3(name = "insert")] pub fn py_insert(&mut self, key: KvId, value: KvId, hash: Hash) -> PyResult<()> { // TODO: consider the error - self.insert(key, value, &hash).unwrap(); + // TODO: expose insert location + self.insert(key, value, &hash, InsertLocation::Auto) + .unwrap(); Ok(()) } @@ -1263,7 +1341,7 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { mod tests { use super::*; // use hex_literal::hex; - use num_traits; + // use num_traits; use rstest::{fixture, rstest}; use std::time::{Duration, Instant}; @@ -1386,7 +1464,7 @@ mod tests { // // merkle_blob.check(); // } - fn hash(i: T) -> Hash { + fn hash(i: &T) -> Hash { let mut hasher = Sha256::new(); hasher.update(i.to_be_bytes()); @@ -1397,11 +1475,21 @@ mod tests { fn small_blob() -> MerkleBlob { let mut blob = MerkleBlob::new(vec![]).unwrap(); - blob.insert(0x0001_0203_0405_0607, 0x1011_1213_1415_1617, &hash(0x1020)) - .unwrap(); + blob.insert( + 0x0001_0203_0405_0607, + 0x1011_1213_1415_1617, + &hash(&0x1020), + InsertLocation::Auto, + ) + .unwrap(); - blob.insert(0x2021_2223_2425_2627, 0x3031_3233_3435_3637, &hash(0x2030)) - .unwrap(); + blob.insert( + 0x2021_2223_2425_2627, + 0x3031_3233_3435_3637, + &hash(&0x2030), + InsertLocation::Auto, + ) + .unwrap(); blob } @@ -1420,16 +1508,25 @@ mod tests { } #[rstest] - #[case::right(0, 2)] - #[case::left(0xff, 1)] - fn test_get_random_leaf_node( + #[case::right(0, 2, Side::Left)] + #[case::left(0xff, 1, Side::Right)] + fn test_get_random_insert_location_by_seed( #[case] seed: u8, - #[case] index: TreeIndex, - // #[values((0, 2), (0xff, 1))] (seed, index): (u8, TreeIndex), + #[case] expected_index: TreeIndex, + #[case] expected_side: Side, small_blob: MerkleBlob, ) { - let leaf = small_blob.get_random_leaf_node([seed; 32]).unwrap(); - assert_eq!(leaf.index, index); + let location = small_blob + .get_random_insert_location_by_seed(&[seed; 32]) + .unwrap(); + + assert_eq!( + location, + InsertLocation::Leaf { + index: expected_index, + side: expected_side + }, + ); small_blob.check(); } @@ -1493,7 +1590,7 @@ mod tests { let start = Instant::now(); merkle_blob // TODO: yeah this hash is garbage - .insert(i as KvId, i as KvId, &hash(i)) + .insert(i, i, &hash(&i), InsertLocation::Auto) .unwrap(); let end = Instant::now(); total_time += end.duration_since(start); @@ -1546,7 +1643,7 @@ mod tests { merkle_blob.calculate_lazy_hashes(); reference_blobs.push(MerkleBlob::new(merkle_blob.blob.clone()).unwrap()); merkle_blob - .insert(key_value_id, key_value_id, &hash) + .insert(key_value_id, key_value_id, &hash, InsertLocation::Auto) .unwrap(); dots.push(merkle_blob.to_dot().dump()); } @@ -1587,7 +1684,12 @@ mod tests { let key_value_id: KvId = 1; // open_dot(&mut merkle_blob.to_dot().set_note("empty")); merkle_blob - .insert(key_value_id, key_value_id, &hash(key_value_id)) + .insert( + key_value_id, + key_value_id, + &hash(&key_value_id), + InsertLocation::Auto, + ) .unwrap(); // open_dot(&mut merkle_blob.to_dot().set_note("first after")); @@ 
-1602,7 +1704,12 @@ mod tests { let key_value_id: KvId = 1; // open_dot(&mut merkle_blob.to_dot().set_note("empty")); merkle_blob - .insert(key_value_id, key_value_id, &hash(key_value_id)) + .insert( + key_value_id, + key_value_id, + &hash(&key_value_id), + InsertLocation::Auto, + ) .unwrap(); // open_dot(&mut merkle_blob.to_dot().set_note("first after")); merkle_blob.check(); From 2c4dccb8bc27f213311b800ce729c487c5ed83bc Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 2 Oct 2024 14:16:45 -0400 Subject: [PATCH 059/181] test second node insertion and sides --- crates/chia-datalayer/src/lib.rs | 53 ++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index a1ddf7ef2..7b36888bc 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1697,6 +1697,59 @@ mod tests { assert_eq!(merkle_blob.key_to_index.len(), 1); } + #[rstest] + fn test_insert_second(#[values(Side::Left, Side::Right)] side: Side) { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + let key_value_id: KvId = 1; + // open_dot(&mut merkle_blob.to_dot().set_note("empty")); + merkle_blob + .insert( + key_value_id, + key_value_id, + &hash(&key_value_id), + InsertLocation::Auto, + ) + .unwrap(); + // open_dot(&mut merkle_blob.to_dot().set_note("first after")); + let key_value_id: KvId = 2; + merkle_blob + .insert( + key_value_id, + key_value_id, + &hash(&key_value_id), + InsertLocation::Leaf { + index: 0, + side: side.clone(), + }, + ) + .unwrap(); + // open_dot(&mut merkle_blob.to_dot().set_note("first after")); + + let root = merkle_blob.get_node(0).unwrap(); + let NodeSpecific::Internal { left, right } = root.specific else { + panic!() + }; + + let NodeSpecific::Leaf { key: left_key, .. } = merkle_blob.get_node(left).unwrap().specific + else { + panic!() + }; + let NodeSpecific::Leaf { key: right_key, .. 
} = + merkle_blob.get_node(right).unwrap().specific + else { + panic!() + }; + + let expected_keys: [KvId; 2] = match side { + Side::Left => [2, 1], + Side::Right => [1, 2], + }; + assert_eq!([left_key, right_key], expected_keys); + + merkle_blob.check(); + } + #[test] fn test_delete_last() { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); From d0840c0690c3ad788efe25994a33b08c60ceddbd Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 2 Oct 2024 15:15:57 -0400 Subject: [PATCH 060/181] more choosing side insertion testing --- crates/chia-datalayer/src/lib.rs | 40 ++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 7b36888bc..824550e63 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1698,36 +1698,42 @@ mod tests { } #[rstest] - fn test_insert_second(#[values(Side::Left, Side::Right)] side: Side) { + fn test_insert_choosing_side( + #[values(Side::Left, Side::Right)] side: Side, + #[values(1, 2)] pre_count: usize, + ) { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - let key_value_id: KvId = 1; - // open_dot(&mut merkle_blob.to_dot().set_note("empty")); - merkle_blob - .insert( - key_value_id, - key_value_id, - &hash(&key_value_id), - InsertLocation::Auto, - ) - .unwrap(); + let mut last_key: KvId = 0; + for i in 1..=pre_count { + let key: KvId = i as KvId; + // open_dot(&mut merkle_blob.to_dot().set_note("empty")); + merkle_blob + .insert(key, key, &hash(&key), InsertLocation::Auto) + .unwrap(); + last_key = key; + } + + let key_value_id: KvId = pre_count as KvId + 1; // open_dot(&mut merkle_blob.to_dot().set_note("first after")); - let key_value_id: KvId = 2; merkle_blob .insert( key_value_id, key_value_id, &hash(&key_value_id), InsertLocation::Leaf { - index: 0, + index: merkle_blob.key_to_index[&last_key], side: side.clone(), }, ) .unwrap(); // open_dot(&mut merkle_blob.to_dot().set_note("first after")); - let root = merkle_blob.get_node(0).unwrap(); - let NodeSpecific::Internal { left, right } = root.specific else { + let sibling = merkle_blob + .get_node(merkle_blob.key_to_index[&last_key]) + .unwrap(); + let parent = merkle_blob.get_node(sibling.parent.unwrap()).unwrap(); + let NodeSpecific::Internal { left, right } = parent.specific else { panic!() }; @@ -1742,8 +1748,8 @@ mod tests { }; let expected_keys: [KvId; 2] = match side { - Side::Left => [2, 1], - Side::Right => [1, 2], + Side::Left => [pre_count as KvId + 1, pre_count as KvId], + Side::Right => [pre_count as KvId, pre_count as KvId + 1], }; assert_eq!([left_key, right_key], expected_keys); From 4d7863041d3ed48e62aab3250f1f4e5e0fa1a709 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 2 Oct 2024 15:40:19 -0400 Subject: [PATCH 061/181] fixup --- Cargo.lock | 1 + crates/chia-datalayer/Cargo.toml | 1 + crates/chia-datalayer/src/lib.rs | 6 +++++ tests/test_merkle_blob.py | 41 +++----------------------------- 4 files changed, 11 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fae875918..c7be7a3e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -376,6 +376,7 @@ version = "0.1.0" dependencies = [ "clvm-utils", "clvmr", + "hex", "hex-literal", "num-traits", "open", diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index 46ad85af2..c9e506ff0 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -23,6 +23,7 @@ pyo3 = { workspace = true, optional = true } 
[dev-dependencies] clvm-utils = { workspace = true } +hex = { workspace = true } hex-literal = { workspace = true } num-traits = { workspace = true } open = { workspace = true } diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 824550e63..212de4c03 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1340,6 +1340,7 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { #[cfg(test)] mod tests { use super::*; + use hex; // use hex_literal::hex; // use num_traits; use rstest::{fixture, rstest}; @@ -1778,4 +1779,9 @@ mod tests { merkle_blob.check(); assert_eq!(merkle_blob.key_to_index.len(), 0); } + + #[rstest] + fn test_dump_small_blob_bytes(small_blob: MerkleBlob) { + println!("{}", hex::encode(small_blob.blob)); + } } diff --git a/tests/test_merkle_blob.py b/tests/test_merkle_blob.py index cd5631fd0..88d5802c5 100644 --- a/tests/test_merkle_blob.py +++ b/tests/test_merkle_blob.py @@ -5,7 +5,7 @@ def test_merkle_blob(): blob = bytes.fromhex( - "0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b" + "00000e4a8b1ecee43f457bbe2b30e94ac2afc0d3a6536f891a2ced5e96ce07fe9932ffffffff000000010000000200000000000000000100d8ddfc94e7201527a6a93ee04aed8c5c122ac38af6dbf6e5f1caefba2597230d000000000001020304050607101112131415161701000f980325ebe9426fa295f3f69cc38ef8fe6ce8f3b9f083556c0f927e67e566510000000020212223242526273031323334353637" ) merkle_blob = MerkleBlob(blob) print(merkle_blob) @@ -14,42 +14,7 @@ def test_merkle_blob(): def test_just_insert_a_bunch() -> None: - HASH = bytes32( - [ - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30, - 31, - 32, - 33, - 34, - 35, - 36, - 37, - 38, - 39, - 40, - 41, - 42, - 43, - ] - ) + HASH = bytes32(range(12, 44)) import pathlib @@ -63,7 +28,7 @@ def test_just_insert_a_bunch() -> None: total_time = 0.0 for i in range(100000): start = time.monotonic() - merkle_blob.insert(uint64(i), HASH) + merkle_blob.insert(uint64(i), uint64(i), HASH) end = time.monotonic() total_time += end - start From ab7d1fef61272f8ace4a157c30762c89b45dfa09 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 2 Oct 2024 16:16:07 -0400 Subject: [PATCH 062/181] remove use hex --- crates/chia-datalayer/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 212de4c03..f82dc0934 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1340,7 +1340,6 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { #[cfg(test)] mod tests { use super::*; - use hex; // use hex_literal::hex; // use num_traits; use rstest::{fixture, rstest}; From bef6828dc6855277f75f30b10c3d6696cab3611c Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 4 Oct 2024 15:00:29 -0400 Subject: [PATCH 063/181] more testing --- crates/chia-datalayer/src/dot.rs | 48 ++++++ crates/chia-datalayer/src/lib.rs | 269 +++++++++++++++++++------------ 2 files changed, 217 insertions(+), 100 deletions(-) create mode 100644 crates/chia-datalayer/src/dot.rs diff --git a/crates/chia-datalayer/src/dot.rs b/crates/chia-datalayer/src/dot.rs new file mode 100644 index 000000000..ad636741a --- /dev/null +++ b/crates/chia-datalayer/src/dot.rs @@ -0,0 +1,48 @@ +// TODO: this should 
probably be test code? +pub struct DotLines { + pub nodes: Vec, + pub connections: Vec, + pub pair_boxes: Vec, + pub note: String, +} + +impl Default for DotLines { + fn default() -> Self { + Self::new() + } +} + +impl DotLines { + pub fn new() -> Self { + Self { + nodes: vec![], + connections: vec![], + pair_boxes: vec![], + note: String::new(), + } + } + + pub fn push(&mut self, mut other: DotLines) { + self.nodes.append(&mut other.nodes); + self.connections.append(&mut other.connections); + self.pair_boxes.append(&mut other.pair_boxes); + } + + pub fn dump(&mut self) -> String { + // TODO: consuming itself, secretly + let note = &self.note; + let mut result = vec![format!("# {note}"), String::new(), "digraph {".to_string()]; + result.append(&mut self.nodes); + result.append(&mut self.connections); + result.append(&mut self.pair_boxes); + result.push("}".to_string()); + + result.join("\n") + } + + pub fn set_note(&mut self, note: &str) -> &mut Self { + self.note = String::from(note); + + self + } +} diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index f82dc0934..be92cbba6 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -2,12 +2,15 @@ use pyo3::{buffer::PyBuffer, pyclass, pymethods, PyResult}; use clvmr::sha2::Sha256; +use dot::DotLines; use std::cmp::Ordering; use std::collections::{HashMap, VecDeque}; use std::iter::{zip, IntoIterator}; use std::mem::size_of; use std::ops::Range; +mod dot; + type TreeIndex = u32; type Parent = Option; type Hash = [u8; 32]; @@ -22,7 +25,8 @@ const fn range_by_length(start: usize, length: usize) -> Range { // common fields // TODO: better way to pick the max of key value and right range, until we move hash first const HASH_RANGE: Range = range_by_length(0, size_of::()); -const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); +// const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); +const PARENT_RANGE: Range = HASH_RANGE.end..(HASH_RANGE.end + size_of::()); // internal specific fields const LEFT_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); const RIGHT_RANGE: Range = range_by_length(LEFT_RANGE.end, size_of::()); @@ -104,55 +108,6 @@ pub enum InsertLocation { Leaf { index: TreeIndex, side: Side }, } -// TODO: this should probably be test code? 
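// Worked sizes for the serialized block ranges above, assuming the stated widths
// (Hash is 32 bytes, TreeIndex is a u32, KvId is an i64); a sketch for orientation
// rather than additional constants:
//
//     HASH_RANGE   = 0..32
//     PARENT_RANGE = 32..36
//     LEFT_RANGE   = 36..40, RIGHT_RANGE = 40..44   (internal nodes)
//     KEY_RANGE    = 36..44, VALUE_RANGE = 44..52   (leaf nodes)
//
// so DATA_SIZE = VALUE_RANGE.end = 52 and BLOCK_SIZE = METADATA_SIZE + DATA_SIZE = 54.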
-pub struct DotLines { - nodes: Vec, - connections: Vec, - pair_boxes: Vec, - note: String, -} - -impl Default for DotLines { - fn default() -> Self { - Self::new() - } -} - -impl DotLines { - pub fn new() -> Self { - Self { - nodes: vec![], - connections: vec![], - pair_boxes: vec![], - note: String::new(), - } - } - - pub fn push(&mut self, mut other: DotLines) { - self.nodes.append(&mut other.nodes); - self.connections.append(&mut other.connections); - self.pair_boxes.append(&mut other.pair_boxes); - } - - pub fn dump(&mut self) -> String { - // TODO: consuming itself, secretly - let note = &self.note; - let mut result = vec![format!("# {note}"), String::new(), "digraph {".to_string()]; - result.append(&mut self.nodes); - result.append(&mut self.connections); - result.append(&mut self.pair_boxes); - result.push("}".to_string()); - - result.join("\n") - } - - pub fn set_note(&mut self, note: &str) -> &mut Self { - self.note = String::from(note); - - self - } -} - const NULL_PARENT: TreeIndex = 0xffff_ffffu32; #[derive(Debug, PartialEq)] @@ -231,18 +186,16 @@ impl Node { Ok(Self { parent: Self::parent_from_bytes(&blob)?, index, - hash: <[u8; 32]>::try_from(&blob[HASH_RANGE]).unwrap(), + hash: blob[HASH_RANGE].try_into().unwrap(), specific: match metadata.node_type { NodeType::Internal => NodeSpecific::Internal { - left: TreeIndex::from_be_bytes(<[u8; 4]>::try_from(&blob[LEFT_RANGE]).unwrap()), - right: TreeIndex::from_be_bytes( - <[u8; 4]>::try_from(&blob[RIGHT_RANGE]).unwrap(), - ), + left: TreeIndex::from_be_bytes(blob[LEFT_RANGE].try_into().unwrap()), + right: TreeIndex::from_be_bytes(blob[RIGHT_RANGE].try_into().unwrap()), }, NodeType::Leaf => NodeSpecific::Leaf { // TODO: this try from really right? - key: KvId::from_be_bytes(<[u8; 8]>::try_from(&blob[KEY_RANGE]).unwrap()), - value: KvId::from_be_bytes(<[u8; 8]>::try_from(&blob[VALUE_RANGE]).unwrap()), + key: KvId::from_be_bytes(blob[KEY_RANGE].try_into().unwrap()), + value: KvId::from_be_bytes(blob[VALUE_RANGE].try_into().unwrap()), }, }, }) @@ -250,10 +203,7 @@ impl Node { fn parent_from_bytes(blob: &[u8; DATA_SIZE]) -> Result { // TODO: a little setup here for pre-optimization to allow walking parents without processing entire nodes - let parent_integer = TreeIndex::from_be_bytes( - <[u8; 4]>::try_from(&blob[PARENT_RANGE]) - .map_err(|e| format!("data blob wrong size: {e}"))?, - ); + let parent_integer = TreeIndex::from_be_bytes(blob[PARENT_RANGE].try_into().unwrap()); match parent_integer { NULL_PARENT => Ok(None), _ => Ok(Some(parent_integer)), @@ -369,24 +319,14 @@ impl Block { pub fn from_bytes(blob: BlockBytes, index: TreeIndex) -> Result { // TODO: handle invalid indexes? // TODO: handle overflows? - let metadata_blob: [u8; METADATA_SIZE] = blob - .get(..METADATA_SIZE) - .ok_or(format!("metadata blob out of bounds: {}", blob.len(),))? - .try_into() - .map_err(|e| format!("metadata blob wrong size: {e}"))?; - let data_blob: [u8; DATA_SIZE] = blob - .get(METADATA_SIZE..) - .ok_or("data blob out of bounds".to_string())? 
- .try_into() - .map_err(|e| format!("data blob wrong size: {e}"))?; - let metadata = match NodeMetadata::from_bytes(metadata_blob) { - Ok(metadata) => metadata, - Err(message) => return Err(format!("failed loading metadata: {message})")), - }; - Ok(match Node::from_bytes(&metadata, index, data_blob) { - Ok(node) => Block { metadata, node }, - Err(message) => return Err(format!("failed loading node: {message}")), - }) + let metadata_blob: [u8; METADATA_SIZE] = blob[..METADATA_SIZE].try_into().unwrap(); + let data_blob: [u8; DATA_SIZE] = blob[METADATA_SIZE..].try_into().unwrap(); + let metadata = NodeMetadata::from_bytes(metadata_blob) + .map_err(|message| format!("failed loading metadata: {message})"))?; + let node = Node::from_bytes(&metadata, index, data_blob) + .map_err(|message| format!("failed loading node: {message})"))?; + + Ok(Block { metadata, node }) } fn range(index: TreeIndex) -> Range { @@ -717,10 +657,9 @@ impl MerkleBlob { let leaf = self.get_node(leaf_index).unwrap(); // TODO: blech - if let NodeSpecific::Leaf { .. } = leaf.specific { - } else { + let NodeSpecific::Leaf { .. } = leaf.specific else { panic!() - } + }; self.key_to_index.remove(&key); let Some(parent_index) = leaf.parent else { @@ -877,6 +816,8 @@ impl MerkleBlob { fn get_new_index(&mut self) -> TreeIndex { match self.free_indexes.pop() { None => { + // TODO: should this extend...? + // TODO: should this update free indexes...? self.last_allocated_index += 1; self.last_allocated_index - 1 } @@ -980,24 +921,13 @@ impl MerkleBlob { // TODO: handle invalid indexes? // TODO: handle overflows? let block = self.get_block_bytes(index)?; - let metadata_blob: [u8; METADATA_SIZE] = block - .get(..METADATA_SIZE) - .ok_or(format!("metadata blob out of bounds: {}", block.len(),))? - .try_into() - .map_err(|e| format!("metadata blob wrong size: {e}"))?; - let data_blob: [u8; DATA_SIZE] = block - .get(METADATA_SIZE..) - .ok_or("data blob out of bounds".to_string())? - .try_into() - .map_err(|e| format!("data blob wrong size: {e}"))?; - let metadata = match NodeMetadata::from_bytes(metadata_blob) { - Ok(metadata) => metadata, - Err(message) => return Err(format!("failed loading metadata: {message})")), - }; - Ok(match Node::from_bytes(&metadata, index, data_blob) { - Ok(node) => node, - Err(message) => return Err(format!("failed loading node: {message}")), - }) + let metadata_blob: [u8; METADATA_SIZE] = block[..METADATA_SIZE].try_into().unwrap(); + let data_blob: [u8; DATA_SIZE] = block[METADATA_SIZE..].try_into().unwrap(); + let metadata = NodeMetadata::from_bytes(metadata_blob) + .map_err(|message| format!("failed loading metadata: {message})"))?; + + Node::from_bytes(&metadata, index, data_blob) + .map_err(|message| format!("failed loading node: {message}")) } pub fn get_parent_index(&self, index: TreeIndex) -> Result { @@ -1131,6 +1061,19 @@ impl MerkleBlob { Ok(()) } + + #[allow(unused)] + fn get_key_value_map(&self) -> HashMap { + let mut key_value = HashMap::new(); + for (key, index) in self.key_to_index.iter() { + let NodeSpecific::Leaf { value, .. } = self.get_node(*index).unwrap().specific else { + panic!() + }; + key_value.insert(*key, value); + } + + key_value + } } impl PartialEq for MerkleBlob { @@ -1779,8 +1722,134 @@ mod tests { assert_eq!(merkle_blob.key_to_index.len(), 0); } + #[rstest] + // TODO: does this mut allow modifying the fixture value as used by other tests? 
+ fn test_delete_frees_index(mut small_blob: MerkleBlob) { + let key = 0x0001_0203_0405_0607; + let index = small_blob.key_to_index[&key]; + small_blob.delete(key).unwrap(); + + assert_eq!(small_blob.free_indexes, vec![index, 2]); + } + + #[rstest] + // TODO: does this mut allow modifying the fixture value as used by other tests? + fn test_get_new_index_with_free_index(mut small_blob: MerkleBlob) { + let key = 0x0001_0203_0405_0607; + let _ = small_blob.key_to_index[&key]; + small_blob.delete(key).unwrap(); + + // NOTE: both 1 and 2 are free per test_delete_frees_index + assert_eq!(small_blob.get_new_index(), 2); + } + #[rstest] fn test_dump_small_blob_bytes(small_blob: MerkleBlob) { println!("{}", hex::encode(small_blob.blob)); } + + #[test] + #[should_panic] + fn test_node_type_from_u8_invalid() { + let _ = NodeType::from_u8(2); + } + + #[test] + fn test_node_metadata_dirty_from_bytes_invalid() { + NodeMetadata::dirty_from_bytes([0, 2]).expect_err("invalid value should fail"); + } + + #[test] + #[should_panic] + fn test_node_specific_sibling_index_panics_for_leaf() { + let leaf = NodeSpecific::Leaf { key: 0, value: 0 }; + leaf.sibling_index(0); + } + + #[test] + #[should_panic] + fn test_node_specific_sibling_index_panics_for_unknown_sibling() { + let node = NodeSpecific::Internal { left: 0, right: 1 }; + node.sibling_index(2); + } + + #[rstest] + fn test_get_free_indexes(small_blob: MerkleBlob) { + let mut blob = small_blob.blob.clone(); + let expected_free_index = (blob.len() / BLOCK_SIZE) as TreeIndex; + blob.extend_from_slice(&[0; BLOCK_SIZE]); + assert_eq!(get_free_indexes(&blob).unwrap(), [expected_free_index]); + } + + #[test] + fn test_merkle_blob_new_errs_for_nonmultiple_of_block_length() { + MerkleBlob::new(vec![1]).expect_err("invalid length should fail"); + } + + #[rstest] + fn test_upsert_inserts(small_blob: MerkleBlob) { + let key = 1234; + assert!(!small_blob.key_to_index.contains_key(&key)); + let value = 5678; + + let mut insert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); + insert_blob + .insert(key, value, &hash(&key), InsertLocation::Auto) + .unwrap(); + // open_dot(&mut insert_blob.to_dot().set_note("first after")); + + let mut upsert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); + upsert_blob.upsert(key, value, &hash(&key)).unwrap(); + // open_dot(&mut upsert_blob.to_dot().set_note("first after")); + + assert_eq!(insert_blob.blob, upsert_blob.blob); + } + + #[rstest] + // TODO: does this mut allow modifying the fixture value as used by other tests? + fn test_upsert_upserts(mut small_blob: MerkleBlob) { + let before_blocks = Vec::from_iter(small_blob.iter()); + let (key, index) = small_blob.key_to_index.iter().next().unwrap(); + let node = small_blob.get_node(*index).unwrap(); + let NodeSpecific::Leaf { + key: original_key, + value: original_value, + .. 
+ } = node.specific + else { + panic!() + }; + let new_value = original_value + 1; + + small_blob.upsert(*key, new_value, &node.hash).unwrap(); + + let after_blocks = Vec::from_iter(small_blob.iter()); + + assert_eq!(before_blocks.len(), after_blocks.len()); + for (before, after) in zip(before_blocks, after_blocks) { + assert_eq!(before.node.parent, after.node.parent); + assert_eq!(before.node.index, after.node.index); + let NodeSpecific::Leaf { + key: before_key, + value: before_value, + } = before.node.specific + else { + assert_eq!(before.node.specific, after.node.specific); + continue; + }; + let NodeSpecific::Leaf { + key: after_key, + value: after_value, + } = after.node.specific + else { + panic!() + }; + assert_eq!(before_key, after_key); + if before_key == original_key { + assert_eq!(after_value, new_value); + } else { + assert_eq!(before_value, after_value); + } + } + } } From 544cf0785b578c49848fbcda8ed5f65fb48ab022 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 4 Oct 2024 15:12:19 -0400 Subject: [PATCH 064/181] clippy --- crates/chia-datalayer/src/lib.rs | 34 ++++++++++++++------------------ 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index be92cbba6..2d5e2fbdd 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -161,13 +161,13 @@ pub enum NodeSpecific { impl NodeSpecific { pub fn sibling_index(&self, index: TreeIndex) -> TreeIndex { let NodeSpecific::Internal { right, left } = self else { - panic!() + panic!("unable to get sibling index from a leaf") }; match index { x if (x == *right) => *left, x if (x == *left) => *right, - _ => panic!(), + _ => panic!("index not a child: {index}"), } } } @@ -184,7 +184,7 @@ impl Node { ) -> Result { // TODO: add Err results Ok(Self { - parent: Self::parent_from_bytes(&blob)?, + parent: Self::parent_from_bytes(&blob), index, hash: blob[HASH_RANGE].try_into().unwrap(), specific: match metadata.node_type { @@ -201,12 +201,12 @@ impl Node { }) } - fn parent_from_bytes(blob: &[u8; DATA_SIZE]) -> Result { + fn parent_from_bytes(blob: &[u8; DATA_SIZE]) -> Parent { // TODO: a little setup here for pre-optimization to allow walking parents without processing entire nodes let parent_integer = TreeIndex::from_be_bytes(blob[PARENT_RANGE].try_into().unwrap()); match parent_integer { - NULL_PARENT => Ok(None), - _ => Ok(Some(parent_integer)), + NULL_PARENT => None, + _ => Some(parent_integer), } } pub fn to_bytes(&self) -> [u8; DATA_SIZE] { @@ -930,14 +930,10 @@ impl MerkleBlob { .map_err(|message| format!("failed loading node: {message}")) } - pub fn get_parent_index(&self, index: TreeIndex) -> Result { + pub fn get_parent_index(&self, index: TreeIndex) -> Parent { let block = self.get_block_bytes(index).unwrap(); - Node::parent_from_bytes( - block[METADATA_SIZE..] 
- .try_into() - .map_err(|e| format!("data blob wrong size: {e}"))?, - ) + Node::parent_from_bytes(block[METADATA_SIZE..].try_into().unwrap()) } pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { @@ -963,7 +959,7 @@ impl MerkleBlob { while let Some(this_index) = next_index { lineage.push(this_index); let block = self.get_block_bytes(this_index)?; - next_index = Node::parent_from_bytes(block[METADATA_SIZE..].try_into().unwrap())?; + next_index = Node::parent_from_bytes(block[METADATA_SIZE..].try_into().unwrap()); } Ok(lineage) @@ -1065,7 +1061,7 @@ impl MerkleBlob { #[allow(unused)] fn get_key_value_map(&self) -> HashMap { let mut key_value = HashMap::new(); - for (key, index) in self.key_to_index.iter() { + for (key, index) in &self.key_to_index { let NodeSpecific::Leaf { value, .. } = self.get_node(*index).unwrap().specific else { panic!() }; @@ -1749,7 +1745,7 @@ mod tests { } #[test] - #[should_panic] + #[should_panic(expected = "unknown NodeType value: 2")] fn test_node_type_from_u8_invalid() { let _ = NodeType::from_u8(2); } @@ -1760,14 +1756,14 @@ mod tests { } #[test] - #[should_panic] + #[should_panic(expected = "unable to get sibling index from a leaf")] fn test_node_specific_sibling_index_panics_for_leaf() { let leaf = NodeSpecific::Leaf { key: 0, value: 0 }; leaf.sibling_index(0); } #[test] - #[should_panic] + #[should_panic(expected = "index not a child: 2")] fn test_node_specific_sibling_index_panics_for_unknown_sibling() { let node = NodeSpecific::Internal { left: 0, right: 1 }; node.sibling_index(2); @@ -1808,7 +1804,7 @@ mod tests { #[rstest] // TODO: does this mut allow modifying the fixture value as used by other tests? fn test_upsert_upserts(mut small_blob: MerkleBlob) { - let before_blocks = Vec::from_iter(small_blob.iter()); + let before_blocks = small_blob.iter().collect::>(); let (key, index) = small_blob.key_to_index.iter().next().unwrap(); let node = small_blob.get_node(*index).unwrap(); let NodeSpecific::Leaf { @@ -1823,7 +1819,7 @@ mod tests { small_blob.upsert(*key, new_value, &node.hash).unwrap(); - let after_blocks = Vec::from_iter(small_blob.iter()); + let after_blocks = small_blob.iter().collect::>(); assert_eq!(before_blocks.len(), after_blocks.len()); for (before, after) in zip(before_blocks, after_blocks) { From 49b6cacdc954dbbc2c96fa5f74880ee754548942 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Sat, 5 Oct 2024 11:22:56 -0400 Subject: [PATCH 065/181] handle unlimited depths for getting random leaf locations --- crates/chia-datalayer/src/lib.rs | 35 ++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 2d5e2fbdd..cd26abac9 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -829,6 +829,8 @@ impl MerkleBlob { &self, seed_bytes: &[u8], ) -> Result { + let mut seed_bytes = Vec::from(seed_bytes); + if self.blob.is_empty() { return Ok(InsertLocation::AsRoot); } @@ -840,25 +842,28 @@ impl MerkleBlob { }; let mut node = self.get_node(0)?; - // TODO: handle deeper depths than the seed - for byte in seed_bytes { - for bit in 0..8 { - match node.specific { - NodeSpecific::Leaf { .. } => { - return Ok(InsertLocation::Leaf { - index: node.index, - side, - }) - } - NodeSpecific::Internal { left, right, .. 
} => { - let next: TreeIndex = if byte & (1 << bit) != 0 { left } else { right }; - node = self.get_node(next)?; + loop { + for byte in &seed_bytes { + for bit in 0..8 { + match node.specific { + NodeSpecific::Leaf { .. } => { + return Ok(InsertLocation::Leaf { + index: node.index, + side, + }) + } + NodeSpecific::Internal { left, right, .. } => { + let next: TreeIndex = if byte & (1 << bit) != 0 { left } else { right }; + node = self.get_node(next)?; + } } } } - } - Err("failed to find a node".to_string()) + let mut hasher = Sha256::new(); + hasher.update(seed_bytes); + seed_bytes = hasher.finalize().into(); + } } fn get_random_insert_location_by_kvid(&self, seed: KvId) -> Result { From a4408c73371c007c8cd4c6ee2d7fcae439730d81 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Sat, 5 Oct 2024 11:59:53 -0400 Subject: [PATCH 066/181] sha256 helpers --- crates/chia-datalayer/Cargo.toml | 2 +- crates/chia-datalayer/src/lib.rs | 52 +++++++++++++++++--------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index c9e506ff0..0b38d6569 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -19,13 +19,13 @@ crate-type = ["rlib"] [dependencies] clvmr = { workspace = true } +num-traits = { workspace = true } pyo3 = { workspace = true, optional = true } [dev-dependencies] clvm-utils = { workspace = true } hex = { workspace = true } hex-literal = { workspace = true } -num-traits = { workspace = true } open = { workspace = true } percent-encoding = { workspace = true } rstest = { workspace = true } diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index cd26abac9..f4714d50b 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -86,6 +86,21 @@ impl NodeType { // } // } +#[allow(clippy::needless_pass_by_value)] +fn sha256_num(input: T) -> Hash { + let mut hasher = Sha256::new(); + hasher.update(input.to_be_bytes()); + + hasher.finalize() +} + +fn sha256_bytes(input: &[u8]) -> Hash { + let mut hasher = Sha256::new(); + hasher.update(input); + + hasher.finalize() +} + fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { let mut hasher = Sha256::new(); hasher.update(b"\x02"); @@ -860,16 +875,12 @@ impl MerkleBlob { } } - let mut hasher = Sha256::new(); - hasher.update(seed_bytes); - seed_bytes = hasher.finalize().into(); + seed_bytes = sha256_bytes(&seed_bytes).into(); } } fn get_random_insert_location_by_kvid(&self, seed: KvId) -> Result { - let mut hasher = Sha256::new(); - hasher.update(seed.to_be_bytes()); - let seed: Hash = hasher.finalize(); + let seed = sha256_num(seed); self.get_random_insert_location_by_seed(&seed) } @@ -1285,7 +1296,6 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { mod tests { use super::*; // use hex_literal::hex; - // use num_traits; use rstest::{fixture, rstest}; use std::time::{Duration, Instant}; @@ -1408,12 +1418,6 @@ mod tests { // // merkle_blob.check(); // } - fn hash(i: &T) -> Hash { - let mut hasher = Sha256::new(); - hasher.update(i.to_be_bytes()); - - hasher.finalize() - } #[fixture] fn small_blob() -> MerkleBlob { @@ -1422,7 +1426,7 @@ mod tests { blob.insert( 0x0001_0203_0405_0607, 0x1011_1213_1415_1617, - &hash(&0x1020), + &sha256_num(0x1020), InsertLocation::Auto, ) .unwrap(); @@ -1430,7 +1434,7 @@ mod tests { blob.insert( 0x2021_2223_2425_2627, 0x3031_3233_3435_3637, - &hash(&0x2030), + &sha256_num(0x2030), InsertLocation::Auto, ) .unwrap(); @@ -1534,7 
+1538,7 @@ mod tests { let start = Instant::now(); merkle_blob // TODO: yeah this hash is garbage - .insert(i, i, &hash(&i), InsertLocation::Auto) + .insert(i, i, &sha256_num(i), InsertLocation::Auto) .unwrap(); let end = Instant::now(); total_time += end.duration_since(start); @@ -1579,9 +1583,7 @@ mod tests { let key_value_ids: [KvId; COUNT] = core::array::from_fn(|i| i as KvId); for key_value_id in key_value_ids { - let mut hasher = Sha256::new(); - hasher.update(key_value_id.to_be_bytes()); - let hash: Hash = hasher.finalize(); + let hash: Hash = sha256_num(key_value_id); println!("inserting: {key_value_id}"); merkle_blob.calculate_lazy_hashes(); @@ -1631,7 +1633,7 @@ mod tests { .insert( key_value_id, key_value_id, - &hash(&key_value_id), + &sha256_num(key_value_id), InsertLocation::Auto, ) .unwrap(); @@ -1653,7 +1655,7 @@ mod tests { let key: KvId = i as KvId; // open_dot(&mut merkle_blob.to_dot().set_note("empty")); merkle_blob - .insert(key, key, &hash(&key), InsertLocation::Auto) + .insert(key, key, &sha256_num(key), InsertLocation::Auto) .unwrap(); last_key = key; } @@ -1664,7 +1666,7 @@ mod tests { .insert( key_value_id, key_value_id, - &hash(&key_value_id), + &sha256_num(key_value_id), InsertLocation::Leaf { index: merkle_blob.key_to_index[&last_key], side: side.clone(), @@ -1710,7 +1712,7 @@ mod tests { .insert( key_value_id, key_value_id, - &hash(&key_value_id), + &sha256_num(key_value_id), InsertLocation::Auto, ) .unwrap(); @@ -1795,12 +1797,12 @@ mod tests { let mut insert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); insert_blob - .insert(key, value, &hash(&key), InsertLocation::Auto) + .insert(key, value, &sha256_num(key), InsertLocation::Auto) .unwrap(); // open_dot(&mut insert_blob.to_dot().set_note("first after")); let mut upsert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); - upsert_blob.upsert(key, value, &hash(&key)).unwrap(); + upsert_blob.upsert(key, value, &sha256_num(key)).unwrap(); // open_dot(&mut upsert_blob.to_dot().set_note("first after")); assert_eq!(insert_blob.blob, upsert_blob.blob); From 37e8340725a25db5e8022403c2c2b232c0c377e8 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 7 Oct 2024 08:17:34 -0400 Subject: [PATCH 067/181] more names --- crates/chia-datalayer/src/lib.rs | 34 ++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index f4714d50b..781bb286f 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -41,6 +41,10 @@ const METADATA_SIZE: usize = 2; const DATA_SIZE: usize = VALUE_RANGE.end; const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; type BlockBytes = [u8; BLOCK_SIZE]; +type MetadataBytes = [u8; METADATA_SIZE]; +type DataBytes = [u8; DATA_SIZE]; +const METADATA_RANGE: Range = 0..METADATA_SIZE; +const DATA_RANGE: Range = METADATA_SIZE..DATA_SIZE; #[derive(Clone, Debug, Hash, Eq, PartialEq)] #[repr(u8)] @@ -132,7 +136,7 @@ pub struct NodeMetadata { } impl NodeMetadata { - pub fn from_bytes(blob: [u8; METADATA_SIZE]) -> Result { + pub fn from_bytes(blob: MetadataBytes) -> Result { // TODO: could save 1-2% of tree space by packing (and maybe don't do that) // TODO: identify some useful structured serialization tooling we use Ok(Self { @@ -141,15 +145,15 @@ impl NodeMetadata { }) } - pub fn to_bytes(&self) -> [u8; METADATA_SIZE] { + pub fn to_bytes(&self) -> MetadataBytes { [self.node_type.to_u8(), u8::from(self.dirty)] } - pub fn node_type_from_bytes(blob: [u8; 
METADATA_SIZE]) -> Result { + pub fn node_type_from_bytes(blob: MetadataBytes) -> Result { NodeType::from_u8(blob[0]) } - pub fn dirty_from_bytes(blob: [u8; METADATA_SIZE]) -> Result { + pub fn dirty_from_bytes(blob: MetadataBytes) -> Result { match blob[1] { 0 => Ok(false), 1 => Ok(true), @@ -195,7 +199,7 @@ impl Node { pub fn from_bytes( metadata: &NodeMetadata, index: TreeIndex, - blob: [u8; DATA_SIZE], + blob: DataBytes, ) -> Result { // TODO: add Err results Ok(Self { @@ -216,7 +220,7 @@ impl Node { }) } - fn parent_from_bytes(blob: &[u8; DATA_SIZE]) -> Parent { + fn parent_from_bytes(blob: &DataBytes) -> Parent { // TODO: a little setup here for pre-optimization to allow walking parents without processing entire nodes let parent_integer = TreeIndex::from_be_bytes(blob[PARENT_RANGE].try_into().unwrap()); match parent_integer { @@ -224,7 +228,7 @@ impl Node { _ => Some(parent_integer), } } - pub fn to_bytes(&self) -> [u8; DATA_SIZE] { + pub fn to_bytes(&self) -> DataBytes { let mut blob: Vec = Vec::new(); match self { Node { @@ -325,8 +329,8 @@ pub struct Block { impl Block { pub fn to_bytes(&self) -> BlockBytes { let mut blob: BlockBytes = [0; BLOCK_SIZE]; - blob[..METADATA_SIZE].copy_from_slice(&self.metadata.to_bytes()); - blob[METADATA_SIZE..].copy_from_slice(&self.node.to_bytes()); + blob[METADATA_RANGE].copy_from_slice(&self.metadata.to_bytes()); + blob[DATA_RANGE].copy_from_slice(&self.node.to_bytes()); blob } @@ -334,8 +338,8 @@ impl Block { pub fn from_bytes(blob: BlockBytes, index: TreeIndex) -> Result { // TODO: handle invalid indexes? // TODO: handle overflows? - let metadata_blob: [u8; METADATA_SIZE] = blob[..METADATA_SIZE].try_into().unwrap(); - let data_blob: [u8; DATA_SIZE] = blob[METADATA_SIZE..].try_into().unwrap(); + let metadata_blob: MetadataBytes = blob[METADATA_RANGE].try_into().unwrap(); + let data_blob: DataBytes = blob[DATA_RANGE].try_into().unwrap(); let metadata = NodeMetadata::from_bytes(metadata_blob) .map_err(|message| format!("failed loading metadata: {message})"))?; let node = Node::from_bytes(&metadata, index, data_blob) @@ -937,8 +941,8 @@ impl MerkleBlob { // TODO: handle invalid indexes? // TODO: handle overflows? 
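// A small self-contained sketch of the fixed-size block addressing that the type
// aliases above (MetadataBytes, DataBytes) and the METADATA_RANGE/DATA_RANGE
// constants describe: metadata bytes first, node data filling the rest, and a
// block's byte range derived from its index. The constant values follow the
// ranges shown in this series (2-byte metadata, data sized by VALUE_RANGE.end);
// names prefixed with SKETCH_ are placeholders, not part of this crate.
const SKETCH_METADATA_SIZE: usize = 2;
const SKETCH_DATA_SIZE: usize = 52;
const SKETCH_BLOCK_SIZE: usize = SKETCH_METADATA_SIZE + SKETCH_DATA_SIZE;

fn sketch_block_range(index: usize) -> std::ops::Range<usize> {
    // every node occupies one fixed-size slot, so its bytes start at index * block size
    let start = index * SKETCH_BLOCK_SIZE;
    start..start + SKETCH_BLOCK_SIZE
}

fn sketch_split_block(block: &[u8; SKETCH_BLOCK_SIZE]) -> (&[u8], &[u8]) {
    // metadata ([node_type, dirty]) comes first, the node payload fills the remainder
    block.split_at(SKETCH_METADATA_SIZE)
}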
let block = self.get_block_bytes(index)?; - let metadata_blob: [u8; METADATA_SIZE] = block[..METADATA_SIZE].try_into().unwrap(); - let data_blob: [u8; DATA_SIZE] = block[METADATA_SIZE..].try_into().unwrap(); + let metadata_blob: MetadataBytes = block[METADATA_RANGE].try_into().unwrap(); + let data_blob: DataBytes = block[DATA_RANGE].try_into().unwrap(); let metadata = NodeMetadata::from_bytes(metadata_blob) .map_err(|message| format!("failed loading metadata: {message})"))?; @@ -949,7 +953,7 @@ impl MerkleBlob { pub fn get_parent_index(&self, index: TreeIndex) -> Parent { let block = self.get_block_bytes(index).unwrap(); - Node::parent_from_bytes(block[METADATA_SIZE..].try_into().unwrap()) + Node::parent_from_bytes(block[DATA_RANGE].try_into().unwrap()) } pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { @@ -975,7 +979,7 @@ impl MerkleBlob { while let Some(this_index) = next_index { lineage.push(this_index); let block = self.get_block_bytes(this_index)?; - next_index = Node::parent_from_bytes(block[METADATA_SIZE..].try_into().unwrap()); + next_index = Node::parent_from_bytes(block[DATA_RANGE].try_into().unwrap()); } Ok(lineage) From 52c8edc263831c282a89b468eaa99a37cb26274d Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 7 Oct 2024 08:38:36 -0400 Subject: [PATCH 068/181] fixup --- crates/chia-datalayer/src/lib.rs | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 781bb286f..160f8b3a8 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -24,6 +24,9 @@ const fn range_by_length(start: usize, length: usize) -> Range { // TODO: consider in more detail other serialization tools such as serde and streamable // common fields // TODO: better way to pick the max of key value and right range, until we move hash first +// TODO: clearly shouldn't be hard coded +const METADATA_SIZE: usize = 2; +const METADATA_RANGE: Range = 0..METADATA_SIZE; const HASH_RANGE: Range = range_by_length(0, size_of::()); // const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); const PARENT_RANGE: Range = HASH_RANGE.end..(HASH_RANGE.end + size_of::()); @@ -34,8 +37,6 @@ const RIGHT_RANGE: Range = range_by_length(LEFT_RANGE.end, size_of:: = range_by_length(PARENT_RANGE.end, size_of::()); const VALUE_RANGE: Range = range_by_length(KEY_RANGE.end, size_of::()); -// TODO: clearly shouldn't be hard coded -const METADATA_SIZE: usize = 2; // TODO: clearly shouldn't be hard coded // TODO: max of RIGHT_RANGE.end and VALUE_RANGE.end const DATA_SIZE: usize = VALUE_RANGE.end; @@ -43,8 +44,11 @@ const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; type BlockBytes = [u8; BLOCK_SIZE]; type MetadataBytes = [u8; METADATA_SIZE]; type DataBytes = [u8; DATA_SIZE]; -const METADATA_RANGE: Range = 0..METADATA_SIZE; -const DATA_RANGE: Range = METADATA_SIZE..DATA_SIZE; +const DATA_RANGE: Range = METADATA_SIZE..METADATA_SIZE + DATA_SIZE; +const INTERNAL_PADDING_RANGE: Range = RIGHT_RANGE.end..DATA_SIZE; +const INTERNAL_PADDING_SIZE: usize = INTERNAL_PADDING_RANGE.end - INTERNAL_PADDING_RANGE.start; +const LEAF_PADDING_RANGE: Range = VALUE_RANGE.end..DATA_SIZE; +const LEAF_PADDING_SIZE: usize = LEAF_PADDING_RANGE.end - LEAF_PADDING_RANGE.start; #[derive(Clone, Debug, Hash, Eq, PartialEq)] #[repr(u8)] @@ -247,7 +251,7 @@ impl Node { blob.extend(left.to_be_bytes()); blob.extend(right.to_be_bytes()); // TODO: not-yucky padding - blob.extend([0; DATA_SIZE - 
RIGHT_RANGE.end]); + blob.extend([0; INTERNAL_PADDING_SIZE]); } Node { parent, @@ -265,7 +269,7 @@ impl Node { blob.extend(key.to_be_bytes()); blob.extend(value.to_be_bytes()); // TODO: not-yucky padding - blob.extend([0; DATA_SIZE - VALUE_RANGE.end]); + blob.extend([0; LEAF_PADDING_SIZE]); } } @@ -349,16 +353,8 @@ impl Block { } fn range(index: TreeIndex) -> Range { - let metadata_start = index as usize * BLOCK_SIZE; - let data_start = metadata_start + METADATA_SIZE; - let end = data_start + DATA_SIZE; - - // let range = metadata_start..end; - // // checking range validity - // self.blob.get(range.clone()).unwrap(); - // - // range - metadata_start..end + let block_start = index as usize * BLOCK_SIZE; + block_start..block_start + BLOCK_SIZE } } From 0b8b14d7de70e934524587ccb2d8d78ecbd77559 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 7 Oct 2024 10:39:14 -0400 Subject: [PATCH 069/181] tidy --- crates/chia-datalayer/src/lib.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 160f8b3a8..ad6199517 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -458,14 +458,15 @@ impl MerkleBlob { self.insert_first(key, value, hash); } InsertLocation::Leaf { index, side } => { - // TODO: what about only unused blocks resulting ia blob length? - assert!(!self.blob.is_empty()); let old_leaf = self.get_node(index)?; - match old_leaf.specific { - NodeSpecific::Leaf { .. } => {} - NodeSpecific::Internal { .. } => panic!(), - } - // let NodeSpecific::Leaf ( .. ) = old_leaf.specific else { panic!() }; + let NodeSpecific::Leaf { + key: old_leaf_key, .. + } = old_leaf.specific + else { + panic!() + }; + assert_eq!(self.key_to_index[&old_leaf_key], index); + let internal_node_hash = match side { Side::Left => internal_hash(hash, &old_leaf.hash), Side::Right => internal_hash(&old_leaf.hash, hash), From 9b3fb80bb1a44444eae18ba267df2f9a10ef38b2 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 7 Oct 2024 10:50:31 -0400 Subject: [PATCH 070/181] remove `Node.index` field --- crates/chia-datalayer/src/lib.rs | 151 +++++++++++++++---------------- 1 file changed, 71 insertions(+), 80 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index ad6199517..8cc1bac65 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -171,8 +171,6 @@ pub struct Node { parent: Parent, hash: Hash, specific: NodeSpecific, - // TODO: kinda feels questionable having it be aware of its own location - index: TreeIndex, } #[derive(Debug, PartialEq)] @@ -200,15 +198,10 @@ impl Node { // unsafe { *(self as *const Self as *const u8) } // } - pub fn from_bytes( - metadata: &NodeMetadata, - index: TreeIndex, - blob: DataBytes, - ) -> Result { + pub fn from_bytes(metadata: &NodeMetadata, blob: DataBytes) -> Result { // TODO: add Err results Ok(Self { parent: Self::parent_from_bytes(&blob), - index, hash: blob[HASH_RANGE].try_into().unwrap(), specific: match metadata.node_type { NodeType::Internal => NodeSpecific::Internal { @@ -239,7 +232,6 @@ impl Node { parent, specific: NodeSpecific::Internal { left, right }, hash, - index: _, } => { let parent_integer = match parent { None => NULL_PARENT, @@ -257,7 +249,6 @@ impl Node { parent, specific: NodeSpecific::Leaf { key, value }, hash, - index: _, } => { let parent_integer = match parent { None => NULL_PARENT, @@ -285,8 +276,7 @@ impl Node { (key, value) } - pub fn 
to_dot(&self) -> DotLines { - let index = self.index; + pub fn to_dot(&self, index: TreeIndex) -> DotLines { match self.specific { NodeSpecific::Internal {left, right} => DotLines{ nodes: vec![ @@ -339,14 +329,14 @@ impl Block { blob } - pub fn from_bytes(blob: BlockBytes, index: TreeIndex) -> Result { + pub fn from_bytes(blob: BlockBytes) -> Result { // TODO: handle invalid indexes? // TODO: handle overflows? let metadata_blob: MetadataBytes = blob[METADATA_RANGE].try_into().unwrap(); let data_blob: DataBytes = blob[DATA_RANGE].try_into().unwrap(); let metadata = NodeMetadata::from_bytes(metadata_blob) .map_err(|message| format!("failed loading metadata: {message})"))?; - let node = Node::from_bytes(&metadata, index, data_blob) + let node = Node::from_bytes(&metadata, data_blob) .map_err(|message| format!("failed loading node: {message})"))?; Ok(Block { metadata, node }) @@ -369,8 +359,8 @@ fn get_free_indexes(blob: &[u8]) -> Result, String> { let mut seen_indexes: Vec = vec![false; index_count]; - for block in MerkleBlobLeftChildFirstIterator::new(blob) { - seen_indexes[block.node.index as usize] = true; + for (index, _) in MerkleBlobLeftChildFirstIterator::new(blob) { + seen_indexes[index as usize] = true; } let mut free_indexes: Vec = vec![]; @@ -394,9 +384,9 @@ fn get_keys_values_indexes(blob: &[u8]) -> Result, Stri return Ok(key_to_index); } - for block in MerkleBlobLeftChildFirstIterator::new(blob) { + for (index, block) in MerkleBlobLeftChildFirstIterator::new(blob) { if let NodeSpecific::Leaf { key, .. } = block.node.specific { - key_to_index.insert(key, block.node.index); + key_to_index.insert(key, index); } } @@ -480,6 +470,7 @@ impl MerkleBlob { value, hash, &old_leaf, + index, &internal_node_hash, &side, )?; @@ -500,7 +491,6 @@ impl MerkleBlob { parent: None, specific: NodeSpecific::Leaf { key, value }, hash: *hash, - index: 0, }, }; @@ -533,7 +523,6 @@ impl MerkleBlob { parent: None, specific: NodeSpecific::Internal { left: 1, right: 2 }, hash: *internal_node_hash, - index: 0, }, }; @@ -541,30 +530,34 @@ impl MerkleBlob { let (old_leaf_key, old_leaf_value) = old_leaf.key_value(); let nodes = [ - Node { - parent: Some(0), - specific: NodeSpecific::Leaf { - key: old_leaf_key, - value: old_leaf_value, - }, - hash: old_leaf.hash, - index: match side { + ( + match side { Side::Left => 2, Side::Right => 1, }, - }, - Node { - parent: Some(0), - specific: NodeSpecific::Leaf { key, value }, - hash: *hash, - index: match side { + Node { + parent: Some(0), + specific: NodeSpecific::Leaf { + key: old_leaf_key, + value: old_leaf_value, + }, + hash: old_leaf.hash, + }, + ), + ( + match side { Side::Left => 1, Side::Right => 2, }, - }, + Node { + parent: Some(0), + specific: NodeSpecific::Leaf { key, value }, + hash: *hash, + }, + ), ]; - for node in nodes { + for (index, node) in nodes { let block = Block { metadata: NodeMetadata { node_type: NodeType::Leaf, @@ -573,9 +566,8 @@ impl MerkleBlob { node, }; - self.insert_entry_to_blob(block.node.index, block.to_bytes())?; - self.key_to_index - .insert(block.node.key_value().0, block.node.index); + self.insert_entry_to_blob(index, block.to_bytes())?; + self.key_to_index.insert(block.node.key_value().0, index); } self.last_allocated_index = 3; @@ -583,12 +575,15 @@ impl MerkleBlob { Ok(()) } + // TODO: no really, actually consider the too many arguments complaint + #[allow(clippy::too_many_arguments)] fn insert_third_or_later( &mut self, key: KvId, value: KvId, hash: &Hash, old_leaf: &Node, + old_leaf_index: TreeIndex, internal_node_hash: 
&Hash, side: &Side, ) -> Result<(), String> { @@ -604,14 +599,13 @@ impl MerkleBlob { parent: Some(new_internal_node_index), specific: NodeSpecific::Leaf { key, value }, hash: *hash, - index: new_leaf_index, }, }; self.insert_entry_to_blob(new_leaf_index, new_leaf_block.to_bytes())?; - let (left_leaf_node, right_leaf_node) = match side { - Side::Left => (&new_leaf_block.node, old_leaf), - Side::Right => (old_leaf, &new_leaf_block.node), + let (left_index, right_index) = match side { + Side::Left => (new_leaf_index, old_leaf_index), + Side::Right => (old_leaf_index, new_leaf_index), }; let new_internal_block = Block { metadata: NodeMetadata { @@ -621,11 +615,10 @@ impl MerkleBlob { node: Node { parent: old_leaf.parent, specific: NodeSpecific::Internal { - left: left_leaf_node.index, - right: right_leaf_node.index, + left: left_index, + right: right_index, }, hash: *internal_node_hash, - index: new_internal_node_index, }, }; self.insert_entry_to_blob(new_internal_node_index, new_internal_block.to_bytes())?; @@ -634,24 +627,20 @@ impl MerkleBlob { panic!("{key:?} {value:?} {hash:?}") }; - let mut block = Block::from_bytes( - self.get_block_bytes(old_leaf.index)?, - new_internal_node_index, - )?; + let mut block = Block::from_bytes(self.get_block_bytes(old_leaf_index)?)?; block.node.parent = Some(new_internal_node_index); - self.insert_entry_to_blob(old_leaf.index, block.to_bytes())?; + self.insert_entry_to_blob(old_leaf_index, block.to_bytes())?; - let mut old_parent_block = - Block::from_bytes(self.get_block_bytes(old_parent_index)?, old_parent_index)?; + let mut old_parent_block = Block::from_bytes(self.get_block_bytes(old_parent_index)?)?; if let NodeSpecific::Internal { ref mut left, ref mut right, .. } = old_parent_block.node.specific { - if old_leaf.index == *left { + if old_leaf_index == *left { *left = new_internal_node_index; - } else if old_leaf.index == *right { + } else if old_leaf_index == *right { *right = new_internal_node_index; } else { panic!(); @@ -770,14 +759,14 @@ impl MerkleBlob { let mut leaf_count: usize = 0; let mut internal_count: usize = 0; - for block in self { + for (index, block) in self { match block.node.specific { NodeSpecific::Internal { .. } => internal_count += 1, NodeSpecific::Leaf { key, .. } => { leaf_count += 1; assert!(self.key_to_index.contains_key(&key)); // TODO: consider what type free indexes should be - assert!(!self.free_indexes.contains(&block.node.index)); + assert!(!self.free_indexes.contains(&index)); } } } @@ -815,7 +804,7 @@ impl MerkleBlob { let mut next_index = Some(index); while let Some(this_index) = next_index { - let mut block = Block::from_bytes(self.get_block_bytes(this_index)?, this_index)?; + let mut block = Block::from_bytes(self.get_block_bytes(this_index)?)?; if block.metadata.dirty { return Ok(()); @@ -856,7 +845,8 @@ impl MerkleBlob { } else { Side::Right }; - let mut node = self.get_node(0)?; + let mut next_index: TreeIndex = 0; + let mut node = self.get_node(next_index)?; loop { for byte in &seed_bytes { @@ -864,13 +854,13 @@ impl MerkleBlob { match node.specific { NodeSpecific::Leaf { .. } => { return Ok(InsertLocation::Leaf { - index: node.index, + index: next_index, side, }) } NodeSpecific::Internal { left, right, .. 
} => { - let next: TreeIndex = if byte & (1 << bit) != 0 { left } else { right }; - node = self.get_node(next)?; + next_index = if byte & (1 << bit) != 0 { left } else { right }; + node = self.get_node(next_index)?; } } } @@ -910,7 +900,7 @@ impl MerkleBlob { } fn get_block(&self, index: TreeIndex) -> Result { - Block::from_bytes(self.get_block_bytes(index)?, index) + Block::from_bytes(self.get_block_bytes(index)?) } // fn get_block_slice(&self, index: TreeIndex) -> Result<&mut BlockBytes, String> { @@ -943,7 +933,7 @@ impl MerkleBlob { let metadata = NodeMetadata::from_bytes(metadata_blob) .map_err(|message| format!("failed loading metadata: {message})"))?; - Node::from_bytes(&metadata, index, data_blob) + Node::from_bytes(&metadata, data_blob) .map_err(|message| format!("failed loading node: {message}")) } @@ -984,8 +974,8 @@ impl MerkleBlob { pub fn to_dot(&self) -> DotLines { let mut result = DotLines::new(); - for block in self { - result.push(block.node.to_dot()); + for (index, block) in self { + result.push(block.node.to_dot(index)); } result @@ -998,9 +988,9 @@ impl MerkleBlob { pub fn calculate_lazy_hashes(&mut self) { // TODO: really want a truncated traversal, not filter // TODO: yeah, storing the whole set of blocks via collect is not great - for mut block in self + for (index, mut block) in self .iter() - .filter(|block| block.metadata.dirty) + .filter(|(_, block)| block.metadata.dirty) .collect::>() { let NodeSpecific::Internal { left, right } = block.node.specific else { @@ -1013,8 +1003,7 @@ impl MerkleBlob { // TODO: wrap this up in Block maybe? just to have 'control' of dirty being 'accurate' block.node.hash = internal_hash(&left.node.hash, &right.node.hash); block.metadata.dirty = false; - self.insert_entry_to_blob(block.node.index, block.to_bytes()) - .unwrap(); + self.insert_entry_to_blob(index, block.to_bytes()).unwrap(); } } @@ -1091,9 +1080,11 @@ impl MerkleBlob { impl PartialEq for MerkleBlob { fn eq(&self, other: &Self) -> bool { - for (self_block, other_block) in zip(self, other) { + // TODO: should we check the indexes? + for ((_, self_block), (_, other_block)) in zip(self, other) { if (self_block.metadata.dirty || other_block.metadata.dirty) || self_block.node.hash != other_block.node.hash + // TODO: isn't only a leaf supposed to check this? || self_block.node.specific != other_block.node.specific { return false; @@ -1106,7 +1097,7 @@ impl PartialEq for MerkleBlob { impl<'a> IntoIterator for &'a MerkleBlob { // TODO: review efficiency in whatever use cases we end up with, vs Item = Node etc - type Item = Block; + type Item = (TreeIndex, Block); type IntoIter = MerkleBlobLeftChildFirstIterator<'a>; fn into_iter(self) -> Self::IntoIter { @@ -1181,7 +1172,7 @@ impl<'a> MerkleBlobLeftChildFirstIterator<'a> { } impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { - type Item = Block; + type Item = (TreeIndex, Block); fn next(&mut self) -> Option { // left sibling first, children before parents @@ -1189,13 +1180,13 @@ impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { loop { let item = self.deque.pop_front()?; let block_bytes: BlockBytes = self.blob[Block::range(item.index)].try_into().unwrap(); - let block = Block::from_bytes(block_bytes, item.index).unwrap(); + let block = Block::from_bytes(block_bytes).unwrap(); match block.node.specific { - NodeSpecific::Leaf { .. } => return Some(block), + NodeSpecific::Leaf { .. 
} => return Some((item.index, block)), NodeSpecific::Internal { left, right } => { if item.visited { - return Some(block); + return Some((item.index, block)); }; self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { @@ -1241,7 +1232,7 @@ impl Iterator for MerkleBlobParentFirstIterator<'_> { loop { let index = self.deque.pop_front()?; let block_bytes: BlockBytes = self.blob[Block::range(index)].try_into().unwrap(); - let block = Block::from_bytes(block_bytes, index).unwrap(); + let block = Block::from_bytes(block_bytes).unwrap(); match block.node.specific { NodeSpecific::Leaf { .. } => return Some(block), @@ -1280,7 +1271,7 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { loop { let index = self.deque.pop_front()?; let block_bytes: BlockBytes = self.blob[Block::range(index)].try_into().unwrap(); - let block = Block::from_bytes(block_bytes, index).unwrap(); + let block = Block::from_bytes(block_bytes).unwrap(); match block.node.specific { NodeSpecific::Leaf { .. } => return Some(block), @@ -1830,9 +1821,9 @@ mod tests { let after_blocks = small_blob.iter().collect::>(); assert_eq!(before_blocks.len(), after_blocks.len()); - for (before, after) in zip(before_blocks, after_blocks) { + for ((before_index, before), (after_index, after)) in zip(before_blocks, after_blocks) { assert_eq!(before.node.parent, after.node.parent); - assert_eq!(before.node.index, after.node.index); + assert_eq!(before_index, after_index); let NodeSpecific::Leaf { key: before_key, value: before_value, From a4e41178c9cd996014b75da1cbe42cb541860843 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 7 Oct 2024 11:39:45 -0400 Subject: [PATCH 071/181] tidy --- crates/chia-datalayer/src/lib.rs | 41 +++++++++++++------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 8cc1bac65..748b5a9bc 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -3,6 +3,7 @@ use pyo3::{buffer::PyBuffer, pyclass, pymethods, PyResult}; use clvmr::sha2::Sha256; use dot::DotLines; +use num_traits::ToBytes; use std::cmp::Ordering; use std::collections::{HashMap, VecDeque}; use std::iter::{zip, IntoIterator}; @@ -45,10 +46,10 @@ type BlockBytes = [u8; BLOCK_SIZE]; type MetadataBytes = [u8; METADATA_SIZE]; type DataBytes = [u8; DATA_SIZE]; const DATA_RANGE: Range = METADATA_SIZE..METADATA_SIZE + DATA_SIZE; -const INTERNAL_PADDING_RANGE: Range = RIGHT_RANGE.end..DATA_SIZE; -const INTERNAL_PADDING_SIZE: usize = INTERNAL_PADDING_RANGE.end - INTERNAL_PADDING_RANGE.start; -const LEAF_PADDING_RANGE: Range = VALUE_RANGE.end..DATA_SIZE; -const LEAF_PADDING_SIZE: usize = LEAF_PADDING_RANGE.end - LEAF_PADDING_RANGE.start; +// const INTERNAL_PADDING_RANGE: Range = RIGHT_RANGE.end..DATA_SIZE; +// const INTERNAL_PADDING_SIZE: usize = INTERNAL_PADDING_RANGE.end - INTERNAL_PADDING_RANGE.start; +// const LEAF_PADDING_RANGE: Range = VALUE_RANGE.end..DATA_SIZE; +// const LEAF_PADDING_SIZE: usize = LEAF_PADDING_RANGE.end - LEAF_PADDING_RANGE.start; #[derive(Clone, Debug, Hash, Eq, PartialEq)] #[repr(u8)] @@ -199,7 +200,6 @@ impl Node { // } pub fn from_bytes(metadata: &NodeMetadata, blob: DataBytes) -> Result { - // TODO: add Err results Ok(Self { parent: Self::parent_from_bytes(&blob), hash: blob[HASH_RANGE].try_into().unwrap(), @@ -209,7 +209,6 @@ impl Node { right: TreeIndex::from_be_bytes(blob[RIGHT_RANGE].try_into().unwrap()), }, NodeType::Leaf => NodeSpecific::Leaf { - // TODO: this try from really right? 
key: KvId::from_be_bytes(blob[KEY_RANGE].try_into().unwrap()), value: KvId::from_be_bytes(blob[VALUE_RANGE].try_into().unwrap()), }, @@ -226,7 +225,7 @@ impl Node { } } pub fn to_bytes(&self) -> DataBytes { - let mut blob: Vec = Vec::new(); + let mut blob: DataBytes = [0; DATA_SIZE]; match self { Node { parent, @@ -237,13 +236,10 @@ impl Node { None => NULL_PARENT, Some(parent) => *parent, }; - // TODO: insert per ranges - blob.extend(hash); - blob.extend(parent_integer.to_be_bytes()); - blob.extend(left.to_be_bytes()); - blob.extend(right.to_be_bytes()); - // TODO: not-yucky padding - blob.extend([0; INTERNAL_PADDING_SIZE]); + blob[HASH_RANGE].copy_from_slice(hash); + blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_be_bytes()); + blob[LEFT_RANGE].copy_from_slice(&left.to_be_bytes()); + blob[RIGHT_RANGE].copy_from_slice(&right.to_be_bytes()); } Node { parent, @@ -254,17 +250,14 @@ impl Node { None => NULL_PARENT, Some(parent) => *parent, }; - // TODO: insert per ranges - blob.extend(hash); - blob.extend(parent_integer.to_be_bytes()); - blob.extend(key.to_be_bytes()); - blob.extend(value.to_be_bytes()); - // TODO: not-yucky padding - blob.extend([0; LEAF_PADDING_SIZE]); + blob[HASH_RANGE].copy_from_slice(hash); + blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_be_bytes()); + blob[KEY_RANGE].copy_from_slice(&key.to_be_bytes()); + blob[VALUE_RANGE].copy_from_slice(&value.to_be_bytes()); } } - blob.try_into().unwrap() + blob } // TODO: yes i know i'm trying to write this code in a non-rusty way and i need to stop that @@ -331,7 +324,6 @@ impl Block { pub fn from_bytes(blob: BlockBytes) -> Result { // TODO: handle invalid indexes? - // TODO: handle overflows? let metadata_blob: MetadataBytes = blob[METADATA_RANGE].try_into().unwrap(); let data_blob: DataBytes = blob[DATA_RANGE].try_into().unwrap(); let metadata = NodeMetadata::from_bytes(metadata_blob) @@ -443,8 +435,7 @@ impl MerkleBlob { panic!("this should have been caught and processed above") } InsertLocation::AsRoot => { - // TODO: what about only unused blocks resulting in a non-empty blob? 
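// A minimal stand-alone sketch of the Option-parent round trip used in the
// to_bytes/parent_from_bytes code above: a reserved sentinel stands in for None
// so the parent field stays a fixed-width integer in the serialized block.
// SKETCH_NULL_PARENT is a placeholder value; the crate defines its own
// NULL_PARENT constant.
const SKETCH_NULL_PARENT: u32 = u32::MAX;

fn sketch_parent_to_raw(parent: Option<u32>) -> u32 {
    parent.unwrap_or(SKETCH_NULL_PARENT)
}

fn sketch_parent_from_raw(raw: u32) -> Option<u32> {
    // anything other than the sentinel is a real parent index
    if raw == SKETCH_NULL_PARENT {
        None
    } else {
        Some(raw)
    }
}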
- assert!(self.blob.is_empty()); + assert!(self.key_to_index.is_empty()); self.insert_first(key, value, hash); } InsertLocation::Leaf { index, side } => { From c5c8f60fe491887b029be22f30988fe8e0567727 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 8 Oct 2024 13:11:17 -0400 Subject: [PATCH 072/181] error handling tidying --- crates/chia-datalayer/src/lib.rs | 146 +++++++++++++++++++------------ 1 file changed, 90 insertions(+), 56 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 748b5a9bc..d028e4040 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -334,6 +334,7 @@ impl Block { Ok(Block { metadata, node }) } + // TODO: free function probably fn range(index: TreeIndex) -> Range { let block_start = index as usize * BLOCK_SIZE; block_start..block_start + BLOCK_SIZE @@ -407,8 +408,8 @@ impl MerkleBlob { } // TODO: stop double tree traversals here - let free_indexes = get_free_indexes(&blob).unwrap(); - let key_to_index = get_keys_values_indexes(&blob).unwrap(); + let free_indexes = get_free_indexes(&blob)?; + let key_to_index = get_keys_values_indexes(&blob)?; Ok(Self { blob, @@ -435,18 +436,16 @@ impl MerkleBlob { panic!("this should have been caught and processed above") } InsertLocation::AsRoot => { - assert!(self.key_to_index.is_empty()); + if !self.key_to_index.is_empty() { + return Err("requested insertion at root but tree not empty".to_string()); + }; self.insert_first(key, value, hash); } InsertLocation::Leaf { index, side } => { let old_leaf = self.get_node(index)?; - let NodeSpecific::Leaf { - key: old_leaf_key, .. - } = old_leaf.specific - else { - panic!() + let NodeSpecific::Leaf { .. } = old_leaf.specific else { + panic!("requested insertion at leaf but found internal node") }; - assert_eq!(self.key_to_index[&old_leaf_key], index); let internal_node_hash = match side { Side::Left => internal_hash(hash, &old_leaf.hash), @@ -615,7 +614,7 @@ impl MerkleBlob { self.insert_entry_to_blob(new_internal_node_index, new_internal_block.to_bytes())?; let Some(old_parent_index) = old_leaf.parent else { - panic!("{key:?} {value:?} {hash:?}") + panic!("root found when not expected: {key:?} {value:?} {hash:?}") }; let mut block = Block::from_bytes(self.get_block_bytes(old_leaf_index)?)?; @@ -634,10 +633,10 @@ impl MerkleBlob { } else if old_leaf_index == *right { *right = new_internal_node_index; } else { - panic!(); + panic!("child not a child of its parent"); } } else { - panic!(); + panic!("expected internal node but found leaf"); }; self.insert_entry_to_blob(old_parent_index, old_parent_block.to_bytes())?; @@ -649,12 +648,15 @@ impl MerkleBlob { } pub fn delete(&mut self, key: KvId) -> Result<(), String> { - let leaf_index = *self.key_to_index.get(&key).unwrap(); - let leaf = self.get_node(leaf_index).unwrap(); + let leaf_index = *self + .key_to_index + .get(&key) + .ok_or(format!("unknown key: {key}"))?; + let leaf = self.get_node(leaf_index)?; - // TODO: blech + // TODO: maybe some common way to indicate/perform sanity double checks? let NodeSpecific::Leaf { .. 
} = leaf.specific else { - panic!() + panic!("key to index cache resulted in internal node") }; self.key_to_index.remove(&key); @@ -666,7 +668,7 @@ impl MerkleBlob { }; self.free_indexes.push(leaf_index); - let parent = self.get_node(parent_index).unwrap(); + let parent = self.get_node(parent_index)?; // TODO: kinda implicit that we 'check' that parent is internal inside .sibling_index() let sibling_index = parent.specific.sibling_index(leaf_index); let mut sibling_block = self.get_block(sibling_index)?; @@ -694,7 +696,7 @@ impl MerkleBlob { }; self.free_indexes.push(parent_index); - let mut grandparent_block = self.get_block(grandparent_index).unwrap(); + let mut grandparent_block = self.get_block(grandparent_index)?; sibling_block.node.parent = Some(grandparent_index); self.insert_entry_to_blob(sibling_index, sibling_block.to_bytes())?; @@ -708,10 +710,10 @@ impl MerkleBlob { match parent_index { x if x == *left => *left = sibling_index, x if x == *right => *right = sibling_index, - _ => panic!(), + _ => panic!("parent not a child a grandparent"), } } else { - panic!() + panic!("grandparent not an internal node") } self.insert_entry_to_blob(grandparent_index, grandparent_block.to_bytes())?; @@ -726,7 +728,7 @@ impl MerkleBlob { return Ok(()); }; - let mut block = self.get_block(*leaf_index).unwrap(); + let mut block = self.get_block(*leaf_index)?; if let NodeSpecific::Leaf { value: ref mut inplace_value, .. @@ -735,7 +737,7 @@ impl MerkleBlob { block.node.hash.clone_from(new_hash); *inplace_value = value; } else { - panic!() + panic!("expected internal node but found leaf"); } self.insert_entry_to_blob(*leaf_index, block.to_bytes())?; @@ -746,7 +748,7 @@ impl MerkleBlob { Ok(()) } - pub fn check(&self) { + pub fn check(&self) -> Result<(), String> { let mut leaf_count: usize = 0; let mut internal_count: usize = 0; @@ -755,19 +757,34 @@ impl MerkleBlob { NodeSpecific::Internal { .. } => internal_count += 1, NodeSpecific::Leaf { key, .. } => { leaf_count += 1; - assert!(self.key_to_index.contains_key(&key)); + let cached_index = self + .key_to_index + .get(&key) + .ok_or(format!("key not in key to index cache: {key:?}"))?; + assert_eq!( + *cached_index, index, + "key to index cache for {key:?} should be {index:?} got: {cached_index:?}" + ); // TODO: consider what type free indexes should be - assert!(!self.free_indexes.contains(&index)); + assert!( + !self.free_indexes.contains(&index), + "{}", + format!("active index found in free index list: {index:?}") + ); } } } - assert_eq!(leaf_count, self.key_to_index.len()); + let key_to_index_cache_length = self.key_to_index.len(); + assert_eq!(leaf_count, key_to_index_cache_length, "found {leaf_count:?} leaves but key to index cache length is: {key_to_index_cache_length:?}"); + let total_count = leaf_count + internal_count + self.free_indexes.len(); + let extend_index = self.extend_index(); assert_eq!( - leaf_count + internal_count + self.free_indexes.len(), - self.extend_index() as usize, + total_count, extend_index as usize, + "expected total node count {extend_index:?} found: {total_count:?}", ); + Ok(()) // TODO: check parent/child bidirectional accuracy } @@ -831,7 +848,12 @@ impl MerkleBlob { return Ok(InsertLocation::AsRoot); } - let side = if (seed_bytes.last().unwrap() & 1 << 7) == 0 { + let side = if (seed_bytes + .last() + .ok_or("zero-length seed bytes not allowed")? 
+ & 1 << 7) + == 0 + { Side::Left } else { Side::Right @@ -868,7 +890,9 @@ impl MerkleBlob { } fn extend_index(&self) -> TreeIndex { - assert_eq!(self.blob.len() % BLOCK_SIZE, 0); + let blob_length = self.blob.len(); + let remainder = blob_length % BLOCK_SIZE; + assert_eq!(remainder, 0, "blob length {blob_length:?} not a multiple of {BLOCK_SIZE:?}, remainder: {remainder:?}"); (self.blob.len() / BLOCK_SIZE) as TreeIndex } @@ -880,7 +904,7 @@ impl MerkleBlob { ) -> Result<(), String> { let extend_index = self.extend_index(); match index.cmp(&extend_index) { - Ordering::Greater => return Err(format!("index out of range: {index}")), + Ordering::Greater => return Err(format!("block index out of range: {index}")), Ordering::Equal => self.blob.extend_from_slice(&block_bytes), Ordering::Less => { self.blob[Block::range(index)].copy_from_slice(&block_bytes); @@ -909,7 +933,7 @@ impl MerkleBlob { fn get_block_bytes(&self, index: TreeIndex) -> Result { self.blob .get(Block::range(index)) - .ok_or(format!("index out of bounds: {index}"))? + .ok_or(format!("block index out of bounds: {index}"))? .try_into() .map_err(|e| format!("failed getting block {index}: {e}")) } @@ -928,10 +952,12 @@ impl MerkleBlob { .map_err(|message| format!("failed loading node: {message}")) } - pub fn get_parent_index(&self, index: TreeIndex) -> Parent { - let block = self.get_block_bytes(index).unwrap(); + pub fn get_parent_index(&self, index: TreeIndex) -> Result { + let block = self.get_block_bytes(index)?; - Node::parent_from_bytes(block[DATA_RANGE].try_into().unwrap()) + Ok(Node::parent_from_bytes( + block[DATA_RANGE].try_into().unwrap(), + )) } pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { @@ -976,7 +1002,7 @@ impl MerkleBlob { <&Self as IntoIterator>::into_iter(self) } - pub fn calculate_lazy_hashes(&mut self) { + pub fn calculate_lazy_hashes(&mut self) -> Result<(), String> { // TODO: really want a truncated traversal, not filter // TODO: yeah, storing the whole set of blocks via collect is not great for (index, mut block) in self @@ -989,18 +1015,24 @@ impl MerkleBlob { }; // TODO: obviously inefficient to re-get/deserialize these blocks inside // an iteration that's already doing that - let left = self.get_block(left).unwrap(); - let right = self.get_block(right).unwrap(); + let left = self.get_block(left)?; + let right = self.get_block(right)?; // TODO: wrap this up in Block maybe? 
just to have 'control' of dirty being 'accurate' block.node.hash = internal_hash(&left.node.hash, &right.node.hash); block.metadata.dirty = false; - self.insert_entry_to_blob(index, block.to_bytes()).unwrap(); + self.insert_entry_to_blob(index, block.to_bytes())?; } + + Ok(()) } - pub fn relocate_node(&mut self, source: TreeIndex, destination: TreeIndex) { + #[allow(unused)] + fn relocate_node(&mut self, source: TreeIndex, destination: TreeIndex) -> Result<(), String> { let extend_index = self.extend_index(); - assert_ne!(source, 0); + // TODO: perhaps relocation of root should be allowed for some use + if source == 0 { + return Err("relocation of the root and index zero is not allowed".to_string()); + }; assert!(source < extend_index); assert!(!self.free_indexes.contains(&source)); assert!(destination <= extend_index); @@ -1039,6 +1071,8 @@ impl MerkleBlob { } self.free_indexes.push(source); + + Ok(()) } #[allow(unused)] @@ -1399,7 +1433,7 @@ mod tests { // let merkle_blob = example_merkle_blob(); // merkle_blob.get_node(0).unwrap(); // - // merkle_blob.check(); + // merkle_blob.check().unwrap(); // } #[fixture] @@ -1435,7 +1469,7 @@ mod tests { let last_node = lineage.last().unwrap(); assert_eq!(last_node.parent, None); - small_blob.check(); + small_blob.check().unwrap(); } #[rstest] @@ -1459,7 +1493,7 @@ mod tests { }, ); - small_blob.check(); + small_blob.check().unwrap(); } // #[test] @@ -1481,7 +1515,7 @@ mod tests { // assert_eq!(merkle_blob.get_node(1).unwrap(), EXAMPLE_LEFT_LEAF); // assert_eq!(merkle_blob.get_node(2).unwrap(), EXAMPLE_RIGHT_LEAF); // - // merkle_blob.check(); + // merkle_blob.check().unwrap(); // } // #[test] @@ -1508,7 +1542,7 @@ mod tests { // // assert_eq!(merkle_blob.blob, Vec::from(EXAMPLE_BLOB)); // - // merkle_blob.check(); + // merkle_blob.check().unwrap(); // } #[test] @@ -1550,9 +1584,9 @@ mod tests { println!("total time: {total_time:?}"); // TODO: check, well... 
something - merkle_blob.calculate_lazy_hashes(); + merkle_blob.calculate_lazy_hashes().unwrap(); - merkle_blob.check(); + merkle_blob.check().unwrap(); } #[test] @@ -1569,7 +1603,7 @@ mod tests { let hash: Hash = sha256_num(key_value_id); println!("inserting: {key_value_id}"); - merkle_blob.calculate_lazy_hashes(); + merkle_blob.calculate_lazy_hashes().unwrap(); reference_blobs.push(MerkleBlob::new(merkle_blob.blob.clone()).unwrap()); merkle_blob .insert(key_value_id, key_value_id, &hash, InsertLocation::Auto) @@ -1577,17 +1611,17 @@ mod tests { dots.push(merkle_blob.to_dot().dump()); } - merkle_blob.check(); + merkle_blob.check().unwrap(); for key_value_id in key_value_ids.iter().rev() { println!("deleting: {key_value_id}"); merkle_blob.delete(*key_value_id).unwrap(); - merkle_blob.calculate_lazy_hashes(); + merkle_blob.calculate_lazy_hashes().unwrap(); assert_eq!(merkle_blob, reference_blobs[*key_value_id as usize]); dots.push(merkle_blob.to_dot().dump()); } - merkle_blob.check(); + merkle_blob.check().unwrap(); } // TODO: better conditional execution than the commenting i'm doing now @@ -1622,7 +1656,7 @@ mod tests { .unwrap(); // open_dot(&mut merkle_blob.to_dot().set_note("first after")); - merkle_blob.check(); + merkle_blob.check().unwrap(); assert_eq!(merkle_blob.key_to_index.len(), 1); } @@ -1682,7 +1716,7 @@ mod tests { }; assert_eq!([left_key, right_key], expected_keys); - merkle_blob.check(); + merkle_blob.check().unwrap(); } #[test] @@ -1700,11 +1734,11 @@ mod tests { ) .unwrap(); // open_dot(&mut merkle_blob.to_dot().set_note("first after")); - merkle_blob.check(); + merkle_blob.check().unwrap(); merkle_blob.delete(key_value_id).unwrap(); - merkle_blob.check(); + merkle_blob.check().unwrap(); assert_eq!(merkle_blob.key_to_index.len(), 0); } From 47d7edfc72b72ae2dd2e2cf8e28a6ace96b34ecc Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 8 Oct 2024 13:14:02 -0400 Subject: [PATCH 073/181] less allow --- crates/chia-datalayer/src/lib.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index d028e4040..a1c9b2c0c 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -341,13 +341,11 @@ impl Block { } } -// TODO: once error handling is well defined, remove allow and handle warning -#[allow(clippy::unnecessary_wraps)] -fn get_free_indexes(blob: &[u8]) -> Result, String> { +fn get_free_indexes(blob: &[u8]) -> Vec { let index_count = blob.len() / BLOCK_SIZE; if index_count == 0 { - return Ok(vec![]); + return vec![]; } let mut seen_indexes: Vec = vec![false; index_count]; @@ -363,18 +361,16 @@ fn get_free_indexes(blob: &[u8]) -> Result, String> { } } - Ok(free_indexes) + free_indexes } -// TODO: once error handling is well defined, remove allow and handle warning -#[allow(clippy::unnecessary_wraps)] -fn get_keys_values_indexes(blob: &[u8]) -> Result, String> { +fn get_keys_values_indexes(blob: &[u8]) -> HashMap { let index_count = blob.len() / BLOCK_SIZE; let mut key_to_index: HashMap = HashMap::default(); if index_count == 0 { - return Ok(key_to_index); + return key_to_index; } for (index, block) in MerkleBlobLeftChildFirstIterator::new(blob) { @@ -383,7 +379,7 @@ fn get_keys_values_indexes(blob: &[u8]) -> Result, Stri } } - Ok(key_to_index) + key_to_index } #[cfg_attr(feature = "py-bindings", pyclass(name = "MerkleBlob"))] @@ -408,8 +404,8 @@ impl MerkleBlob { } // TODO: stop double tree traversals here - let free_indexes = 
get_free_indexes(&blob)?; - let key_to_index = get_keys_values_indexes(&blob)?; + let free_indexes = get_free_indexes(&blob); + let key_to_index = get_keys_values_indexes(&blob); Ok(Self { blob, @@ -1798,7 +1794,7 @@ mod tests { let mut blob = small_blob.blob.clone(); let expected_free_index = (blob.len() / BLOCK_SIZE) as TreeIndex; blob.extend_from_slice(&[0; BLOCK_SIZE]); - assert_eq!(get_free_indexes(&blob).unwrap(), [expected_free_index]); + assert_eq!(get_free_indexes(&blob), [expected_free_index]); } #[test] From 9a1919beaeab90e581496f1087a2f2e96a4dd261 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 8 Oct 2024 13:18:44 -0400 Subject: [PATCH 074/181] a little less pythony --- crates/chia-datalayer/src/lib.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index a1c9b2c0c..00daa3ce7 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -260,15 +260,6 @@ impl Node { blob } - // TODO: yes i know i'm trying to write this code in a non-rusty way and i need to stop that - pub fn key_value(&self) -> (KvId, KvId) { - let NodeSpecific::Leaf { key, value } = self.specific else { - panic!() - }; - - (key, value) - } - pub fn to_dot(&self, index: TreeIndex) -> DotLines { match self.specific { NodeSpecific::Internal {left, right} => DotLines{ @@ -514,7 +505,13 @@ impl MerkleBlob { self.insert_entry_to_blob(0, new_internal_block.to_bytes())?; - let (old_leaf_key, old_leaf_value) = old_leaf.key_value(); + let NodeSpecific::Leaf { + key: old_leaf_key, + value: old_leaf_value, + } = old_leaf.specific + else { + return Err("old leaf unexpectedly not a leaf".to_string()); + }; let nodes = [ ( match side { @@ -553,7 +550,10 @@ impl MerkleBlob { }; self.insert_entry_to_blob(index, block.to_bytes())?; - self.key_to_index.insert(block.node.key_value().0, index); + let NodeSpecific::Leaf { key: this_key, .. } = block.node.specific else { + return Err("new block unexpectedly not a leaf".to_string()); + }; + self.key_to_index.insert(this_key, index); } self.last_allocated_index = 3; From 479d8793bd92091ea710a1595cb51c279f71c227 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 8 Oct 2024 14:08:45 -0400 Subject: [PATCH 075/181] tidy --- crates/chia-datalayer/src/lib.rs | 57 ++++++++++++++------------------ 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 00daa3ce7..0b055bab4 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -261,6 +261,12 @@ impl Node { } pub fn to_dot(&self, index: TreeIndex) -> DotLines { + // TODO: can this be done without introducing a blank line? + let node_to_parent = match self.parent { + Some(parent) => format!("node_{index} -> node_{parent};"), + None => String::new(), + }; + match self.specific { NodeSpecific::Internal {left, right} => DotLines{ nodes: vec![ @@ -269,11 +275,7 @@ impl Node { connections: vec![ format!("node_{index} -> node_{left};"), format!("node_{index} -> node_{right};"), - // TODO: can this be done without introducing a blank line? 
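// A reduced sketch of the per-node DOT text being refactored here, mirroring the
// "node_{index}" naming of the format strings above. Emitting the parent edge
// only when a parent exists is one way to avoid the blank line the TODO above
// asks about; the function name is a placeholder, not part of this crate.
fn sketch_internal_node_dot(index: u32, left: u32, right: u32, parent: Option<u32>) -> Vec<String> {
    let mut lines = vec![
        format!("node_{index} [label=\"{index}\"]"),
        format!("node_{index} -> node_{left};"),
        format!("node_{index} -> node_{right};"),
    ];
    if let Some(parent) = parent {
        // root nodes have no parent, so no upward edge is drawn for them
        lines.push(format!("node_{index} -> node_{parent};"));
    }
    lines
}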
- match self.parent{ - Some(parent) => format!("node_{index} -> node_{parent};"), - None => String::new(), - }, + node_to_parent, ], pair_boxes: vec![ format!("node [shape = box]; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}}"), @@ -284,13 +286,7 @@ impl Node { nodes: vec![ format!("node_{index} [shape=box, label=\"{index}\\nvalue: {key}\\nvalue: {value}\"];"), ], - connections: vec![ - // TODO: dedupe with above - match self.parent{ - Some(parent) => format!("node_{index} -> node_{parent};"), - None => String::new(), - }, - ], + connections: vec![node_to_parent], pair_boxes: vec![], note: String::new(), }, @@ -298,6 +294,11 @@ impl Node { } } +fn block_range(index: TreeIndex) -> Range { + let block_start = index as usize * BLOCK_SIZE; + block_start..block_start + BLOCK_SIZE +} + // TODO: does not enforce matching metadata node type and node enumeration type pub struct Block { metadata: NodeMetadata, @@ -314,7 +315,6 @@ impl Block { } pub fn from_bytes(blob: BlockBytes) -> Result { - // TODO: handle invalid indexes? let metadata_blob: MetadataBytes = blob[METADATA_RANGE].try_into().unwrap(); let data_blob: DataBytes = blob[DATA_RANGE].try_into().unwrap(); let metadata = NodeMetadata::from_bytes(metadata_blob) @@ -324,12 +324,6 @@ impl Block { Ok(Block { metadata, node }) } - - // TODO: free function probably - fn range(index: TreeIndex) -> Range { - let block_start = index as usize * BLOCK_SIZE; - block_start..block_start + BLOCK_SIZE - } } fn get_free_indexes(blob: &[u8]) -> Vec { @@ -379,8 +373,7 @@ pub struct MerkleBlob { blob: Vec, free_indexes: Vec, key_to_index: HashMap, - // TODO: maybe name it next_index_to_allocate - last_allocated_index: TreeIndex, + next_index_to_allocate: TreeIndex, } impl MerkleBlob { @@ -402,7 +395,7 @@ impl MerkleBlob { blob, free_indexes, key_to_index, - last_allocated_index: block_count as TreeIndex, + next_index_to_allocate: block_count as TreeIndex, }) } @@ -475,7 +468,7 @@ impl MerkleBlob { self.key_to_index.insert(key, 0); self.free_indexes.clear(); - self.last_allocated_index = 1; + self.next_index_to_allocate = 1; } fn insert_second( @@ -556,7 +549,7 @@ impl MerkleBlob { self.key_to_index.insert(this_key, index); } - self.last_allocated_index = 3; + self.next_index_to_allocate = 3; Ok(()) } @@ -658,7 +651,7 @@ impl MerkleBlob { let Some(parent_index) = leaf.parent else { self.free_indexes.clear(); - self.last_allocated_index = 0; + self.next_index_to_allocate = 0; self.blob.clear(); return Ok(()); }; @@ -827,8 +820,8 @@ impl MerkleBlob { None => { // TODO: should this extend...? // TODO: should this update free indexes...? - self.last_allocated_index += 1; - self.last_allocated_index - 1 + self.next_index_to_allocate += 1; + self.next_index_to_allocate - 1 } Some(new_index) => new_index, } @@ -903,7 +896,7 @@ impl MerkleBlob { Ordering::Greater => return Err(format!("block index out of range: {index}")), Ordering::Equal => self.blob.extend_from_slice(&block_bytes), Ordering::Less => { - self.blob[Block::range(index)].copy_from_slice(&block_bytes); + self.blob[block_range(index)].copy_from_slice(&block_bytes); } } @@ -928,7 +921,7 @@ impl MerkleBlob { fn get_block_bytes(&self, index: TreeIndex) -> Result { self.blob - .get(Block::range(index)) + .get(block_range(index)) .ok_or(format!("block index out of bounds: {index}"))? 
.try_into() .map_err(|e| format!("failed getting block {index}: {e}")) @@ -1200,7 +1193,7 @@ impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { loop { let item = self.deque.pop_front()?; - let block_bytes: BlockBytes = self.blob[Block::range(item.index)].try_into().unwrap(); + let block_bytes: BlockBytes = self.blob[block_range(item.index)].try_into().unwrap(); let block = Block::from_bytes(block_bytes).unwrap(); match block.node.specific { @@ -1252,7 +1245,7 @@ impl Iterator for MerkleBlobParentFirstIterator<'_> { loop { let index = self.deque.pop_front()?; - let block_bytes: BlockBytes = self.blob[Block::range(index)].try_into().unwrap(); + let block_bytes: BlockBytes = self.blob[block_range(index)].try_into().unwrap(); let block = Block::from_bytes(block_bytes).unwrap(); match block.node.specific { @@ -1291,7 +1284,7 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { loop { let index = self.deque.pop_front()?; - let block_bytes: BlockBytes = self.blob[Block::range(index)].try_into().unwrap(); + let block_bytes: BlockBytes = self.blob[block_range(index)].try_into().unwrap(); let block = Block::from_bytes(block_bytes).unwrap(); match block.node.specific { From cc56c35441d4495c5bed00802e922d65c7ec2b02 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 8 Oct 2024 15:16:58 -0400 Subject: [PATCH 076/181] tidy --- crates/chia-datalayer/src/lib.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 0b055bab4..943850116 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -217,7 +217,6 @@ impl Node { } fn parent_from_bytes(blob: &DataBytes) -> Parent { - // TODO: a little setup here for pre-optimization to allow walking parents without processing entire nodes let parent_integer = TreeIndex::from_be_bytes(blob[PARENT_RANGE].try_into().unwrap()); match parent_integer { NULL_PARENT => None, @@ -971,8 +970,7 @@ impl MerkleBlob { while let Some(this_index) = next_index { lineage.push(this_index); - let block = self.get_block_bytes(this_index)?; - next_index = Node::parent_from_bytes(block[DATA_RANGE].try_into().unwrap()); + next_index = self.get_parent_index(this_index)?; } Ok(lineage) From 27add2d10802ded71a05222e74b1249b4b23532d Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 8 Oct 2024 15:22:59 -0400 Subject: [PATCH 077/181] single tree traversal to load --- crates/chia-datalayer/src/lib.rs | 42 ++++++++++---------------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 943850116..c759ae46f 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -325,17 +325,20 @@ impl Block { } } -fn get_free_indexes(blob: &[u8]) -> Vec { +fn get_free_indexes_and_keys_values_indexes( + blob: &[u8], +) -> (Vec, HashMap) { let index_count = blob.len() / BLOCK_SIZE; - if index_count == 0 { - return vec![]; - } - let mut seen_indexes: Vec = vec![false; index_count]; + let mut key_to_index: HashMap = HashMap::default(); - for (index, _) in MerkleBlobLeftChildFirstIterator::new(blob) { + for (index, block) in MerkleBlobLeftChildFirstIterator::new(blob) { seen_indexes[index as usize] = true; + + if let NodeSpecific::Leaf { key, .. 
} = block.node.specific { + key_to_index.insert(key, index); + } } let mut free_indexes: Vec = vec![]; @@ -345,25 +348,7 @@ fn get_free_indexes(blob: &[u8]) -> Vec { } } - free_indexes -} - -fn get_keys_values_indexes(blob: &[u8]) -> HashMap { - let index_count = blob.len() / BLOCK_SIZE; - - let mut key_to_index: HashMap = HashMap::default(); - - if index_count == 0 { - return key_to_index; - } - - for (index, block) in MerkleBlobLeftChildFirstIterator::new(blob) { - if let NodeSpecific::Leaf { key, .. } = block.node.specific { - key_to_index.insert(key, index); - } - } - - key_to_index + (free_indexes, key_to_index) } #[cfg_attr(feature = "py-bindings", pyclass(name = "MerkleBlob"))] @@ -386,9 +371,7 @@ impl MerkleBlob { )); } - // TODO: stop double tree traversals here - let free_indexes = get_free_indexes(&blob); - let key_to_index = get_keys_values_indexes(&blob); + let (free_indexes, key_to_index) = get_free_indexes_and_keys_values_indexes(&blob); Ok(Self { blob, @@ -1785,7 +1768,8 @@ mod tests { let mut blob = small_blob.blob.clone(); let expected_free_index = (blob.len() / BLOCK_SIZE) as TreeIndex; blob.extend_from_slice(&[0; BLOCK_SIZE]); - assert_eq!(get_free_indexes(&blob), [expected_free_index]); + let (free_indexes, _) = get_free_indexes_and_keys_values_indexes(&blob); + assert_eq!(free_indexes, [expected_free_index]); } #[test] From bd9d2e1dfb9474db674e622b8e9457b85a6e01e5 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 8 Oct 2024 15:23:48 -0400 Subject: [PATCH 078/181] tidy --- crates/chia-datalayer/src/lib.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index c759ae46f..70a0554cf 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1713,7 +1713,6 @@ mod tests { } #[rstest] - // TODO: does this mut allow modifying the fixture value as used by other tests? fn test_delete_frees_index(mut small_blob: MerkleBlob) { let key = 0x0001_0203_0405_0607; let index = small_blob.key_to_index[&key]; @@ -1723,7 +1722,6 @@ mod tests { } #[rstest] - // TODO: does this mut allow modifying the fixture value as used by other tests? fn test_get_new_index_with_free_index(mut small_blob: MerkleBlob) { let key = 0x0001_0203_0405_0607; let _ = small_blob.key_to_index[&key]; @@ -1797,7 +1795,6 @@ mod tests { } #[rstest] - // TODO: does this mut allow modifying the fixture value as used by other tests? fn test_upsert_upserts(mut small_blob: MerkleBlob) { let before_blocks = small_blob.iter().collect::>(); let (key, index) = small_blob.key_to_index.iter().next().unwrap(); From e2d0b649c5ef8e82fb3ffefd673103f33e4aacef Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 10 Oct 2024 10:32:11 -0400 Subject: [PATCH 079/181] move all dot stuff to dot module --- crates/chia-datalayer/src/dot.rs | 62 +++++++++++++++++++- crates/chia-datalayer/src/lib.rs | 99 ++++++++++++-------------------- 2 files changed, 98 insertions(+), 63 deletions(-) diff --git a/crates/chia-datalayer/src/dot.rs b/crates/chia-datalayer/src/dot.rs index ad636741a..a21f9cffc 100644 --- a/crates/chia-datalayer/src/dot.rs +++ b/crates/chia-datalayer/src/dot.rs @@ -1,4 +1,7 @@ -// TODO: this should probably be test code? 
+use crate::{MerkleBlob, Node, NodeSpecific, TreeIndex}; +use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; +use url::Url; + pub struct DotLines { pub nodes: Vec, pub connections: Vec, @@ -46,3 +49,60 @@ impl DotLines { self } } + +impl Node { + pub fn to_dot(&self, index: TreeIndex) -> DotLines { + // TODO: can this be done without introducing a blank line? + let node_to_parent = match self.parent { + Some(parent) => format!("node_{index} -> node_{parent};"), + None => String::new(), + }; + + match self.specific { + NodeSpecific::Internal {left, right} => DotLines{ + nodes: vec![ + format!("node_{index} [label=\"{index}\"]"), + ], + connections: vec![ + format!("node_{index} -> node_{left};"), + format!("node_{index} -> node_{right};"), + node_to_parent, + ], + pair_boxes: vec![ + format!("node [shape = box]; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}}"), + ], + note: String::new(), + }, + NodeSpecific::Leaf {key, value} => DotLines{ + nodes: vec![ + format!("node_{index} [shape=box, label=\"{index}\\nvalue: {key}\\nvalue: {value}\"];"), + ], + connections: vec![node_to_parent], + pair_boxes: vec![], + note: String::new(), + }, + } + } +} + +impl MerkleBlob { + pub fn to_dot(&self) -> DotLines { + let mut result = DotLines::new(); + for (index, block) in self { + result.push(block.node.to_dot(index)); + } + + result + } +} + +// TODO: better conditional execution than the commenting i'm doing now +pub fn open_dot(lines: &mut DotLines) { + let mut url = Url::parse("http://edotor.net").unwrap(); + // https://edotor.net/?engine=dot#graph%20%7B%7D%0A -> graph {} + url.query_pairs_mut().append_pair("engine", "dot"); + url.set_fragment(Some( + &utf8_percent_encode(&lines.dump(), NON_ALPHANUMERIC).to_string(), + )); + open::that(url.as_str()).unwrap(); +} diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 70a0554cf..7e6fcc7ba 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -2,7 +2,6 @@ use pyo3::{buffer::PyBuffer, pyclass, pymethods, PyResult}; use clvmr::sha2::Sha256; -use dot::DotLines; use num_traits::ToBytes; use std::cmp::Ordering; use std::collections::{HashMap, VecDeque}; @@ -10,8 +9,6 @@ use std::iter::{zip, IntoIterator}; use std::mem::size_of; use std::ops::Range; -mod dot; - type TreeIndex = u32; type Parent = Option; type Hash = [u8; 32]; @@ -259,38 +256,38 @@ impl Node { blob } - pub fn to_dot(&self, index: TreeIndex) -> DotLines { - // TODO: can this be done without introducing a blank line? - let node_to_parent = match self.parent { - Some(parent) => format!("node_{index} -> node_{parent};"), - None => String::new(), - }; - - match self.specific { - NodeSpecific::Internal {left, right} => DotLines{ - nodes: vec![ - format!("node_{index} [label=\"{index}\"]"), - ], - connections: vec![ - format!("node_{index} -> node_{left};"), - format!("node_{index} -> node_{right};"), - node_to_parent, - ], - pair_boxes: vec![ - format!("node [shape = box]; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}}"), - ], - note: String::new(), - }, - NodeSpecific::Leaf {key, value} => DotLines{ - nodes: vec![ - format!("node_{index} [shape=box, label=\"{index}\\nvalue: {key}\\nvalue: {value}\"];"), - ], - connections: vec![node_to_parent], - pair_boxes: vec![], - note: String::new(), - }, - } - } + // pub fn to_dot(&self, index: TreeIndex) -> DotLines { + // // TODO: can this be done without introducing a blank line? 
+ // let node_to_parent = match self.parent { + // Some(parent) => format!("node_{index} -> node_{parent};"), + // None => String::new(), + // }; + // + // match self.specific { + // NodeSpecific::Internal {left, right} => DotLines{ + // nodes: vec![ + // format!("node_{index} [label=\"{index}\"]"), + // ], + // connections: vec![ + // format!("node_{index} -> node_{left};"), + // format!("node_{index} -> node_{right};"), + // node_to_parent, + // ], + // pair_boxes: vec![ + // format!("node [shape = box]; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}}"), + // ], + // note: String::new(), + // }, + // NodeSpecific::Leaf {key, value} => DotLines{ + // nodes: vec![ + // format!("node_{index} [shape=box, label=\"{index}\\nvalue: {key}\\nvalue: {value}\"];"), + // ], + // connections: vec![node_to_parent], + // pair_boxes: vec![], + // note: String::new(), + // }, + // } + // } } fn block_range(index: TreeIndex) -> Range { @@ -959,15 +956,6 @@ impl MerkleBlob { Ok(lineage) } - pub fn to_dot(&self) -> DotLines { - let mut result = DotLines::new(); - for (index, block) in self { - result.push(block.node.to_dot(index)); - } - - result - } - pub fn iter(&self) -> MerkleBlobLeftChildFirstIterator<'_> { <&Self as IntoIterator>::into_iter(self) } @@ -1279,6 +1267,8 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { } } +#[cfg(test)] +mod dot; #[cfg(test)] mod tests { use super::*; @@ -1594,22 +1584,6 @@ mod tests { merkle_blob.check().unwrap(); } - // TODO: better conditional execution than the commenting i'm doing now - #[allow(dead_code)] - fn open_dot(lines: &mut DotLines) { - use open; - use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; - use url::Url; - - let mut url = Url::parse("http://edotor.net").unwrap(); - // https://edotor.net/?engine=dot#graph%20%7B%7D%0A -> graph {} - url.query_pairs_mut().append_pair("engine", "dot"); - url.set_fragment(Some( - &utf8_percent_encode(&lines.dump(), NON_ALPHANUMERIC).to_string(), - )); - open::that(url.as_str()).unwrap(); - } - #[test] fn test_insert_first() { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); @@ -1777,6 +1751,7 @@ mod tests { #[rstest] fn test_upsert_inserts(small_blob: MerkleBlob) { + use dot::open_dot; let key = 1234; assert!(!small_blob.key_to_index.contains_key(&key)); let value = 5678; @@ -1785,11 +1760,11 @@ mod tests { insert_blob .insert(key, value, &sha256_num(key), InsertLocation::Auto) .unwrap(); - // open_dot(&mut insert_blob.to_dot().set_note("first after")); + open_dot(insert_blob.to_dot().set_note("first after")); let mut upsert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); upsert_blob.upsert(key, value, &sha256_num(key)).unwrap(); - // open_dot(&mut upsert_blob.to_dot().set_note("first after")); + open_dot(upsert_blob.to_dot().set_note("first after")); assert_eq!(insert_blob.blob, upsert_blob.blob); } From 9d790e9cde0c02f8a2665c00bb76a4e632618401 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 10 Oct 2024 11:17:23 -0400 Subject: [PATCH 080/181] less dot running for now --- crates/chia-datalayer/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 7e6fcc7ba..3aeb981f5 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1273,6 +1273,7 @@ mod dot; mod tests { use super::*; // use hex_literal::hex; + // use dot::open_dot; use rstest::{fixture, rstest}; use std::time::{Duration, Instant}; @@ -1751,7 +1752,6 @@ mod tests 
{ #[rstest] fn test_upsert_inserts(small_blob: MerkleBlob) { - use dot::open_dot; let key = 1234; assert!(!small_blob.key_to_index.contains_key(&key)); let value = 5678; @@ -1760,11 +1760,11 @@ mod tests { insert_blob .insert(key, value, &sha256_num(key), InsertLocation::Auto) .unwrap(); - open_dot(insert_blob.to_dot().set_note("first after")); + // open_dot(insert_blob.to_dot().set_note("first after")); let mut upsert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); upsert_blob.upsert(key, value, &sha256_num(key)).unwrap(); - open_dot(upsert_blob.to_dot().set_note("first after")); + // open_dot(upsert_blob.to_dot().set_note("first after")); assert_eq!(insert_blob.blob, upsert_blob.blob); } From 1a62f56ff2bcf87cee0ba6ce00a7760dc240f207 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 10 Oct 2024 11:46:00 -0400 Subject: [PATCH 081/181] dot unused allow --- crates/chia-datalayer/src/dot.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/chia-datalayer/src/dot.rs b/crates/chia-datalayer/src/dot.rs index a21f9cffc..e919fb858 100644 --- a/crates/chia-datalayer/src/dot.rs +++ b/crates/chia-datalayer/src/dot.rs @@ -97,6 +97,7 @@ impl MerkleBlob { } // TODO: better conditional execution than the commenting i'm doing now +#[allow(unused)] pub fn open_dot(lines: &mut DotLines) { let mut url = Url::parse("http://edotor.net").unwrap(); // https://edotor.net/?engine=dot#graph%20%7B%7D%0A -> graph {} From 967a10f5bd16fade89decb5215ec9b5b9b06c98e Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 10 Oct 2024 11:49:32 -0400 Subject: [PATCH 082/181] tidy --- crates/chia-datalayer/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 3aeb981f5..023706ac1 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -859,10 +859,11 @@ impl MerkleBlob { fn extend_index(&self) -> TreeIndex { let blob_length = self.blob.len(); + let index: TreeIndex = (blob_length / BLOCK_SIZE) as TreeIndex; let remainder = blob_length % BLOCK_SIZE; assert_eq!(remainder, 0, "blob length {blob_length:?} not a multiple of {BLOCK_SIZE:?}, remainder: {remainder:?}"); - (self.blob.len() / BLOCK_SIZE) as TreeIndex + index } fn insert_entry_to_blob( From cb249f61720916c951248d1b003940b5086f4a31 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 10 Oct 2024 11:57:28 -0400 Subject: [PATCH 083/181] drop next_index_to_allocate --- crates/chia-datalayer/src/lib.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 023706ac1..5ed84e864 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -354,13 +354,11 @@ pub struct MerkleBlob { blob: Vec, free_indexes: Vec, key_to_index: HashMap, - next_index_to_allocate: TreeIndex, } impl MerkleBlob { pub fn new(blob: Vec) -> Result { let length = blob.len(); - let block_count = length / BLOCK_SIZE; let remainder = length % BLOCK_SIZE; if remainder != 0 { return Err(format!( @@ -374,7 +372,6 @@ impl MerkleBlob { blob, free_indexes, key_to_index, - next_index_to_allocate: block_count as TreeIndex, }) } @@ -447,7 +444,6 @@ impl MerkleBlob { self.key_to_index.insert(key, 0); self.free_indexes.clear(); - self.next_index_to_allocate = 1; } fn insert_second( @@ -528,8 +524,6 @@ impl MerkleBlob { self.key_to_index.insert(this_key, index); } - self.next_index_to_allocate = 3; - Ok(()) } @@ -630,7 +624,6 @@ impl 
MerkleBlob { let Some(parent_index) = leaf.parent else { self.free_indexes.clear(); - self.next_index_to_allocate = 0; self.blob.clear(); return Ok(()); }; @@ -799,8 +792,9 @@ impl MerkleBlob { None => { // TODO: should this extend...? // TODO: should this update free indexes...? - self.next_index_to_allocate += 1; - self.next_index_to_allocate - 1 + let index = self.extend_index(); + self.blob.extend_from_slice(&[0; BLOCK_SIZE]); + index } Some(new_index) => new_index, } From cc8e958325e4d855e94143e160d71e63f4b16bfa Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 10 Oct 2024 12:00:33 -0400 Subject: [PATCH 084/181] add .clear() --- crates/chia-datalayer/src/lib.rs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 5ed84e864..146432e09 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -375,6 +375,12 @@ impl MerkleBlob { }) } + fn clear(&mut self) { + self.blob.clear(); + self.key_to_index.clear(); + self.free_indexes.clear(); + } + pub fn insert( &mut self, key: KvId, @@ -440,10 +446,13 @@ impl MerkleBlob { }, }; - self.blob.extend(new_leaf_block.to_bytes()); + self.clear(); + // TODO: unwrap, ack, review + self.insert_entry_to_blob(self.extend_index(), new_leaf_block.to_bytes()) + .unwrap(); + // TODO: put this in insert_entry_to_blob()? self.key_to_index.insert(key, 0); - self.free_indexes.clear(); } fn insert_second( @@ -455,9 +464,9 @@ impl MerkleBlob { internal_node_hash: &Hash, side: &Side, ) -> Result<(), String> { - self.blob.clear(); + self.clear(); + // TODO: just handling the nodes below being out of order. this all still smells a bit self.blob.resize(BLOCK_SIZE * 3, 0); - self.free_indexes.clear(); let new_internal_block = Block { metadata: NodeMetadata { @@ -521,6 +530,7 @@ impl MerkleBlob { let NodeSpecific::Leaf { key: this_key, .. } = block.node.specific else { return Err("new block unexpectedly not a leaf".to_string()); }; + // TODO: put this in insert_entry_to_blob()? self.key_to_index.insert(this_key, index); } @@ -604,6 +614,7 @@ impl MerkleBlob { self.insert_entry_to_blob(old_parent_index, old_parent_block.to_bytes())?; self.mark_lineage_as_dirty(old_parent_index)?; + // TODO: put this in insert_entry_to_blob()? self.key_to_index.insert(key, new_leaf_index); Ok(()) @@ -623,8 +634,7 @@ impl MerkleBlob { self.key_to_index.remove(&key); let Some(parent_index) = leaf.parent else { - self.free_indexes.clear(); - self.blob.clear(); + self.clear(); return Ok(()); }; @@ -640,6 +650,7 @@ impl MerkleBlob { match sibling_block.node.specific { NodeSpecific::Leaf { key, .. } => { + // TODO: put this in insert_entry_to_blob()? self.key_to_index.insert(key, 0); } NodeSpecific::Internal { left, right } => { From bca83f89f34bf4ac9eecd65bfa1a544b1be8b2e7 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 11 Oct 2024 08:02:43 -0400 Subject: [PATCH 085/181] centralize key to index insertion --- crates/chia-datalayer/src/lib.rs | 83 ++++++++++++++++---------------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 146432e09..ad9512f25 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -352,6 +352,7 @@ fn get_free_indexes_and_keys_values_indexes( #[derive(Debug)] pub struct MerkleBlob { blob: Vec, + // TODO: should this be a set for fast lookups? 
free_indexes: Vec, key_to_index: HashMap, } @@ -448,11 +449,8 @@ impl MerkleBlob { self.clear(); // TODO: unwrap, ack, review - self.insert_entry_to_blob(self.extend_index(), new_leaf_block.to_bytes()) + self.insert_entry_to_blob(self.extend_index(), &new_leaf_block) .unwrap(); - - // TODO: put this in insert_entry_to_blob()? - self.key_to_index.insert(key, 0); } fn insert_second( @@ -480,7 +478,7 @@ impl MerkleBlob { }, }; - self.insert_entry_to_blob(0, new_internal_block.to_bytes())?; + self.insert_entry_to_blob(0, &new_internal_block)?; let NodeSpecific::Leaf { key: old_leaf_key, @@ -526,12 +524,10 @@ impl MerkleBlob { node, }; - self.insert_entry_to_blob(index, block.to_bytes())?; let NodeSpecific::Leaf { key: this_key, .. } = block.node.specific else { return Err("new block unexpectedly not a leaf".to_string()); }; - // TODO: put this in insert_entry_to_blob()? - self.key_to_index.insert(this_key, index); + self.insert_entry_to_blob(index, &block)?; } Ok(()) @@ -563,7 +559,7 @@ impl MerkleBlob { hash: *hash, }, }; - self.insert_entry_to_blob(new_leaf_index, new_leaf_block.to_bytes())?; + self.insert_entry_to_blob(new_leaf_index, &new_leaf_block)?; let (left_index, right_index) = match side { Side::Left => (new_leaf_index, old_leaf_index), @@ -583,7 +579,7 @@ impl MerkleBlob { hash: *internal_node_hash, }, }; - self.insert_entry_to_blob(new_internal_node_index, new_internal_block.to_bytes())?; + self.insert_entry_to_blob(new_internal_node_index, &new_internal_block)?; let Some(old_parent_index) = old_leaf.parent else { panic!("root found when not expected: {key:?} {value:?} {hash:?}") @@ -591,7 +587,7 @@ impl MerkleBlob { let mut block = Block::from_bytes(self.get_block_bytes(old_leaf_index)?)?; block.node.parent = Some(new_internal_node_index); - self.insert_entry_to_blob(old_leaf_index, block.to_bytes())?; + self.insert_entry_to_blob(old_leaf_index, &block)?; let mut old_parent_block = Block::from_bytes(self.get_block_bytes(old_parent_index)?)?; if let NodeSpecific::Internal { @@ -611,11 +607,9 @@ impl MerkleBlob { panic!("expected internal node but found leaf"); }; - self.insert_entry_to_blob(old_parent_index, old_parent_block.to_bytes())?; + self.insert_entry_to_blob(old_parent_index, &old_parent_block)?; self.mark_lineage_as_dirty(old_parent_index)?; - // TODO: put this in insert_entry_to_blob()? - self.key_to_index.insert(key, new_leaf_index); Ok(()) } @@ -646,19 +640,13 @@ impl MerkleBlob { let Some(grandparent_index) = parent.parent else { sibling_block.node.parent = None; - self.insert_entry_to_blob(0, sibling_block.to_bytes())?; + self.insert_entry_to_blob(0, &sibling_block)?; - match sibling_block.node.specific { - NodeSpecific::Leaf { key, .. } => { - // TODO: put this in insert_entry_to_blob()? 
- self.key_to_index.insert(key, 0); - } - NodeSpecific::Internal { left, right } => { - for child_index in [left, right] { - let mut block = self.get_block(child_index)?; - block.node.parent = Some(0); - self.insert_entry_to_blob(child_index, block.to_bytes())?; - } + if let NodeSpecific::Internal { left, right } = sibling_block.node.specific { + for child_index in [left, right] { + let mut block = self.get_block(child_index)?; + block.node.parent = Some(0); + self.insert_entry_to_blob(child_index, &block)?; } }; @@ -671,7 +659,7 @@ impl MerkleBlob { let mut grandparent_block = self.get_block(grandparent_index)?; sibling_block.node.parent = Some(grandparent_index); - self.insert_entry_to_blob(sibling_index, sibling_block.to_bytes())?; + self.insert_entry_to_blob(sibling_index, &sibling_block)?; if let NodeSpecific::Internal { ref mut left, @@ -687,7 +675,7 @@ impl MerkleBlob { } else { panic!("grandparent not an internal node") } - self.insert_entry_to_blob(grandparent_index, grandparent_block.to_bytes())?; + self.insert_entry_to_blob(grandparent_index, &grandparent_block)?; self.mark_lineage_as_dirty(grandparent_index)?; @@ -711,7 +699,7 @@ impl MerkleBlob { } else { panic!("expected internal node but found leaf"); } - self.insert_entry_to_blob(*leaf_index, block.to_bytes())?; + self.insert_entry_to_blob(*leaf_index, &block)?; if let Some(parent) = block.node.parent { self.mark_lineage_as_dirty(parent)?; @@ -791,7 +779,7 @@ impl MerkleBlob { } block.metadata.dirty = true; - self.insert_entry_to_blob(this_index, block.to_bytes())?; + self.insert_entry_to_blob(this_index, &block)?; next_index = block.node.parent; } @@ -871,20 +859,34 @@ impl MerkleBlob { index } - fn insert_entry_to_blob( - &mut self, - index: TreeIndex, - block_bytes: BlockBytes, - ) -> Result<(), String> { + fn insert_entry_to_blob(&mut self, index: TreeIndex, block: &Block) -> Result<(), String> { + let new_block_bytes = block.to_bytes(); let extend_index = self.extend_index(); match index.cmp(&extend_index) { Ordering::Greater => return Err(format!("block index out of range: {index}")), - Ordering::Equal => self.blob.extend_from_slice(&block_bytes), + Ordering::Equal => self.blob.extend_from_slice(&new_block_bytes), Ordering::Less => { - self.blob[block_range(index)].copy_from_slice(&block_bytes); + // TODO: lots of deserialization here for just the key + let old_block = self.get_block(index)?; + if !self.free_indexes.contains(&index) + && old_block.metadata.node_type == NodeType::Leaf + { + // TODO: sort of repeating the leaf check above and below. smells a little + if let NodeSpecific::Leaf { + key: old_block_key, .. + } = old_block.node.specific + { + self.key_to_index.remove(&old_block_key); + }; + }; + self.blob[block_range(index)].copy_from_slice(&new_block_bytes); } } + if let NodeSpecific::Leaf { key, .. } = block.node.specific { + self.key_to_index.insert(key, index); + }; + Ok(()) } @@ -984,7 +986,7 @@ impl MerkleBlob { // TODO: wrap this up in Block maybe? 
just to have 'control' of dirty being 'accurate' block.node.hash = internal_hash(&left.node.hash, &right.node.hash); block.metadata.dirty = false; - self.insert_entry_to_blob(index, block.to_bytes())?; + self.insert_entry_to_blob(index, &block)?; } Ok(()) @@ -1017,8 +1019,7 @@ impl MerkleBlob { x if x == *right => *right = destination, _ => panic!(), } - self.insert_entry_to_blob(parent, parent_block.to_bytes()) - .unwrap(); + self.insert_entry_to_blob(parent, &parent_block).unwrap(); } match source_block.node.specific { @@ -1029,7 +1030,7 @@ impl MerkleBlob { for child in [left, right] { let mut block = self.get_block(child).unwrap(); block.node.parent = Some(destination); - self.insert_entry_to_blob(child, block.to_bytes()).unwrap(); + self.insert_entry_to_blob(child, &block).unwrap(); } } } From b701e08caa01e1517d675c99612bf1ebfff3ad4d Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 11 Oct 2024 08:37:39 -0400 Subject: [PATCH 086/181] fixup --- crates/chia-datalayer/src/lib.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index ad9512f25..b741832d3 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -887,6 +887,10 @@ impl MerkleBlob { self.key_to_index.insert(key, index); }; + if let Some(free_index_index) = self.free_indexes.iter().find(|i| **i == index) { + self.free_indexes.remove(*free_index_index as usize); + } + Ok(()) } @@ -1022,16 +1026,11 @@ impl MerkleBlob { self.insert_entry_to_blob(parent, &parent_block).unwrap(); } - match source_block.node.specific { - NodeSpecific::Leaf { key, .. } => { - self.key_to_index.insert(key, destination); - } - NodeSpecific::Internal { left, right, .. } => { - for child in [left, right] { - let mut block = self.get_block(child).unwrap(); - block.node.parent = Some(destination); - self.insert_entry_to_blob(child, &block).unwrap(); - } + if let NodeSpecific::Internal { left, right, .. } = source_block.node.specific { + for child in [left, right] { + let mut block = self.get_block(child).unwrap(); + block.node.parent = Some(destination); + self.insert_entry_to_blob(child, &block).unwrap(); } } From 9451f98321cdf1748ac2e569b99bfba257e1cc24 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 11 Oct 2024 16:28:30 -0400 Subject: [PATCH 087/181] tidy --- crates/chia-datalayer/src/lib.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index b741832d3..87813ad52 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -524,9 +524,6 @@ impl MerkleBlob { node, }; - let NodeSpecific::Leaf { key: this_key, .. 
} = block.node.specific else { - return Err("new block unexpectedly not a leaf".to_string()); - }; self.insert_entry_to_blob(index, &block)?; } From b345e5c9e68cf75b6a6eb9afa571edb1cef64a6b Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 11 Oct 2024 16:31:27 -0400 Subject: [PATCH 088/181] hashset for free indexes --- crates/chia-datalayer/src/lib.rs | 37 +++++++++++++++++--------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 87813ad52..778b2611f 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -4,7 +4,7 @@ use pyo3::{buffer::PyBuffer, pyclass, pymethods, PyResult}; use clvmr::sha2::Sha256; use num_traits::ToBytes; use std::cmp::Ordering; -use std::collections::{HashMap, VecDeque}; +use std::collections::{HashMap, HashSet, VecDeque}; use std::iter::{zip, IntoIterator}; use std::mem::size_of; use std::ops::Range; @@ -324,7 +324,7 @@ impl Block { fn get_free_indexes_and_keys_values_indexes( blob: &[u8], -) -> (Vec, HashMap) { +) -> (HashSet, HashMap) { let index_count = blob.len() / BLOCK_SIZE; let mut seen_indexes: Vec = vec![false; index_count]; @@ -338,10 +338,10 @@ fn get_free_indexes_and_keys_values_indexes( } } - let mut free_indexes: Vec = vec![]; + let mut free_indexes: HashSet = HashSet::new(); for (index, seen) in seen_indexes.iter().enumerate() { if !seen { - free_indexes.push(index as TreeIndex); + free_indexes.insert(index as TreeIndex); } } @@ -353,7 +353,7 @@ fn get_free_indexes_and_keys_values_indexes( pub struct MerkleBlob { blob: Vec, // TODO: should this be a set for fast lookups? - free_indexes: Vec, + free_indexes: HashSet, key_to_index: HashMap, } @@ -629,7 +629,7 @@ impl MerkleBlob { return Ok(()); }; - self.free_indexes.push(leaf_index); + self.free_indexes.insert(leaf_index); let parent = self.get_node(parent_index)?; // TODO: kinda implicit that we 'check' that parent is internal inside .sibling_index() let sibling_index = parent.specific.sibling_index(leaf_index); @@ -647,12 +647,12 @@ impl MerkleBlob { } }; - self.free_indexes.push(sibling_index); + self.free_indexes.insert(sibling_index); return Ok(()); }; - self.free_indexes.push(parent_index); + self.free_indexes.insert(parent_index); let mut grandparent_block = self.get_block(grandparent_index)?; sibling_block.node.parent = Some(grandparent_index); @@ -784,7 +784,7 @@ impl MerkleBlob { } fn get_new_index(&mut self) -> TreeIndex { - match self.free_indexes.pop() { + match self.free_indexes.iter().next().copied() { None => { // TODO: should this extend...? // TODO: should this update free indexes...? 
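A minimal standalone sketch of the free-index reuse pattern these hunks introduce: freed block indexes sit in a HashSet and are consulted before the blob is extended with a new block. The names below (allocate_index, free_indexes, block_count) are illustrative assumptions rather than the crate's actual API.

use std::collections::HashSet;

// Illustrative only: reuse an arbitrary freed slot if one exists,
// otherwise allocate the next index at the end.
fn allocate_index(free_indexes: &mut HashSet<u32>, block_count: &mut u32) -> u32 {
    match free_indexes.iter().next().copied() {
        // A set suffices for the free list since reuse order does not matter.
        Some(index) => {
            free_indexes.remove(&index);
            index
        }
        // No free slot: extend by one block at the end.
        None => {
            let index = *block_count;
            *block_count += 1;
            index
        }
    }
}

fn main() {
    let mut free: HashSet<u32> = HashSet::from([2]);
    let mut count: u32 = 5;
    assert_eq!(allocate_index(&mut free, &mut count), 2); // reuses the freed slot
    assert_eq!(allocate_index(&mut free, &mut count), 5); // then appends a new block
    assert_eq!(count, 6);
}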
@@ -792,7 +792,10 @@ impl MerkleBlob { self.blob.extend_from_slice(&[0; BLOCK_SIZE]); index } - Some(new_index) => new_index, + Some(new_index) => { + self.free_indexes.remove(&new_index); + new_index + } } } @@ -884,9 +887,7 @@ impl MerkleBlob { self.key_to_index.insert(key, index); }; - if let Some(free_index_index) = self.free_indexes.iter().find(|i| **i == index) { - self.free_indexes.remove(*free_index_index as usize); - } + self.free_indexes.take(&index); Ok(()) } @@ -1031,7 +1032,7 @@ impl MerkleBlob { } } - self.free_indexes.push(source); + self.free_indexes.insert(source); Ok(()) } @@ -1696,7 +1697,7 @@ mod tests { let index = small_blob.key_to_index[&key]; small_blob.delete(key).unwrap(); - assert_eq!(small_blob.free_indexes, vec![index, 2]); + assert_eq!(small_blob.free_indexes, HashSet::from([index, 2])); } #[rstest] @@ -1705,8 +1706,10 @@ mod tests { let _ = small_blob.key_to_index[&key]; small_blob.delete(key).unwrap(); + let expected = HashSet::from([1, 2]); + assert_eq!(small_blob.free_indexes, expected); // NOTE: both 1 and 2 are free per test_delete_frees_index - assert_eq!(small_blob.get_new_index(), 2); + assert!(expected.contains(&small_blob.get_new_index())); } #[rstest] @@ -1745,7 +1748,7 @@ mod tests { let expected_free_index = (blob.len() / BLOCK_SIZE) as TreeIndex; blob.extend_from_slice(&[0; BLOCK_SIZE]); let (free_indexes, _) = get_free_indexes_and_keys_values_indexes(&blob); - assert_eq!(free_indexes, [expected_free_index]); + assert_eq!(free_indexes, HashSet::from([expected_free_index])); } #[test] From 87ac5b07cdbd4ef722bb1278ab8b96fd761dcc92 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 11 Oct 2024 17:05:26 -0400 Subject: [PATCH 089/181] merkle submodule --- crates/chia-datalayer/src/lib.rs | 1825 +---------------- crates/chia-datalayer/src/merkle.rs | 1824 ++++++++++++++++ crates/chia-datalayer/src/{ => merkle}/dot.rs | 2 +- 3 files changed, 1827 insertions(+), 1824 deletions(-) create mode 100644 crates/chia-datalayer/src/merkle.rs rename crates/chia-datalayer/src/{ => merkle}/dot.rs (98%) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 778b2611f..7e997f3c2 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1,1824 +1,3 @@ -#[cfg(feature = "py-bindings")] -use pyo3::{buffer::PyBuffer, pyclass, pymethods, PyResult}; +mod merkle; -use clvmr::sha2::Sha256; -use num_traits::ToBytes; -use std::cmp::Ordering; -use std::collections::{HashMap, HashSet, VecDeque}; -use std::iter::{zip, IntoIterator}; -use std::mem::size_of; -use std::ops::Range; - -type TreeIndex = u32; -type Parent = Option; -type Hash = [u8; 32]; -type KvId = i64; - -const fn range_by_length(start: usize, length: usize) -> Range { - start..start + length -} - -// define the serialized block format -// TODO: consider in more detail other serialization tools such as serde and streamable -// common fields -// TODO: better way to pick the max of key value and right range, until we move hash first -// TODO: clearly shouldn't be hard coded -const METADATA_SIZE: usize = 2; -const METADATA_RANGE: Range = 0..METADATA_SIZE; -const HASH_RANGE: Range = range_by_length(0, size_of::()); -// const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); -const PARENT_RANGE: Range = HASH_RANGE.end..(HASH_RANGE.end + size_of::()); -// internal specific fields -const LEFT_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); -const RIGHT_RANGE: Range = range_by_length(LEFT_RANGE.end, size_of::()); -// leaf 
specific fields -const KEY_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); -const VALUE_RANGE: Range = range_by_length(KEY_RANGE.end, size_of::()); - -// TODO: clearly shouldn't be hard coded -// TODO: max of RIGHT_RANGE.end and VALUE_RANGE.end -const DATA_SIZE: usize = VALUE_RANGE.end; -const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; -type BlockBytes = [u8; BLOCK_SIZE]; -type MetadataBytes = [u8; METADATA_SIZE]; -type DataBytes = [u8; DATA_SIZE]; -const DATA_RANGE: Range = METADATA_SIZE..METADATA_SIZE + DATA_SIZE; -// const INTERNAL_PADDING_RANGE: Range = RIGHT_RANGE.end..DATA_SIZE; -// const INTERNAL_PADDING_SIZE: usize = INTERNAL_PADDING_RANGE.end - INTERNAL_PADDING_RANGE.start; -// const LEAF_PADDING_RANGE: Range = VALUE_RANGE.end..DATA_SIZE; -// const LEAF_PADDING_SIZE: usize = LEAF_PADDING_RANGE.end - LEAF_PADDING_RANGE.start; - -#[derive(Clone, Debug, Hash, Eq, PartialEq)] -#[repr(u8)] -pub enum NodeType { - Internal = 0, - Leaf = 1, -} - -impl NodeType { - pub fn from_u8(value: u8) -> Result { - // TODO: identify some useful structured serialization tooling we use - // TODO: find a better way to tie serialization values to enumerators - match value { - // ha! feel free to laugh at this - x if (NodeType::Internal as u8 == x) => Ok(NodeType::Internal), - x if (NodeType::Leaf as u8 == x) => Ok(NodeType::Leaf), - other => panic!("unknown NodeType value: {other}"), - } - } - - pub fn to_u8(&self) -> u8 { - match self { - NodeType::Internal => NodeType::Internal as u8, - NodeType::Leaf => NodeType::Leaf as u8, - } - } -} - -// impl NodeType { -// const TYPE_TO_VALUE: HashMap = HashMap::from([ -// (NodeType::Internal, 0), -// (NodeType::Leaf, 1), -// ]); -// -// fn value(&self) -> u8 { -// let map = Self::TYPE_TO_VALUE; -// // TODO: this seems pretty clearly the wrong way, probably -// let value = map.get(self); -// if value.is_some() { -// return 3; -// } -// panic!("no value for NodeType: {self:?}"); -// } -// } - -#[allow(clippy::needless_pass_by_value)] -fn sha256_num(input: T) -> Hash { - let mut hasher = Sha256::new(); - hasher.update(input.to_be_bytes()); - - hasher.finalize() -} - -fn sha256_bytes(input: &[u8]) -> Hash { - let mut hasher = Sha256::new(); - hasher.update(input); - - hasher.finalize() -} - -fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { - let mut hasher = Sha256::new(); - hasher.update(b"\x02"); - hasher.update(left_hash); - hasher.update(right_hash); - - hasher.finalize() -} - -#[derive(Clone, Debug, Hash, Eq, PartialEq)] -pub enum Side { - Left, - Right, -} - -#[derive(Clone, Debug, Hash, Eq, PartialEq)] -pub enum InsertLocation { - Auto, - AsRoot, - Leaf { index: TreeIndex, side: Side }, -} - -const NULL_PARENT: TreeIndex = 0xffff_ffffu32; - -#[derive(Debug, PartialEq)] -pub struct NodeMetadata { - pub node_type: NodeType, - pub dirty: bool, -} - -impl NodeMetadata { - pub fn from_bytes(blob: MetadataBytes) -> Result { - // TODO: could save 1-2% of tree space by packing (and maybe don't do that) - // TODO: identify some useful structured serialization tooling we use - Ok(Self { - node_type: Self::node_type_from_bytes(blob)?, - dirty: Self::dirty_from_bytes(blob)?, - }) - } - - pub fn to_bytes(&self) -> MetadataBytes { - [self.node_type.to_u8(), u8::from(self.dirty)] - } - - pub fn node_type_from_bytes(blob: MetadataBytes) -> Result { - NodeType::from_u8(blob[0]) - } - - pub fn dirty_from_bytes(blob: MetadataBytes) -> Result { - match blob[1] { - 0 => Ok(false), - 1 => Ok(true), - other => Err(format!("invalid dirty value: 
{other}")), - } - } -} - -#[derive(Debug, PartialEq)] -pub struct Node { - parent: Parent, - hash: Hash, - specific: NodeSpecific, -} - -#[derive(Debug, PartialEq)] -pub enum NodeSpecific { - Internal { left: TreeIndex, right: TreeIndex }, - Leaf { key: KvId, value: KvId }, -} - -impl NodeSpecific { - pub fn sibling_index(&self, index: TreeIndex) -> TreeIndex { - let NodeSpecific::Internal { right, left } = self else { - panic!("unable to get sibling index from a leaf") - }; - - match index { - x if (x == *right) => *left, - x if (x == *left) => *right, - _ => panic!("index not a child: {index}"), - } - } -} - -impl Node { - // fn discriminant(&self) -> u8 { - // unsafe { *(self as *const Self as *const u8) } - // } - - pub fn from_bytes(metadata: &NodeMetadata, blob: DataBytes) -> Result { - Ok(Self { - parent: Self::parent_from_bytes(&blob), - hash: blob[HASH_RANGE].try_into().unwrap(), - specific: match metadata.node_type { - NodeType::Internal => NodeSpecific::Internal { - left: TreeIndex::from_be_bytes(blob[LEFT_RANGE].try_into().unwrap()), - right: TreeIndex::from_be_bytes(blob[RIGHT_RANGE].try_into().unwrap()), - }, - NodeType::Leaf => NodeSpecific::Leaf { - key: KvId::from_be_bytes(blob[KEY_RANGE].try_into().unwrap()), - value: KvId::from_be_bytes(blob[VALUE_RANGE].try_into().unwrap()), - }, - }, - }) - } - - fn parent_from_bytes(blob: &DataBytes) -> Parent { - let parent_integer = TreeIndex::from_be_bytes(blob[PARENT_RANGE].try_into().unwrap()); - match parent_integer { - NULL_PARENT => None, - _ => Some(parent_integer), - } - } - pub fn to_bytes(&self) -> DataBytes { - let mut blob: DataBytes = [0; DATA_SIZE]; - match self { - Node { - parent, - specific: NodeSpecific::Internal { left, right }, - hash, - } => { - let parent_integer = match parent { - None => NULL_PARENT, - Some(parent) => *parent, - }; - blob[HASH_RANGE].copy_from_slice(hash); - blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_be_bytes()); - blob[LEFT_RANGE].copy_from_slice(&left.to_be_bytes()); - blob[RIGHT_RANGE].copy_from_slice(&right.to_be_bytes()); - } - Node { - parent, - specific: NodeSpecific::Leaf { key, value }, - hash, - } => { - let parent_integer = match parent { - None => NULL_PARENT, - Some(parent) => *parent, - }; - blob[HASH_RANGE].copy_from_slice(hash); - blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_be_bytes()); - blob[KEY_RANGE].copy_from_slice(&key.to_be_bytes()); - blob[VALUE_RANGE].copy_from_slice(&value.to_be_bytes()); - } - } - - blob - } - - // pub fn to_dot(&self, index: TreeIndex) -> DotLines { - // // TODO: can this be done without introducing a blank line? 
- // let node_to_parent = match self.parent { - // Some(parent) => format!("node_{index} -> node_{parent};"), - // None => String::new(), - // }; - // - // match self.specific { - // NodeSpecific::Internal {left, right} => DotLines{ - // nodes: vec![ - // format!("node_{index} [label=\"{index}\"]"), - // ], - // connections: vec![ - // format!("node_{index} -> node_{left};"), - // format!("node_{index} -> node_{right};"), - // node_to_parent, - // ], - // pair_boxes: vec![ - // format!("node [shape = box]; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}}"), - // ], - // note: String::new(), - // }, - // NodeSpecific::Leaf {key, value} => DotLines{ - // nodes: vec![ - // format!("node_{index} [shape=box, label=\"{index}\\nvalue: {key}\\nvalue: {value}\"];"), - // ], - // connections: vec![node_to_parent], - // pair_boxes: vec![], - // note: String::new(), - // }, - // } - // } -} - -fn block_range(index: TreeIndex) -> Range { - let block_start = index as usize * BLOCK_SIZE; - block_start..block_start + BLOCK_SIZE -} - -// TODO: does not enforce matching metadata node type and node enumeration type -pub struct Block { - metadata: NodeMetadata, - node: Node, -} - -impl Block { - pub fn to_bytes(&self) -> BlockBytes { - let mut blob: BlockBytes = [0; BLOCK_SIZE]; - blob[METADATA_RANGE].copy_from_slice(&self.metadata.to_bytes()); - blob[DATA_RANGE].copy_from_slice(&self.node.to_bytes()); - - blob - } - - pub fn from_bytes(blob: BlockBytes) -> Result { - let metadata_blob: MetadataBytes = blob[METADATA_RANGE].try_into().unwrap(); - let data_blob: DataBytes = blob[DATA_RANGE].try_into().unwrap(); - let metadata = NodeMetadata::from_bytes(metadata_blob) - .map_err(|message| format!("failed loading metadata: {message})"))?; - let node = Node::from_bytes(&metadata, data_blob) - .map_err(|message| format!("failed loading node: {message})"))?; - - Ok(Block { metadata, node }) - } -} - -fn get_free_indexes_and_keys_values_indexes( - blob: &[u8], -) -> (HashSet, HashMap) { - let index_count = blob.len() / BLOCK_SIZE; - - let mut seen_indexes: Vec = vec![false; index_count]; - let mut key_to_index: HashMap = HashMap::default(); - - for (index, block) in MerkleBlobLeftChildFirstIterator::new(blob) { - seen_indexes[index as usize] = true; - - if let NodeSpecific::Leaf { key, .. } = block.node.specific { - key_to_index.insert(key, index); - } - } - - let mut free_indexes: HashSet = HashSet::new(); - for (index, seen) in seen_indexes.iter().enumerate() { - if !seen { - free_indexes.insert(index as TreeIndex); - } - } - - (free_indexes, key_to_index) -} - -#[cfg_attr(feature = "py-bindings", pyclass(name = "MerkleBlob"))] -#[derive(Debug)] -pub struct MerkleBlob { - blob: Vec, - // TODO: should this be a set for fast lookups? 
- free_indexes: HashSet, - key_to_index: HashMap, -} - -impl MerkleBlob { - pub fn new(blob: Vec) -> Result { - let length = blob.len(); - let remainder = length % BLOCK_SIZE; - if remainder != 0 { - return Err(format!( - "blob length must be a multiple of block count, found extra bytes: {remainder}" - )); - } - - let (free_indexes, key_to_index) = get_free_indexes_and_keys_values_indexes(&blob); - - Ok(Self { - blob, - free_indexes, - key_to_index, - }) - } - - fn clear(&mut self) { - self.blob.clear(); - self.key_to_index.clear(); - self.free_indexes.clear(); - } - - pub fn insert( - &mut self, - key: KvId, - value: KvId, - hash: &Hash, - insert_location: InsertLocation, - ) -> Result<(), String> { - let insert_location = match insert_location { - InsertLocation::Auto => self.get_random_insert_location_by_kvid(key)?, - _ => insert_location, - }; - - match insert_location { - InsertLocation::Auto => { - panic!("this should have been caught and processed above") - } - InsertLocation::AsRoot => { - if !self.key_to_index.is_empty() { - return Err("requested insertion at root but tree not empty".to_string()); - }; - self.insert_first(key, value, hash); - } - InsertLocation::Leaf { index, side } => { - let old_leaf = self.get_node(index)?; - let NodeSpecific::Leaf { .. } = old_leaf.specific else { - panic!("requested insertion at leaf but found internal node") - }; - - let internal_node_hash = match side { - Side::Left => internal_hash(hash, &old_leaf.hash), - Side::Right => internal_hash(&old_leaf.hash, hash), - }; - - if self.key_to_index.len() == 1 { - self.insert_second(key, value, hash, &old_leaf, &internal_node_hash, &side)?; - } else { - self.insert_third_or_later( - key, - value, - hash, - &old_leaf, - index, - &internal_node_hash, - &side, - )?; - } - } - } - - Ok(()) - } - - fn insert_first(&mut self, key: KvId, value: KvId, hash: &Hash) { - let new_leaf_block = Block { - metadata: NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, - }, - node: Node { - parent: None, - specific: NodeSpecific::Leaf { key, value }, - hash: *hash, - }, - }; - - self.clear(); - // TODO: unwrap, ack, review - self.insert_entry_to_blob(self.extend_index(), &new_leaf_block) - .unwrap(); - } - - fn insert_second( - &mut self, - key: KvId, - value: KvId, - hash: &Hash, - old_leaf: &Node, - internal_node_hash: &Hash, - side: &Side, - ) -> Result<(), String> { - self.clear(); - // TODO: just handling the nodes below being out of order. 
this all still smells a bit - self.blob.resize(BLOCK_SIZE * 3, 0); - - let new_internal_block = Block { - metadata: NodeMetadata { - node_type: NodeType::Internal, - dirty: false, - }, - node: Node { - parent: None, - specific: NodeSpecific::Internal { left: 1, right: 2 }, - hash: *internal_node_hash, - }, - }; - - self.insert_entry_to_blob(0, &new_internal_block)?; - - let NodeSpecific::Leaf { - key: old_leaf_key, - value: old_leaf_value, - } = old_leaf.specific - else { - return Err("old leaf unexpectedly not a leaf".to_string()); - }; - let nodes = [ - ( - match side { - Side::Left => 2, - Side::Right => 1, - }, - Node { - parent: Some(0), - specific: NodeSpecific::Leaf { - key: old_leaf_key, - value: old_leaf_value, - }, - hash: old_leaf.hash, - }, - ), - ( - match side { - Side::Left => 1, - Side::Right => 2, - }, - Node { - parent: Some(0), - specific: NodeSpecific::Leaf { key, value }, - hash: *hash, - }, - ), - ]; - - for (index, node) in nodes { - let block = Block { - metadata: NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, - }, - node, - }; - - self.insert_entry_to_blob(index, &block)?; - } - - Ok(()) - } - - // TODO: no really, actually consider the too many arguments complaint - #[allow(clippy::too_many_arguments)] - fn insert_third_or_later( - &mut self, - key: KvId, - value: KvId, - hash: &Hash, - old_leaf: &Node, - old_leaf_index: TreeIndex, - internal_node_hash: &Hash, - side: &Side, - ) -> Result<(), String> { - let new_leaf_index = self.get_new_index(); - let new_internal_node_index = self.get_new_index(); - - let new_leaf_block = Block { - metadata: NodeMetadata { - node_type: NodeType::Leaf, - dirty: false, - }, - node: Node { - parent: Some(new_internal_node_index), - specific: NodeSpecific::Leaf { key, value }, - hash: *hash, - }, - }; - self.insert_entry_to_blob(new_leaf_index, &new_leaf_block)?; - - let (left_index, right_index) = match side { - Side::Left => (new_leaf_index, old_leaf_index), - Side::Right => (old_leaf_index, new_leaf_index), - }; - let new_internal_block = Block { - metadata: NodeMetadata { - node_type: NodeType::Internal, - dirty: false, - }, - node: Node { - parent: old_leaf.parent, - specific: NodeSpecific::Internal { - left: left_index, - right: right_index, - }, - hash: *internal_node_hash, - }, - }; - self.insert_entry_to_blob(new_internal_node_index, &new_internal_block)?; - - let Some(old_parent_index) = old_leaf.parent else { - panic!("root found when not expected: {key:?} {value:?} {hash:?}") - }; - - let mut block = Block::from_bytes(self.get_block_bytes(old_leaf_index)?)?; - block.node.parent = Some(new_internal_node_index); - self.insert_entry_to_blob(old_leaf_index, &block)?; - - let mut old_parent_block = Block::from_bytes(self.get_block_bytes(old_parent_index)?)?; - if let NodeSpecific::Internal { - ref mut left, - ref mut right, - .. 
- } = old_parent_block.node.specific - { - if old_leaf_index == *left { - *left = new_internal_node_index; - } else if old_leaf_index == *right { - *right = new_internal_node_index; - } else { - panic!("child not a child of its parent"); - } - } else { - panic!("expected internal node but found leaf"); - }; - - self.insert_entry_to_blob(old_parent_index, &old_parent_block)?; - - self.mark_lineage_as_dirty(old_parent_index)?; - - Ok(()) - } - - pub fn delete(&mut self, key: KvId) -> Result<(), String> { - let leaf_index = *self - .key_to_index - .get(&key) - .ok_or(format!("unknown key: {key}"))?; - let leaf = self.get_node(leaf_index)?; - - // TODO: maybe some common way to indicate/perform sanity double checks? - let NodeSpecific::Leaf { .. } = leaf.specific else { - panic!("key to index cache resulted in internal node") - }; - self.key_to_index.remove(&key); - - let Some(parent_index) = leaf.parent else { - self.clear(); - return Ok(()); - }; - - self.free_indexes.insert(leaf_index); - let parent = self.get_node(parent_index)?; - // TODO: kinda implicit that we 'check' that parent is internal inside .sibling_index() - let sibling_index = parent.specific.sibling_index(leaf_index); - let mut sibling_block = self.get_block(sibling_index)?; - - let Some(grandparent_index) = parent.parent else { - sibling_block.node.parent = None; - self.insert_entry_to_blob(0, &sibling_block)?; - - if let NodeSpecific::Internal { left, right } = sibling_block.node.specific { - for child_index in [left, right] { - let mut block = self.get_block(child_index)?; - block.node.parent = Some(0); - self.insert_entry_to_blob(child_index, &block)?; - } - }; - - self.free_indexes.insert(sibling_index); - - return Ok(()); - }; - - self.free_indexes.insert(parent_index); - let mut grandparent_block = self.get_block(grandparent_index)?; - - sibling_block.node.parent = Some(grandparent_index); - self.insert_entry_to_blob(sibling_index, &sibling_block)?; - - if let NodeSpecific::Internal { - ref mut left, - ref mut right, - .. - } = grandparent_block.node.specific - { - match parent_index { - x if x == *left => *left = sibling_index, - x if x == *right => *right = sibling_index, - _ => panic!("parent not a child a grandparent"), - } - } else { - panic!("grandparent not an internal node") - } - self.insert_entry_to_blob(grandparent_index, &grandparent_block)?; - - self.mark_lineage_as_dirty(grandparent_index)?; - - Ok(()) - } - - pub fn upsert(&mut self, key: KvId, value: KvId, new_hash: &Hash) -> Result<(), String> { - let Some(leaf_index) = self.key_to_index.get(&key) else { - self.insert(key, value, new_hash, InsertLocation::Auto)?; - return Ok(()); - }; - - let mut block = self.get_block(*leaf_index)?; - if let NodeSpecific::Leaf { - value: ref mut inplace_value, - .. - } = block.node.specific - { - block.node.hash.clone_from(new_hash); - *inplace_value = value; - } else { - panic!("expected internal node but found leaf"); - } - self.insert_entry_to_blob(*leaf_index, &block)?; - - if let Some(parent) = block.node.parent { - self.mark_lineage_as_dirty(parent)?; - } - - Ok(()) - } - - pub fn check(&self) -> Result<(), String> { - let mut leaf_count: usize = 0; - let mut internal_count: usize = 0; - - for (index, block) in self { - match block.node.specific { - NodeSpecific::Internal { .. } => internal_count += 1, - NodeSpecific::Leaf { key, .. 
} => { - leaf_count += 1; - let cached_index = self - .key_to_index - .get(&key) - .ok_or(format!("key not in key to index cache: {key:?}"))?; - assert_eq!( - *cached_index, index, - "key to index cache for {key:?} should be {index:?} got: {cached_index:?}" - ); - // TODO: consider what type free indexes should be - assert!( - !self.free_indexes.contains(&index), - "{}", - format!("active index found in free index list: {index:?}") - ); - } - } - } - - let key_to_index_cache_length = self.key_to_index.len(); - assert_eq!(leaf_count, key_to_index_cache_length, "found {leaf_count:?} leaves but key to index cache length is: {key_to_index_cache_length:?}"); - let total_count = leaf_count + internal_count + self.free_indexes.len(); - let extend_index = self.extend_index(); - assert_eq!( - total_count, extend_index as usize, - "expected total node count {extend_index:?} found: {total_count:?}", - ); - - Ok(()) - // TODO: check parent/child bidirectional accuracy - } - - // fn update_parent(&mut self, index: TreeIndex, parent: Option) -> Result<(), String> { - // let range = self.get_block_range(index); - // - // let mut node = self.get_node(index)?; - // node.parent = parent; - // self.blob[range].copy_from_slice(&node.to_bytes()); - // - // Ok(()) - // } - - // fn update_left(&mut self, index: TreeIndex, left: Option) -> Result<(), String> { - // let range = self.get_block_range(index); - // - // let mut node = self.get_node(index)?; - // node.left = left; - // self.blob[range].copy_from_slice(&node.to_bytes()); - // - // Ok(()) - // } - - fn mark_lineage_as_dirty(&mut self, index: TreeIndex) -> Result<(), String> { - let mut next_index = Some(index); - - while let Some(this_index) = next_index { - let mut block = Block::from_bytes(self.get_block_bytes(this_index)?)?; - - if block.metadata.dirty { - return Ok(()); - } - - block.metadata.dirty = true; - self.insert_entry_to_blob(this_index, &block)?; - next_index = block.node.parent; - } - - Ok(()) - } - - fn get_new_index(&mut self) -> TreeIndex { - match self.free_indexes.iter().next().copied() { - None => { - // TODO: should this extend...? - // TODO: should this update free indexes...? - let index = self.extend_index(); - self.blob.extend_from_slice(&[0; BLOCK_SIZE]); - index - } - Some(new_index) => { - self.free_indexes.remove(&new_index); - new_index - } - } - } - - fn get_random_insert_location_by_seed( - &self, - seed_bytes: &[u8], - ) -> Result { - let mut seed_bytes = Vec::from(seed_bytes); - - if self.blob.is_empty() { - return Ok(InsertLocation::AsRoot); - } - - let side = if (seed_bytes - .last() - .ok_or("zero-length seed bytes not allowed")? - & 1 << 7) - == 0 - { - Side::Left - } else { - Side::Right - }; - let mut next_index: TreeIndex = 0; - let mut node = self.get_node(next_index)?; - - loop { - for byte in &seed_bytes { - for bit in 0..8 { - match node.specific { - NodeSpecific::Leaf { .. } => { - return Ok(InsertLocation::Leaf { - index: next_index, - side, - }) - } - NodeSpecific::Internal { left, right, .. 
} => { - next_index = if byte & (1 << bit) != 0 { left } else { right }; - node = self.get_node(next_index)?; - } - } - } - } - - seed_bytes = sha256_bytes(&seed_bytes).into(); - } - } - - fn get_random_insert_location_by_kvid(&self, seed: KvId) -> Result { - let seed = sha256_num(seed); - - self.get_random_insert_location_by_seed(&seed) - } - - fn extend_index(&self) -> TreeIndex { - let blob_length = self.blob.len(); - let index: TreeIndex = (blob_length / BLOCK_SIZE) as TreeIndex; - let remainder = blob_length % BLOCK_SIZE; - assert_eq!(remainder, 0, "blob length {blob_length:?} not a multiple of {BLOCK_SIZE:?}, remainder: {remainder:?}"); - - index - } - - fn insert_entry_to_blob(&mut self, index: TreeIndex, block: &Block) -> Result<(), String> { - let new_block_bytes = block.to_bytes(); - let extend_index = self.extend_index(); - match index.cmp(&extend_index) { - Ordering::Greater => return Err(format!("block index out of range: {index}")), - Ordering::Equal => self.blob.extend_from_slice(&new_block_bytes), - Ordering::Less => { - // TODO: lots of deserialization here for just the key - let old_block = self.get_block(index)?; - if !self.free_indexes.contains(&index) - && old_block.metadata.node_type == NodeType::Leaf - { - // TODO: sort of repeating the leaf check above and below. smells a little - if let NodeSpecific::Leaf { - key: old_block_key, .. - } = old_block.node.specific - { - self.key_to_index.remove(&old_block_key); - }; - }; - self.blob[block_range(index)].copy_from_slice(&new_block_bytes); - } - } - - if let NodeSpecific::Leaf { key, .. } = block.node.specific { - self.key_to_index.insert(key, index); - }; - - self.free_indexes.take(&index); - - Ok(()) - } - - fn get_block(&self, index: TreeIndex) -> Result { - Block::from_bytes(self.get_block_bytes(index)?) - } - - // fn get_block_slice(&self, index: TreeIndex) -> Result<&mut BlockBytes, String> { - // let metadata_start = index as usize * BLOCK_SIZE; - // let data_start = metadata_start + METADATA_SIZE; - // let end = data_start + DATA_SIZE; - // - // self.blob - // .get(metadata_start..end) - // .ok_or(format!("index out of bounds: {index}"))? - // .try_into() - // .map_err(|e| format!("failed getting block {index}: {e}")) - // } - - fn get_block_bytes(&self, index: TreeIndex) -> Result { - self.blob - .get(block_range(index)) - .ok_or(format!("block index out of bounds: {index}"))? - .try_into() - .map_err(|e| format!("failed getting block {index}: {e}")) - } - - pub fn get_node(&self, index: TreeIndex) -> Result { - // TODO: use Block::from_bytes() - // TODO: handle invalid indexes? - // TODO: handle overflows? - let block = self.get_block_bytes(index)?; - let metadata_blob: MetadataBytes = block[METADATA_RANGE].try_into().unwrap(); - let data_blob: DataBytes = block[DATA_RANGE].try_into().unwrap(); - let metadata = NodeMetadata::from_bytes(metadata_blob) - .map_err(|message| format!("failed loading metadata: {message})"))?; - - Node::from_bytes(&metadata, data_blob) - .map_err(|message| format!("failed loading node: {message}")) - } - - pub fn get_parent_index(&self, index: TreeIndex) -> Result { - let block = self.get_block_bytes(index)?; - - Ok(Node::parent_from_bytes( - block[DATA_RANGE].try_into().unwrap(), - )) - } - - pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { - // TODO: what about an index that happens to be the null index? 
a question for everywhere i guess - let mut next_index = Some(index); - let mut lineage = vec![]; - - while let Some(this_index) = next_index { - let node = self.get_node(this_index)?; - next_index = node.parent; - lineage.push(node); - } - - Ok(lineage) - } - - pub fn get_lineage_indexes(&self, index: TreeIndex) -> Result, String> { - // TODO: yep, this 'optimization' might be overkill, and should be speed compared regardless - // TODO: what about an index that happens to be the null index? a question for everywhere i guess - let mut next_index = Some(index); - let mut lineage: Vec = vec![]; - - while let Some(this_index) = next_index { - lineage.push(this_index); - next_index = self.get_parent_index(this_index)?; - } - - Ok(lineage) - } - - pub fn iter(&self) -> MerkleBlobLeftChildFirstIterator<'_> { - <&Self as IntoIterator>::into_iter(self) - } - - pub fn calculate_lazy_hashes(&mut self) -> Result<(), String> { - // TODO: really want a truncated traversal, not filter - // TODO: yeah, storing the whole set of blocks via collect is not great - for (index, mut block) in self - .iter() - .filter(|(_, block)| block.metadata.dirty) - .collect::>() - { - let NodeSpecific::Internal { left, right } = block.node.specific else { - panic!("leaves should not be dirty") - }; - // TODO: obviously inefficient to re-get/deserialize these blocks inside - // an iteration that's already doing that - let left = self.get_block(left)?; - let right = self.get_block(right)?; - // TODO: wrap this up in Block maybe? just to have 'control' of dirty being 'accurate' - block.node.hash = internal_hash(&left.node.hash, &right.node.hash); - block.metadata.dirty = false; - self.insert_entry_to_blob(index, &block)?; - } - - Ok(()) - } - - #[allow(unused)] - fn relocate_node(&mut self, source: TreeIndex, destination: TreeIndex) -> Result<(), String> { - let extend_index = self.extend_index(); - // TODO: perhaps relocation of root should be allowed for some use - if source == 0 { - return Err("relocation of the root and index zero is not allowed".to_string()); - }; - assert!(source < extend_index); - assert!(!self.free_indexes.contains(&source)); - assert!(destination <= extend_index); - assert!(destination == extend_index || self.free_indexes.contains(&destination)); - - let source_block = self.get_block(source).unwrap(); - if let Some(parent) = source_block.node.parent { - let mut parent_block = self.get_block(parent).unwrap(); - let NodeSpecific::Internal { - ref mut left, - ref mut right, - } = parent_block.node.specific - else { - panic!(); - }; - match source { - x if x == *left => *left = destination, - x if x == *right => *right = destination, - _ => panic!(), - } - self.insert_entry_to_blob(parent, &parent_block).unwrap(); - } - - if let NodeSpecific::Internal { left, right, .. } = source_block.node.specific { - for child in [left, right] { - let mut block = self.get_block(child).unwrap(); - block.node.parent = Some(destination); - self.insert_entry_to_blob(child, &block).unwrap(); - } - } - - self.free_indexes.insert(source); - - Ok(()) - } - - #[allow(unused)] - fn rebuild(&mut self) -> Result<(), String> { - panic!(); - // TODO: could make insert_entry_to_blob a free function and not need to make - // a merkle blob here? maybe? - let mut new = Self::new(Vec::new())?; - for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob).enumerate() { - // new.insert_entry_to_blob(index, )? 
- } - self.blob = new.blob; - - Ok(()) - } - - #[allow(unused)] - fn get_key_value_map(&self) -> HashMap { - let mut key_value = HashMap::new(); - for (key, index) in &self.key_to_index { - let NodeSpecific::Leaf { value, .. } = self.get_node(*index).unwrap().specific else { - panic!() - }; - key_value.insert(*key, value); - } - - key_value - } -} - -impl PartialEq for MerkleBlob { - fn eq(&self, other: &Self) -> bool { - // TODO: should we check the indexes? - for ((_, self_block), (_, other_block)) in zip(self, other) { - if (self_block.metadata.dirty || other_block.metadata.dirty) - || self_block.node.hash != other_block.node.hash - // TODO: isn't only a leaf supposed to check this? - || self_block.node.specific != other_block.node.specific - { - return false; - } - } - - true - } -} - -impl<'a> IntoIterator for &'a MerkleBlob { - // TODO: review efficiency in whatever use cases we end up with, vs Item = Node etc - type Item = (TreeIndex, Block); - type IntoIter = MerkleBlobLeftChildFirstIterator<'a>; - - fn into_iter(self) -> Self::IntoIter { - // TODO: review types around this to avoid copying - MerkleBlobLeftChildFirstIterator::new(&self.blob[..]) - } -} - -#[cfg(feature = "py-bindings")] -#[pymethods] -impl MerkleBlob { - #[allow(clippy::needless_pass_by_value)] - #[new] - pub fn py_init(blob: PyBuffer) -> PyResult { - assert!( - blob.is_c_contiguous(), - "from_bytes() must be called with a contiguous buffer" - ); - #[allow(unsafe_code)] - let slice = - unsafe { std::slice::from_raw_parts(blob.buf_ptr() as *const u8, blob.len_bytes()) }; - - Ok(Self::new(Vec::from(slice)).unwrap()) - } - - #[pyo3(name = "insert")] - pub fn py_insert(&mut self, key: KvId, value: KvId, hash: Hash) -> PyResult<()> { - // TODO: consider the error - // TODO: expose insert location - self.insert(key, value, &hash, InsertLocation::Auto) - .unwrap(); - - Ok(()) - } - - #[pyo3(name = "delete")] - pub fn py_delete(&mut self, key: KvId) -> PyResult<()> { - // TODO: consider the error - self.delete(key).unwrap(); - - Ok(()) - } - - #[pyo3(name = "__len__")] - pub fn py_len(&self) -> PyResult { - Ok(self.blob.len()) - } -} - -struct MerkleBlobLeftChildFirstIteratorItem { - visited: bool, - index: TreeIndex, -} - -pub struct MerkleBlobLeftChildFirstIterator<'a> { - blob: &'a [u8], - deque: VecDeque, -} - -impl<'a> MerkleBlobLeftChildFirstIterator<'a> { - fn new(blob: &'a [u8]) -> Self { - let mut deque = VecDeque::new(); - if blob.len() / BLOCK_SIZE > 0 { - deque.push_back(MerkleBlobLeftChildFirstIteratorItem { - visited: false, - index: 0, - }); - } - - Self { blob, deque } - } -} - -impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { - type Item = (TreeIndex, Block); - - fn next(&mut self) -> Option { - // left sibling first, children before parents - - loop { - let item = self.deque.pop_front()?; - let block_bytes: BlockBytes = self.blob[block_range(item.index)].try_into().unwrap(); - let block = Block::from_bytes(block_bytes).unwrap(); - - match block.node.specific { - NodeSpecific::Leaf { .. 
} => return Some((item.index, block)), - NodeSpecific::Internal { left, right } => { - if item.visited { - return Some((item.index, block)); - }; - - self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { - visited: true, - index: item.index, - }); - self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { - visited: false, - index: right, - }); - self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { - visited: false, - index: left, - }); - } - } - } - } -} - -pub struct MerkleBlobParentFirstIterator<'a> { - blob: &'a [u8], - deque: VecDeque, -} - -impl<'a> MerkleBlobParentFirstIterator<'a> { - fn new(blob: &'a [u8]) -> Self { - let mut deque = VecDeque::new(); - if blob.len() / BLOCK_SIZE > 0 { - deque.push_back(0); - } - - Self { blob, deque } - } -} - -impl Iterator for MerkleBlobParentFirstIterator<'_> { - type Item = Block; - - fn next(&mut self) -> Option { - // left sibling first, parents before children - - loop { - let index = self.deque.pop_front()?; - let block_bytes: BlockBytes = self.blob[block_range(index)].try_into().unwrap(); - let block = Block::from_bytes(block_bytes).unwrap(); - - match block.node.specific { - NodeSpecific::Leaf { .. } => return Some(block), - NodeSpecific::Internal { left, right } => { - self.deque.push_front(right); - self.deque.push_front(left); - } - } - } - } -} - -pub struct MerkleBlobBreadthFirstIterator<'a> { - blob: &'a [u8], - deque: VecDeque, -} - -impl<'a> MerkleBlobBreadthFirstIterator<'a> { - #[allow(unused)] - fn new(blob: &'a [u8]) -> Self { - let mut deque = VecDeque::new(); - if blob.len() / BLOCK_SIZE > 0 { - deque.push_back(0); - } - - Self { blob, deque } - } -} - -impl Iterator for MerkleBlobBreadthFirstIterator<'_> { - type Item = Block; - - fn next(&mut self) -> Option { - // left sibling first, parent depth before child depth - - loop { - let index = self.deque.pop_front()?; - let block_bytes: BlockBytes = self.blob[block_range(index)].try_into().unwrap(); - let block = Block::from_bytes(block_bytes).unwrap(); - - match block.node.specific { - NodeSpecific::Leaf { .. 
} => return Some(block), - NodeSpecific::Internal { left, right } => { - self.deque.push_back(left); - self.deque.push_back(right); - } - } - } - } -} - -#[cfg(test)] -mod dot; -#[cfg(test)] -mod tests { - use super::*; - // use hex_literal::hex; - // use dot::open_dot; - use rstest::{fixture, rstest}; - use std::time::{Duration, Instant}; - - // const EXAMPLE_BLOB: [u8; 138] = hex!("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b"); - // const HASH: Hash = [ - // 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - // 35, 36, 37, 38, 39, 40, 41, 42, 43, - // ]; - // - // const EXAMPLE_ROOT: Node = Node { - // parent: None, - // specific: NodeSpecific::Internal { left: 1, right: 2 }, - // hash: HASH, - // index: 0, - // }; - // const EXAMPLE_ROOT_METADATA: NodeMetadata = NodeMetadata { - // node_type: NodeType::Internal, - // dirty: true, - // }; - // const EXAMPLE_LEFT_LEAF: Node = Node { - // parent: Some(0), - // specific: NodeSpecific::Leaf { - // key: 0x0405_0607_0809_0A0B, - // value: 0x1415_1617_1819_1A1B, - // }, - // hash: HASH, - // index: 1, - // }; - // const EXAMPLE_LEFT_LEAF_METADATA: NodeMetadata = NodeMetadata { - // node_type: NodeType::Leaf, - // dirty: false, - // }; - // const EXAMPLE_RIGHT_LEAF: Node = Node { - // parent: Some(0), - // specific: NodeSpecific::Leaf { - // key: 0x2425_2627_2829_2A2B, - // value: 0x3435_3637_3839_3A3B, - // }, - // hash: HASH, - // index: 2, - // }; - // const EXAMPLE_RIGHT_LEAF_METADATA: NodeMetadata = NodeMetadata { - // node_type: NodeType::Leaf, - // dirty: false, - // }; - - // fn example_merkle_blob() -> MerkleBlob { - // MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap() - // } - - #[allow(unused)] - fn normalized_blob(merkle_blob: &MerkleBlob) -> Vec { - let mut new = MerkleBlob::new(merkle_blob.blob.clone()).unwrap(); - - new.calculate_lazy_hashes(); - new.rebuild(); - - new.blob - } - - #[test] - fn test_node_type_serialized_values() { - // TODO: can i make sure we cover all variants? - assert_eq!(NodeType::Internal as u8, 0); - assert_eq!(NodeType::Leaf as u8, 1); - - for node_type in [NodeType::Internal, NodeType::Leaf] { - assert_eq!(node_type.to_u8(), node_type.clone() as u8,); - assert_eq!( - NodeType::from_u8(node_type.clone() as u8).unwrap(), - node_type, - ); - } - } - - #[test] - fn test_internal_hash() { - // TODO: yeah, various questions around this and how to express 'this is dl internal hash' - // without silly repetition. maybe just a use as. 
- // in Python: Program.to((left_hash, right_hash)).get_tree_hash_precalc(left_hash, right_hash) - let left: Hash = [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, - ]; - let right: Hash = [ - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - ]; - assert_eq!( - internal_hash(&left, &right), - clvm_utils::tree_hash_pair( - clvm_utils::TreeHash::new(left), - clvm_utils::TreeHash::new(right) - ) - .to_bytes(), - ); - } - - #[rstest] - fn test_node_metadata_from_to( - #[values(false, true)] dirty: bool, - // TODO: can we make sure we cover all variants - #[values(NodeType::Internal, NodeType::Leaf)] node_type: NodeType, - ) { - let bytes: [u8; 2] = [node_type.to_u8(), dirty as u8]; - let object = NodeMetadata::from_bytes(bytes).unwrap(); - assert_eq!(object, NodeMetadata { node_type, dirty },); - assert_eq!(object.to_bytes(), bytes); - assert_eq!( - NodeMetadata::node_type_from_bytes(bytes).unwrap(), - object.node_type - ); - assert_eq!(NodeMetadata::dirty_from_bytes(bytes).unwrap(), object.dirty); - } - - // #[test] - // fn test_load_a_python_dump() { - // let merkle_blob = example_merkle_blob(); - // merkle_blob.get_node(0).unwrap(); - // - // merkle_blob.check().unwrap(); - // } - - #[fixture] - fn small_blob() -> MerkleBlob { - let mut blob = MerkleBlob::new(vec![]).unwrap(); - - blob.insert( - 0x0001_0203_0405_0607, - 0x1011_1213_1415_1617, - &sha256_num(0x1020), - InsertLocation::Auto, - ) - .unwrap(); - - blob.insert( - 0x2021_2223_2425_2627, - 0x3031_3233_3435_3637, - &sha256_num(0x2030), - InsertLocation::Auto, - ) - .unwrap(); - - blob - } - - #[rstest] - fn test_get_lineage(small_blob: MerkleBlob) { - let lineage = small_blob.get_lineage(2).unwrap(); - for node in &lineage { - println!("{node:?}"); - } - assert_eq!(lineage.len(), 2); - let last_node = lineage.last().unwrap(); - assert_eq!(last_node.parent, None); - - small_blob.check().unwrap(); - } - - #[rstest] - #[case::right(0, 2, Side::Left)] - #[case::left(0xff, 1, Side::Right)] - fn test_get_random_insert_location_by_seed( - #[case] seed: u8, - #[case] expected_index: TreeIndex, - #[case] expected_side: Side, - small_blob: MerkleBlob, - ) { - let location = small_blob - .get_random_insert_location_by_seed(&[seed; 32]) - .unwrap(); - - assert_eq!( - location, - InsertLocation::Leaf { - index: expected_index, - side: expected_side - }, - ); - - small_blob.check().unwrap(); - } - - // #[test] - // fn test_build_blob_and_read() { - // let mut blob: Vec = Vec::new(); - // - // blob.extend(EXAMPLE_ROOT_METADATA.to_bytes()); - // blob.extend(EXAMPLE_ROOT.to_bytes()); - // blob.extend(EXAMPLE_LEFT_LEAF_METADATA.to_bytes()); - // blob.extend(EXAMPLE_LEFT_LEAF.to_bytes()); - // blob.extend(EXAMPLE_RIGHT_LEAF_METADATA.to_bytes()); - // blob.extend(EXAMPLE_RIGHT_LEAF.to_bytes()); - // - // assert_eq!(blob, Vec::from(EXAMPLE_BLOB)); - // - // let merkle_blob = MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap(); - // - // assert_eq!(merkle_blob.get_node(0).unwrap(), EXAMPLE_ROOT); - // assert_eq!(merkle_blob.get_node(1).unwrap(), EXAMPLE_LEFT_LEAF); - // assert_eq!(merkle_blob.get_node(2).unwrap(), EXAMPLE_RIGHT_LEAF); - // - // merkle_blob.check().unwrap(); - // } - - // #[test] - // fn test_build_merkle() { - // let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - // - // let (key, value) = EXAMPLE_LEFT_LEAF.key_value(); - // merkle_blob - // .insert(key, value, 
&EXAMPLE_LEFT_LEAF.hash) - // .unwrap(); - // let (key, value) = EXAMPLE_RIGHT_LEAF.key_value(); - // merkle_blob - // .insert(key, value, &EXAMPLE_RIGHT_LEAF.hash) - // .unwrap(); - // - // // TODO: just hacking here to compare with the ~wrong~ simplified reference - // let mut root = Block::from_bytes(merkle_blob.get_block_bytes(0).unwrap(), 0).unwrap(); - // root.metadata.dirty = true; - // root.node.hash = HASH; - // assert_eq!(root.metadata.node_type, NodeType::Internal); - // merkle_blob - // .insert_entry_to_blob(0, root.to_bytes()) - // .unwrap(); - // - // assert_eq!(merkle_blob.blob, Vec::from(EXAMPLE_BLOB)); - // - // merkle_blob.check().unwrap(); - // } - - #[test] - fn test_just_insert_a_bunch() { - let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - - let mut total_time = Duration::new(0, 0); - - for i in 0..100_000 { - let start = Instant::now(); - merkle_blob - // TODO: yeah this hash is garbage - .insert(i, i, &sha256_num(i), InsertLocation::Auto) - .unwrap(); - let end = Instant::now(); - total_time += end.duration_since(start); - - // match i + 1 { - // 2 => assert_eq!(merkle_blob.blob.len(), 3 * BLOCK_SIZE), - // 3 => assert_eq!(merkle_blob.blob.len(), 5 * BLOCK_SIZE), - // _ => (), - // } - - // let file = fs::File::create(format!("/home/altendky/tmp/mbt/rs/{i:0>4}")).unwrap(); - // let mut file = io::LineWriter::new(file); - // for block in merkle_blob.blob.chunks(BLOCK_SIZE) { - // let mut s = String::new(); - // for byte in block { - // s.push_str(&format!("{:02x}", byte)); - // } - // s.push_str("\n"); - // file.write_all(s.as_bytes()).unwrap(); - // } - - // fs::write(format!("/home/altendky/tmp/mbt/rs/{i:0>4}"), &merkle_blob.blob).unwrap(); - } - // println!("{:?}", merkle_blob.blob) - - println!("total time: {total_time:?}"); - // TODO: check, well... 
something - - merkle_blob.calculate_lazy_hashes().unwrap(); - - merkle_blob.check().unwrap(); - } - - #[test] - fn test_delete_in_reverse_creates_matching_trees() { - const COUNT: usize = 10; - let mut dots = vec![]; - - let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - let mut reference_blobs = vec![]; - - let key_value_ids: [KvId; COUNT] = core::array::from_fn(|i| i as KvId); - - for key_value_id in key_value_ids { - let hash: Hash = sha256_num(key_value_id); - - println!("inserting: {key_value_id}"); - merkle_blob.calculate_lazy_hashes().unwrap(); - reference_blobs.push(MerkleBlob::new(merkle_blob.blob.clone()).unwrap()); - merkle_blob - .insert(key_value_id, key_value_id, &hash, InsertLocation::Auto) - .unwrap(); - dots.push(merkle_blob.to_dot().dump()); - } - - merkle_blob.check().unwrap(); - - for key_value_id in key_value_ids.iter().rev() { - println!("deleting: {key_value_id}"); - merkle_blob.delete(*key_value_id).unwrap(); - merkle_blob.calculate_lazy_hashes().unwrap(); - assert_eq!(merkle_blob, reference_blobs[*key_value_id as usize]); - dots.push(merkle_blob.to_dot().dump()); - } - - merkle_blob.check().unwrap(); - } - - #[test] - fn test_insert_first() { - let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - - let key_value_id: KvId = 1; - // open_dot(&mut merkle_blob.to_dot().set_note("empty")); - merkle_blob - .insert( - key_value_id, - key_value_id, - &sha256_num(key_value_id), - InsertLocation::Auto, - ) - .unwrap(); - // open_dot(&mut merkle_blob.to_dot().set_note("first after")); - - merkle_blob.check().unwrap(); - assert_eq!(merkle_blob.key_to_index.len(), 1); - } - - #[rstest] - fn test_insert_choosing_side( - #[values(Side::Left, Side::Right)] side: Side, - #[values(1, 2)] pre_count: usize, - ) { - let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - - let mut last_key: KvId = 0; - for i in 1..=pre_count { - let key: KvId = i as KvId; - // open_dot(&mut merkle_blob.to_dot().set_note("empty")); - merkle_blob - .insert(key, key, &sha256_num(key), InsertLocation::Auto) - .unwrap(); - last_key = key; - } - - let key_value_id: KvId = pre_count as KvId + 1; - // open_dot(&mut merkle_blob.to_dot().set_note("first after")); - merkle_blob - .insert( - key_value_id, - key_value_id, - &sha256_num(key_value_id), - InsertLocation::Leaf { - index: merkle_blob.key_to_index[&last_key], - side: side.clone(), - }, - ) - .unwrap(); - // open_dot(&mut merkle_blob.to_dot().set_note("first after")); - - let sibling = merkle_blob - .get_node(merkle_blob.key_to_index[&last_key]) - .unwrap(); - let parent = merkle_blob.get_node(sibling.parent.unwrap()).unwrap(); - let NodeSpecific::Internal { left, right } = parent.specific else { - panic!() - }; - - let NodeSpecific::Leaf { key: left_key, .. } = merkle_blob.get_node(left).unwrap().specific - else { - panic!() - }; - let NodeSpecific::Leaf { key: right_key, .. 
} = - merkle_blob.get_node(right).unwrap().specific - else { - panic!() - }; - - let expected_keys: [KvId; 2] = match side { - Side::Left => [pre_count as KvId + 1, pre_count as KvId], - Side::Right => [pre_count as KvId, pre_count as KvId + 1], - }; - assert_eq!([left_key, right_key], expected_keys); - - merkle_blob.check().unwrap(); - } - - #[test] - fn test_delete_last() { - let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - - let key_value_id: KvId = 1; - // open_dot(&mut merkle_blob.to_dot().set_note("empty")); - merkle_blob - .insert( - key_value_id, - key_value_id, - &sha256_num(key_value_id), - InsertLocation::Auto, - ) - .unwrap(); - // open_dot(&mut merkle_blob.to_dot().set_note("first after")); - merkle_blob.check().unwrap(); - - merkle_blob.delete(key_value_id).unwrap(); - - merkle_blob.check().unwrap(); - assert_eq!(merkle_blob.key_to_index.len(), 0); - } - - #[rstest] - fn test_delete_frees_index(mut small_blob: MerkleBlob) { - let key = 0x0001_0203_0405_0607; - let index = small_blob.key_to_index[&key]; - small_blob.delete(key).unwrap(); - - assert_eq!(small_blob.free_indexes, HashSet::from([index, 2])); - } - - #[rstest] - fn test_get_new_index_with_free_index(mut small_blob: MerkleBlob) { - let key = 0x0001_0203_0405_0607; - let _ = small_blob.key_to_index[&key]; - small_blob.delete(key).unwrap(); - - let expected = HashSet::from([1, 2]); - assert_eq!(small_blob.free_indexes, expected); - // NOTE: both 1 and 2 are free per test_delete_frees_index - assert!(expected.contains(&small_blob.get_new_index())); - } - - #[rstest] - fn test_dump_small_blob_bytes(small_blob: MerkleBlob) { - println!("{}", hex::encode(small_blob.blob)); - } - - #[test] - #[should_panic(expected = "unknown NodeType value: 2")] - fn test_node_type_from_u8_invalid() { - let _ = NodeType::from_u8(2); - } - - #[test] - fn test_node_metadata_dirty_from_bytes_invalid() { - NodeMetadata::dirty_from_bytes([0, 2]).expect_err("invalid value should fail"); - } - - #[test] - #[should_panic(expected = "unable to get sibling index from a leaf")] - fn test_node_specific_sibling_index_panics_for_leaf() { - let leaf = NodeSpecific::Leaf { key: 0, value: 0 }; - leaf.sibling_index(0); - } - - #[test] - #[should_panic(expected = "index not a child: 2")] - fn test_node_specific_sibling_index_panics_for_unknown_sibling() { - let node = NodeSpecific::Internal { left: 0, right: 1 }; - node.sibling_index(2); - } - - #[rstest] - fn test_get_free_indexes(small_blob: MerkleBlob) { - let mut blob = small_blob.blob.clone(); - let expected_free_index = (blob.len() / BLOCK_SIZE) as TreeIndex; - blob.extend_from_slice(&[0; BLOCK_SIZE]); - let (free_indexes, _) = get_free_indexes_and_keys_values_indexes(&blob); - assert_eq!(free_indexes, HashSet::from([expected_free_index])); - } - - #[test] - fn test_merkle_blob_new_errs_for_nonmultiple_of_block_length() { - MerkleBlob::new(vec![1]).expect_err("invalid length should fail"); - } - - #[rstest] - fn test_upsert_inserts(small_blob: MerkleBlob) { - let key = 1234; - assert!(!small_blob.key_to_index.contains_key(&key)); - let value = 5678; - - let mut insert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); - insert_blob - .insert(key, value, &sha256_num(key), InsertLocation::Auto) - .unwrap(); - // open_dot(insert_blob.to_dot().set_note("first after")); - - let mut upsert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); - upsert_blob.upsert(key, value, &sha256_num(key)).unwrap(); - // open_dot(upsert_blob.to_dot().set_note("first after")); - - 
assert_eq!(insert_blob.blob, upsert_blob.blob); - } - - #[rstest] - fn test_upsert_upserts(mut small_blob: MerkleBlob) { - let before_blocks = small_blob.iter().collect::>(); - let (key, index) = small_blob.key_to_index.iter().next().unwrap(); - let node = small_blob.get_node(*index).unwrap(); - let NodeSpecific::Leaf { - key: original_key, - value: original_value, - .. - } = node.specific - else { - panic!() - }; - let new_value = original_value + 1; - - small_blob.upsert(*key, new_value, &node.hash).unwrap(); - - let after_blocks = small_blob.iter().collect::>(); - - assert_eq!(before_blocks.len(), after_blocks.len()); - for ((before_index, before), (after_index, after)) in zip(before_blocks, after_blocks) { - assert_eq!(before.node.parent, after.node.parent); - assert_eq!(before_index, after_index); - let NodeSpecific::Leaf { - key: before_key, - value: before_value, - } = before.node.specific - else { - assert_eq!(before.node.specific, after.node.specific); - continue; - }; - let NodeSpecific::Leaf { - key: after_key, - value: after_value, - } = after.node.specific - else { - panic!() - }; - assert_eq!(before_key, after_key); - if before_key == original_key { - assert_eq!(after_value, new_value); - } else { - assert_eq!(before_value, after_value); - } - } - } -} +pub use merkle::MerkleBlob; diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs new file mode 100644 index 000000000..778b2611f --- /dev/null +++ b/crates/chia-datalayer/src/merkle.rs @@ -0,0 +1,1824 @@ +#[cfg(feature = "py-bindings")] +use pyo3::{buffer::PyBuffer, pyclass, pymethods, PyResult}; + +use clvmr::sha2::Sha256; +use num_traits::ToBytes; +use std::cmp::Ordering; +use std::collections::{HashMap, HashSet, VecDeque}; +use std::iter::{zip, IntoIterator}; +use std::mem::size_of; +use std::ops::Range; + +type TreeIndex = u32; +type Parent = Option; +type Hash = [u8; 32]; +type KvId = i64; + +const fn range_by_length(start: usize, length: usize) -> Range { + start..start + length +} + +// define the serialized block format +// TODO: consider in more detail other serialization tools such as serde and streamable +// common fields +// TODO: better way to pick the max of key value and right range, until we move hash first +// TODO: clearly shouldn't be hard coded +const METADATA_SIZE: usize = 2; +const METADATA_RANGE: Range = 0..METADATA_SIZE; +const HASH_RANGE: Range = range_by_length(0, size_of::()); +// const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); +const PARENT_RANGE: Range = HASH_RANGE.end..(HASH_RANGE.end + size_of::()); +// internal specific fields +const LEFT_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); +const RIGHT_RANGE: Range = range_by_length(LEFT_RANGE.end, size_of::()); +// leaf specific fields +const KEY_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); +const VALUE_RANGE: Range = range_by_length(KEY_RANGE.end, size_of::()); + +// TODO: clearly shouldn't be hard coded +// TODO: max of RIGHT_RANGE.end and VALUE_RANGE.end +const DATA_SIZE: usize = VALUE_RANGE.end; +const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; +type BlockBytes = [u8; BLOCK_SIZE]; +type MetadataBytes = [u8; METADATA_SIZE]; +type DataBytes = [u8; DATA_SIZE]; +const DATA_RANGE: Range = METADATA_SIZE..METADATA_SIZE + DATA_SIZE; +// const INTERNAL_PADDING_RANGE: Range = RIGHT_RANGE.end..DATA_SIZE; +// const INTERNAL_PADDING_SIZE: usize = INTERNAL_PADDING_RANGE.end - INTERNAL_PADDING_RANGE.start; +// const LEAF_PADDING_RANGE: Range = 
VALUE_RANGE.end..DATA_SIZE; +// const LEAF_PADDING_SIZE: usize = LEAF_PADDING_RANGE.end - LEAF_PADDING_RANGE.start; + +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +#[repr(u8)] +pub enum NodeType { + Internal = 0, + Leaf = 1, +} + +impl NodeType { + pub fn from_u8(value: u8) -> Result { + // TODO: identify some useful structured serialization tooling we use + // TODO: find a better way to tie serialization values to enumerators + match value { + // ha! feel free to laugh at this + x if (NodeType::Internal as u8 == x) => Ok(NodeType::Internal), + x if (NodeType::Leaf as u8 == x) => Ok(NodeType::Leaf), + other => panic!("unknown NodeType value: {other}"), + } + } + + pub fn to_u8(&self) -> u8 { + match self { + NodeType::Internal => NodeType::Internal as u8, + NodeType::Leaf => NodeType::Leaf as u8, + } + } +} + +// impl NodeType { +// const TYPE_TO_VALUE: HashMap = HashMap::from([ +// (NodeType::Internal, 0), +// (NodeType::Leaf, 1), +// ]); +// +// fn value(&self) -> u8 { +// let map = Self::TYPE_TO_VALUE; +// // TODO: this seems pretty clearly the wrong way, probably +// let value = map.get(self); +// if value.is_some() { +// return 3; +// } +// panic!("no value for NodeType: {self:?}"); +// } +// } + +#[allow(clippy::needless_pass_by_value)] +fn sha256_num(input: T) -> Hash { + let mut hasher = Sha256::new(); + hasher.update(input.to_be_bytes()); + + hasher.finalize() +} + +fn sha256_bytes(input: &[u8]) -> Hash { + let mut hasher = Sha256::new(); + hasher.update(input); + + hasher.finalize() +} + +fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { + let mut hasher = Sha256::new(); + hasher.update(b"\x02"); + hasher.update(left_hash); + hasher.update(right_hash); + + hasher.finalize() +} + +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +pub enum Side { + Left, + Right, +} + +#[derive(Clone, Debug, Hash, Eq, PartialEq)] +pub enum InsertLocation { + Auto, + AsRoot, + Leaf { index: TreeIndex, side: Side }, +} + +const NULL_PARENT: TreeIndex = 0xffff_ffffu32; + +#[derive(Debug, PartialEq)] +pub struct NodeMetadata { + pub node_type: NodeType, + pub dirty: bool, +} + +impl NodeMetadata { + pub fn from_bytes(blob: MetadataBytes) -> Result { + // TODO: could save 1-2% of tree space by packing (and maybe don't do that) + // TODO: identify some useful structured serialization tooling we use + Ok(Self { + node_type: Self::node_type_from_bytes(blob)?, + dirty: Self::dirty_from_bytes(blob)?, + }) + } + + pub fn to_bytes(&self) -> MetadataBytes { + [self.node_type.to_u8(), u8::from(self.dirty)] + } + + pub fn node_type_from_bytes(blob: MetadataBytes) -> Result { + NodeType::from_u8(blob[0]) + } + + pub fn dirty_from_bytes(blob: MetadataBytes) -> Result { + match blob[1] { + 0 => Ok(false), + 1 => Ok(true), + other => Err(format!("invalid dirty value: {other}")), + } + } +} + +#[derive(Debug, PartialEq)] +pub struct Node { + parent: Parent, + hash: Hash, + specific: NodeSpecific, +} + +#[derive(Debug, PartialEq)] +pub enum NodeSpecific { + Internal { left: TreeIndex, right: TreeIndex }, + Leaf { key: KvId, value: KvId }, +} + +impl NodeSpecific { + pub fn sibling_index(&self, index: TreeIndex) -> TreeIndex { + let NodeSpecific::Internal { right, left } = self else { + panic!("unable to get sibling index from a leaf") + }; + + match index { + x if (x == *right) => *left, + x if (x == *left) => *right, + _ => panic!("index not a child: {index}"), + } + } +} + +impl Node { + // fn discriminant(&self) -> u8 { + // unsafe { *(self as *const Self as *const u8) } + // } + + pub fn 
from_bytes(metadata: &NodeMetadata, blob: DataBytes) -> Result { + Ok(Self { + parent: Self::parent_from_bytes(&blob), + hash: blob[HASH_RANGE].try_into().unwrap(), + specific: match metadata.node_type { + NodeType::Internal => NodeSpecific::Internal { + left: TreeIndex::from_be_bytes(blob[LEFT_RANGE].try_into().unwrap()), + right: TreeIndex::from_be_bytes(blob[RIGHT_RANGE].try_into().unwrap()), + }, + NodeType::Leaf => NodeSpecific::Leaf { + key: KvId::from_be_bytes(blob[KEY_RANGE].try_into().unwrap()), + value: KvId::from_be_bytes(blob[VALUE_RANGE].try_into().unwrap()), + }, + }, + }) + } + + fn parent_from_bytes(blob: &DataBytes) -> Parent { + let parent_integer = TreeIndex::from_be_bytes(blob[PARENT_RANGE].try_into().unwrap()); + match parent_integer { + NULL_PARENT => None, + _ => Some(parent_integer), + } + } + pub fn to_bytes(&self) -> DataBytes { + let mut blob: DataBytes = [0; DATA_SIZE]; + match self { + Node { + parent, + specific: NodeSpecific::Internal { left, right }, + hash, + } => { + let parent_integer = match parent { + None => NULL_PARENT, + Some(parent) => *parent, + }; + blob[HASH_RANGE].copy_from_slice(hash); + blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_be_bytes()); + blob[LEFT_RANGE].copy_from_slice(&left.to_be_bytes()); + blob[RIGHT_RANGE].copy_from_slice(&right.to_be_bytes()); + } + Node { + parent, + specific: NodeSpecific::Leaf { key, value }, + hash, + } => { + let parent_integer = match parent { + None => NULL_PARENT, + Some(parent) => *parent, + }; + blob[HASH_RANGE].copy_from_slice(hash); + blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_be_bytes()); + blob[KEY_RANGE].copy_from_slice(&key.to_be_bytes()); + blob[VALUE_RANGE].copy_from_slice(&value.to_be_bytes()); + } + } + + blob + } + + // pub fn to_dot(&self, index: TreeIndex) -> DotLines { + // // TODO: can this be done without introducing a blank line? 
+ // let node_to_parent = match self.parent { + // Some(parent) => format!("node_{index} -> node_{parent};"), + // None => String::new(), + // }; + // + // match self.specific { + // NodeSpecific::Internal {left, right} => DotLines{ + // nodes: vec![ + // format!("node_{index} [label=\"{index}\"]"), + // ], + // connections: vec![ + // format!("node_{index} -> node_{left};"), + // format!("node_{index} -> node_{right};"), + // node_to_parent, + // ], + // pair_boxes: vec![ + // format!("node [shape = box]; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}}"), + // ], + // note: String::new(), + // }, + // NodeSpecific::Leaf {key, value} => DotLines{ + // nodes: vec![ + // format!("node_{index} [shape=box, label=\"{index}\\nvalue: {key}\\nvalue: {value}\"];"), + // ], + // connections: vec![node_to_parent], + // pair_boxes: vec![], + // note: String::new(), + // }, + // } + // } +} + +fn block_range(index: TreeIndex) -> Range { + let block_start = index as usize * BLOCK_SIZE; + block_start..block_start + BLOCK_SIZE +} + +// TODO: does not enforce matching metadata node type and node enumeration type +pub struct Block { + metadata: NodeMetadata, + node: Node, +} + +impl Block { + pub fn to_bytes(&self) -> BlockBytes { + let mut blob: BlockBytes = [0; BLOCK_SIZE]; + blob[METADATA_RANGE].copy_from_slice(&self.metadata.to_bytes()); + blob[DATA_RANGE].copy_from_slice(&self.node.to_bytes()); + + blob + } + + pub fn from_bytes(blob: BlockBytes) -> Result { + let metadata_blob: MetadataBytes = blob[METADATA_RANGE].try_into().unwrap(); + let data_blob: DataBytes = blob[DATA_RANGE].try_into().unwrap(); + let metadata = NodeMetadata::from_bytes(metadata_blob) + .map_err(|message| format!("failed loading metadata: {message})"))?; + let node = Node::from_bytes(&metadata, data_blob) + .map_err(|message| format!("failed loading node: {message})"))?; + + Ok(Block { metadata, node }) + } +} + +fn get_free_indexes_and_keys_values_indexes( + blob: &[u8], +) -> (HashSet, HashMap) { + let index_count = blob.len() / BLOCK_SIZE; + + let mut seen_indexes: Vec = vec![false; index_count]; + let mut key_to_index: HashMap = HashMap::default(); + + for (index, block) in MerkleBlobLeftChildFirstIterator::new(blob) { + seen_indexes[index as usize] = true; + + if let NodeSpecific::Leaf { key, .. } = block.node.specific { + key_to_index.insert(key, index); + } + } + + let mut free_indexes: HashSet = HashSet::new(); + for (index, seen) in seen_indexes.iter().enumerate() { + if !seen { + free_indexes.insert(index as TreeIndex); + } + } + + (free_indexes, key_to_index) +} + +#[cfg_attr(feature = "py-bindings", pyclass(name = "MerkleBlob"))] +#[derive(Debug)] +pub struct MerkleBlob { + blob: Vec, + // TODO: should this be a set for fast lookups? 
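+        // NOTE: both caches below are derived from `blob` on construction (via
+        // get_free_indexes_and_keys_values_indexes) and are kept in sync by
+        // insert_entry_to_blob(), delete(), and clear().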
+ free_indexes: HashSet, + key_to_index: HashMap, +} + +impl MerkleBlob { + pub fn new(blob: Vec) -> Result { + let length = blob.len(); + let remainder = length % BLOCK_SIZE; + if remainder != 0 { + return Err(format!( + "blob length must be a multiple of block count, found extra bytes: {remainder}" + )); + } + + let (free_indexes, key_to_index) = get_free_indexes_and_keys_values_indexes(&blob); + + Ok(Self { + blob, + free_indexes, + key_to_index, + }) + } + + fn clear(&mut self) { + self.blob.clear(); + self.key_to_index.clear(); + self.free_indexes.clear(); + } + + pub fn insert( + &mut self, + key: KvId, + value: KvId, + hash: &Hash, + insert_location: InsertLocation, + ) -> Result<(), String> { + let insert_location = match insert_location { + InsertLocation::Auto => self.get_random_insert_location_by_kvid(key)?, + _ => insert_location, + }; + + match insert_location { + InsertLocation::Auto => { + panic!("this should have been caught and processed above") + } + InsertLocation::AsRoot => { + if !self.key_to_index.is_empty() { + return Err("requested insertion at root but tree not empty".to_string()); + }; + self.insert_first(key, value, hash); + } + InsertLocation::Leaf { index, side } => { + let old_leaf = self.get_node(index)?; + let NodeSpecific::Leaf { .. } = old_leaf.specific else { + panic!("requested insertion at leaf but found internal node") + }; + + let internal_node_hash = match side { + Side::Left => internal_hash(hash, &old_leaf.hash), + Side::Right => internal_hash(&old_leaf.hash, hash), + }; + + if self.key_to_index.len() == 1 { + self.insert_second(key, value, hash, &old_leaf, &internal_node_hash, &side)?; + } else { + self.insert_third_or_later( + key, + value, + hash, + &old_leaf, + index, + &internal_node_hash, + &side, + )?; + } + } + } + + Ok(()) + } + + fn insert_first(&mut self, key: KvId, value: KvId, hash: &Hash) { + let new_leaf_block = Block { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: Node { + parent: None, + specific: NodeSpecific::Leaf { key, value }, + hash: *hash, + }, + }; + + self.clear(); + // TODO: unwrap, ack, review + self.insert_entry_to_blob(self.extend_index(), &new_leaf_block) + .unwrap(); + } + + fn insert_second( + &mut self, + key: KvId, + value: KvId, + hash: &Hash, + old_leaf: &Node, + internal_node_hash: &Hash, + side: &Side, + ) -> Result<(), String> { + self.clear(); + // TODO: just handling the nodes below being out of order. 
this all still smells a bit + self.blob.resize(BLOCK_SIZE * 3, 0); + + let new_internal_block = Block { + metadata: NodeMetadata { + node_type: NodeType::Internal, + dirty: false, + }, + node: Node { + parent: None, + specific: NodeSpecific::Internal { left: 1, right: 2 }, + hash: *internal_node_hash, + }, + }; + + self.insert_entry_to_blob(0, &new_internal_block)?; + + let NodeSpecific::Leaf { + key: old_leaf_key, + value: old_leaf_value, + } = old_leaf.specific + else { + return Err("old leaf unexpectedly not a leaf".to_string()); + }; + let nodes = [ + ( + match side { + Side::Left => 2, + Side::Right => 1, + }, + Node { + parent: Some(0), + specific: NodeSpecific::Leaf { + key: old_leaf_key, + value: old_leaf_value, + }, + hash: old_leaf.hash, + }, + ), + ( + match side { + Side::Left => 1, + Side::Right => 2, + }, + Node { + parent: Some(0), + specific: NodeSpecific::Leaf { key, value }, + hash: *hash, + }, + ), + ]; + + for (index, node) in nodes { + let block = Block { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node, + }; + + self.insert_entry_to_blob(index, &block)?; + } + + Ok(()) + } + + // TODO: no really, actually consider the too many arguments complaint + #[allow(clippy::too_many_arguments)] + fn insert_third_or_later( + &mut self, + key: KvId, + value: KvId, + hash: &Hash, + old_leaf: &Node, + old_leaf_index: TreeIndex, + internal_node_hash: &Hash, + side: &Side, + ) -> Result<(), String> { + let new_leaf_index = self.get_new_index(); + let new_internal_node_index = self.get_new_index(); + + let new_leaf_block = Block { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: Node { + parent: Some(new_internal_node_index), + specific: NodeSpecific::Leaf { key, value }, + hash: *hash, + }, + }; + self.insert_entry_to_blob(new_leaf_index, &new_leaf_block)?; + + let (left_index, right_index) = match side { + Side::Left => (new_leaf_index, old_leaf_index), + Side::Right => (old_leaf_index, new_leaf_index), + }; + let new_internal_block = Block { + metadata: NodeMetadata { + node_type: NodeType::Internal, + dirty: false, + }, + node: Node { + parent: old_leaf.parent, + specific: NodeSpecific::Internal { + left: left_index, + right: right_index, + }, + hash: *internal_node_hash, + }, + }; + self.insert_entry_to_blob(new_internal_node_index, &new_internal_block)?; + + let Some(old_parent_index) = old_leaf.parent else { + panic!("root found when not expected: {key:?} {value:?} {hash:?}") + }; + + let mut block = Block::from_bytes(self.get_block_bytes(old_leaf_index)?)?; + block.node.parent = Some(new_internal_node_index); + self.insert_entry_to_blob(old_leaf_index, &block)?; + + let mut old_parent_block = Block::from_bytes(self.get_block_bytes(old_parent_index)?)?; + if let NodeSpecific::Internal { + ref mut left, + ref mut right, + .. 
+ } = old_parent_block.node.specific + { + if old_leaf_index == *left { + *left = new_internal_node_index; + } else if old_leaf_index == *right { + *right = new_internal_node_index; + } else { + panic!("child not a child of its parent"); + } + } else { + panic!("expected internal node but found leaf"); + }; + + self.insert_entry_to_blob(old_parent_index, &old_parent_block)?; + + self.mark_lineage_as_dirty(old_parent_index)?; + + Ok(()) + } + + pub fn delete(&mut self, key: KvId) -> Result<(), String> { + let leaf_index = *self + .key_to_index + .get(&key) + .ok_or(format!("unknown key: {key}"))?; + let leaf = self.get_node(leaf_index)?; + + // TODO: maybe some common way to indicate/perform sanity double checks? + let NodeSpecific::Leaf { .. } = leaf.specific else { + panic!("key to index cache resulted in internal node") + }; + self.key_to_index.remove(&key); + + let Some(parent_index) = leaf.parent else { + self.clear(); + return Ok(()); + }; + + self.free_indexes.insert(leaf_index); + let parent = self.get_node(parent_index)?; + // TODO: kinda implicit that we 'check' that parent is internal inside .sibling_index() + let sibling_index = parent.specific.sibling_index(leaf_index); + let mut sibling_block = self.get_block(sibling_index)?; + + let Some(grandparent_index) = parent.parent else { + sibling_block.node.parent = None; + self.insert_entry_to_blob(0, &sibling_block)?; + + if let NodeSpecific::Internal { left, right } = sibling_block.node.specific { + for child_index in [left, right] { + let mut block = self.get_block(child_index)?; + block.node.parent = Some(0); + self.insert_entry_to_blob(child_index, &block)?; + } + }; + + self.free_indexes.insert(sibling_index); + + return Ok(()); + }; + + self.free_indexes.insert(parent_index); + let mut grandparent_block = self.get_block(grandparent_index)?; + + sibling_block.node.parent = Some(grandparent_index); + self.insert_entry_to_blob(sibling_index, &sibling_block)?; + + if let NodeSpecific::Internal { + ref mut left, + ref mut right, + .. + } = grandparent_block.node.specific + { + match parent_index { + x if x == *left => *left = sibling_index, + x if x == *right => *right = sibling_index, + _ => panic!("parent not a child a grandparent"), + } + } else { + panic!("grandparent not an internal node") + } + self.insert_entry_to_blob(grandparent_index, &grandparent_block)?; + + self.mark_lineage_as_dirty(grandparent_index)?; + + Ok(()) + } + + pub fn upsert(&mut self, key: KvId, value: KvId, new_hash: &Hash) -> Result<(), String> { + let Some(leaf_index) = self.key_to_index.get(&key) else { + self.insert(key, value, new_hash, InsertLocation::Auto)?; + return Ok(()); + }; + + let mut block = self.get_block(*leaf_index)?; + if let NodeSpecific::Leaf { + value: ref mut inplace_value, + .. + } = block.node.specific + { + block.node.hash.clone_from(new_hash); + *inplace_value = value; + } else { + panic!("expected internal node but found leaf"); + } + self.insert_entry_to_blob(*leaf_index, &block)?; + + if let Some(parent) = block.node.parent { + self.mark_lineage_as_dirty(parent)?; + } + + Ok(()) + } + + pub fn check(&self) -> Result<(), String> { + let mut leaf_count: usize = 0; + let mut internal_count: usize = 0; + + for (index, block) in self { + match block.node.specific { + NodeSpecific::Internal { .. } => internal_count += 1, + NodeSpecific::Leaf { key, .. 
} => { + leaf_count += 1; + let cached_index = self + .key_to_index + .get(&key) + .ok_or(format!("key not in key to index cache: {key:?}"))?; + assert_eq!( + *cached_index, index, + "key to index cache for {key:?} should be {index:?} got: {cached_index:?}" + ); + // TODO: consider what type free indexes should be + assert!( + !self.free_indexes.contains(&index), + "{}", + format!("active index found in free index list: {index:?}") + ); + } + } + } + + let key_to_index_cache_length = self.key_to_index.len(); + assert_eq!(leaf_count, key_to_index_cache_length, "found {leaf_count:?} leaves but key to index cache length is: {key_to_index_cache_length:?}"); + let total_count = leaf_count + internal_count + self.free_indexes.len(); + let extend_index = self.extend_index(); + assert_eq!( + total_count, extend_index as usize, + "expected total node count {extend_index:?} found: {total_count:?}", + ); + + Ok(()) + // TODO: check parent/child bidirectional accuracy + } + + // fn update_parent(&mut self, index: TreeIndex, parent: Option) -> Result<(), String> { + // let range = self.get_block_range(index); + // + // let mut node = self.get_node(index)?; + // node.parent = parent; + // self.blob[range].copy_from_slice(&node.to_bytes()); + // + // Ok(()) + // } + + // fn update_left(&mut self, index: TreeIndex, left: Option) -> Result<(), String> { + // let range = self.get_block_range(index); + // + // let mut node = self.get_node(index)?; + // node.left = left; + // self.blob[range].copy_from_slice(&node.to_bytes()); + // + // Ok(()) + // } + + fn mark_lineage_as_dirty(&mut self, index: TreeIndex) -> Result<(), String> { + let mut next_index = Some(index); + + while let Some(this_index) = next_index { + let mut block = Block::from_bytes(self.get_block_bytes(this_index)?)?; + + if block.metadata.dirty { + return Ok(()); + } + + block.metadata.dirty = true; + self.insert_entry_to_blob(this_index, &block)?; + next_index = block.node.parent; + } + + Ok(()) + } + + fn get_new_index(&mut self) -> TreeIndex { + match self.free_indexes.iter().next().copied() { + None => { + // TODO: should this extend...? + // TODO: should this update free indexes...? + let index = self.extend_index(); + self.blob.extend_from_slice(&[0; BLOCK_SIZE]); + index + } + Some(new_index) => { + self.free_indexes.remove(&new_index); + new_index + } + } + } + + fn get_random_insert_location_by_seed( + &self, + seed_bytes: &[u8], + ) -> Result { + let mut seed_bytes = Vec::from(seed_bytes); + + if self.blob.is_empty() { + return Ok(InsertLocation::AsRoot); + } + + let side = if (seed_bytes + .last() + .ok_or("zero-length seed bytes not allowed")? + & 1 << 7) + == 0 + { + Side::Left + } else { + Side::Right + }; + let mut next_index: TreeIndex = 0; + let mut node = self.get_node(next_index)?; + + loop { + for byte in &seed_bytes { + for bit in 0..8 { + match node.specific { + NodeSpecific::Leaf { .. } => { + return Ok(InsertLocation::Leaf { + index: next_index, + side, + }) + } + NodeSpecific::Internal { left, right, .. 
} => { + next_index = if byte & (1 << bit) != 0 { left } else { right }; + node = self.get_node(next_index)?; + } + } + } + } + + seed_bytes = sha256_bytes(&seed_bytes).into(); + } + } + + fn get_random_insert_location_by_kvid(&self, seed: KvId) -> Result { + let seed = sha256_num(seed); + + self.get_random_insert_location_by_seed(&seed) + } + + fn extend_index(&self) -> TreeIndex { + let blob_length = self.blob.len(); + let index: TreeIndex = (blob_length / BLOCK_SIZE) as TreeIndex; + let remainder = blob_length % BLOCK_SIZE; + assert_eq!(remainder, 0, "blob length {blob_length:?} not a multiple of {BLOCK_SIZE:?}, remainder: {remainder:?}"); + + index + } + + fn insert_entry_to_blob(&mut self, index: TreeIndex, block: &Block) -> Result<(), String> { + let new_block_bytes = block.to_bytes(); + let extend_index = self.extend_index(); + match index.cmp(&extend_index) { + Ordering::Greater => return Err(format!("block index out of range: {index}")), + Ordering::Equal => self.blob.extend_from_slice(&new_block_bytes), + Ordering::Less => { + // TODO: lots of deserialization here for just the key + let old_block = self.get_block(index)?; + if !self.free_indexes.contains(&index) + && old_block.metadata.node_type == NodeType::Leaf + { + // TODO: sort of repeating the leaf check above and below. smells a little + if let NodeSpecific::Leaf { + key: old_block_key, .. + } = old_block.node.specific + { + self.key_to_index.remove(&old_block_key); + }; + }; + self.blob[block_range(index)].copy_from_slice(&new_block_bytes); + } + } + + if let NodeSpecific::Leaf { key, .. } = block.node.specific { + self.key_to_index.insert(key, index); + }; + + self.free_indexes.take(&index); + + Ok(()) + } + + fn get_block(&self, index: TreeIndex) -> Result { + Block::from_bytes(self.get_block_bytes(index)?) + } + + // fn get_block_slice(&self, index: TreeIndex) -> Result<&mut BlockBytes, String> { + // let metadata_start = index as usize * BLOCK_SIZE; + // let data_start = metadata_start + METADATA_SIZE; + // let end = data_start + DATA_SIZE; + // + // self.blob + // .get(metadata_start..end) + // .ok_or(format!("index out of bounds: {index}"))? + // .try_into() + // .map_err(|e| format!("failed getting block {index}: {e}")) + // } + + fn get_block_bytes(&self, index: TreeIndex) -> Result { + self.blob + .get(block_range(index)) + .ok_or(format!("block index out of bounds: {index}"))? + .try_into() + .map_err(|e| format!("failed getting block {index}: {e}")) + } + + pub fn get_node(&self, index: TreeIndex) -> Result { + // TODO: use Block::from_bytes() + // TODO: handle invalid indexes? + // TODO: handle overflows? + let block = self.get_block_bytes(index)?; + let metadata_blob: MetadataBytes = block[METADATA_RANGE].try_into().unwrap(); + let data_blob: DataBytes = block[DATA_RANGE].try_into().unwrap(); + let metadata = NodeMetadata::from_bytes(metadata_blob) + .map_err(|message| format!("failed loading metadata: {message})"))?; + + Node::from_bytes(&metadata, data_blob) + .map_err(|message| format!("failed loading node: {message}")) + } + + pub fn get_parent_index(&self, index: TreeIndex) -> Result { + let block = self.get_block_bytes(index)?; + + Ok(Node::parent_from_bytes( + block[DATA_RANGE].try_into().unwrap(), + )) + } + + pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { + // TODO: what about an index that happens to be the null index? 
a question for everywhere i guess + let mut next_index = Some(index); + let mut lineage = vec![]; + + while let Some(this_index) = next_index { + let node = self.get_node(this_index)?; + next_index = node.parent; + lineage.push(node); + } + + Ok(lineage) + } + + pub fn get_lineage_indexes(&self, index: TreeIndex) -> Result, String> { + // TODO: yep, this 'optimization' might be overkill, and should be speed compared regardless + // TODO: what about an index that happens to be the null index? a question for everywhere i guess + let mut next_index = Some(index); + let mut lineage: Vec = vec![]; + + while let Some(this_index) = next_index { + lineage.push(this_index); + next_index = self.get_parent_index(this_index)?; + } + + Ok(lineage) + } + + pub fn iter(&self) -> MerkleBlobLeftChildFirstIterator<'_> { + <&Self as IntoIterator>::into_iter(self) + } + + pub fn calculate_lazy_hashes(&mut self) -> Result<(), String> { + // TODO: really want a truncated traversal, not filter + // TODO: yeah, storing the whole set of blocks via collect is not great + for (index, mut block) in self + .iter() + .filter(|(_, block)| block.metadata.dirty) + .collect::>() + { + let NodeSpecific::Internal { left, right } = block.node.specific else { + panic!("leaves should not be dirty") + }; + // TODO: obviously inefficient to re-get/deserialize these blocks inside + // an iteration that's already doing that + let left = self.get_block(left)?; + let right = self.get_block(right)?; + // TODO: wrap this up in Block maybe? just to have 'control' of dirty being 'accurate' + block.node.hash = internal_hash(&left.node.hash, &right.node.hash); + block.metadata.dirty = false; + self.insert_entry_to_blob(index, &block)?; + } + + Ok(()) + } + + #[allow(unused)] + fn relocate_node(&mut self, source: TreeIndex, destination: TreeIndex) -> Result<(), String> { + let extend_index = self.extend_index(); + // TODO: perhaps relocation of root should be allowed for some use + if source == 0 { + return Err("relocation of the root and index zero is not allowed".to_string()); + }; + assert!(source < extend_index); + assert!(!self.free_indexes.contains(&source)); + assert!(destination <= extend_index); + assert!(destination == extend_index || self.free_indexes.contains(&destination)); + + let source_block = self.get_block(source).unwrap(); + if let Some(parent) = source_block.node.parent { + let mut parent_block = self.get_block(parent).unwrap(); + let NodeSpecific::Internal { + ref mut left, + ref mut right, + } = parent_block.node.specific + else { + panic!(); + }; + match source { + x if x == *left => *left = destination, + x if x == *right => *right = destination, + _ => panic!(), + } + self.insert_entry_to_blob(parent, &parent_block).unwrap(); + } + + if let NodeSpecific::Internal { left, right, .. } = source_block.node.specific { + for child in [left, right] { + let mut block = self.get_block(child).unwrap(); + block.node.parent = Some(destination); + self.insert_entry_to_blob(child, &block).unwrap(); + } + } + + self.free_indexes.insert(source); + + Ok(()) + } + + #[allow(unused)] + fn rebuild(&mut self) -> Result<(), String> { + panic!(); + // TODO: could make insert_entry_to_blob a free function and not need to make + // a merkle blob here? maybe? + let mut new = Self::new(Vec::new())?; + for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob).enumerate() { + // new.insert_entry_to_blob(index, )? 
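+            // (sketch of the intent, not implemented yet) each block would be
+            // re-inserted into `new` in parent-first order so the rebuilt blob
+            // is contiguous and any free or stale blocks are dropped.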
+ } + self.blob = new.blob; + + Ok(()) + } + + #[allow(unused)] + fn get_key_value_map(&self) -> HashMap { + let mut key_value = HashMap::new(); + for (key, index) in &self.key_to_index { + let NodeSpecific::Leaf { value, .. } = self.get_node(*index).unwrap().specific else { + panic!() + }; + key_value.insert(*key, value); + } + + key_value + } +} + +impl PartialEq for MerkleBlob { + fn eq(&self, other: &Self) -> bool { + // TODO: should we check the indexes? + for ((_, self_block), (_, other_block)) in zip(self, other) { + if (self_block.metadata.dirty || other_block.metadata.dirty) + || self_block.node.hash != other_block.node.hash + // TODO: isn't only a leaf supposed to check this? + || self_block.node.specific != other_block.node.specific + { + return false; + } + } + + true + } +} + +impl<'a> IntoIterator for &'a MerkleBlob { + // TODO: review efficiency in whatever use cases we end up with, vs Item = Node etc + type Item = (TreeIndex, Block); + type IntoIter = MerkleBlobLeftChildFirstIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { + // TODO: review types around this to avoid copying + MerkleBlobLeftChildFirstIterator::new(&self.blob[..]) + } +} + +#[cfg(feature = "py-bindings")] +#[pymethods] +impl MerkleBlob { + #[allow(clippy::needless_pass_by_value)] + #[new] + pub fn py_init(blob: PyBuffer) -> PyResult { + assert!( + blob.is_c_contiguous(), + "from_bytes() must be called with a contiguous buffer" + ); + #[allow(unsafe_code)] + let slice = + unsafe { std::slice::from_raw_parts(blob.buf_ptr() as *const u8, blob.len_bytes()) }; + + Ok(Self::new(Vec::from(slice)).unwrap()) + } + + #[pyo3(name = "insert")] + pub fn py_insert(&mut self, key: KvId, value: KvId, hash: Hash) -> PyResult<()> { + // TODO: consider the error + // TODO: expose insert location + self.insert(key, value, &hash, InsertLocation::Auto) + .unwrap(); + + Ok(()) + } + + #[pyo3(name = "delete")] + pub fn py_delete(&mut self, key: KvId) -> PyResult<()> { + // TODO: consider the error + self.delete(key).unwrap(); + + Ok(()) + } + + #[pyo3(name = "__len__")] + pub fn py_len(&self) -> PyResult { + Ok(self.blob.len()) + } +} + +struct MerkleBlobLeftChildFirstIteratorItem { + visited: bool, + index: TreeIndex, +} + +pub struct MerkleBlobLeftChildFirstIterator<'a> { + blob: &'a [u8], + deque: VecDeque, +} + +impl<'a> MerkleBlobLeftChildFirstIterator<'a> { + fn new(blob: &'a [u8]) -> Self { + let mut deque = VecDeque::new(); + if blob.len() / BLOCK_SIZE > 0 { + deque.push_back(MerkleBlobLeftChildFirstIteratorItem { + visited: false, + index: 0, + }); + } + + Self { blob, deque } + } +} + +impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { + type Item = (TreeIndex, Block); + + fn next(&mut self) -> Option { + // left sibling first, children before parents + + loop { + let item = self.deque.pop_front()?; + let block_bytes: BlockBytes = self.blob[block_range(item.index)].try_into().unwrap(); + let block = Block::from_bytes(block_bytes).unwrap(); + + match block.node.specific { + NodeSpecific::Leaf { .. 
} => return Some((item.index, block)), + NodeSpecific::Internal { left, right } => { + if item.visited { + return Some((item.index, block)); + }; + + self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { + visited: true, + index: item.index, + }); + self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { + visited: false, + index: right, + }); + self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { + visited: false, + index: left, + }); + } + } + } + } +} + +pub struct MerkleBlobParentFirstIterator<'a> { + blob: &'a [u8], + deque: VecDeque, +} + +impl<'a> MerkleBlobParentFirstIterator<'a> { + fn new(blob: &'a [u8]) -> Self { + let mut deque = VecDeque::new(); + if blob.len() / BLOCK_SIZE > 0 { + deque.push_back(0); + } + + Self { blob, deque } + } +} + +impl Iterator for MerkleBlobParentFirstIterator<'_> { + type Item = Block; + + fn next(&mut self) -> Option { + // left sibling first, parents before children + + loop { + let index = self.deque.pop_front()?; + let block_bytes: BlockBytes = self.blob[block_range(index)].try_into().unwrap(); + let block = Block::from_bytes(block_bytes).unwrap(); + + match block.node.specific { + NodeSpecific::Leaf { .. } => return Some(block), + NodeSpecific::Internal { left, right } => { + self.deque.push_front(right); + self.deque.push_front(left); + } + } + } + } +} + +pub struct MerkleBlobBreadthFirstIterator<'a> { + blob: &'a [u8], + deque: VecDeque, +} + +impl<'a> MerkleBlobBreadthFirstIterator<'a> { + #[allow(unused)] + fn new(blob: &'a [u8]) -> Self { + let mut deque = VecDeque::new(); + if blob.len() / BLOCK_SIZE > 0 { + deque.push_back(0); + } + + Self { blob, deque } + } +} + +impl Iterator for MerkleBlobBreadthFirstIterator<'_> { + type Item = Block; + + fn next(&mut self) -> Option { + // left sibling first, parent depth before child depth + + loop { + let index = self.deque.pop_front()?; + let block_bytes: BlockBytes = self.blob[block_range(index)].try_into().unwrap(); + let block = Block::from_bytes(block_bytes).unwrap(); + + match block.node.specific { + NodeSpecific::Leaf { .. 
} => return Some(block), + NodeSpecific::Internal { left, right } => { + self.deque.push_back(left); + self.deque.push_back(right); + } + } + } + } +} + +#[cfg(test)] +mod dot; +#[cfg(test)] +mod tests { + use super::*; + // use hex_literal::hex; + // use dot::open_dot; + use rstest::{fixture, rstest}; + use std::time::{Duration, Instant}; + + // const EXAMPLE_BLOB: [u8; 138] = hex!("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b"); + // const HASH: Hash = [ + // 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + // 35, 36, 37, 38, 39, 40, 41, 42, 43, + // ]; + // + // const EXAMPLE_ROOT: Node = Node { + // parent: None, + // specific: NodeSpecific::Internal { left: 1, right: 2 }, + // hash: HASH, + // index: 0, + // }; + // const EXAMPLE_ROOT_METADATA: NodeMetadata = NodeMetadata { + // node_type: NodeType::Internal, + // dirty: true, + // }; + // const EXAMPLE_LEFT_LEAF: Node = Node { + // parent: Some(0), + // specific: NodeSpecific::Leaf { + // key: 0x0405_0607_0809_0A0B, + // value: 0x1415_1617_1819_1A1B, + // }, + // hash: HASH, + // index: 1, + // }; + // const EXAMPLE_LEFT_LEAF_METADATA: NodeMetadata = NodeMetadata { + // node_type: NodeType::Leaf, + // dirty: false, + // }; + // const EXAMPLE_RIGHT_LEAF: Node = Node { + // parent: Some(0), + // specific: NodeSpecific::Leaf { + // key: 0x2425_2627_2829_2A2B, + // value: 0x3435_3637_3839_3A3B, + // }, + // hash: HASH, + // index: 2, + // }; + // const EXAMPLE_RIGHT_LEAF_METADATA: NodeMetadata = NodeMetadata { + // node_type: NodeType::Leaf, + // dirty: false, + // }; + + // fn example_merkle_blob() -> MerkleBlob { + // MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap() + // } + + #[allow(unused)] + fn normalized_blob(merkle_blob: &MerkleBlob) -> Vec { + let mut new = MerkleBlob::new(merkle_blob.blob.clone()).unwrap(); + + new.calculate_lazy_hashes(); + new.rebuild(); + + new.blob + } + + #[test] + fn test_node_type_serialized_values() { + // TODO: can i make sure we cover all variants? + assert_eq!(NodeType::Internal as u8, 0); + assert_eq!(NodeType::Leaf as u8, 1); + + for node_type in [NodeType::Internal, NodeType::Leaf] { + assert_eq!(node_type.to_u8(), node_type.clone() as u8,); + assert_eq!( + NodeType::from_u8(node_type.clone() as u8).unwrap(), + node_type, + ); + } + } + + #[test] + fn test_internal_hash() { + // TODO: yeah, various questions around this and how to express 'this is dl internal hash' + // without silly repetition. maybe just a use as. 
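+        // internal_hash() is sha256(0x02 || left_hash || right_hash), i.e. the
+        // CLVM tree hash of the pair (left_hash, right_hash), which is what the
+        // clvm_utils::tree_hash_pair comparison below verifies.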
+ // in Python: Program.to((left_hash, right_hash)).get_tree_hash_precalc(left_hash, right_hash) + let left: Hash = [ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + ]; + let right: Hash = [ + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + ]; + assert_eq!( + internal_hash(&left, &right), + clvm_utils::tree_hash_pair( + clvm_utils::TreeHash::new(left), + clvm_utils::TreeHash::new(right) + ) + .to_bytes(), + ); + } + + #[rstest] + fn test_node_metadata_from_to( + #[values(false, true)] dirty: bool, + // TODO: can we make sure we cover all variants + #[values(NodeType::Internal, NodeType::Leaf)] node_type: NodeType, + ) { + let bytes: [u8; 2] = [node_type.to_u8(), dirty as u8]; + let object = NodeMetadata::from_bytes(bytes).unwrap(); + assert_eq!(object, NodeMetadata { node_type, dirty },); + assert_eq!(object.to_bytes(), bytes); + assert_eq!( + NodeMetadata::node_type_from_bytes(bytes).unwrap(), + object.node_type + ); + assert_eq!(NodeMetadata::dirty_from_bytes(bytes).unwrap(), object.dirty); + } + + // #[test] + // fn test_load_a_python_dump() { + // let merkle_blob = example_merkle_blob(); + // merkle_blob.get_node(0).unwrap(); + // + // merkle_blob.check().unwrap(); + // } + + #[fixture] + fn small_blob() -> MerkleBlob { + let mut blob = MerkleBlob::new(vec![]).unwrap(); + + blob.insert( + 0x0001_0203_0405_0607, + 0x1011_1213_1415_1617, + &sha256_num(0x1020), + InsertLocation::Auto, + ) + .unwrap(); + + blob.insert( + 0x2021_2223_2425_2627, + 0x3031_3233_3435_3637, + &sha256_num(0x2030), + InsertLocation::Auto, + ) + .unwrap(); + + blob + } + + #[rstest] + fn test_get_lineage(small_blob: MerkleBlob) { + let lineage = small_blob.get_lineage(2).unwrap(); + for node in &lineage { + println!("{node:?}"); + } + assert_eq!(lineage.len(), 2); + let last_node = lineage.last().unwrap(); + assert_eq!(last_node.parent, None); + + small_blob.check().unwrap(); + } + + #[rstest] + #[case::right(0, 2, Side::Left)] + #[case::left(0xff, 1, Side::Right)] + fn test_get_random_insert_location_by_seed( + #[case] seed: u8, + #[case] expected_index: TreeIndex, + #[case] expected_side: Side, + small_blob: MerkleBlob, + ) { + let location = small_blob + .get_random_insert_location_by_seed(&[seed; 32]) + .unwrap(); + + assert_eq!( + location, + InsertLocation::Leaf { + index: expected_index, + side: expected_side + }, + ); + + small_blob.check().unwrap(); + } + + // #[test] + // fn test_build_blob_and_read() { + // let mut blob: Vec = Vec::new(); + // + // blob.extend(EXAMPLE_ROOT_METADATA.to_bytes()); + // blob.extend(EXAMPLE_ROOT.to_bytes()); + // blob.extend(EXAMPLE_LEFT_LEAF_METADATA.to_bytes()); + // blob.extend(EXAMPLE_LEFT_LEAF.to_bytes()); + // blob.extend(EXAMPLE_RIGHT_LEAF_METADATA.to_bytes()); + // blob.extend(EXAMPLE_RIGHT_LEAF.to_bytes()); + // + // assert_eq!(blob, Vec::from(EXAMPLE_BLOB)); + // + // let merkle_blob = MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap(); + // + // assert_eq!(merkle_blob.get_node(0).unwrap(), EXAMPLE_ROOT); + // assert_eq!(merkle_blob.get_node(1).unwrap(), EXAMPLE_LEFT_LEAF); + // assert_eq!(merkle_blob.get_node(2).unwrap(), EXAMPLE_RIGHT_LEAF); + // + // merkle_blob.check().unwrap(); + // } + + // #[test] + // fn test_build_merkle() { + // let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + // + // let (key, value) = EXAMPLE_LEFT_LEAF.key_value(); + // merkle_blob + // .insert(key, value, 
&EXAMPLE_LEFT_LEAF.hash) + // .unwrap(); + // let (key, value) = EXAMPLE_RIGHT_LEAF.key_value(); + // merkle_blob + // .insert(key, value, &EXAMPLE_RIGHT_LEAF.hash) + // .unwrap(); + // + // // TODO: just hacking here to compare with the ~wrong~ simplified reference + // let mut root = Block::from_bytes(merkle_blob.get_block_bytes(0).unwrap(), 0).unwrap(); + // root.metadata.dirty = true; + // root.node.hash = HASH; + // assert_eq!(root.metadata.node_type, NodeType::Internal); + // merkle_blob + // .insert_entry_to_blob(0, root.to_bytes()) + // .unwrap(); + // + // assert_eq!(merkle_blob.blob, Vec::from(EXAMPLE_BLOB)); + // + // merkle_blob.check().unwrap(); + // } + + #[test] + fn test_just_insert_a_bunch() { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + let mut total_time = Duration::new(0, 0); + + for i in 0..100_000 { + let start = Instant::now(); + merkle_blob + // TODO: yeah this hash is garbage + .insert(i, i, &sha256_num(i), InsertLocation::Auto) + .unwrap(); + let end = Instant::now(); + total_time += end.duration_since(start); + + // match i + 1 { + // 2 => assert_eq!(merkle_blob.blob.len(), 3 * BLOCK_SIZE), + // 3 => assert_eq!(merkle_blob.blob.len(), 5 * BLOCK_SIZE), + // _ => (), + // } + + // let file = fs::File::create(format!("/home/altendky/tmp/mbt/rs/{i:0>4}")).unwrap(); + // let mut file = io::LineWriter::new(file); + // for block in merkle_blob.blob.chunks(BLOCK_SIZE) { + // let mut s = String::new(); + // for byte in block { + // s.push_str(&format!("{:02x}", byte)); + // } + // s.push_str("\n"); + // file.write_all(s.as_bytes()).unwrap(); + // } + + // fs::write(format!("/home/altendky/tmp/mbt/rs/{i:0>4}"), &merkle_blob.blob).unwrap(); + } + // println!("{:?}", merkle_blob.blob) + + println!("total time: {total_time:?}"); + // TODO: check, well... 
something + + merkle_blob.calculate_lazy_hashes().unwrap(); + + merkle_blob.check().unwrap(); + } + + #[test] + fn test_delete_in_reverse_creates_matching_trees() { + const COUNT: usize = 10; + let mut dots = vec![]; + + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + let mut reference_blobs = vec![]; + + let key_value_ids: [KvId; COUNT] = core::array::from_fn(|i| i as KvId); + + for key_value_id in key_value_ids { + let hash: Hash = sha256_num(key_value_id); + + println!("inserting: {key_value_id}"); + merkle_blob.calculate_lazy_hashes().unwrap(); + reference_blobs.push(MerkleBlob::new(merkle_blob.blob.clone()).unwrap()); + merkle_blob + .insert(key_value_id, key_value_id, &hash, InsertLocation::Auto) + .unwrap(); + dots.push(merkle_blob.to_dot().dump()); + } + + merkle_blob.check().unwrap(); + + for key_value_id in key_value_ids.iter().rev() { + println!("deleting: {key_value_id}"); + merkle_blob.delete(*key_value_id).unwrap(); + merkle_blob.calculate_lazy_hashes().unwrap(); + assert_eq!(merkle_blob, reference_blobs[*key_value_id as usize]); + dots.push(merkle_blob.to_dot().dump()); + } + + merkle_blob.check().unwrap(); + } + + #[test] + fn test_insert_first() { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + let key_value_id: KvId = 1; + // open_dot(&mut merkle_blob.to_dot().set_note("empty")); + merkle_blob + .insert( + key_value_id, + key_value_id, + &sha256_num(key_value_id), + InsertLocation::Auto, + ) + .unwrap(); + // open_dot(&mut merkle_blob.to_dot().set_note("first after")); + + merkle_blob.check().unwrap(); + assert_eq!(merkle_blob.key_to_index.len(), 1); + } + + #[rstest] + fn test_insert_choosing_side( + #[values(Side::Left, Side::Right)] side: Side, + #[values(1, 2)] pre_count: usize, + ) { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + let mut last_key: KvId = 0; + for i in 1..=pre_count { + let key: KvId = i as KvId; + // open_dot(&mut merkle_blob.to_dot().set_note("empty")); + merkle_blob + .insert(key, key, &sha256_num(key), InsertLocation::Auto) + .unwrap(); + last_key = key; + } + + let key_value_id: KvId = pre_count as KvId + 1; + // open_dot(&mut merkle_blob.to_dot().set_note("first after")); + merkle_blob + .insert( + key_value_id, + key_value_id, + &sha256_num(key_value_id), + InsertLocation::Leaf { + index: merkle_blob.key_to_index[&last_key], + side: side.clone(), + }, + ) + .unwrap(); + // open_dot(&mut merkle_blob.to_dot().set_note("first after")); + + let sibling = merkle_blob + .get_node(merkle_blob.key_to_index[&last_key]) + .unwrap(); + let parent = merkle_blob.get_node(sibling.parent.unwrap()).unwrap(); + let NodeSpecific::Internal { left, right } = parent.specific else { + panic!() + }; + + let NodeSpecific::Leaf { key: left_key, .. } = merkle_blob.get_node(left).unwrap().specific + else { + panic!() + }; + let NodeSpecific::Leaf { key: right_key, .. 
} = + merkle_blob.get_node(right).unwrap().specific + else { + panic!() + }; + + let expected_keys: [KvId; 2] = match side { + Side::Left => [pre_count as KvId + 1, pre_count as KvId], + Side::Right => [pre_count as KvId, pre_count as KvId + 1], + }; + assert_eq!([left_key, right_key], expected_keys); + + merkle_blob.check().unwrap(); + } + + #[test] + fn test_delete_last() { + let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); + + let key_value_id: KvId = 1; + // open_dot(&mut merkle_blob.to_dot().set_note("empty")); + merkle_blob + .insert( + key_value_id, + key_value_id, + &sha256_num(key_value_id), + InsertLocation::Auto, + ) + .unwrap(); + // open_dot(&mut merkle_blob.to_dot().set_note("first after")); + merkle_blob.check().unwrap(); + + merkle_blob.delete(key_value_id).unwrap(); + + merkle_blob.check().unwrap(); + assert_eq!(merkle_blob.key_to_index.len(), 0); + } + + #[rstest] + fn test_delete_frees_index(mut small_blob: MerkleBlob) { + let key = 0x0001_0203_0405_0607; + let index = small_blob.key_to_index[&key]; + small_blob.delete(key).unwrap(); + + assert_eq!(small_blob.free_indexes, HashSet::from([index, 2])); + } + + #[rstest] + fn test_get_new_index_with_free_index(mut small_blob: MerkleBlob) { + let key = 0x0001_0203_0405_0607; + let _ = small_blob.key_to_index[&key]; + small_blob.delete(key).unwrap(); + + let expected = HashSet::from([1, 2]); + assert_eq!(small_blob.free_indexes, expected); + // NOTE: both 1 and 2 are free per test_delete_frees_index + assert!(expected.contains(&small_blob.get_new_index())); + } + + #[rstest] + fn test_dump_small_blob_bytes(small_blob: MerkleBlob) { + println!("{}", hex::encode(small_blob.blob)); + } + + #[test] + #[should_panic(expected = "unknown NodeType value: 2")] + fn test_node_type_from_u8_invalid() { + let _ = NodeType::from_u8(2); + } + + #[test] + fn test_node_metadata_dirty_from_bytes_invalid() { + NodeMetadata::dirty_from_bytes([0, 2]).expect_err("invalid value should fail"); + } + + #[test] + #[should_panic(expected = "unable to get sibling index from a leaf")] + fn test_node_specific_sibling_index_panics_for_leaf() { + let leaf = NodeSpecific::Leaf { key: 0, value: 0 }; + leaf.sibling_index(0); + } + + #[test] + #[should_panic(expected = "index not a child: 2")] + fn test_node_specific_sibling_index_panics_for_unknown_sibling() { + let node = NodeSpecific::Internal { left: 0, right: 1 }; + node.sibling_index(2); + } + + #[rstest] + fn test_get_free_indexes(small_blob: MerkleBlob) { + let mut blob = small_blob.blob.clone(); + let expected_free_index = (blob.len() / BLOCK_SIZE) as TreeIndex; + blob.extend_from_slice(&[0; BLOCK_SIZE]); + let (free_indexes, _) = get_free_indexes_and_keys_values_indexes(&blob); + assert_eq!(free_indexes, HashSet::from([expected_free_index])); + } + + #[test] + fn test_merkle_blob_new_errs_for_nonmultiple_of_block_length() { + MerkleBlob::new(vec![1]).expect_err("invalid length should fail"); + } + + #[rstest] + fn test_upsert_inserts(small_blob: MerkleBlob) { + let key = 1234; + assert!(!small_blob.key_to_index.contains_key(&key)); + let value = 5678; + + let mut insert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); + insert_blob + .insert(key, value, &sha256_num(key), InsertLocation::Auto) + .unwrap(); + // open_dot(insert_blob.to_dot().set_note("first after")); + + let mut upsert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); + upsert_blob.upsert(key, value, &sha256_num(key)).unwrap(); + // open_dot(upsert_blob.to_dot().set_note("first after")); + + 
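// [editor's note, not part of the original patch] For a key that is not yet
// present, `upsert` is expected to behave exactly like a plain insert (later
// commits in this series implement it as a fallback to an automatic-location
// insert), so the two blobs built above should come out byte-for-byte equal,
// which the assertion below verifies. A minimal usage sketch under that
// assumption (the key/value numbers are arbitrary):
//
//     let mut blob = MerkleBlob::new(vec![]).unwrap();
//     blob.upsert(42, 7, &sha256_num(42)).unwrap(); // key absent: acts like insert
//     blob.upsert(42, 8, &sha256_num(42)).unwrap(); // key present: updates the existing leaf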
assert_eq!(insert_blob.blob, upsert_blob.blob); + } + + #[rstest] + fn test_upsert_upserts(mut small_blob: MerkleBlob) { + let before_blocks = small_blob.iter().collect::>(); + let (key, index) = small_blob.key_to_index.iter().next().unwrap(); + let node = small_blob.get_node(*index).unwrap(); + let NodeSpecific::Leaf { + key: original_key, + value: original_value, + .. + } = node.specific + else { + panic!() + }; + let new_value = original_value + 1; + + small_blob.upsert(*key, new_value, &node.hash).unwrap(); + + let after_blocks = small_blob.iter().collect::>(); + + assert_eq!(before_blocks.len(), after_blocks.len()); + for ((before_index, before), (after_index, after)) in zip(before_blocks, after_blocks) { + assert_eq!(before.node.parent, after.node.parent); + assert_eq!(before_index, after_index); + let NodeSpecific::Leaf { + key: before_key, + value: before_value, + } = before.node.specific + else { + assert_eq!(before.node.specific, after.node.specific); + continue; + }; + let NodeSpecific::Leaf { + key: after_key, + value: after_value, + } = after.node.specific + else { + panic!() + }; + assert_eq!(before_key, after_key); + if before_key == original_key { + assert_eq!(after_value, new_value); + } else { + assert_eq!(before_value, after_value); + } + } + } +} diff --git a/crates/chia-datalayer/src/dot.rs b/crates/chia-datalayer/src/merkle/dot.rs similarity index 98% rename from crates/chia-datalayer/src/dot.rs rename to crates/chia-datalayer/src/merkle/dot.rs index e919fb858..e8ab29224 100644 --- a/crates/chia-datalayer/src/dot.rs +++ b/crates/chia-datalayer/src/merkle/dot.rs @@ -1,4 +1,4 @@ -use crate::{MerkleBlob, Node, NodeSpecific, TreeIndex}; +use crate::merkle::{MerkleBlob, Node, NodeSpecific, TreeIndex}; use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use url::Url; From 564d2c298d2a5db07bd74f3ebab0b8fed51255dc Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 15 Oct 2024 17:08:44 -0400 Subject: [PATCH 090/181] tidy --- crates/chia-datalayer/src/merkle.rs | 145 +++------------------------- 1 file changed, 14 insertions(+), 131 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 778b2611f..13aecc1ed 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -58,7 +58,7 @@ pub enum NodeType { impl NodeType { pub fn from_u8(value: u8) -> Result { // TODO: identify some useful structured serialization tooling we use - // TODO: find a better way to tie serialization values to enumerators + // TODO: find a better way to tie serialization values to enumerator variants match value { // ha! feel free to laugh at this x if (NodeType::Internal as u8 == x) => Ok(NodeType::Internal), @@ -255,39 +255,6 @@ impl Node { blob } - - // pub fn to_dot(&self, index: TreeIndex) -> DotLines { - // // TODO: can this be done without introducing a blank line? 
- // let node_to_parent = match self.parent { - // Some(parent) => format!("node_{index} -> node_{parent};"), - // None => String::new(), - // }; - // - // match self.specific { - // NodeSpecific::Internal {left, right} => DotLines{ - // nodes: vec![ - // format!("node_{index} [label=\"{index}\"]"), - // ], - // connections: vec![ - // format!("node_{index} -> node_{left};"), - // format!("node_{index} -> node_{right};"), - // node_to_parent, - // ], - // pair_boxes: vec![ - // format!("node [shape = box]; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}}"), - // ], - // note: String::new(), - // }, - // NodeSpecific::Leaf {key, value} => DotLines{ - // nodes: vec![ - // format!("node_{index} [shape=box, label=\"{index}\\nvalue: {key}\\nvalue: {value}\"];"), - // ], - // connections: vec![node_to_parent], - // pair_boxes: vec![], - // note: String::new(), - // }, - // } - // } } fn block_range(index: TreeIndex) -> Range { @@ -896,18 +863,6 @@ impl MerkleBlob { Block::from_bytes(self.get_block_bytes(index)?) } - // fn get_block_slice(&self, index: TreeIndex) -> Result<&mut BlockBytes, String> { - // let metadata_start = index as usize * BLOCK_SIZE; - // let data_start = metadata_start + METADATA_SIZE; - // let end = data_start + DATA_SIZE; - // - // self.blob - // .get(metadata_start..end) - // .ok_or(format!("index out of bounds: {index}"))? - // .try_into() - // .map_err(|e| format!("failed getting block {index}: {e}")) - // } - fn get_block_bytes(&self, index: TreeIndex) -> Result { self.blob .get(block_range(index)) @@ -1276,57 +1231,13 @@ mod dot; #[cfg(test)] mod tests { use super::*; - // use hex_literal::hex; - // use dot::open_dot; + use crate::merkle::dot::DotLines; use rstest::{fixture, rstest}; use std::time::{Duration, Instant}; - // const EXAMPLE_BLOB: [u8; 138] = hex!("0001ffffffff00000001000000020c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000000405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0100000000001415161718191a1b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b"); - // const HASH: Hash = [ - // 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - // 35, 36, 37, 38, 39, 40, 41, 42, 43, - // ]; - // - // const EXAMPLE_ROOT: Node = Node { - // parent: None, - // specific: NodeSpecific::Internal { left: 1, right: 2 }, - // hash: HASH, - // index: 0, - // }; - // const EXAMPLE_ROOT_METADATA: NodeMetadata = NodeMetadata { - // node_type: NodeType::Internal, - // dirty: true, - // }; - // const EXAMPLE_LEFT_LEAF: Node = Node { - // parent: Some(0), - // specific: NodeSpecific::Leaf { - // key: 0x0405_0607_0809_0A0B, - // value: 0x1415_1617_1819_1A1B, - // }, - // hash: HASH, - // index: 1, - // }; - // const EXAMPLE_LEFT_LEAF_METADATA: NodeMetadata = NodeMetadata { - // node_type: NodeType::Leaf, - // dirty: false, - // }; - // const EXAMPLE_RIGHT_LEAF: Node = Node { - // parent: Some(0), - // specific: NodeSpecific::Leaf { - // key: 0x2425_2627_2829_2A2B, - // value: 0x3435_3637_3839_3A3B, - // }, - // hash: HASH, - // index: 2, - // }; - // const EXAMPLE_RIGHT_LEAF_METADATA: NodeMetadata = NodeMetadata { - // node_type: NodeType::Leaf, - // dirty: false, - // }; - - // fn example_merkle_blob() -> MerkleBlob { - // MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap() - // } + fn open_dot(lines: &mut DotLines) { + // crate::merkle::dot::open_dot(lines); + } #[allow(unused)] fn normalized_blob(merkle_blob: &MerkleBlob) -> Vec { @@ -1393,14 +1304,6 
@@ mod tests { assert_eq!(NodeMetadata::dirty_from_bytes(bytes).unwrap(), object.dirty); } - // #[test] - // fn test_load_a_python_dump() { - // let merkle_blob = example_merkle_blob(); - // merkle_blob.get_node(0).unwrap(); - // - // merkle_blob.check().unwrap(); - // } - #[fixture] fn small_blob() -> MerkleBlob { let mut blob = MerkleBlob::new(vec![]).unwrap(); @@ -1524,27 +1427,7 @@ mod tests { .unwrap(); let end = Instant::now(); total_time += end.duration_since(start); - - // match i + 1 { - // 2 => assert_eq!(merkle_blob.blob.len(), 3 * BLOCK_SIZE), - // 3 => assert_eq!(merkle_blob.blob.len(), 5 * BLOCK_SIZE), - // _ => (), - // } - - // let file = fs::File::create(format!("/home/altendky/tmp/mbt/rs/{i:0>4}")).unwrap(); - // let mut file = io::LineWriter::new(file); - // for block in merkle_blob.blob.chunks(BLOCK_SIZE) { - // let mut s = String::new(); - // for byte in block { - // s.push_str(&format!("{:02x}", byte)); - // } - // s.push_str("\n"); - // file.write_all(s.as_bytes()).unwrap(); - // } - - // fs::write(format!("/home/altendky/tmp/mbt/rs/{i:0>4}"), &merkle_blob.blob).unwrap(); } - // println!("{:?}", merkle_blob.blob) println!("total time: {total_time:?}"); // TODO: check, well... something @@ -1594,7 +1477,7 @@ mod tests { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); let key_value_id: KvId = 1; - // open_dot(&mut merkle_blob.to_dot().set_note("empty")); + open_dot(&mut merkle_blob.to_dot().set_note("empty")); merkle_blob .insert( key_value_id, @@ -1603,7 +1486,7 @@ mod tests { InsertLocation::Auto, ) .unwrap(); - // open_dot(&mut merkle_blob.to_dot().set_note("first after")); + open_dot(&mut merkle_blob.to_dot().set_note("first after")); merkle_blob.check().unwrap(); assert_eq!(merkle_blob.key_to_index.len(), 1); @@ -1619,7 +1502,7 @@ mod tests { let mut last_key: KvId = 0; for i in 1..=pre_count { let key: KvId = i as KvId; - // open_dot(&mut merkle_blob.to_dot().set_note("empty")); + open_dot(&mut merkle_blob.to_dot().set_note("empty")); merkle_blob .insert(key, key, &sha256_num(key), InsertLocation::Auto) .unwrap(); @@ -1627,7 +1510,7 @@ mod tests { } let key_value_id: KvId = pre_count as KvId + 1; - // open_dot(&mut merkle_blob.to_dot().set_note("first after")); + open_dot(&mut merkle_blob.to_dot().set_note("first after")); merkle_blob .insert( key_value_id, @@ -1639,7 +1522,7 @@ mod tests { }, ) .unwrap(); - // open_dot(&mut merkle_blob.to_dot().set_note("first after")); + open_dot(&mut merkle_blob.to_dot().set_note("first after")); let sibling = merkle_blob .get_node(merkle_blob.key_to_index[&last_key]) @@ -1673,7 +1556,7 @@ mod tests { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); let key_value_id: KvId = 1; - // open_dot(&mut merkle_blob.to_dot().set_note("empty")); + open_dot(&mut merkle_blob.to_dot().set_note("empty")); merkle_blob .insert( key_value_id, @@ -1682,7 +1565,7 @@ mod tests { InsertLocation::Auto, ) .unwrap(); - // open_dot(&mut merkle_blob.to_dot().set_note("first after")); + open_dot(&mut merkle_blob.to_dot().set_note("first after")); merkle_blob.check().unwrap(); merkle_blob.delete(key_value_id).unwrap(); @@ -1766,11 +1649,11 @@ mod tests { insert_blob .insert(key, value, &sha256_num(key), InsertLocation::Auto) .unwrap(); - // open_dot(insert_blob.to_dot().set_note("first after")); + open_dot(insert_blob.to_dot().set_note("first after")); let mut upsert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); upsert_blob.upsert(key, value, &sha256_num(key)).unwrap(); - // open_dot(upsert_blob.to_dot().set_note("first 
after")); + open_dot(upsert_blob.to_dot().set_note("first after")); assert_eq!(insert_blob.blob, upsert_blob.blob); } From a34cb63321c12d7d3b48021e64dcbe93d69fc1c6 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 15 Oct 2024 17:13:17 -0400 Subject: [PATCH 091/181] tidy --- crates/chia-datalayer/src/merkle.rs | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 13aecc1ed..4148d534c 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -63,7 +63,7 @@ impl NodeType { // ha! feel free to laugh at this x if (NodeType::Internal as u8 == x) => Ok(NodeType::Internal), x if (NodeType::Leaf as u8 == x) => Ok(NodeType::Leaf), - other => panic!("unknown NodeType value: {other}"), + other => Err(format!("unknown NodeType value: {other}")), } } @@ -196,6 +196,9 @@ impl Node { // unsafe { *(self as *const Self as *const u8) } // } + // TODO: talk through whether this is good practice. being prepared for an error even though + // presently it won't happen + #[allow(clippy::unnecessary_wraps)] pub fn from_bytes(metadata: &NodeMetadata, blob: DataBytes) -> Result { Ok(Self { parent: Self::parent_from_bytes(&blob), @@ -1235,7 +1238,7 @@ mod tests { use rstest::{fixture, rstest}; use std::time::{Duration, Instant}; - fn open_dot(lines: &mut DotLines) { + fn open_dot(_lines: &mut DotLines) { // crate::merkle::dot::open_dot(lines); } @@ -1477,7 +1480,7 @@ mod tests { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); let key_value_id: KvId = 1; - open_dot(&mut merkle_blob.to_dot().set_note("empty")); + open_dot(merkle_blob.to_dot().set_note("empty")); merkle_blob .insert( key_value_id, @@ -1486,7 +1489,7 @@ mod tests { InsertLocation::Auto, ) .unwrap(); - open_dot(&mut merkle_blob.to_dot().set_note("first after")); + open_dot(merkle_blob.to_dot().set_note("first after")); merkle_blob.check().unwrap(); assert_eq!(merkle_blob.key_to_index.len(), 1); @@ -1502,7 +1505,7 @@ mod tests { let mut last_key: KvId = 0; for i in 1..=pre_count { let key: KvId = i as KvId; - open_dot(&mut merkle_blob.to_dot().set_note("empty")); + open_dot(merkle_blob.to_dot().set_note("empty")); merkle_blob .insert(key, key, &sha256_num(key), InsertLocation::Auto) .unwrap(); @@ -1510,7 +1513,7 @@ mod tests { } let key_value_id: KvId = pre_count as KvId + 1; - open_dot(&mut merkle_blob.to_dot().set_note("first after")); + open_dot(merkle_blob.to_dot().set_note("first after")); merkle_blob .insert( key_value_id, @@ -1522,7 +1525,7 @@ mod tests { }, ) .unwrap(); - open_dot(&mut merkle_blob.to_dot().set_note("first after")); + open_dot(merkle_blob.to_dot().set_note("first after")); let sibling = merkle_blob .get_node(merkle_blob.key_to_index[&last_key]) @@ -1556,7 +1559,7 @@ mod tests { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); let key_value_id: KvId = 1; - open_dot(&mut merkle_blob.to_dot().set_note("empty")); + open_dot(merkle_blob.to_dot().set_note("empty")); merkle_blob .insert( key_value_id, @@ -1565,7 +1568,7 @@ mod tests { InsertLocation::Auto, ) .unwrap(); - open_dot(&mut merkle_blob.to_dot().set_note("first after")); + open_dot(merkle_blob.to_dot().set_note("first after")); merkle_blob.check().unwrap(); merkle_blob.delete(key_value_id).unwrap(); From 32d8cdeb1531fc41aabe8d01e48adcf7482cad3f Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 16 Oct 2024 08:33:23 -0400 Subject: [PATCH 092/181] catch up test --- crates/chia-datalayer/src/merkle.rs | 6 
++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 4148d534c..fb841f79f 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -1604,9 +1604,11 @@ mod tests { } #[test] - #[should_panic(expected = "unknown NodeType value: 2")] fn test_node_type_from_u8_invalid() { - let _ = NodeType::from_u8(2); + let invalid_value = 2; + let expected = format!("unknown NodeType value: {invalid_value}"); + let actual = NodeType::from_u8(invalid_value); + actual.expect_err(&expected); } #[test] From 2fa7c3af03b5550944e703ff729871af095b59e4 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 17 Oct 2024 09:07:49 -0400 Subject: [PATCH 093/181] tidy --- crates/chia-datalayer/src/merkle.rs | 84 ++++++++----------- ...{test_merkle_blob.py => test_datalayer.py} | 18 ---- 2 files changed, 36 insertions(+), 66 deletions(-) rename tests/{test_merkle_blob.py => test_datalayer.py} (59%) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index fb841f79f..4d4a78918 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -18,16 +18,17 @@ const fn range_by_length(start: usize, length: usize) -> Range { start..start + length } -// define the serialized block format // TODO: consider in more detail other serialization tools such as serde and streamable -// common fields -// TODO: better way to pick the max of key value and right range, until we move hash first -// TODO: clearly shouldn't be hard coded -const METADATA_SIZE: usize = 2; +// define the serialized block format const METADATA_RANGE: Range = 0..METADATA_SIZE; +const TYPE_RANGE: Range = range_by_length(0, size_of::()); +const DIRTY_RANGE: Range = range_by_length(TYPE_RANGE.end, size_of::()); +const METADATA_SIZE: usize = DIRTY_RANGE.end; + +// common fields const HASH_RANGE: Range = range_by_length(0, size_of::()); // const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); -const PARENT_RANGE: Range = HASH_RANGE.end..(HASH_RANGE.end + size_of::()); +const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); // internal specific fields const LEFT_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); const RIGHT_RANGE: Range = range_by_length(LEFT_RANGE.end, size_of::()); @@ -43,10 +44,6 @@ type BlockBytes = [u8; BLOCK_SIZE]; type MetadataBytes = [u8; METADATA_SIZE]; type DataBytes = [u8; DATA_SIZE]; const DATA_RANGE: Range = METADATA_SIZE..METADATA_SIZE + DATA_SIZE; -// const INTERNAL_PADDING_RANGE: Range = RIGHT_RANGE.end..DATA_SIZE; -// const INTERNAL_PADDING_SIZE: usize = INTERNAL_PADDING_RANGE.end - INTERNAL_PADDING_RANGE.start; -// const LEAF_PADDING_RANGE: Range = VALUE_RANGE.end..DATA_SIZE; -// const LEAF_PADDING_SIZE: usize = LEAF_PADDING_RANGE.end - LEAF_PADDING_RANGE.start; #[derive(Clone, Debug, Hash, Eq, PartialEq)] #[repr(u8)] @@ -75,25 +72,8 @@ impl NodeType { } } -// impl NodeType { -// const TYPE_TO_VALUE: HashMap = HashMap::from([ -// (NodeType::Internal, 0), -// (NodeType::Leaf, 1), -// ]); -// -// fn value(&self) -> u8 { -// let map = Self::TYPE_TO_VALUE; -// // TODO: this seems pretty clearly the wrong way, probably -// let value = map.get(self); -// if value.is_some() { -// return 3; -// } -// panic!("no value for NodeType: {self:?}"); -// } -// } - #[allow(clippy::needless_pass_by_value)] -fn sha256_num(input: T) -> Hash { +fn sha256_num(input: T) -> Hash { let mut hasher = Sha256::new(); 
hasher.update(input.to_be_bytes()); @@ -148,15 +128,19 @@ impl NodeMetadata { } pub fn to_bytes(&self) -> MetadataBytes { - [self.node_type.to_u8(), u8::from(self.dirty)] + let mut bytes = [0u8; METADATA_SIZE]; + bytes[TYPE_RANGE].copy_from_slice(&[self.node_type.to_u8()]); + bytes[DIRTY_RANGE].copy_from_slice(&[u8::from(self.dirty)]); + + bytes } pub fn node_type_from_bytes(blob: MetadataBytes) -> Result { - NodeType::from_u8(blob[0]) + NodeType::from_u8(u8::from_be_bytes(blob[TYPE_RANGE].try_into().unwrap())) } pub fn dirty_from_bytes(blob: MetadataBytes) -> Result { - match blob[1] { + match u8::from_be_bytes(blob[DIRTY_RANGE].try_into().unwrap()) { 0 => Ok(false), 1 => Ok(true), other => Err(format!("invalid dirty value: {other}")), @@ -178,6 +162,7 @@ pub enum NodeSpecific { } impl NodeSpecific { + // TODO: methods that only handle one variant seem kinda smelly to me, am i right? pub fn sibling_index(&self, index: TreeIndex) -> TreeIndex { let NodeSpecific::Internal { right, left } = self else { panic!("unable to get sibling index from a leaf") @@ -192,10 +177,6 @@ impl NodeSpecific { } impl Node { - // fn discriminant(&self) -> u8 { - // unsafe { *(self as *const Self as *const u8) } - // } - // TODO: talk through whether this is good practice. being prepared for an error even though // presently it won't happen #[allow(clippy::unnecessary_wraps)] @@ -265,7 +246,7 @@ fn block_range(index: TreeIndex) -> Range { block_start..block_start + BLOCK_SIZE } -// TODO: does not enforce matching metadata node type and node enumeration type +// TODO: does not enforce matching metadata node type and node enumeration variant pub struct Block { metadata: NodeMetadata, node: Node, @@ -322,7 +303,6 @@ fn get_free_indexes_and_keys_values_indexes( #[derive(Debug)] pub struct MerkleBlob { blob: Vec, - // TODO: should this be a set for fast lookups? 
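// [editor's note, not part of the original patch] On the TODO being removed
// above: `free_indexes` is kept as a set precisely for fast lookups; `check()`
// asserts membership with `contains`, deletions put freed block indexes back
// in, and `get_new_index()` pulls one out for reuse. A small illustration of
// those operations (standalone, names only for illustration):
//
//     let mut free_indexes: HashSet<TreeIndex> = HashSet::new();
//     free_indexes.insert(5);             // a deleted node's block becomes reusable
//     assert!(free_indexes.contains(&5)); // check() asserts live nodes are *not* in here
//     free_indexes.remove(&5);            // reusing the block takes it back out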
free_indexes: HashSet, key_to_index: HashMap, } @@ -372,7 +352,7 @@ impl MerkleBlob { if !self.key_to_index.is_empty() { return Err("requested insertion at root but tree not empty".to_string()); }; - self.insert_first(key, value, hash); + self.insert_first(key, value, hash)?; } InsertLocation::Leaf { index, side } => { let old_leaf = self.get_node(index)?; @@ -404,7 +384,7 @@ impl MerkleBlob { Ok(()) } - fn insert_first(&mut self, key: KvId, value: KvId, hash: &Hash) { + fn insert_first(&mut self, key: KvId, value: KvId, hash: &Hash) -> Result<(), String> { let new_leaf_block = Block { metadata: NodeMetadata { node_type: NodeType::Leaf, @@ -418,9 +398,9 @@ impl MerkleBlob { }; self.clear(); - // TODO: unwrap, ack, review - self.insert_entry_to_blob(self.extend_index(), &new_leaf_block) - .unwrap(); + self.insert_entry_to_blob(self.extend_index(), &new_leaf_block)?; + + Ok(()) } fn insert_second( @@ -601,7 +581,6 @@ impl MerkleBlob { self.free_indexes.insert(leaf_index); let parent = self.get_node(parent_index)?; - // TODO: kinda implicit that we 'check' that parent is internal inside .sibling_index() let sibling_index = parent.specific.sibling_index(leaf_index); let mut sibling_block = self.get_block(sibling_index)?; @@ -678,10 +657,21 @@ impl MerkleBlob { pub fn check(&self) -> Result<(), String> { let mut leaf_count: usize = 0; let mut internal_count: usize = 0; + // TODO: either fix this check, or fix the bug it exposes + // let mut child_to_parent: HashMap = HashMap::new(); + // for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob) { for (index, block) in self { + // if let Some(parent) = block.node.parent { + // assert_eq!(child_to_parent.remove(&index), Some(parent)); + // } match block.node.specific { - NodeSpecific::Internal { .. } => internal_count += 1, + // NodeSpecific::Internal { left, right } => { + NodeSpecific::Internal { .. } => { + internal_count += 1; + // child_to_parent.insert(left, index); + // child_to_parent.insert(right, index); + } NodeSpecific::Leaf { key, .. } => { leaf_count += 1; let cached_index = self @@ -692,7 +682,6 @@ impl MerkleBlob { *cached_index, index, "key to index cache for {key:?} should be {index:?} got: {cached_index:?}" ); - // TODO: consider what type free indexes should be assert!( !self.free_indexes.contains(&index), "{}", @@ -712,7 +701,6 @@ impl MerkleBlob { ); Ok(()) - // TODO: check parent/child bidirectional accuracy } // fn update_parent(&mut self, index: TreeIndex, parent: Option) -> Result<(), String> { @@ -1169,7 +1157,7 @@ impl<'a> MerkleBlobParentFirstIterator<'a> { } impl Iterator for MerkleBlobParentFirstIterator<'_> { - type Item = Block; + type Item = (TreeIndex, Block); fn next(&mut self) -> Option { // left sibling first, parents before children @@ -1180,7 +1168,7 @@ impl Iterator for MerkleBlobParentFirstIterator<'_> { let block = Block::from_bytes(block_bytes).unwrap(); match block.node.specific { - NodeSpecific::Leaf { .. } => return Some(block), + NodeSpecific::Leaf { .. 
} => return Some((index, block)), NodeSpecific::Internal { left, right } => { self.deque.push_front(right); self.deque.push_front(left); diff --git a/tests/test_merkle_blob.py b/tests/test_datalayer.py similarity index 59% rename from tests/test_merkle_blob.py rename to tests/test_datalayer.py index 88d5802c5..1ca216edf 100644 --- a/tests/test_merkle_blob.py +++ b/tests/test_datalayer.py @@ -31,21 +31,3 @@ def test_just_insert_a_bunch() -> None: merkle_blob.insert(uint64(i), uint64(i), HASH) end = time.monotonic() total_time += end - start - - # kv_count = i + 1 - # if kv_count == 2: - # assert len(merkle_blob.blob) == 3 * spacing - # elif kv_count == 3: - # assert len(merkle_blob.blob) == 5 * spacing - # - # with path.joinpath("py", f"{i:04}").open(mode="w") as file: - # for offset in range(0, len(merkle_blob.blob), spacing): - # file.write(merkle_blob.blob[offset:offset + spacing].hex()) - # file.write("\n") - # path.joinpath("py", f"{i:04}").write_bytes(merkle_blob.blob) - - # rs = pathlib.Path("~/repos/chia_rs/crates/chia-datalayer/src/test_just_insert_a_bunch_reference").expanduser().read_bytes() - # b = bytes(merkle_blob.blob) - # assert b == rs, 'not the same' - - # assert False, f"total time: {total_time}" From 878b8e077ac8b82e362d890a38a59156c4e2fff3 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 17 Oct 2024 09:13:37 -0400 Subject: [PATCH 094/181] check parent child bidirectional agreement --- crates/chia-datalayer/src/merkle.rs | 41 +++++++++++++---------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 4d4a78918..79e46b1b7 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -657,20 +657,17 @@ impl MerkleBlob { pub fn check(&self) -> Result<(), String> { let mut leaf_count: usize = 0; let mut internal_count: usize = 0; - // TODO: either fix this check, or fix the bug it exposes - // let mut child_to_parent: HashMap = HashMap::new(); - - // for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob) { - for (index, block) in self { - // if let Some(parent) = block.node.parent { - // assert_eq!(child_to_parent.remove(&index), Some(parent)); - // } + let mut child_to_parent: HashMap = HashMap::new(); + + for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob) { + if let Some(parent) = block.node.parent { + assert_eq!(child_to_parent.remove(&index), Some(parent)); + } match block.node.specific { - // NodeSpecific::Internal { left, right } => { - NodeSpecific::Internal { .. } => { + NodeSpecific::Internal { left, right } => { internal_count += 1; - // child_to_parent.insert(left, index); - // child_to_parent.insert(right, index); + child_to_parent.insert(left, index); + child_to_parent.insert(right, index); } NodeSpecific::Leaf { key, .. 
} => { leaf_count += 1; @@ -699,6 +696,7 @@ impl MerkleBlob { total_count, extend_index as usize, "expected total node count {extend_index:?} found: {total_count:?}", ); + assert_eq!(child_to_parent.len(), 0); Ok(()) } @@ -1162,19 +1160,16 @@ impl Iterator for MerkleBlobParentFirstIterator<'_> { fn next(&mut self) -> Option { // left sibling first, parents before children - loop { - let index = self.deque.pop_front()?; - let block_bytes: BlockBytes = self.blob[block_range(index)].try_into().unwrap(); - let block = Block::from_bytes(block_bytes).unwrap(); + let index = self.deque.pop_front()?; + let block_bytes: BlockBytes = self.blob[block_range(index)].try_into().unwrap(); + let block = Block::from_bytes(block_bytes).unwrap(); - match block.node.specific { - NodeSpecific::Leaf { .. } => return Some((index, block)), - NodeSpecific::Internal { left, right } => { - self.deque.push_front(right); - self.deque.push_front(left); - } - } + if let NodeSpecific::Internal { left, right } = block.node.specific { + self.deque.push_back(left); + self.deque.push_back(right); } + + Some((index, block)) } } From d6e4e4bfcb114d5046576e67ef6c0e486414647a Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 17 Oct 2024 09:45:20 -0400 Subject: [PATCH 095/181] .get_hash() --- crates/chia-datalayer/src/merkle.rs | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 79e46b1b7..1bca9d602 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -183,7 +183,7 @@ impl Node { pub fn from_bytes(metadata: &NodeMetadata, blob: DataBytes) -> Result { Ok(Self { parent: Self::parent_from_bytes(&blob), - hash: blob[HASH_RANGE].try_into().unwrap(), + hash: Self::hash_from_bytes(&blob), specific: match metadata.node_type { NodeType::Internal => NodeSpecific::Internal { left: TreeIndex::from_be_bytes(blob[LEFT_RANGE].try_into().unwrap()), @@ -204,6 +204,11 @@ impl Node { _ => Some(parent_integer), } } + + fn hash_from_bytes(blob: &DataBytes) -> Hash { + blob[HASH_RANGE].try_into().unwrap() + } + pub fn to_bytes(&self) -> DataBytes { let mut blob: DataBytes = [0; DATA_SIZE]; match self { @@ -271,6 +276,11 @@ impl Block { Ok(Block { metadata, node }) } + + pub fn update_hash(&mut self, left: &Hash, right: &Hash) { + self.node.hash = internal_hash(&left, &right); + self.metadata.dirty = false; + } } fn get_free_indexes_and_keys_values_indexes( @@ -852,6 +862,13 @@ impl MerkleBlob { Block::from_bytes(self.get_block_bytes(index)?) } + fn get_hash(&self, index: TreeIndex) -> Result { + let block_bytes = self.get_block_bytes(index)?; + let data_bytes: DataBytes = block_bytes[DATA_RANGE].try_into().unwrap(); + + Ok(Node::hash_from_bytes(&data_bytes)) + } + fn get_block_bytes(&self, index: TreeIndex) -> Result { self.blob .get(block_range(index)) @@ -927,11 +944,10 @@ impl MerkleBlob { }; // TODO: obviously inefficient to re-get/deserialize these blocks inside // an iteration that's already doing that - let left = self.get_block(left)?; - let right = self.get_block(right)?; + let left_hash = self.get_hash(left)?; + let right_hash = self.get_hash(right)?; // TODO: wrap this up in Block maybe? 
just to have 'control' of dirty being 'accurate' - block.node.hash = internal_hash(&left.node.hash, &right.node.hash); - block.metadata.dirty = false; + block.update_hash(&left_hash, &right_hash); self.insert_entry_to_blob(index, &block)?; } @@ -1027,7 +1043,6 @@ impl PartialEq for MerkleBlob { } impl<'a> IntoIterator for &'a MerkleBlob { - // TODO: review efficiency in whatever use cases we end up with, vs Item = Node etc type Item = (TreeIndex, Block); type IntoIter = MerkleBlobLeftChildFirstIterator<'a>; From bf86d4054c21f0d878b067660ae386e0b25670d9 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 17 Oct 2024 10:45:15 -0400 Subject: [PATCH 096/181] tidy --- crates/chia-datalayer/src/merkle.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 1bca9d602..9ebc58a97 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -278,7 +278,7 @@ impl Block { } pub fn update_hash(&mut self, left: &Hash, right: &Hash) { - self.node.hash = internal_hash(&left, &right); + self.node.hash = internal_hash(left, right); self.metadata.dirty = false; } } @@ -1267,17 +1267,11 @@ mod tests { #[test] fn test_internal_hash() { - // TODO: yeah, various questions around this and how to express 'this is dl internal hash' - // without silly repetition. maybe just a use as. // in Python: Program.to((left_hash, right_hash)).get_tree_hash_precalc(left_hash, right_hash) - let left: Hash = [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, - ]; - let right: Hash = [ - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - ]; + + let left: Hash = (0u8..32).collect::>().try_into().unwrap(); + let right: Hash = (32u8..64).collect::>().try_into().unwrap(); + assert_eq!( internal_hash(&left, &right), clvm_utils::tree_hash_pair( From f0fb65aaa4ff96b63b851cabd32fdf7be48f922f Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 17 Oct 2024 11:33:30 -0400 Subject: [PATCH 097/181] tidy --- crates/chia-datalayer/src/merkle.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 9ebc58a97..2d61ab527 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -55,7 +55,6 @@ pub enum NodeType { impl NodeType { pub fn from_u8(value: u8) -> Result { // TODO: identify some useful structured serialization tooling we use - // TODO: find a better way to tie serialization values to enumerator variants match value { // ha! feel free to laugh at this x if (NodeType::Internal as u8 == x) => Ok(NodeType::Internal), @@ -752,7 +751,6 @@ impl MerkleBlob { fn get_new_index(&mut self) -> TreeIndex { match self.free_indexes.iter().next().copied() { None => { - // TODO: should this extend...? // TODO: should this update free indexes...? 
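// [editor's note, not part of the original patch] Allocation strategy at this
// point: prefer recycling an index from `free_indexes`, and only when the free
// set is empty grow the blob by one zeroed block and hand out the brand-new
// index. A compressed sketch of the same logic (helper name is illustrative):
//
//     fn get_new_index_sketch(free: &mut HashSet<TreeIndex>, blob: &mut Vec<u8>) -> TreeIndex {
//         let reused = free.iter().next().copied();
//         match reused {
//             Some(index) => {
//                 free.remove(&index); // reuse a freed block
//                 index
//             }
//             None => {
//                 let index = (blob.len() / BLOCK_SIZE) as TreeIndex;
//                 blob.extend_from_slice(&[0; BLOCK_SIZE]); // append a fresh zeroed block
//                 index
//             }
//         }
//     }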
let index = self.extend_index(); self.blob.extend_from_slice(&[0; BLOCK_SIZE]); From 6e01fe0d141d58b413cafbe7b49a4ff195ab325d Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 17 Oct 2024 12:32:39 -0400 Subject: [PATCH 098/181] always more --- crates/chia-datalayer/src/merkle.rs | 50 +++++++++++------------------ 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 2d61ab527..4e0cd878d 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -250,7 +250,6 @@ fn block_range(index: TreeIndex) -> Range { block_start..block_start + BLOCK_SIZE } -// TODO: does not enforce matching metadata node type and node enumeration variant pub struct Block { metadata: NodeMetadata, node: Node, @@ -265,7 +264,7 @@ impl Block { blob } - pub fn from_bytes(blob: BlockBytes) -> Result { + pub fn from_bytes(blob: BlockBytes) -> Result { let metadata_blob: MetadataBytes = blob[METADATA_RANGE].try_into().unwrap(); let data_blob: DataBytes = blob[DATA_RANGE].try_into().unwrap(); let metadata = NodeMetadata::from_bytes(metadata_blob) @@ -374,18 +373,16 @@ impl MerkleBlob { Side::Right => internal_hash(&old_leaf.hash, hash), }; + let node = Node { + parent: None, + hash: *hash, + specific: NodeSpecific::Leaf { key, value }, + }; + if self.key_to_index.len() == 1 { - self.insert_second(key, value, hash, &old_leaf, &internal_node_hash, &side)?; + self.insert_second(node, &old_leaf, &internal_node_hash, &side)?; } else { - self.insert_third_or_later( - key, - value, - hash, - &old_leaf, - index, - &internal_node_hash, - &side, - )?; + self.insert_third_or_later(node, &old_leaf, index, &internal_node_hash, &side)?; } } } @@ -414,9 +411,7 @@ impl MerkleBlob { fn insert_second( &mut self, - key: KvId, - value: KvId, - hash: &Hash, + mut node: Node, old_leaf: &Node, internal_node_hash: &Hash, side: &Side, @@ -446,6 +441,9 @@ impl MerkleBlob { else { return Err("old leaf unexpectedly not a leaf".to_string()); }; + + node.parent = Some(0); + let nodes = [ ( match side { @@ -466,11 +464,7 @@ impl MerkleBlob { Side::Left => 1, Side::Right => 2, }, - Node { - parent: Some(0), - specific: NodeSpecific::Leaf { key, value }, - hash: *hash, - }, + node, ), ]; @@ -489,13 +483,9 @@ impl MerkleBlob { Ok(()) } - // TODO: no really, actually consider the too many arguments complaint - #[allow(clippy::too_many_arguments)] fn insert_third_or_later( &mut self, - key: KvId, - value: KvId, - hash: &Hash, + mut node: Node, old_leaf: &Node, old_leaf_index: TreeIndex, internal_node_hash: &Hash, @@ -504,16 +494,14 @@ impl MerkleBlob { let new_leaf_index = self.get_new_index(); let new_internal_node_index = self.get_new_index(); + node.parent = Some(new_internal_node_index); + let new_leaf_block = Block { metadata: NodeMetadata { node_type: NodeType::Leaf, dirty: false, }, - node: Node { - parent: Some(new_internal_node_index), - specific: NodeSpecific::Leaf { key, value }, - hash: *hash, - }, + node, }; self.insert_entry_to_blob(new_leaf_index, &new_leaf_block)?; @@ -538,7 +526,7 @@ impl MerkleBlob { self.insert_entry_to_blob(new_internal_node_index, &new_internal_block)?; let Some(old_parent_index) = old_leaf.parent else { - panic!("root found when not expected: {key:?} {value:?} {hash:?}") + panic!("root found when not expected") }; let mut block = Block::from_bytes(self.get_block_bytes(old_leaf_index)?)?; From ae77486ebdcbe6aa3eb8166e2decc00eaa883cdd Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 17 
Oct 2024 14:26:23 -0400 Subject: [PATCH 099/181] only 14 to go --- crates/chia-datalayer/src/merkle.rs | 98 ++++++++++++++--------------- 1 file changed, 47 insertions(+), 51 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 4e0cd878d..9911118a7 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -1,5 +1,5 @@ #[cfg(feature = "py-bindings")] -use pyo3::{buffer::PyBuffer, pyclass, pymethods, PyResult}; +use pyo3::{buffer::PyBuffer, exceptions::PyValueError, pyclass, pymethods, PyResult}; use clvmr::sha2::Sha256; use num_traits::ToBytes; @@ -17,6 +17,11 @@ type KvId = i64; const fn range_by_length(start: usize, length: usize) -> Range { start..start + length } +const fn max(left: usize, right: usize) -> usize { + [left, right][(left < right) as usize] +} +// TODO: once not experimental... something closer to this +// const fn max(left: T, right: T) -> T { if left < right {right} else {left} } // TODO: consider in more detail other serialization tools such as serde and streamable // define the serialized block format @@ -36,9 +41,7 @@ const RIGHT_RANGE: Range = range_by_length(LEFT_RANGE.end, size_of:: = range_by_length(PARENT_RANGE.end, size_of::()); const VALUE_RANGE: Range = range_by_length(KEY_RANGE.end, size_of::()); -// TODO: clearly shouldn't be hard coded -// TODO: max of RIGHT_RANGE.end and VALUE_RANGE.end -const DATA_SIZE: usize = VALUE_RANGE.end; +const DATA_SIZE: usize = max(RIGHT_RANGE.end, VALUE_RANGE.end); const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; type BlockBytes = [u8; BLOCK_SIZE]; type MetadataBytes = [u8; METADATA_SIZE]; @@ -54,7 +57,6 @@ pub enum NodeType { impl NodeType { pub fn from_u8(value: u8) -> Result { - // TODO: identify some useful structured serialization tooling we use match value { // ha! feel free to laugh at this x if (NodeType::Internal as u8 == x) => Ok(NodeType::Internal), @@ -119,7 +121,6 @@ pub struct NodeMetadata { impl NodeMetadata { pub fn from_bytes(blob: MetadataBytes) -> Result { // TODO: could save 1-2% of tree space by packing (and maybe don't do that) - // TODO: identify some useful structured serialization tooling we use Ok(Self { node_type: Self::node_type_from_bytes(blob)?, dirty: Self::dirty_from_bytes(blob)?, @@ -176,8 +177,6 @@ impl NodeSpecific { } impl Node { - // TODO: talk through whether this is good practice. being prepared for an error even though - // presently it won't happen #[allow(clippy::unnecessary_wraps)] pub fn from_bytes(metadata: &NodeMetadata, blob: DataBytes) -> Result { Ok(Self { @@ -417,8 +416,9 @@ impl MerkleBlob { side: &Side, ) -> Result<(), String> { self.clear(); - // TODO: just handling the nodes below being out of order. 
this all still smells a bit - self.blob.resize(BLOCK_SIZE * 3, 0); + let root_index = self.get_new_index(); + let left_index = self.get_new_index(); + let right_index = self.get_new_index(); let new_internal_block = Block { metadata: NodeMetadata { @@ -427,12 +427,15 @@ impl MerkleBlob { }, node: Node { parent: None, - specific: NodeSpecific::Internal { left: 1, right: 2 }, + specific: NodeSpecific::Internal { + left: left_index, + right: right_index, + }, hash: *internal_node_hash, }, }; - self.insert_entry_to_blob(0, &new_internal_block)?; + self.insert_entry_to_blob(root_index, &new_internal_block)?; let NodeSpecific::Leaf { key: old_leaf_key, @@ -447,8 +450,8 @@ impl MerkleBlob { let nodes = [ ( match side { - Side::Left => 2, - Side::Right => 1, + Side::Left => right_index, + Side::Right => left_index, }, Node { parent: Some(0), @@ -461,8 +464,8 @@ impl MerkleBlob { ), ( match side { - Side::Left => 1, - Side::Right => 2, + Side::Left => left_index, + Side::Right => right_index, }, node, ), @@ -739,9 +742,11 @@ impl MerkleBlob { fn get_new_index(&mut self) -> TreeIndex { match self.free_indexes.iter().next().copied() { None => { - // TODO: should this update free indexes...? let index = self.extend_index(); self.blob.extend_from_slice(&[0; BLOCK_SIZE]); + // NOTE: explicitly not marking index as free since that would hazard two + // sequential calls to this function through this path to both return + // the same index index } Some(new_index) => { @@ -864,17 +869,7 @@ impl MerkleBlob { } pub fn get_node(&self, index: TreeIndex) -> Result { - // TODO: use Block::from_bytes() - // TODO: handle invalid indexes? - // TODO: handle overflows? - let block = self.get_block_bytes(index)?; - let metadata_blob: MetadataBytes = block[METADATA_RANGE].try_into().unwrap(); - let data_blob: DataBytes = block[DATA_RANGE].try_into().unwrap(); - let metadata = NodeMetadata::from_bytes(metadata_blob) - .map_err(|message| format!("failed loading metadata: {message})"))?; - - Node::from_bytes(&metadata, data_blob) - .map_err(|message| format!("failed loading node: {message}")) + Ok(self.get_block(index)?.node) } pub fn get_parent_index(&self, index: TreeIndex) -> Result { @@ -886,7 +881,6 @@ impl MerkleBlob { } pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { - // TODO: what about an index that happens to be the null index? a question for everywhere i guess let mut next_index = Some(index); let mut lineage = vec![]; @@ -900,8 +894,6 @@ impl MerkleBlob { } pub fn get_lineage_indexes(&self, index: TreeIndex) -> Result, String> { - // TODO: yep, this 'optimization' might be overkill, and should be speed compared regardless - // TODO: what about an index that happens to be the null index? a question for everywhere i guess let mut next_index = Some(index); let mut lineage: Vec = vec![]; @@ -932,7 +924,6 @@ impl MerkleBlob { // an iteration that's already doing that let left_hash = self.get_hash(left)?; let right_hash = self.get_hash(right)?; - // TODO: wrap this up in Block maybe? just to have 'control' of dirty being 'accurate' block.update_hash(&left_hash, &right_hash); self.insert_entry_to_blob(index, &block)?; } @@ -1013,15 +1004,20 @@ impl MerkleBlob { impl PartialEq for MerkleBlob { fn eq(&self, other: &Self) -> bool { - // TODO: should we check the indexes? 
+ // NOTE: this is checking tree structure equality, not serialized bytes equality for ((_, self_block), (_, other_block)) in zip(self, other) { if (self_block.metadata.dirty || other_block.metadata.dirty) || self_block.node.hash != other_block.node.hash - // TODO: isn't only a leaf supposed to check this? - || self_block.node.specific != other_block.node.specific { return false; } + match self_block.node.specific { + // NOTE: this is effectively checked by the controlled overall traversal + NodeSpecific::Internal { .. } => {} + NodeSpecific::Leaf { .. } => { + return self_block.node.specific == other_block.node.specific + } + } } true @@ -1033,8 +1029,7 @@ impl<'a> IntoIterator for &'a MerkleBlob { type IntoIter = MerkleBlobLeftChildFirstIterator<'a>; fn into_iter(self) -> Self::IntoIter { - // TODO: review types around this to avoid copying - MerkleBlobLeftChildFirstIterator::new(&self.blob[..]) + MerkleBlobLeftChildFirstIterator::new(&self.blob) } } @@ -1052,25 +1047,28 @@ impl MerkleBlob { let slice = unsafe { std::slice::from_raw_parts(blob.buf_ptr() as *const u8, blob.len_bytes()) }; - Ok(Self::new(Vec::from(slice)).unwrap()) + match Self::new(Vec::from(slice)) { + Ok(blob) => Ok(blob), + Err(message) => Err(PyValueError::new_err(message)), + } } #[pyo3(name = "insert")] pub fn py_insert(&mut self, key: KvId, value: KvId, hash: Hash) -> PyResult<()> { - // TODO: consider the error - // TODO: expose insert location - self.insert(key, value, &hash, InsertLocation::Auto) - .unwrap(); - - Ok(()) + if let Err(message) = self.insert(key, value, &hash, InsertLocation::Auto) { + Err(PyValueError::new_err(message)) + } else { + Ok(()) + } } #[pyo3(name = "delete")] pub fn py_delete(&mut self, key: KvId) -> PyResult<()> { - // TODO: consider the error - self.delete(key).unwrap(); - - Ok(()) + if let Err(message) = self.delete(key) { + Err(PyValueError::new_err(message)) + } else { + Ok(()) + } } #[pyo3(name = "__len__")] @@ -1238,7 +1236,6 @@ mod tests { #[test] fn test_node_type_serialized_values() { - // TODO: can i make sure we cover all variants? 
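// [editor's note, not part of the original patch] On the TODO about covering
// all variants: one way to get a compile-time guarantee is a match with no
// wildcard arm, which stops compiling the moment a new NodeType variant is
// added and so forces this serialization test to be revisited. A minimal
// sketch (function name is illustrative only):
//
//     fn _assert_node_type_variants_covered(node_type: NodeType) {
//         match node_type {
//             NodeType::Internal | NodeType::Leaf => {}
//         }
//     }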
assert_eq!(NodeType::Internal as u8, 0); assert_eq!(NodeType::Leaf as u8, 1); @@ -1271,7 +1268,6 @@ mod tests { #[rstest] fn test_node_metadata_from_to( #[values(false, true)] dirty: bool, - // TODO: can we make sure we cover all variants #[values(NodeType::Internal, NodeType::Leaf)] node_type: NodeType, ) { let bytes: [u8; 2] = [node_type.to_u8(), dirty as u8]; @@ -1403,7 +1399,7 @@ mod tests { for i in 0..100_000 { let start = Instant::now(); merkle_blob - // TODO: yeah this hash is garbage + // NOTE: yeah this hash is garbage .insert(i, i, &sha256_num(i), InsertLocation::Auto) .unwrap(); let end = Instant::now(); From 4d429958caf5862deb07c8e6925e8a8c7940c398 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 18 Oct 2024 13:12:55 -0400 Subject: [PATCH 100/181] more --- crates/chia-datalayer/src/lib.rs | 2 +- crates/chia-datalayer/src/merkle.rs | 197 +++++++++++++++++++++++----- wheel/generate_type_stubs.py | 4 +- wheel/python/chia_rs/chia_rs.pyi | 4 +- wheel/src/api.rs | 5 +- 5 files changed, 177 insertions(+), 35 deletions(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 7e997f3c2..f75124346 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1,3 +1,3 @@ mod merkle; -pub use merkle::MerkleBlob; +pub use merkle::{InsertLocation, MerkleBlob, Node, Side}; diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 9911118a7..0d7961bd6 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -2,7 +2,10 @@ use pyo3::{buffer::PyBuffer, exceptions::PyValueError, pyclass, pymethods, PyResult}; use clvmr::sha2::Sha256; +use clvmr::SExp; use num_traits::ToBytes; +use pyo3::types::PyTuple; +use pyo3::{PyObject, Python}; use std::cmp::Ordering; use std::collections::{HashMap, HashSet, VecDeque}; use std::iter::{zip, IntoIterator}; @@ -97,16 +100,21 @@ fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { hasher.finalize() } +#[cfg_attr(feature = "py-bindings", pyclass(name = "Side", eq, eq_int))] #[derive(Clone, Debug, Hash, Eq, PartialEq)] pub enum Side { Left, Right, } +#[cfg_attr(feature = "py-bindings", pyclass(name = "InsertLocation"))] #[derive(Clone, Debug, Hash, Eq, PartialEq)] pub enum InsertLocation { - Auto, - AsRoot, + // error: Unit variant `Auto` is not yet supported in a complex enum + // = help: change to a struct variant with no fields: `Auto { }` + // = note: the enum is complex because of non-unit variant `Leaf` + Auto {}, + AsRoot {}, Leaf { index: TreeIndex, side: Side }, } @@ -148,16 +156,28 @@ impl NodeMetadata { } } +#[cfg_attr(feature = "py-bindings", pyclass(name = "Node"))] #[derive(Debug, PartialEq)] pub struct Node { + // #[cfg_attr(feature = "py-bindings", pyo3(get))] + #[pyo3(get)] parent: Parent, + // #[cfg_attr(feature = "py-bindings", pyo3(get))] + #[pyo3(get)] hash: Hash, + // #[cfg_attr(feature = "py-bindings", pyo3(get))] + #[pyo3(get)] specific: NodeSpecific, } -#[derive(Debug, PartialEq)] +// #[cfg_attr(feature = "py-bindings", pyclass(name = "NodeSpecific"))] +#[cfg_attr(feature = "py-bindings", pyclass(name = "NodeSpecific"))] +#[derive(Clone, Debug, PartialEq)] pub enum NodeSpecific { + // #[cfg_attr(feature = "py-bindings", pyo3(constructor = (left, right)))] + #[pyo3(constructor = (left, right))] Internal { left: TreeIndex, right: TreeIndex }, + #[pyo3(constructor = (key, value))] Leaf { key: KvId, value: KvId }, } @@ -244,6 +264,46 @@ impl Node { } } +#[cfg(feature = "py-bindings")] 
+#[pymethods] +impl Node { + #[getter(left)] + pub fn left(&self) -> TreeIndex { + let NodeSpecific::Internal { left, .. } = self.specific else { + panic!(); + }; + + left + } + + #[getter(right)] + pub fn right(&self) -> TreeIndex { + let NodeSpecific::Internal { right, .. } = self.specific else { + panic!(); + }; + + right + } + + #[getter(key)] + pub fn key(&self) -> KvId { + let NodeSpecific::Leaf { key, .. } = self.specific else { + panic!(); + }; + + key + } + + #[getter(value)] + pub fn value(&self) -> KvId { + let NodeSpecific::Leaf { value, .. } = self.specific else { + panic!(); + }; + + value + } +} + fn block_range(index: TreeIndex) -> Range { let block_start = index as usize * BLOCK_SIZE; block_start..block_start + BLOCK_SIZE @@ -311,6 +371,8 @@ fn get_free_indexes_and_keys_values_indexes( pub struct MerkleBlob { blob: Vec, free_indexes: HashSet, + #[pyo3(get)] + // #[cfg_attr(feature = "py-bindings", pyo3(get))] key_to_index: HashMap, } @@ -346,16 +408,20 @@ impl MerkleBlob { hash: &Hash, insert_location: InsertLocation, ) -> Result<(), String> { + if self.key_to_index.contains_key(&key) { + return Err("Key already present".to_string()); + } + let insert_location = match insert_location { - InsertLocation::Auto => self.get_random_insert_location_by_kvid(key)?, + InsertLocation::Auto {} => self.get_random_insert_location_by_kvid(key)?, _ => insert_location, }; match insert_location { - InsertLocation::Auto => { + InsertLocation::Auto {} => { panic!("this should have been caught and processed above") } - InsertLocation::AsRoot => { + InsertLocation::AsRoot {} => { if !self.key_to_index.is_empty() { return Err("requested insertion at root but tree not empty".to_string()); }; @@ -630,7 +696,7 @@ impl MerkleBlob { pub fn upsert(&mut self, key: KvId, value: KvId, new_hash: &Hash) -> Result<(), String> { let Some(leaf_index) = self.key_to_index.get(&key) else { - self.insert(key, value, new_hash, InsertLocation::Auto)?; + self.insert(key, value, new_hash, InsertLocation::Auto {})?; return Ok(()); }; @@ -763,7 +829,7 @@ impl MerkleBlob { let mut seed_bytes = Vec::from(seed_bytes); if self.blob.is_empty() { - return Ok(InsertLocation::AsRoot); + return Ok(InsertLocation::AsRoot {}); } let side = if (seed_bytes @@ -1047,28 +1113,88 @@ impl MerkleBlob { let slice = unsafe { std::slice::from_raw_parts(blob.buf_ptr() as *const u8, blob.len_bytes()) }; - match Self::new(Vec::from(slice)) { - Ok(blob) => Ok(blob), - Err(message) => Err(PyValueError::new_err(message)), - } + Self::new(Vec::from(slice)).map_err(|e| PyValueError::new_err(e)) } - #[pyo3(name = "insert")] - pub fn py_insert(&mut self, key: KvId, value: KvId, hash: Hash) -> PyResult<()> { - if let Err(message) = self.insert(key, value, &hash, InsertLocation::Auto) { - Err(PyValueError::new_err(message)) - } else { - Ok(()) - } + #[pyo3(name = "insert", signature = (key, value, hash, reference_kid = None, side = None))] + pub fn py_insert( + &mut self, + key: KvId, + value: KvId, + hash: Hash, + reference_kid: Option, + // TODO: should be a Side, but python has a different Side right now + side: Option, + ) -> PyResult<()> { + let insert_location = match (reference_kid, side) { + (None, None) => InsertLocation::Auto {}, + (Some(key), Some(side)) => InsertLocation::Leaf { + index: *self + .key_to_index + .get(&key) + .ok_or(PyValueError::new_err("TODO: better message here"))?, + side: match side { + // TODO: if this sticks around, we gotta get more formal about the mapping + 0 => Side::Left, + 1 => Side::Right, + _ => panic!(), + 
}, + }, + _ => { + return Err(PyValueError::new_err( + "must specify neither or both of reference_kid and side", + )); + } + }; + self.insert(key, value, &hash, insert_location) + .map_err(|e| PyValueError::new_err(e)) } #[pyo3(name = "delete")] pub fn py_delete(&mut self, key: KvId) -> PyResult<()> { - if let Err(message) = self.delete(key) { - Err(PyValueError::new_err(message)) - } else { - Ok(()) + self.delete(key).map_err(|e| PyValueError::new_err(e)) + } + + #[pyo3(name = "get_raw_node")] + pub fn py_get_raw_node(&mut self, index: TreeIndex) -> PyResult { + self.get_node(index).map_err(|e| PyValueError::new_err(e)) + } + + #[pyo3(name = "calculate_lazy_hashes")] + pub fn py_calculate_lazy_hashes(&mut self) -> PyResult<()> { + self.calculate_lazy_hashes() + .map_err(|e| PyValueError::new_err(e)) + } + + #[pyo3(name = "get_lineage")] + pub fn py_get_lineage(&self, index: TreeIndex, py: Python<'_>) -> PyResult { + let list = pyo3::types::PyList::empty_bound(py); + + for node in self + .get_lineage(index) + .map_err(|e| PyValueError::new_err(e))? + { + use pyo3::conversion::IntoPy; + use pyo3::types::PyListMethods; + list.append(node.into_py(py))?; } + + Ok(list.into()) + } + + #[pyo3(name = "get_nodes", signature=(index=0))] + pub fn py_get_nodes(&self, py: Python<'_>, index: TreeIndex) -> PyResult { + // TODO: use the index parameter + + let list = pyo3::types::PyList::empty_bound(py); + + for (_, block) in self { + use pyo3::conversion::IntoPy; + use pyo3::types::PyListMethods; + list.append(block.node.into_py(py))?; + } + + Ok(list.into()) } #[pyo3(name = "__len__")] @@ -1289,7 +1415,7 @@ mod tests { 0x0001_0203_0405_0607, 0x1011_1213_1415_1617, &sha256_num(0x1020), - InsertLocation::Auto, + InsertLocation::Auto {}, ) .unwrap(); @@ -1297,7 +1423,7 @@ mod tests { 0x2021_2223_2425_2627, 0x3031_3233_3435_3637, &sha256_num(0x2030), - InsertLocation::Auto, + InsertLocation::Auto {}, ) .unwrap(); @@ -1400,7 +1526,7 @@ mod tests { let start = Instant::now(); merkle_blob // NOTE: yeah this hash is garbage - .insert(i, i, &sha256_num(i), InsertLocation::Auto) + .insert(i, i, &sha256_num(i), InsertLocation::Auto {}) .unwrap(); let end = Instant::now(); total_time += end.duration_since(start); @@ -1431,7 +1557,7 @@ mod tests { merkle_blob.calculate_lazy_hashes().unwrap(); reference_blobs.push(MerkleBlob::new(merkle_blob.blob.clone()).unwrap()); merkle_blob - .insert(key_value_id, key_value_id, &hash, InsertLocation::Auto) + .insert(key_value_id, key_value_id, &hash, InsertLocation::Auto {}) .unwrap(); dots.push(merkle_blob.to_dot().dump()); } @@ -1460,7 +1586,7 @@ mod tests { key_value_id, key_value_id, &sha256_num(key_value_id), - InsertLocation::Auto, + InsertLocation::Auto {}, ) .unwrap(); open_dot(merkle_blob.to_dot().set_note("first after")); @@ -1481,7 +1607,7 @@ mod tests { let key: KvId = i as KvId; open_dot(merkle_blob.to_dot().set_note("empty")); merkle_blob - .insert(key, key, &sha256_num(key), InsertLocation::Auto) + .insert(key, key, &sha256_num(key), InsertLocation::Auto {}) .unwrap(); last_key = key; } @@ -1539,7 +1665,7 @@ mod tests { key_value_id, key_value_id, &sha256_num(key_value_id), - InsertLocation::Auto, + InsertLocation::Auto {}, ) .unwrap(); open_dot(merkle_blob.to_dot().set_note("first after")); @@ -1626,7 +1752,7 @@ mod tests { let mut insert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); insert_blob - .insert(key, value, &sha256_num(key), InsertLocation::Auto) + .insert(key, value, &sha256_num(key), InsertLocation::Auto {}) .unwrap(); 
open_dot(insert_blob.to_dot().set_note("first after")); @@ -1683,4 +1809,13 @@ mod tests { } } } + + #[test] + fn test_double_insert_fails() { + let mut blob = MerkleBlob::new(vec![]).unwrap(); + blob.insert(0, 0, &[0u8; 32], InsertLocation::Auto {}) + .unwrap(); + blob.insert(0, 0, &[0u8; 32], InsertLocation::Auto {}) + .expect_err(""); + } } diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index 03580c9cb..df91a6cb5 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -386,8 +386,10 @@ def __init__( blob: bytes, ) -> None: ... - def insert(self, key: uint64, value: uint64, hash: bytes32) -> None: ... + def insert(self, key: uint64, value: uint64, hash: bytes32, reference_kid: uint64, side: Side) -> None: ... def delete(self, key: uint64) -> None: ... + # def get_raw_node(self, index: int) -> Node: ... + def calculate_lazy_hashes(self) -> None: ... def __len__(self) -> int: ... @final diff --git a/wheel/python/chia_rs/chia_rs.pyi b/wheel/python/chia_rs/chia_rs.pyi index 6edcac162..d6332e138 100644 --- a/wheel/python/chia_rs/chia_rs.pyi +++ b/wheel/python/chia_rs/chia_rs.pyi @@ -128,8 +128,10 @@ class MerkleBlob: blob: bytes, ) -> None: ... - def insert(self, key: uint64, value: uint64, hash: bytes32) -> None: ... + def insert(self, key: uint64, value: uint64, hash: bytes32, reference_kid: uint64, side: Side) -> None: ... def delete(self, key: uint64) -> None: ... + # def get_raw_node(self, index: int) -> Node: ... + def calculate_lazy_hashes(self) -> None: ... def __len__(self) -> int: ... @final diff --git a/wheel/src/api.rs b/wheel/src/api.rs index c22462826..91eab80ff 100644 --- a/wheel/src/api.rs +++ b/wheel/src/api.rs @@ -75,7 +75,7 @@ use chia_bls::{ Signature, }; -use chia_datalayer::MerkleBlob; +use chia_datalayer::{MerkleBlob, Node}; #[pyfunction] pub fn compute_merkle_set_root<'p>( @@ -477,6 +477,9 @@ pub fn chia_rs(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { // datalayer m.add_class::()?; + // m.add_class::()?; + // m.add_class::()?; + m.add_class::()?; // merkle tree m.add_class::()?; From 16b3ac9d2fd35ca96d17dc76b56b8ba5812b59bd Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 18 Oct 2024 13:40:01 -0400 Subject: [PATCH 101/181] tidy --- crates/chia-datalayer/src/merkle.rs | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 0d7961bd6..57dd2f2c4 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -1,11 +1,8 @@ #[cfg(feature = "py-bindings")] -use pyo3::{buffer::PyBuffer, exceptions::PyValueError, pyclass, pymethods, PyResult}; +use pyo3::{buffer::PyBuffer, exceptions::PyValueError, pyclass, pymethods, PyResult, Python}; use clvmr::sha2::Sha256; -use clvmr::SExp; use num_traits::ToBytes; -use pyo3::types::PyTuple; -use pyo3::{PyObject, Python}; use std::cmp::Ordering; use std::collections::{HashMap, HashSet, VecDeque}; use std::iter::{zip, IntoIterator}; @@ -156,28 +153,19 @@ impl NodeMetadata { } } -#[cfg_attr(feature = "py-bindings", pyclass(name = "Node"))] +#[cfg_attr(feature = "py-bindings", pyclass(name = "Node", get_all))] #[derive(Debug, PartialEq)] pub struct Node { - // #[cfg_attr(feature = "py-bindings", pyo3(get))] - #[pyo3(get)] parent: Parent, - // #[cfg_attr(feature = "py-bindings", pyo3(get))] - #[pyo3(get)] hash: Hash, - // #[cfg_attr(feature = "py-bindings", pyo3(get))] - #[pyo3(get)] specific: NodeSpecific, } // 
#[cfg_attr(feature = "py-bindings", pyclass(name = "NodeSpecific"))] -#[cfg_attr(feature = "py-bindings", pyclass(name = "NodeSpecific"))] +#[cfg_attr(feature = "py-bindings", pyclass(name = "NodeSpecific", get_all))] #[derive(Clone, Debug, PartialEq)] pub enum NodeSpecific { - // #[cfg_attr(feature = "py-bindings", pyo3(constructor = (left, right)))] - #[pyo3(constructor = (left, right))] Internal { left: TreeIndex, right: TreeIndex }, - #[pyo3(constructor = (key, value))] Leaf { key: KvId, value: KvId }, } @@ -366,13 +354,11 @@ fn get_free_indexes_and_keys_values_indexes( (free_indexes, key_to_index) } -#[cfg_attr(feature = "py-bindings", pyclass(name = "MerkleBlob"))] +#[cfg_attr(feature = "py-bindings", pyclass(name = "MerkleBlob", get_all))] #[derive(Debug)] pub struct MerkleBlob { blob: Vec, free_indexes: HashSet, - #[pyo3(get)] - // #[cfg_attr(feature = "py-bindings", pyo3(get))] key_to_index: HashMap, } @@ -1182,10 +1168,8 @@ impl MerkleBlob { Ok(list.into()) } - #[pyo3(name = "get_nodes", signature=(index=0))] - pub fn py_get_nodes(&self, py: Python<'_>, index: TreeIndex) -> PyResult { - // TODO: use the index parameter - + #[pyo3(name = "get_nodes")] + pub fn py_get_nodes(&self, py: Python<'_>) -> PyResult { let list = pyo3::types::PyList::empty_bound(py); for (_, block) in self { From 6d92a1e08bc68648a168b1f807c9d036124dc252 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 18 Oct 2024 13:41:55 -0400 Subject: [PATCH 102/181] clippy --- crates/chia-datalayer/src/merkle.rs | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 57dd2f2c4..d253ac47e 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -1099,7 +1099,7 @@ impl MerkleBlob { let slice = unsafe { std::slice::from_raw_parts(blob.buf_ptr() as *const u8, blob.len_bytes()) }; - Self::new(Vec::from(slice)).map_err(|e| PyValueError::new_err(e)) + Self::new(Vec::from(slice)).map_err(PyValueError::new_err) } #[pyo3(name = "insert", signature = (key, value, hash, reference_kid = None, side = None))] @@ -1133,33 +1133,29 @@ impl MerkleBlob { } }; self.insert(key, value, &hash, insert_location) - .map_err(|e| PyValueError::new_err(e)) + .map_err(PyValueError::new_err) } #[pyo3(name = "delete")] pub fn py_delete(&mut self, key: KvId) -> PyResult<()> { - self.delete(key).map_err(|e| PyValueError::new_err(e)) + self.delete(key).map_err(PyValueError::new_err) } #[pyo3(name = "get_raw_node")] pub fn py_get_raw_node(&mut self, index: TreeIndex) -> PyResult { - self.get_node(index).map_err(|e| PyValueError::new_err(e)) + self.get_node(index).map_err(PyValueError::new_err) } #[pyo3(name = "calculate_lazy_hashes")] pub fn py_calculate_lazy_hashes(&mut self) -> PyResult<()> { - self.calculate_lazy_hashes() - .map_err(|e| PyValueError::new_err(e)) + self.calculate_lazy_hashes().map_err(PyValueError::new_err) } #[pyo3(name = "get_lineage")] pub fn py_get_lineage(&self, index: TreeIndex, py: Python<'_>) -> PyResult { let list = pyo3::types::PyList::empty_bound(py); - for node in self - .get_lineage(index) - .map_err(|e| PyValueError::new_err(e))? - { + for node in self.get_lineage(index).map_err(PyValueError::new_err)? 
{ use pyo3::conversion::IntoPy; use pyo3::types::PyListMethods; list.append(node.into_py(py))?; From 14e451b0637aeb1cdaf3c7d19eae9d74f35f67e3 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 21 Oct 2024 11:47:56 -0400 Subject: [PATCH 103/181] stub catchup --- wheel/generate_type_stubs.py | 40 ++++++++++++++++++++++++++++---- wheel/python/chia_rs/chia_rs.pyi | 40 ++++++++++++++++++++++++++++---- 2 files changed, 72 insertions(+), 8 deletions(-) diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index df91a6cb5..1e741de4c 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -261,7 +261,7 @@ def parse_rust_source(filename: str, upper_case: bool) -> List[Tuple[str, List[s # this file is generated by generate_type_stubs.py # -from typing import List, Optional, Sequence, Tuple, Union, Dict, Any, ClassVar, final +from typing import List, Mapping, Optional, Sequence, Set, Tuple, Union, Dict, Any, ClassVar, final from .sized_bytes import bytes32, bytes100 from .sized_ints import uint8, uint16, uint32, uint64, uint128, int8, int16, int32, int64 from typing_extensions import Self @@ -379,17 +379,49 @@ def derive_child_sk_unhardened(sk: PrivateKey, index: int) -> PrivateKey: ... @staticmethod def derive_child_pk_unhardened(pk: G1Element, index: int) -> G1Element: ... +@final +class Node: + @property + def parent(self) -> Optional[uint32]: ... + @property + def hash(self) -> bytes: ... + + # TODO: this all needs reviewed and tidied + @property + def specific(self) -> Union: ... + + @property + def left(self) -> uint32: ... + @property + def right(self) -> uint32: ... + + @property + def key(self) -> int64: ... + @property + def value(self) -> int64: ... + + @final class MerkleBlob: + @property + def blob(self) -> bytearray: ... + @property + def free_indexes(self) -> Set[uint32]: ... + @property + def key_to_index(self) -> Mapping[int64, uint32]: ... + def __init__( self, blob: bytes, ) -> None: ... - def insert(self, key: uint64, value: uint64, hash: bytes32, reference_kid: uint64, side: Side) -> None: ... - def delete(self, key: uint64) -> None: ... - # def get_raw_node(self, index: int) -> Node: ... + def insert(self, key: int64, value: int64, hash: bytes32, reference_kid: Optional[int64] = None, side: Optional[uint8] = None) -> None: ... + def delete(self, key: int64) -> None: ... + def get_raw_node(self, index: uint32) -> Node: ... def calculate_lazy_hashes(self) -> None: ... + def get_lineage(self, index: uint32) -> List[Node]:... + def get_nodes(self) -> List[Node]: ... + def __len__(self) -> int: ... @final diff --git a/wheel/python/chia_rs/chia_rs.pyi b/wheel/python/chia_rs/chia_rs.pyi index d6332e138..d8547d299 100644 --- a/wheel/python/chia_rs/chia_rs.pyi +++ b/wheel/python/chia_rs/chia_rs.pyi @@ -3,7 +3,7 @@ # this file is generated by generate_type_stubs.py # -from typing import List, Optional, Sequence, Tuple, Union, Dict, Any, ClassVar, final +from typing import List, Mapping, Optional, Sequence, Set, Tuple, Union, Dict, Any, ClassVar, final from .sized_bytes import bytes32, bytes100 from .sized_ints import uint8, uint16, uint32, uint64, uint128, int8, int16, int32, int64 from typing_extensions import Self @@ -121,17 +121,49 @@ class AugSchemeMPL: @staticmethod def derive_child_pk_unhardened(pk: G1Element, index: int) -> G1Element: ... +@final +class Node: + @property + def parent(self) -> Optional[uint32]: ... + @property + def hash(self) -> bytes: ... 
+ + # TODO: this all needs reviewed and tidied + @property + def specific(self) -> Union: ... + + @property + def left(self) -> uint32: ... + @property + def right(self) -> uint32: ... + + @property + def key(self) -> int64: ... + @property + def value(self) -> int64: ... + + @final class MerkleBlob: + @property + def blob(self) -> bytearray: ... + @property + def free_indexes(self) -> Set[uint32]: ... + @property + def key_to_index(self) -> Mapping[int64, uint32]: ... + def __init__( self, blob: bytes, ) -> None: ... - def insert(self, key: uint64, value: uint64, hash: bytes32, reference_kid: uint64, side: Side) -> None: ... - def delete(self, key: uint64) -> None: ... - # def get_raw_node(self, index: int) -> Node: ... + def insert(self, key: int64, value: int64, hash: bytes32, reference_kid: Optional[int64] = None, side: Optional[uint8] = None) -> None: ... + def delete(self, key: int64) -> None: ... + def get_raw_node(self, index: uint32) -> Node: ... def calculate_lazy_hashes(self) -> None: ... + def get_lineage(self, index: uint32) -> List[Node]:... + def get_nodes(self) -> List[Node]: ... + def __len__(self) -> int: ... @final From ecb4d25d42ff85ac2e1f72f96526386d3312d82f Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 22 Oct 2024 19:42:01 -0400 Subject: [PATCH 104/181] implement more; --- crates/chia-datalayer/src/merkle.rs | 317 +++++++++++++++++++++++----- wheel/generate_type_stubs.py | 4 +- 2 files changed, 271 insertions(+), 50 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index d253ac47e..49fc4087b 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -1,5 +1,9 @@ #[cfg(feature = "py-bindings")] -use pyo3::{buffer::PyBuffer, exceptions::PyValueError, pyclass, pymethods, PyResult, Python}; +use pyo3::{ + buffer::PyBuffer, + exceptions::{PyAttributeError, PyValueError}, + pyclass, pymethods, PyResult, Python, +}; use clvmr::sha2::Sha256; use num_traits::ToBytes; @@ -256,39 +260,47 @@ impl Node { #[pymethods] impl Node { #[getter(left)] - pub fn left(&self) -> TreeIndex { + pub fn py_property_left(&self) -> PyResult { let NodeSpecific::Internal { left, .. } = self.specific else { - panic!(); + return Err(PyAttributeError::new_err( + "Attribute 'left' not present for leaf nodes".to_string(), + )); }; - left + Ok(left) } #[getter(right)] - pub fn right(&self) -> TreeIndex { + pub fn py_property_right(&self) -> PyResult { let NodeSpecific::Internal { right, .. } = self.specific else { - panic!(); + return Err(PyAttributeError::new_err( + "Attribute 'right' not present for leaf nodes".to_string(), + )); }; - right + Ok(right) } #[getter(key)] - pub fn key(&self) -> KvId { + pub fn py_property_key(&self) -> PyResult { let NodeSpecific::Leaf { key, .. } = self.specific else { - panic!(); + return Err(PyAttributeError::new_err( + "Attribute 'key' not present for internal nodes".to_string(), + )); }; - key + Ok(key) } #[getter(value)] - pub fn value(&self) -> KvId { + pub fn py_property_value(&self) -> PyResult { let NodeSpecific::Leaf { value, .. } = self.specific else { - panic!(); + return Err(PyAttributeError::new_err( + "Attribute 'value' not present for internal nodes".to_string(), + )); }; - value + Ok(value) } } @@ -613,6 +625,161 @@ impl MerkleBlob { Ok(()) } + fn batch_insert(&mut self, mut keys_values_hashes: I) -> Result<(), String> + where + I: Iterator, + { + // TODO: would it be worthwhile to hold the entire blocks? 
+ let mut indexes = vec![]; + + if self.key_to_index.len() <= 1 { + for _ in 0..2 { + let Some(((key, value), hash)) = keys_values_hashes.next() else { + return Ok(()); + }; + self.insert(key, value, &hash, InsertLocation::Auto {})?; + } + } + + for ((key, value), hash) in keys_values_hashes { + let new_leaf_index = self.get_new_index(); + let new_block = Block { + metadata: NodeMetadata { + node_type: NodeType::Leaf, + dirty: false, + }, + node: Node { + parent: None, + hash, + specific: NodeSpecific::Leaf { key, value }, + }, + }; + self.insert_entry_to_blob(new_leaf_index, &new_block) + .unwrap(); + indexes.push(new_leaf_index); + } + + // TODO: can we insert the top node first? maybe more efficient to update it's children + // than to update the parents of the children when traversing leaf to sub-root? + while !indexes.is_empty() { + let mut new_indexes = vec![]; + + for chunk in indexes.chunks(2) { + let [index_1, index_2] = match chunk { + [index] => { + new_indexes.push(*index); + continue; + } + [index_1, index_2] => [*index_1, *index_2], + _ => unreachable!( + "chunk should always be either one or two long and be handled above" + ), + }; + + let block_1 = self.get_block(index_1)?; + let block_2 = self.get_block(index_2)?; + + let new_internal_node_index = self.get_new_index(); + + let new_block = Block { + metadata: NodeMetadata { + node_type: NodeType::Internal, + dirty: false, + }, + node: Node { + parent: None, + hash: internal_hash(&block_1.node.hash, &block_2.node.hash), + specific: NodeSpecific::Internal { + left: index_1, + right: index_2, + }, + }, + }; + + self.insert_entry_to_blob(new_internal_node_index, &new_block)?; + for (child_index, mut child_block) in [(index_1, block_1), (index_2, block_2)] { + child_block.node.parent = Some(new_internal_node_index); + self.insert_entry_to_blob(child_index, &child_block)?; + } + new_indexes.push(new_internal_node_index); + } + + indexes = new_indexes; + } + + if indexes.len() == 1 { + // TODO: can we avoid this extra min height leaf traversal? + let min_height_leaf = self.get_min_height_leaf()?; + let NodeSpecific::Leaf { key, .. } = min_height_leaf.node.specific else { + panic!() + }; + self.insert_from_leaf(self.key_to_index[&key], indexes[0], &Side::Left)?; + }; + + Ok(()) + } + + fn insert_from_leaf( + &mut self, + old_leaf_index: TreeIndex, + new_index: TreeIndex, + side: &Side, + ) -> Result<(), String> { + // TODO: consider name, we're inserting a subtree at a leaf + // TODO: seems like this ought to be fairly similar to regular insert + + struct Stuff { + index: TreeIndex, + hash: Hash, + } + + let new_internal_node_index = self.get_new_index(); + let old_leaf = self.get_node(old_leaf_index)?; + let new_node = self.get_node(new_index)?; + + let new_stuff = Stuff { + index: new_index, + hash: new_node.hash, + }; + let old_stuff = Stuff { + index: old_leaf_index, + hash: old_leaf.hash, + }; + let (left, right) = match side { + Side::Left => (new_stuff, old_stuff), + Side::Right => (old_stuff, new_stuff), + }; + let internal_node_hash = internal_hash(&left.hash, &right.hash); + + let block = Block { + metadata: NodeMetadata { + node_type: NodeType::Internal, + dirty: false, + }, + node: Node { + parent: old_leaf.parent, + hash: internal_node_hash, + specific: NodeSpecific::Internal { + left: left.index, + right: right.index, + }, + }, + }; + self.insert_entry_to_blob(new_internal_node_index, &block)?; + // TODO: yeah, doing this a fair bit, dedupe. 
probably + let mut block = self.get_block(new_index)?; + block.node.parent = Some(new_internal_node_index); + self.insert_entry_to_blob(new_index, &block)?; + + Ok(()) + } + + fn get_min_height_leaf(&self) -> Result { + MerkleBlobBreadthFirstIterator::new(&self.blob) + .next() + .ok_or("unable to find a leaf".to_string()) + } + pub fn delete(&mut self, key: KvId) -> Result<(), String> { let leaf_index = *self .key_to_index @@ -877,6 +1044,8 @@ impl MerkleBlob { Ordering::Less => { // TODO: lots of deserialization here for just the key let old_block = self.get_block(index)?; + // TODO: should we be more careful about accidentally reading garbage like + // from a freshly gotten index if !self.free_indexes.contains(&index) && old_block.metadata.node_type == NodeType::Leaf { @@ -932,14 +1101,17 @@ impl MerkleBlob { )) } - pub fn get_lineage(&self, index: TreeIndex) -> Result, String> { + pub fn get_lineage_with_indexes( + &self, + index: TreeIndex, + ) -> Result, String> { let mut next_index = Some(index); let mut lineage = vec![]; while let Some(this_index) = next_index { let node = self.get_node(this_index)?; next_index = node.parent; - lineage.push(node); + lineage.push((index, node)); } Ok(lineage) @@ -1026,19 +1198,19 @@ impl MerkleBlob { Ok(()) } - #[allow(unused)] - fn rebuild(&mut self) -> Result<(), String> { - panic!(); - // TODO: could make insert_entry_to_blob a free function and not need to make - // a merkle blob here? maybe? - let mut new = Self::new(Vec::new())?; - for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob).enumerate() { - // new.insert_entry_to_blob(index, )? - } - self.blob = new.blob; - - Ok(()) - } + // #[allow(unused)] + // fn rebuild(&mut self) -> Result<(), String> { + // panic!(); + // // TODO: could make insert_entry_to_blob a free function and not need to make + // // a merkle blob here? maybe? + // // let mut new = Self::new(Vec::new())?; + // // for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob).enumerate() { + // // // new.insert_entry_to_blob(index, )? + // // } + // // self.blob = new.blob; + // + // Ok(()) + // } #[allow(unused)] fn get_key_value_map(&self) -> HashMap { @@ -1151,32 +1323,81 @@ impl MerkleBlob { self.calculate_lazy_hashes().map_err(PyValueError::new_err) } - #[pyo3(name = "get_lineage")] - pub fn py_get_lineage(&self, index: TreeIndex, py: Python<'_>) -> PyResult { + #[pyo3(name = "get_lineage_with_indexes")] + pub fn py_get_lineage_with_indexes( + &self, + index: TreeIndex, + py: Python<'_>, + ) -> PyResult { let list = pyo3::types::PyList::empty_bound(py); - for node in self.get_lineage(index).map_err(PyValueError::new_err)? { + for (index, node) in self + .get_lineage_with_indexes(index) + .map_err(PyValueError::new_err)? 
+ { use pyo3::conversion::IntoPy; use pyo3::types::PyListMethods; - list.append(node.into_py(py))?; + list.append((index, node.into_py(py)))?; } Ok(list.into()) } - #[pyo3(name = "get_nodes")] - pub fn py_get_nodes(&self, py: Python<'_>) -> PyResult { + #[pyo3(name = "get_nodes_with_indexes")] + pub fn py_get_nodes_with_indexes(&self, py: Python<'_>) -> PyResult { let list = pyo3::types::PyList::empty_bound(py); - for (_, block) in self { + for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob) { use pyo3::conversion::IntoPy; use pyo3::types::PyListMethods; - list.append(block.node.into_py(py))?; + list.append((index, block.node.into_py(py)))?; } Ok(list.into()) } + #[pyo3(name = "empty")] + pub fn py_empty(&self) -> PyResult { + Ok(self.key_to_index.is_empty()) + } + + #[pyo3(name = "get_root_hash")] + pub fn py_get_root_hash(&self) -> PyResult> { + self.py_get_hash_at_index(0) + } + + #[pyo3(name = "get_hash_at_index")] + pub fn py_get_hash_at_index(&self, index: TreeIndex) -> PyResult> { + if self.key_to_index.is_empty() { + return Ok(None); + } + + let block = self.get_block(index).map_err(PyValueError::new_err)?; + if block.metadata.dirty { + return Err(PyValueError::new_err("root hash is dirty")); + } + + Ok(Some(block.node.hash)) + } + + #[pyo3(name = "batch_insert")] + fn py_batch_insert( + &mut self, + keys_values: Vec<(KvId, KvId)>, + hashes: Vec, + ) -> PyResult<()> { + if keys_values.len() != hashes.len() { + return Err(PyValueError::new_err( + "key/value and hash collection lengths must match", + )); + } + + self.batch_insert(&mut zip(keys_values, hashes)) + .map_err(PyValueError::new_err)?; + + Ok(()) + } + #[pyo3(name = "__len__")] pub fn py_len(&self) -> PyResult { Ok(self.blob.len()) @@ -1330,15 +1551,15 @@ mod tests { // crate::merkle::dot::open_dot(lines); } - #[allow(unused)] - fn normalized_blob(merkle_blob: &MerkleBlob) -> Vec { - let mut new = MerkleBlob::new(merkle_blob.blob.clone()).unwrap(); - - new.calculate_lazy_hashes(); - new.rebuild(); - - new.blob - } + // #[allow(unused)] + // fn normalized_blob(merkle_blob: &MerkleBlob) -> Vec { + // let mut new = MerkleBlob::new(merkle_blob.blob.clone()).unwrap(); + // + // new.calculate_lazy_hashes(); + // new.rebuild(); + // + // new.blob + // } #[test] fn test_node_type_serialized_values() { @@ -1412,12 +1633,12 @@ mod tests { #[rstest] fn test_get_lineage(small_blob: MerkleBlob) { - let lineage = small_blob.get_lineage(2).unwrap(); - for node in &lineage { + let lineage = small_blob.get_lineage_with_indexes(2).unwrap(); + for (_, node) in &lineage { println!("{node:?}"); } assert_eq!(lineage.len(), 2); - let last_node = lineage.last().unwrap(); + let (_, last_node) = lineage.last().unwrap(); assert_eq!(last_node.parent, None); small_blob.check().unwrap(); diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index 1e741de4c..c81c06de5 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -419,8 +419,8 @@ def insert(self, key: int64, value: int64, hash: bytes32, reference_kid: Optiona def delete(self, key: int64) -> None: ... def get_raw_node(self, index: uint32) -> Node: ... def calculate_lazy_hashes(self) -> None: ... - def get_lineage(self, index: uint32) -> List[Node]:... - def get_nodes(self) -> List[Node]: ... + def get_lineage_with_indexes(self, index: uint32) -> List[Tuple[uint32, Node)]:... + def get_nodes_with_indexes(self) -> List[Node]: ... def __len__(self) -> int: ... 
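(Not part of any commit in this series: a minimal usage sketch of the Rust API as it stands after this patch, for readers following along. The all-zero hashes are placeholders, playing the same role as the tests' sha256-of-integer helper, and batch_insert is left out here because it only becomes pub a few commits later.)

use chia_datalayer::{InsertLocation, MerkleBlob};

fn example() -> Result<(), String> {
    // an empty byte blob represents an empty tree
    let mut blob = MerkleBlob::new(vec![])?;

    // keys and values are i64 ids, hashes are 32-byte arrays
    blob.insert(1, 101, &[0u8; 32], InsertLocation::Auto {})?;
    blob.insert(2, 102, &[0u8; 32], InsertLocation::Auto {})?;

    // re-inserting an existing key is rejected
    assert!(blob
        .insert(1, 101, &[0u8; 32], InsertLocation::Auto {})
        .is_err());

    // internal hashes marked dirty are filled in on request
    blob.calculate_lazy_hashes()?;

    // walk from a node up toward the root as (index, node) pairs
    for (index, node) in blob.get_lineage_with_indexes(2)? {
        println!("{index}: {node:?}");
    }

    Ok(())
}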
From 4f1275ec7b8a25bdc8d1768d48a1205f3ddbc5a6 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 22 Oct 2024 19:49:34 -0400 Subject: [PATCH 105/181] pyi --- wheel/generate_type_stubs.py | 4 +++- wheel/python/chia_rs/chia_rs.pyi | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index c81c06de5..37598a695 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -419,8 +419,10 @@ def insert(self, key: int64, value: int64, hash: bytes32, reference_kid: Optiona def delete(self, key: int64) -> None: ... def get_raw_node(self, index: uint32) -> Node: ... def calculate_lazy_hashes(self) -> None: ... - def get_lineage_with_indexes(self, index: uint32) -> List[Tuple[uint32, Node)]:... + def get_lineage_with_indexes(self, index: uint32) -> List[Tuple[uint32, Node]]:... def get_nodes_with_indexes(self) -> List[Node]: ... + def empty(self) -> bool: ... + def get_root_hash(self) -> bytes32: ... def __len__(self) -> int: ... diff --git a/wheel/python/chia_rs/chia_rs.pyi b/wheel/python/chia_rs/chia_rs.pyi index d8547d299..6144f8341 100644 --- a/wheel/python/chia_rs/chia_rs.pyi +++ b/wheel/python/chia_rs/chia_rs.pyi @@ -161,8 +161,10 @@ class MerkleBlob: def delete(self, key: int64) -> None: ... def get_raw_node(self, index: uint32) -> Node: ... def calculate_lazy_hashes(self) -> None: ... - def get_lineage(self, index: uint32) -> List[Node]:... - def get_nodes(self) -> List[Node]: ... + def get_lineage_with_indexes(self, index: uint32) -> List[Tuple[uint32, Node]]:... + def get_nodes_with_indexes(self) -> List[Node]: ... + def empty(self) -> bool: ... + def get_root_hash(self) -> bytes32: ... def __len__(self) -> int: ... From 030dfc476726d9b580c7ccee4c483f2861399dee Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 23 Oct 2024 09:48:32 -0400 Subject: [PATCH 106/181] unreachable --- crates/chia-datalayer/src/merkle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 49fc4087b..7197b5f6c 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -417,7 +417,7 @@ impl MerkleBlob { match insert_location { InsertLocation::Auto {} => { - panic!("this should have been caught and processed above") + unreachable!("this should have been caught and processed above") } InsertLocation::AsRoot {} => { if !self.key_to_index.is_empty() { From e6ff146c7af1a915f42f40a5f47b8d8db29ed1aa Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 24 Oct 2024 10:47:21 -0400 Subject: [PATCH 107/181] s64 --- tests/test_datalayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datalayer.py b/tests/test_datalayer.py index 1ca216edf..2e6d4575e 100644 --- a/tests/test_datalayer.py +++ b/tests/test_datalayer.py @@ -28,6 +28,6 @@ def test_just_insert_a_bunch() -> None: total_time = 0.0 for i in range(100000): start = time.monotonic() - merkle_blob.insert(uint64(i), uint64(i), HASH) + merkle_blob.insert(int64(i), int64(i), HASH) end = time.monotonic() total_time += end - start From c93fb5548cb0a2870afae9a897cb4d3ec11cb4e9 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 24 Oct 2024 11:31:54 -0400 Subject: [PATCH 108/181] blech --- tests/test_datalayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datalayer.py b/tests/test_datalayer.py index 2e6d4575e..be8bae198 100644 --- a/tests/test_datalayer.py +++ 
b/tests/test_datalayer.py @@ -1,6 +1,6 @@ from chia_rs import MerkleBlob from chia_rs.sized_bytes import bytes32 -from chia_rs.sized_ints import uint64 +from chia_rs.sized_ints import int64 def test_merkle_blob(): From 815d5ded232a05558ff360e1b03e8297f9728967 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 24 Oct 2024 11:46:22 -0400 Subject: [PATCH 109/181] hints --- wheel/generate_type_stubs.py | 2 ++ wheel/python/chia_rs/chia_rs.pyi | 2 ++ 2 files changed, 4 insertions(+) diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index 37598a695..21a71f879 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -423,6 +423,8 @@ def get_lineage_with_indexes(self, index: uint32) -> List[Tuple[uint32, Node]]:. def get_nodes_with_indexes(self) -> List[Node]: ... def empty(self) -> bool: ... def get_root_hash(self) -> bytes32: ... + def batch_insert(self, keys_values: List[Tuple[int64, int64]], hashes: List[bytes32]): ... + def get_hash_at_index(self, index: uint32): ... def __len__(self) -> int: ... diff --git a/wheel/python/chia_rs/chia_rs.pyi b/wheel/python/chia_rs/chia_rs.pyi index 6144f8341..25c2563ba 100644 --- a/wheel/python/chia_rs/chia_rs.pyi +++ b/wheel/python/chia_rs/chia_rs.pyi @@ -165,6 +165,8 @@ class MerkleBlob: def get_nodes_with_indexes(self) -> List[Node]: ... def empty(self) -> bool: ... def get_root_hash(self) -> bytes32: ... + def batch_insert(self, keys_values: List[Tuple[int64, int64]], hashes: List[bytes32]): ... + def get_hash_at_index(self, index: uint32): ... def __len__(self) -> int: ... From 5d54f857d0131371f9b5ff7f0ff291db478fd7c7 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 24 Oct 2024 11:50:44 -0400 Subject: [PATCH 110/181] pub --- crates/chia-datalayer/src/merkle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 7197b5f6c..27cd32baf 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -1381,7 +1381,7 @@ impl MerkleBlob { } #[pyo3(name = "batch_insert")] - fn py_batch_insert( + pub fn py_batch_insert( &mut self, keys_values: Vec<(KvId, KvId)>, hashes: Vec, From d1de37d58176ef25c3c9a7d4d0c2944b530b2d7c Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 24 Oct 2024 12:17:49 -0400 Subject: [PATCH 111/181] pub --- crates/chia-datalayer/src/merkle.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 27cd32baf..fa25741f6 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -625,7 +625,7 @@ impl MerkleBlob { Ok(()) } - fn batch_insert(&mut self, mut keys_values_hashes: I) -> Result<(), String> + pub fn batch_insert(&mut self, mut keys_values_hashes: I) -> Result<(), String> where I: Iterator, { From 7881538063cbd2c0fc4609f27c058714125ad026 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 24 Oct 2024 19:48:40 -0400 Subject: [PATCH 112/181] fixup --- crates/chia-datalayer/src/merkle.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index fa25741f6..7a6259069 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -661,7 +661,7 @@ impl MerkleBlob { // TODO: can we insert the top node first? 
maybe more efficient to update it's children // than to update the parents of the children when traversing leaf to sub-root? - while !indexes.is_empty() { + while indexes.len() > 1 { let mut new_indexes = vec![]; for chunk in indexes.chunks(2) { @@ -2019,4 +2019,15 @@ mod tests { blob.insert(0, 0, &[0u8; 32], InsertLocation::Auto {}) .expect_err(""); } + + #[rstest] + fn test_batch_insert_with_odd_count_does_not_hang(mut small_blob: MerkleBlob) { + let mut batch: Vec<((KvId, KvId), Hash)> = vec![]; + + for i in 0..9 { + batch.push(((i, i), sha256_num(i))); + } + + small_blob.batch_insert(batch.into_iter()).unwrap(); + } } From be43bf68f8cde4857b4777576050812ac87b4c58 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 25 Oct 2024 13:56:44 -0400 Subject: [PATCH 113/181] sorta kinda parallelize inserting a bunch 'test' --- crates/chia-datalayer/src/merkle.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 7a6259069..c32c62951 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -1717,13 +1717,18 @@ mod tests { // merkle_blob.check().unwrap(); // } - #[test] - fn test_just_insert_a_bunch() { + #[rstest] + fn test_just_insert_a_bunch( + // just allowing parallelism of testing 100,000 inserts total + #[values(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)] n: i64, + ) { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); let mut total_time = Duration::new(0, 0); - for i in 0..100_000 { + let count = 10_000; + let m: KvId = count * n; + for i in m..(m + count) { let start = Instant::now(); merkle_blob // NOTE: yeah this hash is garbage From de711e5bf3df298bef8a837f008a93b719d08760 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 25 Oct 2024 14:09:00 -0400 Subject: [PATCH 114/181] tidy --- crates/chia-datalayer/src/merkle.rs | 82 ++++------------------------- 1 file changed, 11 insertions(+), 71 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index c32c62951..f3485f428 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -18,6 +18,10 @@ type Parent = Option; type Hash = [u8; 32]; type KvId = i64; +// assumptions +// - root is at index 0 +// - any case with no keys will have a zero length blob + const fn range_by_length(start: usize, length: usize) -> Range { start..start + length } @@ -129,7 +133,7 @@ pub struct NodeMetadata { impl NodeMetadata { pub fn from_bytes(blob: MetadataBytes) -> Result { - // TODO: could save 1-2% of tree space by packing (and maybe don't do that) + // OPT: could save 1-2% of tree space by packing (and maybe don't do that) Ok(Self { node_type: Self::node_type_from_bytes(blob)?, dirty: Self::dirty_from_bytes(blob)?, @@ -629,7 +633,7 @@ impl MerkleBlob { where I: Iterator, { - // TODO: would it be worthwhile to hold the entire blocks? + // OPT: would it be worthwhile to hold the entire blocks? let mut indexes = vec![]; if self.key_to_index.len() <= 1 { @@ -659,8 +663,8 @@ impl MerkleBlob { indexes.push(new_leaf_index); } - // TODO: can we insert the top node first? maybe more efficient to update it's children - // than to update the parents of the children when traversing leaf to sub-root? + // OPT: can we insert the top node first? maybe more efficient to update it's children + // than to update the parents of the children when traversing leaf to sub-root? 
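(Illustration only, not part of this commit: the pairing pass that the loop below performs, shown on plain index values. Five leaves reduce as [1, 2, 3, 4, 5] -> [6, 7, 5] -> [8, 5] -> [9]; an odd leftover is carried forward unchanged, and the loop stops once a single sub-root remains. pair_up and its counter are simplified stand-ins for allocating real internal nodes.)

fn pair_up(indexes: Vec<u32>, mut next_index: u32) -> (Vec<u32>, u32) {
    let mut next = Vec::new();
    for chunk in indexes.chunks(2) {
        match chunk {
            [_, _] => {
                // a new internal node is created over the pair
                next.push(next_index);
                next_index += 1;
            }
            // an odd leftover is carried up to the next pass
            [one] => next.push(*one),
            _ => unreachable!(),
        }
    }
    (next, next_index)
}

fn main() {
    let mut indexes: Vec<u32> = vec![1, 2, 3, 4, 5];
    let mut next_index = 6;
    while indexes.len() > 1 {
        (indexes, next_index) = pair_up(indexes, next_index);
    }
    assert_eq!(indexes, vec![9]);
}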
while indexes.len() > 1 { let mut new_indexes = vec![]; @@ -708,7 +712,7 @@ impl MerkleBlob { } if indexes.len() == 1 { - // TODO: can we avoid this extra min height leaf traversal? + // OPT: can we avoid this extra min height leaf traversal? let min_height_leaf = self.get_min_height_leaf()?; let NodeSpecific::Leaf { key, .. } = min_height_leaf.node.specific else { panic!() @@ -725,7 +729,7 @@ impl MerkleBlob { new_index: TreeIndex, side: &Side, ) -> Result<(), String> { - // TODO: consider name, we're inserting a subtree at a leaf + // NAME: consider name, we're inserting a subtree at a leaf // TODO: seems like this ought to be fairly similar to regular insert struct Stuff { @@ -1134,7 +1138,7 @@ impl MerkleBlob { } pub fn calculate_lazy_hashes(&mut self) -> Result<(), String> { - // TODO: really want a truncated traversal, not filter + // OPT: really want a truncated traversal, not filter // TODO: yeah, storing the whole set of blocks via collect is not great for (index, mut block) in self .iter() @@ -1158,7 +1162,6 @@ impl MerkleBlob { #[allow(unused)] fn relocate_node(&mut self, source: TreeIndex, destination: TreeIndex) -> Result<(), String> { let extend_index = self.extend_index(); - // TODO: perhaps relocation of root should be allowed for some use if source == 0 { return Err("relocation of the root and index zero is not allowed".to_string()); }; @@ -1198,20 +1201,6 @@ impl MerkleBlob { Ok(()) } - // #[allow(unused)] - // fn rebuild(&mut self) -> Result<(), String> { - // panic!(); - // // TODO: could make insert_entry_to_blob a free function and not need to make - // // a merkle blob here? maybe? - // // let mut new = Self::new(Vec::new())?; - // // for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob).enumerate() { - // // // new.insert_entry_to_blob(index, )? 
- // // } - // // self.blob = new.blob; - // - // Ok(()) - // } - #[allow(unused)] fn get_key_value_map(&self) -> HashMap { let mut key_value = HashMap::new(); @@ -1668,55 +1657,6 @@ mod tests { small_blob.check().unwrap(); } - // #[test] - // fn test_build_blob_and_read() { - // let mut blob: Vec = Vec::new(); - // - // blob.extend(EXAMPLE_ROOT_METADATA.to_bytes()); - // blob.extend(EXAMPLE_ROOT.to_bytes()); - // blob.extend(EXAMPLE_LEFT_LEAF_METADATA.to_bytes()); - // blob.extend(EXAMPLE_LEFT_LEAF.to_bytes()); - // blob.extend(EXAMPLE_RIGHT_LEAF_METADATA.to_bytes()); - // blob.extend(EXAMPLE_RIGHT_LEAF.to_bytes()); - // - // assert_eq!(blob, Vec::from(EXAMPLE_BLOB)); - // - // let merkle_blob = MerkleBlob::new(Vec::from(EXAMPLE_BLOB)).unwrap(); - // - // assert_eq!(merkle_blob.get_node(0).unwrap(), EXAMPLE_ROOT); - // assert_eq!(merkle_blob.get_node(1).unwrap(), EXAMPLE_LEFT_LEAF); - // assert_eq!(merkle_blob.get_node(2).unwrap(), EXAMPLE_RIGHT_LEAF); - // - // merkle_blob.check().unwrap(); - // } - - // #[test] - // fn test_build_merkle() { - // let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - // - // let (key, value) = EXAMPLE_LEFT_LEAF.key_value(); - // merkle_blob - // .insert(key, value, &EXAMPLE_LEFT_LEAF.hash) - // .unwrap(); - // let (key, value) = EXAMPLE_RIGHT_LEAF.key_value(); - // merkle_blob - // .insert(key, value, &EXAMPLE_RIGHT_LEAF.hash) - // .unwrap(); - // - // // TODO: just hacking here to compare with the ~wrong~ simplified reference - // let mut root = Block::from_bytes(merkle_blob.get_block_bytes(0).unwrap(), 0).unwrap(); - // root.metadata.dirty = true; - // root.node.hash = HASH; - // assert_eq!(root.metadata.node_type, NodeType::Internal); - // merkle_blob - // .insert_entry_to_blob(0, root.to_bytes()) - // .unwrap(); - // - // assert_eq!(merkle_blob.blob, Vec::from(EXAMPLE_BLOB)); - // - // merkle_blob.check().unwrap(); - // } - #[rstest] fn test_just_insert_a_bunch( // just allowing parallelism of testing 100,000 inserts total From 1b61eed70282bbd5e650d5c9d65ed75442f6ef86 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 28 Oct 2024 09:33:38 -0400 Subject: [PATCH 115/181] .update_parent() --- crates/chia-datalayer/src/merkle.rs | 52 +++++++++++------------------ 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index f3485f428..f73576c12 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -600,11 +600,9 @@ impl MerkleBlob { panic!("root found when not expected") }; - let mut block = Block::from_bytes(self.get_block_bytes(old_leaf_index)?)?; - block.node.parent = Some(new_internal_node_index); - self.insert_entry_to_blob(old_leaf_index, &block)?; + self.update_parent(old_leaf_index, Some(new_internal_node_index))?; - let mut old_parent_block = Block::from_bytes(self.get_block_bytes(old_parent_index)?)?; + let mut old_parent_block = self.get_block(old_parent_index)?; if let NodeSpecific::Internal { ref mut left, ref mut right, @@ -658,8 +656,7 @@ impl MerkleBlob { specific: NodeSpecific::Leaf { key, value }, }, }; - self.insert_entry_to_blob(new_leaf_index, &new_block) - .unwrap(); + self.insert_entry_to_blob(new_leaf_index, &new_block)?; indexes.push(new_leaf_index); } @@ -680,11 +677,11 @@ impl MerkleBlob { ), }; - let block_1 = self.get_block(index_1)?; - let block_2 = self.get_block(index_2)?; - let new_internal_node_index = self.get_new_index(); + let block_1 = self.update_parent(index_1, 
Some(new_internal_node_index))?; + let block_2 = self.update_parent(index_2, Some(new_internal_node_index))?; + let new_block = Block { metadata: NodeMetadata { node_type: NodeType::Internal, @@ -701,10 +698,6 @@ impl MerkleBlob { }; self.insert_entry_to_blob(new_internal_node_index, &new_block)?; - for (child_index, mut child_block) in [(index_1, block_1), (index_2, block_2)] { - child_block.node.parent = Some(new_internal_node_index); - self.insert_entry_to_blob(child_index, &child_block)?; - } new_indexes.push(new_internal_node_index); } @@ -770,10 +763,7 @@ impl MerkleBlob { }, }; self.insert_entry_to_blob(new_internal_node_index, &block)?; - // TODO: yeah, doing this a fair bit, dedupe. probably - let mut block = self.get_block(new_index)?; - block.node.parent = Some(new_internal_node_index); - self.insert_entry_to_blob(new_index, &block)?; + self.update_parent(new_index, Some(new_internal_node_index))?; Ok(()) } @@ -813,9 +803,7 @@ impl MerkleBlob { if let NodeSpecific::Internal { left, right } = sibling_block.node.specific { for child_index in [left, right] { - let mut block = self.get_block(child_index)?; - block.node.parent = Some(0); - self.insert_entry_to_blob(child_index, &block)?; + self.update_parent(child_index, Some(0))?; } }; @@ -924,15 +912,17 @@ impl MerkleBlob { Ok(()) } - // fn update_parent(&mut self, index: TreeIndex, parent: Option) -> Result<(), String> { - // let range = self.get_block_range(index); - // - // let mut node = self.get_node(index)?; - // node.parent = parent; - // self.blob[range].copy_from_slice(&node.to_bytes()); - // - // Ok(()) - // } + fn update_parent( + &mut self, + index: TreeIndex, + parent: Option, + ) -> Result { + let mut block = self.get_block(index)?; + block.node.parent = parent; + self.insert_entry_to_blob(index, &block)?; + + Ok(block) + } // fn update_left(&mut self, index: TreeIndex, left: Option) -> Result<(), String> { // let range = self.get_block_range(index); @@ -1190,9 +1180,7 @@ impl MerkleBlob { if let NodeSpecific::Internal { left, right, .. } = source_block.node.specific { for child in [left, right] { - let mut block = self.get_block(child).unwrap(); - block.node.parent = Some(destination); - self.insert_entry_to_blob(child, &block).unwrap(); + self.update_parent(child, Some(destination)).unwrap(); } } From a66f16da008b28d2255ee9ecff7614c885d84712 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 28 Oct 2024 15:02:04 -0400 Subject: [PATCH 116/181] fixup --- crates/chia-datalayer/src/merkle.rs | 103 +++++++++++++++++----------- 1 file changed, 64 insertions(+), 39 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index f73576c12..983906526 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -725,6 +725,10 @@ impl MerkleBlob { // NAME: consider name, we're inserting a subtree at a leaf // TODO: seems like this ought to be fairly similar to regular insert + // TODO: but what about the old leaf being the root... is that what the batch insert + // pre-filling of two leafs is about? if so, this needs to be making sure of that + // or something. 
+ struct Stuff { index: TreeIndex, hash: Hash, @@ -765,6 +769,29 @@ impl MerkleBlob { self.insert_entry_to_blob(new_internal_node_index, &block)?; self.update_parent(new_index, Some(new_internal_node_index))?; + let Some(old_leaf_parent) = old_leaf.parent else { + // TODO: relates to comment at the beginning about assumptions about the tree etc + panic!("not handling this case"); + }; + + let mut parent = self.get_block(old_leaf_parent)?; + if let NodeSpecific::Internal { + ref mut left, + ref mut right, + .. + } = parent.node.specific + { + match old_leaf_index { + x if x == *left => *left = new_internal_node_index, + x if x == *right => *right = new_internal_node_index, + _ => panic!("parent not a child a grandparent"), + } + } else { + panic!("not handling this case now...") + } + self.insert_entry_to_blob(old_leaf_parent, &parent)?; + self.update_parent(old_leaf_index, Some(new_internal_node_index))?; + Ok(()) } @@ -924,16 +951,6 @@ impl MerkleBlob { Ok(block) } - // fn update_left(&mut self, index: TreeIndex, left: Option) -> Result<(), String> { - // let range = self.get_block_range(index); - // - // let mut node = self.get_node(index)?; - // node.left = left; - // self.blob[range].copy_from_slice(&node.to_bytes()); - // - // Ok(()) - // } - fn mark_lineage_as_dirty(&mut self, index: TreeIndex) -> Result<(), String> { let mut next_index = Some(index); @@ -1524,19 +1541,15 @@ mod tests { use rstest::{fixture, rstest}; use std::time::{Duration, Instant}; - fn open_dot(_lines: &mut DotLines) { - // crate::merkle::dot::open_dot(lines); + impl Drop for MerkleBlob { + fn drop(&mut self) { + self.check().unwrap(); + } } - // #[allow(unused)] - // fn normalized_blob(merkle_blob: &MerkleBlob) -> Vec { - // let mut new = MerkleBlob::new(merkle_blob.blob.clone()).unwrap(); - // - // new.calculate_lazy_hashes(); - // new.rebuild(); - // - // new.blob - // } + fn open_dot(_lines: &mut DotLines) { + // crate::merkle::dot::open_dot(_lines); + } #[test] fn test_node_type_serialized_values() { @@ -1617,8 +1630,6 @@ mod tests { assert_eq!(lineage.len(), 2); let (_, last_node) = lineage.last().unwrap(); assert_eq!(last_node.parent, None); - - small_blob.check().unwrap(); } #[rstest] @@ -1641,8 +1652,6 @@ mod tests { side: expected_side }, ); - - small_blob.check().unwrap(); } #[rstest] @@ -1670,8 +1679,6 @@ mod tests { // TODO: check, well... 
something merkle_blob.calculate_lazy_hashes().unwrap(); - - merkle_blob.check().unwrap(); } #[test] @@ -1705,8 +1712,6 @@ mod tests { assert_eq!(merkle_blob, reference_blobs[*key_value_id as usize]); dots.push(merkle_blob.to_dot().dump()); } - - merkle_blob.check().unwrap(); } #[test] @@ -1725,7 +1730,6 @@ mod tests { .unwrap(); open_dot(merkle_blob.to_dot().set_note("first after")); - merkle_blob.check().unwrap(); assert_eq!(merkle_blob.key_to_index.len(), 1); } @@ -1784,8 +1788,6 @@ mod tests { Side::Right => [pre_count as KvId, pre_count as KvId + 1], }; assert_eq!([left_key, right_key], expected_keys); - - merkle_blob.check().unwrap(); } #[test] @@ -1807,7 +1809,6 @@ mod tests { merkle_blob.delete(key_value_id).unwrap(); - merkle_blob.check().unwrap(); assert_eq!(merkle_blob.key_to_index.len(), 0); } @@ -1822,19 +1823,19 @@ mod tests { #[rstest] fn test_get_new_index_with_free_index(mut small_blob: MerkleBlob) { + open_dot(small_blob.to_dot().set_note("initial")); let key = 0x0001_0203_0405_0607; let _ = small_blob.key_to_index[&key]; small_blob.delete(key).unwrap(); + open_dot(small_blob.to_dot().set_note("after delete")); let expected = HashSet::from([1, 2]); assert_eq!(small_blob.free_indexes, expected); - // NOTE: both 1 and 2 are free per test_delete_frees_index - assert!(expected.contains(&small_blob.get_new_index())); } #[rstest] fn test_dump_small_blob_bytes(small_blob: MerkleBlob) { - println!("{}", hex::encode(small_blob.blob)); + println!("{}", hex::encode(small_blob.blob.clone())); } #[test] @@ -1954,13 +1955,37 @@ mod tests { } #[rstest] - fn test_batch_insert_with_odd_count_does_not_hang(mut small_blob: MerkleBlob) { + fn test_batch_insert( + #[values(0, 1, 2, 10)] pre_inserts: usize, + #[values(0, 1, 2, 8, 9)] count: usize, + ) { + let mut blob = MerkleBlob::new(vec![]).unwrap(); + for i in 0..pre_inserts as KvId { + blob.insert(i, i, &sha256_num(i), InsertLocation::Auto {}) + .unwrap(); + } + open_dot(blob.to_dot().set_note("initial")); + let mut batch: Vec<((KvId, KvId), Hash)> = vec![]; - for i in 0..9 { + let mut batch_map = HashMap::new(); + for i in pre_inserts as KvId..(pre_inserts + count) as KvId { batch.push(((i, i), sha256_num(i))); + batch_map.insert(i, i); } - small_blob.batch_insert(batch.into_iter()).unwrap(); + let before = blob.get_key_value_map(); + blob.batch_insert(batch.into_iter()).unwrap(); + let after = blob.get_key_value_map(); + + open_dot( + blob.to_dot() + .set_note(&format!("after batch insert of {count} values")), + ); + + let mut expected = before.clone(); + expected.extend(batch_map); + + assert_eq!(after, expected); } } From e3e1db6c12e150431eb472fee4840c01324730e2 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 28 Oct 2024 15:10:23 -0400 Subject: [PATCH 117/181] drop check for debug assertions too --- crates/chia-datalayer/src/merkle.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 983906526..e2e450c33 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -1532,6 +1532,13 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { } } +#[cfg(any(test, debug_assertions))] +impl Drop for MerkleBlob { + fn drop(&mut self) { + self.check().unwrap(); + } +} + #[cfg(test)] mod dot; #[cfg(test)] @@ -1541,12 +1548,6 @@ mod tests { use rstest::{fixture, rstest}; use std::time::{Duration, Instant}; - impl Drop for MerkleBlob { - fn drop(&mut self) { - self.check().unwrap(); - } - } - fn 
open_dot(_lines: &mut DotLines) { // crate::merkle::dot::open_dot(_lines); } From b4ee761c07e74a6e160994c1164a56e6bf17b4f2 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 28 Oct 2024 15:17:18 -0400 Subject: [PATCH 118/181] rename to `.check_integrity()` --- crates/chia-datalayer/src/merkle.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index e2e450c33..ec685269f 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -892,7 +892,7 @@ impl MerkleBlob { Ok(()) } - pub fn check(&self) -> Result<(), String> { + pub fn check_integrity(&self) -> Result<(), String> { let mut leaf_count: usize = 0; let mut internal_count: usize = 0; let mut child_to_parent: HashMap = HashMap::new(); @@ -1535,7 +1535,7 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { #[cfg(any(test, debug_assertions))] impl Drop for MerkleBlob { fn drop(&mut self) { - self.check().unwrap(); + self.check_integrity().unwrap(); } } @@ -1704,7 +1704,7 @@ mod tests { dots.push(merkle_blob.to_dot().dump()); } - merkle_blob.check().unwrap(); + merkle_blob.check_integrity().unwrap(); for key_value_id in key_value_ids.iter().rev() { println!("deleting: {key_value_id}"); @@ -1806,7 +1806,7 @@ mod tests { ) .unwrap(); open_dot(merkle_blob.to_dot().set_note("first after")); - merkle_blob.check().unwrap(); + merkle_blob.check_integrity().unwrap(); merkle_blob.delete(key_value_id).unwrap(); From 435739137ad05c6f83abf79a8ca3e5238dffe9a8 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 28 Oct 2024 15:25:29 -0400 Subject: [PATCH 119/181] no default iteration order --- crates/chia-datalayer/src/merkle.rs | 38 ++++++++++++++----------- crates/chia-datalayer/src/merkle/dot.rs | 4 +-- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index ec685269f..a75f1fcb7 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -9,7 +9,7 @@ use clvmr::sha2::Sha256; use num_traits::ToBytes; use std::cmp::Ordering; use std::collections::{HashMap, HashSet, VecDeque}; -use std::iter::{zip, IntoIterator}; +use std::iter::zip; use std::mem::size_of; use std::ops::Range; @@ -1140,15 +1140,14 @@ impl MerkleBlob { Ok(lineage) } - pub fn iter(&self) -> MerkleBlobLeftChildFirstIterator<'_> { - <&Self as IntoIterator>::into_iter(self) - } + // pub fn iter(&self) -> MerkleBlobLeftChildFirstIterator<'_> { + // <&Self as IntoIterator>::into_iter(self) + // } pub fn calculate_lazy_hashes(&mut self) -> Result<(), String> { // OPT: really want a truncated traversal, not filter // TODO: yeah, storing the whole set of blocks via collect is not great - for (index, mut block) in self - .iter() + for (index, mut block) in MerkleBlobLeftChildFirstIterator::new(&self.blob) .filter(|(_, block)| block.metadata.dirty) .collect::>() { @@ -1223,7 +1222,10 @@ impl MerkleBlob { impl PartialEq for MerkleBlob { fn eq(&self, other: &Self) -> bool { // NOTE: this is checking tree structure equality, not serialized bytes equality - for ((_, self_block), (_, other_block)) in zip(self, other) { + for ((_, self_block), (_, other_block)) in zip( + MerkleBlobLeftChildFirstIterator::new(&self.blob), + MerkleBlobLeftChildFirstIterator::new(&other.blob), + ) { if (self_block.metadata.dirty || other_block.metadata.dirty) || self_block.node.hash != other_block.node.hash { @@ -1242,14 +1244,14 @@ impl PartialEq for 
MerkleBlob { } } -impl<'a> IntoIterator for &'a MerkleBlob { - type Item = (TreeIndex, Block); - type IntoIter = MerkleBlobLeftChildFirstIterator<'a>; - - fn into_iter(self) -> Self::IntoIter { - MerkleBlobLeftChildFirstIterator::new(&self.blob) - } -} +// impl<'a> IntoIterator for &'a MerkleBlob { +// type Item = (TreeIndex, Block); +// type IntoIter = MerkleBlobLeftChildFirstIterator<'a>; +// +// fn into_iter(self) -> Self::IntoIter { +// MerkleBlobLeftChildFirstIterator::new(&self.blob) +// } +// } #[cfg(feature = "py-bindings")] #[pymethods] @@ -1901,7 +1903,8 @@ mod tests { #[rstest] fn test_upsert_upserts(mut small_blob: MerkleBlob) { - let before_blocks = small_blob.iter().collect::>(); + let before_blocks = + MerkleBlobLeftChildFirstIterator::new(&small_blob.blob).collect::>(); let (key, index) = small_blob.key_to_index.iter().next().unwrap(); let node = small_blob.get_node(*index).unwrap(); let NodeSpecific::Leaf { @@ -1916,7 +1919,8 @@ mod tests { small_blob.upsert(*key, new_value, &node.hash).unwrap(); - let after_blocks = small_blob.iter().collect::>(); + let after_blocks = + MerkleBlobLeftChildFirstIterator::new(&small_blob.blob).collect::>(); assert_eq!(before_blocks.len(), after_blocks.len()); for ((before_index, before), (after_index, after)) in zip(before_blocks, after_blocks) { diff --git a/crates/chia-datalayer/src/merkle/dot.rs b/crates/chia-datalayer/src/merkle/dot.rs index e8ab29224..09ea70c0b 100644 --- a/crates/chia-datalayer/src/merkle/dot.rs +++ b/crates/chia-datalayer/src/merkle/dot.rs @@ -1,4 +1,4 @@ -use crate::merkle::{MerkleBlob, Node, NodeSpecific, TreeIndex}; +use crate::merkle::{MerkleBlob, MerkleBlobLeftChildFirstIterator, Node, NodeSpecific, TreeIndex}; use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use url::Url; @@ -88,7 +88,7 @@ impl Node { impl MerkleBlob { pub fn to_dot(&self) -> DotLines { let mut result = DotLines::new(); - for (index, block) in self { + for (index, block) in MerkleBlobLeftChildFirstIterator::new(&self.blob) { result.push(block.node.to_dot(index)); } From fc4c9d90ce99a895b60aec7e9c466bf75fe8c10f Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 28 Oct 2024 15:39:42 -0400 Subject: [PATCH 120/181] tidy --- crates/chia-datalayer/src/merkle.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index a75f1fcb7..31c46b537 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -809,6 +809,7 @@ impl MerkleBlob { let leaf = self.get_node(leaf_index)?; // TODO: maybe some common way to indicate/perform sanity double checks? + // maybe this disappears with unit variants and structs for the data let NodeSpecific::Leaf { .. 
} = leaf.specific else {
             panic!("key to index cache resulted in internal node")
         };
@@ -1053,7 +1054,7 @@ impl MerkleBlob {
             Ordering::Greater => return Err(format!("block index out of range: {index}")),
             Ordering::Equal => self.blob.extend_from_slice(&new_block_bytes),
             Ordering::Less => {
-                // TODO: lots of deserialization here for just the key
+                // OPT: lots of deserialization here for just the key
                 let old_block = self.get_block(index)?;
                 // TODO: should we be more careful about accidentally reading garbage like
                 // from a freshly gotten index
@@ -1146,7 +1147,7 @@ impl MerkleBlob {
 
     pub fn calculate_lazy_hashes(&mut self) -> Result<(), String> {
         // OPT: really want a truncated traversal, not filter
-        // TODO: yeah, storing the whole set of blocks via collect is not great
+        // OPT: yeah, storing the whole set of blocks via collect is not great
         for (index, mut block) in MerkleBlobLeftChildFirstIterator::new(&self.blob)
             .filter(|(_, block)| block.metadata.dirty)
             .collect::<Vec<_>>()
         {
             let NodeSpecific::Internal { left, right } = block.node.specific else {
                 panic!("leaves should not be dirty")
             };
-            // TODO: obviously inefficient to re-get/deserialize these blocks inside
-            // an iteration that's already doing that
+            // OPT: obviously inefficient to re-get/deserialize these blocks inside
+            // an iteration that's already doing that
             let left_hash = self.get_hash(left)?;
             let right_hash = self.get_hash(right)?;
             block.update_hash(&left_hash, &right_hash);

From f5ee0c87599003ffa2cb5f6a6201e62d0472488f Mon Sep 17 00:00:00 2001
From: Kyle Altendorf
Date: Tue, 5 Nov 2024 10:02:02 -0500
Subject: [PATCH 121/181] complex match -> simple if else if else

---
 crates/chia-datalayer/src/merkle.rs | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs
index 31c46b537..012ab00a0 100644
--- a/crates/chia-datalayer/src/merkle.rs
+++ b/crates/chia-datalayer/src/merkle.rs
@@ -184,10 +184,12 @@ impl NodeSpecific {
             panic!("unable to get sibling index from a leaf")
         };
 
-        match index {
-            x if (x == *right) => *left,
-            x if (x == *left) => *right,
-            _ => panic!("index not a child: {index}"),
+        if index == *right {
+            *left
+        } else if index == *left {
+            *right
+        } else {
+            panic!("index not a child: {index}")
         }
     }
 }

From 98fc0d6a7d0737cc031adc2c77f03a5325222637 Mon Sep 17 00:00:00 2001
From: Kyle Altendorf
Date: Tue, 5 Nov 2024 10:07:20 -0500
Subject: [PATCH 122/181] add comment for `KvId` usage

---
 crates/chia-datalayer/src/merkle.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs
index 012ab00a0..8099e5fc5 100644
--- a/crates/chia-datalayer/src/merkle.rs
+++ b/crates/chia-datalayer/src/merkle.rs
@@ -16,6 +16,9 @@ use std::ops::Range;
 type TreeIndex = u32;
 type Parent = Option<TreeIndex>;
 type Hash = [u8; 32];
+// key and value ids are provided from outside of this code and are implemented as
+// the row id from sqlite which is a signed 8 byte integer. the actual key and
+// value data bytes will not be handled within this code, only outside.
type KvId = i64; // assumptions From 3ffd27ca93f0d4da6c7348b01e45d810e4998fef Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 5 Nov 2024 12:13:12 -0500 Subject: [PATCH 123/181] rough transition from strings to an error enum --- Cargo.lock | 1 + crates/chia-datalayer/Cargo.toml | 1 + crates/chia-datalayer/src/merkle.rs | 247 ++++++++++++++++------------ 3 files changed, 146 insertions(+), 103 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7bc5fd721..2c478ac27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -383,6 +383,7 @@ dependencies = [ "percent-encoding", "pyo3", "rstest", + "thiserror", "url", ] diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index 0b38d6569..c00caedc4 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -21,6 +21,7 @@ crate-type = ["rlib"] clvmr = { workspace = true } num-traits = { workspace = true } pyo3 = { workspace = true, optional = true } +thiserror = { workspace = true } [dev-dependencies] clvm-utils = { workspace = true } diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 8099e5fc5..91c7cbfee 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -12,6 +12,7 @@ use std::collections::{HashMap, HashSet, VecDeque}; use std::iter::zip; use std::mem::size_of; use std::ops::Range; +use thiserror::Error; type TreeIndex = u32; type Parent = Option; @@ -21,6 +22,50 @@ type Hash = [u8; 32]; // value data bytes will not be handled within this code, only outside. type KvId = i64; +#[derive(Debug, Error)] +pub enum Error { + #[error("unknown NodeType value: {0:?}")] + UnknownNodeTypeValue(u8), + + #[error("unknown dirty value: {0:?}")] + UnknownDirtyValue(u8), + + // TODO: don't use String here + #[error("failed loading metadata: {0}")] + FailedLoadingMetadata(String), + + // TODO: don't use String here + #[error("failed loading node: {0}")] + FailedLoadingNode(String), + + #[error("blob length must be a multiple of block count, found extra bytes: {0}")] + InvalidBlobLength(usize), + + #[error("key already present")] + KeyAlreadyPresent, + + #[error("requested insertion at root but tree not empty")] + UnableToInsertAsRootOfNonEmptyTree, + + #[error("old leaf unexpectedly not a leaf")] + OldLeafUnexpectedlyNotALeaf, + + #[error("unable to find a leaf")] + UnableToFindALeaf, + + #[error("unknown key: {0:?}")] + UnknownKey(KvId), + + #[error("key not in key to index cache: {0:?}")] + IntegrityKeyNotInCache(KvId), + + #[error("zero-length seed bytes not allowed")] + ZeroLengthSeedNotAllowed, + + #[error("block index out of range: {0:?}")] + BlockIndexOutOfRange(TreeIndex), +} + // assumptions // - root is at index 0 // - any case with no keys will have a zero length blob @@ -67,12 +112,12 @@ pub enum NodeType { } impl NodeType { - pub fn from_u8(value: u8) -> Result { + pub fn from_u8(value: u8) -> Result { match value { // ha! 
feel free to laugh at this x if (NodeType::Internal as u8 == x) => Ok(NodeType::Internal), x if (NodeType::Leaf as u8 == x) => Ok(NodeType::Leaf), - other => Err(format!("unknown NodeType value: {other}")), + other => Err(Error::UnknownNodeTypeValue(other)), } } @@ -135,7 +180,7 @@ pub struct NodeMetadata { } impl NodeMetadata { - pub fn from_bytes(blob: MetadataBytes) -> Result { + pub fn from_bytes(blob: MetadataBytes) -> Result { // OPT: could save 1-2% of tree space by packing (and maybe don't do that) Ok(Self { node_type: Self::node_type_from_bytes(blob)?, @@ -151,15 +196,15 @@ impl NodeMetadata { bytes } - pub fn node_type_from_bytes(blob: MetadataBytes) -> Result { + pub fn node_type_from_bytes(blob: MetadataBytes) -> Result { NodeType::from_u8(u8::from_be_bytes(blob[TYPE_RANGE].try_into().unwrap())) } - pub fn dirty_from_bytes(blob: MetadataBytes) -> Result { + pub fn dirty_from_bytes(blob: MetadataBytes) -> Result { match u8::from_be_bytes(blob[DIRTY_RANGE].try_into().unwrap()) { 0 => Ok(false), 1 => Ok(true), - other => Err(format!("invalid dirty value: {other}")), + other => Err(Error::UnknownDirtyValue(other)), } } } @@ -199,7 +244,7 @@ impl NodeSpecific { impl Node { #[allow(clippy::unnecessary_wraps)] - pub fn from_bytes(metadata: &NodeMetadata, blob: DataBytes) -> Result { + pub fn from_bytes(metadata: &NodeMetadata, blob: DataBytes) -> Result { Ok(Self { parent: Self::parent_from_bytes(&blob), hash: Self::hash_from_bytes(&blob), @@ -332,13 +377,13 @@ impl Block { blob } - pub fn from_bytes(blob: BlockBytes) -> Result { + pub fn from_bytes(blob: BlockBytes) -> Result { let metadata_blob: MetadataBytes = blob[METADATA_RANGE].try_into().unwrap(); let data_blob: DataBytes = blob[DATA_RANGE].try_into().unwrap(); let metadata = NodeMetadata::from_bytes(metadata_blob) - .map_err(|message| format!("failed loading metadata: {message})"))?; + .map_err(|message| Error::FailedLoadingMetadata(message.to_string()))?; let node = Node::from_bytes(&metadata, data_blob) - .map_err(|message| format!("failed loading node: {message})"))?; + .map_err(|message| Error::FailedLoadingNode(message.to_string()))?; Ok(Block { metadata, node }) } @@ -384,13 +429,11 @@ pub struct MerkleBlob { } impl MerkleBlob { - pub fn new(blob: Vec) -> Result { + pub fn new(blob: Vec) -> Result { let length = blob.len(); let remainder = length % BLOCK_SIZE; if remainder != 0 { - return Err(format!( - "blob length must be a multiple of block count, found extra bytes: {remainder}" - )); + return Err(Error::InvalidBlobLength(remainder)); } let (free_indexes, key_to_index) = get_free_indexes_and_keys_values_indexes(&blob); @@ -414,9 +457,9 @@ impl MerkleBlob { value: KvId, hash: &Hash, insert_location: InsertLocation, - ) -> Result<(), String> { + ) -> Result<(), Error> { if self.key_to_index.contains_key(&key) { - return Err("Key already present".to_string()); + return Err(Error::KeyAlreadyPresent); } let insert_location = match insert_location { @@ -430,7 +473,7 @@ impl MerkleBlob { } InsertLocation::AsRoot {} => { if !self.key_to_index.is_empty() { - return Err("requested insertion at root but tree not empty".to_string()); + return Err(Error::UnableToInsertAsRootOfNonEmptyTree); }; self.insert_first(key, value, hash)?; } @@ -462,7 +505,7 @@ impl MerkleBlob { Ok(()) } - fn insert_first(&mut self, key: KvId, value: KvId, hash: &Hash) -> Result<(), String> { + fn insert_first(&mut self, key: KvId, value: KvId, hash: &Hash) -> Result<(), Error> { let new_leaf_block = Block { metadata: NodeMetadata { node_type: 
NodeType::Leaf, @@ -487,7 +530,7 @@ impl MerkleBlob { old_leaf: &Node, internal_node_hash: &Hash, side: &Side, - ) -> Result<(), String> { + ) -> Result<(), Error> { self.clear(); let root_index = self.get_new_index(); let left_index = self.get_new_index(); @@ -515,7 +558,7 @@ impl MerkleBlob { value: old_leaf_value, } = old_leaf.specific else { - return Err("old leaf unexpectedly not a leaf".to_string()); + return Err(Error::OldLeafUnexpectedlyNotALeaf); }; node.parent = Some(0); @@ -566,7 +609,7 @@ impl MerkleBlob { old_leaf_index: TreeIndex, internal_node_hash: &Hash, side: &Side, - ) -> Result<(), String> { + ) -> Result<(), Error> { let new_leaf_index = self.get_new_index(); let new_internal_node_index = self.get_new_index(); @@ -632,7 +675,7 @@ impl MerkleBlob { Ok(()) } - pub fn batch_insert(&mut self, mut keys_values_hashes: I) -> Result<(), String> + pub fn batch_insert(&mut self, mut keys_values_hashes: I) -> Result<(), Error> where I: Iterator, { @@ -726,7 +769,7 @@ impl MerkleBlob { old_leaf_index: TreeIndex, new_index: TreeIndex, side: &Side, - ) -> Result<(), String> { + ) -> Result<(), Error> { // NAME: consider name, we're inserting a subtree at a leaf // TODO: seems like this ought to be fairly similar to regular insert @@ -800,17 +843,14 @@ impl MerkleBlob { Ok(()) } - fn get_min_height_leaf(&self) -> Result { + fn get_min_height_leaf(&self) -> Result { MerkleBlobBreadthFirstIterator::new(&self.blob) .next() - .ok_or("unable to find a leaf".to_string()) + .ok_or(Error::UnableToFindALeaf) } - pub fn delete(&mut self, key: KvId) -> Result<(), String> { - let leaf_index = *self - .key_to_index - .get(&key) - .ok_or(format!("unknown key: {key}"))?; + pub fn delete(&mut self, key: KvId) -> Result<(), Error> { + let leaf_index = *self.key_to_index.get(&key).ok_or(Error::UnknownKey(key))?; let leaf = self.get_node(leaf_index)?; // TODO: maybe some common way to indicate/perform sanity double checks? 
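An illustrative aside on this change (not part of the diff): with the `Error` enum above, callers can branch on specific failure variants instead of inspecting `String` messages. A minimal caller-side sketch, assuming a `merkle_blob: MerkleBlob` and a `key: KvId`, using only variants defined in this patch:

    // hypothetical handling of a delete() failure via the new error enum
    match merkle_blob.delete(key) {
        Ok(()) => println!("deleted {key}"),
        Err(Error::UnknownKey(missing)) => println!("nothing to delete for key {missing}"),
        Err(other) => panic!("unexpected error: {other}"),
    }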
@@ -872,7 +912,7 @@ impl MerkleBlob { Ok(()) } - pub fn upsert(&mut self, key: KvId, value: KvId, new_hash: &Hash) -> Result<(), String> { + pub fn upsert(&mut self, key: KvId, value: KvId, new_hash: &Hash) -> Result<(), Error> { let Some(leaf_index) = self.key_to_index.get(&key) else { self.insert(key, value, new_hash, InsertLocation::Auto {})?; return Ok(()); @@ -898,7 +938,7 @@ impl MerkleBlob { Ok(()) } - pub fn check_integrity(&self) -> Result<(), String> { + pub fn check_integrity(&self) -> Result<(), Error> { let mut leaf_count: usize = 0; let mut internal_count: usize = 0; let mut child_to_parent: HashMap = HashMap::new(); @@ -918,7 +958,7 @@ impl MerkleBlob { let cached_index = self .key_to_index .get(&key) - .ok_or(format!("key not in key to index cache: {key:?}"))?; + .ok_or(Error::IntegrityKeyNotInCache(key))?; assert_eq!( *cached_index, index, "key to index cache for {key:?} should be {index:?} got: {cached_index:?}" @@ -949,7 +989,7 @@ impl MerkleBlob { &mut self, index: TreeIndex, parent: Option, - ) -> Result { + ) -> Result { let mut block = self.get_block(index)?; block.node.parent = parent; self.insert_entry_to_blob(index, &block)?; @@ -957,7 +997,7 @@ impl MerkleBlob { Ok(block) } - fn mark_lineage_as_dirty(&mut self, index: TreeIndex) -> Result<(), String> { + fn mark_lineage_as_dirty(&mut self, index: TreeIndex) -> Result<(), Error> { let mut next_index = Some(index); while let Some(this_index) = next_index { @@ -995,19 +1035,14 @@ impl MerkleBlob { fn get_random_insert_location_by_seed( &self, seed_bytes: &[u8], - ) -> Result { + ) -> Result { let mut seed_bytes = Vec::from(seed_bytes); if self.blob.is_empty() { return Ok(InsertLocation::AsRoot {}); } - let side = if (seed_bytes - .last() - .ok_or("zero-length seed bytes not allowed")? - & 1 << 7) - == 0 - { + let side = if (seed_bytes.last().ok_or(Error::ZeroLengthSeedNotAllowed)? & 1 << 7) == 0 { Side::Left } else { Side::Right @@ -1037,7 +1072,7 @@ impl MerkleBlob { } } - fn get_random_insert_location_by_kvid(&self, seed: KvId) -> Result { + fn get_random_insert_location_by_kvid(&self, seed: KvId) -> Result { let seed = sha256_num(seed); self.get_random_insert_location_by_seed(&seed) @@ -1052,11 +1087,11 @@ impl MerkleBlob { index } - fn insert_entry_to_blob(&mut self, index: TreeIndex, block: &Block) -> Result<(), String> { + fn insert_entry_to_blob(&mut self, index: TreeIndex, block: &Block) -> Result<(), Error> { let new_block_bytes = block.to_bytes(); let extend_index = self.extend_index(); match index.cmp(&extend_index) { - Ordering::Greater => return Err(format!("block index out of range: {index}")), + Ordering::Greater => return Err(Error::BlockIndexOutOfRange(index)), Ordering::Equal => self.blob.extend_from_slice(&new_block_bytes), Ordering::Less => { // OPT: lots of deserialization here for just the key @@ -1087,30 +1122,31 @@ impl MerkleBlob { Ok(()) } - fn get_block(&self, index: TreeIndex) -> Result { + fn get_block(&self, index: TreeIndex) -> Result { Block::from_bytes(self.get_block_bytes(index)?) } - fn get_hash(&self, index: TreeIndex) -> Result { + fn get_hash(&self, index: TreeIndex) -> Result { let block_bytes = self.get_block_bytes(index)?; let data_bytes: DataBytes = block_bytes[DATA_RANGE].try_into().unwrap(); Ok(Node::hash_from_bytes(&data_bytes)) } - fn get_block_bytes(&self, index: TreeIndex) -> Result { - self.blob + fn get_block_bytes(&self, index: TreeIndex) -> Result { + Ok(self + .blob .get(block_range(index)) - .ok_or(format!("block index out of bounds: {index}"))? 
+ .ok_or(Error::BlockIndexOutOfRange(index))? .try_into() - .map_err(|e| format!("failed getting block {index}: {e}")) + .unwrap_or_else(|e| panic!("failed getting block {index}: {e}"))) } - pub fn get_node(&self, index: TreeIndex) -> Result { + pub fn get_node(&self, index: TreeIndex) -> Result { Ok(self.get_block(index)?.node) } - pub fn get_parent_index(&self, index: TreeIndex) -> Result { + pub fn get_parent_index(&self, index: TreeIndex) -> Result { let block = self.get_block_bytes(index)?; Ok(Node::parent_from_bytes( @@ -1121,7 +1157,7 @@ impl MerkleBlob { pub fn get_lineage_with_indexes( &self, index: TreeIndex, - ) -> Result, String> { + ) -> Result, Error> { let mut next_index = Some(index); let mut lineage = vec![]; @@ -1134,7 +1170,7 @@ impl MerkleBlob { Ok(lineage) } - pub fn get_lineage_indexes(&self, index: TreeIndex) -> Result, String> { + pub fn get_lineage_indexes(&self, index: TreeIndex) -> Result, Error> { let mut next_index = Some(index); let mut lineage: Vec = vec![]; @@ -1150,7 +1186,7 @@ impl MerkleBlob { // <&Self as IntoIterator>::into_iter(self) // } - pub fn calculate_lazy_hashes(&mut self) -> Result<(), String> { + pub fn calculate_lazy_hashes(&mut self) -> Result<(), Error> { // OPT: really want a truncated traversal, not filter // OPT: yeah, storing the whole set of blocks via collect is not great for (index, mut block) in MerkleBlobLeftChildFirstIterator::new(&self.blob) @@ -1171,45 +1207,45 @@ impl MerkleBlob { Ok(()) } - #[allow(unused)] - fn relocate_node(&mut self, source: TreeIndex, destination: TreeIndex) -> Result<(), String> { - let extend_index = self.extend_index(); - if source == 0 { - return Err("relocation of the root and index zero is not allowed".to_string()); - }; - assert!(source < extend_index); - assert!(!self.free_indexes.contains(&source)); - assert!(destination <= extend_index); - assert!(destination == extend_index || self.free_indexes.contains(&destination)); - - let source_block = self.get_block(source).unwrap(); - if let Some(parent) = source_block.node.parent { - let mut parent_block = self.get_block(parent).unwrap(); - let NodeSpecific::Internal { - ref mut left, - ref mut right, - } = parent_block.node.specific - else { - panic!(); - }; - match source { - x if x == *left => *left = destination, - x if x == *right => *right = destination, - _ => panic!(), - } - self.insert_entry_to_blob(parent, &parent_block).unwrap(); - } - - if let NodeSpecific::Internal { left, right, .. 
} = source_block.node.specific { - for child in [left, right] { - self.update_parent(child, Some(destination)).unwrap(); - } - } - - self.free_indexes.insert(source); - - Ok(()) - } + // #[allow(unused)] + // fn relocate_node(&mut self, source: TreeIndex, destination: TreeIndex) -> Result<(), Error> { + // let extend_index = self.extend_index(); + // if source == 0 { + // return Err("relocation of the root and index zero is not allowed".to_string()); + // }; + // assert!(source < extend_index); + // assert!(!self.free_indexes.contains(&source)); + // assert!(destination <= extend_index); + // assert!(destination == extend_index || self.free_indexes.contains(&destination)); + // + // let source_block = self.get_block(source).unwrap(); + // if let Some(parent) = source_block.node.parent { + // let mut parent_block = self.get_block(parent).unwrap(); + // let NodeSpecific::Internal { + // ref mut left, + // ref mut right, + // } = parent_block.node.specific + // else { + // panic!(); + // }; + // match source { + // x if x == *left => *left = destination, + // x if x == *right => *right = destination, + // _ => panic!(), + // } + // self.insert_entry_to_blob(parent, &parent_block).unwrap(); + // } + // + // if let NodeSpecific::Internal { left, right, .. } = source_block.node.specific { + // for child in [left, right] { + // self.update_parent(child, Some(destination)).unwrap(); + // } + // } + // + // self.free_indexes.insert(source); + // + // Ok(()) + // } #[allow(unused)] fn get_key_value_map(&self) -> HashMap { @@ -1273,7 +1309,7 @@ impl MerkleBlob { let slice = unsafe { std::slice::from_raw_parts(blob.buf_ptr() as *const u8, blob.len_bytes()) }; - Self::new(Vec::from(slice)).map_err(PyValueError::new_err) + Self::new(Vec::from(slice)).map_err(|e| PyValueError::new_err(e.to_string())) } #[pyo3(name = "insert", signature = (key, value, hash, reference_kid = None, side = None))] @@ -1307,22 +1343,25 @@ impl MerkleBlob { } }; self.insert(key, value, &hash, insert_location) - .map_err(PyValueError::new_err) + .map_err(|e| PyValueError::new_err(e.to_string())) } #[pyo3(name = "delete")] pub fn py_delete(&mut self, key: KvId) -> PyResult<()> { - self.delete(key).map_err(PyValueError::new_err) + self.delete(key) + .map_err(|e| PyValueError::new_err(e.to_string())) } #[pyo3(name = "get_raw_node")] pub fn py_get_raw_node(&mut self, index: TreeIndex) -> PyResult { - self.get_node(index).map_err(PyValueError::new_err) + self.get_node(index) + .map_err(|e| PyValueError::new_err(e.to_string())) } #[pyo3(name = "calculate_lazy_hashes")] pub fn py_calculate_lazy_hashes(&mut self) -> PyResult<()> { - self.calculate_lazy_hashes().map_err(PyValueError::new_err) + self.calculate_lazy_hashes() + .map_err(|e| PyValueError::new_err(e.to_string())) } #[pyo3(name = "get_lineage_with_indexes")] @@ -1335,7 +1374,7 @@ impl MerkleBlob { for (index, node) in self .get_lineage_with_indexes(index) - .map_err(PyValueError::new_err)? + .map_err(|e| PyValueError::new_err(e.to_string()))? 
{ use pyo3::conversion::IntoPy; use pyo3::types::PyListMethods; @@ -1374,7 +1413,9 @@ impl MerkleBlob { return Ok(None); } - let block = self.get_block(index).map_err(PyValueError::new_err)?; + let block = self + .get_block(index) + .map_err(|e| PyValueError::new_err(e.to_string()))?; if block.metadata.dirty { return Err(PyValueError::new_err("root hash is dirty")); } @@ -1395,7 +1436,7 @@ impl MerkleBlob { } self.batch_insert(&mut zip(keys_values, hashes)) - .map_err(PyValueError::new_err)?; + .map_err(|e| PyValueError::new_err(e.to_string()))?; Ok(()) } From ac1aad82234d80dc73c84dc7b51e8b047e56e87c Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 5 Nov 2024 19:39:57 -0500 Subject: [PATCH 124/181] use newtype pattern for `TreeIndex` --- crates/chia-datalayer/src/merkle.rs | 91 ++++++++++++++++++----------- 1 file changed, 56 insertions(+), 35 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 91c7cbfee..2684cd884 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -2,7 +2,7 @@ use pyo3::{ buffer::PyBuffer, exceptions::{PyAttributeError, PyValueError}, - pyclass, pymethods, PyResult, Python, + pyclass, pymethods, FromPyObject, IntoPy, PyObject, PyResult, Python, }; use clvmr::sha2::Sha256; @@ -14,7 +14,22 @@ use std::mem::size_of; use std::ops::Range; use thiserror::Error; -type TreeIndex = u32; +#[cfg_attr(feature = "py-bindings", derive(FromPyObject), pyo3(transparent))] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TreeIndex(u32); + +impl IntoPy for TreeIndex { + fn into_py(self, py: Python<'_>) -> pyo3::PyObject { + self.0.into_py(py) + } +} + +impl std::fmt::Display for TreeIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + type Parent = Option; type Hash = [u8; 32]; // key and value ids are provided from outside of this code and are implemented as @@ -171,7 +186,7 @@ pub enum InsertLocation { Leaf { index: TreeIndex, side: Side }, } -const NULL_PARENT: TreeIndex = 0xffff_ffffu32; +const NULL_PARENT: TreeIndex = TreeIndex(0xffff_ffffu32); #[derive(Debug, PartialEq)] pub struct NodeMetadata { @@ -250,8 +265,8 @@ impl Node { hash: Self::hash_from_bytes(&blob), specific: match metadata.node_type { NodeType::Internal => NodeSpecific::Internal { - left: TreeIndex::from_be_bytes(blob[LEFT_RANGE].try_into().unwrap()), - right: TreeIndex::from_be_bytes(blob[RIGHT_RANGE].try_into().unwrap()), + left: TreeIndex(u32::from_be_bytes(blob[LEFT_RANGE].try_into().unwrap())), + right: TreeIndex(u32::from_be_bytes(blob[RIGHT_RANGE].try_into().unwrap())), }, NodeType::Leaf => NodeSpecific::Leaf { key: KvId::from_be_bytes(blob[KEY_RANGE].try_into().unwrap()), @@ -262,7 +277,7 @@ impl Node { } fn parent_from_bytes(blob: &DataBytes) -> Parent { - let parent_integer = TreeIndex::from_be_bytes(blob[PARENT_RANGE].try_into().unwrap()); + let parent_integer = TreeIndex(u32::from_be_bytes(blob[PARENT_RANGE].try_into().unwrap())); match parent_integer { NULL_PARENT => None, _ => Some(parent_integer), @@ -286,9 +301,9 @@ impl Node { Some(parent) => *parent, }; blob[HASH_RANGE].copy_from_slice(hash); - blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_be_bytes()); - blob[LEFT_RANGE].copy_from_slice(&left.to_be_bytes()); - blob[RIGHT_RANGE].copy_from_slice(&right.to_be_bytes()); + blob[PARENT_RANGE].copy_from_slice(&parent_integer.0.to_be_bytes()); + blob[LEFT_RANGE].copy_from_slice(&left.0.to_be_bytes()); + 
blob[RIGHT_RANGE].copy_from_slice(&right.0.to_be_bytes()); } Node { parent, @@ -300,7 +315,7 @@ impl Node { Some(parent) => *parent, }; blob[HASH_RANGE].copy_from_slice(hash); - blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_be_bytes()); + blob[PARENT_RANGE].copy_from_slice(&parent_integer.0.to_be_bytes()); blob[KEY_RANGE].copy_from_slice(&key.to_be_bytes()); blob[VALUE_RANGE].copy_from_slice(&value.to_be_bytes()); } @@ -359,7 +374,7 @@ impl Node { } fn block_range(index: TreeIndex) -> Range { - let block_start = index as usize * BLOCK_SIZE; + let block_start = index.0 as usize * BLOCK_SIZE; block_start..block_start + BLOCK_SIZE } @@ -403,7 +418,7 @@ fn get_free_indexes_and_keys_values_indexes( let mut key_to_index: HashMap = HashMap::default(); for (index, block) in MerkleBlobLeftChildFirstIterator::new(blob) { - seen_indexes[index as usize] = true; + seen_indexes[index.0 as usize] = true; if let NodeSpecific::Leaf { key, .. } = block.node.specific { key_to_index.insert(key, index); @@ -413,7 +428,7 @@ fn get_free_indexes_and_keys_values_indexes( let mut free_indexes: HashSet = HashSet::new(); for (index, seen) in seen_indexes.iter().enumerate() { if !seen { - free_indexes.insert(index as TreeIndex); + free_indexes.insert(TreeIndex(index as u32)); } } @@ -561,7 +576,7 @@ impl MerkleBlob { return Err(Error::OldLeafUnexpectedlyNotALeaf); }; - node.parent = Some(0); + node.parent = Some(TreeIndex(0)); let nodes = [ ( @@ -570,7 +585,7 @@ impl MerkleBlob { Side::Right => left_index, }, Node { - parent: Some(0), + parent: Some(TreeIndex(0)), specific: NodeSpecific::Leaf { key: old_leaf_key, value: old_leaf_value, @@ -872,11 +887,11 @@ impl MerkleBlob { let Some(grandparent_index) = parent.parent else { sibling_block.node.parent = None; - self.insert_entry_to_blob(0, &sibling_block)?; + self.insert_entry_to_blob(TreeIndex(0), &sibling_block)?; if let NodeSpecific::Internal { left, right } = sibling_block.node.specific { for child_index in [left, right] { - self.update_parent(child_index, Some(0))?; + self.update_parent(child_index, Some(TreeIndex(0)))?; } }; @@ -977,7 +992,7 @@ impl MerkleBlob { let total_count = leaf_count + internal_count + self.free_indexes.len(); let extend_index = self.extend_index(); assert_eq!( - total_count, extend_index as usize, + total_count, extend_index.0 as usize, "expected total node count {extend_index:?} found: {total_count:?}", ); assert_eq!(child_to_parent.len(), 0); @@ -1047,7 +1062,7 @@ impl MerkleBlob { } else { Side::Right }; - let mut next_index: TreeIndex = 0; + let mut next_index = TreeIndex(0); let mut node = self.get_node(next_index)?; loop { @@ -1080,7 +1095,7 @@ impl MerkleBlob { fn extend_index(&self) -> TreeIndex { let blob_length = self.blob.len(); - let index: TreeIndex = (blob_length / BLOCK_SIZE) as TreeIndex; + let index: TreeIndex = TreeIndex((blob_length / BLOCK_SIZE) as u32); let remainder = blob_length % BLOCK_SIZE; assert_eq!(remainder, 0, "blob length {blob_length:?} not a multiple of {BLOCK_SIZE:?}, remainder: {remainder:?}"); @@ -1378,7 +1393,7 @@ impl MerkleBlob { { use pyo3::conversion::IntoPy; use pyo3::types::PyListMethods; - list.append((index, node.into_py(py)))?; + list.append((index.into_py(py), node.into_py(py)))?; } Ok(list.into()) @@ -1391,7 +1406,7 @@ impl MerkleBlob { for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob) { use pyo3::conversion::IntoPy; use pyo3::types::PyListMethods; - list.append((index, block.node.into_py(py)))?; + list.append((index.into_py(py), block.node.into_py(py)))?; } 
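// Aside (illustrative sketch, not part of this diff): the point of the `TreeIndex`
// newtype is that a bare `u32` no longer type-checks where a tree index is
// expected, so indexes cannot be silently confused with other integers.
// Assuming the definitions introduced in this patch:
//
//     let index = TreeIndex(0);          // explicit construction instead of a bare integer
//     let _range = block_range(index);   // helpers like block_range now take TreeIndex
//     // let _bad = block_range(0u32);   // no longer compiles: expected TreeIndex, found u32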
Ok(list.into()) @@ -1404,7 +1419,7 @@ impl MerkleBlob { #[pyo3(name = "get_root_hash")] pub fn py_get_root_hash(&self) -> PyResult> { - self.py_get_hash_at_index(0) + self.py_get_hash_at_index(TreeIndex(0)) } #[pyo3(name = "get_hash_at_index")] @@ -1463,7 +1478,7 @@ impl<'a> MerkleBlobLeftChildFirstIterator<'a> { if blob.len() / BLOCK_SIZE > 0 { deque.push_back(MerkleBlobLeftChildFirstIteratorItem { visited: false, - index: 0, + index: TreeIndex(0), }); } @@ -1516,7 +1531,7 @@ impl<'a> MerkleBlobParentFirstIterator<'a> { fn new(blob: &'a [u8]) -> Self { let mut deque = VecDeque::new(); if blob.len() / BLOCK_SIZE > 0 { - deque.push_back(0); + deque.push_back(TreeIndex(0)); } Self { blob, deque } @@ -1552,7 +1567,7 @@ impl<'a> MerkleBlobBreadthFirstIterator<'a> { fn new(blob: &'a [u8]) -> Self { let mut deque = VecDeque::new(); if blob.len() / BLOCK_SIZE > 0 { - deque.push_back(0); + deque.push_back(TreeIndex(0)); } Self { blob, deque } @@ -1673,7 +1688,7 @@ mod tests { #[rstest] fn test_get_lineage(small_blob: MerkleBlob) { - let lineage = small_blob.get_lineage_with_indexes(2).unwrap(); + let lineage = small_blob.get_lineage_with_indexes(TreeIndex(2)).unwrap(); for (_, node) in &lineage { println!("{node:?}"); } @@ -1683,8 +1698,8 @@ mod tests { } #[rstest] - #[case::right(0, 2, Side::Left)] - #[case::left(0xff, 1, Side::Right)] + #[case::right(0, TreeIndex(2), Side::Left)] + #[case::left(0xff, TreeIndex(1), Side::Right)] fn test_get_random_insert_location_by_seed( #[case] seed: u8, #[case] expected_index: TreeIndex, @@ -1868,7 +1883,10 @@ mod tests { let index = small_blob.key_to_index[&key]; small_blob.delete(key).unwrap(); - assert_eq!(small_blob.free_indexes, HashSet::from([index, 2])); + assert_eq!( + small_blob.free_indexes, + HashSet::from([index, TreeIndex(2)]) + ); } #[rstest] @@ -1879,7 +1897,7 @@ mod tests { small_blob.delete(key).unwrap(); open_dot(small_blob.to_dot().set_note("after delete")); - let expected = HashSet::from([1, 2]); + let expected = HashSet::from([TreeIndex(1), TreeIndex(2)]); assert_eq!(small_blob.free_indexes, expected); } @@ -1905,20 +1923,23 @@ mod tests { #[should_panic(expected = "unable to get sibling index from a leaf")] fn test_node_specific_sibling_index_panics_for_leaf() { let leaf = NodeSpecific::Leaf { key: 0, value: 0 }; - leaf.sibling_index(0); + leaf.sibling_index(TreeIndex(0)); } #[test] #[should_panic(expected = "index not a child: 2")] fn test_node_specific_sibling_index_panics_for_unknown_sibling() { - let node = NodeSpecific::Internal { left: 0, right: 1 }; - node.sibling_index(2); + let node = NodeSpecific::Internal { + left: TreeIndex(0), + right: TreeIndex(1), + }; + node.sibling_index(TreeIndex(2)); } #[rstest] fn test_get_free_indexes(small_blob: MerkleBlob) { let mut blob = small_blob.blob.clone(); - let expected_free_index = (blob.len() / BLOCK_SIZE) as TreeIndex; + let expected_free_index = TreeIndex((blob.len() / BLOCK_SIZE) as u32); blob.extend_from_slice(&[0; BLOCK_SIZE]); let (free_indexes, _) = get_free_indexes_and_keys_values_indexes(&blob); assert_eq!(free_indexes, HashSet::from([expected_free_index])); From a997b9bc4550f2f22340ad411f0cb7c868b654d1 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 5 Nov 2024 20:12:53 -0500 Subject: [PATCH 125/181] add helpers --- crates/chia-datalayer/src/merkle.rs | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 2684cd884..43981eb47 100644 --- 
a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -18,8 +18,21 @@ use thiserror::Error; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct TreeIndex(u32); +type TreeIndexBytes = [u8; size_of::()]; + +impl TreeIndex { + fn from_be_bytes(bytes: TreeIndexBytes) -> Self { + Self(u32::from_be_bytes(bytes)) + } + + fn to_be_bytes(self) -> TreeIndexBytes { + self.0.to_be_bytes() + } +} + +#[cfg(feature = "py-bindings")] impl IntoPy for TreeIndex { - fn into_py(self, py: Python<'_>) -> pyo3::PyObject { + fn into_py(self, py: Python<'_>) -> PyObject { self.0.into_py(py) } } @@ -265,8 +278,8 @@ impl Node { hash: Self::hash_from_bytes(&blob), specific: match metadata.node_type { NodeType::Internal => NodeSpecific::Internal { - left: TreeIndex(u32::from_be_bytes(blob[LEFT_RANGE].try_into().unwrap())), - right: TreeIndex(u32::from_be_bytes(blob[RIGHT_RANGE].try_into().unwrap())), + left: TreeIndex::from_be_bytes(blob[LEFT_RANGE].try_into().unwrap()), + right: TreeIndex::from_be_bytes(blob[RIGHT_RANGE].try_into().unwrap()), }, NodeType::Leaf => NodeSpecific::Leaf { key: KvId::from_be_bytes(blob[KEY_RANGE].try_into().unwrap()), @@ -277,7 +290,7 @@ impl Node { } fn parent_from_bytes(blob: &DataBytes) -> Parent { - let parent_integer = TreeIndex(u32::from_be_bytes(blob[PARENT_RANGE].try_into().unwrap())); + let parent_integer = TreeIndex::from_be_bytes(blob[PARENT_RANGE].try_into().unwrap()); match parent_integer { NULL_PARENT => None, _ => Some(parent_integer), @@ -301,9 +314,9 @@ impl Node { Some(parent) => *parent, }; blob[HASH_RANGE].copy_from_slice(hash); - blob[PARENT_RANGE].copy_from_slice(&parent_integer.0.to_be_bytes()); - blob[LEFT_RANGE].copy_from_slice(&left.0.to_be_bytes()); - blob[RIGHT_RANGE].copy_from_slice(&right.0.to_be_bytes()); + blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_be_bytes()); + blob[LEFT_RANGE].copy_from_slice(&left.to_be_bytes()); + blob[RIGHT_RANGE].copy_from_slice(&right.to_be_bytes()); } Node { parent, @@ -315,7 +328,7 @@ impl Node { Some(parent) => *parent, }; blob[HASH_RANGE].copy_from_slice(hash); - blob[PARENT_RANGE].copy_from_slice(&parent_integer.0.to_be_bytes()); + blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_be_bytes()); blob[KEY_RANGE].copy_from_slice(&key.to_be_bytes()); blob[VALUE_RANGE].copy_from_slice(&value.to_be_bytes()); } From 1b865d928577f97a689f941c38530bbce6508563 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 5 Nov 2024 20:17:13 -0500 Subject: [PATCH 126/181] shift try into unwrap --- crates/chia-datalayer/src/merkle.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 43981eb47..0f55990a8 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -18,14 +18,12 @@ use thiserror::Error; #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct TreeIndex(u32); -type TreeIndexBytes = [u8; size_of::()]; - impl TreeIndex { - fn from_be_bytes(bytes: TreeIndexBytes) -> Self { - Self(u32::from_be_bytes(bytes)) + fn from_be_bytes(bytes: &[u8]) -> Self { + Self(u32::from_be_bytes(bytes.try_into().unwrap())) } - fn to_be_bytes(self) -> TreeIndexBytes { + fn to_be_bytes(self) -> [u8; 4] { self.0.to_be_bytes() } } @@ -278,8 +276,8 @@ impl Node { hash: Self::hash_from_bytes(&blob), specific: match metadata.node_type { NodeType::Internal => NodeSpecific::Internal { - left: 
TreeIndex::from_be_bytes(blob[LEFT_RANGE].try_into().unwrap()), - right: TreeIndex::from_be_bytes(blob[RIGHT_RANGE].try_into().unwrap()), + left: TreeIndex::from_be_bytes(&blob[LEFT_RANGE]), + right: TreeIndex::from_be_bytes(&blob[RIGHT_RANGE]), }, NodeType::Leaf => NodeSpecific::Leaf { key: KvId::from_be_bytes(blob[KEY_RANGE].try_into().unwrap()), @@ -290,7 +288,7 @@ impl Node { } fn parent_from_bytes(blob: &DataBytes) -> Parent { - let parent_integer = TreeIndex::from_be_bytes(blob[PARENT_RANGE].try_into().unwrap()); + let parent_integer = TreeIndex::from_be_bytes(&blob[PARENT_RANGE]); match parent_integer { NULL_PARENT => None, _ => Some(parent_integer), From a40df03d47c20c6241670ec3ef6353f7ff23c93e Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 5 Nov 2024 20:20:24 -0500 Subject: [PATCH 127/181] big-endian is just part of the serialization definition, not a choice --- crates/chia-datalayer/src/merkle.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 0f55990a8..8fcc2d567 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -19,11 +19,11 @@ use thiserror::Error; pub struct TreeIndex(u32); impl TreeIndex { - fn from_be_bytes(bytes: &[u8]) -> Self { + fn from_bytes(bytes: &[u8]) -> Self { Self(u32::from_be_bytes(bytes.try_into().unwrap())) } - fn to_be_bytes(self) -> [u8; 4] { + fn to_bytes(self) -> [u8; 4] { self.0.to_be_bytes() } } @@ -276,8 +276,8 @@ impl Node { hash: Self::hash_from_bytes(&blob), specific: match metadata.node_type { NodeType::Internal => NodeSpecific::Internal { - left: TreeIndex::from_be_bytes(&blob[LEFT_RANGE]), - right: TreeIndex::from_be_bytes(&blob[RIGHT_RANGE]), + left: TreeIndex::from_bytes(&blob[LEFT_RANGE]), + right: TreeIndex::from_bytes(&blob[RIGHT_RANGE]), }, NodeType::Leaf => NodeSpecific::Leaf { key: KvId::from_be_bytes(blob[KEY_RANGE].try_into().unwrap()), @@ -288,7 +288,7 @@ impl Node { } fn parent_from_bytes(blob: &DataBytes) -> Parent { - let parent_integer = TreeIndex::from_be_bytes(&blob[PARENT_RANGE]); + let parent_integer = TreeIndex::from_bytes(&blob[PARENT_RANGE]); match parent_integer { NULL_PARENT => None, _ => Some(parent_integer), @@ -312,9 +312,9 @@ impl Node { Some(parent) => *parent, }; blob[HASH_RANGE].copy_from_slice(hash); - blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_be_bytes()); - blob[LEFT_RANGE].copy_from_slice(&left.to_be_bytes()); - blob[RIGHT_RANGE].copy_from_slice(&right.to_be_bytes()); + blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_bytes()); + blob[LEFT_RANGE].copy_from_slice(&left.to_bytes()); + blob[RIGHT_RANGE].copy_from_slice(&right.to_bytes()); } Node { parent, @@ -326,7 +326,7 @@ impl Node { Some(parent) => *parent, }; blob[HASH_RANGE].copy_from_slice(hash); - blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_be_bytes()); + blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_bytes()); blob[KEY_RANGE].copy_from_slice(&key.to_be_bytes()); blob[VALUE_RANGE].copy_from_slice(&value.to_be_bytes()); } From c0eba9ff5f554d5c99434223f28ab92ff2ad1070 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 6 Nov 2024 20:14:01 -0500 Subject: [PATCH 128/181] use unit variants for node --- crates/chia-datalayer/src/lib.rs | 2 +- crates/chia-datalayer/src/merkle.rs | 597 +++++++++++------------- crates/chia-datalayer/src/merkle/dot.rs | 12 +- wheel/src/api.rs | 5 +- 4 files changed, 282 insertions(+), 334 deletions(-) diff --git 
a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index f75124346..7ed47e100 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1,3 +1,3 @@ mod merkle; -pub use merkle::{InsertLocation, MerkleBlob, Node, Side}; +pub use merkle::{InsertLocation, InternalNode, LeafNode, MerkleBlob, Side}; diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 8fcc2d567..91e950143 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -1,8 +1,7 @@ #[cfg(feature = "py-bindings")] use pyo3::{ - buffer::PyBuffer, - exceptions::{PyAttributeError, PyValueError}, - pyclass, pymethods, FromPyObject, IntoPy, PyObject, PyResult, Python, + buffer::PyBuffer, exceptions::PyValueError, pyclass, pymethods, FromPyObject, IntoPy, PyObject, + PyResult, Python, }; use clvmr::sha2::Sha256; @@ -73,9 +72,6 @@ pub enum Error { #[error("requested insertion at root but tree not empty")] UnableToInsertAsRootOfNonEmptyTree, - #[error("old leaf unexpectedly not a leaf")] - OldLeafUnexpectedlyNotALeaf, - #[error("unable to find a leaf")] UnableToFindALeaf, @@ -235,152 +231,141 @@ impl NodeMetadata { } } -#[cfg_attr(feature = "py-bindings", pyclass(name = "Node", get_all))] -#[derive(Debug, PartialEq)] -pub struct Node { +fn parent_from_bytes(blob: &DataBytes) -> Parent { + let parent_integer = TreeIndex::from_bytes(&blob[PARENT_RANGE]); + match parent_integer { + NULL_PARENT => None, + _ => Some(parent_integer), + } +} + +fn hash_from_bytes(blob: &DataBytes) -> Hash { + blob[HASH_RANGE].try_into().unwrap() +} + +#[cfg_attr(feature = "py-bindings", pyclass(name = "InternalNode", get_all))] +#[derive(Debug, PartialEq, Eq)] +pub struct InternalNode { parent: Parent, hash: Hash, - specific: NodeSpecific, + left: TreeIndex, + right: TreeIndex, } -// #[cfg_attr(feature = "py-bindings", pyclass(name = "NodeSpecific"))] -#[cfg_attr(feature = "py-bindings", pyclass(name = "NodeSpecific", get_all))] -#[derive(Clone, Debug, PartialEq)] -pub enum NodeSpecific { - Internal { left: TreeIndex, right: TreeIndex }, - Leaf { key: KvId, value: KvId }, -} +impl InternalNode { + #[allow(clippy::unnecessary_wraps)] + pub fn from_bytes(blob: &DataBytes) -> Result { + Ok(Self { + parent: parent_from_bytes(blob), + hash: hash_from_bytes(blob), + left: TreeIndex::from_bytes(&blob[LEFT_RANGE]), + right: TreeIndex::from_bytes(&blob[RIGHT_RANGE]), + }) + } + pub fn to_bytes(&self) -> DataBytes { + let mut blob: DataBytes = [0; DATA_SIZE]; + let parent_integer = self.parent.unwrap_or(NULL_PARENT); + blob[HASH_RANGE].copy_from_slice(&self.hash); + blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_bytes()); + blob[LEFT_RANGE].copy_from_slice(&self.left.to_bytes()); + blob[RIGHT_RANGE].copy_from_slice(&self.right.to_bytes()); -impl NodeSpecific { - // TODO: methods that only handle one variant seem kinda smelly to me, am i right? 
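// Aside (illustrative sketch, not part of this diff): after this refactor, call
// sites match on the wrapping structs and read named fields rather than
// destructuring `NodeSpecific` variants. Assuming the types defined in this patch:
//
//     match merkle_blob.get_node(index)? {
//         Node::Internal(internal) => println!("children: {} {}", internal.left, internal.right),
//         Node::Leaf(leaf) => println!("key {} value {}", leaf.key, leaf.value),
//     }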
- pub fn sibling_index(&self, index: TreeIndex) -> TreeIndex { - let NodeSpecific::Internal { right, left } = self else { - panic!("unable to get sibling index from a leaf") - }; + blob + } - if index == *right { - *left - } else if index == *left { - *right + pub fn sibling_index(&self, index: TreeIndex) -> TreeIndex { + if index == self.right { + self.left + } else if index == self.left { + self.right } else { panic!("index not a child: {index}") } } } -impl Node { +#[cfg_attr(feature = "py-bindings", pyclass(name = "LeafNode", get_all))] +#[derive(Debug, PartialEq, Eq)] +pub struct LeafNode { + parent: Parent, + hash: Hash, + key: KvId, + value: KvId, +} + +impl LeafNode { #[allow(clippy::unnecessary_wraps)] - pub fn from_bytes(metadata: &NodeMetadata, blob: DataBytes) -> Result { + pub fn from_bytes(blob: &DataBytes) -> Result { Ok(Self { - parent: Self::parent_from_bytes(&blob), - hash: Self::hash_from_bytes(&blob), - specific: match metadata.node_type { - NodeType::Internal => NodeSpecific::Internal { - left: TreeIndex::from_bytes(&blob[LEFT_RANGE]), - right: TreeIndex::from_bytes(&blob[RIGHT_RANGE]), - }, - NodeType::Leaf => NodeSpecific::Leaf { - key: KvId::from_be_bytes(blob[KEY_RANGE].try_into().unwrap()), - value: KvId::from_be_bytes(blob[VALUE_RANGE].try_into().unwrap()), - }, - }, + parent: parent_from_bytes(blob), + hash: hash_from_bytes(blob), + key: KvId::from_be_bytes(blob[KEY_RANGE].try_into().unwrap()), + value: KvId::from_be_bytes(blob[VALUE_RANGE].try_into().unwrap()), }) } - fn parent_from_bytes(blob: &DataBytes) -> Parent { - let parent_integer = TreeIndex::from_bytes(&blob[PARENT_RANGE]); - match parent_integer { - NULL_PARENT => None, - _ => Some(parent_integer), - } - } - - fn hash_from_bytes(blob: &DataBytes) -> Hash { - blob[HASH_RANGE].try_into().unwrap() - } - pub fn to_bytes(&self) -> DataBytes { let mut blob: DataBytes = [0; DATA_SIZE]; - match self { - Node { - parent, - specific: NodeSpecific::Internal { left, right }, - hash, - } => { - let parent_integer = match parent { - None => NULL_PARENT, - Some(parent) => *parent, - }; - blob[HASH_RANGE].copy_from_slice(hash); - blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_bytes()); - blob[LEFT_RANGE].copy_from_slice(&left.to_bytes()); - blob[RIGHT_RANGE].copy_from_slice(&right.to_bytes()); - } - Node { - parent, - specific: NodeSpecific::Leaf { key, value }, - hash, - } => { - let parent_integer = match parent { - None => NULL_PARENT, - Some(parent) => *parent, - }; - blob[HASH_RANGE].copy_from_slice(hash); - blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_bytes()); - blob[KEY_RANGE].copy_from_slice(&key.to_be_bytes()); - blob[VALUE_RANGE].copy_from_slice(&value.to_be_bytes()); - } - } + let parent_integer = self.parent.unwrap_or(NULL_PARENT); + blob[HASH_RANGE].copy_from_slice(&self.hash); + blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_bytes()); + blob[KEY_RANGE].copy_from_slice(&self.key.to_be_bytes()); + blob[VALUE_RANGE].copy_from_slice(&self.value.to_be_bytes()); blob } } -#[cfg(feature = "py-bindings")] -#[pymethods] -impl Node { - #[getter(left)] - pub fn py_property_left(&self) -> PyResult { - let NodeSpecific::Internal { left, .. 
} = self.specific else { - return Err(PyAttributeError::new_err( - "Attribute 'left' not present for leaf nodes".to_string(), - )); - }; +#[derive(Debug, PartialEq, Eq)] +pub enum Node { + Internal(InternalNode), + Leaf(LeafNode), +} - Ok(left) +impl Node { + fn parent(&self) -> Parent { + match self { + Node::Internal(node) => node.parent, + Node::Leaf(node) => node.parent, + } } - #[getter(right)] - pub fn py_property_right(&self) -> PyResult { - let NodeSpecific::Internal { right, .. } = self.specific else { - return Err(PyAttributeError::new_err( - "Attribute 'right' not present for leaf nodes".to_string(), - )); - }; - - Ok(right) + fn set_parent(&mut self, parent: Parent) { + match self { + Node::Internal(node) => node.parent = parent, + Node::Leaf(node) => node.parent = parent, + } } - #[getter(key)] - pub fn py_property_key(&self) -> PyResult { - let NodeSpecific::Leaf { key, .. } = self.specific else { - return Err(PyAttributeError::new_err( - "Attribute 'key' not present for internal nodes".to_string(), - )); - }; + fn hash(&self) -> Hash { + match self { + Node::Internal(node) => node.hash, + Node::Leaf(node) => node.hash, + } + } - Ok(key) + pub fn from_bytes(metadata: &NodeMetadata, blob: &DataBytes) -> Result { + Ok(match metadata.node_type { + NodeType::Internal => Node::Internal(InternalNode::from_bytes(blob)?), + NodeType::Leaf => Node::Leaf(LeafNode::from_bytes(blob)?), + }) } - #[getter(value)] - pub fn py_property_value(&self) -> PyResult { - let NodeSpecific::Leaf { value, .. } = self.specific else { - return Err(PyAttributeError::new_err( - "Attribute 'value' not present for internal nodes".to_string(), - )); - }; + pub fn to_bytes(&self) -> DataBytes { + match self { + Node::Internal(node) => node.to_bytes(), + Node::Leaf(node) => node.to_bytes(), + } + } +} - Ok(value) +#[cfg(feature = "py-bindings")] +impl IntoPy for Node { + fn into_py(self, py: Python<'_>) -> PyObject { + match self { + Node::Internal(node) => node.into_py(py), + Node::Leaf(node) => node.into_py(py), + } } } @@ -408,14 +393,18 @@ impl Block { let data_blob: DataBytes = blob[DATA_RANGE].try_into().unwrap(); let metadata = NodeMetadata::from_bytes(metadata_blob) .map_err(|message| Error::FailedLoadingMetadata(message.to_string()))?; - let node = Node::from_bytes(&metadata, data_blob) + let node = Node::from_bytes(&metadata, &data_blob) .map_err(|message| Error::FailedLoadingNode(message.to_string()))?; Ok(Block { metadata, node }) } pub fn update_hash(&mut self, left: &Hash, right: &Hash) { - self.node.hash = internal_hash(left, right); + let hash = internal_hash(left, right); + match self.node { + Node::Internal(ref mut node) => node.hash = hash, + Node::Leaf(ref mut node) => node.hash = hash, + } self.metadata.dirty = false; } } @@ -431,8 +420,8 @@ fn get_free_indexes_and_keys_values_indexes( for (index, block) in MerkleBlobLeftChildFirstIterator::new(blob) { seen_indexes[index.0 as usize] = true; - if let NodeSpecific::Leaf { key, .. } = block.node.specific { - key_to_index.insert(key, index); + if let Node::Leaf(leaf) = block.node { + key_to_index.insert(leaf.key, index); } } @@ -504,8 +493,7 @@ impl MerkleBlob { self.insert_first(key, value, hash)?; } InsertLocation::Leaf { index, side } => { - let old_leaf = self.get_node(index)?; - let NodeSpecific::Leaf { .. } = old_leaf.specific else { + let Node::Leaf(old_leaf) = self.get_node(index)? 
else { panic!("requested insertion at leaf but found internal node") }; @@ -514,10 +502,11 @@ impl MerkleBlob { Side::Right => internal_hash(&old_leaf.hash, hash), }; - let node = Node { + let node = LeafNode { parent: None, hash: *hash, - specific: NodeSpecific::Leaf { key, value }, + key, + value, }; if self.key_to_index.len() == 1 { @@ -537,11 +526,12 @@ impl MerkleBlob { node_type: NodeType::Leaf, dirty: false, }, - node: Node { + node: Node::Leaf(LeafNode { parent: None, - specific: NodeSpecific::Leaf { key, value }, hash: *hash, - }, + key, + value, + }), }; self.clear(); @@ -552,8 +542,8 @@ impl MerkleBlob { fn insert_second( &mut self, - mut node: Node, - old_leaf: &Node, + mut node: LeafNode, + old_leaf: &LeafNode, internal_node_hash: &Hash, side: &Side, ) -> Result<(), Error> { @@ -567,26 +557,16 @@ impl MerkleBlob { node_type: NodeType::Internal, dirty: false, }, - node: Node { + node: Node::Internal(InternalNode { parent: None, - specific: NodeSpecific::Internal { - left: left_index, - right: right_index, - }, + left: left_index, + right: right_index, hash: *internal_node_hash, - }, + }), }; self.insert_entry_to_blob(root_index, &new_internal_block)?; - let NodeSpecific::Leaf { - key: old_leaf_key, - value: old_leaf_value, - } = old_leaf.specific - else { - return Err(Error::OldLeafUnexpectedlyNotALeaf); - }; - node.parent = Some(TreeIndex(0)); let nodes = [ @@ -595,12 +575,10 @@ impl MerkleBlob { Side::Left => right_index, Side::Right => left_index, }, - Node { + LeafNode { parent: Some(TreeIndex(0)), - specific: NodeSpecific::Leaf { - key: old_leaf_key, - value: old_leaf_value, - }, + key: old_leaf.key, + value: old_leaf.value, hash: old_leaf.hash, }, ), @@ -619,7 +597,7 @@ impl MerkleBlob { node_type: NodeType::Leaf, dirty: false, }, - node, + node: Node::Leaf(node), }; self.insert_entry_to_blob(index, &block)?; @@ -630,8 +608,8 @@ impl MerkleBlob { fn insert_third_or_later( &mut self, - mut node: Node, - old_leaf: &Node, + mut node: LeafNode, + old_leaf: &LeafNode, old_leaf_index: TreeIndex, internal_node_hash: &Hash, side: &Side, @@ -646,7 +624,7 @@ impl MerkleBlob { node_type: NodeType::Leaf, dirty: false, }, - node, + node: Node::Leaf(node), }; self.insert_entry_to_blob(new_leaf_index, &new_leaf_block)?; @@ -659,14 +637,12 @@ impl MerkleBlob { node_type: NodeType::Internal, dirty: false, }, - node: Node { + node: Node::Internal(InternalNode { parent: old_leaf.parent, - specific: NodeSpecific::Internal { - left: left_index, - right: right_index, - }, + left: left_index, + right: right_index, hash: *internal_node_hash, - }, + }), }; self.insert_entry_to_blob(new_internal_node_index, &new_internal_block)?; @@ -677,16 +653,11 @@ impl MerkleBlob { self.update_parent(old_leaf_index, Some(new_internal_node_index))?; let mut old_parent_block = self.get_block(old_parent_index)?; - if let NodeSpecific::Internal { - ref mut left, - ref mut right, - .. - } = old_parent_block.node.specific - { - if old_leaf_index == *left { - *left = new_internal_node_index; - } else if old_leaf_index == *right { - *right = new_internal_node_index; + if let Node::Internal(ref mut internal_node, ..) 
= old_parent_block.node { + if old_leaf_index == internal_node.left { + internal_node.left = new_internal_node_index; + } else if old_leaf_index == internal_node.right { + internal_node.right = new_internal_node_index; } else { panic!("child not a child of its parent"); } @@ -724,11 +695,12 @@ impl MerkleBlob { node_type: NodeType::Leaf, dirty: false, }, - node: Node { + node: Node::Leaf(LeafNode { parent: None, hash, - specific: NodeSpecific::Leaf { key, value }, - }, + key, + value, + }), }; self.insert_entry_to_blob(new_leaf_index, &new_block)?; indexes.push(new_leaf_index); @@ -753,22 +725,23 @@ impl MerkleBlob { let new_internal_node_index = self.get_new_index(); - let block_1 = self.update_parent(index_1, Some(new_internal_node_index))?; - let block_2 = self.update_parent(index_2, Some(new_internal_node_index))?; + let mut hashes = vec![]; + for index in [index_1, index_2] { + let block = self.update_parent(index, Some(new_internal_node_index))?; + hashes.push(block.node.hash()); + } let new_block = Block { metadata: NodeMetadata { node_type: NodeType::Internal, dirty: false, }, - node: Node { + node: Node::Internal(InternalNode { parent: None, - hash: internal_hash(&block_1.node.hash, &block_2.node.hash), - specific: NodeSpecific::Internal { - left: index_1, - right: index_2, - }, - }, + hash: internal_hash(&hashes[0], &hashes[1]), + left: index_1, + right: index_2, + }), }; self.insert_entry_to_blob(new_internal_node_index, &new_block)?; @@ -781,10 +754,11 @@ impl MerkleBlob { if indexes.len() == 1 { // OPT: can we avoid this extra min height leaf traversal? let min_height_leaf = self.get_min_height_leaf()?; - let NodeSpecific::Leaf { key, .. } = min_height_leaf.node.specific else { - panic!() - }; - self.insert_from_leaf(self.key_to_index[&key], indexes[0], &Side::Left)?; + self.insert_from_leaf( + self.key_to_index[&min_height_leaf.key], + indexes[0], + &Side::Left, + )?; }; Ok(()) @@ -809,12 +783,14 @@ impl MerkleBlob { } let new_internal_node_index = self.get_new_index(); - let old_leaf = self.get_node(old_leaf_index)?; + let Node::Leaf(old_leaf) = self.get_node(old_leaf_index)? else { + panic!(); + }; let new_node = self.get_node(new_index)?; let new_stuff = Stuff { index: new_index, - hash: new_node.hash, + hash: new_node.hash(), }; let old_stuff = Stuff { index: old_leaf_index, @@ -831,14 +807,12 @@ impl MerkleBlob { node_type: NodeType::Internal, dirty: false, }, - node: Node { + node: Node::Internal(InternalNode { parent: old_leaf.parent, hash: internal_node_hash, - specific: NodeSpecific::Internal { - left: left.index, - right: right.index, - }, - }, + left: left.index, + right: right.index, + }), }; self.insert_entry_to_blob(new_internal_node_index, &block)?; self.update_parent(new_index, Some(new_internal_node_index))?; @@ -849,15 +823,10 @@ impl MerkleBlob { }; let mut parent = self.get_block(old_leaf_parent)?; - if let NodeSpecific::Internal { - ref mut left, - ref mut right, - .. 
- } = parent.node.specific - { + if let Node::Internal(ref mut internal) = parent.node { match old_leaf_index { - x if x == *left => *left = new_internal_node_index, - x if x == *right => *right = new_internal_node_index, + x if x == internal.left => internal.left = new_internal_node_index, + x if x == internal.right => internal.right = new_internal_node_index, _ => panic!("parent not a child a grandparent"), } } else { @@ -869,19 +838,21 @@ impl MerkleBlob { Ok(()) } - fn get_min_height_leaf(&self) -> Result { - MerkleBlobBreadthFirstIterator::new(&self.blob) + fn get_min_height_leaf(&self) -> Result { + let block = MerkleBlobBreadthFirstIterator::new(&self.blob) .next() - .ok_or(Error::UnableToFindALeaf) + .ok_or(Error::UnableToFindALeaf)?; + + match block.node { + Node::Leaf(node) => Ok(node), + Node::Internal(node) => panic!("unexpectedly found internal node first: {node:?}"), + } } pub fn delete(&mut self, key: KvId) -> Result<(), Error> { let leaf_index = *self.key_to_index.get(&key).ok_or(Error::UnknownKey(key))?; - let leaf = self.get_node(leaf_index)?; - // TODO: maybe some common way to indicate/perform sanity double checks? - // maybe this disappears with unit variants and structs for the data - let NodeSpecific::Leaf { .. } = leaf.specific else { + let Node::Leaf(leaf) = self.get_node(leaf_index)? else { panic!("key to index cache resulted in internal node") }; self.key_to_index.remove(&key); @@ -892,16 +863,19 @@ impl MerkleBlob { }; self.free_indexes.insert(leaf_index); - let parent = self.get_node(parent_index)?; - let sibling_index = parent.specific.sibling_index(leaf_index); + let maybe_parent = self.get_node(parent_index)?; + let Node::Internal(parent) = maybe_parent else { + panic!("parent node not internal: {maybe_parent:?}") + }; + let sibling_index = parent.sibling_index(leaf_index); let mut sibling_block = self.get_block(sibling_index)?; let Some(grandparent_index) = parent.parent else { - sibling_block.node.parent = None; + sibling_block.node.set_parent(None); self.insert_entry_to_blob(TreeIndex(0), &sibling_block)?; - if let NodeSpecific::Internal { left, right } = sibling_block.node.specific { - for child_index in [left, right] { + if let Node::Internal(node) = sibling_block.node { + for child_index in [node.left, node.right] { self.update_parent(child_index, Some(TreeIndex(0)))?; } }; @@ -914,18 +888,13 @@ impl MerkleBlob { self.free_indexes.insert(parent_index); let mut grandparent_block = self.get_block(grandparent_index)?; - sibling_block.node.parent = Some(grandparent_index); + sibling_block.node.set_parent(Some(grandparent_index)); self.insert_entry_to_blob(sibling_index, &sibling_block)?; - if let NodeSpecific::Internal { - ref mut left, - ref mut right, - .. - } = grandparent_block.node.specific - { + if let Node::Internal(ref mut internal) = grandparent_block.node { match parent_index { - x if x == *left => *left = sibling_index, - x if x == *right => *right = sibling_index, + x if x == internal.left => internal.left = sibling_index, + x if x == internal.right => internal.right = sibling_index, _ => panic!("parent not a child a grandparent"), } } else { @@ -945,19 +914,15 @@ impl MerkleBlob { }; let mut block = self.get_block(*leaf_index)?; - if let NodeSpecific::Leaf { - value: ref mut inplace_value, - .. 
- } = block.node.specific - { - block.node.hash.clone_from(new_hash); - *inplace_value = value; + if let Node::Leaf(ref mut leaf) = block.node { + leaf.hash.clone_from(new_hash); + leaf.value = value; } else { panic!("expected internal node but found leaf"); } self.insert_entry_to_blob(*leaf_index, &block)?; - if let Some(parent) = block.node.parent { + if let Some(parent) = block.node.parent() { self.mark_lineage_as_dirty(parent)?; } @@ -970,21 +935,22 @@ impl MerkleBlob { let mut child_to_parent: HashMap = HashMap::new(); for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob) { - if let Some(parent) = block.node.parent { + if let Some(parent) = block.node.parent() { assert_eq!(child_to_parent.remove(&index), Some(parent)); } - match block.node.specific { - NodeSpecific::Internal { left, right } => { + match block.node { + Node::Internal(node) => { internal_count += 1; - child_to_parent.insert(left, index); - child_to_parent.insert(right, index); + child_to_parent.insert(node.left, index); + child_to_parent.insert(node.right, index); } - NodeSpecific::Leaf { key, .. } => { + Node::Leaf(node) => { leaf_count += 1; let cached_index = self .key_to_index - .get(&key) - .ok_or(Error::IntegrityKeyNotInCache(key))?; + .get(&node.key) + .ok_or(Error::IntegrityKeyNotInCache(node.key))?; + let key = node.key; assert_eq!( *cached_index, index, "key to index cache for {key:?} should be {index:?} got: {cached_index:?}" @@ -1017,7 +983,7 @@ impl MerkleBlob { parent: Option, ) -> Result { let mut block = self.get_block(index)?; - block.node.parent = parent; + block.node.set_parent(parent); self.insert_entry_to_blob(index, &block)?; Ok(block) @@ -1035,7 +1001,7 @@ impl MerkleBlob { block.metadata.dirty = true; self.insert_entry_to_blob(this_index, &block)?; - next_index = block.node.parent; + next_index = block.node.parent(); } Ok(()) @@ -1079,15 +1045,19 @@ impl MerkleBlob { loop { for byte in &seed_bytes { for bit in 0..8 { - match node.specific { - NodeSpecific::Leaf { .. } => { + match node { + Node::Leaf { .. } => { return Ok(InsertLocation::Leaf { index: next_index, side, }) } - NodeSpecific::Internal { left, right, .. } => { - next_index = if byte & (1 << bit) != 0 { left } else { right }; + Node::Internal(internal) => { + next_index = if byte & (1 << bit) != 0 { + internal.left + } else { + internal.right + }; node = self.get_node(next_index)?; } } @@ -1128,19 +1098,16 @@ impl MerkleBlob { && old_block.metadata.node_type == NodeType::Leaf { // TODO: sort of repeating the leaf check above and below. smells a little - if let NodeSpecific::Leaf { - key: old_block_key, .. - } = old_block.node.specific - { - self.key_to_index.remove(&old_block_key); + if let Node::Leaf(old_node) = old_block.node { + self.key_to_index.remove(&old_node.key); }; }; self.blob[block_range(index)].copy_from_slice(&new_block_bytes); } } - if let NodeSpecific::Leaf { key, .. 
} = block.node.specific { - self.key_to_index.insert(key, index); + if let Node::Leaf(ref node) = block.node { + self.key_to_index.insert(node.key, index); }; self.free_indexes.take(&index); @@ -1156,7 +1123,7 @@ impl MerkleBlob { let block_bytes = self.get_block_bytes(index)?; let data_bytes: DataBytes = block_bytes[DATA_RANGE].try_into().unwrap(); - Ok(Node::hash_from_bytes(&data_bytes)) + Ok(hash_from_bytes(&data_bytes)) } fn get_block_bytes(&self, index: TreeIndex) -> Result { @@ -1175,9 +1142,7 @@ impl MerkleBlob { pub fn get_parent_index(&self, index: TreeIndex) -> Result { let block = self.get_block_bytes(index)?; - Ok(Node::parent_from_bytes( - block[DATA_RANGE].try_into().unwrap(), - )) + Ok(parent_from_bytes(block[DATA_RANGE].try_into().unwrap())) } pub fn get_lineage_with_indexes( @@ -1189,7 +1154,7 @@ impl MerkleBlob { while let Some(this_index) = next_index { let node = self.get_node(this_index)?; - next_index = node.parent; + next_index = node.parent(); lineage.push((index, node)); } @@ -1219,13 +1184,13 @@ impl MerkleBlob { .filter(|(_, block)| block.metadata.dirty) .collect::>() { - let NodeSpecific::Internal { left, right } = block.node.specific else { + let Node::Internal(ref leaf) = block.node else { panic!("leaves should not be dirty") }; // OPT: obviously inefficient to re-get/deserialize these blocks inside // an iteration that's already doing that - let left_hash = self.get_hash(left)?; - let right_hash = self.get_hash(right)?; + let left_hash = self.get_hash(leaf.left)?; + let right_hash = self.get_hash(leaf.right)?; block.update_hash(&left_hash, &right_hash); self.insert_entry_to_blob(index, &block)?; } @@ -1277,10 +1242,10 @@ impl MerkleBlob { fn get_key_value_map(&self) -> HashMap { let mut key_value = HashMap::new(); for (key, index) in &self.key_to_index { - let NodeSpecific::Leaf { value, .. } = self.get_node(*index).unwrap().specific else { + let Node::Leaf(leaf) = self.get_node(*index).unwrap() else { panic!() }; - key_value.insert(*key, value); + key_value.insert(*key, leaf.value); } key_value @@ -1295,16 +1260,14 @@ impl PartialEq for MerkleBlob { MerkleBlobLeftChildFirstIterator::new(&other.blob), ) { if (self_block.metadata.dirty || other_block.metadata.dirty) - || self_block.node.hash != other_block.node.hash + || self_block.node.hash() != other_block.node.hash() { return false; } - match self_block.node.specific { + match self_block.node { // NOTE: this is effectively checked by the controlled overall traversal - NodeSpecific::Internal { .. } => {} - NodeSpecific::Leaf { .. } => { - return self_block.node.specific == other_block.node.specific - } + Node::Internal(..) => {} + Node::Leaf(..) => return self_block.node == other_block.node, } } @@ -1446,7 +1409,7 @@ impl MerkleBlob { return Err(PyValueError::new_err("root hash is dirty")); } - Ok(Some(block.node.hash)) + Ok(Some(block.node.hash())) } #[pyo3(name = "batch_insert")] @@ -1508,9 +1471,9 @@ impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { let block_bytes: BlockBytes = self.blob[block_range(item.index)].try_into().unwrap(); let block = Block::from_bytes(block_bytes).unwrap(); - match block.node.specific { - NodeSpecific::Leaf { .. } => return Some((item.index, block)), - NodeSpecific::Internal { left, right } => { + match block.node { + Node::Leaf(..) 
=> return Some((item.index, block)), + Node::Internal(ref node) => { if item.visited { return Some((item.index, block)); }; @@ -1521,11 +1484,11 @@ impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { }); self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { visited: false, - index: right, + index: node.right, }); self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { visited: false, - index: left, + index: node.left, }); } } @@ -1559,9 +1522,9 @@ impl Iterator for MerkleBlobParentFirstIterator<'_> { let block_bytes: BlockBytes = self.blob[block_range(index)].try_into().unwrap(); let block = Block::from_bytes(block_bytes).unwrap(); - if let NodeSpecific::Internal { left, right } = block.node.specific { - self.deque.push_back(left); - self.deque.push_back(right); + if let Node::Internal(ref node) = block.node { + self.deque.push_back(node.left); + self.deque.push_back(node.right); } Some((index, block)) @@ -1596,11 +1559,11 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { let block_bytes: BlockBytes = self.blob[block_range(index)].try_into().unwrap(); let block = Block::from_bytes(block_bytes).unwrap(); - match block.node.specific { - NodeSpecific::Leaf { .. } => return Some(block), - NodeSpecific::Internal { left, right } => { - self.deque.push_back(left); - self.deque.push_back(right); + match block.node { + Node::Leaf(..) => return Some(block), + Node::Internal(node) => { + self.deque.push_back(node.left); + self.deque.push_back(node.right); } } } @@ -1705,7 +1668,7 @@ mod tests { } assert_eq!(lineage.len(), 2); let (_, last_node) = lineage.last().unwrap(); - assert_eq!(last_node.parent, None); + assert_eq!(last_node.parent(), None); } #[rstest] @@ -1844,18 +1807,15 @@ mod tests { let sibling = merkle_blob .get_node(merkle_blob.key_to_index[&last_key]) .unwrap(); - let parent = merkle_blob.get_node(sibling.parent.unwrap()).unwrap(); - let NodeSpecific::Internal { left, right } = parent.specific else { + let parent = merkle_blob.get_node(sibling.parent().unwrap()).unwrap(); + let Node::Internal(internal) = parent else { panic!() }; - let NodeSpecific::Leaf { key: left_key, .. } = merkle_blob.get_node(left).unwrap().specific - else { + let Node::Leaf(left) = merkle_blob.get_node(internal.left).unwrap() else { panic!() }; - let NodeSpecific::Leaf { key: right_key, .. } = - merkle_blob.get_node(right).unwrap().specific - else { + let Node::Leaf(right) = merkle_blob.get_node(internal.right).unwrap() else { panic!() }; @@ -1863,7 +1823,7 @@ mod tests { Side::Left => [pre_count as KvId + 1, pre_count as KvId], Side::Right => [pre_count as KvId, pre_count as KvId + 1], }; - assert_eq!([left_key, right_key], expected_keys); + assert_eq!([left.key, right.key], expected_keys); } #[test] @@ -1930,17 +1890,14 @@ mod tests { NodeMetadata::dirty_from_bytes([0, 2]).expect_err("invalid value should fail"); } - #[test] - #[should_panic(expected = "unable to get sibling index from a leaf")] - fn test_node_specific_sibling_index_panics_for_leaf() { - let leaf = NodeSpecific::Leaf { key: 0, value: 0 }; - leaf.sibling_index(TreeIndex(0)); - } - #[test] #[should_panic(expected = "index not a child: 2")] fn test_node_specific_sibling_index_panics_for_unknown_sibling() { - let node = NodeSpecific::Internal { + // TODO: this probably shouldn't be a panic? + // maybe depends if it is exported or private? 
+ let node = InternalNode { + parent: None, + hash: sha256_num(0), left: TreeIndex(0), right: TreeIndex(1), }; @@ -1985,46 +1942,34 @@ mod tests { let before_blocks = MerkleBlobLeftChildFirstIterator::new(&small_blob.blob).collect::>(); let (key, index) = small_blob.key_to_index.iter().next().unwrap(); - let node = small_blob.get_node(*index).unwrap(); - let NodeSpecific::Leaf { - key: original_key, - value: original_value, - .. - } = node.specific - else { + let Node::Leaf(original) = small_blob.get_node(*index).unwrap() else { panic!() }; - let new_value = original_value + 1; + let new_value = original.value + 1; - small_blob.upsert(*key, new_value, &node.hash).unwrap(); + small_blob.upsert(*key, new_value, &original.hash).unwrap(); let after_blocks = MerkleBlobLeftChildFirstIterator::new(&small_blob.blob).collect::>(); assert_eq!(before_blocks.len(), after_blocks.len()); - for ((before_index, before), (after_index, after)) in zip(before_blocks, after_blocks) { - assert_eq!(before.node.parent, after.node.parent); + for ((before_index, before_block), (after_index, after_block)) in + zip(before_blocks, after_blocks) + { + assert_eq!(before_block.node.parent(), after_block.node.parent()); assert_eq!(before_index, after_index); - let NodeSpecific::Leaf { - key: before_key, - value: before_value, - } = before.node.specific - else { - assert_eq!(before.node.specific, after.node.specific); + let Node::Leaf(before) = before_block.node else { + // TODO: assert equality of the left and right in the internal case continue; }; - let NodeSpecific::Leaf { - key: after_key, - value: after_value, - } = after.node.specific - else { + let Node::Leaf(after) = after_block.node else { panic!() }; - assert_eq!(before_key, after_key); - if before_key == original_key { - assert_eq!(after_value, new_value); + assert_eq!(before.key, after.key); + if before.key == original.key { + assert_eq!(after.value, new_value); } else { - assert_eq!(before_value, after_value); + assert_eq!(before.value, after.value); } } } diff --git a/crates/chia-datalayer/src/merkle/dot.rs b/crates/chia-datalayer/src/merkle/dot.rs index 09ea70c0b..72b75f201 100644 --- a/crates/chia-datalayer/src/merkle/dot.rs +++ b/crates/chia-datalayer/src/merkle/dot.rs @@ -1,4 +1,6 @@ -use crate::merkle::{MerkleBlob, MerkleBlobLeftChildFirstIterator, Node, NodeSpecific, TreeIndex}; +use crate::merkle::{ + InternalNode, LeafNode, MerkleBlob, MerkleBlobLeftChildFirstIterator, Node, TreeIndex, +}; use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use url::Url; @@ -53,13 +55,13 @@ impl DotLines { impl Node { pub fn to_dot(&self, index: TreeIndex) -> DotLines { // TODO: can this be done without introducing a blank line? 
- let node_to_parent = match self.parent { + let node_to_parent = match self.parent() { Some(parent) => format!("node_{index} -> node_{parent};"), None => String::new(), }; - match self.specific { - NodeSpecific::Internal {left, right} => DotLines{ + match self { + Node::Internal ( InternalNode {left, right, ..}) => DotLines{ nodes: vec![ format!("node_{index} [label=\"{index}\"]"), ], @@ -73,7 +75,7 @@ impl Node { ], note: String::new(), }, - NodeSpecific::Leaf {key, value} => DotLines{ + Node::Leaf (LeafNode{key, value, ..}) => DotLines{ nodes: vec![ format!("node_{index} [shape=box, label=\"{index}\\nvalue: {key}\\nvalue: {value}\"];"), ], diff --git a/wheel/src/api.rs b/wheel/src/api.rs index 91eab80ff..ffe7161a3 100644 --- a/wheel/src/api.rs +++ b/wheel/src/api.rs @@ -75,7 +75,7 @@ use chia_bls::{ Signature, }; -use chia_datalayer::{MerkleBlob, Node}; +use chia_datalayer::{InternalNode, LeafNode, MerkleBlob}; #[pyfunction] pub fn compute_merkle_set_root<'p>( @@ -479,7 +479,8 @@ pub fn chia_rs(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; // m.add_class::()?; // m.add_class::()?; - m.add_class::()?; + m.add_class::()?; + m.add_class::()?; // merkle tree m.add_class::()?; From e6833f2dadacb255c53013061c996dacb7c6edeb Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 6 Nov 2024 20:29:43 -0500 Subject: [PATCH 129/181] stub --- wheel/generate_type_stubs.py | 21 +++++++++++++-------- wheel/python/chia_rs/chia_rs.pyi | 21 +++++++++++++-------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index 443830ed0..a090b1524 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -390,22 +390,27 @@ def derive_child_sk_unhardened(sk: PrivateKey, index: int) -> PrivateKey: ... @staticmethod def derive_child_pk_unhardened(pk: G1Element, index: int) -> G1Element: ... + @final -class Node: +class InternalNode: @property def parent(self) -> Optional[uint32]: ... @property def hash(self) -> bytes: ... - # TODO: this all needs reviewed and tidied - @property - def specific(self) -> Union: ... - @property def left(self) -> uint32: ... @property def right(self) -> uint32: ... + +@final +class LeafNode: + @property + def parent(self) -> Optional[uint32]: ... + @property + def hash(self) -> bytes: ... + @property def key(self) -> int64: ... @property @@ -428,10 +433,10 @@ def __init__( def insert(self, key: int64, value: int64, hash: bytes32, reference_kid: Optional[int64] = None, side: Optional[uint8] = None) -> None: ... def delete(self, key: int64) -> None: ... - def get_raw_node(self, index: uint32) -> Node: ... + def get_raw_node(self, index: uint32) -> Union[InternalNode, LeafNode]: ... def calculate_lazy_hashes(self) -> None: ... - def get_lineage_with_indexes(self, index: uint32) -> list[tuple[uint32, Node]]:... - def get_nodes_with_indexes(self) -> list[Node]: ... + def get_lineage_with_indexes(self, index: uint32) -> list[tuple[uint32, Union[InternalNode, LeafNode]]]:... + def get_nodes_with_indexes(self) -> list[Union[InternalNode, LeafNode]]: ... def empty(self) -> bool: ... def get_root_hash(self) -> bytes32: ... def batch_insert(self, keys_values: list[tuple[int64, int64]], hashes: list[bytes32]): ... 
diff --git a/wheel/python/chia_rs/chia_rs.pyi b/wheel/python/chia_rs/chia_rs.pyi index 317552558..7b05716d2 100644 --- a/wheel/python/chia_rs/chia_rs.pyi +++ b/wheel/python/chia_rs/chia_rs.pyi @@ -121,22 +121,27 @@ class AugSchemeMPL: @staticmethod def derive_child_pk_unhardened(pk: G1Element, index: int) -> G1Element: ... + @final -class Node: +class InternalNode: @property def parent(self) -> Optional[uint32]: ... @property def hash(self) -> bytes: ... - # TODO: this all needs reviewed and tidied - @property - def specific(self) -> Union: ... - @property def left(self) -> uint32: ... @property def right(self) -> uint32: ... + +@final +class LeafNode: + @property + def parent(self) -> Optional[uint32]: ... + @property + def hash(self) -> bytes: ... + @property def key(self) -> int64: ... @property @@ -159,10 +164,10 @@ class MerkleBlob: def insert(self, key: int64, value: int64, hash: bytes32, reference_kid: Optional[int64] = None, side: Optional[uint8] = None) -> None: ... def delete(self, key: int64) -> None: ... - def get_raw_node(self, index: uint32) -> Node: ... + def get_raw_node(self, index: uint32) -> Union[InternalNode, LeafNode]: ... def calculate_lazy_hashes(self) -> None: ... - def get_lineage_with_indexes(self, index: uint32) -> list[tuple[uint32, Node]]:... - def get_nodes_with_indexes(self) -> list[Node]: ... + def get_lineage_with_indexes(self, index: uint32) -> list[tuple[uint32, Union[InternalNode, LeafNode]]]:... + def get_nodes_with_indexes(self) -> list[Union[InternalNode, LeafNode]]: ... def empty(self) -> bool: ... def get_root_hash(self) -> bytes32: ... def batch_insert(self, keys_values: list[tuple[int64, int64]], hashes: list[bytes32]): ... From bf2622d4b9f7956fe27f57667da0cde4bff6de48 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 7 Nov 2024 08:46:17 -0500 Subject: [PATCH 130/181] tidy --- crates/chia-datalayer/src/merkle.rs | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 91e950143..83ddecb4e 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -344,6 +344,13 @@ impl Node { } } + fn set_hash(&mut self, hash: &Hash) { + match self { + Node::Internal(ref mut node) => node.hash = *hash, + Node::Leaf(ref mut node) => node.hash = *hash, + } + } + pub fn from_bytes(metadata: &NodeMetadata, blob: &DataBytes) -> Result { Ok(match metadata.node_type { NodeType::Internal => Node::Internal(InternalNode::from_bytes(blob)?), @@ -400,11 +407,7 @@ impl Block { } pub fn update_hash(&mut self, left: &Hash, right: &Hash) { - let hash = internal_hash(left, right); - match self.node { - Node::Internal(ref mut node) => node.hash = hash, - Node::Leaf(ref mut node) => node.hash = hash, - } + self.node.set_hash(&internal_hash(left, right)); self.metadata.dirty = false; } } @@ -528,9 +531,9 @@ impl MerkleBlob { }, node: Node::Leaf(LeafNode { parent: None, - hash: *hash, key, value, + hash: *hash, }), }; @@ -1958,9 +1961,16 @@ mod tests { { assert_eq!(before_block.node.parent(), after_block.node.parent()); assert_eq!(before_index, after_index); - let Node::Leaf(before) = before_block.node else { - // TODO: assert equality of the left and right in the internal case - continue; + let before: LeafNode = match before_block.node { + Node::Leaf(leaf) => leaf, + Node::Internal(internal) => { + let Node::Internal(after) = after_block.node else { + panic!() + }; + assert_eq!(internal.left, after.left); + 
assert_eq!(internal.right, after.right); + continue; + } }; let Node::Leaf(after) = after_block.node else { panic!() From 5d64a2bf5de13a4a38901f4fc15c10681b6373b3 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 7 Nov 2024 09:21:00 -0500 Subject: [PATCH 131/181] `.expect_leaf()` --- crates/chia-datalayer/src/merkle.rs | 68 +++++++++++++++++------------ 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 83ddecb4e..48113c2e5 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -364,6 +364,15 @@ impl Node { Node::Leaf(node) => node.to_bytes(), } } + + fn expect_leaf(self, message: &str) -> LeafNode { + let Node::Leaf(leaf) = self else { + let message = message.replace("<>", &format!("{self:?}")); + panic!("{}", message) + }; + + leaf + } } #[cfg(feature = "py-bindings")] @@ -496,9 +505,9 @@ impl MerkleBlob { self.insert_first(key, value, hash)?; } InsertLocation::Leaf { index, side } => { - let Node::Leaf(old_leaf) = self.get_node(index)? else { - panic!("requested insertion at leaf but found internal node") - }; + let old_leaf = self + .get_node(index)? + .expect_leaf("requested insertion at leaf but found internal node"); let internal_node_hash = match side { Side::Left => internal_hash(hash, &old_leaf.hash), @@ -757,19 +766,15 @@ impl MerkleBlob { if indexes.len() == 1 { // OPT: can we avoid this extra min height leaf traversal? let min_height_leaf = self.get_min_height_leaf()?; - self.insert_from_leaf( - self.key_to_index[&min_height_leaf.key], - indexes[0], - &Side::Left, - )?; + self.insert_from_key(min_height_leaf.key, indexes[0], &Side::Left)?; }; Ok(()) } - fn insert_from_leaf( + fn insert_from_key( &mut self, - old_leaf_index: TreeIndex, + old_leaf_key: KvId, new_index: TreeIndex, side: &Side, ) -> Result<(), Error> { @@ -786,9 +791,7 @@ impl MerkleBlob { } let new_internal_node_index = self.get_new_index(); - let Node::Leaf(old_leaf) = self.get_node(old_leaf_index)? else { - panic!(); - }; + let (old_leaf_index, old_leaf) = self.get_leaf_by_key(old_leaf_key)?; let new_node = self.get_node(new_index)?; let new_stuff = Stuff { @@ -855,9 +858,9 @@ impl MerkleBlob { pub fn delete(&mut self, key: KvId) -> Result<(), Error> { let leaf_index = *self.key_to_index.get(&key).ok_or(Error::UnknownKey(key))?; - let Node::Leaf(leaf) = self.get_node(leaf_index)? else { - panic!("key to index cache resulted in internal node") - }; + let leaf = self + .get_node(leaf_index)? + .expect_leaf("key to index cache resulted in internal node"); self.key_to_index.remove(&key); let Some(parent_index) = leaf.parent else { @@ -1142,6 +1145,15 @@ impl MerkleBlob { Ok(self.get_block(index)?.node) } + pub fn get_leaf_by_key(&self, key: KvId) -> Result<(TreeIndex, LeafNode), Error> { + let index = self.key_to_index[&key]; + let leaf = self + .get_node(index)? 
+ .expect_leaf("expected leaf for index from key cache: {index} -> <>"); + + Ok((index, leaf)) + } + pub fn get_parent_index(&self, index: TreeIndex) -> Result { let block = self.get_block_bytes(index)?; @@ -1241,13 +1253,13 @@ impl MerkleBlob { // Ok(()) // } + // TODO: really this is test, not unused #[allow(unused)] fn get_key_value_map(&self) -> HashMap { let mut key_value = HashMap::new(); for (key, index) in &self.key_to_index { - let Node::Leaf(leaf) = self.get_node(*index).unwrap() else { - panic!() - }; + // silly waste of having the index, but test code and type narrowing so, ok i guess + let (_, leaf) = self.get_leaf_by_key(*key).unwrap(); key_value.insert(*key, leaf.value); } @@ -1815,12 +1827,14 @@ mod tests { panic!() }; - let Node::Leaf(left) = merkle_blob.get_node(internal.left).unwrap() else { - panic!() - }; - let Node::Leaf(right) = merkle_blob.get_node(internal.right).unwrap() else { - panic!() - }; + let left = merkle_blob + .get_node(internal.left) + .unwrap() + .expect_leaf("<>"); + let right = merkle_blob + .get_node(internal.right) + .unwrap() + .expect_leaf("<>"); let expected_keys: [KvId; 2] = match side { Side::Left => [pre_count as KvId + 1, pre_count as KvId], @@ -1945,9 +1959,7 @@ mod tests { let before_blocks = MerkleBlobLeftChildFirstIterator::new(&small_blob.blob).collect::>(); let (key, index) = small_blob.key_to_index.iter().next().unwrap(); - let Node::Leaf(original) = small_blob.get_node(*index).unwrap() else { - panic!() - }; + let original = small_blob.get_node(*index).unwrap().expect_leaf("<>"); let new_value = original.value + 1; small_blob.upsert(*key, new_value, &original.hash).unwrap(); From efecf1b9b358290181be465cbb7905cd2e146aea Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 7 Nov 2024 09:30:40 -0500 Subject: [PATCH 132/181] touchup --- crates/chia-datalayer/src/merkle.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 48113c2e5..167fbe6a6 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -507,7 +507,7 @@ impl MerkleBlob { InsertLocation::Leaf { index, side } => { let old_leaf = self .get_node(index)? - .expect_leaf("requested insertion at leaf but found internal node"); + .expect_leaf("requested insertion at leaf but found internal node: <>"); let internal_node_hash = match side { Side::Left => internal_hash(hash, &old_leaf.hash), @@ -856,11 +856,7 @@ impl MerkleBlob { } pub fn delete(&mut self, key: KvId) -> Result<(), Error> { - let leaf_index = *self.key_to_index.get(&key).ok_or(Error::UnknownKey(key))?; - - let leaf = self - .get_node(leaf_index)? - .expect_leaf("key to index cache resulted in internal node"); + let (leaf_index, leaf) = self.get_leaf_by_key(key)?; self.key_to_index.remove(&key); let Some(parent_index) = leaf.parent else { @@ -1146,7 +1142,7 @@ impl MerkleBlob { } pub fn get_leaf_by_key(&self, key: KvId) -> Result<(TreeIndex, LeafNode), Error> { - let index = self.key_to_index[&key]; + let index = *self.key_to_index.get(&key).ok_or(Error::UnknownKey(key))?; let leaf = self .get_node(index)? 
.expect_leaf("expected leaf for index from key cache: {index} -> <>"); From caacd6b4d157f3e29a756dd2b31eb121465ba87c Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 7 Nov 2024 10:03:10 -0500 Subject: [PATCH 133/181] add .try_into_leaf() --- crates/chia-datalayer/src/merkle.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 167fbe6a6..8fb0f02db 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -86,6 +86,9 @@ pub enum Error { #[error("block index out of range: {0:?}")] BlockIndexOutOfRange(TreeIndex), + + #[error("node not a leaf: {0:?}")] + NodeNotALeaf(InternalNode), } // assumptions @@ -373,6 +376,13 @@ impl Node { leaf } + + fn try_into_leaf(self) -> Result { + match self { + Node::Leaf(leaf) => Ok(leaf), + Node::Internal(internal) => Err(Error::NodeNotALeaf(internal)), + } + } } #[cfg(feature = "py-bindings")] @@ -505,9 +515,7 @@ impl MerkleBlob { self.insert_first(key, value, hash)?; } InsertLocation::Leaf { index, side } => { - let old_leaf = self - .get_node(index)? - .expect_leaf("requested insertion at leaf but found internal node: <>"); + let old_leaf = self.get_node(index)?.try_into_leaf()?; let internal_node_hash = match side { Side::Left => internal_hash(hash, &old_leaf.hash), From 466a0c97fa44d5dcd0fe7f57213723e767f60519 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Thu, 7 Nov 2024 11:29:00 -0500 Subject: [PATCH 134/181] cleanup --- crates/chia-datalayer/src/merkle.rs | 35 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 8fb0f02db..01abbc5fa 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -247,7 +247,7 @@ fn hash_from_bytes(blob: &DataBytes) -> Hash { } #[cfg_attr(feature = "py-bindings", pyclass(name = "InternalNode", get_all))] -#[derive(Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct InternalNode { parent: Parent, hash: Hash, @@ -288,7 +288,7 @@ impl InternalNode { } #[cfg_attr(feature = "py-bindings", pyclass(name = "LeafNode", get_all))] -#[derive(Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct LeafNode { parent: Parent, hash: Hash, @@ -319,7 +319,7 @@ impl LeafNode { } } -#[derive(Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum Node { Internal(InternalNode), Leaf(LeafNode), @@ -857,10 +857,9 @@ impl MerkleBlob { .next() .ok_or(Error::UnableToFindALeaf)?; - match block.node { - Node::Leaf(node) => Ok(node), - Node::Internal(node) => panic!("unexpectedly found internal node first: {node:?}"), - } + Ok(block + .node + .expect_leaf("unexpectedly found internal node first: <>")) } pub fn delete(&mut self, key: KvId) -> Result<(), Error> { @@ -924,12 +923,13 @@ impl MerkleBlob { }; let mut block = self.get_block(*leaf_index)?; - if let Node::Leaf(ref mut leaf) = block.node { - leaf.hash.clone_from(new_hash); - leaf.value = value; - } else { - panic!("expected internal node but found leaf"); - } + // TODO: repeated message + let mut leaf = block.node.clone().expect_leaf(&format!( + "expected leaf for index from key cache: {leaf_index} -> <>" + )); + leaf.hash.clone_from(new_hash); + leaf.value = value; + block.node = Node::Leaf(leaf); self.insert_entry_to_blob(*leaf_index, &block)?; if let Some(parent) = block.node.parent() { @@ -1151,9 +1151,9 @@ impl MerkleBlob { 
pub fn get_leaf_by_key(&self, key: KvId) -> Result<(TreeIndex, LeafNode), Error> { let index = *self.key_to_index.get(&key).ok_or(Error::UnknownKey(key))?; - let leaf = self - .get_node(index)? - .expect_leaf("expected leaf for index from key cache: {index} -> <>"); + let leaf = self.get_node(index)?.expect_leaf(&format!( + "expected leaf for index from key cache: {index} -> <>" + )); Ok((index, leaf)) } @@ -1901,9 +1901,8 @@ mod tests { #[test] fn test_node_type_from_u8_invalid() { let invalid_value = 2; - let expected = format!("unknown NodeType value: {invalid_value}"); let actual = NodeType::from_u8(invalid_value); - actual.expect_err(&expected); + actual.expect_err("invalid node type value should fail"); } #[test] From 34c0caf3805dc1c8fe7e2df0d3cdc9433651b220 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 12 Nov 2024 12:56:15 -0500 Subject: [PATCH 135/181] newtype for `KvId` --- crates/chia-datalayer/src/merkle.rs | 110 ++++++++++++++++++---------- 1 file changed, 71 insertions(+), 39 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 01abbc5fa..751b7580f 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -42,10 +42,39 @@ impl std::fmt::Display for TreeIndex { type Parent = Option; type Hash = [u8; 32]; +type KvIdBytes = [u8; size_of::()]; // key and value ids are provided from outside of this code and are implemented as // the row id from sqlite which is a signed 8 byte integer. the actually key and // value data bytes will not be handled within this code, only outside. -type KvId = i64; +#[cfg_attr(feature = "py-bindings", derive(FromPyObject), pyo3(transparent))] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct KvId(i64); + +impl KvId { + #[allow(clippy::unnecessary_wraps)] + pub fn from_bytes(blob: KvIdBytes) -> Result { + Ok(Self(i64::from_be_bytes(blob))) + } + + // TODO: consider the self convention more compared with other cases + #[allow(clippy::trivially_copy_pass_by_ref, clippy::wrong_self_convention)] + pub fn to_bytes(&self) -> KvIdBytes { + self.0.to_be_bytes() + } +} + +#[cfg(feature = "py-bindings")] +impl IntoPy for KvId { + fn into_py(self, py: Python<'_>) -> PyObject { + self.0.into_py(py) + } +} + +impl std::fmt::Display for KvId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} #[derive(Debug, Error)] pub enum Error { @@ -302,8 +331,8 @@ impl LeafNode { Ok(Self { parent: parent_from_bytes(blob), hash: hash_from_bytes(blob), - key: KvId::from_be_bytes(blob[KEY_RANGE].try_into().unwrap()), - value: KvId::from_be_bytes(blob[VALUE_RANGE].try_into().unwrap()), + key: KvId::from_bytes(blob[KEY_RANGE].try_into().unwrap())?, + value: KvId::from_bytes(blob[VALUE_RANGE].try_into().unwrap())?, }) } @@ -312,8 +341,8 @@ impl LeafNode { let parent_integer = self.parent.unwrap_or(NULL_PARENT); blob[HASH_RANGE].copy_from_slice(&self.hash); blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_bytes()); - blob[KEY_RANGE].copy_from_slice(&self.key.to_be_bytes()); - blob[VALUE_RANGE].copy_from_slice(&self.value.to_be_bytes()); + blob[KEY_RANGE].copy_from_slice(&self.key.to_bytes()); + blob[VALUE_RANGE].copy_from_slice(&self.value.to_bytes()); blob } @@ -1079,7 +1108,7 @@ impl MerkleBlob { } fn get_random_insert_location_by_kvid(&self, seed: KvId) -> Result { - let seed = sha256_num(seed); + let seed = sha256_num(seed.0); self.get_random_insert_location_by_seed(&seed) } @@ -1661,16 +1690,16 @@ mod 
tests { let mut blob = MerkleBlob::new(vec![]).unwrap(); blob.insert( - 0x0001_0203_0405_0607, - 0x1011_1213_1415_1617, + KvId(0x0001_0203_0405_0607), + KvId(0x1011_1213_1415_1617), &sha256_num(0x1020), InsertLocation::Auto {}, ) .unwrap(); blob.insert( - 0x2021_2223_2425_2627, - 0x3031_3233_3435_3637, + KvId(0x2021_2223_2425_2627), + KvId(0x3031_3233_3435_3637), &sha256_num(0x2030), InsertLocation::Auto {}, ) @@ -1722,12 +1751,12 @@ mod tests { let mut total_time = Duration::new(0, 0); let count = 10_000; - let m: KvId = count * n; + let m = count * n; for i in m..(m + count) { let start = Instant::now(); merkle_blob // NOTE: yeah this hash is garbage - .insert(i, i, &sha256_num(i), InsertLocation::Auto {}) + .insert(KvId(i), KvId(i), &sha256_num(i), InsertLocation::Auto {}) .unwrap(); let end = Instant::now(); total_time += end.duration_since(start); @@ -1747,10 +1776,10 @@ mod tests { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); let mut reference_blobs = vec![]; - let key_value_ids: [KvId; COUNT] = core::array::from_fn(|i| i as KvId); + let key_value_ids: [KvId; COUNT] = core::array::from_fn(|i| KvId(i as i64)); for key_value_id in key_value_ids { - let hash: Hash = sha256_num(key_value_id); + let hash: Hash = sha256_num(key_value_id.0); println!("inserting: {key_value_id}"); merkle_blob.calculate_lazy_hashes().unwrap(); @@ -1767,7 +1796,7 @@ mod tests { println!("deleting: {key_value_id}"); merkle_blob.delete(*key_value_id).unwrap(); merkle_blob.calculate_lazy_hashes().unwrap(); - assert_eq!(merkle_blob, reference_blobs[*key_value_id as usize]); + assert_eq!(merkle_blob, reference_blobs[key_value_id.0 as usize]); dots.push(merkle_blob.to_dot().dump()); } } @@ -1776,13 +1805,13 @@ mod tests { fn test_insert_first() { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - let key_value_id: KvId = 1; + let key_value_id = KvId(1); open_dot(merkle_blob.to_dot().set_note("empty")); merkle_blob .insert( key_value_id, key_value_id, - &sha256_num(key_value_id), + &sha256_num(key_value_id.0), InsertLocation::Auto {}, ) .unwrap(); @@ -1798,23 +1827,23 @@ mod tests { ) { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - let mut last_key: KvId = 0; + let mut last_key: KvId = KvId(0); for i in 1..=pre_count { - let key: KvId = i as KvId; + let key = KvId(i as i64); open_dot(merkle_blob.to_dot().set_note("empty")); merkle_blob - .insert(key, key, &sha256_num(key), InsertLocation::Auto {}) + .insert(key, key, &sha256_num(key.0), InsertLocation::Auto {}) .unwrap(); last_key = key; } - let key_value_id: KvId = pre_count as KvId + 1; + let key_value_id: KvId = KvId((pre_count + 1) as i64); open_dot(merkle_blob.to_dot().set_note("first after")); merkle_blob .insert( key_value_id, key_value_id, - &sha256_num(key_value_id), + &sha256_num(key_value_id.0), InsertLocation::Leaf { index: merkle_blob.key_to_index[&last_key], side: side.clone(), @@ -1841,8 +1870,8 @@ mod tests { .expect_leaf("<>"); let expected_keys: [KvId; 2] = match side { - Side::Left => [pre_count as KvId + 1, pre_count as KvId], - Side::Right => [pre_count as KvId, pre_count as KvId + 1], + Side::Left => [KvId(pre_count as i64 + 1), KvId(pre_count as i64)], + Side::Right => [KvId(pre_count as i64), KvId(pre_count as i64 + 1)], }; assert_eq!([left.key, right.key], expected_keys); } @@ -1851,13 +1880,13 @@ mod tests { fn test_delete_last() { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); - let key_value_id: KvId = 1; + let key_value_id = KvId(1); open_dot(merkle_blob.to_dot().set_note("empty")); merkle_blob 
.insert( key_value_id, key_value_id, - &sha256_num(key_value_id), + &sha256_num(key_value_id.0), InsertLocation::Auto {}, ) .unwrap(); @@ -1871,7 +1900,7 @@ mod tests { #[rstest] fn test_delete_frees_index(mut small_blob: MerkleBlob) { - let key = 0x0001_0203_0405_0607; + let key = KvId(0x0001_0203_0405_0607); let index = small_blob.key_to_index[&key]; small_blob.delete(key).unwrap(); @@ -1884,7 +1913,7 @@ mod tests { #[rstest] fn test_get_new_index_with_free_index(mut small_blob: MerkleBlob) { open_dot(small_blob.to_dot().set_note("initial")); - let key = 0x0001_0203_0405_0607; + let key = KvId(0x0001_0203_0405_0607); let _ = small_blob.key_to_index[&key]; small_blob.delete(key).unwrap(); open_dot(small_blob.to_dot().set_note("after delete")); @@ -1940,18 +1969,18 @@ mod tests { #[rstest] fn test_upsert_inserts(small_blob: MerkleBlob) { - let key = 1234; + let key = KvId(1234); assert!(!small_blob.key_to_index.contains_key(&key)); - let value = 5678; + let value = KvId(5678); let mut insert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); insert_blob - .insert(key, value, &sha256_num(key), InsertLocation::Auto {}) + .insert(key, value, &sha256_num(key.0), InsertLocation::Auto {}) .unwrap(); open_dot(insert_blob.to_dot().set_note("first after")); let mut upsert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); - upsert_blob.upsert(key, value, &sha256_num(key)).unwrap(); + upsert_blob.upsert(key, value, &sha256_num(key.0)).unwrap(); open_dot(upsert_blob.to_dot().set_note("first after")); assert_eq!(insert_blob.blob, upsert_blob.blob); @@ -1963,7 +1992,7 @@ mod tests { MerkleBlobLeftChildFirstIterator::new(&small_blob.blob).collect::>(); let (key, index) = small_blob.key_to_index.iter().next().unwrap(); let original = small_blob.get_node(*index).unwrap().expect_leaf("<>"); - let new_value = original.value + 1; + let new_value = KvId(original.value.0 + 1); small_blob.upsert(*key, new_value, &original.hash).unwrap(); @@ -2002,9 +2031,10 @@ mod tests { #[test] fn test_double_insert_fails() { let mut blob = MerkleBlob::new(vec![]).unwrap(); - blob.insert(0, 0, &[0u8; 32], InsertLocation::Auto {}) + let kv = KvId(0); + blob.insert(kv, kv, &[0u8; 32], InsertLocation::Auto {}) .unwrap(); - blob.insert(0, 0, &[0u8; 32], InsertLocation::Auto {}) + blob.insert(kv, kv, &[0u8; 32], InsertLocation::Auto {}) .expect_err(""); } @@ -2014,8 +2044,9 @@ mod tests { #[values(0, 1, 2, 8, 9)] count: usize, ) { let mut blob = MerkleBlob::new(vec![]).unwrap(); - for i in 0..pre_inserts as KvId { - blob.insert(i, i, &sha256_num(i), InsertLocation::Auto {}) + for i in 0..pre_inserts { + let i = KvId(i as i64); + blob.insert(i, i, &sha256_num(i.0), InsertLocation::Auto {}) .unwrap(); } open_dot(blob.to_dot().set_note("initial")); @@ -2023,8 +2054,9 @@ mod tests { let mut batch: Vec<((KvId, KvId), Hash)> = vec![]; let mut batch_map = HashMap::new(); - for i in pre_inserts as KvId..(pre_inserts + count) as KvId { - batch.push(((i, i), sha256_num(i))); + for i in pre_inserts..(pre_inserts + count) { + let i = KvId(i as i64); + batch.push(((i, i), sha256_num(i.0))); batch_map.insert(i, i); } From 0a194e8f0236eba5da523c5bd76257d535d0f6a9 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 12 Nov 2024 13:13:31 -0500 Subject: [PATCH 136/181] doc comment --- crates/chia-datalayer/src/merkle.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 751b7580f..f6d921f86 100644 --- 
a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -43,9 +43,9 @@ impl std::fmt::Display for TreeIndex { type Parent = Option; type Hash = [u8; 32]; type KvIdBytes = [u8; size_of::()]; -// key and value ids are provided from outside of this code and are implemented as -// the row id from sqlite which is a signed 8 byte integer. the actually key and -// value data bytes will not be handled within this code, only outside. +/// Key and value ids are provided from outside of this code and are implemented as +/// the row id from sqlite which is a signed 8 byte integer. The actual key and +/// value data bytes will not be handled within this code, only outside. #[cfg_attr(feature = "py-bindings", derive(FromPyObject), pyo3(transparent))] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct KvId(i64); From 825f7ac3c3ec2af506dbb9559ef9f380c924c053 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 12 Nov 2024 16:06:42 -0500 Subject: [PATCH 137/181] macro it --- crates/chia-datalayer/src/merkle.rs | 43 +++++++++++++++-------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index f6d921f86..767c394c5 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -158,31 +158,34 @@ type MetadataBytes = [u8; METADATA_SIZE]; type DataBytes = [u8; DATA_SIZE]; const DATA_RANGE: Range = METADATA_SIZE..METADATA_SIZE + DATA_SIZE; -#[derive(Clone, Debug, Hash, Eq, PartialEq)] -#[repr(u8)] -pub enum NodeType { - Internal = 0, - Leaf = 1, -} - -impl NodeType { - pub fn from_u8(value: u8) -> Result { - match value { - // ha! feel free to laugh at this - x if (NodeType::Internal as u8 == x) => Ok(NodeType::Internal), - x if (NodeType::Leaf as u8 == x) => Ok(NodeType::Leaf), - other => Err(Error::UnknownNodeTypeValue(other)), +macro_rules! 
u8_enum { + ($enum_name:ident, $error_name:expr, {$($variant_name:ident = $variant_value:literal),*}) => { + #[derive(Clone, Debug, Hash, Eq, PartialEq)] + #[repr(u8)] + pub enum $enum_name { + $( $variant_name = $variant_value, )* } - } - pub fn to_u8(&self) -> u8 { - match self { - NodeType::Internal => NodeType::Internal as u8, - NodeType::Leaf => NodeType::Leaf as u8, + impl $enum_name { + pub fn from_u8(value: u8) -> Result { + match value { + $( $variant_value => Ok(Self::$variant_name), )* + other => Err($error_name(other)), + } + } + + pub fn to_u8(&self) -> u8 { + match self { + $( Self::$variant_name => $variant_value, )* + } + } } - } + + }; } +u8_enum!(NodeType, Error::UnknownNodeTypeValue, {Internal = 0, Leaf = 1}); + #[allow(clippy::needless_pass_by_value)] fn sha256_num(input: T) -> Hash { let mut hasher = Sha256::new(); From 9ae331f811f8258619e5abcec16bc0e5e85ac336 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 12 Nov 2024 17:11:52 -0500 Subject: [PATCH 138/181] metadata uses streamable --- Cargo.lock | 3 + crates/chia-datalayer/Cargo.toml | 6 ++ crates/chia-datalayer/src/merkle.rs | 91 +++++++++-------------------- 3 files changed, 38 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2c478ac27..93362c886 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -374,6 +374,9 @@ dependencies = [ name = "chia-datalayer" version = "0.1.0" dependencies = [ + "chia-sha2", + "chia-traits 0.15.0", + "chia_streamable_macro 0.15.0", "clvm-utils", "clvmr", "hex", diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index c00caedc4..d97e77fdb 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -22,6 +22,9 @@ clvmr = { workspace = true } num-traits = { workspace = true } pyo3 = { workspace = true, optional = true } thiserror = { workspace = true } +chia_streamable_macro = { workspace = true } +chia-traits = { workspace = true } +chia-sha2 = { workspace = true } [dev-dependencies] clvm-utils = { workspace = true } @@ -31,3 +34,6 @@ open = { workspace = true } percent-encoding = { workspace = true } rstest = { workspace = true } url = { workspace = true } + +[package.metadata.cargo-machete] +ignored = ["chia-sha2"] diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 767c394c5..d07ac9e9e 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -4,6 +4,8 @@ use pyo3::{ PyResult, Python, }; +use chia_streamable_macro::Streamable; +use chia_traits::Streamable; use clvmr::sha2::Sha256; use num_traits::ToBytes; use std::cmp::Ordering; @@ -133,7 +135,6 @@ const fn max(left: usize, right: usize) -> usize { // TODO: once not experimental... something closer to this // const fn max(left: T, right: T) -> T { if left < right {right} else {left} } -// TODO: consider in more detail other serialization tools such as serde and streamable // define the serialized block format const METADATA_RANGE: Range = 0..METADATA_SIZE; const TYPE_RANGE: Range = range_by_length(0, size_of::()); @@ -158,34 +159,24 @@ type MetadataBytes = [u8; METADATA_SIZE]; type DataBytes = [u8; DATA_SIZE]; const DATA_RANGE: Range = METADATA_SIZE..METADATA_SIZE + DATA_SIZE; -macro_rules! 
u8_enum { - ($enum_name:ident, $error_name:expr, {$($variant_name:ident = $variant_value:literal),*}) => { - #[derive(Clone, Debug, Hash, Eq, PartialEq)] - #[repr(u8)] - pub enum $enum_name { - $( $variant_name = $variant_value, )* - } - - impl $enum_name { - pub fn from_u8(value: u8) -> Result { - match value { - $( $variant_value => Ok(Self::$variant_name), )* - other => Err($error_name(other)), - } - } +#[repr(u8)] +#[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq)] +pub enum NodeType { + Internal = 0, + Leaf = 1, +} - pub fn to_u8(&self) -> u8 { - match self { - $( Self::$variant_name => $variant_value, )* - } - } - } +impl NodeType { + pub fn from_u8(value: u8) -> Result { + Streamable::from_bytes(&[value]).map_err(|_| Error::ZeroLengthSeedNotAllowed) + } - }; + #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] + pub fn to_u8(&self) -> u8 { + Streamable::to_bytes(self).unwrap()[0] + } } -u8_enum!(NodeType, Error::UnknownNodeTypeValue, {Internal = 0, Leaf = 1}); - #[allow(clippy::needless_pass_by_value)] fn sha256_num(input: T) -> Hash { let mut hasher = Sha256::new(); @@ -230,7 +221,7 @@ pub enum InsertLocation { const NULL_PARENT: TreeIndex = TreeIndex(0xffff_ffffu32); -#[derive(Debug, PartialEq)] +#[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq)] pub struct NodeMetadata { pub node_type: NodeType, pub dirty: bool, @@ -239,30 +230,18 @@ pub struct NodeMetadata { impl NodeMetadata { pub fn from_bytes(blob: MetadataBytes) -> Result { // OPT: could save 1-2% of tree space by packing (and maybe don't do that) - Ok(Self { - node_type: Self::node_type_from_bytes(blob)?, - dirty: Self::dirty_from_bytes(blob)?, - }) + // TODO: real error processing, recheck all ZeroLengthSeedNotAllowed + Streamable::from_bytes(&blob).map_err(|_| Error::ZeroLengthSeedNotAllowed) } + #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] pub fn to_bytes(&self) -> MetadataBytes { - let mut bytes = [0u8; METADATA_SIZE]; - bytes[TYPE_RANGE].copy_from_slice(&[self.node_type.to_u8()]); - bytes[DIRTY_RANGE].copy_from_slice(&[u8::from(self.dirty)]); - - bytes - } - - pub fn node_type_from_bytes(blob: MetadataBytes) -> Result { - NodeType::from_u8(u8::from_be_bytes(blob[TYPE_RANGE].try_into().unwrap())) - } - - pub fn dirty_from_bytes(blob: MetadataBytes) -> Result { - match u8::from_be_bytes(blob[DIRTY_RANGE].try_into().unwrap()) { - 0 => Ok(false), - 1 => Ok(true), - other => Err(Error::UnknownDirtyValue(other)), - } + // TODO: stop panicking + Streamable::to_bytes(self) + .unwrap() + .as_slice() + .try_into() + .unwrap() } } @@ -386,6 +365,7 @@ impl Node { } } + #[allow(clippy::trivially_copy_pass_by_ref)] pub fn from_bytes(metadata: &NodeMetadata, blob: &DataBytes) -> Result { Ok(match metadata.node_type { NodeType::Internal => Node::Internal(InternalNode::from_bytes(blob)?), @@ -1647,11 +1627,8 @@ mod tests { assert_eq!(NodeType::Leaf as u8, 1); for node_type in [NodeType::Internal, NodeType::Leaf] { - assert_eq!(node_type.to_u8(), node_type.clone() as u8,); - assert_eq!( - NodeType::from_u8(node_type.clone() as u8).unwrap(), - node_type, - ); + assert_eq!(node_type.to_u8(), node_type as u8,); + assert_eq!(NodeType::from_u8(node_type as u8).unwrap(), node_type,); } } @@ -1681,11 +1658,6 @@ mod tests { let object = NodeMetadata::from_bytes(bytes).unwrap(); assert_eq!(object, NodeMetadata { node_type, dirty },); assert_eq!(object.to_bytes(), bytes); - assert_eq!( - NodeMetadata::node_type_from_bytes(bytes).unwrap(), - object.node_type - ); - 
assert_eq!(NodeMetadata::dirty_from_bytes(bytes).unwrap(), object.dirty); } #[fixture] @@ -1937,11 +1909,6 @@ mod tests { actual.expect_err("invalid node type value should fail"); } - #[test] - fn test_node_metadata_dirty_from_bytes_invalid() { - NodeMetadata::dirty_from_bytes([0, 2]).expect_err("invalid value should fail"); - } - #[test] #[should_panic(expected = "index not a child: 2")] fn test_node_specific_sibling_index_panics_for_unknown_sibling() { From 22599533dbbc64a062b1c2ca8a2b25a197392e21 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 12 Nov 2024 19:27:52 -0500 Subject: [PATCH 139/181] and... green --- Cargo.lock | 1 + crates/chia-datalayer/Cargo.toml | 1 + crates/chia-datalayer/src/merkle.rs | 206 +++++++++++++++------------ crates/chia-traits/src/streamable.rs | 8 ++ 4 files changed, 122 insertions(+), 94 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 93362c886..9eee18d43 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -374,6 +374,7 @@ dependencies = [ name = "chia-datalayer" version = "0.1.0" dependencies = [ + "chia-protocol", "chia-sha2", "chia-traits 0.15.0", "chia_streamable_macro 0.15.0", diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index d97e77fdb..2e6085095 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -25,6 +25,7 @@ thiserror = { workspace = true } chia_streamable_macro = { workspace = true } chia-traits = { workspace = true } chia-sha2 = { workspace = true } +chia-protocol = { workspace = true } [dev-dependencies] clvm-utils = { workspace = true } diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index d07ac9e9e..5ebf08ca8 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -4,6 +4,7 @@ use pyo3::{ PyResult, Python, }; +use chia_protocol::Bytes32; use chia_streamable_macro::Streamable; use chia_traits::Streamable; use clvmr::sha2::Sha256; @@ -16,17 +17,25 @@ use std::ops::Range; use thiserror::Error; #[cfg_attr(feature = "py-bindings", derive(FromPyObject), pyo3(transparent))] -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord)] pub struct TreeIndex(u32); impl TreeIndex { - fn from_bytes(bytes: &[u8]) -> Self { - Self(u32::from_be_bytes(bytes.try_into().unwrap())) - } + // fn from_bytes(bytes: &[u8]) -> Self { + // // OPT: could save 1-2% of tree space by packing (and maybe don't do that) + // // TODO: real error processing, recheck all ZeroLengthSeedNotAllowed + // Streamable::from_bytes_ignore_extra_bytes(bytes).unwrap() + // } - fn to_bytes(self) -> [u8; 4] { - self.0.to_be_bytes() - } + // #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] + // fn to_bytes(self) -> [u8; 4] { + // // TODO: stop panicking + // Streamable::to_bytes(&self) + // .unwrap() + // .as_slice() + // .try_into() + // .unwrap() + // } } #[cfg(feature = "py-bindings")] @@ -43,25 +52,32 @@ impl std::fmt::Display for TreeIndex { } type Parent = Option; -type Hash = [u8; 32]; +type Hash = Bytes32; type KvIdBytes = [u8; size_of::()]; /// Key and value ids are provided from outside of this code and are implemented as /// the row id from sqlite which is a signed 8 byte integer. The actual key and /// value data bytes will not be handled within this code, only outside. 
#[cfg_attr(feature = "py-bindings", derive(FromPyObject), pyo3(transparent))] -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord)] pub struct KvId(i64); impl KvId { #[allow(clippy::unnecessary_wraps)] pub fn from_bytes(blob: KvIdBytes) -> Result { - Ok(Self(i64::from_be_bytes(blob))) + // TODO: real error processing, recheck all ZeroLengthSeedNotAllowed + Streamable::from_bytes_ignore_extra_bytes(&blob) + .map_err(|_| Error::FailedLoadingNode("aaa".to_string())) } // TODO: consider the self convention more compared with other cases #[allow(clippy::trivially_copy_pass_by_ref, clippy::wrong_self_convention)] pub fn to_bytes(&self) -> KvIdBytes { - self.0.to_be_bytes() + // TODO: stop panicking + let mut x = Streamable::to_bytes(self).unwrap(); + for _ in x.len()..DATA_SIZE { + x.push(0); + } + x.as_slice().try_into().unwrap() } } @@ -126,33 +142,37 @@ pub enum Error { // - root is at index 0 // - any case with no keys will have a zero length blob -const fn range_by_length(start: usize, length: usize) -> Range { - start..start + length -} -const fn max(left: usize, right: usize) -> usize { - [left, right][(left < right) as usize] -} +// const fn range_by_length(start: usize, length: usize) -> Range { +// start..start + length +// } +// const fn max(left: usize, right: usize) -> usize { +// [left, right][(left < right) as usize] +// } // TODO: once not experimental... something closer to this // const fn max(left: T, right: T) -> T { if left < right {right} else {left} } // define the serialized block format const METADATA_RANGE: Range = 0..METADATA_SIZE; -const TYPE_RANGE: Range = range_by_length(0, size_of::()); -const DIRTY_RANGE: Range = range_by_length(TYPE_RANGE.end, size_of::()); -const METADATA_SIZE: usize = DIRTY_RANGE.end; +// const TYPE_RANGE: Range = range_by_length(0, size_of::()); +// const DIRTY_RANGE: Range = range_by_length(TYPE_RANGE.end, size_of::()); +// const METADATA_SIZE: usize = DIRTY_RANGE.end; +// TODO: figure out the real max +const METADATA_SIZE: usize = 2; // common fields -const HASH_RANGE: Range = range_by_length(0, size_of::()); +// const HASH_RANGE: Range = range_by_length(0, size_of::()); +// const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); // const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); -const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); // internal specific fields -const LEFT_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); -const RIGHT_RANGE: Range = range_by_length(LEFT_RANGE.end, size_of::()); +// const LEFT_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); +// const RIGHT_RANGE: Range = range_by_length(LEFT_RANGE.end, size_of::()); // leaf specific fields -const KEY_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); -const VALUE_RANGE: Range = range_by_length(KEY_RANGE.end, size_of::()); +// const KEY_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); +// const VALUE_RANGE: Range = range_by_length(KEY_RANGE.end, size_of::()); -const DATA_SIZE: usize = max(RIGHT_RANGE.end, VALUE_RANGE.end); +// const DATA_SIZE: usize = max(RIGHT_RANGE.end, VALUE_RANGE.end); +// TODO: figure out the real max +const DATA_SIZE: usize = 100; const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; type BlockBytes = [u8; BLOCK_SIZE]; type MetadataBytes = [u8; METADATA_SIZE]; @@ -168,7 +188,8 @@ pub enum NodeType { impl NodeType { pub fn from_u8(value: u8) -> 
Result { - Streamable::from_bytes(&[value]).map_err(|_| Error::ZeroLengthSeedNotAllowed) + Streamable::from_bytes_ignore_extra_bytes(&[value]) + .map_err(|_| Error::FailedLoadingNode("bbb".to_string())) } #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] @@ -182,14 +203,14 @@ fn sha256_num(input: T) -> Hash { let mut hasher = Sha256::new(); hasher.update(input.to_be_bytes()); - hasher.finalize() + hasher.finalize().into() } fn sha256_bytes(input: &[u8]) -> Hash { let mut hasher = Sha256::new(); hasher.update(input); - hasher.finalize() + hasher.finalize().into() } fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { @@ -198,7 +219,7 @@ fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { hasher.update(left_hash); hasher.update(right_hash); - hasher.finalize() + hasher.finalize().into() } #[cfg_attr(feature = "py-bindings", pyclass(name = "Side", eq, eq_int))] @@ -219,7 +240,7 @@ pub enum InsertLocation { Leaf { index: TreeIndex, side: Side }, } -const NULL_PARENT: TreeIndex = TreeIndex(0xffff_ffffu32); +// const NULL_PARENT: TreeIndex = TreeIndex(0xffff_ffffu32); #[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq)] pub struct NodeMetadata { @@ -231,34 +252,34 @@ impl NodeMetadata { pub fn from_bytes(blob: MetadataBytes) -> Result { // OPT: could save 1-2% of tree space by packing (and maybe don't do that) // TODO: real error processing, recheck all ZeroLengthSeedNotAllowed - Streamable::from_bytes(&blob).map_err(|_| Error::ZeroLengthSeedNotAllowed) + Streamable::from_bytes_ignore_extra_bytes(&blob) + .map_err(|_| Error::FailedLoadingNode("ccc".to_string())) } #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] pub fn to_bytes(&self) -> MetadataBytes { // TODO: stop panicking - Streamable::to_bytes(self) - .unwrap() - .as_slice() + let mut x = Streamable::to_bytes(self).unwrap(); + for _ in x.len()..METADATA_SIZE { + x.push(0); + } + let l = x.len(); + x.as_slice() .try_into() - .unwrap() + .expect(format!("length is: {l} of {DATA_SIZE}").as_str()) } } -fn parent_from_bytes(blob: &DataBytes) -> Parent { - let parent_integer = TreeIndex::from_bytes(&blob[PARENT_RANGE]); - match parent_integer { - NULL_PARENT => None, - _ => Some(parent_integer), - } -} - -fn hash_from_bytes(blob: &DataBytes) -> Hash { - blob[HASH_RANGE].try_into().unwrap() -} +// fn parent_from_bytes(blob: &DataBytes) -> Parent { +// Node::from_bytes(blob).unwrap().parent() +// } +// +// fn hash_from_bytes(blob: &DataBytes) -> Hash { +// Node::from_bytes(blob).unwrap().hash() +// } #[cfg_attr(feature = "py-bindings", pyclass(name = "InternalNode", get_all))] -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq)] pub struct InternalNode { parent: Parent, hash: Hash, @@ -267,24 +288,23 @@ pub struct InternalNode { } impl InternalNode { - #[allow(clippy::unnecessary_wraps)] pub fn from_bytes(blob: &DataBytes) -> Result { - Ok(Self { - parent: parent_from_bytes(blob), - hash: hash_from_bytes(blob), - left: TreeIndex::from_bytes(&blob[LEFT_RANGE]), - right: TreeIndex::from_bytes(&blob[RIGHT_RANGE]), - }) + // OPT: could save 1-2% of tree space by packing (and maybe don't do that) + // TODO: real error processing, recheck all ZeroLengthSeedNotAllowed + Streamable::from_bytes_ignore_extra_bytes(blob) + .map_err(|_| Error::FailedLoadingNode("ddd".to_string())) } - pub fn to_bytes(&self) -> DataBytes { - let mut blob: DataBytes = [0; DATA_SIZE]; - let parent_integer = self.parent.unwrap_or(NULL_PARENT); - 
blob[HASH_RANGE].copy_from_slice(&self.hash); - blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_bytes()); - blob[LEFT_RANGE].copy_from_slice(&self.left.to_bytes()); - blob[RIGHT_RANGE].copy_from_slice(&self.right.to_bytes()); - blob + #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] + pub fn to_bytes(&self) -> DataBytes { + // TODO: stop panicking + let mut x = Streamable::to_bytes(self).expect(format!("aaaaa = {DATA_SIZE}").as_str()); + for _ in x.len()..DATA_SIZE { + x.push(0); + } + x.as_slice() + .try_into() + .expect(format!("bbbbb = {DATA_SIZE}").as_str()) } pub fn sibling_index(&self, index: TreeIndex) -> TreeIndex { @@ -299,7 +319,7 @@ impl InternalNode { } #[cfg_attr(feature = "py-bindings", pyclass(name = "LeafNode", get_all))] -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq)] pub struct LeafNode { parent: Parent, hash: Hash, @@ -308,25 +328,25 @@ pub struct LeafNode { } impl LeafNode { - #[allow(clippy::unnecessary_wraps)] pub fn from_bytes(blob: &DataBytes) -> Result { - Ok(Self { - parent: parent_from_bytes(blob), - hash: hash_from_bytes(blob), - key: KvId::from_bytes(blob[KEY_RANGE].try_into().unwrap())?, - value: KvId::from_bytes(blob[VALUE_RANGE].try_into().unwrap())?, - }) + // OPT: could save 1-2% of tree space by packing (and maybe don't do that) + // TODO: real error processing, recheck all ZeroLengthSeedNotAllowed + // TODO: more structured padding handling + Streamable::from_bytes_ignore_extra_bytes(blob) + .map_err(|e| Error::FailedLoadingNode(format!("eee {e}").to_string())) } + #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] pub fn to_bytes(&self) -> DataBytes { - let mut blob: DataBytes = [0; DATA_SIZE]; - let parent_integer = self.parent.unwrap_or(NULL_PARENT); - blob[HASH_RANGE].copy_from_slice(&self.hash); - blob[PARENT_RANGE].copy_from_slice(&parent_integer.to_bytes()); - blob[KEY_RANGE].copy_from_slice(&self.key.to_bytes()); - blob[VALUE_RANGE].copy_from_slice(&self.value.to_bytes()); - - blob + // TODO: stop panicking + let mut x = Streamable::to_bytes(self).unwrap(); + for _ in x.len()..DATA_SIZE { + x.push(0); + } + let l = x.len(); + x.as_slice() + .try_into() + .expect(format!("length is: {l} of {DATA_SIZE}").as_str()) } } @@ -1142,10 +1162,7 @@ impl MerkleBlob { } fn get_hash(&self, index: TreeIndex) -> Result { - let block_bytes = self.get_block_bytes(index)?; - let data_bytes: DataBytes = block_bytes[DATA_RANGE].try_into().unwrap(); - - Ok(hash_from_bytes(&data_bytes)) + Ok(self.get_block(index)?.node.hash()) } fn get_block_bytes(&self, index: TreeIndex) -> Result { @@ -1171,9 +1188,7 @@ impl MerkleBlob { } pub fn get_parent_index(&self, index: TreeIndex) -> Result { - let block = self.get_block_bytes(index)?; - - Ok(parent_from_bytes(block[DATA_RANGE].try_into().unwrap())) + Ok(self.get_block(index)?.node.parent()) } pub fn get_lineage_with_indexes( @@ -1275,7 +1290,8 @@ impl MerkleBlob { let mut key_value = HashMap::new(); for (key, index) in &self.key_to_index { // silly waste of having the index, but test code and type narrowing so, ok i guess - let (_, leaf) = self.get_leaf_by_key(*key).unwrap(); + let x = self.get_leaf_by_key(*key); + let (_, leaf) = x.unwrap(); key_value.insert(*key, leaf.value); } @@ -1642,10 +1658,11 @@ mod tests { assert_eq!( internal_hash(&left, &right), clvm_utils::tree_hash_pair( - clvm_utils::TreeHash::new(left), - clvm_utils::TreeHash::new(right) + clvm_utils::TreeHash::new(left.into()), + 
clvm_utils::TreeHash::new(right.into()) ) - .to_bytes(), + .to_bytes() + .into(), ); } @@ -2002,9 +2019,9 @@ mod tests { fn test_double_insert_fails() { let mut blob = MerkleBlob::new(vec![]).unwrap(); let kv = KvId(0); - blob.insert(kv, kv, &[0u8; 32], InsertLocation::Auto {}) + blob.insert(kv, kv, &[0u8; 32].into(), InsertLocation::Auto {}) .unwrap(); - blob.insert(kv, kv, &[0u8; 32], InsertLocation::Auto {}) + blob.insert(kv, kv, &[0u8; 32].into(), InsertLocation::Auto {}) .expect_err(""); } @@ -2031,7 +2048,8 @@ mod tests { } let before = blob.get_key_value_map(); - blob.batch_insert(batch.into_iter()).unwrap(); + blob.batch_insert(batch.into_iter()) + .expect("here ---asdfasdf"); let after = blob.get_key_value_map(); open_dot( diff --git a/crates/chia-traits/src/streamable.rs b/crates/chia-traits/src/streamable.rs index 6be2dab34..6efca143a 100644 --- a/crates/chia-traits/src/streamable.rs +++ b/crates/chia-traits/src/streamable.rs @@ -54,6 +54,14 @@ pub trait Streamable { Err(Error::InputTooLarge) } } + fn from_bytes_ignore_extra_bytes(bytes: &[u8]) -> Result + where + Self: Sized, + { + let mut cursor = Cursor::new(bytes); + let ret = Self::parse::(&mut cursor)?; + Ok(ret) + } fn from_bytes_unchecked(bytes: &[u8]) -> Result where Self: Sized, From 889eb7f4385be7f25c926876291d2d7798982c15 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 12 Nov 2024 19:46:50 -0500 Subject: [PATCH 140/181] cleanup --- crates/chia-datalayer/src/merkle.rs | 174 +++++----------------------- 1 file changed, 29 insertions(+), 145 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 5ebf08ca8..98d4bbcd8 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -12,7 +12,6 @@ use num_traits::ToBytes; use std::cmp::Ordering; use std::collections::{HashMap, HashSet, VecDeque}; use std::iter::zip; -use std::mem::size_of; use std::ops::Range; use thiserror::Error; @@ -20,24 +19,6 @@ use thiserror::Error; #[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord)] pub struct TreeIndex(u32); -impl TreeIndex { - // fn from_bytes(bytes: &[u8]) -> Self { - // // OPT: could save 1-2% of tree space by packing (and maybe don't do that) - // // TODO: real error processing, recheck all ZeroLengthSeedNotAllowed - // Streamable::from_bytes_ignore_extra_bytes(bytes).unwrap() - // } - - // #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] - // fn to_bytes(self) -> [u8; 4] { - // // TODO: stop panicking - // Streamable::to_bytes(&self) - // .unwrap() - // .as_slice() - // .try_into() - // .unwrap() - // } -} - #[cfg(feature = "py-bindings")] impl IntoPy for TreeIndex { fn into_py(self, py: Python<'_>) -> PyObject { @@ -53,7 +34,6 @@ impl std::fmt::Display for TreeIndex { type Parent = Option; type Hash = Bytes32; -type KvIdBytes = [u8; size_of::()]; /// Key and value ids are provided from outside of this code and are implemented as /// the row id from sqlite which is a signed 8 byte integer. The actual key and /// value data bytes will not be handled within this code, only outside. 
@@ -61,26 +41,6 @@ type KvIdBytes = [u8; size_of::()]; #[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord)] pub struct KvId(i64); -impl KvId { - #[allow(clippy::unnecessary_wraps)] - pub fn from_bytes(blob: KvIdBytes) -> Result { - // TODO: real error processing, recheck all ZeroLengthSeedNotAllowed - Streamable::from_bytes_ignore_extra_bytes(&blob) - .map_err(|_| Error::FailedLoadingNode("aaa".to_string())) - } - - // TODO: consider the self convention more compared with other cases - #[allow(clippy::trivially_copy_pass_by_ref, clippy::wrong_self_convention)] - pub fn to_bytes(&self) -> KvIdBytes { - // TODO: stop panicking - let mut x = Streamable::to_bytes(self).unwrap(); - for _ in x.len()..DATA_SIZE { - x.push(0); - } - x.as_slice().try_into().unwrap() - } -} - #[cfg(feature = "py-bindings")] impl IntoPy for KvId { fn into_py(self, py: Python<'_>) -> PyObject { @@ -136,41 +96,18 @@ pub enum Error { #[error("node not a leaf: {0:?}")] NodeNotALeaf(InternalNode), + + #[error("from streamable: {0:?}")] + Streaming(chia_traits::chia_error::Error), } // assumptions // - root is at index 0 // - any case with no keys will have a zero length blob -// const fn range_by_length(start: usize, length: usize) -> Range { -// start..start + length -// } -// const fn max(left: usize, right: usize) -> usize { -// [left, right][(left < right) as usize] -// } -// TODO: once not experimental... something closer to this -// const fn max(left: T, right: T) -> T { if left < right {right} else {left} } - // define the serialized block format const METADATA_RANGE: Range = 0..METADATA_SIZE; -// const TYPE_RANGE: Range = range_by_length(0, size_of::()); -// const DIRTY_RANGE: Range = range_by_length(TYPE_RANGE.end, size_of::()); -// const METADATA_SIZE: usize = DIRTY_RANGE.end; -// TODO: figure out the real max const METADATA_SIZE: usize = 2; - -// common fields -// const HASH_RANGE: Range = range_by_length(0, size_of::()); -// const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); -// const PARENT_RANGE: Range = range_by_length(HASH_RANGE.end, size_of::()); -// internal specific fields -// const LEFT_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); -// const RIGHT_RANGE: Range = range_by_length(LEFT_RANGE.end, size_of::()); -// leaf specific fields -// const KEY_RANGE: Range = range_by_length(PARENT_RANGE.end, size_of::()); -// const VALUE_RANGE: Range = range_by_length(KEY_RANGE.end, size_of::()); - -// const DATA_SIZE: usize = max(RIGHT_RANGE.end, VALUE_RANGE.end); // TODO: figure out the real max const DATA_SIZE: usize = 100; const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; @@ -244,40 +181,11 @@ pub enum InsertLocation { #[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq)] pub struct NodeMetadata { + // OPT: could save 1-2% of tree space by packing (and maybe don't do that) pub node_type: NodeType, pub dirty: bool, } -impl NodeMetadata { - pub fn from_bytes(blob: MetadataBytes) -> Result { - // OPT: could save 1-2% of tree space by packing (and maybe don't do that) - // TODO: real error processing, recheck all ZeroLengthSeedNotAllowed - Streamable::from_bytes_ignore_extra_bytes(&blob) - .map_err(|_| Error::FailedLoadingNode("ccc".to_string())) - } - - #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] - pub fn to_bytes(&self) -> MetadataBytes { - // TODO: stop panicking - let mut x = Streamable::to_bytes(self).unwrap(); - for _ in x.len()..METADATA_SIZE { - x.push(0); - } - let l = x.len(); - x.as_slice() - 
.try_into() - .expect(format!("length is: {l} of {DATA_SIZE}").as_str()) - } -} - -// fn parent_from_bytes(blob: &DataBytes) -> Parent { -// Node::from_bytes(blob).unwrap().parent() -// } -// -// fn hash_from_bytes(blob: &DataBytes) -> Hash { -// Node::from_bytes(blob).unwrap().hash() -// } - #[cfg_attr(feature = "py-bindings", pyclass(name = "InternalNode", get_all))] #[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq)] pub struct InternalNode { @@ -288,25 +196,6 @@ pub struct InternalNode { } impl InternalNode { - pub fn from_bytes(blob: &DataBytes) -> Result { - // OPT: could save 1-2% of tree space by packing (and maybe don't do that) - // TODO: real error processing, recheck all ZeroLengthSeedNotAllowed - Streamable::from_bytes_ignore_extra_bytes(blob) - .map_err(|_| Error::FailedLoadingNode("ddd".to_string())) - } - - #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] - pub fn to_bytes(&self) -> DataBytes { - // TODO: stop panicking - let mut x = Streamable::to_bytes(self).expect(format!("aaaaa = {DATA_SIZE}").as_str()); - for _ in x.len()..DATA_SIZE { - x.push(0); - } - x.as_slice() - .try_into() - .expect(format!("bbbbb = {DATA_SIZE}").as_str()) - } - pub fn sibling_index(&self, index: TreeIndex) -> TreeIndex { if index == self.right { self.left @@ -327,29 +216,6 @@ pub struct LeafNode { value: KvId, } -impl LeafNode { - pub fn from_bytes(blob: &DataBytes) -> Result { - // OPT: could save 1-2% of tree space by packing (and maybe don't do that) - // TODO: real error processing, recheck all ZeroLengthSeedNotAllowed - // TODO: more structured padding handling - Streamable::from_bytes_ignore_extra_bytes(blob) - .map_err(|e| Error::FailedLoadingNode(format!("eee {e}").to_string())) - } - - #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] - pub fn to_bytes(&self) -> DataBytes { - // TODO: stop panicking - let mut x = Streamable::to_bytes(self).unwrap(); - for _ in x.len()..DATA_SIZE { - x.push(0); - } - let l = x.len(); - x.as_slice() - .try_into() - .expect(format!("length is: {l} of {DATA_SIZE}").as_str()) - } -} - #[derive(Clone, Debug, PartialEq, Eq)] pub enum Node { Internal(InternalNode), @@ -388,16 +254,27 @@ impl Node { #[allow(clippy::trivially_copy_pass_by_ref)] pub fn from_bytes(metadata: &NodeMetadata, blob: &DataBytes) -> Result { Ok(match metadata.node_type { - NodeType::Internal => Node::Internal(InternalNode::from_bytes(blob)?), - NodeType::Leaf => Node::Leaf(LeafNode::from_bytes(blob)?), + NodeType::Internal => Node::Internal( + Streamable::from_bytes_ignore_extra_bytes(blob).map_err(|e| Error::Streaming(e))?, + ), + NodeType::Leaf => Node::Leaf( + Streamable::from_bytes_ignore_extra_bytes(blob).map_err(|e| Error::Streaming(e))?, + ), }) } pub fn to_bytes(&self) -> DataBytes { - match self { + // TODO: handle the error + let mut base = match self { Node::Internal(node) => node.to_bytes(), Node::Leaf(node) => node.to_bytes(), } + .unwrap(); + for _ in base.len()..DATA_SIZE { + base.push(0); + } + // TODO: handle the error + base.as_slice().try_into().unwrap() } fn expect_leaf(self, message: &str) -> LeafNode { @@ -440,7 +317,14 @@ pub struct Block { impl Block { pub fn to_bytes(&self) -> BlockBytes { let mut blob: BlockBytes = [0; BLOCK_SIZE]; - blob[METADATA_RANGE].copy_from_slice(&self.metadata.to_bytes()); + // TODO: probably propagate the error + blob[METADATA_RANGE].copy_from_slice( + &self + .metadata + .to_bytes() + .map_err(|e| Error::Streaming(e)) + .unwrap(), + ); 
blob[DATA_RANGE].copy_from_slice(&self.node.to_bytes()); blob @@ -449,7 +333,7 @@ impl Block { pub fn from_bytes(blob: BlockBytes) -> Result { let metadata_blob: MetadataBytes = blob[METADATA_RANGE].try_into().unwrap(); let data_blob: DataBytes = blob[DATA_RANGE].try_into().unwrap(); - let metadata = NodeMetadata::from_bytes(metadata_blob) + let metadata = NodeMetadata::from_bytes(&metadata_blob) .map_err(|message| Error::FailedLoadingMetadata(message.to_string()))?; let node = Node::from_bytes(&metadata, &data_blob) .map_err(|message| Error::FailedLoadingNode(message.to_string()))?; @@ -1672,9 +1556,9 @@ mod tests { #[values(NodeType::Internal, NodeType::Leaf)] node_type: NodeType, ) { let bytes: [u8; 2] = [node_type.to_u8(), dirty as u8]; - let object = NodeMetadata::from_bytes(bytes).unwrap(); + let object = NodeMetadata::from_bytes(&bytes).unwrap(); assert_eq!(object, NodeMetadata { node_type, dirty },); - assert_eq!(object.to_bytes(), bytes); + assert_eq!(object.to_bytes().unwrap(), bytes); } #[fixture] From 43f34b231a1de991d2d7e54b3faf6fa55d31c603 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 12 Nov 2024 19:54:37 -0500 Subject: [PATCH 141/181] max data size is 53 at this point --- crates/chia-datalayer/src/merkle.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 98d4bbcd8..bd8a0b54c 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -108,8 +108,8 @@ pub enum Error { // define the serialized block format const METADATA_RANGE: Range = 0..METADATA_SIZE; const METADATA_SIZE: usize = 2; -// TODO: figure out the real max -const DATA_SIZE: usize = 100; +// TODO: figure out the real max better than trial and error? +const DATA_SIZE: usize = 53; const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; type BlockBytes = [u8; BLOCK_SIZE]; type MetadataBytes = [u8; METADATA_SIZE]; From 22e155ffbd3c80deed80e3f9256e0c977927ad2e Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 12 Nov 2024 20:04:23 -0500 Subject: [PATCH 142/181] tidy --- crates/chia-datalayer/src/merkle.rs | 52 +++++++++++++---------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index bd8a0b54c..43afb20ca 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -16,7 +16,7 @@ use std::ops::Range; use thiserror::Error; #[cfg_attr(feature = "py-bindings", derive(FromPyObject), pyo3(transparent))] -#[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Streamable)] pub struct TreeIndex(u32); #[cfg(feature = "py-bindings")] @@ -38,7 +38,7 @@ type Hash = Bytes32; /// the row id from sqlite which is a signed 8 byte integer. The actual key and /// value data bytes will not be handled within this code, only outside. 
#[cfg_attr(feature = "py-bindings", derive(FromPyObject), pyo3(transparent))] -#[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Streamable)] pub struct KvId(i64); #[cfg(feature = "py-bindings")] @@ -117,7 +117,7 @@ type DataBytes = [u8; DATA_SIZE]; const DATA_RANGE: Range = METADATA_SIZE..METADATA_SIZE + DATA_SIZE; #[repr(u8)] -#[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, Streamable)] pub enum NodeType { Internal = 0, Leaf = 1, @@ -140,6 +140,7 @@ fn sha256_num(input: T) -> Hash { let mut hasher = Sha256::new(); hasher.update(input.to_be_bytes()); + // TODO: propagate? hasher.finalize().into() } @@ -147,6 +148,7 @@ fn sha256_bytes(input: &[u8]) -> Hash { let mut hasher = Sha256::new(); hasher.update(input); + // TODO: propagate? hasher.finalize().into() } @@ -156,6 +158,7 @@ fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { hasher.update(left_hash); hasher.update(right_hash); + // TODO: propagate? hasher.finalize().into() } @@ -177,9 +180,7 @@ pub enum InsertLocation { Leaf { index: TreeIndex, side: Side }, } -// const NULL_PARENT: TreeIndex = TreeIndex(0xffff_ffffu32); - -#[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq)] +#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq, Streamable)] pub struct NodeMetadata { // OPT: could save 1-2% of tree space by packing (and maybe don't do that) pub node_type: NodeType, @@ -187,7 +188,7 @@ pub struct NodeMetadata { } #[cfg_attr(feature = "py-bindings", pyclass(name = "InternalNode", get_all))] -#[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Streamable)] pub struct InternalNode { parent: Parent, hash: Hash, @@ -208,7 +209,7 @@ impl InternalNode { } #[cfg_attr(feature = "py-bindings", pyclass(name = "LeafNode", get_all))] -#[derive(Streamable, Hash, Debug, Copy, Clone, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Streamable)] pub struct LeafNode { parent: Parent, hash: Hash, @@ -263,18 +264,19 @@ impl Node { }) } - pub fn to_bytes(&self) -> DataBytes { - // TODO: handle the error + pub fn to_bytes(&self) -> Result { let mut base = match self { Node::Internal(node) => node.to_bytes(), Node::Leaf(node) => node.to_bytes(), } - .unwrap(); + .map_err(|e| Error::Streaming(e))?; for _ in base.len()..DATA_SIZE { base.push(0); } - // TODO: handle the error - base.as_slice().try_into().unwrap() + Ok(base + .as_slice() + .try_into() + .expect("padding was added above, might be too large")) } fn expect_leaf(self, message: &str) -> LeafNode { @@ -315,19 +317,13 @@ pub struct Block { } impl Block { - pub fn to_bytes(&self) -> BlockBytes { + pub fn to_bytes(&self) -> Result { let mut blob: BlockBytes = [0; BLOCK_SIZE]; - // TODO: probably propagate the error - blob[METADATA_RANGE].copy_from_slice( - &self - .metadata - .to_bytes() - .map_err(|e| Error::Streaming(e)) - .unwrap(), - ); - blob[DATA_RANGE].copy_from_slice(&self.node.to_bytes()); + blob[METADATA_RANGE] + .copy_from_slice(&self.metadata.to_bytes().map_err(|e| Error::Streaming(e))?); + blob[DATA_RANGE].copy_from_slice(&self.node.to_bytes()?); - blob + Ok(blob) } pub fn from_bytes(blob: BlockBytes) -> Result { @@ -1010,7 +1006,7 @@ impl MerkleBlob { } fn insert_entry_to_blob(&mut self, index: TreeIndex, block: &Block) -> Result<(), Error> { - let new_block_bytes = block.to_bytes(); + let new_block_bytes = block.to_bytes()?; let 
extend_index = self.extend_index(); match index.cmp(&extend_index) { Ordering::Greater => return Err(Error::BlockIndexOutOfRange(index)), @@ -1174,8 +1170,7 @@ impl MerkleBlob { let mut key_value = HashMap::new(); for (key, index) in &self.key_to_index { // silly waste of having the index, but test code and type narrowing so, ok i guess - let x = self.get_leaf_by_key(*key); - let (_, leaf) = x.unwrap(); + let (_, leaf) = self.get_leaf_by_key(*key).unwrap(); key_value.insert(*key, leaf.value); } @@ -1932,8 +1927,7 @@ mod tests { } let before = blob.get_key_value_map(); - blob.batch_insert(batch.into_iter()) - .expect("here ---asdfasdf"); + blob.batch_insert(batch.into_iter()).unwrap(); let after = blob.get_key_value_map(); open_dot( From d60c5efbf9ce357da9b1cf91e6d08a993ac7997b Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 13 Nov 2024 08:08:42 -0500 Subject: [PATCH 143/181] fixup python test reference blob --- tests/test_datalayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datalayer.py b/tests/test_datalayer.py index be8bae198..62a30c796 100644 --- a/tests/test_datalayer.py +++ b/tests/test_datalayer.py @@ -5,7 +5,7 @@ def test_merkle_blob(): blob = bytes.fromhex( - "00000e4a8b1ecee43f457bbe2b30e94ac2afc0d3a6536f891a2ced5e96ce07fe9932ffffffff000000010000000200000000000000000100d8ddfc94e7201527a6a93ee04aed8c5c122ac38af6dbf6e5f1caefba2597230d000000000001020304050607101112131415161701000f980325ebe9426fa295f3f69cc38ef8fe6ce8f3b9f083556c0f927e67e566510000000020212223242526273031323334353637" + "000100770a5d50f980316e3a856b2f0447e1c1285064cd301c731e5b16c16d187d0ff90000000400000002000000000000000000000000010001000000060c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b00000000000000010000000000000001010001000000000c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b00000000000000000000000000000000010001000000040c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0000000000000002000000000000000200010100000000770a5d50f980316e3a856b2f0447e1c1285064cd301c731e5b16c16d187d0ff900000003000000060000000000000000010001000000060c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b0000000000000003000000000000000300000100000004770a5d50f980316e3a856b2f0447e1c1285064cd301c731e5b16c16d187d0ff900000005000000010000000000000000" ) merkle_blob = MerkleBlob(blob) print(merkle_blob) From b376ff349192e3e4fe6054b32e1e4a0a77eceb7e Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 13 Nov 2024 08:11:24 -0500 Subject: [PATCH 144/181] clippy --- crates/chia-datalayer/src/merkle.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 43afb20ca..2d7defe3b 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -256,10 +256,10 @@ impl Node { pub fn from_bytes(metadata: &NodeMetadata, blob: &DataBytes) -> Result { Ok(match metadata.node_type { NodeType::Internal => Node::Internal( - Streamable::from_bytes_ignore_extra_bytes(blob).map_err(|e| Error::Streaming(e))?, + Streamable::from_bytes_ignore_extra_bytes(blob).map_err(Error::Streaming)?, ), NodeType::Leaf => Node::Leaf( - Streamable::from_bytes_ignore_extra_bytes(blob).map_err(|e| Error::Streaming(e))?, + Streamable::from_bytes_ignore_extra_bytes(blob).map_err(Error::Streaming)?, ), }) } @@ -269,10 +269,9 @@ impl Node { Node::Internal(node) => node.to_bytes(), Node::Leaf(node) => node.to_bytes(), } - .map_err(|e| Error::Streaming(e))?; 
- for _ in base.len()..DATA_SIZE { - base.push(0); - } + .map_err(Error::Streaming)?; + assert!(base.len() <= DATA_SIZE); + base.resize(DATA_SIZE, 0); Ok(base .as_slice() .try_into() @@ -319,8 +318,7 @@ pub struct Block { impl Block { pub fn to_bytes(&self) -> Result { let mut blob: BlockBytes = [0; BLOCK_SIZE]; - blob[METADATA_RANGE] - .copy_from_slice(&self.metadata.to_bytes().map_err(|e| Error::Streaming(e))?); + blob[METADATA_RANGE].copy_from_slice(&self.metadata.to_bytes().map_err(Error::Streaming)?); blob[DATA_RANGE].copy_from_slice(&self.node.to_bytes()?); Ok(blob) From e29b339d6eef9a5ffbf115887a87f4a201b5ccdd Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 13 Nov 2024 08:35:06 -0500 Subject: [PATCH 145/181] stop changing streamable --- crates/chia-datalayer/src/merkle.rs | 20 ++++++++++++-------- crates/chia-traits/src/streamable.rs | 8 -------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 2d7defe3b..f635d5ec6 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -116,6 +116,15 @@ type MetadataBytes = [u8; METADATA_SIZE]; type DataBytes = [u8; DATA_SIZE]; const DATA_RANGE: Range = METADATA_SIZE..METADATA_SIZE + DATA_SIZE; +fn streamable_from_bytes_ignore_extra_bytes(bytes: &[u8]) -> Result +where + T: Streamable, +{ + let mut cursor = std::io::Cursor::new(bytes); + // TODO: consider trusted mode? + T::parse::(&mut cursor).map_err(Error::Streaming) +} + #[repr(u8)] #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, Streamable)] pub enum NodeType { @@ -125,8 +134,7 @@ pub enum NodeType { impl NodeType { pub fn from_u8(value: u8) -> Result { - Streamable::from_bytes_ignore_extra_bytes(&[value]) - .map_err(|_| Error::FailedLoadingNode("bbb".to_string())) + streamable_from_bytes_ignore_extra_bytes(&[value]) } #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] @@ -255,12 +263,8 @@ impl Node { #[allow(clippy::trivially_copy_pass_by_ref)] pub fn from_bytes(metadata: &NodeMetadata, blob: &DataBytes) -> Result { Ok(match metadata.node_type { - NodeType::Internal => Node::Internal( - Streamable::from_bytes_ignore_extra_bytes(blob).map_err(Error::Streaming)?, - ), - NodeType::Leaf => Node::Leaf( - Streamable::from_bytes_ignore_extra_bytes(blob).map_err(Error::Streaming)?, - ), + NodeType::Internal => Node::Internal(streamable_from_bytes_ignore_extra_bytes(blob)?), + NodeType::Leaf => Node::Leaf(streamable_from_bytes_ignore_extra_bytes(blob)?), }) } diff --git a/crates/chia-traits/src/streamable.rs b/crates/chia-traits/src/streamable.rs index 6efca143a..6be2dab34 100644 --- a/crates/chia-traits/src/streamable.rs +++ b/crates/chia-traits/src/streamable.rs @@ -54,14 +54,6 @@ pub trait Streamable { Err(Error::InputTooLarge) } } - fn from_bytes_ignore_extra_bytes(bytes: &[u8]) -> Result - where - Self: Sized, - { - let mut cursor = Cursor::new(bytes); - let ret = Self::parse::(&mut cursor)?; - Ok(ret) - } fn from_bytes_unchecked(bytes: &[u8]) -> Result where Self: Sized, From 6ea2c28416a823696bf411d89f2ccf669db54c91 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 13 Nov 2024 08:40:21 -0500 Subject: [PATCH 146/181] less into --- crates/chia-datalayer/src/merkle.rs | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index f635d5ec6..221ce064e 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ 
b/crates/chia-datalayer/src/merkle.rs @@ -148,16 +148,14 @@ fn sha256_num(input: T) -> Hash { let mut hasher = Sha256::new(); hasher.update(input.to_be_bytes()); - // TODO: propagate? - hasher.finalize().into() + Bytes32::new(hasher.finalize()) } fn sha256_bytes(input: &[u8]) -> Hash { let mut hasher = Sha256::new(); hasher.update(input); - // TODO: propagate? - hasher.finalize().into() + Bytes32::new(hasher.finalize()) } fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { @@ -166,8 +164,7 @@ fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { hasher.update(left_hash); hasher.update(right_hash); - // TODO: propagate? - hasher.finalize().into() + Bytes32::new(hasher.finalize()) } #[cfg_attr(feature = "py-bindings", pyclass(name = "Side", eq, eq_int))] @@ -1538,12 +1535,13 @@ mod tests { assert_eq!( internal_hash(&left, &right), - clvm_utils::tree_hash_pair( - clvm_utils::TreeHash::new(left.into()), - clvm_utils::TreeHash::new(right.into()) - ) - .to_bytes() - .into(), + Bytes32::new( + clvm_utils::tree_hash_pair( + clvm_utils::TreeHash::new(left.to_bytes()), + clvm_utils::TreeHash::new(right.to_bytes()), + ) + .to_bytes() + ), ); } @@ -1900,9 +1898,9 @@ mod tests { fn test_double_insert_fails() { let mut blob = MerkleBlob::new(vec![]).unwrap(); let kv = KvId(0); - blob.insert(kv, kv, &[0u8; 32].into(), InsertLocation::Auto {}) + blob.insert(kv, kv, &Bytes32::new([0u8; 32]), InsertLocation::Auto {}) .unwrap(); - blob.insert(kv, kv, &[0u8; 32].into(), InsertLocation::Auto {}) + blob.insert(kv, kv, &Bytes32::new([0u8; 32]), InsertLocation::Auto {}) .expect_err(""); } From eb98a7b7c4185bf06a521f0ce488054415527bad Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 25 Nov 2024 09:40:18 -0500 Subject: [PATCH 147/181] tidy --- crates/chia-datalayer/src/merkle.rs | 131 ++++++++++------------------ wheel/src/api.rs | 2 - 2 files changed, 47 insertions(+), 86 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 221ce064e..41010392d 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -54,7 +54,7 @@ impl std::fmt::Display for KvId { } } -#[derive(Debug, Error)] +#[derive(Debug, Error, PartialEq, Eq)] pub enum Error { #[error("unknown NodeType value: {0:?}")] UnknownNodeTypeValue(u8), @@ -99,6 +99,9 @@ pub enum Error { #[error("from streamable: {0:?}")] Streaming(chia_traits::chia_error::Error), + + #[error("index not a child: {0}")] + IndexIsNotAChild(TreeIndex), } // assumptions @@ -168,10 +171,11 @@ fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { } #[cfg_attr(feature = "py-bindings", pyclass(name = "Side", eq, eq_int))] +#[repr(u8)] #[derive(Clone, Debug, Hash, Eq, PartialEq)] pub enum Side { - Left, - Right, + Left = 0, + Right = 1, } #[cfg_attr(feature = "py-bindings", pyclass(name = "InsertLocation"))] @@ -202,13 +206,13 @@ pub struct InternalNode { } impl InternalNode { - pub fn sibling_index(&self, index: TreeIndex) -> TreeIndex { + pub fn sibling_index(&self, index: TreeIndex) -> Result { if index == self.right { - self.left + Ok(self.left) } else if index == self.left { - self.right + Ok(self.right) } else { - panic!("index not a child: {index}") + Err(Error::IndexIsNotAChild(index)) } } } @@ -279,13 +283,13 @@ impl Node { .expect("padding was added above, might be too large")) } - fn expect_leaf(self, message: &str) -> LeafNode { + fn expect_leaf(&self, message: &str) -> LeafNode { let Node::Leaf(leaf) = self else { let message = message.replace("<>", 
&format!("{self:?}")); panic!("{}", message) }; - leaf + *leaf } fn try_into_leaf(self) -> Result { @@ -312,6 +316,7 @@ fn block_range(index: TreeIndex) -> Range { } pub struct Block { + // TODO: metadata node type and node's type not verified for agreement metadata: NodeMetadata, node: Node, } @@ -710,7 +715,7 @@ impl MerkleBlob { } let new_internal_node_index = self.get_new_index(); - let (old_leaf_index, old_leaf) = self.get_leaf_by_key(old_leaf_key)?; + let (old_leaf_index, old_leaf, _old_block) = self.get_leaf_by_key(old_leaf_key)?; let new_node = self.get_node(new_index)?; let new_stuff = Stuff { @@ -774,7 +779,7 @@ impl MerkleBlob { } pub fn delete(&mut self, key: KvId) -> Result<(), Error> { - let (leaf_index, leaf) = self.get_leaf_by_key(key)?; + let (leaf_index, leaf, _leaf_block) = self.get_leaf_by_key(key)?; self.key_to_index.remove(&key); let Some(parent_index) = leaf.parent else { @@ -787,7 +792,7 @@ impl MerkleBlob { let Node::Internal(parent) = maybe_parent else { panic!("parent node not internal: {maybe_parent:?}") }; - let sibling_index = parent.sibling_index(leaf_index); + let sibling_index = parent.sibling_index(leaf_index)?; let mut sibling_block = self.get_block(sibling_index)?; let Some(grandparent_index) = parent.parent else { @@ -828,20 +833,16 @@ impl MerkleBlob { } pub fn upsert(&mut self, key: KvId, value: KvId, new_hash: &Hash) -> Result<(), Error> { - let Some(leaf_index) = self.key_to_index.get(&key) else { + let Ok((leaf_index, mut leaf, mut block)) = self.get_leaf_by_key(key) else { self.insert(key, value, new_hash, InsertLocation::Auto {})?; return Ok(()); }; - let mut block = self.get_block(*leaf_index)?; - // TODO: repeated message - let mut leaf = block.node.clone().expect_leaf(&format!( - "expected leaf for index from key cache: {leaf_index} -> <>" - )); leaf.hash.clone_from(new_hash); leaf.value = value; + // OPT: maybe just edit in place? block.node = Node::Leaf(leaf); - self.insert_entry_to_blob(*leaf_index, &block)?; + self.insert_entry_to_blob(leaf_index, &block)?; if let Some(parent) = block.node.parent() { self.mark_lineage_as_dirty(parent)?; @@ -1018,7 +1019,6 @@ impl MerkleBlob { if !self.free_indexes.contains(&index) && old_block.metadata.node_type == NodeType::Leaf { - // TODO: sort of repeating the leaf check above and below. 
smells a little if let Node::Leaf(old_node) = old_block.node { self.key_to_index.remove(&old_node.key); }; @@ -1057,13 +1057,14 @@ impl MerkleBlob { Ok(self.get_block(index)?.node) } - pub fn get_leaf_by_key(&self, key: KvId) -> Result<(TreeIndex, LeafNode), Error> { + pub fn get_leaf_by_key(&self, key: KvId) -> Result<(TreeIndex, LeafNode, Block), Error> { let index = *self.key_to_index.get(&key).ok_or(Error::UnknownKey(key))?; - let leaf = self.get_node(index)?.expect_leaf(&format!( + let block = self.get_block(index)?; + let leaf = block.node.expect_leaf(&format!( "expected leaf for index from key cache: {index} -> <>" )); - Ok((index, leaf)) + Ok((index, leaf, block)) } pub fn get_parent_index(&self, index: TreeIndex) -> Result { @@ -1122,59 +1123,6 @@ impl MerkleBlob { Ok(()) } - - // #[allow(unused)] - // fn relocate_node(&mut self, source: TreeIndex, destination: TreeIndex) -> Result<(), Error> { - // let extend_index = self.extend_index(); - // if source == 0 { - // return Err("relocation of the root and index zero is not allowed".to_string()); - // }; - // assert!(source < extend_index); - // assert!(!self.free_indexes.contains(&source)); - // assert!(destination <= extend_index); - // assert!(destination == extend_index || self.free_indexes.contains(&destination)); - // - // let source_block = self.get_block(source).unwrap(); - // if let Some(parent) = source_block.node.parent { - // let mut parent_block = self.get_block(parent).unwrap(); - // let NodeSpecific::Internal { - // ref mut left, - // ref mut right, - // } = parent_block.node.specific - // else { - // panic!(); - // }; - // match source { - // x if x == *left => *left = destination, - // x if x == *right => *right = destination, - // _ => panic!(), - // } - // self.insert_entry_to_blob(parent, &parent_block).unwrap(); - // } - // - // if let NodeSpecific::Internal { left, right, .. 
} = source_block.node.specific { - // for child in [left, right] { - // self.update_parent(child, Some(destination)).unwrap(); - // } - // } - // - // self.free_indexes.insert(source); - // - // Ok(()) - // } - - // TODO: really this is test, not unused - #[allow(unused)] - fn get_key_value_map(&self) -> HashMap { - let mut key_value = HashMap::new(); - for (key, index) in &self.key_to_index { - // silly waste of having the index, but test code and type narrowing so, ok i guess - let (_, leaf) = self.get_leaf_by_key(*key).unwrap(); - key_value.insert(*key, leaf.value); - } - - key_value - } } impl PartialEq for MerkleBlob { @@ -1242,11 +1190,12 @@ impl MerkleBlob { index: *self .key_to_index .get(&key) - .ok_or(PyValueError::new_err("TODO: better message here"))?, + .ok_or(PyValueError::new_err(format!( + "unknown key id passed as insert location reference: {key}" + )))?, side: match side { - // TODO: if this sticks around, we gotta get more formal about the mapping - 0 => Side::Left, - 1 => Side::Right, + x if x == (Side::Left as u8) => Side::Left, + x if x == (Side::Right as u8) => Side::Right, _ => panic!(), }, }, @@ -1515,6 +1464,19 @@ mod tests { // crate::merkle::dot::open_dot(_lines); } + impl MerkleBlob { + fn get_key_value_map(&self) -> HashMap { + let mut key_value = HashMap::new(); + for key in self.key_to_index.keys() { + // silly waste of having the index, but test code and type narrowing so, ok i guess + let (_leaf_index, leaf, _leaf_block) = self.get_leaf_by_key(*key).unwrap(); + key_value.insert(*key, leaf.value); + } + + key_value + } + } + #[test] fn test_node_type_serialized_values() { assert_eq!(NodeType::Internal as u8, 0); @@ -1806,17 +1768,18 @@ mod tests { } #[test] - #[should_panic(expected = "index not a child: 2")] fn test_node_specific_sibling_index_panics_for_unknown_sibling() { - // TODO: this probably shouldn't be a panic? - // maybe depends if it is exported or private? 
let node = InternalNode { parent: None, hash: sha256_num(0), left: TreeIndex(0), right: TreeIndex(1), }; - node.sibling_index(TreeIndex(2)); + let index = TreeIndex(2); + assert_eq!( + node.sibling_index(TreeIndex(2)), + Err(Error::IndexIsNotAChild(index)) + ); } #[rstest] diff --git a/wheel/src/api.rs b/wheel/src/api.rs index 3ad16074f..3a6d98f66 100644 --- a/wheel/src/api.rs +++ b/wheel/src/api.rs @@ -478,8 +478,6 @@ pub fn chia_rs(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { // datalayer m.add_class::()?; - // m.add_class::()?; - // m.add_class::()?; m.add_class::()?; m.add_class::()?; From f952ded9b5f7a6cb7662164c9bb115b5c010593c Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 25 Nov 2024 10:24:16 -0500 Subject: [PATCH 148/181] tidy --- crates/chia-datalayer/src/merkle.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 41010392d..28b8f6092 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -172,7 +172,7 @@ fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { #[cfg_attr(feature = "py-bindings", pyclass(name = "Side", eq, eq_int))] #[repr(u8)] -#[derive(Clone, Debug, Hash, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Streamable)] pub enum Side { Left = 0, Right = 1, @@ -446,9 +446,9 @@ impl MerkleBlob { }; if self.key_to_index.len() == 1 { - self.insert_second(node, &old_leaf, &internal_node_hash, &side)?; + self.insert_second(node, &old_leaf, &internal_node_hash, side)?; } else { - self.insert_third_or_later(node, &old_leaf, index, &internal_node_hash, &side)?; + self.insert_third_or_later(node, &old_leaf, index, &internal_node_hash, side)?; } } } @@ -481,7 +481,7 @@ impl MerkleBlob { mut node: LeafNode, old_leaf: &LeafNode, internal_node_hash: &Hash, - side: &Side, + side: Side, ) -> Result<(), Error> { self.clear(); let root_index = self.get_new_index(); @@ -548,7 +548,7 @@ impl MerkleBlob { old_leaf: &LeafNode, old_leaf_index: TreeIndex, internal_node_hash: &Hash, - side: &Side, + side: Side, ) -> Result<(), Error> { let new_leaf_index = self.get_new_index(); let new_internal_node_index = self.get_new_index(); @@ -690,7 +690,7 @@ impl MerkleBlob { if indexes.len() == 1 { // OPT: can we avoid this extra min height leaf traversal? 
let min_height_leaf = self.get_min_height_leaf()?; - self.insert_from_key(min_height_leaf.key, indexes[0], &Side::Left)?; + self.insert_from_key(min_height_leaf.key, indexes[0], Side::Left)?; }; Ok(()) @@ -700,7 +700,7 @@ impl MerkleBlob { &mut self, old_leaf_key: KvId, new_index: TreeIndex, - side: &Side, + side: Side, ) -> Result<(), Error> { // NAME: consider name, we're inserting a subtree at a leaf // TODO: seems like this ought to be fairly similar to regular insert @@ -1193,11 +1193,7 @@ impl MerkleBlob { .ok_or(PyValueError::new_err(format!( "unknown key id passed as insert location reference: {key}" )))?, - side: match side { - x if x == (Side::Left as u8) => Side::Left, - x if x == (Side::Right as u8) => Side::Right, - _ => panic!(), - }, + side: Side::from_bytes(&[side])?, }, _ => { return Err(PyValueError::new_err( @@ -1679,7 +1675,7 @@ mod tests { &sha256_num(key_value_id.0), InsertLocation::Leaf { index: merkle_blob.key_to_index[&last_key], - side: side.clone(), + side, }, ) .unwrap(); From 57de97ce128b9872e78bd0b571f0b53b0a5a81d4 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 26 Nov 2024 12:41:26 -0500 Subject: [PATCH 149/181] remove no longer used `NodeType.from_u8()` and `.to_u8()` these were leftover from before streamable --- crates/chia-datalayer/src/merkle.rs | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 28b8f6092..2476417e7 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -135,17 +135,6 @@ pub enum NodeType { Leaf = 1, } -impl NodeType { - pub fn from_u8(value: u8) -> Result { - streamable_from_bytes_ignore_extra_bytes(&[value]) - } - - #[allow(clippy::wrong_self_convention, clippy::trivially_copy_pass_by_ref)] - pub fn to_u8(&self) -> u8 { - Streamable::to_bytes(self).unwrap()[0] - } -} - #[allow(clippy::needless_pass_by_value)] fn sha256_num(input: T) -> Hash { let mut hasher = Sha256::new(); @@ -1479,8 +1468,14 @@ mod tests { assert_eq!(NodeType::Leaf as u8, 1); for node_type in [NodeType::Internal, NodeType::Leaf] { - assert_eq!(node_type.to_u8(), node_type as u8,); - assert_eq!(NodeType::from_u8(node_type as u8).unwrap(), node_type,); + assert_eq!( + Streamable::to_bytes(&node_type).unwrap()[0], + node_type as u8, + ); + assert_eq!( + streamable_from_bytes_ignore_extra_bytes::(&[node_type as u8]).unwrap(), + node_type, + ); } } @@ -1508,7 +1503,7 @@ mod tests { #[values(false, true)] dirty: bool, #[values(NodeType::Internal, NodeType::Leaf)] node_type: NodeType, ) { - let bytes: [u8; 2] = [node_type.to_u8(), dirty as u8]; + let bytes: [u8; 2] = [Streamable::to_bytes(&node_type).unwrap()[0], dirty as u8]; let object = NodeMetadata::from_bytes(&bytes).unwrap(); assert_eq!(object, NodeMetadata { node_type, dirty },); assert_eq!(object.to_bytes().unwrap(), bytes); @@ -1759,7 +1754,7 @@ mod tests { #[test] fn test_node_type_from_u8_invalid() { let invalid_value = 2; - let actual = NodeType::from_u8(invalid_value); + let actual = streamable_from_bytes_ignore_extra_bytes::(&[invalid_value as u8]); actual.expect_err("invalid node type value should fail"); } From a328f274dde9474885b1ee7e54c64f997f1db23b Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 26 Nov 2024 12:44:26 -0500 Subject: [PATCH 150/181] remove all unneeded `pyclass(name = "...")` --- crates/chia-datalayer/src/merkle.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 2476417e7..a9528cfea 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -159,7 +159,7 @@ fn internal_hash(left_hash: &Hash, right_hash: &Hash) -> Hash { Bytes32::new(hasher.finalize()) } -#[cfg_attr(feature = "py-bindings", pyclass(name = "Side", eq, eq_int))] +#[cfg_attr(feature = "py-bindings", pyclass(eq, eq_int))] #[repr(u8)] #[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Streamable)] pub enum Side { @@ -167,7 +167,7 @@ pub enum Side { Right = 1, } -#[cfg_attr(feature = "py-bindings", pyclass(name = "InsertLocation"))] +#[cfg_attr(feature = "py-bindings", pyclass)] #[derive(Clone, Debug, Hash, Eq, PartialEq)] pub enum InsertLocation { // error: Unit variant `Auto` is not yet supported in a complex enum @@ -185,7 +185,7 @@ pub struct NodeMetadata { pub dirty: bool, } -#[cfg_attr(feature = "py-bindings", pyclass(name = "InternalNode", get_all))] +#[cfg_attr(feature = "py-bindings", pyclass(get_all))] #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Streamable)] pub struct InternalNode { parent: Parent, @@ -206,7 +206,7 @@ impl InternalNode { } } -#[cfg_attr(feature = "py-bindings", pyclass(name = "LeafNode", get_all))] +#[cfg_attr(feature = "py-bindings", pyclass(get_all))] #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Streamable)] pub struct LeafNode { parent: Parent, @@ -362,7 +362,7 @@ fn get_free_indexes_and_keys_values_indexes( (free_indexes, key_to_index) } -#[cfg_attr(feature = "py-bindings", pyclass(name = "MerkleBlob", get_all))] +#[cfg_attr(feature = "py-bindings", pyclass(get_all))] #[derive(Debug)] pub struct MerkleBlob { blob: Vec, From 6eff838cdb89be3efbf25dc4ad1719f4f6024678 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 26 Nov 2024 12:48:17 -0500 Subject: [PATCH 151/181] make `Node.set_hash()` take ownership of the passed hash --- crates/chia-datalayer/src/merkle.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index a9528cfea..d14725370 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -243,10 +243,10 @@ impl Node { } } - fn set_hash(&mut self, hash: &Hash) { + fn set_hash(&mut self, hash: Hash) { match self { - Node::Internal(ref mut node) => node.hash = *hash, - Node::Leaf(ref mut node) => node.hash = *hash, + Node::Internal(ref mut node) => node.hash = hash, + Node::Leaf(ref mut node) => node.hash = hash, } } @@ -331,7 +331,7 @@ impl Block { } pub fn update_hash(&mut self, left: &Hash, right: &Hash) { - self.node.set_hash(&internal_hash(left, right)); + self.node.set_hash(internal_hash(left, right)); self.metadata.dirty = false; } } From 0be9f5015b1854477cd3bd3633b09082efc89487 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 26 Nov 2024 13:21:10 -0500 Subject: [PATCH 152/181] make `InternalNode` and `LeafNode` fields public --- crates/chia-datalayer/src/merkle.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index d14725370..5eaf8b7e4 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -188,10 +188,10 @@ pub struct NodeMetadata { #[cfg_attr(feature = "py-bindings", pyclass(get_all))] #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Streamable)] pub struct InternalNode { - parent: Parent, - hash: Hash, - left: TreeIndex, - 
right: TreeIndex, + pub parent: Parent, + pub hash: Hash, + pub left: TreeIndex, + pub right: TreeIndex, } impl InternalNode { @@ -209,10 +209,10 @@ impl InternalNode { #[cfg_attr(feature = "py-bindings", pyclass(get_all))] #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, Streamable)] pub struct LeafNode { - parent: Parent, - hash: Hash, - key: KvId, - value: KvId, + pub parent: Parent, + pub hash: Hash, + pub key: KvId, + pub value: KvId, } #[derive(Clone, Debug, PartialEq, Eq)] pub enum Node { From 6904bcd2cd1e3accc6bc8d2aef9ebb114412d514 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 26 Nov 2024 13:37:18 -0500 Subject: [PATCH 153/181] `pub use merkle::*;` --- crates/chia-datalayer/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/chia-datalayer/src/lib.rs b/crates/chia-datalayer/src/lib.rs index 7ed47e100..fd4f335c9 100644 --- a/crates/chia-datalayer/src/lib.rs +++ b/crates/chia-datalayer/src/lib.rs @@ -1,3 +1,3 @@ mod merkle; -pub use merkle::{InsertLocation, InternalNode, LeafNode, MerkleBlob, Side}; +pub use merkle::*; From 72bfcbb303f24319b4ad7e1415e926a7cc1f6ae6 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 26 Nov 2024 13:47:09 -0500 Subject: [PATCH 154/181] update datalayer version to match --- Cargo.lock | 2 +- Cargo.toml | 2 +- crates/chia-datalayer/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0275df5c5..80b3f6ae6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -372,7 +372,7 @@ dependencies = [ [[package]] name = "chia-datalayer" -version = "0.1.0" +version = "0.16.0" dependencies = [ "chia-protocol", "chia-sha2", diff --git a/Cargo.toml b/Cargo.toml index 499593902..8cca27f39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -107,7 +107,7 @@ chia_streamable_macro = { path = "./crates/chia_streamable_macro", version = "0.
chia-bls = { path = "./crates/chia-bls", version = "0.16.0" } chia-client = { path = "./crates/chia-client", version = "0.16.0" } chia-consensus = { path = "./crates/chia-consensus", version = "0.16.0" } -chia-datalayer = { path = "./crates/chia-datalayer", version = "0.1.0" } +chia-datalayer = { path = "./crates/chia-datalayer", version = "0.16.0" } chia-protocol = { path = "./crates/chia-protocol", version = "0.16.0" } chia-ssl = { path = "./crates/chia-ssl", version = "0.11.0" } chia-traits = { path = "./crates/chia-traits", version = "0.15.0" } diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index 2e6085095..851b20d23 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "chia-datalayer" -version = "0.1.0" +version = "0.16.0" edition = "2021" license = "Apache-2.0" description = "DataLayer modules for Chia blockchain" From 4b0ca0069f11a0369e02c26e2880459958a72f02 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Wed, 4 Dec 2024 12:49:34 -0500 Subject: [PATCH 155/181] `.check_integrity().expect()` not `.unwrap()` --- crates/chia-datalayer/src/merkle.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 5eaf8b7e4..b0593b223 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -1432,7 +1432,8 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { #[cfg(any(test, debug_assertions))] impl Drop for MerkleBlob { fn drop(&mut self) { - self.check_integrity().unwrap(); + self.check_integrity() + .expect("integrity check failed while dropping merkle blob"); } } From c6e984460fa4f3b799aacacc92cf03eba0e5640c Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Dec 2024 13:18:41 -0500 Subject: [PATCH 156/181] test iterators --- Cargo.lock | 17 ++ Cargo.toml | 1 + crates/chia-datalayer/Cargo.toml | 1 + crates/chia-datalayer/src/merkle.rs | 251 +++++++++++++++++++++++- crates/chia-datalayer/src/merkle/dot.rs | 30 ++- 5 files changed, 286 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 80b3f6ae6..20289a677 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -380,6 +380,7 @@ dependencies = [ "chia_streamable_macro 0.15.0", "clvm-utils", "clvmr", + "expect-test", "hex", "hex-literal", "num-traits", @@ -916,6 +917,12 @@ dependencies = [ "syn", ] +[[package]] +name = "dissimilar" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59f8e79d1fbf76bdfbde321e902714bf6c49df88a7dda6fc682fc2979226962d" + [[package]] name = "ecdsa" version = "0.16.9" @@ -962,6 +969,16 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "expect-test" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e0be0a561335815e06dab7c62e50353134c796e7a6155402a64bcff66b6a5e0" +dependencies = [ + "dissimilar", + "once_cell", +] + [[package]] name = "fallible-iterator" version = "0.3.0" diff --git a/Cargo.toml b/Cargo.toml index 8cca27f39..77f9efd53 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -138,6 +138,7 @@ arbitrary = "1.4.1" rand = "0.8.5" criterion = "0.5.1" rstest = "0.22.0" +expect-test = "1.5.0" tokio = "1.41.1" tokio-tungstenite = "0.21.0" futures-util = "0.3.31" diff --git a/crates/chia-datalayer/Cargo.toml b/crates/chia-datalayer/Cargo.toml index 
851b20d23..74969ee0e 100644 --- a/crates/chia-datalayer/Cargo.toml +++ b/crates/chia-datalayer/Cargo.toml @@ -29,6 +29,7 @@ chia-protocol = { workspace = true } [dev-dependencies] clvm-utils = { workspace = true } +expect-test = { workspace = true } hex = { workspace = true } hex-literal = { workspace = true } open = { workspace = true } diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index b0593b223..3c2d2552d 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -337,7 +337,7 @@ impl Block { } fn get_free_indexes_and_keys_values_indexes( - blob: &[u8], + blob: &Vec, ) -> (HashSet, HashMap) { let index_count = blob.len() / BLOCK_SIZE; @@ -758,7 +758,7 @@ impl MerkleBlob { } fn get_min_height_leaf(&self) -> Result { - let block = MerkleBlobBreadthFirstIterator::new(&self.blob) + let (_index, block) = MerkleBlobBreadthFirstIterator::new(&self.blob) .next() .ok_or(Error::UnableToFindALeaf)?; @@ -1301,12 +1301,12 @@ struct MerkleBlobLeftChildFirstIteratorItem { } pub struct MerkleBlobLeftChildFirstIterator<'a> { - blob: &'a [u8], + blob: &'a Vec, deque: VecDeque, } impl<'a> MerkleBlobLeftChildFirstIterator<'a> { - fn new(blob: &'a [u8]) -> Self { + fn new(blob: &'a Vec) -> Self { let mut deque = VecDeque::new(); if blob.len() / BLOCK_SIZE > 0 { deque.push_back(MerkleBlobLeftChildFirstIteratorItem { @@ -1356,12 +1356,12 @@ impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { } pub struct MerkleBlobParentFirstIterator<'a> { - blob: &'a [u8], + blob: &'a Vec, deque: VecDeque, } impl<'a> MerkleBlobParentFirstIterator<'a> { - fn new(blob: &'a [u8]) -> Self { + fn new(blob: &'a Vec) -> Self { let mut deque = VecDeque::new(); if blob.len() / BLOCK_SIZE > 0 { deque.push_back(TreeIndex(0)); @@ -1391,13 +1391,13 @@ impl Iterator for MerkleBlobParentFirstIterator<'_> { } pub struct MerkleBlobBreadthFirstIterator<'a> { - blob: &'a [u8], + blob: &'a Vec, deque: VecDeque, } impl<'a> MerkleBlobBreadthFirstIterator<'a> { #[allow(unused)] - fn new(blob: &'a [u8]) -> Self { + fn new(blob: &'a Vec) -> Self { let mut deque = VecDeque::new(); if blob.len() / BLOCK_SIZE > 0 { deque.push_back(TreeIndex(0)); @@ -1408,7 +1408,7 @@ impl<'a> MerkleBlobBreadthFirstIterator<'a> { } impl Iterator for MerkleBlobBreadthFirstIterator<'_> { - type Item = Block; + type Item = (TreeIndex, Block); fn next(&mut self) -> Option { // left sibling first, parent depth before child depth @@ -1419,7 +1419,7 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { let block = Block::from_bytes(block_bytes).unwrap(); match block.node { - Node::Leaf(..) => return Some(block), + Node::Leaf(..) 
=> return Some((index, block)), Node::Internal(node) => { self.deque.push_back(node.left); self.deque.push_back(node.right); @@ -1443,6 +1443,7 @@ mod dot; mod tests { use super::*; use crate::merkle::dot::DotLines; + use expect_test::{expect, Expect}; use rstest::{fixture, rstest}; use std::time::{Duration, Instant}; @@ -1533,6 +1534,34 @@ mod tests { blob } + #[fixture] + fn traversal_blob(mut small_blob: MerkleBlob) -> MerkleBlob { + small_blob + .insert( + KvId(103), + KvId(204), + &sha256_num(0x1324), + InsertLocation::Leaf { + index: TreeIndex(1), + side: Side::Right, + }, + ) + .unwrap(); + small_blob + .insert( + KvId(307), + KvId(404), + &sha256_num(0x9183), + InsertLocation::Leaf { + index: TreeIndex(3), + side: Side::Right, + }, + ) + .unwrap(); + + small_blob + } + #[rstest] fn test_get_lineage(small_blob: MerkleBlob) { let lineage = small_blob.get_lineage_with_indexes(TreeIndex(2)).unwrap(); @@ -1895,4 +1924,206 @@ mod tests { assert_eq!(after, expected); } + + fn iterator_test_reference(index: TreeIndex, block: &Block) -> (u32, NodeType, i64, i64, Hash) { + match block.node { + Node::Leaf(leaf) => ( + index.0, + block.metadata.node_type, + leaf.key.0, + leaf.value.0, + block.node.hash(), + ), + Node::Internal(internal) => ( + index.0, + block.metadata.node_type, + internal.left.0 as i64, + internal.right.0 as i64, + block.node.hash(), + ), + } + } + + #[rstest] + // expect-test is adding them back + #[allow(clippy::needless_raw_string_hashes)] + #[case::left_child_first( + "left child first", + MerkleBlobLeftChildFirstIterator::new, + expect![[r#" + [ + ( + 1, + Leaf, + 283686952306183, + 1157726452361532951, + d8ddfc94e7201527a6a93ee04aed8c5c122ac38af6dbf6e5f1caefba2597230d, + ), + ( + 3, + Leaf, + 103, + 204, + 2d47301cff01acc863faa5f57e8fbc632114f1dc764772852ed0c29c0f248bd3, + ), + ( + 5, + Leaf, + 307, + 404, + 97148f80dd9289a1b67527c045fd47662d575ccdb594701a56c2255ac84f6113, + ), + ( + 6, + Internal, + 3, + 5, + b946284149e4f4a0e767ef2feb397533fb112bf4d99c887348cec4438e38c1ce, + ), + ( + 4, + Internal, + 1, + 6, + eee0c40977ba1c0e16a467f30f64d9c2579ff25dd01913e33962c3f1db86c2ea, + ), + ( + 2, + Leaf, + 2315169217770759719, + 3472611983179986487, + 0f980325ebe9426fa295f3f69cc38ef8fe6ce8f3b9f083556c0f927e67e56651, + ), + ( + 0, + Internal, + 4, + 2, + 0e4a8b1ecee43f457bbe2b30e94ac2afc0d3a6536f891a2ced5e96ce07fe9932, + ), + ] + "#]], + )] + // expect-test is adding them back + #[allow(clippy::needless_raw_string_hashes)] + #[case::parent_first( + "parent first", + MerkleBlobParentFirstIterator::new, + expect![[r#" + [ + ( + 0, + Internal, + 4, + 2, + 0e4a8b1ecee43f457bbe2b30e94ac2afc0d3a6536f891a2ced5e96ce07fe9932, + ), + ( + 4, + Internal, + 1, + 6, + eee0c40977ba1c0e16a467f30f64d9c2579ff25dd01913e33962c3f1db86c2ea, + ), + ( + 2, + Leaf, + 2315169217770759719, + 3472611983179986487, + 0f980325ebe9426fa295f3f69cc38ef8fe6ce8f3b9f083556c0f927e67e56651, + ), + ( + 1, + Leaf, + 283686952306183, + 1157726452361532951, + d8ddfc94e7201527a6a93ee04aed8c5c122ac38af6dbf6e5f1caefba2597230d, + ), + ( + 6, + Internal, + 3, + 5, + b946284149e4f4a0e767ef2feb397533fb112bf4d99c887348cec4438e38c1ce, + ), + ( + 3, + Leaf, + 103, + 204, + 2d47301cff01acc863faa5f57e8fbc632114f1dc764772852ed0c29c0f248bd3, + ), + ( + 5, + Leaf, + 307, + 404, + 97148f80dd9289a1b67527c045fd47662d575ccdb594701a56c2255ac84f6113, + ), + ] + "#]])] + // expect-test is adding them back + #[allow(clippy::needless_raw_string_hashes)] + #[case::breadth_first( + "breadth first", + MerkleBlobBreadthFirstIterator::new, + 
expect![[r#" + [ + ( + 2, + Leaf, + 2315169217770759719, + 3472611983179986487, + 0f980325ebe9426fa295f3f69cc38ef8fe6ce8f3b9f083556c0f927e67e56651, + ), + ( + 1, + Leaf, + 283686952306183, + 1157726452361532951, + d8ddfc94e7201527a6a93ee04aed8c5c122ac38af6dbf6e5f1caefba2597230d, + ), + ( + 3, + Leaf, + 103, + 204, + 2d47301cff01acc863faa5f57e8fbc632114f1dc764772852ed0c29c0f248bd3, + ), + ( + 5, + Leaf, + 307, + 404, + 97148f80dd9289a1b67527c045fd47662d575ccdb594701a56c2255ac84f6113, + ), + ] + "#]])] + fn test_iterators<'a, F, T>( + #[case] note: &str, + #[case] iterator_new: F, + #[case] expected: Expect, + #[by_ref] traversal_blob: &'a MerkleBlob, + ) where + F: Fn(&'a Vec) -> T, + T: Iterator, + { + let mut dot_actual = traversal_blob.to_dot(); + dot_actual.set_note(note); + + let mut actual = vec![]; + { + let blob: &Vec = &traversal_blob.blob; + for (index, block) in iterator_new(blob) { + actual.push(iterator_test_reference(index, &block)); + dot_actual.push_traversal(index); + } + } + + traversal_blob.to_dot(); + + open_dot(&mut dot_actual); + + expected.assert_debug_eq(&actual); + } } diff --git a/crates/chia-datalayer/src/merkle/dot.rs b/crates/chia-datalayer/src/merkle/dot.rs index 72b75f201..498d6d623 100644 --- a/crates/chia-datalayer/src/merkle/dot.rs +++ b/crates/chia-datalayer/src/merkle/dot.rs @@ -8,7 +8,9 @@ pub struct DotLines { pub nodes: Vec, pub connections: Vec, pub pair_boxes: Vec, + pub traversal: Vec, pub note: String, + pub last_traversed_index: Option, } impl Default for DotLines { @@ -23,7 +25,9 @@ impl DotLines { nodes: vec![], connections: vec![], pair_boxes: vec![], + traversal: vec![], note: String::new(), + last_traversed_index: None, } } @@ -31,17 +35,34 @@ impl DotLines { self.nodes.append(&mut other.nodes); self.connections.append(&mut other.connections); self.pair_boxes.append(&mut other.pair_boxes); + self.traversal.append(&mut other.traversal); + } + + pub fn push_traversal(&mut self, index: TreeIndex) { + if let Some(last_index) = self.last_traversed_index { + self.traversal.push(format!( + r#"node_{last_index} -> node_{index} [constraint=false; color="red"]"# + )); + } + self.last_traversed_index = Some(index); } pub fn dump(&mut self) -> String { // TODO: consuming itself, secretly let note = &self.note; - let mut result = vec![format!("# {note}"), String::new(), "digraph {".to_string()]; + let mut result = vec![]; + if !note.is_empty() { + result.push(format!("# {note}")); + result.push(String::new()); + } + result.push("digraph {".to_string()); result.append(&mut self.nodes); result.append(&mut self.connections); result.append(&mut self.pair_boxes); + result.append(&mut self.traversal); result.push("}".to_string()); + result.push(String::new()); result.join("\n") } @@ -56,7 +77,7 @@ impl Node { pub fn to_dot(&self, index: TreeIndex) -> DotLines { // TODO: can this be done without introducing a blank line? 
let node_to_parent = match self.parent() { - Some(parent) => format!("node_{index} -> node_{parent};"), + Some(parent) => format!("node_{index} -> node_{parent} [constraint=false]"), None => String::new(), }; @@ -71,17 +92,18 @@ impl Node { node_to_parent, ], pair_boxes: vec![ - format!("node [shape = box]; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}}"), + format!("subgraph cluster_node_{index}_children {{ style=invis; {{rank = same; node_{left}->node_{right}[style=invis]; rankdir = LR}} }}"), ], note: String::new(), + ..Default::default() }, Node::Leaf (LeafNode{key, value, ..}) => DotLines{ nodes: vec![ format!("node_{index} [shape=box, label=\"{index}\\nvalue: {key}\\nvalue: {value}\"];"), ], connections: vec![node_to_parent], - pair_boxes: vec![], note: String::new(), + ..Default::default() }, } } From 7867000996e0824b2de457528e5cac091e54ab0d Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Dec 2024 13:31:10 -0500 Subject: [PATCH 157/181] remove unneeded clear --- crates/chia-datalayer/src/merkle.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 3c2d2552d..2e98e1ba5 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -459,7 +459,6 @@ impl MerkleBlob { }), }; - self.clear(); self.insert_entry_to_blob(self.extend_index(), &new_leaf_block)?; Ok(()) From be407fd4bd5aa3a4cd7403e8d15ff4df724db6b8 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Dec 2024 13:47:42 -0500 Subject: [PATCH 158/181] basic merkle blob doc comment --- crates/chia-datalayer/src/merkle.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 2e98e1ba5..55334cfa4 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -362,6 +362,13 @@ fn get_free_indexes_and_keys_values_indexes( (free_indexes, key_to_index) } +/// Stores a DataLayer merkle tree in bytes and provides serialization on each access so that only +/// the parts presently in use are stored in active objects. The bytes are grouped as blocks of +/// equal size regardless of being internal vs. external nodes so that block indexes can be used +/// for references to particular nodes and readily converted to byte indexes. The leaf nodes +/// do not hold the DataLayer key and value data but instead an id for each of the key and value +/// such that the code using a merkle blob can store the key and value as they see fit. Each node +/// stores the hash for the merkle aspect of the tree. 
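// A minimal sketch, assuming the BLOCK_SIZE, METADATA_SIZE and DATA_SIZE constants defined
// earlier in this file, of the index-to-byte arithmetic the doc comment above describes; the
// crate's own block_range helper (used by the iterators further below) follows the same idea.
//
//     fn block_byte_range(index: TreeIndex) -> std::ops::Range<usize> {
//         // one fixed-size block per node, so the byte offset is a simple multiply
//         let start = index.0 as usize * BLOCK_SIZE;
//         // the first METADATA_SIZE bytes hold NodeMetadata (node type, dirty flag),
//         // the remaining DATA_SIZE bytes hold the serialized node itself
//         start..start + BLOCK_SIZE
//     }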
#[cfg_attr(feature = "py-bindings", pyclass(get_all))] #[derive(Debug)] pub struct MerkleBlob { From 8b000e584f8ee2031a7795652b67d0905fa55796 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Fri, 6 Dec 2024 16:55:52 -0500 Subject: [PATCH 159/181] fuzzing and first fix for corrupt data to `MerkleBlob::new()` --- Cargo.lock | 10 +- Cargo.toml | 1 + crates/chia-datalayer/fuzz/.gitignore | 4 + crates/chia-datalayer/fuzz/Cargo.toml | 21 ++++ .../fuzz/fuzz_targets/merkle_blob_new.rs | 9 ++ crates/chia-datalayer/src/merkle.rs | 104 ++++++++++-------- crates/chia-datalayer/src/merkle/dot.rs | 9 +- 7 files changed, 110 insertions(+), 48 deletions(-) create mode 100644 crates/chia-datalayer/fuzz/.gitignore create mode 100644 crates/chia-datalayer/fuzz/Cargo.toml create mode 100644 crates/chia-datalayer/fuzz/fuzz_targets/merkle_blob_new.rs diff --git a/Cargo.lock b/Cargo.lock index 20289a677..e844b32b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -392,6 +392,14 @@ dependencies = [ "url", ] +[[package]] +name = "chia-datalayer-fuzz" +version = "0.0.0" +dependencies = [ + "chia-datalayer", + "libfuzzer-sys", +] + [[package]] name = "chia-fuzz" version = "0.16.0" diff --git a/Cargo.toml b/Cargo.toml index 77f9efd53..42caa67f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -118,6 +118,7 @@ clvm-utils = { path = "./crates/clvm-utils", version = "0.16.0" } clvm-derive = { path = "./crates/clvm-derive", version = "0.13.0" } chia-fuzz = { path = "./crates/chia-consensus/fuzz", version = "0.16.0" } chia-bls-fuzz = { path = "./crates/chia-bls/fuzz", version = "0.16.0" } +chia-datalayer-fuzz = { path = "./crates/chia-datalayer/fuzz", version = "0.16.0" } chia-protocol-fuzz = { path = "./crates/chia-protocol/fuzz", version = "0.16.0" } chia-puzzles-fuzz = { path = "./crates/chia-puzzles/fuzz", version = "0.16.0" } clvm-traits-fuzz = { path = "./crates/clvm-traits/fuzz", version = "0.16.0" } diff --git a/crates/chia-datalayer/fuzz/.gitignore b/crates/chia-datalayer/fuzz/.gitignore new file mode 100644 index 000000000..1a45eee77 --- /dev/null +++ b/crates/chia-datalayer/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/crates/chia-datalayer/fuzz/Cargo.toml b/crates/chia-datalayer/fuzz/Cargo.toml new file mode 100644 index 000000000..88aa532f8 --- /dev/null +++ b/crates/chia-datalayer/fuzz/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "chia-datalayer-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[dependencies.chia-datalayer] +path = ".." 
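# Assuming the standard cargo-fuzz workflow (installed via `cargo install cargo-fuzz`),
# the target declared below is typically run from crates/chia-datalayer with:
#
#     cargo fuzz run merkle_blob_new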
+ +[[bin]] +name = "merkle_blob_new" +path = "fuzz_targets/merkle_blob_new.rs" +test = false +doc = false +bench = false diff --git a/crates/chia-datalayer/fuzz/fuzz_targets/merkle_blob_new.rs b/crates/chia-datalayer/fuzz/fuzz_targets/merkle_blob_new.rs new file mode 100644 index 000000000..3427e6a37 --- /dev/null +++ b/crates/chia-datalayer/fuzz/fuzz_targets/merkle_blob_new.rs @@ -0,0 +1,9 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; + +use chia_datalayer::MerkleBlob; + +fuzz_target!(|data: &[u8]| { + let _ = MerkleBlob::new(data.to_vec()); +}); diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 55334cfa4..8ad13b708 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -82,6 +82,9 @@ pub enum Error { #[error("unable to find a leaf")] UnableToFindALeaf, + #[error("error while finding a leaf: {0:?}")] + FailedWhileFindingALeaf(String), + #[error("unknown key: {0:?}")] UnknownKey(KvId), @@ -338,13 +341,14 @@ impl Block { fn get_free_indexes_and_keys_values_indexes( blob: &Vec, -) -> (HashSet, HashMap) { +) -> Result<(HashSet, HashMap), Error> { let index_count = blob.len() / BLOCK_SIZE; let mut seen_indexes: Vec = vec![false; index_count]; let mut key_to_index: HashMap = HashMap::default(); - for (index, block) in MerkleBlobLeftChildFirstIterator::new(blob) { + for item in MerkleBlobLeftChildFirstIterator::new(blob) { + let (index, block) = item?; seen_indexes[index.0 as usize] = true; if let Node::Leaf(leaf) = block.node { @@ -359,7 +363,7 @@ fn get_free_indexes_and_keys_values_indexes( } } - (free_indexes, key_to_index) + Ok((free_indexes, key_to_index)) } /// Stores a DataLayer merkle tree in bytes and provides serialization on each access so that only @@ -385,7 +389,7 @@ impl MerkleBlob { return Err(Error::InvalidBlobLength(remainder)); } - let (free_indexes, key_to_index) = get_free_indexes_and_keys_values_indexes(&blob); + let (free_indexes, key_to_index) = get_free_indexes_and_keys_values_indexes(&blob)?; Ok(Self { blob, @@ -766,7 +770,7 @@ impl MerkleBlob { fn get_min_height_leaf(&self) -> Result { let (_index, block) = MerkleBlobBreadthFirstIterator::new(&self.blob) .next() - .ok_or(Error::UnableToFindALeaf)?; + .ok_or(Error::UnableToFindALeaf)??; Ok(block .node @@ -851,7 +855,8 @@ impl MerkleBlob { let mut internal_count: usize = 0; let mut child_to_parent: HashMap = HashMap::new(); - for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob) { + for item in MerkleBlobParentFirstIterator::new(&self.blob) { + let (index, block) = item?; if let Some(parent) = block.node.parent() { assert_eq!(child_to_parent.remove(&index), Some(parent)); } @@ -1099,12 +1104,14 @@ impl MerkleBlob { // } pub fn calculate_lazy_hashes(&mut self) -> Result<(), Error> { - // OPT: really want a truncated traversal, not filter // OPT: yeah, storing the whole set of blocks via collect is not great - for (index, mut block) in MerkleBlobLeftChildFirstIterator::new(&self.blob) - .filter(|(_, block)| block.metadata.dirty) - .collect::>() - { + for item in MerkleBlobLeftChildFirstIterator::new(&self.blob).collect::>() { + let (index, mut block) = item?; + // OPT: really want a pruned traversal, not filter + if !block.metadata.dirty { + continue; + } + let Node::Internal(ref leaf) = block.node else { panic!("leaves should not be dirty") }; @@ -1123,10 +1130,14 @@ impl MerkleBlob { impl PartialEq for MerkleBlob { fn eq(&self, other: &Self) -> bool { // NOTE: this is checking tree structure equality, not serialized bytes 
equality - for ((_, self_block), (_, other_block)) in zip( + for item in zip( MerkleBlobLeftChildFirstIterator::new(&self.blob), MerkleBlobLeftChildFirstIterator::new(&other.blob), ) { + let (Ok((_, self_block)), Ok((_, other_block))) = item else { + // TODO: it's an error though, hmm + return false; + }; if (self_block.metadata.dirty || other_block.metadata.dirty) || self_block.node.hash() != other_block.node.hash() { @@ -1242,9 +1253,10 @@ impl MerkleBlob { pub fn py_get_nodes_with_indexes(&self, py: Python<'_>) -> PyResult { let list = pyo3::types::PyList::empty_bound(py); - for (index, block) in MerkleBlobParentFirstIterator::new(&self.blob) { + for item in MerkleBlobParentFirstIterator::new(&self.blob) { use pyo3::conversion::IntoPy; use pyo3::types::PyListMethods; + let (index, block) = item.map_err(|e| PyValueError::new_err(e.to_string()))?; list.append((index.into_py(py), block.node.into_py(py)))?; } @@ -1326,7 +1338,7 @@ impl<'a> MerkleBlobLeftChildFirstIterator<'a> { } impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { - type Item = (TreeIndex, Block); + type Item = Result<(TreeIndex, Block), Error>; fn next(&mut self) -> Option { // left sibling first, children before parents @@ -1334,13 +1346,17 @@ impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { loop { let item = self.deque.pop_front()?; let block_bytes: BlockBytes = self.blob[block_range(item.index)].try_into().unwrap(); - let block = Block::from_bytes(block_bytes).unwrap(); + + let block = match Block::from_bytes(block_bytes) { + Ok(block) => block, + Err(e) => return Some(Err(e)), + }; match block.node { - Node::Leaf(..) => return Some((item.index, block)), + Node::Leaf(..) => return Some(Ok((item.index, block))), Node::Internal(ref node) => { if item.visited { - return Some((item.index, block)); + return Some(Ok((item.index, block))); }; self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { @@ -1378,7 +1394,7 @@ impl<'a> MerkleBlobParentFirstIterator<'a> { } impl Iterator for MerkleBlobParentFirstIterator<'_> { - type Item = (TreeIndex, Block); + type Item = Result<(TreeIndex, Block), Error>; fn next(&mut self) -> Option { // left sibling first, parents before children @@ -1392,7 +1408,7 @@ impl Iterator for MerkleBlobParentFirstIterator<'_> { self.deque.push_back(node.right); } - Some((index, block)) + Some(Ok((index, block))) } } @@ -1414,7 +1430,7 @@ impl<'a> MerkleBlobBreadthFirstIterator<'a> { } impl Iterator for MerkleBlobBreadthFirstIterator<'_> { - type Item = (TreeIndex, Block); + type Item = Result<(TreeIndex, Block), Error>; fn next(&mut self) -> Option { // left sibling first, parent depth before child depth @@ -1425,7 +1441,7 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { let block = Block::from_bytes(block_bytes).unwrap(); match block.node { - Node::Leaf(..) => return Some((index, block)), + Node::Leaf(..) 
=> return Some(Ok((index, block))), Node::Internal(node) => { self.deque.push_back(node.left); self.deque.push_back(node.right); @@ -1647,7 +1663,7 @@ mod tests { merkle_blob .insert(key_value_id, key_value_id, &hash, InsertLocation::Auto {}) .unwrap(); - dots.push(merkle_blob.to_dot().dump()); + dots.push(merkle_blob.to_dot().unwrap().dump()); } merkle_blob.check_integrity().unwrap(); @@ -1657,7 +1673,7 @@ mod tests { merkle_blob.delete(*key_value_id).unwrap(); merkle_blob.calculate_lazy_hashes().unwrap(); assert_eq!(merkle_blob, reference_blobs[key_value_id.0 as usize]); - dots.push(merkle_blob.to_dot().dump()); + dots.push(merkle_blob.to_dot().unwrap().dump()); } } @@ -1666,7 +1682,7 @@ mod tests { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); let key_value_id = KvId(1); - open_dot(merkle_blob.to_dot().set_note("empty")); + open_dot(merkle_blob.to_dot().unwrap().set_note("empty")); merkle_blob .insert( key_value_id, @@ -1675,7 +1691,7 @@ mod tests { InsertLocation::Auto {}, ) .unwrap(); - open_dot(merkle_blob.to_dot().set_note("first after")); + open_dot(merkle_blob.to_dot().unwrap().set_note("first after")); assert_eq!(merkle_blob.key_to_index.len(), 1); } @@ -1690,7 +1706,7 @@ mod tests { let mut last_key: KvId = KvId(0); for i in 1..=pre_count { let key = KvId(i as i64); - open_dot(merkle_blob.to_dot().set_note("empty")); + open_dot(merkle_blob.to_dot().unwrap().set_note("empty")); merkle_blob .insert(key, key, &sha256_num(key.0), InsertLocation::Auto {}) .unwrap(); @@ -1698,7 +1714,7 @@ mod tests { } let key_value_id: KvId = KvId((pre_count + 1) as i64); - open_dot(merkle_blob.to_dot().set_note("first after")); + open_dot(merkle_blob.to_dot().unwrap().set_note("first after")); merkle_blob .insert( key_value_id, @@ -1710,7 +1726,7 @@ mod tests { }, ) .unwrap(); - open_dot(merkle_blob.to_dot().set_note("first after")); + open_dot(merkle_blob.to_dot().unwrap().set_note("first after")); let sibling = merkle_blob .get_node(merkle_blob.key_to_index[&last_key]) @@ -1741,7 +1757,7 @@ mod tests { let mut merkle_blob = MerkleBlob::new(vec![]).unwrap(); let key_value_id = KvId(1); - open_dot(merkle_blob.to_dot().set_note("empty")); + open_dot(merkle_blob.to_dot().unwrap().set_note("empty")); merkle_blob .insert( key_value_id, @@ -1750,7 +1766,7 @@ mod tests { InsertLocation::Auto {}, ) .unwrap(); - open_dot(merkle_blob.to_dot().set_note("first after")); + open_dot(merkle_blob.to_dot().unwrap().set_note("first after")); merkle_blob.check_integrity().unwrap(); merkle_blob.delete(key_value_id).unwrap(); @@ -1772,11 +1788,11 @@ mod tests { #[rstest] fn test_get_new_index_with_free_index(mut small_blob: MerkleBlob) { - open_dot(small_blob.to_dot().set_note("initial")); + open_dot(small_blob.to_dot().unwrap().set_note("initial")); let key = KvId(0x0001_0203_0405_0607); let _ = small_blob.key_to_index[&key]; small_blob.delete(key).unwrap(); - open_dot(small_blob.to_dot().set_note("after delete")); + open_dot(small_blob.to_dot().unwrap().set_note("after delete")); let expected = HashSet::from([TreeIndex(1), TreeIndex(2)]); assert_eq!(small_blob.free_indexes, expected); @@ -1814,7 +1830,7 @@ mod tests { let mut blob = small_blob.blob.clone(); let expected_free_index = TreeIndex((blob.len() / BLOCK_SIZE) as u32); blob.extend_from_slice(&[0; BLOCK_SIZE]); - let (free_indexes, _) = get_free_indexes_and_keys_values_indexes(&blob); + let (free_indexes, _) = get_free_indexes_and_keys_values_indexes(&blob).unwrap(); assert_eq!(free_indexes, HashSet::from([expected_free_index])); } @@ -1833,11 
+1849,11 @@ mod tests { insert_blob .insert(key, value, &sha256_num(key.0), InsertLocation::Auto {}) .unwrap(); - open_dot(insert_blob.to_dot().set_note("first after")); + open_dot(insert_blob.to_dot().unwrap().set_note("first after")); let mut upsert_blob = MerkleBlob::new(small_blob.blob.clone()).unwrap(); upsert_blob.upsert(key, value, &sha256_num(key.0)).unwrap(); - open_dot(upsert_blob.to_dot().set_note("first after")); + open_dot(upsert_blob.to_dot().unwrap().set_note("first after")); assert_eq!(insert_blob.blob, upsert_blob.blob); } @@ -1856,9 +1872,9 @@ mod tests { MerkleBlobLeftChildFirstIterator::new(&small_blob.blob).collect::>(); assert_eq!(before_blocks.len(), after_blocks.len()); - for ((before_index, before_block), (after_index, after_block)) in - zip(before_blocks, after_blocks) - { + for item in zip(before_blocks, after_blocks) { + let ((before_index, before_block), (after_index, after_block)) = + (item.0.unwrap(), item.1.unwrap()); assert_eq!(before_block.node.parent(), after_block.node.parent()); assert_eq!(before_index, after_index); let before: LeafNode = match before_block.node { @@ -1905,7 +1921,7 @@ mod tests { blob.insert(i, i, &sha256_num(i.0), InsertLocation::Auto {}) .unwrap(); } - open_dot(blob.to_dot().set_note("initial")); + open_dot(blob.to_dot().unwrap().set_note("initial")); let mut batch: Vec<((KvId, KvId), Hash)> = vec![]; @@ -1922,6 +1938,7 @@ mod tests { open_dot( blob.to_dot() + .unwrap() .set_note(&format!("after batch insert of {count} values")), ); @@ -2112,21 +2129,22 @@ mod tests { #[by_ref] traversal_blob: &'a MerkleBlob, ) where F: Fn(&'a Vec) -> T, - T: Iterator, + T: Iterator>, { - let mut dot_actual = traversal_blob.to_dot(); + let mut dot_actual = traversal_blob.to_dot().unwrap(); dot_actual.set_note(note); let mut actual = vec![]; { let blob: &Vec = &traversal_blob.blob; - for (index, block) in iterator_new(blob) { + for item in iterator_new(blob) { + let (index, block) = item.unwrap(); actual.push(iterator_test_reference(index, &block)); dot_actual.push_traversal(index); } } - traversal_blob.to_dot(); + traversal_blob.to_dot().unwrap(); open_dot(&mut dot_actual); diff --git a/crates/chia-datalayer/src/merkle/dot.rs b/crates/chia-datalayer/src/merkle/dot.rs index 498d6d623..9689cc354 100644 --- a/crates/chia-datalayer/src/merkle/dot.rs +++ b/crates/chia-datalayer/src/merkle/dot.rs @@ -1,5 +1,5 @@ use crate::merkle::{ - InternalNode, LeafNode, MerkleBlob, MerkleBlobLeftChildFirstIterator, Node, TreeIndex, + Error, InternalNode, LeafNode, MerkleBlob, MerkleBlobLeftChildFirstIterator, Node, TreeIndex, }; use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use url::Url; @@ -110,13 +110,14 @@ impl Node { } impl MerkleBlob { - pub fn to_dot(&self) -> DotLines { + pub fn to_dot(&self) -> Result { let mut result = DotLines::new(); - for (index, block) in MerkleBlobLeftChildFirstIterator::new(&self.blob) { + for item in MerkleBlobLeftChildFirstIterator::new(&self.blob) { + let (index, block) = item?; result.push(block.node.to_dot(index)); } - result + Ok(result) } } From e324d7e7b45c95da92c9fa39900765048a0ddd34 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Dec 2024 14:55:49 -0500 Subject: [PATCH 160/181] fuzzer followup --- crates/chia-datalayer/src/merkle.rs | 70 ++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 8ad13b708..aefbbfa5f 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ 
b/crates/chia-datalayer/src/merkle.rs @@ -105,6 +105,12 @@ pub enum Error { #[error("index not a child: {0}")] IndexIsNotAChild(TreeIndex), + + #[error("cycle found")] + CycleFound, + + #[error("block index out of bounds: {0}")] + BlockIndexOutOfBounds(TreeIndex), } // assumptions @@ -1313,6 +1319,18 @@ impl MerkleBlob { } } +fn try_get_block(blob: &[u8], index: TreeIndex) -> Result { + // TODO: check limits and return error + let range = block_range(index); + let block_bytes: BlockBytes = blob + .get(range) + .ok_or(Error::BlockIndexOutOfBounds(index))? + .try_into() + .unwrap(); + + Block::from_bytes(block_bytes) +} + struct MerkleBlobLeftChildFirstIteratorItem { visited: bool, index: TreeIndex, @@ -1321,6 +1339,7 @@ struct MerkleBlobLeftChildFirstIteratorItem { pub struct MerkleBlobLeftChildFirstIterator<'a> { blob: &'a Vec, deque: VecDeque, + already_queued: HashSet, } impl<'a> MerkleBlobLeftChildFirstIterator<'a> { @@ -1333,7 +1352,11 @@ impl<'a> MerkleBlobLeftChildFirstIterator<'a> { }); } - Self { blob, deque } + Self { + blob, + deque, + already_queued: HashSet::new(), + } } } @@ -1345,9 +1368,7 @@ impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { loop { let item = self.deque.pop_front()?; - let block_bytes: BlockBytes = self.blob[block_range(item.index)].try_into().unwrap(); - - let block = match Block::from_bytes(block_bytes) { + let block = match try_get_block(self.blob, item.index) { Ok(block) => block, Err(e) => return Some(Err(e)), }; @@ -1359,6 +1380,11 @@ impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { return Some(Ok((item.index, block))); }; + if self.already_queued.contains(&item.index) { + return Some(Err(Error::CycleFound)); + } + self.already_queued.insert(item.index); + self.deque.push_front(MerkleBlobLeftChildFirstIteratorItem { visited: true, index: item.index, @@ -1380,6 +1406,7 @@ impl Iterator for MerkleBlobLeftChildFirstIterator<'_> { pub struct MerkleBlobParentFirstIterator<'a> { blob: &'a Vec, deque: VecDeque, + already_queued: HashSet, } impl<'a> MerkleBlobParentFirstIterator<'a> { @@ -1389,7 +1416,11 @@ impl<'a> MerkleBlobParentFirstIterator<'a> { deque.push_back(TreeIndex(0)); } - Self { blob, deque } + Self { + blob, + deque, + already_queued: HashSet::new(), + } } } @@ -1400,10 +1431,17 @@ impl Iterator for MerkleBlobParentFirstIterator<'_> { // left sibling first, parents before children let index = self.deque.pop_front()?; - let block_bytes: BlockBytes = self.blob[block_range(index)].try_into().unwrap(); - let block = Block::from_bytes(block_bytes).unwrap(); + let block = match try_get_block(self.blob, index) { + Ok(block) => block, + Err(e) => return Some(Err(e)), + }; if let Node::Internal(ref node) = block.node { + if self.already_queued.contains(&index) { + return Some(Err(Error::CycleFound)); + } + self.already_queued.insert(index); + self.deque.push_back(node.left); self.deque.push_back(node.right); } @@ -1415,6 +1453,7 @@ impl Iterator for MerkleBlobParentFirstIterator<'_> { pub struct MerkleBlobBreadthFirstIterator<'a> { blob: &'a Vec, deque: VecDeque, + already_queued: HashSet, } impl<'a> MerkleBlobBreadthFirstIterator<'a> { @@ -1425,7 +1464,11 @@ impl<'a> MerkleBlobBreadthFirstIterator<'a> { deque.push_back(TreeIndex(0)); } - Self { blob, deque } + Self { + blob, + deque, + already_queued: HashSet::new(), + } } } @@ -1437,12 +1480,19 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { loop { let index = self.deque.pop_front()?; - let block_bytes: BlockBytes = self.blob[block_range(index)].try_into().unwrap(); - let block 
= Block::from_bytes(block_bytes).unwrap(); + let block = match try_get_block(self.blob, index) { + Ok(block) => block, + Err(e) => return Some(Err(e)), + }; match block.node { Node::Leaf(..) => return Some(Ok((index, block))), Node::Internal(node) => { + if self.already_queued.contains(&index) { + return Some(Err(Error::CycleFound)); + } + self.already_queued.insert(index); + self.deque.push_back(node.left); self.deque.push_back(node.right); } From dc79b1a301ed985316892eda6bc7742c1297e54a Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Dec 2024 16:55:31 -0500 Subject: [PATCH 161/181] some coverage --- crates/chia-datalayer/src/merkle.rs | 56 +++++++++++++++++++++-------- tests/test_datalayer.py | 13 ++++++- 2 files changed, 54 insertions(+), 15 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index aefbbfa5f..4183076d2 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -416,7 +416,7 @@ impl MerkleBlob { value: KvId, hash: &Hash, insert_location: InsertLocation, - ) -> Result<(), Error> { + ) -> Result { if self.key_to_index.contains_key(&key) { return Err(Error::KeyAlreadyPresent); } @@ -434,7 +434,7 @@ impl MerkleBlob { if !self.key_to_index.is_empty() { return Err(Error::UnableToInsertAsRootOfNonEmptyTree); }; - self.insert_first(key, value, hash)?; + self.insert_first(key, value, hash) } InsertLocation::Leaf { index, side } => { let old_leaf = self.get_node(index)?.try_into_leaf()?; @@ -452,17 +452,15 @@ impl MerkleBlob { }; if self.key_to_index.len() == 1 { - self.insert_second(node, &old_leaf, &internal_node_hash, side)?; + self.insert_second(node, &old_leaf, &internal_node_hash, side) } else { - self.insert_third_or_later(node, &old_leaf, index, &internal_node_hash, side)?; + self.insert_third_or_later(node, &old_leaf, index, &internal_node_hash, side) } } } - - Ok(()) } - fn insert_first(&mut self, key: KvId, value: KvId, hash: &Hash) -> Result<(), Error> { + fn insert_first(&mut self, key: KvId, value: KvId, hash: &Hash) -> Result { let new_leaf_block = Block { metadata: NodeMetadata { node_type: NodeType::Leaf, @@ -476,9 +474,10 @@ impl MerkleBlob { }), }; - self.insert_entry_to_blob(self.extend_index(), &new_leaf_block)?; + let index = self.extend_index(); + self.insert_entry_to_blob(index, &new_leaf_block)?; - Ok(()) + Ok(index) } fn insert_second( @@ -487,7 +486,7 @@ impl MerkleBlob { old_leaf: &LeafNode, internal_node_hash: &Hash, side: Side, - ) -> Result<(), Error> { + ) -> Result { self.clear(); let root_index = self.get_new_index(); let left_index = self.get_new_index(); @@ -544,7 +543,7 @@ impl MerkleBlob { self.insert_entry_to_blob(index, &block)?; } - Ok(()) + Ok(nodes[1].0) } fn insert_third_or_later( @@ -554,7 +553,7 @@ impl MerkleBlob { old_leaf_index: TreeIndex, internal_node_hash: &Hash, side: Side, - ) -> Result<(), Error> { + ) -> Result { let new_leaf_index = self.get_new_index(); let new_internal_node_index = self.get_new_index(); @@ -610,7 +609,7 @@ impl MerkleBlob { self.mark_lineage_as_dirty(old_parent_index)?; - Ok(()) + Ok(new_leaf_index) } pub fn batch_insert(&mut self, mut keys_values_hashes: I) -> Result<(), Error> @@ -952,6 +951,7 @@ impl MerkleBlob { } } + // TODO: not really that random fn get_random_insert_location_by_seed( &self, seed_bytes: &[u8], @@ -962,6 +962,7 @@ impl MerkleBlob { return Ok(InsertLocation::AsRoot {}); } + // TODO: zero means left here but right below? let side = if (seed_bytes.last().ok_or(Error::ZeroLengthSeedNotAllowed)? 
& 1 << 7) == 0 { Side::Left } else { @@ -1214,7 +1215,9 @@ impl MerkleBlob { } }; self.insert(key, value, &hash, insert_location) - .map_err(|e| PyValueError::new_err(e.to_string())) + .map_err(|e| PyValueError::new_err(e.to_string()))?; + + Ok(()) } #[pyo3(name = "delete")] @@ -1667,6 +1670,31 @@ mod tests { ); } + #[test] + fn test_get_random_insert_location_by_seed_with_seed_too_short() { + let mut blob = MerkleBlob::new(vec![]).unwrap(); + let seed = [0xff]; + let layer_count = 8 * seed.len() + 10; + + for n in 0..layer_count { + let key = KvId((n + 100) as i64); + let value = KvId((n + 100) as i64); + let hash = sha256_num(key.0); + let insert_location = blob.get_random_insert_location_by_seed(&seed).unwrap(); + blob.insert(key, value, &hash, insert_location).unwrap(); + } + + let location = blob.get_random_insert_location_by_seed(&seed).unwrap(); + + let InsertLocation::Leaf { index, .. } = location else { + panic!() + }; + let lineage = blob.get_lineage_indexes(index).unwrap(); + + assert_eq!(lineage.len(), layer_count); + assert!(lineage.len() > seed.len() * 8); + } + #[rstest] fn test_just_insert_a_bunch( // just allowing parallelism of testing 100,000 inserts total diff --git a/tests/test_datalayer.py b/tests/test_datalayer.py index 62a30c796..658d9fe66 100644 --- a/tests/test_datalayer.py +++ b/tests/test_datalayer.py @@ -26,8 +26,19 @@ def test_just_insert_a_bunch() -> None: import time total_time = 0.0 - for i in range(100000): + for i in range(100_000): start = time.monotonic() merkle_blob.insert(int64(i), int64(i), HASH) end = time.monotonic() total_time += end - start + + +# TODO: make this a real test +def test_checking coverage() -> None: + count = 100 + + merkle_blob = MerkleBlob(blob=bytearray()) + for i in range(count): + merkle_blob.insert(int64(i), int64(i), HASH) + + assert len(merkle_blob.get_nodes_with_indexes()) == count From 6f3e9a1fe6c97973155a56d54d7819f3b450a73d Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Dec 2024 17:07:57 -0500 Subject: [PATCH 162/181] fixup --- tests/test_datalayer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_datalayer.py b/tests/test_datalayer.py index 658d9fe66..2329822fe 100644 --- a/tests/test_datalayer.py +++ b/tests/test_datalayer.py @@ -34,11 +34,11 @@ def test_just_insert_a_bunch() -> None: # TODO: make this a real test -def test_checking coverage() -> None: +def test_checking_coverage() -> None: count = 100 merkle_blob = MerkleBlob(blob=bytearray()) for i in range(count): - merkle_blob.insert(int64(i), int64(i), HASH) + merkle_blob.insert(int64(i), int64(i), bytes32.zeros) assert len(merkle_blob.get_nodes_with_indexes()) == count From f3630c08e5a0fee31a937a1ff4be524ac2f8a029 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Dec 2024 17:24:23 -0500 Subject: [PATCH 163/181] fixup --- tests/test_datalayer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_datalayer.py b/tests/test_datalayer.py index 2329822fe..622eb601e 100644 --- a/tests/test_datalayer.py +++ b/tests/test_datalayer.py @@ -1,4 +1,4 @@ -from chia_rs import MerkleBlob +from chia_rs import MerkleBlob, LeafNode from chia_rs.sized_bytes import bytes32 from chia_rs.sized_ints import int64 @@ -41,4 +41,5 @@ def test_checking_coverage() -> None: for i in range(count): merkle_blob.insert(int64(i), int64(i), bytes32.zeros) - assert len(merkle_blob.get_nodes_with_indexes()) == count + leaves = [node for index, node in merkle_blob.get_nodes_with_indexes() if isinstance(node, 
LeafNode)] + assert len(leaves) == count From 2dbdab6c1cd0eae125c2c268c25e4be6b192f233 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Dec 2024 17:37:48 -0500 Subject: [PATCH 164/181] fixup --- wheel/generate_type_stubs.py | 2 +- wheel/python/chia_rs/chia_rs.pyi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index 739e1aaaa..ba005862a 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -437,7 +437,7 @@ def delete(self, key: int64) -> None: ... def get_raw_node(self, index: uint32) -> Union[InternalNode, LeafNode]: ... def calculate_lazy_hashes(self) -> None: ... def get_lineage_with_indexes(self, index: uint32) -> list[tuple[uint32, Union[InternalNode, LeafNode]]]:... - def get_nodes_with_indexes(self) -> list[Union[InternalNode, LeafNode]]: ... + def get_nodes_with_indexes(self) -> list[tuple[uint32, Union[InternalNode, LeafNode]]]: ... def empty(self) -> bool: ... def get_root_hash(self) -> bytes32: ... def batch_insert(self, keys_values: list[tuple[int64, int64]], hashes: list[bytes32]): ... diff --git a/wheel/python/chia_rs/chia_rs.pyi b/wheel/python/chia_rs/chia_rs.pyi index 15a87912b..119726aaf 100644 --- a/wheel/python/chia_rs/chia_rs.pyi +++ b/wheel/python/chia_rs/chia_rs.pyi @@ -168,7 +168,7 @@ class MerkleBlob: def get_raw_node(self, index: uint32) -> Union[InternalNode, LeafNode]: ... def calculate_lazy_hashes(self) -> None: ... def get_lineage_with_indexes(self, index: uint32) -> list[tuple[uint32, Union[InternalNode, LeafNode]]]:... - def get_nodes_with_indexes(self) -> list[Union[InternalNode, LeafNode]]: ... + def get_nodes_with_indexes(self) -> list[tuple[uint32, Union[InternalNode, LeafNode]]]: ... def empty(self) -> bool: ... def get_root_hash(self) -> bytes32: ... def batch_insert(self, keys_values: list[tuple[int64, int64]], hashes: list[bytes32]): ... 
From 02eae49c9f8548c0493f15066027c136cc606b5e Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Dec 2024 17:42:58 -0500 Subject: [PATCH 165/181] black --- tests/test_datalayer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_datalayer.py b/tests/test_datalayer.py index 622eb601e..d5cf24606 100644 --- a/tests/test_datalayer.py +++ b/tests/test_datalayer.py @@ -41,5 +41,9 @@ def test_checking_coverage() -> None: for i in range(count): merkle_blob.insert(int64(i), int64(i), bytes32.zeros) - leaves = [node for index, node in merkle_blob.get_nodes_with_indexes() if isinstance(node, LeafNode)] + leaves = [ + node + for index, node in merkle_blob.get_nodes_with_indexes() + if isinstance(node, LeafNode) + ] assert len(leaves) == count From 7e7b6bd8b8d6ddd54fa5d4256beb658df3bb5578 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Dec 2024 19:02:03 -0500 Subject: [PATCH 166/181] exercise intopy for kvid --- tests/test_datalayer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_datalayer.py b/tests/test_datalayer.py index d5cf24606..baa18fb19 100644 --- a/tests/test_datalayer.py +++ b/tests/test_datalayer.py @@ -41,9 +41,9 @@ def test_checking_coverage() -> None: for i in range(count): merkle_blob.insert(int64(i), int64(i), bytes32.zeros) - leaves = [ - node + keys = { + node.key for index, node in merkle_blob.get_nodes_with_indexes() if isinstance(node, LeafNode) - ] - assert len(leaves) == count + } + assert keys == set(range(count)) From 658e98bc57526d3b13bd0a65757e108a2fbb2d6c Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Dec 2024 21:12:35 -0500 Subject: [PATCH 167/181] more fuzzer panics turned to errors --- crates/chia-datalayer/src/merkle.rs | 64 ++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 4183076d2..7fdbccf13 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -91,6 +91,21 @@ pub enum Error { #[error("key not in key to index cache: {0:?}")] IntegrityKeyNotInCache(KvId), + #[error("key to index cache for {0:?} should be {1:?} got: {2:?}")] + IntegrityKeyToIndexCacheIndex(KvId, TreeIndex, TreeIndex), + + #[error("parent and child relationship mismatched: {0:?}")] + IntegrityParentChildMismatch(TreeIndex), + + #[error("found {0:?} leaves but key to index cache length is: {1}")] + IntegrityKeyToIndexCacheLength(usize, usize), + + #[error("unmatched parent -> child references found: {0}")] + IntegrityUnmatchedChildParentRelationships(usize), + + #[error("expected total node count {0:?} found: {1:?}")] + IntegrityTotalNodeCount(TreeIndex, usize), + #[error("zero-length seed bytes not allowed")] ZeroLengthSeedNotAllowed, @@ -397,11 +412,18 @@ impl MerkleBlob { let (free_indexes, key_to_index) = get_free_indexes_and_keys_values_indexes(&blob)?; - Ok(Self { + let self_ = Self { blob, free_indexes, key_to_index, - }) + }; + + // NOTE: not checked at runtime + // TODO: should it be checked at runtime? 
+ #[cfg(fuzzing)] + self_.check_integrity()?; + + Ok(self_) } fn clear(&mut self) { @@ -863,7 +885,9 @@ impl MerkleBlob { for item in MerkleBlobParentFirstIterator::new(&self.blob) { let (index, block) = item?; if let Some(parent) = block.node.parent() { - assert_eq!(child_to_parent.remove(&index), Some(parent)); + if child_to_parent.remove(&index) != Some(parent) { + return Err(Error::IntegrityParentChildMismatch(index)); + } } match block.node { Node::Internal(node) => { @@ -877,11 +901,13 @@ impl MerkleBlob { .key_to_index .get(&node.key) .ok_or(Error::IntegrityKeyNotInCache(node.key))?; - let key = node.key; - assert_eq!( - *cached_index, index, - "key to index cache for {key:?} should be {index:?} got: {cached_index:?}" - ); + if *cached_index != index { + return Err(Error::IntegrityKeyToIndexCacheIndex( + node.key, + index, + *cached_index, + )); + }; assert!( !self.free_indexes.contains(&index), "{}", @@ -892,14 +918,22 @@ impl MerkleBlob { } let key_to_index_cache_length = self.key_to_index.len(); - assert_eq!(leaf_count, key_to_index_cache_length, "found {leaf_count:?} leaves but key to index cache length is: {key_to_index_cache_length:?}"); + if leaf_count != key_to_index_cache_length { + return Err(Error::IntegrityKeyToIndexCacheLength( + leaf_count, + key_to_index_cache_length, + )); + } let total_count = leaf_count + internal_count + self.free_indexes.len(); let extend_index = self.extend_index(); - assert_eq!( - total_count, extend_index.0 as usize, - "expected total node count {extend_index:?} found: {total_count:?}", - ); - assert_eq!(child_to_parent.len(), 0); + if total_count != extend_index.0 as usize { + return Err(Error::IntegrityTotalNodeCount(extend_index, total_count)); + }; + if !child_to_parent.is_empty() { + return Err(Error::IntegrityUnmatchedChildParentRelationships( + child_to_parent.len(), + )); + } Ok(()) } @@ -1504,7 +1538,7 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { } } -#[cfg(any(test, debug_assertions))] +#[cfg(all(not(fuzzing), any(test, debug_assertions)))] impl Drop for MerkleBlob { fn drop(&mut self) { self.check_integrity() From e6d63a55730170f6d4f803d2b308d63d86797ca5 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Dec 2024 22:10:40 -0500 Subject: [PATCH 168/181] more --- crates/chia-datalayer/src/merkle.rs | 27 ++++++++++++++++++++++++++- tests/test_datalayer.py | 7 +++++-- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 7fdbccf13..4e134f9ed 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -872,7 +872,7 @@ impl MerkleBlob { if let Some(parent) = block.node.parent() { self.mark_lineage_as_dirty(parent)?; - } + }; Ok(()) } @@ -2262,4 +2262,29 @@ mod tests { expected.assert_debug_eq(&actual); } + + #[rstest] + fn test_root_insert_location_when_not_empty(mut small_blob: MerkleBlob) { + small_blob + .insert(KvId(0), KvId(0), &sha256_num(0), InsertLocation::AsRoot {}) + .expect_err("tree not empty so inserting to root should fail"); + } + + #[rstest] + fn test_free_index_reused(mut small_blob: MerkleBlob) { + let (key, index) = { + let (key, index) = small_blob.key_to_index.iter().next().unwrap(); + (*key, *index) + }; + let expected_length = small_blob.blob.len(); + assert!(!small_blob.free_indexes.contains(&index)); + small_blob.delete(key).unwrap(); + assert!(small_blob.free_indexes.contains(&index)); + let new_index = small_blob + .insert(KvId(0), KvId(0), &sha256_num(0), 
InsertLocation::Auto {}) + .unwrap(); + assert_eq!(small_blob.blob.len(), expected_length); + assert_eq!(new_index, index); + assert!(small_blob.free_indexes.is_empty()); + } } diff --git a/tests/test_datalayer.py b/tests/test_datalayer.py index baa18fb19..1ceb79bf2 100644 --- a/tests/test_datalayer.py +++ b/tests/test_datalayer.py @@ -1,4 +1,4 @@ -from chia_rs import MerkleBlob, LeafNode +from chia_rs import LeafNode, MerkleBlob, Side from chia_rs.sized_bytes import bytes32 from chia_rs.sized_ints import int64 @@ -39,7 +39,10 @@ def test_checking_coverage() -> None: merkle_blob = MerkleBlob(blob=bytearray()) for i in range(count): - merkle_blob.insert(int64(i), int64(i), bytes32.zeros) + if i % 2 == 0: + merkle_blob.insert(int64(i), int64(i), bytes32.zeros) + else: + merkle_blob.insert(int64(i), int64(i), bytes32.zeros, int64(i - 1), 0) keys = { node.key From df10170f475acfbb51c0f10d6ddb25925fdafdb0 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Dec 2024 22:18:44 -0500 Subject: [PATCH 169/181] fix --- tests/test_datalayer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datalayer.py b/tests/test_datalayer.py index 1ceb79bf2..081d1a794 100644 --- a/tests/test_datalayer.py +++ b/tests/test_datalayer.py @@ -1,4 +1,4 @@ -from chia_rs import LeafNode, MerkleBlob, Side +from chia_rs import LeafNode, MerkleBlob from chia_rs.sized_bytes import bytes32 from chia_rs.sized_ints import int64 From 9796bdfaa011ed1ce9dde1245c2fba5fb833ff50 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Dec 2024 22:33:41 -0500 Subject: [PATCH 170/181] ugh --- tests/test_datalayer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_datalayer.py b/tests/test_datalayer.py index 081d1a794..c6aafe49e 100644 --- a/tests/test_datalayer.py +++ b/tests/test_datalayer.py @@ -1,6 +1,6 @@ from chia_rs import LeafNode, MerkleBlob from chia_rs.sized_bytes import bytes32 -from chia_rs.sized_ints import int64 +from chia_rs.sized_ints import int64, uint8 def test_merkle_blob(): @@ -42,7 +42,7 @@ def test_checking_coverage() -> None: if i % 2 == 0: merkle_blob.insert(int64(i), int64(i), bytes32.zeros) else: - merkle_blob.insert(int64(i), int64(i), bytes32.zeros, int64(i - 1), 0) + merkle_blob.insert(int64(i), int64(i), bytes32.zeros, int64(i - 1), uint8(0)) keys = { node.key From a6a19138d590df45a4f004243b92d80d05a321e9 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Mon, 9 Dec 2024 22:37:35 -0500 Subject: [PATCH 171/181] black --- tests/test_datalayer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_datalayer.py b/tests/test_datalayer.py index c6aafe49e..73723f15b 100644 --- a/tests/test_datalayer.py +++ b/tests/test_datalayer.py @@ -42,7 +42,9 @@ def test_checking_coverage() -> None: if i % 2 == 0: merkle_blob.insert(int64(i), int64(i), bytes32.zeros) else: - merkle_blob.insert(int64(i), int64(i), bytes32.zeros, int64(i - 1), uint8(0)) + merkle_blob.insert( + int64(i), int64(i), bytes32.zeros, int64(i - 1), uint8(0) + ) keys = { node.key From 9166646e3f74d8875cee352d4f8bcf42e3e2e6c4 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 10 Dec 2024 09:02:13 -0500 Subject: [PATCH 172/181] fixup --- crates/chia-datalayer/src/merkle.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 4e134f9ed..64770616c 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -2272,6 +2272,12 @@ mod tests { 
#[rstest] fn test_free_index_reused(mut small_blob: MerkleBlob) { + // there must be enough nodes to avoid the few-node insertion methods that clear the blob + for n in 0..5 { + small_blob + .insert(KvId(n), KvId(n), &sha256_num(n), InsertLocation::Auto {}) + .unwrap(); + } let (key, index) = { let (key, index) = small_blob.key_to_index.iter().next().unwrap(); (*key, *index) From 71147a733ffda4551c1e971f2b3f7d4a562ebad6 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 10 Dec 2024 09:16:40 -0500 Subject: [PATCH 173/181] fixup --- crates/chia-datalayer/src/merkle.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 64770616c..1005a490d 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -2273,7 +2273,8 @@ mod tests { #[rstest] fn test_free_index_reused(mut small_blob: MerkleBlob) { // there must be enough nodes to avoid the few-node insertion methods that clear the blob - for n in 0..5 { + let count = 5; + for n in 0..count { small_blob .insert(KvId(n), KvId(n), &sha256_num(n), InsertLocation::Auto {}) .unwrap(); @@ -2287,7 +2288,12 @@ mod tests { small_blob.delete(key).unwrap(); assert!(small_blob.free_indexes.contains(&index)); let new_index = small_blob - .insert(KvId(0), KvId(0), &sha256_num(0), InsertLocation::Auto {}) + .insert( + KvId(count), + KvId(count), + &sha256_num(count), + InsertLocation::Auto {}, + ) .unwrap(); assert_eq!(small_blob.blob.len(), expected_length); assert_eq!(new_index, index); From 4c3003c384f430a59dcb8f06c9acd1f4980b6f28 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 10 Dec 2024 10:09:28 -0500 Subject: [PATCH 174/181] fixup --- crates/chia-datalayer/src/merkle.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 1005a490d..0e2a28a8f 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -398,6 +398,7 @@ fn get_free_indexes_and_keys_values_indexes( #[derive(Debug)] pub struct MerkleBlob { blob: Vec, + // TODO: would be nice for this to be deterministic ala a fifo set free_indexes: HashSet, key_to_index: HashMap, } @@ -2287,6 +2288,8 @@ mod tests { assert!(!small_blob.free_indexes.contains(&index)); small_blob.delete(key).unwrap(); assert!(small_blob.free_indexes.contains(&index)); + let free_indexes = small_blob.free_indexes.clone(); + assert_eq!(free_indexes.len(), 2); let new_index = small_blob .insert( KvId(count), @@ -2296,7 +2299,7 @@ mod tests { ) .unwrap(); assert_eq!(small_blob.blob.len(), expected_length); - assert_eq!(new_index, index); + assert!(free_indexes.contains(&new_index)); assert!(small_blob.free_indexes.is_empty()); } } From 384c6e9b4441cad7900ef357c3855913ff4a2fce Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 10 Dec 2024 12:18:17 -0500 Subject: [PATCH 175/181] more focused fuzzing of valid length blobs --- .../fuzz/fuzz_targets/merkle_blob_new.rs | 15 ++++++++++++--- crates/chia-datalayer/src/merkle.rs | 19 ++++++++++--------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/crates/chia-datalayer/fuzz/fuzz_targets/merkle_blob_new.rs b/crates/chia-datalayer/fuzz/fuzz_targets/merkle_blob_new.rs index 3427e6a37..902519c35 100644 --- a/crates/chia-datalayer/fuzz/fuzz_targets/merkle_blob_new.rs +++ b/crates/chia-datalayer/fuzz/fuzz_targets/merkle_blob_new.rs @@ -1,9 +1,18 @@ #![no_main] -use libfuzzer_sys::fuzz_target; 
+use libfuzzer_sys::{arbitrary::Unstructured, fuzz_target}; -use chia_datalayer::MerkleBlob; +use chia_datalayer::{MerkleBlob, BLOCK_SIZE}; fuzz_target!(|data: &[u8]| { - let _ = MerkleBlob::new(data.to_vec()); + let mut unstructured = Unstructured::new(data); + let block_count = unstructured.int_in_range(0..=1000).unwrap(); + let mut bytes = vec![0u8; block_count * BLOCK_SIZE]; + unstructured.fill_buffer(&mut bytes).unwrap(); + + let Ok(mut blob) = MerkleBlob::new(bytes) else { + return; + }; + blob.check_integrity_on_drop = false; + let _ = blob.check_integrity(); }); diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 0e2a28a8f..32a7beb1e 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -137,7 +137,7 @@ const METADATA_RANGE: Range = 0..METADATA_SIZE; const METADATA_SIZE: usize = 2; // TODO: figure out the real max better than trial and error? const DATA_SIZE: usize = 53; -const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; +pub const BLOCK_SIZE: usize = METADATA_SIZE + DATA_SIZE; type BlockBytes = [u8; BLOCK_SIZE]; type MetadataBytes = [u8; METADATA_SIZE]; type DataBytes = [u8; DATA_SIZE]; @@ -401,6 +401,8 @@ pub struct MerkleBlob { // TODO: would be nice for this to be deterministic ala a fifo set free_indexes: HashSet, key_to_index: HashMap, + // TODO: used by fuzzing, some cleaner way? + pub check_integrity_on_drop: bool, } impl MerkleBlob { @@ -411,19 +413,16 @@ impl MerkleBlob { return Err(Error::InvalidBlobLength(remainder)); } + // TODO: maybe integrate integrity check here if quick enough let (free_indexes, key_to_index) = get_free_indexes_and_keys_values_indexes(&blob)?; let self_ = Self { blob, free_indexes, key_to_index, + check_integrity_on_drop: true, }; - // NOTE: not checked at runtime - // TODO: should it be checked at runtime? - #[cfg(fuzzing)] - self_.check_integrity()?; - Ok(self_) } @@ -1539,11 +1538,13 @@ impl Iterator for MerkleBlobBreadthFirstIterator<'_> { } } -#[cfg(all(not(fuzzing), any(test, debug_assertions)))] +#[cfg(any(test, debug_assertions))] impl Drop for MerkleBlob { fn drop(&mut self) { - self.check_integrity() - .expect("integrity check failed while dropping merkle blob"); + if self.check_integrity_on_drop { + self.check_integrity() + .expect("integrity check failed while dropping merkle blob"); + } } } From 2141f0da5635d2354c2f153c70f698c8c179d34a Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 10 Dec 2024 12:28:03 -0500 Subject: [PATCH 176/181] stub --- wheel/generate_type_stubs.py | 2 ++ wheel/python/chia_rs/chia_rs.pyi | 2 ++ 2 files changed, 4 insertions(+) diff --git a/wheel/generate_type_stubs.py b/wheel/generate_type_stubs.py index ba005862a..539ea4ec5 100644 --- a/wheel/generate_type_stubs.py +++ b/wheel/generate_type_stubs.py @@ -426,6 +426,8 @@ def blob(self) -> bytearray: ... def free_indexes(self) -> set[uint32]: ... @property def key_to_index(self) -> Mapping[int64, uint32]: ... + @property + def check_integrity_on_drop(self) -> bool: ... def __init__( self, diff --git a/wheel/python/chia_rs/chia_rs.pyi b/wheel/python/chia_rs/chia_rs.pyi index 119726aaf..8185b919f 100644 --- a/wheel/python/chia_rs/chia_rs.pyi +++ b/wheel/python/chia_rs/chia_rs.pyi @@ -157,6 +157,8 @@ class MerkleBlob: def free_indexes(self) -> set[uint32]: ... @property def key_to_index(self) -> Mapping[int64, uint32]: ... + @property + def check_integrity_on_drop(self) -> bool: ... 
def __init__( self, From 6aecf29123b6fcf3d500d5819f73bae7958b4c1b Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 10 Dec 2024 12:32:34 -0500 Subject: [PATCH 177/181] datalayer fuzz version --- crates/chia-datalayer/fuzz/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/chia-datalayer/fuzz/Cargo.toml b/crates/chia-datalayer/fuzz/Cargo.toml index 88aa532f8..fa5cbaaeb 100644 --- a/crates/chia-datalayer/fuzz/Cargo.toml +++ b/crates/chia-datalayer/fuzz/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "chia-datalayer-fuzz" -version = "0.0.0" +version = "0.16.0" publish = false edition = "2021" From 2a0b98f88b42aef7e0c1269ea9dbc877cb55a2d4 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 10 Dec 2024 12:38:21 -0500 Subject: [PATCH 178/181] datalayer fuzz version and some cfg --- Cargo.lock | 2 +- crates/chia-datalayer/src/merkle.rs | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index e844b32b8..4e6e1ecc5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -394,7 +394,7 @@ dependencies = [ [[package]] name = "chia-datalayer-fuzz" -version = "0.0.0" +version = "0.16.0" dependencies = [ "chia-datalayer", "libfuzzer-sys", diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 32a7beb1e..086c095ad 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -402,6 +402,7 @@ pub struct MerkleBlob { free_indexes: HashSet, key_to_index: HashMap, // TODO: used by fuzzing, some cleaner way? + #[cfg(any(test, debug_assertions))] pub check_integrity_on_drop: bool, } @@ -420,6 +421,7 @@ impl MerkleBlob { blob, free_indexes, key_to_index, + #[cfg(any(test, debug_assertions))] check_integrity_on_drop: true, }; From 952843a03f6f34cd64bb1933b06610f8e5af3b03 Mon Sep 17 00:00:00 2001 From: Kyle Altendorf Date: Tue, 10 Dec 2024 12:52:01 -0500 Subject: [PATCH 179/181] put back the integrity check on drop field --- crates/chia-datalayer/src/merkle.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/chia-datalayer/src/merkle.rs b/crates/chia-datalayer/src/merkle.rs index 086c095ad..f4af4d932 100644 --- a/crates/chia-datalayer/src/merkle.rs +++ b/crates/chia-datalayer/src/merkle.rs @@ -401,8 +401,8 @@ pub struct MerkleBlob { // TODO: would be nice for this to be deterministic ala a fifo set free_indexes: HashSet, key_to_index: HashMap, - // TODO: used by fuzzing, some cleaner way? - #[cfg(any(test, debug_assertions))] + // TODO: used by fuzzing, some cleaner way? 
From 05196e48a211e095b50c1ca0db57c997c62cbbe4 Mon Sep 17 00:00:00 2001
From: Kyle Altendorf
Date: Tue, 10 Dec 2024 18:21:42 -0500
Subject: [PATCH 181/181] cargo.lock catchup

---
 Cargo.lock | 278 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 278 insertions(+)

diff --git a/Cargo.lock b/Cargo.lock
index b0959b533..6f6c36ec1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1036,6 +1036,15 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"

+[[package]]
+name = "form_urlencoded"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
+dependencies = [
+ "percent-encoding",
+]
+
 [[package]]
 name = "futures"
 version = "0.3.30"
@@ -1283,6 +1292,145 @@ version = "1.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9"

+[[package]]
+name = "icu_collections"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
+dependencies = [
+ "displaydoc",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
+dependencies = [
+ "displaydoc",
+ "litemap",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid_transform"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
+dependencies = [
+ "displaydoc",
+ "icu_locid",
+ "icu_locid_transform_data",
+ "icu_provider",
+ "tinystr",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locid_transform_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
+
+[[package]]
+name = "icu_normalizer"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
+dependencies = [
+ "displaydoc",
+ "icu_collections",
+ "icu_normalizer_data",
+ "icu_properties",
+ "icu_provider",
+ "smallvec",
+ "utf16_iter",
+ "utf8_iter",
+ "write16",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
+
+[[package]]
+name = "icu_properties"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
+dependencies = [
+ "displaydoc",
+ "icu_collections",
+ "icu_locid_transform",
+ "icu_properties_data",
+ "icu_provider",
+ "tinystr",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_properties_data"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
+
+[[package]]
+name = "icu_provider"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
+dependencies = [
+ "displaydoc",
+ "icu_locid",
+ "icu_provider_macros",
+ "stable_deref_trait",
+ "tinystr",
+ "writeable",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_provider_macros"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "idna"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
+dependencies = [
+ "idna_adapter",
+ "smallvec",
+ "utf8_iter",
+]
+
+[[package]]
+name = "idna_adapter"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
+dependencies = [
+ "icu_normalizer",
+ "icu_properties",
+]
+
 [[package]]
 name = "indexmap"
 version = "2.2.6"
@@ -1456,6 +1604,12 @@ version = "0.5.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"

+[[package]]
+name = "litemap"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
+
 [[package]]
 name = "log"
 version = "0.4.22"
@@ -1722,6 +1876,12 @@ dependencies = [
  "base64ct",
 ]

+[[package]]
+name = "percent-encoding"
+version = "2.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
+
 [[package]]
 name = "pin-project-lite"
 version = "0.2.14"
@@ -2290,6 +2450,12 @@ dependencies = [
  "der",
 ]

+[[package]]
+name = "stable_deref_trait"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
+
 [[package]]
 name = "strsim"
 version = "0.11.1"
@@ -2410,6 +2576,16 @@ dependencies = [
  "time-core",
 ]

+[[package]]
+name = "tinystr"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
+dependencies = [
+ "displaydoc",
+ "zerovec",
+]
+
 [[package]]
 name = "tinytemplate"
 version = "1.2.1"
@@ -2523,12 +2699,35 @@ version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"

+[[package]]
+name = "url"
+version = "2.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+]
+
 [[package]]
 name = "utf-8"
 version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"

+[[package]]
+name = "utf16_iter"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
+
+[[package]]
+name = "utf8_iter"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
+
 [[package]]
 name = "utf8parse"
 version = "0.2.2"
@@ -2740,6 +2939,18 @@ dependencies = [
  "memchr",
 ]

+[[package]]
+name = "write16"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
+
+[[package]]
+name = "writeable"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
+
 [[package]]
 name = "x509-parser"
 version = "0.16.0"
@@ -2767,6 +2978,30 @@ dependencies = [
  "time",
 ]

+[[package]]
+name = "yoke"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
+dependencies = [
+ "serde",
+ "stable_deref_trait",
+ "yoke-derive",
+ "zerofrom",
+]
+
+[[package]]
+name = "yoke-derive"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
 [[package]]
 name = "zerocopy"
 version = "0.7.35"
@@ -2788,6 +3023,27 @@ dependencies = [
  "syn",
 ]

+[[package]]
+name = "zerofrom"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
+dependencies = [
+ "zerofrom-derive",
+]
+
+[[package]]
+name = "zerofrom-derive"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
 [[package]]
 name = "zeroize"
 version = "1.8.1"
@@ -2808,6 +3064,28 @@ dependencies = [
  "syn",
 ]

+[[package]]
+name = "zerovec"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
+dependencies = [
+ "yoke",
+ "zerofrom",
+ "zerovec-derive",
+]
+
+[[package]]
+name = "zerovec-derive"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "zstd"
 version = "0.13.2"