From 482f491c401b2a0c6079022d742f761761ee8079 Mon Sep 17 00:00:00 2001 From: Szabolcs Berecz Date: Wed, 15 May 2019 11:47:25 +0200 Subject: [PATCH] Implement bitfield iterator (#68) * Add one bitfield test from dat/hypercore * Rnd::choose is deprecated * Fix warnings and clippy lints * Implement bitfield iterator * Modify data_iterate mask to better match usage * Cleanup * Rename Bitfield::length() to len() and add is_empty() --- Cargo.toml | 2 +- src/bitfield/iterator.rs | 146 +++++++++++++++++++++++++++++++------- src/bitfield/masks.rs | 1 + src/bitfield/mod.rs | 43 +++++++++--- src/feed.rs | 4 +- tests/bitfield.rs | 148 +++++++++++++++++++++++++++++++++++++-- tests/feed.rs | 4 +- tests/model.rs | 5 +- tests/regression.rs | 2 +- 9 files changed, 307 insertions(+), 48 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8e2a5ec..d156555 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ blake2-rfc = "0.2.18" byteorder = "1.2.6" ed25519-dalek = "0.8.1" failure = "0.1.2" -flat-tree = "4.0.1" +flat-tree = "4.1" lazy_static = "1.1.0" memory-pager = "0.8.0" merkle-tree-stream = "0.8.0" diff --git a/src/bitfield/iterator.rs b/src/bitfield/iterator.rs index 52519cf..fdedeef 100644 --- a/src/bitfield/iterator.rs +++ b/src/bitfield/iterator.rs @@ -4,23 +4,23 @@ use super::Bitfield; /// Iterate over a bitfield. #[derive(Debug)] -pub structIterator { +pub struct Iterator<'a> { start: usize, end: usize, index_end: usize, - pos: usize, - byte: usize, - bitfield: Bitfield, + pos: Option, + byte: u8, + bitfield: &'a mut Bitfield, } -impl Iterator { +impl<'a> Iterator<'a> { /// Create a new instance. - pub fn new(bitfield: &mut Bitfield) -> Self { + pub fn new(bitfield: &'a mut Bitfield) -> Self { Self { start: 0, end: 0, index_end: 0, - pos: 0, + pos: Some(0), byte: 0, bitfield, } @@ -30,33 +30,129 @@ impl Iterator { pub fn range(&mut self, start: usize, end: usize) { self.start = start; self.end = end; - self.index_end = 2 * (end as f32 / 32).ceil(); + self.index_end = 2 * ((end + 31) / 32); - if self.end > self.bitfield.len() { + if self.end > self.bitfield.length { self.bitfield.expand(self.end); } } - pub fn seek(&mut self, offset: usize) { - offset += self.start; - if offset < self.start { - offset = self.start + /// Seek to `offset` + pub fn seek(&mut self, mut offset: usize) -> &mut Self { + offset += self.start; + // FIXME This is fishy. Offset and start is unsigned, so `offset < self.start` can only + // be true when the previous addition overflows. The overflow would cause a panic, so, + // either the addition should be a wrapping_add, or rather, the original offset should + // be checked to ensure it is less than `self.end - self.start`. + if offset < self.start { + offset = self.start; } - if offset >= self.end { - self.pos = -1 - return; + if offset >= self.end { + self.pos = None; + return self; + } + + let o = offset % 8; + + let pos = offset / 8; + self.pos = Some(pos); + + self.byte = + self.bitfield.data.get_byte(pos) | self.bitfield.masks.data_iterate[o]; + + self + } + + pub fn next(&mut self) -> Option { + let mut pos = self.pos?; + + let mut free = self.bitfield.masks.next_data_0_bit[self.byte as usize]; + + while free == -1 { + pos += 1; + self.byte = self.bitfield.data.get_byte(pos); + free = self.bitfield.masks.next_data_0_bit[self.byte as usize]; + + if free == -1 { + pos = self.skip_ahead(pos)?; + + self.byte = self.bitfield.data.get_byte(pos); + free = self.bitfield.masks.next_data_0_bit[self.byte as usize]; + } + } + self.pos = Some(pos); + + self.byte |= self.bitfield.masks.data_iterate[free as usize + 1]; + + let n = 8 * pos + free as usize; + if n < self.end { + Some(n) + } else { + None + } } - let o = offset & 7; + pub fn skip_ahead(&mut self, start: usize) -> Option { + let bitfield_index = &self.bitfield.index; + let tree_end = self.index_end; + let iter = &mut self.bitfield.iterator; + let o = start & 3; + + iter.seek(2 * (start / 4)); - self.pos = (offset - o) / 8; - let left = self.bitfield.data.get_byte(self.pos); - let right = if o == 0 { - 0 - } else { - self.bitfield.masks.data_iterate_mask[o - 1] - }; + let mut tree_byte = bitfield_index.get_byte(iter.index()) + | self.bitfield.masks.index_iterate[o]; + + while self.bitfield.masks.next_index_0_bit[tree_byte as usize] == -1 { + if iter.is_left() { + iter.next(); + } else { + iter.next(); + iter.parent(); + } + + if right_span(iter) >= tree_end { + while right_span(iter) >= tree_end && is_parent(iter) { + iter.left_child(); + } + if right_span(iter) >= tree_end { + return None; + } + } + + tree_byte = bitfield_index.get_byte(iter.index()); + } + + while iter.factor() > 2 { + if self.bitfield.masks.next_index_0_bit[tree_byte as usize] < 2 { + iter.left_child(); + } else { + iter.right_child(); + } + + tree_byte = bitfield_index.get_byte(iter.index()); + } + + let mut free = self.bitfield.masks.next_index_0_bit[tree_byte as usize]; + if free == -1 { + free = 4; + } + + let next = iter.index() * 2 + free as usize; + + if next <= start { + Some(start + 1) + } else { + Some(next) + } + } +} + +fn right_span(iter: &flat_tree::Iterator) -> usize { + iter.index() + iter.factor() / 2 - 1 +} - self.byte = left | right; +fn is_parent(iter: &flat_tree::Iterator) -> bool { + iter.index() & 1 == 1 } diff --git a/src/bitfield/masks.rs b/src/bitfield/masks.rs index 5564597..36a8c5d 100644 --- a/src/bitfield/masks.rs +++ b/src/bitfield/masks.rs @@ -34,6 +34,7 @@ impl Masks { ]; let data_iterate = vec![ + 0b00_00_00_00, // 0 0b10_00_00_00, // 128 0b11_00_00_00, // 192 0b11_10_00_00, // 224 diff --git a/src/bitfield/mod.rs b/src/bitfield/mod.rs index ca29ba3..1dad19a 100644 --- a/src/bitfield/mod.rs +++ b/src/bitfield/mod.rs @@ -17,6 +17,7 @@ //! We need to make sure the performance impact of this stays well within //! bounds. +mod iterator; mod masks; use self::masks::Masks; @@ -57,6 +58,16 @@ impl Bitfield { } } + /// Get the current length + pub fn len(&self) -> usize { + self.length + } + + /// Returns `true` if the bitfield is empty + pub fn is_empty(&self) -> bool { + self.length == 0 + } + /// Set a value at an index. pub fn set(&mut self, index: usize, value: bool) -> Change { let o = mask_8b(index); @@ -113,17 +124,8 @@ impl Bitfield { let pos = (start - o) / 8; let last = (end - e) / 8; - let left_mask = if o == 0 { - 255 - } else { - 255 - self.masks.data_iterate[o - 1] - }; - - let right_mask = if e == 0 { - 0 - } else { - self.masks.data_iterate[e - 1] - }; + let left_mask = 255 - self.masks.data_iterate[o]; + let right_mask = self.masks.data_iterate[e]; let byte = self.data.get_byte(pos); if pos == last { @@ -226,6 +228,25 @@ impl Bitfield { } } } + + /// Constructs an iterator from start to end + pub fn iterator(&mut self) -> iterator::Iterator<'_> { + let len = self.length; + self.iterator_with_range(0, len) + } + + /// Constructs an iterator from `start` to `end` + pub fn iterator_with_range( + &mut self, + start: usize, + end: usize, + ) -> iterator::Iterator<'_> { + let mut iter = iterator::Iterator::new(self); + iter.range(start, end); + iter.seek(0); + + iter + } } // NOTE: can we move this into `sparse_bitfield`? diff --git a/src/feed.rs b/src/feed.rs index 96fcdff..8a4cf5b 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -535,8 +535,8 @@ where } } Ok(Audit { - valid_blocks: valid_blocks, - invalid_blocks: invalid_blocks, + valid_blocks, + invalid_blocks, }) } diff --git a/tests/bitfield.rs b/tests/bitfield.rs index 817d271..17f9ca0 100644 --- a/tests/bitfield.rs +++ b/tests/bitfield.rs @@ -1,6 +1,8 @@ extern crate hypercore; +extern crate rand; use hypercore::bitfield::{Bitfield, Change::*}; +use rand::Rng; #[test] fn set_and_get() { @@ -11,10 +13,129 @@ fn set_and_get() { assert_eq!(b.set(0, true), Unchanged); assert_eq!(b.get(0), true); - assert_eq!(b.get(1424244), false); - assert_eq!(b.set(1424244, true), Changed); - assert_eq!(b.set(1424244, true), Unchanged); - assert_eq!(b.get(1424244), true); + assert_eq!(b.get(1_424_244), false); + assert_eq!(b.set(1_424_244, true), Changed); + assert_eq!(b.set(1_424_244, true), Unchanged); + assert_eq!(b.get(1_424_244), true); +} + +#[test] +fn set_and_get_tree() { + let mut b = Bitfield::new(); + + { + let tree = &mut b.tree; + + assert_eq!(tree.get(0), false); + assert_eq!(tree.set(0, true), Changed); + assert_eq!(tree.set(0, true), Unchanged); + assert_eq!(tree.get(0), true); + + assert_eq!(tree.get(1_424_244), false); + assert_eq!(tree.set(1_424_244, true), Changed); + assert_eq!(tree.set(1_424_244, true), Unchanged); + assert_eq!(tree.get(1_424_244), true); + } + + assert_eq!(b.get(0), false); + assert_eq!(b.get(1_424_244), false); +} + +#[test] +fn set_and_index() { + let mut b = Bitfield::new(); + + { + let mut iter = b.iterator_with_range(0, 100_000_000); + assert_eq!(iter.next(), Some(0)); + } + + b.set(0, true); + { + let mut iter = b.iterator_with_range(0, 100_000_000); + assert_eq!(iter.seek(0).next(), Some(1)); + } + + b.set(479, true); + { + let mut iter = b.iterator_with_range(0, 100_000_000); + assert_eq!(iter.seek(478).next(), Some(478)); + assert_eq!(iter.next(), Some(480)); + } + + b.set(1, true); + { + let mut iter = b.iterator_with_range(0, 100_000_000); + assert_eq!(iter.seek(0).next(), Some(2)); + } + + b.set(2, true); + { + let mut iter = b.iterator_with_range(0, 100_000_000); + assert_eq!(iter.seek(0).next(), Some(3)); + } + + b.set(3, true); + { + let mut iter = b.iterator_with_range(0, 100_000_000); + assert_eq!(iter.seek(0).next(), Some(4)); + } + + let len = b.len(); + for i in 0..len { + b.set(i, true); + } + { + let mut iter = b.iterator_with_range(0, 100_000_000); + assert_eq!(iter.seek(0).next(), Some(len)); + } + + for i in 0..len { + b.set(i, false); + } + { + let mut iter = b.iterator_with_range(0, 100_000_000); + assert_eq!(iter.seek(0).next(), Some(0)); + } +} + +#[test] +fn set_and_index_random() { + let mut b = Bitfield::new(); + + let mut rng = rand::thread_rng(); + for _ in 0..100 { + assert!(check(&mut b), "index validates"); + set(&mut b, rng.gen_range(0, 2000), rng.gen_range(0, 8)); + } + + assert!(check(&mut b), "index validates"); + + fn check(b: &mut Bitfield) -> bool { + let mut all = vec![true; b.len()]; + + { + let mut iter = b.iterator(); + + while let Some(i) = iter.next() { + all[i] = false; + } + } + + for (i, &v) in all.iter().enumerate() { + if b.get(i) != v { + return false; + } + } + + true + } + + fn set(b: &mut Bitfield, i: usize, n: usize) { + for j in i..i + n { + b.set(j, true); + } + } } #[test] @@ -34,3 +155,22 @@ fn get_total_positive_bits() { assert_eq!(b.total(), 5); assert_eq!(b.total_with_start(7), 1); } + +#[test] +fn bitfield_dedup() { + let mut b = Bitfield::new(); + + for i in 0..32 * 1024 { + b.set(i, true); + } + + for i in 0..64 * 1024 { + b.tree.set(i, true); + } + + assert!(b.get(8 * 1024)); + assert!(b.get(16 * 1024)); + b.set(8 * 1024, false); + assert!(!b.get(8 * 1024)); + assert!(b.get(16 * 1024)); +} diff --git a/tests/feed.rs b/tests/feed.rs index 3e6a04a..0835fa4 100644 --- a/tests/feed.rs +++ b/tests/feed.rs @@ -181,7 +181,7 @@ fn copy_keys( let public = PublicKey::from_bytes(public).unwrap(); let secret = SecretKey::from_bytes(&secret).unwrap(); - return (public, secret); + (public, secret) } _ => panic!(": Could not access secret key"), } @@ -217,7 +217,7 @@ fn audit_bad_data() { .open(datapath) .expect("Unable to open the hypercore's data file!"); hypercore_data - .write("yello".as_bytes()) + .write_all(b"yello") .expect("Unable to corrupt the hypercore data file!"); match feed.audit() { diff --git a/tests/model.rs b/tests/model.rs index 7296526..6090c1d 100644 --- a/tests/model.rs +++ b/tests/model.rs @@ -7,6 +7,7 @@ mod common; use common::create_feed; use quickcheck::{Arbitrary, Gen}; +use rand::seq::SliceRandom; use rand::Rng; use std::u8; @@ -22,7 +23,7 @@ enum Op { impl Arbitrary for Op { fn arbitrary(g: &mut G) -> Self { let choices = [0, 1, 2]; - match g.choose(&choices).expect("Value should exist") { + match choices.choose(g).expect("Value should exist") { 0 => { let index: usize = g.gen_range(0, MAX_FILE_SIZE); Op::Get { index } @@ -66,7 +67,7 @@ quickcheck! { Op::Verify => { let len = insta.len(); if len == 0 { - insta.signature(len).is_err(); + insta.signature(len).unwrap_err(); } else { // Always test index of last entry, which is `len - 1`. let len = len - 1; diff --git a/tests/regression.rs b/tests/regression.rs index c8b5f04..6d563d1 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -12,7 +12,7 @@ use common::create_feed; fn regression_01() { let mut feed = create_feed(50).unwrap(); assert_eq!(feed.len(), 0); - feed.signature(0).is_err(); + feed.signature(0).unwrap_err(); let data = b"some_data"; feed.append(data).unwrap();