From 4e52ee960a2a59323e9376eddb47cc4d72f0edc9 Mon Sep 17 00:00:00 2001
From: Alexander Neubeck
Date: Tue, 16 Jul 2024 16:49:08 +0200
Subject: [PATCH 1/2] add an alternative API

---
 src/bytewise.rs      | 13 +++++++++++--
 src/bytewise/iter.rs | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/src/bytewise.rs b/src/bytewise.rs
index 3b300a3..3a089a5 100644
--- a/src/bytewise.rs
+++ b/src/bytewise.rs
@@ -16,8 +16,7 @@ use crate::utils::FromU32;
 use crate::{MatchKind, Output};
 pub use builder::DoubleArrayAhoCorasickBuilder;
 use iter::{
-    FindIterator, FindOverlappingIterator, FindOverlappingNoSuffixIterator, LestmostFindIterator,
-    U8SliceIterator,
+    FindIterator, FindOverlappingIterator, FindOverlappingNoSuffixIterator, LestmostFindIterator, OverlappingStepper, U8SliceIterator
 };
 
 // The root index position.
@@ -287,6 +286,16 @@ impl<V> DoubleArrayAhoCorasick<V> {
         }
     }
 
+    /// Returns an [`OverlappingStepper`] positioned at the root state with no bytes consumed.
+    pub fn overlapping_stepper(&self) -> OverlappingStepper<V> {
+        OverlappingStepper {
+            pma: self,
+            state_id: ROOT_STATE_IDX,
+            output_pos: None,
+            pos: 0,
+        }
+    }
+
     /// Returns an iterator of overlapping matches in the given haystack iterator.
     ///
     /// # Arguments
diff --git a/src/bytewise/iter.rs b/src/bytewise/iter.rs
index 58cae5d..fa94e27 100644
--- a/src/bytewise/iter.rs
+++ b/src/bytewise/iter.rs
@@ -84,6 +84,49 @@ where
     }
 }
 
+/// In contrast to the iterator APIs, this one requires the caller to feed in bytes
+/// and take out matches.
+pub struct OverlappingStepper<'a, V> {
+    pub(crate) pma: &'a DoubleArrayAhoCorasick<V>,
+    pub(crate) state_id: u32,
+    pub(crate) pos: usize,
+    pub(crate) output_pos: Option<NonZeroU32>,
+}
+
+impl<'a, V: Copy> OverlappingStepper<'a, V> {
+    /// Feeds byte `c` into the automaton; matches not yet taken via `next` are discarded.
+    pub fn consume(&mut self, c: u8) {
+        // self.state_id is always smaller than self.pma.states.len() because
+        // self.pma.next_state_id_unchecked() ensures to return such a value.
+        self.state_id = unsafe { self.pma.next_state_id_unchecked(self.state_id, c) };
+        self.output_pos = unsafe {
+            self.pma
+                .states
+                .get_unchecked(usize::from_u32(self.state_id))
+                .output_pos()
+        };
+        self.pos += 1;
+    }
+
+    /// Returns the next pending match ending at the last consumed byte, or `None` when drained.
+    pub fn next(&mut self) -> Option<Match<V>> {
+        let output_pos = self.output_pos?;
+        // output_pos.get() is always smaller than self.pma.outputs.len() because
+        // Output::parent() ensures to return such a value when it is Some.
+        let out = unsafe {
+            self.pma
+                .outputs
+                .get_unchecked(usize::from_u32(output_pos.get() - 1))
+        };
+        self.output_pos = out.parent();
+        Some(Match {
+            length: usize::from_u32(out.length()),
+            end: self.pos,
+            value: out.value(),
+        })
+    }
+}
+
 /// Iterator created by [`DoubleArrayAhoCorasick::find_overlapping_iter()`].
 pub struct FindOverlappingIterator<'a, P, V> {
     pub(crate) pma: &'a DoubleArrayAhoCorasick<V>,

From 22f471532a25d90a320eae0902c759db2b8fe962 Mon Sep 17 00:00:00 2001
From: Alexander Neubeck
Date: Tue, 16 Jul 2024 17:03:33 +0200
Subject: [PATCH 2/2] Update iter.rs

---
 src/bytewise/iter.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/bytewise/iter.rs b/src/bytewise/iter.rs
index fa94e27..c072a58 100644
--- a/src/bytewise/iter.rs
+++ b/src/bytewise/iter.rs
@@ -95,6 +95,7 @@ pub struct OverlappingStepper<'a, V> {
 
 impl<'a, V: Copy> OverlappingStepper<'a, V> {
     /// Feeds byte `c` into the automaton; matches not yet taken via `next` are discarded.
+    #[inline(always)]
     pub fn consume(&mut self, c: u8) {
         // self.state_id is always smaller than self.pma.states.len() because
         // self.pma.next_state_id_unchecked() ensures to return such a value.
@@ -109,6 +110,7 @@ impl<'a, V: Copy> OverlappingStepper<'a, V> {
     }
 
     /// Returns the next pending match ending at the last consumed byte, or `None` when drained.
+    #[inline(always)]
     pub fn next(&mut self) -> Option<Match<V>> {
         let output_pos = self.output_pos?;
         // output_pos.get() is always smaller than self.pma.outputs.len() because