Skip to content

Commit

Permalink
perf: microoptimize util::trim_bs_whitespace
Browse files Browse the repository at this point in the history
as it's used in a hot loop in `frequency`
  • Loading branch information
jqnatividad committed Jan 30, 2025
1 parent a0c3384 commit 02d4a60
Showing 1 changed file with 24 additions and 10 deletions.
34 changes: 24 additions & 10 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1977,18 +1977,32 @@ pub fn write_json_record<W: std::io::Write>(
}

/// Trims leading and trailing ASCII whitespace from a byte slice.
///
/// "Whitespace" is whatever [`u8::is_ascii_whitespace`] accepts: space, horizontal tab,
/// line feed, form feed and carriage return. Vertical tab (`0x0B`) is NOT whitespace
/// under that definition and is left in place.
///
/// The returned slice borrows from `bytes`; nothing is copied or allocated. An empty
/// or all-whitespace input yields an empty slice.
#[inline(always)]
pub fn trim_bs_whitespace(bytes: &[u8]) -> &[u8] {
    let mut trimmed = bytes;

    // Peel whitespace off the front. Matching on the slice pattern hands us the first
    // byte and the remainder in one step, so there is no index to bounds-check and no
    // need for `unsafe` unchecked access.
    while let [first, rest @ ..] = trimmed {
        if !first.is_ascii_whitespace() {
            break;
        }
        trimmed = rest;
    }

    // Peel whitespace off the back the same way. If the input was all whitespace the
    // slice is already empty here and this loop does not run.
    while let [rest @ .., last] = trimmed {
        if !last.is_ascii_whitespace() {
            break;
        }
        trimmed = rest;
    }

    trimmed
}

/// get stats records from stats.csv.data.jsonl file, or if it's invalid, by running the stats
Expand Down

0 comments on commit 02d4a60

Please sign in to comment.