Skip to content

Commit

Permalink
Use SIMD vector reductions
Browse files (browse the repository at this point in the history)
  • Loading branch information
AdamNiederer committed Feb 1, 2018
1 parent ccaa458 commit 64fe73a
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ bench = false
simd-accel = ["faster"]

[dependencies]
faster = { version = "0.4.3", optional = true }
faster = { git = "https://github.com/AdamNiederer/faster", branch="master", optional = true }

[dev-dependencies]
quickcheck = "0.6"
Expand Down
39 changes: 28 additions & 11 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,17 @@ pub fn count(haystack: &[u8], needle: u8) -> usize {
ret += (&haystack[i * u8s::WIDTH * 255..(i + 1) * u8s::WIDTH * 255])
.simd_iter()
.simd_reduce(u8s(0), u8s(needle.overflowing_add(1).0), |acc, v| {
acc + (PackedEq::eq(&v, &u8s(needle)).be_u8s() & u8s(0x01))
}).scalar_reduce(0, |acc, s| acc + (s as usize));
acc + v.eq_mask(u8s(needle)).be_u8s() & u8s(0x01)
}).sum_upcast() as usize;
}
ret + (&haystack[haystack.len() - haystack.len() % (u8s::WIDTH * 255)..])

let final_chunk = (&haystack[haystack.len() - haystack.len() % (u8s::WIDTH * 255)..])
.simd_iter()
.simd_reduce(u8s(0), u8s(needle.overflowing_add(1).0), |acc, v| {
acc + (PackedEq::eq(&v, &u8s(needle)).be_u8s() & u8s(0x01))
}).scalar_reduce(0, |acc, s| acc + (s as usize))
acc + v.eq_mask(u8s(needle)).be_u8s() & u8s(0x01)
}).sum_upcast() as usize;

ret + final_chunk
}
}

Expand Down Expand Up @@ -276,7 +279,18 @@ pub fn num_chars(haystack: &[u8]) -> usize {
num_chars_generic::<usize>(32, haystack)
}

/// f
/// Count the number of UTF-8 encoded Unicode code points in a slice of bytes, fast.
///
/// This function is safe to use on any byte array, valid UTF-8 or not,
/// but the output is only meaningful for well-formed UTF-8.
///
/// # Example
///
/// ```
/// let swordfish = "メカジキ";
/// let char_count = bytecount::num_chars(swordfish.as_bytes());
/// assert_eq!(char_count, 4);
/// ```
#[cfg(feature = "simd-accel")]
pub fn num_chars(haystack: &[u8]) -> usize {
if haystack.len() < 100 {
Expand All @@ -288,14 +302,17 @@ pub fn num_chars(haystack: &[u8]) -> usize {
ret += (&haystack[i * u8s::WIDTH * 255..(i + 1) * u8s::WIDTH * 255])
.simd_iter()
.simd_reduce(u8s(0), u8s(0), |acc, v| {
acc + (PackedEq::eq(&(v & u8s(0xC0)), &u8s(0x80)).be_u8s() & u8s(0x01))
}).scalar_reduce(0, |acc, s| acc + (s as usize));
acc + (v & u8s(0xC0)).eq_mask(u8s(0x80)).be_u8s() & u8s(0x01)
}).sum_upcast() as usize;
}
haystack.len() - ret - (&haystack[haystack.len() - haystack.len() % (u8s::WIDTH * 255)..])

let final_chunk = (&haystack[haystack.len() - haystack.len() % (u8s::WIDTH * 255)..])
.simd_iter()
.simd_reduce(u8s(0), u8s(0), |acc, v| {
acc + (PackedEq::eq(&(v & u8s(0xC0)), &u8s(0x80)).be_u8s() & u8s(0x01))
}).scalar_reduce(0, |acc, s| acc + (s as usize))
acc + (v & u8s(0xC0)).eq_mask(u8s(0x80)).be_u8s() & u8s(0x01)
}).sum_upcast() as usize;

haystack.len() - ret - final_chunk
}
}

Expand Down

0 comments on commit 64fe73a

Please sign in to comment.