Skip to content

Commit

Permalink
feat(compression): Add Fingerprint compression code
Browse files Browse the repository at this point in the history
  • Loading branch information
Holzhaus committed Oct 4, 2024
1 parent a7c4e5e commit 17f09d7
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 126 deletions.
326 changes: 200 additions & 126 deletions chromaprint/src/compression.rs
Original file line number Diff line number Diff line change
@@ -1,158 +1,232 @@
/// Packs the `N` least significant bits of every value into a contiguous bitstream.
///
/// Each `u32` is first narrowed to a `u8`, then handed to the bit writer in input order.
/// A trailing partially filled byte is emitted by the final `flush`.
pub fn pack<const N: usize>(values: &[u32]) -> Vec<u8> {
    let mut packed = vec![];
    let mut bit_writer = BitWriter::new(&mut packed);
    // Pre-size the output: N bits per value, rounded up to whole bytes.
    bit_writer.buffer.reserve((values.len() * N + 7) / 8);
    values
        .iter()
        .for_each(|&value| bit_writer.write_bits::<N>(value as u8));
    bit_writer.flush();
    packed
}
use crate::Configuration;

/// Unpacks a bitstream of consecutive `N`-bit numbers into a vector of `u32`.
///
/// Reading stops as soon as the bit reader cannot supply another `N`-bit value.
pub fn unpack<const N: usize>(bytes: &[u8]) -> Vec<u32> {
    let mut bit_reader = BitReader::new(bytes);
    std::iter::from_fn(|| bit_reader.read_bits::<N>())
        .map(u32::from)
        .collect()
}
/// Number of bits from which the largest "normal" value is derived.
const NORMAL_BITS: u8 = 3;
/// Largest value representable in `NORMAL_BITS` bits (`7`).
///
/// A value greater than or equal to this one is "exceptional": it is stored as this marker
/// plus a separate remainder (`value - MAX_NORMAL_VALUE`).
const MAX_NORMAL_VALUE: u8 = (1 << NORMAL_BITS) - 1;

/// Create a bitmask with `n` least significant bits set to `1`.
const fn mask_n_bits(n: usize) -> usize {
(1 << n) - 1
/// Turns an object (e.g. a `u32`) into an iterator over its bits.
trait IntoBitIterator {
/// Consumes the item and returns an iterator over its bits, one `bool` per bit.
fn into_bit_iter(self) -> impl Iterator<Item = bool>;
}

struct BitWriter<'b> {
buffer: &'b mut Vec<u8>,
current_byte: u8,
/// Number of bits written into `current_byte`.
written_bits: usize,
impl IntoBitIterator for u32 {
    /// Yields all 32 bits, least significant bit first; `true` means the bit is set.
    fn into_bit_iter(self) -> impl Iterator<Item = bool> {
        (0..Self::BITS).map(move |shift| (self & (1 << shift)) != 0)
    }
}

impl<'b> BitWriter<'b> {
fn new(buffer: &'b mut Vec<u8>) -> Self {
Self {
buffer,
current_byte: 0,
written_bits: 0,
}
/// Compresses fingerprints into a compact byte representation.
///
/// Borrows the [`Configuration`] whose `id()` is written as the first byte of the
/// compressed output.
pub struct FingerprintCompressor<'a>(&'a Configuration);

impl<'a> FingerprintCompressor<'a> {
/// Compress a sub-fingerprint.
///
/// Every set bit is encoded as the distance between its position and the position of the
/// previously emitted set bit. Positions are counted from `1` (the least significant bit is
/// position `1`) and the "previous" position starts at `0`, so every distance is at least
/// `1`. This keeps `0` free to act as the terminator that closes each sub-fingerprint.
///
/// Each item is a pair `(normal, exceptional)`:
/// * distances below `MAX_NORMAL_VALUE` yield `(distance, None)`;
/// * larger distances yield `(MAX_NORMAL_VALUE, Some(distance - MAX_NORMAL_VALUE))`;
/// * the final item is always the terminator `(0, None)`.
fn compress_subfingerprint(subfingerprint: u32) -> impl Iterator<Item = (u8, Option<u8>)> {
    subfingerprint
        .into_bit_iter()
        // Pair every bit with its 1-based position. `zip` stops after the 32 bits are
        // exhausted, so the `u8` counter never gets anywhere near overflowing.
        .zip(1u8..)
        .filter_map(|(is_bit_set, bit_position)| is_bit_set.then_some(bit_position))
        .scan(0u8, |last_bit_position, bit_position| {
            // Positions are strictly increasing, so this subtraction cannot underflow.
            let value = bit_position - *last_bit_position;
            let result = if value >= MAX_NORMAL_VALUE {
                (MAX_NORMAL_VALUE, Some(value - MAX_NORMAL_VALUE))
            } else {
                (value, None)
            };

            *last_bit_position = bit_position;
            Some(result)
        })
        // Terminator marking the end of this sub-fingerprint.
        .chain(std::iter::once((0, None)))
}

#[inline]
fn write_bits<const BITS: usize>(&mut self, val: u8) {
assert!(BITS <= 8);
// Mask out bits we don't need.
let val = val & mask_n_bits(BITS) as u8;
if self.written_bits + BITS < 8 {
// We have space for new bits in the current byte so just add them to it.
self.current_byte <<= BITS;
self.current_byte |= val;
self.written_bits += BITS;
} else if self.written_bits + BITS == 8 {
// We have just enough space for new bits to make a single byte.
self.current_byte <<= BITS;
self.current_byte |= val;
self.buffer.push(self.current_byte);
self.current_byte = 0;
self.written_bits = 0;
} else {
// We will overflow some bits...
let overflowing_bits = (self.written_bits + BITS) - 8;
// ... and create a new whole byte from previously saved bits and some of new bits.
let fitting_bits = BITS - overflowing_bits;
self.current_byte <<= fitting_bits;
self.current_byte |= val >> overflowing_bits;
self.buffer.push(self.current_byte);
// Now we just save the remaining bits.
self.current_byte = val & mask_n_bits(overflowing_bits) as u8;
self.written_bits = overflowing_bits;
}
/// Compress the fingerprint.
///
/// Each sub-fingerprint is XOR-ed with its predecessor (the first one with `0`) and the
/// result is run through `compress_subfingerprint`. The output consists of:
///
/// 1. a 4-byte header: the configuration id followed by the low 24 bits of the number of
///    sub-fingerprints, most significant byte first;
/// 2. all "normal" values, packed with 3 bits each;
/// 3. all "exceptional" values, packed with 5 bits each.
pub fn compress(&self, fingerprint: &[u32]) -> Vec<u8> {
    let size = fingerprint.len();
    let mut normal_bits = Vec::<u8>::with_capacity(size);
    let mut exceptional_bits = Vec::<u8>::with_capacity(size);

    let mut previous_subfp = 0u32;
    for &current_subfp in fingerprint {
        // Only the bits that changed relative to the previous sub-fingerprint are encoded.
        let changed_bits = current_subfp ^ previous_subfp;
        previous_subfp = current_subfp;

        for (normal_value, exceptional_value) in Self::compress_subfingerprint(changed_bits) {
            normal_bits.push(normal_value);
            // Appends the remainder only when there is one.
            exceptional_bits.extend(exceptional_value);
        }
    }

    let header_size = 4;
    let normal_size = packed_intn_array_len(normal_bits.len(), 3);
    let exceptional_size = packed_intn_array_len(exceptional_bits.len(), 5);
    let expected_size = header_size + normal_size + exceptional_size;

    // The length is deliberately reduced to three bytes; each cast keeps one masked byte.
    #[allow(clippy::cast_possible_truncation)]
    let header = [
        self.0.id(),
        ((size >> 16) & 0xFF) as u8,
        ((size >> 8) & 0xFF) as u8,
        (size & 0xFF) as u8,
    ];

    let mut output: Vec<u8> = Vec::with_capacity(expected_size);
    output.extend_from_slice(&header);
    output.extend(iter_packed_intn_array::<3>(&normal_bits));
    output.extend(iter_packed_intn_array::<5>(&exceptional_bits));
    debug_assert_eq!(output.len(), expected_size);
    output
}
}

fn flush(&mut self) {
if self.written_bits != 0 {
// Finish the current byte by adding some padding.
self.buffer.push(self.current_byte << (8 - self.written_bits as u32));
self.written_bits = 0;
self.current_byte = 0;
}
impl<'a> From<&'a Configuration> for FingerprintCompressor<'a> {
fn from(value: &'a Configuration) -> Self {
Self(value)
}
}

struct BitReader<'b> {
bytes: &'b [u8],
current_byte: u8,
remaining_bits: usize,
/// Calculate the size in bytes of a packed `Int<N>` array.
///
/// `array_len` is the number of values and `n` the number of bits stored per value; the
/// result is the total bit count rounded up to whole bytes.
const fn packed_intn_array_len(array_len: usize, n: usize) -> usize {
    // `div_ceil` rounds up without the `+ 7` that could overflow close to `usize::MAX`.
    (array_len * n).div_ceil(8)
}

impl<'b> BitReader<'b> {
/// Creates a reader over `bytes` with no bits buffered yet.
fn new(bytes: &'b [u8]) -> Self {
Self {
bytes,
current_byte: 0,
remaining_bits: 0,
}
}

/// Reads the next `BITS` bits from the stream and returns them in the low bits of a `u8`.
///
/// Returns `None` when fewer than `BITS` bits are buffered and there is no further input
/// byte to refill from; bits still buffered at that point are not returned.
///
/// # Panics
///
/// Panics if `BITS` is `0` or greater than `8`.
#[inline]
fn read_bits<const BITS: usize>(&mut self) -> Option<u8> {
assert!(BITS > 0 && BITS <= 8);

if self.remaining_bits >= BITS {
// Enough bits are buffered. `current_byte` keeps its unread bits left-aligned, so the
// value is in the top `BITS` bits; shifting left afterwards drops the consumed bits.
let bits = (self.current_byte >> (8 - BITS)) & (mask_n_bits(BITS) as u8);
self.current_byte <<= BITS;
self.remaining_bits -= BITS;
Some(bits)
} else {
// Not enough buffered bits: pull in the next input byte, or stop if there is none.
let [next_byte, rest @ ..] = self.bytes else {
return None;
};
self.bytes = rest;

// The value is made of the buffered bits (high part) followed by the first
// `bits_from_next_byte` bits of the new byte (low part).
let bits_from_next_byte = BITS - self.remaining_bits;
let remaining_bits_from_next_byte = 8 - bits_from_next_byte;
let bits = (self.current_byte >> (8 - BITS)) | (next_byte >> remaining_bits_from_next_byte);
// Keep the unread rest of the new byte left-aligned for the following reads.
// NOTE(review): with `BITS == 8` and an empty buffer this shifts a `u8` by 8, which
// overflows in debug builds — confirm whether 8 is really meant to be supported.
self.current_byte = next_byte << bits_from_next_byte;
self.remaining_bits = remaining_bits_from_next_byte;
Some(bits)
}
}
/// Iterate over the bytes of `array` packed as an `Int<N>` array.
///
/// Only the `N` least significant bits of every input byte are kept. Values are laid out
/// back to back starting at the least significant bit of the first output byte, so eight
/// values always occupy exactly `N` output bytes. A final group of fewer than eight values
/// yields only as many bytes as are needed to hold its bits.
fn iter_packed_intn_array<const N: usize>(array: &[u8]) -> impl Iterator<Item = u8> + '_ {
    let mask: u8 = (0xFF << (8 - N)) >> (8 - N);
    array.chunks(8).flat_map(move |chunk| {
        // Up to 8 values of at most 8 bits each fit into one 64-bit accumulator.
        let accumulator = chunk
            .iter()
            .enumerate()
            .fold(0u64, |acc, (i, value)| {
                acc | (u64::from(value & mask) << (i * N))
            });

        // The little-endian byte order of the accumulator is the packed layout.
        let mut packed = [0u8; N];
        for (slot, byte) in packed.iter_mut().zip(accumulator.to_le_bytes()) {
            *slot = byte;
        }

        // Number of bytes actually occupied by this (possibly short) chunk.
        let used_bytes = (chunk.len() * N + 7) / 8;
        packed.into_iter().take(used_bytes)
    })
}

#[cfg(test)]
mod tests {
use super::{mask_n_bits, pack, unpack};
use super::*;

const ONE_BYTE: [u8; 1] = [0b1011_1010];
const NINE_BYTES: [u8; 9] = [
0b1010_1010,
0b0011_0011,
0b1100_1100,
0b1100_0111,
0b0101_0101,
0b1100_1100,
0b1010_1010,
0b0000_0000,
0b1111_1111,
];
const SIXTYFOUR_BYTES: [u8; 64] = [
0xA2, 0x87, 0xE3, 0xED, 0xAA, 0xD7, 0xE8, 0x94, 0x53, 0x4E, 0x9B, 0xD5, 0x83, 0x12, 0x05,
0x43, 0x67, 0x7E, 0x0A, 0xAF, 0x2D, 0x85, 0xB4, 0x03, 0xEB, 0x13, 0x8E, 0x47, 0x07, 0xA6,
0x76, 0x5D, 0x43, 0x67, 0x8D, 0x9F, 0xEA, 0xAD, 0x3F, 0x34, 0x86, 0xF4, 0x25, 0xC8, 0xA2,
0xBF, 0xF1, 0x22, 0xB5, 0xA6, 0xB8, 0x4A, 0xED, 0xA2, 0xF5, 0x25, 0xDB, 0x62, 0x70, 0xC2,
0xB7, 0x9C, 0xB1, 0x3C,
];

fn packing_n<const N: usize>() {
let values: Vec<_> = (0..1024 * 1024).collect();
#[test]
fn test_iter_packed_int3_array_single_byte() {
const N: usize = 3;
let packed = iter_packed_intn_array::<N>(&ONE_BYTE).collect::<Vec<u8>>();
assert_eq!(packed.len(), packed_intn_array_len(ONE_BYTE.len(), N));
assert_eq!(&packed, &[0b0000_0010]);
}

let packed = pack::<N>(&values);
let unpacked = unpack::<N>(&packed);
#[test]
fn test_iter_packed_int3_array_some_bytes() {
const N: usize = 3;
let packed = iter_packed_intn_array::<N>(&NINE_BYTES).collect::<Vec<u8>>();
assert_eq!(packed.len(), packed_intn_array_len(NINE_BYTES.len(), N));
assert_eq!(
&packed,
&[0b0001_1010, 0b0101_1111, 0b0000_1010, 0b0000_0111]
);
}

for (a, b) in values.iter().copied().zip(unpacked.iter().copied()) {
assert_eq!(a & mask_n_bits(N) as u32, b);
}
#[test]
fn test_iter_packed_int3_array_many_bytes() {
const N: usize = 3;
let packed = iter_packed_intn_array::<N>(&SIXTYFOUR_BYTES).collect::<Vec<u8>>();
assert_eq!(
packed.len(),
packed_intn_array_len(SIXTYFOUR_BYTES.len(), N)
);
assert_eq!(
&packed,
&[
0xFA, 0xAA, 0x83, 0xF3, 0x3A, 0x75, 0xB7, 0xDE, 0x72, 0x9B, 0x7F, 0xBB, 0x7B, 0xAF,
0x9E, 0x66, 0xA1, 0x47, 0x35, 0x54, 0xB5, 0x13, 0x74, 0x86
],
);
}

#[test]
fn packing_3() {
packing_n::<3>();
fn test_iter_packed_int5_array_many_bytes() {
const N: usize = 5;
let packed = iter_packed_intn_array::<N>(&SIXTYFOUR_BYTES).collect::<Vec<u8>>();
assert_eq!(
packed.len(),
packed_intn_array_len(SIXTYFOUR_BYTES.len(), N)
);
assert_eq!(
&packed,
&[
0xE2, 0x8C, 0xA6, 0x2E, 0xA2, 0xD3, 0xED, 0x3A, 0x64, 0x19, 0xC7, 0xAB, 0xD7, 0x0A,
0x1D, 0x6B, 0xBA, 0x73, 0x8C, 0xED, 0xE3, 0xB4, 0xAF, 0xDA, 0xA7, 0x86, 0x16, 0x24,
0x7E, 0x14, 0xD5, 0x60, 0xD5, 0x44, 0x2D, 0x5B, 0x40, 0x71, 0x79, 0xE4,
],
);
}

#[test]
fn packing_5() {
packing_n::<5>();
fn test_iter_packed_int5_array_single_byte() {
const N: usize = 5;
let packed = iter_packed_intn_array::<N>(&ONE_BYTE).collect::<Vec<u8>>();
assert_eq!(packed.len(), packed_intn_array_len(ONE_BYTE.len(), N));
assert_eq!(&packed, &[0b0001_1010]);
}

#[test]
fn padding() {
let vals = vec![0b11100000u8];
let unpacked = unpack::<3>(&vals);
assert_eq!(unpacked, &[7, 0]);
fn test_iter_packed_int5_array_some_bytes() {
const N: usize = 5;
let packed = iter_packed_intn_array::<N>(&NINE_BYTES).collect::<Vec<u8>>();
assert_eq!(packed.len(), packed_intn_array_len(NINE_BYTES.len(), N));
assert_eq!(
&packed,
&[
0b0110_1010,
0b1011_0010,
0b0101_0011,
0b1001_1001,
0b0000_0010,
0b0001_1111
]
);
}
}
}
1 change: 1 addition & 0 deletions chromaprint/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
pub use audio_processor::ResetError;
pub use fingerprint_matcher::{match_fingerprints, Segment, MatchError};
pub use fingerprinter::{Configuration, Fingerprinter};
pub use compression::FingerprintCompressor;

mod audio_processor;
mod chroma;
Expand Down

0 comments on commit 17f09d7

Please sign in to comment.