From 867a154882427bbc4d44f11522ea87d652d46ea7 Mon Sep 17 00:00:00 2001 From: Jan Holthuis Date: Fri, 4 Oct 2024 20:31:36 +0200 Subject: [PATCH 1/6] feat(fingerprinter): Derive `Debug` and `Clone` for `Configuration` --- chromaprint/src/classifier.rs | 2 +- chromaprint/src/filter.rs | 6 +++--- chromaprint/src/fingerprinter.rs | 1 + chromaprint/src/quantize.rs | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/chromaprint/src/classifier.rs b/chromaprint/src/classifier.rs index 348e578..d80a41c 100644 --- a/chromaprint/src/classifier.rs +++ b/chromaprint/src/classifier.rs @@ -1,7 +1,7 @@ use crate::filter::{Filter, Image}; use crate::quantize::Quantizer; -#[derive(Clone, Copy)] +#[derive(Debug, Clone, Copy)] pub struct Classifier { filter: Filter, quantizer: Quantizer, diff --git a/chromaprint/src/filter.rs b/chromaprint/src/filter.rs index 8f2e95c..f7356b9 100644 --- a/chromaprint/src/filter.rs +++ b/chromaprint/src/filter.rs @@ -1,4 +1,4 @@ -#[derive(Clone, Copy)] +#[derive(Debug, Clone, Copy)] pub struct Filter { kind: FilterKind, y: usize, @@ -6,7 +6,7 @@ pub struct Filter { width: usize, } -#[derive(Clone, Copy)] +#[derive(Debug, Clone, Copy)] pub enum FilterKind { Filter0, Filter1, @@ -290,4 +290,4 @@ mod tests { fn subtract(a: f64, b: f64) -> f64 { return a - b; } -} \ No newline at end of file +} diff --git a/chromaprint/src/fingerprinter.rs b/chromaprint/src/fingerprinter.rs index 4a80242..421a608 100644 --- a/chromaprint/src/fingerprinter.rs +++ b/chromaprint/src/fingerprinter.rs @@ -11,6 +11,7 @@ use crate::fingerprint_calculator::FingerprintCalculator; use crate::quantize::Quantizer; /// Structure containing configuration for a [Fingerprinter]. +#[derive(Debug, Clone)] pub struct Configuration { classifiers: Vec, remove_silence: bool, diff --git a/chromaprint/src/quantize.rs b/chromaprint/src/quantize.rs index d03019e..6bc8a04 100644 --- a/chromaprint/src/quantize.rs +++ b/chromaprint/src/quantize.rs @@ -1,4 +1,4 @@ -#[derive(Clone, Copy)] +#[derive(Debug, Clone, Copy)] pub struct Quantizer { t0: f64, t1: f64, From ca9997f58fb5d3cb07dee38313e4a783d3c2c00a Mon Sep 17 00:00:00 2001 From: Jan Holthuis Date: Fri, 4 Oct 2024 20:41:31 +0200 Subject: [PATCH 2/6] feat(fingerprinter): Add algorithm ID to configuration --- chromaprint/src/fingerprinter.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/chromaprint/src/fingerprinter.rs b/chromaprint/src/fingerprinter.rs index 421a608..c93bff6 100644 --- a/chromaprint/src/fingerprinter.rs +++ b/chromaprint/src/fingerprinter.rs @@ -13,6 +13,7 @@ use crate::quantize::Quantizer; /// Structure containing configuration for a [Fingerprinter]. #[derive(Debug, Clone)] pub struct Configuration { + id: u8, classifiers: Vec, remove_silence: bool, silence_threshold: u32, @@ -27,6 +28,7 @@ impl Configuration { /// Creates a new default configuration. fn new() -> Self { Self { + id: 0xFF, classifiers: Vec::new(), remove_silence: false, silence_threshold: 0, @@ -38,6 +40,14 @@ impl Configuration { } } + /// Adds an ID to the configuration. + /// + /// This ID is used for fingerprint compression. + pub fn with_id(mut self, id: u8) -> Self { + self.id = id; + self + } + /// Adds classifiers to the configuration. pub fn with_classifiers(mut self, classifiers: Vec) -> Self { self.max_filter_width = classifiers.iter() @@ -86,6 +96,7 @@ impl Configuration { pub fn preset_test1() -> Self { Self::new() + .with_id(0) .with_classifiers(CLASSIFIER_TEST1.into()) .with_coefficients(CHROMA_FILTER_COEFFICIENTS.into()) .with_interpolation(false) @@ -95,6 +106,7 @@ impl Configuration { pub fn preset_test2() -> Self { Self::new() + .with_id(1) .with_classifiers(CLASSIFIER_TEST2.into()) .with_coefficients(CHROMA_FILTER_COEFFICIENTS.into()) .with_interpolation(false) @@ -104,6 +116,7 @@ impl Configuration { pub fn preset_test3() -> Self { Self::new() + .with_id(2) .with_classifiers(CLASSIFIER_TEST3.into()) .with_coefficients(CHROMA_FILTER_COEFFICIENTS.into()) .with_interpolation(true) @@ -113,11 +126,13 @@ impl Configuration { pub fn preset_test4() -> Self { Self::new() + .with_id(3) .with_removed_silence(50) } pub fn preset_test5() -> Self { Self::new() + .with_id(4) .with_frame_size(DEFAULT_FRAME_SIZE / 2) .with_frame_overlap(DEFAULT_FRAME_SIZE / 2 - DEFAULT_FRAME_SIZE / 4) } @@ -126,6 +141,11 @@ impl Configuration { self.frame_size - self.frame_overlap } + /// The algorithm ID of this configuration (only used for fingerprint compression). + pub fn id(&self) -> u8 { + self.id + } + /// A duration of a single item from the fingerprint. pub fn item_duration_in_seconds(&self) -> f32 { self.samples_in_item() as f32 / self.sample_rate() as f32 From a7c4e5eee2438e5519f308217d6964e646ed6ddd Mon Sep 17 00:00:00 2001 From: Jan Holthuis Date: Sat, 5 Oct 2024 01:04:05 +0200 Subject: [PATCH 3/6] feat(fingerprinter): Add `delay()` method --- chromaprint/src/fingerprinter.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/chromaprint/src/fingerprinter.rs b/chromaprint/src/fingerprinter.rs index c93bff6..052b7d1 100644 --- a/chromaprint/src/fingerprinter.rs +++ b/chromaprint/src/fingerprinter.rs @@ -150,6 +150,13 @@ impl Configuration { pub fn item_duration_in_seconds(&self) -> f32 { self.samples_in_item() as f32 / self.sample_rate() as f32 } + + /// Get the delay. + pub fn delay(&self) -> usize { + ((self.filter_coefficients.len() - 1) + (self.max_filter_width - 1)) + * self.samples_in_item() + + self.frame_overlap + } } impl Default for Configuration { From 7ba90dcd5c5ef528a4235d51c0496fdaa514b00e Mon Sep 17 00:00:00 2001 From: Jan Holthuis Date: Sat, 5 Oct 2024 01:02:56 +0200 Subject: [PATCH 4/6] feat(compression): Add Fingerprint compression code --- chromaprint/src/compression.rs | 357 +++++++++++++++++++++------------ chromaprint/src/lib.rs | 1 + 2 files changed, 234 insertions(+), 124 deletions(-) diff --git a/chromaprint/src/compression.rs b/chromaprint/src/compression.rs index 9cc3100..06f58d3 100644 --- a/chromaprint/src/compression.rs +++ b/chromaprint/src/compression.rs @@ -1,158 +1,267 @@ -/// Pack N least significant bits from each one value into a bitstream. -pub fn pack(values: &[u32]) -> Vec { - let mut buf = vec![]; - let mut writer = BitWriter::new(&mut buf); - writer.buffer.reserve((values.len() * N + 7) / 8); - for val in values { - writer.write_bits::(*val as u8); - } - writer.flush(); - buf +use crate::Configuration; + +/// Number of "normal" bits. +const NORMAL_BITS: u8 = 3; +/// Maximum "normal" value above which a value becomes "exceptional". +const MAX_NORMAL_VALUE: u8 = (1 << NORMAL_BITS) - 1; + +/// Turns an object (e.g. an `u32`) over an iterator of bits. +trait IntoBitIterator { + /// Converts the item into an an iterator over its bits. + fn into_bit_iter(self) -> impl Iterator; } -/// Unpack bitstream of N bit numbers into an array. -pub fn unpack(bytes: &[u8]) -> Vec { - let mut buf = vec![]; - let mut reader = BitReader::new(bytes); - while let Some(bits) = reader.read_bits::() { - buf.push(bits as u32); +impl IntoBitIterator for u32 { + fn into_bit_iter(self) -> impl Iterator { + (0..Self::BITS).map(move |index| ((self >> index) & 1) == 1) } - buf } -/// Create a bitmask with `n` least significant bits set to `1`. -const fn mask_n_bits(n: usize) -> usize { - (1 << n) - 1 -} +pub struct FingerprintCompressor<'a>(&'a Configuration); -struct BitWriter<'b> { - buffer: &'b mut Vec, - current_byte: u8, - /// Number of bits written into `current_byte`. - written_bits: usize, -} +impl<'a> FingerprintCompressor<'a> { + /// Compress a sub-fingerprint. + fn compress_subfingerprint(subfingerprint: u32) -> impl Iterator)> { + subfingerprint + .into_bit_iter() + .enumerate() + .filter_map(|(bit_index, is_bit_set)| { + is_bit_set.then_some(u8::try_from(bit_index + 1).unwrap()) + }) + .scan(0, |last_bit_index, bit_index| { + let value = bit_index - *last_bit_index; + let result = if value >= MAX_NORMAL_VALUE { + (MAX_NORMAL_VALUE, Some(value - MAX_NORMAL_VALUE)) + } else { + (value, None) + }; -impl<'b> BitWriter<'b> { - fn new(buffer: &'b mut Vec) -> Self { - Self { - buffer, - current_byte: 0, - written_bits: 0, - } + *last_bit_index = bit_index; + Some(result) + }) + .chain(std::iter::once((0, None))) } - #[inline] - fn write_bits(&mut self, val: u8) { - assert!(BITS <= 8); - // Mask out bits we don't need. - let val = val & mask_n_bits(BITS) as u8; - if self.written_bits + BITS < 8 { - // We have space for new bits in the current byte so just add them to it. - self.current_byte <<= BITS; - self.current_byte |= val; - self.written_bits += BITS; - } else if self.written_bits + BITS == 8 { - // We have just enough space for new bits to make a single byte. - self.current_byte <<= BITS; - self.current_byte |= val; - self.buffer.push(self.current_byte); - self.current_byte = 0; - self.written_bits = 0; - } else { - // We will overflow some bits... - let overflowing_bits = (self.written_bits + BITS) - 8; - // ... and create a new whole byte from previously saved bits and some of new bits. - let fitting_bits = BITS - overflowing_bits; - self.current_byte <<= fitting_bits; - self.current_byte |= val >> overflowing_bits; - self.buffer.push(self.current_byte); - // Now we just save the remaining bits. - self.current_byte = val & mask_n_bits(overflowing_bits) as u8; - self.written_bits = overflowing_bits; - } + /// Compress the fingerprint. + pub fn compress(&self, fingerprint: &[u32]) -> Vec { + let size = fingerprint.len(); + let (normal_bits, exceptional_bits) = fingerprint + .iter() + .scan(0, |last_subfp, current_subfp| { + let value = current_subfp ^ *last_subfp; + *last_subfp = *current_subfp; + Some(value) + }) + .flat_map(Self::compress_subfingerprint) + .fold( + ( + Vec::::with_capacity(size), + Vec::::with_capacity(size), + ), + |(mut normal_bits, mut exceptional_bits), (normal_value, exceptional_value)| { + normal_bits.push(normal_value); + if let Some(exceptional_value) = exceptional_value { + exceptional_bits.push(exceptional_value); + } + (normal_bits, exceptional_bits) + }, + ); + + let header_size = 4; + let normal_size = packed_intn_array_len(normal_bits.len(), 3); + let exceptional_size = packed_intn_array_len(exceptional_bits.len(), 5); + let expected_size = header_size + normal_size + exceptional_size; + + #[allow(clippy::cast_possible_truncation)] + let output = [ + self.0.id(), + ((size >> 16) & 0xFF) as u8, + ((size >> 8) & 0xFF) as u8, + (size & 0xFF) as u8, + ]; + + let output = output + .into_iter() + .chain(iter_packed_intn_array::<3>(&normal_bits)) + .chain(iter_packed_intn_array::<5>(&exceptional_bits)) + .collect::>(); + debug_assert_eq!(output.len(), expected_size); + output } +} - fn flush(&mut self) { - if self.written_bits != 0 { - // Finish the current byte by adding some padding. - self.buffer.push(self.current_byte << (8 - self.written_bits as u32)); - self.written_bits = 0; - self.current_byte = 0; - } +impl<'a> From<&'a Configuration> for FingerprintCompressor<'a> { + fn from(value: &'a Configuration) -> Self { + Self(value) } } -struct BitReader<'b> { - bytes: &'b [u8], - current_byte: u8, - remaining_bits: usize, +/// Calculate the size of a packed Int array. +const fn packed_intn_array_len(array_len: usize, n: usize) -> usize { + (array_len * n + 7) / 8 } -impl<'b> BitReader<'b> { - fn new(bytes: &'b [u8]) -> Self { - Self { - bytes, - current_byte: 0, - remaining_bits: 0, - } - } +/// Iterate bytes as packed Int array. +fn iter_packed_intn_array(array: &[u8]) -> impl Iterator + '_ { + let mask = (0xFF << (8 - N)) >> (8 - N); + array.chunks(8).flat_map(move |slice| { + let (size, result) = slice.iter().map(|s| s & mask).enumerate().fold( + (0, [0u8; N]), + |(_, mut result), (i, bits)| { + let rightmost_bit_index = i * N; + let leftmost_bit_index = rightmost_bit_index + N - 1; - #[inline] - fn read_bits(&mut self) -> Option { - assert!(BITS > 0 && BITS <= 8); - - if self.remaining_bits >= BITS { - // Just read bits from the current byte. - let bits = (self.current_byte >> (8 - BITS)) & (mask_n_bits(BITS) as u8); - self.current_byte <<= BITS; - self.remaining_bits -= BITS; - Some(bits) - } else { - // Try read next byte. - let [next_byte, rest @ ..] = self.bytes else { - return None; - }; - self.bytes = rest; - - let bits_from_next_byte = BITS - self.remaining_bits; - let remaining_bits_from_next_byte = 8 - bits_from_next_byte; - let bits = (self.current_byte >> (8 - BITS)) | (next_byte >> remaining_bits_from_next_byte); - self.current_byte = next_byte << bits_from_next_byte; - self.remaining_bits = remaining_bits_from_next_byte; - Some(bits) - } - } + let right_byte = rightmost_bit_index / 8; + let left_byte = leftmost_bit_index / 8; + + result[right_byte] |= bits << (rightmost_bit_index % 8); + if left_byte != right_byte { + result[left_byte] |= bits >> ((8 - (rightmost_bit_index % 8)) % 8); + } + + (left_byte + 1, result) + }, + ); + result.into_iter().take(size) + }) } #[cfg(test)] mod tests { - use super::{mask_n_bits, pack, unpack}; + use super::*; - fn packing_n() { - let values: Vec<_> = (0..1024 * 1024).collect(); + const ONE_BYTE: [u8; 1] = [0b1011_1010]; + const NINE_BYTES: [u8; 9] = [ + 0b1010_1010, + 0b0011_0011, + 0b1100_1100, + 0b1100_0111, + 0b0101_0101, + 0b1100_1100, + 0b1010_1010, + 0b0000_0000, + 0b1111_1111, + ]; + const SIXTYFOUR_BYTES: [u8; 64] = [ + 0xA2, 0x87, 0xE3, 0xED, 0xAA, 0xD7, 0xE8, 0x94, 0x53, 0x4E, 0x9B, 0xD5, 0x83, 0x12, 0x05, + 0x43, 0x67, 0x7E, 0x0A, 0xAF, 0x2D, 0x85, 0xB4, 0x03, 0xEB, 0x13, 0x8E, 0x47, 0x07, 0xA6, + 0x76, 0x5D, 0x43, 0x67, 0x8D, 0x9F, 0xEA, 0xAD, 0x3F, 0x34, 0x86, 0xF4, 0x25, 0xC8, 0xA2, + 0xBF, 0xF1, 0x22, 0xB5, 0xA6, 0xB8, 0x4A, 0xED, 0xA2, 0xF5, 0x25, 0xDB, 0x62, 0x70, 0xC2, + 0xB7, 0x9C, 0xB1, 0x3C, + ]; - let packed = pack::(&values); - let unpacked = unpack::(&packed); + #[test] + fn test_iter_packed_int3_array_single_byte() { + const N: usize = 3; + let packed = iter_packed_intn_array::(&ONE_BYTE).collect::>(); + assert_eq!(packed.len(), packed_intn_array_len(ONE_BYTE.len(), N)); + assert_eq!(&packed, &[0b0000_0010]); + } + + #[test] + fn test_iter_packed_int3_array_some_bytes() { + const N: usize = 3; + let packed = iter_packed_intn_array::(&NINE_BYTES).collect::>(); + assert_eq!(packed.len(), packed_intn_array_len(NINE_BYTES.len(), N)); + assert_eq!( + &packed, + &[0b0001_1010, 0b0101_1111, 0b0000_1010, 0b0000_0111] + ); + } + + #[test] + fn test_iter_packed_int3_array_many_bytes() { + const N: usize = 3; + let packed = iter_packed_intn_array::(&SIXTYFOUR_BYTES).collect::>(); + assert_eq!( + packed.len(), + packed_intn_array_len(SIXTYFOUR_BYTES.len(), N) + ); + assert_eq!( + &packed, + &[ + 0xFA, 0xAA, 0x83, 0xF3, 0x3A, 0x75, 0xB7, 0xDE, 0x72, 0x9B, 0x7F, 0xBB, 0x7B, 0xAF, + 0x9E, 0x66, 0xA1, 0x47, 0x35, 0x54, 0xB5, 0x13, 0x74, 0x86 + ], + ); + } - for (a, b) in values.iter().copied().zip(unpacked.iter().copied()) { - assert_eq!(a & mask_n_bits(N) as u32, b); - } + #[test] + fn test_iter_packed_int5_array_many_bytes() { + const N: usize = 5; + let packed = iter_packed_intn_array::(&SIXTYFOUR_BYTES).collect::>(); + assert_eq!( + packed.len(), + packed_intn_array_len(SIXTYFOUR_BYTES.len(), N) + ); + assert_eq!( + &packed, + &[ + 0xE2, 0x8C, 0xA6, 0x2E, 0xA2, 0xD3, 0xED, 0x3A, 0x64, 0x19, 0xC7, 0xAB, 0xD7, 0x0A, + 0x1D, 0x6B, 0xBA, 0x73, 0x8C, 0xED, 0xE3, 0xB4, 0xAF, 0xDA, 0xA7, 0x86, 0x16, 0x24, + 0x7E, 0x14, 0xD5, 0x60, 0xD5, 0x44, 0x2D, 0x5B, 0x40, 0x71, 0x79, 0xE4, + ], + ); } #[test] - fn packing_3() { - packing_n::<3>(); + fn test_iter_packed_int5_array_single_byte() { + const N: usize = 5; + let packed = iter_packed_intn_array::(&ONE_BYTE).collect::>(); + assert_eq!(packed.len(), packed_intn_array_len(ONE_BYTE.len(), N)); + assert_eq!(&packed, &[0b0001_1010]); } #[test] - fn packing_5() { - packing_n::<5>(); + fn test_iter_packed_int5_array_some_bytes() { + const N: usize = 5; + let packed = iter_packed_intn_array::(&NINE_BYTES).collect::>(); + assert_eq!(packed.len(), packed_intn_array_len(NINE_BYTES.len(), N)); + assert_eq!( + &packed, + &[ + 0b0110_1010, + 0b1011_0010, + 0b0101_0011, + 0b1001_1001, + 0b0000_0010, + 0b0001_1111 + ] + ); } #[test] - fn padding() { - let vals = vec![0b11100000u8]; - let unpacked = unpack::<3>(&vals); - assert_eq!(unpacked, &[7, 0]); + fn test_compression() { + const INPUT: [u32; 32] = [ + 0x0FCAF446, 0xE3519E89, 0xD3494DD6, 0x8F219806, 0x9200D530, 0x06B1D52F, 0xB48CC681, + 0x428991C3, 0x59AFBD6B, 0x6ECFB2E5, 0xE8EB7BC3, 0x99A44270, 0x31FFEC13, 0x4A4D81DA, + 0x53887C82, 0x2BB7BEC2, 0xAB895A65, 0x9D7C0AE4, 0xDA356857, 0xE030F7D8, 0x4D428EEE, + 0x0558E019, 0xC3278998, 0xA1D035E4, 0x582E98E5, 0x44C8B708, 0x2E8BA9E2, 0xCB13BC48, + 0xB169A3D8, 0x861274AF, 0x1213EF1C, 0x1F9F06B8, + ]; + + const OUTPUT: [u8; 220] = [ + 0x01, 0x00, 0x00, 0x20, 0x0A, 0xA9, 0x24, 0xD2, 0x92, 0x24, 0x48, 0x92, 0x45, 0x52, + 0x14, 0x65, 0x8B, 0x12, 0x24, 0x49, 0xA4, 0x4C, 0x61, 0x1E, 0x54, 0x89, 0xA4, 0x50, + 0x61, 0x22, 0x28, 0xCA, 0x94, 0xA9, 0x53, 0x82, 0x24, 0xC9, 0x19, 0x4D, 0x83, 0x12, + 0x29, 0x19, 0x95, 0x84, 0x8B, 0xA0, 0x2A, 0x91, 0xA4, 0x47, 0x49, 0x40, 0x69, 0x11, + 0xB3, 0x45, 0x81, 0x12, 0x26, 0xC9, 0xA3, 0x44, 0x81, 0xB2, 0x6D, 0xD9, 0x98, 0x22, + 0x59, 0x94, 0x25, 0x4B, 0x32, 0x31, 0x41, 0xC2, 0x2C, 0x91, 0x12, 0x45, 0x95, 0x90, + 0x2D, 0x51, 0x94, 0x2D, 0x4A, 0x94, 0x04, 0x8C, 0xA4, 0x24, 0x49, 0xC4, 0x64, 0xC1, + 0xD7, 0x24, 0x49, 0xE2, 0x24, 0x48, 0x32, 0x6D, 0x89, 0x92, 0xE4, 0xC8, 0x2B, 0x49, + 0x49, 0x14, 0x05, 0xC9, 0x22, 0x31, 0xDA, 0x94, 0x10, 0x49, 0xC2, 0x24, 0xC9, 0xA2, + 0x2B, 0x81, 0xA2, 0x6C, 0x49, 0xB6, 0x44, 0x8A, 0x84, 0x24, 0x4A, 0xA2, 0x44, 0x99, + 0xF2, 0x21, 0xCF, 0x14, 0x25, 0x49, 0xB2, 0x30, 0x58, 0x92, 0x30, 0x89, 0x92, 0x28, + 0x89, 0x18, 0xE4, 0x8A, 0xA4, 0x24, 0x49, 0xB2, 0x24, 0x41, 0x14, 0x25, 0x49, 0x22, + 0x66, 0xC9, 0x12, 0x48, 0x4A, 0x94, 0x84, 0xE9, 0xA4, 0x40, 0x92, 0x22, 0x3D, 0x8B, + 0x96, 0xA0, 0x4B, 0x92, 0x54, 0x49, 0xA6, 0x24, 0x48, 0xA2, 0x44, 0x89, 0x94, 0x44, + 0x49, 0x94, 0x28, 0x48, 0x16, 0x25, 0xCA, 0x72, 0x0D, 0x9B, 0x32, 0x25, 0x0B, 0xA3, + 0x00, 0xA1, 0x80, 0x01, 0x06, 0x00, 0x00, 0x04, 0x30, 0x00, + ]; + + let config = Configuration::default(); + let compressor = FingerprintCompressor::from(&config); + let output = compressor.compress(&INPUT); + assert_eq!(output, OUTPUT); } -} \ No newline at end of file +} diff --git a/chromaprint/src/lib.rs b/chromaprint/src/lib.rs index bdf52b5..cb0c9c2 100644 --- a/chromaprint/src/lib.rs +++ b/chromaprint/src/lib.rs @@ -3,6 +3,7 @@ pub use audio_processor::ResetError; pub use fingerprint_matcher::{match_fingerprints, Segment, MatchError}; pub use fingerprinter::{Configuration, Fingerprinter}; +pub use compression::FingerprintCompressor; mod audio_processor; mod chroma; From 3fd5786d3d1b2377a5c2510faa0eb201561dd2b8 Mon Sep 17 00:00:00 2001 From: Jan Holthuis Date: Fri, 4 Oct 2024 20:42:05 +0200 Subject: [PATCH 5/6] feat(fpcalc): Add command line interface using clap --- fpcalc/Cargo.toml | 2 + fpcalc/src/main.rs | 97 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 1 deletion(-) diff --git a/fpcalc/Cargo.toml b/fpcalc/Cargo.toml index 80980ea..bb6fa6e 100644 --- a/fpcalc/Cargo.toml +++ b/fpcalc/Cargo.toml @@ -6,3 +6,5 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +clap = { version = "4.5.19", features = ["derive"] } +rusty-chromaprint = {path = "../chromaprint"} diff --git a/fpcalc/src/main.rs b/fpcalc/src/main.rs index e7a11a9..be2c4da 100644 --- a/fpcalc/src/main.rs +++ b/fpcalc/src/main.rs @@ -1,3 +1,98 @@ +use clap::Parser; +use std::path::PathBuf; +use std::fmt; +use rusty_chromaprint::Configuration; + +#[derive(Default, Debug, Clone)] +struct Algorithm(Configuration); + +impl TryFrom<&str> for Algorithm { + type Error = &'static str; + + fn try_from(value: &str) -> Result { + let algorithm_id = value.parse::().map_err(|_| "value must be between an integer between 0 and 4")?; + let configuration = match algorithm_id { + 0 => Configuration::preset_test1(), + 1 => Configuration::preset_test2(), + 2 => Configuration::preset_test3(), + 3 => Configuration::preset_test4(), + 4 => Configuration::preset_test5(), + _ => { return Err("unknown algorithm ID"); }, + }; + debug_assert_eq!(configuration.id(), algorithm_id); + let algorithm = Algorithm(configuration); + Ok(algorithm) + } +} + +impl fmt::Display for Algorithm { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.id().fmt(f) + } +} + +/// Generate fingerprints from audio files/streams. +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Set the input format name + #[arg(short, long)] + format: Option, + + /// Set the sample rate of the input audio + #[arg(short, long)] + rate: Option, + + /// Set the number of channels in the input audio + #[arg(short, long)] + channels: Option, + + /// Restrict the duration of the processed input audio + #[arg(short, long, default_value_t = 120)] + length: usize, + + /// Split the input audio into chunks of this duration + #[arg(short = 'C', long)] + chunk: Option, + + /// Set the algorithm method. + #[arg(short, long, value_parser = |s: &str| Algorithm::try_from(s), default_value_t)] + algorithm: Algorithm, + + /// Overlap the chunks slightly to make sure audio on the edges is fingerprinted + #[arg(short, long)] + overlap: bool, + + /// Output UNIX timestamps for chunked results, useful when fingerprinting real-time audio stream + #[arg(short = 'T', long)] + ts: bool, + + /// Output fingerprints in the uncompressed format + #[arg(short = 'R', long)] + raw: bool, + + /// Change the uncompressed format from unsigned integers to signed (for pg_acoustid compatibility) + #[arg(short, long)] + signed: bool, + + /// Print the output in JSON format + #[arg(short, long)] + json: bool, + + /// Print the output in text format + #[arg(short, long)] + text: bool, + + /// Print the just the fingerprint in text format + #[arg(short, long)] + plain: bool, + + /// File to analyze + file: PathBuf, +} + fn main() { - println!("Hello, world!"); + let _args = Args::parse(); + + todo!(); } From 6282e1a431fd5127c19c70614fd639efa85fa1f2 Mon Sep 17 00:00:00 2001 From: Jan Holthuis Date: Sat, 5 Oct 2024 00:41:51 +0200 Subject: [PATCH 6/6] feat(fpcalc): Add preliminary `fpcalc` implementation --- fpcalc/Cargo.toml | 4 + fpcalc/src/main.rs | 387 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 373 insertions(+), 18 deletions(-) diff --git a/fpcalc/Cargo.toml b/fpcalc/Cargo.toml index bb6fa6e..67e0a02 100644 --- a/fpcalc/Cargo.toml +++ b/fpcalc/Cargo.toml @@ -6,5 +6,9 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +anyhow = "1.0" +base64 = "0.22.1" +chrono = "0.4.38" clap = { version = "4.5.19", features = ["derive"] } rusty-chromaprint = {path = "../chromaprint"} +symphonia = { version = "0.5.2", features = ["all"]} diff --git a/fpcalc/src/main.rs b/fpcalc/src/main.rs index be2c4da..802bb0a 100644 --- a/fpcalc/src/main.rs +++ b/fpcalc/src/main.rs @@ -1,23 +1,44 @@ +use base64::prelude::{Engine, BASE64_URL_SAFE_NO_PAD}; +use chrono::Local; use clap::Parser; -use std::path::PathBuf; use std::fmt; -use rusty_chromaprint::Configuration; +use std::path::{Path, PathBuf}; + +use anyhow::Context; +use rusty_chromaprint::{Configuration, FingerprintCompressor, Fingerprinter}; +use symphonia::core::audio::{AudioBufferRef, SampleBuffer}; +use symphonia::core::codecs::{Decoder, DecoderOptions, CODEC_TYPE_NULL}; +use symphonia::core::errors::Error; +use symphonia::core::formats::{FormatOptions, FormatReader}; +use symphonia::core::io::MediaSourceStream; +use symphonia::core::meta::MetadataOptions; +use symphonia::core::probe::Hint; #[derive(Default, Debug, Clone)] struct Algorithm(Configuration); +impl Algorithm { + fn as_config(&self) -> &Configuration { + &self.0 + } +} + impl TryFrom<&str> for Algorithm { type Error = &'static str; fn try_from(value: &str) -> Result { - let algorithm_id = value.parse::().map_err(|_| "value must be between an integer between 0 and 4")?; + let algorithm_id = value + .parse::() + .map_err(|_| "value must be between an integer between 0 and 4")?; let configuration = match algorithm_id { 0 => Configuration::preset_test1(), 1 => Configuration::preset_test2(), 2 => Configuration::preset_test3(), 3 => Configuration::preset_test4(), 4 => Configuration::preset_test5(), - _ => { return Err("unknown algorithm ID"); }, + _ => { + return Err("unknown algorithm ID"); + } }; debug_assert_eq!(configuration.id(), algorithm_id); let algorithm = Algorithm(configuration); @@ -75,24 +96,354 @@ struct Args { #[arg(short, long)] signed: bool, - /// Print the output in JSON format - #[arg(short, long)] - json: bool, - - /// Print the output in text format - #[arg(short, long)] - text: bool, - - /// Print the just the fingerprint in text format - #[arg(short, long)] - plain: bool, + /// Print the output in a certain format + #[arg(short='F', long, value_parser = |s: &str| OutputFormat::try_from(s), default_value = "text")] + output_format: OutputFormat, /// File to analyze file: PathBuf, } -fn main() { - let _args = Args::parse(); +impl Args { + fn max_chunk_duration(&self) -> usize { + self.chunk.unwrap_or(0) + } + + fn to_result_printer<'a>(&'a self) -> ResultPrinter<'a> { + ResultPrinter { + config: self.algorithm.as_config(), + abs_ts: self.ts, + raw: self.raw, + signed: self.signed, + format: self.output_format, + max_chunk_duration: self.max_chunk_duration(), + } + } +} + +#[derive(Debug, Clone, Copy)] +enum OutputFormat { + Text, + Json, + Plain, +} + +impl TryFrom<&str> for OutputFormat { + type Error = &'static str; + + fn try_from(value: &str) -> Result { + match value { + "text" => Ok(OutputFormat::Text), + "json" => Ok(OutputFormat::Json), + "plain" => Ok(OutputFormat::Plain), + _ => Err("invalid result format"), + } + } +} + +impl fmt::Display for OutputFormat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self { + Self::Text => "text".fmt(f), + Self::Json => "json".fmt(f), + Self::Plain => "plain".fmt(f), + } + } +} + +struct AudioReader { + format: Box, + decoder: Box, + track_id: u32, + sample_rate: u32, + channel_count: usize, +} + +impl AudioReader { + fn new(path: &impl AsRef) -> anyhow::Result { + let path = path.as_ref(); + let src = std::fs::File::open(path).context("failed to open file")?; + let mss = MediaSourceStream::new(Box::new(src), Default::default()); + + let mut hint = Hint::new(); + if let Some(ext) = path.extension().and_then(|e| e.to_str()) { + hint.with_extension(ext); + } + + let meta_opts: MetadataOptions = Default::default(); + let fmt_opts: FormatOptions = Default::default(); + + let probed = symphonia::default::get_probe() + .format(&hint, mss, &fmt_opts, &meta_opts) + .context("unsupported format")?; + + let format = probed.format; + + let track = format + .tracks() + .iter() + .find(|t| t.codec_params.codec != CODEC_TYPE_NULL) + .context("no supported audio tracks")?; + + let track_id = track.id; + + let dec_opts: DecoderOptions = Default::default(); + + let decoder = symphonia::default::get_codecs() + .make(&track.codec_params, &dec_opts) + .context("unsupported codec")?; + + let sample_rate = track + .codec_params + .sample_rate + .context("missing sample rate")?; + let channel_count = track + .codec_params + .channels + .context("missing audio channels")? + .count(); + + Ok(Self { + format, + decoder, + track_id, + sample_rate, + channel_count, + }) + } + + fn next_buffer(&mut self) -> Result, Error> { + let packet = loop { + let packet = match self.format.next_packet() { + Ok(packet) => packet, + err => break err, + }; + + if packet.track_id() != self.track_id { + continue; + } + + break Ok(packet); + }; + packet.and_then(|pkt| self.decoder.decode(&pkt)) + } +} + +fn get_current_timestamp() -> f64 { + let now = Local::now(); + let usec = now.timestamp_micros(); + (usec as f64) / 1000000.0 +} - todo!(); +pub fn main() -> anyhow::Result<()> { + let args = Args::parse(); + let result_printer = args.to_result_printer(); + + let mut reader = AudioReader::new(&args.file).context("initializing audio reader")?; + + let config = args.algorithm.as_config(); + let mut printer = Fingerprinter::new(config); + + let channel_count: u32 = reader + .channel_count + .try_into() + .context("converting sample rate")?; + printer + .start(reader.sample_rate, channel_count) + .context("initializing fingerprinter")?; + + let mut sample_buf = None; + + let mut ts: f64 = 0.0; + if args.ts { + ts = get_current_timestamp(); + } + + let sample_rate = usize::try_from(reader.sample_rate).context("invalid sample rate")?; + + let mut stream_size = 0; + let stream_limit = args.length * sample_rate; + + let mut chunk_size = 0; + let chunk_limit = args.max_chunk_duration() * sample_rate; + + let mut extra_chunk_limit = 0; + let mut overlap: f64 = 0.0; + + if chunk_limit > 0 && args.overlap { + extra_chunk_limit = config.delay(); + overlap = (config.delay() as f64) * 1.0 / (sample_rate as f64) / 1000.0; + } + + let mut first_chunk = true; + + loop { + let audio_buf = match reader.next_buffer() { + Ok(buffer) => buffer, + Err(Error::DecodeError(err)) => Err(Error::DecodeError(err))?, + Err(_) => break, + }; + + if sample_buf.is_none() { + let spec = *audio_buf.spec(); + let duration = audio_buf.capacity() as u64; + sample_buf = Some(SampleBuffer::::new(duration, spec)); + } + + if let Some(buf) = &mut sample_buf { + let (stream_done, mut frame_size) = if stream_limit > 0 { + let remaining = stream_limit - stream_size; + let frame_size = audio_buf.frames(); + (frame_size > remaining, frame_size.min(remaining)) + } else { + (false, audio_buf.frames()) + }; + stream_size += frame_size; + + if frame_size == 0 { + if stream_done { + break; + } else { + continue; + } + } + + let first_part_size = frame_size; + let (chunk_done, first_part_size) = if chunk_limit > 0 { + let remaining = chunk_limit + extra_chunk_limit - chunk_size; + (first_part_size > remaining, first_part_size.min(remaining)) + } else { + (false, first_part_size) + }; + + buf.copy_interleaved_ref(audio_buf); + let frame_data = buf.samples(); + printer.consume(&frame_data[..first_part_size * reader.channel_count]); + + chunk_size += first_part_size; + + if chunk_done { + printer.finish(); + + let chunk_duration = (chunk_size - extra_chunk_limit) as f64 * 1.0 + / f64::from(reader.sample_rate) + + overlap; + result_printer.print_result(&printer, first_chunk, ts, chunk_duration); + + if args.ts { + ts = get_current_timestamp(); + } else { + ts += chunk_duration; + } + + if args.overlap { + printer = Fingerprinter::new(config); + ts -= overlap; + } else { + printer + .start(reader.sample_rate, channel_count) + .context("initializing fingerprinter")?; + } + + if first_chunk { + extra_chunk_limit = 0; + first_chunk = false; + } + + chunk_size = 0; + } + + frame_size -= first_part_size; + if frame_size > 0 { + printer.consume( + &frame_data[(first_part_size * reader.channel_count) + ..(frame_size * reader.channel_count)], + ); + } + + chunk_size += frame_size; + + if stream_done { + break; + } + } + } + + printer.finish(); + + if chunk_size > 0 { + let chunk_duration = + (chunk_size - extra_chunk_limit) as f64 * 1.0 / f64::from(reader.sample_rate) + overlap; + result_printer.print_result(&printer, first_chunk, ts, chunk_duration); + } + + Ok(()) +} + +struct ResultPrinter<'a> { + config: &'a Configuration, + abs_ts: bool, + raw: bool, + signed: bool, + format: OutputFormat, + max_chunk_duration: usize, +} + +impl<'a> ResultPrinter<'a> { + fn print_result(&self, printer: &Fingerprinter, first: bool, timestamp: f64, duration: f64) { + let raw_fingerprint = printer.fingerprint(); + let fp = if self.raw { + if self.signed { + // FIXME: Use `u32.case_signed()` once it becomes stable. + raw_fingerprint + .iter() + .map(|x| *x as i32) + .map(|x| x.to_string()) + .collect::>() + .join(",") + } else { + raw_fingerprint + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(",") + } + } else { + let compressed_fingerprint = + FingerprintCompressor::from(self.config).compress(raw_fingerprint); + BASE64_URL_SAFE_NO_PAD.encode(&compressed_fingerprint) + }; + + match self.format { + OutputFormat::Text => { + if !first { + println!(""); + } + + if self.abs_ts { + println!("TIMESTAMP={timestamp:.2}"); + } + println!("DURATION={duration}"); + println!("FINGERPRINT={fp}"); + } + OutputFormat::Json => { + if self.max_chunk_duration != 0 { + if self.raw { + println!("{{\"timestamp\": {timestamp:.2}, \"duration\": {duration:.2}, \"fingerprint\": [{fp}]}}"); + } else { + println!("{{\"timestamp\": {timestamp:.2}, \"duration\": {duration:.2}, \"fingerprint\": \"{fp}\"}}"); + } + } else { + if self.raw { + println!("{{\"duration\": {duration:.2}, \"fingerprint\": [{fp}]}}"); + } else { + println!("{{\"duration\": {duration:.2}, \"fingerprint\": \"{fp}\"}}"); + } + } + } + OutputFormat::Plain => { + println!("{fp}"); + } + } + } }