From f2cbacdb8987bfc42219a3e014d517190c8294f6 Mon Sep 17 00:00:00 2001 From: Joel Goguen Date: Thu, 25 Jul 2024 12:53:47 -0400 Subject: [PATCH] Allow using plain lyrics (#4) * Allow using plain lyrics Allow the option to use plain lyrics if synced aren't available. They can be fed into a LRC generator for making proper synced lyrics, and for some people plain lyrics is better than no lyrics. Changes the CLI parser to `clap` to make it easier to declare flags and parse the ways someone might pass them. Keeps the existing behaviour as default (`lyricsrs /path/to/music/dir` works and skips plain lyrics) and adds the `--allow-plain` CLI flag to tell `lyricsrs` to write plain lyrics if that's all that's available. * Update src/main.rs Co-authored-by: td * Update main.rs --------- Co-authored-by: td --- Cargo.lock | 127 ++++++++++++++++++++++++++++ Cargo.toml | 4 + src/main.rs | 197 ++++++++++++++++++++++++++++---------------- tests/common/mod.rs | 23 ++---- tests/test.rs | 60 ++++++++++++-- 5 files changed, 316 insertions(+), 95 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 581880f..0077de4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,55 @@ dependencies = [ "memchr", ] +[[package]] +name = "anstream" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "anstyle-parse" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -101,6 +150,52 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clap" +version = "4.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "colorchoice" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" + [[package]] name = "core-foundation" version = "0.9.4" @@ -295,6 +390,12 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.3.9" @@ -440,6 +541,12 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + [[package]] name = "itoa" version = "1.0.11" @@ -513,11 +620,13 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" name = "lyricsrs" version = "0.1.0" dependencies = [ + "clap", "lofty", "regex", "reqwest", "serde", "serde_json", + "temp-dir", "tokio", "urlencoding", "walkdir", @@ -1033,6 +1142,12 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "subtle" version = "2.6.1" @@ -1077,6 +1192,12 @@ dependencies = [ "libc", ] +[[package]] +name = "temp-dir" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f227968ec00f0e5322f9b8173c7a0cbcff6181a0a5b28e9892491c286277231" + [[package]] name = "tempfile" version = "3.10.1" @@ -1264,6 +1385,12 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index b93ef30..783c4f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ name = "lyricsrs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +clap = { version = "4.5.9", features = ["derive"] } lofty = "0.20.1" regex = "1.10.5" reqwest = { version = "0.12.5", features = ["json", "blocking"] } @@ -17,3 +18,6 @@ serde_json = "1.0" tokio = { version = "1", features = ["full"] } urlencoding = "2.1.3" walkdir = "2.5.0" + +[dev-dependencies] +temp-dir = "0.1.13" diff --git a/src/main.rs b/src/main.rs index 52c65fd..d08cce3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ -use std::env; - +use clap; +use clap::Parser; use lofty::file::AudioFile; use lofty::probe::Probe; use regex::Regex; @@ -18,6 +18,17 @@ use walkdir::WalkDir; use serde::Deserialize; +#[derive(Parser)] +struct CLI { + // Flags + #[arg(long = "allow-plain")] + allow_plain: bool, + + // Positional args + #[arg(required = true)] + music_dir: String, +} + #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] #[allow(dead_code)] @@ -36,15 +47,9 @@ struct Track { #[tokio::main] async fn main() { let start_time = Instant::now(); - let args: Vec = env::args().collect(); + let args = CLI::parse(); - let music_dir = match args.get(1) { - Some(dir) => Path::new(dir), - None => { - println!("Usage: {} ", args[0]); - return; - } - }; + let music_dir = Path::new(&args.music_dir); if !music_dir.is_dir() { eprintln!("Error: '{}' is not a valid directory.", music_dir.display()); @@ -77,7 +82,13 @@ async fn main() { let successful_count = Arc::clone(&successful_count); total_count += 1; task::spawn(async move { - parse_song_path(&entry.path(), &music_dir, successful_count).await; + parse_song_path( + &entry.path(), + &music_dir, + successful_count, + args.allow_plain, + ) + .await; }) }); @@ -101,7 +112,12 @@ async fn main() { println!("Time taken: {:?}", elapsed_time); } -async fn parse_song_path(file_path: &Path, music_dir: &Path, successful_count: Arc) { +async fn parse_song_path( + file_path: &Path, + music_dir: &Path, + successful_count: Arc, + allow_plain_lyrics: bool, +) { if let Some(album_dir) = file_path.parent() { if let Some(artist_dir) = album_dir.parent() { if let Some(music_dirr) = artist_dir.parent() { @@ -112,6 +128,7 @@ async fn parse_song_path(file_path: &Path, music_dir: &Path, successful_count: A album_dir, file_path, successful_count, + allow_plain_lyrics, ) .await; } @@ -178,6 +195,7 @@ async fn exact_search( album_dir: &Path, file_path: &Path, successful_count: Arc, + allow_plain_lyrics: bool, ) { let artist_name = artist_dir .file_name() @@ -231,51 +249,74 @@ async fn exact_search( .expect("[exact_search] parsing body failed"); let track: Result = serde_json::from_str(&json_data); - match track { + let track_lyrics = match track { Ok(track) => { - // Find the first track with non-empty syncedLyrics match &track.synced_lyrics { - Some(lyrics) => { - save_synced_lyrics( - &music_dir, - artist_dir, - album_dir, - &song_name, - lyrics.clone(), - successful_count, - ) - .await; - } + Some(lyrics) => lyrics.clone(), None => { - println!( - "[exact_search] synced lyrics unavilable {} for song {} falling back to fuzzy_search", - json_data, clean_song - ); - fuzzy_search( - music_dir, - artist_dir, - album_dir, - file_path, - successful_count, - ) - .await; + match &track.plain_lyrics { + Some(lyrics) => { + match allow_plain_lyrics { + true => lyrics.clone(), + false => { + println!( + "[exact_search] synced lyrics unavailable for song {}, plain lyrics available but not allowed", + clean_song + ); + // Yes, in theory having no synced lyrics with plain lyrics + // available would mean there's no point in searching again. + // But, sometimes lrclib.net returns one song for the exact + // match that doesn't have synced lyrics when another will. So + // we fall through to the fuzzy search just in case that's what + // happened here. + String::new() + } + } + } + None => { + println!( + "[exact_search] no lyrics available {} for song {}", + json_data, clean_song + ); + String::new() + } + } } } } Err(_) => { println!( - "[exact_search] could not parse track response {} for song {} falling back to fuzzy_search", + "[exact_search] could not parse track response {} for song {}", json_data, clean_song ); - fuzzy_search( - music_dir, - artist_dir, - album_dir, - file_path, - successful_count, - ) - .await; + String::new() } + }; + + if track_lyrics.is_empty() { + println!( + "[exact_search] falling back to fuzzy search for song {}", + clean_song + ); + fuzzy_search( + music_dir, + artist_dir, + album_dir, + file_path, + successful_count, + allow_plain_lyrics, + ) + .await; + } else { + save_synced_lyrics( + &music_dir, + artist_dir, + album_dir, + &song_name, + track_lyrics, + successful_count, + ) + .await; } } @@ -285,6 +326,7 @@ async fn fuzzy_search( album_dir: &Path, file_path: &Path, successful_count: Arc, + allow_plain_lyrics: bool, ) { let artist_name = artist_dir .file_name() @@ -330,40 +372,51 @@ async fn fuzzy_search( .expect("[fuzzy_search] parsing body failed"); let tracks: Result, serde_json::Error> = serde_json::from_str(&json_data); - match tracks { + let track_lyrics = match tracks { Ok(tracks) => { - if let Some(track) = tracks.iter().find(|&t| t.synced_lyrics.is_some()) { - match &track.synced_lyrics { - Some(lyrics) => { - save_synced_lyrics( - &music_dir, - artist_dir, - album_dir, - &song_name, - lyrics.clone(), - successful_count, - ) - .await; - } - None => { - println!( - "[fuzzy_search] could not parse synced lyrics for song {}", - clean_song - ); + let synced_track = tracks.iter().find(|&t| t.synced_lyrics.is_some()); + match synced_track { + Some(track) => track.synced_lyrics.clone(), + None => { + let plain_track = tracks.iter().find(|&t| t.plain_lyrics.is_some()); + match plain_track { + Some(track) => match allow_plain_lyrics { + true => track.plain_lyrics.clone(), + false => { + println!( + "[fuzzy search] synced lyrics unavailable {} for song {}, plain lyrics available but not allowed", + json_data, clean_song + ); + return; + } + }, + None => { + println!( + "[fuzzy_search] no lyrics available {} for song {}", + json_data, clean_song + ); + return; + } } } - } else { - println!( - "[fuzzy_search] could not find synced lyrics for song {}", - clean_song - ); } } Err(_) => { println!( - "[fuzzy_search] failed to parse json {} for {}", + "[fuzzy_search] could not parse track response {} for song {}", json_data, clean_song ); + return; } - } + }; + + save_synced_lyrics( + &music_dir, + artist_dir, + album_dir, + &song_name, + track_lyrics.expect("tried to save lyrics when none are available"), + successful_count, + ) + .await; } diff --git a/tests/common/mod.rs b/tests/common/mod.rs index b4994ab..73b123f 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -1,4 +1,5 @@ use std::path::PathBuf; +use temp_dir::TempDir; use tokio::fs; @@ -16,8 +17,6 @@ pub const SONGS: [&str; 11] = [ "Our Lady Peace/Spiritual Machines/04 _ In Repair.mp3", ]; -pub const BASE_DIR: &str = "Music"; - async fn create_files_with_names(output_file: &PathBuf) { let dirs = output_file.parent().expect("could not parse dirs"); let file_name = output_file.file_name().expect("could not parse name"); @@ -41,12 +40,11 @@ async fn create_files_with_names(output_file: &PathBuf) { let _copy = fs::copy(PathBuf::from(source_file), output_file).await; } -pub async fn setup() { +pub async fn setup(basedir: &TempDir) { let mut tasks = vec![]; for song_path in SONGS.iter() { - let mut path = PathBuf::from(BASE_DIR); - path.push(song_path); + let path = basedir.child(song_path); tasks.push(tokio::spawn(async move { create_files_with_names(&path).await; @@ -57,14 +55,13 @@ pub async fn setup() { for task in tasks { task.await.expect("Failed to execute task"); } - println!("Files created successfully in folder: {}", BASE_DIR); -} - -pub async fn cleanup() { - let _remove = fs::remove_dir_all(BASE_DIR).await.expect("cleanup failed"); + println!( + "Files created successfully in folder: {}", + basedir.path().display() + ); } -pub async fn check_lrcs() -> bool { +pub async fn check_lrcs(basedir: &TempDir) -> bool { let mut song_names: Vec = Vec::new(); SONGS.iter().for_each(|song_path| { @@ -78,9 +75,7 @@ pub async fn check_lrcs() -> bool { }); let mut all_exist = true; for file in song_names.iter() { - let mut song_path = std::env::current_dir().expect("could not get curr dir"); - song_path.push(BASE_DIR); - song_path.push(file); + let song_path = basedir.child(file); if let Err(_) = fs::metadata(song_path.clone()).await { println!("file {} does not exist", song_path.to_string_lossy()); all_exist = false; diff --git a/tests/test.rs b/tests/test.rs index c037ebf..2972ec7 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,29 +1,35 @@ use std::env; use std::path::PathBuf; use std::process::Command; +use temp_dir::TempDir; mod common; #[tokio::test] async fn test_cli() { - common::setup().await; + // TempDir deletes the created directory when the struct is dropped. Call TempDir::leak() to + // keep it around for debugging purposes. + let tmpdir = &TempDir::new().unwrap(); + common::setup(tmpdir).await; let target_dir = env::var("CARGO_MANIFEST_DIR").expect("could not get target dir"); - let mut current_dir = env::current_dir().expect("could not get current dir"); - current_dir.push(common::BASE_DIR); let mut path = PathBuf::from(target_dir); - path.push("target/release/lyricsrs"); + let output = Command::new(path) - .arg(current_dir) + .arg(tmpdir.path()) .output() .expect("Failed to execute command"); - println!("{:?}", output); + let stdout_str = String::from_utf8_lossy(&output.stdout); + let stderr_str = String::from_utf8_lossy(&output.stderr); + + println!("Exit code: {}", output.status.code().unwrap()); + println!("STDOUT: {}", stdout_str); + println!("STDERR: {}", stderr_str); assert!(output.status.success()); - let stdout_str = String::from_utf8_lossy(&output.stdout); // keep in sync with SONGS in common/mod.rs let to_find = format!( "Successful tasks: {}\nFailed tasks: 0\nTotal tasks: {}", @@ -32,7 +38,43 @@ async fn test_cli() { ); assert!(stdout_str.contains(&to_find)); - assert!(common::check_lrcs().await); + assert!(common::check_lrcs(tmpdir).await); +} + +#[tokio::test] +async fn test_cli_plain_lyrics_allowed() { + // TempDir deletes the created directory when the struct is dropped. Call TempDir::leak() to + // keep it around for debugging purposes. + let tmpdir = &TempDir::new().unwrap(); + common::setup(tmpdir).await; + + let target_dir = env::var("CARGO_MANIFEST_DIR").expect("could not get target dir"); + + let mut path = PathBuf::from(target_dir); + path.push("target/release/lyricsrs"); + + let output = Command::new(path) + .arg("--allow-plain") + .arg(tmpdir.path()) + .output() + .expect("Failed to execute command"); + + let stdout_str = String::from_utf8_lossy(&output.stdout); + let stderr_str = String::from_utf8_lossy(&output.stderr); + + println!("Exit code: {}", output.status.code().unwrap()); + println!("STDOUT: {:?}", stdout_str); + println!("STDERR: {:?}", stderr_str); + + assert!(output.status.success()); + + // keep in sync with SONGS in common/mod.rs + let to_find = format!( + "Successful tasks: {}\nFailed tasks: 0\nTotal tasks: {}", + common::SONGS.len(), + common::SONGS.len() + ); + assert!(stdout_str.contains(&to_find)); - common::cleanup().await; + assert!(common::check_lrcs(tmpdir).await); }