From 2067209feb446543361cf657a57c97f219fbbafc Mon Sep 17 00:00:00 2001 From: Joey Riches Date: Fri, 21 Feb 2025 12:35:50 +0000 Subject: [PATCH] boulder/draft: Use infer to get extension type to perform extraction Using infer will help us catch specialized extension types e.g. `.whl` is a `.zip` `.gem` is a `.tar` --- Cargo.lock | 31 +++++++++++++--- Cargo.toml | 1 + boulder/Cargo.toml | 1 + boulder/src/draft/upstream.rs | 70 ++++++++++++++++++++--------------- 4 files changed, 69 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e8b2c917..12b18eb9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -361,7 +361,7 @@ dependencies = [ [[package]] name = "blsforme" version = "0.1.0" -source = "git+https://github.com/serpent-os/blsforme.git?rev=3cb315d6e9b4f2168927bded8b326b55c92f0e84#3cb315d6e9b4f2168927bded8b326b55c92f0e84" +source = "git+https://github.com/AerynOS/blsforme.git?rev=3cb315d6e9b4f2168927bded8b326b55c92f0e84#3cb315d6e9b4f2168927bded8b326b55c92f0e84" dependencies = [ "blake3", "gpt", @@ -391,6 +391,7 @@ dependencies = [ "futures-util", "glob", "hex", + "infer", "itertools 0.13.0", "mailparse", "moss", @@ -469,6 +470,17 @@ dependencies = [ "shlex", ] +[[package]] +name = "cfb" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f" +dependencies = [ + "byteorder", + "fnv", + "uuid", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -1612,6 +1624,15 @@ dependencies = [ "web-time", ] +[[package]] +name = "infer" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7" +dependencies = [ + "cfb", +] + [[package]] name = "ipnet" version = "2.10.1" @@ -2689,7 +2710,7 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "superblock" version = "0.1.0" -source = 
"git+https://github.com/serpent-os/blsforme.git?rev=3cb315d6e9b4f2168927bded8b326b55c92f0e84#3cb315d6e9b4f2168927bded8b326b55c92f0e84" +source = "git+https://github.com/AerynOS/blsforme.git?rev=3cb315d6e9b4f2168927bded8b326b55c92f0e84#3cb315d6e9b4f2168927bded8b326b55c92f0e84" dependencies = [ "log", "thiserror 2.0.3", @@ -2862,9 +2883,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.41.1" +version = "1.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551" dependencies = [ "backtrace", "bytes", @@ -2950,7 +2971,7 @@ dependencies = [ [[package]] name = "topology" version = "0.1.0" -source = "git+https://github.com/serpent-os/blsforme.git?rev=3cb315d6e9b4f2168927bded8b326b55c92f0e84#3cb315d6e9b4f2168927bded8b326b55c92f0e84" +source = "git+https://github.com/AerynOS/blsforme.git?rev=3cb315d6e9b4f2168927bded8b326b55c92f0e84#3cb315d6e9b4f2168927bded8b326b55c92f0e84" dependencies = [ "gpt", "log", diff --git a/Cargo.toml b/Cargo.toml index 0d1035b0..e656d14e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,6 +81,7 @@ xxhash-rust = { version = "0.8.11", features = ["xxh3"] } zstd = { version = "0.13.2", features = ["zstdmt"] } mailparse = "0.15.0" zbus = "5.1.1" +infer = "0.19.0" [workspace.lints.rust] rust_2018_idioms = { level = "warn", priority = -1 } diff --git a/boulder/Cargo.toml b/boulder/Cargo.toml index 85df37f5..e6496e79 100644 --- a/boulder/Cargo.toml +++ b/boulder/Cargo.toml @@ -49,6 +49,7 @@ thread-priority.workspace = true tokio.workspace = true url.workspace = true mailparse.workspace = true +infer.workspace = true [lints] workspace = true diff --git a/boulder/src/draft/upstream.rs b/boulder/src/draft/upstream.rs index 48384aad..259c6ff4 100644 --- a/boulder/src/draft/upstream.rs +++ 
b/boulder/src/draft/upstream.rs @@ -81,37 +81,49 @@ async fn fetch(url: &Url, output: &Path) -> Result { } async fn extract(archive: &Path, destination: &Path) -> Result<(), Error> { - let extension = archive - .extension() - .map(|e| e.to_string_lossy().to_string()) - .unwrap_or_else(|| "tar".to_owned()); - - // If we can't specialise (.zip, etc) assume its a tar - let result = match extension.as_str() { - "zip" => { - Command::new("unzip") - .arg(archive) - .arg("-d") - .arg(destination) - .output() - .await? + if let Some(kind) = infer::get_from_path(archive)? { + println!("Detected type: {} ({})", kind.mime_type(), kind.extension()); + // If we can't specialise (.zip, etc) assume its a tar + let result = match kind.extension() { + "zip" => { + Command::new("unzip") + .arg(archive) + .arg("-d") + .arg(destination) + .output() + .await? + } + _ => { + Command::new("tar") + .arg("xf") + .arg(archive) + .arg("-C") + .arg(destination) + .output() + .await? + } + }; + if result.status.success() { + Ok(()) + } else { + eprintln!("Command exited with: {}", String::from_utf8_lossy(&result.stderr)); + Err(Error::Extract(result.status)) } - _ => { - Command::new("tar") - .arg("xf") - .arg(archive) - .arg("-C") - .arg(destination) - .output() - .await? - } - }; - - if result.status.success() { - Ok(()) } else { - eprintln!("Command exited with: {}", String::from_utf8_lossy(&result.stderr)); - Err(Error::Extract(result.status)) + println!("Unknown file type, attempting tar extraction"); + let result = Command::new("tar") + .arg("xf") + .arg(archive) + .arg("-C") + .arg(destination) + .output() + .await?; + if result.status.success() { + Ok(()) + } else { + eprintln!("Command exited with: {}", String::from_utf8_lossy(&result.stderr)); + Err(Error::Extract(result.status)) + } } }