diff --git a/Cargo.lock b/Cargo.lock index 0140223a8..0a6e660ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -229,6 +229,7 @@ dependencies = [ "derive_more", "dirs", "elf", + "filetime", "futures", "glob", "hex", @@ -249,6 +250,7 @@ dependencies = [ "tui", "url", "yaml", + "zstd", ] [[package]] @@ -800,6 +802,18 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +[[package]] +name = "filetime" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf401df4a4e3872c4fe8151134cf483738e74b67fc934d6532c882b3d24a4550" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.59.0", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -1287,6 +1301,7 @@ checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ "bitflags", "libc", + "redox_syscall", ] [[package]] @@ -2757,6 +2772,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-targets" version = "0.48.5" diff --git a/Cargo.toml b/Cargo.toml index 6982447f9..3e508222f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ dirs = "5.0.1" elf = "0.7.4" indicatif = "0.17.8" itertools = "0.13.0" +filetime = "0.2.24" futures = "0.3.30" glob = "0.3.1" hex = "0.4.3" diff --git a/boulder/Cargo.toml b/boulder/Cargo.toml index 3ae914e52..120c84c7b 100644 --- a/boulder/Cargo.toml +++ b/boulder/Cargo.toml @@ -29,6 +29,7 @@ clap.workspace = true derive_more.workspace = true dirs.workspace = true elf.workspace = true +filetime.workspace = true glob.workspace = true futures.workspace = true hex.workspace = true @@ -43,3 +44,4 @@ strum.workspace = true thiserror.workspace = true tokio.workspace = true url.workspace = true +zstd.workspace = true diff --git a/boulder/src/package/analysis.rs b/boulder/src/package/analysis.rs index b023c544a..b6ce15c7d 100644 --- a/boulder/src/package/analysis.rs +++ b/boulder/src/package/analysis.rs @@ -38,6 +38,7 @@ impl<'a> Chain<'a> { Box::new(handler::elf), Box::new(handler::pkg_config), Box::new(handler::cmake), + Box::new(handler::compressman), // Catch-all if not excluded Box::new(handler::include_any), ], diff --git a/boulder/src/package/analysis/handler.rs b/boulder/src/package/analysis/handler.rs index c28fff268..4496e14c8 100644 --- a/boulder/src/package/analysis/handler.rs +++ b/boulder/src/package/analysis/handler.rs @@ -1,5 +1,14 @@ -use std::{path::PathBuf, process::Command}; - +use filetime::FileTime; +use std::fs::File; +use std::io::{BufReader, BufWriter, Write}; +use std::{ + fs, + os::unix::fs::symlink, + path::{Component, PathBuf}, + process::Command, +}; + +use itertools::Itertools; use moss::{dependency, Dependency, Provider}; use crate::package::collect::PathInfo; @@ -136,3 +145,88 @@ pub fn cmake(bucket: &mut BucketMut, info: &mut PathInfo) -> Result Result { + if !bucket.recipe.parsed.options.compressman { + return Ok(Decision::NextHandler.into()); + } + + let is_man_file = info.path.components().contains(&Component::Normal("man".as_ref())) + && info.file_name().ends_with(|c| ('1'..'9').contains(&c)); + let is_info_file = + info.path.components().contains(&Component::Normal("info".as_ref())) && info.file_name().ends_with(".info"); + + if !(is_man_file || is_info_file) { + return Ok(Decision::NextHandler.into()); + } + + let mut generated_path = PathBuf::new(); + + let metadata = fs::metadata(&info.path)?; + let atime = metadata.modified()?; + let mtime = metadata.accessed()?; + + /* If we have a man/info symlink update the link to the compressed file */ + if info.path.is_symlink() { + let new_original = format!("{}.zst", fs::canonicalize(&info.path)?.display()); + let new_link = format!("{}.zst", &info.path.display()); + + /* + * Depending on the order the files get analysed the new compressed file may not yet exist, + * compress it _now_ so the correct metadata src info is returned to the binary writer. + */ + if !std::path::Path::new(&new_original).exists() { + let compressed_file = compress_file_zstd(fs::canonicalize(&info.path)?)?; + let _ = bucket.paths.install().guest.join(compressed_file); + } + + symlink(format!("{}.zst", fs::read_link(&info.path)?.display()), &new_link)?; + + /* Restore the original {a,m}times for reproducibility */ + filetime::set_symlink_file_times( + &new_link, + FileTime::from_system_time(atime), + FileTime::from_system_time(mtime), + )?; + + generated_path.push(bucket.paths.install().guest.join(new_link)); + return Ok(Decision::ReplaceFile { + newpath: generated_path, + } + .into()); + } + + let mut compressed_file = PathBuf::from(format!("{}.zst", info.path.display())); + + /* We may have already compressed the file if we encountered a symlink to this file first */ + if !&compressed_file.exists() { + compressed_file = compress_file_zstd(info.path.clone())?; + } + + /* Restore the original {a,m}times for reproducibility */ + filetime::set_file_handle_times( + &File::open(&compressed_file)?, + Some(FileTime::from_system_time(atime)), + Some(FileTime::from_system_time(mtime)), + )?; + + generated_path.push(bucket.paths.install().guest.join(compressed_file)); + + pub fn compress_file_zstd(path: PathBuf) -> Result { + let output_path = PathBuf::from(format!("{}.zst", path.display())); + let input = File::create(&output_path)?; + let mut reader = BufReader::new(File::open(&path)?); + let mut writer = BufWriter::new(input); + + zstd::stream::copy_encode(&mut reader, &mut writer, 16)?; + + writer.flush()?; + + Ok(output_path) + } + + Ok(Decision::ReplaceFile { + newpath: generated_path, + } + .into()) +} diff --git a/crates/stone_recipe/src/lib.rs b/crates/stone_recipe/src/lib.rs index b0565b609..d1d3cd07f 100644 --- a/crates/stone_recipe/src/lib.rs +++ b/crates/stone_recipe/src/lib.rs @@ -93,6 +93,8 @@ pub struct Options { pub strip: bool, #[serde(default, deserialize_with = "stringy_bool")] pub networking: bool, + #[serde(default, deserialize_with = "stringy_bool")] + pub compressman: bool, } #[derive(Debug, Clone, Deserialize)]