Skip to content

Commit

Permalink
Change chunker strategy (#17)
Browse files Browse the repository at this point in the history
Previously we were using the rolling-sum chunker with fairly small chunks.
Profiling revealed that this created a bottleneck: we would spend a long
time chunking big local files such as `rose.vfs` before downloading had
even started.

Switching to fixed-size 1 MB chunks is much simpler AND much more
performant. Local tests show an approximately 16x improvement.
  • Loading branch information
rminderhoud authored Jan 6, 2025
1 parent ab50a8b commit ac546f5
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 15 deletions.
11 changes: 5 additions & 6 deletions src/bin/rose-updater-archive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ struct Args {
#[clap(long, default_value="4", value_parser=parse_compression_level)]
compression_level: u32,

/// Chunk size in bytes
#[clap(long, default_value = "1000000")]
chunk_size: usize,

/// Relative path to the updater program in the input directory
#[clap(long, default_value = "rose-updater.exe")]
updater: PathBuf,
Expand Down Expand Up @@ -115,12 +119,7 @@ async fn main() -> anyhow::Result<()> {
let mut output_file = File::create(&output_path).await?;

let options = bitar::api::compress::CreateArchiveOptions {
chunker_config: bitar::chunker::Config::RollSum(bitar::chunker::FilterConfig {
filter_bits: bitar::chunker::FilterBits::from_size(64 * 1024),
min_chunk_size: 16 * 1024,
max_chunk_size: 16 * 1024 * 1024,
window_size: 64,
}),
chunker_config: bitar::chunker::Config::FixedSize(args.chunk_size),
compression: Some(bitar::Compression::zstd(args.compression_level)?),
..Default::default()
};
Expand Down
27 changes: 18 additions & 9 deletions src/bin/rose-updater.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,15 +277,24 @@ fn get_remote_files(
res = clone_remote(
&clone_url,
&output_path,
main_updater) => if res.is_ok() {
info!("Cloned {} to {}", &clone_url, output_path.display());
cloned_tx.send(LocalManifestFileEntry {
path: remote_entry.source_path.clone(),
hash: remote_entry.source_hash.clone(),
size: remote_entry.source_size,
}).await.expect("Failed to send clone message");
} else {
error!("Failed to clone {}", &clone_url);
main_updater) => {
match res {
Ok(_) => {
info!("Cloned {} to {}", clone_url, output_path.display());
let clone_message = LocalManifestFileEntry {
path: remote_entry.source_path.clone(),
hash: remote_entry.source_hash.clone(),
size: remote_entry.source_size,
};

if let Err(err) = cloned_tx.send(clone_message).await {
error!("Failed to send clone message: {}", err);
}
}
Err(err) => {
error!("Failed to clone {}: {}", clone_url, err);
}
}
},
_ = cloned_shutdown.changed() => {
info!("Stopped cloning {}", &clone_url);
Expand Down

0 comments on commit ac546f5

Please sign in to comment.