From 72509aa732096c82b552e1b4dbdc8311fce3ae4e Mon Sep 17 00:00:00 2001
From: Xing Ma
Date: Fri, 29 Nov 2024 16:18:10 +0800
Subject: [PATCH] builder: Enhance optimize subcommand for prefetch

Major changes:
1. Added compatibility for rafs v5/v6 formats.
2. Set IS_PREFETCHBLOB flag in BlobInfo for prefetchblob.
3. Introduced two new options for storing prefetchblobID path and the
   optimized bootstrap path.

Signed-off-by: Xing Ma
---
 builder/src/optimize_prefetch.rs | 92 ++++++++++++++++++++++----------
 src/bin/nydus-image/main.rs      | 55 ++++++++++++++++---
 storage/src/device.rs            |  8 +++
 3 files changed, 120 insertions(+), 35 deletions(-)

diff --git a/builder/src/optimize_prefetch.rs b/builder/src/optimize_prefetch.rs
index b7981485910..b079c002a5e 100644
--- a/builder/src/optimize_prefetch.rs
+++ b/builder/src/optimize_prefetch.rs
@@ -18,7 +18,6 @@ use crate::TreeNode;
 use anyhow::Context;
 use anyhow::{Ok, Result};
 use nydus_api::ConfigV2;
-use nydus_rafs::metadata::layout::v6::RafsV6BlobTable;
 use nydus_rafs::metadata::layout::RafsBlobTable;
 use nydus_rafs::metadata::RafsSuper;
 use nydus_rafs::metadata::RafsVersion;
@@ -27,9 +26,8 @@ use nydus_storage::meta::BatchContextGenerator;
 use nydus_storage::meta::BlobChunkInfoV1Ondisk;
 use nydus_utils::compress;
 use sha2::Digest;
-use std::fs::File;
-use std::io::Read;
-use std::io::Seek;
+use std::fs::{File, OpenOptions};
+use std::io::{Read, Seek, Write};
 use std::mem::size_of;
 use std::sync::Arc;
 pub struct OptimizePrefetch {}
@@ -52,6 +50,7 @@ impl PrefetchBlobState {
             ctx.blob_features,
         );
         blob_info.set_compressor(ctx.compressor);
+        blob_info.set_hot_blob_feature(true);
         let mut blob_ctx = BlobContext::from(ctx, &blob_info, ChunkSource::Build)?;
         blob_ctx.blob_meta_info_enabled = true;
         let blob_writer = ArtifactWriter::new(crate::ArtifactStorage::FileDir(
@@ -72,13 +71,17 @@ impl OptimizePrefetch {
         tree: &mut Tree,
         ctx: &mut BuildContext,
         bootstrap_mgr: &mut BootstrapManager,
-        blob_table: &mut RafsV6BlobTable,
+        blob_table: &mut RafsBlobTable,
         blobs_dir_path: PathBuf,
         prefetch_nodes: Vec<TreeNode>,
+        output_path: Option<&Path>,
     ) -> Result<()> {
         // create a new blob for prefetch layer
-        let blob_layer_num = blob_table.entries.len();
+        let blob_layer_num = match blob_table {
+            RafsBlobTable::V5(table) => table.get_all().len(),
+            RafsBlobTable::V6(table) => table.get_all().len(),
+        };
+
         let mut blob_state = PrefetchBlobState::new(&ctx, blob_layer_num as u32, &blobs_dir_path)?;
         let mut batch = BatchContextGenerator::new(0)?;
         for node in &prefetch_nodes {
@@ -95,6 +98,15 @@ impl OptimizePrefetch {

         Self::dump_blob(ctx, blob_table, &mut blob_state)?;
         debug!("prefetch blob id: {}", ctx.blob_id);
+        if let Some(ref f) = output_path {
+            let mut w = OpenOptions::new()
+                .truncate(true)
+                .create(true)
+                .write(true)
+                .open(f)
+                .with_context(|| format!("can not open output file {}", f.display()))?;
+            w.write_all(ctx.blob_id.as_bytes())?;
+        }
         Self::build_dump_bootstrap(tree, ctx, bootstrap_mgr, blob_table)?;

         Ok(())
@@ -104,7 +116,7 @@ impl OptimizePrefetch {
         tree: &mut Tree,
         ctx: &mut BuildContext,
         bootstrap_mgr: &mut BootstrapManager,
-        blob_table: &mut RafsV6BlobTable,
+        blob_table: &mut RafsBlobTable,
     ) -> Result<()> {
         let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?;
         let mut bootstrap = Bootstrap::new(tree.clone())?;
@@ -112,46 +124,56 @@ impl OptimizePrefetch {
         // Build bootstrap
         bootstrap.build(ctx, &mut bootstrap_ctx)?;

+        let entries = match blob_table {
+            RafsBlobTable::V5(table) => table.get_all(),
+            RafsBlobTable::V6(table) => table.get_all(),
+        };
+
         // Verify and update prefetch blob
         assert!(
-            blob_table
-                .entries
+            entries
                 .iter()
                 .filter(|blob| blob.blob_id() == "prefetch-blob")
                 .count()
                 == 1,
             "Expected exactly one prefetch-blob"
         );
-
-        // Rewrite prefetch blob id
-        blob_table
-            .entries
-            .iter_mut()
-            .filter(|blob| blob.blob_id() == "prefetch-blob")
-            .for_each(|blob| {
-                let mut info = (**blob).clone();
-                info.set_blob_id(ctx.blob_id.clone());
-                *blob = Arc::new(info);
-            });
-
-        // Dump bootstrap
-        let blob_table_withprefetch = RafsBlobTable::V6(blob_table.clone());
+        match blob_table {
+            RafsBlobTable::V5(table) => {
+                rewrite_blob_id(&mut table.entries, "prefetch-blob", ctx.blob_id.clone())
+            }
+            RafsBlobTable::V6(table) => {
+                rewrite_blob_id(&mut table.entries, "prefetch-blob", ctx.blob_id.clone())
+            }
+        }
+
+        let blob_table_withprefetch = match blob_table {
+            RafsBlobTable::V5(table) => RafsBlobTable::V5(table.clone()),
+            RafsBlobTable::V6(table) => RafsBlobTable::V6(table.clone()),
+        };
         bootstrap.dump(
             ctx,
             &mut bootstrap_mgr.bootstrap_storage,
             &mut bootstrap_ctx,
             &blob_table_withprefetch,
         )?;
-
         Ok(())
     }

     fn dump_blob(
         ctx: &mut BuildContext,
-        blob_table: &mut RafsV6BlobTable,
+        blob_table: &mut RafsBlobTable,
         blob_state: &mut PrefetchBlobState,
     ) -> Result<()> {
-        blob_table.entries.push(blob_state.blob_info.clone().into());
+        match blob_table {
+            RafsBlobTable::V5(table) => {
+                table.entries.push(blob_state.blob_info.clone().into());
+            }
+            RafsBlobTable::V6(table) => {
+                table.entries.push(blob_state.blob_info.clone().into());
+            }
+        }
+
         let mut blob_mgr = BlobManager::new(ctx.digester);
         blob_mgr.add_blob(blob_state.blob_ctx.clone());
         blob_mgr.set_current_blob_index(0);
@@ -176,7 +198,7 @@ impl OptimizePrefetch {
         node: &TreeNode,
         prefetch_state: &mut PrefetchBlobState,
         batch: &mut BatchContextGenerator,
-        blob_table: &RafsV6BlobTable,
+        blob_table: &RafsBlobTable,
         blobs_dir_path: &Path,
     ) -> Result<()> {
         let tree_node = tree
@@ -184,14 +206,17 @@ impl OptimizePrefetch {
             .ok_or(anyhow!("failed to get node"))?
             .node
             .as_ref();
+        let entries = match blob_table {
+            RafsBlobTable::V5(table) => table.get_all(),
+            RafsBlobTable::V6(table) => table.get_all(),
+        };
         let blob_id = tree_node
             .borrow()
             .chunks
             .first()
-            .and_then(|chunk| blob_table.entries.get(chunk.inner.blob_index() as usize))
+            .and_then(|chunk| entries.get(chunk.inner.blob_index() as usize).cloned())
             .map(|entry| entry.blob_id())
             .ok_or(anyhow!("failed to get blob id"))?;
-
         let mut blob_file = Arc::new(File::open(blobs_dir_path.join(blob_id))?);
         tree_node.borrow_mut().layer_idx = prefetch_state.blob_info.blob_index() as u16;
@@ -247,6 +272,17 @@ impl OptimizePrefetch {
     }
 }

+fn rewrite_blob_id(entries: &mut [Arc<BlobInfo>], blob_id: &str, new_blob_id: String) {
+    entries
+        .iter_mut()
+        .filter(|blob| blob.blob_id() == blob_id)
+        .for_each(|blob| {
+            let mut info = (**blob).clone();
+            info.set_blob_id(new_blob_id.clone());
+            *blob = Arc::new(info);
+        });
+}
+
 pub fn update_ctx_from_bootstrap(
     ctx: &mut BuildContext,
     config: Arc<ConfigV2>,
diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs
index ad47a4f5047..7b71d1fc510 100644
--- a/src/bin/nydus-image/main.rs
+++ b/src/bin/nydus-image/main.rs
@@ -38,7 +38,6 @@ use nydus_builder::{
     TarballBuilder, Tree, TreeNode, WhiteoutSpec,
 };

-use nydus_rafs::metadata::layout::v6::RafsV6BlobTable;
 use nydus_rafs::metadata::{MergeError, RafsSuper, RafsSuperConfig, RafsVersion};
 use nydus_storage::backend::localfs::LocalFs;
 use nydus_storage::backend::BlobBackend;
@@ -54,6 +53,10 @@ use serde::{Deserialize, Serialize};

 use crate::unpack::{OCIUnpacker, Unpacker};
 use crate::validator::Validator;
+use nydus_rafs::metadata::layout::v5::{RafsV5BlobTable, RafsV5ExtBlobTable};
+use nydus_rafs::metadata::layout::v6::RafsV6BlobTable;
+
+use nydus_rafs::metadata::layout::RafsBlobTable;

 #[cfg(target_os = "linux")]
 use nydus_service::ServiceArgs;
@@ -560,6 +563,18 @@ fn prepare_cmd_args(bti_string: &'static str) -> App {
                         .help(
                             "Directory for localfs storage backend, hosting data blobs and cache files",
                         ),
+                )
+                .arg(
+                    Arg::new("new-bootstrap")
+                        .long("new-bootstrap")
+                        .short('N')
+                        .help("Output path of the new bootstrap"),
+                )
+                .arg(
+                    Arg::new("output-path")
+                        .long("output-path")
+                        .short('o')
+                        .help("Output path for saving the hot blob id"),
                 ),
         );
@@ -1675,6 +1690,8 @@ impl Command {
         let prefetch_files = Self::get_prefetch_files(matches)?;
         prefetch_files.iter().for_each(|f| println!("{}", f));
         let bootstrap_path = Self::get_bootstrap(matches)?;
+        let new_bootstrap = Self::get_new_bootstrap(matches)?;
+        let output_path = Self::get_output_path(matches)?;
         let config = Self::get_configuration(matches)?;
         config.internal.set_blob_accessible(true);
         let mut build_ctx = BuildContext {
@@ -1695,21 +1712,31 @@ impl Command {
             }
         }

-        let bootstrap_path = ArtifactStorage::SingleFile(PathBuf::from("optimized_bootstrap"));
+        let bootstrap_path = if let Some(ref f) = new_bootstrap {
+            ArtifactStorage::SingleFile(PathBuf::from(f))
+        } else {
+            ArtifactStorage::SingleFile(PathBuf::from("optimized_bootstrap"))
+        };
+
         let mut bootstrap_mgr = BootstrapManager::new(Some(bootstrap_path), None);
         let blobs = sb.superblock.get_blob_infos();
-        let mut rafsv6table = RafsV6BlobTable::new();
-        for blob in &blobs {
-            rafsv6table.entries.push(blob.clone());
-        }
+        let mut blob_table = match build_ctx.fs_version {
+            RafsVersion::V5 => RafsBlobTable::V5(RafsV5BlobTable {
+                entries: blobs,
+                extended: RafsV5ExtBlobTable::new(),
+            }),
+
+            RafsVersion::V6 => RafsBlobTable::V6(RafsV6BlobTable { entries: blobs }),
+        };

         OptimizePrefetch::generate_prefetch(
             &mut tree,
             &mut build_ctx,
             &mut bootstrap_mgr,
-            &mut rafsv6table,
+            &mut blob_table,
             blobs_dir_path.to_path_buf(),
             prefetch_nodes,
+            output_path,
         )
         .with_context(|| "Failed to generate prefetch bootstrap")?;

@@ -1823,6 +1850,13 @@ impl Command {
         }
     }

+    fn get_new_bootstrap(matches: &ArgMatches) -> Result<Option<&Path>> {
+        match matches.get_one::<String>("new-bootstrap") {
+            Some(s) => Ok(Some(Path::new(s))),
+            None => Ok(None),
+        }
+    }
+
     fn get_prefetch_files(matches: &ArgMatches) -> Result<Vec<String>> {
         match matches.get_one::<String>("prefetch-files") {
             Some(v) => {
@@ -1842,6 +1876,13 @@ impl Command {
         }
     }

+    fn get_output_path(matches: &ArgMatches) -> Result<Option<&Path>> {
+        match matches.get_one::<String>("output-path") {
+            Some(s) => Ok(Some(Path::new(s))),
+            None => Ok(None),
+        }
+    }
+
     fn get_bootstrap_storage(matches: &ArgMatches) -> Result<ArtifactStorage> {
         if let Some(s) = matches.get_one::<String>("bootstrap") {
             Ok(ArtifactStorage::SingleFile(s.into()))
diff --git a/storage/src/device.rs b/storage/src/device.rs
index c8b44347377..d6bb772bde0 100644
--- a/storage/src/device.rs
+++ b/storage/src/device.rs
@@ -79,6 +79,8 @@ bitflags! {
         const _V5_NO_EXT_BLOB_TABLE = 0x8000_0000;
         /// Blob is generated with chunkdict.
         const IS_CHUNKDICT_GENERATED = 0x0000_0200;
+        /// Blob is a hot blob
+        const IS_PREFETCHBLOB = 0x0001_0000;
     }
 }

@@ -508,6 +510,12 @@ impl BlobInfo {
         }
     }

+    pub fn set_hot_blob_feature(&mut self, is_prefetchblob: bool) {
+        if is_prefetchblob {
+            self.blob_features |= BlobFeatures::IS_PREFETCHBLOB;
+        }
+    }
+
     /// Get SHA256 digest of the ToC content, including the toc tar header.
     ///
     /// It's all zero for inlined bootstrap.