From 6683e088d93a281134176df05689fa2c7ab3f060 Mon Sep 17 00:00:00 2001 From: Xing Ma Date: Fri, 29 Nov 2024 16:18:10 +0800 Subject: [PATCH] builder: Enhance optimize subcommand for prefetch Major changes: 1. Add compatibility for rafs v5/v6 formats; 2. Set IS_SEPARATED_WITH_PREFETCH_FILES flag in BlobInfo for the prefetch blob; 3. Add option output-json to store build output. Signed-off-by: Xing Ma --- builder/src/optimize_prefetch.rs | 110 +++++++++++++++++++------------ src/bin/nydus-image/main.rs | 52 +++++++++++---- storage/src/device.rs | 8 +++ 3 files changed, 116 insertions(+), 54 deletions(-) diff --git a/builder/src/optimize_prefetch.rs b/builder/src/optimize_prefetch.rs index b7981485910..b8a7b10583d 100644 --- a/builder/src/optimize_prefetch.rs +++ b/builder/src/optimize_prefetch.rs @@ -8,6 +8,7 @@ use crate::BlobManager; use crate::Bootstrap; use crate::BootstrapManager; use crate::BuildContext; +use crate::BuildOutput; use crate::ChunkSource; use crate::ConversionType; use crate::NodeChunk; @@ -18,7 +19,6 @@ use crate::TreeNode; use anyhow::Context; use anyhow::{Ok, Result}; use nydus_api::ConfigV2; -use nydus_rafs::metadata::layout::v6::RafsV6BlobTable; use nydus_rafs::metadata::layout::RafsBlobTable; use nydus_rafs::metadata::RafsSuper; use nydus_rafs::metadata::RafsVersion; @@ -28,8 +28,7 @@ use nydus_storage::meta::BlobChunkInfoV1Ondisk; use nydus_utils::compress; use sha2::Digest; use std::fs::File; -use std::io::Read; -use std::io::Seek; +use std::io::{Read, Seek, Write}; use std::mem::size_of; use std::sync::Arc; pub struct OptimizePrefetch {} @@ -52,6 +51,7 @@ impl PrefetchBlobState { ctx.blob_features, ); blob_info.set_compressor(ctx.compressor); + blob_info.set_separated_with_prefetch_files_feature(true); let mut blob_ctx = BlobContext::from(ctx, &blob_info, ChunkSource::Build)?; blob_ctx.blob_meta_info_enabled = true; let blob_writer = ArtifactWriter::new(crate::ArtifactStorage::FileDir( @@ -72,13 +72,16 @@ impl OptimizePrefetch { tree: &mut 
Tree, ctx: &mut BuildContext, bootstrap_mgr: &mut BootstrapManager, - blob_table: &mut RafsV6BlobTable, + blob_table: &mut RafsBlobTable, blobs_dir_path: PathBuf, prefetch_nodes: Vec, - ) -> Result<()> { + ) -> Result { // create a new blob for prefetch layer - let blob_layer_num = blob_table.entries.len(); + let blob_layer_num = match blob_table { + RafsBlobTable::V5(table) => table.get_all().len(), + RafsBlobTable::V6(table) => table.get_all().len(), + }; let mut blob_state = PrefetchBlobState::new(&ctx, blob_layer_num as u32, &blobs_dir_path)?; let mut batch = BatchContextGenerator::new(0)?; for node in &prefetch_nodes { @@ -92,19 +95,19 @@ impl OptimizePrefetch { )?; } - Self::dump_blob(ctx, blob_table, &mut blob_state)?; + let blob_mgr = Self::dump_blob(ctx, blob_table, &mut blob_state)?; debug!("prefetch blob id: {}", ctx.blob_id); Self::build_dump_bootstrap(tree, ctx, bootstrap_mgr, blob_table)?; - Ok(()) + BuildOutput::new(&blob_mgr, &bootstrap_mgr.bootstrap_storage) } fn build_dump_bootstrap( tree: &mut Tree, ctx: &mut BuildContext, bootstrap_mgr: &mut BootstrapManager, - blob_table: &mut RafsV6BlobTable, + blob_table: &mut RafsBlobTable, ) -> Result<()> { let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; let mut bootstrap = Bootstrap::new(tree.clone())?; @@ -112,46 +115,33 @@ impl OptimizePrefetch { // Build bootstrap bootstrap.build(ctx, &mut bootstrap_ctx)?; - // Verify and update prefetch blob - assert!( - blob_table - .entries - .iter() - .filter(|blob| blob.blob_id() == "prefetch-blob") - .count() - == 1, - "Expected exactly one prefetch-blob" - ); - - // Rewrite prefetch blob id - blob_table - .entries - .iter_mut() - .filter(|blob| blob.blob_id() == "prefetch-blob") - .for_each(|blob| { - let mut info = (**blob).clone(); - info.set_blob_id(ctx.blob_id.clone()); - *blob = Arc::new(info); - }); - - // Dump bootstrap - let blob_table_withprefetch = RafsBlobTable::V6(blob_table.clone()); + let blob_table_withprefetch = match blob_table { + 
RafsBlobTable::V5(table) => RafsBlobTable::V5(table.clone()), + RafsBlobTable::V6(table) => RafsBlobTable::V6(table.clone()), + }; bootstrap.dump( ctx, &mut bootstrap_mgr.bootstrap_storage, &mut bootstrap_ctx, &blob_table_withprefetch, )?; - Ok(()) } fn dump_blob( ctx: &mut BuildContext, - blob_table: &mut RafsV6BlobTable, + blob_table: &mut RafsBlobTable, blob_state: &mut PrefetchBlobState, - ) -> Result<()> { - blob_table.entries.push(blob_state.blob_info.clone().into()); + ) -> Result { + match blob_table { + RafsBlobTable::V5(table) => { + table.entries.push(blob_state.blob_info.clone().into()); + } + RafsBlobTable::V6(table) => { + table.entries.push(blob_state.blob_info.clone().into()); + } + } + let mut blob_mgr = BlobManager::new(ctx.digester); blob_mgr.add_blob(blob_state.blob_ctx.clone()); blob_mgr.set_current_blob_index(0); @@ -168,7 +158,31 @@ impl OptimizePrefetch { .1 .blob_id .clone(); - Ok(()) + + let entries = match blob_table { + RafsBlobTable::V5(table) => table.get_all(), + RafsBlobTable::V6(table) => table.get_all(), + }; + + // Verify and update prefetch blob + assert!( + entries + .iter() + .filter(|blob| blob.blob_id() == "prefetch-blob") + .count() + == 1, + "Expected exactly one prefetch-blob" + ); + // Rewrite prefetch blob id + match blob_table { + RafsBlobTable::V5(table) => { + rewrite_blob_id(&mut table.entries, "prefetch-blob", ctx.blob_id.clone()) + } + RafsBlobTable::V6(table) => { + rewrite_blob_id(&mut table.entries, "prefetch-blob", ctx.blob_id.clone()) + } + } + Ok(blob_mgr) } fn process_prefetch_node( @@ -176,7 +190,7 @@ impl OptimizePrefetch { node: &TreeNode, prefetch_state: &mut PrefetchBlobState, batch: &mut BatchContextGenerator, - blob_table: &RafsV6BlobTable, + blob_table: &RafsBlobTable, blobs_dir_path: &Path, ) -> Result<()> { let tree_node = tree @@ -184,14 +198,17 @@ impl OptimizePrefetch { .ok_or(anyhow!("failed to get node"))? 
.node .as_ref(); + let entries = match blob_table { + RafsBlobTable::V5(table) => table.get_all(), + RafsBlobTable::V6(table) => table.get_all(), + }; let blob_id = tree_node .borrow() .chunks .first() - .and_then(|chunk| blob_table.entries.get(chunk.inner.blob_index() as usize)) + .and_then(|chunk| entries.get(chunk.inner.blob_index() as usize).cloned()) .map(|entry| entry.blob_id()) .ok_or(anyhow!("failed to get blob id"))?; - let mut blob_file = Arc::new(File::open(blobs_dir_path.join(blob_id))?); tree_node.borrow_mut().layer_idx = prefetch_state.blob_info.blob_index() as u16; @@ -247,6 +264,17 @@ impl OptimizePrefetch { } } +fn rewrite_blob_id(entries: &mut [Arc], blob_id: &str, new_blob_id: String) { + entries + .iter_mut() + .filter(|blob| blob.blob_id() == blob_id) + .for_each(|blob| { + let mut info = (**blob).clone(); + info.set_blob_id(new_blob_id.clone()); + *blob = Arc::new(info); + }); +} + pub fn update_ctx_from_bootstrap( ctx: &mut BuildContext, config: Arc, diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs index ad47a4f5047..06bd168dd03 100644 --- a/src/bin/nydus-image/main.rs +++ b/src/bin/nydus-image/main.rs @@ -38,7 +38,6 @@ use nydus_builder::{ TarballBuilder, Tree, TreeNode, WhiteoutSpec, }; -use nydus_rafs::metadata::layout::v6::RafsV6BlobTable; use nydus_rafs::metadata::{MergeError, RafsSuper, RafsSuperConfig, RafsVersion}; use nydus_storage::backend::localfs::LocalFs; use nydus_storage::backend::BlobBackend; @@ -54,6 +53,10 @@ use serde::{Deserialize, Serialize}; use crate::unpack::{OCIUnpacker, Unpacker}; use crate::validator::Validator; +use nydus_rafs::metadata::layout::v5::{RafsV5BlobTable, RafsV5ExtBlobTable}; +use nydus_rafs::metadata::layout::v6::RafsV6BlobTable; + +use nydus_rafs::metadata::layout::RafsBlobTable; #[cfg(target_os = "linux")] use nydus_service::ServiceArgs; @@ -560,7 +563,16 @@ fn prepare_cmd_args(bti_string: &'static str) -> App { .help( "Directory for localfs storage backend, hosting data blobs 
and cache files", ), - ), + ) + .arg( + Arg::new("output-bootstrap") + .long("output-bootstrap") + .short('O') + .help("Output path of optimized bootstrap"), + ) + .arg( + arg_output_json.clone(), + ) ); #[cfg(target_os = "linux")] @@ -911,7 +923,7 @@ fn main() -> Result<()> { } else if let Some(matches) = cmd.subcommand_matches("unpack") { Command::unpack(matches) } else if let Some(matches) = cmd.subcommand_matches("optimize") { - Command::optimize(matches) + Command::optimize(matches, &build_info) } else { #[cfg(target_os = "linux")] if let Some(matches) = cmd.subcommand_matches("export") { @@ -1670,11 +1682,16 @@ impl Command { Ok(()) } - fn optimize(matches: &ArgMatches) -> Result<()> { + fn optimize(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { let blobs_dir_path = Self::get_blobs_dir(matches)?; let prefetch_files = Self::get_prefetch_files(matches)?; prefetch_files.iter().for_each(|f| println!("{}", f)); let bootstrap_path = Self::get_bootstrap(matches)?; + let dst_bootstrap = match matches.get_one::("output-bootstrap") { + None => ArtifactStorage::SingleFile(PathBuf::from("optimized_bootstrap")), + Some(s) => ArtifactStorage::SingleFile(PathBuf::from(s)), + }; + let config = Self::get_configuration(matches)?; config.internal.set_blob_accessible(true); let mut build_ctx = BuildContext { @@ -1695,25 +1712,34 @@ impl Command { } } - let bootstrap_path = ArtifactStorage::SingleFile(PathBuf::from("optimized_bootstrap")); - let mut bootstrap_mgr = BootstrapManager::new(Some(bootstrap_path), None); + let mut bootstrap_mgr = BootstrapManager::new(Some(dst_bootstrap), None); let blobs = sb.superblock.get_blob_infos(); - let mut rafsv6table = RafsV6BlobTable::new(); - for blob in &blobs { - rafsv6table.entries.push(blob.clone()); - } - OptimizePrefetch::generate_prefetch( + let mut blob_table = match build_ctx.fs_version { + RafsVersion::V5 => RafsBlobTable::V5(RafsV5BlobTable { + entries: blobs, + extended: RafsV5ExtBlobTable::new(), + }), + + 
RafsVersion::V6 => RafsBlobTable::V6(RafsV6BlobTable { entries: blobs }), + }; + let output = OptimizePrefetch::generate_prefetch( &mut tree, &mut build_ctx, &mut bootstrap_mgr, - &mut rafsv6table, + &mut blob_table, blobs_dir_path.to_path_buf(), prefetch_nodes, ) .with_context(|| "Failed to generate prefetch bootstrap")?; - Ok(()) + OutputSerializer::dump( + matches, + output, + build_info, + build_ctx.compressor, + build_ctx.fs_version, + ) } fn inspect(matches: &ArgMatches) -> Result<()> { diff --git a/storage/src/device.rs b/storage/src/device.rs index c8b44347377..74e91ca45fb 100644 --- a/storage/src/device.rs +++ b/storage/src/device.rs @@ -79,6 +79,8 @@ bitflags! { const _V5_NO_EXT_BLOB_TABLE = 0x8000_0000; /// Blob is generated with chunkdict. const IS_CHUNKDICT_GENERATED = 0x0000_0200; + /// Blob is generated with separated prefetch files. + const IS_SEPARATED_WITH_PREFETCH_FILES = 0x0001_0000; } } @@ -508,6 +510,12 @@ impl BlobInfo { } } + pub fn set_separated_with_prefetch_files_feature(&mut self, is_prefetchblob: bool) { + if is_prefetchblob { + self.blob_features |= BlobFeatures::IS_SEPARATED_WITH_PREFETCH_FILES; + } + } + /// Get SHA256 digest of the ToC content, including the toc tar header. /// /// It's all zero for inlined bootstrap.