diff --git a/builder/src/chunkdict_generator.rs b/builder/src/chunkdict_generator.rs
index a57dcd97f34..72928b08fff 100644
--- a/builder/src/chunkdict_generator.rs
+++ b/builder/src/chunkdict_generator.rs
@@ -16,30 +16,17 @@
 use super::core::node::{ChunkSource, NodeInfo};
 use super::{BlobManager, Bootstrap, BootstrapManager, BuildContext, BuildOutput, Tree};
-use crate::core::blob::Blob;
 use crate::core::node::Node;
+use crate::NodeChunk;
 use crate::OsString;
-use crate::Path;
-use crate::TreeNode;
-use crate::{ArtifactWriter, BlobContext, NodeChunk};
 use anyhow::{Ok, Result};
 use nydus_rafs::metadata::chunk::ChunkWrapper;
 use nydus_rafs::metadata::inode::InodeWrapper;
-use nydus_rafs::metadata::layout::v6::RafsV6BlobTable;
-use nydus_rafs::metadata::layout::{RafsBlobTable, RafsXAttrs};
-use nydus_storage::device::BlobInfo;
-use nydus_storage::meta::BatchContextGenerator;
+use nydus_rafs::metadata::layout::RafsXAttrs;
 use nydus_storage::meta::BlobChunkInfoV1Ondisk;
-use nydus_utils::compress;
 use nydus_utils::compress::Algorithm;
 use nydus_utils::digest::RafsDigest;
-use sha2::digest::Update;
-use crate::finalize_blob;
-use crate::Artifact;
-use std::fs::File;
-use std::io::Read;
-use std::io::Seek;
 use std::mem::size_of;
 use std::path::PathBuf;
 use std::str::FromStr;
 use std::sync::Arc;

@@ -71,40 +58,6 @@ pub struct ChunkdictBlobInfo {
 /// Struct to generate chunkdict RAFS bootstrap.
 pub struct Generator {}
 
-struct PrefetchBlobState {
-    blob_info: BlobInfo,
-    blob_ctx: BlobContext,
-    blob_writer: Box<dyn Artifact>,
-    chunk_count: u32,
-}
-
-impl PrefetchBlobState {
-    fn new(ctx: &BuildContext, blob_layer_num: u32, blobs_dir_path: &Path) -> Result<Self> {
-        let mut blob_info = BlobInfo::new(
-            blob_layer_num,
-            String::from("prefetch-blob"),
-            0,
-            0,
-            ctx.chunk_size,
-            u32::MAX,
-            ctx.blob_features,
-        );
-        blob_info.set_compressor(ctx.compressor);
-        let mut blob_ctx = BlobContext::from(ctx, &blob_info, ChunkSource::Build)?;
-        blob_ctx.blob_meta_info_enabled = true;
-        let blob_writer = ArtifactWriter::new(crate::ArtifactStorage::FileDir(
-            blobs_dir_path.to_path_buf(),
-        ))
-        .map(|writer| Box::new(writer) as Box<dyn Artifact>)?;
-        Ok(Self {
-            blob_info,
-            blob_ctx,
-            blob_writer,
-            chunk_count: 0,
-        })
-    }
-}
-
 impl Generator {
     // Generate chunkdict RAFS bootstrap.
     pub fn generate(
@@ -139,207 +92,6 @@ impl Generator {
         BuildOutput::new(blob_mgr, &bootstrap_mgr.bootstrap_storage)
     }
 
-    /// Generate a new bootstrap for prefetch.
-    pub fn generate_prefetch(
-        tree: &mut Tree,
-        ctx: &mut BuildContext,
-        bootstrap_mgr: &mut BootstrapManager,
-        blob_table: &mut RafsV6BlobTable,
-        blobs_dir_path: PathBuf,
-        prefetch_nodes: Vec<TreeNode>,
-    ) -> Result<()> {
-        // create a new blob for prefetch layer
-        let blob_layer_num = blob_table.entries.len();
-
-        let mut blob_state =
-            PrefetchBlobState::new(&ctx, blob_layer_num as u32, &blobs_dir_path).unwrap();
-        let mut batch = BatchContextGenerator::new(0).unwrap();
-        for node in &prefetch_nodes {
-            Self::process_prefetch_node(
-                tree,
-                &node,
-                &mut blob_state,
-                &mut batch,
-                blob_table,
-                &blobs_dir_path,
-            );
-        }
-
-        {
-            let prefetch_blob_ctx = &blob_state.blob_ctx;
-            let prefetch_blob_info = &mut blob_state.blob_info;
-
-            Self::finalize_blobinfo_meta_data(
-                prefetch_blob_info,
-                blob_state.chunk_count as usize,
-                prefetch_blob_ctx.current_compressed_offset as usize,
-                prefetch_blob_ctx.current_uncompressed_offset as usize,
-            );
-        }
-
-        Self::finalize_blob(ctx, blob_table, &mut blob_state);
-
-        debug!("prefetch blob id: {}", ctx.blob_id);
-
-        Self::build_and_dump_bootstrap(tree, ctx, bootstrap_mgr, blob_table)?;
-        Ok(())
-    }
-
-    fn build_and_dump_bootstrap(
-        tree: &mut Tree,
-        ctx: &mut BuildContext,
-        bootstrap_mgr: &mut BootstrapManager,
-        blob_table: &mut RafsV6BlobTable,
-    ) -> Result<()> {
-        let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?;
-        let mut bootstrap = Bootstrap::new(tree.clone())?;
-
-        // Build bootstrap
-        bootstrap.build(ctx, &mut bootstrap_ctx)?;
-
-        // Verify and update prefetch blob
-        assert!(
-            blob_table
-                .entries
-                .iter()
-                .filter(|blob| blob.blob_id() == "prefetch-blob")
-                .count()
-                == 1,
-            "Expected exactly one prefetch-blob"
-        );
-
-        // Rewrite prefetch blob id
-        blob_table
-            .entries
-            .iter_mut()
-            .filter(|blob| blob.blob_id() == "prefetch-blob")
-            .for_each(|blob| {
-                let mut info = (**blob).clone();
-                info.set_blob_id(ctx.blob_id.clone());
-                *blob = Arc::new(info);
-            });
-
-        // Dump bootstrap
-        let blob_table_withprefetch = RafsBlobTable::V6(blob_table.clone());
-        bootstrap.dump(
-            ctx,
-            &mut bootstrap_mgr.bootstrap_storage,
-            &mut bootstrap_ctx,
-            &blob_table_withprefetch,
-        )?;
-
-        Ok(())
-    }
-
-    fn finalize_blob(
-        ctx: &mut BuildContext,
-        blob_table: &mut RafsV6BlobTable,
-        blob_state: &mut PrefetchBlobState,
-    ) {
-        blob_table.entries.push(blob_state.blob_info.clone().into());
-        let mut blob_mgr = BlobManager::new(nydus_utils::digest::Algorithm::Blake3);
-        blob_mgr.add_blob(blob_state.blob_ctx.clone());
-        blob_mgr.set_current_blob_index(0);
-        Blob::finalize_blob_data(&ctx, &mut blob_mgr, blob_state.blob_writer.as_mut()).unwrap();
-        if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() {
-            Blob::dump_meta_data(&ctx, blob_ctx, blob_state.blob_writer.as_mut()).unwrap();
-        };
-        ctx.blob_id = String::from("");
-        blob_mgr.get_current_blob().unwrap().1.blob_id = String::from("");
-        finalize_blob(ctx, &mut blob_mgr, blob_state.blob_writer.as_mut()).unwrap();
-    }
-
-    fn finalize_blobinfo_meta_data(
-        blobinfo: &mut BlobInfo,
-        chunk_count: usize,
-        compressed_offset: usize,
-        umcompressed_offset: usize,
-    ) {
-        blobinfo.set_meta_ci_offset(0x200 + umcompressed_offset);
-        blobinfo.set_chunk_count(chunk_count);
-        blobinfo.set_compressed_size(compressed_offset);
-        blobinfo.set_uncompressed_size(umcompressed_offset);
-    }
-
-    fn process_prefetch_node(
-        tree: &mut Tree,
-        node: &TreeNode,
-        prefetch_state: &mut PrefetchBlobState,
-        batch: &mut BatchContextGenerator,
-        blob_table: &RafsV6BlobTable,
-        blobs_dir_path: &Path,
-    ) {
-        let tree_node = tree
-            .get_node_mut(&node.borrow().path())
-            .unwrap()
-            .node
-            .as_ref();
-        let blob_id = {
-            let child = tree_node.borrow();
-            child
-                .chunks
-                .first()
-                .and_then(|chunk| blob_table.entries.get(chunk.inner.blob_index() as usize))
-                .map(|entry| entry.blob_id())
-                .unwrap()
-        };
-        let mut blob_file = Arc::new(File::open(blobs_dir_path.join(blob_id)).unwrap());
-        {
-            let mut child = tree_node.borrow_mut();
-            child.layer_idx = prefetch_state.blob_info.blob_index() as u16;
-        }
-
-        {
-            let mut child = tree_node.borrow_mut();
-            let chunks: &mut Vec<NodeChunk> = child.chunks.as_mut();
-            let blob_ctx = &mut prefetch_state.blob_ctx;
-            let blob_info = &mut prefetch_state.blob_info;
-            let encrypted = blob_ctx.blob_compressor != compress::Algorithm::None;
-
-            for chunk in chunks {
-                let inner = Arc::make_mut(&mut chunk.inner);
-
-                let mut buf = vec![0u8; inner.compressed_size() as usize];
-                blob_file
-                    .seek(std::io::SeekFrom::Start(inner.compressed_offset()))
-                    .unwrap();
-                blob_file.read_exact(&mut buf).unwrap();
-                prefetch_state.blob_writer.write_all(&buf).unwrap();
-                let info = batch
-                    .generate_chunk_info(
-                        blob_ctx.current_compressed_offset,
-                        blob_ctx.current_uncompressed_offset,
-                        inner.uncompressed_size(),
-                        encrypted,
-                    )
-                    .unwrap();
-                inner.set_blob_index(blob_info.blob_index());
-                inner.set_index(prefetch_state.chunk_count);
-                prefetch_state.chunk_count += 1;
-                inner.set_compressed_offset(blob_ctx.current_compressed_offset);
-                inner.set_uncompressed_offset(blob_ctx.current_uncompressed_offset);
-                let aligned_d_size: u64 =
-                    nydus_utils::try_round_up_4k(inner.uncompressed_size()).unwrap();
-                blob_ctx.compressed_blob_size += inner.compressed_size() as u64;
-                blob_ctx.uncompressed_blob_size += aligned_d_size;
-                blob_ctx.current_compressed_offset += inner.compressed_size() as u64;
-                blob_ctx.current_uncompressed_offset += aligned_d_size;
-                blob_ctx.add_chunk_meta_info(&inner, Some(info)).unwrap();
-                blob_ctx.blob_hash.update(&buf);
-
-                blob_info.set_meta_ci_compressed_size(
-                    (blob_info.meta_ci_compressed_size()
-                        + size_of::<BlobChunkInfoV1Ondisk>() as u64) as usize,
-                );
-
-                blob_info.set_meta_ci_uncompressed_size(
-                    (blob_info.meta_ci_uncompressed_size()
-                        + size_of::<BlobChunkInfoV1Ondisk>() as u64) as usize,
-                );
-            }
-        }
-    }
-
     /// Validate tree.
     fn validate_tree(tree: &Tree) -> Result<()> {
         let pre = &mut |t: &Tree| -> Result<()> {
@@ -528,5 +280,3 @@ impl Generator {
         Ok(())
     }
 }
-
-// Read the blob, get the chunk, fix dump node chunk function, Blob::dump generate a blob
diff --git a/builder/src/core/overlay.rs b/builder/src/core/overlay.rs
index a64ebe6da04..9b25f89a97d 100644
--- a/builder/src/core/overlay.rs
+++ b/builder/src/core/overlay.rs
@@ -74,9 +74,9 @@ pub enum WhiteoutSpec {
 impl fmt::Display for WhiteoutSpec {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
         match self {
-            WhiteoutSpec::Oci => write!(f, "OCI"),
-            WhiteoutSpec::Overlayfs => write!(f, "Overlayfs"),
-            WhiteoutSpec::None => write!(f, "None"),
+            WhiteoutSpec::Oci => write!(f, "oci"),
+            WhiteoutSpec::Overlayfs => write!(f, "overlayfs"),
+            WhiteoutSpec::None => write!(f, "none"),
         }
     }
 }
diff --git a/builder/src/lib.rs b/builder/src/lib.rs
index 919da5cc893..1163b639d76 100644
--- a/builder/src/lib.rs
+++ b/builder/src/lib.rs
@@ -41,6 +41,7 @@
 pub use self::core::prefetch::{Prefetch, PrefetchPolicy};
 pub use self::core::tree::{MetadataTreeBuilder, Tree, TreeNode};
 pub use self::directory::DirectoryBuilder;
 pub use self::merge::Merger;
+pub use self::optimize_prefetch::OptimizePrefetch;
 pub use self::stargz::StargzBuilder;
 pub use self::tarball::TarballBuilder;
@@ -49,6 +50,7 @@
 mod compact;
 mod core;
 mod directory;
 mod merge;
+mod optimize_prefetch;
 mod stargz;
 mod tarball;
@@ -248,7 +250,6 @@ fn finalize_blob(
         if let Some(blob_cache) = ctx.blob_cache_generator.as_ref() {
             blob_cache.finalize(&blob_ctx.blob_id)?;
         }
-        ctx.blob_id = blob_ctx.blob_id.clone();
     }
     Ok(())
 }
diff --git a/builder/src/optimize_prefetch.rs b/builder/src/optimize_prefetch.rs
new file mode 100644
index 00000000000..0bc2e70e4ae
--- /dev/null
+++ b/builder/src/optimize_prefetch.rs
@@ -0,0 +1,279 @@
+use crate::anyhow;
+use crate::core::blob::Blob;
+use crate::finalize_blob;
+use crate::Artifact;
+use crate::ArtifactWriter;
+use crate::BlobContext;
+use crate::BlobManager;
+use crate::Bootstrap;
+use crate::BootstrapManager;
+use crate::BuildContext;
+use crate::ChunkSource;
+use crate::NodeChunk;
+use crate::Path;
+use crate::PathBuf;
+use crate::Tree;
+use crate::TreeNode;
+use anyhow::{Ok, Result};
+use nydus_rafs::metadata::layout::v6::RafsV6BlobTable;
+use nydus_rafs::metadata::layout::RafsBlobTable;
+use nydus_storage::device::BlobInfo;
+use nydus_storage::meta::BatchContextGenerator;
+use nydus_storage::meta::BlobChunkInfoV1Ondisk;
+use nydus_utils::compress;
+use sha2::Digest;
+use std::fs::File;
+use std::io::Read;
+use std::io::Seek;
+use std::mem::size_of;
+use std::sync::Arc;
+pub struct OptimizePrefetch {}
+
+struct PrefetchBlobState {
+    blob_info: BlobInfo,
+    blob_ctx: BlobContext,
+    blob_writer: Box<dyn Artifact>,
+}
+
+impl PrefetchBlobState {
+    fn new(ctx: &BuildContext, blob_layer_num: u32, blobs_dir_path: &Path) -> Result<Self> {
+        let mut blob_info = BlobInfo::new(
+            blob_layer_num,
+            String::from("prefetch-blob"),
+            0,
+            0,
+            ctx.chunk_size,
+            u32::MAX,
+            ctx.blob_features,
+        );
+        blob_info.set_compressor(ctx.compressor);
+        let mut blob_ctx = BlobContext::from(ctx, &blob_info, ChunkSource::Build)?;
+        blob_ctx.blob_meta_info_enabled = true;
+        let blob_writer = ArtifactWriter::new(crate::ArtifactStorage::FileDir(
+            blobs_dir_path.to_path_buf(),
+        ))
+        .map(|writer| Box::new(writer) as Box<dyn Artifact>)?;
+        Ok(Self {
+            blob_info,
+            blob_ctx,
+            blob_writer,
+        })
+    }
+}
+
+impl OptimizePrefetch {
+    /// Generate a new bootstrap for prefetch.
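+    ///
+    /// Chunks referenced by the given prefetch nodes are copied out of their
+    /// original blobs into a dedicated `prefetch-blob`, which is appended to the
+    /// blob table before the bootstrap is rebuilt and dumped.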
+    pub fn generate_prefetch(
+        tree: &mut Tree,
+        ctx: &mut BuildContext,
+        bootstrap_mgr: &mut BootstrapManager,
+        blob_table: &mut RafsV6BlobTable,
+        blobs_dir_path: PathBuf,
+        prefetch_nodes: Vec<TreeNode>,
+    ) -> Result<()> {
+        // Create a new blob for the prefetch layer
+        let blob_layer_num = blob_table.entries.len();
+
+        let mut blob_state = PrefetchBlobState::new(ctx, blob_layer_num as u32, &blobs_dir_path)?;
+        let mut batch = BatchContextGenerator::new(0)?;
+        for node in &prefetch_nodes {
+            Self::process_prefetch_node(
+                tree,
+                node,
+                &mut blob_state,
+                &mut batch,
+                blob_table,
+                &blobs_dir_path,
+            )?;
+        }
+
+        {
+            let prefetch_blob_ctx = &blob_state.blob_ctx;
+            let prefetch_blob_info = &mut blob_state.blob_info;
+
+            Self::finalize_blobinfo_meta_data(
+                prefetch_blob_info,
+                prefetch_blob_ctx.chunk_count as usize,
+                prefetch_blob_ctx.current_compressed_offset as usize,
+                prefetch_blob_ctx.current_uncompressed_offset as usize,
+            );
+        }
+
+        Self::finalize_blob(ctx, blob_table, &mut blob_state)?;
+
+        debug!("prefetch blob id: {}", ctx.blob_id);
+
+        Self::build_and_dump_bootstrap(tree, ctx, bootstrap_mgr, blob_table)?;
+        Ok(())
+    }
+
+    fn build_and_dump_bootstrap(
+        tree: &mut Tree,
+        ctx: &mut BuildContext,
+        bootstrap_mgr: &mut BootstrapManager,
+        blob_table: &mut RafsV6BlobTable,
+    ) -> Result<()> {
+        let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?;
+        let mut bootstrap = Bootstrap::new(tree.clone())?;
+
+        // Build bootstrap
+        bootstrap.build(ctx, &mut bootstrap_ctx)?;
+
+        // Verify and update prefetch blob
+        assert!(
+            blob_table
+                .entries
+                .iter()
+                .filter(|blob| blob.blob_id() == "prefetch-blob")
+                .count()
+                == 1,
+            "Expected exactly one prefetch-blob"
+        );
+
+        // Rewrite prefetch blob id
+        blob_table
+            .entries
+            .iter_mut()
+            .filter(|blob| blob.blob_id() == "prefetch-blob")
+            .for_each(|blob| {
+                let mut info = (**blob).clone();
+                info.set_blob_id(ctx.blob_id.clone());
+                *blob = Arc::new(info);
+            });
+
+        // Dump bootstrap
+        let blob_table_with_prefetch = RafsBlobTable::V6(blob_table.clone());
+        bootstrap.dump(
+            ctx,
+            &mut bootstrap_mgr.bootstrap_storage,
+            &mut bootstrap_ctx,
+            &blob_table_with_prefetch,
+        )?;
+
+        Ok(())
+    }
+
+    fn finalize_blob(
+        ctx: &mut BuildContext,
+        blob_table: &mut RafsV6BlobTable,
+        blob_state: &mut PrefetchBlobState,
+    ) -> Result<()> {
+        blob_table.entries.push(blob_state.blob_info.clone().into());
+        let mut blob_mgr = BlobManager::new(ctx.digester);
+        blob_mgr.add_blob(blob_state.blob_ctx.clone());
+        blob_mgr.set_current_blob_index(0);
+        Blob::finalize_blob_data(ctx, &mut blob_mgr, blob_state.blob_writer.as_mut())?;
+        if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() {
+            Blob::dump_meta_data(ctx, blob_ctx, blob_state.blob_writer.as_mut())?;
+        }
+        ctx.blob_id = String::from("");
+        blob_mgr
+            .get_current_blob()
+            .ok_or(anyhow!("failed to get current blob"))?
+            .1
+            .blob_id = String::from("");
+        finalize_blob(ctx, &mut blob_mgr, blob_state.blob_writer.as_mut())?;
+        ctx.blob_id = blob_mgr
+            .get_current_blob()
+            .ok_or(anyhow!("failed to get current blob"))?
+            .1
+            .blob_id
+            .clone();
+        Ok(())
+    }
+
+    fn finalize_blobinfo_meta_data(
+        blobinfo: &mut BlobInfo,
+        chunk_count: usize,
+        compressed_offset: usize,
+        uncompressed_offset: usize,
+    ) {
+        blobinfo.set_meta_ci_offset(0x200 + uncompressed_offset);
+        blobinfo.set_chunk_count(chunk_count);
+        blobinfo.set_compressed_size(compressed_offset);
+        blobinfo.set_uncompressed_size(uncompressed_offset);
+    }
+
+    fn process_prefetch_node(
+        tree: &mut Tree,
+        node: &TreeNode,
+        prefetch_state: &mut PrefetchBlobState,
+        batch: &mut BatchContextGenerator,
+        blob_table: &RafsV6BlobTable,
+        blobs_dir_path: &Path,
+    ) -> Result<()> {
+        let tree_node = tree
+            .get_node_mut(&node.borrow().path())
+            .ok_or(anyhow!("failed to get node"))?
+            .node
+            .as_ref();
+        let blob_id = {
+            let child = tree_node.borrow();
+            child
+                .chunks
+                .first()
+                .and_then(|chunk| blob_table.entries.get(chunk.inner.blob_index() as usize))
+                .map(|entry| entry.blob_id())
+                .ok_or(anyhow!("failed to get blob id"))?
+        };
+        let mut blob_file = Arc::new(File::open(blobs_dir_path.join(blob_id))?);
+        {
+            let mut child = tree_node.borrow_mut();
+            child.layer_idx = prefetch_state.blob_info.blob_index() as u16;
+        }
+
+        {
+            let mut child = tree_node.borrow_mut();
+            let chunks: &mut Vec<NodeChunk> = child.chunks.as_mut();
+            let blob_ctx = &mut prefetch_state.blob_ctx;
+            let blob_info = &mut prefetch_state.blob_info;
+            let encrypted = blob_ctx.blob_compressor != compress::Algorithm::None;
+
+            for chunk in chunks {
+                let inner = Arc::make_mut(&mut chunk.inner);
+
+                let mut buf = vec![0u8; inner.compressed_size() as usize];
+                blob_file.seek(std::io::SeekFrom::Start(inner.compressed_offset()))?;
+                blob_file.read_exact(&mut buf)?;
+                prefetch_state.blob_writer.write_all(&buf)?;
+                let info = batch.generate_chunk_info(
+                    blob_ctx.current_compressed_offset,
+                    blob_ctx.current_uncompressed_offset,
+                    inner.uncompressed_size(),
+                    encrypted,
+                )?;
+                inner.set_blob_index(blob_info.blob_index());
+                if blob_ctx.chunk_count == u32::MAX {
+                    blob_ctx.chunk_count = 0;
+                }
+                inner.set_index(blob_ctx.chunk_count);
+                blob_ctx.chunk_count += 1;
+                inner.set_compressed_offset(blob_ctx.current_compressed_offset);
+                inner.set_uncompressed_offset(blob_ctx.current_uncompressed_offset);
+                let aligned_d_size: u64 = nydus_utils::try_round_up_4k(inner.uncompressed_size())
+                    .ok_or_else(|| anyhow!("invalid size"))?;
+                blob_ctx.compressed_blob_size += inner.compressed_size() as u64;
+                blob_ctx.uncompressed_blob_size += aligned_d_size;
+                blob_ctx.current_compressed_offset += inner.compressed_size() as u64;
+                blob_ctx.current_uncompressed_offset += aligned_d_size;
+                blob_ctx.add_chunk_meta_info(&inner, Some(info))?;
+                blob_ctx.blob_hash.update(&buf);
+
+                blob_info.set_meta_ci_compressed_size(
+                    (blob_info.meta_ci_compressed_size()
+                        + size_of::<BlobChunkInfoV1Ondisk>() as u64) as usize,
+                );
+
+                blob_info.set_meta_ci_uncompressed_size(
+                    (blob_info.meta_ci_uncompressed_size()
+                        + size_of::<BlobChunkInfoV1Ondisk>() as u64) as usize,
+                );
+            }
+        }
+        Ok(())
+    }
+}
diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs
index 5d031909f9b..ee79ae87e0c 100644
--- a/src/bin/nydus-image/main.rs
+++ b/src/bin/nydus-image/main.rs
@@ -34,7 +34,8 @@ use nydus_builder::{
     parse_chunk_dict_arg, ArtifactStorage, BlobCacheGenerator, BlobCompactor, BlobManager,
     BootstrapManager, BuildContext, BuildOutput, Builder, ChunkdictBlobInfo, ChunkdictChunkInfo,
     ConversionType, DirectoryBuilder, Feature, Features, Generator, HashChunkDict, Merger,
-    Prefetch, PrefetchPolicy, StargzBuilder, TarballBuilder, Tree, TreeNode, WhiteoutSpec,
+    OptimizePrefetch, Prefetch, PrefetchPolicy, StargzBuilder, TarballBuilder, Tree, TreeNode,
+    WhiteoutSpec,
 };
 
 use nydus_rafs::metadata::layout::v6::RafsV6BlobTable;
@@ -1688,7 +1689,7 @@ impl Command {
         };
 
         let sb = update_ctx_from_bootstrap(&mut build_ctx, config, bootstrap_path)?;
-        let mut tree = Tree::from_bootstrap(&sb, &mut ()).unwrap();
+        let mut tree = Tree::from_bootstrap(&sb, &mut ())?;
 
         let mut prefetch_nodes: Vec<TreeNode> = Vec::new();
         // Init prefetch nodes
@@ -1707,7 +1708,7 @@ impl Command {
             rafsv6table.entries.push(blob.clone());
         }
 
-        Generator::generate_prefetch(
+        OptimizePrefetch::generate_prefetch(
             &mut tree,
             &mut build_ctx,
             &mut bootstrap_mgr,
@@ -1831,7 +1832,7 @@ impl Command {
         match matches.get_one::<String>("prefetch-files") {
             Some(v) => {
                 let content = std::fs::read_to_string(v)
-                    .map_err(|_| anyhow!("failed to read prefetch files from {}", v))?;
+                    .map_err(|e| anyhow!("failed to read prefetch files from {}: {}", v, e))?;
 
                 let mut prefetch_files: Vec<PathBuf> = Vec::new();
                 for line in content.lines() {
diff --git a/src/bin/nydus-image/prefetch.rs b/src/bin/nydus-image/prefetch.rs
index a9325db81ea..bf9df542279 100644
--- a/src/bin/nydus-image/prefetch.rs
+++ b/src/bin/nydus-image/prefetch.rs
@@ -13,7 +13,12 @@ pub fn update_ctx_from_bootstrap(
 ) -> Result<RafsSuper> {
     let (sb, _) = RafsSuper::load_from_file(bootstrap_path, config, false)?;
 
-    ctx.blob_features = sb.superblock.get_blob_infos().first().unwrap().features();
+    ctx.blob_features = sb
+        .superblock
+        .get_blob_infos()
+        .first()
+        .ok_or_else(|| anyhow!("no blob info found in superblock"))?
+        .features();
 
     let config = sb.meta.get_config();
     if config.is_tarfs_mode {