From d7c65dbbbde332c70c2eb995d7927385e596cf90 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Thu, 5 Dec 2024 15:59:30 +0800 Subject: [PATCH 1/9] fix: builder: fix incorrect chunk processing in optimize subcommand Chunks of a file may come from multiple blobs, so we should find the blob file for each chunk. Signed-off-by: Xin Yin --- builder/src/optimize_prefetch.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/builder/src/optimize_prefetch.rs b/builder/src/optimize_prefetch.rs index b8a7b10583d..dd12d2de498 100644 --- a/builder/src/optimize_prefetch.rs +++ b/builder/src/optimize_prefetch.rs @@ -202,14 +202,6 @@ impl OptimizePrefetch { RafsBlobTable::V5(table) => table.get_all(), RafsBlobTable::V6(table) => table.get_all(), }; - let blob_id = tree_node - .borrow() - .chunks - .first() - .and_then(|chunk| entries.get(chunk.inner.blob_index() as usize).cloned()) - .map(|entry| entry.blob_id()) - .ok_or(anyhow!("failed to get blob id"))?; - let mut blob_file = Arc::new(File::open(blobs_dir_path.join(blob_id))?); tree_node.borrow_mut().layer_idx = prefetch_state.blob_info.blob_index() as u16; @@ -220,6 +212,12 @@ impl OptimizePrefetch { let encrypted = blob_ctx.blob_compressor != compress::Algorithm::None; for chunk in chunks { + let blob_id = entries + .get(chunk.inner.blob_index() as usize) + .map(|entry| entry.blob_id()) + .ok_or(anyhow!("failed to get blob id"))?; + let mut blob_file = Arc::new(File::open(blobs_dir_path.join(blob_id))?); + let inner = Arc::make_mut(&mut chunk.inner); let mut buf = vec![0u8; inner.compressed_size() as usize]; From b6f9a389ba318b93b249adc40d097e0753b8a851 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Mon, 9 Dec 2024 16:16:50 +0800 Subject: [PATCH 2/9] feat: builder: support file ranges in optimize subcommand For the nydus-image optimize subcommand, support prefetching files with byte ranges.
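As an illustration of the input format (the paths and offsets below are made up), each line of the prefetch-files list gives an absolute path, optionally followed by comma-separated `start-end` byte ranges, matching the PrefetchFileInfo::from_input parser added in this patch:

```
/usr/bin/bash 0-8192,1048576-1056768
/etc/nginx/nginx.conf
```

A line without ranges prefetches the whole file; with ranges, only chunks overlapping the given ranges are copied into the prefetch blob.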
Signed-off-by: Xin Yin --- builder/src/lib.rs | 1 + builder/src/optimize_prefetch.rs | 113 ++++++++++++++++++++++++++++--- src/bin/nydus-image/main.rs | 40 +++-------- 3 files changed, 114 insertions(+), 40 deletions(-) diff --git a/builder/src/lib.rs b/builder/src/lib.rs index 185e719e144..185faabe3e5 100644 --- a/builder/src/lib.rs +++ b/builder/src/lib.rs @@ -41,6 +41,7 @@ pub use self::core::prefetch::{Prefetch, PrefetchPolicy}; pub use self::core::tree::{MetadataTreeBuilder, Tree, TreeNode}; pub use self::directory::DirectoryBuilder; pub use self::merge::Merger; +pub use self::optimize_prefetch::generate_prefetch_file_info; pub use self::optimize_prefetch::update_ctx_from_bootstrap; pub use self::optimize_prefetch::OptimizePrefetch; pub use self::stargz::StargzBuilder; diff --git a/builder/src/optimize_prefetch.rs b/builder/src/optimize_prefetch.rs index dd12d2de498..33918cd5388 100644 --- a/builder/src/optimize_prefetch.rs +++ b/builder/src/optimize_prefetch.rs @@ -15,9 +15,7 @@ use crate::NodeChunk; use crate::Path; use crate::PathBuf; use crate::Tree; -use crate::TreeNode; -use anyhow::Context; -use anyhow::{Ok, Result}; +use anyhow::{bail, Context, Result}; use nydus_api::ConfigV2; use nydus_rafs::metadata::layout::RafsBlobTable; use nydus_rafs::metadata::RafsSuper; @@ -27,6 +25,7 @@ use nydus_storage::meta::BatchContextGenerator; use nydus_storage::meta::BlobChunkInfoV1Ondisk; use nydus_utils::compress; use sha2::Digest; +use std::cmp::{max, min}; use std::fs::File; use std::io::{Read, Seek, Write}; use std::mem::size_of; @@ -39,6 +38,60 @@ struct PrefetchBlobState { blob_writer: Box, } +#[derive(Clone)] +struct PrefetchFileRange { + offset: u64, + size: usize, +} + +pub struct PrefetchFileInfo { + file: PathBuf, + ranges: Option>, +} + +impl PrefetchFileInfo { + fn from_input(input: &str) -> Result { + let parts: Vec<&str> = input.split_whitespace().collect(); + let file = PathBuf::from(parts[0]); + if !file.is_absolute() { + bail!("prefetch file path is not absolute: {}", file.display()); + } + + if parts.len() != 2 { + return Ok(PrefetchFileInfo { file, ranges: None }); + } + let range_strs = parts[1]; + let mut ranges = Vec::new(); + for range_s in range_strs.split(',') { + let range_parts: Vec<&str> = range_s.split('-').collect(); + if range_parts.len() != 2 { + return Err(anyhow!(format!( + "PrefetchFileInfo Range format is incorrect" + ))); + } + + let offset = range_parts[0] + .parse::() + .map_err(|_| anyhow!("parse offset failed"))?; + + let end = range_parts[1] + .parse::() + .map_err(|_| anyhow!("parse size failed"))?; + + let range = PrefetchFileRange { + offset, + size: (end - offset) as usize, + }; + + ranges.push(range); + } + Ok(PrefetchFileInfo { + file, + ranges: Some(ranges), + }) + } +} + impl PrefetchBlobState { fn new(ctx: &BuildContext, blob_layer_num: u32, blobs_dir_path: &Path) -> Result { let mut blob_info = BlobInfo::new( @@ -74,7 +127,7 @@ impl OptimizePrefetch { bootstrap_mgr: &mut BootstrapManager, blob_table: &mut RafsBlobTable, blobs_dir_path: PathBuf, - prefetch_nodes: Vec, + prefetch_files: Vec, ) -> Result { // create a new blob for prefetch layer @@ -84,10 +137,10 @@ impl OptimizePrefetch { }; let mut blob_state = PrefetchBlobState::new(&ctx, blob_layer_num as u32, &blobs_dir_path)?; let mut batch = BatchContextGenerator::new(0)?; - for node in &prefetch_nodes { + for node in prefetch_files { Self::process_prefetch_node( tree, - &node, + node, &mut blob_state, &mut batch, blob_table, @@ -187,14 +240,14 @@ impl OptimizePrefetch { fn 
process_prefetch_node( tree: &mut Tree, - node: &TreeNode, + prefetch_file_info: PrefetchFileInfo, prefetch_state: &mut PrefetchBlobState, batch: &mut BatchContextGenerator, blob_table: &RafsBlobTable, blobs_dir_path: &Path, ) -> Result<()> { let tree_node = tree - .get_node_mut(&node.borrow().path()) + .get_node_mut(&prefetch_file_info.file) .ok_or(anyhow!("failed to get node"))? .node .as_ref(); @@ -204,7 +257,6 @@ impl OptimizePrefetch { }; tree_node.borrow_mut().layer_idx = prefetch_state.blob_info.blob_index() as u16; - let mut child = tree_node.borrow_mut(); let chunks: &mut Vec = child.chunks.as_mut(); let blob_ctx = &mut prefetch_state.blob_ctx; @@ -212,6 +264,20 @@ impl OptimizePrefetch { let encrypted = blob_ctx.blob_compressor != compress::Algorithm::None; for chunk in chunks { + // check the file range + if let Some(ref ranges) = prefetch_file_info.ranges { + let mut should_skip = true; + for range in ranges { + if range_overlap(chunk, range) { + should_skip = false; + break; + } + } + if should_skip { + continue; + } + } + let blob_id = entries .get(chunk.inner.blob_index() as usize) .map(|entry| entry.blob_id()) @@ -297,3 +363,32 @@ pub fn update_ctx_from_bootstrap( ctx.compressor = config.compressor; Ok(sb) } + +pub fn generate_prefetch_file_info(prefetch_file: &Path) -> Result> { + let content = std::fs::read_to_string(prefetch_file) + .map_err(|e| anyhow!("failed to read prefetch files from {}", e))?; + + let mut prefetch_nodes: Vec = Vec::new(); + for line in content.lines() { + if line.is_empty() || line.trim().is_empty() { + continue; + } + match PrefetchFileInfo::from_input(line) { + Ok(node) => prefetch_nodes.push(node), + Err(e) => warn!("parse prefetch node failed {}", e), + } + } + Ok(prefetch_nodes) +} + +fn range_overlap(chunk: &mut NodeChunk, range: &PrefetchFileRange) -> bool { + if max(range.offset, chunk.inner.file_offset()) + <= min( + range.offset + range.size as u64, + chunk.inner.file_offset() + chunk.inner.uncompressed_size() as u64, + ) + { + return true; + } + false +} diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs index 06bd168dd03..e877294fb24 100644 --- a/src/bin/nydus-image/main.rs +++ b/src/bin/nydus-image/main.rs @@ -31,11 +31,11 @@ use nix::unistd::{getegid, geteuid}; use nydus::{get_build_time_info, setup_logging}; use nydus_api::{BuildTimeInfo, ConfigV2, LocalFsConfig}; use nydus_builder::{ - parse_chunk_dict_arg, update_ctx_from_bootstrap, ArtifactStorage, BlobCacheGenerator, - BlobCompactor, BlobManager, BootstrapManager, BuildContext, BuildOutput, Builder, - ChunkdictBlobInfo, ChunkdictChunkInfo, ConversionType, DirectoryBuilder, Feature, Features, - Generator, HashChunkDict, Merger, OptimizePrefetch, Prefetch, PrefetchPolicy, StargzBuilder, - TarballBuilder, Tree, TreeNode, WhiteoutSpec, + generate_prefetch_file_info, parse_chunk_dict_arg, update_ctx_from_bootstrap, ArtifactStorage, + BlobCacheGenerator, BlobCompactor, BlobManager, BootstrapManager, BuildContext, BuildOutput, + Builder, ChunkdictBlobInfo, ChunkdictChunkInfo, ConversionType, DirectoryBuilder, Feature, + Features, Generator, HashChunkDict, Merger, OptimizePrefetch, Prefetch, PrefetchPolicy, + StargzBuilder, TarballBuilder, Tree, WhiteoutSpec, }; use nydus_rafs::metadata::{MergeError, RafsSuper, RafsSuperConfig, RafsVersion}; @@ -1684,8 +1684,7 @@ impl Command { fn optimize(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { let blobs_dir_path = Self::get_blobs_dir(matches)?; - let prefetch_files = Self::get_prefetch_files(matches)?; - 
prefetch_files.iter().for_each(|f| println!("{}", f)); + let prefetch_file = Self::get_prefetch_files(matches)?; let bootstrap_path = Self::get_bootstrap(matches)?; let dst_bootstrap = match matches.get_one::("output-bootstrap") { None => ArtifactStorage::SingleFile(PathBuf::from("optimized_bootstrap")), @@ -1702,17 +1701,8 @@ impl Command { let sb = update_ctx_from_bootstrap(&mut build_ctx, config, bootstrap_path)?; let mut tree = Tree::from_bootstrap(&sb, &mut ())?; - - let mut prefetch_nodes: Vec = Vec::new(); - // Init prefetch nodes - for f in prefetch_files.iter() { - let path = PathBuf::from(f); - if let Some(tree) = tree.get_node(&path) { - prefetch_nodes.push(tree.node.clone()); - } - } - let mut bootstrap_mgr = BootstrapManager::new(Some(dst_bootstrap), None); + let prefetch_nodes = generate_prefetch_file_info(prefetch_file)?; let blobs = sb.superblock.get_blob_infos(); let mut blob_table = match build_ctx.fs_version { @@ -1849,21 +1839,9 @@ impl Command { } } - fn get_prefetch_files(matches: &ArgMatches) -> Result> { + fn get_prefetch_files(matches: &ArgMatches) -> Result<&Path> { match matches.get_one::("prefetch-files") { - Some(v) => { - let content = std::fs::read_to_string(v) - .map_err(|e| anyhow!("failed to read prefetch files from {}: {}", v, e))?; - - let mut prefetch_files: Vec = Vec::new(); - for line in content.lines() { - if line.is_empty() || line.trim().is_empty() { - continue; - } - prefetch_files.push(line.trim().to_string()); - } - Ok(prefetch_files) - } + Some(s) => Ok(Path::new(s)), None => bail!("missing parameter `prefetch-files`"), } } From c90c2c338834cb69bf9f32c75dbf440311ca96a4 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Wed, 11 Dec 2024 16:31:57 +0800 Subject: [PATCH 3/9] fix: builder: fix optimize bootstrap missing extended table The bootstrap generated by the optimize subcommand missed the extended table, which caused the runtime to use the Digested ChunkMap.
Signed-off-by: Xin Yin --- builder/src/optimize_prefetch.rs | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/builder/src/optimize_prefetch.rs b/builder/src/optimize_prefetch.rs index 33918cd5388..fffa7985682 100644 --- a/builder/src/optimize_prefetch.rs +++ b/builder/src/optimize_prefetch.rs @@ -148,11 +148,11 @@ impl OptimizePrefetch { )?; } - let blob_mgr = Self::dump_blob(ctx, blob_table, &mut blob_state)?; + Self::dump_blob(ctx, blob_table, &mut blob_state)?; debug!("prefetch blob id: {}", ctx.blob_id); - Self::build_dump_bootstrap(tree, ctx, bootstrap_mgr, blob_table)?; + let blob_mgr = Self::build_dump_bootstrap(tree, ctx, bootstrap_mgr, blob_table)?; BuildOutput::new(&blob_mgr, &bootstrap_mgr.bootstrap_storage) } @@ -161,31 +161,36 @@ impl OptimizePrefetch { ctx: &mut BuildContext, bootstrap_mgr: &mut BootstrapManager, blob_table: &mut RafsBlobTable, - ) -> Result<()> { + ) -> Result { let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; let mut bootstrap = Bootstrap::new(tree.clone())?; // Build bootstrap bootstrap.build(ctx, &mut bootstrap_ctx)?; - let blob_table_withprefetch = match blob_table { - RafsBlobTable::V5(table) => RafsBlobTable::V5(table.clone()), - RafsBlobTable::V6(table) => RafsBlobTable::V6(table.clone()), + // generate blob table with extended table + let mut blob_mgr = BlobManager::new(ctx.digester); + let blob_info = match blob_table { + RafsBlobTable::V5(table) => table.get_all(), + RafsBlobTable::V6(table) => table.get_all(), }; + blob_mgr.extend_from_blob_table(ctx, blob_info)?; + let blob_table_withprefetch = blob_mgr.to_blob_table(&ctx)?; + bootstrap.dump( ctx, &mut bootstrap_mgr.bootstrap_storage, &mut bootstrap_ctx, &blob_table_withprefetch, )?; - Ok(()) + Ok(blob_mgr) } fn dump_blob( ctx: &mut BuildContext, blob_table: &mut RafsBlobTable, blob_state: &mut PrefetchBlobState, - ) -> Result { + ) -> Result<()> { match blob_table { RafsBlobTable::V5(table) => { table.entries.push(blob_state.blob_info.clone().into()); @@ -235,7 +240,7 @@ impl OptimizePrefetch { rewrite_blob_id(&mut table.entries, "prefetch-blob", ctx.blob_id.clone()) } } - Ok(blob_mgr) + Ok(()) } fn process_prefetch_node( From ed5dcbed813855ece7d085865ac5b6069c627643 Mon Sep 17 00:00:00 2001 From: Xing Ma Date: Wed, 11 Dec 2024 16:40:09 +0800 Subject: [PATCH 4/9] fix: builder: skip bad file instead of stopping whole build For nydus-image optimize subcommand, skip bad prefetch file instead of stopping whole build process, such as symlink. Signed-off-by: Xing Ma --- builder/src/optimize_prefetch.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/builder/src/optimize_prefetch.rs b/builder/src/optimize_prefetch.rs index fffa7985682..b9b1a599416 100644 --- a/builder/src/optimize_prefetch.rs +++ b/builder/src/optimize_prefetch.rs @@ -251,8 +251,14 @@ impl OptimizePrefetch { blob_table: &RafsBlobTable, blobs_dir_path: &Path, ) -> Result<()> { + let file = prefetch_file_info.file; + if tree.get_node_mut(&file).is_none() { + warn!("prefetch file {} is bad, skip it", file.display()); + return Ok(()); + } + let tree_node = tree - .get_node_mut(&prefetch_file_info.file) + .get_node_mut(&file) .ok_or(anyhow!("failed to get node"))? .node .as_ref(); From e98dd893264df709169ab6ead75ac0b264917ba3 Mon Sep 17 00:00:00 2001 From: Xing Ma Date: Mon, 16 Dec 2024 10:37:05 +0800 Subject: [PATCH 5/9] fix: builder: fix hardlink break of optimize subcommand When the inode of the prefetch files has hardlinks, there are two bugs: 1. 
the hardlink is broken, because layer_idx is changed, which is used as a key for identifying hardlinks in inode_map; 2. prefetching of the other identical inode will fall into another blob, which may cause a performance issue due to I/O amplification. This commit fixes the two bugs above. Signed-off-by: Xing Ma --- builder/src/optimize_prefetch.rs | 42 +++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/builder/src/optimize_prefetch.rs b/builder/src/optimize_prefetch.rs index b9b1a599416..9b988151459 100644 --- a/builder/src/optimize_prefetch.rs +++ b/builder/src/optimize_prefetch.rs @@ -44,6 +44,7 @@ struct PrefetchFileRange { size: usize, } +#[derive(Clone)] pub struct PrefetchFileInfo { file: PathBuf, ranges: Option>, @@ -137,7 +138,7 @@ impl OptimizePrefetch { }; let mut blob_state = PrefetchBlobState::new(&ctx, blob_layer_num as u32, &blobs_dir_path)?; let mut batch = BatchContextGenerator::new(0)?; - for node in prefetch_files { + for node in prefetch_files.clone() { Self::process_prefetch_node( tree, node, @@ -152,7 +153,8 @@ impl OptimizePrefetch { debug!("prefetch blob id: {}", ctx.blob_id); - let blob_mgr = Self::build_dump_bootstrap(tree, ctx, bootstrap_mgr, blob_table)?; + let blob_mgr = + Self::build_dump_bootstrap(tree, ctx, bootstrap_mgr, blob_table, prefetch_files)?; BuildOutput::new(&blob_mgr, &bootstrap_mgr.bootstrap_storage) } @@ -161,6 +163,7 @@ impl OptimizePrefetch { ctx: &mut BuildContext, bootstrap_mgr: &mut BootstrapManager, blob_table: &mut RafsBlobTable, + prefetch_files: Vec, ) -> Result { let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?; let mut bootstrap = Bootstrap::new(tree.clone())?; @@ -168,6 +171,38 @@ impl OptimizePrefetch { // Build bootstrap bootstrap.build(ctx, &mut bootstrap_ctx)?; + // Fix hardlink + for node in prefetch_files.clone() { + let file = &node.file; + if tree.get_node(&file).is_none() { + warn!( + "prefetch file {} is skipped, no need to fixing hardlink", + file.display() + ); + continue; + } + + let tree_node = tree + .get_node(&file) + .ok_or(anyhow!("failed to get node"))?
+ .node + .as_ref(); + let child_node = tree_node.borrow(); + let key = ( + child_node.layer_idx, + child_node.info.src_ino, + child_node.info.src_dev, + ); + let chunks = child_node.chunks.clone(); + drop(child_node); + + if let Some(indexes) = bootstrap_ctx.inode_map.get_mut(&key) { + for n in indexes.iter() { + // Rewrite blob chunks to the prefetch blob's chunks + n.borrow_mut().chunks = chunks.clone(); + } + } + } // generate blob table with extended table let mut blob_mgr = BlobManager::new(ctx.digester); let blob_info = match blob_table { @@ -251,7 +286,7 @@ impl OptimizePrefetch { blob_table: &RafsBlobTable, blobs_dir_path: &Path, ) -> Result<()> { - let file = prefetch_file_info.file; + let file = prefetch_file_info.file.clone(); if tree.get_node_mut(&file).is_none() { warn!("prefetch file {} is bad, skip it", file.display()); return Ok(()); @@ -267,7 +302,6 @@ impl OptimizePrefetch { RafsBlobTable::V6(table) => table.get_all(), }; - tree_node.borrow_mut().layer_idx = prefetch_state.blob_info.blob_index() as u16; let mut child = tree_node.borrow_mut(); let chunks: &mut Vec = child.chunks.as_mut(); let blob_ctx = &mut prefetch_state.blob_ctx; From fc811b59705cd8af0cd90c9d8ab22fad58be6e33 Mon Sep 17 00:00:00 2001 From: "maxing.lan" Date: Mon, 20 Jan 2025 17:29:02 +0800 Subject: [PATCH 6/9] fix: builder: fix blob_info lost updates of optimize subcommand Now it only updates blob_ctx during the build process without persisting these changes to blob_info. This results in incorrect metadata which can be shown by using nydus-image check. Signed-off-by: maxing.lan --- builder/src/optimize_prefetch.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/builder/src/optimize_prefetch.rs b/builder/src/optimize_prefetch.rs index 9b988151459..0ddcbaad381 100644 --- a/builder/src/optimize_prefetch.rs +++ b/builder/src/optimize_prefetch.rs @@ -358,6 +358,9 @@ impl OptimizePrefetch { blob_ctx.add_chunk_meta_info(&inner, Some(info))?; blob_ctx.blob_hash.update(&buf); + blob_info.set_compressed_size(blob_ctx.compressed_blob_size as usize); + blob_info.set_uncompressed_size(blob_ctx.uncompressed_blob_size as usize); + blob_info.set_chunk_count(blob_ctx.chunk_count as usize); blob_info.set_meta_ci_compressed_size( (blob_info.meta_ci_compressed_size() + size_of::() as u64) as usize, From ce62847a3b19c9a1d323e1003c7ec49d133ac748 Mon Sep 17 00:00:00 2001 From: "maxing.lan" Date: Tue, 21 Jan 2025 16:20:20 +0800 Subject: [PATCH 7/9] fix: builder: fix wrong use of blob meta info of optimize subcommand Blob meta info is only for fs-version 6, so we move the corresponding code in one place for better judgement branch. Besides, fix two potential bugs: 1. for fs-version 5, we don't align the chunk offset up to 4k, wrong rounding up will cause coredump in runtime. 2. use BlobChunkInfoV2Ondisk instead of BlobChunkInfoV1Ondisk to keep consistent with generate_chunk_info() which returns BlobChunkInfoV2Ondisk. Theoretical risk, not fully verified. 
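A minimal sketch of the intended offset accounting (illustrative only, not code from this patch; it assumes a 4KiB round-up equivalent to nydus_utils::try_round_up_4k): for RAFS v6 the uncompressed offset advances by the 4KiB-aligned chunk size, while for RAFS v5 it advances by the raw uncompressed size.

```rust
// Illustrative sketch: how the uncompressed offset is expected to advance per chunk.
fn next_uncompressed_offset(is_rafs_v6: bool, offset: u64, uncompressed_size: u32) -> u64 {
    let step = if is_rafs_v6 {
        // RAFS v6 keeps chunk data 4KiB-aligned inside the blob.
        (uncompressed_size as u64 + 0xfff) & !0xfff
    } else {
        // RAFS v5 packs chunks back to back; rounding up here would make the
        // offsets recorded in the bootstrap disagree with the blob layout.
        uncompressed_size as u64
    };
    offset + step
}
```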
Signed-off-by: maxing.lan --- builder/src/optimize_prefetch.rs | 49 ++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/builder/src/optimize_prefetch.rs b/builder/src/optimize_prefetch.rs index 0ddcbaad381..c96650a02e9 100644 --- a/builder/src/optimize_prefetch.rs +++ b/builder/src/optimize_prefetch.rs @@ -22,7 +22,7 @@ use nydus_rafs::metadata::RafsSuper; use nydus_rafs::metadata::RafsVersion; use nydus_storage::device::BlobInfo; use nydus_storage::meta::BatchContextGenerator; -use nydus_storage::meta::BlobChunkInfoV1Ondisk; +use nydus_storage::meta::BlobChunkInfoV2Ondisk; use nydus_utils::compress; use sha2::Digest; use std::cmp::{max, min}; @@ -239,9 +239,11 @@ impl OptimizePrefetch { blob_mgr.add_blob(blob_state.blob_ctx.clone()); blob_mgr.set_current_blob_index(0); Blob::finalize_blob_data(&ctx, &mut blob_mgr, blob_state.blob_writer.as_mut())?; - if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { - Blob::dump_meta_data(&ctx, blob_ctx, blob_state.blob_writer.as_mut()).unwrap(); - }; + if let RafsBlobTable::V6(_) = blob_table { + if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() { + Blob::dump_meta_data(&ctx, blob_ctx, blob_state.blob_writer.as_mut()).unwrap(); + }; + } ctx.blob_id = String::from(""); blob_mgr.get_current_blob().unwrap().1.blob_id = String::from(""); finalize_blob(ctx, &mut blob_mgr, blob_state.blob_writer.as_mut())?; @@ -335,12 +337,6 @@ impl OptimizePrefetch { blob_file.seek(std::io::SeekFrom::Start(inner.compressed_offset()))?; blob_file.read_exact(&mut buf)?; prefetch_state.blob_writer.write_all(&buf)?; - let info = batch.generate_chunk_info( - blob_ctx.current_compressed_offset, - blob_ctx.current_uncompressed_offset, - inner.uncompressed_size(), - encrypted, - )?; inner.set_blob_index(blob_info.blob_index()); if blob_ctx.chunk_count == u32::MAX { blob_ctx.chunk_count = 0; @@ -349,27 +345,36 @@ impl OptimizePrefetch { blob_ctx.chunk_count += 1; inner.set_compressed_offset(blob_ctx.current_compressed_offset); inner.set_uncompressed_offset(blob_ctx.current_uncompressed_offset); - let aligned_d_size: u64 = nydus_utils::try_round_up_4k(inner.uncompressed_size()) - .ok_or_else(|| anyhow!("invalid size"))?; + let mut aligned_d_size: u64 = inner.uncompressed_size() as u64; + if let RafsBlobTable::V6(_) = blob_table { + aligned_d_size = nydus_utils::try_round_up_4k(inner.uncompressed_size()) + .ok_or_else(|| anyhow!("invalid size"))?; + let info = batch.generate_chunk_info( + blob_ctx.current_compressed_offset, + blob_ctx.current_uncompressed_offset, + inner.uncompressed_size(), + encrypted, + )?; + blob_info.set_meta_ci_compressed_size( + (blob_info.meta_ci_compressed_size() + + size_of::() as u64) as usize, + ); + + blob_info.set_meta_ci_uncompressed_size( + (blob_info.meta_ci_uncompressed_size() + + size_of::() as u64) as usize, + ); + blob_ctx.add_chunk_meta_info(&inner, Some(info))?; + } blob_ctx.compressed_blob_size += inner.compressed_size() as u64; blob_ctx.uncompressed_blob_size += aligned_d_size; blob_ctx.current_compressed_offset += inner.compressed_size() as u64; blob_ctx.current_uncompressed_offset += aligned_d_size; - blob_ctx.add_chunk_meta_info(&inner, Some(info))?; blob_ctx.blob_hash.update(&buf); blob_info.set_compressed_size(blob_ctx.compressed_blob_size as usize); blob_info.set_uncompressed_size(blob_ctx.uncompressed_blob_size as usize); blob_info.set_chunk_count(blob_ctx.chunk_count as usize); - blob_info.set_meta_ci_compressed_size( - (blob_info.meta_ci_compressed_size() + size_of::() as u64) 
- as usize, - ); - - blob_info.set_meta_ci_uncompressed_size( - (blob_info.meta_ci_uncompressed_size() + size_of::() as u64) - as usize, - ); } Ok(()) From aef54d676b7898ed036d8c461f74790525188341 Mon Sep 17 00:00:00 2001 From: "maxing.lan" Date: Tue, 21 Jan 2025 17:45:03 +0800 Subject: [PATCH 8/9] feat: builder: add backend configuration support for optimize subcommand With backend configuration supported, we can read chunks on demand by specifying a backend such as a registry, without downloading the whole image during image building. Signed-off-by: maxing.lan --- builder/src/optimize_prefetch.rs | 27 +++++++++------- src/bin/nydus-image/main.rs | 55 +++++++++++++++++++++++++++++--- 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/builder/src/optimize_prefetch.rs b/builder/src/optimize_prefetch.rs index c96650a02e9..35404ece666 100644 --- a/builder/src/optimize_prefetch.rs +++ b/builder/src/optimize_prefetch.rs @@ -20,14 +20,13 @@ use nydus_api::ConfigV2; use nydus_rafs::metadata::layout::RafsBlobTable; use nydus_rafs::metadata::RafsSuper; use nydus_rafs::metadata::RafsVersion; +use nydus_storage::backend::BlobBackend; use nydus_storage::device::BlobInfo; use nydus_storage::meta::BatchContextGenerator; use nydus_storage::meta::BlobChunkInfoV2Ondisk; use nydus_utils::compress; use sha2::Digest; use std::cmp::{max, min}; -use std::fs::File; -use std::io::{Read, Seek, Write}; use std::mem::size_of; use std::sync::Arc; pub struct OptimizePrefetch {} @@ -94,7 +93,7 @@ impl PrefetchFileInfo { } impl PrefetchBlobState { - fn new(ctx: &BuildContext, blob_layer_num: u32, blobs_dir_path: &Path) -> Result { + fn new(ctx: &BuildContext, blob_layer_num: u32, output_blob_dir_path: &Path) -> Result { let mut blob_info = BlobInfo::new( blob_layer_num, String::from("prefetch-blob"), @@ -109,7 +108,7 @@ impl PrefetchBlobState { let mut blob_ctx = BlobContext::from(ctx, &blob_info, ChunkSource::Build)?; blob_ctx.blob_meta_info_enabled = true; let blob_writer = ArtifactWriter::new(crate::ArtifactStorage::FileDir( - blobs_dir_path.to_path_buf(), + output_blob_dir_path.to_path_buf(), )) .map(|writer| Box::new(writer) as Box)?; Ok(Self { @@ -127,8 +126,9 @@ impl OptimizePrefetch { ctx: &mut BuildContext, bootstrap_mgr: &mut BootstrapManager, blob_table: &mut RafsBlobTable, - blobs_dir_path: PathBuf, + output_blob_dir_path: PathBuf, prefetch_files: Vec, + backend: Arc, ) -> Result { // create a new blob for prefetch layer @@ -136,7 +136,8 @@ impl OptimizePrefetch { RafsBlobTable::V5(table) => table.get_all().len(), RafsBlobTable::V6(table) => table.get_all().len(), }; - let mut blob_state = PrefetchBlobState::new(&ctx, blob_layer_num as u32, &blobs_dir_path)?; + let mut blob_state = + PrefetchBlobState::new(&ctx, blob_layer_num as u32, &output_blob_dir_path)?; let mut batch = BatchContextGenerator::new(0)?; for node in prefetch_files.clone() { Self::process_prefetch_node( @@ -145,7 +146,7 @@ impl OptimizePrefetch { &mut blob_state, &mut batch, blob_table, - &blobs_dir_path, + backend.clone(), )?; } @@ -286,7 +287,7 @@ impl OptimizePrefetch { prefetch_state: &mut PrefetchBlobState, batch: &mut BatchContextGenerator, blob_table: &RafsBlobTable, - blobs_dir_path: &Path, + backend: Arc, ) -> Result<()> { let file = prefetch_file_info.file.clone(); if tree.get_node_mut(&file).is_none() { @@ -329,13 +330,17 @@ impl OptimizePrefetch { .get(chunk.inner.blob_index() as usize) .map(|entry| entry.blob_id()) .ok_or(anyhow!("failed to get blob id"))?; - let mut blob_file = Arc::new(File::open(blobs_dir_path.join(blob_id))?);
let inner = Arc::make_mut(&mut chunk.inner); + let reader = backend + .clone() + .get_reader(&blob_id.clone()) + .expect("get blob err"); let mut buf = vec![0u8; inner.compressed_size() as usize]; - blob_file.seek(std::io::SeekFrom::Start(inner.compressed_offset()))?; - blob_file.read_exact(&mut buf)?; + reader + .read(&mut buf, inner.compressed_offset()) + .expect("read blob err"); prefetch_state.blob_writer.write_all(&buf)?; inner.set_blob_index(blob_info.blob_index()); if blob_ctx.chunk_count == u32::MAX { diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs index e877294fb24..f395d871b93 100644 --- a/src/bin/nydus-image/main.rs +++ b/src/bin/nydus-image/main.rs @@ -555,6 +555,32 @@ fn prepare_cmd_args(bti_string: &'static str) -> App { .num_args(1), ) .arg(arg_config.clone()) + .arg( + Arg::new("backend-type") + .long("backend-type") + .help(format!( + "Type of backend [possible values: {}]", + BlobFactory::supported_backends() + .into_iter() + .filter(|x| x != "localfs") + .collect::>() + .join(", ") + )) + .required(false) + ) + .arg( + Arg::new("backend-config") + .long("backend-config") + .help("Config string of backend") + .required(false), + ) + .arg( + Arg::new("backend-config-file") + .long("backend-config-file") + .help("Config file of backend") + .conflicts_with("backend-config") + .required(false), + ) .arg( Arg::new("blob-dir") .long("blob-dir") @@ -564,12 +590,23 @@ fn prepare_cmd_args(bti_string: &'static str) -> App { "Directory for localfs storage backend, hosting data blobs and cache files", ), ) + .arg( + Arg::new("blob") + .long("blob") + .short('b') + .help("Path to RAFS data blob file") + ) .arg( Arg::new("output-bootstrap") .long("output-bootstrap") .short('O') .help("Output path of optimized bootstrap"), ) + .arg( + Arg::new("output-blob-dir") + .long("output-blob-dir") + .help("Directroy path for storing optimized blob"), + ) .arg( arg_output_json.clone(), ) @@ -1683,7 +1720,7 @@ impl Command { } fn optimize(matches: &ArgMatches, build_info: &BuildTimeInfo) -> Result<()> { - let blobs_dir_path = Self::get_blobs_dir(matches)?; + let output_blob_dir_path = Self::get_output_blob_dir(matches)?; let prefetch_file = Self::get_prefetch_files(matches)?; let bootstrap_path = Self::get_bootstrap(matches)?; let dst_bootstrap = match matches.get_one::("output-bootstrap") { @@ -1699,6 +1736,13 @@ impl Command { ..Default::default() }; + let (_c, backend) = match Self::get_backend(matches, "optimizer") { + Ok((c, b)) => (c, b), + Err(e) => { + bail!("{}, --blob-dir or --backend-type must be specified", e); + } + }; + let sb = update_ctx_from_bootstrap(&mut build_ctx, config, bootstrap_path)?; let mut tree = Tree::from_bootstrap(&sb, &mut ())?; let mut bootstrap_mgr = BootstrapManager::new(Some(dst_bootstrap), None); @@ -1718,8 +1762,9 @@ impl Command { &mut build_ctx, &mut bootstrap_mgr, &mut blob_table, - blobs_dir_path.to_path_buf(), + output_blob_dir_path.to_path_buf(), prefetch_nodes, + backend, ) .with_context(|| "Failed to generate prefetch bootstrap")?; @@ -1832,10 +1877,10 @@ impl Command { } } - fn get_blobs_dir(matches: &ArgMatches) -> Result<&Path> { - match matches.get_one::("blob-dir") { + fn get_output_blob_dir(matches: &ArgMatches) -> Result<&Path> { + match matches.get_one::("output-blob-dir") { Some(s) => Ok(Path::new(s)), - None => bail!("missing parameter `blob-dir`"), + None => bail!("missing parameter `output-blob-dir`"), } } From 55e36f6926bce111c4efabf1512103dd92f32cd1 Mon Sep 17 00:00:00 2001 From: "maxing.lan" Date: Mon, 20 Jan 2025 
16:10:16 +0800 Subject: [PATCH 9/9] feat: nydusify: add backend configure support of optimize subcommand When localfs-backend is true (default is false), still pull whole image to build optimized image; otherwise, use registry backend to fetch needed chunk during building process. Signed-off-by: maxing.lan --- contrib/nydusify/cmd/nydusify.go | 48 ++++++++++++++++++++- contrib/nydusify/pkg/optimizer/builder.go | 19 ++++++-- contrib/nydusify/pkg/optimizer/optimizer.go | 14 +++++- docs/nydusify.md | 18 ++++++++ 4 files changed, 92 insertions(+), 7 deletions(-) diff --git a/contrib/nydusify/cmd/nydusify.go b/contrib/nydusify/cmd/nydusify.go index 109dd65c4ff..178718b08c2 100644 --- a/contrib/nydusify/cmd/nydusify.go +++ b/contrib/nydusify/cmd/nydusify.go @@ -200,7 +200,8 @@ func main() { Required: false, Value: false, Usage: "Enable debug log level, overwrites the 'log-level' option", - EnvVars: []string{"DEBUG_LOG_LEVEL"}}, + EnvVars: []string{"DEBUG_LOG_LEVEL"}, + }, &cli.StringFlag{ Name: "log-level", Aliases: []string{"l"}, @@ -1223,10 +1224,52 @@ func main() { Value: "0MB", Usage: "Chunk size for pushing a blob layer in chunked", }, + + &cli.StringFlag{ + Name: "source-backend-type", + Value: "", + Usage: "Type of storage backend, enable verification of file data in Nydus image if specified, possible values: 'oss', 's3', 'localfs'", + EnvVars: []string{"BACKEND_TYPE"}, + }, + &cli.StringFlag{ + Name: "source-backend-config", + Value: "", + Usage: "Json string for storage backend configuration", + EnvVars: []string{"BACKEND_CONFIG"}, + }, + &cli.PathFlag{ + Name: "source-backend-config-file", + Value: "", + TakesFile: true, + Usage: "Json configuration file for storage backend", + EnvVars: []string{"BACKEND_CONFIG_FILE"}, + }, }, Action: func(c *cli.Context) error { setupLogLevel(c) + backendType, backendConfig, err := getBackendConfig(c, "source-", false) + if err != nil { + return err + } else if backendConfig == "" { + backendType = "registry" + parsed, err := reference.ParseNormalizedNamed(c.String("target")) + if err != nil { + return err + } + + backendConfigStruct, err := utils.NewRegistryBackendConfig(parsed, c.Bool("target-insecure")) + if err != nil { + return errors.Wrap(err, "parse registry backend configuration") + } + + bytes, err := json.Marshal(backendConfigStruct) + if err != nil { + return errors.Wrap(err, "marshal registry backend configuration") + } + backendConfig = string(bytes) + } + pushChunkSize, err := humanize.ParseBytes(c.String("push-chunk-size")) if err != nil { return errors.Wrap(err, "invalid --push-chunk-size option") @@ -1248,6 +1291,9 @@ func main() { PushChunkSize: int64(pushChunkSize), PrefetchFilesPath: c.String("prefetch-files"), + + BackendType: backendType, + BackendConfig: backendConfig, } return optimizer.Optimize(context.Background(), opt) diff --git a/contrib/nydusify/pkg/optimizer/builder.go b/contrib/nydusify/pkg/optimizer/builder.go index 66f26ac23d5..84b31b6bbcc 100644 --- a/contrib/nydusify/pkg/optimizer/builder.go +++ b/contrib/nydusify/pkg/optimizer/builder.go @@ -19,9 +19,13 @@ func isSignalKilled(err error) bool { } type BuildOption struct { - BuilderPath string - PrefetchFilesPath string - BootstrapPath string + BuilderPath string + PrefetchFilesPath string + BootstrapPath string + BackendType string + BackendConfig string + // `BlobDir` is used to store optimized blob, + // Beside, `BlobDir` is also used to store the original blobs when backend is localfs BlobDir string OutputBootstrapPath string OutputJSONPath string @@ -42,7 +46,7 
@@ func Build(option BuildOption) (string, error) { option.PrefetchFilesPath, "--bootstrap", option.BootstrapPath, - "--blob-dir", + "--output-blob-dir", option.BlobDir, "--output-bootstrap", option.OutputBootstrapPath, @@ -50,6 +54,13 @@ func Build(option BuildOption) (string, error) { outputJSONPath, } + if option.BackendType == "localfs" { + args = append(args, "--blob-dir", option.BlobDir) + } else { + args = append(args, "--backend-type", option.BackendType) + args = append(args, "--backend-config", option.BackendConfig) + } + ctx := context.Background() var cancel context.CancelFunc if option.Timeout != nil { diff --git a/contrib/nydusify/pkg/optimizer/optimizer.go b/contrib/nydusify/pkg/optimizer/optimizer.go index 84d35614e5f..207e9cc8292 100644 --- a/contrib/nydusify/pkg/optimizer/optimizer.go +++ b/contrib/nydusify/pkg/optimizer/optimizer.go @@ -61,6 +61,9 @@ type Opt struct { Platforms string PushChunkSize int64 + + BackendType string + BackendConfig string } // the information generated during building @@ -269,8 +272,10 @@ func Optimize(ctx context.Context, opt Opt) error { } defer os.RemoveAll(buildDir) - if err := fetchBlobs(ctx, opt, buildDir); err != nil { - return errors.Wrap(err, "prepare nydus blobs") + if opt.BackendType == "localfs" { + if err := fetchBlobs(ctx, opt, buildDir); err != nil { + return errors.Wrap(err, "prepare nydus blobs") + } } originalBootstrap := filepath.Join(buildDir, "nydus_bootstrap") @@ -289,12 +294,17 @@ func Optimize(ctx context.Context, opt Opt) error { compressAlgo := bootstrapDesc.Digest.Algorithm().String() blobDir := filepath.Join(buildDir + "/content/blobs/" + compressAlgo) + if err := os.MkdirAll(blobDir, 0755); err != nil { + return errors.Wrap(err, "create blob directory") + } outPutJSONPath := filepath.Join(buildDir, "output.json") newBootstrapPath := filepath.Join(buildDir, "optimized_bootstrap") builderOpt := BuildOption{ BuilderPath: opt.NydusImagePath, PrefetchFilesPath: opt.PrefetchFilesPath, BootstrapPath: originalBootstrap, + BackendType: opt.BackendType, + BackendConfig: opt.BackendConfig, BlobDir: blobDir, OutputBootstrapPath: newBootstrapPath, OutputJSONPath: outPutJSONPath, diff --git a/docs/nydusify.md b/docs/nydusify.md index 32e2239871b..11d6201b7ca 100644 --- a/docs/nydusify.md +++ b/docs/nydusify.md @@ -262,6 +262,24 @@ nerdctl --snapshotter nydus run \ The original container ID need to be a full container ID rather than an abbreviation. +## Optimize nydus image from prefetch files + +The nydusify optimize command can optimize a nydus image from prefetch files; prefetch files record the file access patterns during container startup. This will generate a new bootstrap and a new blob which contains all the data indicated by the prefetch files. + +The content of a prefetch file looks like this: +``` +/path/to/file1 start_offset1-end_offset1,start_offset2-end_offset2,... +/path/to/file2 start_offset1-end_offset1,start_offset2-end_offset2,... +``` + +``` shell +nydusify optimize \ + --nydus-image /path/to/nydus-image \ + --source myregistry/repo:tag-nydus \ + --target myregistry/repo:tag-nydus-optimized \ + --prefetch-files /path/to/prefetch-files +``` + ## More Nydusify Options See `nydusify convert/check/mount --help`