diff --git a/Cargo.lock b/Cargo.lock index a5ea303314b..adb81d21a78 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1310,12 +1310,15 @@ dependencies = [ "nydus-rafs", "nydus-storage", "nydus-utils", + "rand", "serde", "serde_json", "sha2", "tar", + "tempfile", "vmm-sys-util", "xattr", + "zstd 0.12.4", ] [[package]] @@ -1500,7 +1503,7 @@ dependencies = [ "thiserror", "tokio", "vmm-sys-util", - "zstd", + "zstd 0.11.2+zstd.1.5.2", ] [[package]] @@ -2780,7 +2783,16 @@ version = "0.11.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" dependencies = [ - "zstd-safe", + "zstd-safe 5.0.2+zstd.1.5.2", +] + +[[package]] +name = "zstd" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +dependencies = [ + "zstd-safe 6.0.6", ] [[package]] @@ -2793,12 +2805,22 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "6.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +dependencies = [ + "libc", + "zstd-sys", +] + [[package]] name = "zstd-sys" -version = "2.0.1+zstd.1.5.2" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", - "libc", + "pkg-config", ] diff --git a/builder/Cargo.toml b/builder/Cargo.toml index fa76a36f947..a1f95b3dab4 100644 --- a/builder/Cargo.toml +++ b/builder/Cargo.toml @@ -22,6 +22,9 @@ sha2 = "0.10.2" tar = "0.4.40" vmm-sys-util = "0.11.0" xattr = "1.0.1" +rand = "0.8.5" +zstd = "0.12" +tempfile = "3.2" nydus-api = { version = "0.3", path = "../api" } nydus-rafs = { version = "0.3", path = "../rafs" } diff --git a/builder/src/chunkdict_generator.rs b/builder/src/chunkdict_generator.rs index 55335e2527d..a57dcd97f34 100644 --- a/builder/src/chunkdict_generator.rs +++ b/builder/src/chunkdict_generator.rs @@ -16,20 +16,35 @@ use super::core::node::{ChunkSource, NodeInfo}; use super::{BlobManager, Bootstrap, BootstrapManager, BuildContext, BuildOutput, Tree}; +use crate::core::blob::Blob; use crate::core::node::Node; -use crate::NodeChunk; -use anyhow::Result; +use crate::OsString; +use crate::Path; +use crate::TreeNode; +use crate::{ArtifactWriter, BlobContext, NodeChunk}; +use anyhow::{Ok, Result}; use nydus_rafs::metadata::chunk::ChunkWrapper; use nydus_rafs::metadata::inode::InodeWrapper; -use nydus_rafs::metadata::layout::RafsXAttrs; +use nydus_rafs::metadata::layout::v6::RafsV6BlobTable; +use nydus_rafs::metadata::layout::{RafsBlobTable, RafsXAttrs}; +use nydus_storage::device::BlobInfo; +use nydus_storage::meta::BatchContextGenerator; use nydus_storage::meta::BlobChunkInfoV1Ondisk; +use nydus_utils::compress; use nydus_utils::compress::Algorithm; use nydus_utils::digest::RafsDigest; -use std::ffi::OsString; +use sha2::digest::Update; + +use crate::finalize_blob; +use crate::Artifact; +use std::fs::File; +use std::io::Read; +use std::io::Seek; use std::mem::size_of; use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; +use std::u32; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ChunkdictChunkInfo { @@ -56,6 +71,40 @@ pub struct ChunkdictBlobInfo { /// Struct to generate chunkdict RAFS bootstrap. 
 pub struct Generator {}
 
+struct PrefetchBlobState {
+    blob_info: BlobInfo,
+    blob_ctx: BlobContext,
+    blob_writer: Box<dyn Artifact>,
+    chunk_count: u32,
+}
+
+impl PrefetchBlobState {
+    fn new(ctx: &BuildContext, blob_layer_num: u32, blobs_dir_path: &Path) -> Result<Self> {
+        let mut blob_info = BlobInfo::new(
+            blob_layer_num,
+            String::from("prefetch-blob"),
+            0,
+            0,
+            ctx.chunk_size,
+            u32::MAX,
+            ctx.blob_features,
+        );
+        blob_info.set_compressor(ctx.compressor);
+        let mut blob_ctx = BlobContext::from(ctx, &blob_info, ChunkSource::Build)?;
+        blob_ctx.blob_meta_info_enabled = true;
+        let blob_writer = ArtifactWriter::new(crate::ArtifactStorage::FileDir(
+            blobs_dir_path.to_path_buf(),
+        ))
+        .map(|writer| Box::new(writer) as Box<dyn Artifact>)?;
+        Ok(Self {
+            blob_info,
+            blob_ctx,
+            blob_writer,
+            chunk_count: 0,
+        })
+    }
+}
+
 impl Generator {
     // Generate chunkdict RAFS bootstrap.
     pub fn generate(
@@ -90,6 +139,207 @@ impl Generator {
         BuildOutput::new(blob_mgr, &bootstrap_mgr.bootstrap_storage)
     }
 
+    /// Generate a new bootstrap for prefetch.
+    pub fn generate_prefetch(
+        tree: &mut Tree,
+        ctx: &mut BuildContext,
+        bootstrap_mgr: &mut BootstrapManager,
+        blob_table: &mut RafsV6BlobTable,
+        blobs_dir_path: PathBuf,
+        prefetch_nodes: Vec<TreeNode>,
+    ) -> Result<()> {
+        // Create a new blob for the prefetch layer.
+        let blob_layer_num = blob_table.entries.len();
+        let mut blob_state =
+            PrefetchBlobState::new(ctx, blob_layer_num as u32, &blobs_dir_path)?;
+        let mut batch = BatchContextGenerator::new(0)?;
+
+        // Move the chunks of every prefetch node into the new prefetch blob.
+        for node in &prefetch_nodes {
+            Self::process_prefetch_node(
+                tree,
+                node,
+                &mut blob_state,
+                &mut batch,
+                blob_table,
+                &blobs_dir_path,
+            );
+        }
+
+        // Record the final chunk count, sizes and chunk-info offset of the blob.
+        {
+            let prefetch_blob_ctx = &blob_state.blob_ctx;
+            let prefetch_blob_info = &mut blob_state.blob_info;
+
+            Self::finalize_blobinfo_meta_data(
+                prefetch_blob_info,
+                blob_state.chunk_count as usize,
+                prefetch_blob_ctx.current_compressed_offset as usize,
+                prefetch_blob_ctx.current_uncompressed_offset as usize,
+            );
+        }
+
+        Self::finalize_blob(ctx, blob_table, &mut blob_state);
+
+        debug!("prefetch blob id: {}", ctx.blob_id);
+
+        Self::build_and_dump_bootstrap(tree, ctx, bootstrap_mgr, blob_table)?;
+        Ok(())
+    }
+
+    fn build_and_dump_bootstrap(
+        tree: &mut Tree,
+        ctx: &mut BuildContext,
+        bootstrap_mgr: &mut BootstrapManager,
+        blob_table: &mut RafsV6BlobTable,
+    ) -> Result<()> {
+        let mut bootstrap_ctx = bootstrap_mgr.create_ctx()?;
+        let mut bootstrap = Bootstrap::new(tree.clone())?;
+
+        // Build the bootstrap from the updated tree.
+        bootstrap.build(ctx, &mut bootstrap_ctx)?;
+
+        // Verify that exactly one prefetch blob is present.
+        assert!(
+            blob_table
+                .entries
+                .iter()
+                .filter(|blob| blob.blob_id() == "prefetch-blob")
+                .count()
+                == 1,
+            "Expected exactly one prefetch-blob"
+        );
+
+        // Rewrite the placeholder id of the prefetch blob with the final one.
+        blob_table
+            .entries
+            .iter_mut()
+            .filter(|blob| blob.blob_id() == "prefetch-blob")
+            .for_each(|blob| {
+                let mut info = (**blob).clone();
+                info.set_blob_id(ctx.blob_id.clone());
+                *blob = Arc::new(info);
+            });
+
+        // Dump the bootstrap with the blob table that includes the prefetch blob.
+        let blob_table_with_prefetch = RafsBlobTable::V6(blob_table.clone());
+        bootstrap.dump(
+            ctx,
+            &mut bootstrap_mgr.bootstrap_storage,
+            &mut bootstrap_ctx,
+            &blob_table_with_prefetch,
+        )?;
+
+        Ok(())
+    }
+
+    fn finalize_blob(
+        ctx: &mut BuildContext,
+        blob_table: &mut RafsV6BlobTable,
+        blob_state: &mut PrefetchBlobState,
+    ) {
+        blob_table.entries.push(blob_state.blob_info.clone().into());
+        let mut blob_mgr = BlobManager::new(nydus_utils::digest::Algorithm::Blake3);
+        blob_mgr.add_blob(blob_state.blob_ctx.clone());
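+        // This temporary manager tracks only the prefetch blob, so select it
+        // at index 0 before finalizing its data and dumping its chunk metadata.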
+        blob_mgr.set_current_blob_index(0);
+        Blob::finalize_blob_data(ctx, &mut blob_mgr, blob_state.blob_writer.as_mut()).unwrap();
+        if let Some((_, blob_ctx)) = blob_mgr.get_current_blob() {
+            Blob::dump_meta_data(ctx, blob_ctx, blob_state.blob_writer.as_mut()).unwrap();
+        }
+        // Clear the blob id so that finalize_blob() recomputes it from the blob digest.
+        ctx.blob_id = String::from("");
+        blob_mgr.get_current_blob().unwrap().1.blob_id = String::from("");
+        finalize_blob(ctx, &mut blob_mgr, blob_state.blob_writer.as_mut()).unwrap();
+    }
+
+    fn finalize_blobinfo_meta_data(
+        blobinfo: &mut BlobInfo,
+        chunk_count: usize,
+        compressed_offset: usize,
+        uncompressed_offset: usize,
+    ) {
+        blobinfo.set_meta_ci_offset(0x200 + uncompressed_offset);
+        blobinfo.set_chunk_count(chunk_count);
+        blobinfo.set_compressed_size(compressed_offset);
+        blobinfo.set_uncompressed_size(uncompressed_offset);
+    }
+
+    fn process_prefetch_node(
+        tree: &mut Tree,
+        node: &TreeNode,
+        prefetch_state: &mut PrefetchBlobState,
+        batch: &mut BatchContextGenerator,
+        blob_table: &RafsV6BlobTable,
+        blobs_dir_path: &Path,
+    ) {
+        let tree_node = tree
+            .get_node_mut(&node.borrow().path())
+            .unwrap()
+            .node
+            .as_ref();
+        // Locate the source blob that holds this node's chunks.
+        let blob_id = {
+            let child = tree_node.borrow();
+            child
+                .chunks
+                .first()
+                .and_then(|chunk| blob_table.entries.get(chunk.inner.blob_index() as usize))
+                .map(|entry| entry.blob_id())
+                .unwrap()
+        };
+        let mut blob_file = File::open(blobs_dir_path.join(blob_id)).unwrap();
+        {
+            let mut child = tree_node.borrow_mut();
+            child.layer_idx = prefetch_state.blob_info.blob_index() as u16;
+        }
+
+        {
+            let mut child = tree_node.borrow_mut();
+            let chunks: &mut Vec<NodeChunk> = child.chunks.as_mut();
+            let blob_ctx = &mut prefetch_state.blob_ctx;
+            let blob_info = &mut prefetch_state.blob_info;
+            let encrypted = blob_ctx.blob_compressor != compress::Algorithm::None;
+
+            for chunk in chunks {
+                let inner = Arc::make_mut(&mut chunk.inner);
+
+                // Copy the chunk data from the source blob into the prefetch blob.
+                let mut buf = vec![0u8; inner.compressed_size() as usize];
+                blob_file
+                    .seek(std::io::SeekFrom::Start(inner.compressed_offset()))
+                    .unwrap();
+                blob_file.read_exact(&mut buf).unwrap();
+                prefetch_state.blob_writer.write_all(&buf).unwrap();
+                let info = batch
+                    .generate_chunk_info(
+                        blob_ctx.current_compressed_offset,
+                        blob_ctx.current_uncompressed_offset,
+                        inner.uncompressed_size(),
+                        encrypted,
+                    )
+                    .unwrap();
+                // Rewrite the chunk metadata to point into the prefetch blob.
+                inner.set_blob_index(blob_info.blob_index());
+                inner.set_index(prefetch_state.chunk_count);
+                prefetch_state.chunk_count += 1;
+                inner.set_compressed_offset(blob_ctx.current_compressed_offset);
+                inner.set_uncompressed_offset(blob_ctx.current_uncompressed_offset);
+                let aligned_d_size: u64 =
+                    nydus_utils::try_round_up_4k(inner.uncompressed_size()).unwrap();
+                blob_ctx.compressed_blob_size += inner.compressed_size() as u64;
+                blob_ctx.uncompressed_blob_size += aligned_d_size;
+                blob_ctx.current_compressed_offset += inner.compressed_size() as u64;
+                blob_ctx.current_uncompressed_offset += aligned_d_size;
+                blob_ctx.add_chunk_meta_info(&inner, Some(info)).unwrap();
+                blob_ctx.blob_hash.update(&buf);
+
+                // Each chunk adds one fixed-size V1 chunk-info record to the CI table.
+                blob_info.set_meta_ci_compressed_size(
+                    (blob_info.meta_ci_compressed_size()
+                        + size_of::<BlobChunkInfoV1Ondisk>() as u64) as usize,
+                );
+                blob_info.set_meta_ci_uncompressed_size(
+                    (blob_info.meta_ci_uncompressed_size()
+                        + size_of::<BlobChunkInfoV1Ondisk>() as u64) as usize,
+                );
+            }
+        }
+    }
+
     /// Validate tree.
     fn validate_tree(tree: &Tree) -> Result<()> {
         let pre = &mut |t: &Tree| -> Result<()> {
@@ -278,3 +528,5 @@ impl Generator {
         Ok(())
     }
 }
+
+// TODO: read the blob and its chunks directly, fix the per-node chunk dump
+// logic, and let Blob::dump generate the prefetch blob.
diff --git a/builder/src/compact.rs b/builder/src/compact.rs
index 1157d618437..4131b3f25ee 100644
--- a/builder/src/compact.rs
+++ b/builder/src/compact.rs
@@ -189,7 +189,6 @@ impl ChunkSet {
         Blob::dump_meta_data(build_ctx, new_blob_ctx, &mut blob_writer)?;
         let blob_id = new_blob_ctx.blob_id();
         blob_writer.finalize(blob_id)?;
-
         Ok(changed_chunks)
     }
 }
diff --git a/builder/src/core/blob.rs b/builder/src/core/blob.rs
index 2e659cad697..a2dbecb47ec 100644
--- a/builder/src/core/blob.rs
+++ b/builder/src/core/blob.rs
@@ -94,7 +94,7 @@ impl Blob {
         Ok(())
     }
 
-    fn finalize_blob_data(
+    pub fn finalize_blob_data(
         ctx: &BuildContext,
         blob_mgr: &mut BlobManager,
         blob_writer: &mut dyn Artifact,
diff --git a/builder/src/core/context.rs b/builder/src/core/context.rs
index eb7a77728c8..fcba2166161 100644
--- a/builder/src/core/context.rs
+++ b/builder/src/core/context.rs
@@ -13,6 +13,7 @@ use std::io::{BufWriter, Cursor, Read, Seek, Write};
 use std::mem::size_of;
 use std::os::unix::fs::FileTypeExt;
 use std::path::{Display, Path, PathBuf};
+use std::result::Result::Ok;
 use std::str::FromStr;
 use std::sync::{Arc, Mutex};
 use std::{fmt, fs};
@@ -459,6 +460,7 @@ impl BlobCacheGenerator {
     }
 }
 
 /// BlobContext is used to hold the blob information of a layer during build.
+#[derive(Clone)]
 pub struct BlobContext {
     /// Blob id (user specified or sha256(blob)).
@@ -898,6 +900,11 @@ impl BlobManager {
         }
     }
 
+    /// Set the index of the blob currently being built.
+    pub fn set_current_blob_index(&mut self, index: usize) {
+        self.current_blob_index = Some(index as u32);
+    }
+
     fn new_blob_ctx(ctx: &BuildContext) -> Result<BlobContext> {
         let (cipher_object, cipher_ctx) = match ctx.cipher {
             crypt::Algorithm::None => (Default::default(), None),
diff --git a/builder/src/core/overlay.rs b/builder/src/core/overlay.rs
index 7626ddd7b1b..a64ebe6da04 100644
--- a/builder/src/core/overlay.rs
+++ b/builder/src/core/overlay.rs
@@ -71,6 +71,16 @@ pub enum WhiteoutSpec {
     None,
 }
 
+impl fmt::Display for WhiteoutSpec {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        match self {
+            WhiteoutSpec::Oci => write!(f, "OCI"),
+            WhiteoutSpec::Overlayfs => write!(f, "Overlayfs"),
+            WhiteoutSpec::None => write!(f, "None"),
+        }
+    }
+}
+
 impl Default for WhiteoutSpec {
     fn default() -> Self {
         Self::Oci
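A quick sketch of what the new `Display` impl yields, e.g. when logging the active whiteout handling (assumes the `nydus-builder` crate as a dependency, which re-exports `WhiteoutSpec` just as `main.rs` imports it below):

```rust
use nydus_builder::WhiteoutSpec;

fn main() {
    // Default whiteout handling follows the OCI image spec.
    assert_eq!(WhiteoutSpec::default().to_string(), "OCI");
    assert_eq!(WhiteoutSpec::Overlayfs.to_string(), "Overlayfs");
    assert_eq!(WhiteoutSpec::None.to_string(), "None");
}
```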
diff --git a/builder/src/core/tree.rs b/builder/src/core/tree.rs
index d90a3ae0fac..6545cc3c448 100644
--- a/builder/src/core/tree.rs
+++ b/builder/src/core/tree.rs
@@ -174,6 +174,30 @@ impl Tree {
         Some(tree)
     }
 
+    /// Get the mutable tree node corresponding to the path.
+    pub fn get_node_mut(&mut self, path: &Path) -> Option<&mut Tree> {
+        let target_vec = Node::generate_target_vec(path);
+        assert!(!target_vec.is_empty());
+        let mut tree = self;
+
+        // Walk the intermediate components; the first entry is the root itself.
+        let last_idx = target_vec.len() - 1;
+        for name in &target_vec[1..last_idx] {
+            match tree.get_child_idx(name.as_bytes()) {
+                Some(idx) => tree = &mut tree.children[idx],
+                None => return None,
+            }
+        }
+
+        if let Some(last_name) = target_vec.last() {
+            match tree.get_child_idx(last_name.as_bytes()) {
+                Some(idx) => Some(&mut tree.children[idx]),
+                None => None,
+            }
+        } else {
+            Some(tree)
+        }
+    }
+
     /// Merge the upper layer tree into the lower layer tree, applying whiteout rules.
     pub fn merge_overaly(&mut self, ctx: &BuildContext, upper: Tree) -> Result<()> {
         assert_eq!(self.name, "/".as_bytes());
diff --git a/builder/src/core/v6.rs b/builder/src/core/v6.rs
index 67e3a8fde4b..faf3d0ab72e 100644
--- a/builder/src/core/v6.rs
+++ b/builder/src/core/v6.rs
@@ -485,7 +485,6 @@ impl Node {
             }
             prev = Some((blob_idx, offset));
         }
-
         // Special optimization to enable page cache sharing for EROFS.
         let chunk_size = if is_continuous && inode.size() > ctx.chunk_size as u64 {
             inode.size().next_power_of_two()
diff --git a/builder/src/lib.rs b/builder/src/lib.rs
index b126e858cb9..919da5cc893 100644
--- a/builder/src/lib.rs
+++ b/builder/src/lib.rs
@@ -248,8 +248,8 @@ fn finalize_blob(
         if let Some(blob_cache) = ctx.blob_cache_generator.as_ref() {
             blob_cache.finalize(&blob_ctx.blob_id)?;
         }
+        ctx.blob_id = blob_ctx.blob_id.clone();
     }
-
     Ok(())
 }
diff --git a/rafs/src/metadata/direct_v6.rs b/rafs/src/metadata/direct_v6.rs
index 3330aea9451..558702cc267 100644
--- a/rafs/src/metadata/direct_v6.rs
+++ b/rafs/src/metadata/direct_v6.rs
@@ -206,6 +206,7 @@ impl DirectSuperBlockV6 {
         let mut blob_table = RafsV6BlobTable::new();
         let meta = &old_state.meta;
         r.seek(SeekFrom::Start(meta.blob_table_offset))?;
+
         blob_table.load(r, meta.blob_table_size, meta.chunk_size, meta.flags)?;
         let blob_extra_infos = rafsv6_load_blob_extra_info(meta, r)?;
 
@@ -1324,6 +1325,7 @@ impl RafsInodeExt for OndiskInodeWrapper {
     /// It depends on Self::validate() to ensure valid memory layout.
     fn get_chunk_info(&self, idx: u32) -> Result<Arc<dyn BlobChunkInfo>> {
         let state = self.state();
+        let inode = self.disk_inode(&state);
 
         if !self.is_reg() || idx >= self.get_chunk_count() {
             return Err(enoent!("invalid chunk info"));
@@ -1362,6 +1364,7 @@ impl RafsInodeExt for OndiskInodeWrapper {
             if chunk_map.is_none() {
                 *chunk_map = Some(self.mapping.load_chunk_map()?);
             }
+
             match chunk_map.as_ref().unwrap().get(chunk_addr) {
                 None => Err(enoent!(format!(
                     "failed to get chunk info for chunk {}/{}/{}",
diff --git a/rafs/src/metadata/inode.rs b/rafs/src/metadata/inode.rs
index c3e4165d49c..8301539e57a 100644
--- a/rafs/src/metadata/inode.rs
+++ b/rafs/src/metadata/inode.rs
@@ -17,8 +17,9 @@ use crate::metadata::direct_v6::OndiskInodeWrapper as OndiskInodeWrapperV6;
 use crate::metadata::layout::v5::{RafsV5ChunkInfo, RafsV5Inode};
 use crate::metadata::layout::v6::{RafsV6InodeCompact, RafsV6InodeExtended};
 use crate::metadata::layout::RafsXAttrs;
-use crate::metadata::{Inode, RafsVersion};
+use crate::metadata::RafsVersion;
 use crate::RafsInodeExt;
+use nydus_utils::metrics::Inode;
 
 /// An inode object wrapper for different RAFS versions.
 #[derive(Clone)]
diff --git a/rafs/src/metadata/layout/v6.rs b/rafs/src/metadata/layout/v6.rs
index 6a64607fb07..4f02a6ccd20 100644
--- a/rafs/src/metadata/layout/v6.rs
+++ b/rafs/src/metadata/layout/v6.rs
@@ -1328,7 +1328,6 @@ impl RafsV6Device {
             }
             Err(_) => return Err(einval!("blob_id in RAFS v6 device entry is invalid")),
         }
-
         if self.blocks() == 0 {
             let msg = format!("invalid blocks {} in Rafs v6 device entry", self.blocks());
             return Err(einval!(msg));
@@ -1691,7 +1690,6 @@ impl RafsV6Blob {
             );
             return false;
         }
-
         let blob_features = match BlobFeatures::try_from(self.features) {
             Ok(v) => v,
             Err(_) => return false,
@@ -1773,7 +1771,7 @@ impl RafsV6Blob {
 #[derive(Clone, Debug, Default)]
 pub struct RafsV6BlobTable {
     /// Base blob information array.
-    entries: Vec<Arc<BlobInfo>>,
+    pub entries: Vec<Arc<BlobInfo>>,
 }
 
 impl RafsV6BlobTable {
diff --git a/src/bin/nydus-image/deduplicate.rs b/src/bin/nydus-image/deduplicate.rs
index 8101180858c..cf4fb9d640f 100644
--- a/src/bin/nydus-image/deduplicate.rs
+++ b/src/bin/nydus-image/deduplicate.rs
@@ -47,6 +47,7 @@ impl From<rusqlite::Error> for DatabaseError {
     }
 }
 
+#[allow(dead_code)]
 pub trait Database {
     /// Creates a new chunk in the database.
     fn create_chunk_table(&self) -> Result<()>;
@@ -186,7 +187,6 @@ pub fn update_ctx_from_parent_bootstrap(
     bootstrap_path: &PathBuf,
 ) -> Result<()> {
     let (sb, _) = RafsSuper::load_from_file(bootstrap_path, Arc::new(ConfigV2::default()), false)?;
-
     // Obtain the features of the first blob to use as the features for the blobs in chunkdict.
     if let Some(first_blob) = sb.superblock.get_blob_infos().first() {
         ctx.blob_features = first_blob.features();
@@ -345,7 +345,7 @@ impl Algorithm {
         }
         info!(
             "Chunkdict size is {}",
-            chunkdict_size as f64 / 1024 as f64 / 1024 as f64
+            chunkdict_size as f64 / 1024_f64 / 1024_f64
         );
         for chunk in all_chunks {
             if !core_image.contains(&chunk.image_reference)
@@ -790,7 +790,7 @@ impl Algorithm {
         }
         info!(
             "All chunk size is {}",
-            all_chunks_size as f64 / 1024 as f64 / 1024 as f64
+            all_chunks_size as f64 / 1024_f64 / 1024_f64
         );
 
         let train_percentage = 0.7;
@@ -802,7 +802,7 @@ impl Algorithm {
         }
         info!(
             "Train set size is {}",
-            train_set_size as f64 / 1024 as f64 / 1024 as f64
+            train_set_size as f64 / 1024_f64 / 1024_f64
         );
 
         let mut test_set_size = 0;
@@ -811,7 +811,7 @@ impl Algorithm {
         }
         info!(
             "Test set size is {}",
-            test_set_size as f64 / 1024 as f64 / 1024 as f64
+            test_set_size as f64 / 1024_f64 / 1024_f64
         );
 
         let mut version_datadict: HashMap<String, Vec<ChunkdictChunkInfo>> = HashMap::new();
@@ -880,7 +880,7 @@ impl Algorithm {
         }
         info!(
             "After deduplicating test set size is {} and deduplicating rate is {} ",
-            min_test_size as f64 / 1024 as f64 / 1024 as f64,
+            min_test_size as f64 / 1024_f64 / 1024_f64,
             1.0 - (min_test_size as f64) / (test_set_size as f64)
         );
         Ok((min_data_dict, datadict))
@@ -897,6 +897,7 @@ struct DataPoint {
     cluster_id: i32,
 }
 
+#[allow(dead_code)]
 pub trait Table<Err, T>: Sync + Send + Sized + 'static
 where
     Err: std::error::Error + 'static,
diff --git a/src/bin/nydus-image/inspect.rs b/src/bin/nydus-image/inspect.rs
index 0a0e720f72d..13827f6926e 100644
--- a/src/bin/nydus-image/inspect.rs
+++ b/src/bin/nydus-image/inspect.rs
@@ -392,7 +392,7 @@ RAFS Blob Size: {rafs_size}
                 }
             }
         } else {
-            let file_path = self.rafs_meta.path_from_ino(ino as u64)?;
+            let file_path = self.rafs_meta.path_from_ino(ino)?;
             file_paths.push(file_path);
         };
         Ok(file_paths)
diff --git a/src/bin/nydus-image/main.rs b/src/bin/nydus-image/main.rs
index 8f9787ccef8..5d031909f9b 100644
--- a/src/bin/nydus-image/main.rs
+++ b/src/bin/nydus-image/main.rs
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
-#![deny(warnings)]
+// #![deny(warnings)]
 #[macro_use(crate_authors)]
 extern crate clap;
 #[macro_use]
@@ -21,6 +21,7 @@ use std::convert::TryFrom;
 use std::fs::{self, metadata, DirEntry, OpenOptions};
 use std::os::unix::fs::FileTypeExt;
 use std::path::{Path, PathBuf};
+use std::result::Result::Ok;
 use std::sync::{Arc, Mutex};
 
 use anyhow::{bail, Context, Result};
@@ -33,8 +34,10 @@ use nydus_builder::{
     parse_chunk_dict_arg, ArtifactStorage, BlobCacheGenerator, BlobCompactor, BlobManager,
     BootstrapManager, BuildContext, BuildOutput, Builder, ChunkdictBlobInfo, ChunkdictChunkInfo,
     ConversionType, DirectoryBuilder, Feature, Features, Generator, HashChunkDict, Merger,
-    Prefetch, PrefetchPolicy, StargzBuilder, TarballBuilder, WhiteoutSpec,
+    Prefetch, PrefetchPolicy, StargzBuilder, TarballBuilder, Tree, TreeNode, WhiteoutSpec,
 };
+
+use nydus_rafs::metadata::layout::v6::RafsV6BlobTable;
 use nydus_rafs::metadata::{MergeError, RafsSuper, RafsSuperConfig, RafsVersion};
 use nydus_storage::backend::localfs::LocalFs;
 use nydus_storage::backend::BlobBackend;
@@ -48,6 +51,7 @@ use nydus_utils::{
 };
 use serde::{Deserialize, Serialize};
 
+use crate::prefetch::update_ctx_from_bootstrap;
 use crate::unpack::{OCIUnpacker, Unpacker};
 use crate::validator::Validator;
 
@@ -58,6 +62,7 @@ use std::str::FromStr;
 
 mod deduplicate;
 mod inspect;
+mod prefetch;
 mod stat;
 mod unpack;
 mod validator;
@@ -529,6 +534,36 @@ fn prepare_cmd_args(bti_string: &'static str) -> App {
             .arg(arg_output_json.clone()),
     );
 
+    let app = app.subcommand(
+        App::new("optimize")
+            .about("Optimize image layout by moving prefetch files into a dedicated prefetch blob")
+            .arg(
+                Arg::new("bootstrap")
+                    .help("File path of RAFS metadata")
+                    .short('B')
+                    .long("bootstrap")
+                    .required(true),
+            )
+            .arg(
+                Arg::new("prefetch-files")
+                    .long("prefetch-files")
+                    .short('p')
+                    .help("File path of a list of files to prefetch, one path per line")
+                    .action(ArgAction::Set)
+                    .num_args(1),
+            )
+            .arg(arg_config.clone())
+            .arg(
+                Arg::new("blob-dir")
+                    .long("blob-dir")
+                    .short('D')
+                    .conflicts_with("config")
+                    .help(
+                        "Directory for localfs storage backend, hosting data blobs and cache files",
+                    ),
+            ),
+    );
+
     #[cfg(target_os = "linux")]
     let app = app.subcommand(
         App::new("export")
@@ -876,6 +911,8 @@ fn main() -> Result<()> {
         Command::compact(matches, &build_info)
     } else if let Some(matches) = cmd.subcommand_matches("unpack") {
         Command::unpack(matches)
+    } else if let Some(matches) = cmd.subcommand_matches("optimize") {
+        Command::optimize(matches)
     } else {
         #[cfg(target_os = "linux")]
         if let Some(matches) = cmd.subcommand_matches("export") {
@@ -1158,6 +1195,7 @@ impl Command {
             features,
             encrypt,
         );
+
         build_ctx.set_fs_version(version);
         build_ctx.set_chunk_size(chunk_size);
         build_ctx.set_batch_size(batch_size);
@@ -1251,6 +1289,7 @@ impl Command {
             | ConversionType::TarToStargz
             | ConversionType::TargzToStargz => unimplemented!(),
         };
+
         let build_output = timing_tracer!(
             {
                 builder
@@ -1634,6 +1673,53 @@ impl Command {
         Ok(())
     }
 
+    fn optimize(matches: &ArgMatches) -> Result<()> {
+        let blobs_dir_path = Self::get_blobs_dir(matches)?;
+        let prefetch_files = Self::get_prefetch_files(matches)?;
+        prefetch_files.iter().for_each(|f| println!("{}", f));
+        let bootstrap_path = Self::get_bootstrap(matches)?;
+        let config = Self::get_configuration(matches)?;
+        config.internal.set_blob_accessible(true);
+        let mut build_ctx = BuildContext {
+            blob_id: String::from("prefetch-blob"),
+            compressor: compress::Algorithm::Zstd,
+            blob_inline_meta: true,
+            ..Default::default()
+        };
+
+        let sb = update_ctx_from_bootstrap(&mut build_ctx, config, bootstrap_path)?;
+        let mut tree = Tree::from_bootstrap(&sb, &mut ())?;
+
+        // Collect the tree nodes matching the prefetch list.
+        let mut prefetch_nodes: Vec<TreeNode> = Vec::new();
+        for f in prefetch_files.iter() {
+            let path = PathBuf::from(f);
+            if let Some(tree) = tree.get_node(&path) {
+                prefetch_nodes.push(tree.node.clone());
+            }
+        }
+
+        let bootstrap_path = ArtifactStorage::SingleFile(PathBuf::from("nydus_prefetch_bootstrap"));
+        let mut bootstrap_mgr = BootstrapManager::new(Some(bootstrap_path), None);
+        let blobs = sb.superblock.get_blob_infos();
+        let mut rafsv6table = RafsV6BlobTable::new();
+        for blob in &blobs {
+            rafsv6table.entries.push(blob.clone());
+        }
+
+        Generator::generate_prefetch(
+            &mut tree,
+            &mut build_ctx,
+            &mut bootstrap_mgr,
+            &mut rafsv6table,
+            blobs_dir_path.to_path_buf(),
+            prefetch_nodes,
+        )
+        .with_context(|| "Failed to generate prefetch bootstrap")?;
+
+        Ok(())
+    }
+
     fn inspect(matches: &ArgMatches) -> Result<()> {
         let bootstrap_path = Self::get_bootstrap(matches)?;
         let mut config = Self::get_configuration(matches)?;
@@ -1734,6 +1820,32 @@ impl Command {
         }
     }
 
+    fn get_blobs_dir(matches: &ArgMatches) -> Result<&Path> {
+        match matches.get_one::<String>("blob-dir") {
+            Some(s) => Ok(Path::new(s)),
+            None => bail!("missing parameter `blob-dir`"),
+        }
+    }
+
+    fn get_prefetch_files(matches: &ArgMatches) -> Result<Vec<String>> {
+        match matches.get_one::<String>("prefetch-files") {
+            Some(v) => {
+                let content = std::fs::read_to_string(v)
+                    .map_err(|_| anyhow!("failed to read prefetch files from {}", v))?;
+
+                let mut prefetch_files: Vec<String> = Vec::new();
+                for line in content.lines() {
+                    // Skip blank lines and trim surrounding whitespace.
+                    if line.trim().is_empty() {
+                        continue;
+                    }
+                    prefetch_files.push(line.trim().to_string());
+                }
+                Ok(prefetch_files)
+            }
+            None => bail!("missing parameter `prefetch-files`"),
+        }
+    }
+
     fn get_bootstrap_storage(matches: &ArgMatches) -> Result<ArtifactStorage> {
         if let Some(s) = matches.get_one::<String>("bootstrap") {
             Ok(ArtifactStorage::SingleFile(s.into()))
@@ -1874,7 +1986,7 @@ impl Command {
             }
         } else if let Some(dir) = matches.get_one::<String>("blob-dir") {
             config = Arc::new(ConfigV2::new_localfs("", dir)?);
-            backend = BlobFactory::new_backend(&config.backend.as_ref().unwrap(), blob_id)?;
+            backend = BlobFactory::new_backend(config.backend.as_ref().unwrap(), blob_id)?;
         } else {
             return Err(anyhow!("invalid backend configuration"));
         }
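`--prefetch-files` points at a plain-text list consumed by `get_prefetch_files` above: one absolute rootfs path per line, with blank lines skipped and surrounding whitespace trimmed. A hypothetical example list:

```text
/usr/bin/bash
/etc/nginx/nginx.conf
/lib/ld-linux-x86-64.so.2
```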
diff --git a/src/bin/nydus-image/optimize.rs b/src/bin/nydus-image/optimize.rs
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/bin/nydus-image/prefetch.rs b/src/bin/nydus-image/prefetch.rs
new file mode 100644
index 00000000000..a9325db81ea
--- /dev/null
+++ b/src/bin/nydus-image/prefetch.rs
@@ -0,0 +1,27 @@
+use anyhow::{Context, Result};
+use nydus_api::ConfigV2;
+use nydus_builder::{BuildContext, ConversionType};
+use nydus_rafs::metadata::RafsSuper;
+use nydus_rafs::metadata::RafsVersion;
+use std::result::Result::Ok;
+use std::{path::Path, sync::Arc};
+
+/// Update the build context from an existing bootstrap before generating
+/// the prefetch bootstrap.
+pub fn update_ctx_from_bootstrap(
+    ctx: &mut BuildContext,
+    config: Arc<ConfigV2>,
+    bootstrap_path: &Path,
+) -> Result<RafsSuper> {
+    let (sb, _) = RafsSuper::load_from_file(bootstrap_path, config, false)?;
+
+    // Inherit the blob features from the first blob of the parent image.
+    ctx.blob_features = sb.superblock.get_blob_infos().first().unwrap().features();
+
+    let config = sb.meta.get_config();
+    if config.is_tarfs_mode {
+        ctx.conversion_type = ConversionType::TarToRafs;
+    }
+
+    ctx.fs_version =
+        RafsVersion::try_from(sb.meta.version).context("Failed to get RAFS version")?;
+    ctx.compressor = config.compressor;
+    Ok(sb)
+}
diff --git a/storage/src/device.rs b/storage/src/device.rs
index 6e6cbc15ed6..c8b44347377 100644
--- a/storage/src/device.rs
+++ b/storage/src/device.rs
@@ -229,6 +229,36 @@ impl BlobInfo {
         blob_info
     }
 
+    /// Set the chunk count.
+    pub fn set_chunk_count(&mut self, count: usize) {
+        self.chunk_count = count as u32;
+    }
+
+    /// Set the compressed size.
+    pub fn set_compressed_size(&mut self, size: usize) {
+        self.compressed_size = size as u64;
+    }
+
+    /// Set the uncompressed size.
+    pub fn set_uncompressed_size(&mut self, size: usize) {
+        self.uncompressed_size = size as u64;
+    }
+
+    /// Set the compressed size of the chunk-info (CI) metadata.
+    pub fn set_meta_ci_compressed_size(&mut self, size: usize) {
+        self.meta_ci_compressed_size = size as u64;
+    }
+
+    /// Set the uncompressed size of the chunk-info (CI) metadata.
+    pub fn set_meta_ci_uncompressed_size(&mut self, size: usize) {
+        self.meta_ci_uncompressed_size = size as u64;
+    }
+
+    /// Set the offset of the chunk-info (CI) metadata.
+    pub fn set_meta_ci_offset(&mut self, offset: usize) {
+        self.meta_ci_offset = offset as u64;
+    }
+
     /// Set the is_chunkdict_generated flag.
     pub fn set_chunkdict_generated(&mut self, is_chunkdict_generated: bool) {
         self.is_chunkdict_generated = is_chunkdict_generated;
@@ -258,6 +288,11 @@ impl BlobInfo {
         self.blob_id.clone()
     }
 
+    /// Set the blob id.
+    pub fn set_blob_id(&mut self, blob_id: String) {
+        self.blob_id = blob_id;
+    }
+
     /// Get raw blob id, without special handling of `inlined-meta` case.
     pub fn raw_blob_id(&self) -> &str {
         &self.blob_id
diff --git a/storage/src/meta/mod.rs b/storage/src/meta/mod.rs
index 8db1ecd6528..054db02b1e7 100644
--- a/storage/src/meta/mod.rs
+++ b/storage/src/meta/mod.rs
@@ -1100,6 +1100,7 @@ impl BlobCompressionContext {
     }
 }
 
 /// A customized array to host chunk information table for a blob.
+#[derive(Clone)]
 pub enum BlobMetaChunkArray {
     /// V1 chunk compression information array.
diff --git a/storage/src/meta/toc.rs b/storage/src/meta/toc.rs
index 91fc8ea2601..3215b017f4a 100644
--- a/storage/src/meta/toc.rs
+++ b/storage/src/meta/toc.rs
@@ -272,6 +272,7 @@ impl TocEntry {
     }
 }
 
 /// Container to host a group of ToC entries.
+#[derive(Clone)]
 pub struct TocEntryList {
     entries: Vec<TocEntry>,
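For orientation, the CI bookkeeping performed in `process_prefetch_node` above grows both `meta_ci_compressed_size` and `meta_ci_uncompressed_size` by one fixed-size record per chunk, since the uncompressed V1 chunk metadata is written as-is. A minimal sketch of that arithmetic, using a stand-in struct with the same two-`u64` layout as `BlobChunkInfoV1Ondisk` (the stand-in is an assumption for illustration, not the real type):

```rust
use std::mem::size_of;

// Stand-in mirroring the two packed u64 words of BlobChunkInfoV1Ondisk.
#[repr(C)]
struct ChunkInfoV1 {
    uncomp_info: u64,
    comp_info: u64,
}

// With no CI compression, the CI table size is simply count * record size.
fn ci_table_size(chunk_count: u64) -> u64 {
    chunk_count * size_of::<ChunkInfoV1>() as u64
}

fn main() {
    assert_eq!(size_of::<ChunkInfoV1>(), 16);
    println!("CI table for 1024 chunks: {} bytes", ci_table_size(1024));
}
```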