diff --git a/Cargo.lock b/Cargo.lock index c97b87f4..7a946011 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -190,6 +190,27 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "bzip2" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "caps" version = "0.5.5" @@ -206,6 +227,8 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" dependencies = [ + "jobserver", + "libc", "shlex", ] @@ -685,6 +708,15 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.72" @@ -829,6 +861,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linux-loader" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "870c3814345f050991f99869417779f6062542bcf4ed81db7a1b926ad1306638" +dependencies = [ + "vm-memory", +] + [[package]] name = "log" version = "0.4.22" @@ -1588,18 +1629,21 @@ name = "vmm" version = "0.1.0" dependencies = [ "arch", + "bzip2", "codicon", "cpuid", "crossbeam-channel", "curl", 
"devices", "env_logger", + "flate2", "hvf", "kbs-types", "kernel", "kvm-bindings", "kvm-ioctls", "libc", + "linux-loader", "log", "nix 0.24.3", "polly", @@ -1611,6 +1655,7 @@ dependencies = [ "utils", "vm-memory", "vmm-sys-util", + "zstd", ] [[package]] @@ -1931,3 +1976,31 @@ dependencies = [ "quote", "syn", ] + +[[package]] +name = "zstd" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.13+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/include/libkrun.h b/include/libkrun.h index 5afeb254..87f3eba2 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -403,18 +403,27 @@ int32_t krun_set_exec(uint32_t ctx_id, const char *const argv[], const char *const envp[]); +#define KRUN_KERNEL_FORMAT_RAW 0 +#define KRUN_KERNEL_FORMAT_ELF 1 +#define KRUN_KERNEL_FORMAT_PE_GZ 2 +#define KRUN_KERNEL_FORMAT_IMAGE_BZ2 3 +#define KRUN_KERNEL_FORMAT_IMAGE_GZ 4 +#define KRUN_KERNEL_FORMAT_IMAGE_ZSTD 5 /** * Sets the path to the kernel to be loaded in the microVM. * * Arguments: - * "ctx_id" - the configuration context ID. - * "kernel_path" - the path to the kernel, relative to the host's filesystem. + * "ctx_id" - the configuration context ID. + * "kernel_path" - the path to the kernel, relative to the host's filesystem. + * "kernel_format" - the kernel format * * Returns: * Zero on success or a negative error number on failure. 
*/ + /* Supported kernel formats: see the KRUN_KERNEL_FORMAT_* constants above. */ int32_t krun_set_kernel(uint32_t ctx_id, - const char *kernel_path); + const char *kernel_path, + uint32_t kernel_format); /** * Sets environment variables to be configured in the context of the executable. diff --git a/src/arch/src/x86_64/mod.rs b/src/arch/src/x86_64/mod.rs index e31046e8..628c06c2 100644 --- a/src/arch/src/x86_64/mod.rs +++ b/src/arch/src/x86_64/mod.rs @@ -67,48 +67,69 @@ pub const MMIO_MEM_START: u64 = FIRST_ADDR_PAST_32BITS - MEM_32BIT_GAP_SIZE; #[cfg(not(feature = "tee"))] pub fn arch_memory_regions( size: usize, - kernel_load_addr: u64, + kernel_load_addr: Option<u64>, kernel_size: usize, ) -> (ArchMemoryInfo, Vec<(GuestAddress, usize)>) { let page_size: usize = unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() }; let size = round_up(size, page_size); - if size < (kernel_load_addr + kernel_size as u64) as usize { - panic!("Kernel doesn't fit in RAM"); - } // It's safe to cast MMIO_MEM_START to usize because it fits in a u32 variable // (It points to an address in the 32 bit space). 
let (ram_last_addr, shm_start_addr, regions) = match size.checked_sub(MMIO_MEM_START as usize) { // case1: guest memory fits before the gap None | Some(0) => { - let ram_last_addr = kernel_load_addr + kernel_size as u64 + size as u64; - let shm_start_addr = FIRST_ADDR_PAST_32BITS; - ( - ram_last_addr, - shm_start_addr, - vec![ - (GuestAddress(0), kernel_load_addr as usize), - (GuestAddress(kernel_load_addr + kernel_size as u64), size), - ], - ) + if let Some(kernel_load_addr) = kernel_load_addr { + if size < (kernel_load_addr + kernel_size as u64) as usize { + panic!("Kernel doesn't fit in RAM"); + } + + let ram_last_addr = kernel_load_addr + kernel_size as u64 + size as u64; + let shm_start_addr = FIRST_ADDR_PAST_32BITS; + ( + ram_last_addr, + shm_start_addr, + vec![ + (GuestAddress(0), kernel_load_addr as usize), + (GuestAddress(kernel_load_addr + kernel_size as u64), size), + ], + ) + } else { + let ram_last_addr = size as u64; + let shm_start_addr = FIRST_ADDR_PAST_32BITS; + (ram_last_addr, shm_start_addr, vec![(GuestAddress(0), size)]) + } } + // case2: guest memory extends beyond the gap Some(remaining) => { - let ram_last_addr = FIRST_ADDR_PAST_32BITS + remaining as u64; - let shm_start_addr = ((ram_last_addr / 0x4000_0000) + 1) * 0x4000_0000; - ( - ram_last_addr, - shm_start_addr, - vec![ - (GuestAddress(0), kernel_load_addr as usize), - ( - GuestAddress(kernel_load_addr + kernel_size as u64), - (MMIO_MEM_START - (kernel_load_addr + kernel_size as u64)) as usize, - ), - (GuestAddress(FIRST_ADDR_PAST_32BITS), remaining), - ], - ) + if let Some(kernel_load_addr) = kernel_load_addr { + let ram_last_addr = FIRST_ADDR_PAST_32BITS + remaining as u64; + let shm_start_addr = ((ram_last_addr / 0x4000_0000) + 1) * 0x4000_0000; + ( + ram_last_addr, + shm_start_addr, + vec![ + (GuestAddress(0), kernel_load_addr as usize), + ( + GuestAddress(kernel_load_addr + kernel_size as u64), + (MMIO_MEM_START - (kernel_load_addr + kernel_size as u64)) as usize, + ), + 
(GuestAddress(FIRST_ADDR_PAST_32BITS), remaining), + ], + ) + } else { + let ram_last_addr = FIRST_ADDR_PAST_32BITS + remaining as u64; + let shm_start_addr = ((ram_last_addr / 0x4000_0000) + 1) * 0x4000_0000; + ( + ram_last_addr, + shm_start_addr, + vec![ + (GuestAddress(0), MMIO_MEM_START as usize), + (GuestAddress(FIRST_ADDR_PAST_32BITS), remaining), + ], + ) + } } }; let info = ArchMemoryInfo { @@ -319,7 +340,8 @@ mod tests { #[test] fn regions_lt_4gb() { - let (_info, regions) = arch_memory_regions(1usize << 29, KERNEL_LOAD_ADDR, KERNEL_SIZE); + let (_info, regions) = + arch_memory_regions(1usize << 29, Some(KERNEL_LOAD_ADDR), KERNEL_SIZE); assert_eq!(2, regions.len()); assert_eq!(GuestAddress(0), regions[0].0); assert_eq!(KERNEL_LOAD_ADDR as usize, regions[0].1); @@ -333,7 +355,7 @@ mod tests { #[test] fn regions_gt_4gb() { let (_info, regions) = - arch_memory_regions((1usize << 32) + 0x8000, KERNEL_LOAD_ADDR, KERNEL_SIZE); + arch_memory_regions((1usize << 32) + 0x8000, Some(KERNEL_LOAD_ADDR), KERNEL_SIZE); assert_eq!(3, regions.len()); assert_eq!(GuestAddress(0), regions[0].0); assert_eq!(KERNEL_LOAD_ADDR as usize, regions[0].1); @@ -360,21 +382,21 @@ mod tests { // Now assigning some memory that falls before the 32bit memory hole. let mem_size = 128 << 20; let (arch_mem_info, arch_mem_regions) = - arch_memory_regions(mem_size, KERNEL_LOAD_ADDR, KERNEL_SIZE); + arch_memory_regions(mem_size, Some(KERNEL_LOAD_ADDR), KERNEL_SIZE); let gm = GuestMemoryMmap::from_ranges(&arch_mem_regions).unwrap(); configure_system(&gm, &arch_mem_info, GuestAddress(0), 0, &None, no_vcpus).unwrap(); // Now assigning some memory that is equal to the start of the 32bit memory hole. 
let mem_size = 3328 << 20; let (arch_mem_info, arch_mem_regions) = - arch_memory_regions(mem_size, KERNEL_LOAD_ADDR, KERNEL_SIZE); + arch_memory_regions(mem_size, Some(KERNEL_LOAD_ADDR), KERNEL_SIZE); let gm = GuestMemoryMmap::from_ranges(&arch_mem_regions).unwrap(); configure_system(&gm, &arch_mem_info, GuestAddress(0), 0, &None, no_vcpus).unwrap(); // Now assigning some memory that falls after the 32bit memory hole. let mem_size = 3330 << 20; let (arch_mem_info, arch_mem_regions) = - arch_memory_regions(mem_size, KERNEL_LOAD_ADDR, KERNEL_SIZE); + arch_memory_regions(mem_size, Some(KERNEL_LOAD_ADDR), KERNEL_SIZE); let gm = GuestMemoryMmap::from_ranges(&arch_mem_regions).unwrap(); configure_system(&gm, &arch_mem_info, GuestAddress(0), 0, &None, no_vcpus).unwrap(); } diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index a9cbe959..e0f1704d 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -8,7 +8,7 @@ use std::env; use std::ffi::CStr; #[cfg(target_os = "linux")] use std::ffi::CString; -#[cfg(all(target_arch = "aarch64", not(feature = "efi")))] +#[cfg(all(not(feature = "efi"), not(feature = "tee")))] use std::fs::File; #[cfg(not(feature = "efi"))] use std::os::fd::AsRawFd; @@ -39,6 +39,8 @@ use vmm::resources::VmResources; #[cfg(feature = "blk")] use vmm::vmm_config::block::BlockDeviceConfig; use vmm::vmm_config::boot_source::{BootSourceConfig, DEFAULT_KERNEL_CMDLINE}; +#[cfg(all(not(feature = "efi"), not(feature = "tee")))] +use vmm::vmm_config::external_kernel::{ExternalKernel, KernelFormat}; #[cfg(not(feature = "tee"))] use vmm::vmm_config::fs::FsDeviceConfig; #[cfg(not(feature = "efi"))] @@ -108,8 +110,6 @@ struct ContextConfig { gpu_shm_size: Option, enable_snd: bool, console_output: Option, - #[cfg(not(feature = "efi"))] - external_kernel: bool, } impl ContextConfig { @@ -1033,27 +1033,8 @@ fn create_virtio_net(ctx_cfg: &mut ContextConfig, backend: VirtioNetBackend) { .expect("Failed to create network interface"); } 
-#[cfg(any(target_arch = "x86_64", feature = "tee", feature = "efi"))] -#[allow(clippy::format_collect)] -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -pub unsafe extern "C" fn krun_set_kernel(_ctx_id: u32, _c_kernel_path: *const c_char) -> i32 { - -libc::EOPNOTSUPP -} - -#[cfg(all(target_arch = "aarch64", not(feature = "efi")))] -#[allow(clippy::format_collect)] -#[allow(clippy::missing_safety_doc)] -#[no_mangle] -pub unsafe extern "C" fn krun_set_kernel(ctx_id: u32, c_kernel_path: *const c_char) -> i32 { - let kernel_path = match CStr::from_ptr(c_kernel_path).to_str() { - Ok(path) => path, - Err(e) => { - error!("Error parsing kernel_path: {:?}", e); - return -libc::EINVAL; - } - }; - +#[cfg(all(not(feature = "efi"), not(feature = "tee")))] +fn map_kernel(ctx_id: u32, kernel_path: &str) -> i32 { let file = match File::options().read(true).write(false).open(kernel_path) { Ok(file) => file, Err(err) => { @@ -1087,16 +1068,70 @@ pub unsafe extern "C" fn krun_set_kernel(ctx_id: u32, c_kernel_path: *const c_ch }; match CTX_MAP.lock().unwrap().entry(ctx_id) { - Entry::Occupied(mut ctx_cfg) => { - let ctx_cfg = ctx_cfg.get_mut(); - if ctx_cfg.external_kernel { - error!("An extenal kernel was already configured"); - return -libc::EINVAL; - } else { - ctx_cfg.external_kernel = true; - } - ctx_cfg.vmr.set_kernel_bundle(kernel_bundle).unwrap() + Entry::Occupied(mut ctx_cfg) => ctx_cfg + .get_mut() + .vmr + .set_kernel_bundle(kernel_bundle) + .unwrap(), + Entry::Vacant(_) => return -libc::ENOENT, + } + + KRUN_SUCCESS +} + +// `_kernel_format` is accepted but ignored: the signature must match the prototype in +// include/libkrun.h even in builds that cannot load an external kernel. +#[cfg(any(feature = "tee", feature = "efi"))] +#[allow(clippy::format_collect)] +#[allow(clippy::missing_safety_doc)] +#[no_mangle] +pub unsafe extern "C" fn krun_set_kernel( + _ctx_id: u32, + _c_kernel_path: *const c_char, + _kernel_format: u32, +) -> i32 { + -libc::EOPNOTSUPP +} + +#[cfg(all(not(feature = "efi"), not(feature = "tee")))] +#[allow(clippy::format_collect)] +#[allow(clippy::missing_safety_doc)] +#[no_mangle] +pub unsafe extern "C" fn krun_set_kernel( + ctx_id: u32, 
+ c_kernel_path: *const c_char, + kernel_format: u32, +) -> i32 { + let kernel_path = match CStr::from_ptr(c_kernel_path).to_str() { + Ok(path) => path, + Err(e) => { + error!("Error parsing kernel_path: {:?}", e); + return -libc::EINVAL; + } + }; + + let format = match kernel_format { + // For raw kernels, we map the kernel into the process + // and treat it as a bundled kernel. + #[cfg(all(not(feature = "efi"), not(feature = "tee")))] + 0 => return map_kernel(ctx_id, kernel_path), + 1 => KernelFormat::Elf, + 2 => KernelFormat::PeGz, + 3 => KernelFormat::ImageBz2, + 4 => KernelFormat::ImageGz, + 5 => KernelFormat::ImageZstd, + _ => { + return -libc::EINVAL; } + }; + + let external_kernel = ExternalKernel { + path: PathBuf::from(kernel_path), + format, + }; + + match CTX_MAP.lock().unwrap().entry(ctx_id) { + Entry::Occupied(mut ctx_cfg) => ctx_cfg.get_mut().vmr.set_external_kernel(external_kernel), Entry::Vacant(_) => return -libc::ENOENT, } @@ -1186,7 +1221,7 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { }; #[cfg(not(feature = "efi"))] - let _krunfw = if !ctx_cfg.external_kernel { + let _krunfw = if ctx_cfg.vmr.external_kernel.is_none() { // The reference to the dynamically loaded library must be kept alive. 
let krunfw = match unsafe { libloading::Library::new(KRUNFW_NAME) } { Ok(lib) => lib, diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 25ed38d7..cfcf8c71 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -14,11 +14,15 @@ gpu = [] snd = [] [dependencies] +bzip2 = "0.5" crossbeam-channel = "0.5" env_logger = "0.9.0" +flate2 = "1.0.35" libc = ">=0.2.39" +linux-loader = { version = "0.13.0", features = ["bzimage", "elf", "pe"] } log = "0.4.0" vm-memory = { version = ">=0.13", features = ["backend-mmap"] } +zstd = "0.13" arch = { path = "../arch" } devices = { path = "../devices" } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 8eda82a1..1f45386b 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -8,6 +8,8 @@ use crossbeam_channel::{unbounded, Sender}; use std::fmt::{Display, Formatter}; use std::fs::File; use std::io; +#[cfg(all(not(feature = "efi"), not(feature = "tee")))] +use std::io::Read; #[cfg(target_os = "linux")] use std::os::fd::AsRawFd; use std::path::PathBuf; @@ -19,6 +21,8 @@ use super::{Error, Vmm}; use crate::device_manager::legacy::PortIODeviceManager; use crate::device_manager::mmio::MMIODeviceManager; use crate::resources::VmResources; +#[cfg(all(not(feature = "efi"), not(feature = "tee")))] +use crate::vmm_config::external_kernel::{ExternalKernel, KernelFormat}; use devices::legacy::GicV3; use devices::legacy::Serial; #[cfg(target_os = "macos")] @@ -56,13 +60,17 @@ use arch::InitrdConfig; use device_manager::shm::ShmManager; #[cfg(not(feature = "tee"))] use devices::virtio::{fs::ExportTable, VirtioShmRegion}; +#[cfg(all(not(feature = "efi"), not(feature = "tee")))] +use flate2::read::GzDecoder; #[cfg(feature = "tee")] use kvm_bindings::KVM_MAX_CPUID_ENTRIES; use libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO}; +#[cfg(all(target_arch = "x86_64", not(feature = "tee")))] +use linux_loader::loader::{self, KernelLoader}; use nix::unistd::isatty; use polly::event_manager::{Error as 
EventManagerError, EventManager}; use utils::eventfd::EventFd; -#[cfg(not(feature = "efi"))] +#[cfg(all(target_arch = "x86_64", not(feature = "efi"), not(feature = "tee")))] use vm_memory::mmap::MmapRegion; #[cfg(not(feature = "tee"))] use vm_memory::Address; @@ -82,32 +90,72 @@ pub enum StartMicrovmError { AttachBlockDevice(io::Error), /// Failed to create a `RateLimiter` object. CreateRateLimiter(io::Error), + /// Cannot open the file containing the kernel code. + ElfOpenKernel(io::Error), + /// Cannot load the kernel into the VM. + ElfLoadKernel(linux_loader::loader::Error), /// Memory regions are overlapping or mmap fails. GuestMemoryMmap(vm_memory::Error), + /// The BZIP2 decoder couldn't decompress the kernel. + ImageBz2Decoder(io::Error), + /// Cannot find compressed kernel in file. + ImageBz2Invalid, + /// Cannot load the kernel from the uncompressed ELF data. + ImageBz2LoadKernel(linux_loader::loader::Error), + /// Cannot open the file containing the kernel code. + ImageBz2OpenKernel(io::Error), + /// The GZIP decoder couldn't decompress the kernel. + ImageGzDecoder(io::Error), + /// Cannot find compressed kernel in file. + ImageGzInvalid, + /// Cannot load the kernel from the uncompressed ELF data. + ImageGzLoadKernel(linux_loader::loader::Error), + /// Cannot open the file containing the kernel code. + ImageGzOpenKernel(io::Error), + /// The ZSTD decoder couldn't decompress the kernel. + ImageZstdDecoder(io::Error), + /// Cannot find compressed kernel in file. + ImageZstdInvalid, + /// Cannot load the kernel from the uncompressed ELF data. + ImageZstdLoadKernel(linux_loader::loader::Error), + /// Cannot open the file containing the kernel code. + ImageZstdOpenKernel(io::Error), /// Cannot load initrd due to an invalid memory configuration. InitrdLoad, /// Cannot load initrd due to an invalid image. InitrdRead(io::Error), /// Internal error encountered while starting a microVM. 
Internal(Error), + /// Cannot inject the kernel into the guest memory due to a problem with the bundle. + InvalidKernelBundle(vm_memory::mmap::MmapRegionError), /// The kernel command line is invalid. KernelCmdline(String), - /// Cannot inject the kernel into the guest memory due to a problem with the bundle. - KernelBundle(vm_memory::mmap::MmapRegionError), + /// The supplied kernel format is not supported. + KernelFormatUnsupported, /// Cannot load command line string. LoadCommandline(kernel::cmdline::Error), /// The start command was issued more than once. MicroVMAlreadyRunning, + /// Cannot start the VM because the initramfs image was not configured. + MissingInitramfs, /// Cannot start the VM because the kernel was not configured. MissingKernelConfig, /// Cannot start the VM because the size of the guest memory was not specified. MissingMemSizeConfig, + /// Cannot start the VM because the qboot image was not configured. + MissingQboot, /// The net device configuration is missing the tap device. NetDeviceNotConfigured, /// Cannot open the block device backing file. OpenBlockDevice(io::Error), /// Cannot open console output file. OpenConsoleFile(io::Error), + /// The GZIP decoder couldn't decompress the kernel. + PeGzDecoder(io::Error), + /// Cannot open the file containing the kernel code. + PeGzOpenKernel(io::Error), + /// Cannot find compressed kernel in file. + PeGzInvalid, /// Cannot initialize a MMIO Balloon device or add a device to the MMIO Bus. RegisterBalloonDevice(device_manager::mmio::Error), /// Cannot initialize a MMIO Block Device or add a device to the MMIO Bus. @@ -159,20 +207,70 @@ impl Display for StartMicrovmError { write!(f, "Unable to attach block device to Vmm. 
Error: {err}") } CreateRateLimiter(ref err) => write!(f, "Cannot create RateLimiter: {err}"), + ElfOpenKernel(ref err) => { + write!(f, "Cannot open the file containing the kernel code: {err}") + } + ElfLoadKernel(ref err) => { + write!(f, "Cannot load the kernel into the VM: {err}") + } GuestMemoryMmap(ref err) => { // Remove imbricated quotes from error message. let mut err_msg = format!("{err:?}"); err_msg = err_msg.replace('\"', ""); write!(f, "Invalid Memory Configuration: {err_msg}") } + ImageBz2Decoder(ref err) => { + write!(f, "The BZIP2 decoder couldn't decompress the kernel. {err}") + } + ImageBz2Invalid => { + write!(f, "Cannot find compressed kernel in file.") + } + ImageBz2LoadKernel(ref err) => { + write!( + f, + "Cannot load the kernel from the uncompressed ELF data. {err}" + ) + } + ImageBz2OpenKernel(ref err) => { + write!(f, "Cannot open the file containing the kernel code. {err}") + } + ImageGzDecoder(ref err) => { + write!(f, "The GZIP decoder couldn't decompress the kernel. {err}") + } + ImageGzInvalid => { + write!(f, "Cannot find compressed kernel in file.") + } + ImageGzLoadKernel(ref err) => { + write!( + f, + "Cannot load the kernel from the uncompressed ELF data. {err}" + ) + } + ImageGzOpenKernel(ref err) => { + write!(f, "Cannot open the file containing the kernel code. {err}") + } + ImageZstdDecoder(ref err) => { + write!(f, "The ZSTD decoder couldn't decompress the kernel. {err}") + } + ImageZstdInvalid => { + write!(f, "Cannot find compressed kernel in file.") + } + ImageZstdLoadKernel(ref err) => { + write!( + f, + "Cannot load the kernel from the uncompressed ELF data. {err}" + ) + } + ImageZstdOpenKernel(ref err) => { + write!(f, "Cannot open the file containing the kernel code. {err}") + } InitrdLoad => write!( f, "Cannot load initrd due to an invalid memory configuration." 
), InitrdRead(ref err) => write!(f, "Cannot load initrd due to an invalid image: {err}"), Internal(ref err) => write!(f, "Internal error while starting microVM: {err:?}"), - KernelCmdline(ref err) => write!(f, "Invalid kernel command line: {err}"), - KernelBundle(ref err) => { + InvalidKernelBundle(ref err) => { let mut err_msg = format!("{err}"); err_msg = err_msg.replace('\"', ""); write!( @@ -181,16 +279,28 @@ impl Display for StartMicrovmError { bundle. {err_msg}" ) } + KernelCmdline(ref err) => write!(f, "Invalid kernel command line: {err}"), + KernelFormatUnsupported => { + write!(f, "The supplied kernel format is not supported.") + } LoadCommandline(ref err) => { let mut err_msg = format!("{err}"); err_msg = err_msg.replace('\"', ""); write!(f, "Cannot load command line string. {err_msg}") } MicroVMAlreadyRunning => write!(f, "Microvm already running."), + MissingInitramfs => write!( + f, + "Cannot start the VM because the initramfs image was not configured." + ), MissingKernelConfig => write!(f, "Cannot start microvm without kernel configuration."), MissingMemSizeConfig => { write!(f, "Cannot start microvm without guest mem_size config.") } + MissingQboot => write!( + f, + "Cannot start the VM because the qboot image was not configured." + ), NetDeviceNotConfigured => { write!(f, "The net device configuration is missing the tap device.") } @@ -206,6 +316,15 @@ impl Display for StartMicrovmError { write!(f, "Cannot open the console output file. {err_msg}") } + PeGzDecoder(ref err) => { + write!(f, "The GZIP decoder couldn't decompress the kernel. {err}") + } + PeGzOpenKernel(ref err) => { + write!(f, "Cannot open the file containing the kernel code. 
{err}") + } + PeGzInvalid => { + write!(f, "Cannot find compressed kernel in file.") + } RegisterBalloonDevice(ref err) => { let mut err_msg = format!("{err}"); err_msg = err_msg.replace('\"', ""); @@ -329,17 +448,51 @@ impl Display for StartMicrovmError { } } } + enum Payload { #[cfg(all(target_arch = "x86_64", not(feature = "tee")))] - KernelMmap(MmapRegion, u64, usize), + KernelMmap, #[cfg(all(target_arch = "aarch64", not(feature = "efi")))] - KernelCopy(MmapRegion, u64, usize), + KernelCopy, + #[cfg(all(not(feature = "efi"), not(feature = "tee")))] + ExternalKernel(ExternalKernel), #[cfg(test)] Empty, #[cfg(feature = "efi")] Efi, #[cfg(feature = "tee")] - Tee(MmapRegion, u64, usize, u64, usize, u64, usize), + Tee, +} + +#[cfg(not(feature = "efi"))] +fn choose_payload(vm_resources: &VmResources) -> Result { + if let Some(_kernel_bundle) = &vm_resources.kernel_bundle { + #[cfg(feature = "tee")] + if vm_resources.qboot_bundle.is_none() || vm_resources.initrd_bundle.is_none() { + return Err(StartMicrovmError::MissingKernelConfig); + } + + #[cfg(feature = "tee")] + return Ok(Payload::Tee); + + #[cfg(all(target_os = "linux", target_arch = "x86_64", not(feature = "tee")))] + return Ok(Payload::KernelMmap); + + #[cfg(all(target_arch = "aarch64", not(feature = "efi")))] + return Ok(Payload::KernelCopy); + } else if let Some(_external_kernel) = vm_resources.external_kernel() { + #[cfg(not(feature = "tee"))] + return Ok(Payload::ExternalKernel(_external_kernel.clone())); + #[cfg(feature = "tee")] + return Err(StartMicrovmError::MissingKernelConfig); + } else { + Err(StartMicrovmError::MissingKernelConfig) + } +} + +#[cfg(feature = "efi")] +fn choose_payload(_vm_resources: &VmResources) -> Result { + Ok(Payload::Efi) } /// Builds and starts a microVM based on the current Firecracker VmResources configuration. 
@@ -355,53 +508,15 @@ pub fn build_microvm( _shutdown_efd: Option, #[cfg(target_os = "macos")] _map_sender: Sender, ) -> std::result::Result>, StartMicrovmError> { - #[cfg(not(feature = "efi"))] - let kernel_bundle = vm_resources - .kernel_bundle() - .ok_or(StartMicrovmError::MissingKernelConfig)?; - #[cfg(not(feature = "efi"))] - let kernel_region = unsafe { - MmapRegion::build_raw(kernel_bundle.host_addr as *mut u8, kernel_bundle.size, 0, 0) - .map_err(StartMicrovmError::KernelBundle)? - }; + let payload = choose_payload(vm_resources)?; - #[cfg(feature = "tee")] - let qboot_bundle = vm_resources - .qboot_bundle() - .ok_or(StartMicrovmError::MissingKernelConfig)?; - - #[cfg(feature = "tee")] - let initrd_bundle = vm_resources - .initrd_bundle() - .ok_or(StartMicrovmError::MissingKernelConfig)?; - - #[cfg(feature = "tee")] - let payload = Payload::Tee( - kernel_region, - kernel_bundle.guest_addr, - kernel_bundle.size, - qboot_bundle.host_addr, - qboot_bundle.size, - initrd_bundle.host_addr, - initrd_bundle.size, - ); - #[cfg(all(target_os = "linux", target_arch = "x86_64", not(feature = "tee")))] - let payload = Payload::KernelMmap(kernel_region, kernel_bundle.guest_addr, kernel_bundle.size); - #[cfg(all(target_arch = "aarch64", not(feature = "efi")))] - let payload = Payload::KernelCopy(kernel_region, kernel_bundle.guest_addr, kernel_bundle.size); - #[cfg(all(target_arch = "aarch64", feature = "efi"))] - let payload = Payload::Efi; - - let (guest_memory, arch_memory_info, mut _shm_manager) = create_guest_memory( + let (guest_memory, entry_addr, arch_memory_info, mut _shm_manager) = create_guest_memory( vm_resources .vm_config() .mem_size_mib .ok_or(StartMicrovmError::MissingMemSizeConfig)?, - #[cfg(feature = "tee")] - None, - #[cfg(not(feature = "tee"))] - Some(vm_resources), - payload, + vm_resources, + &payload, )?; let vcpu_config = vm_resources.vcpu_config(); @@ -451,27 +566,44 @@ pub fn build_microvm( let measured_regions = { println!("Injecting and measuring 
memory regions. This may take a while."); - let m = vec![ + let qboot_size = if let Some(qboot_bundle) = &vm_resources.qboot_bundle { + qboot_bundle.size + } else { + return Err(StartMicrovmError::MissingKernelConfig); + }; + let (kernel_guest_addr, kernel_size) = + if let Some(kernel_bundle) = &vm_resources.kernel_bundle { + (kernel_bundle.guest_addr, kernel_bundle.size) + } else { + return Err(StartMicrovmError::MissingKernelConfig); + }; + let initrd_size = if let Some(initrd_bundle) = &vm_resources.initrd_bundle { + initrd_bundle.size + } else { + return Err(StartMicrovmError::MissingKernelConfig); + }; + + vec![ MeasuredRegion { guest_addr: arch::BIOS_START, host_addr: guest_memory .get_host_address(GuestAddress(arch::BIOS_START)) .unwrap() as u64, - size: qboot_bundle.size, + size: qboot_size, }, MeasuredRegion { - guest_addr: kernel_bundle.guest_addr, + guest_addr: kernel_guest_addr, host_addr: guest_memory - .get_host_address(GuestAddress(kernel_bundle.guest_addr)) + .get_host_address(GuestAddress(kernel_guest_addr)) .unwrap() as u64, - size: kernel_bundle.size, + size: kernel_size, }, MeasuredRegion { guest_addr: arch::x86_64::layout::INITRD_SEV_START, host_addr: guest_memory .get_host_address(GuestAddress(arch::x86_64::layout::INITRD_SEV_START)) .unwrap() as u64, - size: initrd_bundle.size, + size: initrd_size, }, MeasuredRegion { guest_addr: arch::x86_64::layout::ZERO_PAGE_START, @@ -480,9 +612,7 @@ pub fn build_microvm( .unwrap() as u64, size: 4096, }, - ]; - - m + ] }; // On x86_64 always create a serial device, @@ -493,7 +623,7 @@ pub fn build_microvm( None, None, // Uncomment this to get EFI output when debugging EDK2. - // Some(Box::new(io::stdout())), + //Some(Box::new(io::stdout())), )?) 
} else { None @@ -536,11 +666,6 @@ pub fn build_microvm( #[cfg(target_os = "macos")] let intc = Some(GicV3::new(vcpu_list.clone())); - #[cfg(all(target_os = "linux", target_arch = "x86_64", not(feature = "tee")))] - let boot_ip: GuestAddress = GuestAddress(kernel_bundle.entry_addr); - #[cfg(feature = "tee")] - let boot_ip: GuestAddress = GuestAddress(arch::RESET_VECTOR); - let vcpus; // For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` // while on aarch64 we need to do it the other way around. @@ -553,7 +678,7 @@ pub fn build_microvm( &vm, &vcpu_config, &guest_memory, - boot_ip, + entry_addr, &pio_device_manager.io_bus, &exit_evt, ) @@ -566,14 +691,8 @@ pub fn build_microvm( // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. #[cfg(all(target_arch = "aarch64", target_os = "linux"))] { - vcpus = create_vcpus_aarch64( - &vm, - &vcpu_config, - &guest_memory, - GuestAddress(kernel_bundle.guest_addr), - &exit_evt, - ) - .map_err(StartMicrovmError::Internal)?; + vcpus = create_vcpus_aarch64(&vm, &vcpu_config, &guest_memory, entry_addr, &exit_evt) + .map_err(StartMicrovmError::Internal)?; setup_interrupt_controller(&mut vm, vcpu_config.vcpu_count)?; attach_legacy_devices( @@ -586,16 +705,11 @@ pub fn build_microvm( #[cfg(all(target_arch = "aarch64", target_os = "macos"))] { - #[cfg(not(feature = "efi"))] - let start_addr = GuestAddress(kernel_bundle.guest_addr); - #[cfg(feature = "efi")] - let start_addr = GuestAddress(0u64); - vcpus = create_vcpus_aarch64( &vm, &vcpu_config, &guest_memory, - start_addr, + entry_addr, &exit_evt, vcpu_list.clone(), ) @@ -703,10 +817,17 @@ pub fn build_microvm( load_cmdline(&vmm)?; #[cfg(feature = "tee")] - let initrd_config = Some(InitrdConfig { - address: GuestAddress(arch::x86_64::layout::INITRD_SEV_START), - size: initrd_bundle.size, - }); + let initrd_config = { + let initrd_size = if let Some(initrd_bundle) = &vm_resources.initrd_bundle { + initrd_bundle.size + } else { + return 
Err(StartMicrovmError::MissingInitramfs); + }; + Some(InitrdConfig { + address: GuestAddress(arch::x86_64::layout::INITRD_SEV_START), + size: initrd_size, + }) + }; #[cfg(not(feature = "tee"))] let initrd_config = None; @@ -761,51 +882,231 @@ pub fn build_microvm( Ok(vmm) } +#[cfg(all(not(feature = "efi"), not(feature = "tee")))] +fn load_external_kernel( + guest_mem: &GuestMemoryMmap, + external_kernel: &ExternalKernel, +) -> std::result::Result<GuestAddress, StartMicrovmError> { + let entry_addr = match external_kernel.format { + // Raw images are treated as bundled kernels and must never reach this loader + KernelFormat::Raw => return Err(StartMicrovmError::KernelFormatUnsupported), + #[cfg(target_arch = "x86_64")] + KernelFormat::Elf => { + let mut file = File::options() + .read(true) + .write(false) + .open(external_kernel.path.clone()) + .map_err(StartMicrovmError::ElfOpenKernel)?; + let load_result = loader::Elf::load(guest_mem, None, &mut file, None) + .map_err(StartMicrovmError::ElfLoadKernel)?; + load_result.kernel_load + } + #[cfg(target_arch = "aarch64")] + KernelFormat::PeGz => { + let data: Vec<u8> = std::fs::read(external_kernel.path.clone()) + .map_err(StartMicrovmError::PeGzOpenKernel)?; + if let Some(magic) = data + .windows(3) + .position(|window| window == [0x1f, 0x8b, 0x8]) + { + debug!("Found GZIP header on PE file at: 0x{:x}", magic); + let (_, compressed) = data.split_at(magic); + let mut gz = GzDecoder::new(compressed); + let mut kernel_data: Vec<u8> = Vec::new(); + gz.read_to_end(&mut kernel_data) + .map_err(StartMicrovmError::PeGzDecoder)?; + guest_mem + .write(&kernel_data, GuestAddress(0x8000_0000)) + .unwrap(); + GuestAddress(0x8000_0000) + } else { + return Err(StartMicrovmError::PeGzInvalid); + } + } + #[cfg(target_arch = "x86_64")] + KernelFormat::ImageBz2 => { + let data: Vec<u8> = std::fs::read(external_kernel.path.clone()) + .map_err(StartMicrovmError::ImageBz2OpenKernel)?; + if let Some(magic) = data + .windows(3) + .position(|window| window == [b'B', b'Z', b'h']) + { + debug!("Found BZIP2 header on Image file at: 0x{:x}", magic); + let (_, 
compressed) = data.split_at(magic); + let mut kernel_data: Vec<u8> = Vec::new(); + let mut bz2 = bzip2::read::BzDecoder::new(compressed); + bz2.read_to_end(&mut kernel_data) + .map_err(StartMicrovmError::ImageBz2Decoder)?; + let load_result = loader::Elf::load( + guest_mem, + None, + &mut std::io::Cursor::new(kernel_data), + None, + ) + .map_err(StartMicrovmError::ImageBz2LoadKernel)?; + load_result.kernel_load + } else { + return Err(StartMicrovmError::ImageBz2Invalid); + } + } + #[cfg(target_arch = "x86_64")] + KernelFormat::ImageGz => { + let data: Vec<u8> = std::fs::read(external_kernel.path.clone()) + .map_err(StartMicrovmError::ImageGzOpenKernel)?; + if let Some(magic) = data + .windows(3) + .position(|window| window == [0x1f, 0x8b, 0x8]) + { + debug!("Found GZIP header on Image file at: 0x{:x}", magic); + let (_, compressed) = data.split_at(magic); + let mut gz = GzDecoder::new(compressed); + let mut kernel_data: Vec<u8> = Vec::new(); + gz.read_to_end(&mut kernel_data) + .map_err(StartMicrovmError::ImageGzDecoder)?; + let load_result = loader::Elf::load( + guest_mem, + None, + &mut std::io::Cursor::new(kernel_data), + None, + ) + .map_err(StartMicrovmError::ImageGzLoadKernel)?; + load_result.kernel_load + } else { + return Err(StartMicrovmError::ImageGzInvalid); + } + } + #[cfg(target_arch = "x86_64")] + KernelFormat::ImageZstd => { + let data: Vec<u8> = std::fs::read(external_kernel.path.clone()) + .map_err(StartMicrovmError::ImageZstdOpenKernel)?; + if let Some(magic) = data + .windows(4) + .position(|window| window == [0x28, 0xb5, 0x2f, 0xfd]) + { + debug!("Found ZSTD header on Image file at: 0x{:x}", magic); + let (_, zstd_data) = data.split_at(magic); + let mut kernel_data: Vec<u8> = Vec::new(); + let _ = zstd::stream::copy_decode(zstd_data, &mut kernel_data); + let load_result = loader::Elf::load( + guest_mem, + None, + &mut std::io::Cursor::new(kernel_data), + None, + ) + .map_err(StartMicrovmError::ImageZstdLoadKernel)?; + load_result.kernel_load + } else { + return 
Err(StartMicrovmError::ImageZstdInvalid); + } + } + _ => return Err(StartMicrovmError::KernelFormatUnsupported), + }; + + debug!("load_external_kernel: 0x{:x}", entry_addr.0); + + Ok(entry_addr) +} + fn load_payload( + _vm_resources: &VmResources, guest_mem: GuestMemoryMmap, - payload: Payload, -) -> std::result::Result<GuestMemoryMmap, StartMicrovmError> { + payload: &Payload, +) -> std::result::Result<(GuestMemoryMmap, GuestAddress), StartMicrovmError> { + println!("load_payload"); match payload { #[cfg(all(target_arch = "aarch64", not(feature = "efi")))] - Payload::KernelCopy(kernel_region, kernel_load_addr, kernel_size) => { + Payload::KernelCopy => { + let (kernel_entry_addr, kernel_host_addr, kernel_guest_addr, kernel_size) = + if let Some(kernel_bundle) = &_vm_resources.kernel_bundle { + ( + kernel_bundle.entry_addr, + kernel_bundle.host_addr, + kernel_bundle.guest_addr, + kernel_bundle.size, + ) + } else { + return Err(StartMicrovmError::MissingKernelConfig); + }; + let kernel_data = - unsafe { std::slice::from_raw_parts(kernel_region.as_ptr(), kernel_size) }; + unsafe { std::slice::from_raw_parts(kernel_host_addr as *mut u8, kernel_size) }; guest_mem - .write(kernel_data, GuestAddress(kernel_load_addr)) + .write(kernel_data, GuestAddress(kernel_guest_addr)) .unwrap(); - Ok(guest_mem) + Ok((guest_mem, GuestAddress(kernel_entry_addr))) } #[cfg(all(target_arch = "x86_64", not(feature = "tee")))] - Payload::KernelMmap(kernel_region, kernel_load_addr, _kernel_size) => guest_mem - .insert_region(Arc::new( - GuestRegionMmap::new(kernel_region, GuestAddress(kernel_load_addr)) + Payload::KernelMmap => { + let (kernel_entry_addr, kernel_host_addr, kernel_guest_addr, kernel_size) = + if let Some(kernel_bundle) = &_vm_resources.kernel_bundle { + ( + kernel_bundle.entry_addr, + kernel_bundle.host_addr, + kernel_bundle.guest_addr, + kernel_bundle.size, + ) + } else { + return Err(StartMicrovmError::MissingKernelConfig); + }; + + let kernel_region = unsafe { + MmapRegion::build_raw(kernel_host_addr as *mut 
u8, kernel_size, 0, 0) + .map_err(StartMicrovmError::InvalidKernelBundle)? + }; + + Ok(( + guest_mem + .insert_region(Arc::new( + GuestRegionMmap::new(kernel_region, GuestAddress(kernel_guest_addr)) + .map_err(StartMicrovmError::GuestMemoryMmap)?, + )) .map_err(StartMicrovmError::GuestMemoryMmap)?, + GuestAddress(kernel_entry_addr), )) - .map_err(StartMicrovmError::GuestMemoryMmap), + } + #[cfg(all(not(feature = "efi"), not(feature = "tee")))] + Payload::ExternalKernel(external_kernel) => { + let entry_addr = load_external_kernel(&guest_mem, external_kernel)?; + Ok((guest_mem, entry_addr)) + } #[cfg(test)] - Payload::Empty => Ok(guest_mem), + Payload::Empty => Ok((guest_mem, GuestAddress(0))), #[cfg(feature = "tee")] - Payload::Tee( - kernel_region, - kernel_load_addr, - kernel_size, - qboot_host_addr, - qboot_size, - initrd_host_addr, - initrd_size, - ) => { + Payload::Tee => { + let (kernel_host_addr, kernel_guest_addr, kernel_size) = + if let Some(kernel_bundle) = &_vm_resources.kernel_bundle { + ( + kernel_bundle.host_addr, + kernel_bundle.guest_addr, + kernel_bundle.size, + ) + } else { + return Err(StartMicrovmError::MissingKernelConfig); + }; let kernel_data = - unsafe { std::slice::from_raw_parts(kernel_region.as_ptr(), kernel_size) }; + unsafe { std::slice::from_raw_parts(kernel_host_addr as *mut u8, kernel_size) }; guest_mem - .write(kernel_data, GuestAddress(kernel_load_addr)) + .write(kernel_data, GuestAddress(kernel_guest_addr)) .unwrap(); + let (qboot_host_addr, qboot_size) = + if let Some(qboot_bundle) = &_vm_resources.qboot_bundle { + (qboot_bundle.host_addr, qboot_bundle.size) + } else { + return Err(StartMicrovmError::MissingQboot); + }; let qboot_data = unsafe { std::slice::from_raw_parts(qboot_host_addr as *mut u8, qboot_size) }; guest_mem .write(qboot_data, GuestAddress(arch::BIOS_START)) .unwrap(); + let (initrd_host_addr, initrd_size) = + if let Some(initrd_bundle) = &_vm_resources.initrd_bundle { + (initrd_bundle.host_addr, 
initrd_bundle.size) + } else { + return Err(StartMicrovmError::MissingInitramfs); + }; let initrd_data = unsafe { std::slice::from_raw_parts(initrd_host_addr as *mut u8, initrd_size) }; guest_mem @@ -814,72 +1115,81 @@ fn load_payload( GuestAddress(arch::x86_64::layout::INITRD_SEV_START), ) .unwrap(); - Ok(guest_mem) + Ok((guest_mem, GuestAddress(arch::BIOS_START))) } #[cfg(feature = "efi")] Payload::Efi => { guest_mem.write(EDK2_BINARY, GuestAddress(0u64)).unwrap(); - Ok(guest_mem) + Ok((guest_mem, GuestAddress(0))) } } } fn create_guest_memory( mem_size: usize, - vm_resources: Option<&VmResources>, - payload: Payload, -) -> std::result::Result<(GuestMemoryMmap, ArchMemoryInfo, ShmManager), StartMicrovmError> { + vm_resources: &VmResources, + payload: &Payload, +) -> std::result::Result< + (GuestMemoryMmap, GuestAddress, ArchMemoryInfo, ShmManager), + StartMicrovmError, +> { let mem_size = mem_size << 20; #[cfg(target_arch = "x86_64")] let (arch_mem_info, mut arch_mem_regions) = match payload { #[cfg(not(feature = "tee"))] - Payload::KernelMmap(ref _kernel_region, kernel_load_addr, kernel_size) => { - arch::arch_memory_regions(mem_size, kernel_load_addr, kernel_size) + Payload::KernelMmap => { + let (kernel_guest_addr, kernel_size) = + if let Some(kernel_bundle) = &vm_resources.kernel_bundle { + (kernel_bundle.guest_addr, kernel_bundle.size) + } else { + return Err(StartMicrovmError::MissingKernelConfig); + }; + arch::arch_memory_regions(mem_size, Some(kernel_guest_addr), kernel_size) } + #[cfg(all(not(feature = "efi"), not(feature = "tee")))] + Payload::ExternalKernel(_kernel_path) => arch::arch_memory_regions(mem_size, None, 0), #[cfg(feature = "tee")] - Payload::Tee( - ref _kernel_region, - kernel_load_addr, - kernel_size, - _qboot_host_addr, - _qboot_size, - _initrd_host_addr, - _initrd_size, - ) => arch::arch_memory_regions(mem_size, kernel_load_addr, kernel_size), + Payload::Tee => { + let (kernel_guest_addr, kernel_size) = + if let Some(kernel_bundle) = 
&vm_resources.kernel_bundle { + (kernel_bundle.guest_addr, kernel_bundle.size) + } else { + return Err(StartMicrovmError::MissingKernelConfig); + }; + arch::arch_memory_regions(mem_size, kernel_guest_addr, kernel_size) + } #[cfg(test)] - Payload::Empty => arch::arch_memory_regions(mem_size, 0, 0), + Payload::Empty => arch::arch_memory_regions(mem_size, None, 0), }; #[cfg(target_arch = "aarch64")] let (arch_mem_info, mut arch_mem_regions) = arch::arch_memory_regions(mem_size); let mut shm_manager = ShmManager::new(&arch_mem_info); - if let Some(vm_resources) = vm_resources { - #[cfg(not(feature = "tee"))] - for (index, fs) in vm_resources.fs.iter().enumerate() { - if let Some(shm_size) = fs.shm_size { - shm_manager - .create_fs_region(index, shm_size) - .map_err(StartMicrovmError::ShmCreate)?; - } - } - if vm_resources.gpu_virgl_flags.is_some() { - let size = vm_resources.gpu_shm_size.unwrap_or(1 << 33); + #[cfg(not(feature = "tee"))] + for (index, fs) in vm_resources.fs.iter().enumerate() { + if let Some(shm_size) = fs.shm_size { shm_manager - .create_gpu_region(size) + .create_fs_region(index, shm_size) .map_err(StartMicrovmError::ShmCreate)?; } - - arch_mem_regions.extend(shm_manager.regions()); + } + if vm_resources.gpu_virgl_flags.is_some() { + let size = vm_resources.gpu_shm_size.unwrap_or(1 << 33); + shm_manager + .create_gpu_region(size) + .map_err(StartMicrovmError::ShmCreate)?; } + arch_mem_regions.extend(shm_manager.regions()); + let guest_mem = GuestMemoryMmap::from_ranges(&arch_mem_regions) .map_err(StartMicrovmError::GuestMemoryMmap)?; - let guest_mem = load_payload(guest_mem, payload)?; + let (guest_mem, entry_addr) = load_payload(vm_resources, guest_mem, payload)?; - Ok((guest_mem, arch_mem_info, shm_manager)) + Ok((guest_mem, entry_addr, arch_mem_info, shm_manager)) } #[cfg(all(target_arch = "x86_64", not(feature = "tee")))] @@ -1560,20 +1870,16 @@ pub mod tests { fn default_guest_memory( mem_size_mib: usize, - ) -> 
std::result::Result<(GuestMemoryMmap, ArchMemoryInfo, ShmManager), StartMicrovmError> { - let kernel_guest_addr: u64 = 0x1000; - let kernel_size: usize = 0x1000; - let kernel_host_addr: u64 = 0x1000; - - let kernel_region = unsafe { - MmapRegion::build_raw(kernel_host_addr as *mut _, kernel_size, 0, 0).unwrap() - }; - - create_guest_memory( - mem_size_mib, - None, - Payload::KernelMmap(kernel_region, kernel_guest_addr, kernel_size), - ) + ) -> std::result::Result< + (GuestMemoryMmap, GuestAddress, ArchMemoryInfo, ShmManager), + StartMicrovmError, + > { + //let kernel_guest_addr: u64 = 0x1000; + //let kernel_size: usize = 0x1000; + //let kernel_host_addr: u64 = 0x1000; + let vm_resources = super::super::resources::VmResources::default(); + + create_guest_memory(mem_size_mib, &vm_resources, &Payload::Empty) } #[test] @@ -1581,7 +1887,8 @@ pub mod tests { fn test_create_vcpus_x86_64() { let vcpu_count = 2; - let (guest_memory, _arch_memory_info, _shm_manager) = default_guest_memory(128).unwrap(); + let (guest_memory, _entry_addr, _arch_memory_info, _shm_manager) = + default_guest_memory(128).unwrap(); let mut vm = setup_vm(&guest_memory).unwrap(); setup_interrupt_controller(&mut vm).unwrap(); let vcpu_config = VcpuConfig { @@ -1644,10 +1951,10 @@ pub mod tests { let err = Internal(Error::Serial(io::Error::from_raw_os_error(0))); let _ = format!("{}{:?}", err, err); - let err = KernelCmdline(String::from("dummy --cmdline")); + let err = InvalidKernelBundle(vm_memory::mmap::MmapRegionError::InvalidPointer); let _ = format!("{}{:?}", err, err); - let err = KernelBundle(vm_memory::mmap::MmapRegionError::InvalidPointer); + let err = KernelCmdline(String::from("dummy --cmdline")); let _ = format!("{}{:?}", err, err); let err = LoadCommandline(kernel::cmdline::Error::TooLarge); diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 70fb8668..984601d9 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -18,6 +18,7 @@ use kbs_types::Tee; 
/// Format of a kernel image supplied as an external file.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum KernelFormat {
    /// Raw image, ready to be loaded into the VM.
    #[default]
    Raw,
    /// ELF image; its sections need to be located and loaded.
    Elf,
    /// Raw image compressed with GZIP, embedded into a PE file.
    PeGz,
    /// ELF image compressed with BZIP2, embedded into an Image file.
    ImageBz2,
    /// ELF image compressed with GZIP, embedded into an Image file.
    ImageGz,
    /// ELF image compressed with ZSTD, embedded into an Image file.
    ImageZstd,
}

/// Data structure describing an external kernel to be loaded in the microVM.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct ExternalKernel {
    /// Path to the kernel image file.
    pub path: PathBuf,
    /// Format of the image found at `path`.
    pub format: KernelFormat,
}