diff --git a/Cargo.lock b/Cargo.lock index 63b28618..85fb43cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,6 +75,7 @@ dependencies = [ "kvm-ioctls", "libc", "smbios", + "tdx", "utils", "vm-fdt", "vm-memory", @@ -648,18 +649,18 @@ dependencies = [ [[package]] name = "kvm-bindings" -version = "0.8.2" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ac3147c9763fd8fa7865a90d6aee87f157b59167145b38e671bbc66b116f1e8" +checksum = "2efe3f1a4437bffe000e6297a593b98184213cd27486776c335f95ab53d48e3a" dependencies = [ "vmm-sys-util", ] [[package]] name = "kvm-ioctls" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bedae2ca4a531bebe311abaf9691f5cc14eaa21475243caa2e39c43bb872947d" +checksum = "92c2176b91f68903b54ac8c6185bada7d607ca6110998976ff15c032f88a7d39" dependencies = [ "bitflags 2.5.0", "kvm-bindings", @@ -1294,6 +1295,18 @@ version = "0.12.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" +[[package]] +name = "tdx" +version = "0.1.0" +dependencies = [ + "bitflags 2.5.0", + "kvm-bindings", + "kvm-ioctls", + "libc", + "uuid", + "vmm-sys-util", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -1441,6 +1454,7 @@ name = "vmm" version = "0.1.0" dependencies = [ "arch", + "arch_gen", "codicon", "cpuid", "crossbeam-channel", @@ -1460,6 +1474,7 @@ dependencies = [ "serde", "serde_json", "sev", + "tdx", "utils", "vm-memory", "vmm-sys-util", diff --git a/Makefile b/Makefile index c4c532f0..30fcebf0 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,8 @@ SNP_INIT_SRC = init/tee/snp_attest.c \ init/tee/snp_attest.h \ $(KBS_INIT_SRC) \ +TDX_INIT_SRC = $(KBS_INIT_SRC) + KBS_LD_FLAGS = -lcurl -lidn2 -lssl -lcrypto -lzstd -lz -lbrotlidec-static \ -lbrotlicommon-static @@ -27,6 +29,14 @@ ifeq ($(SEV),1) INIT_SRC += $(SNP_INIT_SRC) BUILD_INIT = 0 endif +ifeq ($(TDX),1) + VARIANT = -tdx + FEATURE_FLAGS := --features intel-tdx,tee,blk,kbs-types,serde,serde_json,curl + INIT_DEFS += -DTDX=1 + INIT_DEFS += $(KBS_LD_FLAGS) + INIT_SRC += $(KBS_INIT_SRC) + BUILD_INIT = 0 +endif ifeq ($(GPU),1) FEATURE_FLAGS += --features gpu endif @@ -91,6 +101,9 @@ $(LIBRARY_RELEASE_$(OS)): $(INIT_BINARY) ifeq ($(SEV),1) mv target/release/libkrun.so target/release/$(KRUN_BASE_$(OS)) endif +ifeq ($(TDX),1) + mv target/release/libkrun.so target/release/$(KRUN_BASE_$(OS)) +endif ifeq ($(OS),Linux) patchelf --set-soname $(KRUN_SONAME_$(OS)) --output $(LIBRARY_RELEASE_$(OS)) target/release/$(KRUN_BASE_$(OS)) else @@ -108,6 +121,9 @@ $(LIBRARY_DEBUG_$(OS)): $(INIT_BINARY) ifeq ($(SEV),1) mv target/debug/libkrun.so target/debug/$(KRUN_BASE_$(OS)) endif +ifeq ($(TDX),1) + mv target/debug/libkrun.so target/debug/$(KRUN_BASE_$(OS)) +endif ifeq ($(OS),Linux) patchelf --set-soname $(KRUN_SONAME_$(OS)) --output $(LIBRARY_DEBUG_$(OS)) target/debug/$(KRUN_BASE_$(OS)) else diff --git a/examples/Makefile b/examples/Makefile index 8c163059..50e2618e 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -4,6 +4,7 @@ LDFLAGS_x86_64_Linux = -lkrun LDFLAGS_aarch64_Linux = -lkrun LDFLAGS_arm64_Darwin = -L/opt/homebrew/lib -lkrun LDFLAGS_sev = -lkrun-sev +LDFLAGS_tdx = -lkrun-tdx LDFLAGS_efi = -L/opt/homebrew/lib -lkrun-efi CFLAGS = -O2 -g -I../include ROOTFS_DISTRO := fedora @@ -15,6 +16,9 @@ EXAMPLES := chroot_vm ifeq ($(SEV),1) EXAMPLES := launch-tee endif +ifeq ($(TDX),1) + EXAMPLES := launch-tee +endif ifeq ($(EFI),1) EXAMPLES := boot_efi endif @@ -28,7 +32,12 @@ ifeq ($(OS),Darwin) endif launch-tee: launch-tee.c +ifeq ($(SEV),1) gcc -o $@ $< $(CFLAGS) $(LDFLAGS_sev) +endif +ifeq ($(TDX),1) + gcc -o $@ $< $(CFLAGS) $(LDFLAGS_tdx) +endif boot_efi: boot_efi.c gcc -o $@ $< $(CFLAGS) $(LDFLAGS_efi) diff --git a/examples/launch-tee.c b/examples/launch-tee.c index eba39b76..d7867e84 100644 --- a/examples/launch-tee.c +++ b/examples/launch-tee.c @@ -19,12 +19,12 @@ int main(int argc, char *const argv[]) { - char *const port_map[] = + const char *const port_map[] = { "18000:8000", 0 }; - char *const rlimits[] = + const char *const rlimits[] = { // RLIMIT_NPROC = 6 "6=4096:8192", diff --git a/examples/tdx-config-noattest.json b/examples/tdx-config-noattest.json new file mode 100644 index 00000000..5a80f15b --- /dev/null +++ b/examples/tdx-config-noattest.json @@ -0,0 +1,8 @@ +{ + "workload_id": "tdxtest", + "cpus": 1, + "ram_mib": 2048, + "tee": "tdx", + "tee_data": "{\"vendor_chain\": \"\", \"attestation_server_pubkey\": \"\"}", + "attestation_url": "" +} diff --git a/src/arch/Cargo.toml b/src/arch/Cargo.toml index baaedda5..704c20a1 100644 --- a/src/arch/Cargo.toml +++ b/src/arch/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" [features] tee = [] +intel-tdx = [ "tee", "tdx" ] amd-sev = [ "tee" ] efi = [] @@ -18,8 +19,9 @@ smbios = { path = "../smbios" } utils = { path = "../utils" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings = { version = "0.9.1", features = ["fam-wrappers"] } +kvm-ioctls = "0.18" +tdx = { path = "../../../tdx", optional = true } [target.'cfg(target_arch = "aarch64")'.dependencies] vm-fdt = ">= 0.2.0" diff --git a/src/arch/src/x86_64/mod.rs b/src/arch/src/x86_64/mod.rs index e31046e8..17fbf302 100644 --- a/src/arch/src/x86_64/mod.rs +++ b/src/arch/src/x86_64/mod.rs @@ -51,7 +51,10 @@ pub enum Error { // Where BIOS/VGA magic would live on a real PC. const EBDA_START: u64 = 0x9fc00; +#[cfg(not(feature = "intel-tdx"))] pub const RESET_VECTOR: u64 = 0xfff0; +#[cfg(feature = "intel-tdx")] +pub const RESET_VECTOR: u64 = 0xffff_fff0; pub const RESET_VECTOR_SEV_AP: u64 = 0xfff3; pub const BIOS_START: u64 = 0xffff_0000; pub const BIOS_SIZE: usize = 65536; @@ -119,6 +122,15 @@ pub fn arch_memory_regions( (info, regions) } +#[cfg(feature = "intel-tdx")] +fn get_tdvf_image_size() -> usize { + use std::io::{Seek, SeekFrom}; + let mut fs = std::fs::File::open("/home/jcorrent/edk2/Build/IntelTdx/DEBUG_GCC5/FV/OVMF.fd").unwrap(); + // let mut fs = std::fs::File::open("/home/jcorrent/edk2/Build/IntelTdx/RELEASE_GCC5/FV/OVMF.fd").unwrap(); + // TODO(jakecorrenti): do proper error handling here + fs.seek(SeekFrom::End(0)).unwrap() as usize +} + /// Returns a Vec of the valid memory addresses. /// These should be used to configure the GuestMemoryMmap structure for the platform. /// For SEV, don't make a hole for the kernel, as it needs to be copied instead of injected, @@ -132,6 +144,11 @@ pub fn arch_memory_regions( ) -> (ArchMemoryInfo, Vec<(GuestAddress, usize)>) { let page_size: usize = unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() }; + #[cfg(feature = "intel-tdx")] + let tdvf_image_size = get_tdvf_image_size(); + #[cfg(feature = "intel-tdx")] + let tdvf_image_start_addr = 0x1_0000_0000 - tdvf_image_size; + let size = round_up(size, page_size); if size < (kernel_load_addr + kernel_size as u64) as usize { panic!("Kernel doesn't fit in RAM"); @@ -214,6 +231,8 @@ pub fn configure_system( cmdline_size: usize, initrd: &Option, num_cpus: u8, + ram_entries: &mut Vec, + nr_ram_entries: &mut u8, ) -> super::Result<()> { const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55; const KERNEL_HDR_MAGIC: u32 = 0x5372_6448; @@ -244,7 +263,9 @@ pub fn configure_system( #[cfg(feature = "tee")] { - params.0.hdr.syssize = num_cpus as u32; + // params.0.hdr.syssize = num_cpus as u32; + params.0.hdr.syssize = (arch_memory_info.ram_last_addr / 4096) as u32; + params.0.hdr.root_flags = num_cpus as u16; } add_e820_entry(&mut params.0, 0, EBDA_START, E820_RAM)?; @@ -286,6 +307,9 @@ pub fn configure_system( .write_obj(params, zero_page_addr) .map_err(|_| Error::ZeroPageSetup)?; + *ram_entries = params.0.e820_map.to_vec(); + *nr_ram_entries = params.0.e820_entries; + Ok(()) } diff --git a/src/arch/src/x86_64/mptable.rs b/src/arch/src/x86_64/mptable.rs index 4d3eab58..a3b4b453 100644 --- a/src/arch/src/x86_64/mptable.rs +++ b/src/arch/src/x86_64/mptable.rs @@ -195,6 +195,7 @@ pub fn setup_mptable(mem: &GuestMemoryMmap, num_cpus: u8) -> Result<()> { let mut mpc_bus = MpcBusWrapper(mpspec::mpc_bus::default()); mpc_bus.0.type_ = mpspec::MP_BUS as u8; mpc_bus.0.busid = 0; + panic!("hi"); mpc_bus.0.bustype = BUS_TYPE_ISA; mem.write_obj(mpc_bus, base_mp) .map_err(|_| Error::WriteMpcBus)?; diff --git a/src/cpuid/Cargo.toml b/src/cpuid/Cargo.toml index 41c53aee..9f082538 100644 --- a/src/cpuid/Cargo.toml +++ b/src/cpuid/Cargo.toml @@ -8,5 +8,5 @@ edition = "2021" vmm-sys-util = ">=0.11" [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings = { version = "0.9.1", features = ["fam-wrappers"] } +kvm-ioctls = "0.18" diff --git a/src/libkrun/build.rs b/src/libkrun/build.rs index a3ccc228..175939eb 100644 --- a/src/libkrun/build.rs +++ b/src/libkrun/build.rs @@ -5,6 +5,8 @@ fn main() { println!("cargo:rustc-link-search=/opt/homebrew/lib"); #[cfg(all(not(feature = "tee"), not(feature = "efi")))] println!("cargo:rustc-link-lib=krunfw"); - #[cfg(feature = "tee")] + #[cfg(feature = "amd-sev")] println!("cargo:rustc-link-lib=krunfw-sev"); + #[cfg(feature = "intel-tdx")] + println!("cargo:rustc-link-lib=krunfw-tdx"); } diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 77c84d5d..4f85692c 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -251,7 +251,7 @@ extern "C" { fn krunfw_get_version() -> u32; } -#[cfg(feature = "tee")] +#[cfg(feature = "amd-sev")] #[link(name = "krunfw-sev")] extern "C" { fn krunfw_get_qboot(size: *mut size_t) -> *mut c_char; @@ -264,6 +264,20 @@ extern "C" { fn krunfw_get_version() -> u32; } +// #[cfg(all(feature = "intel-tdx", feature = "tee"))] +#[cfg(feature = "tee")] +#[link(name = "krunfw-tdx")] +extern "C" { + fn krunfw_get_qboot(size: *mut size_t) -> *mut c_char; + fn krunfw_get_initrd(size: *mut size_t) -> *mut c_char; + fn krunfw_get_kernel( + load_addr: *mut u64, + entry_addr: *mut u64, + size: *mut size_t, + ) -> *mut c_char; + fn krunfw_get_version() -> u32; +} + #[no_mangle] pub extern "C" fn krun_set_log_level(level: u32) -> i32 { let log_level = match level { diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 63323c49..72d916d5 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [features] tee = [] amd-sev = [ "blk", "tee", "codicon", "kbs-types", "procfs", "serde", "serde_json", "sev", "curl" ] +intel-tdx = [ "blk", "tee", "kbs-types", "serde", "serde_json", "curl", "tdx" ] net = [] blk = [] efi = [ "blk", "net" ] @@ -21,6 +22,7 @@ log = "0.4.0" vm-memory = { version = ">=0.13", features = ["backend-mmap"] } arch = { path = "../arch" } +arch_gen = { path = "../arch_gen" } devices = { path = "../devices" } kernel = { path = "../kernel" } utils = { path = "../utils"} @@ -36,12 +38,14 @@ sev = { version = "3.2.0", features = ["openssl"], optional = true } curl = { version = "0.4", optional = true } nix = "0.24.1" +tdx = { path = "../../../tdx", optional = true } + [target.'cfg(target_arch = "x86_64")'.dependencies] cpuid = { path = "../cpuid" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings = { version = "0.9.1", features = ["fam-wrappers"] } +kvm-ioctls = "0.18" [target.'cfg(target_os = "macos")'.dependencies] hvf = { path = "../hvf" } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index f7d49ccc..fbf868f0 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -428,7 +428,7 @@ pub fn build_microvm( #[cfg(feature = "tee")] let tee = vm_resources.tee_config().tee; - #[cfg(feature = "tee")] + #[cfg(feature = "amd-sev")] let sev_launcher = match tee { Tee::Sev => Some( vm.sev_secure_virt_prepare(&guest_memory) @@ -437,7 +437,7 @@ pub fn build_microvm( _ => None, }; - #[cfg(feature = "tee")] + #[cfg(feature = "amd-sev")] let snp_launcher = match tee { Tee::Snp => Some( vm.snp_secure_virt_prepare(&guest_memory) @@ -446,11 +446,25 @@ pub fn build_microvm( _ => None, }; + #[cfg(feature = "intel-tdx")] + let _ = match tee { + Tee::Tdx => Some( + vm.tdx_secure_virt_prepare() + .map_err(StartMicrovmError::SecureVirtPrepare)?, + ), + _ => None, + }; + #[cfg(feature = "tee")] let measured_regions = { println!("Injecting and measuring memory regions. This may take a while."); let m = vec![ + MeasuredRegion { + guest_addr: 0, + host_addr: guest_memory.get_host_address(GuestAddress(0)).unwrap() as u64, + size: 0x8000_0000, + }, MeasuredRegion { guest_addr: arch::BIOS_START, host_addr: guest_memory @@ -458,27 +472,28 @@ pub fn build_microvm( .unwrap() as u64, size: qboot_bundle.size, }, - MeasuredRegion { - guest_addr: kernel_bundle.guest_addr, - host_addr: guest_memory - .get_host_address(GuestAddress(kernel_bundle.guest_addr)) - .unwrap() as u64, - size: kernel_bundle.size, - }, - MeasuredRegion { - guest_addr: arch::x86_64::layout::INITRD_SEV_START, - host_addr: guest_memory - .get_host_address(GuestAddress(arch::x86_64::layout::INITRD_SEV_START)) - .unwrap() as u64, - size: initrd_bundle.size, - }, - MeasuredRegion { - guest_addr: arch::x86_64::layout::ZERO_PAGE_START, - host_addr: guest_memory - .get_host_address(GuestAddress(arch::x86_64::layout::ZERO_PAGE_START)) - .unwrap() as u64, - size: 4096, - }, + // MeasuredRegion { + // guest_addr: kernel_bundle.guest_addr, + // host_addr: guest_memory + // .get_host_address(GuestAddress(kernel_bundle.guest_addr)) + // .unwrap() as u64, + // size: kernel_bundle.size, + // }, + // MeasuredRegion { + // guest_addr: arch::x86_64::layout::INITRD_SEV_START, + // host_addr: guest_memory + // .get_host_address(GuestAddress(arch::x86_64::layout::INITRD_SEV_START)) + // .unwrap() as u64, + // size: initrd_bundle.size, + // }, + // MeasuredRegion { + // guest_addr: arch::x86_64::layout::ZERO_PAGE_START, + // host_addr: guest_memory + // .get_host_address(GuestAddress(arch::x86_64::layout::ZERO_PAGE_START)) + // .unwrap() as u64, + // // size: 0x19000, + // size: 4096, + // }, ]; m @@ -486,18 +501,21 @@ pub fn build_microvm( // On x86_64 always create a serial device, // while on aarch64 only create it if 'console=' is specified in the boot args. - let serial_device = if cfg!(feature = "efi") { + let serial_device = if cfg!(not(feature = "efi")) { + // let serial_device = if cfg!(feature = "efi") { Some(setup_serial_device( event_manager, None, - None, + // None, // Uncomment this to get EFI output when debugging EDK2. - // Some(Box::new(io::stdout())), + Some(Box::new(io::stdout())), )?) } else { None }; + println!("serial device: {:#?}", serial_device.is_none()); + let exit_evt = EventFd::new(utils::eventfd::EFD_NONBLOCK) .map_err(Error::EventFd) .map_err(StartMicrovmError::Internal)?; @@ -537,13 +555,18 @@ pub fn build_microvm( #[cfg(feature = "tee")] let boot_ip: GuestAddress = GuestAddress(arch::RESET_VECTOR); + println!("boot_ip: {:#?}", boot_ip); + let vcpus; // For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` // while on aarch64 we need to do it the other way around. #[cfg(target_arch = "x86_64")] { - setup_interrupt_controller(&vm)?; - attach_legacy_devices(&vm, &mut pio_device_manager)?; + // #[cfg(not(feature = "intel-tdx"))] + // { + // setup_interrupt_controller(&vm)?; + attach_legacy_devices(&vm, &mut pio_device_manager)?; + // } vcpus = create_vcpus_x86_64( &vm, @@ -610,6 +633,16 @@ pub fn build_microvm( )?; } + #[cfg(feature = "intel-tdx")] + let _ = match tee { + Tee::Tdx => Some( + vm.tdx_secure_virt_prepare_memory(&measured_regions) + .map_err(StartMicrovmError::SecureVirtPrepare)?, + ), + _ => None, + }; + + let mut vmm = Vmm { guest_memory, arch_memory_info, @@ -696,21 +729,32 @@ pub fn build_microvm( #[cfg(not(feature = "tee"))] let initrd_config = None; + #[cfg(feature = "intel-tdx")] + let mut ram_entries: Vec = Vec::new(); + #[cfg(feature = "intel-tdx")] + let mut nr_ram_entries = 0; + vmm.configure_system( vcpus.as_slice(), &initrd_config, &vm_resources.smbios_oem_strings, + #[cfg(target_arch = "x86_64")] + &mut ram_entries, + #[cfg(target_arch = "x86_64")] + &mut nr_ram_entries, ) .map_err(StartMicrovmError::Internal)?; #[cfg(feature = "tee")] { match tee { + #[cfg(feature = "amd-sev")] Tee::Sev => vmm .kvm_vm() .sev_secure_virt_attest(vmm.guest_memory(), measured_regions, sev_launcher.unwrap()) .map_err(StartMicrovmError::SecureVirtAttest)?, + #[cfg(feature = "amd-sev")] Tee::Snp => { let cpuid = kvm .fd() @@ -726,6 +770,15 @@ pub fn build_microvm( ) .map_err(StartMicrovmError::SecureVirtAttest)?; } + + #[cfg(feature = "intel-tdx")] + Tee::Tdx => { + vmm.kvm_vm() + .tdx_secure_virt_finalize_vm() + .map_err(StartMicrovmError::SecureVirtPrepare)?; + // TODO(jakecorrenti): should do a no-attest here for the TDX bits so that we can + // unlock the LUKS partition + } _ => return Err(StartMicrovmError::InvalidTee), } @@ -785,11 +838,35 @@ fn load_payload( .write(kernel_data, GuestAddress(kernel_load_addr)) .unwrap(); - let qboot_data = - unsafe { std::slice::from_raw_parts(qboot_host_addr as *mut u8, qboot_size) }; - guest_mem - .write(qboot_data, GuestAddress(arch::BIOS_START)) - .unwrap(); + // #[cfg(not(feature = "intel-tdx"))] + // { + let qboot_data = + unsafe { std::slice::from_raw_parts(qboot_host_addr as *mut u8, qboot_size) }; + guest_mem + .write(qboot_data, GuestAddress(arch::BIOS_START)) + .unwrap(); + // } + + // #[cfg(feature = "intel-tdx")] + // { + // let mut tdvf_file = + // std::fs::File::open("/home/jcorrent/edk2/Build/IntelTdx/DEBUG_GCC5/FV/OVMF.fd").unwrap(); + // // std::fs::File::open("/home/jcorrent/edk2/Build/IntelTdx/RELEASE_GCC5/FV/OVMF.fd").unwrap(); + // tdvf_file.sync_all().unwrap(); + // let tdvf_file_size = tdvf_file.metadata().unwrap().len(); + // let tdvf_guest_start_address = 0x1_0000_0000 - tdvf_file_size; + // println!( + // "reading the contents of the tdvf file into the address at 0x{:x} on the guest", + // tdvf_file_size + // ); + // guest_mem + // .read_exact_volatile_from( + // GuestAddress(tdvf_guest_start_address as u64), + // &mut tdvf_file, + // tdvf_file_size as usize, + // ) + // .unwrap(); + // } let initrd_data = unsafe { std::slice::from_raw_parts(initrd_host_addr as *mut u8, initrd_size) }; @@ -1062,6 +1139,10 @@ fn create_vcpus_x86_64( exit_evt: &EventFd, ) -> super::Result> { let mut vcpus = Vec::with_capacity(vcpu_config.vcpu_count as usize); + + #[cfg(feature = "intel-tdx")] + let hob_section_addr = vm.tdx_secure_virt_get_tdvf_hob_section_address().unwrap(); + for cpu_index in 0..vcpu_config.vcpu_count { let mut vcpu = Vcpu::new_x86_64( cpu_index, @@ -1073,6 +1154,18 @@ fn create_vcpus_x86_64( ) .map_err(Error::Vcpu)?; + let mut cpuid = vm.supported_cpuid().clone(); + for entry in cpuid.as_mut_slice().iter_mut() { + if entry.index == 0x1 { + entry.ecx &= 1 << 21; + } + } + + #[cfg(feature = "intel-tdx")] + vcpu.tdx_secure_virt_init(hob_section_addr, &cpuid) + .map_err(Error::Vcpu)?; + + println!("entry addr: {:#?}", entry_addr); vcpu.configure_x86_64(guest_mem, entry_addr, vcpu_config) .map_err(Error::Vcpu)?; @@ -1558,6 +1651,7 @@ pub mod tests { let (guest_memory, _arch_memory_info, _shm_manager) = default_guest_memory(128).unwrap(); let mut vm = setup_vm(&guest_memory).unwrap(); + #[cfg(not(feature = "intel-tdx"))] setup_interrupt_controller(&mut vm).unwrap(); let vcpu_config = VcpuConfig { vcpu_count, diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 0e680a84..f45ac577 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -208,6 +208,7 @@ impl Vmm { device_type: DeviceType, device_id: &str, ) -> Option<&Mutex> { + println!("device id: {}", device_id); self.mmio_device_manager.get_device(device_type, device_id) } @@ -259,14 +260,16 @@ impl Vmm { /// Configures the system for boot. pub fn configure_system( - &self, + &mut self, vcpus: &[Vcpu], initrd: &Option, _smbios_oem_strings: &Option>, + #[cfg(target_arch = "x86_64")] ram_entries: &mut Vec, + #[cfg(target_arch = "x86_64")] nr_ram_entries: &mut u8, ) -> Result<()> { #[cfg(target_arch = "x86_64")] { - let cmdline_len = if cfg!(feature = "tee") { + let cmdline_len = if cfg!(feature = "amd-sev") { arch::x86_64::layout::CMDLINE_SEV_SIZE } else { self.kernel_cmdline.len() + 1 @@ -279,8 +282,19 @@ impl Vmm { cmdline_len, initrd, vcpus.len() as u8, + ram_entries, + nr_ram_entries, ) .map_err(Error::ConfigureSystem)?; + + // #[cfg(feature = "intel-tdx")] + // self.vm + // .tdx_secure_virt_prepare_memory( + // &mut self.guest_memory, + // ram_entries, + // &mut (*nr_ram_entries as u64), + // ) + // .unwrap(); } #[cfg(all(target_arch = "aarch64", target_os = "linux"))] diff --git a/src/vmm/src/linux/tee/inteltdx.rs b/src/vmm/src/linux/tee/inteltdx.rs new file mode 100644 index 00000000..c7b2af87 --- /dev/null +++ b/src/vmm/src/linux/tee/inteltdx.rs @@ -0,0 +1,500 @@ +use kvm_ioctls::VmFd; +use tdx::launch::{TdxCapabilities, TdxVm}; +use tdx::tdvf::{self, TdvfSection, TdvfSectionType}; +use vm_memory::{self, ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap}; + +use std::fs::File; +use std::io::{self, Read, Seek, SeekFrom}; + +use arch_gen::x86::bootparam::e820entry; + +#[derive(Debug)] +pub enum Error { + CreateTdxVmStruct, + GetCapabilities, + GuestMemoryWriteTdHob(vm_memory::GuestMemoryError), + InitVm, + MissingHobTdvfSection, + OpenTdvfFirmwareFile(io::Error), + ParseTdvfSections(tdvf::Error), + InvalidRamRange, + InvalidRamType, + TooManyRamEntries, + FinalizeVm, +} + +pub struct IntelTdx { + caps: TdxCapabilities, + vm: TdxVm, + tdvf_sections: Vec, + tdvf_file: File, +} + +impl IntelTdx { + pub fn new(vm_fd: &VmFd) -> Result { + // FIXME(jakecorrenti): need to specify the max number of VCPUs here and not just assume 100. This should come from the VmResources that we set when doing krun_set_vm_config() + let vm = TdxVm::new(vm_fd, 1).or_else(|_| return Err(Error::CreateTdxVmStruct))?; + let caps = vm + .get_capabilities(vm_fd) + .or_else(|_| return Err(Error::GetCapabilities))?; + + // let mut firmware = std::fs::File::open("/home/jcorrent/edk2/Build/IntelTdx/RELEASE_GCC5/FV/OVMF.fd") + let mut firmware = std::fs::File::open("/home/jcorrent/edk2/Build/IntelTdx/DEBUG_GCC5/FV/OVMF.fd") + .map_err(Error::OpenTdvfFirmwareFile)?; + let tdvf_sections = + tdx::tdvf::parse_sections(&mut firmware).map_err(Error::ParseTdvfSections)?; + + Ok(IntelTdx { + caps, + vm, + tdvf_sections, + tdvf_file: firmware, + }) + } + + pub fn vm_prepare( + &self, + fd: &kvm_ioctls::VmFd, + cpuid: kvm_bindings::CpuId, + ) -> Result<(), Error> { + self.vm + .init_vm(fd, cpuid) + .or_else(|_| return Err(Error::InitVm))?; + + Ok(()) + } + + pub fn get_tdvf_hob_address(&self) -> Result { + for section in &self.tdvf_sections { + if let TdvfSectionType::TdHob = section.section_type { + return Ok(section.memory_address); + } + } + Err(Error::MissingHobTdvfSection) + } + + pub fn configure_td_memory(&self, fd: &kvm_ioctls::VmFd, regions: &Vec) -> Result<(), Error> { + for region in regions { + println!("adding region: {:#?}", region); + let ext = if arch::BIOS_START == region.guest_addr { + 1 + } else { + 0 + }; + // loop { + match self.vm.init_mem_region(fd, region.guest_addr, (region.size / 4096) as u64, ext, region.host_addr) { + Err(e) => if e.code == 11 { + // continue + } else { + panic!("error: {:#?}", e) + }, + // _ => break, + _ => (), + } + // } + } + + Ok(()) + } + + // pub fn configure_td_memory( + // &self, + // fd: &kvm_ioctls::VmFd, + // guest_mem: &mut GuestMemoryMmap, + // ram_entries: &mut Vec, + // nr_ram_entries: &mut u64, + // ) -> Result<(), Error> { + // let mut tdx_firmware_entries: Vec = self + // .tdvf_sections + // .iter() + // .map(|&s| TdxFirmwareEntry { + // data_offset: s.data_offset, + // data_len: s.raw_data_size, + // address: s.memory_address, + // size: s.memory_data_size, + // r#type: s.section_type, + // attributes: s.attributes, + // mem_ptr: guest_mem + // .get_host_address(vm_memory::GuestAddress(s.memory_address)) + // .unwrap() as u64, + // }) + // .collect(); + + // let mut tdx_ram_entries = tdx_init_ram_entries(&ram_entries[0..(*nr_ram_entries as usize)]); + // let mut tdx_ram_entries = vec![TdxRamEntry { + // address: 0, + // length: 0x8000_0000, + // r#type: TdxRamType::TdxRamUnaccepted, + // }]; + + // for entry in &tdx_firmware_entries { + // match entry.r#type { + // TdvfSectionType::TempMem | TdvfSectionType::TdHob => { + // let ret = tdx_accept_ram_range(&mut tdx_ram_entries, entry.address, entry.size); + // if ret < 0 { + // panic!("unable to accept ram range"); + // } + // } + // _ => (), + // } + // } + + // tdx_ram_entries.sort_by(|a, b| a.address.cmp(&b.address)); + + // for entry in &tdx_firmware_entries { + // match entry.r#type { + // TdvfSectionType::TdHob => { + // tdvf_hob_create(&entry, &tdx_ram_entries, guest_mem).unwrap() + // } + // _ => (), + // } + // } + + // for section in &tdx_firmware_entries { + // // TODO: we should be checking to see if the KVM_CAP_MEMORY_MAPPING capability is + // // enabled, but for now just assume its not + // self.vm + // .init_mem_region( + // fd, + // section.address, + // section.size / 4096, + // // FIXME: instead of checking the section type we should be checking the + // // attributes to see if the feature is set to extend the measurement + // section.attributes & 1, + // guest_mem + // .get_host_address(vm_memory::GuestAddress(section.address)) + // .unwrap() as u64, + // ) + // .unwrap(); + + // // TODO: if the entry is of type TD_HOB or TEMP_MEM then we need to unmap the memory + // // and set the mem_ptr to NULL (or 0 in this case) + // } + + // Ok(()) + // } + + pub fn finalize_vm(&self, fd: &kvm_ioctls::VmFd) -> Result<(), Error> { + self.vm + .finalize(fd) + .or_else(|_| return Err(Error::FinalizeVm)) + } +} + +#[derive(Debug, Default)] +struct TdxFirmwareEntry { + data_offset: u32, + data_len: u32, + address: u64, + size: u64, + r#type: TdvfSectionType, + attributes: u32, + mem_ptr: u64, +} + +#[derive(Copy, Clone, Debug, Default)] +enum TdxRamType { + #[default] + TdxRamUnaccepted, + TdxRamAdded, +} + +#[derive(Copy, Clone, Debug, Default)] +struct TdxRamEntry { + address: u64, + length: u64, + r#type: TdxRamType, +} + +fn tdx_init_ram_entries(entries: &[e820entry]) -> Vec { + entries + .iter() + .map(|e| TdxRamEntry { + address: e.addr, + length: e.size, + r#type: TdxRamType::TdxRamUnaccepted, + }) + .collect() +} + +fn tdx_accept_ram_range(ram_entries: &mut Vec, address: u64, length: u64) -> i32 { + let mut found_entry: Option<&mut TdxRamEntry> = None; + + for entry in ram_entries.iter_mut() { + if address + length <= entry.address || entry.address + entry.length <= address { + continue; + } + + if entry.address > address || entry.address + entry.length < address + length { + return -libc::EINVAL; + } + + if let TdxRamType::TdxRamAdded = entry.r#type { + return -libc::EINVAL; + } + + found_entry = Some(entry); + } + + let found_entry = found_entry.unwrap(); + + let tmp_address = found_entry.address; + let tmp_length = found_entry.length; + + found_entry.address = address; + found_entry.length = length; + found_entry.r#type = TdxRamType::TdxRamAdded; + + // determine the chunk of the ram range before the newly added range + let head_length = address - tmp_address; + if head_length > 0 { + let head_start = tmp_address; + ram_entries.push(TdxRamEntry { + address: head_start, + length: head_length, + r#type: TdxRamType::TdxRamUnaccepted, + }); + } + + // determine the chunk of the ram range after the newly added range + let tail_start = address + length; + // check if the start of the ram range after the newly added range begins before the old + // range's end + if tail_start < tmp_address + tmp_length { + let tail_length = tmp_address + tmp_length - tail_start; + ram_entries.push(TdxRamEntry { + address: tail_start, + length: tail_length, + r#type: TdxRamType::TdxRamUnaccepted, + }); + } + + 0 +} + +#[derive(Debug, Default)] +struct TdvfHob { + hob_addr: u64, + ptr: u64, + size: u64, + + // working area + current: u64, + end: u64, +} + +type EfiPhysicalAddress = u64; +type EfiBootMode = u32; + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct EfiHobGenericHeader { + hob_type: u16, + hob_length: u16, + reserved: u32, +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct EfiHobHandoffInfoTable { + header: EfiHobGenericHeader, + version: u32, + boot_mode: EfiBootMode, + efi_memory_top: EfiPhysicalAddress, + efi_memory_bottom: EfiPhysicalAddress, + efi_free_memory_top: EfiPhysicalAddress, + efi_free_memory_bottom: EfiPhysicalAddress, + efi_end_of_hob_list: EfiPhysicalAddress, +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct EfiHobResourceDescriptor { + header: EfiHobGenericHeader, + owner: EfiGuid, + resource_type: EfiResourceType, + resource_attribute: EfiResourceAttributeType, + physical_start: EfiPhysicalAddress, + resource_length: u64, +} + +type EfiResourceType = u32; +type EfiResourceAttributeType = u32; + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +struct EfiGuid { + data1: u32, + data2: u16, + data3: u16, + data4: [u8; 8], +} + +// SAFETY: data structure only contain a series of integers +unsafe impl ByteValued for EfiHobResourceDescriptor {} +// SAFETY: data structure only contain a series of integers +unsafe impl ByteValued for EfiHobGenericHeader {} +// SAFETY: data structure only contain a series of integers +unsafe impl ByteValued for EfiHobHandoffInfoTable {} + +const EFI_HOB_HANDOFF_TABLE_VERSION: u32 = 0x0009; + +const EFI_HOB_TYPE_HANDOFF: u16 = 0x0001; +const EFI_HOB_TYPE_RESOURCE_DESCRIPTOR: u16 = 0x0003; +const EFI_HOB_TYPE_END_OF_HOB_LIST: u16 = 0xFFFF; + +const EFI_RESOURCE_SYSTEM_MEMORY: u32 = 0x00000000; + +const EFI_RESOURCE_ATTRIBUTE_PRESENT: u32 = 0x00000001; +const EFI_RESOURCE_ATTRIBUTE_INITIALIZED: u32 = 0x00000002; +const EFI_RESOURCE_ATTRIBUTE_TESTED: u32 = 0x00000004; +const EFI_RESOURCE_MEMORY_UNACCEPTED: u32 = 0x00000007; + +const EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE: u32 = EFI_RESOURCE_ATTRIBUTE_PRESENT + | EFI_RESOURCE_ATTRIBUTE_INITIALIZED + | EFI_RESOURCE_ATTRIBUTE_TESTED; + +const EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED: u32 = EFI_RESOURCE_ATTRIBUTE_PRESENT + | EFI_RESOURCE_ATTRIBUTE_INITIALIZED + | EFI_RESOURCE_ATTRIBUTE_TESTED; + +const EFI_HOB_OWNER_ZERO: EfiGuid = EfiGuid { + data1: 0x00000000, + data2: 0x0000, + data3: 0x0000, + data4: [0x00; 8], +}; + +fn tdvf_hob_create( + td_hob: &TdxFirmwareEntry, + ram_entries: &Vec, + guest_mem: &mut GuestMemoryMmap, +) -> Result<(), Error> { + let mut hob = TdvfHob { + hob_addr: td_hob.address, + size: td_hob.size, + ptr: td_hob.mem_ptr, + + current: td_hob.address, + end: td_hob.address + td_hob.size, + }; + + println!("initial td hob: {:?}", hob); + + let hit_area = tdvf_get_area( + &mut hob, + std::mem::size_of::() as u64, + ); + + tdvf_hob_add_memory_resources(&mut hob, &ram_entries, guest_mem)?; + + let last_hob_area = tdvf_get_area(&mut hob, std::mem::size_of::() as u64); + let last_hob = EfiHobGenericHeader { + hob_type: EFI_HOB_TYPE_END_OF_HOB_LIST, + hob_length: std::mem::size_of::() as u16, + reserved: 0, + }; + + println!("last region: {:#?}", last_hob_area); + println!("{:#?}", last_hob); + println!("td hob after last: {:#?}", hob); + println!(""); + println!(""); + + guest_mem + .write_obj(last_hob, last_hob_area) + .map_err(Error::GuestMemoryWriteTdHob)?; + + let hit = EfiHobHandoffInfoTable { + header: EfiHobGenericHeader { + hob_type: EFI_HOB_TYPE_HANDOFF, + hob_length: std::mem::size_of::() as u16, + reserved: 0, + }, + version: EFI_HOB_HANDOFF_TABLE_VERSION, + boot_mode: 0, + efi_memory_top: 0, + efi_memory_bottom: 0, + efi_free_memory_top: 0, + efi_free_memory_bottom: 0, + efi_end_of_hob_list: hob.current, + }; + println!("hit region: {:#?}", hit_area); + println!("{:#?}", hit); + println!("td hob after hit: {:#?}", hob); + guest_mem + .write_obj(hit, hit_area) + .map_err(Error::GuestMemoryWriteTdHob)?; + + Ok(()) +} + +fn tdvf_get_area(hob: &mut TdvfHob, size: u64) -> GuestAddress { + if hob.current + size > hob.end { + panic!("TD_HOB overrun, size = 0x{:x}", size); + } + + let ret = GuestAddress(hob.current); + hob.current += size; + hob.current = tdvf_align(hob, 8); + ret +} + +fn align_down(n: u64, m: u64) -> u64 { + n / m * m +} + +fn align_up(n: u64, m: u64) -> u64 { + align_down(n + m - 1, m) +} + +// FIXME: can simplify this to (hob.current + 7) / 8 * 8 +fn tdvf_align(hob: &TdvfHob, align: usize) -> u64 { + align_up(hob.current, align as u64) +} + +fn tdvf_hob_add_memory_resources( + hob: &mut TdvfHob, + ram_entries: &Vec, + guest_mem: &GuestMemoryMmap, +) -> Result<(), Error> { + for entry in ram_entries { + let (resource_type, resource_attribute) = match entry.r#type { + TdxRamType::TdxRamUnaccepted => ( + EFI_RESOURCE_MEMORY_UNACCEPTED, + EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED, + ), + TdxRamType::TdxRamAdded => ( + EFI_RESOURCE_SYSTEM_MEMORY, + EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE, + ), + }; + + let region_area = + tdvf_get_area(hob, std::mem::size_of::() as u64); + let region = EfiHobResourceDescriptor { + header: EfiHobGenericHeader { + hob_type: EFI_HOB_TYPE_RESOURCE_DESCRIPTOR, + hob_length: std::mem::size_of::() as u16, + reserved: 0, + }, + owner: EFI_HOB_OWNER_ZERO, + resource_type, + resource_attribute, + physical_start: entry.address, + resource_length: entry.length, + }; + + println!("region: {:#?}", region_area); + println!("{:#?}", region); + println!("hob after adding resources: {:#?}", hob); + println!(""); + println!(""); + + guest_mem + .write_obj(region, region_area) + .map_err(Error::GuestMemoryWriteTdHob)?; + } + + Ok(()) +} diff --git a/src/vmm/src/linux/tee/mod.rs b/src/vmm/src/linux/tee/mod.rs index 56b1eb77..565a1db0 100644 --- a/src/vmm/src/linux/tee/mod.rs +++ b/src/vmm/src/linux/tee/mod.rs @@ -3,3 +3,6 @@ pub mod amdsev; #[cfg(feature = "amd-sev")] pub mod amdsnp; + +#[cfg(feature = "intel-tdx")] +pub mod inteltdx; diff --git a/src/vmm/src/linux/vstate.rs b/src/vmm/src/linux/vstate.rs index 77726880..89c2581b 100644 --- a/src/vmm/src/linux/vstate.rs +++ b/src/vmm/src/linux/vstate.rs @@ -28,6 +28,9 @@ use super::tee::amdsev::{AmdSev, Error as SevError}; #[cfg(feature = "amd-sev")] use super::tee::amdsnp::{AmdSnp, Error as SnpError}; +#[cfg(feature = "intel-tdx")] +use super::tee::inteltdx::{Error as TdxError, IntelTdx}; + #[cfg(feature = "tee")] use kbs_types::Tee; @@ -45,7 +48,10 @@ use kvm_bindings::{ Msrs, KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_MAX_CPUID_ENTRIES, KVM_PIT_SPEAKER_DUMMY, }; -use kvm_bindings::{kvm_userspace_memory_region, KVM_API_VERSION}; +use kvm_bindings::{ + kvm_create_guest_memfd, kvm_memory_attributes, kvm_userspace_memory_region, + kvm_userspace_memory_region2, KVM_API_VERSION, +}; use kvm_ioctls::*; use utils::eventfd::EventFd; use utils::signal::{register_signal_handler, sigrtmin, Killable}; @@ -69,6 +75,9 @@ pub enum Error { #[cfg(target_arch = "x86_64")] /// A call to cpuid instruction failed. CpuId(cpuid::Error), + #[cfg(feature = "intel-tdx")] + /// Cannot create guest memfd + CreateGuestMemfd(kvm_ioctls::Error), #[cfg(target_arch = "x86_64")] /// Error configuring the floating point related registers FPUConfiguration(arch::x86_64::regs::Error), @@ -110,6 +119,12 @@ pub enum Error { SetupGIC(arch::aarch64::gic::Error), /// Cannot set the memory regions. SetUserMemoryRegion(kvm_ioctls::Error), + #[cfg(feature = "intel-tdx")] + /// Cannot set the memory regions. + SetUserMemoryRegion2(kvm_ioctls::Error), + #[cfg(feature = "intel-tdx")] + /// Cannot set the memory attributes + SetMemoryAttributes(kvm_ioctls::Error), /// Error creating memory map for SHM region. ShmMmap(io::Error), #[cfg(feature = "amd-sev")] @@ -130,6 +145,15 @@ pub enum Error { #[cfg(feature = "amd-sev")] /// Error attesting the Secure VM (SNP). SnpSecVirtAttest(SnpError), + #[cfg(feature = "intel-tdx")] + /// Error initializing the Trust Domain Extensions Backend (TDX) + TdxSecVirtInit(TdxError), + #[cfg(feature = "intel-tdx")] + /// Error preparing the VM for Trust Domain Extensions (TDX) + TdxSecVirtPrepare(TdxError), + #[cfg(feature = "intel-tdx")] + /// Error initializing vCPU for Trust Domain Extensions (TDX) + TdxSecVirtInitVcpu, #[cfg(feature = "tee")] /// The TEE specified is not supported. InvalidTee, @@ -246,6 +270,8 @@ impl Display for Error { match self { #[cfg(target_arch = "x86_64")] CpuId(e) => write!(f, "Cpuid error: {e:?}"), + #[cfg(feature = "intel-tdx")] + CreateGuestMemfd(e) => write!(f, "Failed to create guest memfd: {e:?}",), GuestMemoryMmap(e) => write!(f, "Guest memory error: {e:?}"), #[cfg(target_arch = "x86_64")] GuestMSRs(e) => write!(f, "Retrieving supported guest MSRs fails: {e:?}"), @@ -272,37 +298,56 @@ impl Display for Error { ), SetUserMemoryRegion(e) => write!(f, "Cannot set the memory regions: {e}"), ShmMmap(e) => write!(f, "Error creating memory map for SHM region: {e}"), - #[cfg(feature = "tee")] + #[cfg(feature = "intel-tdx")] + SetUserMemoryRegion2(e) => write!(f, "Cannot set the memory regions: {e:?}",), + #[cfg(feature = "intel-tdx")] + SetMemoryAttributes(e) => write!(f, "Cannot set the memory attributes: {e:?}",), + #[cfg(feature = "amd-sev")] SevSecVirtInit(e) => { write!( f, "Error initializing the Secure Virtualization Backend (SEV): {e:?}" ) } - #[cfg(feature = "tee")] + #[cfg(feature = "amd-sev")] SevSecVirtPrepare(e) => write!( f, "Error preparing the VM for Secure Virtualization (SEV): {e:?}" ), - #[cfg(feature = "tee")] + #[cfg(feature = "amd-sev")] SevSecVirtAttest(e) => write!(f, "Error attesting the Secure VM (SEV): {e:?}"), - #[cfg(feature = "tee")] + #[cfg(feature = "amd-sev")] SnpSecVirtInit(e) => write!( f, "Error initializing the Secure Virtualization Backend (SEV): {e:?}" ), - #[cfg(feature = "tee")] + #[cfg(feature = "amd-sev")] SnpSecVirtPrepare(e) => write!( f, "Error preparing the VM for Secure Virtualization (SNP): {e:?}" ), - #[cfg(feature = "tee")] + #[cfg(feature = "amd-sev")] SnpSecVirtAttest(e) => write!(f, "Error attesting the Secure VM (SNP): {e:?}"), SignalVcpu(e) => write!(f, "Failed to signal Vcpu: {e}"), + #[cfg(feature = "intel-tdx")] + TdxSecVirtInit(e) => write!( + f, + "Error initializing the Trust Domain Extensions Backend (TDX): {e:?}" + ), + #[cfg(feature = "intel-tdx")] + TdxSecVirtPrepare(e) => write!( + f, + "Error preparing the VM for Trust Domain Extensions (TDX): {e:?}" + ), + #[cfg(feature = "intel-tdx")] + TdxSecVirtInitVcpu => write!( + f, + "Error initializing vCPU for Trust Domain Extensions (TDX)" + ), #[cfg(feature = "tee")] MissingTeeConfig => write!(f, "Missing TEE configuration"), #[cfg(target_arch = "x86_64")] @@ -474,7 +519,10 @@ pub struct Vm { #[cfg(feature = "amd-sev")] snp: Option, - #[cfg(feature = "amd-sev")] + #[cfg(feature = "intel-tdx")] + tdx: Option, + + #[cfg(feature = "tee")] pub tee: Tee, } @@ -537,6 +585,31 @@ impl Vm { }) } + #[cfg(feature = "intel-tdx")] + pub fn new(kvm: &Kvm, tee_config: &TeeConfig) -> Result { + // create fd for interacting with kvm-vm specific functions + let vm_fd = kvm + .create_vm_with_type(tdx::launch::KVM_X86_TDX_VM) + .map_err(Error::VmFd)?; + + let supported_cpuid = kvm + .get_supported_cpuid(KVM_MAX_CPUID_ENTRIES) + .map_err(Error::VmFd)?; + + let supported_msrs = + arch::x86_64::msr::supported_guest_msrs(kvm).map_err(Error::GuestMSRs)?; + + let tdx = IntelTdx::new(&vm_fd).map_err(Error::TdxSecVirtInit)?; + Ok(Vm { + fd: vm_fd, + next_mem_slot: 0, + supported_cpuid, + supported_msrs, + tdx: Some(tdx), + tee: tee_config.tee, + }) + } + /// Returns a ref to the supported `CpuId` for this Vm. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn supported_cpuid(&self) -> &CpuId { @@ -562,20 +635,70 @@ impl Vm { // It's safe to unwrap because the guest address is valid. let host_addr = guest_mem.get_host_address(region.start_addr()).unwrap(); debug!("Guest memory starts at {:x?}", host_addr); - let memory_region = kvm_userspace_memory_region { - slot: self.next_mem_slot, - guest_phys_addr: region.start_addr().raw_value(), - memory_size: region.len(), - userspace_addr: host_addr as u64, - flags: 0, - }; - // Safe because we mapped the memory region, we made sure that the regions - // are not overlapping. - unsafe { + + #[cfg(feature = "intel-tdx")] + { + let gmem = kvm_create_guest_memfd { + size: region.len(), + flags: 0, + reserved: [0; 6], + }; + let gmem = self + .fd + .create_guest_memfd(gmem) + .map_err(Error::CreateGuestMemfd)?; + + let memory_region = kvm_userspace_memory_region2 { + slot: self.next_mem_slot, + // KVM_MEM_GUEST_MEMFD + flags: 1 << 2, + guest_phys_addr: region.start_addr().raw_value(), + memory_size: region.len(), + userspace_addr: host_addr as u64, + guest_memfd_offset: 0, + guest_memfd: gmem as u32, + pad1: 0, + pad2: [0; 14], + }; + + // Safe because we mapped the memory region, we made sure that the regions + // are not overlapping. + unsafe { + self.fd + .set_user_memory_region2(memory_region) + .map_err(Error::SetUserMemoryRegion2)?; + } + + let attr = kvm_memory_attributes { + address: region.start_addr().raw_value(), + size: region.len() as u64, + // KVM_MEMORY_ATTRIBUTE_PRIVATE, + attributes: 1 << 3, + flags: 0, + }; + self.fd - .set_user_memory_region(memory_region) - .map_err(Error::SetUserMemoryRegion)?; - }; + .set_memory_attributes(attr) + .map_err(Error::SetMemoryAttributes)?; + } + + #[cfg(not(feature = "intel-tdx"))] + { + let memory_region = kvm_userspace_memory_region { + slot: self.next_mem_slot, + guest_phys_addr: region.start_addr().raw_value(), + memory_size: region.len(), + userspace_addr: host_addr as u64, + flags: 0, + }; + // Safe because we mapped the memory region, we made sure that the regions + // are not overlapping. + unsafe { + self.fd + .set_user_memory_region(memory_region) + .map_err(Error::SetUserMemoryRegion)?; + }; + } self.next_mem_slot += 1; } @@ -587,6 +710,50 @@ impl Vm { Ok(()) } + #[cfg(feature = "intel-tdx")] + pub fn tdx_secure_virt_prepare(&self) -> Result<()> { + match &self.tdx { + Some(t) => t + .vm_prepare(&self.fd, self.supported_cpuid.clone()) + .map_err(Error::TdxSecVirtPrepare), + None => Err(Error::InvalidTee), + } + } + + #[cfg(feature = "intel-tdx")] + pub fn tdx_secure_virt_get_tdvf_hob_section_address(&self) -> Result { + match &self.tdx { + // Some(t) => t.get_tdvf_hob_address().map_err(Error::TdxSecVirtPrepare), + Some(t) => Ok(0), + None => Err(Error::InvalidTee), + } + } + + #[cfg(feature = "intel-tdx")] + pub fn tdx_secure_virt_prepare_memory( + &self, + // guest_mem: &mut GuestMemoryMmap, + // ram_entries: &mut Vec, + // nr_ram_entries: &mut u64, + regions: &Vec, + ) -> Result<()> { + match &self.tdx { + Some(t) => t + // .configure_td_memory(&self.fd, guest_mem, ram_entries, nr_ram_entries) + .configure_td_memory(&self.fd, ®ions) + .map_err(Error::TdxSecVirtPrepare), + None => Err(Error::InvalidTee), + } + } + + #[cfg(feature = "intel-tdx")] + pub fn tdx_secure_virt_finalize_vm(&self) -> Result<()> { + match &self.tdx { + Some(t) => t.finalize_vm(&self.fd).map_err(Error::TdxSecVirtPrepare), + None => Err(Error::InvalidTee), + } + } + #[cfg(feature = "amd-sev")] pub fn sev_secure_virt_prepare( &mut self, @@ -999,13 +1166,16 @@ impl Vcpu { .set_cpuid2(&self.cpuid) .map_err(Error::VcpuSetCpuid)?; - arch::x86_64::msr::setup_msrs(&self.fd).map_err(Error::MSRSConfiguration)?; - arch::x86_64::regs::setup_regs(&self.fd, kernel_start_addr.raw_value(), self.id) - .map_err(Error::REGSConfiguration)?; - arch::x86_64::regs::setup_fpu(&self.fd).map_err(Error::FPUConfiguration)?; - arch::x86_64::regs::setup_sregs(guest_mem, &self.fd, self.id) - .map_err(Error::SREGSConfiguration)?; - arch::x86_64::interrupts::set_lint(&self.fd).map_err(Error::LocalIntConfiguration)?; + #[cfg(not(feature = "intel-tdx"))] + { + arch::x86_64::msr::setup_msrs(&self.fd).map_err(Error::MSRSConfiguration)?; + arch::x86_64::regs::setup_regs(&self.fd, kernel_start_addr.raw_value(), self.id) + .map_err(Error::REGSConfiguration)?; + arch::x86_64::regs::setup_fpu(&self.fd).map_err(Error::FPUConfiguration)?; + arch::x86_64::regs::setup_sregs(guest_mem, &self.fd, self.id) + .map_err(Error::SREGSConfiguration)?; + } + // arch::x86_64::interrupts::set_lint(&self.fd).map_err(Error::LocalIntConfiguration)?; Ok(()) } @@ -1196,48 +1366,63 @@ impl Vcpu { /// /// Returns error or enum specifying whether emulation was handled or interrupted. fn run_emulation(&mut self) -> Result { + // println!("Running emulation"); match self.fd.run() { Ok(run) => match run { #[cfg(target_arch = "x86_64")] VcpuExit::IoIn(addr, data) => { + // println!("IO IN"); self.io_bus.read(0, u64::from(addr), data); + // println!("{:?}", data); Ok(VcpuEmulation::Handled) } #[cfg(target_arch = "x86_64")] VcpuExit::IoOut(addr, data) => { + // println!("IO OUT"); self.io_bus.write(0, u64::from(addr), data); + // print!("{}", String::from_utf8_lossy(data)); Ok(VcpuEmulation::Handled) } VcpuExit::MmioRead(addr, data) => { + println!("MMIO READ"); if let Some(ref mmio_bus) = self.mmio_bus { mmio_bus.read(0, addr, data); } Ok(VcpuEmulation::Handled) } VcpuExit::MmioWrite(addr, data) => { + println!("MMIO WRITE"); if let Some(ref mmio_bus) = self.mmio_bus { mmio_bus.write(0, addr, data); } Ok(VcpuEmulation::Handled) } VcpuExit::Hlt => { + println!("HERE HLT"); info!("Received KVM_EXIT_HLT signal"); Ok(VcpuEmulation::Stopped) } VcpuExit::Shutdown => { + println!("HERE SHUTDOWN"); info!("Received KVM_EXIT_SHUTDOWN signal"); Ok(VcpuEmulation::Stopped) } // Documentation specifies that below kvm exits are considered // errors. VcpuExit::FailEntry(reason, vcpu) => { + println!("FAIL ENTRY"); error!("Received KVM_EXIT_FAIL_ENTRY signal: reason={reason}, vcpu={vcpu}"); Err(Error::VcpuUnhandledKvmExit) } VcpuExit::InternalError => { + println!("INTERNAL ERROR"); error!("Received KVM_EXIT_INTERNAL_ERROR signal"); Err(Error::VcpuUnhandledKvmExit) } + VcpuExit::Unknown => { + println!("unknown exit"); + Err(Error::VcpuUnhandledKvmExit) + } r => { // TODO: Are we sure we want to finish running a vcpu upon // receiving a vm exit that is not necessarily an error? @@ -1249,8 +1434,15 @@ impl Vcpu { // error in our code in which case it is better to panic. Err(ref e) => { match e.errno() { - libc::EAGAIN => Ok(VcpuEmulation::Handled), + libc::EAGAIN => { + println!("AGAIN!"); + Ok(VcpuEmulation::Handled) + }, libc::EINTR => { + println!("KVM_RUN exited: Err(EINTR)"); + println!( + "//setting immediate_exit = 0, passing interrupt handling to the guest" + ); self.fd.set_kvm_immediate_exit(0); // Notify that this KVM_RUN was interrupted. Ok(VcpuEmulation::Interrupted) @@ -1380,6 +1572,13 @@ impl Vcpu { StateMachine::finish() } + #[cfg(feature = "intel-tdx")] + pub fn tdx_secure_virt_init(&self, hob_addr: u64, cpuid: &CpuId) -> Result<()> { + self.fd.set_cpuid2(cpuid).unwrap(); + tdx::launch::TdxVcpu::init_raw(&self.fd, hob_addr) + .or_else(|_| return Err(Error::TdxSecVirtInitVcpu)) + } + #[cfg(test)] // In tests the main/vmm thread exits without 'exit()'ing the whole process. // All channels get closed on the other side while this Vcpu thread is still running. diff --git a/src/vmm/src/vmm_config/boot_source.rs b/src/vmm/src/vmm_config/boot_source.rs index 9c70d5b1..961a5a83 100644 --- a/src/vmm/src/vmm_config/boot_source.rs +++ b/src/vmm/src/vmm_config/boot_source.rs @@ -20,7 +20,7 @@ use std::fmt::{Display, Formatter, Result}; pub const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=-1 panic_print=0 nomodule console=hvc0 \ rootfstype=virtiofs rw quiet no-kvmapf"; -#[cfg(feature = "amd-sev")] +#[cfg(feature = "tee")] pub const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=-1 panic_print=0 nomodule console=hvc0 \ root=/dev/vda rw quiet no-kvmapf"; #[cfg(target_os = "macos")]