From 844321eedc5921a0d3742776b70d84859c99c229 Mon Sep 17 00:00:00 2001 From: Changyuan Lyu Date: Sat, 23 Nov 2024 22:50:52 -0800 Subject: [PATCH] wip: msi cap Signed-off-by: Changyuan Lyu --- alioth/src/pci/cap.rs | 15 ++- alioth/src/vfio/device.rs | 17 ++- alioth/src/vfio/pci.rs | 249 +++++++++++++++++++++++++++++++------- 3 files changed, 228 insertions(+), 53 deletions(-) diff --git a/alioth/src/pci/cap.rs b/alioth/src/pci/cap.rs index 39c65cb..475cb3c 100644 --- a/alioth/src/pci/cap.rs +++ b/alioth/src/pci/cap.rs @@ -86,24 +86,26 @@ bitfield! { impl Debug; pub enable, set_enable: 0; pub multi_msg_cap, set_multi_msg_cap: 3, 1; - pub multi_msg_enable, set_multi_msg_enable: 6, 4; - pub addr_64, set_addr_64: 7; - pub per_vector_masking, set_per_vector_masking: 8; + pub multi_msg, set_multi_msg: 6, 4; + pub addr_64_cap, set_addr_64_cap: 7; + pub per_vector_masking_cap, set_per_vector_masking_cap: 8; pub ext_msg_data_cap, set_ext_msg_data_cap: 9; - pub ext_msg_data_enable, set_ext_msg_data_enable: 10; + pub ext_msg_data, set_ext_msg_data: 10; } impl MsiMsgCtrl { pub fn cap_size(&self) -> u8 { let mut size = 12; - if self.addr_64() { + if self.addr_64_cap() { size += 4; } - if self.per_vector_masking() { + if self.per_vector_masking_cap() { size += 8; } size } + + pub const WRITABLE: u16 = 1 | 0b111 << 4 | 1 << 10; } #[derive(Debug, Default, Clone, FromBytes, Immutable, IntoBytes, Layout)] @@ -112,6 +114,7 @@ pub struct MsiCapHdr { pub header: PciCapHdr, pub control: MsiMsgCtrl, } +impl_mmio_for_zerocopy!(MsiCapHdr); bitfield! 
{ #[derive(Copy, Clone, Default, FromBytes, Immutable, IntoBytes, KnownLayout)] diff --git a/alioth/src/vfio/device.rs b/alioth/src/vfio/device.rs index baf7557..50b155f 100644 --- a/alioth/src/vfio/device.rs +++ b/alioth/src/vfio/device.rs @@ -19,7 +19,10 @@ use std::os::fd::AsRawFd; use std::os::unix::fs::FileExt; use crate::mem; -use crate::vfio::bindings::{VfioDeviceInfo, VfioIrqInfo, VfioIrqSet, VfioRegionInfo}; +use crate::vfio::bindings::{ + VfioDeviceInfo, VfioIrqInfo, VfioIrqSet, VfioIrqSetData, VfioIrqSetFlag, VfioPciIrq, + VfioRegionInfo, +}; use crate::vfio::ioctls::{ vfio_device_get_info, vfio_device_get_irq_info, vfio_device_get_region_info, vfio_device_reset, vfio_device_set_irqs, @@ -63,6 +66,18 @@ pub trait Device: Debug + Send + Sync + 'static { Ok(()) } + fn disable_all_irqs(&self, index: VfioPciIrq) -> Result<()> { + let vfio_irq_disable_all = VfioIrqSet { + argsz: size_of::>() as u32, + flags: VfioIrqSetFlag::DATA_NONE | VfioIrqSetFlag::ACTION_TRIGGER, + index: index.raw(), + start: 0, + count: 0, + data: VfioIrqSetData { eventfds: [] }, + }; + self.set_irqs(&vfio_irq_disable_all) + } + fn reset(&self) -> Result<()> { unsafe { vfio_device_reset(self.fd()) }?; Ok(()) diff --git a/alioth/src/vfio/pci.rs b/alioth/src/vfio/pci.rs index 678b739..13e823b 100644 --- a/alioth/src/vfio/pci.rs +++ b/alioth/src/vfio/pci.rs @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use std::cmp::min; use std::fs::File; +use std::iter::zip; use std::mem::size_of; use std::ops::Range; use std::os::fd::{AsFd, AsRawFd}; @@ -21,9 +23,10 @@ use std::sync::atomic::AtomicU64; use std::sync::Arc; use libc::{PROT_READ, PROT_WRITE}; +use macros::Layout; use parking_lot::{Mutex, RwLock}; use snafu::ResultExt; -use zerocopy::{transmute, FromBytes}; +use zerocopy::{transmute, FromBytes, Immutable, IntoBytes}; use crate::errors::boxed_debug_trace; use crate::hv::{IrqFd, MsiSender}; @@ -31,8 +34,8 @@ use crate::mem::emulated::{Action, Mmio, MmioBus}; use crate::mem::mapped::ArcMemPages; use crate::mem::{IoRegion, MemRange, MemRegion, MemRegionEntry, MemRegionType}; use crate::pci::cap::{ - MsiCapHdr, MsixCap, MsixCapMmio, MsixTableEntry, MsixTableMmio, MsixTableMmioEntry, NullCap, - PciCapHdr, PciCapId, + MsiCapHdr, MsiMsgCtrl, MsixCap, MsixCapMmio, MsixTableEntry, MsixTableMmio, MsixTableMmioEntry, + NullCap, PciCapHdr, PciCapId, }; use crate::pci::config::{ Command, CommonHeader, ConfigHeader, DeviceHeader, EmulatedHeader, HeaderData, HeaderType, @@ -45,7 +48,7 @@ use crate::vfio::bindings::{ }; use crate::vfio::device::Device; use crate::vfio::{error, Result}; -use crate::{align_down, align_up, mem}; +use crate::{align_down, align_up, impl_mmio_for_zerocopy, mem}; fn round_up_range(range: Range) -> Range { (align_down!(range.start, 12))..(align_up!(range.end, 12)) @@ -348,10 +351,13 @@ where { pub fn new(name: Arc, dev: D, msi_sender: M) -> Result> { let cdev = Arc::new(VfioDev { dev, name }); + cdev.dev.reset()?; + + let msi_sender = Arc::new(msi_sender); let region_config = cdev.dev.get_region_info(VfioPciRegion::CONFIG.raw())?; - let pci_command = Command::MEM | Command::BUS_MASTER | Command::INTX_DISABLE; + let pci_command = Command::IO | Command::MEM | Command::BUS_MASTER | Command::INTX_DISABLE; cdev.dev.write( region_config.offset + CommonHeader::OFFSET_COMMAND as u64, CommonHeader::SIZE_COMMAND as u8, @@ -371,8 +377,9 @@ where dev_header.intx_pin 
= 0; dev_header.common.command = Command::empty(); - let mut msix_cap = None; let mut masked_caps: Vec<(u64, Box)> = vec![]; + let mut msix_info = None; + let mut msi_info = None; if dev_header.common.status.contains(Status::CAP) { let mut cap_offset = dev_header.capability_pointer as usize; @@ -392,34 +399,73 @@ where }; c.control.set_enabled(false); c.control.set_masked(false); - masked_caps.push(( - cap_offset as u64, - Box::new(MsixCapMmio { - cap: RwLock::new(c.clone()), - }), - )); - msix_cap = Some(c); + msix_info = Some((cap_offset, c.clone())); } else if cap_header.id == PciCapId::Msi as u8 { - let Ok((c, _)) = MsiCapHdr::read_from_prefix(cap_buf) else { + let Ok((mut c, _)) = MsiCapHdr::read_from_prefix(cap_buf) else { log::error!( "{}: MSI capability is at an invalid offset: {cap_offset:#x}", cdev.name ); continue; }; - log::trace!("{}: hiding MSI cap at {cap_offset:#x}", cdev.name); - masked_caps.push(( - cap_offset as u64, - Box::new(NullCap { - next: cap_header.next, - size: c.control.cap_size(), - }), - )); + log::info!("{}: MSI cap header: {c:#x?}", cdev.name); + c.control.set_enable(false); + c.control.set_ext_msg_data_cap(true); + let multi_msg_cap = min(5, c.control.multi_msg_cap()); + c.control.set_multi_msg_cap(multi_msg_cap); + msi_info = Some((cap_offset, c)); } cap_offset = cap_header.next as usize; } } + let mut msix_cap = None; + if let Some((offset, cap)) = msix_info { + msix_cap = Some(cap.clone()); + let msix_cap_mmio = MsixCapMmio { + cap: RwLock::new(cap), + }; + masked_caps.push((offset as u64, Box::new(msix_cap_mmio))); + if let Some((offset, hdr)) = msi_info { + let null_cap = NullCap { + size: hdr.control.cap_size(), + next: hdr.header.next, + }; + masked_caps.push((offset as u64, Box::new(null_cap))); + } + } else if let Some((offset, hdr)) = msi_info { + let count = 1 << hdr.control.multi_msg_cap(); + let irqfds = (0..count) + .map(|_| msi_sender.create_irqfd()) + .collect::, _>>()?; + + let mut eventfds = [-1; 32]; + for (fd, 
irqfd) in zip(&mut eventfds, &irqfds) { + *fd = irqfd.as_fd().as_raw_fd(); + } + let set_eventfd = VfioIrqSet { + argsz: (size_of::>() + size_of::() * count) as u32, + flags: VfioIrqSetFlag::DATA_EVENTFD | VfioIrqSetFlag::ACTION_TRIGGER, + index: VfioPciIrq::MSI.raw(), + start: 0, + count: count as u32, + data: VfioIrqSetData { eventfds }, + }; + log::info!( + "{}: update msi eventfds to {:?}", + cdev.name, + &eventfds[0..count] + ); + cdev.dev.set_irqs(&set_eventfd)?; + + let msi_cap_mmio = MsiCapMmio:: { + cap: RwLock::new((hdr, MsiCapBody { data: [0; 4] })), + dev: cdev.clone(), + irqfds, + }; + masked_caps.push((offset as u64, Box::new(msi_cap_mmio))); + } + let mut extra_areas: MmioBus> = MmioBus::new(); masked_caps.sort_by_key(|(offset, _)| *offset); let mut area_end = 0x40; @@ -450,7 +496,6 @@ where let config_header = ConfigHeader::Device(dev_header); - cdev.dev.reset()?; let msix_info = cdev.dev.get_irq_info(VfioPciIrq::MSIX.raw())?; let msix_entries = RwLock::new( (0..msix_info.count) @@ -461,7 +506,6 @@ where let msix_table = Arc::new(MsixTableMmio { entries: msix_entries, }); - let msi_sender = Arc::new(msi_sender); let mut bars = [const { PciBar::Empty }; 6]; let mut bar_masks = [0u32; 6]; @@ -530,15 +574,15 @@ where } fn reset(&self) -> Result<()> { - let disable_msix = VfioIrqSet { - argsz: size_of::>() as u32, - flags: VfioIrqSetFlag::DATA_NONE | VfioIrqSetFlag::ACTION_TRIGGER, - index: VfioPciIrq::MSIX.raw(), - start: 0, - count: 0, - data: VfioIrqSetData { eventfds: [] }, - }; - self.config.dev.dev.set_irqs(&disable_msix)?; + // let disable_msix = VfioIrqSet { + // argsz: size_of::>() as u32, + // flags: VfioIrqSetFlag::DATA_NONE | VfioIrqSetFlag::ACTION_TRIGGER, + // index: VfioPciIrq::MSIX.raw(), + // start: 0, + // count: 0, + // data: VfioIrqSetData { eventfds: [] }, + // }; + // self.config.dev.dev.set_irqs(&disable_msix)?; self.msix_table.reset(); self.config.dev.dev.reset() @@ -567,18 +611,6 @@ where M: MsiSender, D: Device, { - fn 
disable_all_irqs(&self) -> Result<()> { - let vfio_irq_disable_all = VfioIrqSet { - argsz: size_of::>() as u32, - flags: VfioIrqSetFlag::DATA_NONE | VfioIrqSetFlag::ACTION_TRIGGER, - index: VfioPciIrq::MSIX.raw(), - start: 0, - count: 0, - data: VfioIrqSetData { eventfds: [] }, - }; - self.cdev.dev.set_irqs(&vfio_irq_disable_all) - } - fn enable_irqfd(&self, index: usize) -> Result<()> { let mut entries = self.table.entries.write(); let Some(entry) = entries.get_mut(index) else { @@ -611,7 +643,7 @@ where // subindex for the first time. // As long as the following set_irqs() succeeds, we can safely ignore // the error here. - let _ = self.disable_all_irqs(); + let _ = self.cdev.dev.disable_all_irqs(VfioPciIrq::MSIX); let mut eventfds = [-1; 2048]; let mut count = 0; @@ -681,3 +713,128 @@ where Ok(Action::None) } } + +#[derive(Debug, Default, Clone, FromBytes, Immutable, IntoBytes, Layout)] +#[repr(C)] +struct MsiCapBody { + data: [u32; 4], +} +impl_mmio_for_zerocopy!(MsiCapBody); + +#[derive(Debug)] +struct MsiCapMmio +where + M: MsiSender, +{ + cap: RwLock<(MsiCapHdr, MsiCapBody)>, + dev: Arc>, + irqfds: Box<[M::IrqFd]>, +} + +impl MsiCapMmio +where + M: MsiSender, + D: Device, +{ + fn update_msi(&self, ctrl: MsiMsgCtrl, data: &[u32; 4]) -> Result<()> { + let msg_mask = if ctrl.ext_msg_data() { + u32::MAX + } else { + 0xffff + }; + let (addr, msg) = if ctrl.addr_64_cap() { + ((data[1] as u64) << 32 | data[0] as u64, data[2] & msg_mask) + } else { + (data[0] as u64, data[1] & msg_mask) + }; + let mask = match (ctrl.addr_64_cap(), ctrl.per_vector_masking_cap()) { + (true, true) => data[3], + (false, true) => data[2], + (_, false) => 0, + }; + let count = 1 << ctrl.multi_msg(); + for (index, irqfd) in self.irqfds.iter().enumerate() { + irqfd.set_masked(true)?; + if !ctrl.enable() || index >= count || mask & (1 << index) > 0 { + continue; + } + let msg = msg | index as u32; + irqfd.set_addr_hi((addr >> 32) as u32)?; + irqfd.set_addr_lo(addr as u32)?; + 
irqfd.set_data(msg)?; + irqfd.set_masked(false)?; + } + Ok(()) + } +} + +impl Mmio for MsiCapMmio +where + D: Device, + M: MsiSender, +{ + fn size(&self) -> u64 { + let (hdr, _) = &*self.cap.read(); + hdr.control.cap_size() as u64 + } + fn read(&self, offset: u64, size: u8) -> mem::Result { + let (hdr, body) = &*self.cap.read(); + let ctrl = hdr.control; + match offset { + 0..4 => hdr.read(offset, size), + 0x10 if ctrl.per_vector_masking_cap() && !ctrl.addr_64_cap() => Ok(0), + 0x14 if ctrl.per_vector_masking_cap() && ctrl.addr_64_cap() => Ok(0), + _ => body.read(offset - size_of_val(hdr) as u64, size), + } + } + fn write(&self, offset: u64, size: u8, val: u64) -> mem::Result { + log::info!( + "{}: write 0x{val:0width$x} to offset 0x{offset:x}.", + self.dev.name, + width = 2 * size as usize + ); + let (hdr, body) = &mut *self.cap.write(); + let mut need_update = false; + match (offset as usize, size) { + (0x2, 2) => { + let ctrl = &mut hdr.control; + let new_ctrl = MsiMsgCtrl(val as u16); + if !ctrl.enable() || !new_ctrl.enable() { + let multi_msg = min(ctrl.multi_msg_cap(), new_ctrl.multi_msg()); + ctrl.set_multi_msg(multi_msg); + } + need_update = ctrl.enable() != new_ctrl.enable() + || (new_ctrl.enable() && ctrl.ext_msg_data() != new_ctrl.ext_msg_data()); + ctrl.set_ext_msg_data(new_ctrl.ext_msg_data()); + ctrl.set_enable(new_ctrl.enable()); + } + (0x4 | 0x8 | 0xc | 0x10, 2 | 4) => { + let data_offset = (offset as usize - size_of_val(hdr)) >> 2; + let reg = &mut body.data[data_offset]; + need_update = hdr.control.enable() && *reg != val as u32; + *reg = val as u32; + } + _ => log::error!( + "{}: write 0x{val:0width$x} to invalid offset 0x{offset:x}.", + self.dev.name, + width = 2 * size as usize + ), + } + if need_update { + self.update_msi(hdr.control, &body.data) + .map_err(boxed_debug_trace) + .context(mem::error::Mmio)?; + } + Ok(Action::None) + } +} + +impl PciConfigArea for MsiCapMmio +where + D: Device, + M: MsiSender, +{ + fn reset(&self) { + // TODO + } 
+}