Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NVIDIA gpu support #165

Merged
merged 3 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,26 @@ Examples
See also: `.github/workflows/run.yml` as a practical example on how to use
virtme-ng inside docker.

- Run virtme-ng with GPU passthrough:
```
# Confirm host kernel has VFIO and IOMMU support
# Check if NVIDIA module is installed on the host
$ modinfo nvidia
# If the nvidia module is installed, blacklist the nvidia modules
$ sudo bash -c 'echo -e "blacklist nvidia\nblacklist nvidia-drm\nblacklist nvidia-modeset\nblacklist nvidia-peermem\nblacklist nvidia-uvm" > /etc/modprobe.d/blacklist-nvidia.conf'
# Host will need to be rebooted for blacklist to take effect.
# Get GPU device ID
$ lspci -nn | grep NVIDIA
0000:01:00.0 VGA compatible controller [0300]: NVIDIA Corporation AD104GLM [RTX 3500 Ada Generation Laptop GPU] [10de:27bb] (rev a1)
0000:01:00.1 Audio device [0403]: NVIDIA Corporation Device [10de:22bc] (rev a1)
# Configure VFIO for device passthrough
$ sudo bash -c 'echo "options vfio-pci ids=10de:27bb,10de:22bc" > /etc/modprobe.d/vfio.conf'
# Load VFIO module
$ sudo modprobe vfio-pci
# Pass PCI address to virtme-ng
$ sudo vng --nvgpu "01:00.0" -r linux
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need sudo permission here to run vng? That might be the case because we need to access PCI directly, I guess... do you get a permission error if you try to run vng as a regular user?

```

Implementation details
======================

Expand Down
48 changes: 28 additions & 20 deletions virtme/architectures.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ def virtiofs_support() -> bool:
return False

@staticmethod
def qemuargs(is_native, use_kvm) -> List[str]:
def qemuargs(is_native, use_kvm, use_gpu) -> List[str]:
_ = is_native
_ = use_kvm
_ = use_gpu
return []

@staticmethod
Expand All @@ -50,6 +51,10 @@ def serial_console_args() -> List[str]:
def qemu_nodisplay_args() -> List[str]:
return ["-vga", "none", "-display", "none"]

# QEMU arguments that disable the display when an NVIDIA GPU is passed
# through to the guest (selected by the callers when --nvgpu is set).
# Unlike qemu_nodisplay_args(), this does not add "-vga none"; it only
# passes "-display none".
@staticmethod
def qemu_nodisplay_nvgpu_args() -> List[str]:
return ["-display", "none"]

# QEMU arguments used when a graphical display is requested: attaches a
# "virtio-gpu-pci" device to the guest.
@staticmethod
def qemu_display_args() -> List[str]:
return ["-device", "virtio-gpu-pci"]
Expand Down Expand Up @@ -81,8 +86,8 @@ def dtb_path() -> Optional[str]:

class Arch_unknown(Arch):
@staticmethod
def qemuargs(is_native, use_kvm):
return Arch.qemuargs(is_native, use_kvm)
def qemuargs(is_native, use_kvm, use_gpu):
return Arch.qemuargs(is_native, use_kvm, use_gpu)


class Arch_x86(Arch):
Expand All @@ -97,16 +102,19 @@ def virtiofs_support() -> bool:
return True

@staticmethod
def qemuargs(is_native, use_kvm):
ret = Arch.qemuargs(is_native, use_kvm)
def qemuargs(is_native, use_kvm, use_gpu):
ret = Arch.qemuargs(is_native, use_kvm, use_gpu)

# Add a watchdog. This is useful for testing.
ret.extend(["-device", "i6300esb,id=watchdog0"])

if is_native and use_kvm:
# If we're likely to use KVM, request a full-featured CPU.
# (NB: if KVM fails, this will cause problems. We should probe.)
ret.extend(["-cpu", "host"]) # We can't migrate regardless.
cpu_str = "host"
if use_gpu:
cpu_str += ",host-phys-bits-limit=0x28"
ret.extend(["-cpu", cpu_str])
else:
ret.extend(["-machine", "q35"])

Expand Down Expand Up @@ -182,8 +190,8 @@ def qemu_display_args() -> List[str]:
]

@staticmethod
def qemuargs(is_native, use_kvm):
ret = Arch.qemuargs(is_native, use_kvm)
def qemuargs(is_native, use_kvm, use_gpu):
ret = Arch.qemuargs(is_native, use_kvm, use_gpu)

# Use microvm architecture for faster boot
ret.extend(["-M", "microvm,accel=kvm,pcie=on,rtc=on"])
Expand All @@ -203,8 +211,8 @@ def __init__(self):
self.defconfig_target = "vexpress_defconfig"

@staticmethod
def qemuargs(is_native, use_kvm):
ret = Arch.qemuargs(is_native, use_kvm)
def qemuargs(is_native, use_kvm, use_gpu):
ret = Arch.qemuargs(is_native, use_kvm, use_gpu)

# Emulate a vexpress-a15.
ret.extend(["-M", "vexpress-a15"])
Expand Down Expand Up @@ -257,8 +265,8 @@ def virtiofs_support() -> bool:
return True

@staticmethod
def qemuargs(is_native, use_kvm):
ret = Arch.qemuargs(is_native, use_kvm)
def qemuargs(is_native, use_kvm, use_gpu):
ret = Arch.qemuargs(is_native, use_kvm, use_gpu)

if is_native:
ret.extend(["-M", "virt,gic-version=host"])
Expand Down Expand Up @@ -299,8 +307,8 @@ def __init__(self, name):
self.gccname = "powerpc64le"

@staticmethod
def qemuargs(is_native, use_kvm):
ret = Arch.qemuargs(is_native, use_kvm)
def qemuargs(is_native, use_kvm, use_gpu):
ret = Arch.qemuargs(is_native, use_kvm, use_gpu)
ret.extend(["-M", "pseries"])

return ret
Expand Down Expand Up @@ -337,8 +345,8 @@ def virtiofs_support() -> bool:
return True

@staticmethod
def qemuargs(is_native, use_kvm):
ret = Arch.qemuargs(is_native, use_kvm)
def qemuargs(is_native, use_kvm, use_gpu):
ret = Arch.qemuargs(is_native, use_kvm, use_gpu)
ret.extend(["-machine", "virt"])
ret.extend(["-bios", "default"])

Expand All @@ -362,8 +370,8 @@ def __init__(self):
self.gccname = "sparc64"

@staticmethod
def qemuargs(is_native, use_kvm):
return Arch.qemuargs(is_native, use_kvm)
def qemuargs(is_native, use_kvm, use_gpu):
return Arch.qemuargs(is_native, use_kvm, use_gpu)

def kimg_path(self):
return "arch/sparc/boot/image"
Expand All @@ -387,8 +395,8 @@ def virtio_dev_type(virtiotype):
return "virtio-%s-ccw" % virtiotype

@staticmethod
def qemuargs(is_native, use_kvm):
ret = Arch.qemuargs(is_native, use_kvm)
def qemuargs(is_native, use_kvm, use_gpu):
ret = Arch.qemuargs(is_native, use_kvm, use_gpu)

# Ask for the latest version of s390-ccw
ret.extend(["-M", "s390-ccw-virtio"])
Expand Down
20 changes: 16 additions & 4 deletions virtme/commands/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,9 @@ def make_parser() -> argparse.ArgumentParser:
help="Supply a directory that is r/w to the guest but read-only in the host. Use --overlay-rwdir=path.",
)

g.add_argument(
"--nvgpu", action="store", default=None, help="Set guest NVIDIA GPU."
)
return parser


Expand Down Expand Up @@ -1051,7 +1054,7 @@ def do_it() -> int:
qemuargs.extend(["-machine", "accel=kvm:tcg"])

# Add architecture-specific options
qemuargs.extend(arch.qemuargs(is_native, kvm_ok))
qemuargs.extend(arch.qemuargs(is_native, kvm_ok, args.nvgpu is not None))

# Set up / override baseline devices
qemuargs.extend(["-parallel", "none"])
Expand Down Expand Up @@ -1087,7 +1090,10 @@ def do_it() -> int:

kernelargs.extend(["virtme_console=" + arg for arg in arch.serial_console_args()])

qemuargs.extend(arch.qemu_nodisplay_args())
if args.nvgpu is None:
qemuargs.extend(arch.qemu_nodisplay_args())
else:
qemuargs.extend(arch.qemu_nodisplay_nvgpu_args())

# PS/2 probing is slow; give the kernel a hint to speed it up.
kernelargs.extend(["psmouse.proto=exps"])
Expand Down Expand Up @@ -1172,7 +1178,10 @@ def fetch_script_retcode():
def do_script(shellcmd: str, ret_path=None, show_boot_console=False) -> None:
if args.graphics is None:
# Turn off default I/O
qemuargs.extend(arch.qemu_nodisplay_args())
if args.nvgpu is None:
qemuargs.extend(arch.qemu_nodisplay_args())
else:
qemuargs.extend(arch.qemu_nodisplay_nvgpu_args())

# Check if we can redirect stdin/stdout/stderr.
if not can_access_file("/proc/self/fd/0") or \
Expand Down Expand Up @@ -1285,7 +1294,7 @@ def do_script(shellcmd: str, ret_path=None, show_boot_console=False) -> None:
show_boot_console=args.show_boot_console,
)

if args.graphics is not None:
if args.graphics is not None and args.nvgpu is None:
video_args = arch.qemu_display_args()
if video_args:
qemuargs.extend(video_args)
Expand Down Expand Up @@ -1339,6 +1348,9 @@ def do_script(shellcmd: str, ret_path=None, show_boot_console=False) -> None:
if args.user:
kernelargs.append("virtme_user=%s" % args.user)

if args.nvgpu:
qemuargs.extend(["-device", args.nvgpu])

# If we are running as root on the host pass this information to the guest
# (this can be useful to properly support running virtme-ng instances
# inside docker)
Expand Down
15 changes: 15 additions & 0 deletions virtme_ng/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,13 @@ def make_parser():
nargs="*",
help="Additional Makefile variables",
)

parser.add_argument(
"--nvgpu",
action="store",
metavar="[GPU PCI Address]",
help="Add a passthrough NVIDIA GPU",
)
return parser


Expand Down Expand Up @@ -1041,6 +1048,12 @@ def _get_virtme_qemu_opts(self, args):
else:
self.virtme_param["qemu_opts"] = ""

def _get_virtme_nvgpu(self, args):
    """Build the ``--nvgpu`` option that is forwarded to virtme-run.

    The result is stored in ``self.virtme_param["nvgpu"]``:

    * ``""`` when ``args.nvgpu`` is ``None`` (no GPU passthrough requested);
    * ``--nvgpu <device>`` otherwise, where ``<device>`` is the QEMU device
      string ``vfio-pci,host=<PCI address>``.

    The stored string is later concatenated into a command line that is
    executed through a shell, so the device string is quoted with
    ``shlex.quote`` instead of being wrapped in hand-written single quotes.
    An address containing a quote or other shell metacharacters can then
    neither break nor alter the command.
    """
    # Local import keeps this method self-contained regardless of the
    # module-level import list.
    import shlex

    if args.nvgpu is not None:
        device = shlex.quote(f"vfio-pci,host={args.nvgpu}")
        self.virtme_param["nvgpu"] = f"--nvgpu {device}"
    else:
        self.virtme_param["nvgpu"] = ""

def run(self, args):
"""Execute a kernel inside virtme-ng."""
self._get_virtme_name(args)
Expand Down Expand Up @@ -1076,6 +1089,7 @@ def run(self, args):
self._get_virtme_busybox(args)
self._get_virtme_qemu(args)
self._get_virtme_qemu_opts(args)
self._get_virtme_nvgpu(args)

# Start VM using virtme-run
cmd = (
Expand Down Expand Up @@ -1113,6 +1127,7 @@ def run(self, args):
+ f'{self.virtme_param["busybox"]} '
+ f'{self.virtme_param["qemu"]} '
+ f'{self.virtme_param["qemu_opts"]} '
+ f'{self.virtme_param["nvgpu"]} '
)
check_call(cmd, shell=True)

Expand Down
Loading