GPU: Check that an instance with a CDI GPU device can be started even after its host abruptly crashes and reboots #393

Draft · wants to merge 1 commit into base: main
62 changes: 61 additions & 1 deletion tests/gpu-container
Member:
Do we need to use a VM for this? Isn't one of the points of CDI that we support nesting (such as Docker) inside a container? Could we use a nested LXD inside an LXD container to test this crash support instead?

Contributor Author:
Yes, true: a nested LXD inside an LXD container should work. I'll test that approach.
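
A minimal sketch of that nested variant (untested; it assumes the same ${IMAGE}, ${LXD_SNAP_CHANNEL} and waitInstanceBooted helpers as the existing test, and that the outer container can itself be given the GPU via CDI):

# Outer container stands in for the host; security.nesting is required for the inner LXD.
lxc launch "${IMAGE}" outer -c security.nesting=true
lxc config device add outer gpu0 gpu id="nvidia.com/gpu=0"
waitInstanceBooted outer

lxc exec outer -- snap install lxd --channel="${LXD_SNAP_CHANNEL}"
lxc exec outer -- lxd init --auto

# Inner container gets the GPU via CDI, same as on the host.
lxc exec outer -- lxc init "${IMAGE}" inner
lxc exec outer -- lxc config device add inner gpu0 gpu id="nvidia.com/gpu=0"
lxc exec outer -- lxc start inner
lxc exec outer -- lxc exec inner -- nvidia-smi

# Force-stopping the outer container simulates the abrupt host crash.
lxc stop -f outer
lxc start outer
waitInstanceBooted outer

# The inner container should still start cleanly after the "crash".
lxc exec outer -- lxc start inner
lxc exec outer -- lxc exec inner -- nvidia-smi

lxc delete -f outer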

@@ -125,9 +125,69 @@ lxc config device add c1 gpu0 gpu id="nvidia.com/gpu=0"
lxc start c1
[ "$(lxc exec c1 -- ls /dev/dri/ | grep -c '^card[0-9]')" = "1" ] || false
lxc exec c1 -- nvidia-smi
lxc delete -f c1

# Check that CDI device files are cleanly removed even if the host machine is abruptly rebooted
echo "==> Testing that CDI device files are cleanly removed after abrupt reboot"
lxc init "${IMAGE}" v1 --vm
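# If the extension is available, expose the host's image store to the VM over devlxd
# so the nested LXD below can reuse ${IMAGE} rather than downloading it again.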
if hasNeededAPIExtension devlxd_images_vm; then
lxc config set v1 security.devlxd.images=true
fi

lxc config device add v1 gpu0 gpu pci="${first_card_pci_slot}"
lxc start v1
echo "==> Waiting for the VM agent to be ready"
waitInstanceBooted v1

echo "==> Installing NVIDIA drivers inside the VM"
lxc exec v1 -- apt-get update
lxc exec v1 --env DEBIAN_FRONTEND=noninteractive -- apt-get install -y ubuntu-drivers-common
lxc exec v1 --env DEBIAN_FRONTEND=noninteractive -- ubuntu-drivers autoinstall

echo "==> Rebooting the VM to load NVIDIA drivers"
lxc restart v1

waitInstanceBooted v1

echo "==> Verifying NVIDIA driver installation in the VM"
lxc exec v1 -- nvidia-smi

echo "==> Installing LXD inside the VM"
lxc exec v1 -- snap install lxd --channel="${LXD_SNAP_CHANNEL}"

echo "==> Initializing LXD inside the VM"
lxc exec v1 -- lxd init --auto

echo "==> Launching a container inside the VM"
lxc exec v1 -- lxc init "${IMAGE}" c1

echo "==> Adding GPU to the container inside the VM using CDI"
lxc exec v1 -- lxc config device add c1 gpu0 gpu id="nvidia.com/gpu=0"
lxc exec v1 -- lxc start c1
# Wait for the container to be ready
sleep 20
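# (Possible alternative, untested: poll instead of a fixed sleep, e.g.
#   timeout 60 bash -c 'until lxc exec v1 -- lxc exec c1 -- true; do sleep 2; done'
# which avoids relying on an arbitrary 20s delay.)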

echo "==> Verifying GPU access inside the container"
lxc exec v1 -- lxc exec c1 -- nvidia-smi

echo "==> Simulating abrupt reboot by force-stopping the VM"
lxc stop v1 -f

echo "==> Starting the VM again"
lxc start v1

waitInstanceBooted v1

echo "==> Starting the container inside the VM after reboot"
lxc exec v1 -- lxc start c1

echo "==> Verifying GPU access inside the container after VM reboot"
lxc exec v1 -- lxc exec c1 -- nvidia-smi

echo "==> Cleaning up the VM"
lxc delete v1 -f

echo "==> Cleaning up"
lxc delete -f c1
lxc profile device remove default root
lxc profile device remove default eth0
lxc storage delete default