Skip to content

Instantly share code, notes, and snippets.

@vadimstasiev
Last active April 13, 2024 17:51
Show Gist options
  • Save vadimstasiev/d874464a8ecbc4008d330721a0dca385 to your computer and use it in GitHub Desktop.
Save vadimstasiev/d874464a8ecbc4008d330721a0dca385 to your computer and use it in GitHub Desktop.
Proxmox Single GPU Passthrough VFIO binding and unbinding
agent: 1
balloon: 0
bios: ovmf
boot: order=hostpci0;virtio1
cores: 24
cpu: host,hidden=1,flags=+pdpe1gb;+hv-tlbflush;+aes
efidisk0: local-zfs:vm-100-disk-0,efitype=4m,size=1M
hookscript: local:snippets/hookscript.sh
hostpci0: 0000:06:00,pcie=1,rombar=0,romfile=RTX3090-OG-Founders.bin
hostpci1: 0000:08:00.4
hostpci2: 0000:08:00.3
ide0: local:iso/virtio-win-0.1.240.iso,media=cdrom,size=612812K
ide2: local:iso/Win11_23H2_EnglishInternational_x64v2.iso,media=cdrom,size=6638374K
machine: pc-q35-8.1
memory: 45000
meta: creation-qemu=8.1.5,ctime=1709928892
net0: e1000=BC:24:11:AE:F3:9B,bridge=vmbr0
numa: 0
ostype: win11
scsihw: virtio-scsi-single
smbios1: uuid=9658f376-b663-485d-9bd2-7fb1b3c10976
sockets: 1
tpmstate0: local-zfs:vm-100-disk-2,size=4M,version=v2.0
vga: none
virtio0: /dev/sdb,aio=threads,backup=0,size=976762584K
virtio1: local-zfs:vm-100-disk-3,iothread=1,size=300G
vmgenid: 33c5c951-af72-454d-9b9e-9ae7bccac5fb
#!/bin/bash
# Proxmox hook script for VM 100: hands the GPU, its audio function, a USB
# controller and an audio DAC over to vfio-pci before the guest starts.

# Only invocations for this VM ID are acted upon.
VMID='100'
# CPU list passed to taskset for the VM process in the post-start phase.
CPUS='0-23'

# PCI addresses of the devices that are passed through.
GPU_PCI='0000:06:00.0'
GPU_AUDIO_PCI='0000:06:00.1'
USB_CONTROLLER_PCI='0000:08:00.3'
AUDIO_DAC='0000:08:00.4'

# Driver the devices are bound to during pre-start.
VFIO_DRIVER='vfio-pci'
# Marker file that exists while pre-start is rebinding devices.
LOCK_FILE='/tmp/binding_vfio.lock'
#######################################
# Remove a PCI device from the bus and trigger a bus rescan so the kernel
# re-discovers it.
# Arguments: $1 - PCI address (e.g. 0000:08:00.3)
# Returns:   1 if no address was given; otherwise the status of the
#            rescan write.
#######################################
force_disconnect_device() {
  local pci_id=$1
  local remove_node

  # An empty address would target "/sys/bus/pci/devices//remove" and then
  # rescan the bus for nothing; refuse before touching sysfs.
  if [[ -z "$pci_id" ]]; then
    echo "force_disconnect_device: missing PCI address" >&2
    return 1
  fi

  remove_node="/sys/bus/pci/devices/$pci_id/remove"
  if [[ -e "$remove_node" ]]; then
    echo 1 > "$remove_node"
  else
    # The device may already be gone; the rescan below can bring it back.
    echo "force_disconnect_device: $pci_id not present, rescanning only" >&2
  fi
  echo 1 > /sys/bus/pci/rescan
}
#######################################
# Detach a PCI device from whatever driver currently owns it and bind it to
# $VFIO_DRIVER.
# Globals:   VFIO_DRIVER (read)
# Arguments: $1 - PCI address
#            $2 - seconds to pause after the unbind (optional, default 0)
# Returns:   status of the final bind write
#######################################
bind_to_vfio() {
  local pci_id=$1
  local settle_secs=${2:-0}
  local dev_dir="/sys/bus/pci/devices/$pci_id"

  # The "driver" link only exists while a driver is attached; writing to
  # driver/unbind without one just fails on a missing path.
  if [[ -e "$dev_dir/driver" ]]; then
    echo "$pci_id" > "$dev_dir/driver/unbind"
  fi
  if (( settle_secs > 0 )); then
    sleep "$settle_secs"
  fi
  # echo 14 > "$dev_dir/resource1_resize"
  echo "$VFIO_DRIVER" > "$dev_dir/driver_override"
  echo "$pci_id" > "/sys/bus/pci/drivers/$VFIO_DRIVER/bind"
}

# Hook entry point: $1 is the VM ID and $2 the phase. Only the pre-start and
# post-start phases of VM $VMID are handled; everything else is a no-op.
if [[ "$1" == "$VMID" ]]; then
  case "$2" in
    pre-start)
      # The lock file marks the window in which devices are being moved to
      # vfio-pci. The EXIT trap keeps a terminated hook from leaving a stale
      # lock behind.
      touch "$LOCK_FILE"
      trap 'rm -f -- "$LOCK_FILE"' EXIT
      modprobe "$VFIO_DRIVER"
      # Stop the display manager before the GPU is taken from its driver.
      systemctl stop gdm3
      sleep 2
      bind_to_vfio "$GPU_PCI" 2
      bind_to_vfio "$GPU_AUDIO_PCI"
      # The USB controller is removed and re-discovered before rebinding.
      force_disconnect_device "$USB_CONTROLLER_PCI"
      bind_to_vfio "$USB_CONTROLLER_PCI"
      bind_to_vfio "$AUDIO_DAC"
      # Kept as the last command so the phase's exit status stays that of
      # the lock removal, as before.
      rm -f -- "$LOCK_FILE"
      ;;
    post-start)
      # Ensure PID file is available for reading
      pid_file="/run/qemu-server/$1.pid"
      while [[ ! -f "$pid_file" ]]; do sleep 1; done
      PID=$(<"$pid_file")
      # Restrict the VM process to the configured CPU list.
      taskset -cp "$CPUS" "$PID"
      ;;
  esac
fi
#!/bin/bash
# Settings for the VM monitor.

# "true" prints [DEBUG] messages; any other value silences them.
DEBUG_MODE='true'
# ID of the VM whose state is polled.
VMID='100'
# Script executed to rebind devices while the VM is not running.
REBIND_SCRIPT='/home/srv01/Scripts/rebind-script.sh'
# While this file exists the monitor does not run the rebind script.
LOCK_FILE='/tmp/binding_vfio.lock'
# State seen on the previous poll: "unknown", "running" or "stopped".
last_state='unknown'
# Print a "[DEBUG]"-prefixed message on stdout, but only when DEBUG_MODE is
# exactly "true".
# Globals:   DEBUG_MODE (read)
# Arguments: $1 - message text
debug_log() {
  # Debugging disabled: succeed without printing anything.
  [[ $DEBUG_MODE == true ]] || return 0
  echo "[DEBUG] $1"
}
#######################################
# Report whether VM $VMID is running, based on the output of `qm status`.
# Globals:   VMID (read)
# Returns:   0 if the status output contains "running", non-zero otherwise
# NOTE(review): a failing `qm` call is indistinguishable from a stopped VM
# here — confirm that is acceptable for the caller.
#######################################
is_vm_running() {
  # Keep the status local so it does not leak into the caller's scope.
  local result
  qm status "$VMID" | grep -q "running"
  result=$?
  debug_log "is_vm_running: qm status $VMID result: $result"
  return "$result"
}
# Run the rebind script in a separate bash process.
# Globals: REBIND_SCRIPT (read)
# Returns: exit status of the rebind script
rebind_devices() {
  debug_log "Rebinding devices with script: ${REBIND_SCRIPT}"
  bash "${REBIND_SCRIPT}"
}
# Poll the VM every 5 seconds. State changes are announced once; while the VM
# is not running and no lock file exists, the rebind script is run on every
# pass.
while :; do
  if is_vm_running; then
    if [[ $last_state == "running" ]]; then
      debug_log "VM $VMID is still running."
    else
      echo "VM $VMID is running."
      last_state="running"
      debug_log "State changed to running"
    fi
  else
    # Announce the transition only the first time the VM is seen stopped.
    if [[ $last_state != "stopped" ]]; then
      echo "VM $VMID has stopped."
      last_state="stopped"
      debug_log "State changed to stopped"
    fi
    # The lock file suppresses rebinding for as long as it exists.
    if [[ -f $LOCK_FILE ]]; then
      debug_log "Lock file found: $LOCK_FILE"
    else
      debug_log "Lock file not found, proceeding to rebind devices."
      rebind_devices
    fi
  fi
  sleep 5 # Pause between polls
done
#!/bin/bash
LOCK_FILE='/tmp/binding_vfio.lock'

# PCI address -> driver each device is expected to end up on.
declare -A device_driver_map=()
device_driver_map["0000:06:00.0"]='nvidia'
device_driver_map["0000:06:00.1"]='snd_hda_intel'
device_driver_map["0000:08:00.3"]='xhci_hcd'
device_driver_map["0000:08:00.4"]='snd_hda_intel'
#######################################
# Print "<vendor> <device>" for a PCI device: both IDs in lowercase hex with
# the leading "0x" removed.
# Arguments: $1 - PCI address (e.g. 0000:06:00.0)
#            $2 - directory holding the per-device sysfs entries
#                 (optional, default /sys/bus/pci/devices)
# Outputs:   the ID pair on stdout
# Returns:   0 on success, 1 if either ID file cannot be read
#######################################
get_pci_vendor_device_id() {
  local pci_id=$1
  local devices_dir=${2:-/sys/bus/pci/devices}
  local vendor_id device_id

  # Assign separately from `local` so a failed read is not masked and a
  # missing device no longer yields a blank ID pair with status 0.
  vendor_id=$(<"$devices_dir/$pci_id/vendor") || return 1
  device_id=$(<"$devices_dir/$pci_id/device") || return 1

  # Strip leading '0x' from vendor/device ID
  vendor_id=${vendor_id#0x}
  device_id=${device_id#0x}

  # Ensure lowercase for consistency
  echo "${vendor_id,,} ${device_id,,}"
}
# Succeed when the device's sysfs "driver" link points at vfio-pci.
# Arguments: $1 - PCI address
# Returns:   0 if the link target ends in "vfio-pci", non-zero otherwise
is_bound_to_vfio() {
  local pci_id=$1
  local driver_target
  driver_target=$(readlink "/sys/bus/pci/devices/$pci_id/driver")
  [[ $driver_target == *vfio-pci ]]
}
#######################################
# Release a device from vfio-pci and clear its driver override.
# Arguments: $1 - PCI address
# Returns:   0 if the device was bound to vfio-pci and has been released,
#            1 if it was not bound to vfio-pci (nothing is changed)
#######################################
unbind_vfio_and_clear_override() {
  local pci_id=$1

  if ! is_bound_to_vfio "$pci_id"; then
    return 1
  fi

  echo "$pci_id" > /sys/bus/pci/drivers/vfio-pci/unbind
  # Writing an empty line clears the override so regular driver matching
  # applies to the device again.
  echo > "/sys/bus/pci/devices/$pci_id/driver_override"

  # Ensure the device is fully released before continuing. -F matches the
  # address literally; as a regex its "." would match any character.
  while lsof | grep -qF -- "$pci_id"; do sleep 1; done
  return 0
}
# Unload a kernel module, pause, then load it again. Error output from both
# modprobe calls is discarded.
# Arguments: $1 - module name
# Returns:   exit status of the final modprobe (the load)
reload_module() {
  local mod_name=$1

  modprobe -r "$mod_name" 2>/dev/null
  # Give the kernel a moment to finish unloading before loading again.
  sleep 2
  modprobe "$mod_name" 2>/dev/null
}
#######################################
# Make sure a PCI device ends up bound to the given driver: wait for the
# driver to claim it on its own, and bind it explicitly if that has not
# happened by the timeout.
# Arguments: $1 - PCI address
#            $2 - driver name
#            $3 - seconds to wait for automatic binding (optional, default 10)
#            $4 - sysfs PCI bus directory (optional, default /sys/bus/pci)
# Returns:   0 if the device is already bound or the bind write succeeded,
#            non-zero otherwise
#######################################
bind_to_expected_driver() {
  local pci_id=$1
  local driver=$2
  local timeout=${3:-10}
  local pci_root=${4:-/sys/bus/pci}
  local driver_dir="$pci_root/drivers/$driver"
  local counter=0

  # A device appears as an entry inside the driver's directory once it is
  # bound, so the presence of that entry means "already bound".
  while [[ ! -e "$driver_dir/$pci_id" ]] && (( counter < timeout )); do
    sleep 1
    counter=$(( counter + 1 ))
  done

  if [[ -e "$driver_dir/$pci_id" ]]; then
    return 0
  fi

  if [[ ! -d "$driver_dir" ]]; then
    echo "Skipping binding for $pci_id to $driver: driver is not loaded."
    return 1
  fi

  # Previously the bind was attempted only when the device was already
  # bound and skipped otherwise; this is the case that actually needs it.
  echo "$pci_id was not claimed by $driver within ${timeout}s, binding explicitly."
  echo "$pci_id" > "$driver_dir/bind"
}

# Attempt to unbind devices from VFIO and clear driver overrides
device_rebound=false
for pci_id in "${!device_driver_map[@]}"; do
  if unbind_vfio_and_clear_override "$pci_id"; then
    device_rebound=true
  fi
done

# Check if any device was rebound before proceeding
if [[ "$device_rebound" == true ]]; then
  # Ensure all NVIDIA related modules are properly handled
  for module in nvidia_drm nvidia_modeset nvidia_uvm nvidia snd_hda_intel; do
    reload_module "$module"
  done

  # Wait a bit to ensure modules are fully loaded and devices are ready
  sleep 5

  # Try to bind the device to the NVIDIA driver
  for pci_id in "${!device_driver_map[@]}"; do
    if [[ "${device_driver_map[$pci_id]}" == "nvidia" ]]; then
      # Best effort: the write fails if the device is already bound.
      echo -n "$pci_id" | tee /sys/bus/pci/drivers/nvidia/bind 2>/dev/null || true
      # After binding, attempt to dynamically set new_id if necessary
      new_id=$(get_pci_vendor_device_id "$pci_id")
      if [[ -n "${new_id// /}" ]]; then
        echo "$new_id" > /sys/bus/pci/drivers/nvidia/new_id 2>/dev/null || true
      fi
    fi
  done

  # Rebind devices to their original drivers
  for pci_id in "${!device_driver_map[@]}"; do
    bind_to_expected_driver "$pci_id" "${device_driver_map[$pci_id]}"
  done

  # Get the PCI ID of the USB controller
  usb_controller_id=""
  for pci_id in "${!device_driver_map[@]}"; do
    if [[ "${device_driver_map[$pci_id]}" == "xhci_hcd" ]]; then
      usb_controller_id=$pci_id
      break
    fi
  done

  # Check if USB controller ID is found
  if [[ -n "$usb_controller_id" ]]; then
    # Remove the controller and rescan the PCI bus so it is re-discovered
    echo 1 > "/sys/bus/pci/devices/$usb_controller_id/remove"
    echo 1 > /sys/bus/pci/rescan
  else
    echo "USB controller not found in device map. Skipping USB bus rescan."
  fi

  # Restart display manager to apply changes
  systemctl restart gdm3
  echo "Devices have been rebound to their original drivers."
else
  echo "No VFIO-bound devices needed rebinding."
fi
# systemd unit that keeps the VM 100 monitoring script running as a service.
[Unit]
Description=Continuous Rebind Devices Monitoring for VM 100
[Service]
# Type=simple: the process started by ExecStart is the service's main process.
Type=simple
# NOTE(review): presumably this is the monitoring loop script from this gist —
# confirm the installed path and that the file is executable.
ExecStart=/home/srv01/Scripts/monitor-vm-100.sh
# Restart the script whenever it exits, waiting 5 seconds before each restart.
Restart=always
RestartSec=5
[Install]
# Enabling the unit attaches it to multi-user.target.
WantedBy=multi-user.target
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment