@achimnol
Last active September 25, 2016 15:32
An Upstart configuration for NVIDIA CUDA device initialization without an X environment (derived from Sangjin Han's init.d script)
description "CUDA initialization without X environments"
start on runlevel [2345]
stop on runlevel [!2345]
pre-start script
DRIVER=nvidia
HOLD_GPU=/usr/local/bin/hold_gpu.py
[ -x "$HOLD_GPU" ] || { echo "$HOLD_GPU is missing or not executable!"; exit 1; }
echo "Loading $DRIVER kernel module..."
modprobe $DRIVER NVreg_EnableMSI=1
[ "$?" = 0 ] || { echo "Loading $DRIVER kernel module has failed."; exit 1; }
echo -n "Initializing CUDA /dev entries... "
N3D=`/usr/bin/lspci | grep -i NVIDIA | grep "3D controller" | wc -l`
NVGA=`/usr/bin/lspci | grep -i NVIDIA | grep "VGA compatible controller" | wc -l`
N=$(($N3D + $NVGA - 1))
[ "$N" = -1 ] && { echo "No CUDA devices are found."; stop; exit 0; }
for i in `seq 0 $N`; do
DEVFILE=/dev/nvidia$i
[ ! -c $DEVFILE ] && mknod -m 666 $DEVFILE c 195 $i || :
[ "$?" = 0 ] || { echo "Creation of /dev/nvidia$i failed."; exit 1; }
done
DEVFILE=/dev/nvidiactl
[ ! -c $DEVFILE ] && mknod -m 666 $DEVFILE c 195 255 || :
[ "$?" = 0 ] || { echo "Creation of /dev/nvidiactl failed."; exit 1; }
echo "Done."
end script
exec /usr/local/bin/hold_gpu.py
post-stop script
echo "Unloading nvidia_uvm, nvidia kernel module..."
rmmod -f nvidia_uvm
[ "$?" = 0 ] || { echo "Unloading nvidia_uvm kernel module has failed."; exit 1; }
rmmod -f nvidia
[ "$?" = 0 ] || { echo "Unloading nvidia kernel module has failed."; exit 1; }
echo -n "Removing CUDA /dev entries... "
rm -f /dev/nvidia*
echo "Done."
end script
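
Before relying on the job, it can help to verify that the /dev entries the pre-start section creates look right. Below is a minimal sanity-check sketch, not part of the original gist; the device paths and the major number 195 are taken from the script above.

#!/usr/bin/env python
# Sanity-check sketch (illustration only): verify that the CUDA /dev entries exist
# and are character devices with major number 195, as created by the pre-start section.
from __future__ import print_function
import glob, os, stat

for path in sorted(glob.glob('/dev/nvidia[0-9]*')) + ['/dev/nvidiactl']:
    st = os.stat(path)
    assert stat.S_ISCHR(st.st_mode), '{0} is not a character device'.format(path)
    assert os.major(st.st_rdev) == 195, '{0} has an unexpected major number'.format(path)
    print('{0}: OK (major {1}, minor {2})'.format(path, os.major(st.st_rdev), os.minor(st.st_rdev)))

The hold_gpu.py script referenced by the exec stanza follows.
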
#! /usr/bin/env python
# This script sets IRQ affinity for the GPUs and holds a handle to the CUDA devices by not dying.
# Copy it, or make a symlink to it, in the /usr/local/bin directory.
from __future__ import print_function
import sys, time
import logging, logging.handlers
import subprocess, multiprocessing
import ctypes, ctypes.util

_libnuma = ctypes.CDLL(ctypes.util.find_library('numa'))
_libcuda = ctypes.CDLL(ctypes.util.find_library('cudart'))


def execute(cmd, check_returncode=True):
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    logger.debug(cmd)
    if check_returncode and proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, cmd)
    return stdout.decode('ascii')


def read_sysfs(bus_id, name):
    with open('/sys/bus/pci/devices/{0}/{1}'.format(bus_id, name), 'r') as f:
        return f.read().strip()


def get_cpu_topology():
    # Map each logical CPU to its NUMA node using libnuma.
    num_nodes = _libnuma.numa_num_configured_nodes()
    node_cpus = []
    for n in range(num_nodes):
        node_cpus.append([])
    for c in range(multiprocessing.cpu_count()):
        node_id = int(_libnuma.numa_node_of_cpu(ctypes.c_int(c)))
        node_cpus[node_id].append(c)
    return node_cpus


def set_gpu_affinity():
    num_cpus = multiprocessing.cpu_count()
    assert num_cpus >= 4, 'Too few CPU cores.'
    # You should put the path to the CUDA toolkit library into the LD_LIBRARY_PATH envvar
    # or into the system-wide configuration at /etc/ld.so.conf.d/.
    count = ctypes.c_int(0)
    # This will initialize a CUDA device context,
    # which holds the handle to the device until this program exits.
    ret = _libcuda.cudaGetDeviceCount(ctypes.byref(count))
    assert ret == 0, 'cudaGetDeviceCount() has failed.'
    logger.info('Grabbed a CUDA context.')
    logger.info('Number of GPUs detected: {0}'.format(count.value))
    result = execute('lspci | grep -i nvidia | grep VGA').strip()
    lines = result.splitlines()
    assert len(lines) == count.value, 'Mismatch between the CUDA device count and the number of NVIDIA GPUs on the PCI bus?'
    # Example: on a dual-processor octa-core system with HT enabled (32 logical cores in total),
    # each GPU is mapped to core 23 or 31.
    # Example 2: if there are two GPUs per node, they are mapped to cores 22/23 or
    # 30/31 depending on their actual node location.
    node_cpus = get_cpu_topology()
    node_devices = [0] * len(node_cpus)
    for dev_idx, line in enumerate(lines):
        bus_id, _ = line.split(' ', 1)
        bus_id = '0000:' + bus_id
        node_id = int(read_sysfs(bus_id, 'numa_node'))
        irq = int(read_sysfs(bus_id, 'irq'))
        # Pick the next unused core counted from the end of the GPU's NUMA node.
        node_devices[node_id] += 1
        core_id = node_cpus[node_id][-node_devices[node_id]]
        cuda_dev_id = ctypes.c_int(0)
        # c_char_p needs a bytes object (encode keeps this working on both Python 2 and 3).
        ret = _libcuda.cudaDeviceGetByPCIBusId(ctypes.byref(cuda_dev_id),
                                               ctypes.c_char_p(bus_id.encode('ascii')))
        assert ret == 0, 'cudaDeviceGetByPCIBusId() has failed.'
        logger.info('GPU (BusId: {0}, DeviceId: {1}) is on node {2}, setting IRQ {3}\'s affinity to core {4}.'.format(
            bus_id, cuda_dev_id.value, node_id, irq, core_id
        ))
        execute('echo {0:x} > /proc/irq/{1:d}/smp_affinity'.format(1 << core_id, irq))


if __name__ == '__main__':
    log_formatter = logging.Formatter('%(module)s[%(process)d]: %(levelname)s: %(message)s')
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    syslog_handler = logging.handlers.SysLogHandler(address='/dev/log')
    syslog_handler.setFormatter(log_formatter)
    syslog_handler.setLevel(logging.DEBUG)
    stdout_handler = logging.StreamHandler()
    stdout_handler.setFormatter(log_formatter)
    logger.addHandler(syslog_handler)
    logger.addHandler(stdout_handler)
    try:
        set_gpu_affinity()
    except AssertionError:
        logger.exception('Terminating due to an error.')
        sys.exit(1)
    logger.info('Staying alive...')
    try:
        while True:
            time.sleep(1)
    except (SystemExit, KeyboardInterrupt):
        print()
    logger.info('Terminated.')
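
For reference, the value written to /proc/irq/<irq>/smp_affinity by set_gpu_affinity() is a hexadecimal CPU bitmask with a single bit set for the chosen core. A tiny illustration using the 32-logical-core example from the comments above (the core numbers are illustrative only):

# Illustration: how the smp_affinity mask relates to the chosen core index.
for core_id in (23, 31):
    print('core {0} -> smp_affinity mask {1:x}'.format(core_id, 1 << core_id))
# core 23 -> smp_affinity mask 800000
# core 31 -> smp_affinity mask 80000000

When run by hand instead of via the Upstart job, the script needs root privileges (it writes under /proc/irq), logs to both syslog and stdout, and then sleeps until interrupted.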