import ctypes
import json
from functools import wraps
from typing import Any, Dict, List
from warnings import warn
# Constants from cuda.h
CUDA_SUCCESS = 0
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36
# Conversions from semantic version numbers
# Borrowed from original gist and updated from the "GPUs supported" section of this Wikipedia article
# https://en.wikipedia.org/wiki/CUDA
SEMVER_TO_CORES = {
    (1, 0): 8,  # Tesla
    (1, 1): 8,
    (1, 2): 8,
    (1, 3): 8,
    (2, 0): 32,  # Fermi
    (2, 1): 48,
    (3, 0): 192,  # Kepler
    (3, 2): 192,
    (3, 5): 192,
    (3, 7): 192,
    (5, 0): 128,  # Maxwell
    (5, 2): 128,
    (5, 3): 128,
    (6, 0): 64,  # Pascal
    (6, 1): 128,
    (6, 2): 128,
    (7, 0): 64,  # Volta
    (7, 2): 64,
    (7, 5): 64,  # Turing
    (8, 0): 64,  # Ampere (GA100)
    (8, 6): 128,  # Ampere (GA10x SMs have 128 FP32 cores)
}
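
# The per-SM core counts above are multiplied by the device's multiprocessor
# count to estimate total CUDA cores; e.g. a hypothetical compute-capability
# (8, 0) device with 108 SMs would be reported as 108 * 64 = 6912 CUDA cores.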
SEMVER_TO_ARCH = {
(1, 0): "tesla",
(1, 1): "tesla",
(1, 2): "tesla",
(1, 3): "tesla",
(2, 0): "fermi",
(2, 1): "fermi",
(3, 0): "kepler",
(3, 2): "kepler",
(3, 5): "kepler",
(3, 7): "kepler",
(5, 0): "maxwell",
(5, 2): "maxwell",
(5, 3): "maxwell",
(6, 0): "pascal",
(6, 1): "pascal",
(6, 2): "pascal",
(7, 0): "volta",
(7, 2): "volta",
(7, 5): "turing",
(8, 0): "ampere",
(8, 6): "ampere",
}
# Decorators for CUDA API calls
def cuda_api_call(func):
"""
Decorator to wrap CUDA API calls and check their results.
Raises RuntimeError if the CUDA call does not return CUDA_SUCCESS.
"""
@wraps(func)
def wrapper(*args, **kwargs):
result = func(*args, **kwargs)
if result != CUDA_SUCCESS:
error_str = ctypes.c_char_p()
cuda.cuGetErrorString(result, ctypes.byref(error_str))
raise RuntimeError(
f"{func.__name__} failed with error code {result}: {error_str.value.decode()}"
)
return result
return wrapper
def cuda_api_call_warn(func):
"""
Decorator to wrap CUDA API calls and check their results.
Prints a warning message if the CUDA call does not return CUDA_SUCCESS.
"""
@wraps(func)
def wrapper(*args, **kwargs):
result = func(*args, **kwargs)
if result != CUDA_SUCCESS:
error_str = ctypes.c_char_p()
cuda.cuGetErrorString(result, ctypes.byref(error_str))
warn(
f"Warning: {func.__name__} failed with error code {result}: {error_str.value.decode()}"
)
return result
return wrapper
# Attempt to load the CUDA library
libnames = ("libcuda.so", "libcuda.dylib", "cuda.dll")
for libname in libnames:
    try:
        cuda = ctypes.CDLL(libname)
    except OSError:
        continue
    else:
        break
else:
    raise ImportError(f'Could not load any of: {", ".join(libnames)}')
# CUDA API calls wrapped with the decorator
@cuda_api_call
def cuInit(flags):
    return cuda.cuInit(flags)


@cuda_api_call
def cuDeviceGetCount(count):
    return cuda.cuDeviceGetCount(count)


@cuda_api_call
def cuDeviceGet(device, ordinal):
    return cuda.cuDeviceGet(device, ordinal)


@cuda_api_call
def cuDeviceGetName(name, length, dev):
    return cuda.cuDeviceGetName(name, length, dev)


@cuda_api_call
def cuDeviceComputeCapability(major, minor, dev):
    return cuda.cuDeviceComputeCapability(major, minor, dev)


@cuda_api_call
def cuDeviceGetAttribute(pi, attrib, dev):
    return cuda.cuDeviceGetAttribute(pi, attrib, dev)


@cuda_api_call_warn
def cuCtxCreate(pctx, flags, dev):
    try:
        result = cuda.cuCtxCreate_v2(pctx, flags, dev)
    except AttributeError:
        result = cuda.cuCtxCreate(pctx, flags, dev)
    return result


@cuda_api_call_warn
def cuMemGetInfo(free, total):
    try:
        result = cuda.cuMemGetInfo_v2(free, total)
    except AttributeError:
        result = cuda.cuMemGetInfo(free, total)
    return result


@cuda_api_call
def cuCtxDetach(ctx):
    return cuda.cuCtxDetach(ctx)

# Main function to get CUDA device specs
def get_cuda_device_specs() -> List[Dict[str, Any]]:
"""Generate spec for each GPU device with format
{
'name': str,
'compute_capability': (major: int, minor: int),
'cores': int,
'cuda_cores': int,
'concurrent_threads': int,
'gpu_clock_mhz': float,
'mem_clock_mhz': float,
'total_mem_mb': float,
'free_mem_mb': float,
'architecture': str,
'cuda_cores': int
}
"""
    # Initialize the CUDA driver API
    cuInit(0)
    num_gpus = ctypes.c_int()
    cuDeviceGetCount(ctypes.byref(num_gpus))

    device_specs = []
    for i in range(num_gpus.value):
        spec = {}
        device = ctypes.c_int()
        cuDeviceGet(ctypes.byref(device), i)

        # Device name (use a mutable buffer; writing into an immutable bytes
        # object through c_char_p is unsafe)
        name = ctypes.create_string_buffer(100)
        cuDeviceGetName(name, len(name), device)
        spec["name"] = name.value.decode()

        # Compute capability
        cc_major = ctypes.c_int()
        cc_minor = ctypes.c_int()
        cuDeviceComputeCapability(
            ctypes.byref(cc_major), ctypes.byref(cc_minor), device
        )
        compute_capability = (cc_major.value, cc_minor.value)
        spec["compute_capability"] = compute_capability

        # Multiprocessor (SM) count and maximum resident threads
        cores = ctypes.c_int()
        cuDeviceGetAttribute(
            ctypes.byref(cores), CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device
        )
        spec["cores"] = cores.value

        threads_per_core = ctypes.c_int()
        cuDeviceGetAttribute(
            ctypes.byref(threads_per_core),
            CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR,
            device,
        )
        spec["concurrent_threads"] = cores.value * threads_per_core.value

        # Clock rates are reported in kHz; convert to MHz
        clockrate = ctypes.c_int()
        cuDeviceGetAttribute(
            ctypes.byref(clockrate), CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device
        )
        spec["gpu_clock_mhz"] = clockrate.value / 1000.0

        cuDeviceGetAttribute(
            ctypes.byref(clockrate), CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device
        )
        spec["mem_clock_mhz"] = clockrate.value / 1000.0

        # Memory, architecture, and CUDA-core info require a context; skip
        # these fields if context creation fails
        context = ctypes.c_void_p()
        if cuCtxCreate(ctypes.byref(context), 0, device) == CUDA_SUCCESS:
            free_mem = ctypes.c_size_t()
            total_mem = ctypes.c_size_t()
            cuMemGetInfo(ctypes.byref(free_mem), ctypes.byref(total_mem))
            spec["total_mem_mb"] = total_mem.value / 1024**2
            spec["free_mem_mb"] = free_mem.value / 1024**2
            spec["architecture"] = SEMVER_TO_ARCH.get(compute_capability, "unknown")
            cores_per_mp = SEMVER_TO_CORES.get(compute_capability)
            spec["cuda_cores"] = (
                cores.value * cores_per_mp if cores_per_mp is not None else "unknown"
            )
            cuCtxDetach(context)
        device_specs.append(spec)
    return device_specs

if __name__ == "__main__":
    print(json.dumps(get_cuda_device_specs(), indent=2))
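
# Example usage from another module (a sketch; assumes this file is saved as
# cuda_specs.py -- the module name is an assumption, not part of the gist):
#
#     from cuda_specs import get_cuda_device_specs
#
#     specs = get_cuda_device_specs()
#     # e.g. pick the device with the most free memory (falls back to 0 when
#     # the free_mem_mb field is absent)
#     best = max(specs, key=lambda s: s.get("free_mem_mb", 0))
#     print(best["name"])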