Skip to content

Instantly share code, notes, and snippets.

@mrkwjc mrkwjc/cuspsolve32.py
Last active Dec 6, 2019

Embed
What would you like to do?
CUDA vs. CPU sparse solver in Python - single precision
# ### Interface cuSOLVER PyCUDA
from __future__ import print_function
import pycuda.gpuarray as gpuarray
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import scipy.sparse as sp
import ctypes
## Wrap the cuSOLVER cusolverSpDcsrlsvqr() using ctypes
## http://docs.nvidia.com/cuda/cusolver/#cusolver-lt-t-gt-csrlsvqr
# cuSparse
_libcusparse = ctypes.cdll.LoadLibrary('libcusparse.so')
_libcusparse.cusparseCreate.restype = int
_libcusparse.cusparseCreate.argtypes = [ctypes.c_void_p]
_libcusparse.cusparseDestroy.restype = int
_libcusparse.cusparseDestroy.argtypes = [ctypes.c_void_p]
_libcusparse.cusparseCreateMatDescr.restype = int
_libcusparse.cusparseCreateMatDescr.argtypes = [ctypes.c_void_p]
# cuSOLVER
_libcusolver = ctypes.cdll.LoadLibrary('libcusolver.so')
_libcusolver.cusolverSpCreate.restype = int
_libcusolver.cusolverSpCreate.argtypes = [ctypes.c_void_p]
_libcusolver.cusolverSpDestroy.restype = int
_libcusolver.cusolverSpDestroy.argtypes = [ctypes.c_void_p]
_libcusolver.cusolverSpScsrlsvqr.restype = int
_libcusolver.cusolverSpScsrlsvqr.argtypes= [ctypes.c_void_p,
ctypes.c_int,
ctypes.c_int,
ctypes.c_void_p,
ctypes.c_void_p,
ctypes.c_void_p,
ctypes.c_void_p,
ctypes.c_void_p,
ctypes.c_float,
ctypes.c_int,
ctypes.c_void_p,
ctypes.c_void_p]
def cuspsolve(A, b):
Acsr = sp.csr_matrix(A, dtype=np.float32)
b = np.asarray(b, dtype=np.float32)
x = np.empty_like(b)
# Copy arrays to GPU
dcsrVal = gpuarray.to_gpu(Acsr.data)
dcsrColInd = gpuarray.to_gpu(Acsr.indices)
dcsrIndPtr = gpuarray.to_gpu(Acsr.indptr)
dx = gpuarray.to_gpu(x)
db = gpuarray.to_gpu(b)
# Create solver parameters
m = ctypes.c_int(Acsr.shape[0]) # Need check if A is square
nnz = ctypes.c_int(Acsr.nnz)
descrA = ctypes.c_void_p()
reorder = ctypes.c_int(0)
tol = ctypes.c_float(1e-10)
singularity = ctypes.c_int(0) # -1 if A not singular
# create cusparse handle
_cusp_handle = ctypes.c_void_p()
status = _libcusparse.cusparseCreate(ctypes.byref(_cusp_handle))
assert(status == 0)
cusp_handle = _cusp_handle.value
# create MatDescriptor
status = _libcusparse.cusparseCreateMatDescr(ctypes.byref(descrA))
assert(status == 0)
#create cusolver handle
_cuso_handle = ctypes.c_void_p()
status = _libcusolver.cusolverSpCreate(ctypes.byref(_cuso_handle))
assert(status == 0)
cuso_handle = _cuso_handle.value
# Solve
res=_libcusolver.cusolverSpScsrlsvqr(cuso_handle,
m,
nnz,
descrA,
dcsrVal.gpudata.__int__(),
dcsrIndPtr.gpudata.__int__(),
dcsrColInd.gpudata.__int__(),
db.gpudata.__int__(),
tol,
reorder,
dx.gpudata.__int__(),
ctypes.byref(singularity))
assert(res == 0)
if singularity.value != -1:
raise ValueError('Singular matrix!')
x = dx.get() # Get result as numpy array
# Destroy handles
status = _libcusolver.cusolverSpDestroy(cuso_handle)
assert(status == 0)
status = _libcusparse.cusparseDestroy(cusp_handle)
assert(status == 0)
# Return result
return x
# Test
if __name__ == '__main__':
A = np.diag(np.arange(1, 5, dtype=np.float32))
b = np.ones(4, dtype=np.float32)
x = cuspsolve(A, b)
np.testing.assert_almost_equal(x, np.array([1. , 0.5, 0.33333333, 0.25], dtype=np.float32))
# Timing comparison
from scipy.sparse import rand
from scipy.sparse.linalg import spsolve
from scipy.sparse import coo_matrix
import time
n = 10000
i = j = np.arange(n)
diag = np.ones(n, dtype=np.float32)
A = rand(n, n, density=0.001, dtype=np.float32)
A = A.tocsr()
A[i, j] = diag
b = np.ones(n, dtype=np.float32)
t0 = time.time()
x = spsolve(A, b, use_umfpack=False) # umfpack works with flost64 only
dt1 = time.time() - t0
print("scipy.sparse.linalg.spsolve time: %s" %dt1)
t0 = time.time()
x = cuspsolve(A, b)
dt2 = time.time() - t0
print("cuspsolve time: %s" %dt2)
ratio = dt1/dt2
if ratio > 1:
print("CUDA is %s times faster than CPU." %ratio)
else:
print("CUDA is %s times slower than CPU." %(1./ratio))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.