@mrkwjc
Last active November 28, 2022 09:49
CUDA vs. CPU sparse solver in Python
# ### Interface cuSOLVER PyCUDA
from __future__ import print_function
import pycuda.gpuarray as gpuarray
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import scipy.sparse as sp
import ctypes
## Wrap the cuSOLVER cusolverSpDcsrlsvqr() using ctypes
## http://docs.nvidia.com/cuda/cusolver/#cusolver-lt-t-gt-csrlsvqr
# cuSparse
_libcusparse = ctypes.cdll.LoadLibrary('libcusparse.so')
_libcusparse.cusparseCreate.restype = int
_libcusparse.cusparseCreate.argtypes = [ctypes.c_void_p]
_libcusparse.cusparseDestroy.restype = int
_libcusparse.cusparseDestroy.argtypes = [ctypes.c_void_p]
_libcusparse.cusparseCreateMatDescr.restype = int
_libcusparse.cusparseCreateMatDescr.argtypes = [ctypes.c_void_p]
# cuSOLVER
_libcusolver = ctypes.cdll.LoadLibrary('libcusolver.so')
_libcusolver.cusolverSpCreate.restype = int
_libcusolver.cusolverSpCreate.argtypes = [ctypes.c_void_p]
_libcusolver.cusolverSpDestroy.restype = int
_libcusolver.cusolverSpDestroy.argtypes = [ctypes.c_void_p]
_libcusolver.cusolverSpDcsrlsvqr.restype = int
_libcusolver.cusolverSpDcsrlsvqr.argtypes = [ctypes.c_void_p,
                                             ctypes.c_int,
                                             ctypes.c_int,
                                             ctypes.c_void_p,
                                             ctypes.c_void_p,
                                             ctypes.c_void_p,
                                             ctypes.c_void_p,
                                             ctypes.c_void_p,
                                             ctypes.c_double,
                                             ctypes.c_int,
                                             ctypes.c_void_p,
                                             ctypes.c_void_p]
def cuspsolve(A, b):
    Acsr = sp.csr_matrix(A, dtype=float)
    b = np.asarray(b, dtype=float)
    x = np.empty_like(b)
    # Copy arrays to GPU
    dcsrVal = gpuarray.to_gpu(Acsr.data)
    dcsrColInd = gpuarray.to_gpu(Acsr.indices)
    dcsrIndPtr = gpuarray.to_gpu(Acsr.indptr)
    dx = gpuarray.to_gpu(x)
    db = gpuarray.to_gpu(b)
    # Create solver parameters
    m = ctypes.c_int(Acsr.shape[0])  # TODO: check that A is square
    nnz = ctypes.c_int(Acsr.nnz)
    descrA = ctypes.c_void_p()
    reorder = ctypes.c_int(0)
    tol = ctypes.c_double(1e-10)
    singularity = ctypes.c_int(0)  # set to -1 by the solver if A is not singular
    # Create cusparse handle
    _cusp_handle = ctypes.c_void_p()
    status = _libcusparse.cusparseCreate(ctypes.byref(_cusp_handle))
    assert status == 0
    cusp_handle = _cusp_handle.value
    # Create matrix descriptor
    status = _libcusparse.cusparseCreateMatDescr(ctypes.byref(descrA))
    assert status == 0
    # Create cusolver handle
    _cuso_handle = ctypes.c_void_p()
    status = _libcusolver.cusolverSpCreate(ctypes.byref(_cuso_handle))
    assert status == 0
    cuso_handle = _cuso_handle.value
    # Solve
    res = _libcusolver.cusolverSpDcsrlsvqr(cuso_handle,
                                           m,
                                           nnz,
                                           descrA,
                                           int(dcsrVal.gpudata),
                                           int(dcsrIndPtr.gpudata),
                                           int(dcsrColInd.gpudata),
                                           int(db.gpudata),
                                           tol,
                                           reorder,
                                           int(dx.gpudata),
                                           ctypes.byref(singularity))
    assert res == 0
    if singularity.value != -1:
        raise ValueError('Singular matrix!')
    x = dx.get()  # Get result back as a numpy array
    # Destroy handles
    status = _libcusolver.cusolverSpDestroy(cuso_handle)
    assert status == 0
    status = _libcusparse.cusparseDestroy(cusp_handle)
    assert status == 0
    # Return result
    return x
# Test
if __name__ == '__main__':
    A = np.diag(np.arange(1, 5))
    b = np.ones(4)
    x = cuspsolve(A, b)
    np.testing.assert_almost_equal(x, np.array([1., 0.5, 0.33333333, 0.25]))
    # Timing comparison
    from scipy.sparse import rand
    from scipy.sparse.linalg import spsolve
    import time
    n = 10000
    i = j = np.arange(n)
    diag = np.ones(n)
    A = rand(n, n, density=0.001)
    A = A.tocsr()
    A[i, j] = diag  # unit diagonal so that A is non-singular
    b = np.ones(n)
    t0 = time.time()
    x = spsolve(A, b)
    dt1 = time.time() - t0
    print("scipy.sparse.linalg.spsolve time: %s" % dt1)
    t0 = time.time()
    x = cuspsolve(A, b)
    dt2 = time.time() - t0
    print("cuspsolve time: %s" % dt2)
    ratio = dt1/dt2
    if ratio > 1:
        print("CUDA is %s times faster than CPU." % ratio)
    else:
        print("CUDA is %s times slower than CPU." % (1./ratio))
Stanpol commented Sep 19, 2017

CPU: Intel(R) Xeon(R) CPU E5-2690 v3 @ 2.60GHz, 6 cores
GPU: Tesla M60, 8Gb, Cuda 8.0
Python 3.5

Results:
scipy.sparse.linalg.spsolve time: 149.61404728889465
cuspsolve time: 134.93718433380127
CUDA is 1.1087681133080889 times faster than CPU.

lotusk commented Oct 12, 2018

CPU: Intel(R) Xeon(R) CPU E5-2690 v3 @ 2.60GHz, 6 cores
GPU: Tesla M60, 8Gb, Cuda 8.0
Python 3.5

Results:
scipy.sparse.linalg.spsolve time: 149.61404728889465
cuspsolve time: 134.93718433380127
CUDA is 1.1087681133080889 times faster than CPU.

Just a little faster????

SanPen commented Dec 4, 2019

Hi

CPU: Intel® Core™ i7-6700K CPU @ 4.00GHz × 8
GPU: GeForce GTX 1060 6GB/PCIe/SSE2, With CUDA 10.1
RAM: 32 GB

scipy.sparse.linalg.spsolve time: 71.13084483146667
cuspsolve time: 115.17455720901489
CUDA is 1.6191928759162477 times slower than CPU.

My only other comment is that my scipy uses MKL under Ubuntu.
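
For anyone reproducing this, one way to check which BLAS/LAPACK backend scipy was built against (MKL, OpenBLAS, etc.) is scipy's own config report:

import scipy
scipy.show_config()  # lists the BLAS/LAPACK libraries scipy is linked to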

mrkwjc commented Dec 6, 2019

This is probably because of transferring data to and from the GPU. Maybe only the _libcusolver.cusolverSpDcsrlsvqr call should be wrapped with the time measurement. That would make sense if we are keeping the data on the GPU...
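
A rough sketch of such a split measurement (an illustration, not part of the gist): time the same host-to-device and device-to-host copies that cuspsolve performs, but without the solve, and subtract that from the total cuspsolve time. This assumes A, b and dt2 from the benchmark above are still in scope.

import time
import numpy as np
import scipy.sparse as sp
import pycuda.autoinit
import pycuda.gpuarray as gpuarray

def transfer_only(A, b):
    # Same copies that cuspsolve does, with the cusolverSpDcsrlsvqr call left out
    Acsr = sp.csr_matrix(A, dtype=float)
    b = np.asarray(b, dtype=float)
    dcsrVal = gpuarray.to_gpu(Acsr.data)
    dcsrColInd = gpuarray.to_gpu(Acsr.indices)
    dcsrIndPtr = gpuarray.to_gpu(Acsr.indptr)
    db = gpuarray.to_gpu(b)
    dx = gpuarray.to_gpu(np.empty_like(b))
    return dx.get()  # device-to-host copy of the (untouched) result array

t0 = time.time()
transfer_only(A, b)
dt_copy = time.time() - t0
print("copy-only time: %s" % dt_copy)
print("estimated solve-only time: %s" % (dt2 - dt_copy))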

mrkwjc commented Dec 6, 2019

Well, I tested things and this is definitely NOT a data-copying issue. CUDA is simply slower!

To see this in an even more spectacular way, I highly recommend installing scikit-umfpack (via pip). The default SuperLU solver used by spsolve in scipy works on a single core only, whereas UMFPACK speeds up the solution by using all of your CPUs.
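
A minimal sketch of switching spsolve over to UMFPACK once scikit-umfpack is installed (pip install scikit-umfpack); use_solver is the scipy.sparse.linalg switch for this, and spsolve should pick UMFPACK up by default as soon as the scikit is importable:

from scipy.sparse.linalg import spsolve, use_solver

use_solver(useUmfpack=True)  # route spsolve through UMFPACK instead of SuperLU
x = spsolve(A.tocsc(), b)    # A, b as in the benchmark above; CSC is UMFPACK's native format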

Mapoet commented Nov 1, 2021

ರ_ರ So frustrating... you really do need to test with a matrix of rank greater than 4000. I can't understand why it would be necessary to use CUDA to solve a matrix of such a small rank, erm, just 4. 😅
