Dodgson condensation
from __future__ import division

import math
import time

import numpy as np
import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule
""" Dodgson condensation """
# Bail out early when no CUDA device is available.
if drv.Device.count() == 0:
    print("There are no CUDA devices")
    exit()
cuda_mod = SourceModule("""
/* Condensation step: each thread computes one connected 2x2 minor
   from two adjacent rows of the matrix. */
__global__ void cuda_det_1(float *a_dest, const float *a_row1,
                           const float *a_row2, const int size)
{
    const int i = blockDim.x * blockIdx.x + threadIdx.x;
    const int j = i + 1;
    if (j < size) {
        a_dest[i] = a_row1[i] * a_row2[j] - a_row1[j] * a_row2[i];
    }
}

/* Division step: divide each minor by the corresponding interior
   element of the matrix from two condensation steps earlier. */
__global__ void cuda_det_2(float *a_dest, const float *a_row1,
                           const float *a_row2, const int size)
{
    const int i = blockDim.x * blockIdx.x + threadIdx.x;
    const int j = i + 1;
    if (j < size) {
        a_dest[i] = a_row1[i] / a_row2[j];
    }
}
""")
cuda_max_threads = drv.Device(0).get_attribute(
    drv.device_attribute.MAX_THREADS_PER_BLOCK)
cuda_det_1 = cuda_mod.get_function("cuda_det_1")
cuda_det_2 = cuda_mod.get_function("cuda_det_2")
def cuda_reduce(a, blocks, threads):
    """First condensation step: 2x2 minors only, no divisors yet."""
    x, y = map(lambda v: v - 1, a.shape)
    b = np.zeros(shape=(x, y)).astype(np.float32)
    size = np.int32(y + 1)
    for i in range(x):
        cuda_det_1(drv.Out(b[i]), drv.In(a[i]), drv.In(a[i + 1]),
                   size, block=(threads, 1, 1), grid=(blocks, 1, 1))
    return b
def cuda_product(a, d, blocks, threads):
    """Subsequent condensation steps: 2x2 minors of ``a`` divided by the
    interior elements of ``d``, the matrix from two steps back."""
    x, y = map(lambda v: v - 1, a.shape)
    c = np.zeros(shape=(x, y)).astype(np.float32)
    size = np.int32(y + 1)
    for i in range(x):
        cuda_det_1(drv.Out(c[i]), drv.In(a[i]), drv.In(a[i + 1]),
                   size, block=(threads, 1, 1), grid=(blocks, 1, 1))
        cuda_det_2(drv.Out(c[i]), drv.In(c[i]), drv.In(d[i + 1]),
                   size, block=(threads, 1, 1), grid=(blocks, 1, 1))
    return c, a
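# For reference, one condensation step can also be written directly in
# NumPy; a minimal sketch (the function name is ours, not part of the CUDA
# pipeline above), useful for sanity-checking the kernels on small inputs.
# E.g. ``numpy_condense(numpy_condense(a), a)`` reproduces the first two
# condensation steps of ``a``.
def numpy_condense(a, d=None):
    """One Dodgson step: connected 2x2 minors of ``a``, divided by the
    interior of ``d`` (the matrix two steps back) when ``d`` is given."""
    b = a[:-1, :-1] * a[1:, 1:] - a[:-1, 1:] * a[1:, :-1]
    if d is not None:
        b = b / d[1:-1, 1:-1]
    return b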
def determinant(d):
    """Compute det(d) by Dodgson condensation, shrinking the matrix by one
    row and one column per iteration until a single entry remains."""
    x, y = d.shape
    threads = min(y, cuda_max_threads)
    blocks = int(math.ceil(y / threads))
    a = cuda_reduce(d, blocks, threads)
    x, y = a.shape
    while x > 1:
        threads = min(y, cuda_max_threads)
        blocks = int(math.ceil(y / threads))
        a, d = cuda_product(a, d, blocks, threads)
        x, y = a.shape
    return a[0][0]
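# Note: plain Dodgson condensation breaks down when an interior element of
# an intermediate matrix is zero (division by zero). For the random float32
# input used below this is unlikely, but not impossible.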
def note_time(function, params):
    start = time.time()
    result = function(params)
    end = time.time()
    return result, end - start
if __name__ == "__main__":
a = np.random.normal(loc=1.0, size=(32, 32)).astype(np.float32)
cuda_result, cuda_time = note_time(determinant, a)
cpu_result, cpu_time = note_time(np.linalg.det, a)
print("Rank=%s: cuda(%s)=%s, cpu(%s)=%s" %
(comm.rank, cuda_time, cuda_result, cpu_time, cpu_result))