Skip to content

Instantly share code, notes, and snippets.

@hiropppe
Last active June 5, 2017 07:31
Show Gist options
  • Save hiropppe/88b01e27a1192796a0e656de5f15e00e to your computer and use it in GitHub Desktop.
Save hiropppe/88b01e27a1192796a0e656de5f15e00e to your computer and use it in GitHub Desktop.
Simple Cython dot-product samples for nogil code
# cython: boundscheck = False
# cython: wraparound = False
# cython: cdivision = True
import numpy as np
cimport numpy as np
from scipy.linalg cimport cython_blas
from libc.stdio cimport printf
from libc.stdlib cimport abort, malloc, free
cdef double cdot(int n, double dx[], double dy[]) nogil:
    # Dot product of the first `n` entries of `dx` and `dy`.
    #
    # Uses only C-level operations, hence the `nogil` declaration.
    # Products are accumulated in index order 0, 1, ..., n-1.
    cdef double acc = 0.0
    cdef int k = 0
    while k < n:
        acc += dx[k] * dy[k]
        k += 1
    return acc
cdef double onehot_dot(int end, int onehot[], double w[]) nogil:
    # Sum the weights selected by a chained index list.
    #
    # `onehot[0]` holds the first selected position and `onehot[p]` holds
    # the position that follows `p`. Positions are 1-based with respect to
    # `w` (position `p` selects `w[p - 1]`). A first position of 0 means
    # nothing is selected; the walk stops at the first position >= `end`.
    cdef int pos = onehot[0]
    cdef double total = 0.0
    if pos != 0:
        while pos < end:
            total += w[pos - 1]
            pos = onehot[pos]
    return total
def onehot_dot_sample():
    # Demonstrate onehot_dot() on a 5-element weight vector.
    #
    # The weights are w = [0, 1, 2, 3, 4] and the chain selects positions
    # 1 -> 3 -> 5, i.e. w[0] + w[2] + w[4] = 6.0, which is printed via printf.
    cdef int size = 5, end = size + 1
    cdef int onehot[6]
    cdef double w[5]
    # Typed as a C int: an untyped `cdef i` declares a Python object, which
    # forces object-based indexing and conversion in the loop below.
    cdef int i
    for i in range(size):
        w[i] = <double>i
    # Only the chain entries that are actually visited are initialised.
    onehot[0] = 1    # first selected position
    onehot[1] = 3    # position after 1
    onehot[3] = 5    # position after 3
    onehot[5] = end  # terminator: a position >= end stops the walk
    printf('onehot dot %7.3f', onehot_dot(end, onehot, w))
def blas_ddot_sample():
    # Demonstrate a strided BLAS ddot call on two heap-allocated vectors.
    #
    # x is filled with 2.0 at stride `incx`, y with 1.0 at stride `incy`,
    # so the printed result for n = 5 is 10.0.
    #
    # Raises MemoryError if either buffer cannot be allocated.
    cdef int n = 5, incx = 2, incy = 1
    cdef int i
    # Smallest buffer that holds n elements at the given stride.
    cdef int len_x = 1 + (n - 1) * incx
    cdef int len_y = 1 + (n - 1) * incy
    cdef double *x = <double *>malloc(len_x * sizeof(double))
    cdef double *y = <double *>malloc(len_y * sizeof(double))
    if x == NULL or y == NULL:
        # free(NULL) is a no-op, so both can be released unconditionally.
        free(x)
        free(y)
        raise MemoryError()
    try:
        # Only the strided slots are written; ddot reads only those slots.
        for i in range(n):
            x[i * incx] = 2.0
            y[i * incy] = 1.0
        printf('ddot %7.3f', cython_blas.ddot(&n, x, &incx, y, &incy))
    finally:
        free(x)
        free(y)
def blas_ddot_2d_sample():
    # Demonstrate row-wise BLAS ddot over a flat r x c matrix.
    #
    # dx holds an r x c matrix in row-major order with dx[ri][ci] = ri * ci,
    # dy holds the vector dy[ci] = ci. The dot product of every row with dy
    # is accumulated and printed via printf.
    #
    # Raises MemoryError if either buffer cannot be allocated.
    cdef int r = 5, c = 5, incx = 1, incy = 1
    cdef int ri, ci
    cdef int len_r = c * incx   # elements occupied by one row of dx
    cdef int len_x = r * len_r
    cdef int len_y = c * incy
    cdef double dot = 0
    cdef double *dx = <double *>malloc(len_x * sizeof(double))
    cdef double *dy = <double *>malloc(len_y * sizeof(double))
    if dx == NULL or dy == NULL:
        # free(NULL) is a no-op, so both can be released unconditionally.
        free(dx)
        free(dy)
        raise MemoryError()
    try:
        for ri in range(r):
            for ci in range(c):
                dx[ri * len_r + ci * incx] = ri * ci
        for ci in range(c):
            # dy is strided by incy (the original indexed it with incx,
            # which only worked because both strides are 1).
            dy[ci * incy] = ci
        for ri in range(r):
            dot += cython_blas.ddot(&c, &dx[ri * len_r], &incx, dy, &incy)
        printf('ddot %7.3f', dot)
    finally:
        free(dx)
        free(dy)
def npdot_speed():
    # Time 10000 calls of numpy's ndarray.dot on two 1000-element vectors.
    #
    # dx = [0, 1, ..., 999] and dy = [999, 998, ..., 0]. Prints the
    # accumulated dot product and the mean time per call in microseconds.
    #
    # NOTE(review): the vectors are float32 while cdot_speed in this file
    # uses C doubles, so the timings are not strictly like-for-like.
    import time

    dot = 0
    dx = np.arange(1000, dtype=np.float32)
    # Reversed copy gives dy[j] == 1000 - 1 - j in a contiguous buffer.
    dy = dx[::-1].copy()
    elapsed = []
    for i in range(10000):
        s = time.time()
        dot += dx.dot(dy)
        elapsed.append(time.time() - s)
    # '%s' formatting prints the same text as the Python 2 statement
    # `print 'npdot', dot`, but is also valid under Python 3 semantics.
    print('npdot %s' % dot)
    print('{:.3f} us'.format(np.mean(elapsed)*1000*1000))
def cdot_speed():
    # Time 10000 calls of the hand-written cdot() on two 1000-element
    # C double arrays.
    #
    # dx = [0, 1, ..., 999] and dy = [999, 998, ..., 0]. Prints the
    # accumulated dot product and the mean time per call in microseconds.
    cdef double[1000] dx
    cdef double[1000] dy
    # j is now typed: left undeclared it is a Python object, so the fill
    # loop below went through object indexing and conversion.
    cdef int i, j
    cdef double dot = 0
    for j in range(1000):
        dx[j] = j
        dy[j] = 1000 - 1 - j
    import time
    elapsed = []
    for i in range(10000):
        s = time.time()
        dot += cdot(1000, dx, dy)
        elapsed.append(time.time() - s)
    # '%s' formatting prints the same text as the Python 2 statement
    # `print 'cdot', dot`, but is also valid under Python 3 semantics.
    print('cdot %s' % dot)
    print('{:.3f} us'.format(np.mean(elapsed)*1000*1000))
def ddot_speed():
    # Time 10000 calls of BLAS ddot on two 1000-element double vectors.
    #
    # dx = [0, 1, ..., 999] and dy = [999, 998, ..., 0]. Prints the
    # accumulated dot product and the mean time per call in microseconds.
    #
    # Raises MemoryError if either buffer cannot be allocated.
    cdef int n = 1000, incx = 1, incy = 1
    cdef int i
    # Typed as C ints: left undeclared these were Python objects.
    cdef int len_x = 1 + (n - 1) * incx
    cdef int len_y = 1 + (n - 1) * incy
    cdef double dot = 0
    cdef double *dx = <double *>malloc(len_x * sizeof(double))
    cdef double *dy = <double *>malloc(len_y * sizeof(double))
    if dx == NULL or dy == NULL:
        # free(NULL) is a no-op, so both can be released unconditionally.
        free(dx)
        free(dy)
        raise MemoryError()
    import time
    try:
        for i in range(n):
            dx[i * incx] = i
            dy[i * incy] = n - 1 - i
        elapsed = []
        for i in range(10000):
            s = time.time()
            dot += cython_blas.ddot(&n, dx, &incx, dy, &incy)
            elapsed.append(time.time() - s)
        # '%s' formatting prints the same text as the Python 2 statement
        # `print 'ddot', dot`, but is also valid under Python 3 semantics.
        print('ddot %s' % dot)
        print('{:.3f} us'.format(np.mean(elapsed)*1000*1000))
    finally:
        free(dx)
        free(dy)
def onehot_dot_speed():
    # Time 10000 calls of onehot_dot() on a 100000-element weight vector
    # with five selected positions (1, 2, 4, 8200, 50000).
    #
    # Prints the mean time per call in microseconds.
    #
    # Raises MemoryError if either buffer cannot be allocated.
    cdef int size = 100000, end = size + 1
    cdef int i
    # Heap allocation: as C arrays local to the function these two buffers
    # would put roughly 1.2 MB on the stack.
    cdef int *onehot = <int *>malloc((size + 1) * sizeof(int))
    cdef double *w = <double *>malloc(size * sizeof(double))
    if onehot == NULL or w == NULL:
        # free(NULL) is a no-op, so both can be released unconditionally.
        free(onehot)
        free(w)
        raise MemoryError()
    import time
    try:
        for i in range(size):
            w[i] = np.random.randn()
        # Only the chain entries that are actually visited are initialised.
        onehot[0] = 1
        onehot[1] = 2
        onehot[2] = 4
        onehot[4] = 8200
        onehot[8200] = 50000
        onehot[50000] = end  # terminator: a position >= end stops the walk
        elapsed = []
        for i in range(10000):
            s = time.time()
            onehot_dot(end, onehot, w)
            elapsed.append(time.time() - s)
        print('{:.3f} us'.format(np.mean(elapsed)*1000*1000))
    finally:
        free(onehot)
        free(w)
def ddot_2d_speed():
    # Time 10000 passes of row-wise BLAS ddot over a flat 12 x 1000 matrix.
    #
    # dx holds an r x c matrix in row-major order with dx[ri][ci] = ri * ci,
    # dy holds the vector dy[ci] = ci. One timed pass computes the dot
    # product of every row with dy. Prints the accumulated result and the
    # mean time per pass in microseconds.
    #
    # Raises MemoryError if either buffer cannot be allocated.
    cdef int r = 12, c = 1000, incx = 1, incy = 1
    cdef int i, ri, ci
    cdef int len_r = c * incx   # elements occupied by one row of dx
    cdef int len_x = r * len_r
    cdef int len_y = c * incy
    cdef double dot = 0
    cdef double *dx = <double *>malloc(len_x * sizeof(double))
    cdef double *dy = <double *>malloc(len_y * sizeof(double))
    if dx == NULL or dy == NULL:
        # free(NULL) is a no-op, so both can be released unconditionally.
        free(dx)
        free(dy)
        raise MemoryError()
    import time
    try:
        for ri in range(r):
            for ci in range(c):
                dx[ri * len_r + ci * incx] = ri * ci
        for ci in range(c):
            # dy is strided by incy (the original indexed it with incx,
            # which only worked because both strides are 1).
            dy[ci * incy] = ci
        elapsed = []
        for i in range(10000):
            s = time.time()
            for ri in range(r):
                dot += cython_blas.ddot(&c, &dx[ri * len_r], &incx, dy, &incy)
            elapsed.append(time.time() - s)
        # '%s' formatting prints the same text as the Python 2 statement
        # `print 'ddot', dot`, but is also valid under Python 3 semantics.
        print('ddot %s' % dot)
        print('{:.3f} us'.format(np.mean(elapsed)*1000*1000))
    finally:
        free(dx)
        free(dy)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment