Created
October 19, 2020 05:08
-
-
Save randompast/742443c9be23b48ab120c78c9f67fd77 to your computer and use it in GitHub Desktop.
I implemented 2nd- and 3rd-order convolutions for cusignal. This is a sanity check and micro-benchmark.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import cusignal as cs | |
import numpy as np | |
import cupy as cp | |
from numba import cuda, njit, jit | |
@njit
def nbconv_1d3o(x, k):
    """Numba CPU reference: 1-D, third-order convolution without padding.

    For every output position ``n`` the result is::

        y[n] = sum over i, j, l of x[d-i] * x[d-j] * x[d-l] * k[i, j, l]

    with ``d = n + k.shape[0] - 1``.

    Args:
        x: 1-D input samples.
        k: 3-D kernel.  The output length and the window offset are derived
            from ``k.shape[0]`` only, so the indexing is only guaranteed to
            stay inside the window when no other axis is longer than axis 0.

    Returns:
        A float array (``np.zeros`` default dtype) of length
        ``x.size - k.shape[0] + 1``.
    """
    span = k.shape[0]
    out = np.zeros(x.size - span + 1)
    for pos in range(out.size):
        # Index of the newest sample inside the window for this output.
        newest = pos + span - 1
        for i in range(span):
            x_i = x[newest - i]
            for j in range(k.shape[1]):
                for l in range(k.shape[2]):
                    out[pos] += x_i * x[newest - j] * x[newest - l] * k[i, j, l]
    return out
@njit
def nbconv_1d2o(x, k):
    """Numba CPU reference: 1-D, second-order convolution without padding.

    For every output position ``n`` the result is::

        y[n] = sum over i, j of x[d-i] * x[d-j] * k[i, j]

    with ``d = n + k.shape[0] - 1``.

    Args:
        x: 1-D input samples.
        k: 2-D kernel.  The output length and the window offset are derived
            from ``k.shape[0]`` only, so the indexing is only guaranteed to
            stay inside the window when axis 1 is no longer than axis 0.

    Returns:
        A float array (``np.zeros`` default dtype) of length
        ``x.size - k.shape[0] + 1``.
    """
    span = k.shape[0]
    out = np.zeros(x.size - span + 1)
    for pos in range(out.size):
        # Index of the newest sample inside the window for this output.
        newest = pos + span - 1
        for i in range(span):
            x_i = x[newest - i]
            for j in range(k.shape[1]):
                out[pos] += x_i * x[newest - j] * k[i, j]
    return out
@njit
def nbconv_1d1o(x, k):
    """Numba CPU reference: ordinary 1-D convolution without padding.

    For every output position ``n`` the result is::

        y[n] = sum over i of x[d-i] * k[i]

    with ``d = n + k.shape[0] - 1``.

    Args:
        x: 1-D input samples.
        k: 1-D kernel.

    Returns:
        A float array (``np.zeros`` default dtype) of length
        ``x.size - k.shape[0] + 1``.
    """
    span = k.shape[0]
    out = np.zeros(x.size - span + 1)
    for pos in range(out.size):
        # Index of the newest sample inside the window for this output.
        newest = pos + span - 1
        for i in range(span):
            out[pos] += x[newest - i] * k[i]
    return out
@cuda.jit
def nbconv_1d3o_device(x, k, y):
    """CUDA kernel: 1-D, third-order convolution, one thread per output sample.

    Each thread whose global index ``n`` falls inside ``y`` adds::

        sum over i, j, l of x[d-i] * x[d-j] * x[d-l] * k[i, j, l]

    (with ``d = n + k.shape[0] - 1``) onto ``y[n]``.  The kernel accumulates
    with ``+=``, so the caller must pass ``y`` already initialised (zeros for
    a plain convolution).

    Args:
        x: 1-D input samples.
        k: 3-D kernel; the window offset is derived from ``k.shape[0]``.
        y: Output buffer, updated in place.
    """
    n = cuda.grid(1)
    # Threads outside the output range have nothing to do.
    if n < 0 or n >= y.size:
        return
    newest = n + k.shape[0] - 1
    for i in range(k.shape[0]):
        x_i = x[newest - i]
        for j in range(k.shape[1]):
            for l in range(k.shape[2]):
                y[n] += x_i * x[newest - j] * x[newest - l] * k[i, j, l]
@cuda.jit
def nbconv_1d2o_device(x, k, y):
    """CUDA kernel: 1-D, second-order convolution, one thread per output sample.

    Each thread whose global index ``n`` falls inside ``y`` adds::

        sum over i, j of x[d-i] * x[d-j] * k[i, j]

    (with ``d = n + k.shape[0] - 1``) onto ``y[n]``.  The kernel accumulates
    with ``+=``, so the caller must pass ``y`` already initialised (zeros for
    a plain convolution).

    Args:
        x: 1-D input samples.
        k: 2-D kernel; the window offset is derived from ``k.shape[0]``.
        y: Output buffer, updated in place.
    """
    n = cuda.grid(1)
    # Threads outside the output range have nothing to do.
    if n < 0 or n >= y.size:
        return
    newest = n + k.shape[0] - 1
    for i in range(k.shape[0]):
        x_i = x[newest - i]
        for j in range(k.shape[1]):
            y[n] += x_i * x[newest - j] * k[i, j]
@cuda.jit
def nbconv_1d1o_device(x, k, y):
    """CUDA kernel: ordinary 1-D convolution, one thread per output sample.

    Each thread whose global index ``n`` falls inside ``y`` adds
    ``sum over i of x[d-i] * k[i]`` (with ``d = n + k.shape[0] - 1``) onto
    ``y[n]``.  The kernel accumulates with ``+=``, so the caller must pass
    ``y`` already initialised (zeros for a plain convolution).

    Args:
        x: 1-D input samples.
        k: 1-D kernel.
        y: Output buffer, updated in place.
    """
    n = cuda.grid(1)
    # Threads outside the output range have nothing to do.
    if n < 0 or n >= y.size:
        return
    newest = n + k.shape[0] - 1
    for i in range(k.shape[0]):
        y[n] += x[newest - i] * k[i]
def test_nbc_f(f, x, k):
    """Launch the CUDA convolution kernel ``f`` on ``x`` and ``k``.

    Allocates a zero-filled CuPy output of length ``x.size - k.shape[0] + 1``
    and launches ``f`` with 128 threads per block and enough blocks to cover
    every output element.

    Args:
        f: A kernel taking ``(x, k, y)`` and launchable as ``f[blocks, threads]``.
        x: Input samples.
        k: Convolution kernel; only ``k.shape[0]`` is used to size the output.

    Returns:
        The CuPy array the kernel wrote its result into.
    """
    threads_per_block = 128
    out = cp.zeros(x.size - k.shape[0] + 1)
    # Integer division plus one always yields at least one block; the kernels
    # guard against the surplus threads.
    blocks = out.size // threads_per_block + 1
    f[blocks, threads_per_block](x, k, out)
    return out
def test_1d_1o():
    """Print the first-order "valid" convolution of a tiny input per backend.

    Runs cusignal, NumPy, the numba CPU version and the numba CUDA kernel on
    the same 5-sample signal and 2-tap kernel so the printed sizes and values
    can be compared by eye.  Nothing is asserted.
    """
    signal = np.arange(5)
    taps = np.arange(2) + 1

    via_cusignal = cs.convolve(signal, taps, "valid")
    print("cs-1o", via_cusignal.size, via_cusignal)

    via_numpy = np.convolve(signal, taps, "valid")
    print("np-1o", via_numpy.size, via_numpy)

    via_njit = nbconv_1d1o(signal, taps)
    print("nj-1o", via_njit.size, via_njit.astype(dtype=int))

    via_cuda = test_nbc_f(nbconv_1d1o_device, signal, taps)
    print("cj-1o", via_cuda.size, via_cuda.astype(dtype=int))
def test_1d_2o():
    """Print the second-order convolution of a tiny input per backend.

    Runs cusignal, the numba CPU version and the numba CUDA kernel on the
    same 5-sample signal and 2x2 kernel so the printed sizes and values can
    be compared by eye.  Nothing is asserted.
    """
    signal = np.arange(5)
    kernel = np.arange(4).reshape(2, 2) + 1

    via_cusignal = cs.convolve1d2o(signal, kernel)  # valid
    print("cs-2o", via_cusignal.size, via_cusignal)

    via_njit = nbconv_1d2o(signal, kernel)
    print("nj-2o", via_njit.size, via_njit.astype(dtype=int))

    via_cuda = test_nbc_f(nbconv_1d2o_device, signal, kernel)
    print("cj-2o", via_cuda.size, via_cuda.astype(dtype=int))
def test_1d_3o():
    """Print the third-order convolution of a tiny input per backend.

    Runs cusignal, the numba CPU version and the numba CUDA kernel on the
    same 5-sample signal and 2x2x2 kernel so the printed sizes and values can
    be compared by eye.  Nothing is asserted.
    """
    signal = np.arange(5)
    kernel = np.arange(8).reshape(2, 2, 2) + 1

    via_cusignal = cs.convolve1d3o(signal, kernel)  # valid
    print("cs-3o", via_cusignal.size, via_cusignal)

    via_njit = nbconv_1d3o(signal, kernel)
    print("nj-3o", via_njit.size, via_njit.astype(dtype=int))

    via_cuda = test_nbc_f(nbconv_1d3o_device, signal, kernel)
    print("cj-3o", via_cuda.size, via_cuda.astype(dtype=int))
def test_1d_1o2o3o():
    """Run the first-, second- and third-order print checks, in that order."""
    for check in (test_1d_1o, test_1d_2o, test_1d_3o):
        check()
def time_n(n, f, a, b):
    """Return the seconds taken by ``n`` back-to-back calls of ``f(a, b)``.

    Uses ``time.perf_counter`` rather than ``time.time``: it is monotonic and
    has the highest resolution available, so short runs are not distorted by
    wall-clock adjustments or a coarse timer.

    Args:
        n: Number of times to call ``f``.
        f: Callable invoked as ``f(a, b)``; its return value is discarded.
        a: First argument forwarded to ``f``.
        b: Second argument forwarded to ``f``.

    Returns:
        Elapsed time in seconds as a float.

    Note:
        No device synchronisation is performed here; if ``f`` only enqueues
        asynchronous work, the figure may not include its completion.
    """
    start = time.perf_counter()
    for _ in range(n):
        f(a, b)
    return time.perf_counter() - start
def time_n_cuda(n, f, a, b):
    """Return the seconds taken by ``n`` launches of the CUDA kernel ``f``.

    Each iteration goes through ``test_nbc_f(f, a, b)``, so output allocation
    and any host/device transfers done by the launch are part of the
    measurement.

    The device is synchronised before the clock starts, so previously queued
    work is not billed to this run.  It is synchronised again before the clock
    stops, so the figure covers kernel completion rather than only the launch
    calls.

    Args:
        n: Number of launches.
        f: CUDA kernel accepted by ``test_nbc_f``.
        a: Input samples forwarded to the kernel.
        b: Convolution kernel forwarded to the kernel.

    Returns:
        Elapsed time in seconds as a float.
    """
    cuda.synchronize()
    start = time.perf_counter()
    for _ in range(n):
        test_nbc_f(f, a, b)
    # Kernel launches can return before the GPU has finished.
    cuda.synchronize()
    return time.perf_counter() - start
def benchmark_1d2o(m, n, d):
    """Time the second-order convolution on random data and print the results.

    Args:
        m: Number of repetitions per implementation.
        n: Length of the random input signal (uniform in [-1, 1)).
        d: Side length of the random ``d x d`` kernel (uniform in [-1, 1)).

    Prints one line each for cusignal ("cs"), the numba CPU version ("nj")
    and the numba CUDA kernel ("cj"), in that order.
    """
    signal = np.random.uniform(-1, 1, n)
    kernel = np.random.uniform(-1, 1, (d, d))

    print("cs-1d2o", time_n(m, cs.convolve1d2o, signal, kernel))
    print("nj-1d2o", time_n(m, nbconv_1d2o, signal, kernel))
    print("cj-1d2o", time_n_cuda(m, nbconv_1d2o_device, signal, kernel))
def benchmark_1d3o(m, n, d):
    """Time the third-order convolution on random data and print the results.

    Args:
        m: Number of repetitions per implementation.
        n: Length of the random input signal (uniform in [-1, 1)).
        d: Side length of the random ``d x d x d`` kernel (uniform in [-1, 1)).

    Prints one line each for cusignal ("cs"), the numba CPU version ("nj")
    and the numba CUDA kernel ("cj"), in that order.
    """
    signal = np.random.uniform(-1, 1, n)
    kernel = np.random.uniform(-1, 1, (d, d, d))

    print("cs-1d3o", time_n(m, cs.convolve1d3o, signal, kernel))
    print("nj-1d3o", time_n(m, nbconv_1d3o, signal, kernel))
    print("cj-1d3o", time_n_cuda(m, nbconv_1d3o_device, signal, kernel))
if __name__ == "__main__":
    # m repetitions, signal length n, kernel side length d.
    m, n, d = 50, 200, 50
    # The benchmarks are run twice; presumably the first pass also pays the
    # one-off JIT compilation cost, so the second pass is the one to compare.
    for banner in ("first run", "second run"):
        print(banner)
        benchmark_1d2o(m, n, d)
        benchmark_1d3o(m, n, d)
    test_1d_1o2o3o()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment