Skip to content

Instantly share code, notes, and snippets.

@garanews
Last active July 1, 2019 10:53
Show Gist options
  • Save garanews/ca7751175f47b669bcbbf8349c1fac38 to your computer and use it in GitHub Desktop.
Save garanews/ca7751175f47b669bcbbf8349c1fac38 to your computer and use it in GitHub Desktop.
Attempt to use Dask and CuPy with a RawKernel to compute the Ruzicka similarity between a vector and a matrix.
import time
import dask.array as da
import dask.dataframe as dd
import numpy as np
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
import cupy as cp
# Raw CUDA kernel computing a weighted Ruzicka similarity; each thread scores
# one row of the sample matrix against the reference vector.
#
# Kernel arguments, in launch order:
#   x1   - sample matrix read as unsigned 16-bit values, row-major, 1024 per row
#   x2   - reference vector read as unsigned 16-bit values, 1024 entries
#   y    - float output, one score per row (y[row])
#   nrow - number of rows in x1; threads with an index >= nrow do nothing
#
# Every x1 value in column `col` is multiplied by (1024 - col) before it is
# compared with x2[col]. The score is sum(min) / sum(max) over the 1024
# columns, so a row whose sums are both zero yields NaN (0/0).
ruzicka_kernel = cp.RawKernel(r'''
extern "C" __global__
void my_ruzicka(const unsigned short* x1, const unsigned short* x2, float* y, int nrow) {
    // 64-bit row index: in 32-bit int arithmetic row * 1024 wraps around
    // as soon as the matrix has more than 2^21 rows.
    const long long row = (long long)blockDim.x * blockIdx.x + threadIdx.x;
    if (row >= nrow) {
        return;  // thread index beyond the last row
    }

    const unsigned short* sample = x1 + row * 1024;
    float max_ab = 0.0f;
    float min_ab = 0.0f;

    for (int col = 0; col < 1024; col++) {
        // Largest possible product is 65535 * 1024, which fits in an int.
        const int weighted = sample[col] * (1024 - col);
        const int reference = x2[col];
        if (reference > weighted) {
            max_ab += reference;
            min_ab += weighted;
        } else {
            min_ab += reference;
            max_ab += weighted;
        }
    }

    y[row] = min_ab / max_ab;
}
''', 'my_ruzicka')
def ruzicka_retval(a, b):
    """Score every row of ``a`` against ``b`` with ``ruzicka_kernel`` on the GPU.

    Args:
        a: Sample matrix, one sample per row and 1024 values per sample.
            It is converted to a C-contiguous ``uint16`` CuPy array because
            the kernel reads its input as ``unsigned short``; values outside
            the ``uint16`` range wrap during that conversion.
        b: Reference vector with at least 1024 values, converted the same way.

    Returns:
        A ``float32`` CuPy array of shape ``(1, len(a))`` whose element
        ``[0, i]`` is the similarity of row ``i`` of ``a`` to ``b``.

    Raises:
        ValueError: If ``a`` does not hold exactly 1024 values per row or
            ``b`` holds fewer than 1024 values.
    """
    n_features = 1024  # loop bound hard-coded in the kernel's CUDA source
    threads_per_block = 1024

    n_rows = len(a)
    # Size the output by the rows actually received, so a chunk that is
    # smaller or larger than expected never causes an out-of-bounds write.
    y = cp.zeros((1, n_rows), dtype=cp.float32)
    if n_rows == 0:
        # A launch with an empty grid is an error; nothing to compute anyway.
        return y

    a = cp.ascontiguousarray(cp.asarray(a, dtype=cp.uint16))
    b = cp.ascontiguousarray(cp.asarray(b, dtype=cp.uint16))
    if a.size != n_rows * n_features:
        raise ValueError(
            "a must hold exactly %d values per row, got shape %r"
            % (n_features, a.shape)
        )
    if b.size < n_features:
        raise ValueError(
            "b must hold at least %d values, got %d" % (n_features, b.size)
        )

    # One thread per row: round the block count up instead of launching one
    # whole block of threads for every row.
    n_blocks = (n_rows + threads_per_block - 1) // threads_per_block
    # The kernel declares `int nrow`, so pass an explicit 32-bit scalar.
    ruzicka_kernel(
        (n_blocks,), (threads_per_block,), (a, b, y, cp.int32(n_rows))
    )
    return y
# Number of matrix rows per dask chunk, i.e. rows scored by one kernel launch.
CHUNKSIZE = 1024

if __name__ == "__main__":
    # Context managers shut the workers and scheduler down even if the
    # computation raises; the Client is what .compute() below runs on.
    with LocalCUDACluster() as cluster, Client(cluster):
        # GENERATE ONE RANDOM SAMPLE TO IDENTIFY
        # NOTE(review): the weights here run 1023..0, while the kernel
        # multiplies matrix column x by (1024 - x), i.e. 1024..1 - confirm
        # which of the two is intended.
        vector_new = cp.array(np.random.choice([0, 1], 1024), dtype=cp.uint16) * cp.arange(1023, -1, -1, dtype=cp.uint16)

        start = time.time()

        # SIMULATE MATRIX OF RANDOM SAMPLES TO COMPARE (starting with 10k rows,
        # the objective is around 100M). The per-column weighting applied to
        # the vector above is applied to the matrix inside the CUDA kernel,
        # for speed.
        # NOTE(review): both choices are 1, so the matrix is all ones;
        # [0, 1] may have been intended - confirm.
        # Cast to uint16 because the kernel reads the matrix as unsigned short.
        a = np.random.choice(a=[1, 1], size=(10240, 1024)).astype(np.uint16)
        d_da = da.from_array(a, chunks=(CHUNKSIZE, 1024))

        # Each (CHUNKSIZE, 1024) input block collapses to a (1, CHUNKSIZE)
        # block of scores, so the output chunk shape is declared explicitly.
        res = d_da.map_blocks(
            lambda block: ruzicka_retval(
                cp.asarray(block, dtype=cp.uint16), vector_new
            ),
            dtype=cp.float32,
            chunks=(1, CHUNKSIZE),
        ).compute()

        # WORKING without DASK
        # y = cp.zeros(len(d_da), dtype=cp.float32)
        # ruzicka_kernel((len(d_da), ), (1024, ), (cp.array(d_da, dtype=cp.float32), vector_new, y, len(d_da)))

        print("END:", time.time()-start)
        print(res)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment