Skip to content

Instantly share code, notes, and snippets.

@tjyuyao
Forked from szagoruyko/pycudatorch.py
Last active January 9, 2022 03:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tjyuyao/68d83fa3b6b92435637a9c5dcb79a027 to your computer and use it in GitHub Desktop.
Save tjyuyao/68d83fa3b6b92435637a9c5dcb79a027 to your computer and use it in GitHub Desktop.
import torch
import pycuda.autoprimaryctx
from pycuda.compiler import SourceModule
# Build the inline CUDA C source into a PyCUDA module and fetch a callable
# handle to the element-wise multiply kernel.
#
# The kernel indexes with the *global* thread id (block offset + thread offset)
# so a launch may span several blocks; for a single-block launch blockIdx.x is
# 0 and the index reduces to threadIdx.x.  The kernel performs no bounds
# check, so callers must launch exactly one thread per array element.
mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
    // Global 1-D element index; equals threadIdx.x when only one block runs.
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    dest[i] = a[i] * b[i];
}
""")
multiply_them = mod.get_function("multiply_them")
class Holder(pycuda.driver.PointerHolderBase):
    """Wrap a torch tensor so it can be passed as a PyCUDA kernel argument.

    The wrapper stores the tensor itself (so the Python object stays
    referenced for as long as the holder exists) and exposes the tensor's
    raw data address through ``get_pointer``.

    NOTE(review): nothing here checks that the tensor lives on a CUDA
    device or is contiguous -- presumably callers guarantee both; confirm.
    """

    def __init__(self, t: torch.Tensor):
        super().__init__()
        # Address captured once at construction time.
        # NOTE(review): presumably mirrors the ``gpudata`` attribute PyCUDA
        # arrays expose -- verify against the PyCUDA docs.
        self.gpudata = t.data_ptr()
        # Hold on to the tensor so its address can be re-read on demand.
        self.t = t

    def get_pointer(self):
        """Return the wrapped tensor's current data address."""
        return self.t.data_ptr()
# Demo: multiply two random CUDA tensors element-wise with the custom kernel
# and print the difference from PyTorch's own product.

# Single source of truth for the problem size: it sizes both inputs AND the
# launch configuration, so the two can never drift apart.
NUM_ELEMENTS = 400

a = torch.randn(NUM_ELEMENTS, dtype=torch.float32).cuda()
b = torch.randn(NUM_ELEMENTS, dtype=torch.float32).cuda()
dest = torch.empty_like(a)

# The kernel performs no bounds check, so the launch must supply exactly one
# thread per element: one block of NUM_ELEMENTS threads.
multiply_them(
    Holder(dest),
    Holder(a),
    Holder(b),
    block=(NUM_ELEMENTS, 1, 1),
    grid=(1, 1),
)

# Block until outstanding GPU work has finished before reading the result.
torch.cuda.synchronize()
print(dest - a * b)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment