public
Last active

PyCUDA/Thrust interop

  • Download Gist
thrust_demo.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
import pycuda
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
 
from codepy.cgen import *
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule
 
# Host module, compiled for the CPU.
host_mod = BoostPythonModule()

# Device module, compiled with NVCC; it is constructed around host_mod.
nvcc_mod = CudaModule(host_mod)

# Headers needed by the device-side code.
nvcc_includes = [
    'thrust/sort.h',
    'thrust/device_vector.h',
    'cuda.h',
]
nvcc_mod.add_to_preamble([Include(header) for header in nvcc_includes])

# Device-side function, generated as:
#     void my_sort(CUdeviceptr input_ptr, int length)
# The body wraps the raw device pointer in a thrust::device_ptr<float> and
# sorts the range [thrust_ptr, thrust_ptr + length).
my_sort_signature = FunctionDeclaration(
    Value('void', 'my_sort'),
    [
        Value('CUdeviceptr', 'input_ptr'),
        Value('int', 'length'),
    ])
my_sort_body = Block([
    Statement('thrust::device_ptr<float> thrust_ptr((float*)input_ptr)'),
    Statement('thrust::sort(thrust_ptr, thrust_ptr+length)'),
])
nvcc_function = FunctionBody(my_sort_signature, my_sort_body)

# Register the function with nvcc_mod.
# This adds its declaration to host_mod as well.
nvcc_mod.add_function(nvcc_function)
 
# Headers needed by the host-side wrapper.
host_includes = [
    'boost/python/extract.hpp',
]
host_mod.add_to_preamble([Include(header) for header in host_includes])

# Boost.Python using-directive, emitted into the host preamble so the
# statements below can name `extract`, `tuple` and `object` unqualified.
host_namespaces = [
    'using namespace boost::python',
]
host_mod.add_to_preamble(
    [Statement(directive) for directive in host_namespaces])

# Body of the host entry point, one C++ statement per string.
host_statements = [
    # Pull what we need out of the PyCUDA GPUArray:
    # ... its length (first entry of the shape tuple)
    'tuple shape = extract<tuple>(gpu_array.attr("shape"))',
    'int length = extract<int>(shape[0])',
    # ... and its device data pointer
    'CUdeviceptr ptr = extract<CUdeviceptr>(gpu_array.attr("gpudata"))',
    # Call the Thrust routine compiled into the CudaModule
    'my_sort(ptr, length)',
    # Hand the same array object back to the caller
    'return gpu_array',
]

# Generated as: object host_entry(object gpu_array) { ... }
host_entry_signature = FunctionDeclaration(
    Value('object', 'host_entry'),
    [Value('object', 'gpu_array')])
host_entry_body = Block([Statement(line) for line in host_statements])
host_mod.add_function(FunctionBody(host_entry_signature, host_entry_body))
 
# Show the generated sources so it is clear what is about to be compiled.
# generate() is called only after its banner has been printed, one module
# at a time, host first.
for banner, mod in (
        ("---------------------- Host code ----------------------", host_mod),
        ("--------------------- Device code ---------------------", nvcc_mod)):
    print(banner)
    print(mod.generate())
print("-------------------------------------------------------")
 
 
# Compile modules
import codepy.jit
import codepy.toolchain

# Let codepy guess one toolchain for the host compiler and one for NVCC.
gcc_toolchain = codepy.toolchain.guess_toolchain()
nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain()

# Build through the device module, passing both toolchains; the result is
# the object whose host_entry() is called below.
module = nvcc_mod.compile(
    gcc_toolchain,
    nvcc_toolchain,
    debug=True)
 
 
 
# Demo: build 100 random float32 values on the host and sort them on the GPU.
length = 100
a = np.array(np.random.rand(length), dtype=np.float32)
print("---------------------- Unsorted -----------------------")
print(a)
# Wrap the host data in a PyCUDA GPUArray.
b = gpuarray.to_gpu(a)
# Call Thrust!!
# host_entry passes the array's device pointer and length to my_sort and
# returns the array object it was given.
c = module.host_entry(b)
print("----------------------- Sorted ------------------------")
# Use the function-call form of print, like every other print in this
# script; the bare `print c.get()` statement is a SyntaxError on Python 3.
# NOTE(review): get() presumably copies the device data back to the host.
print(c.get())
print("-------------------------------------------------------")

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.