Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
PyCUDA/Thrust interop
import pycuda
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
from codepy.cgen import *
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule
# PyCUDA/Thrust interop demo.
#
# Builds two code modules with codepy: a Boost.Python host module (compiled by
# the host C++ compiler) and a CUDA module (compiled by NVCC) that wraps
# thrust::sort. The host entry point pulls the device pointer and the length
# out of a PyCUDA GPUArray and hands them to the Thrust routine.
#
# NOTE: the generated code casts the device pointer to float* and reads only
# shape[0], so the array passed in must be a 1-D float32 GPUArray.

#Make a host_module, compiled for CPU
host_mod = BoostPythonModule()
#Make a device module, compiled with NVCC
nvcc_mod = CudaModule(host_mod)

#Describe device module code
#NVCC includes
nvcc_includes = [
    'thrust/sort.h',
    'thrust/device_vector.h',
    'cuda.h',
]
#Add includes to module
nvcc_mod.add_to_preamble([Include(x) for x in nvcc_includes])

#NVCC function
nvcc_function = FunctionBody(
    FunctionDeclaration(Value('void', 'my_sort'),
                        [Value('CUdeviceptr', 'input_ptr'),
                         Value('int', 'length')]),
    Block([Statement('thrust::device_ptr<float> thrust_ptr((float*)input_ptr)'),
           Statement('thrust::sort(thrust_ptr, thrust_ptr+length)')]))

#Add declaration to nvcc_mod
#Adds declaration to host_mod as well
nvcc_mod.add_function(nvcc_function)

host_includes = [
    'boost/python/extract.hpp',
]
#Add host includes to module
host_mod.add_to_preamble([Include(x) for x in host_includes])

host_namespaces = [
    'using namespace boost::python',
]
#Add BPL using statement
host_mod.add_to_preamble([Statement(x) for x in host_namespaces])

host_statements = [
    #Extract information from PyCUDA GPUArray
    #Get length
    'tuple shape = extract<tuple>(gpu_array.attr("shape"))',
    'int length = extract<int>(shape[0])',
    #Get data pointer
    'CUdeviceptr ptr = extract<CUdeviceptr>(gpu_array.attr("gpudata"))',
    #Call Thrust routine, compiled into the CudaModule
    'my_sort(ptr, length)',
    #Return result
    'return gpu_array',
]

#Register the Python-visible entry point on the host module
host_mod.add_function(
    FunctionBody(
        FunctionDeclaration(Value('object', 'host_entry'),
                            [Value('object', 'gpu_array')]),
        Block([Statement(x) for x in host_statements])))

#Print out generated code, to see what we're actually compiling
print("---------------------- Host code ----------------------")
print(host_mod.generate())
print("--------------------- Device code ---------------------")
print(nvcc_mod.generate())

#Compile modules
import codepy.jit
import codepy.toolchain
gcc_toolchain = codepy.toolchain.guess_toolchain()
nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain()
module = nvcc_mod.compile(gcc_toolchain, nvcc_toolchain, debug=True)

length = 100
a = np.array(np.random.rand(length), dtype=np.float32)
print("---------------------- Unsorted -----------------------")
print(a)
b = gpuarray.to_gpu(a)
# Call Thrust!!
# host_entry returns the very object it was given; the sort happens in place
# on the device buffer.
c = module.host_entry(b)
print("----------------------- Sorted ------------------------")
print(c.get())

This comment has been minimized.

Copy link

@s3p02 s3p02 commented Mar 3, 2017

I get the error:

**---------------------- Host code ----------------------
#include <boost/python.hpp>
#include <cuda.h>
void my_sort(CUdeviceptr input_ptr, int length);
#include <boost/python/extract.hpp>
using namespace boost::python;

namespace private_namespace_db9cd38ee0995488b35c8405321b8f95
object host_entry(object gpu_array)
tuple shape = extract<tuple>(gpu_array.attr("shape"));
int length = extract<int>(shape[0]);
CUdeviceptr ptr = extract<CUdeviceptr>(gpu_array.attr("gpudata"));
my_sort(ptr, length);
return gpu_array;

using namespace private_namespace_db9cd38ee0995488b35c8405321b8f95;

boost::python::def("host_entry", &host_entry);
--------------------- Device code ---------------------
#include <thrust/sort.h>
#include <thrust/device_vector.h>
#include <cuda.h>

void my_sort(CUdeviceptr input_ptr, int length)
thrust::device_ptr<float> thrust_ptr((float*)input_ptr);
thrust::sort(thrust_ptr, thrust_ptr+length);

Traceback (most recent call last):
File "", line 82, in
gcc_toolchain = codepy.toolchain.guess_toolchain()
File "/MY/HOME_DIRECTORY/anaconda3/lib/python3.6/site-packages/codepy/", line 412, in guess_toolchain
kwargs = _guess_toolchain_kwargs_from_python_config()
File "/MY/HOME_DIRECTORY/anaconda3/lib/python3.6/site-packages/codepy/", line 398, in _guess_toolchain_kwargs_from_python_config
KeyError: 'SO'**

How do I proceed?


This comment has been minimized.

Copy link

@looninho looninho commented May 2, 2020

I get this error when trying your example:

CompileError Traceback (most recent call last)
4 nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain()
----> 6 module = nvcc_mod.compile(gcc_toolchain, nvcc_toolchain, debug=True)

~/anaconda3/envs/test/lib/python3.6/site-packages/codepy/ in compile(self, host_toolchain, nvcc_toolchain, host_kwargs, nvcc_kwargs, **kwargs)
81 host_checksum, host_mod_name, host_object, host_compiled = compile_from_string(
82 host_toolchain,, host_code,
---> 83 object=True, **local_host_kwargs)
84 device_checksum, device_mod_name, device_object, device_compiled = compile_from_string(
85 nvcc_toolchain, 'gpu', device_code, '',

~/anaconda3/envs/test/lib/python3.6/site-packages/codepy/ in compile_from_string(toolchain, name, source_string, source_name, cache_dir, debug, wait_on_error, debug_recompile, object, source_is_binary, sleep_delay)
430 if object:
--> 431 toolchain.build_object(ext_file, source_paths, debug=debug)
432 else:
433 toolchain.build_extension(ext_file, source_paths, debug=debug)

~/anaconda3/envs/test/lib/python3.6/site-packages/codepy/ in build_object(self, ext_file, source_files, debug)
190 print("FAILED compiler invocation:" +
191 " ".join(cc_cmdline), file=sys.stderr)
--> 192 raise CompileError("module compilation failed")
194 def build_extension(self, ext_file, source_files, debug=False):

CompileError: module compilation failed

Any suggestion is welcome

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment