Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
PyCUDA/Thrust interop
import pycuda
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
from codepy.cgen import *
from codepy.bpl import BoostPythonModule
from codepy.cuda import CudaModule
# Host module: compiled for the CPU and exposed to Python via Boost.Python.
host_mod = BoostPythonModule()

# Device module: compiled with NVCC and attached to the host module.
nvcc_mod = CudaModule(host_mod)

# Describe the device module code.
# Headers needed by the Thrust sort routine below.
nvcc_includes = [
    'thrust/sort.h',
    'thrust/device_vector.h',
    'cuda.h',
]
# Add the includes to the device module.
nvcc_mod.add_to_preamble([Include(x) for x in nvcc_includes])

# Device-side entry point: wraps the raw CUdeviceptr in a
# thrust::device_ptr<float> and sorts `length` floats in place.
nvcc_function = FunctionBody(
    FunctionDeclaration(
        Value('void', 'my_sort'),
        [Value('CUdeviceptr', 'input_ptr'),
         Value('int', 'length')]),
    Block([
        Statement('thrust::device_ptr<float> thrust_ptr((float*)input_ptr)'),
        Statement('thrust::sort(thrust_ptr, thrust_ptr+length)'),
    ]))

# Add the function to nvcc_mod.
# This adds its declaration to host_mod as well.
nvcc_mod.add_function(nvcc_function)
# Header providing boost::python::extract<>, used by the host entry point.
host_includes = [
    'boost/python/extract.hpp',
]
# Add host includes to the module.
host_mod.add_to_preamble([Include(x) for x in host_includes])

host_namespaces = [
    'using namespace boost::python',
]
# Add the Boost.Python using statement.
host_mod.add_to_preamble([Statement(x) for x in host_namespaces])

# C++ statements forming the body of host_entry(gpu_array).
host_statements = [
    # Extract information from the PyCUDA GPUArray.
    # Get length (first entry of the shape tuple).
    'tuple shape = extract<tuple>(gpu_array.attr("shape"))',
    'int length = extract<int>(shape[0])',
    # Get data pointer.
    'CUdeviceptr ptr = extract<CUdeviceptr>(gpu_array.attr("gpudata"))',
    # Call the Thrust routine compiled into the CudaModule.
    'my_sort(ptr, length)',
    # Return the same array object that was passed in.
    'return gpu_array',
]

# Register the Python-visible entry point on the host module.
host_mod.add_function(
    FunctionBody(
        FunctionDeclaration(
            Value('object', 'host_entry'),
            [Value('object', 'gpu_array')]),
        Block([Statement(x) for x in host_statements])))
# Print out the generated code, to see what we're actually compiling.
print("---------------------- Host code ----------------------")
print(host_mod.generate())
print("--------------------- Device code ---------------------")
print(nvcc_mod.generate())

# Compile modules.
import codepy.jit
import codepy.toolchain

# NOTE(review): a user reports guess_toolchain() raising KeyError: 'SO'
# on Python 3.6 with their installed codepy -- presumably a newer codepy
# release is required there; confirm before relying on this.
gcc_toolchain = codepy.toolchain.guess_toolchain()
nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain()

# Build the host and device code and load the resulting extension module.
module = nvcc_mod.compile(gcc_toolchain, nvcc_toolchain, debug=True)
# Build a random float32 test vector on the host.
length = 100
a = np.array(np.random.rand(length), dtype=np.float32)
print("---------------------- Unsorted -----------------------")
print(a)

# Copy to the GPU and sort through the compiled extension.
b = gpuarray.to_gpu(a)
# Call Thrust!!
c = module.host_entry(b)
print("----------------------- Sorted ------------------------")
# print() call form: the bare `print c.get()` statement is a SyntaxError
# on Python 3 and was inconsistent with the rest of the script.
print(c.get())

This comment has been minimized.

Copy link

@s3p02 s3p02 commented Mar 3, 2017

I get the error:

**---------------------- Host code ----------------------
#include <boost/python.hpp>
#include <cuda.h>
void my_sort(CUdeviceptr input_ptr, int length);
#include <boost/python/extract.hpp>
using namespace boost::python;

namespace private_namespace_db9cd38ee0995488b35c8405321b8f95
object host_entry(object gpu_array)
tuple shape = extract<tuple>(gpu_array.attr("shape"));
int length = extract<int>(shape[0]);
CUdeviceptr ptr = extract<CUdeviceptr>(gpu_array.attr("gpudata"));
my_sort(ptr, length);
return gpu_array;

using namespace private_namespace_db9cd38ee0995488b35c8405321b8f95;

boost::python::def("host_entry", &host_entry);
--------------------- Device code ---------------------
#include <thrust/sort.h>
#include <thrust/device_vector.h>
#include <cuda.h>

void my_sort(CUdeviceptr input_ptr, int length)
thrust::device_ptr<float> thrust_ptr((float*)input_ptr);
thrust::sort(thrust_ptr, thrust_ptr+length);

Traceback (most recent call last):
File "", line 82, in
gcc_toolchain = codepy.toolchain.guess_toolchain()
File "/MY/HOME_DIRECTORY/anaconda3/lib/python3.6/site-packages/codepy/", line 412, in guess_toolchain
kwargs = _guess_toolchain_kwargs_from_python_config()
File "/MY/HOME_DIRECTORY/anaconda3/lib/python3.6/site-packages/codepy/", line 398, in _guess_toolchain_kwargs_from_python_config
KeyError: 'SO'**

How do I proceed?


This comment has been minimized.

Copy link

@looninho looninho commented May 2, 2020

I get this error when trying your example:

CompileError Traceback (most recent call last)
4 nvcc_toolchain = codepy.toolchain.guess_nvcc_toolchain()
----> 6 module = nvcc_mod.compile(gcc_toolchain, nvcc_toolchain, debug=True)

~/anaconda3/envs/test/lib/python3.6/site-packages/codepy/ in compile(self, host_toolchain, nvcc_toolchain, host_kwargs, nvcc_kwargs, **kwargs)
81 host_checksum, host_mod_name, host_object, host_compiled = compile_from_string(
82 host_toolchain,, host_code,
---> 83 object=True, **local_host_kwargs)
84 device_checksum, device_mod_name, device_object, device_compiled = compile_from_string(
85 nvcc_toolchain, 'gpu', device_code, '',

~/anaconda3/envs/test/lib/python3.6/site-packages/codepy/ in compile_from_string(toolchain, name, source_string, source_name, cache_dir, debug, wait_on_error, debug_recompile, object, source_is_binary, sleep_delay)
430 if object:
--> 431 toolchain.build_object(ext_file, source_paths, debug=debug)
432 else:
433 toolchain.build_extension(ext_file, source_paths, debug=debug)

~/anaconda3/envs/test/lib/python3.6/site-packages/codepy/ in build_object(self, ext_file, source_files, debug)
190 print("FAILED compiler invocation:" +
191 " ".join(cc_cmdline), file=sys.stderr)
--> 192 raise CompileError("module compilation failed")
194 def build_extension(self, ext_file, source_files, debug=False):

CompileError: module compilation failed

Any suggestion is welcome

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.