Skip to content

Instantly share code, notes, and snippets.

@kazimuth
Created April 4, 2017 14:41
Show Gist options
  • Save kazimuth/f9952810d8117f782be71279c0f16f6e to your computer and use it in GitHub Desktop.
Save kazimuth/f9952810d8117f782be71279c0f16f6e to your computer and use it in GitHub Desktop.
Minimal CUDA + Boost.Python example
nvcc -arch sm_61 -O3 -L/usr/local/lib -lboost_python -I/usr/include/python2.7/ -Xcompiler -fPIC --shared test_python.cu -o test_python.so
$ python
Python 2.7.12 (default, Nov 19 2016, 06:48:10)
[GCC 5.4.0 20160609] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import test_python
>>> test_python.test_saxpy()
Max error: 0.000000
0
>>>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <boost/python.hpp>
// SAXPY kernel: each thread updates at most one element, y[i] = a*x[i] + y[i].
//
// The element handled by a thread is its flat 1D global index
// (blockIdx.x * blockDim.x + threadIdx.x). Threads whose index is n or
// larger return without touching memory, so the launch may cover more
// threads than there are elements.
//
//   n : number of elements to process
//   a : scalar multiplier applied to x
//   x : input array, read only by this kernel
//   y : input/output array, updated in place
__global__
void saxpy(int n, float a, float *x, float *y)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;

    // Tail guard: the grid rarely divides n evenly.
    if (idx >= n) {
        return;
    }

    y[idx] = a * x[idx] + y[idx];
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "test_saxpy: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Runs the saxpy kernel on 1M elements (x = 1, y = 2, a = 2) and prints the
// largest absolute deviation of the result from the expected value 4.
//
// Returns 0 when every allocation, copy and the kernel launch succeeded and
// the "Max error" line was printed; returns 1 (after writing a diagnostic to
// stderr) if any host allocation or CUDA call failed. The printed max error
// itself does not influence the return value.
int test_saxpy(void)
{
    const int N = 1 << 20;
    const size_t bytes = N * sizeof(float);
    const int threadsPerBlock = 256;
    const int numBlocks = (N + threadsPerBlock - 1) / threadsPerBlock;  // ceil-div

    // All pointers start as NULL so the shared cleanup below is safe no
    // matter how far the function got before a failure.
    float *x = NULL;
    float *y = NULL;
    float *d_x = NULL;
    float *d_y = NULL;
    int status = 1;

    do {
        x = (float*)malloc(bytes);
        y = (float*)malloc(bytes);
        if (x == NULL || y == NULL) {
            fprintf(stderr, "test_saxpy: host allocation of %lu bytes failed\n",
                    (unsigned long)bytes);
            break;
        }

        if (!cudaCallSucceeded(cudaMalloc(&d_x, bytes), "cudaMalloc(d_x)")) break;
        if (!cudaCallSucceeded(cudaMalloc(&d_y, bytes), "cudaMalloc(d_y)")) break;

        for (int i = 0; i < N; i++) {
            x[i] = 1.0f;
            y[i] = 2.0f;
        }

        if (!cudaCallSucceeded(cudaMemcpy(d_x, x, bytes, cudaMemcpyHostToDevice),
                               "cudaMemcpy(x -> d_x)")) break;
        if (!cudaCallSucceeded(cudaMemcpy(d_y, y, bytes, cudaMemcpyHostToDevice),
                               "cudaMemcpy(y -> d_y)")) break;

        // Perform SAXPY on 1M elements
        saxpy<<<numBlocks, threadsPerBlock>>>(N, 2.0f, d_x, d_y);
        // A launch does not return a status; a bad configuration is only
        // visible through cudaGetLastError().
        if (!cudaCallSucceeded(cudaGetLastError(), "saxpy kernel launch")) break;

        // This blocking copy is also where a fault raised while the kernel
        // was executing would be reported.
        if (!cudaCallSucceeded(cudaMemcpy(y, d_y, bytes, cudaMemcpyDeviceToHost),
                               "cudaMemcpy(d_y -> y)")) break;

        // Expected result per element: 2*1 + 2 = 4.
        float maxError = 0.0f;
        for (int i = 0; i < N; i++) {
            maxError = fmaxf(maxError, fabsf(y[i] - 4.0f));
        }
        printf("Max error: %f\n", maxError);

        status = 0;
    } while (0);

    // cudaFree and free both accept NULL, so no per-pointer checks are needed.
    cudaFree(d_x);
    cudaFree(d_y);
    free(x);
    free(y);
    return status;
}
// Defines the Python extension module `test_python`. The module name must
// match the shared-object file name (test_python.so) for `import` to find it.
BOOST_PYTHON_MODULE(test_python)
{
    // Expose the C++ function to Python as test_python.test_saxpy().
    boost::python::def("test_saxpy", test_saxpy);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment