Skip to content

Instantly share code, notes, and snippets.

@jaredhoberock
Created April 30, 2012 18:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jaredhoberock/2561193 to your computer and use it in GitHub Desktop.
Save jaredhoberock/2561193 to your computer and use it in GitHub Desktop.
Thrust article listings
// Figure 16.1
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/sort.h>
#include <thrust/copy.h>
#include <cstdlib>
// Entry point for Figure 16.1: fill a host buffer with rand() values,
// sort a device-side copy of it, and copy the sorted values back to the host.
// Always returns 0.
int main()
{
    // 1 << 24 elements, i.e. 16M integers
    thrust::host_vector<int> host_values(1 << 24);

    // populate the host buffer by calling rand once per element
    thrust::generate(host_values.begin(), host_values.end(), rand);

    // building a device_vector from a host_vector transfers the data to the device
    thrust::device_vector<int> device_values(host_values);

    // the sort operates on the device-side copy
    thrust::sort(device_values.begin(), device_values.end());

    // overwrite the host buffer with the sorted device contents
    thrust::copy(device_values.begin(), device_values.end(), host_values.begin());

    return 0;
}
// Figure 16.3a
// number of int elements held by the container
size_t N = 1024;
// Thrust container that owns N ints of device storage
device_vector<int> d_vec(N);
// obtain a plain int* to the container's storage
int *raw_ptr = raw_pointer_cast(d_vec.data());
// the raw pointer can be handed to non-Thrust API calls such as cudaMemset
cudaMemset(raw_ptr, 0, N * sizeof(int));
// ... or to a hand-written kernel; the grid holds N / 128 blocks of 128 threads
const size_t threads_per_block = 128;
my_kernel<<<N / threads_per_block, threads_per_block>>>(N, raw_ptr);
// no explicit free: the storage belongs to d_vec and is freed automatically
// Figure 16.3b
// number of int elements in the allocation
size_t N = 1024;
// size of the allocation in bytes
const size_t num_bytes = N * sizeof(int);
// plain pointer that receives the address of the device allocation
int *raw_ptr;
cudaMalloc((void **)&raw_ptr, num_bytes);
// device_pointer_cast wraps the raw pointer in a device_ptr
device_ptr<int> dev_ptr = device_pointer_cast(raw_ptr);
// device_ptr values can be passed to Thrust algorithms as iterators
device_ptr<int> dev_end = dev_ptr + N;
sort(dev_ptr, dev_end);
// individual elements can be assigned through the device_ptr
dev_ptr[0] = 1;
// memory obtained from cudaMalloc must be released explicitly
cudaFree(raw_ptr);
// Figure 16.4a
// SAXPY kernel: for each index i in [0, n), stores a * x[i] + y[i] into y[i].
//   n : number of elements to process
//   a : scalar multiplier applied to x
//   x : input array, read only by this kernel
//   y : input/output array, updated in place
// Each thread handles at most one element, chosen from its block index,
// block size, and thread index along the x dimension.
__global__
void saxpy_kernel(int n, float a, float *x, float *y)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;

    // threads whose index falls at or beyond n have nothing to do
    if (idx >= n)
    {
        return;
    }

    y[idx] = a * x[idx] + y[idx];
}
// Host-side wrapper that launches saxpy_kernel over n elements.
//   n : number of elements; values <= 0 make this call a no-op
//   a : scalar multiplier
//   x : input array, forwarded unchanged to the kernel
//   y : input/output array, forwarded unchanged to the kernel
// x and y are dereferenced by the kernel, so they must be addresses the
// device can access.
void saxpy(int n, float a, float *x, float *y)
{
    // With no elements the grid size would be 0, which is not a valid launch
    // configuration; skip the launch instead of raising a CUDA error.
    if (n <= 0)
    {
        return;
    }

    // set launch configuration parameters
    const int block_size = 256;
    // ceil(n / block_size), written so that it cannot overflow int the way
    // (n + block_size - 1) does when n is close to INT_MAX
    const int grid_size = (n - 1) / block_size + 1;

    // launch saxpy kernel
    saxpy_kernel<<<grid_size, block_size>>>(n, a, x, y);
}
// Figure 16.4b
// Binary function object computing a * x + y for a scale factor fixed at
// construction time.
struct saxpy_functor
{
    // scale factor applied to the first argument
    const float a;

    // _a : value stored as the scale factor
    saxpy_functor(float _a) : a(_a) {}

    // Returns a * x + y.
    // const-qualified because the call does not modify the functor; this lets
    // it be invoked on const objects and through const references as well.
    __host__ __device__
    float operator()(float x, float y) const
    {
        return a * x + y;
    }
};
// SAXPY expressed with a Thrust transform: y <- a * x + y, element by element.
//   a : scalar multiplier
//   x : first input sequence
//   y : second input sequence; also receives the results
void saxpy(float a, device_vector<float> &x, device_vector<float> &y)
{
    // the binary transform pairs each element of x with the matching element
    // of y and writes the functor's result back over y
    transform(x.begin(), x.end(),   // first input range
              y.begin(),            // start of the second input
              y.begin(),            // start of the output (same storage as y)
              saxpy_functor(a));    // computes a * x + y
}
// Figure 16.6
// Unary function object that returns its argument multiplied by itself.
struct square
{
    // Returns x * x; callable from both host and device code.
    __host__ __device__
    float operator()(float x) const
    {
        const float squared = x * x;
        return squared;
    }
};
// Computes the square root of the sum of the squares of x's elements.
// This is the unfused variant: the squares are first written to a temporary
// vector and only then summed by a separate reduce call.
float snrm2_slow(const thrust::device_vector<float> &x)
{
    // pass 1: store every squared element in a temporary of the same length
    device_vector<float> squares(x.size());
    transform(x.begin(), x.end(), squares.begin(), square());

    // pass 2: add up the squared values
    const float sum_of_squares = reduce(squares.begin(), squares.end());

    return sqrt(sum_of_squares);
}
// Figure 16.7a
// Three float components stored together in one structure.
// NOTE(review): CUDA's own headers also declare a type named float3 -- confirm
// this listing is not meant to be compiled alongside them.
struct float3
{
    float x, y, z;
};
// pointer to float3 elements; each element holds its own x, y and z
float3 *aos;
// (intervening code is elided in the listing)
...
// set the x member of the first element
aos[0].x = 1.0f;
// Figure 16.7b
// Holds three separate float pointers, one per component (x, y, z).
struct float3_soa
{
    float *x, *y, *z;
};
// one object carrying a separate pointer for each of the x, y and z components
float3_soa soa;
// (intervening code is elided in the listing)
...
// set the first element reached through the x pointer
soa.x[0] = 1.0f;
// Figure 16.8
// Function object that applies a fixed 3x3 rotation to an (x, y, z) tuple and
// returns the rotated coordinates as a new tuple.
struct rotate_tuple
{
    // t : the (x, y, z) coordinates to rotate.
    // Taken by const reference so the argument may be a temporary, and
    // const-qualified because the call does not modify the functor.
    __host__ __device__
    tuple<float,float,float> operator()(const tuple<float,float,float> &t) const
    {
        float x = get<0>(t);
        float y = get<1>(t);
        float z = get<2>(t);

        // Rows of the rotation matrix. The two negative coefficients are
        // required: with them the rows are mutually orthogonal unit vectors
        // and the third row is the cross product of the first two. With all
        // coefficients positive, rows one and two are not orthogonal and the
        // transform is not a rotation.
        float rx =  0.36f*x + 0.48f*y - 0.80f*z;
        float ry = -0.80f*x + 0.60f*y + 0.00f*z;
        float rz =  0.48f*x + 0.64f*y + 0.60f*z;

        return make_tuple(rx, ry, rz);
    }
};
...
// one device vector of N floats per coordinate
device_vector<float> x(N);
device_vector<float> y(N);
device_vector<float> z(N);
// zip the three vectors so that each element is seen as one (x, y, z) tuple,
// apply rotate_tuple to every tuple, and write the results back in place
transform(
    make_zip_iterator(make_tuple(x.begin(), y.begin(), z.begin())),  // input begin
    make_zip_iterator(make_tuple(x.end(), y.end(), z.end())),        // input end
    make_zip_iterator(make_tuple(x.begin(), y.begin(), z.begin())),  // output begin
    rotate_tuple());
// Figure 16.9
// Binary function object that selects one of two (float, int) tuples by
// comparing their float members.
struct smaller_tuple
{
    // Returns a when a's float member is strictly less than b's;
    // in every other case returns b.
    __host__ __device__
    tuple<float,int> operator()(tuple<float,int> a, tuple<float,int> b)
    {
        return (get<0>(a) < get<0>(b)) ? a : b;
    }
};
// Returns the index of a smallest element of values, found by reducing
// (value, index) pairs with smaller_tuple.
//   values : the sequence to search
// Returns -1 when values is empty.
int min_index(device_vector<float> &values)
{
    // values[0] is read below to seed the reduction, which would be an
    // out-of-bounds access on an empty vector
    if (values.empty())
    {
        return -1;
    }

    // [begin,end) form the implicit sequence [0,1,2, ... values.size())
    counting_iterator<int> begin(0);
    counting_iterator<int> end(values.size());

    // initial value of the reduction: the first element paired with index 0
    tuple<float,int> init(values[0], 0);

    // pair each value with its index and keep the tuple chosen by smaller_tuple
    tuple<float,int> smallest = reduce(make_zip_iterator(make_tuple(values.begin(), begin)),
                                       make_zip_iterator(make_tuple(values.end(), end)),
                                       init,
                                       smaller_tuple());

    // return the index
    return get<1>(smallest);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment