Instantly share code, notes, and snippets.

Embed
What would you like to do?
Minimal CUDA example (with helpful comments).
#include <stdio.h>
//
// Nearly minimal CUDA example.
// Compile with:
//
// nvcc -o example example.cu
//
#define N 1000
//
// A function marked __global__ is a "kernel": it runs on the
// GPU but is launched from the CPU.
//
// This kernel computes b[i] = 2*a[i] for each element i of an
// int array of length N.
//
// The index combines the block index and the thread index so
// the kernel is correct for any launch configuration
// <<<blocks, threadsPerBlock>>>. With the <<<N, 1>>> launch
// used in main() below, blockDim.x == 1 and threadIdx.x == 0,
// so i reduces to blockIdx.x -- one single-thread block per
// array element.
//
// The i<N guard is required whenever the total number of
// threads launched doesn't exactly match the array length
// (e.g. when the block size doesn't divide N evenly); it is
// harmless otherwise, so it is kept as a matter of habit.
//
// Note how we're mixing GPU and CPU code in the same source
// file. An alternative way to use CUDA is to keep
// C/C++ code separate from CUDA code and dynamically
// compile and load the CUDA code at runtime, a little
// like how you compile and load OpenGL shaders from
// C/C++ code.
//
__global__
void add(int *a, int *b) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < N) {
        b[i] = 2*a[i];
    }
}
//
// Report a failed CUDA call and bail out of main() with a
// failure status. A macro (rather than a function) so that it
// can 'return' from main directly, and so __FILE__/__LINE__
// point at the failing call site.
//
#define CUDA_CHECK(call) do { \
    cudaError_t err_ = (call); \
    if (err_ != cudaSuccess) { \
        fprintf(stderr, "CUDA error at %s:%d: %s\n", \
                __FILE__, __LINE__, cudaGetErrorString(err_)); \
        return 1; \
    } \
} while (0)

int main() {
    //
    // Create int arrays on the CPU.
    // ('h' stands for "host".)
    //
    int ha[N], hb[N];
    //
    // Create corresponding int arrays on the GPU.
    // ('d' stands for "device".)
    //
    int *da, *db;
    CUDA_CHECK(cudaMalloc((void **)&da, N*sizeof(int)));
    CUDA_CHECK(cudaMalloc((void **)&db, N*sizeof(int)));
    //
    // Initialise the input data on the CPU.
    //
    for (int i = 0; i < N; ++i) {
        ha[i] = i;
    }
    //
    // Copy input data to array on GPU.
    //
    CUDA_CHECK(cudaMemcpy(da, ha, N*sizeof(int), cudaMemcpyHostToDevice));
    //
    // Launch GPU code with N single-thread blocks, one per
    // array element. (Fine for a minimal demo; real code would
    // use fewer, larger blocks, e.g. <<<(N+255)/256, 256>>>.)
    //
    add<<<N, 1>>>(da, db);
    //
    // A kernel launch returns no status directly: an invalid
    // launch configuration shows up in cudaGetLastError(), and
    // errors during execution surface at the next
    // synchronizing call. Without these checks a failed kernel
    // silently leaves hb full of garbage or zeros.
    //
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());
    //
    // Copy output array from GPU back to CPU.
    //
    CUDA_CHECK(cudaMemcpy(hb, db, N*sizeof(int), cudaMemcpyDeviceToHost));
    for (int i = 0; i < N; ++i) {
        printf("%d\n", hb[i]);
    }
    //
    // Free up the arrays on the GPU.
    //
    CUDA_CHECK(cudaFree(da));
    CUDA_CHECK(cudaFree(db));
    return 0;
}
@kylelk

This comment has been minimized.

kylelk commented Mar 12, 2014

Thank you, this example is easy to understand

@YarnSeemannsgarn

This comment has been minimized.

YarnSeemannsgarn commented Sep 16, 2014

Thanks =)

@dpiponi

This comment has been minimized.

Owner

dpiponi commented May 11, 2017

Well this is convenient. I do a web search for a minimal CUDA example and find my own example. Still compiles and runs too :-)

@john-danson

This comment has been minimized.

john-danson commented Oct 25, 2017

Thanks :>)

@alexshi0000

This comment has been minimized.

alexshi0000 commented Feb 6, 2018

woa nice

@Aswathym24

This comment has been minimized.

Aswathym24 commented Jun 26, 2018

Thanks, it's working for N=100000, but beyond that (#define N 1000000 and more) the output is all 0's. Why is that?

@dpiponi

This comment has been minimized.

Owner

dpiponi commented Jul 24, 2018

I'm surprised it works for N=100000.
If you look here https://en.wikipedia.org/wiki/CUDA#Version_features_and_specifications you'll see various limits on the sizes of CUDA structures.
In particular, since <<<N, 1>>> launches N one-thread blocks, the relevant limit is the "Maximum x-dimension of a grid of thread blocks" — 65535 on older GPUs (compute capability < 3.0).
I chose 1000 so it's comfortably below that limit.
If you want to work with bigger arrays you'll need to work with multiple blocks and/or higher dimensional blocks.
But then the demo wouldn't be minimal any more :-)

@austinRichards3

This comment has been minimized.

austinRichards3 commented Aug 28, 2018

isn't the syntax for launching a kernel <<<blocks_per_grid, threads_per_block>>>

@ryanbennettvoid

This comment has been minimized.

ryanbennettvoid commented Sep 22, 2018

It produces random scrambled results, as if the output values are pointer addresses. Running on Linux Mint 19 with GTX 1060.

@alexsystemf

This comment has been minimized.

alexsystemf commented Oct 5, 2018

It produces random scrambled results, as if the output values are pointer addresses. Running on Linux Mint 19 with GTX 1060.

Ryan, if you see random output numbers, they are just out-of-bounds memory contents. Check the i and N values, then re-compile and re-run. Also try a small value such as "#define N 32".

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment