jamornsriwasansak/cuda-gltexture-interop.cu

## cuda-gltexture-interop.cu
//This software contains source code provided by NVIDIA Corporation.

#include "cuda_runtime.h"
#include "cuda_surface_types.h"
#include "surface_functions.h"

// must include in this order :(
#include "GL\glew.h"
#include "cuda_gl_interop.h"
#include "GLFW\glfw3.h"

#include "cuda\helper_cuda.h"
#include "cuda\helper_math.h"

#include "device_launch_parameters.h"


#include <conio.h>
#include <stdio.h>
#include <iostream>
#include <vector>

void addWithCuda(int *c, const int *a, const int *b, unsigned int size);

__global__ void addKernel(int *c, const int *a, const int *b)
{
    int i = threadIdx.x;
    c[i] = a[i] + b[i];
}

__global__ void paintBlue(cudaSurfaceObject_t surface)
{
	int i = blockIdx.x;
	//surf2Dwrite(, surface, (i / 16) * sizeof(float4), i % 16);
	//printf("%d", i);
	surf2Dwrite(make_float4(0, 0, 1, 1), surface, (i / 16) * sizeof(float4), i % 16);
}

std::pair<GLuint, cudaGraphicsResource *> createGlTexture2D()
{
	GLuint result;
	//int size = 16 * 16 * 16 * 4 * 4;
	//GLubyte * data = new GLubyte[size];
	GLfloat * data = new float[16 * 16 * 4];
	for (int i = 0;i < 16 * 16 * 4;i++)
	{
		if (i % 4 == 0)
		{
			data[i] = 1;
		}
	}
	glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

	//glGenTextures(1, &result);
	glCreateTextures(GL_TEXTURE_2D, 1, &result);
	glTextureImage2DEXT(result, GL_TEXTURE_2D, 0, GL_RGBA32F, 16, 16, 0, GL_RGBA, GL_FLOAT, data);
	glTextureParameteri(result, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	glTextureParameteri(result, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTextureParameteri(result, GL_TEXTURE_WRAP_S, GL_REPEAT);
	glTextureParameteri(result, GL_TEXTURE_WRAP_T, GL_REPEAT);

	cudaGraphicsResource *curt;
	checkCudaErrors(cudaGraphicsGLRegisterImage(&curt, result, GL_TEXTURE_2D, cudaGraphicsMapFlagsNone));

	return std::make_pair(result, curt);
}

static const GLfloat g_vertex_buffer_data[] =
{
	-1.0f, -1.0f, 0.0f,
	1.0f, -1.0f, 0.0f,
	0.0f,  1.0f, 0.0f,
};

GLint textureSamplerLocation;

GLuint LoadShaders(const char * vertex_file_path, const char * fragment_file_path) {

	// Create the shaders
	GLuint VertexShaderID = glCreateShader(GL_VERTEX_SHADER);
	GLuint FragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER);

	// Read the Vertex Shader code from the file
	std::string VertexShaderCode;
	std::ifstream VertexShaderStream(vertex_file_path, std::ios::in);
	if (VertexShaderStream.is_open()) {
		std::string Line = "";
		while (getline(VertexShaderStream, Line))
			VertexShaderCode += "\n" + Line;
		VertexShaderStream.close();
	}
	else {
		printf("Impossible to open %s. Are you in the right directory ? Don't forget to read the FAQ !\n", vertex_file_path);
		getchar();
		return 0;
	}

	// Read the Fragment Shader code from the file
	std::string FragmentShaderCode;
	std::ifstream FragmentShaderStream(fragment_file_path, std::ios::in);
	if (FragmentShaderStream.is_open()) {
		std::string Line = "";
		while (getline(FragmentShaderStream, Line))
			FragmentShaderCode += "\n" + Line;
		FragmentShaderStream.close();
	}

	GLint Result = GL_FALSE;
	int InfoLogLength;


	// Compile Vertex Shader
	printf("Compiling shader : %s\n", vertex_file_path);
	char const * VertexSourcePointer = VertexShaderCode.c_str();
	glShaderSource(VertexShaderID, 1, &VertexSourcePointer, NULL);
	glCompileShader(VertexShaderID);

	// Check Vertex Shader
	glGetShaderiv(VertexShaderID, GL_COMPILE_STATUS, &Result);
	glGetShaderiv(VertexShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength);
	if (InfoLogLength > 0) {
		std::vector<char> VertexShaderErrorMessage(InfoLogLength + 1);
		glGetShaderInfoLog(VertexShaderID, InfoLogLength, NULL, &VertexShaderErrorMessage[0]);
		printf("%s\n", &VertexShaderErrorMessage[0]);
	}


	// Compile Fragment Shader
	printf("Compiling shader : %s\n", fragment_file_path);
	char const * FragmentSourcePointer = FragmentShaderCode.c_str();
	glShaderSource(FragmentShaderID, 1, &FragmentSourcePointer, NULL);
	glCompileShader(FragmentShaderID);

	// Check Fragment Shader
	glGetShaderiv(FragmentShaderID, GL_COMPILE_STATUS, &Result);
	glGetShaderiv(FragmentShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength);
	if (InfoLogLength > 0) {
		std::vector<char> FragmentShaderErrorMessage(InfoLogLength + 1);
		glGetShaderInfoLog(FragmentShaderID, InfoLogLength, NULL, &FragmentShaderErrorMessage[0]);
		printf("%s\n", &FragmentShaderErrorMessage[0]);
	}


	// Link the program
	printf("Linking program\n");
	GLuint ProgramID = glCreateProgram();
	glAttachShader(ProgramID, VertexShaderID);
	glAttachShader(ProgramID, FragmentShaderID);
	glLinkProgram(ProgramID);

	// Check the program
	glGetProgramiv(ProgramID, GL_LINK_STATUS, &Result);
	glGetProgramiv(ProgramID, GL_INFO_LOG_LENGTH, &InfoLogLength);
	if (InfoLogLength > 0) {
		std::vector<char> ProgramErrorMessage(InfoLogLength + 1);
		glGetProgramInfoLog(ProgramID, InfoLogLength, NULL, &ProgramErrorMessage[0]);
		printf("%s\n", &ProgramErrorMessage[0]);
	}


	glDetachShader(ProgramID, VertexShaderID);
	glDetachShader(ProgramID, FragmentShaderID);

	glDeleteShader(VertexShaderID);
	glDeleteShader(FragmentShaderID);

	textureSamplerLocation = glGetUniformLocation(ProgramID, "myTextureSampler");
	std::cout << textureSamplerLocation << std::endl;

	return ProgramID;
}

void initGlWindow()
{
	if (!glfwInit())
	{
		std::cout << "error" << std::endl;
	}

	glfwWindowHint(GLFW_SAMPLES, 1);
	glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
	glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
	glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);

	GLFWwindow * window;
	window = glfwCreateWindow(1024, 768, "renderpls", NULL, NULL);
	if (window == nullptr)
	{
		std::cout << "error2" << std::endl;
		glfwTerminate();
	}

	glfwMakeContextCurrent(window);
	glewExperimental = true;
	if (glewInit() != GLEW_OK)
	{
		std::cout << "error3" << std::endl;
	}

	// create vao
	GLuint VertexArrayID;
	glGenVertexArrays(1, &VertexArrayID);
	glBindVertexArray(VertexArrayID);

	glClearColor(1.0f, 1.0f, 1.0f, 1.0f);

	// This will identify our vertex buffer
	GLuint vertexbuffer;
	glGenBuffers(1, &vertexbuffer);
	glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer);
	glBufferData(GL_ARRAY_BUFFER, sizeof(g_vertex_buffer_data), g_vertex_buffer_data, GL_STATIC_DRAW);

	GLuint programID = LoadShaders("vertex.vert", "fragment.frag");

	auto data = createGlTexture2D();

	cudaGraphicsResource * resource = data.second;

	do
	{
		checkCudaErrors(cudaGraphicsMapResources(1, &resource, 0));
		//checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void**)&dptr, NULL, resource));
		cudaArray * writeArray;
		checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&writeArray, resource, 0, 0));

		cudaResourceDesc wdsc;
		wdsc.resType = cudaResourceTypeArray;
		wdsc.res.array.array = writeArray;

		cudaSurfaceObject_t writeSurface;
		checkCudaErrors(cudaCreateSurfaceObject(&writeSurface, &wdsc));
		//fillBlue(writeSurface, dim3(width, height));
		paintBlue<<<16 * 16, 1>>>(writeSurface);

		checkCudaErrors(cudaDestroySurfaceObject(writeSurface));
		checkCudaErrors(cudaGraphicsUnmapResources(1, &resource));
		checkCudaErrors(cudaStreamSynchronize(0));

		glUseProgram(programID);
		glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
		// Draw nothing, see you in tutorial 2 !

		// 1rst attribute buffer : vertices
		glEnableVertexAttribArray(0);
		glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer);
		glVertexAttribPointer(
			0,                  // attribute 0. No particular reason for 0, but must match the layout in the shader.
			3,                  // size
			GL_FLOAT,           // type
			GL_FALSE,           // normalized?
			0,                  // stride
			(void*)0            // array buffer offset
		);

		glUniform1i(textureSamplerLocation, 0);
		glBindTextureUnit(0, data.first);

		// Draw the triangle !
		glDrawArrays(GL_TRIANGLES, 0, 3); // Starting from vertex 0; 3 vertices total -> 1 triangle
		glDisableVertexAttribArray(0);

		// Swap buffers
		glfwSwapBuffers(window);
		glfwPollEvents();

	} // Check if the ESC key was pressed or the window was closed
	while (glfwGetKey(window, GLFW_KEY_ESCAPE) != GLFW_PRESS && glfwWindowShouldClose(window) == 0);

}

int main()
{
	initGlWindow();
    const int arraySize = 5;
    const int a[arraySize] = { 1, 2, 3, 4, 5 };
    const int b[arraySize] = { 10, 20, 30, 40, 50 };
    int c[arraySize] = { 0 };

    // Add vectors in parallel.
    addWithCuda(c, a, b, arraySize);

    printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n",
        c[0], c[1], c[2], c[3], c[4]);

    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    checkCudaErrors(cudaDeviceReset());
	_getch();

    return 0;
}

// Helper function for using CUDA to add vectors in parallel.
void addWithCuda(int *c, const int *a, const int *b, unsigned int size)
{
    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;

    // Choose which GPU to run on, change this on a multi-GPU system.
	checkCudaErrors(cudaSetDevice(0));
    checkCudaErrors(cudaMalloc((void**)&dev_c, size * sizeof(int)));
    checkCudaErrors(cudaMalloc((void**)&dev_a, size * sizeof(int)));
    checkCudaErrors(cudaMalloc((void**)&dev_b, size * sizeof(int)));

    // Copy input vectors from host memory to GPU buffers.
    checkCudaErrors(cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice));
    checkCudaErrors(cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice));

    // Launch a kernel on the GPU with one thread for each element.
    addKernel<<<1, size>>>(dev_c, dev_a, dev_b);

    // Check for any errors launching the kernel
    checkCudaErrors(cudaGetLastError());

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during the launch.
    checkCudaErrors(cudaDeviceSynchronize());

    // Copy output vector from GPU buffer to host memory.
    checkCudaErrors(cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost));
}
	//This software contains source code provided by NVIDIA Corporation.

	#include "cuda_runtime.h"
	#include "cuda_surface_types.h"
	#include "surface_functions.h"

	// must include in this order :(
	#include "GL\glew.h"
	#include "cuda_gl_interop.h"
	#include "GLFW\glfw3.h"

	#include "cuda\helper_cuda.h"
	#include "cuda\helper_math.h"

	#include "device_launch_parameters.h"


	#include <conio.h>
	#include <stdio.h>
	#include <iostream>
	#include <vector>

	void addWithCuda(int c, const int a, const int *b, unsigned int size);

	__global__ void addKernel(int c, const int a, const int *b)
	{
	int i = threadIdx.x;
	c[i] = a[i] + b[i];
	}

	__global__ void paintBlue(cudaSurfaceObject_t surface)
	{
	int i = blockIdx.x;
	//surf2Dwrite(, surface, (i / 16) * sizeof(float4), i % 16);
	//printf("%d", i);
	surf2Dwrite(make_float4(0, 0, 1, 1), surface, (i / 16) * sizeof(float4), i % 16);
	}

	std::pair<GLuint, cudaGraphicsResource *> createGlTexture2D()
	{
	GLuint result;
	//int size = 16 * 16 * 16 * 4 * 4;
	//GLubyte * data = new GLubyte[size];
	GLfloat * data = new float[16 * 16 * 4];
	for (int i = 0;i < 16 * 16 * 4;i++)
	{
	if (i % 4 == 0)
	{
	data[i] = 1;
	}
	}
	glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

	//glGenTextures(1, &result);
	glCreateTextures(GL_TEXTURE_2D, 1, &result);
	glTextureImage2DEXT(result, GL_TEXTURE_2D, 0, GL_RGBA32F, 16, 16, 0, GL_RGBA, GL_FLOAT, data);
	glTextureParameteri(result, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	glTextureParameteri(result, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTextureParameteri(result, GL_TEXTURE_WRAP_S, GL_REPEAT);
	glTextureParameteri(result, GL_TEXTURE_WRAP_T, GL_REPEAT);

	cudaGraphicsResource *curt;
	checkCudaErrors(cudaGraphicsGLRegisterImage(&curt, result, GL_TEXTURE_2D, cudaGraphicsMapFlagsNone));

	return std::make_pair(result, curt);
	}

	static const GLfloat g_vertex_buffer_data[] =
	{
	-1.0f, -1.0f, 0.0f,
	1.0f, -1.0f, 0.0f,
	0.0f, 1.0f, 0.0f,
	};

	GLint textureSamplerLocation;

	GLuint LoadShaders(const char * vertex_file_path, const char * fragment_file_path) {

	// Create the shaders
	GLuint VertexShaderID = glCreateShader(GL_VERTEX_SHADER);
	GLuint FragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER);

	// Read the Vertex Shader code from the file
	std::string VertexShaderCode;
	std::ifstream VertexShaderStream(vertex_file_path, std::ios::in);
	if (VertexShaderStream.is_open()) {
	std::string Line = "";
	while (getline(VertexShaderStream, Line))
	VertexShaderCode += "\n" + Line;
	VertexShaderStream.close();
	}
	else {
	printf("Impossible to open %s. Are you in the right directory ? Don't forget to read the FAQ !\n", vertex_file_path);
	getchar();
	return 0;
	}

	// Read the Fragment Shader code from the file
	std::string FragmentShaderCode;
	std::ifstream FragmentShaderStream(fragment_file_path, std::ios::in);
	if (FragmentShaderStream.is_open()) {
	std::string Line = "";
	while (getline(FragmentShaderStream, Line))
	FragmentShaderCode += "\n" + Line;
	FragmentShaderStream.close();
	}

	GLint Result = GL_FALSE;
	int InfoLogLength;


	// Compile Vertex Shader
	printf("Compiling shader : %s\n", vertex_file_path);
	char const * VertexSourcePointer = VertexShaderCode.c_str();
	glShaderSource(VertexShaderID, 1, &VertexSourcePointer, NULL);
	glCompileShader(VertexShaderID);

	// Check Vertex Shader
	glGetShaderiv(VertexShaderID, GL_COMPILE_STATUS, &Result);
	glGetShaderiv(VertexShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength);
	if (InfoLogLength > 0) {
	std::vector<char> VertexShaderErrorMessage(InfoLogLength + 1);
	glGetShaderInfoLog(VertexShaderID, InfoLogLength, NULL, &VertexShaderErrorMessage[0]);
	printf("%s\n", &VertexShaderErrorMessage[0]);
	}



	// Compile Fragment Shader
	printf("Compiling shader : %s\n", fragment_file_path);
	char const * FragmentSourcePointer = FragmentShaderCode.c_str();
	glShaderSource(FragmentShaderID, 1, &FragmentSourcePointer, NULL);
	glCompileShader(FragmentShaderID);

	// Check Fragment Shader
	glGetShaderiv(FragmentShaderID, GL_COMPILE_STATUS, &Result);
	glGetShaderiv(FragmentShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength);
	if (InfoLogLength > 0) {
	std::vector<char> FragmentShaderErrorMessage(InfoLogLength + 1);
	glGetShaderInfoLog(FragmentShaderID, InfoLogLength, NULL, &FragmentShaderErrorMessage[0]);
	printf("%s\n", &FragmentShaderErrorMessage[0]);
	}



	// Link the program
	printf("Linking program\n");
	GLuint ProgramID = glCreateProgram();
	glAttachShader(ProgramID, VertexShaderID);
	glAttachShader(ProgramID, FragmentShaderID);
	glLinkProgram(ProgramID);

	// Check the program
	glGetProgramiv(ProgramID, GL_LINK_STATUS, &Result);
	glGetProgramiv(ProgramID, GL_INFO_LOG_LENGTH, &InfoLogLength);
	if (InfoLogLength > 0) {
	std::vector<char> ProgramErrorMessage(InfoLogLength + 1);
	glGetProgramInfoLog(ProgramID, InfoLogLength, NULL, &ProgramErrorMessage[0]);
	printf("%s\n", &ProgramErrorMessage[0]);
	}


	glDetachShader(ProgramID, VertexShaderID);
	glDetachShader(ProgramID, FragmentShaderID);

	glDeleteShader(VertexShaderID);
	glDeleteShader(FragmentShaderID);

	textureSamplerLocation = glGetUniformLocation(ProgramID, "myTextureSampler");
	std::cout << textureSamplerLocation << std::endl;

	return ProgramID;
	}

	void initGlWindow()
	{
	if (!glfwInit())
	{
	std::cout << "error" << std::endl;
	}

	glfwWindowHint(GLFW_SAMPLES, 1);
	glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
	glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
	glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);

	GLFWwindow * window;
	window = glfwCreateWindow(1024, 768, "renderpls", NULL, NULL);
	if (window == nullptr)
	{
	std::cout << "error2" << std::endl;
	glfwTerminate();
	}

	glfwMakeContextCurrent(window);
	glewExperimental = true;
	if (glewInit() != GLEW_OK)
	{
	std::cout << "error3" << std::endl;
	}

	// create vao
	GLuint VertexArrayID;
	glGenVertexArrays(1, &VertexArrayID);
	glBindVertexArray(VertexArrayID);

	glClearColor(1.0f, 1.0f, 1.0f, 1.0f);

	// This will identify our vertex buffer
	GLuint vertexbuffer;
	glGenBuffers(1, &vertexbuffer);
	glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer);
	glBufferData(GL_ARRAY_BUFFER, sizeof(g_vertex_buffer_data), g_vertex_buffer_data, GL_STATIC_DRAW);

	GLuint programID = LoadShaders("vertex.vert", "fragment.frag");

	auto data = createGlTexture2D();

	cudaGraphicsResource * resource = data.second;

	do
	{
	checkCudaErrors(cudaGraphicsMapResources(1, &resource, 0));
	//checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void**)&dptr, NULL, resource));
	cudaArray * writeArray;
	checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&writeArray, resource, 0, 0));

	cudaResourceDesc wdsc;
	wdsc.resType = cudaResourceTypeArray;
	wdsc.res.array.array = writeArray;

	cudaSurfaceObject_t writeSurface;
	checkCudaErrors(cudaCreateSurfaceObject(&writeSurface, &wdsc));
	//fillBlue(writeSurface, dim3(width, height));
	paintBlue<<<16 * 16, 1>>>(writeSurface);

	checkCudaErrors(cudaDestroySurfaceObject(writeSurface));
	checkCudaErrors(cudaGraphicsUnmapResources(1, &resource));
	checkCudaErrors(cudaStreamSynchronize(0));

	glUseProgram(programID);
	glClear(GL_COLOR_BUFFER_BIT \| GL_DEPTH_BUFFER_BIT);
	// Draw nothing, see you in tutorial 2 !

	// 1rst attribute buffer : vertices
	glEnableVertexAttribArray(0);
	glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer);
	glVertexAttribPointer(
	0, // attribute 0. No particular reason for 0, but must match the layout in the shader.
	3, // size
	GL_FLOAT, // type
	GL_FALSE, // normalized?
	0, // stride
	(void*)0 // array buffer offset
	);

	glUniform1i(textureSamplerLocation, 0);
	glBindTextureUnit(0, data.first);

	// Draw the triangle !
	glDrawArrays(GL_TRIANGLES, 0, 3); // Starting from vertex 0; 3 vertices total -> 1 triangle
	glDisableVertexAttribArray(0);

	// Swap buffers
	glfwSwapBuffers(window);
	glfwPollEvents();

	} // Check if the ESC key was pressed or the window was closed
	while (glfwGetKey(window, GLFW_KEY_ESCAPE) != GLFW_PRESS && glfwWindowShouldClose(window) == 0);

	}

	int main()
	{
	initGlWindow();
	const int arraySize = 5;
	const int a[arraySize] = { 1, 2, 3, 4, 5 };
	const int b[arraySize] = { 10, 20, 30, 40, 50 };
	int c[arraySize] = { 0 };

	// Add vectors in parallel.
	addWithCuda(c, a, b, arraySize);

	printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n",
	c[0], c[1], c[2], c[3], c[4]);

	// cudaDeviceReset must be called before exiting in order for profiling and
	// tracing tools such as Nsight and Visual Profiler to show complete traces.
	checkCudaErrors(cudaDeviceReset());
	_getch();

	return 0;
	}

	// Helper function for using CUDA to add vectors in parallel.
	void addWithCuda(int c, const int a, const int *b, unsigned int size)
	{
	int *dev_a = 0;
	int *dev_b = 0;
	int *dev_c = 0;

	// Choose which GPU to run on, change this on a multi-GPU system.
	checkCudaErrors(cudaSetDevice(0));
	checkCudaErrors(cudaMalloc((void*)&dev_c, size sizeof(int)));
	checkCudaErrors(cudaMalloc((void*)&dev_a, size sizeof(int)));
	checkCudaErrors(cudaMalloc((void*)&dev_b, size sizeof(int)));

	// Copy input vectors from host memory to GPU buffers.
	checkCudaErrors(cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice));
	checkCudaErrors(cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice));

	// Launch a kernel on the GPU with one thread for each element.
	addKernel<<<1, size>>>(dev_c, dev_a, dev_b);

	// Check for any errors launching the kernel
	checkCudaErrors(cudaGetLastError());

	// cudaDeviceSynchronize waits for the kernel to finish, and returns
	// any errors encountered during the launch.
	checkCudaErrors(cudaDeviceSynchronize());

	// Copy output vector from GPU buffer to host memory.
	checkCudaErrors(cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost));
	}