Created
September 19, 2017 20:27
-
-
Save jamornsriwasansak/dcb4ace42493d191ba9125566d97f3a3 to your computer and use it in GitHub Desktop.
CUDA and OpenGL texture interop. Quite painful to find a working solution.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//This software contains source code provided by NVIDIA Corporation. | |
#include "cuda_runtime.h" | |
#include "cuda_surface_types.h" | |
#include "surface_functions.h" | |
// must include in this order :( | |
#include "GL\glew.h" | |
#include "cuda_gl_interop.h" | |
#include "GLFW\glfw3.h" | |
#include "cuda\helper_cuda.h" | |
#include "cuda\helper_math.h" | |
#include "device_launch_parameters.h" | |
#include <conio.h> | |
#include <stdio.h> | |
#include <iostream> | |
#include <vector> | |
void addWithCuda(int *c, const int *a, const int *b, unsigned int size); | |
__global__ void addKernel(int *c, const int *a, const int *b) | |
{ | |
int i = threadIdx.x; | |
c[i] = a[i] + b[i]; | |
} | |
__global__ void paintBlue(cudaSurfaceObject_t surface) | |
{ | |
int i = blockIdx.x; | |
//surf2Dwrite(, surface, (i / 16) * sizeof(float4), i % 16); | |
//printf("%d", i); | |
surf2Dwrite(make_float4(0, 0, 1, 1), surface, (i / 16) * sizeof(float4), i % 16); | |
} | |
std::pair<GLuint, cudaGraphicsResource *> createGlTexture2D() | |
{ | |
GLuint result; | |
//int size = 16 * 16 * 16 * 4 * 4; | |
//GLubyte * data = new GLubyte[size]; | |
GLfloat * data = new float[16 * 16 * 4]; | |
for (int i = 0;i < 16 * 16 * 4;i++) | |
{ | |
if (i % 4 == 0) | |
{ | |
data[i] = 1; | |
} | |
} | |
glPixelStorei(GL_UNPACK_ALIGNMENT, 1); | |
//glGenTextures(1, &result); | |
glCreateTextures(GL_TEXTURE_2D, 1, &result); | |
glTextureImage2DEXT(result, GL_TEXTURE_2D, 0, GL_RGBA32F, 16, 16, 0, GL_RGBA, GL_FLOAT, data); | |
glTextureParameteri(result, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | |
glTextureParameteri(result, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | |
glTextureParameteri(result, GL_TEXTURE_WRAP_S, GL_REPEAT); | |
glTextureParameteri(result, GL_TEXTURE_WRAP_T, GL_REPEAT); | |
cudaGraphicsResource *curt; | |
checkCudaErrors(cudaGraphicsGLRegisterImage(&curt, result, GL_TEXTURE_2D, cudaGraphicsMapFlagsNone)); | |
return std::make_pair(result, curt); | |
} | |
static const GLfloat g_vertex_buffer_data[] = | |
{ | |
-1.0f, -1.0f, 0.0f, | |
1.0f, -1.0f, 0.0f, | |
0.0f, 1.0f, 0.0f, | |
}; | |
GLint textureSamplerLocation; | |
GLuint LoadShaders(const char * vertex_file_path, const char * fragment_file_path) { | |
// Create the shaders | |
GLuint VertexShaderID = glCreateShader(GL_VERTEX_SHADER); | |
GLuint FragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER); | |
// Read the Vertex Shader code from the file | |
std::string VertexShaderCode; | |
std::ifstream VertexShaderStream(vertex_file_path, std::ios::in); | |
if (VertexShaderStream.is_open()) { | |
std::string Line = ""; | |
while (getline(VertexShaderStream, Line)) | |
VertexShaderCode += "\n" + Line; | |
VertexShaderStream.close(); | |
} | |
else { | |
printf("Impossible to open %s. Are you in the right directory ? Don't forget to read the FAQ !\n", vertex_file_path); | |
getchar(); | |
return 0; | |
} | |
// Read the Fragment Shader code from the file | |
std::string FragmentShaderCode; | |
std::ifstream FragmentShaderStream(fragment_file_path, std::ios::in); | |
if (FragmentShaderStream.is_open()) { | |
std::string Line = ""; | |
while (getline(FragmentShaderStream, Line)) | |
FragmentShaderCode += "\n" + Line; | |
FragmentShaderStream.close(); | |
} | |
GLint Result = GL_FALSE; | |
int InfoLogLength; | |
// Compile Vertex Shader | |
printf("Compiling shader : %s\n", vertex_file_path); | |
char const * VertexSourcePointer = VertexShaderCode.c_str(); | |
glShaderSource(VertexShaderID, 1, &VertexSourcePointer, NULL); | |
glCompileShader(VertexShaderID); | |
// Check Vertex Shader | |
glGetShaderiv(VertexShaderID, GL_COMPILE_STATUS, &Result); | |
glGetShaderiv(VertexShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength); | |
if (InfoLogLength > 0) { | |
std::vector<char> VertexShaderErrorMessage(InfoLogLength + 1); | |
glGetShaderInfoLog(VertexShaderID, InfoLogLength, NULL, &VertexShaderErrorMessage[0]); | |
printf("%s\n", &VertexShaderErrorMessage[0]); | |
} | |
// Compile Fragment Shader | |
printf("Compiling shader : %s\n", fragment_file_path); | |
char const * FragmentSourcePointer = FragmentShaderCode.c_str(); | |
glShaderSource(FragmentShaderID, 1, &FragmentSourcePointer, NULL); | |
glCompileShader(FragmentShaderID); | |
// Check Fragment Shader | |
glGetShaderiv(FragmentShaderID, GL_COMPILE_STATUS, &Result); | |
glGetShaderiv(FragmentShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength); | |
if (InfoLogLength > 0) { | |
std::vector<char> FragmentShaderErrorMessage(InfoLogLength + 1); | |
glGetShaderInfoLog(FragmentShaderID, InfoLogLength, NULL, &FragmentShaderErrorMessage[0]); | |
printf("%s\n", &FragmentShaderErrorMessage[0]); | |
} | |
// Link the program | |
printf("Linking program\n"); | |
GLuint ProgramID = glCreateProgram(); | |
glAttachShader(ProgramID, VertexShaderID); | |
glAttachShader(ProgramID, FragmentShaderID); | |
glLinkProgram(ProgramID); | |
// Check the program | |
glGetProgramiv(ProgramID, GL_LINK_STATUS, &Result); | |
glGetProgramiv(ProgramID, GL_INFO_LOG_LENGTH, &InfoLogLength); | |
if (InfoLogLength > 0) { | |
std::vector<char> ProgramErrorMessage(InfoLogLength + 1); | |
glGetProgramInfoLog(ProgramID, InfoLogLength, NULL, &ProgramErrorMessage[0]); | |
printf("%s\n", &ProgramErrorMessage[0]); | |
} | |
glDetachShader(ProgramID, VertexShaderID); | |
glDetachShader(ProgramID, FragmentShaderID); | |
glDeleteShader(VertexShaderID); | |
glDeleteShader(FragmentShaderID); | |
textureSamplerLocation = glGetUniformLocation(ProgramID, "myTextureSampler"); | |
std::cout << textureSamplerLocation << std::endl; | |
return ProgramID; | |
} | |
void initGlWindow() | |
{ | |
if (!glfwInit()) | |
{ | |
std::cout << "error" << std::endl; | |
} | |
glfwWindowHint(GLFW_SAMPLES, 1); | |
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); | |
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3); | |
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); | |
GLFWwindow * window; | |
window = glfwCreateWindow(1024, 768, "renderpls", NULL, NULL); | |
if (window == nullptr) | |
{ | |
std::cout << "error2" << std::endl; | |
glfwTerminate(); | |
} | |
glfwMakeContextCurrent(window); | |
glewExperimental = true; | |
if (glewInit() != GLEW_OK) | |
{ | |
std::cout << "error3" << std::endl; | |
} | |
// create vao | |
GLuint VertexArrayID; | |
glGenVertexArrays(1, &VertexArrayID); | |
glBindVertexArray(VertexArrayID); | |
glClearColor(1.0f, 1.0f, 1.0f, 1.0f); | |
// This will identify our vertex buffer | |
GLuint vertexbuffer; | |
glGenBuffers(1, &vertexbuffer); | |
glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer); | |
glBufferData(GL_ARRAY_BUFFER, sizeof(g_vertex_buffer_data), g_vertex_buffer_data, GL_STATIC_DRAW); | |
GLuint programID = LoadShaders("vertex.vert", "fragment.frag"); | |
auto data = createGlTexture2D(); | |
cudaGraphicsResource * resource = data.second; | |
do | |
{ | |
checkCudaErrors(cudaGraphicsMapResources(1, &resource, 0)); | |
//checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void**)&dptr, NULL, resource)); | |
cudaArray * writeArray; | |
checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&writeArray, resource, 0, 0)); | |
cudaResourceDesc wdsc; | |
wdsc.resType = cudaResourceTypeArray; | |
wdsc.res.array.array = writeArray; | |
cudaSurfaceObject_t writeSurface; | |
checkCudaErrors(cudaCreateSurfaceObject(&writeSurface, &wdsc)); | |
//fillBlue(writeSurface, dim3(width, height)); | |
paintBlue<<<16 * 16, 1>>>(writeSurface); | |
checkCudaErrors(cudaDestroySurfaceObject(writeSurface)); | |
checkCudaErrors(cudaGraphicsUnmapResources(1, &resource)); | |
checkCudaErrors(cudaStreamSynchronize(0)); | |
glUseProgram(programID); | |
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); | |
// Draw nothing, see you in tutorial 2 ! | |
// 1rst attribute buffer : vertices | |
glEnableVertexAttribArray(0); | |
glBindBuffer(GL_ARRAY_BUFFER, vertexbuffer); | |
glVertexAttribPointer( | |
0, // attribute 0. No particular reason for 0, but must match the layout in the shader. | |
3, // size | |
GL_FLOAT, // type | |
GL_FALSE, // normalized? | |
0, // stride | |
(void*)0 // array buffer offset | |
); | |
glUniform1i(textureSamplerLocation, 0); | |
glBindTextureUnit(0, data.first); | |
// Draw the triangle ! | |
glDrawArrays(GL_TRIANGLES, 0, 3); // Starting from vertex 0; 3 vertices total -> 1 triangle | |
glDisableVertexAttribArray(0); | |
// Swap buffers | |
glfwSwapBuffers(window); | |
glfwPollEvents(); | |
} // Check if the ESC key was pressed or the window was closed | |
while (glfwGetKey(window, GLFW_KEY_ESCAPE) != GLFW_PRESS && glfwWindowShouldClose(window) == 0); | |
} | |
int main() | |
{ | |
initGlWindow(); | |
const int arraySize = 5; | |
const int a[arraySize] = { 1, 2, 3, 4, 5 }; | |
const int b[arraySize] = { 10, 20, 30, 40, 50 }; | |
int c[arraySize] = { 0 }; | |
// Add vectors in parallel. | |
addWithCuda(c, a, b, arraySize); | |
printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n", | |
c[0], c[1], c[2], c[3], c[4]); | |
// cudaDeviceReset must be called before exiting in order for profiling and | |
// tracing tools such as Nsight and Visual Profiler to show complete traces. | |
checkCudaErrors(cudaDeviceReset()); | |
_getch(); | |
return 0; | |
} | |
// Helper function for using CUDA to add vectors in parallel. | |
void addWithCuda(int *c, const int *a, const int *b, unsigned int size) | |
{ | |
int *dev_a = 0; | |
int *dev_b = 0; | |
int *dev_c = 0; | |
// Choose which GPU to run on, change this on a multi-GPU system. | |
checkCudaErrors(cudaSetDevice(0)); | |
checkCudaErrors(cudaMalloc((void**)&dev_c, size * sizeof(int))); | |
checkCudaErrors(cudaMalloc((void**)&dev_a, size * sizeof(int))); | |
checkCudaErrors(cudaMalloc((void**)&dev_b, size * sizeof(int))); | |
// Copy input vectors from host memory to GPU buffers. | |
checkCudaErrors(cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice)); | |
checkCudaErrors(cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice)); | |
// Launch a kernel on the GPU with one thread for each element. | |
addKernel<<<1, size>>>(dev_c, dev_a, dev_b); | |
// Check for any errors launching the kernel | |
checkCudaErrors(cudaGetLastError()); | |
// cudaDeviceSynchronize waits for the kernel to finish, and returns | |
// any errors encountered during the launch. | |
checkCudaErrors(cudaDeviceSynchronize()); | |
// Copy output vector from GPU buffer to host memory. | |
checkCudaErrors(cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment