Skip to content

Instantly share code, notes, and snippets.

@liquidmetal
Last active February 21, 2016 15:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save liquidmetal/95f5cdc62c83121245d9 to your computer and use it in GitHub Desktop.
Save liquidmetal/95f5cdc62c83121245d9 to your computer and use it in GitHub Desktop.
#include <opencv2/opencv.hpp>
#include <opencv2/cudaarithm.hpp>
void initialize_cuda() {
int runtime_version = 0;
cudaError_t error = cudaRuntimeGetVersion(&runtime_version);
printf("Cuda runtime version = %d\n", runtime_version);
int driver_version = 0;
cudaDriverGetVersion(&driver_version);
printf("Cuda driver version = %d\n", driver_version);
int device_count = 0;
cudaGetDeviceCount(&device_count);
printf("Device count = %d\n", device_count);
// Pick the first device
error = cudaSetDevice(0);
if(error) {
printf("There was an error setting the Cuda device\n");
} else {
printf("CUDA initialized and ready to go\n");
}
}
int main() {
// Set the correct device
initialize_cuda();
// Create an identity matrix:
// 1 0 0
// 0 1 0
// 0 0 1
cv::Mat identity = cv::Mat::eye(3, 3, CV_32FC1);
// Upload it to the GPU
cv::cuda::GpuMat identity_gpu(identity);
// Use the OpenCV cuda method for summation (crashes)
cv::cuda::sum(identity_gpu);
// Be a good citizen
return 0;
}
//////////////////////////////////////////////////////////////
// Compiled with the following Makefile
/*
LIBRARIES=`pkg-config --cflags --libs opencv` -lopencv_cudev -lopencv_cudaarithm -lopencv_cudabgsegm -lopencv_cudacodec -lopencv_cudafeatures2d -lopencv_cudafilters -lopencv_cudalegacy -lopencv_cudaobjdetect -lopencv_cudaoptflow -lopencv_cudastereo -lopencv_cudawarping -lopencv_cudev -lm
all:
nvcc main.cu -o sum.o $(LIBRARIES)
*/
//////////////////////////////////////////////////////////////
// To execute this: `./sum.o`
//////////////////////////////////////////////////////////////
// System details:
// opencv-3.1.0
// nvidia 960m / cuda 7.5
// It is only the cv::cuda::sum function that fails. The output looks like this:
// Cuda runtime version = 7050
// Cuda driver version = 7050
// Device count = 1
// CUDA initialized and ready to go
// *** Error in `./sum.o': malloc(): memory corruption: 0x0000000004ecdc90 ***
// Aborted (core dumped)
//////////////////////////////////////////////////////////////////
// Things I've tried
// - Changing the matrix type from CV_32FC1 to CV_8UC1 (doesn't work)
// - Passing a host memory matrix (`identity`) instead of the GpuMat (doesn't work)
// - Removing the `initialize_cuda` call, hoping OpenCV does that itself (doesn't work)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment