Skip to content

Instantly share code, notes, and snippets.

@Lait-au-Cafe
Created October 13, 2017 13:16
Show Gist options
  • Save Lait-au-Cafe/739ef54560ec878275d09c8e52ce5c23 to your computer and use it in GitHub Desktop.
Save Lait-au-Cafe/739ef54560ec878275d09c8e52ce5c23 to your computer and use it in GitHub Desktop.
//=============================================================================
// Includes
//=============================================================================
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <helper_functions.h>
#include <helper_cuda.h>
#include <iostream>
#define _USE_MATH_DEFINES
#include <cmath>
#include <opencv2\opencv.hpp>
//=============================================================================
// Defines
//=============================================================================
#define GAUSS_WINDOW_WIDTH 3
#define GAUSS_WINDOW_SIZE ((2 * GAUSS_WINDOW_WIDTH + 1) * (2 * GAUSS_WINDOW_WIDTH + 1))
//=============================================================================
// Buffers
//=============================================================================
__constant__ float gaussWindow[GAUSS_WINDOW_SIZE];
surface<void, cudaSurfaceType2D> surfRef1;
cudaArray *d_array1; // surfRef1の実体
surface<void, cudaSurfaceType2D> surfRef2;
cudaArray *d_array2; // surfRef2の実体
//=============================================================================
// Device Functions
//=============================================================================
__global__ void UCharToFloat(
uchar3* input,
int width
) {
const int x = blockIdx.x*blockDim.x + threadIdx.x;
const int y = blockIdx.y*blockDim.y + threadIdx.y;
int id = x + y * width;
uchar3 col = input[id];
float gs = (float)(col.x + col.y + col.z) / 3 / 255;
float4 re;
re.x = gs;
re.y = gs;
re.z = gs;
re.w = 0;
surf2Dwrite(re, surfRef1, x * sizeof(float4), y);
}
__global__ void FloatToUChar(
uchar3* result,
int width
) {
const int x = blockIdx.x*blockDim.x + threadIdx.x;
const int y = blockIdx.y*blockDim.y + threadIdx.y;
int id = x + y * width;
float4 gs;
surf2Dread(&gs, surfRef1, x * sizeof(float4), y);
uchar3 col;
col.x = (uchar)(gs.x * 255);
col.y = (uchar)(gs.y * 255);
col.z = (uchar)(gs.z * 255);
result[id] = col;
}
//=============================================================================
// Main Function
//=============================================================================
int main() {
std::cout << "version: " << CV_VERSION << std::endl;
//=========================================================================
// ビデオ関係初期化
//=========================================================================
cv::VideoCapture capture(0);
if (!capture.isOpened()) {
std::cerr << "Error : Cannot open camera device. " << std::endl;
exit(EXIT_FAILURE);
}
cv::Mat frame, result;
// ウィンドウ用意
cv::String windowName = "Test Window";
cv::namedWindow(windowName, CV_WINDOW_AUTOSIZE);
// サイズ取得
capture >> frame;
cv::Size size = frame.size();
// 初期化(同サイズのメモリを確保)
result = frame.clone();
//=========================================================================
// GPUデバイスメモリ関係
//=========================================================================
// スレッド・ブロックサイズ設定
dim3 dimBlock(32, 32, 1);
dim3 dimGrid(size.width / dimBlock.x, size.height / dimBlock.y, 1);
// デバイスメモリの確保(入力)
uchar3* d_frame; // (256 = 8bit = sizeof uchar) * 3 = uchar3
size_t d_frame_pitch;
checkCudaErrors(cudaMallocPitch(&d_frame, &d_frame_pitch, size.width * sizeof(uchar3), size.height));
// デバイスメモリの確保(出力)
uchar3* d_result;
size_t d_result_pitch;
checkCudaErrors(cudaMallocPitch(&d_result, &d_result_pitch, size.width * sizeof(uchar3), size.height));
// Array用のメモリを確保
cudaChannelFormatDesc cdesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
checkCudaErrors(cudaMallocArray(&d_array1, &cdesc, size.width, size.height, cudaArraySurfaceLoadStore));
checkCudaErrors(cudaMallocArray(&d_array2, &cdesc, size.width, size.height, cudaArraySurfaceLoadStore));
// サーフェースにバインド
size_t offset;
checkCudaErrors(cudaBindSurfaceToArray(surfRef1, d_array1));
checkCudaErrors(cudaBindSurfaceToArray(surfRef2, d_array2));
// ガウス窓の設定
float *tmpGaussWindow = new float[GAUSS_WINDOW_SIZE];
const float gauss_sigma = (float)GAUSS_WINDOW_WIDTH / 3;
for (int i = -GAUSS_WINDOW_WIDTH; i <= GAUSS_WINDOW_WIDTH; i++) {
for (int j = -GAUSS_WINDOW_WIDTH; j <= GAUSS_WINDOW_WIDTH; j++) {
tmpGaussWindow[i + (2 * GAUSS_WINDOW_WIDTH + 1) * j] =
expf((i * i + j * j) / (-2 * gauss_sigma * gauss_sigma)) / (2 * M_PI * gauss_sigma * gauss_sigma);
}
}
checkCudaErrors(cudaMemcpyToSymbol(gaussWindow, tmpGaussWindow, sizeof(float) * GAUSS_WINDOW_SIZE));
// delete[] tmpGaussWindow;
//=========================================================================
// メインループ
//=========================================================================
while (cv::waitKey(1) != 113) {
capture >> frame;
checkCudaErrors(cudaMemcpy2D(d_frame, d_frame_pitch, frame.data, frame.step,
size.width * sizeof(uchar3), size.height, cudaMemcpyDefault));
UCharToFloat << <dimGrid, dimBlock, 0 >> > (d_frame, size.width);
FloatToUChar << <dimGrid, dimBlock, 0 >> > (d_result, size.width);
checkCudaErrors(cudaMemcpy2D(result.data, result.step, d_result, d_result_pitch,
size.width * sizeof(uchar3), size.height, cudaMemcpyDefault));
cv::imshow(windowName, result);
}
cv::destroyAllWindows();
cudaFreeArray(d_array1);
cudaFreeArray(d_array2);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment