Last active
December 26, 2016 17:38
-
-
Save ShigekiKarita/61b9ef537f9f5c4d3a1b28cda3ef4195 to your computer and use it in GitHub Desktop.
CUDA8のマルチストリーム/スレッド処理にC++11のfuture/threadを使う ref: http://qiita.com/ShigekiKarita/items/a3891f13f3acc3575c08
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ nvcc ./stream_test.cu -o ./stream_legacy | |
$ nvvp ./stream_legacy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <array> | |
#include <future> | |
#include "cuda_check.hpp" | |
const int N = 1 << 20; | |
__global__ void kernel(float *x, int n) { | |
int tid = threadIdx.x + blockIdx.x * blockDim.x; | |
for (int i = tid; i < n; i += blockDim.x * gridDim.x) { | |
x[i] = sqrt(pow(3.14159,i)); | |
} | |
} | |
int main() { | |
const int num_streams = 8; | |
std::future<cudaError_t> fs[num_streams]; | |
float* data[num_streams]; | |
for (int i = 0; i < num_streams; i++) { | |
fs[i] = std::async( | |
std::launch::async, [=]() mutable | |
{ | |
CUDA_CHECK(cudaMalloc(&data[i], N * sizeof(float))); | |
// launch one worker kernel per stream | |
kernel<<<1, 64, 0>>>(data[i], N); | |
CUDA_CHECK(cudaPeekAtLastError()); | |
return cudaSuccess; | |
}); | |
// launch a dummy kernel on the default stream | |
kernel<<<1, 1>>>(0, 0); | |
CUDA_CHECK(cudaPeekAtLastError()); | |
} | |
for (auto&f: fs) { | |
CUDA_CHECK(f.get()); | |
} | |
cudaDeviceReset(); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
int main() { | |
const int num_streams = 8; | |
cudaStream_t streams[num_streams]; | |
std::future<cudaError_t> fs[num_streams]; | |
float* data[num_streams]; | |
for (int i = 0; i < num_streams; i++) { | |
// こちらはこのforループ内では実行されない | |
fs[i] = std::async( | |
std::launch::deferred, [=]() mutable | |
{ | |
CUDA_CHECK(cudaStreamCreate(&streams[i])); | |
CUDA_CHECK(cudaMalloc(&data[i], N * sizeof(float))); | |
kernel<<<1, 64, 0, streams[i]>>>(data[i], N); | |
CUDA_CHECK(cudaPeekAtLastError()); | |
return cudaSuccess; | |
}); | |
// こちらは上のコードより先にこのforループ内で実行される | |
kernel<<<1, 1>>>(0, 0); | |
CUDA_CHECK(cudaPeekAtLastError()); | |
} | |
for (auto&f: fs) { | |
// ここではじめて deferred されたラムダ式を実行 | |
CUDA_CHECK(f.get()); | |
} | |
cudaDeviceReset(); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <future> | |
#include <thread> | |
#include <iostream> | |
#include "cuda_check.hpp" | |
const int N = 1 << 20; | |
__global__ void kernel(float *x, int n) { | |
int tid = threadIdx.x + blockIdx.x * blockDim.x; | |
for (int i = tid; i < n; i += blockDim.x * gridDim.x) { | |
x[i] = sqrt(pow(3.14159,i)); | |
} | |
} | |
std::string launch_kernel() { | |
float *data; | |
CUDA_CHECK(cudaMalloc(&data, N * sizeof(float))); | |
kernel<<<1, 64>>>(data, N); | |
CUDA_CHECK(cudaPeekAtLastError()); | |
CUDA_CHECK(cudaStreamSynchronize(0)); | |
return "ok"; | |
} | |
int main() { | |
const int num_threads = 8; | |
std::future<std::string> fs[num_threads]; | |
for (auto& f: fs) { | |
try { | |
std::packaged_task<std::string()> task(launch_kernel); | |
f = task.get_future(); | |
std::thread(std::move(task)).detach(); | |
} catch(std::exception& e) { | |
std::cerr << "Error creating thread: " << e.what() << std::endl; | |
} | |
} | |
for (auto& f: fs) { | |
try { | |
std::cout << f.get() << std::endl; | |
} catch(std::exception& e) { | |
std::cerr << "Error joining thread: " << e.what() << std::endl; | |
} | |
} | |
cudaDeviceReset(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment