Last active
November 23, 2021 14:04
-
-
Save fwyzard/1aac6214c8303b4a74abcf170c61b7b9 to your computer and use it in GitHub Desktop.
Test program to check which CUDA operations can be performed on a device other than the current one
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cuda_runtime.h> | |
#include "cudaCheck.h" | |
// Test program probing which CUDA runtime calls succeed when the current
// device differs from the device on which the stream, event and buffers
// were created.
//
// Flow: with device 0 current, create a stream and an event, allocate one
// buffer with cudaMalloc and one with cudaMallocAsync, and run the calls that
// (per the notes below) must be issued from the owning device. Then make
// device 1 current and issue copy, memset, free, event and stream calls
// against the device-0 resources.
//
// Every runtime call is wrapped in CUDA_CHECK. The program calls
// cudaSetDevice(1), so it needs at least two CUDA devices to run to the end.
int main(void) {
  // 3D buffer layout
  constexpr size_t pitch = 32;
  constexpr size_t width = 20;
  static_assert(width <= pitch, "The 3D buffer `width` cannot be larger than the `pitch`.");
  constexpr size_t height = 8;
  constexpr size_t slices = 4;
  const auto extent = make_cudaExtent(width, height, slices);

  // linear buffer layout
  constexpr size_t size = pitch * height * slices;

  // pinned host buffer used as source and destination of the copies below;
  // its contents are never initialised or inspected by the host code
  //char host_buffer[size];
  char* host_buffer = nullptr;
  CUDA_CHECK(cudaMallocHost(&host_buffer, size));

  // device buffers: one from cudaMalloc, one from the stream-ordered allocator
  char* buffer = nullptr;
  char* async_buffer = nullptr;

  // CUDA streams and events
  cudaStream_t stream;
  cudaEvent_t event;

  // ===========================================================================
  // select the first device
  CUDA_CHECK(cudaSetDevice(0));

  // create a CUDA stream and a CUDA event on the first device
  CUDA_CHECK(cudaStreamCreate(&stream));
  CUDA_CHECK(cudaEventCreate(&event));

  // allocate memory on the first device
  CUDA_CHECK(cudaMalloc(&buffer, size));
  CUDA_CHECK(cudaMallocAsync(&async_buffer, size, stream));

  // cudaMemset variants need to be called with the correct device
  CUDA_CHECK(cudaMemset(buffer, 0x00, size));
  CUDA_CHECK(cudaMemset2D(buffer, pitch, 0x42, width, height * slices));
  CUDA_CHECK(cudaMemset3D(
      make_cudaPitchedPtr(buffer, pitch, width, height),
      0x99, extent));
  CUDA_CHECK(cudaMemset(async_buffer, 0x00, size));
  CUDA_CHECK(cudaMemset2D(async_buffer, pitch, 0x42, width, height * slices));
  CUDA_CHECK(cudaMemset3D(
      make_cudaPitchedPtr(async_buffer, pitch, width, height),
      0x99, extent));

  // cudaMemcpy variants on cudaMallocAsync'ed memory need to be called with the correct device
  // see https://github.com/fwyzard/nvidia_bug_3446335
  CUDA_CHECK(cudaMemcpy(async_buffer, host_buffer, size, cudaMemcpyDefault));
  CUDA_CHECK(cudaMemcpy(host_buffer, async_buffer, size, cudaMemcpyDefault));

  // cudaMemcpyAsync variants on cudaMallocAsync'ed memory need to be called with the correct device
  // see https://github.com/fwyzard/nvidia_bug_3446335
  CUDA_CHECK(cudaMemcpyAsync(async_buffer, host_buffer, size, cudaMemcpyDefault, stream));
  CUDA_CHECK(cudaMemcpyAsync(host_buffer, async_buffer, size, cudaMemcpyDefault, stream));

  // ===========================================================================
  // select the second device; everything below targets resources that were
  // created while the first device was current
  CUDA_CHECK(cudaSetDevice(1));

  // test: synchronous cudaMemcpy on cudaMalloc'ed memory from a different device
  CUDA_CHECK(cudaMemcpy(host_buffer, buffer, size, cudaMemcpyDefault));
  CUDA_CHECK(cudaMemcpy(buffer, host_buffer, size, cudaMemcpyDefault));

  // cudaMemcpyAsync variants on cudaMalloc'ed memory can be called with a different device
  CUDA_CHECK(cudaMemcpyAsync(host_buffer, buffer, size, cudaMemcpyDefault, stream));
  CUDA_CHECK(cudaMemcpyAsync(buffer, host_buffer, size, cudaMemcpyDefault, stream));

  // cudaMemsetAsync variants can be called with a different device
  CUDA_CHECK(cudaMemsetAsync(buffer, 0x00, size, stream));
  CUDA_CHECK(cudaMemset2DAsync(buffer, pitch, 0x42, width, height * slices, stream));
  CUDA_CHECK(cudaMemset3DAsync(
      make_cudaPitchedPtr(buffer, pitch, width, height),
      0x99, extent, stream));
  CUDA_CHECK(cudaMemsetAsync(async_buffer, 0x00, size, stream));
  CUDA_CHECK(cudaMemset2DAsync(async_buffer, pitch, 0x42, width, height * slices, stream));
  CUDA_CHECK(cudaMemset3DAsync(
      make_cudaPitchedPtr(async_buffer, pitch, width, height),
      0x99, extent, stream));

  // cudaFree and cudaFreeAsync can be called with a different device
  CUDA_CHECK(cudaFree(buffer));
  CUDA_CHECK(cudaFreeAsync(async_buffer, stream));

  // cudaEventRecord can be called with a different device
  // (as long as stream and event refer to the same device)
  CUDA_CHECK(cudaEventRecord(event, stream));

  // cudaStreamSynchronize and cudaStreamDestroy can be called with a different device
  CUDA_CHECK(cudaStreamSynchronize(stream));
  CUDA_CHECK(cudaStreamDestroy(stream));

  // cudaEventQuery and cudaEventDestroy can be called with a different device
  CUDA_CHECK(cudaEventQuery(event));
  CUDA_CHECK(cudaEventDestroy(event));

  // ===========================================================================
  // release the pinned host buffer, which was previously never freed.
  // The stream has been synchronised above, so no asynchronous copy still
  // refers to it. Device 0 is selected again first so that this cleanup is not
  // itself a cross-device operation (the buffer was allocated before any
  // cudaSetDevice call, i.e. with the default device current).
  CUDA_CHECK(cudaSetDevice(0));
  CUDA_CHECK(cudaFreeHost(host_buffer));

  return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment