-
-
Save psalz/28534fd378767a0ac3087f0fc0f3660b to your computer and use it in GitHub Desktop.
Strange behavior of 2D/3D copies in partially mapped virtual address space
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cassert> | |
#include <cstdio> | |
#include <iostream> | |
#include <tuple> | |
#include <vector> | |
#include <cuda.h> | |
/**
 * Reports a failed CUDA driver API call on stderr.
 *
 * Does nothing when `res` is CUDA_SUCCESS. Otherwise prints
 * "<file>:<line> <tok> failed (<numeric code>): <description>" and returns;
 * the program is NOT aborted, so callers keep running after an error.
 *
 * @param res  Result code returned by the driver API call.
 * @param tok  Source text of the call that produced `res`.
 * @param file Source file of the call site.
 * @param line Source line of the call site.
 */
static inline void checkDrvError(CUresult res, const char* tok, const char* file, unsigned line) {
    if(res == CUDA_SUCCESS) { return; }

    const char* errStr = nullptr;
    // For unrecognized codes cuGetErrorString fails and leaves the pointer null;
    // streaming a null `const char*` is undefined behavior, so substitute a placeholder.
    if(cuGetErrorString(res, &errStr) != CUDA_SUCCESS || errStr == nullptr) {
        errStr = "unknown error";
    }
    std::cerr << file << ':' << line << ' ' << tok << " failed (" << (unsigned)res << "): " << errStr << std::endl;
}
// Evaluates the driver API call `x` once and reports a failure (with the call's
// source text, file and line) through checkDrvError. Expands to a single
// expression without a trailing semicolon, so `CHECK_DRV(call);` is exactly one
// statement and is safe inside unbraced if/else bodies.
#define CHECK_DRV(x) checkDrvError((x), #x, __FILE__, __LINE__)
// Prints the source text of statement `x` to stdout, then executes it.
// Wrapped in do/while(0) so the print and the evaluation form ONE statement:
// without the wrapper, an unbraced `if(c) PRINT_AND_EVAL(f());` would guard only
// the print and always run `f()`.
#define PRINT_AND_EVAL(x)                 \
    do {                                  \
        std::cout << #x << std::endl;     \
        x;                                \
    } while(0)
void run_experiment(const std::vector<std::pair<int, int>> blocks_to_allocate, const size_t copy_src_y) { | |
CUcontext ctx; | |
CHECK_DRV(cuInit(0)); | |
CHECK_DRV(cuDevicePrimaryCtxRetain(&ctx, 0)); | |
CHECK_DRV(cuCtxSetCurrent(ctx)); | |
CUdevice device; | |
CHECK_DRV(cuCtxGetDevice(&device)); | |
size_t granularity = 0; | |
CUmemAllocationProp prop = {}; | |
prop.type = CU_MEM_ALLOCATION_TYPE_PINNED; | |
prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE; | |
prop.location.id = (int)device; | |
CHECK_DRV(cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM)); | |
const size_t width = 2; | |
const size_t height = 3; | |
const size_t virtual_size = width * height * granularity; | |
CUdeviceptr base_ptr; | |
CHECK_DRV(cuMemAddressReserve(&base_ptr, virtual_size, 0, 0, 0)); | |
// Allocate physical blocks | |
std::vector<CUmemGenericAllocationHandle> allocs(blocks_to_allocate.size()); | |
for(auto& handle : allocs) { | |
CHECK_DRV(cuMemCreate(&handle, granularity, &prop, 0)); | |
} | |
CUmemAccessDesc access_desc = {}; | |
access_desc.location.type = CU_MEM_LOCATION_TYPE_DEVICE; | |
access_desc.location.id = device; | |
access_desc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; | |
// Map *some* of the blocks | |
{ | |
int i = 0; | |
for(const auto& [x, y] : blocks_to_allocate) { | |
const unsigned char linear_id = y * width + x; | |
const auto ptr = base_ptr + linear_id * granularity; | |
CHECK_DRV(cuMemMap(ptr, granularity, 0, allocs[i++], 0)); | |
CHECK_DRV(cuMemSetAccess(ptr, granularity, &access_desc, 1)); | |
// Initialize the first byte in each block with its linear id | |
CHECK_DRV(cuMemcpyHtoD(ptr, &linear_id, sizeof(linear_id))); | |
} | |
} | |
{ | |
std::vector<unsigned char> result(2); | |
CUDA_MEMCPY2D params = {}; | |
params.Height = 2; | |
params.WidthInBytes = 1; | |
params.dstHost = result.data(); | |
params.dstMemoryType = CU_MEMORYTYPE_HOST; | |
params.dstPitch = 1; | |
params.dstXInBytes = 0; | |
params.dstY = 0; | |
params.srcDevice = base_ptr; | |
params.srcMemoryType = CU_MEMORYTYPE_DEVICE; | |
params.srcPitch = width * granularity; | |
params.srcXInBytes = 0; | |
params.srcY = copy_src_y; | |
CHECK_DRV(cuMemcpy2D(¶ms)); | |
CHECK_DRV(cuCtxSynchronize()); | |
const unsigned char expected_results[3] = {0, 2, 4}; | |
if(result[0] == expected_results[copy_src_y] && result[1] == expected_results[copy_src_y + 1]) { | |
std::cout << "All good!" << std::endl; | |
} | |
} | |
// Cleanup | |
for(const auto& [x, y] : blocks_to_allocate) { | |
const auto ptr = base_ptr + (y * width + x) * granularity; | |
CHECK_DRV(cuMemUnmap(ptr, granularity)); | |
} | |
for(auto& handle : allocs) { | |
CHECK_DRV(cuMemRelease(handle)); | |
} | |
CHECK_DRV(cuMemAddressFree(base_ptr, virtual_size)); | |
} | |
// Runs four experiments; each one's source text is echoed before it executes.
int main() {
    // Blocks (0,0), (1,0), (0,1), (0,2) mapped: row 0 is fully backed, rows 1 and 2 only in column 0.
    PRINT_AND_EVAL(run_experiment({{0, 0}, {1, 0}, {0, 1}, {0, 2}}, 0));
    PRINT_AND_EVAL(run_experiment({{0, 0}, {1, 0}, {0, 1}, {0, 2}}, 1));
    // Only column 0 mapped in every row; block (1,0) is left unmapped as well.
    PRINT_AND_EVAL(run_experiment({{0, 0}, {0, 1}, {0, 2}}, 0));
    PRINT_AND_EVAL(run_experiment({{0, 0}, {0, 1}, {0, 2}}, 1));
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's the output I'm getting: