Skip to content

Instantly share code, notes, and snippets.

@psalz
Last active November 28, 2022 09:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save psalz/28534fd378767a0ac3087f0fc0f3660b to your computer and use it in GitHub Desktop.
Save psalz/28534fd378767a0ac3087f0fc0f3660b to your computer and use it in GitHub Desktop.
Strange behavior of 2D/3D copies in partially mapped virtual address space
#include <cassert>
#include <cstdio>
#include <iostream>
#include <tuple>
#include <vector>
#include <cuda.h>
/// Reports a failed CUDA driver API call on stderr; does nothing when `res` is CUDA_SUCCESS.
///
/// The function only logs: it neither throws nor aborts, so the caller keeps running after a failure.
///
/// @param res  Result code returned by the driver call.
/// @param tok  Source text of the call that produced `res` (printed verbatim).
/// @param file Source file of the call site.
/// @param line Source line of the call site.
static inline void checkDrvError(CUresult res, const char* tok, const char* file, unsigned line) {
    if(res == CUDA_SUCCESS) { return; }

    const char* errStr = nullptr;
    // cuGetErrorString sets the string to NULL for error codes it does not recognize;
    // streaming a null `const char*` is undefined behavior, so fall back to a placeholder.
    if(cuGetErrorString(res, &errStr) != CUDA_SUCCESS || errStr == nullptr) { errStr = "unknown error"; }

    std::cerr << file << ':' << line << ' ' << tok << " failed (" << static_cast<unsigned>(res) << "): " << errStr << std::endl;
}
// Evaluates a CUDA driver API call and passes its result, the call's source text and the
// call site location to checkDrvError. Expands to a single expression without a trailing
// semicolon: the caller's own `;` terminates it, so it is safe inside an unbraced if/else.
#define CHECK_DRV(x) checkDrvError((x), #x, __FILE__, __LINE__)
// Prints the source text of `x` to stdout (flushed), then evaluates `x`.
// The do/while(0) wrapper makes the expansion a single statement, so both the print and the
// evaluation stay under the control of an enclosing unbraced `if`/`else` or loop.
#define PRINT_AND_EVAL(x)                 \
    do {                                  \
        std::cout << #x << std::endl;     \
        x;                                \
    } while(0)
void run_experiment(const std::vector<std::pair<int, int>> blocks_to_allocate, const size_t copy_src_y) {
CUcontext ctx;
CHECK_DRV(cuInit(0));
CHECK_DRV(cuDevicePrimaryCtxRetain(&ctx, 0));
CHECK_DRV(cuCtxSetCurrent(ctx));
CUdevice device;
CHECK_DRV(cuCtxGetDevice(&device));
size_t granularity = 0;
CUmemAllocationProp prop = {};
prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
prop.location.id = (int)device;
CHECK_DRV(cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM));
const size_t width = 2;
const size_t height = 3;
const size_t virtual_size = width * height * granularity;
CUdeviceptr base_ptr;
CHECK_DRV(cuMemAddressReserve(&base_ptr, virtual_size, 0, 0, 0));
// Allocate physical blocks
std::vector<CUmemGenericAllocationHandle> allocs(blocks_to_allocate.size());
for(auto& handle : allocs) {
CHECK_DRV(cuMemCreate(&handle, granularity, &prop, 0));
}
CUmemAccessDesc access_desc = {};
access_desc.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
access_desc.location.id = device;
access_desc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
// Map *some* of the blocks
{
int i = 0;
for(const auto& [x, y] : blocks_to_allocate) {
const unsigned char linear_id = y * width + x;
const auto ptr = base_ptr + linear_id * granularity;
CHECK_DRV(cuMemMap(ptr, granularity, 0, allocs[i++], 0));
CHECK_DRV(cuMemSetAccess(ptr, granularity, &access_desc, 1));
// Initialize the first byte in each block with its linear id
CHECK_DRV(cuMemcpyHtoD(ptr, &linear_id, sizeof(linear_id)));
}
}
{
std::vector<unsigned char> result(2);
CUDA_MEMCPY2D params = {};
params.Height = 2;
params.WidthInBytes = 1;
params.dstHost = result.data();
params.dstMemoryType = CU_MEMORYTYPE_HOST;
params.dstPitch = 1;
params.dstXInBytes = 0;
params.dstY = 0;
params.srcDevice = base_ptr;
params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
params.srcPitch = width * granularity;
params.srcXInBytes = 0;
params.srcY = copy_src_y;
CHECK_DRV(cuMemcpy2D(&params));
CHECK_DRV(cuCtxSynchronize());
const unsigned char expected_results[3] = {0, 2, 4};
if(result[0] == expected_results[copy_src_y] && result[1] == expected_results[copy_src_y + 1]) {
std::cout << "All good!" << std::endl;
}
}
// Cleanup
for(const auto& [x, y] : blocks_to_allocate) {
const auto ptr = base_ptr + (y * width + x) * granularity;
CHECK_DRV(cuMemUnmap(ptr, granularity));
}
for(auto& handle : allocs) {
CHECK_DRV(cuMemRelease(handle));
}
CHECK_DRV(cuMemAddressFree(base_ptr, virtual_size));
}
int main() {
PRINT_AND_EVAL(run_experiment({{0, 0}, {1, 0}, {0, 1}, {0, 2}}, 0));
PRINT_AND_EVAL(run_experiment({{0, 0}, {1, 0}, {0, 1}, {0, 2}}, 1));
PRINT_AND_EVAL(run_experiment({{0, 0}, {0, 1}, {0, 2}}, 0));
PRINT_AND_EVAL(run_experiment({{0, 0}, {0, 1}, {0, 2}}, 1));
return 0;
}
@psalz
Copy link
Author

psalz commented Nov 27, 2022

Here's the output I'm getting:

run_experiment({{0, 0}, {1, 0}, {0, 1}, {0, 2}}, 0)
All good!
run_experiment({{0, 0}, {1, 0}, {0, 1}, {0, 2}}, 1)
All good! 
run_experiment({{0, 0}, {0, 1}, {0, 2}}, 0)
vmem_2d_copy.cu:92 cuMemcpy2D(&params) failed (1): invalid argument
run_experiment({{0, 0}, {0, 1}, {0, 2}}, 1)
vmem_2d_copy.cu:92 cuMemcpy2D(&params) failed (1): invalid argument

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment