@allanmac
Last active October 29, 2015 15:57
Allocate more than 4GB
#include <stdio.h>
#include <stdlib.h> // exit(), atoi()

//
//
//

// report a non-success CUDA return code and optionally abort
static
void
cuda_assert(const cudaError_t code, const char* const file, const int line, const bool abort)
{
  if (code != cudaSuccess)
    {
      fprintf(stderr,"cuda_assert: %s %s %d\n",cudaGetErrorString(code),file,line);

      if (abort)
        exit(code);
    }
}

// prepend "cuda" to the wrapped runtime call and check its return code
#define cuda(...) { cuda_assert((cuda##__VA_ARGS__), __FILE__, __LINE__, true); }

//
//
//

// print and return the free device memory reported by cudaMemGetInfo()
size_t meminfo()
{
  size_t free, total;

  cuda(MemGetInfo(&free,&total));

#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
  printf("free/total (MB): %7llu / %7llu\n",
         free /(1024ll*1024ll),
         total/(1024ll*1024ll));
#else
  printf("free/total (MB): %7u / %7u\n",
         free /(1024*1024),
         total/(1024*1024));
#endif

  return free;
}

//
//
//

int main(int argc, char** argv)
{
  int device = (argc == 2) ? atoi(argv[1]) : 0;

  cudaDeviceProp props;

  cuda(GetDeviceProperties(&props,device));

  printf("%s (%2d)\n",props.name,props.multiProcessorCount);

  cuda(SetDevice(device));

  //
  // free memory before the allocation
  //
  size_t free = meminfo();

  //
  // allocate everything except a 256MB reserve
  //
  size_t allocation = free - (256*1024*1024); // allocate (free - 256MB)

#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
  printf("cudaMalloc (MB): %7llu\n",allocation/(1024ll*1024ll));
#else
  printf("cudaMalloc (MB): %7u\n", allocation/(1024*1024));
#endif

  //
  //
  //
  void* mem;

  cuda(Malloc(&mem,allocation));

  //
  // free memory after the allocation
  //
  meminfo();

  //
  //
  //
  cuda(Free(mem));

  cuda(DeviceReset());

  return 0;
}
@allanmac (Author)

Compile with 64-bit:

nvcc -m 64 malloc.cu -o malloc

Or, compile with 32-bit to see how cudaMemGetInfo() reports a 32-bit-safe free size:

nvcc -m 32 malloc.cu -o malloc
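
As a quick sanity check (not part of the gist, just plain C), you can confirm which variant a given binary actually is by printing its pointer width at startup:

// minimal sketch: report the build's pointer width, since a 32-bit process
// cannot address more than 4GB, which is why the runtime caps the reported sizes
#include <stdio.h>

int main(void)
{
  printf("built %u-bit (sizeof(void*) = %u bytes)\n",
         (unsigned)(sizeof(void*) * 8),
         (unsigned)sizeof(void*));
  return 0;
}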

@allanmac (Author)

> malloc 0
Tesla K20c (13)
free/total (MB):    5043 /    5119
cudaMalloc (MB):    4787
free/total (MB):     255 /    5119

> malloc 1
GeForce GT 240 (12)
free/total (MB):     952 /    1024
cudaMalloc (MB):     696
free/total (MB):     256 /    1024

> malloc 2
GeForce GTX 680 ( 8)
free/total (MB):    3856 /    4096
cudaMalloc (MB):    3600
free/total (MB):     255 /    4096

> malloc 3
GeForce GT 630 ( 2)
free/total (MB):    1602 /    2048
cudaMalloc (MB):    1346
free/total (MB):    1602 /    2048

> malloc 4
GeForce 9400 GT ( 4)
free/total (MB):     471 /     512
cudaMalloc (MB):     215
free/total (MB):     256 /     512

@usmannisar

GeForce GTX 850M ( 5)
free/total (MB): 4011 / 4096
cudaMalloc (MB): 3755
free/total (MB): 4011 / 4096

@allanmac (Author)

Added error checking... but also rebooted. Results are now as expected!

64-bit:

$> nvcc -m 64 malloc.cu -o malloc

$> malloc 0
GeForce GTX 980 (16)
free/total (MB):    3935 /    4096
cudaMalloc (MB):    3679
free/total (MB):     255 /    4096

$> malloc 1
Quadro K620 ( 3)
free/total (MB):    1976 /    2048
cudaMalloc (MB):    1720
free/total (MB):     255 /    2048

$> malloc 2
GeForce GTX 750 Ti ( 5)
free/total (MB):    1617 /    2048
cudaMalloc (MB):    1361
free/total (MB):     255 /    2048

32-bit:

$> nvcc -m 32 malloc.cu -o malloc

$> malloc 0
GeForce GTX 980 (16)
free/total (MB):    3066 /    3072
cudaMalloc (MB):    2810
free/total (MB):     256 /    3072

$> malloc 1
Quadro K620 ( 3)
free/total (MB):    1976 /    2048
cudaMalloc (MB):    1720
free/total (MB):     255 /    2048

$> malloc 2
GeForce GTX 750 Ti ( 5)
free/total (MB):    1618 /    2048
cudaMalloc (MB):    1362
free/total (MB):     255 /    2048
