Skip to content

Instantly share code, notes, and snippets.

@improve100
Forked from sonots/cudaMallocBench.cu
Created January 4, 2023 09:28
Show Gist options
  • Save improve100/a93fbe598f8d460a6205722fdb8dff15 to your computer and use it in GitHub Desktop.
Save improve100/a93fbe598f8d460a6205722fdb8dff15 to your computer and use it in GitHub Desktop.
Benchmark of cudaMalloc: allocates a total of 1 MB of device memory split into blocks of a given size (run it with several block sizes to compare).
#include <sys/time.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
// Returns the current wall-clock time in seconds as a double, built from
// gettimeofday()'s seconds field plus its microseconds field scaled by 1e-6.
//
// Intended as a simple host-side stopwatch: take two readings and subtract.
// Returns 0.0 if gettimeofday() reports failure, instead of a value computed
// from an uninitialized timeval.
inline double seconds()
{
    struct timeval tp;
    // The timezone argument is obsolete; POSIX leaves the behavior unspecified
    // when it is non-null, so pass NULL.
    if (gettimeofday(&tp, NULL) != 0) {
        return 0.0;
    }
    return ((double)tp.tv_sec + (double)tp.tv_usec * 1.e-6);
}
int total_size = 1024 * 1024; // 1MB

// Times `total_size / size` consecutive cudaMalloc calls of `size` bytes each,
// then times releasing them with cudaFree, printing one line per phase.
//
// size: bytes requested per cudaMalloc call; must be positive. If it is larger
//       than total_size, zero allocations are made and both lines report x 0.
//
// Non-positive sizes are rejected with a message on stderr (the division below
// would otherwise divide by zero or produce a negative count). If a cudaMalloc
// fails, the error is reported on stderr, the loop stops, and only the blocks
// actually obtained are counted and freed.
void test(int size)
{
    if (size <= 0) {
        fprintf(stderr, "test: block size must be positive (got %d)\n", size);
        return;
    }

    int num = total_size / size;

    // Keep the pointer table on the heap: for size == 1 it holds 1,048,576
    // pointers (8 MiB with 8-byte pointers), which is too large to place
    // safely on the stack, and variable-length arrays are not standard C++.
    float **d = NULL;
    if (num > 0) {
        d = (float **)malloc((size_t)num * sizeof(*d));
        if (d == NULL) {
            fprintf(stderr, "test: out of host memory for %d pointers\n", num);
            return;
        }
    }

    // `allocated` counts successful cudaMalloc calls so the free phase never
    // touches an uninitialized table entry.
    int allocated = 0;
    double iStart = seconds();
    for (; allocated < num; allocated++) {
        cudaError_t err = cudaMalloc((void **)&d[allocated], (size_t)size);
        if (err != cudaSuccess) {
            fprintf(stderr, "cudaMalloc(%d) failed after %d allocations: %s\n",
                    size, allocated, cudaGetErrorString(err));
            break;
        }
    }
    double iElaps = seconds() - iStart;
    printf("cudaMalloc(%d) x %d Time elapsed %f sec\n", size, allocated, iElaps);

    // Remember only the first cudaFree failure and report it after the timed
    // region, so error reporting does not distort the measurement.
    cudaError_t firstFreeErr = cudaSuccess;
    iStart = seconds();
    for (int i = 0; i < allocated; i++) {
        cudaError_t err = cudaFree(d[i]);
        if (err != cudaSuccess && firstFreeErr == cudaSuccess) {
            firstFreeErr = err;
        }
    }
    iElaps = seconds() - iStart;
    printf("cudaFree(%d) x %d Time elapsed %f sec\n", size, allocated, iElaps);

    if (firstFreeErr != cudaSuccess) {
        fprintf(stderr, "cudaFree(%d) reported an error: %s\n",
                size, cudaGetErrorString(firstFreeErr));
    }

    free(d);
}
// Entry point. Usage: <program> <block_size_in_bytes>
//
// Parses the per-allocation block size from argv[1], selects CUDA device 0,
// prints its name, and runs test() with that size.
//
// Returns 0 on success and 1 on a missing/invalid argument or when a CUDA
// setup call fails.
int main(int argc, char **argv)
{
    printf("%s Starting...\n", argv[0]);

    // Validate the command line before touching the GPU; argv[1] is NULL when
    // no argument is supplied.
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <block_size_in_bytes>\n", argv[0]);
        return 1;
    }
    char *end = NULL;
    long parsed = strtol(argv[1], &end, 10);
    int size = (int)parsed;
    // Reject empty/non-numeric input, trailing junk, non-positive values, and
    // values that do not survive the round trip through int.
    if (end == argv[1] || *end != '\0' || parsed <= 0 || (long)size != parsed) {
        fprintf(stderr, "Invalid block size '%s': expected a positive integer\n",
                argv[1]);
        return 1;
    }

    // set up device
    int dev = 0;
    cudaDeviceProp deviceProp;
    cudaError_t err = cudaGetDeviceProperties(&deviceProp, dev);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceProperties(%d) failed: %s\n",
                dev, cudaGetErrorString(err));
        return 1;
    }
    printf("Using Device %d: %s\n", dev, deviceProp.name);

    err = cudaSetDevice(dev);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice(%d) failed: %s\n",
                dev, cudaGetErrorString(err));
        return 1;
    }

    // Warm-up: cudaFree(0) is a no-op free that forces runtime/context
    // initialization here. On CUDA versions that initialize lazily, that
    // one-time cost would otherwise be billed to the first timed cudaMalloc.
    err = cudaFree(0);
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA runtime initialization failed: %s\n",
                cudaGetErrorString(err));
        return 1;
    }

    test(size);
    return(0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment