Skip to content

Instantly share code, notes, and snippets.

@hayunjong83
Created March 4, 2020 04:08
Show Gist options
  • Save hayunjong83/220f97ea85d76faeab9ec00d890e9149 to your computer and use it in GitHub Desktop.
Save hayunjong83/220f97ea85d76faeab9ec00d890e9149 to your computer and use it in GitHub Desktop.
Record the kernel execution time with the CUDA Event API
#include <cstdlib>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime.h>
// SAXPY kernel: y[i] = a * x[i] + y[i] for every i in [0, n).
//
// Launch layout: 1D grid of 1D blocks. Each thread starts at its flat global
// index and advances by the total number of launched threads, so the result
// is complete for any grid/block size, including grids with fewer threads
// than n.
//
// Parameters:
//   n - number of elements to process; a value <= 0 makes the kernel a no-op.
//   a - scalar multiplier applied to x.
//   x - input vector, only read. Declared __restrict__, so the caller must not
//       pass a pointer that aliases y.
//   y - input/output vector, updated in place.
__global__ void saxpy(int n, float a, const float *__restrict__ x, float *__restrict__ y)
{
    // Index math is done in size_t so that blockIdx.x * blockDim.x and the
    // stride additions cannot wrap in 32 bits.
    const size_t count  = (n > 0) ? static_cast<size_t>(n) : 0;
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;

    for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < count;
         i += stride)
    {
        y[i] = a * x[i] + y[i];
    }
}
// Prints a diagnostic and terminates the program if a CUDA runtime call failed.
// `what` names the failing operation in the message.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        std::cerr << "CUDA error in " << what << " : "
                  << cudaGetErrorString(status) << std::endl;
        // Host and device allocations are left to process teardown.
        std::exit(EXIT_FAILURE);
    }
}

// Runs one SAXPY launch over 2^16 floats and prints the kernel execution time
// measured with a pair of CUDA events recorded around the launch.
// Returns 0 on success; exits with EXIT_FAILURE on any allocation or CUDA error.
int main()
{
    const int    N    = 1 << 16;
    const size_t size = N * sizeof(float);   // byte count of each vector

    float *h_x = (float*)std::malloc(size);
    float *h_y = (float*)std::malloc(size);
    if (h_x == NULL || h_y == NULL)
    {
        std::cerr << "host allocation of " << size << " bytes failed" << std::endl;
        std::free(h_x);   // free(NULL) is a no-op
        std::free(h_y);
        return EXIT_FAILURE;
    }

    float *d_x = NULL;
    float *d_y = NULL;
    checkCuda(cudaMalloc((void**) &d_x, size), "cudaMalloc(d_x)");
    checkCuda(cudaMalloc((void**) &d_y, size), "cudaMalloc(d_y)");

    for (int i = 0; i < N; i++)
    {
        h_x[i] = 2.0f;
        h_y[i] = 2.0f;
    }

    cudaEvent_t start, stop;
    checkCuda(cudaEventCreate(&start), "cudaEventCreate(start)");
    checkCuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");

    checkCuda(cudaMemcpy(d_x, h_x, size, cudaMemcpyHostToDevice), "cudaMemcpy(d_x <- h_x)");
    checkCuda(cudaMemcpy(d_y, h_y, size, cudaMemcpyHostToDevice), "cudaMemcpy(d_y <- h_y)");

    // Derive the grid from N so every element is covered if N changes
    // (ceil-division; yields 256 blocks for the current N).
    const int threadsPerBlock = 256;
    const int blocks = (N + threadsPerBlock - 1) / threadsPerBlock;

    checkCuda(cudaEventRecord(start), "cudaEventRecord(start)");
    saxpy<<<blocks, threadsPerBlock>>>(N, 2.0f, d_x, d_y);
    // Record `stop` right behind the launch so host-side error checking does
    // not sit between the kernel and the event; inspect the statuses afterwards.
    const cudaError_t stopStatus = cudaEventRecord(stop);
    checkCuda(cudaGetLastError(), "saxpy kernel launch");
    checkCuda(stopStatus, "cudaEventRecord(stop)");

    checkCuda(cudaMemcpy(h_y, d_y, size, cudaMemcpyDeviceToHost), "cudaMemcpy(h_y <- d_y)");

    // The stop event must have completed before its timestamp can be read.
    checkCuda(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");
    float milliseconds = 0.0f;
    checkCuda(cudaEventElapsedTime(&milliseconds, start, stop), "cudaEventElapsedTime");
    std::cout << " SAXPY execution time : " << milliseconds << " ms " << std::endl;

    checkCuda(cudaEventDestroy(start), "cudaEventDestroy(start)");
    checkCuda(cudaEventDestroy(stop), "cudaEventDestroy(stop)");
    checkCuda(cudaFree(d_x), "cudaFree(d_x)");
    checkCuda(cudaFree(d_y), "cudaFree(d_y)");
    std::free(h_x);
    std::free(h_y);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment