Created
March 4, 2020 04:08
-
-
Save hayunjong83/220f97ea85d76faeab9ec00d890e9149 to your computer and use it in GitHub Desktop.
Record the kernel execution time with the CUDA Event API.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdlib>
#include <iostream>

#include <cuda.h>
#include <cuda_runtime.h>
/**
 * SAXPY kernel: computes y[i] = a * x[i] + y[i] for every i in [0, n).
 *
 * Uses a grid-stride loop, so the result is correct for any 1D launch
 * configuration, including grids with fewer than n threads. Only the .x
 * components of the block/grid indices are used.
 *
 * @param n  Number of elements to process; nothing is written when n <= 0.
 * @param a  Scalar multiplier applied to x.
 * @param x  Input vector (read-only), at least n floats readable by the device.
 * @param y  Input/output vector, at least n floats, updated in place.
 *           Declared __restrict__: x and y must not overlap.
 */
__global__ void saxpy(int n, float a, const float *__restrict__ x, float *__restrict__ y)
{
    // 64-bit indices so that blockIdx.x * blockDim.x cannot wrap around in
    // 32-bit arithmetic on very large grids.
    const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
    const long long first  = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (long long i = first; i < n; i += stride)
    {
        y[i] = a * x[i] + y[i];
    }
}
/**
 * Terminates the program with a diagnostic when a CUDA runtime call failed.
 *
 * @param status  Return code of the CUDA runtime call being checked.
 * @param what    Short description of the call, included in the message.
 */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        std::cerr << "CUDA error in " << what << " : "
                  << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

/**
 * Measures the execution time of one SAXPY kernel launch with CUDA events.
 *
 * Fills two host vectors with 2.0f, copies them to the device, brackets the
 * kernel launch with a start/stop event pair, prints the elapsed time in
 * milliseconds, and validates the result copied back from the device.
 *
 * @return 0 on success, EXIT_FAILURE if allocation, a CUDA call, or the
 *         result validation fails.
 */
int main()
{
    const int    N    = 1 << 16;
    const size_t size = static_cast<size_t>(N) * sizeof(float);  // bytes per vector

    const float a       = 2.0f;
    const float initial = 2.0f;

    float *h_x = static_cast<float *>(malloc(size));
    float *h_y = static_cast<float *>(malloc(size));
    if (h_x == NULL || h_y == NULL)
    {
        std::cerr << "Host allocation of " << size << " bytes failed" << std::endl;
        free(h_x);
        free(h_y);
        return EXIT_FAILURE;
    }

    float *d_x = NULL;
    float *d_y = NULL;
    checkCuda(cudaMalloc((void **)&d_x, size), "cudaMalloc(d_x)");
    checkCuda(cudaMalloc((void **)&d_y, size), "cudaMalloc(d_y)");

    for (int i = 0; i < N; i++)
    {
        h_x[i] = initial;
        h_y[i] = initial;
    }

    cudaEvent_t start, stop;
    checkCuda(cudaEventCreate(&start), "cudaEventCreate(start)");
    checkCuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");

    checkCuda(cudaMemcpy(d_x, h_x, size, cudaMemcpyHostToDevice), "cudaMemcpy(d_x)");
    checkCuda(cudaMemcpy(d_y, h_y, size, cudaMemcpyHostToDevice), "cudaMemcpy(d_y)");

    // Derive the grid size from N (ceiling division) so that every element is
    // covered even if N is changed to a value that is not a multiple of 256.
    const int threadsPerBlock = 256;
    const int blocks          = (N + threadsPerBlock - 1) / threadsPerBlock;

    // Only the kernel sits between the two events; the host<->device copies
    // are intentionally excluded from the measurement.
    checkCuda(cudaEventRecord(start), "cudaEventRecord(start)");
    saxpy<<<blocks, threadsPerBlock>>>(N, a, d_x, d_y);
    checkCuda(cudaGetLastError(), "saxpy kernel launch");
    checkCuda(cudaEventRecord(stop), "cudaEventRecord(stop)");

    // The launch is asynchronous: wait for the stop event before reading the
    // elapsed time. Execution errors inside the kernel surface here as well.
    checkCuda(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");

    float milliseconds = 0.0f;
    checkCuda(cudaEventElapsedTime(&milliseconds, start, stop), "cudaEventElapsedTime");
    std::cout << " SAXPY execution time : " << milliseconds << " ms " << std::endl;

    checkCuda(cudaMemcpy(h_y, d_y, size, cudaMemcpyDeviceToHost), "cudaMemcpy(h_y)");

    // a * x + y with x == y == 2.0f and a == 2.0f is exactly 6.0f, so an exact
    // comparison is safe here.
    const float expected = a * initial + initial;
    bool        ok       = true;
    for (int i = 0; i < N; i++)
    {
        if (h_y[i] != expected)
        {
            std::cerr << "Result mismatch at index " << i << " : got " << h_y[i]
                      << ", expected " << expected << std::endl;
            ok = false;
            break;
        }
    }

    checkCuda(cudaEventDestroy(start), "cudaEventDestroy(start)");
    checkCuda(cudaEventDestroy(stop), "cudaEventDestroy(stop)");
    checkCuda(cudaFree(d_x), "cudaFree(d_x)");
    checkCuda(cudaFree(d_y), "cudaFree(d_y)");
    free(h_x);
    free(h_y);

    return ok ? 0 : EXIT_FAILURE;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment