Created
May 23, 2018 02:38
-
-
Save likejazz/bc18b4927d8cbb9bb2bd18664d6b3923 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <math.h> | |
#include <stdio.h> | |
// Kernel function to add the elements of two arrays | |
__global__ | |
void add(int n, float *x, float *y) { | |
int index = blockIdx.x * blockDim.x + threadIdx.x; | |
int stride = blockDim.x * gridDim.x; | |
for (int i = index; i < n; i += stride) | |
y[i] = x[i] + y[i]; | |
} | |
int main(void) { | |
int N = 1 << 20; | |
float *x, *y; | |
cudaSetDevice(0); | |
// Allocate Unified Memory – accessible from CPU or GPU | |
cudaMallocManaged(&x, N * sizeof(float)); | |
cudaMallocManaged(&y, N * sizeof(float)); | |
// initialize x and y arrays on the host | |
for (int i = 0; i < N; i++) { | |
x[i] = 1.0f; | |
y[i] = 2.0f; | |
} | |
// Run kernel on 1M elements on the GPU | |
int blockSize = 256; | |
int numBlocks = (N + blockSize - 1) / blockSize; | |
add<<<numBlocks, blockSize>>>(N, x, y); | |
// Wait for GPU to finish before accessing on host | |
cudaDeviceSynchronize(); | |
// Check for errors (all values should be 3.0f) | |
float maxError = 0.0f; | |
for (int i = 0; i < N; i++) | |
maxError = fmax(maxError, fabs(y[i] - 3.0f)); | |
std::cout << "Max error: " << maxError << std::endl; | |
// Free memory | |
cudaFree(x); | |
cudaFree(y); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment