Skip to content

Instantly share code, notes, and snippets.

@mikebsg01
Last active May 23, 2020 00:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikebsg01/2150a7d18e0268b743a769405e9f0233 to your computer and use it in GitHub Desktop.
Save mikebsg01/2150a7d18e0268b743a769405e9f0233 to your computer and use it in GitHub Desktop.
Parallel programming - Exam #1 - By: Michael Serrato
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <iostream>
using namespace std;
/**
* @author Michael Serrato
* @author Alejandro Madariaga Angeles
* @author Alejandro Gimenez Isasi
*/
// Number of elements in every vector (host and device buffers are all LENGTH floats).
#define LENGTH 62000
// Number of segments the vector is split into by the third GPU strategy.
#define M 3
// Number of consecutive elements one block processes per pass in the GPU kernels.
#define N 6
// Upper bound (inclusive) of the values produced by fillWithRandoms; the lower bound is 0.
#define MAX_RANDOM_NUM 100
// Tolerance used by compareArrays: an element counts as different when the
// absolute difference exceeds this value.
#define EPSILON float(0.0000001)
/*
 * Writes the `size` elements of `ptr` to standard output on a single line,
 * separated by one space, followed by a newline (and a flush, via endl).
 * An empty array prints just the newline.
 */
template <typename T>
void printArray(T* ptr, size_t size) {
    const char* separator = "";
    for (size_t pos = 0; pos < size; ++pos) {
        cout << separator << ptr[pos];
        // Every element after the first is preceded by a single space.
        separator = " ";
    }
    cout << endl;
}
/*
 * Random number generator.
 *
 * Fills the `size` elements of `ptr` with pseudo-random whole numbers in the
 * range [0, MAX_RANDOM_NUM], drawn from rand(). Each draw is scaled to that
 * range, rounded to the nearest integer and then converted to T.
 * The caller is responsible for seeding the generator (srand).
 */
template <typename T>
void fillWithRandoms(T* ptr, size_t size) {
    for (size_t slot = 0; slot < size; ++slot) {
        // rand() / RAND_MAX is a fraction in [0, 1].
        const double fraction = ((double)rand()) / RAND_MAX;
        const int value = round(fraction * MAX_RANDOM_NUM);
        ptr[slot] = value;
    }
}
/*
 * Vector subtraction performed by the CPU.
 *
 * Computes c[i] = a[i] - b[i] for the first `size` elements, in increasing
 * index order. All three arrays must hold at least `size` elements.
 */
template <typename T>
void vectorSubstractionByCPU(T* a, T* b, T* c, size_t size) {
    T* minuend = a;
    T* subtrahend = b;
    T* result = c;
    for (size_t remaining = size; remaining > 0; --remaining) {
        *result = *minuend - *subtrahend;
        ++minuend;
        ++subtrahend;
        ++result;
    }
}
/*
 * Vector subtraction performed by the GPU - first strategy.
 *
 * Block `blockIdx.x` computes c = a - b for the N consecutive elements that
 * start at index blockIdx.x * N. Indices at or beyond LENGTH are skipped, so
 * the grid may overshoot the end of the vector.
 * threadIdx is not used: every thread of a block performs the same work.
 */
template <typename T>
__global__ void vectorSubstractionByGPUFirstStrategy(T* a, T* b, T* c) {
    const int blockStart = blockIdx.x * N;
    for (size_t offset = 0; offset < N; ++offset) {
        const size_t idx = blockStart + offset;
        // Guard the tail: the last block may extend past the vector.
        if (!(idx < LENGTH)) {
            continue;
        }
        c[idx] = a[idx] - b[idx];
    }
}
/*
 * Vector subtraction performed by the GPU - second strategy.
 *
 * The vector is treated as two halves split at LENGTH / 2. Block `blockIdx.x`
 * computes c = a - b for N / 2 consecutive elements starting at
 * blockIdx.x * (N / 2) in the lower half, and for the N / 2 elements at the
 * same offset in the upper half. Each write is guarded against the end of its
 * half (lower) or of the vector (upper).
 * threadIdx is not used: every thread of a block performs the same work.
 */
template <typename T>
__global__ void vectorSubstractionByGPUSecondStrategy(T* a, T* b, T* c) {
    const int halfLength = LENGTH / 2;
    const int chunk = N / 2;
    const int lowerStart = blockIdx.x * chunk;
    const int upperStart = lowerStart + halfLength;
    for (int step = 0; step < chunk; step++) {
        const int lower = lowerStart + step;
        const int upper = upperStart + step;
        if (lower < halfLength) {
            c[lower] = a[lower] - b[lower];
        }
        if (upper < LENGTH) {
            c[upper] = a[upper] - b[upper];
        }
    }
}
/*
 * Vector subtraction performed by the GPU - third strategy.
 *
 * The vector is split into M consecutive segments. Block `blockIdx.x`
 * computes c = a - b for the N consecutive elements located at offset
 * blockIdx.x * N inside each of the M segments.
 *
 * Launch layout: a 1-D grid of at least ceil(LENGTH / (M * N)) blocks.
 * threadIdx is not used, so every thread of a block performs the same work.
 * a, b and c must each hold LENGTH elements.
 *
 * Every access is bounds-checked: without the guards the last blocks read and
 * write past the end of the buffers whenever the grid does not tile the
 * vector exactly (e.g. LENGTH = 62000, M = 3, N = 6 reaches index 62001).
 */
template <typename T>
__global__ void vectorSubstractionByGPUThirdStrategy(T* a, T* b, T* c) {
    // Segment length is rounded up so that M segments always cover LENGTH,
    // even when M does not divide it.
    const int segmentLength = (LENGTH + M - 1) / M;
    const int blockOffset = blockIdx.x * N;
    for (int i = 0; i < M; ++i) {
        const int segmentStart = i * segmentLength;
        for (int j = 0; j < N; ++j) {
            const int offsetInSegment = blockOffset + j;
            const int idx = segmentStart + offsetInSegment;
            // Stay inside this segment (no overlap with the next one) and
            // inside the vector (the last segment may be shorter).
            if (offsetInSegment < segmentLength && idx < LENGTH) {
                c[idx] = a[idx] - b[idx];
            }
        }
    }
}
/*
 * Compares the first `size` elements of `a` and `b`.
 *
 * Returns the sum of the absolute element-wise differences and stores in
 * `*countDiff` how many elements differ by more than EPSILON.
 */
float compareArrays(float* a, float* b, size_t size, int* countDiff) {
    float totalDifference = 0;
    int mismatches = 0;
    for (size_t pos = 0; pos < size; ++pos) {
        const float delta = abs(a[pos] - b[pos]);
        totalDifference += delta;
        mismatches += (delta > EPSILON) ? 1 : 0;
    }
    *countDiff = mismatches;
    return totalDifference;
}
/*
 * Reports a failed CUDA runtime call on stderr.
 * Returns true when `status` is cudaSuccess, false otherwise.
 */
static bool cudaSucceeded(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s FAILED: %s\n", what, cudaGetErrorString(status));
    return false;
}

/* Signature shared by the float instantiations of the three GPU kernels. */
typedef void (*SubtractionKernel)(float*, float*, float*);

/*
 * Runs one GPU strategy end to end: prints its header and launch layout,
 * uploads a and b, launches `kernel`, times it, downloads the result into
 * gpu_c and reports how it differs from the CPU reference cpu_c.
 *
 * Failures are reported on stderr and the run continues where that still
 * makes sense, so the remaining strategies are attempted.
 */
static void runGpuStrategy(const char* title, SubtractionKernel kernel,
                           int numBlocks, int numThreads,
                           float* a, float* b, float* cpu_c, float* gpu_c,
                           float* dev_a, float* dev_b, float* dev_c) {
    const size_t bytes = LENGTH * sizeof(float);

    cout << "\tOperacion en GPU - " << title << ":" << endl;
    cout << "\t\t- Numero de bloques utilizados: " << numBlocks << endl;
    cout << "\t\t- Numero de hilos por bloque: " << numThreads << endl << endl;

    cudaSucceeded(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (a -> dev_a)");
    cudaSucceeded(cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy (b -> dev_b)");
    // Clear the output buffer so a kernel that fails, or that skips elements,
    // cannot pass the comparison by reusing the previous strategy's result.
    cudaSucceeded(cudaMemset(dev_c, 0, bytes), "cudaMemset (dev_c)");

    const clock_t start_time = clock();
    kernel<<<dim3(numBlocks), dim3(numThreads)>>>(dev_a, dev_b, dev_c);
    cudaSucceeded(cudaGetLastError(), "Kernel launch");
    // Kernel launches are asynchronous: wait for completion so the measured
    // interval covers the execution and not only the launch call.
    cudaSucceeded(cudaDeviceSynchronize(), "Kernel execution");
    const clock_t end_time = clock();
    const clock_t duration_time = end_time - start_time;

    const bool downloaded =
        cudaSucceeded(cudaMemcpy(gpu_c, dev_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy (dev_c -> gpu_c)");

    printf("\n\t\t-> Toma %.6f ms\n\n", ((((double)duration_time) / CLOCKS_PER_SEC) * 1000.00));
    if (downloaded) {
        // Only compare when gpu_c really holds this strategy's output.
        int countDiff = 0;
        const float difference = compareArrays(gpu_c, cpu_c, LENGTH, &countDiff);
        printf("\t\tElementos diferentes %d (%.3f %%) Con valor de %.8f\n", countDiff, ((((float)countDiff) / LENGTH) * 100), difference);
    }
    printf("\t======================================================================\n\n");
}

/*
 * Program entry point.
 *
 * Fills two LENGTH-element vectors with random numbers, subtracts them on the
 * CPU (reference result and timing) and then with each of the three GPU
 * strategies, printing for every strategy the launch layout, the elapsed time
 * and the difference against the CPU result.
 *
 * Returns 0 on success, 1 when the device could not be selected/queried or
 * the device buffers could not be allocated.
 */
int main() {
    const size_t bytes = LENGTH * sizeof(float);
    // Device pointers start as NULL so the cleanup below is safe on every path
    // (cudaFree performs no operation on a null pointer).
    float* dev_a = NULL;
    float* dev_b = NULL;
    float* dev_c = NULL;
    float* a = new float[LENGTH];
    float* b = new float[LENGTH];
    float* cpu_c = new float[LENGTH];
    float* gpu_c = new float[LENGTH];
    cudaDeviceProp devProp;
    int exitCode = 0;

    bool ready = cudaSucceeded(cudaSetDevice(0), "cudaSetDevice") &&
                 cudaSucceeded(cudaGetDeviceProperties(&devProp, 0), "cudaGetDeviceProperties");

    if (ready) {
        const int maxThreads = devProp.maxThreadsPerBlock;
        cout << "Propiedades del GPU:\n"
             << "\t- Hilos maximos por bloque: " << maxThreads << endl
             << "=============================================" << endl << endl;

        // Allocate memory in GPU
        ready = cudaSucceeded(cudaMalloc(&dev_a, bytes), "cudaMalloc (dev_a)") &&
                cudaSucceeded(cudaMalloc(&dev_b, bytes), "cudaMalloc (dev_b)") &&
                cudaSucceeded(cudaMalloc(&dev_c, bytes), "cudaMalloc (dev_c)");
    }

    if (ready) {
        // Fill arrays A & B with random numbers
        srand((unsigned)time(NULL));
        fillWithRandoms(a, LENGTH);
        fillWithRandoms(b, LENGTH);

        cout << endl << "Resta de vectores con " << LENGTH << " elementos:" << endl << endl;

        // CPU reference run
        cout << "\tOperacion en CPU:" << endl;
        const clock_t start_time = clock();
        vectorSubstractionByCPU(a, b, cpu_c, LENGTH);
        const clock_t end_time = clock();
        const clock_t duration_time = end_time - start_time;
        printf("\n\t\t-> Toma %.6f ms\n\n", ((((double)duration_time) / CLOCKS_PER_SEC) * 1000.00));

        // All strategies use one thread per block; only the grid size differs.
        const int numThreads = 1;
        // First and second strategies: one block per N elements (rounded up).
        const int blocksPerN = (LENGTH + N - 1) / N;
        // Third strategy: one block per M * N elements (rounded up).
        const int blocksPerMN = (LENGTH + (M * N) - 1) / (M * N);

        runGpuStrategy("PRIMER ESTRATEGIA", vectorSubstractionByGPUFirstStrategy<float>,
                       blocksPerN, numThreads, a, b, cpu_c, gpu_c, dev_a, dev_b, dev_c);
        runGpuStrategy("SEGUNDA ESTRATEGIA", vectorSubstractionByGPUSecondStrategy<float>,
                       blocksPerN, numThreads, a, b, cpu_c, gpu_c, dev_a, dev_b, dev_c);
        runGpuStrategy("TERCERA ESTRATEGIA", vectorSubstractionByGPUThirdStrategy<float>,
                       blocksPerMN, numThreads, a, b, cpu_c, gpu_c, dev_a, dev_b, dev_c);
    } else {
        exitCode = 1;
    }

    // Free memory in GPU
    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_c);

    // Free host memory
    delete[] a;
    delete[] b;
    delete[] cpu_c;
    delete[] gpu_c;

    printf("\nPresione cualquier tecla para salir...");
    char key;
    scanf("%c", &key);
    return exitCode;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment