Created
February 9, 2022 18:23
-
-
Save TApplencourt/fae5ef9fde6951e12d9d46b6a294ea12 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <assert.h>
#include <math.h>
#include <omp.h>

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <vector>
/// Relative-tolerance comparison of a computed value against a reference.
///
/// @param x     value under test
/// @param gold  reference ("golden") value
/// @param tol   allowed relative error as a fraction of |gold| (0.1 == 10%)
/// @return true when x equals gold exactly, or |x - gold| <= tol * |gold|
///
/// The signed difference is compared rather than the two magnitudes, so
/// values of opposite sign (e.g. -5 vs 5) are never reported as equal.
/// std::fabs is spelled out so the floating-point overload is always picked,
/// independent of which `abs` overloads the included headers expose.
bool almost_equal(float x, float gold, float tol) {
  // Exact match first: also covers equal infinities, where x - gold is NaN.
  if (x == gold) {
    return true;
  }
  return std::fabs(x - gold) <= tol * std::fabs(gold);
}
int main(){ | |
// Num chunk is never so big. Maximun like 64 | |
const int num_chunks = 2 ; | |
const int globalWI{ 65535996 }; | |
assert(globalWI%num_chunks == 0); | |
const int num_iteration { 10 }; | |
std::vector<double> A(globalWI), B(globalWI), C(globalWI); | |
std::generate(B.begin(), B.end(), std::rand); | |
std::generate(C.begin(), C.end(), std::rand); | |
double *Aptr { A.data() }; | |
double *Bptr { B.data() }; | |
double *Cptr { C.data() }; | |
#pragma omp target enter data map(alloc: Aptr[0:globalWI]) map(to: Bptr[0:globalWI], Cptr[0:globalWI]) | |
double min_time = std::numeric_limits<double>::max(); | |
for (int r=0 ; r< num_iteration; r++) { | |
double start = std::numeric_limits<double>::max(); | |
double end = 0; | |
#pragma omp parallel for reduction(min:start) reduction(max:end) | |
for (int chunk_id = 0; chunk_id < num_chunks; chunk_id++ ) { | |
const int N = globalWI / num_chunks; | |
const int i_start = chunk_id*N; | |
const double l_start = omp_get_wtime(); | |
#pragma omp target teams distribute parallel for simd | |
for (int i=i_start ; i<i_start+N; i++) | |
Aptr[i] = 2.0*Bptr[i] + Cptr[i]; | |
const double l_end = omp_get_wtime(); | |
if (start > l_start) start = l_start; | |
if (end < l_end) end = l_end; | |
} | |
const double time = end - start; | |
min_time = std::min(time,min_time); | |
} | |
#pragma omp target exit data map(from: Aptr[0:globalWI]) | |
for (int i = 0 ; i < globalWI ; i++) { | |
if (!almost_equal(Aptr[i], 2.0*Bptr[i] + Cptr[i], 0.1)) { | |
std::cout << i << " " << Aptr[i] - (2.0*Bptr[i] + Cptr[i]) << std::endl; | |
return 1; | |
} | |
} | |
const double bw = (3*sizeof(double)*globalWI*1E-9)/min_time; | |
std::cout<< "GB/s BW reached: " << bw << std::endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment