Skip to content

Instantly share code, notes, and snippets.

@TApplencourt
Created February 9, 2022 18:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TApplencourt/fae5ef9fde6951e12d9d46b6a294ea12 to your computer and use it in GitHub Desktop.
Save TApplencourt/fae5ef9fde6951e12d9d46b6a294ea12 to your computer and use it in GitHub Desktop.
#include <assert.h>
#include <math.h>
#include <omp.h>

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <vector>
/// Relative-tolerance comparison of a computed value against a reference.
///
/// @param x    value under test
/// @param gold reference ("golden") value
/// @param tol  allowed relative deviation, as a fraction of |gold|
///             (e.g. 0.1 accepts a 10% deviation)
/// @return true when x equals gold exactly, or when
///         |x - gold| <= tol * |gold|; false otherwise (including when
///         either value is NaN).
bool almost_equal(float x, float gold, float tol) {
  // Exact match short-circuits; this also covers gold == 0 and equal
  // infinities, where the difference below would be 0 or NaN.
  if (x == gold) {
    return true;
  }
  // Compare the signed difference rather than the two magnitudes, so a value
  // with the right magnitude but the wrong sign is rejected. std::fabs is
  // used explicitly to avoid accidentally selecting the integer ::abs.
  return std::fabs(x - gold) <= tol * std::fabs(gold);
}
/// Bandwidth micro-benchmark for the kernel A[i] = 2*B[i] + C[i] run through
/// OpenMP target offload.
///
/// The index range [0, globalWI) is split into `num_chunks` equal chunks. The
/// chunks are iterated by a host `omp parallel for`, and each chunk is
/// submitted as its own `target` region. For every repetition the elapsed time
/// is measured from the earliest chunk start to the latest chunk end; the
/// smallest elapsed time over all repetitions is used for the bandwidth
/// figure. The result is then copied back and validated on the host.
///
/// @return 0 on success (after printing the bandwidth), 1 if validation finds
///         an element outside the accepted tolerance.
int main() {
  // The number of chunks is never very large; at most around 64.
  const int num_chunks = 2;
  const int globalWI{65535996};
  // Both operands are compile-time constants, so check divisibility at
  // compile time; unlike assert(), this is not stripped by NDEBUG.
  static_assert(globalWI % num_chunks == 0,
                "globalWI must be divisible by num_chunks");
  const int num_iteration{10};
  // Elements per chunk; invariant across chunks and repetitions.
  const int N = globalWI / num_chunks;

  std::vector<double> A(globalWI), B(globalWI), C(globalWI);
  std::generate(B.begin(), B.end(), std::rand);
  std::generate(C.begin(), C.end(), std::rand);
  double *Aptr{A.data()};
  double *Bptr{B.data()};
  double *Cptr{C.data()};

  // A is only written by the kernel, so it is allocated without a copy-in;
  // B and C are inputs and are copied to the device once, outside the timing.
  #pragma omp target enter data map(alloc: Aptr[0:globalWI]) map(to: Bptr[0:globalWI], Cptr[0:globalWI])

  double min_time = std::numeric_limits<double>::max();
  for (int r = 0; r < num_iteration; r++) {
    double start = std::numeric_limits<double>::max();
    double end = 0;
    // The reductions combine the per-thread earliest start and latest end
    // timestamps into a single wall-clock window for this repetition.
    #pragma omp parallel for reduction(min:start) reduction(max:end)
    for (int chunk_id = 0; chunk_id < num_chunks; chunk_id++) {
      const int i_start = chunk_id * N;
      const double l_start = omp_get_wtime();
      #pragma omp target teams distribute parallel for simd
      for (int i = i_start; i < i_start + N; i++)
        Aptr[i] = 2.0 * Bptr[i] + Cptr[i];
      const double l_end = omp_get_wtime();
      if (start > l_start) start = l_start;
      if (end < l_end) end = l_end;
    }
    const double time = end - start;
    min_time = std::min(time, min_time);
  }

  // Copy the result back, and release the device copies of the inputs, which
  // would otherwise remain mapped for the rest of the program.
  #pragma omp target exit data map(from: Aptr[0:globalWI]) map(release: Bptr[0:globalWI], Cptr[0:globalWI])

  // Validate every element against the host-side reference; report the first
  // mismatch (index and difference) and fail.
  for (int i = 0; i < globalWI; i++) {
    const double expected = 2.0 * Bptr[i] + Cptr[i];
    if (!almost_equal(Aptr[i], expected, 0.1)) {
      std::cout << i << " " << Aptr[i] - expected << '\n';
      return 1;
    }
  }

  // Three double-precision accesses per element: two loads (B, C) and one
  // store (A).
  const double bw = (3 * sizeof(double) * globalWI * 1E-9) / min_time;
  std::cout << "GB/s BW reached: " << bw << '\n';
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment