Skip to content

Instantly share code, notes, and snippets.

@mixstef
Last active May 28, 2021 15:37
Show Gist options
  • Save mixstef/a0b9f8b43a55c89aa98043aa096f37ce to your computer and use it in GitHub Desktop.
Save mixstef/a0b9f8b43a55c89aa98043aa096f37ce to your computer and use it in GitHub Desktop.
Ατζέντα εργαστηρίου Παράλληλου Προγραμματισμού 28/5/2021

Ατζέντα εργαστηρίου Παράλληλου Προγραμματισμού 28/5/2021

Στο gist αυτό θα αναρτηθούν λύσεις και υποδείξεις κατά τη διεξαγωγή του εργαστηρίου.

// Serial Matrix multiplication
// Compile with: g++ -Wall -O2 matmul-serial.cpp -o matmul-serial
#include <iostream>
size_t const N = 1000; // all matrices are NxN
using namespace std;
// Serial matrix multiplication: computes C = A * B for square n x n matrices.
//
// Parameters:
//   a - input matrix A, row-major, n*n elements.
//   b - input matrix B stored TRANSPOSED in row-major order: b[j*n+k] holds
//       B(k,j), so "row j" of b is column j of B. This lets the inner loop
//       walk both operands with unit stride.
//   c - output matrix C, row-major, n*n elements; every element is overwritten.
//   n - dimension of all three matrices.
//
// All loop bounds use the parameter n (not a file-level constant), so the
// bounds always agree with the i*n / j*n indexing for any matrix size.
void MMSerial(double a[],double b[],double c[],size_t n) {
    // for all rows of A and C
    for (size_t i=0;i<n;++i) {
        // for all columns of B (rows of the transposed storage)
        for (size_t j=0;j<n;++j) {
            // dot product of row Ai and column Bj -> single element Ci,j
            double sum = 0.0;
            for (size_t k=0;k<n;++k) {
                sum += a[i*n+k]*b[j*n+k]; // NOTE: B is transposed
            }
            c[i*n+j] = sum;
        }
    }
}
int main() {
// alloc matrices
double *a = new double[N*N];
double *b = new double[N*N]; // assume B transposed
double *c = new double[N*N];
// init input (and output) matrices
for (size_t i=0;i<N*N;i++) {
a[i] = 2.0;
b[i] = 3.0;
c[i] = 20.0;
}
// execute load
MMSerial(a,b,c,N);
// test results
for (size_t i=0;i<N*N;i++) {
if (c[i]!=6.0*N) { cout << "error!" << endl; break; }
}
// free matrices
delete[] a;
delete[] b;
delete[] c;
return 0;
}
// TBB matrix multiplication: computes C = A * B for square n x n matrices,
// splitting the rows of A/C across tasks via tbb::parallel_for.
//
// Parameters:
//   a - input matrix A, row-major, n*n elements.
//   b - input matrix B stored TRANSPOSED in row-major order: b[j*n+k] holds
//       B(k,j).
//   c - output matrix C, row-major, n*n elements; every element is overwritten.
//   n - dimension of all three matrices.
//
// Each row index i writes only to c[i*n .. i*n+n-1], so different subranges
// of rows never write the same output element.
// All loop bounds use the parameter n (not a file-level constant), so the
// bounds always agree with the i*n / j*n indexing for any matrix size.
void MMTbb(double a[],double b[],double c[],size_t n) {
    tbb::parallel_for(tbb::blocked_range<size_t>(0,n),[&](const tbb::blocked_range<size_t>& r) {
        // for the rows of A and C assigned to this subrange
        for (size_t i=r.begin();i!=r.end();++i) {
            // for all columns of B (rows of the transposed storage)
            for (size_t j=0;j<n;++j) {
                // dot product of row Ai and column Bj -> single element Ci,j
                double sum = 0.0;
                for (size_t k=0;k<n;++k) {
                    sum += a[i*n+k]*b[j*n+k]; // NOTE: B is transposed
                }
                c[i*n+j] = sum;
            }
        }
    });
}
// Sum reduction - serial version
// Compile with: g++ -Wall -O2 -std=c++11 sumreduce-serial.cpp -o sumreduce-serial
#include <iostream>
#include <chrono>
size_t const N = 10000000;
using namespace std;
// Serial sum reduction.
// Adds the first n elements of a, in index order, starting from 0.0, and
// returns the total. Returns 0.0 when n is 0.
double SumSerial(double a[],size_t n) {
    double total = 0.0;
    const double *const last = a+n; // one past the final element to add
    for (const double *p=a;p!=last;++p) {
        total += *p;
    }
    return total;
}
// Test driver for the serial sum reduction.
// Fills an array with 1..N, times SumSerial over it, compares the result with
// the closed form N*(N+1)/2 and prints the elapsed time in microseconds.
int main() {
    // allocate and fill the input: a[i] = i+1
    double *a = new double[N];
    for (size_t idx=0;idx<N;++idx) {
        a[idx] = idx+1;
    }

    // time only the reduction itself
    const auto t0 = chrono::high_resolution_clock::now();
    const double sum = SumSerial(a,N);
    const auto t1 = chrono::high_resolution_clock::now();

    // compare against the arithmetic-series formula
    const double expected = (double)N*(N+1)/2;
    if (sum!=expected) {
        cout << "Reduction error: " << sum << endl;
    }

    // release the input array
    delete[] a;

    // report elapsed time
    const auto elapsed = chrono::duration_cast<chrono::microseconds>(t1-t0);
    cout << elapsed.count() << " us" << endl;

    return 0;
}
// Sum reduction - tbb version
// Compile with: g++ -Wall -O2 -std=c++11 sumreduce-tbb.cpp -o sumreduce-tbb -ltbb
#include <iostream>
#include <chrono>
#include "tbb/tbb.h"
size_t const N = 10000000;
using namespace std;
double SumTbb(double a[],size_t n) {
/* From Intel doc:
template<typename Range, typename Value, typename Func, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity,
const Func& func, const Reduction& reduction,
[, partitioner[, task_group_context& group]] );
Value identity
Left identity element for Func::operator().
Value Func::operator()(const Range& range, const Value& x) const
Accumulate result for subrange, starting with initial value x.
Value Reduction::operator()(const Value& x, const Value& y) const
Combine results x and y.
*/
return tbb::parallel_reduce(tbb::blocked_range<size_t>(size_t(0),n),
0.0,
[&](const tbb::blocked_range<size_t>& r,double init) -> double {
double sum = init;
for (size_t i=r.begin();i!=r.end();++i) {
sum += a[i];
}
return sum;
},
[](double x,double y) -> double {
return x+y;
});
}
// Test driver for the TBB sum reduction.
// Fills an array with 1..N, times SumTbb over it, compares the result with
// the closed form N*(N+1)/2 and prints the elapsed time in microseconds.
int main() {
    // allocate and fill the input: a[i] = i+1
    double *a = new double[N];
    for (size_t idx=0;idx<N;++idx) {
        a[idx] = idx+1;
    }

    // time only the reduction itself
    const auto t0 = chrono::high_resolution_clock::now();
    const double sum = SumTbb(a,N);
    const auto t1 = chrono::high_resolution_clock::now();

    // compare against the arithmetic-series formula
    const double expected = (double)N*(N+1)/2;
    if (sum!=expected) {
        cout << "Reduction error: " << sum << endl;
    }

    // release the input array
    delete[] a;

    // report elapsed time
    const auto elapsed = chrono::duration_cast<chrono::microseconds>(t1-t0);
    cout << elapsed.count() << " us" << endl;

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment