Skip to content

Instantly share code, notes, and snippets.

@glzjin
Last active November 20, 2018 12:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save glzjin/376af01d4dceaf6b7b0117289ff3b764 to your computer and use it in GitHub Desktop.
Save glzjin/376af01d4dceaf6b7b0117289ff3b764 to your computer and use it in GitHub Desktop.
Test OpenMP 2
#include <sys/time.h>

#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <iostream>

#include "omp.h"
using namespace std;
// Set every component of the vector (Size entries) and of the matrix
// (Size*Size entries) to 1.0.
//   pMatrix - destination buffer holding at least Size*Size doubles
//   pVector - destination buffer holding at least Size doubles
//   Size    - dimension of the vector / order of the square matrix
void DataInit (double* pMatrix, double* pVector, int Size) {
    // Vector components first.
    int Index = 0;
    while (Index < Size) {
        pVector[Index] = 1.0;
        ++Index;
    }

    // Then every matrix cell, addressed as one flat array.
    const int CellCount = Size * Size;
    for (int Cell = 0; Cell < CellCount; ++Cell) {
        pMatrix[Cell] = 1.0;
    }
}
// Reset the first Size entries of pResult to zero.
//   pResult - buffer holding at least Size doubles
//   Size    - number of entries to clear; nothing is written when Size <= 0
void NullResult (double* pResult, int Size) {
    // Walk from the back; the order of the stores does not matter.
    for (int Remaining = Size; Remaining > 0; --Remaining) {
        pResult[Remaining - 1] = 0;
    }
}
// Determine the problem size, allocate the matrix, vector and result buffers
// and initialise them.
//   pMatrix, pVector, pResult - receive newly allocated arrays (new[]); the
//                               caller owns them and must release them with delete[]
//   Size                      - in/out: used as-is when already positive,
//                               otherwise read interactively from stdin
// If stdin reaches end-of-file before a positive size was entered, the
// process is terminated with EXIT_FAILURE instead of prompting forever.
void ProcessInit (double* &pMatrix, double* &pVector, double* &pResult, int &Size) {
    while (Size <= 0) {
        std::printf ("Enter size of the initial objects: ");
        const int Parsed = std::scanf ("%d", &Size);
        if (Parsed == EOF) {
            // No more input will ever arrive; retrying would spin forever.
            std::printf ("\nNo size supplied, aborting\n");
            std::exit (EXIT_FAILURE);
        }
        if (Parsed != 1) {
            // Non-numeric input stays in the stream after a failed match;
            // drop the rest of the line so the next scanf can make progress.
            int Ch;
            while ((Ch = std::getchar ()) != '\n' && Ch != EOF) {}
            Size = 0;
        }
        if (Size <= 0) {std::printf ("Size must be greater than 0 \n");}
    }

    // Compute element counts in size_t so Size*Size cannot overflow int.
    const std::size_t Count = static_cast<std::size_t>(Size);
    pMatrix = new double [Count * Count];
    pVector = new double [Count];
    pResult = new double [Count];
    NullResult (pResult, Size);
    DataInit (pMatrix, pVector, Size);
}
// Serial matrix-vector product: pResult[i] += sum_j pMatrix[i*Size+j] * pVector[j].
//   pMatrix - row-major Size x Size matrix
//   pVector - input vector with Size entries
//   pResult - accumulator with Size entries; products are ADDED to its current
//             contents, so the caller zeroes it beforehand
//   Size    - matrix order
// Prints the elapsed wall time as "S:<nanoseconds>" on stdout.
void SerialProduct (double* pMatrix, double* pVector, double* pResult, int Size) {
    const auto t = std::chrono::steady_clock::now();
    for (int i = 0; i < Size; i++) {
        // Row offset computed in size_t so i*Size cannot overflow int.
        const double* pRow = pMatrix + static_cast<std::size_t>(i) * Size;
        for (int j = 0; j < Size; j++) {
            pResult[i] += pRow[j] * pVector[j];
        }
    }
    const auto Elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::steady_clock::now() - t);
    // count() is a 64-bit value: it must be printed with %lld, not %d.
    std::printf ("S:%lld\n", static_cast<long long>(Elapsed.count()));
}
// Row-parallel matrix-vector product: the rows are distributed over the
// OpenMP threads and each row's dot product is added into pResult[row].
//   pMatrix - row-major Size x Size matrix
//   pVector - input vector with Size entries
//   pResult - accumulator with Size entries (added to, not overwritten)
//   Size    - matrix order
// Prints the elapsed wall time as "P:<nanoseconds>" on stdout.
void ParallelProduct (double* pMatrix, double* pVector, double* pResult, int Size) {
    const auto Started = chrono::steady_clock::now();

    // Row and Col live inside the loops, so every thread has its own copies.
    #pragma omp parallel for
    for (int Row = 0; Row < Size; ++Row) {
        const double* pRowData = pMatrix + Row * Size;
        for (int Col = 0; Col < Size; ++Col) {
            pResult[Row] += pRowData[Col] * pVector[Col];
        }
    }

    const chrono::duration <long long, nano> Elapsed(chrono::steady_clock::now() - Started);
    cout << "P:" << Elapsed.count() << endl;
}
// Column-parallel matrix-vector product: the rows are processed one after
// another, and the dot product of each row is split over the OpenMP threads
// with a sum reduction.
//   pMatrix - row-major Size x Size matrix
//   pVector - input vector with Size entries
//   pResult - output with Size entries; each entry is OVERWRITTEN
//   Size    - matrix order
// Prints the elapsed wall time as "C:<nanoseconds>" on stdout.
void ParallelProductC (double* pMatrix, double* pVector, double* pResult, int Size) {
    const auto Started = chrono::steady_clock::now();

    for (int Row = 0; Row < Size; ++Row) {
        double RowSum = 0;
        // One parallel region per row; partial sums are combined by the reduction.
        #pragma omp parallel for reduction (+:RowSum)
        for (int Col = 0; Col < Size; ++Col)
            RowSum += pMatrix[Row * Size + Col] * pVector[Col];
        pResult[Row] = RowSum;
    }

    const chrono::duration <long long, nano> Elapsed(chrono::steady_clock::now() - Started);
    cout << "C:" << Elapsed.count() << endl;
}
// Block-parallel matrix-vector product: the matrix is cut into a
// GridSize x GridSize checkerboard of blocks, each block's partial row sums
// are computed independently and added into pResult.
//   pMatrix - row-major Size x Size matrix
//   pVector - input vector with Size entries
//   pResult - accumulator with Size entries (added to, not overwritten)
//   Size    - matrix order; does not have to be a multiple of GridSize
// Prints the elapsed wall time as "C2:<nanoseconds>" on stdout.
void ParallelProductC2 (double* pMatrix, double* pVector, double* pResult, int Size) {
    const auto t = std::chrono::steady_clock::now();
    const int GridThreadsNum = 4;
    const int GridSize = int(std::sqrt(double(GridThreadsNum)));
    const int BlockSize = Size / GridSize;
    const int BlockCount = GridSize * GridSize;

    // Iterate over blocks rather than thread ids: every block is processed
    // even when the runtime grants fewer than GridThreadsNum threads.
    #pragma omp parallel for num_threads(GridThreadsNum) schedule(static)
    for (int Block = 0; Block < BlockCount; Block++) {
        const int BlockRow = Block / GridSize;
        const int BlockCol = Block % GridSize;
        const int i_start = BlockRow * BlockSize;
        const int j_start = BlockCol * BlockSize;
        // The last block in each direction also takes the remainder rows/columns
        // left over when Size is not divisible by GridSize.
        const int i_end = (BlockRow == GridSize - 1) ? Size : i_start + BlockSize;
        const int j_end = (BlockCol == GridSize - 1) ? Size : j_start + BlockSize;

        for (int i = i_start; i < i_end; i++) {
            const double* pRow = pMatrix + static_cast<std::size_t>(i) * Size;
            double IterResult = 0;
            // The vector is indexed by the COLUMN of the matrix element.
            for (int j = j_start; j < j_end; j++)
                IterResult += pRow[j] * pVector[j];
            // Blocks in the same block-row contribute to the same pResult[i].
            #pragma omp atomic
            pResult[i] += IterResult;
        }
    }

    const auto Elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::steady_clock::now() - t);
    std::cout << "C2:" << static_cast<long long>(Elapsed.count()) << std::endl;
}
// Print the result vector as "result = (a, b, ..., z)" and release the three
// buffers with delete[].
//   pMatrix, pVector, pResult - arrays to free; the pointers are left
//                               unchanged and must not be used afterwards
//   Size                      - number of entries in pResult to print
void ProcessTerminate (double* &pMatrix, double* &pVector, double* &pResult, int Size) {
    // Print out the results: all but the last entry get a ", " separator.
    const int Last = Size - 1;
    printf ("result = (");
    int Index = 0;
    while (Index < Last) {
        printf ("%f, ", pResult[Index]);
        ++Index;
    }
    printf ("%f)\n", pResult[Last]);

    // Free the memory.
    delete [] pResult;
    delete [] pVector;
    delete [] pMatrix;
}
// Benchmark driver: runs the serial product and the three OpenMP variants on
// the same data, printing one timing line per variant, then prints the last
// result vector.
//   argv[1] (optional) - problem size; when absent or not positive, the size
//                        is requested interactively by ProcessInit
int main(int argc, char* argv[]) {
    double* pMatrix = nullptr;
    double* pVector = nullptr;
    double* pResult = nullptr;
    // Must start at 0: ProcessInit only prompts while Size <= 0, so an
    // uninitialised value would be read (and possibly used as the size).
    int Size = 0;
    if (argc > 1) {
        Size = std::atoi(argv[1]);
    }
    // Init size, vector and matrix
    ProcessInit (pMatrix, pVector, pResult, Size);
    // Serial product
    SerialProduct (pMatrix, pVector, pResult, Size);
    NullResult (pResult, Size);
    // Parallel product - rows
    ParallelProduct (pMatrix, pVector, pResult, Size);
    NullResult (pResult, Size);
    // Parallel product - columns
    ParallelProductC (pMatrix, pVector, pResult, Size);
    NullResult (pResult, Size);
    // Parallel product - columns improved
    ParallelProductC2 (pMatrix, pVector, pResult, Size);
    // Print out results and free the memory
    ProcessTerminate (pMatrix, pVector, pResult, Size);
    return 0;
}
import re
import subprocess
import xlsxwriter
# Upper bound of the repetition loops in this script: they all iterate over
# range(1, times), so each problem size is measured times - 1 times.
times = 8
def test(element_number):
sub = subprocess.Popen("openmp1 " + str(element_number), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
s = 0
p = 0
c = 0
c2 = 0
while True:
next_line = sub.stdout.readline().strip().decode()
if next_line.find("S") != -1:
r = re.findall('S:(\d*)', next_line)
s = int(r[0])
continue
if next_line.find("P") != -1:
r = re.findall('P:(\d*)', next_line)
p = int(r[0])
continue
if next_line.find("C2") != -1:
r = re.findall('C2:(\d*)', next_line)
c2 = int(r[0])
continue
if next_line.find("C") != -1:
r = re.findall('C:(\d*)', next_line)
c = int(r[0])
continue
if next_line == "":
break
return s, p, c, c2
# Benchmark sweep: run the program for every problem size produced by the
# i/j/k grid below, skipping non-positive sizes, and store the raw timings per
# size as one flat list [s, p, c, c2, s, p, c, c2, ...] (one quadruple per run).
result_set = {}
sizes = [
    10000 * i + 1000 * j + 100 * k
    for i in range(0, 1)
    for j in range(0, 10)
    for k in range(0, 10)
]
for element_number in sizes:
    if element_number <= 0:
        continue
    print("Testing:" + str(element_number))
    result_list = []
    for m in range(1, times):
        results = test(element_number)
        print(results)
        # Keep the four timings of this run in S, P, C, C2 order.
        result_list.extend(results[:4])
    result_set[element_number] = result_list
# Export: write the collected timings to openmp1.xlsx. Row 0 is the header;
# each following row holds one problem size in column 0 followed by four
# column groups (serial, by rows, by columns, by columns improved), each group
# containing one column per run.
workbook = xlsxwriter.Workbook('openmp1.xlsx')
worksheet = workbook.add_worksheet()
print(result_set)

runs = range(1, times)
# Every group spans times - 1 columns, so group g starts right after column
# g * (times - 1).
group_width = times - 1

row = 1
for number, result in result_set.items():
    worksheet.write(row, 0, number)
    for group in range(4):
        for run in runs:
            # result is laid out run by run, four values per run.
            worksheet.write(row, group * group_width + run, result[group + (run - 1) * 4])
    row += 1

header_templates = (
    'Serial Product - %i(ns)',
    'ParallelProductByRows - %i(ns)',
    'ParallelProductByColumns - %i(ns)',
    'ParallelProductByColumnsImproved - %i(ns)',
)
worksheet.write(0, 0, 'Number')
for group, template in enumerate(header_templates):
    for run in runs:
        worksheet.write(0, group * group_width + run, template % run)

workbook.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment