Last active
September 25, 2016 00:40
-
-
Save ajaysjournal/c7ae2d780203d9585546a5b85845cebd to your computer and use it in GitHub Desktop.
Matrix row sum using PThreads performance example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Read here - https://www.evernote.com/l/Ahzo8czYhypG258BubNUCe13nIajkDYlFxs
   Example for a 2x2 matrix:
     A[2][2] = [ {1, 2},
                 {3, 4} ]
   The sum of each row is then stored in the B matrix as shown below:
     B[2][1] = [ {3},
                 {7} ]
*/
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <sys/types.h>
#include <sys/time.h>
#include <sys/times.h>
#include <pthread.h>
#include <unistd.h>
/* junk: leftover macro, not referenced anywhere in the code shown in this file */
#define randm() 4|2[uid]&3

/* Number of rows of A (and of B). */
#define M 10000
/* Number of columns of A. */
#define N 10000
/* Number of consecutive rows handled by one worker thread. */
#define SIZE 250

/*
 * Input matrix and the column vector of per-row sums.
 * Both have static storage duration, so they start out as all zeros
 * without needing an explicit initializer.
 */
float A[M][N];
float B[M][1];

void initialize_inputs();

/* Thread handles; only the first M/SIZE slots are filled by rowParallelManager(). */
pthread_t threads[M];
/* Initialize A and B (and X to 0.0s) */ | |
void initialize_inputs() { | |
int row, col; | |
row=M; | |
col=N; | |
printf("\nInitializing...\n"); | |
for (col = 0; col < N; col++) { | |
for (row = 0; row < N; row++) { | |
A[row][col] = (int)rand() / 32768.0; | |
} | |
} | |
} | |
void printMatrixA(){ | |
printf("\n \n Printing the A matrix\n"); | |
int i,j; | |
for ( i = 0; i < M; i++ ) { | |
for ( j = 0; j < N; j++ ) { | |
printf("A[%d][%d] = %10f\t\t ",i,j, A[i][j] ); | |
} | |
printf("\n"); | |
} | |
} | |
void printMatrixB(){ | |
printf("\n\nPrinting the B matrix\n"); | |
int i,j; | |
for ( i = 0; i < M; i++ ) { | |
for ( j = 0; j < 1; j++ ) { | |
printf("B[%d][%d] = %10f\t\t ",i,j, B[i][j] ); | |
} | |
printf("\n"); | |
} | |
} | |
void sumRowSequentialCode(){ | |
int i,j; | |
for ( i = 0; i < M; i++ ) { | |
for ( j = 0; j < N; j++ ) { | |
B[i][0] += A[i][j]; | |
} | |
} | |
} | |
void *sumRowParallel(void *threadId){ | |
int i,j; | |
int start = (int)threadId*(int)SIZE; | |
int end = (int)(threadId+1)*(int)SIZE; | |
for ( i= start; i < end; i++ ) { | |
for(j=0;j<N;j++){ | |
B[i][0] += A[i][j]; | |
} | |
} | |
pthread_exit(NULL); // you need to close that thread | |
} | |
void rowParallelManager(){ | |
int i; | |
int numThread = M/SIZE; | |
for (i=0; i<numThread; i++) { | |
if(pthread_create(&threads[i], NULL, sumRowParallel,(void *)i)){ | |
printf("ERROR; return code from pthread_create()\n"); | |
exit(-1); | |
} | |
} | |
} | |
void finallyThreadsAreJoiningHere(){ | |
int i; | |
// wiating for all threads to compute the summation | |
for (i=0; i<M; i++) { | |
pthread_join(threads[i], NULL); | |
} | |
} | |
/* Convert a tick count as reported by times() into milliseconds. */
static double ticksToMs(clock_t ticks, long ticksPerSec) {
    return (double)ticks / (double)ticksPerSec * 1000.0;
}

/*
 * Program entry point: fill A, time the multi-threaded row summation,
 * print the resulting B vector and then the timing figures.
 *
 * Wall-clock time comes from gettimeofday(); CPU time comes from times().
 * The tms_* fields are measured in clock ticks, whose rate is given by
 * sysconf(_SC_CLK_TCK), not by CLOCKS_PER_SEC.
 *
 * Command-line arguments are ignored. Returns 0 on success and
 * EXIT_FAILURE if the clock tick rate cannot be determined.
 */
int main(int argc, const char * argv[]) {
    struct timeval etstart, etstop;          /* Elapsed times using gettimeofday() */
    struct tms cputstart, cputstop;          /* CPU times for my processes */
    unsigned long long usecstart, usecstop;
    long ticksPerSec;

    (void)argc;
    (void)argv;

    ticksPerSec = sysconf(_SC_CLK_TCK);
    if (ticksPerSec <= 0) {
        printf("ERROR; sysconf(_SC_CLK_TCK) failed\n");
        return EXIT_FAILURE;
    }

    initialize_inputs();

    printf("\nStarting clock.\n");
    /* The timezone argument of gettimeofday() is obsolete; pass NULL. */
    gettimeofday(&etstart, NULL);
    (void)times(&cputstart);

    // Calculation
    // sumRowSequentialCode(); // Uncomment to see the performance for sequential code
    rowParallelManager();
    finallyThreadsAreJoiningHere();

    /* Stop Clock */
    gettimeofday(&etstop, NULL);
    (void)times(&cputstop);
    printf("Stopped clock.\n");

    usecstart = (unsigned long long)etstart.tv_sec * 1000000 + etstart.tv_usec;
    usecstop = (unsigned long long)etstop.tv_sec * 1000000 + etstop.tv_usec;

    printMatrixB();

    /* Display timing results */
    printf("\nElapsed time = %g ms.\n",
           (double)(usecstop - usecstart) / 1000.0);
    printf("(CPU times are accurate to the nearest %g ms)\n",
           1000.0 / (double)ticksPerSec);
    printf("My total CPU time for parent = %g ms.\n",
           ticksToMs((cputstop.tms_utime + cputstop.tms_stime) -
                     (cputstart.tms_utime + cputstart.tms_stime), ticksPerSec));
    printf("My system CPU time for parent = %g ms.\n",
           ticksToMs(cputstop.tms_stime - cputstart.tms_stime, ticksPerSec));
    printf("My total CPU time for child processes = %g ms.\n",
           ticksToMs((cputstop.tms_cutime + cputstop.tms_cstime) -
                     (cputstart.tms_cutime + cputstart.tms_cstime), ticksPerSec));
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Notes :
https://www.evernote.com/l/Ahzo8czYhypG258BubNUCe13nIajkDYlFxs