Skip to content

Instantly share code, notes, and snippets.

@ajaysjournal
Last active September 25, 2016 00:40
Show Gist options
  • Save ajaysjournal/c7ae2d780203d9585546a5b85845cebd to your computer and use it in GitHub Desktop.
Save ajaysjournal/c7ae2d780203d9585546a5b85845cebd to your computer and use it in GitHub Desktop.
Matrix row sum using PThreads performance example
/* Read here - https://www.evernote.com/l/Ahzo8czYhypG258BubNUCe13nIajkDYlFxs
Example for a 2x2 matrix:
A[2][2] = [ {1, 2},
            {3, 4} ]
The sum of each row of A is stored in the B matrix, like below:
B[2][1] = [ {3},
            {7} ]
*/
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <pthread.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/times.h>
#include <unistd.h>
/* Unused: nothing in this file invokes randm(), so the macro is never
   expanded. Its body refers to `uid`, which is not declared in this file. */
#define randm() 4|2[uid]&3
/* Matrix dimensions: A has M rows and N columns. */
#define M 10000
#define N 10000
/* Number of consecutive rows of A summed by each worker thread. */
#define SIZE 250
/* Input matrix; filled with pseudo-random values by initialize_inputs(). */
float A[M][N] = {{0.0}};
/* Row sums: B[i][0] accumulates the sum of row i of A. The explicit
   initializer is not required for correctness: objects with static storage
   duration are zero-initialized by the language anyway. */
float B[M][1] = { {0.0 } };
/* Forward declaration; the definition follows below. */
void initialize_inputs();
/* Worker thread handles. The array has M slots, but rowParallelManager()
   only creates M/SIZE threads, so only the first M/SIZE entries ever hold a
   valid handle. */
pthread_t threads[M];
/*
 * Fill every element of the global matrix A with a pseudo-random value
 * computed as rand() / 32768.0.
 *
 * rand() is never seeded in this file, so the generated sequence is the
 * implementation's default one. B is not touched here; it relies on its
 * static zero-initialization.
 *
 * The traversal order (column outer, row inner) is kept as-is so that each
 * A[row][col] receives the same rand() draw as before.
 */
void initialize_inputs() {
    printf("\nInitializing...\n");
    for (int col = 0; col < N; col++) {
        /* Rows are bounded by M (the first dimension of A), not N. The two
           are equal today, but using N here would overrun or under-fill A
           as soon as the matrix stops being square. */
        for (int row = 0; row < M; row++) {
            A[row][col] = (int)rand() / 32768.0;
        }
    }
}
/*
 * Dump the global matrix A to stdout, one matrix row per output line.
 * Each element is printed as "A[r][c] = value" followed by two tabs and a
 * space.
 */
void printMatrixA(){
    printf("\n \n Printing the A matrix\n");
    for (int r = 0; r < M; ++r) {
        const float *rowData = A[r];
        for (int c = 0; c < N; ++c) {
            printf("A[%d][%d] = %10f\t\t ", r, c, rowData[c]);
        }
        putchar('\n');
    }
}
/*
 * Dump the global row-sum matrix B to stdout, one entry per output line.
 * B has a single column, so every line shows B[r][0].
 */
void printMatrixB(){
    printf("\n\nPrinting the B matrix\n");
    int r = 0;
    while (r < M) {
        /* Only column 0 exists in B. */
        printf("B[%d][%d] = %10f\t\t ", r, 0, B[r][0]);
        putchar('\n');
        ++r;
    }
}
/*
 * Single-threaded reference implementation: for every row r of A, add all
 * N elements of that row into B[r][0].
 *
 * The sums are accumulated on top of whatever B already holds; B is not
 * reset here.
 */
void sumRowSequentialCode(){
    for (int r = 0; r < M; ++r) {
        const float *src = A[r];
        float *dst = &B[r][0];
        for (int c = 0; c < N; ++c) {
            *dst += src[c];
        }
    }
}
void *sumRowParallel(void *threadId){
int i,j;
int start = (int)threadId*(int)SIZE;
int end = (int)(threadId+1)*(int)SIZE;
for ( i= start; i < end; i++ ) {
for(j=0;j<N;j++){
B[i][0] += A[i][j];
}
}
pthread_exit(NULL); // you need to close that thread
}
/*
 * Spawn the worker threads: M/SIZE threads are created, and thread i is
 * handed the index i so that it sums rows [i*SIZE, (i+1)*SIZE).
 *
 * The handles are stored in threads[0 .. M/SIZE-1]. If any pthread_create()
 * call fails, the error code is reported and the process exits with
 * status -1.
 *
 * Note: M/SIZE truncates, so if M were not a multiple of SIZE the trailing
 * M % SIZE rows would not be assigned to any thread.
 */
void rowParallelManager(){
    const int numThread = M / SIZE;

    for (int i = 0; i < numThread; i++) {
        /* Pass the index by value inside the pointer; going through intptr_t
           makes the int -> pointer conversion well-sized on every target. */
        const int rc = pthread_create(&threads[i], NULL, sumRowParallel,
                                      (void *)(intptr_t)i);
        if (rc != 0) {
            /* pthread_create() returns the error number directly (it does
               not set errno), so print the value we actually got. */
            printf("ERROR; return code from pthread_create() is %d\n", rc);
            exit(-1);
        }
    }
}
void finallyThreadsAreJoiningHere(){
int i;
// wiating for all threads to compute the summation
for (i=0; i<M; i++) {
pthread_join(threads[i], NULL);
}
}
/*
 * Benchmark driver: fills A, times the multi-threaded row summation with
 * both wall-clock time (gettimeofday) and process CPU time (times), prints
 * B, and then prints the timing results.
 *
 * argc/argv are accepted but not used.
 * Returns 0 on success, 1 if the clock-tick rate cannot be determined.
 */
int main(int argc, const char * argv[]) {
    (void)argc;
    (void)argv;

    struct timeval etstart, etstop;   /* Elapsed times using gettimeofday() */
    struct tms cputstart, cputstop;   /* CPU times for my processes */

    /* times() reports in clock ticks whose rate is sysconf(_SC_CLK_TCK),
       not CLOCKS_PER_SEC (that constant belongs to clock()). */
    const long ticks_per_sec = sysconf(_SC_CLK_TCK);
    if (ticks_per_sec <= 0) {
        perror("sysconf(_SC_CLK_TCK)");
        return 1;
    }
    const double ms_per_tick = 1000.0 / (double)ticks_per_sec;

    initialize_inputs();

    printf("\nStarting clock.\n");
    /* The timezone argument is obsolete; POSIX leaves non-NULL unspecified. */
    gettimeofday(&etstart, NULL);
    times(&cputstart);

    /* Calculation. To time the sequential version instead, call
       sumRowSequentialCode() here in place of the two calls below. */
    rowParallelManager();
    finallyThreadsAreJoiningHere();

    /* Stop Clock */
    gettimeofday(&etstop, NULL);
    times(&cputstop);
    printf("Stopped clock.\n");

    const unsigned long long usecstart =
        (unsigned long long)etstart.tv_sec * 1000000 + etstart.tv_usec;
    const unsigned long long usecstop =
        (unsigned long long)etstop.tv_sec * 1000000 + etstop.tv_usec;

    printMatrixB();

    /* Display timing results (double keeps the microsecond difference exact
       for any realistic run length, unlike float). */
    printf("\nElapsed time = %g ms.\n",
           (double)(usecstop - usecstart) / 1000.0);
    printf("(CPU times are accurate to the nearest %g ms)\n",
           ms_per_tick);
    printf("My total CPU time for parent = %g ms.\n",
           (double)( (cputstop.tms_utime + cputstop.tms_stime) -
                     (cputstart.tms_utime + cputstart.tms_stime) ) * ms_per_tick);
    printf("My system CPU time for parent = %g ms.\n",
           (double)(cputstop.tms_stime - cputstart.tms_stime) * ms_per_tick);
    printf("My total CPU time for child processes = %g ms.\n",
           (double)( (cputstop.tms_cutime + cputstop.tms_cstime) -
                     (cputstart.tms_cutime + cputstart.tms_cstime) ) * ms_per_tick);
    return 0;
}
@ajaysjournal
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment