Last active
March 28, 2020 08:57
-
-
Save shubhsherl/858d8bd444425552682b2e86111f5a7d to your computer and use it in GitHub Desktop.
CS422: Scribing 3 Cache friendly codes and its comparision
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<bits/stdc++.h> | |
#define M 10000 | |
#define N 10000 | |
int a[M][N]; | |
using namespace std; | |
using namespace std::chrono; | |
int sum_array_rows() { | |
int sum = 0; | |
for (int i = 0; i < M; i++) | |
for (int j = 0; j < N; j++) | |
sum += a[i][j]; | |
return sum; | |
} | |
int sum_array_cols() { | |
int sum = 0; | |
for (int j = 0; j < N; j++) | |
for (int i = 0; i < M; i++) | |
sum += a[i][j]; | |
return sum; | |
} | |
void fil_random() { | |
for(int i=0;i<M;i++) | |
for(int j=0;j<N;j++) | |
a[i][j] = rand()%10; | |
} | |
void evaluate(int i) { | |
auto start = high_resolution_clock::now(); | |
switch(i) { | |
case 1: | |
sum_array_cols(); | |
break; | |
case 2: | |
sum_array_rows(); | |
} | |
auto stop = high_resolution_clock::now(); | |
auto duration = duration_cast<microseconds>(stop - start); | |
cout << "Time taken by function "<<i<<" : "<< duration.count() << " microseconds" << endl; | |
} | |
int main() { | |
int sum; | |
fil_random(); | |
for(int i=1;i<=2;i++) | |
evaluate(i); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<bits/stdc++.h> | |
// Matrix 1 : A (M*N) | |
// Matrix 2 : B (N*O) | |
// Output Matrix : C (M*O) | |
#define ORDER_MIN 0 | |
#define ORDER_MAX 9 | |
int order[9] = {100, 200, 400, 800, 1600, 2400, 4800, 7200, 10000}; | |
int M=order[ORDER_MAX-1], N=order[ORDER_MAX-1], O=order[ORDER_MAX-1], CB1=240; | |
double result[5][ORDER_MAX]; | |
typedef long long data_type; | |
using namespace std; | |
using namespace std::chrono; | |
vector<vector<data_type>> A( M , vector<data_type> ( N, 0 ) ); | |
vector<vector<data_type>> B( N , vector<data_type> ( O, 0 ) ); | |
vector<vector<data_type>> C( M , vector<data_type> ( O, 0 ) ); | |
void print(vector<vector<data_type>> & mat, char m) { | |
cout<<"matrix "<<m<<": \n"; | |
for(auto i = mat.begin(); i<mat.end();i++) { | |
for(auto j = i->begin();j<i->end();j++) | |
cout<<*j<<" "; | |
cout<<endl; | |
} | |
} | |
void print_res() { | |
for(int i=-1;i<5; i++) { | |
i==-1 ? cout<<"order:\t" : cout<<"method "<<(i+1)<<" :"; | |
for(int j=ORDER_MIN;j<ORDER_MAX;j++) | |
i==-1 ? cout<<"\t"<<order[j] : cout<<"\t"<<result[i][j]; | |
cout<<endl; | |
} | |
} | |
void fill_zero(vector<data_type> & row) | |
{ | |
generate(row.begin(), row.end(), [](){ return 0; }); | |
} | |
void fill_row(vector<data_type> & row) | |
{ | |
generate(row.begin(), row.end(), [](){ return rand() % 100; }); | |
} | |
void fill_matrix(vector<vector<data_type>> & mat) | |
{ | |
for_each(mat.begin(), mat.end(), fill_row); | |
} | |
void matrix_multiply_1() | |
{ | |
for (int i=0;i<M;i++) | |
for (int j=0;j<O;j++) { | |
data_type s = 0; | |
for (int k=0;k<N;k++) | |
s+=(A[i][k]*B[k][j]); | |
C[i][j]=s; | |
} | |
} | |
void matrix_muptiply_2() { | |
int B1=M/2, B2=O/2; | |
for (int ii = 0;ii<M;ii+=B1) | |
for (int jj = 0;jj<O;jj+=B2) | |
for (int i=ii;i<min(ii+B1,M);i++) | |
for (int j=jj;j<min(jj+B2,O);j++) { | |
data_type s = 0; | |
for (int k=0;k<N;k++) | |
s+=(A[i][k]*B[k][j]); | |
C[i][j]=s; | |
} | |
} | |
void matrix_muptiply_3() { | |
int B1=M/2,B2=O/2; | |
for (int ii = 0;ii<M;ii+=B1) | |
for (int jj = 0;jj<O;jj+=B2) | |
for (int i=ii;i<min(ii+B1,M);i+=2) | |
for (int j=jj;j<min(jj+B2,O);j+=2) { | |
data_type s = 0; | |
for (int k=0;k<N;k++) | |
s+=(A[i][k]*B[k][j]); | |
C[i][j]=s; | |
if (i+1<min(ii+B1,M)) { | |
s = 0; | |
for (int k=0;k<N;k++) | |
s+=(A[i+1][k]*B[k][j]); | |
C[i+1][j]=s; | |
} | |
if (j+1<min(jj+B2,O)) { | |
s = 0; | |
for (int k=0;k<N;k++) | |
s+=(A[i][k]*B[k][j+1]); | |
C[i][j+1]=s; | |
} | |
if (i+1<min(ii+B1,M) && j+1<min(jj+B2,O)) { | |
s = 0; | |
for (int k=0;k<N;k++) | |
s+=(A[i+1][k]*B[k][j+1]); | |
C[i+1][j+1]=s; | |
} | |
} | |
} | |
void matrix_muptiply_4() { | |
int B1=M/2,B2=O/2; | |
for (int ii = 0;ii<M;ii+=B1) | |
for (int jj = 0;jj<O;jj+=B2) | |
for (int i=ii;i<min(ii+B1,M);i+=2) | |
for (int j=jj;j<min(jj+B2,O);j+=2) { | |
data_type s[4] = {0}; | |
for (int k=0;k<N;k++) { | |
s[0]+=(A[i][k]*B[k][j]); | |
if (i+1<min(ii+B1,M)) { | |
s[1]+=(A[i+1][k]*B[k][j]); | |
} | |
if (j+1<min(jj+B2,O)) { | |
s[2]+=(A[i][k]*B[k][j+1]); | |
} | |
if (i+1<min(ii+B1,M) && j+1<min(jj+B2,O)) { | |
s[3]+=(A[i+1][k]*B[k][j+1]); | |
} | |
} | |
C[i][j] = s[0]; | |
C[i+1][j] = s[1]; | |
C[i][j+1] = s[2]; | |
C[i+1][j+1] = s[3]; | |
} | |
} | |
void matrix_muptiply_5() { | |
for_each(C.begin(), C.end(), fill_zero); | |
int B1 = CB1, B2 = CB1; | |
for (int jj = 0;jj<O;jj+=B2) | |
for (int kk = 0;kk<N;kk+=B2) | |
for(int i=0;i<M;i++) | |
for (int j=jj;j<min(jj+B2,O);j++) { | |
data_type s = 0; | |
for (int k=kk;k<min(kk+B2,N);k++) | |
s+=A[i][k]*B[k][j]; | |
C[i][j] += s; | |
} | |
} | |
auto evaluate(int i) { | |
auto start = high_resolution_clock::now(); | |
switch(i) { | |
case 1: | |
matrix_multiply_1(); | |
break; | |
case 2: | |
matrix_muptiply_2(); | |
break; | |
case 3: | |
matrix_muptiply_3(); | |
break; | |
case 4: | |
matrix_muptiply_4(); | |
break; | |
case 5: | |
matrix_muptiply_5(); | |
} | |
auto stop = high_resolution_clock::now(); | |
auto duration = duration_cast<microseconds>(stop - start); | |
// cout << "Time taken by function "<<i<<" : "<< duration.count() << " microseconds" << endl; | |
return duration.count(); | |
} | |
int main() | |
{ | |
fill_matrix(A); | |
fill_matrix(B); | |
for(int j=ORDER_MIN;j<ORDER_MAX;j++) { | |
M = N = O = order[j]; CB1 = min(M/10, 480); | |
for(int i=1;i<=5;i++) { | |
cin.sync(); | |
ofstream ofs("/proc/sys/vm/drop_caches"); | |
ofs << "3" << endl; | |
ofs.close(); | |
result[i-1][j] = evaluate(i); | |
//print(A,'A'); | |
//print(B,'B'); | |
//print(C,'C'); | |
} | |
} | |
print_res(); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<bits/stdc++.h> | |
#define SIZE 100000 | |
using namespace std; | |
using namespace std::chrono; | |
struct optimized_array { | |
long *add; | |
long value; | |
}; | |
long *arr[SIZE]; | |
optimized_array op_arr[SIZE]; | |
void init() { | |
long temp[SIZE]; | |
long flag[SIZE]; | |
for(long i=0;i<SIZE;i++) { | |
arr[i] = new long(rand() % 100000); | |
temp[i] = *arr[i]; | |
flag[i] = 0; | |
op_arr[i].add = new long(*arr[i]); | |
} | |
// get value for op_arr | |
sort(temp, temp+SIZE); | |
for(long i=0;i<SIZE;i++) | |
for(long j=0;j<SIZE;j++) { | |
if (temp[i] == *arr[j] && flag[j] == 0) { | |
flag[j]=1; | |
op_arr[j].value = i; | |
break; | |
} | |
} | |
for(long i=0;i<SIZE;i++) | |
if(flag[i]==0) { | |
cout<<"Soemthing is wrong!!"; | |
break; | |
} | |
} | |
void sort_arr() { | |
for(long i=0;i<SIZE;i++) | |
for(long j=i;j<SIZE;j++) { | |
if (*arr[i] > *arr[j]) { | |
long *t = arr[i]; | |
arr[i] = arr[j]; | |
arr[j] = t; | |
} | |
} | |
} | |
void sort_op_arr() { | |
for(long i=0;i<SIZE;i++) | |
for(long j=i;j<SIZE;j++) { | |
if (op_arr[i].value > op_arr[j].value) { | |
optimized_array t = op_arr[i]; | |
op_arr[i] = op_arr[j]; | |
op_arr[j] = t; | |
} | |
} | |
} | |
void evaluate(int i) { | |
auto start = high_resolution_clock::now(); | |
switch(i) { | |
case 1: | |
sort_arr(); | |
break; | |
case 2: | |
sort_op_arr(); | |
} | |
auto stop = high_resolution_clock::now(); | |
auto duration = duration_cast<microseconds>(stop - start); | |
cout << "Time taken by function "<<i<<" : "<< duration.count() << " microseconds" << endl; | |
} | |
int main() { | |
init(); | |
for(int i=1;i<=2;i++) { | |
evaluate(i); | |
} | |
} |
Sorting time comparison for file: sorting.cpp
Time taken by function 1 : 45552680 microseconds
Time taken by function 2 : 24873190 microseconds
Array sum comparison for file: array_sum.cpp
Time taken by function 1 : 563559 microseconds
Time taken by function 2 : 179487 microseconds
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Results for file: matrix_mult.cpp
time in microseconds
Speedup:
CPU architecture: