Last active
April 24, 2018 05:56
-
-
Save vitillo/5257371 to your computer and use it in GitHub Desktop.
Eigen vs IPP, 4x4 Matrix Multiplication
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <omp.h> | |
#include <iostream> | |
#include <immintrin.h> | |
#include <Eigen/Dense> | |
#include <ipp.h> | |
#define N 1000000 | |
using namespace std; | |
using namespace Eigen; | |
double start, end; | |
class MatrixAVX{ | |
public: | |
MatrixAVX(){ | |
for(int i = 0; i < 16; i++){ | |
m_matrix[i] = i; | |
} | |
} | |
void optMul(MatrixAVX &o, MatrixAVX &tmp) __attribute__((noinline)); | |
private: | |
__attribute__((aligned(32))) double m_matrix[16]; | |
friend ostream & operator<<(ostream &, const MatrixAVX &matrix); | |
}; | |
void MatrixAVX::optMul(MatrixAVX &o, MatrixAVX &tmp){ | |
__m256d a_line, b_line, r_line; | |
__m256d b_line0 = _mm256_load_pd(&o.m_matrix[0]); | |
__m256d b_line1 = _mm256_load_pd(&o.m_matrix[4]); | |
__m256d b_line2 = _mm256_load_pd(&o.m_matrix[8]); | |
__m256d b_line3 = _mm256_load_pd(&o.m_matrix[12]); | |
a_line = _mm256_set1_pd(m_matrix[0]); | |
r_line = _mm256_mul_pd(a_line, b_line0); | |
a_line = _mm256_set1_pd(m_matrix[1]); | |
r_line = _mm256_add_pd(_mm256_mul_pd(a_line, b_line1), r_line); | |
a_line = _mm256_set1_pd(m_matrix[2]); | |
r_line = _mm256_add_pd(_mm256_mul_pd(a_line, b_line2), r_line); | |
a_line = _mm256_set1_pd(m_matrix[3]); | |
r_line = _mm256_add_pd(_mm256_mul_pd(a_line, b_line3), r_line); | |
_mm256_store_pd(&tmp.m_matrix[0], r_line); | |
a_line = _mm256_set1_pd(m_matrix[4]); | |
r_line = _mm256_mul_pd(a_line, b_line0); | |
a_line = _mm256_set1_pd(m_matrix[5]); | |
r_line = _mm256_add_pd(_mm256_mul_pd(a_line, b_line1), r_line); | |
a_line = _mm256_set1_pd(m_matrix[6]); | |
r_line = _mm256_add_pd(_mm256_mul_pd(a_line, b_line2), r_line); | |
a_line = _mm256_set1_pd(m_matrix[7]); | |
r_line = _mm256_add_pd(_mm256_mul_pd(a_line, b_line3), r_line); | |
_mm256_store_pd(&tmp.m_matrix[4], r_line); | |
a_line = _mm256_set1_pd(m_matrix[8]); | |
r_line = _mm256_mul_pd(a_line, b_line0); | |
a_line = _mm256_set1_pd(m_matrix[9]); | |
r_line = _mm256_add_pd(_mm256_mul_pd(a_line, b_line1), r_line); | |
a_line = _mm256_set1_pd(m_matrix[10]); | |
r_line = _mm256_add_pd(_mm256_mul_pd(a_line, b_line2), r_line); | |
a_line = _mm256_set1_pd(m_matrix[11]); | |
r_line = _mm256_add_pd(_mm256_mul_pd(a_line, b_line3), r_line); | |
_mm256_store_pd(&tmp.m_matrix[8], r_line); | |
a_line = _mm256_set1_pd(m_matrix[12]); | |
r_line = _mm256_mul_pd(a_line, b_line0); | |
a_line = _mm256_set1_pd(m_matrix[13]); | |
r_line = _mm256_add_pd(_mm256_mul_pd(a_line, b_line1), r_line); | |
a_line = _mm256_set1_pd(m_matrix[14]); | |
r_line = _mm256_add_pd(_mm256_mul_pd(a_line, b_line2), r_line); | |
a_line = _mm256_set1_pd(m_matrix[15]); | |
r_line = _mm256_add_pd(_mm256_mul_pd(a_line, b_line3), r_line); | |
_mm256_store_pd(&tmp.m_matrix[12], r_line); | |
} | |
ostream & operator<<(ostream &stream, const MatrixAVX &matrix){ | |
for(int i = 0; i < 4; i++){ | |
for(int j = 0; j < 4; j++) | |
cout << matrix.m_matrix[i*4 + j] << " "; | |
if(i != 3) | |
cout << endl; | |
} | |
return stream; | |
} | |
void start_clock(){ | |
start = omp_get_wtime(); | |
} | |
double stop_clock(){ | |
return omp_get_wtime() - start; | |
} | |
void SIMD_multiply(){ | |
MatrixAVX a, b, c; | |
start_clock(); | |
for(int i = 0; i < N; i++) | |
a.optMul(b, c); | |
cout << "AVX SIMD: " << stop_clock() << endl << c << endl; | |
} | |
void _Eigen_multiply(const Matrix4d &a, const Matrix4d &b, Matrix4d &c) __attribute__((noinline)); | |
void _Eigen_multiply(const Matrix4d &a, const Matrix4d &b, Matrix4d &c){ | |
c.noalias() = a *b; | |
} | |
void Eigen_multiply(){ | |
Matrix4d a, b, c; | |
a << 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15; | |
b << 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15; | |
start_clock(); | |
for(int i = 0; i < N; i++) | |
_Eigen_multiply(a, b, c); | |
cout << "Eigen: " << stop_clock() << endl << c << endl;; | |
} | |
void IPP_multiply(){ | |
double timing; | |
__attribute__((aligned(32))) Ipp64f a[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; | |
__attribute__((aligned(32))) Ipp64f b[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; | |
__attribute__((aligned(32))) Ipp64f c[16]; | |
int stride1 = sizeof(Ipp64f)*4; | |
int stride2 = sizeof(Ipp64f); | |
start_clock(); | |
for(int i = 0; i < N; i++) | |
ippmMul_mm_64f(a, stride1, stride2, 4, 4, b, stride1, stride2, 4, 4, c, stride1, stride2); | |
cout << "IPP: " << stop_clock() << endl; | |
for(int i = 0; i < 4; i++){ | |
for(int j = 0; j < 4; j++) | |
cout << c[i*4 + j] << " "; | |
cout << endl; | |
} | |
} | |
int main(){ | |
SIMD_multiply(); | |
Eigen_multiply(); | |
IPP_multiply(); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CC=g++ | |
CXXFLAGS=-O2 -fopenmp -g -mavx -march=native -I/opt/intel/composerxe/ipp/include -Iinclude/eigen3 | |
LDFLAGS = -lgomp -lipps -lippm | |
all: main.o testAVX.o testAVX.h | |
$(CC) $(LDFLAGS) main.o -o kernel | |
main.o: main.cpp testAVX.h | |
$(CC) $(CXXFLAGS) -c main.cpp | |
clean: | |
-rm -rf *.o kernel |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Results?