Skip to content

Instantly share code, notes, and snippets.

@YukiSakamoto
Last active November 23, 2020 08:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save YukiSakamoto/58cc6138386d571c2a47e356b1a2f0b2 to your computer and use it in GitHub Desktop.
Save YukiSakamoto/58cc6138386d571c2a47e356b1a2f0b2 to your computer and use it in GitHub Desktop.
Practice of Matrix product implementation
OPT_FLAG=-O3
all: matrix.x
matrix.x: matrix.cpp
g++ -std=c++11 $(OPT_FLAG) matrix.cpp -o matrix.x
.PHONY: run
run: matrix.x
./matrix.x
.PHONY: clean
clean:
rm matrix.x
#include <iostream>
#include <chrono>
#include <vector>
#include <cstdio>
#include <random>
template <typename T>
class MyMatrix
{
public:
MyMatrix(const size_t rows, const size_t cols):
rows_(rows), cols_(cols), num_elements(rows * cols)
{ container_ = std::vector<T>(num_elements, T(0)); }
size_t get_index(const size_t row, const size_t col) const
{
if (row < this->rows_ && col < this->cols_) {
return row * this->cols_ + col;
} else {
throw; // out of bound!
}
}
const T operator()(const size_t row, const size_t col) const
{
size_t index = this->get_index(row, col);
return this->container_.at(index);
}
T &operator()(const size_t row, const size_t col)
{
size_t index = this->get_index(row, col);
return this->container_.at(index);
}
const size_t get_row() const { return this->rows_; }
const size_t get_col() const { return this->cols_; }
bool operator==(const MyMatrix<T> &rhs) const
{
if (rhs.rows_ != this->rows_ || rhs.rows_ != this->cols_) {
return false;
}
for(size_t i = 0; i < this->num_elements; i++) {
if (rhs.container_.at(i) != this->container_.at(i)) {
return false;
}
}
return true;
}
bool is_same_shape(const MyMatrix<T> rhs) const
{
if (this->rows_ != rhs.rows_ || this->cols_ != rhs.cols_) {
return false;
}
return true;
}
private:
const size_t rows_, cols_, num_elements;
std::vector<T> container_;
};
template <typename T>
MyMatrix<T> product_simple(const MyMatrix<T> &lhs, const MyMatrix<T> &rhs,
double &elapsed_msec)
{
if (lhs.get_col() != rhs.get_row()) {
throw;
}
MyMatrix<T> ret(lhs.get_row(), rhs.get_col());
std::chrono::system_clock::time_point start, end;
start = std::chrono::system_clock::now();
for(size_t i = 0; i < ret.get_row(); i++) {
for(size_t j = 0; j < ret.get_col(); j++) {
for(size_t k = 0; k < ret.get_col() ; k++) {
ret(i,j) += lhs(i, k) * rhs(k, j);
}
}
}
end = std::chrono::system_clock::now();
elapsed_msec = std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count();
return ret;
}
template <typename T>
MyMatrix<T> product_ijk_exchange(const MyMatrix<T> &lhs, const MyMatrix<T> &rhs,
double &elapsed_msec)
{
if (lhs.get_col() != rhs.get_row()) {
throw;
}
MyMatrix<T> ret(lhs.get_row(), rhs.get_col());
std::chrono::system_clock::time_point start, end;
start = std::chrono::system_clock::now();
for(size_t i = 0; i < ret.get_row(); i++) {
for(size_t k = 0; k < ret.get_col() ; k++) {
for(size_t j = 0; j < ret.get_col(); j++) {
ret(i,j) += lhs(i, k) * rhs(k, j);
}
}
}
end = std::chrono::system_clock::now();
elapsed_msec = std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count();
return ret;
}
template <typename T>
MyMatrix <T> add(const MyMatrix<T> &lhs, const MyMatrix<T> &rhs)
{
if (lhs.is_same_shape(rhs) != true) {
throw;
}
MyMatrix<T> ret(lhs.get_row(), lhs.get_col());
for(size_t i = 0; i < lhs.get_row(); i++) {
for(size_t j = 0; j < lhs.get_col(); j++) {
ret(i,j) = lhs(i,j) + rhs(i,j);
}
}
return ret;
}
template <typename T>
MyMatrix<T> product_ijkex_tiling(const MyMatrix<T> &lhs, const MyMatrix<T> &rhs,
double &elapsed_msec)
{
if (lhs.get_col() != rhs.get_row()) {
throw;
}
MyMatrix<T> ret(lhs.get_row(), rhs.get_col());
size_t tile_length = 32;
std::chrono::system_clock::time_point start, end;
start = std::chrono::system_clock::now();
for(size_t I = 0; I < ret.get_row(); I += tile_length) {
const size_t i_start = I;
const size_t i_end = std::min(i_start + tile_length, ret.get_row());
for(size_t K = 0; K < ret.get_col(); K += tile_length) {
const size_t k_start = K;
const size_t k_end = std::min(k_start + tile_length, ret.get_col());
for(size_t J = 0; J < ret.get_col(); J += tile_length) {
const size_t j_start = J;
const size_t j_end = std::min(j_start + tile_length, ret.get_col());
for(size_t i = i_start; i < i_end; i++) {
for(size_t k = k_start; k < k_end ; k++) {
for(size_t j = j_start; j < j_end; j++) {
ret(i,j) += lhs(i, k) * rhs(k, j);
}
}
}
}
}
}
end = std::chrono::system_clock::now();
elapsed_msec = std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count();
return ret;
}
//########################################
// Pretty printer
//########################################
template <typename T>
void print(const MyMatrix<T> &mat)
{
size_t rows = mat.get_row();
size_t cols = mat.get_col();
for(size_t i = 0; i < rows; i++) {
for(size_t j = 0; j < cols; j++) {
std::cout << mat(i,j) << " ";
}
std::cout << std::endl;
}
}
template <>
void print(const MyMatrix<double> &mat)
{
size_t rows = mat.get_row();
size_t cols = mat.get_col();
for(size_t i = 0; i < rows; i++) {
std::printf("[ ");
for(size_t j = 0; j < cols; j++) {
std::printf("%8.4f ", mat(i,j));
}
std::printf(" ]\n");
}
}
template <>
void print(const MyMatrix<int> &mat)
{
size_t rows = mat.get_row();
size_t cols = mat.get_col();
for(size_t i = 0; i < rows; i++) {
std::printf("[ ");
for(size_t j = 0; j < cols; j++) {
std::printf("%8d ", mat(i,j));
}
std::printf(" ]\n");
}
}
template <typename T>
void set_random_value(MyMatrix<T> &mat)
{
for(size_t i = 0; i < mat.get_row(); i++) {
for(size_t j = 0; j < mat.get_col(); j++) {
mat(i,j) = T(0) ;
}
}
}
template <>
void set_random_value(MyMatrix<double> &mat) {
std::random_device rnd;
std::default_random_engine engine(rnd());
std::uniform_real_distribution<> dist(0., 5.);
for(size_t i = 0; i < mat.get_row(); i++) {
for(size_t j = 0; j < mat.get_col(); j++) {
mat(i,j) = dist(engine);
}
}
}
//########################################
// main
//########################################
int main(void)
{
size_t len = 1024;
std::fprintf(stderr, "length: %u\n", len);
MyMatrix<double> a(len,len) ;
set_random_value(a);
double t;
std::fprintf(stderr, "Product of a * a : simple\n");
MyMatrix<double> ret_simple = product_simple(a,a,t);
std::fprintf(stderr, "%f milisec\n", t);
std::fprintf(stderr, "Product of a * a : ijk-exchange\n");
MyMatrix<double> ret_ijkex = product_ijk_exchange(a,a,t);
std::fprintf(stderr, "%f milisec\n", t);
std::fprintf(stderr, "%s\n", ret_simple == ret_ijkex ? "true" : "false");
std::fprintf(stderr, "Product of a * a : ijk-tiling\n");
MyMatrix<double> ret_ijktil = product_ijkex_tiling(a,a,t);
std::fprintf(stderr, "%f milisec\n", t);
std::fprintf(stderr, "%s\n", ret_simple == ret_ijktil ? "true" : "false");
if (false) {
std::printf("ret_simple\n");
print(ret_simple);
std::printf("ret_ijkex\n");
print(ret_ijkex);
std::printf("ret_ijktil\n");
print(ret_ijktil);
}
return 0;
}
./matrix.x
length: 1024
Product of a * a : simple
6924.000000 milisec
Product of a * a : ijk-exchange
3628.000000 milisec
true
Product of a * a : ijk-tiling
2939.000000 milisec
true
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment