Skip to content

Instantly share code, notes, and snippets.

@chris124567
Last active April 18, 2022 17:29
Show Gist options
  • Save chris124567/c45d46fdf4d922389641cc9f591ae577 to your computer and use it in GitHub Desktop.
Save chris124567/c45d46fdf4d922389641cc9f591ae577 to your computer and use it in GitHub Desktop.
MATH214 Final Project: Code to Benchmark various Matrix Multiplication Algorithms
#include <benchmark/benchmark.h>
#include <eigen3/Eigen/Dense>
#include <iostream>
using Eigen::MatrixXi;
static MatrixXi naive_multiply(const MatrixXi &A, const MatrixXi &B) {
// result is an n x o matrix
MatrixXi C = MatrixXi::Zero(A.rows(), B.cols());
for (int i = 0; i < A.rows(); i++) {
for (int j = 0; j < B.cols(); j++) {
// calculate dot product of ith row of A and jth column of B
int sum = 0;
for (int k = 0; k < A.cols(); k++) {
sum += A(i, k) * B(k, j);
}
// assign entry in result matrix
C(i, j) = sum;
}
}
return C;
}
static MatrixXi strassen_multiply(const Eigen::MatrixXi &A, const Eigen::MatrixXi &B) {
const int N = A.rows();
if (N <= 4) return naive_multiply(A, B);
const auto &a = A.block(0, 0, N / 2, N / 2);
const auto &b = A.block(N / 2, N / 2, N / 2, N / 2);
const auto &c = B.block(0, 0, N / 2, N / 2);
const auto &d = B.block(N / 2, N / 2, N / 2, N / 2);
const auto &e = A.block(N / 2, 0, N / 2, N / 2);
const auto &f = B.block(0, N / 2, N / 2, N / 2);
const auto &g = B.block(N / 2, 0, N / 2, N / 2);
const auto &h = A.block(0, N / 2, N / 2, N / 2);
const auto &M1 = strassen_multiply(a + b, c + d);
const auto &M2 = strassen_multiply(e + b, c);
const auto &M3 = strassen_multiply(a, f - d);
const auto &M4 = strassen_multiply(b, g - c);
const auto &M5 = strassen_multiply(a + h, d);
const auto &M6 = strassen_multiply(e - a, c + f);
const auto &M7 = strassen_multiply(h - b, g + d);
MatrixXi C(N, N);
C << M1 + M4 - M5 + M7, M3 + M5,
M2 + M4, M1 - M2 + M3 + M6;
return C;
}
static void naive_multiply_benchmark(int N, benchmark::State &state) {
const MatrixXi &A = MatrixXi::Random(N, N);
const MatrixXi &B = MatrixXi::Random(N, N);
for (auto _ : state) {
naive_multiply(A, B);
}
}
static void strassen_multiply_benchmark(int N, benchmark::State &state) {
const MatrixXi &A = MatrixXi::Random(N, N);
const MatrixXi &B = MatrixXi::Random(N, N);
for (auto _ : state) {
strassen_multiply(A, B);
}
}
static void BM_Naive_2x2(benchmark::State &state) {
naive_multiply_benchmark(2, state);
}
static void BM_Strassen_2x2(benchmark::State &state) {
strassen_multiply_benchmark(2, state);
}
static void BM_Naive_4x4(benchmark::State &state) {
naive_multiply_benchmark(4, state);
}
static void BM_Strassen_4x4(benchmark::State &state) {
strassen_multiply_benchmark(4, state);
}
static void BM_Naive_8x8(benchmark::State &state) {
naive_multiply_benchmark(8, state);
}
static void BM_Strassen_8x8(benchmark::State &state) {
strassen_multiply_benchmark(8, state);
}
static void BM_Naive_16x16(benchmark::State &state) {
naive_multiply_benchmark(16, state);
}
static void BM_Strassen_16x16(benchmark::State &state) {
strassen_multiply_benchmark(16, state);
}
static void BM_Naive_32x32(benchmark::State &state) {
naive_multiply_benchmark(32, state);
}
static void BM_Strassen_32x32(benchmark::State &state) {
strassen_multiply_benchmark(32, state);
}
static void BM_Naive_64x64(benchmark::State &state) {
naive_multiply_benchmark(64, state);
}
static void BM_Strassen_64x64(benchmark::State &state) {
strassen_multiply_benchmark(64, state);
}
static void BM_Naive_128x128(benchmark::State &state) {
naive_multiply_benchmark(128, state);
}
static void BM_Strassen_128x128(benchmark::State &state) {
strassen_multiply_benchmark(128, state);
}
static void BM_Naive_256x256(benchmark::State &state) {
naive_multiply_benchmark(256, state);
}
static void BM_Strassen_256x256(benchmark::State &state) {
strassen_multiply_benchmark(256, state);
}
static void BM_Naive_512x512(benchmark::State &state) {
naive_multiply_benchmark(512, state);
}
static void BM_Strassen_512x512(benchmark::State &state) {
strassen_multiply_benchmark(512, state);
}
static void BM_Naive_1024x1024(benchmark::State &state) {
naive_multiply_benchmark(1024, state);
}
static void BM_Strassen_1024x1024(benchmark::State &state) {
strassen_multiply_benchmark(1024, state);
}
BENCHMARK(BM_Naive_2x2);
BENCHMARK(BM_Strassen_2x2);
BENCHMARK(BM_Naive_4x4);
BENCHMARK(BM_Strassen_4x4);
BENCHMARK(BM_Naive_8x8);
BENCHMARK(BM_Strassen_8x8);
BENCHMARK(BM_Naive_16x16);
BENCHMARK(BM_Strassen_16x16);
BENCHMARK(BM_Naive_32x32);
BENCHMARK(BM_Strassen_32x32);
BENCHMARK(BM_Naive_64x64);
BENCHMARK(BM_Strassen_64x64);
BENCHMARK(BM_Naive_128x128);
BENCHMARK(BM_Strassen_128x128);
BENCHMARK(BM_Naive_256x256);
BENCHMARK(BM_Strassen_256x256);
BENCHMARK(BM_Naive_512x512);
BENCHMARK(BM_Strassen_512x512);
BENCHMARK(BM_Naive_1024x1024);
BENCHMARK(BM_Strassen_1024x1024);
BENCHMARK_MAIN();
2022-04-17T14:29:32-04:00
Running ./benchmark
Run on (4 X 1600 MHz CPU s)
CPU Caches:
L1 Data 32 KiB (x4)
L1 Instruction 32 KiB (x4)
L2 Unified 256 KiB (x4)
L3 Unified 6144 KiB (x1)
Load Average: 2.29, 1.83, 1.74
***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
----------------------------------------------------------------
Benchmark Time CPU Iterations
----------------------------------------------------------------
BM_Naive_2x2 4452 ns 4452 ns 157289
BM_Strassen_2x2 4471 ns 4471 ns 156630
BM_Naive_4x4 25475 ns 25475 ns 27481
BM_Strassen_4x4 25493 ns 25493 ns 27462
BM_Naive_8x8 181657 ns 181656 ns 3852
BM_Strassen_8x8 224344 ns 223743 ns 3130
BM_Naive_16x16 1382803 ns 1382779 ns 506
BM_Strassen_16x16 1685322 ns 1684031 ns 417
BM_Naive_32x32 10813924 ns 10813774 ns 65
BM_Strassen_32x32 12131297 ns 12131102 ns 58
BM_Naive_64x64 85585594 ns 85581655 ns 8
BM_Strassen_64x64 86165807 ns 86162451 ns 8
BM_Naive_128x128 680977415 ns 680966594 ns 1
BM_Strassen_128x128 608205666 ns 608195294 ns 1
BM_Naive_256x256 5450353167 ns 5448782901 ns 1
BM_Strassen_256x256 4310790179 ns 4310753972 ns 1
BM_Naive_512x512 43662225555 ns 43639166429 ns 1
BM_Strassen_512x512 30088482924 ns 30063769568 ns 1
BM_Naive_1024x1024 359372792062 ns 359182019060 ns 1
BM_Strassen_1024x1024 210963958482 ns 210924174384 ns 1
@chris124567
Copy link
Author

chris124567 commented Apr 17, 2022

README

Introduction

This gist contains code implementing two matrix multiplication algorithms (the naive algorithm and Strassen's algorithm) and code benchmarking them using Google's Benchmark framework for C++.

Compile

Compile with: g++ benchmark.cpp -o benchmark -lbenchmark -lpthread

Run with: ./benchmark

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment