Skip to content

Instantly share code, notes, and snippets.

@yoshipon
Forked from denkiwakame/bench.py
Last active September 25, 2021 01:32
Show Gist options
  • Save yoshipon/cd215812f1bce182f2085c56ac95355e to your computer and use it in GitHub Desktop.
Save yoshipon/cd215812f1bce182f2085c56ac95355e to your computer and use it in GitHub Desktop.
numba-cpp benchmarks
from benchmarks.bench_ising import IsingModel, setup
import time
if __name__ == '__main__':
t = time.time()
setup()
im = IsingModel()
print('compile', time.time()-t)
t = time.time()
im.time_ising()
print('exec', time.time()-t)
"""
Ising model benchmark, adapted from
http://matthewrocklin.com/blog/work/2015/02/28/Ising/
"""
from math import exp, log, e, sqrt
import numpy as np
from numba import f8, u1, void, i8, prange
kT = 2 / log(1 + sqrt(2), e)
N = 1000
random = np.random.RandomState(0)
x_start = random.randint(2, size=(N, N)).astype('i8')
x_start[x_start == 0] = -1
N_iterations = 10
def setup():
from numba import jit
global _update, update
@jit('void(i8[:,:],intp,intp)', nopython=True, cache=True, fastmath=True)
def _update(x, i, j):
n, m = x.shape
dE = 2 * x[i, j] * (
x[(i-1) % n, (j-1) % m]
+ x[(i-1) % n, j]
+ x[(i-1) % n, (j+1) % m]
+ x[i, (j-1) % m]
+ x[i, (j+1) % m]
+ x[(i+1) % n, (j-1) % m]
+ x[(i+1) % n, j]
+ x[(i+1) % n, (j+1) % m]
)
if dE <= 0 or exp(-dE / kT) > np.random.random():
x[i, j] *= -1
@jit('void(i8[:,:])', nopython=True, cache=True, fastmath=True)
def update(x):
n, m = x.shape
for i in prange(n):
for j in prange(0, m, 2): # Even columns first to avoid overlap
_update(x, j, i)
# for i in prange(n):
for j in prange(1, m, 2): # Odd columns second to avoid overlap
_update(x, j, i)
class IsingModel:
def time_ising(self):
x = x_start.copy()
for i in prange(N_iterations):
update(x)
// #pragma GCC optimize("Ofast")
// #pragma GCC target("avx2")
#define N 1000
#include <stdint.h>
#include <cmath>
#include <random>
#include <chrono>
#include <iostream>
std::default_random_engine eng(0);
std::uniform_real_distribution<float> uni_dist(0.0, 1.0);
float kT = 2 / log(1 + std::sqrt(2));
int64_t x[N][N];
template<typename T>
void _update(T x[N][N], size_t i, size_t j) {
size_t m = N;
size_t n = N;
T dE = 2 * x[i][j] *
(x[(i - 1) % n][(j - 1) % m] + x[(i - 1) % n][j] +
x[(i - 1) % n][(j + 1) % m] + x[i][(j - 1) % m] +
x[i][(j + 1) % m] + x[(i + 1) % n][(j - 1) % m] +
x[(i + 1) % n][j] + x[(i + 1) % n][(j + 1) % m]);
if (dE <= 0 || std::exp(-dE / kT) > uni_dist(eng)) x[i][j] *= -1;
};
template<typename T>
void update(T x[N][N]) {
size_t m = N;
size_t n = N;
for (size_t i = 0; i < N; i++) {
for (size_t j = 0; j < N; j += 2) {
_update(x, j, i);
}
for (size_t j = 1; j < N; j += 2) {
_update(x, j, i);
}
}
};
int main(int argc, char const* argv[]) {
// initialize x
std::uniform_int_distribution<uint8_t> ber_dist(0, 1);
for (auto i = 0; i < N; i++) {
for (auto j = 0; j < N; j++) {
x[i][j] = ber_dist(eng) == 0 ? -1 : 1;
}
}
// main loop
auto start = std::chrono::system_clock::now();
for (int i = 0; i < 10; i++) {
// copy to x_
int64_t x_[N][N];
for (size_t i = 0; i < N; i++) {
for (size_t j = 0; j < N; j++) {
x_[i][j] = x[i][j];
}
}
update(x_);
}
auto end = std::chrono::system_clock::now();
std::chrono::duration<double> duration = end - start;
std::cout << duration.count() << std::endl;
return 0;
}
numba==0.51.2
Corei7-1185G7@3.00GHz
@numba.jit
compile 0.3417510986328125 [s]
exec 0.44716548919677734 [s]
@g++-10 -Ofast -march=native -mtune=native ising.cc
0.146285 [s]
L45 ~ L50をコメントアウトすると,
0.0572926 [s]
L15とL56をuint8にすると,
0.110651 [s]
さらにL45 ~ L50をコメントアウトすると,
0.0310628 [s]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment