Skip to content

Instantly share code, notes, and snippets.

@IshitaTakeshi
Last active May 24, 2018 11:02
Show Gist options
  • Save IshitaTakeshi/31823f580a75b91cdfcfd8265d9d78a1 to your computer and use it in GitHub Desktop.
Save IshitaTakeshi/31823f580a75b91cdfcfd8265d9d78a1 to your computer and use it in GitHub Desktop.
Comparison of histogram calculation in NumPy and CuPy
import json
import timeit
from collections import defaultdict
# Default number of timed repetitions per (n_samples, n_bins) configuration.
n_executions = 10000


def run_numpy(n_samples, n_bins, number=None):
    """Time ``numpy.histogram`` on random integer data.

    The timed statement bins ``n_samples`` integers drawn from
    ``[0, n_bins)`` into ``n_bins`` unit-width bins.

    Args:
        n_samples: Number of random integers to histogram.
        n_bins: Number of bins; also the exclusive upper bound of the data.
        number: How many times the statement is executed. Defaults to the
            module-level ``n_executions``.

    Returns:
        Total wall-clock time in seconds for all ``number`` executions,
        as reported by ``timeit.timeit``.
    """
    if number is None:
        number = n_executions

    # The bin edges must span the generated values 0 .. n_bins - 1.  A fixed
    # upper edge of 15.5 is only right for n_bins == 16; for larger n_bins
    # most samples would fall outside every bin.
    setup = (
        "import numpy as xp\n"
        "x = xp.random.randint(0, {n_bins}, {n_samples})\n"
        "bins = xp.linspace(-0.5, {n_bins} - 0.5, {n_bins} + 1)\n"
    ).format(n_samples=n_samples, n_bins=n_bins)

    return timeit.timeit(
        "h, bins = xp.histogram(x, bins=bins)",
        setup=setup,
        number=number,
    )
def run_cupy(n_samples, n_bins, number=None):
    """Time ``cupy.histogram`` on random integer data.

    The timed statement bins ``n_samples`` integers drawn from
    ``[0, n_bins)`` into ``n_bins`` unit-width bins.

    Args:
        n_samples: Number of random integers to histogram.
        n_bins: Number of bins; also the exclusive upper bound of the data.
        number: How many times the statement is executed. Defaults to the
            module-level ``n_executions``.

    Returns:
        Total wall-clock time in seconds for all ``number`` executions,
        as reported by ``timeit.timeit``.
    """
    if number is None:
        number = n_executions

    # The bin edges must span the generated values 0 .. n_bins - 1.  A fixed
    # upper edge of 15.5 is only right for n_bins == 16.
    # NOTE(review): x and bins are created by cupy itself, so the to_gpu()
    # calls are presumably no-ops; kept to leave the measured setup unchanged.
    setup = (
        "import cupy as xp\n"
        "from chainer.cuda import to_gpu\n"
        "x = xp.random.randint(0, {n_bins}, {n_samples})\n"
        "bins = xp.linspace(-0.5, {n_bins} - 0.5, {n_bins} + 1)\n"
        "x = to_gpu(x)\n"
        "bins = to_gpu(bins)\n"
        # Make sure data generation has finished before the clock starts.
        "xp.cuda.Stream.null.synchronize()\n"
    ).format(n_samples=n_samples, n_bins=n_bins)

    # GPU kernels are launched asynchronously; without a synchronize the
    # timer would mostly measure launch overhead, not the histogram itself.
    statement = (
        "h, bins = xp.histogram(x, bins=bins)\n"
        "xp.cuda.Stream.null.synchronize()"
    )
    return timeit.timeit(statement, setup=setup, number=number)
# Benchmark every combination of 2**4 .. 2**16 samples and 2**4 .. 2**16 bins
# with both libraries, then store the timings as nested JSON:
# result[n_samples][n_bins] -> {"numpy": seconds, "cupy": seconds}
result = defaultdict(lambda: defaultdict(dict))
for sample_exponent in range(4, 17):
    for bin_exponent in range(4, 17):
        n_samples = 2 ** sample_exponent
        n_bins = 2 ** bin_exponent
        timings = result[n_samples][n_bins]
        timings["numpy"] = run_numpy(n_samples, n_bins)
        timings["cupy"] = run_cupy(n_samples, n_bins)

with open("comparison_result.json", "w") as output_file:
    json.dump(result, output_file)
import json
from matplotlib import pyplot as plt
from matplotlib import rcParams
# Plot appearance shared by every figure below.
rcParams.update({"font.size": 24})
linewidth = 4

# Number of repetitions behind each stored timing; only used in plot titles.
n_executions = 10000

# Load the stored timings.  Keys are looked up as strings further down
# (str(n_samples), then str(n_bins), then "numpy" / "cupy").
with open("./comparison_result.json", "r") as result_file:
    result = json.load(result_file)
def along_samples(n_bins, min_exponent=4, max_exponent=17):
    """Collect timings for a fixed bin count over a range of sample counts.

    Sample counts are the powers of two ``2**min_exponent`` up to (but not
    including) ``2**max_exponent``.  Timings are read from the module-level
    ``result`` mapping, which is indexed with string keys.

    Returns:
        A tuple ``(sample_counts, numpy_times, cupy_times)`` of three lists
        of equal length.
    """
    bins_key = str(n_bins)
    sample_counts = [2 ** e for e in range(min_exponent, max_exponent)]
    entries = [result[str(count)][bins_key] for count in sample_counts]
    numpy_times = [entry["numpy"] for entry in entries]
    cupy_times = [entry["cupy"] for entry in entries]
    return sample_counts, numpy_times, cupy_times
def along_bins(n_samples, min_exponent=4, max_exponent=17):
    """Collect timings for a fixed sample count over a range of bin counts.

    Bin counts are the powers of two ``2**min_exponent`` up to (but not
    including) ``2**max_exponent``.  Timings are read from the module-level
    ``result`` mapping, which is indexed with string keys.

    Returns:
        A tuple ``(bin_counts, numpy_times, cupy_times)`` of three lists of
        equal length.
    """
    per_bins = result[str(n_samples)]
    bin_counts = [2 ** e for e in range(min_exponent, max_exponent)]
    entries = [per_bins[str(count)] for count in bin_counts]
    numpy_times = [entry["numpy"] for entry in entries]
    cupy_times = [entry["cupy"] for entry in entries]
    return bin_counts, numpy_times, cupy_times
def plot_vs_samples(n_bins):
    """Plot NumPy and CuPy execution time against the number of samples.

    Draws both timing curves for the given ``n_bins`` on log-log axes
    (base-2 x axis, default-base y axis) and shows the figure.

    Args:
        n_bins: Bin count whose timings are plotted.
    """
    n_samples_array, time_numpy, time_cupy = along_samples(n_bins=n_bins)

    # Matplotlib 3.3 renamed basex/nonposx/nonposy to base/nonpositive and
    # 3.5 removed the old spellings, so the scales are configured once with
    # the current keywords (requires Matplotlib >= 3.3) and the data is
    # drawn with plain plot() instead of loglog(..., basex=2).
    plt.xscale("log", base=2, nonpositive="clip")
    plt.yscale("log", nonpositive="clip")

    plt.title("Comparison of execution time vs. number of samples\n"
              "(n_executions={}, n_bins={})".format(n_executions, n_bins))
    plt.xlabel("Number of samples")
    plt.ylabel("Execution time [s]")

    plt.plot(n_samples_array, time_numpy, 'ro-', linewidth=linewidth,
             label="numpy")
    plt.plot(n_samples_array, time_cupy, 'bs-', linewidth=linewidth,
             label="cupy")

    plt.legend()
    plt.show()
def plot_vs_bins(n_samples):
    """Plot NumPy and CuPy execution time against the number of bins.

    Draws both timing curves for the given ``n_samples`` on log-log axes
    (base-2 x axis, default-base y axis) with the y axis limited to
    0.8 .. 30 seconds, and shows the figure.

    Args:
        n_samples: Sample count whose timings are plotted.
    """
    n_bins_array, time_numpy, time_cupy = along_bins(n_samples=n_samples)

    # Matplotlib 3.3 renamed basex/nonposx/nonposy to base/nonpositive and
    # 3.5 removed the old spellings, so the scales are configured once with
    # the current keywords (requires Matplotlib >= 3.3) and the data is
    # drawn with plain plot() instead of loglog(..., basex=2).
    plt.xscale("log", base=2, nonpositive="clip")
    plt.yscale("log", nonpositive="clip")
    plt.ylim([0.8, 30.0])

    plt.title("Comparison of execution time vs. number of bins\n"
              "(n_executions={}, n_samples={})".format(n_executions, n_samples))
    plt.xlabel("Number of bins")
    plt.ylabel("Execution time [s]")

    plt.plot(n_bins_array, time_numpy, 'ro-', linewidth=linewidth,
             label="numpy")
    plt.plot(n_bins_array, time_cupy, 'bs-', linewidth=linewidth,
             label="cupy")

    plt.legend()
    plt.show()
# Figures rendered by default: one slice along each axis of the measurement
# grid, both fixed at 2**12 = 4096.  The commented-out calls are alternative
# slices.
# plot_vs_samples(n_bins=16)
plot_vs_samples(n_bins=2 ** 12)
plot_vs_bins(n_samples=2 ** 12)
# plot_vs_bins(n_samples=65536)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment