Skip to content

Instantly share code, notes, and snippets.

@eickenberg
Created December 7, 2016 11:46
Show Gist options
  • Save eickenberg/b0f6b3ad08694841d2c0ede71b3cc948 to your computer and use it in GitHub Desktop.
Save eickenberg/b0f6b3ad08694841d2c0ede71b3cc948 to your computer and use it in GitHub Desktop.
Script for benchmarking skcuda fft performance (pure calculation) wrt pyfftw
"""testing skcuda fft in 3 dimensions"""
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
#from scipy import fftpack as fft
from pyfftw.interfaces import numpy_fft as fft
import skcuda.fft as cu_fft
# Benchmark configuration: batch sizes (Bs) and cubic grid edge lengths (Ns).
Bs = (8, 12, 16, 20, 24,)
Ns = (32, 64, 96, 128, 256)
# Number of timed repetitions for each (N, B) pair: 10 for N in Ns[0:2]
# combined with B in Bs[0:2], and 1 for every other pair.
ns = np.ones([len(Ns), len(Bs)], dtype=int)
ns[0:2, 0:2] = 10
import time

# Mean wall-clock seconds per transform, appended in (N, B) loop order and
# reshaped to (len(Ns), len(Bs)) once the sweep is finished.
cpu_fft_times = []
cpu_ifft_times = []
gpu_fft_times = []
gpu_ifft_times = []

# Blocks the host until all previously queued GPU work has completed.
# cuFFT launches return to the host before the transform has finished, so
# without this barrier the GPU timers would mostly measure launch overhead.
gpu_sync = pycuda.autoinit.context.synchronize

for j, N in enumerate(Ns):
    for k, B in enumerate(Bs):
        n = ns[j, k]

        # Host buffers: real input, complex spectrum, real reconstruction.
        x = np.empty((B, N, N, N), dtype=np.float32)
        xf = np.empty_like(x, dtype=np.complex64)
        y = np.empty_like(x)
        x[:] = np.random.randn(*x.shape).astype('float32')

        # --- CPU forward transform over the three spatial axes ---
        # NOTE(review): this is a full complex-to-complex transform of real
        # input, while the GPU path below is real-to-complex with a
        # half-size last axis, so the comparison is not strictly
        # like-for-like.
        t0 = time.time()
        for _ in range(n):
            xf[:] = fft.fftn(x, axes=(1, 2, 3))
        t1 = time.time()
        cpu_fft_times.append((t1 - t0) / n)

        # --- CPU inverse transform ---
        t2 = time.time()
        for _ in range(n):
            y[:] = np.real(fft.ifftn(xf, axes=(1, 2, 3)))
        t3 = time.time()
        cpu_ifft_times.append((t3 - t2)/n)

        # --- GPU forward transform (data is already resident on the GPU) ---
        x_gpu = gpuarray.to_gpu(x)
        xf_gpu = gpuarray.empty((B, N, N, N // 2 + 1), np.complex64)
        plan_forward = cu_fft.Plan((N, N, N), np.float32, np.complex64, B)
        gpu_sync()  # make sure the upload/plan setup is not billed to the FFT
        t4 = time.time()
        for _ in range(n):
            cu_fft.fft(x_gpu, xf_gpu, plan_forward)
        gpu_sync()  # wait for the queued transforms before stopping the clock
        t5 = time.time()
        gpu_fft_times.append((t5 - t4) / n)

        # --- GPU inverse transform (last argument True requests scaling) ---
        y_gpu = gpuarray.empty_like(x_gpu)
        plan_inverse = cu_fft.Plan((N, N, N), np.complex64, np.float32, B)
        gpu_sync()
        t6 = time.time()
        for _ in range(n):
            cu_fft.ifft(xf_gpu, y_gpu, plan_inverse, True)
        gpu_sync()
        t7 = time.time()
        gpu_ifft_times.append((t7 - t6)/n)

        # Report: N, B, repetitions, the four mean timings, then the
        # CPU/GPU speedup for the forward and inverse transforms.
        print((N, B, n, cpu_fft_times[-1], cpu_ifft_times[-1],
               gpu_fft_times[-1], gpu_ifft_times[-1],
               cpu_fft_times[-1] / gpu_fft_times[-1],
               cpu_ifft_times[-1] / gpu_ifft_times[-1]))
        # Sanity check: summed squared difference between the CPU and GPU
        # round-trip reconstructions.
        print(((y - y_gpu.get()) ** 2).sum())

        # Drop the device buffers and plans now so the next (larger)
        # configuration does not have to coexist with them in GPU memory.
        del x_gpu, xf_gpu, y_gpu, plan_forward, plan_inverse

# Rows index N, columns index B.
cpu_fft_times = np.array(cpu_fft_times).reshape(len(Ns), len(Bs))
cpu_ifft_times = np.array(cpu_ifft_times).reshape(len(Ns), len(Bs))
gpu_fft_times = np.array(gpu_fft_times).reshape(len(Ns), len(Bs))
gpu_ifft_times = np.array(gpu_ifft_times).reshape(len(Ns), len(Bs))
import matplotlib
# Select the non-interactive Agg backend before pyplot is imported so the
# script can run without a display.
matplotlib.use("Agg")
import matplotlib.pyplot as plt


def _save_timing_figure(x_values, curves, title, basename):
    """Plot timing curves on log-log axes and save as PNG, SVG and PDF.

    x_values: sequence of x positions (also used verbatim as tick labels).
    curves:   iterable of (timings, line_style) pairs; each `timings` array
              has one row per entry of `x_values`, one plotted line per column.
    title:    figure title.
    basename: output file name without extension.
    """
    plt.figure()
    for timings, line_style in curves:
        plt.plot(x_values, timings, line_style)
    plt.yscale('log')
    plt.xscale('log')
    plt.title(title)
    # Pass a concrete list of labels: on Python 3 `map` is a lazy iterator,
    # whereas xticks documents its labels argument as array-like.
    plt.xticks(x_values, [str(value) for value in x_values])
    for extension in ("png", "svg", "pdf"):
        plt.savefig("{}.{}".format(basename, extension))
    plt.close()


# Timing versus grid size N: one line per batch size B.
# Blue = CPU, red = GPU; solid = forward FFT, dash-dot = inverse FFT.
_save_timing_figure(
    Ns,
    [(cpu_fft_times, 'b-'),
     (cpu_ifft_times, 'b-.'),
     (gpu_fft_times, 'r-'),
     (gpu_ifft_times, 'r-.')],
    "computation time as a function of N for B={}".format(Bs),
    "f_of_N",
)

# Timing versus batch size B: one line per grid size N (hence the transposes).
_save_timing_figure(
    Bs,
    [(cpu_fft_times.T, 'b-'),
     (cpu_ifft_times.T, 'b-.'),
     (gpu_fft_times.T, 'r-'),
     (gpu_ifft_times.T, 'r-.')],
    "computation time as a function of B for N={}".format(Ns),
    "f_of_B",
)
@eickenberg
Copy link
Author

Blue is CPU, red is GPU; solid lines are the FFT, dash-dot lines are the IFFT.

f_of_b
f_of_n

@eickenberg
Copy link
Author

Speedups are generally around 10^3, can exceed 10^4, and reach 10^5. Caveat: everything is already in memory.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment