# eickenberg/cuda_fft_test.py

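## cuda_fft_test.py
"""Benchmark batched 3-D FFTs: skcuda on the GPU versus pyfftw on the CPU.

For every cube edge N and batch size B the forward and inverse transforms
are timed on both back ends, and the timings are plotted as a function of
N and of B.
"""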

import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
#from scipy import fftpack as fft
from pyfftw.interfaces import numpy_fft as fft

import skcuda.fft as cu_fft

# Problem sizes: every cube edge in Ns is benchmarked with every batch size
# in Bs.
Ns = (32, 64, 96, 128, 256)
Bs = (8, 12, 16, 20, 24)

# Number of timed repetitions per configuration, indexed as
# [position in Ns, position in Bs].  Everything runs once, except the 2x2
# corner holding the two smallest N and two smallest B, which runs ten times.
ns = np.ones((len(Ns), len(Bs)), dtype=int)
ns[:2, :2] = 10

import time

# Flat result lists; the benchmark loop appends one averaged timing per
# (N, B) configuration to each of them.
cpu_fft_times, cpu_ifft_times = [], []
gpu_fft_times, gpu_ifft_times = [], []

# GPU FFT calls only queue work on the device and return to the host right
# away.  The context therefore has to be synchronised before the clock is
# read, otherwise the GPU timings measure launch overhead instead of the
# transform itself.
_gpu_sync = pycuda.autoinit.context.synchronize

# Time forward and inverse transforms on CPU and GPU for every (N, B) pair.
# Each result is the mean wall-clock time of one batched transform, in
# seconds, averaged over the n repetitions configured in ns.
for j, N in enumerate(Ns):
    for k, B in enumerate(Bs):
        n = ns[j, k]  # repetitions for this configuration
        x = np.empty((B, N, N, N), dtype=np.float32)
        xf = np.empty_like(x, dtype=np.complex64)
        y = np.empty_like(x)

        x[:] = np.random.randn(*x.shape).astype('float32')

        # CPU forward transform over the three trailing axes.
        # NOTE(review): this is a full complex transform of real input
        # (output shape (B, N, N, N)), whereas the GPU plan below is
        # real-to-complex and only produces N // 2 + 1 coefficients along
        # the last axis, so the CPU side does more work per transform.
        t0 = time.time()
        for _ in range(n):
            xf[:] = fft.fftn(x, axes=(1, 2, 3))
        t1 = time.time()
        cpu_fft_times.append((t1 - t0) / n)

        # CPU inverse transform; only the real part is kept.
        t2 = time.time()
        for _ in range(n):
            y[:] = np.real(fft.ifftn(xf, axes=(1, 2, 3)))
        t3 = time.time()
        cpu_ifft_times.append((t3 - t2)/n)

        # GPU forward transform (real -> half-spectrum complex, batch of B).
        x_gpu = gpuarray.to_gpu(x)
        xf_gpu = gpuarray.empty((B, N, N, N // 2 + 1), np.complex64)
        plan_forward = cu_fft.Plan((N, N, N), np.float32, np.complex64, B)
        _gpu_sync()  # start the clock with an idle device
        t4 = time.time()
        for _ in range(n):
            cu_fft.fft(x_gpu, xf_gpu, plan_forward)
        _gpu_sync()  # wait until every queued transform has finished
        t5 = time.time()
        gpu_fft_times.append((t5 - t4) / n)

        # GPU inverse transform (half-spectrum complex -> real), with scaling
        # enabled through the final positional argument.
        y_gpu = gpuarray.empty_like(x_gpu)
        plan_inverse = cu_fft.Plan((N, N, N), np.complex64, np.float32, B)

        _gpu_sync()
        t6 = time.time()
        for _ in range(n):
            cu_fft.ifft(xf_gpu, y_gpu, plan_inverse, True)
        _gpu_sync()
        t7 = time.time()
        gpu_ifft_times.append((t7 - t6)/n)

        # Per-configuration report: sizes, repetitions, the four timings and
        # the CPU/GPU speed-up for the forward and inverse transforms.
        print((N, B, n, cpu_fft_times[-1], cpu_ifft_times[-1],
            gpu_fft_times[-1], gpu_ifft_times[-1],
            cpu_fft_times[-1] / gpu_fft_times[-1],
            cpu_ifft_times[-1] / gpu_ifft_times[-1]))

# Sanity check on the last configuration only: summed squared difference
# between the CPU and GPU round-trip results.
print(((y - y_gpu.get()) ** 2).sum())


# Fold each flat timing list into a 2-D grid with one row per entry of Ns
# and one column per entry of Bs (the order in which the loop filled them).
cpu_fft_times = np.reshape(cpu_fft_times, (len(Ns), len(Bs)))
cpu_ifft_times = np.reshape(cpu_ifft_times, (len(Ns), len(Bs)))
gpu_fft_times = np.reshape(gpu_fft_times, (len(Ns), len(Bs)))
gpu_ifft_times = np.reshape(gpu_ifft_times, (len(Ns), len(Bs)))

import matplotlib
matplotlib.use("Agg")  # file-only backend: figures are saved, never shown
import matplotlib.pyplot as plt

# Timing as a function of the cube edge N, on log-log axes.  Each plot call
# draws one curve per batch size (the columns of the timing grid): CPU in
# blue, GPU in red, forward transform solid, inverse dash-dotted.
plt.figure()
for _times, _style, _label in (
        (cpu_fft_times, 'b-', "CPU fft"),
        (cpu_ifft_times, 'b-.', "CPU ifft"),
        (gpu_fft_times, 'r-', "GPU fft"),
        (gpu_ifft_times, 'r-.', "GPU ifft")):
    _curves = plt.plot(Ns, _times, _style)
    # Label only one curve per family so the legend gets four entries
    # rather than one per batch size.
    _curves[0].set_label(_label)
plt.yscale('log')
plt.xscale('log')
plt.xlabel("N")
plt.ylabel("time per batched transform (s)")
plt.title("computation time as a function of N for B={}".format(Bs))
# A real list of labels: a map object is a one-shot iterator on Python 3.
plt.xticks(Ns, [str(size) for size in Ns])
plt.legend()
for _ext in ("png", "svg", "pdf"):
    plt.savefig("f_of_N." + _ext)
plt.close()

# Timing as a function of the batch size B, on log-log axes.  The grids are
# transposed so that each plot call draws one curve per cube edge N: CPU in
# blue, GPU in red, forward transform solid, inverse dash-dotted.
plt.figure()
for _times, _style, _label in (
        (cpu_fft_times, 'b-', "CPU fft"),
        (cpu_ifft_times, 'b-.', "CPU ifft"),
        (gpu_fft_times, 'r-', "GPU fft"),
        (gpu_ifft_times, 'r-.', "GPU ifft")):
    _curves = plt.plot(Bs, _times.T, _style)
    # Label only one curve per family so the legend gets four entries
    # rather than one per cube edge.
    _curves[0].set_label(_label)
plt.yscale('log')
plt.xscale('log')
plt.xlabel("B")
plt.ylabel("time per batched transform (s)")
plt.title("computation time as a function of B for N={}".format(Ns))
# A real list of labels: a map object is a one-shot iterator on Python 3.
plt.xticks(Bs, [str(batch) for batch in Bs])
plt.legend()
for _ext in ("png", "svg", "pdf"):
    plt.savefig("f_of_B." + _ext)
plt.close()

# End of script.  A second, tab-indented copy of the whole benchmark used to
# follow this line.  Its loop bodies had lost their nesting, so the module
# could not be parsed, and it repeated the statements above one for one; it
# has been removed.