Last active
August 29, 2015 14:17
-
-
Save fjarri/27dfbee989d8792bda0c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import numpy | |
from reikna.cluda import cuda_api | |
from reikna.fft import FFT, FFTShift | |
import reikna.cluda.dtypes as dtypes | |
from reikna.core import Transformation, Parameter, Annotation, Type | |
def fftshift(arr_t, axes=None):
    """
    Build a reikna ``Transformation`` that writes each input element to its
    ``fftshift``-ed position in the output.

    The transformation has two parameters, ``output`` and ``input``, both
    annotated with ``arr_t``. For every axis listed in ``axes`` the element
    index ``i`` along an axis of length ``N`` is mapped to
    ``i + N // 2`` for the leading part of the axis and wrapped back by ``N``
    for the trailing part, which matches the index mapping
    ``(i + N // 2) % N``. Axes that are not listed keep their index.

    :param arr_t: array-like object exposing ``shape``; it is passed to
        ``Annotation`` for both parameters.
    :param axes: iterable of axis indices to shift, or ``None`` to shift
        every axis. Negative indices count from the last axis.
    :returns: the ``Transformation`` object.
    :raises ValueError: if an axis lies outside ``[-ndim, ndim)``.
    """
    ndim = len(arr_t.shape)

    if axes is None:
        axes = tuple(range(ndim))
    else:
        # The template below tests ``dim in axes`` with non-negative ``dim``,
        # so negative indices must be converted first; otherwise the axis
        # would silently be left unshifted.
        normalized = set()
        for axis in axes:
            if not -ndim <= axis < ndim:
                raise ValueError(
                    "axis {} is out of bounds for an array with {} dimensions".format(
                        axis, ndim))
            normalized.add(axis % ndim)
        axes = tuple(sorted(normalized))

    # The template declares one shifted index per dimension, loads the value
    # at the current (unshifted) position and stores it at the shifted one.
    # Even and odd lengths need different split points and wrap offsets.
    return Transformation(
        [
            Parameter('output', Annotation(arr_t, 'o')),
            Parameter('input', Annotation(arr_t, 'i')),
        ],
        """
        <%
            dimensions = len(output.shape)
            new_idx_names = ['new_idx' + str(i) for i in range(dimensions)]
        %>
        %for dim in range(dimensions):
        VSIZE_T ${new_idx_names[dim]} =
            ${idxs[dim]}
            %if dim in axes:
                %if output.shape[dim] % 2 == 0:
                + (${idxs[dim]} < ${output.shape[dim] // 2} ?
                    ${output.shape[dim] // 2} :
                    ${-output.shape[dim] // 2})
                %else:
                + (${idxs[dim]} <= ${output.shape[dim] // 2} ?
                    ${output.shape[dim] // 2} :
                    ${-(output.shape[dim] // 2 + 1)})
                %endif
            %endif
            ;
        %endfor

        ${output.ctype} val = ${input.load_same};
        ${output.store_idx}(${', '.join(new_idx_names)}, val);
        """,
        render_kwds=dict(axes=axes))
def run_test(thr, shape, dtype, axes=None):
    """
    Run one benchmark pass of FFT followed by fftshift on the GPU and on the
    CPU, check the GPU results against numpy, and return the timings.

    Three GPU variants are timed: FFT alone, ``FFTShift`` alone, both
    computations called back to back, and a single FFT computation with the
    ``fftshift`` transformation connected to its output. The numpy
    equivalents are timed as the reference.

    :param thr: reikna thread used to compile and run the computations.
    :param shape: shape of the random test array.
    :param dtype: dtype the random test array is cast to.
    :param axes: axes passed to the FFT and shift operations
        (``None`` is forwarded unchanged).
    :returns: dict of durations in seconds, keyed by ``t_gpu_fft``,
        ``t_gpu_shift``, ``t_gpu_separate``, ``t_gpu_combined``,
        ``t_cpu_fft``, ``t_cpu_shift`` and ``t_cpu_all``.
    :raises AssertionError: if either GPU result is not close to the
        numpy reference.
    """
    host_data = numpy.random.normal(size=shape).astype(dtype)

    fft = FFT(host_data, axes=axes)
    fftc = fft.compile(thr)
    shift = FFTShift(host_data, axes=axes)
    shiftc = shift.compile(thr)

    def gpu_elapsed(buf, *computations):
        # Runs every computation in place on ``buf`` and stops the clock only
        # after ``thr.synchronize()`` returns (presumably once the queued GPU
        # work has finished -- confirm against the reikna docs).
        started = time.time()
        for computation in computations:
            computation(buf, buf)
        thr.synchronize()
        return time.time() - started

    def cpu_elapsed(func):
        # Returns ``(result, seconds)`` for a zero-argument callable.
        started = time.time()
        outcome = func()
        return outcome, time.time() - started

    # separate calculation
    data_dev = thr.to_device(host_data)
    t_gpu_fft = gpu_elapsed(data_dev, fftc)
    t_gpu_shift = gpu_elapsed(data_dev, shiftc)

    # Fresh copy of the input, then both steps timed together.
    data_dev = thr.to_device(host_data)
    t_gpu_separate = gpu_elapsed(data_dev, fftc, shiftc)

    # transformation
    data_dev2 = thr.to_device(host_data)
    shift_tr = fftshift(host_data, axes=axes)
    fft2 = fft.parameter.output.connect(
        shift_tr, shift_tr.input, new_output=shift_tr.output)
    fft2c = fft2.compile(thr)
    t_gpu_combined = gpu_elapsed(data_dev2, fft2c)

    # reference
    _, t_cpu_fft = cpu_elapsed(lambda: numpy.fft.fftn(host_data, axes=axes))
    _, t_cpu_shift = cpu_elapsed(lambda: numpy.fft.fftshift(host_data, axes=axes))
    data_ref, t_cpu_all = cpu_elapsed(
        lambda: numpy.fft.fftshift(numpy.fft.fftn(host_data, axes=axes), axes=axes))

    data_gpu = data_dev.get()
    data_gpu2 = data_dev2.get()

    assert numpy.allclose(data_ref, data_gpu)
    assert numpy.allclose(data_ref, data_gpu2)

    return {
        't_gpu_fft': t_gpu_fft,
        't_gpu_shift': t_gpu_shift,
        't_gpu_separate': t_gpu_separate,
        't_gpu_combined': t_gpu_combined,
        't_cpu_fft': t_cpu_fft,
        't_cpu_shift': t_cpu_shift,
        't_cpu_all': t_cpu_all,
    }
def run_tests(thr, shape, dtype, axes=None, attempts=10):
    """
    Call ``run_test`` ``attempts`` times and keep the best time per metric.

    :param thr: forwarded to ``run_test``.
    :param shape: forwarded to ``run_test``.
    :param dtype: forwarded to ``run_test``.
    :param axes: forwarded to ``run_test``.
    :param attempts: number of benchmark passes; must be at least 1.
    :returns: dict with the same keys as a single ``run_test`` result, each
        mapped to the minimum value observed across all passes.
    :raises ValueError: if ``attempts`` is less than 1.
    """
    # With no passes there is no first result to take the keys from, so fail
    # early with a clear message instead of an IndexError further down.
    if attempts < 1:
        raise ValueError("attempts must be at least 1, got {}".format(attempts))

    results = [run_test(thr, shape, dtype, axes=axes) for _ in range(attempts)]
    return {key: min(result[key] for result in results) for key in results[0]}
if __name__ == '__main__':
    # Script entry point: benchmark a fixed configuration on a CUDA thread
    # and print the raw timings followed by CPU/GPU speedup ratios.
    api = cuda_api()
    thr = api.Thread.create()

    shape = (1024, 1024)
    dtype = numpy.complex128
    axes = (1,)

    results = run_tests(thr, shape, dtype, axes=axes)

    # Benchmark configuration.
    header = (
        ('device:', thr._device.name),
        ('shape:', shape),
        ('dtype:', dtype),
        ('axes:', axes),
    )
    for caption, value in header:
        print(caption, value)

    # Best time recorded for every metric.
    for key in results:
        print(key, ':', results[key])

    # Each speedup is the CPU time divided by the matching GPU time.
    speedups = (
        ("Speedup for a separate calculation:", 't_cpu_all', 't_gpu_separate'),
        ("Speedup for a combined calculation:", 't_cpu_all', 't_gpu_combined'),
        ("Speedup for fft alone:", 't_cpu_fft', 't_gpu_fft'),
        ("Speedup for shift alone:", 't_cpu_shift', 't_gpu_shift'),
    )
    for caption, cpu_key, gpu_key in speedups:
        print(
            caption,
            results[cpu_key] / results[gpu_key])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment