Skip to content

Instantly share code, notes, and snippets.

@geggo
Created February 1, 2017 22:49
Show Gist options
  • Save geggo/6bbc597439ffbec596353cf699fb025c to your computer and use it in GitHub Desktop.
Save geggo/6bbc597439ffbec596353cf699fb025c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# In-place real-to-complex transforms"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"inline real-to-complex FFT is a mess because same raw array needs to be interpreted differently as input or result. Input array needs to follow some rules (padding), so you cannot directly use a given arbitrary array as input. \n",
"\n",
"Stricter rules form memory layout:\n",
"* for the first transform axis, the items must be contiguous\n",
"\n",
"### Option 1: \n",
"\n",
"hand in a single padded array, and information about wanted transform shape, leave rest to the user. \n",
"\n",
"Comment: In this case, why not directly using the low-level wrapper. Difficult to get right for user.\n",
"\n",
"### Option 2:\n",
"\n",
"hand in both an input and an output array, which are views of a common padded array. Provide convenience function for allocating arrays (not yet implemented). (Also useful for allocating output array for an out-of-place transform).\n",
"\n",
"Pros: \n",
"* transform being in-place mostly hidden from user, padding is not visible to user.\n",
"* inplace transform looks like out-of-place (except that input array data is lost), easy(?) to switch\n",
"* easy to implement (check if in- and output-array share comman base, assert contiguity)\n",
"\n",
"Cons:\n",
"* in arrray is sliced, and sliced pyopencly.array.Arrays are less fun to work with (set(), get() fail), but map_to_host() works nicely as replacement. Needs some discussion with maintainer if get() can be improved.\n",
"* array allocation method, or loose some flexibility. \n",
"\n",
"Comment: At the moment my prefered approach. high-leve wrapper should be easy to use.\n",
"\n",
"\n",
"### Option 3:\n",
"mixture of 1 and 2, e.g. hand in padded array and output array and transform shape\n",
"\n",
"Pros: \n",
"* no sliced arrays\n",
"\n",
"Cons:\n",
"* not fun\n",
"\n",
"\n",
"### Option 2b:\n",
"separate class for real transforms (like rfft and fft)?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"%pylab inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"set_printoptions(precision=3)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from gpyfft.fft import FFT\n",
"import pyopencl as cl\n",
"import pyopencl.array as cla"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"context = cl.create_some_context(answers = ['0', '1'])\n",
"queue = cl.CommandQueue(context)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"in [ 0. 1. 2. 3. 4. 5. 6. 7.]\n",
"out [ 28.+0.j -4.+9.657j -4.+4.j -4.+1.657j -4.+0.j ]\n",
"np [ 28.+0.j -4.+9.657j -4.+4.j -4.+1.657j -4.+0.j ]\n",
"inplace True\n",
"in [ 0. 1. 2. 3. 4. 5. 6. 7.]\n"
]
}
],
"source": [
"# 1d real to complex transform\n",
"real_dtype = np.float32\n",
"complex_dtype = np.complex64\n",
"\n",
"N = 8\n",
"x_host = np.arange(N, dtype=real_dtype)\n",
"\n",
"#for inplace real transform create in and out arrays as a \n",
"#view to same, padded base array\n",
"shape_real = (N,)\n",
"shape_complex = (N//2 + 1,)\n",
"shape_real_padded = (2*(N//2 + 1),)\n",
"x_pad = cla.zeros(queue, shape_real_padded, dtype = real_dtype)\n",
"x_in = x_pad[:N]\n",
"x_out = x_pad.view(complex_dtype)\n",
"\n",
"x_in.set(x_host)\n",
"\n",
"transform = FFT(context, queue,\n",
" x_in, x_out)\n",
"\n",
"itransform = FFT(context, queue,\n",
" x_out, x_in, real=True)\n",
"\n",
"print 'in', x_in\n",
"transform.enqueue()\n",
"print 'out', x_out\n",
"print 'np ', np.fft.rfft(x_host)\n",
"print 'inplace', transform.plan.inplace\n",
"\n",
"#transform.enqueue(forward=False) #is not inverse complex-to-real transform\n",
"itransform.enqueue()\n",
"print 'in', x_in"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0. 1. 2. 3. 4. 5. 6. 7.]\n",
" [ 8. 9. 10. 11. 12. 13. 14. 15.]\n",
" [ 16. 17. 18. 19. 20. 21. 22. 23.]]\n",
"[[ 28.+0.j -4.+9.657j -4.+4.j -4.+1.657j -4.+0.j ]\n",
" [ 92.+0.j -4.+9.657j -4.+4.j -4.+1.657j -4.+0.j ]\n",
" [ 156.+0.j -4.+9.657j -4.+4.j -4.+1.657j -4.+0.j ]]\n",
"[[ 28.+0.j -4.+9.657j -4.+4.j -4.+1.657j -4.+0.j ]\n",
" [ 92.+0.j -4.+9.657j -4.+4.j -4.+1.657j -4.+0.j ]\n",
" [ 156.+0.j -4.+9.657j -4.+4.j -4.+1.657j -4.+0.j ]]\n",
"True\n"
]
}
],
"source": [
"#batched 1d or 2d real to complex transform\n",
"real_dtype = np.float32\n",
"complex_dtype = np.complex64\n",
"\n",
"M = 3\n",
"N = 8\n",
"axes = (1,) #batched 1d\n",
"#axes = (0,) #fails (thats expected), stupid error message: unexpected shape for output (not ok)\n",
"#axes = (1,0) #2d\n",
"#axes = (0,1) #fails, see above\n",
"\n",
"x_host = np.arange(M*N, dtype=real_dtype).reshape(M,N)\n",
"\n",
"#for inplace real transform create in and out arrays as a \n",
"#view to same, padded base array\n",
"shape_real = x_host.shape\n",
"\n",
"#TODO: normalize axes\n",
"\n",
"N0 = shape_real[axes[0]]\n",
"\n",
"shape_complex = list(shape_real)\n",
"shape_complex[axes[0]] = N0//2 + 1\n",
"\n",
"shape_real_padded = list(shape_real)\n",
"shape_real_padded[axes[0]] = 2*(N0//2 + 1)\n",
"\n",
"x_pad = cla.zeros(queue, tuple(shape_real_padded), dtype = real_dtype)\n",
"x_in = x_pad[ tuple(slice(None, shape_real[k] if k==axes[0] else None)\n",
" for k in range(len(shape_real)))]\n",
"\n",
"x_out = x_pad.view(complex_dtype) #TODO: shortening and view for arbitrary padding\n",
"\n",
"x_in.map_to_host()[:] = x_host\n",
"\n",
"transform = FFT(context, queue,\n",
" x_in, x_out,\n",
" axes=axes)\n",
"\n",
"itransform = FFT(context, queue,\n",
" x_out, x_in, \n",
" axes=axes,\n",
" real=True)\n",
"\n",
"\n",
"print x_in.map_to_host()\n",
"transform.enqueue()\n",
"print x_out\n",
"print np.fft.rfftn(x_host, axes=axes)\n",
"print transform.plan.inplace"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# playground"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(True, True, False, False)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(np.issubdtype(np.float64, np.floating),\n",
"np.issubdtype(np.complex64, np.complexfloating),\n",
"np.issubdtype(np.complex64, np.floating),\n",
"np.issubdtype(np.float64, np.complexfloating),\n",
")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
@SyamGadde
Copy link

SyamGadde commented Feb 2, 2017

This summarizes the issues nicely. Thanks.

I've been wavering between thinking real in-place should look similar to out-of-place (Option 2, what you currently have in commit geggo/gpyfft@2c07fa8) and real in-place should look similar to complex in-place (Option 1, what I have in SyamGadde/gpyfft@f39da92). But your option seems simpler. I've modified my test script (below) and it works well with your current commit. I think I can incorporate some of my bounds/stride checking to make it more idiot-proof (I include myself among them).

I do think having convenience functions to create the padded arrays would also be very helpful to the user, and a separate enqueue_real function would eliminate some of the ambiguity in usage -- the main enqueue function could still have the full functionality, but the wrapper could remove some of the guesswork (sending real=True appropriately, maybe even returning the correct output view).

Test script:

# works with geggo/gpyfft:2c07fa8e7674757c58646c5f11cf710163bd4fac

import numpy as np
import pyopencl as cl
import pyopencl.array as cla
import random
from gpyfft.fft import FFT
context = cl.create_some_context()
queue = cl.CommandQueue(context) 
in_type = np.float32
out_type = np.complex64

for order in ('F', 'C'):
    for axes in ((0,1), (1,0)):
        fastestdim = int(order == 'C')
        if axes[0] == fastestdim:
            print "=== THESE SHOULD ALL WORK ==="
        else:
            # if real-transform axis is not the fastest dimension then
            # all bets are off.
            continue
        for it in range(20):
            m = random.randrange(2, 12, 2)
            n = random.randrange(2, 12, 2)
            print "order=%s axes=%s (%d x %d) " % (order, axes, m, n),
            shape = [m,n]
            padshape = list(shape)
            if axes[0] == fastestdim:
                # first transformed axis (real-transform) is the fastest dim
                # need (N // 2) + 1 elements total
                padshape[fastestdim] += 2
            else:
                # need 2 * N elements total, but still doesn't work some times
                padshape[fastestdim] *= 2
            outslice = [slice(0, m), slice(0, n)]
            outslice[axes[0]] = slice(0, (outslice[axes[0]].stop // 2) + 1)
            outslice = tuple(outslice)
            x_in = np.zeros(padshape, dtype=in_type, order=order)
            x_in[:m, :n] = np.arange(1, (m * n) + 1, dtype=in_type).reshape([m,n], order=order)
            x_in_gpu = cla.to_device(queue, x_in)
            x_out_gpu = x_in_gpu.view(np.complex64)
            x_sub = x_in_gpu[:m, :n]
            #print "x_in_gpu.shape=%s" % (x_in_gpu.shape,)
            #print "x_in_gpu.strides=%s" % (x_in_gpu.strides,)
            #print "x_out_gpu.shape=%s" % (x_out_gpu.shape,)
            #print "x_sub.shape=%s" % (x_sub.shape,)
            #print "outslice=%s" % (outslice,)
            transform = FFT(context, queue, x_sub, axes=axes, out_array=x_out_gpu)
            event, = transform.enqueue()
            event.wait()
            x_out_gpu = x_in_gpu.view(np.complex64)
            result_host = x_out_gpu.get()[outslice]
            result_numpy = np.fft.rfftn(x_in[:m, :n], axes=axes[::-1])
            #print "x_in (zeros are padding)\n", x_in
            #print "gpyfft: fft(x_in)\n", result_host
            #print "numpy: rfft(x_in)\n", result_numpy
            if not np.allclose(result_host, result_numpy, atol=np.max(x_in)*1e-05):
                print "FAIL FORWARD: max abs diff: %s" % (np.max(np.abs(result_numpy - result_host)),)
                continue
    
            transform = FFT(context, queue, x_out_gpu[outslice], axes=axes, out_array=x_sub, real=True)
            event, = transform.enqueue(forward=False)
            event.wait()
            result_host2 = x_in_gpu.get()[:m, :n]
            #print "x_in[:m,:n]\n", x_in[:m, :n]
            #print "gpyfft: ifft(fft(x_in))\n", result_host2
            if not np.allclose(x_in[:m, :n], result_host2, atol=np.max(x_in)*1e-05):
                print "FAIL REVERSE: max abs diff: %s" % (np.max(np.abs(x_in[:m, :n] - result_host2)),)
                continue

            print "SUCCESS"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment