ddemidov/hello.cpp

## hello.cpp
#include <vexcl/vexcl.hpp>

// Frequency-domain filter: forward FFT of `in`, element-wise multiplication by
// the weights produced by h(), then inverse FFT.
//
//   in    real-valued input; its queue list is reused for the FFT plans and
//         for the returned vector, so no context argument is required.
//   rows  first FFT extent; also passed to h() as the length parameter.
//   cols  second FFT extent.
//
// Returns a complex (cl_float2) vector holding rows * cols elements.
vex::vector<cl_float2>
hilbert(vex::vector<float> &in, std::size_t rows, std::size_t cols) {
    // Weight generator evaluated on the device. Computing the weights inside
    // the kernel, instead of reading them from a vector, should save global
    // memory IO. For index prm1 and length prm2 it yields:
    //   1 for prm1 == 0,
    //   1 for prm1 == prm2 / 2 when prm2 is even,
    //   0 for prm1 >  prm2 / 2,
    //   2 otherwise.
    VEX_FUNCTION(h, float(size_t/*prm1 = idx*/, size_t/*prm2 = rows*/),
            VEX_STRINGIZE_SOURCE(
                float v = prm1 > prm2 / 2 ? 0 : 2;
                if (prm1 == 0) v = 1;
                if (prm2 % 2 == 0 && prm1 == prm2 / 2) v = 1;
                return v;
                )
            );

    // Borrow the queues from the input vector.
    const auto &queues = in.queue_list();

    // Building FFT plans is expensive. If hilbert() may be called repeatedly
    // with the same sizes, the plans should be cached (e.g. kept as members of
    // a class) rather than rebuilt on every call.
    vex::FFT<float, cl_float2> forward_fft(queues, {rows, cols});
    vex::FFT<cl_float2, cl_float2> inverse_fft(
            queues, {rows, cols}, vex::fft::inverse);

    // Allocate the output explicitly so that it lives on the same queues as
    // the input.
    vex::vector<cl_float2> filtered(queues, rows * cols);

    // NOTE(review): the order of dimensions depends on the data layout and
    // has not been confirmed. The fastest-changing dimension is the rightmost
    // one in extents[]..[].
    filtered = inverse_fft(
            forward_fft(in) * reshape(
                h(vex::element_index(), rows),
                vex::extents[rows][cols], vex::extents[0]
                )
            );

    return filtered;
}

int main() {
    vex::Context ctx(vex::Filter::Env && vex::Filter::Count(1));
    std::cout << ctx << std::endl;

    const size_t n = 1024;
    const size_t m = 1;

    vex::vector<float> x(ctx, n * m);
    x = 1;

    vex::vector<cl_float2> y = hilbert(x, m, n);
}
	#include <vexcl/vexcl.hpp>

	// Frequency-domain filter: forward FFT of `in`, element-wise multiplication
	// by the weights produced by h(), then inverse FFT.
	//
	//   in    real-valued input; its queue list is reused for the FFT plans and
	//         for the returned vector, so no context argument is required.
	//   rows  first FFT extent; also passed to h() as the length parameter.
	//   cols  second FFT extent.
	//
	// Returns a complex (cl_float2) vector holding rows * cols elements.
	vex::vector<cl_float2>
	hilbert(vex::vector<float> &in, std::size_t rows, std::size_t cols) {
	    // No need to pass context in:
	    const auto &ctx = in.queue_list();

	    // Use function instead of vector. Should save global memory IO.
	    // The parameter names are given as block comments; these had lost
	    // their asterisks, which made the signature ill-formed.
	    // For index prm1 and length prm2 the function yields:
	    //   1 for prm1 == 0,
	    //   1 for prm1 == prm2 / 2 when prm2 is even,
	    //   0 for prm1 >  prm2 / 2,
	    //   2 otherwise.
	    VEX_FUNCTION(h, float(size_t/*prm1 = idx*/, size_t/*prm2 = rows*/),
	            VEX_STRINGIZE_SOURCE(
	                float v = prm1 > prm2 / 2 ? 0 : 2;
	                if (prm1 == 0) v = 1;
	                if (prm2 % 2 == 0 && prm1 == prm2 / 2) v = 1;
	                return v;
	                )
	            );

	    // If there is any chance hilbert() will be repeatedly called for the
	    // same sizes, these should be cached (e.g. hidden in a class).
	    // Construction of FFT is expensive.
	    vex::FFT<float, cl_float2> fft(ctx, {rows, cols});
	    vex::FFT<cl_float2, cl_float2> ifft(ctx, {rows, cols}, vex::fft::inverse);

	    // This way result will be initialized with proper queues (context):
	    vex::vector<cl_float2> result(ctx, rows * cols);

	    // NOTE(review): the order of dimensions depends on the data layout and
	    // has not been confirmed. The one that changes faster is at the right
	    // end of extents[]..[].
	    result = ifft(
	            fft(in) * reshape(
	                h(vex::element_index(), rows),
	                vex::extents[rows][cols], vex::extents[0]
	                )
	            );

	    return result;
	}

	int main() {
	vex::Context ctx(vex::Filter::Env && vex::Filter::Count(1));
	std::cout << ctx << std::endl;

	const size_t n = 1024;
	const size_t m = 1;

	vex::vector<float> x(ctx, n * m);
	x = 1;

	vex::vector<cl_float2> y = hilbert(x, m, n);
	}