bellbind/build.zig

## build.zig
// build all: $ zig build
// clean up: $ zig build dist-clean
const builtin = @import("builtin"); // list-up the content with `zig builtin` command
const Builder = @import("std").build.Builder;

//NOTE: this build code is for zig-0.4.0, maybe incompatible with zig >= 0.5
pub fn build(b: *Builder) void {
    { //[case: WebAssembly wasm] build fft.wasm
        const wasmStep = b.step("fft.wasm", "build fft.wasm");
        const wasm = b.addExecutable("fft.wasm", "fft.zig");
        wasm.setTarget(builtin.Arch.wasm32, builtin.Os.freestanding, builtin.Abi.musl);
        wasm.setBuildMode(builtin.Mode.ReleaseFast);
        wasm.setOutputDir(".");
        wasmStep.dependOn(&wasm.step);
        b.default_step.dependOn(wasmStep);
    }
    { //[case: zig code only] build fft-bench-zig command
        const exeStep = b.step("fft-bench-zig", "build fft-bench-zig command");
        const zigExe = b.addExecutable("fft-bench-zig", "fft-bench-zig.zig");
        zigExe.setBuildMode(builtin.Mode.ReleaseFast);
        zigExe.setOutputDir(".");
        exeStep.dependOn(&zigExe.step);
        b.default_step.dependOn(exeStep);
    }
    { //[case: c exe with zig lib] build fft-bench-c command
        // build fft.h and fft.o
        const objStep = b.step("fft.o", "build fft.h and fft.o");
        const fftObj = b.addObject("fft", "fft.zig");
        fftObj.setBuildMode(builtin.Mode.ReleaseFast);
        fftObj.setOutputDir(".");
        objStep.dependOn(&fftObj.step);

        // build fft-bench-c command (cc: expected clang or gcc)
        const cExeStep = b.step("fft-bench-c", "build fft-bench-c command");
        cExeStep.dependOn(objStep);
        const cExe = b.addSystemCommand([][]const u8{
            "cc", "-Wall", "-Wextra", "-O3", "-o", "fft-bench-c", "fft-bench-c.c", "fft.o",
        });
        cExeStep.dependOn(&cExe.step);
        b.default_step.dependOn(cExeStep);
    }
    { //[case: zig exe with c lib] build fft-bench-cimport command
        const exeStep = b.step("fft-bench-cimport", "build fft-bench-cimport command");

        const cLibStep = b.step("fft-c", "build fft-c.o");
        const cLibObj = b.addSystemCommand([][]const u8{
            "cc", "-Wall", "-Wextra", "-std=c11", "-pedantic", "-O3", "-c", "fft-c.c",
        });
        cLibStep.dependOn(&cLibObj.step);
        exeStep.dependOn(cLibStep);

        const mainObj = b.addExecutable("fft-bench-cimport", "fft-bench-cimport.zig");
        mainObj.addIncludeDir("."); //NOTE: same as `-isystem .`
        mainObj.setBuildMode(builtin.Mode.ReleaseFast);
        mainObj.addObjectFile("fft-c.o");
        mainObj.setOutputDir(".");
        exeStep.dependOn(&mainObj.step);

        b.default_step.dependOn(exeStep);
    }

    { // clean and dist-clean
        const cleanStep = b.step("clean", "clean-up intermediate iles");
        const rm1 = b.addSystemCommand([][]const u8{
            "rm", "-f", "fft.wasm.o", "fft-bench-zig.o", "fft-bench-c.o", "fft-c.o", "fft-bench-cimport.o",
        });
        cleanStep.dependOn(&rm1.step);
        const rmDir = b.addRemoveDirTree("zig-cache");
        cleanStep.dependOn(&rmDir.step);

        const distCleanStep = b.step("dist-clean", "clean-up build generated files");
        distCleanStep.dependOn(cleanStep);
        const rm2 = b.addSystemCommand([][]const u8{
            "rm", "-f", "fft.wasm", "fft-bench-zig", "fft-bench-c", "fft.o", "fft.h", "fft-bench-cimport",
        });
        distCleanStep.dependOn(&rm2.step);
    }
}

## fft-bench-c.c
// A C11 program which uses zig files: fft.zig
//[how to build]
// $ zig build-obj --release-fast fft.zig
// $ cc -O3 -Wall -Wextra -std=c11 -pedantic -c fft-bench-c.c -o fft-bench-c.o
// $ cc -O3 -o fft-bench-c fft-bench-c.o fft.o
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <time.h>
#include "./fft.h"

int main() {
  {// example
    const int n = 16;
    const int v[16] = {1,3,4,2, 5,6,2,4, 0,1,3,4, 5,62,2,3};
    struct CN f[n], F[n], r[n];
    for (size_t i = 0; i < n; i++) f[i] = (struct CN) {.re = v[i], .im = 0.0};

    puts("[f]");
    for (size_t i = 0; i < n; i++) printf("%f + %fi\n", f[i].re, f[i].im);

    fft(n, f, F);
    ifft(n, F, r);

    puts("[F]");
    for (size_t i = 0; i < n; i++) printf("%f + %fi\n", F[i].re, F[i].im);
    puts("[r]");
    for (size_t i = 0; i < n; i++) printf("%f + %fi\n", r[i].re, r[i].im);
  }

  {// benchmark
    const int n = 1024 * 1024;
    struct CN * f = calloc(n, sizeof (struct CN));
    for (size_t i = 0; i < n; i++) {
      f[i] = (struct CN) {.re = sin(i) * i, .im = 0.0};
    }

    struct CN * F = calloc(n, sizeof (struct CN));
    struct CN * r = calloc(n, sizeof (struct CN));
    fft(n, f, F);
    ifft(n, F, r);
    free(f);
    free(F);
    free(r);
  }
  return 0;
}

## fft-bench-cimport.zig
// A zig program which uses C files: fft-c.h, fft-c.c
// [how to build]
// $ cc -Wall -Wextra -std=c11 -pedantic -O3 -c fft-c.c
// $ zig build-exe --release-fast --object fft-c.o -isystem . fft-bench-cimport.zig
const heap = @import("std").heap;
const warn = @import("std").debug.warn;
const sin = @import("std").math.sin;
const milliTimestamp = @import("std").os.time.milliTimestamp;

const libfft = @cImport({
    @cInclude("fft-c.h");
});
const CN = libfft.CN;
const rect = libfft.rect;
const fft = libfft.fft;
const ifft = libfft.ifft;

//NOTE: These are extracted code of above @cImport libfft
//const CN = extern struct {
//    re: f64,
//    im: f64,
//};
//extern "./fft-c.o" fn rect(re: f64, im: f64) CN;
//extern "./fft-c.o" fn fft(n: u32, f: [*c]CN, F: [*c]CN) void;
//extern "./fft-c.o" fn ifft(n: u32, F: [*c]CN, f: [*c]CN) void;

fn warnCNs(n: u32, cns: [*]CN) void {
    var i: u32 = 0;
    while (i < n) : (i += 1) {
        warn("{} + {}i\n", cns[i].re, cns[i].im);
    }
}

pub fn main() !void {
    { //example
        const n: u32 = 16;
        const v = [n]i32{ 1, 3, 4, 2, 5, 6, 2, 4, 0, 1, 3, 4, 5, 62, 2, 3 };
        var f: [n]CN = undefined;
        {
            var i: u32 = 0;
            while (i < n) : (i += 1) {
                f[i] = rect(@intToFloat(f64, v[i]), 0.0);
            }
        }
        warn("\n[f]\n");
        warnCNs(n, &f);

        var F: [n]CN = undefined;
        var r: [n]CN = undefined;
        fft(n, @ptrCast([*c]CN, &f), @ptrCast([*c]CN, &F));
        ifft(n, @ptrCast([*c]CN, &F), @ptrCast([*c]CN, &r));

        warn("\n[F]\n");
        warnCNs(n, &F);
        warn("\n[r]\n");
        warnCNs(n, &r);
    }

    { //benchmark
        //NOTE: allocators in std will be changed on 0.5.0
        var direct_allocator = heap.DirectAllocator.init();
        var arena = heap.ArenaAllocator.init(&direct_allocator.allocator);
        const allocator = &arena.allocator;
        defer direct_allocator.deinit();
        defer arena.deinit();

        const n: u32 = 1024 * 1024;
        var f: [*]CN = try allocator.create([n]CN);
        {
            var i: u32 = 0;
            while (i < n) : (i += 1) {
                const if64 = @intToFloat(f64, i);
                f[i] = rect(sin(if64) * if64, 0.0);
            }
        }

        var F: [*]CN = try allocator.create([n]CN);
        var r: [*]CN = try allocator.create([n]CN);

        const start = milliTimestamp();
        fft(n, f, F);
        ifft(n, F, r);
        const stop = milliTimestamp();
        warn("fft-ifft: {}ms\n", stop - start);
    }
}

## fft-bench-nodejs.js
// A node.js JavaScript program which uses WebAssembly wasm from zig files: fft.zig
// [how to build fft.wasm from fft.zig]
// $ zig build-exe --release-fast -target wasm32-freestanding --name fft.wasm fft.zig
const fs = require("fs").promises;

(async function () {
  const buf = await fs.readFile("./fft.wasm");
  const {instance} = await WebAssembly.instantiate(buf, {});
  const {__wasm_call_ctors, memory, fft, ifft} = instance.exports;
  __wasm_call_ctors();
  memory.grow(1);

  {// example
    const N = 16, fofs = 0, Fofs = N * 2 * 8, rofs = N * 4 * 8;
    const f = new Float64Array(memory.buffer, fofs, N * 2);
    const F = new Float64Array(memory.buffer, Fofs, N * 2);
    const r = new Float64Array(memory.buffer, rofs, N * 2);

    const fr0 = [1,3,4,2, 5,6,2,4, 0,1,3,4, 5,62,2,3];
    fr0.forEach((v, i) => {
      [f[i * 2], f[i * 2 + 1]] = [v, 0.0];
    });

    fft(N, fofs, Fofs);
    ifft(N, Fofs, rofs);

    console.log(`[fft]`);
    for (let i = 0; i < N; i++) {
      console.log([F[i * 2], F[i * 2 + 1]]);
    }

    console.log(`[ifft]`);
    for (let i = 0; i < N; i++) {
      console.log([r[i * 2], r[i * 2 + 1]]);
    }
  }

  {
    const N = 1024 * 1024;
    const fr0 = [...Array(N).keys()].map(i => Math.sin(i) * i);
    const f0 = fr0.map(n => [n, 0]);

    const BN = N * 2 * 8 * 3, fofs = 0, Fofs = N * 2 * 8, rofs = N * 4 * 8;
    while (memory.buffer.byteLength < BN) memory.grow(1);
    const f = new Float64Array(memory.buffer, fofs, N * 2);
    const F = new Float64Array(memory.buffer, Fofs, N * 2);
    const r = new Float64Array(memory.buffer, rofs, N * 2);
    fr0.forEach((v, i) => {
      [f[i * 2], f[i * 2 + 1]] = [v, 0.0];
    });

    console.time(`fft-ifft`);
    fft(N, fofs, Fofs);
    ifft(N, Fofs, rofs);
    console.timeEnd(`fft-ifft`);
  }
})().catch(console.error);

## fft-bench-zig.zig
// A zig program which uses zig source files: fft.zig
// [how to build]
// $ zig build-exe --release-fast fft-bench-zig.zig
const heap = @import("std").heap;
const warn = @import("std").debug.warn;
const sin = @import("std").math.sin;
const milliTimestamp = @import("std").os.time.milliTimestamp;

const CN = @import("./fft.zig").CN;
const fft = @import("./fft.zig").fft;
const ifft = @import("./fft.zig").ifft;

fn warnCNs(n: u32, cns: [*]CN) void {
    var i: u32 = 0;
    while (i < n) : (i += 1) {
        warn("{} + {}i\n", cns[i].re, cns[i].im);
    }
}

pub fn main() !void {
    { //example
        const n: u32 = 16;
        const v = [n]i32{ 1, 3, 4, 2, 5, 6, 2, 4, 0, 1, 3, 4, 5, 62, 2, 3 };
        var f: [n]CN = undefined;
        {
            var i: u32 = 0;
            while (i < n) : (i += 1) {
                f[i] = CN.rect(@intToFloat(f64, v[i]), 0.0);
            }
        }
        warn("\n[f]\n");
        warnCNs(n, &f);

        var F: [n]CN = undefined;
        var r: [n]CN = undefined;
        fft(n, &f, &F);
        ifft(n, &F, &r);

        warn("\n[F]\n");
        warnCNs(n, &F);
        warn("\n[r]\n");
        warnCNs(n, &r);
    }

    { //benchmark
        //NOTE: allocators in std will be changed on 0.5.0
        var direct_allocator = heap.DirectAllocator.init();
        var arena = heap.ArenaAllocator.init(&direct_allocator.allocator);
        const allocator = &arena.allocator;
        defer direct_allocator.deinit();
        defer arena.deinit();

        const n: u32 = 1024 * 1024;
        var f = try allocator.create([n]CN);
        {
            var i: u32 = 0;
            while (i < n) : (i += 1) {
                const if64 = @intToFloat(f64, i);
                f[i] = CN.rect(sin(if64) * if64, 0.0);
            }
        }

        var F = try allocator.create([n]CN);
        var r = try allocator.create([n]CN);

        const start = milliTimestamp();
        fft(n, f, F);
        ifft(n, F, r);
        const stop = milliTimestamp();
        warn("fft-ifft: {}ms\n", stop  -start);
    }
}

## fft-c.c
// A C11 program as a library as a FFT implementation for fft-c.h
// $ cc -Wall -Wextra -std=c11 -pedantic -O3 -c fft-c.c
#include <complex.h> // complex, I, cexp
#include <math.h> // M_PI
#include <stdbool.h> // bool
#include <stddef.h> // size_t
#include <stdint.h> // uint64_t

typedef double complex CN;

static inline uint32_t reverse_bit(uint32_t s0) {
  uint32_t s1 = ((s0 & 0xaaaaaaaa) >> 1) | ((s0 & 0x55555555) << 1);
  uint32_t s2 = ((s1 & 0xcccccccc) >> 2) | ((s1 & 0x33333333) << 2);
  uint32_t s3 = ((s2 & 0xf0f0f0f0) >> 4) | ((s2 & 0x0f0f0f0f) << 4);
  uint32_t s4 = ((s3 & 0xff00ff00) >> 8) | ((s3 & 0x00ff00ff) << 8);
  return ((s4 & 0xffff0000) >> 16) | ((s4 & 0x0000ffff) << 16);
}

static inline uint32_t rev_bit(uint32_t k, uint32_t n) {
  return reverse_bit(n) >> (8 * sizeof (uint32_t) - k);
}

static inline uint32_t trailing_zeros(uint32_t n) {
  uint32_t k = 0, s = n;
  if (!(s & 0x0000ffff)) {k += 16; s >>= 16;}
  if (!(s & 0x000000ff)) {k += 8; s >>= 8;}
  if (!(s & 0x0000000f)) {k += 4; s >>= 4;}
  if (!(s & 0x00000003)) {k += 2; s >>= 2;}
  return (s & 1) ? k : k + 1;
}

static void fftc(double t, uint32_t N, const CN c[N], CN ret[N]) {
  uint32_t k = trailing_zeros(N);
  for (uint32_t i = 0; i < N; i++) ret[i] = c[rev_bit(k, i)];
  for (uint32_t Nh = 1; Nh < N; Nh <<= 1) {
    t *= 0.5;
    for (uint32_t s = 0; s < N; s += Nh << 1) {
      for (uint32_t i = 0; i < Nh; i++) { //NOTE: s-outside/i-inside is faster
        uint32_t li = s + i;
        uint32_t ri = li + Nh;
        CN re = ret[ri] * cexp(t * i * I);
        CN l = ret[li];
        ret[li] = l + re;
        ret[ri] = l - re;
      }
    }
  }
}

extern CN rect(double re, double im) {
  return re + im * I;
}
extern void fft(uint32_t N, const CN f[N], CN F[N]) {
  fftc(-2.0 * M_PI, N, f, F);
}
extern void ifft(uint32_t N, const CN F[N], CN f[N]) {
  fftc(2.0 * M_PI, N, F, f);
  for (size_t i = 0; i < N; i++) f[i] /= N;
}

## fft-c.h
#ifndef FFT_C_H
#define FFT_C_H
// A C header file for `@cInclude("fft-c.h")` in zig programs
#include <stdint.h>

#ifdef __cplusplus
#define FFT_C_EXTERN_C extern "C"
#else
#define FFT_C_EXTERN_C
#endif

#if defined(_WIN32)
#define FFT_C_EXPORT FFT_C_EXTERN_C __declspec(dllimport)
#else
#define FFT_C_EXPORT FFT_C_EXTERN_C __attribute__((visibility ("default")))
#endif

// NOTE: C11 standard `double complex` as `struct CN` for zig
struct CN {
    double re;
    double im;
};

FFT_C_EXPORT struct CN rect(double re, double im);
FFT_C_EXPORT void fft(uint32_t n, struct CN f[], struct CN F[]);
FFT_C_EXPORT void ifft(uint32_t n, struct CN F[], struct CN f[]);

#endif

## fft.zig
// A zig program as a library (for FFT implementation)
//NOTE:  This code is written for zig-0.4.0 syntax/builtins/std lib
const pi = @import("std").math.pi;
const sin = @import("std").math.sin;
const cos = @import("std").math.cos;

// complex number type
pub export const CN = extern struct { // "extern struct"  as C struct
    pub re: f64,
    pub im: f64,
    pub fn rect(re: f64, im: f64) CN {
        return CN{
            .re = re,
            .im = im,
        };
    }
    pub fn expi(t: f64) CN {
        //NOTE: sin(t) and cos(t) will be @sin(f64, t) and @cos(f64, t) on zig-0.5.0
        return CN{
            .re = cos(t),
            .im = sin(t),
        };
    }
    pub fn add(self: CN, other: CN) CN {
        return CN{
            .re = self.re + other.re,
            .im = self.im + other.im,
        };
    }
    pub fn sub(self: CN, other: CN) CN {
        return CN{
            .re = self.re - other.re,
            .im = self.im - other.im,
        };
    }
    pub fn mul(self: CN, other: CN) CN {
        return CN{
            .re = self.re * other.re - self.im * other.im,
            .im = self.re * other.im + self.im * other.re,
        };
    }
    pub fn rdiv(self: CN, re: f64) CN {
        return CN{
            .re = self.re / re,
            .im = self.im / re,
        };
    }
};
//NOTE: exporting struct returned fn is not allowed for "wasm32" target

test "CN" {
    const assert = @import("std").debug.assert;
    //const warn = @import("std").debug.warn;
    const eps = @import("std").math.f64_epsilon;
    const abs = @import("std").math.fabs;

    const a = CN.rect(1.0, 0.0);
    const b = CN.expi(pi / 2.0);
    assert(a.re == 1.0 and a.im == 0.0);
    //warn("{}+{}i\n", b.re, b.im);
    assert(abs(b.re) < eps and abs(b.im - 1.0) < eps);
    const apb = a.add(b);
    const asb = a.sub(b);
    assert(abs(apb.re - 1.0) < eps and abs(apb.im - 1.0) < eps);
    assert(abs(asb.re - 1.0) < eps and abs(asb.im + 1.0) < eps);
    const bmb = b.mul(b);
    assert(abs(bmb.re + 1.0) < eps and abs(bmb.im) < eps);
    const apb2 = apb.rdiv(2.0);
    assert(abs(apb2.re - 0.5) < eps and abs(apb2.im - 0.5) < eps);
}
test "CN array" {
    const assert = @import("std").debug.assert;
    //const warn = @import("std").debug.warn;
    const eps = @import("std").math.f64_epsilon;
    const abs = @import("std").math.fabs;

    var cns: [2]CN = undefined;
    cns[0] = CN.rect(1.0, 0.0);
    cns[1] = CN.rect(0.0, 1.0);
    cns[0] = cns[0].add(cns[1]);
    assert(abs(cns[0].re - 1.0) < eps and abs(cns[0].im - 1.0) < eps);
}

// reverse as k-bit
fn revbit(k: u32, n0: u32) u32 {
    //NOTE: @bitreverse will be renamed to @bitReverse on zig-0.5.0
    return @bitreverse(u32, n0) >> @truncate(u5, 32 - k);
}

test "revbit" {
    const assert = @import("std").debug.assert;
    const n: u32 = 8;
    const k: u32 = @ctz(n);
    assert(revbit(k, 0) == 0b000);
    assert(revbit(k, 1) == 0b100);
    assert(revbit(k, 2) == 0b010);
    assert(revbit(k, 3) == 0b110);
    assert(revbit(k, 4) == 0b001);
    assert(revbit(k, 5) == 0b101);
    assert(revbit(k, 6) == 0b011);
    assert(revbit(k, 7) == 0b111);
}

// Loop Cooley-Tukey FFT
fn fftc(t0: f64, n: u32, c: [*]CN, r: [*]CN) void {
    {
        const k: u32 = @ctz(n);
        var i: u32 = 0;
        while (i < n) : (i += 1) {
            r[i] = c[@inlineCall(revbit, k, i)];
        }
    }
    var t = t0;
    var nh: u32 = 1;
    while (nh < n) : (nh <<= 1) {
        t /= 2.0;
        const nh2 = nh << 1;
        var s: u32 = 0;
        while (s < n) : (s += nh2) {
            var i: u32 = 0;
            while (i < nh) : (i += 1) {
                const li = s + i;
                const ri = li + nh;
                const re = r[ri].mul(CN.expi(t * @intToFloat(f64, i)));
                const l = r[li];
                r[li] = l.add(re);
                r[ri] = l.sub(re);
            }
        }
    }
}
pub export fn fft(n: u32, f: [*]CN, F: [*]CN) void {
    fftc(-2.0 * pi, n, f, F);
}
pub export fn ifft(n: u32, F: [*]CN, f: [*]CN) void {
    fftc(2.0 * pi, n, F, f);
    const nf64 = @intToFloat(f64, n);
    var i: u32 = 0;
    while (i < n) : (i += 1) {
        f[i] = f[i].rdiv(nf64);
    }
}

test "fft/ifft" {
    const warn = @import("std").debug.warn;
    const assert = @import("std").debug.assert;
    const abs = @import("std").math.fabs;
    const eps = 1e-15;
    //NOTE: On `test` block, `fn` can define in `struct`
    const util = struct {
        fn warnCNs(n: u32, cns: [*]CN) void {
            var i: u32 = 0;
            while (i < n) : (i += 1) {
                warn("{} + {}i\n", cns[i].re, cns[i].im);
            }
        }
    };

    const n: u32 = 16;
    const v = [n]i32{ 1, 3, 4, 2, 5, 6, 2, 4, 0, 1, 3, 4, 5, 62, 2, 3 };
    var f: [n]CN = undefined;
    {
        var i: u32 = 0;
        while (i < n) : (i += 1) {
            f[i] = CN.rect(@intToFloat(f64, v[i]), 0.0);
        }
    }
    warn("\n[f]\n");
    util.warnCNs(n, &f);

    var F: [n]CN = undefined;
    var r: [n]CN = undefined;
    fft(n, &f, &F);
    ifft(n, &F, &r);

    warn("\n[F]\n");
    util.warnCNs(n, &F);
    warn("\n[r]\n");
    util.warnCNs(n, &r);

    {
        var i: u32 = 0;
        while (i < n) : (i += 1) {
            assert(abs(r[i].re - @intToFloat(f64, v[i])) < eps);
        }
    }
}
	// build all: $ zig build
	// clean up: $ zig build dist-clean
	const builtin = @import("builtin"); // list-up the content with `zig builtin` command
	const Builder = @import("std").build.Builder;

	//NOTE: this build code is for zig-0.4.0, maybe incompatible with zig >= 0.5
	pub fn build(b: *Builder) void {
	{ //[case: WebAssembly wasm] build fft.wasm
	const wasmStep = b.step("fft.wasm", "build fft.wasm");
	const wasm = b.addExecutable("fft.wasm", "fft.zig");
	wasm.setTarget(builtin.Arch.wasm32, builtin.Os.freestanding, builtin.Abi.musl);
	wasm.setBuildMode(builtin.Mode.ReleaseFast);
	wasm.setOutputDir(".");
	wasmStep.dependOn(&wasm.step);
	b.default_step.dependOn(wasmStep);
	}
	{ //[case: zig code only] build fft-bench-zig command
	const exeStep = b.step("fft-bench-zig", "build fft-bench-zig command");
	const zigExe = b.addExecutable("fft-bench-zig", "fft-bench-zig.zig");
	zigExe.setBuildMode(builtin.Mode.ReleaseFast);
	zigExe.setOutputDir(".");
	exeStep.dependOn(&zigExe.step);
	b.default_step.dependOn(exeStep);
	}
	{ //[case: c exe with zig lib] build fft-bench-c command
	// build fft.h and fft.o
	const objStep = b.step("fft.o", "build fft.h and fft.o");
	const fftObj = b.addObject("fft", "fft.zig");
	fftObj.setBuildMode(builtin.Mode.ReleaseFast);
	fftObj.setOutputDir(".");
	objStep.dependOn(&fftObj.step);

	// build fft-bench-c command (cc: expected clang or gcc)
	const cExeStep = b.step("fft-bench-c", "build fft-bench-c command");
	cExeStep.dependOn(objStep);
	const cExe = b.addSystemCommand([][]const u8{
	"cc", "-Wall", "-Wextra", "-O3", "-o", "fft-bench-c", "fft-bench-c.c", "fft.o",
	});
	cExeStep.dependOn(&cExe.step);
	b.default_step.dependOn(cExeStep);
	}
	{ //[case: zig exe with c lib] build fft-bench-cimport command
	const exeStep = b.step("fft-bench-cimport", "build fft-bench-cimport command");

	const cLibStep = b.step("fft-c", "build fft-c.o");
	const cLibObj = b.addSystemCommand([][]const u8{
	"cc", "-Wall", "-Wextra", "-std=c11", "-pedantic", "-O3", "-c", "fft-c.c",
	});
	cLibStep.dependOn(&cLibObj.step);
	exeStep.dependOn(cLibStep);

	const mainObj = b.addExecutable("fft-bench-cimport", "fft-bench-cimport.zig");
	mainObj.addIncludeDir("."); //NOTE: same as `-isystem .`
	mainObj.setBuildMode(builtin.Mode.ReleaseFast);
	mainObj.addObjectFile("fft-c.o");
	mainObj.setOutputDir(".");
	exeStep.dependOn(&mainObj.step);

	b.default_step.dependOn(exeStep);
	}

	{ // clean and dist-clean
	const cleanStep = b.step("clean", "clean-up intermediate iles");
	const rm1 = b.addSystemCommand([][]const u8{
	"rm", "-f", "fft.wasm.o", "fft-bench-zig.o", "fft-bench-c.o", "fft-c.o", "fft-bench-cimport.o",
	});
	cleanStep.dependOn(&rm1.step);
	const rmDir = b.addRemoveDirTree("zig-cache");
	cleanStep.dependOn(&rmDir.step);

	const distCleanStep = b.step("dist-clean", "clean-up build generated files");
	distCleanStep.dependOn(cleanStep);
	const rm2 = b.addSystemCommand([][]const u8{
	"rm", "-f", "fft.wasm", "fft-bench-zig", "fft-bench-c", "fft.o", "fft.h", "fft-bench-cimport",
	});
	distCleanStep.dependOn(&rm2.step);
	}
	}
	// A C11 program which uses zig files: fft.zig
	//[how to build]
	// $ zig build-obj --release-fast fft.zig
	// $ cc -O3 -Wall -Wextra -std=c11 -pedantic -c fft-bench-c.c -o fft-bench-c.o
	// $ cc -O3 -o fft-bench-c fft-bench-c.o fft.o
	#include <stdlib.h>
	#include <stdio.h>
	#include <math.h>
	#include <time.h>
	#include "./fft.h"

	int main() {
	{// example
	const int n = 16;
	const int v[16] = {1,3,4,2, 5,6,2,4, 0,1,3,4, 5,62,2,3};
	struct CN f[n], F[n], r[n];
	for (size_t i = 0; i < n; i++) f[i] = (struct CN) {.re = v[i], .im = 0.0};

	puts("[f]");
	for (size_t i = 0; i < n; i++) printf("%f + %fi\n", f[i].re, f[i].im);

	fft(n, f, F);
	ifft(n, F, r);

	puts("[F]");
	for (size_t i = 0; i < n; i++) printf("%f + %fi\n", F[i].re, F[i].im);
	puts("[r]");
	for (size_t i = 0; i < n; i++) printf("%f + %fi\n", r[i].re, r[i].im);
	}

	{// benchmark
	const int n = 1024 * 1024;
	struct CN * f = calloc(n, sizeof (struct CN));
	for (size_t i = 0; i < n; i++) {
	f[i] = (struct CN) {.re = sin(i) * i, .im = 0.0};
	}

	struct CN * F = calloc(n, sizeof (struct CN));
	struct CN * r = calloc(n, sizeof (struct CN));
	fft(n, f, F);
	ifft(n, F, r);
	free(f);
	free(F);
	free(r);
	}
	return 0;
	}
	// A zig program which uses C files: fft-c.h, fft-c.c
	// [how to build]
	// $ cc -Wall -Wextra -std=c11 -pedantic -O3 -c fft-c.c
	// $ zig build-exe --release-fast --object fft-c.o -isystem . fft-bench-cimport.zig
	const heap = @import("std").heap;
	const warn = @import("std").debug.warn;
	const sin = @import("std").math.sin;
	const milliTimestamp = @import("std").os.time.milliTimestamp;

	const libfft = @cImport({
	@cInclude("fft-c.h");
	});
	const CN = libfft.CN;
	const rect = libfft.rect;
	const fft = libfft.fft;
	const ifft = libfft.ifft;

	//NOTE: These are extracted code of above @cImport libfft
	//const CN = extern struct {
	// re: f64,
	// im: f64,
	//};
	//extern "./fft-c.o" fn rect(re: f64, im: f64) CN;
	//extern "./fft-c.o" fn fft(n: u32, f: [c]CN, F: [c]CN) void;
	//extern "./fft-c.o" fn ifft(n: u32, F: [c]CN, f: [c]CN) void;

	fn warnCNs(n: u32, cns: [*]CN) void {
	var i: u32 = 0;
	while (i < n) : (i += 1) {
	warn("{} + {}i\n", cns[i].re, cns[i].im);
	}
	}

	pub fn main() !void {
	{ //example
	const n: u32 = 16;
	const v = [n]i32{ 1, 3, 4, 2, 5, 6, 2, 4, 0, 1, 3, 4, 5, 62, 2, 3 };
	var f: [n]CN = undefined;
	{
	var i: u32 = 0;
	while (i < n) : (i += 1) {
	f[i] = rect(@intToFloat(f64, v[i]), 0.0);
	}
	}
	warn("\n[f]\n");
	warnCNs(n, &f);

	var F: [n]CN = undefined;
	var r: [n]CN = undefined;
	fft(n, @ptrCast([c]CN, &f), @ptrCast([c]CN, &F));
	ifft(n, @ptrCast([c]CN, &F), @ptrCast([c]CN, &r));

	warn("\n[F]\n");
	warnCNs(n, &F);
	warn("\n[r]\n");
	warnCNs(n, &r);
	}

	{ //benchmark
	//NOTE: allocators in std will be changed on 0.5.0
	var direct_allocator = heap.DirectAllocator.init();
	var arena = heap.ArenaAllocator.init(&direct_allocator.allocator);
	const allocator = &arena.allocator;
	defer direct_allocator.deinit();
	defer arena.deinit();

	const n: u32 = 1024 * 1024;
	var f: [*]CN = try allocator.create([n]CN);
	{
	var i: u32 = 0;
	while (i < n) : (i += 1) {
	const if64 = @intToFloat(f64, i);
	f[i] = rect(sin(if64) * if64, 0.0);
	}
	}

	var F: [*]CN = try allocator.create([n]CN);
	var r: [*]CN = try allocator.create([n]CN);

	const start = milliTimestamp();
	fft(n, f, F);
	ifft(n, F, r);
	const stop = milliTimestamp();
	warn("fft-ifft: {}ms\n", stop - start);
	}
	}
	// A node.js JavaScript program which uses WebAssembly wasm from zig files: fft.zig
	// [how to build fft.wasm from fft.zig]
	// $ zig build-exe --release-fast -target wasm32-freestanding --name fft.wasm fft.zig
	const fs = require("fs").promises;

	(async function () {
	const buf = await fs.readFile("./fft.wasm");
	const {instance} = await WebAssembly.instantiate(buf, {});
	const {__wasm_call_ctors, memory, fft, ifft} = instance.exports;
	__wasm_call_ctors();
	memory.grow(1);

	{// example
	const N = 16, fofs = 0, Fofs = N * 2 * 8, rofs = N * 4 * 8;
	const f = new Float64Array(memory.buffer, fofs, N * 2);
	const F = new Float64Array(memory.buffer, Fofs, N * 2);
	const r = new Float64Array(memory.buffer, rofs, N * 2);

	const fr0 = [1,3,4,2, 5,6,2,4, 0,1,3,4, 5,62,2,3];
	fr0.forEach((v, i) => {
	[f[i * 2], f[i * 2 + 1]] = [v, 0.0];
	});

	fft(N, fofs, Fofs);
	ifft(N, Fofs, rofs);

	console.log(`[fft]`);
	for (let i = 0; i < N; i++) {
	console.log([F[i * 2], F[i * 2 + 1]]);
	}

	console.log(`[ifft]`);
	for (let i = 0; i < N; i++) {
	console.log([r[i * 2], r[i * 2 + 1]]);
	}
	}

	{
	const N = 1024 * 1024;
	const fr0 = [...Array(N).keys()].map(i => Math.sin(i) * i);
	const f0 = fr0.map(n => [n, 0]);

	const BN = N * 2 * 8 * 3, fofs = 0, Fofs = N * 2 * 8, rofs = N * 4 * 8;
	while (memory.buffer.byteLength < BN) memory.grow(1);
	const f = new Float64Array(memory.buffer, fofs, N * 2);
	const F = new Float64Array(memory.buffer, Fofs, N * 2);
	const r = new Float64Array(memory.buffer, rofs, N * 2);
	fr0.forEach((v, i) => {
	[f[i * 2], f[i * 2 + 1]] = [v, 0.0];
	});

	console.time(`fft-ifft`);
	fft(N, fofs, Fofs);
	ifft(N, Fofs, rofs);
	console.timeEnd(`fft-ifft`);
	}
	})().catch(console.error);
	// A zig program which uses zig source files: fft.zig
	// [how to build]
	// $ zig build-exe --release-fast fft-bench-zig.zig
	const heap = @import("std").heap;
	const warn = @import("std").debug.warn;
	const sin = @import("std").math.sin;
	const milliTimestamp = @import("std").os.time.milliTimestamp;

	const CN = @import("./fft.zig").CN;
	const fft = @import("./fft.zig").fft;
	const ifft = @import("./fft.zig").ifft;

	fn warnCNs(n: u32, cns: [*]CN) void {
	var i: u32 = 0;
	while (i < n) : (i += 1) {
	warn("{} + {}i\n", cns[i].re, cns[i].im);
	}
	}

	pub fn main() !void {
	{ //example
	const n: u32 = 16;
	const v = [n]i32{ 1, 3, 4, 2, 5, 6, 2, 4, 0, 1, 3, 4, 5, 62, 2, 3 };
	var f: [n]CN = undefined;
	{
	var i: u32 = 0;
	while (i < n) : (i += 1) {
	f[i] = CN.rect(@intToFloat(f64, v[i]), 0.0);
	}
	}
	warn("\n[f]\n");
	warnCNs(n, &f);

	var F: [n]CN = undefined;
	var r: [n]CN = undefined;
	fft(n, &f, &F);
	ifft(n, &F, &r);

	warn("\n[F]\n");
	warnCNs(n, &F);
	warn("\n[r]\n");
	warnCNs(n, &r);
	}

	{ //benchmark
	//NOTE: allocators in std will be changed on 0.5.0
	var direct_allocator = heap.DirectAllocator.init();
	var arena = heap.ArenaAllocator.init(&direct_allocator.allocator);
	const allocator = &arena.allocator;
	defer direct_allocator.deinit();
	defer arena.deinit();

	const n: u32 = 1024 * 1024;
	var f = try allocator.create([n]CN);
	{
	var i: u32 = 0;
	while (i < n) : (i += 1) {
	const if64 = @intToFloat(f64, i);
	f[i] = CN.rect(sin(if64) * if64, 0.0);
	}
	}

	var F = try allocator.create([n]CN);
	var r = try allocator.create([n]CN);

	const start = milliTimestamp();
	fft(n, f, F);
	ifft(n, F, r);
	const stop = milliTimestamp();
	warn("fft-ifft: {}ms\n", stop -start);
	}
	}
	// A C11 program as a library as a FFT implementation for fft-c.h
	// $ cc -Wall -Wextra -std=c11 -pedantic -O3 -c fft-c.c
	#include <complex.h> // complex, I, cexp
	#include <math.h> // M_PI
	#include <stdbool.h> // bool
	#include <stddef.h> // size_t
	#include <stdint.h> // uint64_t

	typedef double complex CN;

	static inline uint32_t reverse_bit(uint32_t s0) {
	uint32_t s1 = ((s0 & 0xaaaaaaaa) >> 1) \| ((s0 & 0x55555555) << 1);
	uint32_t s2 = ((s1 & 0xcccccccc) >> 2) \| ((s1 & 0x33333333) << 2);
	uint32_t s3 = ((s2 & 0xf0f0f0f0) >> 4) \| ((s2 & 0x0f0f0f0f) << 4);
	uint32_t s4 = ((s3 & 0xff00ff00) >> 8) \| ((s3 & 0x00ff00ff) << 8);
	return ((s4 & 0xffff0000) >> 16) \| ((s4 & 0x0000ffff) << 16);
	}

	static inline uint32_t rev_bit(uint32_t k, uint32_t n) {
	return reverse_bit(n) >> (8 * sizeof (uint32_t) - k);
	}

	static inline uint32_t trailing_zeros(uint32_t n) {
	uint32_t k = 0, s = n;
	if (!(s & 0x0000ffff)) {k += 16; s >>= 16;}
	if (!(s & 0x000000ff)) {k += 8; s >>= 8;}
	if (!(s & 0x0000000f)) {k += 4; s >>= 4;}
	if (!(s & 0x00000003)) {k += 2; s >>= 2;}
	return (s & 1) ? k : k + 1;
	}

	static void fftc(double t, uint32_t N, const CN c[N], CN ret[N]) {
	uint32_t k = trailing_zeros(N);
	for (uint32_t i = 0; i < N; i++) ret[i] = c[rev_bit(k, i)];
	for (uint32_t Nh = 1; Nh < N; Nh <<= 1) {
	t *= 0.5;
	for (uint32_t s = 0; s < N; s += Nh << 1) {
	for (uint32_t i = 0; i < Nh; i++) { //NOTE: s-outside/i-inside is faster
	uint32_t li = s + i;
	uint32_t ri = li + Nh;
	CN re = ret[ri] * cexp(t * i * I);
	CN l = ret[li];
	ret[li] = l + re;
	ret[ri] = l - re;
	}
	}
	}
	}

	extern CN rect(double re, double im) {
	return re + im * I;
	}
	extern void fft(uint32_t N, const CN f[N], CN F[N]) {
	fftc(-2.0 * M_PI, N, f, F);
	}
	extern void ifft(uint32_t N, const CN F[N], CN f[N]) {
	fftc(2.0 * M_PI, N, F, f);
	for (size_t i = 0; i < N; i++) f[i] /= N;
	}
	#ifndef FFT_C_H
	#define FFT_C_H
	// A C header file for `@cInclude("fft-c.h")` in zig programs
	#include <stdint.h>

	#ifdef __cplusplus
	#define FFT_C_EXTERN_C extern "C"
	#else
	#define FFT_C_EXTERN_C
	#endif

	#if defined(_WIN32)
	#define FFT_C_EXPORT FFT_C_EXTERN_C __declspec(dllimport)
	#else
	#define FFT_C_EXPORT FFT_C_EXTERN_C __attribute__((visibility ("default")))
	#endif

	// NOTE: C11 standard `double complex` as `struct CN` for zig
	struct CN {
	double re;
	double im;
	};

	FFT_C_EXPORT struct CN rect(double re, double im);
	FFT_C_EXPORT void fft(uint32_t n, struct CN f[], struct CN F[]);
	FFT_C_EXPORT void ifft(uint32_t n, struct CN F[], struct CN f[]);

	#endif
	// A zig program as a library (for FFT implementation)
	//NOTE: This code is written for zig-0.4.0 syntax/builtins/std lib
	const pi = @import("std").math.pi;
	const sin = @import("std").math.sin;
	const cos = @import("std").math.cos;

	// complex number type
	pub export const CN = extern struct { // "extern struct" as C struct
	pub re: f64,
	pub im: f64,
	pub fn rect(re: f64, im: f64) CN {
	return CN{
	.re = re,
	.im = im,
	};
	}
	pub fn expi(t: f64) CN {
	//NOTE: sin(t) and cos(t) will be @sin(f64, t) and @cos(f64, t) on zig-0.5.0
	return CN{
	.re = cos(t),
	.im = sin(t),
	};
	}
	pub fn add(self: CN, other: CN) CN {
	return CN{
	.re = self.re + other.re,
	.im = self.im + other.im,
	};
	}
	pub fn sub(self: CN, other: CN) CN {
	return CN{
	.re = self.re - other.re,
	.im = self.im - other.im,
	};
	}
	pub fn mul(self: CN, other: CN) CN {
	return CN{
	.re = self.re * other.re - self.im * other.im,
	.im = self.re * other.im + self.im * other.re,
	};
	}
	pub fn rdiv(self: CN, re: f64) CN {
	return CN{
	.re = self.re / re,
	.im = self.im / re,
	};
	}
	};
	//NOTE: exporting struct returned fn is not allowed for "wasm32" target

	test "CN" {
	const assert = @import("std").debug.assert;
	//const warn = @import("std").debug.warn;
	const eps = @import("std").math.f64_epsilon;
	const abs = @import("std").math.fabs;

	const a = CN.rect(1.0, 0.0);
	const b = CN.expi(pi / 2.0);
	assert(a.re == 1.0 and a.im == 0.0);
	//warn("{}+{}i\n", b.re, b.im);
	assert(abs(b.re) < eps and abs(b.im - 1.0) < eps);
	const apb = a.add(b);
	const asb = a.sub(b);
	assert(abs(apb.re - 1.0) < eps and abs(apb.im - 1.0) < eps);
	assert(abs(asb.re - 1.0) < eps and abs(asb.im + 1.0) < eps);
	const bmb = b.mul(b);
	assert(abs(bmb.re + 1.0) < eps and abs(bmb.im) < eps);
	const apb2 = apb.rdiv(2.0);
	assert(abs(apb2.re - 0.5) < eps and abs(apb2.im - 0.5) < eps);
	}
	test "CN array" {
	const assert = @import("std").debug.assert;
	//const warn = @import("std").debug.warn;
	const eps = @import("std").math.f64_epsilon;
	const abs = @import("std").math.fabs;

	var cns: [2]CN = undefined;
	cns[0] = CN.rect(1.0, 0.0);
	cns[1] = CN.rect(0.0, 1.0);
	cns[0] = cns[0].add(cns[1]);
	assert(abs(cns[0].re - 1.0) < eps and abs(cns[0].im - 1.0) < eps);
	}

	// reverse as k-bit
	fn revbit(k: u32, n0: u32) u32 {
	//NOTE: @bitreverse will be renamed to @bitReverse on zig-0.5.0
	return @bitreverse(u32, n0) >> @truncate(u5, 32 - k);
	}

	test "revbit" {
	const assert = @import("std").debug.assert;
	const n: u32 = 8;
	const k: u32 = @ctz(n);
	assert(revbit(k, 0) == 0b000);
	assert(revbit(k, 1) == 0b100);
	assert(revbit(k, 2) == 0b010);
	assert(revbit(k, 3) == 0b110);
	assert(revbit(k, 4) == 0b001);
	assert(revbit(k, 5) == 0b101);
	assert(revbit(k, 6) == 0b011);
	assert(revbit(k, 7) == 0b111);
	}

	// Loop Cooley-Tukey FFT
	fn fftc(t0: f64, n: u32, c: []CN, r: []CN) void {
	{
	const k: u32 = @ctz(n);
	var i: u32 = 0;
	while (i < n) : (i += 1) {
	r[i] = c[@inlineCall(revbit, k, i)];
	}
	}
	var t = t0;
	var nh: u32 = 1;
	while (nh < n) : (nh <<= 1) {
	t /= 2.0;
	const nh2 = nh << 1;
	var s: u32 = 0;
	while (s < n) : (s += nh2) {
	var i: u32 = 0;
	while (i < nh) : (i += 1) {
	const li = s + i;
	const ri = li + nh;
	const re = r[ri].mul(CN.expi(t * @intToFloat(f64, i)));
	const l = r[li];
	r[li] = l.add(re);
	r[ri] = l.sub(re);
	}
	}
	}
	}
	pub export fn fft(n: u32, f: []CN, F: []CN) void {
	fftc(-2.0 * pi, n, f, F);
	}
	pub export fn ifft(n: u32, F: []CN, f: []CN) void {
	fftc(2.0 * pi, n, F, f);
	const nf64 = @intToFloat(f64, n);
	var i: u32 = 0;
	while (i < n) : (i += 1) {
	f[i] = f[i].rdiv(nf64);
	}
	}

	test "fft/ifft" {
	const warn = @import("std").debug.warn;
	const assert = @import("std").debug.assert;
	const abs = @import("std").math.fabs;
	const eps = 1e-15;
	//NOTE: On `test` block, `fn` can define in `struct`
	const util = struct {
	fn warnCNs(n: u32, cns: [*]CN) void {
	var i: u32 = 0;
	while (i < n) : (i += 1) {
	warn("{} + {}i\n", cns[i].re, cns[i].im);
	}
	}
	};

	const n: u32 = 16;
	const v = [n]i32{ 1, 3, 4, 2, 5, 6, 2, 4, 0, 1, 3, 4, 5, 62, 2, 3 };
	var f: [n]CN = undefined;
	{
	var i: u32 = 0;
	while (i < n) : (i += 1) {
	f[i] = CN.rect(@intToFloat(f64, v[i]), 0.0);
	}
	}
	warn("\n[f]\n");
	util.warnCNs(n, &f);

	var F: [n]CN = undefined;
	var r: [n]CN = undefined;
	fft(n, &f, &F);
	ifft(n, &F, &r);

	warn("\n[F]\n");
	util.warnCNs(n, &F);
	warn("\n[r]\n");
	util.warnCNs(n, &r);

	{
	var i: u32 = 0;
	while (i < n) : (i += 1) {
	assert(abs(r[i].re - @intToFloat(f64, v[i])) < eps);
	}
	}
	}