AliceLR/!OUTPUT.txt

## !OUTPUT.txt
$ ./mixer.exe
Output samples: 4800000

Function: exo_mixer
 0: 48000Hz <- 48000Hz, vol=256, chn=1, mode=FLAT    :    22435 us
 1: 48000Hz <- 48000Hz, vol=256, chn=2, mode=FLAT    :    22965 us
 2: 48000Hz <- 48000Hz, vol=179, chn=1, mode=FLAT    :    26772 us
 3: 48000Hz <- 48000Hz, vol=179, chn=2, mode=FLAT    :    29794 us
 4: 48000Hz <- 44100Hz, vol=256, chn=1, mode=Nearest :    28849 us
 5: 48000Hz <- 44100Hz, vol=256, chn=2, mode=Nearest :    27272 us
 6: 48000Hz <- 44100Hz, vol=179, chn=1, mode=Nearest :    26582 us
 7: 48000Hz <- 44100Hz, vol=179, chn=2, mode=Nearest :    32344 us
 8: 48000Hz <- 44100Hz, vol=256, chn=1, mode=Linear  :    36516 us
 9: 48000Hz <- 44100Hz, vol=256, chn=2, mode=Linear  :    47827 us
10: 48000Hz <- 44100Hz, vol=179, chn=1, mode=Linear  :    40985 us
11: 48000Hz <- 44100Hz, vol=179, chn=2, mode=Linear  :    53354 us
12: 48000Hz <- 44100Hz, vol=256, chn=1, mode=Cubic   :   103178 us
13: 48000Hz <- 44100Hz, vol=256, chn=2, mode=Cubic   :   171409 us
14: 48000Hz <- 44100Hz, vol=179, chn=1, mode=Cubic   :   110741 us
15: 48000Hz <- 44100Hz, vol=179, chn=2, mode=Cubic   :   184491 us
16: 48000Hz <- 22050Hz, vol=256, chn=1, mode=Nearest :    24953 us
17: 48000Hz <- 22050Hz, vol=256, chn=2, mode=Nearest :    26652 us
18: 48000Hz <- 22050Hz, vol=179, chn=1, mode=Nearest :    26688 us
19: 48000Hz <- 22050Hz, vol=179, chn=2, mode=Nearest :    32573 us
20: 48000Hz <- 22050Hz, vol=256, chn=1, mode=Linear  :    35655 us
21: 48000Hz <- 22050Hz, vol=256, chn=2, mode=Linear  :    54606 us
22: 48000Hz <- 22050Hz, vol=179, chn=1, mode=Linear  :    38543 us
23: 48000Hz <- 22050Hz, vol=179, chn=2, mode=Linear  :    52945 us
24: 48000Hz <- 22050Hz, vol=256, chn=1, mode=Cubic   :   103621 us
25: 48000Hz <- 22050Hz, vol=256, chn=2, mode=Cubic   :   166605 us
26: 48000Hz <- 22050Hz, vol=179, chn=1, mode=Cubic   :   109783 us
27: 48000Hz <- 22050Hz, vol=179, chn=2, mode=Cubic   :   187311 us
28: 48000Hz <- 88200Hz, vol=256, chn=1, mode=Nearest :    24524 us
29: 48000Hz <- 88200Hz, vol=256, chn=2, mode=Nearest :    28573 us
30: 48000Hz <- 88200Hz, vol=179, chn=1, mode=Nearest :    26770 us
31: 48000Hz <- 88200Hz, vol=179, chn=2, mode=Nearest :    33647 us
32: 48000Hz <- 88200Hz, vol=256, chn=1, mode=Linear  :    36670 us
33: 48000Hz <- 88200Hz, vol=256, chn=2, mode=Linear  :    52245 us
34: 48000Hz <- 88200Hz, vol=179, chn=1, mode=Linear  :    38754 us
35: 48000Hz <- 88200Hz, vol=179, chn=2, mode=Linear  :    53203 us
36: 48000Hz <- 88200Hz, vol=256, chn=1, mode=Cubic   :   105438 us
37: 48000Hz <- 88200Hz, vol=256, chn=2, mode=Cubic   :   166757 us
38: 48000Hz <- 88200Hz, vol=179, chn=1, mode=Cubic   :   110132 us
39: 48000Hz <- 88200Hz, vol=179, chn=2, mode=Cubic   :   184436 us

Function: templ_mixer
 0: 48000Hz <- 48000Hz, vol=256, chn=1, mode=FLAT    :    17131 us
 1: 48000Hz <- 48000Hz, vol=256, chn=2, mode=FLAT    :    23422 us
 2: 48000Hz <- 48000Hz, vol=179, chn=1, mode=FLAT    :    23503 us
 3: 48000Hz <- 48000Hz, vol=179, chn=2, mode=FLAT    :    20326 us
 4: 48000Hz <- 44100Hz, vol=256, chn=1, mode=Nearest :    20711 us
 5: 48000Hz <- 44100Hz, vol=256, chn=2, mode=Nearest :    24233 us
 6: 48000Hz <- 44100Hz, vol=179, chn=1, mode=Nearest :    22659 us
 7: 48000Hz <- 44100Hz, vol=179, chn=2, mode=Nearest :    29459 us
 8: 48000Hz <- 44100Hz, vol=256, chn=1, mode=Linear  :    33721 us
 9: 48000Hz <- 44100Hz, vol=256, chn=2, mode=Linear  :    46263 us
10: 48000Hz <- 44100Hz, vol=179, chn=1, mode=Linear  :    36550 us
11: 48000Hz <- 44100Hz, vol=179, chn=2, mode=Linear  :    54954 us
12: 48000Hz <- 44100Hz, vol=256, chn=1, mode=Cubic   :    98541 us
13: 48000Hz <- 44100Hz, vol=256, chn=2, mode=Cubic   :   169011 us
14: 48000Hz <- 44100Hz, vol=179, chn=1, mode=Cubic   :   107805 us
15: 48000Hz <- 44100Hz, vol=179, chn=2, mode=Cubic   :   175298 us
16: 48000Hz <- 22050Hz, vol=256, chn=1, mode=Nearest :    19606 us
17: 48000Hz <- 22050Hz, vol=256, chn=2, mode=Nearest :    23786 us
18: 48000Hz <- 22050Hz, vol=179, chn=1, mode=Nearest :    22596 us
19: 48000Hz <- 22050Hz, vol=179, chn=2, mode=Nearest :    29747 us
20: 48000Hz <- 22050Hz, vol=256, chn=1, mode=Linear  :    32246 us
21: 48000Hz <- 22050Hz, vol=256, chn=2, mode=Linear  :    47347 us
22: 48000Hz <- 22050Hz, vol=179, chn=1, mode=Linear  :    36646 us
23: 48000Hz <- 22050Hz, vol=179, chn=2, mode=Linear  :    53825 us
24: 48000Hz <- 22050Hz, vol=256, chn=1, mode=Cubic   :    98109 us
25: 48000Hz <- 22050Hz, vol=256, chn=2, mode=Cubic   :   166653 us
26: 48000Hz <- 22050Hz, vol=179, chn=1, mode=Cubic   :   106044 us
27: 48000Hz <- 22050Hz, vol=179, chn=2, mode=Cubic   :   177058 us
28: 48000Hz <- 88200Hz, vol=256, chn=1, mode=Nearest :    22747 us
29: 48000Hz <- 88200Hz, vol=256, chn=2, mode=Nearest :    27709 us
30: 48000Hz <- 88200Hz, vol=179, chn=1, mode=Nearest :    22850 us
31: 48000Hz <- 88200Hz, vol=179, chn=2, mode=Nearest :    30354 us
32: 48000Hz <- 88200Hz, vol=256, chn=1, mode=Linear  :    32793 us
33: 48000Hz <- 88200Hz, vol=256, chn=2, mode=Linear  :    47094 us
34: 48000Hz <- 88200Hz, vol=179, chn=1, mode=Linear  :    36086 us
35: 48000Hz <- 88200Hz, vol=179, chn=2, mode=Linear  :    54275 us
36: 48000Hz <- 88200Hz, vol=256, chn=1, mode=Cubic   :    97572 us
37: 48000Hz <- 88200Hz, vol=256, chn=2, mode=Cubic   :   167816 us
38: 48000Hz <- 88200Hz, vol=179, chn=1, mode=Cubic   :   111874 us
39: 48000Hz <- 88200Hz, vol=179, chn=2, mode=Cubic   :   182448 us

Mismatches: 0

## exo_mixer.c
/* MegaZeux
 *
 * Copyright (C) 2004 Gilead Kutnick <exophase@adelphia.net>
 * Copyright (C) 2004 madbrain
 * Copyright (C) 2007 Alistair John Strachan <alistair@devzero.co.uk>
 * Copyright (C) 2018 Alice Rowan <petrifiedrowan@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "mixer.h"
#include <math.h>

// Short fixed-width aliases used by the macros and the mixer function below.
typedef uint8_t Uint8;
typedef int16_t Sint16;
typedef uint32_t Uint32;
typedef int32_t Sint32;
typedef int64_t Sint64;

// FP_SHIFT is the number of fractional bits in the fixed point source sample
// index; FP_AND is the mask that extracts those fractional bits.
#define FP_SHIFT      13
#define FP_AND        ((1 << FP_SHIFT) - 1)

// Macros are used to generate functions to help reduce redundancy and
// maintain some kind of speed. Additionally, fixed point is used (again,
// for speed purposes, to avoid the hits in converting between fixed and
// floating point). I'm almost completely sure that fixed point is ideal
// for nearest and linear resampling, but it might not be so good for
// cubic because it has to use 64-bit ints and a lot of shifts
// (should work great on a 64-bit machine though). The cubic resampler can
// be further optimized in other ways as well, and might be in due time.
// For now, if cubic doesn't give you good speed, stick with linear, which
// should be quite fast.

// <type>_SETUP_INDEX(channels): per-iteration index setup emitted at the top
// of the loop body. Flat and nearest need none, so they expand to nothing.
#define FLAT_SETUP_INDEX(channels)
#define NEAREST_SETUP_INDEX(channels)

// Split the fixed point position s_index into int_index (the whole frame
// number multiplied by the channel count) and frac_index (the low FP_SHIFT
// bits).
#define FRACTIONAL_SETUP_INDEX(channels)                                \
  int_index = (Sint32)(s_index >> FP_SHIFT) * channels;                 \
  frac_index = (Sint32)(s_index & FP_AND);                              \

// Linear and cubic both use the integer/fraction split.
#define LINEAR_SETUP_INDEX(channels)                                    \
  FRACTIONAL_SETUP_INDEX(channels)                                      \

#define CUBIC_SETUP_INDEX(channels)                                     \
  FRACTIONAL_SETUP_INDEX(channels)                                      \

// <type>_MIX_SAMPLE(dest, channels, offset): produce one sample for channel
// `offset`. `dest` is a statement prefix such as `dest_buffer[i] +=` that the
// resulting expression is appended to.
//
// Flat: copy the source sample at the running source index i2.
#define FLAT_MIX_SAMPLE(dest, channels, offset)                         \
  dest src_buffer[i2 + offset]                                          \

// Nearest: take the sample of the frame addressed by the integer part of
// s_index; the fractional part is ignored.
#define NEAREST_MIX_SAMPLE(dest, channels, offset)                      \
  dest src_buffer[((s_index >> FP_SHIFT) * channels) + offset]          \

// Linear: blend the sample at int_index with the same channel of the next
// frame, weighted by frac_index. Note the variable named right_sample holds
// the sample at int_index, i.e. the earlier of the two.
#define LINEAR_MIX_SAMPLE(dest, channels, offset)                       \
  right_sample = src_buffer[int_index + offset];                        \
  dest (right_sample + ((frac_index *                                   \
   (src_buffer[int_index + channels + offset] - right_sample)) >>       \
   FP_SHIFT))                                                           \

// Cubic: read four frames (one before int_index, int_index itself, and the
// two after), scale them up by FP_SHIFT, derive the coefficients a, b and c,
// and evaluate ((a*t + b)*t + c)*t + s1 in fixed point with t = frac_index
// before shifting back down. Because s0 is read at int_index - channels, the
// source must be readable one frame before the current position.
#define CUBIC_MIX_SAMPLE(dest, channels, offset)                        \
  s0 = src_buffer[int_index - channels + offset] << FP_SHIFT;           \
  s1 = src_buffer[int_index + offset] << FP_SHIFT;                      \
  s2 = src_buffer[int_index + channels + offset] << FP_SHIFT;           \
  s3 = src_buffer[int_index + (channels * 2) + offset] << FP_SHIFT;     \
                                                                        \
  a = (((3 * (s1 - s2)) - s0 + s3) / 2);                                \
  b = ((2 * s2) + s0 - (((5 * s1) + s3) / 2));                          \
  c = ((s2 - s0) / 2);                                                  \
                                                                        \
  dest ((Sint32)(((((((((a * frac_index) >> FP_SHIFT) + b) *            \
   frac_index) >> FP_SHIFT) + c) * frac_index) >> FP_SHIFT) + s1) >>    \
   FP_SHIFT)                                                            \

// <type>_LOOP_HEADER(channels): opens the mixing loop (including its opening
// brace). The destination index i advances by 2 per iteration, one stereo
// output frame.
//
// Resampling loops also advance the fixed point source position by d.
#define RESAMPLE_LOOP_HEADER                                            \
  for(i = 0; i < write_len; i += 2, s_index += d)                       \
  {                                                                     \

// The flat loop walks the source with a plain index i2 that advances by the
// source channel count.
#define FLAT_LOOP_HEADER(channels)                                      \
  for(i = 0, i2 = 0; i < write_len; i += 2, i2 += channels)             \
  {                                                                     \

#define NEAREST_LOOP_HEADER(dummy)                                      \
  RESAMPLE_LOOP_HEADER                                                  \

#define LINEAR_LOOP_HEADER(dummy)                                       \
  RESAMPLE_LOOP_HEADER                                                  \

#define CUBIC_LOOP_HEADER(dummy)                                        \
  RESAMPLE_LOOP_HEADER                                                  \

// <type>_HEADER: local variable declarations needed by each mixer type.
//
// Integer and fractional parts of the source position.
#define SPLIT_HEADER                                                    \
  Sint32 int_index;                                                     \
  Sint32 frac_index;                                                    \

// Working copies of the enclosing function's sample_index and
// frequency_delta.
#define RESAMPLE_HEADER                                                 \
  Sint64 s_index = sample_index;                                 \
  Sint64 d = frequency_delta;                                    \

#define FLAT_HEADER                                                     \
  Uint32 i2;                                                            \

#define NEAREST_HEADER                                                  \
  RESAMPLE_HEADER                                                       \

#define LINEAR_HEADER                                                   \
  RESAMPLE_HEADER                                                       \
  SPLIT_HEADER                                                          \
  Sint32 right_sample;                                                  \

#define CUBIC_HEADER                                                    \
  RESAMPLE_HEADER                                                       \
  SPLIT_HEADER                                                          \
  Sint32 s0, s1, s2, s3;                                                \
  Sint64 a, b, c;                                                       \

// <type>_FOOTER(channels): closes the loop and writes the source position
// back to sample_index.
//
// Resampling footers subtract data_window_length / (channels * 2) frames,
// converted to fixed point, from the position before storing it.
#define MIXER_FOOTER(channels)                                          \
  }                                                                     \
  s_index -= (data_window_length / (channels * 2)) << FP_SHIFT;  \
  sample_index = s_index;                                        \

// The flat footer simply resets the position to zero.
#define FLAT_FOOTER(dummy)                                              \
  }                                                                     \
  sample_index = 0;                                              \

#define NEAREST_FOOTER(channels)                                        \
  MIXER_FOOTER(channels)                                                \

#define LINEAR_FOOTER(channels)                                         \
  MIXER_FOOTER(channels)                                                \

#define CUBIC_FOOTER(channels)                                          \
  MIXER_FOOTER(channels)                                                \

// Previous, division-based form of VOL, kept for reference.
/*
#define VOL                                                             \
  * volume / 256                                                        \
*/
// Volume modifier appended after a <type>_MIX_SAMPLE expression: multiplies
// the mixed sample by `volume` and shifts right by 8.
#define VOL                                                             \
  * volume >> 8                                                         \

// Emit one `case num:` of the dispatch switch for a stereo source: both
// output samples of the frame are mixed separately from source channel
// offsets 0 and 1. `mod` is either empty or VOL.
#define SETUP_MIXER(type, num, mod)                                     \
case num:                                                               \
{                                                                       \
  type##_HEADER                                                         \
  type##_LOOP_HEADER(2)                                                 \
  type##_SETUP_INDEX(2)                                                 \
  type##_MIX_SAMPLE(dest_buffer[i] +=, 2, 0) mod;                       \
  type##_MIX_SAMPLE(dest_buffer[i + 1] +=, 2, 1) mod;                   \
  type##_FOOTER(2)                                                      \
  break;                                                                \
}                                                                       \

// Emit one `case num:` for a mono source: a single sample is computed and
// added to both output samples of the frame.
#define SETUP_MIXER_MONO(type, num, mod)                                \
case num:                                                               \
{                                                                       \
  Sint32 current_sample;                                                \
  type##_HEADER                                                         \
  type##_LOOP_HEADER(1)                                                 \
  type##_SETUP_INDEX(1)                                                 \
  type##_MIX_SAMPLE(current_sample =, 1, 0) mod;                        \
  dest_buffer[i] += current_sample;                                     \
  dest_buffer[i + 1] += current_sample;                                 \
  type##_FOOTER(1)                                                      \
  break;                                                                \
}                                                                       \

// Emit the four cases for one resample type. Within each group of four,
// +0 is stereo without volume, +1 is mono without volume, +2 is stereo with
// volume and +3 is mono with volume.
#define SETUP_MIXER_ALL(type, num)                                      \
  SETUP_MIXER(type, (num * 4), )                                        \
  SETUP_MIXER_MONO(type, (num * 4) + 1, )                               \
  SETUP_MIXER(type, (num * 4) + 2, VOL)                                 \
  SETUP_MIXER_MONO(type, (num * 4) + 3, VOL)                            \


void exo_mix_data(int32_t * __restrict__ dest_buffer, size_t len, const int16_t *src,
 size_t src_len, int volume, unsigned int channels, unsigned int resample_mode,
 size_t input_frequency, size_t output_frequency)
{
  Sint16 *src_buffer = (Sint16 *)src;
  Uint32 write_len = len / 2;
  Uint32 volume_mode = 1;
  Uint32 mono_mode = (channels == 1);
  Uint32 i;

  Sint64 frequency_delta = ((Sint64)input_frequency << FP_SHIFT) / output_frequency;
  Sint64 sample_index = 0;

  Uint32 data_window_length =
   (Uint32)(ceil((double)write_len / channels *
   input_frequency / output_frequency) * 2 * channels);

  if(input_frequency == output_frequency)
    resample_mode = 0;

  if(volume == 256)
    volume_mode = 0;

  switch((resample_mode << 2) | (volume_mode << 1) | mono_mode)
  {
    SETUP_MIXER_ALL(FLAT, 0)
    SETUP_MIXER_ALL(NEAREST, 1)
    SETUP_MIXER_ALL(LINEAR, 2)
    SETUP_MIXER_ALL(CUBIC, 3)
  }
}

## Makefile

# Builds the mixer benchmark. Every source, including exo_mixer.c, is compiled
# with ${CXX}, which is why CFLAGS carries C++ options here.
CFLAGS  := -O3 -std=gnu++17 -Wall -Wextra -Wno-unused-parameter -ffast-math ${CFLAGS}
LDFLAGS +=
LDLIBS  +=

# NOTE(review): templ_mixer.o needs a templ_mixer.cpp (or .c) next to this
# Makefile -- confirm the template mixer source really has that file name.
OBJS    := mixer.o templ_mixer.o exo_mixer.o
TARGS   := mixer

# These targets do not name files; always run them when requested.
.PHONY: all clean

all: ${TARGS}

${TARGS}: ${OBJS}
	${CXX} ${CFLAGS} $^ -o $@ ${LDFLAGS} ${LDLIBS}

%.o: %.c
	${CXX} -MD ${CFLAGS} -c $< -o $@

%.o: %.cpp
	${CXX} -MD ${CFLAGS} -c $< -o $@

clean:
	rm -f *.d *.o mixer mixer.exe

# Pull in the dependency files written by -MD so that editing a header
# rebuilds the objects that include it. The leading '-' ignores files that
# do not exist yet (first build, or after `make clean`).
-include ${OBJS:.o=.d}

## mixer.cpp
#include <inttypes.h>
#include <stdio.h>
#include <time.h>
#include <algorithm>
#include <cassert>
#include <chrono>
#include <vector>

#include "mixer.h"

// Current state of the xorshift* generator used by Random().
static uint64_t rng_state;

// Seed the generator from the system time: time() supplies the upper 32 bits
// and clock() is OR-ed into the value.
static void rng_seed_init(void)
{
  const uint64_t wall_seconds = (uint64_t)time(NULL);
  rng_state = (wall_seconds << 32) | clock();
}

// xorshift*
// Implementation from https://en.wikipedia.org/wiki/Xorshift
//
// Advances the generator one step and scales the upper 32 bits of the
// scrambled state by `range`. A zero state is replaced by 1 first, since an
// all-zero state would never change.
unsigned int Random(uint64_t range)
{
  uint64_t state = rng_state ? rng_state : 1;

  state ^= state >> 12; // a
  state ^= state << 25; // b
  state ^= state >> 27; // c
  rng_state = state;

  const uint64_t scrambled_high = (state * 0x2545F4914F6CDD1D) >> 32;
  return (scrambled_high * range) >> 32;
}


// Element count of a built-in array, usable in constant expressions. Only
// the array's type is inspected; its contents are never read.
template<class T, int N>
static constexpr size_t ARRAY_SIZE(const T (&)[N])
{
  return static_cast<size_t>(N);
}


// Sample type of the source buffer handed to the mixers.
typedef int16_t sample_t;
// Sample type of the destination buffers the mixers accumulate into.
typedef int32_t sample_output_t;

// Frequency passed to every mixer call as the output frequency.
static constexpr size_t output_frequency = 48000;

// Input frequencies to test. Exactly one entry equals output_frequency; that
// entry is the only one run in flat mode.
static constexpr size_t input_frequencies[] =
{
  output_frequency, // Same as output--use flat resampling.
  44100,            // Different--force upsampling.
  8363,             // Different--force upsampling.
  88200,            // Different--force downsampling.
};

// Resample mode values passed to the mixers.
static constexpr unsigned int resample_modes[] =
{
  0, // Flat
  1, // Nearest
  2, // Linear
  3, // Cubic
};

// Display names indexed by resample mode. The last entry has no matching
// value in resample_modes, so it is never printed by the tests here.
static constexpr const char *resample_mode_str[] =
{
  "FLAT",
  "Nearest",
  "Linear",
  "Cubic",
  "Sinc-L.",
};

// Volumes to test; 256 is the value the mixers treat as "no volume scaling".
static constexpr int volumes[] =
{
  256, // No volume mixing
  179, // Volume mixing
};

// Source channel counts to test.
static constexpr int channel_counts[] =
{
  1, // Mono mode
  2, // Stereo mode
};

// Number of combinations test_function runs: every non-matching frequency
// with every non-flat mode, plus the single flat case, each repeated for
// every volume and channel count.
static constexpr size_t num_tests =
 ((ARRAY_SIZE(input_frequencies) - 1) * (ARRAY_SIZE(resample_modes) - 1) + 1) *
 ARRAY_SIZE(volumes) * ARRAY_SIZE(channel_counts);

// Largest input frequency and channel count, used to size the source buffer.
static constexpr size_t max_frequency =
 *std::max_element(std::begin(input_frequencies), std::end(input_frequencies));

static constexpr int max_channels =
 *std::max_element(std::begin(channel_counts), std::end(channel_counts));

// multiplier scales both buffer sizes (output_frequency * multiplier output
// frames); repeat_times is how often each mix call is repeated while timing.
static constexpr size_t multiplier = 10;
static constexpr size_t repeat_times = 100;

// The destination buffer is always stereo.
static constexpr size_t dest_size = output_frequency * multiplier * 2;

// Allocate the source buffer with enough space for both the highest input
// frequency and channel count. Also, add extra for linear/cubic modes.
static constexpr size_t src_size = max_frequency * multiplier * max_channels + 256;

void test_function(const mix_data_function mix_data_f, const std::vector<sample_t> &src,
 std::vector<std::vector<sample_output_t>> &dests)
{
  size_t dest_num = 0;
  for(size_t freq : input_frequencies)
  {
    for(unsigned resample_mode : resample_modes)
    {
      // Only use flat copying when the frequencies match.
      if(freq == output_frequency && resample_mode != 0)
        continue;
      // Only use resampling when the frequencies don't match.
      if(freq != output_frequency && resample_mode == 0)
        continue;

      for(int volume : volumes)
      {
        for(int channels : channel_counts)
        {
          printf("%2zu: %zuHz <- %6zuHz, vol=%d, chn=%d, mode=%-8s: ", dest_num,
           output_frequency, freq, volume, channels, resample_mode_str[resample_mode]);
          fflush(stdout);

          assert(dest_num < num_tests);
          std::vector<sample_output_t> &dest = dests[dest_num++];

          auto start_time = std::chrono::steady_clock::now();

          for(size_t i = 0; i < repeat_times; i++)
          {
            mix_data_f(dest.data(), dest.size(), src.data(), src.size(),
             volume, channels, resample_mode, freq, output_frequency);
          }

          auto end_time = std::chrono::steady_clock::now();
          auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);

          printf("%8" PRId64 " us\n", (int64_t)duration.count());
          fflush(stdout);
        }
      }
    }
  }
}

int main(void)
{
  std::vector<std::vector<sample_output_t>> exo_result(num_tests, std::vector<sample_output_t>(dest_size, 0));
  std::vector<std::vector<sample_output_t>> tmpl_result(num_tests, std::vector<sample_output_t>(dest_size, 0));
  std::vector<sample_t> src(src_size);

  rng_seed_init();

  for(sample_t &smpl : src)
    smpl = Random(UINT16_MAX) - (INT16_MAX + 1);

  printf("Output samples: %zu\n", output_frequency * multiplier);

  printf("\nFunction: exo_mixer\n");
  test_function(exo_mix_data, src, exo_result);

  printf("\nFunction: templ_mixer\n");
  test_function(template_mix_data, src, tmpl_result);

  printf("\n");
  size_t mismatches = 0;
  for(size_t i = 0; i < num_tests; i++)
  {
    if(exo_result[i] != tmpl_result[i])
    {
      printf("Mismatch in test %zu\n", i);
      mismatches++;
    }
  }
  printf("Mismatches: %zu\n", mismatches);
  fflush(stdout);

  return 0;
}

## mixer.h

// Guard against multiple inclusion.
#pragma once

#include <stddef.h>
#include <stdint.h>

typedef bool boolean;

/**
 * Common signature of the mixer implementations under test.
 *
 * dest_buffer receives the mixed output and src supplies the 16-bit input
 * samples. volume, channels and resample_mode select the mixing variant;
 * input_frequency and output_frequency give the two sample rates.
 */
typedef void (*mix_data_function)(int32_t * __restrict__ dest_buffer, size_t len,
 const int16_t *src, size_t src_len, int volume, unsigned channels,
 unsigned resample_mode, size_t input_frequency, size_t output_frequency);

// Macro-generated mixer implementation.
void exo_mix_data(int32_t * __restrict__ dest_buffer, size_t len, const int16_t *src,
 size_t src_len, int volume, unsigned int channels, unsigned int resample_mode,
 size_t input_frequency, size_t output_frequency);

// Template-generated mixer implementation.
void template_mix_data(int32_t * __restrict__ dest_buffer, size_t len, const int16_t *src,
 size_t src_len, int volume, unsigned int channels, unsigned int resample_mode,
 size_t input_frequency, size_t output_frequency);

## template_mixer.cpp
/* MegaZeux
 *
 * Copyright (C) 2004 Gilead Kutnick <exophase@adelphia.net>
 * Copyright (C) 2004 madbrain
 * Copyright (C) 2007 Alistair John Strachan <alistair@devzero.co.uk>
 * Copyright (C) 2018 Alice Rowan <petrifiedrowan@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "mixer.h"
#include <math.h>
#include <cassert>

// Number of fractional bits in the fixed point source sample index.
#define FP_SHIFT      13
// Alternative that would use 16 fractional bits where ssize_t is 64-bit;
// currently disabled.
//#define FP_SHIFT      ((ssize_t)(sizeof(ssize_t) >= 8 ? 16 : 13))
// Mask that extracts the fractional bits of a fixed point sample index.
#define FP_AND        ((1 << FP_SHIFT) - 1)

// Placeholder.
// Minimal stream state needed by the mixer loops in this file.
struct sampled_stream
{
  // Fixed point source position (FP_SHIFT fractional bits).
  int64_t sample_index;
  // Fixed point amount added to the source position per output frame.
  uint64_t frequency_delta;
  // Divided by (channels * 2) when rewinding sample_index after a resampling
  // loop -- presumably a length in bytes of 16-bit samples; TODO confirm.
  uint32_t data_window_length;
  // Number of source channels.
  uint32_t channels;
};

// Resample modes; the values match the resample_mode argument of
// template_mix_data.
enum mixer_resample
{
  FLAT        = 0,
  NEAREST     = 1,
  LINEAR      = 2,
  CUBIC       = 3,
};

// Source channel layouts; the value is the number of interleaved channels
// and is used directly as an index stride by the resamplers.
enum mixer_channels
{
  MONO        = 1,
  STEREO      = 2,
};

// Whether volume scaling is applied (DYNAMIC) or skipped (FIXED).
enum mixer_volume
{
  FIXED       = 0,
  DYNAMIC     = 1,
};

// Store `index` as the stream's new source position, rewound by
// data_window_length / (channels * 2) frames expressed in fixed point.
static void update_sample_index(struct sampled_stream *s, int64_t index)
{
  const int64_t window_frames = (int64_t)(s->data_window_length / (s->channels * 2));
  s->sample_index = index - (window_frames << FP_SHIFT);
}

/**
 * Apply the volume to one mixed sample when VOLUME is DYNAMIC; return the
 * sample unchanged when it is FIXED.
 *
 * NOTE: previous versions of MZX did a /256 here. Just do the bitshift--the
 * rounding difference is more or less irrelevant and it performs better than
 * GCC's optimization for signed constant division.
 */
template<mixer_volume VOLUME>
static int32_t volume_function(int32_t sample, int volume)
{
  return VOLUME ? ((sample * volume) >> 8) : sample;
}

/**
 * Mix without resampling: source frames are consumed one per output frame.
 * A mono source sample is added to both output samples of the frame; stereo
 * source samples are added to their respective output samples. The stream
 * position is reset to zero afterwards.
 */
template<mixer_channels CHANNELS, mixer_volume VOLUME>
static void flat_mix_loop(struct sampled_stream *s,
 int32_t * __restrict__ dest, size_t write_len, const int16_t *src, int volume)
{
  // `out` indexes the first output sample of each stereo frame.
  for(size_t out = 0; out < write_len; out += 2)
  {
    int32_t first;
    int32_t second;

    if(CHANNELS >= STEREO)
    {
      // Source and destination are both two samples per frame.
      first = volume_function<VOLUME>(src[out], volume);
      second = volume_function<VOLUME>(src[out + 1], volume);
    }
    else
    {
      // One source sample per frame, duplicated to both outputs.
      first = volume_function<VOLUME>(src[out / 2], volume);
      second = first;
    }

    dest[out] += first;
    dest[out + 1] += second;
  }
  s->sample_index = 0;
}

// Signature of a per-sample resampler: src_offset points at the current
// frame's sample for one channel, frac_index is the fractional position.
typedef int32_t (*mix_function)(const int16_t *src_offset, ssize_t frac_index);

// Nearest resampling: use the current frame's sample as-is. The fractional
// position is ignored.
template<mixer_channels CHANNELS>
static int32_t nearest_mix(const int16_t *src_offset, ssize_t frac_index)
{
  return *src_offset;
}

// Linear resampling: move from the current frame's sample towards the same
// channel of the next frame by frac_index / 2^FP_SHIFT of the difference.
template<mixer_channels CHANNELS>
static int32_t linear_mix(const int16_t *src_offset, ssize_t frac_index)
{
  const int32_t current = src_offset[0];
  const int32_t step_to_next = src_offset[CHANNELS] - current;
  return current + (step_to_next * frac_index >> FP_SHIFT);
}

template<mixer_channels CHANNELS>
static int32_t cubic_mix(const int16_t *src_offset, ssize_t frac_index)
{
  /**
   * NOTE: copied mostly verbatim from the old mixer code, with cleanup.
   * This uses ssize_t instead of int32_t since it seems to be faster for
   * 64-bit machines in this particular resampler.
   *
   * This uses a Hermite spline. This is somewhat faster to compute and
   * generally considered better quality than a Lagrange cubic.
   */
  /*
  int32_t s0 = src_offset[-CHANNELS]    << FP_SHIFT;
  int32_t s1 = src_offset[0]            << FP_SHIFT;
  int32_t s2 = src_offset[CHANNELS]     << FP_SHIFT;
  int32_t s3 = src_offset[CHANNELS * 2] << FP_SHIFT;
*/
  ssize_t s0 = (ssize_t)src_offset[-CHANNELS]    << FP_SHIFT;
  ssize_t s1 = (ssize_t)src_offset[0]            << FP_SHIFT;
  ssize_t s2 = (ssize_t)src_offset[CHANNELS]     << FP_SHIFT;
  ssize_t s3 = (ssize_t)src_offset[CHANNELS * 2] << FP_SHIFT;

  int64_t a = (((3 * (s1 - s2)) - s0 + s3) / 2);
  int64_t b = ((2 * s2) + s0 - (((5 * s1) + s3) / 2));
  int64_t c = ((s2 - s0) / 2);

  a = ((a * frac_index) >> FP_SHIFT) + b;
  a = ((a * frac_index) >> FP_SHIFT) + c;
  a = ((a * frac_index) >> FP_SHIFT) + s1;
  return (a >> FP_SHIFT);
}

template<mixer_channels CHANNELS, mixer_volume VOLUME, mix_function MIX>
static void resample_mix_loop(struct sampled_stream *s,
 int32_t * __restrict__ dest, size_t write_len, const int16_t *src, int volume)
{
  int64_t sample_index = s->sample_index;
  int64_t delta = s->frequency_delta;

  for(size_t i = 0; i < write_len; i += 2, sample_index += delta)
  {
    ssize_t int_index = (sample_index >> FP_SHIFT) * CHANNELS;
    ssize_t frac_index = sample_index & FP_AND;

    if(CHANNELS >= STEREO)
    {
      int32_t mix_a = MIX(src + int_index + 0, frac_index);
      int32_t mix_b = MIX(src + int_index + 1, frac_index);
      *(dest++) += volume_function<VOLUME>(mix_a, volume);
      *(dest++) += volume_function<VOLUME>(mix_b, volume);
    }
    else
    {
      int32_t mix = MIX(src + int_index, frac_index);
      int32_t smpl = volume_function<VOLUME>(mix, volume);
      *(dest++) += smpl;
      *(dest++) += smpl;
    }
  }
  update_sample_index(s, sample_index);
}

template<mixer_channels CHANNELS, mixer_volume VOLUME>
static void mixer_function(struct sampled_stream *s,
 int32_t * __restrict__ dest, size_t write_len, const int16_t *src, int volume,
 int resample_mode)
{
  switch((mixer_resample)resample_mode)
  {
    case FLAT:
      flat_mix_loop<CHANNELS, VOLUME>(s, dest, write_len, src, volume);
      break;

    case NEAREST:
      resample_mix_loop<CHANNELS, VOLUME, nearest_mix<CHANNELS>>(s,
       dest, write_len, src, volume);
      break;

    case LINEAR:
      resample_mix_loop<CHANNELS, VOLUME, linear_mix<CHANNELS>>(s,
       dest, write_len, src, volume);
      break;

    case CUBIC:
      resample_mix_loop<CHANNELS, VOLUME, cubic_mix<CHANNELS>>(s,
       dest, write_len, src, volume);
      break;
  }
}

// Middle dispatch step: turn the runtime channel mode into the CHANNELS
// template argument. Values other than MONO and STEREO mix nothing.
template<mixer_volume VOLUME>
static void mixer_function(struct sampled_stream *s,
 int32_t * __restrict__ dest, size_t write_len, const int16_t *src, int volume,
 int resample_mode, mixer_channels channel_mode)
{
  if(channel_mode == MONO)
    mixer_function<MONO, VOLUME>(s, dest, write_len, src, volume, resample_mode);
  else if(channel_mode == STEREO)
    mixer_function<STEREO, VOLUME>(s, dest, write_len, src, volume, resample_mode);
}

// Outermost dispatch step: turn the runtime volume mode into the VOLUME
// template argument. Values other than FIXED and DYNAMIC mix nothing.
static void mixer_function(struct sampled_stream *s,
 int32_t * __restrict__ dest, size_t write_len, const int16_t *src, int volume,
 int resample_mode, mixer_channels channel_mode, mixer_volume volume_mode)
{
  if(volume_mode == FIXED)
    mixer_function<FIXED>(s, dest, write_len, src, volume, resample_mode, channel_mode);
  else if(volume_mode == DYNAMIC)
    mixer_function<DYNAMIC>(s, dest, write_len, src, volume, resample_mode, channel_mode);
}

/**
 * Benchmark entry point for the template-based mixer.
 *
 * Builds a throwaway stream descriptor from the call arguments, picks the
 * volume/channel/resample variants, and mixes `src` into `dest` through the
 * mixer_function dispatch chain.
 *
 * @param dest             Output accumulation buffer.
 * @param len              Length of `dest`; half of it is passed on as the
 *                         write length.
 * @param src              Source samples.
 * @param src_len          Unused.
 * @param volume           Exactly 256 selects the FIXED volume path; any other
 *                         value selects DYNAMIC.
 * @param channels         Fewer than 2 selects MONO; otherwise STEREO.
 * @param resample_mode    Requested resampler; forced to 0 when the input and
 *                         output frequencies match.
 * @param input_frequency  Source sample rate.
 * @param output_frequency Destination sample rate (used as a divisor).
 */
void template_mix_data(int32_t * __restrict__ dest, size_t len, const int16_t *src,
 size_t src_len, int volume, unsigned int channels, unsigned int resample_mode,
 size_t input_frequency, size_t output_frequency)
{
  const size_t write_len = len / 2;

  // Matching rates never need a resampler.
  if(input_frequency == output_frequency)
    resample_mode = 0;

  const enum mixer_volume   use_volume   = (volume == 256) ? FIXED : DYNAMIC;
  const enum mixer_channels use_channels = (channels < 2) ? MONO : STEREO;

  struct sampled_stream placeholder;
  placeholder.sample_index = 0;
  placeholder.channels = channels;
  placeholder.frequency_delta =
   (((uint64_t)input_frequency) << FP_SHIFT) / output_frequency;
  // NOTE(review): exo_mix_data divides write_len by `channels` here rather
  // than by 8 -- confirm which divisor is intended.
  placeholder.data_window_length =
   (ceil((double)write_len / 8 * input_frequency / output_frequency) * 2 * channels);

  mixer_function(&placeholder, dest, write_len, src, volume,
   resample_mode, use_channels, use_volume);
}
	$ ./mixer.exe
	Output samples: 4800000

	Function: exo_mixer
	0: 48000Hz <- 48000Hz, vol=256, chn=1, mode=FLAT : 22435 us
	1: 48000Hz <- 48000Hz, vol=256, chn=2, mode=FLAT : 22965 us
	2: 48000Hz <- 48000Hz, vol=179, chn=1, mode=FLAT : 26772 us
	3: 48000Hz <- 48000Hz, vol=179, chn=2, mode=FLAT : 29794 us
	4: 48000Hz <- 44100Hz, vol=256, chn=1, mode=Nearest : 28849 us
	5: 48000Hz <- 44100Hz, vol=256, chn=2, mode=Nearest : 27272 us
	6: 48000Hz <- 44100Hz, vol=179, chn=1, mode=Nearest : 26582 us
	7: 48000Hz <- 44100Hz, vol=179, chn=2, mode=Nearest : 32344 us
	8: 48000Hz <- 44100Hz, vol=256, chn=1, mode=Linear : 36516 us
	9: 48000Hz <- 44100Hz, vol=256, chn=2, mode=Linear : 47827 us
	10: 48000Hz <- 44100Hz, vol=179, chn=1, mode=Linear : 40985 us
	11: 48000Hz <- 44100Hz, vol=179, chn=2, mode=Linear : 53354 us
	12: 48000Hz <- 44100Hz, vol=256, chn=1, mode=Cubic : 103178 us
	13: 48000Hz <- 44100Hz, vol=256, chn=2, mode=Cubic : 171409 us
	14: 48000Hz <- 44100Hz, vol=179, chn=1, mode=Cubic : 110741 us
	15: 48000Hz <- 44100Hz, vol=179, chn=2, mode=Cubic : 184491 us
	16: 48000Hz <- 22050Hz, vol=256, chn=1, mode=Nearest : 24953 us
	17: 48000Hz <- 22050Hz, vol=256, chn=2, mode=Nearest : 26652 us
	18: 48000Hz <- 22050Hz, vol=179, chn=1, mode=Nearest : 26688 us
	19: 48000Hz <- 22050Hz, vol=179, chn=2, mode=Nearest : 32573 us
	20: 48000Hz <- 22050Hz, vol=256, chn=1, mode=Linear : 35655 us
	21: 48000Hz <- 22050Hz, vol=256, chn=2, mode=Linear : 54606 us
	22: 48000Hz <- 22050Hz, vol=179, chn=1, mode=Linear : 38543 us
	23: 48000Hz <- 22050Hz, vol=179, chn=2, mode=Linear : 52945 us
	24: 48000Hz <- 22050Hz, vol=256, chn=1, mode=Cubic : 103621 us
	25: 48000Hz <- 22050Hz, vol=256, chn=2, mode=Cubic : 166605 us
	26: 48000Hz <- 22050Hz, vol=179, chn=1, mode=Cubic : 109783 us
	27: 48000Hz <- 22050Hz, vol=179, chn=2, mode=Cubic : 187311 us
	28: 48000Hz <- 88200Hz, vol=256, chn=1, mode=Nearest : 24524 us
	29: 48000Hz <- 88200Hz, vol=256, chn=2, mode=Nearest : 28573 us
	30: 48000Hz <- 88200Hz, vol=179, chn=1, mode=Nearest : 26770 us
	31: 48000Hz <- 88200Hz, vol=179, chn=2, mode=Nearest : 33647 us
	32: 48000Hz <- 88200Hz, vol=256, chn=1, mode=Linear : 36670 us
	33: 48000Hz <- 88200Hz, vol=256, chn=2, mode=Linear : 52245 us
	34: 48000Hz <- 88200Hz, vol=179, chn=1, mode=Linear : 38754 us
	35: 48000Hz <- 88200Hz, vol=179, chn=2, mode=Linear : 53203 us
	36: 48000Hz <- 88200Hz, vol=256, chn=1, mode=Cubic : 105438 us
	37: 48000Hz <- 88200Hz, vol=256, chn=2, mode=Cubic : 166757 us
	38: 48000Hz <- 88200Hz, vol=179, chn=1, mode=Cubic : 110132 us
	39: 48000Hz <- 88200Hz, vol=179, chn=2, mode=Cubic : 184436 us

	Function: templ_mixer
	0: 48000Hz <- 48000Hz, vol=256, chn=1, mode=FLAT : 17131 us
	1: 48000Hz <- 48000Hz, vol=256, chn=2, mode=FLAT : 23422 us
	2: 48000Hz <- 48000Hz, vol=179, chn=1, mode=FLAT : 23503 us
	3: 48000Hz <- 48000Hz, vol=179, chn=2, mode=FLAT : 20326 us
	4: 48000Hz <- 44100Hz, vol=256, chn=1, mode=Nearest : 20711 us
	5: 48000Hz <- 44100Hz, vol=256, chn=2, mode=Nearest : 24233 us
	6: 48000Hz <- 44100Hz, vol=179, chn=1, mode=Nearest : 22659 us
	7: 48000Hz <- 44100Hz, vol=179, chn=2, mode=Nearest : 29459 us
	8: 48000Hz <- 44100Hz, vol=256, chn=1, mode=Linear : 33721 us
	9: 48000Hz <- 44100Hz, vol=256, chn=2, mode=Linear : 46263 us
	10: 48000Hz <- 44100Hz, vol=179, chn=1, mode=Linear : 36550 us
	11: 48000Hz <- 44100Hz, vol=179, chn=2, mode=Linear : 54954 us
	12: 48000Hz <- 44100Hz, vol=256, chn=1, mode=Cubic : 98541 us
	13: 48000Hz <- 44100Hz, vol=256, chn=2, mode=Cubic : 169011 us
	14: 48000Hz <- 44100Hz, vol=179, chn=1, mode=Cubic : 107805 us
	15: 48000Hz <- 44100Hz, vol=179, chn=2, mode=Cubic : 175298 us
	16: 48000Hz <- 22050Hz, vol=256, chn=1, mode=Nearest : 19606 us
	17: 48000Hz <- 22050Hz, vol=256, chn=2, mode=Nearest : 23786 us
	18: 48000Hz <- 22050Hz, vol=179, chn=1, mode=Nearest : 22596 us
	19: 48000Hz <- 22050Hz, vol=179, chn=2, mode=Nearest : 29747 us
	20: 48000Hz <- 22050Hz, vol=256, chn=1, mode=Linear : 32246 us
	21: 48000Hz <- 22050Hz, vol=256, chn=2, mode=Linear : 47347 us
	22: 48000Hz <- 22050Hz, vol=179, chn=1, mode=Linear : 36646 us
	23: 48000Hz <- 22050Hz, vol=179, chn=2, mode=Linear : 53825 us
	24: 48000Hz <- 22050Hz, vol=256, chn=1, mode=Cubic : 98109 us
	25: 48000Hz <- 22050Hz, vol=256, chn=2, mode=Cubic : 166653 us
	26: 48000Hz <- 22050Hz, vol=179, chn=1, mode=Cubic : 106044 us
	27: 48000Hz <- 22050Hz, vol=179, chn=2, mode=Cubic : 177058 us
	28: 48000Hz <- 88200Hz, vol=256, chn=1, mode=Nearest : 22747 us
	29: 48000Hz <- 88200Hz, vol=256, chn=2, mode=Nearest : 27709 us
	30: 48000Hz <- 88200Hz, vol=179, chn=1, mode=Nearest : 22850 us
	31: 48000Hz <- 88200Hz, vol=179, chn=2, mode=Nearest : 30354 us
	32: 48000Hz <- 88200Hz, vol=256, chn=1, mode=Linear : 32793 us
	33: 48000Hz <- 88200Hz, vol=256, chn=2, mode=Linear : 47094 us
	34: 48000Hz <- 88200Hz, vol=179, chn=1, mode=Linear : 36086 us
	35: 48000Hz <- 88200Hz, vol=179, chn=2, mode=Linear : 54275 us
	36: 48000Hz <- 88200Hz, vol=256, chn=1, mode=Cubic : 97572 us
	37: 48000Hz <- 88200Hz, vol=256, chn=2, mode=Cubic : 167816 us
	38: 48000Hz <- 88200Hz, vol=179, chn=1, mode=Cubic : 111874 us
	39: 48000Hz <- 88200Hz, vol=179, chn=2, mode=Cubic : 182448 us

	Mismatches: 0
	/* MegaZeux
	*
	* Copyright (C) 2004 Gilead Kutnick <exophase@adelphia.net>
	* Copyright (C) 2004 madbrain
	* Copyright (C) 2007 Alistair John Strachan <alistair@devzero.co.uk>
	* Copyright (C) 2018 Alice Rowan <petrifiedrowan@gmail.com>
	*
	* This program is free software; you can redistribute it and/or
	* modify it under the terms of the GNU General Public License as
	* published by the Free Software Foundation; either version 2 of
	* the License, or (at your option) any later version.
	*
	* This program is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with this program; if not, write to the Free Software
	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
	*/

	#include "mixer.h"
	#include <math.h>

	// Fixed-width integer aliases used by the mixer macros in this file.
	typedef uint8_t Uint8;
	typedef int16_t Sint16;
	typedef uint32_t Uint32;
	typedef int32_t Sint32;
	typedef int64_t Sint64;

	// Sample positions are fixed point with FP_SHIFT fractional bits;
	// FP_AND is the mask that extracts that fractional part.
	#define FP_SHIFT 13
	#define FP_AND ((1 << FP_SHIFT) - 1)

	// Macros are used to generate functions to help reduce redundancy and
	// maintain some kind of speed. Additionally, fixed point is used (again,
	// for speed purposes to avoid the hits in converting between fixed and
	// floating point). I'm almost completely sure that fixed point is ideal
	// for nearest and linear resampling but it might not be so good for
	// cubic because it has to use 64bit ints and a lot of shifts
	// (should work great on a 64bit machine though). The cubic resampler can
	// be further optimized in other ways as well, and might be in due time.
	// For now, if cubic doesn't give you good speed stick with linear which
	// should be quite fast.

	// Per-iteration index setup, selected by token pasting as
	// type##_SETUP_INDEX. Flat and nearest mixing need no split index, so
	// their variants expand to nothing.
	#define FLAT_SETUP_INDEX(channels)
	#define NEAREST_SETUP_INDEX(channels)

	// Splits the fixed point position s_index into int_index (whole frames
	// scaled by the channel count, i.e. an offset into the source array) and
	// frac_index (the FP_SHIFT-bit fraction between frames).
	#define FRACTIONAL_SETUP_INDEX(channels) \
	int_index = (Sint32)(s_index >> FP_SHIFT) * channels; \
	frac_index = (Sint32)(s_index & FP_AND); \

	// Linear interpolation uses the split index.
	#define LINEAR_SETUP_INDEX(channels) \
	FRACTIONAL_SETUP_INDEX(channels) \

	// Cubic interpolation uses the split index.
	#define CUBIC_SETUP_INDEX(channels) \
	FRACTIONAL_SETUP_INDEX(channels) \

	// Sample fetch/interpolation macros, selected as type##_MIX_SAMPLE.
	// `dest` is a statement prefix such as `dest_buffer[i] +=`; each macro
	// expands to that prefix followed by the sample expression and leaves the
	// terminating `;` (and any volume modifier) to the caller. `offset`
	// selects the channel within a frame.

	// Flat: copy the source sample at the running source index i2.
	#define FLAT_MIX_SAMPLE(dest, channels, offset) \
	dest src_buffer[i2 + offset] \

	// Nearest: take the frame at the integer part of s_index (truncated).
	#define NEAREST_MIX_SAMPLE(dest, channels, offset) \
	dest src_buffer[((s_index >> FP_SHIFT) * channels) + offset] \

	// Linear: blend the frame at int_index with the following frame, weighted
	// by frac_index / 2^FP_SHIFT. Reads one frame past int_index.
	#define LINEAR_MIX_SAMPLE(dest, channels, offset) \
	right_sample = src_buffer[int_index + offset]; \
	dest (right_sample + ((frac_index * \
	(src_buffer[int_index + channels + offset] - right_sample)) >> \
	FP_SHIFT)) \

	// Cubic: evaluates ((a*t + b)*t + c)*t + s1 in fixed point over four
	// neighbouring frames s0..s3, with Catmull-Rom coefficients a, b, c.
	// NOTE(review): reads one frame before int_index and two frames after it;
	// the caller must guarantee those frames exist (int_index is 0 on the
	// first iteration when sample_index starts at 0) -- confirm.
	#define CUBIC_MIX_SAMPLE(dest, channels, offset) \
	s0 = src_buffer[int_index - channels + offset] << FP_SHIFT; \
	s1 = src_buffer[int_index + offset] << FP_SHIFT; \
	s2 = src_buffer[int_index + channels + offset] << FP_SHIFT; \
	s3 = src_buffer[int_index + (channels * 2) + offset] << FP_SHIFT; \
	\
	a = (((3 * (s1 - s2)) - s0 + s3) / 2); \
	b = ((2 * s2) + s0 - (((5 * s1) + s3) / 2)); \
	c = ((s2 - s0) / 2); \
	\
	dest ((Sint32)(((((((((a * frac_index) >> FP_SHIFT) + b) * \
	frac_index) >> FP_SHIFT) + c) * frac_index) >> FP_SHIFT) + s1) >> \
	FP_SHIFT) \

	// Loop openers, selected as type##_LOOP_HEADER. Each one opens a `for`
	// body whose closing brace is supplied by the matching *_FOOTER macro.

	// Resampling loop: i advances two destination slots per iteration while
	// the fixed point source position s_index advances by d.
	#define RESAMPLE_LOOP_HEADER \
	for(i = 0; i < write_len; i += 2, s_index += d) \
	{ \

	// Flat loop: i advances two destination slots per iteration while the
	// source index i2 advances by one frame (`channels` samples).
	#define FLAT_LOOP_HEADER(channels) \
	for(i = 0, i2 = 0; i < write_len; i += 2, i2 += channels) \
	{ \

	// The resampling modes all share the same loop; their argument is unused.
	#define NEAREST_LOOP_HEADER(dummy) \
	RESAMPLE_LOOP_HEADER \

	#define LINEAR_LOOP_HEADER(dummy) \
	RESAMPLE_LOOP_HEADER \

	#define CUBIC_LOOP_HEADER(dummy) \
	RESAMPLE_LOOP_HEADER \

	// Local variable declarations, selected as type##_HEADER and emitted at
	// the top of each generated case body.

	// Integer and fractional parts of the source position.
	#define SPLIT_HEADER \
	Sint32 int_index; \
	Sint32 frac_index; \

	// Working copies of the enclosing function's sample_index and
	// frequency_delta.
	#define RESAMPLE_HEADER \
	Sint64 s_index = sample_index; \
	Sint64 d = frequency_delta; \

	// Flat mixing only needs a running source index.
	#define FLAT_HEADER \
	Uint32 i2; \

	#define NEAREST_HEADER \
	RESAMPLE_HEADER \

	// right_sample holds the sample at int_index during linear interpolation.
	#define LINEAR_HEADER \
	RESAMPLE_HEADER \
	SPLIT_HEADER \
	Sint32 right_sample; \

	// s0..s3 are the four neighbouring samples (pre-shifted by FP_SHIFT);
	// a, b, c are the 64-bit cubic coefficients.
	#define CUBIC_HEADER \
	RESAMPLE_HEADER \
	SPLIT_HEADER \
	Sint32 s0, s1, s2, s3; \
	Sint64 a, b, c; \

	// Loop closers, selected as type##_FOOTER. Each begins with the `}` that
	// closes the loop opened by the matching *_LOOP_HEADER.

	// Resampling footer: rewinds s_index by data_window_length /
	// (channels * 2) frames (converted to fixed point) and writes the result
	// back to sample_index.
	#define MIXER_FOOTER(channels) \
	} \
	s_index -= (data_window_length / (channels * 2)) << FP_SHIFT; \
	sample_index = s_index; \

	// Flat footer: there is no fractional position to carry, so the index is
	// simply reset.
	#define FLAT_FOOTER(dummy) \
	} \
	sample_index = 0; \

	#define NEAREST_FOOTER(channels) \
	MIXER_FOOTER(channels) \

	#define LINEAR_FOOTER(channels) \
	MIXER_FOOTER(channels) \

	#define CUBIC_FOOTER(channels) \
	MIXER_FOOTER(channels) \

	// Volume modifier appended after a *_MIX_SAMPLE expression: multiplies
	// the sample by `volume` and shifts right by 8, so 256 is unity gain.
	// The commented-out variant divides by 256 instead.
	// NOTE(review): the two forms presumably differ in rounding for negative
	// products (shift vs. truncating division) -- confirm before swapping.
	/*
	#define VOL \
	* volume / 256 \
	*/
	#define VOL \
	* volume >> 8 \

	// Generates one `case num:` body for a stereo source: the left and right
	// source samples (offsets 0 and 1) are accumulated into dest_buffer[i]
	// and dest_buffer[i + 1]. `type` picks the macro family via token
	// pasting; `mod` is an optional trailing modifier such as VOL.
	#define SETUP_MIXER(type, num, mod) \
	case num: \
	{ \
	type##_HEADER \
	type##_LOOP_HEADER(2) \
	type##_SETUP_INDEX(2) \
	type##_MIX_SAMPLE(dest_buffer[i] +=, 2, 0) mod; \
	type##_MIX_SAMPLE(dest_buffer[i + 1] +=, 2, 1) mod; \
	type##_FOOTER(2) \
	break; \
	} \

	// Generates one `case num:` body for a mono source: a single sample is
	// computed once and accumulated into both destination channels.
	#define SETUP_MIXER_MONO(type, num, mod) \
	case num: \
	{ \
	Sint32 current_sample; \
	type##_HEADER \
	type##_LOOP_HEADER(1) \
	type##_SETUP_INDEX(1) \
	type##_MIX_SAMPLE(current_sample =, 1, 0) mod; \
	dest_buffer[i] += current_sample; \
	dest_buffer[i + 1] += current_sample; \
	type##_FOOTER(1) \
	break; \
	} \

	// Emits the four cases for one resample mode. Case numbering within the
	// group of four: +0 stereo, +1 mono, +2 stereo with volume, +3 mono with
	// volume.
	#define SETUP_MIXER_ALL(type, num) \
	SETUP_MIXER(type, (num * 4), ) \
	SETUP_MIXER_MONO(type, (num * 4) + 1, ) \
	SETUP_MIXER(type, (num * 4) + 2, VOL) \
	SETUP_MIXER_MONO(type, (num * 4) + 3, VOL) \


	void exo_mix_data(int32_t * __restrict__ dest_buffer, size_t len, const int16_t *src,
	size_t src_len, int volume, unsigned int channels, unsigned int resample_mode,
	size_t input_frequency, size_t output_frequency)
	{
	Sint16 src_buffer = (Sint16 )src;
	Uint32 write_len = len / 2;
	Uint32 volume_mode = 1;
	Uint32 mono_mode = (channels == 1);
	Uint32 i;

	Sint64 frequency_delta = ((Sint64)input_frequency << FP_SHIFT) / output_frequency;
	Sint64 sample_index = 0;

	Uint32 data_window_length =
	(Uint32)(ceil((double)write_len / channels *
	input_frequency / output_frequency) * 2 * channels);

	if(input_frequency == output_frequency)
	resample_mode = 0;

	if(volume == 256)
	volume_mode = 0;

	switch((resample_mode << 2) \| (volume_mode << 1) \| mono_mode)
	{
	SETUP_MIXER_ALL(FLAT, 0)
	SETUP_MIXER_ALL(NEAREST, 1)
	SETUP_MIXER_ALL(LINEAR, 2)
	SETUP_MIXER_ALL(CUBIC, 3)
	}
	}

	# Build flags: any CFLAGS supplied by the caller are appended after the
	# defaults.
	CFLAGS := -O3 -std=gnu++17 -Wall -Wextra -Wno-unused-parameter -ffast-math ${CFLAGS}
	LDFLAGS +=
	LDLIBS +=

	OBJS := mixer.o templ_mixer.o exo_mixer.o
	TARGS := mixer

	# Neither target names a real file.
	.PHONY: all clean

	all: ${TARGS}

	${TARGS}: ${OBJS}
		${CXX} ${CFLAGS} $^ -o $@ ${LDFLAGS} ${LDLIBS}

	# -MD writes a .d dependency file next to each object.
	%.o: %.c
		${CXX} -MD ${CFLAGS} -c $< -o $@

	%.o: %.cpp
		${CXX} -MD ${CFLAGS} -c $< -o $@

	# Remove every generated dependency file and object, plus the binary.
	clean:
		rm -f *.d *.o mixer mixer.exe

	# Pull in the header dependencies emitted by -MD (silently skipped until
	# the first build has produced them).
	-include ${OBJS:.o=.d}
	#include <inttypes.h>
	#include <stdio.h>
	#include <time.h>
	#include <algorithm>
	#include <cassert>
	#include <chrono>
	#include <vector>

	#include "mixer.h"

	// State of the xorshift* generator; written by rng_seed_init() and
	// advanced by Random().
	static uint64_t rng_state;

	// Seed the RNG from system time on startup: the wall-clock time fills the
	// upper 32 bits and the processor clock is OR'd into the result.
	static void rng_seed_init(void)
	{
	  uint64_t seed = (((uint64_t)time(NULL)) << 32) | clock();
	  rng_state = seed;
	}

	// xorshift* pseudo-random generator.
	// Implementation from https://en.wikipedia.org/wiki/Xorshift
	//
	// Advances the shared generator state by one step and scales the upper
	// 32 bits of the scrambled output by `range`, so for ranges up to 2^32
	// the result is below `range`.
	unsigned int Random(uint64_t range)
	{
	  // An all-zero state would stay zero forever, so substitute 1.
	  uint64_t state = rng_state ? rng_state : 1;

	  state ^= state >> 12; // a
	  state ^= state << 25; // b
	  state ^= state >> 27; // c
	  rng_state = state;

	  const uint64_t scrambled_high = (state * 0x2545F4914F6CDD1D) >> 32;
	  return (scrambled_high * range) >> 32;
	}


	// Compile-time element count of a built-in array. The array itself is
	// never read; only its deduced bound N matters.
	template<class T, int N>
	static constexpr size_t ARRAY_SIZE(const T (&)[N])
	{
	  return static_cast<size_t>(N);
	}


	// Source samples are signed 16-bit; mixed output accumulates in signed
	// 32-bit.
	typedef int16_t sample_t;
	typedef int32_t sample_output_t;

	// Every benchmark mixes to this output rate.
	static constexpr size_t output_frequency = 48000;

	// Source rates to benchmark.
	static constexpr size_t input_frequencies[] =
	{
	output_frequency, // Same as output--use flat resampling.
	44100, // Different--force upsampling.
	8363, // Different--force upsampling.
	88200, // Different--force downsampling.
	};

	// Resample mode values passed to the mixers.
	static constexpr unsigned int resample_modes[] =
	{
	0, // Flat
	1, // Nearest
	2, // Linear
	3, // Cubic
	};

	// Display names indexed by resample mode.
	// NOTE(review): this has one more entry ("Sinc-L.") than resample_modes;
	// presumably reserved for a mode that is not benchmarked here.
	static constexpr const char *resample_mode_str[] =
	{
	"FLAT",
	"Nearest",
	"Linear",
	"Cubic",
	"Sinc-L.",
	};

	static constexpr int volumes[] =
	{
	256, // No volume mixing
	179, // Volume mixing
	};

	static constexpr int channel_counts[] =
	{
	1, // Mono mode
	2, // Stereo mode
	};

	// Number of configurations test_function() runs: every non-matching input
	// frequency paired with every non-flat resample mode, plus the single
	// flat/matching-frequency pairing, each repeated for every volume and
	// channel count. This assumes exactly one input frequency equals
	// output_frequency and exactly one resample mode is flat.
	static constexpr size_t num_tests =
	((ARRAY_SIZE(input_frequencies) - 1) * (ARRAY_SIZE(resample_modes) - 1) + 1) *
	ARRAY_SIZE(volumes) * ARRAY_SIZE(channel_counts);

	static constexpr size_t max_frequency =
	*std::max_element(std::begin(input_frequencies), std::end(input_frequencies));

	static constexpr int max_channels =
	*std::max_element(std::begin(channel_counts), std::end(channel_counts));

	// multiplier scales the buffer sizes: each destination holds
	// output_frequency * multiplier stereo frames. repeat_times is how many
	// times each configuration's mix call is repeated inside the timed loop.
	static constexpr size_t multiplier = 10;
	static constexpr size_t repeat_times = 100;

	// The destination buffer is always stereo.
	static constexpr size_t dest_size = output_frequency * multiplier * 2;

	// Allocate the source buffer with enough space for both the highest input
	// frequency and channel count. Also, add extra for linear/cubic modes.
	static constexpr size_t src_size = max_frequency * multiplier * max_channels + 256;

	// Benchmarks one mixer implementation across every valid combination of
	// input frequency, resample mode, volume, and channel count.
	//
	// For each combination the mixer is called repeat_times times into its own
	// destination buffer from `dests` (taken in order), and the elapsed time in
	// microseconds is printed. `dests` must hold at least num_tests buffers.
	void test_function(const mix_data_function mix_data_f, const std::vector<sample_t> &src,
	 std::vector<std::vector<sample_output_t>> &dests)
	{
	  size_t test_index = 0;

	  for(const size_t freq : input_frequencies)
	  {
	    const bool same_rate = (freq == output_frequency);

	    for(const unsigned resample_mode : resample_modes)
	    {
	      // Flat copying is only used when the frequencies match, and real
	      // resampling only when they differ; skip every other pairing.
	      const bool flat_mode = (resample_mode == 0);
	      if(same_rate != flat_mode)
	        continue;

	      for(const int volume : volumes)
	      {
	        for(const int channels : channel_counts)
	        {
	          printf("%2zu: %zuHz <- %6zuHz, vol=%d, chn=%d, mode=%-8s: ", test_index,
	           output_frequency, freq, volume, channels, resample_mode_str[resample_mode]);
	          fflush(stdout);

	          assert(test_index < num_tests);
	          std::vector<sample_output_t> &dest = dests[test_index];
	          test_index++;

	          const auto started = std::chrono::steady_clock::now();

	          for(size_t pass = 0; pass < repeat_times; pass++)
	          {
	            mix_data_f(dest.data(), dest.size(), src.data(), src.size(),
	             volume, channels, resample_mode, freq, output_frequency);
	          }

	          const auto finished = std::chrono::steady_clock::now();
	          const auto elapsed =
	           std::chrono::duration_cast<std::chrono::microseconds>(finished - started);

	          printf("%8" PRId64 " us\n", (int64_t)elapsed.count());
	          fflush(stdout);
	        }
	      }
	    }
	  }
	}

	int main(void)
	{
	std::vector<std::vector<sample_output_t>> exo_result(num_tests, std::vector<sample_output_t>(dest_size, 0));
	std::vector<std::vector<sample_output_t>> tmpl_result(num_tests, std::vector<sample_output_t>(dest_size, 0));
	std::vector<sample_t> src(src_size);

	rng_seed_init();

	for(sample_t &smpl : src)
	smpl = Random(UINT16_MAX) - (INT16_MAX + 1);

	printf("Output samples: %zu\n", output_frequency * multiplier);

	printf("\nFunction: exo_mixer\n");
	test_function(exo_mix_data, src, exo_result);

	printf("\nFunction: templ_mixer\n");
	test_function(template_mix_data, src, tmpl_result);

	printf("\n");
	size_t mismatches = 0;
	for(size_t i = 0; i < num_tests; i++)
	{
	if(exo_result[i] != tmpl_result[i])
	{
	printf("Mismatch in test %zu\n", i);
	mismatches++;
	}
	}
	printf("Mismatches: %zu\n", mismatches);
	fflush(stdout);

	return 0;
	}

	#include <stddef.h>
	#include <stdint.h>

	typedef bool boolean;

	// Pointer to a mixer entry point. Both exo_mix_data and template_mix_data
	// have this signature, which lets the benchmark harness call either one
	// through the same parameter.
	typedef void (*mix_data_function)(int32_t * __restrict__ dest_buffer, size_t len,
	 const int16_t *src, size_t src_len, int volume, unsigned channels,
	 unsigned resample_mode, size_t input_frequency, size_t output_frequency);

	// Macro-generated mixer: accumulates `src` into `dest_buffer` using the
	// case body selected by resample mode, volume, and channel count.
	void exo_mix_data(int32_t * __restrict__ dest_buffer, size_t len, const int16_t *src,
	size_t src_len, int volume, unsigned int channels, unsigned int resample_mode,
	size_t input_frequency, size_t output_frequency);

	// Template-generated mixer with the same signature; the benchmark compares
	// its output against exo_mix_data.
	void template_mix_data(int32_t * __restrict__ dest_buffer, size_t len, const int16_t *src,
	size_t src_len, int volume, unsigned int channels, unsigned int resample_mode,
	size_t input_frequency, size_t output_frequency);