/Cargo.toml

## Cargo.toml
[dependencies]
time = "0.1"
num = "0.1.24"

[package]
name = "dsp"
version = "0.1.0"

## dsp.cpp
#include "dsp.h"
#include <algorithm>
#include <cstdint>
#include <limits>

static constexpr int int16_min = std::numeric_limits<int16_t>::min();
static constexpr int int16_max = std::numeric_limits<int16_t>::max();

static inline int16_t clamp(int input)
{
     return std::max(int16_min, std::min(int16_max, input));
}

static inline int get_offset(const FilterState& filter_state, int relative_offset)
{
     static_assert(!(FilterState::size & (FilterState::size - 1)), "size must be a power of two.");
     return (filter_state.current + relative_offset) % filter_state.size;
}

static inline void push_sample(FilterState& filter_state, int16_t sample)
{
     filter_state.input[get_offset(filter_state, 0)] = sample;
     ++filter_state.current;
}

static inline int16_t get_output_sample(const FilterState& filter_state)
{
     return clamp(filter_state.output[get_offset(filter_state, 0)]);
}

// This is an implementation of a Chebyshev lowpass filter at 5000hz with ripple -0.50dB,
// 10th order, and for an input sample rate of 44100hz.
static inline void apply_lowpass(FilterState& filter_state)
{
     static_assert(FilterState::size >= 10, "FilterState::size must be at least 10.");

     double* x = filter_state.input;
     double* y = filter_state.output;

     y[get_offset(filter_state, 0)] =
       (  1.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state, -10)] + x[get_offset(filter_state,  -0)]))
     + ( 10.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state,  -9)] + x[get_offset(filter_state,  -1)]))
     + ( 45.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state,  -8)] + x[get_offset(filter_state,  -2)]))
     + (120.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state,  -7)] + x[get_offset(filter_state,  -3)]))
     + (210.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state,  -6)] + x[get_offset(filter_state,  -4)]))
     + (252.0 * (1.0 / 6.928330802e+06) *  x[get_offset(filter_state,  -5)])

     + (  -0.4441854896 * y[get_offset(filter_state, -10)])
     + (   4.2144719035 * y[get_offset(filter_state,  -9)])
     + ( -18.5365677633 * y[get_offset(filter_state,  -8)])
     + (  49.7394321983 * y[get_offset(filter_state,  -7)])
     + ( -90.1491003509 * y[get_offset(filter_state,  -6)])
     + ( 115.3235358151 * y[get_offset(filter_state,  -5)])
     + (-105.4969191433 * y[get_offset(filter_state,  -4)])
     + (  68.1964705422 * y[get_offset(filter_state,  -3)])
     + ( -29.8484881821 * y[get_offset(filter_state,  -2)])
     + (   8.0012026712 * y[get_offset(filter_state,  -1)]);
}

void apply_lowpass(FilterState& filter_state, const int16_t* input, int16_t* output, int length)
{
     for (int i = 0; i < length; ++i) {
          push_sample(filter_state, input[i]);
          apply_lowpass(filter_state);
          output[i] = get_output_sample(filter_state);
     }
}

## dsp.h
#include <cstdint>

struct FilterState {
	static constexpr int size = 16;

    double input[size];				// Must use doubles for these arrays, otherwise can be noisy.
    double output[size];
	unsigned int current;

	FilterState() : input{}, output{}, current{} {}
};

void apply_lowpass(FilterState& filter_state, const int16_t* input, int16_t* output, int length);

## dsp.rs
use std::cmp::max;
use std::cmp::min;
use std::i16;

const FILTER_SIZE : usize = 16;

pub struct FilterState {
    // Must use doubles for these arrays, otherwise can be noisy.
    input: [f64; FILTER_SIZE],
    output: [f64; FILTER_SIZE],
    current: u32
}

impl FilterState {
     pub fn new() -> FilterState {
          FilterState {input:[0.0; FILTER_SIZE], output:[0.0; FILTER_SIZE], current:0}
     }
}

#[inline]
fn clamp(input: i32) -> i16
{
     return max(i16::MIN as i32, min(i16::MAX as i32, input)) as i16;
}

#[inline]
fn get_offset(filter_state : &FilterState, relative_offset : i32) -> usize
{
     //#[static_assert] static t: bool = (FILTER_SIZE & (FILTER_SIZE - 1)) == 0;
     return ((filter_state.current + relative_offset as u32) % FILTER_SIZE as u32) as usize;
}

#[inline]
fn push_sample(filter_state : &mut FilterState, sample: i16) {
     filter_state.input[get_offset(filter_state, 0)] = sample as f64;
     filter_state.current = filter_state.current + 1;
}

#[inline]
fn get_output_sample(filter_state : &FilterState) -> i16
{
     return clamp(filter_state.output[get_offset(filter_state, 0)] as i32);
}

// This is an implementation of a Chebyshev lowpass filter at 5000hz with ripple -0.50dB,
// 10th order, and for an input sample rate of 44100hz.
#[inline]
fn apply_lowpass_single(filter_state : &mut FilterState)
{
     //#[static_assert] static t: bool = FILTER_SIZE >= 10;

    //let x = filter_state.input;
     let x = &filter_state.input;

     // Note: I didn't understand how to reference y; I couldn't make it work without either errors or silently dropping
     // the result (was copying the array?).
     // let y = &mut filter_state.output;
     // let y = mut filter_state.output;

     filter_state.output[get_offset(filter_state, 0)] =
       (  1.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state, -10)] + x[get_offset(filter_state,  -0)]))
     + ( 10.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state,  -9)] + x[get_offset(filter_state,  -1)]))
     + ( 45.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state,  -8)] + x[get_offset(filter_state,  -2)]))
     + (120.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state,  -7)] + x[get_offset(filter_state,  -3)]))
     + (210.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state,  -6)] + x[get_offset(filter_state,  -4)]))
     + (252.0 * (1.0 / 6.928330802e+06) *  x[get_offset(filter_state,  -5)])

     + (  -0.4441854896 * filter_state.output[get_offset(filter_state, -10)])
     + (   4.2144719035 * filter_state.output[get_offset(filter_state,  -9)])
     + ( -18.5365677633 * filter_state.output[get_offset(filter_state,  -8)])
     + (  49.7394321983 * filter_state.output[get_offset(filter_state,  -7)])
     + ( -90.1491003509 * filter_state.output[get_offset(filter_state,  -6)])
     + ( 115.3235358151 * filter_state.output[get_offset(filter_state,  -5)])
     + (-105.4969191433 * filter_state.output[get_offset(filter_state,  -4)])
     + (  68.1964705422 * filter_state.output[get_offset(filter_state,  -3)])
     + ( -29.8484881821 * filter_state.output[get_offset(filter_state,  -2)])
     + (   8.0012026712 * filter_state.output[get_offset(filter_state,  -1)]);
}

#[inline]
pub fn apply_lowpass(filter_state: &mut FilterState, input: &[i16], output: &mut [i16], length: i32)
{
     // Better way to do u32 range?
     for i in 0..length {
          push_sample(filter_state, input[i as usize]);
          apply_lowpass_single(filter_state);
          output[i as usize] = get_output_sample(filter_state);
     }
}

## main.cpp
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <time.h>
#include "dsp.h"

using namespace std;

static const int LENGTH = 524288;

static short inData[LENGTH];
static short outData[LENGTH];

static void populateData() {
	for (int i = 0; i < LENGTH; ++i) {
		inData[i] = sin(i) * 1000.0;
	}
}

static long doCTest() {
		clock_t start = clock();
		clock_t end = clock() + (CLOCKS_PER_SEC * 5);

		int iterations = 0;
		int dummy = 0;
		FilterState filter_state{};

		while (clock() < end) {
			apply_lowpass(filter_state, inData, outData, LENGTH);

			// Avoid some over-optimization
			dummy += outData[0];
			iterations++;
		}

		printf("Dummy:%d\n", dummy);
		printf("Iterations:%d\n",iterations);

		clock_t elapsed_ticks = clock() - start;
		long shortsPerSecond = (long) ((iterations * (long)LENGTH) / (elapsed_ticks / (double) CLOCKS_PER_SEC));

		return shortsPerSecond;
	}

int main() {
	populateData();

	printf("Beginning C tests...\n\n");
	long cResult = doCTest();

	printf("C results: %ld shorts per second.\n\n", cResult);
}

## main.rs
extern crate time;
extern crate num;

use time::precise_time_s;

mod dsp;

const LENGTH : usize = 524288;

fn do_rust_test(in_data: &[i16], out_data: &mut[i16]) -> i32 {
	let start = precise_time_s();
	let end = start + 5.0;

	let mut iterations = 0;
	let mut filter_state = dsp::FilterState::new();

	let mut dummy = 0;

	while precise_time_s() < end {
		dsp::apply_lowpass(&mut filter_state, in_data, out_data, LENGTH as i32);

		// Avoid some over-optimization
		dummy += out_data[0];

		iterations = iterations + 1;
	}

	println!("Dummy:{}", dummy);
	println!("Iterations:{}",iterations);

	let elapsed_time = precise_time_s() - start;
	let shorts_per_second = ((iterations * LENGTH) as f64 / elapsed_time) as i32;
	return shorts_per_second;
}

fn main() {
	let mut in_data = Box::new([0; LENGTH]);
 	let mut out_data = Box::new([0; LENGTH]);

 	for i in 0..LENGTH {
		in_data[i as usize] = ((i as f32).sin() * 1000.0) as i16;
	}

	println!("Beginning Rust tests...\n\n");
	let rust_result = do_rust_test(&*in_data, &mut *out_data);

	println!("Rust Results: {} shorts per second.\n\n", rust_result);
}

## results.txt
Best of 5

rustc --version
rustc 1.0.0 (a59de37e9 2015-05-13) (built 2015-05-14)

cargo run --release
62,127,704 shorts per second.

clang++ --version
Apple LLVM version 6.1.0 (clang-602.0.49) (based on LLVM 3.6.0svn)
Target: x86_64-apple-darwin14.3.0
Thread model: posix

clang++ -std=c++11 -O2 dsp.cpp main.cpp -o dsp && ./dsp
88,107,618 shorts per second.
	[dependencies]
	time = "0.1"
	num = "0.1.24"

	[package]
	name = "dsp"
	version = "0.1.0"
	#include "dsp.h"
	#include <algorithm>
	#include <cstdint>
	#include <limits>

	static constexpr int int16_min = std::numeric_limits<int16_t>::min();
	static constexpr int int16_max = std::numeric_limits<int16_t>::max();

	static inline int16_t clamp(int input)
	{
	return std::max(int16_min, std::min(int16_max, input));
	}

	static inline int get_offset(const FilterState& filter_state, int relative_offset)
	{
	static_assert(!(FilterState::size & (FilterState::size - 1)), "size must be a power of two.");
	return (filter_state.current + relative_offset) % filter_state.size;
	}

	static inline void push_sample(FilterState& filter_state, int16_t sample)
	{
	filter_state.input[get_offset(filter_state, 0)] = sample;
	++filter_state.current;
	}

	static inline int16_t get_output_sample(const FilterState& filter_state)
	{
	return clamp(filter_state.output[get_offset(filter_state, 0)]);
	}

	// This is an implementation of a Chebyshev lowpass filter at 5000hz with ripple -0.50dB,
	// 10th order, and for an input sample rate of 44100hz.
	static inline void apply_lowpass(FilterState& filter_state)
	{
	static_assert(FilterState::size >= 10, "FilterState::size must be at least 10.");

	double* x = filter_state.input;
	double* y = filter_state.output;

	y[get_offset(filter_state, 0)] =
	( 1.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state, -10)] + x[get_offset(filter_state, -0)]))
	+ ( 10.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state, -9)] + x[get_offset(filter_state, -1)]))
	+ ( 45.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state, -8)] + x[get_offset(filter_state, -2)]))
	+ (120.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state, -7)] + x[get_offset(filter_state, -3)]))
	+ (210.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state, -6)] + x[get_offset(filter_state, -4)]))
	+ (252.0 * (1.0 / 6.928330802e+06) * x[get_offset(filter_state, -5)])

	+ ( -0.4441854896 * y[get_offset(filter_state, -10)])
	+ ( 4.2144719035 * y[get_offset(filter_state, -9)])
	+ ( -18.5365677633 * y[get_offset(filter_state, -8)])
	+ ( 49.7394321983 * y[get_offset(filter_state, -7)])
	+ ( -90.1491003509 * y[get_offset(filter_state, -6)])
	+ ( 115.3235358151 * y[get_offset(filter_state, -5)])
	+ (-105.4969191433 * y[get_offset(filter_state, -4)])
	+ ( 68.1964705422 * y[get_offset(filter_state, -3)])
	+ ( -29.8484881821 * y[get_offset(filter_state, -2)])
	+ ( 8.0012026712 * y[get_offset(filter_state, -1)]);
	}

	void apply_lowpass(FilterState& filter_state, const int16_t* input, int16_t* output, int length)
	{
	for (int i = 0; i < length; ++i) {
	push_sample(filter_state, input[i]);
	apply_lowpass(filter_state);
	output[i] = get_output_sample(filter_state);
	}
	}
	#include <cstdint>

	struct FilterState {
	static constexpr int size = 16;

	double input[size]; // Must use doubles for these arrays, otherwise can be noisy.
	double output[size];
	unsigned int current;

	FilterState() : input{}, output{}, current{} {}
	};

	void apply_lowpass(FilterState& filter_state, const int16_t* input, int16_t* output, int length);
	use std::cmp::max;
	use std::cmp::min;
	use std::i16;

	const FILTER_SIZE : usize = 16;

	pub struct FilterState {
	// Must use doubles for these arrays, otherwise can be noisy.
	input: [f64; FILTER_SIZE],
	output: [f64; FILTER_SIZE],
	current: u32
	}

	impl FilterState {
	pub fn new() -> FilterState {
	FilterState {input:[0.0; FILTER_SIZE], output:[0.0; FILTER_SIZE], current:0}
	}
	}

	#[inline]
	fn clamp(input: i32) -> i16
	{
	return max(i16::MIN as i32, min(i16::MAX as i32, input)) as i16;
	}

	#[inline]
	fn get_offset(filter_state : &FilterState, relative_offset : i32) -> usize
	{
	//#[static_assert] static t: bool = (FILTER_SIZE & (FILTER_SIZE - 1)) == 0;
	return ((filter_state.current + relative_offset as u32) % FILTER_SIZE as u32) as usize;
	}

	#[inline]
	fn push_sample(filter_state : &mut FilterState, sample: i16) {
	filter_state.input[get_offset(filter_state, 0)] = sample as f64;
	filter_state.current = filter_state.current + 1;
	}

	#[inline]
	fn get_output_sample(filter_state : &FilterState) -> i16
	{
	return clamp(filter_state.output[get_offset(filter_state, 0)] as i32);
	}

	// This is an implementation of a Chebyshev lowpass filter at 5000hz with ripple -0.50dB,
	// 10th order, and for an input sample rate of 44100hz.
	#[inline]
	fn apply_lowpass_single(filter_state : &mut FilterState)
	{
	//#[static_assert] static t: bool = FILTER_SIZE >= 10;

	//let x = filter_state.input;
	let x = &filter_state.input;

	// Note: I didn't understand how to reference y; I couldn't make it work without either errors or silently dropping
	// the result (was copying the array?).
	// let y = &mut filter_state.output;
	// let y = mut filter_state.output;

	filter_state.output[get_offset(filter_state, 0)] =
	( 1.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state, -10)] + x[get_offset(filter_state, -0)]))
	+ ( 10.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state, -9)] + x[get_offset(filter_state, -1)]))
	+ ( 45.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state, -8)] + x[get_offset(filter_state, -2)]))
	+ (120.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state, -7)] + x[get_offset(filter_state, -3)]))
	+ (210.0 * (1.0 / 6.928330802e+06) * (x[get_offset(filter_state, -6)] + x[get_offset(filter_state, -4)]))
	+ (252.0 * (1.0 / 6.928330802e+06) * x[get_offset(filter_state, -5)])

	+ ( -0.4441854896 * filter_state.output[get_offset(filter_state, -10)])
	+ ( 4.2144719035 * filter_state.output[get_offset(filter_state, -9)])
	+ ( -18.5365677633 * filter_state.output[get_offset(filter_state, -8)])
	+ ( 49.7394321983 * filter_state.output[get_offset(filter_state, -7)])
	+ ( -90.1491003509 * filter_state.output[get_offset(filter_state, -6)])
	+ ( 115.3235358151 * filter_state.output[get_offset(filter_state, -5)])
	+ (-105.4969191433 * filter_state.output[get_offset(filter_state, -4)])
	+ ( 68.1964705422 * filter_state.output[get_offset(filter_state, -3)])
	+ ( -29.8484881821 * filter_state.output[get_offset(filter_state, -2)])
	+ ( 8.0012026712 * filter_state.output[get_offset(filter_state, -1)]);
	}

	#[inline]
	pub fn apply_lowpass(filter_state: &mut FilterState, input: &[i16], output: &mut [i16], length: i32)
	{
	// Better way to do u32 range?
	for i in 0..length {
	push_sample(filter_state, input[i as usize]);
	apply_lowpass_single(filter_state);
	output[i as usize] = get_output_sample(filter_state);
	}
	}
	#include <stdlib.h>
	#include <stdio.h>
	#include <math.h>
	#include <time.h>
	#include "dsp.h"

	using namespace std;

	static const int LENGTH = 524288;

	static short inData[LENGTH];
	static short outData[LENGTH];

	static void populateData() {
	for (int i = 0; i < LENGTH; ++i) {
	inData[i] = sin(i) * 1000.0;
	}
	}

	static long doCTest() {
	clock_t start = clock();
	clock_t end = clock() + (CLOCKS_PER_SEC * 5);

	int iterations = 0;
	int dummy = 0;
	FilterState filter_state{};

	while (clock() < end) {
	apply_lowpass(filter_state, inData, outData, LENGTH);

	// Avoid some over-optimization
	dummy += outData[0];
	iterations++;
	}

	printf("Dummy:%d\n", dummy);
	printf("Iterations:%d\n",iterations);

	clock_t elapsed_ticks = clock() - start;
	long shortsPerSecond = (long) ((iterations * (long)LENGTH) / (elapsed_ticks / (double) CLOCKS_PER_SEC));

	return shortsPerSecond;
	}

	int main() {
	populateData();

	printf("Beginning C tests...\n\n");
	long cResult = doCTest();

	printf("C results: %ld shorts per second.\n\n", cResult);
	}
	extern crate time;
	extern crate num;

	use time::precise_time_s;

	mod dsp;

	const LENGTH : usize = 524288;

	fn do_rust_test(in_data: &[i16], out_data: &mut[i16]) -> i32 {
	let start = precise_time_s();
	let end = start + 5.0;

	let mut iterations = 0;
	let mut filter_state = dsp::FilterState::new();

	let mut dummy = 0;

	while precise_time_s() < end {
	dsp::apply_lowpass(&mut filter_state, in_data, out_data, LENGTH as i32);

	// Avoid some over-optimization
	dummy += out_data[0];

	iterations = iterations + 1;
	}

	println!("Dummy:{}", dummy);
	println!("Iterations:{}",iterations);

	let elapsed_time = precise_time_s() - start;
	let shorts_per_second = ((iterations * LENGTH) as f64 / elapsed_time) as i32;
	return shorts_per_second;
	}

	fn main() {
	let mut in_data = Box::new([0; LENGTH]);
	let mut out_data = Box::new([0; LENGTH]);

	for i in 0..LENGTH {
	in_data[i as usize] = ((i as f32).sin() * 1000.0) as i16;
	}

	println!("Beginning Rust tests...\n\n");
	let rust_result = do_rust_test(&in_data, &mut out_data);

	println!("Rust Results: {} shorts per second.\n\n", rust_result);
	}
	Best of 5

	rustc --version
	rustc 1.0.0 (a59de37e9 2015-05-13) (built 2015-05-14)

	cargo run --release
	62,127,704 shorts per second.

	clang++ --version
	Apple LLVM version 6.1.0 (clang-602.0.49) (based on LLVM 3.6.0svn)
	Target: x86_64-apple-darwin14.3.0
	Thread model: posix

	clang++ -std=c++11 -O2 dsp.cpp main.cpp -o dsp && ./dsp
	88,107,618 shorts per second.