Skip to content

Instantly share code, notes, and snippets.

@kode54
Created March 5, 2022 03:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kode54/0912aa2c4fe339913dd5397cea06eab5 to your computer and use it in GitHub Desktop.
Save kode54/0912aa2c4fe339913dd5397cea06eab5 to your computer and use it in GitHub Desktop.
Bit o' R8Brain benchmarking of DSD downsampling
#include <stdint.h>
#include "r8bstate.h"
// In my quick benchmarks on Apple Silicon, this decimator makes the following code about 40% faster
#define DSD_DECIMATOR 1
#ifdef DSD_DECIMATOR
/**
* DSD 2 PCM: Stage 1:
* Decimate by factor 8
* (one byte (8 samples) -> one float sample)
* The bits are processed from least signicifant to most signicicant.
* @author Sebastian Gesemann
*/
#define dsd2pcm_FILTER_COEFFS_COUNT 64
static const float dsd2pcm_FILTER_COEFFS[64] = {
0.09712411121659f, 0.09613438994044f, 0.09417884216316f, 0.09130441727307f,
0.08757947648990f, 0.08309142055179f, 0.07794369263673f, 0.07225228745463f,
0.06614191680338f, 0.05974199351302f, 0.05318259916599f, 0.04659059631228f,
0.04008603356890f, 0.03377897290478f, 0.02776684382775f, 0.02213240062966f,
0.01694232798846f, 0.01224650881275f, 0.00807793792573f, 0.00445323755944f,
0.00137370697215f, -0.00117318019994f, -0.00321193033831f, -0.00477694265140f,
-0.00591028841335f, -0.00665946056286f, -0.00707518873201f, -0.00720940203988f,
-0.00711340642819f, -0.00683632603227f, -0.00642384017266f, -0.00591723006715f,
-0.00535273320457f, -0.00476118922548f, -0.00416794965654f, -0.00359301524813f,
-0.00305135909510f, -0.00255339111833f, -0.00210551956895f, -0.00171076760278f,
-0.00136940723130f, -0.00107957856005f, -0.00083786862365f, -0.00063983084245f,
-0.00048043272086f, -0.00035442550015f, -0.00025663481039f, -0.00018217573430f,
-0.00012659899635f, -0.00008597726991f, -0.00005694188820f, -0.00003668060332f,
-0.00002290670286f, -0.00001380895679f, -0.00000799057558f, -0.00000440385083f,
-0.00000228567089f, -0.00000109760778f, -0.00000047286430f, -0.00000017129652f,
-0.00000004282776f, 0.00000000119422f, 0.00000000949179f, 0.00000000747450f
};
struct dsd2pcm_state {
/*
* This is the 2nd half of an even order symmetric FIR
* lowpass filter (to be used on a signal sampled at 44100*64 Hz)
* Passband is 0-24 kHz (ripples +/- 0.025 dB)
* Stopband starts at 176.4 kHz (rejection: 170 dB)
* The overall gain is 2.0
*/
/* These remain constant for the duration */
int FILT_LOOKUP_PARTS;
float *FILT_LOOKUP_TABLE;
uint8_t *REVERSE_BITS;
int FIFO_LENGTH;
int FIFO_OFS_MASK;
/* These are altered */
int *fifo;
int fpos;
};
static void dsd2pcm_free(void *);
static void dsd2pcm_reset(void *);
static void *dsd2pcm_alloc() {
struct dsd2pcm_state *state = (struct dsd2pcm_state *)calloc(1, sizeof(struct dsd2pcm_state));
float *FILT_LOOKUP_TABLE;
double *temp;
uint8_t *REVERSE_BITS;
if(!state)
return NULL;
state->FILT_LOOKUP_PARTS = (dsd2pcm_FILTER_COEFFS_COUNT + 7) / 8;
const int FILT_LOOKUP_PARTS = state->FILT_LOOKUP_PARTS;
// The current 128 tap FIR leads to an 8 KB lookup table
state->FILT_LOOKUP_TABLE = (float *)calloc(sizeof(float), FILT_LOOKUP_PARTS << 8);
if(!state->FILT_LOOKUP_TABLE)
goto fail;
FILT_LOOKUP_TABLE = state->FILT_LOOKUP_TABLE;
temp = (double *)calloc(sizeof(double), 0x100);
if(!temp)
goto fail;
for(int part = 0, sofs = 0, dofs = 0; part < FILT_LOOKUP_PARTS;) {
memset(temp, 0, 0x100 * sizeof(double));
for(int bit = 0, bitmask = 0x80; bit < 8 && sofs + bit < dsd2pcm_FILTER_COEFFS_COUNT;) {
double coeff = dsd2pcm_FILTER_COEFFS[sofs + bit];
for(int bite = 0; bite < 0x100; bite++) {
if((bite & bitmask) == 0) {
temp[bite] -= coeff;
} else {
temp[bite] += coeff;
}
}
bit++;
bitmask >>= 1;
}
for(int s = 0; s < 0x100;) {
FILT_LOOKUP_TABLE[dofs++] = (float)temp[s++];
}
part++;
sofs += 8;
}
free(temp);
{ // calculate FIFO stuff
int k = 1;
while(k < FILT_LOOKUP_PARTS * 2) k <<= 1;
state->FIFO_LENGTH = k;
state->FIFO_OFS_MASK = k - 1;
}
state->REVERSE_BITS = (uint8_t *)calloc(1, 0x100);
if(!state->REVERSE_BITS)
goto fail;
REVERSE_BITS = state->REVERSE_BITS;
for(int i = 0, j = 0; i < 0x100; i++) {
REVERSE_BITS[i] = (uint8_t)j;
// "reverse-increment" of j
for(int bitmask = 0x80;;) {
if(((j ^= bitmask) & bitmask) != 0) break;
if(bitmask == 1) break;
bitmask >>= 1;
}
}
state->fifo = (int *)calloc(sizeof(int), state->FIFO_LENGTH);
if(!state->fifo)
goto fail;
dsd2pcm_reset(state);
return (void *)state;
fail:
dsd2pcm_free(state);
return NULL;
}
static void *dsd2pcm_dup(void *_state) {
struct dsd2pcm_state *state = (struct dsd2pcm_state *)_state;
if(state) {
struct dsd2pcm_state *newstate = (struct dsd2pcm_state *)calloc(1, sizeof(struct dsd2pcm_state));
if(newstate) {
newstate->FILT_LOOKUP_PARTS = state->FILT_LOOKUP_PARTS;
newstate->FIFO_LENGTH = state->FIFO_LENGTH;
newstate->FIFO_OFS_MASK = state->FIFO_OFS_MASK;
newstate->fpos = state->fpos;
newstate->FILT_LOOKUP_TABLE = (float *)calloc(sizeof(float), state->FILT_LOOKUP_PARTS << 8);
if(!newstate->FILT_LOOKUP_TABLE)
goto fail;
memcpy(newstate->FILT_LOOKUP_TABLE, state->FILT_LOOKUP_TABLE, sizeof(float) * (state->FILT_LOOKUP_PARTS << 8));
newstate->REVERSE_BITS = (uint8_t *)calloc(1, 0x100);
if(!newstate->REVERSE_BITS)
goto fail;
memcpy(newstate->REVERSE_BITS, state->REVERSE_BITS, 0x100);
newstate->fifo = (int *)calloc(sizeof(int), state->FIFO_LENGTH);
if(!newstate->fifo)
goto fail;
memcpy(newstate->fifo, state->fifo, sizeof(int) * state->FIFO_LENGTH);
return (void *)newstate;
}
fail:
dsd2pcm_free(newstate);
return NULL;
}
return NULL;
}
static void dsd2pcm_free(void *_state) {
struct dsd2pcm_state *state = (struct dsd2pcm_state *)_state;
if(state) {
free(state->fifo);
free(state->REVERSE_BITS);
free(state->FILT_LOOKUP_TABLE);
free(state);
}
}
static void dsd2pcm_reset(void *_state) {
struct dsd2pcm_state *state = (struct dsd2pcm_state *)_state;
const int FILT_LOOKUP_PARTS = state->FILT_LOOKUP_PARTS;
int *fifo = state->fifo;
for(int i = 0; i < FILT_LOOKUP_PARTS; i++) {
fifo[i] = 0x55;
fifo[i + FILT_LOOKUP_PARTS] = 0xAA;
}
state->fpos = FILT_LOOKUP_PARTS;
}
static int dsd2pcm_latency(void *_state) {
struct dsd2pcm_state *state = (struct dsd2pcm_state *)_state;
if(state)
return state->FIFO_LENGTH;
else
return 0;
}
static void dsd2pcm_process(void *_state, const uint8_t *src, size_t sofs, size_t sinc, float *dest, size_t dofs, size_t dinc, size_t len) {
struct dsd2pcm_state *state = (struct dsd2pcm_state *)_state;
int bite1, bite2, temp;
float sample;
int *fifo = state->fifo;
const uint8_t *REVERSE_BITS = state->REVERSE_BITS;
const float *FILT_LOOKUP_TABLE = state->FILT_LOOKUP_TABLE;
const int FILT_LOOKUP_PARTS = state->FILT_LOOKUP_PARTS;
const int FIFO_OFS_MASK = state->FIFO_OFS_MASK;
int fpos = state->fpos;
while(len > 0) {
fifo[fpos] = REVERSE_BITS[fifo[fpos]] & 0xFF;
fifo[(fpos + FILT_LOOKUP_PARTS) & FIFO_OFS_MASK] = src[sofs] & 0xFF;
sofs += sinc;
temp = (fpos + 1) & FIFO_OFS_MASK;
sample = 0;
for(int k = 0, lofs = 0; k < FILT_LOOKUP_PARTS;) {
bite1 = fifo[(fpos - k) & FIFO_OFS_MASK];
bite2 = fifo[(temp + k) & FIFO_OFS_MASK];
sample += FILT_LOOKUP_TABLE[lofs + bite1] + FILT_LOOKUP_TABLE[lofs + bite2];
k++;
lofs += 0x100;
}
fpos = temp;
dest[dofs] = sample;
dofs += dinc;
len--;
}
state->fpos = fpos;
}
static void convert_dsd_to_f32(float *output, const uint8_t *input, size_t count, size_t channels, void **dsd2pcm) {
for(size_t channel = 0; channel < channels; ++channel) {
dsd2pcm_process(dsd2pcm[channel], input, channel, channels, output, channel, channels, count);
}
}
#endif
int main(void) {
unsigned long long sampleCount = 1024 * 1024 * 32;
uint8_t *dsd_random = new uint8_t[sampleCount];
for(size_t i = 0; i < sampleCount; ++i) {
dsd_random[i] = rand();
}
double srcRate = 44100.0 * 64.0;
double dstRate = 48000.0;
float *inputData;
#ifdef DSD_DECIMATOR
srcRate /= 8.0;
void *dsd2pcm = dsd2pcm_alloc();
uint32_t dsd2pcmLatency = dsd2pcm_latency(dsd2pcm);
uint8_t *dsd_silence = new uint8_t[dsd2pcmLatency];
float *dsd2pcm_temp = new float[sampleCount + dsd2pcmLatency];
memset(dsd_silence, 0x55, dsd2pcmLatency);
convert_dsd_to_f32(dsd2pcm_temp, dsd_random, sampleCount, 1, &dsd2pcm);
convert_dsd_to_f32(dsd2pcm_temp + sampleCount, dsd_silence, dsd2pcmLatency, 1, &dsd2pcm);
inputData = dsd2pcm_temp + dsd2pcmLatency;
#else
float *dsd2pcm_temp = new float[sampleCount * 8];
float *ptr = dsd2pcm_temp;
for(size_t i = 0; i < sampleCount; ++i) {
uint8_t sample = dsd_random[i];
for(size_t j = 128; j > 0; j >>= 1) {
if (sample & j) *ptr = +1.0;
else *ptr = -1.0;
++ptr;
}
}
inputData = dsd2pcm_temp;
#endif
#ifndef DSD_DECIMATOR
sampleCount *= 8;
#endif
float *outputBuffer = new float[1024];
unsigned long long ol = sampleCount * dstRate / srcRate;
unsigned long long ip = 0;
r8bstate *state = new r8bstate(1, 1024, srcRate, dstRate);
while(ol > 0) {
size_t blockCount = ol > 1024 ? 1024 : ol;
size_t inputAvail = sampleCount - ip;
size_t outputGenerated = 0;
size_t inputUsed = 0;
if(inputAvail > 1024) inputAvail = 1024;
if(inputAvail) {
outputGenerated = state->resample(&inputData[ip], inputAvail, &inputUsed, outputBuffer, blockCount);
ip += inputUsed;
} else {
outputGenerated = state->flush(outputBuffer, blockCount);
}
if(outputGenerated > ol) {
ol = 0;
} else {
ol -= outputGenerated;
}
}
delete state;
delete[] outputBuffer;
#ifdef DSD_DECIMATOR
dsd2pcm_free(dsd2pcm);
#endif
delete[] dsd2pcm_temp;
delete[] dsd_random;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment