Skip to content

Instantly share code, notes, and snippets.

@buserror
Created December 31, 2019 23:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save buserror/0a3a69cca927b8da6c9c7ee1605007fc to your computer and use it in GitHub Desktop.
Save buserror/0a3a69cca927b8da6c9c7ee1605007fc to your computer and use it in GitHub Desktop.
On a G4, pipelining was crucial, and on an 8-channel, 24-bit (well, 32-bit) audio path it was even more so.
/*
* Clip_48_8.cpp
* EZ8AudioDriver
*
* Created by Michel on Sun Oct 06 2002.
* Copyright (c) 2002 __MyCompanyName__. All rights reserved.
*
*/
#include <libkern/OSTypes.h>
#include "ice1712.h"
// NOTE(review): unsigned long is only 32 bits wide on ILP32 targets, and
// <libkern/OSTypes.h> above presumably declares UInt32 as well -- confirm that
// this typedef is still wanted for the intended target.
typedef unsigned long UInt32;
// Uncomment the next line to force the portable (non-assembly) code paths.
//#undef FAST
// FAST is expected to come from the build settings. When it is non-zero the
// inline-assembly variants are selected: FASTPLAY is tested by F32L24_48_8 and
// FASTCAP by L24F32_48_8. Otherwise the portable paths are built and the
// compiler emits a reminder.
#if FAST
#define FASTPLAY 1
#define FASTCAP 1
#else
#warning Compiling in SLOW mode!
#endif
namespace EZ8 {
/*
 * Executes the PowerPC "fctiw" (floating convert to integer word) instruction
 * on B and returns the destination floating-point register as-is.
 *
 * Per the PowerPC ISA, fctiw rounds according to the current FPSCR rounding
 * mode, saturates to the signed 32-bit range and leaves the integer in the low
 * word of the register, so the returned double carries an integer bit pattern
 * rather than a clamped floating-point value.
 *
 * NOTE(review): despite the name, nothing here clamps to [-1.0, 1.0], and the
 * FASTPLAY _clip() macro that calls this discards the return value -- confirm
 * whether this stage is meant to have any effect.
 */
static inline double __clip( register double B )
{
register double result;
// "=f" / "f": both operands are kept in floating-point registers.
asm( "fctiw %0, %1" : "=f" (result) : "f" (B) );
return result;
}
/*
 * Playback conversion: 8 interleaved float channels in, 8 integer words out
 * per frame, with a per-channel gain.
 *
 *   v     - 8 gain factors, one per channel; read once before the loop.
 *   ii    - input frames, 8 floats each.
 *   oo    - output frames; words 0..7 of each frame are written, then the
 *           pointer advances by NUM_CHANNELS_OUT words.
 *   count - number of frames to convert; zero or negative converts nothing.
 *
 * Portable path: each sample is multiplied by its gain, clamped, scaled by
 * 2^31, truncated to a signed 32-bit integer, and stored with the byte order
 * of its upper three bytes reversed and its low byte dropped.
 * FASTPLAY path: same stages, but the word is stored byte-reversed with the
 * PowerPC stwbrx instruction.
 *
 * The work for the eight channels is hand-interleaved ("staged") so that
 * independent operations sit next to each other. Within a frame every load
 * still happens before the first store.
 */
void F32L24_48_8(double *v, float *ii, UInt32 *oo, long count)
{
	register double v0=v[0],v1=v[1],v2=v[2],v3=v[3],v4=v[4],v5=v[5],v6=v[6],v7=v[7];
	// The staged table below lines up best with a tab width of 4.
	register double scale = 2147483648.0;	// 2^31
#if FASTPLAY
	// NOTE(review): the value returned by __clip() is discarded by _clip(), so
	// that stage leaves s<i> unchanged; range limiting is presumably left to
	// the (SInt32) conversion in _d2l2 -- confirm on the target compiler.
	// _stor writes o<i> byte-reversed at byte offset i*4 from oo.
#define _load(i) s##i = ii[i]
#define _volc(i) s##i *= v##i
#define _clip(i) __clip(s##i)
#define _d2l1(i) s##i *= scale
#define _d2l2(i) o##i = (SInt32)s##i
#define _stor(i) __asm__( "stwbrx %0, %1, %2" : : "r" (o##i), "b%" (i << 2), "r" (oo) : "memory" )
#else
	// Portable equivalent of the above, without assembly.
	//
	// Largest input that still fits a signed 32-bit word once scaled:
	// (2^31 - 1) / 2^31. Clamping to +1.0 instead would scale to exactly 2^31,
	// which is out of range for the (SInt32) conversion in _d2l1 (undefined
	// behaviour; wraps to full-scale negative on non-saturating targets).
	const double ceiling = 2147483647.0 / 2147483648.0;
#define _load(i) s##i = ii[i]
#define _volc(i) s##i *= v##i
#define _clip(i) do { if (s##i > ceiling) s##i = ceiling; else if (s##i < -1.0) s##i = -1.0; } while (0)
#define _d2l1(i) o##i = (SInt32)(s##i * scale)
#define _d2l2(i) o##i = (((o##i >> 8) & 0xff) << 16) | (((o##i >> 16) & 0xff) << 8) | (((o##i >> 24) & 0xff))
#define _stor(i) oo[i] = o##i
#endif
	register double s0,s1,s2,s3,s4,s5,s6,s7;
	register UInt32 o0,o1,o2,o3,o4,o5,o6,o7;
	// "> 0" so that a negative frame count is a no-op instead of a runaway loop.
	while (count-- > 0) {
		// staged pipeline 6x8
		_load(0);
		_load(1);
		_load(2);_volc(0);
		_load(3);_volc(1);_clip(0);
		_load(4);_volc(2);_clip(1);_d2l1(0);
		_load(5);_volc(3);_clip(2);_d2l1(1);_d2l2(0);
		_load(6);_volc(4);_clip(3);_d2l1(2);_d2l2(1);
		_load(7);_volc(5);_clip(4);_d2l1(3);_d2l2(2);_stor(0);
		         _volc(6);_clip(5);_d2l1(4);_d2l2(3);_stor(1);
		         _volc(7);_clip(6);_d2l1(5);_d2l2(4);_stor(2);
		                  _clip(7);_d2l1(6);_d2l2(5);_stor(3);
		                           _d2l1(7);_d2l2(6);_stor(4);
		                                    _d2l2(7);_stor(5);
		                                             _stor(6);
		                                             _stor(7);
		ii += 8;
		oo += NUM_CHANNELS_OUT;
	}
}
// Drop the stage macros so they cannot leak into the code that follows.
#undef _load
#undef _volc
#undef _clip
#undef _d2l1
#undef _d2l2
#undef _stor
/*
 * Executes the PowerPC "fctiw" (floating convert to integer word) instruction
 * on B and returns the destination floating-point register as-is; per the
 * PowerPC ISA the converted 32-bit integer sits in the low word of that
 * register, so the result is an integer bit pattern, not a numeric double.
 *
 * NOTE(review): the body is the same as __clip() and no call to this helper
 * appears in the visible part of this file -- confirm it is still needed.
 */
static inline double __fctiw( register double B )
{
register double result;
// "=f" / "f": both operands are kept in floating-point registers.
asm( "fctiw %0, %1" : "=f" (result) : "f" (B) );
return result;
}
/*
 * Capture conversion: 8 integer words in, 8 interleaved floats out per frame,
 * with a per-channel gain.
 *
 *   v     - 8 gain factors, one per channel; read once before the loop.
 *   ii    - input frames; words 0..7 of each frame are read, then the pointer
 *           advances by NUM_CHANNELS_IN words.
 *   oo    - output frames, 8 floats each.
 *   count - number of frames to convert; zero or negative converts nothing.
 *
 * Portable path: each word has the byte order of its lower three bytes
 * reversed (the top byte is dropped), is reinterpreted as a signed 32-bit
 * integer, scaled by 2^-31 and multiplied by its gain.
 * FASTCAP path: the word is loaded byte-reversed with the PowerPC lwbrx
 * instruction and converted to double through the bit trick described below.
 *
 * The work for the eight channels is hand-interleaved ("staged") so that
 * independent operations sit next to each other. Within a frame every load
 * still happens before the first store.
 */
void L24F32_48_8(double *v, UInt32 *ii, float *oo, long count)
{
	register double v0=v[0],v1=v[1],v2=v[2],v3=v[3],v4=v[4],v5=v[5],v6=v[6],v7=v[7];
#if FASTCAP
	// Integer-to-double conversion through memory. With an IEEE-754 double
	// stored high word first, a high word of 0x41400000 (exponent 2^21) and a
	// low word x encode the value 2^21 + x * 2^-31. _swap flips the sign bit
	// of the sample (turning the signed value into signed + 2^31), _l2d1 drops
	// it into the low word, _l2d2 reads the double back, and _l2d3 subtracts
	// dBias = 2^21 + 1, leaving signed / 2^31.
	// Two transfer slots are alternated (index & 1) between neighbouring
	// channels.
	union { double d; unsigned int i[2]; } transfer[2];
	transfer[0].i[0] = transfer[1].i[0] = (0x434UL - 32) << 20;	// == 0x41400000
	transfer[0].i[1] = 0x80000000;
	double dBias = transfer[0].d;
#define _load(i) __asm__ __volatile__("lwbrx %0, %1, %2" : "=r" (s##i) : "b%" (i << 2), "r" (ii) : "memory" )
#define _swap(i) s##i ^= 0x80000000UL
#define _l2d1(_i) transfer[_i & 1].i[1] = s##_i
#define _l2d2(_i) o##_i = transfer[_i & 1].d
#define _l2d3(i) o##i -= dBias
#define _volc(i) o##i *= v##i
#define _stor(i) oo[i] = o##i
#else
	// Portable equivalent: multiply by 1 / 2147483648.0 rather than dividing.
	register double scale = 4.656612873077392578125e-10;
#define _load(i) s##i = ii[i]
#define _swap(i) s##i = ((s##i & 0xff) << 24) | (((s##i >> 8) & 0xff) << 16) | (((s##i >> 16) & 0xff) << 8)
#define _l2d1(i) o##i = (((double)(SInt32)s##i) * scale)
#define _l2d2(i)
#define _l2d3(i)
#define _volc(i) o##i *= v##i
#define _stor(i) oo[i] = o##i
#endif
	register UInt32 s0,s1,s2,s3,s4,s5,s6,s7;
	register double o0,o1,o2,o3,o4,o5,o6,o7;
	// "> 0" so that a negative frame count is a no-op instead of a runaway loop.
	while (count-- > 0) {
		// staged pipeline 7x8
		_load(0);
		_load(1);
		_load(2);
		_load(3);_swap(0);
		_load(4);_swap(1);_l2d1(0);
		_load(5);_swap(2);_l2d1(1);_l2d2(0);
		_load(6);_swap(3);_l2d1(2);_l2d2(1);
		_load(7);_swap(4);_l2d1(3);_l2d2(2);_l2d3(0);
		         _swap(5);_l2d1(4);_l2d2(3);_l2d3(1);_volc(0);
		         _swap(6);_l2d1(5);_l2d2(4);_l2d3(2);_volc(1);_stor(0);
		         _swap(7);_l2d1(6);_l2d2(5);_l2d3(3);_volc(2);_stor(1);
		                  _l2d1(7);_l2d2(6);_l2d3(4);_volc(3);_stor(2);
		                           _l2d2(7);_l2d3(5);_volc(4);_stor(3);
		                                    _l2d3(6);_volc(5);_stor(4);
		                                    _l2d3(7);_volc(6);_stor(5);
		                                             _volc(7);_stor(6);
		                                                      _stor(7);
		ii += NUM_CHANNELS_IN;
		oo += 8;
	}
}
// Drop every stage macro defined above so none of them leaks past this function.
#undef _load
#undef _swap
#undef _l2d1
#undef _l2d2
#undef _l2d3
#undef _volc
#undef _stor
} // namespace EZ8
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment