Skip to content

Instantly share code, notes, and snippets.

@bandito
Created May 1, 2012 17:44
Show Gist options
  • Save bandito/2569965 to your computer and use it in GitHub Desktop.
Save bandito/2569965 to your computer and use it in GitHub Desktop.
Mp3 decoder
#include "typedefs.h"
#include "imdct.h"
/* Spectral sample buffer at fixed data address 0x1080; MD_IMDCT_Init() stores
 * its address in `spectrum`. The expansion is parenthesised so the macro is a
 * single lvalue expression wherever it is used (e.g. &SPECTRUM_POS). */
#define SPECTRUM_POS (*(i16 *)0x1080)
/* Scratch vector used by both the 12-point and 36-point paths of MD_IMDCT(). */
static i16 tmp[18];
/* Polyphase state, indexed u[ch][div][row][column] by MD_Polyphase(). */
static i16 u[2][2][17][16]; /* no v[][], it's redundant */
static int u_start[2]={0,0}; /* first element of u[][] */
static int u_div[2]={0,0}; /* which part of u[][] is currently used */
/* Base of the spectral samples; set by MD_IMDCT_Init() and indexed as
 * spectrum[ch*576 + ...] by MD_IMDCT() and MD_Polyphase(). */
static i16 *spectrum;
/* PCM output written by MD_Polyphase() at GrannulesInBuffer*1152 + f*64 + ...
 * NOTE(review): the pragma presumably places the buffer in the linker section
 * "PCM_BUFFER" (TI compiler DATA_SECTION) - confirm against the linker file. */
#pragma DATA_SECTION(PcmBuffer, "PCM_BUFFER")
u16 PcmBuffer[6*2304];
extern u16 GrannulesInBuffer;
/* Second half of the previous IMDCT output, overlap-added by MD_IMDCT(). */
static i16 prev[18];
/*------------------------------------------------------------------*/
/* MD_IMDCT: inverse MDCT of the first no_of_imdcts subbands of      */
/* channel ch, in place on spectrum[ch*576 + sb*18 .. +17].          */
/*                                                                  */
/*   win_type     window/block type; 2 selects three 12-point       */
/*                (short) transforms whose outputs are overlapped   */
/*                and added here, any other value one 36-point      */
/*                transform windowed with win[win_type][].          */
/*   ch           channel index into the spectrum buffer.           */
/*   gr           not used by this function.                        */
/*   no_of_imdcts number of 18-sample subbands to transform.        */
/*                                                                  */
/* The first 18 output samples are added to prev[] and written back */
/* over the input; the remaining 18 become the new prev[]. Samples  */
/* at odd positions of odd subbands are negated.                    */
/*                                                                  */
/* NOTE(review): prev[] is a single 18-word tail shared by every    */
/* subband and channel, so each block is overlap-added with the     */
/* tail of whichever block was processed last - confirm whether a   */
/* per-(channel, subband) tail was intended.                        */
/*------------------------------------------------------------------*/
void MD_IMDCT(i16 win_type,i16 ch,i16 gr, u16 no_of_imdcts)
{
    register i16 save;
    i16 pp1, pp2;
    i16 i, p, ss;
    i16 *in;
    i16 out[36];
    i16 n;
    i16 sb;
    i16 tmp0,tmp1,tmp2,tmp3,tmp4,tmp0_,tmp1_,tmp2_,tmp3_;
    i16 tmp0o,tmp1o,tmp2o,tmp3o,tmp4o,tmp0_o,tmp1_o,tmp2_o,tmp3_o;
    i16 i0;
    i16 i0p12;
    i16 i6_;
    i16 e,o;

    for (sb=0; sb<no_of_imdcts; sb++)
    {
        in = &spectrum[ch*576+sb*18];
        if(win_type == 2)
        {
            for(p=0;p<36;p+=9)
            {
                out[p] = out[p+1] = out[p+2] = out[p+3] =
                out[p+4] = out[p+5] = out[p+6] = out[p+7] =
                out[p+8] = 0;
            }
            for(ss=0;ss<18;ss+=6)
            {
                /*
                 * 12 point IMDCT
                 */
                /* Begin 12 point IDCT */
                /* Input aliasing for 12 pt IDCT */
                in[5+ss]+=in[4+ss];
                in[4+ss]+=in[3+ss];
                in[3+ss]+=in[2+ss];
                in[2+ss]+=in[1+ss];
                in[1+ss]+=in[0+ss];
                /* Input aliasing on odd indices (for 6 point IDCT) */
                in[5+ss] += in[3+ss];
                in[3+ss] += in[1+ss];
                /* 3 point IDCT on even indices */
                /* Initially supposing that the Q15 format will never overflow */
                pp2 = q15_mul(in[4+ss],0x4000); /* *0.5f */
                pp1 = q15_mul(in[2+ss],0x6ED9); /* *0.866025403f */
                save = in[0+ss] + pp2;
                tmp[1] = in[0+ss] - in[4+ss];
                tmp[0] = save + pp1;
                tmp[2] = save - pp1;
                /* End 3 point IDCT on even indices */
                /* 3 point IDCT on odd indices (for 6 point IDCT) */
                pp2 = q15_mul(in[5+ss],0x4000); /* *0.5f */
                pp1 = q15_mul(in[3+ss],0x6ED9);
                save = in[1+ss] + pp2;
                tmp[4] = in[1+ss] - in[5+ss];
                tmp[5] = save + pp1;
                tmp[3] = save - pp1;
                /* End 3 point IDCT on odd indices */
                /* Twiddle factors on odd indices (for 6 point IDCT) */
                tmp[3] = q15_q13_mul(tmp[3],0x3DD1); /* 1.931851653f */
                tmp[4] = q15_mul(tmp[4],0x5A82); /* 0.707106781f */
                tmp[5] = q15_mul(tmp[5],0x4241); /* 0.517638090f */
                /* Output butterflies on 2 3 point IDCT's (for 6 point IDCT) */
                save = tmp[0];
                tmp[0] += tmp[5];
                tmp[5] = save - tmp[5];
                save = tmp[1];
                tmp[1] += tmp[4];
                tmp[4] = save - tmp[4];
                save = tmp[2];
                tmp[2] += tmp[3];
                tmp[3] = save - tmp[3];
                /* End 6 point IDCT */
                /* Twiddle factors on indices (for 12 point IDCT) */
                tmp[0] = q15_mul(tmp[0],0x408D); /* 0.504314480f */
                tmp[1] = q15_mul(tmp[1],0x4545); /* 0.541196100f */
                tmp[2] = q15_mul(tmp[2],0x50AB); /* 0.630236207f */
                tmp[3] = q15_mul(tmp[3],0x6921); /* 0.821339815f */
                tmp[4] = q15_q13_mul(tmp[4],0x29CF); /* 1.306562965f */
                /* Fix: scale tmp[5] itself; the old code re-scaled the already
                 * scaled tmp[4] and discarded the butterfly result in tmp[5]. */
                tmp[5] = q15_q13_mul(tmp[5],0x7A94); /* 3.830648788f */
                /* End 12 point IDCT */
                /* Shift to 12 point modified IDCT, multiply by window type 2 */
                /* Fix: 0x9A73 is -0.793353340 in Q15 (same constant as used for
                 * tmp[3] below); the old 0xF9A7 encoded roughly -0.05. */
                tmp[8] = q15_mul(tmp[0], 0x9A73); /* -0.793353340f */
                tmp[9] = q15_mul(tmp[0], 0xB214); /* -0.608761429f */
                tmp[7] = q15_mul(tmp[1], 0x89BE); /* -0.923879532f */
                tmp[10] = q15_mul(tmp[1], 0xCF04); /* -0.382683432f */
                tmp[6] = q15_mul(tmp[2], 0x8118); /* -0.991444861f */
                tmp[11] = q15_mul(tmp[2], 0xEF4A); /* -0.130526192f */
                tmp[0] = tmp[3];
                tmp[1] = q15_mul(tmp[4],0x30FB); /* 0.382683432f */
                tmp[2] = q15_mul(tmp[5],0x4DEB); /* 0.608761429f */
                tmp[3] = q15_mul(tmp[5],0x9A73); /* -0.793353340f */
                tmp[4] = q15_mul(tmp[4],0x89BE); /* -0.923879532f */
                tmp[5] = q15_mul(tmp[0],0x8118); /* -0.991444861f */
                tmp[0] = q15_mul(tmp[0],0x10B5); /* 0.130526192f */
                /* Overlap-add the 12 windowed samples into the 36-sample frame. */
                for (n=6; n<18; n++)
                    out[ss + n] += tmp[n-6];
            }
            /* overlapping */
            if (sb&1)
            {
                /* Odd subband: negate the samples at odd positions. */
                for (i=0;i<18;i+=2) in[i]=out[i] + prev[i];
                for (i=1;i<18;i+=2) in[i]=-out[i] - prev[i];
            }
            else
                for (i=0;i<18;i++) in[i]=out[i] + prev[i];
            for (i=18;i<36;i++) prev[i-18]=out[i]; /* create new overlap array */
        }
        else
        {
            /*
             * 36 point IDCT ****************************************************************
             */
            /* input aliasing for 36 point IDCT */
            for (n=17; n>0; n--)
                in[n]+=in[n-1];
            /* 18 point IDCT for odd indices */
            /* input aliasing for 18 point IDCT */
            in[17]+=in[15];
            in[15]+=in[13];
            in[13]+=in[11];
            in[11]+=in[9];
            in[9] +=in[7];
            in[7] +=in[5];
            in[5] +=in[3];
            in[3] +=in[1];

            /* Fast 9 Point Inverse Discrete Cosine Transform
            //
            // By Francois-Raymond Boyer
            // mailto:boyerf@iro.umontreal.ca
            // http://www.iro.umontreal.ca/~boyerf
            //
            // The code has been optimized for Intel processors
            // (takes a lot of time to convert float to and from internal FPU representation)
            //
            // It is a simple "factorization" of the IDCT matrix.
            */
            /* 9 point IDCT on even indices */
            /* 5 points on odd indices (not really an IDCT) */
            i0 = in[0]+in[0];
            i0p12 = i0 + in[12];
            tmp0 = i0p12 + q15_q13_mul(in[4],0x3C23) /*1.8793852415718f */ + q15_q13_mul(in[8],0x3106) /*1.532088886238f*/ + q15_mul(in[16],0x2C74); /* 0.34729635533386f*/
            tmp1 = i0 + in[4] - in[8] - in[12] - in[12] - in[16];
            tmp2 = i0p12 - q15_mul(in[4], 0x2C74) /*0.34729635533386f*/ - q15_q13_mul(in[8],0x3C23) /* 1.8793852415718f*/ + q15_q13_mul(in[16],0x3106); /* 1.532088886238f*/
            tmp3 = i0p12 - q15_q13_mul(in[4],0x3106) /*1.532088886238f*/ + q15_mul(in[8], 0x2C74) /*0.34729635533386f*/ - q15_q13_mul(in[16],0x3C23); /*1.8793852415718f */
            tmp4 = in[0] - in[4] + in[8] - in[12] + in[16];

            /* 4 points on even indices */
            /* 1.9696155060244=0x3F07 (Q13) */
            /* 1.2855752193731=0x2923 (Q13) */
            /* 0.68404028665134=0x578E (Q15) */
            i6_ = q15_q13_mul(in[6],0x376C); /*1.732050808f*/
            tmp0_ = q15_q13_mul(in[2],0x3F07) /*1.9696155060244f*/ + i6_ + q15_q13_mul(in[10],0x2923) /*1.2855752193731f*/ + q15_mul(in[14],0x578E); /*0.68404028665134f*/
            tmp1_ = q15_q13_mul((in[2] - in[10] - in[14]),0x376C); /*1.732050808f*/
            tmp2_ = q15_q13_mul(in[2],0x2923) /*1.2855752193731f*/ - i6_ - q15_mul(in[10],0x578E) /*0.68404028665134f*/ + q15_q13_mul(in[14],0x3F07); /*1.9696155060244f*/
            tmp3_ = q15_mul(in[2],0x578E) /*0.68404028665134f*/ - i6_ + q15_q13_mul(in[10],0x3F07) /*1.9696155060244f*/ - q15_q13_mul(in[14],0x2923); /*1.2855752193731f*/

            /* 9 point IDCT on odd indices */
            /* 5 points on odd indices (not really an IDCT) */
            i0 = in[0+1]+in[0+1];
            i0p12 = i0 + in[12+1];
            tmp0o = i0p12 + q15_q13_mul(in[4+1],0x3C23) /*1.8793852415718f */ + q15_q13_mul(in[8+1],0x3106) /*1.532088886238f*/ + q15_mul(in[16+1],0x2C74); /* 0.34729635533386f*/
            tmp1o = i0 + in[4+1] - in[8+1] - in[12+1] - in[12+1] - in[16+1];
            tmp2o = i0p12 - q15_mul(in[4+1], 0x2C74) /*0.34729635533386f*/ - q15_q13_mul(in[8+1],0x3C23) /* 1.8793852415718f*/ + q15_q13_mul(in[16+1],0x3106); /* 1.532088886238f*/
            tmp3o = i0p12 - q15_q13_mul(in[4+1],0x3106) /*1.532088886238f*/ + q15_mul(in[8+1], 0x2C74) /*0.34729635533386f*/ - q15_q13_mul(in[16+1],0x3C23); /*1.8793852415718f */
            tmp4o = q15_mul((in[0+1] - in[4+1] + in[8+1] - in[12+1] + in[16+1]),0x5A82); /*0.707106781f Twiddled */

            /* 4 points on even indices */
            i6_ = q15_q13_mul(in[6+1],0x376C); /*1.732050808f*/
            tmp0_o = q15_q13_mul(in[2+1],0x3F07) /*1.9696155060244f*/ + i6_ + q15_q13_mul(in[10+1],0x2923) /*1.2855752193731f*/ + q15_mul(in[14+1],0x578E); /*0.68404028665134f*/
            tmp1_o = q15_q13_mul((in[2+1] - in[10+1] - in[14+1]),0x376C); /*1.732050808f*/
            tmp2_o = q15_q13_mul(in[2+1],0x2923) /*1.2855752193731f*/ - i6_ - q15_mul(in[10+1],0x578E) /*0.68404028665134f*/ + q15_q13_mul(in[14+1],0x3F07); /*1.9696155060244f*/
            tmp3_o = q15_mul(in[2+1],0x578E) /*0.68404028665134f*/ - i6_ + q15_q13_mul(in[10+1],0x3F07) /*1.9696155060244f*/ - q15_q13_mul(in[14+1],0x2923); /*1.2855752193731f*/

            /* Twiddle factors on odd indices
            // and
            // Butterflies on 9 point IDCT's
            // and
            // twiddle factors for 36 point IDCT
            */
            e = tmp0 + tmp0_;
            o = q15_mul((tmp0o + tmp0_o),0x403E); /*0.501909918f*/
            tmp[0] = q15_mul((e + o),0xDFF8); /*(-0.500476342f*.5f)*/
            tmp[17] = q15_q11_mul((e - o),0xD226); /*(-11.46279281f*.5f)*/
            e = tmp1 + tmp1_;
            o = q15_mul((tmp1o + tmp1_o),0x4241); /* 0.517638090f */
            tmp[1] = q15_mul((e + o),0xDFB9); /* -0.504314480f*.5f */
            tmp[16] = q15_q13_mul((e - o),0xC2B5); /* (-3.830648788f*.5f) */
            e = tmp2 + tmp2_;
            o = q15_mul((tmp2o + tmp2_o),0x469D); /* 0.551688959f */
            tmp[2] = q15_mul((e + o),0xDF39); /* (-0.512139757f*.5f) */
            tmp[15] = q15_q13_mul((e - o),0xDB09); /* (-2.310113158f*.5f) */
            e = tmp3 + tmp3_;
            o = q15_mul((tmp3o + tmp3_o),0x4E21); /* 0.610387294f */
            tmp[3] = q15_mul((e + o),0xDE72); /* -0.524264562f*.5f */
            tmp[14] = q15_mul((e - o),0x9595); /* (-1.662754762f*.5f) */
            tmp[4] = q15_mul((tmp4 + tmp4o),0xBABA); /* (-0.541196100f) */
            tmp[13] = q15_q13_mul((tmp4 - tmp4o),0xD630); /* (-1.306562965f) */
            e = tmp3 - tmp3_;
            o = q15_mul((tmp3o - tmp3_o),0x6F94); /* 0.871723397f */
            tmp[5] = q15_mul((e + o),0xDBEC); /* (-0.563690973f*.5f) */
            tmp[12] = q15_mul((e - o),0xBAB2); /* (-1.082840285f*.5f) */
            e = tmp2 - tmp2_;
            o = q15_q13_mul((tmp2o - tmp2_o),0x25DB); /* 1.183100792f */
            tmp[6] = q15_mul((e + o), 0xDA0E); /* (-0.592844523f*.5f) */
            tmp[11] = q15_mul((e - o),0xC471); /* (-0.930579498f*.5f) */
            e = tmp1 - tmp1_;
            o = q15_q13_mul((tmp1o - tmp1_o),0x3DD1); /* 1.931851653f */
            tmp[7] = q15_mul((e + o),0xD7AA); /* (-0.630236207f*.5f) */
            tmp[10] = q15_mul((e - o),0xCB6F); /* (-0.821339815f*.5f) */
            e = tmp0 - tmp0_;
            o = q15_q11_mul((tmp0o - tmp0_o),0x2DE5); /* 5.736856623f */
            tmp[8] = q15_mul((e + o),0xD498); /* (-0.678170852f*.5f) */
            tmp[9] = q15_mul((e - o),0xD0A2); /* (-0.740093616f*.5f) */

            /* shift to modified IDCT */
            if (sb&1)
            {
                /* Odd subband: same samples as the even-subband branch below,
                 * with the overlap-added result negated at odd positions.
                 * Fix: the first half now reads tmp[9+n] like the even branch
                 * (it read tmp[n+1]), and odd positions negate the whole sum
                 * as the n>=9 loop and the short-block path already do. */
                for (n=0; n<9; n+=2)
                    in[n] = q15_mul(-tmp[9+n],win[win_type][n]) + prev[n];
                for (n=1; n<9; n+=2)
                    in[n] = -(q15_mul(-tmp[9+n],win[win_type][n]) + prev[n]);
                for (n=9; n<18; n+=2)
                    in[n] =-(q15_mul(tmp[26-n] , win[win_type][n]) + prev[n]);
                for (n=10; n<18; n+=2)
                    in[n] =(q15_mul(tmp[26-n] , win[win_type][n]) + prev[n]);
            }
            else
            {
                for (n=0; n<9; n++)
                    in[n] = q15_mul(-tmp[9+n] , win[win_type][n]) + prev[n];
                for (n=9; n<18; n++)
                    in[n] = q15_mul(tmp[26-n] , win[win_type][n]) + prev[n];
            }
            /* Windowed second half becomes the overlap tail for the next block. */
            for (n=0; n<9; n++)
                prev[n]= q15_mul(tmp[8-n] , win[win_type][18+n]);
            for (n=9; n<18; n++)
                prev[n]= q15_mul(tmp[n-9] , win[win_type][18+n]);
        }
    }
}
/*
 * MD_Polyphase: polyphase synthesis for one time slot.
 *
 *   ch  channel index; selects u[ch], u_start[ch], u_div[ch] and the
 *       spectrum[ch*576 + ...] input samples.
 *   f   sample index within each subband; subband sb is read from
 *       spectrum[ch*576 + 18*sb + f].
 *
 * The 32 subband samples go through a butterfly network (coefficients b[]),
 * the results are stored into both halves of u[ch], and 32 windowed output
 * samples (Dewindow[]) are written to
 * PcmBuffer[GrannulesInBuffer*1152 + f*64 + 2*sample + cha], where cha is the
 * opposite of ch. Finally u_start[ch] is decremented modulo 16 and u_div[ch]
 * is toggled.
 *
 * Fixes relative to the previous revision:
 *  - the last butterfly stage called q15_q14_mul() with one argument
 *    ("x * b[16]" instead of "x, b[16]");
 *  - the window accumulators are cleared for every output sample (they used
 *    to carry over from one sample to the next, and were never cleared in
 *    the even-div mirrored loop);
 *  - the mirrored loops write samples 17..31 (j+1); they used to overwrite
 *    sample 16 and leave sample 31 unwritten.
 */
void MD_Polyphase(u16 ch, u16 f)
{
    int start = u_start[ch];
    int div = u_div[ch];
    i16 (*u_p)[16];
    u16 j,n,k, cha;
    const i16 *dewindow = Dewindow[0] + 15 - start;
    i16 *u_ptr = (i16 *) u[ch][div];
    i16 outf1, outf2, outf3, outf4;
    i16 d16,d17,d18,d19,d20,d21,d22,d23,d24,d25,d26,d27,d28,d29,d30,d31;
    i16 d0,d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14,d15;
    i16 c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15;

    /* Output slot within each interleaved pair: the opposite of ch. */
    cha=(ch)?0:1;

    /* step 1: 16-wide butterflies between subband sb and subband 31-sb */
    d0 = spectrum[ch*576+f]; d16=q15_mul((d0 - spectrum[ch*576+558+f]) , b[1]); d0 += spectrum[ch*576+558+f];
    d1 = spectrum[ch*576+18+f]; d17=q15_q14_mul((d1 - spectrum[ch*576+540+f]) , b[3]); d1 += spectrum[ch*576+540+f];
    d3 = spectrum[ch*576+36+f]; d19=q15_q14_mul((d3 - spectrum[ch*576+522+f]) , b[5]); d3 += spectrum[ch*576+522+f];
    d2 = spectrum[ch*576+54+f]; d18=q15_q14_mul((d2 - spectrum[ch*576+504+f]) , b[7]); d2 += spectrum[ch*576+504+f];
    d6 = spectrum[ch*576+72+f]; d22=q15_q14_mul((d6 - spectrum[ch*576+486+f]) , b[9]); d6 += spectrum[ch*576+486+f];
    d7 = spectrum[ch*576+90+f]; d23=q15_q14_mul((d7 - spectrum[ch*576+468+f]) , b[11]); d7 += spectrum[ch*576+468+f];
    d5 = spectrum[ch*576+108+f]; d21=q15_q14_mul((d5 - spectrum[ch*576+450+f]) , b[13]); d5 += spectrum[ch*576+450+f];
    d4 = spectrum[ch*576+126+f]; d20=q15_q14_mul((d4 - spectrum[ch*576+432+f]) , b[15]); d4 += spectrum[ch*576+432+f];
    d12= spectrum[ch*576+144+f]; d28=q15_q14_mul((d12 - spectrum[ch*576+414+f]) , b[17]); d12+= spectrum[ch*576+414+f];
    d13= spectrum[ch*576+162+f]; d29=q15_q14_mul((d13 - spectrum[ch*576+396+f]) , b[19]); d13+= spectrum[ch*576+396+f];
    d15= spectrum[ch*576+180+f]; d31=q15_q14_mul((d15 - spectrum[ch*576+378+f]) , b[21]); d15+= spectrum[ch*576+378+f];
    d14= spectrum[ch*576+198+f]; d30=q15_q14_mul((d14 - spectrum[ch*576+360+f]) , b[23]); d14+= spectrum[ch*576+360+f];
    d10= spectrum[ch*576+216+f]; d26=q15_q14_mul((d10 - spectrum[ch*576+342+f]) , b[25]); d10+= spectrum[ch*576+342+f];
    d11= spectrum[ch*576+234+f]; d27=q15_q14_mul((d11 - spectrum[ch*576+324+f]) , b[27]); d11+= spectrum[ch*576+324+f];
    d9 = spectrum[ch*576+252+f]; d25=q15_q14_mul((d9 - spectrum[ch*576+306+f]) , b[29]); d9 += spectrum[ch*576+306+f];
    d8 = spectrum[ch*576+270+f]; d24=q15_q14_mul((d8 - spectrum[ch*576+288+f]) , b[31]); d8 += spectrum[ch*576+288+f];

    /* a test to see what can be done with memory separation
     * first we process indexes 0-15
     */
    c0 = d0 + d8 ; c8 = q15_q14_mul(( d0 - d8 ) , b[2]);
    c1 = d1 + d9 ; c9 = q15_q14_mul(( d1 - d9 ) , b[6]);
    c2 = d2 + d10; c10= q15_q14_mul(( d2 - d10) , b[14]);
    c3 = d3 + d11; c11= q15_q14_mul(( d3 - d11) , b[10]);
    c4 = d4 + d12; c12= q15_q14_mul(( d4 - d12) , b[30]);
    c5 = d5 + d13; c13= q15_q14_mul(( d5 - d13) , b[26]);
    c6 = d6 + d14; c14= q15_q14_mul(( d6 - d14) , b[18]);
    c7 = d7 + d15; c15= q15_q14_mul(( d7 - d15) , b[22]);

    /* step 3: 4-wide butterflies
     */
    d0 = c0 + c4 ; d4 = q15_q14_mul(( c0 - c4 ) , b[4]);
    d1 = c1 + c5 ; d5 = q15_q14_mul(( c1 - c5 ) , b[12]);
    d2 = c2 + c6 ; d6 = q15_q14_mul(( c2 - c6 ) , b[28]);
    d3 = c3 + c7 ; d7 = q15_q14_mul(( c3 - c7 ) , b[20]);
    d8 = c8 + c12; d12= q15_q14_mul(( c8 - c12) , b[4]);
    d9 = c9 + c13; d13= q15_q14_mul(( c9 - c13) , b[12]);
    d10= c10+ c14; d14= q15_q14_mul((c10 - c14) , b[28]);
    d11= c11+ c15; d15= q15_q14_mul((c11 - c15) , b[20]);

    /* step 4: 2-wide butterflies */
    c0 = d0 + d2 ; c2 = q15_q14_mul(( d0 - d2 ) , b[8]);
    c1 = d1 + d3 ; c3 = q15_q14_mul(( d1 - d3 ) , b[24]);
    c4 = d4 + d6 ; c6 = q15_q14_mul(( d4 - d6 ) , b[8]);
    c5 = d5 + d7 ; c7 = q15_q14_mul(( d5 - d7 ) , b[24]);
    c8 = d8 + d10; c10= q15_q14_mul(( d8 - d10) , b[8]);
    c9 = d9 + d11; c11= q15_q14_mul(( d9 - d11) , b[24]);
    c12= d12+ d14; c14= q15_q14_mul((d12 - d14) , b[8]);
    c13= d13+ d15; c15= q15_q14_mul((d13 - d15) , b[24]);

    /* step 5: 1-wide butterflies
     */
    /* this is a little 'hacked up'
     */
    d0 = q15_q11_mul((-c0 -c1),0x1000); d1 = q15_q14_mul(( c0 - c1 ) , b[16]);
    d2 = c2 + c3; d3 = q15_q14_mul(( c2 - c3 ) , b[16]);
    d3 -= d2;
    d4 = c4 +c5; d5 = q15_q14_mul(( c4 - c5 ), b[16]);
    d5 += d4;
    d7 = -d5;
    d7 += q15_q14_mul(( c6 - c7 ) , b[16]); d6 = +c6 +c7;
    d8 = c8 + c9 ; d9 = q15_q14_mul(( c8 - c9 ) , b[16]);
    d11= +d8 +d9;
    d11 +=q15_q14_mul((c10 - c11) , b[16]); d10= c10+ c11;
    d12 = c12+ c13; d13 = q15_q14_mul((c12 - c13) , b[16]);
    d13 += -d8-d9+d12;
    d14 = c14+ c15; d15 = q15_q14_mul((c14 - c15) , b[16]);
    d15-=d11;
    d14 += -d8 -d10;

    u_p = (i16 (*)[16]) &u[ch][div][0][start];
    /*16*/ u_p[ 0][0] =+d1 ;
    u_p[ 2][0] = +d9 -d14;
    /*20*/ u_p[ 4][0] = +d5 -d6;
    u_p[ 6][0] = -d10 +d13;
    /*24*/ u_p[ 8][0] =d3;
    u_p[10][0] = -d8 -d9 +d11 -d13;
    /*28*/ u_p[12][0] = +d7;
    u_p[14][0] = +d15;

    /* the other 32 are stored for use with the next granule
     */
    u_p = (i16 (*)[16]) &u[ch][!div][0][start];
    /*0*/ u_p[16][0] = d0;
    u_p[14][0] = -(+d8 );
    /*4*/ u_p[12][0] = -(+d4 );
    u_p[10][0] = -(-d8 +d12 );
    /*8*/ u_p[ 8][0] = -(+d2 );
    u_p[ 6][0] = -(+d8 +d10 -d12 );
    /*12*/ u_p[ 4][0] = -(-d4 +d6 );
    u_p[ 2][0] = -d14;
    u_p[ 0][0] = -d1;

    /* Same network for the difference terms d16..d31. */
    c0=d16 + d24; c8= q15_q14_mul((d16 - d24) , b[2]);
    c1=d17 + d25; c9= q15_q14_mul((d17 - d25) , b[6]);
    c2=d18 + d26; c10= q15_q14_mul((d18 - d26) , b[14]);
    c3=d19 + d27; c11= q15_q14_mul((d19 - d27) , b[10]);
    c4=d20 + d28; c12= q15_q14_mul((d20 - d28) , b[30]);
    c5=d21 + d29; c13= q15_q14_mul((d21 - d29) , b[26]);
    c6=d22 + d30; c14= q15_q14_mul((d22 - d30) , b[18]);
    c7=d23 + d31; c15= q15_q14_mul((d23 - d31) , b[22]);

    /* 3
     */
    d16= c0+ c4; d20= q15_q14_mul((c0 - c4) , b[4]);
    d17= c1+ c5; d21= q15_q14_mul((c1 - c5) , b[12]);
    d18= c2+ c6; d22= q15_q14_mul((c2 - c6) , b[28]);
    d19= c3+ c7; d23= q15_q14_mul((c3 - c7) , b[20]);
    d24= c8+ c12; d28= q15_q14_mul((c8 - c12) , b[4]);
    d25= c9+ c13; d29= q15_q14_mul((c9 - c13) , b[12]);
    d26= c10+ c14; d30= q15_q14_mul((c10 - c14) , b[28]);
    d27= c11+ c15; d31= q15_q14_mul((c11 - c15) , b[20]);

    /* 4
     */
    c0= d16+ d18; c2= q15_q14_mul((d16 - d18) , b[8]);
    c1= d17+ d19; c3= q15_q14_mul((d17 - d19) , b[24]);
    c4= d20+ d22; c6= q15_q14_mul((d20 - d22) , b[8]);
    c5= d21+ d23; c7= q15_q14_mul((d21 - d23) , b[24]);
    c8= d24+ d26; c10= q15_q14_mul((d24 - d26) , b[8]);
    c9= d25+ d27; c11= q15_q14_mul((d25 - d27) , b[24]);
    c12= d28+ d30; c14= q15_q14_mul((d28 - d30) , b[8]);
    c13= d29+ d31; c15= q15_q14_mul((d29 - d31) , b[24]);

    /* 5
     */
    /* Fix: b[16] is the second argument of q15_q14_mul(), as in every other
     * butterfly stage; it used to be multiplied into the first argument. */
    d16= c0+ c1; d17= q15_q14_mul((c0 - c1) , b[16]);
    d18= c2+ c3; d19= q15_q14_mul((c2 - c3) , b[16]);
    d20= c4+ c5; d21= q15_q14_mul((c4 - c5) , b[16]);
    d20+=d16; d21+=d17;
    d22= c6+ c7; d23= q15_q14_mul((c6 - c7) , b[16]);
    d22+=d16; d22+=d18;
    d23+=d16; d23+=d17; d23+=d19;
    d24= c8+ c9; d25= q15_q14_mul((c8 - c9) , b[16]);
    d26= c10+ c11; d27= q15_q14_mul((c10 - c11) , b[16]);
    d26+=d24;
    d27+=d24; d27+=d25;
    d28= c12+ c13; d29= q15_q14_mul((c12 - c13) , b[16]);
    d28-=d20; d29+=d28; d29-=d21;
    d30= c14+ c15; d31= q15_q14_mul((c14 - c15) , b[16]);
    d30-=d22;
    d31-=d23;

    u_p = (i16 (*)[16]) &u[ch][!div][0][start];
    u_p[ 1][0] = -(+d30 );
    u_p[ 3][0] = -(+d22 -d26 );
    u_p[ 5][0] = -(-d18 -d20 +d26 );
    u_p[ 7][0] = -(+d18 -d28 );
    u_p[ 9][0] = -(+d28 );
    u_p[11][0] = -(+d20 -d24 );
    u_p[13][0] = -(-d16 +d24 );
    u_p[15][0] = -(+d16 );

    /* the other 32 are stored for use with the next granule
     */
    u_p = (i16 (*)[16]) &u[ch][div][0][start];
    u_p[15][0] = +d31;
    u_p[13][0] = +d23 -d27;
    u_p[11][0] = -d19 -d20 -d21 +d27;
    u_p[ 9][0] = +d19 -d29;
    u_p[ 7][0] = -d18 +d29;
    u_p[ 5][0] = +d18 +d20 +d21 -d25 -d26;
    u_p[ 3][0] = -d17 -d22 +d25 +d26;
    u_p[ 1][0] = +d17 -d30;

    /* This is tuned specifically for architectures with
       autoincrement and -decrement. */
    u_ptr--;

    /* Samples 0..15: one 16-tap dot product of a row of u with the window. */
    for (j = 0; j < 16; ++j)
    {
        outf1=outf2=outf3=outf4=0; /* every output sample starts from zero */
        for (n=0; n<4; n++)
        {
            outf1 += q15_mul(*++u_ptr , *++dewindow);
            outf2 += q15_mul(*++u_ptr , *++dewindow);
            outf3 += q15_mul(*++u_ptr , *++dewindow);
            outf4 += q15_mul(*++u_ptr , *++dewindow);
        }
        PcmBuffer[GrannulesInBuffer*1152+f*64+j*2+cha] = outf1 + outf2 + outf3 + outf4;
        dewindow += 16;
    }

    if (div & 0x1)
    {
        /* Sample 16: only the odd taps (offsets 1,3,..,15) contribute. */
        k=1;
        outf2=outf4=0;
        for (n=0; n<4; n++)
        {
            outf2 += q15_mul(u_ptr[ k] , dewindow[k]);
            k+=2;
            outf4 += q15_mul(u_ptr[ k] , dewindow[k]);
            k+=2;
        }
        PcmBuffer[GrannulesInBuffer*1152+f*64+j*2+cha] = outf2 + outf4;
        dewindow -= 31;
        dewindow += start;
        dewindow += start;
        u_ptr -= 16;
        /* Samples 17..31: rows of u walked backwards, window read in reverse.
         * j runs 16..30 here, so the output slot is j+1. */
        for (; j < 31; ++j)
        {
            outf1=outf2=outf3=outf4=0;
            for (n=0; n<4; n++)
            {
                outf1 += q15_mul(*++u_ptr , *--dewindow);
                outf2 += q15_mul(*++u_ptr , *--dewindow);
                outf3 += q15_mul(*++u_ptr , *--dewindow);
                outf4 += q15_mul(*++u_ptr , *--dewindow);
            }
            PcmBuffer[GrannulesInBuffer*1152+f*64+(j+1)*2+cha] = outf2 - outf1 + outf4 - outf3;
            dewindow -= 16;
            u_ptr -= 32;
        }
    }
    else
    {
        /* Sample 16: only the even taps (offsets 2,4,..,16) contribute. */
        k=2;
        outf2=outf4=0;
        for (n=0; n<4; n++)
        {
            outf2 += q15_mul(u_ptr[ k] , dewindow[k]);
            k+=2;
            outf4 += q15_mul(u_ptr[ k] , dewindow[k]);
            k+=2;
        }
        PcmBuffer[GrannulesInBuffer*1152+f*64+j*2+cha] = outf2 + outf4;
        dewindow -= 31;
        dewindow += start;
        dewindow += start;
        u_ptr -= 16;
        /* Samples 17..31, as in the odd-div case but with opposite signs. */
        for (; j < 31; ++j)
        {
            outf1=outf2=outf3=outf4=0;
            for (n=0; n<4; n++)
            {
                outf1 += q15_mul(*++u_ptr , *--dewindow);
                outf2 += q15_mul(*++u_ptr , *--dewindow);
                outf3 += q15_mul(*++u_ptr , *--dewindow);
                outf4 += q15_mul(*++u_ptr , *--dewindow);
            }
            PcmBuffer[GrannulesInBuffer*1152+f*64+(j+1)*2+cha] = outf1 - outf2 + outf3 - outf4;
            dewindow -= 16;
            u_ptr -= 32;
        }
    }

    /* Advance the circular column index and swap the active half of u. */
    --u_start[ch];
    u_start[ch] &= 0xf;
    u_div[ch]=u_div[ch] ? 0 : 1;
}
void MD_IMDCT_Init()
{
u16 *k;
u16 i;
spectrum=&SPECTRUM_POS;
for (i=0; i<18; i++)
prev[i]=0;
k=&u[0][0][0][0];
for (i=0; i<2*2*17*16; i++)
*k++=0;
}
;This code is taken from TI's ........
;Fixed-point ln/exp helpers and the _descale routine built on them.
;_log takes an integer from 0 to 32768 (must be in A).
.mmregs
.global _log
.global _exp
.global _descale
.data
;Look-up table of ln(n) for n = 1..100 (100 words), used by _descale for
;small inputs where the series in _log loses important accuracy.
;Values are ln(n)*4096, i.e. Q12 (0B17h = 2839 = ln(2)*4096).
exp_lup .word 0h,0B17h,1193h,162Eh,19C0h,1CABh,1F22h,2145h,2327h
.word 24D7h,265Dh,27C2h,290Ah,2A39h,2B54h,2C5Ch,2D54h,2E3Eh,2F1Ch
.word 2FEEh,30B6h,3174h,322Ah,32D9h,3380h,3421h,34BBh,3550h,35E0h
.word 366Bh,36F1h,3773h,37F1h,386Bh,38E2h,3956h,39C6h,3A33h,3A9Dh
.word 3B05h,3B6Ah,3BCDh,3C2Dh,3C8Ch,3CE8h,3D42h,3D9Ah,3DF0h,3E44h
.word 3E97h,3EE8h,3F38h,3F86h,3FD2h,401Eh,4067h,40B0h,40F7h,413Dh
.word 4182h,41C6h,4208h,424Ah,428Ah,42CAh,4308h,4346h,4383h,43BEh
.word 43F9h,4433h,446Dh,44A5h,44DDh,4514h,454Ah,4580h,45B5h,45E9h
.word 461Ch,464Fh,4681h,46B3h,46E4h,4715h,4745h,4774h,47A3h,47D1h
.word 47FFh,482Ch,4859h,4885h,48B1h,48DCh,4907h,4932h,495Ch,4985h
.word 49AEh
;Q11 format 2048*n*ln2 starting from n=15 to n=0
logtbl .int 21294, 19874, 18454,17035, 15615, 14196, 12776
.int 11357,9937,8517,7098, 5678, 4259, 2839, 1420, 0
;Q15 format of the equation -32768/n (Taylor coefficients), n=10 down to 1,
;followed by two zero words. The n=12 and n=11 terms are commented out.
;Fix: the n=3 coefficient is -32768/3 = -10923 (was mistyped as -10293).
a9_log ;.int -2521,-2731
.int -3277, -3641,-4096,-4681,-5461,-6554,-8192
.int -10923,-16384,-32768, 0, 0
;Scratch words: N and X are used by _log, EXP and LNIS by _descale.
.bss N,1
.bss X,1
.bss EXP, 1
.bss LNIS,1
.text
;_log: natural logarithm of the integer in A.
;Per the call site in _descale the result is ln(A) in Q11, returned in A.
;Uses B, T, AR0, AR3, AR4 and the scratch words N and X.
;If the normalized input has no fraction bits left (an exact power of two)
;the result is read straight from logtbl; otherwise a polynomial in the
;fraction is evaluated with POLY and the logtbl scaling term is added.
_log:
STM N, AR4
ADD #0,A,B ;B=A=is
EXP B ;T=leading 0's of. Exponent of B
LD #0x4000, 16,A ;AH=16384, the largest supported scale
ST T,*AR4 ;Store scaling number in N
ANDM #0Fh, *AR4 ;compensate extra 16 leading bits
MVDM N,AR0 ;AR0 index to segment table
NORM B ;Normalize to Q15 format
AND #0x3FFF, 16, B ;BH=BH-0x4000
BC taylor_log, BNEQ ;if (B==0) which means it can be represented in 2^N form
;just return the result pre-stored in the index table
;NOTE(review): this path indexes from logtbl+1 while the Taylor path below
;indexes from logtbl - presumably intentional, confirm.
STM #logtbl+1, AR3
MAR *AR3+0
LD *AR3, A
RET
taylor_log:
STM X, AR4
SUB B, 0 , A ;A=A-B.A is the X in taylor's equation
STH A, *AR4 ;X is the fractional part in Q15 format
STM a9_log, AR3 ;AR3 points to coefficient in Taylor's equ
LD *AR4 , T ;T is the X in the polynomial equation. POLY uses the value
;of T
LD *AR3+, 16, A ;first coefficient of the n power in A
LD *AR3+, 16, B ;second coefficient of the (n-1) power in B
;NOTE(review): RPT #10 should repeat POLY 11 times (count+1), so with the two
;loads above 13 words are read from a9_log, which now holds only 12 since
;two coefficients were commented out - confirm the repeat count.
RPT #10 ;loop 13 times, enough accuracy for MP3
POLY *AR3+ ;AH=fractional part of the polynomial in Q15 format
SFTA A, -16 ;AH=AL
SFTA A, -4 ;Convert to Q11 format
STM #logtbl, AR3 ;sum up scaling part, N*ln2
MAR *AR3+0
ADD *AR3,A
RET
.data
;exptbl is generated by equation e^(-n). n starts from 0 to 10 into Q15 format
;(11 words; indexed through AR0 at exp_mul in _exp).
exptbl .int 0x7FFF, 0x2F16, 0x1152, 0x065F, 0x0258, 0x00DC, 0x0051, 0x01D, 0x000A, 0x0004, 0x0001
;a9 is generated by the equation 1/n!. N starts from 8 down to 1 to
;facilitate the use of POLY. Stored in Q15 (7FFFh = 1/1!, 4000h = 1/2!),
;followed by two zero words.
;a9_exp .int 1,7,46,273,1365,5461,16384,32767,0,0
a9_exp .int 0,0x6,0x2D,0x111,0x555,0x1555,0x4000,0x7FFF,0,0
.text
;Scratch words used by _exp: N1 = index into exptbl, X1 = fractional part.
.bss N1,1
.bss X1,1
;_exp: exponential of the value in A; result returned in A.
;_descale calls this with the low 16 bits of a Q11 sum; the comments below
;treat the input as a negative number - presumably 16-bit two's complement
;with sign extension off, TODO confirm.
;The input is negated, split into an integer index (stored in N1, used to
;index exptbl) and a fraction (stored in X1, fed to POLY with a9_exp).
;Uses B, T, AR0, AR3, AR4.
_exp:
;0ACD2h is the 16-bit two's-complement pattern of -21294, the magnitude of
;the first logtbl entry (15*ln2 in Q11); inputs below it return 0.
SUB #0ACD2h, A, B
BC exp_q15limit, BLT
AND #0h, B
OR #0ffffh,B
SUB A,B ;Negative number.Make positive to compare
ADD #0, B, A
ADD #0,A,B
STM N1, AR4
AND #400h, B ;Check if it is larger than 0.5
;NOTE(review): BCD is the delayed form of BC, so the instruction(s) in its
;delay slots run whether or not the branch is taken - confirm which of the
;following lines fall in the slots.
BCD adj, BNEQ ;If larger than 0.5 adjust
ADD #400h, A, B
STM N1, AR4
STL B, -11, *AR4 ;store scaling index
AND #3FFh, B ;truncate fractional part
STM X1, AR4 ;store fractional part
SFTA B,4
ADD #0,B,A
LD #0FFFFh, 0, B
SUB A,B ;Negative number.Make positive to compare
STL B, 0, *AR4 ;in Q15 format
B taylor_exp
;Input below the supported range: return 0.
exp_q15limit:
AND #0,A
B exp_exit
;Fraction above 0.5: the index stored in N1 comes from the input plus 400h
;and the fraction has 400h subtracted from it.
adj:
STL B, -11, *AR4 ;store scaling index
AND #7FFh, B ;truncate fractional part
SUB #400h, B
STM X1, AR4 ;store negative fraction
STL B, 4, *AR4 ;in Q15 format
LD *AR4, T
MPY #-1,B
STL B, *AR4
;Evaluate the a9_exp polynomial in the fraction stored at *AR4 (X1).
taylor_exp:
STM a9_exp, AR3 ;AR3 points to coefficient in
;Taylor's equ
LD *AR4 , T ;T is the X in the polynomial equation. POLY uses the value
;of T
LD *AR3+, 16, A ;first coefficient of the n power in A
LD *AR3+, 16, B ;second coefficient of the (n-1) power in B
RPT #7 ;loop 8 times, enough accuracy for MP3
POLY *AR3+ ;AH=fractional part of the polynomial in Q14
ADD #4000h, 16, A ;taylor equation has one constant.Q14
ADD #0,A, B ;round
AND #0800h, 16, B
BC exp_mul, BEQ ;if less than 0.5 don't round
AND #0F000h,16, A ;will not overflow. Max value 0x6F85
ADD #1000h,16,A ;rounding
;Scale the polynomial result by the exptbl entry selected by N1.
exp_mul:
MVDM N1, AR0 ;index into exptbl
STM exptbl, AR3
MAR *AR3+0
MPYA *AR3 ;multiply the scaling part
SFTA B,-14, A ;AL=BH
exp_exit:
RET
;This routine calculates the x=(is)^4/3*2^exp
;Inputs <is> in A, and exp in stack
;Method, following the steps below: take ln(is) (exp_lup table when is < 101,
;_log otherwise), multiply by 1.3333, store it in LNIS, combine it with
;exp*ln2 and pass the sum to _exp. Result is whatever _exp leaves in A.
;ST0 and ST1 are saved and restored around the body; SXM is cleared inside.
;Uses B, T, AR0, AR3, AR4 and the scratch words EXP and LNIS.
_descale:
STM EXP, AR4
MVMM SP, AR3 ;first extract exp and place in T
LD *AR3+, T
LD *AR3,T
ST T, *AR4
PSHM ST0
PSHM ST1
RSBX SXM ;we don't want sign extension
SUB #101, A, B ;now check is to see if we can use the look-up table
BC look_up, BLT
CALL _log ;returns ln(A) in Q11 format in A
;now multiply with 1,333333
SFTL A,+15
SFTL A, +1
STM #5555h, T
MPYA B
SFTA B, -14, A
B calc_x
;Table path for is < 101: exp_lup[is-1] * 1.3333.
;NOTE(review): is = 0 would index one word before exp_lup - confirm that
;callers never pass 0.
look_up:
SUB #1, A ;subtract 1 from A in order
STLM A, AR0 ;to use the look up table
STM exp_lup, AR3
nop
LD #5555h, 16, A
MAR *AR3+0 ;index AR3 to look up table
;load A with 1,33333 in Q14 format
MPYA *AR3 ;multiply and store in B
SFTA B, -12, A ;return result in Q14 format
SFTA A, -3 ;convert to Q11
;A holds 1.3333*ln(is) in Q11: save it, then form the exp*ln2 term.
calc_x:
STM LNIS, AR4
STL A, *AR4
STM EXP, AR4
LD *AR4, T
MPY #-1, B ;exp is negative. Make positive
SFTA B, +15, A ;before multiplying
SFTA A, +1
STM #58B9h, T ;T=ln2 (58B9h = ln2 in Q15)
MPYA B
SFTA B, -14, A ;convert to Q11
STL A, *AR4
LD *AR4, T
MPY #-1, B ;make negative again
STM LNIS, AR4 ;and now add with LNIS
ADD *AR4, B
AND #0FFFFh, B, A
CALL _exp
POPM ST1
POPM ST0
RET
.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment