Skip to content

Instantly share code, notes, and snippets.

@sh1boot
Last active December 18, 2015 05:29
Show Gist options
  • Save sh1boot/5732738 to your computer and use it in GitHub Desktop.
Save sh1boot/5732738 to your computer and use it in GitHub Desktop.
experimental table-based divide operation
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#define TABBITS 8
#if 0
extern uint32_t divfn(uint16_t i, uint16_t j);
#else
#if TABBITS != 8
uint32_t table2[(1 << TABBITS) + 1];
#else/*{{{*/
static const uint32_t table2[256 + 1] __attribute__((progmem)) = {
0xffffffff, 0x00000000, 0x80000000, 0x55555555, 0x40000000, 0x33333333, 0x2aaaaaab, 0x24924925, 0x20000000, 0x1c71c71c, 0x1999999a, 0x1745d174, 0x15555555, 0x13b13b14, 0x12492492, 0x11111111,
0x10000000, 0x0f0f0f0f, 0x0e38e38e, 0x0d79435e, 0x0ccccccd, 0x0c30c30c, 0x0ba2e8ba, 0x0b21642d, 0x0aaaaaab, 0x0a3d70a4, 0x09d89d8a, 0x097b425f, 0x09249249, 0x08d3dcb1, 0x08888889, 0x08421084,
0x08000000, 0x07c1f07c, 0x07878788, 0x07507507, 0x071c71c7, 0x06eb3e45, 0x06bca1af, 0x06906907, 0x06666666, 0x063e7064, 0x06186186, 0x05f417d0, 0x05d1745d, 0x05b05b06, 0x0590b216, 0x0572620b,
0x05555555, 0x0539782a, 0x051eb852, 0x05050505, 0x04ec4ec5, 0x04d4873f, 0x04bda12f, 0x04a7904a, 0x04924925, 0x047dc11f, 0x0469ee58, 0x0456c798, 0x04444444, 0x04325c54, 0x04210842, 0x04104104,
0x04000000, 0x03f03f04, 0x03e0f83e, 0x03d22635, 0x03c3c3c4, 0x03b5cc0f, 0x03a83a84, 0x039b0ad1, 0x038e38e4, 0x0381c0e0, 0x03759f23, 0x0369d037, 0x035e50d8, 0x03531dec, 0x03483483, 0x033d91d3,
0x03333333, 0x03291620, 0x031f3832, 0x03159722, 0x030c30c3, 0x03030303, 0x02fa0be8, 0x02f14990, 0x02e8ba2f, 0x02e05c0c, 0x02d82d83, 0x02d02d03, 0x02c8590b, 0x02c0b02c, 0x02b93105, 0x02b1da46,
0x02aaaaab, 0x02a3a0fd, 0x029cbc15, 0x0295fad4, 0x028f5c29, 0x0288df0d, 0x02828283, 0x027c4598, 0x02762762, 0x02702702, 0x026a439f, 0x02647c69, 0x025ed098, 0x02593f6a, 0x0253c825, 0x024e6a17,
0x02492492, 0x0243f6f0, 0x023ee090, 0x0239e0d6, 0x0234f72c, 0x02302302, 0x022b63cc, 0x0226b902, 0x02222222, 0x021d9ead, 0x02192e2a, 0x0214d021, 0x02108421, 0x020c49ba, 0x02082082, 0x02040810,
0x02000000, 0x01fc07f0, 0x01f81f82, 0x01f4465a, 0x01f07c1f, 0x01ecc07b, 0x01e9131b, 0x01e573ad, 0x01e1e1e2, 0x01de5d6e, 0x01dae607, 0x01d77b65, 0x01d41d42, 0x01d0cb59, 0x01cd8569, 0x01ca4b30,
0x01c71c72, 0x01c3f8f0, 0x01c0e070, 0x01bdd2b9, 0x01bacf91, 0x01b7d6c4, 0x01b4e81b, 0x01b20364, 0x01af286c, 0x01ac5702, 0x01a98ef6, 0x01a6d01a, 0x01a41a42, 0x01a16d40, 0x019ec8e9, 0x019c2d15,
0x0199999a, 0x01970e50, 0x01948b10, 0x01920fb5, 0x018f9c19, 0x018d3019, 0x018acb91, 0x01886e5f, 0x01861862, 0x0183c978, 0x01818182, 0x017f4060, 0x017d05f4, 0x017ad221, 0x0178a4c8, 0x01767dce,
0x01745d17, 0x01724288, 0x01702e06, 0x016e1f77, 0x016c16c1, 0x016a13cd, 0x01681681, 0x01661ec7, 0x01642c86, 0x01623fa7, 0x01605816, 0x015e75bc, 0x015c9883, 0x015ac057, 0x0158ed23, 0x01571ed4,
0x01555555, 0x01539095, 0x0151d07f, 0x01501501, 0x014e5e0a, 0x014cab88, 0x014afd6a, 0x0149539e, 0x0147ae14, 0x01460cbc, 0x01446f86, 0x0142d662, 0x01414141, 0x013fb014, 0x013e22cc, 0x013c995a,
0x013b13b1, 0x013991c3, 0x01381381, 0x013698df, 0x013521d0, 0x0133ae46, 0x01323e35, 0x0130d190, 0x012f684c, 0x012e025c, 0x012c9fb5, 0x012b404b, 0x0129e413, 0x01288b01, 0x0127350c, 0x0125e227,
0x01249249, 0x01234568, 0x0121fb78, 0x0120b471, 0x011f7048, 0x011e2ef4, 0x011cf06b, 0x011bb4a4, 0x011a7b96, 0x01194538, 0x01181181, 0x0116e069, 0x0115b1e6, 0x011485f1, 0x01135c81, 0x0112358e,
0x01111111, 0x010fef01, 0x010ecf57, 0x010db20b, 0x010c9715, 0x010b7e6f, 0x010a6811, 0x010953f4, 0x01084211, 0x01073261, 0x010624dd, 0x0105197f, 0x01041041, 0x0103091b, 0x01020408, 0x01010101,
0x01000000,
};
#endif/*}}}*/
uint32_t divfn(uint16_t i, uint16_t j)
{
uint32_t t;
uint8_t shift = 0;
uint8_t k;
if (j <= 1)
return (int32_t)i << 16;
/* if j is too large to index the table, then we just shift it right until
* it's within bounds. In this case, we'll use the fractional part (k) for
* linear interpolation, and the integer part for the index.
*/
t = (uint32_t)j << 8;
while (t >= ((uint32_t)1 << (TABBITS + 8)))
{
t >>= 1;
shift++;
}
j = t >> 8;
k = (uint8_t)t;
/* look up (0x80000000 / j) */
t = table2[j];
if (k)
{
/* Linearly interpolate between adjacent table entries. Note that this
* works well because the set of values which are large and need to be
* shifted down and interpolated represent an especially linear part of
* the table.
*/
uint16_t tmp;
uint32_t n = table2[j + 1] - t;
uint32_t m;
tmp = (uint16_t)((uint8_t)(n >> 0)) * k;
tmp = (tmp >> 8) + (uint16_t)((uint8_t)(n >> 8)) * k;
m = (uint8_t)tmp;
tmp = (tmp >> 8) + (uint16_t)((uint8_t)(n >> 16)) * k;
m |= (uint32_t)((uint8_t)tmp) << 8;
tmp = (tmp >> 8) + (uint16_t)((uint8_t)(n >> 24)) * k;
m |= (uint32_t)((uint8_t)tmp) << 16;
t += m;
}
/* compute i * (0x100000000/j) >> 16 for a 16.16 fixed-point value i/j.
* If we had to shift j right, earlier, then we'll shift the result right
* as well, because larger should give us a smaller result.
*/
{
uint16_t tmp;
uint8_t b[4];
uint16_t hi;
#if 0 /* correct, but ineffective */
tmp = ((uint16_t)((uint8_t)i) * (uint8_t)t) >> 8;
#else
tmp = 0x80;
#endif
tmp += (uint16_t)((uint8_t)i) * (uint8_t)(t >> 8);
hi = (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)t;
if (shift < 1)
hi += 0x80;
tmp += (uint8_t)hi;
tmp >>= 8;
tmp += hi >> 8;
tmp += ((uint8_t)i) * (uint8_t)(t >> 16);
b[0] = (uint8_t)tmp;
tmp >>= 8;
tmp += ((uint8_t)i) * (uint8_t)(t >> 24);
b[1] = (uint8_t)tmp;
b[2] = (uint8_t)(tmp >> 8);
tmp = b[0] + (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)(t >> 8);
b[0] = (uint8_t)tmp;
tmp >>= 8;
tmp += b[1] + (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)(t >> 16);
b[1] = (uint8_t)tmp;
tmp >>= 8;
tmp += b[2] + (uint16_t)((uint8_t)(i >> 8)) * (uint8_t)(t >> 24);
b[2] = (uint8_t)tmp;
b[3] = (uint8_t)(tmp >> 8);
t = b[0] | (uint32_t)b[1] << 8 | (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24;
}
if (shift == 0)
return t;
while (shift-- > 1)
t >>= 1;
t++;
return t >> 1;
}
#endif
int main(void)
{
unsigned long i, j;
uint64_t err = 0;
#if TABBITS != 8
printf("static const uint32_t table2[%d + 1] __attribute__((progmem)) = {\n0xffffffff, ", 1 << TABBITS);
for (j = 1; j < (1 << TABBITS) + 1; j++)
{
table2[j] = (0x100000000u + (j / 2)) / j;
printf("0x%08x,%c", table2[j], ((j & 15) == 15) ? '\n' : ' ');
}
printf("\n}\n");
#endif
for (j = 1; j < 65536; j += 3)
for (i = 0; i < 65536; i += 3)
{
uint32_t ref = (uint32_t)(((uint32_t)i << 16) + (j / 2)) / j;
#if 0
uint32_t tabent = (0x80000000 + (j / 2)) / j;
uint32_t test = ((uint64_t)i * tabent + 0x4000) >> 15;
#else
uint32_t test = divfn(i, j);
#endif
int32_t e = (int32_t)(ref - test);
if (e > 1)
printf("%u/%u==%lu (0x%lx) (not %lu (0x%lx)) error: %ld\n", (unsigned)i, (unsigned)j, ref, ref, test, test, e);
err += abs(e);
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment