Created
June 18, 2012 02:20
-
-
Save edarc/2946448 to your computer and use it in GitHub Desktop.
Fast AVR 32-bit LFSR
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// macro for generating dword bit values | |
#define _BVD( x ) ((uint32_t)( 1 ) << ( x )) | |
/* | |
* generate a random byte using a 32-bit LFSR | |
*/ | |
uint8_t random_byte_lfsr32(void) | |
{ | |
// the shift register | |
static uint32_t lfsr = 0xDEADBEEF; | |
// shift thru 8 states to get a completely new byte | |
uint8_t state; | |
for (state = 0; state <= 8; state++) | |
{ | |
uint8_t new_bit = 0; | |
#ifndef OPTIMIZED_LFSR32 | |
/* | |
* original code | |
* 61175.8 cycles per update_output_arcing() avg with -O2, max refresh | |
* ~250Hz, at 60Hz update ~24% cpu load. | |
*/ | |
// xor the tap values together | |
if ( lfsr & _BVD(1) ) | |
new_bit ^= 1; | |
if ( lfsr & _BVD(5) ) | |
new_bit ^= 1; | |
if ( lfsr & _BVD(6) ) | |
new_bit ^= 1; | |
if ( lfsr & _BVD(31) ) | |
new_bit ^= 1; | |
// shift in the new bit | |
lfsr >>= 1 ; | |
lfsr |= (uint32_t)( new_bit ) << 31; | |
#else | |
/* | |
* hand-optimized version | |
* 13697.0 cycles per update_output_arcing() avg with -O2, max refresh | |
* ~1200Hz, at 60Hz update ~5% cpu load. | |
*/ | |
/* | |
* Gratuitous commentary on the workings of this version are included. | |
* | |
* It's motivation is mostly that the 32-bit shift operation presently | |
* generates positively horrendous machine code in current versions of | |
* GCC's AVR target, with various shifting and temporary storage | |
* chicanery to move the carried bits around, whereas we just use the | |
* carry flag to get the job done in 4 instructions. | |
* | |
* Determining the new bit doesn't seem to be as bad, but we can still | |
* do better by hand. | |
*/ | |
asm volatile( | |
/* | |
* xor the tap values into new_bit. | |
* | |
* "sbrc a b" means skip the next instruction if bit b in register | |
* a is cleared. make this conditional branch for each tap in the | |
* lfsr. if the bit at the tap is *set*, the branch is not taken, | |
* proceeding on to "eor" which toggles the lsb in new_bit by | |
* xor'ing it with a constant 0x01 in register 4. | |
*/ | |
"sbrc %a0, 1" "\n\t" | |
"eor %1, %4" "\n\t" | |
"sbrc %a0, 5" "\n\t" | |
"eor %1, %4" "\n\t" | |
"sbrc %a0, 6" "\n\t" | |
"eor %1, %4" "\n\t" | |
"sbrc %d0, 7" "\n\t" | |
"eor %1, %4" "\n\t" | |
/* | |
* shift the lfsr value right. | |
* | |
* start with a logical right-shift operation at the low byte of | |
* the word, which dumps the lsb of the word off the end into the | |
* carry flag, and clears the msb. then use "ror" on the three | |
* remaining bytes, which right-rotates through the carry flag. the | |
* difference is that "ror" stuffs the contents of the carry flag | |
* into the msb instead of just clearing it. the carry flag has | |
* who-knows-what in it at the first shift, thus the use of "lsr" | |
* first. thereafter, the carry flag is used to propagate the lsb | |
* of the prior byte into the msb of the current byte. | |
*/ | |
"lsr %d0" "\n\t" | |
"ror %c0" "\n\t" | |
"ror %b0" "\n\t" | |
"ror %a0" "\n\t" | |
/* | |
* insert the new bit. | |
* | |
* new_bit's lsb says what the new bit will be. if it's clear, skip | |
* the next instruction. said next instruction or's the high byte | |
* with the immediate constant 0x80 to set the msb. | |
*/ | |
"sbrc %1, 0" "\n\t" | |
"ori %d0, %5" "\n\t" | |
/****************************************************/ | |
/* | |
* output registers | |
* | |
* 0 = lfsr | |
* 1 = new_bit | |
* | |
* gcc will take the fact that lfsr is a uint32_t to mean load it | |
* into four (probably consecutive) gprs, which are referred to | |
* with %d0 thru %a0. | |
*/ | |
: "=r" ( lfsr ), "=d" ( new_bit ) | |
/* | |
* input registers | |
* | |
* 0 = lfsr | |
* 1 = new_bit | |
* 4 = register constant | |
* 5 = immediate constant | |
* | |
* 4 is a constant that gets loaded into a register. 5 is an | |
* immediate constant, it ends up in the opcode, not a register. 2 | |
* and 3 are skipped because "input" lfsr and new_bit would have | |
* been there but they are reassigned to 0 and 1 since they are the | |
* same thing. it's basically gcc's non-obvious behavior. | |
*/ | |
: "0" ( lfsr ), "1" ( new_bit ), "d" ( 0x01 ), | |
"m" ( 0x80 ) | |
); | |
#endif | |
} | |
// return the low byte | |
return (uint8_t)( lfsr & 0xFF ); | |
} | |
/* | |
* generate a random byte x where min <= x <= max | |
*/ | |
int8_t random_byte_in(int8_t min, int8_t max) | |
{ | |
// get a random byte, in the LSB of a word for fixed point | |
uint16_t a_random_byte = random_byte_lfsr32(); | |
// scale the thing, fixed point | |
a_random_byte *= (max - min + 1); | |
// de-fixed-point, translate, and return | |
return (int8_t)( (a_random_byte >> 8) + min ); | |
} | |
/* | |
* generate a random unsigned byte x where min <= x <= max | |
*/ | |
uint8_t random_ubyte_in(uint8_t min, uint8_t max) | |
{ | |
// get a random byte, in the LSB of a word for fixed point | |
uint16_t a_random_byte = random_byte_lfsr32(); | |
// scale the thing, fixed point | |
a_random_byte *= (max - min + 1); | |
// de-fixed-point, translate, and return | |
return (uint8_t)( (a_random_byte >> 8) + min ); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment