edarc/gist:2946448

## gistfile1.c
// macro for generating dword bit values
#define _BVD( x ) ((uint32_t)( 1 ) << ( x ))

/*
 * generate a random byte using a 32-bit LFSR
 */
uint8_t random_byte_lfsr32(void)
{
    // the shift register
    static uint32_t lfsr = 0xDEADBEEF;

    // shift thru 8 states to get a completely new byte
    uint8_t state;

    for (state = 0; state <= 8; state++)
    {

        uint8_t new_bit = 0;

#ifndef OPTIMIZED_LFSR32

        /*
         * original code
         * 61175.8 cycles per update_output_arcing() avg with -O2, max refresh
         * ~250Hz, at 60Hz update ~24% cpu load.
         */

        // xor the tap values together
        if ( lfsr & _BVD(1) )
            new_bit ^= 1;
        if ( lfsr & _BVD(5) )
            new_bit ^= 1;
        if ( lfsr & _BVD(6) )
            new_bit ^= 1;
        if ( lfsr & _BVD(31) )
            new_bit ^= 1;

        // shift in the new bit
        lfsr >>= 1 ;
        lfsr |= (uint32_t)( new_bit ) << 31;

#else

        /*
         * hand-optimized version
         * 13697.0 cycles per update_output_arcing() avg with -O2, max refresh
         * ~1200Hz, at 60Hz update ~5% cpu load.
         */

        /*
         * Gratuitous commentary on the workings of this version are included.
         *
         * It's motivation is mostly that the 32-bit shift operation presently
         * generates positively horrendous machine code in current versions of
         * GCC's AVR target, with various shifting and temporary storage
         * chicanery to move the carried bits around, whereas we just use the
         * carry flag to get the job done in 4 instructions.
         *
         * Determining the new bit doesn't seem to be as bad, but we can still
         * do better by hand.
         */

        asm volatile(
            /*
             * xor the tap values into new_bit.
             *
             * "sbrc a b" means skip the next instruction if bit b in register
             * a is cleared. make this conditional branch for each tap in the
             * lfsr. if the bit at the tap is *set*, the branch is not taken,
             * proceeding on to "eor" which toggles the lsb in new_bit by
             * xor'ing it with a constant 0x01 in register 4.
             */
            "sbrc %a0, 1"                                   "\n\t"
            "eor  %1, %4"                                   "\n\t"
            "sbrc %a0, 5"                                   "\n\t"
            "eor  %1, %4"                                   "\n\t"
            "sbrc %a0, 6"                                   "\n\t"
            "eor  %1, %4"                                   "\n\t"
            "sbrc %d0, 7"                                   "\n\t"
            "eor  %1, %4"                                   "\n\t"

            /*
             * shift the lfsr value right.
             *
             * start with a logical right-shift operation at the low byte of
             * the word, which dumps the lsb of the word off the end into the
             * carry flag, and clears the msb. then use "ror" on the three
             * remaining bytes, which right-rotates through the carry flag. the
             * difference is that "ror" stuffs the contents of the carry flag
             * into the msb instead of just clearing it. the carry flag has
             * who-knows-what in it at the first shift, thus the use of "lsr"
             * first.  thereafter, the carry flag is used to propagate the lsb
             * of the prior byte into the msb of the current byte.
             */
            "lsr  %d0"                                      "\n\t"
            "ror  %c0"                                      "\n\t"
            "ror  %b0"                                      "\n\t"
            "ror  %a0"                                      "\n\t"

            /*
             * insert the new bit.
             *
             * new_bit's lsb says what the new bit will be. if it's clear, skip
             * the next instruction. said next instruction or's the high byte
             * with the immediate constant 0x80 to set the msb.
             */
            "sbrc %1, 0"                                    "\n\t"
            "ori  %d0, %5"                                  "\n\t"

            /****************************************************/

            /*
             * output registers
             *
             * 0 = lfsr
             * 1 = new_bit
             *
             * gcc will take the fact that lfsr is a uint32_t to mean load it
             * into four (probably consecutive) gprs, which are referred to
             * with %d0 thru %a0.
                         */
            : "=r" ( lfsr ), "=d" ( new_bit )

                        /*
             * input registers
             *
             * 0 = lfsr
             * 1 = new_bit
             * 4 = register constant
             * 5 = immediate constant
             *
             * 4 is a constant that gets loaded into a register. 5 is an
             * immediate constant, it ends up in the opcode, not a register. 2
             * and 3 are skipped because "input" lfsr and new_bit would have
             * been there but they are reassigned to 0 and 1 since they are the
             * same thing. it's basically gcc's non-obvious behavior.
                         */
            : "0" ( lfsr ), "1" ( new_bit ), "d" ( 0x01 ),
                "m" ( 0x80 )
            );

#endif

    }

    // return the low byte
    return (uint8_t)( lfsr & 0xFF );
}


/*
 * generate a random byte x where min <= x <= max
 */
int8_t random_byte_in(int8_t min, int8_t max)
{
    // get a random byte, in the LSB of a word for fixed point
    uint16_t a_random_byte = random_byte_lfsr32();

    // scale the thing, fixed point
    a_random_byte *= (max - min + 1);

    // de-fixed-point, translate, and return
    return (int8_t)( (a_random_byte >> 8) + min );
}

/*
 * generate a random unsigned byte x where min <= x <= max
 */
uint8_t random_ubyte_in(uint8_t min, uint8_t max)
{
    // get a random byte, in the LSB of a word for fixed point
    uint16_t a_random_byte = random_byte_lfsr32();

    // scale the thing, fixed point
    a_random_byte *= (max - min + 1);

    // de-fixed-point, translate, and return
    return (uint8_t)( (a_random_byte >> 8) + min );
}
	// macro for generating dword bit values
	#define _BVD( x ) ((uint32_t)( 1 ) << ( x ))

	/*
	* generate a random byte using a 32-bit LFSR
	*/
	uint8_t random_byte_lfsr32(void)
	{
	// the shift register
	static uint32_t lfsr = 0xDEADBEEF;

	// shift thru 8 states to get a completely new byte
	uint8_t state;

	for (state = 0; state <= 8; state++)
	{

	uint8_t new_bit = 0;

	#ifndef OPTIMIZED_LFSR32

	/*
	* original code
	* 61175.8 cycles per update_output_arcing() avg with -O2, max refresh
	* ~250Hz, at 60Hz update ~24% cpu load.
	*/

	// xor the tap values together
	if ( lfsr & _BVD(1) )
	new_bit ^= 1;
	if ( lfsr & _BVD(5) )
	new_bit ^= 1;
	if ( lfsr & _BVD(6) )
	new_bit ^= 1;
	if ( lfsr & _BVD(31) )
	new_bit ^= 1;

	// shift in the new bit
	lfsr >>= 1 ;
	lfsr \|= (uint32_t)( new_bit ) << 31;

	#else

	/*
	* hand-optimized version
	* 13697.0 cycles per update_output_arcing() avg with -O2, max refresh
	* ~1200Hz, at 60Hz update ~5% cpu load.
	*/

	/*
	* Gratuitous commentary on the workings of this version are included.
	*
	* It's motivation is mostly that the 32-bit shift operation presently
	* generates positively horrendous machine code in current versions of
	* GCC's AVR target, with various shifting and temporary storage
	* chicanery to move the carried bits around, whereas we just use the
	* carry flag to get the job done in 4 instructions.
	*
	* Determining the new bit doesn't seem to be as bad, but we can still
	* do better by hand.
	*/

	asm volatile(
	/*
	* xor the tap values into new_bit.
	*
	* "sbrc a b" means skip the next instruction if bit b in register
	* a is cleared. make this conditional branch for each tap in the
	* lfsr. if the bit at the tap is set, the branch is not taken,
	* proceeding on to "eor" which toggles the lsb in new_bit by
	* xor'ing it with a constant 0x01 in register 4.
	*/
	"sbrc %a0, 1" "\n\t"
	"eor %1, %4" "\n\t"
	"sbrc %a0, 5" "\n\t"
	"eor %1, %4" "\n\t"
	"sbrc %a0, 6" "\n\t"
	"eor %1, %4" "\n\t"
	"sbrc %d0, 7" "\n\t"
	"eor %1, %4" "\n\t"

	/*
	* shift the lfsr value right.
	*
	* start with a logical right-shift operation at the low byte of
	* the word, which dumps the lsb of the word off the end into the
	* carry flag, and clears the msb. then use "ror" on the three
	* remaining bytes, which right-rotates through the carry flag. the
	* difference is that "ror" stuffs the contents of the carry flag
	* into the msb instead of just clearing it. the carry flag has
	* who-knows-what in it at the first shift, thus the use of "lsr"
	* first. thereafter, the carry flag is used to propagate the lsb
	* of the prior byte into the msb of the current byte.
	*/
	"lsr %d0" "\n\t"
	"ror %c0" "\n\t"
	"ror %b0" "\n\t"
	"ror %a0" "\n\t"

	/*
	* insert the new bit.
	*
	* new_bit's lsb says what the new bit will be. if it's clear, skip
	* the next instruction. said next instruction or's the high byte
	* with the immediate constant 0x80 to set the msb.
	*/
	"sbrc %1, 0" "\n\t"
	"ori %d0, %5" "\n\t"

	/****************************************************/

	/*
	* output registers
	*
	* 0 = lfsr
	* 1 = new_bit
	*
	* gcc will take the fact that lfsr is a uint32_t to mean load it
	* into four (probably consecutive) gprs, which are referred to
	* with %d0 thru %a0.
	*/
	: "=r" ( lfsr ), "=d" ( new_bit )

	/*
	* input registers
	*
	* 0 = lfsr
	* 1 = new_bit
	* 4 = register constant
	* 5 = immediate constant
	*
	* 4 is a constant that gets loaded into a register. 5 is an
	* immediate constant, it ends up in the opcode, not a register. 2
	* and 3 are skipped because "input" lfsr and new_bit would have
	* been there but they are reassigned to 0 and 1 since they are the
	* same thing. it's basically gcc's non-obvious behavior.
	*/
	: "0" ( lfsr ), "1" ( new_bit ), "d" ( 0x01 ),
	"m" ( 0x80 )
	);

	#endif

	}

	// return the low byte
	return (uint8_t)( lfsr & 0xFF );
	}


	/*
	* generate a random byte x where min <= x <= max
	*/
	int8_t random_byte_in(int8_t min, int8_t max)
	{
	// get a random byte, in the LSB of a word for fixed point
	uint16_t a_random_byte = random_byte_lfsr32();

	// scale the thing, fixed point
	a_random_byte *= (max - min + 1);

	// de-fixed-point, translate, and return
	return (int8_t)( (a_random_byte >> 8) + min );
	}

	/*
	* generate a random unsigned byte x where min <= x <= max
	*/
	uint8_t random_ubyte_in(uint8_t min, uint8_t max)
	{
	// get a random byte, in the LSB of a word for fixed point
	uint16_t a_random_byte = random_byte_lfsr32();

	// scale the thing, fixed point
	a_random_byte *= (max - min + 1);

	// de-fixed-point, translate, and return
	return (uint8_t)( (a_random_byte >> 8) + min );
	}