Klafyvel/CompareLongAndFloat.ino

## CompareLongAndFloat.ino
/*
  Timing Float and Long operations. The technique used is described
  here : https://forum.arduino.cc/t/timing-the-little-things/47247

  For multiplication this should print :

  long loops: 64 clocks: 92
  float loops: 52 clocks: 115

  For addition :

  long loops: 213 clocks: 22
  float loops: 46 clocks: 131
*/

// Operands of the integer benchmark; they receive their values in setup().
long al;
long bl;

// Operands of the floating-point benchmark; they receive their values in setup().
float af;
float bf;

// Operation applied between the operands inside the timed loops.
// Swap the two lines below to time multiplication instead of addition.
#define OPERATOR +
// #define OPERATOR *

// Opens the serial port at 9600 baud and loads the benchmark operands.
void setup()
{
  Serial.begin(9600);

  af = 5.5;
  bf = 3.4;

  al = 50;
  bl = 34;
}

// Runs the long benchmark, then the float benchmark, prints how many times
// each operation fitted in the measurement window together with the derived
// clock count per operation, and pauses for half a second.
void loop()
{
  // TCNT0 serves as the stopwatch. It is the 8 bit timer used to compute
  // milliseconds and drive PWM0; it increments every 64 clock cycles and
  // rolls over from 255 to 0.
  //
  // The code under test is repeated while the timer goes from 156 through
  // 255, which gives us 64*100 clock cycles.
  //
  // In practice this works for timing operations that take from 1 to
  // hundreds of clock cycles. Beyond that the results get coarse, because the
  // last repetition overshoots the end of the window by a fair amount.
  const unsigned long window_clocks = 100UL * 64UL;
  const int empty_loop_clocks = 8;  // cost of the loop without any payload
  unsigned long passes = 0;

  // ---- long ----
  while (TCNT0 != 155) {}           // wait for tick 155 to show up
  while (TCNT0 == 155) {}           // leave exactly when tick 155 is over

  cli();                            // no interrupts while measuring
  // Comparing against 150 instead of 0 tolerates missing the exact rollover.
  for (; TCNT0 > 150; ++passes) {
    al = al OPERATOR bl;            // <---- code being timed
  }
  sei();                            // interrupts back on

  Serial.print("long ");
  Serial.print("loops: ");
  Serial.print(passes, DEC);
  Serial.print(" clocks: ");
  Serial.print(static_cast<int>(window_clocks / passes) - empty_loop_clocks, DEC);
  Serial.println();

  // ---- float ----
  passes = 0;
  while (TCNT0 != 155) {}           // wait for tick 155 to show up
  while (TCNT0 == 155) {}           // leave exactly when tick 155 is over

  cli();                            // no interrupts while measuring
  for (; TCNT0 > 150; ++passes) {
    af = af OPERATOR bf;            // <---- code being timed
  }
  sei();                            // interrupts back on

  Serial.print("float ");
  Serial.print("loops: ");
  Serial.print(passes, DEC);
  Serial.print(" clocks: ");
  Serial.print(static_cast<int>(window_clocks / passes) - empty_loop_clocks, DEC);
  Serial.println();

  delay(500);
}

## FloatMultiplication.ino
/* Comparing custom and native float multiplications.

   This should print :

   af = 5.50 bf = 3.40 cf = 18.65native cf =18.70
   native float loops: 41 clocks: 148
   custom float loops: 58 clocks: 102

*/


/* A not-so-crappy™ approximation of float multiplication.

   Multiplies two IEEE-754 single precision values with hand-written AVR
   assembly. Only byte C of each operand (the upper seven stored mantissa bits,
   completed with the implicit leading one) enters the product, so the result
   carries at most 16 significant mantissa bits; bytes A and B of the operands
   are never read.

   The assembly contains no special handling for zero, infinities, NaN or
   subnormal inputs, and does not check the exponent for overflow/underflow.

   a, b   : the factors.
   return : the approximate product a*b.
*/
float floatmul(float a, float b) {
  float result = 0;

  asm (
    /* First step : manage the sign of the product, and store it in flag T.*/
    "mov __tmp_reg__,%D[a]" "\n\t"
    "eor __tmp_reg__,%D[b]" "\n\t"
    "bst __tmp_reg__,7" "\n\t"
    /* Second step : prepare the mantissa under the 1.7 form, and isolate the exponents. */
    /* We copy the high byte of a's mantissa in register B of the result,
       and put it in the 1.7 form (bit 7 becomes the explicit leading one).
       */
    "mov %B[result],%C[a]" "\n\t"
    "ori %B[result],0x80" "\n\t"
    /* Copy a's exponent to register D of the result: byte D holds the sign and
       the upper seven exponent bits, the lowest exponent bit sits in bit 7 of
       byte C, so a left shift through carry reassembles the 8-bit exponent. */
    "mov %D[result],%D[a]" "\n\t"
    "mov __tmp_reg__,%C[a]" "\n\t"
    "lsl __tmp_reg__" "\n\t"
    "rol %D[result]" "\n\t"
    /* Now is the right time to remove the bias, to avoid overflow. */
    "subi %D[result],0x7f" "\n\t"
    /* Same thing as before for b's mantissa. */
    "mov %A[result],%C[b]" "\n\t"
    "ori %A[result],0x80" "\n\t"
    /* Add b's exponent to D register of the result.
       __zero_reg__ is borrowed as scratch here; it is cleared again at the end. */
    "mov __zero_reg__,%D[b]" "\n\t"
    "mov __tmp_reg__,%C[b]" "\n\t"
    "lsl __tmp_reg__" "\n\t"
    "rol __zero_reg__" "\n\t"
    "add %D[result],__zero_reg__" "\n\t"
    /* Third step : multiply the mantissas. */
    "fmul %A[result], %B[result]" "\n\t"
    /* Save the 16-bit product (register pair starting at __tmp_reg__) in
       registers A and B of the result. movw leaves the carry from fmul intact. */
    "movw %A[result], __tmp_reg__" "\n\t"
    /* Fourth step : overcome possible normalization issues.
       We only need to perform this normalization once.
       Carry clear: shift the mantissa left by one and decrement the exponent;
       the decrement is cancelled by the increment at the label below.
       Carry set: only the increment at the label is executed.
     */
    "brcs carry_set_%=" "\n\t"
    "lsl %A[result]" "\n\t"
    "rol %B[result]" "\n\t"
    "dec %D[result]" "\n\t"
    /* Fifth step: now, we should have the right exponent in register D and the normalized
       mantissa in registers A and B, and the sign bit in flag T. Time to rebuild everything.
       */
    "carry_set_%= : inc %D[result]" "\n\t"
    /* First, move the mantissa from registers A and B up to registers B and C,
       then clear register A so that no remains of the computation are left in
       the low byte.
     */
    "mov %C[result],%B[result]" "\n\t"
    "mov %B[result],%A[result]" "\n\t"
    "clr %A[result]" "\n\t"
    /* Then we right-shift everything to make room for the sign bit. */
    "lsr %D[result]" "\n\t"
    "ror %C[result]" "\n\t"
    "ror %B[result]" "\n\t"
    "ror %A[result]" "\n\t"
    /* And we copy it from flag T into the top bit. */
    "bld %D[result],7" "\n\t"
    /* clear __zero_reg__, which was used as scratch and overwritten by fmul */
    "clr __zero_reg__" "\n\t"
    :
    [result]"+a"(result):
    [a]"r"(a),[b]"r"(b)
  );

  return result;
}

// Factors of the benchmark and the product computed by floatmul().
float af;
float bf;
float cf;

// Opens the serial port, multiplies the two factors once with floatmul() and
// once with the native operator, and prints both results on a single line.
void setup() {
  Serial.begin(9600);

  af = 5.5;
  bf = 3.4;
  cf = floatmul(af, bf);
  const float native_cf = af * bf;

  Serial.print("af = ");
  Serial.print(af);

  Serial.print(" bf = ");
  Serial.print(bf);

  Serial.print(" cf = ");
  Serial.print(cf);

  Serial.print("native cf =");
  Serial.println(native_cf);
}
// Times the native float multiplication, then floatmul(), prints how many
// times each fitted in the measurement window together with the derived
// clock count per operation, and pauses for half a second.
void loop() {
  // TCNT0 serves as the stopwatch. It is the 8 bit timer used to compute
  // milliseconds and drive PWM0; it increments every 64 clock cycles and
  // rolls over from 255 to 0.
  //
  // The code under test is repeated while the timer goes from 156 through
  // 255, which gives us 64*100 clock cycles.
  //
  // In practice this works for timing operations that take from 1 to
  // hundreds of clock cycles. Beyond that the results get coarse, because the
  // last repetition overshoots the end of the window by a fair amount.
  const unsigned long window_clocks = 100UL * 64UL;
  const int empty_loop_clocks = 8;  // cost of the loop without any payload
  unsigned long passes = 0;

  // ---- native multiplication ----
  while (TCNT0 != 155) {}           // wait for tick 155 to show up
  while (TCNT0 == 155) {}           // leave exactly when tick 155 is over

  cli();                            // no interrupts while measuring
  // Comparing against 150 instead of 0 tolerates missing the exact rollover.
  for (; TCNT0 > 150; ++passes) {
    af = af * bf;                   // <---- code being timed
  }
  sei();                            // interrupts back on

  Serial.print("native float ");
  Serial.print("loops: ");
  Serial.print(passes, DEC);
  Serial.print(" clocks: ");
  Serial.print(static_cast<int>(window_clocks / passes) - empty_loop_clocks, DEC);
  Serial.println();

  // ---- custom multiplication ----
  passes = 0;
  while (TCNT0 != 155) {}           // wait for tick 155 to show up
  while (TCNT0 == 155) {}           // leave exactly when tick 155 is over

  cli();                            // no interrupts while measuring
  for (; TCNT0 > 150; ++passes) {
    af = floatmul(af, bf);          // <---- code being timed
  }
  sei();                            // interrupts back on

  Serial.print("custom float ");
  Serial.print("loops: ");
  Serial.print(passes, DEC);
  Serial.print(" clocks: ");
  Serial.print(static_cast<int>(window_clocks / passes) - empty_loop_clocks, DEC);
  Serial.println();

  delay(500);
}

## FloatMultiplicationLayout.ino
/* Comparing custom and native float multiplications.

   This should print for bf=0.2 :
   af = 55.50 845E0000 bf = 0.20 7C4CCCCD cf = 11.10 8231999A a = 55.50 845E0000 b = 0.20 7C4CCCCD c = 11.06 8230E800
   native float loops: 41 clocks: 148
   custom float loops: 65 clocks: 90
   native float loops: 48 clocks: 125
   custom float loops: 65 clocks: 90
   native float loops: 66 clocks: 88

   and for bf=3.4 :
   af = 55.50 845E0000 bf = 3.40 8059999A cf = 188.70 863CB334 a = 55.50 845E0000 b = 3.40 8059999A c = 188.18 863C2E00
   native float loops: 41 clocks: 148
   custom float loops: 65 clocks: 90
   native float loops: 44 clocks: 137
   custom float loops: 65 clocks: 90
   native float loops: 52 clocks: 115
   custom float loops: 65 clocks: 90
   native float loops: 52 clocks: 115
   custom float loops: 65 clocks: 90

*/

// Repacks the bits of an IEEE-754 single into the custom layout:
//   bits 31..24 : the 8 exponent bits (moved up by one position)
//   bit  23     : the sign bit (moved down by eight positions)
//   bits 22..0  : the stored mantissa bits, unchanged
uint32_t floatToCustom(float x) {
  const uint32_t ieee = *reinterpret_cast<uint32_t*>(&x);

  const uint32_t exponent = (ieee & 0x7f800000) << 1;
  const uint32_t sign = (ieee & 0x80000000) >> 8;
  const uint32_t mantissa = ieee & 0x7fffff;

  return exponent | sign | mantissa;
}

// Converts a value in the custom layout back to an IEEE-754 single.
//
// Custom layout (the one floatToCustom() produces):
//   bits 31..24 : the 8 exponent bits
//   bit  23     : the sign bit
//   bits 22..0  : the stored mantissa bits
//
// x      : value in the custom layout.
// return : the float whose bit pattern has the exponent moved back down by
//          one position and the sign moved back up to bit 31.
float customToFloat(uint32_t x) {
  // The bits are reinterpreted through a union instead of a cast pointer;
  // GCC, the compiler behind the Arduino toolchain, documents this form of
  // type punning as supported.
  union {
    float value;
    uint32_t bits;
  } ieee;

  // The sign lives in bit 23 (mask 0x00800000). The previous mask 0x08000000
  // selected an exponent bit that was then shifted out of the word, so the
  // sign was always lost.
  ieee.bits = ((x & 0xff000000) >> 1) | ((x & 0x00800000) << 8) | (x & 0x7fffff);
  return ieee.value;
}

/* A not-so-crappy™ approximation of float multiplication, working directly on
   the custom layout produced by floatToCustom():

     byte D (bits 31..24) : biased exponent
     byte C, bit 7 (bit 23): sign
     bits 22..0           : mantissa without its leading one

   Only byte C of each operand (the upper seven mantissa bits, completed with
   the explicit leading one) enters the product, so the result carries a 16-bit
   mantissa in bytes C and B; byte A of the result is always zero.

   The assembly contains no special handling for zero, infinities, NaN or
   subnormal inputs, and does not check the exponent for overflow/underflow.

   a, b   : the factors, in the custom layout.
   return : the approximate product, in the custom layout.
*/
uint32_t floatmul(uint32_t a, uint32_t b) {
  uint32_t result;

  asm (
    /* First step : manage the sign of the product, and store it in flag T.*/
    "mov __tmp_reg__,%C[a]" "\n\t"
    "eor __tmp_reg__,%C[b]" "\n\t"
    "bst __tmp_reg__,7" "\n\t"
    /* Second step : prepare the mantissa under the 1.7 form, and isolate the exponents. */
    /* We copy the high byte of a's mantissa in register B of the result,
       and put it in the 1.7 form (this also overwrites the sign bit with the
       explicit leading one).
       */
    "mov %B[result],%C[a]" "\n\t"
    "ori %B[result],0x80" "\n\t"
    /* Copy a's exponent to register D of the result. */
    "mov %D[result],%D[a]" "\n\t"
    /* Now is the right time to remove the bias, to avoid overflow. */
    "subi %D[result],0x7f" "\n\t"
    /* Same thing as before for b's mantissa. */
    "mov %A[result],%C[b]" "\n\t"
    "ori %A[result],0x80" "\n\t"
    /* Add b's exponent to D register of the result. */
    "add %D[result],%D[b]" "\n\t"
    /* Third step : multiply the mantissas. */
    "fmul %A[result], %B[result]" "\n\t"
    /* Save the product in registers B and C of the result. mov leaves the
       carry produced by fmul intact. */
    "mov %B[result], __tmp_reg__" "\n\t"
    "mov %C[result], __zero_reg__" "\n\t"
    /* Fourth step : overcome possible normalization issues.
       We only need to perform this normalization once.

       Warning : the logic here is a bit different as in the other implementation.
       We perform a logical-shift right of the mantissa only when the carry is set.
     */
    "brcc carry_clear_%=" "\n\t"
    "lsr %C[result]" "\n\t"
    "ror %B[result]" "\n\t"
    "inc %D[result]" "\n\t"
    /* Fifth step: now, we should have the right exponent in register D, the
       normalized mantissa in registers B and C, and the sign bit in flag T.
       Time to rebuild everything.
       */
    "carry_clear_%= :" "\n\t"
    /* Copy the sign from flag T into bit 7 of register C. On the carry-clear
       path that bit still holds the explicit leading one of the product, so
       this store is what makes the sign correct on both paths. */
    "bld %C[result],7" "\n\t"
    /* clear register A after using it */
    "clr %A[result]" "\n\t"
    /* fmul left the high byte of the product in __zero_reg__ */
    "clr __zero_reg__" "\n\t"
    :
    /* Output only, early-clobber: every byte of result is written before the
       last read of a and b, so it must not share registers with them. */
    [result]"=&a"(result):
    [a]"r"(a),[b]"r"(b)
  );

  return result;
}
float af,bf,cf;
uint32_t a,b,c;
void setup() {
  Serial.begin(9600);
  af = 55.5;
  bf = 3.4;//0.2;//
  a = floatToCustom(af);
  b = floatToCustom(bf);
  c = floatmul(a, b);
  cf = af*bf;

  Serial.print("af = ");
  Serial.print(af);
  Serial.print(" ");
  Serial.print(floatToCustom(af), HEX);
  Serial.print(" bf = ");
  Serial.print(bf);
  Serial.print(" ");
  Serial.print(floatToCustom(bf), HEX);
  Serial.print(" cf = ");
  Serial.print(cf);
  Serial.print(" ");
  Serial.print(floatToCustom(cf), HEX);
  Serial.print(" a = ");
  Serial.print(customToFloat(a));
  Serial.print(" ");
  Serial.print(a, HEX);
  Serial.print(" b = ");
  Serial.print(customToFloat(b));
  Serial.print(" ");
  Serial.print(b, HEX);
  Serial.print(" c = ");
  Serial.print(customToFloat(c));
  Serial.print(" ");
  Serial.println(c, HEX);
}
// Times the native float multiplication, then floatmul() on the custom
// layout, prints how many times each fitted in the measurement window
// together with the derived clock count per operation, and pauses.
void loop() {
  // TCNT0 serves as the stopwatch. It is the 8 bit timer used to compute
  // milliseconds and drive PWM0; it increments every 64 clock cycles and
  // rolls over from 255 to 0.
  //
  // The code under test is repeated while the timer goes from 156 through
  // 255, which gives us 64*100 clock cycles.
  //
  // In practice this works for timing operations that take from 1 to
  // hundreds of clock cycles. Beyond that the results get coarse, because the
  // last repetition overshoots the end of the window by a fair amount.
  const unsigned long window_clocks = 100UL * 64UL;
  const int empty_loop_clocks = 8;  // cost of the loop without any payload
  unsigned long passes = 0;

  // ---- native multiplication ----
  while (TCNT0 != 155) {}           // wait for tick 155 to show up
  while (TCNT0 == 155) {}           // leave exactly when tick 155 is over

  cli();                            // no interrupts while measuring
  // Comparing against 150 instead of 0 tolerates missing the exact rollover.
  for (; TCNT0 > 150; ++passes) {
    af = af * bf;                   // <---- code being timed
  }
  sei();                            // interrupts back on

  Serial.print("native float ");
  Serial.print("loops: ");
  Serial.print(passes, DEC);
  Serial.print(" clocks: ");
  Serial.print(static_cast<int>(window_clocks / passes) - empty_loop_clocks, DEC);
  Serial.println();
  delay(100);

  // ---- custom multiplication ----
  passes = 0;
  while (TCNT0 != 155) {}           // wait for tick 155 to show up
  while (TCNT0 == 155) {}           // leave exactly when tick 155 is over

  cli();                            // no interrupts while measuring
  for (; TCNT0 > 150; ++passes) {
    a = floatmul(a, b);             // <---- code being timed
  }
  sei();                            // interrupts back on

  Serial.print("custom float ");
  Serial.print("loops: ");
  Serial.print(passes, DEC);
  Serial.print(" clocks: ");
  Serial.print(static_cast<int>(window_clocks / passes) - empty_loop_clocks, DEC);
  Serial.println();

  delay(500);
}

## generated_assembly.asm
build/SimpleMultiplication.ino.elf:     file format elf32-avr


Disassembly of section .text:

00000000 <__vectors>:
   0:	0c 94 34 00 	jmp	0x68	; 0x68 <__ctors_end>
   4:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
   8:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
   c:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  10:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  14:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  18:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  1c:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  20:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  24:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  28:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  2c:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  30:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  34:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  38:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  3c:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  40:	0c 94 48 00 	jmp	0x90	; 0x90 <__vector_16>
  44:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  48:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  4c:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  50:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  54:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  58:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  5c:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  60:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>
  64:	0c 94 46 00 	jmp	0x8c	; 0x8c <__bad_interrupt>

00000068 <__ctors_end>:
  68:	11 24       	eor	r1, r1
  6a:	1f be       	out	0x3f, r1	; 63
  6c:	cf ef       	ldi	r28, 0xFF	; 255
  6e:	d8 e0       	ldi	r29, 0x08	; 8
  70:	de bf       	out	0x3e, r29	; 62
  72:	cd bf       	out	0x3d, r28	; 61

00000074 <__do_clear_bss>:
  74:	21 e0       	ldi	r18, 0x01	; 1
  76:	a0 e0       	ldi	r26, 0x00	; 0
  78:	b1 e0       	ldi	r27, 0x01	; 1
  7a:	01 c0       	rjmp	.+2      	; 0x7e <.do_clear_bss_start>

0000007c <.do_clear_bss_loop>:
  7c:	1d 92       	st	X+, r1

0000007e <.do_clear_bss_start>:
  7e:	ad 30       	cpi	r26, 0x0D	; 13
  80:	b2 07       	cpc	r27, r18
  82:	e1 f7       	brne	.-8      	; 0x7c <.do_clear_bss_loop>
  84:	0e 94 92 00 	call	0x124	; 0x124 <main>
  88:	0c 94 b8 01 	jmp	0x370	; 0x370 <_exit>

0000008c <__bad_interrupt>:
  8c:	0c 94 00 00 	jmp	0	; 0x0 <__vectors>

00000090 <__vector_16>:
  90:	1f 92       	push	r1
  92:	0f 92       	push	r0
  94:	0f b6       	in	r0, 0x3f	; 63
  96:	0f 92       	push	r0
  98:	11 24       	eor	r1, r1
  9a:	2f 93       	push	r18
  9c:	3f 93       	push	r19
  9e:	8f 93       	push	r24
  a0:	9f 93       	push	r25
  a2:	af 93       	push	r26
  a4:	bf 93       	push	r27
  a6:	80 91 09 01 	lds	r24, 0x0109	; 0x800109 <timer0_millis>
  aa:	90 91 0a 01 	lds	r25, 0x010A	; 0x80010a <timer0_millis+0x1>
  ae:	a0 91 0b 01 	lds	r26, 0x010B	; 0x80010b <timer0_millis+0x2>
  b2:	b0 91 0c 01 	lds	r27, 0x010C	; 0x80010c <timer0_millis+0x3>
  b6:	30 91 08 01 	lds	r19, 0x0108	; 0x800108 <timer0_fract>
  ba:	23 e0       	ldi	r18, 0x03	; 3
  bc:	23 0f       	add	r18, r19
  be:	2d 37       	cpi	r18, 0x7D	; 125
  c0:	58 f5       	brcc	.+86     	; 0x118 <__vector_16+0x88>
  c2:	01 96       	adiw	r24, 0x01	; 1
  c4:	a1 1d       	adc	r26, r1
  c6:	b1 1d       	adc	r27, r1
  c8:	20 93 08 01 	sts	0x0108, r18	; 0x800108 <timer0_fract>
  cc:	80 93 09 01 	sts	0x0109, r24	; 0x800109 <timer0_millis>
  d0:	90 93 0a 01 	sts	0x010A, r25	; 0x80010a <timer0_millis+0x1>
  d4:	a0 93 0b 01 	sts	0x010B, r26	; 0x80010b <timer0_millis+0x2>
  d8:	b0 93 0c 01 	sts	0x010C, r27	; 0x80010c <timer0_millis+0x3>
  dc:	80 91 04 01 	lds	r24, 0x0104	; 0x800104 <timer0_overflow_count>
  e0:	90 91 05 01 	lds	r25, 0x0105	; 0x800105 <timer0_overflow_count+0x1>
  e4:	a0 91 06 01 	lds	r26, 0x0106	; 0x800106 <timer0_overflow_count+0x2>
  e8:	b0 91 07 01 	lds	r27, 0x0107	; 0x800107 <timer0_overflow_count+0x3>
  ec:	01 96       	adiw	r24, 0x01	; 1
  ee:	a1 1d       	adc	r26, r1
  f0:	b1 1d       	adc	r27, r1
  f2:	80 93 04 01 	sts	0x0104, r24	; 0x800104 <timer0_overflow_count>
  f6:	90 93 05 01 	sts	0x0105, r25	; 0x800105 <timer0_overflow_count+0x1>
  fa:	a0 93 06 01 	sts	0x0106, r26	; 0x800106 <timer0_overflow_count+0x2>
  fe:	b0 93 07 01 	sts	0x0107, r27	; 0x800107 <timer0_overflow_count+0x3>
 102:	bf 91       	pop	r27
 104:	af 91       	pop	r26
 106:	9f 91       	pop	r25
 108:	8f 91       	pop	r24
 10a:	3f 91       	pop	r19
 10c:	2f 91       	pop	r18
 10e:	0f 90       	pop	r0
 110:	0f be       	out	0x3f, r0	; 63
 112:	0f 90       	pop	r0
 114:	1f 90       	pop	r1
 116:	18 95       	reti
 118:	26 e8       	ldi	r18, 0x86	; 134
 11a:	23 0f       	add	r18, r19
 11c:	02 96       	adiw	r24, 0x02	; 2
 11e:	a1 1d       	adc	r26, r1
 120:	b1 1d       	adc	r27, r1
 122:	d2 cf       	rjmp	.-92     	; 0xc8 <__vector_16+0x38>

00000124 <main>:
 124:	78 94       	sei
 126:	84 b5       	in	r24, 0x24	; 36
 128:	82 60       	ori	r24, 0x02	; 2
 12a:	84 bd       	out	0x24, r24	; 36
 12c:	84 b5       	in	r24, 0x24	; 36
 12e:	81 60       	ori	r24, 0x01	; 1
 130:	84 bd       	out	0x24, r24	; 36
 132:	85 b5       	in	r24, 0x25	; 37
 134:	82 60       	ori	r24, 0x02	; 2
 136:	85 bd       	out	0x25, r24	; 37
 138:	85 b5       	in	r24, 0x25	; 37
 13a:	81 60       	ori	r24, 0x01	; 1
 13c:	85 bd       	out	0x25, r24	; 37
 13e:	80 91 6e 00 	lds	r24, 0x006E	; 0x80006e <__DATA_REGION_ORIGIN__+0xe>
 142:	81 60       	ori	r24, 0x01	; 1
 144:	80 93 6e 00 	sts	0x006E, r24	; 0x80006e <__DATA_REGION_ORIGIN__+0xe>
 148:	10 92 81 00 	sts	0x0081, r1	; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
 14c:	80 91 81 00 	lds	r24, 0x0081	; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
 150:	82 60       	ori	r24, 0x02	; 2
 152:	80 93 81 00 	sts	0x0081, r24	; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
 156:	80 91 81 00 	lds	r24, 0x0081	; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
 15a:	81 60       	ori	r24, 0x01	; 1
 15c:	80 93 81 00 	sts	0x0081, r24	; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
 160:	80 91 80 00 	lds	r24, 0x0080	; 0x800080 <__DATA_REGION_ORIGIN__+0x20>
 164:	81 60       	ori	r24, 0x01	; 1
 166:	80 93 80 00 	sts	0x0080, r24	; 0x800080 <__DATA_REGION_ORIGIN__+0x20>
 16a:	80 91 b1 00 	lds	r24, 0x00B1	; 0x8000b1 <__DATA_REGION_ORIGIN__+0x51>
 16e:	84 60       	ori	r24, 0x04	; 4
 170:	80 93 b1 00 	sts	0x00B1, r24	; 0x8000b1 <__DATA_REGION_ORIGIN__+0x51>
 174:	80 91 b0 00 	lds	r24, 0x00B0	; 0x8000b0 <__DATA_REGION_ORIGIN__+0x50>
 178:	81 60       	ori	r24, 0x01	; 1
 17a:	80 93 b0 00 	sts	0x00B0, r24	; 0x8000b0 <__DATA_REGION_ORIGIN__+0x50>
 17e:	80 91 7a 00 	lds	r24, 0x007A	; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
 182:	84 60       	ori	r24, 0x04	; 4
 184:	80 93 7a 00 	sts	0x007A, r24	; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
 188:	80 91 7a 00 	lds	r24, 0x007A	; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
 18c:	82 60       	ori	r24, 0x02	; 2
 18e:	80 93 7a 00 	sts	0x007A, r24	; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
 192:	80 91 7a 00 	lds	r24, 0x007A	; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
 196:	81 60       	ori	r24, 0x01	; 1
 198:	80 93 7a 00 	sts	0x007A, r24	; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
 19c:	80 91 7a 00 	lds	r24, 0x007A	; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
 1a0:	80 68       	ori	r24, 0x80	; 128
 1a2:	80 93 7a 00 	sts	0x007A, r24	; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
 1a6:	10 92 c1 00 	sts	0x00C1, r1	; 0x8000c1 <__DATA_REGION_ORIGIN__+0x61>
 1aa:	10 92 00 01 	sts	0x0100, r1	; 0x800100 <x>
 1ae:	10 92 01 01 	sts	0x0101, r1	; 0x800101 <x+0x1>
 1b2:	10 92 02 01 	sts	0x0102, r1	; 0x800102 <x+0x2>
 1b6:	10 92 03 01 	sts	0x0103, r1	; 0x800103 <x+0x3>
 1ba:	c0 e0       	ldi	r28, 0x00	; 0
 1bc:	d0 e0       	ldi	r29, 0x00	; 0
 1be:	60 91 00 01 	lds	r22, 0x0100	; 0x800100 <x>
 1c2:	70 91 01 01 	lds	r23, 0x0101	; 0x800101 <x+0x1>
 1c6:	80 91 02 01 	lds	r24, 0x0102	; 0x800102 <x+0x2>
 1ca:	90 91 03 01 	lds	r25, 0x0103	; 0x800103 <x+0x3>
 1ce:	2d ec       	ldi	r18, 0xCD	; 205
 1d0:	3c ec       	ldi	r19, 0xCC	; 204
 1d2:	4c e1       	ldi	r20, 0x1C	; 28
 1d4:	50 e4       	ldi	r21, 0x40	; 64
 1d6:	0e 94 fa 00 	call	0x1f4	; 0x1f4 <__mulsf3>
 1da:	20 97       	sbiw	r28, 0x00	; 0
 1dc:	c1 f3       	breq	.-16     	; 0x1ce <main+0xaa>
 1de:	60 93 00 01 	sts	0x0100, r22	; 0x800100 <x>
 1e2:	70 93 01 01 	sts	0x0101, r23	; 0x800101 <x+0x1>
 1e6:	80 93 02 01 	sts	0x0102, r24	; 0x800102 <x+0x2>
 1ea:	90 93 03 01 	sts	0x0103, r25	; 0x800103 <x+0x3>
 1ee:	0e 94 00 00 	call	0	; 0x0 <__vectors>
 1f2:	e5 cf       	rjmp	.-54     	; 0x1be <main+0x9a>

000001f4 <__mulsf3>:
 1f4:	0e 94 0d 01 	call	0x21a	; 0x21a <__mulsf3x>
 1f8:	0c 94 7e 01 	jmp	0x2fc	; 0x2fc <__fp_round>
 1fc:	0e 94 70 01 	call	0x2e0	; 0x2e0 <__fp_pscA>
 200:	38 f0       	brcs	.+14     	; 0x210 <__mulsf3+0x1c>
 202:	0e 94 77 01 	call	0x2ee	; 0x2ee <__fp_pscB>
 206:	20 f0       	brcs	.+8      	; 0x210 <__mulsf3+0x1c>
 208:	95 23       	and	r25, r21
 20a:	11 f0       	breq	.+4      	; 0x210 <__mulsf3+0x1c>
 20c:	0c 94 67 01 	jmp	0x2ce	; 0x2ce <__fp_inf>
 210:	0c 94 6d 01 	jmp	0x2da	; 0x2da <__fp_nan>
 214:	11 24       	eor	r1, r1
 216:	0c 94 b2 01 	jmp	0x364	; 0x364 <__fp_szero>

0000021a <__mulsf3x>:
 21a:	0e 94 8f 01 	call	0x31e	; 0x31e <__fp_split3>
 21e:	70 f3       	brcs	.-36     	; 0x1fc <__mulsf3+0x8>

00000220 <__mulsf3_pse>:
 220:	95 9f       	mul	r25, r21
 222:	c1 f3       	breq	.-16     	; 0x214 <__mulsf3+0x20>
 224:	95 0f       	add	r25, r21
 226:	50 e0       	ldi	r21, 0x00	; 0
 228:	55 1f       	adc	r21, r21
 22a:	62 9f       	mul	r22, r18
 22c:	f0 01       	movw	r30, r0
 22e:	72 9f       	mul	r23, r18
 230:	bb 27       	eor	r27, r27
 232:	f0 0d       	add	r31, r0
 234:	b1 1d       	adc	r27, r1
 236:	63 9f       	mul	r22, r19
 238:	aa 27       	eor	r26, r26
 23a:	f0 0d       	add	r31, r0
 23c:	b1 1d       	adc	r27, r1
 23e:	aa 1f       	adc	r26, r26
 240:	64 9f       	mul	r22, r20
 242:	66 27       	eor	r22, r22
 244:	b0 0d       	add	r27, r0
 246:	a1 1d       	adc	r26, r1
 248:	66 1f       	adc	r22, r22
 24a:	82 9f       	mul	r24, r18
 24c:	22 27       	eor	r18, r18
 24e:	b0 0d       	add	r27, r0
 250:	a1 1d       	adc	r26, r1
 252:	62 1f       	adc	r22, r18
 254:	73 9f       	mul	r23, r19
 256:	b0 0d       	add	r27, r0
 258:	a1 1d       	adc	r26, r1
 25a:	62 1f       	adc	r22, r18
 25c:	83 9f       	mul	r24, r19
 25e:	a0 0d       	add	r26, r0
 260:	61 1d       	adc	r22, r1
 262:	22 1f       	adc	r18, r18
 264:	74 9f       	mul	r23, r20
 266:	33 27       	eor	r19, r19
 268:	a0 0d       	add	r26, r0
 26a:	61 1d       	adc	r22, r1
 26c:	23 1f       	adc	r18, r19
 26e:	84 9f       	mul	r24, r20
 270:	60 0d       	add	r22, r0
 272:	21 1d       	adc	r18, r1
 274:	82 2f       	mov	r24, r18
 276:	76 2f       	mov	r23, r22
 278:	6a 2f       	mov	r22, r26
 27a:	11 24       	eor	r1, r1
 27c:	9f 57       	subi	r25, 0x7F	; 127
 27e:	50 40       	sbci	r21, 0x00	; 0
 280:	9a f0       	brmi	.+38     	; 0x2a8 <__mulsf3_pse+0x88>
 282:	f1 f0       	breq	.+60     	; 0x2c0 <__mulsf3_pse+0xa0>
 284:	88 23       	and	r24, r24
 286:	4a f0       	brmi	.+18     	; 0x29a <__mulsf3_pse+0x7a>
 288:	ee 0f       	add	r30, r30
 28a:	ff 1f       	adc	r31, r31
 28c:	bb 1f       	adc	r27, r27
 28e:	66 1f       	adc	r22, r22
 290:	77 1f       	adc	r23, r23
 292:	88 1f       	adc	r24, r24
 294:	91 50       	subi	r25, 0x01	; 1
 296:	50 40       	sbci	r21, 0x00	; 0
 298:	a9 f7       	brne	.-22     	; 0x284 <__mulsf3_pse+0x64>
 29a:	9e 3f       	cpi	r25, 0xFE	; 254
 29c:	51 05       	cpc	r21, r1
 29e:	80 f0       	brcs	.+32     	; 0x2c0 <__mulsf3_pse+0xa0>
 2a0:	0c 94 67 01 	jmp	0x2ce	; 0x2ce <__fp_inf>
 2a4:	0c 94 b2 01 	jmp	0x364	; 0x364 <__fp_szero>
 2a8:	5f 3f       	cpi	r21, 0xFF	; 255
 2aa:	e4 f3       	brlt	.-8      	; 0x2a4 <__mulsf3_pse+0x84>
 2ac:	98 3e       	cpi	r25, 0xE8	; 232
 2ae:	d4 f3       	brlt	.-12     	; 0x2a4 <__mulsf3_pse+0x84>
 2b0:	86 95       	lsr	r24
 2b2:	77 95       	ror	r23
 2b4:	67 95       	ror	r22
 2b6:	b7 95       	ror	r27
 2b8:	f7 95       	ror	r31
 2ba:	e7 95       	ror	r30
 2bc:	9f 5f       	subi	r25, 0xFF	; 255
 2be:	c1 f7       	brne	.-16     	; 0x2b0 <__mulsf3_pse+0x90>
 2c0:	fe 2b       	or	r31, r30
 2c2:	88 0f       	add	r24, r24
 2c4:	91 1d       	adc	r25, r1
 2c6:	96 95       	lsr	r25
 2c8:	87 95       	ror	r24
 2ca:	97 f9       	bld	r25, 7
 2cc:	08 95       	ret

000002ce <__fp_inf>:
 2ce:	97 f9       	bld	r25, 7
 2d0:	9f 67       	ori	r25, 0x7F	; 127
 2d2:	80 e8       	ldi	r24, 0x80	; 128
 2d4:	70 e0       	ldi	r23, 0x00	; 0
 2d6:	60 e0       	ldi	r22, 0x00	; 0
 2d8:	08 95       	ret

000002da <__fp_nan>:
 2da:	9f ef       	ldi	r25, 0xFF	; 255
 2dc:	80 ec       	ldi	r24, 0xC0	; 192
 2de:	08 95       	ret

000002e0 <__fp_pscA>:
 2e0:	00 24       	eor	r0, r0
 2e2:	0a 94       	dec	r0
 2e4:	16 16       	cp	r1, r22
 2e6:	17 06       	cpc	r1, r23
 2e8:	18 06       	cpc	r1, r24
 2ea:	09 06       	cpc	r0, r25
 2ec:	08 95       	ret

000002ee <__fp_pscB>:
 2ee:	00 24       	eor	r0, r0
 2f0:	0a 94       	dec	r0
 2f2:	12 16       	cp	r1, r18
 2f4:	13 06       	cpc	r1, r19
 2f6:	14 06       	cpc	r1, r20
 2f8:	05 06       	cpc	r0, r21
 2fa:	08 95       	ret

000002fc <__fp_round>:
 2fc:	09 2e       	mov	r0, r25
 2fe:	03 94       	inc	r0
 300:	00 0c       	add	r0, r0
 302:	11 f4       	brne	.+4      	; 0x308 <__fp_round+0xc>
 304:	88 23       	and	r24, r24
 306:	52 f0       	brmi	.+20     	; 0x31c <__fp_round+0x20>
 308:	bb 0f       	add	r27, r27
 30a:	40 f4       	brcc	.+16     	; 0x31c <__fp_round+0x20>
 30c:	bf 2b       	or	r27, r31
 30e:	11 f4       	brne	.+4      	; 0x314 <__fp_round+0x18>
 310:	60 ff       	sbrs	r22, 0
 312:	04 c0       	rjmp	.+8      	; 0x31c <__fp_round+0x20>
 314:	6f 5f       	subi	r22, 0xFF	; 255
 316:	7f 4f       	sbci	r23, 0xFF	; 255
 318:	8f 4f       	sbci	r24, 0xFF	; 255
 31a:	9f 4f       	sbci	r25, 0xFF	; 255
 31c:	08 95       	ret

0000031e <__fp_split3>:
 31e:	57 fd       	sbrc	r21, 7
 320:	90 58       	subi	r25, 0x80	; 128
 322:	44 0f       	add	r20, r20
 324:	55 1f       	adc	r21, r21
 326:	59 f0       	breq	.+22     	; 0x33e <__fp_splitA+0x10>
 328:	5f 3f       	cpi	r21, 0xFF	; 255
 32a:	71 f0       	breq	.+28     	; 0x348 <__fp_splitA+0x1a>
 32c:	47 95       	ror	r20

0000032e <__fp_splitA>:
 32e:	88 0f       	add	r24, r24
 330:	97 fb       	bst	r25, 7
 332:	99 1f       	adc	r25, r25
 334:	61 f0       	breq	.+24     	; 0x34e <__fp_splitA+0x20>
 336:	9f 3f       	cpi	r25, 0xFF	; 255
 338:	79 f0       	breq	.+30     	; 0x358 <__fp_splitA+0x2a>
 33a:	87 95       	ror	r24
 33c:	08 95       	ret
 33e:	12 16       	cp	r1, r18
 340:	13 06       	cpc	r1, r19
 342:	14 06       	cpc	r1, r20
 344:	55 1f       	adc	r21, r21
 346:	f2 cf       	rjmp	.-28     	; 0x32c <__fp_split3+0xe>
 348:	46 95       	lsr	r20
 34a:	f1 df       	rcall	.-30     	; 0x32e <__fp_splitA>
 34c:	08 c0       	rjmp	.+16     	; 0x35e <__fp_splitA+0x30>
 34e:	16 16       	cp	r1, r22
 350:	17 06       	cpc	r1, r23
 352:	18 06       	cpc	r1, r24
 354:	99 1f       	adc	r25, r25
 356:	f1 cf       	rjmp	.-30     	; 0x33a <__fp_splitA+0xc>
 358:	86 95       	lsr	r24
 35a:	71 05       	cpc	r23, r1
 35c:	61 05       	cpc	r22, r1
 35e:	08 94       	sec
 360:	08 95       	ret

00000362 <__fp_zero>:
 362:	e8 94       	clt

00000364 <__fp_szero>:
 364:	bb 27       	eor	r27, r27
 366:	66 27       	eor	r22, r22
 368:	77 27       	eor	r23, r23
 36a:	cb 01       	movw	r24, r22
 36c:	97 f9       	bld	r25, 7
 36e:	08 95       	ret

00000370 <_exit>:
 370:	f8 94       	cli

00000372 <__stop_program>:
 372:	ff cf       	rjmp	.-2      	; 0x372 <__stop_program>

## SimpleMultiplication.ino
// Value that receives the single float multiplication of this sketch.
float x;

// Sets x to 1.0 and multiplies it once by 2.45.
void setup() {
  x = 1.0;
  x = x * 2.45;
}

// Nothing to do after setup().
void loop() {}
	/*
	Timing Float and Long operations. The technique used is described
	here : https://forum.arduino.cc/t/timing-the-little-things/47247

	For multiplication this should print :

	long loops: 64 clocks: 92
	float loops: 52 clocks: 115

	For addition :

	long loops: 213 clocks: 22
	float loops: 46 clocks: 131
	*/

	// Operands of the integer benchmark: loop() repeatedly evaluates
	// al = al OPERATOR bl.
	long al,bl;
	// Operands of the floating-point benchmark: loop() repeatedly evaluates
	// af = af OPERATOR bf.
	float af,bf;

	// Operator used by both timed loops. Keep exactly one definition active.
	// #define OPERATOR *
	#define OPERATOR +

	// One-time initialisation: seeds the benchmark operands and opens the
	// serial port (9600 baud) that loop() reports on.
	void setup() {
	  // Integer operands.
	  bl = 34;
	  al = 50;

	  // Floating-point operands.
	  bf = 3.4;
	  af = 5.5;

	  Serial.begin(9600);
	}

	// Benchmarks `al OPERATOR bl` (long) and then `af OPERATOR bf` (float).
	// For each, prints how many iterations fitted in the timing window and the
	// estimated number of clock cycles per operation, then waits 500 ms.
	void loop()
	{
	  unsigned long loops = 0;

	  // TCNT0 is the timer used to compute milliseconds and drive PWM0.
	  // It is an 8 bit value that increments every 64 clock cycles and
	  // rolls over from 255 to 0.
	  //
	  // We repeatedly run the test code as the timer goes from 156 through 255
	  // which gives us 64*100 clock cycles.
	  //
	  // In practice this works for timing operations that take from 1 to
	  // hundreds of clock cycles. The results get a little chunky after that
	  // since the last one will have gone a fair bit past the end period.
	  //
	  while( TCNT0 != 155); // wait for 155 to start
	  while( TCNT0 == 155); // wait until 155 ends

	  cli(); // turn off interrupts
	  while( TCNT0 > 150 ) { // that 150 acknowledges we may miss 0
	    // vvvvvv---- your code to be timed
	    al = al OPERATOR bl;
	    // ^^^^^^---- your code to be timed
	    loops++;
	  }
	  sei(); // turn interrupts back on

	  Serial.print("long ");
	  Serial.print("loops: ");
	  Serial.print(loops,DEC);
	  Serial.print(" clocks: ");
	  // Window length (100 timer ticks * 64 clocks) divided by the iteration
	  // count gives clocks per iteration; the empty-loop overhead is removed.
	  Serial.print( (int) (( 100UL*64UL) / loops) - 8 /* empty loop cost */, DEC);
	  Serial.println();

	  loops = 0;
	  while( TCNT0 != 155); // wait for 155 to start
	  while( TCNT0 == 155); // wait until 155 ends

	  cli(); // turn off interrupts
	  while( TCNT0 > 150 ) { // that 150 acknowledges we may miss 0
	    // vvvvvv---- your code to be timed
	    af = af OPERATOR bf;
	    // ^^^^^^---- your code to be timed
	    loops++;
	  }
	  sei(); // turn interrupts back on

	  Serial.print("float ");
	  Serial.print("loops: ");
	  Serial.print(loops,DEC);
	  Serial.print(" clocks: ");
	  Serial.print( (int) (( 100UL*64UL) / loops) - 8 /* empty loop cost */, DEC);
	  Serial.println();

	  delay(500);
	}
	/* Comparing custom and native float multiplications.

	This should print :

	af = 5.50 bf = 3.40 cf = 18.65native cf =18.70
	native float loops: 41 clocks: 148
	custom float loops: 58 clocks: 102

	*/


	/* A not-so-crappy™ approximation of float multiplication.

	Multiplies two single-precision floats using only byte C of each operand
	(the top stored mantissa bits, with bit 7 forced to one as the leading
	one), so that a single fmul performs the whole mantissa product. The
	exponents are added with one bias (0x7f) removed, and the sign of the
	result is the XOR of the operands' sign bits.

	Nothing in this sequence special-cases zero, infinity, NaN or subnormal
	inputs.

	a, b    : the two factors.
	returns : the approximate product.
	*/
	float floatmul(float a, float b) {
	float result = 0;

	asm (
	/* First step : manage the sign of the product, and store it in flag T.*/
	"mov __tmp_reg__,%D[a]" "\n\t"
	"eor __tmp_reg__,%D[b]" "\n\t"
	"bst __tmp_reg__,7" "\n\t"
	/* Second step : prepare the mantissa under the 1.7 form, and isolate the exponents. */
	/* We copy the high byte of a's mantissa in register B of the result,
	and put it in the 1.7 form (bit 7 forced to one).
	*/
	"mov %B[result],%C[a]" "\n\t"
	"ori %B[result],0x80" "\n\t"
	/* Copy a's exponent to register D of the result: byte D holds the sign and
	the upper exponent bits, the lowest exponent bit is bit 7 of byte C, so it
	is shifted into the carry and rotated into D (pushing the sign out).
	*/
	"mov %D[result],%D[a]" "\n\t"
	"mov __tmp_reg__,%C[a]" "\n\t"
	"lsl __tmp_reg__" "\n\t"
	"rol %D[result]" "\n\t"
	/* Now is the right time to remove the bias, to avoid overflow. */
	"subi %D[result],0x7f" "\n\t"
	/* Same thing as before for b's mantissa. */
	"mov %A[result],%C[b]" "\n\t"
	"ori %A[result],0x80" "\n\t"
	/* Add b's exponent to D register of the result.
	__zero_reg__ is borrowed as a scratch register here; it is cleared again
	at the very end of the sequence.
	*/
	"mov __zero_reg__,%D[b]" "\n\t"
	"mov __tmp_reg__,%C[b]" "\n\t"
	"lsl __tmp_reg__" "\n\t"
	"rol __zero_reg__" "\n\t"
	"add %D[result],__zero_reg__" "\n\t"
	/* Third step : multiply the mantissas. */
	"fmul %A[result], %B[result]" "\n\t"
	/* save the result in registers A and B of the result
	(movw copies the register pair starting at __tmp_reg__).
	*/
	"movw %A[result], __tmp_reg__" "\n\t"
	/* Fourth step : overcome possible normalization issues.
	We only need to perform this normalization once.
	When the carry left by fmul is set, the shift and the decrement are
	skipped and only the increment at carry_set runs; otherwise the mantissa
	is shifted left once and the dec/inc pair leaves the exponent unchanged.
	*/
	"brcs carry_set_%=" "\n\t"
	"lsl %A[result]" "\n\t"
	"rol %B[result]" "\n\t"
	"dec %D[result]" "\n\t"
	/* Fifth step: now, we should have the right exponent in register D and the normalized
	mantissa in registers A and B, and the sign bit in flag T. Time to rebuild everything.
	*/
	"carry_set_%= : inc %D[result]" "\n\t"
	/* First, copy the mantissa from registers A and B to registers B and C,
	then clear register A so that no leftovers of the computation end up in
	the low mantissa byte.
	*/
	"mov %C[result],%B[result]" "\n\t"
	"mov %B[result],%A[result]" "\n\t"
	"clr %A[result]" "\n\t"
	/* Then we right-shift everything to make room for the sign bit. */
	"lsr %D[result]" "\n\t"
	"ror %C[result]" "\n\t"
	"ror %B[result]" "\n\t"
	"ror %A[result]" "\n\t"
	/* And we copy it from flag T into the top bit of the result. */
	"bld %D[result],7" "\n\t"
	/* clear __zero_reg__ (used as scratch and overwritten by fmul) */
	"clr __zero_reg__" "\n\t"
	:
	[result]"+a"(result):
	[a]"r"(a),[b]"r"(b)
	);

	return result;
	}

	// Factors (af, bf) and the product computed by floatmul() (cf).
	float af, bf, cf;

	// One-time start-up: computes 5.5 * 3.4 with floatmul() and prints the
	// operands, the custom product and the native product on one serial line.
	void setup() {
	  Serial.begin(9600);

	  bf = 3.4;
	  af = 5.5;
	  cf = floatmul(af, bf);

	  Serial.print("af = ");
	  Serial.print(af);

	  Serial.print(" bf = ");
	  Serial.print(bf);

	  Serial.print(" cf = ");
	  Serial.print(cf);

	  // Reference value from the compiler's own float multiplication.
	  const float nativeProduct = af * bf;
	  Serial.print("native cf =");
	  Serial.println(nativeProduct);
	}
	// Benchmarks the native `af * bf` and then floatmul(af, bf).
	// For each, prints how many iterations fitted in the timing window and the
	// estimated number of clock cycles per operation, then waits 500 ms.
	void loop() {
	  unsigned long loops = 0;

	  // TCNT0 is the timer used to compute milliseconds and drive PWM0.
	  // It is an 8 bit value that increments every 64 clock cycles and
	  // rolls over from 255 to 0.
	  //
	  // We repeatedly run the test code as the timer goes from 156 through 255
	  // which gives us 64*100 clock cycles.
	  //
	  // In practice this works for timing operations that take from 1 to
	  // hundreds of clock cycles. The results get a little chunky after that
	  // since the last one will have gone a fair bit past the end period.
	  //
	  while( TCNT0 != 155); // wait for 155 to start
	  while( TCNT0 == 155); // wait until 155 ends

	  cli(); // turn off interrupts
	  while( TCNT0 > 150 ) { // that 150 acknowledges we may miss 0
	    // vvvvvv---- your code to be timed
	    af = af * bf;
	    // ^^^^^^---- your code to be timed
	    loops++;
	  }
	  sei(); // turn interrupts back on

	  Serial.print("native float ");
	  Serial.print("loops: ");
	  Serial.print(loops,DEC);
	  Serial.print(" clocks: ");
	  // Window length (100 timer ticks * 64 clocks) divided by the iteration
	  // count gives clocks per iteration; the empty-loop overhead is removed.
	  Serial.print( (int) (( 100UL*64UL) / loops) - 8 /* empty loop cost */, DEC);
	  Serial.println();

	  loops = 0;
	  while( TCNT0 != 155); // wait for 155 to start
	  while( TCNT0 == 155); // wait until 155 ends

	  cli(); // turn off interrupts
	  while( TCNT0 > 150 ) { // that 150 acknowledges we may miss 0
	    // vvvvvv---- your code to be timed
	    af = floatmul(af, bf);
	    // ^^^^^^---- your code to be timed
	    loops++;
	  }
	  sei(); // turn interrupts back on

	  Serial.print("custom float ");
	  Serial.print("loops: ");
	  Serial.print(loops,DEC);
	  Serial.print(" clocks: ");
	  Serial.print( (int) (( 100UL*64UL) / loops) - 8 /* empty loop cost */, DEC);
	  Serial.println();

	  delay(500);
	}
	/* Comparing custom and native float multiplications.

	This should print for bf=0.2 :
	af = 55.50 845E0000 bf = 0.20 7C4CCCCD cf = 11.10 8231999A a = 55.50 845E0000 b = 0.20 7C4CCCCD c = 11.06 8230E800
	native float loops: 41 clocks: 148
	custom float loops: 65 clocks: 90
	native float loops: 48 clocks: 125
	custom float loops: 65 clocks: 90
	native float loops: 66 clocks: 88

	and for bf=3.4 :
	af = 55.50 845E0000 bf = 3.40 8059999A cf = 188.70 863CB334 a = 55.50 845E0000 b = 3.40 8059999A c = 188.18 863C2E00
	native float loops: 41 clocks: 148
	custom float loops: 65 clocks: 90
	native float loops: 44 clocks: 137
	custom float loops: 65 clocks: 90
	native float loops: 52 clocks: 115
	custom float loops: 65 clocks: 90
	native float loops: 52 clocks: 115
	custom float loops: 65 clocks: 90

	*/

	// Repacks a single-precision float into the custom 32-bit layout:
	//   bits 31..24  biased exponent
	//   bit  23      sign
	//   bits 22..0   stored mantissa
	// The float's bit pattern is copied out with __builtin_memcpy instead of
	// being read through a casted pointer, which would violate the
	// strict-aliasing rules.
	uint32_t floatToCustom(float x) {
	  uint32_t bits;
	  __builtin_memcpy(&bits, &x, sizeof bits);

	  return ((bits & 0x7f800000) << 1)   // exponent: bits 30..23 -> 31..24
	       | ((bits & 0x80000000) >> 8)   // sign:     bit 31     -> bit 23
	       | (bits & 0x7fffff);           // mantissa keeps its position
	}

	// Converts a value in the custom layout (exponent in bits 31..24, sign in
	// bit 23, mantissa in bits 22..0) back to a single-precision float.
	// The sign is taken from bit 23 (mask 0x00800000); the previous mask
	// 0x08000000 selected an exponent bit that was shifted out entirely, so
	// negative values lost their sign.
	float customToFloat(uint32_t x) {
	  const uint32_t bits = ((x & 0xff000000) >> 1)   // exponent: bits 31..24 -> 30..23
	                      | ((x & 0x00800000) << 8)   // sign:     bit 23     -> bit 31
	                      | (x & 0x7fffff);           // mantissa keeps its position

	  // Copy the bit pattern into the float without aliasing through a pointer.
	  float res;
	  __builtin_memcpy(&res, &bits, sizeof res);
	  return res;
	}

	/* A not-so-crappy™ approximation of float multiplication, working on the
	custom packed layout: byte D = biased exponent, bit 7 of byte C = sign,
	remaining 23 bits = stored mantissa.

	Only byte C of each operand (top 7 mantissa bits, with bit 7 forced to one
	as the leading one) takes part in the product, so one fmul does the whole
	mantissa multiplication. Bytes C and B of the result receive the product's
	mantissa, byte A is always zero. Nothing here special-cases zero, infinity,
	NaN or subnormal inputs.

	a, b    : factors in the custom layout.
	returns : approximate product in the custom layout.
	*/
	uint32_t floatmul(uint32_t a, uint32_t b) {
	  uint32_t result;

	  asm (
	    /* First step : manage the sign of the product, and store it in flag T.*/
	    "mov __tmp_reg__,%C[a]" "\n\t"
	    "eor __tmp_reg__,%C[b]" "\n\t"
	    "bst __tmp_reg__,7" "\n\t"
	    /* Second step : prepare the mantissas under the 1.7 form, and combine
	    the exponents. */
	    /* High mantissa byte of a, with the leading one put in place of the
	    sign bit. */
	    "mov %B[result],%C[a]" "\n\t"
	    "ori %B[result],0x80" "\n\t"
	    /* Copy a's exponent to register D of the result. */
	    "mov %D[result],%D[a]" "\n\t"
	    /* Now is the right time to remove the bias, to avoid overflow. */
	    "subi %D[result],0x7f" "\n\t"
	    /* Same thing as before for b's mantissa. */
	    "mov %A[result],%C[b]" "\n\t"
	    "ori %A[result],0x80" "\n\t"
	    /* Add b's exponent to D register of the result. */
	    "add %D[result],%D[b]" "\n\t"
	    /* Third step : multiply the mantissas. */
	    "fmul %A[result], %B[result]" "\n\t"
	    /* Save the product (left by fmul in __zero_reg__:__tmp_reg__) in
	    registers C and B of the result. mov leaves the carry from fmul intact. */
	    "mov %B[result], __tmp_reg__" "\n\t"
	    "mov %C[result], __zero_reg__" "\n\t"
	    /* Fourth step : overcome possible normalization issues.
	    We only need to perform this normalization once.

	    Warning : the logic here is a bit different from the other implementation.
	    We perform a logical-shift right of the mantissa only when the carry is set.
	    */
	    "brcc carry_clear_%=" "\n\t"
	    "lsr %C[result]" "\n\t"
	    "ror %B[result]" "\n\t"
	    "inc %D[result]" "\n\t"
	    /* Fifth step: the exponent is in register D, the mantissa in registers
	    C and B, and the sign in flag T. Bit 7 of register C is the sign position;
	    at this point it holds either zero (after the shift) or the leading one of
	    the product, so it is overwritten with the sign saved in T. This adds one
	    instruction to the sequence.
	    */
	    "carry_clear_%= :" "\n\t"
	    "bld %C[result],7" "\n\t"
	    /* clear register A after using it */
	    "clr %A[result]" "\n\t"
	    /* restore __zero_reg__, which fmul overwrote */
	    "clr __zero_reg__" "\n\t"
	    :
	    /* Write-only output: its previous (uninitialised) value is never needed.
	    Early-clobber because it is written before a and b are fully read. */
	    [result]"=&a"(result):
	    [a]"r"(a),[b]"r"(b)
	  );

	  return result;
	}
	// Native operands and native product.
	float af, bf, cf;
	// The same operands and the floatmul() product in the custom layout.
	uint32_t a, b, c;

	// One-time start-up: multiplies af by bf both natively and with floatmul(),
	// then prints every value as a decimal float followed by its custom-layout
	// bit pattern in hexadecimal, all on one serial line.
	void setup() {
	  Serial.begin(9600);

	  af = 55.5;
	  bf = 3.4;  // the header comment also lists the expected output for 0.2

	  a = floatToCustom(af);
	  b = floatToCustom(bf);

	  cf = af * bf;
	  c = floatmul(a, b);

	  // Native operands.
	  Serial.print("af = ");
	  Serial.print(af);
	  Serial.print(" ");
	  Serial.print(floatToCustom(af), HEX);

	  Serial.print(" bf = ");
	  Serial.print(bf);
	  Serial.print(" ");
	  Serial.print(floatToCustom(bf), HEX);

	  // Native product.
	  Serial.print(" cf = ");
	  Serial.print(cf);
	  Serial.print(" ");
	  Serial.print(floatToCustom(cf), HEX);

	  // Custom-layout operands.
	  Serial.print(" a = ");
	  Serial.print(customToFloat(a));
	  Serial.print(" ");
	  Serial.print(a, HEX);

	  Serial.print(" b = ");
	  Serial.print(customToFloat(b));
	  Serial.print(" ");
	  Serial.print(b, HEX);

	  // Custom product.
	  Serial.print(" c = ");
	  Serial.print(customToFloat(c));
	  Serial.print(" ");
	  Serial.println(c, HEX);
	}
	// Benchmarks the native `af * bf` and then floatmul(a, b) on the custom
	// layout. For each, prints how many iterations fitted in the timing window
	// and the estimated number of clock cycles per operation.
	void loop() {
	  unsigned long loops = 0;

	  // TCNT0 is the timer used to compute milliseconds and drive PWM0.
	  // It is an 8 bit value that increments every 64 clock cycles and
	  // rolls over from 255 to 0.
	  //
	  // We repeatedly run the test code as the timer goes from 156 through 255
	  // which gives us 64*100 clock cycles.
	  //
	  // In practice this works for timing operations that take from 1 to
	  // hundreds of clock cycles. The results get a little chunky after that
	  // since the last one will have gone a fair bit past the end period.
	  //
	  while( TCNT0 != 155); // wait for 155 to start
	  while( TCNT0 == 155); // wait until 155 ends

	  cli(); // turn off interrupts
	  while( TCNT0 > 150 ) { // that 150 acknowledges we may miss 0
	    // vvvvvv---- your code to be timed
	    af = af * bf;
	    // ^^^^^^---- your code to be timed
	    loops++;
	  }
	  sei(); // turn interrupts back on

	  Serial.print("native float ");
	  Serial.print("loops: ");
	  Serial.print(loops,DEC);
	  Serial.print(" clocks: ");
	  // Window length (100 timer ticks * 64 clocks) divided by the iteration
	  // count gives clocks per iteration; the empty-loop overhead is removed.
	  Serial.print( (int) (( 100UL*64UL) / loops) - 8 /* empty loop cost */, DEC);
	  Serial.println();
	  delay(100);

	  loops = 0;
	  while( TCNT0 != 155); // wait for 155 to start
	  while( TCNT0 == 155); // wait until 155 ends

	  cli(); // turn off interrupts
	  while( TCNT0 > 150 ) { // that 150 acknowledges we may miss 0
	    // vvvvvv---- your code to be timed
	    a = floatmul(a, b);
	    // ^^^^^^---- your code to be timed
	    loops++;
	  }
	  sei(); // turn interrupts back on

	  Serial.print("custom float ");
	  Serial.print("loops: ");
	  Serial.print(loops,DEC);
	  Serial.print(" clocks: ");
	  Serial.print( (int) (( 100UL*64UL) / loops) - 8 /* empty loop cost */, DEC);
	  Serial.println();

	  delay(500);
	}
	build/SimpleMultiplication.ino.elf: file format elf32-avr


	Disassembly of section .text:

	00000000 <__vectors>:
	0: 0c 94 34 00 jmp 0x68 ; 0x68 <__ctors_end>
	4: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	8: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	c: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	10: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	14: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	18: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	1c: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	20: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	24: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	28: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	2c: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	30: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	34: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	38: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	3c: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	40: 0c 94 48 00 jmp 0x90 ; 0x90 <__vector_16>
	44: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	48: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	4c: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	50: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	54: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	58: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	5c: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	60: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
	64: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>

	00000068 <__ctors_end>:
	68: 11 24 eor r1, r1
	6a: 1f be out 0x3f, r1 ; 63
	6c: cf ef ldi r28, 0xFF ; 255
	6e: d8 e0 ldi r29, 0x08 ; 8
	70: de bf out 0x3e, r29 ; 62
	72: cd bf out 0x3d, r28 ; 61

	00000074 <__do_clear_bss>:
	74: 21 e0 ldi r18, 0x01 ; 1
	76: a0 e0 ldi r26, 0x00 ; 0
	78: b1 e0 ldi r27, 0x01 ; 1
	7a: 01 c0 rjmp .+2 ; 0x7e <.do_clear_bss_start>

	0000007c <.do_clear_bss_loop>:
	7c: 1d 92 st X+, r1

	0000007e <.do_clear_bss_start>:
	7e: ad 30 cpi r26, 0x0D ; 13
	80: b2 07 cpc r27, r18
	82: e1 f7 brne .-8 ; 0x7c <.do_clear_bss_loop>
	84: 0e 94 92 00 call 0x124 ; 0x124 <main>
	88: 0c 94 b8 01 jmp 0x370 ; 0x370 <_exit>

	0000008c <__bad_interrupt>:
	8c: 0c 94 00 00 jmp 0 ; 0x0 <__vectors>

	00000090 <__vector_16>:
	90: 1f 92 push r1
	92: 0f 92 push r0
	94: 0f b6 in r0, 0x3f ; 63
	96: 0f 92 push r0
	98: 11 24 eor r1, r1
	9a: 2f 93 push r18
	9c: 3f 93 push r19
	9e: 8f 93 push r24
	a0: 9f 93 push r25
	a2: af 93 push r26
	a4: bf 93 push r27
	a6: 80 91 09 01 lds r24, 0x0109 ; 0x800109 <timer0_millis>
	aa: 90 91 0a 01 lds r25, 0x010A ; 0x80010a <timer0_millis+0x1>
	ae: a0 91 0b 01 lds r26, 0x010B ; 0x80010b <timer0_millis+0x2>
	b2: b0 91 0c 01 lds r27, 0x010C ; 0x80010c <timer0_millis+0x3>
	b6: 30 91 08 01 lds r19, 0x0108 ; 0x800108 <timer0_fract>
	ba: 23 e0 ldi r18, 0x03 ; 3
	bc: 23 0f add r18, r19
	be: 2d 37 cpi r18, 0x7D ; 125
	c0: 58 f5 brcc .+86 ; 0x118 <__vector_16+0x88>
	c2: 01 96 adiw r24, 0x01 ; 1
	c4: a1 1d adc r26, r1
	c6: b1 1d adc r27, r1
	c8: 20 93 08 01 sts 0x0108, r18 ; 0x800108 <timer0_fract>
	cc: 80 93 09 01 sts 0x0109, r24 ; 0x800109 <timer0_millis>
	d0: 90 93 0a 01 sts 0x010A, r25 ; 0x80010a <timer0_millis+0x1>
	d4: a0 93 0b 01 sts 0x010B, r26 ; 0x80010b <timer0_millis+0x2>
	d8: b0 93 0c 01 sts 0x010C, r27 ; 0x80010c <timer0_millis+0x3>
	dc: 80 91 04 01 lds r24, 0x0104 ; 0x800104 <timer0_overflow_count>
	e0: 90 91 05 01 lds r25, 0x0105 ; 0x800105 <timer0_overflow_count+0x1>
	e4: a0 91 06 01 lds r26, 0x0106 ; 0x800106 <timer0_overflow_count+0x2>
	e8: b0 91 07 01 lds r27, 0x0107 ; 0x800107 <timer0_overflow_count+0x3>
	ec: 01 96 adiw r24, 0x01 ; 1
	ee: a1 1d adc r26, r1
	f0: b1 1d adc r27, r1
	f2: 80 93 04 01 sts 0x0104, r24 ; 0x800104 <timer0_overflow_count>
	f6: 90 93 05 01 sts 0x0105, r25 ; 0x800105 <timer0_overflow_count+0x1>
	fa: a0 93 06 01 sts 0x0106, r26 ; 0x800106 <timer0_overflow_count+0x2>
	fe: b0 93 07 01 sts 0x0107, r27 ; 0x800107 <timer0_overflow_count+0x3>
	102: bf 91 pop r27
	104: af 91 pop r26
	106: 9f 91 pop r25
	108: 8f 91 pop r24
	10a: 3f 91 pop r19
	10c: 2f 91 pop r18
	10e: 0f 90 pop r0
	110: 0f be out 0x3f, r0 ; 63
	112: 0f 90 pop r0
	114: 1f 90 pop r1
	116: 18 95 reti
	118: 26 e8 ldi r18, 0x86 ; 134
	11a: 23 0f add r18, r19
	11c: 02 96 adiw r24, 0x02 ; 2
	11e: a1 1d adc r26, r1
	120: b1 1d adc r27, r1
	122: d2 cf rjmp .-92 ; 0xc8 <__vector_16+0x38>

	00000124 <main>:
	124: 78 94 sei
	126: 84 b5 in r24, 0x24 ; 36
	128: 82 60 ori r24, 0x02 ; 2
	12a: 84 bd out 0x24, r24 ; 36
	12c: 84 b5 in r24, 0x24 ; 36
	12e: 81 60 ori r24, 0x01 ; 1
	130: 84 bd out 0x24, r24 ; 36
	132: 85 b5 in r24, 0x25 ; 37
	134: 82 60 ori r24, 0x02 ; 2
	136: 85 bd out 0x25, r24 ; 37
	138: 85 b5 in r24, 0x25 ; 37
	13a: 81 60 ori r24, 0x01 ; 1
	13c: 85 bd out 0x25, r24 ; 37
	13e: 80 91 6e 00 lds r24, 0x006E ; 0x80006e <__DATA_REGION_ORIGIN__+0xe>
	142: 81 60 ori r24, 0x01 ; 1
	144: 80 93 6e 00 sts 0x006E, r24 ; 0x80006e <__DATA_REGION_ORIGIN__+0xe>
	148: 10 92 81 00 sts 0x0081, r1 ; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
	14c: 80 91 81 00 lds r24, 0x0081 ; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
	150: 82 60 ori r24, 0x02 ; 2
	152: 80 93 81 00 sts 0x0081, r24 ; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
	156: 80 91 81 00 lds r24, 0x0081 ; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
	15a: 81 60 ori r24, 0x01 ; 1
	15c: 80 93 81 00 sts 0x0081, r24 ; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
	160: 80 91 80 00 lds r24, 0x0080 ; 0x800080 <__DATA_REGION_ORIGIN__+0x20>
	164: 81 60 ori r24, 0x01 ; 1
	166: 80 93 80 00 sts 0x0080, r24 ; 0x800080 <__DATA_REGION_ORIGIN__+0x20>
	16a: 80 91 b1 00 lds r24, 0x00B1 ; 0x8000b1 <__DATA_REGION_ORIGIN__+0x51>
	16e: 84 60 ori r24, 0x04 ; 4
	170: 80 93 b1 00 sts 0x00B1, r24 ; 0x8000b1 <__DATA_REGION_ORIGIN__+0x51>
	174: 80 91 b0 00 lds r24, 0x00B0 ; 0x8000b0 <__DATA_REGION_ORIGIN__+0x50>
	178: 81 60 ori r24, 0x01 ; 1
	17a: 80 93 b0 00 sts 0x00B0, r24 ; 0x8000b0 <__DATA_REGION_ORIGIN__+0x50>
	17e: 80 91 7a 00 lds r24, 0x007A ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
	182: 84 60 ori r24, 0x04 ; 4
	184: 80 93 7a 00 sts 0x007A, r24 ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
	188: 80 91 7a 00 lds r24, 0x007A ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
	18c: 82 60 ori r24, 0x02 ; 2
	18e: 80 93 7a 00 sts 0x007A, r24 ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
	192: 80 91 7a 00 lds r24, 0x007A ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
	196: 81 60 ori r24, 0x01 ; 1
	198: 80 93 7a 00 sts 0x007A, r24 ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
	19c: 80 91 7a 00 lds r24, 0x007A ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
	1a0: 80 68 ori r24, 0x80 ; 128
	1a2: 80 93 7a 00 sts 0x007A, r24 ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
	1a6: 10 92 c1 00 sts 0x00C1, r1 ; 0x8000c1 <__DATA_REGION_ORIGIN__+0x61>
	1aa: 10 92 00 01 sts 0x0100, r1 ; 0x800100 <x>
	1ae: 10 92 01 01 sts 0x0101, r1 ; 0x800101 <x+0x1>
	1b2: 10 92 02 01 sts 0x0102, r1 ; 0x800102 <x+0x2>
	1b6: 10 92 03 01 sts 0x0103, r1 ; 0x800103 <x+0x3>
	1ba: c0 e0 ldi r28, 0x00 ; 0
	1bc: d0 e0 ldi r29, 0x00 ; 0
	1be: 60 91 00 01 lds r22, 0x0100 ; 0x800100 <x>
	1c2: 70 91 01 01 lds r23, 0x0101 ; 0x800101 <x+0x1>
	1c6: 80 91 02 01 lds r24, 0x0102 ; 0x800102 <x+0x2>
	1ca: 90 91 03 01 lds r25, 0x0103 ; 0x800103 <x+0x3>
	1ce: 2d ec ldi r18, 0xCD ; 205
	1d0: 3c ec ldi r19, 0xCC ; 204
	1d2: 4c e1 ldi r20, 0x1C ; 28
	1d4: 50 e4 ldi r21, 0x40 ; 64
	1d6: 0e 94 fa 00 call 0x1f4 ; 0x1f4 <__mulsf3>
	1da: 20 97 sbiw r28, 0x00 ; 0
	1dc: c1 f3 breq .-16 ; 0x1ce <main+0xaa>
	1de: 60 93 00 01 sts 0x0100, r22 ; 0x800100 <x>
	1e2: 70 93 01 01 sts 0x0101, r23 ; 0x800101 <x+0x1>
	1e6: 80 93 02 01 sts 0x0102, r24 ; 0x800102 <x+0x2>
	1ea: 90 93 03 01 sts 0x0103, r25 ; 0x800103 <x+0x3>
	1ee: 0e 94 00 00 call 0 ; 0x0 <__vectors>
	1f2: e5 cf rjmp .-54 ; 0x1be <main+0x9a>

	000001f4 <__mulsf3>:
	1f4: 0e 94 0d 01 call 0x21a ; 0x21a <__mulsf3x>
	1f8: 0c 94 7e 01 jmp 0x2fc ; 0x2fc <__fp_round>
	1fc: 0e 94 70 01 call 0x2e0 ; 0x2e0 <__fp_pscA>
	200: 38 f0 brcs .+14 ; 0x210 <__mulsf3+0x1c>
	202: 0e 94 77 01 call 0x2ee ; 0x2ee <__fp_pscB>
	206: 20 f0 brcs .+8 ; 0x210 <__mulsf3+0x1c>
	208: 95 23 and r25, r21
	20a: 11 f0 breq .+4 ; 0x210 <__mulsf3+0x1c>
	20c: 0c 94 67 01 jmp 0x2ce ; 0x2ce <__fp_inf>
	210: 0c 94 6d 01 jmp 0x2da ; 0x2da <__fp_nan>
	214: 11 24 eor r1, r1
	216: 0c 94 b2 01 jmp 0x364 ; 0x364 <__fp_szero>

	0000021a <__mulsf3x>:
	21a: 0e 94 8f 01 call 0x31e ; 0x31e <__fp_split3>
	21e: 70 f3 brcs .-36 ; 0x1fc <__mulsf3+0x8>

	00000220 <__mulsf3_pse>:
	220: 95 9f mul r25, r21
	222: c1 f3 breq .-16 ; 0x214 <__mulsf3+0x20>
	224: 95 0f add r25, r21
	226: 50 e0 ldi r21, 0x00 ; 0
	228: 55 1f adc r21, r21
	22a: 62 9f mul r22, r18
	22c: f0 01 movw r30, r0
	22e: 72 9f mul r23, r18
	230: bb 27 eor r27, r27
	232: f0 0d add r31, r0
	234: b1 1d adc r27, r1
	236: 63 9f mul r22, r19
	238: aa 27 eor r26, r26
	23a: f0 0d add r31, r0
	23c: b1 1d adc r27, r1
	23e: aa 1f adc r26, r26
	240: 64 9f mul r22, r20
	242: 66 27 eor r22, r22
	244: b0 0d add r27, r0
	246: a1 1d adc r26, r1
	248: 66 1f adc r22, r22
	24a: 82 9f mul r24, r18
	24c: 22 27 eor r18, r18
	24e: b0 0d add r27, r0
	250: a1 1d adc r26, r1
	252: 62 1f adc r22, r18
	254: 73 9f mul r23, r19
	256: b0 0d add r27, r0
	258: a1 1d adc r26, r1
	25a: 62 1f adc r22, r18
	25c: 83 9f mul r24, r19
	25e: a0 0d add r26, r0
	260: 61 1d adc r22, r1
	262: 22 1f adc r18, r18
	264: 74 9f mul r23, r20
	266: 33 27 eor r19, r19
	268: a0 0d add r26, r0
	26a: 61 1d adc r22, r1
	26c: 23 1f adc r18, r19
	26e: 84 9f mul r24, r20
	270: 60 0d add r22, r0
	272: 21 1d adc r18, r1
	274: 82 2f mov r24, r18
	276: 76 2f mov r23, r22
	278: 6a 2f mov r22, r26
	27a: 11 24 eor r1, r1
	27c: 9f 57 subi r25, 0x7F ; 127
	27e: 50 40 sbci r21, 0x00 ; 0
	280: 9a f0 brmi .+38 ; 0x2a8 <__mulsf3_pse+0x88>
	282: f1 f0 breq .+60 ; 0x2c0 <__mulsf3_pse+0xa0>
	284: 88 23 and r24, r24
	286: 4a f0 brmi .+18 ; 0x29a <__mulsf3_pse+0x7a>
	288: ee 0f add r30, r30
	28a: ff 1f adc r31, r31
	28c: bb 1f adc r27, r27
	28e: 66 1f adc r22, r22
	290: 77 1f adc r23, r23
	292: 88 1f adc r24, r24
	294: 91 50 subi r25, 0x01 ; 1
	296: 50 40 sbci r21, 0x00 ; 0
	298: a9 f7 brne .-22 ; 0x284 <__mulsf3_pse+0x64>
	29a: 9e 3f cpi r25, 0xFE ; 254
	29c: 51 05 cpc r21, r1
	29e: 80 f0 brcs .+32 ; 0x2c0 <__mulsf3_pse+0xa0>
	2a0: 0c 94 67 01 jmp 0x2ce ; 0x2ce <__fp_inf>
	2a4: 0c 94 b2 01 jmp 0x364 ; 0x364 <__fp_szero>
	2a8: 5f 3f cpi r21, 0xFF ; 255
	2aa: e4 f3 brlt .-8 ; 0x2a4 <__mulsf3_pse+0x84>
	2ac: 98 3e cpi r25, 0xE8 ; 232
	2ae: d4 f3 brlt .-12 ; 0x2a4 <__mulsf3_pse+0x84>
	2b0: 86 95 lsr r24
	2b2: 77 95 ror r23
	2b4: 67 95 ror r22
	2b6: b7 95 ror r27
	2b8: f7 95 ror r31
	2ba: e7 95 ror r30
	2bc: 9f 5f subi r25, 0xFF ; 255
	2be: c1 f7 brne .-16 ; 0x2b0 <__mulsf3_pse+0x90>
	2c0: fe 2b or r31, r30
	2c2: 88 0f add r24, r24
	2c4: 91 1d adc r25, r1
	2c6: 96 95 lsr r25
	2c8: 87 95 ror r24
	2ca: 97 f9 bld r25, 7
	2cc: 08 95 ret

	000002ce <__fp_inf>:
	2ce: 97 f9 bld r25, 7
	2d0: 9f 67 ori r25, 0x7F ; 127
	2d2: 80 e8 ldi r24, 0x80 ; 128
	2d4: 70 e0 ldi r23, 0x00 ; 0
	2d6: 60 e0 ldi r22, 0x00 ; 0
	2d8: 08 95 ret

	000002da <__fp_nan>:
	2da: 9f ef ldi r25, 0xFF ; 255
	2dc: 80 ec ldi r24, 0xC0 ; 192
	2de: 08 95 ret

	000002e0 <__fp_pscA>:
	2e0: 00 24 eor r0, r0
	2e2: 0a 94 dec r0
	2e4: 16 16 cp r1, r22
	2e6: 17 06 cpc r1, r23
	2e8: 18 06 cpc r1, r24
	2ea: 09 06 cpc r0, r25
	2ec: 08 95 ret

	000002ee <__fp_pscB>:
	2ee: 00 24 eor r0, r0
	2f0: 0a 94 dec r0
	2f2: 12 16 cp r1, r18
	2f4: 13 06 cpc r1, r19
	2f6: 14 06 cpc r1, r20
	2f8: 05 06 cpc r0, r21
	2fa: 08 95 ret

	000002fc <__fp_round>:
	2fc: 09 2e mov r0, r25
	2fe: 03 94 inc r0
	300: 00 0c add r0, r0
	302: 11 f4 brne .+4 ; 0x308 <__fp_round+0xc>
	304: 88 23 and r24, r24
	306: 52 f0 brmi .+20 ; 0x31c <__fp_round+0x20>
	308: bb 0f add r27, r27
	30a: 40 f4 brcc .+16 ; 0x31c <__fp_round+0x20>
	30c: bf 2b or r27, r31
	30e: 11 f4 brne .+4 ; 0x314 <__fp_round+0x18>
	310: 60 ff sbrs r22, 0
	312: 04 c0 rjmp .+8 ; 0x31c <__fp_round+0x20>
	314: 6f 5f subi r22, 0xFF ; 255
	316: 7f 4f sbci r23, 0xFF ; 255
	318: 8f 4f sbci r24, 0xFF ; 255
	31a: 9f 4f sbci r25, 0xFF ; 255
	31c: 08 95 ret

	0000031e <__fp_split3>:
	31e: 57 fd sbrc r21, 7
	320: 90 58 subi r25, 0x80 ; 128
	322: 44 0f add r20, r20
	324: 55 1f adc r21, r21
	326: 59 f0 breq .+22 ; 0x33e <__fp_splitA+0x10>
	328: 5f 3f cpi r21, 0xFF ; 255
	32a: 71 f0 breq .+28 ; 0x348 <__fp_splitA+0x1a>
	32c: 47 95 ror r20

	0000032e <__fp_splitA>:
	32e: 88 0f add r24, r24
	330: 97 fb bst r25, 7
	332: 99 1f adc r25, r25
	334: 61 f0 breq .+24 ; 0x34e <__fp_splitA+0x20>
	336: 9f 3f cpi r25, 0xFF ; 255
	338: 79 f0 breq .+30 ; 0x358 <__fp_splitA+0x2a>
	33a: 87 95 ror r24
	33c: 08 95 ret
	33e: 12 16 cp r1, r18
	340: 13 06 cpc r1, r19
	342: 14 06 cpc r1, r20
	344: 55 1f adc r21, r21
	346: f2 cf rjmp .-28 ; 0x32c <__fp_split3+0xe>
	348: 46 95 lsr r20
	34a: f1 df rcall .-30 ; 0x32e <__fp_splitA>
	34c: 08 c0 rjmp .+16 ; 0x35e <__fp_splitA+0x30>
	34e: 16 16 cp r1, r22
	350: 17 06 cpc r1, r23
	352: 18 06 cpc r1, r24
	354: 99 1f adc r25, r25
	356: f1 cf rjmp .-30 ; 0x33a <__fp_splitA+0xc>
	358: 86 95 lsr r24
	35a: 71 05 cpc r23, r1
	35c: 61 05 cpc r22, r1
	35e: 08 94 sec
	360: 08 95 ret

	00000362 <__fp_zero>:
	362: e8 94 clt

	00000364 <__fp_szero>:
	364: bb 27 eor r27, r27
	366: 66 27 eor r22, r22
	368: 77 27 eor r23, r23
	36a: cb 01 movw r24, r22
	36c: 97 f9 bld r25, 7
	36e: 08 95 ret

	00000370 <_exit>:
	370: f8 94 cli

	00000372 <__stop_program>:
	372: ff cf rjmp .-2 ; 0x372 <__stop_program>