Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@Klafyvel
Last active May 29, 2022 16:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Klafyvel/c8c2474398621c4ad76370f8b118d78e to your computer and use it in GitHub Desktop.
Save Klafyvel/c8c2474398621c4ad76370f8b118d78e to your computer and use it in GitHub Desktop.
/*
Timing Float and Long operations. The technique used is described
here : https://forum.arduino.cc/t/timing-the-little-things/47247
For multiplication this should print :
long loops: 64 clocks: 92
float loops: 52 clocks: 115
For addition :
long loops: 213 clocks: 22
float loops: 46 clocks: 131
*/
// Operands of the integer benchmark.
long al;
long bl;
// Operands of the floating-point benchmark.
float af;
float bf;
// Operator being timed. Swap which definition is active to time the other one.
// #define OPERATOR *
#define OPERATOR +
// One-time initialisation: seeds the benchmark operands and opens the serial
// port (9600 baud) that loop() reports on.
void setup()
{
  // Integer operands.
  al = 50;
  bl = 34;

  // Floating-point operands.
  af = 5.5;
  bf = 3.4;

  Serial.begin(9600);
}
// Times `OPERATOR` on the long operands, then on the float operands, and
// prints the iteration count and the estimated clock cycles for each.
void loop()
{
  // Measurement method: TCNT0 is the 8-bit timer behind the millisecond
  // counter and PWM0. It advances once every 64 clock cycles and wraps from
  // 255 back to 0. The code under test is repeated while the timer runs from
  // 156 through 255, i.e. for 64*100 clock cycles; dividing that budget by the
  // number of passes gives the cost of one pass.
  //
  // This is meaningful for operations costing from 1 to a few hundred cycles.
  // For longer ones the result gets coarse because the final pass overshoots
  // the end of the window.
  unsigned long passes = 0;

  // ---------------- long ----------------
  while (TCNT0 != 155) {}  // wait until tick 155 begins
  while (TCNT0 == 155) {}  // wait until tick 155 is over
  cli();                   // interrupts off while measuring
  while (TCNT0 > 150) {    // compare against 150 because the 0 itself may be missed
    al = al OPERATOR bl;   // <-- code being timed
    ++passes;
  }
  sei();                   // interrupts back on

  Serial.print("long ");
  Serial.print("loops: ");
  Serial.print(passes, DEC);
  Serial.print(" clocks: ");
  // 8 = cost of the empty loop, subtracted from the per-pass figure.
  Serial.print(static_cast<int>((100UL * 64UL) / passes) - 8, DEC);
  Serial.println();

  // ---------------- float ----------------
  passes = 0;
  while (TCNT0 != 155) {}  // wait until tick 155 begins
  while (TCNT0 == 155) {}  // wait until tick 155 is over
  cli();                   // interrupts off while measuring
  while (TCNT0 > 150) {    // compare against 150 because the 0 itself may be missed
    af = af OPERATOR bf;   // <-- code being timed
    ++passes;
  }
  sei();                   // interrupts back on

  Serial.print("float ");
  Serial.print("loops: ");
  Serial.print(passes, DEC);
  Serial.print(" clocks: ");
  // 8 = cost of the empty loop, subtracted from the per-pass figure.
  Serial.print(static_cast<int>((100UL * 64UL) / passes) - 8, DEC);
  Serial.println();

  delay(500);
}
/* Comparing custom and native float multiplications.
This should print :
af = 5.50 bf = 3.40 cf = 18.65native cf =18.70
native float loops: 41 clocks: 148
custom float loops: 58 clocks: 102
*/
/*
 * A not-so-crappy(TM) approximation of float multiplication.
 *
 * Multiplies two floats using a single 8x8-bit FMUL on the top byte of each
 * mantissa (with the implicit leading 1 restored), adds the unbiased
 * exponents and XORs the signs. Only the high mantissa byte of each operand
 * takes part, so the result is less precise than the native product (the
 * file header shows 18.65 versus 18.70 for 5.5 * 3.4).
 *
 * a, b   : operands, read byte-wise (%A = lowest byte ... %D = highest byte).
 * returns: the approximate product, reassembled as a float.
 *
 * The asm contains no special-casing of zero, subnormals, infinities or NaN,
 * and no check for exponent overflow/underflow.
 *
 * `result` uses the "a" constraint (upper registers r16..r23) because FMUL
 * only accepts registers in that range. __tmp_reg__ and __zero_reg__ are
 * overwritten (FMUL leaves its product in that register pair, and
 * __zero_reg__ is also used as scratch); __zero_reg__ is cleared again before
 * leaving the asm.
 * NOTE(review): no clobber list is declared although the T and C flags are
 * modified - confirm the toolchain treats asm statements as clobbering SREG.
 */
float floatmul(float a, float b) {
float result = 0;
asm (
/* Step 1: the sign of the product is sign(a) XOR sign(b); park it in flag T. */
"mov __tmp_reg__,%D[a]" "\n\t"
"eor __tmp_reg__,%D[b]" "\n\t"
"bst __tmp_reg__,7" "\n\t"
/* Step 2: put the mantissas in 1.7 form and isolate the exponents. */
/* Copy the high byte of a's mantissa into byte B of the result and set
bit 7, which restores the implicit leading 1 (1.7 fixed point).
*/
"mov %B[result],%C[a]" "\n\t"
"ori %B[result],0x80" "\n\t"
/* Extract a's exponent into byte D of the result: shifting the low
exponent bit (bit 7 of byte C) into byte D pushes the sign bit out.
*/
"mov %D[result],%D[a]" "\n\t"
"mov __tmp_reg__,%C[a]" "\n\t"
"lsl __tmp_reg__" "\n\t"
"rol %D[result]" "\n\t"
/* Remove the bias now, before adding b's exponent, to avoid overflow. */
"subi %D[result],0x7f" "\n\t"
/* Same 1.7 preparation for b's mantissa, into byte A of the result. */
"mov %A[result],%C[b]" "\n\t"
"ori %A[result],0x80" "\n\t"
/* Extract b's exponent (using __zero_reg__ as scratch) and add it to byte D. */
"mov __zero_reg__,%D[b]" "\n\t"
"mov __tmp_reg__,%C[b]" "\n\t"
"lsl __tmp_reg__" "\n\t"
"rol __zero_reg__" "\n\t"
"add %D[result],__zero_reg__" "\n\t"
/* Step 3: multiply the mantissas. The 16-bit product lands in
__zero_reg__:__tmp_reg__ and the carry flag reports the bit shifted out.
*/
"fmul %A[result], %B[result]" "\n\t"
/* Move the product into bytes A (low) and B (high) of the result. movw does not touch the flags. */
"movw %A[result], __tmp_reg__" "\n\t"
/* Step 4: normalise. A single correction is enough.
Carry set  : skip the shift; the exponent is incremented at the label.
Carry clear: shift the mantissa left once; the dec below cancels the inc
at the label, so the exponent is left unchanged.
*/
"brcs carry_set_%=" "\n\t"
"lsl %A[result]" "\n\t"
"rol %B[result]" "\n\t"
"dec %D[result]" "\n\t"
/* Step 5: the exponent is in byte D, the normalised mantissa in bytes A and B
and the sign in flag T. Time to rebuild the float.
*/
"carry_set_%= : inc %D[result]" "\n\t"
/* Move the mantissa from bytes A and B up to bytes B and C, then clear
byte A so no leftovers of the computation remain in the low mantissa byte.
*/
"mov %C[result],%B[result]" "\n\t"
"mov %B[result],%A[result]" "\n\t"
"clr %A[result]" "\n\t"
/* Shift everything right by one bit to make room for the sign bit. */
"lsr %D[result]" "\n\t"
"ror %C[result]" "\n\t"
"ror %B[result]" "\n\t"
"ror %A[result]" "\n\t"
/* Copy the sign from flag T into the top bit. */
"bld %D[result],7" "\n\t"
/* Restore __zero_reg__ to 0, as the compiler expects. */
"clr __zero_reg__" "\n\t"
:
[result]"+a"(result):
[a]"r"(a),[b]"r"(b)
);
return result;
}
// Operands (af, bf) and product (cf) shared by setup() and loop().
float af;
float bf;
float cf;
// Computes one product with floatmul() and prints it next to the native
// product so the precision of the two can be compared.
void setup() {
  af = 5.5;
  bf = 3.4;
  cf = floatmul(af, bf);
  const float nativeProduct = af * bf;

  Serial.begin(9600);

  // Operands.
  Serial.print("af = ");
  Serial.print(af);
  Serial.print(" bf = ");
  Serial.print(bf);

  // Custom result, then the native one, which ends the line.
  Serial.print(" cf = ");
  Serial.print(cf);
  Serial.print("native cf =");
  Serial.println(nativeProduct);
}
// Times the native float multiplication, then floatmul(), and prints the
// iteration count and the estimated clock cycles for each.
void loop() {
  // Measurement method: TCNT0 is the 8-bit timer behind the millisecond
  // counter and PWM0. It advances once every 64 clock cycles and wraps from
  // 255 back to 0. The code under test is repeated while the timer runs from
  // 156 through 255, i.e. for 64*100 clock cycles; dividing that budget by the
  // number of passes gives the cost of one pass.
  //
  // This is meaningful for operations costing from 1 to a few hundred cycles.
  // For longer ones the result gets coarse because the final pass overshoots
  // the end of the window.
  unsigned long passes = 0;

  // ---------------- native multiplication ----------------
  while (TCNT0 != 155) {}  // wait until tick 155 begins
  while (TCNT0 == 155) {}  // wait until tick 155 is over
  cli();                   // interrupts off while measuring
  while (TCNT0 > 150) {    // compare against 150 because the 0 itself may be missed
    af = af * bf;          // <-- code being timed
    ++passes;
  }
  sei();                   // interrupts back on

  Serial.print("native float ");
  Serial.print("loops: ");
  Serial.print(passes, DEC);
  Serial.print(" clocks: ");
  // 8 = cost of the empty loop, subtracted from the per-pass figure.
  Serial.print(static_cast<int>((100UL * 64UL) / passes) - 8, DEC);
  Serial.println();

  // ---------------- custom multiplication ----------------
  passes = 0;
  while (TCNT0 != 155) {}  // wait until tick 155 begins
  while (TCNT0 == 155) {}  // wait until tick 155 is over
  cli();                   // interrupts off while measuring
  while (TCNT0 > 150) {    // compare against 150 because the 0 itself may be missed
    af = floatmul(af, bf); // <-- code being timed
    ++passes;
  }
  sei();                   // interrupts back on

  Serial.print("custom float ");
  Serial.print("loops: ");
  Serial.print(passes, DEC);
  Serial.print(" clocks: ");
  // 8 = cost of the empty loop, subtracted from the per-pass figure.
  Serial.print(static_cast<int>((100UL * 64UL) / passes) - 8, DEC);
  Serial.println();

  delay(500);
}
/* Comparing custom and native float multiplications.
This should print for bf=0.2 :
af = 55.50 845E0000 bf = 0.20 7C4CCCCD cf = 11.10 8231999A a = 55.50 845E0000 b = 0.20 7C4CCCCD c = 11.06 8230E800
native float loops: 41 clocks: 148
custom float loops: 65 clocks: 90
native float loops: 48 clocks: 125
custom float loops: 65 clocks: 90
native float loops: 66 clocks: 88
and for bf=3.4 :
af = 55.50 845E0000 bf = 3.40 8059999A cf = 188.70 863CB334 a = 55.50 845E0000 b = 3.40 8059999A c = 188.18 863C2E00
native float loops: 41 clocks: 148
custom float loops: 65 clocks: 90
native float loops: 44 clocks: 137
custom float loops: 65 clocks: 90
native float loops: 52 clocks: 115
custom float loops: 65 clocks: 90
native float loops: 52 clocks: 115
custom float loops: 65 clocks: 90
*/
/*
 * Repacks an IEEE-754 single-precision value into the "custom" 32-bit layout
 * used by floatmul():
 *   bits 31..24 : the 8 exponent bits (so the exponent fills the top byte)
 *   bit  23     : the sign bit
 *   bits 22..0  : the 23 mantissa bits, unchanged
 *
 * x      : value to convert.
 * returns: the repacked bit pattern.
 *
 * The bits are read through a union rather than a casted pointer: accessing a
 * float object through a uint32_t* breaks the strict-aliasing rule, whereas
 * GCC documents union-based type punning as supported.
 */
uint32_t floatToCustom(float x) {
  union {
    float value;
    uint32_t bits;
  } pun;
  pun.value = x;

  const uint32_t exponent = (pun.bits & 0x7f800000) << 1; // exponent up into the top byte
  const uint32_t sign     = (pun.bits & 0x80000000) >> 8; // sign down to bit 23
  const uint32_t mantissa =  pun.bits & 0x7fffff;         // mantissa stays in place
  return exponent | sign | mantissa;
}
/*
 * Converts a value from the "custom" 32-bit layout back to an IEEE-754
 * single-precision float. Custom layout:
 *   bits 31..24 : exponent
 *   bit  23     : sign
 *   bits 22..0  : mantissa
 *
 * x      : bit pattern in the custom layout.
 * returns: the corresponding float.
 *
 * The sign is taken from bit 23 (mask 0x00800000). The previous mask,
 * 0x08000000, selected an exponent bit and then shifted it out of the 32-bit
 * word, so the sign was always lost and negative values came back positive.
 *
 * The bits are written through a union rather than a casted pointer: accessing
 * a float object through a uint32_t* breaks the strict-aliasing rule, whereas
 * GCC documents union-based type punning as supported.
 */
float customToFloat(uint32_t x) {
  const uint32_t exponent = (x & 0xff000000) >> 1; // top byte back to bits 30..23
  const uint32_t sign     = (x & 0x00800000) << 8; // bit 23 back to bit 31
  const uint32_t mantissa =  x & 0x7fffff;         // mantissa stays in place

  union {
    float value;
    uint32_t bits;
  } pun;
  pun.bits = exponent | sign | mantissa;
  return pun.value;
}
/*
 * A not-so-crappy(TM) approximation of float multiplication, operating on the
 * "custom" layout (exponent in byte D, sign in bit 7 of byte C, mantissa in
 * the remaining 23 bits) so that the exponent needs no bit shuffling.
 *
 * Only the top byte of each mantissa takes part (one 8x8-bit FMUL), so the
 * result is less precise than the native product.
 *
 * a, b   : operands in the custom layout.
 * returns: the approximate product in the custom layout.
 *
 * The asm contains no special-casing of zero, subnormals, infinities or NaN,
 * and no check for exponent overflow/underflow.
 *
 * Fixes relative to the previous version:
 *  - The sign computed in step 1 is now actually written to the result
 *    (bld). Before, flag T was never used, and whenever the carry was clear
 *    the implicit leading 1 of the mantissa was left in bit 7 of byte C,
 *    i.e. in the sign position.
 *  - `result` is a pure output, so it is bound with "=&a" instead of "+a".
 *    "+a" read an uninitialised variable. The early-clobber marker is needed
 *    because result bytes are written before the last input is read.
 *
 * `result` must live in r16..r23 (constraint "a") because FMUL only accepts
 * registers in that range. __tmp_reg__ and __zero_reg__ receive the FMUL
 * product; __zero_reg__ is cleared again before leaving the asm.
 */
uint32_t floatmul(uint32_t a, uint32_t b) {
  uint32_t result;
  asm (
    /* Step 1: the sign of the product is sign(a) XOR sign(b); park it in flag T. */
    "mov __tmp_reg__,%C[a]" "\n\t"
    "eor __tmp_reg__,%C[b]" "\n\t"
    "bst __tmp_reg__,7" "\n\t"
    /* Step 2: put the mantissas in 1.7 form and pick up the exponents. */
    /* High mantissa byte of a into byte B; setting bit 7 replaces the sign
       bit with the implicit leading 1. */
    "mov %B[result],%C[a]" "\n\t"
    "ori %B[result],0x80" "\n\t"
    /* a's exponent is already a whole byte: copy it to byte D. */
    "mov %D[result],%D[a]" "\n\t"
    /* Remove the bias now, before adding b's exponent, to avoid overflow. */
    "subi %D[result],0x7f" "\n\t"
    /* Same 1.7 preparation for b's mantissa, into byte A. */
    "mov %A[result],%C[b]" "\n\t"
    "ori %A[result],0x80" "\n\t"
    /* Add b's exponent. */
    "add %D[result],%D[b]" "\n\t"
    /* Step 3: multiply the mantissas. The 16-bit product lands in
       __zero_reg__:__tmp_reg__ and the carry flag reports the bit shifted out. */
    "fmul %A[result], %B[result]" "\n\t"
    /* Store the product in bytes B (low) and C (high). mov does not touch the flags. */
    "mov %B[result], __tmp_reg__" "\n\t"
    "mov %C[result], __zero_reg__" "\n\t"
    /* Step 4: normalise. A single correction is enough.
       Unlike the plain-float implementation, the mantissa is shifted right
       (and the exponent incremented) only when the carry is set. */
    "brcc carry_clear_%=" "\n\t"
    "lsr %C[result]" "\n\t"
    "ror %B[result]" "\n\t"
    "inc %D[result]" "\n\t"
    /* Step 5: the exponent is in byte D and the mantissa in bytes B and C.
       Bit 7 of byte C is the sign position: overwrite whatever the
       normalisation left there with the sign saved in flag T. */
    "carry_clear_%= :" "\n\t"
    "bld %C[result],7" "\n\t"
    /* Byte A was only a scratch operand for FMUL: clear it. */
    "clr %A[result]" "\n\t"
    /* Restore __zero_reg__ to 0, as the compiler expects. */
    "clr __zero_reg__" "\n\t"
    :
    [result]"=&a"(result) :
    [a]"r"(a), [b]"r"(b)
  );
  return result;
}
// Native-float operands (af, bf) and their product (cf).
float af;
float bf;
float cf;
// The same operands and the floatmul() product, in the custom 32-bit layout.
uint32_t a;
uint32_t b;
uint32_t c;
// Multiplies one pair of operands both natively and with floatmul(), then
// prints every value as a decimal float followed by its custom-layout bit
// pattern in hexadecimal, all on one line.
void setup() {
  // Native operands and product.
  af = 55.5;
  bf = 3.4;//0.2;//
  cf = af*bf;

  // Custom-layout operands and product.
  a = floatToCustom(af);
  b = floatToCustom(bf);
  c = floatmul(a, b);

  Serial.begin(9600);

  // Native side: value, then its custom-layout encoding.
  Serial.print("af = ");
  Serial.print(af);
  Serial.print(" ");
  Serial.print(floatToCustom(af), HEX);

  Serial.print(" bf = ");
  Serial.print(bf);
  Serial.print(" ");
  Serial.print(floatToCustom(bf), HEX);

  Serial.print(" cf = ");
  Serial.print(cf);
  Serial.print(" ");
  Serial.print(floatToCustom(cf), HEX);

  // Custom side: decoded value, then the raw bit pattern.
  Serial.print(" a = ");
  Serial.print(customToFloat(a));
  Serial.print(" ");
  Serial.print(a, HEX);

  Serial.print(" b = ");
  Serial.print(customToFloat(b));
  Serial.print(" ");
  Serial.print(b, HEX);

  Serial.print(" c = ");
  Serial.print(customToFloat(c));
  Serial.print(" ");
  Serial.println(c, HEX);
}
// Times the native float multiplication, then floatmul() on the custom
// layout, and prints the iteration count and estimated clock cycles for each.
void loop() {
  // Measurement method: TCNT0 is the 8-bit timer behind the millisecond
  // counter and PWM0. It advances once every 64 clock cycles and wraps from
  // 255 back to 0. The code under test is repeated while the timer runs from
  // 156 through 255, i.e. for 64*100 clock cycles; dividing that budget by the
  // number of passes gives the cost of one pass.
  //
  // This is meaningful for operations costing from 1 to a few hundred cycles.
  // For longer ones the result gets coarse because the final pass overshoots
  // the end of the window.
  unsigned long passes = 0;

  // ---------------- native multiplication ----------------
  while (TCNT0 != 155) {}  // wait until tick 155 begins
  while (TCNT0 == 155) {}  // wait until tick 155 is over
  cli();                   // interrupts off while measuring
  while (TCNT0 > 150) {    // compare against 150 because the 0 itself may be missed
    af = af * bf;          // <-- code being timed
    ++passes;
  }
  sei();                   // interrupts back on

  Serial.print("native float ");
  Serial.print("loops: ");
  Serial.print(passes, DEC);
  Serial.print(" clocks: ");
  // 8 = cost of the empty loop, subtracted from the per-pass figure.
  Serial.print(static_cast<int>((100UL * 64UL) / passes) - 8, DEC);
  Serial.println();

  delay(100);

  // ---------------- custom multiplication ----------------
  passes = 0;
  while (TCNT0 != 155) {}  // wait until tick 155 begins
  while (TCNT0 == 155) {}  // wait until tick 155 is over
  cli();                   // interrupts off while measuring
  while (TCNT0 > 150) {    // compare against 150 because the 0 itself may be missed
    a = floatmul(a, b);    // <-- code being timed
    ++passes;
  }
  sei();                   // interrupts back on

  Serial.print("custom float ");
  Serial.print("loops: ");
  Serial.print(passes, DEC);
  Serial.print(" clocks: ");
  // 8 = cost of the empty loop, subtracted from the per-pass figure.
  Serial.print(static_cast<int>((100UL * 64UL) / passes) - 8, DEC);
  Serial.println();

  delay(500);
}
build/SimpleMultiplication.ino.elf: file format elf32-avr
Disassembly of section .text:
00000000 <__vectors>:
0: 0c 94 34 00 jmp 0x68 ; 0x68 <__ctors_end>
4: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
8: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
c: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
10: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
14: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
18: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
1c: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
20: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
24: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
28: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
2c: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
30: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
34: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
38: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
3c: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
40: 0c 94 48 00 jmp 0x90 ; 0x90 <__vector_16>
44: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
48: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
4c: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
50: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
54: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
58: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
5c: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
60: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
64: 0c 94 46 00 jmp 0x8c ; 0x8c <__bad_interrupt>
00000068 <__ctors_end>:
68: 11 24 eor r1, r1
6a: 1f be out 0x3f, r1 ; 63
6c: cf ef ldi r28, 0xFF ; 255
6e: d8 e0 ldi r29, 0x08 ; 8
70: de bf out 0x3e, r29 ; 62
72: cd bf out 0x3d, r28 ; 61
00000074 <__do_clear_bss>:
74: 21 e0 ldi r18, 0x01 ; 1
76: a0 e0 ldi r26, 0x00 ; 0
78: b1 e0 ldi r27, 0x01 ; 1
7a: 01 c0 rjmp .+2 ; 0x7e <.do_clear_bss_start>
0000007c <.do_clear_bss_loop>:
7c: 1d 92 st X+, r1
0000007e <.do_clear_bss_start>:
7e: ad 30 cpi r26, 0x0D ; 13
80: b2 07 cpc r27, r18
82: e1 f7 brne .-8 ; 0x7c <.do_clear_bss_loop>
84: 0e 94 92 00 call 0x124 ; 0x124 <main>
88: 0c 94 b8 01 jmp 0x370 ; 0x370 <_exit>
0000008c <__bad_interrupt>:
8c: 0c 94 00 00 jmp 0 ; 0x0 <__vectors>
00000090 <__vector_16>:
90: 1f 92 push r1
92: 0f 92 push r0
94: 0f b6 in r0, 0x3f ; 63
96: 0f 92 push r0
98: 11 24 eor r1, r1
9a: 2f 93 push r18
9c: 3f 93 push r19
9e: 8f 93 push r24
a0: 9f 93 push r25
a2: af 93 push r26
a4: bf 93 push r27
a6: 80 91 09 01 lds r24, 0x0109 ; 0x800109 <timer0_millis>
aa: 90 91 0a 01 lds r25, 0x010A ; 0x80010a <timer0_millis+0x1>
ae: a0 91 0b 01 lds r26, 0x010B ; 0x80010b <timer0_millis+0x2>
b2: b0 91 0c 01 lds r27, 0x010C ; 0x80010c <timer0_millis+0x3>
b6: 30 91 08 01 lds r19, 0x0108 ; 0x800108 <timer0_fract>
ba: 23 e0 ldi r18, 0x03 ; 3
bc: 23 0f add r18, r19
be: 2d 37 cpi r18, 0x7D ; 125
c0: 58 f5 brcc .+86 ; 0x118 <__vector_16+0x88>
c2: 01 96 adiw r24, 0x01 ; 1
c4: a1 1d adc r26, r1
c6: b1 1d adc r27, r1
c8: 20 93 08 01 sts 0x0108, r18 ; 0x800108 <timer0_fract>
cc: 80 93 09 01 sts 0x0109, r24 ; 0x800109 <timer0_millis>
d0: 90 93 0a 01 sts 0x010A, r25 ; 0x80010a <timer0_millis+0x1>
d4: a0 93 0b 01 sts 0x010B, r26 ; 0x80010b <timer0_millis+0x2>
d8: b0 93 0c 01 sts 0x010C, r27 ; 0x80010c <timer0_millis+0x3>
dc: 80 91 04 01 lds r24, 0x0104 ; 0x800104 <timer0_overflow_count>
e0: 90 91 05 01 lds r25, 0x0105 ; 0x800105 <timer0_overflow_count+0x1>
e4: a0 91 06 01 lds r26, 0x0106 ; 0x800106 <timer0_overflow_count+0x2>
e8: b0 91 07 01 lds r27, 0x0107 ; 0x800107 <timer0_overflow_count+0x3>
ec: 01 96 adiw r24, 0x01 ; 1
ee: a1 1d adc r26, r1
f0: b1 1d adc r27, r1
f2: 80 93 04 01 sts 0x0104, r24 ; 0x800104 <timer0_overflow_count>
f6: 90 93 05 01 sts 0x0105, r25 ; 0x800105 <timer0_overflow_count+0x1>
fa: a0 93 06 01 sts 0x0106, r26 ; 0x800106 <timer0_overflow_count+0x2>
fe: b0 93 07 01 sts 0x0107, r27 ; 0x800107 <timer0_overflow_count+0x3>
102: bf 91 pop r27
104: af 91 pop r26
106: 9f 91 pop r25
108: 8f 91 pop r24
10a: 3f 91 pop r19
10c: 2f 91 pop r18
10e: 0f 90 pop r0
110: 0f be out 0x3f, r0 ; 63
112: 0f 90 pop r0
114: 1f 90 pop r1
116: 18 95 reti
118: 26 e8 ldi r18, 0x86 ; 134
11a: 23 0f add r18, r19
11c: 02 96 adiw r24, 0x02 ; 2
11e: a1 1d adc r26, r1
120: b1 1d adc r27, r1
122: d2 cf rjmp .-92 ; 0xc8 <__vector_16+0x38>
00000124 <main>:
124: 78 94 sei
126: 84 b5 in r24, 0x24 ; 36
128: 82 60 ori r24, 0x02 ; 2
12a: 84 bd out 0x24, r24 ; 36
12c: 84 b5 in r24, 0x24 ; 36
12e: 81 60 ori r24, 0x01 ; 1
130: 84 bd out 0x24, r24 ; 36
132: 85 b5 in r24, 0x25 ; 37
134: 82 60 ori r24, 0x02 ; 2
136: 85 bd out 0x25, r24 ; 37
138: 85 b5 in r24, 0x25 ; 37
13a: 81 60 ori r24, 0x01 ; 1
13c: 85 bd out 0x25, r24 ; 37
13e: 80 91 6e 00 lds r24, 0x006E ; 0x80006e <__DATA_REGION_ORIGIN__+0xe>
142: 81 60 ori r24, 0x01 ; 1
144: 80 93 6e 00 sts 0x006E, r24 ; 0x80006e <__DATA_REGION_ORIGIN__+0xe>
148: 10 92 81 00 sts 0x0081, r1 ; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
14c: 80 91 81 00 lds r24, 0x0081 ; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
150: 82 60 ori r24, 0x02 ; 2
152: 80 93 81 00 sts 0x0081, r24 ; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
156: 80 91 81 00 lds r24, 0x0081 ; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
15a: 81 60 ori r24, 0x01 ; 1
15c: 80 93 81 00 sts 0x0081, r24 ; 0x800081 <__DATA_REGION_ORIGIN__+0x21>
160: 80 91 80 00 lds r24, 0x0080 ; 0x800080 <__DATA_REGION_ORIGIN__+0x20>
164: 81 60 ori r24, 0x01 ; 1
166: 80 93 80 00 sts 0x0080, r24 ; 0x800080 <__DATA_REGION_ORIGIN__+0x20>
16a: 80 91 b1 00 lds r24, 0x00B1 ; 0x8000b1 <__DATA_REGION_ORIGIN__+0x51>
16e: 84 60 ori r24, 0x04 ; 4
170: 80 93 b1 00 sts 0x00B1, r24 ; 0x8000b1 <__DATA_REGION_ORIGIN__+0x51>
174: 80 91 b0 00 lds r24, 0x00B0 ; 0x8000b0 <__DATA_REGION_ORIGIN__+0x50>
178: 81 60 ori r24, 0x01 ; 1
17a: 80 93 b0 00 sts 0x00B0, r24 ; 0x8000b0 <__DATA_REGION_ORIGIN__+0x50>
17e: 80 91 7a 00 lds r24, 0x007A ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
182: 84 60 ori r24, 0x04 ; 4
184: 80 93 7a 00 sts 0x007A, r24 ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
188: 80 91 7a 00 lds r24, 0x007A ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
18c: 82 60 ori r24, 0x02 ; 2
18e: 80 93 7a 00 sts 0x007A, r24 ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
192: 80 91 7a 00 lds r24, 0x007A ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
196: 81 60 ori r24, 0x01 ; 1
198: 80 93 7a 00 sts 0x007A, r24 ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
19c: 80 91 7a 00 lds r24, 0x007A ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
1a0: 80 68 ori r24, 0x80 ; 128
1a2: 80 93 7a 00 sts 0x007A, r24 ; 0x80007a <__DATA_REGION_ORIGIN__+0x1a>
1a6: 10 92 c1 00 sts 0x00C1, r1 ; 0x8000c1 <__DATA_REGION_ORIGIN__+0x61>
1aa: 10 92 00 01 sts 0x0100, r1 ; 0x800100 <x>
1ae: 10 92 01 01 sts 0x0101, r1 ; 0x800101 <x+0x1>
1b2: 10 92 02 01 sts 0x0102, r1 ; 0x800102 <x+0x2>
1b6: 10 92 03 01 sts 0x0103, r1 ; 0x800103 <x+0x3>
1ba: c0 e0 ldi r28, 0x00 ; 0
1bc: d0 e0 ldi r29, 0x00 ; 0
1be: 60 91 00 01 lds r22, 0x0100 ; 0x800100 <x>
1c2: 70 91 01 01 lds r23, 0x0101 ; 0x800101 <x+0x1>
1c6: 80 91 02 01 lds r24, 0x0102 ; 0x800102 <x+0x2>
1ca: 90 91 03 01 lds r25, 0x0103 ; 0x800103 <x+0x3>
1ce: 2d ec ldi r18, 0xCD ; 205
1d0: 3c ec ldi r19, 0xCC ; 204
1d2: 4c e1 ldi r20, 0x1C ; 28
1d4: 50 e4 ldi r21, 0x40 ; 64
1d6: 0e 94 fa 00 call 0x1f4 ; 0x1f4 <__mulsf3>
1da: 20 97 sbiw r28, 0x00 ; 0
1dc: c1 f3 breq .-16 ; 0x1ce <main+0xaa>
1de: 60 93 00 01 sts 0x0100, r22 ; 0x800100 <x>
1e2: 70 93 01 01 sts 0x0101, r23 ; 0x800101 <x+0x1>
1e6: 80 93 02 01 sts 0x0102, r24 ; 0x800102 <x+0x2>
1ea: 90 93 03 01 sts 0x0103, r25 ; 0x800103 <x+0x3>
1ee: 0e 94 00 00 call 0 ; 0x0 <__vectors>
1f2: e5 cf rjmp .-54 ; 0x1be <main+0x9a>
000001f4 <__mulsf3>:
1f4: 0e 94 0d 01 call 0x21a ; 0x21a <__mulsf3x>
1f8: 0c 94 7e 01 jmp 0x2fc ; 0x2fc <__fp_round>
1fc: 0e 94 70 01 call 0x2e0 ; 0x2e0 <__fp_pscA>
200: 38 f0 brcs .+14 ; 0x210 <__mulsf3+0x1c>
202: 0e 94 77 01 call 0x2ee ; 0x2ee <__fp_pscB>
206: 20 f0 brcs .+8 ; 0x210 <__mulsf3+0x1c>
208: 95 23 and r25, r21
20a: 11 f0 breq .+4 ; 0x210 <__mulsf3+0x1c>
20c: 0c 94 67 01 jmp 0x2ce ; 0x2ce <__fp_inf>
210: 0c 94 6d 01 jmp 0x2da ; 0x2da <__fp_nan>
214: 11 24 eor r1, r1
216: 0c 94 b2 01 jmp 0x364 ; 0x364 <__fp_szero>
0000021a <__mulsf3x>:
21a: 0e 94 8f 01 call 0x31e ; 0x31e <__fp_split3>
21e: 70 f3 brcs .-36 ; 0x1fc <__mulsf3+0x8>
00000220 <__mulsf3_pse>:
220: 95 9f mul r25, r21
222: c1 f3 breq .-16 ; 0x214 <__mulsf3+0x20>
224: 95 0f add r25, r21
226: 50 e0 ldi r21, 0x00 ; 0
228: 55 1f adc r21, r21
22a: 62 9f mul r22, r18
22c: f0 01 movw r30, r0
22e: 72 9f mul r23, r18
230: bb 27 eor r27, r27
232: f0 0d add r31, r0
234: b1 1d adc r27, r1
236: 63 9f mul r22, r19
238: aa 27 eor r26, r26
23a: f0 0d add r31, r0
23c: b1 1d adc r27, r1
23e: aa 1f adc r26, r26
240: 64 9f mul r22, r20
242: 66 27 eor r22, r22
244: b0 0d add r27, r0
246: a1 1d adc r26, r1
248: 66 1f adc r22, r22
24a: 82 9f mul r24, r18
24c: 22 27 eor r18, r18
24e: b0 0d add r27, r0
250: a1 1d adc r26, r1
252: 62 1f adc r22, r18
254: 73 9f mul r23, r19
256: b0 0d add r27, r0
258: a1 1d adc r26, r1
25a: 62 1f adc r22, r18
25c: 83 9f mul r24, r19
25e: a0 0d add r26, r0
260: 61 1d adc r22, r1
262: 22 1f adc r18, r18
264: 74 9f mul r23, r20
266: 33 27 eor r19, r19
268: a0 0d add r26, r0
26a: 61 1d adc r22, r1
26c: 23 1f adc r18, r19
26e: 84 9f mul r24, r20
270: 60 0d add r22, r0
272: 21 1d adc r18, r1
274: 82 2f mov r24, r18
276: 76 2f mov r23, r22
278: 6a 2f mov r22, r26
27a: 11 24 eor r1, r1
27c: 9f 57 subi r25, 0x7F ; 127
27e: 50 40 sbci r21, 0x00 ; 0
280: 9a f0 brmi .+38 ; 0x2a8 <__mulsf3_pse+0x88>
282: f1 f0 breq .+60 ; 0x2c0 <__mulsf3_pse+0xa0>
284: 88 23 and r24, r24
286: 4a f0 brmi .+18 ; 0x29a <__mulsf3_pse+0x7a>
288: ee 0f add r30, r30
28a: ff 1f adc r31, r31
28c: bb 1f adc r27, r27
28e: 66 1f adc r22, r22
290: 77 1f adc r23, r23
292: 88 1f adc r24, r24
294: 91 50 subi r25, 0x01 ; 1
296: 50 40 sbci r21, 0x00 ; 0
298: a9 f7 brne .-22 ; 0x284 <__mulsf3_pse+0x64>
29a: 9e 3f cpi r25, 0xFE ; 254
29c: 51 05 cpc r21, r1
29e: 80 f0 brcs .+32 ; 0x2c0 <__mulsf3_pse+0xa0>
2a0: 0c 94 67 01 jmp 0x2ce ; 0x2ce <__fp_inf>
2a4: 0c 94 b2 01 jmp 0x364 ; 0x364 <__fp_szero>
2a8: 5f 3f cpi r21, 0xFF ; 255
2aa: e4 f3 brlt .-8 ; 0x2a4 <__mulsf3_pse+0x84>
2ac: 98 3e cpi r25, 0xE8 ; 232
2ae: d4 f3 brlt .-12 ; 0x2a4 <__mulsf3_pse+0x84>
2b0: 86 95 lsr r24
2b2: 77 95 ror r23
2b4: 67 95 ror r22
2b6: b7 95 ror r27
2b8: f7 95 ror r31
2ba: e7 95 ror r30
2bc: 9f 5f subi r25, 0xFF ; 255
2be: c1 f7 brne .-16 ; 0x2b0 <__mulsf3_pse+0x90>
2c0: fe 2b or r31, r30
2c2: 88 0f add r24, r24
2c4: 91 1d adc r25, r1
2c6: 96 95 lsr r25
2c8: 87 95 ror r24
2ca: 97 f9 bld r25, 7
2cc: 08 95 ret
000002ce <__fp_inf>:
2ce: 97 f9 bld r25, 7
2d0: 9f 67 ori r25, 0x7F ; 127
2d2: 80 e8 ldi r24, 0x80 ; 128
2d4: 70 e0 ldi r23, 0x00 ; 0
2d6: 60 e0 ldi r22, 0x00 ; 0
2d8: 08 95 ret
000002da <__fp_nan>:
2da: 9f ef ldi r25, 0xFF ; 255
2dc: 80 ec ldi r24, 0xC0 ; 192
2de: 08 95 ret
000002e0 <__fp_pscA>:
2e0: 00 24 eor r0, r0
2e2: 0a 94 dec r0
2e4: 16 16 cp r1, r22
2e6: 17 06 cpc r1, r23
2e8: 18 06 cpc r1, r24
2ea: 09 06 cpc r0, r25
2ec: 08 95 ret
000002ee <__fp_pscB>:
2ee: 00 24 eor r0, r0
2f0: 0a 94 dec r0
2f2: 12 16 cp r1, r18
2f4: 13 06 cpc r1, r19
2f6: 14 06 cpc r1, r20
2f8: 05 06 cpc r0, r21
2fa: 08 95 ret
000002fc <__fp_round>:
2fc: 09 2e mov r0, r25
2fe: 03 94 inc r0
300: 00 0c add r0, r0
302: 11 f4 brne .+4 ; 0x308 <__fp_round+0xc>
304: 88 23 and r24, r24
306: 52 f0 brmi .+20 ; 0x31c <__fp_round+0x20>
308: bb 0f add r27, r27
30a: 40 f4 brcc .+16 ; 0x31c <__fp_round+0x20>
30c: bf 2b or r27, r31
30e: 11 f4 brne .+4 ; 0x314 <__fp_round+0x18>
310: 60 ff sbrs r22, 0
312: 04 c0 rjmp .+8 ; 0x31c <__fp_round+0x20>
314: 6f 5f subi r22, 0xFF ; 255
316: 7f 4f sbci r23, 0xFF ; 255
318: 8f 4f sbci r24, 0xFF ; 255
31a: 9f 4f sbci r25, 0xFF ; 255
31c: 08 95 ret
0000031e <__fp_split3>:
31e: 57 fd sbrc r21, 7
320: 90 58 subi r25, 0x80 ; 128
322: 44 0f add r20, r20
324: 55 1f adc r21, r21
326: 59 f0 breq .+22 ; 0x33e <__fp_splitA+0x10>
328: 5f 3f cpi r21, 0xFF ; 255
32a: 71 f0 breq .+28 ; 0x348 <__fp_splitA+0x1a>
32c: 47 95 ror r20
0000032e <__fp_splitA>:
32e: 88 0f add r24, r24
330: 97 fb bst r25, 7
332: 99 1f adc r25, r25
334: 61 f0 breq .+24 ; 0x34e <__fp_splitA+0x20>
336: 9f 3f cpi r25, 0xFF ; 255
338: 79 f0 breq .+30 ; 0x358 <__fp_splitA+0x2a>
33a: 87 95 ror r24
33c: 08 95 ret
33e: 12 16 cp r1, r18
340: 13 06 cpc r1, r19
342: 14 06 cpc r1, r20
344: 55 1f adc r21, r21
346: f2 cf rjmp .-28 ; 0x32c <__fp_split3+0xe>
348: 46 95 lsr r20
34a: f1 df rcall .-30 ; 0x32e <__fp_splitA>
34c: 08 c0 rjmp .+16 ; 0x35e <__fp_splitA+0x30>
34e: 16 16 cp r1, r22
350: 17 06 cpc r1, r23
352: 18 06 cpc r1, r24
354: 99 1f adc r25, r25
356: f1 cf rjmp .-30 ; 0x33a <__fp_splitA+0xc>
358: 86 95 lsr r24
35a: 71 05 cpc r23, r1
35c: 61 05 cpc r22, r1
35e: 08 94 sec
360: 08 95 ret
00000362 <__fp_zero>:
362: e8 94 clt
00000364 <__fp_szero>:
364: bb 27 eor r27, r27
366: 66 27 eor r22, r22
368: 77 27 eor r23, r23
36a: cb 01 movw r24, r22
36c: 97 f9 bld r25, 7
36e: 08 95 ret
00000370 <_exit>:
370: f8 94 cli
00000372 <__stop_program>:
372: ff cf rjmp .-2 ; 0x372 <__stop_program>
// Value that receives the product computed in setup().
float x;

// Sets x to 1.0 and then scales it by 2.45.
void setup() {
  x = 1.0;
  x = x * 2.45;
}

// Nothing to do repeatedly.
void loop() {
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment