Created
February 9, 2018 21:52
-
-
Save matthijskooijman/27706e2593da873e6b6707753f91ec28 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This approach is the simplest and does not fix the rounding issue, so | |
// this OK/-1/-2/-3 pattern repeats over and over (all delays are | |
// rounded down to a multiple of 4, the length of one loop iteration) | |
delayMicroseconds(0): ERR: 5 cycles == 5.00us | |
delayMicroseconds(1): ERR: 4 cycles == 4.00us | |
delayMicroseconds(2): ERR: 3 cycles == 3.00us | |
delayMicroseconds(3): ERR: 2 cycles == 2.00us | |
delayMicroseconds(4): ERR: 1 cycles == 1.00us | |
delayMicroseconds(5): OK | |
delayMicroseconds(6): ERR: -1 cycles == -1.00us | |
delayMicroseconds(7): ERR: -2 cycles == -2.00us | |
delayMicroseconds(8): ERR: -3 cycles == -3.00us | |
delayMicroseconds(9): ERR: -4 cycles == -4.00us | |
delayMicroseconds(10): ERR: 2 cycles == 2.00us | |
delayMicroseconds(11): ERR: 1 cycles == 1.00us | |
delayMicroseconds(12): OK | |
delayMicroseconds(13): ERR: -1 cycles == -1.00us | |
delayMicroseconds(14): OK | |
delayMicroseconds(15): ERR: -1 cycles == -1.00us | |
delayMicroseconds(16): ERR: -2 cycles == -2.00us | |
delayMicroseconds(17): ERR: -3 cycles == -3.00us | |
// 4 us per loop, 1 cycle per us | |
// loops: (us - 10) / 4 | |
// overhead: 10 cycles = 10us | |
"sbiw %A0, 10" "\n\t" // 2 | |
"brcs L_%=_end" "\n\t" // 1 | |
"lsr %B0" "\n\t" // 1 | |
"ror %A0" "\n\t" // 1 | |
"lsr %B0" "\n\t" // 1 | |
"ror %A0" "\n\t" // 1 | |
"sbiw %A0, 0" "\n\t" // 2 | |
"breq L_%=_end" "\n\t" // 1 | |
// This approach just fixes the rounding errors | |
delayMicroseconds(0): ERR: 5 cycles == 5.00us | |
delayMicroseconds(1): ERR: 4 cycles == 4.00us | |
delayMicroseconds(2): ERR: 3 cycles == 3.00us | |
delayMicroseconds(3): ERR: 2 cycles == 2.00us | |
delayMicroseconds(4): ERR: 1 cycles == 1.00us | |
delayMicroseconds(5): OK | |
delayMicroseconds(6): ERR: -1 cycles == -1.00us | |
delayMicroseconds(7): ERR: -2 cycles == -2.00us | |
delayMicroseconds(8): ERR: -3 cycles == -3.00us | |
delayMicroseconds(9): ERR: -4 cycles == -4.00us | |
delayMicroseconds(10): ERR: -5 cycles == -5.00us | |
delayMicroseconds(11): ERR: -6 cycles == -6.00us | |
delayMicroseconds(12): ERR: -7 cycles == -7.00us | |
delayMicroseconds(13): ERR: 2 cycles == 2.00us | |
delayMicroseconds(14): ERR: 2 cycles == 2.00us | |
delayMicroseconds(15): ERR: 2 cycles == 2.00us | |
delayMicroseconds(16): ERR: 2 cycles == 2.00us | |
delayMicroseconds(17): OK | |
// 4 us per loop, 1 cycle per us | |
// overhead: 13 cycles = 13us | |
// loops: (us - 13) / 4 | |
"sbiw %A0, 13" "\n\t" // 2 | |
"brcs L_%=_end" "\n\t" // 1 | |
"lsr %B0" "\n\t" // 1 | |
"ror %A0" "\n\t" // 1 | |
"brcs L_%=_1\nL_%=_1:" "\n\t" // 1 (2 on carry) | |
"lsr %B0" "\n\t" // 1 | |
"ror %A0" "\n\t" // 1 | |
"brcs L_%=_2\nL_%=_2:" "\n\t" // 1 (2 on carry) | |
"brcs L_%=_3\nL_%=_3:" "\n\t" // 1 (2 on carry) | |
"sbiw %A0, 0" "\n\t" // 2 | |
"breq L_%=_end" "\n\t" // 1 | |
// This approach splits the handling for small and big values, which can | |
// make both simpler (for big values, no need to check for 0 after | |
// division, for small values, no need to divide and check the upper | |
// byte). | |
delayMicroseconds(0): ERR: 9 cycles == 9.00us | |
delayMicroseconds(1): ERR: 8 cycles == 8.00us | |
delayMicroseconds(2): ERR: 7 cycles == 7.00us | |
delayMicroseconds(3): ERR: 6 cycles == 6.00us | |
delayMicroseconds(4): ERR: 5 cycles == 5.00us | |
delayMicroseconds(5): ERR: 4 cycles == 4.00us | |
delayMicroseconds(6): ERR: 3 cycles == 3.00us | |
delayMicroseconds(7): ERR: 2 cycles == 2.00us | |
delayMicroseconds(8): ERR: 1 cycles == 1.00us | |
delayMicroseconds(9): OK | |
delayMicroseconds(10): ERR: 2 cycles == 2.00us | |
delayMicroseconds(11): ERR: 1 cycles == 1.00us | |
delayMicroseconds(12): OK | |
delayMicroseconds(13): ERR: -1 cycles == -1.00us | |
delayMicroseconds(14): OK | |
delayMicroseconds(15): OK | |
delayMicroseconds(16): OK | |
"sbiw %A0, 15" "\n\t" // 2 | |
"brcs L_%=_small" "\n\t" // 1 | |
"lsr %B0" "\n\t" // 1 | |
"ror %A0" "\n\t" // 1 | |
"brcs L%=_1\nL%=_1:" "\n\t" // 1 (2 on carry) | |
"lsr %B0" "\n\t" // 1 | |
"ror %A0" "\n\t" // 1 4 us per loop, 1 cycle per us | |
"brcs L%=_2\nL%=_2:" "\n\t" // 1 (2 on carry) | |
"brcs L%=_3\nL%=_3:" "\n\t" // 1 (2 on carry) | |
"sbiw %A0, 0" "\n\t" // 2 | |
"breq L_%=_end" "\n\t" // 1 4 us per loop, 1 cycle per us | |
"rjmp L_%=_loop" "\n\t" // 2 | |
"L_%=_small:" "\n\t" // 4 up to here | |
"adiw %A0, 5" "\n\t" // 2 | |
"brmi L_%=_end" "\n\t" // 1 | |
"lsr %A0" "\n\t" // 1 | |
"lsr %A0" "\n\t" // 1 | |
"breq L_%=_end" "\n\t" // 1 overhead: 10 = 10us | |
// This approach handles the small values directly, without falling | |
// through to the loop. | |
delayMicroseconds(0): ERR: 8 cycles == 8.00us | |
delayMicroseconds(1): ERR: 7 cycles == 7.00us | |
delayMicroseconds(2): ERR: 6 cycles == 6.00us | |
delayMicroseconds(3): ERR: 5 cycles == 5.00us | |
delayMicroseconds(4): ERR: 4 cycles == 4.00us | |
delayMicroseconds(5): ERR: 3 cycles == 3.00us | |
delayMicroseconds(6): ERR: 2 cycles == 2.00us | |
delayMicroseconds(7): ERR: 1 cycles == 1.00us | |
delayMicroseconds(8): OK | |
delayMicroseconds(9): ERR: -1 cycles == -1.00us | |
delayMicroseconds(10): OK | |
delayMicroseconds(11): OK | |
delayMicroseconds(12): OK | |
delayMicroseconds(13): ERR: 1 cycles == 1.00us | |
delayMicroseconds(14): OK | |
delayMicroseconds(15): OK | |
delayMicroseconds(16): OK | |
"sbiw %A0, 16" "\n\t" // 2 | |
"brcc L_%=_big" "\n\t" // 1 (2 on branch) | |
"adiw %A0, 6" "\n\t" // 2 | |
"brmi L_%=_end" "\n\t" // 1 | |
"breq L_%=_loopcheck" "\n\t" // 1 | |
"cpi %A0, 2" "\n\t" // 1 | |
"brcs L_%=_end" "\n\t" // 1 | |
"breq L_%=_end" "\n\t" // 1 | |
"cpi %A0, 5" "\n\t" // 1 | |
"brcs L_%=_end" "\n\t" // 1 | |
"rjmp L_%=_end" "\n\t" // 1 | |
"L_%=_big:" "\n\t" | |
"lsr %B0" "\n\t" // 1 | |
"ror %A0" "\n\t" // 1 | |
"brcs L%=_1\nL%=_1:" "\n\t" // 1 (2 on carry) | |
"lsr %B0" "\n\t" // 1 | |
"ror %A0" "\n\t" // 1 | |
"brcs L%=_2\nL%=_2:" "\n\t" // 1 (2 on carry) | |
"brcs L%=_3\nL%=_3:" "\n\t" // 1 (2 on carry) | |
"sbiw %A0, 0" "\n\t" // 2 | |
"breq L_%=_end" "\n\t" // 1 | |
"nop" "\n\t" // 1 | |
"nop" "\n\t" // 1 | |
// This also needs an additional label in the loop: | |
"L_%=_loop:" | |
"sbiw %A0, 1" "\n\t" // 2 | |
"L_%=_loopcheck:" | |
"brne L_%=_loop" "\n\t" // 2 (1 on last) | |
"L_%=_end:" | |
// This approach further splits big values into not so big values and | |
// really big values, and handles the notsobig values in a way identical | |
// to the small values (unfortunately there are no cycles available for | |
// a rjmp, so the code is duplicated). | |
delayMicroseconds(0): ERR: 8 cycles == 8.00us | |
delayMicroseconds(1): ERR: 7 cycles == 7.00us | |
delayMicroseconds(2): ERR: 6 cycles == 6.00us | |
delayMicroseconds(3): ERR: 5 cycles == 5.00us | |
delayMicroseconds(4): ERR: 4 cycles == 4.00us | |
delayMicroseconds(5): ERR: 3 cycles == 3.00us | |
delayMicroseconds(6): ERR: 2 cycles == 2.00us | |
delayMicroseconds(7): ERR: 1 cycles == 1.00us | |
delayMicroseconds(8): OK | |
delayMicroseconds(9): ERR: -1 cycles == -1.00us | |
delayMicroseconds(10): OK | |
delayMicroseconds(11): OK | |
delayMicroseconds(12): OK | |
delayMicroseconds(13): OK | |
delayMicroseconds(14): OK | |
delayMicroseconds(15): OK | |
delayMicroseconds(16): OK | |
// 4 us per loop, 1 cycle per us | |
// overhead: 18 cycles = 18us (when us >= 19) | |
// loops: (us - 19) / 4 | |
// no loops when us < 19 | |
"sbiw %A0, 14" "\n\t" // 2 | |
"brcc L_%=_big" "\n\t" // 1 (2 on branch) | |
"adiw %A0, 4" "\n\t" // 2 | |
"brmi L_%=_end" "\n\t" // 1 | |
// Branching to loopcheck with Z flag set adds 1 cycle | |
"breq L_%=_loopcheck" "\n\t" // 1 | |
"cpi %A0, 2" "\n\t" // 1 | |
"brcs L_%=_end" "\n\t" // 1 | |
"breq L_%=_end" "\n\t" // 1 | |
"rjmp L_%=_end" "\n\t" // 1 | |
"L_%=_big:" "\n\t" | |
"sbiw %A0, 5" "\n\t" // 2 | |
"brcs L_%=_notsobig" "\n\t" // 1 (2 on branch) | |
"lsr %B0" "\n\t" // 1 | |
"ror %A0" "\n\t" // 1 | |
"brcs L%=_1\nL%=_1:" "\n\t" // 1 (2 on carry) | |
"lsr %B0" "\n\t" // 1 | |
"ror %A0" "\n\t" // 1 | |
"brcs L%=_2\nL%=_2:" "\n\t" // 1 (2 on carry) | |
"brcs L%=_3\nL%=_3:" "\n\t" // 1 (2 on carry) | |
"cpi %A0, 1" "\n\t" // 1 | |
"cpc %B0, r1" "\n\t" // 1 | |
"brcs L_%=_end" "\n\t" // 1 | |
"rjmp L_%=_loop" "\n\t" // 1 | |
"L_%=_notsobig:" "\n\t" | |
"adiw %A0, 4" "\n\t" // 2 11 to here | |
// Branching to a jump-to-end instruction adds 2 cycles | |
"breq L_%=_jmp_to_end" "\n\t" // 1 | |
"brmi L_%=_end" "\n\t" // 1 | |
"cpi %A0, 2" "\n\t" // 1 | |
"brcs L_%=_end" "\n\t" // 1 | |
"breq L_%=_end" "\n\t" // 1 | |
"L_%=_jmp_to_end:" "\n\t" | |
"rjmp L_%=_end" "\n\t" // 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment