Skip to content

Instantly share code, notes, and snippets.

@matthijskooijman
Created February 9, 2018 21:52
Show Gist options
  • Save matthijskooijman/27706e2593da873e6b6707753f91ec28 to your computer and use it in GitHub Desktop.
Save matthijskooijman/27706e2593da873e6b6707753f91ec28 to your computer and use it in GitHub Desktop.
// This approach is the simplest and does not fix the rounding issue, so
// this OK/-1/-2/-3 pattern repeats over and over (all delays are
// rounded down to a multiple of 4, the length of one loop iteration in us)
delayMicroseconds(0): ERR: 5 cycles == 5.00us
delayMicroseconds(1): ERR: 4 cycles == 4.00us
delayMicroseconds(2): ERR: 3 cycles == 3.00us
delayMicroseconds(3): ERR: 2 cycles == 2.00us
delayMicroseconds(4): ERR: 1 cycles == 1.00us
delayMicroseconds(5): OK
delayMicroseconds(6): ERR: -1 cycles == -1.00us
delayMicroseconds(7): ERR: -2 cycles == -2.00us
delayMicroseconds(8): ERR: -3 cycles == -3.00us
delayMicroseconds(9): ERR: -4 cycles == -4.00us
delayMicroseconds(10): ERR: 2 cycles == 2.00us
delayMicroseconds(11): ERR: 1 cycles == 1.00us
delayMicroseconds(12): OK
delayMicroseconds(13): ERR: -1 cycles == -1.00us
delayMicroseconds(14): OK
delayMicroseconds(15): ERR: -1 cycles == -1.00us
delayMicroseconds(16): ERR: -2 cycles == -2.00us
delayMicroseconds(17): ERR: -3 cycles == -3.00us
// 4 us per loop, 1 cycle per us
// loops: (us - 10) / 4
// overhead: 10 cycles = 10us
// Prologue before the delay loop: remove the fixed overhead from the
// requested delay, then turn what is left into a loop count by dividing
// by 4. The remainder of the division is simply dropped here.
// The trailing number on each line is the instruction's cycle count.
"sbiw %A0, 10" "\n\t" // 2  subtract the overhead from the 16-bit operand
"brcs L_%=_end" "\n\t" // 1  borrow: the request was below 10, skip the loop
"lsr %B0" "\n\t" // 1  16-bit shift right: high byte first ...
"ror %A0" "\n\t" // 1  ... then the low byte through carry (divide by 2)
"lsr %B0" "\n\t" // 1  second shift pair, dividing by 4 in total
"ror %A0" "\n\t" // 1
"sbiw %A0, 0" "\n\t" // 2  subtracting 0 only refreshes the flags (Z if count is 0)
"breq L_%=_end" "\n\t" // 1  no iterations left: skip the loop
// This approach just fixes the rounding errors
delayMicroseconds(0): ERR: 5 cycles == 5.00us
delayMicroseconds(1): ERR: 4 cycles == 4.00us
delayMicroseconds(2): ERR: 3 cycles == 3.00us
delayMicroseconds(3): ERR: 2 cycles == 2.00us
delayMicroseconds(4): ERR: 1 cycles == 1.00us
delayMicroseconds(5): OK
delayMicroseconds(6): ERR: -1 cycles == -1.00us
delayMicroseconds(7): ERR: -2 cycles == -2.00us
delayMicroseconds(8): ERR: -3 cycles == -3.00us
delayMicroseconds(9): ERR: -4 cycles == -4.00us
delayMicroseconds(10): ERR: -5 cycles == -5.00us
delayMicroseconds(11): ERR: -6 cycles == -6.00us
delayMicroseconds(12): ERR: -7 cycles == -7.00us
delayMicroseconds(13): ERR: 2 cycles == 2.00us
delayMicroseconds(14): ERR: 2 cycles == 2.00us
delayMicroseconds(15): ERR: 2 cycles == 2.00us
delayMicroseconds(16): ERR: 2 cycles == 2.00us
delayMicroseconds(17): OK
// 4 us per loop, 1 cycle per us
// overhead: 13 cycles = 13us
// loops: (us - 13) / 4
// Prologue before the delay loop: remove the fixed overhead, divide the
// rest by 4 to get a loop count, and spend one extra cycle for every unit
// of remainder that the division shifts out.
// The trailing number on each line is the instruction's cycle count.
"sbiw %A0, 13" "\n\t" // 2  subtract the overhead from the 16-bit operand
"brcs L_%=_end" "\n\t" // 1  borrow: the request was below 13, skip the loop
"lsr %B0" "\n\t" // 1  16-bit shift right: high byte first ...
"ror %A0" "\n\t" // 1  ... then the low byte; carry now holds the old bit 0
// A branch to the very next instruction changes nothing but timing: it
// costs one extra cycle when taken, i.e. when the shifted-out bit was 1.
"brcs L_%=_1\nL_%=_1:" "\n\t" // 1 (2 on carry)
"lsr %B0" "\n\t" // 1  second shift pair, dividing by 4 in total
"ror %A0" "\n\t" // 1  carry now holds the old bit 1 (worth 2)
// Two such branches in a row: the carry is not modified by a branch, so
// both are taken together and add two cycles when the old bit 1 was set.
"brcs L_%=_2\nL_%=_2:" "\n\t" // 1 (2 on carry)
"brcs L_%=_3\nL_%=_3:" "\n\t" // 1 (2 on carry)
"sbiw %A0, 0" "\n\t" // 2  subtracting 0 only refreshes the flags (Z if count is 0)
"breq L_%=_end" "\n\t" // 1  no iterations left: skip the loop
// This approach splits the handling for small and big values, which can
// make both simpler (for big values, no need to check for 0 after
// division, for small values, no need to divide and check the upper
// byte).
delayMicroseconds(0): ERR: 9 cycles == 9.00us
delayMicroseconds(1): ERR: 8 cycles == 8.00us
delayMicroseconds(2): ERR: 7 cycles == 7.00us
delayMicroseconds(3): ERR: 6 cycles == 6.00us
delayMicroseconds(4): ERR: 5 cycles == 5.00us
delayMicroseconds(5): ERR: 4 cycles == 4.00us
delayMicroseconds(6): ERR: 3 cycles == 3.00us
delayMicroseconds(7): ERR: 2 cycles == 2.00us
delayMicroseconds(8): ERR: 1 cycles == 1.00us
delayMicroseconds(9): OK
delayMicroseconds(10): ERR: 2 cycles == 2.00us
delayMicroseconds(11): ERR: 1 cycles == 1.00us
delayMicroseconds(12): OK
delayMicroseconds(13): ERR: -1 cycles == -1.00us
delayMicroseconds(14): OK
delayMicroseconds(15): OK
delayMicroseconds(16): OK
// Prologue that treats small and big delays separately. Big values
// (no borrow after subtracting 15) are divided by 4 with remainder
// compensation and jump into the loop; small values take the shorter
// path at L_%=_small. The L_%=_loop and L_%=_end labels are defined
// outside this fragment.
// The trailing number on each line is the instruction's cycle count.
"sbiw %A0, 15" "\n\t" // 2  subtract 15 from the 16-bit operand
"brcs L_%=_small" "\n\t" // 1  borrow: the request was below 15
"lsr %B0" "\n\t" // 1  16-bit shift right (divide by 2)
"ror %A0" "\n\t" // 1  carry now holds the old bit 0
// Branch to the next instruction: one extra cycle if the shifted-out bit was 1.
"brcs L%=_1\nL%=_1:" "\n\t" // 1 (2 on carry)
"lsr %B0" "\n\t" // 1  second shift pair, dividing by 4 in total
"ror %A0" "\n\t" // 1  (note: 4 us per loop, 1 cycle per us)
// Two branches on the same carry: two extra cycles if the old bit 1 was set.
"brcs L%=_2\nL%=_2:" "\n\t" // 1 (2 on carry)
"brcs L%=_3\nL%=_3:" "\n\t" // 1 (2 on carry)
"sbiw %A0, 0" "\n\t" // 2  subtracting 0 only refreshes the flags (Z if count is 0)
"breq L_%=_end" "\n\t" // 1  (note: 4 us per loop, 1 cycle per us)
"rjmp L_%=_loop" "\n\t" // 2  skip over the small-value path below
"L_%=_small:" "\n\t" // 4 up to here
"adiw %A0, 5" "\n\t" // 2  net effect: operand is now the request minus 10
"brmi L_%=_end" "\n\t" // 1  negative: the request was below 10, no loop
"lsr %A0" "\n\t" // 1  only the low byte is shifted on this path
"lsr %A0" "\n\t" // 1  divide by 4 in total, remainder dropped
"breq L_%=_end" "\n\t" // 1  (note: overhead: 10 cycles = 10us)
// This approach handles the small values directly, without falling
// through to the loop.
delayMicroseconds(0): ERR: 8 cycles == 8.00us
delayMicroseconds(1): ERR: 7 cycles == 7.00us
delayMicroseconds(2): ERR: 6 cycles == 6.00us
delayMicroseconds(3): ERR: 5 cycles == 5.00us
delayMicroseconds(4): ERR: 4 cycles == 4.00us
delayMicroseconds(5): ERR: 3 cycles == 3.00us
delayMicroseconds(6): ERR: 2 cycles == 2.00us
delayMicroseconds(7): ERR: 1 cycles == 1.00us
delayMicroseconds(8): OK
delayMicroseconds(9): ERR: -1 cycles == -1.00us
delayMicroseconds(10): OK
delayMicroseconds(11): OK
delayMicroseconds(12): OK
delayMicroseconds(13): ERR: 1 cycles == 1.00us
delayMicroseconds(14): OK
delayMicroseconds(15): OK
delayMicroseconds(16): OK
// Prologue plus loop. Small delays (below 16) never enter the loop body:
// they run through a ladder of compare-and-branch instructions that all
// lead to the end, so the only thing that differs per value is how many
// cycles are spent before the branch is taken. Big delays are divided by
// 4 with remainder compensation and fall through into the loop.
// The trailing number on each line is the instruction's cycle count.
"sbiw %A0, 16" "\n\t" // 2  subtract 16 from the 16-bit operand
"brcc L_%=_big" "\n\t" // 1 (2 on branch)
"adiw %A0, 6" "\n\t" // 2  net effect: operand is now the request minus 10
"brmi L_%=_end" "\n\t" // 1  negative: the request was below 10
"breq L_%=_loopcheck" "\n\t" // 1  zero: the brne at loopcheck is not taken and falls through to the end
"cpi %A0, 2" "\n\t" // 1  compare the low byte with 2
"brcs L_%=_end" "\n\t" // 1  taken when the value is below 2
"breq L_%=_end" "\n\t" // 1  taken when the value equals 2
"cpi %A0, 5" "\n\t" // 1  compare the low byte with 5
"brcs L_%=_end" "\n\t" // 1  taken when the value is below 5
"rjmp L_%=_end" "\n\t" // 1  (NOTE(review): AVR rjmp takes 2 cycles -- confirm this count)
"L_%=_big:" "\n\t"
"lsr %B0" "\n\t" // 1  16-bit shift right (divide by 2)
"ror %A0" "\n\t" // 1  carry now holds the old bit 0
// Branch to the next instruction: one extra cycle if the shifted-out bit was 1.
"brcs L%=_1\nL%=_1:" "\n\t" // 1 (2 on carry)
"lsr %B0" "\n\t" // 1  second shift pair, dividing by 4 in total
"ror %A0" "\n\t" // 1  carry now holds the old bit 1 (worth 2)
// Two branches on the same carry: two extra cycles if the old bit 1 was set.
"brcs L%=_2\nL%=_2:" "\n\t" // 1 (2 on carry)
"brcs L%=_3\nL%=_3:" "\n\t" // 1 (2 on carry)
"sbiw %A0, 0" "\n\t" // 2  subtracting 0 only refreshes the flags (Z if count is 0)
"breq L_%=_end" "\n\t" // 1  no iterations left: skip the loop
"nop" "\n\t" // 1  padding cycle
"nop" "\n\t" // 1  padding cycle
// This also needs an additional label in the loop:
"L_%=_loop:"
"sbiw %A0, 1" "\n\t" // 2  count one iteration down
"L_%=_loopcheck:"
"brne L_%=_loop" "\n\t" // 2 (1 on last)
"L_%=_end:"
// This approach further splits big values into not so big values and
// really big values, and handles the notsobig values in a way identical
// to the small values (unfortunately there are no cycles available for
// a rjmp, so the code is duplicated).
delayMicroseconds(0): ERR: 8 cycles == 8.00us
delayMicroseconds(1): ERR: 7 cycles == 7.00us
delayMicroseconds(2): ERR: 6 cycles == 6.00us
delayMicroseconds(3): ERR: 5 cycles == 5.00us
delayMicroseconds(4): ERR: 4 cycles == 4.00us
delayMicroseconds(5): ERR: 3 cycles == 3.00us
delayMicroseconds(6): ERR: 2 cycles == 2.00us
delayMicroseconds(7): ERR: 1 cycles == 1.00us
delayMicroseconds(8): OK
delayMicroseconds(9): ERR: -1 cycles == -1.00us
delayMicroseconds(10): OK
delayMicroseconds(11): OK
delayMicroseconds(12): OK
delayMicroseconds(13): OK
delayMicroseconds(14): OK
delayMicroseconds(15): OK
delayMicroseconds(16): OK
// 4 us per loop, 1 cycle per us
// overhead: 18 cycles = 18us (when us >= 19)
// loops: (us - 19) / 4
// no loops when us < 19
// Prologue with three paths, selected by two successive subtractions:
//  - small (below 14): a ladder of compare-and-branch instructions,
//  - not so big (14 to 18): a duplicate of that ladder,
//  - really big (19 and up): divide by 4 with remainder compensation,
//    then jump into the loop.
// The L_%=_loop, L_%=_loopcheck and L_%=_end labels are defined outside
// this fragment.
// The trailing number on each line is the instruction's cycle count.
"sbiw %A0, 14" "\n\t" // 2  subtract 14 from the 16-bit operand
"brcc L_%=_big" "\n\t" // 1 (2 on branch)
"adiw %A0, 4" "\n\t" // 2  net effect: operand is now the request minus 10
"brmi L_%=_end" "\n\t" // 1  negative: the request was below 10
// Branching to loopcheck with Z flag set adds 1 cycle
"breq L_%=_loopcheck" "\n\t" // 1
"cpi %A0, 2" "\n\t" // 1  compare the low byte with 2
"brcs L_%=_end" "\n\t" // 1  taken when the value is below 2
"breq L_%=_end" "\n\t" // 1  taken when the value equals 2
"rjmp L_%=_end" "\n\t" // 1  (NOTE(review): AVR rjmp takes 2 cycles -- confirm this count)
"L_%=_big:" "\n\t"
"sbiw %A0, 5" "\n\t" // 2  net effect: operand is now the request minus 19
"brcs L_%=_notsobig" "\n\t" // 1 (2 on branch)
"lsr %B0" "\n\t" // 1  16-bit shift right (divide by 2)
"ror %A0" "\n\t" // 1  carry now holds the old bit 0
// Branch to the next instruction: one extra cycle if the shifted-out bit was 1.
"brcs L%=_1\nL%=_1:" "\n\t" // 1 (2 on carry)
"lsr %B0" "\n\t" // 1  second shift pair, dividing by 4 in total
"ror %A0" "\n\t" // 1  carry now holds the old bit 1 (worth 2)
// Two branches on the same carry: two extra cycles if the old bit 1 was set.
"brcs L%=_2\nL%=_2:" "\n\t" // 1 (2 on carry)
"brcs L%=_3\nL%=_3:" "\n\t" // 1 (2 on carry)
// 16-bit compare of the loop count against 1: cpi handles the low byte,
// cpc the high byte with carry. Presumably r1 is the avr-gcc zero
// register -- TODO confirm.
"cpi %A0, 1" "\n\t" // 1
"cpc %B0, r1" "\n\t" // 1
"brcs L_%=_end" "\n\t" // 1  count is below 1 (i.e. zero): skip the loop
"rjmp L_%=_loop" "\n\t" // 1  (NOTE(review): AVR rjmp takes 2 cycles -- confirm this count)
"L_%=_notsobig:" "\n\t"
"adiw %A0, 4" "\n\t" // 2  (11 to here); net effect: operand is now the request minus 15
// Branching to a jump-to-end instruction adds 2 cycles
"breq L_%=_jmp_to_end" "\n\t" // 1
"brmi L_%=_end" "\n\t" // 1  negative: the request was 14
"cpi %A0, 2" "\n\t" // 1  compare the low byte with 2
"brcs L_%=_end" "\n\t" // 1  taken when the value is below 2
"breq L_%=_end" "\n\t" // 1  taken when the value equals 2
"L_%=_jmp_to_end:" "\n\t"
"rjmp L_%=_end" "\n\t" // 1  (NOTE(review): AVR rjmp takes 2 cycles -- confirm this count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment