Skip to content

Instantly share code, notes, and snippets.

@bombela
Last active June 9, 2022 08:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bombela/cc90e5c29f3e7667326de4c087c1e148 to your computer and use it in GitHub Desktop.
Save bombela/cc90e5c29f3e7667326de4c087c1e148 to your computer and use it in GitHub Desktop.
Produce the minimal number of machine instructions to delay by an exact number of cycles on AVR with Rust, using inline assembly and associated consts.
// Copyright 2022 François-Xavier Bourlet <bombela@gmail.com>
// Redistribution and use in source and binary forms, with or without modification, are permitted
// provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of
// conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other materials
// provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
// THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#![allow(unused)]
use core::arch::asm;
/// Compile-time planner and emitter for an exact busy-wait delay on AVR.
///
/// The delay length is `INPUT * MUL / DIV` CPU cycles (integer arithmetic, so the
/// division truncates). The cycle budget is split at compile time between up to
/// four countdown loops (32-, 24-, 16- and 8-bit counters) and at most three
/// straight-line padding instructions, so only the instructions that are
/// actually needed end up in the binary.
pub struct Delayer<const INPUT: u64, const MUL: u64, const DIV: u64>;

impl<const INPUT: u64, const MUL: u64, const DIV: u64> Delayer<INPUT, MUL, DIV> {
    /// Total number of CPU cycles to burn.
    const CYCLES: u64 = INPUT * MUL / DIV;

    /// Longest delay that can be produced: a 32-bit loop running all 2^32
    /// iterations (`9 + 6 * 0xFFFF_FFFF` cycles) plus 5 cycles of padding.
    const MAX_CYCLES: u64 = 9 + 6 * 0xFFFF_FFFF + 5;

    /// Compile-time range check.
    ///
    /// The loop counts below are masked to the width of their counter, so a
    /// request above `MAX_CYCLES` would otherwise wrap around silently and
    /// produce a much shorter delay than asked for.
    const ASSERT_IN_RANGE: () = assert!(
        Self::CYCLES <= Self::MAX_CYCLES,
        "delay exceeds the maximum of 25_769_803_784 cycles"
    );

    // Each stage below takes as many whole iterations as fit in the cycles
    // left over by the previous stage and hands the remainder to the next one.
    // A stage is only enabled when the next smaller loop could not cover the
    // remaining cycles on its own. Every counter is decremented before it is
    // tested, so a stored count of 0 (the result of the mask) stands for the
    // full 2^N iterations.

    /// Whether the 32-bit loop is needed (more cycles than the 24-bit loop can do).
    const U32_LOOP: bool = Self::CYCLES > (7 + 5 * 0xFF_FFFF);
    /// Initial value of the 32-bit counter.
    const U32_LOOP_COUNT: u64 = if Self::U32_LOOP {
        ((Self::CYCLES - 9) / 6 + 1) & 0xFFFF_FFFF
    } else {
        0
    };
    /// Cycles still to burn after the 32-bit stage.
    const U32_CYCLES_REM: u64 = if Self::U32_LOOP {
        (Self::CYCLES - 9) % 6
    } else {
        Self::CYCLES
    };

    /// Whether the 24-bit loop is needed (more cycles than the 16-bit loop can do).
    const U24_LOOP: bool = Self::U32_CYCLES_REM > (5 + 4 * 0xFFFF);
    /// Initial value of the 24-bit counter.
    const U24_LOOP_COUNT: u64 = if Self::U24_LOOP {
        ((Self::U32_CYCLES_REM - 7) / 5 + 1) & 0xFF_FFFF
    } else {
        0
    };
    /// Cycles still to burn after the 24-bit stage.
    const U24_CYCLES_REM: u64 = if Self::U24_LOOP {
        (Self::U32_CYCLES_REM - 7) % 5
    } else {
        Self::U32_CYCLES_REM
    };

    /// Whether the 16-bit loop is needed (more cycles than the 8-bit loop can do).
    const U16_LOOP: bool = Self::U24_CYCLES_REM > (3 + 3 * 0xFF);
    /// Initial value of the 16-bit counter.
    const U16_LOOP_COUNT: u64 = if Self::U16_LOOP {
        ((Self::U24_CYCLES_REM - 5) / 4 + 1) & 0xFFFF
    } else {
        0
    };
    /// Cycles still to burn after the 16-bit stage.
    const U16_CYCLES_REM: u64 = if Self::U16_LOOP {
        (Self::U24_CYCLES_REM - 5) % 4
    } else {
        Self::U24_CYCLES_REM
    };

    /// Whether the 8-bit loop is needed. Up to 5 cycles are cheaper to produce
    /// with straight-line padding (at most 3 instructions).
    const U8_LOOP: bool = Self::U16_CYCLES_REM > 5;
    /// Initial value of the 8-bit counter.
    const U8_LOOP_COUNT: u64 = if Self::U8_LOOP {
        ((Self::U16_CYCLES_REM - 3) / 3 + 1) & 0xFF
    } else {
        0
    };
    /// Cycles left for the straight-line padding at the very end.
    const U8_CYCLES_REM: u64 = if Self::U8_LOOP {
        (Self::U16_CYCLES_REM - 3) % 3
    } else {
        Self::U16_CYCLES_REM
    };

    /// Countdown loop over a 32-bit counter.
    ///
    /// 8 instructions.
    /// 9 cycles for a single iteration, 6 cycles for every additional iteration.
    #[inline(always)]
    fn delay_cycles_u32() {
        // SAFETY: the assembly only writes the scratch registers declared as
        // discarded outputs; it does not access memory or the stack. The status
        // flags are modified, which is why `preserves_flags` is not set.
        unsafe {
            asm!(
                "ldi {r0:l}, {b0}",
                "ldi {r0:h}, {b1}",
                "ldi {r2}, {b2}",
                "ldi {r3}, {b3}",
                "1:",
                "sbiw {r0}, 1",
                "sbci {r2}, 0",
                "sbci {r3}, 0",
                "brne 1b",
                r0 = out(reg_iw) _,
                r2 = out(reg_upper) _,
                r3 = out(reg_upper) _,
                // The `as u8` casts deliberately keep one byte of the count each.
                b0 = const (Self::U32_LOOP_COUNT >> 0) as u8,
                b1 = const (Self::U32_LOOP_COUNT >> 8) as u8,
                b2 = const (Self::U32_LOOP_COUNT >> 16) as u8,
                b3 = const (Self::U32_LOOP_COUNT >> 24) as u8,
                options(nomem, nostack),
            )
        }
    }

    /// Countdown loop over a 24-bit counter.
    ///
    /// 6 instructions.
    /// 7 cycles for a single iteration, 5 cycles for every additional iteration.
    #[inline(always)]
    fn delay_cycles_u24() {
        // `U24_LOOP_COUNT` is masked to 24 bits where it is computed, and
        // `ASSERT_IN_RANGE` rejects delays that are too long overall.
        //
        // SAFETY: the assembly only writes the scratch registers declared as
        // discarded outputs; it does not access memory or the stack. The status
        // flags are modified, which is why `preserves_flags` is not set.
        unsafe {
            asm!(
                "ldi {r0:l}, {b0}",
                "ldi {r0:h}, {b1}",
                "ldi {r2}, {b2}",
                "1:",
                "sbiw {r0}, 1",
                "sbci {r2}, 0",
                "brne 1b",
                r0 = out(reg_iw) _,
                r2 = out(reg_upper) _,
                // The `as u8` casts deliberately keep one byte of the count each.
                b0 = const (Self::U24_LOOP_COUNT >> 0) as u8,
                b1 = const (Self::U24_LOOP_COUNT >> 8) as u8,
                b2 = const (Self::U24_LOOP_COUNT >> 16) as u8,
                options(nomem, nostack),
            )
        }
    }

    /// Countdown loop over a 16-bit counter.
    ///
    /// 4 instructions.
    /// 5 cycles for a single iteration, 4 cycles for every additional iteration.
    #[inline(always)]
    fn delay_cycles_u16() {
        // SAFETY: the assembly only writes the scratch register pair declared
        // as a discarded output; it does not access memory or the stack. The
        // status flags are modified, which is why `preserves_flags` is not set.
        unsafe {
            asm!(
                "ldi {r0:l}, {b0}",
                "ldi {r0:h}, {b1}",
                "1:",
                "sbiw {r0}, 1",
                "brne 1b",
                r0 = out(reg_iw) _,
                // The `as u8` casts deliberately keep one byte of the count each.
                b0 = const (Self::U16_LOOP_COUNT >> 0) as u8,
                b1 = const (Self::U16_LOOP_COUNT >> 8) as u8,
                options(nomem, nostack),
            )
        }
    }

    /// Countdown loop over an 8-bit counter.
    ///
    /// 3 instructions.
    /// 3 cycles for a single iteration, 3 cycles for every additional iteration.
    #[inline(always)]
    fn delay_cycles_u8() {
        // SAFETY: the assembly only writes the scratch register declared as a
        // discarded output; it does not access memory or the stack. The status
        // flags are modified, which is why `preserves_flags` is not set.
        unsafe {
            asm!(
                "ldi {r0}, {b0}",
                "1:",
                "dec {r0}",
                "brne 1b",
                r0 = out(reg_upper) _,
                // Already masked to 8 bits; cast like the other loops do.
                b0 = const Self::U8_LOOP_COUNT as u8,
                options(nomem, nostack),
                // The carry flag is not touched by `dec`.
                // That's the difference between `dec` and `sub 1`.
                // `asm!` has no per-flag option to express this; the only flag
                // option is `preserves_flags`, which covers all flags at once.
            )
        }
    }

    /// Burns 2 cycles with a single instruction.
    #[inline(always)]
    fn delay_2cycles() {
        // SAFETY: a relative jump with no operands; it touches no register,
        // memory, stack or status flag.
        unsafe {
            asm!(
                "rjmp .",
                options(nomem, nostack, preserves_flags),
            )
        }
    }

    /// Burns 1 cycle with a single instruction.
    #[inline(always)]
    fn delay_1cycle() {
        // SAFETY: `nop` touches no register, memory, stack or status flag.
        unsafe {
            asm!(
                "nop",
                options(nomem, nostack, preserves_flags),
            )
        }
    }

    /// Emits the instruction sequence that burns exactly `CYCLES` cycles.
    ///
    /// Every condition below is a compile-time constant, so only the selected
    /// stages are expected to remain in the generated code. Requests above
    /// `MAX_CYCLES` are rejected at compile time.
    #[inline(always)]
    fn delay_impl() {
        // Referencing the constant forces the range check to be evaluated for
        // this particular instantiation.
        let () = Self::ASSERT_IN_RANGE;

        if Self::U32_LOOP {
            // Cycles 83_886_083 .. 25_769_803_779 (9+6*0xFFFF_FFFF)
            Self::delay_cycles_u32();
        }
        if Self::U24_LOOP {
            // Cycles 262_146 ..= 83_886_082 (7+5*0xFF_FFFF)
            Self::delay_cycles_u24();
        }
        if Self::U16_LOOP {
            // Cycles 769 ..= 262_145 (5+4*0xFFFF)
            Self::delay_cycles_u16();
        }
        if Self::U8_LOOP {
            // Cycles 6 ..= 768 (3+3*0xFF)
            Self::delay_cycles_u8();
        }
        // Cycles from 1..=5 are implemented by
        // (0..=2)*delay_2cycles() + (0..=1)*delay_1cycle().
        if Self::U8_CYCLES_REM >= 4 {
            Self::delay_2cycles();
        }
        if Self::U8_CYCLES_REM >= 2 {
            Self::delay_2cycles();
        }
        if Self::U8_CYCLES_REM % 2 == 1 {
            Self::delay_1cycle();
        }
    }
}
/// Produce the minimal number of machine instructions to delay by the exact number of CYCLES.
/// Works from 0 to 25_769_803_784 cycles. The number of instructions generated goes up to 11.
/// The higher the number of cycles, the higher number of instructions, in a staircase effect.
/// Almost 18 minutes at 24Mhz.
#[inline(always)]
pub fn delay_cycles<const CYCLES: u64>() {
Delayer::<CYCLES, 1, 1>::delay_impl();
}
/// Maximum value is (25_769_803_784 * 1_000_000 / CPU_FREQUENCY_HZ).
/// Almost 18 minutes at 24Mhz.
#[inline(always)]
pub fn delay_us<const US: u64>() {
Delayer::<US, {crate::CPU_FREQUENCY_HZ as u64}, 1_000_000>::delay_impl();
}
/// Maximum value is (25_769_803_784 * 1_000 / CPU_FREQUENCY_HZ).
/// Almost 18 minutes at 24Mhz.
#[inline(always)]
pub fn delay_ms<const MS: u64>() {
Delayer::<MS, {crate::CPU_FREQUENCY_HZ as u64}, 1_000>::delay_impl();
}
/// Maximum value is (25_769_803_784 * 1 / CPU_FREQUENCY_HZ).
/// Almost 18 minutes at 24Mhz.
#[inline(always)]
pub fn delay_sec<const SEC: u64>() {
Delayer::<SEC, {crate::CPU_FREQUENCY_HZ as u64}, 1>::delay_impl();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment