#![macro_use]

// Credit: taken from `rp-hal` (also licensed Apache+MIT)
// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/intrinsics.rs

/// Generate a series of aliases for an intrinsic function.
///
/// Takes a function signature followed by a whitespace-separated list of alias
/// names. For every alias it emits (on ARM with the `intrinsics` feature only)
/// an `intrinsics!` definition with the same signature whose body simply
/// forwards to the original function, then recurses on the remaining aliases.
macro_rules! intrinsics_aliases {
    // Base cases: no aliases left, nothing to emit.
    (
        extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty,
    ) => {};
    (
        unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty,
    ) => {};

    // Recursive case for safe functions: peel off one alias.
    (
        extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty,
        $alias:ident
        $($rest:ident)*
    ) => {
        #[cfg(all(target_arch = "arm", feature = "intrinsics"))]
        intrinsics! {
            extern $abi fn $alias( $($argname: $ty),* ) -> $ret {
                $name($($argname),*)
            }
        }

        intrinsics_aliases! {
            extern $abi fn $name( $($argname: $ty),* ) -> $ret,
            $($rest)*
        }
    };

    // Recursive case for unsafe functions: peel off one alias.
    (
        unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty,
        $alias:ident
        $($rest:ident)*
    ) => {
        #[cfg(all(target_arch = "arm", feature = "intrinsics"))]
        intrinsics! {
            unsafe extern $abi fn $alias( $($argname: $ty),* ) -> $ret {
                $name($($argname),*)
            }
        }

        intrinsics_aliases! {
            unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret,
            $($rest)*
        }
    };
}

/// The macro used to define overridden intrinsics.
///
/// This is heavily inspired by the macro used by compiler-builtins. The idea
/// is to abstract anything special that needs to be done to override an
/// intrinsic function. Intrinsic generation is disabled for non-ARM targets
/// so things like CI and docs generation do not have problems. Additionally
/// they can be disabled by disabling the crate feature `intrinsics` for
/// testing or comparing performance.
///
/// Like the compiler-builtins macro, it accepts a series of functions that
/// looks like normal Rust code:
///
/// ```rust,ignore
/// intrinsics! {
///     extern "C" fn foo(a: i32) -> u32 {
///         // ...
///     }
///     #[nonstandard_attribute]
///     extern "C" fn bar(a: i32) -> u32 {
///         // ...
///     }
/// }
/// ```
///
/// Each function can also be decorated with nonstandard attributes to control
/// additional behaviour:
///
/// * `slower_than_default` - indicates that the override is slower than the
///   default implementation. Currently this just disables the override
///   entirely.
/// * `bootrom_v2` - indicates that the override is only available
///   on a V2 bootrom or higher. Only enabled when the feature
///   `rom-v2-intrinsics` is set.
/// * `alias` - accepts a list of names to alias the intrinsic to.
/// * `aeabi` - accepts a list of ARM EABI names to alias to.
///
macro_rules! intrinsics {
    () => {};

    (
        #[slower_than_default]
        $(#[$($attr:tt)*])*
        extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
            $($body:tt)*
        }

        $($rest:tt)*
    ) => {
        // Not exported, but defined so the actual implementation is
        // considered used
        #[allow(dead_code)]
        fn $name( $($argname: $ty),* ) -> $ret {
            $($body)*
        }

        intrinsics!($($rest)*);
    };

    (
        #[bootrom_v2]
        $(#[$($attr:tt)*])*
        extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
            $($body:tt)*
        }

        $($rest:tt)*
    ) => {
        // Not exported, but defined so the actual implementation is
        // considered used
        #[cfg(not(feature = "rom-v2-intrinsics"))]
        #[allow(dead_code)]
        fn $name( $($argname: $ty),* ) -> $ret {
            $($body)*
        }

        #[cfg(feature = "rom-v2-intrinsics")]
        intrinsics! {
            $(#[$($attr)*])*
            extern $abi fn $name( $($argname: $ty),* ) -> $ret {
                $($body)*
            }
        }

        intrinsics!($($rest)*);
    };

    (
        #[alias = $($alias:ident),*]
        $(#[$($attr:tt)*])*
        extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
            $($body:tt)*
        }

        $($rest:tt)*
    ) => {
        intrinsics! {
            $(#[$($attr)*])*
            extern $abi fn $name( $($argname: $ty),* ) -> $ret {
                $($body)*
            }
        }

        intrinsics_aliases! {
            extern $abi fn $name( $($argname: $ty),* ) -> $ret,
            $($alias)*
        }

        intrinsics!($($rest)*);
    };

    (
        #[alias = $($alias:ident),*]
        $(#[$($attr:tt)*])*
        unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
            $($body:tt)*
        }

        $($rest:tt)*
    ) => {
        intrinsics! {
            $(#[$($attr)*])*
            unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret {
                $($body)*
            }
        }

        intrinsics_aliases! {
            unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret,
            $($alias)*
        }

        intrinsics!($($rest)*);
    };

    (
        #[aeabi = $($alias:ident),*]
        $(#[$($attr:tt)*])*
        extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
            $($body:tt)*
        }

        $($rest:tt)*
    ) => {
        intrinsics! {
            $(#[$($attr)*])*
            extern $abi fn $name( $($argname: $ty),* ) -> $ret {
                $($body)*
            }
        }

        // The EABI aliases are always emitted with the "aapcs" ABI,
        // regardless of the ABI the primary function was declared with.
        intrinsics_aliases! {
            extern "aapcs" fn $name( $($argname: $ty),* ) -> $ret,
            $($alias)*
        }

        intrinsics!($($rest)*);
    };

    (
        $(#[$($attr:tt)*])*
        extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
            $($body:tt)*
        }

        $($rest:tt)*
    ) => {
        #[cfg(all(target_arch = "arm", feature = "intrinsics"))]
        $(#[$($attr)*])*
        extern $abi fn $name( $($argname: $ty),* ) -> $ret {
            $($body)*
        }

        // The unmangled export lives in a module of the same name so it does
        // not clash with the crate-local function defined just above.
        #[cfg(all(target_arch = "arm", feature = "intrinsics"))]
        mod $name {
            #[no_mangle]
            $(#[$($attr)*])*
            pub extern $abi fn $name( $($argname: $ty),* ) -> $ret {
                super::$name($($argname),*)
            }
        }

        // Not exported, but defined so the actual implementation is
        // considered used
        #[cfg(not(all(target_arch = "arm", feature = "intrinsics")))]
        #[allow(dead_code)]
        fn $name( $($argname: $ty),* ) -> $ret {
            $($body)*
        }

        intrinsics!($($rest)*);
    };

    (
        $(#[$($attr:tt)*])*
        unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
            $($body:tt)*
        }

        $($rest:tt)*
    ) => {
        #[cfg(all(target_arch = "arm", feature = "intrinsics"))]
        $(#[$($attr)*])*
        unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret {
            $($body)*
        }

        // The unmangled export lives in a module of the same name so it does
        // not clash with the crate-local function defined just above.
        #[cfg(all(target_arch = "arm", feature = "intrinsics"))]
        mod $name {
            #[no_mangle]
            $(#[$($attr)*])*
            pub unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret {
                super::$name($($argname),*)
            }
        }

        // Not exported, but defined so the actual implementation is
        // considered used
        #[cfg(not(all(target_arch = "arm", feature = "intrinsics")))]
        #[allow(dead_code)]
        unsafe fn $name( $($argname: $ty),* ) -> $ret {
            $($body)*
        }

        intrinsics!($($rest)*);
    };
}

// Credit: taken from `rp-hal` (also licensed Apache+MIT)
// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/sio.rs

// This takes advantage of how AAPCS defines a 64-bit return on 32-bit registers
// by packing it into r0[0:31] and r1[32:63].  So all we need to do is put
// the remainder in the high order 32 bits of a 64 bit result.  We can also
// alias the division operators to these for a similar reason: r0 is the
// result either way and r1 a scratch register, so the caller can't assume it
// retains the argument value.
#[cfg(target_arch = "arm")]
core::arch::global_asm!(
    ".macro hwdivider_head",
    "ldr    r2, =(0xd0000000)", // SIO_BASE
    // Check the DIRTY state of the divider by shifting it into the C
    // status bit.
    "ldr    r3, [r2, #0x078]", // DIV_CSR
    "lsrs   r3, #2", // DIRTY = 1, so shift 2 down
    // We only need to save the state when DIRTY, otherwise we can just do the
    // division directly.
    "bcs    2f",
    "1:",
    // Do the actual division now, we're either not DIRTY, or we've saved the
    // state and branched back here so it's safe now.
    ".endm",
    ".macro hwdivider_tail",
    // 8 cycle delay to wait for the result.  Each branch takes two cycles
    // and fits into a 2-byte Thumb instruction, so this is smaller than
    // 8 NOPs.
    "b      3f",
    "3: b   3f",
    "3: b   3f",
    "3: b   3f",
    "3:",
    // Read the quotient last, since that's what clears the dirty flag.
    "ldr    r1, [r2, #0x074]", // DIV_REMAINDER
    "ldr    r0, [r2, #0x070]", // DIV_QUOTIENT
    // Either return to the caller or back to the state restore.
    "bx     lr",
    "2:",
    // Since we can't save the signed-ness of the calculation, we have to make
    // sure that there's at least an 8 cycle delay before we read the result.
    // The push takes 5 cycles, and we've already spent at least 7 checking
    // the DIRTY state to get here.
    "push   {{r4-r6, lr}}",
    // Read the quotient last, since that's what clears the dirty flag.
    "ldr    r3, [r2, #0x060]", // DIV_UDIVIDEND
    "ldr    r4, [r2, #0x064]", // DIV_UDIVISOR
    "ldr    r5, [r2, #0x074]", // DIV_REMAINDER
    "ldr    r6, [r2, #0x070]", // DIV_QUOTIENT
    // If we get interrupted here (before a write sets the DIRTY flag) it's
    // fine, since we have the full state, so the interruptor doesn't have to
    // restore it.  Once the write happens and the DIRTY flag is set, the
    // interruptor becomes responsible for restoring our state.
    "bl     1b",
    // If we are interrupted here, then the interruptor will start an incorrect
    // calculation using a wrong divisor, but we'll restore the divisor and
    // result ourselves correctly. This sets DIRTY, so any interruptor will
    // save the state.
    "str    r3, [r2, #0x060]", // DIV_UDIVIDEND
    // If we are interrupted here, the interruptor may start the
    // calculation using incorrectly signed inputs, but we'll restore the
    // result ourselves. This sets DIRTY, so any interruptor will save the
    // state.
    "str    r4, [r2, #0x064]", // DIV_UDIVISOR
    // If we are interrupted here, the interruptor will have restored
    // everything but the quotient may be wrongly signed.  If the calculation
    // started by the above writes is still ongoing it is stopped, so it won't
    // replace the result we're restoring.  DIRTY and READY set, but only
    // DIRTY matters to make the interruptor save the state.
    "str    r5, [r2, #0x074]", // DIV_REMAINDER
    // State fully restored after the quotient write.  This sets both DIRTY
    // and READY, so whatever we may have interrupted can read the result.
    "str    r6, [r2, #0x070]", // DIV_QUOTIENT
    "pop    {{r4-r6, pc}}",
    ".endm",
);

/// Define a hardware-divider routine and a Rust binding for it.
///
/// Invocation shape: `name intrinsic_symbol* ( arg_type ) { "asm", ... }`.
///
/// * On ARM, emits a global assembly routine named `_erphal_<name>` built
///   from `hwdivider_head`, the supplied instruction strings and
///   `hwdivider_tail`. When the `intrinsics` feature is enabled, each listed
///   intrinsic symbol is additionally emitted as a global label on the same
///   routine.
/// * On ARM, declares `fn <name>(n, d) -> u64` as an `extern "aapcs"` import
///   linked to `_erphal_<name>`.
/// * On every other architecture, defines `<name>` as a stub that returns `0`
///   so the crate still builds (e.g. for host-side checks and docs).
macro_rules! division_function {
    (
        $name:ident $($intrinsic:ident)* ( $argty:ty ) {
            $($begin:literal),+
        }
    ) => {
        #[cfg(all(target_arch = "arm", feature = "intrinsics"))]
        core::arch::global_asm!(
            // Mangle the name slightly, since this is a global symbol.
            concat!(".section .text._erphal_", stringify!($name)),
            concat!(".global _erphal_", stringify!($name)),
            concat!(".type _erphal_", stringify!($name), ", %function"),
            ".align 2",
            concat!("_erphal_", stringify!($name), ":"),
            $(
                concat!(".global ", stringify!($intrinsic)),
                concat!(".type ", stringify!($intrinsic), ", %function"),
                concat!(stringify!($intrinsic), ":"),
            )*

            "hwdivider_head",
            $($begin),+ ,
            "hwdivider_tail",
        );

        #[cfg(all(target_arch = "arm", not(feature = "intrinsics")))]
        core::arch::global_asm!(
            // Mangle the name slightly, since this is a global symbol.
            concat!(".section .text._erphal_", stringify!($name)),
            concat!(".global _erphal_", stringify!($name)),
            concat!(".type _erphal_", stringify!($name), ", %function"),
            ".align 2",
            concat!("_erphal_", stringify!($name), ":"),

            "hwdivider_head",
            $($begin),+ ,
            "hwdivider_tail",
        );

        #[cfg(target_arch = "arm")]
        extern "aapcs" {
            // Connect a local name to global symbol above through FFI.
            #[link_name = concat!("_erphal_", stringify!($name)) ]
            fn $name(n: $argty, d: $argty) -> u64;
        }

        #[cfg(not(target_arch = "arm"))]
        #[allow(unused_variables)]
        unsafe fn $name(n: $argty, d: $argty) -> u64 { 0 }
    };
}

division_function! {
    unsigned_divmod __aeabi_uidivmod __aeabi_uidiv ( u32 ) {
        "str    r0, [r2, #0x060]", // DIV_UDIVIDEND
        "str    r1, [r2, #0x064]"  // DIV_UDIVISOR
    }
}

division_function! {
    signed_divmod __aeabi_idivmod __aeabi_idiv ( i32 ) {
        "str    r0, [r2, #0x068]", // DIV_SDIVIDEND
        "str    r1, [r2, #0x06c]"  // DIV_SDIVISOR
    }
}

/// Divide `n` by `d` through `unsigned_divmod` and unpack the 64-bit result:
/// the quotient is the low 32 bits, the remainder the high 32 bits.
fn divider_unsigned(n: u32, d: u32) -> DivResult<u32> {
    // SAFETY: `unsigned_divmod` takes two plain integers by value and is not
    // handed any pointer into caller memory.
    let packed = unsafe { unsigned_divmod(n, d) };
    DivResult {
        quotient: packed as u32,
        remainder: (packed >> 32) as u32,
    }
}

/// Divide `n` by `d` through `signed_divmod` and unpack the 64-bit result:
/// the quotient is the low 32 bits, the remainder the high 32 bits.
fn divider_signed(n: i32, d: i32) -> DivResult<i32> {
    // SAFETY: `signed_divmod` takes two plain integers by value and is not
    // handed any pointer into caller memory.
    let packed = unsafe { signed_divmod(n, d) };
    // Double casts to avoid sign extension
    DivResult {
        quotient: packed as u32 as i32,
        remainder: (packed >> 32) as u32 as i32,
    }
}

/// Result of divide/modulo operation
///
/// `T` is the operand type of the division (`u32` or `i32` in this file).
struct DivResult<T> {
    /// The quotient of divide/modulo operation
    pub quotient: T,
    /// The remainder of divide/modulo operation
    pub remainder: T,
}

intrinsics! {
    extern "C" fn __udivsi3(n: u32, d: u32) -> u32 {
        divider_unsigned(n, d).quotient
    }

    extern "C" fn __umodsi3(n: u32, d: u32) -> u32 {
        divider_unsigned(n, d).remainder
    }

    extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 {
        let quo_rem = divider_unsigned(n, d);
        if let Some(rem) = rem {
            *rem = quo_rem.remainder;
        }
        quo_rem.quotient
    }

    extern "C" fn __divsi3(n: i32, d: i32) -> i32 {
        divider_signed(n, d).quotient
    }

    extern "C" fn __modsi3(n: i32, d: i32) -> i32 {
        divider_signed(n, d).remainder
    }

    extern "C" fn __divmodsi4(n: i32, d: i32, rem: &mut i32) -> i32 {
        let quo_rem = divider_signed(n, d);
        *rem = quo_rem.remainder;
        quo_rem.quotient
    }
}