478 lines
15 KiB
Rust
478 lines
15 KiB
Rust
#![macro_use]
|
|
|
|
// Credit: taken from `rp-hal` (also licensed Apache+MIT)
|
|
// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/intrinsics.rs
|
|
|
|
/// Generate a series of aliases for an intrinsic function.
|
|
macro_rules! intrinsics_aliases {
|
|
(
|
|
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty,
|
|
) => {};
|
|
(
|
|
unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty,
|
|
) => {};
|
|
|
|
(
|
|
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty,
|
|
$alias:ident
|
|
$($rest:ident)*
|
|
) => {
|
|
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
|
|
intrinsics! {
|
|
extern $abi fn $alias( $($argname: $ty),* ) -> $ret {
|
|
$name($($argname),*)
|
|
}
|
|
}
|
|
|
|
intrinsics_aliases! {
|
|
extern $abi fn $name( $($argname: $ty),* ) -> $ret,
|
|
$($rest)*
|
|
}
|
|
};
|
|
|
|
(
|
|
unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty,
|
|
$alias:ident
|
|
$($rest:ident)*
|
|
) => {
|
|
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
|
|
intrinsics! {
|
|
unsafe extern $abi fn $alias( $($argname: $ty),* ) -> $ret {
|
|
$name($($argname),*)
|
|
}
|
|
}
|
|
|
|
intrinsics_aliases! {
|
|
unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret,
|
|
$($rest)*
|
|
}
|
|
};
|
|
}
|
|
|
|
/// The macro used to define overridden intrinsics.
|
|
///
|
|
/// This is heavily inspired by the macro used by compiler-builtins. The idea
|
|
/// is to abstract anything special that needs to be done to override an
|
|
/// intrinsic function. Intrinsic generation is disabled for non-ARM targets
|
|
/// so things like CI and docs generation do not have problems. Additionally
|
|
/// they can be disabled by disabling the crate feature `intrinsics` for
|
|
/// testing or comparing performance.
|
|
///
|
|
/// Like the compiler-builtins macro, it accepts a series of functions that
|
|
/// looks like normal Rust code:
|
|
///
|
|
/// ```rust,ignore
|
|
/// intrinsics! {
|
|
/// extern "C" fn foo(a: i32) -> u32 {
|
|
/// // ...
|
|
/// }
|
|
/// #[nonstandard_attribute]
|
|
/// extern "C" fn bar(a: i32) -> u32 {
|
|
/// // ...
|
|
/// }
|
|
/// }
|
|
/// ```
|
|
///
|
|
/// Each function can also be decorated with nonstandard attributes to control
|
|
/// additional behaviour:
|
|
///
|
|
/// * `slower_than_default` - indicates that the override is slower than the
|
|
/// default implementation. Currently this just disables the override
|
|
/// entirely.
|
|
/// * `bootrom_v2` - indicates that the override is only available
|
|
/// on a V2 bootrom or higher. Only enabled when the feature
|
|
/// `rom-v2-intrinsics` is set.
|
|
/// * `alias` - accepts a list of names to alias the intrinsic to.
|
|
/// * `aeabi` - accepts a list of ARM EABI names to alias to.
|
|
///
|
|
macro_rules! intrinsics {
|
|
() => {};
|
|
|
|
(
|
|
#[slower_than_default]
|
|
$(#[$($attr:tt)*])*
|
|
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
|
|
$($body:tt)*
|
|
}
|
|
|
|
$($rest:tt)*
|
|
) => {
|
|
// Not exported, but defined so the actual implementation is
|
|
// considered used
|
|
#[allow(dead_code)]
|
|
fn $name( $($argname: $ty),* ) -> $ret {
|
|
$($body)*
|
|
}
|
|
|
|
intrinsics!($($rest)*);
|
|
};
|
|
|
|
(
|
|
#[bootrom_v2]
|
|
$(#[$($attr:tt)*])*
|
|
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
|
|
$($body:tt)*
|
|
}
|
|
|
|
$($rest:tt)*
|
|
) => {
|
|
// Not exported, but defined so the actual implementation is
|
|
// considered used
|
|
#[cfg(not(feature = "rom-v2-intrinsics"))]
|
|
#[allow(dead_code)]
|
|
fn $name( $($argname: $ty),* ) -> $ret {
|
|
$($body)*
|
|
}
|
|
|
|
#[cfg(feature = "rom-v2-intrinsics")]
|
|
intrinsics! {
|
|
$(#[$($attr)*])*
|
|
extern $abi fn $name( $($argname: $ty),* ) -> $ret {
|
|
$($body)*
|
|
}
|
|
}
|
|
|
|
intrinsics!($($rest)*);
|
|
};
|
|
|
|
(
|
|
#[alias = $($alias:ident),*]
|
|
$(#[$($attr:tt)*])*
|
|
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
|
|
$($body:tt)*
|
|
}
|
|
|
|
$($rest:tt)*
|
|
) => {
|
|
intrinsics! {
|
|
$(#[$($attr)*])*
|
|
extern $abi fn $name( $($argname: $ty),* ) -> $ret {
|
|
$($body)*
|
|
}
|
|
}
|
|
|
|
intrinsics_aliases! {
|
|
extern $abi fn $name( $($argname: $ty),* ) -> $ret,
|
|
$($alias) *
|
|
}
|
|
|
|
intrinsics!($($rest)*);
|
|
};
|
|
|
|
(
|
|
#[alias = $($alias:ident),*]
|
|
$(#[$($attr:tt)*])*
|
|
unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
|
|
$($body:tt)*
|
|
}
|
|
|
|
$($rest:tt)*
|
|
) => {
|
|
intrinsics! {
|
|
$(#[$($attr)*])*
|
|
unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret {
|
|
$($body)*
|
|
}
|
|
}
|
|
|
|
intrinsics_aliases! {
|
|
unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret,
|
|
$($alias) *
|
|
}
|
|
|
|
intrinsics!($($rest)*);
|
|
};
|
|
|
|
(
|
|
#[aeabi = $($alias:ident),*]
|
|
$(#[$($attr:tt)*])*
|
|
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
|
|
$($body:tt)*
|
|
}
|
|
|
|
$($rest:tt)*
|
|
) => {
|
|
intrinsics! {
|
|
$(#[$($attr)*])*
|
|
extern $abi fn $name( $($argname: $ty),* ) -> $ret {
|
|
$($body)*
|
|
}
|
|
}
|
|
|
|
intrinsics_aliases! {
|
|
extern "aapcs" fn $name( $($argname: $ty),* ) -> $ret,
|
|
$($alias) *
|
|
}
|
|
|
|
intrinsics!($($rest)*);
|
|
};
|
|
|
|
(
|
|
$(#[$($attr:tt)*])*
|
|
extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
|
|
$($body:tt)*
|
|
}
|
|
|
|
$($rest:tt)*
|
|
) => {
|
|
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
|
|
$(#[$($attr)*])*
|
|
extern $abi fn $name( $($argname: $ty),* ) -> $ret {
|
|
$($body)*
|
|
}
|
|
|
|
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
|
|
mod $name {
|
|
#[no_mangle]
|
|
$(#[$($attr)*])*
|
|
pub extern $abi fn $name( $($argname: $ty),* ) -> $ret {
|
|
super::$name($($argname),*)
|
|
}
|
|
}
|
|
|
|
// Not exported, but defined so the actual implementation is
|
|
// considered used
|
|
#[cfg(not(all(target_arch = "arm", feature = "intrinsics")))]
|
|
#[allow(dead_code)]
|
|
fn $name( $($argname: $ty),* ) -> $ret {
|
|
$($body)*
|
|
}
|
|
|
|
intrinsics!($($rest)*);
|
|
};
|
|
|
|
(
|
|
$(#[$($attr:tt)*])*
|
|
unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) -> $ret:ty {
|
|
$($body:tt)*
|
|
}
|
|
|
|
$($rest:tt)*
|
|
) => {
|
|
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
|
|
$(#[$($attr)*])*
|
|
unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret {
|
|
$($body)*
|
|
}
|
|
|
|
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
|
|
mod $name {
|
|
#[no_mangle]
|
|
$(#[$($attr)*])*
|
|
pub unsafe extern $abi fn $name( $($argname: $ty),* ) -> $ret {
|
|
super::$name($($argname),*)
|
|
}
|
|
}
|
|
|
|
// Not exported, but defined so the actual implementation is
|
|
// considered used
|
|
#[cfg(not(all(target_arch = "arm", feature = "intrinsics")))]
|
|
#[allow(dead_code)]
|
|
unsafe fn $name( $($argname: $ty),* ) -> $ret {
|
|
$($body)*
|
|
}
|
|
|
|
intrinsics!($($rest)*);
|
|
};
|
|
}
|
|
|
|
// Credit: taken from `rp-hal` (also licensed Apache+MIT)
|
|
// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/sio.rs
|
|
|
|
// This takes advantage of how AAPCS defines a 64-bit return on 32-bit registers
|
|
// by packing it into r0[0:31] and r1[32:63]. So all we need to do is put
|
|
// the remainder in the high order 32 bits of a 64 bit result. We can also
|
|
// alias the division operators to these for a similar reason r0 is the
|
|
// result either way and r1 a scratch register, so the caller can't assume it
|
|
// retains the argument value.
|
|
#[cfg(target_arch = "arm")]
|
|
core::arch::global_asm!(
|
|
".macro hwdivider_head",
|
|
"ldr r2, =(0xd0000000)", // SIO_BASE
|
|
// Check the DIRTY state of the divider by shifting it into the C
|
|
// status bit.
|
|
"ldr r3, [r2, #0x078]", // DIV_CSR
|
|
"lsrs r3, #2", // DIRTY = 1, so shift 2 down
|
|
// We only need to save the state when DIRTY, otherwise we can just do the
|
|
// division directly.
|
|
"bcs 2f",
|
|
"1:",
|
|
// Do the actual division now, we're either not DIRTY, or we've saved the
|
|
// state and branched back here so it's safe now.
|
|
".endm",
|
|
".macro hwdivider_tail",
|
|
// 8 cycle delay to wait for the result. Each branch takes two cycles
|
|
// and fits into a 2-byte Thumb instruction, so this is smaller than
|
|
// 8 NOPs.
|
|
"b 3f",
|
|
"3: b 3f",
|
|
"3: b 3f",
|
|
"3: b 3f",
|
|
"3:",
|
|
// Read the quotient last, since that's what clears the dirty flag.
|
|
"ldr r1, [r2, #0x074]", // DIV_REMAINDER
|
|
"ldr r0, [r2, #0x070]", // DIV_QUOTIENT
|
|
// Either return to the caller or back to the state restore.
|
|
"bx lr",
|
|
"2:",
|
|
// Since we can't save the signed-ness of the calculation, we have to make
|
|
// sure that there's at least an 8 cycle delay before we read the result.
|
|
// The push takes 5 cycles, and we've already spent at least 7 checking
|
|
// the DIRTY state to get here.
|
|
"push {{r4-r6, lr}}",
|
|
// Read the quotient last, since that's what clears the dirty flag.
|
|
"ldr r3, [r2, #0x060]", // DIV_UDIVIDEND
|
|
"ldr r4, [r2, #0x064]", // DIV_UDIVISOR
|
|
"ldr r5, [r2, #0x074]", // DIV_REMAINDER
|
|
"ldr r6, [r2, #0x070]", // DIV_QUOTIENT
|
|
// If we get interrupted here (before a write sets the DIRTY flag) it's
|
|
// fine, since we have the full state, so the interruptor doesn't have to
|
|
// restore it. Once the write happens and the DIRTY flag is set, the
|
|
// interruptor becomes responsible for restoring our state.
|
|
"bl 1b",
|
|
// If we are interrupted here, then the interruptor will start an incorrect
|
|
// calculation using a wrong divisor, but we'll restore the divisor and
|
|
// result ourselves correctly. This sets DIRTY, so any interruptor will
|
|
// save the state.
|
|
"str r3, [r2, #0x060]", // DIV_UDIVIDEND
|
|
// If we are interrupted here, the the interruptor may start the
|
|
// calculation using incorrectly signed inputs, but we'll restore the
|
|
// result ourselves. This sets DIRTY, so any interruptor will save the
|
|
// state.
|
|
"str r4, [r2, #0x064]", // DIV_UDIVISOR
|
|
// If we are interrupted here, the interruptor will have restored
|
|
// everything but the quotient may be wrongly signed. If the calculation
|
|
// started by the above writes is still ongoing it is stopped, so it won't
|
|
// replace the result we're restoring. DIRTY and READY set, but only
|
|
// DIRTY matters to make the interruptor save the state.
|
|
"str r5, [r2, #0x074]", // DIV_REMAINDER
|
|
// State fully restored after the quotient write. This sets both DIRTY
|
|
// and READY, so whatever we may have interrupted can read the result.
|
|
"str r6, [r2, #0x070]", // DIV_QUOTIENT
|
|
"pop {{r4-r6, pc}}",
|
|
".endm",
|
|
);
|
|
|
|
macro_rules! division_function {
|
|
(
|
|
$name:ident $($intrinsic:ident)* ( $argty:ty ) {
|
|
$($begin:literal),+
|
|
}
|
|
) => {
|
|
#[cfg(all(target_arch = "arm", feature = "intrinsics"))]
|
|
core::arch::global_asm!(
|
|
// Mangle the name slightly, since this is a global symbol.
|
|
concat!(".section .text._erphal_", stringify!($name)),
|
|
concat!(".global _erphal_", stringify!($name)),
|
|
concat!(".type _erphal_", stringify!($name), ", %function"),
|
|
".align 2",
|
|
concat!("_erphal_", stringify!($name), ":"),
|
|
$(
|
|
concat!(".global ", stringify!($intrinsic)),
|
|
concat!(".type ", stringify!($intrinsic), ", %function"),
|
|
concat!(stringify!($intrinsic), ":"),
|
|
)*
|
|
|
|
"hwdivider_head",
|
|
$($begin),+ ,
|
|
"hwdivider_tail",
|
|
);
|
|
|
|
#[cfg(all(target_arch = "arm", not(feature = "intrinsics")))]
|
|
core::arch::global_asm!(
|
|
// Mangle the name slightly, since this is a global symbol.
|
|
concat!(".section .text._erphal_", stringify!($name)),
|
|
concat!(".global _erphal_", stringify!($name)),
|
|
concat!(".type _erphal_", stringify!($name), ", %function"),
|
|
".align 2",
|
|
concat!("_erphal_", stringify!($name), ":"),
|
|
|
|
"hwdivider_head",
|
|
$($begin),+ ,
|
|
"hwdivider_tail",
|
|
);
|
|
|
|
#[cfg(target_arch = "arm")]
|
|
extern "aapcs" {
|
|
// Connect a local name to global symbol above through FFI.
|
|
#[link_name = concat!("_erphal_", stringify!($name)) ]
|
|
fn $name(n: $argty, d: $argty) -> u64;
|
|
}
|
|
|
|
#[cfg(not(target_arch = "arm"))]
|
|
#[allow(unused_variables)]
|
|
unsafe fn $name(n: $argty, d: $argty) -> u64 { 0 }
|
|
};
|
|
}
|
|
|
|
division_function! {
|
|
unsigned_divmod __aeabi_uidivmod __aeabi_uidiv ( u32 ) {
|
|
"str r0, [r2, #0x060]", // DIV_UDIVIDEND
|
|
"str r1, [r2, #0x064]" // DIV_UDIVISOR
|
|
}
|
|
}
|
|
|
|
division_function! {
|
|
signed_divmod __aeabi_idivmod __aeabi_idiv ( i32 ) {
|
|
"str r0, [r2, #0x068]", // DIV_SDIVIDEND
|
|
"str r1, [r2, #0x06c]" // DIV_SDIVISOR
|
|
}
|
|
}
|
|
|
|
fn divider_unsigned(n: u32, d: u32) -> DivResult<u32> {
|
|
let packed = unsafe { unsigned_divmod(n, d) };
|
|
DivResult {
|
|
quotient: packed as u32,
|
|
remainder: (packed >> 32) as u32,
|
|
}
|
|
}
|
|
|
|
fn divider_signed(n: i32, d: i32) -> DivResult<i32> {
|
|
let packed = unsafe { signed_divmod(n, d) };
|
|
// Double casts to avoid sign extension
|
|
DivResult {
|
|
quotient: packed as u32 as i32,
|
|
remainder: (packed >> 32) as u32 as i32,
|
|
}
|
|
}
|
|
|
|
/// Result of divide/modulo operation
|
|
struct DivResult<T> {
|
|
/// The quotient of divide/modulo operation
|
|
pub quotient: T,
|
|
/// The remainder of divide/modulo operation
|
|
pub remainder: T,
|
|
}
|
|
|
|
intrinsics! {
|
|
extern "C" fn __udivsi3(n: u32, d: u32) -> u32 {
|
|
divider_unsigned(n, d).quotient
|
|
}
|
|
|
|
extern "C" fn __umodsi3(n: u32, d: u32) -> u32 {
|
|
divider_unsigned(n, d).remainder
|
|
}
|
|
|
|
extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 {
|
|
let quo_rem = divider_unsigned(n, d);
|
|
if let Some(rem) = rem {
|
|
*rem = quo_rem.remainder;
|
|
}
|
|
quo_rem.quotient
|
|
}
|
|
|
|
extern "C" fn __divsi3(n: i32, d: i32) -> i32 {
|
|
divider_signed(n, d).quotient
|
|
}
|
|
|
|
extern "C" fn __modsi3(n: i32, d: i32) -> i32 {
|
|
divider_signed(n, d).remainder
|
|
}
|
|
|
|
extern "C" fn __divmodsi4(n: i32, d: i32, rem: &mut i32) -> i32 {
|
|
let quo_rem = divider_signed(n, d);
|
|
*rem = quo_rem.remainder;
|
|
quo_rem.quotient
|
|
}
|
|
}
|