From fdd6e08ed6b5445a53c8cd0752f80f203c4860ac Mon Sep 17 00:00:00 2001 From: pennae Date: Wed, 19 Apr 2023 01:57:37 +0200 Subject: [PATCH] rp: hook up softfloat rom intrinsics rp-hal has done this very well already, so we'll just copy their entire impl again. only div.rs needed some massaging because our sio access works a little differently, everything else worked as is. --- embassy-rp/src/float/add_sub.rs | 92 ++++++++++++ embassy-rp/src/float/cmp.rs | 201 +++++++++++++++++++++++++ embassy-rp/src/float/conv.rs | 157 ++++++++++++++++++++ embassy-rp/src/float/div.rs | 141 ++++++++++++++++++ embassy-rp/src/float/functions.rs | 239 ++++++++++++++++++++++++++++++ embassy-rp/src/float/mod.rs | 149 +++++++++++++++++++ embassy-rp/src/float/mul.rs | 70 +++++++++ embassy-rp/src/lib.rs | 1 + tests/rp/.cargo/config.toml | 6 +- tests/rp/Cargo.toml | 2 +- tests/rp/src/bin/float.rs | 53 +++++++ 11 files changed, 1108 insertions(+), 3 deletions(-) create mode 100644 embassy-rp/src/float/add_sub.rs create mode 100644 embassy-rp/src/float/cmp.rs create mode 100644 embassy-rp/src/float/conv.rs create mode 100644 embassy-rp/src/float/div.rs create mode 100644 embassy-rp/src/float/functions.rs create mode 100644 embassy-rp/src/float/mod.rs create mode 100644 embassy-rp/src/float/mul.rs create mode 100644 tests/rp/src/bin/float.rs diff --git a/embassy-rp/src/float/add_sub.rs b/embassy-rp/src/float/add_sub.rs new file mode 100644 index 00000000..673544cf --- /dev/null +++ b/embassy-rp/src/float/add_sub.rs @@ -0,0 +1,92 @@ +// Credit: taken from `rp-hal` (also licensed Apache+MIT) +// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/float/add_sub.rs + +use super::{Float, Int}; +use crate::rom_data; + +trait ROMAdd { + fn rom_add(self, b: Self) -> Self; +} + +impl ROMAdd for f32 { + fn rom_add(self, b: Self) -> Self { + rom_data::float_funcs::fadd(self, b) + } +} + +impl ROMAdd for f64 { + fn rom_add(self, b: Self) -> Self { + rom_data::double_funcs::dadd(self, b) + } +} + +fn 
add(a: F, b: F) -> F { + if a.is_not_finite() { + if b.is_not_finite() { + let class_a = a.repr() & (F::SIGNIFICAND_MASK | F::SIGN_MASK); + let class_b = b.repr() & (F::SIGNIFICAND_MASK | F::SIGN_MASK); + + if class_a == F::Int::ZERO && class_b == F::Int::ZERO { + // inf + inf = inf + return a; + } + if class_a == F::SIGN_MASK && class_b == F::SIGN_MASK { + // -inf + (-inf) = -inf + return a; + } + + // Sign mismatch, or either is NaN already + return F::NAN; + } + + // [-]inf/NaN + X = [-]inf/NaN + return a; + } + + if b.is_not_finite() { + // X + [-]inf/NaN = [-]inf/NaN + return b; + } + + a.rom_add(b) +} + +intrinsics! { + #[alias = __addsf3vfp] + #[aeabi = __aeabi_fadd] + extern "C" fn __addsf3(a: f32, b: f32) -> f32 { + add(a, b) + } + + #[bootrom_v2] + #[alias = __adddf3vfp] + #[aeabi = __aeabi_dadd] + extern "C" fn __adddf3(a: f64, b: f64) -> f64 { + add(a, b) + } + + // The ROM just implements subtraction the same way, so just do it here + // and save the work of implementing more complicated NaN/inf handling. 
+ + #[alias = __subsf3vfp] + #[aeabi = __aeabi_fsub] + extern "C" fn __subsf3(a: f32, b: f32) -> f32 { + add(a, -b) + } + + #[bootrom_v2] + #[alias = __subdf3vfp] + #[aeabi = __aeabi_dsub] + extern "C" fn __subdf3(a: f64, b: f64) -> f64 { + add(a, -b) + } + + extern "aapcs" fn __aeabi_frsub(a: f32, b: f32) -> f32 { + add(b, -a) + } + + #[bootrom_v2] + extern "aapcs" fn __aeabi_drsub(a: f64, b: f64) -> f64 { + add(b, -a) + } +} diff --git a/embassy-rp/src/float/cmp.rs b/embassy-rp/src/float/cmp.rs new file mode 100644 index 00000000..e540e391 --- /dev/null +++ b/embassy-rp/src/float/cmp.rs @@ -0,0 +1,201 @@ +// Credit: taken from `rp-hal` (also licensed Apache+MIT) +// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/float/cmp.rs + +use super::Float; +use crate::rom_data; + +trait ROMCmp { + fn rom_cmp(self, b: Self) -> i32; +} + +impl ROMCmp for f32 { + fn rom_cmp(self, b: Self) -> i32 { + rom_data::float_funcs::fcmp(self, b) + } +} + +impl ROMCmp for f64 { + fn rom_cmp(self, b: Self) -> i32 { + rom_data::double_funcs::dcmp(self, b) + } +} + +fn le_abi(a: F, b: F) -> i32 { + if a.is_nan() || b.is_nan() { + 1 + } else { + a.rom_cmp(b) + } +} + +fn ge_abi(a: F, b: F) -> i32 { + if a.is_nan() || b.is_nan() { + -1 + } else { + a.rom_cmp(b) + } +} + +intrinsics! 
{ + #[slower_than_default] + #[bootrom_v2] + #[alias = __eqsf2, __ltsf2, __nesf2] + extern "C" fn __lesf2(a: f32, b: f32) -> i32 { + le_abi(a, b) + } + + #[slower_than_default] + #[bootrom_v2] + #[alias = __eqdf2, __ltdf2, __nedf2] + extern "C" fn __ledf2(a: f64, b: f64) -> i32 { + le_abi(a, b) + } + + #[slower_than_default] + #[bootrom_v2] + #[alias = __gtsf2] + extern "C" fn __gesf2(a: f32, b: f32) -> i32 { + ge_abi(a, b) + } + + #[slower_than_default] + #[bootrom_v2] + #[alias = __gtdf2] + extern "C" fn __gedf2(a: f64, b: f64) -> i32 { + ge_abi(a, b) + } + + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmple(a: f32, b: f32) -> i32 { + (le_abi(a, b) <= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmpge(a: f32, b: f32) -> i32 { + (ge_abi(a, b) >= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmpeq(a: f32, b: f32) -> i32 { + (le_abi(a, b) == 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmplt(a: f32, b: f32) -> i32 { + (le_abi(a, b) < 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_fcmpgt(a: f32, b: f32) -> i32 { + (ge_abi(a, b) > 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmple(a: f64, b: f64) -> i32 { + (le_abi(a, b) <= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmpge(a: f64, b: f64) -> i32 { + (ge_abi(a, b) >= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmpeq(a: f64, b: f64) -> i32 { + (le_abi(a, b) == 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmplt(a: f64, b: f64) -> i32 { + (le_abi(a, b) < 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "aapcs" fn __aeabi_dcmpgt(a: f64, b: f64) -> i32 { + (ge_abi(a, b) > 0) as i32 + } + + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __gesf2vfp(a: 
f32, b: f32) -> i32 { + (ge_abi(a, b) >= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __gedf2vfp(a: f64, b: f64) -> i32 { + (ge_abi(a, b) >= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __gtsf2vfp(a: f32, b: f32) -> i32 { + (ge_abi(a, b) > 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __gtdf2vfp(a: f64, b: f64) -> i32 { + (ge_abi(a, b) > 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __ltsf2vfp(a: f32, b: f32) -> i32 { + (le_abi(a, b) < 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __ltdf2vfp(a: f64, b: f64) -> i32 { + (le_abi(a, b) < 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __lesf2vfp(a: f32, b: f32) -> i32 { + (le_abi(a, b) <= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __ledf2vfp(a: f64, b: f64) -> i32 { + (le_abi(a, b) <= 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __nesf2vfp(a: f32, b: f32) -> i32 { + (le_abi(a, b) != 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __nedf2vfp(a: f64, b: f64) -> i32 { + (le_abi(a, b) != 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __eqsf2vfp(a: f32, b: f32) -> i32 { + (le_abi(a, b) == 0) as i32 + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn __eqdf2vfp(a: f64, b: f64) -> i32 { + (le_abi(a, b) == 0) as i32 + } +} diff --git a/embassy-rp/src/float/conv.rs b/embassy-rp/src/float/conv.rs new file mode 100644 index 00000000..021826e2 --- /dev/null +++ b/embassy-rp/src/float/conv.rs @@ -0,0 +1,157 @@ +// Credit: taken from `rp-hal` (also licensed Apache+MIT) +// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/float/conv.rs + +use super::Float; +use crate::rom_data; + +// Some of these are also not connected in the Pico SDK. 
This is probably +// because the ROM version actually does a fixed point conversion, just with +// the fractional width set to zero. + +intrinsics! { + // Not connected in the Pico SDK + #[slower_than_default] + #[aeabi = __aeabi_i2f] + extern "C" fn __floatsisf(i: i32) -> f32 { + rom_data::float_funcs::int_to_float(i) + } + + // Not connected in the Pico SDK + #[slower_than_default] + #[aeabi = __aeabi_i2d] + extern "C" fn __floatsidf(i: i32) -> f64 { + rom_data::double_funcs::int_to_double(i) + } + + // Questionable gain + #[aeabi = __aeabi_l2f] + extern "C" fn __floatdisf(i: i64) -> f32 { + rom_data::float_funcs::int64_to_float(i) + } + + #[bootrom_v2] + #[aeabi = __aeabi_l2d] + extern "C" fn __floatdidf(i: i64) -> f64 { + rom_data::double_funcs::int64_to_double(i) + } + + // Not connected in the Pico SDK + #[slower_than_default] + #[aeabi = __aeabi_ui2f] + extern "C" fn __floatunsisf(i: u32) -> f32 { + rom_data::float_funcs::uint_to_float(i) + } + + // Questionable gain + #[bootrom_v2] + #[aeabi = __aeabi_ui2d] + extern "C" fn __floatunsidf(i: u32) -> f64 { + rom_data::double_funcs::uint_to_double(i) + } + + // Questionable gain + #[bootrom_v2] + #[aeabi = __aeabi_ul2f] + extern "C" fn __floatundisf(i: u64) -> f32 { + rom_data::float_funcs::uint64_to_float(i) + } + + #[bootrom_v2] + #[aeabi = __aeabi_ul2d] + extern "C" fn __floatundidf(i: u64) -> f64 { + rom_data::double_funcs::uint64_to_double(i) + } + + + // The Pico SDK does some optimization here (e.g. fast paths for zero and + // one), but we can just directly connect it. 
+ #[aeabi = __aeabi_f2iz] + extern "C" fn __fixsfsi(f: f32) -> i32 { + rom_data::float_funcs::float_to_int(f) + } + + #[bootrom_v2] + #[aeabi = __aeabi_f2lz] + extern "C" fn __fixsfdi(f: f32) -> i64 { + rom_data::float_funcs::float_to_int64(f) + } + + // Not connected in the Pico SDK + #[slower_than_default] + #[bootrom_v2] + #[aeabi = __aeabi_d2iz] + extern "C" fn __fixdfsi(f: f64) -> i32 { + rom_data::double_funcs::double_to_int(f) + } + + // Like with the 32 bit version, there's optimization that we just + // skip. + #[bootrom_v2] + #[aeabi = __aeabi_d2lz] + extern "C" fn __fixdfdi(f: f64) -> i64 { + rom_data::double_funcs::double_to_int64(f) + } + + #[slower_than_default] + #[aeabi = __aeabi_f2uiz] + extern "C" fn __fixunssfsi(f: f32) -> u32 { + rom_data::float_funcs::float_to_uint(f) + } + + #[slower_than_default] + #[bootrom_v2] + #[aeabi = __aeabi_f2ulz] + extern "C" fn __fixunssfdi(f: f32) -> u64 { + rom_data::float_funcs::float_to_uint64(f) + } + + #[slower_than_default] + #[bootrom_v2] + #[aeabi = __aeabi_d2uiz] + extern "C" fn __fixunsdfsi(f: f64) -> u32 { + rom_data::double_funcs::double_to_uint(f) + } + + #[slower_than_default] + #[bootrom_v2] + #[aeabi = __aeabi_d2ulz] + extern "C" fn __fixunsdfdi(f: f64) -> u64 { + rom_data::double_funcs::double_to_uint64(f) + } + + #[bootrom_v2] + #[alias = __extendsfdf2vfp] + #[aeabi = __aeabi_f2d] + extern "C" fn __extendsfdf2(f: f32) -> f64 { + if f.is_not_finite() { + return f64::from_repr( + // Not finite + f64::EXPONENT_MASK | + // Preserve NaN or inf + ((f.repr() & f32::SIGNIFICAND_MASK) as u64) | + // Preserve sign + ((f.repr() & f32::SIGN_MASK) as u64) << (f64::BITS-f32::BITS) + ); + } + rom_data::float_funcs::float_to_double(f) + } + + #[bootrom_v2] + #[alias = __truncdfsf2vfp] + #[aeabi = __aeabi_d2f] + extern "C" fn __truncdfsf2(f: f64) -> f32 { + if f.is_not_finite() { + let mut repr: u32 = + // Not finite + f32::EXPONENT_MASK | + // Preserve sign + ((f.repr() & f64::SIGN_MASK) >> (f64::BITS-f32::BITS)) 
as u32; + // Set NaN + if (f.repr() & f64::SIGNIFICAND_MASK) != 0 { + repr |= 1; + } + return f32::from_repr(repr); + } + rom_data::double_funcs::double_to_float(f) + } +} diff --git a/embassy-rp/src/float/div.rs b/embassy-rp/src/float/div.rs new file mode 100644 index 00000000..094dec44 --- /dev/null +++ b/embassy-rp/src/float/div.rs @@ -0,0 +1,141 @@ +// Credit: taken from `rp-hal` (also licensed Apache+MIT) +// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/float/div.rs + +use super::Float; +use crate::rom_data; + +// Make sure this stays as a separate call, because when it's inlined the +// compiler will move the save of the registers used to contain the divider +// state into the function prologue. That save and restore (push/pop) takes +// longer than the actual division, so doing it in the common case where +// they are not required wastes a lot of time. +#[inline(never)] +#[cold] +fn save_divider_and_call(f: F) -> R +where + F: FnOnce() -> R, +{ + let sio = rp_pac::SIO; + + unsafe { + // Since we can't save the signed-ness of the calculation, we have to make + // sure that there's at least an 8 cycle delay before we read the result. + // The Pico SDK ensures this by using a 6 cycle push and two 1 cycle reads. + // Since we can't be sure the Rust implementation will optimize to the same, + // just use an explicit wait. + while !sio.div().csr().read().ready() {} + + // Read the quotient last, since that's what clears the dirty flag + let dividend = sio.div().udividend().read(); + let divisor = sio.div().udivisor().read(); + let remainder = sio.div().remainder().read(); + let quotient = sio.div().quotient().read(); + + // If we get interrupted here (before a write sets the DIRTY flag) it's fine, since + // we have the full state, so the interruptor doesn't have to restore it. Once the + // write happens and the DIRTY flag is set, the interruptor becomes responsible for + // restoring our state. 
+ let result = f(); + + // If we are interrupted here, then the interruptor will start an incorrect calculation + // using a wrong divisor, but we'll restore the divisor and result ourselves correctly. + // This sets DIRTY, so any interruptor will save the state. + sio.div().udividend().write_value(dividend); + // If we are interrupted here, then the interruptor may start the calculation using + // incorrectly signed inputs, but we'll restore the result ourselves. + // This sets DIRTY, so any interruptor will save the state. + sio.div().udivisor().write_value(divisor); + // If we are interrupted here, the interruptor will have restored everything but the + // quotient may be wrongly signed. If the calculation started by the above writes is + // still ongoing it is stopped, so it won't replace the result we're restoring. + // DIRTY and READY set, but only DIRTY matters to make the interruptor save the state. + sio.div().remainder().write_value(remainder); + // State fully restored after the quotient write. This sets both DIRTY and READY, so + // whatever we may have interrupted can read the result. + sio.div().quotient().write_value(quotient); + + result + } +} + +fn save_divider(f: F) -> R +where + F: FnOnce() -> R, +{ + let sio = rp_pac::SIO; + if unsafe { !sio.div().csr().read().dirty() } { + // Not dirty, so nothing is waiting for the calculation. So we can just + // issue it directly without a save/restore. 
+ f() + } else { + save_divider_and_call(f) + } +} + +trait ROMDiv { + fn rom_div(self, b: Self) -> Self; +} + +impl ROMDiv for f32 { + fn rom_div(self, b: Self) -> Self { + // ROM implementation uses the hardware divider, so we have to save it + save_divider(|| rom_data::float_funcs::fdiv(self, b)) + } +} + +impl ROMDiv for f64 { + fn rom_div(self, b: Self) -> Self { + // ROM implementation uses the hardware divider, so we have to save it + save_divider(|| rom_data::double_funcs::ddiv(self, b)) + } +} + +fn div(a: F, b: F) -> F { + if a.is_not_finite() { + if b.is_not_finite() { + // inf/NaN / inf/NaN = NaN + return F::NAN; + } + + if b.is_zero() { + // inf/NaN / 0 = NaN + return F::NAN; + } + + return if b.is_sign_negative() { + // [+/-]inf/NaN / (-X) = [-/+]inf/NaN + a.negate() + } else { + // [-]inf/NaN / X = [-]inf/NaN + a + }; + } + + if b.is_nan() { + // X / NaN = NaN + return b; + } + + // ROM handles X / 0 = [-]inf and X / [-]inf = [-]0, so we only + // need to catch 0 / 0 + if b.is_zero() && a.is_zero() { + return F::NAN; + } + + a.rom_div(b) +} + +intrinsics! 
{ + #[alias = __divsf3vfp] + #[aeabi = __aeabi_fdiv] + extern "C" fn __divsf3(a: f32, b: f32) -> f32 { + div(a, b) + } + + #[bootrom_v2] + #[alias = __divdf3vfp] + #[aeabi = __aeabi_ddiv] + extern "C" fn __divdf3(a: f64, b: f64) -> f64 { + div(a, b) + } +} diff --git a/embassy-rp/src/float/functions.rs b/embassy-rp/src/float/functions.rs new file mode 100644 index 00000000..de29ce33 --- /dev/null +++ b/embassy-rp/src/float/functions.rs @@ -0,0 +1,239 @@ +// Credit: taken from `rp-hal` (also licensed Apache+MIT) +// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/float/functions.rs + +use crate::float::{Float, Int}; +use crate::rom_data; + +trait ROMFunctions { + fn sqrt(self) -> Self; + fn ln(self) -> Self; + fn exp(self) -> Self; + fn sin(self) -> Self; + fn cos(self) -> Self; + fn tan(self) -> Self; + fn atan2(self, y: Self) -> Self; + + fn to_trig_range(self) -> Self; +} + +impl ROMFunctions for f32 { + fn sqrt(self) -> Self { + rom_data::float_funcs::fsqrt(self) + } + + fn ln(self) -> Self { + rom_data::float_funcs::fln(self) + } + + fn exp(self) -> Self { + rom_data::float_funcs::fexp(self) + } + + fn sin(self) -> Self { + rom_data::float_funcs::fsin(self) + } + + fn cos(self) -> Self { + rom_data::float_funcs::fcos(self) + } + + fn tan(self) -> Self { + rom_data::float_funcs::ftan(self) + } + + fn atan2(self, y: Self) -> Self { + rom_data::float_funcs::fatan2(self, y) + } + + fn to_trig_range(self) -> Self { + // -128 < X < 128, logic from the Pico SDK + let exponent = (self.repr() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS; + if exponent < 134 { + self + } else { + self % (core::f32::consts::PI * 2.0) + } + } +} + +impl ROMFunctions for f64 { + fn sqrt(self) -> Self { + rom_data::double_funcs::dsqrt(self) + } + + fn ln(self) -> Self { + rom_data::double_funcs::dln(self) + } + + fn exp(self) -> Self { + rom_data::double_funcs::dexp(self) + } + + fn sin(self) -> Self { + rom_data::double_funcs::dsin(self) + } + + fn cos(self) -> Self { + 
rom_data::double_funcs::dcos(self) + } + fn tan(self) -> Self { + rom_data::double_funcs::dtan(self) + } + + fn atan2(self, y: Self) -> Self { + rom_data::double_funcs::datan2(self, y) + } + + fn to_trig_range(self) -> Self { + // -1024 < X < 1024, logic from the Pico SDK + let exponent = (self.repr() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS; + if exponent < 1033 { + self + } else { + self % (core::f64::consts::PI * 2.0) + } + } +} + +fn is_negative_nonzero_or_nan(f: F) -> bool { + let repr = f.repr(); + if (repr & F::SIGN_MASK) != F::Int::ZERO { + // Negative, so anything other than exactly zero + return (repr & (!F::SIGN_MASK)) != F::Int::ZERO; + } + // NaN + (repr & (F::EXPONENT_MASK | F::SIGNIFICAND_MASK)) > F::EXPONENT_MASK +} + +fn sqrt(f: F) -> F { + if is_negative_nonzero_or_nan(f) { + F::NAN + } else { + f.sqrt() + } +} + +fn ln(f: F) -> F { + if is_negative_nonzero_or_nan(f) { + F::NAN + } else { + f.ln() + } +} + +fn exp(f: F) -> F { + if f.is_nan() { + F::NAN + } else { + f.exp() + } +} + +fn sin(f: F) -> F { + if f.is_not_finite() { + F::NAN + } else { + f.to_trig_range().sin() + } +} + +fn cos(f: F) -> F { + if f.is_not_finite() { + F::NAN + } else { + f.to_trig_range().cos() + } +} + +fn tan(f: F) -> F { + if f.is_not_finite() { + F::NAN + } else { + f.to_trig_range().tan() + } +} + +fn atan2(x: F, y: F) -> F { + if x.is_nan() || y.is_nan() { + F::NAN + } else { + x.to_trig_range().atan2(y) + } +} + +// Name collisions +mod intrinsics { + intrinsics! 
{ + extern "C" fn sqrtf(f: f32) -> f32 { + super::sqrt(f) + } + + #[bootrom_v2] + extern "C" fn sqrt(f: f64) -> f64 { + super::sqrt(f) + } + + extern "C" fn logf(f: f32) -> f32 { + super::ln(f) + } + + #[bootrom_v2] + extern "C" fn log(f: f64) -> f64 { + super::ln(f) + } + + extern "C" fn expf(f: f32) -> f32 { + super::exp(f) + } + + #[bootrom_v2] + extern "C" fn exp(f: f64) -> f64 { + super::exp(f) + } + + #[slower_than_default] + extern "C" fn sinf(f: f32) -> f32 { + super::sin(f) + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn sin(f: f64) -> f64 { + super::sin(f) + } + + #[slower_than_default] + extern "C" fn cosf(f: f32) -> f32 { + super::cos(f) + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn cos(f: f64) -> f64 { + super::cos(f) + } + + #[slower_than_default] + extern "C" fn tanf(f: f32) -> f32 { + super::tan(f) + } + + #[slower_than_default] + #[bootrom_v2] + extern "C" fn tan(f: f64) -> f64 { + super::tan(f) + } + + // Questionable gain + #[bootrom_v2] + extern "C" fn atan2f(a: f32, b: f32) -> f32 { + super::atan2(a, b) + } + + // Questionable gain + #[bootrom_v2] + extern "C" fn atan2(a: f64, b: f64) -> f64 { + super::atan2(a, b) + } + } +} diff --git a/embassy-rp/src/float/mod.rs b/embassy-rp/src/float/mod.rs new file mode 100644 index 00000000..945afff9 --- /dev/null +++ b/embassy-rp/src/float/mod.rs @@ -0,0 +1,149 @@ +// Credit: taken from `rp-hal` (also licensed Apache+MIT) +// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/float/mod.rs + +use core::ops; + +// Borrowed and simplified from compiler-builtins so we can use bit ops +// on floating point without macro soup. 
+pub(crate) trait Int: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Shl + + ops::Shr + + ops::BitOr + + ops::BitXor + + ops::BitAnd + + ops::Not +{ + const ZERO: Self; +} + +macro_rules! int_impl { + ($ty:ty) => { + impl Int for $ty { + const ZERO: Self = 0; + } + }; +} + +int_impl!(u32); +int_impl!(u64); + +pub(crate) trait Float: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Rem +{ + /// A uint of the same width as the float + type Int: Int; + + /// NaN representation for the float + const NAN: Self; + + /// The bitwidth of the float type + const BITS: u32; + + /// The bitwidth of the significand + const SIGNIFICAND_BITS: u32; + + /// A mask for the sign bit + const SIGN_MASK: Self::Int; + + /// A mask for the significand + const SIGNIFICAND_MASK: Self::Int; + + /// A mask for the exponent + const EXPONENT_MASK: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn repr(self) -> Self::Int; + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_repr(a: Self::Int) -> Self; + + /// Return a sign swapped `self` + fn negate(self) -> Self; + + /// Returns true if `self` is either NaN or infinity + fn is_not_finite(self) -> bool { + (self.repr() & Self::EXPONENT_MASK) == Self::EXPONENT_MASK + } + + /// Returns true if `self` is infinity + fn is_infinity(self) -> bool { + (self.repr() & (Self::EXPONENT_MASK | Self::SIGNIFICAND_MASK)) == Self::EXPONENT_MASK + } + + /// Returns true if `self` is NaN + fn is_nan(self) -> bool { + (self.repr() & (Self::EXPONENT_MASK | Self::SIGNIFICAND_MASK)) > Self::EXPONENT_MASK + } + + /// Returns true if `self` is negative + fn is_sign_negative(self) -> bool { + (self.repr() & Self::SIGN_MASK) != Self::Int::ZERO + } + + /// 
Returns true if `self` is zero (either sign) + fn is_zero(self) -> bool { + (self.repr() & (Self::SIGNIFICAND_MASK | Self::EXPONENT_MASK)) == Self::Int::ZERO + } +} + +macro_rules! float_impl { + ($ty:ident, $ity:ident, $bits:expr, $significand_bits:expr) => { + impl Float for $ty { + type Int = $ity; + + const NAN: Self = <$ty>::NAN; + + const BITS: u32 = $bits; + const SIGNIFICAND_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; + const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); + + fn repr(self) -> Self::Int { + self.to_bits() + } + + fn from_repr(a: Self::Int) -> Self { + Self::from_bits(a) + } + + fn negate(self) -> Self { + -self + } + } + }; +} + +float_impl!(f32, u32, 32, 23); +float_impl!(f64, u64, 64, 52); + +mod add_sub; +mod cmp; +mod conv; +mod div; +mod functions; +mod mul; diff --git a/embassy-rp/src/float/mul.rs b/embassy-rp/src/float/mul.rs new file mode 100644 index 00000000..ceb0210e --- /dev/null +++ b/embassy-rp/src/float/mul.rs @@ -0,0 +1,70 @@ +// Credit: taken from `rp-hal` (also licensed Apache+MIT) +// https://github.com/rp-rs/rp-hal/blob/main/rp2040-hal/src/float/mul.rs + +use super::Float; +use crate::rom_data; + +trait ROMMul { + fn rom_mul(self, b: Self) -> Self; +} + +impl ROMMul for f32 { + fn rom_mul(self, b: Self) -> Self { + rom_data::float_funcs::fmul(self, b) + } +} + +impl ROMMul for f64 { + fn rom_mul(self, b: Self) -> Self { + rom_data::double_funcs::dmul(self, b) + } +} + +fn mul(a: F, b: F) -> F { + if a.is_not_finite() { + if b.is_zero() { + // [-]inf/NaN * 0 = NaN + return F::NAN; + } + + return if b.is_sign_negative() { + // [+/-]inf/NaN * (-X) = [-/+]inf/NaN + a.negate() + } else { + // [-]inf/NaN * X = [-]inf/NaN + a + }; + } + + if b.is_not_finite() { + if a.is_zero() { + // 0 * [-]inf/NaN = NaN + return F::NAN; + } + + return if b.is_sign_negative() { + // (-X) * [+/-]inf/NaN = 
[-/+]inf/NaN + b.negate() + } else { + // X * [-]inf/NaN = [-]inf/NaN + b + }; + } + + a.rom_mul(b) +} + +intrinsics! { + #[alias = __mulsf3vfp] + #[aeabi = __aeabi_fmul] + extern "C" fn __mulsf3(a: f32, b: f32) -> f32 { + mul(a, b) + } + + #[bootrom_v2] + #[alias = __muldf3vfp] + #[aeabi = __aeabi_dmul] + extern "C" fn __muldf3(a: f64, b: f64) -> f64 { + mul(a, b) + } +} diff --git a/embassy-rp/src/lib.rs b/embassy-rp/src/lib.rs index 1d63f6c2..3841bb83 100644 --- a/embassy-rp/src/lib.rs +++ b/embassy-rp/src/lib.rs @@ -12,6 +12,7 @@ mod intrinsics; pub mod adc; pub mod dma; +mod float; pub mod gpio; pub mod i2c; pub mod interrupt; diff --git a/tests/rp/.cargo/config.toml b/tests/rp/.cargo/config.toml index 9611db3a..e1744c70 100644 --- a/tests/rp/.cargo/config.toml +++ b/tests/rp/.cargo/config.toml @@ -1,6 +1,8 @@ [unstable] -build-std = ["core"] -build-std-features = ["panic_immediate_abort"] +# enabling these breaks the float tests during linking, with intrinsics +# duplicated between embassy-rp and compiler_builtins +#build-std = ["core"] +#build-std-features = ["panic_immediate_abort"] [target.'cfg(all(target_arch = "arm", target_os = "none"))'] #runner = "teleprobe client run --target rpi-pico --elf" diff --git a/tests/rp/Cargo.toml b/tests/rp/Cargo.toml index 36ff735e..6778f53d 100644 --- a/tests/rp/Cargo.toml +++ b/tests/rp/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0" embassy-sync = { version = "0.2.0", path = "../../embassy-sync", features = ["defmt"] } embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["arch-cortex-m", "executor-thread", "defmt", "integrated-timers"] } embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt"] } -embassy-rp = { version = "0.1.0", path = "../../embassy-rp", features = ["nightly", "defmt", "unstable-pac", "unstable-traits", "time-driver", "critical-section-impl"] } +embassy-rp = { version = "0.1.0", path = "../../embassy-rp", features = ["nightly", 
"defmt", "unstable-pac", "unstable-traits", "time-driver", "critical-section-impl", "intrinsics", "rom-v2-intrinsics"] } embassy-futures = { version = "0.1.0", path = "../../embassy-futures" } defmt = "0.3.0" diff --git a/tests/rp/src/bin/float.rs b/tests/rp/src/bin/float.rs new file mode 100644 index 00000000..6715271e --- /dev/null +++ b/tests/rp/src/bin/float.rs @@ -0,0 +1,53 @@ +#![no_std] +#![no_main] +#![feature(type_alias_impl_trait)] + +use defmt::*; +use embassy_executor::Spawner; +use embassy_rp::pac; +use embassy_time::{Duration, Timer}; +use {defmt_rtt as _, panic_probe as _}; + +#[embassy_executor::main] +async fn main(_spawner: Spawner) { + embassy_rp::init(Default::default()); + info!("Hello World!"); + + const PI_F: f32 = 3.1415926535f32; + const PI_D: f64 = 3.14159265358979323846f64; + + unsafe { + pac::BUSCTRL + .perfsel(0) + .write(|r| r.set_perfsel(pac::busctrl::vals::Perfsel::ROM)); + } + + for i in 0..=360 { + let rad_f = (i as f32) * PI_F / 180.0; + info!( + "{}° float: {=f32} / {=f32} / {=f32} / {=f32}", + i, + rad_f, + rad_f - PI_F, + rad_f + PI_F, + rad_f % PI_F + ); + let rad_d = (i as f64) * PI_D / 180.0; + info!( + "{}° double: {=f64} / {=f64} / {=f64} / {=f64}", + i, + rad_d, + rad_d - PI_D, + rad_d + PI_D, + rad_d % PI_D + ); + Timer::after(Duration::from_millis(10)).await; + } + + let rom_accesses = unsafe { pac::BUSCTRL.perfctr(0).read().perfctr() }; + // every float operation used here uses at least 10 cycles + defmt::assert!(rom_accesses >= 360 * 12 * 10); + + info!("Test OK"); + cortex_m::asm::bkpt(); +}