From 7ed9e29326e42bf286b4f7c5883ef216cfb21531 Mon Sep 17 00:00:00 2001 From: Derek Hageman Date: Tue, 25 Jul 2023 15:54:33 -0600 Subject: [PATCH] rp: add async flash Implement an async flash mode using the XIP background best effort read interface. Only reads are actually async, write and erase remain blocking. --- embassy-boot/rp/src/lib.rs | 14 +- embassy-rp/Cargo.toml | 3 +- embassy-rp/src/flash.rs | 203 +++++++++++++++++++++- examples/boot/application/rp/src/bin/a.rs | 4 +- examples/rp/src/bin/flash.rs | 42 ++++- tests/rp/src/bin/flash.rs | 14 +- 6 files changed, 254 insertions(+), 26 deletions(-) diff --git a/embassy-boot/rp/src/lib.rs b/embassy-boot/rp/src/lib.rs index 25329f9e..35fc104e 100644 --- a/embassy-boot/rp/src/lib.rs +++ b/embassy-boot/rp/src/lib.rs @@ -6,7 +6,7 @@ mod fmt; #[cfg(feature = "nightly")] pub use embassy_boot::FirmwareUpdater; pub use embassy_boot::{AlignedBuffer, BlockingFirmwareUpdater, BootLoaderConfig, FirmwareUpdaterConfig, State}; -use embassy_rp::flash::{Flash, ERASE_SIZE}; +use embassy_rp::flash::{Blocking, Flash, ERASE_SIZE}; use embassy_rp::peripherals::{FLASH, WATCHDOG}; use embassy_rp::watchdog::Watchdog; use embassy_time::Duration; @@ -58,14 +58,14 @@ impl /// A flash implementation that will feed a watchdog when touching flash. 
pub struct WatchdogFlash<'d, const SIZE: usize> { - flash: Flash<'d, FLASH, SIZE>, + flash: Flash<'d, FLASH, Blocking, SIZE>, watchdog: Watchdog, } impl<'d, const SIZE: usize> WatchdogFlash<'d, SIZE> { /// Start a new watchdog with a given flash and watchdog peripheral and a timeout pub fn start(flash: FLASH, watchdog: WATCHDOG, timeout: Duration) -> Self { - let flash: Flash<'_, FLASH, SIZE> = Flash::new(flash); + let flash = Flash::<_, Blocking, SIZE>::new(flash); let mut watchdog = Watchdog::new(watchdog); watchdog.start(timeout); Self { flash, watchdog } @@ -73,12 +73,12 @@ impl<'d, const SIZE: usize> WatchdogFlash<'d, SIZE> { } impl<'d, const SIZE: usize> ErrorType for WatchdogFlash<'d, SIZE> { - type Error = <Flash<'d, FLASH, SIZE> as ErrorType>::Error; + type Error = <Flash<'d, FLASH, Blocking, SIZE> as ErrorType>::Error; } impl<'d, const SIZE: usize> NorFlash for WatchdogFlash<'d, SIZE> { - const WRITE_SIZE: usize = <Flash<'d, FLASH, SIZE> as NorFlash>::WRITE_SIZE; - const ERASE_SIZE: usize = <Flash<'d, FLASH, SIZE> as NorFlash>::ERASE_SIZE; + const WRITE_SIZE: usize = <Flash<'d, FLASH, Blocking, SIZE> as NorFlash>::WRITE_SIZE; + const ERASE_SIZE: usize = <Flash<'d, FLASH, Blocking, SIZE> as NorFlash>::ERASE_SIZE; fn erase(&mut self, from: u32, to: u32) -> Result<(), Self::Error> { self.watchdog.feed(); @@ -91,7 +91,7 @@ impl<'d, const SIZE: usize> NorFlash for WatchdogFlash<'d, SIZE> { } impl<'d, const SIZE: usize> ReadNorFlash for WatchdogFlash<'d, SIZE> { - const READ_SIZE: usize = <Flash<'d, FLASH, SIZE> as ReadNorFlash>::READ_SIZE; + const READ_SIZE: usize = <Flash<'d, FLASH, Blocking, SIZE> as ReadNorFlash>::READ_SIZE; fn read(&mut self, offset: u32, data: &mut [u8]) -> Result<(), Self::Error> { self.watchdog.feed(); self.flash.read(offset, data) diff --git a/embassy-rp/Cargo.toml b/embassy-rp/Cargo.toml index b53c7a01..6310ffb6 100644 --- a/embassy-rp/Cargo.toml +++ b/embassy-rp/Cargo.toml @@ -48,7 +48,7 @@ boot2-w25x10cl = [] run-from-ram = [] # Enable nightly-only features -nightly = ["embedded-hal-1", "embedded-hal-async", "embassy-embedded-hal/nightly", "dep:embassy-usb-driver", "dep:embedded-io"] +nightly = ["embedded-hal-1", "embedded-hal-async", "embedded-storage-async",
"embassy-embedded-hal/nightly", "dep:embassy-usb-driver", "dep:embedded-io"] # Implement embedded-hal 1.0 alpha traits. # Implement embedded-hal-async traits if `nightly` is set as well. @@ -73,6 +73,7 @@ futures = { version = "0.3.17", default-features = false, features = ["async-awa chrono = { version = "0.4", default-features = false, optional = true } embedded-io = { version = "0.4.0", features = ["async"], optional = true } embedded-storage = { version = "0.3" } +embedded-storage-async = { version = "0.4.0", optional = true } rand_core = "0.6.4" fixed = "1.23.1" diff --git a/embassy-rp/src/flash.rs b/embassy-rp/src/flash.rs index 0ed6808e..70d86731 100644 --- a/embassy-rp/src/flash.rs +++ b/embassy-rp/src/flash.rs @@ -1,11 +1,15 @@ +use core::future::Future; use core::marker::PhantomData; +use core::pin::Pin; +use core::task::{Context, Poll}; -use embassy_hal_internal::Peripheral; +use embassy_hal_internal::{into_ref, Peripheral, PeripheralRef}; use embedded_storage::nor_flash::{ check_erase, check_read, check_write, ErrorType, MultiwriteNorFlash, NorFlash, NorFlashError, NorFlashErrorKind, ReadNorFlash, }; +use crate::dma::{AnyChannel, Channel, Transfer}; use crate::pac; use crate::peripherals::FLASH; @@ -24,6 +28,7 @@ pub const PAGE_SIZE: usize = 256; pub const WRITE_SIZE: usize = 1; pub const READ_SIZE: usize = 1; pub const ERASE_SIZE: usize = 4096; +pub const ASYNC_READ_SIZE: usize = 4; /// Error type for NVMC operations. 
#[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -57,13 +62,46 @@ impl NorFlashError for Error { } } -pub struct Flash<'d, T: Instance, const FLASH_SIZE: usize>(PhantomData<&'d mut T>); +/// Future that waits for completion of a background read +#[must_use = "futures do nothing unless you `.await` or poll them"] +pub struct BackgroundRead<'a, 'd, T: Instance, const FLASH_SIZE: usize> { + flash: PhantomData<&'a mut Flash<'d, T, Async, FLASH_SIZE>>, + transfer: Transfer<'a, AnyChannel>, +} -impl<'d, T: Instance, const FLASH_SIZE: usize> Flash<'d, T, FLASH_SIZE> { - pub fn new(_flash: impl Peripheral<P = T> + 'd) -> Self { - Self(PhantomData) +impl<'a, 'd, T: Instance, const FLASH_SIZE: usize> Future for BackgroundRead<'a, 'd, T, FLASH_SIZE> { + type Output = (); + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { + Pin::new(&mut self.transfer).poll(cx) } +} +impl<'a, 'd, T: Instance, const FLASH_SIZE: usize> Drop for BackgroundRead<'a, 'd, T, FLASH_SIZE> { + fn drop(&mut self) { + if pac::XIP_CTRL.stream_ctr().read().0 == 0 { + return; + } + pac::XIP_CTRL + .stream_ctr() + .write_value(pac::xip_ctrl::regs::StreamCtr(0)); + core::sync::atomic::compiler_fence(core::sync::atomic::Ordering::SeqCst); + // Errata RP2040-E8: Perform an uncached read to make sure there's not a transfer in + // flight that might affect an address written to start a new transfer. This stalls + // until after any transfer is complete, so the address will not change anymore. + const XIP_NOCACHE_NOALLOC_BASE: *const u32 = 0x13000000 as *const _; + unsafe { + core::ptr::read_volatile(XIP_NOCACHE_NOALLOC_BASE); + } + core::sync::atomic::compiler_fence(core::sync::atomic::Ordering::SeqCst); + } +} + +pub struct Flash<'d, T: Instance, M: Mode, const FLASH_SIZE: usize> { + dma: Option<PeripheralRef<'d, AnyChannel>>, + phantom: PhantomData<(&'d mut T, M)>, +} + +impl<'d, T: Instance, M: Mode, const FLASH_SIZE: usize> Flash<'d, T, M, FLASH_SIZE> { pub fn read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Error> { trace!( "Reading from 0x{:x} to 0x{:x}", @@ -182,6 +220,8 @@ impl<'d, T: Instance, const FLASH_SIZE: usize> Flash<'d, T, FLASH_SIZE> { let ch = crate::pac::DMA.ch(n); while ch.read_addr().read() < SRAM_LOWER && ch.ctrl_trig().read().busy() {} } + // Wait for completion of any background reads + while pac::XIP_CTRL.stream_ctr().read().0 > 0 {} // Run our flash operation in RAM operation(); @@ -210,11 +250,73 @@ impl<'d, T: Instance, const FLASH_SIZE: usize> Flash<'d, T, FLASH_SIZE> { } } -impl<'d, T: Instance, const FLASH_SIZE: usize> ErrorType for Flash<'d, T, FLASH_SIZE> { +impl<'d, T: Instance,
const FLASH_SIZE: usize> Flash<'d, T, Blocking, FLASH_SIZE> { + pub fn new(_flash: impl Peripheral<P = T> + 'd) -> Self { + Self { + dma: None, + phantom: PhantomData, + } + } +} + +impl<'d, T: Instance, const FLASH_SIZE: usize> Flash<'d, T, Async, FLASH_SIZE> { + pub fn new(_flash: impl Peripheral<P = T> + 'd, dma: impl Peripheral<P = impl Channel> + 'd) -> Self { + into_ref!(dma); + Self { + dma: Some(dma.map_into()), + phantom: PhantomData, + } + } + + pub fn background_read<'a>( + &'a mut self, + offset: u32, + data: &'a mut [u32], + ) -> Result<BackgroundRead<'a, 'd, T, FLASH_SIZE>, Error> { + trace!( + "Reading in background from 0x{:x} to 0x{:x}", + FLASH_BASE as u32 + offset, + FLASH_BASE as u32 + offset + (data.len() * 4) as u32 + ); + // Can't use check_read because we need to enforce 4-byte alignment + let offset = offset as usize; + let length = data.len() * 4; + if length > self.capacity() || offset > self.capacity() - length { + return Err(Error::OutOfBounds); + } + if offset % 4 != 0 { + return Err(Error::Unaligned); + } + + while !pac::XIP_CTRL.stat().read().fifo_empty() { + pac::XIP_CTRL.stream_fifo().read(); + } + + pac::XIP_CTRL + .stream_addr() + .write_value(pac::xip_ctrl::regs::StreamAddr(FLASH_BASE as u32 + offset as u32)); + pac::XIP_CTRL + .stream_ctr() + .write_value(pac::xip_ctrl::regs::StreamCtr(data.len() as u32)); + + // Use the XIP AUX bus port, rather than the FIFO register access (e.g. + // pac::XIP_CTRL.stream_fifo().as_ptr()) to avoid DMA stalling on + // general XIP access.
+ const XIP_AUX_BASE: *const u32 = 0x50400000 as *const _; + let transfer = unsafe { crate::dma::read(self.dma.as_mut().unwrap(), XIP_AUX_BASE, data, 37) }; + + Ok(BackgroundRead { + flash: PhantomData, + transfer, + }) + } +} + +impl<'d, T: Instance, M: Mode, const FLASH_SIZE: usize> ErrorType for Flash<'d, T, M, FLASH_SIZE> { type Error = Error; } -impl<'d, T: Instance, const FLASH_SIZE: usize> ReadNorFlash for Flash<'d, T, FLASH_SIZE> { +impl<'d, T: Instance, M: Mode, const FLASH_SIZE: usize> ReadNorFlash for Flash<'d, T, M, FLASH_SIZE> { const READ_SIZE: usize = READ_SIZE; fn read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Self::Error> { @@ -226,9 +328,9 @@ impl<'d, T: Instance, const FLASH_SIZE: usize> ReadNorFlash for Flash<'d, T, FLA } } -impl<'d, T: Instance, const FLASH_SIZE: usize> MultiwriteNorFlash for Flash<'d, T, FLASH_SIZE> {} +impl<'d, T: Instance, M: Mode, const FLASH_SIZE: usize> MultiwriteNorFlash for Flash<'d, T, M, FLASH_SIZE> {} -impl<'d, T: Instance, const FLASH_SIZE: usize> NorFlash for Flash<'d, T, FLASH_SIZE> { +impl<'d, T: Instance, M: Mode, const FLASH_SIZE: usize> NorFlash for Flash<'d, T, M, FLASH_SIZE> { const WRITE_SIZE: usize = WRITE_SIZE; const ERASE_SIZE: usize = ERASE_SIZE; @@ -242,6 +344,74 @@ impl<'d, T: Instance, const FLASH_SIZE: usize> NorFlash for Flash<'d, T, FLASH_S } } +#[cfg(feature = "nightly")] +impl<'d, T: Instance, const FLASH_SIZE: usize> embedded_storage_async::nor_flash::ReadNorFlash + for Flash<'d, T, Async, FLASH_SIZE> +{ + const READ_SIZE: usize = ASYNC_READ_SIZE; + + async fn read(&mut self, offset: u32, bytes: &mut [u8]) -> Result<(), Self::Error> { + use core::mem::MaybeUninit; + + // Checked early to simplify address validity checks + if bytes.len() % 4 != 0 { + return Err(Error::Unaligned); + } + + // If the destination address is already aligned, then we can just DMA directly + if (bytes.as_ptr() as u32) % 4 == 0 { + // Safety: alignment and size have been checked for compatibility + let mut 
buf: &mut [u32] = + unsafe { core::slice::from_raw_parts_mut(bytes.as_mut_ptr() as *mut u32, bytes.len() / 4) }; + self.background_read(offset, &mut buf)?.await; + return Ok(()); + } + + // Destination address is unaligned, so use an intermediate buffer + const REALIGN_CHUNK: usize = PAGE_SIZE; + // Safety: MaybeUninit requires no initialization + let mut buf: [MaybeUninit<u32>; REALIGN_CHUNK / 4] = unsafe { MaybeUninit::uninit().assume_init() }; + let mut chunk_offset: usize = 0; + while chunk_offset < bytes.len() { + let chunk_size = (bytes.len() - chunk_offset).min(REALIGN_CHUNK); + let buf = &mut buf[..(chunk_size / 4)]; + + // Safety: this is written to completely by DMA before any reads + let buf = unsafe { &mut *(buf as *mut [MaybeUninit<u32>] as *mut [u32]) }; + self.background_read(offset + chunk_offset as u32, buf)?.await; + + // Safety: [u8] has more relaxed alignment and size requirements than [u32], so this is just aliasing + let buf = unsafe { core::slice::from_raw_parts(buf.as_ptr() as *const _, buf.len() * 4) }; + bytes[chunk_offset..(chunk_offset + chunk_size)].copy_from_slice(&buf[..chunk_size]); + + chunk_offset += chunk_size; + } + + Ok(()) + } + + fn capacity(&self) -> usize { + self.capacity() + } +} + +#[cfg(feature = "nightly")] +impl<'d, T: Instance, const FLASH_SIZE: usize> embedded_storage_async::nor_flash::NorFlash + for Flash<'d, T, Async, FLASH_SIZE> +{ + const WRITE_SIZE: usize = WRITE_SIZE; + + const ERASE_SIZE: usize = ERASE_SIZE; + + async fn erase(&mut self, from: u32, to: u32) -> Result<(), Self::Error> { + self.erase(from, to) + } + + async fn write(&mut self, offset: u32, bytes: &[u8]) -> Result<(), Self::Error> { + self.write(offset, bytes) + } +} + #[allow(dead_code)] mod ram_helpers { use core::marker::PhantomData; @@ -699,9 +869,24 @@ mod ram_helpers { mod sealed { pub trait Instance {} + pub trait Mode {} } pub trait Instance: sealed::Instance {} +pub trait Mode: sealed::Mode {} impl sealed::Instance for FLASH {} impl Instance for
FLASH {} + +macro_rules! impl_mode { + ($name:ident) => { + impl sealed::Mode for $name {} + impl Mode for $name {} + }; +} + +pub struct Blocking; +pub struct Async; + +impl_mode!(Blocking); +impl_mode!(Async); diff --git a/examples/boot/application/rp/src/bin/a.rs b/examples/boot/application/rp/src/bin/a.rs index c8497494..b5e1950c 100644 --- a/examples/boot/application/rp/src/bin/a.rs +++ b/examples/boot/application/rp/src/bin/a.rs @@ -7,7 +7,7 @@ use core::cell::RefCell; use defmt_rtt as _; use embassy_boot_rp::*; use embassy_executor::Spawner; -use embassy_rp::flash::Flash; +use embassy_rp::flash::{self, Flash}; use embassy_rp::gpio::{Level, Output}; use embassy_rp::watchdog::Watchdog; use embassy_sync::blocking_mutex::Mutex; @@ -34,7 +34,7 @@ async fn main(_s: Spawner) { let mut watchdog = Watchdog::new(p.WATCHDOG); watchdog.start(Duration::from_secs(8)); - let flash: Flash<_, FLASH_SIZE> = Flash::new(p.FLASH); + let flash = Flash::<_, flash::Blocking, FLASH_SIZE>::new(p.FLASH); let flash = Mutex::new(RefCell::new(flash)); let config = FirmwareUpdaterConfig::from_linkerfile_blocking(&flash); diff --git a/examples/rp/src/bin/flash.rs b/examples/rp/src/bin/flash.rs index 4c4982ac..88bb931d 100644 --- a/examples/rp/src/bin/flash.rs +++ b/examples/rp/src/bin/flash.rs @@ -6,7 +6,7 @@ use defmt::*; use embassy_executor::Spawner; -use embassy_rp::flash::{ERASE_SIZE, FLASH_BASE}; +use embassy_rp::flash::{Async, ERASE_SIZE, FLASH_BASE}; use embassy_rp::peripherals::FLASH; use embassy_time::{Duration, Timer}; use {defmt_rtt as _, panic_probe as _}; @@ -25,7 +25,7 @@ async fn main(_spawner: Spawner) { // https://github.com/knurling-rs/defmt/pull/683 Timer::after(Duration::from_millis(10)).await; - let mut flash = embassy_rp::flash::Flash::<_, FLASH_SIZE>::new(p.FLASH); + let mut flash = embassy_rp::flash::Flash::<_, Async, FLASH_SIZE>::new(p.FLASH, p.DMA_CH0); // Get JEDEC id let jedec = flash.jedec_id().unwrap(); @@ -40,10 +40,12 @@ async fn main(_spawner: Spawner) { 
multiwrite_bytes(&mut flash, ERASE_SIZE as u32); + background_read(&mut flash, (ERASE_SIZE * 2) as u32).await; + loop {} } -fn multiwrite_bytes(flash: &mut embassy_rp::flash::Flash<'_, FLASH, FLASH_SIZE>, offset: u32) { +fn multiwrite_bytes(flash: &mut embassy_rp::flash::Flash<'_, FLASH, Async, FLASH_SIZE>, offset: u32) { info!(">>>> [multiwrite_bytes]"); let mut read_buf = [0u8; ERASE_SIZE]; defmt::unwrap!(flash.read(ADDR_OFFSET + offset, &mut read_buf)); @@ -71,7 +73,7 @@ fn multiwrite_bytes(flash: &mut embassy_rp::flash::Flash<'_, FLASH, FLASH_SIZE>, } } -fn erase_write_sector(flash: &mut embassy_rp::flash::Flash<'_, FLASH, FLASH_SIZE>, offset: u32) { +fn erase_write_sector(flash: &mut embassy_rp::flash::Flash<'_, FLASH, Async, FLASH_SIZE>, offset: u32) { info!(">>>> [erase_write_sector]"); let mut buf = [0u8; ERASE_SIZE]; defmt::unwrap!(flash.read(ADDR_OFFSET + offset, &mut buf)); @@ -99,3 +101,35 @@ fn erase_write_sector(flash: &mut embassy_rp::flash::Flash<'_, FLASH, FLASH_SIZE defmt::panic!("unexpected"); } } + +async fn background_read(flash: &mut embassy_rp::flash::Flash<'_, FLASH, Async, FLASH_SIZE>, offset: u32) { + info!(">>>> [background_read]"); + + let mut buf = [0u32; 8]; + defmt::unwrap!(flash.background_read(ADDR_OFFSET + offset, &mut buf)).await; + + info!("Addr of flash block is {:x}", ADDR_OFFSET + offset + FLASH_BASE as u32); + info!("Contents start with {=u32:x}", buf[0]); + + defmt::unwrap!(flash.erase(ADDR_OFFSET + offset, ADDR_OFFSET + offset + ERASE_SIZE as u32)); + + defmt::unwrap!(flash.background_read(ADDR_OFFSET + offset, &mut buf)).await; + info!("Contents after erase starts with {=u32:x}", buf[0]); + if buf.iter().any(|x| *x != 0xFFFFFFFF) { + defmt::panic!("unexpected"); + } + + for b in buf.iter_mut() { + *b = 0xDABA1234; + } + + defmt::unwrap!(flash.write(ADDR_OFFSET + offset, unsafe { + core::slice::from_raw_parts(buf.as_ptr() as *const u8, buf.len() * 4) + })); + + defmt::unwrap!(flash.background_read(ADDR_OFFSET + offset, &mut 
buf)).await; + info!("Contents after write starts with {=u32:x}", buf[0]); + if buf.iter().any(|x| *x != 0xDABA1234) { + defmt::panic!("unexpected"); + } +} diff --git a/tests/rp/src/bin/flash.rs b/tests/rp/src/bin/flash.rs index cf9b86df..c31d6dec 100644 --- a/tests/rp/src/bin/flash.rs +++ b/tests/rp/src/bin/flash.rs @@ -6,11 +6,11 @@ mod common; use defmt::*; use embassy_executor::Spawner; -use embassy_rp::flash::{ERASE_SIZE, FLASH_BASE}; +use embassy_rp::flash::{Async, ERASE_SIZE, FLASH_BASE}; use embassy_time::{Duration, Timer}; use {defmt_rtt as _, panic_probe as _}; -const ADDR_OFFSET: u32 = 0x4000; +const ADDR_OFFSET: u32 = 0x8000; #[embassy_executor::main] async fn main(_spawner: Spawner) { @@ -23,7 +23,7 @@ async fn main(_spawner: Spawner) { // https://github.com/knurling-rs/defmt/pull/683 Timer::after(Duration::from_millis(10)).await; - let mut flash = embassy_rp::flash::Flash::<_, { 2 * 1024 * 1024 }>::new(p.FLASH); + let mut flash = embassy_rp::flash::Flash::<_, Async, { 2 * 1024 * 1024 }>::new(p.FLASH, p.DMA_CH0); // Get JEDEC id let jedec = defmt::unwrap!(flash.jedec_id()); @@ -60,6 +60,14 @@ async fn main(_spawner: Spawner) { defmt::panic!("unexpected"); } + let mut buf = [0u32; ERASE_SIZE / 4]; + + defmt::unwrap!(flash.background_read(ADDR_OFFSET, &mut buf)).await; + info!("Contents after write starts with {=u32:x}", buf[0]); + if buf.iter().any(|x| *x != 0xDADADADA) { + defmt::panic!("unexpected"); + } + info!("Test OK"); cortex_m::asm::bkpt(); }