From 1ee58492fbc58b721dc5ed9037c6787af257cbeb Mon Sep 17 00:00:00 2001
From: kalkyl <henrik.alser@me.com>
Date: Sat, 10 Dec 2022 08:26:35 +0100
Subject: [PATCH 01/10] embassy-rp: Add multicore support

---
 embassy-rp/Cargo.toml                   |   2 +-
 embassy-rp/src/critical_section_impl.rs | 142 +++++++++++++
 embassy-rp/src/lib.rs                   |   2 +
 embassy-rp/src/multicore.rs             | 266 ++++++++++++++++++++++++
 examples/rp/Cargo.toml                  |   3 +-
 examples/rp/src/bin/multicore.rs        |  62 ++++++
 6 files changed, 475 insertions(+), 2 deletions(-)
 create mode 100644 embassy-rp/src/critical_section_impl.rs
 create mode 100644 embassy-rp/src/multicore.rs
 create mode 100644 examples/rp/src/bin/multicore.rs

diff --git a/embassy-rp/Cargo.toml b/embassy-rp/Cargo.toml
index 284d458c..07cd1bc1 100644
--- a/embassy-rp/Cargo.toml
+++ b/embassy-rp/Cargo.toml
@@ -50,7 +50,7 @@ nb = "1.0.0"
 cfg-if = "1.0.0"
 cortex-m-rt = ">=0.6.15,<0.8"
 cortex-m = "0.7.6"
-critical-section = "1.1"
+critical-section = { version = "1.1", features = ["restore-state-u8"] }
 futures = { version = "0.3.17", default-features = false, features = ["async-await"] }
 chrono = { version = "0.4", default-features = false, optional = true }
 embedded-io = { version = "0.4.0", features = ["async"], optional = true }
diff --git a/embassy-rp/src/critical_section_impl.rs b/embassy-rp/src/critical_section_impl.rs
new file mode 100644
index 00000000..ce284c85
--- /dev/null
+++ b/embassy-rp/src/critical_section_impl.rs
@@ -0,0 +1,142 @@
+use core::sync::atomic::{AtomicU8, Ordering};
+
+use crate::pac;
+
+struct RpSpinlockCs;
+critical_section::set_impl!(RpSpinlockCs);
+
+/// Marker value to indicate no-one has the lock.
+///
+/// Initialising `LOCK_OWNER` to 0 means cheaper static initialisation so it's the best choice
+const LOCK_UNOWNED: u8 = 0;
+
+/// Indicates which core owns the lock so that we can call critical_section recursively.
+///
+/// 0 = no one has the lock, 1 = core0 has the lock, 2 = core1 has the lock
+static LOCK_OWNER: AtomicU8 = AtomicU8::new(LOCK_UNOWNED);
+
+/// Marker value to indicate that we already owned the lock when we started the `critical_section`.
+///
+/// Since we can't take the spinlock when we already have it, we need some other way to keep track of `critical_section` ownership.
+/// `critical_section` provides a token for communicating between `acquire` and `release` so we use that.
+/// If we're the outermost call to `critical_section` we use the values 0 and 1 to indicate we should release the spinlock and set the interrupts back to disabled and enabled, respectively.
+/// The value 2 indicates that we aren't the outermost call, and should not release the spinlock or re-enable interrupts in `release`
+const LOCK_ALREADY_OWNED: u8 = 2;
+
+unsafe impl critical_section::Impl for RpSpinlockCs {
+    unsafe fn acquire() -> u8 {
+        RpSpinlockCs::acquire()
+    }
+
+    unsafe fn release(token: u8) {
+        RpSpinlockCs::release(token);
+    }
+}
+
+impl RpSpinlockCs {
+    unsafe fn acquire() -> u8 {
+        // Store the initial interrupt state and current core id in stack variables
+        let interrupts_active = cortex_m::register::primask::read().is_active();
+        // We reserved 0 as our `LOCK_UNOWNED` value, so add 1 to core_id so we get 1 for core0, 2 for core1.
+        let core = pac::SIO.cpuid().read() as u8 + 1;
+        // Do we already own the spinlock?
+        if LOCK_OWNER.load(Ordering::Acquire) == core {
+            // We already own the lock, so we must have called acquire within a critical_section.
+            // Return the magic inner-loop value so that we know not to re-enable interrupts in release()
+            LOCK_ALREADY_OWNED
+        } else {
+            // Spin until we get the lock
+            loop {
+                // Need to disable interrupts to ensure that we will not deadlock
+                // if an interrupt enters critical_section::Impl after we acquire the lock
+                cortex_m::interrupt::disable();
+                // Ensure the compiler doesn't re-order accesses and violate safety here
+                core::sync::atomic::compiler_fence(Ordering::SeqCst);
+                // Read the spinlock reserved for `critical_section`
+                if let Some(lock) = Spinlock31::try_claim() {
+                    // We just acquired the lock.
+                    // 1. Forget it, so we don't immediately unlock
+                    core::mem::forget(lock);
+                    // 2. Store which core we are so we can tell if we're called recursively
+                    LOCK_OWNER.store(core, Ordering::Relaxed);
+                    break;
+                }
+                // We didn't get the lock, enable interrupts if they were enabled before we started
+                if interrupts_active {
+                    cortex_m::interrupt::enable();
+                }
+            }
+            // If we broke out of the loop we have just acquired the lock
+            // As the outermost loop, we want to return the interrupt status to restore later
+            interrupts_active as _
+        }
+    }
+
+    unsafe fn release(token: u8) {
+        // Did we already own the lock at the start of the `critical_section`?
+        if token != LOCK_ALREADY_OWNED {
+            // No, it wasn't owned at the start of this `critical_section`, so this core no longer owns it.
+            // Set `LOCK_OWNER` back to `LOCK_UNOWNED` to ensure the next critical section tries to obtain the spinlock instead
+            LOCK_OWNER.store(LOCK_UNOWNED, Ordering::Relaxed);
+            // Ensure the compiler doesn't re-order accesses and violate safety here
+            core::sync::atomic::compiler_fence(Ordering::SeqCst);
+            // Release the spinlock to allow others to enter critical_section again
+            Spinlock31::release();
+            // Re-enable interrupts if they were enabled when we first called acquire()
+            // We only do this on the outermost `critical_section` to ensure interrupts stay disabled
+            // for the whole time that we have the lock
+            if token != 0 {
+                cortex_m::interrupt::enable();
+            }
+        }
+    }
+}
+
+pub struct Spinlock<const N: usize>(core::marker::PhantomData<()>)
+where
+    Spinlock<N>: SpinlockValid;
+
+impl<const N: usize> Spinlock<N>
+where
+    Spinlock<N>: SpinlockValid,
+{
+    /// Try to claim the spinlock. Will return `Some(Self)` if the lock is obtained, and `None` if the lock is
+    /// already in use somewhere else.
+    pub fn try_claim() -> Option<Self> {
+        // Safety: We're only reading from this register
+        unsafe {
+            let lock = pac::SIO.spinlock(N).read();
+            if lock > 0 {
+                Some(Self(core::marker::PhantomData))
+            } else {
+                None
+            }
+        }
+    }
+
+    /// Clear a locked spin-lock.
+    ///
+    /// # Safety
+    ///
+    /// Only call this function if you hold the spin-lock.
+    pub unsafe fn release() {
+        unsafe {
+            // Write (any value): release the lock
+            pac::SIO.spinlock(N).write_value(1);
+        }
+    }
+}
+
+impl<const N: usize> Drop for Spinlock<N>
+where
+    Spinlock<N>: SpinlockValid,
+{
+    fn drop(&mut self) {
+        // This is safe because we own the object, and hence hold the lock.
+        unsafe { Self::release() }
+    }
+}
+
+pub(crate) type Spinlock31 = Spinlock<31>;
+pub trait SpinlockValid {}
+impl SpinlockValid for Spinlock<31> {}
diff --git a/embassy-rp/src/lib.rs b/embassy-rp/src/lib.rs
index d21b5f7b..9a2fb7fc 100644
--- a/embassy-rp/src/lib.rs
+++ b/embassy-rp/src/lib.rs
@@ -5,6 +5,7 @@
 // This mod MUST go first, so that the others see its macros.
 pub(crate) mod fmt;
 
+mod critical_section_impl;
 mod intrinsics;
 
 pub mod adc;
@@ -23,6 +24,7 @@ pub mod usb;
 
 pub mod clocks;
 pub mod flash;
+pub mod multicore;
 mod reset;
 
 // Reexports
diff --git a/embassy-rp/src/multicore.rs b/embassy-rp/src/multicore.rs
new file mode 100644
index 00000000..dd3b70a6
--- /dev/null
+++ b/embassy-rp/src/multicore.rs
@@ -0,0 +1,266 @@
+//! Multicore support
+//!
+//! This module handles setup of the 2nd cpu core on the rp2040, which we refer to as core1.
+//! It provides functionality for setting up the stack, and starting core1.
+//!
+//! The entrypoint for core1 can be any function that never returns, including closures.
+
+use core::mem::ManuallyDrop;
+use core::sync::atomic::{compiler_fence, Ordering};
+
+use crate::pac;
+
+/// Errors for multicore operations.
+#[derive(Debug)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum Error {
+    /// Operation is invalid on this core.
+    InvalidCore,
+    /// Core was unresponsive to commands.
+    Unresponsive,
+}
+
+/// Core ID
+#[derive(Debug)]
+#[cfg_attr(feature = "defmt", derive(defmt::Format))]
+pub enum CoreId {
+    Core0,
+    Core1,
+}
+
+#[inline(always)]
+fn install_stack_guard(stack_bottom: *mut usize) {
+    let core = unsafe { cortex_m::Peripherals::steal() };
+
+    // Trap if MPU is already configured
+    if core.MPU.ctrl.read() != 0 {
+        cortex_m::asm::udf();
+    }
+
+    // The minimum we can protect is 32 bytes on a 32 byte boundary, so round up which will
+    // just shorten the valid stack range a tad.
+    let addr = (stack_bottom as u32 + 31) & !31;
+    // Mask is 1 bit per 32 bytes of the 256 byte range... clear the bit for the segment we want
+    let subregion_select = 0xff ^ (1 << ((addr >> 5) & 7));
+    unsafe {
+        core.MPU.ctrl.write(5); // enable mpu with background default map
+        core.MPU.rbar.write((addr & !0xff) | 0x8);
+        core.MPU.rasr.write(
+            1 // enable region
+               | (0x7 << 1) // size 2^(7 + 1) = 256
+               | (subregion_select << 8)
+               | 0x10000000, // XN = disable instruction fetch; no other bits means no permissions
+        );
+    }
+}
+
+#[inline(always)]
+fn core1_setup(stack_bottom: *mut usize) {
+    install_stack_guard(stack_bottom);
+    // TODO: irq priorities
+}
+
+/// Multicore execution management.
+pub struct Multicore {
+    cores: (Core, Core),
+}
+
+/// Data type for a properly aligned stack of N 32-bit (usize) words
+#[repr(C, align(32))]
+pub struct Stack<const SIZE: usize> {
+    /// Memory to be used for the stack
+    pub mem: [usize; SIZE],
+}
+
+impl<const SIZE: usize> Stack<SIZE> {
+    /// Construct a stack of length SIZE, initialized to 0
+    pub const fn new() -> Stack<SIZE> {
+        Stack { mem: [0; SIZE] }
+    }
+}
+
+impl Multicore {
+    /// Create a new |Multicore| instance.
+    pub fn new() -> Self {
+        Self {
+            cores: (Core { id: CoreId::Core0 }, Core { id: CoreId::Core1 }),
+        }
+    }
+
+    /// Get the available |Core| instances.
+    pub fn cores(&mut self) -> &mut (Core, Core) {
+        &mut self.cores
+    }
+}
+
+/// A handle for controlling a logical core.
+pub struct Core {
+    pub id: CoreId,
+}
+
+impl Core {
+    /// Spawn a function on this core.
+    pub fn spawn<F>(&mut self, stack: &'static mut [usize], entry: F) -> Result<(), Error>
+    where
+        F: FnOnce() -> bad::Never + Send + 'static,
+    {
+        fn fifo_write(value: u32) {
+            unsafe {
+                let sio = pac::SIO;
+                // Wait for the FIFO to have some space
+                while !sio.fifo().st().read().rdy() {
+                    cortex_m::asm::nop();
+                }
+                // Signal that it's safe for core 0 to get rid of the original value now.
+                sio.fifo().wr().write_value(value);
+            }
+
+            // Fire off an event to the other core.
+            // This is required as the other core may be `wfe` (waiting for event)
+            cortex_m::asm::sev();
+        }
+
+        fn fifo_read() -> u32 {
+            unsafe {
+                let sio = pac::SIO;
+                // Keep trying until FIFO has data
+                loop {
+                    if sio.fifo().st().read().vld() {
+                        return sio.fifo().rd().read();
+                    } else {
+                        // We expect the sending core to `sev` on write.
+                        cortex_m::asm::wfe();
+                    }
+                }
+            }
+        }
+
+        fn fifo_drain() {
+            unsafe {
+                let sio = pac::SIO;
+                while sio.fifo().st().read().vld() {
+                    let _ = sio.fifo().rd().read();
+                }
+            }
+        }
+
+        match self.id {
+            CoreId::Core1 => {
+                // The first two ignored `u64` parameters are there to take up all of the registers,
+                // which means that the rest of the arguments are taken from the stack,
+                // where we're able to put them from core 0.
+                extern "C" fn core1_startup<F: FnOnce() -> bad::Never>(
+                    _: u64,
+                    _: u64,
+                    entry: &mut ManuallyDrop<F>,
+                    stack_bottom: *mut usize,
+                ) -> ! {
+                    core1_setup(stack_bottom);
+                    let entry = unsafe { ManuallyDrop::take(entry) };
+                    // Signal that it's safe for core 0 to get rid of the original value now.
+                    fifo_write(1);
+                    entry()
+                }
+
+                // Reset the core
+                unsafe {
+                    let psm = pac::PSM;
+                    psm.frce_off().modify(|w| w.set_proc1(true));
+                    while !psm.frce_off().read().proc1() {
+                        cortex_m::asm::nop();
+                    }
+                    psm.frce_off().modify(|w| w.set_proc1(false));
+                }
+
+                // Set up the stack
+                let mut stack_ptr = unsafe { stack.as_mut_ptr().add(stack.len()) };
+
+                // We don't want to drop this, since it's getting moved to the other core.
+                let mut entry = ManuallyDrop::new(entry);
+
+                // Push the arguments to `core1_startup` onto the stack.
+                unsafe {
+                    // Push `stack_bottom`.
+                    stack_ptr = stack_ptr.sub(1);
+                    stack_ptr.cast::<*mut usize>().write(stack.as_mut_ptr());
+
+                    // Push `entry`.
+                    stack_ptr = stack_ptr.sub(1);
+                    stack_ptr.cast::<&mut ManuallyDrop<F>>().write(&mut entry);
+                }
+
+                // Make sure the compiler does not reorder the stack writes after to after the
+                // below FIFO writes, which would result in them not being seen by the second
+                // core.
+                //
+                // From the compiler perspective, this doesn't guarantee that the second core
+                // actually sees those writes. However, we know that the RP2040 doesn't have
+                // memory caches, and writes happen in-order.
+                compiler_fence(Ordering::Release);
+
+                let p = unsafe { cortex_m::Peripherals::steal() };
+                let vector_table = p.SCB.vtor.read();
+
+                // After reset, core 1 is waiting to receive commands over FIFO.
+                // This is the sequence to have it jump to some code.
+                let cmd_seq = [
+                    0,
+                    0,
+                    1,
+                    vector_table as usize,
+                    stack_ptr as usize,
+                    core1_startup::<F> as usize,
+                ];
+
+                let mut seq = 0;
+                let mut fails = 0;
+                loop {
+                    let cmd = cmd_seq[seq] as u32;
+                    if cmd == 0 {
+                        fifo_drain();
+                        cortex_m::asm::sev();
+                    }
+                    fifo_write(cmd);
+
+                    let response = fifo_read();
+                    if cmd == response {
+                        seq += 1;
+                    } else {
+                        seq = 0;
+                        fails += 1;
+                        if fails > 16 {
+                            // The second core isn't responding, and isn't going to take the entrypoint,
+                            // so we have to drop it ourselves.
+                            drop(ManuallyDrop::into_inner(entry));
+                            return Err(Error::Unresponsive);
+                        }
+                    }
+                    if seq >= cmd_seq.len() {
+                        break;
+                    }
+                }
+
+                // Wait until the other core has copied `entry` before returning.
+                fifo_read();
+
+                Ok(())
+            }
+            _ => Err(Error::InvalidCore),
+        }
+    }
+}
+
+// https://github.com/nvzqz/bad-rs/blob/master/src/never.rs
+mod bad {
+    pub(crate) type Never = <F as HasOutput>::Output;
+
+    pub trait HasOutput {
+        type Output;
+    }
+
+    impl<O> HasOutput for fn() -> O {
+        type Output = O;
+    }
+
+    type F = fn() -> !;
+}
diff --git a/examples/rp/Cargo.toml b/examples/rp/Cargo.toml
index 60a8ba94..bd624a32 100644
--- a/examples/rp/Cargo.toml
+++ b/examples/rp/Cargo.toml
@@ -18,7 +18,8 @@ embassy-usb-logger = { version = "0.1.0", path = "../../embassy-usb-logger" }
 defmt = "0.3"
 defmt-rtt = "0.4"
 
-cortex-m = { version = "0.7.6", features = ["critical-section-single-core"] }
+#cortex-m = { version = "0.7.6", features = ["critical-section-single-core"] }
+cortex-m = { version = "0.7.6" }
 cortex-m-rt = "0.7.0"
 panic-probe = { version = "0.3", features = ["print-defmt"] }
 futures = { version = "0.3.17", default-features = false, features = ["async-await", "cfg-target-has-atomic", "unstable"] }
diff --git a/examples/rp/src/bin/multicore.rs b/examples/rp/src/bin/multicore.rs
new file mode 100644
index 00000000..46d3cd17
--- /dev/null
+++ b/examples/rp/src/bin/multicore.rs
@@ -0,0 +1,62 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::*;
+use embassy_executor::Executor;
+use embassy_executor::_export::StaticCell;
+use embassy_rp::gpio::{Level, Output};
+use embassy_rp::peripherals::PIN_25;
+use embassy_sync::blocking_mutex::raw::CriticalSectionRawMutex;
+use embassy_sync::channel::Channel;
+use embassy_time::{Duration, Timer};
+use embassy_rp::multicore::{Multicore, Stack};
+use {defmt_rtt as _, panic_probe as _};
+
+static mut CORE1_STACK: Stack<4096> = Stack::new();
+static EXECUTOR0: StaticCell<Executor> = StaticCell::new();
+static EXECUTOR1: StaticCell<Executor> = StaticCell::new();
+static CHANNEL: Channel<CriticalSectionRawMutex, LedState, 1> = Channel::new();
+
+enum LedState {
+    On,
+    Off,
+}
+
+#[cortex_m_rt::entry]
+fn main() -> ! {
+    let p = embassy_rp::init(Default::default());
+    let led = Output::new(p.PIN_25, Level::Low);
+
+    let mut mc = Multicore::new();
+    let (_, core1) = mc.cores();
+    let _ = core1.spawn(unsafe { &mut CORE1_STACK.mem }, move || {
+        let executor1 = EXECUTOR1.init(Executor::new());
+        executor1.run(|spawner| unwrap!(spawner.spawn(core1_task(led))));
+    });
+
+    let executor0 = EXECUTOR0.init(Executor::new());
+    executor0.run(|spawner| unwrap!(spawner.spawn(core0_task())));
+}
+
+#[embassy_executor::task]
+async fn core0_task() {
+    info!("Hello from core 0");
+    loop {
+        CHANNEL.send(LedState::On).await;
+        Timer::after(Duration::from_millis(100)).await;
+        CHANNEL.send(LedState::Off).await;
+        Timer::after(Duration::from_millis(400)).await;
+    }
+}
+
+#[embassy_executor::task]
+async fn core1_task(mut led: Output<'static, PIN_25>) {
+    info!("Hello from core 1");
+    loop {
+        match CHANNEL.recv().await {
+            LedState::On => led.set_high(),
+            LedState::Off => led.set_low(),
+        }
+    }
+}
\ No newline at end of file

From 34eaade14fbf521d2e0e37bdeb870787a184ee2e Mon Sep 17 00:00:00 2001
From: kalkyl <henrik.alser@me.com>
Date: Sat, 10 Dec 2022 08:33:09 +0100
Subject: [PATCH 02/10] fmt

---
 examples/rp/src/bin/multicore.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/rp/src/bin/multicore.rs b/examples/rp/src/bin/multicore.rs
index 46d3cd17..cfb91e21 100644
--- a/examples/rp/src/bin/multicore.rs
+++ b/examples/rp/src/bin/multicore.rs
@@ -6,11 +6,11 @@ use defmt::*;
 use embassy_executor::Executor;
 use embassy_executor::_export::StaticCell;
 use embassy_rp::gpio::{Level, Output};
+use embassy_rp::multicore::{Multicore, Stack};
 use embassy_rp::peripherals::PIN_25;
 use embassy_sync::blocking_mutex::raw::CriticalSectionRawMutex;
 use embassy_sync::channel::Channel;
 use embassy_time::{Duration, Timer};
-use embassy_rp::multicore::{Multicore, Stack};
 use {defmt_rtt as _, panic_probe as _};
 
 static mut CORE1_STACK: Stack<4096> = Stack::new();
@@ -59,4 +59,4 @@ async fn core1_task(mut led: Output<'static, PIN_25>) {
             LedState::Off => led.set_low(),
         }
     }
-}
\ No newline at end of file
+}

From cc0248d83ad34941480cd71d0255054d5317ab74 Mon Sep 17 00:00:00 2001
From: kalkyl <henrik.alser@me.com>
Date: Sat, 10 Dec 2022 12:42:08 +0100
Subject: [PATCH 03/10] Select critical-section in tests

---
 tests/rp/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/rp/Cargo.toml b/tests/rp/Cargo.toml
index a07b479e..ffde4c7f 100644
--- a/tests/rp/Cargo.toml
+++ b/tests/rp/Cargo.toml
@@ -14,7 +14,7 @@ embassy-futures = { version = "0.1.0", path = "../../embassy-futures" }
 defmt = "0.3.0"
 defmt-rtt = "0.4"
 
-cortex-m = { version = "0.7.6", features = ["critical-section-single-core"] }
+cortex-m = { version = "0.7.6" }
 cortex-m-rt = "0.7.0"
 embedded-hal = "0.2.6"
 embedded-hal-1 = { package = "embedded-hal", version = "=1.0.0-alpha.9" }

From d8821cfd41eb1776b904a5766be43f242af938f7 Mon Sep 17 00:00:00 2001
From: kalkyl <henrik.alser@me.com>
Date: Sat, 10 Dec 2022 12:57:45 +0100
Subject: [PATCH 04/10] Feature gate critical-section-impl

---
 embassy-rp/Cargo.toml       | 5 ++++-
 embassy-rp/src/lib.rs       | 2 ++
 embassy-rp/src/multicore.rs | 4 +++-
 examples/rp/Cargo.toml      | 2 +-
 tests/rp/Cargo.toml         | 2 +-
 5 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/embassy-rp/Cargo.toml b/embassy-rp/Cargo.toml
index 07cd1bc1..694f0dc7 100644
--- a/embassy-rp/Cargo.toml
+++ b/embassy-rp/Cargo.toml
@@ -15,6 +15,9 @@ flavors = [
 [features]
 defmt = ["dep:defmt", "embassy-usb-driver?/defmt", "embassy-hal-common/defmt"]
 
+# critical section that is safe for multicore use
+critical-section-impl = ["critical-section/restore-state-u8"]
+
 # Reexport the PAC for the currently enabled chip at `embassy_rp::pac`.
 # This is unstable because semver-minor (non-breaking) releases of embassy-rp may major-bump (breaking) the PAC version.
 # If this is an issue for you, you're encouraged to directly depend on a fixed version of the PAC.
@@ -50,7 +53,7 @@ nb = "1.0.0"
 cfg-if = "1.0.0"
 cortex-m-rt = ">=0.6.15,<0.8"
 cortex-m = "0.7.6"
-critical-section = { version = "1.1", features = ["restore-state-u8"] }
+critical-section = "1.1"
 futures = { version = "0.3.17", default-features = false, features = ["async-await"] }
 chrono = { version = "0.4", default-features = false, optional = true }
 embedded-io = { version = "0.4.0", features = ["async"], optional = true }
diff --git a/embassy-rp/src/lib.rs b/embassy-rp/src/lib.rs
index 9a2fb7fc..2cd99f45 100644
--- a/embassy-rp/src/lib.rs
+++ b/embassy-rp/src/lib.rs
@@ -5,7 +5,9 @@
 // This mod MUST go first, so that the others see its macros.
 pub(crate) mod fmt;
 
+#[cfg(feature = "critical-section-impl")]
 mod critical_section_impl;
+
 mod intrinsics;
 
 pub mod adc;
diff --git a/embassy-rp/src/multicore.rs b/embassy-rp/src/multicore.rs
index dd3b70a6..cc5c192b 100644
--- a/embassy-rp/src/multicore.rs
+++ b/embassy-rp/src/multicore.rs
@@ -4,6 +4,9 @@
 //! It provides functionality for setting up the stack, and starting core1.
 //!
 //! The entrypoint for core1 can be any function that never returns, including closures.
+//!
+//! Enable the `critical-section-impl` feature in embassy-rp when sharing data across cores using
+//! the `embassy-sync` primitives and `CriticalSectionRawMutex`.
 
 use core::mem::ManuallyDrop;
 use core::sync::atomic::{compiler_fence, Ordering};
@@ -57,7 +60,6 @@ fn install_stack_guard(stack_bottom: *mut usize) {
 #[inline(always)]
 fn core1_setup(stack_bottom: *mut usize) {
     install_stack_guard(stack_bottom);
-    // TODO: irq priorities
 }
 
 /// Multicore execution management.
diff --git a/examples/rp/Cargo.toml b/examples/rp/Cargo.toml
index bd624a32..34a2d36d 100644
--- a/examples/rp/Cargo.toml
+++ b/examples/rp/Cargo.toml
@@ -9,7 +9,7 @@ license = "MIT OR Apache-2.0"
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
 embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt", "defmt-timestamp-uptime"] }
-embassy-rp = { version = "0.1.0", path = "../../embassy-rp", features = ["defmt", "unstable-traits", "nightly", "unstable-pac", "time-driver"] }
+embassy-rp = { version = "0.1.0", path = "../../embassy-rp", features = ["defmt", "unstable-traits", "nightly", "unstable-pac", "time-driver", "critical-section-impl"] }
 embassy-usb = { version = "0.1.0", path = "../../embassy-usb", features = ["defmt"] }
 embassy-net = { version = "0.1.0", path = "../../embassy-net", features = ["defmt", "nightly", "tcp", "dhcpv4", "medium-ethernet", "pool-16"] }
 embassy-futures = { version = "0.1.0", path = "../../embassy-futures" }
diff --git a/tests/rp/Cargo.toml b/tests/rp/Cargo.toml
index ffde4c7f..572a9ce8 100644
--- a/tests/rp/Cargo.toml
+++ b/tests/rp/Cargo.toml
@@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0"
 embassy-sync = { version = "0.1.0", path = "../../embassy-sync", features = ["defmt"] }
 embassy-executor = { version = "0.1.0", path = "../../embassy-executor", features = ["defmt", "integrated-timers"] }
 embassy-time = { version = "0.1.0", path = "../../embassy-time", features = ["defmt"] }
-embassy-rp = { version = "0.1.0", path = "../../embassy-rp", features = ["nightly", "defmt", "unstable-pac", "unstable-traits", "time-driver"]  }
+embassy-rp = { version = "0.1.0", path = "../../embassy-rp", features = ["nightly", "defmt", "unstable-pac", "unstable-traits", "time-driver", "critical-section-impl"]  }
 embassy-futures = { version = "0.1.0", path = "../../embassy-futures" }
 
 defmt = "0.3.0"

From 96d6c7243b7b5f7f8c90dab666ded0ca0cf29c75 Mon Sep 17 00:00:00 2001
From: kalkyl <henrik.alser@me.com>
Date: Sat, 10 Dec 2022 13:43:29 +0100
Subject: [PATCH 05/10] Cleanup

---
 embassy-rp/src/multicore.rs      | 12 ++++++------
 examples/rp/src/bin/multicore.rs |  7 +++----
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/embassy-rp/src/multicore.rs b/embassy-rp/src/multicore.rs
index cc5c192b..bf635db2 100644
--- a/embassy-rp/src/multicore.rs
+++ b/embassy-rp/src/multicore.rs
@@ -1,4 +1,4 @@
-//! Multicore support
+//! MultiCore support
 //!
 //! This module handles setup of the 2nd cpu core on the rp2040, which we refer to as core1.
 //! It provides functionality for setting up the stack, and starting core1.
@@ -62,9 +62,9 @@ fn core1_setup(stack_bottom: *mut usize) {
     install_stack_guard(stack_bottom);
 }
 
-/// Multicore execution management.
-pub struct Multicore {
-    cores: (Core, Core),
+/// MultiCore execution management.
+pub struct MultiCore {
+    pub cores: (Core, Core),
 }
 
 /// Data type for a properly aligned stack of N 32-bit (usize) words
@@ -81,8 +81,8 @@ impl<const SIZE: usize> Stack<SIZE> {
     }
 }
 
-impl Multicore {
-    /// Create a new |Multicore| instance.
+impl MultiCore {
+    /// Create a new |MultiCore| instance.
     pub fn new() -> Self {
         Self {
             cores: (Core { id: CoreId::Core0 }, Core { id: CoreId::Core1 }),
diff --git a/examples/rp/src/bin/multicore.rs b/examples/rp/src/bin/multicore.rs
index cfb91e21..53941da6 100644
--- a/examples/rp/src/bin/multicore.rs
+++ b/examples/rp/src/bin/multicore.rs
@@ -6,7 +6,7 @@ use defmt::*;
 use embassy_executor::Executor;
 use embassy_executor::_export::StaticCell;
 use embassy_rp::gpio::{Level, Output};
-use embassy_rp::multicore::{Multicore, Stack};
+use embassy_rp::multicore::{MultiCore, Stack};
 use embassy_rp::peripherals::PIN_25;
 use embassy_sync::blocking_mutex::raw::CriticalSectionRawMutex;
 use embassy_sync::channel::Channel;
@@ -28,9 +28,8 @@ fn main() -> ! {
     let p = embassy_rp::init(Default::default());
     let led = Output::new(p.PIN_25, Level::Low);
 
-    let mut mc = Multicore::new();
-    let (_, core1) = mc.cores();
-    let _ = core1.spawn(unsafe { &mut CORE1_STACK.mem }, move || {
+    let mut mc = MultiCore::new();
+    let _ = mc.cores.1.spawn(unsafe { &mut CORE1_STACK.mem }, move || {
         let executor1 = EXECUTOR1.init(Executor::new());
         executor1.run(|spawner| unwrap!(spawner.spawn(core1_task(led))));
     });

From eb1d2e1295cfd2f0580355d69c93387898eaabd4 Mon Sep 17 00:00:00 2001
From: kalkyl <henrik.alser@me.com>
Date: Tue, 13 Dec 2022 04:02:28 +0100
Subject: [PATCH 06/10] Pause CORE1 execution during flash operations

---
 embassy-rp/src/flash.rs     |  29 ++-
 embassy-rp/src/multicore.rs | 343 +++++++++++++++++++++---------------
 2 files changed, 223 insertions(+), 149 deletions(-)

diff --git a/embassy-rp/src/flash.rs b/embassy-rp/src/flash.rs
index a972d5f6..f2137ebe 100644
--- a/embassy-rp/src/flash.rs
+++ b/embassy-rp/src/flash.rs
@@ -6,6 +6,7 @@ use embedded_storage::nor_flash::{
     ReadNorFlash,
 };
 
+use crate::pac;
 use crate::peripherals::FLASH;
 
 pub const FLASH_BASE: usize = 0x10000000;
@@ -28,6 +29,7 @@ pub enum Error {
     OutOfBounds,
     /// Unaligned operation or using unaligned buffers.
     Unaligned,
+    InvalidCore,
     Other,
 }
 
@@ -46,7 +48,7 @@ impl NorFlashError for Error {
         match self {
             Self::OutOfBounds => NorFlashErrorKind::OutOfBounds,
             Self::Unaligned => NorFlashErrorKind::NotAligned,
-            Self::Other => NorFlashErrorKind::Other,
+            _ => NorFlashErrorKind::Other,
         }
     }
 }
@@ -87,7 +89,7 @@ impl<'d, T: Instance, const FLASH_SIZE: usize> Flash<'d, T, FLASH_SIZE> {
 
         let len = to - from;
 
-        unsafe { self.in_ram(|| ram_helpers::flash_range_erase(from, len, true)) };
+        unsafe { self.in_ram(|| ram_helpers::flash_range_erase(from, len, true))? };
 
         Ok(())
     }
@@ -112,7 +114,7 @@ impl<'d, T: Instance, const FLASH_SIZE: usize> Flash<'d, T, FLASH_SIZE> {
 
             let unaligned_offset = offset as usize - start;
 
-            unsafe { self.in_ram(|| ram_helpers::flash_range_program(unaligned_offset as u32, &pad_buf, true)) }
+            unsafe { self.in_ram(|| ram_helpers::flash_range_program(unaligned_offset as u32, &pad_buf, true))? }
         }
 
         let remaining_len = bytes.len() - start_padding;
@@ -130,12 +132,12 @@ impl<'d, T: Instance, const FLASH_SIZE: usize> Flash<'d, T, FLASH_SIZE> {
             if bytes.as_ptr() as usize >= 0x2000_0000 {
                 let aligned_data = &bytes[start_padding..end_padding];
 
-                unsafe { self.in_ram(|| ram_helpers::flash_range_program(aligned_offset as u32, aligned_data, true)) }
+                unsafe { self.in_ram(|| ram_helpers::flash_range_program(aligned_offset as u32, aligned_data, true))? }
             } else {
                 for chunk in bytes[start_padding..end_padding].chunks_exact(PAGE_SIZE) {
                     let mut ram_buf = [0xFF_u8; PAGE_SIZE];
                     ram_buf.copy_from_slice(chunk);
-                    unsafe { self.in_ram(|| ram_helpers::flash_range_program(aligned_offset as u32, &ram_buf, true)) }
+                    unsafe { self.in_ram(|| ram_helpers::flash_range_program(aligned_offset as u32, &ram_buf, true))? }
                     aligned_offset += PAGE_SIZE;
                 }
             }
@@ -150,7 +152,7 @@ impl<'d, T: Instance, const FLASH_SIZE: usize> Flash<'d, T, FLASH_SIZE> {
 
             let unaligned_offset = end_offset - (PAGE_SIZE - rem_offset);
 
-            unsafe { self.in_ram(|| ram_helpers::flash_range_program(unaligned_offset as u32, &pad_buf, true)) }
+            unsafe { self.in_ram(|| ram_helpers::flash_range_program(unaligned_offset as u32, &pad_buf, true))? }
         }
 
         Ok(())
@@ -159,10 +161,17 @@ impl<'d, T: Instance, const FLASH_SIZE: usize> Flash<'d, T, FLASH_SIZE> {
     /// Make sure to uphold the contract points with rp2040-flash.
     /// - interrupts must be disabled
     /// - DMA must not access flash memory
-    unsafe fn in_ram(&mut self, operation: impl FnOnce()) {
+    unsafe fn in_ram(&mut self, operation: impl FnOnce()) -> Result<(), Error> {
         let dma_status = &mut [false; crate::dma::CHANNEL_COUNT];
 
-        // TODO: Make sure CORE1 is paused during the entire duration of the RAM function
+        // Make sure we're running on CORE0
+        let core_id: u32 = unsafe { pac::SIO.cpuid().read() };
+        if core_id != 0 {
+            return Err(Error::InvalidCore);
+        }
+
+        // Make sure CORE1 is paused during the entire duration of the RAM function
+        crate::multicore::pause_core1();
 
         critical_section::with(|_| {
             // Pause all DMA channels for the duration of the ram operation
@@ -185,6 +194,10 @@ impl<'d, T: Instance, const FLASH_SIZE: usize> Flash<'d, T, FLASH_SIZE> {
                 }
             }
         });
+
+        // Resume CORE1 execution
+        crate::multicore::resume_core1();
+        Ok(())
     }
 }
 
diff --git a/embassy-rp/src/multicore.rs b/embassy-rp/src/multicore.rs
index bf635db2..09d40b2e 100644
--- a/embassy-rp/src/multicore.rs
+++ b/embassy-rp/src/multicore.rs
@@ -11,14 +11,19 @@
 use core::mem::ManuallyDrop;
 use core::sync::atomic::{compiler_fence, Ordering};
 
-use crate::pac;
+use atomic_polyfill::AtomicBool;
+
+use crate::interrupt::{Interrupt, InterruptExt};
+use crate::{interrupt, pac};
+
+const PAUSE_TOKEN: u32 = 0xDEADBEEF;
+const RESUME_TOKEN: u32 = !0xDEADBEEF;
+static IS_CORE1_INIT: AtomicBool = AtomicBool::new(false);
 
 /// Errors for multicore operations.
 #[derive(Debug)]
 #[cfg_attr(feature = "defmt", derive(defmt::Format))]
 pub enum Error {
-    /// Operation is invalid on this core.
-    InvalidCore,
     /// Core was unresponsive to commands.
     Unresponsive,
 }
@@ -64,7 +69,7 @@ fn core1_setup(stack_bottom: *mut usize) {
 
 /// MultiCore execution management.
 pub struct MultiCore {
-    pub cores: (Core, Core),
+    pub cores: (Core0, Core1),
 }
 
 /// Data type for a properly aligned stack of N 32-bit (usize) words
@@ -85,169 +90,225 @@ impl MultiCore {
     /// Create a new |MultiCore| instance.
     pub fn new() -> Self {
         Self {
-            cores: (Core { id: CoreId::Core0 }, Core { id: CoreId::Core1 }),
+            cores: (Core0 {}, Core1 {}),
         }
     }
 
     /// Get the available |Core| instances.
-    pub fn cores(&mut self) -> &mut (Core, Core) {
+    pub fn cores(&mut self) -> &mut (Core0, Core1) {
         &mut self.cores
     }
 }
 
 /// A handle for controlling a logical core.
-pub struct Core {
-    pub id: CoreId,
+pub struct Core0 {}
+/// A handle for controlling a logical core.
+pub struct Core1 {}
+
+#[interrupt]
+#[link_section = ".data.ram_func"]
+unsafe fn SIO_IRQ_PROC1() {
+    let sio = pac::SIO;
+    // Clear IRQ
+    sio.fifo().st().write(|w| w.set_wof(false));
+
+    while sio.fifo().st().read().vld() {
+        // Pause CORE1 execution and disable interrupts
+        if fifo_read_wfe() == PAUSE_TOKEN {
+            cortex_m::interrupt::disable();
+            // Signal to CORE0 that execution is paused
+            fifo_write(PAUSE_TOKEN);
+            // Wait for `resume` signal from CORE0
+            while fifo_read_wfe() != RESUME_TOKEN {
+                cortex_m::asm::nop();
+            }
+            cortex_m::interrupt::enable();
+            // Signal to CORE0 that execution is resumed
+            fifo_write(RESUME_TOKEN);
+        }
+    }
 }
 
-impl Core {
-    /// Spawn a function on this core.
+impl Core1 {
+    /// Spawn a function on this core
     pub fn spawn<F>(&mut self, stack: &'static mut [usize], entry: F) -> Result<(), Error>
     where
         F: FnOnce() -> bad::Never + Send + 'static,
     {
-        fn fifo_write(value: u32) {
-            unsafe {
-                let sio = pac::SIO;
-                // Wait for the FIFO to have some space
-                while !sio.fifo().st().read().rdy() {
-                    cortex_m::asm::nop();
-                }
-                // Signal that it's safe for core 0 to get rid of the original value now.
-                sio.fifo().wr().write_value(value);
-            }
+        // The first two ignored `u64` parameters are there to take up all of the registers,
+        // which means that the rest of the arguments are taken from the stack,
+        // where we're able to put them from core 0.
+        extern "C" fn core1_startup<F: FnOnce() -> bad::Never>(
+            _: u64,
+            _: u64,
+            entry: &mut ManuallyDrop<F>,
+            stack_bottom: *mut usize,
+        ) -> ! {
+            core1_setup(stack_bottom);
+            let entry = unsafe { ManuallyDrop::take(entry) };
+            // Signal that it's safe for core 0 to get rid of the original value now.
+            fifo_write(1);
 
-            // Fire off an event to the other core.
-            // This is required as the other core may be `wfe` (waiting for event)
-            cortex_m::asm::sev();
+            IS_CORE1_INIT.store(true, Ordering::Release);
+            // Enable fifo interrupt on CORE1 for `pause` functionality.
+            let irq = unsafe { interrupt::SIO_IRQ_PROC1::steal() };
+            irq.enable();
+
+            entry()
         }
 
-        fn fifo_read() -> u32 {
-            unsafe {
-                let sio = pac::SIO;
-                // Keep trying until FIFO has data
-                loop {
-                    if sio.fifo().st().read().vld() {
-                        return sio.fifo().rd().read();
-                    } else {
-                        // We expect the sending core to `sev` on write.
-                        cortex_m::asm::wfe();
-                    }
+        // Reset the core
+        unsafe {
+            let psm = pac::PSM;
+            psm.frce_off().modify(|w| w.set_proc1(true));
+            while !psm.frce_off().read().proc1() {
+                cortex_m::asm::nop();
+            }
+            psm.frce_off().modify(|w| w.set_proc1(false));
+        }
+
+        // Set up the stack
+        let mut stack_ptr = unsafe { stack.as_mut_ptr().add(stack.len()) };
+
+        // We don't want to drop this, since it's getting moved to the other core.
+        let mut entry = ManuallyDrop::new(entry);
+
+        // Push the arguments to `core1_startup` onto the stack.
+        unsafe {
+            // Push `stack_bottom`.
+            stack_ptr = stack_ptr.sub(1);
+            stack_ptr.cast::<*mut usize>().write(stack.as_mut_ptr());
+
+            // Push `entry`.
+            stack_ptr = stack_ptr.sub(1);
+            stack_ptr.cast::<&mut ManuallyDrop<F>>().write(&mut entry);
+        }
+
+        // Make sure the compiler does not reorder the stack writes after to after the
+        // below FIFO writes, which would result in them not being seen by the second
+        // core.
+        //
+        // From the compiler perspective, this doesn't guarantee that the second core
+        // actually sees those writes. However, we know that the RP2040 doesn't have
+        // memory caches, and writes happen in-order.
+        compiler_fence(Ordering::Release);
+
+        let p = unsafe { cortex_m::Peripherals::steal() };
+        let vector_table = p.SCB.vtor.read();
+
+        // After reset, core 1 is waiting to receive commands over FIFO.
+        // This is the sequence to have it jump to some code.
+        let cmd_seq = [
+            0,
+            0,
+            1,
+            vector_table as usize,
+            stack_ptr as usize,
+            core1_startup::<F> as usize,
+        ];
+
+        let mut seq = 0;
+        let mut fails = 0;
+        loop {
+            let cmd = cmd_seq[seq] as u32;
+            if cmd == 0 {
+                fifo_drain();
+                cortex_m::asm::sev();
+            }
+            fifo_write(cmd);
+
+            let response = fifo_read();
+            if cmd == response {
+                seq += 1;
+            } else {
+                seq = 0;
+                fails += 1;
+                if fails > 16 {
+                    // The second core isn't responding, and isn't going to take the entrypoint,
+                    // so we have to drop it ourselves.
+                    drop(ManuallyDrop::into_inner(entry));
+                    return Err(Error::Unresponsive);
                 }
             }
+            if seq >= cmd_seq.len() {
+                break;
+            }
         }
 
-        fn fifo_drain() {
-            unsafe {
-                let sio = pac::SIO;
-                while sio.fifo().st().read().vld() {
-                    let _ = sio.fifo().rd().read();
-                }
-            }
+        // Wait until the other core has copied `entry` before returning.
+        fifo_read();
+
+        Ok(())
+    }
+}
+
+/// Pause execution on CORE1.
+pub fn pause_core1() {
+    if IS_CORE1_INIT.load(Ordering::Acquire) {
+        fifo_write(PAUSE_TOKEN);
+        // Wait for CORE1 to signal it has paused execution.
+        while fifo_read() != PAUSE_TOKEN {}
+    }
+}
+
+/// Resume CORE1 execution.
+pub fn resume_core1() {
+    if IS_CORE1_INIT.load(Ordering::Acquire) {
+        fifo_write(RESUME_TOKEN);
+        // Wait for CORE1 to signal it has resumed execution.
+        while fifo_read() != RESUME_TOKEN {}
+    }
+}
+
+// Push a value to the inter-core FIFO, block until space is available
+#[inline(always)]
+fn fifo_write(value: u32) {
+    unsafe {
+        let sio = pac::SIO;
+        // Wait for the FIFO to have enough space
+        while !sio.fifo().st().read().rdy() {
+            cortex_m::asm::nop();
         }
+        sio.fifo().wr().write_value(value);
+    }
+    // Fire off an event to the other core.
+    // This is required as the other core may be `wfe` (waiting for event)
+    cortex_m::asm::sev();
+}
 
-        match self.id {
-            CoreId::Core1 => {
-                // The first two ignored `u64` parameters are there to take up all of the registers,
-                // which means that the rest of the arguments are taken from the stack,
-                // where we're able to put them from core 0.
-                extern "C" fn core1_startup<F: FnOnce() -> bad::Never>(
-                    _: u64,
-                    _: u64,
-                    entry: &mut ManuallyDrop<F>,
-                    stack_bottom: *mut usize,
-                ) -> ! {
-                    core1_setup(stack_bottom);
-                    let entry = unsafe { ManuallyDrop::take(entry) };
-                    // Signal that it's safe for core 0 to get rid of the original value now.
-                    fifo_write(1);
-                    entry()
-                }
+// Pop a value from inter-core FIFO, block until available
+#[inline(always)]
+fn fifo_read() -> u32 {
+    unsafe {
+        let sio = pac::SIO;
+        // Wait until FIFO has data
+        while !sio.fifo().st().read().vld() {
+            cortex_m::asm::nop();
+        }
+        sio.fifo().rd().read()
+    }
+}
 
-                // Reset the core
-                unsafe {
-                    let psm = pac::PSM;
-                    psm.frce_off().modify(|w| w.set_proc1(true));
-                    while !psm.frce_off().read().proc1() {
-                        cortex_m::asm::nop();
-                    }
-                    psm.frce_off().modify(|w| w.set_proc1(false));
-                }
+// Pop a value from inter-core FIFO, `wfe` until available
+#[inline(always)]
+fn fifo_read_wfe() -> u32 {
+    unsafe {
+        let sio = pac::SIO;
+        // Wait until FIFO has data
+        while !sio.fifo().st().read().vld() {
+            cortex_m::asm::wfe();
+        }
+        sio.fifo().rd().read()
+    }
+}
 
-                // Set up the stack
-                let mut stack_ptr = unsafe { stack.as_mut_ptr().add(stack.len()) };
-
-                // We don't want to drop this, since it's getting moved to the other core.
-                let mut entry = ManuallyDrop::new(entry);
-
-                // Push the arguments to `core1_startup` onto the stack.
-                unsafe {
-                    // Push `stack_bottom`.
-                    stack_ptr = stack_ptr.sub(1);
-                    stack_ptr.cast::<*mut usize>().write(stack.as_mut_ptr());
-
-                    // Push `entry`.
-                    stack_ptr = stack_ptr.sub(1);
-                    stack_ptr.cast::<&mut ManuallyDrop<F>>().write(&mut entry);
-                }
-
-                // Make sure the compiler does not reorder the stack writes after to after the
-                // below FIFO writes, which would result in them not being seen by the second
-                // core.
-                //
-                // From the compiler perspective, this doesn't guarantee that the second core
-                // actually sees those writes. However, we know that the RP2040 doesn't have
-                // memory caches, and writes happen in-order.
-                compiler_fence(Ordering::Release);
-
-                let p = unsafe { cortex_m::Peripherals::steal() };
-                let vector_table = p.SCB.vtor.read();
-
-                // After reset, core 1 is waiting to receive commands over FIFO.
-                // This is the sequence to have it jump to some code.
-                let cmd_seq = [
-                    0,
-                    0,
-                    1,
-                    vector_table as usize,
-                    stack_ptr as usize,
-                    core1_startup::<F> as usize,
-                ];
-
-                let mut seq = 0;
-                let mut fails = 0;
-                loop {
-                    let cmd = cmd_seq[seq] as u32;
-                    if cmd == 0 {
-                        fifo_drain();
-                        cortex_m::asm::sev();
-                    }
-                    fifo_write(cmd);
-
-                    let response = fifo_read();
-                    if cmd == response {
-                        seq += 1;
-                    } else {
-                        seq = 0;
-                        fails += 1;
-                        if fails > 16 {
-                            // The second core isn't responding, and isn't going to take the entrypoint,
-                            // so we have to drop it ourselves.
-                            drop(ManuallyDrop::into_inner(entry));
-                            return Err(Error::Unresponsive);
-                        }
-                    }
-                    if seq >= cmd_seq.len() {
-                        break;
-                    }
-                }
-
-                // Wait until the other core has copied `entry` before returning.
-                fifo_read();
-
-                Ok(())
-            }
-            _ => Err(Error::InvalidCore),
+// Drain inter-core FIFO
+#[inline(always)]
+fn fifo_drain() {
+    unsafe {
+        let sio = pac::SIO;
+        while sio.fifo().st().read().vld() {
+            let _ = sio.fifo().rd().read();
         }
     }
 }

From aea28c8aa0bc13251835c6e38f4e1fbcbd30f4db Mon Sep 17 00:00:00 2001
From: kalkyl <henrik.alser@me.com>
Date: Tue, 13 Dec 2022 09:45:11 +0100
Subject: [PATCH 07/10] Add usage in to docs

---
 embassy-rp/src/multicore.rs | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/embassy-rp/src/multicore.rs b/embassy-rp/src/multicore.rs
index 09d40b2e..3703fe81 100644
--- a/embassy-rp/src/multicore.rs
+++ b/embassy-rp/src/multicore.rs
@@ -1,4 +1,4 @@
-//! MultiCore support
+//! Multicore support
 //!
 //! This module handles setup of the 2nd cpu core on the rp2040, which we refer to as core1.
 //! It provides functionality for setting up the stack, and starting core1.
@@ -7,6 +7,28 @@
 //!
 //! Enable the `critical-section-impl` feature in embassy-rp when sharing data across cores using
 //! the `embassy-sync` primitives and `CriticalSectionRawMutex`.
+//!
+//! # Usage
+//! ```no_run
+//! static mut CORE1_STACK: Stack<4096> = Stack::new();
+//! static EXECUTOR0: StaticCell<Executor> = StaticCell::new();
+//! static EXECUTOR1: StaticCell<Executor> = StaticCell::new();
+//!
+//! #[cortex_m_rt::entry]
+//! fn main() -> ! {
+//!     let p = embassy_rp::init(Default::default());
+//!
+//!     let mut mc = MultiCore::new();
+//!     let _ = mc.cores.1.spawn(unsafe { &mut CORE1_STACK.mem }, move || {
+//!         let executor1 = EXECUTOR1.init(Executor::new());
+//!         executor1.run(|spawner| unwrap!(spawner.spawn(core1_task())));
+//!     });
+//!
+//!     let executor0 = EXECUTOR0.init(Executor::new());
+//!     executor0.run(|spawner| unwrap!(spawner.spawn(core0_task())));
+//! }
+//! ```
+//!
 
 use core::mem::ManuallyDrop;
 use core::sync::atomic::{compiler_fence, Ordering};
@@ -28,14 +50,6 @@ pub enum Error {
     Unresponsive,
 }
 
-/// Core ID
-#[derive(Debug)]
-#[cfg_attr(feature = "defmt", derive(defmt::Format))]
-pub enum CoreId {
-    Core0,
-    Core1,
-}
-
 #[inline(always)]
 fn install_stack_guard(stack_bottom: *mut usize) {
     let core = unsafe { cortex_m::Peripherals::steal() };

From 13d9d8fde109c09e310cbf1735917a768f4a1cf6 Mon Sep 17 00:00:00 2001
From: kalkyl <henrik.alser@me.com>
Date: Tue, 13 Dec 2022 13:49:51 +0100
Subject: [PATCH 08/10] Refactor after review

---
 embassy-rp/src/lib.rs            |   2 +
 embassy-rp/src/multicore.rs      | 251 +++++++++++++------------------
 examples/rp/src/bin/multicore.rs |   5 +-
 tests/rp/src/bin/multicore.rs    |  47 ++++++
 4 files changed, 159 insertions(+), 146 deletions(-)
 create mode 100644 tests/rp/src/bin/multicore.rs

diff --git a/embassy-rp/src/lib.rs b/embassy-rp/src/lib.rs
index 2cd99f45..c6442c56 100644
--- a/embassy-rp/src/lib.rs
+++ b/embassy-rp/src/lib.rs
@@ -106,6 +106,8 @@ embassy_hal_common::peripherals! {
     FLASH,
 
     ADC,
+
+    CORE1,
 }
 
 #[link_section = ".boot2"]
diff --git a/embassy-rp/src/multicore.rs b/embassy-rp/src/multicore.rs
index 3703fe81..2dfa215b 100644
--- a/embassy-rp/src/multicore.rs
+++ b/embassy-rp/src/multicore.rs
@@ -36,20 +36,13 @@ use core::sync::atomic::{compiler_fence, Ordering};
 use atomic_polyfill::AtomicBool;
 
 use crate::interrupt::{Interrupt, InterruptExt};
+use crate::peripherals::CORE1;
 use crate::{interrupt, pac};
 
 const PAUSE_TOKEN: u32 = 0xDEADBEEF;
 const RESUME_TOKEN: u32 = !0xDEADBEEF;
 static IS_CORE1_INIT: AtomicBool = AtomicBool::new(false);
 
-/// Errors for multicore operations.
-#[derive(Debug)]
-#[cfg_attr(feature = "defmt", derive(defmt::Format))]
-pub enum Error {
-    /// Core was unresponsive to commands.
-    Unresponsive,
-}
-
 #[inline(always)]
 fn install_stack_guard(stack_bottom: *mut usize) {
     let core = unsafe { cortex_m::Peripherals::steal() };
@@ -81,44 +74,20 @@ fn core1_setup(stack_bottom: *mut usize) {
     install_stack_guard(stack_bottom);
 }
 
-/// MultiCore execution management.
-pub struct MultiCore {
-    pub cores: (Core0, Core1),
-}
-
-/// Data type for a properly aligned stack of N 32-bit (usize) words
+/// Data type for a properly aligned stack of N bytes
 #[repr(C, align(32))]
 pub struct Stack<const SIZE: usize> {
     /// Memory to be used for the stack
-    pub mem: [usize; SIZE],
+    pub mem: [u8; SIZE],
 }
 
 impl<const SIZE: usize> Stack<SIZE> {
     /// Construct a stack of length SIZE, initialized to 0
     pub const fn new() -> Stack<SIZE> {
-        Stack { mem: [0; SIZE] }
+        Stack { mem: [0_u8; SIZE] }
     }
 }
 
-impl MultiCore {
-    /// Create a new |MultiCore| instance.
-    pub fn new() -> Self {
-        Self {
-            cores: (Core0 {}, Core1 {}),
-        }
-    }
-
-    /// Get the available |Core| instances.
-    pub fn cores(&mut self) -> &mut (Core0, Core1) {
-        &mut self.cores
-    }
-}
-
-/// A handle for controlling a logical core.
-pub struct Core0 {}
-/// A handle for controlling a logical core.
-pub struct Core1 {}
-
 #[interrupt]
 #[link_section = ".data.ram_func"]
 unsafe fn SIO_IRQ_PROC1() {
@@ -143,117 +112,113 @@ unsafe fn SIO_IRQ_PROC1() {
     }
 }
 
-impl Core1 {
-    /// Spawn a function on this core
-    pub fn spawn<F>(&mut self, stack: &'static mut [usize], entry: F) -> Result<(), Error>
-    where
-        F: FnOnce() -> bad::Never + Send + 'static,
-    {
-        // The first two ignored `u64` parameters are there to take up all of the registers,
-        // which means that the rest of the arguments are taken from the stack,
-        // where we're able to put them from core 0.
-        extern "C" fn core1_startup<F: FnOnce() -> bad::Never>(
-            _: u64,
-            _: u64,
-            entry: &mut ManuallyDrop<F>,
-            stack_bottom: *mut usize,
-        ) -> ! {
-            core1_setup(stack_bottom);
-            let entry = unsafe { ManuallyDrop::take(entry) };
-            // Signal that it's safe for core 0 to get rid of the original value now.
-            fifo_write(1);
+/// Spawn a function on this core
+pub fn spawn_core1<F, const SIZE: usize>(_core1: CORE1, stack: &'static mut Stack<SIZE>, entry: F)
+where
+    F: FnOnce() -> bad::Never + Send + 'static,
+{
+    // The first two ignored `u64` parameters are there to take up all of the registers,
+    // which means that the rest of the arguments are taken from the stack,
+    // where we're able to put them from core 0.
+    extern "C" fn core1_startup<F: FnOnce() -> bad::Never>(
+        _: u64,
+        _: u64,
+        entry: &mut ManuallyDrop<F>,
+        stack_bottom: *mut usize,
+    ) -> ! {
+        core1_setup(stack_bottom);
+        let entry = unsafe { ManuallyDrop::take(entry) };
+        // Signal that it's safe for core 0 to get rid of the original value now.
+        fifo_write(1);
 
-            IS_CORE1_INIT.store(true, Ordering::Release);
-            // Enable fifo interrupt on CORE1 for `pause` functionality.
-            let irq = unsafe { interrupt::SIO_IRQ_PROC1::steal() };
-            irq.enable();
+        IS_CORE1_INIT.store(true, Ordering::Release);
+        // Enable fifo interrupt on CORE1 for `pause` functionality.
+        let irq = unsafe { interrupt::SIO_IRQ_PROC1::steal() };
+        irq.enable();
 
-            entry()
-        }
-
-        // Reset the core
-        unsafe {
-            let psm = pac::PSM;
-            psm.frce_off().modify(|w| w.set_proc1(true));
-            while !psm.frce_off().read().proc1() {
-                cortex_m::asm::nop();
-            }
-            psm.frce_off().modify(|w| w.set_proc1(false));
-        }
-
-        // Set up the stack
-        let mut stack_ptr = unsafe { stack.as_mut_ptr().add(stack.len()) };
-
-        // We don't want to drop this, since it's getting moved to the other core.
-        let mut entry = ManuallyDrop::new(entry);
-
-        // Push the arguments to `core1_startup` onto the stack.
-        unsafe {
-            // Push `stack_bottom`.
-            stack_ptr = stack_ptr.sub(1);
-            stack_ptr.cast::<*mut usize>().write(stack.as_mut_ptr());
-
-            // Push `entry`.
-            stack_ptr = stack_ptr.sub(1);
-            stack_ptr.cast::<&mut ManuallyDrop<F>>().write(&mut entry);
-        }
-
-        // Make sure the compiler does not reorder the stack writes after to after the
-        // below FIFO writes, which would result in them not being seen by the second
-        // core.
-        //
-        // From the compiler perspective, this doesn't guarantee that the second core
-        // actually sees those writes. However, we know that the RP2040 doesn't have
-        // memory caches, and writes happen in-order.
-        compiler_fence(Ordering::Release);
-
-        let p = unsafe { cortex_m::Peripherals::steal() };
-        let vector_table = p.SCB.vtor.read();
-
-        // After reset, core 1 is waiting to receive commands over FIFO.
-        // This is the sequence to have it jump to some code.
-        let cmd_seq = [
-            0,
-            0,
-            1,
-            vector_table as usize,
-            stack_ptr as usize,
-            core1_startup::<F> as usize,
-        ];
-
-        let mut seq = 0;
-        let mut fails = 0;
-        loop {
-            let cmd = cmd_seq[seq] as u32;
-            if cmd == 0 {
-                fifo_drain();
-                cortex_m::asm::sev();
-            }
-            fifo_write(cmd);
-
-            let response = fifo_read();
-            if cmd == response {
-                seq += 1;
-            } else {
-                seq = 0;
-                fails += 1;
-                if fails > 16 {
-                    // The second core isn't responding, and isn't going to take the entrypoint,
-                    // so we have to drop it ourselves.
-                    drop(ManuallyDrop::into_inner(entry));
-                    return Err(Error::Unresponsive);
-                }
-            }
-            if seq >= cmd_seq.len() {
-                break;
-            }
-        }
-
-        // Wait until the other core has copied `entry` before returning.
-        fifo_read();
-
-        Ok(())
+        entry()
     }
+
+    // Reset the core
+    unsafe {
+        let psm = pac::PSM;
+        psm.frce_off().modify(|w| w.set_proc1(true));
+        while !psm.frce_off().read().proc1() {
+            cortex_m::asm::nop();
+        }
+        psm.frce_off().modify(|w| w.set_proc1(false));
+    }
+
+    let mem = unsafe { core::slice::from_raw_parts_mut(stack.mem.as_mut_ptr() as *mut usize, stack.mem.len() / 4) };
+
+    // Set up the stack
+    let mut stack_ptr = unsafe { mem.as_mut_ptr().add(mem.len()) };
+
+    // We don't want to drop this, since it's getting moved to the other core.
+    let mut entry = ManuallyDrop::new(entry);
+
+    // Push the arguments to `core1_startup` onto the stack.
+    unsafe {
+        // Push `stack_bottom`.
+        stack_ptr = stack_ptr.sub(1);
+        stack_ptr.cast::<*mut usize>().write(mem.as_mut_ptr());
+
+        // Push `entry`.
+        stack_ptr = stack_ptr.sub(1);
+        stack_ptr.cast::<&mut ManuallyDrop<F>>().write(&mut entry);
+    }
+
+    // Make sure the compiler does not reorder the stack writes after to after the
+    // below FIFO writes, which would result in them not being seen by the second
+    // core.
+    //
+    // From the compiler perspective, this doesn't guarantee that the second core
+    // actually sees those writes. However, we know that the RP2040 doesn't have
+    // memory caches, and writes happen in-order.
+    compiler_fence(Ordering::Release);
+
+    let p = unsafe { cortex_m::Peripherals::steal() };
+    let vector_table = p.SCB.vtor.read();
+
+    // After reset, core 1 is waiting to receive commands over FIFO.
+    // This is the sequence to have it jump to some code.
+    let cmd_seq = [
+        0,
+        0,
+        1,
+        vector_table as usize,
+        stack_ptr as usize,
+        core1_startup::<F> as usize,
+    ];
+
+    let mut seq = 0;
+    let mut fails = 0;
+    loop {
+        let cmd = cmd_seq[seq] as u32;
+        if cmd == 0 {
+            fifo_drain();
+            cortex_m::asm::sev();
+        }
+        fifo_write(cmd);
+
+        let response = fifo_read();
+        if cmd == response {
+            seq += 1;
+        } else {
+            seq = 0;
+            fails += 1;
+            if fails > 16 {
+                // The second core isn't responding, and isn't going to take the entrypoint
+                panic!("CORE1 not responding");
+            }
+        }
+        if seq >= cmd_seq.len() {
+            break;
+        }
+    }
+
+    // Wait until the other core has copied `entry` before returning.
+    fifo_read();
 }
 
 /// Pause execution on CORE1.
diff --git a/examples/rp/src/bin/multicore.rs b/examples/rp/src/bin/multicore.rs
index 53941da6..376b2b61 100644
--- a/examples/rp/src/bin/multicore.rs
+++ b/examples/rp/src/bin/multicore.rs
@@ -6,7 +6,7 @@ use defmt::*;
 use embassy_executor::Executor;
 use embassy_executor::_export::StaticCell;
 use embassy_rp::gpio::{Level, Output};
-use embassy_rp::multicore::{MultiCore, Stack};
+use embassy_rp::multicore::{spawn_core1, Stack};
 use embassy_rp::peripherals::PIN_25;
 use embassy_sync::blocking_mutex::raw::CriticalSectionRawMutex;
 use embassy_sync::channel::Channel;
@@ -28,8 +28,7 @@ fn main() -> ! {
     let p = embassy_rp::init(Default::default());
     let led = Output::new(p.PIN_25, Level::Low);
 
-    let mut mc = MultiCore::new();
-    let _ = mc.cores.1.spawn(unsafe { &mut CORE1_STACK.mem }, move || {
+    spawn_core1(p.CORE1, unsafe { &mut CORE1_STACK }, move || {
         let executor1 = EXECUTOR1.init(Executor::new());
         executor1.run(|spawner| unwrap!(spawner.spawn(core1_task(led))));
     });
diff --git a/tests/rp/src/bin/multicore.rs b/tests/rp/src/bin/multicore.rs
new file mode 100644
index 00000000..da78e887
--- /dev/null
+++ b/tests/rp/src/bin/multicore.rs
@@ -0,0 +1,47 @@
+#![no_std]
+#![no_main]
+#![feature(type_alias_impl_trait)]
+
+use defmt::{info, unwrap};
+use embassy_executor::Executor;
+use embassy_executor::_export::StaticCell;
+use embassy_rp::multicore::{spawn_core1, Stack};
+use embassy_sync::blocking_mutex::raw::CriticalSectionRawMutex;
+use embassy_sync::channel::Channel;
+use {defmt_rtt as _, panic_probe as _};
+
+static mut CORE1_STACK: Stack<1024> = Stack::new();
+static EXECUTOR0: StaticCell<Executor> = StaticCell::new();
+static EXECUTOR1: StaticCell<Executor> = StaticCell::new();
+static CHANNEL0: Channel<CriticalSectionRawMutex, bool, 1> = Channel::new();
+static CHANNEL1: Channel<CriticalSectionRawMutex, bool, 1> = Channel::new();
+
+#[cortex_m_rt::entry]
+fn main() -> ! {
+    let p = embassy_rp::init(Default::default());
+    spawn_core1(p.CORE1, unsafe { &mut CORE1_STACK }, move || {
+        let executor1 = EXECUTOR1.init(Executor::new());
+        executor1.run(|spawner| unwrap!(spawner.spawn(core1_task())));
+    });
+    let executor0 = EXECUTOR0.init(Executor::new());
+    executor0.run(|spawner| unwrap!(spawner.spawn(core0_task())));
+}
+
+#[embassy_executor::task]
+async fn core0_task() {
+    info!("CORE0 is running");
+    let ping = true;
+    CHANNEL0.send(ping).await;
+    let pong = CHANNEL1.recv().await;
+    assert_eq!(ping, pong);
+
+    info!("Test OK");
+    cortex_m::asm::bkpt();
+}
+
+#[embassy_executor::task]
+async fn core1_task() {
+    info!("CORE1 is running");
+    let ping = CHANNEL0.recv().await;
+    CHANNEL1.send(ping).await;
+}

From 731eb3c6e31ca7218636c1fba266b2c643e8fb44 Mon Sep 17 00:00:00 2001
From: kalkyl <henrik.alser@me.com>
Date: Tue, 13 Dec 2022 13:55:23 +0100
Subject: [PATCH 09/10] fmt

---
 embassy-rp/src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/embassy-rp/src/lib.rs b/embassy-rp/src/lib.rs
index 248884e7..0ea97aec 100644
--- a/embassy-rp/src/lib.rs
+++ b/embassy-rp/src/lib.rs
@@ -116,7 +116,7 @@ embassy_hal_common::peripherals! {
     ADC,
 
     CORE1,
-    
+
     PIO0,
     PIO1,
 }

From c4d8f3579e4927446421730d52a409d724f2800c Mon Sep 17 00:00:00 2001
From: kalkyl <henrik.alser@me.com>
Date: Tue, 13 Dec 2022 14:15:04 +0100
Subject: [PATCH 10/10] Update usage in docs

---
 embassy-rp/src/multicore.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/embassy-rp/src/multicore.rs b/embassy-rp/src/multicore.rs
index 2dfa215b..8290c62a 100644
--- a/embassy-rp/src/multicore.rs
+++ b/embassy-rp/src/multicore.rs
@@ -18,8 +18,7 @@
 //! fn main() -> ! {
 //!     let p = embassy_rp::init(Default::default());
 //!
-//!     let mut mc = MultiCore::new();
-//!     let _ = mc.cores.1.spawn(unsafe { &mut CORE1_STACK.mem }, move || {
+//!     spawn_core1(p.CORE1, unsafe { &mut CORE1_STACK }, move || {
 //!         let executor1 = EXECUTOR1.init(Executor::new());
 //!         executor1.run(|spawner| unwrap!(spawner.spawn(core1_task())));
 //!     });
@@ -28,7 +27,6 @@
 //!     executor0.run(|spawner| unwrap!(spawner.spawn(core0_task())));
 //! }
 //! ```
-//!
 
 use core::mem::ManuallyDrop;
 use core::sync::atomic::{compiler_fence, Ordering};