From 5324bce815bc36bc4e92d4e0bda8faa6a74d7df9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1niel=20Buga?=
Date: Fri, 20 Sep 2024 13:05:37 +0200
Subject: [PATCH] Rework locks (#2197)

* Move lock impls out of lib.rs

* Reimplement Lock using ReentrantMutex

* Refactor away some duplicate lock logic

* Return owner from try_lock

* Rework critical section to avoid spinning in irq-free context

* Fail compilation on unknown architectures

* Explain what RESERVED_MASK is

* Create one lock per timer group
---
 esp-hal/CHANGELOG.md                        |   1 +
 esp-hal/src/lib.rs                          | 265 +----------------
 esp-hal/src/lock.rs                         | 227 +++++++++++++++
 esp-hal/src/timer/systimer.rs               |   7 +-
 esp-hal/src/timer/timg.rs                   |  13 +-
 examples/src/bin/embassy_multicore.rs       |   4 +-
 .../src/bin/embassy_multicore_interrupt.rs  |   4 +-
 7 files changed, 243 insertions(+), 278 deletions(-)
 create mode 100644 esp-hal/src/lock.rs

diff --git a/esp-hal/CHANGELOG.md b/esp-hal/CHANGELOG.md
index d4716178d..e35e202fa 100644
--- a/esp-hal/CHANGELOG.md
+++ b/esp-hal/CHANGELOG.md
@@ -48,6 +48,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Renamed `DummyPin` to `NoPin` and removed all internal logic from it. (#2133)
 - The `NO_PIN` constant has been removed. (#2133)
 - MSRV bump to 1.79 (#2156)
+- Allow handling interrupts while trying to lock a critical section on multi-core chips. (#2197)
 
 ### Fixed
 
diff --git a/esp-hal/src/lib.rs b/esp-hal/src/lib.rs
index 577fb496c..cf86a3b47 100644
--- a/esp-hal/src/lib.rs
+++ b/esp-hal/src/lib.rs
@@ -234,6 +234,7 @@ pub mod uart;
 pub mod usb_serial_jtag;
 
 pub mod debugger;
+mod lock;
 
 /// State of the CPU saved when entering exception or interrupt
 pub mod trapframe {
@@ -396,270 +397,6 @@ fn get_raw_core() -> usize {
     (xtensa_lx::get_processor_id() & 0x2000) as usize
 }
 
-mod critical_section_impl {
-    struct CriticalSection;
-
-    critical_section::set_impl!(CriticalSection);
-
-    #[cfg(xtensa)]
-    mod xtensa {
-        // PS has 15 useful bits. Bits 12..16 and 19..32 are unused, so we can use bit
-        // #31 as our reentry flag.
-        #[cfg(multi_core)]
-        const REENTRY_FLAG: u32 = 1 << 31;
-
-        unsafe impl critical_section::Impl for super::CriticalSection {
-            unsafe fn acquire() -> critical_section::RawRestoreState {
-                let mut tkn: critical_section::RawRestoreState;
-                core::arch::asm!("rsil {0}, 5", out(reg) tkn);
-                #[cfg(multi_core)]
-                {
-                    use super::multicore::{LockKind, MULTICORE_LOCK};
-
-                    match MULTICORE_LOCK.lock() {
-                        LockKind::Lock => {
-                            // We can assume the reserved bit is 0 otherwise
-                            // rsil - wsr pairings would be undefined behavior
-                        }
-                        LockKind::Reentry => tkn |= REENTRY_FLAG,
-                    }
-                }
-                tkn
-            }
-
-            unsafe fn release(token: critical_section::RawRestoreState) {
-                #[cfg(multi_core)]
-                {
-                    use super::multicore::MULTICORE_LOCK;
-
-                    debug_assert!(MULTICORE_LOCK.is_owned_by_current_thread());
-
-                    if token & REENTRY_FLAG != 0 {
-                        return;
-                    }
-
-                    MULTICORE_LOCK.unlock();
-                }
-
-                const RESERVED_MASK: u32 = 0b1111_1111_1111_1000_1111_0000_0000_0000;
-                debug_assert!(token & RESERVED_MASK == 0);
-
-                core::arch::asm!(
-                    "wsr.ps {0}",
-                    "rsync", in(reg) token)
-            }
-        }
-    }
-
-    #[cfg(riscv)]
-    mod riscv {
-        // The restore state is a u8 that is casted from a bool, so it has a value of
-        // 0x00 or 0x01 before we add the reentry flag to it.
-        #[cfg(multi_core)]
-        const REENTRY_FLAG: u8 = 1 << 7;
-
-        unsafe impl critical_section::Impl for super::CriticalSection {
-            unsafe fn acquire() -> critical_section::RawRestoreState {
-                let mut mstatus = 0u32;
-                core::arch::asm!("csrrci {0}, mstatus, 8", inout(reg) mstatus);
-
-                #[cfg_attr(single_core, allow(unused_mut))]
-                let mut tkn = ((mstatus & 0b1000) != 0) as critical_section::RawRestoreState;
-
-                #[cfg(multi_core)]
-                {
-                    use super::multicore::{LockKind, MULTICORE_LOCK};
-
-                    match MULTICORE_LOCK.lock() {
-                        LockKind::Lock => {}
-                        LockKind::Reentry => tkn |= REENTRY_FLAG,
-                    }
-                }
-
-                tkn
-            }
-
-            unsafe fn release(token: critical_section::RawRestoreState) {
-                #[cfg(multi_core)]
-                {
-                    use super::multicore::MULTICORE_LOCK;
-
-                    debug_assert!(MULTICORE_LOCK.is_owned_by_current_thread());
-
-                    if token & REENTRY_FLAG != 0 {
-                        return;
-                    }
-
-                    MULTICORE_LOCK.unlock();
-                }
-
-                if token != 0 {
-                    riscv::interrupt::enable();
-                }
-            }
-        }
-    }
-
-    #[cfg(multi_core)]
-    mod multicore {
-        use portable_atomic::{AtomicUsize, Ordering};
-
-        // We're using a value that we know get_raw_core() will never return. This
-        // avoids an unnecessary increment of the core ID.
-        //
-        // Safety: Ensure that when adding new chips get_raw_core doesn't return this
-        // value. TODO when we have HIL tests ensure this is the case!
-        const UNUSED_THREAD_ID_VALUE: usize = 0x100;
-
-        fn thread_id() -> usize {
-            crate::get_raw_core()
-        }
-
-        pub(super) static MULTICORE_LOCK: ReentrantMutex = ReentrantMutex::new();
-
-        pub(super) enum LockKind {
-            Lock = 0,
-            Reentry,
-        }
-
-        pub(super) struct ReentrantMutex {
-            owner: AtomicUsize,
-        }
-
-        impl ReentrantMutex {
-            const fn new() -> Self {
-                Self {
-                    owner: AtomicUsize::new(UNUSED_THREAD_ID_VALUE),
-                }
-            }
-
-            pub fn is_owned_by_current_thread(&self) -> bool {
-                self.owner.load(Ordering::Relaxed) == thread_id()
-            }
-
-            pub(super) fn lock(&self) -> LockKind {
-                let current_thread_id = thread_id();
-
-                if self.try_lock(current_thread_id) {
-                    return LockKind::Lock;
-                }
-
-                let current_owner = self.owner.load(Ordering::Relaxed);
-                if current_owner == current_thread_id {
-                    return LockKind::Reentry;
-                }
-
-                while !self.try_lock(current_thread_id) {}
-
-                LockKind::Lock
-            }
-
-            fn try_lock(&self, new_owner: usize) -> bool {
-                self.owner
-                    .compare_exchange(
-                        UNUSED_THREAD_ID_VALUE,
-                        new_owner,
-                        Ordering::Acquire,
-                        Ordering::Relaxed,
-                    )
-                    .is_ok()
-            }
-
-            pub(super) fn unlock(&self) {
-                self.owner.store(UNUSED_THREAD_ID_VALUE, Ordering::Release);
-            }
-        }
-    }
-}
-
-// The state of a re-entrant lock
-pub(crate) struct LockState {
-    #[cfg(multi_core)]
-    core: portable_atomic::AtomicUsize,
-}
-
-impl LockState {
-    #[cfg(multi_core)]
-    const UNLOCKED: usize = usize::MAX;
-
-    pub const fn new() -> Self {
-        Self {
-            #[cfg(multi_core)]
-            core: portable_atomic::AtomicUsize::new(Self::UNLOCKED),
-        }
-    }
-}
-
-// This is preferred over critical-section as this allows you to have multiple
-// locks active at the same time rather than using the global mutex that is
-// critical-section.
-#[allow(unused_variables)]
-pub(crate) fn lock<T>(state: &LockState, f: impl FnOnce() -> T) -> T {
-    // In regards to disabling interrupts, we only need to disable
-    // the interrupts that may be calling this function.
-
-    #[cfg(not(multi_core))]
-    {
-        // Disabling interrupts is enough on single core chips to ensure mutual
-        // exclusion.
-
-        #[cfg(riscv)]
-        return riscv::interrupt::free(f);
-        #[cfg(xtensa)]
-        return xtensa_lx::interrupt::free(|_| f());
-    }
-
-    #[cfg(multi_core)]
-    {
-        use portable_atomic::Ordering;
-
-        let current_core = get_core() as usize;
-
-        let mut f = f;
-
-        loop {
-            let func = || {
-                // Use Acquire ordering in success to ensure `f()` "happens after" the lock is
-                // taken. Use Relaxed ordering in failure as there's no
-                // synchronisation happening.
-                if let Err(locked_core) = state.core.compare_exchange(
-                    LockState::UNLOCKED,
-                    current_core,
-                    Ordering::Acquire,
-                    Ordering::Relaxed,
-                ) {
-                    assert_ne!(
-                        locked_core, current_core,
-                        "esp_hal::lock is not re-entrant!"
-                    );
-
-                    Err(f)
-                } else {
-                    let result = f();
-
-                    // Use Release ordering here to ensure `f()` "happens before" this lock is
-                    // released.
-                    state.core.store(LockState::UNLOCKED, Ordering::Release);
-
-                    Ok(result)
-                }
-            };
-
-            #[cfg(riscv)]
-            let result = riscv::interrupt::free(func);
-            #[cfg(xtensa)]
-            let result = xtensa_lx::interrupt::free(|_| func());
-
-            match result {
-                Ok(result) => break result,
-                Err(the_function) => f = the_function,
-            }
-
-            // Consider using core::hint::spin_loop(); Might need SW_INT.
-        }
-    }
-}
-
 /// Default (unhandled) interrupt handler
 pub const DEFAULT_INTERRUPT_HANDLER: interrupt::InterruptHandler = interrupt::InterruptHandler::new(
     unsafe { core::mem::transmute::<*const (), extern "C" fn()>(EspDefaultHandler as *const ()) },
diff --git a/esp-hal/src/lock.rs b/esp-hal/src/lock.rs
new file mode 100644
index 000000000..bf76bc60b
--- /dev/null
+++ b/esp-hal/src/lock.rs
@@ -0,0 +1,227 @@
+mod single_core {
+    pub unsafe fn disable_interrupts() -> critical_section::RawRestoreState {
+        cfg_if::cfg_if! {
+            if #[cfg(riscv)] {
+                let mut mstatus = 0u32;
+                core::arch::asm!("csrrci {0}, mstatus, 8", inout(reg) mstatus);
+                ((mstatus & 0b1000) != 0) as critical_section::RawRestoreState
+            } else if #[cfg(xtensa)] {
+                let token: critical_section::RawRestoreState;
+                core::arch::asm!("rsil {0}, 5", out(reg) token);
+                token
+            } else {
+                compile_error!("Unsupported architecture")
+            }
+        }
+    }
+
+    pub unsafe fn reenable_interrupts(token: critical_section::RawRestoreState) {
+        cfg_if::cfg_if! {
+            if #[cfg(riscv)] {
+                if token != 0 {
+                    esp_riscv_rt::riscv::interrupt::enable();
+                }
+            } else if #[cfg(xtensa)] {
+                // Reserved bits in the PS register; these must be written as 0.
+                const RESERVED_MASK: u32 = 0b1111_1111_1111_1000_1111_0000_0000_0000;
+                debug_assert!(token & RESERVED_MASK == 0);
+                core::arch::asm!(
+                    "wsr.ps {0}",
+                    "rsync", in(reg) token)
+            } else {
+                compile_error!("Unsupported architecture")
+            }
+        }
+    }
+}
+
+#[cfg(multi_core)]
+mod multicore {
+    use portable_atomic::{AtomicUsize, Ordering};
+
+    cfg_if::cfg_if! {
+        if #[cfg(riscv)] {
+            // The restore state is a u8 that is cast from a bool, so it has a value of
+            // 0x00 or 0x01 before we add the reentry flag to it.
+            pub const REENTRY_FLAG: u8 = 1 << 7;
+        } else if #[cfg(xtensa)] {
+            // PS has 15 useful bits. Bits 12..16 and 19..32 are unused, so we can use bit
+            // #31 as our reentry flag.
+            // We can assume the reserved bit is 0, otherwise rsil - wsr pairings would be
+            // undefined behavior. Quoting the ISA summary, table 64:
+            // Writing a non-zero value to these fields results in undefined processor behavior.
+            pub const REENTRY_FLAG: u32 = 1 << 31;
+        }
+    }
+
+    // Safety: Ensure that when adding new chips `get_raw_core` doesn't return this
+    // value.
+    // FIXME: ensure in HIL tests this is the case!
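+    //
+    // On multi-core Xtensa chips this holds because `get_raw_core` returns either
+    // 0 or 0x2000 (see `get_raw_core` in lib.rs above), so 0x100 can never be a
+    // real owner.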
+    const UNUSED_THREAD_ID_VALUE: usize = 0x100;
+
+    pub fn thread_id() -> usize {
+        crate::get_raw_core()
+    }
+
+    pub(super) struct AtomicLock {
+        owner: AtomicUsize,
+    }
+
+    impl AtomicLock {
+        pub const fn new() -> Self {
+            Self {
+                owner: AtomicUsize::new(UNUSED_THREAD_ID_VALUE),
+            }
+        }
+
+        pub fn is_owned_by_current_thread(&self) -> bool {
+            self.is_owned_by(thread_id())
+        }
+
+        pub fn is_owned_by(&self, thread: usize) -> bool {
+            self.owner.load(Ordering::Relaxed) == thread
+        }
+
+        pub fn try_lock(&self, new_owner: usize) -> Result<(), usize> {
+            self.owner
+                .compare_exchange(
+                    UNUSED_THREAD_ID_VALUE,
+                    new_owner,
+                    Ordering::Acquire,
+                    Ordering::Relaxed,
+                )
+                .map(|_| ())
+        }
+
+        /// # Safety:
+        ///
+        /// This function must only be called if the lock was acquired by the
+        /// current thread.
+        pub unsafe fn unlock(&self) {
+            debug_assert!(self.is_owned_by_current_thread());
+            self.owner.store(UNUSED_THREAD_ID_VALUE, Ordering::Release);
+        }
+    }
+}
+
+pub(crate) struct Lock {
+    #[cfg(multi_core)]
+    inner: multicore::AtomicLock,
+}
+
+impl Lock {
+    pub const fn new() -> Self {
+        Self {
+            #[cfg(multi_core)]
+            inner: multicore::AtomicLock::new(),
+        }
+    }
+
+    fn acquire(&self) -> critical_section::RawRestoreState {
+        cfg_if::cfg_if! {
+            if #[cfg(single_core)] {
+                unsafe { single_core::disable_interrupts() }
+            } else if #[cfg(multi_core)] {
+                // We acquire the lock inside an interrupt-free context to prevent a subtle
+                // race condition:
+                // In case an interrupt handler tries to lock the same resource, it could win if
+                // the current thread is holding the lock but isn't yet in interrupt-free context.
+                // If we maintain non-reentrant semantics, this situation would panic.
+                // If we allow reentrancy, the interrupt handler would technically be a different
+                // context with the same `current_thread_id`, so it would be allowed to lock the
+                // resource in a theoretically incorrect way.
+                let try_lock = |current_thread_id| {
+                    let mut tkn = unsafe { single_core::disable_interrupts() };
+
+                    match self.inner.try_lock(current_thread_id) {
+                        Ok(()) => Some(tkn),
+                        Err(owner) if owner == current_thread_id => {
+                            tkn |= multicore::REENTRY_FLAG;
+                            Some(tkn)
+                        }
+                        Err(_) => {
+                            unsafe { single_core::reenable_interrupts(tkn) };
+                            None
+                        }
+                    }
+                };
+
+                let current_thread_id = multicore::thread_id();
+                loop {
+                    if let Some(token) = try_lock(current_thread_id) {
+                        return token;
+                    }
+                }
+            }
+        }
+    }
+
+    /// # Safety
+    /// This function must only be called if the lock was acquired by the
+    /// current thread.
+    unsafe fn release(&self, token: critical_section::RawRestoreState) {
+        #[cfg(multi_core)]
+        {
+            if token & multicore::REENTRY_FLAG != 0 {
+                return;
+            }
+
+            self.inner.unlock();
+        }
+
+        single_core::reenable_interrupts(token);
+    }
+}
+
+// This is preferred over critical-section as this allows you to have multiple
+// locks active at the same time rather than using the global mutex that is
+// critical-section.
+#[allow(unused_variables)]
+pub(crate) fn lock<T>(lock: &Lock, f: impl FnOnce() -> T) -> T {
+    // With regard to disabling interrupts, we only need to disable
+    // the interrupts whose handlers may call this function.
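+    //
+    // Usage sketch (the lock name and closure are illustrative only):
+    //
+    //     static SOME_LOCK: Lock = Lock::new();
+    //     let value = lock(&SOME_LOCK, || {
+    //         // ... read or modify shared state with interrupts disabled ...
+    //     });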
+
+    struct LockGuard<'a> {
+        lock: &'a Lock,
+        token: critical_section::RawRestoreState,
+    }
+
+    impl<'a> LockGuard<'a> {
+        fn new(lock: &'a Lock) -> Self {
+            let token = lock.acquire();
+
+            #[cfg(multi_core)]
+            assert!(
+                token & multicore::REENTRY_FLAG == 0,
+                "lock is not reentrant"
+            );
+
+            Self { lock, token }
+        }
+    }
+
+    impl<'a> Drop for LockGuard<'a> {
+        fn drop(&mut self) {
+            unsafe { self.lock.release(self.token) };
+        }
+    }
+
+    let _token = LockGuard::new(lock);
+    f()
+}
+
+struct CriticalSection;
+
+critical_section::set_impl!(CriticalSection);
+
+static CRITICAL_SECTION: Lock = Lock::new();
+
+unsafe impl critical_section::Impl for CriticalSection {
+    unsafe fn acquire() -> critical_section::RawRestoreState {
+        CRITICAL_SECTION.acquire()
+    }
+
+    unsafe fn release(token: critical_section::RawRestoreState) {
+        CRITICAL_SECTION.release(token);
+    }
+}
diff --git a/esp-hal/src/timer/systimer.rs b/esp-hal/src/timer/systimer.rs
index e8e4bdd97..ed5f01fbb 100644
--- a/esp-hal/src/timer/systimer.rs
+++ b/esp-hal/src/timer/systimer.rs
@@ -79,7 +79,7 @@ use fugit::{Instant, MicrosDurationU32, MicrosDurationU64};
 use super::{Error, Timer as _};
 use crate::{
     interrupt::{self, InterruptHandler},
-    lock,
+    lock::{lock, Lock},
     peripheral::Peripheral,
     peripherals::{Interrupt, SYSTIMER},
     system::{Peripheral as PeripheralEnable, PeripheralClockControl},
@@ -87,7 +87,6 @@ use crate::{
     Blocking,
     Cpu,
     InterruptConfigurable,
-    LockState,
     Mode,
 };
 
@@ -1009,8 +1008,8 @@ where
     }
 }
 
-static CONF_LOCK: LockState = LockState::new();
-static INT_ENA_LOCK: LockState = LockState::new();
+static CONF_LOCK: Lock = Lock::new();
+static INT_ENA_LOCK: Lock = Lock::new();
 
 // Async functionality of the system timer.
 mod asynch {
diff --git a/esp-hal/src/timer/timg.rs b/esp-hal/src/timer/timg.rs
index 3c1c54460..eaec0a262 100644
--- a/esp-hal/src/timer/timg.rs
+++ b/esp-hal/src/timer/timg.rs
@@ -76,7 +76,7 @@ use crate::soc::constants::TIMG_DEFAULT_CLK_SRC;
 use crate::{
     clock::Clocks,
     interrupt::{self, InterruptHandler},
-    lock,
+    lock::{lock, Lock},
     peripheral::{Peripheral, PeripheralRef},
     peripherals::{timg0::RegisterBlock, Interrupt, TIMG0},
     private::Sealed,
@@ -84,11 +84,12 @@ use crate::{
     Async,
     Blocking,
     InterruptConfigurable,
-    LockState,
     Mode,
 };
 
-static INT_ENA_LOCK: LockState = LockState::new();
+const NUM_TIMG: usize = 1 + cfg!(timg1) as usize;
+
+static INT_ENA_LOCK: [Lock; NUM_TIMG] = [const { Lock::new() }; NUM_TIMG];
 
 /// A timer group consisting of
 #[cfg_attr(not(timg_timer1), doc = "a general purpose timer")]
@@ -475,7 +476,7 @@ where
             .config()
             .modify(|_, w| w.level_int_en().set_bit());
 
-        lock(&INT_ENA_LOCK, || {
+        lock(&INT_ENA_LOCK[self.timer_group() as usize], || {
             self.register_block()
                 .int_ena()
                 .modify(|_, w| w.t(self.timer_number()).bit(state));
@@ -694,7 +695,7 @@ where
             .config()
             .modify(|_, w| w.level_int_en().set_bit());
 
-        lock(&INT_ENA_LOCK, || {
+        lock(&INT_ENA_LOCK[self.timer_group() as usize], || {
             self.register_block()
                 .int_ena()
                 .modify(|_, w| w.t(T).set_bit());
@@ -702,7 +703,7 @@ where
     }
 
     fn unlisten(&self) {
-        lock(&INT_ENA_LOCK, || {
+        lock(&INT_ENA_LOCK[self.timer_group() as usize], || {
             self.register_block()
                 .int_ena()
                 .modify(|_, w| w.t(T).clear_bit());
diff --git a/examples/src/bin/embassy_multicore.rs b/examples/src/bin/embassy_multicore.rs
index 8ca327d01..741b196bb 100644
--- a/examples/src/bin/embassy_multicore.rs
+++ b/examples/src/bin/embassy_multicore.rs
@@ -21,7 +21,7 @@ use esp_backtrace as _;
 use esp_hal::{
     cpu_control::{CpuControl, Stack},
     get_core,
-    gpio::{Io, Level, Output, Pin},
+    gpio::{Io, Level, Output},
     timer::{timg::TimerGroup, AnyTimer},
 };
 use esp_hal_embassy::Executor;
@@ -65,7 +65,7 @@ async fn main(_spawner: Spawner) {
     static LED_CTRL: StaticCell<Signal<CriticalSectionRawMutex, bool>> = StaticCell::new();
     let led_ctrl_signal = &*LED_CTRL.init(Signal::new());
 
-    let led = Output::new(io.pins.gpio0.degrade(), Level::Low);
+    let led = Output::new(io.pins.gpio0, Level::Low);
 
     let _guard = cpu_control
         .start_app_core(unsafe { &mut *addr_of_mut!(APP_CORE_STACK) }, move || {
diff --git a/examples/src/bin/embassy_multicore_interrupt.rs b/examples/src/bin/embassy_multicore_interrupt.rs
index a4c2cbf26..1b427ebe7 100644
--- a/examples/src/bin/embassy_multicore_interrupt.rs
+++ b/examples/src/bin/embassy_multicore_interrupt.rs
@@ -20,7 +20,7 @@ use esp_backtrace as _;
 use esp_hal::{
     cpu_control::{CpuControl, Stack},
     get_core,
-    gpio::{Io, Level, Output, Pin},
+    gpio::{Io, Level, Output},
     interrupt::{software::SoftwareInterruptControl, Priority},
     prelude::*,
     timer::{timg::TimerGroup, AnyTimer},
@@ -87,7 +87,7 @@ fn main() -> ! {
     static LED_CTRL: StaticCell<Signal<CriticalSectionRawMutex, bool>> = StaticCell::new();
     let led_ctrl_signal = &*LED_CTRL.init(Signal::new());
 
-    let led = Output::new(io.pins.gpio0.degrade(), Level::Low);
+    let led = Output::new(io.pins.gpio0, Level::Low);
 
     static EXECUTOR_CORE_1: StaticCell<InterruptExecutor<1>> = StaticCell::new();
     let executor_core1 = InterruptExecutor::new(sw_ints.software_interrupt1);
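
With this change, the `critical_section` implementation is reentrant on the same core, and a core that loses the race for the lock re-enables interrupts between attempts instead of spinning with them disabled. A minimal sketch of the nesting behavior (illustrative only, not part of the patch):

    critical_section::with(|_cs| {
        // Outer acquire: interrupts are off and this core owns CRITICAL_SECTION.
        critical_section::with(|_nested| {
            // Inner acquire: `try_lock` fails with `owner == current_thread_id`,
            // so the token comes back with REENTRY_FLAG set instead of deadlocking.
        });
        // The inner release saw REENTRY_FLAG and returned early; the lock and the
        // saved interrupt state are only restored when the outer section ends.
    });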