diff --git a/esp-hal/CHANGELOG.md b/esp-hal/CHANGELOG.md index a912c67df..4e4b47c36 100644 --- a/esp-hal/CHANGELOG.md +++ b/esp-hal/CHANGELOG.md @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Peripheral driver constructors don't take `InterruptHandler`s anymore. Use `set_interrupt_handler` to explicitly set the interrupt handler now. (#1819) +- Allow DMA to/from psram for esp32s3 (#1827) + ### Fixed - Improve error detection in the I2C driver (#1847) diff --git a/esp-hal/ld/esp32s3/rom-functions.x b/esp-hal/ld/esp32s3/rom-functions.x index 531d3d4bc..e1e4a893b 100644 --- a/esp-hal/ld/esp32s3/rom-functions.x +++ b/esp-hal/ld/esp32s3/rom-functions.x @@ -14,8 +14,13 @@ PROVIDE(esp_rom_spiflash_select_qio_pins = 0x40000a68 ); PROVIDE(esp_rom_spi_set_op_mode = 0x400008a0 ); PROVIDE(esp_rom_spi_cmd_start = 0x40000888); PROVIDE(esp_rom_spi_cmd_config = 0x4000087c); +PROVIDE(Cache_Get_DCache_Line_Size = 0x40001608 ); +PROVIDE(Cache_Invalidate_Addr = 0x400016b0 ); PROVIDE(Cache_Suspend_DCache = 0x400018b4 ); PROVIDE(Cache_Resume_DCache = 0x400018c0 ); +PROVIDE(Cache_Suspend_DCache_Autoload = 0x40001734 ); +PROVIDE(Cache_Resume_DCache_Autoload = 0x40001740 ); +PROVIDE(Cache_WriteBack_Addr = 0x400016c8 ); PROVIDE(rom_config_data_cache_mode = 0x40001a28 ); PROVIDE(rom_config_instruction_cache_mode = 0x40001a1c ); PROVIDE(ets_efuse_get_wp_pad = 0x40001fa4); diff --git a/esp-hal/src/dma/gdma.rs b/esp-hal/src/dma/gdma.rs index c8b49563c..ebdb7af2c 100644 --- a/esp-hal/src/dma/gdma.rs +++ b/esp-hal/src/dma/gdma.rs @@ -93,6 +93,13 @@ impl RegisterAccess for Channel { .modify(|_, w| w.mem_trans_en().bit(value)); } + #[cfg(esp32s3)] + fn set_out_ext_mem_block_size(size: DmaExtMemBKSize) { + Self::ch() + .out_conf1() + .modify(|_, w| unsafe { w.out_ext_mem_bk_size().bits(size as u8) }); + } + fn set_out_burstmode(burst_mode: bool) { Self::ch().out_conf0().modify(|_, w| { w.out_data_burst_en() @@ -206,6 +213,13 @@ impl 
RegisterAccess for Channel { .write(|w| w.out_eof().clear_bit_by_one()); } + #[cfg(esp32s3)] + fn set_in_ext_mem_block_size(size: DmaExtMemBKSize) { + Self::ch() + .in_conf1() + .modify(|_, w| unsafe { w.in_ext_mem_bk_size().bits(size as u8) }); + } + fn set_in_burstmode(burst_mode: bool) { Self::ch().in_conf0().modify(|_, w| { w.in_data_burst_en() @@ -626,6 +640,8 @@ pub use m2m::*; mod m2m { use embedded_dma::{ReadBuffer, WriteBuffer}; + #[cfg(esp32s3)] + use crate::dma::DmaExtMemBKSize; use crate::dma::{ dma_private::{DmaSupport, DmaSupportRx}, Channel, @@ -751,6 +767,21 @@ mod m2m { .prepare_transfer_without_start(self.peripheral, &self.rx_chain)?; self.channel.rx.set_mem2mem_mode(true); } + #[cfg(esp32s3)] + { + let align = match unsafe { crate::soc::cache_get_dcache_line_size() } { + 16 => DmaExtMemBKSize::Size16, + 32 => DmaExtMemBKSize::Size32, + 64 => DmaExtMemBKSize::Size64, + _ => panic!("unsupported cache line size"), + }; + if crate::soc::is_valid_psram_address(tx_ptr as u32) { + self.channel.tx.set_ext_mem_block_size(align); + } + if crate::soc::is_valid_psram_address(rx_ptr as u32) { + self.channel.rx.set_ext_mem_block_size(align); + } + } self.channel.tx.start_transfer()?; self.channel.rx.start_transfer()?; Ok(DmaTransferRx::new(self)) diff --git a/esp-hal/src/dma/mod.rs b/esp-hal/src/dma/mod.rs index 289f74ce7..dafadbd90 100644 --- a/esp-hal/src/dma/mod.rs +++ b/esp-hal/src/dma/mod.rs @@ -43,6 +43,11 @@ //! ⚠️ Note: Descriptors should be sized as `(max_transfer_size + CHUNK_SIZE - 1) / CHUNK_SIZE`. //! I.e., to transfer buffers of size `1..=CHUNK_SIZE`, you need 1 descriptor. //! +//! ⚠️ Note: For chips that support DMA to/from PSRAM (esp32s3) DMA transfers to/from PSRAM +//! have extra alignment requirements. The address and size of the buffer pointed to by +//! each descriptor must be a multiple of the cache line (block) size. This is 32 bytes +//! on esp32s3. +//! //! For convenience you can use the [crate::dma_buffers] macro. 
#![warn(missing_docs)] @@ -66,7 +71,7 @@ impl Debug for DmaDescriptorFlags { .field("size", &self.size()) .field("length", &self.length()) .field("suc_eof", &self.suc_eof()) - .field("owner", &self.owner()) + .field("owner", &(if self.owner() { "DMA" } else { "CPU" })) .finish() } } @@ -95,6 +100,11 @@ impl DmaDescriptor { self.flags.set_length(len as u16) } + #[allow(unused)] + fn size(&self) -> usize { + self.flags.size() as usize + } + fn len(&self) -> usize { self.flags.length() as usize } @@ -567,8 +577,8 @@ impl DescriptorChain { ) -> Result<(), DmaError> { if !crate::soc::is_valid_ram_address(self.first() as u32) || !crate::soc::is_valid_ram_address(self.last() as u32) - || !crate::soc::is_valid_ram_address(data as u32) - || !crate::soc::is_valid_ram_address(unsafe { data.add(len) } as u32) + || !crate::soc::is_valid_memory_address(data as u32) + || !crate::soc::is_valid_memory_address(unsafe { data.add(len) } as u32) { return Err(DmaError::UnsupportedMemoryRegion); } @@ -639,8 +649,8 @@ impl DescriptorChain { ) -> Result<(), DmaError> { if !crate::soc::is_valid_ram_address(self.first() as u32) || !crate::soc::is_valid_ram_address(self.last() as u32) - || !crate::soc::is_valid_ram_address(data as u32) - || !crate::soc::is_valid_ram_address(unsafe { data.add(len) } as u32) + || !crate::soc::is_valid_memory_address(data as u32) + || !crate::soc::is_valid_memory_address(unsafe { data.add(len) } as u32) { return Err(DmaError::UnsupportedMemoryRegion); } @@ -707,6 +717,15 @@ impl DescriptorChain { } } +/// Block size for transfers to/from psram +#[derive(Copy, Clone, Debug, PartialEq)] +#[allow(missing_docs)] +pub enum DmaExtMemBKSize { + Size16 = 0, + Size32 = 1, + Size64 = 2, +} + pub(crate) struct TxCircularState { write_offset: usize, write_descr_ptr: *mut DmaDescriptor, @@ -967,6 +986,9 @@ pub trait RxPrivate: crate::private::Sealed { fn start_transfer(&mut self) -> Result<(), DmaError>; + #[cfg(esp32s3)] + fn set_ext_mem_block_size(&self, size: 
DmaExtMemBKSize); + #[cfg(gdma)] fn set_mem2mem_mode(&mut self, value: bool); @@ -1119,6 +1141,25 @@ where return Err(DmaError::InvalidAlignment); } + // for esp32s3 we check each descriptor buffer that points to psram for + // alignment and invalidate the cache for that buffer + // NOTE: for RX the `buffer` and `size` need to be aligned but the `len` does + // not. TRM section 3.4.9 + #[cfg(esp32s3)] + for des in chain.descriptors.iter() { + // we are forcing the DMA alignment to the cache line size + // required when we are using dcache + let alignment = crate::soc::cache_get_dcache_line_size() as usize; + if crate::soc::is_valid_psram_address(des.buffer as u32) { + // both the size and address of the buffer must be aligned + if des.buffer as usize % alignment != 0 && des.size() % alignment != 0 { + return Err(DmaError::InvalidAlignment); + } + // TODO: make this optional? + crate::soc::cache_invalidate_addr(des.buffer as u32, des.size() as u32); + } + } + self.rx_impl.prepare_transfer_without_start(chain, peri) } @@ -1126,6 +1167,11 @@ where self.rx_impl.start_transfer() } + #[cfg(esp32s3)] + fn set_ext_mem_block_size(&self, size: DmaExtMemBKSize) { + CH::Channel::set_in_ext_mem_block_size(size); + } + #[cfg(gdma)] fn set_mem2mem_mode(&mut self, value: bool) { CH::Channel::set_mem2mem_mode(value); @@ -1244,6 +1290,9 @@ pub trait TxPrivate: crate::private::Sealed { fn start_transfer(&mut self) -> Result<(), DmaError>; + #[cfg(esp32s3)] + fn set_ext_mem_block_size(&self, size: DmaExtMemBKSize); + fn clear_ch_out_done(&self); fn is_ch_out_done_set(&self) -> bool; @@ -1403,6 +1452,21 @@ where peri: DmaPeripheral, chain: &DescriptorChain, ) -> Result<(), DmaError> { + // for esp32s3 we check each descriptor buffer that points to psram for + // alignment and writeback the cache for that buffer + #[cfg(esp32s3)] + for des in chain.descriptors.iter() { + // we are forcing the DMA alignment to the cache line size + // required when we are using dcache + let alignment = 
crate::soc::cache_get_dcache_line_size() as usize; + if crate::soc::is_valid_psram_address(des.buffer as u32) { + // both the size and address of the buffer must be aligned: reject if either one is not + if des.buffer as usize % alignment != 0 || des.size() % alignment != 0 { + return Err(DmaError::InvalidAlignment); + } + crate::soc::cache_writeback_addr(des.buffer as u32, des.size() as u32); + } + } self.tx_impl.prepare_transfer_without_start(chain, peri) } @@ -1410,6 +1474,11 @@ where self.tx_impl.start_transfer() } + #[cfg(esp32s3)] + fn set_ext_mem_block_size(&self, size: DmaExtMemBKSize) { + CH::Channel::set_out_ext_mem_block_size(size); + } + fn clear_ch_out_done(&self) { self.tx_impl.clear_ch_out_done(); } @@ -1489,6 +1558,8 @@ pub trait RegisterAccess: crate::private::Sealed { fn init_channel(); #[cfg(gdma)] fn set_mem2mem_mode(value: bool); + #[cfg(esp32s3)] + fn set_out_ext_mem_block_size(size: DmaExtMemBKSize); fn set_out_burstmode(burst_mode: bool); fn set_out_priority(priority: DmaPriority); fn clear_out_interrupts(); @@ -1507,6 +1578,8 @@ pub trait RegisterAccess: crate::private::Sealed { fn reset_out_eof_interrupt(); fn last_out_dscr_address() -> usize; + #[cfg(esp32s3)] + fn set_in_ext_mem_block_size(size: DmaExtMemBKSize); fn set_in_burstmode(burst_mode: bool); fn set_in_priority(priority: DmaPriority); fn clear_in_interrupts(); diff --git a/esp-hal/src/soc/esp32s3/mod.rs b/esp-hal/src/soc/esp32s3/mod.rs index f580cdd4c..f71130412 100644 --- a/esp-hal/src/soc/esp32s3/mod.rs +++ b/esp-hal/src/soc/esp32s3/mod.rs @@ -170,3 +170,35 @@ unsafe fn post_init() { Wdt::::set_wdt_enabled(false); Wdt::::set_wdt_enabled(false); } + +#[doc(hidden)] +#[link_section = ".rwtext"] +pub unsafe fn cache_writeback_addr(addr: u32, size: u32) { + extern "C" { + fn Cache_WriteBack_Addr(addr: u32, size: u32); + fn Cache_Suspend_DCache_Autoload() -> u32; + fn Cache_Resume_DCache_Autoload(value: u32); + } + // suspend autoload, avoid load cachelines being written back + let autoload = 
Cache_Suspend_DCache_Autoload(); + Cache_WriteBack_Addr(addr, size); + Cache_Resume_DCache_Autoload(autoload); +} + +#[doc(hidden)] +#[link_section = ".rwtext"] +pub unsafe fn cache_invalidate_addr(addr: u32, size: u32) { + extern "C" { + fn Cache_Invalidate_Addr(addr: u32, size: u32); + } + Cache_Invalidate_Addr(addr, size); +} + +#[doc(hidden)] +#[link_section = ".rwtext"] +pub unsafe fn cache_get_dcache_line_size() -> u32 { + extern "C" { + fn Cache_Get_DCache_Line_Size() -> u32; + } + Cache_Get_DCache_Line_Size() +} diff --git a/esp-hal/src/soc/mod.rs b/esp-hal/src/soc/mod.rs index 470904371..1e7c93b41 100644 --- a/esp-hal/src/soc/mod.rs +++ b/esp-hal/src/soc/mod.rs @@ -71,3 +71,20 @@ impl self::efuse::Efuse { pub(crate) fn is_valid_ram_address(address: u32) -> bool { (self::constants::SOC_DRAM_LOW..=self::constants::SOC_DRAM_HIGH).contains(&address) } + +#[allow(unused)] +pub(crate) fn is_valid_psram_address(address: u32) -> bool { + #[cfg(psram)] + { + let start = crate::psram::psram_vaddr_start() as u32; + let end = start + crate::psram::PSRAM_BYTES as u32; + (start..=end).contains(&address) + } + #[cfg(not(psram))] + false +} + +#[allow(unused)] +pub(crate) fn is_valid_memory_address(address: u32) -> bool { + is_valid_ram_address(address) || is_valid_psram_address(address) +} diff --git a/examples/Cargo.toml b/examples/Cargo.toml index d373b7196..e3bb27e00 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -7,6 +7,7 @@ publish = false [dependencies] aes = "0.8.4" +aligned = { version = "0.4.2", optional = true } bleps = { git = "https://github.com/bjoernQ/bleps", package = "bleps", rev = "a5148d8ae679e021b78f53fd33afb8bb35d0b62e", features = [ "macros", "async"] } cfg-if = "1.0.0" critical-section = "1.1.2" diff --git a/examples/src/bin/dma_extmem2mem.rs b/examples/src/bin/dma_extmem2mem.rs new file mode 100644 index 000000000..63360a776 --- /dev/null +++ b/examples/src/bin/dma_extmem2mem.rs @@ -0,0 +1,158 @@ +//! 
Uses DMA to copy psram to internal memory. + +//% FEATURES: esp-hal/log opsram-2m aligned +//% CHIPS: esp32s3 + +#![no_std] +#![no_main] + +use aligned::{Aligned, A64}; +use esp_backtrace as _; +use esp_hal::{ + clock::ClockControl, + delay::Delay, + dma::{Dma, DmaPriority, Mem2Mem}, + dma_descriptors_chunk_size, + peripherals::Peripherals, + prelude::*, + system::SystemControl, +}; +use log::{error, info}; +extern crate alloc; + +const DATA_SIZE: usize = 1024 * 10; +const CHUNK_SIZE: usize = 4032; // size is aligned to 64 bytes + +macro_rules! dma_buffer_aligned { + ($size:expr, $align:ty) => {{ + static mut BUFFER: Aligned<$align, [u8; $size]> = Aligned([0; $size]); + unsafe { &mut *BUFFER } + }}; +} + +macro_rules! dma_alloc_buffer { + ($size:expr, $align:expr) => {{ + let layout = core::alloc::Layout::from_size_align($size, $align).unwrap(); + unsafe { + let ptr = alloc::alloc::alloc(layout); + if ptr.is_null() { + error!("make_buffers: alloc failed"); + alloc::alloc::handle_alloc_error(layout); + } + core::slice::from_raw_parts_mut(ptr, $size) + } + }}; +} + +#[global_allocator] +static ALLOCATOR: esp_alloc::EspHeap = esp_alloc::EspHeap::empty(); + +fn init_heap(psram: impl esp_hal::peripheral::Peripheral

) { + esp_hal::psram::init_psram(psram); + info!( + "init_heap: start: 0x{:0x}", + esp_hal::psram::psram_vaddr_start() + ); + unsafe { + ALLOCATOR.init( + esp_hal::psram::psram_vaddr_start() as *mut u8, + esp_hal::psram::PSRAM_BYTES, + ); + } +} + +#[entry] +fn main() -> ! { + esp_println::logger::init_logger(log::LevelFilter::Info); + + let peripherals = Peripherals::take(); + init_heap(peripherals.PSRAM); + let system = SystemControl::new(peripherals.SYSTEM); + let clocks = ClockControl::boot_defaults(system.clock_control).freeze(); + let delay = Delay::new(&clocks); + + let mut extram_buffer: &mut [u8] = dma_alloc_buffer!(DATA_SIZE, 64); + let mut intram_buffer = dma_buffer_aligned!(DATA_SIZE, A64); + let (tx_descriptors, rx_descriptors) = dma_descriptors_chunk_size!(DATA_SIZE, CHUNK_SIZE); + + let dma = Dma::new(peripherals.DMA); + let channel = dma.channel0.configure(false, DmaPriority::Priority0); + let dma_peripheral = peripherals.SPI2; + + let mut mem2mem = Mem2Mem::new_with_chunk_size( + channel, + dma_peripheral, + tx_descriptors, + rx_descriptors, + CHUNK_SIZE, + ) + .unwrap(); + + for i in 0..core::mem::size_of_val(extram_buffer) { + extram_buffer[i] = (i % 256) as u8; + intram_buffer[i] = 255 - extram_buffer[i]; + } + + info!(" ext2int: Starting transfer of {} bytes", DATA_SIZE); + match mem2mem.start_transfer(&extram_buffer, &mut intram_buffer) { + Ok(dma_wait) => { + info!("Transfer started"); + dma_wait.wait().unwrap(); + info!("Transfer completed, comparing buffer"); + let mut error = false; + for i in 0..core::mem::size_of_val(extram_buffer) { + if intram_buffer[i] != extram_buffer[i] { + error!( + "Error: extram_buffer[{}] = {}, intram_buffer[{}] = {}", + i, extram_buffer[i], i, intram_buffer[i] + ); + error = true; + break; + } + } + if !error { + info!("Buffers are equal"); + } + info!("Done"); + } + Err(e) => { + error!("start_transfer: Error: {:?}", e); + } + } + + for i in 0..core::mem::size_of_val(extram_buffer) { + intram_buffer[i] = (i % 
256) as u8; + extram_buffer[i] = 255 - intram_buffer[i]; + } + + info!(" int2ext: Starting transfer of {} bytes", DATA_SIZE); + match mem2mem.start_transfer(&intram_buffer, &mut extram_buffer) { + Ok(dma_wait) => { + info!("Transfer started"); + dma_wait.wait().unwrap(); + info!("Transfer completed, comparing buffer"); + let mut error = false; + for i in 0..core::mem::size_of_val(extram_buffer) { + if intram_buffer[i] != extram_buffer[i] { + error!( + "Error: extram_buffer[{}] = {}, intram_buffer[{}] = {}", + i, extram_buffer[i], i, intram_buffer[i] + ); + error = true; + break; + } + } + if !error { + info!("Buffers are equal"); + } + info!("Done"); + } + Err(e) => { + error!("start_transfer: Error: {:?}", e); + } + } + + loop { + delay.delay(2.secs()); + } +}