Implement DMA to/from psram on esp32s3 (#1827)

* initial non-working attempt for dma from psram on esp32s3

* flush cache - now works for extmem as source but not for extmem as destination

* use cache_invalidate_addr on destination address

* update changelog

* require dma transfers to/from psram to be aligned to dcache line size

* cache_writeback_addr() should suspend/resume dcache autoload

* no need for cfg(esp32s3) in esp32s3 specific module

* dma: document alignment requirements for DMA to/from PSRAM

* fix doc typos
This commit is contained in:
liebman 2024-07-25 05:51:02 -07:00 committed by GitHub
parent 7f251b457a
commit c7218fba2b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 324 additions and 5 deletions

View File

@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- Peripheral driver constructors don't take `InterruptHandler`s anymore. Use `set_interrupt_handler` to explicitly set the interrupt handler now. (#1819)
- Allow DMA to/from psram for esp32s3 (#1827)
### Fixed
- Improve error detection in the I2C driver (#1847)

View File

@ -14,8 +14,13 @@ PROVIDE(esp_rom_spiflash_select_qio_pins = 0x40000a68 );
PROVIDE(esp_rom_spi_set_op_mode = 0x400008a0 );
PROVIDE(esp_rom_spi_cmd_start = 0x40000888);
PROVIDE(esp_rom_spi_cmd_config = 0x4000087c);
PROVIDE(Cache_Get_DCache_Line_Size = 0x40001608 );
PROVIDE(Cache_Invalidate_Addr = 0x400016b0 );
PROVIDE(Cache_Suspend_DCache = 0x400018b4 );
PROVIDE(Cache_Resume_DCache = 0x400018c0 );
PROVIDE(Cache_Suspend_DCache_Autoload = 0x40001734 );
PROVIDE(Cache_Resume_DCache_Autoload = 0x40001740 );
PROVIDE(Cache_WriteBack_Addr = 0x400016c8 );
PROVIDE(rom_config_data_cache_mode = 0x40001a28 );
PROVIDE(rom_config_instruction_cache_mode = 0x40001a1c );
PROVIDE(ets_efuse_get_wp_pad = 0x40001fa4);

View File

@ -93,6 +93,13 @@ impl<const N: u8> RegisterAccess for Channel<N> {
.modify(|_, w| w.mem_trans_en().bit(value));
}
/// Selects the external-memory block size used by the outgoing (TX) side of
/// this channel.
///
/// The enum discriminant is written verbatim into the `out_ext_mem_bk_size`
/// field of the channel's `out_conf1` register.
#[cfg(esp32s3)]
fn set_out_ext_mem_block_size(size: DmaExtMemBKSize) {
    let encoded = size as u8;
    Self::ch().out_conf1().modify(|_, w| {
        // The raw `bits()` writer is unsafe; `encoded` can only be one of the
        // `DmaExtMemBKSize` discriminants.
        // NOTE(review): presumably every discriminant is a valid field value - confirm against the TRM.
        unsafe { w.out_ext_mem_bk_size().bits(encoded) }
    });
}
fn set_out_burstmode(burst_mode: bool) {
Self::ch().out_conf0().modify(|_, w| {
w.out_data_burst_en()
@ -206,6 +213,13 @@ impl<const N: u8> RegisterAccess for Channel<N> {
.write(|w| w.out_eof().clear_bit_by_one());
}
/// Selects the external-memory block size used by the incoming (RX) side of
/// this channel.
///
/// The enum discriminant is written verbatim into the `in_ext_mem_bk_size`
/// field of the channel's `in_conf1` register.
#[cfg(esp32s3)]
fn set_in_ext_mem_block_size(size: DmaExtMemBKSize) {
    let encoded = size as u8;
    Self::ch().in_conf1().modify(|_, w| {
        // The raw `bits()` writer is unsafe; `encoded` can only be one of the
        // `DmaExtMemBKSize` discriminants.
        // NOTE(review): presumably every discriminant is a valid field value - confirm against the TRM.
        unsafe { w.in_ext_mem_bk_size().bits(encoded) }
    });
}
fn set_in_burstmode(burst_mode: bool) {
Self::ch().in_conf0().modify(|_, w| {
w.in_data_burst_en()
@ -626,6 +640,8 @@ pub use m2m::*;
mod m2m {
use embedded_dma::{ReadBuffer, WriteBuffer};
#[cfg(esp32s3)]
use crate::dma::DmaExtMemBKSize;
use crate::dma::{
dma_private::{DmaSupport, DmaSupportRx},
Channel,
@ -751,6 +767,21 @@ mod m2m {
.prepare_transfer_without_start(self.peripheral, &self.rx_chain)?;
self.channel.rx.set_mem2mem_mode(true);
}
// On esp32s3, configure the external-memory block size of each channel half
// whose buffer lives in PSRAM. The block size is chosen to match the data
// cache line size reported by the ROM.
#[cfg(esp32s3)]
{
// Map the cache line size (in bytes) onto the matching block-size setting.
// Any other line size is treated as unrecoverable and panics.
// NOTE(review): the `unsafe` is needed because the helper is an `unsafe fn`
// wrapping a ROM call; presumably it has no preconditions - confirm.
let align = match unsafe { crate::soc::cache_get_dcache_line_size() } {
16 => DmaExtMemBKSize::Size16,
32 => DmaExtMemBKSize::Size32,
64 => DmaExtMemBKSize::Size64,
_ => panic!("unsupported cache line size"),
};
// Source buffer in PSRAM: configure the TX (outgoing) side.
if crate::soc::is_valid_psram_address(tx_ptr as u32) {
self.channel.tx.set_ext_mem_block_size(align);
}
// Destination buffer in PSRAM: configure the RX (incoming) side.
if crate::soc::is_valid_psram_address(rx_ptr as u32) {
self.channel.rx.set_ext_mem_block_size(align);
}
}
self.channel.tx.start_transfer()?;
self.channel.rx.start_transfer()?;
Ok(DmaTransferRx::new(self))

View File

@ -43,6 +43,11 @@
//! ⚠️ Note: Descriptors should be sized as `(max_transfer_size + CHUNK_SIZE - 1) / CHUNK_SIZE`.
//! I.e., to transfer buffers of size `1..=CHUNK_SIZE`, you need 1 descriptor.
//!
//! ⚠️ Note: For chips that support DMA to/from PSRAM (esp32s3), DMA transfers to/from PSRAM
//! have extra alignment requirements. The address and size of the buffer pointed to by
//! each descriptor must be a multiple of the cache line (block) size. This is 32 bytes
//! on esp32s3.
//!
//! For convenience you can use the [crate::dma_buffers] macro.
#![warn(missing_docs)]
@ -66,7 +71,7 @@ impl Debug for DmaDescriptorFlags {
.field("size", &self.size())
.field("length", &self.length())
.field("suc_eof", &self.suc_eof())
.field("owner", &self.owner())
.field("owner", &(if self.owner() { "DMA" } else { "CPU" }))
.finish()
}
}
@ -95,6 +100,11 @@ impl DmaDescriptor {
self.flags.set_length(len as u16)
}
/// Returns the `size` field of this descriptor's flags, widened to `usize`.
#[allow(unused)]
fn size(&self) -> usize {
    let raw_size = self.flags.size();
    raw_size as usize
}
/// Returns the `length` field of this descriptor's flags, widened to `usize`.
fn len(&self) -> usize {
    let raw_length = self.flags.length();
    raw_length as usize
}
@ -567,8 +577,8 @@ impl DescriptorChain {
) -> Result<(), DmaError> {
if !crate::soc::is_valid_ram_address(self.first() as u32)
|| !crate::soc::is_valid_ram_address(self.last() as u32)
|| !crate::soc::is_valid_ram_address(data as u32)
|| !crate::soc::is_valid_ram_address(unsafe { data.add(len) } as u32)
|| !crate::soc::is_valid_memory_address(data as u32)
|| !crate::soc::is_valid_memory_address(unsafe { data.add(len) } as u32)
{
return Err(DmaError::UnsupportedMemoryRegion);
}
@ -639,8 +649,8 @@ impl DescriptorChain {
) -> Result<(), DmaError> {
if !crate::soc::is_valid_ram_address(self.first() as u32)
|| !crate::soc::is_valid_ram_address(self.last() as u32)
|| !crate::soc::is_valid_ram_address(data as u32)
|| !crate::soc::is_valid_ram_address(unsafe { data.add(len) } as u32)
|| !crate::soc::is_valid_memory_address(data as u32)
|| !crate::soc::is_valid_memory_address(unsafe { data.add(len) } as u32)
{
return Err(DmaError::UnsupportedMemoryRegion);
}
@ -707,6 +717,15 @@ impl DescriptorChain {
}
}
/// Block size for transfers to/from psram
///
/// The discriminant of each variant is the raw value that gets written into
/// the channel's `in_ext_mem_bk_size` / `out_ext_mem_bk_size` register field.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DmaExtMemBKSize {
    /// External memory is accessed in blocks of 16 bytes.
    Size16 = 0,
    /// External memory is accessed in blocks of 32 bytes.
    Size32 = 1,
    /// External memory is accessed in blocks of 64 bytes.
    Size64 = 2,
}
pub(crate) struct TxCircularState {
write_offset: usize,
write_descr_ptr: *mut DmaDescriptor,
@ -967,6 +986,9 @@ pub trait RxPrivate: crate::private::Sealed {
fn start_transfer(&mut self) -> Result<(), DmaError>;
#[cfg(esp32s3)]
fn set_ext_mem_block_size(&self, size: DmaExtMemBKSize);
#[cfg(gdma)]
fn set_mem2mem_mode(&mut self, value: bool);
@ -1119,6 +1141,25 @@ where
return Err(DmaError::InvalidAlignment);
}
// for esp32s3 we check each descriptor buffer that points to psram for
// alignment and invalidate the cache for that buffer
// NOTE: for RX the `buffer` and `size` need to be aligned but the `len` does
// not. TRM section 3.4.9
#[cfg(esp32s3)]
for des in chain.descriptors.iter() {
    // we are forcing the DMA alignment to the cache line size
    // required when we are using dcache
    let alignment = crate::soc::cache_get_dcache_line_size() as usize;
    if crate::soc::is_valid_psram_address(des.buffer as u32) {
        // both the size and address of the buffer must be aligned, so the
        // descriptor is rejected as soon as EITHER of them is misaligned
        // (invalidating a partial cache line would discard unrelated data)
        if des.buffer as usize % alignment != 0 || des.size() % alignment != 0 {
            return Err(DmaError::InvalidAlignment);
        }
        // TODO: make this optional?
        crate::soc::cache_invalidate_addr(des.buffer as u32, des.size() as u32);
    }
}
self.rx_impl.prepare_transfer_without_start(chain, peri)
}
@ -1126,6 +1167,11 @@ where
self.rx_impl.start_transfer()
}
/// Sets the external-memory block size for the receive half of the channel
/// by forwarding to the channel's `set_in_ext_mem_block_size`.
#[cfg(esp32s3)]
fn set_ext_mem_block_size(&self, size: DmaExtMemBKSize) {
    let block_size = size;
    CH::Channel::set_in_ext_mem_block_size(block_size);
}
#[cfg(gdma)]
fn set_mem2mem_mode(&mut self, value: bool) {
CH::Channel::set_mem2mem_mode(value);
@ -1244,6 +1290,9 @@ pub trait TxPrivate: crate::private::Sealed {
fn start_transfer(&mut self) -> Result<(), DmaError>;
#[cfg(esp32s3)]
fn set_ext_mem_block_size(&self, size: DmaExtMemBKSize);
fn clear_ch_out_done(&self);
fn is_ch_out_done_set(&self) -> bool;
@ -1403,6 +1452,21 @@ where
peri: DmaPeripheral,
chain: &DescriptorChain,
) -> Result<(), DmaError> {
// for esp32s3 we check each descriptor buffer that points to psram for
// alignment and writeback the cache for that buffer
#[cfg(esp32s3)]
for des in chain.descriptors.iter() {
    // we are forcing the DMA alignment to the cache line size
    // required when we are using dcache
    let alignment = crate::soc::cache_get_dcache_line_size() as usize;
    if crate::soc::is_valid_psram_address(des.buffer as u32) {
        // both the size and address of the buffer must be aligned, so the
        // descriptor is rejected as soon as EITHER of them is misaligned
        if des.buffer as usize % alignment != 0 || des.size() % alignment != 0 {
            return Err(DmaError::InvalidAlignment);
        }
        // make sure the DMA engine sees what the CPU wrote through the cache
        crate::soc::cache_writeback_addr(des.buffer as u32, des.size() as u32);
    }
}
self.tx_impl.prepare_transfer_without_start(chain, peri)
}
@ -1410,6 +1474,11 @@ where
self.tx_impl.start_transfer()
}
/// Sets the external-memory block size for the transmit half of the channel
/// by forwarding to the channel's `set_out_ext_mem_block_size`.
#[cfg(esp32s3)]
fn set_ext_mem_block_size(&self, size: DmaExtMemBKSize) {
    let block_size = size;
    CH::Channel::set_out_ext_mem_block_size(block_size);
}
fn clear_ch_out_done(&self) {
self.tx_impl.clear_ch_out_done();
}
@ -1489,6 +1558,8 @@ pub trait RegisterAccess: crate::private::Sealed {
fn init_channel();
#[cfg(gdma)]
fn set_mem2mem_mode(value: bool);
#[cfg(esp32s3)]
fn set_out_ext_mem_block_size(size: DmaExtMemBKSize);
fn set_out_burstmode(burst_mode: bool);
fn set_out_priority(priority: DmaPriority);
fn clear_out_interrupts();
@ -1507,6 +1578,8 @@ pub trait RegisterAccess: crate::private::Sealed {
fn reset_out_eof_interrupt();
fn last_out_dscr_address() -> usize;
#[cfg(esp32s3)]
fn set_in_ext_mem_block_size(size: DmaExtMemBKSize);
fn set_in_burstmode(burst_mode: bool);
fn set_in_priority(priority: DmaPriority);
fn clear_in_interrupts();

View File

@ -170,3 +170,35 @@ unsafe fn post_init() {
Wdt::<TIMG0, crate::Blocking>::set_wdt_enabled(false);
Wdt::<TIMG1, crate::Blocking>::set_wdt_enabled(false);
}
/// Writes back the data cache for `size` bytes starting at `addr`.
///
/// DCache autoload is suspended for the duration of the write-back and then
/// resumed with the state returned by the suspend call.
///
/// # Safety
///
/// Calls directly into ROM cache routines. NOTE(review): the exact
/// requirements on `addr`/`size` (valid cached range, alignment) are not
/// visible here - confirm against the ROM/TRM documentation.
#[doc(hidden)]
#[link_section = ".rwtext"]
pub unsafe fn cache_writeback_addr(addr: u32, size: u32) {
    extern "C" {
        fn Cache_Suspend_DCache_Autoload() -> u32;
        fn Cache_WriteBack_Addr(addr: u32, size: u32);
        fn Cache_Resume_DCache_Autoload(value: u32);
    }
    // Autoload is paused first so that cache lines being loaded are not
    // written back; the saved state is restored once the write-back is done.
    let saved_autoload = Cache_Suspend_DCache_Autoload();
    Cache_WriteBack_Addr(addr, size);
    Cache_Resume_DCache_Autoload(saved_autoload);
}
/// Invalidates the cache for `size` bytes starting at `addr` by calling the
/// ROM routine `Cache_Invalidate_Addr`.
///
/// # Safety
///
/// Calls directly into a ROM cache routine. NOTE(review): presumably any
/// not-yet-written-back data in the range is lost and the range should be
/// cache-line aligned - confirm against the ROM/TRM documentation.
#[doc(hidden)]
#[link_section = ".rwtext"]
pub unsafe fn cache_invalidate_addr(addr: u32, size: u32) {
    extern "C" {
        fn Cache_Invalidate_Addr(addr: u32, size: u32);
    }

    Cache_Invalidate_Addr(addr, size);
}
/// Returns the value reported by the ROM routine `Cache_Get_DCache_Line_Size`.
///
/// Callers in the DMA code treat the result as the data cache line size in
/// bytes.
///
/// # Safety
///
/// Calls directly into a ROM routine. NOTE(review): presumably free of
/// preconditions - confirm against the ROM documentation.
#[doc(hidden)]
#[link_section = ".rwtext"]
pub unsafe fn cache_get_dcache_line_size() -> u32 {
    extern "C" {
        fn Cache_Get_DCache_Line_Size() -> u32;
    }

    Cache_Get_DCache_Line_Size()
}

View File

@ -71,3 +71,20 @@ impl self::efuse::Efuse {
/// Returns `true` when `address` lies within `SOC_DRAM_LOW..=SOC_DRAM_HIGH`
/// (both bounds inclusive).
pub(crate) fn is_valid_ram_address(address: u32) -> bool {
    let dram = self::constants::SOC_DRAM_LOW..=self::constants::SOC_DRAM_HIGH;
    dram.contains(&address)
}
/// Returns `true` when `address` lies within the mapped PSRAM window.
///
/// The window starts at `psram_vaddr_start()` and spans `PSRAM_BYTES`; the
/// upper bound is inclusive, so the one-past-the-end address is accepted as
/// well. Without the `psram` cfg this always returns `false`.
#[allow(unused)]
pub(crate) fn is_valid_psram_address(address: u32) -> bool {
    #[cfg(psram)]
    {
        let base = crate::psram::psram_vaddr_start() as u32;
        let limit = base + crate::psram::PSRAM_BYTES as u32;
        let window = base..=limit;
        window.contains(&address)
    }
    #[cfg(not(psram))]
    false
}
/// Returns `true` when `address` is accepted by either
/// `is_valid_ram_address` or `is_valid_psram_address`.
#[allow(unused)]
pub(crate) fn is_valid_memory_address(address: u32) -> bool {
    // Internal RAM is checked first; PSRAM is only consulted if that fails.
    if is_valid_ram_address(address) {
        return true;
    }
    is_valid_psram_address(address)
}

View File

@ -7,6 +7,7 @@ publish = false
[dependencies]
aes = "0.8.4"
aligned = { version = "0.4.2", optional = true }
bleps = { git = "https://github.com/bjoernQ/bleps", package = "bleps", rev = "a5148d8ae679e021b78f53fd33afb8bb35d0b62e", features = [ "macros", "async"] }
cfg-if = "1.0.0"
critical-section = "1.1.2"

View File

@ -0,0 +1,158 @@
//! Uses DMA to copy from psram to internal memory, and then from internal memory back to psram.
//% FEATURES: esp-hal/log opsram-2m aligned
//% CHIPS: esp32s3
#![no_std]
#![no_main]
use aligned::{Aligned, A64};
use esp_backtrace as _;
use esp_hal::{
clock::ClockControl,
delay::Delay,
dma::{Dma, DmaPriority, Mem2Mem},
dma_descriptors_chunk_size,
peripherals::Peripherals,
prelude::*,
system::SystemControl,
};
use log::{error, info};
extern crate alloc;
const DATA_SIZE: usize = 1024 * 10;
const CHUNK_SIZE: usize = 4032; // size is aligned to 64 bytes
/// Declares a zero-initialised `static mut` byte array of `$len` bytes wrapped
/// in `Aligned<$alignment, _>` and evaluates to a mutable reference to it.
///
/// Every expansion site gets its own static, so the macro must be expanded at
/// most once per buffer that is in use.
macro_rules! dma_buffer_aligned {
    ($len:expr, $alignment:ty) => {{
        static mut STORAGE: Aligned<$alignment, [u8; $len]> = Aligned([0; $len]);
        unsafe { &mut *STORAGE }
    }};
}
/// Allocates a zero-initialised `$size`-byte buffer with `$align` alignment
/// from the global allocator and evaluates to it as a `&mut [u8]`.
///
/// The allocation is never freed: the returned slice has an unconstrained
/// lifetime and is intended to live for the rest of the program.
///
/// `$size` must be non-zero (allocating a zero-sized layout is not permitted
/// by the global allocator contract).
///
/// # Panics
///
/// Panics if `$size`/`$align` do not form a valid `Layout`; diverges through
/// `handle_alloc_error` if the allocator returns null.
macro_rules! dma_alloc_buffer {
    ($size:expr, $align:expr) => {{
        let layout = core::alloc::Layout::from_size_align($size, $align).unwrap();
        // SAFETY: `layout` is valid (checked by `from_size_align`), the pointer
        // is checked for null before use, and `alloc_zeroed` guarantees that all
        // `$size` bytes are initialised before a slice reference is formed.
        unsafe {
            let ptr = alloc::alloc::alloc_zeroed(layout);
            if ptr.is_null() {
                error!("dma_alloc_buffer: alloc failed");
                alloc::alloc::handle_alloc_error(layout);
            }
            core::slice::from_raw_parts_mut(ptr, $size)
        }
    }};
}
#[global_allocator]
static ALLOCATOR: esp_alloc::EspHeap = esp_alloc::EspHeap::empty();
/// Initialises PSRAM and hands the whole region to the global allocator.
///
/// After this call every heap allocation made through `ALLOCATOR` is served
/// from the range starting at `psram_vaddr_start()` and spanning
/// `PSRAM_BYTES`.
fn init_heap(psram: impl esp_hal::peripheral::Peripheral<P = esp_hal::peripherals::PSRAM>) {
    esp_hal::psram::init_psram(psram);

    let logged_start = esp_hal::psram::psram_vaddr_start();
    info!("init_heap: start: 0x{:0x}", logged_start);

    let heap_base = esp_hal::psram::psram_vaddr_start() as *mut u8;
    let heap_len = esp_hal::psram::PSRAM_BYTES;
    unsafe {
        ALLOCATOR.init(heap_base, heap_len);
    }
}
// Example entry point: copies DATA_SIZE bytes with the Mem2Mem DMA driver,
// first from a PSRAM buffer into an internal buffer and then back the other
// way, comparing both buffers after each transfer and logging the result.
#[entry]
fn main() -> ! {
esp_println::logger::init_logger(log::LevelFilter::Info);
let peripherals = Peripherals::take();
// Bring up PSRAM and register it with the global allocator, so that the
// heap allocation below is served from PSRAM.
init_heap(peripherals.PSRAM);
let system = SystemControl::new(peripherals.SYSTEM);
let clocks = ClockControl::boot_defaults(system.clock_control).freeze();
let delay = Delay::new(&clocks);
// Heap (PSRAM) buffer, allocated with 64-byte alignment.
let mut extram_buffer: &mut [u8] = dma_alloc_buffer!(DATA_SIZE, 64);
// Statically allocated buffer, aligned via `A64`.
let mut intram_buffer = dma_buffer_aligned!(DATA_SIZE, A64);
let (tx_descriptors, rx_descriptors) = dma_descriptors_chunk_size!(DATA_SIZE, CHUNK_SIZE);
let dma = Dma::new(peripherals.DMA);
let channel = dma.channel0.configure(false, DmaPriority::Priority0);
// NOTE(review): SPI2 is only handed to Mem2Mem as its DMA peripheral;
// presumably it is not used as an SPI bus here - confirm.
let dma_peripheral = peripherals.SPI2;
let mut mem2mem = Mem2Mem::new_with_chunk_size(
channel,
dma_peripheral,
tx_descriptors,
rx_descriptors,
CHUNK_SIZE,
)
.unwrap();
// Fill the source with a repeating 0..=255 pattern and the destination with
// its complement, so every byte differs before the transfer.
for i in 0..core::mem::size_of_val(extram_buffer) {
extram_buffer[i] = (i % 256) as u8;
intram_buffer[i] = 255 - extram_buffer[i];
}
// First direction: PSRAM buffer -> internal buffer.
info!(" ext2int: Starting transfer of {} bytes", DATA_SIZE);
match mem2mem.start_transfer(&extram_buffer, &mut intram_buffer) {
Ok(dma_wait) => {
info!("Transfer started");
dma_wait.wait().unwrap();
info!("Transfer completed, comparing buffer");
// Report only the first mismatching byte, then stop comparing.
let mut error = false;
for i in 0..core::mem::size_of_val(extram_buffer) {
if intram_buffer[i] != extram_buffer[i] {
error!(
"Error: extram_buffer[{}] = {}, intram_buffer[{}] = {}",
i, extram_buffer[i], i, intram_buffer[i]
);
error = true;
break;
}
}
if !error {
info!("Buffers are equal");
}
info!("Done");
}
Err(e) => {
error!("start_transfer: Error: {:?}", e);
}
}
// Re-seed both buffers with the roles swapped: the internal buffer now holds
// the pattern and the PSRAM buffer its complement.
for i in 0..core::mem::size_of_val(extram_buffer) {
intram_buffer[i] = (i % 256) as u8;
extram_buffer[i] = 255 - intram_buffer[i];
}
// Second direction: internal buffer -> PSRAM buffer.
info!(" int2ext: Starting transfer of {} bytes", DATA_SIZE);
match mem2mem.start_transfer(&intram_buffer, &mut extram_buffer) {
Ok(dma_wait) => {
info!("Transfer started");
dma_wait.wait().unwrap();
info!("Transfer completed, comparing buffer");
// Report only the first mismatching byte, then stop comparing.
let mut error = false;
for i in 0..core::mem::size_of_val(extram_buffer) {
if intram_buffer[i] != extram_buffer[i] {
error!(
"Error: extram_buffer[{}] = {}, intram_buffer[{}] = {}",
i, extram_buffer[i], i, intram_buffer[i]
);
error = true;
break;
}
}
if !error {
info!("Buffers are equal");
}
info!("Done");
}
Err(e) => {
error!("start_transfer: Error: {:?}", e);
}
}
// Nothing left to do; idle forever.
loop {
delay.delay(2.secs());
}
}