//! Secure Hash Algorithm peripheral driver
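//!
//! # Usage
//!
//! A minimal, illustrative sketch (not a compiled doctest): it assumes the
//! usual HAL start-up has already produced `peripherals` and a `system` with
//! `peripheral_clock_control`; those names are placeholders, only the `Sha`
//! API below is defined in this module.
//!
//! ```ignore
//! let mut hasher = Sha::new(
//!     peripherals.SHA,
//!     ShaMode::SHA256,
//!     &mut system.peripheral_clock_control,
//! );
//!
//! // Feed data; `update` may leave bytes unconsumed, so loop until empty.
//! let mut remaining: &[u8] = b"hello world";
//! while !remaining.is_empty() {
//!     remaining = nb::block!(hasher.update(remaining)).unwrap();
//! }
//!
//! // Retrieve the digest; the buffer should be `digest_length()` bytes.
//! let mut digest = [0u8; 32];
//! nb::block!(hasher.finish(&mut digest)).unwrap();
//! ```
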
use core::convert::Infallible;

use crate::{
    peripheral::{Peripheral, PeripheralRef},
    peripherals::SHA,
    system::PeripheralClockControl,
};

// All the hash algorithms introduced in FIPS PUB 180-4 Spec:
// – SHA-1
// – SHA-224
// – SHA-256
// – SHA-384
// – SHA-512
// – SHA-512/224
// – SHA-512/256
// – SHA-512/t (not implemented yet)
// Two working modes:
// – Typical SHA
// – DMA-SHA (not implemented yet)

const ALIGN_SIZE: usize = core::mem::size_of::<u32>();

// ESP32 does reversed order
#[cfg(esp32)]
const U32_FROM_BYTES: fn([u8; 4]) -> u32 = u32::from_be_bytes;

#[cfg(not(esp32))]
const U32_FROM_BYTES: fn([u8; 4]) -> u32 = u32::from_ne_bytes;

// The alignment helper helps you write to registers that only accept u32
// using regular u8s (bytes). It keeps a write buffer of 4 u8s (it could in
// theory be 3, but that is less convenient), and if the incoming data is not
// convertible to u32 (i.e. not a multiple of 4 in length) it stores the
// remainder in the buffer until the next call.
//
// It assumes the incoming `dst` is aligned to the desired layout (in the
// future `ptr.is_aligned` can be used). It also assumes that writes are done
// in FIFO order.
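//
// An illustrative sketch of the intended behaviour (not a doctest; the helper
// is private and writes directly to peripheral registers, and `dst`/`data`
// below are placeholders):
//
//     let mut helper = AlignmentHelper::default();
//     // 7 bytes in: one full u32 is written to `dst`, 3 bytes stay buffered.
//     let (rest, bounded) = unsafe { helper.aligned_volatile_copy(dst, &data[..7], 64) };
//     // A later `flush_to(dst)` zero-pads the 3 buffered bytes and writes
//     // them out as one final u32.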
#[derive(Debug)]
struct AlignmentHelper {
    buf: [u8; ALIGN_SIZE],
    buf_fill: usize,
}

impl AlignmentHelper {
    pub fn default() -> AlignmentHelper {
        AlignmentHelper {
            buf: [0u8; ALIGN_SIZE],
            buf_fill: 0,
        }
    }

    // This function writes any remaining buffered bytes to `dst` and returns
    // the number of *bytes* that were flushed (0 means no write happened).
    pub unsafe fn flush_to(&mut self, dst: *mut u32) -> usize {
        if self.buf_fill != 0 {
            for i in self.buf_fill..ALIGN_SIZE {
                self.buf[i] = 0;
            }

            dst.write_volatile(U32_FROM_BYTES(self.buf));
        }

        let flushed = self.buf_fill;
        self.buf_fill = 0;

        flushed
    }

    // This function is similar to `volatile_set_memory`, but will prepend data
    // that was previously ingested and ensure aligned (u32) writes.
    #[allow(unused)]
    pub unsafe fn volatile_write_bytes(&mut self, dst: *mut u32, val: u8, count: usize) {
        let mut cursor = 0;
        if self.buf_fill != 0 {
            for i in self.buf_fill..ALIGN_SIZE {
                self.buf[i] = val;
            }

            dst.write_volatile(U32_FROM_BYTES(self.buf));
            cursor = 1;

            self.buf_fill = 0;
        }

        core::ptr::write_bytes(dst.add(cursor), val, count);
    }

    // This function is similar to `volatile_copy_nonoverlapping_memory`, but it
    // buffers up to a u32 in order to always write to the registers in an
    // aligned way. Additionally, it stops writing at the end of the register
    // (defined by `dst_bound` relative to `dst`) and returns the remaining data
    // (if it was not possible to write everything) together with a flag
    // indicating whether it wrote up to `dst_bound` or exited early (due to
    // lack of data).
    pub unsafe fn aligned_volatile_copy<'a>(
        &mut self,
        dst: *mut u32,
        src: &'a [u8],
        dst_bound: usize,
    ) -> (&'a [u8], bool) {
        assert!(dst_bound > 0);

        let mut nsrc = src;
        let mut cursor = 0;
        if self.buf_fill != 0 {
            // First prepend existing data
            let max_fill = ALIGN_SIZE - self.buf_fill;
            let (nbuf, src) = src.split_at(core::cmp::min(src.len(), max_fill));
            nsrc = src;
            for i in 0..max_fill {
                match nbuf.get(i) {
                    Some(v) => {
                        self.buf[self.buf_fill + i] = *v;
                        self.buf_fill += 1;
                    }
                    None => return (&[], false), // Ran out of input before filling the buffer
                }
            }

            dst.write_volatile(U32_FROM_BYTES(self.buf));
            cursor += 1;

            self.buf_fill = 0;
        }

        if dst_bound <= cursor * ALIGN_SIZE {
            return (nsrc, true);
        }

        let (to_write, remaining) = nsrc.split_at(core::cmp::min(
            dst_bound - cursor * ALIGN_SIZE,
            (nsrc.len() / ALIGN_SIZE) * ALIGN_SIZE, // TODO: unstable div_floor for clarity?
        ));

        if to_write.len() > 0 {
            // Raw volatile_copy_nonoverlapping_memory also works, but only when
            // src.len() >= 4 * ALIGN_SIZE; otherwise it would be broken:
            // core::intrinsics::volatile_copy_nonoverlapping_memory::<u32>(dst.add(cursor),
            // to_write.as_ptr() as *const u32, to_write.len()/alignment);
            for (i, v) in to_write.chunks_exact(ALIGN_SIZE).enumerate() {
                dst.add(i)
                    .write_volatile(U32_FROM_BYTES(v.try_into().unwrap()).to_be());
            }
        }

        // If there is data we can't store yet, we don't need to try to align
        // it; just wait for the next write. Generally this applies when
        // (src/4*4) != src.
        let was_bounded = dst_bound - to_write.len() == 0;
        if remaining.len() > 0 && remaining.len() < 4 {
            for i in 0..remaining.len() {
                self.buf[i] = remaining[i];
            }

            self.buf_fill = remaining.len();

            return (&[], was_bounded);
        }

        (remaining, was_bounded)
    }
}

/// Secure Hash Algorithm driver, wrapping the SHA peripheral.
pub struct Sha<'d> {
    sha: PeripheralRef<'d, SHA>,
    mode: ShaMode,
    alignment_helper: AlignmentHelper,
    cursor: usize,
    first_run: bool,
    finished: bool,
}

/// The hash algorithm to compute. Some variants are only available on certain
/// chips (see the `cfg` attributes).
#[derive(Debug, Clone, Copy)]
pub enum ShaMode {
    SHA1,
    #[cfg(not(esp32))]
    SHA224,
    SHA256,
    #[cfg(any(esp32s2, esp32s3, esp32))]
    SHA384,
    #[cfg(any(esp32s2, esp32s3, esp32))]
    SHA512,
    #[cfg(any(esp32s2, esp32s3))]
    SHA512_224,
    #[cfg(any(esp32s2, esp32s3))]
    SHA512_256,
    // SHA512_(u16) // Max 511
}

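// For example (illustrative only, see `Sha::digest_length` below): a caller
// using `ShaMode::SHA1` pairs it with a 20-byte output buffer,
// `ShaMode::SHA256` with 32 bytes, and `ShaMode::SHA512` (where available)
// with 64 bytes.
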
// TODO: Maybe make Sha generic (Sha<Mode>) in order to allow for better
// compiler optimizations? (Requires complex const generics, which aren't
// stable yet.)

#[cfg(not(esp32))]
fn mode_as_bits(mode: ShaMode) -> u8 {
    match mode {
        ShaMode::SHA1 => 0,
        ShaMode::SHA224 => 1,
        ShaMode::SHA256 => 2,
        #[cfg(any(esp32s2, esp32s3))]
        ShaMode::SHA384 => 3,
        #[cfg(any(esp32s2, esp32s3))]
        ShaMode::SHA512 => 4,
        #[cfg(any(esp32s2, esp32s3))]
        ShaMode::SHA512_224 => 5,
        #[cfg(any(esp32s2, esp32s3))]
        ShaMode::SHA512_256 => 6,
        // _ => 0 // TODO: SHA512/t
    }
}

// TODO: Allow/Implement SHA512_(u16)

// A few notes on this implementation with regards to 'memcpy':
// - It seems that ptr::write_bytes already acts as volatile, while ptr::copy_*
//   does not (in this case)
// - The registers are *not* cleared after processing, so padding needs to be
//   written out
// - This component uses core::intrinsics::volatile_*, which is unstable but is
//   the only way to efficiently copy memory with volatile semantics
// - For these particular registers (and probably others), a full u32 needs to
//   be written; partial register writes (i.e. in u8 mode) do not work
//   - This means that incoming bytes need to be buffered, up to 4 u8s, in
//     order to create a full u32

// This implementation might fail after u32::MAX/8 bytes; to increase that
// limit, see the length/self.cursor usage in ::finish()
impl<'d> Sha<'d> {
    /// Create a new SHA driver for the given mode, enabling the SHA
    /// peripheral's clock in the process.
    pub fn new(
        sha: impl Peripheral<P = SHA> + 'd,
        mode: ShaMode,
        peripheral_clock_control: &mut PeripheralClockControl,
    ) -> Self {
        crate::into_ref!(sha);
        peripheral_clock_control.enable(crate::system::Peripheral::Sha);

        // Setup SHA Mode
        #[cfg(not(esp32))]
        sha.mode
            .write(|w| unsafe { w.mode().bits(mode_as_bits(mode)) });

        Self {
            sha,
            mode,
            cursor: 0,
            first_run: true,
            finished: false,
            alignment_helper: AlignmentHelper::default(),
        }
    }

    /// Returns `true` if the driver has not yet started processing any data.
    pub fn first_run(&self) -> bool {
        self.first_run
    }

    /// Returns `true` if the current hash has been finalized by `finish()` and
    /// no new data has been written since.
    pub fn finished(&self) -> bool {
        self.finished
    }

    #[cfg(not(esp32))]
    fn process_buffer(&mut self) {
        // FIXME: SHA_START_REG & SHA_CONTINUE_REG are wrongly marked as RO
        // (they are WO)
        if self.first_run {
            // Set SHA_START_REG
            unsafe {
                self.sha.start.as_ptr().write_volatile(1u32);
            }
            self.first_run = false;
        } else {
            // Set SHA_CONTINUE_REG
            unsafe {
                self.sha.continue_.as_ptr().write_volatile(1u32);
            }
        }
    }

    #[cfg(esp32)]
    fn process_buffer(&mut self) {
        if self.first_run {
            match self.mode {
                ShaMode::SHA1 => self.sha.sha1_start.write(|w| unsafe { w.bits(1) }),
                ShaMode::SHA256 => self.sha.sha256_start.write(|w| unsafe { w.bits(1) }),
                ShaMode::SHA384 => self.sha.sha384_start.write(|w| unsafe { w.bits(1) }),
                ShaMode::SHA512 => self.sha.sha512_start.write(|w| unsafe { w.bits(1) }),
            }
            self.first_run = false;
        } else {
            match self.mode {
                ShaMode::SHA1 => self.sha.sha1_continue.write(|w| unsafe { w.bits(1) }),
                ShaMode::SHA256 => self.sha.sha256_continue.write(|w| unsafe { w.bits(1) }),
                ShaMode::SHA384 => self.sha.sha384_continue.write(|w| unsafe { w.bits(1) }),
                ShaMode::SHA512 => self.sha.sha512_continue.write(|w| unsafe { w.bits(1) }),
            }
        }
    }

    fn chunk_length(&self) -> usize {
        match self.mode {
            ShaMode::SHA1 | ShaMode::SHA256 => 64,
            #[cfg(not(esp32))]
            ShaMode::SHA224 => 64,
            #[cfg(not(any(esp32c2, esp32c3, esp32c6, esp32h2)))]
            _ => 128,
        }
    }

    #[cfg(esp32)]
    fn is_busy(&self) -> bool {
        match self.mode {
            ShaMode::SHA1 => self.sha.sha1_busy.read().sha1_busy().bit_is_set(),
            ShaMode::SHA256 => self.sha.sha256_busy.read().sha256_busy().bit_is_set(),
            ShaMode::SHA384 => self.sha.sha384_busy.read().sha384_busy().bit_is_set(),
            ShaMode::SHA512 => self.sha.sha512_busy.read().sha512_busy().bit_is_set(),
        }
    }

    #[cfg(not(esp32))]
    fn is_busy(&self) -> bool {
        self.sha.busy.read().bits() != 0
    }

    /// Returns the digest length in bytes for the configured `ShaMode`.
    pub fn digest_length(&self) -> usize {
        match self.mode {
            ShaMode::SHA1 => 20,
            #[cfg(not(esp32))]
            ShaMode::SHA224 => 28,
            ShaMode::SHA256 => 32,
            #[cfg(any(esp32, esp32s2, esp32s3))]
            ShaMode::SHA384 => 48,
            #[cfg(any(esp32, esp32s2, esp32s3))]
            ShaMode::SHA512 => 64,
            #[cfg(any(esp32s2, esp32s3))]
            ShaMode::SHA512_224 => 28,
            #[cfg(any(esp32s2, esp32s3))]
            ShaMode::SHA512_256 => 32,
        }
    }

    #[cfg(not(esp32))]
    fn input_ptr(&self) -> *mut u32 {
        self.sha.m_mem[0].as_ptr() as *mut u32
    }

    #[cfg(esp32)]
    fn input_ptr(&self) -> *mut u32 {
        self.sha.text[0].as_ptr() as *mut u32
    }

    #[cfg(not(esp32))]
    fn output_ptr(&self) -> *const u32 {
        self.sha.h_mem[0].as_ptr() as *const u32
    }

    #[cfg(esp32)]
    fn output_ptr(&self) -> *const u32 {
        self.sha.text[0].as_ptr() as *const u32
    }

    fn flush_data(&mut self) -> nb::Result<(), Infallible> {
        if self.is_busy() {
            return Err(nb::Error::WouldBlock);
        }

        unsafe {
            let dst_ptr = self
                .input_ptr()
                .add((self.cursor % self.chunk_length()) / ALIGN_SIZE);
            let flushed = self.alignment_helper.flush_to(dst_ptr);
            if flushed != 0 {
                self.cursor = self.cursor.wrapping_add(ALIGN_SIZE - flushed);
                if self.cursor % self.chunk_length() == 0 {
                    self.process_buffer();
                }
            }
        }

        Ok(())
    }

    // This function ensures that incoming data is aligned to u32 (due to
    // issues with cpy_mem<u8>)
    fn write_data<'a>(&mut self, incoming: &'a [u8]) -> nb::Result<&'a [u8], Infallible> {
        let mod_cursor = self.cursor % self.chunk_length();

        unsafe {
            let ptr = self.input_ptr().add(mod_cursor / ALIGN_SIZE);
            let (remaining, bound_reached) = self.alignment_helper.aligned_volatile_copy(
                ptr,
                incoming,
                self.chunk_length() - mod_cursor,
            );
            self.cursor = self.cursor.wrapping_add(incoming.len() - remaining.len());
            if bound_reached {
                self.process_buffer();
            }

            Ok(remaining)
        }
    }

    /// Feed `buffer` into the running hash. Returns the bytes that could not
    /// be consumed yet; pass them to a later `update()` call.
    pub fn update<'a>(&mut self, buffer: &'a [u8]) -> nb::Result<&'a [u8], Infallible> {
        if self.is_busy() {
            return Err(nb::Error::WouldBlock);
        }

        self.finished = false;

        let remaining = self.write_data(buffer)?;

        Ok(remaining)
    }

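    // A minimal sketch of the "short hash" behaviour described on `finish()`
    // below (`hasher` is a hypothetical, already-fed SHA-256 `Sha` instance):
    // passing a 16-byte buffer copies out only the first 16 bytes of the
    // digest.
    //
    //     let mut short = [0u8; 16];
    //     nb::block!(hasher.finish(&mut short)).unwrap();
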
    /// Finish off the hash calculation (if not already done) and copy the
    /// result into `output`.
    ///
    /// After `finish()` returns, subsequent `update()` calls contribute to a
    /// new hash, which can again be retrieved with `finish()`.
    ///
    /// Typically `output` should be `digest_length()` bytes long, but a
    /// smaller buffer can be passed to obtain a "short hash".
    pub fn finish(&mut self, output: &mut [u8]) -> nb::Result<(), Infallible> {
        // The main purpose of this function is to dynamically generate padding
        // for the input. Padding: append a "1" bit, pad with zeros until the
        // 512/1024-bit block is filled, then set the message length in the
        // last word (overwriting the padding). If there is not enough free
        // space for length+1, the length is added at the end of a new zeroed
        // block.
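        //
        // As a concrete illustration of that layout (SHA-256, 64-byte blocks):
        // hashing the 3-byte message "abc" produces one final block of
        //     0x61 0x62 0x63 0x80 0x00 ... 0x00 [message bit length = 24]
        // with the bit length written into the last word of the block (this
        // driver stores it as a single u32, hence the u32::MAX/8 byte limit
        // noted above).
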
        if self.is_busy() {
            return Err(nb::Error::WouldBlock);
        }

        let chunk_len = self.chunk_length();

        if !self.finished {
            // Store message length for padding
            let length = self.cursor * 8;
            nb::block!(self.update(&[0x80]))?; // Append "1" bit
            nb::block!(self.flush_data())?; // Flush partial data, ensures aligned cursor
            debug_assert!(self.cursor % 4 == 0);

            let mod_cursor = self.cursor % chunk_len;
            if chunk_len - mod_cursor < chunk_len / 8 {
                // Zero out the remaining data if the buffer is almost full
                // (>= 448/896 bits used), and process the buffer
                let pad_len = chunk_len - mod_cursor;
                unsafe {
                    let m_cursor_ptr = self.input_ptr().add(mod_cursor / ALIGN_SIZE);
                    self.alignment_helper.volatile_write_bytes(
                        m_cursor_ptr,
                        0,
                        pad_len / ALIGN_SIZE,
                    );
                }
                self.process_buffer();
                self.cursor = self.cursor.wrapping_add(pad_len);

                // Spin-wait for the hardware to finish processing this block
                while self.is_busy() {}
            }

            let mod_cursor = self.cursor % chunk_len; // Should be zero if branched above
            unsafe {
                let m_cursor_ptr = self.input_ptr();
                // Pad zeros
                let pad_ptr = m_cursor_ptr.add(mod_cursor / ALIGN_SIZE);
                let pad_len = (chunk_len - mod_cursor) - ALIGN_SIZE;

                self.alignment_helper
                    .volatile_write_bytes(pad_ptr, 0, pad_len / ALIGN_SIZE);

                // Write the message length (big-endian) to the end of the block.
                // NOTE: aligned_volatile_copy does not work here.
                // The decompiler suggests volatile_copy_memory/write_volatile is
                // optimized to a simple *v = *pv, while aligned_volatile_copy
                // makes an actual call to memcpy; why this makes a difference
                // when memcpy does work in other places, I don't know.
                let end_ptr = m_cursor_ptr.add((chunk_len / ALIGN_SIZE) - 1);
                #[cfg(not(esp32))]
                end_ptr.write_volatile(length.to_be() as u32);
                #[cfg(esp32)]
                end_ptr.write_volatile(length.to_le() as u32);
            }

            self.process_buffer();
            // Spin-wait for the final buffer to be processed
            while self.is_busy() {}

            // ESP32 requires an additional load to retrieve the output
            #[cfg(esp32)]
            {
                match self.mode {
                    ShaMode::SHA1 => unsafe { self.sha.sha1_load.write(|w| w.bits(1)) },
                    ShaMode::SHA256 => unsafe { self.sha.sha256_load.write(|w| w.bits(1)) },
                    ShaMode::SHA384 => unsafe { self.sha.sha384_load.write(|w| w.bits(1)) },
                    ShaMode::SHA512 => unsafe { self.sha.sha512_load.write(|w| w.bits(1)) },
                }

                // Spin-wait for result, 8-20 clock cycles according to the manual
                while self.is_busy() {}
            }

            self.finished = true;
        }

        unsafe {
            let digest_ptr = self.output_ptr();
            let out_ptr = output.as_mut_ptr() as *mut u32;
            let digest_out = core::cmp::min(self.digest_length(), output.len()) / ALIGN_SIZE;
            for i in 0..digest_out {
                #[cfg(not(esp32))]
                out_ptr.add(i).write(*digest_ptr.add(i));
                // ESP32 does reversed order
                #[cfg(esp32)]
                out_ptr.add(i).write((*digest_ptr.add(i)).to_be());
            }
        }

        Ok(())
    }
}