esp-hal/esp-hal-common/src/sha.rs
Jesse Braham 0b26d3851c Clean up re-exports and make small improvements to documentation (#607)
* Create issue_handler.yml

* No longer re-export `embedded-hal`, hide exported macros in documentation

* Add simple package-level documentation for each HAL package

* Clean up/simplify re-exports

* Fix the examples that I broke

* Ensure top-level modules/types/functions have doc comments

* Update CHANGELOG

* Re-export the `soc::psram` module where available

---------

Co-authored-by: Sergio Gasquez Arcos <sergio.gasquez@gmail.com>
2023-07-04 11:47:21 +01:00

524 lines
17 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Secure Hash Algorithm peripheral driver
use core::convert::Infallible;
use crate::{
peripheral::{Peripheral, PeripheralRef},
peripherals::SHA,
system::PeripheralClockControl,
};
// All the hash algorithms introduced in FIPS PUB 180-4 Spec.
// SHA-1
// SHA-224
// SHA-256
// SHA-384
// SHA-512
// SHA-512/224
// SHA-512/256
// SHA-512/t (not implemented yet)
// Two working modes
// Typical SHA
// DMA-SHA (not implemented yet)
const ALIGN_SIZE: usize = core::mem::size_of::<u32>();
// ESP32 does reversed order
#[cfg(esp32)]
const U32_FROM_BYTES: fn([u8; 4]) -> u32 = u32::from_be_bytes;
#[cfg(not(esp32))]
const U32_FROM_BYTES: fn([u8; 4]) -> u32 = u32::from_ne_bytes;
// The alignment helper helps you write to registers that only accepts u32 using
// regular u8s (bytes) It keeps a write buffer of 4 u8 (could in theory be 3 but
// less convient) And if the incoming data is not convertable to u32 (i.e. not a
// multiple of 4 in length) it will store the remainder in the buffer until the
// next call
//
// It assumes incoming `dst` are aligned to desired layout (in future
// ptr.is_aligned can be used) It also assumes that writes are done in FIFO
// order
#[derive(Debug)]
struct AlignmentHelper {
buf: [u8; ALIGN_SIZE],
buf_fill: usize,
}
impl AlignmentHelper {
pub fn default() -> AlignmentHelper {
AlignmentHelper {
buf: [0u8; ALIGN_SIZE],
buf_fill: 0,
}
}
// This function will write any remaining buffer to dst and return the amount of
// *bytes* written (0 means no write)
pub unsafe fn flush_to(&mut self, dst: *mut u32) -> usize {
if self.buf_fill != 0 {
for i in self.buf_fill..ALIGN_SIZE {
self.buf[i] = 0;
}
dst.write_volatile(U32_FROM_BYTES(self.buf));
}
let flushed = self.buf_fill;
self.buf_fill = 0;
return flushed;
}
// This function is similar to `volatile_set_memory` but will prepend data that
// was previously ingested and ensure aligned (u32) writes
#[allow(unused)]
pub unsafe fn volatile_write_bytes(&mut self, dst: *mut u32, val: u8, count: usize) {
let mut cursor = 0;
if self.buf_fill != 0 {
for i in self.buf_fill..ALIGN_SIZE {
self.buf[i] = val;
}
dst.write_volatile(U32_FROM_BYTES(self.buf));
cursor = 1;
self.buf_fill = 0;
}
core::ptr::write_bytes(dst.add(cursor), val, count);
}
// This function is similar to `volatile_copy_nonoverlapping_memory`, however it
// buffers up to a u32 in order to always write to registers in an aligned
// way. Additionally it will keep stop writing when the end of the register
// (defined by `dst_bound` relative to `dst`) and returns the remaining data
// (if not possible to write everything), and if it wrote till dst_bound or
// exited early (due to lack of data).
pub unsafe fn aligned_volatile_copy<'a>(
&mut self,
dst: *mut u32,
src: &'a [u8],
dst_bound: usize,
) -> (&'a [u8], bool) {
assert!(dst_bound > 0);
let mut nsrc = src;
let mut cursor = 0;
if self.buf_fill != 0 {
// First prepend existing data
let max_fill = ALIGN_SIZE - self.buf_fill;
let (nbuf, src) = src.split_at(core::cmp::min(src.len(), max_fill));
nsrc = src;
for i in 0..max_fill {
match nbuf.get(i) {
Some(v) => {
self.buf[self.buf_fill + i] = *v;
self.buf_fill += 1;
}
None => return (&[], false), // Used up entire buffer before filling buff_fil
}
}
dst.write_volatile(U32_FROM_BYTES(self.buf));
cursor += 1;
self.buf_fill = 0;
}
if dst_bound <= cursor * ALIGN_SIZE {
return (nsrc, true);
}
let (to_write, remaining) = nsrc.split_at(core::cmp::min(
dst_bound - cursor * ALIGN_SIZE,
(nsrc.len() / ALIGN_SIZE) * ALIGN_SIZE, // TODO: unstable div_floor for clarity?
));
if to_write.len() > 0 {
// Raw v_c_n_m also works but only when src.len() >= 4 * ALIGN_SIZE, otherwise
// it be broken
// core::intrinsics::volatile_copy_nonoverlapping_memory::<u32>(dst.add(cursor),
// to_write.as_ptr() as *const u32, to_write.len()/alignment);
for (i, v) in to_write.chunks_exact(ALIGN_SIZE).enumerate() {
dst.add(i)
.write_volatile(U32_FROM_BYTES(v.try_into().unwrap()).to_be());
}
}
// If it's data we can't store we don't need to try and align it, just wait for
// next write Generally this applies when (src/4*4) != src
let was_bounded = dst_bound - to_write.len() == 0;
if remaining.len() > 0 && remaining.len() < 4 {
for i in 0..remaining.len() {
self.buf[i] = remaining[i];
}
self.buf_fill = remaining.len();
return (&[], was_bounded);
}
return (remaining, was_bounded);
}
}
pub struct Sha<'d> {
sha: PeripheralRef<'d, SHA>,
mode: ShaMode,
alignment_helper: AlignmentHelper,
cursor: usize,
first_run: bool,
finished: bool,
}
#[derive(Debug, Clone, Copy)]
pub enum ShaMode {
SHA1,
#[cfg(not(esp32))]
SHA224,
SHA256,
#[cfg(any(esp32s2, esp32s3, esp32))]
SHA384,
#[cfg(any(esp32s2, esp32s3, esp32))]
SHA512,
#[cfg(any(esp32s2, esp32s3))]
SHA512_224,
#[cfg(any(esp32s2, esp32s3))]
SHA512_256,
// SHA512_(u16) // Max 511
}
// TODO: Maybe make Sha Generic (Sha<Mode>) in order to allow for better
// compiler optimizations? (Requires complex const generics which isn't stable
// yet)
#[cfg(not(esp32))]
fn mode_as_bits(mode: ShaMode) -> u8 {
match mode {
ShaMode::SHA1 => 0,
ShaMode::SHA224 => 1,
ShaMode::SHA256 => 2,
#[cfg(any(esp32s2, esp32s3))]
ShaMode::SHA384 => 3,
#[cfg(any(esp32s2, esp32s3))]
ShaMode::SHA512 => 4,
#[cfg(any(esp32s2, esp32s3))]
ShaMode::SHA512_224 => 5,
#[cfg(any(esp32s2, esp32s3))]
ShaMode::SHA512_256 => 6,
// _ => 0 // TODO: SHA512/t
}
}
// TODO: Allow/Implemenet SHA512_(u16)
// A few notes on this implementation with regards to 'memcpy',
// - It seems that ptr::write_bytes already acts as volatile, while ptr::copy_*
// does not (in this case)
// - The registers are *not* cleared after processing, so padding needs to be
// written out
// - This component uses core::intrinsics::volatile_* which is unstable, but is
// the only way to
// efficiently copy memory with volatile
// - For this particular registers (and probably others), a full u32 needs to be
// written partial
// register writes (i.e. in u8 mode) does not work
// - This means that we need to buffer bytes coming in up to 4 u8's in order
// to create a full u32
// This implementation might fail after u32::MAX/8 bytes, to increase please see
// ::finish() length/self.cursor usage
impl<'d> Sha<'d> {
pub fn new(
sha: impl Peripheral<P = SHA> + 'd,
mode: ShaMode,
peripheral_clock_control: &mut PeripheralClockControl,
) -> Self {
crate::into_ref!(sha);
peripheral_clock_control.enable(crate::system::Peripheral::Sha);
// Setup SHA Mode
#[cfg(not(esp32))]
sha.mode
.write(|w| unsafe { w.mode().bits(mode_as_bits(mode)) });
Self {
sha,
mode,
cursor: 0,
first_run: true,
finished: false,
alignment_helper: AlignmentHelper::default(),
}
}
pub fn first_run(&self) -> bool {
self.first_run
}
pub fn finished(&self) -> bool {
self.finished
}
#[cfg(not(esp32))]
fn process_buffer(&mut self) {
// FIXME: SHA_START_REG & SHA_CONTINUE_REG are wrongly marked as RO (they are
// WO)
if self.first_run {
// Set SHA_START_REG
unsafe {
self.sha.start.as_ptr().write_volatile(1u32);
}
self.first_run = false;
} else {
// SET SHA_CONTINUE_REG
unsafe {
self.sha.continue_.as_ptr().write_volatile(1u32);
}
}
}
#[cfg(esp32)]
fn process_buffer(&mut self) {
if self.first_run {
match self.mode {
ShaMode::SHA1 => self.sha.sha1_start.write(|w| unsafe { w.bits(1) }),
ShaMode::SHA256 => self.sha.sha256_start.write(|w| unsafe { w.bits(1) }),
ShaMode::SHA384 => self.sha.sha384_start.write(|w| unsafe { w.bits(1) }),
ShaMode::SHA512 => self.sha.sha512_start.write(|w| unsafe { w.bits(1) }),
}
self.first_run = false;
} else {
match self.mode {
ShaMode::SHA1 => self.sha.sha1_continue.write(|w| unsafe { w.bits(1) }),
ShaMode::SHA256 => self.sha.sha256_continue.write(|w| unsafe { w.bits(1) }),
ShaMode::SHA384 => self.sha.sha384_continue.write(|w| unsafe { w.bits(1) }),
ShaMode::SHA512 => self.sha.sha512_continue.write(|w| unsafe { w.bits(1) }),
}
}
}
fn chunk_length(&self) -> usize {
return match self.mode {
ShaMode::SHA1 | ShaMode::SHA256 => 64,
#[cfg(not(esp32))]
ShaMode::SHA224 => 64,
#[cfg(not(any(esp32c2, esp32c3, esp32c6, esp32h2)))]
_ => 128,
};
}
#[cfg(esp32)]
fn is_busy(&self) -> bool {
match self.mode {
ShaMode::SHA1 => self.sha.sha1_busy.read().sha1_busy().bit_is_set(),
ShaMode::SHA256 => self.sha.sha256_busy.read().sha256_busy().bit_is_set(),
ShaMode::SHA384 => self.sha.sha384_busy.read().sha384_busy().bit_is_set(),
ShaMode::SHA512 => self.sha.sha512_busy.read().sha512_busy().bit_is_set(),
}
}
#[cfg(not(esp32))]
fn is_busy(&self) -> bool {
self.sha.busy.read().bits() != 0
}
pub fn digest_length(&self) -> usize {
match self.mode {
ShaMode::SHA1 => 20,
#[cfg(not(esp32))]
ShaMode::SHA224 => 28,
ShaMode::SHA256 => 32,
#[cfg(any(esp32, esp32s2, esp32s3))]
ShaMode::SHA384 => 48,
#[cfg(any(esp32, esp32s2, esp32s3))]
ShaMode::SHA512 => 64,
#[cfg(any(esp32s2, esp32s3))]
ShaMode::SHA512_224 => 28,
#[cfg(any(esp32s2, esp32s3))]
ShaMode::SHA512_256 => 32,
}
}
#[cfg(not(esp32))]
fn input_ptr(&self) -> *mut u32 {
return self.sha.m_mem[0].as_ptr() as *mut u32;
}
#[cfg(esp32)]
fn input_ptr(&self) -> *mut u32 {
return self.sha.text[0].as_ptr() as *mut u32;
}
#[cfg(not(esp32))]
fn output_ptr(&self) -> *const u32 {
return self.sha.h_mem[0].as_ptr() as *const u32;
}
#[cfg(esp32)]
fn output_ptr(&self) -> *const u32 {
return self.sha.text[0].as_ptr() as *const u32;
}
fn flush_data(&mut self) -> nb::Result<(), Infallible> {
if self.is_busy() {
return Err(nb::Error::WouldBlock);
}
unsafe {
let dst_ptr = self
.input_ptr()
.add((self.cursor % self.chunk_length()) / ALIGN_SIZE);
let flushed = self.alignment_helper.flush_to(dst_ptr);
if flushed != 0 {
self.cursor = self.cursor.wrapping_add(ALIGN_SIZE - flushed);
if self.cursor % self.chunk_length() == 0 {
self.process_buffer();
}
}
}
Ok(())
}
// This function ensures that incoming data is aligned to u32 (due to issues
// with cpy_mem<u8>)
fn write_data<'a>(&mut self, incoming: &'a [u8]) -> nb::Result<&'a [u8], Infallible> {
let mod_cursor = self.cursor % self.chunk_length();
unsafe {
let ptr = self.input_ptr().add(mod_cursor / ALIGN_SIZE);
let (remaining, bound_reached) = self.alignment_helper.aligned_volatile_copy(
ptr,
incoming,
self.chunk_length() - mod_cursor,
);
self.cursor = self.cursor.wrapping_add(incoming.len() - remaining.len());
if bound_reached {
self.process_buffer();
}
Ok(remaining)
}
}
pub fn update<'a>(&mut self, buffer: &'a [u8]) -> nb::Result<&'a [u8], Infallible> {
if self.is_busy() {
return Err(nb::Error::WouldBlock);
}
self.finished = false;
let remaining = self.write_data(buffer)?;
Ok(remaining)
}
// Finish of the calculation (if not alreaedy) and copy result to output
// After `finish()` is called `update()`s will contribute to a new hash which
// can be calculated again with `finish()`.
//
// Typically output is expected to be the size of digest_length(), but smaller
// inputs can be given to get a "short hash"
pub fn finish(&mut self, output: &mut [u8]) -> nb::Result<(), Infallible> {
// The main purpose of this function is to dynamically generate padding for the
// input. Padding: Append "1" bit, Pad zeros until 512/1024 filled
// then set the message length in the LSB (overwriting the padding)
// If not enough free space for length+1, add length at end of a new zero'd
// block
if self.is_busy() {
return Err(nb::Error::WouldBlock);
}
let chunk_len = self.chunk_length();
if !self.finished {
// Store message length for padding
let length = self.cursor * 8;
nb::block!(self.update(&[0x80]))?; // Append "1" bit
nb::block!(self.flush_data())?; // Flush partial data, ensures aligned cursor
debug_assert!(self.cursor % 4 == 0);
let mod_cursor = self.cursor % chunk_len;
if chunk_len - mod_cursor < chunk_len / 8 {
// Zero out remaining data if buffer is almost full (>=448/896), and process
// buffer
let pad_len = chunk_len - mod_cursor;
unsafe {
let m_cursor_ptr = self.input_ptr().add(mod_cursor / ALIGN_SIZE);
self.alignment_helper.volatile_write_bytes(
m_cursor_ptr,
0,
pad_len / ALIGN_SIZE,
);
}
self.process_buffer();
self.cursor = self.cursor.wrapping_add(pad_len);
// Spin-wait for finish
while self.is_busy() {}
}
let mod_cursor = self.cursor % chunk_len; // Should be zero if branched above
unsafe {
let m_cursor_ptr = self.input_ptr();
// Pad zeros
let pad_ptr = m_cursor_ptr.add(mod_cursor / ALIGN_SIZE);
let pad_len = (chunk_len - mod_cursor) - ALIGN_SIZE;
self.alignment_helper
.volatile_write_bytes(pad_ptr, 0, pad_len / ALIGN_SIZE);
// Write length (BE) to end
// NOTE: aligned_volatile_copy does not work here
// The decompiler suggest volatile_copy_memory/write_volatile is optimized to a
// simple *v = *pv; While the aligned_volatile_copy makes an
// actual call to memcpy, why this makes a difference when
// memcpy does works in other places, I don't know
let end_ptr = m_cursor_ptr.add((chunk_len / ALIGN_SIZE) - 1);
#[cfg(not(esp32))]
end_ptr.write_volatile(length.to_be() as u32);
#[cfg(esp32)]
end_ptr.write_volatile(length.to_le() as u32);
}
self.process_buffer();
// Spin-wait for final buffer to be processed
while self.is_busy() {}
// ESP32 requires additional load to retrieve output
#[cfg(esp32)]
{
match self.mode {
ShaMode::SHA1 => unsafe { self.sha.sha1_load.write(|w| w.bits(1)) },
ShaMode::SHA256 => unsafe { self.sha.sha256_load.write(|w| w.bits(1)) },
ShaMode::SHA384 => unsafe { self.sha.sha384_load.write(|w| w.bits(1)) },
ShaMode::SHA512 => unsafe { self.sha.sha512_load.write(|w| w.bits(1)) },
}
// Spin wait for result, 8-20 clock cycles according to manual
while self.is_busy() {}
}
self.finished = true;
}
unsafe {
let digest_ptr = self.output_ptr();
let out_ptr = output.as_mut_ptr() as *mut u32;
let digest_out = core::cmp::min(self.digest_length(), output.len()) / ALIGN_SIZE;
for i in 0..digest_out {
#[cfg(not(esp32))]
out_ptr.add(i).write(*digest_ptr.add(i));
// ESP32 does reversed order
#[cfg(esp32)]
out_ptr.add(i).write((*digest_ptr.add(i)).to_be());
}
}
Ok(())
}
}