Add some provided methods to Encoder/Decoder.

The methods for `i8`, `bool`, `char`, `str` are the same for all impls,
because they are layered on top of other methods.
This commit is contained in:
Nicholas Nethercote 2023-04-28 09:06:57 +10:00
parent fa133f5354
commit 7a16d25365
2 changed files with 56 additions and 84 deletions

View File

@ -51,13 +51,6 @@ macro_rules! write_leb128 {
}};
}
/// A byte that [cannot occur in UTF-8 sequences][utf8]. Used to mark the end of a string:
/// `emit_str` writes it after the string's bytes and `read_str` asserts that it is there.
/// This way we can skip validation and still be relatively sure that deserialization
/// did not desynchronize.
///
/// [utf8]: https://en.wikipedia.org/w/index.php?title=UTF-8&oldid=1058865525#Codepage_layout
const STR_SENTINEL: u8 = 0xC1;
impl Encoder for MemEncoder {
#[inline]
fn emit_usize(&mut self, v: usize) {
@ -114,28 +107,6 @@ impl Encoder for MemEncoder {
self.data.extend_from_slice(&v.to_le_bytes());
}
/// Encodes an `i8` by reinterpreting its bits as a `u8` and handing that to `emit_u8`.
#[inline]
fn emit_i8(&mut self, v: i8) {
    let byte = v as u8;
    self.emit_u8(byte);
}
/// Encodes a `bool` as one `u8` passed to `emit_u8`: `1` for `true`, `0` for `false`.
#[inline]
fn emit_bool(&mut self, v: bool) {
    self.emit_u8(u8::from(v));
}
/// Encodes a `char` as its `u32` scalar value, written through `emit_u32`.
#[inline]
fn emit_char(&mut self, v: char) {
    self.emit_u32(u32::from(v));
}
/// Encodes a string as three consecutive pieces: its byte length (`emit_usize`),
/// its raw bytes (`emit_raw_bytes`), and a trailing `STR_SENTINEL` byte (`emit_u8`).
#[inline]
fn emit_str(&mut self, v: &str) {
    let bytes = v.as_bytes();
    self.emit_usize(bytes.len());
    self.emit_raw_bytes(bytes);
    // The sentinel lets the reader check that it is still in sync.
    self.emit_u8(STR_SENTINEL);
}
#[inline]
fn emit_raw_bytes(&mut self, s: &[u8]) {
self.data.extend_from_slice(s);
@ -480,28 +451,6 @@ impl Encoder for FileEncoder {
self.write_all(&v.to_le_bytes());
}
/// Writes an `i8` as the `u8` with the same bit pattern, delegating to `emit_u8`.
#[inline]
fn emit_i8(&mut self, v: i8) {
    let unsigned = v as u8;
    self.emit_u8(unsigned);
}
/// Writes a `bool` through `emit_u8`, using `1` for `true` and `0` for `false`.
#[inline]
fn emit_bool(&mut self, v: bool) {
    let flag = u8::from(v);
    self.emit_u8(flag);
}
/// Writes a `char` as its `u32` scalar value, delegating to `emit_u32`.
#[inline]
fn emit_char(&mut self, v: char) {
    let scalar = u32::from(v);
    self.emit_u32(scalar);
}
/// Writes a string as: byte length (`emit_usize`), the raw bytes (`emit_raw_bytes`),
/// then one `STR_SENTINEL` byte (`emit_u8`).
#[inline]
fn emit_str(&mut self, v: &str) {
    let payload = v.as_bytes();
    self.emit_usize(payload.len());
    self.emit_raw_bytes(payload);
    // Trailing marker that the reader asserts on.
    self.emit_u8(STR_SENTINEL);
}
#[inline]
fn emit_raw_bytes(&mut self, s: &[u8]) {
self.write_all(s);
@ -665,36 +614,11 @@ impl<'a> Decoder for MemDecoder<'a> {
i16::from_le_bytes(self.read_array())
}
/// Reads one byte with `read_byte` and reinterprets it as an `i8`.
#[inline]
fn read_i8(&mut self) -> i8 {
    let byte = self.read_byte();
    byte as i8
}
/// Reads an `isize` by expanding `read_leb128!` with `read_isize_leb128`.
///
/// NOTE(review): the macro body is not visible in this view; presumably it performs
/// LEB128 decoding from the decoder's current position — confirm against the macro.
#[inline]
fn read_isize(&mut self) -> isize {
read_leb128!(self, read_isize_leb128)
}
/// Reads one `u8` and decodes it as a `bool`: zero is `false`, anything else is `true`.
#[inline]
fn read_bool(&mut self) -> bool {
    self.read_u8() != 0
}
/// Reads a `u32` and converts it to a `char`.
///
/// # Panics
///
/// Panics if the value read is not a valid `char`.
#[inline]
fn read_char(&mut self) -> char {
    let scalar = self.read_u32();
    char::from_u32(scalar).unwrap()
}
/// Reads a string laid out as: length, raw bytes, then one `STR_SENTINEL` byte.
///
/// Reads the length with `read_usize`, fetches `len + 1` raw bytes, asserts that the
/// byte at index `len` is `STR_SENTINEL`, and returns the first `len` bytes as a
/// `&str` without UTF-8 validation.
///
/// # Panics
///
/// Panics if the byte at index `len` is not `STR_SENTINEL`.
#[inline]
fn read_str(&mut self) -> &str {
let len = self.read_usize();
// One extra byte is requested so the trailing sentinel is included.
let bytes = self.read_raw_bytes(len + 1);
assert!(bytes[len] == STR_SENTINEL);
// SAFETY (assumed — confirm): the bytes are expected to have been produced by
// `emit_str` from a valid `&str`. The sentinel check above only guards against
// desynchronization; it does not itself validate UTF-8.
unsafe { std::str::from_utf8_unchecked(&bytes[..len]) }
}
#[inline]
fn read_raw_bytes(&mut self, bytes: usize) -> &'a [u8] {
if bytes > self.remaining() {

View File

@ -12,6 +12,13 @@ use std::path;
use std::rc::Rc;
use std::sync::Arc;
/// A byte that [cannot occur in UTF-8 sequences][utf8]. Used to mark the end of a string:
/// `emit_str` writes it after the string's bytes and `read_str` asserts that it is there.
/// This way we can skip validation and still be relatively sure that deserialization
/// did not desynchronize.
///
/// [utf8]: https://en.wikipedia.org/w/index.php?title=UTF-8&oldid=1058865525#Codepage_layout
const STR_SENTINEL: u8 = 0xC1;
/// A note about error handling.
///
/// Encoders may be fallible, but in practice failure is rare and there are so
@ -40,10 +47,29 @@ pub trait Encoder {
// Bodiless declarations: each implementor must supply these signed-integer emitters.
fn emit_i64(&mut self, v: i64);
fn emit_i32(&mut self, v: i32);
fn emit_i16(&mut self, v: i16);
// NOTE(review): bodiless declarations for `i8`, `bool`, `char` and `str`. This diff view
// also shows methods with bodies for the same four names right after these lines, so
// these are presumably the removed side of the hunk — confirm against the actual file.
fn emit_i8(&mut self, v: i8);
fn emit_bool(&mut self, v: bool);
fn emit_char(&mut self, v: char);
fn emit_str(&mut self, v: &str);
/// Provided method: encodes an `i8` by reinterpreting its bits as a `u8` and
/// forwarding to `emit_u8`.
#[inline]
fn emit_i8(&mut self, v: i8) {
    let byte = v as u8;
    self.emit_u8(byte);
}
/// Provided method: encodes a `bool` as a single `u8` via `emit_u8`,
/// `1` for `true` and `0` for `false`.
#[inline]
fn emit_bool(&mut self, v: bool) {
    self.emit_u8(u8::from(v));
}
/// Provided method: encodes a `char` as its `u32` scalar value via `emit_u32`.
#[inline]
fn emit_char(&mut self, v: char) {
    self.emit_u32(u32::from(v));
}
/// Provided method: encodes a string as its byte length (`emit_usize`), its raw
/// bytes (`emit_raw_bytes`), and a trailing `STR_SENTINEL` byte (`emit_u8`).
#[inline]
fn emit_str(&mut self, v: &str) {
    let bytes = v.as_bytes();
    self.emit_usize(bytes.len());
    self.emit_raw_bytes(bytes);
    // End-of-string marker that `read_str` asserts on.
    self.emit_u8(STR_SENTINEL);
}
/// Bodiless declaration taking a byte slice `s`; each implementor supplies the body.
/// `emit_str` calls it with a string's bytes.
fn emit_raw_bytes(&mut self, s: &[u8]);
fn emit_enum_variant<F>(&mut self, v_id: usize, f: F)
@ -79,10 +105,32 @@ pub trait Decoder {
// Bodiless declarations: each implementor must supply these signed-integer readers.
fn read_i64(&mut self) -> i64;
fn read_i32(&mut self) -> i32;
fn read_i16(&mut self) -> i16;
// NOTE(review): bodiless declarations for `i8`, `bool`, `char` and `str`. This diff view
// also shows methods with bodies for the same four names right after these lines, so
// these are presumably the removed side of the hunk — confirm against the actual file.
fn read_i8(&mut self) -> i8;
fn read_bool(&mut self) -> bool;
fn read_char(&mut self) -> char;
fn read_str(&mut self) -> &str;
/// Provided method: reads one `u8` with `read_u8` and reinterprets it as an `i8`.
#[inline]
fn read_i8(&mut self) -> i8 {
    let byte = self.read_u8();
    byte as i8
}
/// Provided method: reads one `u8` and decodes it as a `bool`; zero is `false`,
/// any other value is `true`.
#[inline]
fn read_bool(&mut self) -> bool {
    self.read_u8() != 0
}
/// Provided method: reads a `u32` with `read_u32` and converts it to a `char`.
///
/// # Panics
///
/// Panics if the value read is not a valid `char`.
#[inline]
fn read_char(&mut self) -> char {
    let scalar = self.read_u32();
    char::from_u32(scalar).unwrap()
}
/// Provided method: reads a string laid out as length, raw bytes, then one
/// `STR_SENTINEL` byte (the layout `emit_str` writes).
///
/// Reads the length with `read_usize`, fetches `len + 1` raw bytes, asserts that the
/// byte at index `len` is `STR_SENTINEL`, and returns the first `len` bytes as a
/// `&str` without UTF-8 validation.
///
/// # Panics
///
/// Panics if the byte at index `len` is not `STR_SENTINEL`.
#[inline]
fn read_str(&mut self) -> &str {
let len = self.read_usize();
// One extra byte is requested so the trailing sentinel is included.
let bytes = self.read_raw_bytes(len + 1);
assert!(bytes[len] == STR_SENTINEL);
// SAFETY (assumed — confirm): the bytes are expected to have been produced by
// `emit_str` from a valid `&str`. The sentinel check above only guards against
// desynchronization; it does not itself validate UTF-8.
unsafe { std::str::from_utf8_unchecked(&bytes[..len]) }
}
/// Bodiless declaration; each implementor supplies the body. `read_str` calls it with
/// the string length plus one and then indexes position `len` of the returned slice.
/// NOTE(review): presumably the returned slice has exactly `len` bytes — confirm.
fn read_raw_bytes(&mut self, len: usize) -> &[u8];
// Although there is an `emit_enum_variant` method in `Encoder`, the code