PR feedback

This commit is contained in:
Ben Kimock 2023-09-11 18:12:25 -04:00
parent 01e9798148
commit 6cee6b0bde
3 changed files with 56 additions and 36 deletions

View File

@ -394,10 +394,7 @@ struct NodeInfo<K: DepKind> {
impl<K: DepKind> Encodable<FileEncoder> for NodeInfo<K> { impl<K: DepKind> Encodable<FileEncoder> for NodeInfo<K> {
fn encode(&self, e: &mut FileEncoder) { fn encode(&self, e: &mut FileEncoder) {
let header = SerializedNodeHeader::new(self); let header = SerializedNodeHeader::new(self);
e.write_with(|dest| { e.write_array(header.bytes);
*dest = header.bytes;
header.bytes.len()
});
if header.len().is_none() { if header.len().is_none() {
e.emit_usize(self.edges.len()); e.emit_usize(self.edges.len());

View File

@ -22,8 +22,11 @@ const BUF_SIZE: usize = 8192;
/// size of the buffer, rather than the full length of the encoded data, and /// size of the buffer, rather than the full length of the encoded data, and
/// because it doesn't need to reallocate memory along the way. /// because it doesn't need to reallocate memory along the way.
pub struct FileEncoder { pub struct FileEncoder {
/// The input buffer. For adequate performance, we need to be able to write // The input buffer. For adequate performance, we need to be able to write
/// directly to the unwritten region of the buffer, without calling copy_from_slice. // directly to the unwritten region of the buffer, without calling copy_from_slice.
// Note that our buffer is always initialized so that we can do that direct access
// without unsafe code. Users of this type write many more than BUF_SIZE bytes, so the
// initialization is approximately free.
buf: Box<[u8; BUF_SIZE]>, buf: Box<[u8; BUF_SIZE]>,
buffered: usize, buffered: usize,
flushed: usize, flushed: usize,
@ -54,13 +57,12 @@ impl FileEncoder {
#[cold] #[cold]
#[inline(never)] #[inline(never)]
pub fn flush(&mut self) -> &mut [u8; BUF_SIZE] { pub fn flush(&mut self) {
if self.res.is_ok() { if self.res.is_ok() {
self.res = self.file.write_all(&self.buf[..self.buffered]); self.res = self.file.write_all(&self.buf[..self.buffered]);
} }
self.flushed += self.buffered; self.flushed += self.buffered;
self.buffered = 0; self.buffered = 0;
&mut self.buf
} }
pub fn file(&self) -> &File { pub fn file(&self) -> &File {
@ -76,7 +78,8 @@ impl FileEncoder {
#[cold] #[cold]
#[inline(never)] #[inline(never)]
fn write_all_cold_path(&mut self, buf: &[u8]) { fn write_all_cold_path(&mut self, buf: &[u8]) {
if let Some(dest) = self.flush().get_mut(..buf.len()) { self.flush();
if let Some(dest) = self.buf.get_mut(..buf.len()) {
dest.copy_from_slice(buf); dest.copy_from_slice(buf);
self.buffered += buf.len(); self.buffered += buf.len();
} else { } else {
@ -99,13 +102,20 @@ impl FileEncoder {
/// Write up to `N` bytes to this encoder. /// Write up to `N` bytes to this encoder.
/// ///
/// Whenever possible, use this function to do writes whose length has a small and /// This function can be used to avoid the overhead of calling memcpy for writes that
/// compile-time constant upper bound. /// have runtime-variable length, but are small and have a small fixed upper bound.
///
/// This can be used to do in-place encoding as is done for leb128 (without this function
/// we would need to write to a temporary buffer then memcpy into the encoder), and it can
/// also be used to implement the varint scheme we use for rmeta and dep graph encoding,
/// where we only want to encode the first few bytes of an integer. Copying in the whole
/// integer then only advancing the encoder state for the few bytes we care about is more
/// efficient than calling [`FileEncoder::write_all`], because variable-size copies are
/// always lowered to `memcpy`, which has overhead and contains a lot of logic we can bypass
/// with this function. Note that common architectures support fixed-size writes up to 8 bytes
/// with one instruction, so while this does in some sense do wasted work, we come out ahead.
#[inline] #[inline]
pub fn write_with<const N: usize, V>(&mut self, mut visitor: V) pub fn write_with<const N: usize>(&mut self, visitor: impl FnOnce(&mut [u8; N]) -> usize) {
where
V: FnMut(&mut [u8; N]) -> usize,
{
let flush_threshold = const { BUF_SIZE.checked_sub(N).unwrap() }; let flush_threshold = const { BUF_SIZE.checked_sub(N).unwrap() };
if std::intrinsics::unlikely(self.buffered > flush_threshold) { if std::intrinsics::unlikely(self.buffered > flush_threshold) {
self.flush(); self.flush();
@ -115,26 +125,50 @@ impl FileEncoder {
// We produce a post-mono error if N > BUF_SIZE. // We produce a post-mono error if N > BUF_SIZE.
let buf = unsafe { self.buffer_empty().first_chunk_mut::<N>().unwrap_unchecked() }; let buf = unsafe { self.buffer_empty().first_chunk_mut::<N>().unwrap_unchecked() };
let written = visitor(buf); let written = visitor(buf);
debug_assert!(written <= N);
// We have to ensure that an errant visitor cannot cause self.buffered to exeed BUF_SIZE. // We have to ensure that an errant visitor cannot cause self.buffered to exeed BUF_SIZE.
self.buffered += written.min(N); if written > N {
Self::panic_invalid_write::<N>(written);
}
self.buffered += written;
}
#[cold]
#[inline(never)]
fn panic_invalid_write<const N: usize>(written: usize) {
panic!("FileEncoder::write_with::<{N}> cannot be used to write {written} bytes");
}
/// Helper for calls where [`FileEncoder::write_with`] always writes the whole array.
#[inline]
pub fn write_array<const N: usize>(&mut self, buf: [u8; N]) {
self.write_with(|dest| {
*dest = buf;
N
})
} }
pub fn finish(mut self) -> Result<usize, io::Error> { pub fn finish(mut self) -> Result<usize, io::Error> {
self.flush(); self.flush();
match self.res { match std::mem::replace(&mut self.res, Ok(())) {
Ok(()) => Ok(self.position()), Ok(()) => Ok(self.position()),
Err(e) => Err(e), Err(e) => Err(e),
} }
} }
} }
impl Drop for FileEncoder {
fn drop(&mut self) {
// Likely to be a no-op, because `finish` should have been called and
// it also flushes. But do it just in case.
self.flush();
}
}
macro_rules! write_leb128 { macro_rules! write_leb128 {
($this_fn:ident, $int_ty:ty, $write_leb_fn:ident) => { ($this_fn:ident, $int_ty:ty, $write_leb_fn:ident) => {
#[inline] #[inline]
fn $this_fn(&mut self, v: $int_ty) { fn $this_fn(&mut self, v: $int_ty) {
const MAX_ENCODED_LEN: usize = $crate::leb128::max_leb128_len::<$int_ty>(); self.write_with(|buf| leb128::$write_leb_fn(buf, v))
self.write_with::<MAX_ENCODED_LEN, _>(|buf| leb128::$write_leb_fn(buf, v))
} }
}; };
} }
@ -147,18 +181,12 @@ impl Encoder for FileEncoder {
#[inline] #[inline]
fn emit_u16(&mut self, v: u16) { fn emit_u16(&mut self, v: u16) {
self.write_with(|buf| { self.write_array(v.to_le_bytes());
*buf = v.to_le_bytes();
2
});
} }
#[inline] #[inline]
fn emit_u8(&mut self, v: u8) { fn emit_u8(&mut self, v: u8) {
self.write_with(|buf: &mut [u8; 1]| { self.write_array([v]);
buf[0] = v;
1
});
} }
write_leb128!(emit_isize, isize, write_isize_leb128); write_leb128!(emit_isize, isize, write_isize_leb128);
@ -168,10 +196,7 @@ impl Encoder for FileEncoder {
#[inline] #[inline]
fn emit_i16(&mut self, v: i16) { fn emit_i16(&mut self, v: i16) {
self.write_with(|buf| { self.write_array(v.to_le_bytes());
*buf = v.to_le_bytes();
2
});
} }
#[inline] #[inline]
@ -370,10 +395,7 @@ impl Encodable<FileEncoder> for IntEncodedWithFixedSize {
#[inline] #[inline]
fn encode(&self, e: &mut FileEncoder) { fn encode(&self, e: &mut FileEncoder) {
let _start_pos = e.position(); let _start_pos = e.position();
e.write_with(|buf| { e.write_array(self.0.to_le_bytes());
*buf = self.0.to_le_bytes();
buf.len()
});
let _end_pos = e.position(); let _end_pos = e.position();
debug_assert_eq!((_end_pos - _start_pos), IntEncodedWithFixedSize::ENCODED_SIZE); debug_assert_eq!((_end_pos - _start_pos), IntEncodedWithFixedSize::ENCODED_SIZE);
} }

View File

@ -511,6 +511,7 @@ impl Error {
/// let eof_error = Error::from(ErrorKind::UnexpectedEof); /// let eof_error = Error::from(ErrorKind::UnexpectedEof);
/// ``` /// ```
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
#[inline(never)]
pub fn new<E>(kind: ErrorKind, error: E) -> Error pub fn new<E>(kind: ErrorKind, error: E) -> Error
where where
E: Into<Box<dyn error::Error + Send + Sync>>, E: Into<Box<dyn error::Error + Send + Sync>>,