From 52f21791fbec58e14a54b8c6f9146807a8ae8d84 Mon Sep 17 00:00:00 2001 From: Tyson Nottingham Date: Sun, 6 Dec 2020 17:30:55 -0800 Subject: [PATCH 1/2] Serialize incr comp structures to file via fixed-size buffer Reduce a large memory spike that happens during serialization by writing the incr comp structures to file by way of a fixed-size buffer, rather than an unbounded vector. Effort was made to keep the instruction count close to that of the previous implementation. However, buffered writing to a file inherently has more overhead than writing to a vector, because each write may result in a handleable error. To reduce this overhead, arrangements are made so that each LEB128-encoded integer can be written to the buffer with only one capacity and error check. Higher-level optimizations in which entire composite structures can be written with one capacity and error check are possible, but would require much more work. The performance is mostly on par with the previous implementation, with small to moderate instruction count regressions. The memory reduction is significant, however, so it seems like a worth-while trade-off. --- .../rustc_data_structures/src/fingerprint.rs | 22 +- .../src/persist/file_format.rs | 16 +- .../rustc_incremental/src/persist/save.rs | 70 +-- compiler/rustc_metadata/src/rmeta/encoder.rs | 2 +- compiler/rustc_middle/src/ty/codec.rs | 16 - compiler/rustc_middle/src/ty/context.rs | 6 +- .../src/ty/query/on_disk_cache.rs | 86 ++-- compiler/rustc_serialize/src/leb128.rs | 104 +++-- compiler/rustc_serialize/src/lib.rs | 3 + compiler/rustc_serialize/src/opaque.rs | 411 +++++++++++++++++- compiler/rustc_serialize/tests/leb128.rs | 51 ++- 11 files changed, 611 insertions(+), 176 deletions(-) diff --git a/compiler/rustc_data_structures/src/fingerprint.rs b/compiler/rustc_data_structures/src/fingerprint.rs index 8afe94ac8db..08c3419a842 100644 --- a/compiler/rustc_data_structures/src/fingerprint.rs +++ b/compiler/rustc_data_structures/src/fingerprint.rs @@ -1,6 +1,6 @@ use crate::stable_hasher; use rustc_serialize::{ - opaque::{self, EncodeResult}, + opaque::{self, EncodeResult, FileEncodeResult}, Decodable, Encodable, }; use std::hash::{Hash, Hasher}; @@ -53,13 +53,6 @@ impl Fingerprint { format!("{:x}{:x}", self.0, self.1) } - pub fn encode_opaque(&self, encoder: &mut opaque::Encoder) -> EncodeResult { - let bytes: [u8; 16] = unsafe { mem::transmute([self.0.to_le(), self.1.to_le()]) }; - - encoder.emit_raw_bytes(&bytes); - Ok(()) - } - pub fn decode_opaque(decoder: &mut opaque::Decoder<'_>) -> Result { let mut bytes: [MaybeUninit; 16] = MaybeUninit::uninit_array(); @@ -142,7 +135,16 @@ impl FingerprintEncoder for E { impl FingerprintEncoder for opaque::Encoder { fn encode_fingerprint(&mut self, f: &Fingerprint) -> EncodeResult { - f.encode_opaque(self) + let bytes: [u8; 16] = unsafe { mem::transmute([f.0.to_le(), f.1.to_le()]) }; + self.emit_raw_bytes(&bytes); + Ok(()) + } +} + +impl FingerprintEncoder for opaque::FileEncoder { + fn encode_fingerprint(&mut self, f: &Fingerprint) -> FileEncodeResult { + let bytes: [u8; 16] = unsafe { mem::transmute([f.0.to_le(), f.1.to_le()]) }; + self.emit_raw_bytes(&bytes) } } @@ -198,7 +200,7 @@ impl Encodable for PackedFingerprint { impl Decodable for PackedFingerprint { #[inline] fn decode(d: &mut D) -> Result { - Fingerprint::decode(d).map(|f| PackedFingerprint(f)) + Fingerprint::decode(d).map(PackedFingerprint) } } diff --git a/compiler/rustc_incremental/src/persist/file_format.rs b/compiler/rustc_incremental/src/persist/file_format.rs index c86122f8939..087f83c2475 100644 --- a/compiler/rustc_incremental/src/persist/file_format.rs +++ b/compiler/rustc_incremental/src/persist/file_format.rs @@ -14,7 +14,7 @@ use std::fs; use std::io::{self, Read}; use std::path::Path; -use rustc_serialize::opaque::Encoder; +use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; /// The first few bytes of files generated by incremental compilation. const FILE_MAGIC: &[u8] = b"RSIC"; @@ -27,15 +27,17 @@ const HEADER_FORMAT_VERSION: u16 = 0; /// the Git commit hash. const RUSTC_VERSION: Option<&str> = option_env!("CFG_VERSION"); -pub fn write_file_header(stream: &mut Encoder, nightly_build: bool) { - stream.emit_raw_bytes(FILE_MAGIC); - stream - .emit_raw_bytes(&[(HEADER_FORMAT_VERSION >> 0) as u8, (HEADER_FORMAT_VERSION >> 8) as u8]); +pub fn write_file_header(stream: &mut FileEncoder, nightly_build: bool) -> FileEncodeResult { + stream.emit_raw_bytes(FILE_MAGIC)?; + stream.emit_raw_bytes(&[ + (HEADER_FORMAT_VERSION >> 0) as u8, + (HEADER_FORMAT_VERSION >> 8) as u8, + ])?; let rustc_version = rustc_version(nightly_build); assert_eq!(rustc_version.len(), (rustc_version.len() as u8) as usize); - stream.emit_raw_bytes(&[rustc_version.len() as u8]); - stream.emit_raw_bytes(rustc_version.as_bytes()); + stream.emit_raw_bytes(&[rustc_version.len() as u8])?; + stream.emit_raw_bytes(rustc_version.as_bytes()) } /// Reads the contents of a file with a file header as defined in this module. diff --git a/compiler/rustc_incremental/src/persist/save.rs b/compiler/rustc_incremental/src/persist/save.rs index 2169f5a89e1..f63cdfc5694 100644 --- a/compiler/rustc_incremental/src/persist/save.rs +++ b/compiler/rustc_incremental/src/persist/save.rs @@ -2,7 +2,7 @@ use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::join; use rustc_middle::dep_graph::{DepGraph, DepKind, WorkProduct, WorkProductId}; use rustc_middle::ty::TyCtxt; -use rustc_serialize::opaque::Encoder; +use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; use rustc_serialize::Encodable as RustcEncodable; use rustc_session::Session; use std::fs; @@ -33,12 +33,12 @@ pub fn save_dep_graph(tcx: TyCtxt<'_>) { join( move || { sess.time("incr_comp_persist_result_cache", || { - save_in(sess, query_cache_path, |e| encode_query_cache(tcx, e)); + save_in(sess, query_cache_path, "query cache", |e| encode_query_cache(tcx, e)); }); }, || { sess.time("incr_comp_persist_dep_graph", || { - save_in(sess, dep_graph_path, |e| { + save_in(sess, dep_graph_path, "dependency graph", |e| { sess.time("incr_comp_encode_dep_graph", || encode_dep_graph(tcx, e)) }); }); @@ -65,7 +65,7 @@ pub fn save_work_product_index( debug!("save_work_product_index()"); dep_graph.assert_ignored(); let path = work_products_path(sess); - save_in(sess, path, |e| encode_work_product_index(&new_work_products, e)); + save_in(sess, path, "work product index", |e| encode_work_product_index(&new_work_products, e)); // We also need to clean out old work-products, as not all of them are // deleted during invalidation. Some object files don't change their @@ -92,13 +92,13 @@ pub fn save_work_product_index( }); } -fn save_in(sess: &Session, path_buf: PathBuf, encode: F) +fn save_in(sess: &Session, path_buf: PathBuf, name: &str, encode: F) where - F: FnOnce(&mut Encoder), + F: FnOnce(&mut FileEncoder) -> FileEncodeResult, { debug!("save: storing data in {}", path_buf.display()); - // delete the old dep-graph, if any + // Delete the old file, if any. // Note: It's important that we actually delete the old file and not just // truncate and overwrite it, since it might be a shared hard-link, the // underlying data of which we don't want to modify @@ -109,7 +109,8 @@ where Err(err) if err.kind() == io::ErrorKind::NotFound => (), Err(err) => { sess.err(&format!( - "unable to delete old dep-graph at `{}`: {}", + "unable to delete old {} at `{}`: {}", + name, path_buf.display(), err )); @@ -117,26 +118,35 @@ where } } - // generate the data in a memory buffer - let mut encoder = Encoder::new(Vec::new()); - file_format::write_file_header(&mut encoder, sess.is_nightly_build()); - encode(&mut encoder); - - // write the data out - let data = encoder.into_inner(); - match fs::write(&path_buf, data) { - Ok(_) => { - debug!("save: data written to disk successfully"); - } + let mut encoder = match FileEncoder::new(&path_buf) { + Ok(encoder) => encoder, Err(err) => { - sess.err(&format!("failed to write dep-graph to `{}`: {}", path_buf.display(), err)); + sess.err(&format!("failed to create {} at `{}`: {}", name, path_buf.display(), err)); + return; } + }; + + if let Err(err) = file_format::write_file_header(&mut encoder, sess.is_nightly_build()) { + sess.err(&format!("failed to write {} header to `{}`: {}", name, path_buf.display(), err)); + return; } + + if let Err(err) = encode(&mut encoder) { + sess.err(&format!("failed to write {} to `{}`: {}", name, path_buf.display(), err)); + return; + } + + if let Err(err) = encoder.flush() { + sess.err(&format!("failed to flush {} to `{}`: {}", name, path_buf.display(), err)); + return; + } + + debug!("save: data written to disk successfully"); } -fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) { +fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut FileEncoder) -> FileEncodeResult { // First encode the commandline arguments hash - tcx.sess.opts.dep_tracking_hash().encode(encoder).unwrap(); + tcx.sess.opts.dep_tracking_hash().encode(encoder)?; // Encode the graph data. let serialized_graph = @@ -214,15 +224,13 @@ fn encode_dep_graph(tcx: TyCtxt<'_>, encoder: &mut Encoder) { println!("[incremental]"); } - tcx.sess.time("incr_comp_encode_serialized_dep_graph", || { - serialized_graph.encode(encoder).unwrap(); - }); + tcx.sess.time("incr_comp_encode_serialized_dep_graph", || serialized_graph.encode(encoder)) } fn encode_work_product_index( work_products: &FxHashMap, - encoder: &mut Encoder, -) { + encoder: &mut FileEncoder, +) -> FileEncodeResult { let serialized_products: Vec<_> = work_products .iter() .map(|(id, work_product)| SerializedWorkProduct { @@ -231,11 +239,9 @@ fn encode_work_product_index( }) .collect(); - serialized_products.encode(encoder).unwrap(); + serialized_products.encode(encoder) } -fn encode_query_cache(tcx: TyCtxt<'_>, encoder: &mut Encoder) { - tcx.sess.time("incr_comp_serialize_result_cache", || { - tcx.serialize_query_result_cache(encoder).unwrap(); - }) +fn encode_query_cache(tcx: TyCtxt<'_>, encoder: &mut FileEncoder) -> FileEncodeResult { + tcx.sess.time("incr_comp_serialize_result_cache", || tcx.serialize_query_result_cache(encoder)) } diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index f6ae8275a8f..8abae6924b5 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -308,7 +308,7 @@ impl<'a, 'tcx> Encodable> for Span { impl<'a, 'tcx> FingerprintEncoder for EncodeContext<'a, 'tcx> { fn encode_fingerprint(&mut self, f: &Fingerprint) -> Result<(), Self::Error> { - f.encode_opaque(&mut self.opaque) + self.opaque.encode_fingerprint(f) } } diff --git a/compiler/rustc_middle/src/ty/codec.rs b/compiler/rustc_middle/src/ty/codec.rs index df594690215..9d371503e0a 100644 --- a/compiler/rustc_middle/src/ty/codec.rs +++ b/compiler/rustc_middle/src/ty/codec.rs @@ -50,22 +50,6 @@ impl<'tcx, E: TyEncoder<'tcx>> EncodableWithShorthand<'tcx, E> for ty::Predicate } } -pub trait OpaqueEncoder: Encoder { - fn opaque(&mut self) -> &mut rustc_serialize::opaque::Encoder; - fn encoder_position(&self) -> usize; -} - -impl OpaqueEncoder for rustc_serialize::opaque::Encoder { - #[inline] - fn opaque(&mut self) -> &mut rustc_serialize::opaque::Encoder { - self - } - #[inline] - fn encoder_position(&self) -> usize { - self.position() - } -} - pub trait TyEncoder<'tcx>: Encoder { const CLEAR_CROSS_CRATE: bool; diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index b2db09cbc80..3540f0f06b6 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -47,6 +47,7 @@ use rustc_hir::{ }; use rustc_index::vec::{Idx, IndexVec}; use rustc_macros::HashStable; +use rustc_serialize::opaque::{FileEncodeResult, FileEncoder}; use rustc_session::config::{BorrowckMode, CrateType, OutputFilenames}; use rustc_session::lint::{Level, Lint}; use rustc_session::Session; @@ -1336,10 +1337,7 @@ impl<'tcx> TyCtxt<'tcx> { } } - pub fn serialize_query_result_cache(self, encoder: &mut E) -> Result<(), E::Error> - where - E: ty::codec::OpaqueEncoder, - { + pub fn serialize_query_result_cache(self, encoder: &mut FileEncoder) -> FileEncodeResult { self.queries.on_disk_cache.as_ref().map(|c| c.serialize(self, encoder)).unwrap_or(Ok(())) } diff --git a/compiler/rustc_middle/src/ty/query/on_disk_cache.rs b/compiler/rustc_middle/src/ty/query/on_disk_cache.rs index eb4f1b958be..abe58aacbb1 100644 --- a/compiler/rustc_middle/src/ty/query/on_disk_cache.rs +++ b/compiler/rustc_middle/src/ty/query/on_disk_cache.rs @@ -1,7 +1,7 @@ use crate::dep_graph::{DepNode, DepNodeIndex, SerializedDepNodeIndex}; use crate::mir::interpret::{AllocDecodingSession, AllocDecodingState}; use crate::mir::{self, interpret}; -use crate::ty::codec::{OpaqueEncoder, RefDecodable, TyDecoder, TyEncoder}; +use crate::ty::codec::{RefDecodable, TyDecoder, TyEncoder}; use crate::ty::context::TyCtxt; use crate::ty::{self, Ty}; use rustc_data_structures::fingerprint::{Fingerprint, FingerprintDecoder, FingerprintEncoder}; @@ -14,7 +14,10 @@ use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, LOCAL_CRATE}; use rustc_hir::definitions::DefPathHash; use rustc_hir::definitions::Definitions; use rustc_index::vec::{Idx, IndexVec}; -use rustc_serialize::{opaque, Decodable, Decoder, Encodable, Encoder}; +use rustc_serialize::{ + opaque::{self, FileEncodeResult, FileEncoder}, + Decodable, Decoder, Encodable, Encoder, +}; use rustc_session::{CrateDisambiguator, Session}; use rustc_span::hygiene::{ ExpnDataDecodeMode, ExpnDataEncodeMode, ExpnId, HygieneDecodeContext, HygieneEncodeContext, @@ -241,10 +244,11 @@ impl<'sess> OnDiskCache<'sess> { } } - pub fn serialize<'tcx, E>(&self, tcx: TyCtxt<'tcx>, encoder: &mut E) -> Result<(), E::Error> - where - E: OpaqueEncoder, - { + pub fn serialize<'tcx>( + &self, + tcx: TyCtxt<'tcx>, + encoder: &mut FileEncoder, + ) -> FileEncodeResult { // Serializing the `DepGraph` should not modify it. tcx.dep_graph.with_ignore(|| { // Allocate `SourceFileIndex`es. @@ -298,14 +302,14 @@ impl<'sess> OnDiskCache<'sess> { // Encode query results. let mut query_result_index = EncodedQueryResultIndex::new(); - tcx.sess.time("encode_query_results", || { + tcx.sess.time("encode_query_results", || -> FileEncodeResult { let enc = &mut encoder; let qri = &mut query_result_index; macro_rules! encode_queries { ($($query:ident,)*) => { $( - encode_query_results::, _>( + encode_query_results::>( tcx, enc, qri @@ -324,15 +328,17 @@ impl<'sess> OnDiskCache<'sess> { .current_diagnostics .borrow() .iter() - .map(|(dep_node_index, diagnostics)| { - let pos = AbsoluteBytePos::new(encoder.position()); - // Let's make sure we get the expected type here. - let diagnostics: &EncodedDiagnostics = diagnostics; - let dep_node_index = SerializedDepNodeIndex::new(dep_node_index.index()); - encoder.encode_tagged(dep_node_index, diagnostics)?; + .map( + |(dep_node_index, diagnostics)| -> Result<_, ::Error> { + let pos = AbsoluteBytePos::new(encoder.position()); + // Let's make sure we get the expected type here. + let diagnostics: &EncodedDiagnostics = diagnostics; + let dep_node_index = SerializedDepNodeIndex::new(dep_node_index.index()); + encoder.encode_tagged(dep_node_index, diagnostics)?; - Ok((dep_node_index, pos)) - }) + Ok((dep_node_index, pos)) + }, + ) .collect::>()?; let interpret_alloc_index = { @@ -375,13 +381,13 @@ impl<'sess> OnDiskCache<'sess> { hygiene_encode_context.encode( &mut encoder, - |encoder, index, ctxt_data| { + |encoder, index, ctxt_data| -> FileEncodeResult { let pos = AbsoluteBytePos::new(encoder.position()); encoder.encode_tagged(TAG_SYNTAX_CONTEXT, ctxt_data)?; syntax_contexts.insert(index, pos); Ok(()) }, - |encoder, index, expn_data| { + |encoder, index, expn_data| -> FileEncodeResult { let pos = AbsoluteBytePos::new(encoder.position()); encoder.encode_tagged(TAG_EXPN_DATA, expn_data)?; expn_ids.insert(index, pos); @@ -410,7 +416,7 @@ impl<'sess> OnDiskCache<'sess> { // Encode the position of the footer as the last 8 bytes of the // file so we know where to look for it. - IntEncodedWithFixedSize(footer_pos).encode(encoder.encoder.opaque())?; + IntEncodedWithFixedSize(footer_pos).encode(encoder.encoder)?; // DO NOT WRITE ANYTHING TO THE ENCODER AFTER THIS POINT! The address // of the footer must be the last thing in the data stream. @@ -964,6 +970,17 @@ impl<'a, 'tcx> Decodable> for &'tcx [Span] { //- ENCODING ------------------------------------------------------------------- +trait OpaqueEncoder: Encoder { + fn position(&self) -> usize; +} + +impl OpaqueEncoder for FileEncoder { + #[inline] + fn position(&self) -> usize { + FileEncoder::position(self) + } +} + /// An encoder that can write to the incremental compilation cache. struct CacheEncoder<'a, 'tcx, E: OpaqueEncoder> { tcx: TyCtxt<'tcx>, @@ -1005,9 +1022,9 @@ where } } -impl<'a, 'tcx> FingerprintEncoder for CacheEncoder<'a, 'tcx, rustc_serialize::opaque::Encoder> { - fn encode_fingerprint(&mut self, f: &Fingerprint) -> opaque::EncodeResult { - f.encode_opaque(self.encoder) +impl<'a, 'tcx, E: OpaqueEncoder> FingerprintEncoder for CacheEncoder<'a, 'tcx, E> { + fn encode_fingerprint(&mut self, f: &Fingerprint) -> Result<(), E::Error> { + self.encoder.encode_fingerprint(f) } } @@ -1073,7 +1090,7 @@ where const CLEAR_CROSS_CRATE: bool = false; fn position(&self) -> usize { - self.encoder.encoder_position() + self.encoder.position() } fn type_shorthands(&mut self) -> &mut FxHashMap, usize> { &mut self.type_shorthands @@ -1159,12 +1176,12 @@ where } } -// This ensures that the `Encodable::encode` specialization for byte slices -// is used when a `CacheEncoder` having an `opaque::Encoder` is passed to `Encodable::encode`. +// This ensures that the `Encodable::encode` specialization for byte slices +// is used when a `CacheEncoder` having an `opaque::FileEncoder` is passed to `Encodable::encode`. // Unfortunately, we have to manually opt into specializations this way, given how `CacheEncoder` // and the encoding traits currently work. -impl<'a, 'tcx> Encodable> for [u8] { - fn encode(&self, e: &mut CacheEncoder<'a, 'tcx, opaque::Encoder>) -> opaque::EncodeResult { +impl<'a, 'tcx> Encodable> for [u8] { + fn encode(&self, e: &mut CacheEncoder<'a, 'tcx, FileEncoder>) -> FileEncodeResult { self.encode(e.encoder) } } @@ -1176,8 +1193,8 @@ impl IntEncodedWithFixedSize { pub const ENCODED_SIZE: usize = 8; } -impl Encodable for IntEncodedWithFixedSize { - fn encode(&self, e: &mut opaque::Encoder) -> Result<(), !> { +impl Encodable for IntEncodedWithFixedSize { + fn encode(&self, e: &mut E) -> Result<(), E::Error> { let start_pos = e.position(); for i in 0..IntEncodedWithFixedSize::ENCODED_SIZE { ((self.0 >> (i * 8)) as u8).encode(e)?; @@ -1205,15 +1222,14 @@ impl<'a> Decodable> for IntEncodedWithFixedSize { } } -fn encode_query_results<'a, 'tcx, Q, E>( +fn encode_query_results<'a, 'tcx, Q>( tcx: TyCtxt<'tcx>, - encoder: &mut CacheEncoder<'a, 'tcx, E>, + encoder: &mut CacheEncoder<'a, 'tcx, FileEncoder>, query_result_index: &mut EncodedQueryResultIndex, -) -> Result<(), E::Error> +) -> FileEncodeResult where Q: super::QueryDescription> + super::QueryAccessors>, - Q::Value: Encodable>, - E: 'a + OpaqueEncoder, + Q::Value: Encodable>, { let _timer = tcx .sess @@ -1230,7 +1246,7 @@ where // Record position of the cache entry. query_result_index - .push((dep_node, AbsoluteBytePos::new(encoder.encoder.opaque().position()))); + .push((dep_node, AbsoluteBytePos::new(encoder.encoder.position()))); // Encode the type check tables with the `SerializedDepNodeIndex` // as tag. diff --git a/compiler/rustc_serialize/src/leb128.rs b/compiler/rustc_serialize/src/leb128.rs index 1fe6a309e96..0b13a35bb5c 100644 --- a/compiler/rustc_serialize/src/leb128.rs +++ b/compiler/rustc_serialize/src/leb128.rs @@ -1,16 +1,45 @@ +#![macro_use] + +macro_rules! max_leb128_len { + ($int_ty:ty) => { + // The longest LEB128 encoding for an integer uses 7 bits per byte. + (std::mem::size_of::<$int_ty>() * 8 + 6) / 7 + }; +} + +// Returns the longest LEB128 encoding of all supported integer types. +pub const fn max_leb128_len() -> usize { + max_leb128_len!(u128) +} + macro_rules! impl_write_unsigned_leb128 { - ($fn_name:ident, $int_ty:ident) => { + ($fn_name:ident, $int_ty:ty) => { #[inline] - pub fn $fn_name(out: &mut Vec, mut value: $int_ty) { + pub fn $fn_name( + out: &mut [::std::mem::MaybeUninit; max_leb128_len!($int_ty)], + mut value: $int_ty, + ) -> &[u8] { + let mut i = 0; + loop { if value < 0x80 { - out.push(value as u8); + unsafe { + *out.get_unchecked_mut(i).as_mut_ptr() = value as u8; + } + + i += 1; break; } else { - out.push(((value & 0x7f) | 0x80) as u8); + unsafe { + *out.get_unchecked_mut(i).as_mut_ptr() = ((value & 0x7f) | 0x80) as u8; + } + value >>= 7; + i += 1; } } + + unsafe { ::std::mem::MaybeUninit::slice_assume_init_ref(&out.get_unchecked(..i)) } } }; } @@ -22,7 +51,7 @@ impl_write_unsigned_leb128!(write_u128_leb128, u128); impl_write_unsigned_leb128!(write_usize_leb128, usize); macro_rules! impl_read_unsigned_leb128 { - ($fn_name:ident, $int_ty:ident) => { + ($fn_name:ident, $int_ty:ty) => { #[inline] pub fn $fn_name(slice: &[u8]) -> ($int_ty, usize) { let mut result = 0; @@ -49,39 +78,46 @@ impl_read_unsigned_leb128!(read_u64_leb128, u64); impl_read_unsigned_leb128!(read_u128_leb128, u128); impl_read_unsigned_leb128!(read_usize_leb128, usize); -#[inline] -/// encodes an integer using signed leb128 encoding and stores -/// the result using a callback function. -/// -/// The callback `write` is called once for each position -/// that is to be written to with the byte to be encoded -/// at that position. -pub fn write_signed_leb128_to(mut value: i128, mut write: W) -where - W: FnMut(u8), -{ - loop { - let mut byte = (value as u8) & 0x7f; - value >>= 7; - let more = - !(((value == 0) && ((byte & 0x40) == 0)) || ((value == -1) && ((byte & 0x40) != 0))); +macro_rules! impl_write_signed_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub fn $fn_name( + out: &mut [::std::mem::MaybeUninit; max_leb128_len!($int_ty)], + mut value: $int_ty, + ) -> &[u8] { + let mut i = 0; - if more { - byte |= 0x80; // Mark this byte to show that more bytes will follow. + loop { + let mut byte = (value as u8) & 0x7f; + value >>= 7; + let more = !(((value == 0) && ((byte & 0x40) == 0)) + || ((value == -1) && ((byte & 0x40) != 0))); + + if more { + byte |= 0x80; // Mark this byte to show that more bytes will follow. + } + + unsafe { + *out.get_unchecked_mut(i).as_mut_ptr() = byte; + } + + i += 1; + + if !more { + break; + } + } + + unsafe { ::std::mem::MaybeUninit::slice_assume_init_ref(&out.get_unchecked(..i)) } } - - write(byte); - - if !more { - break; - } - } + }; } -#[inline] -pub fn write_signed_leb128(out: &mut Vec, value: i128) { - write_signed_leb128_to(value, |v| out.push(v)) -} +impl_write_signed_leb128!(write_i16_leb128, i16); +impl_write_signed_leb128!(write_i32_leb128, i32); +impl_write_signed_leb128!(write_i64_leb128, i64); +impl_write_signed_leb128!(write_i128_leb128, i128); +impl_write_signed_leb128!(write_isize_leb128, isize); #[inline] pub fn read_signed_leb128(data: &[u8], start_position: usize) -> (i128, usize) { diff --git a/compiler/rustc_serialize/src/lib.rs b/compiler/rustc_serialize/src/lib.rs index f58ed14d997..ea04e7bb44b 100644 --- a/compiler/rustc_serialize/src/lib.rs +++ b/compiler/rustc_serialize/src/lib.rs @@ -16,6 +16,9 @@ Core encoding and decoding interfaces. #![cfg_attr(bootstrap, feature(min_const_generics))] #![feature(min_specialization)] #![feature(vec_spare_capacity)] +#![feature(core_intrinsics)] +#![feature(maybe_uninit_slice)] +#![feature(new_uninit)] #![cfg_attr(test, feature(test))] #![allow(rustc::internal)] diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index 673742df7f0..a31b73b4ee9 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -1,7 +1,10 @@ -use crate::leb128::{self, read_signed_leb128, write_signed_leb128}; +use crate::leb128::{self, max_leb128_len, read_signed_leb128}; use crate::serialize; use std::borrow::Cow; +use std::fs::File; +use std::io::{self, Write}; use std::mem::MaybeUninit; +use std::path::Path; use std::ptr; // ----------------------------------------------------------------------------- @@ -23,22 +26,35 @@ impl Encoder { self.data } + #[inline] + pub fn position(&self) -> usize { + self.data.len() + } + #[inline] pub fn emit_raw_bytes(&mut self, s: &[u8]) { self.data.extend_from_slice(s); } } -macro_rules! write_uleb128 { - ($enc:expr, $value:expr, $fun:ident) => {{ - leb128::$fun(&mut $enc.data, $value); - Ok(()) - }}; -} +macro_rules! write_leb128 { + ($enc:expr, $value:expr, $int_ty:ty, $fun:ident) => {{ + const MAX_ENCODED_LEN: usize = max_leb128_len!($int_ty); + let old_len = $enc.data.len(); + + if MAX_ENCODED_LEN > $enc.data.capacity() - old_len { + $enc.data.reserve(MAX_ENCODED_LEN); + } + + // SAFETY: The above check and `reserve` ensures that there is enough + // room to write the encoded value to the vector's internal buffer. + unsafe { + let buf = &mut *($enc.data.as_mut_ptr().add(old_len) + as *mut [MaybeUninit; MAX_ENCODED_LEN]); + let encoded = leb128::$fun(buf, $value); + $enc.data.set_len(old_len + encoded.len()); + } -macro_rules! write_sleb128 { - ($enc:expr, $value:expr) => {{ - write_signed_leb128(&mut $enc.data, $value as i128); Ok(()) }}; } @@ -53,27 +69,27 @@ impl serialize::Encoder for Encoder { #[inline] fn emit_usize(&mut self, v: usize) -> EncodeResult { - write_uleb128!(self, v, write_usize_leb128) + write_leb128!(self, v, usize, write_usize_leb128) } #[inline] fn emit_u128(&mut self, v: u128) -> EncodeResult { - write_uleb128!(self, v, write_u128_leb128) + write_leb128!(self, v, u128, write_u128_leb128) } #[inline] fn emit_u64(&mut self, v: u64) -> EncodeResult { - write_uleb128!(self, v, write_u64_leb128) + write_leb128!(self, v, u64, write_u64_leb128) } #[inline] fn emit_u32(&mut self, v: u32) -> EncodeResult { - write_uleb128!(self, v, write_u32_leb128) + write_leb128!(self, v, u32, write_u32_leb128) } #[inline] fn emit_u16(&mut self, v: u16) -> EncodeResult { - write_uleb128!(self, v, write_u16_leb128) + write_leb128!(self, v, u16, write_u16_leb128) } #[inline] @@ -84,27 +100,27 @@ impl serialize::Encoder for Encoder { #[inline] fn emit_isize(&mut self, v: isize) -> EncodeResult { - write_sleb128!(self, v) + write_leb128!(self, v, isize, write_isize_leb128) } #[inline] fn emit_i128(&mut self, v: i128) -> EncodeResult { - write_sleb128!(self, v) + write_leb128!(self, v, i128, write_i128_leb128) } #[inline] fn emit_i64(&mut self, v: i64) -> EncodeResult { - write_sleb128!(self, v) + write_leb128!(self, v, i64, write_i64_leb128) } #[inline] fn emit_i32(&mut self, v: i32) -> EncodeResult { - write_sleb128!(self, v) + write_leb128!(self, v, i32, write_i32_leb128) } #[inline] fn emit_i16(&mut self, v: i16) -> EncodeResult { - write_sleb128!(self, v) + write_leb128!(self, v, i16, write_i16_leb128) } #[inline] @@ -143,10 +159,354 @@ impl serialize::Encoder for Encoder { } } -impl Encoder { +pub type FileEncodeResult = Result<(), io::Error>; + +// `FileEncoder` encodes data to file via fixed-size buffer. +// +// When encoding large amounts of data to a file, using `FileEncoder` may be +// preferred over using `Encoder` to encode to a `Vec`, and then writing the +// `Vec` to file, as the latter uses as much memory as there is encoded data, +// while the former uses the fixed amount of memory allocated to the buffer. +// `FileEncoder` also has the advantage of not needing to reallocate as data +// is appended to it, but the disadvantage of requiring more error handling, +// which has some runtime overhead. +pub struct FileEncoder { + // The input buffer. For adequate performance, we need more control over + // buffering than `BufWriter` offers. If `BufWriter` ever offers a raw + // buffer access API, we can use it, and remove `buf` and `buffered`. + buf: Box<[MaybeUninit]>, + buffered: usize, + flushed: usize, + file: File, +} + +impl FileEncoder { + pub fn new>(path: P) -> io::Result { + const DEFAULT_BUF_SIZE: usize = 8192; + FileEncoder::with_capacity(path, DEFAULT_BUF_SIZE) + } + + pub fn with_capacity>(path: P, capacity: usize) -> io::Result { + // Require capacity at least as large as the largest LEB128 encoding + // here, so that we don't have to check or handle this on every write. + assert!(capacity >= max_leb128_len()); + + // Require capacity small enough such that some capacity checks can be + // done using guaranteed non-overflowing add rather than sub, which + // shaves an instruction off those code paths (on x86 at least). + assert!(capacity <= usize::MAX - max_leb128_len()); + + let file = File::create(path)?; + + Ok(FileEncoder { buf: Box::new_uninit_slice(capacity), buffered: 0, flushed: 0, file }) + } + #[inline] pub fn position(&self) -> usize { - self.data.len() + // Tracking position this way instead of having a `self.position` field + // means that we don't have to update the position on every write call. + self.flushed + self.buffered + } + + #[inline] + pub fn emit_raw_bytes(&mut self, s: &[u8]) -> FileEncodeResult { + self.write_all(s) + } + + pub fn flush(&mut self) -> FileEncodeResult { + // This is basically a copy of `BufWriter::flush`. If `BufWriter` ever + // offers a raw buffer access API, we can use it, and remove this. + + /// Helper struct to ensure the buffer is updated after all the writes + /// are complete. It tracks the number of written bytes and drains them + /// all from the front of the buffer when dropped. + struct BufGuard<'a> { + buffer: &'a mut [u8], + encoder_buffered: &'a mut usize, + encoder_flushed: &'a mut usize, + flushed: usize, + } + + impl<'a> BufGuard<'a> { + fn new( + buffer: &'a mut [u8], + encoder_buffered: &'a mut usize, + encoder_flushed: &'a mut usize, + ) -> Self { + assert_eq!(buffer.len(), *encoder_buffered); + Self { buffer, encoder_buffered, encoder_flushed, flushed: 0 } + } + + /// The unwritten part of the buffer + fn remaining(&self) -> &[u8] { + &self.buffer[self.flushed..] + } + + /// Flag some bytes as removed from the front of the buffer + fn consume(&mut self, amt: usize) { + self.flushed += amt; + } + + /// true if all of the bytes have been written + fn done(&self) -> bool { + self.flushed >= *self.encoder_buffered + } + } + + impl Drop for BufGuard<'_> { + fn drop(&mut self) { + if self.flushed > 0 { + if self.done() { + *self.encoder_flushed += *self.encoder_buffered; + *self.encoder_buffered = 0; + } else { + self.buffer.copy_within(self.flushed.., 0); + *self.encoder_flushed += self.flushed; + *self.encoder_buffered -= self.flushed; + } + } + } + } + + let mut guard = BufGuard::new( + unsafe { MaybeUninit::slice_assume_init_mut(&mut self.buf[..self.buffered]) }, + &mut self.buffered, + &mut self.flushed, + ); + + while !guard.done() { + match self.file.write(guard.remaining()) { + Ok(0) => { + return Err(io::Error::new( + io::ErrorKind::WriteZero, + "failed to write the buffered data", + )); + } + Ok(n) => guard.consume(n), + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => return Err(e), + } + } + + Ok(()) + } + + #[inline] + fn capacity(&self) -> usize { + self.buf.len() + } + + #[inline] + fn write_one(&mut self, value: u8) -> FileEncodeResult { + // We ensure this during `FileEncoder` construction. + debug_assert!(self.capacity() >= 1); + + let mut buffered = self.buffered; + + if std::intrinsics::unlikely(buffered >= self.capacity()) { + self.flush()?; + buffered = 0; + } + + // SAFETY: The above check and `flush` ensures that there is enough + // room to write the input to the buffer. + unsafe { + *MaybeUninit::slice_as_mut_ptr(&mut self.buf).add(buffered) = value; + } + + self.buffered = buffered + 1; + + Ok(()) + } + + #[inline] + fn write_all(&mut self, buf: &[u8]) -> FileEncodeResult { + let capacity = self.capacity(); + let buf_len = buf.len(); + + if std::intrinsics::likely(buf_len <= capacity) { + let mut buffered = self.buffered; + + if std::intrinsics::unlikely(buf_len > capacity - buffered) { + self.flush()?; + buffered = 0; + } + + // SAFETY: The above check and `flush` ensures that there is enough + // room to write the input to the buffer. + unsafe { + let src = buf.as_ptr(); + let dst = MaybeUninit::slice_as_mut_ptr(&mut self.buf).add(buffered); + ptr::copy_nonoverlapping(src, dst, buf_len); + } + + self.buffered = buffered + buf_len; + + Ok(()) + } else { + self.write_all_unbuffered(buf) + } + } + + fn write_all_unbuffered(&mut self, mut buf: &[u8]) -> FileEncodeResult { + if self.buffered > 0 { + self.flush()?; + } + + // This is basically a copy of `Write::write_all` but also updates our + // `self.flushed`. It's necessary because `Write::write_all` does not + // return the number of bytes written when an error is encountered, and + // without that, we cannot accurately update `self.flushed` on error. + while !buf.is_empty() { + match self.file.write(buf) { + Ok(0) => { + return Err(io::Error::new( + io::ErrorKind::WriteZero, + "failed to write whole buffer", + )); + } + Ok(n) => { + buf = &buf[n..]; + self.flushed += n; + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => return Err(e), + } + } + + Ok(()) + } +} + +impl Drop for FileEncoder { + fn drop(&mut self) { + let _result = self.flush(); + } +} + +macro_rules! file_encoder_write_leb128 { + ($enc:expr, $value:expr, $int_ty:ty, $fun:ident) => {{ + const MAX_ENCODED_LEN: usize = max_leb128_len!($int_ty); + + // We ensure this during `FileEncoder` construction. + debug_assert!($enc.capacity() >= MAX_ENCODED_LEN); + + let mut buffered = $enc.buffered; + + // This can't overflow. See assertion in `FileEncoder::with_capacity`. + if std::intrinsics::unlikely(buffered + MAX_ENCODED_LEN > $enc.capacity()) { + $enc.flush()?; + buffered = 0; + } + + // SAFETY: The above check and flush ensures that there is enough + // room to write the encoded value to the buffer. + let buf = unsafe { + &mut *($enc.buf.as_mut_ptr().add(buffered) as *mut [MaybeUninit; MAX_ENCODED_LEN]) + }; + + let encoded = leb128::$fun(buf, $value); + $enc.buffered = buffered + encoded.len(); + + Ok(()) + }}; +} + +impl serialize::Encoder for FileEncoder { + type Error = io::Error; + + #[inline] + fn emit_unit(&mut self) -> FileEncodeResult { + Ok(()) + } + + #[inline] + fn emit_usize(&mut self, v: usize) -> FileEncodeResult { + file_encoder_write_leb128!(self, v, usize, write_usize_leb128) + } + + #[inline] + fn emit_u128(&mut self, v: u128) -> FileEncodeResult { + file_encoder_write_leb128!(self, v, u128, write_u128_leb128) + } + + #[inline] + fn emit_u64(&mut self, v: u64) -> FileEncodeResult { + file_encoder_write_leb128!(self, v, u64, write_u64_leb128) + } + + #[inline] + fn emit_u32(&mut self, v: u32) -> FileEncodeResult { + file_encoder_write_leb128!(self, v, u32, write_u32_leb128) + } + + #[inline] + fn emit_u16(&mut self, v: u16) -> FileEncodeResult { + file_encoder_write_leb128!(self, v, u16, write_u16_leb128) + } + + #[inline] + fn emit_u8(&mut self, v: u8) -> FileEncodeResult { + self.write_one(v) + } + + #[inline] + fn emit_isize(&mut self, v: isize) -> FileEncodeResult { + file_encoder_write_leb128!(self, v, isize, write_isize_leb128) + } + + #[inline] + fn emit_i128(&mut self, v: i128) -> FileEncodeResult { + file_encoder_write_leb128!(self, v, i128, write_i128_leb128) + } + + #[inline] + fn emit_i64(&mut self, v: i64) -> FileEncodeResult { + file_encoder_write_leb128!(self, v, i64, write_i64_leb128) + } + + #[inline] + fn emit_i32(&mut self, v: i32) -> FileEncodeResult { + file_encoder_write_leb128!(self, v, i32, write_i32_leb128) + } + + #[inline] + fn emit_i16(&mut self, v: i16) -> FileEncodeResult { + file_encoder_write_leb128!(self, v, i16, write_i16_leb128) + } + + #[inline] + fn emit_i8(&mut self, v: i8) -> FileEncodeResult { + let as_u8: u8 = unsafe { std::mem::transmute(v) }; + self.emit_u8(as_u8) + } + + #[inline] + fn emit_bool(&mut self, v: bool) -> FileEncodeResult { + self.emit_u8(if v { 1 } else { 0 }) + } + + #[inline] + fn emit_f64(&mut self, v: f64) -> FileEncodeResult { + let as_u64: u64 = v.to_bits(); + self.emit_u64(as_u64) + } + + #[inline] + fn emit_f32(&mut self, v: f32) -> FileEncodeResult { + let as_u32: u32 = v.to_bits(); + self.emit_u32(as_u32) + } + + #[inline] + fn emit_char(&mut self, v: char) -> FileEncodeResult { + self.emit_u32(v as u32) + } + + #[inline] + fn emit_str(&mut self, v: &str) -> FileEncodeResult { + self.emit_usize(v.len())?; + self.emit_raw_bytes(v.as_bytes()) } } @@ -342,6 +702,13 @@ impl serialize::Encodable for [u8] { } } +impl serialize::Encodable for [u8] { + fn encode(&self, e: &mut FileEncoder) -> FileEncodeResult { + serialize::Encoder::emit_usize(e, self.len())?; + e.emit_raw_bytes(self) + } +} + // Specialize decoding `Vec`. This specialization also applies to decoding `Box<[u8]>`s, etc., // since the default implementations call `decode` to produce a `Vec` internally. impl<'a> serialize::Decodable> for Vec { diff --git a/compiler/rustc_serialize/tests/leb128.rs b/compiler/rustc_serialize/tests/leb128.rs index b0f7e785b78..45afcc256ed 100644 --- a/compiler/rustc_serialize/tests/leb128.rs +++ b/compiler/rustc_serialize/tests/leb128.rs @@ -1,4 +1,8 @@ +#![feature(maybe_uninit_slice)] +#![feature(maybe_uninit_uninit_array)] + use rustc_serialize::leb128::*; +use std::mem::MaybeUninit; macro_rules! impl_test_unsigned_leb128 { ($test_name:ident, $write_fn_name:ident, $read_fn_name:ident, $int_ty:ident) => { @@ -7,7 +11,8 @@ macro_rules! impl_test_unsigned_leb128 { let mut stream = Vec::new(); for x in 0..62 { - $write_fn_name(&mut stream, (3u64 << x) as $int_ty); + let mut buf = MaybeUninit::uninit_array(); + stream.extend($write_fn_name(&mut buf, (3u64 << x) as $int_ty)); } let mut position = 0; @@ -28,18 +33,34 @@ impl_test_unsigned_leb128!(test_u64_leb128, write_u64_leb128, read_u64_leb128, u impl_test_unsigned_leb128!(test_u128_leb128, write_u128_leb128, read_u128_leb128, u128); impl_test_unsigned_leb128!(test_usize_leb128, write_usize_leb128, read_usize_leb128, usize); -#[test] -fn test_signed_leb128() { - let values: Vec<_> = (-500..500).map(|i| i * 0x12345789ABCDEF).collect(); - let mut stream = Vec::new(); - for &x in &values { - write_signed_leb128(&mut stream, x); - } - let mut pos = 0; - for &x in &values { - let (value, bytes_read) = read_signed_leb128(&mut stream, pos); - pos += bytes_read; - assert_eq!(x, value); - } - assert_eq!(pos, stream.len()); +macro_rules! impl_test_signed_leb128 { + ($test_name:ident, $write_fn_name:ident, $int_ty:ident) => { + #[test] + fn $test_name() { + let values: Vec<_> = (-500..500) + .map(|i| ((i as $int_ty).wrapping_mul(0x12345789ABCDEF_i64 as $int_ty))) + .collect(); + let mut stream = Vec::new(); + + for &x in &values { + let mut buf = MaybeUninit::uninit_array(); + stream.extend($write_fn_name(&mut buf, x)); + } + + let mut position = 0; + for &x in &values { + let expected = x as i128; + let (actual, bytes_read) = read_signed_leb128(&mut stream, position); + assert_eq!(expected, actual); + position += bytes_read; + } + assert_eq!(stream.len(), position); + } + }; } + +impl_test_signed_leb128!(test_i16_leb128, write_i16_leb128, i16); +impl_test_signed_leb128!(test_i32_leb128, write_i32_leb128, i32); +impl_test_signed_leb128!(test_i64_leb128, write_i64_leb128, i64); +impl_test_signed_leb128!(test_i128_leb128, write_i128_leb128, i128); +impl_test_signed_leb128!(test_isize_leb128, write_isize_leb128, isize); From f15fae822e3b1d2218699976219aecc26187dc9a Mon Sep 17 00:00:00 2001 From: Tyson Nottingham Date: Tue, 29 Dec 2020 15:14:33 -0800 Subject: [PATCH 2/2] rustc_serialize: fix incorrect signed LEB128 decoding The signed LEB128 decoding function used a hardcoded constant of 64 instead of the number of bits in the type of integer being decoded, which resulted in incorrect results for some inputs. Fix this, make the decoding more consistent with the unsigned version, and increase the LEB128 encoding and decoding test coverage. --- compiler/rustc_serialize/src/leb128.rs | 52 +++++++++++--------- compiler/rustc_serialize/src/lib.rs | 1 + compiler/rustc_serialize/src/opaque.rs | 32 +++++-------- compiler/rustc_serialize/tests/leb128.rs | 60 +++++++++++++++++------- 4 files changed, 88 insertions(+), 57 deletions(-) diff --git a/compiler/rustc_serialize/src/leb128.rs b/compiler/rustc_serialize/src/leb128.rs index 0b13a35bb5c..ea2df80e641 100644 --- a/compiler/rustc_serialize/src/leb128.rs +++ b/compiler/rustc_serialize/src/leb128.rs @@ -119,28 +119,38 @@ impl_write_signed_leb128!(write_i64_leb128, i64); impl_write_signed_leb128!(write_i128_leb128, i128); impl_write_signed_leb128!(write_isize_leb128, isize); -#[inline] -pub fn read_signed_leb128(data: &[u8], start_position: usize) -> (i128, usize) { - let mut result = 0; - let mut shift = 0; - let mut position = start_position; - let mut byte; +macro_rules! impl_read_signed_leb128 { + ($fn_name:ident, $int_ty:ty) => { + #[inline] + pub fn $fn_name(slice: &[u8]) -> ($int_ty, usize) { + let mut result = 0; + let mut shift = 0; + let mut position = 0; + let mut byte; - loop { - byte = data[position]; - position += 1; - result |= i128::from(byte & 0x7F) << shift; - shift += 7; + loop { + byte = slice[position]; + position += 1; + result |= <$int_ty>::from(byte & 0x7F) << shift; + shift += 7; - if (byte & 0x80) == 0 { - break; + if (byte & 0x80) == 0 { + break; + } + } + + if (shift < <$int_ty>::BITS) && ((byte & 0x40) != 0) { + // sign extend + result |= (!0 << shift); + } + + (result, position) } - } - - if (shift < 64) && ((byte & 0x40) != 0) { - // sign extend - result |= -(1 << shift); - } - - (result, position - start_position) + }; } + +impl_read_signed_leb128!(read_i16_leb128, i16); +impl_read_signed_leb128!(read_i32_leb128, i32); +impl_read_signed_leb128!(read_i64_leb128, i64); +impl_read_signed_leb128!(read_i128_leb128, i128); +impl_read_signed_leb128!(read_isize_leb128, isize); diff --git a/compiler/rustc_serialize/src/lib.rs b/compiler/rustc_serialize/src/lib.rs index ea04e7bb44b..53c3adcc20c 100644 --- a/compiler/rustc_serialize/src/lib.rs +++ b/compiler/rustc_serialize/src/lib.rs @@ -17,6 +17,7 @@ Core encoding and decoding interfaces. #![feature(min_specialization)] #![feature(vec_spare_capacity)] #![feature(core_intrinsics)] +#![feature(int_bits_const)] #![feature(maybe_uninit_slice)] #![feature(new_uninit)] #![cfg_attr(test, feature(test))] diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index a31b73b4ee9..3e37fc87ce6 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -1,4 +1,4 @@ -use crate::leb128::{self, max_leb128_len, read_signed_leb128}; +use crate::leb128::{self, max_leb128_len}; use crate::serialize; use std::borrow::Cow; use std::fs::File; @@ -561,7 +561,7 @@ impl<'a> Decoder<'a> { } } -macro_rules! read_uleb128 { +macro_rules! read_leb128 { ($dec:expr, $fun:ident) => {{ let (value, bytes_read) = leb128::$fun(&$dec.data[$dec.position..]); $dec.position += bytes_read; @@ -569,14 +569,6 @@ macro_rules! read_uleb128 { }}; } -macro_rules! read_sleb128 { - ($dec:expr, $t:ty) => {{ - let (value, bytes_read) = read_signed_leb128($dec.data, $dec.position); - $dec.position += bytes_read; - Ok(value as $t) - }}; -} - impl<'a> serialize::Decoder for Decoder<'a> { type Error = String; @@ -587,22 +579,22 @@ impl<'a> serialize::Decoder for Decoder<'a> { #[inline] fn read_u128(&mut self) -> Result { - read_uleb128!(self, read_u128_leb128) + read_leb128!(self, read_u128_leb128) } #[inline] fn read_u64(&mut self) -> Result { - read_uleb128!(self, read_u64_leb128) + read_leb128!(self, read_u64_leb128) } #[inline] fn read_u32(&mut self) -> Result { - read_uleb128!(self, read_u32_leb128) + read_leb128!(self, read_u32_leb128) } #[inline] fn read_u16(&mut self) -> Result { - read_uleb128!(self, read_u16_leb128) + read_leb128!(self, read_u16_leb128) } #[inline] @@ -614,27 +606,27 @@ impl<'a> serialize::Decoder for Decoder<'a> { #[inline] fn read_usize(&mut self) -> Result { - read_uleb128!(self, read_usize_leb128) + read_leb128!(self, read_usize_leb128) } #[inline] fn read_i128(&mut self) -> Result { - read_sleb128!(self, i128) + read_leb128!(self, read_i128_leb128) } #[inline] fn read_i64(&mut self) -> Result { - read_sleb128!(self, i64) + read_leb128!(self, read_i64_leb128) } #[inline] fn read_i32(&mut self) -> Result { - read_sleb128!(self, i32) + read_leb128!(self, read_i32_leb128) } #[inline] fn read_i16(&mut self) -> Result { - read_sleb128!(self, i16) + read_leb128!(self, read_i16_leb128) } #[inline] @@ -646,7 +638,7 @@ impl<'a> serialize::Decoder for Decoder<'a> { #[inline] fn read_isize(&mut self) -> Result { - read_sleb128!(self, isize) + read_leb128!(self, read_isize_leb128) } #[inline] diff --git a/compiler/rustc_serialize/tests/leb128.rs b/compiler/rustc_serialize/tests/leb128.rs index 45afcc256ed..a2bcf2c251d 100644 --- a/compiler/rustc_serialize/tests/leb128.rs +++ b/compiler/rustc_serialize/tests/leb128.rs @@ -1,3 +1,4 @@ +#![feature(int_bits_const)] #![feature(maybe_uninit_slice)] #![feature(maybe_uninit_uninit_array)] @@ -8,16 +9,28 @@ macro_rules! impl_test_unsigned_leb128 { ($test_name:ident, $write_fn_name:ident, $read_fn_name:ident, $int_ty:ident) => { #[test] fn $test_name() { + // Test 256 evenly spaced values of integer range, + // integer max value, and some "random" numbers. + let mut values = Vec::new(); + + let increment = (1 as $int_ty) << ($int_ty::BITS - 8); + values.extend((0..256).map(|i| $int_ty::MIN + i * increment)); + + values.push($int_ty::MAX); + + values.extend( + (-500..500).map(|i| (i as $int_ty).wrapping_mul(0x12345789ABCDEFu64 as $int_ty)), + ); + let mut stream = Vec::new(); - for x in 0..62 { + for &x in &values { let mut buf = MaybeUninit::uninit_array(); - stream.extend($write_fn_name(&mut buf, (3u64 << x) as $int_ty)); + stream.extend($write_fn_name(&mut buf, x)); } let mut position = 0; - for x in 0..62 { - let expected = (3u64 << x) as $int_ty; + for &expected in &values { let (actual, bytes_read) = $read_fn_name(&stream[position..]); assert_eq!(expected, actual); position += bytes_read; @@ -34,12 +47,28 @@ impl_test_unsigned_leb128!(test_u128_leb128, write_u128_leb128, read_u128_leb128 impl_test_unsigned_leb128!(test_usize_leb128, write_usize_leb128, read_usize_leb128, usize); macro_rules! impl_test_signed_leb128 { - ($test_name:ident, $write_fn_name:ident, $int_ty:ident) => { + ($test_name:ident, $write_fn_name:ident, $read_fn_name:ident, $int_ty:ident) => { #[test] fn $test_name() { - let values: Vec<_> = (-500..500) - .map(|i| ((i as $int_ty).wrapping_mul(0x12345789ABCDEF_i64 as $int_ty))) - .collect(); + // Test 256 evenly spaced values of integer range, + // integer max value, and some "random" numbers. + let mut values = Vec::new(); + + let mut value = $int_ty::MIN; + let increment = (1 as $int_ty) << ($int_ty::BITS - 8); + + for _ in 0..256 { + values.push(value); + // The addition in the last loop iteration overflows. + value = value.wrapping_add(increment); + } + + values.push($int_ty::MAX); + + values.extend( + (-500..500).map(|i| (i as $int_ty).wrapping_mul(0x12345789ABCDEFi64 as $int_ty)), + ); + let mut stream = Vec::new(); for &x in &values { @@ -48,9 +77,8 @@ macro_rules! impl_test_signed_leb128 { } let mut position = 0; - for &x in &values { - let expected = x as i128; - let (actual, bytes_read) = read_signed_leb128(&mut stream, position); + for &expected in &values { + let (actual, bytes_read) = $read_fn_name(&stream[position..]); assert_eq!(expected, actual); position += bytes_read; } @@ -59,8 +87,8 @@ macro_rules! impl_test_signed_leb128 { }; } -impl_test_signed_leb128!(test_i16_leb128, write_i16_leb128, i16); -impl_test_signed_leb128!(test_i32_leb128, write_i32_leb128, i32); -impl_test_signed_leb128!(test_i64_leb128, write_i64_leb128, i64); -impl_test_signed_leb128!(test_i128_leb128, write_i128_leb128, i128); -impl_test_signed_leb128!(test_isize_leb128, write_isize_leb128, isize); +impl_test_signed_leb128!(test_i16_leb128, write_i16_leb128, read_i16_leb128, i16); +impl_test_signed_leb128!(test_i32_leb128, write_i32_leb128, read_i32_leb128, i32); +impl_test_signed_leb128!(test_i64_leb128, write_i64_leb128, read_i64_leb128, i64); +impl_test_signed_leb128!(test_i128_leb128, write_i128_leb128, read_i128_leb128, i128); +impl_test_signed_leb128!(test_isize_leb128, write_isize_leb128, read_isize_leb128, isize);