Rollup merge of #81136 - Xavientois:io_reader_size_hint, r=cramertj

Improved IO Bytes Size Hint

After trying to implement better `size_hint()` return values for `File` in [this PR](https://github.com/rust-lang/rust/pull/81044) and changing to implementing it for `BufReader` in [this PR](https://github.com/rust-lang/rust/pull/81052), I have arrived at this implementation that provides tighter bounds for the `Bytes` iterator of various readers including `BufReader`, `Empty`, and `Chain`.

Unfortunately, for `BufReader`, the `size_hint` only improves after calling `fill_buf`, because the hint is derived from the contents of the buffer. Nevertheless, the tighter bounds should result in better pre-allocation of space to handle the contents of the `Bytes` iterator.

Closes #81052
This commit is contained in:
Mara 2021-03-05 10:57:17 +01:00 committed by GitHub
commit 60138110d7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 103 additions and 3 deletions

View File

@ -1,6 +1,8 @@
use crate::cmp;
use crate::fmt;
use crate::io::{self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, DEFAULT_BUF_SIZE};
use crate::io::{
self, BufRead, Initializer, IoSliceMut, Read, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
};
/// The `BufReader<R>` struct adds buffering to any reader.
///
@ -435,3 +437,9 @@ impl<R: Seek> Seek for BufReader<R> {
})
}
}
/// Size hint for a `BufReader`: the bytes currently held in its internal
/// buffer are known to be available, so they form the lower bound.
///
/// `upper_bound` is not overridden here, so the blanket `SizeHint`
/// implementation's value is used for it.
impl<T> SizeHint for BufReader<T> {
    fn lower_bound(&self) -> usize {
        // Everything already buffered can be yielded without touching the
        // underlying reader.
        let buffered = self.buffer();
        buffered.len()
    }
}

View File

@ -2238,6 +2238,19 @@ impl<T: BufRead, U: BufRead> BufRead for Chain<T, U> {
}
}
/// Size hint for two chained readers, combining the hints of both halves.
impl<T, U> SizeHint for Chain<T, U> {
    /// Returns the sum of both readers' lower bounds.
    ///
    /// The sum saturates at `usize::MAX` instead of overflowing; a saturated
    /// value is still a valid (if conservative) lower bound.
    fn lower_bound(&self) -> usize {
        let first = SizeHint::lower_bound(&self.first);
        let second = SizeHint::lower_bound(&self.second);
        first.saturating_add(second)
    }

    /// Returns the sum of both readers' upper bounds, or `None` when either
    /// reader has no known upper bound or the sum does not fit in a `usize`.
    fn upper_bound(&self) -> Option<usize> {
        match (SizeHint::upper_bound(&self.first), SizeHint::upper_bound(&self.second)) {
            // A wrapped sum would report an upper bound that is too small,
            // so an overflowing sum is reported as "unknown" instead.
            (Some(first), Some(second)) => first.checked_add(second),
            _ => None,
        }
    }
}
/// Reader adaptor which limits the bytes read from an underlying reader.
///
/// This struct is generally created by calling [`take`] on a reader.
@ -2464,6 +2477,30 @@ impl<R: Read> Iterator for Bytes<R> {
};
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
    // Delegate to the wrapped reader's `SizeHint` implementation. The
    // fully-qualified call makes sure the `SizeHint` trait method is the one
    // invoked on the reader.
    let reader = &self.inner;
    SizeHint::size_hint(reader)
}
}
/// Private helper trait used by `Bytes` to report an `Iterator::size_hint`
/// for the reader it wraps.
trait SizeHint {
    /// Lower bound of the hint.
    fn lower_bound(&self) -> usize;

    /// Upper bound of the hint, or `None` when no bound is known.
    fn upper_bound(&self) -> Option<usize>;

    /// Combines both bounds into the `(lower, upper)` pair shape returned by
    /// `Iterator::size_hint`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let lower = self.lower_bound();
        let upper = self.upper_bound();
        (lower, upper)
    }
}
// Blanket fallback: every type gets the least informative hint, `(0, None)`.
// Both methods are declared `default` so that more specific impls (such as the
// ones for `BufReader`, `Chain` and `Empty`) can override either bound.
impl<T> SizeHint for T {
// No bytes are known to be available.
default fn lower_bound(&self) -> usize {
0
}
// No upper limit is known.
default fn upper_bound(&self) -> Option<usize> {
None
}
}
/// An iterator over the contents of an instance of `BufRead` split on a

View File

@ -1,7 +1,7 @@
use super::{repeat, Cursor, SeekFrom};
use crate::cmp::{self, min};
use crate::io::{self, IoSlice, IoSliceMut};
use crate::io::{BufRead, Read, Seek, Write};
use crate::io::{BufRead, BufReader, Read, Seek, Write};
use crate::ops::Deref;
#[test]
@ -198,6 +198,53 @@ fn chain_bufread() {
cmp_bufread(chain1, chain2, &testdata[..]);
}
#[test]
fn bufreader_size_hint() {
    let testdata = b"ABCDEFGHIJKL";
    let expected_len = testdata.len();

    let mut reader = BufReader::new(&testdata[..]);
    // Nothing has been buffered before the first fill.
    assert_eq!(reader.buffer().len(), 0);

    reader.fill_buf().unwrap();

    // Once the buffer is filled, the lower bound equals the buffered length.
    let mut bytes = reader.bytes();
    let (lower, _) = bytes.size_hint();
    assert_eq!(lower, expected_len);

    // Consuming one byte shrinks the lower bound by exactly one.
    bytes.next().unwrap().unwrap();
    let (lower, _) = bytes.size_hint();
    assert_eq!(lower, expected_len - 1);
}
#[test]
fn empty_size_hint() {
    // An empty reader never yields a byte, so both bounds are zero.
    let bytes = io::empty().bytes();
    assert_eq!(bytes.size_hint(), (0, Some(0)));
}
#[test]
fn chain_empty_size_hint() {
    // Chaining two empty readers keeps the exact `(0, Some(0))` hint.
    let bytes = io::empty().chain(io::empty()).bytes();
    assert_eq!(bytes.size_hint(), (0, Some(0)));
}
#[test]
fn chain_size_hint() {
    let testdata = b"ABCDEFGHIJKL";

    // Two buffered readers, each over one half of the data and pre-filled.
    let mut first = BufReader::new(&testdata[..6]);
    let mut second = BufReader::new(&testdata[6..]);
    first.fill_buf().unwrap();
    second.fill_buf().unwrap();

    // The lower bounds of both halves add up to the full length. The upper
    // bound is expected to be `None`, because `BufReader` contributes only a
    // lower bound.
    let hint = first.chain(second).bytes().size_hint();
    assert_eq!(hint, (testdata.len(), None));
}
#[test]
fn chain_zero_length_read_is_not_eof() {
let a = b"A";

View File

@ -4,7 +4,9 @@
mod tests;
use crate::fmt;
use crate::io::{self, BufRead, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write};
use crate::io::{
self, BufRead, Initializer, IoSlice, IoSliceMut, Read, Seek, SeekFrom, SizeHint, Write,
};
/// A reader which is always at EOF.
///
@ -80,6 +82,12 @@ impl fmt::Debug for Empty {
}
}
impl SizeHint for Empty {
fn upper_bound(&self) -> Option<usize> {
Some(0)
}
}
/// A reader which yields one byte over and over and over and over and over and...
///
/// This struct is generally created by calling [`repeat()`]. Please