Rollup merge of #130670 - the8472:read-to-end-heuristics, r=ChrisDenton

delay uncapping the max_read_size in File::read_to_end

In https://github.com/rust-lang/rust/issues/130600#issuecomment-2365136985 I realized that we're likely still passing too-large buffers to the OS, at least once at the end.

Previous issues and PRs:
* #110650
* #110655
* #118222

r? ChrisDenton
This commit is contained in:
Guillaume Gomez 2024-09-22 19:19:15 +02:00 committed by GitHub
commit 82b4177395
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -398,8 +398,7 @@ where
// - avoid passing large buffers to readers that always initialize the free capacity if they perform short reads (#23815, #23820)
// - pass large buffers to readers that do not initialize the spare capacity. this can amortize per-call overheads
// - and finally pass not-too-small and not-too-large buffers to Windows read APIs because they manage to suffer from both problems
-// at the same time, i.e. small reads suffer from syscall overhead, all reads incur initialization cost
-// proportional to buffer size (#110650)
+// at the same time, i.e. small reads suffer from syscall overhead, all reads incur costs proportional to buffer size (#110650)
//
pub(crate) fn default_read_to_end<R: Read + ?Sized>(
r: &mut R,
@@ -444,6 +443,8 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
}
}
+let mut consecutive_short_reads = 0;
loop {
if buf.len() == buf.capacity() && buf.capacity() == start_cap {
// The buffer might be an exact fit. Let's read into a probe buffer
@@ -489,6 +490,12 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
return Ok(buf.len() - start_len);
}
+if bytes_read < buf_len {
+consecutive_short_reads += 1;
+} else {
+consecutive_short_reads = 0;
+}
// store how much was initialized but not filled
initialized = unfilled_but_initialized;
@@ -503,7 +510,10 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
// The reader is returning short reads but it doesn't call ensure_init().
// In that case we no longer need to restrict read sizes to avoid
// initialization costs.
-if !was_fully_initialized {
+// When reading from disk we usually don't get any short reads except at EOF.
+// So we wait for at least 2 short reads before uncapping the read buffer;
+// this helps with the Windows issue.
+if !was_fully_initialized && consecutive_short_reads > 1 {
max_read_size = usize::MAX;
}