mirror of
https://github.com/rust-lang/rust.git
synced 2025-04-28 02:57:37 +00:00
reduce syscalls by inferring FD types based on source struct instead of calling stat()
also adds handling for edge-cases involving large sparse files where sendfile could fail with EOVERFLOW
This commit is contained in:
parent
0624730d9e
commit
46e7fbe60b
@ -99,6 +99,7 @@ mod kernel_copy {
|
||||
use crate::os::unix::fs::FileTypeExt;
|
||||
use crate::os::unix::io::{AsRawFd, FromRawFd, RawFd};
|
||||
use crate::process::{ChildStderr, ChildStdin, ChildStdout};
|
||||
use crate::sys::fs::{copy_regular_files, sendfile_splice, CopyResult, SpliceMode};
|
||||
|
||||
pub(super) fn copy_spec<R: Read + ?Sized, W: Write + ?Sized>(
|
||||
read: &mut R,
|
||||
@ -108,20 +109,55 @@ mod kernel_copy {
|
||||
SpecCopy::copy(copier)
|
||||
}
|
||||
|
||||
/// This type represents either the inferred `FileType` of a `RawFd` based on the source
|
||||
/// type from which it was extracted or the actual metadata
|
||||
///
|
||||
/// The methods on this type only provide hints, due to `AsRawFd` and `FromRawFd` the inferred
|
||||
/// type may be wrong.
|
||||
enum FdMeta {
|
||||
/// We obtained the FD from a type that can contain any type of `FileType` and queried the metadata
|
||||
/// because it is cheaper than probing all possible syscalls (reader side)
|
||||
Metadata(Metadata),
|
||||
Socket,
|
||||
Pipe,
|
||||
None,
|
||||
/// We don't have any metadata, e.g. because the original type was `File` which can represent
|
||||
/// any `FileType` and we did not query the metadata either since it did not seem beneficial
|
||||
/// (writer side)
|
||||
NoneObtained,
|
||||
}
|
||||
|
||||
impl FdMeta {
|
||||
fn is_fifo(&self) -> bool {
|
||||
fn maybe_fifo(&self) -> bool {
|
||||
match self {
|
||||
FdMeta::Metadata(meta) => meta.file_type().is_fifo(),
|
||||
FdMeta::Socket => false,
|
||||
FdMeta::Pipe => true,
|
||||
FdMeta::None => false,
|
||||
FdMeta::NoneObtained => true,
|
||||
}
|
||||
}
|
||||
|
||||
fn potential_sendfile_source(&self) -> bool {
|
||||
match self {
|
||||
// procfs erroneously shows 0 length on non-empty readable files.
|
||||
// and if a file is truly empty then a `read` syscall will determine that and skip the write syscall
|
||||
// thus there would be no benefit from attempting sendfile
|
||||
FdMeta::Metadata(meta)
|
||||
if meta.file_type().is_file() && meta.len() > 0
|
||||
|| meta.file_type().is_block_device() =>
|
||||
{
|
||||
true
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn copy_file_range_candidate(&self) -> bool {
|
||||
match self {
|
||||
// copy_file_range will fail on empty procfs files. `read` can determine whether EOF has been reached
|
||||
// without extra cost and skip the write, thus there is no benefit in attempting copy_file_range
|
||||
FdMeta::Metadata(meta) if meta.is_file() && meta.len() > 0 => true,
|
||||
FdMeta::NoneObtained => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -149,66 +185,65 @@ mod kernel_copy {
|
||||
let r_cfg = reader.properties();
|
||||
let w_cfg = writer.properties();
|
||||
|
||||
// before direct operations on file descriptors ensure that all source and sink buffers are empty
|
||||
// before direct operations on file descriptors ensure that all source and sink buffers are empty
|
||||
let mut flush = || -> crate::io::Result<u64> {
|
||||
let bytes = reader.drain_to(writer, u64::MAX)?;
|
||||
// BufWriter buffered bytes have already been accounted for in earlier write() calls
|
||||
writer.flush()?;
|
||||
Ok(bytes)
|
||||
};
|
||||
|
||||
match (r_cfg, w_cfg) {
|
||||
(
|
||||
CopyParams(FdMeta::Metadata(reader_meta), Some(readfd)),
|
||||
CopyParams(FdMeta::Metadata(writer_meta), Some(writefd)),
|
||||
) if reader_meta.is_file() && writer_meta.is_file() => {
|
||||
let bytes_flushed = flush()?;
|
||||
let max_write = reader.min_limit();
|
||||
let (mut reader, mut writer) =
|
||||
unsafe { (fd_as_file(readfd), fd_as_file(writefd)) };
|
||||
let len = reader_meta.len();
|
||||
crate::sys::fs::copy_regular_files(
|
||||
&mut reader,
|
||||
&mut writer,
|
||||
min(len, max_write),
|
||||
)
|
||||
.map(|bytes_copied| bytes_copied + bytes_flushed)
|
||||
}
|
||||
(
|
||||
CopyParams(FdMeta::Metadata(reader_meta), Some(readfd)),
|
||||
CopyParams(_, Some(writefd)),
|
||||
) if reader_meta.is_file() => {
|
||||
// try sendfile, on most modern systems it should work with any target as long as the source is a mmapable file.
|
||||
// in the rare cases where it's not supported the wrapper function will fall back to a normal copy loop
|
||||
let bytes_flushed = flush()?;
|
||||
let (mut reader, mut writer) =
|
||||
unsafe { (fd_as_file(readfd), fd_as_file(writefd)) };
|
||||
let len = reader_meta.len();
|
||||
let max_write = reader.min_limit();
|
||||
crate::sys::fs::sendfile_splice(
|
||||
crate::sys::fs::SpliceMode::Sendfile,
|
||||
&mut reader,
|
||||
&mut writer,
|
||||
min(len, max_write),
|
||||
)
|
||||
.map(|bytes_sent| bytes_sent + bytes_flushed)
|
||||
}
|
||||
(CopyParams(reader_meta, Some(readfd)), CopyParams(writer_meta, Some(writefd)))
|
||||
if reader_meta.is_fifo() || writer_meta.is_fifo() =>
|
||||
let mut written = 0u64;
|
||||
|
||||
if let (CopyParams(input_meta, Some(readfd)), CopyParams(output_meta, Some(writefd))) =
|
||||
(r_cfg, w_cfg)
|
||||
{
|
||||
written += flush()?;
|
||||
let max_write = reader.min_limit();
|
||||
|
||||
if input_meta.copy_file_range_candidate() && output_meta.copy_file_range_candidate()
|
||||
{
|
||||
// splice
|
||||
let bytes_flushed = flush()?;
|
||||
let max_write = reader.min_limit();
|
||||
let (mut reader, mut writer) =
|
||||
unsafe { (fd_as_file(readfd), fd_as_file(writefd)) };
|
||||
crate::sys::fs::sendfile_splice(
|
||||
crate::sys::fs::SpliceMode::Splice,
|
||||
&mut reader,
|
||||
&mut writer,
|
||||
max_write,
|
||||
)
|
||||
.map(|bytes_sent| bytes_sent + bytes_flushed)
|
||||
let result = copy_regular_files(readfd, writefd, max_write);
|
||||
|
||||
match result {
|
||||
CopyResult::Ended(Ok(bytes_copied)) => return Ok(bytes_copied + written),
|
||||
CopyResult::Ended(err) => return err,
|
||||
CopyResult::Fallback(bytes) => written += bytes,
|
||||
}
|
||||
}
|
||||
_ => super::generic_copy(reader, writer),
|
||||
|
||||
// on modern kernels sendfile can copy from any mmapable type (some but not all regular files and block devices)
|
||||
// to any writable file descriptor. On older kernels the writer side can only be a socket.
|
||||
// So we just try and fallback if needed.
|
||||
// If current file offsets + write sizes overflow it may also fail, we do not try to fix that and instead
|
||||
// fall back to the generic copy loop.
|
||||
if input_meta.potential_sendfile_source() {
|
||||
let result = sendfile_splice(SpliceMode::Sendfile, readfd, writefd, max_write);
|
||||
|
||||
match result {
|
||||
CopyResult::Ended(Ok(bytes_copied)) => return Ok(bytes_copied + written),
|
||||
CopyResult::Ended(err) => return err,
|
||||
CopyResult::Fallback(bytes) => written += bytes,
|
||||
}
|
||||
}
|
||||
|
||||
if input_meta.maybe_fifo() || output_meta.maybe_fifo() {
|
||||
let result = sendfile_splice(SpliceMode::Splice, readfd, writefd, max_write);
|
||||
|
||||
match result {
|
||||
CopyResult::Ended(Ok(bytes_copied)) => return Ok(bytes_copied + written),
|
||||
CopyResult::Ended(err) => return err,
|
||||
CopyResult::Fallback(0) => { /* use fallback */ }
|
||||
CopyResult::Fallback(_) => {
|
||||
unreachable!("splice should not return > 0 bytes on the fallback path")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match super::generic_copy(reader, writer) {
|
||||
Ok(bytes) => Ok(bytes + written),
|
||||
err => err,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -235,7 +270,10 @@ mod kernel_copy {
|
||||
fn properties(&self) -> CopyParams;
|
||||
}
|
||||
|
||||
impl<T> CopyRead for &mut T where T: CopyRead {
|
||||
impl<T> CopyRead for &mut T
|
||||
where
|
||||
T: CopyRead,
|
||||
{
|
||||
fn drain_to<W: Write>(&mut self, writer: &mut W, limit: u64) -> Result<u64> {
|
||||
(**self).drain_to(writer, limit)
|
||||
}
|
||||
@ -249,13 +287,15 @@ mod kernel_copy {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> CopyWrite for &mut T where T: CopyWrite {
|
||||
impl<T> CopyWrite for &mut T
|
||||
where
|
||||
T: CopyWrite,
|
||||
{
|
||||
fn properties(&self) -> CopyParams {
|
||||
(**self).properties()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl CopyRead for File {
|
||||
fn properties(&self) -> CopyParams {
|
||||
CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
|
||||
@ -270,13 +310,13 @@ mod kernel_copy {
|
||||
|
||||
impl CopyWrite for File {
|
||||
fn properties(&self) -> CopyParams {
|
||||
CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
|
||||
CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
|
||||
}
|
||||
}
|
||||
|
||||
impl CopyWrite for &File {
|
||||
fn properties(&self) -> CopyParams {
|
||||
CopyParams(fd_to_meta(*self), Some(self.as_raw_fd()))
|
||||
CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
|
||||
}
|
||||
}
|
||||
|
||||
@ -345,13 +385,13 @@ mod kernel_copy {
|
||||
|
||||
impl CopyWrite for StdoutLock<'_> {
|
||||
fn properties(&self) -> CopyParams {
|
||||
CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
|
||||
CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
|
||||
}
|
||||
}
|
||||
|
||||
impl CopyWrite for StderrLock<'_> {
|
||||
fn properties(&self) -> CopyParams {
|
||||
CopyParams(fd_to_meta(self), Some(self.as_raw_fd()))
|
||||
CopyParams(FdMeta::NoneObtained, Some(self.as_raw_fd()))
|
||||
}
|
||||
}
|
||||
|
||||
@ -411,11 +451,7 @@ mod kernel_copy {
|
||||
let file: ManuallyDrop<File> = ManuallyDrop::new(unsafe { File::from_raw_fd(fd) });
|
||||
match file.metadata() {
|
||||
Ok(meta) => FdMeta::Metadata(meta),
|
||||
Err(_) => FdMeta::None,
|
||||
Err(_) => FdMeta::NoneObtained,
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn fd_as_file(fd: RawFd) -> ManuallyDrop<File> {
|
||||
ManuallyDrop::new(File::from_raw_fd(fd))
|
||||
}
|
||||
}
|
||||
|
@ -1195,17 +1195,26 @@ pub fn copy(from: &Path, to: &Path) -> io::Result<u64> {
|
||||
let max_len = u64::MAX;
|
||||
let (mut writer, _) = open_to_and_set_permissions(to, reader_metadata)?;
|
||||
|
||||
copy_regular_files(&mut reader, &mut writer, max_len)
|
||||
return match copy_regular_files(reader.as_raw_fd(), writer.as_raw_fd(), max_len) {
|
||||
CopyResult::Ended(result) => result,
|
||||
CopyResult::Fallback(written) => {
|
||||
// fallback is only > 0 on EOVERFLOW, which shouldn't happen
|
||||
// because the copy loop starts at a file offset 0 and counts down from `len`
|
||||
assert_eq!(0, written);
|
||||
io::copy::generic_copy(&mut reader, &mut writer)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// linux-specific implementation that will attempt to use copy_file_range for copy offloading
|
||||
/// as the name says, it only works on regular files
|
||||
///
|
||||
/// Callers must handle fallback to a generic copy loop.
|
||||
/// `Fallback` may indicate non-zero number of bytes already written
|
||||
/// if one of the files' cursor +`max_len` would exceed u64::MAX (`EOVERFLOW`).
|
||||
/// If the initial file offset was 0 then `Fallback` will only contain `0`.
|
||||
#[cfg(any(target_os = "linux", target_os = "android"))]
|
||||
pub(crate) fn copy_regular_files(
|
||||
reader: &mut crate::fs::File,
|
||||
writer: &mut crate::fs::File,
|
||||
max_len: u64,
|
||||
) -> io::Result<u64> {
|
||||
pub(crate) fn copy_regular_files(reader: RawFd, writer: RawFd, max_len: u64) -> CopyResult {
|
||||
use crate::cmp;
|
||||
use crate::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
@ -1228,14 +1237,18 @@ pub(crate) fn copy_regular_files(
|
||||
let mut written = 0u64;
|
||||
while written < max_len {
|
||||
let copy_result = if has_copy_file_range {
|
||||
let bytes_to_copy = cmp::min(max_len - written, usize::MAX as u64) as usize;
|
||||
let bytes_to_copy = cmp::min(max_len - written, usize::MAX as u64);
|
||||
// cap to 2GB chunks in case u64::MAX is passed in as file size and the file has a non-zero offset
|
||||
// this allows us to copy large chunks without hitting the limit,
|
||||
// unless someone sets a file offset close to u64::MAX - 2GB, in which case the fallback would kick in
|
||||
let bytes_to_copy = cmp::min(bytes_to_copy as usize, 0x8000_0000usize);
|
||||
let copy_result = unsafe {
|
||||
// We actually don't have to adjust the offsets,
|
||||
// because copy_file_range adjusts the file offset automatically
|
||||
cvt(copy_file_range(
|
||||
reader.as_raw_fd(),
|
||||
reader,
|
||||
ptr::null_mut(),
|
||||
writer.as_raw_fd(),
|
||||
writer,
|
||||
ptr::null_mut(),
|
||||
bytes_to_copy,
|
||||
0,
|
||||
@ -1260,12 +1273,14 @@ pub(crate) fn copy_regular_files(
|
||||
// - reading virtual files from the proc filesystem which appear to have 0 size
|
||||
// but are not empty. noted in coreutils to affect kernels at least up to 5.6.19.
|
||||
// - copying from an overlay filesystem in docker. reported to occur on fedora 32.
|
||||
return io::copy(reader, writer);
|
||||
return CopyResult::Fallback(0);
|
||||
}
|
||||
Ok(0) => return Ok(written), // reached EOF
|
||||
Ok(0) => return CopyResult::Ended(Ok(written)), // reached EOF
|
||||
Ok(ret) => written += ret as u64,
|
||||
Err(err) => {
|
||||
match err.raw_os_error() {
|
||||
// when file offset + max_length > u64::MAX
|
||||
Some(libc::EOVERFLOW) => return CopyResult::Fallback(written),
|
||||
Some(
|
||||
libc::ENOSYS | libc::EXDEV | libc::EINVAL | libc::EPERM | libc::EOPNOTSUPP,
|
||||
) => {
|
||||
@ -1276,43 +1291,55 @@ pub(crate) fn copy_regular_files(
|
||||
// - copy_file_range is disallowed, for example by seccomp (EPERM)
|
||||
// - copy_file_range cannot be used with pipes or device nodes (EINVAL)
|
||||
assert_eq!(written, 0);
|
||||
return io::copy::generic_copy(reader, writer);
|
||||
return CopyResult::Fallback(0);
|
||||
}
|
||||
_ => return Err(err),
|
||||
_ => return CopyResult::Ended(Err(err)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(written)
|
||||
CopyResult::Ended(Ok(written))
|
||||
}
|
||||
|
||||
#[derive(PartialEq)]
|
||||
pub(crate) enum SpliceMode {
|
||||
Sendfile,
|
||||
Splice,
|
||||
}
|
||||
|
||||
pub(crate) enum CopyResult {
|
||||
Ended(io::Result<u64>),
|
||||
Fallback(u64),
|
||||
}
|
||||
|
||||
/// performs splice or sendfile between file descriptors
|
||||
/// Does _not_ fall back to a generic copy loop.
|
||||
#[cfg(any(target_os = "linux", target_os = "android"))]
|
||||
pub(crate) fn sendfile_splice(
|
||||
mode: SpliceMode,
|
||||
reader: &mut crate::fs::File,
|
||||
writer: &mut crate::fs::File,
|
||||
reader: RawFd,
|
||||
writer: RawFd,
|
||||
len: u64,
|
||||
) -> io::Result<u64> {
|
||||
) -> CopyResult {
|
||||
let mut written = 0u64;
|
||||
while written < len {
|
||||
let chunk_size = crate::cmp::min(len - written, 0x7ffff000_u64) as usize;
|
||||
|
||||
let result = match mode {
|
||||
SpliceMode::Sendfile => cvt(unsafe {
|
||||
libc::sendfile(writer.as_raw_fd(), reader.as_raw_fd(), ptr::null_mut(), chunk_size)
|
||||
}),
|
||||
SpliceMode::Sendfile => {
|
||||
cvt(unsafe { libc::sendfile(writer, reader, ptr::null_mut(), chunk_size) })
|
||||
}
|
||||
SpliceMode::Splice => cvt(unsafe {
|
||||
libc::splice(
|
||||
reader.as_raw_fd(),
|
||||
reader,
|
||||
ptr::null_mut(),
|
||||
writer.as_raw_fd(),
|
||||
writer,
|
||||
ptr::null_mut(),
|
||||
// default pipe size is 64KiB. try to only fill/drain half of that capacity
|
||||
// so that the next loop iteration won't be put to sleep.
|
||||
// If reader and writer operate at the same pace they will experience fewer blocking waits.
|
||||
// This is only needed for splice since sendfile stays in kernel space when it has to block.
|
||||
//crate::cmp::min(32*1024, chunk_size),
|
||||
chunk_size,
|
||||
0,
|
||||
)
|
||||
@ -1325,17 +1352,19 @@ pub(crate) fn sendfile_splice(
|
||||
Err(err) => {
|
||||
match err.raw_os_error() {
|
||||
Some(os_err) if os_err == libc::EINVAL => {
|
||||
// Try fallback io::copy if splice/sendfile do not support this particular
|
||||
// file descriptor (EINVAL)
|
||||
// splice/sendfile do not support this particular file descriptor (EINVAL)
|
||||
assert_eq!(written, 0);
|
||||
return io::copy::generic_copy(reader, writer);
|
||||
return CopyResult::Fallback(0);
|
||||
}
|
||||
_ => return Err(err),
|
||||
Some(os_err) if mode == SpliceMode::Sendfile && os_err == libc::EOVERFLOW => {
|
||||
return CopyResult::Fallback(written);
|
||||
}
|
||||
_ => return CopyResult::Ended(Err(err)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(written)
|
||||
CopyResult::Ended(Ok(written))
|
||||
}
|
||||
|
||||
#[cfg(any(target_os = "macos", target_os = "ios"))]
|
||||
|
Loading…
Reference in New Issue
Block a user