Move small-copy optimization into copy_from_slice

Ultimately, copy_from_slice is the bottleneck, not io::Cursor::read.
It might be worthwhile to move the check here, so that more places
can benefit from it.
Ruud van Asseldonk 2016-11-04 00:20:11 +01:00
parent cd7fade0a9
commit 341805288e
2 changed files with 16 additions and 18 deletions

src/libcore/slice.rs

@@ -515,9 +515,19 @@ impl<T> SliceExt for [T] {
     fn copy_from_slice(&mut self, src: &[T]) where T: Copy {
         assert!(self.len() == src.len(),
                 "destination and source slices have different lengths");
-        unsafe {
-            ptr::copy_nonoverlapping(
-                src.as_ptr(), self.as_mut_ptr(), self.len());
+        // First check if the number of elements we want to copy is small:
+        // `copy_nonoverlapping` will do a memcpy, which involves an indirect
+        // function call when `memcpy` is in the dynamically linked libc. For
+        // small elements (such as a single byte or pointer), the overhead is
+        // significant. If the element is big, then the assignment is a memcpy
+        // anyway.
+        if self.len() == 1 {
+            self[0] = src[0];
+        } else {
+            unsafe {
+                ptr::copy_nonoverlapping(
+                    src.as_ptr(), self.as_mut_ptr(), self.len());
+            }
         }
     }
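
The comment above is the heart of the change, and the effect is easy to probe. Below is a minimal, illustrative benchmark sketch, not part of this commit: the iteration count is arbitrary, it uses the later-stabilized std::hint::black_box, and an optimizing compiler may already elide the memcpy for a constant one-byte length, so results will vary.

use std::hint::black_box;
use std::time::Instant;

fn main() {
    let src = [0u8; 1];
    let mut dst = [0u8; 1];
    const ITERS: u32 = 10_000_000;

    // One-byte copy through `copy_from_slice`: before this commit it
    // always lowered to a `memcpy` call, an indirect call per byte.
    let start = Instant::now();
    for _ in 0..ITERS {
        dst.copy_from_slice(&src);
        black_box(&dst);
    }
    println!("copy_from_slice:   {:?}", start.elapsed());

    // Direct assignment: a plain one-byte move, no function call.
    let start = Instant::now();
    for _ in 0..ITERS {
        dst[0] = src[0];
        black_box(&dst);
    }
    println!("direct assignment: {:?}", start.elapsed());
}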

src/libstd/io/cursor.rs

@@ -219,21 +219,9 @@ impl<T> io::Seek for Cursor<T> where T: AsRef<[u8]> {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<T> Read for Cursor<T> where T: AsRef<[u8]> {
     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
-        // First check if the number of bytes we want to read is small: the read
-        // in the else branch will end up calling `<&[u8] as Read>::read()`,
-        // which will copy the buffer using a memcpy. If we only want to read a
-        // single byte, then the overhead of the function call is significant.
-        let num_read = {
-            let mut inner_buf = self.fill_buf()?;
-            if buf.len() == 1 && inner_buf.len() > 0 {
-                buf[0] = inner_buf[0];
-                1
-            } else {
-                Read::read(&mut inner_buf, buf)?
-            }
-        };
-        self.pos += num_read as u64;
-        Ok(num_read)
+        let n = Read::read(&mut self.fill_buf()?, buf)?;
+        self.pos += n as u64;
+        Ok(n)
     }
 }
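
The simplified read keeps its behavior: a one-byte read now reaches the fast path one layer lower, inside copy_from_slice, via `<&[u8] as Read>::read`. A small usage sketch (illustrative, not from the commit; the byte values are arbitrary):

use std::io::{Cursor, Read};

fn main() -> std::io::Result<()> {
    let mut cursor = Cursor::new(vec![0xAB_u8, 0xCD]);
    let mut byte = [0u8; 1];

    // A single-byte read: `Cursor::read` forwards to `<&[u8] as Read>::read`,
    // which copies with `copy_from_slice`, where the `len() == 1` check lives.
    let n = cursor.read(&mut byte)?;
    assert_eq!((n, byte[0]), (1, 0xAB));
    assert_eq!(cursor.position(), 1);
    Ok(())
}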