Rollup merge of #92124 - AngelicosPhosphoros:remove_extra_alloc_in_cstring_new_35838, r=Mark-Simulacrum

Little improves in CString `new` when creating from slice

Old code already contain optimization for cases with `&str` and `&[u8]` args. This commit adds a specialization for `&mut[u8]` too.

Also, I added usage of old slice in search for zero bytes instead of new buffer because it produce better code for constant inputs on Windows LTO builds. For other platforms, this wouldn't cause any difference because it calls `libc` anyway.

Inlined `_new` method into spec trait to reduce amount of code generated to `CString::new` callers.
This commit is contained in:
Matthias Krüger 2022-01-19 10:42:15 +01:00 committed by GitHub
commit 3148a322d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -373,38 +373,61 @@ impl CString {
/// the position of the nul byte.
#[stable(feature = "rust1", since = "1.0.0")]
pub fn new<T: Into<Vec<u8>>>(t: T) -> Result<CString, NulError> {
trait SpecIntoVec {
fn into_vec(self) -> Vec<u8>;
trait SpecNewImpl {
fn spec_new_impl(self) -> Result<CString, NulError>;
}
impl<T: Into<Vec<u8>>> SpecIntoVec for T {
default fn into_vec(self) -> Vec<u8> {
self.into()
}
}
// Specialization for avoiding reallocation.
impl SpecIntoVec for &'_ [u8] {
fn into_vec(self) -> Vec<u8> {
let mut v = Vec::with_capacity(self.len() + 1);
v.extend(self);
v
}
}
impl SpecIntoVec for &'_ str {
fn into_vec(self) -> Vec<u8> {
let mut v = Vec::with_capacity(self.len() + 1);
v.extend(self.as_bytes());
v
impl<T: Into<Vec<u8>>> SpecNewImpl for T {
default fn spec_new_impl(self) -> Result<CString, NulError> {
let bytes: Vec<u8> = self.into();
match memchr::memchr(0, &bytes) {
Some(i) => Err(NulError(i, bytes)),
None => Ok(unsafe { CString::from_vec_unchecked(bytes) }),
}
}
}
Self::_new(SpecIntoVec::into_vec(t))
}
// Specialization for avoiding reallocation
#[inline(always)] // Without that it is not inlined into specializations
fn spec_new_impl_bytes(bytes: &[u8]) -> Result<CString, NulError> {
// We cannot have such large slice that we would overflow here
// but using `checked_add` allows LLVM to assume that capacity never overflows
// and generate twice shorter code.
// `saturating_add` doesn't help for some reason.
let capacity = bytes.len().checked_add(1).unwrap();
fn _new(bytes: Vec<u8>) -> Result<CString, NulError> {
match memchr::memchr(0, &bytes) {
Some(i) => Err(NulError(i, bytes)),
None => Ok(unsafe { CString::from_vec_unchecked(bytes) }),
// Allocate before validation to avoid duplication of allocation code.
// We still need to allocate and copy memory even if we get an error.
let mut buffer = Vec::with_capacity(capacity);
buffer.extend(bytes);
// Check memory of self instead of new buffer.
// This allows better optimizations if lto enabled.
match memchr::memchr(0, bytes) {
Some(i) => Err(NulError(i, buffer)),
None => Ok(unsafe { CString::from_vec_unchecked(buffer) }),
}
}
impl SpecNewImpl for &'_ [u8] {
fn spec_new_impl(self) -> Result<CString, NulError> {
spec_new_impl_bytes(self)
}
}
impl SpecNewImpl for &'_ str {
fn spec_new_impl(self) -> Result<CString, NulError> {
spec_new_impl_bytes(self.as_bytes())
}
}
impl SpecNewImpl for &'_ mut [u8] {
fn spec_new_impl(self) -> Result<CString, NulError> {
spec_new_impl_bytes(self)
}
}
t.spec_new_impl()
}
/// Creates a C-compatible string by consuming a byte vector,