mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-22 23:04:33 +00:00
Minimize unsafety in encode_utf8
Use slice patterns to avoid having to skip bounds checking
This commit is contained in:
parent
9ae6cedb8d
commit
df4d490038
@ -434,36 +434,35 @@ impl char {
|
||||
#[inline]
|
||||
pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
|
||||
let code = self as u32;
|
||||
// SAFETY: each arm checks the size of the slice and only uses `get_unchecked` unsafe ops
|
||||
unsafe {
|
||||
let len = if code < MAX_ONE_B && !dst.is_empty() {
|
||||
*dst.get_unchecked_mut(0) = code as u8;
|
||||
1
|
||||
} else if code < MAX_TWO_B && dst.len() >= 2 {
|
||||
*dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
|
||||
*dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
|
||||
2
|
||||
} else if code < MAX_THREE_B && dst.len() >= 3 {
|
||||
*dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
|
||||
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||
*dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
|
||||
3
|
||||
} else if dst.len() >= 4 {
|
||||
*dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
|
||||
*dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
|
||||
*dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||
*dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
|
||||
4
|
||||
} else {
|
||||
panic!(
|
||||
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
|
||||
from_u32_unchecked(code).len_utf8(),
|
||||
code,
|
||||
dst.len(),
|
||||
)
|
||||
};
|
||||
from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
|
||||
}
|
||||
let len = self.len_utf8();
|
||||
match (len, &mut dst[..]) {
|
||||
(1, [a, ..]) => {
|
||||
*a = code as u8;
|
||||
}
|
||||
(2, [a, b, ..]) => {
|
||||
*a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
|
||||
*b = (code & 0x3F) as u8 | TAG_CONT;
|
||||
}
|
||||
(3, [a, b, c, ..]) => {
|
||||
*a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
|
||||
*b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||
*c = (code & 0x3F) as u8 | TAG_CONT;
|
||||
}
|
||||
(4, [a, b, c, d, ..]) => {
|
||||
*a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
|
||||
*b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
|
||||
*c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||
*d = (code & 0x3F) as u8 | TAG_CONT;
|
||||
}
|
||||
_ => panic!(
|
||||
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
|
||||
len,
|
||||
code,
|
||||
dst.len(),
|
||||
),
|
||||
};
|
||||
// SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
|
||||
unsafe { from_utf8_unchecked_mut(&mut dst[..len]) }
|
||||
}
|
||||
|
||||
/// Encodes this character as UTF-16 into the provided `u16` buffer,
|
||||
|
@ -129,6 +129,7 @@
|
||||
#![feature(associated_type_bounds)]
|
||||
#![feature(const_type_id)]
|
||||
#![feature(const_caller_location)]
|
||||
#![feature(slice_patterns)]
|
||||
|
||||
#[prelude_import]
|
||||
#[allow(unused)]
|
||||
|
Loading…
Reference in New Issue
Block a user