mirror of
https://github.com/rust-lang/rust.git
synced 2025-04-10 19:16:51 +00:00
rollup merge of #20391: daramos/utf8_lossy
Prior to 9bae6ec828,
from_utf8_lossy had a minor optimization in place that avoided having to loop from the beginning of the input slice. Recently, 4908017d59
implemented Utf8Error::InvalidByte, which makes this possible again.
This commit is contained in:
commit
c5b9ffdee6
@ -143,14 +143,18 @@ impl String {
|
||||
/// ```
|
||||
#[stable]
|
||||
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> CowString<'a> {
|
||||
let mut i = 0;
|
||||
match str::from_utf8(v) {
|
||||
Ok(s) => return Cow::Borrowed(s),
|
||||
Err(..) => {}
|
||||
Err(e) => {
|
||||
if let Utf8Error::InvalidByte(firstbad) = e {
|
||||
i = firstbad;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static TAG_CONT_U8: u8 = 128u8;
|
||||
static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
|
||||
let mut i = 0;
|
||||
let total = v.len();
|
||||
fn unsafe_get(xs: &[u8], i: uint) -> u8 {
|
||||
unsafe { *xs.get_unchecked(i) }
|
||||
@ -174,7 +178,7 @@ impl String {
|
||||
// subseqidx is the index of the first byte of the subsequence we're looking at.
|
||||
// It's used to copy a bunch of contiguous good codepoints at once instead of copying
|
||||
// them one by one.
|
||||
let mut subseqidx = 0;
|
||||
let mut subseqidx = i;
|
||||
|
||||
while i < total {
|
||||
let i_ = i;
|
||||
|
Loading…
Reference in New Issue
Block a user