rollup merge of #20391: daramos/utf8_lossy

Prior to 9bae6ec828, `from_utf8_lossy` had a minor optimization in place that avoided having to loop from the beginning of the input slice. Recently, 4908017d59 implemented `Utf8Error::InvalidByte`, which makes this optimization possible again.
2025-04-10 19:16:51 +00:00 · 2015-01-02 09:22:42 -08:00 · 2015-01-02 09:22:42 -08:00 · c5b9ffdee6
commit c5b9ffdee6
parent e80b9811a6 8aeefbbfdd
1 changed files with 7 additions and 3 deletions
--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@@ -143,14 +143,18 @@ impl String {
    /// ```
    #[stable]
    pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> CowString<'a> {
+        let mut i = 0;
        match str::from_utf8(v) {
            Ok(s) => return Cow::Borrowed(s),
-            Err(..) => {}
+            Err(e) => {
+                if let Utf8Error::InvalidByte(firstbad) = e {
+                    i = firstbad;
+                }
+            }
        }

        static TAG_CONT_U8: u8 = 128u8;
        static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
-        let mut i = 0;
        let total = v.len();
        fn unsafe_get(xs: &[u8], i: uint) -> u8 {
            unsafe { *xs.get_unchecked(i) }
@@ -174,7 +178,7 @@ impl String {
        // subseqidx is the index of the first byte of the subsequence we're looking at.
        // It's used to copy a bunch of contiguous good codepoints at once instead of copying
        // them one by one.
-        let mut subseqidx = 0;
+        let mut subseqidx = i;

        while i < total {
            let i_ = i;