From ec18991492849393a8b3ae4e0c70a22319e8ea60 Mon Sep 17 00:00:00 2001 From: Corey Farwell Date: Sat, 11 Aug 2018 14:09:59 -0400 Subject: [PATCH] Add links to std::char::REPLACEMENT_CHARACTER from docs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a few places where we mention the replacement character in the docs, and it could be helpful for users to utilize the constant which is available in the standard library, so let’s link to it! --- src/liballoc/string.rs | 6 ++++-- src/libcore/str/mod.rs | 5 ++++- src/libstd/ffi/c_str.rs | 7 ++++--- src/libstd/ffi/os_str.rs | 4 +++- src/libstd/path.rs | 4 +++- src/libstd/sys/windows/ext/ffi.rs | 3 ++- 6 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/liballoc/string.rs b/src/liballoc/string.rs index 631779a17a1..dd559df08cc 100644 --- a/src/liballoc/string.rs +++ b/src/liballoc/string.rs @@ -519,10 +519,11 @@ impl String { /// between the two. Not all byte slices are valid strings, however: strings /// are required to be valid UTF-8. During this conversion, /// `from_utf8_lossy()` will replace any invalid UTF-8 sequences with - /// `U+FFFD REPLACEMENT CHARACTER`, which looks like this: � + /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], which looks like this: � /// /// [`u8`]: ../../std/primitive.u8.html /// [byteslice]: ../../std/primitive.slice.html + /// [U+FFFD]: ../char/constant.REPLACEMENT_CHARACTER.html /// /// If you are sure that the byte slice is valid UTF-8, and you don't want /// to incur the overhead of the conversion, there is an unsafe version @@ -621,7 +622,7 @@ impl String { } /// Decode a UTF-16 encoded slice `v` into a `String`, replacing - /// invalid data with the replacement character (U+FFFD). + /// invalid data with [the replacement character (`U+FFFD`)][U+FFFD]. 
/// /// Unlike [`from_utf8_lossy`] which returns a [`Cow<'a, str>`], /// `from_utf16_lossy` returns a `String` since the UTF-16 to UTF-8 @@ -629,6 +630,7 @@ impl String { /// /// [`from_utf8_lossy`]: #method.from_utf8_lossy /// [`Cow<'a, str>`]: ../borrow/enum.Cow.html + /// [U+FFFD]: ../char/constant.REPLACEMENT_CHARACTER.html /// /// # Examples /// diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 356534a9187..54b17b0fbb3 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -244,7 +244,10 @@ impl Utf8Error { /// The length provided is that of the invalid byte sequence /// that starts at the index given by `valid_up_to()`. /// Decoding should resume after that sequence - /// (after inserting a U+FFFD REPLACEMENT CHARACTER) in case of lossy decoding. + /// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of + /// lossy decoding. + /// + /// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html #[stable(feature = "utf8_error_error_len", since = "1.20.0")] pub fn error_len(&self) -> Option<usize> { self.error_len.map(|len| len as usize) diff --git a/src/libstd/ffi/c_str.rs b/src/libstd/ffi/c_str.rs index b2777f5c485..2b87094926c 100644 --- a/src/libstd/ffi/c_str.rs +++ b/src/libstd/ffi/c_str.rs @@ -1175,9 +1175,9 @@ impl CStr { /// If the contents of the `CStr` are valid UTF-8 data, this /// function will return a [`Cow`]`::`[`Borrowed`]`(`[`&str`]`)` /// with the the corresponding [`&str`] slice. Otherwise, it will - /// replace any invalid UTF-8 sequences with `U+FFFD REPLACEMENT - /// CHARACTER` and return a [`Cow`]`::`[`Owned`]`(`[`String`]`)` - /// with the result. + /// replace any invalid UTF-8 sequences with + /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD] and return a + /// [`Cow`]`::`[`Owned`]`(`[`String`]`)` with the result. 
/// /// > **Note**: This method is currently implemented to check for validity /// > after a constant-time cast, but it is planned to alter its definition @@ -1189,6 +1189,7 @@ impl CStr { /// [`Owned`]: ../borrow/enum.Cow.html#variant.Owned /// [`str`]: ../primitive.str.html /// [`String`]: ../string/struct.String.html + /// [U+FFFD]: ../char/constant.REPLACEMENT_CHARACTER.html /// /// # Examples /// diff --git a/src/libstd/ffi/os_str.rs b/src/libstd/ffi/os_str.rs index 9e501a84e05..6bcd62dbd59 100644 --- a/src/libstd/ffi/os_str.rs +++ b/src/libstd/ffi/os_str.rs @@ -520,10 +520,12 @@ impl OsStr { /// Converts an `OsStr` to a [`Cow`]`<`[`str`]`>`. /// - /// Any non-Unicode sequences are replaced with U+FFFD REPLACEMENT CHARACTER. + /// Any non-Unicode sequences are replaced with + /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]. /// /// [`Cow`]: ../../std/borrow/enum.Cow.html /// [`str`]: ../../std/primitive.str.html + /// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html /// /// # Examples /// diff --git a/src/libstd/path.rs b/src/libstd/path.rs index 688a7e99f10..ca8be75fab5 100644 --- a/src/libstd/path.rs +++ b/src/libstd/path.rs @@ -1737,9 +1737,11 @@ impl Path { /// Converts a `Path` to a [`Cow<str>`]. /// - /// Any non-Unicode sequences are replaced with U+FFFD REPLACEMENT CHARACTER. + /// Any non-Unicode sequences are replaced with + /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]. /// /// [`Cow<str>`]: ../borrow/enum.Cow.html + /// [U+FFFD]: ../char/constant.REPLACEMENT_CHARACTER.html /// /// # Examples /// diff --git a/src/libstd/sys/windows/ext/ffi.rs b/src/libstd/sys/windows/ext/ffi.rs index 98d43552489..bae0d02786a 100644 --- a/src/libstd/sys/windows/ext/ffi.rs +++ b/src/libstd/sys/windows/ext/ffi.rs @@ -31,7 +31,7 @@ //! //! If Rust code *does* need to look into those strings, it can //! convert them to valid UTF-8, possibly lossily, by substituting -//! invalid sequences with U+FFFD REPLACEMENT CHARACTER, as is +//! 
invalid sequences with [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], as is //! conventionally done in other Rust APIs that deal with string //! encodings. //! @@ -65,6 +65,7 @@ //! [`from_wide`]: trait.OsStringExt.html#tymethod.from_wide //! [`encode_wide`]: trait.OsStrExt.html#tymethod.encode_wide //! [`collect`]: ../../../iter/trait.Iterator.html#method.collect +//! [U+FFFD]: ../../../char/constant.REPLACEMENT_CHARACTER.html #![stable(feature = "rust1", since = "1.0.0")]