Rollup merge of #65165 - BO41:char_docs, r=varkor

Improve docs on some char boolean methods simple revival of #61794 (also rustfmt on rest of file :) Documentation for `is_xid_start()` and `is_xid_continue()` couldn't be improved since both methods got remove from this repository r? @dtolnay cc @JohnCSimon
2024-11-29 10:13:54 +00:00 · 2019-10-13 13:34:30 +02:00 · 2019-10-13 13:34:30 +02:00 · 82fb193cdf
commit 82fb193cdf
parent 4dc0b8aa5b d8c2956906
1 changed files with 109 additions and 63 deletions
--- a/src/libcore/char/methods.rs
+++ b/src/libcore/char/methods.rs
@ -116,9 +116,9 @@ impl char {

        // the code is split up here to improve execution speed for cases where
        // the `radix` is constant and 10 or smaller
-        let val = if radix <= 10  {
+        let val = if radix <= 10 {
            match self {
-                '0' ..= '9' => self as u32 - '0' as u32,
+                '0'..='9' => self as u32 - '0' as u32,
                _ => return None,
            }
        } else {
@ -130,8 +130,11 @@ impl char {
            }
        };

-        if val < radix { Some(val) }
-        else { None }
+        if val < radix {
+            Some(val)
+        } else {
+            None
+        }
    }

    /// Returns an iterator that yields the hexadecimal Unicode escape of a
@ -303,8 +306,8 @@ impl char {
            '\r' => EscapeDefaultState::Backslash('r'),
            '\n' => EscapeDefaultState::Backslash('n'),
            '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
-            '\x20' ..= '\x7e' => EscapeDefaultState::Char(self),
-            _ => EscapeDefaultState::Unicode(self.escape_unicode())
+            '\x20'..='\x7e' => EscapeDefaultState::Char(self),
+            _ => EscapeDefaultState::Unicode(self.escape_unicode()),
        };
        EscapeDefault { state: init_state }
    }
@ -436,30 +439,31 @@ impl char {
    pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
        let code = self as u32;
        unsafe {
-            let len =
-            if code < MAX_ONE_B && !dst.is_empty() {
+            let len = if code < MAX_ONE_B && !dst.is_empty() {
                *dst.get_unchecked_mut(0) = code as u8;
                1
            } else if code < MAX_TWO_B && dst.len() >= 2 {
                *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
                *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
                2
-            } else if code < MAX_THREE_B && dst.len() >= 3  {
+            } else if code < MAX_THREE_B && dst.len() >= 3 {
                *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
-                *dst.get_unchecked_mut(1) = (code >>  6 & 0x3F) as u8 | TAG_CONT;
+                *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
                *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
                3
            } else if dst.len() >= 4 {
                *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
                *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
-                *dst.get_unchecked_mut(2) = (code >>  6 & 0x3F) as u8 | TAG_CONT;
+                *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
                *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
                4
            } else {
-                panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
+                panic!(
+                    "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
                    from_u32_unchecked(code).len_utf8(),
                    code,
-                    dst.len())
+                    dst.len(),
+                )
            };
            from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
        }
@ -515,15 +519,24 @@ impl char {
                *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
                slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
            } else {
-                panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
+                panic!(
+                    "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
                    from_u32_unchecked(code).len_utf16(),
                    code,
-                    dst.len())
+                    dst.len(),
+                )
            }
        }
    }

-    /// Returns `true` if this `char` is an alphabetic code point, and false if not.
+    /// Returns `true` if this `char` has the `Alphabetic` property.
+    ///
+    /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
+    /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
+    ///
+    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
    ///
    /// # Examples
    ///
@ -547,10 +560,14 @@ impl char {
        }
    }

-    /// Returns `true` if this `char` is lowercase.
+    /// Returns `true` if this `char` has the `Lowercase` property.
    ///
-    /// 'Lowercase' is defined according to the terms of the Unicode Derived Core
-    /// Property `Lowercase`.
+    /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
+    /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
+    ///
+    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
    ///
    /// # Examples
    ///
@ -575,10 +592,14 @@ impl char {
        }
    }

-    /// Returns `true` if this `char` is uppercase.
+    /// Returns `true` if this `char` has the `Uppercase` property.
    ///
-    /// 'Uppercase' is defined according to the terms of the Unicode Derived Core
-    /// Property `Uppercase`.
+    /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
+    /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
+    ///
+    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
    ///
    /// # Examples
    ///
@ -603,10 +624,12 @@ impl char {
        }
    }

-    /// Returns `true` if this `char` is whitespace.
+    /// Returns `true` if this `char` has the `White_Space` property.
    ///
-    /// 'Whitespace' is defined according to the terms of the Unicode Derived Core
-    /// Property `White_Space`.
+    /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
+    ///
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
    ///
    /// # Examples
    ///
@ -630,10 +653,10 @@ impl char {
        }
    }

-    /// Returns `true` if this `char` is alphanumeric.
+    /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
    ///
-    /// 'Alphanumeric'-ness is defined in terms of the Unicode General Categories
-    /// `Nd`, `Nl`, `No` and the Derived Core Property `Alphabetic`.
+    /// [`is_alphabetic()`]: #method.is_alphabetic
+    /// [`is_numeric()`]: #method.is_numeric
    ///
    /// # Examples
    ///
@ -655,10 +678,15 @@ impl char {
        self.is_alphabetic() || self.is_numeric()
    }

-    /// Returns `true` if this `char` is a control code point.
+    /// Returns `true` if this `char` has the general category for control codes.
    ///
-    /// 'Control code point' is defined in terms of the Unicode General
-    /// Category `Cc`.
+    /// Control codes (code points with the general category of `Cc`) are described in Chapter 4
+    /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
+    /// Database][ucd] [`UnicodeData.txt`].
+    ///
+    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
    ///
    /// # Examples
    ///
@ -675,19 +703,29 @@ impl char {
        general_category::Cc(self)
    }

-    /// Returns `true` if this `char` is an extended grapheme character.
+    /// Returns `true` if this `char` has the `Grapheme_Extend` property.
    ///
-    /// 'Extended grapheme character' is defined in terms of the Unicode Shaping and Rendering
-    /// Category `Grapheme_Extend`.
+    /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
+    /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
+    /// [`DerivedCoreProperties.txt`].
+    ///
+    /// [uax29]: https://www.unicode.org/reports/tr29/
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
    #[inline]
    pub(crate) fn is_grapheme_extended(self) -> bool {
        derived_property::Grapheme_Extend(self)
    }

-    /// Returns `true` if this `char` is numeric.
+    /// Returns `true` if this `char` has one of the general categories for numbers.
    ///
-    /// 'Numeric'-ness is defined in terms of the Unicode General Categories
-    /// `Nd`, `Nl`, `No`.
+    /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
+    /// characters, and `No` for other numeric characters) are specified in the [Unicode Character
+    /// Database][ucd] [`UnicodeData.txt`].
+    ///
+    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
    ///
    /// # Examples
    ///
@ -713,25 +751,29 @@ impl char {
        }
    }

-    /// Returns an iterator that yields the lowercase equivalent of a `char`
-    /// as one or more `char`s.
+    /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
+    /// `char`s.
    ///
-    /// If a character does not have a lowercase equivalent, the same character
-    /// will be returned back by the iterator.
+    /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
    ///
-    /// This performs complex unconditional mappings with no tailoring: it maps
-    /// one Unicode character to its lowercase equivalent according to the
-    /// [Unicode database] and the additional complex mappings
-    /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
-    /// language) are not considered here.
+    /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
+    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
    ///
-    /// For a full reference, see [here][reference].
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
    ///
-    /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
+    /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
+    /// the `char`(s) given by [`SpecialCasing.txt`].
    ///
-    /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
+    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
    ///
-    /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
+    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
+    /// is independent of context and language.
+    ///
+    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
+    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
+    ///
+    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
    ///
    /// # Examples
    ///
@ -774,25 +816,29 @@ impl char {
        ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
    }

-    /// Returns an iterator that yields the uppercase equivalent of a `char`
-    /// as one or more `char`s.
+    /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
+    /// `char`s.
    ///
-    /// If a character does not have an uppercase equivalent, the same character
-    /// will be returned back by the iterator.
+    /// If this `char` does not have a uppercase mapping, the iterator yields the same `char`.
    ///
-    /// This performs complex unconditional mappings with no tailoring: it maps
-    /// one Unicode character to its uppercase equivalent according to the
-    /// [Unicode database] and the additional complex mappings
-    /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
-    /// language) are not considered here.
+    /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
+    /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
    ///
-    /// For a full reference, see [here][reference].
+    /// [ucd]: https://www.unicode.org/reports/tr44/
+    /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
    ///
-    /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
+    /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
+    /// the `char`(s) given by [`SpecialCasing.txt`].
    ///
-    /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
+    /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
    ///
-    /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
+    /// This operation performs an unconditional mapping without tailoring. That is, the conversion
+    /// is independent of context and language.
+    ///
+    /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
+    /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
+    ///
+    /// [Unicode Standard]: https://www.unicode.org/versions/latest/
    ///
    /// # Examples
    ///