mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-29 10:13:54 +00:00
Rollup merge of #65165 - BO41:char_docs, r=varkor
Improve docs on some char boolean methods. Simple revival of #61794 (also rustfmt on rest of file :). Documentation for `is_xid_start()` and `is_xid_continue()` couldn't be improved since both methods were removed from this repository. r? @dtolnay cc @JohnCSimon
This commit is contained in:
commit
82fb193cdf
@ -116,9 +116,9 @@ impl char {
|
||||
|
||||
// the code is split up here to improve execution speed for cases where
|
||||
// the `radix` is constant and 10 or smaller
|
||||
let val = if radix <= 10 {
|
||||
let val = if radix <= 10 {
|
||||
match self {
|
||||
'0' ..= '9' => self as u32 - '0' as u32,
|
||||
'0'..='9' => self as u32 - '0' as u32,
|
||||
_ => return None,
|
||||
}
|
||||
} else {
|
||||
@ -130,8 +130,11 @@ impl char {
|
||||
}
|
||||
};
|
||||
|
||||
if val < radix { Some(val) }
|
||||
else { None }
|
||||
if val < radix {
|
||||
Some(val)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator that yields the hexadecimal Unicode escape of a
|
||||
@ -303,8 +306,8 @@ impl char {
|
||||
'\r' => EscapeDefaultState::Backslash('r'),
|
||||
'\n' => EscapeDefaultState::Backslash('n'),
|
||||
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
|
||||
'\x20' ..= '\x7e' => EscapeDefaultState::Char(self),
|
||||
_ => EscapeDefaultState::Unicode(self.escape_unicode())
|
||||
'\x20'..='\x7e' => EscapeDefaultState::Char(self),
|
||||
_ => EscapeDefaultState::Unicode(self.escape_unicode()),
|
||||
};
|
||||
EscapeDefault { state: init_state }
|
||||
}
|
||||
@ -436,30 +439,31 @@ impl char {
|
||||
pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
|
||||
let code = self as u32;
|
||||
unsafe {
|
||||
let len =
|
||||
if code < MAX_ONE_B && !dst.is_empty() {
|
||||
let len = if code < MAX_ONE_B && !dst.is_empty() {
|
||||
*dst.get_unchecked_mut(0) = code as u8;
|
||||
1
|
||||
} else if code < MAX_TWO_B && dst.len() >= 2 {
|
||||
*dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
|
||||
*dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
|
||||
2
|
||||
} else if code < MAX_THREE_B && dst.len() >= 3 {
|
||||
} else if code < MAX_THREE_B && dst.len() >= 3 {
|
||||
*dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
|
||||
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||
*dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
|
||||
3
|
||||
} else if dst.len() >= 4 {
|
||||
*dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
|
||||
*dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
|
||||
*dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||
*dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
|
||||
*dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
|
||||
4
|
||||
} else {
|
||||
panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
|
||||
panic!(
|
||||
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
|
||||
from_u32_unchecked(code).len_utf8(),
|
||||
code,
|
||||
dst.len())
|
||||
dst.len(),
|
||||
)
|
||||
};
|
||||
from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
|
||||
}
|
||||
@ -515,15 +519,24 @@ impl char {
|
||||
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
|
||||
slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
|
||||
} else {
|
||||
panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
|
||||
panic!(
|
||||
"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
|
||||
from_u32_unchecked(code).len_utf16(),
|
||||
code,
|
||||
dst.len())
|
||||
dst.len(),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if this `char` is an alphabetic code point, and false if not.
|
||||
/// Returns `true` if this `char` has the `Alphabetic` property.
|
||||
///
|
||||
/// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
|
||||
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
|
||||
///
|
||||
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
|
||||
/// [ucd]: https://www.unicode.org/reports/tr44/
|
||||
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
@ -547,10 +560,14 @@ impl char {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if this `char` is lowercase.
|
||||
/// Returns `true` if this `char` has the `Lowercase` property.
|
||||
///
|
||||
/// 'Lowercase' is defined according to the terms of the Unicode Derived Core
|
||||
/// Property `Lowercase`.
|
||||
/// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
|
||||
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
|
||||
///
|
||||
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
|
||||
/// [ucd]: https://www.unicode.org/reports/tr44/
|
||||
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
@ -575,10 +592,14 @@ impl char {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if this `char` is uppercase.
|
||||
/// Returns `true` if this `char` has the `Uppercase` property.
|
||||
///
|
||||
/// 'Uppercase' is defined according to the terms of the Unicode Derived Core
|
||||
/// Property `Uppercase`.
|
||||
/// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
|
||||
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
|
||||
///
|
||||
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
|
||||
/// [ucd]: https://www.unicode.org/reports/tr44/
|
||||
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
@ -603,10 +624,12 @@ impl char {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if this `char` is whitespace.
|
||||
/// Returns `true` if this `char` has the `White_Space` property.
|
||||
///
|
||||
/// 'Whitespace' is defined according to the terms of the Unicode Derived Core
|
||||
/// Property `White_Space`.
|
||||
/// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
|
||||
///
|
||||
/// [ucd]: https://www.unicode.org/reports/tr44/
|
||||
/// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
@ -630,10 +653,10 @@ impl char {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if this `char` is alphanumeric.
|
||||
/// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
|
||||
///
|
||||
/// 'Alphanumeric'-ness is defined in terms of the Unicode General Categories
|
||||
/// `Nd`, `Nl`, `No` and the Derived Core Property `Alphabetic`.
|
||||
/// [`is_alphabetic()`]: #method.is_alphabetic
|
||||
/// [`is_numeric()`]: #method.is_numeric
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
@ -655,10 +678,15 @@ impl char {
|
||||
self.is_alphabetic() || self.is_numeric()
|
||||
}
|
||||
|
||||
/// Returns `true` if this `char` is a control code point.
|
||||
/// Returns `true` if this `char` has the general category for control codes.
|
||||
///
|
||||
/// 'Control code point' is defined in terms of the Unicode General
|
||||
/// Category `Cc`.
|
||||
/// Control codes (code points with the general category of `Cc`) are described in Chapter 4
|
||||
/// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
|
||||
/// Database][ucd] [`UnicodeData.txt`].
|
||||
///
|
||||
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
|
||||
/// [ucd]: https://www.unicode.org/reports/tr44/
|
||||
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
@ -675,19 +703,29 @@ impl char {
|
||||
general_category::Cc(self)
|
||||
}
|
||||
|
||||
/// Returns `true` if this `char` is an extended grapheme character.
|
||||
/// Returns `true` if this `char` has the `Grapheme_Extend` property.
|
||||
///
|
||||
/// 'Extended grapheme character' is defined in terms of the Unicode Shaping and Rendering
|
||||
/// Category `Grapheme_Extend`.
|
||||
/// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
|
||||
/// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
|
||||
/// [`DerivedCoreProperties.txt`].
|
||||
///
|
||||
/// [uax29]: https://www.unicode.org/reports/tr29/
|
||||
/// [ucd]: https://www.unicode.org/reports/tr44/
|
||||
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
|
||||
#[inline]
|
||||
pub(crate) fn is_grapheme_extended(self) -> bool {
|
||||
derived_property::Grapheme_Extend(self)
|
||||
}
|
||||
|
||||
/// Returns `true` if this `char` is numeric.
|
||||
/// Returns `true` if this `char` has one of the general categories for numbers.
|
||||
///
|
||||
/// 'Numeric'-ness is defined in terms of the Unicode General Categories
|
||||
/// `Nd`, `Nl`, `No`.
|
||||
/// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
|
||||
/// characters, and `No` for other numeric characters) are specified in the [Unicode Character
|
||||
/// Database][ucd] [`UnicodeData.txt`].
|
||||
///
|
||||
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
|
||||
/// [ucd]: https://www.unicode.org/reports/tr44/
|
||||
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
@ -713,25 +751,29 @@ impl char {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator that yields the lowercase equivalent of a `char`
|
||||
/// as one or more `char`s.
|
||||
/// Returns an iterator that yields the lowercase mapping of this `char` as one or more
|
||||
/// `char`s.
|
||||
///
|
||||
/// If a character does not have a lowercase equivalent, the same character
|
||||
/// will be returned back by the iterator.
|
||||
/// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
|
||||
///
|
||||
/// This performs complex unconditional mappings with no tailoring: it maps
|
||||
/// one Unicode character to its lowercase equivalent according to the
|
||||
/// [Unicode database] and the additional complex mappings
|
||||
/// [`SpecialCasing.txt`]. Conditional mappings (based on context or
|
||||
/// language) are not considered here.
|
||||
/// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
|
||||
/// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
|
||||
///
|
||||
/// For a full reference, see [here][reference].
|
||||
/// [ucd]: https://www.unicode.org/reports/tr44/
|
||||
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
|
||||
///
|
||||
/// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
|
||||
/// If this `char` requires special considerations (e.g. multiple `char`s), the iterator yields
|
||||
/// the `char`(s) given by [`SpecialCasing.txt`].
|
||||
///
|
||||
/// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
|
||||
/// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
|
||||
///
|
||||
/// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
|
||||
/// This operation performs an unconditional mapping without tailoring. That is, the conversion
|
||||
/// is independent of context and language.
|
||||
///
|
||||
/// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
|
||||
/// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
|
||||
///
|
||||
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
@ -774,25 +816,29 @@ impl char {
|
||||
ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
|
||||
}
|
||||
|
||||
/// Returns an iterator that yields the uppercase equivalent of a `char`
|
||||
/// as one or more `char`s.
|
||||
/// Returns an iterator that yields the uppercase mapping of this `char` as one or more
|
||||
/// `char`s.
|
||||
///
|
||||
/// If a character does not have an uppercase equivalent, the same character
|
||||
/// will be returned back by the iterator.
|
||||
/// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
|
||||
///
|
||||
/// This performs complex unconditional mappings with no tailoring: it maps
|
||||
/// one Unicode character to its uppercase equivalent according to the
|
||||
/// [Unicode database] and the additional complex mappings
|
||||
/// [`SpecialCasing.txt`]. Conditional mappings (based on context or
|
||||
/// language) are not considered here.
|
||||
/// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
|
||||
/// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
|
||||
///
|
||||
/// For a full reference, see [here][reference].
|
||||
/// [ucd]: https://www.unicode.org/reports/tr44/
|
||||
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
|
||||
///
|
||||
/// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
|
||||
/// If this `char` requires special considerations (e.g. multiple `char`s), the iterator yields
|
||||
/// the `char`(s) given by [`SpecialCasing.txt`].
|
||||
///
|
||||
/// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
|
||||
/// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
|
||||
///
|
||||
/// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
|
||||
/// This operation performs an unconditional mapping without tailoring. That is, the conversion
|
||||
/// is independent of context and language.
|
||||
///
|
||||
/// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
|
||||
/// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
|
||||
///
|
||||
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
|
Loading…
Reference in New Issue
Block a user