Add the basic ascii::Char type

This commit is contained in:
Scott McMurray 2023-04-29 14:45:36 -07:00
parent 831c9298c8
commit 8c781b0906
12 changed files with 724 additions and 1 deletions

View File

@ -101,6 +101,7 @@
#![feature(array_into_iter_constructors)]
#![feature(array_methods)]
#![feature(array_windows)]
#![feature(ascii_char)]
#![feature(assert_matches)]
#![feature(async_iterator)]
#![feature(coerce_unsized)]

View File

@ -2526,6 +2526,15 @@ impl<T: fmt::Display + ?Sized> ToString for T {
}
}
#[cfg(not(no_global_oom_handling))]
#[unstable(feature = "ascii_char", issue = "110998")]
impl ToString for core::ascii::Char {
    /// Builds an owned `String` from the one-byte `str` view of this
    /// ASCII character.
    #[inline]
    fn to_string(&self) -> String {
        String::from(self.as_str())
    }
}
#[cfg(not(no_global_oom_handling))]
#[stable(feature = "char_to_string_specialization", since = "1.46.0")]
impl ToString for char {

View File

@ -0,0 +1,34 @@
use crate::ascii;
#[cfg(not(test))]
impl<const N: usize> [u8; N] {
    /// Views this array of bytes as an array of ASCII characters,
    /// or returns `None` if any of the bytes is non-ASCII.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[must_use]
    #[inline]
    pub fn as_ascii(&self) -> Option<&[ascii::Char; N]> {
        if !self.is_ascii() {
            return None;
        }
        // SAFETY: `is_ascii` just confirmed that every byte is ASCII.
        Some(unsafe { self.as_ascii_unchecked() })
    }

    /// Views this array of bytes as an array of ASCII characters,
    /// without checking whether the bytes are valid.
    ///
    /// # Safety
    ///
    /// Every byte in the array must be in `0..=127`, or else this is UB.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[must_use]
    #[inline]
    pub const unsafe fn as_ascii_unchecked(&self) -> &[ascii::Char; N] {
        let ascii_ptr = self as *const [u8; N] as *const [ascii::Char; N];
        // SAFETY: The caller promised that all the bytes are ASCII.
        unsafe { &*ascii_ptr }
    }
}

View File

@ -17,6 +17,7 @@ use crate::ops::{
};
use crate::slice::{Iter, IterMut};
mod ascii;
mod drain;
mod equality;
mod iter;

View File

@ -14,6 +14,10 @@ use crate::iter::FusedIterator;
use crate::ops::Range;
use crate::str::from_utf8_unchecked;
mod ascii_char;
#[unstable(feature = "ascii_char", issue = "110998")]
pub use ascii_char::AsciiChar as Char;
/// An iterator over the escaped version of a byte.
///
/// This `struct` is created by the [`escape_default`] function. See its

View File

@ -0,0 +1,565 @@
//! This uses the name `AsciiChar`, even though it's not exposed that way right now,
//! because it avoids a whole bunch of "are you sure you didn't mean `char`?"
//! suggestions from rustc if you get anything slightly wrong in here, and overall
//! helps with clarity as we're also referring to `char` intentionally in here.
use crate::fmt;
use crate::mem::transmute;
/// One of the 128 Unicode characters from U+0000 through U+007F,
/// often known as the [ASCII] subset.
///
/// Officially, this is the first [block] in Unicode, _Basic Latin_.
/// For details, see the [*C0 Controls and Basic Latin*][chart] code chart.
///
/// This block was based on older 7-bit character code standards such as
/// ANSI X3.4-1977, ISO 646-1973, and [NIST FIPS 1-2].
///
/// # When to use this
///
/// The main advantage of this subset is that it's always valid UTF-8. As such,
/// the `&[ascii::Char]` -> `&str` conversion function (as well as other related
/// ones) is O(1): *no* runtime checks are needed.
///
/// If you're consuming strings, you should usually handle Unicode and thus
/// accept `str`s, not limit yourself to `ascii::Char`s.
///
/// However, certain formats are intentionally designed to produce ASCII-only
/// output in order to be 8-bit-clean. In those cases, it can be simpler and
/// faster to generate `ascii::Char`s instead of dealing with the variable width
/// properties of general UTF-8 encoded strings, while still allowing the result
/// to be used freely with other Rust things that deal in general `str`s.
///
/// For example, a UUID library might offer a way to produce the string
/// representation of a UUID as an `[ascii::Char; 36]` to avoid memory
/// allocation yet still allow it to be used as UTF-8 via `as_str` without
/// paying for validation (or needing `unsafe` code) the way it would if it
/// were provided as a `[u8; 36]`.
///
/// # Layout
///
/// This type is guaranteed to have a size and alignment of 1 byte.
///
/// # Names
///
/// The variants on this type are [Unicode names][NamesList] of the characters
/// in upper camel case, with a few tweaks:
/// - For `<control>` characters, the primary alias name is used.
/// - `LATIN` is dropped, as this block has no non-latin letters.
/// - `LETTER` is dropped, as `CAPITAL`/`SMALL` suffices in this block.
/// - `DIGIT`s use a single digit rather than writing out `ZERO`, `ONE`, etc.
///
/// [ASCII]: https://www.unicode.org/glossary/index.html#ASCII
/// [block]: https://www.unicode.org/glossary/index.html#block
/// [chart]: https://www.unicode.org/charts/PDF/U0000.pdf
/// [NIST FIPS 1-2]: https://nvlpubs.nist.gov/nistpubs/Legacy/FIPS/fipspub1-2-1977.pdf
/// [NamesList]: https://www.unicode.org/Public/15.0.0/ucd/NamesList.txt
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[unstable(feature = "ascii_char", issue = "110998")]
// `repr(u8)` together with explicit discriminants equal to each code point is
// what lets this module convert with `self as u8` and with a `transmute` from
// an in-range `u8`.
#[repr(u8)]
pub enum AsciiChar {
/// U+0000
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Null = 0,
/// U+0001
#[unstable(feature = "ascii_char_variants", issue = "110998")]
StartOfHeading = 1,
/// U+0002
#[unstable(feature = "ascii_char_variants", issue = "110998")]
StartOfText = 2,
/// U+0003
#[unstable(feature = "ascii_char_variants", issue = "110998")]
EndOfText = 3,
/// U+0004
#[unstable(feature = "ascii_char_variants", issue = "110998")]
EndOfTransmission = 4,
/// U+0005
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Enquiry = 5,
/// U+0006
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Acknowledge = 6,
/// U+0007
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Bell = 7,
/// U+0008
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Backspace = 8,
/// U+0009
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CharacterTabulation = 9,
/// U+000A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LineFeed = 10,
/// U+000B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LineTabulation = 11,
/// U+000C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
FormFeed = 12,
/// U+000D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CarriageReturn = 13,
/// U+000E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
ShiftOut = 14,
/// U+000F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
ShiftIn = 15,
/// U+0010
#[unstable(feature = "ascii_char_variants", issue = "110998")]
DataLinkEscape = 16,
/// U+0011
#[unstable(feature = "ascii_char_variants", issue = "110998")]
DeviceControlOne = 17,
/// U+0012
#[unstable(feature = "ascii_char_variants", issue = "110998")]
DeviceControlTwo = 18,
/// U+0013
#[unstable(feature = "ascii_char_variants", issue = "110998")]
DeviceControlThree = 19,
/// U+0014
#[unstable(feature = "ascii_char_variants", issue = "110998")]
DeviceControlFour = 20,
/// U+0015
#[unstable(feature = "ascii_char_variants", issue = "110998")]
NegativeAcknowledge = 21,
/// U+0016
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SynchronousIdle = 22,
/// U+0017
#[unstable(feature = "ascii_char_variants", issue = "110998")]
EndOfTransmissionBlock = 23,
/// U+0018
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Cancel = 24,
/// U+0019
#[unstable(feature = "ascii_char_variants", issue = "110998")]
EndOfMedium = 25,
/// U+001A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Substitute = 26,
/// U+001B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Escape = 27,
/// U+001C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
InformationSeparatorFour = 28,
/// U+001D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
InformationSeparatorThree = 29,
/// U+001E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
InformationSeparatorTwo = 30,
/// U+001F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
InformationSeparatorOne = 31,
/// U+0020
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Space = 32,
/// U+0021
#[unstable(feature = "ascii_char_variants", issue = "110998")]
ExclamationMark = 33,
/// U+0022
#[unstable(feature = "ascii_char_variants", issue = "110998")]
QuotationMark = 34,
/// U+0023
#[unstable(feature = "ascii_char_variants", issue = "110998")]
NumberSign = 35,
/// U+0024
#[unstable(feature = "ascii_char_variants", issue = "110998")]
DollarSign = 36,
/// U+0025
#[unstable(feature = "ascii_char_variants", issue = "110998")]
PercentSign = 37,
/// U+0026
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Ampersand = 38,
/// U+0027
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Apostrophe = 39,
/// U+0028
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LeftParenthesis = 40,
/// U+0029
#[unstable(feature = "ascii_char_variants", issue = "110998")]
RightParenthesis = 41,
/// U+002A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Asterisk = 42,
/// U+002B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
PlusSign = 43,
/// U+002C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Comma = 44,
/// U+002D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
HyphenMinus = 45,
/// U+002E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
FullStop = 46,
/// U+002F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Solidus = 47,
/// U+0030
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit0 = 48,
/// U+0031
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit1 = 49,
/// U+0032
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit2 = 50,
/// U+0033
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit3 = 51,
/// U+0034
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit4 = 52,
/// U+0035
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit5 = 53,
/// U+0036
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit6 = 54,
/// U+0037
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit7 = 55,
/// U+0038
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit8 = 56,
/// U+0039
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Digit9 = 57,
/// U+003A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Colon = 58,
/// U+003B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Semicolon = 59,
/// U+003C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LessThanSign = 60,
/// U+003D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
EqualsSign = 61,
/// U+003E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
GreaterThanSign = 62,
/// U+003F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
QuestionMark = 63,
/// U+0040
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CommercialAt = 64,
/// U+0041
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalA = 65,
/// U+0042
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalB = 66,
/// U+0043
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalC = 67,
/// U+0044
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalD = 68,
/// U+0045
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalE = 69,
/// U+0046
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalF = 70,
/// U+0047
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalG = 71,
/// U+0048
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalH = 72,
/// U+0049
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalI = 73,
/// U+004A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalJ = 74,
/// U+004B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalK = 75,
/// U+004C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalL = 76,
/// U+004D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalM = 77,
/// U+004E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalN = 78,
/// U+004F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalO = 79,
/// U+0050
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalP = 80,
/// U+0051
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalQ = 81,
/// U+0052
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalR = 82,
/// U+0053
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalS = 83,
/// U+0054
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalT = 84,
/// U+0055
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalU = 85,
/// U+0056
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalV = 86,
/// U+0057
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalW = 87,
/// U+0058
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalX = 88,
/// U+0059
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalY = 89,
/// U+005A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CapitalZ = 90,
/// U+005B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LeftSquareBracket = 91,
/// U+005C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
ReverseSolidus = 92,
/// U+005D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
RightSquareBracket = 93,
/// U+005E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
CircumflexAccent = 94,
/// U+005F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LowLine = 95,
/// U+0060
#[unstable(feature = "ascii_char_variants", issue = "110998")]
GraveAccent = 96,
/// U+0061
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallA = 97,
/// U+0062
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallB = 98,
/// U+0063
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallC = 99,
/// U+0064
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallD = 100,
/// U+0065
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallE = 101,
/// U+0066
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallF = 102,
/// U+0067
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallG = 103,
/// U+0068
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallH = 104,
/// U+0069
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallI = 105,
/// U+006A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallJ = 106,
/// U+006B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallK = 107,
/// U+006C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallL = 108,
/// U+006D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallM = 109,
/// U+006E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallN = 110,
/// U+006F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallO = 111,
/// U+0070
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallP = 112,
/// U+0071
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallQ = 113,
/// U+0072
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallR = 114,
/// U+0073
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallS = 115,
/// U+0074
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallT = 116,
/// U+0075
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallU = 117,
/// U+0076
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallV = 118,
/// U+0077
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallW = 119,
/// U+0078
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallX = 120,
/// U+0079
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallY = 121,
/// U+007A
#[unstable(feature = "ascii_char_variants", issue = "110998")]
SmallZ = 122,
/// U+007B
#[unstable(feature = "ascii_char_variants", issue = "110998")]
LeftCurlyBracket = 123,
/// U+007C
#[unstable(feature = "ascii_char_variants", issue = "110998")]
VerticalLine = 124,
/// U+007D
#[unstable(feature = "ascii_char_variants", issue = "110998")]
RightCurlyBracket = 125,
/// U+007E
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Tilde = 126,
/// U+007F
#[unstable(feature = "ascii_char_variants", issue = "110998")]
Delete = 127,
}
impl AsciiChar {
    /// Creates an ASCII character from the byte `b`,
    /// or returns `None` if it's too large.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn from_u8(b: u8) -> Option<Self> {
        match b {
            // SAFETY: This arm only matches bytes that are in-range.
            0..=127 => Some(unsafe { Self::from_u8_unchecked(b) }),
            _ => None,
        }
    }

    /// Creates an ASCII character from the byte `b`,
    /// without checking whether it's valid.
    ///
    /// # Safety
    ///
    /// `b` must be in `0..=127`, or else this is UB.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const unsafe fn from_u8_unchecked(b: u8) -> Self {
        // SAFETY: Our safety precondition is that `b` is in-range.
        unsafe { transmute::<u8, Self>(b) }
    }

    /// Maps the *number* `0`, `1`, …, `9` to the *character*
    /// `'0'`, `'1'`, …, `'9'` respectively.
    ///
    /// If `d >= 10`, returns `None`.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn digit(d: u8) -> Option<Self> {
        match d {
            // SAFETY: This arm only matches values that are in-range.
            0..=9 => Some(unsafe { Self::digit_unchecked(d) }),
            _ => None,
        }
    }

    /// Maps the *number* `0`, `1`, …, `9` to the *character*
    /// `'0'`, `'1'`, …, `'9'` respectively, without checking that it's in-range.
    ///
    /// # Safety
    ///
    /// This is immediate UB if called with `d > 64`.
    ///
    /// If `d >= 10` and `d <= 64`, this is allowed to return any value or panic.
    /// Notably, it should not be expected to return hex digits, or any other
    /// reasonable extension of the decimal digits.
    ///
    /// (This loose safety condition is intended to simplify soundness proofs
    /// when writing code using this method, since the implementation doesn't
    /// need something really specific, not to make those other arguments do
    /// something useful. It might be tightened before stabilization.)
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const unsafe fn digit_unchecked(d: u8) -> Self {
        debug_assert!(d < 10);

        // SAFETY: `'0'` through `'9'` are U+0030 through U+0039, and `d` must
        // be 64 or less, so the sum is at most 48 + 64 = 112 (0x70), which
        // doesn't overflow.
        let byte = unsafe { b'0'.unchecked_add(d) };
        // SAFETY: As computed above, `byte` is at most 112 and therefore
        // within the ASCII range.
        unsafe { Self::from_u8_unchecked(byte) }
    }

    /// Returns the byte value of this ASCII character.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn as_u8(self) -> u8 {
        self as u8
    }

    /// Returns this ASCII character as a `char` Unicode Scalar Value.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn as_char(self) -> char {
        let byte = self as u8;
        byte as char
    }

    /// Views this ASCII character as a one-code-unit UTF-8 `str`.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn as_str(&self) -> &str {
        // View `self` as a one-element slice and reuse the slice conversion.
        let single: &[Self] = crate::slice::from_ref(self);
        single.as_str()
    }
}
impl [AsciiChar] {
    /// Reinterprets this slice of ASCII characters as a UTF-8 `str`.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn as_str(&self) -> &str {
        let str_ptr = self as *const Self as *const str;
        // SAFETY: Each ASCII codepoint in UTF-8 is encoded as one single-byte
        // code unit having the same value as the ASCII byte.
        unsafe { &*str_ptr }
    }

    /// Reinterprets this slice of ASCII characters as a slice of `u8` bytes.
    #[unstable(feature = "ascii_char", issue = "110998")]
    #[inline]
    pub const fn as_bytes(&self) -> &[u8] {
        let text: &str = self.as_str();
        text.as_bytes()
    }
}
#[unstable(feature = "ascii_char", issue = "110998")]
impl fmt::Display for AsciiChar {
    /// Formats the character by delegating to `str`'s `Display`
    /// implementation on its one-character string view.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self.as_str();
        fmt::Display::fmt(text, f)
    }
}

View File

@ -1,5 +1,6 @@
//! impl char {}
use crate::ascii;
use crate::slice;
use crate::str::from_utf8_unchecked_mut;
use crate::unicode::printable::is_printable;
@ -1116,6 +1117,24 @@ impl char {
*self as u32 <= 0x7F
}
/// Returns the value as an [`ascii::Char`] if it is within the ASCII range,
/// or `None` if it's not.
///
/// This is preferred to [`Self::is_ascii`] when you're passing the value
/// along to something else that can take [`ascii::Char`] rather than
/// needing to check again for itself whether the value is in ASCII.
#[must_use]
#[unstable(feature = "ascii_char", issue = "110998")]
#[inline]
pub const fn as_ascii(&self) -> Option<ascii::Char> {
    if !self.is_ascii() {
        return None;
    }
    let byte = *self as u8;
    // SAFETY: Just checked that this is ASCII.
    Some(unsafe { ascii::Char::from_u8_unchecked(byte) })
}
/// Makes a copy of the value in its ASCII upper case equivalent.
///
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',

View File

@ -472,7 +472,16 @@ impl u8 {
#[rustc_const_stable(feature = "const_u8_is_ascii", since = "1.43.0")]
#[inline]
pub const fn is_ascii(&self) -> bool {
*self & 128 == 0
*self <= 127
}
/// Returns this byte as an [ASCII character](ascii::Char) when its value is
/// within the ASCII range. Otherwise, returns `None`.
#[must_use]
#[unstable(feature = "ascii_char", issue = "110998")]
#[inline]
pub const fn as_ascii(&self) -> Option<ascii::Char> {
    let byte = *self;
    ascii::Char::from_u8(byte)
}
/// Makes a copy of the value in its ASCII upper case equivalent.

View File

@ -16,6 +16,36 @@ impl [u8] {
is_ascii(self)
}
/// Returns this slice as a slice of [ASCII characters](`ascii::Char`) if it
/// [`is_ascii`](Self::is_ascii), otherwise returns `None`.
#[unstable(feature = "ascii_char", issue = "110998")]
#[must_use]
#[inline]
pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
    if !self.is_ascii() {
        return None;
    }
    // SAFETY: `is_ascii` just confirmed that every byte is ASCII.
    Some(unsafe { self.as_ascii_unchecked() })
}
/// Views this slice of bytes as a slice of ASCII characters,
/// without checking whether the bytes are valid.
///
/// # Safety
///
/// Every byte in the slice must be in `0..=127`, or else this is UB.
#[unstable(feature = "ascii_char", issue = "110998")]
#[must_use]
#[inline]
pub const unsafe fn as_ascii_unchecked(&self) -> &[ascii::Char] {
    let ascii_ptr = self as *const [u8] as *const [ascii::Char];
    // SAFETY: The caller promised that all the bytes are ASCII.
    unsafe { &*ascii_ptr }
}
/// Checks that two slices are an ASCII case-insensitive match.
///
/// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,

View File

@ -16,6 +16,7 @@ mod validations;
use self::pattern::Pattern;
use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
use crate::ascii;
use crate::char::{self, EscapeDebugExtArgs};
use crate::mem;
use crate::slice::{self, SliceIndex};
@ -2366,6 +2367,16 @@ impl str {
self.as_bytes().is_ascii()
}
/// Returns this string slice as a slice of [ASCII characters](`ascii::Char`)
/// if it [`is_ascii`](Self::is_ascii), otherwise returns `None`.
#[unstable(feature = "ascii_char", issue = "110998")]
#[must_use]
#[inline]
pub fn as_ascii(&self) -> Option<&[ascii::Char]> {
    // Like in `is_ascii`, we can work on the bytes directly.
    let bytes = self.as_bytes();
    bytes.as_ascii()
}
/// Checks that two strings are an ASCII case-insensitive match.
///
/// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,

View File

@ -16,6 +16,9 @@
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::ascii::{escape_default, EscapeDefault};
#[unstable(feature = "ascii_char", issue = "110998")]
pub use core::ascii::Char;
/// Extension methods for ASCII-subset only operations.
///
/// Be aware that operations on seemingly non-ASCII characters can sometimes

View File

@ -0,0 +1,37 @@
// compile-flags: -C opt-level=1
// ignore-debug (the extra assertions get in the way)
#![crate_type = "lib"]
#![feature(ascii_char)]
use std::ascii::Char as AsciiChar;
// CHECK-LABEL: i8 @unwrap_digit_from_remainder(i32
// Codegen test: `v % 10` is always below 10, so `AsciiChar::digit` can never
// return `None` here. The directives in the body require the emitted IR to be
// just the remainder, a truncation to `i8`, an `or` with 48 and the return,
// with no comparison and no panic path left over from the `unwrap`.
#[no_mangle]
pub fn unwrap_digit_from_remainder(v: u32) -> AsciiChar {
// CHECK-NOT: icmp
// CHECK-NOT: panic
// CHECK: %[[R:.+]] = urem i32 %v, 10
// CHECK-NEXT: %[[T:.+]] = trunc i32 %[[R]] to i8
// CHECK-NEXT: %[[D:.+]] = or i8 %[[T]], 48
// CHECK-NEXT: ret i8 %[[D]]
// CHECK-NOT: icmp
// CHECK-NOT: panic
AsciiChar::digit((v % 10) as u8).unwrap()
}
// CHECK-LABEL: i8 @unwrap_from_masked(i8
// Codegen test: `b & 0x7f` is always at most 127, so `AsciiChar::from_u8` can
// never return `None` here. The directives in the body require the emitted IR
// to be just the `and` with 127 followed by the return, with no comparison
// and no panic path left over from the `unwrap`.
#[no_mangle]
pub fn unwrap_from_masked(b: u8) -> AsciiChar {
// CHECK-NOT: icmp
// CHECK-NOT: panic
// CHECK: %[[M:.+]] = and i8 %b, 127
// CHECK-NEXT: ret i8 %[[M]]
// CHECK-NOT: icmp
// CHECK-NOT: panic
AsciiChar::from_u8(b & 0x7f).unwrap()
}