mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-25 16:24:46 +00:00
Auto merge of #35755 - SimonSapin:char_convert, r=alexcrichton
Implement std::convert traits for char. This is motivated by avoiding the `as` operator, which sometimes silently truncates, and instead using conversions that are explicitly lossless and infallible. I’m less certain that `From<u8> for char` should be implemented: while it matches an existing behavior of `as`, it’s not necessarily the right thing to use for non-ASCII bytes. It effectively decodes bytes as ISO/IEC 8859-1 (since Unicode designed its first 256 code points to be compatible with that encoding), but that is not apparent in the API name.
This commit is contained in:
commit
b2799a56a1
@ -16,6 +16,8 @@
|
|||||||
#![stable(feature = "core_char", since = "1.2.0")]
|
#![stable(feature = "core_char", since = "1.2.0")]
|
||||||
|
|
||||||
use char_private::is_printable;
|
use char_private::is_printable;
|
||||||
|
use convert::TryFrom;
|
||||||
|
use fmt;
|
||||||
use iter::FusedIterator;
|
use iter::FusedIterator;
|
||||||
use mem::transmute;
|
use mem::transmute;
|
||||||
|
|
||||||
@ -122,12 +124,7 @@ pub const MAX: char = '\u{10ffff}';
|
|||||||
#[inline]
|
#[inline]
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
pub fn from_u32(i: u32) -> Option<char> {
|
pub fn from_u32(i: u32) -> Option<char> {
|
||||||
// catch out-of-bounds and surrogates
|
char::try_from(i).ok()
|
||||||
if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(unsafe { from_u32_unchecked(i) })
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts a `u32` to a `char`, ignoring validity.
|
/// Converts a `u32` to a `char`, ignoring validity.
|
||||||
@ -175,6 +172,66 @@ pub unsafe fn from_u32_unchecked(i: u32) -> char {
|
|||||||
transmute(i)
|
transmute(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[stable(feature = "char_convert", since = "1.13.0")]
|
||||||
|
impl From<char> for u32 {
|
||||||
|
#[inline]
|
||||||
|
fn from(c: char) -> Self {
|
||||||
|
c as u32
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Maps a byte in 0x00...0xFF to a `char` whose code point has the same value, in U+0000 to U+00FF.
|
||||||
|
///
|
||||||
|
/// Unicode is designed such that this effectively decodes bytes
|
||||||
|
/// with the character encoding that IANA calls ISO-8859-1.
|
||||||
|
/// This encoding is compatible with ASCII.
|
||||||
|
///
|
||||||
|
/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one fewer hyphen),
|
||||||
|
/// which leaves some "blanks", byte values that are not assigned to any character.
|
||||||
|
/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
|
||||||
|
///
|
||||||
|
/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
|
||||||
|
/// which is a superset of ISO/IEC 8859-1 that assigns some (not all!) blanks
|
||||||
|
/// to punctuation and various Latin characters.
|
||||||
|
///
|
||||||
|
/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
|
||||||
|
/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
|
||||||
|
/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
|
||||||
|
/// C0 and C1 control codes.
|
||||||
|
#[stable(feature = "char_convert", since = "1.13.0")]
|
||||||
|
impl From<u8> for char {
|
||||||
|
#[inline]
|
||||||
|
fn from(i: u8) -> Self {
|
||||||
|
i as char
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[unstable(feature = "try_from", issue = "33417")]
|
||||||
|
impl TryFrom<u32> for char {
|
||||||
|
type Err = CharTryFromError;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn try_from(i: u32) -> Result<Self, Self::Err> {
|
||||||
|
if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
|
||||||
|
Err(CharTryFromError(()))
|
||||||
|
} else {
|
||||||
|
Ok(unsafe { from_u32_unchecked(i) })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The error type returned when a conversion from `u32` to `char` fails.
|
||||||
|
#[unstable(feature = "try_from", issue = "33417")]
|
||||||
|
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
|
||||||
|
pub struct CharTryFromError(());
|
||||||
|
|
||||||
|
#[unstable(feature = "try_from", issue = "33417")]
|
||||||
|
impl fmt::Display for CharTryFromError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
"converted integer out of range for `char`".fmt(f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Converts a digit in the given radix to a `char`.
|
/// Converts a digit in the given radix to a `char`.
|
||||||
///
|
///
|
||||||
/// A 'radix' here is sometimes also called a 'base'. A radix of two
|
/// A 'radix' here is sometimes also called a 'base'. A radix of two
|
||||||
|
@ -9,6 +9,24 @@
|
|||||||
// except according to those terms.
|
// except according to those terms.
|
||||||
|
|
||||||
use std::char;
|
use std::char;
|
||||||
|
use std::convert::TryFrom;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_convert() {
|
||||||
|
assert_eq!(u32::from('a'), 0x61);
|
||||||
|
assert_eq!(char::from(b'\0'), '\0');
|
||||||
|
assert_eq!(char::from(b'a'), 'a');
|
||||||
|
assert_eq!(char::from(b'\xFF'), '\u{FF}');
|
||||||
|
assert_eq!(char::try_from(0_u32), Ok('\0'));
|
||||||
|
assert_eq!(char::try_from(0x61_u32), Ok('a'));
|
||||||
|
assert_eq!(char::try_from(0xD7FF_u32), Ok('\u{D7FF}'));
|
||||||
|
assert!(char::try_from(0xD800_u32).is_err());
|
||||||
|
assert!(char::try_from(0xDFFF_u32).is_err());
|
||||||
|
assert_eq!(char::try_from(0xE000_u32), Ok('\u{E000}'));
|
||||||
|
assert_eq!(char::try_from(0x10FFFF_u32), Ok('\u{10FFFF}'));
|
||||||
|
assert!(char::try_from(0x110000_u32).is_err());
|
||||||
|
assert!(char::try_from(0xFFFF_FFFF_u32).is_err());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_is_lowercase() {
|
fn test_is_lowercase() {
|
||||||
|
@ -40,6 +40,8 @@ pub use core::char::{MAX, from_digit, from_u32, from_u32_unchecked};
|
|||||||
pub use core::char::{EncodeUtf16, EncodeUtf8, EscapeDebug, EscapeDefault, EscapeUnicode};
|
pub use core::char::{EncodeUtf16, EncodeUtf8, EscapeDebug, EscapeDefault, EscapeUnicode};
|
||||||
|
|
||||||
// unstable reexports
|
// unstable reexports
|
||||||
|
#[unstable(feature = "try_from", issue = "33417")]
|
||||||
|
pub use core::char::CharTryFromError;
|
||||||
#[unstable(feature = "decode_utf8", issue = "33906")]
|
#[unstable(feature = "decode_utf8", issue = "33906")]
|
||||||
pub use core::char::{DecodeUtf8, decode_utf8};
|
pub use core::char::{DecodeUtf8, decode_utf8};
|
||||||
#[unstable(feature = "unicode", issue = "27783")]
|
#[unstable(feature = "unicode", issue = "27783")]
|
||||||
|
@ -38,6 +38,7 @@
|
|||||||
#![feature(fused)]
|
#![feature(fused)]
|
||||||
#![feature(lang_items)]
|
#![feature(lang_items)]
|
||||||
#![feature(staged_api)]
|
#![feature(staged_api)]
|
||||||
|
#![feature(try_from)]
|
||||||
#![feature(unicode)]
|
#![feature(unicode)]
|
||||||
|
|
||||||
mod tables;
|
mod tables;
|
||||||
|
@ -302,6 +302,13 @@ impl<'a, T: ?Sized + Reflect> Error for cell::BorrowMutError<'a, T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[unstable(feature = "try_from", issue = "33417")]
|
||||||
|
impl Error for char::CharTryFromError {
|
||||||
|
fn description(&self) -> &str {
|
||||||
|
"converted integer out of range for `char`"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// copied from any.rs
|
// copied from any.rs
|
||||||
impl Error + 'static {
|
impl Error + 'static {
|
||||||
/// Returns true if the boxed type is the same as `T`
|
/// Returns true if the boxed type is the same as `T`
|
||||||
|
Loading…
Reference in New Issue
Block a user