mirror of
https://github.com/rust-lang/rust.git
synced 2025-02-27 14:24:08 +00:00
Make slice->str conversion and related functions const
This commit makes the following functions from `core::str` `const fn`:
- `from_utf8[_mut]` (`feature(const_str_from_utf8)`)
- `from_utf8_unchecked_mut` (`feature(const_str_from_utf8_unchecked_mut)`)
- `Utf8Error::{valid_up_to,error_len}` (`feature(const_str_from_utf8)`)
This commit is contained in:
parent
c9c4b5d727
commit
cf6f64a963
@ -25,6 +25,7 @@
|
||||
#![feature(const_btree_new)]
|
||||
#![feature(const_default_impls)]
|
||||
#![feature(const_trait_impl)]
|
||||
#![feature(const_str_from_utf8)]
|
||||
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
@ -1,3 +1,4 @@
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::Ordering::{Equal, Greater, Less};
|
||||
use std::str::{from_utf8, from_utf8_unchecked};
|
||||
@ -883,6 +884,33 @@ fn test_is_utf8() {
|
||||
assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_const_is_utf8() {
|
||||
const _: () = {
|
||||
// deny overlong encodings
|
||||
assert!(from_utf8(&[0xc0, 0x80]).is_err());
|
||||
assert!(from_utf8(&[0xc0, 0xae]).is_err());
|
||||
assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
|
||||
assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
|
||||
assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
|
||||
assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
|
||||
assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
|
||||
|
||||
// deny surrogates
|
||||
assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
|
||||
assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
|
||||
|
||||
assert!(from_utf8(&[0xC2, 0x80]).is_ok());
|
||||
assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
|
||||
assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
|
||||
assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
|
||||
assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
|
||||
assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
|
||||
assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
|
||||
assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
|
||||
};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_utf8_mostly_ascii() {
|
||||
// deny invalid bytes embedded in long stretches of ascii
|
||||
@ -895,13 +923,43 @@ fn from_utf8_mostly_ascii() {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn const_from_utf8_mostly_ascii() {
|
||||
const _: () = {
|
||||
// deny invalid bytes embedded in long stretches of ascii
|
||||
let mut i = 32;
|
||||
while i < 64 {
|
||||
let mut data = [0; 128];
|
||||
data[i] = 0xC0;
|
||||
assert!(from_utf8(&data).is_err());
|
||||
data[i] = 0xC2;
|
||||
assert!(from_utf8(&data).is_err());
|
||||
|
||||
i = i + 1;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_utf8_error() {
|
||||
macro_rules! test {
|
||||
($input: expr, $expected_valid_up_to: expr, $expected_error_len: expr) => {
|
||||
($input: expr, $expected_valid_up_to:pat, $expected_error_len:pat) => {
|
||||
let error = from_utf8($input).unwrap_err();
|
||||
assert_eq!(error.valid_up_to(), $expected_valid_up_to);
|
||||
assert_eq!(error.error_len(), $expected_error_len);
|
||||
assert_matches!(error.valid_up_to(), $expected_valid_up_to);
|
||||
assert_matches!(error.error_len(), $expected_error_len);
|
||||
|
||||
const _: () = {
|
||||
match from_utf8($input) {
|
||||
Err(error) => {
|
||||
let valid_up_to = error.valid_up_to();
|
||||
let error_len = error.error_len();
|
||||
|
||||
assert!(matches!(valid_up_to, $expected_valid_up_to));
|
||||
assert!(matches!(error_len, $expected_error_len));
|
||||
}
|
||||
Ok(_) => unreachable!(),
|
||||
}
|
||||
};
|
||||
};
|
||||
}
|
||||
test!(b"A\xC3\xA9 \xFF ", 4, Some(1));
|
||||
|
@ -97,6 +97,7 @@
|
||||
#![allow(explicit_outlives_requirements)]
|
||||
//
|
||||
// Library features for const fns:
|
||||
#![feature(const_align_offset)]
|
||||
#![feature(const_align_of_val)]
|
||||
#![feature(const_alloc_layout)]
|
||||
#![feature(const_arguments_as_str)]
|
||||
@ -130,6 +131,7 @@
|
||||
#![feature(const_size_of_val)]
|
||||
#![feature(const_slice_from_raw_parts)]
|
||||
#![feature(const_slice_ptr_len)]
|
||||
#![feature(const_str_from_utf8_unchecked_mut)]
|
||||
#![feature(const_swap)]
|
||||
#![feature(const_trait_impl)]
|
||||
#![feature(const_type_id)]
|
||||
@ -138,6 +140,7 @@
|
||||
#![feature(duration_consts_2)]
|
||||
#![feature(ptr_metadata)]
|
||||
#![feature(slice_ptr_get)]
|
||||
#![feature(str_internals)]
|
||||
#![feature(variant_count)]
|
||||
#![feature(const_array_from_ref)]
|
||||
#![feature(const_slice_from_ref)]
|
||||
|
@ -82,10 +82,16 @@ use super::Utf8Error;
|
||||
/// assert_eq!("💖", sparkle_heart);
|
||||
/// ```
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
|
||||
run_utf8_validation(v)?;
|
||||
// SAFETY: Just ran validation.
|
||||
Ok(unsafe { from_utf8_unchecked(v) })
|
||||
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
|
||||
pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
|
||||
// This should use `?` again, once it's `const`
|
||||
match run_utf8_validation(v) {
|
||||
Ok(_) => {
|
||||
// SAFETY: validation succeeded.
|
||||
Ok(unsafe { from_utf8_unchecked(v) })
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a mutable slice of bytes to a mutable string slice.
|
||||
@ -119,10 +125,16 @@ pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
|
||||
/// See the docs for [`Utf8Error`] for more details on the kinds of
|
||||
/// errors that can be returned.
|
||||
#[stable(feature = "str_mut_extras", since = "1.20.0")]
|
||||
pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
|
||||
run_utf8_validation(v)?;
|
||||
// SAFETY: Just ran validation.
|
||||
Ok(unsafe { from_utf8_unchecked_mut(v) })
|
||||
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
|
||||
pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
|
||||
// This should use `?` again, once it's `const`
|
||||
match run_utf8_validation(v) {
|
||||
Ok(_) => {
|
||||
// SAFETY: validation succeeded.
|
||||
Ok(unsafe { from_utf8_unchecked_mut(v) })
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a slice of bytes to a string slice without checking
|
||||
@ -184,7 +196,8 @@ pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
|
||||
#[inline]
|
||||
#[must_use]
|
||||
#[stable(feature = "str_mut_extras", since = "1.20.0")]
|
||||
pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
|
||||
#[rustc_const_unstable(feature = "const_str_from_utf8_unchecked_mut", issue = "none")]
|
||||
pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
|
||||
// SAFETY: the caller must guarantee that the bytes `v`
|
||||
// are valid UTF-8, thus the cast to `*mut str` is safe.
|
||||
// Also, the pointer dereference is safe because that pointer
|
||||
|
@ -72,9 +72,10 @@ impl Utf8Error {
|
||||
/// assert_eq!(1, error.valid_up_to());
|
||||
/// ```
|
||||
#[stable(feature = "utf8_error", since = "1.5.0")]
|
||||
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub fn valid_up_to(&self) -> usize {
|
||||
pub const fn valid_up_to(&self) -> usize {
|
||||
self.valid_up_to
|
||||
}
|
||||
|
||||
@ -94,10 +95,15 @@ impl Utf8Error {
|
||||
///
|
||||
/// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html
|
||||
#[stable(feature = "utf8_error_error_len", since = "1.20.0")]
|
||||
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "none")]
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub fn error_len(&self) -> Option<usize> {
|
||||
self.error_len.map(|len| len as usize)
|
||||
pub const fn error_len(&self) -> Option<usize> {
|
||||
// This should become `map` again, once it's `const`
|
||||
match self.error_len {
|
||||
Some(len) => Some(len as usize),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,25 +8,25 @@ use super::Utf8Error;
|
||||
/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
|
||||
/// for width 3, and 3 bits for width 4.
|
||||
#[inline]
|
||||
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
|
||||
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
|
||||
(byte & (0x7F >> width)) as u32
|
||||
}
|
||||
|
||||
/// Returns the value of `ch` updated with continuation byte `byte`.
|
||||
#[inline]
|
||||
fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
|
||||
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
|
||||
(ch << 6) | (byte & CONT_MASK) as u32
|
||||
}
|
||||
|
||||
/// Checks whether the byte is a UTF-8 continuation byte (i.e., starts with the
|
||||
/// bits `10`).
|
||||
#[inline]
|
||||
pub(super) fn utf8_is_cont_byte(byte: u8) -> bool {
|
||||
pub(super) const fn utf8_is_cont_byte(byte: u8) -> bool {
|
||||
(byte as i8) < -64
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn unwrap_or_0(opt: Option<&u8>) -> u8 {
|
||||
const fn unwrap_or_0(opt: Option<&u8>) -> u8 {
|
||||
match opt {
|
||||
Some(&byte) => byte,
|
||||
None => 0,
|
||||
@ -105,14 +105,15 @@ const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
|
||||
|
||||
/// Returns `true` if any byte in the word `x` is nonascii (>= 128).
|
||||
#[inline]
|
||||
fn contains_nonascii(x: usize) -> bool {
|
||||
const fn contains_nonascii(x: usize) -> bool {
|
||||
(x & NONASCII_MASK) != 0
|
||||
}
|
||||
|
||||
/// Walks through `v` checking that it's a valid UTF-8 sequence,
|
||||
/// returning `Ok(())` in that case, or, if it is invalid, `Err(err)`.
|
||||
#[inline(always)]
|
||||
pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
|
||||
#[rustc_const_unstable(feature = "str_internals", issue = "none")]
|
||||
pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
|
||||
let mut index = 0;
|
||||
let len = v.len();
|
||||
|
||||
@ -142,7 +143,7 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
|
||||
|
||||
let first = v[index];
|
||||
if first >= 128 {
|
||||
let w = UTF8_CHAR_WIDTH[first as usize];
|
||||
let w = utf8_char_width(first);
|
||||
// 2-byte encoding is for codepoints \u{0080} to \u{07ff}
|
||||
// first C2 80 last DF BF
|
||||
// 3-byte encoding is for codepoints \u{0800} to \u{ffff}
|
||||
@ -230,7 +231,7 @@ pub(super) fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
|
||||
}
|
||||
|
||||
// https://tools.ietf.org/html/rfc3629
|
||||
static UTF8_CHAR_WIDTH: [u8; 256] = [
|
||||
const UTF8_CHAR_WIDTH: &[u8; 256] = &[
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, // 0x1F
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
@ -253,7 +254,7 @@ static UTF8_CHAR_WIDTH: [u8; 256] = [
|
||||
#[unstable(feature = "str_internals", issue = "none")]
|
||||
#[must_use]
|
||||
#[inline]
|
||||
pub fn utf8_char_width(b: u8) -> usize {
|
||||
pub const fn utf8_char_width(b: u8) -> usize {
|
||||
UTF8_CHAR_WIDTH[b as usize] as usize
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user