mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-02 07:22:42 +00:00
Rollup merge of #119808 - GnomedDev:encode-charsearcher-size-in-type, r=Mark-Simulacrum
Store core::str::CharSearcher::utf8_size as u8. The value is already relied on to fit in a u8 because of the `safety invariant: utf8_size must be less than 5`, so narrowing the field helps LLVM optimize and may make copies cheaper, since the freed space becomes padding rather than unused bytes.
This commit is contained in:
commit
c5da0382c8
@ -40,6 +40,7 @@
|
||||
|
||||
use crate::cmp;
|
||||
use crate::cmp::Ordering;
|
||||
use crate::convert::TryInto as _;
|
||||
use crate::fmt;
|
||||
use crate::slice::memchr;
|
||||
|
||||
@ -370,11 +371,17 @@ pub struct CharSearcher<'a> {
|
||||
|
||||
// safety invariant: `utf8_size` must be less than 5
|
||||
/// The number of bytes `needle` takes up when encoded in utf8.
|
||||
utf8_size: usize,
|
||||
utf8_size: u8,
|
||||
/// A utf8 encoded copy of the `needle`
|
||||
utf8_encoded: [u8; 4],
|
||||
}
|
||||
|
||||
// Private accessor for the `utf8_size` field.
impl CharSearcher<'_> {
|
||||
/// Returns the stored `utf8_size` converted to `usize` via `into()`, so
/// call sites can use it directly for indexing and offset arithmetic.
fn utf8_size(&self) -> usize {
|
||||
self.utf8_size.into()
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
|
||||
#[inline]
|
||||
fn haystack(&self) -> &'a str {
|
||||
@ -414,7 +421,7 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
|
||||
let bytes = self.haystack.as_bytes().get(self.finger..self.finger_back)?;
|
||||
// the last byte of the utf8 encoded needle
|
||||
// SAFETY: we have an invariant that `utf8_size < 5`
|
||||
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
|
||||
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size() - 1) };
|
||||
if let Some(index) = memchr::memchr(last_byte, bytes) {
|
||||
// The new finger is the index of the byte we found,
|
||||
// plus one, since we memchr'd for the last byte of the character.
|
||||
@ -434,10 +441,10 @@ unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
|
||||
// find something. When we find something the `finger` will be set
|
||||
// to a UTF8 boundary.
|
||||
self.finger += index + 1;
|
||||
if self.finger >= self.utf8_size {
|
||||
let found_char = self.finger - self.utf8_size;
|
||||
if self.finger >= self.utf8_size() {
|
||||
let found_char = self.finger - self.utf8_size();
|
||||
if let Some(slice) = self.haystack.as_bytes().get(found_char..self.finger) {
|
||||
if slice == &self.utf8_encoded[0..self.utf8_size] {
|
||||
if slice == &self.utf8_encoded[0..self.utf8_size()] {
|
||||
return Some((found_char, self.finger));
|
||||
}
|
||||
}
|
||||
@ -482,7 +489,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
|
||||
let bytes = haystack.get(self.finger..self.finger_back)?;
|
||||
// the last byte of the utf8 encoded needle
|
||||
// SAFETY: we have an invariant that `utf8_size < 5`
|
||||
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
|
||||
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size() - 1) };
|
||||
if let Some(index) = memchr::memrchr(last_byte, bytes) {
|
||||
// we searched a slice that was offset by self.finger,
|
||||
// add self.finger to recoup the original index
|
||||
@ -493,14 +500,14 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
|
||||
// char in the paradigm of reverse iteration). For
|
||||
// multibyte chars we need to skip down by the number of more
|
||||
// bytes they have than ASCII
|
||||
let shift = self.utf8_size - 1;
|
||||
let shift = self.utf8_size() - 1;
|
||||
if index >= shift {
|
||||
let found_char = index - shift;
|
||||
if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size)) {
|
||||
if slice == &self.utf8_encoded[0..self.utf8_size] {
|
||||
if let Some(slice) = haystack.get(found_char..(found_char + self.utf8_size())) {
|
||||
if slice == &self.utf8_encoded[0..self.utf8_size()] {
|
||||
// move finger to before the character found (i.e., at its start index)
|
||||
self.finger_back = found_char;
|
||||
return Some((self.finger_back, self.finger_back + self.utf8_size));
|
||||
return Some((self.finger_back, self.finger_back + self.utf8_size()));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -542,7 +549,12 @@ impl<'a> Pattern<'a> for char {
|
||||
#[inline]
|
||||
fn into_searcher(self, haystack: &'a str) -> Self::Searcher {
|
||||
let mut utf8_encoded = [0; 4];
|
||||
let utf8_size = self.encode_utf8(&mut utf8_encoded).len();
|
||||
let utf8_size = self
|
||||
.encode_utf8(&mut utf8_encoded)
|
||||
.len()
|
||||
.try_into()
|
||||
.expect("char len should be less than 255");
|
||||
|
||||
CharSearcher {
|
||||
haystack,
|
||||
finger: 0,
|
||||
|
Loading…
Reference in New Issue
Block a user