mirror of
https://github.com/rust-lang/rust.git
synced 2025-05-14 02:49:40 +00:00
Rollup merge of #84751 - Soveu:is_char_boundary_opt, r=Amanieu
str::is_char_boundary - slight optimization

The current `str::is_char_boundary` implementation emits slightly more instructions, because it includes an additional branch for `index == s.len()`:

```rust
pub fn is_char_boundary(s: &str, index: usize) -> bool {
    if index == 0 || index == s.len() {
        return true;
    }
    match s.as_bytes().get(index) {
        None => false,
        Some(&b) => (b as i8) >= -0x40,
    }
}
```

Just moving the `index == s.len()` check merges it with the `index < s.len()` comparison from `s.as_bytes().get(index)`:

```rust
pub fn is_char_boundary2(s: &str, index: usize) -> bool {
    if index == 0 {
        return true;
    }
    match s.as_bytes().get(index) {
        // For some reason, LLVM likes this comparison here more
        None => index == s.len(),
        // This is bit magic equivalent to: b < 128 || b >= 192
        Some(&b) => (b as i8) >= -0x40,
    }
}
```

This one has better codegen on every platform except powerpc.

<details><summary>x86 codegen</summary>
<p>

```nasm
example::is_char_boundary:
        mov     al, 1
        test    rdx, rdx
        je      .LBB0_5
        cmp     rsi, rdx
        je      .LBB0_5
        cmp     rsi, rdx
        jbe     .LBB0_3
        cmp     byte ptr [rdi + rdx], -65
        setg    al
.LBB0_5:
        ret
.LBB0_3:
        xor     eax, eax
        ret

example::is_char_boundary2:
        test    rdx, rdx
        je      .LBB1_1
        cmp     rsi, rdx
        jbe     .LBB1_4
        cmp     byte ptr [rdi + rdx], -65
        setg    al
        ret
.LBB1_1:                ; technically this branch is the same as LBB1_4
        mov     al, 1
        ret
.LBB1_4:
        sete    al
        ret
```

</p>
</details>

<details><summary>aarch64 codegen</summary>
<p>

```as
example::is_char_boundary:
        mov     x8, x0
        mov     w0, #1
        cbz     x2, .LBB0_4
        cmp     x1, x2
        b.eq    .LBB0_4
        b.ls    .LBB0_5
        ldrsb   w8, [x8, x2]
        cmn     w8, #65
        cset    w0, gt
.LBB0_4:
        ret
.LBB0_5:
        mov     w0, wzr
        ret

example::is_char_boundary2:
        cbz     x2, .LBB1_3
        cmp     x1, x2
        b.ls    .LBB1_4
        ldrsb   w8, [x0, x2]
        cmn     w8, #65
        cset    w0, gt
        ret
.LBB1_3:
        mov     w0, #1
        ret
.LBB1_4:
        cset    w0, eq
        ret
```

</p>
</details>

<details><summary>riscv64gc codegen</summary>
<p>

```as
example::is_char_boundary:
        seqz    a3, a2
        xor     a4, a1, a2
        seqz    a4, a4
        or      a4, a4, a3
        addi    a3, zero, 1
        bnez    a4, .LBB0_3
        bgeu    a2, a1, .LBB0_4
        add     a0, a0, a2
        lb      a0, 0(a0)
        addi    a1, zero, -65
        slt     a3, a1, a0
.LBB0_3:
        mv      a0, a3
        ret
.LBB0_4:
        mv      a0, zero
        ret

example::is_char_boundary2:
        beqz    a2, .LBB1_3
        bgeu    a2, a1, .LBB1_4
        add     a0, a0, a2
        lb      a0, 0(a0)
        addi    a1, zero, -65
        slt     a0, a1, a0
        ret
.LBB1_3:
        addi    a0, zero, 1
        ret
.LBB1_4:
        xor     a0, a1, a2
        seqz    a0, a0
        ret
```

</p>
</details>

[Link to godbolt](https://godbolt.org/z/K8avEz8Gr)

`@rustbot` label: A-codegen
This commit is contained in:
commit
62b834fb9f
@ -192,14 +192,26 @@ impl str {
|
||||
#[stable(feature = "is_char_boundary", since = "1.9.0")]
|
||||
#[inline]
|
||||
pub fn is_char_boundary(&self, index: usize) -> bool {
|
||||
// 0 and len are always ok.
|
||||
// 0 is always ok.
|
||||
// Test for 0 explicitly so that it can optimize out the check
|
||||
// easily and skip reading string data for that case.
|
||||
if index == 0 || index == self.len() {
|
||||
// Note that optimizing `self.get(..index)` relies on this.
|
||||
if index == 0 {
|
||||
return true;
|
||||
}
|
||||
|
||||
match self.as_bytes().get(index) {
|
||||
None => false,
|
||||
// For `None` we have two options:
|
||||
//
|
||||
// - index == self.len()
|
||||
// Empty strings are valid, so return true
|
||||
// - index > self.len()
|
||||
// In this case return false
|
||||
//
|
||||
// The check is placed exactly here, because it improves generated
|
||||
// code on higher opt-levels. See PR #84751 for more details.
|
||||
None => index == self.len(),
|
||||
|
||||
// This is bit magic equivalent to: b < 128 || b >= 192
|
||||
Some(&b) => (b as i8) >= -0x40,
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user