Refactor memchr to allow optimization

2024-11-23 23:34:48 +00:00 · 2020-09-20 14:46:07 +02:00 · 2020-09-20 14:46:07 +02:00 · 89b8a97aea
commit 89b8a97aea
parent 5b9e886403
4 changed files with 90 additions and 17 deletions
--- a/library/core/src/slice/cmp.rs
+++ b/library/core/src/slice/cmp.rs
@ -268,12 +268,14 @@ where
 }

 impl SliceContains for u8 {
+    #[inline]
    fn slice_contains(&self, x: &[Self]) -> bool {
        memchr::memchr(*self, x).is_some()
    }
 }

 impl SliceContains for i8 {
+    #[inline]
    fn slice_contains(&self, x: &[Self]) -> bool {
        let byte = *self as u8;
        // SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()`
--- a/library/core/src/slice/memchr.rs
+++ b/library/core/src/slice/memchr.rs
@ -12,6 +12,7 @@ const HI_U64: u64 = 0x8080808080808080;
 // Use truncation.
 const LO_USIZE: usize = LO_U64 as usize;
 const HI_USIZE: usize = HI_U64 as usize;
+const USIZE_BYTES: usize = mem::size_of::<usize>();

 /// Returns `true` if `x` contains any zero byte.
 ///
@ -38,19 +39,29 @@ fn repeat_byte(b: u8) -> usize {
 }

 /// Returns the first index matching the byte `x` in `text`.
+#[inline]
 pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
+    // Fast path for small slices
+    if text.len() < 2 * USIZE_BYTES {
+        return text.iter().position(|elt| *elt == x);
+    }
+
+    memchr_general_case(x, text)
+}
+
+fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
    // Scan for a single byte value by reading two `usize` words at a time.
    //
    // Split `text` in three parts
    // - unaligned initial part, before the first word aligned address in text
    // - body, scan by 2 words at a time
    // - the last remaining part, < 2 word size
-    let len = text.len();
-    let ptr = text.as_ptr();
-    let usize_bytes = mem::size_of::<usize>();

    // search up to an aligned boundary
-    let mut offset = ptr.align_offset(usize_bytes);
+    let len = text.len();
+    let ptr = text.as_ptr();
+    let mut offset = ptr.align_offset(USIZE_BYTES);
+
    if offset > 0 {
        offset = cmp::min(offset, len);
        if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
@ -60,22 +71,19 @@ pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {

    // search the body of the text
    let repeated_x = repeat_byte(x);
+    while offset <= len - 2 * USIZE_BYTES {
+        unsafe {
+            let u = *(ptr.add(offset) as *const usize);
+            let v = *(ptr.add(offset + USIZE_BYTES) as *const usize);

-    if len >= 2 * usize_bytes {
-        while offset <= len - 2 * usize_bytes {
-            unsafe {
-                let u = *(ptr.add(offset) as *const usize);
-                let v = *(ptr.add(offset + usize_bytes) as *const usize);
-
-                // break if there is a matching byte
-                let zu = contains_zero_byte(u ^ repeated_x);
-                let zv = contains_zero_byte(v ^ repeated_x);
-                if zu || zv {
-                    break;
-                }
+            // break if there is a matching byte
+            let zu = contains_zero_byte(u ^ repeated_x);
+            let zv = contains_zero_byte(v ^ repeated_x);
+            if zu || zv {
+                break;
            }
-            offset += usize_bytes * 2;
        }
+        offset += USIZE_BYTES * 2;
    }

    // Find the byte after the point the body loop stopped.
--- a/library/core/src/slice/mod.rs
+++ b/library/core/src/slice/mod.rs
@ -1633,6 +1633,7 @@ impl<T> [T] {
    /// assert!(!v.iter().any(|e| e == "hi"));
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
+    #[inline]
    pub fn contains(&self, x: &T) -> bool
    where
        T: PartialEq,
--- a/src/test/codegen/issue-75659.rs
+++ b/src/test/codegen/issue-75659.rs
@ -0,0 +1,62 @@
+// This test checks that the call to memchr/slice_contains is optimized away
+// when searching in small slices.
+
+// compile-flags: -O
+
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @foo1
+#[no_mangle]
+pub fn foo1(x: u8, data: &[u8; 1]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo2
+#[no_mangle]
+pub fn foo2(x: u8, data: &[u8; 2]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo3
+#[no_mangle]
+pub fn foo3(x: u8, data: &[u8; 3]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo4
+#[no_mangle]
+pub fn foo4(x: u8, data: &[u8; 4]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo8
+#[no_mangle]
+pub fn foo8(x: u8, data: &[u8; 8]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo8_i8
+#[no_mangle]
+pub fn foo8_i8(x: i8, data: &[i8; 8]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    !data.contains(&x)
+}
+
+// Check that the general case isn't inlined
+// CHECK-LABEL: @foo80
+#[no_mangle]
+pub fn foo80(x: u8, data: &[u8; 80]) -> bool {
+    // CHECK: call core::slice::memchr
+    data.contains(&x)
+}