Auto merge of #108157 - scottmcm:tuple-gt-via-partialcmp, r=dtolnay

Use `partial_cmp` to implement tuple `lt`/`le`/`ge`/`gt` In today's implementation, `(A, B)::gt` contains calls to *both* `A::eq` *and* `A::gt`. That's fine for primitives, but for things like `String`s it's kinda weird -- `(String, usize)::gt` has a call to both `bcmp` and `memcmp` (<https://rust.godbolt.org/z/7jbbPMesf>) because when `bcmp` says the `String`s aren't equal, it turns around and calls `memcmp` to find out which one's bigger. This PR changes the implementation to instead implement `(A, …, C, Z)::gt` using `A::partial_cmp`, `…::partial_cmp`, `C::partial_cmp`, and `Z::gt`. (And analogously for `lt`, `le`, and `ge`.) That way expensive comparisons don't need to be repeated. Technically this is an observable change on stable, so I've marked it `needs-fcp` + `T-libs-api` and will r? rust-lang/libs-api I'm hoping that this will be non-controversial, however, since it's very similar to the observable changes that were made to the derives (#81384 #98655) -- like those, this only changes behaviour if a type overrode behaviour in a way inconsistent with the rules for the various traits involved. (The first commit here is #108156, adding the codegen test, which I used to make sure this doesn't regress behaviour for primitives.) Zulip conversation about this change: <https://rust-lang.zulipchat.com/#narrow/stream/219381-t-libs/topic/.60.3E.60.20on.20Tuples/near/328392927>.
2024-11-25 08:13:41 +00:00 · 2023-03-05 22:02:26 +00:00 · 2023-03-05 22:02:26 +00:00 · 816f958ac3
commit 816f958ac3
parent 7820b62d20 4492793e0d
4 changed files with 179 additions and 13 deletions
--- a/library/core/benches/lib.rs
+++ b/library/core/benches/lib.rs
@ -20,6 +20,7 @@ mod ops;
 mod pattern;
 mod slice;
 mod str;
+mod tuple;

 /// Returns a `rand::Rng` seeded with a consistent seed.
 ///
--- a/library/core/benches/tuple.rs
+++ b/library/core/benches/tuple.rs
@ -0,0 +1,22 @@
+use rand::prelude::*;
+use test::{black_box, Bencher};
+
+#[bench]
+fn bench_tuple_comparison(b: &mut Bencher) {
+    let mut rng = black_box(super::bench_rng());
+
+    let data = black_box([
+        ("core::iter::adapters::Chain", 123_usize),
+        ("core::iter::adapters::Clone", 456_usize),
+        ("core::iter::adapters::Copie", 789_usize),
+        ("core::iter::adapters::Cycle", 123_usize),
+        ("core::iter::adapters::Flatt", 456_usize),
+        ("core::iter::adapters::TakeN", 789_usize),
+    ]);
+
+    b.iter(|| {
+        let x = data.choose(&mut rng).unwrap();
+        let y = data.choose(&mut rng).unwrap();
+        [x < y, x <= y, x > y, x >= y]
+    });
+}
--- a/library/core/src/tuple.rs
+++ b/library/core/src/tuple.rs
@ -1,7 +1,7 @@
 // See src/libstd/primitive_docs.rs for documentation.

-use crate::cmp::Ordering::*;
-use crate::cmp::*;
+use crate::cmp::Ordering::{self, *};
+use crate::mem::transmute;

 // Recursive macro for implementing n-ary tuple functions and operations
 //
@ -61,19 +61,19 @@ macro_rules! tuple_impls {
                }
                #[inline]
                fn lt(&self, other: &($($T,)+)) -> bool {
-                    lexical_ord!(lt, $( ${ignore(T)} self.${index()}, other.${index()} ),+)
+                    lexical_ord!(lt, Less, $( ${ignore(T)} self.${index()}, other.${index()} ),+)
                }
                #[inline]
                fn le(&self, other: &($($T,)+)) -> bool {
-                    lexical_ord!(le, $( ${ignore(T)} self.${index()}, other.${index()} ),+)
+                    lexical_ord!(le, Less, $( ${ignore(T)} self.${index()}, other.${index()} ),+)
                }
                #[inline]
                fn ge(&self, other: &($($T,)+)) -> bool {
-                    lexical_ord!(ge, $( ${ignore(T)} self.${index()}, other.${index()} ),+)
+                    lexical_ord!(ge, Greater, $( ${ignore(T)} self.${index()}, other.${index()} ),+)
                }
                #[inline]
                fn gt(&self, other: &($($T,)+)) -> bool {
-                    lexical_ord!(gt, $( ${ignore(T)} self.${index()}, other.${index()} ),+)
+                    lexical_ord!(gt, Greater, $( ${ignore(T)} self.${index()}, other.${index()} ),+)
                }
            }
        }
@ -123,16 +123,38 @@ macro_rules! maybe_tuple_doc {
    };
 }

-// Constructs an expression that performs a lexical ordering using method $rel.
+#[inline]
+const fn ordering_is_some(c: Option<Ordering>, x: Ordering) -> bool {
+    // FIXME: Just use `==` once that's const-stable on `Option`s.
+    // This isn't using `match` because that optimizes worse due to
+    // making a two-step check (`Some` *then* the inner value).
+
+    // SAFETY: There's no public guarantee for `Option<Ordering>`,
+    // but we're core so we know that it's definitely a byte.
+    unsafe {
+        let c: i8 = transmute(c);
+        let x: i8 = transmute(Some(x));
+        c == x
+    }
+}
+
+// Constructs an expression that performs a lexical ordering using method `$rel`.
 // The values are interleaved, so the macro invocation for
-// `(a1, a2, a3) < (b1, b2, b3)` would be `lexical_ord!(lt, a1, b1, a2, b2,
-// a3, b3)` (and similarly for `lexical_cmp`)
+// `(a1, a2, a3) < (b1, b2, b3)` would be `lexical_ord!(lt, opt_is_lt, a1, b1,
+// a2, b2, a3, b3)` (and similarly for `lexical_cmp`)
+//
+// `$ne_rel` is only used to determine the result after checking that they're
+// not equal, so `lt` and `le` can both just use `Less`.
 macro_rules! lexical_ord {
-    ($rel: ident, $a:expr, $b:expr, $($rest_a:expr, $rest_b:expr),+) => {
-        if $a != $b { lexical_ord!($rel, $a, $b) }
-        else { lexical_ord!($rel, $($rest_a, $rest_b),+) }
+    ($rel: ident, $ne_rel: ident, $a:expr, $b:expr, $($rest_a:expr, $rest_b:expr),+) => {{
+        let c = PartialOrd::partial_cmp(&$a, &$b);
+        if !ordering_is_some(c, Equal) { ordering_is_some(c, $ne_rel) }
+        else { lexical_ord!($rel, $ne_rel, $($rest_a, $rest_b),+) }
+    }};
+    ($rel: ident, $ne_rel: ident, $a:expr, $b:expr) => {
+        // Use the specific method for the last element
+        PartialOrd::$rel(&$a, &$b)
    };
-    ($rel: ident, $a:expr, $b:expr) => { ($a) . $rel (& $b) };
 }

 macro_rules! lexical_partial_cmp {
--- a/tests/codegen/comparison-operators-2-tuple.rs
+++ b/tests/codegen/comparison-operators-2-tuple.rs
@ -0,0 +1,121 @@
+// compile-flags: -C opt-level=1 -Z merge-functions=disabled
+// min-llvm-version: 15.0
+// only-x86_64
+
+#![crate_type = "lib"]
+
+use std::cmp::Ordering;
+
+type TwoTuple = (i16, u16);
+
+//
+// The operators are all overridden directly, so should optimize easily.
+//
+// Yes, the `s[lg]t` is correct for the `[lg]e` version because it's only used
+// in the side of the select where we know the values are *not* equal.
+//
+
+// CHECK-LABEL: @check_lt_direct
+// CHECK-SAME: (i16 noundef %[[A0:.+]], i16 noundef %[[A1:.+]], i16 noundef %[[B0:.+]], i16 noundef %[[B1:.+]])
+#[no_mangle]
+pub fn check_lt_direct(a: TwoTuple, b: TwoTuple) -> bool {
+    // CHECK-DAG: %[[EQ:.+]] = icmp eq i16 %[[A0]], %[[B0]]
+    // CHECK-DAG: %[[CMP0:.+]] = icmp slt i16 %[[A0]], %[[B0]]
+    // CHECK-DAG: %[[CMP1:.+]] = icmp ult i16 %[[A1]], %[[B1]]
+    // CHECK: %[[R:.+]] = select i1 %[[EQ]], i1 %[[CMP1]], i1 %[[CMP0]]
+    // CHECK: ret i1 %[[R]]
+    a < b
+}
+
+// CHECK-LABEL: @check_le_direct
+// CHECK-SAME: (i16 noundef %[[A0:.+]], i16 noundef %[[A1:.+]], i16 noundef %[[B0:.+]], i16 noundef %[[B1:.+]])
+#[no_mangle]
+pub fn check_le_direct(a: TwoTuple, b: TwoTuple) -> bool {
+    // CHECK-DAG: %[[EQ:.+]] = icmp eq i16 %[[A0]], %[[B0]]
+    // CHECK-DAG: %[[CMP0:.+]] = icmp slt i16 %[[A0]], %[[B0]]
+    // CHECK-DAG: %[[CMP1:.+]] = icmp ule i16 %[[A1]], %[[B1]]
+    // CHECK: %[[R:.+]] = select i1 %[[EQ]], i1 %[[CMP1]], i1 %[[CMP0]]
+    // CHECK: ret i1 %[[R]]
+    a <= b
+}
+
+// CHECK-LABEL: @check_gt_direct
+// CHECK-SAME: (i16 noundef %[[A0:.+]], i16 noundef %[[A1:.+]], i16 noundef %[[B0:.+]], i16 noundef %[[B1:.+]])
+#[no_mangle]
+pub fn check_gt_direct(a: TwoTuple, b: TwoTuple) -> bool {
+    // CHECK-DAG: %[[EQ:.+]] = icmp eq i16 %[[A0]], %[[B0]]
+    // CHECK-DAG: %[[CMP0:.+]] = icmp sgt i16 %[[A0]], %[[B0]]
+    // CHECK-DAG: %[[CMP1:.+]] = icmp ugt i16 %[[A1]], %[[B1]]
+    // CHECK: %[[R:.+]] = select i1 %[[EQ]], i1 %[[CMP1]], i1 %[[CMP0]]
+    // CHECK: ret i1 %[[R]]
+    a > b
+}
+
+// CHECK-LABEL: @check_ge_direct
+// CHECK-SAME: (i16 noundef %[[A0:.+]], i16 noundef %[[A1:.+]], i16 noundef %[[B0:.+]], i16 noundef %[[B1:.+]])
+#[no_mangle]
+pub fn check_ge_direct(a: TwoTuple, b: TwoTuple) -> bool {
+    // CHECK-DAG: %[[EQ:.+]] = icmp eq i16 %[[A0]], %[[B0]]
+    // CHECK-DAG: %[[CMP0:.+]] = icmp sgt i16 %[[A0]], %[[B0]]
+    // CHECK-DAG: %[[CMP1:.+]] = icmp uge i16 %[[A1]], %[[B1]]
+    // CHECK: %[[R:.+]] = select i1 %[[EQ]], i1 %[[CMP1]], i1 %[[CMP0]]
+    // CHECK: ret i1 %[[R]]
+    a >= b
+}
+
+//
+// These ones are harder, since there are more intermediate values to remove.
+//
+// `<` seems to be getting lucky right now, so test that doesn't regress.
+//
+// The others, however, aren't managing to optimize away the extra `select`s yet.
+// See <https://github.com/rust-lang/rust/issues/106107> for more about this.
+//
+
+// CHECK-LABEL: @check_lt_via_cmp
+// CHECK-SAME: (i16 noundef %[[A0:.+]], i16 noundef %[[A1:.+]], i16 noundef %[[B0:.+]], i16 noundef %[[B1:.+]])
+#[no_mangle]
+pub fn check_lt_via_cmp(a: TwoTuple, b: TwoTuple) -> bool {
+    // CHECK-DAG: %[[EQ:.+]] = icmp eq i16 %[[A0]], %[[B0]]
+    // CHECK-DAG: %[[CMP0:.+]] = icmp slt i16 %[[A0]], %[[B0]]
+    // CHECK-DAG: %[[CMP1:.+]] = icmp ult i16 %[[A1]], %[[B1]]
+    // CHECK: %[[R:.+]] = select i1 %[[EQ]], i1 %[[CMP1]], i1 %[[CMP0]]
+    // CHECK: ret i1 %[[R]]
+    Ord::cmp(&a, &b).is_lt()
+}
+
+// CHECK-LABEL: @check_le_via_cmp
+// CHECK-SAME: (i16 noundef %[[A0:.+]], i16 noundef %[[A1:.+]], i16 noundef %[[B0:.+]], i16 noundef %[[B1:.+]])
+#[no_mangle]
+pub fn check_le_via_cmp(a: TwoTuple, b: TwoTuple) -> bool {
+    // FIXME-CHECK-DAG: %[[EQ:.+]] = icmp eq i16 %[[A0]], %[[B0]]
+    // FIXME-CHECK-DAG: %[[CMP0:.+]] = icmp sle i16 %[[A0]], %[[B0]]
+    // FIXME-CHECK-DAG: %[[CMP1:.+]] = icmp ule i16 %[[A1]], %[[B1]]
+    // FIXME-CHECK: %[[R:.+]] = select i1 %[[EQ]], i1 %[[CMP1]], i1 %[[CMP0]]
+    // FIXME-CHECK: ret i1 %[[R]]
+    Ord::cmp(&a, &b).is_le()
+}
+
+// CHECK-LABEL: @check_gt_via_cmp
+// CHECK-SAME: (i16 noundef %[[A0:.+]], i16 noundef %[[A1:.+]], i16 noundef %[[B0:.+]], i16 noundef %[[B1:.+]])
+#[no_mangle]
+pub fn check_gt_via_cmp(a: TwoTuple, b: TwoTuple) -> bool {
+    // FIXME-CHECK-DAG: %[[EQ:.+]] = icmp eq i16 %[[A0]], %[[B0]]
+    // FIXME-CHECK-DAG: %[[CMP0:.+]] = icmp sgt i16 %[[A0]], %[[B0]]
+    // FIXME-CHECK-DAG: %[[CMP1:.+]] = icmp ugt i16 %[[A1]], %[[B1]]
+    // FIXME-CHECK: %[[R:.+]] = select i1 %[[EQ]], i1 %[[CMP1]], i1 %[[CMP0]]
+    // FIXME-CHECK: ret i1 %[[R]]
+    Ord::cmp(&a, &b).is_gt()
+}
+
+// CHECK-LABEL: @check_ge_via_cmp
+// CHECK-SAME: (i16 noundef %[[A0:.+]], i16 noundef %[[A1:.+]], i16 noundef %[[B0:.+]], i16 noundef %[[B1:.+]])
+#[no_mangle]
+pub fn check_ge_via_cmp(a: TwoTuple, b: TwoTuple) -> bool {
+    // FIXME-CHECK-DAG: %[[EQ:.+]] = icmp eq i16 %[[A0]], %[[B0]]
+    // FIXME-CHECK-DAG: %[[CMP0:.+]] = icmp sge i16 %[[A0]], %[[B0]]
+    // FIXME-CHECK-DAG: %[[CMP1:.+]] = icmp uge i16 %[[A1]], %[[B1]]
+    // FIXME-CHECK: %[[R:.+]] = select i1 %[[EQ]], i1 %[[CMP1]], i1 %[[CMP0]]
+    // FIXME-CHECK: ret i1 %[[R]]
+    Ord::cmp(&a, &b).is_ge()
+}