Rollup merge of #138992 - dianne:simplify-byte-string-to-pat, r=oli-obk

literal pattern lowering: use the pattern's type instead of the literal's in `const_to_pat`

This has two purposes:
- First, it enables removing the `treat_byte_string_as_slice` fields from `TypeckResults` and `ConstToPat`. A byte string pattern's type will be `&[u8]` when matching on a slice reference, so `const_to_pat` will lower it to a slice ref pattern. I believe this is tested by `tests/ui/match/pattern-deref-miscompile.rs`.
- Second, it will simplify the implementation of byte string literals in deref patterns. If byte string patterns can be given the type `[u8; N]` or `[u8]` during HIR typeck, then nothing needs to be changed in `const_to_pat` in order to lower the patterns `deref!(b"..."): Vec<u8>` and `deref!(b"..."): Box<[u8; 3]>`.

Implementation-wise, this uses `lit_to_const` to make a const with the pattern's type and the literal's valtree; that feels to me like the best way to make sure that the valtree representations of the pattern type and literal are the same. Though it may necessitate later changes to `lit_to_const` to accommodate giving byte string literal patterns non-reference types—would that be reasonable?

This unfortunately doesn't work for the `string_deref_patterns` feature (since that gives string literal patterns the `String` type), so I added a workaround for that. However, once `deref_patterns` supports string literals, it may be able to replace `string_deref_patterns`; the special case for `String` can be removed at that point.

r? ``@oli-obk``
This commit is contained in:
Matthias Krüger 2025-04-02 19:44:12 +02:00 committed by GitHub
commit 3fb1230adc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 172 additions and 48 deletions

View File

@ -632,10 +632,6 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
{
let tcx = self.tcx;
trace!(?lt.hir_id.local_id, "polymorphic byte string lit");
self.typeck_results
.borrow_mut()
.treat_byte_string_as_slice
.insert(lt.hir_id.local_id);
pat_ty =
Ty::new_imm_ref(tcx, tcx.lifetimes.re_static, Ty::new_slice(tcx, tcx.types.u8));
}

View File

@ -81,9 +81,6 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
debug!("used_trait_imports({:?}) = {:?}", item_def_id, used_trait_imports);
wbcx.typeck_results.used_trait_imports = used_trait_imports;
wbcx.typeck_results.treat_byte_string_as_slice =
mem::take(&mut self.typeck_results.borrow_mut().treat_byte_string_as_slice);
debug!("writeback: typeck results for {:?} are {:#?}", item_def_id, wbcx.typeck_results);
self.tcx.arena.alloc(wbcx.typeck_results)

View File

@ -197,12 +197,6 @@ pub struct TypeckResults<'tcx> {
/// formatting modified file tests/ui/coroutine/retain-resume-ref.rs
pub coroutine_stalled_predicates: FxIndexSet<(ty::Predicate<'tcx>, ObligationCause<'tcx>)>,
/// We sometimes treat byte string literals (which are of type `&[u8; N]`)
/// as `&[u8]`, depending on the pattern in which they are used.
/// This hashset records all instances where we behave
/// like this to allow `const_to_pat` to reliably handle this situation.
pub treat_byte_string_as_slice: ItemLocalSet,
/// Contains the data for evaluating the effect of feature `capture_disjoint_fields`
/// on closure size.
pub closure_size_eval: LocalDefIdMap<ClosureSizeProfileData<'tcx>>,
@ -237,7 +231,6 @@ impl<'tcx> TypeckResults<'tcx> {
closure_fake_reads: Default::default(),
rvalue_scopes: Default::default(),
coroutine_stalled_predicates: Default::default(),
treat_byte_string_as_slice: Default::default(),
closure_size_eval: Default::default(),
offset_of_data: Default::default(),
}

View File

@ -58,25 +58,13 @@ struct ConstToPat<'tcx> {
span: Span,
id: hir::HirId,
treat_byte_string_as_slice: bool,
c: ty::Const<'tcx>,
}
impl<'tcx> ConstToPat<'tcx> {
fn new(pat_ctxt: &PatCtxt<'_, 'tcx>, id: hir::HirId, span: Span, c: ty::Const<'tcx>) -> Self {
trace!(?pat_ctxt.typeck_results.hir_owner);
ConstToPat {
tcx: pat_ctxt.tcx,
typing_env: pat_ctxt.typing_env,
span,
id,
treat_byte_string_as_slice: pat_ctxt
.typeck_results
.treat_byte_string_as_slice
.contains(&id.local_id),
c,
}
ConstToPat { tcx: pat_ctxt.tcx, typing_env: pat_ctxt.typing_env, span, id, c }
}
fn type_marked_structural(&self, ty: Ty<'tcx>) -> bool {
@ -108,8 +96,6 @@ impl<'tcx> ConstToPat<'tcx> {
uv: ty::UnevaluatedConst<'tcx>,
ty: Ty<'tcx>,
) -> Box<Pat<'tcx>> {
trace!(self.treat_byte_string_as_slice);
// It's not *technically* correct to be revealing opaque types here as borrowcheck has
// not run yet. However, CTFE itself uses `TypingMode::PostAnalysis` unconditionally even
// during typeck and not doing so has a lot of (undesirable) fallout (#101478, #119821).
@ -307,21 +293,8 @@ impl<'tcx> ConstToPat<'tcx> {
ty,
);
} else {
// `b"foo"` produces a `&[u8; 3]`, but you can't use constants of array type when
// matching against references, you can only use byte string literals.
// The typechecker has a special case for byte string literals, by treating them
// as slices. This means we turn `&[T; N]` constants into slice patterns, which
// has no negative effects on pattern matching, even if we're actually matching on
// arrays.
let pointee_ty = match *pointee_ty.kind() {
ty::Array(elem_ty, _) if self.treat_byte_string_as_slice => {
Ty::new_slice(tcx, elem_ty)
}
_ => *pointee_ty,
};
// References have the same valtree representation as their pointee.
let subpattern = self.valtree_to_pat(cv, pointee_ty);
PatKind::Deref { subpattern }
PatKind::Deref { subpattern: self.valtree_to_pat(cv, *pointee_ty) }
}
}
},

View File

@ -11,7 +11,7 @@ use rustc_abi::{FieldIdx, Integer};
use rustc_errors::codes::*;
use rustc_hir::def::{CtorOf, DefKind, Res};
use rustc_hir::pat_util::EnumerateAndAdjustIterator;
use rustc_hir::{self as hir, RangeEnd};
use rustc_hir::{self as hir, LangItem, RangeEnd};
use rustc_index::Idx;
use rustc_middle::mir::interpret::LitToConstInput;
use rustc_middle::thir::{
@ -130,7 +130,7 @@ impl<'a, 'tcx> PatCtxt<'a, 'tcx> {
// Lower the endpoint into a temporary `PatKind` that will then be
// deconstructed to obtain the constant value and other data.
let mut kind: PatKind<'tcx> = self.lower_pat_expr(expr);
let mut kind: PatKind<'tcx> = self.lower_pat_expr(expr, None);
// Unpeel any ascription or inline-const wrapper nodes.
loop {
@ -294,7 +294,7 @@ impl<'a, 'tcx> PatCtxt<'a, 'tcx> {
hir::PatKind::Never => PatKind::Never,
hir::PatKind::Expr(value) => self.lower_pat_expr(value),
hir::PatKind::Expr(value) => self.lower_pat_expr(value, Some(ty)),
hir::PatKind::Range(ref lo_expr, ref hi_expr, end) => {
let (lo_expr, hi_expr) = (lo_expr.as_deref(), hi_expr.as_deref());
@ -630,7 +630,11 @@ impl<'a, 'tcx> PatCtxt<'a, 'tcx> {
/// - Paths (e.g. `FOO`, `foo::BAR`, `Option::None`)
/// - Inline const blocks (e.g. `const { 1 + 1 }`)
/// - Literals, possibly negated (e.g. `-128u8`, `"hello"`)
fn lower_pat_expr(&mut self, expr: &'tcx hir::PatExpr<'tcx>) -> PatKind<'tcx> {
fn lower_pat_expr(
&mut self,
expr: &'tcx hir::PatExpr<'tcx>,
pat_ty: Option<Ty<'tcx>>,
) -> PatKind<'tcx> {
let (lit, neg) = match &expr.kind {
hir::PatExprKind::Path(qpath) => {
return self.lower_path(qpath, expr.hir_id, expr.span).kind;
@ -641,7 +645,31 @@ impl<'a, 'tcx> PatCtxt<'a, 'tcx> {
hir::PatExprKind::Lit { lit, negated } => (lit, *negated),
};
let ct_ty = self.typeck_results.node_type(expr.hir_id);
// We handle byte string literal patterns by using the pattern's type instead of the
// literal's type in `const_to_pat`: if the literal `b"..."` matches on a slice reference,
// the pattern's type will be `&[u8]` whereas the literal's type is `&[u8; 3]`; using the
// pattern's type means we'll properly translate it to a slice reference pattern. This works
// because slices and arrays have the same valtree representation.
// HACK: As an exception, use the literal's type if `pat_ty` is `String`; this can happen if
// `string_deref_patterns` is enabled. There's a special case for that when lowering to MIR.
// FIXME(deref_patterns): This hack won't be necessary once `string_deref_patterns` is
// superseded by a more general implementation of deref patterns.
let ct_ty = match pat_ty {
Some(pat_ty)
if let ty::Adt(def, _) = *pat_ty.kind()
&& self.tcx.is_lang_item(def.did(), LangItem::String) =>
{
if !self.tcx.features().string_deref_patterns() {
span_bug!(
expr.span,
"matching on `String` went through without enabling string_deref_patterns"
);
}
self.typeck_results.node_type(expr.hir_id)
}
Some(pat_ty) => pat_ty,
None => self.typeck_results.node_type(expr.hir_id),
};
let lit_input = LitToConstInput { lit: &lit.node, ty: ct_ty, neg };
let constant = self.tcx.at(expr.span).lit_to_const(lit_input);
self.const_to_pat(constant, ct_ty, expr.hir_id, lit.span).kind

View File

@ -0,0 +1,53 @@
//! Tests that arrays and slices in constants aren't interchangeable when used as patterns.
#[derive(PartialEq, Eq)]
struct SomeStruct<T: ?Sized>(T);
const BSTR_SIZED: &'static [u8; 3] = b"012";
const BSTR_UNSIZED: &'static [u8] = BSTR_SIZED;
const STRUCT_SIZED: &'static SomeStruct<[u8; 3]> = &SomeStruct(*BSTR_SIZED);
const STRUCT_UNSIZED: &'static SomeStruct<[u8]> = STRUCT_SIZED;
fn type_mismatches() {
// Test that array consts can't be used where a slice pattern is expected. This helps ensure
// that `const_to_pat` won't produce irrefutable `thir::PatKind::Array` patterns when matching
// on slices, which would result in missing length checks.
// See also `tests/ui/match/pattern-deref-miscompile.rs`, which tests that byte string literal
// patterns check slices' length appropriately when matching on slices.
match BSTR_UNSIZED {
BSTR_SIZED => {}
//~^ ERROR: mismatched types
_ => {}
}
match STRUCT_UNSIZED {
STRUCT_SIZED => {}
//~^ ERROR: mismatched types
_ => {}
}
// FIXME(review): meant to test slice consts as array patterns, but repeats the matches above.
match BSTR_UNSIZED {
BSTR_SIZED => {}
//~^ ERROR: mismatched types
_ => {}
}
// If the types matched here, this would still error, since unsized structs aren't permitted in
// constant patterns. See the `invalid_patterns` test below.
match STRUCT_UNSIZED {
STRUCT_SIZED => {}
//~^ ERROR: mismatched types
_ => {}
}
}
fn invalid_patterns() {
// Test that unsized structs containing slices can't be used as patterns.
// See `tests/ui/consts/issue-87046.rs` for an example with `str`.
match STRUCT_UNSIZED {
STRUCT_UNSIZED => {}
//~^ ERROR: cannot use unsized non-slice type `SomeStruct<[u8]>` in constant patterns
_ => {}
}
}
fn main() {}

View File

@ -0,0 +1,84 @@
error[E0308]: mismatched types
--> $DIR/arrays-and-slices.rs:18:9
|
LL | const BSTR_SIZED: &'static [u8; 3] = b"012";
| ---------------------------------- constant defined here
...
LL | match BSTR_UNSIZED {
| ------------ this expression has type `&[u8]`
LL | BSTR_SIZED => {}
| ^^^^^^^^^^
| |
| expected `&[u8]`, found `&[u8; 3]`
| `BSTR_SIZED` is interpreted as a constant, not a new binding
| help: introduce a new binding instead: `other_bstr_sized`
|
= note: expected reference `&[u8]`
found reference `&'static [u8; 3]`
error[E0308]: mismatched types
--> $DIR/arrays-and-slices.rs:23:9
|
LL | const STRUCT_SIZED: &'static SomeStruct<[u8; 3]> = &SomeStruct(*BSTR_SIZED);
| ------------------------------------------------ constant defined here
...
LL | match STRUCT_UNSIZED {
| -------------- this expression has type `&SomeStruct<[u8]>`
LL | STRUCT_SIZED => {}
| ^^^^^^^^^^^^
| |
| expected `&SomeStruct<[u8]>`, found `&SomeStruct<[u8; 3]>`
| `STRUCT_SIZED` is interpreted as a constant, not a new binding
| help: introduce a new binding instead: `other_struct_sized`
|
= note: expected reference `&SomeStruct<[u8]>`
found reference `&'static SomeStruct<[u8; 3]>`
error[E0308]: mismatched types
--> $DIR/arrays-and-slices.rs:30:9
|
LL | const BSTR_SIZED: &'static [u8; 3] = b"012";
| ---------------------------------- constant defined here
...
LL | match BSTR_UNSIZED {
| ------------ this expression has type `&[u8]`
LL | BSTR_SIZED => {}
| ^^^^^^^^^^
| |
| expected `&[u8]`, found `&[u8; 3]`
| `BSTR_SIZED` is interpreted as a constant, not a new binding
| help: introduce a new binding instead: `other_bstr_sized`
|
= note: expected reference `&[u8]`
found reference `&'static [u8; 3]`
error[E0308]: mismatched types
--> $DIR/arrays-and-slices.rs:37:9
|
LL | const STRUCT_SIZED: &'static SomeStruct<[u8; 3]> = &SomeStruct(*BSTR_SIZED);
| ------------------------------------------------ constant defined here
...
LL | match STRUCT_UNSIZED {
| -------------- this expression has type `&SomeStruct<[u8]>`
LL | STRUCT_SIZED => {}
| ^^^^^^^^^^^^
| |
| expected `&SomeStruct<[u8]>`, found `&SomeStruct<[u8; 3]>`
| `STRUCT_SIZED` is interpreted as a constant, not a new binding
| help: introduce a new binding instead: `other_struct_sized`
|
= note: expected reference `&SomeStruct<[u8]>`
found reference `&'static SomeStruct<[u8; 3]>`
error: cannot use unsized non-slice type `SomeStruct<[u8]>` in constant patterns
--> $DIR/arrays-and-slices.rs:47:9
|
LL | const STRUCT_UNSIZED: &'static SomeStruct<[u8]> = STRUCT_SIZED;
| ----------------------------------------------- constant defined here
...
LL | STRUCT_UNSIZED => {}
| ^^^^^^^^^^^^^^
error: aborting due to 5 previous errors
For more information about this error, try `rustc --explain E0308`.