From afe238f66fc9348b8c4a1daacefb93c0ec36296a Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 17 Apr 2024 09:59:27 +1000 Subject: [PATCH] Introduce `InvisibleOrigin` on invisible delimiters. It's not used meaningfully yet, but will be needed to get rid of interpolated tokens. --- compiler/rustc_ast/src/attr/mod.rs | 4 +- compiler/rustc_ast/src/token.rs | 114 +++++++++++++++++- compiler/rustc_ast/src/tokenstream.rs | 6 +- compiler/rustc_ast_pretty/src/pprust/state.rs | 5 +- compiler/rustc_expand/src/mbe/macro_rules.rs | 2 +- compiler/rustc_expand/src/mbe/quoted.rs | 11 +- .../rustc_expand/src/proc_macro_server.rs | 4 +- .../rustc_parse/src/parser/attr_wrapper.rs | 4 +- compiler/rustc_parse/src/parser/mod.rs | 12 +- src/tools/rustfmt/src/macros.rs | 2 +- 10 files changed, 133 insertions(+), 31 deletions(-) diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index 54e826585d2..888b13efa31 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -457,7 +457,7 @@ impl MetaItemKind { tokens: &mut impl Iterator, ) -> Option { match tokens.next() { - Some(TokenTree::Delimited(.., Delimiter::Invisible, inner_tokens)) => { + Some(TokenTree::Delimited(.., Delimiter::Invisible(_), inner_tokens)) => { MetaItemKind::name_value_from_tokens(&mut inner_tokens.trees()) } Some(TokenTree::Token(token, _)) => { @@ -605,7 +605,7 @@ impl MetaItemInner { tokens.next(); return Some(MetaItemInner::Lit(lit)); } - Some(TokenTree::Delimited(.., Delimiter::Invisible, inner_tokens)) => { + Some(TokenTree::Delimited(.., Delimiter::Invisible(_), inner_tokens)) => { tokens.next(); return MetaItemInner::from_tokens(&mut inner_tokens.trees().peekable()); } diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index 3b9edef0615..592b56cc08f 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -42,11 +42,86 @@ pub enum BinOpToken { Shr, } +// This type must not 
implement `Hash` due to the unusual `PartialEq` impl below. +#[derive(Copy, Clone, Debug, Encodable, Decodable, HashStable_Generic)] +pub enum InvisibleOrigin { + // From the expansion of a metavariable in a declarative macro. + MetaVar(MetaVarKind), + + // Converted from `proc_macro::Delimiter` in + // `proc_macro::Delimiter::to_internal`, i.e. returned by a proc macro. + ProcMacro, + + // Converted from `TokenKind::Interpolated` in + // `TokenStream::flatten_token`. Treated similarly to `ProcMacro`. + FlattenToken, +} + +impl PartialEq for InvisibleOrigin { + #[inline] + fn eq(&self, _other: &InvisibleOrigin) -> bool { + // When we had AST-based nonterminals we couldn't compare them, and the + // old `Nonterminal` type had an `eq` that always returned false, + // resulting in this restriction: + // https://doc.rust-lang.org/nightly/reference/macros-by-example.html#forwarding-a-matched-fragment + // This `eq` emulates that behaviour. We could consider lifting this + // restriction now but there are still cases involving invisible + // delimiters that make it harder than it first appears. + false + } +} + +/// Annoyingly similar to `NonterminalKind`, but the slight differences are important. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)] +pub enum MetaVarKind { + Item, + Block, + Stmt, + Pat(NtPatKind), + Expr { + kind: NtExprKind, + // This field is needed for `Token::can_begin_literal_maybe_minus`. + can_begin_literal_maybe_minus: bool, + // This field is needed for `Token::can_begin_string_literal`. 
+ can_begin_string_literal: bool, + }, + Ty, + Ident, + Lifetime, + Literal, + Meta, + Path, + Vis, + TT, +} + +impl fmt::Display for MetaVarKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let sym = match self { + MetaVarKind::Item => sym::item, + MetaVarKind::Block => sym::block, + MetaVarKind::Stmt => sym::stmt, + MetaVarKind::Pat(PatParam { inferred: true } | PatWithOr) => sym::pat, + MetaVarKind::Pat(PatParam { inferred: false }) => sym::pat_param, + MetaVarKind::Expr { kind: Expr2021 { inferred: true } | Expr, .. } => sym::expr, + MetaVarKind::Expr { kind: Expr2021 { inferred: false }, .. } => sym::expr_2021, + MetaVarKind::Ty => sym::ty, + MetaVarKind::Ident => sym::ident, + MetaVarKind::Lifetime => sym::lifetime, + MetaVarKind::Literal => sym::literal, + MetaVarKind::Meta => sym::meta, + MetaVarKind::Path => sym::path, + MetaVarKind::Vis => sym::vis, + MetaVarKind::TT => sym::tt, + }; + write!(f, "{sym}") + } +} + /// Describes how a sequence of token trees is delimited. /// Cannot use `proc_macro::Delimiter` directly because this /// structure should implement some additional traits. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -#[derive(Encodable, Decodable, Hash, HashStable_Generic)] +#[derive(Copy, Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)] pub enum Delimiter { /// `( ... )` Parenthesis, @@ -59,7 +134,34 @@ pub enum Delimiter { /// "macro variable" `$var`. It is important to preserve operator priorities in cases like /// `$var * 3` where `$var` is `1 + 2`. /// Invisible delimiters might not survive roundtrip of a token stream through a string. - Invisible, + Invisible(InvisibleOrigin), +} + +impl Delimiter { + // Should the parser skip these delimiters? Only happens for certain kinds + // of invisible delimiters. Ideally this function will eventually disappear + // and no invisible delimiters will be skipped. 
+ #[inline] + pub fn skip(&self) -> bool { + match self { + Delimiter::Parenthesis | Delimiter::Bracket | Delimiter::Brace => false, + Delimiter::Invisible(InvisibleOrigin::MetaVar(_)) => false, + Delimiter::Invisible(InvisibleOrigin::FlattenToken | InvisibleOrigin::ProcMacro) => { + true + } + } + } + + // This exists because `InvisibleOrigin`s should not be compared. It is only used for assertions. + pub fn eq_ignoring_invisible_origin(&self, other: &Delimiter) -> bool { + match (self, other) { + (Delimiter::Parenthesis, Delimiter::Parenthesis) => true, + (Delimiter::Brace, Delimiter::Brace) => true, + (Delimiter::Bracket, Delimiter::Bracket) => true, + (Delimiter::Invisible(_), Delimiter::Invisible(_)) => true, + _ => false, + } + } } // Note that the suffix is *not* considered when deciding the `LitKind` in this @@ -896,7 +998,7 @@ impl PartialEq for Token { } } -#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)] pub enum NtPatKind { // Matches or-patterns. Was written using `pat` in edition 2021 or later. PatWithOr, @@ -906,7 +1008,7 @@ pub enum NtPatKind { PatParam { inferred: bool }, } -#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)] pub enum NtExprKind { // Matches expressions using the post-edition 2024. Was written using // `expr` in edition 2024 or later. 
@@ -933,7 +1035,7 @@ pub enum Nonterminal { NtVis(P), } -#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)] pub enum NonterminalKind { Item, Block, diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 0b4bfc0b36a..c6b6addc946 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -24,7 +24,7 @@ use rustc_span::{DUMMY_SP, Span, SpanDecoder, SpanEncoder, Symbol, sym}; use crate::ast::{AttrStyle, StmtKind}; use crate::ast_traits::{HasAttrs, HasTokens}; -use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind}; +use crate::token::{self, Delimiter, InvisibleOrigin, Nonterminal, Token, TokenKind}; use crate::{AttrVec, Attribute}; /// Part of a `TokenStream`. @@ -484,13 +484,13 @@ impl TokenStream { token::NtLifetime(ident, is_raw) => TokenTree::Delimited( DelimSpan::from_single(token.span), DelimSpacing::new(Spacing::JointHidden, spacing), - Delimiter::Invisible, + Delimiter::Invisible(InvisibleOrigin::FlattenToken), TokenStream::token_alone(token::Lifetime(ident.name, is_raw), ident.span), ), token::Interpolated(ref nt) => TokenTree::Delimited( DelimSpan::from_single(token.span), DelimSpacing::new(Spacing::JointHidden, spacing), - Delimiter::Invisible, + Delimiter::Invisible(InvisibleOrigin::FlattenToken), TokenStream::from_nonterminal_ast(&nt).flattened(), ), _ => TokenTree::Token(token.clone(), spacing), diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index de9f5187be7..d7c531f3760 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -942,9 +942,8 @@ pub trait PrintState<'a>: std::ops::Deref + std::ops::Dere token::CloseDelim(Delimiter::Bracket) => "]".into(), token::OpenDelim(Delimiter::Brace) => "{".into(), token::CloseDelim(Delimiter::Brace) => 
"}".into(), - token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible) => { - "".into() - } + token::OpenDelim(Delimiter::Invisible(_)) + | token::CloseDelim(Delimiter::Invisible(_)) => "".into(), token::Pound => "#".into(), token::Dollar => "$".into(), token::Question => "?".into(), diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index fcc90c3ce0d..a373c753cc1 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -693,7 +693,7 @@ fn has_compile_error_macro(rhs: &mbe::TokenTree) -> bool { && let mbe::TokenTree::Token(bang) = bang && let TokenKind::Not = bang.kind && let mbe::TokenTree::Delimited(.., del) = args - && del.delim != Delimiter::Invisible + && !del.delim.skip() { true } else { diff --git a/compiler/rustc_expand/src/mbe/quoted.rs b/compiler/rustc_expand/src/mbe/quoted.rs index 1345f06d5ac..36094707fac 100644 --- a/compiler/rustc_expand/src/mbe/quoted.rs +++ b/compiler/rustc_expand/src/mbe/quoted.rs @@ -165,11 +165,12 @@ fn parse_tree<'a>( // during parsing. 
let mut next = outer_trees.next(); let mut trees: Box>; - if let Some(tokenstream::TokenTree::Delimited(.., Delimiter::Invisible, tts)) = next { - trees = Box::new(tts.trees()); - next = trees.next(); - } else { - trees = Box::new(outer_trees); + match next { + Some(tokenstream::TokenTree::Delimited(.., delim, tts)) if delim.skip() => { + trees = Box::new(tts.trees()); + next = trees.next(); + } + _ => trees = Box::new(outer_trees), } match next { diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 0dc35618ff8..263df235b3e 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -38,7 +38,7 @@ impl FromInternal for Delimiter { token::Delimiter::Parenthesis => Delimiter::Parenthesis, token::Delimiter::Brace => Delimiter::Brace, token::Delimiter::Bracket => Delimiter::Bracket, - token::Delimiter::Invisible => Delimiter::None, + token::Delimiter::Invisible(_) => Delimiter::None, } } } @@ -49,7 +49,7 @@ impl ToInternal for Delimiter { Delimiter::Parenthesis => token::Delimiter::Parenthesis, Delimiter::Brace => token::Delimiter::Brace, Delimiter::Bracket => token::Delimiter::Bracket, - Delimiter::None => token::Delimiter::Invisible, + Delimiter::None => token::Delimiter::Invisible(token::InvisibleOrigin::ProcMacro), } } } diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index c85d0bd05cb..434f71beac2 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -510,8 +510,8 @@ fn make_attr_token_stream( FlatToken::Token((Token { kind: TokenKind::CloseDelim(delim), span }, spacing)) => { let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap()); let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap(); - assert_eq!( - open_delim, delim, + assert!( + open_delim.eq_ignoring_invisible_origin(&delim), 
"Mismatched open/close delims: open={open_delim:?} close={span:?}" ); let dspan = DelimSpan::from_pair(open_sp, span); diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 042ee96bbe8..226f29f9900 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -317,7 +317,7 @@ impl TokenCursor { spacing, delim, )); - if delim != Delimiter::Invisible { + if !delim.skip() { return (Token::new(token::OpenDelim(delim), sp.open), spacing.open); } // No open delimiter to return; continue on to the next iteration. @@ -326,7 +326,7 @@ impl TokenCursor { } else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() { // We have exhausted this token stream. Move back to its parent token stream. self.tree_cursor = tree_cursor; - if delim != Delimiter::Invisible { + if !delim.skip() { return (Token::new(token::CloseDelim(delim), span.close), spacing.close); } // No close delimiter to return; continue on to the next iteration. @@ -1163,7 +1163,7 @@ impl<'a> Parser<'a> { } debug_assert!(!matches!( next.0.kind, - token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible) + token::OpenDelim(delim) | token::CloseDelim(delim) if delim.skip() )); self.inlined_bump_with(next) } @@ -1187,7 +1187,7 @@ impl<'a> Parser<'a> { match tree { TokenTree::Token(token, _) => return looker(token), &TokenTree::Delimited(dspan, _, delim, _) => { - if delim != Delimiter::Invisible { + if !delim.skip() { return looker(&Token::new(token::OpenDelim(delim), dspan.open)); } } @@ -1197,7 +1197,7 @@ impl<'a> Parser<'a> { // The tree cursor lookahead went (one) past the end of the // current token tree. Try to return a close delimiter. if let Some(&(_, span, _, delim)) = self.token_cursor.stack.last() - && delim != Delimiter::Invisible + && !delim.skip() { // We are not in the outermost token stream, so we have // delimiters. Also, those delimiters are not skipped. 
@@ -1216,7 +1216,7 @@ impl<'a> Parser<'a> { token = cursor.next().0; if matches!( token.kind, - token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible) + token::OpenDelim(delim) | token::CloseDelim(delim) if delim.skip() ) { continue; } diff --git a/src/tools/rustfmt/src/macros.rs b/src/tools/rustfmt/src/macros.rs index 5a35e115d8f..4083d9398f6 100644 --- a/src/tools/rustfmt/src/macros.rs +++ b/src/tools/rustfmt/src/macros.rs @@ -620,7 +620,7 @@ fn delim_token_to_str( ("{ ", " }") } } - Delimiter::Invisible => unreachable!(), + Delimiter::Invisible(_) => unreachable!(), }; if use_multiple_lines { let indent_str = shape.indent.to_string_with_newline(context.config);