From afe238f66fc9348b8c4a1daacefb93c0ec36296a Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 17 Apr 2024 09:59:27 +1000 Subject: [PATCH 1/4] Introduce `InvisibleOrigin` on invisible delimiters. It's not used meaningfully yet, but will be needed to get rid of interpolated tokens. --- compiler/rustc_ast/src/attr/mod.rs | 4 +- compiler/rustc_ast/src/token.rs | 114 +++++++++++++++++- compiler/rustc_ast/src/tokenstream.rs | 6 +- compiler/rustc_ast_pretty/src/pprust/state.rs | 5 +- compiler/rustc_expand/src/mbe/macro_rules.rs | 2 +- compiler/rustc_expand/src/mbe/quoted.rs | 11 +- .../rustc_expand/src/proc_macro_server.rs | 4 +- .../rustc_parse/src/parser/attr_wrapper.rs | 4 +- compiler/rustc_parse/src/parser/mod.rs | 12 +- src/tools/rustfmt/src/macros.rs | 2 +- 10 files changed, 133 insertions(+), 31 deletions(-) diff --git a/compiler/rustc_ast/src/attr/mod.rs b/compiler/rustc_ast/src/attr/mod.rs index 54e826585d2..888b13efa31 100644 --- a/compiler/rustc_ast/src/attr/mod.rs +++ b/compiler/rustc_ast/src/attr/mod.rs @@ -457,7 +457,7 @@ impl MetaItemKind { tokens: &mut impl Iterator, ) -> Option { match tokens.next() { - Some(TokenTree::Delimited(.., Delimiter::Invisible, inner_tokens)) => { + Some(TokenTree::Delimited(.., Delimiter::Invisible(_), inner_tokens)) => { MetaItemKind::name_value_from_tokens(&mut inner_tokens.trees()) } Some(TokenTree::Token(token, _)) => { @@ -605,7 +605,7 @@ impl MetaItemInner { tokens.next(); return Some(MetaItemInner::Lit(lit)); } - Some(TokenTree::Delimited(.., Delimiter::Invisible, inner_tokens)) => { + Some(TokenTree::Delimited(.., Delimiter::Invisible(_), inner_tokens)) => { tokens.next(); return MetaItemInner::from_tokens(&mut inner_tokens.trees().peekable()); } diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index 3b9edef0615..592b56cc08f 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -42,11 +42,86 @@ pub enum BinOpToken { Shr, } +// This type must not implement `Hash` due to the unusual `PartialEq` impl below. +#[derive(Copy, Clone, Debug, Encodable, Decodable, HashStable_Generic)] +pub enum InvisibleOrigin { + // From the expansion of a metavariable in a declarative macro. + MetaVar(MetaVarKind), + + // Converted from `proc_macro::Delimiter` in + // `proc_macro::Delimiter::to_internal`, i.e. returned by a proc macro. + ProcMacro, + + // Converted from `TokenKind::Interpolated` in + // `TokenStream::flatten_token`. Treated similarly to `ProcMacro`. + FlattenToken, +} + +impl PartialEq for InvisibleOrigin { + #[inline] + fn eq(&self, _other: &InvisibleOrigin) -> bool { + // When we had AST-based nonterminals we couldn't compare them, and the + // old `Nonterminal` type had an `eq` that always returned false, + // resulting in this restriction: + // https://doc.rust-lang.org/nightly/reference/macros-by-example.html#forwarding-a-matched-fragment + // This `eq` emulates that behaviour. We could consider lifting this + // restriction now but there are still cases involving invisible + // delimiters that make it harder than it first appears. + false + } +} + +/// Annoyingly similar to `NonterminalKind`, but the slight differences are important. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)] +pub enum MetaVarKind { + Item, + Block, + Stmt, + Pat(NtPatKind), + Expr { + kind: NtExprKind, + // This field is needed for `Token::can_begin_literal_maybe_minus`. + can_begin_literal_maybe_minus: bool, + // This field is needed for `Token::can_begin_string_literal`. + can_begin_string_literal: bool, + }, + Ty, + Ident, + Lifetime, + Literal, + Meta, + Path, + Vis, + TT, +} + +impl fmt::Display for MetaVarKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let sym = match self { + MetaVarKind::Item => sym::item, + MetaVarKind::Block => sym::block, + MetaVarKind::Stmt => sym::stmt, + MetaVarKind::Pat(PatParam { inferred: true } | PatWithOr) => sym::pat, + MetaVarKind::Pat(PatParam { inferred: false }) => sym::pat_param, + MetaVarKind::Expr { kind: Expr2021 { inferred: true } | Expr, .. } => sym::expr, + MetaVarKind::Expr { kind: Expr2021 { inferred: false }, .. } => sym::expr_2021, + MetaVarKind::Ty => sym::ty, + MetaVarKind::Ident => sym::ident, + MetaVarKind::Lifetime => sym::lifetime, + MetaVarKind::Literal => sym::literal, + MetaVarKind::Meta => sym::meta, + MetaVarKind::Path => sym::path, + MetaVarKind::Vis => sym::vis, + MetaVarKind::TT => sym::tt, + }; + write!(f, "{sym}") + } +} + /// Describes how a sequence of token trees is delimited. /// Cannot use `proc_macro::Delimiter` directly because this /// structure should implement some additional traits. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -#[derive(Encodable, Decodable, Hash, HashStable_Generic)] +#[derive(Copy, Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)] pub enum Delimiter { /// `( ... )` Parenthesis, @@ -59,7 +134,34 @@ pub enum Delimiter { /// "macro variable" `$var`. It is important to preserve operator priorities in cases like /// `$var * 3` where `$var` is `1 + 2`. /// Invisible delimiters might not survive roundtrip of a token stream through a string. - Invisible, + Invisible(InvisibleOrigin), +} + +impl Delimiter { + // Should the parser skip these delimiters? Only happens for certain kinds + // of invisible delimiters. Ideally this function will eventually disappear + // and no invisible delimiters will be skipped. + #[inline] + pub fn skip(&self) -> bool { + match self { + Delimiter::Parenthesis | Delimiter::Bracket | Delimiter::Brace => false, + Delimiter::Invisible(InvisibleOrigin::MetaVar(_)) => false, + Delimiter::Invisible(InvisibleOrigin::FlattenToken | InvisibleOrigin::ProcMacro) => { + true + } + } + } + + // This exists because `InvisibleOrigin`s should be compared. It is only used for assertions. + pub fn eq_ignoring_invisible_origin(&self, other: &Delimiter) -> bool { + match (self, other) { + (Delimiter::Parenthesis, Delimiter::Parenthesis) => true, + (Delimiter::Brace, Delimiter::Brace) => true, + (Delimiter::Bracket, Delimiter::Bracket) => true, + (Delimiter::Invisible(_), Delimiter::Invisible(_)) => true, + _ => false, + } + } } // Note that the suffix is *not* considered when deciding the `LitKind` in this @@ -896,7 +998,7 @@ impl PartialEq for Token { } } -#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)] pub enum NtPatKind { // Matches or-patterns. Was written using `pat` in edition 2021 or later. PatWithOr, @@ -906,7 +1008,7 @@ pub enum NtPatKind { PatParam { inferred: bool }, } -#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)] pub enum NtExprKind { // Matches expressions using the post-edition 2024. Was written using // `expr` in edition 2024 or later. @@ -933,7 +1035,7 @@ pub enum Nonterminal { NtVis(P), } -#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Encodable, Decodable, Hash, HashStable_Generic)] pub enum NonterminalKind { Item, Block, diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 0b4bfc0b36a..c6b6addc946 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -24,7 +24,7 @@ use rustc_span::{DUMMY_SP, Span, SpanDecoder, SpanEncoder, Symbol, sym}; use crate::ast::{AttrStyle, StmtKind}; use crate::ast_traits::{HasAttrs, HasTokens}; -use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind}; +use crate::token::{self, Delimiter, InvisibleOrigin, Nonterminal, Token, TokenKind}; use crate::{AttrVec, Attribute}; /// Part of a `TokenStream`. @@ -484,13 +484,13 @@ impl TokenStream { token::NtLifetime(ident, is_raw) => TokenTree::Delimited( DelimSpan::from_single(token.span), DelimSpacing::new(Spacing::JointHidden, spacing), - Delimiter::Invisible, + Delimiter::Invisible(InvisibleOrigin::FlattenToken), TokenStream::token_alone(token::Lifetime(ident.name, is_raw), ident.span), ), token::Interpolated(ref nt) => TokenTree::Delimited( DelimSpan::from_single(token.span), DelimSpacing::new(Spacing::JointHidden, spacing), - Delimiter::Invisible, + Delimiter::Invisible(InvisibleOrigin::FlattenToken), TokenStream::from_nonterminal_ast(&nt).flattened(), ), _ => TokenTree::Token(token.clone(), spacing), diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index de9f5187be7..d7c531f3760 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -942,9 +942,8 @@ pub trait PrintState<'a>: std::ops::Deref + std::ops::Dere token::CloseDelim(Delimiter::Bracket) => "]".into(), token::OpenDelim(Delimiter::Brace) => "{".into(), token::CloseDelim(Delimiter::Brace) => "}".into(), - token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible) => { - "".into() - } + token::OpenDelim(Delimiter::Invisible(_)) + | token::CloseDelim(Delimiter::Invisible(_)) => "".into(), token::Pound => "#".into(), token::Dollar => "$".into(), token::Question => "?".into(), diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index fcc90c3ce0d..a373c753cc1 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -693,7 +693,7 @@ fn has_compile_error_macro(rhs: &mbe::TokenTree) -> bool { && let mbe::TokenTree::Token(bang) = bang && let TokenKind::Not = bang.kind && let mbe::TokenTree::Delimited(.., del) = args - && del.delim != Delimiter::Invisible + && !del.delim.skip() { true } else { diff --git a/compiler/rustc_expand/src/mbe/quoted.rs b/compiler/rustc_expand/src/mbe/quoted.rs index 1345f06d5ac..36094707fac 100644 --- a/compiler/rustc_expand/src/mbe/quoted.rs +++ b/compiler/rustc_expand/src/mbe/quoted.rs @@ -165,11 +165,12 @@ fn parse_tree<'a>( // during parsing. let mut next = outer_trees.next(); let mut trees: Box>; - if let Some(tokenstream::TokenTree::Delimited(.., Delimiter::Invisible, tts)) = next { - trees = Box::new(tts.trees()); - next = trees.next(); - } else { - trees = Box::new(outer_trees); + match next { + Some(tokenstream::TokenTree::Delimited(.., delim, tts)) if delim.skip() => { + trees = Box::new(tts.trees()); + next = trees.next(); + } + _ => trees = Box::new(outer_trees), } match next { diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 0dc35618ff8..263df235b3e 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -38,7 +38,7 @@ impl FromInternal for Delimiter { token::Delimiter::Parenthesis => Delimiter::Parenthesis, token::Delimiter::Brace => Delimiter::Brace, token::Delimiter::Bracket => Delimiter::Bracket, - token::Delimiter::Invisible => Delimiter::None, + token::Delimiter::Invisible(_) => Delimiter::None, } } } @@ -49,7 +49,7 @@ impl ToInternal for Delimiter { Delimiter::Parenthesis => token::Delimiter::Parenthesis, Delimiter::Brace => token::Delimiter::Brace, Delimiter::Bracket => token::Delimiter::Bracket, - Delimiter::None => token::Delimiter::Invisible, + Delimiter::None => token::Delimiter::Invisible(token::InvisibleOrigin::ProcMacro), } } } diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index c85d0bd05cb..434f71beac2 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -510,8 +510,8 @@ fn make_attr_token_stream( FlatToken::Token((Token { kind: TokenKind::CloseDelim(delim), span }, spacing)) => { let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap()); let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap(); - assert_eq!( - open_delim, delim, + assert!( + open_delim.eq_ignoring_invisible_origin(&delim), "Mismatched open/close delims: open={open_delim:?} close={span:?}" ); let dspan = DelimSpan::from_pair(open_sp, span); diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 042ee96bbe8..226f29f9900 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -317,7 +317,7 @@ impl TokenCursor { spacing, delim, )); - if delim != Delimiter::Invisible { + if !delim.skip() { return (Token::new(token::OpenDelim(delim), sp.open), spacing.open); } // No open delimiter to return; continue on to the next iteration. @@ -326,7 +326,7 @@ impl TokenCursor { } else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() { // We have exhausted this token stream. Move back to its parent token stream. self.tree_cursor = tree_cursor; - if delim != Delimiter::Invisible { + if !delim.skip() { return (Token::new(token::CloseDelim(delim), span.close), spacing.close); } // No close delimiter to return; continue on to the next iteration. @@ -1163,7 +1163,7 @@ impl<'a> Parser<'a> { } debug_assert!(!matches!( next.0.kind, - token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible) + token::OpenDelim(delim) | token::CloseDelim(delim) if delim.skip() )); self.inlined_bump_with(next) } @@ -1187,7 +1187,7 @@ impl<'a> Parser<'a> { match tree { TokenTree::Token(token, _) => return looker(token), &TokenTree::Delimited(dspan, _, delim, _) => { - if delim != Delimiter::Invisible { + if !delim.skip() { return looker(&Token::new(token::OpenDelim(delim), dspan.open)); } } @@ -1197,7 +1197,7 @@ impl<'a> Parser<'a> { // The tree cursor lookahead went (one) past the end of the // current token tree. Try to return a close delimiter. if let Some(&(_, span, _, delim)) = self.token_cursor.stack.last() - && delim != Delimiter::Invisible + && !delim.skip() { // We are not in the outermost token stream, so we have // delimiters. Also, those delimiters are not skipped. @@ -1216,7 +1216,7 @@ impl<'a> Parser<'a> { token = cursor.next().0; if matches!( token.kind, - token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible) + token::OpenDelim(delim) | token::CloseDelim(delim) if delim.skip() ) { continue; } diff --git a/src/tools/rustfmt/src/macros.rs b/src/tools/rustfmt/src/macros.rs index 5a35e115d8f..4083d9398f6 100644 --- a/src/tools/rustfmt/src/macros.rs +++ b/src/tools/rustfmt/src/macros.rs @@ -620,7 +620,7 @@ fn delim_token_to_str( ("{ ", " }") } } - Delimiter::Invisible => unreachable!(), + Delimiter::Invisible(_) => unreachable!(), }; if use_multiple_lines { let indent_str = shape.indent.to_string_with_newline(context.config); From cfafa9380b446b74708986a344172ec48e295740 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 18 Apr 2024 08:58:06 +1000 Subject: [PATCH 2/4] Add metavariables to `TokenDescription`. Pasted metavariables are wrapped in invisible delimiters, which pretty-print as empty strings, and changing that can break some proc macros. But error messages saying "expected identifer, found ``" are bad. So this commit adds support for metavariables in `TokenDescription` so they print as "metavariable" in error messages, instead of "``". It's not used meaningfully yet, but will be needed to get rid of interpolated tokens. --- compiler/rustc_parse/messages.ftl | 7 +++++ compiler/rustc_parse/src/errors.rs | 28 ++++++++++++++++-- compiler/rustc_parse/src/parser/mod.rs | 39 +++++++++++++++++--------- 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/compiler/rustc_parse/messages.ftl b/compiler/rustc_parse/messages.ftl index ef259703f0c..cafd4b6dca2 100644 --- a/compiler/rustc_parse/messages.ftl +++ b/compiler/rustc_parse/messages.ftl @@ -216,6 +216,9 @@ parse_expected_identifier_found_doc_comment = expected identifier, found doc com parse_expected_identifier_found_doc_comment_str = expected identifier, found doc comment `{$token}` parse_expected_identifier_found_keyword = expected identifier, found keyword parse_expected_identifier_found_keyword_str = expected identifier, found keyword `{$token}` +parse_expected_identifier_found_metavar = expected identifier, found metavariable +# This one deliberately doesn't print a token. +parse_expected_identifier_found_metavar_str = expected identifier, found metavariable parse_expected_identifier_found_reserved_identifier = expected identifier, found reserved identifier parse_expected_identifier_found_reserved_identifier_str = expected identifier, found reserved identifier `{$token}` parse_expected_identifier_found_reserved_keyword = expected identifier, found reserved keyword @@ -227,6 +230,8 @@ parse_expected_mut_or_const_in_raw_pointer_type = expected `mut` or `const` keyw parse_expected_semi_found_doc_comment_str = expected `;`, found doc comment `{$token}` parse_expected_semi_found_keyword_str = expected `;`, found keyword `{$token}` +# This one deliberately doesn't print a token. +parse_expected_semi_found_metavar_str = expected `;`, found metavariable parse_expected_semi_found_reserved_identifier_str = expected `;`, found reserved identifier `{$token}` parse_expected_semi_found_reserved_keyword_str = expected `;`, found reserved keyword `{$token}` parse_expected_semi_found_str = expected `;`, found `{$token}` @@ -864,6 +869,8 @@ parse_unexpected_token_after_not_logical = use `!` to perform logical negation parse_unexpected_token_after_struct_name = expected `where`, `{"{"}`, `(`, or `;` after struct name parse_unexpected_token_after_struct_name_found_doc_comment = expected `where`, `{"{"}`, `(`, or `;` after struct name, found doc comment `{$token}` parse_unexpected_token_after_struct_name_found_keyword = expected `where`, `{"{"}`, `(`, or `;` after struct name, found keyword `{$token}` +# This one deliberately doesn't print a token. +parse_unexpected_token_after_struct_name_found_metavar = expected `where`, `{"{"}`, `(`, or `;` after struct name, found metavar parse_unexpected_token_after_struct_name_found_other = expected `where`, `{"{"}`, `(`, or `;` after struct name, found `{$token}` parse_unexpected_token_after_struct_name_found_reserved_identifier = expected `where`, `{"{"}`, `(`, or `;` after struct name, found reserved identifier `{$token}` diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index 37eb463cba6..9bdb99dc000 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -1086,6 +1086,8 @@ pub(crate) enum ExpectedIdentifierFound { ReservedKeyword(#[primary_span] Span), #[label(parse_expected_identifier_found_doc_comment)] DocComment(#[primary_span] Span), + #[label(parse_expected_identifier_found_metavar)] + MetaVar(#[primary_span] Span), #[label(parse_expected_identifier)] Other(#[primary_span] Span), } @@ -1099,6 +1101,7 @@ impl ExpectedIdentifierFound { Some(TokenDescription::Keyword) => ExpectedIdentifierFound::Keyword, Some(TokenDescription::ReservedKeyword) => ExpectedIdentifierFound::ReservedKeyword, Some(TokenDescription::DocComment) => ExpectedIdentifierFound::DocComment, + Some(TokenDescription::MetaVar(_)) => ExpectedIdentifierFound::MetaVar, None => ExpectedIdentifierFound::Other, })(span) } @@ -1117,6 +1120,7 @@ impl<'a, G: EmissionGuarantee> Diagnostic<'a, G> for ExpectedIdentifier { fn into_diag(self, dcx: DiagCtxtHandle<'a>, level: Level) -> Diag<'a, G> { let token_descr = TokenDescription::from_token(&self.token); + let mut add_token = true; let mut diag = Diag::new(dcx, level, match token_descr { Some(TokenDescription::ReservedIdentifier) => { fluent::parse_expected_identifier_found_reserved_identifier_str @@ -1128,10 +1132,16 @@ impl<'a, G: EmissionGuarantee> Diagnostic<'a, G> for ExpectedIdentifier { Some(TokenDescription::DocComment) => { fluent::parse_expected_identifier_found_doc_comment_str } + Some(TokenDescription::MetaVar(_)) => { + add_token = false; + fluent::parse_expected_identifier_found_metavar_str + } None => fluent::parse_expected_identifier_found_str, }); diag.span(self.span); - diag.arg("token", self.token); + if add_token { + diag.arg("token", self.token); + } if let Some(sugg) = self.suggest_raw { sugg.add_to_diag(&mut diag); @@ -1171,6 +1181,7 @@ impl<'a, G: EmissionGuarantee> Diagnostic<'a, G> for ExpectedSemi { fn into_diag(self, dcx: DiagCtxtHandle<'a>, level: Level) -> Diag<'a, G> { let token_descr = TokenDescription::from_token(&self.token); + let mut add_token = true; let mut diag = Diag::new(dcx, level, match token_descr { Some(TokenDescription::ReservedIdentifier) => { fluent::parse_expected_semi_found_reserved_identifier_str @@ -1180,10 +1191,16 @@ impl<'a, G: EmissionGuarantee> Diagnostic<'a, G> for ExpectedSemi { fluent::parse_expected_semi_found_reserved_keyword_str } Some(TokenDescription::DocComment) => fluent::parse_expected_semi_found_doc_comment_str, + Some(TokenDescription::MetaVar(_)) => { + add_token = false; + fluent::parse_expected_semi_found_metavar_str + } None => fluent::parse_expected_semi_found_str, }); diag.span(self.span); - diag.arg("token", self.token); + if add_token { + diag.arg("token", self.token); + } if let Some(unexpected_token_label) = self.unexpected_token_label { diag.span_label(unexpected_token_label, fluent::parse_label_unexpected_token); @@ -1925,6 +1942,12 @@ pub(crate) enum UnexpectedTokenAfterStructName { span: Span, token: Token, }, + #[diag(parse_unexpected_token_after_struct_name_found_metavar)] + MetaVar { + #[primary_span] + #[label(parse_unexpected_token_after_struct_name)] + span: Span, + }, #[diag(parse_unexpected_token_after_struct_name_found_other)] Other { #[primary_span] @@ -1941,6 +1964,7 @@ impl UnexpectedTokenAfterStructName { Some(TokenDescription::Keyword) => Self::Keyword { span, token }, Some(TokenDescription::ReservedKeyword) => Self::ReservedKeyword { span, token }, Some(TokenDescription::DocComment) => Self::DocComment { span, token }, + Some(TokenDescription::MetaVar(_)) => Self::MetaVar { span }, None => Self::Other { span, token }, } } diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 226f29f9900..0ed8d152d2d 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -21,7 +21,9 @@ pub(crate) use item::FnParseMode; pub use pat::{CommaRecoveryMode, RecoverColon, RecoverComma}; use path::PathStyle; use rustc_ast::ptr::P; -use rustc_ast::token::{self, Delimiter, IdentIsRaw, Nonterminal, Token, TokenKind}; +use rustc_ast::token::{ + self, Delimiter, IdentIsRaw, InvisibleOrigin, MetaVarKind, Nonterminal, Token, TokenKind, +}; use rustc_ast::tokenstream::{ AttrsTarget, DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree, TokenTreeCursor, }; @@ -410,6 +412,12 @@ pub(super) enum TokenDescription { Keyword, ReservedKeyword, DocComment, + + // Expanded metavariables are wrapped in invisible delimiters which aren't + // pretty-printed. In error messages we must handle these specially + // otherwise we get confusing things in messages like "expected `(`, found + // ``". It's better to say e.g. "expected `(`, found type metavariable". + MetaVar(MetaVarKind), } impl TokenDescription { @@ -419,26 +427,29 @@ impl TokenDescription { _ if token.is_used_keyword() => Some(TokenDescription::Keyword), _ if token.is_unused_keyword() => Some(TokenDescription::ReservedKeyword), token::DocComment(..) => Some(TokenDescription::DocComment), + token::OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(kind))) => { + Some(TokenDescription::MetaVar(kind)) + } _ => None, } } } pub fn token_descr(token: &Token) -> String { - let name = pprust::token_to_string(token).to_string(); + let s = pprust::token_to_string(token).to_string(); - let kind = match (TokenDescription::from_token(token), &token.kind) { - (Some(TokenDescription::ReservedIdentifier), _) => Some("reserved identifier"), - (Some(TokenDescription::Keyword), _) => Some("keyword"), - (Some(TokenDescription::ReservedKeyword), _) => Some("reserved keyword"), - (Some(TokenDescription::DocComment), _) => Some("doc comment"), - (None, TokenKind::NtIdent(..)) => Some("identifier"), - (None, TokenKind::NtLifetime(..)) => Some("lifetime"), - (None, TokenKind::Interpolated(node)) => Some(node.descr()), - (None, _) => None, - }; - - if let Some(kind) = kind { format!("{kind} `{name}`") } else { format!("`{name}`") } + match (TokenDescription::from_token(token), &token.kind) { + (Some(TokenDescription::ReservedIdentifier), _) => format!("reserved identifier `{s}`"), + (Some(TokenDescription::Keyword), _) => format!("keyword `{s}`"), + (Some(TokenDescription::ReservedKeyword), _) => format!("reserved keyword `{s}`"), + (Some(TokenDescription::DocComment), _) => format!("doc comment `{s}`"), + // Deliberately doesn't print `s`, which is empty. + (Some(TokenDescription::MetaVar(kind)), _) => format!("`{kind}` metavariable"), + (None, TokenKind::NtIdent(..)) => format!("identifier `{s}`"), + (None, TokenKind::NtLifetime(..)) => format!("lifetime `{s}`"), + (None, TokenKind::Interpolated(node)) => format!("{} `{s}`", node.descr()), + (None, _) => format!("`{s}`"), + } } impl<'a> Parser<'a> { From cee88f7a3f1c5bc284d0c6a87fb1d5535ebb3af8 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 17 Apr 2024 09:37:00 +1000 Subject: [PATCH 3/4] Prepare for invisible delimiters. Current places where `Interpolated` is used are going to change to instead use invisible delimiters. This prepares for that. - It adds invisible delimiter cases to the `can_begin_*`/`may_be_*` methods and the `failed_to_match_macro` that are equivalent to the existing `Interpolated` cases. - It adds panics/asserts in some places where invisible delimiters should never occur. - In `Parser::parse_struct_fields` it excludes an ident + invisible delimiter from special consideration in an error message, because that's quite different to an ident + paren/brace/bracket. --- compiler/rustc_ast/src/token.rs | 38 +++++++++++++- compiler/rustc_expand/src/mbe/diagnostics.rs | 6 ++- compiler/rustc_parse/src/lexer/tokentrees.rs | 16 ++++-- compiler/rustc_parse/src/parser/expr.rs | 12 ++++- .../rustc_parse/src/parser/nonterminal.rs | 50 +++++++++++++++++-- 5 files changed, 108 insertions(+), 14 deletions(-) diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index 592b56cc08f..678f43e3511 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -598,10 +598,11 @@ impl Token { /// **NB**: Take care when modifying this function, since it will change /// the stable set of tokens that are allowed to match an expr nonterminal. pub fn can_begin_expr(&self) -> bool { + use Delimiter::*; match self.uninterpolate().kind { Ident(name, is_raw) => ident_can_begin_expr(name, self.span, is_raw), // value name or keyword - OpenDelim(..) | // tuple, array or block + OpenDelim(Parenthesis | Brace | Bracket) | // tuple, array or block Literal(..) | // literal Not | // operator not BinOp(Minus) | // unary minus @@ -612,7 +613,7 @@ impl Token { // DotDotDot is no longer supported, but we need some way to display the error DotDot | DotDotDot | DotDotEq | // range notation Lt | BinOp(Shl) | // associated path - PathSep | // global path + PathSep | // global path Lifetime(..) | // labeled loop Pound => true, // expression attributes Interpolated(ref nt) => @@ -622,6 +623,12 @@ impl Token { NtLiteral(..) | NtPath(..) ), + OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar( + MetaVarKind::Block | + MetaVarKind::Expr { .. } | + MetaVarKind::Literal | + MetaVarKind::Path + ))) => true, _ => false, } } @@ -655,6 +662,14 @@ impl Token { | NtPath(..) | NtTy(..) ), + OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar( + MetaVarKind::Expr { .. } | + MetaVarKind::Literal | + MetaVarKind::Meta | + MetaVarKind::Pat(_) | + MetaVarKind::Path | + MetaVarKind::Ty + ))) => true, _ => false, } } @@ -675,6 +690,10 @@ impl Token { Lt | BinOp(Shl) | // associated path PathSep => true, // global path Interpolated(ref nt) => matches!(&**nt, NtTy(..) | NtPath(..)), + OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar( + MetaVarKind::Ty | + MetaVarKind::Path + ))) => true, // For anonymous structs or unions, which only appear in specific positions // (type of struct fields or union fields), we don't consider them as regular types _ => false, @@ -687,6 +706,9 @@ impl Token { OpenDelim(Delimiter::Brace) | Literal(..) | BinOp(Minus) => true, Ident(name, IdentIsRaw::No) if name.is_bool_lit() => true, Interpolated(ref nt) => matches!(&**nt, NtExpr(..) | NtBlock(..) | NtLiteral(..)), + OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar( + MetaVarKind::Expr { .. } | MetaVarKind::Block | MetaVarKind::Literal, + ))) => true, _ => false, } } @@ -743,6 +765,13 @@ impl Token { }, _ => false, }, + OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(mv_kind))) => match mv_kind { + MetaVarKind::Literal => true, + MetaVarKind::Expr { can_begin_literal_maybe_minus, .. } => { + can_begin_literal_maybe_minus + } + _ => false, + }, _ => false, } } @@ -758,6 +787,11 @@ impl Token { }, _ => false, }, + OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(mv_kind))) => match mv_kind { + MetaVarKind::Literal => true, + MetaVarKind::Expr { can_begin_string_literal, .. } => can_begin_string_literal, + _ => false, + }, _ => false, } } diff --git a/compiler/rustc_expand/src/mbe/diagnostics.rs b/compiler/rustc_expand/src/mbe/diagnostics.rs index ffcce1e52f6..77b8d228922 100644 --- a/compiler/rustc_expand/src/mbe/diagnostics.rs +++ b/compiler/rustc_expand/src/mbe/diagnostics.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use rustc_ast::token::{self, Token, TokenKind}; +use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::tokenstream::TokenStream; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, DiagMessage}; use rustc_macros::Subdiagnostic; @@ -68,7 +68,9 @@ pub(super) fn failed_to_match_macro( if let MatcherLoc::Token { token: expected_token } = &remaining_matcher && (matches!(expected_token.kind, TokenKind::Interpolated(_)) - || matches!(token.kind, TokenKind::Interpolated(_))) + || matches!(token.kind, TokenKind::Interpolated(_)) + || matches!(expected_token.kind, TokenKind::OpenDelim(Delimiter::Invisible(_))) + || matches!(token.kind, TokenKind::OpenDelim(Delimiter::Invisible(_)))) { err.note("captured metavariables except for `:tt`, `:ident` and `:lifetime` cannot be compared to other tokens"); err.note("see for more information"); diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs index d35c9c7bae7..7b21ffacc84 100644 --- a/compiler/rustc_parse/src/lexer/tokentrees.rs +++ b/compiler/rustc_parse/src/lexer/tokentrees.rs @@ -43,11 +43,19 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> { let mut buf = Vec::new(); loop { match self.token.kind { - token::OpenDelim(delim) => buf.push(match self.lex_token_tree_open_delim(delim) { - Ok(val) => val, - Err(errs) => return (open_spacing, TokenStream::new(buf), Err(errs)), - }), + token::OpenDelim(delim) => { + // Invisible delimiters cannot occur here because `TokenTreesReader` parses + // code directly from strings, with no macro expansion involved. + debug_assert!(!matches!(delim, Delimiter::Invisible(_))); + buf.push(match self.lex_token_tree_open_delim(delim) { + Ok(val) => val, + Err(errs) => return (open_spacing, TokenStream::new(buf), Err(errs)), + }) + } token::CloseDelim(delim) => { + // Invisible delimiters cannot occur here because `TokenTreesReader` parses + // code directly from strings, with no macro expansion involved. + debug_assert!(!matches!(delim, Delimiter::Invisible(_))); return ( open_spacing, TokenStream::new(buf), diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 0012db471ef..b110d891c84 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -3591,11 +3591,19 @@ impl<'a> Parser<'a> { && !self.token.is_reserved_ident() && self.look_ahead(1, |t| { AssocOp::from_token(t).is_some() - || matches!(t.kind, token::OpenDelim(_)) + || matches!( + t.kind, + token::OpenDelim( + Delimiter::Parenthesis + | Delimiter::Bracket + | Delimiter::Brace + ) + ) || *t == token::Dot }) { - // Looks like they tried to write a shorthand, complex expression. + // Looks like they tried to write a shorthand, complex expression, + // E.g.: `n + m`, `f(a)`, `a[i]`, `S { x: 3 }`, or `x.y`. e.span_suggestion_verbose( self.token.span.shrink_to_lo(), "try naming a field", diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs index 43c3de90d9d..8fb6f85d0dd 100644 --- a/compiler/rustc_parse/src/parser/nonterminal.rs +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -3,7 +3,9 @@ use rustc_ast::ptr::P; use rustc_ast::token::Nonterminal::*; use rustc_ast::token::NtExprKind::*; use rustc_ast::token::NtPatKind::*; -use rustc_ast::token::{self, Delimiter, NonterminalKind, Token}; +use rustc_ast::token::{ + self, Delimiter, InvisibleOrigin, MetaVarKind, Nonterminal, NonterminalKind, Token, +}; use rustc_ast_pretty::pprust; use rustc_data_structures::sync::Lrc; use rustc_errors::PResult; @@ -22,7 +24,28 @@ impl<'a> Parser<'a> { #[inline] pub fn nonterminal_may_begin_with(kind: NonterminalKind, token: &Token) -> bool { /// Checks whether the non-terminal may contain a single (non-keyword) identifier. - fn may_be_ident(nt: &token::Nonterminal) -> bool { + fn may_be_ident(kind: MetaVarKind) -> bool { + match kind { + MetaVarKind::Stmt + | MetaVarKind::Pat(_) + | MetaVarKind::Expr { .. } + | MetaVarKind::Ty + | MetaVarKind::Literal // `true`, `false` + | MetaVarKind::Meta + | MetaVarKind::Path => true, + + MetaVarKind::Item + | MetaVarKind::Block + | MetaVarKind::Vis => false, + + MetaVarKind::Ident + | MetaVarKind::Lifetime + | MetaVarKind::TT => unreachable!(), + } + } + + /// Old variant of `may_be_ident`. Being phased out. + fn nt_may_be_ident(nt: &Nonterminal) -> bool { match nt { NtStmt(_) | NtPat(_) @@ -69,7 +92,8 @@ impl<'a> Parser<'a> { | token::Ident(..) | token::NtIdent(..) | token::NtLifetime(..) - | token::Interpolated(_) => true, + | token::Interpolated(_) + | token::OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(_))) => true, _ => token.can_begin_type(), }, NonterminalKind::Block => match &token.kind { @@ -79,11 +103,29 @@ impl<'a> Parser<'a> { NtBlock(_) | NtStmt(_) | NtExpr(_) | NtLiteral(_) => true, NtItem(_) | NtPat(_) | NtTy(_) | NtMeta(_) | NtPath(_) | NtVis(_) => false, }, + token::OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(k))) => match k { + MetaVarKind::Block + | MetaVarKind::Stmt + | MetaVarKind::Expr { .. } + | MetaVarKind::Literal => true, + MetaVarKind::Item + | MetaVarKind::Pat(_) + | MetaVarKind::Ty + | MetaVarKind::Meta + | MetaVarKind::Path + | MetaVarKind::Vis => false, + MetaVarKind::Lifetime | MetaVarKind::Ident | MetaVarKind::TT => { + unreachable!() + } + }, _ => false, }, NonterminalKind::Path | NonterminalKind::Meta => match &token.kind { token::PathSep | token::Ident(..) | token::NtIdent(..) => true, - token::Interpolated(nt) => may_be_ident(nt), + token::Interpolated(nt) => nt_may_be_ident(nt), + token::OpenDelim(Delimiter::Invisible(InvisibleOrigin::MetaVar(kind))) => { + may_be_ident(*kind) + } _ => false, }, NonterminalKind::Pat(pat_kind) => token.can_begin_pattern(pat_kind), From 03159d4bff6de12f26daee0f6c6890e71407a389 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 20 Sep 2024 13:02:14 +1000 Subject: [PATCH 4/4] Remove `ErrorGuaranteed` retval from `error_unexpected_after_dot`. It was added in #130349, but it's not used meaningfully, and causes difficulties for Nonterminal removal in #124141. --- compiler/rustc_parse/src/parser/expr.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index b110d891c84..aa5e9586daf 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -49,7 +49,7 @@ pub(super) enum DestructuredFloat { /// 1.2 | 1.2e3 MiddleDot(Symbol, Span, Span, Symbol, Span), /// Invalid - Error(ErrorGuaranteed), + Error, } impl<'a> Parser<'a> { @@ -1005,7 +1005,7 @@ impl<'a> Parser<'a> { self.mk_expr_tuple_field_access(lo, ident1_span, base, sym1, None); self.mk_expr_tuple_field_access(lo, ident2_span, base1, sym2, suffix) } - DestructuredFloat::Error(_) => base, + DestructuredFloat::Error => base, }) } _ => { @@ -1015,7 +1015,7 @@ impl<'a> Parser<'a> { } } - fn error_unexpected_after_dot(&self) -> ErrorGuaranteed { + fn error_unexpected_after_dot(&self) { let actual = pprust::token_to_string(&self.token); let span = self.token.span; let sm = self.psess.source_map(); @@ -1025,7 +1025,7 @@ impl<'a> Parser<'a> { } _ => (span, actual), }; - self.dcx().emit_err(errors::UnexpectedTokenAfterDot { span, actual }) + self.dcx().emit_err(errors::UnexpectedTokenAfterDot { span, actual }); } /// We need an identifier or integer, but the next token is a float. @@ -1111,8 +1111,8 @@ impl<'a> Parser<'a> { // 1.2e+3 | 1.2e-3 [IdentLike(_), Punct('.'), IdentLike(_), Punct('+' | '-'), IdentLike(_)] => { // See the FIXME about `TokenCursor` above. - let guar = self.error_unexpected_after_dot(); - DestructuredFloat::Error(guar) + self.error_unexpected_after_dot(); + DestructuredFloat::Error } _ => panic!("unexpected components in a float token: {components:?}"), } @@ -1178,7 +1178,7 @@ impl<'a> Parser<'a> { fields.insert(start_idx, Ident::new(symbol2, span2)); fields.insert(start_idx, Ident::new(symbol1, span1)); } - DestructuredFloat::Error(_) => { + DestructuredFloat::Error => { trailing_dot = None; fields.insert(start_idx, Ident::new(symbol, self.prev_token.span)); }