From 97910580aadab067ef13b5d0094a57e124c743ea Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Thu, 5 Sep 2024 05:43:55 -0400 Subject: [PATCH] Add initial support for raw lifetimes --- compiler/rustc_ast/src/mut_visit.rs | 4 +- compiler/rustc_ast/src/token.rs | 18 +++++---- compiler/rustc_ast/src/tokenstream.rs | 4 +- compiler/rustc_ast_pretty/src/pprust/state.rs | 13 +++++-- compiler/rustc_expand/src/mbe/macro_parser.rs | 6 ++- compiler/rustc_expand/src/mbe/transcribe.rs | 4 +- .../rustc_expand/src/proc_macro_server.rs | 9 +++-- compiler/rustc_lexer/src/lib.rs | 13 ++++++- compiler/rustc_lint/messages.ftl | 4 ++ .../rustc_lint/src/context/diagnostics.rs | 4 ++ compiler/rustc_lint/src/lints.rs | 9 +++++ compiler/rustc_lint_defs/src/lib.rs | 2 + compiler/rustc_parse/src/lexer/mod.rs | 39 +++++++++++++++++-- compiler/rustc_parse/src/parser/expr.rs | 6 +-- compiler/rustc_parse/src/parser/mod.rs | 2 +- .../rustc_parse/src/parser/nonterminal.rs | 6 +-- compiler/rustc_parse/src/parser/pat.rs | 6 +-- compiler/rustc_parse/src/parser/ty.rs | 7 ++-- 18 files changed, 116 insertions(+), 40 deletions(-) diff --git a/compiler/rustc_ast/src/mut_visit.rs b/compiler/rustc_ast/src/mut_visit.rs index 8a66894a356..7c8af6c4a55 100644 --- a/compiler/rustc_ast/src/mut_visit.rs +++ b/compiler/rustc_ast/src/mut_visit.rs @@ -752,7 +752,7 @@ fn visit_lazy_tts(vis: &mut T, lazy_tts: &mut Option(vis: &mut T, t: &mut Token) { let Token { kind, span } = t; match kind { - token::Ident(name, _ /*raw*/) | token::Lifetime(name) => { + token::Ident(name, _is_raw) | token::Lifetime(name, _is_raw) => { let mut ident = Ident::new(*name, *span); vis.visit_ident(&mut ident); *name = ident.name; @@ -762,7 +762,7 @@ pub fn visit_token(vis: &mut T, t: &mut Token) { token::NtIdent(ident, _is_raw) => { vis.visit_ident(ident); } - token::NtLifetime(ident) => { + token::NtLifetime(ident, _is_raw) => { vis.visit_ident(ident); } token::Interpolated(nt) => { diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index f1dddb3acac..a0082a41713 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -331,11 +331,11 @@ pub enum TokenKind { /// Do not forget about `NtLifetime` when you want to match on lifetime identifiers. /// It's recommended to use `Token::(lifetime,uninterpolate,uninterpolated_span)` to /// treat regular and interpolated lifetime identifiers in the same way. - Lifetime(Symbol), + Lifetime(Symbol, IdentIsRaw), /// This identifier (and its span) is the lifetime passed to the /// declarative macro. The span in the surrounding `Token` is the span of /// the `lifetime` metavariable in the macro's RHS. - NtLifetime(Ident), + NtLifetime(Ident, IdentIsRaw), /// An embedded AST node, as produced by a macro. This only exists for /// historical reasons. We'd like to get rid of it, for multiple reasons. @@ -458,7 +458,7 @@ impl Token { /// if they keep spans or perform edition checks. pub fn uninterpolated_span(&self) -> Span { match self.kind { - NtIdent(ident, _) | NtLifetime(ident) => ident.span, + NtIdent(ident, _) | NtLifetime(ident, _) => ident.span, Interpolated(ref nt) => nt.use_span(), _ => self.span, } @@ -661,7 +661,9 @@ impl Token { pub fn uninterpolate(&self) -> Cow<'_, Token> { match self.kind { NtIdent(ident, is_raw) => Cow::Owned(Token::new(Ident(ident.name, is_raw), ident.span)), - NtLifetime(ident) => Cow::Owned(Token::new(Lifetime(ident.name), ident.span)), + NtLifetime(ident, is_raw) => { + Cow::Owned(Token::new(Lifetime(ident.name, is_raw), ident.span)) + } _ => Cow::Borrowed(self), } } @@ -679,11 +681,11 @@ impl Token { /// Returns a lifetime identifier if this token is a lifetime. #[inline] - pub fn lifetime(&self) -> Option { + pub fn lifetime(&self) -> Option<(Ident, IdentIsRaw)> { // We avoid using `Token::uninterpolate` here because it's slow. match self.kind { - Lifetime(name) => Some(Ident::new(name, self.span)), - NtLifetime(ident) => Some(ident), + Lifetime(name, is_raw) => Some((Ident::new(name, self.span), is_raw)), + NtLifetime(ident, is_raw) => Some((ident, is_raw)), _ => None, } } @@ -865,7 +867,7 @@ impl Token { _ => return None, }, SingleQuote => match joint.kind { - Ident(name, IdentIsRaw::No) => Lifetime(Symbol::intern(&format!("'{name}"))), + Ident(name, is_raw) => Lifetime(Symbol::intern(&format!("'{name}")), is_raw), _ => return None, }, diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 057b4455dca..fc1dd2caf68 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -482,11 +482,11 @@ impl TokenStream { token::NtIdent(ident, is_raw) => { TokenTree::Token(Token::new(token::Ident(ident.name, is_raw), ident.span), spacing) } - token::NtLifetime(ident) => TokenTree::Delimited( + token::NtLifetime(ident, is_raw) => TokenTree::Delimited( DelimSpan::from_single(token.span), DelimSpacing::new(Spacing::JointHidden, spacing), Delimiter::Invisible, - TokenStream::token_alone(token::Lifetime(ident.name), ident.span), + TokenStream::token_alone(token::Lifetime(ident.name, is_raw), ident.span), ), token::Interpolated(ref nt) => TokenTree::Delimited( DelimSpan::from_single(token.span), diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index c7ff39d23ed..3b1449d9a91 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -11,7 +11,9 @@ use std::borrow::Cow; use ast::TraitBoundModifiers; use rustc_ast::attr::AttrIdGenerator; use rustc_ast::ptr::P; -use rustc_ast::token::{self, BinOpToken, CommentKind, Delimiter, Nonterminal, Token, TokenKind}; +use rustc_ast::token::{ + self, BinOpToken, CommentKind, Delimiter, IdentIsRaw, Nonterminal, Token, TokenKind, +}; use rustc_ast::tokenstream::{Spacing, TokenStream, TokenTree}; use rustc_ast::util::classify; use rustc_ast::util::comments::{Comment, CommentStyle}; @@ -947,8 +949,13 @@ pub trait PrintState<'a>: std::ops::Deref + std::ops::Dere token::NtIdent(ident, is_raw) => { IdentPrinter::for_ast_ident(ident, is_raw.into()).to_string().into() } - token::Lifetime(name) => name.to_string().into(), - token::NtLifetime(ident) => ident.name.to_string().into(), + + token::Lifetime(name, IdentIsRaw::No) + | token::NtLifetime(Ident { name, .. }, IdentIsRaw::No) => name.to_string().into(), + token::Lifetime(name, IdentIsRaw::Yes) + | token::NtLifetime(Ident { name, .. }, IdentIsRaw::Yes) => { + format!("'r#{}", &name.as_str()[1..]).into() + } /* Other */ token::DocComment(comment_kind, attr_style, data) => { diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 9011d02da33..501a2417fcf 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -398,8 +398,10 @@ pub(crate) enum NamedMatch { fn token_name_eq(t1: &Token, t2: &Token) -> bool { if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) { ident1.name == ident2.name && is_raw1 == is_raw2 - } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) { - ident1.name == ident2.name + } else if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = + (t1.lifetime(), t2.lifetime()) + { + ident1.name == ident2.name && is_raw1 == is_raw2 } else { t1.kind == t2.kind } diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index b06910595bb..39489a8df1b 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -283,9 +283,9 @@ pub(super) fn transcribe<'a>( let kind = token::NtIdent(*ident, *is_raw); TokenTree::token_alone(kind, sp) } - MatchedSingle(ParseNtResult::Lifetime(ident)) => { + MatchedSingle(ParseNtResult::Lifetime(ident, is_raw)) => { marker.visit_span(&mut sp); - let kind = token::NtLifetime(*ident); + let kind = token::NtLifetime(*ident, *is_raw); TokenTree::token_alone(kind, sp) } MatchedSingle(ParseNtResult::Nt(nt)) => { diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 4ff5da1a4bd..5798bcedc22 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -229,15 +229,16 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec { + Lifetime(name, is_raw) => { let ident = symbol::Ident::new(name, span).without_first_quote(); trees.extend([ TokenTree::Punct(Punct { ch: b'\'', joint: true, span }), - TokenTree::Ident(Ident { sym: ident.name, is_raw: false, span }), + TokenTree::Ident(Ident { sym: ident.name, is_raw: is_raw.into(), span }), ]); } - NtLifetime(ident) => { - let stream = TokenStream::token_alone(token::Lifetime(ident.name), ident.span); + NtLifetime(ident, is_raw) => { + let stream = + TokenStream::token_alone(token::Lifetime(ident.name, is_raw), ident.span); trees.push(TokenTree::Group(Group { delimiter: pm::Delimiter::None, stream: Some(stream), diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 6561e416cfa..60aab668cba 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -97,6 +97,9 @@ pub enum TokenKind { /// and not the separator. UnknownPrefixLifetime, + /// `'r#lt`, which in edition < 2021 is split into several tokens: `'r # lt`. + RawLifetime, + /// Similar to the above, but *always* an error on every edition. This is used /// for emoji identifier recovery, as those are not meant to be ever accepted. InvalidPrefix, @@ -683,9 +686,17 @@ impl Cursor<'_> { return Literal { kind, suffix_start }; } + if self.first() == 'r' && self.second() == '#' && is_id_start(self.third()) { + // Eat "r" and `#`, and identifier start characters. + self.bump(); + self.bump(); + self.bump(); + self.eat_while(is_id_continue); + return RawLifetime; + } + // Either a lifetime or a character literal with // length greater than 1. - let starts_with_number = self.first().is_ascii_digit(); // Skip the literal contents. diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl index 759320b9eb6..f7d5c27baef 100644 --- a/compiler/rustc_lint/messages.ftl +++ b/compiler/rustc_lint/messages.ftl @@ -707,6 +707,10 @@ lint_range_endpoint_out_of_range = range endpoint is out of range for `{$ty}` lint_range_use_inclusive_range = use an inclusive range instead +lint_raw_prefix = prefix `'r` is reserved + .label = reserved prefix + .suggestion = insert whitespace here to avoid this being parsed as a prefix in Rust 2021 + lint_reason_must_be_string_literal = reason must be a string literal lint_reason_must_come_last = reason in lint attribute must come last diff --git a/compiler/rustc_lint/src/context/diagnostics.rs b/compiler/rustc_lint/src/context/diagnostics.rs index fd43afa1743..fbff210d87d 100644 --- a/compiler/rustc_lint/src/context/diagnostics.rs +++ b/compiler/rustc_lint/src/context/diagnostics.rs @@ -172,6 +172,10 @@ pub(super) fn decorate_lint(sess: &Session, diagnostic: BuiltinLintDiag, diag: & } .decorate_lint(diag); } + BuiltinLintDiag::RawPrefix(label_span) => { + lints::RawPrefix { label: label_span, suggestion: label_span.shrink_to_hi() } + .decorate_lint(diag); + } BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => { lints::UnusedBuiltinAttribute { invoc_span, attr_name, macro_name }.decorate_lint(diag); } diff --git a/compiler/rustc_lint/src/lints.rs b/compiler/rustc_lint/src/lints.rs index 9050f36acba..8b62e716929 100644 --- a/compiler/rustc_lint/src/lints.rs +++ b/compiler/rustc_lint/src/lints.rs @@ -2772,6 +2772,15 @@ pub(crate) struct ReservedPrefix { pub prefix: String, } +#[derive(LintDiagnostic)] +#[diag(lint_raw_prefix)] +pub(crate) struct RawPrefix { + #[label] + pub label: Span, + #[suggestion(code = " ", applicability = "machine-applicable")] + pub suggestion: Span, +} + #[derive(LintDiagnostic)] #[diag(lint_unused_builtin_attribute)] pub(crate) struct UnusedBuiltinAttribute { diff --git a/compiler/rustc_lint_defs/src/lib.rs b/compiler/rustc_lint_defs/src/lib.rs index 6ee33041623..2b2a67dc846 100644 --- a/compiler/rustc_lint_defs/src/lib.rs +++ b/compiler/rustc_lint_defs/src/lib.rs @@ -609,6 +609,8 @@ pub enum BuiltinLintDiag { LegacyDeriveHelpers(Span), OrPatternsBackCompat(Span, String), ReservedPrefix(Span, String), + /// `'r#` in edition < 2021. + RawPrefix(Span), TrailingMacro(bool, Ident), BreakWithLabelAndLoop(Span), UnicodeTextFlow(Span, String), diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index c0731cf31c3..a680e2a3bc2 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -13,7 +13,6 @@ use rustc_session::lint::builtin::{ }; use rustc_session::lint::BuiltinLintDiag; use rustc_session::parse::ParseSess; -use rustc_span::edition::Edition; use rustc_span::symbol::Symbol; use rustc_span::{BytePos, Pos, Span}; use tracing::debug; @@ -284,7 +283,41 @@ impl<'psess, 'src> StringReader<'psess, 'src> { .stash(span, StashKey::LifetimeIsChar); } let ident = Symbol::intern(lifetime_name); - token::Lifetime(ident) + token::Lifetime(ident, IdentIsRaw::No) + } + rustc_lexer::TokenKind::RawLifetime => { + self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); + + let ident_start = start + BytePos(3); + let prefix_span = self.mk_sp(start, ident_start); + + if prefix_span.at_least_rust_2021() { + let lifetime_name_without_tick = self.str_from(ident_start); + // Put the `'` back onto the lifetime name. + let mut lifetime_name = String::with_capacity(lifetime_name_without_tick.len() + 1); + lifetime_name.push('\''); + lifetime_name += lifetime_name_without_tick; + let sym = Symbol::intern(&lifetime_name); + + token::Lifetime(sym, IdentIsRaw::Yes) + } else { + // Otherwise, this is just `'r`. Warn about it though. + self.psess.buffer_lint( + RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, + prefix_span, + ast::CRATE_NODE_ID, + BuiltinLintDiag::RawPrefix(prefix_span), + ); + + // Reset the state so we just lex the `'r`. + let lt_start = start + BytePos(2); + self.pos = lt_start; + self.cursor = Cursor::new(&str_before[2 as usize..]); + + let lifetime_name = self.str_from(start); + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident, IdentIsRaw::No) + } } rustc_lexer::TokenKind::Semi => token::Semi, rustc_lexer::TokenKind::Comma => token::Comma, @@ -712,7 +745,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { let expn_data = prefix_span.ctxt().outer_expn_data(); - if expn_data.edition >= Edition::Edition2021 { + if expn_data.edition.at_least_rust_2021() { // In Rust 2021, this is a hard error. let sugg = if prefix == "rb" { Some(errors::UnknownPrefixSugg::UseBr(prefix_span)) diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 84684e808d9..ecc4cd96faf 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -2050,7 +2050,7 @@ impl<'a> Parser<'a> { }; // On an error path, eagerly consider a lifetime to be an unclosed character lit, if that // makes sense. - if let Some(ident) = self.token.lifetime() + if let Some((ident, IdentIsRaw::No)) = self.token.lifetime() && could_be_unclosed_char_literal(ident) { let lt = self.expect_lifetime(); @@ -2925,9 +2925,9 @@ impl<'a> Parser<'a> { } pub(crate) fn eat_label(&mut self) -> Option