diff --git a/compiler/rustc_ast/src/mut_visit.rs b/compiler/rustc_ast/src/mut_visit.rs index 8a66894a356..7c8af6c4a55 100644 --- a/compiler/rustc_ast/src/mut_visit.rs +++ b/compiler/rustc_ast/src/mut_visit.rs @@ -752,7 +752,7 @@ fn visit_lazy_tts(vis: &mut T, lazy_tts: &mut Option(vis: &mut T, t: &mut Token) { let Token { kind, span } = t; match kind { - token::Ident(name, _ /*raw*/) | token::Lifetime(name) => { + token::Ident(name, _is_raw) | token::Lifetime(name, _is_raw) => { let mut ident = Ident::new(*name, *span); vis.visit_ident(&mut ident); *name = ident.name; @@ -762,7 +762,7 @@ pub fn visit_token(vis: &mut T, t: &mut Token) { token::NtIdent(ident, _is_raw) => { vis.visit_ident(ident); } - token::NtLifetime(ident) => { + token::NtLifetime(ident, _is_raw) => { vis.visit_ident(ident); } token::Interpolated(nt) => { diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index f1dddb3acac..a0082a41713 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -331,11 +331,11 @@ pub enum TokenKind { /// Do not forget about `NtLifetime` when you want to match on lifetime identifiers. /// It's recommended to use `Token::(lifetime,uninterpolate,uninterpolated_span)` to /// treat regular and interpolated lifetime identifiers in the same way. - Lifetime(Symbol), + Lifetime(Symbol, IdentIsRaw), /// This identifier (and its span) is the lifetime passed to the /// declarative macro. The span in the surrounding `Token` is the span of /// the `lifetime` metavariable in the macro's RHS. - NtLifetime(Ident), + NtLifetime(Ident, IdentIsRaw), /// An embedded AST node, as produced by a macro. This only exists for /// historical reasons. We'd like to get rid of it, for multiple reasons. @@ -458,7 +458,7 @@ impl Token { /// if they keep spans or perform edition checks. pub fn uninterpolated_span(&self) -> Span { match self.kind { - NtIdent(ident, _) | NtLifetime(ident) => ident.span, + NtIdent(ident, _) | NtLifetime(ident, _) => ident.span, Interpolated(ref nt) => nt.use_span(), _ => self.span, } @@ -661,7 +661,9 @@ impl Token { pub fn uninterpolate(&self) -> Cow<'_, Token> { match self.kind { NtIdent(ident, is_raw) => Cow::Owned(Token::new(Ident(ident.name, is_raw), ident.span)), - NtLifetime(ident) => Cow::Owned(Token::new(Lifetime(ident.name), ident.span)), + NtLifetime(ident, is_raw) => { + Cow::Owned(Token::new(Lifetime(ident.name, is_raw), ident.span)) + } _ => Cow::Borrowed(self), } } @@ -679,11 +681,11 @@ impl Token { /// Returns a lifetime identifier if this token is a lifetime. #[inline] - pub fn lifetime(&self) -> Option { + pub fn lifetime(&self) -> Option<(Ident, IdentIsRaw)> { // We avoid using `Token::uninterpolate` here because it's slow. match self.kind { - Lifetime(name) => Some(Ident::new(name, self.span)), - NtLifetime(ident) => Some(ident), + Lifetime(name, is_raw) => Some((Ident::new(name, self.span), is_raw)), + NtLifetime(ident, is_raw) => Some((ident, is_raw)), _ => None, } } @@ -865,7 +867,7 @@ impl Token { _ => return None, }, SingleQuote => match joint.kind { - Ident(name, IdentIsRaw::No) => Lifetime(Symbol::intern(&format!("'{name}"))), + Ident(name, is_raw) => Lifetime(Symbol::intern(&format!("'{name}")), is_raw), _ => return None, }, diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 057b4455dca..fc1dd2caf68 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -482,11 +482,11 @@ impl TokenStream { token::NtIdent(ident, is_raw) => { TokenTree::Token(Token::new(token::Ident(ident.name, is_raw), ident.span), spacing) } - token::NtLifetime(ident) => TokenTree::Delimited( + token::NtLifetime(ident, is_raw) => TokenTree::Delimited( DelimSpan::from_single(token.span), DelimSpacing::new(Spacing::JointHidden, spacing), Delimiter::Invisible, - TokenStream::token_alone(token::Lifetime(ident.name), ident.span), + TokenStream::token_alone(token::Lifetime(ident.name, is_raw), ident.span), ), token::Interpolated(ref nt) => TokenTree::Delimited( DelimSpan::from_single(token.span), diff --git a/compiler/rustc_ast_pretty/src/pprust/state.rs b/compiler/rustc_ast_pretty/src/pprust/state.rs index c7ff39d23ed..3b1449d9a91 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state.rs @@ -11,7 +11,9 @@ use std::borrow::Cow; use ast::TraitBoundModifiers; use rustc_ast::attr::AttrIdGenerator; use rustc_ast::ptr::P; -use rustc_ast::token::{self, BinOpToken, CommentKind, Delimiter, Nonterminal, Token, TokenKind}; +use rustc_ast::token::{ + self, BinOpToken, CommentKind, Delimiter, IdentIsRaw, Nonterminal, Token, TokenKind, +}; use rustc_ast::tokenstream::{Spacing, TokenStream, TokenTree}; use rustc_ast::util::classify; use rustc_ast::util::comments::{Comment, CommentStyle}; @@ -947,8 +949,13 @@ pub trait PrintState<'a>: std::ops::Deref + std::ops::Dere token::NtIdent(ident, is_raw) => { IdentPrinter::for_ast_ident(ident, is_raw.into()).to_string().into() } - token::Lifetime(name) => name.to_string().into(), - token::NtLifetime(ident) => ident.name.to_string().into(), + + token::Lifetime(name, IdentIsRaw::No) + | token::NtLifetime(Ident { name, .. }, IdentIsRaw::No) => name.to_string().into(), + token::Lifetime(name, IdentIsRaw::Yes) + | token::NtLifetime(Ident { name, .. }, IdentIsRaw::Yes) => { + format!("'r#{}", &name.as_str()[1..]).into() + } /* Other */ token::DocComment(comment_kind, attr_style, data) => { diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 9011d02da33..501a2417fcf 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -398,8 +398,10 @@ pub(crate) enum NamedMatch { fn token_name_eq(t1: &Token, t2: &Token) -> bool { if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) { ident1.name == ident2.name && is_raw1 == is_raw2 - } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) { - ident1.name == ident2.name + } else if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = + (t1.lifetime(), t2.lifetime()) + { + ident1.name == ident2.name && is_raw1 == is_raw2 } else { t1.kind == t2.kind } diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index b06910595bb..39489a8df1b 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -283,9 +283,9 @@ pub(super) fn transcribe<'a>( let kind = token::NtIdent(*ident, *is_raw); TokenTree::token_alone(kind, sp) } - MatchedSingle(ParseNtResult::Lifetime(ident)) => { + MatchedSingle(ParseNtResult::Lifetime(ident, is_raw)) => { marker.visit_span(&mut sp); - let kind = token::NtLifetime(*ident); + let kind = token::NtLifetime(*ident, *is_raw); TokenTree::token_alone(kind, sp) } MatchedSingle(ParseNtResult::Nt(nt)) => { diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 4ff5da1a4bd..5798bcedc22 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -229,15 +229,16 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)> for Vec { + Lifetime(name, is_raw) => { let ident = symbol::Ident::new(name, span).without_first_quote(); trees.extend([ TokenTree::Punct(Punct { ch: b'\'', joint: true, span }), - TokenTree::Ident(Ident { sym: ident.name, is_raw: false, span }), + TokenTree::Ident(Ident { sym: ident.name, is_raw: is_raw.into(), span }), ]); } - NtLifetime(ident) => { - let stream = TokenStream::token_alone(token::Lifetime(ident.name), ident.span); + NtLifetime(ident, is_raw) => { + let stream = + TokenStream::token_alone(token::Lifetime(ident.name, is_raw), ident.span); trees.push(TokenTree::Group(Group { delimiter: pm::Delimiter::None, stream: Some(stream), diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 31fdd2d7cec..60aab668cba 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -91,6 +91,15 @@ pub enum TokenKind { /// tokens. UnknownPrefix, + /// An unknown prefix in a lifetime, like `'foo#`. + /// + /// Note that like above, only the `'` and prefix are included in the token + /// and not the separator. + UnknownPrefixLifetime, + + /// `'r#lt`, which in edition < 2021 is split into several tokens: `'r # lt`. + RawLifetime, + /// Similar to the above, but *always* an error on every edition. This is used /// for emoji identifier recovery, as those are not meant to be ever accepted. InvalidPrefix, @@ -677,9 +686,17 @@ impl Cursor<'_> { return Literal { kind, suffix_start }; } + if self.first() == 'r' && self.second() == '#' && is_id_start(self.third()) { + // Eat "r" and `#`, and identifier start characters. + self.bump(); + self.bump(); + self.bump(); + self.eat_while(is_id_continue); + return RawLifetime; + } + // Either a lifetime or a character literal with // length greater than 1. - let starts_with_number = self.first().is_ascii_digit(); // Skip the literal contents. @@ -688,15 +705,17 @@ impl Cursor<'_> { self.bump(); self.eat_while(is_id_continue); - // Check if after skipping literal contents we've met a closing - // single quote (which means that user attempted to create a - // string with single quotes). - if self.first() == '\'' { - self.bump(); - let kind = Char { terminated: true }; - Literal { kind, suffix_start: self.pos_within_token() } - } else { - Lifetime { starts_with_number } + match self.first() { + // Check if after skipping literal contents we've met a closing + // single quote (which means that user attempted to create a + // string with single quotes). + '\'' => { + self.bump(); + let kind = Char { terminated: true }; + Literal { kind, suffix_start: self.pos_within_token() } + } + '#' if !starts_with_number => UnknownPrefixLifetime, + _ => Lifetime { starts_with_number }, } } diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl index 5a368f4ed9a..7d4dee45c26 100644 --- a/compiler/rustc_lint/messages.ftl +++ b/compiler/rustc_lint/messages.ftl @@ -708,6 +708,10 @@ lint_range_endpoint_out_of_range = range endpoint is out of range for `{$ty}` lint_range_use_inclusive_range = use an inclusive range instead +lint_raw_prefix = prefix `'r` is reserved + .label = reserved prefix + .suggestion = insert whitespace here to avoid this being parsed as a prefix in Rust 2021 + lint_reason_must_be_string_literal = reason must be a string literal lint_reason_must_come_last = reason in lint attribute must come last diff --git a/compiler/rustc_lint/src/builtin.rs b/compiler/rustc_lint/src/builtin.rs index d8482567bbe..824a1868c55 100644 --- a/compiler/rustc_lint/src/builtin.rs +++ b/compiler/rustc_lint/src/builtin.rs @@ -1851,9 +1851,16 @@ impl KeywordIdents { TokenTree::Token(token, _) => { if let Some((ident, token::IdentIsRaw::No)) = token.ident() { if !prev_dollar { - self.check_ident_token(cx, UnderMacro(true), ident); + self.check_ident_token(cx, UnderMacro(true), ident, ""); } - } else if *token == TokenKind::Dollar { + } else if let Some((ident, token::IdentIsRaw::No)) = token.lifetime() { + self.check_ident_token( + cx, + UnderMacro(true), + ident.without_first_quote(), + "'", + ); + } else if token.kind == TokenKind::Dollar { prev_dollar = true; continue; } @@ -1869,6 +1876,7 @@ impl KeywordIdents { cx: &EarlyContext<'_>, UnderMacro(under_macro): UnderMacro, ident: Ident, + prefix: &'static str, ) { let (lint, edition) = match ident.name { kw::Async | kw::Await | kw::Try => (KEYWORD_IDENTS_2018, Edition::Edition2018), @@ -1902,7 +1910,7 @@ impl KeywordIdents { cx.emit_span_lint( lint, ident.span, - BuiltinKeywordIdents { kw: ident, next: edition, suggestion: ident.span }, + BuiltinKeywordIdents { kw: ident, next: edition, suggestion: ident.span, prefix }, ); } } @@ -1915,7 +1923,11 @@ impl EarlyLintPass for KeywordIdents { self.check_tokens(cx, &mac.args.tokens); } fn check_ident(&mut self, cx: &EarlyContext<'_>, ident: Ident) { - self.check_ident_token(cx, UnderMacro(false), ident); + if ident.name.as_str().starts_with('\'') { + self.check_ident_token(cx, UnderMacro(false), ident.without_first_quote(), "'"); + } else { + self.check_ident_token(cx, UnderMacro(false), ident, ""); + } } } diff --git a/compiler/rustc_lint/src/context/diagnostics.rs b/compiler/rustc_lint/src/context/diagnostics.rs index de34b9bebe9..fd13c418c09 100644 --- a/compiler/rustc_lint/src/context/diagnostics.rs +++ b/compiler/rustc_lint/src/context/diagnostics.rs @@ -172,6 +172,10 @@ pub(super) fn decorate_lint(sess: &Session, diagnostic: BuiltinLintDiag, diag: & } .decorate_lint(diag); } + BuiltinLintDiag::RawPrefix(label_span) => { + lints::RawPrefix { label: label_span, suggestion: label_span.shrink_to_hi() } + .decorate_lint(diag); + } BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => { lints::UnusedBuiltinAttribute { invoc_span, attr_name, macro_name }.decorate_lint(diag); } diff --git a/compiler/rustc_lint/src/lints.rs b/compiler/rustc_lint/src/lints.rs index ae7e9659856..e49b102cb39 100644 --- a/compiler/rustc_lint/src/lints.rs +++ b/compiler/rustc_lint/src/lints.rs @@ -363,8 +363,9 @@ pub(crate) enum BuiltinEllipsisInclusiveRangePatternsLint { pub(crate) struct BuiltinKeywordIdents { pub kw: Ident, pub next: Edition, - #[suggestion(code = "r#{kw}", applicability = "machine-applicable")] + #[suggestion(code = "{prefix}r#{kw}", applicability = "machine-applicable")] pub suggestion: Span, + pub prefix: &'static str, } #[derive(LintDiagnostic)] @@ -2814,6 +2815,15 @@ pub(crate) struct ReservedPrefix { pub prefix: String, } +#[derive(LintDiagnostic)] +#[diag(lint_raw_prefix)] +pub(crate) struct RawPrefix { + #[label] + pub label: Span, + #[suggestion(code = " ", applicability = "machine-applicable")] + pub suggestion: Span, +} + #[derive(LintDiagnostic)] #[diag(lint_unused_builtin_attribute)] pub(crate) struct UnusedBuiltinAttribute { diff --git a/compiler/rustc_lint_defs/src/lib.rs b/compiler/rustc_lint_defs/src/lib.rs index 9a72e4f6739..5c4b7e5664d 100644 --- a/compiler/rustc_lint_defs/src/lib.rs +++ b/compiler/rustc_lint_defs/src/lib.rs @@ -612,6 +612,8 @@ pub enum BuiltinLintDiag { LegacyDeriveHelpers(Span), OrPatternsBackCompat(Span, String), ReservedPrefix(Span, String), + /// `'r#` in edition < 2021. + RawPrefix(Span), TrailingMacro(bool, Ident), BreakWithLabelAndLoop(Span), UnicodeTextFlow(Span, String), diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index f30939093c2..b7232ff21ca 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -13,7 +13,6 @@ use rustc_session::lint::builtin::{ }; use rustc_session::lint::BuiltinLintDiag; use rustc_session::parse::ParseSess; -use rustc_span::edition::Edition; use rustc_span::symbol::Symbol; use rustc_span::{BytePos, Pos, Span}; use tracing::debug; @@ -188,9 +187,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { preceded_by_whitespace = true; continue; } - rustc_lexer::TokenKind::Ident => { - self.ident(start) - } + rustc_lexer::TokenKind::Ident => self.ident(start), rustc_lexer::TokenKind::RawIdent => { let sym = nfc_normalize(self.str_from(start + BytePos(2))); let span = self.mk_sp(start, self.pos); @@ -205,20 +202,31 @@ impl<'psess, 'src> StringReader<'psess, 'src> { self.report_unknown_prefix(start); self.ident(start) } - rustc_lexer::TokenKind::InvalidIdent - | rustc_lexer::TokenKind::InvalidPrefix + rustc_lexer::TokenKind::UnknownPrefixLifetime => { + self.report_unknown_prefix(start); + // Include the leading `'` in the real identifier, for macro + // expansion purposes. See #12512 for the gory details of why + // this is necessary. + let lifetime_name = self.str_from(start); + self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident, IdentIsRaw::No) + } + rustc_lexer::TokenKind::InvalidIdent | rustc_lexer::TokenKind::InvalidPrefix // Do not recover an identifier with emoji if the codepoint is a confusable // with a recoverable substitution token, like `➖`. - if !UNICODE_ARRAY - .iter() - .any(|&(c, _, _)| { - let sym = self.str_from(start); - sym.chars().count() == 1 && c == sym.chars().next().unwrap() - }) => + if !UNICODE_ARRAY.iter().any(|&(c, _, _)| { + let sym = self.str_from(start); + sym.chars().count() == 1 && c == sym.chars().next().unwrap() + }) => { let sym = nfc_normalize(self.str_from(start)); let span = self.mk_sp(start, self.pos); - self.psess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default() + self.psess + .bad_unicode_identifiers + .borrow_mut() + .entry(sym) + .or_default() .push(span); token::Ident(sym, IdentIsRaw::No) } @@ -249,9 +257,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> { let suffix = if suffix_start < self.pos { let string = self.str_from(suffix_start); if string == "_" { - self - .dcx() - .emit_err(errors::UnderscoreLiteralSuffix { span: self.mk_sp(suffix_start, self.pos) }); + self.dcx().emit_err(errors::UnderscoreLiteralSuffix { + span: self.mk_sp(suffix_start, self.pos), + }); None } else { Some(Symbol::intern(string)) @@ -269,12 +277,47 @@ impl<'psess, 'src> StringReader<'psess, 'src> { self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); if starts_with_number { let span = self.mk_sp(start, self.pos); - self.dcx().struct_err("lifetimes cannot start with a number") + self.dcx() + .struct_err("lifetimes cannot start with a number") .with_span(span) .stash(span, StashKey::LifetimeIsChar); } let ident = Symbol::intern(lifetime_name); - token::Lifetime(ident) + token::Lifetime(ident, IdentIsRaw::No) + } + rustc_lexer::TokenKind::RawLifetime => { + self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); + + let ident_start = start + BytePos(3); + let prefix_span = self.mk_sp(start, ident_start); + + if prefix_span.at_least_rust_2021() { + let lifetime_name_without_tick = self.str_from(ident_start); + // Put the `'` back onto the lifetime name. + let mut lifetime_name = String::with_capacity(lifetime_name_without_tick.len() + 1); + lifetime_name.push('\''); + lifetime_name += lifetime_name_without_tick; + let sym = Symbol::intern(&lifetime_name); + + token::Lifetime(sym, IdentIsRaw::Yes) + } else { + // Otherwise, this should be parsed like `'r`. Warn about it though. + self.psess.buffer_lint( + RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, + prefix_span, + ast::CRATE_NODE_ID, + BuiltinLintDiag::RawPrefix(prefix_span), + ); + + // Reset the state so we just lex the `'r`. + let lt_start = start + BytePos(2); + self.pos = lt_start; + self.cursor = Cursor::new(&str_before[2 as usize..]); + + let lifetime_name = self.str_from(start); + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident, IdentIsRaw::No) + } } rustc_lexer::TokenKind::Semi => token::Semi, rustc_lexer::TokenKind::Comma => token::Comma, @@ -331,16 +374,19 @@ impl<'psess, 'src> StringReader<'psess, 'src> { // first remove compound tokens like `<<` from `rustc_lexer`, and then add // fancier error recovery to it, as there will be less overall work to do this // way. - let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1); + let (token, sugg) = + unicode_chars::check_for_substitution(self, start, c, repeats + 1); self.dcx().emit_err(errors::UnknownTokenStart { span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())), escaped: escaped_char(c), sugg, - null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None}, + null: if c == '\x00' { Some(errors::UnknownTokenNull) } else { None }, repeat: if repeats > 0 { swallow_next_invalid = repeats; Some(errors::UnknownTokenRepeat { repeats }) - } else {None} + } else { + None + }, }); if let Some(token) = token { @@ -699,7 +745,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { let expn_data = prefix_span.ctxt().outer_expn_data(); - if expn_data.edition >= Edition::Edition2021 { + if expn_data.edition.at_least_rust_2021() { // In Rust 2021, this is a hard error. let sugg = if prefix == "rb" { Some(errors::UnknownPrefixSugg::UseBr(prefix_span)) diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 84684e808d9..ecc4cd96faf 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -2050,7 +2050,7 @@ impl<'a> Parser<'a> { }; // On an error path, eagerly consider a lifetime to be an unclosed character lit, if that // makes sense. - if let Some(ident) = self.token.lifetime() + if let Some((ident, IdentIsRaw::No)) = self.token.lifetime() && could_be_unclosed_char_literal(ident) { let lt = self.expect_lifetime(); @@ -2925,9 +2925,9 @@ impl<'a> Parser<'a> { } pub(crate) fn eat_label(&mut self) -> Option