From ff7d5ba65e94980fbfb13793ef5d503ffad559b0 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote
Date: Sun, 30 Jul 2023 17:16:20 +1000
Subject: [PATCH] Move doc comment desugaring out of `TokenCursor`.

`TokenCursor` currently does doc comment desugaring on the fly, if the
`desugar_doc_comments` field is set. This requires also modifying the
token stream on the fly with `replace_prev_and_rewind`.

This commit moves the doc comment desugaring out of `TokenCursor`, by
introducing a new `TokenStream::desugar_doc_comments` method. This
separation of desugaring and iterating makes the code nicer.
---
 compiler/rustc_ast/src/tokenstream.rs  | 92 +++++++++++++++++++++++++-
 compiler/rustc_parse/src/parser/mod.rs | 86 ++++--------------------
 2 files changed, 102 insertions(+), 76 deletions(-)

diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs
index 348c37c480f..50b4e68a1bd 100644
--- a/compiler/rustc_ast/src/tokenstream.rs
+++ b/compiler/rustc_ast/src/tokenstream.rs
@@ -13,7 +13,7 @@
 //! and a borrowed `TokenStream` is sufficient to build an owned `TokenStream` without taking
 //! ownership of the original.
 
-use crate::ast::StmtKind;
+use crate::ast::{AttrStyle, StmtKind};
 use crate::ast_traits::{HasAttrs, HasSpan, HasTokens};
 use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind};
 use crate::AttrVec;
@@ -22,11 +22,11 @@ use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::sync::{self, Lrc};
 use rustc_macros::HashStable_Generic;
 use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
-use rustc_span::{Span, DUMMY_SP};
+use rustc_span::{sym, Span, Symbol, DUMMY_SP};
 use smallvec::{smallvec, SmallVec};
 
 use std::borrow::Cow;
-use std::{fmt, iter, mem};
+use std::{cmp, fmt, iter, mem};
 
 /// When the main Rust parser encounters a syntax-extension invocation, it
 /// parses the arguments to the invocation as a token tree. This is a very
@@ -566,6 +566,92 @@ impl TokenStream {
     pub fn chunks(&self, chunk_size: usize) -> core::slice::Chunks<'_, TokenTree> {
         self.0.chunks(chunk_size)
     }
+
+    /// Desugar doc comments like `/// foo` in the stream into `#[doc =
+    /// r"foo"]`. Modifies the `TokenStream` via `Lrc::make_mut`, but as little
+    /// as possible.
+    pub fn desugar_doc_comments(&mut self) {
+        if let Some(desugared_stream) = desugar_inner(self.clone()) {
+            *self = desugared_stream;
+        }
+
+        // The return value is `None` if nothing in `stream` changed.
+        fn desugar_inner(mut stream: TokenStream) -> Option<TokenStream> {
+            let mut i = 0;
+            let mut modified = false;
+            while let Some(tt) = stream.0.get(i) {
+                match tt {
+                    &TokenTree::Token(
+                        Token { kind: token::DocComment(_, attr_style, data), span },
+                        _spacing,
+                    ) => {
+                        let desugared = desugared_tts(attr_style, data, span);
+                        let desugared_len = desugared.len();
+                        Lrc::make_mut(&mut stream.0).splice(i..i + 1, desugared);
+                        modified = true;
+                        i += desugared_len;
+                    }
+
+                    &TokenTree::Token(..) => i += 1,
+
+                    &TokenTree::Delimited(sp, delim, ref delim_stream) => {
+                        if let Some(desugared_delim_stream) = desugar_inner(delim_stream.clone()) {
+                            let new_tt = TokenTree::Delimited(sp, delim, desugared_delim_stream);
+                            Lrc::make_mut(&mut stream.0)[i] = new_tt;
+                            modified = true;
+                        }
+                        i += 1;
+                    }
+                }
+            }
+            if modified { Some(stream) } else { None }
+        }
+
+        fn desugared_tts(attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
+            // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
+            // required to wrap the text. E.g.
+            // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
+            // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
+            // - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
+            let mut num_of_hashes = 0;
+            let mut count = 0;
+            for ch in data.as_str().chars() {
+                count = match ch {
+                    '"' => 1,
+                    '#' if count > 0 => count + 1,
+                    _ => 0,
+                };
+                num_of_hashes = cmp::max(num_of_hashes, count);
+            }
+
+            // `/// foo` becomes `doc = r"foo"`.
+            let delim_span = DelimSpan::from_single(span);
+            let body = TokenTree::Delimited(
+                delim_span,
+                Delimiter::Bracket,
+                [
+                    TokenTree::token_alone(token::Ident(sym::doc, false), span),
+                    TokenTree::token_alone(token::Eq, span),
+                    TokenTree::token_alone(
+                        TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
+                        span,
+                    ),
+                ]
+                .into_iter()
+                .collect::<TokenStream>(),
+            );
+
+            if attr_style == AttrStyle::Inner {
+                vec![
+                    TokenTree::token_alone(token::Pound, span),
+                    TokenTree::token_alone(token::Not, span),
+                    body,
+                ]
+            } else {
+                vec![TokenTree::token_alone(token::Pound, span), body]
+            }
+        }
+    }
 }
 
 /// By-reference iterator over a [`TokenStream`], that produces `&TokenTree`
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index 1d3489aba1b..7d39bf8edf3 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -24,7 +24,7 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
 use rustc_ast::util::case::Case;
 use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
-use rustc_ast::{self as ast, AnonConst, AttrStyle, Const, DelimArgs, Extern};
+use rustc_ast::{self as ast, AnonConst, Const, DelimArgs, Extern};
 use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit};
 use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind};
 use rustc_ast_pretty::pprust;
@@ -38,7 +38,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use std::ops::Range;
-use std::{cmp, mem, slice};
+use std::{mem, slice};
 use thin_vec::ThinVec;
 use tracing::debug;
@@ -224,11 +224,6 @@ struct TokenCursor {
     // because it's the outermost token stream which never has delimiters.
     stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
 
-    // We need to desugar doc comments from `/// foo` form into `#[doc =
-    // r"foo"]` form when parsing declarative macro inputs in `parse_tt`,
-    // because some declarative macros look for `doc` attributes.
-    desugar_doc_comments: bool,
-
     // Counts the number of calls to `{,inlined_}next`.
     num_next_calls: usize,
@@ -271,23 +266,11 @@ impl TokenCursor {
         if let Some(tree) = self.tree_cursor.next_ref() {
             match tree {
                 &TokenTree::Token(ref token, spacing) => {
-                    match (self.desugar_doc_comments, token) {
-                        (
-                            true,
-                            &Token { kind: token::DocComment(_, attr_style, data), span },
-                        ) => {
-                            let desugared = self.desugar(attr_style, data, span);
-                            self.tree_cursor.replace_prev_and_rewind(desugared);
-                            // Continue to get the first token of the desugared doc comment.
-                        }
-                        _ => {
-                            debug_assert!(!matches!(
-                                token.kind,
-                                token::OpenDelim(_) | token::CloseDelim(_)
-                            ));
-                            return (token.clone(), spacing);
-                        }
-                    }
+                    debug_assert!(!matches!(
+                        token.kind,
+                        token::OpenDelim(_) | token::CloseDelim(_)
+                    ));
+                    return (token.clone(), spacing);
                 }
                 &TokenTree::Delimited(sp, delim, ref tts) => {
                     let trees = tts.clone().into_trees();
@@ -311,52 +294,6 @@
             }
         }
     }
-
-    // Desugar a doc comment into something like `#[doc = r"foo"]`.
-    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
-        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
-        // required to wrap the text. E.g.
-        // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
-        // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
-        // - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
-        let mut num_of_hashes = 0;
-        let mut count = 0;
-        for ch in data.as_str().chars() {
-            count = match ch {
-                '"' => 1,
-                '#' if count > 0 => count + 1,
-                _ => 0,
-            };
-            num_of_hashes = cmp::max(num_of_hashes, count);
-        }
-
-        // `/// foo` becomes `doc = r"foo"`.
-        let delim_span = DelimSpan::from_single(span);
-        let body = TokenTree::Delimited(
-            delim_span,
-            Delimiter::Bracket,
-            [
-                TokenTree::token_alone(token::Ident(sym::doc, false), span),
-                TokenTree::token_alone(token::Eq, span),
-                TokenTree::token_alone(
-                    TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
-                    span,
-                ),
-            ]
-            .into_iter()
-            .collect::<TokenStream>(),
-        );
-
-        if attr_style == AttrStyle::Inner {
-            vec![
-                TokenTree::token_alone(token::Pound, span),
-                TokenTree::token_alone(token::Not, span),
-                body,
-            ]
-        } else {
-            vec![TokenTree::token_alone(token::Pound, span), body]
-        }
-    }
 }
 
 #[derive(Debug, Clone, PartialEq)]
@@ -451,10 +388,14 @@ pub(super) fn token_descr(token: &Token) -> String {
 impl<'a> Parser<'a> {
     pub fn new(
         sess: &'a ParseSess,
-        tokens: TokenStream,
+        mut stream: TokenStream,
         desugar_doc_comments: bool,
         subparser_name: Option<&'static str>,
     ) -> Self {
+        if desugar_doc_comments {
+            stream.desugar_doc_comments();
+        }
+
         let mut parser = Parser {
             sess,
             token: Token::dummy(),
@@ -464,10 +405,9 @@
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
             token_cursor: TokenCursor {
-                tree_cursor: tokens.into_trees(),
+                tree_cursor: stream.into_trees(),
                 stack: Vec::new(),
                 num_next_calls: 0,
-                desugar_doc_comments,
                 break_last_token: false,
            },
             unmatched_angle_bracket_count: 0,
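
A note on the subtle part of the desugaring above: the doc comment text is
wrapped in a raw string literal, so `num_of_hashes` must be the smallest
number of `#`s such that no `"#...#` sequence inside the text can close the
literal early. Below is a minimal standalone sketch of that loop, not part of
the patch; `min_raw_hashes` is a hypothetical name, not an identifier in
rustc.

// Computes the minimum number of `#`s needed to wrap `data` in a raw string
// literal, mirroring the `num_of_hashes` loop in `desugared_tts` above.
fn min_raw_hashes(data: &str) -> u32 {
    let mut num_of_hashes = 0;
    let mut count = 0;
    for ch in data.chars() {
        count = match ch {
            '"' => 1,                      // a `"` may start a closing delimiter
            '#' if count > 0 => count + 1, // extend a candidate `"##...` run
            _ => 0,                        // run broken; it cannot close the literal
        };
        num_of_hashes = num_of_hashes.max(count);
    }
    num_of_hashes
}

fn main() {
    assert_eq!(min_raw_hashes(r#"abc d"#), 0);
    assert_eq!(min_raw_hashes(r##"abc "d""##), 1);
    assert_eq!(min_raw_hashes(r####"abc "##d##""####), 3);
}

With the hash count in hand, an outer `/// foo` comment desugars to
`#[doc = r"foo"]` and an inner `//! foo` comment to `#![doc = r"foo"]`, which
is why the `AttrStyle::Inner` branch in the patch emits the extra `!` token.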