Auto merge of #95669 - nnethercote:call-compute_locs-once-per-rule, r=petrochenkov

Call `compute_locs` once per rule This fixes the small regressions on `wg-grammar` and `hyper-0.14.18` seen in #95555. r? `@petrochenkov`
2024-11-01 15:01:51 +00:00 · 2022-04-06 16:29:32 +00:00 · 2022-04-06 16:29:32 +00:00 · c2afaba465
commit c2afaba465
parent b6ab1fae73 238d9076fc
2 changed files with 169 additions and 149 deletions
--- a/compiler/rustc_expand/src/mbe/macro_parser.rs
+++ b/compiler/rustc_expand/src/mbe/macro_parser.rs
@ -106,9 +106,9 @@ rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
 ///
 /// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves
 /// simply incrementing the current matcher position index by one.
-enum MatcherLoc<'tt> {
+pub(super) enum MatcherLoc {
    Token {
-        token: &'tt Token,
+        token: Token,
    },
    Delimited,
    Sequence {
@ -123,7 +123,7 @@ enum MatcherLoc<'tt> {
        idx_first: usize,
    },
    SequenceSep {
-        separator: &'tt Token,
+        separator: Token,
    },
    SequenceKleeneOpAfterSep {
        idx_first: usize,
@ -131,13 +131,85 @@ enum MatcherLoc<'tt> {
    MetaVarDecl {
        span: Span,
        bind: Ident,
-        kind: NonterminalKind,
+        kind: Option<NonterminalKind>,
        next_metavar: usize,
        seq_depth: usize,
    },
    Eof,
 }

+pub(super) fn compute_locs(sess: &ParseSess, matcher: &[TokenTree]) -> Vec<MatcherLoc> {
+    fn inner(
+        sess: &ParseSess,
+        tts: &[TokenTree],
+        locs: &mut Vec<MatcherLoc>,
+        next_metavar: &mut usize,
+        seq_depth: usize,
+    ) {
+        for tt in tts {
+            match tt {
+                TokenTree::Token(token) => {
+                    locs.push(MatcherLoc::Token { token: token.clone() });
+                }
+                TokenTree::Delimited(_, delimited) => {
+                    locs.push(MatcherLoc::Delimited);
+                    inner(sess, &delimited.all_tts, locs, next_metavar, seq_depth);
+                }
+                TokenTree::Sequence(_, seq) => {
+                    // We can't determine `idx_first_after` and construct the final
+                    // `MatcherLoc::Sequence` until after `inner()` is called and the sequence end
+                    // pieces are processed. So we push a dummy value (`Eof` is cheapest to
+                    // construct) now, and overwrite it with the proper value below.
+                    let dummy = MatcherLoc::Eof;
+                    locs.push(dummy);
+
+                    let next_metavar_orig = *next_metavar;
+                    let op = seq.kleene.op;
+                    let idx_first = locs.len();
+                    let idx_seq = idx_first - 1;
+                    inner(sess, &seq.tts, locs, next_metavar, seq_depth + 1);
+
+                    if let Some(separator) = &seq.separator {
+                        locs.push(MatcherLoc::SequenceSep { separator: separator.clone() });
+                        locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
+                    } else {
+                        locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
+                    }
+
+                    // Overwrite the dummy value pushed above with the proper value.
+                    locs[idx_seq] = MatcherLoc::Sequence {
+                        op,
+                        num_metavar_decls: seq.num_captures,
+                        idx_first_after: locs.len(),
+                        next_metavar: next_metavar_orig,
+                        seq_depth,
+                    };
+                }
+                &TokenTree::MetaVarDecl(span, bind, kind) => {
+                    locs.push(MatcherLoc::MetaVarDecl {
+                        span,
+                        bind,
+                        kind,
+                        next_metavar: *next_metavar,
+                        seq_depth,
+                    });
+                    *next_metavar += 1;
+                }
+                TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
+            }
+        }
+    }
+
+    let mut locs = vec![];
+    let mut next_metavar = 0;
+    inner(sess, matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0);
+
+    // A final entry is needed for eof.
+    locs.push(MatcherLoc::Eof);
+
+    locs
+}
+
 /// A single matcher position, representing the state of matching.
 struct MatcherPos {
    /// The index into `TtParser::locs`, which represents the "dot".
@ -298,12 +370,9 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool {

 // Note: the vectors could be created and dropped within `parse_tt`, but to avoid excess
 // allocations we have a single vector fo each kind that is cleared and reused repeatedly.
-pub struct TtParser<'tt> {
+pub struct TtParser {
    macro_name: Ident,

-    /// The matcher of the current rule.
-    locs: Vec<MatcherLoc<'tt>>,
-
    /// The set of current mps to be processed. This should be empty by the end of a successful
    /// execution of `parse_tt_inner`.
    cur_mps: Vec<MatcherPos>,
@ -320,11 +389,10 @@ pub struct TtParser<'tt> {
    empty_matches: Lrc<NamedMatchVec>,
 }

-impl<'tt> TtParser<'tt> {
-    pub(super) fn new(macro_name: Ident) -> TtParser<'tt> {
+impl TtParser {
+    pub(super) fn new(macro_name: Ident) -> TtParser {
        TtParser {
            macro_name,
-            locs: vec![],
            cur_mps: vec![],
            next_mps: vec![],
            bb_mps: vec![],
@ -332,99 +400,6 @@ impl<'tt> TtParser<'tt> {
        }
    }

-    /// Convert a `&[TokenTree]` to a `&[MatcherLoc]`. Note: this conversion happens every time the
-    /// macro is called, which may be many times if there are many call sites or if it is
-    /// recursive. This conversion is fairly cheap and the representation is sufficiently better
-    /// for matching than `&[TokenTree]` that it's a clear performance win even with the overhead.
-    /// But it might be possible to move the conversion outwards so it only occurs once per macro.
-    fn compute_locs(
-        &mut self,
-        sess: &ParseSess,
-        matcher: &'tt [TokenTree],
-    ) -> Result<usize, (Span, String)> {
-        fn inner<'tt>(
-            sess: &ParseSess,
-            tts: &'tt [TokenTree],
-            locs: &mut Vec<MatcherLoc<'tt>>,
-            next_metavar: &mut usize,
-            seq_depth: usize,
-        ) -> Result<(), (Span, String)> {
-            for tt in tts {
-                match tt {
-                    TokenTree::Token(token) => {
-                        locs.push(MatcherLoc::Token { token });
-                    }
-                    TokenTree::Delimited(_, delimited) => {
-                        locs.push(MatcherLoc::Delimited);
-                        inner(sess, &delimited.all_tts, locs, next_metavar, seq_depth)?;
-                    }
-                    TokenTree::Sequence(_, seq) => {
-                        // We can't determine `idx_first_after` and construct the final
-                        // `MatcherLoc::Sequence` until after `inner()` is called and the sequence
-                        // end pieces are processed. So we push a dummy value (`Eof` is cheapest to
-                        // construct) now, and overwrite it with the proper value below.
-                        let dummy = MatcherLoc::Eof;
-                        locs.push(dummy);
-
-                        let next_metavar_orig = *next_metavar;
-                        let op = seq.kleene.op;
-                        let idx_first = locs.len();
-                        let idx_seq = idx_first - 1;
-                        inner(sess, &seq.tts, locs, next_metavar, seq_depth + 1)?;
-
-                        if let Some(separator) = &seq.separator {
-                            locs.push(MatcherLoc::SequenceSep { separator });
-                            locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
-                        } else {
-                            locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
-                        }
-
-                        // Overwrite the dummy value pushed above with the proper value.
-                        locs[idx_seq] = MatcherLoc::Sequence {
-                            op,
-                            num_metavar_decls: seq.num_captures,
-                            idx_first_after: locs.len(),
-                            next_metavar: next_metavar_orig,
-                            seq_depth,
-                        };
-                    }
-                    &TokenTree::MetaVarDecl(span, bind, kind) => {
-                        if let Some(kind) = kind {
-                            locs.push(MatcherLoc::MetaVarDecl {
-                                span,
-                                bind,
-                                kind,
-                                next_metavar: *next_metavar,
-                                seq_depth,
-                            });
-                            *next_metavar += 1;
-                        } else if sess
-                            .missing_fragment_specifiers
-                            .borrow_mut()
-                            .remove(&span)
-                            .is_some()
-                        {
-                            // E.g. `$e` instead of `$e:expr`.
-                            return Err((span, "missing fragment specifier".to_string()));
-                        }
-                    }
-                    TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
-                }
-            }
-            Ok(())
-        }
-
-        self.locs.clear();
-        let mut next_metavar = 0;
-        inner(sess, matcher, &mut self.locs, &mut next_metavar, /* seq_depth */ 0)?;
-
-        // A final entry is needed for eof.
-        self.locs.push(MatcherLoc::Eof);
-
-        // This is the number of metavar decls.
-        Ok(next_metavar)
-    }
-
    /// Process the matcher positions of `cur_mps` until it is empty. In the process, this will
    /// produce more mps in `next_mps` and `bb_mps`.
    ///
@ -434,7 +409,8 @@ impl<'tt> TtParser<'tt> {
    /// track of through the mps generated.
    fn parse_tt_inner(
        &mut self,
-        num_metavar_decls: usize,
+        sess: &ParseSess,
+        matcher: &[MatcherLoc],
        token: &Token,
    ) -> Option<NamedParseResult> {
        // Matcher positions that would be valid if the macro invocation was over now. Only
@ -442,7 +418,7 @@ impl<'tt> TtParser<'tt> {
        let mut eof_mps = EofMatcherPositions::None;

        while let Some(mut mp) = self.cur_mps.pop() {
-            match &self.locs[mp.idx] {
+            match &matcher[mp.idx] {
                MatcherLoc::Token { token: t } => {
                    // If it's a doc comment, we just ignore it and move on to the next tt in the
                    // matcher. This is a bug, but #95267 showed that existing programs rely on
@ -532,17 +508,25 @@ impl<'tt> TtParser<'tt> {
                    mp.idx = idx_first;
                    self.cur_mps.push(mp);
                }
-                MatcherLoc::MetaVarDecl { kind, .. } => {
+                &MatcherLoc::MetaVarDecl { span, kind, .. } => {
                    // Built-in nonterminals never start with these tokens, so we can eliminate
                    // them from consideration. We use the span of the metavariable declaration
                    // to determine any edition-specific matching behavior for non-terminals.
-                    if Parser::nonterminal_may_begin_with(*kind, token) {
-                        self.bb_mps.push(mp);
+                    if let Some(kind) = kind {
+                        if Parser::nonterminal_may_begin_with(kind, token) {
+                            self.bb_mps.push(mp);
+                        }
+                    } else {
+                        // Both this check and the one in `nameize` are necessary, surprisingly.
+                        if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
+                            // E.g. `$e` instead of `$e:expr`.
+                            return Some(Error(span, "missing fragment specifier".to_string()));
+                        }
                    }
                }
                MatcherLoc::Eof => {
                    // We are past the matcher's end, and not in a sequence. Try to end things.
-                    debug_assert_eq!(mp.idx, self.locs.len() - 1);
+                    debug_assert_eq!(mp.idx, matcher.len() - 1);
                    if *token == token::Eof {
                        eof_mps = match eof_mps {
                            EofMatcherPositions::None => EofMatcherPositions::One(mp),
@ -560,11 +544,10 @@ impl<'tt> TtParser<'tt> {
        if *token == token::Eof {
            Some(match eof_mps {
                EofMatcherPositions::One(mut eof_mp) => {
-                    assert_eq!(eof_mp.matches.len(), num_metavar_decls);
                    // Need to take ownership of the matches from within the `Lrc`.
                    Lrc::make_mut(&mut eof_mp.matches);
                    let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter();
-                    self.nameize(matches)
+                    self.nameize(sess, matcher, matches)
                }
                EofMatcherPositions::Multiple => {
                    Error(token.span, "ambiguity: multiple successful parses".to_string())
@ -586,13 +569,8 @@ impl<'tt> TtParser<'tt> {
    pub(super) fn parse_tt(
        &mut self,
        parser: &mut Cow<'_, Parser<'_>>,
-        matcher: &'tt [TokenTree],
+        matcher: &[MatcherLoc],
    ) -> NamedParseResult {
-        let num_metavar_decls = match self.compute_locs(parser.sess, matcher) {
-            Ok(num_metavar_decls) => num_metavar_decls,
-            Err((span, msg)) => return Error(span, msg),
-        };
-
        // A queue of possible matcher positions. We initialize it with the matcher position in
        // which the "dot" is before the first token of the first token tree in `matcher`.
        // `parse_tt_inner` then processes all of these possible matcher positions and produces
@ -607,7 +585,7 @@ impl<'tt> TtParser<'tt> {

            // Process `cur_mps` until either we have finished the input or we need to get some
            // parsing from the black-box parser done.
-            if let Some(res) = self.parse_tt_inner(num_metavar_decls, &parser.token) {
+            if let Some(res) = self.parse_tt_inner(&parser.sess, matcher, &parser.token) {
                return res;
            }

@ -635,9 +613,13 @@ impl<'tt> TtParser<'tt> {
                (0, 1) => {
                    // We need to call the black-box parser to get some nonterminal.
                    let mut mp = self.bb_mps.pop().unwrap();
-                    let loc = &self.locs[mp.idx];
+                    let loc = &matcher[mp.idx];
                    if let &MatcherLoc::MetaVarDecl {
-                        span, kind, next_metavar, seq_depth, ..
+                        span,
+                        kind: Some(kind),
+                        next_metavar,
+                        seq_depth,
+                        ..
                    } = loc
                    {
                        // We use the span of the metavariable declaration to determine any
@ -669,7 +651,7 @@ impl<'tt> TtParser<'tt> {

                (_, _) => {
                    // Too many possibilities!
-                    return self.ambiguity_error(parser.token.span);
+                    return self.ambiguity_error(matcher, parser.token.span);
                }
            }

@ -677,12 +659,18 @@ impl<'tt> TtParser<'tt> {
        }
    }

-    fn ambiguity_error(&self, token_span: rustc_span::Span) -> NamedParseResult {
+    fn ambiguity_error(
+        &self,
+        matcher: &[MatcherLoc],
+        token_span: rustc_span::Span,
+    ) -> NamedParseResult {
        let nts = self
            .bb_mps
            .iter()
-            .map(|mp| match &self.locs[mp.idx] {
-                MatcherLoc::MetaVarDecl { bind, kind, .. } => format!("{} ('{}')", kind, bind),
+            .map(|mp| match &matcher[mp.idx] {
+                MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. } => {
+                    format!("{} ('{}')", kind, bind)
+                }
                _ => unreachable!(),
            })
            .collect::<Vec<String>>()
@ -702,16 +690,31 @@ impl<'tt> TtParser<'tt> {
        )
    }

-    fn nameize<I: Iterator<Item = NamedMatch>>(&self, mut res: I) -> NamedParseResult {
+    fn nameize<I: Iterator<Item = NamedMatch>>(
+        &self,
+        sess: &ParseSess,
+        matcher: &[MatcherLoc],
+        mut res: I,
+    ) -> NamedParseResult {
        // Make that each metavar has _exactly one_ binding. If so, insert the binding into the
        // `NamedParseResult`. Otherwise, it's an error.
        let mut ret_val = FxHashMap::default();
-        for loc in self.locs.iter() {
-            if let &MatcherLoc::MetaVarDecl { span, bind, .. } = loc {
-                match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {
-                    Vacant(spot) => spot.insert(res.next().unwrap()),
-                    Occupied(..) => return Error(span, format!("duplicated bind name: {}", bind)),
-                };
+        for loc in matcher {
+            if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc {
+                if kind.is_some() {
+                    match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {
+                        Vacant(spot) => spot.insert(res.next().unwrap()),
+                        Occupied(..) => {
+                            return Error(span, format!("duplicated bind name: {}", bind));
+                        }
+                    };
+                } else {
+                    // Both this check and the one in `parse_tt_inner` are necessary, surprisingly.
+                    if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
+                        // E.g. `$e` instead of `$e:expr`.
+                        return Error(span, "missing fragment specifier".to_string());
+                    }
+                }
            }
        }
        Success(ret_val)
--- a/compiler/rustc_expand/src/mbe/macro_rules.rs
+++ b/compiler/rustc_expand/src/mbe/macro_rules.rs
@ -4,7 +4,7 @@ use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstF
 use crate::mbe;
 use crate::mbe::macro_check;
 use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser};
-use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree};
+use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree, MatcherLoc};
 use crate::mbe::transcribe::transcribe;

 use rustc_ast as ast;
@ -160,7 +160,7 @@ struct MacroRulesMacroExpander {
    name: Ident,
    span: Span,
    transparency: Transparency,
-    lhses: Vec<mbe::TokenTree>,
+    lhses: Vec<Vec<MatcherLoc>>,
    rhses: Vec<mbe::TokenTree>,
    valid: bool,
    is_local: bool,
@ -211,7 +211,7 @@ fn generic_extension<'cx, 'tt>(
    name: Ident,
    transparency: Transparency,
    arg: TokenStream,
-    lhses: &'tt [mbe::TokenTree],
+    lhses: &'tt [Vec<MatcherLoc>],
    rhses: &'tt [mbe::TokenTree],
    is_local: bool,
 ) -> Box<dyn MacResult + 'cx> {
@ -246,14 +246,6 @@ fn generic_extension<'cx, 'tt>(
    // this situation.)
    let parser = parser_from_cx(sess, arg.clone());

-    // A matcher is always delimited, but the delimiters are ignored.
-    let delimited_inner_tts = |tt: &'tt mbe::TokenTree| -> &'tt [mbe::TokenTree] {
-        match tt {
-            mbe::TokenTree::Delimited(_, delimited) => delimited.inner_tts(),
-            _ => cx.span_bug(sp, "malformed macro lhs"),
-        }
-    };
-
    // Try each arm's matchers.
    let mut tt_parser = TtParser::new(name);
    for (i, lhs) in lhses.iter().enumerate() {
@ -263,13 +255,19 @@ fn generic_extension<'cx, 'tt>(
        // are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
        let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut());

-        match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), delimited_inner_tts(lhs)) {
+        match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) {
            Success(named_matches) => {
                // The matcher was `Success(..)`ful.
                // Merge the gated spans from parsing the matcher with the pre-existing ones.
                sess.gated_spans.merge(gated_spans_snapshot);

-                let rhs = delimited_inner_tts(&rhses[i]).to_vec().clone();
+                // Ignore the delimiters on the RHS.
+                let rhs = match &rhses[i] {
+                    mbe::TokenTree::Delimited(_, delimited) => {
+                        delimited.inner_tts().to_vec().clone()
+                    }
+                    _ => cx.span_bug(sp, "malformed macro rhs"),
+                };
                let arm_span = rhses[i].span();

                let rhs_spans = rhs.iter().map(|t| t.span()).collect::<Vec<_>>();
@ -347,10 +345,8 @@ fn generic_extension<'cx, 'tt>(
    // Check whether there's a missing comma in this macro call, like `println!("{}" a);`
    if let Some((arg, comma_span)) = arg.add_comma() {
        for lhs in lhses {
-            if let Success(_) = tt_parser.parse_tt(
-                &mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())),
-                delimited_inner_tts(lhs),
-            ) {
+            let parser = parser_from_cx(sess, arg.clone());
+            if let Success(_) = tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) {
                if comma_span.is_dummy() {
                    err.note("you might be missing a comma");
                } else {
@ -441,6 +437,8 @@ pub fn compile_declarative_macro(
            }),
        ),
    ];
+    // Convert it into `MatcherLoc` form.
+    let argument_gram = mbe::macro_parser::compute_locs(&sess.parse_sess, &argument_gram);

    let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS);
    let mut tt_parser = TtParser::new(def.ident);
@ -537,6 +535,25 @@ pub fn compile_declarative_macro(
        None => {}
    }

+    // Convert the lhses into `MatcherLoc` form, which is better for doing the
+    // actual matching. Unless the matcher is invalid.
+    let lhses = if valid {
+        lhses
+            .iter()
+            .map(|lhs| {
+                // Ignore the delimiters around the matcher.
+                match lhs {
+                    mbe::TokenTree::Delimited(_, delimited) => {
+                        mbe::macro_parser::compute_locs(&sess.parse_sess, delimited.inner_tts())
+                    }
+                    _ => sess.parse_sess.span_diagnostic.span_bug(def.span, "malformed macro lhs"),
+                }
+            })
+            .collect()
+    } else {
+        vec![]
+    };
+
    mk_syn_ext(Box::new(MacroRulesMacroExpander {
        name: def.ident,
        span: def.span,