Avoid double-handling of attributes in collect_tokens.

By keeping track of attributes that have been previously processed. This fixes the `macro-rules-derive-cfg.stdout` test, and is necessary for #124141 which removes nonterminals. Also shrink the `SmallVec` inline size used in `IntervalSet`. 2 gives slightly better perf than 4 now that there's an `IntervalSet` in `Parser`, which is cloned reasonably often.
2024-11-22 06:44:35 +00:00 · 2024-08-21 14:16:42 +10:00 · 2024-08-21 14:16:42 +10:00 · 1fdabfbebb
commit 1fdabfbebb
parent b7e7f6e903
6 changed files with 30 additions and 35 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -4172,6 +4172,7 @@ dependencies = [
 "rustc_errors",
 "rustc_feature",
 "rustc_fluent_macro",
+ "rustc_index",
 "rustc_lexer",
 "rustc_macros",
 "rustc_session",
--- a/compiler/rustc_index/src/interval.rs
+++ b/compiler/rustc_index/src/interval.rs
@ -18,7 +18,7 @@ mod tests;
 #[derive(Debug, Clone)]
 pub struct IntervalSet<I> {
    // Start, end
-    map: SmallVec<[(u32, u32); 4]>,
+    map: SmallVec<[(u32, u32); 2]>,
    domain: usize,
    _data: PhantomData<I>,
 }
--- a/compiler/rustc_parse/Cargo.toml
+++ b/compiler/rustc_parse/Cargo.toml
@ -12,6 +12,7 @@ rustc_data_structures = { path = "../rustc_data_structures" }
 rustc_errors = { path = "../rustc_errors" }
 rustc_feature = { path = "../rustc_feature" }
 rustc_fluent_macro = { path = "../rustc_fluent_macro" }
+rustc_index = { path = "../rustc_index" }
 rustc_lexer = { path = "../rustc_lexer" }
 rustc_macros = { path = "../rustc_macros" }
 rustc_session = { path = "../rustc_session" }
--- a/compiler/rustc_parse/src/parser/attr_wrapper.rs
+++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs
@ -256,6 +256,20 @@ impl<'a> Parser<'a> {
            res?
        };

+        // Ignore any attributes we've previously processed. This happens when
+        // an inner call to `collect_tokens` returns an AST node and then an
+        // outer call ends up with the same AST node without any additional
+        // wrapping layer.
+        let ret_attrs: AttrVec = ret
+            .attrs()
+            .iter()
+            .cloned()
+            .filter(|attr| {
+                let is_unseen = self.capture_state.seen_attrs.insert(attr.id);
+                is_unseen
+            })
+            .collect();
+
        // When we're not in "definite capture mode", then skip collecting and
        // return early if either of the following conditions hold.
        // - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`).
@ -269,7 +283,7 @@ impl<'a> Parser<'a> {
        // tokens.
        let definite_capture_mode = self.capture_cfg
            && matches!(self.capture_state.capturing, Capturing::Yes)
-            && has_cfg_or_cfg_attr(ret.attrs());
+            && has_cfg_or_cfg_attr(&ret_attrs);
        if !definite_capture_mode && matches!(ret.tokens_mut(), None | Some(Some(_))) {
            return Ok(ret);
        }
@ -289,7 +303,7 @@ impl<'a> Parser<'a> {
            //   outer and inner attributes. So this check is more precise than
            //   the earlier `needs_tokens` check, and we don't need to
            //   check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.)
-            || needs_tokens(ret.attrs())
+            || needs_tokens(&ret_attrs)
            // - We are in "definite capture mode", which requires that there
            //   are `#[cfg]` or `#[cfg_attr]` attributes. (During normal
            //   non-`capture_cfg` parsing, we don't need any special capturing
@ -328,7 +342,7 @@ impl<'a> Parser<'a> {
        // `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
        // which means the relevant tokens will be removed. (More details below.)
        let mut inner_attr_parser_replacements = Vec::new();
-        for attr in ret.attrs() {
+        for attr in ret_attrs.iter() {
            if attr.style == ast::AttrStyle::Inner {
                if let Some(inner_attr_parser_range) =
                    self.capture_state.inner_attr_parser_ranges.remove(&attr.id)
@ -418,7 +432,7 @@ impl<'a> Parser<'a> {
            // cfg-expand this AST node.
            let start_pos =
                if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
-            let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
+            let target = AttrsTarget { attrs: ret_attrs, tokens };
            tokens_used = true;
            self.capture_state
                .parser_replacements
@ -428,6 +442,7 @@ impl<'a> Parser<'a> {
            // the outermost call to this method.
            self.capture_state.parser_replacements.clear();
            self.capture_state.inner_attr_parser_ranges.clear();
+            self.capture_state.seen_attrs.clear();
        }
        assert!(tokens_used); // check we didn't create `tokens` unnecessarily
        Ok(ret)
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@ -35,6 +35,7 @@ use rustc_ast_pretty::pprust;
 use rustc_data_structures::fx::FxHashMap;
 use rustc_data_structures::sync::Lrc;
 use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult};
+use rustc_index::interval::IntervalSet;
 use rustc_session::parse::ParseSess;
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use rustc_span::{Span, DUMMY_SP};
@ -183,7 +184,7 @@ pub struct Parser<'a> {
 // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
 // it doesn't unintentionally get bigger.
 #[cfg(target_pointer_width = "64")]
-rustc_data_structures::static_assert_size!(Parser<'_>, 256);
+rustc_data_structures::static_assert_size!(Parser<'_>, 288);

 /// Stores span information about a closure.
 #[derive(Clone, Debug)]
@ -261,6 +262,9 @@ struct CaptureState {
    capturing: Capturing,
    parser_replacements: Vec<ParserReplacement>,
    inner_attr_parser_ranges: FxHashMap<AttrId, ParserRange>,
+    // `IntervalSet` is good for perf because attrs are mostly added to this
+    // set in contiguous ranges.
+    seen_attrs: IntervalSet<AttrId>,
 }

 /// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
@ -458,6 +462,7 @@ impl<'a> Parser<'a> {
                capturing: Capturing::No,
                parser_replacements: Vec::new(),
                inner_attr_parser_ranges: Default::default(),
+                seen_attrs: IntervalSet::new(u32::MAX as usize),
            },
            current_closure: None,
            recovery: Recovery::Allowed,
--- a/tests/ui/proc-macro/macro-rules-derive-cfg.stdout
+++ b/tests/ui/proc-macro/macro-rules-derive-cfg.stdout
@ -1,11 +1,9 @@
 PRINT-DERIVE INPUT (DISPLAY): struct
 Foo([bool; #[rustc_dummy(first)] #[rustc_dummy(second)]
-{ #![rustc_dummy(third)] #[rustc_dummy(fourth)] #[rustc_dummy(fourth)] 30 }]);
+{ #![rustc_dummy(third)] #[rustc_dummy(fourth)] 30 }]);
 PRINT-DERIVE DEEP-RE-COLLECTED (DISPLAY): struct
 Foo([bool; #[rustc_dummy(first)] #[rustc_dummy(second)]
-{
-    #! [rustc_dummy(third)] #[rustc_dummy(fourth)] #[rustc_dummy(fourth)] 30
-}]);
+{ #! [rustc_dummy(third)] #[rustc_dummy(fourth)] 30 }]);
 PRINT-DERIVE INPUT (DEBUG): TokenStream [
    Ident {
        ident: "struct",
@ -138,31 +136,6 @@ PRINT-DERIVE INPUT (DEBUG): TokenStream [
                                ],
                                span: $DIR/macro-rules-derive-cfg.rs:25:6: 25:49 (#0),
                            },
-                            Punct {
-                                ch: '#',
-                                spacing: Alone,
-                                span: $DIR/macro-rules-derive-cfg.rs:25:5: 25:6 (#0),
-                            },
-                            Group {
-                                delimiter: Bracket,
-                                stream: TokenStream [
-                                    Ident {
-                                        ident: "rustc_dummy",
-                                        span: $DIR/macro-rules-derive-cfg.rs:25:28: 25:39 (#0),
-                                    },
-                                    Group {
-                                        delimiter: Parenthesis,
-                                        stream: TokenStream [
-                                            Ident {
-                                                ident: "fourth",
-                                                span: $DIR/macro-rules-derive-cfg.rs:25:40: 25:46 (#0),
-                                            },
-                                        ],
-                                        span: $DIR/macro-rules-derive-cfg.rs:25:39: 25:47 (#0),
-                                    },
-                                ],
-                                span: $DIR/macro-rules-derive-cfg.rs:25:6: 25:49 (#0),
-                            },
                            Literal {
                                kind: Integer,
                                symbol: "30",