Inline and remove cook_lexer_token.

This is a small performance win, alas.
2024-11-23 15:23:46 +00:00 · 2022-09-26 12:12:58 +10:00 · 2022-09-26 12:12:58 +10:00 · fb4dba0a17
commit fb4dba0a17
parent da84f0f4c3
1 changed file with 175 additions and 172 deletions
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -86,88 +86,16 @@ impl<'a> StringReader<'a> {

            debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));

-            match self.cook_lexer_token(token.kind, start) {
-                Some(kind) => {
-                    let span = self.mk_sp(start, self.pos);
-                    return (Token::new(kind, span), preceded_by_whitespace);
-                }
-                None => preceded_by_whitespace = true,
-            }
-        }
-    }
-
-    /// Report a fatal lexical error with a given span.
-    fn fatal_span(&self, sp: Span, m: &str) -> ! {
-        self.sess.span_diagnostic.span_fatal(sp, m)
-    }
-
-    /// Report a lexical error with a given span.
-    fn err_span(&self, sp: Span, m: &str) {
-        self.sess.span_diagnostic.struct_span_err(sp, m).emit();
-    }
-
-    /// Report a fatal error spanning [`from_pos`, `to_pos`).
-    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
-        self.fatal_span(self.mk_sp(from_pos, to_pos), m)
-    }
-
-    /// Report a lexical error spanning [`from_pos`, `to_pos`).
-    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
-        self.err_span(self.mk_sp(from_pos, to_pos), m)
-    }
-
-    fn struct_fatal_span_char(
-        &self,
-        from_pos: BytePos,
-        to_pos: BytePos,
-        m: &str,
-        c: char,
-    ) -> DiagnosticBuilder<'a, !> {
-        self.sess
-            .span_diagnostic
-            .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
-    }
-
-    fn struct_err_span_char(
-        &self,
-        from_pos: BytePos,
-        to_pos: BytePos,
-        m: &str,
-        c: char,
-    ) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
-        self.sess
-            .span_diagnostic
-            .struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
-    }
-
-    /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
-    /// complain about it.
-    fn lint_unicode_text_flow(&self, start: BytePos) {
-        // Opening delimiter of the length 2 is not included into the comment text.
-        let content_start = start + BytePos(2);
-        let content = self.str_from(content_start);
-        if contains_text_flow_control_chars(content) {
-            let span = self.mk_sp(start, self.pos);
-            self.sess.buffer_lint_with_diagnostic(
-                &TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
-                span,
-                ast::CRATE_NODE_ID,
-                "unicode codepoint changing visible direction of text present in comment",
-                BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()),
-            );
-        }
-    }
-
-    /// Turns simple `rustc_lexer::TokenKind` enum into a rich
-    /// `rustc_ast::TokenKind`. This turns strings into interned
-    /// symbols and runs additional validation.
-    fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> Option<TokenKind> {
-        Some(match token {
+            // Now "cook" the token, converting the simple `rustc_lexer::TokenKind` enum into a
+            // rich `rustc_ast::TokenKind`. This turns strings into interned symbols and runs
+            // additional validation.
+            let kind = match token.kind {
                rustc_lexer::TokenKind::LineComment { doc_style } => {
                    // Skip non-doc comments
                    let Some(doc_style) = doc_style else {
                        self.lint_unicode_text_flow(start);
-                    return None;
+                        preceded_by_whitespace = true;
+                        continue;
                    };

                    // Opening delimiter of the length 3 is not included into the symbol.
@@ -183,7 +111,8 @@ impl<'a> StringReader<'a> {
                    // Skip non-doc comments
                    let Some(doc_style) = doc_style else {
                        self.lint_unicode_text_flow(start);
-                    return None;
+                        preceded_by_whitespace = true;
+                        continue;
                    };

                    // Opening delimiter of the length 3 and closing delimiter of the length 2
@@ -193,7 +122,10 @@ impl<'a> StringReader<'a> {
                    let content = self.str_from_to(content_start, content_end);
                    self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
                }
-            rustc_lexer::TokenKind::Whitespace => return None,
+                rustc_lexer::TokenKind::Whitespace => {
+                    preceded_by_whitespace = true;
+                    continue;
+                }
                rustc_lexer::TokenKind::Ident => {
                    let sym = nfc_normalize(self.str_from(start));
                    let span = self.mk_sp(start, self.pos);
@@ -225,12 +157,12 @@ impl<'a> StringReader<'a> {
                        .any(|&(c, _, _)| {
                            let sym = self.str_from(start);
                            sym.chars().count() == 1 && c == sym.chars().next().unwrap()
-                    })
-                     =>
+                        }) =>
                {
                    let sym = nfc_normalize(self.str_from(start));
                    let span = self.mk_sp(start, self.pos);
-                self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default().push(span);
+                    self.sess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default()
+                        .push(span);
                    token::Ident(sym, false)
                }
                rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
@@ -308,20 +240,91 @@ impl<'a> StringReader<'a> {
                    let c = self.str_from(start).chars().next().unwrap();
                    let mut err =
                        self.struct_err_span_char(start, self.pos, "unknown start of token", c);
-                // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs,
-                // instead of keeping a table in `check_for_substitution`into the token. Ideally,
-                // this should be inside `rustc_lexer`. However, we should first remove compound
-                // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
-                // as there will be less overall work to do this way.
+                    // FIXME: the lexer could be used to turn the ASCII version of unicode
+                    // homoglyphs, instead of keeping a table in `check_for_substitution`into the
+                    // token. Ideally, this should be inside `rustc_lexer`. However, we should
+                    // first remove compound tokens like `<<` from `rustc_lexer`, and then add
+                    // fancier error recovery to it, as there will be less overall work to do this
+                    // way.
                    let token = unicode_chars::check_for_substitution(self, start, c, &mut err);
                    if c == '\x00' {
                        err.help("source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used");
                    }
                    err.emit();
-                token?
+                    if let Some(token) = token {
+                        token
+                    } else {
+                        preceded_by_whitespace = true;
+                        continue;
+                    }
                }
                rustc_lexer::TokenKind::Eof => token::Eof,
-        })
+            };
+            let span = self.mk_sp(start, self.pos);
+            return (Token::new(kind, span), preceded_by_whitespace);
+        }
+    }
+
+    /// Report a fatal lexical error with a given span.
+    fn fatal_span(&self, sp: Span, m: &str) -> ! {
+        self.sess.span_diagnostic.span_fatal(sp, m)
+    }
+
+    /// Report a lexical error with a given span.
+    fn err_span(&self, sp: Span, m: &str) {
+        self.sess.span_diagnostic.struct_span_err(sp, m).emit();
+    }
+
+    /// Report a fatal error spanning [`from_pos`, `to_pos`).
+    fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> ! {
+        self.fatal_span(self.mk_sp(from_pos, to_pos), m)
+    }
+
+    /// Report a lexical error spanning [`from_pos`, `to_pos`).
+    fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
+        self.err_span(self.mk_sp(from_pos, to_pos), m)
+    }
+
+    fn struct_fatal_span_char(
+        &self,
+        from_pos: BytePos,
+        to_pos: BytePos,
+        m: &str,
+        c: char,
+    ) -> DiagnosticBuilder<'a, !> {
+        self.sess
+            .span_diagnostic
+            .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
+    }
+
+    fn struct_err_span_char(
+        &self,
+        from_pos: BytePos,
+        to_pos: BytePos,
+        m: &str,
+        c: char,
+    ) -> DiagnosticBuilder<'a, ErrorGuaranteed> {
+        self.sess
+            .span_diagnostic
+            .struct_span_err(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
+    }
+
+    /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly
+    /// complain about it.
+    fn lint_unicode_text_flow(&self, start: BytePos) {
+        // Opening delimiter of the length 2 is not included into the comment text.
+        let content_start = start + BytePos(2);
+        let content = self.str_from(content_start);
+        if contains_text_flow_control_chars(content) {
+            let span = self.mk_sp(start, self.pos);
+            self.sess.buffer_lint_with_diagnostic(
+                &TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
+                span,
+                ast::CRATE_NODE_ID,
+                "unicode codepoint changing visible direction of text present in comment",
+                BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()),
+            );
+        }
    }

    fn cook_doc_comment(