Rollup merge of #43918 - mystor:rustdoc-pound, r=QuietMisdreavus

Don't highlight # which does not start an attribute in rustdoc Currently when we highlight some macros for rustdoc (e.g. `quote!` from https://github.com/dtolnay/quote), we get really bad syntax highlighting, because we assume that every token between a `#` character and the next `]` in the source must be an attribute. This patch improves that highlighting behavior to instead only highlight after finding the `[` token after the `#` token. (NOTE: I've only run this patch against https://github.com/nrc/rustdoc-highlight so if it doesn't build on travis that's why - I don't have a recent rustc build on this laptop) I'm guessing r? @steveklabnik
2025-05-14 02:49:40 +00:00 · 2017-08-29 21:40:55 +00:00 · 2017-08-29 21:40:55 +00:00 · b1fff23f60
commit b1fff23f60
parent 64a21f875f 2f19383742
2 changed files with 49 additions and 17 deletions
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@ -172,6 +172,21 @@ impl<'a> Classifier<'a> {
        }
    }

+    /// Gets the next token out of the lexer, emitting fatal errors if lexing fails.
+    fn try_next_token(&mut self) -> io::Result<TokenAndSpan> {
+        match self.lexer.try_next_token() {
+            Ok(tas) => Ok(tas),
+            Err(_) => {
+                self.lexer.emit_fatal_errors();
+                self.lexer.sess.span_diagnostic
+                    .struct_warn("Backing out of syntax highlighting")
+                    .note("You probably did not intend to render this as a rust code-block")
+                    .emit();
+                Err(io::Error::new(io::ErrorKind::Other, ""))
+            }
+        }
+    }
+
    /// Exhausts the `lexer` writing the output into `out`.
    ///
    /// The general structure for this method is to iterate over each token,
@ -183,18 +198,7 @@ impl<'a> Classifier<'a> {
                                   out: &mut W)
                                   -> io::Result<()> {
        loop {
-            let next = match self.lexer.try_next_token() {
-                Ok(tas) => tas,
-                Err(_) => {
-                    self.lexer.emit_fatal_errors();
-                    self.lexer.sess.span_diagnostic
-                        .struct_warn("Backing out of syntax highlighting")
-                        .note("You probably did not intend to render this as a rust code-block")
-                        .emit();
-                    return Err(io::Error::new(io::ErrorKind::Other, ""));
-                }
-            };
-
+            let next = self.try_next_token()?;
            if next.tok == token::Eof {
                break;
            }
@ -255,13 +259,37 @@ impl<'a> Classifier<'a> {
                }
            }

-            // This is the start of an attribute. We're going to want to
+            // This might be the start of an attribute. We're going to want to
            // continue highlighting it as an attribute until the ending ']' is
            // seen, so skip out early. Down below we terminate the attribute
            // span when we see the ']'.
            token::Pound => {
-                self.in_attribute = true;
-                out.enter_span(Class::Attribute)?;
+                // We can't be sure that our # begins an attribute (it could
+                // just be appearing in a macro) until we read either `#![` or
+                // `#[` from the input stream.
+                //
+                // We don't want to start highlighting as an attribute until
+                // we're confident there is going to be a ] coming up, as
+                // otherwise # tokens in macros highlight the rest of the input
+                // as an attribute.
+
+                // Case 1: #![inner_attribute]
+                if self.lexer.peek().tok == token::Not {
+                    self.try_next_token()?; // NOTE: consumes `!` token!
+                    if self.lexer.peek().tok == token::OpenDelim(token::Bracket) {
+                        self.in_attribute = true;
+                        out.enter_span(Class::Attribute)?;
+                    }
+                    out.string("#", Class::None, None)?;
+                    out.string("!", Class::None, None)?;
+                    return Ok(());
+                }
+
+                // Case 2: #[outer_attribute]
+                if self.lexer.peek().tok == token::OpenDelim(token::Bracket) {
+                    self.in_attribute = true;
+                    out.enter_span(Class::Attribute)?;
+                }
                out.string("#", Class::None, None)?;
                return Ok(());
            }
--- a/src/test/rustdoc/issue-41783.rs
+++ b/src/test/rustdoc/issue-41783.rs
@ -12,8 +12,10 @@
 // @!has - 'space'
 // @!has - 'comment'
 // @has - '# <span class="ident">single'
-// @has - '#<span class="attribute"># <span class="ident">double</span>'
-// @has - '#<span class="attribute">#<span class="attribute"># <span class="ident">triple</span>'
+// @has - '## <span class="ident">double</span>'
+// @has - '### <span class="ident">triple</span>'
+// @has - '<span class="attribute">#[<span class="ident">outer</span>]</span>'
+// @has - '<span class="attribute">#![<span class="ident">inner</span>]</span>'

 /// ```no_run
 /// # # space
@ -21,5 +23,7 @@
 /// ## single
 /// ### double
 /// #### triple
+/// ##[outer]
+/// ##![inner]
 /// ```
 pub struct Foo;