Rollup merge of #126994 - Alexendoo:explain-markdown, r=tgross35

Support lists and stylings in more places for `rustc --explain`

Adds support for `*foo*`, stylings not immediately following whitespace, e.g. ``(`Foo`)``, and lists starting with whitespace:

```md
* previously supported
```

```md
 * now also supported
```

These are fairly common in the existing error docs; some before/after examples:

### E0460

![image](https://github.com/rust-lang/rust/assets/1830331/4d0dc5dd-b71f-48b1-97ae-9f7199e952ed)
![image](https://github.com/rust-lang/rust/assets/1830331/4bbcb1e4-99ba-4d0d-b338-fe19d96a5eb1)

### E0059

![image](https://github.com/rust-lang/rust/assets/1830331/8457f69a-3126-4777-aa4a-953f7b29f59b)
![image](https://github.com/rust-lang/rust/assets/1830331/ac2189f8-512e-4b3b-886d-6c4a619d17f2)
2024-11-22 06:44:35 +00:00 · 2024-07-23 19:42:35 +02:00 · 2024-07-23 19:42:35 +02:00 · 8e206c0387
commit 8e206c0387
parent f1a29ee226 5824ab178b
5 changed files with 96 additions and 36 deletions
--- a/compiler/rustc_error_codes/src/error_codes/E0373.md
+++ b/compiler/rustc_error_codes/src/error_codes/E0373.md
@ -70,4 +70,4 @@ fn spawn<F: Future + Send + 'static>(future: F) {
 Similarly to closures, `async` blocks are not executed immediately and may
 capture closed-over data by reference. For more information, see
-https://rust-lang.github.io/async-book/03_async_await/01_chapter.html.
+<https://rust-lang.github.io/async-book/03_async_await/01_chapter.html>.
--- a/compiler/rustc_error_codes/src/error_codes/E0378.md
+++ b/compiler/rustc_error_codes/src/error_codes/E0378.md
@ -20,7 +20,7 @@ where
 The `DispatchFromDyn` trait currently can only be implemented for
 builtin pointer types and structs that are newtype wrappers around them
-— that is, the struct must have only one field (except for`PhantomData`),
+— that is, the struct must have only one field (except for `PhantomData`),
 and that field must itself implement `DispatchFromDyn`.
 ```
--- a/compiler/rustc_errors/src/lib.rs
+++ b/compiler/rustc_errors/src/lib.rs
@ -15,6 +15,7 @@
 #![feature(box_patterns)]
 #![feature(error_reporter)]
 #![feature(extract_if)]
 #![feature(if_let_guard)]
 #![feature(let_chains)]
 #![feature(negative_impls)]
 #![feature(never_type)]
--- a/compiler/rustc_errors/src/markdown/parse.rs
+++ b/compiler/rustc_errors/src/markdown/parse.rs
@ -10,15 +10,15 @@ const CBK: &[u8] = b"```";
 const CIL: &[u8] = b"`";
 const CMT_E: &[u8] = b"-->";
 const CMT_S: &[u8] = b"<!--";
-const EMP: &[u8] = b"_";
+const EMP_U: &[u8] = b"_";
 const EMP_A: &[u8] = b"*";
 const HDG: &[u8] = b"#";
 const LNK_CHARS: &str = "$-_.+!*'()/&?=:%";
 const LNK_E: &[u8] = b"]";
 const LNK_S: &[u8] = b"[";
-const STG: &[u8] = b"**";
+const STG_U: &[u8] = b"__";
 const STG_A: &[u8] = b"**";
 const STK: &[u8] = b"~~";
 const UL1: &[u8] = b"* ";
 const UL2: &[u8] = b"- ";
 /// Pattern replacements
 const REPLACEMENTS: &[(&str, &str)] = &[
@ -100,22 +100,29 @@ fn parse_recursive<'a>(buf: &'a [u8], ctx: Context) -> MdStream<'_> {
        };
        let res: ParseResult<'_> = match (top_blk, prev) {
-            (_, Newline | Whitespace) if loop_buf.starts_with(CMT_S) => {
+            _ if loop_buf.starts_with(CMT_S) => {
                parse_simple_pat(loop_buf, CMT_S, CMT_E, Po::TrimNoEsc, MdTree::Comment)
            }
            (true, Newline) if loop_buf.starts_with(CBK) => Some(parse_codeblock(loop_buf)),
-            (_, Newline | Whitespace) if loop_buf.starts_with(CIL) => parse_codeinline(loop_buf),
+            _ if loop_buf.starts_with(CIL) => parse_codeinline(loop_buf),
            (true, Newline | Whitespace) if loop_buf.starts_with(HDG) => parse_heading(loop_buf),
            (true, Newline) if loop_buf.starts_with(BRK) => {
                Some((MdTree::HorizontalRule, parse_to_newline(loop_buf).1))
            }
-            (_, Newline | Whitespace) if loop_buf.starts_with(EMP) => {
+            (_, Newline) if unordered_list_start(loop_buf) => Some(parse_unordered_li(loop_buf)),
-                parse_simple_pat(loop_buf, EMP, EMP, Po::None, MdTree::Emphasis)
+            (_, Newline | Whitespace) if loop_buf.starts_with(STG_U) => {
                parse_simple_pat(loop_buf, STG_U, STG_U, Po::None, MdTree::Strong)
            }
-            (_, Newline | Whitespace) if loop_buf.starts_with(STG) => {
+            _ if loop_buf.starts_with(STG_A) => {
-                parse_simple_pat(loop_buf, STG, STG, Po::None, MdTree::Strong)
+                parse_simple_pat(loop_buf, STG_A, STG_A, Po::None, MdTree::Strong)
            }
-            (_, Newline | Whitespace) if loop_buf.starts_with(STK) => {
+            (_, Newline | Whitespace) if loop_buf.starts_with(EMP_U) => {
                parse_simple_pat(loop_buf, EMP_U, EMP_U, Po::None, MdTree::Emphasis)
            }
            _ if loop_buf.starts_with(EMP_A) => {
                parse_simple_pat(loop_buf, EMP_A, EMP_A, Po::None, MdTree::Emphasis)
            }
            _ if loop_buf.starts_with(STK) => {
                parse_simple_pat(loop_buf, STK, STK, Po::None, MdTree::Strikethrough)
            }
            (_, Newline | Whitespace) if loop_buf.starts_with(ANC_S) => {
@ -130,11 +137,8 @@ fn parse_recursive<'a>(buf: &'a [u8], ctx: Context) -> MdStream<'_> {
                    _ => None,
                }
            }
            (_, Newline) if (loop_buf.starts_with(UL1) || loop_buf.starts_with(UL2)) => {
                Some(parse_unordered_li(loop_buf))
            }
            (_, Newline) if ord_list_start(loop_buf).is_some() => Some(parse_ordered_li(loop_buf)),
-            (_, Newline | Whitespace) if loop_buf.starts_with(LNK_S) => {
+            _ if loop_buf.starts_with(LNK_S) => {
                parse_any_link(loop_buf, top_blk && prev == Prev::Newline)
            }
            (_, Escape | _) => None,
@ -251,7 +255,6 @@ fn parse_heading(buf: &[u8]) -> ParseResult<'_> {
 /// Bulleted list
 fn parse_unordered_li(buf: &[u8]) -> Parsed<'_> {
    debug_assert!(buf.starts_with(b"* ") || buf.starts_with(b"- "));
    let (txt, rest) = get_indented_section(&buf[2..]);
    let ctx = Context { top_block: false, prev: Prev::Whitespace };
    let stream = parse_recursive(trim_ascii_start(txt), ctx);
@ -267,25 +270,28 @@ fn parse_ordered_li(buf: &[u8]) -> Parsed<'_> {
    (MdTree::OrderedListItem(num, stream), rest)
 }
 /// Find first line that isn't empty or doesn't start with whitespace, that will
 /// be our contents
 fn get_indented_section(buf: &[u8]) -> (&[u8], &[u8]) {
-    let mut end = buf.len();
+    let mut lines = buf.split(|&byte| byte == b'\n');
-    for (idx, window) in buf.windows(2).enumerate() {
+    let mut end = lines.next().map_or(0, |line| line.len());
-        let &[ch, next_ch] = window else { unreachable!("always 2 elements") };
+    for line in lines {
-        if idx >= buf.len().saturating_sub(2) && next_ch == b'\n' {
+        if let Some(first) = line.first() {
-            // End of stream
+            if unordered_list_start(line) || !first.is_ascii_whitespace() {
-            end = buf.len().saturating_sub(1);
+                break;
-            break;
+            }
        } else if ch == b'\n' && (!next_ch.is_ascii_whitespace() || next_ch == b'\n') {
            end = idx;
            break;
        }
        end += line.len() + 1;
    }
    (&buf[..end], &buf[end..])
 }
 /// Reports whether `buf` begins an unordered list item.
 ///
 /// Any run of leading ASCII spaces (`b' '` only; tabs are not skipped) is
 /// ignored, after which the bytes must start with a `*` or `-` marker
 /// immediately followed by a single space.
 fn unordered_list_start(mut buf: &[u8]) -> bool {
    // Drop the leading indentation; an all-space buffer becomes empty.
    let indent = buf.iter().position(|&byte| byte != b' ').unwrap_or(buf.len());
    buf = &buf[indent..];
    buf.starts_with(b"* ") || buf.starts_with(b"- ")
 }
 /// Verify a valid ordered list start (e.g. `1.`) and parse it. Returns the
 /// parsed number and offset of character after the dot.
 fn ord_list_start(buf: &[u8]) -> Option<(u16, usize)> {
--- a/compiler/rustc_errors/src/markdown/tests/parse.rs
+++ b/compiler/rustc_errors/src/markdown/tests/parse.rs
@ -4,13 +4,13 @@ use ParseOpt as PO;
 #[test]
 fn test_parse_simple() {
    let buf = "**abcd** rest";
-    let (t, r) = parse_simple_pat(buf.as_bytes(), STG, STG, PO::None, MdTree::Strong).unwrap();
+    let (t, r) = parse_simple_pat(buf.as_bytes(), b"**", b"**", PO::None, MdTree::Strong).unwrap();
    assert_eq!(t, MdTree::Strong("abcd"));
    assert_eq!(r, b" rest");
    // Escaping should fail
    let buf = r"**abcd\** rest";
-    let res = parse_simple_pat(buf.as_bytes(), STG, STG, PO::None, MdTree::Strong);
+    let res = parse_simple_pat(buf.as_bytes(), b"**", b"**", PO::None, MdTree::Strong);
    assert!(res.is_none());
 }
@ -141,12 +141,12 @@ fn test_indented_section() {
    assert_eq!(str::from_utf8(r).unwrap(), "\nnot ind");
    let (txt, rest) = get_indented_section(IND2.as_bytes());
-    assert_eq!(str::from_utf8(txt).unwrap(), "test end of stream\n  1\n  2");
+    assert_eq!(str::from_utf8(txt).unwrap(), "test end of stream\n  1\n  2\n");
-    assert_eq!(str::from_utf8(rest).unwrap(), "\n");
+    assert_eq!(str::from_utf8(rest).unwrap(), "");
    let (txt, rest) = get_indented_section(IND3.as_bytes());
-    assert_eq!(str::from_utf8(txt).unwrap(), "test empty lines\n  1\n  2");
+    assert_eq!(str::from_utf8(txt).unwrap(), "test empty lines\n  1\n  2\n");
-    assert_eq!(str::from_utf8(rest).unwrap(), "\n\nnot ind");
+    assert_eq!(str::from_utf8(rest).unwrap(), "\nnot ind");
 }
 const HBT: &str = r"# Heading
@ -310,3 +310,56 @@ fn test_code_at_start() {
    let res = entrypoint(CODE_STARTLINE);
    assert_eq!(res, expected);
 }
 /// Inline code directly after a non-whitespace character (here an opening
 /// parenthesis) must be parsed as `CodeInline`, with the surrounding
 /// parentheses kept as plain text.
 #[test]
 fn test_code_in_parens() {
    let parsed = entrypoint("(`Foo`)");
    let wanted = vec![
        MdTree::PlainText("("),
        MdTree::CodeInline("Foo"),
        MdTree::PlainText(")"),
    ]
    .into();
    assert_eq!(parsed, wanted);
 }
 const LIST_WITH_SPACE: &str = "
 para
 * l1
 * l2
 ";
 /// Parsing `LIST_WITH_SPACE` must yield the paragraph text, a paragraph
 /// break, and then the two bullets as separate unordered list items joined
 /// by a line break.
 #[test]
 fn test_list_with_space() {
    let parsed = entrypoint(LIST_WITH_SPACE);

    let first_item = MdTree::UnorderedListItem(vec![MdTree::PlainText("l1")].into());
    let second_item = MdTree::UnorderedListItem(vec![MdTree::PlainText("l2")].into());
    let wanted = vec![
        MdTree::PlainText("para"),
        MdTree::ParagraphBreak,
        first_item,
        MdTree::LineBreak,
        second_item,
    ]
    .into();

    assert_eq!(parsed, wanted);
 }
 const SNAKE_CASE: &str = "
 foo*bar*
 foo**bar**
 foo_bar_
 foo__bar__
 ";
 /// Asterisk stylings (`*` / `**`) are recognised in the middle of a word,
 /// while underscore stylings (`_` / `__`) are not, so identifiers such as
 /// `foo_bar_` stay plain text. Each line of `SNAKE_CASE` is separated from
 /// the next by a single plain-text space in the expected stream.
 #[test]
 fn test_snake_case() {
    let parsed = entrypoint(SNAKE_CASE);
    let wanted = vec![
        // foo*bar*
        MdTree::PlainText("foo"),
        MdTree::Emphasis("bar"),
        MdTree::PlainText(" "),
        // foo**bar**
        MdTree::PlainText("foo"),
        MdTree::Strong("bar"),
        MdTree::PlainText(" "),
        // foo_bar_
        MdTree::PlainText("foo_bar_"),
        MdTree::PlainText(" "),
        // foo__bar__
        MdTree::PlainText("foo__bar__"),
    ]
    .into();
    assert_eq!(parsed, wanted);
 }