Support lists and stylings in more places for rustc --explain

2024-11-21 22:34:05 +00:00 · 2024-06-26 14:02:52 +00:00 · 2024-06-26 14:02:52 +00:00 · 5824ab178b
commit 5824ab178b
parent bda221a0eb
5 changed files with 96 additions and 36 deletions
--- a/compiler/rustc_error_codes/src/error_codes/E0373.md
+++ b/compiler/rustc_error_codes/src/error_codes/E0373.md
@ -70,4 +70,4 @@ fn spawn<F: Future + Send + 'static>(future: F) {

 Similarly to closures, `async` blocks are not executed immediately and may
 capture closed-over data by reference. For more information, see
-https://rust-lang.github.io/async-book/03_async_await/01_chapter.html.
+<https://rust-lang.github.io/async-book/03_async_await/01_chapter.html>.
--- a/compiler/rustc_error_codes/src/error_codes/E0378.md
+++ b/compiler/rustc_error_codes/src/error_codes/E0378.md
@ -20,7 +20,7 @@ where

 The `DispatchFromDyn` trait currently can only be implemented for
 builtin pointer types and structs that are newtype wrappers around them
-— that is, the struct must have only one field (except for`PhantomData`),
+— that is, the struct must have only one field (except for `PhantomData`),
 and that field must itself implement `DispatchFromDyn`.

 ```
--- a/compiler/rustc_errors/src/lib.rs
+++ b/compiler/rustc_errors/src/lib.rs
@ -15,6 +15,7 @@
 #![feature(box_patterns)]
 #![feature(error_reporter)]
 #![feature(extract_if)]
+#![feature(if_let_guard)]
 #![feature(let_chains)]
 #![feature(negative_impls)]
 #![feature(never_type)]
--- a/compiler/rustc_errors/src/markdown/parse.rs
+++ b/compiler/rustc_errors/src/markdown/parse.rs
@ -10,15 +10,15 @@ const CBK: &[u8] = b"```";
 const CIL: &[u8] = b"`";
 const CMT_E: &[u8] = b"-->";
 const CMT_S: &[u8] = b"<!--";
-const EMP: &[u8] = b"_";
+const EMP_U: &[u8] = b"_"; // emphasis delimiter, underscore form
+const EMP_A: &[u8] = b"*"; // emphasis delimiter, asterisk form
 const HDG: &[u8] = b"#";
 const LNK_CHARS: &str = "$-_.+!*'()/&?=:%";
 const LNK_E: &[u8] = b"]";
 const LNK_S: &[u8] = b"[";
-const STG: &[u8] = b"**";
+const STG_U: &[u8] = b"__"; // strong delimiter, underscore form
+const STG_A: &[u8] = b"**"; // strong delimiter, asterisk form
 const STK: &[u8] = b"~~";
-const UL1: &[u8] = b"* ";
-const UL2: &[u8] = b"- ";

 /// Pattern replacements
 const REPLACEMENTS: &[(&str, &str)] = &[
@ -100,22 +100,29 @@ fn parse_recursive<'a>(buf: &'a [u8], ctx: Context) -> MdStream<'_> {
        };

        let res: ParseResult<'_> = match (top_blk, prev) {
-            (_, Newline | Whitespace) if loop_buf.starts_with(CMT_S) => {
+            _ if loop_buf.starts_with(CMT_S) => {
                parse_simple_pat(loop_buf, CMT_S, CMT_E, Po::TrimNoEsc, MdTree::Comment)
            }
            (true, Newline) if loop_buf.starts_with(CBK) => Some(parse_codeblock(loop_buf)),
-            (_, Newline | Whitespace) if loop_buf.starts_with(CIL) => parse_codeinline(loop_buf),
+            _ if loop_buf.starts_with(CIL) => parse_codeinline(loop_buf),
            (true, Newline | Whitespace) if loop_buf.starts_with(HDG) => parse_heading(loop_buf),
            (true, Newline) if loop_buf.starts_with(BRK) => {
                Some((MdTree::HorizontalRule, parse_to_newline(loop_buf).1))
            }
-            (_, Newline | Whitespace) if loop_buf.starts_with(EMP) => {
-                parse_simple_pat(loop_buf, EMP, EMP, Po::None, MdTree::Emphasis)
+            (_, Newline) if unordered_list_start(loop_buf) => Some(parse_unordered_li(loop_buf)),
+            (_, Newline | Whitespace) if loop_buf.starts_with(STG_U) => {
+                parse_simple_pat(loop_buf, STG_U, STG_U, Po::None, MdTree::Strong)
            }
-            (_, Newline | Whitespace) if loop_buf.starts_with(STG) => {
-                parse_simple_pat(loop_buf, STG, STG, Po::None, MdTree::Strong)
+            _ if loop_buf.starts_with(STG_A) => {
+                parse_simple_pat(loop_buf, STG_A, STG_A, Po::None, MdTree::Strong)
            }
-            (_, Newline | Whitespace) if loop_buf.starts_with(STK) => {
+            (_, Newline | Whitespace) if loop_buf.starts_with(EMP_U) => {
+                parse_simple_pat(loop_buf, EMP_U, EMP_U, Po::None, MdTree::Emphasis)
+            }
+            _ if loop_buf.starts_with(EMP_A) => {
+                parse_simple_pat(loop_buf, EMP_A, EMP_A, Po::None, MdTree::Emphasis)
+            }
+            _ if loop_buf.starts_with(STK) => {
                parse_simple_pat(loop_buf, STK, STK, Po::None, MdTree::Strikethrough)
            }
            (_, Newline | Whitespace) if loop_buf.starts_with(ANC_S) => {
@ -130,11 +137,8 @@ fn parse_recursive<'a>(buf: &'a [u8], ctx: Context) -> MdStream<'_> {
                    _ => None,
                }
            }
-            (_, Newline) if (loop_buf.starts_with(UL1) || loop_buf.starts_with(UL2)) => {
-                Some(parse_unordered_li(loop_buf))
-            }
            (_, Newline) if ord_list_start(loop_buf).is_some() => Some(parse_ordered_li(loop_buf)),
-            (_, Newline | Whitespace) if loop_buf.starts_with(LNK_S) => {
+            _ if loop_buf.starts_with(LNK_S) => {
                parse_any_link(loop_buf, top_blk && prev == Prev::Newline)
            }
            (_, Escape | _) => None,
@ -251,7 +255,6 @@ fn parse_heading(buf: &[u8]) -> ParseResult<'_> {

 /// Bulleted list
 fn parse_unordered_li(buf: &[u8]) -> Parsed<'_> {
-    debug_assert!(buf.starts_with(b"* ") || buf.starts_with(b"- "));
    let (txt, rest) = get_indented_section(&buf[2..]);
    let ctx = Context { top_block: false, prev: Prev::Whitespace };
    let stream = parse_recursive(trim_ascii_start(txt), ctx);
@ -267,25 +270,28 @@ fn parse_ordered_li(buf: &[u8]) -> Parsed<'_> {
    (MdTree::OrderedListItem(num, stream), rest)
 }

-/// Find first line that isn't empty or doesn't start with whitespace, that will
-/// be our contents
 fn get_indented_section(buf: &[u8]) -> (&[u8], &[u8]) {
-    let mut end = buf.len();
-    for (idx, window) in buf.windows(2).enumerate() {
-        let &[ch, next_ch] = window else { unreachable!("always 2 elements") };
-        if idx >= buf.len().saturating_sub(2) && next_ch == b'\n' {
-            // End of stream
-            end = buf.len().saturating_sub(1);
-            break;
-        } else if ch == b'\n' && (!next_ch.is_ascii_whitespace() || next_ch == b'\n') {
-            end = idx;
-            break;
+    let mut lines = buf.split(|&byte| byte == b'\n'); // scan one `\n`-separated line at a time
+    let mut end = lines.next().map_or(0, |line| line.len()); // first line is always included
+    for line in lines {
+        if let Some(first) = line.first() { // empty lines skip the check and stay in the section
+            if unordered_list_start(line) || !first.is_ascii_whitespace() {
+                break; // new bullet or unindented text: stop at the `\n` before this line
+            }
         }
+        end += line.len() + 1; // + 1 counts the `\n` in front of this line
     }

    (&buf[..end], &buf[end..])
 }

+fn unordered_list_start(mut buf: &[u8]) -> bool { // does `buf` begin a bullet list item?
+    while let [b' ', rest @ ..] = buf { // skip leading spaces (only b' '; tabs are not skipped)
+        buf = rest;
+    }
+    matches!(buf, [b'*' | b'-', b' ', ..]) // marker `*` or `-`, followed by one space
+}
+
 /// Verify a valid ordered list start (e.g. `1.`) and parse it. Returns the
 /// parsed number and offset of character after the dot.
 fn ord_list_start(buf: &[u8]) -> Option<(u16, usize)> {
--- a/compiler/rustc_errors/src/markdown/tests/parse.rs
+++ b/compiler/rustc_errors/src/markdown/tests/parse.rs
@ -4,13 +4,13 @@ use ParseOpt as PO;
 #[test]
 fn test_parse_simple() {
    let buf = "**abcd** rest";
-    let (t, r) = parse_simple_pat(buf.as_bytes(), STG, STG, PO::None, MdTree::Strong).unwrap();
+    let (t, r) = parse_simple_pat(buf.as_bytes(), b"**", b"**", PO::None, MdTree::Strong).unwrap();
    assert_eq!(t, MdTree::Strong("abcd"));
    assert_eq!(r, b" rest");

    // Escaping should fail
    let buf = r"**abcd\** rest";
-    let res = parse_simple_pat(buf.as_bytes(), STG, STG, PO::None, MdTree::Strong);
+    let res = parse_simple_pat(buf.as_bytes(), b"**", b"**", PO::None, MdTree::Strong);
    assert!(res.is_none());
 }

@ -141,12 +141,12 @@ fn test_indented_section() {
    assert_eq!(str::from_utf8(r).unwrap(), "\nnot ind");

    let (txt, rest) = get_indented_section(IND2.as_bytes());
-    assert_eq!(str::from_utf8(txt).unwrap(), "test end of stream\n  1\n  2");
-    assert_eq!(str::from_utf8(rest).unwrap(), "\n");
+    assert_eq!(str::from_utf8(txt).unwrap(), "test end of stream\n  1\n  2\n");
+    assert_eq!(str::from_utf8(rest).unwrap(), "");

    let (txt, rest) = get_indented_section(IND3.as_bytes());
-    assert_eq!(str::from_utf8(txt).unwrap(), "test empty lines\n  1\n  2");
-    assert_eq!(str::from_utf8(rest).unwrap(), "\n\nnot ind");
+    assert_eq!(str::from_utf8(txt).unwrap(), "test empty lines\n  1\n  2\n");
+    assert_eq!(str::from_utf8(rest).unwrap(), "\nnot ind");
 }

 const HBT: &str = r"# Heading
@ -310,3 +310,56 @@ fn test_code_at_start() {
    let res = entrypoint(CODE_STARTLINE);
    assert_eq!(res, expected);
 }
+
+#[test]
+fn test_code_in_parens() { // inline code directly after `(`, with no whitespace before it
+    let expected =
+        vec![MdTree::PlainText("("), MdTree::CodeInline("Foo"), MdTree::PlainText(")")].into();
+    let res = entrypoint("(`Foo`)");
+    assert_eq!(res, expected);
+}
+
+const LIST_WITH_SPACE: &str = "
+para
+ * l1
+ * l2
+";
+
+#[test]
+fn test_list_with_space() { // bullets indented by one space after a paragraph are list items
+    let expected = vec![
+        MdTree::PlainText("para"),
+        MdTree::ParagraphBreak,
+        MdTree::UnorderedListItem(vec![MdTree::PlainText("l1")].into()),
+        MdTree::LineBreak,
+        MdTree::UnorderedListItem(vec![MdTree::PlainText("l2")].into()),
+    ]
+    .into();
+    let res = entrypoint(LIST_WITH_SPACE);
+    assert_eq!(res, expected);
+}
+
+const SNAKE_CASE: &str = "
+foo*bar*
+foo**bar**
+foo_bar_
+foo__bar__
+";
+
+#[test]
+fn test_snake_case() { // asterisk delimiters style mid-word; underscore ones stay plain text
+    let expected = vec![
+        MdTree::PlainText("foo"),
+        MdTree::Emphasis("bar"),
+        MdTree::PlainText(" "),
+        MdTree::PlainText("foo"),
+        MdTree::Strong("bar"),
+        MdTree::PlainText(" "),
+        MdTree::PlainText("foo_bar_"),
+        MdTree::PlainText(" "),
+        MdTree::PlainText("foo__bar__"),
+    ]
+    .into();
+    let res = entrypoint(SNAKE_CASE);
+    assert_eq!(res, expected);
+}