Add a simple markdown parser for formatting rustc --explain

Currently, the output of `rustc --explain foo` displays the raw markdown in a pager. This is acceptable, but using actual formatting makes it easier to understand. This patch consists of three major components: 1. A markdown parser. This is an extremely simple non-backtracking recursive implementation that requires normalization of the final token stream. 2. A utility to write the token stream to an output buffer. 3. Configuration within rustc_driver_impl to invoke this combination for `--explain`. Like the current implementation, it first attempts to print to a pager, falling back to colorized terminal output, and to standard print as a last resort. If color is disabled, or if the output does not support it, or if printing with color fails, it will write the raw markdown (which matches current behavior). Pagers known to support color are: `less` (with `-r`), `bat` (aka `catbat`), and `delta`. The markdown parser does not support the entire markdown specification, but should support the following with reasonable accuracy: - Headings, including formatting - Comments - Code, inline and fenced block (no indented block) - Strong, emphasis, and strikethrough formatted text - Links, anchor, inline, and reference-style - Horizontal rules - Unordered and ordered list items, including formatting. This parser and writer should be reusable by other systems if ever needed.
2024-11-22 06:44:35 +00:00 · 2022-12-19 12:09:40 -06:00 · 2022-12-19 12:09:40 -06:00 · 6a1c10bd85
commit 6a1c10bd85
parent 8aed93d912
15 changed files with 1408 additions and 19 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -4746,9 +4746,9 @@ dependencies = [

 [[package]]
 name = "termcolor"
-version = "1.1.3"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
+checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6"
 dependencies = [
 "winapi-util",
 ]
--- a/compiler/rustc_driver_impl/src/lib.rs
+++ b/compiler/rustc_driver_impl/src/lib.rs
@ -24,6 +24,7 @@ use rustc_data_structures::profiling::{
 };
 use rustc_data_structures::sync::SeqCst;
 use rustc_errors::registry::{InvalidErrorCode, Registry};
+use rustc_errors::{markdown, ColorConfig};
 use rustc_errors::{
    DiagnosticMessage, ErrorGuaranteed, Handler, PResult, SubdiagnosticMessage, TerminalUrl,
 };
@ -282,7 +283,7 @@ fn run_compiler(
    interface::set_thread_safe_mode(&sopts.unstable_opts);

    if let Some(ref code) = matches.opt_str("explain") {
-        handle_explain(&early_error_handler, diagnostics_registry(), code);
+        handle_explain(&early_error_handler, diagnostics_registry(), code, sopts.color);
        return Ok(());
    }

@ -540,7 +541,7 @@ impl Compilation {
    }
 }

-fn handle_explain(handler: &EarlyErrorHandler, registry: Registry, code: &str) {
+fn handle_explain(handler: &EarlyErrorHandler, registry: Registry, code: &str, color: ColorConfig) {
    let upper_cased_code = code.to_ascii_uppercase();
    let normalised =
        if upper_cased_code.starts_with('E') { upper_cased_code } else { format!("E{code:0>4}") };
@ -564,7 +565,7 @@ fn handle_explain(handler: &EarlyErrorHandler, registry: Registry, code: &str) {
                text.push('\n');
            }
            if io::stdout().is_terminal() {
-                show_content_with_pager(&text);
+                show_md_content_with_pager(&text, color);
            } else {
                safe_print!("{text}");
            }
@ -575,34 +576,72 @@ fn handle_explain(handler: &EarlyErrorHandler, registry: Registry, code: &str) {
    }
 }

-fn show_content_with_pager(content: &str) {
+/// If color is always, or auto with a pager known to support color, print formatted &
+/// colorized markdown. If color is never or if formatted printing fails, print the raw text.
+///
+/// Prefers a pager, falls back to standard print
+fn show_md_content_with_pager(content: &str, color: ColorConfig) {
+    let mut fallback_to_println = false;
    let pager_name = env::var_os("PAGER").unwrap_or_else(|| {
        if cfg!(windows) { OsString::from("more.com") } else { OsString::from("less") }
    });

-    let mut fallback_to_println = false;
+    let mut cmd = Command::new(&pager_name);
+    // FIXME: find if other pagers accept color options
+    let mut print_formatted = if pager_name == "less" {
+        cmd.arg("-r");
+        true
+    } else if ["bat", "catbat", "delta"].iter().any(|v| *v == pager_name) {
+        true
+    } else {
+        false
+    };

-    match Command::new(pager_name).stdin(Stdio::piped()).spawn() {
-        Ok(mut pager) => {
-            if let Some(pipe) = pager.stdin.as_mut() {
-                if pipe.write_all(content.as_bytes()).is_err() {
-                    fallback_to_println = true;
-                }
-            }
+    if color == ColorConfig::Never {
+        print_formatted = false;
+    } else if color == ColorConfig::Always {
+        print_formatted = true;
+    }

-            if pager.wait().is_err() {
+    let mdstream = markdown::MdStream::parse_str(content);
+    let bufwtr = markdown::create_stdout_bufwtr();
+    let mut mdbuf = bufwtr.buffer();
+    if mdstream.write_termcolor_buf(&mut mdbuf).is_err() {
+        print_formatted = false;
+    }
+
+    if let Ok(mut pager) = cmd.stdin(Stdio::piped()).spawn() {
+        if let Some(pipe) = pager.stdin.as_mut() {
+            let res = if print_formatted {
+                pipe.write_all(mdbuf.as_slice())
+            } else {
+                pipe.write_all(content.as_bytes())
+            };
+
+            if res.is_err() {
                fallback_to_println = true;
            }
        }
-        Err(_) => {
+
+        if pager.wait().is_err() {
            fallback_to_println = true;
        }
+    } else {
+        fallback_to_println = true;
    }

    // If pager fails for whatever reason, we should still print the content
    // to standard output
    if fallback_to_println {
-        safe_print!("{content}");
+        let fmt_success = match color {
+            ColorConfig::Auto => io::stdout().is_terminal() && bufwtr.print(&mdbuf).is_ok(),
+            ColorConfig::Always => bufwtr.print(&mdbuf).is_ok(),
+            ColorConfig::Never => false,
+        };
+
+        if !fmt_success {
+            safe_print!("{content}");
+        }
    }
 }

--- a/compiler/rustc_errors/Cargo.toml
+++ b/compiler/rustc_errors/Cargo.toml
@ -20,7 +20,7 @@ rustc_hir = { path = "../rustc_hir" }
 rustc_lint_defs = { path = "../rustc_lint_defs" }
 rustc_type_ir = { path = "../rustc_type_ir" }
 unicode-width = "0.1.4"
-termcolor = "1.0"
+termcolor = "1.2.0"
 annotate-snippets = "0.9"
 termize = "0.1.1"
 serde = { version = "1.0.125", features = [ "derive" ] }
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@ -616,7 +616,7 @@ pub enum ColorConfig {
 }

 impl ColorConfig {
-    fn to_color_choice(self) -> ColorChoice {
+    pub fn to_color_choice(self) -> ColorChoice {
        match self {
            ColorConfig::Always => {
                if io::stderr().is_terminal() {
--- a/compiler/rustc_errors/src/lib.rs
+++ b/compiler/rustc_errors/src/lib.rs
@ -61,6 +61,7 @@ pub mod emitter;
 pub mod error;
 pub mod json;
 mod lock;
+pub mod markdown;
 pub mod registry;
 mod snippet;
 mod styled_buffer;
--- a/compiler/rustc_errors/src/markdown/mod.rs
+++ b/compiler/rustc_errors/src/markdown/mod.rs
@ -0,0 +1,76 @@
+//! A simple markdown parser that can write formatted text to the terminal
+//!
+//! Entrypoint is `MdStream::parse_str(...)`
+use std::io;
+
+use termcolor::{Buffer, BufferWriter, ColorChoice};
+mod parse;
+mod term;
+
+/// An AST representation of a Markdown document
+#[derive(Clone, Debug, Default, PartialEq)]
+pub struct MdStream<'a>(Vec<MdTree<'a>>);
+
+impl<'a> MdStream<'a> {
+    /// Parse a markdown string to a tokenstream
+    #[must_use]
+    pub fn parse_str(s: &str) -> MdStream<'_> {
+        parse::entrypoint(s)
+    }
+
+    /// Write formatted output to a termcolor buffer
+    pub fn write_termcolor_buf(&self, buf: &mut Buffer) -> io::Result<()> {
+        term::entrypoint(self, buf)
+    }
+}
+
+/// Create a termcolor `BufferWriter` for stdout with the `Always` color choice
+pub fn create_stdout_bufwtr() -> BufferWriter {
+    BufferWriter::stdout(ColorChoice::Always)
+}
+
+/// A single tokentree within a Markdown document
+#[derive(Clone, Debug, PartialEq)]
+pub enum MdTree<'a> {
+    /// Leaf types
+    Comment(&'a str),
+    CodeBlock {
+        txt: &'a str,
+        lang: Option<&'a str>,
+    },
+    CodeInline(&'a str),
+    Strong(&'a str),
+    Emphasis(&'a str),
+    Strikethrough(&'a str),
+    PlainText(&'a str),
+    /// [Foo](www.foo.com) or simple anchor <www.foo.com>
+    Link {
+        disp: &'a str,
+        link: &'a str,
+    },
+    /// `[Foo link][ref]`
+    RefLink {
+        disp: &'a str,
+        id: Option<&'a str>,
+    },
+    /// [ref]: www.foo.com
+    LinkDef {
+        id: &'a str,
+        link: &'a str,
+    },
+    /// Break between two paragraphs (double `\n`), not directly parsed but
+    /// added later
+    ParagraphBreak,
+    /// Break between two lines (single `\n`)
+    LineBreak,
+    HorizontalRule,
+    Heading(u8, MdStream<'a>),
+    OrderedListItem(u16, MdStream<'a>),
+    UnorderedListItem(MdStream<'a>),
+}
+
+impl<'a> From<Vec<MdTree<'a>>> for MdStream<'a> {
+    fn from(value: Vec<MdTree<'a>>) -> Self {
+        Self(value)
+    }
+}
--- a/compiler/rustc_errors/src/markdown/parse.rs
+++ b/compiler/rustc_errors/src/markdown/parse.rs
@ -0,0 +1,588 @@
+use crate::markdown::{MdStream, MdTree};
+use std::{iter, mem, str};
+
+/// Short aliases that we can use in match patterns. If an end pattern is not
+/// included, this type may be variable
+const ANC_E: &[u8] = b">";
+const ANC_S: &[u8] = b"<";
+const BRK: &[u8] = b"---";
+const CBK: &[u8] = b"```";
+const CIL: &[u8] = b"`";
+const CMT_E: &[u8] = b"-->";
+const CMT_S: &[u8] = b"<!--";
+const EMP: &[u8] = b"_";
+const HDG: &[u8] = b"#";
+const LNK_CHARS: &str = "$-_.+!*'()/&?=:%";
+const LNK_E: &[u8] = b"]";
+const LNK_S: &[u8] = b"[";
+const STG: &[u8] = b"**";
+const STK: &[u8] = b"~~";
+const UL1: &[u8] = b"* ";
+const UL2: &[u8] = b"- ";
+
+/// Pattern replacements
+const REPLACEMENTS: &[(&str, &str)] = &[
+    ("(c)", "©"),
+    ("(C)", "©"),
+    ("(r)", "®"),
+    ("(R)", "®"),
+    ("(tm)", "™"),
+    ("(TM)", "™"),
+    (":crab:", "🦀"),
+    ("\n", " "),
+];
+
+/// `(extracted, remaining)`
+type Parsed<'a> = (MdTree<'a>, &'a [u8]);
+/// Output of a parse function
+type ParseResult<'a> = Option<Parsed<'a>>;
+
+/// Parsing context
+#[derive(Clone, Copy, Debug, PartialEq)]
+struct Context {
+    /// If true, we are at the topmost level (not recursing a nested tt)
+    top_block: bool,
+    /// Previous character
+    prev: Prev,
+}
+
+/// Character class preceding this one
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum Prev {
+    Newline,
+    /// Whitespace that is not a newline
+    Whitespace,
+    Escape,
+    Any,
+}
+
+impl Default for Context {
+    /// Most common setting for non top-level parsing: not top block, not at
+    /// line start (preceded by whitespace, not escaped)
+    fn default() -> Self {
+        Self { top_block: false, prev: Prev::Whitespace }
+    }
+}
+
+/// Flags to simple parser function
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum ParseOpt {
+    /// Ignore escapes before closing pattern, trim content
+    TrimNoEsc,
+    None,
+}
+
+/// Parse a buffer
+pub fn entrypoint(txt: &str) -> MdStream<'_> {
+    let ctx = Context { top_block: true, prev: Prev::Newline };
+    normalize(parse_recursive(txt.trim().as_bytes(), ctx), &mut Vec::new())
+}
+
+/// Parse a buffer with specified context
+fn parse_recursive<'a>(buf: &'a [u8], ctx: Context) -> MdStream<'_> {
+    use ParseOpt as Po;
+    use Prev::{Escape, Newline, Whitespace};
+
+    let mut stream: Vec<MdTree<'a>> = Vec::new();
+    let Context { top_block: top_blk, mut prev } = ctx;
+
+    // wip_buf is our entire unprocessed (unpushed) buffer, loop_buf is our
+    // to-check buffer that shrinks with each loop
+    let mut wip_buf = buf;
+    let mut loop_buf = wip_buf;
+
+    while !loop_buf.is_empty() {
+        let next_prev = match loop_buf[0] {
+            b'\n' => Newline,
+            b'\\' => Escape,
+            x if x.is_ascii_whitespace() => Whitespace,
+            _ => Prev::Any,
+        };
+
+        let res: ParseResult<'_> = match (top_blk, prev) {
+            (_, Newline | Whitespace) if loop_buf.starts_with(CMT_S) => {
+                parse_simple_pat(loop_buf, CMT_S, CMT_E, Po::TrimNoEsc, MdTree::Comment)
+            }
+            (true, Newline) if loop_buf.starts_with(CBK) => Some(parse_codeblock(loop_buf)),
+            (_, Newline | Whitespace) if loop_buf.starts_with(CIL) => parse_codeinline(loop_buf),
+            (true, Newline | Whitespace) if loop_buf.starts_with(HDG) => parse_heading(loop_buf),
+            (true, Newline) if loop_buf.starts_with(BRK) => {
+                Some((MdTree::HorizontalRule, parse_to_newline(loop_buf).1))
+            }
+            (_, Newline | Whitespace) if loop_buf.starts_with(EMP) => {
+                parse_simple_pat(loop_buf, EMP, EMP, Po::None, MdTree::Emphasis)
+            }
+            (_, Newline | Whitespace) if loop_buf.starts_with(STG) => {
+                parse_simple_pat(loop_buf, STG, STG, Po::None, MdTree::Strong)
+            }
+            (_, Newline | Whitespace) if loop_buf.starts_with(STK) => {
+                parse_simple_pat(loop_buf, STK, STK, Po::None, MdTree::Strikethrough)
+            }
+            (_, Newline | Whitespace) if loop_buf.starts_with(ANC_S) => {
+                let tt_fn = |link| MdTree::Link { disp: link, link };
+                let ret = parse_simple_pat(loop_buf, ANC_S, ANC_E, Po::None, tt_fn);
+                match ret {
+                    Some((MdTree::Link { disp, .. }, _))
+                        if disp.chars().all(|ch| LNK_CHARS.contains(ch)) =>
+                    {
+                        ret
+                    }
+                    _ => None,
+                }
+            }
+            (_, Newline) if (loop_buf.starts_with(UL1) || loop_buf.starts_with(UL2)) => {
+                Some(parse_unordered_li(loop_buf))
+            }
+            (_, Newline) if ord_list_start(loop_buf).is_some() => Some(parse_ordered_li(loop_buf)),
+            (_, Newline | Whitespace) if loop_buf.starts_with(LNK_S) => {
+                parse_any_link(loop_buf, top_blk && prev == Prev::Newline)
+            }
+            (_, Escape | _) => None,
+        };
+
+        if let Some((tree, rest)) = res {
+            // We found something: push our WIP and then push the found tree
+            let prev_buf = &wip_buf[..(wip_buf.len() - loop_buf.len())];
+            if !prev_buf.is_empty() {
+                let prev_str = str::from_utf8(prev_buf).unwrap();
+                stream.push(MdTree::PlainText(prev_str));
+            }
+            stream.push(tree);
+
+            wip_buf = rest;
+            loop_buf = rest;
+        } else {
+            // Just move on to the next character
+            loop_buf = &loop_buf[1..];
+            // If we are at the end and haven't found anything, just push plain text
+            if loop_buf.is_empty() && !wip_buf.is_empty() {
+                let final_str = str::from_utf8(wip_buf).unwrap();
+                stream.push(MdTree::PlainText(final_str));
+            }
+        };
+
+        prev = next_prev;
+    }
+
+    MdStream(stream)
+}
+
+/// The simplest kind of patterns: data within start and end patterns
+fn parse_simple_pat<'a, F>(
+    buf: &'a [u8],
+    start_pat: &[u8],
+    end_pat: &[u8],
+    opts: ParseOpt,
+    create_tt: F,
+) -> ParseResult<'a>
+where
+    F: FnOnce(&'a str) -> MdTree<'a>,
+{
+    let ignore_esc = matches!(opts, ParseOpt::TrimNoEsc);
+    let trim = matches!(opts, ParseOpt::TrimNoEsc);
+    let (txt, rest) = parse_with_end_pat(&buf[start_pat.len()..], end_pat, ignore_esc)?;
+    let mut txt = str::from_utf8(txt).unwrap();
+    if trim {
+        txt = txt.trim();
+    }
+    Some((create_tt(txt), rest))
+}
+
+/// Parse backtick-wrapped inline code. Accounts for >1 backtick sets
+fn parse_codeinline(buf: &[u8]) -> ParseResult<'_> {
+    let seps = buf.iter().take_while(|ch| **ch == b'`').count();
+    let (txt, rest) = parse_with_end_pat(&buf[seps..], &buf[..seps], true)?;
+    Some((MdTree::CodeInline(str::from_utf8(txt).unwrap()), rest))
+}
+
+/// Parse a codeblock. Accounts for >3 backticks and language specification
+fn parse_codeblock(buf: &[u8]) -> Parsed<'_> {
+    // account for ````code```` style
+    let seps = buf.iter().take_while(|ch| **ch == b'`').count();
+    let end_sep = &buf[..seps];
+    let mut working = &buf[seps..];
+
+    // Handle "````rust" style language specifications
+    let next_ws_idx = working.iter().take_while(|ch| !ch.is_ascii_whitespace()).count();
+
+    let lang = if next_ws_idx > 0 {
+        // Munch the lang
+        let tmp = str::from_utf8(&working[..next_ws_idx]).unwrap();
+        working = &working[next_ws_idx..];
+        Some(tmp)
+    } else {
+        None
+    };
+
+    let mut end_pat = vec![b'\n'];
+    end_pat.extend(end_sep);
+
+    // Find first end pattern with nothing else on its line
+    let mut found = None;
+    for idx in (0..working.len()).filter(|idx| working[*idx..].starts_with(&end_pat)) {
+        let (eol_txt, rest) = parse_to_newline(&working[(idx + end_pat.len())..]);
+        if !eol_txt.iter().any(u8::is_ascii_whitespace) {
+            found = Some((&working[..idx], rest));
+            break;
+        }
+    }
+
+    let (txt, rest) = found.unwrap_or((working, &[]));
+    let txt = str::from_utf8(txt).unwrap().trim_matches('\n');
+
+    (MdTree::CodeBlock { txt, lang }, rest)
+}
+
+fn parse_heading(buf: &[u8]) -> ParseResult<'_> {
+    let level = buf.iter().take_while(|ch| **ch == b'#').count();
+    let buf = &buf[level..];
+
+    if level > 6 || (buf.len() > 1 && !buf[0].is_ascii_whitespace()) {
+        // Enforce max 6 levels and whitespace following the `##` pattern
+        return None;
+    }
+
+    let (txt, rest) = parse_to_newline(&buf[1..]);
+    let ctx = Context { top_block: false, prev: Prev::Whitespace };
+    let stream = parse_recursive(txt, ctx);
+
+    Some((MdTree::Heading(level.try_into().unwrap(), stream), rest))
+}
+
+/// Bulleted list
+fn parse_unordered_li(buf: &[u8]) -> Parsed<'_> {
+    debug_assert!(buf.starts_with(b"* ") || buf.starts_with(b"- "));
+    let (txt, rest) = get_indented_section(&buf[2..]);
+    let ctx = Context { top_block: false, prev: Prev::Whitespace };
+    let stream = parse_recursive(trim_ascii_start(txt), ctx);
+    (MdTree::UnorderedListItem(stream), rest)
+}
+
+/// Numbered list
+fn parse_ordered_li(buf: &[u8]) -> Parsed<'_> {
+    let (num, pos) = ord_list_start(buf).unwrap(); // success tested in caller
+    let (txt, rest) = get_indented_section(&buf[pos..]);
+    let ctx = Context { top_block: false, prev: Prev::Whitespace };
+    let stream = parse_recursive(trim_ascii_start(txt), ctx);
+    (MdTree::OrderedListItem(num, stream), rest)
+}
+
+/// Split off an indented section: it ends at the first newline that is followed
+/// by a non-whitespace character or by another newline. Returns `(section, rest)`
+fn get_indented_section(buf: &[u8]) -> (&[u8], &[u8]) {
+    let mut end = buf.len();
+    for (idx, window) in buf.windows(2).enumerate() {
+        let &[ch, next_ch] = window else {unreachable!("always 2 elements")};
+        if idx >= buf.len().saturating_sub(2) && next_ch == b'\n' {
+            // End of stream
+            end = buf.len().saturating_sub(1);
+            break;
+        } else if ch == b'\n' && (!next_ch.is_ascii_whitespace() || next_ch == b'\n') {
+            end = idx;
+            break;
+        }
+    }
+
+    (&buf[..end], &buf[end..])
+}
+
+/// Verify a valid ordered list start (e.g. `1.`) and parse it. Returns the
+/// parsed number and offset of character after the dot.
+fn ord_list_start(buf: &[u8]) -> Option<(u16, usize)> {
+    let pos = buf.iter().take(10).position(|ch| *ch == b'.')?;
+    let n = str::from_utf8(&buf[..pos]).ok()?;
+    if !buf.get(pos + 1)?.is_ascii_whitespace() {
+        return None;
+    }
+    n.parse::<u16>().ok().map(|v| (v, pos + 2))
+}
+
+/// Parse links. `can_be_def` indicates that a link definition is possible (top
+/// level, located at the start of a line)
+fn parse_any_link(buf: &[u8], can_be_def: bool) -> ParseResult<'_> {
+    let (bracketed, rest) = parse_with_end_pat(&buf[1..], LNK_E, true)?;
+    if rest.is_empty() {
+        return None;
+    }
+
+    let disp = str::from_utf8(bracketed).unwrap();
+    match (can_be_def, rest[0]) {
+        (true, b':') => {
+            let (link, tmp) = parse_to_newline(&rest[1..]);
+            let link = str::from_utf8(link).unwrap().trim();
+            Some((MdTree::LinkDef { id: disp, link }, tmp))
+        }
+        (_, b'(') => parse_simple_pat(rest, b"(", b")", ParseOpt::TrimNoEsc, |link| MdTree::Link {
+            disp,
+            link,
+        }),
+        (_, b'[') => parse_simple_pat(rest, b"[", b"]", ParseOpt::TrimNoEsc, |id| {
+            MdTree::RefLink { disp, id: Some(id) }
+        }),
+        _ => Some((MdTree::RefLink { disp, id: None }, rest)),
+    }
+}
+
+/// Find and consume an end pattern, return `(match, residual)`
+fn parse_with_end_pat<'a>(
+    buf: &'a [u8],
+    end_sep: &[u8],
+    ignore_esc: bool,
+) -> Option<(&'a [u8], &'a [u8])> {
+    // Find positions that start with the end separator
+    for idx in (0..buf.len()).filter(|idx| buf[*idx..].starts_with(end_sep)) {
+        if !ignore_esc && idx > 0 && buf[idx - 1] == b'\\' {
+            continue;
+        }
+        return Some((&buf[..idx], &buf[idx + end_sep.len()..]));
+    }
+    None
+}
+
+/// Return `(match, residual)` to end of line. The EOL is returned with the
+/// residual.
+fn parse_to_newline(buf: &[u8]) -> (&[u8], &[u8]) {
+    buf.iter().position(|ch| *ch == b'\n').map_or((buf, &[]), |pos| buf.split_at(pos))
+}
+
+/// Take a parsed stream and fix the little things
+fn normalize<'a>(MdStream(stream): MdStream<'a>, linkdefs: &mut Vec<MdTree<'a>>) -> MdStream<'a> {
+    let mut new_stream = Vec::with_capacity(stream.len());
+    let new_defs = stream.iter().filter(|tt| matches!(tt, MdTree::LinkDef { .. }));
+    linkdefs.extend(new_defs.cloned());
+
+    // Run plaintext expansions on types that need it, call this function on nested types
+    for item in stream {
+        match item {
+            MdTree::PlainText(txt) => expand_plaintext(txt, &mut new_stream, MdTree::PlainText),
+            MdTree::Strong(txt) => expand_plaintext(txt, &mut new_stream, MdTree::Strong),
+            MdTree::Emphasis(txt) => expand_plaintext(txt, &mut new_stream, MdTree::Emphasis),
+            MdTree::Strikethrough(txt) => {
+                expand_plaintext(txt, &mut new_stream, MdTree::Strikethrough);
+            }
+            MdTree::RefLink { disp, id } => new_stream.push(match_reflink(linkdefs, disp, id)),
+            MdTree::OrderedListItem(n, st) => {
+                new_stream.push(MdTree::OrderedListItem(n, normalize(st, linkdefs)));
+            }
+            MdTree::UnorderedListItem(st) => {
+                new_stream.push(MdTree::UnorderedListItem(normalize(st, linkdefs)));
+            }
+            MdTree::Heading(n, st) => new_stream.push(MdTree::Heading(n, normalize(st, linkdefs))),
+            _ => new_stream.push(item),
+        }
+    }
+
+    // Remove non printing types, duplicate paragraph breaks, and breaks at start/end
+    new_stream.retain(|x| !matches!(x, MdTree::Comment(_) | MdTree::LinkDef { .. }));
+    new_stream.dedup_by(|r, l| matches!((r, l), (MdTree::ParagraphBreak, MdTree::ParagraphBreak)));
+
+    if new_stream.first().is_some_and(is_break_ty) {
+        new_stream.remove(0);
+    }
+    if new_stream.last().is_some_and(is_break_ty) {
+        new_stream.pop();
+    }
+
+    // Remove paragraph breaks that shouldn't be there. w[1] is what will be
+    // removed in these cases. Note that these are the items to keep, not delete
+    // (for `retain`)
+    let to_keep: Vec<bool> = new_stream
+        .windows(3)
+        .map(|w| {
+            !((matches!(&w[1], MdTree::ParagraphBreak)
+                && matches!(should_break(&w[0], &w[2]), BreakRule::Always(1) | BreakRule::Never))
+                || (matches!(&w[1], MdTree::PlainText(txt) if txt.trim().is_empty())
+                    && matches!(
+                        should_break(&w[0], &w[2]),
+                        BreakRule::Always(_) | BreakRule::Never
+                    )))
+        })
+        .collect();
+    let mut iter = iter::once(true).chain(to_keep).chain(iter::once(true));
+    new_stream.retain(|_| iter.next().unwrap());
+
+    // Insert line or paragraph breaks where there should be some
+    let mut insertions = 0;
+    let to_insert: Vec<(usize, MdTree<'_>)> = new_stream
+        .windows(2)
+        .enumerate()
+        .filter_map(|(idx, w)| match should_break(&w[0], &w[1]) {
+            BreakRule::Always(1) => Some((idx, MdTree::LineBreak)),
+            BreakRule::Always(2) => Some((idx, MdTree::ParagraphBreak)),
+            _ => None,
+        })
+        .map(|(idx, tt)| {
+            insertions += 1;
+            (idx + insertions, tt)
+        })
+        .collect();
+    to_insert.into_iter().for_each(|(idx, tt)| new_stream.insert(idx, tt));
+
+    MdStream(new_stream)
+}
+
+/// Whether two types should or shouldn't have a paragraph break between them
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum BreakRule {
+    Always(u8),
+    Never,
+    Optional,
+}
+
+/// Decide which break rule applies between two adjacent token trees
+fn should_break(left: &MdTree<'_>, right: &MdTree<'_>) -> BreakRule {
+    use MdTree::*;
+
+    match (left, right) {
+        // Separate these types with a single line
+        (HorizontalRule, _)
+        | (_, HorizontalRule)
+        | (OrderedListItem(_, _), OrderedListItem(_, _))
+        | (UnorderedListItem(_), UnorderedListItem(_)) => BreakRule::Always(1),
+        // Condensed types shouldn't have an extra break on either side
+        (Comment(_) | ParagraphBreak | Heading(_, _), _) | (_, Comment(_) | ParagraphBreak) => {
+            BreakRule::Never
+        }
+        // Block types should always be separated by full breaks
+        (CodeBlock { .. } | OrderedListItem(_, _) | UnorderedListItem(_), _)
+        | (_, CodeBlock { .. } | Heading(_, _) | OrderedListItem(_, _) | UnorderedListItem(_)) => {
+            BreakRule::Always(2)
+        }
+        // Text types may or may not be separated by a break
+        (
+            CodeInline(_)
+            | Strong(_)
+            | Emphasis(_)
+            | Strikethrough(_)
+            | PlainText(_)
+            | Link { .. }
+            | RefLink { .. }
+            | LinkDef { .. },
+            CodeInline(_)
+            | Strong(_)
+            | Emphasis(_)
+            | Strikethrough(_)
+            | PlainText(_)
+            | Link { .. }
+            | RefLink { .. }
+            | LinkDef { .. },
+        ) => BreakRule::Optional,
+        (LineBreak, _) | (_, LineBreak) => {
+            unreachable!("should have been removed during deduplication")
+        }
+    }
+}
+
+/// Types that indicate some form of break
+fn is_break_ty(val: &MdTree<'_>) -> bool {
+    matches!(val, MdTree::ParagraphBreak | MdTree::LineBreak)
+        // >1 break between paragraphs acts as a break
+        || matches!(val, MdTree::PlainText(txt) if txt.trim().is_empty())
+}
+
+/// Perform transformations to text. This splits paragraphs, replaces patterns,
+/// and corrects newlines.
+///
+/// To avoid allocating strings (and using a different heavier tt type), our
+/// replace method means split into three and append each. For this reason, any
+/// viewer should treat consecutive `PlainText` types as belonging to the same
+/// paragraph.
+fn expand_plaintext<'a>(
+    txt: &'a str,
+    stream: &mut Vec<MdTree<'a>>,
+    mut f: fn(&'a str) -> MdTree<'a>,
+) {
+    if txt.is_empty() {
+        return;
+    } else if txt == "\n" {
+        if let Some(tt) = stream.last() {
+            let tmp = MdTree::PlainText(" ");
+            if should_break(tt, &tmp) == BreakRule::Optional {
+                stream.push(tmp);
+            }
+        }
+        return;
+    }
+    let mut queue1 = Vec::new();
+    let mut queue2 = Vec::new();
+    let stream_start_len = stream.len();
+    for paragraph in txt.split("\n\n") {
+        if paragraph.is_empty() {
+            stream.push(MdTree::ParagraphBreak);
+            continue;
+        }
+        let paragraph = trim_extra_ws(paragraph);
+
+        queue1.clear();
+        queue1.push(paragraph);
+
+        for (from, to) in REPLACEMENTS {
+            queue2.clear();
+            for item in &queue1 {
+                for s in item.split(from) {
+                    queue2.extend(&[s, to]);
+                }
+                if queue2.len() > 1 {
+                    let _ = queue2.pop(); // remove last unnecessary intersperse
+                }
+            }
+            mem::swap(&mut queue1, &mut queue2);
+        }
+
+        // Make sure we don't double whitespace
+        queue1.retain(|s| !s.is_empty());
+        for idx in 0..queue1.len() {
+            queue1[idx] = trim_extra_ws(queue1[idx]);
+            if idx < queue1.len() - 1
+                && queue1[idx].ends_with(char::is_whitespace)
+                && queue1[idx + 1].starts_with(char::is_whitespace)
+            {
+                queue1[idx] = queue1[idx].trim_end();
+            }
+        }
+        stream.extend(queue1.iter().copied().filter(|txt| !txt.is_empty()).map(&mut f));
+        stream.push(MdTree::ParagraphBreak);
+    }
+
+    if stream.len() - stream_start_len > 1 {
+        let _ = stream.pop(); // remove last unnecessary intersperse
+    }
+}
+
+/// Turn reflinks (links with reference IDs) into normal standalone links using
+/// listed link definitions
+fn match_reflink<'a>(linkdefs: &[MdTree<'a>], disp: &'a str, match_id: Option<&str>) -> MdTree<'a> {
+    let to_match = match_id.unwrap_or(disp); // Match with the display name if there isn't an id
+    for def in linkdefs {
+        if let MdTree::LinkDef { id, link } = def {
+            if *id == to_match {
+                return MdTree::Link { disp, link };
+            }
+        }
+    }
+    MdTree::Link { disp, link: "" } // link not found
+}
+
+/// If there is more than one whitespace char at start or end, trim the extras
+fn trim_extra_ws(mut txt: &str) -> &str {
+    let start_ws =
+        txt.bytes().position(|ch| !ch.is_ascii_whitespace()).unwrap_or(txt.len()).saturating_sub(1);
+    txt = &txt[start_ws..];
+    let end_ws = txt
+        .bytes()
+        .rev()
+        .position(|ch| !ch.is_ascii_whitespace())
+        .unwrap_or(txt.len())
+        .saturating_sub(1);
+    &txt[..txt.len() - end_ws]
+}
+
+/// Trim all leading ASCII whitespace from the start of the buffer
+fn trim_ascii_start(buf: &[u8]) -> &[u8] {
+    let count = buf.iter().take_while(|ch| ch.is_ascii_whitespace()).count();
+    &buf[count..]
+}
+
+#[cfg(test)]
+#[path = "tests/parse.rs"]
+mod tests;
--- a/compiler/rustc_errors/src/markdown/term.rs
+++ b/compiler/rustc_errors/src/markdown/term.rs
@ -0,0 +1,189 @@
+use std::cell::Cell;
+use std::io::{self, Write};
+
+use termcolor::{Buffer, Color, ColorSpec, WriteColor};
+
+use crate::markdown::{MdStream, MdTree};
+
+/// Upper bound applied to the detected terminal width in `entrypoint`, and
+/// the initial value of `WIDTH`
+const DEFAULT_COLUMN_WIDTH: usize = 140;
+
+thread_local! {
+    /// Track the position of viewable characters in our buffer
+    static CURSOR: Cell<usize> = Cell::new(0);
+    /// Width of the terminal
+    static WIDTH: Cell<usize> = Cell::new(DEFAULT_COLUMN_WIDTH);
+}
+
+/// Render a markdown token stream into a termcolor buffer.
+///
+/// Outside of tests, the wrap width is first set to the terminal width
+/// reported by `termize`, capped at `DEFAULT_COLUMN_WIDTH`. A trailing newline
+/// is written after the stream.
+pub fn entrypoint(stream: &MdStream<'_>, buf: &mut Buffer) -> io::Result<()> {
+    #[cfg(not(test))]
+    if let Some((term_cols, _)) = termize::dimensions() {
+        let capped = term_cols.min(DEFAULT_COLUMN_WIDTH);
+        WIDTH.with(|width| width.set(capped));
+    }
+    write_stream(stream, buf, None, 0)?;
+    buf.write_all(b"\n")
+}
+
+/// Write every token of the stream in order.
+///
+/// When `default` is present it is applied before the first token and again
+/// after each token; otherwise the buffer is reset before writing. The color
+/// is always reset once the stream has been written.
+fn write_stream(
+    MdStream(stream): &MdStream<'_>,
+    buf: &mut Buffer,
+    default: Option<&ColorSpec>,
+    indent: usize,
+) -> io::Result<()> {
+    if let Some(base) = default {
+        buf.set_color(base)?;
+    } else {
+        buf.reset()?;
+    }
+
+    for tt in stream {
+        write_tt(tt, buf, indent)?;
+        if let Some(base) = default {
+            buf.set_color(base)?;
+        }
+    }
+
+    buf.reset()
+}
+
+/// Write a single markdown token to `buf` with the style for its variant, then
+/// reset the color.
+///
+/// Inline text variants are written through `write_wrapping` at the given
+/// `indent`; code blocks are written verbatim. Break and rule variants reset
+/// the tracked cursor. Headings and list items recurse through `write_stream`.
+///
+/// # Errors
+///
+/// Returns any I/O error reported by the buffer.
+///
+/// # Panics
+///
+/// Panics on `Comment`, `LinkDef` and `RefLink` tokens, and on a heading of
+/// level 0; these are treated as unreachable here.
+pub fn write_tt(tt: &MdTree<'_>, buf: &mut Buffer, indent: usize) -> io::Result<()> {
+    match tt {
+        MdTree::CodeBlock { txt, lang: _ } => {
+            buf.set_color(ColorSpec::new().set_dimmed(true))?;
+            buf.write_all(txt.as_bytes())?;
+        }
+        MdTree::CodeInline(txt) => {
+            buf.set_color(ColorSpec::new().set_dimmed(true))?;
+            write_wrapping(buf, txt, indent, None)?;
+        }
+        MdTree::Strong(txt) => {
+            buf.set_color(ColorSpec::new().set_bold(true))?;
+            write_wrapping(buf, txt, indent, None)?;
+        }
+        MdTree::Emphasis(txt) => {
+            buf.set_color(ColorSpec::new().set_italic(true))?;
+            write_wrapping(buf, txt, indent, None)?;
+        }
+        MdTree::Strikethrough(txt) => {
+            buf.set_color(ColorSpec::new().set_strikethrough(true))?;
+            write_wrapping(buf, txt, indent, None)?;
+        }
+        MdTree::PlainText(txt) => {
+            write_wrapping(buf, txt, indent, None)?;
+        }
+        MdTree::Link { disp, link } => {
+            write_wrapping(buf, disp, indent, Some(link))?;
+        }
+        MdTree::ParagraphBreak => {
+            buf.write_all(b"\n\n")?;
+            reset_cursor();
+        }
+        MdTree::LineBreak => {
+            buf.write_all(b"\n")?;
+            reset_cursor();
+        }
+        MdTree::HorizontalRule => {
+            // One dash per column; propagate write failures instead of panicking
+            buf.write_all("-".repeat(WIDTH.with(Cell::get)).as_bytes())?;
+            reset_cursor();
+        }
+        MdTree::Heading(n, stream) => {
+            let mut cs = ColorSpec::new();
+            cs.set_fg(Some(Color::Cyan));
+            match n {
+                1 => cs.set_intense(true).set_bold(true).set_underline(true),
+                2 => cs.set_intense(true).set_underline(true),
+                3 => cs.set_intense(true).set_italic(true),
+                4.. => cs.set_underline(true).set_italic(true),
+                0 => unreachable!(),
+            };
+            write_stream(stream, buf, Some(&cs), 0)?;
+            buf.write_all(b"\n")?;
+        }
+        MdTree::OrderedListItem(n, stream) => {
+            // The marker is padded to four columns, matching the indent used
+            // for the item's content
+            let base = format!("{n}. ");
+            write_wrapping(buf, &format!("{base:<4}"), indent, None)?;
+            write_stream(stream, buf, None, indent + 4)?;
+        }
+        MdTree::UnorderedListItem(stream) => {
+            let base = "* ";
+            write_wrapping(buf, &format!("{base:<4}"), indent, None)?;
+            write_stream(stream, buf, None, indent + 4)?;
+        }
+        // Patterns popped in previous step
+        MdTree::Comment(_) | MdTree::LinkDef { .. } | MdTree::RefLink { .. } => unreachable!(),
+    }
+
+    buf.reset()?;
+
+    Ok(())
+}
+
+/// Mark the cursor as being at the start of a line; called once a block has
+/// ended with a line break
+fn reset_cursor() {
+    CURSOR.with(|cursor| {
+        cursor.set(0);
+    });
+}
+
+/// Write `text` to `buf`, breaking lines so the tracked cursor does not pass
+/// the width stored in `WIDTH`, and starting each new line with `indent`
+/// spaces.
+///
+/// Generic over `io::Write` so it can be tested without a terminal buffer.
+/// If `link_url` is given, the text is surrounded by the escape sequences that
+/// make it a clickable link; those bytes are nonprinting and are not counted
+/// by the cursor.
+///
+/// Lines break at the last whitespace, `-` or `_` that fits. A run with no
+/// such break point is written whole up to its next whitespace, even when
+/// that overshoots the width.
+///
+/// # Errors
+///
+/// Returns any I/O error reported by `buf`.
+fn write_wrapping<B: io::Write>(
+    buf: &mut B,
+    text: &str,
+    indent: usize,
+    link_url: Option<&str>,
+) -> io::Result<()> {
+    let mut to_write = text;
+    if let Some(url) = link_url {
+        // This is a nonprinting prefix so we don't increment our cursor
+        write!(buf, "\x1b]8;;{url}\x1b\\")?;
+    }
+    CURSOR.with(|cur| {
+        loop {
+            if cur.get() == 0 {
+                // Pad with spaces instead of slicing a fixed-size literal, so
+                // any indent depth is supported
+                write!(buf, "{:indent$}", "")?;
+                cur.set(indent);
+            }
+            // Saturate so a cursor at or past the width cannot underflow
+            let ch_count = WIDTH.with(Cell::get).saturating_sub(cur.get());
+            let mut iter = to_write.char_indices();
+            let Some((end_idx, _ch)) = iter.nth(ch_count) else {
+                // Write entire line
+                buf.write_all(to_write.as_bytes())?;
+                cur.set(cur.get() + to_write.chars().count());
+                break;
+            };
+
+            if let Some((break_idx, ch)) = to_write[..end_idx]
+                .char_indices()
+                .rev()
+                .find(|(_idx, ch)| ch.is_whitespace() || ['_', '-'].contains(ch))
+            {
+                // Found whitespace to break at
+                if ch.is_whitespace() {
+                    writeln!(buf, "{}", &to_write[..break_idx])?;
+                    to_write = to_write[break_idx..].trim_start();
+                } else {
+                    // Break at a `-` or `_` separator
+                    writeln!(buf, "{}", &to_write.get(..break_idx + 1).unwrap_or(to_write))?;
+                    to_write = to_write.get(break_idx + 1..).unwrap_or_default().trim_start();
+                }
+            } else {
+                // No break point that fits: write through to the next
+                // whitespace (or the end). Skip the whitespace by its encoded
+                // length so a multi-byte space does not discard the remainder.
+                let (ws_idx, ws_len) = iter
+                    .find(|(_, ch)| ch.is_whitespace())
+                    .map_or((to_write.len(), 0), |(idx, ch)| (idx, ch.len_utf8()));
+                writeln!(buf, "{}", &to_write[..ws_idx])?;
+                to_write = to_write[ws_idx + ws_len..].trim_start();
+            }
+            cur.set(0);
+        }
+        if link_url.is_some() {
+            // Nonprinting suffix that ends the link
+            buf.write_all(b"\x1b]8;;\x1b\\")?;
+        }
+
+        Ok(())
+    })
+}
+
+#[cfg(test)]
+#[path = "tests/term.rs"]
+mod tests;
--- a/compiler/rustc_errors/src/markdown/tests/input.md
+++ b/compiler/rustc_errors/src/markdown/tests/input.md
@ -0,0 +1,50 @@
+# H1 Heading [with a link][remote-link]
+
+H1 content: **some words in bold** and `so does inline code`
+
+## H2 Heading
+
+H2 content: _some words in italic_
+
+### H3 Heading
+
+H3 content: ~~strikethrough~~ text
+
+#### H4 Heading
+
+H4 content: A [simple link](https://docs.rs) and a [remote-link].
+
+---
+
+A section break was above. We can also do paragraph breaks:
+
+(new paragraph) and unordered lists:
+
+- Item 1 in `code`
+- Item 2 in _italics_
+
+Or ordered:
+
+1. Item 1 in **bold**
+2. Item 2 with some long lines that should wrap: Lorem ipsum dolor sit amet,
+   consectetur adipiscing elit. Aenean ac mattis nunc. Phasellus elit quam,
+   pulvinar ac risus in, dictum vehicula turpis. Vestibulum neque est, accumsan
+   in cursus sit amet, dictum a nunc. Suspendisse aliquet, lorem eu eleifend
+   accumsan, magna neque sodales nisi, a aliquet lectus leo eu sem.
+
+---
+
+## Code
+
+Both `inline code` and code blocks are supported:
+
+```rust
+/// A rust enum
+#[derive(Debug, PartialEq, Clone)]
+enum Foo {
+    /// Start of line
+    Bar
+}
+```
+
+[remote-link]: http://docs.rs
--- a/compiler/rustc_errors/src/markdown/tests/output.stdout
+++ b/compiler/rustc_errors/src/markdown/tests/output.stdout
@ -0,0 +1,35 @@
+[0m[0m[1m[4m[38;5;14mH1 Heading [0m[0m[1m[4m[38;5;14m]8;;http://docs.rs\with a link]8;;\[0m[0m[1m[4m[38;5;14m[0m
+[0mH1 content: [0m[0m[1msome words in bold[0m and [0m[0m[2mso does inline code[0m
+
+[0m[0m[4m[38;5;14mH2 Heading[0m[0m[4m[38;5;14m[0m
+[0mH2 content: [0m[0m[3msome words in italic[0m
+
+[0m[0m[3m[38;5;14mH3 Heading[0m[0m[3m[38;5;14m[0m
+[0mH3 content: [0m[0m[9mstrikethrough[0m text[0m
+
+[0m[0m[3m[4m[36mH4 Heading[0m[0m[3m[4m[36m[0m
+[0mH4 content: A [0m]8;;https://docs.rs\simple link]8;;\[0m and a [0m]8;;http://docs.rs\remote-link]8;;\[0m.[0m
+[0m--------------------------------------------------------------------------------------------------------------------------------------------[0m
+[0mA section break was above. We can also do paragraph breaks:[0m
+
+[0m(new paragraph) and unordered lists:[0m
+
+[0m*   [0mItem 1 in [0m[0m[2mcode[0m[0m[0m
+[0m*   [0mItem 2 in [0m[0m[3mitalics[0m[0m[0m
+
+[0mOr ordered:[0m
+
+[0m1.  [0mItem 1 in [0m[0m[1mbold[0m[0m[0m
+[0m2.  [0mItem 2 with some long lines that should wrap: Lorem ipsum dolor sit amet,[0m consectetur adipiscing elit. Aenean ac mattis nunc. Phasellus
+    elit quam,[0m pulvinar ac risus in, dictum vehicula turpis. Vestibulum neque est, accumsan[0m in cursus sit amet, dictum a nunc. Suspendisse
+    aliquet, lorem eu eleifend[0m accumsan, magna neque sodales nisi, a aliquet lectus leo eu sem.[0m[0m[0m
+[0m--------------------------------------------------------------------------------------------------------------------------------------------[0m
+[0m[0m[4m[38;5;14mCode[0m[0m[4m[38;5;14m[0m
+[0mBoth [0m[0m[2minline code[0m and code blocks are supported:[0m
+
+[0m[0m[2m/// A rust enum
+#[derive(Debug, PartialEq, Clone)]
+enum Foo {
+    /// Start of line
+    Bar
+}[0m[0m
--- a/compiler/rustc_errors/src/markdown/tests/parse.rs
+++ b/compiler/rustc_errors/src/markdown/tests/parse.rs
@ -0,0 +1,312 @@
+use super::*;
+use ParseOpt as PO;
+
+// `parse_simple_pat` with `STG` as both delimiters: a delimited span yields the
+// inner text plus the unparsed remainder, and an escaped closing delimiter
+// makes the parse return `None`.
+#[test]
+fn test_parse_simple() {
+    let buf = "**abcd** rest";
+    let (t, r) = parse_simple_pat(buf.as_bytes(), STG, STG, PO::None, MdTree::Strong).unwrap();
+    assert_eq!(t, MdTree::Strong("abcd"));
+    assert_eq!(r, b" rest");
+
+    // Escaping should fail
+    let buf = r"**abcd\** rest";
+    let res = parse_simple_pat(buf.as_bytes(), STG, STG, PO::None, MdTree::Strong);
+    assert!(res.is_none());
+}
+
+// Comments parsed with `PO::TrimNoEsc`: the content comes back trimmed, and a
+// backslash before the closing delimiter is kept as content rather than
+// escaping it.
+#[test]
+fn test_parse_comment() {
+    let opt = PO::TrimNoEsc;
+    let buf = "<!-- foobar! -->rest";
+    let (t, r) = parse_simple_pat(buf.as_bytes(), CMT_S, CMT_E, opt, MdTree::Comment).unwrap();
+    assert_eq!(t, MdTree::Comment("foobar!"));
+    assert_eq!(r, b"rest");
+
+    // A trailing backslash does not escape the closing delimiter
+    let buf = r"<!-- foobar! \-->rest";
+    let (t, r) = parse_simple_pat(buf.as_bytes(), CMT_S, CMT_E, opt, MdTree::Comment).unwrap();
+    assert_eq!(t, MdTree::Comment(r"foobar! \"));
+    assert_eq!(r, b"rest");
+}
+
+// Headings: the heading consumes up to (not including) the newline, works at
+// end of input, and its content is itself parsed into inline tokens.
+#[test]
+fn test_parse_heading() {
+    let buf1 = "# Top level\nrest";
+    let (t, r) = parse_heading(buf1.as_bytes()).unwrap();
+    assert_eq!(t, MdTree::Heading(1, vec![MdTree::PlainText("Top level")].into()));
+    assert_eq!(r, b"\nrest");
+
+    // Heading that ends at end of input leaves nothing behind
+    let buf1 = "# Empty";
+    let (t, r) = parse_heading(buf1.as_bytes()).unwrap();
+    assert_eq!(t, MdTree::Heading(1, vec![MdTree::PlainText("Empty")].into()));
+    assert_eq!(r, b"");
+
+    // Combo
+    let buf2 = "### Top `level` _woo_\nrest";
+    let (t, r) = parse_heading(buf2.as_bytes()).unwrap();
+    assert_eq!(
+        t,
+        MdTree::Heading(
+            3,
+            vec![
+                MdTree::PlainText("Top "),
+                MdTree::CodeInline("level"),
+                MdTree::PlainText(" "),
+                MdTree::Emphasis("woo"),
+            ]
+            .into()
+        )
+    );
+    assert_eq!(r, b"\nrest");
+}
+
+// Inline code: single and multiple backtick fences are accepted, newlines are
+// kept in the content, and a backslash does not escape the closing fence.
+#[test]
+fn test_parse_code_inline() {
+    let buf1 = "`abcd` rest";
+    let (t, r) = parse_codeinline(buf1.as_bytes()).unwrap();
+    assert_eq!(t, MdTree::CodeInline("abcd"));
+    assert_eq!(r, b" rest");
+
+    // extra backticks, newline
+    let buf2 = "```ab\ncd``` rest";
+    let (t, r) = parse_codeinline(buf2.as_bytes()).unwrap();
+    assert_eq!(t, MdTree::CodeInline("ab\ncd"));
+    assert_eq!(r, b" rest");
+
+    // test no escaping
+    let buf3 = r"`abcd\` rest";
+    let (t, r) = parse_codeinline(buf3.as_bytes()).unwrap();
+    assert_eq!(t, MdTree::CodeInline(r"abcd\"));
+    assert_eq!(r, b" rest");
+}
+
+// Fenced code blocks: the language tag is captured when present, and a shorter
+// run of backticks inside a longer fence is kept as content.
+#[test]
+fn test_parse_code_block() {
+    let buf1 = "```rust\ncode\ncode\n```\nleftovers";
+    let (t, r) = parse_codeblock(buf1.as_bytes());
+    assert_eq!(t, MdTree::CodeBlock { txt: "code\ncode", lang: Some("rust") });
+    assert_eq!(r, b"\nleftovers");
+
+    // Five-backtick fence with no language; the inner four backticks do not close it
+    let buf2 = "`````\ncode\ncode````\n`````\nleftovers";
+    let (t, r) = parse_codeblock(buf2.as_bytes());
+    assert_eq!(t, MdTree::CodeBlock { txt: "code\ncode````", lang: None });
+    assert_eq!(r, b"\nleftovers");
+}
+
+// `parse_any_link` across the supported link forms: inline links (with either
+// value of the boolean argument), bare and id-bearing reference links, and a
+// link definition.
+#[test]
+fn test_parse_link() {
+    let simple = "[see here](docs.rs) other";
+    let (t, r) = parse_any_link(simple.as_bytes(), false).unwrap();
+    assert_eq!(t, MdTree::Link { disp: "see here", link: "docs.rs" });
+    assert_eq!(r, b" other");
+
+    let simple_toplevel = "[see here](docs.rs) other";
+    let (t, r) = parse_any_link(simple_toplevel.as_bytes(), true).unwrap();
+    assert_eq!(t, MdTree::Link { disp: "see here", link: "docs.rs" });
+    assert_eq!(r, b" other");
+
+    // Reference link without an explicit id
+    let reference = "[see here] other";
+    let (t, r) = parse_any_link(reference.as_bytes(), true).unwrap();
+    assert_eq!(t, MdTree::RefLink { disp: "see here", id: None });
+    assert_eq!(r, b" other");
+
+    // Reference link with an explicit id
+    let reference_full = "[see here][docs-rs] other";
+    let (t, r) = parse_any_link(reference_full.as_bytes(), false).unwrap();
+    assert_eq!(t, MdTree::RefLink { disp: "see here", id: Some("docs-rs") });
+    assert_eq!(r, b" other");
+
+    // Link definition; the remainder starts at the newline
+    let reference_def = "[see here]: docs.rs\nother";
+    let (t, r) = parse_any_link(reference_def.as_bytes(), true).unwrap();
+    assert_eq!(t, MdTree::LinkDef { id: "see here", link: "docs.rs" });
+    assert_eq!(r, b"\nother");
+}
+
+// Fixtures for `get_indented_section`: an indented run followed by an
+// unindented line, an indented run that reaches end of input, and an indented
+// run followed by an empty line.
+const IND1: &str = r"test standard
+    ind
+    ind2
+not ind";
+const IND2: &str = r"test end of stream
+  1
+  2
+";
+const IND3: &str = r"test empty lines
+  1
+  2
+
+not ind";
+
+// The section is the first line plus the indented lines after it; the
+// remainder starts at the newline that follows the last indented line.
+#[test]
+fn test_indented_section() {
+    let (t, r) = get_indented_section(IND1.as_bytes());
+    assert_eq!(str::from_utf8(t).unwrap(), "test standard\n    ind\n    ind2");
+    assert_eq!(str::from_utf8(r).unwrap(), "\nnot ind");
+
+    let (txt, rest) = get_indented_section(IND2.as_bytes());
+    assert_eq!(str::from_utf8(txt).unwrap(), "test end of stream\n  1\n  2");
+    assert_eq!(str::from_utf8(rest).unwrap(), "\n");
+
+    let (txt, rest) = get_indented_section(IND3.as_bytes());
+    assert_eq!(str::from_utf8(txt).unwrap(), "test empty lines\n  1\n  2");
+    assert_eq!(str::from_utf8(rest).unwrap(), "\n\nnot ind");
+}
+
+const HBT: &str = r"# Heading
+
+content";
+
+// A heading followed by a blank line and text parses to just the heading and
+// the text, with no break token between them.
+#[test]
+fn test_heading_breaks() {
+    let res = entrypoint(HBT);
+    let heading = MdTree::Heading(1, vec![MdTree::PlainText("Heading")].into());
+    let content = MdTree::PlainText("content");
+    let expected = vec![heading, content].into();
+    assert_eq!(res, expected);
+}
+
+const NL1: &str = r"start
+
+end";
+const NL2: &str = r"start
+
+
+end";
+const NL3: &str = r"start
+
+
+
+end";
+
+// One, two or three blank lines between two words all parse to the same
+// single paragraph break.
+#[test]
+fn test_newline_breaks() {
+    let cases = [NL1, NL2, NL3];
+    let expected =
+        vec![MdTree::PlainText("start"), MdTree::ParagraphBreak, MdTree::PlainText("end")].into();
+    for (idx, check) in cases.into_iter().enumerate() {
+        assert_eq!(entrypoint(check), expected, "failed {idx}");
+    }
+}
+
+// Emphasis whose content spans a line break
+const WRAP: &str = "plain _italics
+italics_";
+
+// The emphasized span is expected to come back as three emphasis tokens, with
+// the line break turned into a single-space token.
+#[test]
+fn test_wrap_pattern() {
+    let expected = vec![
+        MdTree::PlainText("plain "),
+        MdTree::Emphasis("italics"),
+        MdTree::Emphasis(" "),
+        MdTree::Emphasis("italics"),
+    ]
+    .into();
+    let res = entrypoint(WRAP);
+    assert_eq!(res, expected);
+}
+
+const WRAP_NOTXT: &str = r"_italics_
+**bold**";
+
+// Two formatted spans on consecutive lines, with no plain text between them,
+// are joined by a single plain-text space.
+#[test]
+fn test_wrap_notxt() {
+    let res = entrypoint(WRAP_NOTXT);
+    let expected = vec![
+        MdTree::Emphasis("italics"),
+        MdTree::PlainText(" "),
+        MdTree::Strong("bold"),
+    ]
+    .into();
+    assert_eq!(res, expected);
+}
+
+// List fixture mixing formatted items, a comment between items, multi-line
+// items with inline and reference links, an extra-indented item, and a
+// trailing link definition.
+const MIXED_LIST: &str = r"start
+- _italics item_
+<!-- comment -->
+- **bold item**
+  second line [link1](foobar1)
+  third line [link2][link-foo]
+-   :crab:
+    extra indent
+end
+[link-foo]: foobar2
+";
+
+// Expected result: the comment and link definition are absent, the reference
+// link is resolved to `foobar2`, continuation lines are folded into their
+// item, and `:crab:` comes back as the crab emoji.
+#[test]
+fn test_list() {
+    let expected = vec![
+        MdTree::PlainText("start"),
+        MdTree::ParagraphBreak,
+        MdTree::UnorderedListItem(vec![MdTree::Emphasis("italics item")].into()),
+        MdTree::LineBreak,
+        MdTree::UnorderedListItem(
+            vec![
+                MdTree::Strong("bold item"),
+                MdTree::PlainText(" second line "),
+                MdTree::Link { disp: "link1", link: "foobar1" },
+                MdTree::PlainText(" third line "),
+                MdTree::Link { disp: "link2", link: "foobar2" },
+            ]
+            .into(),
+        ),
+        MdTree::LineBreak,
+        MdTree::UnorderedListItem(
+            vec![MdTree::PlainText("🦀"), MdTree::PlainText(" extra indent")].into(),
+        ),
+        MdTree::ParagraphBreak,
+        MdTree::PlainText("end"),
+    ]
+    .into();
+    let res = entrypoint(MIXED_LIST);
+    assert_eq!(res, expected);
+}
+
+// Block-level elements on consecutive lines with no blank lines between them
+const SMOOSHED: &str = r#"
+start
+### heading
+1. ordered item
+```rust
+println!("Hello, world!");
+```
+`inline`
+``end``
+"#;
+
+// Each block element is still recognized, and paragraph breaks are expected
+// around them as listed below.
+#[test]
+fn test_without_breaks() {
+    let expected = vec![
+        MdTree::PlainText("start"),
+        MdTree::ParagraphBreak,
+        MdTree::Heading(3, vec![MdTree::PlainText("heading")].into()),
+        MdTree::OrderedListItem(1, vec![MdTree::PlainText("ordered item")].into()),
+        MdTree::ParagraphBreak,
+        MdTree::CodeBlock { txt: r#"println!("Hello, world!");"#, lang: Some("rust") },
+        MdTree::ParagraphBreak,
+        MdTree::CodeInline("inline"),
+        MdTree::PlainText(" "),
+        MdTree::CodeInline("end"),
+    ]
+    .into();
+    let res = entrypoint(SMOOSHED);
+    assert_eq!(res, expected);
+}
+
+// Inline code spans that each sit alone at the start of a line
+const CODE_STARTLINE: &str = r#"
+start
+`code`
+middle
+`more code`
+end
+"#;
+
+// Inline code at the start of a line stays inline: every line break in the
+// fixture becomes a single plain-text space.
+#[test]
+fn test_code_at_start() {
+    let expected = vec![
+        MdTree::PlainText("start"),
+        MdTree::PlainText(" "),
+        MdTree::CodeInline("code"),
+        MdTree::PlainText(" "),
+        MdTree::PlainText("middle"),
+        MdTree::PlainText(" "),
+        MdTree::CodeInline("more code"),
+        MdTree::PlainText(" "),
+        MdTree::PlainText("end"),
+    ]
+    .into();
+    let res = entrypoint(CODE_STARTLINE);
+    assert_eq!(res, expected);
+}
--- a/compiler/rustc_errors/src/markdown/tests/term.rs
+++ b/compiler/rustc_errors/src/markdown/tests/term.rs
@ -0,0 +1,90 @@
+use std::io::BufWriter;
+use std::path::PathBuf;
+use termcolor::{BufferWriter, ColorChoice};
+
+use super::*;
+use crate::markdown::MdStream;
+
+// Markdown fixture rendered by `test_output`
+const INPUT: &str = include_str!("input.md");
+// Path components of the blessed output file, starting at the crate manifest directory
+const OUTPUT_PATH: &[&str] = &[env!("CARGO_MANIFEST_DIR"), "src","markdown","tests","output.stdout"];
+
+// Wrap width that `test_wrapping_write` stores in `WIDTH`
+const TEST_WIDTH: usize = 80;
+
+// We try to make some words long to create corner cases
+const TXT: &str = r"Lorem ipsum dolor sit amet, consecteturadipiscingelit.
+Fusce-id-urna-sollicitudin, pharetra nisl nec, lobortis tellus. In at
+metus hendrerit, tincidunteratvel, ultrices turpis. Curabitur_risus_sapien,
+porta-sed-nunc-sed, ultricesposuerelacus. Sed porttitor quis
+dolor non venenatis. Aliquam ut. ";
+
+// Expected result of `test_wrapping_write`, which writes `TXT` (joined onto one
+// line) at indent 0, again at indent 4, then a link at indent 4, then `TXT` at
+// indent 0, all sharing one cursor
+const WRAPPED: &str = r"Lorem ipsum dolor sit amet, consecteturadipiscingelit. Fusce-id-urna-
+sollicitudin, pharetra nisl nec, lobortis tellus. In at metus hendrerit,
+tincidunteratvel, ultrices turpis. Curabitur_risus_sapien, porta-sed-nunc-sed,
+ultricesposuerelacus. Sed porttitor quis dolor non venenatis. Aliquam ut. Lorem
+    ipsum dolor sit amet, consecteturadipiscingelit. Fusce-id-urna-
+    sollicitudin, pharetra nisl nec, lobortis tellus. In at metus hendrerit,
+    tincidunteratvel, ultrices turpis. Curabitur_risus_sapien, porta-sed-nunc-
+    sed, ultricesposuerelacus. Sed porttitor quis dolor non venenatis. Aliquam
+    ut. Sample link lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet,
+consecteturadipiscingelit. Fusce-id-urna-sollicitudin, pharetra nisl nec,
+lobortis tellus. In at metus hendrerit, tincidunteratvel, ultrices turpis.
+Curabitur_risus_sapien, porta-sed-nunc-sed, ultricesposuerelacus. Sed porttitor
+quis dolor non venenatis. Aliquam ut. ";
+
+// Exercise `write_wrapping` directly with a plain byte buffer: four
+// consecutive writes at different indents (one of them a link) must wrap
+// exactly as `WRAPPED`, with no line longer than `TEST_WIDTH` bytes.
+#[test]
+fn test_wrapping_write() {
+    WIDTH.with(|w| w.set(TEST_WIDTH));
+    let mut buf = BufWriter::new(Vec::new());
+    // Join the fixture into a single line so all wrapping comes from the writer
+    let txt = TXT.replace("-\n","-").replace("_\n","_").replace('\n', " ").replace("    ", "");
+    write_wrapping(&mut buf, &txt, 0, None).unwrap();
+    write_wrapping(&mut buf, &txt, 4, None).unwrap();
+    write_wrapping(
+        &mut buf,
+        "Sample link lorem ipsum dolor sit amet. ",
+        4,
+        Some("link-address-placeholder"),
+    )
+    .unwrap();
+    write_wrapping(&mut buf, &txt, 0, None).unwrap();
+    let out = String::from_utf8(buf.into_inner().unwrap()).unwrap();
+    // Strip the link escape sequences and URL so only visible text is compared
+    let out = out
+        .replace("\x1b\\", "")
+        .replace('\x1b', "")
+        .replace("]8;;", "")
+        .replace("link-address-placeholder", "");
+
+    for line in out.lines() {
+        assert!(line.len() <= TEST_WIDTH, "line length\n'{line}'")
+    }
+
+    assert_eq!(out, WRAPPED);
+}
+
+// Render `INPUT` with colors forced on and compare the bytes against the
+// blessed file at `OUTPUT_PATH`. When `RUSTC_BLESS` is `1`, the blessed file
+// is overwritten instead of compared.
+#[test]
+fn test_output() {
+    // Capture `--bless` when run via ./x
+    let bless = std::env::var("RUSTC_BLESS").unwrap_or_default() == "1";
+    let ast = MdStream::parse_str(INPUT);
+    let bufwtr = BufferWriter::stderr(ColorChoice::Always);
+    let mut buffer = bufwtr.buffer();
+    ast.write_termcolor_buf(&mut buffer).unwrap();
+
+    let mut blessed = PathBuf::new();
+    blessed.extend(OUTPUT_PATH);
+
+    if bless {
+        std::fs::write(&blessed, buffer.into_inner()).unwrap();
+        eprintln!("blessed output at {}", blessed.display());
+    } else {
+        let output = buffer.into_inner();
+        let expected = std::fs::read(&blessed).unwrap_or_else(|err| {
+            panic!("failed to read blessed output at {}: {err}", blessed.display())
+        });
+        if expected != output {
+            // hack: I don't know any way to write bytes to the captured stdout
+            // that cargo test uses
+            let mut out = std::io::stdout();
+            // Show both sides, each under its own label
+            out.write_all(b"\n\nMarkdown output did not match. Expected:\n").unwrap();
+            out.write_all(&expected).unwrap();
+            out.write_all(b"\n\nActual:\n").unwrap();
+            out.write_all(&output).unwrap();
+            out.write_all(b"\n\n").unwrap();
+            panic!("markdown output mismatch");
+        }
+    }
+}
--- a/compiler/rustc_session/src/config.rs
+++ b/compiler/rustc_session/src/config.rs
@ -909,6 +909,7 @@ impl Default for Options {
            json_future_incompat: false,
            pretty: None,
            working_dir: RealFileName::LocalPath(std::env::current_dir().unwrap()),
+            color: ColorConfig::Auto,
        }
    }
 }
@ -2673,6 +2674,7 @@ pub fn build_session_options(
        json_future_incompat,
        pretty,
        working_dir,
+        color,
    }
 }

--- a/compiler/rustc_session/src/options.rs
+++ b/compiler/rustc_session/src/options.rs
@ -4,6 +4,7 @@ use crate::search_paths::SearchPath;
 use crate::utils::NativeLib;
 use crate::{lint, EarlyErrorHandler};
 use rustc_data_structures::profiling::TimePassesFormat;
+use rustc_errors::ColorConfig;
 use rustc_errors::{LanguageIdentifier, TerminalUrl};
 use rustc_target::spec::{CodeModel, LinkerFlavorCli, MergeFunctions, PanicStrategy, SanitizerSet};
 use rustc_target::spec::{
@ -212,6 +213,7 @@ top_level_options!(

        /// The (potentially remapped) working directory
        working_dir: RealFileName [TRACKED],
+        color: ColorConfig [UNTRACKED],
    }
 );

--- a/src/bootstrap/test.rs
+++ b/src/bootstrap/test.rs
@ -2217,6 +2217,11 @@ fn prepare_cargo_test(
 ) -> Command {
    let mut cargo = cargo.into();

+    // If bless is passed, give downstream crates a way to use it
+    if builder.config.cmd.bless() {
+        cargo.env("RUSTC_BLESS", "1");
+    }
+
    // Pass in some standard flags then iterate over the graph we've discovered
    // in `cargo metadata` with the maps above and figure out what `-p`
    // arguments need to get passed.