Add a simple markdown parser for formatting rustc --explain

Currently, the output of `rustc --explain foo` displays the raw markdown in a
pager. This is acceptable, but using actual formatting makes it easier to
understand.

This patch consists of three major components:

1.  A markdown parser. This is an extremely simple non-backtracking recursive
    implementation that requires normalization of the final token stream
2.  A utility to write the token stream to an output buffer
3.  Configuration within rustc_driver_impl to invoke this combination for
    `--explain`. Like the current implementation, it first attempts to print to
    a pager, falling back to colorized terminal output, with a standard print
    as a last resort.

    If color is disabled, or if the output does not support it, or if printing
    with color fails, it will write the raw markdown (which matches current
    behavior).

    Pagers known to support color are: `less` (with `-r`), `bat` (aka `catbat`),
    and `delta`.

The markdown parser does not support the entire markdown specification, but
should support the following with reasonable accuracy:

-   Headings, including formatting
-   Comments
-   Code, inline and fenced block (no indented block)
-   Strong, emphasis, and strikethrough formatted text
-   Links, anchor, inline, and reference-style
-   Horizontal rules
-   Unordered and ordered list items, including formatting

This parser and writer should be reusable by other systems if ever needed.
This commit is contained in:
Trevor Gross 2022-12-19 12:09:40 -06:00
parent 8aed93d912
commit 6a1c10bd85
15 changed files with 1408 additions and 19 deletions

View File

@ -4746,9 +4746,9 @@ dependencies = [
[[package]]
name = "termcolor"
version = "1.1.3"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6"
dependencies = [
"winapi-util",
]

View File

@ -24,6 +24,7 @@ use rustc_data_structures::profiling::{
};
use rustc_data_structures::sync::SeqCst;
use rustc_errors::registry::{InvalidErrorCode, Registry};
use rustc_errors::{markdown, ColorConfig};
use rustc_errors::{
DiagnosticMessage, ErrorGuaranteed, Handler, PResult, SubdiagnosticMessage, TerminalUrl,
};
@ -282,7 +283,7 @@ fn run_compiler(
interface::set_thread_safe_mode(&sopts.unstable_opts);
if let Some(ref code) = matches.opt_str("explain") {
handle_explain(&early_error_handler, diagnostics_registry(), code);
handle_explain(&early_error_handler, diagnostics_registry(), code, sopts.color);
return Ok(());
}
@ -540,7 +541,7 @@ impl Compilation {
}
}
fn handle_explain(handler: &EarlyErrorHandler, registry: Registry, code: &str) {
fn handle_explain(handler: &EarlyErrorHandler, registry: Registry, code: &str, color: ColorConfig) {
let upper_cased_code = code.to_ascii_uppercase();
let normalised =
if upper_cased_code.starts_with('E') { upper_cased_code } else { format!("E{code:0>4}") };
@ -564,7 +565,7 @@ fn handle_explain(handler: &EarlyErrorHandler, registry: Registry, code: &str) {
text.push('\n');
}
if io::stdout().is_terminal() {
show_content_with_pager(&text);
show_md_content_with_pager(&text, color);
} else {
safe_print!("{text}");
}
@ -575,17 +576,49 @@ fn handle_explain(handler: &EarlyErrorHandler, registry: Registry, code: &str) {
}
}
fn show_content_with_pager(content: &str) {
/// If color is always or auto, print formatted & colorized markdown. If color is never or
/// if formatted printing fails, print the raw text.
///
/// Prefers a pager, falls back standard print
fn show_md_content_with_pager(content: &str, color: ColorConfig) {
let mut fallback_to_println = false;
let pager_name = env::var_os("PAGER").unwrap_or_else(|| {
if cfg!(windows) { OsString::from("more.com") } else { OsString::from("less") }
});
let mut fallback_to_println = false;
let mut cmd = Command::new(&pager_name);
// FIXME: find if other pagers accept color options
let mut print_formatted = if pager_name == "less" {
cmd.arg("-r");
true
} else if ["bat", "catbat", "delta"].iter().any(|v| *v == pager_name) {
true
} else {
false
};
match Command::new(pager_name).stdin(Stdio::piped()).spawn() {
Ok(mut pager) => {
if color == ColorConfig::Never {
print_formatted = false;
} else if color == ColorConfig::Always {
print_formatted = true;
}
let mdstream = markdown::MdStream::parse_str(content);
let bufwtr = markdown::create_stdout_bufwtr();
let mut mdbuf = bufwtr.buffer();
if mdstream.write_termcolor_buf(&mut mdbuf).is_err() {
print_formatted = false;
}
if let Ok(mut pager) = cmd.stdin(Stdio::piped()).spawn() {
if let Some(pipe) = pager.stdin.as_mut() {
if pipe.write_all(content.as_bytes()).is_err() {
let res = if print_formatted {
pipe.write_all(mdbuf.as_slice())
} else {
pipe.write_all(content.as_bytes())
};
if res.is_err() {
fallback_to_println = true;
}
}
@ -593,18 +626,24 @@ fn show_content_with_pager(content: &str) {
if pager.wait().is_err() {
fallback_to_println = true;
}
}
Err(_) => {
} else {
fallback_to_println = true;
}
}
// If pager fails for whatever reason, we should still print the content
// to standard output
if fallback_to_println {
let fmt_success = match color {
ColorConfig::Auto => io::stdout().is_terminal() && bufwtr.print(&mdbuf).is_ok(),
ColorConfig::Always => bufwtr.print(&mdbuf).is_ok(),
ColorConfig::Never => false,
};
if !fmt_success {
safe_print!("{content}");
}
}
}
pub fn try_process_rlink(sess: &Session, compiler: &interface::Compiler) -> Compilation {
if sess.opts.unstable_opts.link_only {

View File

@ -20,7 +20,7 @@ rustc_hir = { path = "../rustc_hir" }
rustc_lint_defs = { path = "../rustc_lint_defs" }
rustc_type_ir = { path = "../rustc_type_ir" }
unicode-width = "0.1.4"
termcolor = "1.0"
termcolor = "1.2.0"
annotate-snippets = "0.9"
termize = "0.1.1"
serde = { version = "1.0.125", features = [ "derive" ] }

View File

@ -616,7 +616,7 @@ pub enum ColorConfig {
}
impl ColorConfig {
fn to_color_choice(self) -> ColorChoice {
pub fn to_color_choice(self) -> ColorChoice {
match self {
ColorConfig::Always => {
if io::stderr().is_terminal() {

View File

@ -61,6 +61,7 @@ pub mod emitter;
pub mod error;
pub mod json;
mod lock;
pub mod markdown;
pub mod registry;
mod snippet;
mod styled_buffer;

View File

@ -0,0 +1,76 @@
//! A simple markdown parser that can write formatted text to the terminal
//!
//! Entrypoint is `MdStream::parse_str(...)`
use std::io;
use termcolor::{Buffer, BufferWriter, ColorChoice};
mod parse;
mod term;
/// An AST representation of a Markdown document
///
/// This is an ordered list of `MdTree` nodes whose string data borrows from
/// the parsed source text.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct MdStream<'a>(Vec<MdTree<'a>>);
impl<'a> MdStream<'a> {
/// Parse a markdown string to a tokenstream
///
/// Delegates to `parse::entrypoint`; the returned stream borrows from `s`.
#[must_use]
pub fn parse_str(s: &str) -> MdStream<'_> {
parse::entrypoint(s)
}
/// Write formatted output to a termcolor buffer
///
/// Delegates to `term::entrypoint` and returns whatever I/O result it
/// produces.
pub fn write_termcolor_buf(&self, buf: &mut Buffer) -> io::Result<()> {
term::entrypoint(self, buf)
}
}
/// Create a termcolor `BufferWriter` for stdout with the `Always` color choice
///
/// Buffers to write into can be obtained from the returned writer.
pub fn create_stdout_bufwtr() -> BufferWriter {
BufferWriter::stdout(ColorChoice::Always)
}
/// A single tokentree within a Markdown document
///
/// The variants up to `HorizontalRule` are leaves holding borrowed text; the
/// last three variants contain a nested `MdStream`.
#[derive(Clone, Debug, PartialEq)]
pub enum MdTree<'a> {
/// Leaf type: the text of a comment
Comment(&'a str),
/// A fenced code block with an optional language tag
CodeBlock {
txt: &'a str,
lang: Option<&'a str>,
},
/// Inline code
CodeInline(&'a str),
/// Strong text
Strong(&'a str),
/// Emphasized text
Emphasis(&'a str),
/// Struck-through text
Strikethrough(&'a str),
/// Text with no special formatting
PlainText(&'a str),
/// [Foo](www.foo.com) or simple anchor <www.foo.com>
Link {
disp: &'a str,
link: &'a str,
},
/// `[Foo link][ref]`
RefLink {
disp: &'a str,
id: Option<&'a str>,
},
/// [ref]: www.foo.com
LinkDef {
id: &'a str,
link: &'a str,
},
/// Break between two paragraphs (double `\n`), not directly parsed but
/// added later
ParagraphBreak,
/// Break between two lines (single `\n`)
LineBreak,
/// A horizontal rule
HorizontalRule,
/// A heading: level and formatted contents
Heading(u8, MdStream<'a>),
/// An ordered list item: its number and formatted contents
OrderedListItem(u16, MdStream<'a>),
/// An unordered list item and its formatted contents
UnorderedListItem(MdStream<'a>),
}
/// Wrap an already-built list of trees in a stream without any processing
impl<'a> From<Vec<MdTree<'a>>> for MdStream<'a> {
fn from(value: Vec<MdTree<'a>>) -> Self {
Self(value)
}
}

View File

@ -0,0 +1,588 @@
use crate::markdown::{MdStream, MdTree};
use std::{iter, mem, str};
/// Short aliases that we can use in match patterns. If an end pattern is not
/// included, this type may be variable
// Anchor (`<link>`) end and start
const ANC_E: &[u8] = b">";
const ANC_S: &[u8] = b"<";
// Horizontal rule
const BRK: &[u8] = b"---";
// Code block fence
const CBK: &[u8] = b"```";
// Inline code
const CIL: &[u8] = b"`";
// Comment end and start
const CMT_E: &[u8] = b"-->";
const CMT_S: &[u8] = b"<!--";
// Emphasis
const EMP: &[u8] = b"_";
// Heading
const HDG: &[u8] = b"#";
// Characters checked against the contents of a `<...>` anchor
const LNK_CHARS: &str = "$-_.+!*'()/&?=:%";
// Bracketed link text end and start
const LNK_E: &[u8] = b"]";
const LNK_S: &[u8] = b"[";
// Strong
const STG: &[u8] = b"**";
// Strikethrough
const STK: &[u8] = b"~~";
// Unordered list item markers
const UL1: &[u8] = b"* ";
const UL2: &[u8] = b"- ";
/// Pattern replacements applied to plain text, as `(from, to)` pairs
///
/// Every occurrence of `from` is replaced by `to`. The final entry turns a
/// newline inside a paragraph into a single space.
const REPLACEMENTS: &[(&str, &str)] = &[
    ("(c)", "©"),
    ("(C)", "©"),
    ("(r)", "®"),
    ("(R)", "®"),
    // These previously mapped to an empty string, which deleted the text
    // instead of substituting the trademark sign
    ("(tm)", "™"),
    ("(TM)", "™"),
    (":crab:", "🦀"),
    ("\n", " "),
];
/// A parsed tree paired with the unparsed rest of the buffer:
/// `(extracted, remaining)`
type Parsed<'a> = (MdTree<'a>, &'a [u8]);
/// Output of a parse function that may find nothing (`None`)
type ParseResult<'a> = Option<Parsed<'a>>;
/// Parsing context
#[derive(Clone, Copy, Debug, PartialEq)]
struct Context {
/// If true, we are at the topmost level (not recursing a nested tt)
top_block: bool,
/// Class of the previous character
prev: Prev,
}
/// Character class preceding this one
#[derive(Clone, Copy, Debug, PartialEq)]
enum Prev {
/// A `\n` byte
Newline,
/// Whitespace that is not a newline
Whitespace,
/// A backslash
Escape,
/// Anything else
Any,
}
impl Default for Context {
/// Most common setting for non top-level parsing: not top block, not at
/// line start (yes leading whitespace, not escaped)
fn default() -> Self {
// `Prev::Whitespace` lets inline patterns match at the very start of the
// nested buffer
Self { top_block: false, prev: Prev::Whitespace }
}
}
/// Flags to simple parser function (`parse_simple_pat`)
#[derive(Clone, Copy, Debug, PartialEq)]
enum ParseOpt {
/// Ignore escapes before closing pattern, trim content
TrimNoEsc,
/// Honor escapes before the closing pattern, leave content untrimmed
None,
}
/// Parse a complete markdown document into a normalized stream
///
/// The input is trimmed, parsed as a top-level block starting at a line
/// start, and the raw result is then passed through `normalize`.
pub fn entrypoint(txt: &str) -> MdStream<'_> {
    let top_level = Context { top_block: true, prev: Prev::Newline };
    let raw_stream = parse_recursive(txt.trim().as_bytes(), top_level);
    let mut link_defs = Vec::new();
    normalize(raw_stream, &mut link_defs)
}
/// Parse a buffer with specified context
///
/// Walks the buffer one byte at a time. At each position the first matching
/// arm below (selected by whether we are at the top level and by the class of
/// the previous byte) tries to parse a pattern. On success the pending
/// unmatched text is pushed as `PlainText`, followed by the parsed tree;
/// otherwise the scan advances by one byte.
///
/// Panics if a span that gets converted to `str` is not valid UTF-8.
fn parse_recursive<'a>(buf: &'a [u8], ctx: Context) -> MdStream<'_> {
use ParseOpt as Po;
use Prev::{Escape, Newline, Whitespace};
let mut stream: Vec<MdTree<'a>> = Vec::new();
let Context { top_block: top_blk, mut prev } = ctx;
// wip_buf is our entire unprocessed (unpushed) buffer, loop_buf is our to
// check buffer that shrinks with each loop
let mut wip_buf = buf;
let mut loop_buf = wip_buf;
while !loop_buf.is_empty() {
// Classify the current byte; it becomes `prev` for the next iteration
let next_prev = match loop_buf[0] {
b'\n' => Newline,
b'\\' => Escape,
x if x.is_ascii_whitespace() => Whitespace,
_ => Prev::Any,
};
// Arm order matters: the first arm whose guard holds wins, even if its
// parser then returns `None`
let res: ParseResult<'_> = match (top_blk, prev) {
(_, Newline | Whitespace) if loop_buf.starts_with(CMT_S) => {
parse_simple_pat(loop_buf, CMT_S, CMT_E, Po::TrimNoEsc, MdTree::Comment)
}
// Checked before inline code since both start with a backtick
(true, Newline) if loop_buf.starts_with(CBK) => Some(parse_codeblock(loop_buf)),
(_, Newline | Whitespace) if loop_buf.starts_with(CIL) => parse_codeinline(loop_buf),
(true, Newline | Whitespace) if loop_buf.starts_with(HDG) => parse_heading(loop_buf),
(true, Newline) if loop_buf.starts_with(BRK) => {
Some((MdTree::HorizontalRule, parse_to_newline(loop_buf).1))
}
(_, Newline | Whitespace) if loop_buf.starts_with(EMP) => {
parse_simple_pat(loop_buf, EMP, EMP, Po::None, MdTree::Emphasis)
}
(_, Newline | Whitespace) if loop_buf.starts_with(STG) => {
parse_simple_pat(loop_buf, STG, STG, Po::None, MdTree::Strong)
}
(_, Newline | Whitespace) if loop_buf.starts_with(STK) => {
parse_simple_pat(loop_buf, STK, STK, Po::None, MdTree::Strikethrough)
}
(_, Newline | Whitespace) if loop_buf.starts_with(ANC_S) => {
// `<link>` anchors display the link itself
let tt_fn = |link| MdTree::Link { disp: link, link };
let ret = parse_simple_pat(loop_buf, ANC_S, ANC_E, Po::None, tt_fn);
// NOTE(review): LNK_CHARS contains only punctuation, so as written
// an anchor containing letters or digits is rejected here — confirm
// whether alphanumerics were meant to be allowed
match ret {
Some((MdTree::Link { disp, .. }, _))
if disp.chars().all(|ch| LNK_CHARS.contains(ch)) =>
{
ret
}
_ => None,
}
}
(_, Newline) if (loop_buf.starts_with(UL1) || loop_buf.starts_with(UL2)) => {
Some(parse_unordered_li(loop_buf))
}
(_, Newline) if ord_list_start(loop_buf).is_some() => Some(parse_ordered_li(loop_buf)),
(_, Newline | Whitespace) if loop_buf.starts_with(LNK_S) => {
// Link definitions are only possible at the top level at a line start
parse_any_link(loop_buf, top_blk && prev == Prev::Newline)
}
// Nothing is parsed after an escape or any other character class
(_, Escape | _) => None,
};
if let Some((tree, rest)) = res {
// We found something: push our WIP and then push the found tree
let prev_buf = &wip_buf[..(wip_buf.len() - loop_buf.len())];
if !prev_buf.is_empty() {
let prev_str = str::from_utf8(prev_buf).unwrap();
stream.push(MdTree::PlainText(prev_str));
}
stream.push(tree);
wip_buf = rest;
loop_buf = rest;
} else {
// Just move on to the next character
loop_buf = &loop_buf[1..];
// If we are at the end and haven't found anything, just push plain text
if loop_buf.is_empty() && !wip_buf.is_empty() {
let final_str = str::from_utf8(wip_buf).unwrap();
stream.push(MdTree::PlainText(final_str));
}
};
// Note: after a successful parse this is still the class of the byte that
// started the pattern, not of the last byte consumed
prev = next_prev;
}
MdStream(stream)
}
/// Parse text enclosed between a start pattern and an end pattern
///
/// `buf` is expected to begin with `start_pat`, which is skipped by length.
/// The enclosed text is handed to `create_tt` to build the tree. With
/// `ParseOpt::TrimNoEsc` the text is trimmed and a backslash before the end
/// pattern does not prevent it from matching. Returns `None` if no end pattern
/// is found.
fn parse_simple_pat<'a, F>(
    buf: &'a [u8],
    start_pat: &[u8],
    end_pat: &[u8],
    opts: ParseOpt,
    create_tt: F,
) -> ParseResult<'a>
where
    F: FnOnce(&'a str) -> MdTree<'a>,
{
    // A single option currently controls both escape handling and trimming
    let trim_no_esc = match opts {
        ParseOpt::TrimNoEsc => true,
        ParseOpt::None => false,
    };
    let after_start = &buf[start_pat.len()..];
    let (enclosed, rest) = parse_with_end_pat(after_start, end_pat, trim_no_esc)?;
    let enclosed = str::from_utf8(enclosed).unwrap();
    let enclosed = if trim_no_esc { enclosed.trim() } else { enclosed };
    Some((create_tt(enclosed), rest))
}
/// Parse inline code wrapped in one or more backticks
///
/// The closing delimiter must be the same number of backticks as the opening
/// run; backslashes before it are not treated as escapes.
fn parse_codeinline(buf: &[u8]) -> ParseResult<'_> {
    let fence_len = buf.iter().take_while(|&&byte| byte == b'`').count();
    let (fence, body) = buf.split_at(fence_len);
    let (code, rest) = parse_with_end_pat(body, fence, true)?;
    let code = str::from_utf8(code).unwrap();
    Some((MdTree::CodeInline(code), rest))
}
/// Parse a codeblock. Accounts for >3 backticks and language specification
///
/// `buf` must start at the opening fence. If no closing fence is found, the
/// rest of the buffer becomes the block's contents. Leading and trailing
/// newlines are stripped from the contents.
fn parse_codeblock(buf: &[u8]) -> Parsed<'_> {
// account for ````code```` style
let seps = buf.iter().take_while(|ch| **ch == b'`').count();
let end_sep = &buf[..seps];
let mut working = &buf[seps..];
// Handle "````rust" style language specifications
let next_ws_idx = working.iter().take_while(|ch| !ch.is_ascii_whitespace()).count();
let lang = if next_ws_idx > 0 {
// Munch the lang
let tmp = str::from_utf8(&working[..next_ws_idx]).unwrap();
working = &working[next_ws_idx..];
Some(tmp)
} else {
None
};
// The closing fence must start a line, so search for `\n` + the opening fence
let mut end_pat = vec![b'\n'];
end_pat.extend(end_sep);
// Find first end pattern with nothing else on its line
let mut found = None;
for idx in (0..working.len()).filter(|idx| working[*idx..].starts_with(&end_pat)) {
let (eol_txt, rest) = parse_to_newline(&working[(idx + end_pat.len())..]);
// Accept this fence only if the remainder of its line contains no ASCII
// whitespace
if !eol_txt.iter().any(u8::is_ascii_whitespace) {
found = Some((&working[..idx], rest));
break;
}
}
let (txt, rest) = found.unwrap_or((working, &[]));
let txt = str::from_utf8(txt).unwrap().trim_matches('\n');
(MdTree::CodeBlock { txt, lang }, rest)
}
/// Parse a heading: one to six `#` followed by whitespace and the title
///
/// `buf` must start at the first `#`. The title runs to the end of the line
/// and is parsed recursively so it may contain inline formatting. Returns
/// `None` if there are more than six `#`, or if the `#` run is not followed by
/// a whitespace character (including when it ends the buffer).
fn parse_heading(buf: &[u8]) -> ParseResult<'_> {
    let level = buf.iter().take_while(|ch| **ch == b'#').count();
    let buf = &buf[level..];

    // Enforce max 6 levels and whitespace following the `##` pattern. Checking
    // with `first()` also covers a `#` run at the very end of the buffer, which
    // would otherwise make the `buf[1..]` slice below panic.
    if level > 6 || !buf.first().is_some_and(u8::is_ascii_whitespace) {
        return None;
    }

    // Skip the separating whitespace character
    let (txt, rest) = parse_to_newline(&buf[1..]);
    let ctx = Context { top_block: false, prev: Prev::Whitespace };
    let stream = parse_recursive(txt, ctx);

    let level = u8::try_from(level).expect("level was checked to be at most 6");
    Some((MdTree::Heading(level, stream), rest))
}
/// Bulleted list
fn parse_unordered_li(buf: &[u8]) -> Parsed<'_> {
debug_assert!(buf.starts_with(b"* ") || buf.starts_with(b"- "));
let (txt, rest) = get_indented_section(&buf[2..]);
let ctx = Context { top_block: false, prev: Prev::Whitespace };
let stream = parse_recursive(trim_ascii_start(txt), ctx);
(MdTree::UnorderedListItem(stream), rest)
}
/// Numbered list
fn parse_ordered_li(buf: &[u8]) -> Parsed<'_> {
let (num, pos) = ord_list_start(buf).unwrap(); // success tested in caller
let (txt, rest) = get_indented_section(&buf[pos..]);
let ctx = Context { top_block: false, prev: Prev::Whitespace };
let stream = parse_recursive(trim_ascii_start(txt), ctx);
(MdTree::OrderedListItem(num, stream), rest)
}
/// Split off the section belonging to a list item: `(section, remaining)`
///
/// The section ends at the first newline that is followed by another newline
/// or by a non-whitespace character (a line that is empty or not indented). A
/// single newline at the very end of the buffer is left in the remainder. If
/// neither is found the whole buffer is the section.
fn get_indented_section(buf: &[u8]) -> (&[u8], &[u8]) {
    let last_window = buf.len().saturating_sub(2);
    let end = buf
        .windows(2)
        .enumerate()
        .find_map(|(idx, pair)| {
            let (ch, next_ch) = (pair[0], pair[1]);
            if idx >= last_window && next_ch == b'\n' {
                // End of stream
                Some(buf.len().saturating_sub(1))
            } else if ch == b'\n' && (next_ch == b'\n' || !next_ch.is_ascii_whitespace()) {
                Some(idx)
            } else {
                None
            }
        })
        .unwrap_or(buf.len());
    buf.split_at(end)
}
/// Check for an ordered list marker (e.g. `1. `) at the start of `buf`
///
/// The dot must be within the first ten bytes, be preceded by text that
/// parses as a `u16`, and be followed by an ASCII whitespace byte. Returns the
/// parsed number and the offset just past that whitespace byte.
fn ord_list_start(buf: &[u8]) -> Option<(u16, usize)> {
    let dot = buf.iter().take(10).position(|&byte| byte == b'.')?;
    let after_dot = buf.get(dot + 1)?;
    if !after_dot.is_ascii_whitespace() {
        return None;
    }
    let digits = str::from_utf8(&buf[..dot]).ok()?;
    let number: u16 = digits.parse().ok()?;
    Some((number, dot + 2))
}
/// Parse any bracketed link form starting at `[`
///
/// Depending on what follows the closing `]` this produces a link definition
/// (`[id]: target`, only when `can_be_def` is set, i.e. top level at the start
/// of a line), an inline link (`[text](target)`), a reference link with an id
/// (`[text][id]`), or a reference link without one (`[text]`). Returns `None`
/// if there is no closing bracket or nothing follows it.
fn parse_any_link(buf: &[u8], can_be_def: bool) -> ParseResult<'_> {
    let (bracketed, rest) = parse_with_end_pat(&buf[1..], LNK_E, true)?;
    let following = *rest.first()?;
    let disp = str::from_utf8(bracketed).unwrap();

    if can_be_def && following == b':' {
        // The definition's target runs to the end of the line
        let (target, after) = parse_to_newline(&rest[1..]);
        let link = str::from_utf8(target).unwrap().trim();
        return Some((MdTree::LinkDef { id: disp, link }, after));
    }

    match following {
        b'(' => {
            let make_link = |link| MdTree::Link { disp, link };
            parse_simple_pat(rest, b"(", b")", ParseOpt::TrimNoEsc, make_link)
        }
        b'[' => {
            let make_ref = |id| MdTree::RefLink { disp, id: Some(id) };
            parse_simple_pat(rest, b"[", b"]", ParseOpt::TrimNoEsc, make_ref)
        }
        _ => Some((MdTree::RefLink { disp, id: None }, rest)),
    }
}
/// Locate the first usable occurrence of `end_sep` and split around it
///
/// Returns `(match, residual)` where `match` is everything before the
/// separator and `residual` everything after it. Unless `ignore_esc` is set,
/// an occurrence directly preceded by a backslash is skipped. Returns `None`
/// if no usable occurrence exists.
fn parse_with_end_pat<'a>(
    buf: &'a [u8],
    end_sep: &[u8],
    ignore_esc: bool,
) -> Option<(&'a [u8], &'a [u8])> {
    let end = (0..buf.len()).find(|&pos| {
        let escaped = pos > 0 && buf[pos - 1] == b'\\';
        buf[pos..].starts_with(end_sep) && (ignore_esc || !escaped)
    })?;
    Some((&buf[..end], &buf[end + end_sep.len()..]))
}
/// Return `(match, residual)` split at the first newline. The newline itself
/// stays at the front of the residual; without a newline the whole buffer is
/// the match and the residual is empty.
fn parse_to_newline(buf: &[u8]) -> (&[u8], &[u8]) {
    match buf.iter().position(|&byte| byte == b'\n') {
        Some(eol) => (&buf[..eol], &buf[eol..]),
        None => (buf, &[]),
    }
}
/// Take a parsed stream and fix the little things
///
/// In order, this: collects link definitions into `linkdefs`, expands
/// text-like nodes (paragraph splitting and replacements), resolves reference
/// links against `linkdefs`, recurses into nested streams, drops comments and
/// link definitions, and finally removes or inserts breaks according to
/// `should_break`.
///
/// `linkdefs` is shared with nested streams, so definitions collected so far
/// are visible when normalizing them.
fn normalize<'a>(MdStream(stream): MdStream<'a>, linkdefs: &mut Vec<MdTree<'a>>) -> MdStream<'a> {
let mut new_stream = Vec::with_capacity(stream.len());
// Gather this stream's definitions first so that links can reference
// definitions that appear later in the stream
let new_defs = stream.iter().filter(|tt| matches!(tt, MdTree::LinkDef { .. }));
linkdefs.extend(new_defs.cloned());
// Run plaintext expansions on types that need it, call this function on nested types
for item in stream {
match item {
MdTree::PlainText(txt) => expand_plaintext(txt, &mut new_stream, MdTree::PlainText),
MdTree::Strong(txt) => expand_plaintext(txt, &mut new_stream, MdTree::Strong),
MdTree::Emphasis(txt) => expand_plaintext(txt, &mut new_stream, MdTree::Emphasis),
MdTree::Strikethrough(txt) => {
expand_plaintext(txt, &mut new_stream, MdTree::Strikethrough);
}
MdTree::RefLink { disp, id } => new_stream.push(match_reflink(linkdefs, disp, id)),
MdTree::OrderedListItem(n, st) => {
new_stream.push(MdTree::OrderedListItem(n, normalize(st, linkdefs)));
}
MdTree::UnorderedListItem(st) => {
new_stream.push(MdTree::UnorderedListItem(normalize(st, linkdefs)));
}
MdTree::Heading(n, st) => new_stream.push(MdTree::Heading(n, normalize(st, linkdefs))),
_ => new_stream.push(item),
}
}
// Remove non printing types, duplicate paragraph breaks, and breaks at start/end
new_stream.retain(|x| !matches!(x, MdTree::Comment(_) | MdTree::LinkDef { .. }));
new_stream.dedup_by(|r, l| matches!((r, l), (MdTree::ParagraphBreak, MdTree::ParagraphBreak)));
if new_stream.first().is_some_and(is_break_ty) {
new_stream.remove(0);
}
if new_stream.last().is_some_and(is_break_ty) {
new_stream.pop();
}
// Remove paragraph breaks that shouldn't be there. w[1] is what will be
// removed in these cases. Note that these are the items to keep, not delete
// (for `retain`)
let to_keep: Vec<bool> = new_stream
.windows(3)
.map(|w| {
!((matches!(&w[1], MdTree::ParagraphBreak)
&& matches!(should_break(&w[0], &w[2]), BreakRule::Always(1) | BreakRule::Never))
|| (matches!(&w[1], MdTree::PlainText(txt) if txt.trim().is_empty())
&& matches!(
should_break(&w[0], &w[2]),
BreakRule::Always(_) | BreakRule::Never
)))
})
.collect();
// The windows only cover interior items; the first and last are always kept
let mut iter = iter::once(true).chain(to_keep).chain(iter::once(true));
new_stream.retain(|_| iter.next().unwrap());
// Insert line or paragraph breaks where there should be some
let mut insertions = 0;
let to_insert: Vec<(usize, MdTree<'_>)> = new_stream
.windows(2)
.enumerate()
.filter_map(|(idx, w)| match should_break(&w[0], &w[1]) {
BreakRule::Always(1) => Some((idx, MdTree::LineBreak)),
BreakRule::Always(2) => Some((idx, MdTree::ParagraphBreak)),
_ => None,
})
.map(|(idx, tt)| {
// Each earlier insertion shifts later positions by one; the break goes
// after the window's first item
insertions += 1;
(idx + insertions, tt)
})
.collect();
to_insert.into_iter().for_each(|(idx, tt)| new_stream.insert(idx, tt));
MdStream(new_stream)
}
/// Whether two types should or shouldn't have a paragraph break between them
#[derive(Clone, Copy, Debug, PartialEq)]
enum BreakRule {
/// A break is required: `normalize` inserts a `LineBreak` for `1` and a
/// `ParagraphBreak` for `2`
Always(u8),
/// No break belongs between the two
Never,
/// A break may or may not be present
Optional,
}
/// Decide which break rule applies between two adjacent trees
///
/// Arms are checked in order, so earlier rules take precedence over later
/// ones. Panics if either side is a `LineBreak`.
fn should_break(left: &MdTree<'_>, right: &MdTree<'_>) -> BreakRule {
use MdTree::*;
match (left, right) {
// Separate these types with a single line
(HorizontalRule, _)
| (_, HorizontalRule)
| (OrderedListItem(_, _), OrderedListItem(_, _))
| (UnorderedListItem(_), UnorderedListItem(_)) => BreakRule::Always(1),
// Condensed types shouldn't have an extra break on either side
(Comment(_) | ParagraphBreak | Heading(_, _), _) | (_, Comment(_) | ParagraphBreak) => {
BreakRule::Never
}
// Block types should always be separated by full breaks
(CodeBlock { .. } | OrderedListItem(_, _) | UnorderedListItem(_), _)
| (_, CodeBlock { .. } | Heading(_, _) | OrderedListItem(_, _) | UnorderedListItem(_)) => {
BreakRule::Always(2)
}
// Text types may or may not be separated by a break
(
CodeInline(_)
| Strong(_)
| Emphasis(_)
| Strikethrough(_)
| PlainText(_)
| Link { .. }
| RefLink { .. }
| LinkDef { .. },
CodeInline(_)
| Strong(_)
| Emphasis(_)
| Strikethrough(_)
| PlainText(_)
| Link { .. }
| RefLink { .. }
| LinkDef { .. },
) => BreakRule::Optional,
(LineBreak, _) | (_, LineBreak) => {
unreachable!("should have been removed during deduplication")
}
}
}
/// Whether a tree acts as a break: an explicit paragraph or line break, or
/// plain text consisting only of whitespace (>1 break between paragraphs acts
/// as a break)
fn is_break_ty(val: &MdTree<'_>) -> bool {
    match val {
        MdTree::ParagraphBreak | MdTree::LineBreak => true,
        MdTree::PlainText(txt) => txt.trim().is_empty(),
        _ => false,
    }
}
/// Perform transformations to text. This splits paragraphs, replaces patterns,
/// and corrects newlines.
///
/// To avoid allocating strings (and using a different heavier tt type), our
/// replace method means split into three and append each. For this reason, any
/// viewer should treat consecutive `PlainText` types as belonging to the same
/// paragraph.
///
/// `f` builds the tree for each resulting piece of text (e.g.
/// `MdTree::PlainText` or `MdTree::Strong`); results are appended to `stream`.
fn expand_plaintext<'a>(
txt: &'a str,
stream: &mut Vec<MdTree<'a>>,
mut f: fn(&'a str) -> MdTree<'a>,
) {
if txt.is_empty() {
return;
} else if txt == "\n" {
// A lone newline becomes a single space, but only after something that
// text may directly follow
if let Some(tt) = stream.last() {
let tmp = MdTree::PlainText(" ");
if should_break(tt, &tmp) == BreakRule::Optional {
stream.push(tmp);
}
}
return;
}
// Two work queues that are swapped after each replacement pass
let mut queue1 = Vec::new();
let mut queue2 = Vec::new();
let stream_start_len = stream.len();
for paragraph in txt.split("\n\n") {
if paragraph.is_empty() {
stream.push(MdTree::ParagraphBreak);
continue;
}
let paragraph = trim_extra_ws(paragraph);
queue1.clear();
queue1.push(paragraph);
// Apply each replacement by splitting every piece on `from` and placing
// `to` between the parts
for (from, to) in REPLACEMENTS {
queue2.clear();
for item in &queue1 {
for s in item.split(from) {
queue2.extend(&[s, to]);
}
if queue2.len() > 1 {
let _ = queue2.pop(); // remove last unnecessary intersperse
}
}
mem::swap(&mut queue1, &mut queue2);
}
// Make sure we don't double whitespace
queue1.retain(|s| !s.is_empty());
for idx in 0..queue1.len() {
queue1[idx] = trim_extra_ws(queue1[idx]);
if idx < queue1.len() - 1
&& queue1[idx].ends_with(char::is_whitespace)
&& queue1[idx + 1].starts_with(char::is_whitespace)
{
queue1[idx] = queue1[idx].trim_end();
}
}
stream.extend(queue1.iter().copied().filter(|txt| !txt.is_empty()).map(&mut f));
// Paragraphs are separated by a break; the trailing one is removed below
stream.push(MdTree::ParagraphBreak);
}
if stream.len() - stream_start_len > 1 {
let _ = stream.pop(); // remove last unnecessary intersperse
}
}
/// Resolve a reference link into a standalone link using the known link
/// definitions
///
/// The definition is looked up by `match_id`, or by the display text when no
/// id was given. If no definition matches, the link target is left empty.
fn match_reflink<'a>(linkdefs: &[MdTree<'a>], disp: &'a str, match_id: Option<&str>) -> MdTree<'a> {
    let wanted = match_id.unwrap_or(disp);
    let link = linkdefs
        .iter()
        .find_map(|def| match def {
            MdTree::LinkDef { id, link } if *id == wanted => Some(*link),
            _ => None,
        })
        .unwrap_or(""); // link not found
    MdTree::Link { disp, link }
}
/// If there is more than one whitespace char at start or end, trim the extras
fn trim_extra_ws(mut txt: &str) -> &str {
let start_ws =
txt.bytes().position(|ch| !ch.is_ascii_whitespace()).unwrap_or(txt.len()).saturating_sub(1);
txt = &txt[start_ws..];
let end_ws = txt
.bytes()
.rev()
.position(|ch| !ch.is_ascii_whitespace())
.unwrap_or(txt.len())
.saturating_sub(1);
&txt[..txt.len() - end_ws]
}
/// Remove all leading ASCII whitespace from `buf`
fn trim_ascii_start(buf: &[u8]) -> &[u8] {
    let first_kept = buf
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .unwrap_or(buf.len());
    &buf[first_kept..]
}
#[cfg(test)]
#[path = "tests/parse.rs"]
mod tests;

View File

@ -0,0 +1,189 @@
use std::cell::Cell;
use std::io::{self, Write};
use termcolor::{Buffer, Color, ColorSpec, WriteColor};
use crate::markdown::{MdStream, MdTree};
/// Initial wrapping width, and the upper bound applied to the detected
/// terminal width
const DEFAULT_COLUMN_WIDTH: usize = 140;
thread_local! {
/// Track the position of viewable characters in our buffer
static CURSOR: Cell<usize> = Cell::new(0);
/// Width used for wrapping and horizontal rules
static WIDTH: Cell<usize> = Cell::new(DEFAULT_COLUMN_WIDTH);
}
/// Write a parsed markdown stream as formatted terminal output into a buffer,
/// followed by a trailing newline
pub fn entrypoint(stream: &MdStream<'_>, buf: &mut Buffer) -> io::Result<()> {
// Outside of tests, wrap at the terminal width, capped at
// DEFAULT_COLUMN_WIDTH
#[cfg(not(test))]
if let Some((w, _)) = termize::dimensions() {
WIDTH.with(|c| c.set(std::cmp::min(w, DEFAULT_COLUMN_WIDTH)));
}
write_stream(stream, buf, None, 0)?;
buf.write_all(b"\n")
}
/// Write every tree of a stream, restoring the default style after each one
///
/// `default` is the style the stream's contents should have when no tree sets
/// its own; with `None` the buffer is simply reset. The buffer is reset once
/// more when the stream is done.
fn write_stream(
    MdStream(stream): &MdStream<'_>,
    buf: &mut Buffer,
    default: Option<&ColorSpec>,
    indent: usize,
) -> io::Result<()> {
    if let Some(spec) = default {
        buf.set_color(spec)?;
    } else {
        buf.reset()?;
    }

    for tree in stream {
        write_tt(tree, buf, indent)?;
        // `write_tt` resets the style when it finishes, so reapply ours
        if let Some(spec) = default {
            buf.set_color(spec)?;
        }
    }

    buf.reset()
}
/// Write a single tree to the buffer with its formatting, then reset the style
///
/// `indent` is the number of columns wrapped text is indented by. Nested
/// list item contents are written with an additional indent of four.
///
/// Returns any I/O error from writing to `buf`. Panics on `Comment`,
/// `LinkDef` and `RefLink`, which must have been removed by normalization, and
/// on a heading of level zero.
pub fn write_tt(tt: &MdTree<'_>, buf: &mut Buffer, indent: usize) -> io::Result<()> {
    match tt {
        MdTree::CodeBlock { txt, lang: _ } => {
            // Code blocks are written verbatim, without wrapping
            buf.set_color(ColorSpec::new().set_dimmed(true))?;
            buf.write_all(txt.as_bytes())?;
        }
        MdTree::CodeInline(txt) => {
            buf.set_color(ColorSpec::new().set_dimmed(true))?;
            write_wrapping(buf, txt, indent, None)?;
        }
        MdTree::Strong(txt) => {
            buf.set_color(ColorSpec::new().set_bold(true))?;
            write_wrapping(buf, txt, indent, None)?;
        }
        MdTree::Emphasis(txt) => {
            buf.set_color(ColorSpec::new().set_italic(true))?;
            write_wrapping(buf, txt, indent, None)?;
        }
        MdTree::Strikethrough(txt) => {
            buf.set_color(ColorSpec::new().set_strikethrough(true))?;
            write_wrapping(buf, txt, indent, None)?;
        }
        MdTree::PlainText(txt) => {
            write_wrapping(buf, txt, indent, None)?;
        }
        MdTree::Link { disp, link } => {
            write_wrapping(buf, disp, indent, Some(link))?;
        }
        MdTree::ParagraphBreak => {
            buf.write_all(b"\n\n")?;
            reset_cursor();
        }
        MdTree::LineBreak => {
            buf.write_all(b"\n")?;
            reset_cursor();
        }
        MdTree::HorizontalRule => {
            // Report a failed write to the caller instead of panicking
            let width = WIDTH.with(Cell::get);
            buf.write_all("-".repeat(width).as_bytes())?;
            reset_cursor();
        }
        MdTree::Heading(n, stream) => {
            let mut cs = ColorSpec::new();
            cs.set_fg(Some(Color::Cyan));
            match n {
                1 => cs.set_intense(true).set_bold(true).set_underline(true),
                2 => cs.set_intense(true).set_underline(true),
                3 => cs.set_intense(true).set_italic(true),
                4.. => cs.set_underline(true).set_italic(true),
                0 => unreachable!(),
            };
            write_stream(stream, buf, Some(&cs), 0)?;
            buf.write_all(b"\n")?;
        }
        MdTree::OrderedListItem(n, stream) => {
            let base = format!("{n}. ");
            write_wrapping(buf, &format!("{base:<4}"), indent, None)?;
            write_stream(stream, buf, None, indent + 4)?;
        }
        MdTree::UnorderedListItem(stream) => {
            let base = "* ";
            write_wrapping(buf, &format!("{base:<4}"), indent, None)?;
            write_stream(stream, buf, None, indent + 4)?;
        }
        // Patterns popped in previous step
        MdTree::Comment(_) | MdTree::LinkDef { .. } | MdTree::RefLink { .. } => unreachable!(),
    }

    buf.reset()?;

    Ok(())
}
/// End of that block, just wrap the line
///
/// Sets the tracked cursor column back to zero; callers use this after
/// writing a newline or a full-width rule.
fn reset_cursor() {
CURSOR.with(|cur| cur.set(0));
}
/// Write `text` to `buf`, wrapping lines at the configured width
///
/// This is generic on `Write` for testing. Lines are broken at whitespace, or
/// after a `-` or `_` separator; text with no break point before the width is
/// written up to its next whitespace. Every new line is indented by `indent`
/// columns. The position on the current line is tracked in `CURSOR` across
/// calls.
///
/// If `link_url` is given, the text is wrapped in escape sequences that make
/// it a clickable link; those extra tokens are not counted toward the width.
///
/// Returns any I/O error from writing to `buf`.
fn write_wrapping<B: io::Write>(
    buf: &mut B,
    text: &str,
    indent: usize,
    link_url: Option<&str>,
) -> io::Result<()> {
    let mut to_write = text;
    if let Some(url) = link_url {
        // This is a nonprinting prefix so we don't increment our cursor
        write!(buf, "\x1b]8;;{url}\x1b\\")?;
    }
    CURSOR.with(|cur| {
        loop {
            if cur.get() == 0 {
                // Pad with spaces for any indent, rather than slicing a
                // fixed-size buffer of spaces that a larger indent would overrun
                write!(buf, "{:width$}", "", width = indent)?;
                cur.set(indent);
            }
            // Saturate so that a cursor already past the width (e.g. a narrow
            // terminal with a deep indent) cannot underflow
            let ch_count = WIDTH.with(Cell::get).saturating_sub(cur.get());
            let mut iter = to_write.char_indices();
            let Some((end_idx, _)) = iter.nth(ch_count) else {
                // Write entire line
                buf.write_all(to_write.as_bytes())?;
                cur.set(cur.get() + to_write.chars().count());
                break;
            };
            if let Some((break_idx, ch)) = to_write[..end_idx]
                .char_indices()
                .rev()
                .find(|(_idx, ch)| ch.is_whitespace() || ['_', '-'].contains(ch))
            {
                // Found whitespace to break at
                if ch.is_whitespace() {
                    writeln!(buf, "{}", &to_write[..break_idx])?;
                    to_write = to_write[break_idx..].trim_start();
                } else {
                    // Break at a `-` or `_` separator
                    writeln!(buf, "{}", &to_write.get(..break_idx + 1).unwrap_or(to_write))?;
                    to_write = to_write.get(break_idx + 1..).unwrap_or_default().trim_start();
                }
            } else {
                // No whitespace, we need to just split
                let ws_idx =
                    iter.find(|(_, ch)| ch.is_whitespace()).map_or(to_write.len(), |(idx, _)| idx);
                writeln!(buf, "{}", &to_write[..ws_idx])?;
                // `ws_idx` is always a char boundary; slicing from it and
                // trimming also handles whitespace wider than one byte
                to_write = to_write[ws_idx..].trim_start();
            }
            cur.set(0);
        }
        if link_url.is_some() {
            // Nonprinting suffix that ends the link
            buf.write_all(b"\x1b]8;;\x1b\\")?;
        }

        Ok(())
    })
}
#[cfg(test)]
#[path = "tests/term.rs"]
mod tests;

View File

@ -0,0 +1,50 @@
# H1 Heading [with a link][remote-link]
H1 content: **some words in bold** and `so does inline code`
## H2 Heading
H2 content: _some words in italic_
### H3 Heading
H3 content: ~~strikethrough~~ text
#### H4 Heading
H4 content: A [simple link](https://docs.rs) and a [remote-link].
---
A section break was above. We can also do paragraph breaks:
(new paragraph) and unordered lists:
- Item 1 in `code`
- Item 2 in _italics_
Or ordered:
1. Item 1 in **bold**
2. Item 2 with some long lines that should wrap: Lorem ipsum dolor sit amet,
consectetur adipiscing elit. Aenean ac mattis nunc. Phasellus elit quam,
pulvinar ac risus in, dictum vehicula turpis. Vestibulum neque est, accumsan
in cursus sit amet, dictum a nunc. Suspendisse aliquet, lorem eu eleifend
accumsan, magna neque sodales nisi, a aliquet lectus leo eu sem.
---
## Code
Both `inline code` and code blocks are supported:
```rust
/// A rust enum
#[derive(Debug, PartialEq, Clone)]
enum Foo {
/// Start of line
Bar
}
```
[remote-link]: http://docs.rs

View File

@ -0,0 +1,35 @@
H1 Heading ]8;;http://docs.rs\with a link]8;;\
H1 content: some words in bold and so does inline code
H2 Heading
H2 content: some words in italic
H3 Heading
H3 content: strikethrough text
H4 Heading
H4 content: A ]8;;https://docs.rs\simple link]8;;\ and a ]8;;http://docs.rs\remote-link]8;;\.
--------------------------------------------------------------------------------------------------------------------------------------------
A section break was above. We can also do paragraph breaks:
(new paragraph) and unordered lists:
* Item 1 in code
* Item 2 in italics
Or ordered:
1. Item 1 in bold
2. Item 2 with some long lines that should wrap: Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean ac mattis nunc. Phasellus
elit quam, pulvinar ac risus in, dictum vehicula turpis. Vestibulum neque est, accumsan in cursus sit amet, dictum a nunc. Suspendisse
aliquet, lorem eu eleifend accumsan, magna neque sodales nisi, a aliquet lectus leo eu sem.
--------------------------------------------------------------------------------------------------------------------------------------------
Code
Both inline code and code blocks are supported:
/// A rust enum
#[derive(Debug, PartialEq, Clone)]
enum Foo {
/// Start of line
Bar
}

View File

@ -0,0 +1,312 @@
use super::*;
use ParseOpt as PO;
/// `parse_simple_pat` with the strong delimiters on both sides: a well-formed span yields
/// `MdTree::Strong` plus the unparsed remainder, an escaped closer yields `None`.
#[test]
fn test_parse_simple() {
    let well_formed = "**abcd** rest".as_bytes();
    let (tree, remainder) =
        parse_simple_pat(well_formed, STG, STG, PO::None, MdTree::Strong).unwrap();
    assert_eq!(tree, MdTree::Strong("abcd"));
    assert_eq!(remainder, b" rest");

    // Escaping should fail
    let escaped_closer = r"**abcd\** rest".as_bytes();
    assert!(parse_simple_pat(escaped_closer, STG, STG, PO::None, MdTree::Strong).is_none());
}
/// Comments are parsed with `PO::TrimNoEsc`: the body is trimmed and a backslash before the
/// closing sequence does not prevent the comment from ending.
#[test]
fn test_parse_comment() {
    // (input, expected comment body); the remainder is `rest` in both cases
    let cases = [
        ("<!-- foobar! -->rest", "foobar!"),
        (r"<!-- foobar! \-->rest", r"foobar! \"),
    ];
    for (input, body) in cases {
        let (tree, remainder) =
            parse_simple_pat(input.as_bytes(), CMT_S, CMT_E, PO::TrimNoEsc, MdTree::Comment)
                .unwrap();
        assert_eq!(tree, MdTree::Comment(body));
        assert_eq!(remainder, b"rest");
    }
}
/// `parse_heading` returns the heading level with its parsed contents and stops before the
/// newline that ends the heading line.
#[test]
fn test_parse_heading() {
    // Heading followed by more input: the remainder starts at the newline
    let (tree, remainder) = parse_heading("# Top level\nrest".as_bytes()).unwrap();
    assert_eq!(tree, MdTree::Heading(1, vec![MdTree::PlainText("Top level")].into()));
    assert_eq!(remainder, b"\nrest");

    // Heading that runs to the end of the input: nothing is left over
    let (tree, remainder) = parse_heading("# Empty".as_bytes()).unwrap();
    assert_eq!(tree, MdTree::Heading(1, vec![MdTree::PlainText("Empty")].into()));
    assert_eq!(remainder, b"");

    // Combo
    let formatted_title = vec![
        MdTree::PlainText("Top "),
        MdTree::CodeInline("level"),
        MdTree::PlainText(" "),
        MdTree::Emphasis("woo"),
    ];
    let (tree, remainder) = parse_heading("### Top `level` _woo_\nrest".as_bytes()).unwrap();
    assert_eq!(tree, MdTree::Heading(3, formatted_title.into()));
    assert_eq!(remainder, b"\nrest");
}
/// `parse_codeinline` handles single and repeated backtick fences, keeps embedded newlines,
/// and does not treat a backslash as an escape.
#[test]
fn test_parse_code_inline() {
    // (input, expected code text); every case leaves ` rest` unparsed
    let cases = [
        ("`abcd` rest", "abcd"),
        // extra backticks, newline
        ("```ab\ncd``` rest", "ab\ncd"),
        // test no escaping
        (r"`abcd\` rest", r"abcd\"),
    ];
    for (input, code) in cases {
        let (tree, remainder) = parse_codeinline(input.as_bytes()).unwrap();
        assert_eq!(tree, MdTree::CodeInline(code));
        assert_eq!(remainder, b" rest");
    }
}
/// `parse_codeblock` extracts the body and optional language tag of a fenced block; a shorter
/// run of backticks inside a longer fence stays part of the body.
#[test]
fn test_parse_code_block() {
    // (input, expected body, expected language); the remainder is the same for both
    let cases = [
        ("```rust\ncode\ncode\n```\nleftovers", "code\ncode", Some("rust")),
        ("`````\ncode\ncode````\n`````\nleftovers", "code\ncode````", None),
    ];
    for (input, txt, lang) in cases {
        let (tree, remainder) = parse_codeblock(input.as_bytes());
        assert_eq!(tree, MdTree::CodeBlock { txt, lang });
        assert_eq!(remainder, b"\nleftovers");
    }
}
/// `parse_any_link` recognises inline links, reference links (with and without an explicit
/// id) and link definitions.
#[test]
fn test_parse_link() {
    // (input, second argument to `parse_any_link`, expected node, expected remainder)
    let cases = [
        (
            "[see here](docs.rs) other",
            false,
            MdTree::Link { disp: "see here", link: "docs.rs" },
            " other",
        ),
        (
            "[see here](docs.rs) other",
            true,
            MdTree::Link { disp: "see here", link: "docs.rs" },
            " other",
        ),
        ("[see here] other", true, MdTree::RefLink { disp: "see here", id: None }, " other"),
        (
            "[see here][docs-rs] other",
            false,
            MdTree::RefLink { disp: "see here", id: Some("docs-rs") },
            " other",
        ),
        (
            "[see here]: docs.rs\nother",
            true,
            MdTree::LinkDef { id: "see here", link: "docs.rs" },
            "\nother",
        ),
    ];
    for (input, top_level, expected, rest) in cases {
        let (tree, remainder) = parse_any_link(input.as_bytes(), top_level).unwrap();
        assert_eq!(tree, expected);
        assert_eq!(remainder, rest.as_bytes());
    }
}
// Fixtures for `test_indented_section`. The whitespace inside these raw strings is part of
// the fixture (the test asserts on the exact bytes returned), so do not reformat them.

/// Section followed by a line (`not ind`) that the test expects to be left in the remainder.
const IND1: &str = r"test standard
ind
ind2
not ind";
/// Section that runs to the end of the input; the test expects only a newline to remain.
const IND2: &str = r"test end of stream
1
2
";
/// Per its label, a section involving empty lines; the test expects `not ind` to be left over.
const IND3: &str = r"test empty lines
1
2
not ind";
/// `get_indented_section` splits its input into the leading section and whatever follows it.
#[test]
fn test_indented_section() {
    // (input, expected section text, expected remainder)
    let cases = [
        (IND1, "test standard\n ind\n ind2", "\nnot ind"),
        (IND2, "test end of stream\n 1\n 2", "\n"),
        (IND3, "test empty lines\n 1\n 2", "\n\nnot ind"),
    ];
    for (input, section, remainder) in cases {
        let (txt, rest) = get_indented_section(input.as_bytes());
        assert_eq!(str::from_utf8(txt).unwrap(), section);
        assert_eq!(str::from_utf8(rest).unwrap(), remainder);
    }
}
/// A level-1 heading directly followed by a content line; input for `test_heading_breaks`.
const HBT: &str = r"# Heading
content";
/// A heading followed by a content line parses to the heading and then the plain text, with
/// no separate break node between them.
#[test]
fn test_heading_breaks() {
    let parsed = entrypoint(HBT);
    let heading = MdTree::Heading(1, vec![MdTree::PlainText("Heading")].into());
    let expected = vec![heading, MdTree::PlainText("content")].into();
    assert_eq!(parsed, expected);
}
// Inputs for `test_newline_breaks`; each one is expected to parse to the same
// `start` / paragraph break / `end` stream. Presumably they differ only in the whitespace
// between the two words, so keep the literals exactly as they are.
const NL1: &str = r"start
end";
const NL2: &str = r"start
end";
const NL3: &str = r"start
end";
/// Every `NL*` fixture must produce `start`, a paragraph break, then `end`.
#[test]
fn test_newline_breaks() {
    let expected =
        vec![MdTree::PlainText("start"), MdTree::ParagraphBreak, MdTree::PlainText("end")].into();
    // The index in the failure message identifies which fixture broke
    for (idx, input) in [NL1, NL2, NL3].into_iter().enumerate() {
        let parsed = entrypoint(input);
        assert_eq!(parsed, expected, "failed {idx}");
    }
}
/// An emphasis span that continues across a line break; input for `test_wrap_pattern`.
const WRAP: &str = "plain _italics
italics_";
/// An emphasis span crossing a line break comes back as three emphasis nodes, with the line
/// break represented by an emphasized single space.
#[test]
fn test_wrap_pattern() {
    let parsed = entrypoint(WRAP);
    let expected = vec![
        MdTree::PlainText("plain "),
        MdTree::Emphasis("italics"),
        MdTree::Emphasis(" "),
        MdTree::Emphasis("italics"),
    ]
    .into();
    assert_eq!(parsed, expected);
}
/// Two formatted spans on consecutive lines with no plain text around them; input for
/// `test_wrap_notxt`.
const WRAP_NOTXT: &str = r"_italics_
**bold**";
/// The line break between two formatted spans becomes a plain-text space.
#[test]
fn test_wrap_notxt() {
    let parsed = entrypoint(WRAP_NOTXT);
    let separator = MdTree::PlainText(" ");
    let expected = vec![MdTree::Emphasis("italics"), separator, MdTree::Strong("bold")].into();
    assert_eq!(parsed, expected);
}
/// Input for `test_list`: unordered list items mixed with a comment, inline and reference
/// links, an emoji shortcode, and a trailing link definition. Whitespace inside the literal is
/// part of the fixture.
const MIXED_LIST: &str = r"start
- _italics item_
<!-- comment -->
- **bold item**
second line [link1](foobar1)
third line [link2][link-foo]
- :crab:
extra indent
end
[link-foo]: foobar2
";
/// Parses `MIXED_LIST` and checks the resulting stream: the comment does not appear, the
/// reference link carries the target from its definition, and `:crab:` becomes the emoji.
#[test]
fn test_list() {
    let italics_item = MdTree::UnorderedListItem(vec![MdTree::Emphasis("italics item")].into());
    let bold_item = MdTree::UnorderedListItem(
        vec![
            MdTree::Strong("bold item"),
            MdTree::PlainText(" second line "),
            MdTree::Link { disp: "link1", link: "foobar1" },
            MdTree::PlainText(" third line "),
            MdTree::Link { disp: "link2", link: "foobar2" },
        ]
        .into(),
    );
    let crab_item = MdTree::UnorderedListItem(
        vec![MdTree::PlainText("🦀"), MdTree::PlainText(" extra indent")].into(),
    );

    let expected = vec![
        MdTree::PlainText("start"),
        MdTree::ParagraphBreak,
        italics_item,
        MdTree::LineBreak,
        bold_item,
        MdTree::LineBreak,
        crab_item,
        MdTree::ParagraphBreak,
        MdTree::PlainText("end"),
    ]
    .into();
    assert_eq!(entrypoint(MIXED_LIST), expected);
}
/// Input for `test_without_breaks`: plain text, a heading, an ordered list item, a fenced code
/// block and inline code on directly adjacent lines.
const SMOOSHED: &str = r#"
start
### heading
1. ordered item
```rust
println!("Hello, world!");
```
`inline`
``end``
"#;
/// Parses `SMOOSHED` and checks that each element is still recognised and that the expected
/// paragraph breaks appear in the stream.
#[test]
fn test_without_breaks() {
    let heading = MdTree::Heading(3, vec![MdTree::PlainText("heading")].into());
    let ordered_item = MdTree::OrderedListItem(1, vec![MdTree::PlainText("ordered item")].into());
    let code_block = MdTree::CodeBlock { txt: r#"println!("Hello, world!");"#, lang: Some("rust") };

    let expected = vec![
        MdTree::PlainText("start"),
        MdTree::ParagraphBreak,
        heading,
        ordered_item,
        MdTree::ParagraphBreak,
        code_block,
        MdTree::ParagraphBreak,
        MdTree::CodeInline("inline"),
        MdTree::PlainText(" "),
        MdTree::CodeInline("end"),
    ]
    .into();
    assert_eq!(entrypoint(SMOOSHED), expected);
}
/// Input for `test_code_at_start`: inline code spans that begin a line, alternating with
/// plain-text lines.
const CODE_STARTLINE: &str = r#"
start
`code`
middle
`more code`
end
"#;
/// Inline code at the start of a line is parsed as inline code, and every line break in
/// `CODE_STARTLINE` becomes its own plain-text space node.
#[test]
fn test_code_at_start() {
    // One node per input line, in order
    let per_line = [
        MdTree::PlainText("start"),
        MdTree::CodeInline("code"),
        MdTree::PlainText("middle"),
        MdTree::CodeInline("more code"),
        MdTree::PlainText("end"),
    ];

    // Interleave a single-space node between consecutive line nodes
    let mut nodes = Vec::new();
    for (idx, node) in per_line.into_iter().enumerate() {
        if idx > 0 {
            nodes.push(MdTree::PlainText(" "));
        }
        nodes.push(node);
    }

    let expected = nodes.into();
    assert_eq!(entrypoint(CODE_STARTLINE), expected);
}

View File

@ -0,0 +1,90 @@
use std::io::BufWriter;
use std::path::PathBuf;
use termcolor::{BufferWriter, ColorChoice};
use super::*;
use crate::markdown::MdStream;
/// Markdown fixture rendered by `test_output`.
const INPUT: &str = include_str!("input.md");
/// Path components of the blessed output file that `test_output` compares against (or
/// rewrites when blessing).
const OUTPUT_PATH: &[&str] =
    &[env!("CARGO_MANIFEST_DIR"), "src", "markdown", "tests", "output.stdout"];
/// Wrap width installed into `WIDTH` by `test_wrapping_write`.
const TEST_WIDTH: usize = 80;
/// Source text for `test_wrapping_write`. Whitespace inside the literal (including the
/// trailing space) is part of the fixture.
// We try to make some words long to create corner cases
const TXT: &str = r"Lorem ipsum dolor sit amet, consecteturadipiscingelit.
Fusce-id-urna-sollicitudin, pharetra nisl nec, lobortis tellus. In at
metus hendrerit, tincidunteratvel, ultrices turpis. Curabitur_risus_sapien,
porta-sed-nunc-sed, ultricesposuerelacus. Sed porttitor quis
dolor non venenatis. Aliquam ut. ";
/// Exact output `test_wrapping_write` expects after the escape sequences and the link
/// placeholder have been removed. Compared byte-for-byte, so do not reformat.
const WRAPPED: &str = r"Lorem ipsum dolor sit amet, consecteturadipiscingelit. Fusce-id-urna-
sollicitudin, pharetra nisl nec, lobortis tellus. In at metus hendrerit,
tincidunteratvel, ultrices turpis. Curabitur_risus_sapien, porta-sed-nunc-sed,
ultricesposuerelacus. Sed porttitor quis dolor non venenatis. Aliquam ut. Lorem
ipsum dolor sit amet, consecteturadipiscingelit. Fusce-id-urna-
sollicitudin, pharetra nisl nec, lobortis tellus. In at metus hendrerit,
tincidunteratvel, ultrices turpis. Curabitur_risus_sapien, porta-sed-nunc-
sed, ultricesposuerelacus. Sed porttitor quis dolor non venenatis. Aliquam
ut. Sample link lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet,
consecteturadipiscingelit. Fusce-id-urna-sollicitudin, pharetra nisl nec,
lobortis tellus. In at metus hendrerit, tincidunteratvel, ultrices turpis.
Curabitur_risus_sapien, porta-sed-nunc-sed, ultricesposuerelacus. Sed porttitor
quis dolor non venenatis. Aliquam ut. ";
/// Writes the same text several times through `write_wrapping` (indents 0 and 4, once as a
/// link), strips the terminal escape sequences, and checks both the per-line width limit and
/// the exact wrapped output.
#[test]
fn test_wrapping_write() {
    WIDTH.with(|w| w.set(TEST_WIDTH));

    // Flatten the fixture into one logical line before handing it to the writer.
    // NOTE(review): as written, the last `replace` removes every single space, which could
    // not produce the spaces present in `WRAPPED`; presumably the pattern is meant to be a
    // longer run of indentation — confirm the literal's whitespace.
    let flattened =
        TXT.replace("-\n", "-").replace("_\n", "_").replace('\n', " ").replace(" ", "");
    let link_text = "Sample link lorem ipsum dolor sit amet. ";
    let link_dest = "link-address-placeholder";

    let mut sink = BufWriter::new(Vec::new());
    write_wrapping(&mut sink, &flattened, 0, None).unwrap();
    write_wrapping(&mut sink, &flattened, 4, None).unwrap();
    write_wrapping(&mut sink, link_text, 4, Some(link_dest)).unwrap();
    write_wrapping(&mut sink, &flattened, 0, None).unwrap();

    // Remove the hyperlink escape sequences and the link target so only the visible text
    // remains; the order of removals matters.
    let raw = String::from_utf8(sink.into_inner().unwrap()).unwrap();
    let visible = ["\x1b\\", "\x1b", "]8;;", link_dest]
        .iter()
        .fold(raw, |text, &noise| text.replace(noise, ""));

    visible
        .lines()
        .for_each(|line| assert!(line.len() <= TEST_WIDTH, "line length\n'{line}'"));
    assert_eq!(visible, WRAPPED);
}
/// Renders `INPUT` with color forced on and compares the bytes against the blessed file at
/// `OUTPUT_PATH`.
///
/// When the `RUSTC_BLESS` environment variable is `1`, the blessed file is rewritten with the
/// freshly rendered output instead of being compared.
#[test]
fn test_output() {
    // Capture `--bless` when run via ./x
    let bless = std::env::var("RUSTC_BLESS").unwrap_or_default() == "1";

    let ast = MdStream::parse_str(INPUT);
    let bufwtr = BufferWriter::stderr(ColorChoice::Always);
    let mut buffer = bufwtr.buffer();
    ast.write_termcolor_buf(&mut buffer).unwrap();
    let actual = buffer.into_inner();

    let mut blessed = PathBuf::new();
    blessed.extend(OUTPUT_PATH);

    if bless {
        std::fs::write(&blessed, &actual).unwrap();
        eprintln!("blessed output at {}", blessed.display());
        return;
    }

    // A missing or unreadable blessed file should say how to create it rather than fail
    // with a bare I/O error.
    let expected = std::fs::read(&blessed).unwrap_or_else(|err| {
        panic!(
            "failed to read blessed output at {}: {err}; rerun with `--bless` (RUSTC_BLESS=1) \
             to generate it",
            blessed.display()
        )
    });

    if expected != actual {
        // hack: I don't know any way to write bytes to the captured stdout
        // that cargo test uses
        let mut out = std::io::stdout();
        // Show both sides with accurate labels: the blessed file is what was expected, the
        // freshly rendered buffer is what was actually produced.
        out.write_all(b"\n\nMarkdown output did not match. Expected:\n").unwrap();
        out.write_all(&expected).unwrap();
        out.write_all(b"\n\nActual:\n").unwrap();
        out.write_all(&actual).unwrap();
        out.write_all(b"\n\n").unwrap();
        panic!("markdown output mismatch");
    }
}

View File

@ -909,6 +909,7 @@ impl Default for Options {
json_future_incompat: false,
pretty: None,
working_dir: RealFileName::LocalPath(std::env::current_dir().unwrap()),
color: ColorConfig::Auto,
}
}
}
@ -2673,6 +2674,7 @@ pub fn build_session_options(
json_future_incompat,
pretty,
working_dir,
color,
}
}

View File

@ -4,6 +4,7 @@ use crate::search_paths::SearchPath;
use crate::utils::NativeLib;
use crate::{lint, EarlyErrorHandler};
use rustc_data_structures::profiling::TimePassesFormat;
use rustc_errors::ColorConfig;
use rustc_errors::{LanguageIdentifier, TerminalUrl};
use rustc_target::spec::{CodeModel, LinkerFlavorCli, MergeFunctions, PanicStrategy, SanitizerSet};
use rustc_target::spec::{
@ -212,6 +213,7 @@ top_level_options!(
/// The (potentially remapped) working directory
working_dir: RealFileName [TRACKED],
color: ColorConfig [UNTRACKED],
}
);

View File

@ -2217,6 +2217,11 @@ fn prepare_cargo_test(
) -> Command {
let mut cargo = cargo.into();
// If bless is passed, give downstream crates a way to use it
if builder.config.cmd.bless() {
cargo.env("RUSTC_BLESS", "1");
}
// Pass in some standard flags then iterate over the graph we've discovered
// in `cargo metadata` with the maps above and figure out what `-p`
// arguments need to get passed.