mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-25 08:13:41 +00:00
Improve markdown parsing for the doc lint
This commit is contained in:
parent
71b41b6e01
commit
ac0bb4126c
@ -51,6 +51,8 @@ impl EarlyLintPass for Doc {
|
||||
}
|
||||
|
||||
pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) {
|
||||
let mut docs = vec![];
|
||||
|
||||
let mut in_multiline = false;
|
||||
for attr in attrs {
|
||||
if attr.node.is_sugared_doc {
|
||||
@ -66,37 +68,20 @@ pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [a
|
||||
in_multiline = !in_multiline;
|
||||
}
|
||||
if !in_multiline {
|
||||
check_doc(cx, valid_idents, real_doc, span);
|
||||
docs.push((real_doc, span));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! jump_to {
|
||||
// Get the next character’s first byte position in a UTF-8-friendly way.
|
||||
(@next_char, $chars: expr, $len: expr) => {{
|
||||
if let Some(&(pos, _)) = $chars.peek() {
|
||||
pos
|
||||
} else {
|
||||
$len
|
||||
}
|
||||
}};
|
||||
|
||||
// Jump to the next `$c`. If no such character is found, give up.
|
||||
($chars: expr, $c: expr, $len: expr) => {{
|
||||
if $chars.find(|&(_, c)| c == $c).is_some() {
|
||||
jump_to!(@next_char, $chars, $len)
|
||||
}
|
||||
else {
|
||||
return;
|
||||
}
|
||||
}};
|
||||
for (doc, span) in docs {
|
||||
let _ = check_doc(cx, valid_idents, doc, span);
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(while_let_loop)] // #362
|
||||
pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], doc: &str, span: Span) {
|
||||
pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], doc: &str, span: Span) -> Result<(), ()> {
|
||||
// In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context.
|
||||
// There really is no markdown specification that would disambiguate this properly. This is
|
||||
// what GitHub and Rustdoc do:
|
||||
@ -108,8 +93,8 @@ pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], doc: &str, span: Sp
|
||||
// (_baz_) → (<em>baz</em>)
|
||||
// foo _ bar _ baz → foo _ bar _ baz
|
||||
|
||||
/// Character that can appear in a word
|
||||
fn is_word_char(c: char) -> bool {
|
||||
/// Character that can appear in a path
|
||||
fn is_path_char(c: char) -> bool {
|
||||
match c {
|
||||
t if t.is_alphanumeric() => true,
|
||||
':' | '_' => true,
|
||||
@ -117,81 +102,140 @@ pub fn check_doc(cx: &EarlyContext, valid_idents: &[String], doc: &str, span: Sp
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(cast_possible_truncation)]
|
||||
fn word_span(mut span: Span, begin: usize, end: usize) -> Span {
|
||||
debug_assert_eq!(end as u32 as usize, end);
|
||||
debug_assert_eq!(begin as u32 as usize, begin);
|
||||
span.hi = span.lo + BytePos(end as u32);
|
||||
span.lo = span.lo + BytePos(begin as u32);
|
||||
span
|
||||
#[derive(Clone, Debug)]
|
||||
struct Parser<'a> {
|
||||
link: bool,
|
||||
line: &'a str,
|
||||
span: Span,
|
||||
current_word_begin: usize,
|
||||
new_line: bool,
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
let mut new_line = true;
|
||||
let len = doc.len();
|
||||
let mut chars = doc.char_indices().peekable();
|
||||
let mut current_word_begin = 0;
|
||||
impl<'a> Parser<'a> {
|
||||
fn advance_begin(&mut self) {
|
||||
self.current_word_begin = self.pos;
|
||||
}
|
||||
|
||||
fn peek(&self) -> Option<char> {
|
||||
self.line[self.pos..].chars().next()
|
||||
}
|
||||
|
||||
fn jump_to(&mut self, n: char) -> Result<(), ()> {
|
||||
while let Some(c) = self.next() {
|
||||
if c == n {
|
||||
self.advance_begin();
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
return Err(());
|
||||
}
|
||||
|
||||
fn put_back(&mut self, c: char) {
|
||||
self.pos -= c.len_utf8();
|
||||
}
|
||||
|
||||
#[allow(cast_possible_truncation)]
|
||||
fn word(&self) -> (&'a str, Span) {
|
||||
let begin = self.current_word_begin;
|
||||
let end = self.pos;
|
||||
|
||||
debug_assert_eq!(end as u32 as usize, end);
|
||||
debug_assert_eq!(begin as u32 as usize, begin);
|
||||
|
||||
let mut span = self.span;
|
||||
span.hi = span.lo + BytePos(end as u32);
|
||||
span.lo = span.lo + BytePos(begin as u32);
|
||||
|
||||
(&self.line[begin..end], span)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Parser<'a> {
|
||||
type Item = char;
|
||||
|
||||
fn next(&mut self) -> Option<char> {
|
||||
let mut chars = self.line[self.pos..].chars();
|
||||
let c = chars.next();
|
||||
|
||||
if let Some(c) = c {
|
||||
self.pos += c.len_utf8();
|
||||
} else {
|
||||
// TODO: new line
|
||||
}
|
||||
|
||||
c
|
||||
}
|
||||
}
|
||||
|
||||
let mut parser = Parser {
|
||||
link: false,
|
||||
line: doc,
|
||||
span: span,
|
||||
current_word_begin: 0,
|
||||
new_line: true,
|
||||
pos: 0,
|
||||
};
|
||||
|
||||
loop {
|
||||
match chars.next() {
|
||||
Some((_, c)) => {
|
||||
match parser.next() {
|
||||
Some(c) => {
|
||||
match c {
|
||||
'#' if new_line => { // don’t warn on titles
|
||||
current_word_begin = jump_to!(chars, '\n', len);
|
||||
try!(parser.jump_to('\n'));
|
||||
}
|
||||
'`' => {
|
||||
current_word_begin = jump_to!(chars, '`', len);
|
||||
try!(parser.jump_to('`'));
|
||||
}
|
||||
'[' => {
|
||||
let end = jump_to!(chars, ']', len);
|
||||
let link_text = &doc[current_word_begin + 1..end];
|
||||
let word_span = word_span(span, current_word_begin + 1, end + 1);
|
||||
|
||||
match chars.peek() {
|
||||
Some(&(_, c)) => {
|
||||
// Trying to parse a link. Let’s ignore the link.
|
||||
|
||||
// FIXME: how does markdown handle such a link?
|
||||
// https://en.wikipedia.org/w/index.php?title=)
|
||||
match c {
|
||||
'(' => { // inline link
|
||||
current_word_begin = jump_to!(chars, ')', len);
|
||||
check_doc(cx, valid_idents, link_text, word_span);
|
||||
}
|
||||
'[' => { // reference link
|
||||
current_word_begin = jump_to!(chars, ']', len);
|
||||
check_doc(cx, valid_idents, link_text, word_span);
|
||||
}
|
||||
':' => { // reference link
|
||||
current_word_begin = jump_to!(chars, '\n', len);
|
||||
}
|
||||
_ => { // automatic reference link
|
||||
current_word_begin = jump_to!(@next_char, chars, len);
|
||||
check_doc(cx, valid_idents, link_text, word_span);
|
||||
}
|
||||
// Check for a reference definition `[foo]:` at the beginning of a line
|
||||
let mut link = true;
|
||||
if parser.new_line {
|
||||
let mut lookup_parser = parser.clone();
|
||||
if let Some(_) = lookup_parser.find(|&c| c == ']') {
|
||||
if let Some(':') = lookup_parser.next() {
|
||||
try!(lookup_parser.jump_to(')'));
|
||||
parser = lookup_parser;
|
||||
link = false;
|
||||
}
|
||||
}
|
||||
None => return,
|
||||
}
|
||||
|
||||
parser.advance_begin();
|
||||
parser.link = link;
|
||||
}
|
||||
']' if parser.link => {
|
||||
parser.link = false;
|
||||
|
||||
match parser.peek() {
|
||||
Some('(') => try!(parser.jump_to(')')),
|
||||
Some('[') => try!(parser.jump_to(']')),
|
||||
Some(_) => continue,
|
||||
None => return Err(()),
|
||||
}
|
||||
}
|
||||
// anything that’s neither alphanumeric nor '_' is not part of an ident anyway
|
||||
c if !c.is_alphanumeric() && c != '_' => {
|
||||
current_word_begin = jump_to!(@next_char, chars, len);
|
||||
c if !is_path_char(c) => {
|
||||
parser.advance_begin();
|
||||
}
|
||||
_ => {
|
||||
let end = match chars.find(|&(_, c)| !is_word_char(c)) {
|
||||
Some((end, _)) => end,
|
||||
None => len,
|
||||
};
|
||||
let word_span = word_span(span, current_word_begin, end);
|
||||
check_word(cx, valid_idents, &doc[current_word_begin..end], word_span);
|
||||
current_word_begin = jump_to!(@next_char, chars, len);
|
||||
if let Some(c) = parser.find(|&c| !is_path_char(c)) {
|
||||
parser.put_back(c);
|
||||
}
|
||||
|
||||
let (word, span) = parser.word();
|
||||
check_word(cx, valid_idents, word, span);
|
||||
parser.advance_begin();
|
||||
}
|
||||
}
|
||||
|
||||
new_line = c == '\n' || (new_line && c.is_whitespace());
|
||||
parser.new_line = c == '\n' || (parser.new_line && c.is_whitespace());
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) {
|
||||
|
@ -12,6 +12,8 @@
|
||||
/// Markdown is _weird_. I mean _really weird_. This \_ is ok. So is `_`. But not Foo::some_fun
|
||||
//~^ ERROR: you should put `Foo::some_fun` between ticks
|
||||
/// which should be reported only once despite being __doubly bad__.
|
||||
/// Here be ::is::a::global:path.
|
||||
//~^ ERROR: you should put `is::a::global:path` between ticks
|
||||
/// be_sure_we_got_to_the_end_of_it
|
||||
//~^ ERROR: you should put `be_sure_we_got_to_the_end_of_it` between ticks
|
||||
fn foo_bar() {
|
||||
@ -141,3 +143,8 @@ fn issue900() {
|
||||
//~^ ERROR: you should put `be_sure_we_got_to_the_end_of_it` between ticks
|
||||
fn issue883() {
|
||||
}
|
||||
|
||||
/// `foo_bar
|
||||
/// baz_quz`
|
||||
fn multiline() {
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user