Auto merge of #74627 - petrochenkov:docbeauty2, r=Aaron1011

rustc_ast: Stop using "string typing" for doc comment tokens

Explicitly store their kind and style retrieved during lexing in the `token::DocComment`.

Also don't "beautify" doc comments before converting them to `#[doc]` attributes when passing them to macros (both declarative and procedural).
The trimming of empty lines, lines containing only `*`s, etc is purely a rustdoc's job as a part of its presentation of doc strings to users, rustc must not do this and must pass tokens as precisely as possible internally.
This commit is contained in:
bors 2020-08-07 13:29:25 +00:00
commit 64f99b4cfb
25 changed files with 313 additions and 241 deletions

View File

@ -23,7 +23,7 @@ pub use GenericArgs::*;
pub use UnsafeSource::*;
use crate::ptr::P;
use crate::token::{self, DelimToken};
use crate::token::{self, CommentKind, DelimToken};
use crate::tokenstream::{DelimSpan, TokenStream, TokenTree};
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
@ -2365,7 +2365,7 @@ pub enum AttrKind {
/// A doc comment (e.g. `/// ...`, `//! ...`, `/** ... */`, `/*! ... */`).
/// Doc attributes (e.g. `#[doc="..."]`) are represented with the `Normal`
/// variant (which is much less compact and thus more expensive).
DocComment(Symbol),
DocComment(CommentKind, Symbol),
}
/// `TraitRef`s appear in impls.

View File

@ -7,7 +7,7 @@ use crate::ast::{MacArgs, MacDelimiter, MetaItem, MetaItemKind, NestedMetaItem};
use crate::ast::{Path, PathSegment};
use crate::mut_visit::visit_clobber;
use crate::ptr::P;
use crate::token::{self, Token};
use crate::token::{self, CommentKind, Token};
use crate::tokenstream::{DelimSpan, TokenStream, TokenTree, TreeAndJoint};
use rustc_data_structures::sync::Lock;
@ -169,7 +169,7 @@ impl Attribute {
pub fn has_name(&self, name: Symbol) -> bool {
match self.kind {
AttrKind::Normal(ref item) => item.path == name,
AttrKind::DocComment(_) => false,
AttrKind::DocComment(..) => false,
}
}
@ -198,7 +198,7 @@ impl Attribute {
None
}
}
AttrKind::DocComment(_) => None,
AttrKind::DocComment(..) => None,
}
}
pub fn name_or_empty(&self) -> Symbol {
@ -218,7 +218,7 @@ impl Attribute {
Some(MetaItem { kind: MetaItemKind::List(list), .. }) => Some(list),
_ => None,
},
AttrKind::DocComment(_) => None,
AttrKind::DocComment(..) => None,
}
}
@ -314,13 +314,13 @@ impl Attribute {
pub fn is_doc_comment(&self) -> bool {
match self.kind {
AttrKind::Normal(_) => false,
AttrKind::DocComment(_) => true,
AttrKind::DocComment(..) => true,
}
}
pub fn doc_str(&self) -> Option<Symbol> {
match self.kind {
AttrKind::DocComment(symbol) => Some(symbol),
AttrKind::DocComment(.., data) => Some(data),
AttrKind::Normal(ref item) if item.path == sym::doc => {
item.meta(self.span).and_then(|meta| meta.value_str())
}
@ -331,14 +331,14 @@ impl Attribute {
pub fn get_normal_item(&self) -> &AttrItem {
match self.kind {
AttrKind::Normal(ref item) => item,
AttrKind::DocComment(_) => panic!("unexpected doc comment"),
AttrKind::DocComment(..) => panic!("unexpected doc comment"),
}
}
pub fn unwrap_normal_item(self) -> AttrItem {
match self.kind {
AttrKind::Normal(item) => item,
AttrKind::DocComment(_) => panic!("unexpected doc comment"),
AttrKind::DocComment(..) => panic!("unexpected doc comment"),
}
}
@ -405,8 +405,13 @@ pub fn mk_attr_outer(item: MetaItem) -> Attribute {
mk_attr(AttrStyle::Outer, item.path, item.kind.mac_args(item.span), item.span)
}
pub fn mk_doc_comment(style: AttrStyle, comment: Symbol, span: Span) -> Attribute {
Attribute { kind: AttrKind::DocComment(comment), id: mk_attr_id(), style, span }
pub fn mk_doc_comment(
comment_kind: CommentKind,
style: AttrStyle,
data: Symbol,
span: Span,
) -> Attribute {
Attribute { kind: AttrKind::DocComment(comment_kind, data), id: mk_attr_id(), style, span }
}
pub fn list_contains_name(items: &[NestedMetaItem], name: Symbol) -> bool {

View File

@ -582,7 +582,7 @@ pub fn noop_visit_attribute<T: MutVisitor>(attr: &mut Attribute, vis: &mut T) {
vis.visit_path(path);
visit_mac_args(args, vis);
}
AttrKind::DocComment(_) => {}
AttrKind::DocComment(..) => {}
}
vis.visit_span(span);
}

View File

@ -17,6 +17,12 @@ use rustc_span::{self, Span, DUMMY_SP};
use std::borrow::Cow;
use std::{fmt, mem};
#[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug, HashStable_Generic)]
pub enum CommentKind {
Line,
Block,
}
#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
#[derive(HashStable_Generic)]
pub enum BinOpToken {
@ -238,9 +244,10 @@ pub enum TokenKind {
Interpolated(Lrc<Nonterminal>),
// Can be expanded into several tokens.
/// A doc comment.
DocComment(Symbol),
/// A doc comment token.
/// `Symbol` is the doc comment's data excluding its "quotes" (`///`, `/**`, etc)
/// similarly to symbols in string literal tokens.
DocComment(CommentKind, ast::AttrStyle, Symbol),
// Junk. These carry no data because we don't really care about the data
// they *would* carry, and don't really want to allocate a new ident for

View File

@ -1,11 +1,7 @@
pub use CommentStyle::*;
use crate::ast;
use crate::ast::AttrStyle;
use rustc_span::source_map::SourceMap;
use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};
use log::debug;
#[cfg(test)]
mod tests;
@ -28,43 +24,48 @@ pub struct Comment {
pub pos: BytePos,
}
pub fn is_line_doc_comment(s: &str) -> bool {
let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/')
|| s.starts_with("//!");
debug!("is {:?} a doc comment? {}", s, res);
res
}
pub fn is_block_doc_comment(s: &str) -> bool {
// Prevent `/**/` from being parsed as a doc comment
let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*')
|| s.starts_with("/*!"))
&& s.len() >= 5;
debug!("is {:?} a doc comment? {}", s, res);
res
}
// FIXME(#64197): Try to privatize this again.
pub fn is_doc_comment(s: &str) -> bool {
(s.starts_with("///") && is_line_doc_comment(s))
|| s.starts_with("//!")
|| (s.starts_with("/**") && is_block_doc_comment(s))
|| s.starts_with("/*!")
}
pub fn doc_comment_style(comment: Symbol) -> ast::AttrStyle {
let comment = &comment.as_str();
assert!(is_doc_comment(comment));
if comment.starts_with("//!") || comment.starts_with("/*!") {
ast::AttrStyle::Inner
} else {
ast::AttrStyle::Outer
/// For a full line comment string returns its doc comment style if it's a doc comment
/// and returns `None` if it's a regular comment.
pub fn line_doc_comment_style(line_comment: &str) -> Option<AttrStyle> {
let line_comment = line_comment.as_bytes();
assert!(line_comment.starts_with(b"//"));
match line_comment.get(2) {
// `//!` is an inner line doc comment.
Some(b'!') => Some(AttrStyle::Inner),
Some(b'/') => match line_comment.get(3) {
// `////` (more than 3 slashes) is not considered a doc comment.
Some(b'/') => None,
// Otherwise `///` is an outer line doc comment.
_ => Some(AttrStyle::Outer),
},
_ => None,
}
}
pub fn strip_doc_comment_decoration(comment: Symbol) -> String {
let comment = &comment.as_str();
/// For a full block comment string returns its doc comment style if it's a doc comment
/// and returns `None` if it's a regular comment.
pub fn block_doc_comment_style(block_comment: &str, terminated: bool) -> Option<AttrStyle> {
let block_comment = block_comment.as_bytes();
assert!(block_comment.starts_with(b"/*"));
assert!(!terminated || block_comment.ends_with(b"*/"));
match block_comment.get(2) {
// `/*!` is an inner block doc comment.
Some(b'!') => Some(AttrStyle::Inner),
Some(b'*') => match block_comment.get(3) {
// `/***` (more than 2 stars) is not considered a doc comment.
Some(b'*') => None,
// `/**/` is not considered a doc comment.
Some(b'/') if block_comment.len() == 4 => None,
// Otherwise `/**` is an outer block doc comment.
_ => Some(AttrStyle::Outer),
},
_ => None,
}
}
/// Makes a doc string more presentable to users.
/// Used by rustdoc and perhaps other tools, but not by rustc.
pub fn beautify_doc_string(data: Symbol) -> String {
/// remove whitespace-only lines from the start/end of lines
fn vertical_trim(lines: Vec<String>) -> Vec<String> {
let mut i = 0;
@ -126,26 +127,15 @@ pub fn strip_doc_comment_decoration(comment: Symbol) -> String {
}
}
// one-line comments lose their prefix
const ONELINERS: &[&str] = &["///!", "///", "//!", "//"];
for prefix in ONELINERS {
if comment.starts_with(*prefix) {
return (&comment[prefix.len()..]).to_string();
}
}
if comment.starts_with("/*") {
let lines =
comment[3..comment.len() - 2].lines().map(|s| s.to_string()).collect::<Vec<String>>();
let data = data.as_str();
if data.contains('\n') {
let lines = data.lines().map(|s| s.to_string()).collect::<Vec<String>>();
let lines = vertical_trim(lines);
let lines = horizontal_trim(lines);
return lines.join("\n");
lines.join("\n")
} else {
data.to_string()
}
panic!("not a doc-comment: {}", comment);
}
/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
@ -203,7 +193,7 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
comments.push(Comment {
style: Isolated,
style: CommentStyle::Isolated,
lines: vec![text[..shebang_len].to_string()],
pos: start_bpos,
});
@ -219,23 +209,23 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
idx = idx + 1 + next_newline;
comments.push(Comment {
style: BlankLine,
style: CommentStyle::BlankLine,
lines: vec![],
pos: start_bpos + BytePos((pos + idx) as u32),
});
}
}
}
rustc_lexer::TokenKind::BlockComment { terminated: _ } => {
if !is_block_doc_comment(token_text) {
rustc_lexer::TokenKind::BlockComment { terminated } => {
if block_doc_comment_style(token_text, terminated).is_none() {
let code_to_the_right = match text[pos + token.len..].chars().next() {
Some('\r' | '\n') => false,
_ => true,
};
let style = match (code_to_the_left, code_to_the_right) {
(_, true) => Mixed,
(false, false) => Isolated,
(true, false) => Trailing,
(_, true) => CommentStyle::Mixed,
(false, false) => CommentStyle::Isolated,
(true, false) => CommentStyle::Trailing,
};
// Count the number of chars since the start of the line by rescanning.
@ -249,9 +239,13 @@ pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comme
}
}
rustc_lexer::TokenKind::LineComment => {
if !is_doc_comment(token_text) {
if line_doc_comment_style(token_text).is_none() {
comments.push(Comment {
style: if code_to_the_left { Trailing } else { Isolated },
style: if code_to_the_left {
CommentStyle::Trailing
} else {
CommentStyle::Isolated
},
lines: vec![token_text.to_string()],
pos: start_bpos + BytePos(pos as u32),
})

View File

@ -1,11 +1,18 @@
use super::*;
use crate::with_default_session_globals;
#[test]
fn line_doc_comments() {
assert!(line_doc_comment_style("///").is_some());
assert!(line_doc_comment_style("/// blah").is_some());
assert!(line_doc_comment_style("////").is_none());
}
#[test]
fn test_block_doc_comment_1() {
with_default_session_globals(|| {
let comment = "/**\n * Test \n ** Test\n * Test\n*/";
let stripped = strip_doc_comment_decoration(Symbol::intern(comment));
let comment = "\n * Test \n ** Test\n * Test\n";
let stripped = beautify_doc_string(Symbol::intern(comment));
assert_eq!(stripped, " Test \n* Test\n Test");
})
}
@ -13,8 +20,8 @@ fn test_block_doc_comment_1() {
#[test]
fn test_block_doc_comment_2() {
with_default_session_globals(|| {
let comment = "/**\n * Test\n * Test\n*/";
let stripped = strip_doc_comment_decoration(Symbol::intern(comment));
let comment = "\n * Test\n * Test\n";
let stripped = beautify_doc_string(Symbol::intern(comment));
assert_eq!(stripped, " Test\n Test");
})
}
@ -22,37 +29,22 @@ fn test_block_doc_comment_2() {
#[test]
fn test_block_doc_comment_3() {
with_default_session_globals(|| {
let comment = "/**\n let a: *i32;\n *a = 5;\n*/";
let stripped = strip_doc_comment_decoration(Symbol::intern(comment));
let comment = "\n let a: *i32;\n *a = 5;\n";
let stripped = beautify_doc_string(Symbol::intern(comment));
assert_eq!(stripped, " let a: *i32;\n *a = 5;");
})
}
#[test]
fn test_block_doc_comment_4() {
with_default_session_globals(|| {
let comment = "/*******************\n test\n *********************/";
let stripped = strip_doc_comment_decoration(Symbol::intern(comment));
assert_eq!(stripped, " test");
})
}
#[test]
fn test_line_doc_comment() {
with_default_session_globals(|| {
let stripped = strip_doc_comment_decoration(Symbol::intern("/// test"));
let stripped = beautify_doc_string(Symbol::intern(" test"));
assert_eq!(stripped, " test");
let stripped = strip_doc_comment_decoration(Symbol::intern("///! test"));
assert_eq!(stripped, " test");
let stripped = strip_doc_comment_decoration(Symbol::intern("// test"));
assert_eq!(stripped, " test");
let stripped = strip_doc_comment_decoration(Symbol::intern("// test"));
assert_eq!(stripped, " test");
let stripped = strip_doc_comment_decoration(Symbol::intern("///test"));
assert_eq!(stripped, "test");
let stripped = strip_doc_comment_decoration(Symbol::intern("///!test"));
assert_eq!(stripped, "test");
let stripped = strip_doc_comment_decoration(Symbol::intern("//test"));
let stripped = beautify_doc_string(Symbol::intern("! test"));
assert_eq!(stripped, "! test");
let stripped = beautify_doc_string(Symbol::intern("test"));
assert_eq!(stripped, "test");
let stripped = beautify_doc_string(Symbol::intern("!test"));
assert_eq!(stripped, "!test");
})
}

View File

@ -880,7 +880,7 @@ pub fn walk_vis<'a, V: Visitor<'a>>(visitor: &mut V, vis: &'a Visibility) {
pub fn walk_attribute<'a, V: Visitor<'a>>(visitor: &mut V, attr: &'a Attribute) {
match attr.kind {
AttrKind::Normal(ref item) => walk_mac_args(visitor, &item.args),
AttrKind::DocComment(_) => {}
AttrKind::DocComment(..) => {}
}
}

View File

@ -981,7 +981,7 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
path: item.path.clone(),
args: self.lower_mac_args(&item.args),
}),
AttrKind::DocComment(comment) => AttrKind::DocComment(comment),
AttrKind::DocComment(comment_kind, data) => AttrKind::DocComment(comment_kind, data),
};
Attribute { kind, id: attr.id, style: attr.style, span: attr.span }

View File

@ -8,10 +8,11 @@ use rustc_ast::ast::{InlineAsmOperand, InlineAsmRegOrRegClass};
use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece};
use rustc_ast::attr;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, BinOpToken, DelimToken, Nonterminal, Token, TokenKind};
use rustc_ast::token::{self, BinOpToken, CommentKind, DelimToken, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::{TokenStream, TokenTree};
use rustc_ast::util::classify;
use rustc_ast::util::comments::{gather_comments, Comment, CommentStyle};
use rustc_ast::util::parser::{self, AssocOp, Fixity};
use rustc_ast::util::{classify, comments};
use rustc_span::edition::Edition;
use rustc_span::source_map::{SourceMap, Spanned};
use rustc_span::symbol::{kw, sym, Ident, IdentPrinter, Symbol};
@ -50,17 +51,17 @@ impl PpAnn for NoAnn {}
pub struct Comments<'a> {
sm: &'a SourceMap,
comments: Vec<comments::Comment>,
comments: Vec<Comment>,
current: usize,
}
impl<'a> Comments<'a> {
pub fn new(sm: &'a SourceMap, filename: FileName, input: String) -> Comments<'a> {
let comments = comments::gather_comments(sm, filename, input);
let comments = gather_comments(sm, filename, input);
Comments { sm, comments, current: 0 }
}
pub fn next(&self) -> Option<comments::Comment> {
pub fn next(&self) -> Option<Comment> {
self.comments.get(self.current).cloned()
}
@ -68,9 +69,9 @@ impl<'a> Comments<'a> {
&mut self,
span: rustc_span::Span,
next_pos: Option<BytePos>,
) -> Option<comments::Comment> {
) -> Option<Comment> {
if let Some(cmnt) = self.next() {
if cmnt.style != comments::Trailing {
if cmnt.style != CommentStyle::Trailing {
return None;
}
let span_line = self.sm.lookup_char_pos(span.hi());
@ -152,8 +153,8 @@ pub fn to_string(f: impl FnOnce(&mut State<'_>)) -> String {
// and also addresses some specific regressions described in #63896 and #73345.
fn tt_prepend_space(tt: &TokenTree, prev: &TokenTree) -> bool {
if let TokenTree::Token(token) = prev {
if let token::DocComment(s) = token.kind {
return !s.as_str().starts_with("//");
if let token::DocComment(comment_kind, ..) = token.kind {
return comment_kind != CommentKind::Line;
}
}
match tt {
@ -194,6 +195,19 @@ fn binop_to_string(op: BinOpToken) -> &'static str {
}
}
fn doc_comment_to_string(
comment_kind: CommentKind,
attr_style: ast::AttrStyle,
data: Symbol,
) -> String {
match (comment_kind, attr_style) {
(CommentKind::Line, ast::AttrStyle::Outer) => format!("///{}", data),
(CommentKind::Line, ast::AttrStyle::Inner) => format!("//!{}", data),
(CommentKind::Block, ast::AttrStyle::Outer) => format!("/**{}*/", data),
(CommentKind::Block, ast::AttrStyle::Inner) => format!("/*!{}*/", data),
}
}
pub fn literal_to_string(lit: token::Lit) -> String {
let token::Lit { kind, symbol, suffix } = lit;
let mut out = match kind {
@ -271,7 +285,9 @@ fn token_kind_to_string_ext(tok: &TokenKind, convert_dollar_crate: Option<Span>)
token::Lifetime(s) => s.to_string(),
/* Other */
token::DocComment(s) => s.to_string(),
token::DocComment(comment_kind, attr_style, data) => {
doc_comment_to_string(comment_kind, attr_style, data)
}
token::Eof => "<eof>".to_string(),
token::Whitespace => " ".to_string(),
token::Comment => "/* */".to_string(),
@ -447,9 +463,9 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
}
}
fn print_comment(&mut self, cmnt: &comments::Comment) {
fn print_comment(&mut self, cmnt: &Comment) {
match cmnt.style {
comments::Mixed => {
CommentStyle::Mixed => {
if !self.is_beginning_of_line() {
self.zerobreak();
}
@ -468,7 +484,7 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
}
self.zerobreak()
}
comments::Isolated => {
CommentStyle::Isolated => {
self.hardbreak_if_not_bol();
for line in &cmnt.lines {
// Don't print empty lines because they will end up as trailing
@ -479,7 +495,7 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
self.hardbreak();
}
}
comments::Trailing => {
CommentStyle::Trailing => {
if !self.is_beginning_of_line() {
self.word(" ");
}
@ -497,7 +513,7 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
self.end();
}
}
comments::BlankLine => {
CommentStyle::BlankLine => {
// We need to do at least one, possibly two hardbreaks.
let twice = match self.last_token() {
pp::Token::String(s) => ";" == s,
@ -516,7 +532,7 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
}
}
fn next_comment(&mut self) -> Option<comments::Comment> {
fn next_comment(&mut self) -> Option<Comment> {
self.comments().as_mut().and_then(|c| c.next())
}
@ -599,8 +615,8 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
self.print_attr_item(&item, attr.span);
self.word("]");
}
ast::AttrKind::DocComment(comment) => {
self.word(comment.to_string());
ast::AttrKind::DocComment(comment_kind, data) => {
self.word(doc_comment_to_string(comment_kind, attr.style, data));
self.hardbreak()
}
}

View File

@ -1,5 +1,5 @@
use rustc_ast::token::{self, Token, TokenKind};
use rustc_ast::util::comments::is_doc_comment;
use rustc_ast::ast::AttrStyle;
use rustc_ast::token::{self, CommentKind, Token, TokenKind};
use rustc_ast::with_default_session_globals;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{emitter::EmitterWriter, Handler};
@ -223,13 +223,6 @@ fn literal_suffixes() {
})
}
#[test]
fn line_doc_comments() {
assert!(is_doc_comment("///"));
assert!(is_doc_comment("/// blah"));
assert!(!is_doc_comment("////"));
}
#[test]
fn nested_block_comments() {
with_default_session_globals(|| {
@ -251,6 +244,9 @@ fn crlf_comments() {
assert_eq!(comment.kind, token::Comment);
assert_eq!((comment.span.lo(), comment.span.hi()), (BytePos(0), BytePos(7)));
assert_eq!(lexer.next_token(), token::Whitespace);
assert_eq!(lexer.next_token(), token::DocComment(Symbol::intern("/// test")));
assert_eq!(
lexer.next_token(),
token::DocComment(CommentKind::Line, AttrStyle::Outer, Symbol::intern(" test"))
);
})
}

View File

@ -244,20 +244,20 @@ fn crlf_doc_comments() {
let source = "/// doc comment\r\nfn foo() {}".to_string();
let item = parse_item_from_source_str(name_1, source, &sess).unwrap().unwrap();
let doc = item.attrs.iter().filter_map(|at| at.doc_str()).next().unwrap();
assert_eq!(doc.as_str(), "/// doc comment");
assert_eq!(doc.as_str(), " doc comment");
let name_2 = FileName::Custom("crlf_source_2".to_string());
let source = "/// doc comment\r\n/// line 2\r\nfn foo() {}".to_string();
let item = parse_item_from_source_str(name_2, source, &sess).unwrap().unwrap();
let docs = item.attrs.iter().filter_map(|at| at.doc_str()).collect::<Vec<_>>();
let b: &[_] = &[Symbol::intern("/// doc comment"), Symbol::intern("/// line 2")];
let b: &[_] = &[Symbol::intern(" doc comment"), Symbol::intern(" line 2")];
assert_eq!(&docs[..], b);
let name_3 = FileName::Custom("clrf_source_3".to_string());
let source = "/** doc comment\r\n * with CRLF */\r\nfn foo() {}".to_string();
let item = parse_item_from_source_str(name_3, source, &sess).unwrap().unwrap();
let doc = item.attrs.iter().filter_map(|at| at.doc_str()).next().unwrap();
assert_eq!(doc.as_str(), "/** doc comment\n * with CRLF */");
assert_eq!(doc.as_str(), " doc comment\n * with CRLF ");
});
}

View File

@ -3,7 +3,6 @@ use crate::base::ExtCtxt;
use rustc_ast::ast;
use rustc_ast::token;
use rustc_ast::tokenstream::{self, DelimSpan, IsJoint::*, TokenStream, TreeAndJoint};
use rustc_ast::util::comments;
use rustc_ast_pretty::pprust;
use rustc_data_structures::sync::Lrc;
use rustc_errors::Diagnostic;
@ -148,11 +147,9 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
tt!(Punct::new('\'', true))
}
Literal(lit) => tt!(Literal { lit }),
DocComment(c) => {
let style = comments::doc_comment_style(c);
let stripped = comments::strip_doc_comment_decoration(c);
DocComment(_, attr_style, data) => {
let mut escaped = String::new();
for ch in stripped.chars() {
for ch in data.as_str().chars() {
escaped.extend(ch.escape_debug());
}
let stream = vec![
@ -169,7 +166,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
span: DelimSpan::from_single(span),
flatten: false,
}));
if style == ast::AttrStyle::Inner {
if attr_style == ast::AttrStyle::Inner {
stack.push(tt!(Punct::new('!', false)));
}
tt!(Punct::new('#', false))

View File

@ -1,4 +1,4 @@
use rustc_ast::token::{self, Token, TokenKind};
use rustc_ast::token::{self, CommentKind, Token, TokenKind};
use rustc_ast::util::comments;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError};
@ -170,22 +170,20 @@ impl<'a> StringReader<'a> {
match token {
rustc_lexer::TokenKind::LineComment => {
let string = self.str_from(start);
// comments with only more "/"s are not doc comments
if comments::is_line_doc_comment(string) {
if let Some(attr_style) = comments::line_doc_comment_style(string) {
self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment");
token::DocComment(Symbol::intern(string))
// Opening delimiter of the length 3 is not included into the symbol.
token::DocComment(CommentKind::Line, attr_style, Symbol::intern(&string[3..]))
} else {
token::Comment
}
}
rustc_lexer::TokenKind::BlockComment { terminated } => {
let string = self.str_from(start);
// block comments starting with "/**" or "/*!" are doc-comments
// but comments with only "*"s between two "/"s are not
let is_doc_comment = comments::is_block_doc_comment(string);
let attr_style = comments::block_doc_comment_style(string, terminated);
if !terminated {
let msg = if is_doc_comment {
let msg = if attr_style.is_some() {
"unterminated block doc-comment"
} else {
"unterminated block comment"
@ -202,9 +200,15 @@ impl<'a> StringReader<'a> {
FatalError.raise();
}
if is_doc_comment {
if let Some(attr_style) = attr_style {
self.forbid_bare_cr(start, string, "bare CR not allowed in block doc-comment");
token::DocComment(Symbol::intern(string))
// Opening delimiter of the length 3 and closing delimiter of the length 2
// are not included into the symbol.
token::DocComment(
CommentKind::Block,
attr_style,
Symbol::intern(&string[3..string.len() - if terminated { 2 } else { 0 }]),
)
} else {
token::Comment
}

View File

@ -486,7 +486,9 @@ fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bool {
(&OpenDelim(a), &OpenDelim(b)) | (&CloseDelim(a), &CloseDelim(b)) => a == b,
(&DocComment(a), &DocComment(b)) | (&Shebang(a), &Shebang(b)) => a == b,
(&DocComment(a1, a2, a3), &DocComment(b1, b2, b3)) => a1 == b1 && a2 == b2 && a3 == b3,
(&Shebang(a), &Shebang(b)) => a == b,
(&Literal(a), &Literal(b)) => a == b,
@ -524,7 +526,7 @@ fn prepend_attrs(
let item = match attr.kind {
ast::AttrKind::Normal(ref item) => item,
ast::AttrKind::DocComment(_) => {
ast::AttrKind::DocComment(..) => {
let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
builder.push(stream);
continue;

View File

@ -2,10 +2,9 @@ use super::{Parser, PathStyle};
use rustc_ast::ast;
use rustc_ast::attr;
use rustc_ast::token::{self, Nonterminal};
use rustc_ast::util::comments;
use rustc_ast_pretty::pprust;
use rustc_errors::{error_code, PResult};
use rustc_span::{Span, Symbol};
use rustc_span::Span;
use log::debug;
@ -47,8 +46,8 @@ impl<'a> Parser<'a> {
let attr = self.parse_attribute_with_inner_parse_policy(inner_parse_policy)?;
attrs.push(attr);
just_parsed_doc_comment = false;
} else if let token::DocComment(s) = self.token.kind {
let attr = self.mk_doc_comment(s);
} else if let token::DocComment(comment_kind, attr_style, data) = self.token.kind {
let attr = attr::mk_doc_comment(comment_kind, attr_style, data, self.token.span);
if attr.style != ast::AttrStyle::Outer {
self.sess
.span_diagnostic
@ -73,10 +72,6 @@ impl<'a> Parser<'a> {
Ok(attrs)
}
fn mk_doc_comment(&self, s: Symbol) -> ast::Attribute {
attr::mk_doc_comment(comments::doc_comment_style(s), s, self.token.span)
}
/// Matches `attribute = # ! [ meta_item ]`.
///
/// If `permit_inner` is `true`, then a leading `!` indicates an inner
@ -184,9 +179,9 @@ impl<'a> Parser<'a> {
let attr = self.parse_attribute(true)?;
assert_eq!(attr.style, ast::AttrStyle::Inner);
attrs.push(attr);
} else if let token::DocComment(s) = self.token.kind {
} else if let token::DocComment(comment_kind, attr_style, data) = self.token.kind {
// We need to get the position of this token before we bump.
let attr = self.mk_doc_comment(s);
let attr = attr::mk_doc_comment(comment_kind, attr_style, data, self.token.span);
if attr.style == ast::AttrStyle::Inner {
attrs.push(attr);
self.bump();

View File

@ -1419,7 +1419,7 @@ impl<'a> Parser<'a> {
}
pub(super) fn eat_incorrect_doc_comment_for_param_type(&mut self) {
if let token::DocComment(_) = self.token.kind {
if let token::DocComment(..) = self.token.kind {
self.struct_span_err(
self.token.span,
"documentation comments cannot be applied to a function parameter's type",

View File

@ -610,7 +610,7 @@ impl<'a> Parser<'a> {
/// Recover on a doc comment before `}`.
fn recover_doc_comment_before_brace(&mut self) -> bool {
if let token::DocComment(_) = self.token.kind {
if let token::DocComment(..) = self.token.kind {
if self.look_ahead(1, |tok| tok == &token::CloseDelim(token::Brace)) {
struct_span_err!(
self.diagnostic(),
@ -1231,7 +1231,7 @@ impl<'a> Parser<'a> {
self.bump();
}
token::CloseDelim(token::Brace) => {}
token::DocComment(_) => {
token::DocComment(..) => {
let previous_span = self.prev_token.span;
let mut err = self.span_fatal_err(self.token.span, Error::UselessDocComment);
self.bump(); // consume the doc comment

View File

@ -22,7 +22,6 @@ use rustc_ast::ast::{
use rustc_ast::ptr::P;
use rustc_ast::token::{self, DelimToken, Token, TokenKind};
use rustc_ast::tokenstream::{self, DelimSpan, TokenStream, TokenTree, TreeAndJoint};
use rustc_ast::util::comments::{doc_comment_style, strip_doc_comment_decoration};
use rustc_ast_pretty::pprust;
use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, FatalError, PResult};
use rustc_session::parse::ParseSess;
@ -209,18 +208,18 @@ impl TokenCursor {
}
fn next_desugared(&mut self) -> Token {
let (name, sp) = match self.next() {
Token { kind: token::DocComment(name), span } => (name, span),
let (data, attr_style, sp) = match self.next() {
Token { kind: token::DocComment(_, attr_style, data), span } => {
(data, attr_style, span)
}
tok => return tok,
};
let stripped = strip_doc_comment_decoration(name);
// Searches for the occurrences of `"#*` and returns the minimum number of `#`s
// required to wrap the text.
let mut num_of_hashes = 0;
let mut count = 0;
for ch in stripped.chars() {
for ch in data.as_str().chars() {
count = match ch {
'"' => 1,
'#' if count > 0 => count + 1,
@ -236,10 +235,7 @@ impl TokenCursor {
[
TokenTree::token(token::Ident(sym::doc, false), sp),
TokenTree::token(token::Eq, sp),
TokenTree::token(
TokenKind::lit(token::StrRaw(num_of_hashes), Symbol::intern(&stripped), None),
sp,
),
TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), sp),
]
.iter()
.cloned()
@ -251,7 +247,7 @@ impl TokenCursor {
TokenCursorFrame::new(
delim_span,
token::NoDelim,
&if doc_comment_style(name) == AttrStyle::Inner {
&if attr_style == AttrStyle::Inner {
[TokenTree::token(token::Pound, sp), TokenTree::token(token::Not, sp), body]
.iter()
.cloned()

View File

@ -10,7 +10,7 @@ mod span_utils;
mod sig;
use rustc_ast::ast::{self};
use rustc_ast::util::comments::strip_doc_comment_decoration;
use rustc_ast::util::comments::beautify_doc_string;
use rustc_ast_pretty::pprust::attribute_to_string;
use rustc_hir as hir;
use rustc_hir::def::{DefKind as HirDefKind, Res};
@ -822,11 +822,8 @@ impl<'tcx> SaveContext<'tcx> {
for attr in attrs {
if let Some(val) = attr.doc_str() {
if attr.is_doc_comment() {
result.push_str(&strip_doc_comment_decoration(val));
} else {
result.push_str(&val.as_str());
}
// FIXME: Should save-analysis beautify doc strings itself or leave it to users?
result.push_str(&beautify_doc_string(val));
result.push('\n');
} else if attr.check_name(sym::doc) {
if let Some(meta_list) = attr.meta_item_list() {

View File

@ -10,7 +10,7 @@ use std::{slice, vec};
use rustc_ast::ast::{self, AttrStyle};
use rustc_ast::attr;
use rustc_ast::util::comments::strip_doc_comment_decoration;
use rustc_ast::util::comments::beautify_doc_string;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_hir as hir;
use rustc_hir::def::Res;
@ -506,10 +506,11 @@ impl Attributes {
.iter()
.filter_map(|attr| {
if let Some(value) = attr.doc_str() {
let (value, mk_fragment): (_, fn(_, _, _) -> _) = if attr.is_doc_comment() {
(strip_doc_comment_decoration(value), DocFragment::SugaredDoc)
let value = beautify_doc_string(value);
let mk_fragment: fn(_, _, _) -> _ = if attr.is_doc_comment() {
DocFragment::SugaredDoc
} else {
(value.to_string(), DocFragment::RawDoc)
DocFragment::RawDoc
};
let line = doc_line;

View File

@ -0,0 +1,24 @@
// check-pass
// aux-build:test-macros.rs
// Anonymize unstable non-dummy spans while still showing dummy spans `0..0`.
// normalize-stdout-test "bytes\([^0]\w*\.\.(\w+)\)" -> "bytes(LO..$1)"
// normalize-stdout-test "bytes\((\w+)\.\.[^0]\w*\)" -> "bytes($1..HI)"
#[macro_use]
extern crate test_macros;
print_bang! {
/**
*******
* DOC *
* DOC *
* DOC *
*******
*/
pub struct S;
}
fn main() {}

View File

@ -0,0 +1,54 @@
PRINT-BANG INPUT (DISPLAY): /**
*******
* DOC *
* DOC *
* DOC *
*******
*/
pub struct S ;
PRINT-BANG RE-COLLECTED (DISPLAY): #[doc = "\n*******\n* DOC *\n* DOC *\n* DOC *\n*******\n"] pub struct S ;
PRINT-BANG INPUT (DEBUG): TokenStream [
Punct {
ch: '#',
spacing: Alone,
span: #0 bytes(LO..HI),
},
Group {
delimiter: Bracket,
stream: TokenStream [
Ident {
ident: "doc",
span: #0 bytes(LO..HI),
},
Punct {
ch: '=',
spacing: Alone,
span: #0 bytes(LO..HI),
},
Literal {
kind: Str,
symbol: "\n*******\n* DOC *\n* DOC *\n* DOC *\n*******\n",
suffix: None,
span: #0 bytes(LO..HI),
},
],
span: #0 bytes(LO..HI),
},
Ident {
ident: "pub",
span: #0 bytes(LO..HI),
},
Ident {
ident: "struct",
span: #0 bytes(LO..HI),
},
Ident {
ident: "S",
span: #0 bytes(LO..HI),
},
Punct {
ch: ';',
spacing: Alone,
span: #0 bytes(LO..HI),
},
]

View File

@ -2,6 +2,7 @@ use crate::utils::{implements_trait, is_entrypoint_fn, is_type_diagnostic_item,
use if_chain::if_chain;
use itertools::Itertools;
use rustc_ast::ast::{AttrKind, Attribute};
use rustc_ast::token::CommentKind;
use rustc_data_structures::fx::FxHashSet;
use rustc_hir as hir;
use rustc_lint::{LateContext, LateLintPass};
@ -249,7 +250,7 @@ fn lint_for_missing_headers<'tcx>(
}
}
/// Cleanup documentation decoration (`///` and such).
/// Cleanup documentation decoration.
///
/// We can't use `rustc_ast::attr::AttributeMethods::with_desugared_doc` or
/// `rustc_ast::parse::lexer::comments::strip_doc_comment_decoration` because we
@ -257,54 +258,45 @@ fn lint_for_missing_headers<'tcx>(
/// the spans but this function is inspired from the later.
#[allow(clippy::cast_possible_truncation)]
#[must_use]
pub fn strip_doc_comment_decoration(comment: &str, span: Span) -> (String, Vec<(usize, Span)>) {
pub fn strip_doc_comment_decoration(doc: &str, comment_kind: CommentKind, span: Span) -> (String, Vec<(usize, Span)>) {
// one-line comments lose their prefix
const ONELINERS: &[&str] = &["///!", "///", "//!", "//"];
for prefix in ONELINERS {
if comment.starts_with(*prefix) {
let doc = &comment[prefix.len()..];
let mut doc = doc.to_owned();
doc.push('\n');
return (
doc.to_owned(),
vec![(doc.len(), span.with_lo(span.lo() + BytePos(prefix.len() as u32)))],
);
}
if comment_kind == CommentKind::Line {
let mut doc = doc.to_owned();
doc.push('\n');
let len = doc.len();
// +3 skips the opening delimiter
return (doc, vec![(len, span.with_lo(span.lo() + BytePos(3)))]);
}
if comment.starts_with("/*") {
let doc = &comment[3..comment.len() - 2];
let mut sizes = vec![];
let mut contains_initial_stars = false;
for line in doc.lines() {
let offset = line.as_ptr() as usize - comment.as_ptr() as usize;
debug_assert_eq!(offset as u32 as usize, offset);
contains_initial_stars |= line.trim_start().starts_with('*');
// +1 for the newline
sizes.push((line.len() + 1, span.with_lo(span.lo() + BytePos(offset as u32))));
}
if !contains_initial_stars {
return (doc.to_string(), sizes);
}
// remove the initial '*'s if any
let mut no_stars = String::with_capacity(doc.len());
for line in doc.lines() {
let mut chars = line.chars();
while let Some(c) = chars.next() {
if c.is_whitespace() {
no_stars.push(c);
} else {
no_stars.push(if c == '*' { ' ' } else { c });
break;
}
let mut sizes = vec![];
let mut contains_initial_stars = false;
for line in doc.lines() {
let offset = line.as_ptr() as usize - doc.as_ptr() as usize;
debug_assert_eq!(offset as u32 as usize, offset);
contains_initial_stars |= line.trim_start().starts_with('*');
// +1 adds the newline, +3 skips the opening delimiter
sizes.push((line.len() + 1, span.with_lo(span.lo() + BytePos(3 + offset as u32))));
}
if !contains_initial_stars {
return (doc.to_string(), sizes);
}
// remove the initial '*'s if any
let mut no_stars = String::with_capacity(doc.len());
for line in doc.lines() {
let mut chars = line.chars();
while let Some(c) = chars.next() {
if c.is_whitespace() {
no_stars.push(c);
} else {
no_stars.push(if c == '*' { ' ' } else { c });
break;
}
no_stars.push_str(chars.as_str());
no_stars.push('\n');
}
return (no_stars, sizes);
no_stars.push_str(chars.as_str());
no_stars.push('\n');
}
panic!("not a doc-comment: {}", comment);
(no_stars, sizes)
}
#[derive(Copy, Clone)]
@ -318,9 +310,8 @@ fn check_attrs<'a>(cx: &LateContext<'_>, valid_idents: &FxHashSet<String>, attrs
let mut spans = vec![];
for attr in attrs {
if let AttrKind::DocComment(ref comment) = attr.kind {
let comment = comment.to_string();
let (comment, current_spans) = strip_doc_comment_decoration(&comment, attr.span);
if let AttrKind::DocComment(comment_kind, comment) = attr.kind {
let (comment, current_spans) = strip_doc_comment_decoration(&comment.as_str(), comment_kind, attr.span);
spans.extend_from_slice(&current_spans);
doc.push_str(&comment);
} else if attr.has_name(sym!(doc)) {

View File

@ -60,13 +60,14 @@ declare_lint_pass!(TabsInDocComments => [TABS_IN_DOC_COMMENTS]);
impl TabsInDocComments {
fn warn_if_tabs_in_doc(cx: &EarlyContext<'_>, attr: &ast::Attribute) {
if let ast::AttrKind::DocComment(comment) = attr.kind {
if let ast::AttrKind::DocComment(_, comment) = attr.kind {
let comment = comment.as_str();
for (lo, hi) in get_chunks_of_tabs(&comment) {
// +3 skips the opening delimiter
let new_span = Span::new(
attr.span.lo() + BytePos(lo),
attr.span.lo() + BytePos(hi),
attr.span.lo() + BytePos(3 + lo),
attr.span.lo() + BytePos(3 + hi),
attr.span.ctxt(),
);
span_lint_and_sugg(

View File

@ -506,7 +506,7 @@ pub fn eq_attr(l: &Attribute, r: &Attribute) -> bool {
use AttrKind::*;
l.style == r.style
&& match (&l.kind, &r.kind) {
(DocComment(l), DocComment(r)) => l == r,
(DocComment(l1, l2), DocComment(r1, r2)) => l1 == r1 && l2 == r2,
(Normal(l), Normal(r)) => eq_path(&l.path, &r.path) && eq_mac_args(&l.args, &r.args),
_ => false,
}