Format some macros 2.0 macro defs

cc #1539
This commit is contained in:
Nick Cameron 2018-01-01 19:40:51 +13:00
parent 47d9ccd6a1
commit f86f6dcd9c
4 changed files with 437 additions and 14 deletions

View File

@ -684,6 +684,7 @@ create_config! {
"Enables unstable features. Only available on nightly channel";
disable_all_formatting: bool, false, false, "Don't reformat anything";
skip_children: bool, false, false, "Don't reformat out of line modules";
hide_parse_errors: bool, false, false, "Hide errors from the parser";
error_on_line_overflow: bool, true, false, "Error if unable to get all lines within max_width";
error_on_unformatted: bool, false, false,
"Error if unable to get comments or string literals within max_width, \

View File

@ -9,6 +9,7 @@
// except according to those terms.
#![feature(rustc_private)]
#![feature(type_ascription)]
#[macro_use]
extern crate derive_new;
@ -597,8 +598,16 @@ pub fn format_input<T: Write>(
}
let codemap = Rc::new(CodeMap::new(FilePathMapping::empty()));
let tty_handler =
Handler::with_tty_emitter(ColorConfig::Auto, true, false, Some(codemap.clone()));
let tty_handler = if config.hide_parse_errors() {
let silent_emitter = Box::new(EmitterWriter::new(
Box::new(Vec::new()),
Some(codemap.clone()),
false,
));
Handler::with_emitter(true, false, silent_emitter)
} else {
Handler::with_tty_emitter(ColorConfig::Auto, true, false, Some(codemap.clone()))
};
let mut parse_session = ParseSess::with_span_handler(tty_handler, codemap.clone());
let main_file = match input {

View File

@ -19,21 +19,23 @@
// List-like invocations with parentheses will be formatted as function calls,
// and those with brackets will be formatted as array literals.
use std::collections::HashMap;
use syntax::ast;
use syntax::codemap::BytePos;
use syntax::codemap::{BytePos, Span};
use syntax::parse::new_parser_from_tts;
use syntax::parse::parser::Parser;
use syntax::parse::token::Token;
use syntax::parse::token::{BinOpToken, DelimToken, Token};
use syntax::print::pprust;
use syntax::symbol;
use syntax::tokenstream::TokenStream;
use syntax::tokenstream::{Cursor, ThinTokenStream, TokenStream, TokenTree};
use syntax::util::ThinVec;
use codemap::SpanUtils;
use comment::{contains_comment, FindUncommented};
use comment::{contains_comment, remove_trailing_white_spaces, FindUncommented};
use expr::{rewrite_array, rewrite_call_inner};
use rewrite::{Rewrite, RewriteContext};
use shape::{Indent, Shape};
use utils::mk_sp;
use utils::{format_visibility, mk_sp};
const FORCED_BRACKET_MACROS: &[&str] = &["vec!"];
@ -278,6 +280,306 @@ pub fn rewrite_macro(
}
}
/// Rewrites a macros 2.0 definition as `<vis>macro <ident>(<args>) { <body> }`.
///
/// * `context` - supplies source snippets and the active configuration.
/// * `indent` - indentation of the item; the body is indented one block deeper.
/// * `def` - the macro definition being rewritten.
/// * `ident`, `vis` - name and visibility printed in the rewritten header.
/// * `span` - span of the whole item; its source text is the fallback result.
///
/// The original source text (with trailing whitespace removed) is returned
/// unchanged whenever the definition is not handled: definitions flagged
/// `legacy`, definitions `MacroParser` cannot parse, definitions that do not
/// consist of exactly one parenthesised branch, bodies that cannot be
/// formatted, and bodies where undoing the variable substitution would be
/// ambiguous. `None` is returned only if the arguments cannot be formatted.
pub fn rewrite_macro_def(
    context: &RewriteContext,
    indent: Indent,
    def: &ast::MacroDef,
    ident: ast::Ident,
    vis: &ast::Visibility,
    span: Span,
) -> Option<String> {
    let snippet = Some(remove_trailing_white_spaces(context.snippet(span)));

    if def.legacy {
        return snippet;
    }

    let mut parser = MacroParser::new(def.stream().into_trees());
    let mut parsed_def = match parser.parse() {
        Some(def) => def,
        None => return snippet,
    };

    // Only attempt to format function-like macros.
    if parsed_def.branches.len() != 1 || parsed_def.branches[0].args_paren_kind != DelimToken::Paren
    {
        // FIXME(#1539): implement for non-sugared macros.
        return snippet;
    }

    let branch = parsed_def.branches.remove(0);
    let args_str = format_macro_args(branch.args)?;

    // The macro body is the most interesting part. It might end up as various
    // AST nodes, but also has special variables (e.g., `$foo`) which can't be
    // parsed as regular Rust code (and note that these can be escaped using
    // `$$`). We'll try and format like an AST node, but we'll substitute
    // variables for new names with the same length first.

    let body_sp = match branch.body {
        Some(sp) => sp,
        None => {
            // FIXME: should check the single-line empty function option
            return Some(format!(
                "{}macro {}({}) {{}}\n",
                format_visibility(vis),
                ident,
                args_str,
            ));
        }
    };
    let old_body = context.snippet(body_sp);
    let (body_str, substs) = replace_names(old_body);

    // The body is formatted as a stand-alone snippet and indented afterwards,
    // so the width available to it shrinks by the indentation it will get.
    let mut config = context.config.clone();
    let block_indent_width = indent.block_indent(&config).width();
    let new_width = match config.max_width().checked_sub(block_indent_width) {
        Some(width) => width,
        // The item is already indented past `max_width`; a plain subtraction
        // would underflow here, so leave the definition untouched instead.
        None => return snippet,
    };
    config.set().max_width(new_width);
    config.set().hide_parse_errors(true);

    // First try to format as items, then as statements.
    let new_body = match ::format_snippet(&body_str, &config) {
        Some(new_body) => new_body,
        None => match ::format_code_block(&body_str, &config) {
            Some(new_body) => new_body,
            None => return snippet,
        },
    };

    // Indent the body since it is in a block. Blank lines are left empty so
    // that the result does not contain whitespace-only lines.
    let indent_str = indent.block_indent(&config).to_string(&config);
    let mut new_body = new_body
        .lines()
        .map(|l| {
            if l.is_empty() {
                String::new()
            } else {
                format!("{}{}", indent_str, l)
            }
        })
        .collect::<Vec<_>>()
        .join("\n");

    // Undo our replacement of macro variables.
    // FIXME: this could be *much* more efficient.
    for (old, new) in substs.iter() {
        // If a placeholder already occurs in the original body we cannot tell
        // substituted names from pre-existing text, so give up.
        if old_body.contains(new) {
            debug!(
                "rewrite_macro_def: bailing matching variable: `{}` in `{}`",
                new, ident
            );
            return snippet;
        }
        new_body = new_body.replace(new, old);
    }

    let result = format!(
        "{}macro {}({}) {{\n{}\n{}}}",
        format_visibility(vis),
        ident,
        args_str,
        new_body,
        indent.to_string(&context.config),
    );

    Some(result)
}
// Replaces macro variables such as `$foo` with a placeholder of the same
// length (`zfoo`) so that the text can be parsed as ordinary Rust.
//
// Returns the rewritten text and a map from each original spelling (e.g.
// `$foo`) to its placeholder (e.g. `zfoo`). The caller must check for name
// overlap before undoing the substitution.
//
// Escaped variables keep their leading `$`s: `$$foo` becomes `$zfoo`.
// Names consist of alphanumeric characters and `_`. A run of `$` that is not
// followed by a name (e.g. `$(` or a `$` inside a string literal) is copied
// through unchanged rather than dropped.
fn replace_names(input: &str) -> (String, HashMap<String, String>) {
    // Flushes a run of `dollar_count` dollars followed by `name`.
    fn finish_run(
        result: &mut String,
        substs: &mut HashMap<String, String>,
        dollar_count: usize,
        name: &str,
    ) {
        if name.is_empty() {
            // No variable follows the dollars: keep them exactly as written.
            result.push_str(&"$".repeat(dollar_count));
            return;
        }
        // Every `$` but the last one is an escape and is kept verbatim; the
        // last one is replaced by `z`.
        let escapes = "$".repeat(dollar_count - 1);
        let new_name = format!("{}z{}", escapes, name);
        let old_name = format!("${}{}", escapes, name);
        result.push_str(&new_name);
        substs.insert(old_name, new_name);
    }

    // Substitutions preserve length, so the output never outgrows the input.
    let mut result = String::with_capacity(input.len());
    let mut substs = HashMap::new();
    // Number of `$`s in the run currently being read (0 when outside a run).
    let mut dollar_count = 0;
    // Name read so far after the current run of `$`s.
    let mut cur_name = String::new();

    for c in input.chars() {
        if c == '$' && cur_name.is_empty() {
            dollar_count += 1;
            continue;
        }

        if dollar_count > 0 {
            if c.is_alphanumeric() || c == '_' {
                cur_name.push(c);
                continue;
            }
            // `c` terminates the current run (and its name, if any).
            finish_run(&mut result, &mut substs, dollar_count, &cur_name);
            dollar_count = 0;
            cur_name.clear();
        }

        if c == '$' {
            // A `$` directly after a name starts a new variable.
            dollar_count = 1;
        } else {
            result.push(c);
        }
    }

    if dollar_count > 0 {
        finish_run(&mut result, &mut substs, dollar_count, &cur_name);
    }

    debug!("replace_names `{}` {:?}", result, substs);

    (result, substs)
}
// This is a bit sketchy. The token rules probably need tweaking, but it works
// for some common cases. I hope the basic logic is sufficient. Note that the
// meaning of some tokens is a bit different here from usual Rust, e.g., `*`
// and `(`/`)` have special meaning.
//
// We always try and format on one line.
fn format_macro_args(toks: ThinTokenStream) -> Option<String> {
let mut result = String::with_capacity(128);
let mut insert_space = SpaceState::Never;
for tok in (toks.into(): TokenStream).trees() {
match tok {
TokenTree::Token(_, t) => {
if !result.is_empty() && force_space_before(&t) {
insert_space = SpaceState::Always;
}
if force_no_space_before(&t) {
insert_space = SpaceState::Never;
}
match (insert_space, ident_like(&t)) {
(SpaceState::Always, _)
| (SpaceState::Punctuation, false)
| (SpaceState::Ident, true) => {
result.push(' ');
}
_ => {}
}
result.push_str(&pprust::token_to_string(&t));
insert_space = next_space(&t);
}
TokenTree::Delimited(_, d) => {
let formatted = format_macro_args(d.tts)?;
match insert_space {
SpaceState::Always => {
result.push(' ');
}
_ => {}
}
match d.delim {
DelimToken::Paren => {
result.push_str(&format!("({})", formatted));
insert_space = SpaceState::Always;
}
DelimToken::Bracket => {
result.push_str(&format!("[{}]", formatted));
insert_space = SpaceState::Always;
}
DelimToken::Brace => {
result.push_str(&format!(" {{ {} }}", formatted));
insert_space = SpaceState::Always;
}
DelimToken::NoDelim => {
result.push_str(&format!("{}", formatted));
insert_space = SpaceState::Always;
}
}
}
}
}
Some(result)
}
/// Spacing state carried from one token to the next by `format_macro_args`.
///
/// Each variant says when a space should be inserted before the next token.
/// Delimited groups only receive a space for `Always`.
#[derive(Copy, Clone)]
enum SpaceState {
    /// Do not insert a space.
    Never,
    /// Insert a space if the next token is not ident-like (see `ident_like`).
    Punctuation,
    /// Insert a space if the next token is ident-like (see `ident_like`).
    Ident, // Or ident/literal-like thing.
    /// Insert a space regardless of the next token.
    Always,
}
// Returns true for tokens that `format_macro_args` separates from preceding
// output with a space (comparison, assignment, logical and arrow operators,
// `!`, `~`, `@`, `#`, `$`, and every binary operator except `*`).
fn force_space_before(tok: &Token) -> bool {
    match *tok {
        // `*` is the one binary operator that is not pushed away from what
        // precedes it.
        Token::BinOp(BinOpToken::Star) => false,
        Token::BinOp(_) | Token::BinOpEq(_) => true,

        Token::Eq | Token::EqEq | Token::Ne => true,
        Token::Lt | Token::Le | Token::Ge | Token::Gt => true,
        Token::AndAnd | Token::OrOr => true,
        Token::RArrow | Token::LArrow | Token::FatArrow => true,
        Token::Not | Token::Tilde | Token::At | Token::Pound | Token::Dollar => true,

        _ => false,
    }
}
// Returns true for tokens that must directly follow the preceding output
// without a space: `;`, `,`, `.` and `*`.
fn force_no_space_before(tok: &Token) -> bool {
    match *tok {
        Token::BinOp(BinOpToken::Star) => true,
        Token::Semi | Token::Comma | Token::Dot => true,
        _ => false,
    }
}
// Returns true if the token is an identifier, a literal or a lifetime.
fn ident_like(tok: &Token) -> bool {
    match *tok {
        Token::Ident(_) => true,
        Token::Literal(..) => true,
        Token::Lifetime(_) => true,
        _ => false,
    }
}
fn next_space(tok: &Token) -> SpaceState {
match *tok {
Token::Not
| Token::Tilde
| Token::At
| Token::Comma
| Token::Dot
| Token::DotDot
| Token::DotDotDot
| Token::DotDotEq
| Token::DotEq
| Token::Question
| Token::Underscore
| Token::BinOp(_) => SpaceState::Punctuation,
Token::ModSep
| Token::Pound
| Token::Dollar
| Token::OpenDelim(_)
| Token::CloseDelim(_)
| Token::Whitespace => SpaceState::Never,
Token::Literal(..) | Token::Ident(_) | Token::Lifetime(_) => SpaceState::Ident,
_ => SpaceState::Always,
}
}
/// Tries to convert a macro use into a short hand try expression. Returns None
/// when the macro is not an instance of try! (or parsing the inner expression
/// failed).
@ -393,3 +695,108 @@ fn get_prefix_space_width(context: &RewriteContext, s: &str) -> usize {
// Returns true if `s` is empty or contains nothing but whitespace.
fn is_empty_line(s: &str) -> bool {
    s.trim().is_empty()
}
// A very simple parser that just parses a macros 2.0 definition into its branches.
// Currently we do not attempt to parse any further than that.
//
// `MacroParser::new(toks)` is called by `rewrite_macro_def`; it is presumably
// generated by `#[derive(new)]` from the `derive_new` crate.
#[derive(new)]
struct MacroParser {
    // Cursor over the token trees of the macro definition being parsed.
    toks: Cursor,
}
impl MacroParser {
    // Grammar: (`(` ... `)` `=>` `{` ... `}`)*
    //
    // Parses branches for as long as `look_ahead(1)` still yields a token
    // tree. Returns `None` as soon as one branch fails to parse.
    fn parse(&mut self) -> Option<Macro> {
        let mut branches = Vec::new();
        loop {
            if self.toks.look_ahead(1).is_none() {
                break;
            }
            let branch = self.parse_branch()?;
            branches.push(branch);
        }
        Some(Macro { branches: branches })
    }

    // Grammar: `(` ... `)` `=>` `{` ... `}`
    //
    // Consumes a delimited argument group, a `=>` and a delimited body.
    // Returns `None` if the tokens run out or do not have that shape.
    fn parse_branch(&mut self) -> Option<MacroBranch> {
        let (args_paren_kind, args) = match self.toks.next()? {
            TokenTree::Delimited(_, ref delimited) => {
                (delimited.delim, delimited.tts.clone().into())
            }
            TokenTree::Token(..) => return None,
        };

        let separator = self.toks.next()?;
        let is_fat_arrow = match separator {
            TokenTree::Token(_, Token::FatArrow) => true,
            _ => false,
        };
        if !is_fat_arrow {
            return None;
        }

        let body = match self.toks.next()? {
            TokenTree::Delimited(_, ref delimited) => inner_span(delimited.tts.clone().into()),
            TokenTree::Token(..) => return None,
        };

        Some(MacroBranch {
            args: args,
            args_paren_kind: args_paren_kind,
            body: body,
        })
    }
}
// Returns the span reaching from the first to the last token tree of `ts`,
// or `None` if `ts` contains no token trees.
fn inner_span(ts: TokenStream) -> Option<Span> {
    let mut trees = ts.trees();
    let start = trees.next()?.span();
    // With a single tree the span of that tree is the whole span.
    let whole = match trees.last() {
        Some(tree) => start.to(tree.span()),
        None => start,
    };
    Some(whole)
}
// A parsed macros 2.0 macro definition, as produced by `MacroParser::parse`.
struct Macro {
    // The branches of the definition, in source order.
    branches: Vec<MacroBranch>,
}
// A single `(args) => { body }` branch of a macro definition.
//
// FIXME: it would be more efficient to use references to the token streams
// rather than clone them, if we can make the borrowing work out.
struct MacroBranch {
    // The tokens inside the argument delimiters.
    args: ThinTokenStream,
    // The kind of delimiter that surrounds the arguments.
    args_paren_kind: DelimToken,
    // Span covering the tokens inside the body delimiters; `None` if the body
    // contains no tokens.
    body: Option<Span>,
}
#[cfg(test)]
mod test {
    use super::*;
    use syntax::parse::{parse_stream_from_source_str, ParseSess};
    use syntax::codemap::{FileName, FilePathMapping};

    // Tokenizes `s` and runs the result through `format_macro_args`.
    fn format_macro_args_str(s: &str) -> String {
        let sess = ParseSess::new(FilePathMapping::empty());
        let stream = parse_stream_from_source_str(
            FileName::Custom("stdin".to_owned()),
            s.to_owned(),
            &sess,
            None,
        );
        format_macro_args(stream.into()).unwrap()
    }

    #[test]
    fn test_format_macro_args() {
        // (input, expected output)
        let cases = [
            ("", ""),
            ("$ x : ident", "$x: ident"),
            (
                "$ m1 : ident , $ m2 : ident , $ x : ident",
                "$m1: ident, $m2: ident, $x: ident",
            ),
            (
                "$($beginning:ident),*;$middle:ident;$($end:ident),*",
                "$($beginning: ident),*; $middle: ident; $($end: ident),*",
            ),
            (
                "$ name : ident ( $ ( $ dol : tt $ var : ident ) * ) $ ( $ body : tt ) *",
                "$name: ident($($dol: tt $var: ident)*) $($body: tt)*",
            ),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(format_macro_args_str(input), expected.to_owned());
        }
    }
}

View File

@ -16,8 +16,8 @@ use syntax::codemap::{self, BytePos, CodeMap, Pos, Span};
use syntax::parse::ParseSess;
use codemap::{LineRangeUtils, SpanUtils};
use comment::{combine_strs_with_missing_comments, contains_comment, remove_trailing_white_spaces,
CodeCharKind, CommentCodeSlices, FindUncommented};
use comment::{combine_strs_with_missing_comments, contains_comment, CodeCharKind,
CommentCodeSlices, FindUncommented};
use comment::rewrite_comment;
use config::{BraceStyle, Config};
use expr::rewrite_literal;
@ -25,7 +25,7 @@ use items::{format_impl, format_trait, format_trait_alias, rewrite_associated_im
rewrite_associated_type, rewrite_type_alias, FnSig, StaticParts, StructParts};
use lists::{itemize_list, write_list, DefinitiveListTactic, ListFormatting, SeparatorPlace,
SeparatorTactic};
use macros::{rewrite_macro, MacroPosition};
use macros::{rewrite_macro, rewrite_macro_def, MacroPosition};
use regex::Regex;
use rewrite::{Rewrite, RewriteContext};
use shape::{Indent, Shape};
@ -439,10 +439,16 @@ impl<'b, 'a: 'b> FmtVisitor<'a> {
let snippet = Some(self.snippet(item.span).to_owned());
self.push_rewrite(item.span, snippet);
}
ast::ItemKind::MacroDef(..) => {
// FIXME(#1539): macros 2.0
let mac_snippet = Some(remove_trailing_white_spaces(self.snippet(item.span)));
self.push_rewrite(item.span, mac_snippet);
ast::ItemKind::MacroDef(ref def) => {
let rewrite = rewrite_macro_def(
&self.get_context(),
self.block_indent,
def,
item.ident,
&item.vis,
item.span,
);
self.push_rewrite(item.span, rewrite);
}
}
}