Move license template parsing into config phase

This commit is contained in:
David Lukes 2018-02-19 17:26:29 +01:00
parent d012d52b4d
commit ad76741bca
3 changed files with 238 additions and 146 deletions

View File

@ -78,6 +78,9 @@ macro_rules! create_config {
#[derive(Clone)]
pub struct Config {
// If a `license_template_path` has been specified and the file it names was successfully
// read, parsed and compiled, the resulting regex is stored here.
pub license_template: Option<Regex>,
// For each config item, we store a bool indicating whether it has
// been accessed and the value, and a bool whether the option was
// manually initialised, or taken from the default,
@ -118,8 +121,10 @@ macro_rules! create_config {
$(
pub fn $i(&mut self, value: $ty) {
(self.0).$i.2 = value;
if stringify!($i) == "use_small_heuristics" {
self.0.set_heuristics();
match stringify!($i) {
"use_small_heuristics" => self.0.set_heuristics(),
"license_template_path" => self.0.set_license_template(),
&_ => (),
}
}
)+
@ -189,6 +194,7 @@ macro_rules! create_config {
}
)+
self.set_heuristics();
self.set_license_template();
self
}
@ -276,8 +282,10 @@ macro_rules! create_config {
_ => panic!("Unknown config key in override: {}", key)
}
if key == "use_small_heuristics" {
self.set_heuristics();
match key {
"use_small_heuristics" => self.set_heuristics(),
"license_template_path" => self.set_license_template(),
&_ => (),
}
}
@ -382,12 +390,50 @@ macro_rules! create_config {
self.set().width_heuristics(WidthHeuristics::null());
}
}
// Rebuilds `license_template` from the file named by `license_template_path`.
//
// The file is read, converted to regex source by `parse_license_template` and compiled
// with `Regex::new`. Any failure along the way is reported as a warning on stderr and
// leaves `license_template` set to `None`.
//
// An empty path (the default value of the option) means that no template is configured:
// nothing is opened and no warning is printed. This matters because this function is
// also called whenever a parsed configuration is applied, whether or not the option
// was given.
fn set_license_template(&mut self) {
    let license_template_path = self.license_template_path();
    // Forget the regex compiled for any earlier path; it is only replaced below if the
    // current path yields a usable template, so a stale template is never enforced.
    self.license_template = None;
    if license_template_path.is_empty() {
        return;
    }
    let mut license_template_file = match File::open(&license_template_path) {
        Ok(file) => file,
        Err(e) => {
            eprintln!("Warning: unable to open license template file {:?}: {}",
                      license_template_path, e);
            return;
        }
    };
    let mut license_template_str = String::new();
    if let Err(e) = license_template_file.read_to_string(&mut license_template_str) {
        eprintln!("Warning: unable to read from license template file {:?}: {}",
                  license_template_path, e);
        return;
    }
    let license_template_parsed = match parse_license_template(&license_template_str) {
        Ok(string) => string,
        Err(e) => {
            eprintln!("Warning: unable to parse license template file {:?}: {}",
                      license_template_path, e);
            return;
        }
    };
    // `parse_license_template` does not validate the placeholders, so compilation can
    // still fail here.
    match Regex::new(&license_template_parsed) {
        Ok(re) => self.license_template = Some(re),
        Err(e) => {
            eprintln!("Warning: regex syntax error in placeholder, unable to compile \
                       license template from file {:?}: {}", license_template_path, e);
        }
    }
}
}
// Template for the default configuration
impl Default for Config {
fn default() -> Config {
Config {
license_template: None,
$(
$i: (Cell::new(false), false, $def, $stb),
)+

View File

@ -15,6 +15,8 @@ use std::fs::File;
use std::io::{Error, ErrorKind, Read};
use std::path::{Path, PathBuf};
use regex::Regex;
#[macro_use]
mod config_type;
#[macro_use]
@ -50,7 +52,7 @@ create_config! {
comment_width: usize, 80, false,
"Maximum length of comments. No effect unless wrap_comments = true";
normalize_comments: bool, false, true, "Convert /* */ comments to // comments where possible";
license_template: String, String::default(), false, "Check for license";
license_template_path: String, String::default(), false, "Beginning of file must match license template";
// Single line expressions and items.
empty_item_single_line: bool, true, false,
@ -172,9 +174,145 @@ pub fn get_toml_path(dir: &Path) -> Result<Option<PathBuf>, Error> {
Ok(None)
}
/// Translate a license template into the source text of an anchored regular expression.
///
/// Writing the template directly as a regex would force the author to escape most of the
/// license text by hand. Instead, a template is literal text by default, and only the parts
/// enclosed in `{` and `}` (placeholders) are taken as regex syntax. Two further rules apply:
///
/// - a literal `{`, `}` or `\` is written by preceding it with `\`
/// - the empty placeholder `{}` stands for `{.*?}`
///
/// The result starts with `^`, followed by the regex-escaped literal parts and the
/// placeholder contents in template order. Placeholder contents are copied verbatim (they
/// are neither validated nor wrapped in a group), so the returned string is **not**
/// guaranteed to be a syntactically valid regular expression.
///
/// # Errors
///
/// Returns a message naming the offending line when the template contains an unescaped `}`
/// outside a placeholder, ends inside a placeholder, or ends right after a `\` in literal
/// text.
///
/// # Examples
///
/// ```
/// assert_eq!(
///     rustfmt_config::parse_license_template(
///         r"
/// // Copyright {\d+} The \} Rust \\ Project \{ Developers. See the {([A-Z]+)}
/// // file at the top-level directory of this distribution and at
/// // {}.
/// //
/// // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
/// // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
/// // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
/// // option. This file may not be copied, modified, or distributed
/// // except according to those terms.
/// "
///     ).unwrap(),
///     r"^
/// // Copyright \d+ The \} Rust \\ Project \{ Developers\. See the ([A-Z]+)
/// // file at the top\-level directory of this distribution and at
/// // .*?\.
/// //
/// // Licensed under the Apache License, Version 2\.0 <LICENSE\-APACHE or
/// // http://www\.apache\.org/licenses/LICENSE\-2\.0> or the MIT license
/// // <LICENSE\-MIT or http://opensource\.org/licenses/MIT>, at your
/// // option\. This file may not be copied, modified, or distributed
/// // except according to those terms\.
/// "
/// );
/// ```
pub fn parse_license_template(template: &str) -> Result<String, String> {
    // Scanner states: inside literal text or inside a placeholder, each with a variant for
    // "the previous character was a backslash".
    enum State {
        Lit,
        LitEsc,
        // the u32 is the current depth of `{` nesting (1 = directly inside a placeholder)
        Re(u32),
        ReEsc(u32),
    }

    let mut pattern = String::from("^");
    // text collected since the last literal/placeholder boundary
    let mut pending = String::new();
    let mut line = 1;
    // line on which the most recent placeholder was opened, for error reporting
    let mut placeholder_line = 0;
    let mut state = State::Lit;

    for chr in template.chars() {
        if chr == '\n' {
            line += 1;
        }
        state = match (state, chr) {
            // a placeholder starts: flush the literal collected so far, escaped
            (State::Lit, '{') => {
                pattern.push_str(&regex::escape(&pending));
                pending.clear();
                placeholder_line = line;
                State::Re(1)
            }
            (State::Lit, '}') => {
                return Err(format!("escape or balance closing brace on l. {}", line));
            }
            // the backslash itself is dropped, the next character is kept as is
            (State::Lit, '\\') => State::LitEsc,
            (State::Lit, _) | (State::LitEsc, _) => {
                pending.push(chr);
                State::Lit
            }
            // the outermost brace closes: flush the placeholder, unescaped
            (State::Re(1), '}') => {
                if pending.is_empty() {
                    // default regex for empty placeholder {}
                    pattern.push_str(".*?");
                } else {
                    pattern.push_str(&pending);
                    pending.clear();
                }
                State::Lit
            }
            // everything else inside a placeholder is kept, including backslashes
            (State::Re(depth), _) => {
                pending.push(chr);
                match chr {
                    '{' => State::Re(depth + 1),
                    '}' => State::Re(depth - 1),
                    '\\' => State::ReEsc(depth),
                    _ => State::Re(depth),
                }
            }
            (State::ReEsc(depth), _) => {
                pending.push(chr);
                State::Re(depth)
            }
        };
    }

    match state {
        State::Lit => {
            pattern.push_str(&regex::escape(&pending));
            Ok(pattern)
        }
        State::LitEsc => Err(format!("incomplete escape sequence on l. {}", line)),
        State::Re(_) | State::ReEsc(_) => Err(format!(
            "escape or balance opening brace on l. {}",
            placeholder_line
        )),
    }
}
#[cfg(test)]
mod test {
use super::Config;
use super::{parse_license_template, Config};
#[test]
fn test_config_set() {
@ -211,6 +349,43 @@ mod test {
assert_eq!(config.was_set().verbose(), false);
}
#[test]
fn test_parse_license_template() {
    // Shorthands: the regex source produced for a valid template, and the message
    // reported for an invalid one.
    let regex_for = |template: &str| parse_license_template(template).unwrap();
    let error_for = |template: &str| parse_license_template(template).unwrap_err();

    // literal text is regex-escaped
    assert_eq!(regex_for("literal (.*)"), r"^literal \(\.\*\)");
    // an escaped brace is a literal brace
    assert_eq!(regex_for(r"escaping \}"), r"^escaping \}");
    assert!(parse_license_template("unbalanced } without escape").is_err());
    // placeholder contents are copied verbatim
    assert_eq!(
        regex_for(r"{\d+} place{-?}holder{s?}"),
        r"^\d+ place-?holders?"
    );
    // the empty placeholder has a default regex
    assert_eq!(regex_for("default {}"), "^default .*?");
    // braces escaped inside a placeholder do not count towards nesting
    assert_eq!(
        regex_for(r"unbalanced nested braces {\{{3}}"),
        r"^unbalanced nested braces \{{3}"
    );

    // error messages carry the relevant line number
    assert_eq!(
        error_for("parsing error }"),
        "escape or balance closing brace on l. 1"
    );
    assert_eq!(
        error_for("parsing error {\nsecond line"),
        "escape or balance opening brace on l. 1"
    );
    assert_eq!(
        error_for(r"parsing error \"),
        "incomplete escape sequence on l. 1"
    );
}
// FIXME(#2183) these tests cannot be run in parallel because they use env vars
// #[test]
// fn test_as_not_nightly_channel() {

View File

@ -43,7 +43,6 @@ use syntax::ast;
use syntax::codemap::{CodeMap, FilePathMapping};
pub use syntax::codemap::FileName;
use syntax::parse::{self, ParseSess};
use regex::Regex;
use checkstyle::{output_footer, output_header};
use comment::{CharClasses, FullCodeCharKind};
@ -102,8 +101,6 @@ pub enum ErrorKind {
BadIssue(Issue),
// License check has failed
LicenseCheck,
// License template could not be parsed
ParsingLicense,
}
impl fmt::Display for ErrorKind {
@ -117,7 +114,6 @@ impl fmt::Display for ErrorKind {
ErrorKind::TrailingWhitespace => write!(fmt, "left behind trailing whitespace"),
ErrorKind::BadIssue(issue) => write!(fmt, "found {}", issue),
ErrorKind::LicenseCheck => write!(fmt, "license check failed"),
ErrorKind::ParsingLicense => write!(fmt, "parsing regex in license template failed"),
}
}
}
@ -136,8 +132,7 @@ impl FormattingError {
match self.kind {
ErrorKind::LineOverflow(..)
| ErrorKind::TrailingWhitespace
| ErrorKind::LicenseCheck
| ErrorKind::ParsingLicense => "error:",
| ErrorKind::LicenseCheck => "error:",
ErrorKind::BadIssue(_) => "WARNING:",
}
}
@ -415,82 +410,6 @@ fn should_report_error(
}
}
fn check_license(text: &str, license_template: &str) -> Result<bool, regex::Error> {
// the template is parsed using a state machine
enum State {
Lit,
LitEsc,
// the u32 keeps track of brace nesting
Re(u32),
ReEsc(u32),
}
let mut template_re = String::from("^");
let mut buffer = String::new();
let mut state = State::Lit;
for chr in license_template.chars() {
state = match state {
State::Lit => match chr {
'{' => {
template_re.push_str(&regex::escape(&buffer));
buffer.clear();
State::Re(1)
}
'}' => panic!("license template syntax error"),
'\\' => State::LitEsc,
_ => {
buffer.push(chr);
State::Lit
}
},
State::LitEsc => {
buffer.push(chr);
State::Lit
}
State::Re(brace_nesting) => {
match chr {
'{' => {
buffer.push(chr);
State::Re(brace_nesting + 1)
}
'}' => {
match brace_nesting {
1 => {
// default regex for empty placeholder {}
if buffer.is_empty() {
buffer = ".*?".to_string();
}
template_re.push_str(&buffer);
buffer.clear();
State::Lit
}
_ => {
buffer.push(chr);
State::Re(brace_nesting - 1)
}
}
}
'\\' => {
buffer.push(chr);
State::ReEsc(brace_nesting)
}
_ => {
buffer.push(chr);
State::Re(brace_nesting)
}
}
}
State::ReEsc(brace_nesting) => {
buffer.push(chr);
State::Re(brace_nesting)
}
}
}
template_re.push_str(&regex::escape(&buffer));
let template_re = Regex::new(&template_re)?;
Ok(template_re.is_match(text))
}
// Formatting done on a char by char or line by line basis.
// FIXME(#20) other stuff for parity with make tidy
fn format_lines(
@ -513,28 +432,15 @@ fn format_lines(
let allow_issue_seek = !issue_seeker.is_disabled();
// Check license.
if config.was_set().license_template() {
match check_license(text, &config.license_template()) {
Ok(check) => {
if !check {
errors.push(FormattingError {
line: cur_line,
kind: ErrorKind::LicenseCheck,
is_comment: false,
is_string: false,
line_buffer: String::new(),
});
}
}
Err(_) => {
errors.push(FormattingError {
line: cur_line,
kind: ErrorKind::ParsingLicense,
is_comment: false,
is_string: false,
line_buffer: String::new(),
});
}
if let Some(ref license_template) = config.license_template {
if !license_template.is_match(text) {
errors.push(FormattingError {
line: cur_line,
kind: ErrorKind::LicenseCheck,
is_comment: false,
is_string: false,
line_buffer: String::new(),
});
}
}
@ -964,7 +870,7 @@ pub fn run(input: Input, config: &Config) -> Summary {
#[cfg(test)]
mod test {
use super::{check_license, format_code_block, format_snippet, Config};
use super::{format_code_block, format_snippet, Config};
#[test]
fn test_no_panic_on_format_snippet_and_format_code_block() {
@ -1050,39 +956,4 @@ false,
};";
assert!(test_format_inner(format_code_block, code_block, expected));
}
#[test]
fn test_check_license() {
    // Shorthand: does `text` pass the check against a template that is known to be valid?
    let passes = |text: &str, template: &str| check_license(text, template).unwrap();

    // purely literal templates
    assert!(passes("literal matching", "literal matching"));
    assert!(!passes("literal no match", "literal matching"));

    // placeholders at both ends of the template
    let both_ends = r"{[Rr]egex} start {} end: {\d+}";
    assert!(passes("Regex start and end: 2018", both_ends));
    assert!(!passes("Regex start and end no match: 2018", both_ends));

    // placeholders surrounded by literal text
    let middle = r"Regex {} middle: {\d+} (tm)";
    assert!(passes("Regex in the middle: 2018 (tm)", middle));
    assert!(!passes("Regex in the middle no match: 2018 (tm)", middle));

    // the default placeholder regex does not cross line boundaries
    assert!(!passes(
        "default doesn't match\nacross lines",
        "default {} lines"
    ));
    // an invalid regex inside a placeholder is reported as an error
    assert!(check_license("", "this is not a valid {[regex}").is_err());
    assert!(passes(
        "parse unbalanced nested delimiters{{{",
        r"parse unbalanced nested delimiters{\{{3}}"
    ));
    assert!(passes("escaping }", r"escaping \}"));
}
}