From ad76741bca51d169d0e167223435bcc08fc2a3af Mon Sep 17 00:00:00 2001 From: David Lukes Date: Mon, 19 Feb 2018 17:26:29 +0100 Subject: [PATCH] Move license template parsing into config phase --- src/config/config_type.rs | 54 +++++++++++- src/config/mod.rs | 179 +++++++++++++++++++++++++++++++++++++- src/lib.rs | 151 +++----------------------------- 3 files changed, 238 insertions(+), 146 deletions(-) diff --git a/src/config/config_type.rs b/src/config/config_type.rs index 950225679a5..02e9b2d1078 100644 --- a/src/config/config_type.rs +++ b/src/config/config_type.rs @@ -78,6 +78,9 @@ macro_rules! create_config { #[derive(Clone)] pub struct Config { + // if a license_template_path has been specified, successfully read, parsed and compiled + // into a regex, it will be stored here + pub license_template: Option<Regex>, // For each config item, we store a bool indicating whether it has // been accessed and the value, and a bool whether the option was // manually initialised, or taken from the default, @@ -118,8 +121,10 @@ macro_rules! create_config { $( pub fn $i(&mut self, value: $ty) { (self.0).$i.2 = value; - if stringify!($i) == "use_small_heuristics" { - self.0.set_heuristics(); + match stringify!($i) { + "use_small_heuristics" => self.0.set_heuristics(), + "license_template_path" => self.0.set_license_template(), + &_ => (), } } )+ @@ -189,6 +194,7 @@ macro_rules! create_config { } )+ self.set_heuristics(); + self.set_license_template(); self } @@ -276,8 +282,10 @@ macro_rules! create_config { _ => panic!("Unknown config key in override: {}", key) } - if key == "use_small_heuristics" { - self.set_heuristics(); + match key { + "use_small_heuristics" => self.set_heuristics(), + "license_template_path" => self.set_license_template(), + &_ => (), } } @@ -382,12 +390,50 @@ macro_rules! 
create_config { self.set().width_heuristics(WidthHeuristics::null()); } } + + fn set_license_template(&mut self) { + let license_template_path = self.license_template_path(); + let mut license_template_file = match File::open(&license_template_path) { + Ok(file) => file, + Err(e) => { + eprintln!("Warning: unable to open license template file {:?}: {}", + license_template_path, e); + return; + } + }; + let mut license_template_str = String::new(); + match license_template_file.read_to_string(&mut license_template_str) { + Ok(_) => (), + Err(e) => { + eprintln!("Warning: unable to read from license template file {:?}: {}", + license_template_path, e); + return; + } + } + let license_template_parsed = match parse_license_template(&license_template_str) { + Ok(string) => string, + Err(e) => { + eprintln!("Warning: unable to parse license template file {:?}: {}", + license_template_path, e); + return; + } + }; + self.license_template = match Regex::new(&license_template_parsed) { + Ok(re) => Some(re), + Err(e) => { + eprintln!("Warning: regex syntax error in placeholder, unable to compile \ + license template from file {:?}: {}", license_template_path, e); + return; + } + } + } } // Template for the default configuration impl Default for Config { fn default() -> Config { Config { + license_template: None, $( $i: (Cell::new(false), false, $def, $stb), )+ diff --git a/src/config/mod.rs b/src/config/mod.rs index c16d5bb6799..53078716414 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -15,6 +15,8 @@ use std::fs::File; use std::io::{Error, ErrorKind, Read}; use std::path::{Path, PathBuf}; +use regex::Regex; + #[macro_use] mod config_type; #[macro_use] @@ -50,7 +52,7 @@ create_config! { comment_width: usize, 80, false, "Maximum length of comments. 
No effect unless wrap_comments = true"; normalize_comments: bool, false, true, "Convert /* */ comments to // comments where possible"; - license_template: String, String::default(), false, "Check for license"; + license_template_path: String, String::default(), false, "Beginning of file must match license template"; // Single line expressions and items. empty_item_single_line: bool, true, false, @@ -172,9 +174,145 @@ pub fn get_toml_path(dir: &Path) -> Result<Option<PathBuf>, Error> { Ok(None) } +/// Convert the license template into a string which can be turned into a regex. +/// +/// The license template could use regex syntax directly, but that would require a lot of manual +/// escaping, which is inconvenient. It is therefore literal by default, with optional regex +/// subparts delimited by `{` and `}`. Additionally: +/// +/// - to insert literal `{`, `}` or `\`, escape it with `\` +/// - an empty regex placeholder (`{}`) is shorthand for `{.*?}` +/// +/// This function parses this input format and builds a properly escaped *string* representation of +/// the equivalent regular expression. It **does not** however guarantee that the returned string is +/// a syntactically valid regular expression. +/// +/// # Examples +/// +/// ``` +/// assert_eq!( +/// rustfmt_config::parse_license_template( +/// r" +/// // Copyright {\d+} The \} Rust \\ Project \{ Developers. See the {([A-Z]+)} +/// // file at the top-level directory of this distribution and at +/// // {}. +/// // +/// // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +/// // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +/// // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +/// // option. This file may not be copied, modified, or distributed +/// // except according to those terms. +/// " +/// ).unwrap(), +/// r"^ +/// // Copyright \d+ The \} Rust \\ Project \{ Developers\. See the ([A-Z]+) +/// // file at the top\-level directory of this distribution and at +/// // .*?\. +/// // +/// // Licensed under the Apache License, Version 2\.0 <LICENSE\-APACHE or +/// // http://www\.apache\.org/licenses/LICENSE\-2\.0> or the MIT license +/// // <LICENSE\-MIT or http://opensource\.org/licenses/MIT>, at your +/// // option\. 
This file may not be copied, modified, or distributed +/// // except according to those terms\. +/// " +/// ); +/// ``` +pub fn parse_license_template(template: &str) -> Result<String, String> { + // the template is parsed using a state machine + enum State { + Lit, + LitEsc, + // the u32 keeps track of brace nesting + Re(u32), + ReEsc(u32), + } + + let mut parsed = String::from("^"); + let mut buffer = String::new(); + let mut state = State::Lit; + let mut linum = 1; + // keeps track of last line on which a regex placeholder was started + let mut open_brace_line = 0; + for chr in template.chars() { + if chr == '\n' { + linum += 1; + } + state = match state { + State::Lit => match chr { + '{' => { + parsed.push_str(&regex::escape(&buffer)); + buffer.clear(); + open_brace_line = linum; + State::Re(1) + } + '}' => return Err(format!("escape or balance closing brace on l. {}", linum)), + '\\' => State::LitEsc, + _ => { + buffer.push(chr); + State::Lit + } + }, + State::LitEsc => { + buffer.push(chr); + State::Lit + } + State::Re(brace_nesting) => { + match chr { + '{' => { + buffer.push(chr); + State::Re(brace_nesting + 1) + } + '}' => { + match brace_nesting { + 1 => { + // default regex for empty placeholder {} + if buffer.is_empty() { + buffer = ".*?".to_string(); + } + parsed.push_str(&buffer); + buffer.clear(); + State::Lit + } + _ => { + buffer.push(chr); + State::Re(brace_nesting - 1) + } + } + } + '\\' => { + buffer.push(chr); + State::ReEsc(brace_nesting) + } + _ => { + buffer.push(chr); + State::Re(brace_nesting) + } + } + } + State::ReEsc(brace_nesting) => { + buffer.push(chr); + State::Re(brace_nesting) + } + } + } + match state { + State::Re(_) | State::ReEsc(_) => { + return Err(format!( + "escape or balance opening brace on l. {}", + open_brace_line + )); + } + State::LitEsc => return Err(format!("incomplete escape sequence on l. 
{}", linum)), + _ => (), + } + parsed.push_str(&regex::escape(&buffer)); + + Ok(parsed) +} + #[cfg(test)] mod test { - use super::Config; + use super::{parse_license_template, Config}; #[test] fn test_config_set() { @@ -211,6 +349,43 @@ mod test { assert_eq!(config.was_set().verbose(), false); } + #[test] + fn test_parse_license_template() { + assert_eq!( + parse_license_template("literal (.*)").unwrap(), + r"^literal \(\.\*\)" + ); + assert_eq!( + parse_license_template(r"escaping \}").unwrap(), + r"^escaping \}" + ); + assert!(parse_license_template("unbalanced } without escape").is_err()); + assert_eq!( + parse_license_template(r"{\d+} place{-?}holder{s?}").unwrap(), + r"^\d+ place-?holders?" + ); + assert_eq!( + parse_license_template("default {}").unwrap(), + "^default .*?" + ); + assert_eq!( + parse_license_template(r"unbalanced nested braces {\{{3}}").unwrap(), + r"^unbalanced nested braces \{{3}" + ); + assert_eq!( + parse_license_template("parsing error }").unwrap_err(), + "escape or balance closing brace on l. 1" + ); + assert_eq!( + parse_license_template("parsing error {\nsecond line").unwrap_err(), + "escape or balance opening brace on l. 1" + ); + assert_eq!( + parse_license_template(r"parsing error \").unwrap_err(), + "incomplete escape sequence on l. 
1" + ); + } + // FIXME(#2183) these tests cannot be run in parallel because they use env vars // #[test] // fn test_as_not_nightly_channel() { diff --git a/src/lib.rs b/src/lib.rs index 6771a2ab794..e53f8bfb907 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,7 +43,6 @@ use syntax::ast; use syntax::codemap::{CodeMap, FilePathMapping}; pub use syntax::codemap::FileName; use syntax::parse::{self, ParseSess}; -use regex::Regex; use checkstyle::{output_footer, output_header}; use comment::{CharClasses, FullCodeCharKind}; @@ -102,8 +101,6 @@ pub enum ErrorKind { BadIssue(Issue), // License check has failed LicenseCheck, - // License template could not be parsed - ParsingLicense, } impl fmt::Display for ErrorKind { @@ -117,7 +114,6 @@ impl fmt::Display for ErrorKind { ErrorKind::TrailingWhitespace => write!(fmt, "left behind trailing whitespace"), ErrorKind::BadIssue(issue) => write!(fmt, "found {}", issue), ErrorKind::LicenseCheck => write!(fmt, "license check failed"), - ErrorKind::ParsingLicense => write!(fmt, "parsing regex in license template failed"), } } } @@ -136,8 +132,7 @@ impl FormattingError { match self.kind { ErrorKind::LineOverflow(..) 
| ErrorKind::TrailingWhitespace - | ErrorKind::LicenseCheck - | ErrorKind::ParsingLicense => "error:", + | ErrorKind::LicenseCheck => "error:", ErrorKind::BadIssue(_) => "WARNING:", } } @@ -415,82 +410,6 @@ fn should_report_error( } } -fn check_license(text: &str, license_template: &str) -> Result<bool, regex::Error> { - // the template is parsed using a state machine - enum State { - Lit, - LitEsc, - // the u32 keeps track of brace nesting - Re(u32), - ReEsc(u32), - } - - let mut template_re = String::from("^"); - let mut buffer = String::new(); - let mut state = State::Lit; - for chr in license_template.chars() { - state = match state { - State::Lit => match chr { - '{' => { - template_re.push_str(&regex::escape(&buffer)); - buffer.clear(); - State::Re(1) - } - '}' => panic!("license template syntax error"), - '\\' => State::LitEsc, - _ => { - buffer.push(chr); - State::Lit - } - }, - State::LitEsc => { - buffer.push(chr); - State::Lit - } - State::Re(brace_nesting) => { - match chr { - '{' => { - buffer.push(chr); - State::Re(brace_nesting + 1) - } - '}' => { - match brace_nesting { - 1 => { - // default regex for empty placeholder {} - if buffer.is_empty() { - buffer = ".*?".to_string(); - } - template_re.push_str(&buffer); - buffer.clear(); - State::Lit - } - _ => { - buffer.push(chr); - State::Re(brace_nesting - 1) - } - } - } - '\\' => { - buffer.push(chr); - State::ReEsc(brace_nesting) - } - _ => { - buffer.push(chr); - State::Re(brace_nesting) - } - } - } - State::ReEsc(brace_nesting) => { - buffer.push(chr); - State::Re(brace_nesting) - } - } - } - template_re.push_str(&regex::escape(&buffer)); - let template_re = Regex::new(&template_re)?; - Ok(template_re.is_match(text)) -} - // Formatting done on a char by char or line by line basis. // FIXME(#20) other stuff for parity with make tidy fn format_lines( @@ -513,28 +432,15 @@ fn format_lines( let allow_issue_seek = !issue_seeker.is_disabled(); // Check license. 
- if config.was_set().license_template() { - match check_license(text, &config.license_template()) { - Ok(check) => { - if !check { - errors.push(FormattingError { - line: cur_line, - kind: ErrorKind::LicenseCheck, - is_comment: false, - is_string: false, - line_buffer: String::new(), - }); - } - } - Err(_) => { - errors.push(FormattingError { - line: cur_line, - kind: ErrorKind::ParsingLicense, - is_comment: false, - is_string: false, - line_buffer: String::new(), - }); - } + if let Some(ref license_template) = config.license_template { + if !license_template.is_match(text) { + errors.push(FormattingError { + line: cur_line, + kind: ErrorKind::LicenseCheck, + is_comment: false, + is_string: false, + line_buffer: String::new(), + }); } } @@ -964,7 +870,7 @@ pub fn run(input: Input, config: &Config) -> Summary { #[cfg(test)] mod test { - use super::{check_license, format_code_block, format_snippet, Config}; + use super::{format_code_block, format_snippet, Config}; #[test] fn test_no_panic_on_format_snippet_and_format_code_block() { @@ -1050,39 +956,4 @@ false, };"; assert!(test_format_inner(format_code_block, code_block, expected)); } - - #[test] - fn test_check_license() { - assert!(check_license("literal matching", "literal matching").unwrap()); - assert!(!check_license("literal no match", "literal matching").unwrap()); - assert!( - check_license( - "Regex start and end: 2018", - r"{[Rr]egex} start {} end: {\d+}" - ).unwrap() - ); - assert!(!check_license( - "Regex start and end no match: 2018", - r"{[Rr]egex} start {} end: {\d+}" - ).unwrap()); - assert!( - check_license( - "Regex in the middle: 2018 (tm)", - r"Regex {} middle: {\d+} (tm)" - ).unwrap() - ); - assert!(!check_license( - "Regex in the middle no match: 2018 (tm)", - r"Regex {} middle: {\d+} (tm)" - ).unwrap()); - assert!(!check_license("default doesn't match\nacross lines", "default {} lines").unwrap()); - assert!(check_license("", "this is not a valid {[regex}").is_err()); - assert!( - 
check_license( - "parse unbalanced nested delimiters{{{", - r"parse unbalanced nested delimiters{\{{3}}" - ).unwrap() - ); - assert!(check_license("escaping }", r"escaping \}").unwrap()); - } }