diff --git a/rustfmt-config/src/license.rs b/rustfmt-config/src/license.rs deleted file mode 100644 index 4563f8a7809..00000000000 --- a/rustfmt-config/src/license.rs +++ /dev/null @@ -1,174 +0,0 @@ -use regex; - -/// Convert the license template into a string which can be turned into a regex. -/// -/// The license template could use regex syntax directly, but that would require a lot of manual -/// escaping, which is inconvenient. It is therefore literal by default, with optional regex -/// subparts delimited by `{` and `}`. Additionally: -/// -/// - to insert literal `{`, `}` or `\`, escape it with `\` -/// - an empty regex placeholder (`{}`) is shorthand for `{.*?}` -/// -/// This function parses this input format and builds a properly escaped *string* representation of -/// the equivalent regular expression. It **does not** however guarantee that the returned string is -/// a syntactically valid regular expression. -/// -/// # Examples -/// -/// ``` -/// # use rustfmt_config::license; -/// assert_eq!( -/// license::parse_template( -/// r" -/// // Copyright {\d+} The \} Rust \\ Project \{ Developers. See the {([A-Z]+)} -/// // file at the top-level directory of this distribution and at -/// // {}. -/// // -/// // Licensed under the Apache License, Version 2.0 or the MIT license -/// // , at your -/// // option. This file may not be copied, modified, or distributed -/// // except according to those terms. -/// " -/// ).unwrap(), -/// r"^ -/// // Copyright \d+ The \} Rust \\ Project \{ Developers\. See the ([A-Z]+) -/// // file at the top\-level directory of this distribution and at -/// // .*?\. -/// // -/// // Licensed under the Apache License, Version 2\.0 or the MIT license -/// // , at your -/// // option\. This file may not be copied, modified, or distributed -/// // except according to those terms\. -/// " -/// ); -/// ``` -pub fn parse_template(template: &str) -> Result { - // the template is parsed using a state machine - enum State { - Lit, - LitEsc, - // the u32 keeps track of brace nesting - Re(u32), - ReEsc(u32), - } - - let mut parsed = String::from("^"); - let mut buffer = String::new(); - let mut state = State::Lit; - let mut linum = 1; - // keeps track of last line on which a regex placeholder was started - let mut open_brace_line = 0; - for chr in template.chars() { - if chr == '\n' { - linum += 1; - } - state = match state { - State::Lit => match chr { - '{' => { - parsed.push_str(®ex::escape(&buffer)); - buffer.clear(); - open_brace_line = linum; - State::Re(1) - } - '}' => return Err(format!("escape or balance closing brace on l. {}", linum)), - '\\' => State::LitEsc, - _ => { - buffer.push(chr); - State::Lit - } - }, - State::LitEsc => { - buffer.push(chr); - State::Lit - } - State::Re(brace_nesting) => { - match chr { - '{' => { - buffer.push(chr); - State::Re(brace_nesting + 1) - } - '}' => { - match brace_nesting { - 1 => { - // default regex for empty placeholder {} - if buffer.is_empty() { - buffer = ".*?".to_string(); - } - parsed.push_str(&buffer); - buffer.clear(); - State::Lit - } - _ => { - buffer.push(chr); - State::Re(brace_nesting - 1) - } - } - } - '\\' => { - buffer.push(chr); - State::ReEsc(brace_nesting) - } - _ => { - buffer.push(chr); - State::Re(brace_nesting) - } - } - } - State::ReEsc(brace_nesting) => { - buffer.push(chr); - State::Re(brace_nesting) - } - } - } - match state { - State::Re(_) | State::ReEsc(_) => { - return Err(format!( - "escape or balance opening brace on l. {}", - open_brace_line - )); - } - State::LitEsc => return Err(format!("incomplete escape sequence on l. {}", linum)), - _ => (), - } - parsed.push_str(®ex::escape(&buffer)); - - Ok(parsed) -} - -#[cfg(test)] -mod test { - use super::parse_template; - - #[test] - fn test_parse_license_template() { - assert_eq!( - parse_template("literal (.*)").unwrap(), - r"^literal \(\.\*\)" - ); - assert_eq!(parse_template(r"escaping \}").unwrap(), r"^escaping \}"); - assert!(parse_template("unbalanced } without escape").is_err()); - assert_eq!( - parse_template(r"{\d+} place{-?}holder{s?}").unwrap(), - r"^\d+ place-?holders?" - ); - assert_eq!(parse_template("default {}").unwrap(), "^default .*?"); - assert_eq!( - parse_template(r"unbalanced nested braces {\{{3}}").unwrap(), - r"^unbalanced nested braces \{{3}" - ); - assert_eq!( - parse_template("parsing error }").unwrap_err(), - "escape or balance closing brace on l. 1" - ); - assert_eq!( - parse_template("parsing error {\nsecond line").unwrap_err(), - "escape or balance opening brace on l. 1" - ); - assert_eq!( - parse_template(r"parsing error \").unwrap_err(), - "incomplete escape sequence on l. 1" - ); - } -} diff --git a/src/config/config_type.rs b/src/config/config_type.rs index fe0e4c309e2..8b9a6b2d84d 100644 --- a/src/config/config_type.rs +++ b/src/config/config_type.rs @@ -408,7 +408,7 @@ macro_rules! create_config { license_template_path, e); return; }; - let license_template_parsed = match license::parse_template(&license_template_str) { + let license_template_parsed = match TemplateParser::parse(&license_template_str) { Ok(string) => string, Err(e) => { eprintln!("Warning: unable to parse license template file {:?}: {}", diff --git a/src/config/license.rs b/src/config/license.rs new file mode 100644 index 00000000000..3de04599129 --- /dev/null +++ b/src/config/license.rs @@ -0,0 +1,213 @@ +use regex; + +// the template is parsed using a state machine +enum ParsingState { + Lit, + LitEsc, + // the u32 keeps track of brace nesting + Re(u32), + ReEsc(u32), + Abort(String), +} + +use self::ParsingState::*; + +pub struct TemplateParser { + parsed: String, + buffer: String, + state: ParsingState, + linum: u32, + open_brace_line: u32, +} + +impl TemplateParser { + fn new() -> Self { + Self { + parsed: "^".to_owned(), + buffer: String::new(), + state: Lit, + linum: 1, + // keeps track of last line on which a regex placeholder was started + open_brace_line: 0, + } + } + + /// Convert a license template into a string which can be turned into a regex. + /// + /// The license template could use regex syntax directly, but that would require a lot of manual + /// escaping, which is inconvenient. It is therefore literal by default, with optional regex + /// subparts delimited by `{` and `}`. Additionally: + /// + /// - to insert literal `{`, `}` or `\`, escape it with `\` + /// - an empty regex placeholder (`{}`) is shorthand for `{.*?}` + /// + /// This function parses this input format and builds a properly escaped *string* representation + /// of the equivalent regular expression. It **does not** however guarantee that the returned + /// string is a syntactically valid regular expression. + /// + /// # Examples + /// + /// ``` + /// # use rustfmt_config::license::TemplateParser; + /// assert_eq!( + /// TemplateParser::parse( + /// r" + /// // Copyright {\d+} The \} Rust \\ Project \{ Developers. See the {([A-Z]+)} + /// // file at the top-level directory of this distribution and at + /// // {}. + /// // + /// // Licensed under the Apache License, Version 2.0 or the MIT license + /// // , at your + /// // option. This file may not be copied, modified, or distributed + /// // except according to those terms. + /// " + /// ).unwrap(), + /// r"^ + /// // Copyright \d+ The \} Rust \\ Project \{ Developers\. See the ([A-Z]+) + /// // file at the top\-level directory of this distribution and at + /// // .*?\. + /// // + /// // Licensed under the Apache License, Version 2\.0 or the MIT license + /// // , at your + /// // option\. This file may not be copied, modified, or distributed + /// // except according to those terms\. + /// " + /// ); + /// ``` + pub fn parse(template: &str) -> Result { + let mut parser = Self::new(); + for chr in template.chars() { + if chr == '\n' { + parser.linum += 1; + } + parser.state = match parser.state { + Lit => parser.trans_from_lit(chr), + LitEsc => parser.trans_from_litesc(chr), + Re(brace_nesting) => parser.trans_from_re(chr, brace_nesting), + ReEsc(brace_nesting) => parser.trans_from_reesc(chr, brace_nesting), + Abort(msg) => return Err(msg), + }; + } + // check if we've ended parsing in a valid state + match parser.state { + Abort(msg) => return Err(msg), + Re(_) | ReEsc(_) => { + return Err(format!( + "escape or balance opening brace on l. {}", + parser.open_brace_line + )); + } + LitEsc => return Err(format!("incomplete escape sequence on l. {}", parser.linum)), + _ => (), + } + parser.parsed.push_str(®ex::escape(&parser.buffer)); + + Ok(parser.parsed) + } + + fn trans_from_lit(&mut self, chr: char) -> ParsingState { + match chr { + '{' => { + self.parsed.push_str(®ex::escape(&self.buffer)); + self.buffer.clear(); + self.open_brace_line = self.linum; + Re(1) + } + '}' => Abort(format!( + "escape or balance closing brace on l. {}", + self.linum + )), + '\\' => LitEsc, + _ => { + self.buffer.push(chr); + Lit + } + } + } + + fn trans_from_litesc(&mut self, chr: char) -> ParsingState { + self.buffer.push(chr); + Lit + } + + fn trans_from_re(&mut self, chr: char, brace_nesting: u32) -> ParsingState { + match chr { + '{' => { + self.buffer.push(chr); + Re(brace_nesting + 1) + } + '}' => { + match brace_nesting { + 1 => { + // default regex for empty placeholder {} + if self.buffer.is_empty() { + self.parsed.push_str(".*?"); + } else { + self.parsed.push_str(&self.buffer); + } + self.buffer.clear(); + Lit + } + _ => { + self.buffer.push(chr); + Re(brace_nesting - 1) + } + } + } + '\\' => { + self.buffer.push(chr); + ReEsc(brace_nesting) + } + _ => { + self.buffer.push(chr); + Re(brace_nesting) + } + } + } + + fn trans_from_reesc(&mut self, chr: char, brace_nesting: u32) -> ParsingState { + self.buffer.push(chr); + Re(brace_nesting) + } +} + +#[cfg(test)] +mod test { + use super::TemplateParser; + + #[test] + fn test_parse_license_template() { + assert_eq!( + TemplateParser::parse("literal (.*)").unwrap(), + r"^literal \(\.\*\)" + ); + assert_eq!( + TemplateParser::parse(r"escaping \}").unwrap(), + r"^escaping \}" + ); + assert!(TemplateParser::parse("unbalanced } without escape").is_err()); + assert_eq!( + TemplateParser::parse(r"{\d+} place{-?}holder{s?}").unwrap(), + r"^\d+ place-?holders?" + ); + assert_eq!(TemplateParser::parse("default {}").unwrap(), "^default .*?"); + assert_eq!( + TemplateParser::parse(r"unbalanced nested braces {\{{3}}").unwrap(), + r"^unbalanced nested braces \{{3}" + ); + assert_eq!( + TemplateParser::parse("parsing error }").unwrap_err(), + "escape or balance closing brace on l. 1" + ); + assert_eq!( + TemplateParser::parse("parsing error {\nsecond line").unwrap_err(), + "escape or balance opening brace on l. 1" + ); + assert_eq!( + TemplateParser::parse(r"parsing error \").unwrap_err(), + "incomplete escape sequence on l. 1" + ); + } +} diff --git a/src/config/mod.rs b/src/config/mod.rs index 0d4ec8557d3..8b93743ec4d 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -29,6 +29,7 @@ pub mod license; use config::config_type::ConfigType; use config::file_lines::FileLines; +use config::license::TemplateParser; pub use config::lists::*; pub use config::options::*; use config::summary::Summary;