mirror of
https://github.com/rust-lang/rust.git
synced 2025-02-03 02:23:20 +00:00
Refactor parsing code into struct
This also splits the giant state machine match expression into separate methods.
This commit is contained in:
parent
310c1146f2
commit
bbd6d9cd55
@ -1,174 +0,0 @@
|
||||
use regex;
|
||||
|
||||
/// Convert the license template into a string which can be turned into a regex.
|
||||
///
|
||||
/// The license template could use regex syntax directly, but that would require a lot of manual
|
||||
/// escaping, which is inconvenient. It is therefore literal by default, with optional regex
|
||||
/// subparts delimited by `{` and `}`. Additionally:
|
||||
///
|
||||
/// - to insert literal `{`, `}` or `\`, escape it with `\`
|
||||
/// - an empty regex placeholder (`{}`) is shorthand for `{.*?}`
|
||||
///
|
||||
/// This function parses this input format and builds a properly escaped *string* representation of
|
||||
/// the equivalent regular expression. It **does not** however guarantee that the returned string is
|
||||
/// a syntactically valid regular expression.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use rustfmt_config::license;
|
||||
/// assert_eq!(
|
||||
/// license::parse_template(
|
||||
/// r"
|
||||
/// // Copyright {\d+} The \} Rust \\ Project \{ Developers. See the {([A-Z]+)}
|
||||
/// // file at the top-level directory of this distribution and at
|
||||
/// // {}.
|
||||
/// //
|
||||
/// // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
/// // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
/// // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
/// // option. This file may not be copied, modified, or distributed
|
||||
/// // except according to those terms.
|
||||
/// "
|
||||
/// ).unwrap(),
|
||||
/// r"^
|
||||
/// // Copyright \d+ The \} Rust \\ Project \{ Developers\. See the ([A-Z]+)
|
||||
/// // file at the top\-level directory of this distribution and at
|
||||
/// // .*?\.
|
||||
/// //
|
||||
/// // Licensed under the Apache License, Version 2\.0 <LICENSE\-APACHE or
|
||||
/// // http://www\.apache\.org/licenses/LICENSE\-2\.0> or the MIT license
|
||||
/// // <LICENSE\-MIT or http://opensource\.org/licenses/MIT>, at your
|
||||
/// // option\. This file may not be copied, modified, or distributed
|
||||
/// // except according to those terms\.
|
||||
/// "
|
||||
/// );
|
||||
/// ```
|
||||
pub fn parse_template(template: &str) -> Result<String, String> {
|
||||
// the template is parsed using a state machine
|
||||
enum State {
|
||||
Lit,
|
||||
LitEsc,
|
||||
// the u32 keeps track of brace nesting
|
||||
Re(u32),
|
||||
ReEsc(u32),
|
||||
}
|
||||
|
||||
let mut parsed = String::from("^");
|
||||
let mut buffer = String::new();
|
||||
let mut state = State::Lit;
|
||||
let mut linum = 1;
|
||||
// keeps track of last line on which a regex placeholder was started
|
||||
let mut open_brace_line = 0;
|
||||
for chr in template.chars() {
|
||||
if chr == '\n' {
|
||||
linum += 1;
|
||||
}
|
||||
state = match state {
|
||||
State::Lit => match chr {
|
||||
'{' => {
|
||||
parsed.push_str(®ex::escape(&buffer));
|
||||
buffer.clear();
|
||||
open_brace_line = linum;
|
||||
State::Re(1)
|
||||
}
|
||||
'}' => return Err(format!("escape or balance closing brace on l. {}", linum)),
|
||||
'\\' => State::LitEsc,
|
||||
_ => {
|
||||
buffer.push(chr);
|
||||
State::Lit
|
||||
}
|
||||
},
|
||||
State::LitEsc => {
|
||||
buffer.push(chr);
|
||||
State::Lit
|
||||
}
|
||||
State::Re(brace_nesting) => {
|
||||
match chr {
|
||||
'{' => {
|
||||
buffer.push(chr);
|
||||
State::Re(brace_nesting + 1)
|
||||
}
|
||||
'}' => {
|
||||
match brace_nesting {
|
||||
1 => {
|
||||
// default regex for empty placeholder {}
|
||||
if buffer.is_empty() {
|
||||
buffer = ".*?".to_string();
|
||||
}
|
||||
parsed.push_str(&buffer);
|
||||
buffer.clear();
|
||||
State::Lit
|
||||
}
|
||||
_ => {
|
||||
buffer.push(chr);
|
||||
State::Re(brace_nesting - 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
'\\' => {
|
||||
buffer.push(chr);
|
||||
State::ReEsc(brace_nesting)
|
||||
}
|
||||
_ => {
|
||||
buffer.push(chr);
|
||||
State::Re(brace_nesting)
|
||||
}
|
||||
}
|
||||
}
|
||||
State::ReEsc(brace_nesting) => {
|
||||
buffer.push(chr);
|
||||
State::Re(brace_nesting)
|
||||
}
|
||||
}
|
||||
}
|
||||
match state {
|
||||
State::Re(_) | State::ReEsc(_) => {
|
||||
return Err(format!(
|
||||
"escape or balance opening brace on l. {}",
|
||||
open_brace_line
|
||||
));
|
||||
}
|
||||
State::LitEsc => return Err(format!("incomplete escape sequence on l. {}", linum)),
|
||||
_ => (),
|
||||
}
|
||||
parsed.push_str(®ex::escape(&buffer));
|
||||
|
||||
Ok(parsed)
|
||||
}
|
||||
|
||||
// Unit tests for `parse_template`: literal escaping, placeholders, and error messages.
#[cfg(test)]
mod test {
    use super::parse_template;

    #[test]
    fn test_parse_license_template() {
        // literal text is regex-escaped
        assert_eq!(
            parse_template("literal (.*)").unwrap(),
            r"^literal \(\.\*\)"
        );
        assert_eq!(parse_template(r"escaping \}").unwrap(), r"^escaping \}");
        assert!(parse_template("unbalanced } without escape").is_err());
        // placeholder contents are passed through verbatim
        assert_eq!(
            parse_template(r"{\d+} place{-?}holder{s?}").unwrap(),
            r"^\d+ place-?holders?"
        );
        assert_eq!(parse_template("default {}").unwrap(), "^default .*?");
        assert_eq!(
            parse_template(r"unbalanced nested braces {\{{3}}").unwrap(),
            r"^unbalanced nested braces \{{3}"
        );
        // error messages carry the relevant line number
        assert_eq!(
            parse_template("parsing error }").unwrap_err(),
            "escape or balance closing brace on l. 1"
        );
        assert_eq!(
            parse_template("parsing error {\nsecond line").unwrap_err(),
            "escape or balance opening brace on l. 1"
        );
        assert_eq!(
            parse_template(r"parsing error \").unwrap_err(),
            "incomplete escape sequence on l. 1"
        );
    }
}
|
@ -408,7 +408,7 @@ macro_rules! create_config {
|
||||
license_template_path, e);
|
||||
return;
|
||||
};
|
||||
let license_template_parsed = match license::parse_template(&license_template_str) {
|
||||
let license_template_parsed = match TemplateParser::parse(&license_template_str) {
|
||||
Ok(string) => string,
|
||||
Err(e) => {
|
||||
eprintln!("Warning: unable to parse license template file {:?}: {}",
|
||||
|
213
src/config/license.rs
Normal file
213
src/config/license.rs
Normal file
@ -0,0 +1,213 @@
|
||||
use regex;
|
||||
|
||||
// the template is parsed using a state machine
enum ParsingState {
    // reading literal text
    Lit,
    // a `\` was just read in literal text; the next char is taken verbatim
    LitEsc,
    // reading inside a `{...}` regex placeholder; the u32 keeps track of brace nesting
    Re(u32),
    // a `\` was just read inside a placeholder; the u32 is the current brace nesting
    ReEsc(u32),
    // parsing failed; carries the error message to report to the caller
    Abort(String),
}
|
||||
|
||||
use self::ParsingState::*;
|
||||
|
||||
pub struct TemplateParser {
|
||||
parsed: String,
|
||||
buffer: String,
|
||||
state: ParsingState,
|
||||
linum: u32,
|
||||
open_brace_line: u32,
|
||||
}
|
||||
|
||||
impl TemplateParser {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
parsed: "^".to_owned(),
|
||||
buffer: String::new(),
|
||||
state: Lit,
|
||||
linum: 1,
|
||||
// keeps track of last line on which a regex placeholder was started
|
||||
open_brace_line: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a license template into a string which can be turned into a regex.
|
||||
///
|
||||
/// The license template could use regex syntax directly, but that would require a lot of manual
|
||||
/// escaping, which is inconvenient. It is therefore literal by default, with optional regex
|
||||
/// subparts delimited by `{` and `}`. Additionally:
|
||||
///
|
||||
/// - to insert literal `{`, `}` or `\`, escape it with `\`
|
||||
/// - an empty regex placeholder (`{}`) is shorthand for `{.*?}`
|
||||
///
|
||||
/// This function parses this input format and builds a properly escaped *string* representation
|
||||
/// of the equivalent regular expression. It **does not** however guarantee that the returned
|
||||
/// string is a syntactically valid regular expression.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use rustfmt_config::license::TemplateParser;
|
||||
/// assert_eq!(
|
||||
/// TemplateParser::parse(
|
||||
/// r"
|
||||
/// // Copyright {\d+} The \} Rust \\ Project \{ Developers. See the {([A-Z]+)}
|
||||
/// // file at the top-level directory of this distribution and at
|
||||
/// // {}.
|
||||
/// //
|
||||
/// // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
/// // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
/// // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
/// // option. This file may not be copied, modified, or distributed
|
||||
/// // except according to those terms.
|
||||
/// "
|
||||
/// ).unwrap(),
|
||||
/// r"^
|
||||
/// // Copyright \d+ The \} Rust \\ Project \{ Developers\. See the ([A-Z]+)
|
||||
/// // file at the top\-level directory of this distribution and at
|
||||
/// // .*?\.
|
||||
/// //
|
||||
/// // Licensed under the Apache License, Version 2\.0 <LICENSE\-APACHE or
|
||||
/// // http://www\.apache\.org/licenses/LICENSE\-2\.0> or the MIT license
|
||||
/// // <LICENSE\-MIT or http://opensource\.org/licenses/MIT>, at your
|
||||
/// // option\. This file may not be copied, modified, or distributed
|
||||
/// // except according to those terms\.
|
||||
/// "
|
||||
/// );
|
||||
/// ```
|
||||
pub fn parse(template: &str) -> Result<String, String> {
|
||||
let mut parser = Self::new();
|
||||
for chr in template.chars() {
|
||||
if chr == '\n' {
|
||||
parser.linum += 1;
|
||||
}
|
||||
parser.state = match parser.state {
|
||||
Lit => parser.trans_from_lit(chr),
|
||||
LitEsc => parser.trans_from_litesc(chr),
|
||||
Re(brace_nesting) => parser.trans_from_re(chr, brace_nesting),
|
||||
ReEsc(brace_nesting) => parser.trans_from_reesc(chr, brace_nesting),
|
||||
Abort(msg) => return Err(msg),
|
||||
};
|
||||
}
|
||||
// check if we've ended parsing in a valid state
|
||||
match parser.state {
|
||||
Abort(msg) => return Err(msg),
|
||||
Re(_) | ReEsc(_) => {
|
||||
return Err(format!(
|
||||
"escape or balance opening brace on l. {}",
|
||||
parser.open_brace_line
|
||||
));
|
||||
}
|
||||
LitEsc => return Err(format!("incomplete escape sequence on l. {}", parser.linum)),
|
||||
_ => (),
|
||||
}
|
||||
parser.parsed.push_str(®ex::escape(&parser.buffer));
|
||||
|
||||
Ok(parser.parsed)
|
||||
}
|
||||
|
||||
fn trans_from_lit(&mut self, chr: char) -> ParsingState {
|
||||
match chr {
|
||||
'{' => {
|
||||
self.parsed.push_str(®ex::escape(&self.buffer));
|
||||
self.buffer.clear();
|
||||
self.open_brace_line = self.linum;
|
||||
Re(1)
|
||||
}
|
||||
'}' => Abort(format!(
|
||||
"escape or balance closing brace on l. {}",
|
||||
self.linum
|
||||
)),
|
||||
'\\' => LitEsc,
|
||||
_ => {
|
||||
self.buffer.push(chr);
|
||||
Lit
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn trans_from_litesc(&mut self, chr: char) -> ParsingState {
|
||||
self.buffer.push(chr);
|
||||
Lit
|
||||
}
|
||||
|
||||
fn trans_from_re(&mut self, chr: char, brace_nesting: u32) -> ParsingState {
|
||||
match chr {
|
||||
'{' => {
|
||||
self.buffer.push(chr);
|
||||
Re(brace_nesting + 1)
|
||||
}
|
||||
'}' => {
|
||||
match brace_nesting {
|
||||
1 => {
|
||||
// default regex for empty placeholder {}
|
||||
if self.buffer.is_empty() {
|
||||
self.parsed.push_str(".*?");
|
||||
} else {
|
||||
self.parsed.push_str(&self.buffer);
|
||||
}
|
||||
self.buffer.clear();
|
||||
Lit
|
||||
}
|
||||
_ => {
|
||||
self.buffer.push(chr);
|
||||
Re(brace_nesting - 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
'\\' => {
|
||||
self.buffer.push(chr);
|
||||
ReEsc(brace_nesting)
|
||||
}
|
||||
_ => {
|
||||
self.buffer.push(chr);
|
||||
Re(brace_nesting)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn trans_from_reesc(&mut self, chr: char, brace_nesting: u32) -> ParsingState {
|
||||
self.buffer.push(chr);
|
||||
Re(brace_nesting)
|
||||
}
|
||||
}
|
||||
|
||||
// Unit tests for `TemplateParser::parse`: literal escaping, placeholders, and error messages.
#[cfg(test)]
mod test {
    use super::TemplateParser;

    #[test]
    fn test_parse_license_template() {
        // literal text is regex-escaped
        assert_eq!(
            TemplateParser::parse("literal (.*)").unwrap(),
            r"^literal \(\.\*\)"
        );
        assert_eq!(
            TemplateParser::parse(r"escaping \}").unwrap(),
            r"^escaping \}"
        );
        assert!(TemplateParser::parse("unbalanced } without escape").is_err());
        // placeholder contents are passed through verbatim
        assert_eq!(
            TemplateParser::parse(r"{\d+} place{-?}holder{s?}").unwrap(),
            r"^\d+ place-?holders?"
        );
        assert_eq!(TemplateParser::parse("default {}").unwrap(), "^default .*?");
        assert_eq!(
            TemplateParser::parse(r"unbalanced nested braces {\{{3}}").unwrap(),
            r"^unbalanced nested braces \{{3}"
        );
        // error messages carry the relevant line number
        assert_eq!(
            TemplateParser::parse("parsing error }").unwrap_err(),
            "escape or balance closing brace on l. 1"
        );
        assert_eq!(
            TemplateParser::parse("parsing error {\nsecond line").unwrap_err(),
            "escape or balance opening brace on l. 1"
        );
        assert_eq!(
            TemplateParser::parse(r"parsing error \").unwrap_err(),
            "incomplete escape sequence on l. 1"
        );
    }
}
|
@ -29,6 +29,7 @@ pub mod license;
|
||||
|
||||
use config::config_type::ConfigType;
|
||||
use config::file_lines::FileLines;
|
||||
use config::license::TemplateParser;
|
||||
pub use config::lists::*;
|
||||
pub use config::options::*;
|
||||
use config::summary::Summary;
|
||||
|
Loading…
Reference in New Issue
Block a user