Attempt at checking for license (#209)

I'm not quite sure how best to handle loading the license template from
a path -- I mean obviously I know *how* to do it, but I'm not sure where
to fit it in the codebase :) So this first attempt puts the license
template directly into the config file.

These are my misgivings about the license template config option as a
path to a file (I'd love feedback if some of these are wrong or can be
easily circumvented!):

1. I thought the obvious choice for the type of `license_template` in
`create_config!` should be `PathBuf`, but `PathBuf` doesn't implement
`FromStr` (yet? see https://github.com/rust-lang/rust/issues/44431), so
it would have to be wrapped in a tuple struct, and I went down that road
for a little while but then it seemed like too much ceremony for too
little gain.

2. So a plain `String` then (which, mind you, also means the same
`doc_hint()`, i.e. `<string>`, not `<path>` or something like that). The
fact that it's a valid path will be checked once we try to read the
file.

3. But where in the code should the license template be read? The
obvious choice for me would be somewhere in `Config::from_toml()`, but
since `Config` is defined via the `create_config!` macro, that would
mean tight coupling between the macro invocation (which defines the
configuration option `license_template`) and its definition (which would
rely on the existence of that option to run the template loading code).

4. `license_template` could also be made a special option which is
hardwired into the macro. This gets rid of the tight coupling, but
special-casing one of the config options would make the code harder to
navigate.

5. Instead, the macro could maybe be rewritten to allow for config
options that load additional resources from files when the config is
being parsed, but that's beyond my skill level I'm afraid (and probably
overengineering the problem if it's only ever going to be used for this
one option).

6. Finally, the file can be loaded at some later point in time, e.g. in
`format_lines()`, right before `check_license()` is called. But to
face a potential *IO* error at so late a stage, when the source files
have already been parsed... I don't know, it doesn't feel right.

BTW I don't like that I'm actually parsing the license template as late
as inside `check_license()` either, but for much the same reasons, I
don't know where else to put it. If the `Config` were hand-rolled
instead of a macro, I'd just define a custom `license_template` option
and load and parse the template in the `Config`'s init. But the way
things are, I'm a bit at a loss.

However, if someone more familiar with the project would kindly provide
a few hints as to how the path approach can be done in a way that is as
clean as possible in the context of the codebase, I'll be more than
happy to implement it! :)
This commit is contained in:
David Lukes 2018-02-16 23:21:57 +01:00
parent 5025a53b30
commit 2eebe614c7
2 changed files with 106 additions and 4 deletions

View File

@ -50,6 +50,7 @@ create_config! {
comment_width: usize, 80, false,
"Maximum length of comments. No effect unless wrap_comments = true";
normalize_comments: bool, false, true, "Convert /* */ comments to // comments where possible";
license_template: String, String::default(), false, "Check for license";
// Single line expressions and items.
empty_item_single_line: bool, true, false,

View File

@ -43,6 +43,7 @@ use syntax::ast;
use syntax::codemap::{CodeMap, FilePathMapping};
pub use syntax::codemap::FileName;
use syntax::parse::{self, ParseSess};
use regex::{Regex, RegexBuilder};
use checkstyle::{output_footer, output_header};
use comment::{CharClasses, FullCodeCharKind};
@ -99,6 +100,10 @@ pub enum ErrorKind {
TrailingWhitespace,
// TO-DO or FIX-ME item without an issue number
BadIssue(Issue),
// License check has failed
LicenseCheck,
// License template could not be parsed
ParsingLicense,
}
impl fmt::Display for ErrorKind {
@ -111,6 +116,8 @@ impl fmt::Display for ErrorKind {
),
ErrorKind::TrailingWhitespace => write!(fmt, "left behind trailing whitespace"),
ErrorKind::BadIssue(issue) => write!(fmt, "found {}", issue),
ErrorKind::LicenseCheck => write!(fmt, "license check failed"),
ErrorKind::ParsingLicense => write!(fmt, "parsing regex in license template failed"),
}
}
}
@ -127,7 +134,10 @@ pub struct FormattingError {
impl FormattingError {
fn msg_prefix(&self) -> &str {
match self.kind {
ErrorKind::LineOverflow(..) | ErrorKind::TrailingWhitespace => "error:",
ErrorKind::LineOverflow(..)
| ErrorKind::TrailingWhitespace
| ErrorKind::LicenseCheck
| ErrorKind::ParsingLicense => "error:",
ErrorKind::BadIssue(_) => "WARNING:",
}
}
@ -405,8 +415,39 @@ fn should_report_error(
}
}
fn check_license(text: &str, license_template: &str) -> Result<bool, regex::Error> {
let mut template_re = String::from("^");
// the template is parsed as a series of pairs of capture groups of (1) lazy whatever, which
// will be matched literally, followed by (2) a {}-delimited block, which will be matched as a
// regex
let template_parser = RegexBuilder::new(r"(.*?)\{(.*?)\}")
.dot_matches_new_line(true)
.build()
.unwrap();
// keep track of the last matched offset and ultimately append the tail of the template (if any)
// after the last {} block
let mut last_matched_offset = 0;
for caps in template_parser.captures_iter(license_template) {
if let Some(mat) = caps.get(0) {
last_matched_offset = mat.end()
}
if let Some(mat) = caps.get(1) {
template_re.push_str(&regex::escape(mat.as_str()))
}
if let Some(mat) = caps.get(2) {
let mut re = mat.as_str();
if re.is_empty() {
re = ".*?";
}
template_re.push_str(re)
}
}
template_re.push_str(&regex::escape(&license_template[last_matched_offset..]));
let template_re = Regex::new(&template_re)?;
Ok(template_re.is_match(text))
}
// Formatting done on a char by char or line by line basis.
// FIXME(#209) warn on bad license
// FIXME(#20) other stuff for parity with make tidy
fn format_lines(
text: &mut String,
@ -415,7 +456,6 @@ fn format_lines(
config: &Config,
report: &mut FormatReport,
) {
// Iterate over the chars in the file map.
let mut trims = vec![];
let mut last_wspace: Option<usize> = None;
let mut line_len = 0;
@ -428,6 +468,33 @@ fn format_lines(
let mut format_line = config.file_lines().contains_line(name, cur_line);
let allow_issue_seek = !issue_seeker.is_disabled();
// Check license.
if config.was_set().license_template() {
match check_license(text, &config.license_template()) {
Ok(check) => {
if !check {
errors.push(FormattingError {
line: cur_line,
kind: ErrorKind::LicenseCheck,
is_comment: false,
is_string: false,
line_buffer: String::new(),
});
}
}
Err(_) => {
errors.push(FormattingError {
line: cur_line,
kind: ErrorKind::ParsingLicense,
is_comment: false,
is_string: false,
line_buffer: String::new(),
});
}
}
}
// Iterate over the chars in the file map.
for (kind, (b, c)) in CharClasses::new(text.chars().enumerate()) {
if c == '\r' {
continue;
@ -853,7 +920,7 @@ pub fn run(input: Input, config: &Config) -> Summary {
#[cfg(test)]
mod test {
use super::{format_code_block, format_snippet, Config};
use super::{check_license, format_code_block, format_snippet, Config};
#[test]
fn test_no_panic_on_format_snippet_and_format_code_block() {
@ -939,4 +1006,38 @@ false,
};";
assert!(test_format_inner(format_code_block, code_block, expected));
}
#[test]
fn test_check_license() {
assert!(check_license("literal matching", "literal matching").unwrap());
assert!(!check_license("literal no match", "literal matching").unwrap());
assert!(
check_license(
"Regex start and end: 2018",
r"{[Rr]egex} start {} end: {\d+}"
).unwrap()
);
assert!(!check_license(
"Regex start and end no match: 2018",
r"{[Rr]egex} start {} end: {\d+}"
).unwrap());
assert!(
check_license(
"Regex in the middle: 2018 (tm)",
r"Regex {} middle: {\d+} (tm)"
).unwrap()
);
assert!(!check_license(
"Regex in the middle no match: 2018 (tm)",
r"Regex {} middle: {\d+} (tm)"
).unwrap());
assert!(!check_license("default doesn't match\nacross lines", "default {} lines").unwrap());
assert!(check_license("", "this is not a valid {[regex}").is_err());
assert!(
check_license(
"can't parse nested delimiters with regex",
r"can't parse nested delimiters with regex{\.{3}}"
).is_err()
);
}
}