Ignore things in .gitignore in tidy

- Switch from `walkdir` to `ignore`. This required various changes to
  make `skip` thread-safe.
- Ignore `build` anywhere in the source tree, not just at the top-level.
  We support this in bootstrap, we should support it in tidy too.

As a nice side benefit, this also makes tidy a bit faster.

Before:
```
; hyperfine -i '"/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0-tools-bin/rust-tidy" "/home/gh-jyn514/rust2" "/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0/bin/cargo" "/home/gh-jyn514/rust2/build" "32"'
Benchmark 1: "/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0-tools-bin/rust-tidy" "/home/gh-jyn514/rust2" "/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0/bin/cargo" "/home/gh-jyn514/rust2/build" "32"
  Time (mean ± σ):      1.080 s ±  0.008 s    [User: 2.616 s, System: 3.243 s]
  Range (min … max):    1.069 s …  1.099 s    10 runs
```

After:
```
; hyperfine '"/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0-tools-bin/rust-tidy" "/home/gh-jyn514/rust2" "/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0/bin/cargo" "/home/gh-jyn514/rust2/build" "32"'
Benchmark 1: "/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0-tools-bin/rust-tidy" "/home/gh-jyn514/rust2" "/home/gh-jyn514/rust2/build/aarch64-unknown-linux-gnu/stage0/bin/cargo" "/home/gh-jyn514/rust2/build" "32"
  Time (mean ± σ):     705.0 ms ±   1.4 ms    [User: 3179.1 ms, System: 1517.5 ms]
  Range (min … max):   702.3 ms … 706.9 ms    10 runs
```
This commit is contained in:
Joshua Nelson 2023-01-04 02:16:24 +00:00
parent 14c54b637b
commit 1cccf2dd4c
15 changed files with 75 additions and 85 deletions

2
.gitignore vendored
View File

@ -41,7 +41,7 @@ no_llvm_build
/inst/
/llvm/
/mingw-build/
/build/
build/
/build-rust-analyzer/
/dist/
/unicode-downloads

View File

@ -10,7 +10,7 @@ fn main() {
let version_str = version_str.trim();
walk::walk(
&root_path,
&mut |path| {
|path| {
walk::filter_dirs(path)
// We exempt these as they require the placeholder
// for their operation

View File

@ -95,7 +95,7 @@ fn check_section<'a>(
}
pub fn check(path: &Path, bad: &mut bool) {
walk(path, &mut filter_dirs, &mut |entry, contents| {
walk(path, filter_dirs, &mut |entry, contents| {
let file = &entry.path().display();
let mut lines = contents.lines().enumerate().peekable();

View File

@ -101,54 +101,38 @@ mod os_impl {
const ALLOWED: &[&str] = &["configure", "x"];
walk_no_read(
path,
&mut |path| {
filter_dirs(path)
|| path.ends_with("src/etc")
// This is a list of directories that we almost certainly
// don't need to walk. A future PR will likely want to
// remove these in favor of crate::walk_no_read using git
// ls-files to discover the paths we should check, which
// would naturally ignore all of these directories. It's
// also likely faster than walking the directory tree
// directly (since git is just reading from a couple files
// to produce the results).
|| path.ends_with("target")
|| path.ends_with("build")
|| path.ends_with(".git")
},
&mut |entry| {
let file = entry.path();
let extension = file.extension();
let scripts = ["py", "sh", "ps1"];
if scripts.into_iter().any(|e| extension == Some(OsStr::new(e))) {
// FIXME: we don't need to look at all binaries, only files that have been modified in this branch
// (e.g. using `git ls-files`).
walk_no_read(path, |path| filter_dirs(path) || path.ends_with("src/etc"), &mut |entry| {
let file = entry.path();
let extension = file.extension();
let scripts = ["py", "sh", "ps1"];
if scripts.into_iter().any(|e| extension == Some(OsStr::new(e))) {
return;
}
if t!(is_executable(&file), file) {
let rel_path = file.strip_prefix(path).unwrap();
let git_friendly_path = rel_path.to_str().unwrap().replace("\\", "/");
if ALLOWED.contains(&git_friendly_path.as_str()) {
return;
}
if t!(is_executable(&file), file) {
let rel_path = file.strip_prefix(path).unwrap();
let git_friendly_path = rel_path.to_str().unwrap().replace("\\", "/");
if ALLOWED.contains(&git_friendly_path.as_str()) {
return;
}
let output = Command::new("git")
.arg("ls-files")
.arg(&git_friendly_path)
.current_dir(path)
.stderr(Stdio::null())
.output()
.unwrap_or_else(|e| {
panic!("could not run git ls-files: {e}");
});
let path_bytes = rel_path.as_os_str().as_bytes();
if output.status.success() && output.stdout.starts_with(path_bytes) {
tidy_error!(bad, "binary checked into source: {}", file.display());
}
let output = Command::new("git")
.arg("ls-files")
.arg(&git_friendly_path)
.current_dir(path)
.stderr(Stdio::null())
.output()
.unwrap_or_else(|e| {
panic!("could not run git ls-files: {e}");
});
let path_bytes = rel_path.as_os_str().as_bytes();
if output.status.success() && output.stdout.starts_with(path_bytes) {
tidy_error!(bad, "binary checked into source: {}", file.display());
}
},
)
}
})
}
}

View File

@ -6,7 +6,7 @@ use std::path::Path;
const GRAPHVIZ_POSTFLOW_MSG: &str = "`borrowck_graphviz_postflow` attribute in test";
pub fn check(test_dir: &Path, bad: &mut bool) {
walk(test_dir, &mut filter_dirs, &mut |entry, contents| {
walk(test_dir, filter_dirs, &mut |entry, contents| {
let filename = entry.path();
let is_rust = filename.extension().map_or(false, |ext| ext == "rs");
if !is_rust {

View File

@ -11,7 +11,7 @@ fn is_edition_2021(mut line: &str) -> bool {
pub fn check(path: &Path, bad: &mut bool) {
walk(
path,
&mut |path| {
|path| {
filter_dirs(path)
|| (path.ends_with("tests") && path.join("COMPILER_TESTS.md").exists())
},

View File

@ -127,7 +127,7 @@ fn check_error_codes_docs(
let mut no_longer_emitted_codes = Vec::new();
walk(&docs_path, &mut |_| false, &mut |entry, contents| {
walk(&docs_path, |_| false, &mut |entry, contents| {
let path = entry.path();
// Error if the file isn't markdown.
@ -319,7 +319,7 @@ fn check_error_codes_used(
let mut found_codes = Vec::new();
walk_many(search_paths, &mut filter_dirs, &mut |entry, contents| {
walk_many(search_paths, filter_dirs, &mut |entry, contents| {
let path = entry.path();
// Return early if we aren't looking at a source file.

View File

@ -101,7 +101,7 @@ pub fn check(
&tests_path.join("rustdoc-ui"),
&tests_path.join("rustdoc"),
],
&mut filter_dirs,
filter_dirs,
&mut |entry, contents| {
let file = entry.path();
let filename = file.file_name().unwrap().to_string_lossy();
@ -477,11 +477,11 @@ fn get_and_check_lib_features(
fn map_lib_features(
base_src_path: &Path,
mf: &mut dyn FnMut(Result<(&str, Feature), &str>, &Path, usize),
mf: &mut (dyn Send + Sync + FnMut(Result<(&str, Feature), &str>, &Path, usize)),
) {
walk(
base_src_path,
&mut |path| filter_dirs(path) || path.ends_with("tests"),
|path| filter_dirs(path) || path.ends_with("tests"),
&mut |entry, contents| {
let file = entry.path();
let filename = file.file_name().unwrap().to_string_lossy();

View File

@ -68,7 +68,7 @@ pub fn check(path: &Path, bad: &mut bool) {
// Sanity check that the complex parsing here works.
let mut saw_target_arch = false;
let mut saw_cfg_bang = false;
walk(path, &mut filter_dirs, &mut |entry, contents| {
walk(path, filter_dirs, &mut |entry, contents| {
let file = entry.path();
let filestr = file.to_string_lossy().replace("\\", "/");
if !filestr.ends_with(".rs") {

View File

@ -5,7 +5,7 @@ use std::path::Path;
pub fn check(path: &Path, bad: &mut bool) {
crate::walk::walk(
&path.join("rustdoc-gui"),
&mut |p| {
|p| {
// If there is no extension, it's very likely a folder and we want to go into it.
p.extension().map(|e| e != "goml").unwrap_or(false)
},

View File

@ -235,7 +235,7 @@ pub fn check(path: &Path, bad: &mut bool) {
.chain(PROBLEMATIC_CONSTS.iter().map(|v| format!("{:X}", v)))
.collect();
let problematic_regex = RegexSet::new(problematic_consts_strings.as_slice()).unwrap();
walk(path, &mut skip, &mut |entry, contents| {
walk(path, skip, &mut |entry, contents| {
let file = entry.path();
let filename = file.file_name().unwrap().to_string_lossy();
let extensions =

View File

@ -37,7 +37,7 @@ struct RevisionInfo<'a> {
pub fn check(path: &Path, bad: &mut bool) {
crate::walk::walk(
path,
&mut |path| path.extension().map(|p| p == "rs") == Some(false),
|path| path.extension().map(|p| p == "rs") == Some(false),
&mut |entry, content| {
let file = entry.path().display();
let mut header_map = BTreeMap::new();

View File

@ -54,7 +54,7 @@ fn check_entries(path: &Path, bad: &mut bool) {
pub fn check(path: &Path, bad: &mut bool) {
check_entries(&path, bad);
for path in &[&path.join("ui"), &path.join("ui-fulldeps")] {
crate::walk::walk_no_read(path, &mut |_| false, &mut |entry| {
crate::walk::walk_no_read(path, |_| false, &mut |entry| {
let file_path = entry.path();
if let Some(ext) = file_path.extension() {
if ext == "stderr" || ext == "stdout" {

View File

@ -11,14 +11,16 @@ use crate::walk::{filter_dirs, walk};
use std::path::Path;
pub fn check(root_path: &Path, bad: &mut bool) {
let core = &root_path.join("core");
let core_tests = &core.join("tests");
let core_benches = &core.join("benches");
let is_core = |path: &Path| {
path.starts_with(core) && !(path.starts_with(core_tests) || path.starts_with(core_benches))
let core = root_path.join("core");
let core_copy = core.clone();
let core_tests = core.join("tests");
let core_benches = core.join("benches");
let is_core = move |path: &Path| {
path.starts_with(&core)
&& !(path.starts_with(&core_tests) || path.starts_with(&core_benches))
};
let mut skip = |path: &Path| {
let skip = move |path: &Path| {
let file_name = path.file_name().unwrap_or_default();
if path.is_dir() {
filter_dirs(path)
@ -35,9 +37,9 @@ pub fn check(root_path: &Path, bad: &mut bool) {
}
};
walk(root_path, &mut skip, &mut |entry, contents| {
walk(root_path, skip, &mut |entry, contents| {
let path = entry.path();
let is_core = path.starts_with(core);
let is_core = path.starts_with(&core_copy);
for (i, line) in contents.lines().enumerate() {
let line = line.trim();
let is_test = || line.contains("#[test]") && !line.contains("`#[test]");

View File

@ -1,9 +1,8 @@
use std::fs::File;
use std::io::Read;
use walkdir::{DirEntry, WalkDir};
use ignore::DirEntry;
use std::path::Path;
use std::{fs, path::Path};
/// The default directory filter.
pub fn filter_dirs(path: &Path) -> bool {
let skip = [
"tidy-test-file",
@ -36,34 +35,39 @@ pub fn filter_dirs(path: &Path) -> bool {
pub fn walk_many(
paths: &[&Path],
skip: &mut dyn FnMut(&Path) -> bool,
skip: impl Clone + Send + Sync + 'static + Fn(&Path) -> bool,
f: &mut dyn FnMut(&DirEntry, &str),
) {
for path in paths {
walk(path, skip, f);
walk(path, skip.clone(), f);
}
}
pub fn walk(path: &Path, skip: &mut dyn FnMut(&Path) -> bool, f: &mut dyn FnMut(&DirEntry, &str)) {
let mut contents = String::new();
pub fn walk(
path: &Path,
skip: impl Send + Sync + 'static + Fn(&Path) -> bool,
f: &mut dyn FnMut(&DirEntry, &str),
) {
walk_no_read(path, skip, &mut |entry| {
contents.clear();
if t!(File::open(entry.path()), entry.path()).read_to_string(&mut contents).is_err() {
contents.clear();
}
f(&entry, &contents);
let contents = t!(fs::read(entry.path()), entry.path());
let contents_str = match String::from_utf8(contents) {
Ok(s) => s,
Err(_) => return, // skip this file
};
f(&entry, &contents_str);
});
}
pub(crate) fn walk_no_read(
path: &Path,
skip: &mut dyn FnMut(&Path) -> bool,
skip: impl Send + Sync + 'static + Fn(&Path) -> bool,
f: &mut dyn FnMut(&DirEntry),
) {
let walker = WalkDir::new(path).into_iter().filter_entry(|e| !skip(e.path()));
for entry in walker {
let mut walker = ignore::WalkBuilder::new(path);
let walker = walker.filter_entry(move |e| !skip(e.path()));
for entry in walker.build() {
if let Ok(entry) = entry {
if entry.file_type().is_dir() {
if entry.file_type().map_or(true, |kind| kind.is_dir() || kind.is_symlink()) {
continue;
}
f(&entry);