Add new tool to check HTML:

* Make html-checker run by default on the Rust compiler docs as well
* Ensure html-checker is run on CI
* Make the `tidy` binary presence check lazy
This commit is contained in:
Guillaume Gomez 2021-04-23 16:43:18 +02:00
parent 451e98e7b0
commit 83a2bc31b9
9 changed files with 166 additions and 4 deletions

View File

@ -1578,6 +1578,13 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "html-checker"
version = "0.1.0"
dependencies = [
"walkdir",
]
[[package]]
name = "html5ever"
version = "0.25.1"

View File

@ -34,6 +34,7 @@ members = [
"src/tools/unicode-table-generator",
"src/tools/expand-yaml-anchors",
"src/tools/jsondocck",
"src/tools/html-checker",
]
exclude = [

View File

@ -450,6 +450,7 @@ impl<'a> Builder<'a> {
test::RustdocTheme,
test::RustdocUi,
test::RustdocJson,
test::HtmlCheck,
// Run bootstrap close to the end as it's unlikely to fail
test::Bootstrap,
// Run run-make last, since these won't pass without make on Windows

View File

@ -501,8 +501,8 @@ impl Step for Std {
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct Rustc {
stage: u32,
target: TargetSelection,
pub stage: u32,
pub target: TargetSelection,
}
impl Step for Rustc {

View File

@ -9,7 +9,7 @@ use std::fmt;
use std::fs;
use std::iter;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::process::{Command, Stdio};
use build_helper::{self, output, t};
@ -161,6 +161,49 @@ You can skip linkcheck with --exclude src/tools/linkchecker"
}
}
/// Reports whether an external `tidy` executable can be launched.
///
/// Runs `tidy --version` with its standard output discarded and returns `true`
/// only if the process could be started and exited successfully. Any failure
/// to start the process is treated as "not installed".
fn check_if_tidy_is_installed() -> bool {
    let mut probe = Command::new("tidy");
    probe.arg("--version").stdout(Stdio::null());
    match probe.status() {
        Ok(exit) => exit.success(),
        Err(_) => false,
    }
}
/// Test step that runs the `html-checker` tool over the generated documentation.
///
/// The step relies on an external `tidy` executable (not the in-tree
/// `src/tools/tidy`); see the `Step` implementation for how its absence is handled.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct HtmlCheck {
/// Target whose documentation output directory is handed to the checker.
target: TargetSelection,
}
impl Step for HtmlCheck {
    type Output = ();
    const DEFAULT: bool = true;
    const ONLY_HOSTS: bool = true;

    /// Registers the step under `src/tools/html-checker` and attaches a lazily
    /// evaluated default condition backed by `check_if_tidy_is_installed`.
    // NOTE(review): presumably this keeps the step out of default runs when
    // `tidy` is not installed -- confirm against `lazy_default_condition`.
    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
        run.path("src/tools/html-checker")
            .lazy_default_condition(Box::new(check_if_tidy_is_installed))
    }

    /// Schedules the step for the target selected by the run configuration.
    fn make_run(run: RunConfig<'_>) {
        let target = run.target;
        run.builder.ensure(HtmlCheck { target });
    }

    /// Builds the documentation and runs the `html-checker` tool on the
    /// documentation output directory of `self.target`.
    ///
    /// # Panics
    ///
    /// Panics with an explanatory message if `tidy` cannot be run.
    fn run(self, builder: &Builder<'_>) {
        let tidy_available = check_if_tidy_is_installed();
        if !tidy_available {
            eprintln!("not running HTML-check tool because `tidy` is missing");
            eprintln!(
                "Note that `tidy` is not the in-tree `src/tools/tidy` but needs to be installed"
            );
            panic!("Cannot run html-check tests");
        }

        // Ensure that a few different kinds of documentation are available.
        builder.default_doc(&[]);
        let compiler_docs = crate::doc::Rustc { target: self.target, stage: builder.top_stage };
        builder.ensure(compiler_docs);

        // Obtain the tool command first, then append the docs directory, so the
        // calls happen in the same order as a single chained expression would.
        let mut checker = builder.tool_cmd(Tool::HtmlChecker);
        checker.arg(builder.doc_out(self.target));
        try_run(builder, &mut checker);
    }
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct Cargotest {
stage: u32,

View File

@ -376,6 +376,7 @@ bootstrap_tool!(
ExpandYamlAnchors, "src/tools/expand-yaml-anchors", "expand-yaml-anchors";
LintDocs, "src/tools/lint-docs", "lint-docs";
JsonDocCk, "src/tools/jsondocck", "jsondocck";
HtmlChecker, "src/tools/html-checker", "html-checker";
);
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]

View File

@ -12,7 +12,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
cmake \
libssl-dev \
sudo \
xz-utils
xz-utils \
tidy
# Install dependencies for chromium browser
RUN apt-get install -y \

View File

@ -0,0 +1,12 @@
# Manifest for the `html-checker` tool, which runs `tidy` over generated HTML docs.
[package]
name = "html-checker"
version = "0.1.0"
authors = ["Guillaume Gomez <guillaume1.gomez@gmail.com>"]
edition = "2018"
# The binary's source file sits next to this manifest rather than under `src/`.
[[bin]]
name = "html-checker"
path = "main.rs"
# `walkdir` is used to traverse the documentation folder recursively.
[dependencies]
walkdir = "2"

View File

@ -0,0 +1,96 @@
use std::env;
use std::path::Path;
use std::process::{Command, Output};
/// Runs `tidy` on one HTML file and returns the number of problems found.
///
/// `tidy` is invoked with `-errors -quiet` and a list of muted message ids.
/// A successful exit status counts as zero problems, and so does a failing
/// status with nothing on stderr, unless the exit code is 2. In every other
/// case the captured stderr is echoed and each of its lines counts as one
/// problem, with a minimum of one.
///
/// # Panics
///
/// Panics if the `tidy` process cannot be started.
fn check_html_file(file: &Path) -> usize {
    let to_mute = &[
        // "disabled" on <link> or "autocomplete" on <select> emit this warning
        "PROPRIETARY_ATTRIBUTE",
        // It complains when multiple in the same page link to the same anchor for some reason...
        "ANCHOR_NOT_UNIQUE",
        // If a <span> contains only HTML elements and no text, it complains about it.
        "TRIM_EMPTY_ELEMENT",
        // FIXME: the three next warnings are about <pre> elements which are not supposed to
        //        contain HTML. The solution here would be to replace them with a <div>.
        "MISSING_ENDTAG_BEFORE",
        "INSERTING_TAG",
        "DISCARDING_UNEXPECTED",
        // FIXME: mdbook repeats the name attribute on <input>. When the fix is merged upstream,
        //        this warning can be used again.
        "REPEATED_ATTRIBUTE",
        // FIXME: mdbook uses "align" attribute on <td>, which is not allowed.
        "MISMATCHED_ATTRIBUTE_WARN",
        // FIXME: mdbook doesn't add "alt" attribute on images.
        "MISSING_ATTRIBUTE",
        // FIXME: mdbook doesn't escape `&` (in "&String" for example).
        "UNKNOWN_ENTITY",
        // Compiler docs have some inlined <style> in the markdown.
        "MOVED_STYLE_TO_HEAD",
    ];
    let to_mute_s = to_mute.join(",");
    let mut command = Command::new("tidy");
    command
        .arg("-errors")
        .arg("-quiet")
        .arg("--mute-id") // this option is useful in case we want to mute more warnings
        .arg("yes")
        .arg("--mute")
        .arg(&to_mute_s)
        .arg(file);

    let Output { status, stderr, .. } = command.output().expect("failed to run tidy command");
    if status.success() {
        return 0;
    }

    // Decode lossily: the diagnostics come from an external program, and one
    // stray non-UTF-8 byte should not abort the whole check with a panic.
    let stderr = String::from_utf8_lossy(&stderr);
    if stderr.is_empty() && status.code() != Some(2) {
        // Non-zero exit with no output and a code other than 2: every message
        // was muted, so there is nothing to report for this file.
        return 0;
    }

    eprintln!(
        "=> Errors for `{}` (error code: {}) <=",
        file.display(),
        status.code().unwrap_or(-1)
    );
    eprintln!("{}", stderr);
    // Exit code 2 with empty stderr still reaches this point; count it as at
    // least one error so the overall run cannot be reported as clean.
    stderr.lines().count().max(1)
}
/// Walks `dir` recursively and runs `check_html_file` on every regular file
/// whose extension is `html`.
///
/// Returns `(files_read, errors)`: the number of HTML files checked and the
/// sum of the error counts reported for them.
///
/// # Panics
///
/// Panics if a directory entry cannot be read during the walk.
fn find_all_html_files(dir: &Path) -> (usize, usize) {
    walkdir::WalkDir::new(dir)
        .into_iter()
        .map(|item| item.expect("failed to read file"))
        .filter(|item| item.file_type().is_file())
        .filter(|item| item.path().extension().and_then(|ext| ext.to_str()) == Some("html"))
        // The chain is lazy, so each file is checked as soon as the walk yields it.
        .fold((0, 0), |(checked, failures), item| {
            (checked + 1, failures + check_html_file(item.path()))
        })
}
/// Entry point of the HTML checker.
///
/// Expects exactly one command-line argument, the documentation folder to
/// scan. Returns an `Err` carrying a usage message when the argument count is
/// wrong, or a summary message when at least one error was counted.
fn main() -> Result<(), String> {
    let args: Vec<String> = env::args().collect();
    let doc_folder = match args.as_slice() {
        [_, folder] => Path::new(folder),
        _ => return Err(format!("Usage: {} <doc folder>", args[0])),
    };

    println!("Running HTML checker...");
    let (checked, failures) = find_all_html_files(doc_folder);
    println!("Done! Read {} files...", checked);

    if failures == 0 {
        println!("No error found!");
        return Ok(());
    }
    Err(format!("HTML check failed: {} errors", failures))
}