From 83a2bc31b9f68d8ba5fe2854bf38df5e564c575b Mon Sep 17 00:00:00 2001 From: Guillaume Gomez <guillaume1.gomez@gmail.com> Date: Fri, 23 Apr 2021 16:43:18 +0200 Subject: [PATCH 1/3] Add new tool to check HTML: * Make html-checker run by default on rust compiler docs as well * Ensure html-checker is run on CI * Lazify tidy binary presence check --- Cargo.lock | 7 ++ Cargo.toml | 1 + src/bootstrap/builder.rs | 1 + src/bootstrap/doc.rs | 4 +- src/bootstrap/test.rs | 45 ++++++++- src/bootstrap/tool.rs | 1 + .../host-x86_64/x86_64-gnu-tools/Dockerfile | 3 +- src/tools/html-checker/Cargo.toml | 12 +++ src/tools/html-checker/main.rs | 96 +++++++++++++++++++ 9 files changed, 166 insertions(+), 4 deletions(-) create mode 100644 src/tools/html-checker/Cargo.toml create mode 100644 src/tools/html-checker/main.rs diff --git a/Cargo.lock b/Cargo.lock index 65ad130d559..d57d17a3581 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1578,6 +1578,13 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "html-checker" +version = "0.1.0" +dependencies = [ + "walkdir", +] + [[package]] name = "html5ever" version = "0.25.1" diff --git a/Cargo.toml b/Cargo.toml index 327afe35c2f..4c00a7dc99e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ members = [ "src/tools/unicode-table-generator", "src/tools/expand-yaml-anchors", "src/tools/jsondocck", + "src/tools/html-checker", ] exclude = [ diff --git a/src/bootstrap/builder.rs b/src/bootstrap/builder.rs index e2f605257bd..b4c5a2abc9c 100644 --- a/src/bootstrap/builder.rs +++ b/src/bootstrap/builder.rs @@ -450,6 +450,7 @@ impl<'a> Builder<'a> { test::RustdocTheme, test::RustdocUi, test::RustdocJson, + test::HtmlCheck, // Run bootstrap close to the end as it's unlikely to fail test::Bootstrap, // Run run-make last, since these won't pass without make on Windows diff --git a/src/bootstrap/doc.rs b/src/bootstrap/doc.rs index d2fabf9967f..634332df863 100644 --- a/src/bootstrap/doc.rs +++ b/src/bootstrap/doc.rs @@ -501,8 +501,8 @@ impl Step for Std { #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] pub struct Rustc { - stage: u32, - target: TargetSelection, + pub stage: u32, + pub target: TargetSelection, } impl Step for Rustc { diff --git a/src/bootstrap/test.rs b/src/bootstrap/test.rs index 92ac3b364f6..64b3ee7c359 100644 --- a/src/bootstrap/test.rs +++ b/src/bootstrap/test.rs @@ -9,7 +9,7 @@ use std::fmt; use std::fs; use std::iter; use std::path::{Path, PathBuf}; -use std::process::Command; +use std::process::{Command, Stdio}; use build_helper::{self, output, t}; @@ -161,6 +161,49 @@ You can skip linkcheck with --exclude src/tools/linkchecker" } } +fn check_if_tidy_is_installed() -> bool { + Command::new("tidy") + .arg("--version") + .stdout(Stdio::null()) + .status() + .map_or(false, |status| status.success()) +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub struct HtmlCheck { + target: TargetSelection, +} + +impl Step for HtmlCheck { + type Output = (); + const DEFAULT: bool = true; + const ONLY_HOSTS: bool = true; + + fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> { + let run = run.path("src/tools/html-checker"); + run.lazy_default_condition(Box::new(check_if_tidy_is_installed)) + } + + fn make_run(run: RunConfig<'_>) { + run.builder.ensure(HtmlCheck { target: run.target }); + } + + fn run(self, builder: &Builder<'_>) { + if !check_if_tidy_is_installed() { + eprintln!("not running HTML-check tool because `tidy` is missing"); + eprintln!( + "Note that `tidy` is not the in-tree `src/tools/tidy` but needs to be installed" + ); + panic!("Cannot run html-check tests"); + } + // Ensure that a few different kinds of documentation are available. + builder.default_doc(&[]); + builder.ensure(crate::doc::Rustc { target: self.target, stage: builder.top_stage }); + + try_run(builder, builder.tool_cmd(Tool::HtmlChecker).arg(builder.doc_out(self.target))); + } +} + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct Cargotest { stage: u32, diff --git a/src/bootstrap/tool.rs b/src/bootstrap/tool.rs index 9d75ad0918a..aa7fe658df3 100644 --- a/src/bootstrap/tool.rs +++ b/src/bootstrap/tool.rs @@ -376,6 +376,7 @@ bootstrap_tool!( ExpandYamlAnchors, "src/tools/expand-yaml-anchors", "expand-yaml-anchors"; LintDocs, "src/tools/lint-docs", "lint-docs"; JsonDocCk, "src/tools/jsondocck", "jsondocck"; + HtmlChecker, "src/tools/html-checker", "html-checker"; ); #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)] diff --git a/src/ci/docker/host-x86_64/x86_64-gnu-tools/Dockerfile b/src/ci/docker/host-x86_64/x86_64-gnu-tools/Dockerfile index 35d274da673..faed1761fa4 100644 --- a/src/ci/docker/host-x86_64/x86_64-gnu-tools/Dockerfile +++ b/src/ci/docker/host-x86_64/x86_64-gnu-tools/Dockerfile @@ -12,7 +12,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ cmake \ libssl-dev \ sudo \ - xz-utils + xz-utils \ + tidy # Install dependencies for chromium browser RUN apt-get install -y \ diff --git a/src/tools/html-checker/Cargo.toml b/src/tools/html-checker/Cargo.toml new file mode 100644 index 00000000000..fe35df823b6 --- /dev/null +++ b/src/tools/html-checker/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "html-checker" +version = "0.1.0" +authors = ["Guillaume Gomez <guillaume1.gomez@gmail.com>"] +edition = "2018" + +[[bin]] +name = "html-checker" +path = "main.rs" + +[dependencies] +walkdir = "2" diff --git a/src/tools/html-checker/main.rs b/src/tools/html-checker/main.rs new file mode 100644 index 00000000000..a93191191cc --- /dev/null +++ b/src/tools/html-checker/main.rs @@ -0,0 +1,96 @@ +use std::env; +use std::path::Path; +use std::process::{Command, Output}; + +fn check_html_file(file: &Path) -> usize { + let to_mute = &[ + // "disabled" on <link> or "autocomplete" on <select> emit this warning + "PROPRIETARY_ATTRIBUTE", + // It complains when multiple in the same page link to the same anchor for some reason... + "ANCHOR_NOT_UNIQUE", + // If a <span> contains only HTML elements and no text, it complains about it. + "TRIM_EMPTY_ELEMENT", + // FIXME: the three next warnings are about <pre> elements which are not supposed to + // contain HTML. The solution here would be to replace them with a <div> with + // "" + "MISSING_ENDTAG_BEFORE", + "INSERTING_TAG", + "DISCARDING_UNEXPECTED", + // FIXME: mdbook repeats the name attribute on <input>. When the fix is merged upstream, + // this warning can be used again. + "REPEATED_ATTRIBUTE", + // FIXME: mdbook uses "align" attribute on <td>, which is not allowed. + "MISMATCHED_ATTRIBUTE_WARN", + // FIXME: mdbook doesn't add "alt" attribute on images. + "MISSING_ATTRIBUTE", + // FIXME: mdbook doesn't escape `&` (in "&String" for example). + "UNKNOWN_ENTITY", + // Compiler docs have some inlined <style> in the markdown. + "MOVED_STYLE_TO_HEAD", + ]; + let to_mute_s = to_mute.join(","); + let mut command = Command::new("tidy"); + command + .arg("-errors") + .arg("-quiet") + .arg("--mute-id") // this option is useful in case we want to mute more warnings + .arg("yes") + .arg("--mute") + .arg(&to_mute_s) + .arg(file); + + let Output { status, stderr, .. } = command.output().expect("failed to run tidy command"); + if status.success() { + 0 + } else { + let stderr = String::from_utf8(stderr).expect("String::from_utf8 failed..."); + if stderr.is_empty() && status.code() != Some(2) { + 0 + } else { + eprintln!( + "=> Errors for `{}` (error code: {}) <=", + file.display(), + status.code().unwrap_or(-1) + ); + eprintln!("{}", stderr); + stderr.lines().count() + } + } +} + +// Returns the number of files read and the number of errors. +fn find_all_html_files(dir: &Path) -> (usize, usize) { + let mut files_read = 0; + let mut errors = 0; + + for entry in walkdir::WalkDir::new(dir) { + let entry = entry.expect("failed to read file"); + if !entry.file_type().is_file() { + continue; + } + let entry = entry.path(); + if entry.extension().and_then(|s| s.to_str()) == Some("html") { + errors += check_html_file(&entry); + files_read += 1; + } + } + (files_read, errors) +} + +fn main() -> Result<(), String> { + let args = env::args().collect::<Vec<_>>(); + if args.len() != 2 { + return Err(format!("Usage: {} <doc folder>", args[0])); + } + + println!("Running HTML checker..."); + + let (files_read, errors) = find_all_html_files(&Path::new(&args[1])); + println!("Done! Read {} files...", files_read); + if errors > 0 { + Err(format!("HTML check failed: {} errors", errors)) + } else { + println!("No error found!"); + Ok(()) + } +} From af37ed738a0b9b03c350ef225e9682f486dcbfad Mon Sep 17 00:00:00 2001 From: Guillaume Gomez <guillaume1.gomez@gmail.com> Date: Sat, 24 Apr 2021 17:25:45 +0200 Subject: [PATCH 2/3] Install tidy on x86_64-gnu-aux target to run html check --- src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile b/src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile index 7f1a5820e22..ee3cd092f4c 100644 --- a/src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile +++ b/src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile @@ -17,7 +17,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libgl1-mesa-dev \ llvm-dev \ libfreetype6-dev \ - libexpat1-dev + libexpat1-dev \ + tidy COPY scripts/sccache.sh /scripts/ RUN sh /scripts/sccache.sh From 785b705ae4cc0cc503a79c7bccfcf689bcc7820f Mon Sep 17 00:00:00 2001 From: Guillaume Gomez <guillaume1.gomez@gmail.com> Date: Thu, 3 Jun 2021 19:57:49 +0200 Subject: [PATCH 3/3] Only run HTML check on rustdoc generated items --- src/tools/html-checker/main.rs | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/tools/html-checker/main.rs b/src/tools/html-checker/main.rs index a93191191cc..bf2830254e8 100644 --- a/src/tools/html-checker/main.rs +++ b/src/tools/html-checker/main.rs @@ -11,22 +11,10 @@ fn check_html_file(file: &Path) -> usize { // If a <span> contains only HTML elements and no text, it complains about it. "TRIM_EMPTY_ELEMENT", // FIXME: the three next warnings are about <pre> elements which are not supposed to - // contain HTML. The solution here would be to replace them with a <div> with - // "" + // contain HTML. The solution here would be to replace them with a <div> "MISSING_ENDTAG_BEFORE", "INSERTING_TAG", "DISCARDING_UNEXPECTED", - // FIXME: mdbook repeats the name attribute on <input>. When the fix is merged upstream, - // this warning can be used again. - "REPEATED_ATTRIBUTE", - // FIXME: mdbook uses "align" attribute on <td>, which is not allowed. - "MISMATCHED_ATTRIBUTE_WARN", - // FIXME: mdbook doesn't add "alt" attribute on images. - "MISSING_ATTRIBUTE", - // FIXME: mdbook doesn't escape `&` (in "&String" for example). - "UNKNOWN_ENTITY", - // Compiler docs have some inlined <style> in the markdown. - "MOVED_STYLE_TO_HEAD", ]; let to_mute_s = to_mute.join(","); let mut command = Command::new("tidy"); @@ -58,12 +46,21 @@ fn check_html_file(file: &Path) -> usize { } } +const DOCS_TO_CHECK: &[&str] = + &["alloc", "core", "proc_macro", "implementors", "src", "std", "test"]; + // Returns the number of files read and the number of errors. fn find_all_html_files(dir: &Path) -> (usize, usize) { let mut files_read = 0; let mut errors = 0; - for entry in walkdir::WalkDir::new(dir) { + for entry in walkdir::WalkDir::new(dir).into_iter().filter_entry(|e| { + e.depth() != 1 + || e.file_name() + .to_str() + .map(|s| DOCS_TO_CHECK.into_iter().any(|d| *d == s)) + .unwrap_or(false) + }) { let entry = entry.expect("failed to read file"); if !entry.file_type().is_file() { continue;