From 83a2bc31b9f68d8ba5fe2854bf38df5e564c575b Mon Sep 17 00:00:00 2001
From: Guillaume Gomez <guillaume1.gomez@gmail.com>
Date: Fri, 23 Apr 2021 16:43:18 +0200
Subject: [PATCH 1/3] Add new tool to check HTML:  * Make html-checker run by
 default on rust compiler docs as well  * Ensure html-checker is run on CI  *
 Lazify tidy binary presence check

---
 Cargo.lock                                    |  7 ++
 Cargo.toml                                    |  1 +
 src/bootstrap/builder.rs                      |  1 +
 src/bootstrap/doc.rs                          |  4 +-
 src/bootstrap/test.rs                         | 45 ++++++++-
 src/bootstrap/tool.rs                         |  1 +
 .../host-x86_64/x86_64-gnu-tools/Dockerfile   |  3 +-
 src/tools/html-checker/Cargo.toml             | 12 +++
 src/tools/html-checker/main.rs                | 96 +++++++++++++++++++
 9 files changed, 166 insertions(+), 4 deletions(-)
 create mode 100644 src/tools/html-checker/Cargo.toml
 create mode 100644 src/tools/html-checker/main.rs

diff --git a/Cargo.lock b/Cargo.lock
index 65ad130d559..d57d17a3581 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1578,6 +1578,13 @@ dependencies = [
  "winapi 0.3.9",
 ]
 
+[[package]]
+name = "html-checker"
+version = "0.1.0"
+dependencies = [
+ "walkdir",
+]
+
 [[package]]
 name = "html5ever"
 version = "0.25.1"
diff --git a/Cargo.toml b/Cargo.toml
index 327afe35c2f..4c00a7dc99e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -34,6 +34,7 @@ members = [
   "src/tools/unicode-table-generator",
   "src/tools/expand-yaml-anchors",
   "src/tools/jsondocck",
+  "src/tools/html-checker",
 ]
 
 exclude = [
diff --git a/src/bootstrap/builder.rs b/src/bootstrap/builder.rs
index e2f605257bd..b4c5a2abc9c 100644
--- a/src/bootstrap/builder.rs
+++ b/src/bootstrap/builder.rs
@@ -450,6 +450,7 @@ impl<'a> Builder<'a> {
                 test::RustdocTheme,
                 test::RustdocUi,
                 test::RustdocJson,
+                test::HtmlCheck,
                 // Run bootstrap close to the end as it's unlikely to fail
                 test::Bootstrap,
                 // Run run-make last, since these won't pass without make on Windows
diff --git a/src/bootstrap/doc.rs b/src/bootstrap/doc.rs
index d2fabf9967f..634332df863 100644
--- a/src/bootstrap/doc.rs
+++ b/src/bootstrap/doc.rs
@@ -501,8 +501,8 @@ impl Step for Std {
 
 #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
 pub struct Rustc {
-    stage: u32,
-    target: TargetSelection,
+    pub stage: u32,
+    pub target: TargetSelection,
 }
 
 impl Step for Rustc {
diff --git a/src/bootstrap/test.rs b/src/bootstrap/test.rs
index 92ac3b364f6..64b3ee7c359 100644
--- a/src/bootstrap/test.rs
+++ b/src/bootstrap/test.rs
@@ -9,7 +9,7 @@ use std::fmt;
 use std::fs;
 use std::iter;
 use std::path::{Path, PathBuf};
-use std::process::Command;
+use std::process::{Command, Stdio};
 
 use build_helper::{self, output, t};
 
@@ -161,6 +161,49 @@ You can skip linkcheck with --exclude src/tools/linkchecker"
     }
 }
 
+fn check_if_tidy_is_installed() -> bool {
+    Command::new("tidy")
+        .arg("--version")
+        .stdout(Stdio::null())
+        .status()
+        .map_or(false, |status| status.success())
+}
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub struct HtmlCheck {
+    target: TargetSelection,
+}
+
+impl Step for HtmlCheck {
+    type Output = ();
+    const DEFAULT: bool = true;
+    const ONLY_HOSTS: bool = true;
+
+    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
+        let run = run.path("src/tools/html-checker");
+        run.lazy_default_condition(Box::new(check_if_tidy_is_installed))
+    }
+
+    fn make_run(run: RunConfig<'_>) {
+        run.builder.ensure(HtmlCheck { target: run.target });
+    }
+
+    fn run(self, builder: &Builder<'_>) {
+        if !check_if_tidy_is_installed() {
+            eprintln!("not running HTML-check tool because `tidy` is missing");
+            eprintln!(
+                "Note that `tidy` is not the in-tree `src/tools/tidy` but needs to be installed"
+            );
+            panic!("Cannot run html-check tests");
+        }
+        // Ensure that a few different kinds of documentation are available.
+        builder.default_doc(&[]);
+        builder.ensure(crate::doc::Rustc { target: self.target, stage: builder.top_stage });
+
+        try_run(builder, builder.tool_cmd(Tool::HtmlChecker).arg(builder.doc_out(self.target)));
+    }
+}
+
 #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
 pub struct Cargotest {
     stage: u32,
diff --git a/src/bootstrap/tool.rs b/src/bootstrap/tool.rs
index 9d75ad0918a..aa7fe658df3 100644
--- a/src/bootstrap/tool.rs
+++ b/src/bootstrap/tool.rs
@@ -376,6 +376,7 @@ bootstrap_tool!(
     ExpandYamlAnchors, "src/tools/expand-yaml-anchors", "expand-yaml-anchors";
     LintDocs, "src/tools/lint-docs", "lint-docs";
     JsonDocCk, "src/tools/jsondocck", "jsondocck";
+    HtmlChecker, "src/tools/html-checker", "html-checker";
 );
 
 #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]
diff --git a/src/ci/docker/host-x86_64/x86_64-gnu-tools/Dockerfile b/src/ci/docker/host-x86_64/x86_64-gnu-tools/Dockerfile
index 35d274da673..faed1761fa4 100644
--- a/src/ci/docker/host-x86_64/x86_64-gnu-tools/Dockerfile
+++ b/src/ci/docker/host-x86_64/x86_64-gnu-tools/Dockerfile
@@ -12,7 +12,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   cmake \
   libssl-dev \
   sudo \
-  xz-utils
+  xz-utils \
+  tidy
 
 # Install dependencies for chromium browser
 RUN apt-get install -y \
diff --git a/src/tools/html-checker/Cargo.toml b/src/tools/html-checker/Cargo.toml
new file mode 100644
index 00000000000..fe35df823b6
--- /dev/null
+++ b/src/tools/html-checker/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "html-checker"
+version = "0.1.0"
+authors = ["Guillaume Gomez <guillaume1.gomez@gmail.com>"]
+edition = "2018"
+
+[[bin]]
+name = "html-checker"
+path = "main.rs"
+
+[dependencies]
+walkdir = "2"
diff --git a/src/tools/html-checker/main.rs b/src/tools/html-checker/main.rs
new file mode 100644
index 00000000000..a93191191cc
--- /dev/null
+++ b/src/tools/html-checker/main.rs
@@ -0,0 +1,96 @@
+use std::env;
+use std::path::Path;
+use std::process::{Command, Output};
+
+fn check_html_file(file: &Path) -> usize {
+    let to_mute = &[
+        // "disabled" on <link> or "autocomplete" on <select> emit this warning
+        "PROPRIETARY_ATTRIBUTE",
+        // It complains when multiple in the same page link to the same anchor for some reason...
+        "ANCHOR_NOT_UNIQUE",
+        // If a <span> contains only HTML elements and no text, it complains about it.
+        "TRIM_EMPTY_ELEMENT",
+        // FIXME: the three next warnings are about <pre> elements which are not supposed to
+        //        contain HTML. The solution here would be to replace them with a <div> with
+        //        ""
+        "MISSING_ENDTAG_BEFORE",
+        "INSERTING_TAG",
+        "DISCARDING_UNEXPECTED",
+        // FIXME: mdbook repeats the name attribute on <input>. When the fix is merged upstream,
+        //        this warning can be used again.
+        "REPEATED_ATTRIBUTE",
+        // FIXME: mdbook uses "align" attribute on <td>, which is not allowed.
+        "MISMATCHED_ATTRIBUTE_WARN",
+        // FIXME: mdbook doesn't add "alt" attribute on images.
+        "MISSING_ATTRIBUTE",
+        // FIXME: mdbook doesn't escape `&` (in "&String" for example).
+        "UNKNOWN_ENTITY",
+        // Compiler docs have some inlined <style> in the markdown.
+        "MOVED_STYLE_TO_HEAD",
+    ];
+    let to_mute_s = to_mute.join(",");
+    let mut command = Command::new("tidy");
+    command
+        .arg("-errors")
+        .arg("-quiet")
+        .arg("--mute-id") // this option is useful in case we want to mute more warnings
+        .arg("yes")
+        .arg("--mute")
+        .arg(&to_mute_s)
+        .arg(file);
+
+    let Output { status, stderr, .. } = command.output().expect("failed to run tidy command");
+    if status.success() {
+        0
+    } else {
+        let stderr = String::from_utf8(stderr).expect("String::from_utf8 failed...");
+        if stderr.is_empty() && status.code() != Some(2) {
+            0
+        } else {
+            eprintln!(
+                "=> Errors for `{}` (error code: {}) <=",
+                file.display(),
+                status.code().unwrap_or(-1)
+            );
+            eprintln!("{}", stderr);
+            stderr.lines().count()
+        }
+    }
+}
+
+// Returns the number of files read and the number of errors.
+fn find_all_html_files(dir: &Path) -> (usize, usize) {
+    let mut files_read = 0;
+    let mut errors = 0;
+
+    for entry in walkdir::WalkDir::new(dir) {
+        let entry = entry.expect("failed to read file");
+        if !entry.file_type().is_file() {
+            continue;
+        }
+        let entry = entry.path();
+        if entry.extension().and_then(|s| s.to_str()) == Some("html") {
+            errors += check_html_file(&entry);
+            files_read += 1;
+        }
+    }
+    (files_read, errors)
+}
+
+fn main() -> Result<(), String> {
+    let args = env::args().collect::<Vec<_>>();
+    if args.len() != 2 {
+        return Err(format!("Usage: {} <doc folder>", args[0]));
+    }
+
+    println!("Running HTML checker...");
+
+    let (files_read, errors) = find_all_html_files(&Path::new(&args[1]));
+    println!("Done! Read {} files...", files_read);
+    if errors > 0 {
+        Err(format!("HTML check failed: {} errors", errors))
+    } else {
+        println!("No error found!");
+        Ok(())
+    }
+}

From af37ed738a0b9b03c350ef225e9682f486dcbfad Mon Sep 17 00:00:00 2001
From: Guillaume Gomez <guillaume1.gomez@gmail.com>
Date: Sat, 24 Apr 2021 17:25:45 +0200
Subject: [PATCH 2/3] Install tidy on x86_64-gnu-aux target to run html check

---
 src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile b/src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile
index 7f1a5820e22..ee3cd092f4c 100644
--- a/src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile
+++ b/src/ci/docker/host-x86_64/x86_64-gnu-aux/Dockerfile
@@ -17,7 +17,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   libgl1-mesa-dev \
   llvm-dev \
   libfreetype6-dev \
-  libexpat1-dev
+  libexpat1-dev \
+  tidy
 
 COPY scripts/sccache.sh /scripts/
 RUN sh /scripts/sccache.sh

From 785b705ae4cc0cc503a79c7bccfcf689bcc7820f Mon Sep 17 00:00:00 2001
From: Guillaume Gomez <guillaume1.gomez@gmail.com>
Date: Thu, 3 Jun 2021 19:57:49 +0200
Subject: [PATCH 3/3] Only run HTML check on rustdoc generated items

---
 src/tools/html-checker/main.rs | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/src/tools/html-checker/main.rs b/src/tools/html-checker/main.rs
index a93191191cc..bf2830254e8 100644
--- a/src/tools/html-checker/main.rs
+++ b/src/tools/html-checker/main.rs
@@ -11,22 +11,10 @@ fn check_html_file(file: &Path) -> usize {
         // If a <span> contains only HTML elements and no text, it complains about it.
         "TRIM_EMPTY_ELEMENT",
         // FIXME: the three next warnings are about <pre> elements which are not supposed to
-        //        contain HTML. The solution here would be to replace them with a <div> with
-        //        ""
+        //        contain HTML. The solution here would be to replace them with a <div>
         "MISSING_ENDTAG_BEFORE",
         "INSERTING_TAG",
         "DISCARDING_UNEXPECTED",
-        // FIXME: mdbook repeats the name attribute on <input>. When the fix is merged upstream,
-        //        this warning can be used again.
-        "REPEATED_ATTRIBUTE",
-        // FIXME: mdbook uses "align" attribute on <td>, which is not allowed.
-        "MISMATCHED_ATTRIBUTE_WARN",
-        // FIXME: mdbook doesn't add "alt" attribute on images.
-        "MISSING_ATTRIBUTE",
-        // FIXME: mdbook doesn't escape `&` (in "&String" for example).
-        "UNKNOWN_ENTITY",
-        // Compiler docs have some inlined <style> in the markdown.
-        "MOVED_STYLE_TO_HEAD",
     ];
     let to_mute_s = to_mute.join(",");
     let mut command = Command::new("tidy");
@@ -58,12 +46,21 @@ fn check_html_file(file: &Path) -> usize {
     }
 }
 
+const DOCS_TO_CHECK: &[&str] =
+    &["alloc", "core", "proc_macro", "implementors", "src", "std", "test"];
+
 // Returns the number of files read and the number of errors.
 fn find_all_html_files(dir: &Path) -> (usize, usize) {
     let mut files_read = 0;
     let mut errors = 0;
 
-    for entry in walkdir::WalkDir::new(dir) {
+    for entry in walkdir::WalkDir::new(dir).into_iter().filter_entry(|e| {
+        e.depth() != 1
+            || e.file_name()
+                .to_str()
+                .map(|s| DOCS_TO_CHECK.into_iter().any(|d| *d == s))
+                .unwrap_or(false)
+    }) {
         let entry = entry.expect("failed to read file");
         if !entry.file_type().is_file() {
             continue;