generate-copyright: Produce HTML, not Markdown

This format works better with large amounts of structured data.

We also mark which deps are in the stdlib.
This commit is contained in:
Jonathan Pallant 2024-07-29 11:41:02 +01:00
parent ba0d6c9739
commit 204e3eadf1
No known key found for this signature in database
3 changed files with 151 additions and 102 deletions

View File

@ -212,7 +212,7 @@ impl Step for GenerateCopyright {
let license_metadata = builder.ensure(CollectLicenseMetadata);
// Temporary location, it will be moved to the proper one once it's accurate.
let dest = builder.out.join("COPYRIGHT.md");
let dest = builder.out.join("COPYRIGHT.html");
let mut cmd = builder.tool_cmd(Tool::GenerateCopyright);
cmd.env("LICENSE_METADATA", &license_metadata);

View File

@ -1,6 +1,6 @@
//! Gets metadata about a workspace from Cargo
use std::collections::{BTreeMap, BTreeSet};
use std::collections::BTreeMap;
use std::ffi::{OsStr, OsString};
use std::path::Path;
@ -23,13 +23,18 @@ pub enum Error {
RunningVendor,
}
/// Describes one of our dependencies
/// Uniquely describes a package on crates.io
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Dependency {
pub struct Package {
/// The name of the package
pub name: String,
/// The version number
pub version: String,
}
/// Extra data about a package
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct PackageMetadata {
/// The license it is under
pub license: String,
/// The list of authors from the package metadata
@ -40,20 +45,44 @@ pub struct Dependency {
pub notices: BTreeMap<OsString, String>,
}
/// Use `cargo` to get a list of dependencies and their license data.
/// Use `cargo metadata` and `cargo vendor` to get a list of dependencies and their license data.
///
/// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can
/// grab the license files.
///
/// Any dependency with a path beginning with `root_path` is ignored, as we
/// assume `reuse` has covered it already.
pub fn get(
pub fn get_metadata_and_notices(
cargo: &Path,
dest: &Path,
root_path: &Path,
manifest_paths: &[&Path],
) -> Result<BTreeSet<Dependency>, Error> {
let mut temp_set = BTreeSet::new();
) -> Result<BTreeMap<Package, PackageMetadata>, Error> {
let mut output = get_metadata(cargo, root_path, manifest_paths)?;
// Now do a cargo-vendor and grab everything
let vendor_path = dest.join("vendor");
println!("Vendoring deps into {}...", vendor_path.display());
run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;
// Now for each dependency we found, go and grab any important looking files
for (package, metadata) in output.iter_mut() {
load_important_files(package, metadata, &vendor_path)?;
}
Ok(output)
}
/// Use `cargo metadata` to get a list of dependencies and their license data.
///
/// Any dependency with a path beginning with `root_path` is ignored, as we
/// assume `reuse` has covered it already.
pub fn get_metadata(
cargo: &Path,
root_path: &Path,
manifest_paths: &[&Path],
) -> Result<BTreeMap<Package, PackageMetadata>, Error> {
let mut output = BTreeMap::new();
// Look at the metadata for each manifest
for manifest_path in manifest_paths {
if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) {
@ -71,7 +100,7 @@ pub fn get(
.and_then(|v| v.as_str())
.map(Path::new)
.ok_or_else(|| Error::MissingJsonElement("package.manifest_path"))?;
if manifest_path.starts_with(&root_path) {
if manifest_path.starts_with(root_path) {
// it's an in-tree dependency and reuse covers it
continue;
}
@ -93,28 +122,14 @@ pub fn get(
.ok_or_else(|| Error::MissingJsonElement("package.authors"))?;
let authors: Vec<String> =
authors_list.iter().filter_map(|v| v.as_str()).map(|s| s.to_owned()).collect();
temp_set.insert(Dependency {
name: name.to_owned(),
version: version.to_owned(),
license: license.to_owned(),
authors,
notices: BTreeMap::new(),
});
let package = Package { name: name.to_owned(), version: version.to_owned() };
output.insert(
package.clone(),
PackageMetadata { license: license.to_owned(), authors, notices: BTreeMap::new() },
);
}
}
// Now do a cargo-vendor and grab everything
let vendor_path = dest.join("vendor");
println!("Vendoring deps into {}...", vendor_path.display());
run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;
// Now for each dependency we found, go and grab any important looking files
let mut output = BTreeSet::new();
for mut dep in temp_set {
load_important_files(&mut dep, &vendor_path)?;
output.insert(dep);
}
Ok(output)
}
@ -128,7 +143,7 @@ fn get_metadata_json(cargo: &Path, manifest_path: &Path) -> Result<serde_json::V
.arg(manifest_path)
.env("RUSTC_BOOTSTRAP", "1")
.output()
.map_err(|e| Error::LaunchingMetadata(e))?;
.map_err(Error::LaunchingMetadata)?;
if !metadata_output.status.success() {
return Err(Error::GettingMetadata(
String::from_utf8(metadata_output.stderr).expect("UTF-8 output from cargo"),
@ -151,7 +166,7 @@ fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Resu
}
vendor_command.arg(dest);
let vendor_status = vendor_command.status().map_err(|e| Error::LaunchingVendor(e))?;
let vendor_status = vendor_command.status().map_err(Error::LaunchingVendor)?;
if !vendor_status.success() {
return Err(Error::RunningVendor);
@ -164,8 +179,12 @@ fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Resu
///
/// Maybe one day Cargo.toml will contain enough information that we don't need
/// to do this manual scraping.
fn load_important_files(dep: &mut Dependency, vendor_root: &Path) -> Result<(), Error> {
let name_version = format!("{}-{}", dep.name, dep.version);
fn load_important_files(
package: &Package,
dep: &mut PackageMetadata,
vendor_root: &Path,
) -> Result<(), Error> {
let name_version = format!("{}-{}", package.name, package.version);
println!("Scraping notices for {}...", name_version);
let dep_vendor_path = vendor_root.join(name_version);
for entry in std::fs::read_dir(dep_vendor_path)? {

View File

@ -1,3 +1,4 @@
use std::collections::BTreeMap;
use std::io::Write;
use std::path::{Path, PathBuf};
@ -5,6 +6,33 @@ use anyhow::Error;
mod cargo_metadata;
/// Opening part of the generated HTML document.
///
/// Contains the doctype, the `<head>`, the page heading, the introductory paragraph and the
/// table of contents. The `<html>` and `<body>` elements are left open here. The table of
/// contents links to the `#in-tree-files` and `#out-of-tree-dependencies` anchors, so
/// whatever is rendered after this text has to provide elements with those `id`s.
static TOP_BOILERPLATE: &str = r##"
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Copyright notices for The Rust Toolchain</title>
</head>
<body>
<h1>Copyright notices for The Rust Toolchain</h1>
<p>This file describes the copyright and licensing information for the source
code within The Rust Project git tree, and the third-party dependencies used
when building the Rust toolchain (including the Rust Standard Library).</p>
<h2>Table of Contents</h2>
<ul>
<li><a href="#in-tree-files">In-tree files</a></li>
<li><a href="#out-of-tree-dependencies">Out-of-tree dependencies</a></li>
</ul>
"##;
/// Closing part of the generated HTML document: the `</body>` and `</html>` end tags.
static BOTTOM_BOILERPLATE: &str = r#"
</body>
</html>
"#;
/// The entry point to the binary.
///
/// You should probably let `bootstrap` execute this program instead of running it directly.
@ -26,43 +54,28 @@ fn main() -> Result<(), Error> {
Path::new("./library/std/Cargo.toml"),
];
let collected_cargo_metadata =
cargo_metadata::get(&cargo, &out_dir, &root_path, &workspace_paths)?;
cargo_metadata::get_metadata_and_notices(&cargo, &out_dir, &root_path, &workspace_paths)?;
let stdlib_set =
cargo_metadata::get_metadata(&cargo, &root_path, &[Path::new("./library/std/Cargo.toml")])?;
let mut buffer = Vec::new();
writeln!(buffer, "# COPYRIGHT for Rust")?;
writeln!(buffer)?;
writeln!(buffer, "{}", TOP_BOILERPLATE)?;
writeln!(
buffer,
"This file describes the copyright and licensing information for the source code within The Rust Project git tree, and the third-party dependencies used when building the Rust toolchain (including the Rust Standard Library)"
r#"<h2 id="in-tree-files">In-tree files</h2><p>The following licenses cover the in-tree source files that were used in this release:</p>"#
)?;
writeln!(buffer)?;
writeln!(buffer, "## Table of Contents")?;
writeln!(buffer)?;
writeln!(buffer, "* [In-tree files](#in-tree-files)")?;
writeln!(buffer, "* [Out-of-tree files](#out-of-tree-files)")?;
// writeln!(buffer, "* [License Texts](#license-texts)")?;
writeln!(buffer)?;
render_tree_recursive(&collected_tree_metadata.files, &mut buffer)?;
writeln!(buffer, "## In-tree files")?;
writeln!(buffer)?;
writeln!(
buffer,
"The following licenses cover the in-tree source files that were used in this release:"
r#"<h2 id="out-of-tree-dependencies">Out-of-tree dependencies</h2><p>The following licenses cover the out-of-tree crates that were used in this release:</p>"#
)?;
writeln!(buffer)?;
render_tree_recursive(&collected_tree_metadata.files, &mut buffer, 0)?;
render_deps(&collected_cargo_metadata, &stdlib_set, &mut buffer)?;
writeln!(buffer)?;
writeln!(buffer, "## Out-of-tree files")?;
writeln!(buffer)?;
writeln!(
buffer,
"The following licenses cover the out-of-tree crates that were used in this release:"
)?;
writeln!(buffer)?;
render_deps(collected_cargo_metadata.iter(), &mut buffer)?;
writeln!(buffer, "{}", BOTTOM_BOILERPLATE)?;
std::fs::write(&dest_file, &buffer)?;
@ -71,56 +84,51 @@ fn main() -> Result<(), Error> {
/// Recursively draw the tree of files/folders we found on disk and their licenses, as
/// HTML, into the given Vec.
fn render_tree_recursive(node: &Node, buffer: &mut Vec<u8>, depth: usize) -> Result<(), Error> {
let prefix = std::iter::repeat("> ").take(depth + 1).collect::<String>();
fn render_tree_recursive(node: &Node, buffer: &mut Vec<u8>) -> Result<(), Error> {
writeln!(buffer, r#"<div style="border:1px solid black; padding: 5px;">"#)?;
match node {
Node::Root { children } => {
for child in children {
render_tree_recursive(child, buffer, depth)?;
render_tree_recursive(child, buffer)?;
}
}
Node::Directory { name, children, license } => {
render_tree_license(&prefix, std::iter::once(name), license.as_ref(), buffer)?;
render_tree_license(std::iter::once(name), license.as_ref(), buffer)?;
if !children.is_empty() {
writeln!(buffer, "{prefix}")?;
writeln!(buffer, "{prefix}*Exceptions:*")?;
writeln!(buffer, "<p><b>Exceptions:</b></p>")?;
for child in children {
writeln!(buffer, "{prefix}")?;
render_tree_recursive(child, buffer, depth + 1)?;
render_tree_recursive(child, buffer)?;
}
}
}
Node::Group { files, directories, license } => {
render_tree_license(
&prefix,
directories.iter().chain(files.iter()),
Some(license),
buffer,
)?;
render_tree_license(directories.iter().chain(files.iter()), Some(license), buffer)?;
}
Node::File { name, license } => {
render_tree_license(&prefix, std::iter::once(name), Some(license), buffer)?;
render_tree_license(std::iter::once(name), Some(license), buffer)?;
}
}
writeln!(buffer, "</div>")?;
Ok(())
}
/// Draw a series of sibling files/folders, as HTML, into the given Vec.
fn render_tree_license<'a>(
prefix: &str,
names: impl Iterator<Item = &'a String>,
license: Option<&License>,
buffer: &mut Vec<u8>,
) -> Result<(), Error> {
writeln!(buffer, "<p><b>File/Directory:</b> ")?;
for name in names {
writeln!(buffer, "{prefix}**`{name}`** ")?;
writeln!(buffer, "<code>{name}</code>")?;
}
writeln!(buffer, "</p>")?;
if let Some(license) = license {
writeln!(buffer, "{prefix}License: `{}`", license.spdx)?;
writeln!(buffer, "<p><b>License:</b> {}</p>", license.spdx)?;
for copyright in license.copyright.iter() {
writeln!(buffer, "{prefix}Copyright: {copyright}")?;
writeln!(buffer, "<p><b>Copyright:</b> {copyright}</p>")?;
}
}
@ -128,36 +136,48 @@ fn render_tree_license<'a>(
}
/// Render a list of out-of-tree dependencies as HTML into the given Vec.
fn render_deps<'a, 'b>(
deps: impl Iterator<Item = &'a cargo_metadata::Dependency>,
buffer: &'b mut Vec<u8>,
fn render_deps(
all_deps: &BTreeMap<cargo_metadata::Package, cargo_metadata::PackageMetadata>,
stdlib_set: &BTreeMap<cargo_metadata::Package, cargo_metadata::PackageMetadata>,
buffer: &mut Vec<u8>,
) -> Result<(), Error> {
for dep in deps {
let authors_list = dep.authors.join(", ").replace("<", "\\<").replace(">", "\\>");
let url = format!("https://crates.io/crates/{}/{}", dep.name, dep.version);
for (package, metadata) in all_deps {
let authors_list = if metadata.authors.is_empty() {
"None Specified".to_owned()
} else {
metadata.authors.join(", ")
};
let url = format!("https://crates.io/crates/{}/{}", package.name, package.version);
writeln!(buffer)?;
writeln!(
buffer,
"### [{name} {version}]({url})",
name = dep.name,
version = dep.version,
url = url,
r#"<h3>📦 {name}-{version}</h3>"#,
name = package.name,
version = package.version,
)?;
writeln!(buffer)?;
writeln!(buffer, "* Authors: {}", authors_list)?;
writeln!(buffer, "* License: {}", dep.license)?;
for (name, contents) in &dep.notices {
writeln!(buffer)?;
writeln!(buffer, "#### {}", name.to_string_lossy())?;
writeln!(buffer)?;
writeln!(buffer, "<details><summary>Click to expand</summary>")?;
writeln!(buffer)?;
writeln!(buffer, "```")?;
writeln!(buffer, "{}", contents)?;
writeln!(buffer, "```")?;
writeln!(buffer)?;
writeln!(buffer, "</details>")?;
writeln!(buffer, r#"<p><b>URL:</b> <a href="{url}">{url}</a></p>"#,)?;
writeln!(
buffer,
"<p><b>In libstd:</b> {}</p>",
if stdlib_set.contains_key(package) { "Yes" } else { "No" }
)?;
writeln!(buffer, "<p><b>Authors:</b> {}</p>", escape_html(&authors_list))?;
writeln!(buffer, "<p><b>License:</b> {}</p>", escape_html(&metadata.license))?;
writeln!(buffer, "<p><b>Notices:</b> ")?;
if metadata.notices.is_empty() {
writeln!(buffer, "None")?;
} else {
for (name, contents) in &metadata.notices {
writeln!(
buffer,
"<details><summary><code>{}</code></summary>",
name.to_string_lossy()
)?;
writeln!(buffer, "<pre>\n{}\n</pre>", contents)?;
writeln!(buffer, "</details>")?;
}
}
writeln!(buffer, "</p>")?;
}
Ok(())
}
@ -192,3 +212,13 @@ fn env_path(var: &str) -> Result<PathBuf, Error> {
anyhow::bail!("missing environment variable {var}")
}
}
/// Escapes the characters that are special in HTML text content.
///
/// `&`, `<` and `>` are replaced with `&amp;`, `&lt;` and `&gt;` respectively; every other
/// character (including quote characters) is copied through unchanged.
fn escape_html(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for c in input.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}