diff --git a/Cargo.lock b/Cargo.lock index eea4c28ec52..fb762cd2fe6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1406,8 +1406,11 @@ name = "generate-copyright" version = "0.1.0" dependencies = [ "anyhow", + "cargo_metadata 0.18.1", + "rinja", "serde", "serde_json", + "thiserror", ] [[package]] @@ -3094,7 +3097,10 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d3762e3740cdbf2fd2be465cc2c26d643ad17353cc2e0223d211c1b096118bd" dependencies = [ + "humansize", "itoa", + "num-traits", + "percent-encoding", "rinja_derive", ] diff --git a/REUSE.toml b/REUSE.toml index 1a30d8016c9..efd70555247 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -163,7 +163,7 @@ SPDX-License-Identifier = "MIT OR Apache-2.0" path = "src/llvm-project/**" precedence = "override" SPDX-FileCopyrightText = [ - "2003-2019 by the contributors listed in [CREDITS.TXT](https://github.com/rust-lang/llvm-project/blob/7738295178045041669876bf32b0543ec8319a5c/llvm/CREDITS.TXT)", + "2003-2019 by the contributors listed in CREDITS.TXT (https://github.com/rust-lang/llvm-project/blob/7738295178045041669876bf32b0543ec8319a5c/llvm/CREDITS.TXT)", "2010 Apple Inc", "2003-2019 University of Illinois at Urbana-Champaign.", ] diff --git a/src/bootstrap/src/core/build_steps/run.rs b/src/bootstrap/src/core/build_steps/run.rs index fde1693646a..65d635c0bd6 100644 --- a/src/bootstrap/src/core/build_steps/run.rs +++ b/src/bootstrap/src/core/build_steps/run.rs @@ -212,11 +212,13 @@ impl Step for GenerateCopyright { let license_metadata = builder.ensure(CollectLicenseMetadata); // Temporary location, it will be moved to the proper one once it's accurate. 
- let dest = builder.out.join("COPYRIGHT.md"); + let dest = builder.out.join("COPYRIGHT.html"); let mut cmd = builder.tool_cmd(Tool::GenerateCopyright); cmd.env("LICENSE_METADATA", &license_metadata); cmd.env("DEST", &dest); + cmd.env("OUT_DIR", &builder.out); + cmd.env("CARGO", &builder.initial_cargo); cmd.run(builder); dest diff --git a/src/tools/collect-license-metadata/Cargo.toml b/src/tools/collect-license-metadata/Cargo.toml index d0820cfc2a0..edf9e5c5393 100644 --- a/src/tools/collect-license-metadata/Cargo.toml +++ b/src/tools/collect-license-metadata/Cargo.toml @@ -2,6 +2,8 @@ name = "collect-license-metadata" version = "0.1.0" edition = "2021" +description = "Runs the reuse tool and caches the output, so rust toolchain devs don't need to have reuse installed" +license = "MIT OR Apache-2.0" [dependencies] anyhow = "1.0.65" diff --git a/src/tools/collect-license-metadata/src/main.rs b/src/tools/collect-license-metadata/src/main.rs index ca6aa01d78c..dce36bb17b6 100644 --- a/src/tools/collect-license-metadata/src/main.rs +++ b/src/tools/collect-license-metadata/src/main.rs @@ -8,6 +8,11 @@ use anyhow::Error; use crate::licenses::LicensesInterner; +/// The entry point to the binary. +/// +/// You should probably let `bootstrap` execute this program instead of running it directly. 
+/// +/// Run `x.py run collect-license-metadata` fn main() -> Result<(), Error> { let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into(); let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into(); diff --git a/src/tools/generate-copyright/Cargo.toml b/src/tools/generate-copyright/Cargo.toml index 899ef0f8a6c..404101abd41 100644 --- a/src/tools/generate-copyright/Cargo.toml +++ b/src/tools/generate-copyright/Cargo.toml @@ -2,10 +2,14 @@ name = "generate-copyright" version = "0.1.0" edition = "2021" +description = "Produces a manifest of all the copyrighted materials in the Rust Toolchain" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] anyhow = "1.0.65" +cargo_metadata = "0.18.1" +rinja = "0.3.0" serde = { version = "1.0.147", features = ["derive"] } serde_json = "1.0.85" +thiserror = "1" diff --git a/src/tools/generate-copyright/src/cargo_metadata.rs b/src/tools/generate-copyright/src/cargo_metadata.rs new file mode 100644 index 00000000000..c85e4aa371a --- /dev/null +++ b/src/tools/generate-copyright/src/cargo_metadata.rs @@ -0,0 +1,191 @@ +//! 
Gets metadata about a workspace from Cargo + +use std::collections::BTreeMap; +use std::ffi::OsStr; +use std::path::{Path, PathBuf}; + +/// Describes how this module can fail +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("I/O Error: {0:?}")] + Io(#[from] std::io::Error), + #[error("Failed get output from cargo-metadata: {0:?}")] + GettingMetadata(#[from] cargo_metadata::Error), + #[error("Failed to run cargo vendor: {0:?}")] + LaunchingVendor(std::io::Error), + #[error("Failed to complete cargo vendor")] + RunningVendor, + #[error("Bad path {0:?} whilst scraping files")] + Scraping(PathBuf), +} + +/// Uniquely describes a package on crates.io +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct Package { + /// The name of the package + pub name: String, + /// The version number + pub version: String, +} + +/// Extra data about a package +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct PackageMetadata { + /// The license it is under + pub license: String, + /// The list of authors from the package metadata + pub authors: Vec<String>, + /// A list of important files from the package, with their contents. + /// + /// This includes *COPYRIGHT*, *NOTICE*, *AUTHOR*, *LICENSE*, and *LICENCE* files, case-insensitive. + pub notices: BTreeMap<String, String>, + /// If this is true, this dep is in the Rust Standard Library + pub is_in_libstd: Option<bool>, +} + +/// Use `cargo metadata` and `cargo vendor` to get a list of dependencies and their license data. +/// +/// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can +/// grab the license files. +/// +/// Any dependency with a path beginning with `root_path` is ignored, as we +/// assume `reuse` has covered it already. 
+pub fn get_metadata_and_notices( + cargo: &Path, + dest: &Path, + root_path: &Path, + manifest_paths: &[&Path], +) -> Result<BTreeMap<Package, PackageMetadata>, Error> { + let mut output = get_metadata(cargo, root_path, manifest_paths)?; + + // Now do a cargo-vendor and grab everything + let vendor_path = dest.join("vendor"); + println!("Vendoring deps into {}...", vendor_path.display()); + run_cargo_vendor(cargo, &vendor_path, manifest_paths)?; + + // Now for each dependency we found, go and grab any important looking files + for (package, metadata) in output.iter_mut() { + load_important_files(package, metadata, &vendor_path)?; + } + + Ok(output) +} + +/// Use `cargo metadata` to get a list of dependencies and their license data. +/// +/// Any dependency with a path beginning with `root_path` is ignored, as we +/// assume `reuse` has covered it already. +pub fn get_metadata( + cargo: &Path, + root_path: &Path, + manifest_paths: &[&Path], +) -> Result<BTreeMap<Package, PackageMetadata>, Error> { + let mut output = BTreeMap::new(); + // Look at the metadata for each manifest + for manifest_path in manifest_paths { + if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) { + panic!("cargo_manifest::get requires a path to a Cargo.toml file"); + } + let metadata = cargo_metadata::MetadataCommand::new() + .cargo_path(cargo) + .env("RUSTC_BOOTSTRAP", "1") + .manifest_path(manifest_path) + .exec()?; + for package in metadata.packages { + let manifest_path = package.manifest_path.as_path(); + if manifest_path.starts_with(root_path) { + // it's an in-tree dependency and reuse covers it + continue; + } + // otherwise it's an out-of-tree dependency + let package_id = Package { name: package.name, version: package.version.to_string() }; + output.insert( + package_id, + PackageMetadata { + license: package.license.unwrap_or_else(|| String::from("Unspecified")), + authors: package.authors, + notices: BTreeMap::new(), + is_in_libstd: None, + }, + ); + } + } + + Ok(output) +} + +/// Run cargo-vendor, fetching into the given dir +fn 
run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Result<(), Error> { + let mut vendor_command = std::process::Command::new(cargo); + vendor_command.env("RUSTC_BOOTSTRAP", "1"); + vendor_command.arg("vendor"); + vendor_command.arg("--quiet"); + vendor_command.arg("--versioned-dirs"); + for manifest_path in manifest_paths { + vendor_command.arg("-s"); + vendor_command.arg(manifest_path); + } + vendor_command.arg(dest); + + let vendor_status = vendor_command.status().map_err(Error::LaunchingVendor)?; + + if !vendor_status.success() { + return Err(Error::RunningVendor); + } + + Ok(()) +} + +/// Add important files off disk into this dependency. +/// +/// Maybe one-day Cargo.toml will contain enough information that we don't need +/// to do this manual scraping. +fn load_important_files( + package: &Package, + dep: &mut PackageMetadata, + vendor_root: &Path, +) -> Result<(), Error> { + let name_version = format!("{}-{}", package.name, package.version); + println!("Scraping notices for {}...", name_version); + let dep_vendor_path = vendor_root.join(name_version); + for entry in std::fs::read_dir(dep_vendor_path)? { + let entry = entry?; + let metadata = entry.metadata()?; + let path = entry.path(); + let Some(filename) = path.file_name() else { + return Err(Error::Scraping(path)); + }; + let lc_filename = filename.to_ascii_lowercase(); + let lc_filename_str = lc_filename.to_string_lossy(); + let mut keep = false; + for m in ["copyright", "licence", "license", "author", "notice"] { + if lc_filename_str.contains(m) { + keep = true; + break; + } + } + if keep { + if metadata.is_dir() { + for inner_entry in std::fs::read_dir(entry.path())? 
{ + let inner_entry = inner_entry?; + if inner_entry.metadata()?.is_file() { + let inner_filename = inner_entry.file_name(); + let inner_filename_str = inner_filename.to_string_lossy(); + let qualified_filename = + format!("{}/{}", lc_filename_str, inner_filename_str); + println!("Scraping {}", qualified_filename); + dep.notices.insert( + qualified_filename.to_string(), + std::fs::read_to_string(inner_entry.path())?, + ); + } + } + } else if metadata.is_file() { + let filename = filename.to_string_lossy(); + println!("Scraping {}", filename); + dep.notices.insert(filename.to_string(), std::fs::read_to_string(path)?); + } + } + } + Ok(()) +} diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs index dce1a558697..afa75d0d671 100644 --- a/src/tools/generate-copyright/src/main.rs +++ b/src/tools/generate-copyright/src/main.rs @@ -1,79 +1,70 @@ -use std::io::Write; -use std::path::PathBuf; +use std::collections::BTreeMap; +use std::path::{Path, PathBuf}; use anyhow::Error; +use rinja::Template; +mod cargo_metadata; + +#[derive(Template)] +#[template(path = "COPYRIGHT.html")] +struct CopyrightTemplate { + in_tree: Node, + dependencies: BTreeMap<cargo_metadata::Package, cargo_metadata::PackageMetadata>, +} + +/// The entry point to the binary. +/// +/// You should probably let `bootstrap` execute this program instead of running it directly. 
+/// +/// Run `x.py run generate-copyright` fn main() -> Result<(), Error> { - let dest = env_path("DEST")?; + let dest_file = env_path("DEST")?; + let out_dir = env_path("OUT_DIR")?; + let cargo = env_path("CARGO")?; let license_metadata = env_path("LICENSE_METADATA")?; - let metadata: Metadata = serde_json::from_slice(&std::fs::read(&license_metadata)?)?; + let collected_tree_metadata: Metadata = + serde_json::from_slice(&std::fs::read(&license_metadata)?)?; - let mut buffer = Vec::new(); - render_recursive(&metadata.files, &mut buffer, 0)?; + let root_path = std::path::absolute(".")?; + let workspace_paths = [ + Path::new("./Cargo.toml"), + Path::new("./src/tools/cargo/Cargo.toml"), + Path::new("./library/Cargo.toml"), + ]; + let mut collected_cargo_metadata = + cargo_metadata::get_metadata_and_notices(&cargo, &out_dir, &root_path, &workspace_paths)?; - std::fs::write(&dest, &buffer)?; - - Ok(()) -} - -fn render_recursive(node: &Node, buffer: &mut Vec<u8>, depth: usize) -> Result<(), Error> { - let prefix = std::iter::repeat("> ").take(depth + 1).collect::<String>(); - - match node { - Node::Root { children } => { - for child in children { - render_recursive(child, buffer, depth)?; - } - } - Node::Directory { name, children, license } => { - render_license(&prefix, std::iter::once(name), license.as_ref(), buffer)?; - if !children.is_empty() { - writeln!(buffer, "{prefix}")?; - writeln!(buffer, "{prefix}*Exceptions:*")?; - for child in children { - writeln!(buffer, "{prefix}")?; - render_recursive(child, buffer, depth + 1)?; - } - } - } - Node::Group { files, directories, license } => { - render_license(&prefix, directories.iter().chain(files.iter()), Some(license), buffer)?; - } - Node::File { name, license } => { - render_license(&prefix, std::iter::once(name), Some(license), buffer)?; - } - } - - Ok(()) -} - -fn render_license<'a>( - prefix: &str, - names: impl Iterator<Item = &'a String>, - license: Option<&License>, - buffer: &mut Vec<u8>, -) -> Result<(), Error> { - for name in names { - 
writeln!(buffer, "{prefix}**`{name}`** ")?; - } - if let Some(license) = license { - writeln!(buffer, "{prefix}License: `{}`", license.spdx)?; - for copyright in license.copyright.iter() { - writeln!(buffer, "{prefix}Copyright: {copyright}")?; - } - } + let stdlib_set = + cargo_metadata::get_metadata(&cargo, &root_path, &[Path::new("./library/std/Cargo.toml")])?; + + for (key, value) in collected_cargo_metadata.iter_mut() { + value.is_in_libstd = Some(stdlib_set.contains_key(key)); + } + + let template = CopyrightTemplate { + in_tree: collected_tree_metadata.files, + dependencies: collected_cargo_metadata, + }; + + let output = template.render()?; + + std::fs::write(&dest_file, output)?; Ok(()) } +/// Describes a tree of metadata for our filesystem tree #[derive(serde::Deserialize)] struct Metadata { files: Node, } -#[derive(serde::Deserialize)] +/// Describes one node in our metadata tree +#[derive(serde::Deserialize, rinja::Template)] #[serde(rename_all = "kebab-case", tag = "type")] +#[template(path = "Node.html")] pub(crate) enum Node { Root { children: Vec<Node> }, Directory { name: String, children: Vec<Node>, license: Option<License> }, @@ -81,12 +72,14 @@ pub(crate) enum Node { Group { files: Vec<String>, directories: Vec<String>, license: License }, } +/// A License has an SPDX license name and a list of copyright holders. #[derive(serde::Deserialize)] struct License { spdx: String, copyright: Vec<String>, } +/// Grab an environment variable as a PathBuf, or fail nicely. fn env_path(var: &str) -> Result<PathBuf, Error> { if let Some(var) = std::env::var_os(var) { Ok(var.into()) diff --git a/src/tools/generate-copyright/templates/COPYRIGHT.html b/src/tools/generate-copyright/templates/COPYRIGHT.html new file mode 100644 index 00000000000..ccb177a54d4 --- /dev/null +++ b/src/tools/generate-copyright/templates/COPYRIGHT.html @@ -0,0 +1,54 @@ + + + + + Copyright notices for The Rust Toolchain + + + +

Copyright notices for The Rust Toolchain

+ +

This file describes the copyright and licensing information for the source +code within The Rust Project git tree, and the third-party dependencies used +when building the Rust toolchain (including the Rust Standard Library).

+ +

Table of Contents

+ + +

In-tree files

+ +

The following licenses cover the in-tree source files that were used in this +release:

+ +{{ in_tree|safe }} + +

Out-of-tree dependencies

+ +

The following licenses cover the out-of-tree crates that were used in this +release:

+ +{% for (key, value) in dependencies %} +

📦 {{key.name}}-{{key.version}}

+

URL: https://crates.io/crates/{{ key.name }}/{{ key.version }}

+

In libstd: {% if value.is_in_libstd.unwrap() %} Yes {% else %} No {% endif %}

+

Authors: {{ value.authors|join(", ") }}

+

License: {{ value.license }}

+ {% let len = value.notices.len() %} + {% if len > 0 %} +

Notices: + {% for (notice_name, notice_text) in value.notices %} +

+ {{ notice_name }} +
+{{ notice_text }}
+                
+
+ {% endfor %} +

+ {% endif %} +{% endfor %} + + \ No newline at end of file diff --git a/src/tools/generate-copyright/templates/Node.html b/src/tools/generate-copyright/templates/Node.html new file mode 100644 index 00000000000..a71a1bf3b73 --- /dev/null +++ b/src/tools/generate-copyright/templates/Node.html @@ -0,0 +1,71 @@ +{% match self %} + +{% when Node::Root { children } %} + +{% for child in children %} +{{ child|safe }} +{% endfor %} + +{% when Node::Directory { name, children, license } %} + +
+ +

+ File/Directory: {{ name }} +

+ + {% if let Some(license) = license %} + +

License: {{ license.spdx }}

+ {% for copyright in license.copyright.iter() %} +

Copyright: {{ copyright }}

+ {% endfor %} + + {% endif %} + + {% if !children.is_empty() %} + +

Exceptions:

+ {% for child in children %} + {{ child|safe }} + {% endfor %} + + {% endif %} + +
+ +{% when Node::File { name, license } %} + +
+

+ File/Directory: {{ name }} +

+ +

License: {{ license.spdx }}

+ {% for copyright in license.copyright.iter() %} +

Copyright: {{ copyright }}

+ {% endfor %} +
+ +{% when Node::Group { files, directories, license } %} + +
+ +

+ File/Directory: + {% for name in files %} + {{ name }} + {% endfor %} + {% for name in directories %} + {{ name }} + {% endfor %} +

+ +

License: {{ license.spdx }}

+ {% for copyright in license.copyright.iter() %} +

Copyright: {{ copyright }}

+ {% endfor %} + +
+ +{% endmatch %}