From 8081096a7f9b84fe780f4426d70f4c5bb767eba8 Mon Sep 17 00:00:00 2001
From: Michael Howell <michael@notriddle.com>
Date: Mon, 27 Jun 2022 11:07:16 -0700
Subject: [PATCH] Add documentation

---
 src/librustdoc/html/static/js/externs.js | 53 ++++++++++++++++++++++++
 src/librustdoc/html/static/js/search.js  | 35 +++++++++++++++-
 2 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/src/librustdoc/html/static/js/externs.js b/src/librustdoc/html/static/js/externs.js
index defdc20132e..141d76d59e1 100644
--- a/src/librustdoc/html/static/js/externs.js
+++ b/src/librustdoc/html/static/js/externs.js
@@ -81,3 +81,56 @@ let ResultsTable;
  * }}
  */
 let Results;
+
+/**
+ * A pair of [inputs, outputs], or 0 for null. This gets stored in the search index.
+ * The JavaScript deserializes this into FunctionSearchType.
+ *
+ * An input or output can be encoded as just a number if there is only one of them, AND
+ * it has no generics. The no generics rule exists to avoid ambiguity: imagine if you had
+ * a function with a single output, and that output had a single generic:
+ *
+ *     fn something() -> Result<usize, usize>
+ *
+ * If output was allowed to be any RawFunctionType, it would look like this:
+ *
+ *     [[], [50, [3, 3]]]
+ *
+ * The problem is that the above output could be interpreted as either a type with ID 50 and two
+ * generics, or it could be interpreted as a pair of types, the first one with ID 50 and the second
+ * with ID 3 and a single generic parameter that is also ID 3. We avoid this ambiguity by choosing
+ * in favor of the pair of types interpretation. This is why the `(number|Array<RawFunctionType>)`
+ * is used instead of `(RawFunctionType|Array<RawFunctionType>)`.
+ *
+ * @typedef {(
+ *     0 |
+ *     [(number|Array<RawFunctionType>)] |
+ *     [(number|Array<RawFunctionType>), (number|Array<RawFunctionType>)]
+ * )}
+ */
+let RawFunctionSearchType;
+
+/**
+ * A single function input or output type. This is either a single path ID, or a pair of
+ * [path ID, generics].
+ *
+ * @typedef {number | [number, Array<RawFunctionType>]}
+ */
+let RawFunctionType;
+
+/**
+ * @typedef {{
+ *     inputs: Array<FunctionType>,
+ *     outputs: Array<FunctionType>,
+ * }}
+ */
+let FunctionSearchType;
+
+/**
+ * @typedef {{
+ *     name: (null|string),
+ *     ty: (null|number),
+ *     generics: Array<FunctionType>,
+ * }}
+ */
+let FunctionType;
diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js
index 54057627c92..a766dd68e10 100644
--- a/src/librustdoc/html/static/js/search.js
+++ b/src/librustdoc/html/static/js/search.js
@@ -1825,6 +1825,24 @@ function initSearch(rawSearchIndex) {
             filterCrates);
     }
 
+    /**
+     * Convert a list of RawFunctionType / ID to object-based FunctionType.
+     *
+     * Crates often have lots of functions in them, and it's common to have a large number of
+     * functions that operate on a small set of data types, so the search index compresses them
+     * by encoding function parameter and return types as indexes into an array of names.
+     *
+     * Even when a general-purpose compression algorithm is used, this is still a win. I checked.
+     * https://github.com/rust-lang/rust/pull/98475#issue-1284395985
+     *
+     * The format for individual function types is encoded in
+     * librustdoc/html/render/mod.rs: impl Serialize for RenderType
+     *
+     * @param {null|Array<RawFunctionType>} types
+     * @param {Array<{name: string, ty: number}>} lowercasePaths
+     *
+     * @return {Array<FunctionType>}
+     */
     function buildItemSearchTypeAll(types, lowercasePaths) {
         const PATH_INDEX_DATA = 0;
         const GENERICS_DATA = 1;
@@ -1848,6 +1866,21 @@ function initSearch(rawSearchIndex) {
         });
     }
 
+    /**
+     * Convert from RawFunctionSearchType to FunctionSearchType.
+     *
+     * Crates often have lots of functions in them, and function signatures are sometimes complex,
+     * so rustdoc uses a pretty tight encoding for them. This function converts it to a simpler,
+     * object-based encoding so that the actual search code is more readable and easier to debug.
+     *
+     * The raw function search type format is generated using serde in
+     * librustdoc/html/render/mod.rs: impl Serialize for IndexItemFunctionType
+     *
+     * @param {RawFunctionSearchType} functionSearchType
+     * @param {Array<{name: string, ty: number}>} lowercasePaths
+     *
+     * @return {null|FunctionSearchType}
+     */
     function buildFunctionSearchType(functionSearchType, lowercasePaths) {
         const INPUTS_DATA = 0;
         const OUTPUT_DATA = 1;
@@ -1935,7 +1968,7 @@ function initSearch(rawSearchIndex) {
              *   d: Array<string>,
              *   q: Array<string>,
              *   i: Array<Number>,
-             *   f: Array<0 | Object>,
+             *   f: Array<RawFunctionSearchType>,
              *   p: Array<Object>,
              * }}
              */