From 9a926e5d6c100d169b936a66120b7107c016ae5b Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Wed, 25 May 2022 16:24:07 +1000
Subject: [PATCH] Fix metadata stats.

This commit:
- Counts some things that weren't being counted previously, and adds
  an assertion that ensures everything is counted.
- Reorders things so the order of the `eprintln`s matches the code order.
- Adds percentages, and makes clear that the zero bytes count is orthogonal to
  the other measurements.

Example of the new output:
```
55463779 metadata bytes, of which 18054531 bytes (32.6%) are zero
             preamble:       30 bytes ( 0.0%)
                  dep:        0 bytes ( 0.0%)
          lib feature:    17458 bytes ( 0.0%)
            lang item:      337 bytes ( 0.0%)
      diagnostic item:     1788 bytes ( 0.0%)
           native lib:        0 bytes ( 0.0%)
      foreign modules:     5113 bytes ( 0.0%)
       def-path table:   720180 bytes ( 1.3%)
               traits:      359 bytes ( 0.0%)
                impls:    64624 bytes ( 0.1%)
     incoherent_impls:      130 bytes ( 0.0%)
                  mir: 16137354 bytes (29.1%)
                 item: 23773099 bytes (42.9%)
interpret_alloc_index:      599 bytes ( 0.0%)
      proc-macro-data:        0 bytes ( 0.0%)
               tables: 10081135 bytes (18.2%)
 debugger visualizers:        0 bytes ( 0.0%)
     exported symbols:     5666 bytes ( 0.0%)
              hygiene:  1539390 bytes ( 2.8%)
      def-path hashes:  2752564 bytes ( 5.0%)
           source_map:   363540 bytes ( 0.7%)
                final:      413 bytes ( 0.0%)
```
---
 compiler/rustc_metadata/src/rmeta/encoder.rs | 92 +++++++++++++++-----
 1 file changed, 69 insertions(+), 23 deletions(-)

diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs
index e66d226a441..8534188d18a 100644
--- a/compiler/rustc_metadata/src/rmeta/encoder.rs
+++ b/compiler/rustc_metadata/src/rmeta/encoder.rs
@@ -539,9 +539,11 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
 
     fn encode_crate_root(&mut self) -> Lazy<CrateRoot<'tcx>> {
         let tcx = self.tcx;
-        let mut i = self.position();
+        let mut i = 0;
+        let preamble_bytes = self.position() - i;
 
         // Encode the crate deps
+        i = self.position();
         let crate_deps = self.encode_crate_deps();
         let dylib_dependency_formats = self.encode_dylib_dependency_formats();
         let dep_bytes = self.position() - i;
@@ -567,7 +569,9 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
         let native_libraries = self.encode_native_libraries();
         let native_lib_bytes = self.position() - i;
 
+        i = self.position();
         let foreign_modules = self.encode_foreign_modules();
+        let foreign_modules_bytes = self.position() - i;
 
         // Encode DefPathTable
         i = self.position();
@@ -587,6 +591,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
         i = self.position();
         let incoherent_impls = self.encode_incoherent_impls();
         let incoherent_impls_bytes = self.position() - i;
+
         // Encode MIR.
         i = self.position();
         self.encode_mir();
@@ -599,6 +604,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
         let item_bytes = self.position() - i;
 
         // Encode the allocation index
+        i = self.position();
         let interpret_alloc_index = {
             let mut interpret_alloc_index = Vec::new();
             let mut n = 0;
@@ -621,6 +627,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
             }
             self.lazy(interpret_alloc_index)
         };
+        let interpret_alloc_index_bytes = self.position() - i;
 
         // Encode the proc macro data. This affects 'tables',
         // so we need to do this before we encode the tables
@@ -665,9 +672,9 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
         let source_map = self.encode_source_map();
         let source_map_bytes = self.position() - i;
 
+        i = self.position();
         let attrs = tcx.hir().krate_attrs();
         let has_default_lib_allocator = tcx.sess.contains_name(&attrs, sym::default_lib_allocator);
-
         let root = self.lazy(CrateRoot {
             name: tcx.crate_name(LOCAL_CRATE),
             extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
@@ -710,9 +717,34 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
             expn_hashes,
             def_path_hash_map,
         });
+        let final_bytes = self.position() - i;
 
         let total_bytes = self.position();
 
+        let computed_total_bytes = preamble_bytes
+            + dep_bytes
+            + lib_feature_bytes
+            + lang_item_bytes
+            + diagnostic_item_bytes
+            + native_lib_bytes
+            + foreign_modules_bytes
+            + def_path_table_bytes
+            + traits_bytes
+            + impls_bytes
+            + incoherent_impls_bytes
+            + mir_bytes
+            + item_bytes
+            + interpret_alloc_index_bytes
+            + proc_macro_data_bytes
+            + tables_bytes
+            + debugger_visualizers_bytes
+            + exported_symbols_bytes
+            + hygiene_bytes
+            + def_path_hash_map_bytes
+            + source_map_bytes
+            + final_bytes;
+        assert_eq!(total_bytes, computed_total_bytes);
+
         if tcx.sess.meta_stats() {
             let mut zero_bytes = 0;
             for e in self.opaque.data.iter() {
@@ -721,27 +753,41 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
                 }
             }
 
-            eprintln!("metadata stats:");
-            eprintln!("                  dep bytes: {}", dep_bytes);
-            eprintln!("          lib feature bytes: {}", lib_feature_bytes);
-            eprintln!("            lang item bytes: {}", lang_item_bytes);
-            eprintln!("      diagnostic item bytes: {}", diagnostic_item_bytes);
-            eprintln!("               native bytes: {}", native_lib_bytes);
-            eprintln!(" debugger visualizers bytes: {}", debugger_visualizers_bytes);
-            eprintln!("           source_map bytes: {}", source_map_bytes);
-            eprintln!("               traits bytes: {}", traits_bytes);
-            eprintln!("                impls bytes: {}", impls_bytes);
-            eprintln!("     incoherent_impls bytes: {}", incoherent_impls_bytes);
-            eprintln!("         exp. symbols bytes: {}", exported_symbols_bytes);
-            eprintln!("       def-path table bytes: {}", def_path_table_bytes);
-            eprintln!("      def-path hashes bytes: {}", def_path_hash_map_bytes);
-            eprintln!("      proc-macro-data-bytes: {}", proc_macro_data_bytes);
-            eprintln!("                  mir bytes: {}", mir_bytes);
-            eprintln!("                 item bytes: {}", item_bytes);
-            eprintln!("                table bytes: {}", tables_bytes);
-            eprintln!("              hygiene bytes: {}", hygiene_bytes);
-            eprintln!("                 zero bytes: {}", zero_bytes);
-            eprintln!("                total bytes: {}", total_bytes);
+            let perc = |bytes| (bytes * 100) as f64 / total_bytes as f64;
+            let p = |label, bytes| {
+                eprintln!("{:>21}: {:>8} bytes ({:4.1}%)", label, bytes, perc(bytes));
+            };
+
+            eprintln!("");
+            eprintln!(
+                "{} metadata bytes, of which {} bytes ({:.1}%) are zero",
+                total_bytes,
+                zero_bytes,
+                perc(zero_bytes)
+            );
+            p("preamble", preamble_bytes);
+            p("dep", dep_bytes);
+            p("lib feature", lib_feature_bytes);
+            p("lang item", lang_item_bytes);
+            p("diagnostic item", diagnostic_item_bytes);
+            p("native lib", native_lib_bytes);
+            p("foreign modules", foreign_modules_bytes);
+            p("def-path table", def_path_table_bytes);
+            p("traits", traits_bytes);
+            p("impls", impls_bytes);
+            p("incoherent_impls", incoherent_impls_bytes);
+            p("mir", mir_bytes);
+            p("item", item_bytes);
+            p("interpret_alloc_index", interpret_alloc_index_bytes);
+            p("proc-macro-data", proc_macro_data_bytes);
+            p("tables", tables_bytes);
+            p("debugger visualizers", debugger_visualizers_bytes);
+            p("exported symbols", exported_symbols_bytes);
+            p("hygiene", hygiene_bytes);
+            p("def-path hashes", def_path_hash_map_bytes);
+            p("source_map", source_map_bytes);
+            p("final", final_bytes);
+            eprintln!("");
         }
 
         root