From 0bcddfe23a4b96d970f953b99a2e4f28ece3c0d6 Mon Sep 17 00:00:00 2001
From: Charles Lew <crlf0710@gmail.com>
Date: Sat, 23 Nov 2019 22:33:40 +0800
Subject: [PATCH 1/3] Normalize identifiers in librustc_parse.

---
 Cargo.lock                      |  8 ++++++--
 src/librustc_parse/Cargo.toml   |  1 +
 src/librustc_parse/lexer/mod.rs | 17 +++++++++++++++--
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 5132f77e578..02717c85ccf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3696,6 +3696,7 @@ dependencies = [
  "smallvec 1.0.0",
  "syntax",
  "syntax_pos",
+ "unicode-normalization",
 ]
 
 [[package]]
@@ -4913,9 +4914,12 @@ dependencies = [
 
 [[package]]
 name = "unicode-normalization"
-version = "0.1.7"
+version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25"
+checksum = "b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf"
+dependencies = [
+ "smallvec 1.0.0",
+]
 
 [[package]]
 name = "unicode-segmentation"
diff --git a/src/librustc_parse/Cargo.toml b/src/librustc_parse/Cargo.toml
index fb5cb742ab6..73458a444f4 100644
--- a/src/librustc_parse/Cargo.toml
+++ b/src/librustc_parse/Cargo.toml
@@ -20,3 +20,4 @@ rustc_error_codes = { path = "../librustc_error_codes" }
 smallvec = { version = "1.0", features = ["union", "may_dangle"] }
 syntax_pos = { path = "../libsyntax_pos" }
 syntax = { path = "../libsyntax" }
+unicode-normalization = "0.1.11"
diff --git a/src/librustc_parse/lexer/mod.rs b/src/librustc_parse/lexer/mod.rs
index e5d3927af86..d69cd14d544 100644
--- a/src/librustc_parse/lexer/mod.rs
+++ b/src/librustc_parse/lexer/mod.rs
@@ -220,8 +220,7 @@ impl<'a> StringReader<'a> {
                 if is_raw_ident {
                     ident_start = ident_start + BytePos(2);
                 }
-                // FIXME: perform NFKC normalization here. (Issue #2253)
-                let sym = self.symbol_from(ident_start);
+                let sym = self.nfc_symbol_from(ident_start);
                 if is_raw_ident {
                     let span = self.mk_sp(start, self.pos);
                     if !sym.can_be_raw() {
@@ -470,6 +469,20 @@ impl<'a> StringReader<'a> {
         Symbol::intern(self.str_from_to(start, end))
     }
 
+    /// Like `symbol_from`, but with the text normalized into Unicode NFC form.
+    fn nfc_symbol_from(&self, start: BytePos) -> Symbol {
+        use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};
+        debug!("taking an normalized ident from {:?} to {:?}", start, self.pos);
+        let sym = self.str_from(start);
+        match is_nfc_quick(sym.chars()) {
+            IsNormalized::Yes => Symbol::intern(sym),
+            _ => {
+                let sym_str: String = sym.chars().nfc().collect();
+                Symbol::intern(&sym_str)
+            }
+        }
+    }
+
     /// Slice of the source text spanning from `start` up to but excluding `end`.
     fn str_from_to(&self, start: BytePos, end: BytePos) -> &str {
         &self.src[self.src_index(start)..self.src_index(end)]

From 541d879f710c05aaa47ffd16373d83b949ff1edb Mon Sep 17 00:00:00 2001
From: Charles Lew <crlf0710@gmail.com>
Date: Sat, 23 Nov 2019 22:37:46 +0800
Subject: [PATCH 2/3] Add a test and bless existing test case.

---
 src/test/ui/codemap_tests/unicode_2.stderr | 2 +-
 src/test/ui/rfc-2457/idents-normalized.rs  | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 src/test/ui/rfc-2457/idents-normalized.rs

diff --git a/src/test/ui/codemap_tests/unicode_2.stderr b/src/test/ui/codemap_tests/unicode_2.stderr
index 92634d8e5f9..c01942712d4 100644
--- a/src/test/ui/codemap_tests/unicode_2.stderr
+++ b/src/test/ui/codemap_tests/unicode_2.stderr
@@ -14,7 +14,7 @@ LL |     let _ = ("아あ", 1i42);
    |
    = help: valid widths are 8, 16, 32, 64 and 128
 
-error[E0425]: cannot find value `a̐é` in this scope
+error[E0425]: cannot find value `a̐é` in this scope
   --> $DIR/unicode_2.rs:6:13
    |
 LL |     let _ = a̐é;
diff --git a/src/test/ui/rfc-2457/idents-normalized.rs b/src/test/ui/rfc-2457/idents-normalized.rs
new file mode 100644
index 00000000000..109cec7548e
--- /dev/null
+++ b/src/test/ui/rfc-2457/idents-normalized.rs
@@ -0,0 +1,8 @@
+// check-pass
+#![feature(non_ascii_idents)]
+
+struct Résumé; // ['LATIN SMALL LETTER E WITH ACUTE']
+
+fn main() {
+    let _ = Résumé; // ['LATIN SMALL LETTER E', 'COMBINING ACUTE ACCENT']
+}

From 27e7a1baedbcc5ddaf44f930860828dae99a7ebf Mon Sep 17 00:00:00 2001
From: Charles Lew <crlf0710@gmail.com>
Date: Thu, 19 Dec 2019 11:57:30 +0800
Subject: [PATCH 3/3] Add unicode-normalization to whitelist.

---
 Cargo.lock                                | 1 +
 src/tools/rustc-workspace-hack/Cargo.toml | 3 ++-
 src/tools/tidy/src/deps.rs                | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/Cargo.lock b/Cargo.lock
index 02717c85ccf..f3fad1296bf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3333,6 +3333,7 @@ dependencies = [
  "serde",
  "serde_json",
  "smallvec 0.6.10",
+ "smallvec 1.0.0",
  "syn 0.15.35",
  "url 2.1.0",
  "winapi 0.3.8",
diff --git a/src/tools/rustc-workspace-hack/Cargo.toml b/src/tools/rustc-workspace-hack/Cargo.toml
index 285af038a1e..fced6c52012 100644
--- a/src/tools/rustc-workspace-hack/Cargo.toml
+++ b/src/tools/rustc-workspace-hack/Cargo.toml
@@ -62,7 +62,8 @@ curl-sys = { version = "0.4.13", features = ["http2", "libnghttp2-sys"], optiona
 crossbeam-utils = { version = "0.6.5", features = ["nightly"] }
 serde = { version = "1.0.82", features = ['derive'] }
 serde_json = { version = "1.0.31", features = ["raw_value"] }
-smallvec = { version = "0.6", features = ['union', 'may_dangle'] }
+smallvec-0_6 = { package = "smallvec", version = "0.6", features = ['union', 'may_dangle'] }
+smallvec = { version = "1.0", features = ['union', 'may_dangle'] }
 url = { version = "2.0", features = ['serde'] }
 syn = { version = "0.15", features = ['full'] }
 
diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs
index cb48f723d86..a3042803dd7 100644
--- a/src/tools/tidy/src/deps.rs
+++ b/src/tools/tidy/src/deps.rs
@@ -170,6 +170,7 @@ const WHITELIST: &[Crate<'_>] = &[
     Crate("term_size"),
     Crate("thread_local"),
     Crate("ucd-util"),
+    Crate("unicode-normalization"),
     Crate("unicode-width"),
     Crate("unicode-xid"),
     Crate("unreachable"),