diff --git a/Cargo.lock b/Cargo.lock index 53e1e4d7567..9f6df6e14c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3786,7 +3786,7 @@ name = "rustc_lexer" version = "0.1.0" dependencies = [ "expect-test", - "unic-emoji-char", + "unicode-properties", "unicode-xid", ] @@ -5446,38 +5446,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "unic-char-property" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" -dependencies = [ - "unic-char-range", -] - -[[package]] -name = "unic-char-range" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" - -[[package]] -name = "unic-common" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" - -[[package]] -name = "unic-emoji-char" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - [[package]] name = "unic-langid" version = "0.9.1" @@ -5521,15 +5489,6 @@ dependencies = [ "unic-langid-impl", ] -[[package]] -name = "unic-ucd-version" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" -dependencies = [ - "unic-common", -] - [[package]] name = "unicase" version = "2.6.0" @@ -5567,6 +5526,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-properties" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f91c8b21fbbaa18853c3d0801c78f4fc94cdb976699bb03e832e75f7fd22f0" + [[package]] name = "unicode-script" version = "0.5.5" diff --git a/compiler/rustc_lexer/Cargo.toml b/compiler/rustc_lexer/Cargo.toml index 23294dc2e1b..2211ac1c8a7 100644 --- a/compiler/rustc_lexer/Cargo.toml +++ b/compiler/rustc_lexer/Cargo.toml @@ -16,7 +16,11 @@ Rust lexer used by rustc. No stability guarantees are provided. # Note that this crate purposefully does not depend on other rustc crates [dependencies] unicode-xid = "0.2.0" -unic-emoji-char = "0.9.0" + +[dependencies.unicode-properties] +version = "0.1.0" +default-features = false +features = ["emoji"] [dev-dependencies] expect-test = "1.4.0" diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index d511d2b1280..43dfd34a6ff 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -34,6 +34,7 @@ pub use crate::cursor::Cursor; use self::LiteralKind::*; use self::TokenKind::*; use crate::cursor::EOF_CHAR; +use unicode_properties::UnicodeEmoji; /// Parsed token. /// It doesn't contain information about data that has been parsed, @@ -428,9 +429,7 @@ impl Cursor<'_> { Literal { kind, suffix_start } } // Identifier starting with an emoji. Only lexed for graceful error recovery. - c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => { - self.fake_ident_or_unknown_prefix() - } + c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(), _ => Unknown, }; let res = Token::new(token_kind, self.pos_within_token()); @@ -514,9 +513,7 @@ impl Cursor<'_> { // we see a prefix here, it is definitely an unknown prefix. match self.first() { '#' | '"' | '\'' => UnknownPrefix, - c if !c.is_ascii() && unic_emoji_char::is_emoji(c) => { - self.fake_ident_or_unknown_prefix() - } + c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(), _ => Ident, } } @@ -525,7 +522,7 @@ impl Cursor<'_> { // Start is already eaten, eat the rest of identifier. self.eat_while(|c| { unicode_xid::UnicodeXID::is_xid_continue(c) - || (!c.is_ascii() && unic_emoji_char::is_emoji(c)) + || (!c.is_ascii() && c.is_emoji_char()) || c == '\u{200d}' }); // Known prefixes must have been handled earlier. So if diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index 57cbfe68be4..a015c36d7eb 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -270,18 +270,14 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "twox-hash", "type-map", "typenum", - "unic-char-property", - "unic-char-range", - "unic-common", - "unic-emoji-char", "unic-langid", "unic-langid-impl", "unic-langid-macros", "unic-langid-macros-impl", - "unic-ucd-version", "unicase", "unicode-ident", "unicode-normalization", + "unicode-properties", "unicode-script", "unicode-security", "unicode-width", diff --git a/tests/ui/lexer/lex-emoji-identifiers.rs b/tests/ui/lexer/lex-emoji-identifiers.rs index 91b5929c0fe..decf2f00587 100644 --- a/tests/ui/lexer/lex-emoji-identifiers.rs +++ b/tests/ui/lexer/lex-emoji-identifiers.rs @@ -1,9 +1,7 @@ fn invalid_emoji_usages() { let arrow↔️ = "basic emoji"; //~ ERROR: identifiers cannot contain emoji - // FIXME - let planet🪐 = "basic emoji"; //~ ERROR: unknown start of token - // FIXME - let wireless🛜 = "basic emoji"; //~ ERROR: unknown start of token + let planet🪐 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji + let wireless🛜 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji // FIXME let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token //~^ WARN: identifier contains uncommon Unicode codepoints diff --git a/tests/ui/lexer/lex-emoji-identifiers.stderr b/tests/ui/lexer/lex-emoji-identifiers.stderr index 6237c5d0236..747825fa2a9 100644 --- a/tests/ui/lexer/lex-emoji-identifiers.stderr +++ b/tests/ui/lexer/lex-emoji-identifiers.stderr @@ -1,17 +1,5 @@ -error: unknown start of token: \u{1fa90} - --> $DIR/lex-emoji-identifiers.rs:4:15 - | -LL | let planet🪐 = "basic emoji"; - | ^^ - -error: unknown start of token: \u{1f6dc} - --> $DIR/lex-emoji-identifiers.rs:6:17 - | -LL | let wireless🛜 = "basic emoji"; - | ^^ - error: unknown start of token: \u{20e3} - --> $DIR/lex-emoji-identifiers.rs:8:14 + --> $DIR/lex-emoji-identifiers.rs:6:14 | LL | let key1️⃣ = "keycap sequence"; | ^ @@ -22,26 +10,38 @@ error: identifiers cannot contain emoji: `arrow↔️` LL | let arrow↔️ = "basic emoji"; | ^^^^^^ +error: identifiers cannot contain emoji: `planet🪐` + --> $DIR/lex-emoji-identifiers.rs:3:9 + | +LL | let planet🪐 = "basic emoji"; + | ^^^^^^^^ + +error: identifiers cannot contain emoji: `wireless🛜` + --> $DIR/lex-emoji-identifiers.rs:4:9 + | +LL | let wireless🛜 = "basic emoji"; + | ^^^^^^^^^^ + error: identifiers cannot contain emoji: `flag🇺🇳` - --> $DIR/lex-emoji-identifiers.rs:10:9 + --> $DIR/lex-emoji-identifiers.rs:8:9 | LL | let flag🇺🇳 = "flag sequence"; | ^^^^^^ error: identifiers cannot contain emoji: `wales🏴` - --> $DIR/lex-emoji-identifiers.rs:11:9 + --> $DIR/lex-emoji-identifiers.rs:9:9 | LL | let wales🏴 = "tag sequence"; | ^^^^^^^ error: identifiers cannot contain emoji: `folded🙏🏿` - --> $DIR/lex-emoji-identifiers.rs:12:9 + --> $DIR/lex-emoji-identifiers.rs:10:9 | LL | let folded🙏🏿 = "modifier sequence"; | ^^^^^^^^^^ warning: identifier contains uncommon Unicode codepoints - --> $DIR/lex-emoji-identifiers.rs:8:9 + --> $DIR/lex-emoji-identifiers.rs:6:9 | LL | let key1️⃣ = "keycap sequence"; | ^^^^