mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-28 09:44:08 +00:00
Rollup merge of #88795 - FabianWolff:issue-88684, r=wesleywiser
Print a note if a character literal contains a variation selector. Fixes #88684.
This commit is contained in:
commit
c2cdba42b9
@ -3,7 +3,7 @@
|
||||
use std::iter::once;
|
||||
use std::ops::Range;
|
||||
|
||||
use rustc_errors::{Applicability, Handler};
|
||||
use rustc_errors::{pluralize, Applicability, Handler};
|
||||
use rustc_lexer::unescape::{EscapeError, Mode};
|
||||
use rustc_span::{BytePos, Span};
|
||||
|
||||
@ -49,24 +49,57 @@ pub(crate) fn emit_unescape_error(
|
||||
.emit();
|
||||
}
|
||||
EscapeError::MoreThanOneChar => {
|
||||
let (prefix, msg) = if mode.is_bytes() {
|
||||
("b", "if you meant to write a byte string literal, use double quotes")
|
||||
} else {
|
||||
("", "if you meant to write a `str` literal, use double quotes")
|
||||
};
|
||||
use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
|
||||
|
||||
handler
|
||||
.struct_span_err(
|
||||
span_with_quotes,
|
||||
"character literal may only contain one codepoint",
|
||||
)
|
||||
.span_suggestion(
|
||||
let mut has_help = false;
|
||||
let mut handler = handler.struct_span_err(
|
||||
span_with_quotes,
|
||||
"character literal may only contain one codepoint",
|
||||
);
|
||||
|
||||
if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
|
||||
let escaped_marks =
|
||||
lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
|
||||
handler.span_note(
|
||||
span,
|
||||
&format!(
|
||||
"this `{}` is followed by the combining mark{} `{}`",
|
||||
lit.chars().next().unwrap(),
|
||||
pluralize!(escaped_marks.len()),
|
||||
escaped_marks.join(""),
|
||||
),
|
||||
);
|
||||
let normalized = lit.nfc().to_string();
|
||||
if normalized.chars().count() == 1 {
|
||||
has_help = true;
|
||||
handler.span_suggestion(
|
||||
span,
|
||||
&format!(
|
||||
"consider using the normalized form `{}` of this character",
|
||||
normalized.chars().next().unwrap().escape_default()
|
||||
),
|
||||
normalized,
|
||||
Applicability::MachineApplicable,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if !has_help {
|
||||
let (prefix, msg) = if mode.is_bytes() {
|
||||
("b", "if you meant to write a byte string literal, use double quotes")
|
||||
} else {
|
||||
("", "if you meant to write a `str` literal, use double quotes")
|
||||
};
|
||||
|
||||
handler.span_suggestion(
|
||||
span_with_quotes,
|
||||
msg,
|
||||
format!("{}\"{}\"", prefix, lit),
|
||||
Applicability::MachineApplicable,
|
||||
)
|
||||
.emit();
|
||||
);
|
||||
}
|
||||
|
||||
handler.emit();
|
||||
}
|
||||
EscapeError::EscapeOnlyChar => {
|
||||
let (c, char_span) = last_char();
|
||||
|
21
src/test/ui/parser/unicode-character-literal.fixed
Normal file
21
src/test/ui/parser/unicode-character-literal.fixed
Normal file
@ -0,0 +1,21 @@
|
||||
// Regression test for #88684: Improve diagnostics for combining marks
|
||||
// in character literals.
|
||||
|
||||
// run-rustfix
|
||||
|
||||
fn main() {
|
||||
let _spade = "♠️";
|
||||
//~^ ERROR: character literal may only contain one codepoint
|
||||
//~| NOTE: this `♠` is followed by the combining mark `\u{fe0f}`
|
||||
//~| HELP: if you meant to write a `str` literal, use double quotes
|
||||
|
||||
let _s = "ṩ̂̊";
|
||||
//~^ ERROR: character literal may only contain one codepoint
|
||||
//~| NOTE: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
|
||||
//~| HELP: if you meant to write a `str` literal, use double quotes
|
||||
|
||||
let _a = 'Å';
|
||||
//~^ ERROR: character literal may only contain one codepoint
|
||||
//~| NOTE: this `A` is followed by the combining mark `\u{30a}`
|
||||
//~| HELP: consider using the normalized form `\u{c5}` of this character
|
||||
}
|
21
src/test/ui/parser/unicode-character-literal.rs
Normal file
21
src/test/ui/parser/unicode-character-literal.rs
Normal file
@ -0,0 +1,21 @@
|
||||
// Regression test for #88684: Improve diagnostics for combining marks
|
||||
// in character literals.
|
||||
|
||||
// run-rustfix
|
||||
|
||||
fn main() {
|
||||
let _spade = '♠️';
|
||||
//~^ ERROR: character literal may only contain one codepoint
|
||||
//~| NOTE: this `♠` is followed by the combining mark `\u{fe0f}`
|
||||
//~| HELP: if you meant to write a `str` literal, use double quotes
|
||||
|
||||
let _s = 'ṩ̂̊';
|
||||
//~^ ERROR: character literal may only contain one codepoint
|
||||
//~| NOTE: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
|
||||
//~| HELP: if you meant to write a `str` literal, use double quotes
|
||||
|
||||
let _a = 'Å';
|
||||
//~^ ERROR: character literal may only contain one codepoint
|
||||
//~| NOTE: this `A` is followed by the combining mark `\u{30a}`
|
||||
//~| HELP: consider using the normalized form `\u{c5}` of this character
|
||||
}
|
48
src/test/ui/parser/unicode-character-literal.stderr
Normal file
48
src/test/ui/parser/unicode-character-literal.stderr
Normal file
@ -0,0 +1,48 @@
|
||||
error: character literal may only contain one codepoint
|
||||
--> $DIR/unicode-character-literal.rs:7:18
|
||||
|
|
||||
LL | let _spade = '♠️';
|
||||
| ^^^
|
||||
|
|
||||
note: this `♠` is followed by the combining mark `\u{fe0f}`
|
||||
--> $DIR/unicode-character-literal.rs:7:19
|
||||
|
|
||||
LL | let _spade = '♠️';
|
||||
| ^
|
||||
help: if you meant to write a `str` literal, use double quotes
|
||||
|
|
||||
LL | let _spade = "♠️";
|
||||
| ~~~
|
||||
|
||||
error: character literal may only contain one codepoint
|
||||
--> $DIR/unicode-character-literal.rs:12:14
|
||||
|
|
||||
LL | let _s = 'ṩ̂̊';
|
||||
| ^^^
|
||||
|
|
||||
note: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
|
||||
--> $DIR/unicode-character-literal.rs:12:15
|
||||
|
|
||||
LL | let _s = 'ṩ̂̊';
|
||||
| ^
|
||||
help: if you meant to write a `str` literal, use double quotes
|
||||
|
|
||||
LL | let _s = "ṩ̂̊";
|
||||
| ~~~
|
||||
|
||||
error: character literal may only contain one codepoint
|
||||
--> $DIR/unicode-character-literal.rs:17:14
|
||||
|
|
||||
LL | let _a = 'Å';
|
||||
| ^-^
|
||||
| |
|
||||
| help: consider using the normalized form `\u{c5}` of this character: `Å`
|
||||
|
|
||||
note: this `A` is followed by the combining mark `\u{30a}`
|
||||
--> $DIR/unicode-character-literal.rs:17:15
|
||||
|
|
||||
LL | let _a = 'Å';
|
||||
| ^
|
||||
|
||||
error: aborting due to 3 previous errors
|
||||
|
Loading…
Reference in New Issue
Block a user