From 89f273f40dafb693139496ed6f914872b6533fa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Tue, 9 Jul 2024 16:46:09 +0000 Subject: [PATCH] Replace ASCII control chars with Unicode Control Pictures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` error: bare CR not allowed in doc-comment --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:3:32 | LL | /// doc comment with bare CR: '␍' | ^ ``` --- compiler/rustc_errors/src/emitter.rs | 69 ++++++++++++++---- ...-bare-cr-string-literal-doc-comment.stderr | 14 ++-- tests/ui/parser/bad-char-literals.rs | Bin 496 -> 608 bytes tests/ui/parser/bad-char-literals.stderr | 17 ++++- tests/ui/parser/issues/issue-66473.stderr | Bin 1061 -> 1209 bytes tests/ui/parser/issues/issue-68629.stderr | Bin 944 -> 976 bytes tests/ui/parser/issues/issue-68730.stderr | Bin 1266 -> 1294 bytes .../raw/raw-byte-string-literals.stderr | 2 +- ...ral-carriage-returns-in-doc-comment.stderr | 6 +- .../trailing-carriage-return-in-string.stderr | 2 +- tests/ui/parser/utf16-be-without-bom.stderr | Bin 3641 -> 4029 bytes tests/ui/parser/utf16-le-without-bom.stderr | Bin 3603 -> 3939 bytes .../rfc-3348-c-string-literals/no-nuls.stderr | Bin 2028 -> 2040 bytes tests/ui/str/str-escape.stderr | 2 +- 14 files changed, 81 insertions(+), 31 deletions(-) diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index aa47ca16676..95e1b5348b7 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -677,10 +677,7 @@ impl HumanEmitter { .skip(left) .take_while(|ch| { // Make sure that the trimming on the right will fall within the terminal width. - // FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char` - // is. For now, just accept that sometimes the code line will be longer than - // desired. - let next = unicode_width::UnicodeWidthChar::width(*ch).unwrap_or(1); + let next = char_width(*ch); if taken + next > right - left { return false; } @@ -742,11 +739,7 @@ impl HumanEmitter { let left = margin.left(source_string.len()); // Account for unicode characters of width !=0 that were removed. - let left = source_string - .chars() - .take(left) - .map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1)) - .sum(); + let left = source_string.chars().take(left).map(|ch| char_width(ch)).sum(); self.draw_line( buffer, @@ -2039,7 +2032,7 @@ impl HumanEmitter { let sub_len: usize = if is_whitespace_addition { &part.snippet } else { part.snippet.trim() } .chars() - .map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1)) + .map(|ch| char_width(ch)) .sum(); let offset: isize = offsets @@ -2076,11 +2069,8 @@ impl HumanEmitter { } // length of the code after substitution - let full_sub_len = part - .snippet - .chars() - .map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1)) - .sum::() as isize; + let full_sub_len = + part.snippet.chars().map(|ch| char_width(ch)).sum::() as isize; // length of the code to be substituted let snippet_len = span_end_pos as isize - span_start_pos as isize; @@ -2580,6 +2570,40 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[ ('\u{2068}', ""), ('\u{202C}', ""), ('\u{2069}', ""), + // In terminals without Unicode support the following will be garbled, but in *all* terminals + // the underlying codepoint will be as well. We could gate this replacement behind a "unicode + // support" gate. + ('\u{0000}', "␀"), + ('\u{0001}', "␁"), + ('\u{0002}', "␂"), + ('\u{0003}', "␃"), + ('\u{0004}', "␄"), + ('\u{0005}', "␅"), + ('\u{0006}', "␆"), + ('\u{0007}', "␇"), + ('\u{0008}', "␈"), + ('\u{000B}', "␋"), + ('\u{000C}', "␌"), + ('\u{000D}', "␍"), + ('\u{000E}', "␎"), + ('\u{000F}', "␏"), + ('\u{0010}', "␐"), + ('\u{0011}', "␑"), + ('\u{0012}', "␒"), + ('\u{0013}', "␓"), + ('\u{0014}', "␔"), + ('\u{0015}', "␕"), + ('\u{0016}', "␖"), + ('\u{0017}', "␗"), + ('\u{0018}', "␘"), + ('\u{0019}', "␙"), + ('\u{001A}', "␚"), + ('\u{001B}', "␛"), + ('\u{001C}', "␜"), + ('\u{001D}', "␝"), + ('\u{001E}', "␞"), + ('\u{001F}', "␟"), + ('\u{007F}', "␡"), ]; fn normalize_whitespace(str: &str) -> String { @@ -2590,6 +2614,21 @@ fn normalize_whitespace(str: &str) -> String { s } +fn char_width(ch: char) -> usize { + // FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char` is. For now, + // just accept that sometimes the code line will be longer than desired. + match ch { + '\t' => 4, + '\u{0000}' | '\u{0001}' | '\u{0002}' | '\u{0003}' | '\u{0004}' | '\u{0005}' + | '\u{0006}' | '\u{0007}' | '\u{0008}' | '\u{000B}' | '\u{000C}' | '\u{000D}' + | '\u{000E}' | '\u{000F}' | '\u{0010}' | '\u{0011}' | '\u{0012}' | '\u{0013}' + | '\u{0014}' | '\u{0015}' | '\u{0016}' | '\u{0017}' | '\u{0018}' | '\u{0019}' + | '\u{001A}' | '\u{001B}' | '\u{001C}' | '\u{001D}' | '\u{001E}' | '\u{001F}' + | '\u{007F}' => 1, + _ => unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1), + } +} + fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) { buffer.puts(line, col, "| ", Style::LineNumber); } diff --git a/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr index da80991c727..841d5236ede 100644 --- a/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr +++ b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr @@ -1,31 +1,31 @@ error: bare CR not allowed in doc-comment --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:3:32 | -LL | /// doc comment with bare CR: ' ' +LL | /// doc comment with bare CR: '␍' | ^ error: bare CR not allowed in block doc-comment --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:7:38 | -LL | /** block doc comment with bare CR: ' ' */ +LL | /** block doc comment with bare CR: '␍' */ | ^ error: bare CR not allowed in doc-comment --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:12:36 | -LL | //! doc comment with bare CR: ' ' +LL | //! doc comment with bare CR: '␍' | ^ error: bare CR not allowed in block doc-comment --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:15:42 | -LL | /*! block doc comment with bare CR: ' ' */ +LL | /*! block doc comment with bare CR: '␍' */ | ^ error: bare CR not allowed in string, use `\r` instead --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:19:18 | -LL | let _s = "foo bar"; +LL | let _s = "foo␍bar"; | ^ | help: escape the character @@ -36,13 +36,13 @@ LL | let _s = "foo\rbar"; error: bare CR not allowed in raw string --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:22:19 | -LL | let _s = r"bar foo"; +LL | let _s = r"bar␍foo"; | ^ error: unknown character escape: `\r` --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:25:19 | -LL | let _s = "foo\ bar"; +LL | let _s = "foo\␍bar"; | ^ unknown character escape | = help: this is an isolated carriage return; consider checking your editor and version control settings diff --git a/tests/ui/parser/bad-char-literals.rs b/tests/ui/parser/bad-char-literals.rs index 748b4a22253f52301df92a22cf2b848ac6e4726f..c3d55d3f7e3bfbb53c98f8ce78ebfe4bff3fe477 100644 GIT binary patch delta 72 zcmeys{D5V{Oh#V6P#+&I1t3t@Wzd~$$0(telUb5ll$fKCn^>ukpO;gqker`al9-tX Yhy)h^ diff --git a/tests/ui/parser/bad-char-literals.stderr b/tests/ui/parser/bad-char-literals.stderr index 89253d7d4aa..38889da5da1 100644 --- a/tests/ui/parser/bad-char-literals.stderr +++ b/tests/ui/parser/bad-char-literals.stderr @@ -25,7 +25,7 @@ LL | '\n'; error: character constant must be escaped: `\r` --> $DIR/bad-char-literals.rs:15:6 | -LL | ' '; +LL | '␍'; | ^ | help: escape the character @@ -33,8 +33,19 @@ help: escape the character LL | '\r'; | ++ +error: character literal may only contain one codepoint + --> $DIR/bad-char-literals.rs:18:5 + | +LL | '-␀-'; + | ^^^^ + | +help: if you meant to write a string literal, use double quotes + | +LL | "-␀-"; + | ~ ~ + error: character constant must be escaped: `\t` - --> $DIR/bad-char-literals.rs:18:6 + --> $DIR/bad-char-literals.rs:21:6 | LL | ' '; | ^^^^ @@ -44,5 +55,5 @@ help: escape the character LL | '\t'; | ++ -error: aborting due to 4 previous errors +error: aborting due to 5 previous errors diff --git a/tests/ui/parser/issues/issue-66473.stderr b/tests/ui/parser/issues/issue-66473.stderr index 0e8b0a5da220569b607fe3512ae59ee94e3fcf1b..4be992d58460ebac5cf5fa9ee562da839f670d59 100644 GIT binary patch delta 316 zcmZ3=v6FK`kjLc%1G?IY7gkKRWlpvu&rJ}U6m%#1F-wbKw_x%^W)}eZ C*Q@9N delta 126 zcmdnVxs+oi_@% diff --git a/tests/ui/parser/issues/issue-68629.stderr b/tests/ui/parser/issues/issue-68629.stderr index 2562baa1c49c3034012d9721c128aecc4303dc02..ccb0624208b2e20aee028a8efb2fd0fc25d61c94 100644 GIT binary patch delta 125 zcmdnMet~_0pV*@ba~@5YUsC*NLW5#-btadBfzJYy$AFGUfNioC3{rVF-bYSc#2xGDe<2@z@hRF@gqMK!zSQ(+By3D>1(EtFB CauJFE diff --git a/tests/ui/parser/issues/issue-68730.stderr b/tests/ui/parser/issues/issue-68730.stderr index 5bca5bbebeacb44984c8679e4d2c4757be4ee742..6025ea8c1ae7fbdec8c78ea332e3560ee2796981 100644 GIT binary patch delta 96 zcmeyw*~c{@kmJ#WhSXdjF){QBoITl=@dbjT$+Qc>dCruE-~=*HMsQv*_aZozER_HO CpD&UC delta 67 zcmeC<`ouXQkcA;NmtkV$6EHp5mGK3HVal`%!uZaV24SQ!PX;q4vGhU&rm|E50I|*! AssI20 diff --git a/tests/ui/parser/raw/raw-byte-string-literals.stderr b/tests/ui/parser/raw/raw-byte-string-literals.stderr index a2f27d1ed70..a20ce845c32 100644 --- a/tests/ui/parser/raw/raw-byte-string-literals.stderr +++ b/tests/ui/parser/raw/raw-byte-string-literals.stderr @@ -1,7 +1,7 @@ error: bare CR not allowed in raw string --> $DIR/raw-byte-string-literals.rs:4:9 | -LL | br"a "; +LL | br"a␍"; | ^ error: non-ASCII character in raw byte string literal diff --git a/tests/ui/parser/several-carriage-returns-in-doc-comment.stderr b/tests/ui/parser/several-carriage-returns-in-doc-comment.stderr index 07066fc22e6..3150570e1c9 100644 --- a/tests/ui/parser/several-carriage-returns-in-doc-comment.stderr +++ b/tests/ui/parser/several-carriage-returns-in-doc-comment.stderr @@ -1,19 +1,19 @@ error: bare CR not allowed in doc-comment --> $DIR/several-carriage-returns-in-doc-comment.rs:6:12 | -LL | /// This do c comment contains three isolated `\r` symbols +LL | /// This do␍c comment contains ␍three isolated `\r`␍ symbols | ^ error: bare CR not allowed in doc-comment --> $DIR/several-carriage-returns-in-doc-comment.rs:6:32 | -LL | /// This do c comment contains three isolated `\r` symbols +LL | /// This do␍c comment contains ␍three isolated `\r`␍ symbols | ^ error: bare CR not allowed in doc-comment --> $DIR/several-carriage-returns-in-doc-comment.rs:6:52 | -LL | /// This do c comment contains three isolated `\r` symbols +LL | /// This do␍c comment contains ␍three isolated `\r`␍ symbols | ^ error: aborting due to 3 previous errors diff --git a/tests/ui/parser/trailing-carriage-return-in-string.stderr b/tests/ui/parser/trailing-carriage-return-in-string.stderr index fa2677921b3..c5949432af8 100644 --- a/tests/ui/parser/trailing-carriage-return-in-string.stderr +++ b/tests/ui/parser/trailing-carriage-return-in-string.stderr @@ -1,7 +1,7 @@ error: unknown character escape: `\r` --> $DIR/trailing-carriage-return-in-string.rs:10:25 | -LL | let bad = "This is \ a test"; +LL | let bad = "This is \␍ a test"; | ^ unknown character escape | = help: this is an isolated carriage return; consider checking your editor and version control settings diff --git a/tests/ui/parser/utf16-be-without-bom.stderr b/tests/ui/parser/utf16-be-without-bom.stderr index c041f3ecf53b22d8bfd994f7a8c24d583594e027..28cf6d97e96c2a8d08d193f97d4289da04c2aa38 100644 GIT binary patch literal 4029 zcmds)%}T>S6ovPBiX*sCFlx2_1#~5X;7W8SC24YFhNg4N{8&p>d<37-C-J6%;z|@| zA+tCzOfvV*{T7EgWDJ`OUQaa7HLNR)1J`goA)VpDFZW*)AWbi@d3k-i=UqKGPREqa zm223$bj;ITW3yp4kl*l<+}z*=pC9kFNQ=7lR8)wRsN35TZHu~ZOVO)nbsD3-h*Ua9 zL+vbsWp7GCty;2}dh1ZKc7@Wozq?9LLAvCMDmZ#}J2D47&!}{=p?%w8yl~wy_1MT% zW$6v(4QUjpR9%xH?fL>rKSEh}OI4CkFeZl~n8XILR5=|iMjgn0$1CA0-FdL4M&OtaYjuKBmLynS9NLT)MfYy18HFd!<-^jrR zd6wlnsM+fZxz6R4_lt-*4DuKp;fFi*9s1}gLDQ;=3=B>Asf&oNA=p_VdDO8iND}6R r7IQW-M7^5gdjkG@{C#^{r_W-{#&ta_zv?y^VrJ+`vA0+SRGTDUM5ECC literal 3641 zcmds)OG^VW6os>%UvUH%3LUjtUjw=lLFh_!CuOEHcRHbMZpot$+WOl~Td2FzfD6ek z2{$+QoW&s@Y0Wx=Q$xiw1!D_s!8I%oNoDwQ%d2$)r0FwGU*6s?ovqti?KU02lyz~ z7o`JX@fW29fgu>92iKuV7^MlpaTuiw;hFgFY}j)JE81XCev*SV@+`|+sM)Csxyt?D z=LjI`1bmTtIPmhW&kmG478zNQhNdx3H$Y<>f|(Sao-F$Xc^(?lV#eB!T5o6lrLsd% e_WiwQqN2~*ZhBX4+qK=nMAKBVGuUF2Ns=Enq*san diff --git a/tests/ui/parser/utf16-le-without-bom.stderr b/tests/ui/parser/utf16-le-without-bom.stderr index cc2220441ac1073c755b178c4da74085faedd2a1..53004ac942d988ea867e0ea7573b4ffd6cfbd026 100644 GIT binary patch literal 3939 zcmds)%SyvQ6o&VCieGS{V6@R%B%mu11XrRvk)+9q8Jf;1bFqpSd<37-C-J0((v^6b zh0NlIkYr|ZzQs>IGKNhKuP2)48rGG@fg3oUkj`=Ap9U`pkftZtJUzeM^RCH8X-DZ? zxt6_4$2{FNHXr8u@*19#iwiuX`FMZR5=f?!QlcdNy)DVMq<{7#d66uZ0=LU8WS?WS z)Xj2O_NF2sp-L*1P zRe6JXOBy9=)ih*CyX6K--$7Y;OLdY^Fe1ah6NwGUC;*b!m>h**5*w6k5QIu}SOT=H z)N6EAn}z7wD8U42+$hO}>D?I7><22}DA|N+;3(k)tKsJ1a#U x9m@rY3{L2A&PFC^79*@w_2-`Y`ns~ja?Hl{2iAUZR5rxS(7j@Bu@Fg{B%hBqn&$uj literal 3603 zcmds)PfNo<5XE!Or+9(~g+`6mA^|;#Ab1kJi6l)XW@)+;_K#_4^|QNerSzm-@F08J zER)%J@9>x(Da9&-l@rOmgf@jT;2MS}Bs09&#o0#;B*_g9Z|@&Zt*O&d(o)h3)37zk zn5QR7XT$8=m0=k_JYb1Bk`bb5R7Ap^j-q4twTM2VbqVmgDRuSNXsGQnXtt^()S@Mg zsnrH0OH&Am=cl{m3M5OesDh!H8MtX_)=_EPg|uyp@!YI_6K{>1y)3Ol-;hLsO4Kze zl4jFD$TtWLtEq}(`ZMLj{gsC%edHlMH0@VD+Cvkco_VPcPQFLlxtI$s@=~A73(^3u zG$4h0846SqZ*Z{r|M-Z<&E5tq>