mirror of
https://github.com/rust-lang/rust.git
synced 2024-10-30 05:51:58 +00:00
Auto merge of #45711 - tirr-c:unicode-span, r=estebank
Display spans correctly when there are zero-width or wide characters

Hopefully...

* fixes #45211
* fixes #8706

---

Before:

```
error: invalid width `7` for integer literal
  --> unicode_2.rs:12:25
   |
12 |     let _ = ("a̐éö̲", 0u7);
   |                         ^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: invalid width `42` for integer literal
  --> unicode_2.rs:13:20
   |
13 |     let _ = ("아あ", 1i42);
   |                    ^^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: aborting due to 2 previous errors
```

After:

```
error: invalid width `7` for integer literal
  --> unicode_2.rs:12:25
   |
12 |     let _ = ("a̐éö̲", 0u7);
   |                     ^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: invalid width `42` for integer literal
  --> unicode_2.rs:13:20
   |
13 |     let _ = ("아あ", 1i42);
   |                      ^^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: aborting due to 2 previous errors
```

Spans might display incorrectly in the browser.

r? @estebank
This commit is contained in:
commit
12e6b53744
1
src/Cargo.lock
generated
1
src/Cargo.lock
generated
@ -2230,6 +2230,7 @@ version = "0.0.0"
|
||||
dependencies = [
|
||||
"rustc_data_structures 0.0.0",
|
||||
"serialize 0.0.0",
|
||||
"unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -364,6 +364,7 @@ impl<'gcx> HashStable<StableHashingContext<'gcx>> for FileMap {
|
||||
end_pos: _,
|
||||
ref lines,
|
||||
ref multibyte_chars,
|
||||
ref non_narrow_chars,
|
||||
} = *self;
|
||||
|
||||
name.hash_stable(hcx, hasher);
|
||||
@ -389,6 +390,12 @@ impl<'gcx> HashStable<StableHashingContext<'gcx>> for FileMap {
|
||||
for &char_pos in multibyte_chars.iter() {
|
||||
stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher);
|
||||
}
|
||||
|
||||
let non_narrow_chars = non_narrow_chars.borrow();
|
||||
non_narrow_chars.len().hash_stable(hcx, hasher);
|
||||
for &char_pos in non_narrow_chars.iter() {
|
||||
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -408,3 +415,12 @@ fn stable_multibyte_char(mbc: ::syntax_pos::MultiByteChar,
|
||||
|
||||
(pos.0 - filemap_start.0, bytes as u32)
|
||||
}
|
||||
|
||||
// Converts a `NonNarrowChar` into a plain `(offset, width)` pair for hashing.
//
// The first element is the character's position minus `filemap_start`, i.e. an
// offset relative to the start of its FileMap; the second is the value of
// `swc.width()` cast to `u32`.
// NOTE(review): presumably the relative offset keeps the hash independent of
// where the FileMap sits inside the CodeMap — confirm against the
// `stable_multibyte_char` helper, which has the same shape.
fn stable_non_narrow_char(swc: ::syntax_pos::NonNarrowChar,
|
||||
filemap_start: ::syntax_pos::BytePos)
|
||||
-> (u32, u32) {
|
||||
let pos = swc.pos();
|
||||
let width = swc.width();
|
||||
|
||||
(pos.0 - filemap_start.0, width as u32)
|
||||
}
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
use self::Destination::*;
|
||||
|
||||
use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan, CharPos};
|
||||
use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan};
|
||||
|
||||
use {Level, CodeSuggestion, DiagnosticBuilder, SubDiagnostic, CodeMapper, DiagnosticId};
|
||||
use RenderSpan::*;
|
||||
@ -201,8 +201,8 @@ impl EmitterWriter {
|
||||
// 6..7. This is degenerate input, but it's best to degrade
|
||||
// gracefully -- and the parser likes to supply a span like
|
||||
// that for EOF, in particular.
|
||||
if lo.col == hi.col && lo.line == hi.line {
|
||||
hi.col = CharPos(lo.col.0 + 1);
|
||||
if lo.col_display == hi.col_display && lo.line == hi.line {
|
||||
hi.col_display += 1;
|
||||
}
|
||||
|
||||
let ann_type = if lo.line != hi.line {
|
||||
@ -210,8 +210,8 @@ impl EmitterWriter {
|
||||
depth: 1,
|
||||
line_start: lo.line,
|
||||
line_end: hi.line,
|
||||
start_col: lo.col.0,
|
||||
end_col: hi.col.0,
|
||||
start_col: lo.col_display,
|
||||
end_col: hi.col_display,
|
||||
is_primary: span_label.is_primary,
|
||||
label: span_label.label.clone(),
|
||||
};
|
||||
@ -221,8 +221,8 @@ impl EmitterWriter {
|
||||
AnnotationType::Singleline
|
||||
};
|
||||
let ann = Annotation {
|
||||
start_col: lo.col.0,
|
||||
end_col: hi.col.0,
|
||||
start_col: lo.col_display,
|
||||
end_col: hi.col_display,
|
||||
is_primary: span_label.is_primary,
|
||||
label: span_label.label.clone(),
|
||||
annotation_type: ann_type,
|
||||
|
@ -1189,6 +1189,7 @@ impl<'a, 'tcx> CrateMetadata {
|
||||
end_pos,
|
||||
lines,
|
||||
multibyte_chars,
|
||||
non_narrow_chars,
|
||||
.. } = filemap_to_import;
|
||||
|
||||
let source_length = (end_pos - start_pos).to_usize();
|
||||
@ -1206,6 +1207,10 @@ impl<'a, 'tcx> CrateMetadata {
|
||||
for mbc in &mut multibyte_chars {
|
||||
mbc.pos = mbc.pos - start_pos;
|
||||
}
|
||||
let mut non_narrow_chars = non_narrow_chars.into_inner();
|
||||
for swc in &mut non_narrow_chars {
|
||||
*swc = *swc - start_pos;
|
||||
}
|
||||
|
||||
let local_version = local_codemap.new_imported_filemap(name,
|
||||
name_was_remapped,
|
||||
@ -1213,7 +1218,8 @@ impl<'a, 'tcx> CrateMetadata {
|
||||
src_hash,
|
||||
source_length,
|
||||
lines,
|
||||
multibyte_chars);
|
||||
multibyte_chars,
|
||||
non_narrow_chars);
|
||||
debug!("CrateMetaData::imported_filemaps alloc \
|
||||
filemap {:?} original (start_pos {:?} end_pos {:?}) \
|
||||
translated (start_pos {:?} end_pos {:?})",
|
||||
|
@ -242,7 +242,8 @@ impl CodeMap {
|
||||
src_hash: u128,
|
||||
source_len: usize,
|
||||
mut file_local_lines: Vec<BytePos>,
|
||||
mut file_local_multibyte_chars: Vec<MultiByteChar>)
|
||||
mut file_local_multibyte_chars: Vec<MultiByteChar>,
|
||||
mut file_local_non_narrow_chars: Vec<NonNarrowChar>)
|
||||
-> Rc<FileMap> {
|
||||
let start_pos = self.next_start_pos();
|
||||
let mut files = self.files.borrow_mut();
|
||||
@ -258,6 +259,10 @@ impl CodeMap {
|
||||
mbc.pos = mbc.pos + start_pos;
|
||||
}
|
||||
|
||||
for swc in &mut file_local_non_narrow_chars {
|
||||
*swc = *swc + start_pos;
|
||||
}
|
||||
|
||||
let filemap = Rc::new(FileMap {
|
||||
name: filename,
|
||||
name_was_remapped,
|
||||
@ -270,6 +275,7 @@ impl CodeMap {
|
||||
end_pos,
|
||||
lines: RefCell::new(file_local_lines),
|
||||
multibyte_chars: RefCell::new(file_local_multibyte_chars),
|
||||
non_narrow_chars: RefCell::new(file_local_non_narrow_chars),
|
||||
});
|
||||
|
||||
files.push(filemap.clone());
|
||||
@ -297,6 +303,24 @@ impl CodeMap {
|
||||
let line = a + 1; // Line numbers start at 1
|
||||
let linebpos = (*f.lines.borrow())[a];
|
||||
let linechpos = self.bytepos_to_file_charpos(linebpos);
|
||||
let col = chpos - linechpos;
|
||||
|
||||
let col_display = {
|
||||
let non_narrow_chars = f.non_narrow_chars.borrow();
|
||||
let start_width_idx = non_narrow_chars
|
||||
.binary_search_by_key(&linebpos, |x| x.pos())
|
||||
.unwrap_or_else(|x| x);
|
||||
let end_width_idx = non_narrow_chars
|
||||
.binary_search_by_key(&pos, |x| x.pos())
|
||||
.unwrap_or_else(|x| x);
|
||||
let special_chars = end_width_idx - start_width_idx;
|
||||
let non_narrow: usize =
|
||||
non_narrow_chars[start_width_idx..end_width_idx]
|
||||
.into_iter()
|
||||
.map(|x| x.width())
|
||||
.sum();
|
||||
col.0 - special_chars + non_narrow
|
||||
};
|
||||
debug!("byte pos {:?} is on the line at byte pos {:?}",
|
||||
pos, linebpos);
|
||||
debug!("char pos {:?} is on the line at char pos {:?}",
|
||||
@ -306,14 +330,28 @@ impl CodeMap {
|
||||
Loc {
|
||||
file: f,
|
||||
line,
|
||||
col: chpos - linechpos,
|
||||
col,
|
||||
col_display,
|
||||
}
|
||||
}
|
||||
Err(f) => {
|
||||
let col_display = {
|
||||
let non_narrow_chars = f.non_narrow_chars.borrow();
|
||||
let end_width_idx = non_narrow_chars
|
||||
.binary_search_by_key(&pos, |x| x.pos())
|
||||
.unwrap_or_else(|x| x);
|
||||
let non_narrow: usize =
|
||||
non_narrow_chars[0..end_width_idx]
|
||||
.into_iter()
|
||||
.map(|x| x.width())
|
||||
.sum();
|
||||
chpos.0 - end_width_idx + non_narrow
|
||||
};
|
||||
Loc {
|
||||
file: f,
|
||||
line: 0,
|
||||
col: chpos,
|
||||
col_display,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -433,6 +433,7 @@ impl<'a> StringReader<'a> {
|
||||
self.filemap.record_multibyte_char(self.pos, new_ch_len);
|
||||
}
|
||||
}
|
||||
self.filemap.record_width(self.pos, new_ch);
|
||||
} else {
|
||||
self.ch = None;
|
||||
self.pos = new_pos;
|
||||
|
@ -11,3 +11,4 @@ crate-type = ["dylib"]
|
||||
[dependencies]
|
||||
serialize = { path = "../libserialize" }
|
||||
rustc_data_structures = { path = "../librustc_data_structures" }
|
||||
unicode-width = "0.1.4"
|
||||
|
@ -44,6 +44,8 @@ use serialize::{Encodable, Decodable, Encoder, Decoder};
|
||||
extern crate serialize;
|
||||
extern crate serialize as rustc_serialize; // used by deriving
|
||||
|
||||
extern crate unicode_width;
|
||||
|
||||
pub mod hygiene;
|
||||
pub use hygiene::{SyntaxContext, ExpnInfo, ExpnFormat, NameAndSpan, CompilerDesugaringKind};
|
||||
|
||||
@ -494,6 +496,63 @@ pub struct MultiByteChar {
|
||||
pub bytes: usize,
|
||||
}
|
||||
|
||||
/// Identifies the offset of a non-narrow character in a FileMap.
///
/// A character is "non-narrow" when its display width is not exactly one
/// column. The variant encodes the width (0 or 2) and the payload is the
/// character's `BytePos`.
|
||||
#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)]
|
||||
pub enum NonNarrowChar {
|
||||
/// Represents a zero-width character
|
||||
ZeroWidth(BytePos),
|
||||
/// Represents a wide (fullwidth) character
|
||||
Wide(BytePos),
|
||||
}
|
||||
|
||||
impl NonNarrowChar {
|
||||
/// Builds the variant that corresponds to `width`: `0` yields `ZeroWidth`,
/// `2` yields `Wide`.
///
/// # Panics
///
/// Panics for any other `width` (including `1`, which is a narrow character
/// and must not be recorded as non-narrow).
fn new(pos: BytePos, width: usize) -> Self {
|
||||
match width {
|
||||
0 => NonNarrowChar::ZeroWidth(pos),
|
||||
2 => NonNarrowChar::Wide(pos),
|
||||
_ => panic!("width {} given for non-narrow character", width),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the absolute offset of the character in the CodeMap
|
||||
pub fn pos(&self) -> BytePos {
|
||||
// Both variants carry the position as their only field.
match *self {
|
||||
NonNarrowChar::ZeroWidth(p) |
|
||||
NonNarrowChar::Wide(p) => p,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the width of the character, 0 (zero-width) or 2 (wide)
|
||||
pub fn width(&self) -> usize {
|
||||
// Inverse of the mapping used by `new`.
match *self {
|
||||
NonNarrowChar::ZeroWidth(_) => 0,
|
||||
NonNarrowChar::Wide(_) => 2,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts the recorded position forward by a `BytePos` offset while keeping
/// the variant (and therefore the width) unchanged.
impl Add<BytePos> for NonNarrowChar {
|
||||
type Output = Self;
|
||||
|
||||
fn add(self, rhs: BytePos) -> Self {
|
||||
match self {
|
||||
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs),
|
||||
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts the recorded position backward by a `BytePos` offset while keeping
/// the variant (and therefore the width) unchanged.
impl Sub<BytePos> for NonNarrowChar {
|
||||
type Output = Self;
|
||||
|
||||
fn sub(self, rhs: BytePos) -> Self {
|
||||
match self {
|
||||
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs),
|
||||
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The state of the lazy external source loading mechanism of a FileMap.
|
||||
#[derive(PartialEq, Eq, Clone)]
|
||||
pub enum ExternalSource {
|
||||
@ -552,11 +611,13 @@ pub struct FileMap {
|
||||
pub lines: RefCell<Vec<BytePos>>,
|
||||
/// Locations of multi-byte characters in the source code
|
||||
pub multibyte_chars: RefCell<Vec<MultiByteChar>>,
|
||||
/// Width of characters that are not narrow in the source code
|
||||
pub non_narrow_chars: RefCell<Vec<NonNarrowChar>>,
|
||||
}
|
||||
|
||||
impl Encodable for FileMap {
|
||||
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
|
||||
s.emit_struct("FileMap", 7, |s| {
|
||||
s.emit_struct("FileMap", 8, |s| {
|
||||
s.emit_struct_field("name", 0, |s| self.name.encode(s))?;
|
||||
s.emit_struct_field("name_was_remapped", 1, |s| self.name_was_remapped.encode(s))?;
|
||||
s.emit_struct_field("src_hash", 6, |s| self.src_hash.encode(s))?;
|
||||
@ -610,6 +671,9 @@ impl Encodable for FileMap {
|
||||
})?;
|
||||
s.emit_struct_field("multibyte_chars", 5, |s| {
|
||||
(*self.multibyte_chars.borrow()).encode(s)
|
||||
})?;
|
||||
s.emit_struct_field("non_narrow_chars", 7, |s| {
|
||||
(*self.non_narrow_chars.borrow()).encode(s)
|
||||
})
|
||||
})
|
||||
}
|
||||
@ -618,7 +682,7 @@ impl Encodable for FileMap {
|
||||
impl Decodable for FileMap {
|
||||
fn decode<D: Decoder>(d: &mut D) -> Result<FileMap, D::Error> {
|
||||
|
||||
d.read_struct("FileMap", 6, |d| {
|
||||
d.read_struct("FileMap", 8, |d| {
|
||||
let name: String = d.read_struct_field("name", 0, |d| Decodable::decode(d))?;
|
||||
let name_was_remapped: bool =
|
||||
d.read_struct_field("name_was_remapped", 1, |d| Decodable::decode(d))?;
|
||||
@ -657,6 +721,8 @@ impl Decodable for FileMap {
|
||||
})?;
|
||||
let multibyte_chars: Vec<MultiByteChar> =
|
||||
d.read_struct_field("multibyte_chars", 5, |d| Decodable::decode(d))?;
|
||||
let non_narrow_chars: Vec<NonNarrowChar> =
|
||||
d.read_struct_field("non_narrow_chars", 7, |d| Decodable::decode(d))?;
|
||||
Ok(FileMap {
|
||||
name,
|
||||
name_was_remapped,
|
||||
@ -671,7 +737,8 @@ impl Decodable for FileMap {
|
||||
src_hash,
|
||||
external_src: RefCell::new(ExternalSource::AbsentOk),
|
||||
lines: RefCell::new(lines),
|
||||
multibyte_chars: RefCell::new(multibyte_chars)
|
||||
multibyte_chars: RefCell::new(multibyte_chars),
|
||||
non_narrow_chars: RefCell::new(non_narrow_chars)
|
||||
})
|
||||
})
|
||||
}
|
||||
@ -709,6 +776,7 @@ impl FileMap {
|
||||
end_pos: Pos::from_usize(end_pos),
|
||||
lines: RefCell::new(Vec::new()),
|
||||
multibyte_chars: RefCell::new(Vec::new()),
|
||||
non_narrow_chars: RefCell::new(Vec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
@ -798,6 +866,23 @@ impl FileMap {
|
||||
self.multibyte_chars.borrow_mut().push(mbc);
|
||||
}
|
||||
|
||||
/// Records `ch`, located at `pos`, in `non_narrow_chars` when its display
/// width is not exactly one column.
///
/// Tabs and newlines are always treated as width 1. Every other character
/// is measured with `UnicodeWidthChar::width`, and a `None` result is
/// treated as width 0.
///
/// NOTE(review): `NonNarrowChar::new` panics for widths other than 0 and 2,
/// so this relies on `UnicodeWidthChar::width` never reporting a width
/// greater than 2 — confirm for the pinned `unicode-width` version.
pub fn record_width(&self, pos: BytePos, ch: char) {
|
||||
let width = match ch {
|
||||
'\t' | '\n' =>
|
||||
// Tabs will consume one column.
|
||||
// Make newlines take one column so that displayed spans can point them.
|
||||
1,
|
||||
ch =>
|
||||
// Assume control characters are zero width.
|
||||
// FIXME: How can we decide between `width` and `width_cjk`?
|
||||
unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0),
|
||||
};
|
||||
// Only record non-narrow characters.
|
||||
if width != 1 {
|
||||
self.non_narrow_chars.borrow_mut().push(NonNarrowChar::new(pos, width));
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_real_file(&self) -> bool {
|
||||
!(self.name.starts_with("<") &&
|
||||
self.name.ends_with(">"))
|
||||
@ -944,7 +1029,9 @@ pub struct Loc {
|
||||
/// The (1-based) line number
|
||||
pub line: usize,
|
||||
/// The (0-based) column offset
|
||||
pub col: CharPos
|
||||
pub col: CharPos,
|
||||
/// The (0-based) column offset when displayed
|
||||
pub col_display: usize,
|
||||
}
|
||||
|
||||
/// A source code location used as the result of lookup_char_pos_adj
|
||||
|
@ -2,7 +2,7 @@ error: invalid ABI: expected one of [cdecl, stdcall, fastcall, vectorcall, thisc
|
||||
--> $DIR/unicode.rs:11:8
|
||||
|
|
||||
11 | extern "路濫狼á́́" fn foo() {}
|
||||
| ^^^^^^^^
|
||||
| ^^^^^^^^^
|
||||
|
||||
error: aborting due to previous error
|
||||
|
||||
|
17
src/test/ui/codemap_tests/unicode_2.rs
Normal file
17
src/test/ui/codemap_tests/unicode_2.rs
Normal file
@ -0,0 +1,17 @@
|
||||
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![feature(non_ascii_idents)]
|
||||
|
||||
fn main() {
|
||||
let _ = ("a̐éö̲", 0u7);
|
||||
let _ = ("아あ", 1i42);
|
||||
let _ = a̐é;
|
||||
}
|
24
src/test/ui/codemap_tests/unicode_2.stderr
Normal file
24
src/test/ui/codemap_tests/unicode_2.stderr
Normal file
@ -0,0 +1,24 @@
|
||||
error: invalid width `7` for integer literal
|
||||
--> $DIR/unicode_2.rs:14:25
|
||||
|
|
||||
14 | let _ = ("a̐éö̲", 0u7);
|
||||
| ^^^
|
||||
|
|
||||
= help: valid widths are 8, 16, 32, 64 and 128
|
||||
|
||||
error: invalid width `42` for integer literal
|
||||
--> $DIR/unicode_2.rs:15:20
|
||||
|
|
||||
15 | let _ = ("아あ", 1i42);
|
||||
| ^^^^
|
||||
|
|
||||
= help: valid widths are 8, 16, 32, 64 and 128
|
||||
|
||||
error[E0425]: cannot find value `a̐é` in this scope
|
||||
--> $DIR/unicode_2.rs:16:13
|
||||
|
|
||||
16 | let _ = a̐é;
|
||||
| ^^ not found in this scope
|
||||
|
||||
error: aborting due to 3 previous errors
|
||||
|
14
src/test/ui/codemap_tests/unicode_3.rs
Normal file
14
src/test/ui/codemap_tests/unicode_3.rs
Normal file
@ -0,0 +1,14 @@
|
||||
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
fn main() {
|
||||
let s = "ZͨA͑ͦ͒͋ͤ͑̚L̄͑͋Ĝͨͥ̿͒̽̈́Oͥ͛ͭ!̏"; while true { break; }
|
||||
println!("{}", s);
|
||||
}
|
10
src/test/ui/codemap_tests/unicode_3.stderr
Normal file
10
src/test/ui/codemap_tests/unicode_3.stderr
Normal file
@ -0,0 +1,10 @@
|
||||
warning: denote infinite loops with `loop { ... }`
|
||||
--> $DIR/unicode_3.rs:12:45
|
||||
|
|
||||
12 | let s = "ZͨA͑ͦ͒͋ͤ͑̚L̄͑͋Ĝͨͥ̿͒̽̈́Oͥ͛ͭ!̏"; while true { break; }
|
||||
| ----------^^^^^^^^^^^
|
||||
| |
|
||||
| help: use `loop`
|
||||
|
|
||||
= note: #[warn(while_true)] on by default
|
||||
|
@ -2,7 +2,7 @@ error: unterminated double quote string
|
||||
--> $DIR/issue-44078.rs:12:8
|
||||
|
|
||||
12 | "😊"";
|
||||
| ________^
|
||||
| _________^
|
||||
13 | | }
|
||||
| |__^
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user