Auto merge of #45711 - tirr-c:unicode-span, r=estebank

Display spans correctly when there are zero-width or wide characters

Hopefully...
* fixes #45211
* fixes #8706

---

Before:
```
error: invalid width `7` for integer literal
  --> unicode_2.rs:12:25
   |
12 |     let _ = ("a̐éö̲", 0u7);
   |                         ^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: invalid width `42` for integer literal
  --> unicode_2.rs:13:20
   |
13 |     let _ = ("아あ", 1i42);
   |                    ^^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: aborting due to 2 previous errors
```

After:
```
error: invalid width `7` for integer literal
  --> unicode_2.rs:12:25
   |
12 |     let _ = ("a̐éö̲", 0u7);
   |                     ^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: invalid width `42` for integer literal
  --> unicode_2.rs:13:20
   |
13 |     let _ = ("아あ", 1i42);
   |                      ^^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: aborting due to 2 previous errors
```

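Spans might display incorrectly in the browser, since the rendering font may not give zero-width and wide characters the same column widths as a terminal.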

r? @estebank
This commit is contained in:
bors 2017-11-04 23:09:19 +00:00
commit 12e6b53744
14 changed files with 231 additions and 16 deletions

1
src/Cargo.lock generated
View File

@ -2230,6 +2230,7 @@ version = "0.0.0"
dependencies = [
"rustc_data_structures 0.0.0",
"serialize 0.0.0",
"unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]

View File

@ -364,6 +364,7 @@ impl<'gcx> HashStable<StableHashingContext<'gcx>> for FileMap {
end_pos: _,
ref lines,
ref multibyte_chars,
ref non_narrow_chars,
} = *self;
name.hash_stable(hcx, hasher);
@ -389,6 +390,12 @@ impl<'gcx> HashStable<StableHashingContext<'gcx>> for FileMap {
for &char_pos in multibyte_chars.iter() {
stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher);
}
let non_narrow_chars = non_narrow_chars.borrow();
non_narrow_chars.len().hash_stable(hcx, hasher);
for &char_pos in non_narrow_chars.iter() {
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
}
}
}
@ -408,3 +415,12 @@ fn stable_multibyte_char(mbc: ::syntax_pos::MultiByteChar,
(pos.0 - filemap_start.0, bytes as u32)
}
/// Reduces a non-narrow character record to a `(offset, width)` pair for
/// stable hashing.
///
/// The offset is the character's position minus `filemap_start`, so the
/// result is expressed relative to the start of the file map rather than as
/// an absolute position. The width is whatever `NonNarrowChar::width`
/// reports for the record.
fn stable_non_narrow_char(swc: ::syntax_pos::NonNarrowChar,
                          filemap_start: ::syntax_pos::BytePos)
                          -> (u32, u32) {
    let relative_offset = swc.pos().0 - filemap_start.0;
    let display_width = swc.width() as u32;

    (relative_offset, display_width)
}

View File

@ -10,7 +10,7 @@
use self::Destination::*;
use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan, CharPos};
use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan};
use {Level, CodeSuggestion, DiagnosticBuilder, SubDiagnostic, CodeMapper, DiagnosticId};
use RenderSpan::*;
@ -201,8 +201,8 @@ impl EmitterWriter {
// 6..7. This is degenerate input, but it's best to degrade
// gracefully -- and the parser likes to supply a span like
// that for EOF, in particular.
if lo.col == hi.col && lo.line == hi.line {
hi.col = CharPos(lo.col.0 + 1);
if lo.col_display == hi.col_display && lo.line == hi.line {
hi.col_display += 1;
}
let ann_type = if lo.line != hi.line {
@ -210,8 +210,8 @@ impl EmitterWriter {
depth: 1,
line_start: lo.line,
line_end: hi.line,
start_col: lo.col.0,
end_col: hi.col.0,
start_col: lo.col_display,
end_col: hi.col_display,
is_primary: span_label.is_primary,
label: span_label.label.clone(),
};
@ -221,8 +221,8 @@ impl EmitterWriter {
AnnotationType::Singleline
};
let ann = Annotation {
start_col: lo.col.0,
end_col: hi.col.0,
start_col: lo.col_display,
end_col: hi.col_display,
is_primary: span_label.is_primary,
label: span_label.label.clone(),
annotation_type: ann_type,

View File

@ -1189,6 +1189,7 @@ impl<'a, 'tcx> CrateMetadata {
end_pos,
lines,
multibyte_chars,
non_narrow_chars,
.. } = filemap_to_import;
let source_length = (end_pos - start_pos).to_usize();
@ -1206,6 +1207,10 @@ impl<'a, 'tcx> CrateMetadata {
for mbc in &mut multibyte_chars {
mbc.pos = mbc.pos - start_pos;
}
let mut non_narrow_chars = non_narrow_chars.into_inner();
for swc in &mut non_narrow_chars {
*swc = *swc - start_pos;
}
let local_version = local_codemap.new_imported_filemap(name,
name_was_remapped,
@ -1213,7 +1218,8 @@ impl<'a, 'tcx> CrateMetadata {
src_hash,
source_length,
lines,
multibyte_chars);
multibyte_chars,
non_narrow_chars);
debug!("CrateMetaData::imported_filemaps alloc \
filemap {:?} original (start_pos {:?} end_pos {:?}) \
translated (start_pos {:?} end_pos {:?})",

View File

@ -242,7 +242,8 @@ impl CodeMap {
src_hash: u128,
source_len: usize,
mut file_local_lines: Vec<BytePos>,
mut file_local_multibyte_chars: Vec<MultiByteChar>)
mut file_local_multibyte_chars: Vec<MultiByteChar>,
mut file_local_non_narrow_chars: Vec<NonNarrowChar>)
-> Rc<FileMap> {
let start_pos = self.next_start_pos();
let mut files = self.files.borrow_mut();
@ -258,6 +259,10 @@ impl CodeMap {
mbc.pos = mbc.pos + start_pos;
}
for swc in &mut file_local_non_narrow_chars {
*swc = *swc + start_pos;
}
let filemap = Rc::new(FileMap {
name: filename,
name_was_remapped,
@ -270,6 +275,7 @@ impl CodeMap {
end_pos,
lines: RefCell::new(file_local_lines),
multibyte_chars: RefCell::new(file_local_multibyte_chars),
non_narrow_chars: RefCell::new(file_local_non_narrow_chars),
});
files.push(filemap.clone());
@ -297,6 +303,24 @@ impl CodeMap {
let line = a + 1; // Line numbers start at 1
let linebpos = (*f.lines.borrow())[a];
let linechpos = self.bytepos_to_file_charpos(linebpos);
let col = chpos - linechpos;
let col_display = {
let non_narrow_chars = f.non_narrow_chars.borrow();
let start_width_idx = non_narrow_chars
.binary_search_by_key(&linebpos, |x| x.pos())
.unwrap_or_else(|x| x);
let end_width_idx = non_narrow_chars
.binary_search_by_key(&pos, |x| x.pos())
.unwrap_or_else(|x| x);
let special_chars = end_width_idx - start_width_idx;
let non_narrow: usize =
non_narrow_chars[start_width_idx..end_width_idx]
.into_iter()
.map(|x| x.width())
.sum();
col.0 - special_chars + non_narrow
};
debug!("byte pos {:?} is on the line at byte pos {:?}",
pos, linebpos);
debug!("char pos {:?} is on the line at char pos {:?}",
@ -306,14 +330,28 @@ impl CodeMap {
Loc {
file: f,
line,
col: chpos - linechpos,
col,
col_display,
}
}
Err(f) => {
let col_display = {
let non_narrow_chars = f.non_narrow_chars.borrow();
let end_width_idx = non_narrow_chars
.binary_search_by_key(&pos, |x| x.pos())
.unwrap_or_else(|x| x);
let non_narrow: usize =
non_narrow_chars[0..end_width_idx]
.into_iter()
.map(|x| x.width())
.sum();
chpos.0 - end_width_idx + non_narrow
};
Loc {
file: f,
line: 0,
col: chpos,
col_display,
}
}
}

View File

@ -433,6 +433,7 @@ impl<'a> StringReader<'a> {
self.filemap.record_multibyte_char(self.pos, new_ch_len);
}
}
self.filemap.record_width(self.pos, new_ch);
} else {
self.ch = None;
self.pos = new_pos;

View File

@ -11,3 +11,4 @@ crate-type = ["dylib"]
[dependencies]
serialize = { path = "../libserialize" }
rustc_data_structures = { path = "../librustc_data_structures" }
unicode-width = "0.1.4"

View File

@ -44,6 +44,8 @@ use serialize::{Encodable, Decodable, Encoder, Decoder};
extern crate serialize;
extern crate serialize as rustc_serialize; // used by deriving
extern crate unicode_width;
pub mod hygiene;
pub use hygiene::{SyntaxContext, ExpnInfo, ExpnFormat, NameAndSpan, CompilerDesugaringKind};
@ -494,6 +496,63 @@ pub struct MultiByteChar {
pub bytes: usize,
}
/// Identifies an offset of a non-narrow character in a FileMap.
///
/// Each variant carries the `BytePos` of the character it describes; the
/// variant itself encodes the display width (see `NonNarrowChar::width`).
///
/// `Debug` is derived so values can be printed in `debug!` output and
/// assertion failures like the `BytePos` they wrap.
#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq, Debug)]
pub enum NonNarrowChar {
    /// Represents a zero-width character
    ZeroWidth(BytePos),
    /// Represents a wide (fullwidth) character
    Wide(BytePos),
}
impl NonNarrowChar {
    /// Builds the record for a character at `pos` whose display width is
    /// `width`.
    ///
    /// # Panics
    ///
    /// Panics if `width` is neither 0 nor 2, since no variant represents
    /// any other width.
    fn new(pos: BytePos, width: usize) -> Self {
        if width == 0 {
            NonNarrowChar::ZeroWidth(pos)
        } else if width == 2 {
            NonNarrowChar::Wide(pos)
        } else {
            panic!("width {} given for non-narrow character", width)
        }
    }

    /// Returns the absolute offset of the character in the CodeMap
    pub fn pos(&self) -> BytePos {
        match *self {
            NonNarrowChar::ZeroWidth(byte_pos) => byte_pos,
            NonNarrowChar::Wide(byte_pos) => byte_pos,
        }
    }

    /// Returns the width of the character: 0 for a zero-width character,
    /// 2 for a wide one
    pub fn width(&self) -> usize {
        match *self {
            NonNarrowChar::Wide(_) => 2,
            NonNarrowChar::ZeroWidth(_) => 0,
        }
    }
}
/// Shifts the recorded position forward by a `BytePos` offset while keeping
/// the same variant (and therefore the same display width).
impl Add<BytePos> for NonNarrowChar {
    type Output = Self;

    fn add(self, rhs: BytePos) -> Self {
        let shifted = self.pos() + rhs;
        match self {
            NonNarrowChar::ZeroWidth(_) => NonNarrowChar::ZeroWidth(shifted),
            NonNarrowChar::Wide(_) => NonNarrowChar::Wide(shifted),
        }
    }
}
/// Shifts the recorded position backward by a `BytePos` offset while keeping
/// the same variant (and therefore the same display width).
impl Sub<BytePos> for NonNarrowChar {
    type Output = Self;

    fn sub(self, rhs: BytePos) -> Self {
        let shifted = self.pos() - rhs;
        match self {
            NonNarrowChar::ZeroWidth(_) => NonNarrowChar::ZeroWidth(shifted),
            NonNarrowChar::Wide(_) => NonNarrowChar::Wide(shifted),
        }
    }
}
/// The state of the lazy external source loading mechanism of a FileMap.
#[derive(PartialEq, Eq, Clone)]
pub enum ExternalSource {
@ -552,11 +611,13 @@ pub struct FileMap {
pub lines: RefCell<Vec<BytePos>>,
/// Locations of multi-byte characters in the source code
pub multibyte_chars: RefCell<Vec<MultiByteChar>>,
/// Width of characters that are not narrow in the source code
pub non_narrow_chars: RefCell<Vec<NonNarrowChar>>,
}
impl Encodable for FileMap {
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
s.emit_struct("FileMap", 7, |s| {
s.emit_struct("FileMap", 8, |s| {
s.emit_struct_field("name", 0, |s| self.name.encode(s))?;
s.emit_struct_field("name_was_remapped", 1, |s| self.name_was_remapped.encode(s))?;
s.emit_struct_field("src_hash", 6, |s| self.src_hash.encode(s))?;
@ -610,6 +671,9 @@ impl Encodable for FileMap {
})?;
s.emit_struct_field("multibyte_chars", 5, |s| {
(*self.multibyte_chars.borrow()).encode(s)
})?;
s.emit_struct_field("non_narrow_chars", 7, |s| {
(*self.non_narrow_chars.borrow()).encode(s)
})
})
}
@ -618,7 +682,7 @@ impl Encodable for FileMap {
impl Decodable for FileMap {
fn decode<D: Decoder>(d: &mut D) -> Result<FileMap, D::Error> {
d.read_struct("FileMap", 6, |d| {
d.read_struct("FileMap", 8, |d| {
let name: String = d.read_struct_field("name", 0, |d| Decodable::decode(d))?;
let name_was_remapped: bool =
d.read_struct_field("name_was_remapped", 1, |d| Decodable::decode(d))?;
@ -657,6 +721,8 @@ impl Decodable for FileMap {
})?;
let multibyte_chars: Vec<MultiByteChar> =
d.read_struct_field("multibyte_chars", 5, |d| Decodable::decode(d))?;
let non_narrow_chars: Vec<NonNarrowChar> =
d.read_struct_field("non_narrow_chars", 7, |d| Decodable::decode(d))?;
Ok(FileMap {
name,
name_was_remapped,
@ -671,7 +737,8 @@ impl Decodable for FileMap {
src_hash,
external_src: RefCell::new(ExternalSource::AbsentOk),
lines: RefCell::new(lines),
multibyte_chars: RefCell::new(multibyte_chars)
multibyte_chars: RefCell::new(multibyte_chars),
non_narrow_chars: RefCell::new(non_narrow_chars)
})
})
}
@ -709,6 +776,7 @@ impl FileMap {
end_pos: Pos::from_usize(end_pos),
lines: RefCell::new(Vec::new()),
multibyte_chars: RefCell::new(Vec::new()),
non_narrow_chars: RefCell::new(Vec::new()),
}
}
@ -798,6 +866,23 @@ impl FileMap {
self.multibyte_chars.borrow_mut().push(mbc);
}
/// Records `ch`, located at `pos`, in `non_narrow_chars` when its display
/// width is anything other than one column.
///
/// Characters that take exactly one column are not stored.
pub fn record_width(&self, pos: BytePos, ch: char) {
    let columns = if ch == '\t' || ch == '\n' {
        // A tab is counted as a single column, and so is a newline, so
        // that a displayed span is able to point to it.
        1
    } else {
        // Characters for which no width is reported (control characters)
        // are assumed to be zero width.
        // FIXME: How can we decide between `width` and `width_cjk`?
        unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0)
    };

    // Narrow characters need no entry.
    if columns == 1 {
        return;
    }

    self.non_narrow_chars.borrow_mut().push(NonNarrowChar::new(pos, columns));
}
pub fn is_real_file(&self) -> bool {
!(self.name.starts_with("<") &&
self.name.ends_with(">"))
@ -944,7 +1029,9 @@ pub struct Loc {
/// The (1-based) line number
pub line: usize,
/// The (0-based) column offset
pub col: CharPos
pub col: CharPos,
/// The (0-based) column offset when displayed
pub col_display: usize,
}
/// A source code location used as the result of lookup_char_pos_adj

View File

@ -2,7 +2,7 @@ error: invalid ABI: expected one of [cdecl, stdcall, fastcall, vectorcall, thisc
--> $DIR/unicode.rs:11:8
|
11 | extern "路濫狼á́́" fn foo() {}
| ^^^^^^^^
| ^^^^^^^^^
error: aborting due to previous error

View File

@ -0,0 +1,17 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(non_ascii_idents)]
fn main() {
let _ = ("a̐éö̲", 0u7);
let _ = ("아あ", 1i42);
let _ = a̐é;
}

View File

@ -0,0 +1,24 @@
error: invalid width `7` for integer literal
--> $DIR/unicode_2.rs:14:25
|
14 | let _ = ("a̐éö̲", 0u7);
| ^^^
|
= help: valid widths are 8, 16, 32, 64 and 128
error: invalid width `42` for integer literal
--> $DIR/unicode_2.rs:15:20
|
15 | let _ = ("아あ", 1i42);
| ^^^^
|
= help: valid widths are 8, 16, 32, 64 and 128
error[E0425]: cannot find value `a̐é` in this scope
--> $DIR/unicode_2.rs:16:13
|
16 | let _ = a̐é;
| ^^ not found in this scope
error: aborting due to 3 previous errors

View File

@ -0,0 +1,14 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
fn main() {
let s = "ZͨA͑ͦ͒͋ͤ͑̚L̄͑͋Ĝͨͥ̿͒̽̈́Oͥ͛ͭ!̏"; while true { break; }
println!("{}", s);
}

View File

@ -0,0 +1,10 @@
warning: denote infinite loops with `loop { ... }`
--> $DIR/unicode_3.rs:12:45
|
12 | let s = "ZͨA͑ͦ͒͋ͤ͑̚L̄͑͋Ĝͨͥ̿͒̽̈́Oͥ͛ͭ!̏"; while true { break; }
| ----------^^^^^^^^^^^
| |
| help: use `loop`
|
= note: #[warn(while_true)] on by default

View File

@ -2,7 +2,7 @@ error: unterminated double quote string
--> $DIR/issue-44078.rs:12:8
|
12 | "😊"";
| ________^
| _________^
13 | | }
| |__^