From 272c2faa1d766fd4185141106959cdb58b88e6e9 Mon Sep 17 00:00:00 2001 From: Wonwoo Choi Date: Thu, 2 Nov 2017 10:25:54 +0900 Subject: [PATCH] Display spans correctly when there are non-half-width characters --- src/Cargo.lock | 1 + src/librustc/ich/impls_syntax.rs | 16 ++++ src/librustc_errors/emitter.rs | 14 ++-- src/librustc_metadata/decoder.rs | 8 +- src/libsyntax/codemap.rs | 42 +++++++++- src/libsyntax/parse/lexer/mod.rs | 1 + src/libsyntax_pos/Cargo.toml | 1 + src/libsyntax_pos/lib.rs | 95 +++++++++++++++++++++- src/test/ui/codemap_tests/unicode.stderr | 2 +- src/test/ui/codemap_tests/unicode_2.rs | 17 ++++ src/test/ui/codemap_tests/unicode_2.stderr | 24 ++++++ src/test/ui/codemap_tests/unicode_3.rs | 14 ++++ src/test/ui/codemap_tests/unicode_3.stderr | 10 +++ src/test/ui/issue-44078.stderr | 2 +- 14 files changed, 231 insertions(+), 16 deletions(-) create mode 100644 src/test/ui/codemap_tests/unicode_2.rs create mode 100644 src/test/ui/codemap_tests/unicode_2.stderr create mode 100644 src/test/ui/codemap_tests/unicode_3.rs create mode 100644 src/test/ui/codemap_tests/unicode_3.stderr diff --git a/src/Cargo.lock b/src/Cargo.lock index 69c3789f337..81bc4da5b5c 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -2244,6 +2244,7 @@ version = "0.0.0" dependencies = [ "rustc_data_structures 0.0.0", "serialize 0.0.0", + "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] diff --git a/src/librustc/ich/impls_syntax.rs b/src/librustc/ich/impls_syntax.rs index 799e790b85f..fea4e283db1 100644 --- a/src/librustc/ich/impls_syntax.rs +++ b/src/librustc/ich/impls_syntax.rs @@ -364,6 +364,7 @@ impl<'gcx> HashStable> for FileMap { end_pos: _, ref lines, ref multibyte_chars, + ref non_narrow_chars, } = *self; name.hash_stable(hcx, hasher); @@ -389,6 +390,12 @@ impl<'gcx> HashStable> for FileMap { for &char_pos in multibyte_chars.iter() { stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher); } + + let non_narrow_chars = non_narrow_chars.borrow(); + non_narrow_chars.len().hash_stable(hcx, hasher); + for &char_pos in non_narrow_chars.iter() { + stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher); + } } } @@ -408,3 +415,12 @@ fn stable_multibyte_char(mbc: ::syntax_pos::MultiByteChar, (pos.0 - filemap_start.0, bytes as u32) } + +fn stable_non_narrow_char(swc: ::syntax_pos::NonNarrowChar, + filemap_start: ::syntax_pos::BytePos) + -> (u32, u32) { + let pos = swc.pos(); + let width = swc.width(); + + (pos.0 - filemap_start.0, width as u32) +} diff --git a/src/librustc_errors/emitter.rs b/src/librustc_errors/emitter.rs index 5db5a9a1133..7417794db6b 100644 --- a/src/librustc_errors/emitter.rs +++ b/src/librustc_errors/emitter.rs @@ -10,7 +10,7 @@ use self::Destination::*; -use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan, CharPos}; +use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan}; use {Level, CodeSuggestion, DiagnosticBuilder, SubDiagnostic, CodeMapper}; use RenderSpan::*; @@ -201,8 +201,8 @@ impl EmitterWriter { // 6..7. This is degenerate input, but it's best to degrade // gracefully -- and the parser likes to supply a span like // that for EOF, in particular. - if lo.col == hi.col && lo.line == hi.line { - hi.col = CharPos(lo.col.0 + 1); + if lo.col_display == hi.col_display && lo.line == hi.line { + hi.col_display += 1; } let ann_type = if lo.line != hi.line { @@ -210,8 +210,8 @@ impl EmitterWriter { depth: 1, line_start: lo.line, line_end: hi.line, - start_col: lo.col.0, - end_col: hi.col.0, + start_col: lo.col_display, + end_col: hi.col_display, is_primary: span_label.is_primary, label: span_label.label.clone(), }; @@ -221,8 +221,8 @@ impl EmitterWriter { AnnotationType::Singleline }; let ann = Annotation { - start_col: lo.col.0, - end_col: hi.col.0, + start_col: lo.col_display, + end_col: hi.col_display, is_primary: span_label.is_primary, label: span_label.label.clone(), annotation_type: ann_type, diff --git a/src/librustc_metadata/decoder.rs b/src/librustc_metadata/decoder.rs index b4519619124..02cee9610b7 100644 --- a/src/librustc_metadata/decoder.rs +++ b/src/librustc_metadata/decoder.rs @@ -1189,6 +1189,7 @@ impl<'a, 'tcx> CrateMetadata { end_pos, lines, multibyte_chars, + non_narrow_chars, .. } = filemap_to_import; let source_length = (end_pos - start_pos).to_usize(); @@ -1206,6 +1207,10 @@ impl<'a, 'tcx> CrateMetadata { for mbc in &mut multibyte_chars { mbc.pos = mbc.pos - start_pos; } + let mut non_narrow_chars = non_narrow_chars.into_inner(); + for swc in &mut non_narrow_chars { + *swc = *swc - start_pos; + } let local_version = local_codemap.new_imported_filemap(name, name_was_remapped, @@ -1213,7 +1218,8 @@ impl<'a, 'tcx> CrateMetadata { src_hash, source_length, lines, - multibyte_chars); + multibyte_chars, + non_narrow_chars); debug!("CrateMetaData::imported_filemaps alloc \ filemap {:?} original (start_pos {:?} end_pos {:?}) \ translated (start_pos {:?} end_pos {:?})", diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index ad78c550cf6..3464db2a811 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -242,7 +242,8 @@ impl CodeMap { src_hash: u128, source_len: usize, mut file_local_lines: Vec, - mut file_local_multibyte_chars: Vec) + mut file_local_multibyte_chars: Vec, + mut file_local_non_narrow_chars: Vec) -> Rc { let start_pos = self.next_start_pos(); let mut files = self.files.borrow_mut(); @@ -258,6 +259,10 @@ impl CodeMap { mbc.pos = mbc.pos + start_pos; } + for swc in &mut file_local_non_narrow_chars { + *swc = *swc + start_pos; + } + let filemap = Rc::new(FileMap { name: filename, name_was_remapped, @@ -270,6 +275,7 @@ impl CodeMap { end_pos, lines: RefCell::new(file_local_lines), multibyte_chars: RefCell::new(file_local_multibyte_chars), + non_narrow_chars: RefCell::new(file_local_non_narrow_chars), }); files.push(filemap.clone()); @@ -297,6 +303,24 @@ impl CodeMap { let line = a + 1; // Line numbers start at 1 let linebpos = (*f.lines.borrow())[a]; let linechpos = self.bytepos_to_file_charpos(linebpos); + let col = chpos - linechpos; + + let col_display = { + let non_narrow_chars = f.non_narrow_chars.borrow(); + let start_width_idx = non_narrow_chars + .binary_search_by_key(&linebpos, |x| x.pos()) + .unwrap_or_else(|x| x); + let end_width_idx = non_narrow_chars + .binary_search_by_key(&pos, |x| x.pos()) + .unwrap_or_else(|x| x); + let special_chars = end_width_idx - start_width_idx; + let non_narrow: usize = + non_narrow_chars[start_width_idx..end_width_idx] + .into_iter() + .map(|x| x.width()) + .sum(); + col.0 - special_chars + non_narrow + }; debug!("byte pos {:?} is on the line at byte pos {:?}", pos, linebpos); debug!("char pos {:?} is on the line at char pos {:?}", @@ -306,14 +330,28 @@ impl CodeMap { Loc { file: f, line, - col: chpos - linechpos, + col, + col_display, } } Err(f) => { + let col_display = { + let non_narrow_chars = f.non_narrow_chars.borrow(); + let end_width_idx = non_narrow_chars + .binary_search_by_key(&pos, |x| x.pos()) + .unwrap_or_else(|x| x); + let non_narrow: usize = + non_narrow_chars[0..end_width_idx] + .into_iter() + .map(|x| x.width()) + .sum(); + chpos.0 - end_width_idx + non_narrow + }; Loc { file: f, line: 0, col: chpos, + col_display, } } } diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index d9c3dbb630d..951163d35fa 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -433,6 +433,7 @@ impl<'a> StringReader<'a> { self.filemap.record_multibyte_char(self.pos, new_ch_len); } } + self.filemap.record_width(self.pos, new_ch); } else { self.ch = None; self.pos = new_pos; diff --git a/src/libsyntax_pos/Cargo.toml b/src/libsyntax_pos/Cargo.toml index dd8129bab51..aad2155157d 100644 --- a/src/libsyntax_pos/Cargo.toml +++ b/src/libsyntax_pos/Cargo.toml @@ -11,3 +11,4 @@ crate-type = ["dylib"] [dependencies] serialize = { path = "../libserialize" } rustc_data_structures = { path = "../librustc_data_structures" } +unicode-width = "0.1.4" diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs index 44e73d876e8..47755dc1d54 100644 --- a/src/libsyntax_pos/lib.rs +++ b/src/libsyntax_pos/lib.rs @@ -44,6 +44,8 @@ use serialize::{Encodable, Decodable, Encoder, Decoder}; extern crate serialize; extern crate serialize as rustc_serialize; // used by deriving +extern crate unicode_width; + pub mod hygiene; pub use hygiene::{SyntaxContext, ExpnInfo, ExpnFormat, NameAndSpan, CompilerDesugaringKind}; @@ -494,6 +496,63 @@ pub struct MultiByteChar { pub bytes: usize, } +/// Identifies an offset of a non-narrow character in a FileMap +#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)] +pub enum NonNarrowChar { + /// Represents a zero-width character + ZeroWidth(BytePos), + /// Represents a wide (fullwidth) character + Wide(BytePos), +} + +impl NonNarrowChar { + fn new(pos: BytePos, width: usize) -> Self { + match width { + 0 => NonNarrowChar::ZeroWidth(pos), + 2 => NonNarrowChar::Wide(pos), + _ => panic!("width {} given for non-narrow character", width), + } + } + + /// Returns the absolute offset of the character in the CodeMap + pub fn pos(&self) -> BytePos { + match *self { + NonNarrowChar::ZeroWidth(p) | + NonNarrowChar::Wide(p) => p, + } + } + + /// Returns the width of the character, 0 (zero-width) or 2 (wide) + pub fn width(&self) -> usize { + match *self { + NonNarrowChar::ZeroWidth(_) => 0, + NonNarrowChar::Wide(_) => 2, + } + } +} + +impl Add for NonNarrowChar { + type Output = Self; + + fn add(self, rhs: BytePos) -> Self { + match self { + NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs), + NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs), + } + } +} + +impl Sub for NonNarrowChar { + type Output = Self; + + fn sub(self, rhs: BytePos) -> Self { + match self { + NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs), + NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs), + } + } +} + /// The state of the lazy external source loading mechanism of a FileMap. #[derive(PartialEq, Eq, Clone)] pub enum ExternalSource { @@ -552,11 +611,13 @@ pub struct FileMap { pub lines: RefCell>, /// Locations of multi-byte characters in the source code pub multibyte_chars: RefCell>, + /// Width of characters that are not narrow in the source code + pub non_narrow_chars: RefCell>, } impl Encodable for FileMap { fn encode(&self, s: &mut S) -> Result<(), S::Error> { - s.emit_struct("FileMap", 7, |s| { + s.emit_struct("FileMap", 8, |s| { s.emit_struct_field("name", 0, |s| self.name.encode(s))?; s.emit_struct_field("name_was_remapped", 1, |s| self.name_was_remapped.encode(s))?; s.emit_struct_field("src_hash", 6, |s| self.src_hash.encode(s))?; @@ -610,6 +671,9 @@ impl Encodable for FileMap { })?; s.emit_struct_field("multibyte_chars", 5, |s| { (*self.multibyte_chars.borrow()).encode(s) + })?; + s.emit_struct_field("non_narrow_chars", 7, |s| { + (*self.non_narrow_chars.borrow()).encode(s) }) }) } @@ -618,7 +682,7 @@ impl Encodable for FileMap { impl Decodable for FileMap { fn decode(d: &mut D) -> Result { - d.read_struct("FileMap", 6, |d| { + d.read_struct("FileMap", 8, |d| { let name: String = d.read_struct_field("name", 0, |d| Decodable::decode(d))?; let name_was_remapped: bool = d.read_struct_field("name_was_remapped", 1, |d| Decodable::decode(d))?; @@ -657,6 +721,8 @@ impl Decodable for FileMap { })?; let multibyte_chars: Vec = d.read_struct_field("multibyte_chars", 5, |d| Decodable::decode(d))?; + let non_narrow_chars: Vec = + d.read_struct_field("non_narrow_chars", 7, |d| Decodable::decode(d))?; Ok(FileMap { name, name_was_remapped, @@ -671,7 +737,8 @@ impl Decodable for FileMap { src_hash, external_src: RefCell::new(ExternalSource::AbsentOk), lines: RefCell::new(lines), - multibyte_chars: RefCell::new(multibyte_chars) + multibyte_chars: RefCell::new(multibyte_chars), + non_narrow_chars: RefCell::new(non_narrow_chars) }) }) } @@ -709,6 +776,7 @@ impl FileMap { end_pos: Pos::from_usize(end_pos), lines: RefCell::new(Vec::new()), multibyte_chars: RefCell::new(Vec::new()), + non_narrow_chars: RefCell::new(Vec::new()), } } @@ -798,6 +866,23 @@ impl FileMap { self.multibyte_chars.borrow_mut().push(mbc); } + pub fn record_width(&self, pos: BytePos, ch: char) { + let width = match ch { + '\t' | '\n' => + // Tabs will consume one column. + // Make newlines take one column so that displayed spans can point them. + 1, + ch => + // Assume control characters are zero width. + // FIXME: How can we decide between `width` and `width_cjk`? + unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0), + }; + // Only record non-narrow characters. + if width != 1 { + self.non_narrow_chars.borrow_mut().push(NonNarrowChar::new(pos, width)); + } + } + pub fn is_real_file(&self) -> bool { !(self.name.starts_with("<") && self.name.ends_with(">")) @@ -944,7 +1029,9 @@ pub struct Loc { /// The (1-based) line number pub line: usize, /// The (0-based) column offset - pub col: CharPos + pub col: CharPos, + /// The (0-based) column offset when displayed + pub col_display: usize, } /// A source code location used as the result of lookup_char_pos_adj diff --git a/src/test/ui/codemap_tests/unicode.stderr b/src/test/ui/codemap_tests/unicode.stderr index 0828fd28b58..02a9d7ee0ef 100644 --- a/src/test/ui/codemap_tests/unicode.stderr +++ b/src/test/ui/codemap_tests/unicode.stderr @@ -2,7 +2,7 @@ error: invalid ABI: expected one of [cdecl, stdcall, fastcall, vectorcall, thisc --> $DIR/unicode.rs:11:8 | 11 | extern "路濫狼á́́" fn foo() {} - | ^^^^^^^^ + | ^^^^^^^^^ error: aborting due to previous error diff --git a/src/test/ui/codemap_tests/unicode_2.rs b/src/test/ui/codemap_tests/unicode_2.rs new file mode 100644 index 00000000000..cc3eae90f90 --- /dev/null +++ b/src/test/ui/codemap_tests/unicode_2.rs @@ -0,0 +1,17 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![feature(non_ascii_idents)] + +fn main() { + let _ = ("a̐éö̲", 0u7); + let _ = ("아あ", 1i42); + let _ = a̐é; +} diff --git a/src/test/ui/codemap_tests/unicode_2.stderr b/src/test/ui/codemap_tests/unicode_2.stderr new file mode 100644 index 00000000000..6cfa66730a2 --- /dev/null +++ b/src/test/ui/codemap_tests/unicode_2.stderr @@ -0,0 +1,24 @@ +error: invalid width `7` for integer literal + --> $DIR/unicode_2.rs:14:25 + | +14 | let _ = ("a̐éö̲", 0u7); + | ^^^ + | + = help: valid widths are 8, 16, 32, 64 and 128 + +error: invalid width `42` for integer literal + --> $DIR/unicode_2.rs:15:20 + | +15 | let _ = ("아あ", 1i42); + | ^^^^ + | + = help: valid widths are 8, 16, 32, 64 and 128 + +error[E0425]: cannot find value `a̐é` in this scope + --> $DIR/unicode_2.rs:16:13 + | +16 | let _ = a̐é; + | ^^ not found in this scope + +error: aborting due to 3 previous errors + diff --git a/src/test/ui/codemap_tests/unicode_3.rs b/src/test/ui/codemap_tests/unicode_3.rs new file mode 100644 index 00000000000..5294eedb845 --- /dev/null +++ b/src/test/ui/codemap_tests/unicode_3.rs @@ -0,0 +1,14 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +fn main() { + let s = "ZͨA͑ͦ͒͋ͤ͑̚L̄͑͋Ĝͨͥ̿͒̽̈́Oͥ͛ͭ!̏"; while true { break; } + println!("{}", s); +} diff --git a/src/test/ui/codemap_tests/unicode_3.stderr b/src/test/ui/codemap_tests/unicode_3.stderr new file mode 100644 index 00000000000..a7514a6b792 --- /dev/null +++ b/src/test/ui/codemap_tests/unicode_3.stderr @@ -0,0 +1,10 @@ +warning: denote infinite loops with `loop { ... }` + --> $DIR/unicode_3.rs:12:45 + | +12 | let s = "ZͨA͑ͦ͒͋ͤ͑̚L̄͑͋Ĝͨͥ̿͒̽̈́Oͥ͛ͭ!̏"; while true { break; } + | ----------^^^^^^^^^^^ + | | + | help: use `loop` + | + = note: #[warn(while_true)] on by default + diff --git a/src/test/ui/issue-44078.stderr b/src/test/ui/issue-44078.stderr index 389f3b2479a..2ed4578d538 100644 --- a/src/test/ui/issue-44078.stderr +++ b/src/test/ui/issue-44078.stderr @@ -2,7 +2,7 @@ error: unterminated double quote string --> $DIR/issue-44078.rs:12:8 | 12 | "😊""; - | ________^ + | _________^ 13 | | } | |__^