mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-26 08:44:35 +00:00
syntax: calculate positions of multibyte characters more correctly.
The positions are still not completely correct, since this does not handle graphemes at all, just codepoints, but at least it handles the common case correctly. The calculation was previously very wrong (rather than just a little bit wrong): it wasn't accounting for the fact that every character is at least 1 byte, and so multibyte characters were pretending to be zero width. cc #8706
This commit is contained in:
parent
ff79a4471c
commit
8812e8ad49
@ -460,11 +460,12 @@ impl CodeMap {
|
||||
for mbc in multibyte_chars.get().iter() {
|
||||
debug!("codemap: {:?}-byte char at {:?}", mbc.bytes, mbc.pos);
|
||||
if mbc.pos < bpos {
|
||||
total_extra_bytes += mbc.bytes;
|
||||
// every character is at least one byte, so we only
|
||||
// count the actual extra bytes.
|
||||
total_extra_bytes += mbc.bytes - 1;
|
||||
// We should never see a byte position in the middle of a
|
||||
// character
|
||||
assert!(bpos == mbc.pos ||
|
||||
bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
|
||||
assert!(bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
@ -4,3 +4,8 @@ all:
|
||||
# check that we don't ICE on unicode input, issue #11178
|
||||
$(RUSTC) multiple_files.rs
|
||||
$(call RUN,multiple_files) "$(RUSTC)" "$(TMPDIR)"
|
||||
|
||||
# check that our multibyte-ident spans are (approximately) the
|
||||
# correct length. issue #8706
|
||||
$(RUSTC) span_length.rs
|
||||
$(call RUN,span_length) "$(RUSTC)" "$(TMPDIR)"
|
||||
|
@ -1,3 +1,13 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use std::{char, os, run, str};
|
||||
use std::rand::{task_rng, Rng};
|
||||
use std::io::File;
|
||||
@ -36,7 +46,8 @@ fn main() {
|
||||
|
||||
for _ in range(0, 100) {
|
||||
{
|
||||
let mut w = File::create(&tmpdir.join("unicode_input_multiple_files_chars.rs")).unwrap();
|
||||
let randoms = tmpdir.join("unicode_input_multiple_files_chars.rs");
|
||||
let mut w = File::create(&randoms).unwrap();
|
||||
for _ in range(0, 30) {
|
||||
let _ = w.write_char(random_char());
|
||||
}
|
||||
|
62
src/test/run-make/unicode-input/span_length.rs
Normal file
62
src/test/run-make/unicode-input/span_length.rs
Normal file
@ -0,0 +1,62 @@
|
||||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use std::{char, os, run, str};
|
||||
use std::rand::{task_rng, Rng};
|
||||
use std::io::File;
|
||||
|
||||
// creates a file with `fn main() { <random ident> }` and checks the
|
||||
// compiler emits a span of the appropriate length (for the
|
||||
// "unresolved name" message); currently just using the number of code
|
||||
// points, but should be the number of graphemes (FIXME #7043)
|
||||
|
||||
/// Returns one randomly chosen `char` suitable for use inside an identifier.
///
/// One of four hard-coded code point ranges is picked at random, then a code
/// point is drawn from it: 0x41-0x5a, 0xf8-0x1ba, 0x1401-0x166c or
/// 0x10400-0x1044f. These cover 1-, 2-, 3- and 4-byte UTF-8 encodings, so the
/// generated identifiers mix single-byte and multibyte characters.
fn random_char() -> char {
|
||||
let mut rng = task_rng();
|
||||
// a subset of the XID_start unicode table (ensuring that the
|
||||
// compiler doesn't fail with an "unrecognised token" error)
|
||||
// NOTE(review): the `4 + 1` (and `hi + 1` below) suggests `gen_range`
// treats its upper bound as exclusive -- confirm against the rand docs.
let (lo, hi): (u32, u32) = match rng.gen_range(1, 4 + 1) {
|
||||
1 => (0x41, 0x5a),
|
||||
2 => (0xf8, 0x1ba),
|
||||
3 => (0x1401, 0x166c),
|
||||
_ => (0x10400, 0x1044f)
|
||||
};
|
||||
|
||||
// None of the ranges above overlaps the surrogate block (0xD800-0xDFFF)
// or exceeds 0x10FFFF, so `from_u32` is expected to yield `Some` and the
// `unwrap` should not fail.
char::from_u32(rng.gen_range(lo, hi + 1)).unwrap()
|
||||
}
|
||||
|
||||
/// Test driver for the multibyte-identifier span length check.
///
/// Reads two command-line arguments: `args[1]` is the compiler command line
/// and `args[2]` is a scratch directory. It then runs 100 iterations. Each
/// one writes `span_main.rs`, containing a `main` whose body is a random
/// identifier of `n` code points, runs the compiler on it through `sh -c`,
/// and asserts that the captured error output contains a span marker of
/// exactly `n` columns (`^` followed by `n - 1` `~`) ending the line.
fn main() {
|
||||
let args = os::args();
|
||||
let rustc = args[1].as_slice();
|
||||
let tmpdir = Path::new(args[2].as_slice());
|
||||
|
||||
let main_file = tmpdir.join("span_main.rs");
|
||||
// `unwrap` here fails if the path cannot be viewed as a string;
// presumably that means non-UTF-8 paths -- confirm against `Path::as_str`.
let main_file_str = main_file.as_str().unwrap();
|
||||
|
||||
for _ in range(0, 100) {
|
||||
// number of code points in this iteration's identifier
let n = task_rng().gen_range(3u, 20);
|
||||
|
||||
{
|
||||
// The result of the write is deliberately discarded (`let _`); a failed
// write would most likely surface as a failed assertion further down.
let _ = write!(&mut File::create(&main_file).unwrap(),
|
||||
r"\#[feature(non_ascii_idents)]; fn main() \{ {} \}",
|
||||
// random string of length n
|
||||
range(0, n).map(|_| random_char()).collect::<~str>());
|
||||
}
|
||||
|
||||
// rustc is passed to us with --out-dir and -L etc., so we
|
||||
// can't exec it directly
|
||||
let result = run::process_output("sh", [~"-c", rustc + " " + main_file_str]).unwrap();
|
||||
|
||||
// NOTE(review): `result.error` is presumably the child's captured
// stderr -- confirm against `run::ProcessOutput`.
let err = str::from_utf8_lossy(result.error);
|
||||
|
||||
// the span should end the line (e.g. no extra ~'s)
|
||||
// `^` marks the first column and each `~` one further column, so an
// identifier of n code points should produce n marker characters.
let expected_span = "^" + "~".repeat(n - 1) + "\n";
|
||||
assert!(err.as_slice().contains(expected_span));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user