lexer: show correct span on lexical errors

Previously, the lexer calling `rdr.fatal(...)` would report the span of the last complete token, instead of a span within the erroneous token (besides one span fixed in 1ac90bb). This commit adds a wrapper around `rdr.fatal(...)` that sets the span explicitly, so that all fatal errors in `libsyntax/parse/lexer.rs` now report the offending code more precisely. A number of tests try to verify that, though the `compile-fail` testing setup can only check that the spans are on the right lines, and the "unterminated string/block comment" errors can't have the line marked at all, so that's incomplete. Closes #9149.
2024-11-23 23:34:48 +00:00 · 2013-09-19 16:11:23 +02:00 · 2013-09-19 16:11:23 +02:00 · 1019177958
commit 1019177958
parent d2b0b11aeb
13 changed files with 215 additions and 19 deletions
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@ -149,6 +149,16 @@ impl reader for TtReader {
    fn dup(@mut self) -> @mut reader { dup_tt_reader(self) as @mut reader }
 }

+// report a lexical error spanning [`from_pos`, `to_pos`)
+fn fatal_span(rdr: @mut StringReader,
+              from_pos: BytePos,
+              to_pos: BytePos,
+              m: ~str)
+           -> ! {
+    rdr.peek_span = codemap::mk_sp(from_pos, to_pos);
+    rdr.fatal(m);
+}
+
 // EFFECT: advance peek_tok and peek_span to refer to the next token.
 // EFFECT: update the interner, maybe.
 fn string_advance_token(r: @mut StringReader) {
@ -327,7 +337,8 @@ fn consume_block_comment(rdr: @mut StringReader)
            bump(rdr);
        }
        if is_eof(rdr) {
-            rdr.fatal(~"unterminated block doc-comment");
+            fatal_span(rdr, start_bpos, rdr.last_pos,
+                       ~"unterminated block doc-comment");
        } else {
            bump(rdr);
            bump(rdr);
@ -344,8 +355,12 @@ fn consume_block_comment(rdr: @mut StringReader)
            }
        }
    } else {
+        let start_bpos = rdr.last_pos - BytePos(2u);
        loop {
-            if is_eof(rdr) { rdr.fatal(~"unterminated block comment"); }
+            if is_eof(rdr) {
+                fatal_span(rdr, start_bpos, rdr.last_pos,
+                           ~"unterminated block comment");
+            }
            if rdr.curr == '*' && nextch(rdr) == '/' {
                bump(rdr);
                bump(rdr);
@ -362,6 +377,7 @@ fn consume_block_comment(rdr: @mut StringReader)
 }

 fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
+    let start_bpos = rdr.last_pos;
    let mut c = rdr.curr;
    let mut rslt = ~"";
    if c == 'e' || c == 'E' {
@ -375,7 +391,10 @@ fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
        let exponent = scan_digits(rdr, 10u);
        if exponent.len() > 0u {
            return Some(rslt + exponent);
-        } else { rdr.fatal(~"scan_exponent: bad fp literal"); }
+        } else {
+            fatal_span(rdr, start_bpos, rdr.last_pos,
+                       ~"scan_exponent: bad fp literal");
+        }
    } else { return None::<~str>; }
 }

@ -399,6 +418,7 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
    let mut base = 10u;
    let mut c = c;
    let mut n = nextch(rdr);
+    let start_bpos = rdr.last_pos;
    if c == '0' && n == 'x' {
        bump(rdr);
        bump(rdr);
@ -442,11 +462,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
                      else { either::Right(ast::ty_u64) };
        }
        if num_str.len() == 0u {
-            rdr.fatal(~"no valid digits found for number");
+            fatal_span(rdr, start_bpos, rdr.last_pos,
+                       ~"no valid digits found for number");
        }
        let parsed = match from_str_radix::<u64>(num_str, base as uint) {
            Some(p) => p,
-            None => rdr.fatal(~"int literal is too large")
+            None => fatal_span(rdr, start_bpos, rdr.last_pos,
+                               ~"int literal is too large")
        };

        match tp {
@ -464,8 +486,10 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
    }
    if is_float {
        match base {
-          16u => rdr.fatal(~"hexadecimal float literal is not supported"),
-          2u => rdr.fatal(~"binary float literal is not supported"),
+          16u => fatal_span(rdr, start_bpos, rdr.last_pos,
+                            ~"hexadecimal float literal is not supported"),
+          2u => fatal_span(rdr, start_bpos, rdr.last_pos,
+                           ~"binary float literal is not supported"),
          _ => ()
        }
    }
@ -507,11 +531,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
        return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str));
    } else {
        if num_str.len() == 0u {
-            rdr.fatal(~"no valid digits found for number");
+            fatal_span(rdr, start_bpos, rdr.last_pos,
+                       ~"no valid digits found for number");
        }
        let parsed = match from_str_radix::<u64>(num_str, base as uint) {
            Some(p) => p,
-            None => rdr.fatal(~"int literal is too large")
+            None => fatal_span(rdr, start_bpos, rdr.last_pos,
+                               ~"int literal is too large")
        };

        debug!("lexing %s as an unsuffixed integer literal",
@ -523,19 +549,23 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
 fn scan_numeric_escape(rdr: @mut StringReader, n_hex_digits: uint) -> char {
    let mut accum_int = 0;
    let mut i = n_hex_digits;
+    let start_bpos = rdr.last_pos;
    while i != 0u {
        let n = rdr.curr;
-        bump(rdr);
        if !is_hex_digit(n) {
-            rdr.fatal(fmt!("illegal numeric character escape: %d", n as int));
+            fatal_span(rdr, rdr.last_pos, rdr.pos,
+                       fmt!("illegal numeric character escape: %d",
+                            n as int));
        }
+        bump(rdr);
        accum_int *= 16;
        accum_int += hex_digit_val(n);
        i -= 1u;
    }
    match char::from_u32(accum_int as u32) {
        Some(x) => x,
-        None => rdr.fatal(fmt!("illegal numeric character escape"))
+        None => fatal_span(rdr, start_bpos, rdr.last_pos,
+                           fmt!("illegal numeric character escape"))
    }
 }

@ -691,6 +721,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
        if c2 == '\\' {
            // '\X' for some X must be a character constant:
            let escaped = rdr.curr;
+            let escaped_pos = rdr.last_pos;
            bump(rdr);
            match escaped {
              'n' => { c2 = '\n'; }
@ -704,12 +735,18 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
              'u' => { c2 = scan_numeric_escape(rdr, 4u); }
              'U' => { c2 = scan_numeric_escape(rdr, 8u); }
              c2 => {
-                rdr.fatal(fmt!("unknown character escape: %d", c2 as int));
+                fatal_span(rdr, escaped_pos, rdr.last_pos,
+                          fmt!("unknown character escape: %d", c2 as int));
              }
            }
        }
        if rdr.curr != '\'' {
-            rdr.fatal(~"unterminated character constant");
+            fatal_span(rdr,
+                       // Byte offsetting here is okay because the character
+                       // before position `start` is an ascii single quote.
+                       start - BytePos(1u),
+                       rdr.last_pos,
+                       ~"unterminated character constant");
        }
        bump(rdr); // advance curr past token
        return token::LIT_CHAR(c2 as u32);
@ -721,7 +758,9 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
        while rdr.curr != '"' {
            if is_eof(rdr) {
                do with_str_from(rdr, n) |s| {
-                    rdr.fatal(fmt!("unterminated double quote string: %s", s));
+                    fatal_span(rdr, n, rdr.last_pos,
+                               fmt!("unterminated double quote string: %s",
+                                    s));
                }
            }

@ -730,6 +769,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
            match ch {
              '\\' => {
                let escaped = rdr.curr;
+                let escaped_pos = rdr.last_pos;
                bump(rdr);
                match escaped {
                  'n' => accum_str.push_char('\n'),
@ -750,7 +790,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
                    accum_str.push_char(scan_numeric_escape(rdr, 8u));
                  }
                  c2 => {
-                    rdr.fatal(fmt!("unknown string escape: %d", c2 as int));
+                    fatal_span(rdr, escaped_pos, rdr.last_pos,
+                               fmt!("unknown string escape: %d", c2 as int));
                  }
                }
              }
@ -786,11 +827,10 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
      '^' => { return binop(rdr, token::CARET); }
      '%' => { return binop(rdr, token::PERCENT); }
      c => {
-          // So the error span points to the unrecognized character
-          rdr.peek_span = codemap::mk_sp(rdr.last_pos, rdr.pos);
          let mut cs = ~"";
          char::escape_default(c, |c| cs.push_char(c));
-          rdr.fatal(fmt!("unknown start of token: %s", cs));
+          fatal_span(rdr, rdr.last_pos, rdr.pos,
+                     fmt!("unknown start of token: %s", cs));
      }
    }
 }
--- a/src/test/compile-fail/lex-bad-fp-lit.rs
+++ b/src/test/compile-fail/lex-bad-fp-lit.rs
@ -0,0 +1,13 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+static f: float =
+    1e+ //~ ERROR: scan_exponent: bad fp literal
+;
--- a/src/test/compile-fail/lex-hex-float-lit.rs
+++ b/src/test/compile-fail/lex-hex-float-lit.rs
@ -0,0 +1,13 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+static f: float =
+    0x539.0 //~ ERROR: hexadecimal float literal is not supported
+;
--- a/src/test/compile-fail/lex-illegal-num-char-escape-2.rs
+++ b/src/test/compile-fail/lex-illegal-num-char-escape-2.rs
@ -0,0 +1,13 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+static c: char =
+    '\Uffffffff' //~ ERROR: illegal numeric character escape
+;
--- a/src/test/compile-fail/lex-illegal-num-char-escape.rs
+++ b/src/test/compile-fail/lex-illegal-num-char-escape.rs
@ -0,0 +1,13 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+static c: char =
+    '\u539_' //~ ERROR: illegal numeric character escape
+;
--- a/src/test/compile-fail/lex-int-lit-too-large-2.rs
+++ b/src/test/compile-fail/lex-int-lit-too-large-2.rs
@ -0,0 +1,13 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+static i: int =
+    99999999999999999999999999999999u32 //~ ERROR: int literal is too large
+;
--- a/src/test/compile-fail/lex-int-lit-too-large.rs
+++ b/src/test/compile-fail/lex-int-lit-too-large.rs
@ -0,0 +1,13 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+static i: int =
+    99999999999999999999999999999999 //~ ERROR: int literal is too large
+;
--- a/src/test/compile-fail/lex-no-valid-digits-2.rs
+++ b/src/test/compile-fail/lex-no-valid-digits-2.rs
@ -0,0 +1,13 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+static i: int =
+    0xu32 //~ ERROR: no valid digits
+;
--- a/src/test/compile-fail/lex-no-valid-digits.rs
+++ b/src/test/compile-fail/lex-no-valid-digits.rs
@ -0,0 +1,13 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+static i: int =
+    0x //~ ERROR: no valid digits
+;
--- a/src/test/compile-fail/lex-unknown-char-escape.rs
+++ b/src/test/compile-fail/lex-unknown-char-escape.rs
@ -0,0 +1,13 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+static c: char =
+    '\●' //~ ERROR: unknown character escape
+;
--- a/src/test/compile-fail/lex-unknown-start-tok.rs
+++ b/src/test/compile-fail/lex-unknown-start-tok.rs
@ -0,0 +1,13 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+fn main() {
+    ● //~ ERROR: unknown start of token
+}
--- a/src/test/compile-fail/lex-unknown-str-escape.rs
+++ b/src/test/compile-fail/lex-unknown-str-escape.rs
@ -0,0 +1,13 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+static s: &'static str =
+    "\●" //~ ERROR: unknown string escape
+;
--- a/src/test/compile-fail/lex-unterminated-char-const.rs
+++ b/src/test/compile-fail/lex-unterminated-char-const.rs
@ -0,0 +1,13 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+static c: char =
+    '●  //~ ERROR: unterminated character constant
+;