mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-27 09:14:20 +00:00
auto merge of #12086 : huonw/rust/safe-json, r=kballard
The lexer and the JSON parser were using `transmute(-1): char` as a sentinel value for EOF, which is invalid because a `char` must always be a valid Unicode code point. Representing EOF as `Option<char>` (`None`) instead removes the unsound transmute and allows range asserts on `char` loads, since valid values always lie between 0 and 0x10FFFF.
This commit is contained in:
commit
95483e30a2
@ -229,7 +229,6 @@ fn main() {
|
||||
*/
|
||||
|
||||
use std::char;
|
||||
use std::cast::transmute;
|
||||
use std::f64;
|
||||
use std::hashmap::HashMap;
|
||||
use std::io;
|
||||
@ -718,7 +717,7 @@ impl Json {
|
||||
|
||||
pub struct Parser<T> {
|
||||
priv rdr: T,
|
||||
priv ch: char,
|
||||
priv ch: Option<char>,
|
||||
priv line: uint,
|
||||
priv col: uint,
|
||||
}
|
||||
@ -728,7 +727,7 @@ impl<T: Iterator<char>> Parser<T> {
|
||||
pub fn new(rdr: T) -> Parser<T> {
|
||||
let mut p = Parser {
|
||||
rdr: rdr,
|
||||
ch: '\x00',
|
||||
ch: Some('\x00'),
|
||||
line: 1,
|
||||
col: 0,
|
||||
};
|
||||
@ -756,16 +755,12 @@ impl<T: Iterator<char>> Parser<T> {
|
||||
}
|
||||
|
||||
impl<T : Iterator<char>> Parser<T> {
|
||||
// FIXME: #8971: unsound
|
||||
fn eof(&self) -> bool { self.ch == unsafe { transmute(-1u32) } }
|
||||
|
||||
fn eof(&self) -> bool { self.ch.is_none() }
|
||||
fn ch_or_null(&self) -> char { self.ch.unwrap_or('\x00') }
|
||||
fn bump(&mut self) {
|
||||
match self.rdr.next() {
|
||||
Some(ch) => self.ch = ch,
|
||||
None() => self.ch = unsafe { transmute(-1u32) }, // FIXME: #8971: unsound
|
||||
}
|
||||
self.ch = self.rdr.next();
|
||||
|
||||
if self.ch == '\n' {
|
||||
if self.ch_is('\n') {
|
||||
self.line += 1u;
|
||||
self.col = 1u;
|
||||
} else {
|
||||
@ -773,10 +768,13 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
}
|
||||
}
|
||||
|
||||
fn next_char(&mut self) -> char {
|
||||
fn next_char(&mut self) -> Option<char> {
|
||||
self.bump();
|
||||
self.ch
|
||||
}
|
||||
fn ch_is(&self, c: char) -> bool {
|
||||
self.ch == Some(c)
|
||||
}
|
||||
|
||||
fn error<T>(&self, msg: ~str) -> Result<T, Error> {
|
||||
Err(Error { line: self.line, col: self.col, msg: msg })
|
||||
@ -787,31 +785,32 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
|
||||
if self.eof() { return self.error(~"EOF while parsing value"); }
|
||||
|
||||
match self.ch {
|
||||
'n' => self.parse_ident("ull", Null),
|
||||
't' => self.parse_ident("rue", Boolean(true)),
|
||||
'f' => self.parse_ident("alse", Boolean(false)),
|
||||
'0' .. '9' | '-' => self.parse_number(),
|
||||
'"' =>
|
||||
match self.parse_str() {
|
||||
Ok(s) => Ok(String(s)),
|
||||
Err(e) => Err(e),
|
||||
match self.ch_or_null() {
|
||||
'n' => self.parse_ident("ull", Null),
|
||||
't' => self.parse_ident("rue", Boolean(true)),
|
||||
'f' => self.parse_ident("alse", Boolean(false)),
|
||||
'0' .. '9' | '-' => self.parse_number(),
|
||||
'"' => {
|
||||
match self.parse_str() {
|
||||
Ok(s) => Ok(String(s)),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
},
|
||||
'[' => self.parse_list(),
|
||||
'{' => self.parse_object(),
|
||||
_ => self.error(~"invalid syntax")
|
||||
'[' => self.parse_list(),
|
||||
'{' => self.parse_object(),
|
||||
_ => self.error(~"invalid syntax"),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_whitespace(&mut self) {
|
||||
while self.ch == ' ' ||
|
||||
self.ch == '\n' ||
|
||||
self.ch == '\t' ||
|
||||
self.ch == '\r' { self.bump(); }
|
||||
while self.ch_is(' ') ||
|
||||
self.ch_is('\n') ||
|
||||
self.ch_is('\t') ||
|
||||
self.ch_is('\r') { self.bump(); }
|
||||
}
|
||||
|
||||
fn parse_ident(&mut self, ident: &str, value: Json) -> Result<Json, Error> {
|
||||
if ident.chars().all(|c| c == self.next_char()) {
|
||||
if ident.chars().all(|c| Some(c) == self.next_char()) {
|
||||
self.bump();
|
||||
Ok(value)
|
||||
} else {
|
||||
@ -822,7 +821,7 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
fn parse_number(&mut self) -> Result<Json, Error> {
|
||||
let mut neg = 1.0;
|
||||
|
||||
if self.ch == '-' {
|
||||
if self.ch_is('-') {
|
||||
self.bump();
|
||||
neg = -1.0;
|
||||
}
|
||||
@ -832,14 +831,14 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
Err(e) => return Err(e)
|
||||
};
|
||||
|
||||
if self.ch == '.' {
|
||||
if self.ch_is('.') {
|
||||
match self.parse_decimal(res) {
|
||||
Ok(r) => res = r,
|
||||
Err(e) => return Err(e)
|
||||
}
|
||||
}
|
||||
|
||||
if self.ch == 'e' || self.ch == 'E' {
|
||||
if self.ch_is('e') || self.ch_is('E') {
|
||||
match self.parse_exponent(res) {
|
||||
Ok(r) => res = r,
|
||||
Err(e) => return Err(e)
|
||||
@ -852,32 +851,31 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
fn parse_integer(&mut self) -> Result<f64, Error> {
|
||||
let mut res = 0.0;
|
||||
|
||||
match self.ch {
|
||||
'0' => {
|
||||
self.bump();
|
||||
match self.ch_or_null() {
|
||||
'0' => {
|
||||
self.bump();
|
||||
|
||||
// There can be only one leading '0'.
|
||||
match self.ch {
|
||||
'0' .. '9' => return self.error(~"invalid number"),
|
||||
_ => ()
|
||||
}
|
||||
}
|
||||
'1' .. '9' => {
|
||||
while !self.eof() {
|
||||
match self.ch {
|
||||
'0' .. '9' => {
|
||||
res *= 10.0;
|
||||
res += ((self.ch as int) - ('0' as int)) as f64;
|
||||
// There can be only one leading '0'.
|
||||
match self.ch_or_null() {
|
||||
'0' .. '9' => return self.error(~"invalid number"),
|
||||
_ => ()
|
||||
}
|
||||
},
|
||||
'1' .. '9' => {
|
||||
while !self.eof() {
|
||||
match self.ch_or_null() {
|
||||
c @ '0' .. '9' => {
|
||||
res *= 10.0;
|
||||
res += ((c as int) - ('0' as int)) as f64;
|
||||
|
||||
self.bump();
|
||||
}
|
||||
_ => break
|
||||
self.bump();
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => return self.error(~"invalid number")
|
||||
_ => return self.error(~"invalid number"),
|
||||
}
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
@ -885,22 +883,22 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
self.bump();
|
||||
|
||||
// Make sure a digit follows the decimal place.
|
||||
match self.ch {
|
||||
'0' .. '9' => (),
|
||||
_ => return self.error(~"invalid number")
|
||||
match self.ch_or_null() {
|
||||
'0' .. '9' => (),
|
||||
_ => return self.error(~"invalid number")
|
||||
}
|
||||
|
||||
let mut res = res;
|
||||
let mut dec = 1.0;
|
||||
while !self.eof() {
|
||||
match self.ch {
|
||||
'0' .. '9' => {
|
||||
dec /= 10.0;
|
||||
res += (((self.ch as int) - ('0' as int)) as f64) * dec;
|
||||
match self.ch_or_null() {
|
||||
c @ '0' .. '9' => {
|
||||
dec /= 10.0;
|
||||
res += (((c as int) - ('0' as int)) as f64) * dec;
|
||||
|
||||
self.bump();
|
||||
}
|
||||
_ => break
|
||||
self.bump();
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
@ -913,27 +911,27 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
let mut exp = 0u;
|
||||
let mut neg_exp = false;
|
||||
|
||||
match self.ch {
|
||||
'+' => self.bump(),
|
||||
'-' => { self.bump(); neg_exp = true; }
|
||||
_ => ()
|
||||
if self.ch_is('+') {
|
||||
self.bump();
|
||||
} else if self.ch_is('-') {
|
||||
self.bump();
|
||||
neg_exp = true;
|
||||
}
|
||||
|
||||
// Make sure a digit follows the exponent place.
|
||||
match self.ch {
|
||||
'0' .. '9' => (),
|
||||
_ => return self.error(~"invalid number")
|
||||
match self.ch_or_null() {
|
||||
'0' .. '9' => (),
|
||||
_ => return self.error(~"invalid number")
|
||||
}
|
||||
|
||||
while !self.eof() {
|
||||
match self.ch {
|
||||
'0' .. '9' => {
|
||||
exp *= 10u;
|
||||
exp += (self.ch as uint) - ('0' as uint);
|
||||
match self.ch_or_null() {
|
||||
c @ '0' .. '9' => {
|
||||
exp *= 10;
|
||||
exp += (c as uint) - ('0' as uint);
|
||||
|
||||
self.bump();
|
||||
}
|
||||
_ => break
|
||||
self.bump();
|
||||
}
|
||||
_ => break
|
||||
}
|
||||
}
|
||||
|
||||
@ -958,56 +956,55 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
}
|
||||
|
||||
if escape {
|
||||
match self.ch {
|
||||
'"' => res.push_char('"'),
|
||||
'\\' => res.push_char('\\'),
|
||||
'/' => res.push_char('/'),
|
||||
'b' => res.push_char('\x08'),
|
||||
'f' => res.push_char('\x0c'),
|
||||
'n' => res.push_char('\n'),
|
||||
'r' => res.push_char('\r'),
|
||||
't' => res.push_char('\t'),
|
||||
'u' => {
|
||||
// Parse \u1234.
|
||||
let mut i = 0u;
|
||||
let mut n = 0u;
|
||||
while i < 4u {
|
||||
match self.next_char() {
|
||||
'0' .. '9' => {
|
||||
n = n * 16u + (self.ch as uint)
|
||||
- ('0' as uint);
|
||||
},
|
||||
'a' | 'A' => n = n * 16u + 10u,
|
||||
'b' | 'B' => n = n * 16u + 11u,
|
||||
'c' | 'C' => n = n * 16u + 12u,
|
||||
'd' | 'D' => n = n * 16u + 13u,
|
||||
'e' | 'E' => n = n * 16u + 14u,
|
||||
'f' | 'F' => n = n * 16u + 15u,
|
||||
_ => return self.error(
|
||||
~"invalid \\u escape (unrecognized hex)")
|
||||
}
|
||||
i += 1u;
|
||||
}
|
||||
match self.ch_or_null() {
|
||||
'"' => res.push_char('"'),
|
||||
'\\' => res.push_char('\\'),
|
||||
'/' => res.push_char('/'),
|
||||
'b' => res.push_char('\x08'),
|
||||
'f' => res.push_char('\x0c'),
|
||||
'n' => res.push_char('\n'),
|
||||
'r' => res.push_char('\r'),
|
||||
't' => res.push_char('\t'),
|
||||
'u' => {
|
||||
// Parse \u1234.
|
||||
let mut i = 0u;
|
||||
let mut n = 0u;
|
||||
while i < 4u && !self.eof() {
|
||||
self.bump();
|
||||
n = match self.ch_or_null() {
|
||||
c @ '0' .. '9' => n * 16u + (c as uint) - ('0' as uint),
|
||||
'a' | 'A' => n * 16u + 10u,
|
||||
'b' | 'B' => n * 16u + 11u,
|
||||
'c' | 'C' => n * 16u + 12u,
|
||||
'd' | 'D' => n * 16u + 13u,
|
||||
'e' | 'E' => n * 16u + 14u,
|
||||
'f' | 'F' => n * 16u + 15u,
|
||||
_ => return self.error(
|
||||
~"invalid \\u escape (unrecognized hex)")
|
||||
};
|
||||
|
||||
// Error out if we didn't parse 4 digits.
|
||||
if i != 4u {
|
||||
return self.error(
|
||||
~"invalid \\u escape (not four digits)");
|
||||
}
|
||||
i += 1u;
|
||||
}
|
||||
|
||||
res.push_char(char::from_u32(n as u32).unwrap());
|
||||
}
|
||||
_ => return self.error(~"invalid escape")
|
||||
// Error out if we didn't parse 4 digits.
|
||||
if i != 4u {
|
||||
return self.error(
|
||||
~"invalid \\u escape (not four digits)");
|
||||
}
|
||||
|
||||
res.push_char(char::from_u32(n as u32).unwrap());
|
||||
}
|
||||
_ => return self.error(~"invalid escape"),
|
||||
}
|
||||
escape = false;
|
||||
} else if self.ch == '\\' {
|
||||
} else if self.ch_is('\\') {
|
||||
escape = true;
|
||||
} else {
|
||||
if self.ch == '"' {
|
||||
self.bump();
|
||||
return Ok(res);
|
||||
match self.ch {
|
||||
Some('"') => { self.bump(); return Ok(res); },
|
||||
Some(c) => res.push_char(c),
|
||||
None => unreachable!()
|
||||
}
|
||||
res.push_char(self.ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1018,7 +1015,7 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
|
||||
let mut values = ~[];
|
||||
|
||||
if self.ch == ']' {
|
||||
if self.ch_is(']') {
|
||||
self.bump();
|
||||
return Ok(List(values));
|
||||
}
|
||||
@ -1034,10 +1031,13 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
return self.error(~"EOF while parsing list");
|
||||
}
|
||||
|
||||
match self.ch {
|
||||
',' => self.bump(),
|
||||
']' => { self.bump(); return Ok(List(values)); }
|
||||
_ => return self.error(~"expected `,` or `]`")
|
||||
if self.ch_is(',') {
|
||||
self.bump();
|
||||
} else if self.ch_is(']') {
|
||||
self.bump();
|
||||
return Ok(List(values));
|
||||
} else {
|
||||
return self.error(~"expected `,` or `]`")
|
||||
}
|
||||
};
|
||||
}
|
||||
@ -1048,7 +1048,7 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
|
||||
let mut values = ~TreeMap::new();
|
||||
|
||||
if self.ch == '}' {
|
||||
if self.ch_is('}') {
|
||||
self.bump();
|
||||
return Ok(Object(values));
|
||||
}
|
||||
@ -1056,7 +1056,7 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
while !self.eof() {
|
||||
self.parse_whitespace();
|
||||
|
||||
if self.ch != '"' {
|
||||
if !self.ch_is('"') {
|
||||
return self.error(~"key must be a string");
|
||||
}
|
||||
|
||||
@ -1067,7 +1067,7 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
|
||||
self.parse_whitespace();
|
||||
|
||||
if self.ch != ':' {
|
||||
if !self.ch_is(':') {
|
||||
if self.eof() { break; }
|
||||
return self.error(~"expected `:`");
|
||||
}
|
||||
@ -1079,13 +1079,13 @@ impl<T : Iterator<char>> Parser<T> {
|
||||
}
|
||||
self.parse_whitespace();
|
||||
|
||||
match self.ch {
|
||||
',' => self.bump(),
|
||||
'}' => { self.bump(); return Ok(Object(values)); }
|
||||
_ => {
|
||||
if self.eof() { break; }
|
||||
return self.error(~"expected `,` or `}`");
|
||||
}
|
||||
match self.ch_or_null() {
|
||||
',' => self.bump(),
|
||||
'}' => { self.bump(); return Ok(Object(values)); },
|
||||
_ => {
|
||||
if self.eof() { break; }
|
||||
return self.error(~"expected `,` or `}`");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -545,7 +545,11 @@ fn load<'a>(bcx: &'a Block<'a>, llptr: ValueRef, ty: ty::t) -> ValueRef {
|
||||
if type_is_zero_size(bcx.ccx(), ty) {
|
||||
C_undef(type_of::type_of(bcx.ccx(), ty))
|
||||
} else if ty::type_is_bool(ty) {
|
||||
LoadRangeAssert(bcx, llptr, 0, 2, lib::llvm::True)
|
||||
LoadRangeAssert(bcx, llptr, 0, 2, lib::llvm::False)
|
||||
} else if ty::type_is_char(ty) {
|
||||
// a char is a unicode codepoint, and so takes values from 0
|
||||
// to 0x10FFFF inclusive only.
|
||||
LoadRangeAssert(bcx, llptr, 0, 0x10FFFF + 1, lib::llvm::False)
|
||||
} else {
|
||||
Load(bcx, llptr)
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ use ast;
|
||||
use codemap::{BytePos, CharPos, CodeMap, Pos};
|
||||
use diagnostic;
|
||||
use parse::lexer::{is_whitespace, with_str_from, Reader};
|
||||
use parse::lexer::{StringReader, bump, is_eof, nextch, TokenAndSpan};
|
||||
use parse::lexer::{StringReader, bump, is_eof, nextch_is, TokenAndSpan};
|
||||
use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
|
||||
use parse::lexer;
|
||||
use parse::token;
|
||||
@ -136,11 +136,11 @@ pub fn strip_doc_comment_decoration(comment: &str) -> ~str {
|
||||
|
||||
fn read_to_eol(rdr: &StringReader) -> ~str {
|
||||
let mut val = ~"";
|
||||
while rdr.curr.get() != '\n' && !is_eof(rdr) {
|
||||
val.push_char(rdr.curr.get());
|
||||
while !rdr.curr_is('\n') && !is_eof(rdr) {
|
||||
val.push_char(rdr.curr.get().unwrap());
|
||||
bump(rdr);
|
||||
}
|
||||
if rdr.curr.get() == '\n' { bump(rdr); }
|
||||
if rdr.curr_is('\n') { bump(rdr); }
|
||||
return val;
|
||||
}
|
||||
|
||||
@ -152,7 +152,7 @@ fn read_one_line_comment(rdr: &StringReader) -> ~str {
|
||||
}
|
||||
|
||||
fn consume_non_eol_whitespace(rdr: &StringReader) {
|
||||
while is_whitespace(rdr.curr.get()) && rdr.curr.get() != '\n' &&
|
||||
while is_whitespace(rdr.curr.get()) && !rdr.curr_is('\n') &&
|
||||
!is_eof(rdr) {
|
||||
bump(rdr);
|
||||
}
|
||||
@ -171,7 +171,7 @@ fn push_blank_line_comment(rdr: &StringReader, comments: &mut ~[Comment]) {
|
||||
fn consume_whitespace_counting_blank_lines(rdr: &StringReader,
|
||||
comments: &mut ~[Comment]) {
|
||||
while is_whitespace(rdr.curr.get()) && !is_eof(rdr) {
|
||||
if rdr.col.get() == CharPos(0u) && rdr.curr.get() == '\n' {
|
||||
if rdr.col.get() == CharPos(0u) && rdr.curr_is('\n') {
|
||||
push_blank_line_comment(rdr, &mut *comments);
|
||||
}
|
||||
bump(rdr);
|
||||
@ -196,7 +196,7 @@ fn read_line_comments(rdr: &StringReader, code_to_the_left: bool,
|
||||
debug!(">>> line comments");
|
||||
let p = rdr.last_pos.get();
|
||||
let mut lines: ~[~str] = ~[];
|
||||
while rdr.curr.get() == '/' && nextch(rdr) == '/' {
|
||||
while rdr.curr_is('/') && nextch_is(rdr, '/') {
|
||||
let line = read_one_line_comment(rdr);
|
||||
debug!("{}", line);
|
||||
if is_doc_comment(line) { // doc-comments are not put in comments
|
||||
@ -261,9 +261,9 @@ fn read_block_comment(rdr: &StringReader,
|
||||
let mut curr_line = ~"/*";
|
||||
|
||||
// doc-comments are not really comments, they are attributes
|
||||
if rdr.curr.get() == '*' || rdr.curr.get() == '!' {
|
||||
while !(rdr.curr.get() == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
|
||||
curr_line.push_char(rdr.curr.get());
|
||||
if rdr.curr_is('*') || rdr.curr_is('!') {
|
||||
while !(rdr.curr_is('*') && nextch_is(rdr, '/')) && !is_eof(rdr) {
|
||||
curr_line.push_char(rdr.curr.get().unwrap());
|
||||
bump(rdr);
|
||||
}
|
||||
if !is_eof(rdr) {
|
||||
@ -281,20 +281,20 @@ fn read_block_comment(rdr: &StringReader,
|
||||
if is_eof(rdr) {
|
||||
rdr.fatal(~"unterminated block comment");
|
||||
}
|
||||
if rdr.curr.get() == '\n' {
|
||||
if rdr.curr_is('\n') {
|
||||
trim_whitespace_prefix_and_push_line(&mut lines, curr_line,
|
||||
col);
|
||||
curr_line = ~"";
|
||||
bump(rdr);
|
||||
} else {
|
||||
curr_line.push_char(rdr.curr.get());
|
||||
if rdr.curr.get() == '/' && nextch(rdr) == '*' {
|
||||
curr_line.push_char(rdr.curr.get().unwrap());
|
||||
if rdr.curr_is('/') && nextch_is(rdr, '*') {
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
curr_line.push_char('*');
|
||||
level += 1;
|
||||
} else {
|
||||
if rdr.curr.get() == '*' && nextch(rdr) == '/' {
|
||||
if rdr.curr_is('*') && nextch_is(rdr, '/') {
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
curr_line.push_char('/');
|
||||
@ -310,7 +310,7 @@ fn read_block_comment(rdr: &StringReader,
|
||||
|
||||
let mut style = if code_to_the_left { Trailing } else { Isolated };
|
||||
consume_non_eol_whitespace(rdr);
|
||||
if !is_eof(rdr) && rdr.curr.get() != '\n' && lines.len() == 1u {
|
||||
if !is_eof(rdr) && !rdr.curr_is('\n') && lines.len() == 1u {
|
||||
style = Mixed;
|
||||
}
|
||||
debug!("<<< block comment");
|
||||
@ -318,20 +318,20 @@ fn read_block_comment(rdr: &StringReader,
|
||||
}
|
||||
|
||||
fn peeking_at_comment(rdr: &StringReader) -> bool {
|
||||
return ((rdr.curr.get() == '/' && nextch(rdr) == '/') ||
|
||||
(rdr.curr.get() == '/' && nextch(rdr) == '*')) ||
|
||||
(rdr.curr.get() == '#' && nextch(rdr) == '!');
|
||||
return (rdr.curr_is('/') && nextch_is(rdr, '/')) ||
|
||||
(rdr.curr_is('/') && nextch_is(rdr, '*')) ||
|
||||
(rdr.curr_is('#') && nextch_is(rdr, '!'));
|
||||
}
|
||||
|
||||
fn consume_comment(rdr: &StringReader,
|
||||
code_to_the_left: bool,
|
||||
comments: &mut ~[Comment]) {
|
||||
debug!(">>> consume comment");
|
||||
if rdr.curr.get() == '/' && nextch(rdr) == '/' {
|
||||
if rdr.curr_is('/') && nextch_is(rdr, '/') {
|
||||
read_line_comments(rdr, code_to_the_left, comments);
|
||||
} else if rdr.curr.get() == '/' && nextch(rdr) == '*' {
|
||||
} else if rdr.curr_is('/') && nextch_is(rdr, '*') {
|
||||
read_block_comment(rdr, code_to_the_left, comments);
|
||||
} else if rdr.curr.get() == '#' && nextch(rdr) == '!' {
|
||||
} else if rdr.curr_is('#') && nextch_is(rdr, '!') {
|
||||
read_shebang_comment(rdr, code_to_the_left, comments);
|
||||
} else { fail!(); }
|
||||
debug!("<<< consume comment");
|
||||
@ -363,7 +363,7 @@ pub fn gather_comments_and_literals(span_diagnostic:
|
||||
loop {
|
||||
let mut code_to_the_left = !first_read;
|
||||
consume_non_eol_whitespace(&rdr);
|
||||
if rdr.curr.get() == '\n' {
|
||||
if rdr.curr_is('\n') {
|
||||
code_to_the_left = false;
|
||||
consume_whitespace_counting_blank_lines(&rdr, &mut comments);
|
||||
}
|
||||
|
@ -16,7 +16,6 @@ use ext::tt::transcribe::{dup_tt_reader, tt_next_token};
|
||||
use parse::token;
|
||||
use parse::token::{str_to_ident};
|
||||
|
||||
use std::cast::transmute;
|
||||
use std::cell::{Cell, RefCell};
|
||||
use std::char;
|
||||
use std::num::from_str_radix;
|
||||
@ -48,13 +47,19 @@ pub struct StringReader {
|
||||
// The column of the next character to read
|
||||
col: Cell<CharPos>,
|
||||
// The last character to be read
|
||||
curr: Cell<char>,
|
||||
curr: Cell<Option<char>>,
|
||||
filemap: @codemap::FileMap,
|
||||
/* cached: */
|
||||
peek_tok: RefCell<token::Token>,
|
||||
peek_span: RefCell<Span>,
|
||||
}
|
||||
|
||||
impl StringReader {
|
||||
pub fn curr_is(&self, c: char) -> bool {
|
||||
self.curr.get() == Some(c)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_string_reader(span_diagnostic: @SpanHandler,
|
||||
filemap: @codemap::FileMap)
|
||||
-> StringReader {
|
||||
@ -74,7 +79,7 @@ pub fn new_low_level_string_reader(span_diagnostic: @SpanHandler,
|
||||
pos: Cell::new(filemap.start_pos),
|
||||
last_pos: Cell::new(filemap.start_pos),
|
||||
col: Cell::new(CharPos(0)),
|
||||
curr: Cell::new(initial_char),
|
||||
curr: Cell::new(Some(initial_char)),
|
||||
filemap: filemap,
|
||||
/* dummy values; not read */
|
||||
peek_tok: RefCell::new(token::EOF),
|
||||
@ -246,14 +251,12 @@ pub fn bump(rdr: &StringReader) {
|
||||
rdr.last_pos.set(rdr.pos.get());
|
||||
let current_byte_offset = byte_offset(rdr, rdr.pos.get()).to_uint();
|
||||
if current_byte_offset < (rdr.filemap.src).len() {
|
||||
assert!(rdr.curr.get() != unsafe {
|
||||
transmute(-1u32)
|
||||
}); // FIXME: #8971: unsound
|
||||
let last_char = rdr.curr.get();
|
||||
assert!(rdr.curr.get().is_some());
|
||||
let last_char = rdr.curr.get().unwrap();
|
||||
let next = rdr.filemap.src.char_range_at(current_byte_offset);
|
||||
let byte_offset_diff = next.next - current_byte_offset;
|
||||
rdr.pos.set(rdr.pos.get() + Pos::from_uint(byte_offset_diff));
|
||||
rdr.curr.set(next.ch);
|
||||
rdr.curr.set(Some(next.ch));
|
||||
rdr.col.set(rdr.col.get() + CharPos(1u));
|
||||
if last_char == '\n' {
|
||||
rdr.filemap.next_line(rdr.last_pos.get());
|
||||
@ -265,37 +268,50 @@ pub fn bump(rdr: &StringReader) {
|
||||
Pos::from_uint(current_byte_offset), byte_offset_diff);
|
||||
}
|
||||
} else {
|
||||
rdr.curr.set(unsafe { transmute(-1u32) }); // FIXME: #8971: unsound
|
||||
rdr.curr.set(None);
|
||||
}
|
||||
}
|
||||
pub fn is_eof(rdr: &StringReader) -> bool {
|
||||
rdr.curr.get() == unsafe { transmute(-1u32) } // FIXME: #8971: unsound
|
||||
rdr.curr.get().is_none()
|
||||
}
|
||||
pub fn nextch(rdr: &StringReader) -> char {
|
||||
pub fn nextch(rdr: &StringReader) -> Option<char> {
|
||||
let offset = byte_offset(rdr, rdr.pos.get()).to_uint();
|
||||
if offset < (rdr.filemap.src).len() {
|
||||
return rdr.filemap.src.char_at(offset);
|
||||
} else { return unsafe { transmute(-1u32) }; } // FIXME: #8971: unsound
|
||||
Some(rdr.filemap.src.char_at(offset))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
pub fn nextch_is(rdr: &StringReader, c: char) -> bool {
|
||||
nextch(rdr) == Some(c)
|
||||
}
|
||||
|
||||
fn hex_digit_val(c: char) -> int {
|
||||
if in_range(c, '0', '9') { return (c as int) - ('0' as int); }
|
||||
if in_range(c, 'a', 'f') { return (c as int) - ('a' as int) + 10; }
|
||||
if in_range(c, 'A', 'F') { return (c as int) - ('A' as int) + 10; }
|
||||
fn hex_digit_val(c: Option<char>) -> int {
|
||||
let d = c.unwrap_or('\x00');
|
||||
|
||||
if in_range(c, '0', '9') { return (d as int) - ('0' as int); }
|
||||
if in_range(c, 'a', 'f') { return (d as int) - ('a' as int) + 10; }
|
||||
if in_range(c, 'A', 'F') { return (d as int) - ('A' as int) + 10; }
|
||||
fail!();
|
||||
}
|
||||
|
||||
pub fn is_whitespace(c: char) -> bool {
|
||||
return c == ' ' || c == '\t' || c == '\r' || c == '\n';
|
||||
pub fn is_whitespace(c: Option<char>) -> bool {
|
||||
match c.unwrap_or('\x00') { // None can be null for now... it's not whitespace
|
||||
' ' | '\n' | '\t' | '\r' => true,
|
||||
_ => false
|
||||
}
|
||||
}
|
||||
|
||||
fn in_range(c: char, lo: char, hi: char) -> bool {
|
||||
return lo <= c && c <= hi
|
||||
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
|
||||
match c {
|
||||
Some(c) => lo <= c && c <= hi,
|
||||
_ => false
|
||||
}
|
||||
}
|
||||
|
||||
fn is_dec_digit(c: char) -> bool { return in_range(c, '0', '9'); }
|
||||
fn is_dec_digit(c: Option<char>) -> bool { return in_range(c, '0', '9'); }
|
||||
|
||||
fn is_hex_digit(c: char) -> bool {
|
||||
fn is_hex_digit(c: Option<char>) -> bool {
|
||||
return in_range(c, '0', '9') || in_range(c, 'a', 'f') ||
|
||||
in_range(c, 'A', 'F');
|
||||
}
|
||||
@ -317,15 +333,15 @@ pub fn is_line_non_doc_comment(s: &str) -> bool {
|
||||
// returns a Some(sugared-doc-attr) if one exists, None otherwise
|
||||
fn consume_any_line_comment(rdr: &StringReader)
|
||||
-> Option<TokenAndSpan> {
|
||||
if rdr.curr.get() == '/' {
|
||||
if rdr.curr_is('/') {
|
||||
match nextch(rdr) {
|
||||
'/' => {
|
||||
Some('/') => {
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
// line comments starting with "///" or "//!" are doc-comments
|
||||
if rdr.curr.get() == '/' || rdr.curr.get() == '!' {
|
||||
if rdr.curr_is('/') || rdr.curr_is('!') {
|
||||
let start_bpos = rdr.pos.get() - BytePos(3);
|
||||
while rdr.curr.get() != '\n' && !is_eof(rdr) {
|
||||
while !rdr.curr_is('\n') && !is_eof(rdr) {
|
||||
bump(rdr);
|
||||
}
|
||||
let ret = with_str_from(rdr, start_bpos, |string| {
|
||||
@ -344,16 +360,16 @@ fn consume_any_line_comment(rdr: &StringReader)
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
while rdr.curr.get() != '\n' && !is_eof(rdr) { bump(rdr); }
|
||||
while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); }
|
||||
}
|
||||
// Restart whitespace munch.
|
||||
return consume_whitespace_and_comments(rdr);
|
||||
}
|
||||
'*' => { bump(rdr); bump(rdr); return consume_block_comment(rdr); }
|
||||
Some('*') => { bump(rdr); bump(rdr); return consume_block_comment(rdr); }
|
||||
_ => ()
|
||||
}
|
||||
} else if rdr.curr.get() == '#' {
|
||||
if nextch(rdr) == '!' {
|
||||
} else if rdr.curr_is('#') {
|
||||
if nextch_is(rdr, '!') {
|
||||
// I guess this is the only way to figure out if
|
||||
// we're at the beginning of the file...
|
||||
let cmap = @CodeMap::new();
|
||||
@ -363,7 +379,7 @@ fn consume_any_line_comment(rdr: &StringReader)
|
||||
}
|
||||
let loc = cmap.lookup_char_pos_adj(rdr.last_pos.get());
|
||||
if loc.line == 1u && loc.col == CharPos(0u) {
|
||||
while rdr.curr.get() != '\n' && !is_eof(rdr) { bump(rdr); }
|
||||
while !rdr.curr_is('\n') && !is_eof(rdr) { bump(rdr); }
|
||||
return consume_whitespace_and_comments(rdr);
|
||||
}
|
||||
}
|
||||
@ -378,7 +394,7 @@ pub fn is_block_non_doc_comment(s: &str) -> bool {
|
||||
// might return a sugared-doc-attr
|
||||
fn consume_block_comment(rdr: &StringReader) -> Option<TokenAndSpan> {
|
||||
// block comments starting with "/**" or "/*!" are doc-comments
|
||||
let is_doc_comment = rdr.curr.get() == '*' || rdr.curr.get() == '!';
|
||||
let is_doc_comment = rdr.curr_is('*') || rdr.curr_is('!');
|
||||
let start_bpos = rdr.pos.get() - BytePos(if is_doc_comment {3} else {2});
|
||||
|
||||
let mut level: int = 1;
|
||||
@ -390,11 +406,11 @@ fn consume_block_comment(rdr: &StringReader) -> Option<TokenAndSpan> {
|
||||
~"unterminated block comment"
|
||||
};
|
||||
fatal_span(rdr, start_bpos, rdr.last_pos.get(), msg);
|
||||
} else if rdr.curr.get() == '/' && nextch(rdr) == '*' {
|
||||
} else if rdr.curr_is('/') && nextch_is(rdr, '*') {
|
||||
level += 1;
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
} else if rdr.curr.get() == '*' && nextch(rdr) == '/' {
|
||||
} else if rdr.curr_is('*') && nextch_is(rdr, '/') {
|
||||
level -= 1;
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
@ -424,12 +440,13 @@ fn consume_block_comment(rdr: &StringReader) -> Option<TokenAndSpan> {
|
||||
}
|
||||
|
||||
fn scan_exponent(rdr: &StringReader, start_bpos: BytePos) -> Option<~str> {
|
||||
let mut c = rdr.curr.get();
|
||||
// \x00 hits the `return None` case immediately, so this is fine.
|
||||
let mut c = rdr.curr.get().unwrap_or('\x00');
|
||||
let mut rslt = ~"";
|
||||
if c == 'e' || c == 'E' {
|
||||
rslt.push_char(c);
|
||||
bump(rdr);
|
||||
c = rdr.curr.get();
|
||||
c = rdr.curr.get().unwrap_or('\x00');
|
||||
if c == '-' || c == '+' {
|
||||
rslt.push_char(c);
|
||||
bump(rdr);
|
||||
@ -448,10 +465,10 @@ fn scan_digits(rdr: &StringReader, radix: uint) -> ~str {
|
||||
let mut rslt = ~"";
|
||||
loop {
|
||||
let c = rdr.curr.get();
|
||||
if c == '_' { bump(rdr); continue; }
|
||||
match char::to_digit(c, radix) {
|
||||
if c == Some('_') { bump(rdr); continue; }
|
||||
match c.and_then(|cc| char::to_digit(cc, radix)) {
|
||||
Some(_) => {
|
||||
rslt.push_char(c);
|
||||
rslt.push_char(c.unwrap());
|
||||
bump(rdr);
|
||||
}
|
||||
_ => return rslt
|
||||
@ -476,7 +493,7 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
|
||||
let mut num_str;
|
||||
let mut base = 10u;
|
||||
let mut c = c;
|
||||
let mut n = nextch(rdr);
|
||||
let mut n = nextch(rdr).unwrap_or('\x00');
|
||||
let start_bpos = rdr.last_pos.get();
|
||||
if c == '0' && n == 'x' {
|
||||
bump(rdr);
|
||||
@ -492,7 +509,7 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
|
||||
base = 2u;
|
||||
}
|
||||
num_str = scan_digits(rdr, base);
|
||||
c = rdr.curr.get();
|
||||
c = rdr.curr.get().unwrap_or('\x00');
|
||||
nextch(rdr);
|
||||
if c == 'u' || c == 'i' {
|
||||
enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) }
|
||||
@ -502,13 +519,13 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
|
||||
else { Unsigned(ast::TyU) }
|
||||
};
|
||||
bump(rdr);
|
||||
c = rdr.curr.get();
|
||||
c = rdr.curr.get().unwrap_or('\x00');
|
||||
if c == '8' {
|
||||
bump(rdr);
|
||||
tp = if signed { Signed(ast::TyI8) }
|
||||
else { Unsigned(ast::TyU8) };
|
||||
}
|
||||
n = nextch(rdr);
|
||||
n = nextch(rdr).unwrap_or('\x00');
|
||||
if c == '1' && n == '6' {
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
@ -541,8 +558,7 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
|
||||
}
|
||||
}
|
||||
let mut is_float = false;
|
||||
if rdr.curr.get() == '.' && !(ident_start(nextch(rdr)) || nextch(rdr) ==
|
||||
'.') {
|
||||
if rdr.curr_is('.') && !(ident_start(nextch(rdr)) || nextch_is(rdr, '.')) {
|
||||
is_float = true;
|
||||
bump(rdr);
|
||||
let dec_part = scan_digits(rdr, 10u);
|
||||
@ -557,10 +573,10 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
|
||||
None => ()
|
||||
}
|
||||
|
||||
if rdr.curr.get() == 'f' {
|
||||
if rdr.curr_is('f') {
|
||||
bump(rdr);
|
||||
c = rdr.curr.get();
|
||||
n = nextch(rdr);
|
||||
c = rdr.curr.get().unwrap_or('\x00');
|
||||
n = nextch(rdr).unwrap_or('\x00');
|
||||
if c == '3' && n == '2' {
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
@ -602,18 +618,23 @@ fn scan_numeric_escape(rdr: &StringReader, n_hex_digits: uint) -> char {
|
||||
let mut accum_int = 0;
|
||||
let mut i = n_hex_digits;
|
||||
let start_bpos = rdr.last_pos.get();
|
||||
while i != 0u {
|
||||
while i != 0u && !is_eof(rdr) {
|
||||
let n = rdr.curr.get();
|
||||
if !is_hex_digit(n) {
|
||||
fatal_span_char(rdr, rdr.last_pos.get(), rdr.pos.get(),
|
||||
~"illegal character in numeric character escape",
|
||||
n);
|
||||
n.unwrap());
|
||||
}
|
||||
bump(rdr);
|
||||
accum_int *= 16;
|
||||
accum_int += hex_digit_val(n);
|
||||
i -= 1u;
|
||||
}
|
||||
if i != 0 && is_eof(rdr) {
|
||||
fatal_span(rdr, start_bpos, rdr.last_pos.get(),
|
||||
~"unterminated numeric character escape");
|
||||
}
|
||||
|
||||
match char::from_u32(accum_int as u32) {
|
||||
Some(x) => x,
|
||||
None => fatal_span(rdr, start_bpos, rdr.last_pos.get(),
|
||||
@ -621,14 +642,18 @@ fn scan_numeric_escape(rdr: &StringReader, n_hex_digits: uint) -> char {
|
||||
}
|
||||
}
|
||||
|
||||
fn ident_start(c: char) -> bool {
|
||||
fn ident_start(c: Option<char>) -> bool {
|
||||
let c = match c { Some(c) => c, None => return false };
|
||||
|
||||
(c >= 'a' && c <= 'z')
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| c == '_'
|
||||
|| (c > '\x7f' && char::is_XID_start(c))
|
||||
}
|
||||
|
||||
fn ident_continue(c: char) -> bool {
|
||||
fn ident_continue(c: Option<char>) -> bool {
|
||||
let c = match c { Some(c) => c, None => return false };
|
||||
|
||||
(c >= 'a' && c <= 'z')
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| (c >= '0' && c <= '9')
|
||||
@ -641,7 +666,7 @@ fn ident_continue(c: char) -> bool {
|
||||
// EFFECT: updates the interner
|
||||
fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
let c = rdr.curr.get();
|
||||
if ident_start(c) && nextch(rdr) != '"' && nextch(rdr) != '#' {
|
||||
if ident_start(c) && !nextch_is(rdr, '"') && !nextch_is(rdr, '#') {
|
||||
// Note: r as in r" or r#" is part of a raw string literal,
|
||||
// not an identifier, and is handled further down.
|
||||
|
||||
@ -654,7 +679,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
if string == "_" {
|
||||
token::UNDERSCORE
|
||||
} else {
|
||||
let is_mod_name = rdr.curr.get() == ':' && nextch(rdr) == ':';
|
||||
let is_mod_name = rdr.curr_is(':') && nextch_is(rdr, ':');
|
||||
|
||||
// FIXME: perform NFKC normalization here. (Issue #2253)
|
||||
token::IDENT(str_to_ident(string), is_mod_name)
|
||||
@ -662,16 +687,16 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
})
|
||||
}
|
||||
if is_dec_digit(c) {
|
||||
return scan_number(c, rdr);
|
||||
return scan_number(c.unwrap(), rdr);
|
||||
}
|
||||
fn binop(rdr: &StringReader, op: token::BinOp) -> token::Token {
|
||||
bump(rdr);
|
||||
if rdr.curr.get() == '=' {
|
||||
if rdr.curr_is('=') {
|
||||
bump(rdr);
|
||||
return token::BINOPEQ(op);
|
||||
} else { return token::BINOP(op); }
|
||||
}
|
||||
match c {
|
||||
match c.expect("next_token_inner called at EOF") {
|
||||
|
||||
|
||||
|
||||
@ -682,9 +707,9 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
',' => { bump(rdr); return token::COMMA; }
|
||||
'.' => {
|
||||
bump(rdr);
|
||||
return if rdr.curr.get() == '.' {
|
||||
return if rdr.curr_is('.') {
|
||||
bump(rdr);
|
||||
if rdr.curr.get() == '.' {
|
||||
if rdr.curr_is('.') {
|
||||
bump(rdr);
|
||||
token::DOTDOTDOT
|
||||
} else {
|
||||
@ -705,7 +730,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
'~' => { bump(rdr); return token::TILDE; }
|
||||
':' => {
|
||||
bump(rdr);
|
||||
if rdr.curr.get() == ':' {
|
||||
if rdr.curr_is(':') {
|
||||
bump(rdr);
|
||||
return token::MOD_SEP;
|
||||
} else { return token::COLON; }
|
||||
@ -720,10 +745,10 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
// Multi-byte tokens.
|
||||
'=' => {
|
||||
bump(rdr);
|
||||
if rdr.curr.get() == '=' {
|
||||
if rdr.curr_is('=') {
|
||||
bump(rdr);
|
||||
return token::EQEQ;
|
||||
} else if rdr.curr.get() == '>' {
|
||||
} else if rdr.curr_is('>') {
|
||||
bump(rdr);
|
||||
return token::FAT_ARROW;
|
||||
} else {
|
||||
@ -732,19 +757,19 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
}
|
||||
'!' => {
|
||||
bump(rdr);
|
||||
if rdr.curr.get() == '=' {
|
||||
if rdr.curr_is('=') {
|
||||
bump(rdr);
|
||||
return token::NE;
|
||||
} else { return token::NOT; }
|
||||
}
|
||||
'<' => {
|
||||
bump(rdr);
|
||||
match rdr.curr.get() {
|
||||
match rdr.curr.get().unwrap_or('\x00') {
|
||||
'=' => { bump(rdr); return token::LE; }
|
||||
'<' => { return binop(rdr, token::SHL); }
|
||||
'-' => {
|
||||
bump(rdr);
|
||||
match rdr.curr.get() {
|
||||
match rdr.curr.get().unwrap_or('\x00') {
|
||||
'>' => { bump(rdr); return token::DARROW; }
|
||||
_ => { return token::LARROW; }
|
||||
}
|
||||
@ -754,7 +779,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
}
|
||||
'>' => {
|
||||
bump(rdr);
|
||||
match rdr.curr.get() {
|
||||
match rdr.curr.get().unwrap_or('\x00') {
|
||||
'=' => { bump(rdr); return token::GE; }
|
||||
'>' => { return binop(rdr, token::SHR); }
|
||||
_ => { return token::GT; }
|
||||
@ -764,12 +789,14 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
// Either a character constant 'a' OR a lifetime name 'abc
|
||||
bump(rdr);
|
||||
let start = rdr.last_pos.get();
|
||||
let mut c2 = rdr.curr.get();
|
||||
|
||||
// the eof will be picked up by the final `'` check below
|
||||
let mut c2 = rdr.curr.get().unwrap_or('\x00');
|
||||
bump(rdr);
|
||||
|
||||
// If the character is an ident start not followed by another single
|
||||
// quote, then this is a lifetime name:
|
||||
if ident_start(c2) && rdr.curr.get() != '\'' {
|
||||
if ident_start(Some(c2)) && !rdr.curr_is('\'') {
|
||||
while ident_continue(rdr.curr.get()) {
|
||||
bump(rdr);
|
||||
}
|
||||
@ -798,19 +825,24 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
let escaped_pos = rdr.last_pos.get();
|
||||
bump(rdr);
|
||||
match escaped {
|
||||
'n' => { c2 = '\n'; }
|
||||
'r' => { c2 = '\r'; }
|
||||
't' => { c2 = '\t'; }
|
||||
'\\' => { c2 = '\\'; }
|
||||
'\'' => { c2 = '\''; }
|
||||
'"' => { c2 = '"'; }
|
||||
'0' => { c2 = '\x00'; }
|
||||
'x' => { c2 = scan_numeric_escape(rdr, 2u); }
|
||||
'u' => { c2 = scan_numeric_escape(rdr, 4u); }
|
||||
'U' => { c2 = scan_numeric_escape(rdr, 8u); }
|
||||
c2 => {
|
||||
fatal_span_char(rdr, escaped_pos, rdr.last_pos.get(),
|
||||
~"unknown character escape", c2);
|
||||
None => {}
|
||||
Some(e) => {
|
||||
c2 = match e {
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'\\' => '\\',
|
||||
'\'' => '\'',
|
||||
'"' => '"',
|
||||
'0' => '\x00',
|
||||
'x' => scan_numeric_escape(rdr, 2u),
|
||||
'u' => scan_numeric_escape(rdr, 4u),
|
||||
'U' => scan_numeric_escape(rdr, 8u),
|
||||
c2 => {
|
||||
fatal_span_char(rdr, escaped_pos, rdr.last_pos.get(),
|
||||
~"unknown character escape", c2)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -820,7 +852,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
if rdr.curr.get() != '\'' {
|
||||
if !rdr.curr_is('\'') {
|
||||
fatal_span_verbose(rdr,
|
||||
// Byte offsetting here is okay because the
|
||||
// character before position `start` is an
|
||||
@ -836,17 +868,22 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
let mut accum_str = ~"";
|
||||
let start_bpos = rdr.last_pos.get();
|
||||
bump(rdr);
|
||||
while rdr.curr.get() != '"' {
|
||||
while !rdr.curr_is('"') {
|
||||
if is_eof(rdr) {
|
||||
fatal_span(rdr, start_bpos, rdr.last_pos.get(),
|
||||
~"unterminated double quote string");
|
||||
}
|
||||
|
||||
let ch = rdr.curr.get();
|
||||
let ch = rdr.curr.get().unwrap();
|
||||
bump(rdr);
|
||||
match ch {
|
||||
'\\' => {
|
||||
let escaped = rdr.curr.get();
|
||||
if is_eof(rdr) {
|
||||
fatal_span(rdr, start_bpos, rdr.last_pos.get(),
|
||||
~"unterminated double quote string");
|
||||
}
|
||||
|
||||
let escaped = rdr.curr.get().unwrap();
|
||||
let escaped_pos = rdr.last_pos.get();
|
||||
bump(rdr);
|
||||
match escaped {
|
||||
@ -883,15 +920,19 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
let start_bpos = rdr.last_pos.get();
|
||||
bump(rdr);
|
||||
let mut hash_count = 0u;
|
||||
while rdr.curr.get() == '#' {
|
||||
while rdr.curr_is('#') {
|
||||
bump(rdr);
|
||||
hash_count += 1;
|
||||
}
|
||||
if rdr.curr.get() != '"' {
|
||||
|
||||
if is_eof(rdr) {
|
||||
fatal_span(rdr, start_bpos, rdr.last_pos.get(),
|
||||
~"unterminated raw string");
|
||||
} else if !rdr.curr_is('"') {
|
||||
fatal_span_char(rdr, start_bpos, rdr.last_pos.get(),
|
||||
~"only `#` is allowed in raw string delimitation; \
|
||||
found illegal character",
|
||||
rdr.curr.get());
|
||||
rdr.curr.get().unwrap());
|
||||
}
|
||||
bump(rdr);
|
||||
let content_start_bpos = rdr.last_pos.get();
|
||||
@ -901,11 +942,11 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
fatal_span(rdr, start_bpos, rdr.last_pos.get(),
|
||||
~"unterminated raw string");
|
||||
}
|
||||
if rdr.curr.get() == '"' {
|
||||
if rdr.curr_is('"') {
|
||||
content_end_bpos = rdr.last_pos.get();
|
||||
for _ in range(0, hash_count) {
|
||||
bump(rdr);
|
||||
if rdr.curr.get() != '#' {
|
||||
if !rdr.curr_is('#') {
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
@ -921,14 +962,14 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
return token::LIT_STR_RAW(str_content, hash_count);
|
||||
}
|
||||
'-' => {
|
||||
if nextch(rdr) == '>' {
|
||||
if nextch_is(rdr, '>') {
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
return token::RARROW;
|
||||
} else { return binop(rdr, token::MINUS); }
|
||||
}
|
||||
'&' => {
|
||||
if nextch(rdr) == '&' {
|
||||
if nextch_is(rdr, '&') {
|
||||
bump(rdr);
|
||||
bump(rdr);
|
||||
return token::ANDAND;
|
||||
@ -936,7 +977,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
|
||||
}
|
||||
'|' => {
|
||||
match nextch(rdr) {
|
||||
'|' => { bump(rdr); bump(rdr); return token::OROR; }
|
||||
Some('|') => { bump(rdr); bump(rdr); return token::OROR; }
|
||||
_ => { return binop(rdr, token::OR); }
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user