rust/src/libsyntax/parse/lexer.rs

659 lines
20 KiB
Rust
Raw Normal View History

import util::interner;
import util::interner::intern;
import diagnostic;
2012-06-01 22:52:07 +00:00
import ast::{tt_delim,tt_flat};
2012-05-30 18:36:30 +00:00
export reader, string_reader, new_string_reader, is_whitespace;
2012-06-07 22:33:59 +00:00
export tt_reader, new_tt_reader, dup_tt_reader;
2012-05-30 18:36:30 +00:00
export nextch, is_eof, bump, get_str_from;
export string_reader_as_reader, tt_reader_as_reader;
2012-05-30 18:36:30 +00:00
// The token-source interface implemented by both readers in this module
// (`string_reader` and `tt_reader`).
iface reader {
    // Whether the reader has run out of input.
    fn is_eof() -> bool;

    // Produces the next token together with a character position for it.
    fn next_token() -> {tok: token::token, chpos: uint};

    // Reports a fatal diagnostic carrying the given message; never returns.
    fn fatal(str) -> !;

    // The reader's current character position.
    fn chpos() -> uint;

    // The string interner associated with this reader.
    fn interner() -> @interner::interner<@str>;
}
2012-04-15 10:49:59 +00:00
2012-06-01 22:52:07 +00:00
// Single-variant wrapper around an optional parent `tt_frame`; it is what
// the `up` field of a `tt_frame` holds.
enum tt_frame_up {
    /* to break a circularity */
    tt_frame_up(option<tt_frame>)
}
#[doc = "an unzipping of `token_tree`s"]
2012-06-01 22:52:07 +00:00
type tt_frame = @{
readme: [ast::token_tree],
mut idx: uint,
up: tt_frame_up
};
// A `reader` that yields tokens out of already-built token trees.
type tt_reader = @{
    // used by `fatal` to report errors
    span_diagnostic: diagnostic::span_handler,
    // handed back by the `interner` method
    interner: @interner::interner<@str>,
    // the frame currently being walked
    mut cur: tt_frame,

    /* cached: */
    // the token (and its position) that the next call to `next_token`
    // will return
    mut cur_tok: token::token,
    mut cur_chpos: uint
};
// Builds a `tt_reader` over `src`. The cached token fields start out as
// placeholders; one call to `next_token` fills them in before the reader
// is returned.
fn new_tt_reader(span_diagnostic: diagnostic::span_handler,
                 itr: @interner::interner<@str>, src: [ast::token_tree])
    -> tt_reader {
    // outermost frame: positioned at the first tree, with no parent
    let top_frame: tt_frame = @{readme: src, mut idx: 0u,
                                up: tt_frame_up(option::none)};
    let rdr = @{span_diagnostic: span_diagnostic, interner: itr,
                mut cur: top_frame,
                mut cur_tok: token::EOF, /* dummy value, never read */
                mut cur_chpos: 0u /* dummy value, never read */
               };
    (rdr as reader).next_token(); /* get cur_tok and cur_chpos set up */
    ret rdr;
}
2012-06-07 22:33:59 +00:00
// Copies a frame into a fresh box, recursively copying every enclosing
// frame as well, so the copy's `idx` fields can advance independently.
pure fn dup_tt_frame(&&f: tt_frame) -> tt_frame {
    let parent_copy = alt f.up {
      tt_frame_up(o_f) { tt_frame_up(option::map(o_f, dup_tt_frame)) }
    };
    ret @{readme: f.readme, mut idx: f.idx, up: parent_copy};
}
// Copies a `tt_reader`: the frame chain is duplicated via `dup_tt_frame`,
// while the diagnostic handler and interner are shared with the original.
pure fn dup_tt_reader(&&r: tt_reader) -> tt_reader {
    let frame_copy = dup_tt_frame(r.cur);
    ret @{span_diagnostic: r.span_diagnostic, interner: r.interner,
          mut cur: frame_copy,
          mut cur_tok: r.cur_tok, mut cur_chpos: r.cur_chpos};
}
2012-05-30 18:36:30 +00:00
// A `reader` that lexes tokens directly out of source text.
type string_reader = @{
    // used by `fatal` to report errors
    span_diagnostic: diagnostic::span_handler,
    // the text being lexed (taken from `filemap.src`)
    src: @str,
    // bumped on every advance and reset to 0 after a '\n'
    mut col: uint,
    // byte offset into `src` of the character that follows `curr`
    mut pos: uint,
    // the character under the cursor; `-1 as char` once input is exhausted
    mut curr: char,
    // character position; starts at `filemap.start_pos.ch`
    mut chpos: uint,
    // the file this text belongs to
    filemap: codemap::filemap,
    // handed back by the `interner` method and used for idents/literals
    interner: @interner::interner<@str>
};
// Builds a `string_reader` over the text of `filemap` and loads the first
// character into `curr`. For empty text `curr` stays at the end-of-input
// marker (`-1 as char`).
fn new_string_reader(span_diagnostic: diagnostic::span_handler,
                     filemap: codemap::filemap,
                     itr: @interner::interner<@str>) -> string_reader {
    let rdr = @{span_diagnostic: span_diagnostic, src: filemap.src,
                mut col: 0u, mut pos: 0u, mut curr: -1 as char,
                mut chpos: filemap.start_pos.ch,
                filemap: filemap, interner: itr};
    if (*filemap.src).len() > rdr.pos {
        // decode the first character and step `pos` past it
        let first = str::char_range_at(*rdr.src, rdr.pos);
        rdr.curr = first.ch;
        rdr.pos = first.next;
    }
    ret rdr;
}
2012-05-30 18:36:30 +00:00
// `reader` implementation backed by the free lexing functions below.
impl string_reader_as_reader of reader for string_reader {
    fn is_eof() -> bool { ret is_eof(self); }

    // Skips whitespace and comments, then lexes one token. The reported
    // position is the character position at which the token starts.
    fn next_token() -> {tok: token::token, chpos: uint} {
        consume_whitespace_and_comments(self);
        let start_chpos = self.chpos;
        if is_eof(self) {
            ret {tok: token::EOF, chpos: start_chpos};
        }
        ret {tok: next_token_inner(self), chpos: start_chpos};
    }

    // Reports `m` as a fatal error at the current character position.
    fn fatal(m: str) -> ! {
        let here = self.chpos;
        self.span_diagnostic.span_fatal(ast_util::mk_sp(here, here), m)
    }

    fn chpos() -> uint { ret self.chpos; }

    fn interner() -> @interner::interner<@str> { ret self.interner; }
}
// `reader` implementation that walks token trees instead of source text.
impl tt_reader_as_reader of reader for tt_reader {
    fn is_eof() -> bool { self.cur_tok == token::EOF }

    // Returns the cached token and position, then refills the cache with
    // the token that follows. Once every frame is exhausted the cached
    // token becomes `token::EOF` (the cached position is left untouched).
    fn next_token() -> {tok: token::token, chpos: uint} {
        let ret_val = { tok: self.cur_tok, chpos: self.cur_chpos };
        /* Alternate between popping finished frames and descending into
           `tt_delim`s until a `tt_flat` is found. Doing both in one loop
           means we never index past the end of a frame, even when a
           `tt_delim` is the last element of its parent or is empty. */
        loop {
            if self.cur.idx >= vec::len(self.cur.readme) {
                /* done with this set; pop */
                alt self.cur.up {
                  tt_frame_up(option::none) {
                    self.cur_tok = token::EOF;
                    ret ret_val;
                  }
                  tt_frame_up(option::some(tt_f)) {
                    self.cur = tt_f;
                    /* step past the `tt_delim` we just finished */
                    self.cur.idx += 1u;
                  }
                }
            } else {
                alt self.cur.readme[self.cur.idx] {
                  tt_delim(tts) {
                    /* descend; the new frame remembers its parent */
                    self.cur = @{readme: tts, mut idx: 0u,
                                 up: tt_frame_up(option::some(self.cur)) };
                  }
                  tt_flat(chpos, tok) {
                    self.cur_chpos = chpos; self.cur_tok = tok;
                    self.cur.idx += 1u;
                    ret ret_val;
                  }
                }
            }
        }
    }

    // Reports `m` as a fatal error at the cached token's position.
    fn fatal(m: str) -> ! {
        self.span_diagnostic.span_fatal(
            ast_util::mk_sp(self.chpos(), self.chpos()), m);
    }

    fn chpos() -> uint { self.cur_chpos }

    fn interner() -> @interner::interner<@str> { self.interner }
}
2012-05-30 18:36:30 +00:00
// Returns the source text between a previously recorded mark and the
// current character (exclusive).
//
// `start` is expected to be a value of `rdr.pos` captured while the first
// character to include was the current one. Since `pos` always points
// just past the current character, one byte is subtracted to get back to
// the start of that character.
fn get_str_from(rdr: string_reader, start: uint) -> str unsafe {
    // I'm pretty skeptical about this subtraction. What if there's a
    // multi-byte character before the mark?
    //
    // At end of input there is no current character to step back over:
    // `pos` already equals the length of the source, so use it as-is
    // rather than chopping off the final byte.
    let end = if is_eof(rdr) { rdr.pos } else { rdr.pos - 1u };
    ret str::slice(*rdr.src, start - 1u, end);
}
fn bump(rdr: string_reader) {
if rdr.pos < (*rdr.src).len() {
rdr.col += 1u;
rdr.chpos += 1u;
if rdr.curr == '\n' {
codemap::next_line(rdr.filemap, rdr.chpos, rdr.pos);
rdr.col = 0u;
}
let next = str::char_range_at(*rdr.src, rdr.pos);
rdr.pos = next.next;
rdr.curr = next.ch;
} else {
if (rdr.curr != -1 as char) {
rdr.col += 1u;
rdr.chpos += 1u;
rdr.curr = -1 as char;
}
}
}
fn is_eof(rdr: string_reader) -> bool {
rdr.curr == -1 as char
}
fn nextch(rdr: string_reader) -> char {
if rdr.pos < (*rdr.src).len() {
ret str::char_at(*rdr.src, rdr.pos);
} else { ret -1 as char; }
}
2011-07-27 12:19:39 +00:00
// Distance of `c` from '0'; no check is made that `c` is a digit.
fn dec_digit_val(c: char) -> int {
    let zero = '0' as int;
    ret (c as int) - zero;
}
2011-07-27 12:19:39 +00:00
fn hex_digit_val(c: char) -> int {
if in_range(c, '0', '9') { ret (c as int) - ('0' as int); }
if in_range(c, 'a', 'f') { ret (c as int) - ('a' as int) + 10; }
if in_range(c, 'A', 'F') { ret (c as int) - ('A' as int) + 10; }
fail;
}
2011-07-27 12:19:39 +00:00
fn bin_digit_value(c: char) -> int { if c == '0' { ret 0; } ret 1; }
2011-07-27 12:19:39 +00:00
// Space, tab, carriage return and newline count as whitespace.
fn is_whitespace(c: char) -> bool {
    alt c {
      ' ' | '\t' | '\r' | '\n' { true }
      _ { false }
    }
}
2011-07-27 12:19:39 +00:00
fn may_begin_ident(c: char) -> bool { ret is_alpha(c) || c == '_'; }
2011-07-27 12:19:39 +00:00
fn in_range(c: char, lo: char, hi: char) -> bool { ret lo <= c && c <= hi; }
2011-07-27 12:19:39 +00:00
fn is_alpha(c: char) -> bool {
ret in_range(c, 'a', 'z') || in_range(c, 'A', 'Z');
}
2011-07-27 12:19:39 +00:00
fn is_dec_digit(c: char) -> bool { ret in_range(c, '0', '9'); }
2011-07-27 12:19:39 +00:00
fn is_alnum(c: char) -> bool { ret is_alpha(c) || is_dec_digit(c); }
2011-07-27 12:19:39 +00:00
fn is_hex_digit(c: char) -> bool {
ret in_range(c, '0', '9') || in_range(c, 'a', 'f') ||
in_range(c, 'A', 'F');
}
2011-07-27 12:19:39 +00:00
// '0' or '1'.
fn is_bin_digit(c: char) -> bool {
    alt c {
      '0' | '1' { true }
      _ { false }
    }
}
2012-05-30 18:36:30 +00:00
fn consume_whitespace_and_comments(rdr: string_reader) {
while is_whitespace(rdr.curr) { bump(rdr); }
2012-05-14 23:55:01 +00:00
ret consume_any_line_comment(rdr);
}
2012-05-30 18:36:30 +00:00
fn consume_any_line_comment(rdr: string_reader) {
if rdr.curr == '/' {
2012-05-30 18:36:30 +00:00
alt nextch(rdr) {
2011-07-27 12:19:39 +00:00
'/' {
2012-05-30 18:36:30 +00:00
while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
2011-07-27 12:19:39 +00:00
// Restart whitespace munch.
2012-05-14 23:55:01 +00:00
ret consume_whitespace_and_comments(rdr);
2011-07-27 12:19:39 +00:00
}
2012-05-30 18:36:30 +00:00
'*' { bump(rdr); bump(rdr); ret consume_block_comment(rdr); }
2011-07-27 12:19:39 +00:00
_ { ret; }
}
} else if rdr.curr == '#' {
2012-05-30 18:36:30 +00:00
if nextch(rdr) == '!' {
let cmap = codemap::new_codemap();
(*cmap).files.push(rdr.filemap);
2012-05-23 01:13:24 +00:00
let loc = codemap::lookup_char_pos_adj(cmap, rdr.chpos);
if loc.line == 1u && loc.col == 0u {
2012-05-30 18:36:30 +00:00
while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
ret consume_whitespace_and_comments(rdr);
}
}
}
}
2012-05-30 18:36:30 +00:00
fn consume_block_comment(rdr: string_reader) {
let mut level: int = 1;
2011-07-27 12:19:39 +00:00
while level > 0 {
2012-05-30 18:36:30 +00:00
if is_eof(rdr) { rdr.fatal("unterminated block comment"); }
if rdr.curr == '/' && nextch(rdr) == '*' {
bump(rdr);
bump(rdr);
level += 1;
} else {
2012-05-30 18:36:30 +00:00
if rdr.curr == '*' && nextch(rdr) == '/' {
bump(rdr);
bump(rdr);
level -= 1;
2012-05-30 18:36:30 +00:00
} else { bump(rdr); }
}
}
// restart whitespace munch.
2012-05-14 23:55:01 +00:00
ret consume_whitespace_and_comments(rdr);
}
2012-05-30 18:36:30 +00:00
fn scan_exponent(rdr: string_reader) -> option<str> {
let mut c = rdr.curr;
let mut rslt = "";
2011-07-27 12:19:39 +00:00
if c == 'e' || c == 'E' {
str::push_char(rslt, c);
2012-05-30 18:36:30 +00:00
bump(rdr);
c = rdr.curr;
2011-07-27 12:19:39 +00:00
if c == '-' || c == '+' {
str::push_char(rslt, c);
2012-05-30 18:36:30 +00:00
bump(rdr);
}
let exponent = scan_digits(rdr, 10u);
2012-02-23 09:44:04 +00:00
if str::len(exponent) > 0u {
ret some(rslt + exponent);
} else { rdr.fatal("scan_exponent: bad fp literal"); }
2011-09-02 22:34:58 +00:00
} else { ret none::<str>; }
}
2012-05-30 18:36:30 +00:00
fn scan_digits(rdr: string_reader, radix: uint) -> str {
let mut rslt = "";
loop {
let c = rdr.curr;
2012-05-30 18:36:30 +00:00
if c == '_' { bump(rdr); cont; }
alt char::to_digit(c, radix) {
some(d) {
str::push_char(rslt, c);
2012-05-30 18:36:30 +00:00
bump(rdr);
}
_ { ret rslt; }
}
};
}
2012-05-30 18:36:30 +00:00
// Lexes a numeric literal. `c` is the current character (a decimal digit).
//
// Recognised forms, in the order they are tried:
//   * an optional `0x` / `0b` prefix selecting base 16 / 2,
//   * digits in that base (underscores allowed),
//   * an integer suffix `u` / `i`, optionally followed by 8, 16, 32 or 64,
//     giving LIT_UINT / LIT_INT,
//   * otherwise an optional fraction, optional exponent and optional
//     `f`, `f32` or `f64` suffix, giving LIT_FLOAT,
//   * otherwise LIT_INT_UNSUFFIXED.
// An integer literal with no digits is a fatal error.
fn scan_number(c: char, rdr: string_reader) -> token::token {
    let mut num_str, base = 10u, c = c, n = nextch(rdr);
    if c == '0' && n == 'x' {
        bump(rdr);
        bump(rdr);
        base = 16u;
    } else if c == '0' && n == 'b' {
        bump(rdr);
        bump(rdr);
        base = 2u;
    }
    num_str = scan_digits(rdr, base);
    c = rdr.curr;

    // Integer suffix.
    if c == 'u' || c == 'i' {
        let signed = c == 'i';
        let mut tp = if signed { either::left(ast::ty_i) }
                     else { either::right(ast::ty_u) };
        bump(rdr);
        c = rdr.curr;
        if c == '8' {
            bump(rdr);
            tp = if signed { either::left(ast::ty_i8) }
                 else { either::right(ast::ty_u8) };
        }
        n = nextch(rdr);
        if c == '1' && n == '6' {
            bump(rdr);
            bump(rdr);
            tp = if signed { either::left(ast::ty_i16) }
                 else { either::right(ast::ty_u16) };
        } else if c == '3' && n == '2' {
            bump(rdr);
            bump(rdr);
            tp = if signed { either::left(ast::ty_i32) }
                 else { either::right(ast::ty_u32) };
        } else if c == '6' && n == '4' {
            bump(rdr);
            bump(rdr);
            tp = if signed { either::left(ast::ty_i64) }
                 else { either::right(ast::ty_u64) };
        }
        if str::len(num_str) == 0u {
            rdr.fatal("no valid digits found for number");
        }
        let parsed = option::get(u64::from_str_radix(num_str, base as u64));
        alt tp {
          either::left(t) { ret token::LIT_INT(parsed as i64, t); }
          either::right(t) { ret token::LIT_UINT(parsed, t); }
        }
    }

    // Fractional part. A '.' followed by a letter or '_' is left alone,
    // so it is lexed as a separate DOT token afterwards.
    let mut is_float = false;
    if rdr.curr == '.' && !(is_alpha(nextch(rdr)) || nextch(rdr) == '_') {
        is_float = true;
        bump(rdr);
        let dec_part = scan_digits(rdr, 10u);
        num_str += "." + dec_part;
    }
    alt scan_exponent(rdr) {
      some(s) {
        is_float = true;
        num_str += s;
      }
      none {}
    }

    // Float suffix.
    if rdr.curr == 'f' {
        bump(rdr);
        c = rdr.curr;
        n = nextch(rdr);
        if c == '3' && n == '2' {
            bump(rdr);
            bump(rdr);
            ret token::LIT_FLOAT(intern(*rdr.interner, @num_str),
                                 ast::ty_f32);
        } else if c == '6' && n == '4' {
            bump(rdr);
            bump(rdr);
            ret token::LIT_FLOAT(intern(*rdr.interner, @num_str),
                                 ast::ty_f64);
            /* FIXME: if this is out of range for either a 32-bit or
            64-bit float, it won't be noticed till the back-end (Issue #2252)
            */
        } else {
            // a bare `f` suffix
            is_float = true;
        }
    }
    if is_float {
        ret token::LIT_FLOAT(intern(*rdr.interner, @num_str),
                             ast::ty_f);
    } else {
        if str::len(num_str) == 0u {
            rdr.fatal("no valid digits found for number");
        }
        let parsed = option::get(u64::from_str_radix(num_str, base as u64));
        #debug["lexing %s as an unsuffixed integer literal",
               num_str];
        ret token::LIT_INT_UNSUFFIXED(parsed as i64);
    }
}
2012-05-30 18:36:30 +00:00
// Reads exactly `n_hex_digits` hexadecimal digits and returns the
// character with that code. Any non-hex character in that span is a
// fatal error.
fn scan_numeric_escape(rdr: string_reader, n_hex_digits: uint) -> char {
    let mut value = 0, remaining = n_hex_digits;
    while remaining > 0u {
        let digit = rdr.curr;
        bump(rdr);
        if !is_hex_digit(digit) {
            rdr.fatal(#fmt["illegal numeric character escape: %d",
                           digit as int]);
        }
        value = value * 16 + hex_digit_val(digit);
        remaining -= 1u;
    }
    ret value as char;
}
2012-05-30 18:36:30 +00:00
fn next_token_inner(rdr: string_reader) -> token::token {
let mut accum_str = "";
let mut c = rdr.curr;
2012-03-03 02:45:10 +00:00
if (c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| c == '_'
|| (c > 'z' && char::is_XID_start(c)) {
while (c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| (c >= '0' && c <= '9')
|| c == '_'
|| (c > 'z' && char::is_XID_continue(c)) {
str::push_char(accum_str, c);
2012-05-30 18:36:30 +00:00
bump(rdr);
c = rdr.curr;
}
2011-09-02 22:34:58 +00:00
if str::eq(accum_str, "_") { ret token::UNDERSCORE; }
2012-05-30 18:36:30 +00:00
let is_mod_name = c == ':' && nextch(rdr) == ':';
// FIXME: perform NFKC normalization here. (Issue #2253)
2012-06-09 07:53:34 +00:00
ret token::IDENT(interner::intern(*rdr.interner,
@accum_str), is_mod_name);
2010-08-20 17:03:31 +00:00
}
if is_dec_digit(c) {
2011-09-02 05:39:44 +00:00
ret scan_number(c, rdr);
}
2012-05-30 18:36:30 +00:00
fn binop(rdr: string_reader, op: token::binop) -> token::token {
bump(rdr);
if rdr.curr == '=' {
2012-05-30 18:36:30 +00:00
bump(rdr);
ret token::BINOPEQ(op);
} else { ret token::BINOP(op); }
}
2011-07-27 12:19:39 +00:00
alt c {
2011-09-02 22:34:58 +00:00
2011-09-12 10:39:38 +00:00
2011-07-27 12:19:39 +00:00
// One-byte tokens.
2012-05-30 18:36:30 +00:00
';' { bump(rdr); ret token::SEMI; }
',' { bump(rdr); ret token::COMMA; }
2011-07-27 12:19:39 +00:00
'.' {
2012-05-30 18:36:30 +00:00
bump(rdr);
if rdr.curr == '.' && nextch(rdr) == '.' {
bump(rdr);
bump(rdr);
2011-07-27 12:19:39 +00:00
ret token::ELLIPSIS;
}
ret token::DOT;
}
2012-05-30 18:36:30 +00:00
'(' { bump(rdr); ret token::LPAREN; }
')' { bump(rdr); ret token::RPAREN; }
'{' { bump(rdr); ret token::LBRACE; }
'}' { bump(rdr); ret token::RBRACE; }
'[' { bump(rdr); ret token::LBRACKET; }
']' { bump(rdr); ret token::RBRACKET; }
'@' { bump(rdr); ret token::AT; }
'#' { bump(rdr); ret token::POUND; }
'~' { bump(rdr); ret token::TILDE; }
2011-07-27 12:19:39 +00:00
':' {
2012-05-30 18:36:30 +00:00
bump(rdr);
if rdr.curr == ':' {
2012-05-30 18:36:30 +00:00
bump(rdr);
2011-07-27 12:19:39 +00:00
ret token::MOD_SEP;
} else { ret token::COLON; }
}
2012-05-30 18:36:30 +00:00
'$' { bump(rdr); ret token::DOLLAR; }
2011-09-02 22:34:58 +00:00
2011-09-12 10:39:38 +00:00
2011-07-27 12:19:39 +00:00
// Multi-byte tokens.
'=' {
2012-05-30 18:36:30 +00:00
bump(rdr);
if rdr.curr == '=' {
2012-05-30 18:36:30 +00:00
bump(rdr);
2011-07-27 12:19:39 +00:00
ret token::EQEQ;
} else if rdr.curr == '>' {
2012-05-30 18:36:30 +00:00
bump(rdr);
ret token::FAT_ARROW;
} else {
ret token::EQ;
}
2011-07-27 12:19:39 +00:00
}
'!' {
2012-05-30 18:36:30 +00:00
bump(rdr);
if rdr.curr == '=' {
2012-05-30 18:36:30 +00:00
bump(rdr);
2011-07-27 12:19:39 +00:00
ret token::NE;
} else { ret token::NOT; }
}
'<' {
2012-05-30 18:36:30 +00:00
bump(rdr);
alt rdr.curr {
2012-05-30 18:36:30 +00:00
'=' { bump(rdr); ret token::LE; }
'<' { ret binop(rdr, token::SHL); }
2011-07-27 12:19:39 +00:00
'-' {
2012-05-30 18:36:30 +00:00
bump(rdr);
alt rdr.curr {
2012-05-30 18:36:30 +00:00
'>' { bump(rdr); ret token::DARROW; }
2011-07-27 12:19:39 +00:00
_ { ret token::LARROW; }
}
2011-07-27 12:19:39 +00:00
}
_ { ret token::LT; }
}
2011-07-27 12:19:39 +00:00
}
'>' {
2012-05-30 18:36:30 +00:00
bump(rdr);
alt rdr.curr {
2012-05-30 18:36:30 +00:00
'=' { bump(rdr); ret token::GE; }
'>' { ret binop(rdr, token::SHR); }
2011-07-27 12:19:39 +00:00
_ { ret token::GT; }
}
2011-07-27 12:19:39 +00:00
}
'\'' {
2012-05-30 18:36:30 +00:00
bump(rdr);
let mut c2 = rdr.curr;
2012-05-30 18:36:30 +00:00
bump(rdr);
2011-07-27 12:19:39 +00:00
if c2 == '\\' {
let escaped = rdr.curr;
2012-05-30 18:36:30 +00:00
bump(rdr);
2011-07-27 12:19:39 +00:00
alt escaped {
'n' { c2 = '\n'; }
'r' { c2 = '\r'; }
't' { c2 = '\t'; }
'\\' { c2 = '\\'; }
'\'' { c2 = '\''; }
'"' { c2 = '"'; }
2011-07-27 12:19:39 +00:00
'x' { c2 = scan_numeric_escape(rdr, 2u); }
'u' { c2 = scan_numeric_escape(rdr, 4u); }
'U' { c2 = scan_numeric_escape(rdr, 8u); }
c2 {
rdr.fatal(#fmt["unknown character escape: %d", c2 as int]);
2011-07-27 12:19:39 +00:00
}
}
}
if rdr.curr != '\'' {
rdr.fatal("unterminated character constant");
2011-07-27 12:19:39 +00:00
}
2012-05-30 18:36:30 +00:00
bump(rdr); // advance curr past token
ret token::LIT_INT(c2 as i64, ast::ty_char);
2011-07-27 12:19:39 +00:00
}
'"' {
let n = rdr.chpos;
2012-05-30 18:36:30 +00:00
bump(rdr);
while rdr.curr != '"' {
2012-05-30 18:36:30 +00:00
if is_eof(rdr) {
rdr.fatal(#fmt["unterminated double quote string: %s",
2012-05-30 18:36:30 +00:00
get_str_from(rdr, n)]);
}
let ch = rdr.curr;
2012-05-30 18:36:30 +00:00
bump(rdr);
2011-07-27 12:19:39 +00:00
alt ch {
'\\' {
let escaped = rdr.curr;
2012-05-30 18:36:30 +00:00
bump(rdr);
2011-07-27 12:19:39 +00:00
alt escaped {
'n' { str::push_char(accum_str, '\n'); }
'r' { str::push_char(accum_str, '\r'); }
't' { str::push_char(accum_str, '\t'); }
'\\' { str::push_char(accum_str, '\\'); }
'"' { str::push_char(accum_str, '"'); }
2011-07-27 12:19:39 +00:00
'\n' { consume_whitespace(rdr); }
'x' {
str::push_char(accum_str, scan_numeric_escape(rdr, 2u));
2011-07-27 12:19:39 +00:00
}
'u' {
str::push_char(accum_str, scan_numeric_escape(rdr, 4u));
2011-07-27 12:19:39 +00:00
}
'U' {
str::push_char(accum_str, scan_numeric_escape(rdr, 8u));
2011-07-27 12:19:39 +00:00
}
c2 {
rdr.fatal(#fmt["unknown string escape: %d", c2 as int]);
2011-07-27 12:19:39 +00:00
}
}
2011-07-27 12:19:39 +00:00
}
_ { str::push_char(accum_str, ch); }
}
}
2012-05-30 18:36:30 +00:00
bump(rdr);
2012-06-09 07:53:34 +00:00
ret token::LIT_STR(interner::intern(*rdr.interner,
@accum_str));
2011-07-27 12:19:39 +00:00
}
'-' {
2012-05-30 18:36:30 +00:00
if nextch(rdr) == '>' {
bump(rdr);
bump(rdr);
2011-07-27 12:19:39 +00:00
ret token::RARROW;
} else { ret binop(rdr, token::MINUS); }
}
'&' {
2012-05-30 18:36:30 +00:00
if nextch(rdr) == '&' {
bump(rdr);
bump(rdr);
2011-07-27 12:19:39 +00:00
ret token::ANDAND;
} else { ret binop(rdr, token::AND); }
}
'|' {
2012-05-30 18:36:30 +00:00
alt nextch(rdr) {
'|' { bump(rdr); bump(rdr); ret token::OROR; }
2011-07-27 12:19:39 +00:00
_ { ret binop(rdr, token::OR); }
}
}
'+' { ret binop(rdr, token::PLUS); }
'*' { ret binop(rdr, token::STAR); }
'/' { ret binop(rdr, token::SLASH); }
'^' { ret binop(rdr, token::CARET); }
'%' { ret binop(rdr, token::PERCENT); }
2012-03-26 19:53:06 +00:00
c { rdr.fatal(#fmt["unknown start of token: %d", c as int]); }
}
}
2012-05-30 18:36:30 +00:00
// Skips whitespace only (no comment handling), stopping at end of input.
fn consume_whitespace(rdr: string_reader) {
    loop {
        if is_eof(rdr) || !is_whitespace(rdr.curr) { break; }
        bump(rdr);
    }
}
//
// Local Variables:
// mode: rust
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End:
//