// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use ast;
use codemap::{BytePos, CharPos, CodeMap, Pos, Span};
use codemap;
use diagnostic::span_handler;
use ext::tt::transcribe::{tt_next_token};
use ext::tt::transcribe::{dup_tt_reader};
use parse::token;
use parse::token::{str_to_ident};

use std::cast::transmute;
use std::char;
use std::either;
use std::num::from_str_radix;
use std::util;

pub use ext::tt::transcribe::{TtReader, new_tt_reader};

// The common interface over token sources: the plain string lexer
// (StringReader) and the token-tree reader used during macro expansion
// (TtReader). All methods take `@mut self` because readers mutate
// internal cursor state on every call.
pub trait reader {
    // true once the source is exhausted
    fn is_eof(@mut self) -> bool;
    // return the next token and advance past it
    fn next_token(@mut self) -> TokenAndSpan;
    // report a fatal lexical error at the current position and abort
    fn fatal(@mut self, ~str) -> !;
    // the diagnostic sink used for error reporting
    fn span_diag(@mut self) -> @mut span_handler;
    // return the next token without advancing
    fn peek(@mut self) -> TokenAndSpan;
    // clone this reader so lookahead can proceed independently
    fn dup(@mut self) -> @mut reader;
}

// A token paired with the source span it was lexed from.
#[deriving(Clone, Eq)]
pub struct TokenAndSpan {
    tok: token::Token,
    sp: Span,
}

// Lexer state over a single FileMap's source string. Operates one token
// ahead: `peek_tok`/`peek_span` cache the token that `next_token` will
// return next.
pub struct StringReader {
    span_diagnostic: @mut span_handler,
    src: @str,
    // The absolute offset within the codemap of the next character to read
    pos: BytePos,
    // The absolute offset within the codemap of the last character read(curr)
    last_pos: BytePos,
    // The column of the next character to read
    col: CharPos,
    // The last character to be read
    curr: char,
    filemap: @codemap::FileMap,
    /* cached: */
    peek_tok: token::Token,
    peek_span: Span
}

// Create a StringReader with its one-token lookahead already primed.
pub fn new_string_reader(span_diagnostic: @mut span_handler,
                         filemap: @codemap::FileMap)
                      -> @mut StringReader {
    let r = new_low_level_string_reader(span_diagnostic, filemap);
    string_advance_token(r); /* fill in peek_* */
    return r;
}

/* For comments.rs, which hackily pokes into 'pos' and 'curr' */
// Same as new_string_reader but without priming peek_tok/peek_span;
// they hold dummy values until string_advance_token runs.
pub fn new_low_level_string_reader(span_diagnostic: @mut span_handler,
                                   filemap: @codemap::FileMap)
                                -> @mut StringReader {
    // Force the initial reader bump to start on a fresh line
    let initial_char = '\n';
    let r = @mut StringReader {
        span_diagnostic: span_diagnostic,
        src: filemap.src,
        pos: filemap.start_pos,
        last_pos: filemap.start_pos,
        col: CharPos(0),
        curr: initial_char,
        filemap: filemap,
        /* dummy values; not read */
        peek_tok: token::EOF,
        peek_span: codemap::dummy_sp()
    };
    bump(r);
    return r;
}

// duplicating the string reader is probably a bad idea, in
// that using them will cause interleaved pushes of line
// offsets to the underlying filemap...
fn dup_string_reader(r: @mut StringReader) -> @mut StringReader {
    @mut StringReader {
        span_diagnostic: r.span_diagnostic,
        src: r.src,
        pos: r.pos,
        last_pos: r.last_pos,
        col: r.col,
        curr: r.curr,
        filemap: r.filemap,
        peek_tok: r.peek_tok.clone(),
        peek_span: r.peek_span
    }
}

impl reader for StringReader {
    fn is_eof(@mut self) -> bool { is_eof(self) }
    // return the next token. EFFECT: advances the string_reader.
    fn next_token(@mut self) -> TokenAndSpan {
        // Hand out the cached token (swapping a throwaway UNDERSCORE
        // into its place to avoid a clone), then refill the cache.
        let ret_val = TokenAndSpan {
            tok: util::replace(&mut self.peek_tok, token::UNDERSCORE),
            sp: self.peek_span,
        };
        string_advance_token(self);
        ret_val
    }
    fn fatal(@mut self, m: ~str) -> ! {
        self.span_diagnostic.span_fatal(self.peek_span, m)
    }
    fn span_diag(@mut self) -> @mut span_handler { self.span_diagnostic }
    fn peek(@mut self) -> TokenAndSpan {
        // XXX(pcwalton): Bad copy!
        TokenAndSpan {
            tok: self.peek_tok.clone(),
            sp: self.peek_span,
        }
    }
    fn dup(@mut self) -> @mut reader { dup_string_reader(self) as @mut reader }
}

impl reader for TtReader {
    fn is_eof(@mut self) -> bool { self.cur_tok == token::EOF }
    fn next_token(@mut self) -> TokenAndSpan {
        // Delegates to the token-tree transcriber.
        let r = tt_next_token(self);
        debug2!("TtReader: r={:?}", r);
        return r;
    }
    fn fatal(@mut self, m: ~str) -> ! {
        self.sp_diag.span_fatal(self.cur_span, m);
    }
    fn span_diag(@mut self) -> @mut span_handler { self.sp_diag }
    fn peek(@mut self) -> TokenAndSpan {
        TokenAndSpan {
            tok: self.cur_tok.clone(),
            sp: self.cur_span,
        }
    }
    fn dup(@mut self) -> @mut reader { dup_tt_reader(self) as @mut reader }
}

// report a lexical error spanning [`from_pos`, `to_pos`)
fn fatal_span(rdr: @mut StringReader,
              from_pos: BytePos,
              to_pos: BytePos,
              m: ~str)
           -> ! {
    rdr.peek_span = codemap::mk_sp(from_pos, to_pos);
    rdr.fatal(m);
}

// report a lexical error spanning [`from_pos`, `to_pos`), appending an
// escaped character to the error message
fn fatal_span_char(rdr: @mut StringReader,
                   from_pos: BytePos,
                   to_pos: BytePos,
                   m: ~str,
                   c: char)
                -> ! {
    let mut m = m;
    m.push_str(": ");
    char::escape_default(c, |c| m.push_char(c));
    fatal_span(rdr, from_pos, to_pos, m);
}

// report a lexical error spanning [`from_pos`, `to_pos`), appending the
// offending string to the error message
fn fatal_span_verbose(rdr: @mut StringReader,
                      from_pos: BytePos,
                      to_pos: BytePos,
                      m: ~str)
                   -> ! {
    let mut m = m;
    m.push_str(": ");
    let s = rdr.src.slice(
                  byte_offset(rdr, from_pos).to_uint(),
                  byte_offset(rdr, to_pos).to_uint());
    m.push_str(s);
    fatal_span(rdr, from_pos, to_pos, m);
}

// EFFECT: advance peek_tok and peek_span to refer to the next token.
// EFFECT: update the interner, maybe.
fn string_advance_token(r: @mut StringReader) { match (consume_whitespace_and_comments(r)) { Some(comment) => { r.peek_span = comment.sp; r.peek_tok = comment.tok; }, None => { if is_eof(r) { r.peek_tok = token::EOF; } else { let start_bytepos = r.last_pos; r.peek_tok = next_token_inner(r); r.peek_span = codemap::mk_sp(start_bytepos, r.last_pos); }; } } } fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos { (pos - rdr.filemap.start_pos) } pub fn with_str_from(rdr: @mut StringReader, start: BytePos, f: &fn(s: &str) -> T) -> T { f(rdr.src.slice( byte_offset(rdr, start).to_uint(), byte_offset(rdr, rdr.last_pos).to_uint())) } // EFFECT: advance the StringReader by one character. If a newline is // discovered, add it to the FileMap's list of line start offsets. pub fn bump(rdr: &mut StringReader) { rdr.last_pos = rdr.pos; let current_byte_offset = byte_offset(rdr, rdr.pos).to_uint(); if current_byte_offset < (rdr.src).len() { assert!(rdr.curr != unsafe { transmute(-1u32) }); // FIXME: #8971: unsound let last_char = rdr.curr; let next = rdr.src.char_range_at(current_byte_offset); let byte_offset_diff = next.next - current_byte_offset; rdr.pos = rdr.pos + BytePos(byte_offset_diff); rdr.curr = next.ch; rdr.col = rdr.col + CharPos(1u); if last_char == '\n' { rdr.filemap.next_line(rdr.last_pos); rdr.col = CharPos(0u); } if byte_offset_diff > 1 { rdr.filemap.record_multibyte_char( BytePos(current_byte_offset), byte_offset_diff); } } else { rdr.curr = unsafe { transmute(-1u32) }; // FIXME: #8971: unsound } } pub fn is_eof(rdr: @mut StringReader) -> bool { rdr.curr == unsafe { transmute(-1u32) } // FIXME: #8971: unsound } pub fn nextch(rdr: @mut StringReader) -> char { let offset = byte_offset(rdr, rdr.pos).to_uint(); if offset < (rdr.src).len() { return rdr.src.char_at(offset); } else { return unsafe { transmute(-1u32) }; } // FIXME: #8971: unsound } fn dec_digit_val(c: char) -> int { return (c as int) - ('0' as int); } fn hex_digit_val(c: char) -> int { if 
in_range(c, '0', '9') { return (c as int) - ('0' as int); } if in_range(c, 'a', 'f') { return (c as int) - ('a' as int) + 10; } if in_range(c, 'A', 'F') { return (c as int) - ('A' as int) + 10; } fail2!(); } fn bin_digit_value(c: char) -> int { if c == '0' { return 0; } return 1; } pub fn is_whitespace(c: char) -> bool { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; } fn in_range(c: char, lo: char, hi: char) -> bool { return lo <= c && c <= hi } fn is_dec_digit(c: char) -> bool { return in_range(c, '0', '9'); } fn is_hex_digit(c: char) -> bool { return in_range(c, '0', '9') || in_range(c, 'a', 'f') || in_range(c, 'A', 'F'); } fn is_bin_digit(c: char) -> bool { return c == '0' || c == '1'; } // EFFECT: eats whitespace and comments. // returns a Some(sugared-doc-attr) if one exists, None otherwise. fn consume_whitespace_and_comments(rdr: @mut StringReader) -> Option { while is_whitespace(rdr.curr) { bump(rdr); } return consume_any_line_comment(rdr); } pub fn is_line_non_doc_comment(s: &str) -> bool { let s = s.trim_right(); s.len() > 3 && s.iter().all(|ch| ch == '/') } // PRECONDITION: rdr.curr is not whitespace // EFFECT: eats any kind of comment. // returns a Some(sugared-doc-attr) if one exists, None otherwise fn consume_any_line_comment(rdr: @mut StringReader) -> Option { if rdr.curr == '/' { match nextch(rdr) { '/' => { bump(rdr); bump(rdr); // line comments starting with "///" or "//!" are doc-comments if rdr.curr == '/' || rdr.curr == '!' { let start_bpos = rdr.pos - BytePos(3u); while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); } let ret = do with_str_from(rdr, start_bpos) |string| { // but comments with only more "/"s are not if !is_line_non_doc_comment(string) { Some(TokenAndSpan{ tok: token::DOC_COMMENT(str_to_ident(string)), sp: codemap::mk_sp(start_bpos, rdr.pos) }) } else { None } }; if ret.is_some() { return ret; } } else { while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); } } // Restart whitespace munch. 
return consume_whitespace_and_comments(rdr); } '*' => { bump(rdr); bump(rdr); return consume_block_comment(rdr); } _ => () } } else if rdr.curr == '#' { if nextch(rdr) == '!' { // I guess this is the only way to figure out if // we're at the beginning of the file... let cmap = @CodeMap::new(); (*cmap).files.push(rdr.filemap); let loc = cmap.lookup_char_pos_adj(rdr.last_pos); if loc.line == 1u && loc.col == CharPos(0u) { while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); } return consume_whitespace_and_comments(rdr); } } } return None; } pub fn is_block_non_doc_comment(s: &str) -> bool { assert!(s.len() >= 1u); s.slice(1u, s.len() - 1u).iter().all(|ch| ch == '*') } // might return a sugared-doc-attr fn consume_block_comment(rdr: @mut StringReader) -> Option { // block comments starting with "/**" or "/*!" are doc-comments let res = if rdr.curr == '*' || rdr.curr == '!' { let start_bpos = rdr.pos - BytePos(3u); while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) { bump(rdr); } if is_eof(rdr) { fatal_span(rdr, start_bpos, rdr.last_pos, ~"unterminated block doc-comment"); } else { bump(rdr); bump(rdr); do with_str_from(rdr, start_bpos) |string| { // but comments with only "*"s between two "/"s are not if !is_block_non_doc_comment(string) { Some(TokenAndSpan{ tok: token::DOC_COMMENT(str_to_ident(string)), sp: codemap::mk_sp(start_bpos, rdr.pos) }) } else { None } } } } else { let start_bpos = rdr.last_pos - BytePos(2u); loop { if is_eof(rdr) { fatal_span(rdr, start_bpos, rdr.last_pos, ~"unterminated block comment"); } if rdr.curr == '*' && nextch(rdr) == '/' { bump(rdr); bump(rdr); break; } else { bump(rdr); } } None }; // restart whitespace munch. 
if res.is_some() { res } else { consume_whitespace_and_comments(rdr) } } fn scan_exponent(rdr: @mut StringReader, start_bpos: BytePos) -> Option<~str> { let mut c = rdr.curr; let mut rslt = ~""; if c == 'e' || c == 'E' { rslt.push_char(c); bump(rdr); c = rdr.curr; if c == '-' || c == '+' { rslt.push_char(c); bump(rdr); } let exponent = scan_digits(rdr, 10u); if exponent.len() > 0u { return Some(rslt + exponent); } else { fatal_span(rdr, start_bpos, rdr.last_pos, ~"scan_exponent: bad fp literal"); } } else { return None::<~str>; } } fn scan_digits(rdr: @mut StringReader, radix: uint) -> ~str { let mut rslt = ~""; loop { let c = rdr.curr; if c == '_' { bump(rdr); loop; } match char::to_digit(c, radix) { Some(_) => { rslt.push_char(c); bump(rdr); } _ => return rslt } }; } fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { let mut num_str; let mut base = 10u; let mut c = c; let mut n = nextch(rdr); let start_bpos = rdr.last_pos; if c == '0' && n == 'x' { bump(rdr); bump(rdr); base = 16u; } else if c == '0' && n == 'b' { bump(rdr); bump(rdr); base = 2u; } num_str = scan_digits(rdr, base); c = rdr.curr; nextch(rdr); if c == 'u' || c == 'i' { let signed = c == 'i'; let mut tp = { if signed { either::Left(ast::ty_i) } else { either::Right(ast::ty_u) } }; bump(rdr); c = rdr.curr; if c == '8' { bump(rdr); tp = if signed { either::Left(ast::ty_i8) } else { either::Right(ast::ty_u8) }; } n = nextch(rdr); if c == '1' && n == '6' { bump(rdr); bump(rdr); tp = if signed { either::Left(ast::ty_i16) } else { either::Right(ast::ty_u16) }; } else if c == '3' && n == '2' { bump(rdr); bump(rdr); tp = if signed { either::Left(ast::ty_i32) } else { either::Right(ast::ty_u32) }; } else if c == '6' && n == '4' { bump(rdr); bump(rdr); tp = if signed { either::Left(ast::ty_i64) } else { either::Right(ast::ty_u64) }; } if num_str.len() == 0u { fatal_span(rdr, start_bpos, rdr.last_pos, ~"no valid digits found for number"); } let parsed = match from_str_radix::(num_str, base as 
uint) { Some(p) => p, None => fatal_span(rdr, start_bpos, rdr.last_pos, ~"int literal is too large") }; match tp { either::Left(t) => return token::LIT_INT(parsed as i64, t), either::Right(t) => return token::LIT_UINT(parsed, t) } } let mut is_float = false; if rdr.curr == '.' && !(ident_start(nextch(rdr)) || nextch(rdr) == '.') { is_float = true; bump(rdr); let dec_part = scan_digits(rdr, 10u); num_str.push_char('.'); num_str.push_str(dec_part); } if is_float { match base { 16u => fatal_span(rdr, start_bpos, rdr.last_pos, ~"hexadecimal float literal is not supported"), 2u => fatal_span(rdr, start_bpos, rdr.last_pos, ~"binary float literal is not supported"), _ => () } } match scan_exponent(rdr, start_bpos) { Some(ref s) => { is_float = true; num_str.push_str(*s); } None => () } let mut is_machine_float = false; if rdr.curr == 'f' { bump(rdr); c = rdr.curr; n = nextch(rdr); if c == '3' && n == '2' { bump(rdr); bump(rdr); return token::LIT_FLOAT(str_to_ident(num_str), ast::ty_f32); } else if c == '6' && n == '4' { bump(rdr); bump(rdr); return token::LIT_FLOAT(str_to_ident(num_str), ast::ty_f64); /* FIXME (#2252): if this is out of range for either a 32-bit or 64-bit float, it won't be noticed till the back-end. 
*/ } else { is_float = true; is_machine_float = true; } } if is_float { if is_machine_float { return token::LIT_FLOAT(str_to_ident(num_str), ast::ty_f); } return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str)); } else { if num_str.len() == 0u { fatal_span(rdr, start_bpos, rdr.last_pos, ~"no valid digits found for number"); } let parsed = match from_str_radix::(num_str, base as uint) { Some(p) => p, None => fatal_span(rdr, start_bpos, rdr.last_pos, ~"int literal is too large") }; debug2!("lexing {} as an unsuffixed integer literal", num_str); return token::LIT_INT_UNSUFFIXED(parsed as i64); } } fn scan_numeric_escape(rdr: @mut StringReader, n_hex_digits: uint) -> char { let mut accum_int = 0; let mut i = n_hex_digits; let start_bpos = rdr.last_pos; while i != 0u { let n = rdr.curr; if !is_hex_digit(n) { fatal_span_char(rdr, rdr.last_pos, rdr.pos, ~"illegal character in numeric character escape", n); } bump(rdr); accum_int *= 16; accum_int += hex_digit_val(n); i -= 1u; } match char::from_u32(accum_int as u32) { Some(x) => x, None => fatal_span(rdr, start_bpos, rdr.last_pos, ~"illegal numeric character escape") } } fn ident_start(c: char) -> bool { (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && char::is_XID_start(c)) } fn ident_continue(c: char) -> bool { (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || (c > '\x7f' && char::is_XID_continue(c)) } // return the next token from the string // EFFECT: advances the input past that token // EFFECT: updates the interner fn next_token_inner(rdr: @mut StringReader) -> token::Token { let c = rdr.curr; if ident_start(c) { let start = rdr.last_pos; while ident_continue(rdr.curr) { bump(rdr); } return do with_str_from(rdr, start) |string| { if string == "_" { token::UNDERSCORE } else { let is_mod_name = rdr.curr == ':' && nextch(rdr) == ':'; // FIXME: perform NFKC normalization here. 
(Issue #2253) token::IDENT(str_to_ident(string), is_mod_name) } } } if is_dec_digit(c) { return scan_number(c, rdr); } fn binop(rdr: @mut StringReader, op: token::binop) -> token::Token { bump(rdr); if rdr.curr == '=' { bump(rdr); return token::BINOPEQ(op); } else { return token::BINOP(op); } } match c { // One-byte tokens. ';' => { bump(rdr); return token::SEMI; } ',' => { bump(rdr); return token::COMMA; } '.' => { bump(rdr); if rdr.curr == '.' && nextch(rdr) != '.' { bump(rdr); return token::DOTDOT; } return token::DOT; } '(' => { bump(rdr); return token::LPAREN; } ')' => { bump(rdr); return token::RPAREN; } '{' => { bump(rdr); return token::LBRACE; } '}' => { bump(rdr); return token::RBRACE; } '[' => { bump(rdr); return token::LBRACKET; } ']' => { bump(rdr); return token::RBRACKET; } '@' => { bump(rdr); return token::AT; } '#' => { bump(rdr); return token::POUND; } '~' => { bump(rdr); return token::TILDE; } ':' => { bump(rdr); if rdr.curr == ':' { bump(rdr); return token::MOD_SEP; } else { return token::COLON; } } '$' => { bump(rdr); return token::DOLLAR; } // Multi-byte tokens. '=' => { bump(rdr); if rdr.curr == '=' { bump(rdr); return token::EQEQ; } else if rdr.curr == '>' { bump(rdr); return token::FAT_ARROW; } else { return token::EQ; } } '!' 
=> { bump(rdr); if rdr.curr == '=' { bump(rdr); return token::NE; } else { return token::NOT; } } '<' => { bump(rdr); match rdr.curr { '=' => { bump(rdr); return token::LE; } '<' => { return binop(rdr, token::SHL); } '-' => { bump(rdr); match rdr.curr { '>' => { bump(rdr); return token::DARROW; } _ => { return token::LARROW; } } } _ => { return token::LT; } } } '>' => { bump(rdr); match rdr.curr { '=' => { bump(rdr); return token::GE; } '>' => { return binop(rdr, token::SHR); } _ => { return token::GT; } } } '\'' => { // Either a character constant 'a' OR a lifetime name 'abc bump(rdr); let start = rdr.last_pos; let mut c2 = rdr.curr; bump(rdr); // If the character is an ident start not followed by another single // quote, then this is a lifetime name: if ident_start(c2) && rdr.curr != '\'' { while ident_continue(rdr.curr) { bump(rdr); } return do with_str_from(rdr, start) |lifetime_name| { token::LIFETIME(str_to_ident(lifetime_name)) } } // Otherwise it is a character constant: match c2 { '\\' => { // '\X' for some X must be a character constant: let escaped = rdr.curr; let escaped_pos = rdr.last_pos; bump(rdr); match escaped { 'n' => { c2 = '\n'; } 'r' => { c2 = '\r'; } 't' => { c2 = '\t'; } '\\' => { c2 = '\\'; } '\'' => { c2 = '\''; } '"' => { c2 = '"'; } '0' => { c2 = '\x00'; } 'x' => { c2 = scan_numeric_escape(rdr, 2u); } 'u' => { c2 = scan_numeric_escape(rdr, 4u); } 'U' => { c2 = scan_numeric_escape(rdr, 8u); } c2 => { fatal_span_char(rdr, escaped_pos, rdr.last_pos, ~"unknown character escape", c2); } } } '\t' | '\n' | '\r' | '\'' => { fatal_span_char(rdr, start, rdr.last_pos, ~"character constant must be escaped", c2); } _ => {} } if rdr.curr != '\'' { fatal_span_verbose(rdr, // Byte offsetting here is okay because the // character before position `start` is an // ascii single quote. 
start - BytePos(1u), rdr.last_pos, ~"unterminated character constant"); } bump(rdr); // advance curr past token return token::LIT_CHAR(c2 as u32); } '"' => { let mut accum_str = ~""; let start_bpos = rdr.last_pos; bump(rdr); while rdr.curr != '"' { if is_eof(rdr) { fatal_span(rdr, start_bpos, rdr.last_pos, ~"unterminated double quote string"); } let ch = rdr.curr; bump(rdr); match ch { '\\' => { let escaped = rdr.curr; let escaped_pos = rdr.last_pos; bump(rdr); match escaped { 'n' => accum_str.push_char('\n'), 'r' => accum_str.push_char('\r'), 't' => accum_str.push_char('\t'), '\\' => accum_str.push_char('\\'), '\'' => accum_str.push_char('\''), '"' => accum_str.push_char('"'), '\n' => consume_whitespace(rdr), '0' => accum_str.push_char('\x00'), 'x' => { accum_str.push_char(scan_numeric_escape(rdr, 2u)); } 'u' => { accum_str.push_char(scan_numeric_escape(rdr, 4u)); } 'U' => { accum_str.push_char(scan_numeric_escape(rdr, 8u)); } c2 => { fatal_span_char(rdr, escaped_pos, rdr.last_pos, ~"unknown string escape", c2); } } } _ => accum_str.push_char(ch) } } bump(rdr); return token::LIT_STR(str_to_ident(accum_str)); } '-' => { if nextch(rdr) == '>' { bump(rdr); bump(rdr); return token::RARROW; } else { return binop(rdr, token::MINUS); } } '&' => { if nextch(rdr) == '&' { bump(rdr); bump(rdr); return token::ANDAND; } else { return binop(rdr, token::AND); } } '|' => { match nextch(rdr) { '|' => { bump(rdr); bump(rdr); return token::OROR; } _ => { return binop(rdr, token::OR); } } } '+' => { return binop(rdr, token::PLUS); } '*' => { return binop(rdr, token::STAR); } '/' => { return binop(rdr, token::SLASH); } '^' => { return binop(rdr, token::CARET); } '%' => { return binop(rdr, token::PERCENT); } c => { fatal_span_char(rdr, rdr.last_pos, rdr.pos, ~"unknown start of token", c); } } } fn consume_whitespace(rdr: @mut StringReader) { while is_whitespace(rdr.curr) && !is_eof(rdr) { bump(rdr); } } #[cfg(test)] mod test { use super::*; use codemap::{BytePos, CodeMap, Span}; use 
diagnostic; use parse::token; use parse::token::{str_to_ident}; // represents a testing reader (incl. both reader and interner) struct Env { string_reader: @mut StringReader } // open a string reader for the given string fn setup(teststr: @str) -> Env { let cm = CodeMap::new(); let fm = cm.new_filemap(@"zebra.rs", teststr); let span_handler = diagnostic::mk_span_handler(diagnostic::mk_handler(None),@cm); Env { string_reader: new_string_reader(span_handler,fm) } } #[test] fn t1 () { let Env {string_reader} = setup(@"/* my source file */ \ fn main() { io::println(~\"zebra\"); }\n"); let id = str_to_ident("fn"); let tok1 = string_reader.next_token(); let tok2 = TokenAndSpan{ tok:token::IDENT(id, false), sp:Span {lo:BytePos(21),hi:BytePos(23),expn_info: None}}; assert_eq!(tok1,tok2); // the 'main' id is already read: assert_eq!(string_reader.last_pos.clone(), BytePos(28)); // read another token: let tok3 = string_reader.next_token(); let tok4 = TokenAndSpan{ tok:token::IDENT(str_to_ident("main"), false), sp:Span {lo:BytePos(24),hi:BytePos(28),expn_info: None}}; assert_eq!(tok3,tok4); // the lparen is already read: assert_eq!(string_reader.last_pos.clone(), BytePos(29)) } // check that the given reader produces the desired stream // of tokens (stop checking after exhausting the expected vec) fn check_tokenization (env: Env, expected: ~[token::Token]) { for expected_tok in expected.iter() { let TokenAndSpan {tok:actual_tok, sp: _} = env.string_reader.next_token(); assert_eq!(&actual_tok,expected_tok); } } // make the identifier by looking up the string in the interner fn mk_ident (id: &str, is_mod_name: bool) -> token::Token { token::IDENT (str_to_ident(id),is_mod_name) } #[test] fn doublecolonparsing () { let env = setup (@"a b"); check_tokenization (env, ~[mk_ident("a",false), mk_ident("b",false)]); } #[test] fn dcparsing_2 () { let env = setup (@"a::b"); check_tokenization (env, ~[mk_ident("a",true), token::MOD_SEP, mk_ident("b",false)]); } #[test] fn dcparsing_3 () { 
let env = setup (@"a ::b"); check_tokenization (env, ~[mk_ident("a",false), token::MOD_SEP, mk_ident("b",false)]); } #[test] fn dcparsing_4 () { let env = setup (@"a:: b"); check_tokenization (env, ~[mk_ident("a",true), token::MOD_SEP, mk_ident("b",false)]); } #[test] fn character_a() { let env = setup(@"'a'"); let TokenAndSpan {tok, sp: _} = env.string_reader.next_token(); assert_eq!(tok,token::LIT_CHAR('a' as u32)); } #[test] fn character_space() { let env = setup(@"' '"); let TokenAndSpan {tok, sp: _} = env.string_reader.next_token(); assert_eq!(tok, token::LIT_CHAR(' ' as u32)); } #[test] fn character_escaped() { let env = setup(@"'\\n'"); let TokenAndSpan {tok, sp: _} = env.string_reader.next_token(); assert_eq!(tok, token::LIT_CHAR('\n' as u32)); } #[test] fn lifetime_name() { let env = setup(@"'abc"); let TokenAndSpan {tok, sp: _} = env.string_reader.next_token(); let id = token::str_to_ident("abc"); assert_eq!(tok, token::LIFETIME(id)); } #[test] fn line_doc_comments() { assert!(!is_line_non_doc_comment("///")); assert!(!is_line_non_doc_comment("/// blah")); assert!(is_line_non_doc_comment("////")); } }