Pull out an interface for the lexer.

This commit is contained in:
Paul Stansifer 2012-05-30 11:36:30 -07:00
parent 473b1ec0a0
commit 32167f52b0
7 changed files with 285 additions and 247 deletions

View File

@ -374,13 +374,10 @@ enum blk_sort {
*/
#[auto_serialize]
type token_tree = spanned<token_tree_>;
#[auto_serialize]
enum token_tree_ {
enum token_tree {
/* for macro invocations; parsing is the macro's job */
tt_delim(token::token, [token_tree]),
tt_flat(token::token)
tt_delim([token_tree]),
tt_flat(uint, token::token)
}
#[auto_serialize]

View File

@ -4,6 +4,7 @@ import dvec::extensions;
export parse_sess;
export next_node_id;
export new_parser_from_file;
export new_parser_etc_from_file;
export new_parser_from_source_str;
export parse_crate_from_file;
export parse_crate_from_crate_file;
@ -17,7 +18,7 @@ import attr::parser_attr;
import common::parser_common;
import ast::node_id;
import util::interner;
import lexer::reader;
import lexer::{string_reader_as_reader, reader, string_reader};
type parse_sess = @{
cm: codemap::codemap,
@ -42,14 +43,15 @@ fn parse_crate_from_file(input: str, cfg: ast::crate_cfg, sess: parse_sess) ->
fn parse_crate_from_crate_file(input: str, cfg: ast::crate_cfg,
sess: parse_sess) -> @ast::crate {
let p = new_parser_from_file(sess, cfg, input, parser::CRATE_FILE);
let (p, rdr) = new_parser_etc_from_file(sess, cfg, input,
parser::CRATE_FILE);
let lo = p.span.lo;
let prefix = path::dirname(p.reader.filemap.name);
let prefix = path::dirname(input);
let leading_attrs = p.parse_inner_attrs_and_next();
let { inner: crate_attrs, next: first_cdir_attr } = leading_attrs;
let cdirs = p.parse_crate_directives(token::EOF, first_cdir_attr);
sess.chpos = p.reader.chpos;
sess.byte_pos = sess.byte_pos + p.reader.pos;
sess.chpos = rdr.chpos;
sess.byte_pos = sess.byte_pos + rdr.pos;
let cx = @{sess: sess, cfg: /* FIXME: bad */ copy p.cfg};
let (companionmod, _) = path::splitext(path::basename(input));
let (m, attrs) = eval::eval_crate_directives_to_mod(
@ -65,41 +67,42 @@ fn parse_crate_from_crate_file(input: str, cfg: ast::crate_cfg,
fn parse_crate_from_source_file(input: str, cfg: ast::crate_cfg,
sess: parse_sess) -> @ast::crate {
let p = new_parser_from_file(sess, cfg, input, parser::SOURCE_FILE);
let (p, rdr) = new_parser_etc_from_file(sess, cfg, input,
parser::SOURCE_FILE);
let r = p.parse_crate_mod(cfg);
sess.chpos = p.reader.chpos;
sess.byte_pos = sess.byte_pos + p.reader.pos;
sess.chpos = rdr.chpos;
sess.byte_pos = sess.byte_pos + rdr.pos;
ret r;
}
fn parse_crate_from_source_str(name: str, source: @str, cfg: ast::crate_cfg,
sess: parse_sess) -> @ast::crate {
let p = new_parser_from_source_str(
sess, cfg, name, codemap::fss_none, source);
let (p, rdr) = new_parser_etc_from_source_str(sess, cfg, name,
codemap::fss_none, source);
let r = p.parse_crate_mod(cfg);
sess.chpos = p.reader.chpos;
sess.byte_pos = sess.byte_pos + p.reader.pos;
sess.chpos = rdr.chpos;
sess.byte_pos = sess.byte_pos + rdr.pos;
ret r;
}
fn parse_expr_from_source_str(name: str, source: @str, cfg: ast::crate_cfg,
sess: parse_sess) -> @ast::expr {
let p = new_parser_from_source_str(
sess, cfg, name, codemap::fss_none, source);
let (p, rdr) = new_parser_etc_from_source_str(sess, cfg, name,
codemap::fss_none, source);
let r = p.parse_expr();
sess.chpos = p.reader.chpos;
sess.byte_pos = sess.byte_pos + p.reader.pos;
sess.chpos = rdr.chpos;
sess.byte_pos = sess.byte_pos + rdr.pos;
ret r;
}
fn parse_item_from_source_str(name: str, source: @str, cfg: ast::crate_cfg,
+attrs: [ast::attribute], vis: ast::visibility,
sess: parse_sess) -> option<@ast::item> {
let p = new_parser_from_source_str(
sess, cfg, name, codemap::fss_none, source);
let (p, rdr) = new_parser_etc_from_source_str(sess, cfg, name,
codemap::fss_none, source);
let r = p.parse_item(attrs, vis);
sess.chpos = p.reader.chpos;
sess.byte_pos = sess.byte_pos + p.reader.pos;
sess.chpos = rdr.chpos;
sess.byte_pos = sess.byte_pos + rdr.pos;
ret r;
}
@ -109,13 +112,14 @@ fn parse_from_source_str<T>(f: fn (p: parser) -> T,
sess: parse_sess)
-> T
{
let p = new_parser_from_source_str(sess, cfg, name, ss, source);
let (p, rdr) = new_parser_etc_from_source_str(sess, cfg, name, ss,
source);
let r = f(p);
if !p.reader.is_eof() {
p.reader.fatal("expected end-of-string");
}
sess.chpos = p.reader.chpos;
sess.byte_pos = sess.byte_pos + p.reader.pos;
sess.chpos = rdr.chpos;
sess.byte_pos = sess.byte_pos + rdr.pos;
ret r;
}
@ -127,9 +131,9 @@ fn next_node_id(sess: parse_sess) -> node_id {
ret rv;
}
fn new_parser_from_source_str(sess: parse_sess, cfg: ast::crate_cfg,
+name: str, +ss: codemap::file_substr,
source: @str) -> parser {
fn new_parser_etc_from_source_str(sess: parse_sess, cfg: ast::crate_cfg,
+name: str, +ss: codemap::file_substr,
source: @str) -> (parser, string_reader) {
let ftype = parser::SOURCE_FILE;
let filemap = codemap::new_filemap_w_substr
(name, ss, source, sess.chpos, sess.byte_pos);
@ -138,14 +142,21 @@ fn new_parser_from_source_str(sess: parse_sess, cfg: ast::crate_cfg,
{|x|str::hash(*x)},
{|x,y|str::eq(*x, *y)}
);
let rdr = lexer::new_reader(sess.span_diagnostic,
filemap, itr);
ret parser(sess, cfg, rdr, ftype);
let srdr = lexer::new_string_reader(sess.span_diagnostic, filemap, itr);
ret (parser(sess, cfg, srdr as reader, ftype), srdr);
}
fn new_parser_from_file(sess: parse_sess, cfg: ast::crate_cfg, +path: str,
ftype: parser::file_type) ->
parser {
// Convenience wrapper around `new_parser_etc_from_source_str` for callers
// that only need the parser: the `string_reader` half of the returned pair
// is discarded.
fn new_parser_from_source_str(sess: parse_sess, cfg: ast::crate_cfg,
+name: str, +ss: codemap::file_substr,
source: @str) -> parser {
// Only the parser is kept; the reader is dropped here.
let (p, _) = new_parser_etc_from_source_str(sess, cfg, name, ss, source);
ret p;
}
fn new_parser_etc_from_file(sess: parse_sess, cfg: ast::crate_cfg, +path: str,
ftype: parser::file_type) ->
(parser, string_reader) {
let res = io::read_whole_file_str(path);
alt res {
result::ok(_) { /* Continue. */ }
@ -158,6 +169,12 @@ fn new_parser_from_file(sess: parse_sess, cfg: ast::crate_cfg, +path: str,
{|x|str::hash(*x)},
{|x,y|str::eq(*x, *y)}
);
let rdr = lexer::new_reader(sess.span_diagnostic, filemap, itr);
ret parser(sess, cfg, rdr, ftype);
let srdr = lexer::new_string_reader(sess.span_diagnostic, filemap, itr);
ret (parser(sess, cfg, srdr as reader, ftype), srdr);
}
// Convenience wrapper around `new_parser_etc_from_file` for callers that
// only need the parser: the `string_reader` half of the returned pair is
// discarded.
fn new_parser_from_file(sess: parse_sess, cfg: ast::crate_cfg, +path: str,
ftype: parser::file_type) -> parser {
// Only the parser is kept; the reader is dropped here.
let (p, _) = new_parser_etc_from_file(sess, cfg, path, ftype);
ret p;
}

View File

@ -1,7 +1,8 @@
import io::reader_util;
import io::println;//XXXXXXXXxxx
import util::interner;
import lexer::{ reader, new_reader, next_token, is_whitespace };
import lexer::{ string_reader, bump, is_eof, nextch, new_string_reader,
is_whitespace, get_str_from, string_reader_as_reader };
export cmnt;
export lit;
@ -17,45 +18,46 @@ enum cmnt_style {
type cmnt = {style: cmnt_style, lines: [str], pos: uint};
fn read_to_eol(rdr: reader) -> str {
fn read_to_eol(rdr: string_reader) -> str {
let mut val = "";
while rdr.curr != '\n' && !rdr.is_eof() {
while rdr.curr != '\n' && !is_eof(rdr) {
str::push_char(val, rdr.curr);
rdr.bump();
bump(rdr);
}
if rdr.curr == '\n' { rdr.bump(); }
if rdr.curr == '\n' { bump(rdr); }
ret val;
}
fn read_one_line_comment(rdr: reader) -> str {
fn read_one_line_comment(rdr: string_reader) -> str {
let val = read_to_eol(rdr);
assert ((val[0] == '/' as u8 && val[1] == '/' as u8) ||
(val[0] == '#' as u8 && val[1] == '!' as u8));
ret val;
}
fn consume_non_eol_whitespace(rdr: reader) {
while is_whitespace(rdr.curr) && rdr.curr != '\n' && !rdr.is_eof() {
rdr.bump();
fn consume_non_eol_whitespace(rdr: string_reader) {
while is_whitespace(rdr.curr) && rdr.curr != '\n' && !is_eof(rdr) {
bump(rdr);
}
}
fn push_blank_line_comment(rdr: reader, &comments: [cmnt]) {
fn push_blank_line_comment(rdr: string_reader, &comments: [cmnt]) {
#debug(">>> blank-line comment");
let v: [str] = [];
comments += [{style: blank_line, lines: v, pos: rdr.chpos}];
}
fn consume_whitespace_counting_blank_lines(rdr: reader, &comments: [cmnt]) {
while is_whitespace(rdr.curr) && !rdr.is_eof() {
fn consume_whitespace_counting_blank_lines(rdr: string_reader,
&comments: [cmnt]) {
while is_whitespace(rdr.curr) && !is_eof(rdr) {
if rdr.col == 0u && rdr.curr == '\n' {
push_blank_line_comment(rdr, comments);
}
rdr.bump();
bump(rdr);
}
}
fn read_shebang_comment(rdr: reader, code_to_the_left: bool) -> cmnt {
fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt {
#debug(">>> shebang comment");
let p = rdr.chpos;
#debug("<<< shebang comment");
@ -64,11 +66,11 @@ fn read_shebang_comment(rdr: reader, code_to_the_left: bool) -> cmnt {
pos: p};
}
fn read_line_comments(rdr: reader, code_to_the_left: bool) -> cmnt {
fn read_line_comments(rdr: string_reader, code_to_the_left: bool) -> cmnt {
#debug(">>> line comments");
let p = rdr.chpos;
let mut lines: [str] = [];
while rdr.curr == '/' && rdr.next() == '/' {
while rdr.curr == '/' && nextch(rdr) == '/' {
let line = read_one_line_comment(rdr);
log(debug, line);
lines += [line];
@ -99,36 +101,36 @@ fn trim_whitespace_prefix_and_push_line(&lines: [str],
lines += [s1];
}
fn read_block_comment(rdr: reader, code_to_the_left: bool) -> cmnt {
fn read_block_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt {
#debug(">>> block comment");
let p = rdr.chpos;
let mut lines: [str] = [];
let mut col: uint = rdr.col;
rdr.bump();
rdr.bump();
bump(rdr);
bump(rdr);
let mut curr_line = "/*";
let mut level: int = 1;
while level > 0 {
#debug("=== block comment level %d", level);
if rdr.is_eof() { rdr.fatal("unterminated block comment"); }
if is_eof(rdr) {(rdr as reader).fatal("unterminated block comment");}
if rdr.curr == '\n' {
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
curr_line = "";
rdr.bump();
bump(rdr);
} else {
str::push_char(curr_line, rdr.curr);
if rdr.curr == '/' && rdr.next() == '*' {
rdr.bump();
rdr.bump();
if rdr.curr == '/' && nextch(rdr) == '*' {
bump(rdr);
bump(rdr);
curr_line += "*";
level += 1;
} else {
if rdr.curr == '*' && rdr.next() == '/' {
rdr.bump();
rdr.bump();
if rdr.curr == '*' && nextch(rdr) == '/' {
bump(rdr);
bump(rdr);
curr_line += "/";
level -= 1;
} else { rdr.bump(); }
} else { bump(rdr); }
}
}
}
@ -137,26 +139,27 @@ fn read_block_comment(rdr: reader, code_to_the_left: bool) -> cmnt {
}
let mut style = if code_to_the_left { trailing } else { isolated };
consume_non_eol_whitespace(rdr);
if !rdr.is_eof() && rdr.curr != '\n' && vec::len(lines) == 1u {
if !is_eof(rdr) && rdr.curr != '\n' && vec::len(lines) == 1u {
style = mixed;
}
#debug("<<< block comment");
ret {style: style, lines: lines, pos: p};
}
fn peeking_at_comment(rdr: reader) -> bool {
ret ((rdr.curr == '/' && rdr.next() == '/') ||
(rdr.curr == '/' && rdr.next() == '*')) ||
(rdr.curr == '#' && rdr.next() == '!');
fn peeking_at_comment(rdr: string_reader) -> bool {
ret ((rdr.curr == '/' && nextch(rdr) == '/') ||
(rdr.curr == '/' && nextch(rdr) == '*')) ||
(rdr.curr == '#' && nextch(rdr) == '!');
}
fn consume_comment(rdr: reader, code_to_the_left: bool, &comments: [cmnt]) {
fn consume_comment(rdr: string_reader, code_to_the_left: bool,
&comments: [cmnt]) {
#debug(">>> consume comment");
if rdr.curr == '/' && rdr.next() == '/' {
if rdr.curr == '/' && nextch(rdr) == '/' {
comments += [read_line_comments(rdr, code_to_the_left)];
} else if rdr.curr == '/' && rdr.next() == '*' {
} else if rdr.curr == '/' && nextch(rdr) == '*' {
comments += [read_block_comment(rdr, code_to_the_left)];
} else if rdr.curr == '#' && rdr.next() == '!' {
} else if rdr.curr == '#' && nextch(rdr) == '!' {
comments += [read_shebang_comment(rdr, code_to_the_left)];
} else { fail; }
#debug("<<< consume comment");
@ -173,12 +176,12 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
{|x|str::hash(*x)},
{|x,y|str::eq(*x, *y)}
);
let rdr = new_reader(span_diagnostic,
codemap::new_filemap(path, src, 0u, 0u), itr);
let rdr = new_string_reader(span_diagnostic,
codemap::new_filemap(path, src, 0u, 0u), itr);
let mut comments: [cmnt] = [];
let mut literals: [lit] = [];
let mut first_read: bool = true;
while !rdr.is_eof() {
while !is_eof(rdr) {
loop {
let mut code_to_the_left = !first_read;
consume_non_eol_whitespace(rdr);
@ -192,9 +195,10 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
}
break;
}
let tok = next_token(rdr);
let bpos = rdr.pos;
let tok = rdr.next_token();
if token::is_lit(tok.tok) {
let s = rdr.get_str_from(tok.bpos);
let s = get_str_from(rdr, bpos);
literals += [{lit: s, pos: tok.chpos}];
log(debug, "tok lit: " + s);
} else {

View File

@ -18,7 +18,7 @@ fn seq_sep_none() -> seq_sep {
}
fn token_to_str(reader: reader, ++token: token::token) -> str {
token::to_str(*reader.interner, token)
token::to_str(*reader.interner(), token)
}
// This should be done with traits, once traits work

View File

@ -64,11 +64,12 @@ fn parse_companion_mod(cx: ctx, prefix: str, suffix: option<str>)
#debug("looking for companion mod %s", modpath);
if file_exists(modpath) {
#debug("found companion mod");
let p0 = new_parser_from_file(cx.sess, cx.cfg, modpath, SOURCE_FILE);
let (p0, r0) = new_parser_etc_from_file(cx.sess, cx.cfg,
modpath, SOURCE_FILE);
let inner_attrs = p0.parse_inner_attrs_and_next();
let m0 = p0.parse_mod_items(token::EOF, inner_attrs.next);
cx.sess.chpos = p0.reader.chpos;
cx.sess.byte_pos = cx.sess.byte_pos + p0.reader.pos;
cx.sess.chpos = p0.reader.chpos();
cx.sess.byte_pos = cx.sess.byte_pos + r0.pos;
ret (m0.view_items, m0.items, inner_attrs.inner);
} else {
ret ([], [], []);
@ -94,8 +95,8 @@ fn eval_crate_directive(cx: ctx, cdir: @ast::crate_directive, prefix: str,
if path::path_is_absolute(*file_path) {
*file_path
} else { prefix + path::path_sep() + *file_path };
let p0 =
new_parser_from_file(cx.sess, cx.cfg, full_path, SOURCE_FILE);
let (p0, r0) =
new_parser_etc_from_file(cx.sess, cx.cfg, full_path, SOURCE_FILE);
let inner_attrs = p0.parse_inner_attrs_and_next();
let mod_attrs = attrs + inner_attrs.inner;
let first_item_outer_attrs = inner_attrs.next;
@ -105,8 +106,8 @@ fn eval_crate_directive(cx: ctx, cdir: @ast::crate_directive, prefix: str,
/* FIXME: bad */ copy id,
ast::item_mod(m0), ast::public, mod_attrs);
// Thread defids, chpos and byte_pos through the parsers
cx.sess.chpos = p0.reader.chpos;
cx.sess.byte_pos = cx.sess.byte_pos + p0.reader.pos;
cx.sess.chpos = p0.reader.chpos();
cx.sess.byte_pos = cx.sess.byte_pos + r0.pos;
items += [i];
}
ast::cdir_dir_mod(id, cdirs, attrs) {

View File

@ -2,9 +2,19 @@ import util::interner;
import util::interner::intern;
import diagnostic;
export reader, new_reader, next_token, is_whitespace;
export reader, string_reader, new_string_reader, is_whitespace;
export nextch, is_eof, bump, get_str_from;
export string_reader_as_reader;
type reader = @{
// Interface for a source of tokens. The parser stores a value of this type
// and obtains tokens, positions and the interner through these methods.
// The notes on each method describe what the `string_reader`
// implementation in this file does.
iface reader {
// Reports whether the input has been exhausted.
fn is_eof() -> bool;
// Produces the next token; `chpos` is the character position recorded
// just before the token was scanned (after leading whitespace and
// comments were skipped).
fn next_token() -> {tok: token::token, chpos: uint};
// Reports a fatal error with the given message; never returns.
fn fatal(str) -> !;
// Current character position of the reader.
fn chpos() -> uint;
// The string interner used for interned token text.
fn interner() -> @interner::interner<@str>;
}
type string_reader = @{
span_diagnostic: diagnostic::span_handler,
src: @str,
mut col: uint,
@ -15,47 +25,64 @@ type reader = @{
interner: @interner::interner<@str>
};
impl reader for reader {
fn is_eof() -> bool { self.curr == -1 as char }
fn get_str_from(start: uint) -> str unsafe {
// I'm pretty skeptical about this subtraction. What if there's a
// multi-byte character before the mark?
ret str::slice(*self.src, start - 1u, self.pos - 1u);
}
fn next() -> char {
if self.pos < (*self.src).len() {
ret str::char_at(*self.src, self.pos);
} else { ret -1 as char; }
}
fn bump() {
if self.pos < (*self.src).len() {
self.col += 1u;
self.chpos += 1u;
if self.curr == '\n' {
codemap::next_line(self.filemap, self.chpos, self.pos);
self.col = 0u;
}
let next = str::char_range_at(*self.src, self.pos);
self.pos = next.next;
self.curr = next.ch;
impl string_reader_as_reader of reader for string_reader {
fn is_eof() -> bool { is_eof(self) }
fn next_token() -> {tok: token::token, chpos: uint} {
consume_whitespace_and_comments(self);
let start_chpos = self.chpos;
let tok = if is_eof(self) {
token::EOF
} else {
if (self.curr != -1 as char) {
self.col += 1u;
self.chpos += 1u;
self.curr = -1 as char;
}
}
next_token_inner(self)
};
ret {tok: tok, chpos: start_chpos};
}
fn fatal(m: str) -> ! {
self.span_diagnostic.span_fatal(
ast_util::mk_sp(self.chpos, self.chpos),
m)
}
fn chpos() -> uint { self.chpos }
fn interner() -> @interner::interner<@str> { self.interner }
}
fn new_reader(span_diagnostic: diagnostic::span_handler,
filemap: codemap::filemap,
itr: @interner::interner<@str>) -> reader {
// Returns the slice of the reader's source text running from `start - 1`
// up to (but not including) `rdr.pos - 1`.
//
// `start` is interpreted in the same coordinate space as `rdr.pos`, i.e. as
// an offset into `*rdr.src`. `bump` leaves `rdr.pos` pointing just past the
// character held in `rdr.curr`, which is presumably why both ends are
// shifted back by one.
//
// NOTE(review): `start` is unsigned, so the `start - 1u` below presumably
// misbehaves when `start` is 0 -- confirm callers never pass 0.
fn get_str_from(rdr: string_reader, start: uint) -> str unsafe {
// I'm pretty skeptical about this subtraction. What if there's a
// multi-byte character before the mark?
ret str::slice(*rdr.src, start - 1u, rdr.pos - 1u);
}
// Advances the reader by one character, updating `col`, `chpos`, `pos` and
// `curr` in place.
//
// While input remains, the character at offset `pos` becomes the new
// `curr`. Once `pos` has reached the end of the source, `curr` is set to the
// end-of-input sentinel (`-1 as char`) exactly once; further calls change
// nothing.
fn bump(rdr: string_reader) {
if rdr.pos < (*rdr.src).len() {
rdr.col += 1u;
rdr.chpos += 1u;
// The character being left behind was a newline: register the start of
// a new line with the codemap and restart the column count.
if rdr.curr == '\n' {
codemap::next_line(rdr.filemap, rdr.chpos, rdr.pos);
rdr.col = 0u;
}
// Decode the character at `pos`; `next.next` is the offset following it.
let next = str::char_range_at(*rdr.src, rdr.pos);
rdr.pos = next.next;
rdr.curr = next.ch;
} else {
// Source exhausted: step past the last real character once and install
// the sentinel. The guard keeps repeated calls from advancing the
// counters again.
if (rdr.curr != -1 as char) {
rdr.col += 1u;
rdr.chpos += 1u;
rdr.curr = -1 as char;
}
}
}
// True when the reader's current character is the end-of-input sentinel
// (`-1 as char`), which `bump` installs once the source is exhausted.
fn is_eof(rdr: string_reader) -> bool {
rdr.curr == -1 as char
}
// Peeks at the character at offset `rdr.pos` (the one that the next `bump`
// would make current) without advancing the reader. Returns the
// end-of-input sentinel (`-1 as char`) when `pos` is at or past the end of
// the source.
fn nextch(rdr: string_reader) -> char {
if rdr.pos < (*rdr.src).len() {
ret str::char_at(*rdr.src, rdr.pos);
} else { ret -1 as char; }
}
fn new_string_reader(span_diagnostic: diagnostic::span_handler,
filemap: codemap::filemap,
itr: @interner::interner<@str>) -> string_reader {
let r = @{span_diagnostic: span_diagnostic, src: filemap.src,
mut col: 0u, mut pos: 0u, mut curr: -1 as char,
mut chpos: filemap.start_pos.ch,
@ -102,50 +129,50 @@ fn is_hex_digit(c: char) -> bool {
fn is_bin_digit(c: char) -> bool { ret c == '0' || c == '1'; }
fn consume_whitespace_and_comments(rdr: reader) {
while is_whitespace(rdr.curr) { rdr.bump(); }
// Skips a run of whitespace characters, then hands off to
// `consume_any_line_comment` to deal with a comment that may follow.
fn consume_whitespace_and_comments(rdr: string_reader) {
while is_whitespace(rdr.curr) { bump(rdr); }
ret consume_any_line_comment(rdr);
}
fn consume_any_line_comment(rdr: reader) {
fn consume_any_line_comment(rdr: string_reader) {
if rdr.curr == '/' {
alt rdr.next() {
alt nextch(rdr) {
'/' {
while rdr.curr != '\n' && !rdr.is_eof() { rdr.bump(); }
while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
// Restart whitespace munch.
ret consume_whitespace_and_comments(rdr);
}
'*' { rdr.bump(); rdr.bump(); ret consume_block_comment(rdr); }
'*' { bump(rdr); bump(rdr); ret consume_block_comment(rdr); }
_ { ret; }
}
} else if rdr.curr == '#' {
if rdr.next() == '!' {
if nextch(rdr) == '!' {
let cmap = codemap::new_codemap();
(*cmap).files.push(rdr.filemap);
let loc = codemap::lookup_char_pos_adj(cmap, rdr.chpos);
if loc.line == 1u && loc.col == 0u {
while rdr.curr != '\n' && !rdr.is_eof() { rdr.bump(); }
while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
ret consume_whitespace_and_comments(rdr);
}
}
}
}
fn consume_block_comment(rdr: reader) {
fn consume_block_comment(rdr: string_reader) {
let mut level: int = 1;
while level > 0 {
if rdr.is_eof() { rdr.fatal("unterminated block comment"); }
if rdr.curr == '/' && rdr.next() == '*' {
rdr.bump();
rdr.bump();
if is_eof(rdr) { rdr.fatal("unterminated block comment"); }
if rdr.curr == '/' && nextch(rdr) == '*' {
bump(rdr);
bump(rdr);
level += 1;
} else {
if rdr.curr == '*' && rdr.next() == '/' {
rdr.bump();
rdr.bump();
if rdr.curr == '*' && nextch(rdr) == '/' {
bump(rdr);
bump(rdr);
level -= 1;
} else { rdr.bump(); }
} else { bump(rdr); }
}
}
// restart whitespace munch.
@ -153,16 +180,16 @@ fn consume_block_comment(rdr: reader) {
ret consume_whitespace_and_comments(rdr);
}
fn scan_exponent(rdr: reader) -> option<str> {
fn scan_exponent(rdr: string_reader) -> option<str> {
let mut c = rdr.curr;
let mut rslt = "";
if c == 'e' || c == 'E' {
str::push_char(rslt, c);
rdr.bump();
bump(rdr);
c = rdr.curr;
if c == '-' || c == '+' {
str::push_char(rslt, c);
rdr.bump();
bump(rdr);
}
let exponent = scan_digits(rdr, 10u);
if str::len(exponent) > 0u {
@ -171,62 +198,62 @@ fn scan_exponent(rdr: reader) -> option<str> {
} else { ret none::<str>; }
}
fn scan_digits(rdr: reader, radix: uint) -> str {
fn scan_digits(rdr: string_reader, radix: uint) -> str {
let mut rslt = "";
loop {
let c = rdr.curr;
if c == '_' { rdr.bump(); cont; }
if c == '_' { bump(rdr); cont; }
alt char::to_digit(c, radix) {
some(d) {
str::push_char(rslt, c);
rdr.bump();
bump(rdr);
}
_ { ret rslt; }
}
};
}
fn scan_number(c: char, rdr: reader) -> token::token {
let mut num_str, base = 10u, c = c, n = rdr.next();
fn scan_number(c: char, rdr: string_reader) -> token::token {
let mut num_str, base = 10u, c = c, n = nextch(rdr);
if c == '0' && n == 'x' {
rdr.bump();
rdr.bump();
bump(rdr);
bump(rdr);
base = 16u;
} else if c == '0' && n == 'b' {
rdr.bump();
rdr.bump();
bump(rdr);
bump(rdr);
base = 2u;
}
num_str = scan_digits(rdr, base);
c = rdr.curr;
rdr.next();
nextch(rdr);
if c == 'u' || c == 'i' {
let signed = c == 'i';
let mut tp = {
if signed { either::left(ast::ty_i) }
else { either::right(ast::ty_u) }
};
rdr.bump();
bump(rdr);
c = rdr.curr;
if c == '8' {
rdr.bump();
bump(rdr);
tp = if signed { either::left(ast::ty_i8) }
else { either::right(ast::ty_u8) };
}
n = rdr.next();
n = nextch(rdr);
if c == '1' && n == '6' {
rdr.bump();
rdr.bump();
bump(rdr);
bump(rdr);
tp = if signed { either::left(ast::ty_i16) }
else { either::right(ast::ty_u16) };
} else if c == '3' && n == '2' {
rdr.bump();
rdr.bump();
bump(rdr);
bump(rdr);
tp = if signed { either::left(ast::ty_i32) }
else { either::right(ast::ty_u32) };
} else if c == '6' && n == '4' {
rdr.bump();
rdr.bump();
bump(rdr);
bump(rdr);
tp = if signed { either::left(ast::ty_i64) }
else { either::right(ast::ty_u64) };
}
@ -240,9 +267,9 @@ fn scan_number(c: char, rdr: reader) -> token::token {
}
}
let mut is_float = false;
if rdr.curr == '.' && !(is_alpha(rdr.next()) || rdr.next() == '_') {
if rdr.curr == '.' && !(is_alpha(nextch(rdr)) || nextch(rdr) == '_') {
is_float = true;
rdr.bump();
bump(rdr);
let dec_part = scan_digits(rdr, 10u);
num_str += "." + dec_part;
}
@ -254,17 +281,17 @@ fn scan_number(c: char, rdr: reader) -> token::token {
none {}
}
if rdr.curr == 'f' {
rdr.bump();
bump(rdr);
c = rdr.curr;
n = rdr.next();
n = nextch(rdr);
if c == '3' && n == '2' {
rdr.bump();
rdr.bump();
bump(rdr);
bump(rdr);
ret token::LIT_FLOAT(intern(*rdr.interner, @num_str),
ast::ty_f32);
} else if c == '6' && n == '4' {
rdr.bump();
rdr.bump();
bump(rdr);
bump(rdr);
ret token::LIT_FLOAT(intern(*rdr.interner, @num_str),
ast::ty_f64);
/* FIXME: if this is out of range for either a 32-bit or
@ -289,11 +316,11 @@ fn scan_number(c: char, rdr: reader) -> token::token {
}
}
fn scan_numeric_escape(rdr: reader, n_hex_digits: uint) -> char {
fn scan_numeric_escape(rdr: string_reader, n_hex_digits: uint) -> char {
let mut accum_int = 0, i = n_hex_digits;
while i != 0u {
let n = rdr.curr;
rdr.bump();
bump(rdr);
if !is_hex_digit(n) {
rdr.fatal(#fmt["illegal numeric character escape: %d", n as int]);
}
@ -304,15 +331,7 @@ fn scan_numeric_escape(rdr: reader, n_hex_digits: uint) -> char {
ret accum_int as char;
}
fn next_token(rdr: reader) -> {tok: token::token, chpos: uint, bpos: uint} {
consume_whitespace_and_comments(rdr);
let start_chpos = rdr.chpos;
let start_bpos = rdr.pos;
let tok = if rdr.is_eof() { token::EOF } else { next_token_inner(rdr) };
ret {tok: tok, chpos: start_chpos, bpos: start_bpos};
}
fn next_token_inner(rdr: reader) -> token::token {
fn next_token_inner(rdr: string_reader) -> token::token {
let mut accum_str = "";
let mut c = rdr.curr;
if (c >= 'a' && c <= 'z')
@ -325,11 +344,11 @@ fn next_token_inner(rdr: reader) -> token::token {
|| c == '_'
|| (c > 'z' && char::is_XID_continue(c)) {
str::push_char(accum_str, c);
rdr.bump();
bump(rdr);
c = rdr.curr;
}
if str::eq(accum_str, "_") { ret token::UNDERSCORE; }
let is_mod_name = c == ':' && rdr.next() == ':';
let is_mod_name = c == ':' && nextch(rdr) == ':';
// FIXME: perform NFKC normalization here. (Issue #2253)
ret token::IDENT(interner::intern(*rdr.interner,
@ -338,10 +357,10 @@ fn next_token_inner(rdr: reader) -> token::token {
if is_dec_digit(c) {
ret scan_number(c, rdr);
}
fn binop(rdr: reader, op: token::binop) -> token::token {
rdr.bump();
fn binop(rdr: string_reader, op: token::binop) -> token::token {
bump(rdr);
if rdr.curr == '=' {
rdr.bump();
bump(rdr);
ret token::BINOPEQ(op);
} else { ret token::BINOP(op); }
}
@ -352,35 +371,35 @@ fn next_token_inner(rdr: reader) -> token::token {
// One-byte tokens.
';' { rdr.bump(); ret token::SEMI; }
',' { rdr.bump(); ret token::COMMA; }
';' { bump(rdr); ret token::SEMI; }
',' { bump(rdr); ret token::COMMA; }
'.' {
rdr.bump();
if rdr.curr == '.' && rdr.next() == '.' {
rdr.bump();
rdr.bump();
bump(rdr);
if rdr.curr == '.' && nextch(rdr) == '.' {
bump(rdr);
bump(rdr);
ret token::ELLIPSIS;
}
ret token::DOT;
}
'(' { rdr.bump(); ret token::LPAREN; }
')' { rdr.bump(); ret token::RPAREN; }
'{' { rdr.bump(); ret token::LBRACE; }
'}' { rdr.bump(); ret token::RBRACE; }
'[' { rdr.bump(); ret token::LBRACKET; }
']' { rdr.bump(); ret token::RBRACKET; }
'@' { rdr.bump(); ret token::AT; }
'#' { rdr.bump(); ret token::POUND; }
'~' { rdr.bump(); ret token::TILDE; }
'(' { bump(rdr); ret token::LPAREN; }
')' { bump(rdr); ret token::RPAREN; }
'{' { bump(rdr); ret token::LBRACE; }
'}' { bump(rdr); ret token::RBRACE; }
'[' { bump(rdr); ret token::LBRACKET; }
']' { bump(rdr); ret token::RBRACKET; }
'@' { bump(rdr); ret token::AT; }
'#' { bump(rdr); ret token::POUND; }
'~' { bump(rdr); ret token::TILDE; }
':' {
rdr.bump();
bump(rdr);
if rdr.curr == ':' {
rdr.bump();
bump(rdr);
ret token::MOD_SEP;
} else { ret token::COLON; }
}
'$' { rdr.bump(); ret token::DOLLAR; }
'$' { bump(rdr); ret token::DOLLAR; }
@ -388,33 +407,33 @@ fn next_token_inner(rdr: reader) -> token::token {
// Multi-byte tokens.
'=' {
rdr.bump();
bump(rdr);
if rdr.curr == '=' {
rdr.bump();
bump(rdr);
ret token::EQEQ;
} else if rdr.curr == '>' {
rdr.bump();
bump(rdr);
ret token::FAT_ARROW;
} else {
ret token::EQ;
}
}
'!' {
rdr.bump();
bump(rdr);
if rdr.curr == '=' {
rdr.bump();
bump(rdr);
ret token::NE;
} else { ret token::NOT; }
}
'<' {
rdr.bump();
bump(rdr);
alt rdr.curr {
'=' { rdr.bump(); ret token::LE; }
'=' { bump(rdr); ret token::LE; }
'<' { ret binop(rdr, token::SHL); }
'-' {
rdr.bump();
bump(rdr);
alt rdr.curr {
'>' { rdr.bump(); ret token::DARROW; }
'>' { bump(rdr); ret token::DARROW; }
_ { ret token::LARROW; }
}
}
@ -422,20 +441,20 @@ fn next_token_inner(rdr: reader) -> token::token {
}
}
'>' {
rdr.bump();
bump(rdr);
alt rdr.curr {
'=' { rdr.bump(); ret token::GE; }
'=' { bump(rdr); ret token::GE; }
'>' { ret binop(rdr, token::SHR); }
_ { ret token::GT; }
}
}
'\'' {
rdr.bump();
bump(rdr);
let mut c2 = rdr.curr;
rdr.bump();
bump(rdr);
if c2 == '\\' {
let escaped = rdr.curr;
rdr.bump();
bump(rdr);
alt escaped {
'n' { c2 = '\n'; }
'r' { c2 = '\r'; }
@ -454,24 +473,24 @@ fn next_token_inner(rdr: reader) -> token::token {
if rdr.curr != '\'' {
rdr.fatal("unterminated character constant");
}
rdr.bump(); // advance curr past token
bump(rdr); // advance curr past token
ret token::LIT_INT(c2 as i64, ast::ty_char);
}
'"' {
let n = rdr.chpos;
rdr.bump();
bump(rdr);
while rdr.curr != '"' {
if rdr.is_eof() {
if is_eof(rdr) {
rdr.fatal(#fmt["unterminated double quote string: %s",
rdr.get_str_from(n)]);
get_str_from(rdr, n)]);
}
let ch = rdr.curr;
rdr.bump();
bump(rdr);
alt ch {
'\\' {
let escaped = rdr.curr;
rdr.bump();
bump(rdr);
alt escaped {
'n' { str::push_char(accum_str, '\n'); }
'r' { str::push_char(accum_str, '\r'); }
@ -496,27 +515,27 @@ fn next_token_inner(rdr: reader) -> token::token {
_ { str::push_char(accum_str, ch); }
}
}
rdr.bump();
bump(rdr);
ret token::LIT_STR(interner::intern(*rdr.interner,
@accum_str));
}
'-' {
if rdr.next() == '>' {
rdr.bump();
rdr.bump();
if nextch(rdr) == '>' {
bump(rdr);
bump(rdr);
ret token::RARROW;
} else { ret binop(rdr, token::MINUS); }
}
'&' {
if rdr.next() == '&' {
rdr.bump();
rdr.bump();
if nextch(rdr) == '&' {
bump(rdr);
bump(rdr);
ret token::ANDAND;
} else { ret binop(rdr, token::AND); }
}
'|' {
alt rdr.next() {
'|' { rdr.bump(); rdr.bump(); ret token::OROR; }
alt nextch(rdr) {
'|' { bump(rdr); bump(rdr); ret token::OROR; }
_ { ret binop(rdr, token::OR); }
}
}
@ -529,8 +548,8 @@ fn next_token_inner(rdr: reader) -> token::token {
}
}
fn consume_whitespace(rdr: reader) {
while is_whitespace(rdr.curr) && !rdr.is_eof() { rdr.bump(); }
// Advances the reader past consecutive whitespace characters, stopping at
// the first non-whitespace character or at end of input.
fn consume_whitespace(rdr: string_reader) {
while is_whitespace(rdr.curr) && !is_eof(rdr) { bump(rdr); }
}

View File

@ -71,10 +71,11 @@ class parser {
let keywords: hashmap<str, ()>;
let restricted_keywords: hashmap<str, ()>;
new(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader,
ftype: file_type) {
let tok0 = lexer::next_token(rdr);
let span0 = ast_util::mk_sp(tok0.chpos, rdr.chpos);
new(sess: parse_sess, cfg: ast::crate_cfg, +rdr: reader, ftype: file_type)
{
self.reader <- rdr;
let tok0 = self.reader.next_token();
let span0 = ast_util::mk_sp(tok0.chpos, self.reader.chpos());
self.sess = sess;
self.cfg = cfg;
self.file_type = ftype;
@ -90,7 +91,6 @@ class parser {
self.buffer_start = 0;
self.buffer_end = 0;
self.restriction = UNRESTRICTED;
self.reader = rdr;
self.keywords = token::keyword_table();
self.restricted_keywords = token::restricted_keyword_table();
}
@ -101,9 +101,9 @@ class parser {
fn bump() {
self.last_span = self.span;
if self.buffer_start == self.buffer_end {
let next = lexer::next_token(self.reader);
let next = self.reader.next_token();
self.token = next.tok;
self.span = mk_sp(next.chpos, self.reader.chpos);
self.span = mk_sp(next.chpos, self.reader.chpos());
} else {
let next = self.buffer[self.buffer_start];
self.buffer_start = (self.buffer_start + 1) & 3;
@ -124,8 +124,8 @@ class parser {
fn look_ahead(distance: uint) -> token::token {
let dist = distance as int;
while self.buffer_length() < dist {
let next = lexer::next_token(self.reader);
let sp = mk_sp(next.chpos, self.reader.chpos);
let next = self.reader.next_token();
let sp = mk_sp(next.chpos, self.reader.chpos());
self.buffer[self.buffer_end] = {tok: next.tok, span: sp};
self.buffer_end = (self.buffer_end + 1) & 3;
}
@ -144,7 +144,7 @@ class parser {
self.sess.span_diagnostic.span_warn(copy self.span, m)
}
fn get_str(i: token::str_num) -> @str {
interner::get(*self.reader.interner, i)
interner::get(*self.reader.interner(), i)
}
fn get_id() -> node_id { next_node_id(self.sess) }
@ -1060,7 +1060,7 @@ class parser {
fn parse_token_tree() -> token_tree {
#[doc="what's the opposite delimiter?"]
fn flip(t: token::token) -> token::token {
fn flip(&t: token::token) -> token::token {
alt t {
token::LPAREN { token::RPAREN }
token::LBRACE { token::RBRACE }