Some rearranging in preparation for MBE-style TT transcription.

This commit is contained in:
Paul Stansifer 2012-06-27 15:29:35 -07:00
parent 74c2266a06
commit 39590d81f0
8 changed files with 198 additions and 123 deletions

View File

@ -374,12 +374,17 @@ enum blk_sort {
*/ */
#[auto_serialize] #[auto_serialize]
#[doc="For macro invocations; parsing is delegated to the macro"]
enum token_tree { enum token_tree {
/* for macro invocations; parsing is the macro's job */
tt_delim(~[token_tree]), tt_delim(~[token_tree]),
tt_flat(span, token::token) tt_flat(span, token::token),
/* These only make sense for right-hand-sides of MBE macros*/
tt_dotdotdot(~[token_tree]),
tt_interpolate(ident)
} }
#[auto_serialize] #[auto_serialize]
type matcher = spanned<matcher_>; type matcher = spanned<matcher_>;

View File

@ -1,12 +1,15 @@
// Earley-like parser for macros. // Earley-like parser for macros.
import parse::token; import parse::token;
import parse::token::{token, EOF, to_str, whole_nt}; import parse::token::{token, EOF, to_str, whole_nt};
import parse::lexer::{reader, tt_reader, tt_reader_as_reader}; import parse::lexer::*; //resolve bug?
//import parse::lexer::{reader, tt_reader, tt_reader_as_reader};
import parse::parser::{parser,SOURCE_FILE}; import parse::parser::{parser,SOURCE_FILE};
import parse::common::parser_common; //import parse::common::parser_common;
import parse::common::*; //resolve bug?
import parse::parse_sess; import parse::parse_sess;
import dvec::{dvec, extensions}; import dvec::{dvec, extensions};
import ast::{matcher, mtc_tok, mtc_rep, mtc_bb}; import ast::{matcher, mtc_tok, mtc_rep, mtc_bb, ident};
import std::map::{hashmap, box_str_hash};
/* This is an Earley-like parser, without support for nonterminals. This /* This is an Earley-like parser, without support for nonterminals. This
means that there are no completer or predictor rules, and therefore no need to means that there are no completer or predictor rules, and therefore no need to
@ -66,8 +69,31 @@ enum arb_depth { leaf(whole_nt), seq(~[@arb_depth]) }
type earley_item = matcher_pos; type earley_item = matcher_pos;
fn nameize(&&p_s: parse_sess, ms: ~[matcher], &&res: ~[@arb_depth])
-> hashmap<ident,@arb_depth> {
fn n_rec(&&p_s: parse_sess, &&m: matcher, &&res: ~[@arb_depth],
&&ret_val: hashmap<ident, @arb_depth>) {
alt m {
{node: mtc_tok(_), span: _} { }
{node: mtc_rep(more_ms, _, _), span: _} {
for more_ms.each() |next_m| { n_rec(p_s, next_m, res, ret_val) };
}
{node: mtc_bb(bind_name, _, idx), span: sp} {
if ret_val.contains_key(bind_name) {
p_s.span_diagnostic.span_fatal(sp, "Duplicated bind name: "
+ *bind_name)
}
ret_val.insert(bind_name, res[idx]);
}
}
}
let ret_val = box_str_hash::<@arb_depth>();
for ms.each() |m| { n_rec(p_s, m, res, ret_val) };
ret ret_val;
}
fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
-> ~[@arb_depth] { -> hashmap<ident,@arb_depth> {
let mut cur_eis = ~[]; let mut cur_eis = ~[];
vec::push(cur_eis, new_matcher_pos(ms, none)); vec::push(cur_eis, new_matcher_pos(ms, none));
@ -164,9 +190,9 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
/* error messages here could be improved with links to orig. rules */ /* error messages here could be improved with links to orig. rules */
if tok == EOF { if tok == EOF {
if eof_eis.len() == 1u { if eof_eis.len() == 1u { /* success */
let ret_val = vec::map(eof_eis[0u].matches, |dv| dv.pop()); ret nameize(sess, ms,
ret ret_val; /* success */ vec::map(eof_eis[0u].matches, |dv| dv.pop()));
} else if eof_eis.len() > 1u { } else if eof_eis.len() > 1u {
rdr.fatal("Ambiguity: multiple successful parses"); rdr.fatal("Ambiguity: multiple successful parses");
} else { } else {

View File

@ -0,0 +1,114 @@
import util::interner::interner;
import diagnostic::span_handler;
import ast::{tt_delim,tt_flat,tt_dotdotdot,tt_interpolate,ident};
import ext::earley_parser::arb_depth;
import codemap::span;
import parse::token::{EOF,token};
export tt_reader, new_tt_reader, dup_tt_reader, tt_next_token;
enum tt_frame_up { /* to break a circularity */
tt_frame_up(option<tt_frame>)
}
/* TODO: figure out how to have a uniquely linked stack, and change to `~` */
///an unzipping of `token_tree`s
type tt_frame = @{
readme: [ast::token_tree]/~,
mut idx: uint,
up: tt_frame_up
};
type tt_reader = @{
span_diagnostic: span_handler,
interner: @interner<@str>,
mut cur: tt_frame,
/* for MBE-style macro transcription */
interpolations: std::map::hashmap<ident, @arb_depth>,
/* cached: */
mut cur_tok: token,
mut cur_span: span
};
/** This can do Macro-By-Example transcription. On the other hand, if
* `doc` contains no `tt_dotdotdot`s and `tt_interpolate`s, `interp` can (and
* should) be none. */
fn new_tt_reader(span_diagnostic: span_handler, itr: @interner<@str>,
interp: option<std::map::hashmap<ident,@arb_depth>>,
src: [ast::token_tree]/~)
-> tt_reader {
let r = @{span_diagnostic: span_diagnostic, interner: itr,
mut cur: @{readme: src, mut idx: 0u,
up: tt_frame_up(option::none)},
                  interpolations: alt interp { /* just a convenience */
none { std::map::box_str_hash::<@arb_depth>() }
some(x) { x }
},
/* dummy values, never read: */
mut cur_tok: EOF,
mut cur_span: ast_util::mk_sp(0u,0u)
};
tt_next_token(r); /* get cur_tok and cur_span set up */
ret r;
}
pure fn dup_tt_frame(&&f: tt_frame) -> tt_frame {
@{readme: f.readme, mut idx: f.idx,
up: alt f.up {
tt_frame_up(some(up_frame)) {
tt_frame_up(some(dup_tt_frame(up_frame)))
}
tt_frame_up(none) { tt_frame_up(none) }
}
}
}
pure fn dup_tt_reader(&&r: tt_reader) -> tt_reader {
@{span_diagnostic: r.span_diagnostic, interner: r.interner,
mut cur: dup_tt_frame(r.cur),
interpolations: r.interpolations,
mut cur_tok: r.cur_tok, mut cur_span: r.cur_span}
}
fn tt_next_token(&&r: tt_reader) -> {tok: token, sp: span} {
let ret_val = { tok: r.cur_tok, sp: r.cur_span };
if r.cur.idx >= vec::len(r.cur.readme) {
/* done with this set; pop */
alt r.cur.up {
tt_frame_up(none) {
r.cur_tok = EOF;
ret ret_val;
}
tt_frame_up(some(tt_f)) {
r.cur = tt_f;
/* the above `if` would need to be a `while` if we didn't know
that the last thing in a `tt_delim` is always a `tt_flat` */
r.cur.idx += 1u;
}
}
}
/* if `tt_delim`s could be 0-length, we'd need to be able to switch
between popping and pushing until we got to an actual `tt_flat` */
loop { /* because it's easiest, this handles `tt_delim` not starting
with a `tt_flat`, even though it won't happen */
alt copy r.cur.readme[r.cur.idx] {
tt_delim(tts) {
r.cur = @{readme: tts, mut idx: 0u,
up: tt_frame_up(option::some(r.cur)) };
}
tt_flat(sp, tok) {
r.cur_span = sp; r.cur_tok = tok;
r.cur.idx += 1u;
ret ret_val;
}
tt_dotdotdot(tts) {
fail;
}
tt_interpolate(ident) {
fail;
}
}
}
}

View File

@ -13,8 +13,10 @@ export parse_expr_from_source_str, parse_item_from_source_str;
export parse_from_source_str; export parse_from_source_str;
import parser::parser; import parser::parser;
import attr::parser_attr; //import attr::parser_attr;
import common::parser_common; import attr::*; //resolve bug?
//import common::parser_common;
import common::*; //resolve bug?
import ast::node_id; import ast::node_id;
import util::interner; import util::interner;
// FIXME (#1935): resolve badness // FIXME (#1935): resolve badness
@ -199,6 +201,7 @@ fn new_parser_from_file(sess: parse_sess, cfg: ast::crate_cfg, +path: str,
fn new_parser_from_tt(sess: parse_sess, cfg: ast::crate_cfg, fn new_parser_from_tt(sess: parse_sess, cfg: ast::crate_cfg,
tt: ~[ast::token_tree]) -> parser { tt: ~[ast::token_tree]) -> parser {
let trdr = lexer::new_tt_reader(sess.span_diagnostic, sess.interner, tt); let trdr = lexer::new_tt_reader(sess.span_diagnostic, sess.interner,
none, tt);
ret parser(sess, cfg, trdr as reader, parser::SOURCE_FILE) ret parser(sess, cfg, trdr as reader, parser::SOURCE_FILE)
} }

View File

@ -1,6 +1,7 @@
import either::{either, left, right}; import either::{either, left, right};
import ast_util::spanned; import ast_util::spanned;
import common::{parser_common, seq_sep_trailing_disallowed}; import common::*; //resolve bug?
//import common::{parser_common, seq_sep_trailing_disallowed};
export attr_or_ext; export attr_or_ext;
export parser_attr; export parser_attr;

View File

@ -1,11 +1,11 @@
import util::interner; import util::interner::{interner,intern};
import util::interner::intern; import diagnostic::span_handler;
import diagnostic;
import ast::{tt_delim,tt_flat};
import codemap::span; import codemap::span;
import ext::tt::transcribe::{tt_reader, new_tt_reader, dup_tt_reader,
tt_next_token};
export reader, string_reader, new_string_reader, is_whitespace; export reader, string_reader, new_string_reader, is_whitespace;
export tt_reader, new_tt_reader, dup_tt_reader; export tt_reader, new_tt_reader;
export nextch, is_eof, bump, get_str_from, new_low_level_string_reader; export nextch, is_eof, bump, get_str_from, new_low_level_string_reader;
export string_reader_as_reader, tt_reader_as_reader; export string_reader_as_reader, tt_reader_as_reader;
@ -13,91 +13,38 @@ iface reader {
fn is_eof() -> bool; fn is_eof() -> bool;
fn next_token() -> {tok: token::token, sp: span}; fn next_token() -> {tok: token::token, sp: span};
fn fatal(str) -> !; fn fatal(str) -> !;
fn span_diag() -> diagnostic::span_handler; fn span_diag() -> span_handler;
fn interner() -> @interner::interner<@str>; fn interner() -> @interner<@str>;
fn peek() -> {tok: token::token, sp: span}; fn peek() -> {tok: token::token, sp: span};
fn dup() -> reader; fn dup() -> reader;
} }
enum tt_frame_up { /* to break a circularity */
tt_frame_up(option<tt_frame>)
}
/* FIXME (#2811): figure out how to have a uniquely linked stack,
and change to `~` */
/// an unzipping of `token_tree`s
type tt_frame = @{
readme: ~[ast::token_tree],
mut idx: uint,
up: tt_frame_up
};
type tt_reader = @{
span_diagnostic: diagnostic::span_handler,
interner: @interner::interner<@str>,
mut cur: tt_frame,
/* cached: */
mut cur_tok: token::token,
mut cur_span: span
};
fn new_tt_reader(span_diagnostic: diagnostic::span_handler,
itr: @interner::interner<@str>, src: ~[ast::token_tree])
-> tt_reader {
let r = @{span_diagnostic: span_diagnostic, interner: itr,
mut cur: @{readme: src, mut idx: 0u,
up: tt_frame_up(option::none)},
/* dummy values, never read: */
mut cur_tok: token::EOF,
mut cur_span: ast_util::mk_sp(0u,0u)
};
tt_next_token(r); /* get cur_tok and cur_span set up */
ret r;
}
pure fn dup_tt_frame(&&f: tt_frame) -> tt_frame {
@{readme: f.readme, mut idx: f.idx,
up: alt f.up {
tt_frame_up(some(up_frame)) {
tt_frame_up(some(dup_tt_frame(up_frame)))
}
tt_frame_up(none) { tt_frame_up(none) }
}
}
}
pure fn dup_tt_reader(&&r: tt_reader) -> tt_reader {
@{span_diagnostic: r.span_diagnostic, interner: r.interner,
mut cur: dup_tt_frame(r.cur),
mut cur_tok: r.cur_tok, mut cur_span: r.cur_span}
}
type string_reader = @{ type string_reader = @{
span_diagnostic: diagnostic::span_handler, span_diagnostic: span_handler,
src: @str, src: @str,
mut col: uint, mut col: uint,
mut pos: uint, mut pos: uint,
mut curr: char, mut curr: char,
mut chpos: uint, mut chpos: uint,
filemap: codemap::filemap, filemap: codemap::filemap,
interner: @interner::interner<@str>, interner: @interner<@str>,
/* cached: */ /* cached: */
mut peek_tok: token::token, mut peek_tok: token::token,
mut peek_span: span mut peek_span: span
}; };
fn new_string_reader(span_diagnostic: diagnostic::span_handler, fn new_string_reader(span_diagnostic: span_handler,
filemap: codemap::filemap, filemap: codemap::filemap,
itr: @interner::interner<@str>) -> string_reader { itr: @interner<@str>) -> string_reader {
let r = new_low_level_string_reader(span_diagnostic, filemap, itr); let r = new_low_level_string_reader(span_diagnostic, filemap, itr);
string_advance_token(r); /* fill in peek_* */ string_advance_token(r); /* fill in peek_* */
ret r; ret r;
} }
/* For comments.rs, which hackily pokes into 'pos' and 'curr' */ /* For comments.rs, which hackily pokes into 'pos' and 'curr' */
fn new_low_level_string_reader(span_diagnostic: diagnostic::span_handler, fn new_low_level_string_reader(span_diagnostic: span_handler,
filemap: codemap::filemap, filemap: codemap::filemap,
itr: @interner::interner<@str>) itr: @interner<@str>)
-> string_reader { -> string_reader {
let r = @{span_diagnostic: span_diagnostic, src: filemap.src, let r = @{span_diagnostic: span_diagnostic, src: filemap.src,
mut col: 0u, mut pos: 0u, mut curr: -1 as char, mut col: 0u, mut pos: 0u, mut curr: -1 as char,
@ -131,8 +78,8 @@ impl string_reader_as_reader of reader for string_reader {
fn fatal(m: str) -> ! { fn fatal(m: str) -> ! {
self.span_diagnostic.span_fatal(copy self.peek_span, m) self.span_diagnostic.span_fatal(copy self.peek_span, m)
} }
fn span_diag() -> diagnostic::span_handler { self.span_diagnostic } fn span_diag() -> span_handler { self.span_diagnostic }
fn interner() -> @interner::interner<@str> { self.interner } fn interner() -> @interner<@str> { self.interner }
fn peek() -> {tok: token::token, sp: span} { fn peek() -> {tok: token::token, sp: span} {
{tok: self.peek_tok, sp: self.peek_span} {tok: self.peek_tok, sp: self.peek_span}
} }
@ -153,8 +100,8 @@ impl tt_reader_as_reader of reader for tt_reader {
fn fatal(m: str) -> ! { fn fatal(m: str) -> ! {
self.span_diagnostic.span_fatal(copy self.cur_span, m); self.span_diagnostic.span_fatal(copy self.cur_span, m);
} }
fn span_diag() -> diagnostic::span_handler { self.span_diagnostic } fn span_diag() -> span_handler { self.span_diagnostic }
fn interner() -> @interner::interner<@str> { self.interner } fn interner() -> @interner<@str> { self.interner }
fn peek() -> {tok: token::token, sp: span} { fn peek() -> {tok: token::token, sp: span} {
{ tok: self.cur_tok, sp: self.cur_span } { tok: self.cur_tok, sp: self.cur_span }
} }
@ -178,42 +125,6 @@ fn string_advance_token(&&r: string_reader) {
} }
fn tt_next_token(&&r: tt_reader) -> {tok: token::token, sp: span} {
let ret_val = { tok: r.cur_tok, sp: r.cur_span };
if r.cur.idx >= vec::len(r.cur.readme) {
/* done with this set; pop */
alt r.cur.up {
tt_frame_up(none) {
r.cur_tok = token::EOF;
ret ret_val;
}
tt_frame_up(some(tt_f)) {
r.cur = tt_f;
/* the above `if` would need to be a `while` if we didn't know
that the last thing in a `tt_delim` is always a `tt_flat` */
r.cur.idx += 1u;
}
}
}
/* if `tt_delim`s could be 0-length, we'd need to be able to switch
between popping and pushing until we got to an actual `tt_flat` */
loop { /* because it's easiest, this handles `tt_delim` not starting
with a `tt_flat`, even though it won't happen */
alt copy r.cur.readme[r.cur.idx] {
tt_delim(tts) {
r.cur = @{readme: tts, mut idx: 0u,
up: tt_frame_up(option::some(r.cur)) };
}
tt_flat(sp, tok) {
r.cur_span = sp; r.cur_tok = tok;
r.cur.idx += 1u;
ret ret_val;
}
}
}
}
fn get_str_from(rdr: string_reader, start: uint) -> str unsafe { fn get_str_from(rdr: string_reader, start: uint) -> str unsafe {
// I'm pretty skeptical about this subtraction. What if there's a // I'm pretty skeptical about this subtraction. What if there's a
// multi-byte character before the mark? // multi-byte character before the mark?
@ -548,7 +459,7 @@ fn next_token_inner(rdr: string_reader) -> token::token {
let is_mod_name = c == ':' && nextch(rdr) == ':'; let is_mod_name = c == ':' && nextch(rdr) == ':';
// FIXME: perform NFKC normalization here. (Issue #2253) // FIXME: perform NFKC normalization here. (Issue #2253)
ret token::IDENT(interner::intern(*rdr.interner, ret token::IDENT(intern(*rdr.interner,
@accum_str), is_mod_name); @accum_str), is_mod_name);
} }
if is_dec_digit(c) { if is_dec_digit(c) {
@ -713,8 +624,7 @@ fn next_token_inner(rdr: string_reader) -> token::token {
} }
} }
bump(rdr); bump(rdr);
ret token::LIT_STR(interner::intern(*rdr.interner, ret token::LIT_STR(intern(*rdr.interner, @accum_str));
@accum_str));
} }
'-' { '-' {
if nextch(rdr) == '>' { if nextch(rdr) == '>' {

View File

@ -70,6 +70,7 @@ class parser {
let mut buffer_start: int; let mut buffer_start: int;
let mut buffer_end: int; let mut buffer_end: int;
let mut restriction: restriction; let mut restriction: restriction;
let mut quote_depth: uint; // not (yet) related to the quasiquoter
let reader: reader; let reader: reader;
let keywords: hashmap<str, ()>; let keywords: hashmap<str, ()>;
let restricted_keywords: hashmap<str, ()>; let restricted_keywords: hashmap<str, ()>;
@ -94,6 +95,7 @@ class parser {
self.buffer_start = 0; self.buffer_start = 0;
self.buffer_end = 0; self.buffer_end = 0;
self.restriction = UNRESTRICTED; self.restriction = UNRESTRICTED;
self.quote_depth = 0u;
self.keywords = token::keyword_table(); self.keywords = token::keyword_table();
self.restricted_keywords = token::restricted_keyword_table(); self.restricted_keywords = token::restricted_keyword_table();
} }
@ -1067,6 +1069,11 @@ class parser {
} }
fn parse_tt_flat(p: parser, delim_ok: bool) -> token_tree { fn parse_tt_flat(p: parser, delim_ok: bool) -> token_tree {
if p.eat_keyword("many") && p.quote_depth > 0u {
ret tt_dotdotdot(
p.parse_seq(token::LPAREN, token::RPAREN, seq_sep_none(),
|p| p.parse_token_tree()).node);
}
alt p.token { alt p.token {
token::RPAREN | token::RBRACE | token::RBRACKET token::RPAREN | token::RBRACE | token::RBRACKET
if !delim_ok { if !delim_ok {
@ -1076,6 +1083,11 @@ class parser {
token::EOF { token::EOF {
p.fatal("file ended in the middle of a macro invocation"); p.fatal("file ended in the middle of a macro invocation");
} }
/* we ought to allow different depths of unquotation */
token::DOLLAR if p.quote_depth > 0u {
p.bump();
ret tt_interpolate(p.parse_ident());
}
_ { /* ok */ } _ { /* ok */ }
} }
let res = tt_flat(p.span, p.token); let res = tt_flat(p.span, p.token);
@ -1104,10 +1116,11 @@ class parser {
common::seq_sep_none(), common::seq_sep_none(),
|p| p.parse_matcher(@mut 0u)).node; |p| p.parse_matcher(@mut 0u)).node;
let tt = self.parse_token_tree(); let tt = self.parse_token_tree();
//let tt_rhs = self.parse_token_tree();
alt tt { alt tt {
tt_delim(tts) { tt_delim(tts) {
let rdr = lexer::new_tt_reader(self.reader.span_diag(), let rdr = lexer::new_tt_reader(self.reader.span_diag(),
self.reader.interner(), tts) self.reader.interner(), none, tts)
as reader; as reader;
ext::earley_parser::parse(self.sess, self.cfg, rdr, ms); ext::earley_parser::parse(self.sess, self.cfg, rdr, ms);
} }

View File

@ -66,6 +66,9 @@ mod ext {
mod build; mod build;
mod earley_parser; mod earley_parser;
mod tt {
mod transcribe;
}
mod fmt; mod fmt;
mod env; mod env;