mirror of
https://github.com/rust-lang/rust.git
synced 2025-02-16 00:43:50 +00:00
Some rearranging in preparation for MBE-style TT transcription.
This commit is contained in:
parent
74c2266a06
commit
39590d81f0
@ -374,12 +374,17 @@ enum blk_sort {
|
||||
*/
|
||||
|
||||
#[auto_serialize]
|
||||
#[doc="For macro invocations; parsing is delegated to the macro"]
|
||||
enum token_tree {
|
||||
/* for macro invocations; parsing is the macro's job */
|
||||
tt_delim(~[token_tree]),
|
||||
tt_flat(span, token::token)
|
||||
tt_flat(span, token::token),
|
||||
/* These only make sense for right-hand-sides of MBE macros*/
|
||||
tt_dotdotdot(~[token_tree]),
|
||||
tt_interpolate(ident)
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[auto_serialize]
|
||||
type matcher = spanned<matcher_>;
|
||||
|
||||
|
@ -1,12 +1,15 @@
|
||||
// Earley-like parser for macros.
|
||||
import parse::token;
|
||||
import parse::token::{token, EOF, to_str, whole_nt};
|
||||
import parse::lexer::{reader, tt_reader, tt_reader_as_reader};
|
||||
import parse::lexer::*; //resolve bug?
|
||||
//import parse::lexer::{reader, tt_reader, tt_reader_as_reader};
|
||||
import parse::parser::{parser,SOURCE_FILE};
|
||||
import parse::common::parser_common;
|
||||
//import parse::common::parser_common;
|
||||
import parse::common::*; //resolve bug?
|
||||
import parse::parse_sess;
|
||||
import dvec::{dvec, extensions};
|
||||
import ast::{matcher, mtc_tok, mtc_rep, mtc_bb};
|
||||
import ast::{matcher, mtc_tok, mtc_rep, mtc_bb, ident};
|
||||
import std::map::{hashmap, box_str_hash};
|
||||
|
||||
/* This is an Earley-like parser, without support for nonterminals. This
|
||||
means that there are no completer or predictor rules, and therefore no need to
|
||||
@ -66,8 +69,31 @@ enum arb_depth { leaf(whole_nt), seq(~[@arb_depth]) }
|
||||
type earley_item = matcher_pos;
|
||||
|
||||
|
||||
/* Builds the table of named matches: walks the matchers in `ms` and, for
   every `mtc_bb(bind_name, _, idx)`, stores `res[idx]` under `bind_name`.
   A name that is already in the table is reported as a fatal error through
   `p_s.span_diagnostic`. Returns the filled-in table. */
fn nameize(&&p_s: parse_sess, ms: ~[matcher], &&res: ~[@arb_depth])
|
||||
-> hashmap<ident,@arb_depth> {
|
||||
/* recursive worker: records the bindings found in one matcher `m`
   into `ret_val` */
fn n_rec(&&p_s: parse_sess, &&m: matcher, &&res: ~[@arb_depth],
|
||||
&&ret_val: hashmap<ident, @arb_depth>) {
|
||||
alt m {
|
||||
/* a plain token matcher contributes no binding */
{node: mtc_tok(_), span: _} { }
|
||||
/* a repetition contributes whatever its nested matchers bind */
{node: mtc_rep(more_ms, _, _), span: _} {
|
||||
for more_ms.each() |next_m| { n_rec(p_s, next_m, res, ret_val) };
|
||||
}
|
||||
/* a binder: `idx` selects which entry of `res` belongs to `bind_name` */
{node: mtc_bb(bind_name, _, idx), span: sp} {
|
||||
if ret_val.contains_key(bind_name) {
|
||||
p_s.span_diagnostic.span_fatal(sp, "Duplicated bind name: "
|
||||
+ *bind_name)
|
||||
}
|
||||
ret_val.insert(bind_name, res[idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
let ret_val = box_str_hash::<@arb_depth>();
|
||||
for ms.each() |m| { n_rec(p_s, m, res, ret_val) };
|
||||
ret ret_val;
|
||||
}
|
||||
|
||||
fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
|
||||
-> ~[@arb_depth] {
|
||||
-> hashmap<ident,@arb_depth> {
|
||||
let mut cur_eis = ~[];
|
||||
vec::push(cur_eis, new_matcher_pos(ms, none));
|
||||
|
||||
@ -164,9 +190,9 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
|
||||
|
||||
/* error messages here could be improved with links to orig. rules */
|
||||
if tok == EOF {
|
||||
if eof_eis.len() == 1u {
|
||||
let ret_val = vec::map(eof_eis[0u].matches, |dv| dv.pop());
|
||||
ret ret_val; /* success */
|
||||
if eof_eis.len() == 1u { /* success */
|
||||
ret nameize(sess, ms,
|
||||
vec::map(eof_eis[0u].matches, |dv| dv.pop()));
|
||||
} else if eof_eis.len() > 1u {
|
||||
rdr.fatal("Ambiguity: multiple successful parses");
|
||||
} else {
|
||||
|
114
src/libsyntax/ext/tt/transcribe.rs
Normal file
114
src/libsyntax/ext/tt/transcribe.rs
Normal file
@ -0,0 +1,114 @@
|
||||
import util::interner::interner;
|
||||
import diagnostic::span_handler;
|
||||
import ast::{tt_delim,tt_flat,tt_dotdotdot,tt_interpolate,ident};
|
||||
import ext::earley_parser::arb_depth;
|
||||
import codemap::span;
|
||||
import parse::token::{EOF,token};
|
||||
|
||||
export tt_reader, new_tt_reader, dup_tt_reader, tt_next_token;
|
||||
|
||||
/* Wrapper around an optional parent `tt_frame`; it is the type of the `up`
   field of `tt_frame`. `none` is used for the outermost frame. */
enum tt_frame_up { /* to break a circularity */
|
||||
tt_frame_up(option<tt_frame>)
|
||||
}
|
||||
|
||||
/* TODO: figure out how to have a uniquely linked stack, and change to `~` */
|
||||
///an unzipping of `token_tree`s
///
///One frame per nesting level: `tt_next_token` pushes a new frame when it
///enters a `tt_delim` and goes back to `up` when `idx` runs off the end.
|
||||
type tt_frame = @{
|
||||
/* the token trees at this nesting level */
readme: [ast::token_tree]/~,
|
||||
/* index into `readme` of the element to look at next */
mut idx: uint,
|
||||
/* the enclosing frame, or `none` for the outermost one */
up: tt_frame_up
|
||||
};
|
||||
|
||||
/* State of a reader that yields tokens out of a `token_tree` sequence;
   constructed by `new_tt_reader` and advanced by `tt_next_token`. */
type tt_reader = @{
|
||||
/* handler used to report errors */
span_diagnostic: span_handler,
|
||||
interner: @interner<@str>,
|
||||
/* innermost frame currently being read */
mut cur: tt_frame,
|
||||
/* for MBE-style macro transcription */
|
||||
interpolations: std::map::hashmap<ident, @arb_depth>,
|
||||
/* cached: */
/* (the token and span that the next `tt_next_token` call will return) */
|
||||
mut cur_tok: token,
|
||||
mut cur_span: span
|
||||
};
|
||||
|
||||
/** This can do Macro-By-Example transcription. On the other hand, if
|
||||
* `src` contains no `tt_dotdotdot`s and `tt_interpolate`s, `interp` can (and
|
||||
* should) be none. */
|
||||
fn new_tt_reader(span_diagnostic: span_handler, itr: @interner<@str>,
|
||||
interp: option<std::map::hashmap<ident,@arb_depth>>,
|
||||
src: [ast::token_tree]/~)
|
||||
-> tt_reader {
|
||||
/* start in a single outermost frame over `src`, positioned at index 0 */
let r = @{span_diagnostic: span_diagnostic, interner: itr,
|
||||
mut cur: @{readme: src, mut idx: 0u,
|
||||
up: tt_frame_up(option::none)},
|
||||
interpolations: alt interp { /* just a convenience */
|
||||
/* no table supplied: substitute an empty one */
none { std::map::box_str_hash::<@arb_depth>() }
|
||||
some(x) { x }
|
||||
},
|
||||
/* dummy values, never read: */
|
||||
mut cur_tok: EOF,
|
||||
mut cur_span: ast_util::mk_sp(0u,0u)
|
||||
};
|
||||
/* the value returned by this first call is discarded; it is made only to
   replace the dummy values above */
tt_next_token(r); /* get cur_tok and cur_span set up */
|
||||
ret r;
|
||||
}
|
||||
|
||||
/* Makes a fresh frame with the same `readme` and the same `idx` as `f`,
   recursively duplicating every frame reachable through `up`. */
pure fn dup_tt_frame(&&f: tt_frame) -> tt_frame {
|
||||
@{readme: f.readme, mut idx: f.idx,
|
||||
up: alt f.up {
|
||||
tt_frame_up(some(up_frame)) {
|
||||
tt_frame_up(some(dup_tt_frame(up_frame)))
|
||||
}
|
||||
/* outermost frame: nothing further to duplicate */
tt_frame_up(none) { tt_frame_up(none) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Makes a new reader at the same position as `r`: the frame chain is
   duplicated with `dup_tt_frame`, while `span_diagnostic`, `interner` and
   `interpolations` are taken from `r` as they are, and the cached token and
   span are carried over. */
pure fn dup_tt_reader(&&r: tt_reader) -> tt_reader {
|
||||
@{span_diagnostic: r.span_diagnostic, interner: r.interner,
|
||||
mut cur: dup_tt_frame(r.cur),
|
||||
interpolations: r.interpolations,
|
||||
mut cur_tok: r.cur_tok, mut cur_span: r.cur_span}
|
||||
}
|
||||
|
||||
|
||||
/* Returns the token and span that were cached in `r`, and advances `r` so
   that the cache holds the following token. Once the outermost frame is
   exhausted the cached token becomes `EOF`. */
fn tt_next_token(&&r: tt_reader) -> {tok: token, sp: span} {
|
||||
/* captured before advancing: this is what every return path hands back */
let ret_val = { tok: r.cur_tok, sp: r.cur_span };
|
||||
if r.cur.idx >= vec::len(r.cur.readme) {
|
||||
/* done with this set; pop */
|
||||
alt r.cur.up {
|
||||
tt_frame_up(none) {
|
||||
/* nothing to pop to: only the token is updated, `cur_span` is left
   as it was */
r.cur_tok = EOF;
|
||||
ret ret_val;
|
||||
}
|
||||
tt_frame_up(some(tt_f)) {
|
||||
r.cur = tt_f;
|
||||
/* the above `if` would need to be a `while` if we didn't know
|
||||
that the last thing in a `tt_delim` is always a `tt_flat` */
|
||||
/* step past the `tt_delim` that has just been finished */
r.cur.idx += 1u;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* if `tt_delim`s could be 0-length, we'd need to be able to switch
|
||||
between popping and pushing until we got to an actual `tt_flat` */
|
||||
/* NOTE(review): the index below is not re-checked after a pop, so this
   relies on the invariants described in the comments above -- confirm */
loop { /* because it's easiest, this handles `tt_delim` not starting
|
||||
with a `tt_flat`, even though it won't happen */
|
||||
alt copy r.cur.readme[r.cur.idx] {
|
||||
tt_delim(tts) {
|
||||
/* descend: push a frame for the nested trees and go around again */
r.cur = @{readme: tts, mut idx: 0u,
|
||||
up: tt_frame_up(option::some(r.cur)) };
|
||||
}
|
||||
tt_flat(sp, tok) {
|
||||
/* found the next token: cache it, step past it, and return the
   previously cached one */
r.cur_span = sp; r.cur_tok = tok;
|
||||
r.cur.idx += 1u;
|
||||
ret ret_val;
|
||||
}
|
||||
/* the two MBE-only forms unconditionally `fail` here; presumably
   placeholders until transcription is implemented */
tt_dotdotdot(tts) {
|
||||
fail;
|
||||
}
|
||||
tt_interpolate(ident) {
|
||||
fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -13,8 +13,10 @@ export parse_expr_from_source_str, parse_item_from_source_str;
|
||||
export parse_from_source_str;
|
||||
|
||||
import parser::parser;
|
||||
import attr::parser_attr;
|
||||
import common::parser_common;
|
||||
//import attr::parser_attr;
|
||||
import attr::*; //resolve bug?
|
||||
//import common::parser_common;
|
||||
import common::*; //resolve bug?
|
||||
import ast::node_id;
|
||||
import util::interner;
|
||||
// FIXME (#1935): resolve badness
|
||||
@ -199,6 +201,7 @@ fn new_parser_from_file(sess: parse_sess, cfg: ast::crate_cfg, +path: str,
|
||||
|
||||
fn new_parser_from_tt(sess: parse_sess, cfg: ast::crate_cfg,
|
||||
tt: ~[ast::token_tree]) -> parser {
|
||||
let trdr = lexer::new_tt_reader(sess.span_diagnostic, sess.interner, tt);
|
||||
let trdr = lexer::new_tt_reader(sess.span_diagnostic, sess.interner,
|
||||
none, tt);
|
||||
ret parser(sess, cfg, trdr as reader, parser::SOURCE_FILE)
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
import either::{either, left, right};
|
||||
import ast_util::spanned;
|
||||
import common::{parser_common, seq_sep_trailing_disallowed};
|
||||
import common::*; //resolve bug?
|
||||
//import common::{parser_common, seq_sep_trailing_disallowed};
|
||||
|
||||
export attr_or_ext;
|
||||
export parser_attr;
|
||||
|
@ -1,11 +1,11 @@
|
||||
import util::interner;
|
||||
import util::interner::intern;
|
||||
import diagnostic;
|
||||
import ast::{tt_delim,tt_flat};
|
||||
import util::interner::{interner,intern};
|
||||
import diagnostic::span_handler;
|
||||
import codemap::span;
|
||||
import ext::tt::transcribe::{tt_reader, new_tt_reader, dup_tt_reader,
|
||||
tt_next_token};
|
||||
|
||||
export reader, string_reader, new_string_reader, is_whitespace;
|
||||
export tt_reader, new_tt_reader, dup_tt_reader;
|
||||
export tt_reader, new_tt_reader;
|
||||
export nextch, is_eof, bump, get_str_from, new_low_level_string_reader;
|
||||
export string_reader_as_reader, tt_reader_as_reader;
|
||||
|
||||
@ -13,91 +13,38 @@ iface reader {
|
||||
fn is_eof() -> bool;
|
||||
fn next_token() -> {tok: token::token, sp: span};
|
||||
fn fatal(str) -> !;
|
||||
fn span_diag() -> diagnostic::span_handler;
|
||||
fn interner() -> @interner::interner<@str>;
|
||||
fn span_diag() -> span_handler;
|
||||
fn interner() -> @interner<@str>;
|
||||
fn peek() -> {tok: token::token, sp: span};
|
||||
fn dup() -> reader;
|
||||
}
|
||||
|
||||
enum tt_frame_up { /* to break a circularity */
|
||||
tt_frame_up(option<tt_frame>)
|
||||
}
|
||||
|
||||
/* FIXME (#2811): figure out how to have a uniquely linked stack,
|
||||
and change to `~` */
|
||||
/// an unzipping of `token_tree`s
|
||||
type tt_frame = @{
|
||||
readme: ~[ast::token_tree],
|
||||
mut idx: uint,
|
||||
up: tt_frame_up
|
||||
};
|
||||
|
||||
type tt_reader = @{
|
||||
span_diagnostic: diagnostic::span_handler,
|
||||
interner: @interner::interner<@str>,
|
||||
mut cur: tt_frame,
|
||||
/* cached: */
|
||||
mut cur_tok: token::token,
|
||||
mut cur_span: span
|
||||
};
|
||||
|
||||
fn new_tt_reader(span_diagnostic: diagnostic::span_handler,
|
||||
itr: @interner::interner<@str>, src: ~[ast::token_tree])
|
||||
-> tt_reader {
|
||||
let r = @{span_diagnostic: span_diagnostic, interner: itr,
|
||||
mut cur: @{readme: src, mut idx: 0u,
|
||||
up: tt_frame_up(option::none)},
|
||||
/* dummy values, never read: */
|
||||
mut cur_tok: token::EOF,
|
||||
mut cur_span: ast_util::mk_sp(0u,0u)
|
||||
};
|
||||
tt_next_token(r); /* get cur_tok and cur_span set up */
|
||||
ret r;
|
||||
}
|
||||
|
||||
pure fn dup_tt_frame(&&f: tt_frame) -> tt_frame {
|
||||
@{readme: f.readme, mut idx: f.idx,
|
||||
up: alt f.up {
|
||||
tt_frame_up(some(up_frame)) {
|
||||
tt_frame_up(some(dup_tt_frame(up_frame)))
|
||||
}
|
||||
tt_frame_up(none) { tt_frame_up(none) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pure fn dup_tt_reader(&&r: tt_reader) -> tt_reader {
|
||||
@{span_diagnostic: r.span_diagnostic, interner: r.interner,
|
||||
mut cur: dup_tt_frame(r.cur),
|
||||
mut cur_tok: r.cur_tok, mut cur_span: r.cur_span}
|
||||
}
|
||||
|
||||
type string_reader = @{
|
||||
span_diagnostic: diagnostic::span_handler,
|
||||
span_diagnostic: span_handler,
|
||||
src: @str,
|
||||
mut col: uint,
|
||||
mut pos: uint,
|
||||
mut curr: char,
|
||||
mut chpos: uint,
|
||||
filemap: codemap::filemap,
|
||||
interner: @interner::interner<@str>,
|
||||
interner: @interner<@str>,
|
||||
/* cached: */
|
||||
mut peek_tok: token::token,
|
||||
mut peek_span: span
|
||||
};
|
||||
|
||||
fn new_string_reader(span_diagnostic: diagnostic::span_handler,
|
||||
fn new_string_reader(span_diagnostic: span_handler,
|
||||
filemap: codemap::filemap,
|
||||
itr: @interner::interner<@str>) -> string_reader {
|
||||
itr: @interner<@str>) -> string_reader {
|
||||
let r = new_low_level_string_reader(span_diagnostic, filemap, itr);
|
||||
string_advance_token(r); /* fill in peek_* */
|
||||
ret r;
|
||||
}
|
||||
|
||||
/* For comments.rs, which hackily pokes into 'pos' and 'curr' */
|
||||
fn new_low_level_string_reader(span_diagnostic: diagnostic::span_handler,
|
||||
fn new_low_level_string_reader(span_diagnostic: span_handler,
|
||||
filemap: codemap::filemap,
|
||||
itr: @interner::interner<@str>)
|
||||
itr: @interner<@str>)
|
||||
-> string_reader {
|
||||
let r = @{span_diagnostic: span_diagnostic, src: filemap.src,
|
||||
mut col: 0u, mut pos: 0u, mut curr: -1 as char,
|
||||
@ -131,8 +78,8 @@ impl string_reader_as_reader of reader for string_reader {
|
||||
fn fatal(m: str) -> ! {
|
||||
self.span_diagnostic.span_fatal(copy self.peek_span, m)
|
||||
}
|
||||
fn span_diag() -> diagnostic::span_handler { self.span_diagnostic }
|
||||
fn interner() -> @interner::interner<@str> { self.interner }
|
||||
fn span_diag() -> span_handler { self.span_diagnostic }
|
||||
fn interner() -> @interner<@str> { self.interner }
|
||||
fn peek() -> {tok: token::token, sp: span} {
|
||||
{tok: self.peek_tok, sp: self.peek_span}
|
||||
}
|
||||
@ -153,8 +100,8 @@ impl tt_reader_as_reader of reader for tt_reader {
|
||||
fn fatal(m: str) -> ! {
|
||||
self.span_diagnostic.span_fatal(copy self.cur_span, m);
|
||||
}
|
||||
fn span_diag() -> diagnostic::span_handler { self.span_diagnostic }
|
||||
fn interner() -> @interner::interner<@str> { self.interner }
|
||||
fn span_diag() -> span_handler { self.span_diagnostic }
|
||||
fn interner() -> @interner<@str> { self.interner }
|
||||
fn peek() -> {tok: token::token, sp: span} {
|
||||
{ tok: self.cur_tok, sp: self.cur_span }
|
||||
}
|
||||
@ -178,42 +125,6 @@ fn string_advance_token(&&r: string_reader) {
|
||||
|
||||
}
|
||||
|
||||
fn tt_next_token(&&r: tt_reader) -> {tok: token::token, sp: span} {
|
||||
let ret_val = { tok: r.cur_tok, sp: r.cur_span };
|
||||
if r.cur.idx >= vec::len(r.cur.readme) {
|
||||
/* done with this set; pop */
|
||||
alt r.cur.up {
|
||||
tt_frame_up(none) {
|
||||
r.cur_tok = token::EOF;
|
||||
ret ret_val;
|
||||
}
|
||||
tt_frame_up(some(tt_f)) {
|
||||
r.cur = tt_f;
|
||||
/* the above `if` would need to be a `while` if we didn't know
|
||||
that the last thing in a `tt_delim` is always a `tt_flat` */
|
||||
r.cur.idx += 1u;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* if `tt_delim`s could be 0-length, we'd need to be able to switch
|
||||
between popping and pushing until we got to an actual `tt_flat` */
|
||||
loop { /* because it's easiest, this handles `tt_delim` not starting
|
||||
with a `tt_flat`, even though it won't happen */
|
||||
alt copy r.cur.readme[r.cur.idx] {
|
||||
tt_delim(tts) {
|
||||
r.cur = @{readme: tts, mut idx: 0u,
|
||||
up: tt_frame_up(option::some(r.cur)) };
|
||||
}
|
||||
tt_flat(sp, tok) {
|
||||
r.cur_span = sp; r.cur_tok = tok;
|
||||
r.cur.idx += 1u;
|
||||
ret ret_val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
fn get_str_from(rdr: string_reader, start: uint) -> str unsafe {
|
||||
// I'm pretty skeptical about this subtraction. What if there's a
|
||||
// multi-byte character before the mark?
|
||||
@ -548,7 +459,7 @@ fn next_token_inner(rdr: string_reader) -> token::token {
|
||||
let is_mod_name = c == ':' && nextch(rdr) == ':';
|
||||
|
||||
// FIXME: perform NFKC normalization here. (Issue #2253)
|
||||
ret token::IDENT(interner::intern(*rdr.interner,
|
||||
ret token::IDENT(intern(*rdr.interner,
|
||||
@accum_str), is_mod_name);
|
||||
}
|
||||
if is_dec_digit(c) {
|
||||
@ -713,8 +624,7 @@ fn next_token_inner(rdr: string_reader) -> token::token {
|
||||
}
|
||||
}
|
||||
bump(rdr);
|
||||
ret token::LIT_STR(interner::intern(*rdr.interner,
|
||||
@accum_str));
|
||||
ret token::LIT_STR(intern(*rdr.interner, @accum_str));
|
||||
}
|
||||
'-' {
|
||||
if nextch(rdr) == '>' {
|
||||
|
@ -70,6 +70,7 @@ class parser {
|
||||
let mut buffer_start: int;
|
||||
let mut buffer_end: int;
|
||||
let mut restriction: restriction;
|
||||
let mut quote_depth: uint; // not (yet) related to the quasiquoter
|
||||
let reader: reader;
|
||||
let keywords: hashmap<str, ()>;
|
||||
let restricted_keywords: hashmap<str, ()>;
|
||||
@ -94,6 +95,7 @@ class parser {
|
||||
self.buffer_start = 0;
|
||||
self.buffer_end = 0;
|
||||
self.restriction = UNRESTRICTED;
|
||||
self.quote_depth = 0u;
|
||||
self.keywords = token::keyword_table();
|
||||
self.restricted_keywords = token::restricted_keyword_table();
|
||||
}
|
||||
@ -1067,6 +1069,11 @@ class parser {
|
||||
}
|
||||
|
||||
fn parse_tt_flat(p: parser, delim_ok: bool) -> token_tree {
|
||||
if p.eat_keyword("many") && p.quote_depth > 0u {
|
||||
ret tt_dotdotdot(
|
||||
p.parse_seq(token::LPAREN, token::RPAREN, seq_sep_none(),
|
||||
|p| p.parse_token_tree()).node);
|
||||
}
|
||||
alt p.token {
|
||||
token::RPAREN | token::RBRACE | token::RBRACKET
|
||||
if !delim_ok {
|
||||
@ -1076,6 +1083,11 @@ class parser {
|
||||
token::EOF {
|
||||
p.fatal("file ended in the middle of a macro invocation");
|
||||
}
|
||||
/* we ought to allow different depths of unquotation */
|
||||
token::DOLLAR if p.quote_depth > 0u {
|
||||
p.bump();
|
||||
ret tt_interpolate(p.parse_ident());
|
||||
}
|
||||
_ { /* ok */ }
|
||||
}
|
||||
let res = tt_flat(p.span, p.token);
|
||||
@ -1104,10 +1116,11 @@ class parser {
|
||||
common::seq_sep_none(),
|
||||
|p| p.parse_matcher(@mut 0u)).node;
|
||||
let tt = self.parse_token_tree();
|
||||
//let tt_rhs = self.parse_token_tree();
|
||||
alt tt {
|
||||
tt_delim(tts) {
|
||||
let rdr = lexer::new_tt_reader(self.reader.span_diag(),
|
||||
self.reader.interner(), tts)
|
||||
self.reader.interner(), none, tts)
|
||||
as reader;
|
||||
ext::earley_parser::parse(self.sess, self.cfg, rdr, ms);
|
||||
}
|
||||
|
@ -66,6 +66,9 @@ mod ext {
|
||||
mod build;
|
||||
|
||||
mod earley_parser;
|
||||
mod tt {
|
||||
mod transcribe;
|
||||
}
|
||||
|
||||
mod fmt;
|
||||
mod env;
|
||||
|
Loading…
Reference in New Issue
Block a user