Auto merge of #95159 - nnethercote:TtParser, r=petrochenkov

Introduce `TtParser`

These commits make a number of changes to declarative macro expansion, resulting in code that is shorter, simpler, and faster.

Best reviewed one commit at a time.

r? `@petrochenkov`
This commit is contained in:
bors 2022-03-22 21:46:57 +00:00
commit a4a5e79814
7 changed files with 424 additions and 514 deletions

View File

@ -1,5 +1,6 @@
#![feature(associated_type_bounds)]
#![feature(associated_type_defaults)]
#![feature(box_syntax)]
#![feature(crate_visibility_modifier)]
#![feature(decl_macro)]
#![feature(if_let_guard)]

View File

@ -17,23 +17,48 @@ use rustc_data_structures::sync::Lrc;
use rustc_span::symbol::Ident;
use rustc_span::Span;
/// Contains the sub-token-trees of a "delimited" token tree, such as the contents of `(`. Note
/// that the delimiter itself might be `NoDelim`.
/// Contains the sub-token-trees of a "delimited" token tree such as `(a b c)`. The delimiter itself
/// might be `NoDelim`.
#[derive(Clone, PartialEq, Encodable, Decodable, Debug)]
struct Delimited {
delim: token::DelimToken,
tts: Vec<TokenTree>,
/// Note: This contains the opening and closing delimiter tokens (e.g. `(` and `)`). Note that
/// these could be `NoDelim`. These token kinds must match `delim`, and the methods below
/// debug_assert this.
all_tts: Vec<TokenTree>,
}
impl Delimited {
/// Returns a `self::TokenTree` with a `Span` corresponding to the opening delimiter.
fn open_tt(&self, span: DelimSpan) -> TokenTree {
TokenTree::token(token::OpenDelim(self.delim), span.open)
/// Returns a `self::TokenTree` with a `Span` corresponding to the opening delimiter. Panics if
/// the delimiter is `NoDelim`.
fn open_tt(&self) -> &TokenTree {
let tt = self.all_tts.first().unwrap();
debug_assert!(matches!(
tt,
&TokenTree::Token(token::Token { kind: token::OpenDelim(d), .. }) if d == self.delim
));
tt
}
/// Returns a `self::TokenTree` with a `Span` corresponding to the closing delimiter.
fn close_tt(&self, span: DelimSpan) -> TokenTree {
TokenTree::token(token::CloseDelim(self.delim), span.close)
/// Returns a `self::TokenTree` with a `Span` corresponding to the closing delimiter. Panics if
/// the delimiter is `NoDelim`.
fn close_tt(&self) -> &TokenTree {
let tt = self.all_tts.last().unwrap();
debug_assert!(matches!(
tt,
&TokenTree::Token(token::Token { kind: token::CloseDelim(d), .. }) if d == self.delim
));
tt
}
/// Returns the tts excluding the outer delimiters.
///
/// FIXME: #67062 has details about why this is sub-optimal.
fn inner_tts(&self) -> &[TokenTree] {
// These functions are called for the assertions within them.
let _open_tt = self.open_tt();
let _close_tt = self.close_tt();
&self.all_tts[1..self.all_tts.len() - 1]
}
}
@ -73,35 +98,24 @@ enum KleeneOp {
ZeroOrOne,
}
/// Similar to `tokenstream::TokenTree`, except that `$i`, `$i:ident`, `$(...)`,
/// and `${...}` are "first-class" token trees. Useful for parsing macros.
/// Similar to `tokenstream::TokenTree`, except that `Sequence`, `MetaVar`, `MetaVarDecl`, and
/// `MetaVarExpr` are "first-class" token trees. Useful for parsing macros.
#[derive(Debug, Clone, PartialEq, Encodable, Decodable)]
enum TokenTree {
Token(Token),
/// A delimited sequence, e.g. `($e:expr)` (RHS) or `{ $e }` (LHS).
Delimited(DelimSpan, Lrc<Delimited>),
/// A kleene-style repetition sequence
/// A kleene-style repetition sequence, e.g. `$($e:expr)*` (RHS) or `$($e),*` (LHS).
Sequence(DelimSpan, Lrc<SequenceRepetition>),
/// e.g., `$var`
/// e.g., `$var`.
MetaVar(Span, Ident),
/// e.g., `$var:expr`. This is only used in the left hand side of MBE macros.
/// e.g., `$var:expr`. Only appears on the LHS.
MetaVarDecl(Span, Ident /* name to bind */, Option<NonterminalKind>),
/// A meta-variable expression inside `${...}`
/// A meta-variable expression inside `${...}`.
MetaVarExpr(DelimSpan, MetaVarExpr),
}
impl TokenTree {
/// Return the number of tokens in the tree.
fn len(&self) -> usize {
match *self {
TokenTree::Delimited(_, ref delimed) => match delimed.delim {
token::NoDelim => delimed.tts.len(),
_ => delimed.tts.len() + 2,
},
TokenTree::Sequence(_, ref seq) => seq.tts.len(),
_ => 0,
}
}
/// Returns `true` if the given token tree is delimited.
fn is_delimited(&self) -> bool {
matches!(*self, TokenTree::Delimited(..))
@ -115,26 +129,6 @@ impl TokenTree {
}
}
/// Gets the `index`-th sub-token-tree. This only makes sense for delimited trees and sequences.
fn get_tt(&self, index: usize) -> TokenTree {
match (self, index) {
(&TokenTree::Delimited(_, ref delimed), _) if delimed.delim == token::NoDelim => {
delimed.tts[index].clone()
}
(&TokenTree::Delimited(span, ref delimed), _) => {
if index == 0 {
return delimed.open_tt(span);
}
if index == delimed.tts.len() + 1 {
return delimed.close_tt(span);
}
delimed.tts[index - 1].clone()
}
(&TokenTree::Sequence(_, ref seq), _) => seq.tts[index].clone(),
_ => panic!("Cannot expand a token tree"),
}
}
/// Retrieves the `TokenTree`'s span.
fn span(&self) -> Span {
match *self {

View File

@ -281,7 +281,7 @@ fn check_binders(
// `MetaVarExpr` can not appear in the LHS of a macro arm
TokenTree::MetaVarExpr(..) => {}
TokenTree::Delimited(_, ref del) => {
for tt in &del.tts {
for tt in del.inner_tts() {
check_binders(sess, node_id, tt, macros, binders, ops, valid);
}
}
@ -344,7 +344,7 @@ fn check_occurrences(
check_ops_is_prefix(sess, node_id, macros, binders, ops, dl.entire(), name);
}
TokenTree::Delimited(_, ref del) => {
check_nested_occurrences(sess, node_id, &del.tts, macros, binders, ops, valid);
check_nested_occurrences(sess, node_id, del.inner_tts(), macros, binders, ops, valid);
}
TokenTree::Sequence(_, ref seq) => {
let ops = ops.push(seq.kleene);
@ -431,14 +431,20 @@ fn check_nested_occurrences(
{
let macro_rules = state == NestedMacroState::MacroRulesNotName;
state = NestedMacroState::Empty;
let rest =
check_nested_macro(sess, node_id, macro_rules, &del.tts, &nested_macros, valid);
let rest = check_nested_macro(
sess,
node_id,
macro_rules,
del.inner_tts(),
&nested_macros,
valid,
);
// If we did not check the whole macro definition, then check the rest as if outside
// the macro definition.
check_nested_occurrences(
sess,
node_id,
&del.tts[rest..],
&del.inner_tts()[rest..],
macros,
binders,
ops,

View File

@ -72,9 +72,8 @@
crate use NamedMatch::*;
crate use ParseResult::*;
use TokenTreeOrTokenTreeSlice::*;
use crate::mbe::{self, DelimSpan, SequenceRepetition, TokenTree};
use crate::mbe::{self, SequenceRepetition, TokenTree};
use rustc_ast::token::{self, DocComment, Nonterminal, Token};
use rustc_parse::parser::Parser;
@ -89,36 +88,6 @@ use rustc_span::symbol::Ident;
use std::borrow::Cow;
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::mem;
use std::ops::{Deref, DerefMut};
// To avoid costly uniqueness checks, we require that `MatchSeq` always has a nonempty body.
/// Either a slice of token trees or a single one. This is used as the representation of the
/// token trees that make up a matcher.
#[derive(Clone)]
enum TokenTreeOrTokenTreeSlice<'tt> {
Tt(TokenTree),
TtSlice(&'tt [TokenTree]),
}
impl<'tt> TokenTreeOrTokenTreeSlice<'tt> {
/// Returns the number of constituent top-level token trees of `self` (top-level in that it
/// will not recursively descend into subtrees).
fn len(&self) -> usize {
match *self {
TtSlice(ref v) => v.len(),
Tt(ref tt) => tt.len(),
}
}
/// The `index`-th token tree of `self`.
fn get_tt(&self, index: usize) -> TokenTree {
match *self {
TtSlice(ref v) => v[index].clone(),
Tt(ref tt) => tt.get_tt(index),
}
}
}
/// An unzipping of `TokenTree`s... see the `stack` field of `MatcherPos`.
///
@ -127,7 +96,7 @@ impl<'tt> TokenTreeOrTokenTreeSlice<'tt> {
#[derive(Clone)]
struct MatcherTtFrame<'tt> {
/// The "parent" matcher that we are descending into.
elts: TokenTreeOrTokenTreeSlice<'tt>,
elts: &'tt [TokenTree],
/// The position of the "dot" in `elts` at the time we descended.
idx: usize,
}
@ -136,26 +105,10 @@ type NamedMatchVec = SmallVec<[NamedMatch; 4]>;
/// Represents a single "position" (aka "matcher position", aka "item"), as
/// described in the module documentation.
///
/// Here:
///
/// - `'root` represents the lifetime of the stack slot that holds the root
/// `MatcherPos`. As described in `MatcherPosHandle`, the root `MatcherPos`
/// structure is stored on the stack, but subsequent instances are put into
/// the heap.
/// - `'tt` represents the lifetime of the token trees that this matcher
/// position refers to.
///
/// It is important to distinguish these two lifetimes because we have a
/// `SmallVec<TokenTreeOrTokenTreeSlice<'tt>>` below, and the destructor of
/// that is considered to possibly access the data from its elements (it lacks
/// a `#[may_dangle]` attribute). As a result, the compiler needs to know that
/// all the elements in that `SmallVec` strictly outlive the root stack slot
/// lifetime. By separating `'tt` from `'root`, we can show that.
#[derive(Clone)]
struct MatcherPos<'root, 'tt> {
struct MatcherPos<'tt> {
/// The token or slice of tokens that make up the matcher. `elts` is short for "elements".
top_elts: TokenTreeOrTokenTreeSlice<'tt>,
top_elts: &'tt [TokenTree],
/// The position of the "dot" in this matcher
idx: usize,
@ -185,7 +138,7 @@ struct MatcherPos<'root, 'tt> {
match_hi: usize,
/// This field is only used if we are matching a repetition.
repetition: Option<MatcherPosRepetition<'root, 'tt>>,
repetition: Option<MatcherPosRepetition<'tt>>,
/// Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from
/// a delimited token tree (e.g., something wrapped in `(` `)`) or to get the contents of a doc
@ -200,9 +153,9 @@ struct MatcherPos<'root, 'tt> {
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(MatcherPos<'_, '_>, 240);
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 136);
impl<'root, 'tt> MatcherPos<'root, 'tt> {
impl<'tt> MatcherPos<'tt> {
/// `len` `Vec`s (initially shared and empty) that will store matches of metavars.
fn create_matches(len: usize) -> Box<[Lrc<NamedMatchVec>]> {
if len == 0 {
@ -220,7 +173,7 @@ impl<'root, 'tt> MatcherPos<'root, 'tt> {
let match_idx_hi = count_names(ms);
MatcherPos {
// Start with the top level matcher given to us.
top_elts: TtSlice(ms),
top_elts: ms,
// The "dot" is before the first token of the matcher.
idx: 0,
@ -241,13 +194,9 @@ impl<'root, 'tt> MatcherPos<'root, 'tt> {
}
}
fn repetition(
up: MatcherPosHandle<'root, 'tt>,
sp: DelimSpan,
seq: Lrc<SequenceRepetition>,
) -> Self {
fn repetition(up: Box<MatcherPos<'tt>>, seq: &'tt SequenceRepetition) -> Self {
MatcherPos {
stack: smallvec![],
top_elts: &seq.tts,
idx: 0,
matches: Self::create_matches(up.matches.len()),
match_lo: up.match_cur,
@ -258,7 +207,7 @@ impl<'root, 'tt> MatcherPos<'root, 'tt> {
sep: seq.separator.clone(),
seq_op: seq.kleene.op,
}),
top_elts: Tt(TokenTree::Sequence(sp, seq)),
stack: smallvec![],
}
}
@ -270,7 +219,7 @@ impl<'root, 'tt> MatcherPos<'root, 'tt> {
}
#[derive(Clone)]
struct MatcherPosRepetition<'root, 'tt> {
struct MatcherPosRepetition<'tt> {
/// The KleeneOp of this sequence.
seq_op: mbe::KleeneOp,
@ -279,55 +228,12 @@ struct MatcherPosRepetition<'root, 'tt> {
/// The "parent" matcher position. That is, the matcher position just before we enter the
/// sequence.
up: MatcherPosHandle<'root, 'tt>,
up: Box<MatcherPos<'tt>>,
}
// Lots of MatcherPos instances are created at runtime. Allocating them on the
// heap is slow. Furthermore, using SmallVec<MatcherPos> to allocate them all
// on the stack is also slow, because MatcherPos is quite a large type and
// instances get moved around a lot between vectors, which requires lots of
// slow memcpy calls.
//
// Therefore, the initial MatcherPos is always allocated on the stack,
// subsequent ones (of which there aren't that many) are allocated on the heap,
// and this type is used to encapsulate both cases.
enum MatcherPosHandle<'root, 'tt> {
Ref(&'root mut MatcherPos<'root, 'tt>),
Box(Box<MatcherPos<'root, 'tt>>),
}
impl<'root, 'tt> Clone for MatcherPosHandle<'root, 'tt> {
// This always produces a new Box.
fn clone(&self) -> Self {
MatcherPosHandle::Box(match *self {
MatcherPosHandle::Ref(ref r) => Box::new((**r).clone()),
MatcherPosHandle::Box(ref b) => b.clone(),
})
}
}
impl<'root, 'tt> Deref for MatcherPosHandle<'root, 'tt> {
type Target = MatcherPos<'root, 'tt>;
fn deref(&self) -> &Self::Target {
match *self {
MatcherPosHandle::Ref(ref r) => r,
MatcherPosHandle::Box(ref b) => b,
}
}
}
impl<'root, 'tt> DerefMut for MatcherPosHandle<'root, 'tt> {
fn deref_mut(&mut self) -> &mut MatcherPos<'root, 'tt> {
match *self {
MatcherPosHandle::Ref(ref mut r) => r,
MatcherPosHandle::Box(ref mut b) => b,
}
}
}
enum EofItems<'root, 'tt> {
enum EofItems<'tt> {
None,
One(MatcherPosHandle<'root, 'tt>),
One(Box<MatcherPos<'tt>>),
Multiple,
}
@ -352,8 +258,8 @@ crate type NamedParseResult = ParseResult<FxHashMap<MacroRulesNormalizedIdent, N
pub(super) fn count_names(ms: &[TokenTree]) -> usize {
ms.iter().fold(0, |count, elt| {
count
+ match *elt {
TokenTree::Delimited(_, ref delim) => count_names(&delim.tts),
+ match elt {
TokenTree::Delimited(_, delim) => count_names(delim.inner_tts()),
TokenTree::MetaVar(..) => 0,
TokenTree::MetaVarDecl(..) => 1,
// Panicking here would abort execution because `parse_tree` makes use of this
@ -362,7 +268,7 @@ pub(super) fn count_names(ms: &[TokenTree]) -> usize {
// `0` is still returned to inform that no meta-variable was found. `Meta-variables
// != Meta-variable expressions`
TokenTree::MetaVarExpr(..) => 0,
TokenTree::Sequence(_, ref seq) => seq.num_captures,
TokenTree::Sequence(_, seq) => seq.num_captures,
TokenTree::Token(..) => 0,
}
})
@ -446,7 +352,7 @@ fn nameize<I: Iterator<Item = NamedMatch>>(
}
}
TokenTree::Delimited(_, ref delim) => {
for next_m in &delim.tts {
for next_m in delim.inner_tts() {
n_rec(sess, next_m, res.by_ref(), ret_val)?;
}
}
@ -492,38 +398,49 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool {
}
}
/// Process the matcher positions of `cur_items` until it is empty. In the process, this will
/// produce more items in `next_items` and `bb_items`.
///
/// For more info about the how this happens, see the module-level doc comments and the inline
/// comments of this function.
///
/// # Parameters
///
/// - `cur_items`: the set of current items to be processed. This should be empty by the end of a
/// successful execution of this function.
/// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in
/// the function `parse`.
/// - `bb_items`: the set of items that are waiting for the black-box parser.
/// - `token`: the current token of the parser.
///
/// # Returns
///
/// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept track of
/// through the items generated.
fn parse_tt_inner<'root, 'tt>(
// Note: the item vectors could be created and dropped within `parse_tt`, but to avoid excess
// allocations we have a single vector for each kind that is cleared and reused repeatedly.
/// State for matching a token stream against a macro matcher via `parse_tt`. It holds the
/// working sets of matcher positions ("items") so that they can be reused across calls.
pub struct TtParser<'tt> {
/// The name of the macro being matched. Used when reporting a "local ambiguity" error.
macro_name: Ident,
/// The set of current items to be processed. This should be empty by the end of a successful
/// execution of `parse_tt_inner`.
cur_items: Vec<Box<MatcherPos<'tt>>>,
/// The set of newly generated items. These are used to replenish `cur_items` in the function
/// `parse_tt`.
next_items: Vec<Box<MatcherPos<'tt>>>,
/// The set of items that are waiting for the black-box parser.
bb_items: Vec<Box<MatcherPos<'tt>>>,
}
impl<'tt> TtParser<'tt> {
pub(super) fn new(macro_name: Ident) -> TtParser<'tt> {
TtParser { macro_name, cur_items: vec![], next_items: vec![], bb_items: vec![] }
}
/// Process the matcher positions of `cur_items` until it is empty. In the process, this will
/// produce more items in `next_items` and `bb_items`.
///
/// For more info about how this happens, see the module-level doc comments and the inline
/// comments of this function.
///
/// # Returns
///
/// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept
/// track of through the items generated.
fn parse_tt_inner(
&mut self,
sess: &ParseSess,
ms: &[TokenTree],
cur_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
next_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
bb_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
token: &Token,
) -> Option<NamedParseResult> {
// Matcher positions that would be valid if the macro invocation was over now. Only modified if
// `token == Eof`.
) -> Option<NamedParseResult> {
// Matcher positions that would be valid if the macro invocation was over now. Only
// modified if `token == Eof`.
let mut eof_items = EofItems::None;
while let Some(mut item) = cur_items.pop() {
while let Some(mut item) = self.cur_items.pop() {
// When unzipped trees end, remove them. This corresponds to backtracking out of a
// delimited submatcher into which we already descended. When backtracking out again, we
// need to advance the "dot" past the delimiters in the outer matcher.
@ -537,15 +454,16 @@ fn parse_tt_inner<'root, 'tt>(
}
}
// Get the current position of the "dot" (`idx`) in `item` and the number of token trees in
// the matcher (`len`).
// Get the current position of the "dot" (`idx`) in `item` and the number of token
// trees in the matcher (`len`).
let idx = item.idx;
let len = item.top_elts.len();
if idx < len {
// We are in the middle of a matcher. Compare the matcher's current tt against `token`.
match item.top_elts.get_tt(idx) {
TokenTree::Sequence(sp, seq) => {
// We are in the middle of a matcher. Compare the matcher's current tt against
// `token`.
match &item.top_elts[idx] {
TokenTree::Sequence(_sp, seq) => {
let op = seq.kleene.op;
if op == mbe::KleeneOp::ZeroOrMore || op == mbe::KleeneOp::ZeroOrOne {
// Allow for the possibility of zero matches of this sequence.
@ -555,55 +473,55 @@ fn parse_tt_inner<'root, 'tt>(
for idx in item.match_cur..item.match_cur + seq.num_captures {
new_item.push_match(idx, MatchedSeq(Lrc::new(smallvec![])));
}
cur_items.push(new_item);
self.cur_items.push(new_item);
}
// Allow for the possibility of one or more matches of this sequence.
cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos::repetition(
item, sp, seq,
))));
self.cur_items.push(box MatcherPos::repetition(item, &seq));
}
TokenTree::MetaVarDecl(span, _, None) => {
&TokenTree::MetaVarDecl(span, _, None) => {
// E.g. `$e` instead of `$e:expr`.
if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
return Some(Error(span, "missing fragment specifier".to_string()));
}
}
TokenTree::MetaVarDecl(_, _, Some(kind)) => {
&TokenTree::MetaVarDecl(_, _, Some(kind)) => {
// Built-in nonterminals never start with these tokens, so we can eliminate
// them from consideration.
//
// We use the span of the metavariable declaration to determine any
// edition-specific matching behavior for non-terminals.
if Parser::nonterminal_may_begin_with(kind, token) {
bb_items.push(item);
self.bb_items.push(item);
}
}
seq @ (TokenTree::Delimited(..)
| TokenTree::Token(Token { kind: DocComment(..), .. })) => {
// To descend into a delimited submatcher or a doc comment, we push the current
// matcher onto a stack and push a new item containing the submatcher onto
// `cur_items`.
TokenTree::Delimited(_, delimited) => {
// To descend into a delimited submatcher, we push the current matcher onto
// a stack and push a new item containing the submatcher onto `cur_items`.
//
// At the beginning of the loop, if we reach the end of the delimited
// submatcher, we pop the stack to backtrack out of the descent.
let lower_elts = mem::replace(&mut item.top_elts, Tt(seq));
// submatcher, we pop the stack to backtrack out of the descent. Note that
// we use `all_tts` to include the open and close delimiter tokens.
let lower_elts = mem::replace(&mut item.top_elts, &delimited.all_tts);
let idx = item.idx;
item.stack.push(MatcherTtFrame { elts: lower_elts, idx });
item.idx = 0;
cur_items.push(item);
self.cur_items.push(item);
}
TokenTree::Token(t) => {
// If the token matches, we can just advance the parser. Otherwise, this match
// hash failed, there is nothing to do, and hopefully another item in
// Doc comments cannot appear in a matcher.
debug_assert!(!matches!(t, Token { kind: DocComment(..), .. }));
// If the token matches, we can just advance the parser. Otherwise, this
// match has failed, there is nothing to do, and hopefully another item in
// `cur_items` will match.
if token_name_eq(&t, token) {
item.idx += 1;
next_items.push(item);
self.next_items.push(item);
}
}
@ -613,7 +531,6 @@ fn parse_tt_inner<'root, 'tt>(
} else if let Some(repetition) = &item.repetition {
// We are past the end of a repetition.
debug_assert!(idx <= len + 1);
debug_assert!(matches!(item.top_elts, Tt(TokenTree::Sequence(..))));
if idx == len {
// Add all matches from the sequence to `up`, and move the "dot" past the
@ -626,7 +543,7 @@ fn parse_tt_inner<'root, 'tt>(
}
new_pos.match_cur = item.match_hi;
new_pos.idx += 1;
cur_items.push(new_pos);
self.cur_items.push(new_pos);
}
if idx == len && repetition.sep.is_some() {
@ -634,7 +551,7 @@ fn parse_tt_inner<'root, 'tt>(
// The matcher has a separator, and it matches the current token. We can
// advance past the separator token.
item.idx += 1;
next_items.push(item);
self.next_items.push(item);
}
} else if repetition.seq_op != mbe::KleeneOp::ZeroOrOne {
// We don't need a separator. Move the "dot" back to the beginning of the
@ -642,10 +559,11 @@ fn parse_tt_inner<'root, 'tt>(
// repetition.
item.match_cur = item.match_lo;
item.idx = 0;
cur_items.push(item);
self.cur_items.push(item);
}
} else {
// We are past the end of the matcher, and not in a repetition. Look for end of input.
// We are past the end of the matcher, and not in a repetition. Look for end of
// input.
debug_assert_eq!(idx, len);
if *token == token::Eof {
eof_items = match eof_items {
@ -656,8 +574,8 @@ fn parse_tt_inner<'root, 'tt>(
}
}
// If we reached the end of input, check that there is EXACTLY ONE possible matcher. Otherwise,
// either the parse is ambiguous (which is an error) or there is a syntax error.
// If we reached the end of input, check that there is EXACTLY ONE possible matcher.
// Otherwise, either the parse is ambiguous (which is an error) or there is a syntax error.
if *token == token::Eof {
Some(match eof_items {
EofItems::One(mut eof_item) => {
@ -679,68 +597,58 @@ fn parse_tt_inner<'root, 'tt>(
} else {
None
}
}
}
/// Use the given slice of token trees (`ms`) as a matcher. Match the token stream from the given
/// `parser` against it and return the match.
pub(super) fn parse_tt(
/// Use the given slice of token trees (`ms`) as a matcher. Match the token stream from the
/// given `parser` against it and return the match.
pub(super) fn parse_tt(
&mut self,
parser: &mut Cow<'_, Parser<'_>>,
ms: &[TokenTree],
macro_name: Ident,
) -> NamedParseResult {
// A queue of possible matcher positions. We initialize it with the matcher position in which
// the "dot" is before the first token of the first token tree in `ms`. `parse_tt_inner` then
// processes all of these possible matcher positions and produces possible next positions into
// `next_items`. After some post-processing, the contents of `next_items` replenish `cur_items`
// and we start over again.
//
// This MatcherPos instance is allocated on the stack. All others -- and there are frequently
// *no* others! -- are allocated on the heap.
let mut initial = MatcherPos::new(ms);
let mut cur_items = smallvec![MatcherPosHandle::Ref(&mut initial)];
ms: &'tt [TokenTree],
) -> NamedParseResult {
// A queue of possible matcher positions. We initialize it with the matcher position in
// which the "dot" is before the first token of the first token tree in `ms`.
// `parse_tt_inner` then processes all of these possible matcher positions and produces
// possible next positions into `next_items`. After some post-processing, the contents of
// `next_items` replenish `cur_items` and we start over again.
self.cur_items.clear();
self.cur_items.push(box MatcherPos::new(ms));
loop {
let mut next_items = SmallVec::new();
// Matcher positions black-box parsed by `Parser`.
let mut bb_items = SmallVec::new();
self.next_items.clear();
self.bb_items.clear();
// Process `cur_items` until either we have finished the input or we need to get some
// parsing from the black-box parser done.
if let Some(result) = parse_tt_inner(
parser.sess,
ms,
&mut cur_items,
&mut next_items,
&mut bb_items,
&parser.token,
) {
if let Some(result) = self.parse_tt_inner(parser.sess, ms, &parser.token) {
return result;
}
// `parse_tt_inner` handled all cur_items, so it's empty.
assert!(cur_items.is_empty());
assert!(self.cur_items.is_empty());
// Error messages here could be improved with links to original rules.
match (next_items.len(), bb_items.len()) {
match (self.next_items.len(), self.bb_items.len()) {
(0, 0) => {
// There are no possible next positions AND we aren't waiting for the black-box
// parser: syntax error.
return Failure(parser.token.clone(), "no rules expected this token in macro call");
return Failure(
parser.token.clone(),
"no rules expected this token in macro call",
);
}
(_, 0) => {
// Dump all possible `next_items` into `cur_items` for the next iteration. Then
// process the next token.
cur_items.extend(next_items.drain(..));
self.cur_items.extend(self.next_items.drain(..));
parser.to_mut().bump();
}
(0, 1) => {
// We need to call the black-box parser to get some nonterminal.
let mut item = bb_items.pop().unwrap();
if let TokenTree::MetaVarDecl(span, _, Some(kind)) = item.top_elts.get_tt(item.idx)
{
let mut item = self.bb_items.pop().unwrap();
if let TokenTree::MetaVarDecl(span, _, Some(kind)) = item.top_elts[item.idx] {
let match_cur = item.match_cur;
// We use the span of the metavariable declaration to determine any
// edition-specific matching behavior for non-terminals.
@ -748,7 +656,9 @@ pub(super) fn parse_tt(
Err(mut err) => {
err.span_label(
span,
format!("while parsing argument for this `{kind}` macro fragment"),
format!(
"while parsing argument for this `{kind}` macro fragment"
),
)
.emit();
return ErrorReported;
@ -761,33 +671,24 @@ pub(super) fn parse_tt(
} else {
unreachable!()
}
cur_items.push(item);
self.cur_items.push(item);
}
(_, _) => {
// Too many possibilities!
return bb_items_ambiguity_error(
macro_name,
next_items,
bb_items,
parser.token.span,
);
return self.ambiguity_error(parser.token.span);
}
}
assert!(!cur_items.is_empty());
assert!(!self.cur_items.is_empty());
}
}
}
fn bb_items_ambiguity_error<'root, 'tt>(
macro_name: Ident,
next_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
bb_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
token_span: rustc_span::Span,
) -> NamedParseResult {
let nts = bb_items
fn ambiguity_error(&self, token_span: rustc_span::Span) -> NamedParseResult {
let nts = self
.bb_items
.iter()
.map(|item| match item.top_elts.get_tt(item.idx) {
.map(|item| match item.top_elts[item.idx] {
TokenTree::MetaVarDecl(_, bind, Some(kind)) => {
format!("{} ('{}')", kind, bind)
}
@ -799,12 +700,14 @@ fn bb_items_ambiguity_error<'root, 'tt>(
Error(
token_span,
format!(
"local ambiguity when calling macro `{macro_name}`: multiple parsing options: {}",
match next_items.len() {
"local ambiguity when calling macro `{}`: multiple parsing options: {}",
self.macro_name,
match self.next_items.len() {
0 => format!("built-in NTs {}.", nts),
1 => format!("built-in NTs {} or 1 other option.", nts),
n => format!("built-in NTs {} or {} other options.", nts, n),
}
),
)
}
}

View File

@ -3,8 +3,7 @@ use crate::base::{SyntaxExtension, SyntaxExtensionKind};
use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind};
use crate::mbe;
use crate::mbe::macro_check;
use crate::mbe::macro_parser::parse_tt;
use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success};
use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser};
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq};
use crate::mbe::transcribe::transcribe;
@ -204,15 +203,15 @@ fn trace_macros_note(cx_expansions: &mut FxHashMap<Span, Vec<String>>, sp: Span,
}
/// Given `lhses` and `rhses`, this is the new macro we create
fn generic_extension<'cx>(
fn generic_extension<'cx, 'tt>(
cx: &'cx mut ExtCtxt<'_>,
sp: Span,
def_span: Span,
name: Ident,
transparency: Transparency,
arg: TokenStream,
lhses: &[mbe::TokenTree],
rhses: &[mbe::TokenTree],
lhses: &'tt [mbe::TokenTree],
rhses: &'tt [mbe::TokenTree],
is_local: bool,
) -> Box<dyn MacResult + 'cx> {
let sess = &cx.sess.parse_sess;
@ -246,30 +245,30 @@ fn generic_extension<'cx>(
// this situation.)
let parser = parser_from_cx(sess, arg.clone());
for (i, lhs) in lhses.iter().enumerate() {
// try each arm's matchers
let lhs_tt = match *lhs {
mbe::TokenTree::Delimited(_, ref delim) => &delim.tts,
// A matcher is always delimited, but the delimiters are ignored.
let delimited_inner_tts = |tt: &'tt mbe::TokenTree| -> &'tt [mbe::TokenTree] {
match tt {
mbe::TokenTree::Delimited(_, delimited) => delimited.inner_tts(),
_ => cx.span_bug(sp, "malformed macro lhs"),
}
};
// Try each arm's matchers.
let mut tt_parser = TtParser::new(name);
for (i, lhs) in lhses.iter().enumerate() {
// Take a snapshot of the state of pre-expansion gating at this point.
// This is used so that if a matcher is not `Success(..)`ful,
// then the spans which became gated when parsing the unsuccessful matcher
// are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut());
match parse_tt(&mut Cow::Borrowed(&parser), lhs_tt, name) {
match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), delimited_inner_tts(lhs)) {
Success(named_matches) => {
// The matcher was `Success(..)`ful.
// Merge the gated spans from parsing the matcher with the pre-existing ones.
sess.gated_spans.merge(gated_spans_snapshot);
let rhs = match rhses[i] {
// ignore delimiters
mbe::TokenTree::Delimited(_, ref delimed) => delimed.tts.clone(),
_ => cx.span_bug(sp, "malformed macro rhs"),
};
let rhs = delimited_inner_tts(&rhses[i]).to_vec().clone();
let arm_span = rhses[i].span();
let rhs_spans = rhs.iter().map(|t| t.span()).collect::<Vec<_>>();
@ -347,14 +346,10 @@ fn generic_extension<'cx>(
// Check whether there's a missing comma in this macro call, like `println!("{}" a);`
if let Some((arg, comma_span)) = arg.add_comma() {
for lhs in lhses {
// try each arm's matchers
let lhs_tt = match *lhs {
mbe::TokenTree::Delimited(_, ref delim) => &delim.tts,
_ => continue,
};
if let Success(_) =
parse_tt(&mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())), lhs_tt, name)
{
if let Success(_) = tt_parser.parse_tt(
&mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())),
delimited_inner_tts(lhs),
) {
if comma_span.is_dummy() {
err.note("you might be missing a comma");
} else {
@ -447,7 +442,8 @@ pub fn compile_declarative_macro(
];
let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS);
let argument_map = match parse_tt(&mut Cow::Borrowed(&parser), &argument_gram, def.ident) {
let mut tt_parser = TtParser::new(def.ident);
let argument_map = match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), &argument_gram) {
Success(m) => m,
Failure(token, msg) => {
let s = parse_failure_msg(&token);
@ -476,16 +472,17 @@ pub fn compile_declarative_macro(
.map(|m| {
if let MatchedNonterminal(ref nt) = *m {
if let NtTT(ref tt) = **nt {
let tt = mbe::quoted::parse(
let mut tts = vec![];
mbe::quoted::parse(
tt.clone().into(),
true,
&sess.parse_sess,
def.id,
features,
edition,
)
.pop()
.unwrap();
&mut tts,
);
let tt = tts.pop().unwrap();
valid &= check_lhs_nt_follows(&sess.parse_sess, features, &def, &tt);
return tt;
}
@ -502,16 +499,17 @@ pub fn compile_declarative_macro(
.map(|m| {
if let MatchedNonterminal(ref nt) = *m {
if let NtTT(ref tt) = **nt {
return mbe::quoted::parse(
let mut tts = vec![];
mbe::quoted::parse(
tt.clone().into(),
false,
&sess.parse_sess,
def.id,
features,
edition,
)
.pop()
.unwrap();
&mut tts,
);
return tts.pop().unwrap();
}
}
sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
@ -563,8 +561,8 @@ fn check_lhs_nt_follows(
) -> bool {
// lhs is going to be like TokenTree::Delimited(...), where the
// entire lhs is those tts. Or, it can be a "bare sequence", not wrapped in parens.
if let mbe::TokenTree::Delimited(_, ref tts) = *lhs {
check_matcher(sess, features, def, &tts.tts)
if let mbe::TokenTree::Delimited(_, delimited) = lhs {
check_matcher(sess, features, def, delimited.inner_tts())
} else {
let msg = "invalid macro matcher; matchers must be contained in balanced delimiters";
sess.span_diagnostic.span_err(lhs.span(), msg);
@ -585,7 +583,7 @@ fn check_lhs_no_empty_seq(sess: &ParseSess, tts: &[mbe::TokenTree]) -> bool {
| TokenTree::MetaVarDecl(..)
| TokenTree::MetaVarExpr(..) => (),
TokenTree::Delimited(_, ref del) => {
if !check_lhs_no_empty_seq(sess, &del.tts) {
if !check_lhs_no_empty_seq(sess, del.inner_tts()) {
return false;
}
}
@ -680,9 +678,9 @@ impl FirstSets {
| TokenTree::MetaVarExpr(..) => {
first.replace_with(tt.clone());
}
TokenTree::Delimited(span, ref delimited) => {
build_recur(sets, &delimited.tts);
first.replace_with(delimited.open_tt(span));
TokenTree::Delimited(_span, ref delimited) => {
build_recur(sets, delimited.inner_tts());
first.replace_with(delimited.open_tt().clone());
}
TokenTree::Sequence(sp, ref seq_rep) => {
let subfirst = build_recur(sets, &seq_rep.tts);
@ -746,8 +744,8 @@ impl FirstSets {
first.add_one(tt.clone());
return first;
}
TokenTree::Delimited(span, ref delimited) => {
first.add_one(delimited.open_tt(span));
TokenTree::Delimited(_span, ref delimited) => {
first.add_one(delimited.open_tt().clone());
return first;
}
TokenTree::Sequence(sp, ref seq_rep) => {
@ -933,9 +931,9 @@ fn check_matcher_core(
suffix_first = build_suffix_first();
}
}
TokenTree::Delimited(span, ref d) => {
let my_suffix = TokenSet::singleton(d.close_tt(span));
check_matcher_core(sess, features, def, first_sets, &d.tts, &my_suffix);
TokenTree::Delimited(_span, ref d) => {
let my_suffix = TokenSet::singleton(d.close_tt().clone());
check_matcher_core(sess, features, def, first_sets, d.inner_tts(), &my_suffix);
// don't track non NT tokens
last.replace_with_irrelevant();

View File

@ -45,10 +45,8 @@ pub(super) fn parse(
node_id: NodeId,
features: &Features,
edition: Edition,
) -> Vec<TokenTree> {
// Will contain the final collection of `self::TokenTree`
let mut result = Vec::new();
result: &mut Vec<TokenTree>,
) {
// For each token tree in `input`, parse the token into a `self::TokenTree`, consuming
// additional trees if need be.
let mut trees = input.trees();
@ -115,7 +113,6 @@ pub(super) fn parse(
_ => result.push(tree),
}
}
result
}
/// Asks for the `macro_metavar_expr` feature if it is not already declared
@ -208,7 +205,8 @@ fn parse_tree(
// If we didn't find a metavar expression above, then we must have a
// repetition sequence in the macro (e.g. `$(pat)*`). Parse the
// contents of the sequence itself
let sequence = parse(tts, parsing_patterns, sess, node_id, features, edition);
let mut sequence = vec![];
parse(tts, parsing_patterns, sess, node_id, features, edition, &mut sequence);
// Get the Kleene operator and optional separator
let (separator, kleene) =
parse_sep_and_kleene_op(&mut trees, delim_span.entire(), sess);
@ -225,8 +223,8 @@ fn parse_tree(
)
}
// `tree` is followed by an `ident`. This could be `$meta_var` or the `$crate` special
// metavariable that names the crate of the invocation.
// `tree` is followed by an `ident`. This could be `$meta_var` or the `$crate`
// special metavariable that names the crate of the invocation.
Some(tokenstream::TokenTree::Token(token)) if token.is_ident() => {
let (ident, is_raw) = token.ident().unwrap();
let span = ident.span.with_lo(span.lo());
@ -270,13 +268,15 @@ fn parse_tree(
// `tree` is the beginning of a delimited set of tokens (e.g., `(` or `{`). We need to
// descend into the delimited set and further parse it.
tokenstream::TokenTree::Delimited(span, delim, tts) => TokenTree::Delimited(
span,
Lrc::new(Delimited {
delim,
tts: parse(tts, parsing_patterns, sess, node_id, features, edition),
}),
),
tokenstream::TokenTree::Delimited(span, delim, tts) => {
let mut all_tts = vec![];
// Add the explicit open and close delimiters, which
// `tokenstream::TokenTree::Delimited` lacks.
all_tts.push(TokenTree::token(token::OpenDelim(delim), span.open));
parse(tts, parsing_patterns, sess, node_id, features, edition, &mut all_tts);
all_tts.push(TokenTree::token(token::CloseDelim(delim), span.close));
TokenTree::Delimited(span, Lrc::new(Delimited { delim, all_tts }))
}
}
}

View File

@ -10,7 +10,7 @@ use rustc_errors::{pluralize, PResult};
use rustc_errors::{DiagnosticBuilder, ErrorGuaranteed};
use rustc_span::hygiene::{LocalExpnId, Transparency};
use rustc_span::symbol::{sym, Ident, MacroRulesNormalizedIdent};
use rustc_span::Span;
use rustc_span::{Span, DUMMY_SP};
use smallvec::{smallvec, SmallVec};
use std::mem;
@ -34,8 +34,14 @@ enum Frame {
impl Frame {
/// Construct a new frame around the delimited set of tokens.
fn new(tts: Vec<mbe::TokenTree>) -> Frame {
let forest = Lrc::new(mbe::Delimited { delim: token::NoDelim, tts });
fn new(mut tts: Vec<mbe::TokenTree>) -> Frame {
// Need to add empty delimiters.
let open_tt = mbe::TokenTree::token(token::OpenDelim(token::NoDelim), DUMMY_SP);
let close_tt = mbe::TokenTree::token(token::CloseDelim(token::NoDelim), DUMMY_SP);
tts.insert(0, open_tt);
tts.push(close_tt);
let forest = Lrc::new(mbe::Delimited { delim: token::NoDelim, all_tts: tts });
Frame::Delimited { forest, idx: 0, span: DelimSpan::dummy() }
}
}
@ -46,12 +52,14 @@ impl Iterator for Frame {
fn next(&mut self) -> Option<mbe::TokenTree> {
match *self {
Frame::Delimited { ref forest, ref mut idx, .. } => {
let res = forest.inner_tts().get(*idx).cloned();
*idx += 1;
forest.tts.get(*idx - 1).cloned()
res
}
Frame::Sequence { ref forest, ref mut idx, .. } => {
let res = forest.tts.get(*idx).cloned();
*idx += 1;
forest.tts.get(*idx - 1).cloned()
res
}
}
}
@ -376,8 +384,8 @@ fn lockstep_iter_size(
) -> LockstepIterSize {
use mbe::TokenTree;
match *tree {
TokenTree::Delimited(_, ref delimed) => {
delimed.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| {
TokenTree::Delimited(_, ref delimited) => {
delimited.inner_tts().iter().fold(LockstepIterSize::Unconstrained, |size, tt| {
size.with(lockstep_iter_size(tt, interpolations, repeats))
})
}