Auto merge of #95669 - nnethercote:call-compute_locs-once-per-rule, r=petrochenkov

Call `compute_locs` once per rule

This fixes the small regressions on `wg-grammar` and `hyper-0.14.18` seen in #95555.

r? `@petrochenkov`
This commit is contained in:
bors 2022-04-06 16:29:32 +00:00
commit c2afaba465
2 changed files with 169 additions and 149 deletions

View File

@ -106,9 +106,9 @@ rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
///
/// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves
/// simply incrementing the current matcher position index by one.
enum MatcherLoc<'tt> {
pub(super) enum MatcherLoc {
Token {
token: &'tt Token,
token: Token,
},
Delimited,
Sequence {
@ -123,7 +123,7 @@ enum MatcherLoc<'tt> {
idx_first: usize,
},
SequenceSep {
separator: &'tt Token,
separator: Token,
},
SequenceKleeneOpAfterSep {
idx_first: usize,
@ -131,13 +131,85 @@ enum MatcherLoc<'tt> {
MetaVarDecl {
span: Span,
bind: Ident,
kind: NonterminalKind,
kind: Option<NonterminalKind>,
next_metavar: usize,
seq_depth: usize,
},
Eof,
}
/// Converts a matcher from `&[TokenTree]` form into a flat `Vec<MatcherLoc>`.
///
/// Metavar declarations are numbered in the order they are encountered, starting from zero; the
/// number is stored in the `next_metavar` field of each `MatcherLoc::MetaVarDecl`. A final
/// `MatcherLoc::Eof` entry is always appended, so the returned vector is never empty.
///
/// Note: within this function `sess` is only passed along to the recursive helper; nothing here
/// reads from it.
///
/// # Panics
///
/// Hits `unreachable!()` if `matcher` contains a `TokenTree::MetaVar` or
/// `TokenTree::MetaVarExpr`.
pub(super) fn compute_locs(sess: &ParseSess, matcher: &[TokenTree]) -> Vec<MatcherLoc> {
// Appends the locs for `tts` to `locs`. `next_metavar` is the index that the next metavar
// declaration will receive, and `seq_depth` is the number of sequences enclosing `tts` (it is
// bumped by one on each recursion into a sequence body).
fn inner(
sess: &ParseSess,
tts: &[TokenTree],
locs: &mut Vec<MatcherLoc>,
next_metavar: &mut usize,
seq_depth: usize,
) {
for tt in tts {
match tt {
TokenTree::Token(token) => {
// The token is cloned so that the resulting locs own their data.
locs.push(MatcherLoc::Token { token: token.clone() });
}
TokenTree::Delimited(_, delimited) => {
// A marker loc, followed by the locs for `all_tts` at the same `seq_depth`.
locs.push(MatcherLoc::Delimited);
inner(sess, &delimited.all_tts, locs, next_metavar, seq_depth);
}
TokenTree::Sequence(_, seq) => {
// We can't determine `idx_first_after` and construct the final
// `MatcherLoc::Sequence` until after `inner()` is called and the sequence end
// pieces are processed. So we push a dummy value (`Eof` is cheapest to
// construct) now, and overwrite it with the proper value below.
let dummy = MatcherLoc::Eof;
locs.push(dummy);
// Remember the metavar index in effect before the sequence body is processed.
let next_metavar_orig = *next_metavar;
let op = seq.kleene.op;
// `idx_first` is the index of the first loc of the sequence body; `idx_seq` is the
// index of the dummy pushed just above.
let idx_first = locs.len();
let idx_seq = idx_first - 1;
inner(sess, &seq.tts, locs, next_metavar, seq_depth + 1);
// The sequence end pieces: two locs when there is a separator, one otherwise.
if let Some(separator) = &seq.separator {
locs.push(MatcherLoc::SequenceSep { separator: separator.clone() });
locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
} else {
locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
}
// Overwrite the dummy value pushed above with the proper value.
// `locs.len()` is now the index of the first loc after the whole sequence.
locs[idx_seq] = MatcherLoc::Sequence {
op,
num_metavar_decls: seq.num_captures,
idx_first_after: locs.len(),
next_metavar: next_metavar_orig,
seq_depth,
};
}
&TokenTree::MetaVarDecl(span, bind, kind) => {
// `kind` is stored as-is, so a declaration whose `kind` is `None` still gets a
// loc and still consumes a metavar index.
locs.push(MatcherLoc::MetaVarDecl {
span,
bind,
kind,
next_metavar: *next_metavar,
seq_depth,
});
*next_metavar += 1;
}
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
}
}
}
let mut locs = vec![];
let mut next_metavar = 0;
inner(sess, matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0);
// A final entry is needed for eof.
locs.push(MatcherLoc::Eof);
locs
}
/// A single matcher position, representing the state of matching.
struct MatcherPos {
/// The index into `TtParser::locs`, which represents the "dot".
@ -298,12 +370,9 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool {
// Note: the vectors could be created and dropped within `parse_tt`, but to avoid excess
// allocations we have a single vector for each kind that is cleared and reused repeatedly.
pub struct TtParser<'tt> {
pub struct TtParser {
macro_name: Ident,
/// The matcher of the current rule.
locs: Vec<MatcherLoc<'tt>>,
/// The set of current mps to be processed. This should be empty by the end of a successful
/// execution of `parse_tt_inner`.
cur_mps: Vec<MatcherPos>,
@ -320,11 +389,10 @@ pub struct TtParser<'tt> {
empty_matches: Lrc<NamedMatchVec>,
}
impl<'tt> TtParser<'tt> {
pub(super) fn new(macro_name: Ident) -> TtParser<'tt> {
impl TtParser {
pub(super) fn new(macro_name: Ident) -> TtParser {
TtParser {
macro_name,
locs: vec![],
cur_mps: vec![],
next_mps: vec![],
bb_mps: vec![],
@ -332,99 +400,6 @@ impl<'tt> TtParser<'tt> {
}
}
/// Convert a `&[TokenTree]` to a `&[MatcherLoc]`. Note: this conversion happens every time the
/// macro is called, which may be many times if there are many call sites or if it is
/// recursive. This conversion is fairly cheap and the representation is sufficiently better
/// for matching than `&[TokenTree]` that it's a clear performance win even with the overhead.
/// But it might be possible to move the conversion outwards so it only occurs once per macro.
///
/// The result is stored in `self.locs`, which is cleared first and always ends with a
/// `MatcherLoc::Eof` entry on success.
///
/// Returns `Ok(n)`, where `n` is the number of metavar decls that were given a loc, or
/// `Err((span, "missing fragment specifier"))` when a metavar decl has no kind and its span is
/// successfully removed from `sess.missing_fragment_specifiers`.
fn compute_locs(
&mut self,
sess: &ParseSess,
matcher: &'tt [TokenTree],
) -> Result<usize, (Span, String)> {
// Appends the locs for `tts` to `locs`. `next_metavar` is the index that the next metavar
// declaration will receive, and `seq_depth` is the number of sequences enclosing `tts`.
fn inner<'tt>(
sess: &ParseSess,
tts: &'tt [TokenTree],
locs: &mut Vec<MatcherLoc<'tt>>,
next_metavar: &mut usize,
seq_depth: usize,
) -> Result<(), (Span, String)> {
for tt in tts {
match tt {
TokenTree::Token(token) => {
locs.push(MatcherLoc::Token { token });
}
TokenTree::Delimited(_, delimited) => {
locs.push(MatcherLoc::Delimited);
inner(sess, &delimited.all_tts, locs, next_metavar, seq_depth)?;
}
TokenTree::Sequence(_, seq) => {
// We can't determine `idx_first_after` and construct the final
// `MatcherLoc::Sequence` until after `inner()` is called and the sequence
// end pieces are processed. So we push a dummy value (`Eof` is cheapest to
// construct) now, and overwrite it with the proper value below.
let dummy = MatcherLoc::Eof;
locs.push(dummy);
let next_metavar_orig = *next_metavar;
let op = seq.kleene.op;
// `idx_first` is the index of the first loc of the sequence body; `idx_seq` is the
// index of the dummy pushed just above.
let idx_first = locs.len();
let idx_seq = idx_first - 1;
inner(sess, &seq.tts, locs, next_metavar, seq_depth + 1)?;
if let Some(separator) = &seq.separator {
locs.push(MatcherLoc::SequenceSep { separator });
locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
} else {
locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
}
// Overwrite the dummy value pushed above with the proper value.
locs[idx_seq] = MatcherLoc::Sequence {
op,
num_metavar_decls: seq.num_captures,
idx_first_after: locs.len(),
next_metavar: next_metavar_orig,
seq_depth,
};
}
&TokenTree::MetaVarDecl(span, bind, kind) => {
if let Some(kind) = kind {
locs.push(MatcherLoc::MetaVarDecl {
span,
bind,
kind,
next_metavar: *next_metavar,
seq_depth,
});
*next_metavar += 1;
} else if sess
.missing_fragment_specifiers
.borrow_mut()
.remove(&span)
.is_some()
{
// E.g. `$e` instead of `$e:expr`.
return Err((span, "missing fragment specifier".to_string()));
}
// Otherwise (no kind, and the span was not in `missing_fragment_specifiers`) the
// declaration is skipped: no loc is pushed and `next_metavar` is not advanced.
}
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
}
}
Ok(())
}
// Reuse the existing allocation rather than building a fresh vector.
self.locs.clear();
let mut next_metavar = 0;
inner(sess, matcher, &mut self.locs, &mut next_metavar, /* seq_depth */ 0)?;
// A final entry is needed for eof.
self.locs.push(MatcherLoc::Eof);
// This is the number of metavar decls.
Ok(next_metavar)
}
/// Process the matcher positions of `cur_mps` until it is empty. In the process, this will
/// produce more mps in `next_mps` and `bb_mps`.
///
@ -434,7 +409,8 @@ impl<'tt> TtParser<'tt> {
/// track of through the mps generated.
fn parse_tt_inner(
&mut self,
num_metavar_decls: usize,
sess: &ParseSess,
matcher: &[MatcherLoc],
token: &Token,
) -> Option<NamedParseResult> {
// Matcher positions that would be valid if the macro invocation was over now. Only
@ -442,7 +418,7 @@ impl<'tt> TtParser<'tt> {
let mut eof_mps = EofMatcherPositions::None;
while let Some(mut mp) = self.cur_mps.pop() {
match &self.locs[mp.idx] {
match &matcher[mp.idx] {
MatcherLoc::Token { token: t } => {
// If it's a doc comment, we just ignore it and move on to the next tt in the
// matcher. This is a bug, but #95267 showed that existing programs rely on
@ -532,17 +508,25 @@ impl<'tt> TtParser<'tt> {
mp.idx = idx_first;
self.cur_mps.push(mp);
}
MatcherLoc::MetaVarDecl { kind, .. } => {
&MatcherLoc::MetaVarDecl { span, kind, .. } => {
// Built-in nonterminals never start with these tokens, so we can eliminate
// them from consideration. We use the span of the metavariable declaration
// to determine any edition-specific matching behavior for non-terminals.
if Parser::nonterminal_may_begin_with(*kind, token) {
self.bb_mps.push(mp);
if let Some(kind) = kind {
if Parser::nonterminal_may_begin_with(kind, token) {
self.bb_mps.push(mp);
}
} else {
// Both this check and the one in `nameize` are necessary, surprisingly.
if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
// E.g. `$e` instead of `$e:expr`.
return Some(Error(span, "missing fragment specifier".to_string()));
}
}
}
MatcherLoc::Eof => {
// We are past the matcher's end, and not in a sequence. Try to end things.
debug_assert_eq!(mp.idx, self.locs.len() - 1);
debug_assert_eq!(mp.idx, matcher.len() - 1);
if *token == token::Eof {
eof_mps = match eof_mps {
EofMatcherPositions::None => EofMatcherPositions::One(mp),
@ -560,11 +544,10 @@ impl<'tt> TtParser<'tt> {
if *token == token::Eof {
Some(match eof_mps {
EofMatcherPositions::One(mut eof_mp) => {
assert_eq!(eof_mp.matches.len(), num_metavar_decls);
// Need to take ownership of the matches from within the `Lrc`.
Lrc::make_mut(&mut eof_mp.matches);
let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter();
self.nameize(matches)
self.nameize(sess, matcher, matches)
}
EofMatcherPositions::Multiple => {
Error(token.span, "ambiguity: multiple successful parses".to_string())
@ -586,13 +569,8 @@ impl<'tt> TtParser<'tt> {
pub(super) fn parse_tt(
&mut self,
parser: &mut Cow<'_, Parser<'_>>,
matcher: &'tt [TokenTree],
matcher: &[MatcherLoc],
) -> NamedParseResult {
let num_metavar_decls = match self.compute_locs(parser.sess, matcher) {
Ok(num_metavar_decls) => num_metavar_decls,
Err((span, msg)) => return Error(span, msg),
};
// A queue of possible matcher positions. We initialize it with the matcher position in
// which the "dot" is before the first token of the first token tree in `matcher`.
// `parse_tt_inner` then processes all of these possible matcher positions and produces
@ -607,7 +585,7 @@ impl<'tt> TtParser<'tt> {
// Process `cur_mps` until either we have finished the input or we need to get some
// parsing from the black-box parser done.
if let Some(res) = self.parse_tt_inner(num_metavar_decls, &parser.token) {
if let Some(res) = self.parse_tt_inner(&parser.sess, matcher, &parser.token) {
return res;
}
@ -635,9 +613,13 @@ impl<'tt> TtParser<'tt> {
(0, 1) => {
// We need to call the black-box parser to get some nonterminal.
let mut mp = self.bb_mps.pop().unwrap();
let loc = &self.locs[mp.idx];
let loc = &matcher[mp.idx];
if let &MatcherLoc::MetaVarDecl {
span, kind, next_metavar, seq_depth, ..
span,
kind: Some(kind),
next_metavar,
seq_depth,
..
} = loc
{
// We use the span of the metavariable declaration to determine any
@ -669,7 +651,7 @@ impl<'tt> TtParser<'tt> {
(_, _) => {
// Too many possibilities!
return self.ambiguity_error(parser.token.span);
return self.ambiguity_error(matcher, parser.token.span);
}
}
@ -677,12 +659,18 @@ impl<'tt> TtParser<'tt> {
}
}
fn ambiguity_error(&self, token_span: rustc_span::Span) -> NamedParseResult {
fn ambiguity_error(
&self,
matcher: &[MatcherLoc],
token_span: rustc_span::Span,
) -> NamedParseResult {
let nts = self
.bb_mps
.iter()
.map(|mp| match &self.locs[mp.idx] {
MatcherLoc::MetaVarDecl { bind, kind, .. } => format!("{} ('{}')", kind, bind),
.map(|mp| match &matcher[mp.idx] {
MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. } => {
format!("{} ('{}')", kind, bind)
}
_ => unreachable!(),
})
.collect::<Vec<String>>()
@ -702,16 +690,31 @@ impl<'tt> TtParser<'tt> {
)
}
fn nameize<I: Iterator<Item = NamedMatch>>(&self, mut res: I) -> NamedParseResult {
fn nameize<I: Iterator<Item = NamedMatch>>(
&self,
sess: &ParseSess,
matcher: &[MatcherLoc],
mut res: I,
) -> NamedParseResult {
// Make sure that each metavar has _exactly one_ binding. If so, insert the binding into the
// `NamedParseResult`. Otherwise, it's an error.
let mut ret_val = FxHashMap::default();
for loc in self.locs.iter() {
if let &MatcherLoc::MetaVarDecl { span, bind, .. } = loc {
match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {
Vacant(spot) => spot.insert(res.next().unwrap()),
Occupied(..) => return Error(span, format!("duplicated bind name: {}", bind)),
};
for loc in matcher {
if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc {
if kind.is_some() {
match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {
Vacant(spot) => spot.insert(res.next().unwrap()),
Occupied(..) => {
return Error(span, format!("duplicated bind name: {}", bind));
}
};
} else {
// Both this check and the one in `parse_tt_inner` are necessary, surprisingly.
if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
// E.g. `$e` instead of `$e:expr`.
return Error(span, "missing fragment specifier".to_string());
}
}
}
}
Success(ret_val)

View File

@ -4,7 +4,7 @@ use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstF
use crate::mbe;
use crate::mbe::macro_check;
use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser};
use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree};
use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree, MatcherLoc};
use crate::mbe::transcribe::transcribe;
use rustc_ast as ast;
@ -160,7 +160,7 @@ struct MacroRulesMacroExpander {
name: Ident,
span: Span,
transparency: Transparency,
lhses: Vec<mbe::TokenTree>,
lhses: Vec<Vec<MatcherLoc>>,
rhses: Vec<mbe::TokenTree>,
valid: bool,
is_local: bool,
@ -211,7 +211,7 @@ fn generic_extension<'cx, 'tt>(
name: Ident,
transparency: Transparency,
arg: TokenStream,
lhses: &'tt [mbe::TokenTree],
lhses: &'tt [Vec<MatcherLoc>],
rhses: &'tt [mbe::TokenTree],
is_local: bool,
) -> Box<dyn MacResult + 'cx> {
@ -246,14 +246,6 @@ fn generic_extension<'cx, 'tt>(
// this situation.)
let parser = parser_from_cx(sess, arg.clone());
// A matcher is always delimited, but the delimiters are ignored.
let delimited_inner_tts = |tt: &'tt mbe::TokenTree| -> &'tt [mbe::TokenTree] {
match tt {
mbe::TokenTree::Delimited(_, delimited) => delimited.inner_tts(),
_ => cx.span_bug(sp, "malformed macro lhs"),
}
};
// Try each arm's matchers.
let mut tt_parser = TtParser::new(name);
for (i, lhs) in lhses.iter().enumerate() {
@ -263,13 +255,19 @@ fn generic_extension<'cx, 'tt>(
// are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut());
match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), delimited_inner_tts(lhs)) {
match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) {
Success(named_matches) => {
// The matcher was `Success(..)`ful.
// Merge the gated spans from parsing the matcher with the pre-existing ones.
sess.gated_spans.merge(gated_spans_snapshot);
let rhs = delimited_inner_tts(&rhses[i]).to_vec().clone();
// Ignore the delimiters on the RHS.
let rhs = match &rhses[i] {
mbe::TokenTree::Delimited(_, delimited) => {
delimited.inner_tts().to_vec().clone()
}
_ => cx.span_bug(sp, "malformed macro rhs"),
};
let arm_span = rhses[i].span();
let rhs_spans = rhs.iter().map(|t| t.span()).collect::<Vec<_>>();
@ -347,10 +345,8 @@ fn generic_extension<'cx, 'tt>(
// Check whether there's a missing comma in this macro call, like `println!("{}" a);`
if let Some((arg, comma_span)) = arg.add_comma() {
for lhs in lhses {
if let Success(_) = tt_parser.parse_tt(
&mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())),
delimited_inner_tts(lhs),
) {
let parser = parser_from_cx(sess, arg.clone());
if let Success(_) = tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) {
if comma_span.is_dummy() {
err.note("you might be missing a comma");
} else {
@ -441,6 +437,8 @@ pub fn compile_declarative_macro(
}),
),
];
// Convert it into `MatcherLoc` form.
let argument_gram = mbe::macro_parser::compute_locs(&sess.parse_sess, &argument_gram);
let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS);
let mut tt_parser = TtParser::new(def.ident);
@ -537,6 +535,25 @@ pub fn compile_declarative_macro(
None => {}
}
// Convert the lhses into `MatcherLoc` form, which is better for doing the
// actual matching. Unless the matcher is invalid.
let lhses = if valid {
lhses
.iter()
.map(|lhs| {
// Ignore the delimiters around the matcher.
match lhs {
mbe::TokenTree::Delimited(_, delimited) => {
mbe::macro_parser::compute_locs(&sess.parse_sess, delimited.inner_tts())
}
_ => sess.parse_sess.span_diagnostic.span_bug(def.span, "malformed macro lhs"),
}
})
.collect()
} else {
vec![]
};
mk_syn_ext(Box::new(MacroRulesMacroExpander {
name: def.ident,
span: def.span,