mirror of
https://github.com/rust-lang/rust.git
synced 2025-05-09 08:27:38 +00:00

It uses `once` chained with `(0..self.num_calls).map(...)` followed by `.take(self.num_calls`. I found this hard to read. It's simpler to just use `repeat_with`.
466 lines
20 KiB
Rust
466 lines
20 KiB
Rust
use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCursor, TrailingToken};
|
|
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
|
|
use rustc_ast::tokenstream::{AttrTokenStream, AttributesData, ToAttrTokenStream};
|
|
use rustc_ast::tokenstream::{AttrTokenTree, DelimSpan, LazyAttrTokenStream, Spacing};
|
|
use rustc_ast::{self as ast};
|
|
use rustc_ast::{AttrVec, Attribute, HasAttrs, HasTokens};
|
|
use rustc_errors::PResult;
|
|
use rustc_session::parse::ParseSess;
|
|
use rustc_span::{sym, Span, DUMMY_SP};
|
|
|
|
use std::ops::Range;
|
|
|
|
/// A wrapper type to ensure that the parser handles outer attributes correctly.
|
|
/// When we parse outer attributes, we need to ensure that we capture tokens
|
|
/// for the attribute target. This allows us to perform cfg-expansion on
|
|
/// a token stream before we invoke a derive proc-macro.
|
|
///
|
|
/// This wrapper prevents direct access to the underlying `ast::AttrVec>`.
|
|
/// Parsing code can only get access to the underlying attributes
|
|
/// by passing an `AttrWrapper` to `collect_tokens_trailing_tokens`.
|
|
/// This makes it difficult to accidentally construct an AST node
|
|
/// (which stores an `ast::AttrVec`) without first collecting tokens.
|
|
///
|
|
/// This struct has its own module, to ensure that the parser code
|
|
/// cannot directly access the `attrs` field
|
|
#[derive(Debug, Clone)]
|
|
pub struct AttrWrapper {
|
|
attrs: AttrVec,
|
|
// The start of the outer attributes in the token cursor.
|
|
// This allows us to create a `ReplaceRange` for the entire attribute
|
|
// target, including outer attributes.
|
|
start_pos: usize,
|
|
}
|
|
|
|
impl AttrWrapper {
|
|
pub(super) fn new(attrs: AttrVec, start_pos: usize) -> AttrWrapper {
|
|
AttrWrapper { attrs, start_pos }
|
|
}
|
|
pub fn empty() -> AttrWrapper {
|
|
AttrWrapper { attrs: AttrVec::new(), start_pos: usize::MAX }
|
|
}
|
|
|
|
pub(crate) fn take_for_recovery(self, sess: &ParseSess) -> AttrVec {
|
|
sess.span_diagnostic.delay_span_bug(
|
|
self.attrs.get(0).map(|attr| attr.span).unwrap_or(DUMMY_SP),
|
|
"AttrVec is taken for recovery but no error is produced",
|
|
);
|
|
|
|
self.attrs
|
|
}
|
|
|
|
/// Prepend `self.attrs` to `attrs`.
|
|
// FIXME: require passing an NT to prevent misuse of this method
|
|
pub(crate) fn prepend_to_nt_inner(self, attrs: &mut AttrVec) {
|
|
let mut self_attrs = self.attrs;
|
|
std::mem::swap(attrs, &mut self_attrs);
|
|
attrs.extend(self_attrs);
|
|
}
|
|
|
|
pub fn is_empty(&self) -> bool {
|
|
self.attrs.is_empty()
|
|
}
|
|
|
|
pub fn is_complete(&self) -> bool {
|
|
crate::parser::attr::is_complete(&self.attrs)
|
|
}
|
|
}
|
|
|
|
/// Returns `true` if `attrs` contains a `cfg` or `cfg_attr` attribute
|
|
fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
|
|
// NOTE: Builtin attributes like `cfg` and `cfg_attr` cannot be renamed via imports.
|
|
// Therefore, the absence of a literal `cfg` or `cfg_attr` guarantees that
|
|
// we don't need to do any eager expansion.
|
|
attrs.iter().any(|attr| {
|
|
attr.ident().is_some_and(|ident| ident.name == sym::cfg || ident.name == sym::cfg_attr)
|
|
})
|
|
}
|
|
|
|
// Produces a `TokenStream` on-demand. Using `cursor_snapshot`
|
|
// and `num_calls`, we can reconstruct the `TokenStream` seen
|
|
// by the callback. This allows us to avoid producing a `TokenStream`
|
|
// if it is never needed - for example, a captured `macro_rules!`
|
|
// argument that is never passed to a proc macro.
|
|
// In practice token stream creation happens rarely compared to
|
|
// calls to `collect_tokens` (see some statistics in #78736),
|
|
// so we are doing as little up-front work as possible.
|
|
//
|
|
// This also makes `Parser` very cheap to clone, since
|
|
// there is no intermediate collection buffer to clone.
|
|
#[derive(Clone)]
|
|
struct LazyAttrTokenStreamImpl {
|
|
start_token: (Token, Spacing),
|
|
cursor_snapshot: TokenCursor,
|
|
num_calls: usize,
|
|
break_last_token: bool,
|
|
replace_ranges: Box<[ReplaceRange]>,
|
|
}
|
|
|
|
impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
|
|
fn to_attr_token_stream(&self) -> AttrTokenStream {
|
|
// The token produced by the final call to `{,inlined_}next` was not
|
|
// actually consumed by the callback. The combination of chaining the
|
|
// initial token and using `take` produces the desired result - we
|
|
// produce an empty `TokenStream` if no calls were made, and omit the
|
|
// final token otherwise.
|
|
let mut cursor_snapshot = self.cursor_snapshot.clone();
|
|
let tokens =
|
|
std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1))
|
|
.chain(std::iter::repeat_with(|| {
|
|
let token = cursor_snapshot.next();
|
|
(FlatToken::Token(token.0), token.1)
|
|
}))
|
|
.take(self.num_calls);
|
|
|
|
if !self.replace_ranges.is_empty() {
|
|
let mut tokens: Vec<_> = tokens.collect();
|
|
let mut replace_ranges = self.replace_ranges.to_vec();
|
|
replace_ranges.sort_by_key(|(range, _)| range.start);
|
|
|
|
#[cfg(debug_assertions)]
|
|
{
|
|
for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() {
|
|
assert!(
|
|
range.end <= next_range.start || range.end >= next_range.end,
|
|
"Replace ranges should either be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
|
|
range,
|
|
tokens,
|
|
next_range,
|
|
next_tokens,
|
|
);
|
|
}
|
|
}
|
|
|
|
// Process the replace ranges, starting from the highest start
|
|
// position and working our way back. If have tokens like:
|
|
//
|
|
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
|
|
//
|
|
// Then we will generate replace ranges for both
|
|
// the `#[cfg(FALSE)] field: bool` and the entire
|
|
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
|
|
//
|
|
// By starting processing from the replace range with the greatest
|
|
// start position, we ensure that any replace range which encloses
|
|
// another replace range will capture the *replaced* tokens for the inner
|
|
// range, not the original tokens.
|
|
for (range, new_tokens) in replace_ranges.into_iter().rev() {
|
|
assert!(!range.is_empty(), "Cannot replace an empty range: {range:?}");
|
|
// Replace ranges are only allowed to decrease the number of tokens.
|
|
assert!(
|
|
range.len() >= new_tokens.len(),
|
|
"Range {range:?} has greater len than {new_tokens:?}"
|
|
);
|
|
|
|
// Replace any removed tokens with `FlatToken::Empty`.
|
|
// This keeps the total length of `tokens` constant throughout the
|
|
// replacement process, allowing us to use all of the `ReplaceRanges` entries
|
|
// without adjusting indices.
|
|
let filler = std::iter::repeat((FlatToken::Empty, Spacing::Alone))
|
|
.take(range.len() - new_tokens.len());
|
|
|
|
tokens.splice(
|
|
(range.start as usize)..(range.end as usize),
|
|
new_tokens.into_iter().chain(filler),
|
|
);
|
|
}
|
|
make_token_stream(tokens.into_iter(), self.break_last_token)
|
|
} else {
|
|
make_token_stream(tokens, self.break_last_token)
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> Parser<'a> {
|
|
/// Records all tokens consumed by the provided callback,
|
|
/// including the current token. These tokens are collected
|
|
/// into a `LazyAttrTokenStream`, and returned along with the result
|
|
/// of the callback.
|
|
///
|
|
/// Note: If your callback consumes an opening delimiter
|
|
/// (including the case where you call `collect_tokens`
|
|
/// when the current token is an opening delimiter),
|
|
/// you must also consume the corresponding closing delimiter.
|
|
///
|
|
/// That is, you can consume
|
|
/// `something ([{ }])` or `([{}])`, but not `([{}]`
|
|
///
|
|
/// This restriction shouldn't be an issue in practice,
|
|
/// since this function is used to record the tokens for
|
|
/// a parsed AST item, which always has matching delimiters.
|
|
pub fn collect_tokens_trailing_token<R: HasAttrs + HasTokens>(
|
|
&mut self,
|
|
attrs: AttrWrapper,
|
|
force_collect: ForceCollect,
|
|
f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, (R, TrailingToken)>,
|
|
) -> PResult<'a, R> {
|
|
// We only bail out when nothing could possibly observe the collected tokens:
|
|
// 1. We cannot be force collecting tokens (since force-collecting requires tokens
|
|
// by definition
|
|
if matches!(force_collect, ForceCollect::No)
|
|
// None of our outer attributes can require tokens (e.g. a proc-macro)
|
|
&& attrs.is_complete()
|
|
// If our target supports custom inner attributes, then we cannot bail
|
|
// out early, since we may need to capture tokens for a custom inner attribute
|
|
// invocation.
|
|
&& !R::SUPPORTS_CUSTOM_INNER_ATTRS
|
|
// Never bail out early in `capture_cfg` mode, since there might be `#[cfg]`
|
|
// or `#[cfg_attr]` attributes.
|
|
&& !self.capture_cfg
|
|
{
|
|
return Ok(f(self, attrs.attrs)?.0);
|
|
}
|
|
|
|
let start_token = (self.token.clone(), self.token_spacing);
|
|
let cursor_snapshot = self.token_cursor.clone();
|
|
let start_pos = self.num_bump_calls;
|
|
|
|
let has_outer_attrs = !attrs.attrs.is_empty();
|
|
let prev_capturing = std::mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
|
|
let replace_ranges_start = self.capture_state.replace_ranges.len();
|
|
|
|
let ret = f(self, attrs.attrs);
|
|
|
|
self.capture_state.capturing = prev_capturing;
|
|
|
|
let (mut ret, trailing) = ret?;
|
|
|
|
// When we're not in `capture-cfg` mode, then bail out early if:
|
|
// 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`)
|
|
// so there's nothing for us to do.
|
|
// 2. Our target already has tokens set (e.g. we've parsed something
|
|
// like `#[my_attr] $item`. The actual parsing code takes care of prepending
|
|
// any attributes to the nonterminal, so we don't need to modify the
|
|
// already captured tokens.
|
|
// Note that this check is independent of `force_collect`- if we already
|
|
// have tokens, or can't even store them, then there's never a need to
|
|
// force collection of new tokens.
|
|
if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
|
|
return Ok(ret);
|
|
}
|
|
|
|
// This is very similar to the bail out check at the start of this function.
|
|
// Now that we've parsed an AST node, we have more information available.
|
|
if matches!(force_collect, ForceCollect::No)
|
|
// We now have inner attributes available, so this check is more precise
|
|
// than `attrs.is_complete()` at the start of the function.
|
|
// As a result, we don't need to check `R::SUPPORTS_CUSTOM_INNER_ATTRS`
|
|
&& crate::parser::attr::is_complete(ret.attrs())
|
|
// Subtle: We call `has_cfg_or_cfg_attr` with the attrs from `ret`.
|
|
// This ensures that we consider inner attributes (e.g. `#![cfg]`),
|
|
// which require us to have tokens available
|
|
// We also call `has_cfg_or_cfg_attr` at the beginning of this function,
|
|
// but we only bail out if there's no possibility of inner attributes
|
|
// (!R::SUPPORTS_CUSTOM_INNER_ATTRS)
|
|
// We only capture about `#[cfg]` or `#[cfg_attr]` in `capture_cfg`
|
|
// mode - during normal parsing, we don't need any special capturing
|
|
// for those attributes, since they're builtin.
|
|
&& !(self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()))
|
|
{
|
|
return Ok(ret);
|
|
}
|
|
|
|
let mut inner_attr_replace_ranges = Vec::new();
|
|
// Take the captured ranges for any inner attributes that we parsed.
|
|
for inner_attr in ret.attrs().iter().filter(|a| a.style == ast::AttrStyle::Inner) {
|
|
if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&inner_attr.id) {
|
|
inner_attr_replace_ranges.push(attr_range);
|
|
} else {
|
|
self.sess
|
|
.span_diagnostic
|
|
.delay_span_bug(inner_attr.span, "Missing token range for attribute");
|
|
}
|
|
}
|
|
|
|
let replace_ranges_end = self.capture_state.replace_ranges.len();
|
|
|
|
let mut end_pos = self.num_bump_calls;
|
|
|
|
let mut captured_trailing = false;
|
|
|
|
// Capture a trailing token if requested by the callback 'f'
|
|
match trailing {
|
|
TrailingToken::None => {}
|
|
TrailingToken::Gt => {
|
|
assert_eq!(self.token.kind, token::Gt);
|
|
}
|
|
TrailingToken::Semi => {
|
|
assert_eq!(self.token.kind, token::Semi);
|
|
end_pos += 1;
|
|
captured_trailing = true;
|
|
}
|
|
TrailingToken::MaybeComma => {
|
|
if self.token.kind == token::Comma {
|
|
end_pos += 1;
|
|
captured_trailing = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens),
|
|
// then extend the range of captured tokens to include it, since the parser
|
|
// was not actually bumped past it. When the `LazyAttrTokenStream` gets converted
|
|
// into an `AttrTokenStream`, we will create the proper token.
|
|
if self.break_last_token {
|
|
assert!(!captured_trailing, "Cannot set break_last_token and have trailing token");
|
|
end_pos += 1;
|
|
}
|
|
|
|
let num_calls = end_pos - start_pos;
|
|
|
|
// If we have no attributes, then we will never need to
|
|
// use any replace ranges.
|
|
let replace_ranges: Box<[ReplaceRange]> = if ret.attrs().is_empty() && !self.capture_cfg {
|
|
Box::new([])
|
|
} else {
|
|
// Grab any replace ranges that occur *inside* the current AST node.
|
|
// We will perform the actual replacement when we convert the `LazyAttrTokenStream`
|
|
// to an `AttrTokenStream`.
|
|
let start_calls: u32 = start_pos.try_into().unwrap();
|
|
self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end]
|
|
.iter()
|
|
.cloned()
|
|
.chain(inner_attr_replace_ranges.iter().cloned())
|
|
.map(|(range, tokens)| {
|
|
((range.start - start_calls)..(range.end - start_calls), tokens)
|
|
})
|
|
.collect()
|
|
};
|
|
|
|
let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
|
|
start_token,
|
|
num_calls,
|
|
cursor_snapshot,
|
|
break_last_token: self.break_last_token,
|
|
replace_ranges,
|
|
});
|
|
|
|
// If we support tokens at all
|
|
if let Some(target_tokens) = ret.tokens_mut() {
|
|
if target_tokens.is_none() {
|
|
// Store se our newly captured tokens into the AST node
|
|
*target_tokens = Some(tokens.clone());
|
|
}
|
|
}
|
|
|
|
let final_attrs = ret.attrs();
|
|
|
|
// If `capture_cfg` is set and we're inside a recursive call to
|
|
// `collect_tokens_trailing_token`, then we need to register a replace range
|
|
// if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion
|
|
// on the captured token stream.
|
|
if self.capture_cfg
|
|
&& matches!(self.capture_state.capturing, Capturing::Yes)
|
|
&& has_cfg_or_cfg_attr(final_attrs)
|
|
{
|
|
let attr_data = AttributesData { attrs: final_attrs.iter().cloned().collect(), tokens };
|
|
|
|
// Replace the entire AST node that we just parsed, including attributes,
|
|
// with a `FlatToken::AttrTarget`. If this AST node is inside an item
|
|
// that has `#[derive]`, then this will allow us to cfg-expand this
|
|
// AST node.
|
|
let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos };
|
|
let new_tokens = vec![(FlatToken::AttrTarget(attr_data), Spacing::Alone)];
|
|
|
|
assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");
|
|
let range: Range<u32> = (start_pos.try_into().unwrap())..(end_pos.try_into().unwrap());
|
|
self.capture_state.replace_ranges.push((range, new_tokens));
|
|
self.capture_state.replace_ranges.extend(inner_attr_replace_ranges);
|
|
}
|
|
|
|
// Only clear our `replace_ranges` when we're finished capturing entirely.
|
|
if matches!(self.capture_state.capturing, Capturing::No) {
|
|
self.capture_state.replace_ranges.clear();
|
|
// We don't clear `inner_attr_ranges`, as doing so repeatedly
|
|
// had a measurable performance impact. Most inner attributes that
|
|
// we insert will get removed - when we drop the parser, we'll free
|
|
// up the memory used by any attributes that we didn't remove from the map.
|
|
}
|
|
Ok(ret)
|
|
}
|
|
}
|
|
|
|
/// Converts a flattened iterator of tokens (including open and close delimiter tokens)
|
|
/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
|
|
/// of open and close delims.
|
|
fn make_token_stream(
|
|
mut iter: impl Iterator<Item = (FlatToken, Spacing)>,
|
|
break_last_token: bool,
|
|
) -> AttrTokenStream {
|
|
#[derive(Debug)]
|
|
struct FrameData {
|
|
// This is `None` for the first frame, `Some` for all others.
|
|
open_delim_sp: Option<(Delimiter, Span)>,
|
|
inner: Vec<AttrTokenTree>,
|
|
}
|
|
let mut stack = vec![FrameData { open_delim_sp: None, inner: vec![] }];
|
|
let mut token_and_spacing = iter.next();
|
|
while let Some((token, spacing)) = token_and_spacing {
|
|
match token {
|
|
FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => {
|
|
stack.push(FrameData { open_delim_sp: Some((delim, span)), inner: vec![] });
|
|
}
|
|
FlatToken::Token(Token { kind: TokenKind::CloseDelim(delim), span }) => {
|
|
let frame_data = stack
|
|
.pop()
|
|
.unwrap_or_else(|| panic!("Token stack was empty for token: {token:?}"));
|
|
|
|
let (open_delim, open_sp) = frame_data.open_delim_sp.unwrap();
|
|
assert_eq!(
|
|
open_delim, delim,
|
|
"Mismatched open/close delims: open={open_delim:?} close={span:?}"
|
|
);
|
|
let dspan = DelimSpan::from_pair(open_sp, span);
|
|
let stream = AttrTokenStream::new(frame_data.inner);
|
|
let delimited = AttrTokenTree::Delimited(dspan, delim, stream);
|
|
stack
|
|
.last_mut()
|
|
.unwrap_or_else(|| panic!("Bottom token frame is missing for token: {token:?}"))
|
|
.inner
|
|
.push(delimited);
|
|
}
|
|
FlatToken::Token(token) => stack
|
|
.last_mut()
|
|
.expect("Bottom token frame is missing!")
|
|
.inner
|
|
.push(AttrTokenTree::Token(token, spacing)),
|
|
FlatToken::AttrTarget(data) => stack
|
|
.last_mut()
|
|
.expect("Bottom token frame is missing!")
|
|
.inner
|
|
.push(AttrTokenTree::Attributes(data)),
|
|
FlatToken::Empty => {}
|
|
}
|
|
token_and_spacing = iter.next();
|
|
}
|
|
let mut final_buf = stack.pop().expect("Missing final buf!");
|
|
if break_last_token {
|
|
let last_token = final_buf.inner.pop().unwrap();
|
|
if let AttrTokenTree::Token(last_token, spacing) = last_token {
|
|
let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
|
|
|
|
// An 'unglued' token is always two ASCII characters
|
|
let mut first_span = last_token.span.shrink_to_lo();
|
|
first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
|
|
|
|
final_buf
|
|
.inner
|
|
.push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
|
|
} else {
|
|
panic!("Unexpected last token {last_token:?}")
|
|
}
|
|
}
|
|
AttrTokenStream::new(final_buf.inner)
|
|
}
|
|
|
|
// Some types are used a lot. Make sure they don't unintentionally get bigger.
|
|
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
|
|
mod size_asserts {
|
|
use super::*;
|
|
use rustc_data_structures::static_assert_size;
|
|
// tidy-alphabetical-start
|
|
static_assert_size!(AttrWrapper, 16);
|
|
static_assert_size!(LazyAttrTokenStreamImpl, 104);
|
|
// tidy-alphabetical-end
|
|
}
|