pub mod attr;
mod attr_wrapper;
mod diagnostics;
mod expr;
mod generics;
mod item;
mod nonterminal;
mod pat;
mod path;
mod stmt;
mod ty;

use crate::lexer::UnmatchedDelim;
use ast::token::IdentIsRaw;
pub use attr_wrapper::AttrWrapper;
pub use diagnostics::AttemptLocalParseRecovery;
pub(crate) use expr::ForbiddenLetReason;
pub(crate) use item::FnParseMode;
pub use pat::{CommaRecoveryMode, RecoverColon, RecoverComma};
pub use path::PathStyle;

use rustc_ast::ptr::P;
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::{AttributesData, DelimSpacing, DelimSpan, Spacing};
use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
use rustc_ast::util::case::Case;
use rustc_ast::{
    self as ast, AnonConst, AttrArgs, AttrArgsEq, AttrId, ByRef, Const, CoroutineKind, DelimArgs,
    Expr, ExprKind, Extern, HasAttrs, HasTokens, Mutability, StrLit, Unsafe, Visibility,
    VisibilityKind, DUMMY_NODE_ID,
};
use rustc_ast_pretty::pprust;
use rustc_data_structures::fx::FxHashMap;
use rustc_errors::PResult;
use rustc_errors::{Applicability, Diag, FatalError, MultiSpan};
use rustc_session::parse::ParseSess;
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::{Span, DUMMY_SP};
use std::ops::Range;
use std::{mem, slice};
use thin_vec::ThinVec;
use tracing::debug;

use crate::errors::{
    self, IncorrectVisibilityRestriction, MismatchedClosingDelimiter, NonStringAbiLiteral,
};

bitflags::bitflags! {
    #[derive(Clone, Copy)]
    struct Restrictions: u8 {
        const STMT_EXPR = 1 << 0;
        const NO_STRUCT_LITERAL = 1 << 1;
        const CONST_EXPR = 1 << 2;
        const ALLOW_LET = 1 << 3;
        const IN_IF_GUARD = 1 << 4;
        const IS_PAT = 1 << 5;
    }
}
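
// A hedged sketch of how these flags are meant to be read elsewhere in the
// parser: expression-parsing code checks the active restrictions before
// accepting certain forms, e.g. a struct literal in an `if` condition. The
// call site below is illustrative, not a quote of real code.
//
//     if !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL) {
//         // parsing a struct literal expression is permitted here
//     }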

#[derive(Clone, Copy, PartialEq, Debug)]
enum SemiColonMode {
    Break,
    Ignore,
    Comma,
}

#[derive(Clone, Copy, PartialEq, Debug)]
enum BlockMode {
    Break,
    Ignore,
}

/// Whether or not we should force collection of tokens for an AST node,
/// regardless of whether or not it has attributes.
#[derive(Clone, Copy, PartialEq)]
pub enum ForceCollect {
    Yes,
    No,
}

#[derive(Debug, Eq, PartialEq)]
pub enum TrailingToken {
    None,
    Semi,
    Gt,
    /// If the trailing token is a comma, then capture it.
    /// Otherwise, ignore the trailing token.
    MaybeComma,
}

/// Like `maybe_whole_expr`, but for things other than expressions.
#[macro_export]
macro_rules! maybe_whole {
    ($p:expr, $constructor:ident, |$x:ident| $e:expr) => {
        if let token::Interpolated(nt) = &$p.token.kind
            && let token::$constructor(x) = &nt.0
        {
            #[allow(unused_mut)]
            let mut $x = x.clone();
            $p.bump();
            return Ok($e);
        }
    };
}
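
// A minimal sketch of a typical `maybe_whole!` call site, assuming a parser
// method returning `PResult<'a, P<Block>>`; the method name is illustrative.
// If the current token is an interpolated `NtBlock` nonterminal, the macro
// clones the block, bumps past the token, and makes the method return early
// instead of re-parsing the block token-by-token.
//
//     fn parse_block_sketch(&mut self) -> PResult<'a, P<Block>> {
//         maybe_whole!(self, NtBlock, |block| block);
//         // ... fall through to ordinary token-by-token block parsing ...
//     }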

/// If the next tokens are ill-formed `$ty::` recover them as `<$ty>::`.
#[macro_export]
macro_rules! maybe_recover_from_interpolated_ty_qpath {
    ($self: expr, $allow_qpath_recovery: expr) => {
        if $allow_qpath_recovery
            && $self.may_recover()
            && $self.look_ahead(1, |t| t == &token::PathSep)
            && let token::Interpolated(nt) = &$self.token.kind
            && let token::NtTy(ty) = &nt.0
        {
            let ty = ty.clone();
            $self.bump();
            return $self.maybe_recover_from_bad_qpath_stage_2($self.prev_token.span, ty);
        }
    };
}
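
// An illustrative (not quoted) example of the recovery this macro performs:
// inside a declarative macro, `$ty::CONST` expands with `$ty` as an
// interpolated `NtTy` token, which cannot start a path. The recovery rewrites
// it to the qualified-path form the user almost certainly meant:
//
//     $ty::CONST      // ill-formed: a type nonterminal followed by `::`
//     <$ty>::CONST    // recovered form suggested to the user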

#[derive(Clone, Copy)]
pub enum Recovery {
    Allowed,
    Forbidden,
}

#[derive(Clone)]
pub struct Parser<'a> {
    pub psess: &'a ParseSess,
    /// The current token.
    pub token: Token,
    /// The spacing for the current token.
    pub token_spacing: Spacing,
    /// The previous token.
    pub prev_token: Token,
    pub capture_cfg: bool,
    restrictions: Restrictions,
    expected_tokens: Vec<TokenType>,
    token_cursor: TokenCursor,
    // The number of calls to `bump`, i.e. the position in the token stream.
    num_bump_calls: usize,
    // During parsing we may sometimes need to 'unglue' a glued token into two
    // component tokens (e.g. '>>' into '>' and '>'), so the parser can consume
    // them one at a time. This process bypasses the normal capturing mechanism
    // (e.g. `num_bump_calls` will not be incremented), since the 'unglued'
    // tokens do not exist in the original `TokenStream`.
    //
    // If we end up consuming both unglued tokens, this is not an issue. We'll
    // end up capturing the single 'glued' token.
    //
    // However, sometimes we may want to capture just the first 'unglued'
    // token. For example, capturing the `Vec<u8>` in `Option<Vec<u8>>`
    // requires us to unglue the trailing `>>` token. The `break_last_token`
    // field is used to track this token. It gets appended to the captured
    // stream when we evaluate a `LazyAttrTokenStream`.
    break_last_token: bool,
    /// This field is used to keep track of how many left angle brackets we have seen. This is
    /// required in order to detect extra leading left angle brackets (`<` characters) and error
    /// appropriately.
    ///
    /// See the comments in the `parse_path_segment` function for more details.
    unmatched_angle_bracket_count: u16,
    angle_bracket_nesting: u16,

    last_unexpected_token_span: Option<Span>,
    /// If present, this `Parser` is not parsing Rust code but rather a macro call.
    subparser_name: Option<&'static str>,
    capture_state: CaptureState,
    /// This allows us to recover when the user forgets to add braces around
    /// multiple statements in the closure body.
    pub current_closure: Option<ClosureSpans>,
    /// Whether the parser is allowed to do recovery.
    /// This is disabled when parsing macro arguments, see #103534.
    pub recovery: Recovery,
}

// This type is used a lot, e.g. it's cloned when matching many declarative macro rules with
// nonterminals. Make sure it doesn't unintentionally get bigger.
#[cfg(target_pointer_width = "64")]
rustc_data_structures::static_assert_size!(Parser<'_>, 264);

/// Stores span information about a closure.
#[derive(Clone)]
pub struct ClosureSpans {
    pub whole_closure: Span,
    pub closing_pipe: Span,
    pub body: Span,
}

/// Indicates a range of tokens that should be replaced by
/// the tokens in the provided vector. This is used in two
/// places during token collection:
///
/// 1. During the parsing of an AST node that may have a `#[derive]`
/// attribute, we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]`.
/// In this case, we use a `ReplaceRange` to replace the entire inner AST node
/// with `FlatToken::AttrTarget`, allowing us to perform eager cfg-expansion
/// on an `AttrTokenStream`.
///
/// 2. When we parse an inner attribute while collecting tokens. We
/// remove inner attributes from the token stream entirely, and
/// instead track them through the `attrs` field on the AST node.
/// This allows us to easily manipulate them (for example, removing
/// the first macro inner attribute to invoke a proc-macro).
/// When we create a `TokenStream`, the inner attributes get inserted
/// into the proper place in the token stream.
pub type ReplaceRange = (Range<u32>, Vec<(FlatToken, Spacing)>);
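
// A hypothetical illustration of the two uses described above; the positions
// and `attr_data` value are made up. An empty replacement vector deletes the
// tokens in the range (as for an inner attribute), while a non-empty vector
// substitutes a `FlatToken::AttrTarget` for the range (as for a node that
// needs eager cfg-expansion).
//
//     let delete_inner_attr: ReplaceRange = (3..7, Vec::new());
//     let replace_with_target: ReplaceRange =
//         (10..25, vec![(FlatToken::AttrTarget(attr_data), Spacing::Alone)]);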

/// Controls how we capture tokens. Capturing can be expensive,
/// so we try to avoid performing capturing in cases where
/// we will never need an `AttrTokenStream`.
#[derive(Copy, Clone)]
pub enum Capturing {
    /// We aren't performing any capturing - this is the default mode.
    No,
    /// We are capturing tokens.
    Yes,
}

#[derive(Clone)]
struct CaptureState {
    capturing: Capturing,
    replace_ranges: Vec<ReplaceRange>,
    inner_attr_ranges: FxHashMap<AttrId, ReplaceRange>,
}

/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
/// use this type to emit them as a linear sequence. But a linear sequence is
/// what the parser expects, for the most part.
#[derive(Clone)]
struct TokenCursor {
    // Cursor for the current (innermost) token stream. The delimiters for this
    // token stream are found in `self.stack.last()`; when that is `None` then
    // we are in the outermost token stream which never has delimiters.
    tree_cursor: TokenTreeCursor,

    // Token streams surrounding the current one. The delimiters for stack[n]'s
    // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
    // because it's the outermost token stream which never has delimiters.
    stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>,
}

impl TokenCursor {
    fn next(&mut self) -> (Token, Spacing) {
        self.inlined_next()
    }

    /// This always-inlined version should only be used on hot code paths.
    #[inline(always)]
    fn inlined_next(&mut self) -> (Token, Spacing) {
        loop {
            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
            // below can be removed.
            if let Some(tree) = self.tree_cursor.next_ref() {
                match tree {
                    &TokenTree::Token(ref token, spacing) => {
                        debug_assert!(!matches!(
                            token.kind,
                            token::OpenDelim(_) | token::CloseDelim(_)
                        ));
                        return (token.clone(), spacing);
                    }
                    &TokenTree::Delimited(sp, spacing, delim, ref tts) => {
                        let trees = tts.clone().into_trees();
                        self.stack.push((
                            mem::replace(&mut self.tree_cursor, trees),
                            sp,
                            spacing,
                            delim,
                        ));
                        if delim != Delimiter::Invisible {
                            return (Token::new(token::OpenDelim(delim), sp.open), spacing.open);
                        }
                        // No open delimiter to return; continue on to the next iteration.
                    }
                };
            } else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() {
                // We have exhausted this token stream. Move back to its parent token stream.
                self.tree_cursor = tree_cursor;
                if delim != Delimiter::Invisible {
                    return (Token::new(token::CloseDelim(delim), span.close), spacing.close);
                }
                // No close delimiter to return; continue on to the next iteration.
            } else {
                // We have exhausted the outermost token stream. The use of
                // `Spacing::Alone` is arbitrary and immaterial, because the
                // `Eof` token's spacing is never used.
                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
            }
        }
    }
}
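
// A worked example (illustrative, not quoted from tests) of the flattening
// this cursor performs: for the source `a + (b)`, the tree-shaped stream is
// `[Token(a), Token(+), Delimited('(', [Token(b)])]`, and successive calls to
// `next()` yield the linear sequence
//
//     a   +   (   b   )   <Eof>
//
// with the `(` and `)` tokens synthesized from the `Delimited` node's span
// and spacing information as the cursor descends into, and climbs out of,
// the subtree.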

#[derive(Debug, Clone, PartialEq)]
enum TokenType {
    Token(TokenKind),
    Keyword(Symbol),
    Operator,
    Lifetime,
    Ident,
    Path,
    Type,
    Const,
}

impl TokenType {
    fn to_string(&self) -> String {
        match self {
            TokenType::Token(t) => format!("`{}`", pprust::token_kind_to_string(t)),
            TokenType::Keyword(kw) => format!("`{kw}`"),
            TokenType::Operator => "an operator".to_string(),
            TokenType::Lifetime => "lifetime".to_string(),
            TokenType::Ident => "identifier".to_string(),
            TokenType::Path => "path".to_string(),
            TokenType::Type => "type".to_string(),
            TokenType::Const => "a const expression".to_string(),
        }
    }
}

/// Used by [`Parser::expect_any_with_type`].
#[derive(Copy, Clone, Debug)]
enum TokenExpectType {
    /// Unencountered tokens are inserted into [`Parser::expected_tokens`].
    /// See [`Parser::check`].
    Expect,

    /// Unencountered tokens are not inserted into [`Parser::expected_tokens`].
    /// See [`Parser::check_noexpect`].
    NoExpect,
}

/// A sequence separator.
struct SeqSep {
    /// The separator token.
    sep: Option<TokenKind>,
    /// `true` if a trailing separator is allowed.
    trailing_sep_allowed: bool,
}

impl SeqSep {
    fn trailing_allowed(t: TokenKind) -> SeqSep {
        SeqSep { sep: Some(t), trailing_sep_allowed: true }
    }

    fn none() -> SeqSep {
        SeqSep { sep: None, trailing_sep_allowed: false }
    }
}
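
// A hedged sketch of the two constructors in use; the surrounding context is
// illustrative. Comma-separated lists such as `[a, b, c,]` allow a trailing
// separator, while juxtaposed sequences use no separator at all.
//
//     let comma_sep = SeqSep::trailing_allowed(token::Comma);
//     let no_sep = SeqSep::none();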

pub enum FollowedByType {
    Yes,
    No,
}

/// Whether a function performed recovery
#[derive(Copy, Clone, Debug)]
pub enum Recovered {
    No,
    Yes,
}

impl From<Recovered> for bool {
    fn from(r: Recovered) -> bool {
        matches!(r, Recovered::Yes)
    }
}

#[derive(Copy, Clone, Debug)]
pub enum Trailing {
    No,
    Yes,
}

#[derive(Clone, Copy, PartialEq, Eq)]
pub enum TokenDescription {
    ReservedIdentifier,
    Keyword,
    ReservedKeyword,
    DocComment,
}

impl TokenDescription {
    pub fn from_token(token: &Token) -> Option<Self> {
        match token.kind {
            _ if token.is_special_ident() => Some(TokenDescription::ReservedIdentifier),
            _ if token.is_used_keyword() => Some(TokenDescription::Keyword),
            _ if token.is_unused_keyword() => Some(TokenDescription::ReservedKeyword),
            token::DocComment(..) => Some(TokenDescription::DocComment),
            _ => None,
        }
    }
}

pub(super) fn token_descr(token: &Token) -> String {
    let name = pprust::token_to_string(token).to_string();

    let kind = match (TokenDescription::from_token(token), &token.kind) {
        (Some(TokenDescription::ReservedIdentifier), _) => Some("reserved identifier"),
        (Some(TokenDescription::Keyword), _) => Some("keyword"),
        (Some(TokenDescription::ReservedKeyword), _) => Some("reserved keyword"),
        (Some(TokenDescription::DocComment), _) => Some("doc comment"),
        (None, TokenKind::Interpolated(node)) => Some(node.0.descr()),
        (None, _) => None,
    };

    if let Some(kind) = kind { format!("{kind} `{name}`") } else { format!("`{name}`") }
}
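
// Sample outputs, derived from the match arms above (the token variables are
// hypothetical, named only for illustration):
//
//     token_descr(&fn_token)    // => "keyword `fn`"
//     token_descr(&ident_token) // => "`my_ident`" (no kind prefix)
//     token_descr(&plus_token)  // => "`+`" (no kind prefix)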

impl<'a> Parser<'a> {
    pub fn new(
        psess: &'a ParseSess,
        stream: TokenStream,
        subparser_name: Option<&'static str>,
    ) -> Self {
        let mut parser = Parser {
            psess,
            token: Token::dummy(),
            token_spacing: Spacing::Alone,
            prev_token: Token::dummy(),
            capture_cfg: false,
            restrictions: Restrictions::empty(),
            expected_tokens: Vec::new(),
            token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() },
            num_bump_calls: 0,
            break_last_token: false,
            unmatched_angle_bracket_count: 0,
            angle_bracket_nesting: 0,
            last_unexpected_token_span: None,
            subparser_name,
            capture_state: CaptureState {
                capturing: Capturing::No,
                replace_ranges: Vec::new(),
                inner_attr_ranges: Default::default(),
            },
            current_closure: None,
            recovery: Recovery::Allowed,
        };

        // Make parser point to the first token.
        parser.bump();

        parser
    }
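
    // A minimal construction sketch (not a quote of a real call site): given
    // a `ParseSess` and a lexed `TokenStream`, build a parser and opt out of
    // recovery, e.g. when parsing macro arguments. The variable names are
    // hypothetical.
    //
    //     let mut parser = Parser::new(&psess, stream, None)
    //         .recovery(Recovery::Forbidden);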

    #[inline]
    pub fn recovery(mut self, recovery: Recovery) -> Self {
        self.recovery = recovery;
        self
    }

    /// Whether the parser is allowed to recover from broken code.
    ///
    /// If this returns false, recovering broken code into valid code (especially if this recovery does lookahead)
    /// is not allowed. All recovery done by the parser must be gated behind this check.
    ///
    /// Technically, this only needs to restrict eager recovery by doing lookahead at more tokens.
    /// But making the distinction is very subtle, and simply forbidding all recovery is a lot simpler to uphold.
    #[inline]
    fn may_recover(&self) -> bool {
        matches!(self.recovery, Recovery::Allowed)
    }
2024-03-15 11:36:21 +00:00
|
|
|
/// Version of [`unexpected`](Parser::unexpected) that "returns" any type in the `Ok`
|
|
|
|
/// (both those functions never return "Ok", and so can lie like that in the type).
|
|
|
|
pub fn unexpected_any<T>(&mut self) -> PResult<'a, T> {
|
2015-03-28 21:58:51 +00:00
|
|
|
match self.expect_one_of(&[], &[]) {
|
2015-12-30 23:11:53 +00:00
|
|
|
Err(e) => Err(e),
|
2019-11-13 11:05:37 +00:00
|
|
|
// We can get `Ok(true)` from `recover_closing_delimiter`
|
|
|
|
// which is called in `expected_one_of_not_found`.
|
|
|
|
Ok(_) => FatalError.raise(),
|
2015-03-28 21:58:51 +00:00
|
|
|
}
|
2013-06-15 01:21:47 +00:00
|
|
|
}
|
|
|
|
|
2024-03-15 11:36:21 +00:00
|
|
|
pub fn unexpected(&mut self) -> PResult<'a, ()> {
|
|
|
|
self.unexpected_any()
|
|
|
|
}
|
|
|
|
|
2019-02-08 13:53:55 +00:00
|
|
|
/// Expects and consumes the token `t`. Signals an error if the next token is not `t`.
|
2024-02-13 23:44:33 +00:00
|
|
|
pub fn expect(&mut self, t: &TokenKind) -> PResult<'a, Recovered> {
|
Make the parser’s ‘expected <foo>, found <bar>’ errors more accurate
As an example of what this changes, the following code:
let x: [int ..4];
Currently spits out ‘expected `]`, found `..`’. However, a comma would also be
valid there, as would a number of other tokens. This change adjusts the parser
to produce more accurate errors, so that that example now produces ‘expected one
of `(`, `+`, `,`, `::`, or `]`, found `..`’.
2014-12-03 09:47:53 +00:00
|
|
|
if self.expected_tokens.is_empty() {
|
|
|
|
if self.token == *t {
|
2015-12-30 23:11:53 +00:00
|
|
|
self.bump();
|
2024-02-13 23:44:33 +00:00
|
|
|
Ok(Recovered::No)
|
Make the parser’s ‘expected <foo>, found <bar>’ errors more accurate
As an example of what this changes, the following code:
let x: [int ..4];
Currently spits out ‘expected `]`, found `..`’. However, a comma would also be
valid there, as would a number of other tokens. This change adjusts the parser
to produce more accurate errors, so that that example now produces ‘expected one
of `(`, `+`, `,`, `::`, or `]`, found `..`’.
2014-12-03 09:47:53 +00:00
|
|
|
} else {
|
2019-05-22 05:17:53 +00:00
|
|
|
self.unexpected_try_recover(t)
|
Make the parser’s ‘expected <foo>, found <bar>’ errors more accurate
As an example of what this changes, the following code:
let x: [int ..4];
Currently spits out ‘expected `]`, found `..`’. However, a comma would also be
valid there, as would a number of other tokens. This change adjusts the parser
to produce more accurate errors, so that that example now produces ‘expected one
of `(`, `+`, `,`, `::`, or `]`, found `..`’.
2014-12-03 09:47:53 +00:00
|
|
|
}
|
2013-06-15 01:21:47 +00:00
|
|
|
} else {
|
2018-06-01 14:05:46 +00:00
|
|
|
self.expect_one_of(slice::from_ref(t), &[])
|
2013-06-15 01:21:47 +00:00
|
|
|
}
|
|
|
|
}

    /// Expects the next token to be an edible or inedible token. If edible,
    /// consumes it; if inedible, returns without consuming anything.
    /// Signals a fatal error if the next token is unexpected.
    pub fn expect_one_of(
        &mut self,
        edible: &[TokenKind],
        inedible: &[TokenKind],
    ) -> PResult<'a, Recovered> {
        if edible.contains(&self.token.kind) {
            self.bump();
            Ok(Recovered::No)
        } else if inedible.contains(&self.token.kind) {
            // leave it in the input
            Ok(Recovered::No)
        } else if self.token.kind != token::Eof
            && self.last_unexpected_token_span == Some(self.token.span)
        {
            FatalError.raise();
        } else {
            self.expected_one_of_not_found(edible, inedible)
        }
    }

    // Public for rustfmt usage.
    pub fn parse_ident(&mut self) -> PResult<'a, Ident> {
        self.parse_ident_common(true)
    }

    fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, Ident> {
        let (ident, is_raw) = self.ident_or_err(recover)?;

        if matches!(is_raw, IdentIsRaw::No) && ident.is_reserved() {
            let err = self.expected_ident_found_err();
            if recover {
                err.emit();
            } else {
                return Err(err);
            }
        }
        self.bump();
        Ok(ident)
    }

    fn ident_or_err(&mut self, recover: bool) -> PResult<'a, (Ident, IdentIsRaw)> {
        match self.token.ident() {
            Some(ident) => Ok(ident),
            None => self.expected_ident_found(recover),
        }
    }

    /// Checks if the next token is `tok`, and returns `true` if so.
    ///
    /// This method will automatically add `tok` to `expected_tokens` if `tok` is not
    /// encountered.
    #[inline]
    fn check(&mut self, tok: &TokenKind) -> bool {
        let is_present = self.token == *tok;
        if !is_present {
            self.expected_tokens.push(TokenType::Token(tok.clone()));
        }
        is_present
    }

    #[inline]
    fn check_noexpect(&self, tok: &TokenKind) -> bool {
        self.token == *tok
    }

    /// Consumes a token 'tok' if it exists. Returns whether the given token was present.
    ///
    /// The main purpose of this function is to reduce the clutter of the suggestions list,
    /// which using the normal eat method could introduce in some cases.
    #[inline]
    pub fn eat_noexpect(&mut self, tok: &TokenKind) -> bool {
        let is_present = self.check_noexpect(tok);
        if is_present {
            self.bump()
        }
        is_present
    }

    /// Consumes a token 'tok' if it exists. Returns whether the given token was present.
    #[inline]
    pub fn eat(&mut self, tok: &TokenKind) -> bool {
        let is_present = self.check(tok);
        if is_present {
            self.bump()
        }
        is_present
    }

    /// If the next token is the given keyword, returns `true` without eating it.
    /// An expectation is also added for diagnostics purposes.
    #[inline]
    fn check_keyword(&mut self, kw: Symbol) -> bool {
        self.expected_tokens.push(TokenType::Keyword(kw));
        self.token.is_keyword(kw)
    }

    #[inline]
    fn check_keyword_case(&mut self, kw: Symbol, case: Case) -> bool {
        if self.check_keyword(kw) {
            return true;
        }

        if case == Case::Insensitive
            && let Some((ident, IdentIsRaw::No)) = self.token.ident()
            && ident.as_str().to_lowercase() == kw.as_str().to_lowercase()
        {
            true
        } else {
            false
        }
    }

    /// If the next token is the given keyword, eats it and returns `true`.
    /// Otherwise, returns `false`. An expectation is also added for diagnostics purposes.
    // Public for rustfmt usage.
    #[inline]
    pub fn eat_keyword(&mut self, kw: Symbol) -> bool {
        if self.check_keyword(kw) {
            self.bump();
            true
        } else {
            false
        }
    }

    /// Eats a keyword, optionally ignoring the case.
    /// If the case differs (and is ignored) an error is issued.
    /// This is useful for recovery.
    #[inline]
    fn eat_keyword_case(&mut self, kw: Symbol, case: Case) -> bool {
        if self.eat_keyword(kw) {
            return true;
        }

        if case == Case::Insensitive
            && let Some((ident, IdentIsRaw::No)) = self.token.ident()
            && ident.as_str().to_lowercase() == kw.as_str().to_lowercase()
        {
            self.dcx().emit_err(errors::KwBadCase { span: ident.span, kw: kw.as_str() });
            self.bump();
            return true;
        }

        false
    }
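
    // An illustrative recovery scenario for `eat_keyword_case` (the source
    // snippet is hypothetical): given the input `Fn main() {}`, a call like
    // `self.eat_keyword_case(kw::Fn, Case::Insensitive)` eats `Fn` as if it
    // were `fn`, emits a "keyword has the wrong case" error, and lets parsing
    // continue instead of failing on the miscased keyword.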

    #[inline]
    fn eat_keyword_noexpect(&mut self, kw: Symbol) -> bool {
        if self.token.is_keyword(kw) {
            self.bump();
            true
        } else {
            false
        }
    }

    /// If the given word is not a keyword, signals an error.
    /// If the next token is not the given word, signals an error.
    /// Otherwise, eats it.
    fn expect_keyword(&mut self, kw: Symbol) -> PResult<'a, ()> {
        if !self.eat_keyword(kw) { self.unexpected() } else { Ok(()) }
    }

    /// Is the given keyword `kw` followed by a non-reserved identifier?
    fn is_kw_followed_by_ident(&self, kw: Symbol) -> bool {
        self.token.is_keyword(kw) && self.look_ahead(1, |t| t.is_ident() && !t.is_reserved_ident())
    }

    #[inline]
    fn check_or_expected(&mut self, ok: bool, typ: TokenType) -> bool {
        if ok {
            true
        } else {
            self.expected_tokens.push(typ);
            false
        }
    }

    fn check_ident(&mut self) -> bool {
        self.check_or_expected(self.token.is_ident(), TokenType::Ident)
    }

    fn check_path(&mut self) -> bool {
        self.check_or_expected(self.token.is_path_start(), TokenType::Path)
    }

    fn check_type(&mut self) -> bool {
        self.check_or_expected(self.token.can_begin_type(), TokenType::Type)
    }

    fn check_const_arg(&mut self) -> bool {
        self.check_or_expected(self.token.can_begin_const_arg(), TokenType::Const)
    }

    fn check_const_closure(&self) -> bool {
        self.is_keyword_ahead(0, &[kw::Const])
            && self.look_ahead(1, |t| match &t.kind {
                // async closures do not work with const closures, so we do not parse that here.
                token::Ident(kw::Move | kw::Static, _) | token::OrOr | token::BinOp(token::Or) => {
                    true
                }
                _ => false,
            })
    }

    fn check_inline_const(&self, dist: usize) -> bool {
        self.is_keyword_ahead(dist, &[kw::Const])
            && self.look_ahead(dist + 1, |t| match &t.kind {
                token::Interpolated(nt) => matches!(&nt.0, token::NtBlock(..)),
                token::OpenDelim(Delimiter::Brace) => true,
                _ => false,
            })
    }

    /// Checks to see if the next token is either `+` or `+=`.
    /// Otherwise returns `false`.
    #[inline]
    fn check_plus(&mut self) -> bool {
        self.check_or_expected(
            self.token.is_like_plus(),
            TokenType::Token(token::BinOp(token::Plus)),
        )
    }

    /// Eats the expected token if it's present possibly breaking
    /// compound tokens like multi-character operators in process.
    /// Returns `true` if the token was eaten.
    fn break_and_eat(&mut self, expected: TokenKind) -> bool {
        if self.token.kind == expected {
            self.bump();
            return true;
        }
        match self.token.kind.break_two_token_op() {
            Some((first, second)) if first == expected => {
                let first_span = self.psess.source_map().start_point(self.token.span);
                let second_span = self.token.span.with_lo(first_span.hi());
                self.token = Token::new(first, first_span);
                // Keep track of this token - if we end token capturing now,
                // we'll want to append this token to the captured stream.
                //
                // If we consume any additional tokens, then this token
                // is not needed (we'll capture the entire 'glued' token),
                // and `bump` will set this field to `None`
                self.break_last_token = true;
                // Use the spacing of the glued token as the spacing of the
                // unglued second token.
                self.bump_with((Token::new(second, second_span), self.token_spacing));
                true
            }
            _ => {
                self.expected_tokens.push(TokenType::Token(expected));
                false
            }
        }
    }
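
    // A worked example (illustrative): while parsing `Option<Vec<u8>>`, the
    // lexer produces a single `>>` token. When the type parser needs the `>`
    // that closes `Vec<u8>`, `break_and_eat(token::Gt)` splits `>>` into two
    // `>` tokens, eats the first, and leaves the second as the current token
    // so that the outer `Option<...>` can be closed on the next call.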

    /// Eats `+` possibly breaking tokens like `+=` in process.
    fn eat_plus(&mut self) -> bool {
        self.break_and_eat(token::BinOp(token::Plus))
    }

    /// Eats `&` possibly breaking tokens like `&&` in process.
    /// Signals an error if `&` is not eaten.
    fn expect_and(&mut self) -> PResult<'a, ()> {
        if self.break_and_eat(token::BinOp(token::And)) { Ok(()) } else { self.unexpected() }
    }

    /// Eats `|` possibly breaking tokens like `||` in process.
    /// Signals an error if `|` was not eaten.
    fn expect_or(&mut self) -> PResult<'a, ()> {
        if self.break_and_eat(token::BinOp(token::Or)) { Ok(()) } else { self.unexpected() }
    }

    /// Eats `<` possibly breaking tokens like `<<` in process.
    fn eat_lt(&mut self) -> bool {
        let ate = self.break_and_eat(token::Lt);
        if ate {
            // See doc comment for `unmatched_angle_bracket_count`.
            self.unmatched_angle_bracket_count += 1;
            debug!("eat_lt: (increment) count={:?}", self.unmatched_angle_bracket_count);
        }
        ate
    }

    /// Eats `<` possibly breaking tokens like `<<` in process.
    /// Signals an error if `<` was not eaten.
    fn expect_lt(&mut self) -> PResult<'a, ()> {
        if self.eat_lt() { Ok(()) } else { self.unexpected() }
    }

    /// Eats `>` possibly breaking tokens like `>>` in process.
    /// Signals an error if `>` was not eaten.
    fn expect_gt(&mut self) -> PResult<'a, ()> {
        if self.break_and_eat(token::Gt) {
            // See doc comment for `unmatched_angle_bracket_count`.
            if self.unmatched_angle_bracket_count > 0 {
                self.unmatched_angle_bracket_count -= 1;
                debug!("expect_gt: (decrement) count={:?}", self.unmatched_angle_bracket_count);
            }
            Ok(())
        } else {
            self.unexpected()
        }
    }

    /// Checks if the next token is contained within `kets`, and returns `true` if so.
    fn expect_any_with_type(&mut self, kets: &[&TokenKind], expect: TokenExpectType) -> bool {
        kets.iter().any(|k| match expect {
            TokenExpectType::Expect => self.check(k),
            TokenExpectType::NoExpect => self.check_noexpect(k),
        })
    }

    /// Parses a sequence until the specified delimiters. The function
    /// `f` must consume tokens until reaching the next separator or
    /// closing bracket.
    fn parse_seq_to_before_tokens<T>(
        &mut self,
        kets: &[&TokenKind],
        sep: SeqSep,
        expect: TokenExpectType,
        mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
    ) -> PResult<'a, (ThinVec<T>, Trailing, Recovered)> {
        let mut first = true;
        let mut recovered = Recovered::No;
        let mut trailing = Trailing::No;
        let mut v = ThinVec::new();

        while !self.expect_any_with_type(kets, expect) {
            if let token::CloseDelim(..) | token::Eof = self.token.kind {
                break;
            }
            if let Some(t) = &sep.sep {
                if first {
                    // No separator is expected before the first element.
                    first = false;
                } else {
                    // Check for a separator.
                    match self.expect(t) {
                        Ok(Recovered::No) => {
                            self.current_closure.take();
                        }
                        Ok(Recovered::Yes) => {
                            self.current_closure.take();
                            recovered = Recovered::Yes;
                            break;
                        }
                        Err(mut expect_err) => {
                            let sp = self.prev_token.span.shrink_to_hi();
                            let token_str = pprust::token_kind_to_string(t);

                            match self.current_closure.take() {
                                Some(closure_spans) if self.token.kind == TokenKind::Semi => {
                                    // Finding a semicolon instead of a comma
                                    // after a closure body indicates that the
                                    // closure body may be a block but the user
                                    // forgot to put braces around its
                                    // statements.

                                    self.recover_missing_braces_around_closure_body(
                                        closure_spans,
                                        expect_err,
                                    )?;

                                    continue;
                                }

                                _ => {
                                    // Attempt to keep parsing if it was a similar separator.
                                    if let Some(tokens) = t.similar_tokens() {
                                        if tokens.contains(&self.token.kind) {
                                            self.bump();
                                        }
                                    }
                                }
                            }

                            // If this was a missing `@` in a binding pattern,
                            // bail out with a suggestion.
                            // https://github.com/rust-lang/rust/issues/72373
                            if self.prev_token.is_ident() && self.token.kind == token::DotDot {
                                let msg = format!(
                                    "if you meant to bind the contents of the rest of the array \
                                     pattern into `{}`, use `@`",
                                    pprust::token_to_string(&self.prev_token)
                                );
                                expect_err
                                    .with_span_suggestion_verbose(
                                        self.prev_token.span.shrink_to_hi().until(self.token.span),
                                        msg,
                                        " @ ",
                                        Applicability::MaybeIncorrect,
                                    )
                                    .emit();
                                break;
                            }

                            // Attempt to keep parsing if it was an omitted separator.
                            self.last_unexpected_token_span = None;
                            match f(self) {
                                Ok(t) => {
                                    // Parsed successfully, therefore most probably the code only
                                    // misses a separator.
                                    expect_err
                                        .with_span_suggestion_short(
                                            sp,
                                            format!("missing `{token_str}`"),
                                            token_str,
                                            Applicability::MaybeIncorrect,
                                        )
                                        .emit();

                                    v.push(t);
                                    continue;
                                }
                                Err(e) => {
                                    // Parsing failed, therefore it must be something more serious
                                    // than just a missing separator.
                                    for xx in &e.children {
                                        // Propagate the help message from the sub-error `e` to the
                                        // main error `expect_err`.
                                        expect_err.children.push(xx.clone());
                                    }
                                    e.cancel();
                                    if self.token == token::Colon {
                                        // We will try to recover in `maybe_recover_struct_lit_bad_delims`.
                                        return Err(expect_err);
                                    } else if let [token::CloseDelim(Delimiter::Parenthesis)] = kets
                                    {
                                        return Err(expect_err);
                                    } else {
                                        expect_err.emit();
                                        break;
                                    }
                                }
                            }
                        }
                    }
                }
            }
            if sep.trailing_sep_allowed && self.expect_any_with_type(kets, expect) {
                trailing = Trailing::Yes;
                break;
            }

            let t = f(self)?;
            v.push(t);
        }

        Ok((v, trailing, recovered))
    }

    fn recover_missing_braces_around_closure_body(
        &mut self,
        closure_spans: ClosureSpans,
        mut expect_err: Diag<'_>,
    ) -> PResult<'a, ()> {
        let initial_semicolon = self.token.span;

        while self.eat(&TokenKind::Semi) {
            let _ =
                self.parse_stmt_without_recovery(false, ForceCollect::Yes).unwrap_or_else(|e| {
                    e.cancel();
                    None
                });
        }

        expect_err
            .primary_message("closure bodies that contain statements must be surrounded by braces");

        let preceding_pipe_span = closure_spans.closing_pipe;
        let following_token_span = self.token.span;

        let mut first_note = MultiSpan::from(vec![initial_semicolon]);
        first_note.push_span_label(
            initial_semicolon,
            "this `;` turns the preceding closure into a statement",
        );
        first_note.push_span_label(
            closure_spans.body,
            "this expression is a statement because of the trailing semicolon",
        );
        expect_err.span_note(first_note, "statement found outside of a block");

        let mut second_note = MultiSpan::from(vec![closure_spans.whole_closure]);
        second_note.push_span_label(closure_spans.whole_closure, "this is the parsed closure...");
        second_note.push_span_label(
            following_token_span,
            "...but likely you meant the closure to end here",
        );
        expect_err.span_note(second_note, "the closure body may be incorrectly delimited");

        expect_err.span(vec![preceding_pipe_span, following_token_span]);

        let opening_suggestion_str = " {".to_string();
        let closing_suggestion_str = "}".to_string();

        expect_err.multipart_suggestion(
            "try adding braces",
            vec![
                (preceding_pipe_span.shrink_to_hi(), opening_suggestion_str),
                (following_token_span.shrink_to_lo(), closing_suggestion_str),
            ],
            Applicability::MaybeIncorrect,
        );

        expect_err.emit();

        Ok(())
    }
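
    // Illustrative sketch (added commentary): the recovery above targets code
    // shaped like
    //
    //     v.iter().for_each(|x| println!("{x}"); println!("done"));
    //                                          ^ `;` ends the closure body
    //
    // where a `;` is found instead of `,` after a closure argument. The
    // suggestion inserts `{` after the closing `|` and `}` before the token
    // that likely ends the intended body.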

    /// Parses a sequence, not including the delimiters. The function
    /// `f` must consume tokens until reaching the next separator or
    /// closing bracket.
    fn parse_seq_to_before_end<T>(
        &mut self,
        ket: &TokenKind,
        sep: SeqSep,
        f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
    ) -> PResult<'a, (ThinVec<T>, Trailing, Recovered)> {
        self.parse_seq_to_before_tokens(&[ket], sep, TokenExpectType::Expect, f)
    }

    /// Parses a sequence, including only the closing delimiter. The function
    /// `f` must consume tokens until reaching the next separator or
    /// closing bracket.
    fn parse_seq_to_end<T>(
        &mut self,
        ket: &TokenKind,
        sep: SeqSep,
        f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
    ) -> PResult<'a, (ThinVec<T>, Trailing)> {
        let (val, trailing, recovered) = self.parse_seq_to_before_end(ket, sep, f)?;
        if matches!(recovered, Recovered::No) {
            self.eat(ket);
        }
        Ok((val, trailing))
    }

    /// Parses a sequence, including both delimiters. The function
    /// `f` must consume tokens until reaching the next separator or
    /// closing bracket.
    fn parse_unspanned_seq<T>(
        &mut self,
        bra: &TokenKind,
        ket: &TokenKind,
        sep: SeqSep,
        f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
    ) -> PResult<'a, (ThinVec<T>, Trailing)> {
        self.expect(bra)?;
        self.parse_seq_to_end(ket, sep, f)
    }

    /// Parses a comma-separated sequence, including both delimiters.
    /// The function `f` must consume tokens until reaching the next separator or
    /// closing bracket.
    fn parse_delim_comma_seq<T>(
        &mut self,
        delim: Delimiter,
        f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
    ) -> PResult<'a, (ThinVec<T>, Trailing)> {
        self.parse_unspanned_seq(
            &token::OpenDelim(delim),
            &token::CloseDelim(delim),
            SeqSep::trailing_allowed(token::Comma),
            f,
        )
    }

    /// Parses a comma-separated sequence delimited by parentheses (e.g. `(x, y)`).
    /// The function `f` must consume tokens until reaching the next separator or
    /// closing bracket.
    fn parse_paren_comma_seq<T>(
        &mut self,
        f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
    ) -> PResult<'a, (ThinVec<T>, Trailing)> {
        self.parse_delim_comma_seq(Delimiter::Parenthesis, f)
    }
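
    // Illustrative sketch (added commentary): a typical use of these helpers
    // is parsing a parenthesized argument list. With a parser positioned at
    // `(a, b, c,)`, something like
    //
    //     let (args, trailing) = p.parse_paren_comma_seq(|p| p.parse_expr())?;
    //
    // would consume both parentheses, yield the three expressions in `args`,
    // and report the trailing comma via `Trailing::Yes`.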

    /// Advance the parser by one token using the provided token as the next one.
    fn bump_with(&mut self, next: (Token, Spacing)) {
        self.inlined_bump_with(next)
    }

    /// This always-inlined version should only be used on hot code paths.
    #[inline(always)]
    fn inlined_bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) {
        // Update the current and previous tokens.
        self.prev_token = mem::replace(&mut self.token, next_token);
        self.token_spacing = next_spacing;

        // Diagnostics.
        self.expected_tokens.clear();
    }

    /// Advance the parser by one token.
    pub fn bump(&mut self) {
        // Note: destructuring here would give nicer code, but it was found in #96210 to be slower
        // than `.0`/`.1` access.
        let mut next = self.token_cursor.inlined_next();
        self.num_bump_calls += 1;
        // We've retrieved a token from the underlying
        // cursor, so we no longer need to worry about
        // an unglued token. See `break_and_eat` for more details.
        self.break_last_token = false;
        if next.0.span.is_dummy() {
            // Tweak the location for better diagnostics, but keep syntactic context intact.
            let fallback_span = self.token.span;
            next.0.span = fallback_span.with_ctxt(next.0.span.ctxt());
        }
        debug_assert!(!matches!(
            next.0.kind,
            token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible)
        ));
        self.inlined_bump_with(next)
    }

    /// Look-ahead `dist` tokens of `self.token` and get access to that token there.
    /// When `dist == 0` then the current token is looked at. `Eof` will be
    /// returned if the look-ahead is any distance past the end of the tokens.
    pub fn look_ahead<R>(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R {
        if dist == 0 {
            return looker(&self.token);
        }

        if let Some(&(_, span, _, delim)) = self.token_cursor.stack.last()
            && delim != Delimiter::Invisible
        {
            // We are not in the outermost token stream, and the token stream
            // we are in has non-skipped delimiters. Look for skipped
            // delimiters in the lookahead range.
            let tree_cursor = &self.token_cursor.tree_cursor;
            let all_normal = (0..dist).all(|i| {
                let token = tree_cursor.look_ahead(i);
                !matches!(token, Some(TokenTree::Delimited(.., Delimiter::Invisible, _)))
            });
            if all_normal {
                // There were no skipped delimiters. Do lookahead by plain indexing.
                return match tree_cursor.look_ahead(dist - 1) {
                    Some(tree) => {
                        // Indexing stayed within the current token stream.
                        match tree {
                            TokenTree::Token(token, _) => looker(token),
                            TokenTree::Delimited(dspan, _, delim, _) => {
                                looker(&Token::new(token::OpenDelim(*delim), dspan.open))
                            }
                        }
                    }
                    None => {
                        // Indexing went past the end of the current token
                        // stream. Use the close delimiter, no matter how far
                        // ahead `dist` went.
                        looker(&Token::new(token::CloseDelim(delim), span.close))
                    }
                };
            }
        }

        // We are in a more complex case. Just clone the token cursor and use
        // `next`, skipping delimiters as necessary. Slow but simple.
        let mut cursor = self.token_cursor.clone();
        let mut i = 0;
        let mut token = Token::dummy();
        while i < dist {
            token = cursor.next().0;
            if matches!(
                token.kind,
                token::OpenDelim(Delimiter::Invisible) | token::CloseDelim(Delimiter::Invisible)
            ) {
                continue;
            }
            i += 1;
        }
        looker(&token)
    }
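
    // Illustrative sketch (added commentary): callers typically peek a small,
    // fixed distance ahead without committing to a parse, e.g.
    //
    //     if self.look_ahead(1, |t| t == &token::Comma) {
    //         // The token after the current one is a comma.
    //     }
    //
    // `dist == 0` inspects the current token itself and is cheap; larger
    // distances may have to clone the token cursor, as in the slow path above.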

    /// Returns whether any of the given keywords are `dist` tokens ahead of the current one.
    pub(crate) fn is_keyword_ahead(&self, dist: usize, kws: &[Symbol]) -> bool {
        self.look_ahead(dist, |t| kws.iter().any(|&kw| t.is_keyword(kw)))
    }

    /// Parses the coroutine kind: `async`, `gen`, `async gen`, or nothing.
    fn parse_coroutine_kind(&mut self, case: Case) -> Option<CoroutineKind> {
        let span = self.token.uninterpolated_span();
        if self.eat_keyword_case(kw::Async, case) {
            // FIXME(gen_blocks): Do we want to unconditionally parse `gen` and then
            // error if edition <= 2024, like we do with async and edition <= 2018?
            if self.token.uninterpolated_span().at_least_rust_2024()
                && self.eat_keyword_case(kw::Gen, case)
            {
                let gen_span = self.prev_token.uninterpolated_span();
                Some(CoroutineKind::AsyncGen {
                    span: span.to(gen_span),
                    closure_id: DUMMY_NODE_ID,
                    return_impl_trait_id: DUMMY_NODE_ID,
                })
            } else {
                Some(CoroutineKind::Async {
                    span,
                    closure_id: DUMMY_NODE_ID,
                    return_impl_trait_id: DUMMY_NODE_ID,
                })
            }
        } else if self.token.uninterpolated_span().at_least_rust_2024()
            && self.eat_keyword_case(kw::Gen, case)
        {
            Some(CoroutineKind::Gen {
                span,
                closure_id: DUMMY_NODE_ID,
                return_impl_trait_id: DUMMY_NODE_ID,
            })
        } else {
            None
        }
    }
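
    // Illustrative sketch (added commentary): the headers recognized above are
    //
    //     async fn f() {}        // CoroutineKind::Async
    //     gen fn g() {}          // CoroutineKind::Gen (edition 2024 or later)
    //     async gen fn h() {}    // CoroutineKind::AsyncGen (edition 2024 or later)
    //
    // and a plain `fn` yields `None`.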

    /// Parses unsafety: `unsafe` or nothing.
    fn parse_unsafety(&mut self, case: Case) -> Unsafe {
        if self.eat_keyword_case(kw::Unsafe, case) {
            Unsafe::Yes(self.prev_token.uninterpolated_span())
        } else {
            Unsafe::No
        }
    }

    /// Parses constness: `const` or nothing.
    fn parse_constness(&mut self, case: Case) -> Const {
        self.parse_constness_(case, false)
    }

    /// Parses constness for closures (case sensitive, feature-gated).
    fn parse_closure_constness(&mut self) -> Const {
        let constness = self.parse_constness_(Case::Sensitive, true);
        if let Const::Yes(span) = constness {
            self.psess.gated_spans.gate(sym::const_closures, span);
        }
        constness
    }

    fn parse_constness_(&mut self, case: Case, is_closure: bool) -> Const {
        // Avoid parsing const blocks and const closures as const items.
        if (self.check_const_closure() == is_closure)
            && !self
                .look_ahead(1, |t| *t == token::OpenDelim(Delimiter::Brace) || t.is_whole_block())
            && self.eat_keyword_case(kw::Const, case)
        {
            Const::Yes(self.prev_token.uninterpolated_span())
        } else {
            Const::No
        }
    }
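
    // Illustrative sketch (added commentary): the look-ahead for `{` above is
    // what keeps an inline const block from being eaten as a constness
    // qualifier, so in
    //
    //     let x = const { 1 + 1 };
    //
    // the `const` is parsed as the start of a const block expression rather
    // than as `Const::Yes` on a following item or closure.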

    /// Parses inline const expressions.
    fn parse_const_block(&mut self, span: Span, pat: bool) -> PResult<'a, P<Expr>> {
        if pat {
            self.psess.gated_spans.gate(sym::inline_const_pat, span);
        }
        self.eat_keyword(kw::Const);
        let (attrs, blk) = self.parse_inner_attrs_and_block()?;
        let anon_const = AnonConst {
            id: DUMMY_NODE_ID,
            value: self.mk_expr(blk.span, ExprKind::Block(blk, None)),
        };
        let blk_span = anon_const.value.span;
        Ok(self.mk_expr_with_attrs(span.to(blk_span), ExprKind::ConstBlock(anon_const), attrs))
    }

    /// Parses mutability (`mut` or nothing).
    fn parse_mutability(&mut self) -> Mutability {
        if self.eat_keyword(kw::Mut) { Mutability::Mut } else { Mutability::Not }
    }

    /// Parses reference binding mode (`ref`, `ref mut`, or nothing).
    fn parse_byref(&mut self) -> ByRef {
        if self.eat_keyword(kw::Ref) { ByRef::Yes(self.parse_mutability()) } else { ByRef::No }
    }

    /// Possibly parses mutability (`const` or `mut`).
    fn parse_const_or_mut(&mut self) -> Option<Mutability> {
        if self.eat_keyword(kw::Mut) {
            Some(Mutability::Mut)
        } else if self.eat_keyword(kw::Const) {
            Some(Mutability::Not)
        } else {
            None
        }
    }

    fn parse_field_name(&mut self) -> PResult<'a, Ident> {
        if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) = self.token.kind
        {
            if let Some(suffix) = suffix {
                self.expect_no_tuple_index_suffix(self.token.span, suffix);
            }
            self.bump();
            Ok(Ident::new(symbol, self.prev_token.span))
        } else {
            self.parse_ident_common(true)
        }
    }
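
    // Illustrative sketch (added commentary): integer field names arise from
    // tuple structs, e.g. the `0` and `1` in
    //
    //     struct Pair(u8, u8);
    //     let p = Pair { 0: 1, 1: 2 };
    //     let first = p.0;
    //
    // A suffix on the index, as in `p.0u8`, is rejected via
    // `expect_no_tuple_index_suffix` before the literal is consumed.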

    fn parse_delim_args(&mut self) -> PResult<'a, P<DelimArgs>> {
        if let Some(args) = self.parse_delim_args_inner() {
            Ok(P(args))
        } else {
            self.unexpected_any()
        }
    }

    fn parse_attr_args(&mut self) -> PResult<'a, AttrArgs> {
        Ok(if let Some(args) = self.parse_delim_args_inner() {
            AttrArgs::Delimited(args)
        } else {
            if self.eat(&token::Eq) {
                let eq_span = self.prev_token.span;
                AttrArgs::Eq(eq_span, AttrArgsEq::Ast(self.parse_expr_force_collect()?))
            } else {
                AttrArgs::Empty
            }
        })
    }
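
    // Illustrative sketch (added commentary): the three `AttrArgs` shapes
    // correspond to the surface forms
    //
    //     #[test]                 // AttrArgs::Empty
    //     #[derive(Clone, Copy)]  // AttrArgs::Delimited(..)
    //     #[doc = "docs"]         // AttrArgs::Eq(..)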

    fn parse_delim_args_inner(&mut self) -> Option<DelimArgs> {
        let delimited = self.check(&token::OpenDelim(Delimiter::Parenthesis))
            || self.check(&token::OpenDelim(Delimiter::Bracket))
            || self.check(&token::OpenDelim(Delimiter::Brace));

        delimited.then(|| {
            let TokenTree::Delimited(dspan, _, delim, tokens) = self.parse_token_tree() else {
                unreachable!()
            };
            DelimArgs { dspan, delim, tokens }
        })
    }

    fn parse_or_use_outer_attributes(
        &mut self,
        already_parsed_attrs: Option<AttrWrapper>,
    ) -> PResult<'a, AttrWrapper> {
        if let Some(attrs) = already_parsed_attrs {
            Ok(attrs)
        } else {
            self.parse_outer_attributes()
        }
    }

    /// Parses a single token tree from the input.
    pub fn parse_token_tree(&mut self) -> TokenTree {
        match self.token.kind {
            token::OpenDelim(..) => {
                // Grab the tokens within the delimiters.
                let stream = self.token_cursor.tree_cursor.stream.clone();
                let (_, span, spacing, delim) = *self.token_cursor.stack.last().unwrap();

                // Advance the token cursor through the entire delimited
                // sequence. After getting the `OpenDelim` we are *within* the
                // delimited sequence, i.e. at depth `d`. After getting the
                // matching `CloseDelim` we are *after* the delimited sequence,
                // i.e. at depth `d - 1`.
                let target_depth = self.token_cursor.stack.len() - 1;
                loop {
                    // Advance one token at a time, so `TokenCursor::next()`
                    // can capture these tokens if necessary.
                    self.bump();
                    if self.token_cursor.stack.len() == target_depth {
                        debug_assert!(matches!(self.token.kind, token::CloseDelim(_)));
                        break;
                    }
                }

                // Consume the close delimiter.
                self.bump();
                TokenTree::Delimited(span, spacing, delim, stream)
            }
            token::CloseDelim(_) | token::Eof => unreachable!(),
            _ => {
                let prev_spacing = self.token_spacing;
                self.bump();
                TokenTree::Token(self.prev_token.clone(), prev_spacing)
            }
        }
    }

    pub fn parse_tokens(&mut self) -> TokenStream {
        let mut result = Vec::new();
        loop {
            match self.token.kind {
                token::Eof | token::CloseDelim(..) => break,
                _ => result.push(self.parse_token_tree()),
            }
        }
        TokenStream::new(result)
    }

    /// Evaluates the closure with restrictions in place.
    ///
    /// After the closure is evaluated, restrictions are reset.
    fn with_res<T>(&mut self, res: Restrictions, f: impl FnOnce(&mut Self) -> T) -> T {
        let old = self.restrictions;
        self.restrictions = res;
        let res = f(self);
        self.restrictions = old;
        res
    }

    /// Parses `pub` and `pub(in path)` plus shortcuts `pub(crate)` for `pub(in crate)`, `pub(self)`
    /// for `pub(in self)` and `pub(super)` for `pub(in super)`.
    /// If the following element can't be a tuple (i.e., it's a function definition), then
    /// it's not a tuple struct field, and the contents within the parentheses aren't valid,
    /// so emit a proper diagnostic.
    // Public for rustfmt usage.
    pub fn parse_visibility(&mut self, fbt: FollowedByType) -> PResult<'a, Visibility> {
        maybe_whole!(self, NtVis, |vis| vis.into_inner());

        if !self.eat_keyword(kw::Pub) {
            // We need a span for our `Spanned<VisibilityKind>`, but there's inherently no
            // keyword to grab a span from for inherited visibility; an empty span at the
            // beginning of the current token would seem to be the "Schelling span".
            return Ok(Visibility {
                span: self.token.span.shrink_to_lo(),
                kind: VisibilityKind::Inherited,
                tokens: None,
            });
        }
        let lo = self.prev_token.span;

        if self.check(&token::OpenDelim(Delimiter::Parenthesis)) {
            // We don't `self.bump()` the `(` yet because this might be a struct definition where
            // `()` or a tuple might be allowed. For example, `struct Struct(pub (), pub (usize));`.
            // Because of this, we only `bump` the `(` if we're assured it is appropriate to do so
            // by the following tokens.
            if self.is_keyword_ahead(1, &[kw::In]) {
                // Parse `pub(in path)`.
                self.bump(); // `(`
                self.bump(); // `in`
                let path = self.parse_path(PathStyle::Mod)?; // `path`
                self.expect(&token::CloseDelim(Delimiter::Parenthesis))?; // `)`
                let vis = VisibilityKind::Restricted {
                    path: P(path),
                    id: ast::DUMMY_NODE_ID,
                    shorthand: false,
                };
                return Ok(Visibility {
                    span: lo.to(self.prev_token.span),
                    kind: vis,
                    tokens: None,
                });
            } else if self.look_ahead(2, |t| t == &token::CloseDelim(Delimiter::Parenthesis))
                && self.is_keyword_ahead(1, &[kw::Crate, kw::Super, kw::SelfLower])
            {
                // Parse `pub(crate)`, `pub(self)`, or `pub(super)`.
                self.bump(); // `(`
                let path = self.parse_path(PathStyle::Mod)?; // `crate`/`super`/`self`
                self.expect(&token::CloseDelim(Delimiter::Parenthesis))?; // `)`
                let vis = VisibilityKind::Restricted {
                    path: P(path),
                    id: ast::DUMMY_NODE_ID,
                    shorthand: true,
                };
                return Ok(Visibility {
                    span: lo.to(self.prev_token.span),
                    kind: vis,
                    tokens: None,
                });
            } else if let FollowedByType::No = fbt {
                // Provide this diagnostic if a type cannot follow;
                // in particular, if this is not a tuple struct.
                self.recover_incorrect_vis_restriction()?;
                // Emit diagnostic, but continue with public visibility.
            }
        }

        Ok(Visibility { span: lo, kind: VisibilityKind::Public, tokens: None })
    }
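
    // Illustrative sketch (added commentary): the visibilities accepted above
    // include
    //
    //     pub fn a() {}                  // VisibilityKind::Public
    //     pub(crate) fn b() {}           // Restricted, shorthand: true
    //     pub(super) fn c() {}           // Restricted, shorthand: true
    //     pub(in crate::util) fn d() {}  // Restricted, shorthand: false
    //
    // while an item with no `pub` at all gets `VisibilityKind::Inherited`.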

    /// Recovery for e.g. `pub(something) fn ...` or `struct X { pub(something) y: Z }`
    fn recover_incorrect_vis_restriction(&mut self) -> PResult<'a, ()> {
        self.bump(); // `(`
        let path = self.parse_path(PathStyle::Mod)?;
        self.expect(&token::CloseDelim(Delimiter::Parenthesis))?; // `)`

        let path_str = pprust::path_to_string(&path);
        self.dcx()
            .emit_err(IncorrectVisibilityRestriction { span: path.span, inner_str: path_str });

        Ok(())
    }

    /// Parses `extern string_literal?`.
    fn parse_extern(&mut self, case: Case) -> Extern {
        if self.eat_keyword_case(kw::Extern, case) {
            let mut extern_span = self.prev_token.span;
            let abi = self.parse_abi();
            if let Some(abi) = abi {
                extern_span = extern_span.to(abi.span);
            }
            Extern::from_abi(abi, extern_span)
        } else {
            Extern::None
        }
    }
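
    // Illustrative sketch (added commentary): both of
    //
    //     extern "C" fn callback() {}
    //     extern fn implicit() {}
    //
    // are accepted here; the second has no ABI string literal, so `parse_abi`
    // returns `None` and `Extern::from_abi` records only the `extern` span.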

    /// Parses a string literal as an ABI spec.
    fn parse_abi(&mut self) -> Option<StrLit> {
        match self.parse_str_lit() {
            Ok(str_lit) => Some(str_lit),
            Err(Some(lit)) => match lit.kind {
                ast::LitKind::Err(_) => None,
                _ => {
                    self.dcx().emit_err(NonStringAbiLiteral { span: lit.span });
                    None
                }
            },
            Err(None) => None,
        }
    }

    pub fn collect_tokens_no_attrs<R: HasAttrs + HasTokens>(
        &mut self,
        f: impl FnOnce(&mut Self) -> PResult<'a, R>,
    ) -> PResult<'a, R> {
        // The only reason to call `collect_tokens_no_attrs` is if you want tokens, so use
        // `ForceCollect::Yes`.
        self.collect_tokens_trailing_token(
            AttrWrapper::empty(),
            ForceCollect::Yes,
            |this, _attrs| Ok((f(this)?, TrailingToken::None)),
        )
    }

    /// `::{` or `::*`
    fn is_import_coupler(&mut self) -> bool {
        self.check(&token::PathSep)
            && self.look_ahead(1, |t| {
                *t == token::OpenDelim(Delimiter::Brace) || *t == token::BinOp(token::Star)
            })
    }
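
    // Illustrative sketch (added commentary): `is_import_coupler` fires on the
    // `::` immediately preceding a brace or glob in a `use` tree, e.g.
    //
    //     use std::collections::{HashMap, HashSet};
    //     use std::io::*;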

    pub fn clear_expected_tokens(&mut self) {
        self.expected_tokens.clear();
    }

    pub fn approx_token_stream_pos(&self) -> usize {
        self.num_bump_calls
    }
}

pub(crate) fn make_unclosed_delims_error(
    unmatched: UnmatchedDelim,
    psess: &ParseSess,
) -> Option<Diag<'_>> {
    // `None` here means an `Eof` was found. We already emit those errors elsewhere; we add them to
    // `unmatched_delims` only for error recovery in the `Parser`.
    let found_delim = unmatched.found_delim?;
    let mut spans = vec![unmatched.found_span];
    if let Some(sp) = unmatched.unclosed_span {
        spans.push(sp);
    };
    let err = psess.dcx.create_err(MismatchedClosingDelimiter {
        spans,
        delimiter: pprust::token_kind_to_string(&token::CloseDelim(found_delim)).to_string(),
        unmatched: unmatched.found_span,
        opening_candidate: unmatched.candidate_span,
        unclosed: unmatched.unclosed_span,
    });
    Some(err)
}

/// A helper struct used when building an `AttrTokenStream` from
/// a `LazyAttrTokenStream`. Both delimiter and non-delimiter tokens
/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
/// is then 'parsed' to build up an `AttrTokenStream` with nested
/// `AttrTokenTree::Delimited` tokens.
#[derive(Debug, Clone)]
pub enum FlatToken {
    /// A token. This holds both delimiter (e.g. '{' and '}')
    /// and non-delimiter tokens.
    Token(Token),
    /// Holds the `AttributesData` for an AST node. The
    /// `AttributesData` is inserted directly into the
    /// constructed `AttrTokenStream` as
    /// an `AttrTokenTree::Attributes`.
    AttrTarget(AttributesData),
    /// A special 'empty' token that is ignored during the conversion
    /// to an `AttrTokenStream`. This is used to simplify the
    /// handling of replace ranges.
    Empty,
}

// Metavar captures of various kinds.
#[derive(Clone, Debug)]
pub enum ParseNtResult<NtType> {
    Tt(TokenTree),
    Nt(NtType),
}

impl<T> ParseNtResult<T> {
    pub fn map_nt<F, U>(self, mut f: F) -> ParseNtResult<U>
    where
        F: FnMut(T) -> U,
    {
        match self {
            ParseNtResult::Tt(tt) => ParseNtResult::Tt(tt),
            ParseNtResult::Nt(nt) => ParseNtResult::Nt(f(nt)),
        }
    }
}
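
// Illustrative sketch (added commentary): `map_nt` converts the nonterminal
// payload while passing token trees through unchanged, e.g. (with a
// hypothetical `u32` payload)
//
//     let r: ParseNtResult<String> =
//         ParseNtResult::Nt(42u32).map_nt(|n| n.to_string());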