pub mod attr;
mod diagnostics;
mod expr;
mod generics;
mod item;
mod nonterminal;
mod pat;
mod path;
mod stmt;
mod ty;

use crate::lexer::UnmatchedBrace;
pub use diagnostics::AttemptLocalParseRecovery;
use diagnostics::Error;
pub use path::PathStyle;

use rustc_ast::ptr::P;
use rustc_ast::token::{self, DelimToken, Token, TokenKind};
use rustc_ast::tokenstream::{self, DelimSpan, LazyTokenStream, Spacing};
use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree, TreeAndSpacing};
use rustc_ast::DUMMY_NODE_ID;
use rustc_ast::{self as ast, AnonConst, AttrStyle, AttrVec, Const, CrateSugar, Extern, HasTokens};
use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit, Unsafe};
use rustc_ast::{Visibility, VisibilityKind};
use rustc_ast_pretty::pprust;
use rustc_data_structures::sync::Lrc;
use rustc_errors::PResult;
use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, FatalError};
use rustc_session::parse::ParseSess;
use rustc_span::source_map::{Span, DUMMY_SP};
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use tracing::debug;

use std::{cmp, mem, slice};

bitflags::bitflags! {
    struct Restrictions: u8 {
        const STMT_EXPR = 1 << 0;
        const NO_STRUCT_LITERAL = 1 << 1;
        const CONST_EXPR = 1 << 2;
    }
}

#[derive(Clone, Copy, PartialEq, Debug)]
enum SemiColonMode {
    Break,
    Ignore,
    Comma,
}

#[derive(Clone, Copy, PartialEq, Debug)]
enum BlockMode {
    Break,
    Ignore,
}

/// Whether or not we should force collection of tokens for an AST node,
/// regardless of whether or not it has attributes.
pub enum ForceCollect {
    Yes,
    No,
}

pub enum TrailingToken {
    None,
    Semi,
}

/// Like `maybe_whole_expr`, but for things other than expressions.
#[macro_export]
macro_rules! maybe_whole {
    ($p:expr, $constructor:ident, |$x:ident| $e:expr) => {
        if let token::Interpolated(nt) = &$p.token.kind {
            if let token::$constructor(x) = &**nt {
                let $x = x.clone();
                $p.bump();
                return Ok($e);
            }
        }
    };
}
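
// Illustrative usage sketch: a `parse_*` method can short-circuit on an
// already-interpolated nonterminal of the right kind, e.g.
//
//     maybe_whole!(self, NtBlock, |block| block);
//
// If `self.token` is `token::Interpolated(NtBlock(..))`, this bumps past the token
// and returns `Ok(block)` from the enclosing function; otherwise parsing continues.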

/// If the next tokens are ill-formed `$ty::` recover them as `<$ty>::`.
#[macro_export]
macro_rules! maybe_recover_from_interpolated_ty_qpath {
    ($self: expr, $allow_qpath_recovery: expr) => {
        if $allow_qpath_recovery && $self.look_ahead(1, |t| t == &token::ModSep) {
            if let token::Interpolated(nt) = &$self.token.kind {
                if let token::NtTy(ty) = &**nt {
                    let ty = ty.clone();
                    $self.bump();
                    return $self.maybe_recover_from_bad_qpath_stage_2($self.prev_token.span, ty);
                }
            }
        }
    };
}
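
// Illustrative example: for input that ends up as `$ty::CONST`, where `$ty` is an
// interpolated type fragment, the parser recovers by treating it as the well-formed
// qualified path `<$ty>::CONST` and emitting a suggestion rather than failing outright.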

#[derive(Clone)]
pub struct Parser<'a> {
    pub sess: &'a ParseSess,
    /// The current token.
    pub token: Token,
    /// The spacing for the current token.
    pub token_spacing: Spacing,
    /// The previous token.
    pub prev_token: Token,
    restrictions: Restrictions,
    expected_tokens: Vec<TokenType>,
    // Important: This must only be advanced from `next_tok`
    // to ensure that `token_cursor.num_next_calls` is updated properly.
    token_cursor: TokenCursor,
    desugar_doc_comments: bool,
    /// This field is used to keep track of how many left angle brackets we have seen. This is
    /// required in order to detect extra leading left angle brackets (`<` characters) and error
    /// appropriately.
    ///
    /// See the comments in the `parse_path_segment` function for more details.
    unmatched_angle_bracket_count: u32,
    max_angle_bracket_count: u32,
    /// A list of all unclosed delimiters found by the lexer. If an entry is used for error
    /// recovery, it gets removed from here. Every entry left at the end gets emitted as an
    /// independent error.
    pub(super) unclosed_delims: Vec<UnmatchedBrace>,
    last_unexpected_token_span: Option<Span>,
    /// Span pointing at the `:` for the last type ascription the parser has seen, and whether it
    /// looked like it could have been a mistyped path or literal (`Option:Some(42)`).
    pub last_type_ascription: Option<(Span, bool /* likely path typo */)>,
    /// If present, this `Parser` is not parsing Rust code but rather a macro call.
    subparser_name: Option<&'static str>,
}

impl<'a> Drop for Parser<'a> {
    fn drop(&mut self) {
        emit_unclosed_delims(&mut self.unclosed_delims, &self.sess);
    }
}

#[derive(Clone)]
struct TokenCursor {
    frame: TokenCursorFrame,
    stack: Vec<TokenCursorFrame>,
    desugar_doc_comments: bool,
    // Counts the number of calls to `next` or `next_desugared`,
    // depending on whether `desugar_doc_comments` is set.
    num_next_calls: usize,
    // During parsing, we may sometimes need to 'unglue' a glued token into two
    // component tokens (e.g. '>>' into '>' and '>'), so that the parser can consume
    // them one at a time. This process bypasses the normal capturing mechanism
    // (e.g. `num_next_calls` will not be incremented), since the 'unglued' tokens
    // do not exist in the original `TokenStream`.
    //
    // If we end up consuming both unglued tokens, then this is not an issue -
    // we'll end up capturing the single 'glued' token.
    //
    // However, in certain circumstances, we may want to capture just the first
    // 'unglued' token. For example, capturing the `Vec<u8>` in `Option<Vec<u8>>`
    // requires us to unglue the trailing `>>` token. The `append_unglued_token`
    // field is used to track this token - it gets appended to the captured stream
    // when we evaluate a `LazyTokenStream`.
    append_unglued_token: Option<TreeAndSpacing>,
}

#[derive(Clone)]
struct TokenCursorFrame {
    delim: token::DelimToken,
    span: DelimSpan,
    open_delim: bool,
    tree_cursor: tokenstream::Cursor,
    close_delim: bool,
}

impl TokenCursorFrame {
    fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self {
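        // For a `NoDelim` frame there are no delimiter tokens to emit, so both
        // delimiter flags below start out as "already done" and the cursor yields
        // the inner token trees directly.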
        TokenCursorFrame {
            delim,
            span,
            open_delim: delim == token::NoDelim,
            tree_cursor: tts.into_trees(),
            close_delim: delim == token::NoDelim,
        }
    }
}

impl TokenCursor {
    fn next(&mut self) -> (Token, Spacing) {
        loop {
            let (tree, spacing) = if !self.frame.open_delim {
                self.frame.open_delim = true;
                TokenTree::open_tt(self.frame.span, self.frame.delim).into()
            } else if let Some(tree) = self.frame.tree_cursor.next_with_spacing() {
                tree
            } else if !self.frame.close_delim {
                self.frame.close_delim = true;
                TokenTree::close_tt(self.frame.span, self.frame.delim).into()
            } else if let Some(frame) = self.stack.pop() {
                self.frame = frame;
                continue;
            } else {
                (TokenTree::Token(Token::new(token::Eof, DUMMY_SP)), Spacing::Alone)
            };

            match tree {
                TokenTree::Token(token) => {
                    return (token, spacing);
                }
                TokenTree::Delimited(sp, delim, tts) => {
                    let frame = TokenCursorFrame::new(sp, delim, tts);
                    self.stack.push(mem::replace(&mut self.frame, frame));
                }
            }
        }
    }

    fn next_desugared(&mut self) -> (Token, Spacing) {
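        // If the token just read is a doc comment, desugar it into the equivalent
        // `#[doc = r"..."]` (or `#![doc = r"..."]` for inner doc comments) attribute
        // tokens, pushed as a new `NoDelim` frame so that subsequent `next()` calls
        // yield the attribute's tokens one by one.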
        let (data, attr_style, sp) = match self.next() {
            (Token { kind: token::DocComment(_, attr_style, data), span }, _) => {
                (data, attr_style, span)
            }
            tok => return tok,
        };

        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
        // required to wrap the text.
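        // For example, doc text containing the sequence `"#` needs two hashes and is
        // emitted below as `r##"..."##`, while text with no `"` at all needs none and
        // becomes `r"..."`.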
        let mut num_of_hashes = 0;
        let mut count = 0;
        for ch in data.as_str().chars() {
            count = match ch {
                '"' => 1,
                '#' if count > 0 => count + 1,
                _ => 0,
            };
            num_of_hashes = cmp::max(num_of_hashes, count);
        }

        let delim_span = DelimSpan::from_single(sp);
        let body = TokenTree::Delimited(
            delim_span,
            token::Bracket,
            [
                TokenTree::token(token::Ident(sym::doc, false), sp),
                TokenTree::token(token::Eq, sp),
                TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), sp),
            ]
            .iter()
            .cloned()
            .collect::<TokenStream>(),
        );

        self.stack.push(mem::replace(
            &mut self.frame,
            TokenCursorFrame::new(
                delim_span,
                token::NoDelim,
                if attr_style == AttrStyle::Inner {
                    [TokenTree::token(token::Pound, sp), TokenTree::token(token::Not, sp), body]
                        .iter()
                        .cloned()
                        .collect::<TokenStream>()
                } else {
                    [TokenTree::token(token::Pound, sp), body]
                        .iter()
                        .cloned()
                        .collect::<TokenStream>()
                },
            ),
        ));

        self.next()
    }
}

#[derive(Debug, Clone, PartialEq)]
enum TokenType {
    Token(TokenKind),
    Keyword(Symbol),
    Operator,
    Lifetime,
    Ident,
    Path,
    Type,
    Const,
}

impl TokenType {
    fn to_string(&self) -> String {
        match *self {
            TokenType::Token(ref t) => format!("`{}`", pprust::token_kind_to_string(t)),
            TokenType::Keyword(kw) => format!("`{}`", kw),
            TokenType::Operator => "an operator".to_string(),
            TokenType::Lifetime => "lifetime".to_string(),
            TokenType::Ident => "identifier".to_string(),
            TokenType::Path => "path".to_string(),
            TokenType::Type => "type".to_string(),
            TokenType::Const => "a const expression".to_string(),
        }
    }
}

#[derive(Copy, Clone, Debug)]
enum TokenExpectType {
    Expect,
    NoExpect,
}

/// A sequence separator.
struct SeqSep {
    /// The separator token.
    sep: Option<TokenKind>,
    /// `true` if a trailing separator is allowed.
    trailing_sep_allowed: bool,
}
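
// Illustrative note: `SeqSep::trailing_allowed(token::Comma)` describes a
// comma-separated sequence in which a trailing comma is accepted, while
// `SeqSep::none()` describes a sequence with no separator at all.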

impl SeqSep {
    fn trailing_allowed(t: TokenKind) -> SeqSep {
        SeqSep { sep: Some(t), trailing_sep_allowed: true }
    }

    fn none() -> SeqSep {
        SeqSep { sep: None, trailing_sep_allowed: false }
    }
}

pub enum FollowedByType {
    Yes,
    No,
}

fn token_descr_opt(token: &Token) -> Option<&'static str> {
    Some(match token.kind {
        _ if token.is_special_ident() => "reserved identifier",
        _ if token.is_used_keyword() => "keyword",
        _ if token.is_unused_keyword() => "reserved keyword",
        token::DocComment(..) => "doc comment",
        _ => return None,
    })
}

pub(super) fn token_descr(token: &Token) -> String {
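    // For example (illustrative): the keyword token `fn` is described as
    // "keyword `fn`", while an ordinary token like `+` is described as just "`+`".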
    let token_str = pprust::token_to_string(token);
    match token_descr_opt(token) {
        Some(prefix) => format!("{} `{}`", prefix, token_str),
        _ => format!("`{}`", token_str),
    }
}

impl<'a> Parser<'a> {
    pub fn new(
        sess: &'a ParseSess,
        tokens: TokenStream,
        desugar_doc_comments: bool,
        subparser_name: Option<&'static str>,
    ) -> Self {
        let mut parser = Parser {
            sess,
            token: Token::dummy(),
            token_spacing: Spacing::Alone,
            prev_token: Token::dummy(),
            restrictions: Restrictions::empty(),
            expected_tokens: Vec::new(),
            token_cursor: TokenCursor {
                frame: TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens),
                stack: Vec::new(),
                num_next_calls: 0,
                desugar_doc_comments,
                append_unglued_token: None,
            },
            desugar_doc_comments,
            unmatched_angle_bracket_count: 0,
            max_angle_bracket_count: 0,
            unclosed_delims: Vec::new(),
            last_unexpected_token_span: None,
            last_type_ascription: None,
            subparser_name,
        };

        // Make parser point to the first token.
        parser.bump();

        parser
    }

    fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) {
        let (mut next, spacing) = if self.desugar_doc_comments {
            self.token_cursor.next_desugared()
        } else {
            self.token_cursor.next()
        };
|
Rewrite `collect_tokens` implementations to use a flattened buffer
Instead of trying to collect tokens at each depth, we 'flatten' the
stream as we go allong, pushing open/close delimiters to our buffer
just like regular tokens. One capturing is complete, we reconstruct a
nested `TokenTree::Delimited` structure, producing a normal
`TokenStream`.
The reconstructed `TokenStream` is not created immediately - instead, it is
produced on-demand by a closure (wrapped in a new `LazyTokenStream` type). This
closure stores a clone of the original `TokenCursor`, plus a record of the
number of calls to `next()/next_desugared()`. This is sufficient to reconstruct
the tokenstream seen by the callback without storing any additional state. If
the tokenstream is never used (e.g. when a captured `macro_rules!` argument is
never passed to a proc macro), we never actually create a `TokenStream`.
This implementation has a number of advantages over the previous one:
* It is significantly simpler, with no edge cases around capturing the
start/end of a delimited group.
* It can be easily extended to allow replacing tokens an an arbitrary
'depth' by just using `Vec::splice` at the proper position. This is
important for PR #76130, which requires us to track information about
attributes along with tokens.
* The lazy approach to `TokenStream` construction allows us to easily
parse an AST struct, and then decide after the fact whether we need a
`TokenStream`. This will be useful when we start collecting tokens for
`Attribute` - we can discard the `LazyTokenStream` if the parsed
attribute doesn't need tokens (e.g. is a builtin attribute).
The performance impact seems to be neglibile (see
https://github.com/rust-lang/rust/pull/77250#issuecomment-703960604). There is a
small slowdown on a few benchmarks, but it only rises above 1% for incremental
builds, where it represents a larger fraction of the much smaller instruction
count. There a ~1% speedup on a few other incremental benchmarks - my guess is
that the speedups and slowdowns will usually cancel out in practice.
2020-09-27 01:56:29 +00:00
|
|
|
self.token_cursor.num_next_calls += 1;
|
2020-12-12 20:20:22 +00:00
|
|
|
// We've retrieved an token from the underlying
|
|
|
|
// cursor, so we no longer need to worry about
|
|
|
|
// an unglued token. See `break_and_eat` for more details
|
|
|
|
self.token_cursor.append_unglued_token = None;
|
2019-06-04 15:48:40 +00:00
|
|
|
if next.span.is_dummy() {
|
2018-04-22 01:10:15 +00:00
|
|
|
// Tweak the location for better diagnostics, but keep syntactic context intact.
|
2020-02-16 13:47:24 +00:00
|
|
|
next.span = fallback_span.with_ctxt(next.span.ctxt());
|
2014-05-25 23:27:36 +00:00
|
|
|
}
|
Rewrite `collect_tokens` implementations to use a flattened buffer
Instead of trying to collect tokens at each depth, we 'flatten' the
stream as we go allong, pushing open/close delimiters to our buffer
just like regular tokens. One capturing is complete, we reconstruct a
nested `TokenTree::Delimited` structure, producing a normal
`TokenStream`.
The reconstructed `TokenStream` is not created immediately - instead, it is
produced on-demand by a closure (wrapped in a new `LazyTokenStream` type). This
closure stores a clone of the original `TokenCursor`, plus a record of the
number of calls to `next()/next_desugared()`. This is sufficient to reconstruct
the tokenstream seen by the callback without storing any additional state. If
the tokenstream is never used (e.g. when a captured `macro_rules!` argument is
never passed to a proc macro), we never actually create a `TokenStream`.
This implementation has a number of advantages over the previous one:
* It is significantly simpler, with no edge cases around capturing the
start/end of a delimited group.
* It can be easily extended to allow replacing tokens an an arbitrary
'depth' by just using `Vec::splice` at the proper position. This is
important for PR #76130, which requires us to track information about
attributes along with tokens.
* The lazy approach to `TokenStream` construction allows us to easily
parse an AST struct, and then decide after the fact whether we need a
`TokenStream`. This will be useful when we start collecting tokens for
`Attribute` - we can discard the `LazyTokenStream` if the parsed
attribute doesn't need tokens (e.g. is a builtin attribute).
The performance impact seems to be neglibile (see
https://github.com/rust-lang/rust/pull/77250#issuecomment-703960604). There is a
small slowdown on a few benchmarks, but it only rises above 1% for incremental
builds, where it represents a larger fraction of the much smaller instruction
count. There a ~1% speedup on a few other incremental benchmarks - my guess is
that the speedups and slowdowns will usually cancel out in practice.
2020-09-27 01:56:29 +00:00
|
|
|
(next, spacing)
|
2014-05-25 23:27:36 +00:00
|
|
|
}

    pub fn unexpected<T>(&mut self) -> PResult<'a, T> {
        match self.expect_one_of(&[], &[]) {
            Err(e) => Err(e),
            // We can get `Ok(true)` from `recover_closing_delimiter`
            // which is called in `expected_one_of_not_found`.
            Ok(_) => FatalError.raise(),
        }
    }

    // Note: the parser records every token it checks for in `expected_tokens` so that
    // "expected <foo>, found <bar>" errors can be more accurate. For example, for
    // `let x: [int ..4];` it used to report "expected `]`, found `..`", even though a
    // comma and a number of other tokens would also have been valid there; tracking the
    // checked tokens lets it report
    // "expected one of `(`, `+`, `,`, `::`, or `]`, found `..`" instead.

    /// Expects and consumes the token `t`. Signals an error if the next token is not `t`.
    pub fn expect(&mut self, t: &TokenKind) -> PResult<'a, bool /* recovered */> {
        if self.expected_tokens.is_empty() {
            if self.token == *t {
                self.bump();
                Ok(false)
            } else {
                self.unexpected_try_recover(t)
            }
        } else {
            self.expect_one_of(slice::from_ref(t), &[])
        }
    }

    /// Expects the next token to be an edible or inedible token. If edible,
    /// then consume it; if inedible, then return without consuming
    /// anything. Signals a fatal error if the next token is unexpected.
    pub fn expect_one_of(
        &mut self,
        edible: &[TokenKind],
        inedible: &[TokenKind],
    ) -> PResult<'a, bool /* recovered */> {
        if edible.contains(&self.token.kind) {
            self.bump();
            Ok(false)
        } else if inedible.contains(&self.token.kind) {
            // leave it in the input
            Ok(false)
        } else if self.last_unexpected_token_span == Some(self.token.span) {
            FatalError.raise();
        } else {
            self.expected_one_of_not_found(edible, inedible)
        }
    }
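
    // Illustrative sketch (not part of the parser): the edible/inedible split lets a
    // caller accept a token without consuming it, so an enclosing parser can still see
    // it. A hypothetical caller accepting either `;` (consumed) or `}` (left in place
    // for the surrounding block) might look like this:
    //
    //     fn expect_stmt_end<'a>(p: &mut Parser<'a>) -> PResult<'a, bool> {
    //         p.expect_one_of(&[token::Semi], &[token::CloseDelim(token::Brace)])
    //     }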

    // Public for rustfmt usage.
    pub fn parse_ident(&mut self) -> PResult<'a, Ident> {
        self.parse_ident_common(true)
    }

    fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, Ident> {
        match self.token.ident() {
            Some((ident, is_raw)) => {
                if !is_raw && ident.is_reserved() {
                    let mut err = self.expected_ident_found();
                    if recover {
                        err.emit();
                    } else {
                        return Err(err);
                    }
                }
                self.bump();
                Ok(ident)
            }
            _ => Err(match self.prev_token.kind {
                TokenKind::DocComment(..) => {
                    self.span_fatal_err(self.prev_token.span, Error::UselessDocComment)
                }
                _ => self.expected_ident_found(),
            }),
        }
    }

    /// Checks if the next token is `tok`, and returns `true` if so.
    ///
    /// This method will automatically add `tok` to `expected_tokens` if `tok` is not
    /// encountered.
    fn check(&mut self, tok: &TokenKind) -> bool {
        let is_present = self.token == *tok;
        if !is_present {
            self.expected_tokens.push(TokenType::Token(tok.clone()));
        }
        is_present
    }

    /// Consumes a token `tok` if it exists. Returns whether the given token was present.
    pub fn eat(&mut self, tok: &TokenKind) -> bool {
        let is_present = self.check(tok);
        if is_present {
            self.bump()
        }
        is_present
    }
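
    // Illustrative sketch (not part of the parser): `check`, `eat`, and `expect` are the
    // primitives most `parse_*` routines are written with. A hypothetical helper for an
    // optional `#!` marker could be composed from them roughly like this:
    //
    //     fn parse_marker<'a>(p: &mut Parser<'a>) -> PResult<'a, bool> {
    //         if p.eat(&token::Pound) {        // consume `#` if present
    //             p.expect(&token::Not)?;      // then `!` must follow
    //             return Ok(true);
    //         }
    //         Ok(p.check(&token::Semi))        // peek without consuming
    //     }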

    /// If the next token is the given keyword, returns `true` without eating it.
    /// An expectation is also added for diagnostics purposes.
    fn check_keyword(&mut self, kw: Symbol) -> bool {
        self.expected_tokens.push(TokenType::Keyword(kw));
        self.token.is_keyword(kw)
    }

    /// If the next token is the given keyword, eats it and returns `true`.
    /// Otherwise, returns `false`. An expectation is also added for diagnostics purposes.
    // Public for rustfmt usage.
    pub fn eat_keyword(&mut self, kw: Symbol) -> bool {
        if self.check_keyword(kw) {
            self.bump();
            true
        } else {
            false
        }
    }

    fn eat_keyword_noexpect(&mut self, kw: Symbol) -> bool {
        if self.token.is_keyword(kw) {
            self.bump();
            true
        } else {
            false
        }
    }

    /// If the given word is not a keyword, signals an error.
    /// If the next token is not the given word, signals an error.
    /// Otherwise, eats it.
    fn expect_keyword(&mut self, kw: Symbol) -> PResult<'a, ()> {
        if !self.eat_keyword(kw) { self.unexpected() } else { Ok(()) }
    }

    /// Is the given keyword `kw` followed by a non-reserved identifier?
    fn is_kw_followed_by_ident(&self, kw: Symbol) -> bool {
        self.token.is_keyword(kw) && self.look_ahead(1, |t| t.is_ident() && !t.is_reserved_ident())
    }

    fn check_or_expected(&mut self, ok: bool, typ: TokenType) -> bool {
        if ok {
            true
        } else {
            self.expected_tokens.push(typ);
            false
        }
    }

    fn check_ident(&mut self) -> bool {
        self.check_or_expected(self.token.is_ident(), TokenType::Ident)
    }

    fn check_path(&mut self) -> bool {
        self.check_or_expected(self.token.is_path_start(), TokenType::Path)
    }

    fn check_type(&mut self) -> bool {
        self.check_or_expected(self.token.can_begin_type(), TokenType::Type)
    }

    fn check_const_arg(&mut self) -> bool {
        self.check_or_expected(self.token.can_begin_const_arg(), TokenType::Const)
    }

    fn check_inline_const(&self, dist: usize) -> bool {
        self.is_keyword_ahead(dist, &[kw::Const])
            && self.look_ahead(dist + 1, |t| match t.kind {
                token::Interpolated(ref nt) => matches!(**nt, token::NtBlock(..)),
                token::OpenDelim(DelimToken::Brace) => true,
                _ => false,
            })
    }

    /// Checks to see if the next token is either `+` or `+=`.
    /// Otherwise returns `false`.
    fn check_plus(&mut self) -> bool {
        self.check_or_expected(
            self.token.is_like_plus(),
            TokenType::Token(token::BinOp(token::Plus)),
        )
    }

    /// Eats the expected token if it's present, possibly breaking
    /// compound tokens like multi-character operators in the process.
    /// Returns `true` if the token was eaten.
    fn break_and_eat(&mut self, expected: TokenKind) -> bool {
        if self.token.kind == expected {
            self.bump();
            return true;
        }
        match self.token.kind.break_two_token_op() {
            Some((first, second)) if first == expected => {
                let first_span = self.sess.source_map().start_point(self.token.span);
                let second_span = self.token.span.with_lo(first_span.hi());
                self.token = Token::new(first, first_span);
                // Keep track of this token - if we end token capturing now,
                // we'll want to append this token to the captured stream.
                //
                // If we consume any additional tokens, then this token
                // is not needed (we'll capture the entire 'glued' token),
                // and `next_tok` will set this field to `None`.
                self.token_cursor.append_unglued_token =
                    Some((TokenTree::Token(self.token.clone()), Spacing::Alone));
                // Use the spacing of the glued token as the spacing
                // of the unglued second token.
                self.bump_with((Token::new(second, second_span), self.token_spacing));
                true
            }
            _ => {
                self.expected_tokens.push(TokenType::Token(expected));
                false
            }
        }
    }
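
    // Illustrative sketch (not part of the parser): when parsing the closing angle
    // brackets of `Vec<Vec<u8>>`, the lexer produces a single `>>` token. `expect_gt`
    // goes through `break_and_eat(token::Gt)`, which splits `>>` into `>` + `>`,
    // consumes the first, and leaves the second as the current token for the outer
    // generic argument list. Roughly:
    //
    //     // current token: `>>`
    //     parser.expect_gt()?;   // eats the first `>` (inner `Vec<u8>` closed)
    //     parser.expect_gt()?;   // eats the remaining `>` (outer list closed)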
|
2018-05-25 21:36:23 +00:00
|
|
|
|
2020-02-22 13:22:38 +00:00
|
|
|
/// Eats `+` possibly breaking tokens like `+=` in process.
|
|
|
|
fn eat_plus(&mut self) -> bool {
|
|
|
|
self.break_and_eat(token::BinOp(token::Plus))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Eats `&` possibly breaking tokens like `&&` in process.
|
|
|
|
/// Signals an error if `&` is not eaten.
|
2015-12-20 21:00:43 +00:00
|
|
|
fn expect_and(&mut self) -> PResult<'a, ()> {
|
2020-02-22 13:22:38 +00:00
|
|
|
if self.break_and_eat(token::BinOp(token::And)) { Ok(()) } else { self.unexpected() }
|
2014-04-17 08:35:31 +00:00
|
|
|
}
|
|
|
|
|
2020-02-22 13:22:38 +00:00
|
|
|
/// Eats `|` possibly breaking tokens like `||` in process.
|
|
|
|
/// Signals an error if `|` was not eaten.
|
2017-09-07 06:07:49 +00:00
|
|
|
fn expect_or(&mut self) -> PResult<'a, ()> {
|
2020-02-22 13:22:38 +00:00
|
|
|
if self.break_and_eat(token::BinOp(token::Or)) { Ok(()) } else { self.unexpected() }
|
2017-09-07 06:07:49 +00:00
|
|
|
}
|
|
|
|
|
2020-02-22 13:22:38 +00:00
|
|
|
/// Eats `<` possibly breaking tokens like `<<` in process.
|
2015-12-30 23:11:53 +00:00
|
|
|
fn eat_lt(&mut self) -> bool {
|
2020-02-22 13:22:38 +00:00
|
|
|
let ate = self.break_and_eat(token::Lt);
|
2019-01-23 01:35:13 +00:00
|
|
|
if ate {
|
|
|
|
// See doc comment for `unmatched_angle_bracket_count`.
|
|
|
|
self.unmatched_angle_bracket_count += 1;
|
2019-01-28 05:04:50 +00:00
|
|
|
self.max_angle_bracket_count += 1;
|
2019-01-23 01:35:13 +00:00
|
|
|
debug!("eat_lt: (increment) count={:?}", self.unmatched_angle_bracket_count);
|
2014-05-11 04:27:44 +00:00
|
|
|
}
|
2019-01-23 01:35:13 +00:00
|
|
|
ate
|
2014-05-11 04:27:44 +00:00
|
|
|
}
|
|
|
|
|
2020-02-22 13:22:38 +00:00
|
|
|
/// Eats `<` possibly breaking tokens like `<<` in process.
|
|
|
|
/// Signals an error if `<` was not eaten.
|
2015-12-20 21:00:43 +00:00
|
|
|
fn expect_lt(&mut self) -> PResult<'a, ()> {
|
2020-02-22 13:22:38 +00:00
|
|
|
if self.eat_lt() { Ok(()) } else { self.unexpected() }
|
2014-05-11 04:27:44 +00:00
|
|
|
}
|
|
|
|
|
2020-02-22 13:22:38 +00:00
|
|
|
/// Eats `>` possibly breaking tokens like `>>` in process.
|
|
|
|
/// Signals an error if `>` was not eaten.
|
2018-05-31 22:53:30 +00:00
|
|
|
fn expect_gt(&mut self) -> PResult<'a, ()> {
|
2020-02-22 13:22:38 +00:00
|
|
|
if self.break_and_eat(token::Gt) {
|
|
|
|
// See doc comment for `unmatched_angle_bracket_count`.
|
|
|
|
if self.unmatched_angle_bracket_count > 0 {
|
|
|
|
self.unmatched_angle_bracket_count -= 1;
|
|
|
|
debug!("expect_gt: (decrement) count={:?}", self.unmatched_angle_bracket_count);
|
2019-12-22 22:42:04 +00:00
|
|
|
}
|
2020-02-22 13:22:38 +00:00
|
|
|
Ok(())
|
|
|
|
} else {
|
|
|
|
self.unexpected()
|
2013-06-15 01:21:47 +00:00
|
|
|
}
|
|
|
|
}

    fn expect_any_with_type(&mut self, kets: &[&TokenKind], expect: TokenExpectType) -> bool {
        kets.iter().any(|k| match expect {
            TokenExpectType::Expect => self.check(k),
            TokenExpectType::NoExpect => self.token == **k,
        })
    }

    fn parse_seq_to_before_tokens<T>(
        &mut self,
        kets: &[&TokenKind],
        sep: SeqSep,
        expect: TokenExpectType,
        mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
    ) -> PResult<'a, (Vec<T>, bool /* trailing */, bool /* recovered */)> {
        let mut first = true;
        let mut recovered = false;
        let mut trailing = false;
        let mut v = vec![];
        while !self.expect_any_with_type(kets, expect) {
            if let token::CloseDelim(..) | token::Eof = self.token.kind {
                break;
            }
            if let Some(ref t) = sep.sep {
                if first {
                    first = false;
                } else {
                    match self.expect(t) {
                        Ok(false) => {}
                        Ok(true) => {
                            recovered = true;
                            break;
                        }
                        Err(mut expect_err) => {
                            let sp = self.prev_token.span.shrink_to_hi();
                            let token_str = pprust::token_kind_to_string(t);

                            // Attempt to keep parsing if it was a similar separator.
                            if let Some(ref tokens) = t.similar_tokens() {
                                if tokens.contains(&self.token.kind) {
                                    self.bump();
                                }
                            }

                            // If this was a missing `@` in a binding pattern,
                            // bail with a suggestion.
                            // https://github.com/rust-lang/rust/issues/72373
                            if self.prev_token.is_ident() && self.token.kind == token::DotDot {
                                let msg = format!(
                                    "if you meant to bind the contents of \
                                    the rest of the array pattern into `{}`, use `@`",
                                    pprust::token_to_string(&self.prev_token)
                                );
                                expect_err
                                    .span_suggestion_verbose(
                                        self.prev_token.span.shrink_to_hi().until(self.token.span),
                                        &msg,
                                        " @ ".to_string(),
                                        Applicability::MaybeIncorrect,
                                    )
                                    .emit();
                                break;
                            }

                            // Attempt to keep parsing if it was an omitted separator.
                            match f(self) {
                                Ok(t) => {
                                    // Parsed successfully, therefore most probably the code only
                                    // misses a separator.
                                    expect_err
                                        .span_suggestion_short(
                                            sp,
                                            &format!("missing `{}`", token_str),
                                            token_str,
                                            Applicability::MaybeIncorrect,
                                        )
                                        .emit();

                                    v.push(t);
                                    continue;
                                }
                                Err(mut e) => {
                                    // Parsing failed, therefore it must be something more serious
                                    // than just a missing separator.
                                    expect_err.emit();

                                    e.cancel();
                                    break;
                                }
                            }
                        }
                    }
                }
            }
            if sep.trailing_sep_allowed && self.expect_any_with_type(kets, expect) {
                trailing = true;
                break;
            }

            let t = f(self)?;
            v.push(t);
        }

        Ok((v, trailing, recovered))
    }

    /// Parses a sequence, not including the closing delimiter. The function
    /// `f` must consume tokens until reaching the next separator or
    /// closing bracket.
    fn parse_seq_to_before_end<T>(
        &mut self,
        ket: &TokenKind,
        sep: SeqSep,
        f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
    ) -> PResult<'a, (Vec<T>, bool, bool)> {
        self.parse_seq_to_before_tokens(&[ket], sep, TokenExpectType::Expect, f)
    }

    /// Parses a sequence, including the closing delimiter. The function
    /// `f` must consume tokens until reaching the next separator or
    /// closing bracket.
    fn parse_seq_to_end<T>(
        &mut self,
        ket: &TokenKind,
        sep: SeqSep,
        f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
    ) -> PResult<'a, (Vec<T>, bool /* trailing */)> {
        let (val, trailing, recovered) = self.parse_seq_to_before_end(ket, sep, f)?;
        if !recovered {
            self.eat(ket);
        }
        Ok((val, trailing))
    }

    /// Parses a sequence, including the closing delimiter. The function
    /// `f` must consume tokens until reaching the next separator or
    /// closing bracket.
    fn parse_unspanned_seq<T>(
        &mut self,
        bra: &TokenKind,
        ket: &TokenKind,
        sep: SeqSep,
        f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
    ) -> PResult<'a, (Vec<T>, bool)> {
        self.expect(bra)?;
        self.parse_seq_to_end(ket, sep, f)
    }

    fn parse_delim_comma_seq<T>(
        &mut self,
        delim: DelimToken,
        f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
    ) -> PResult<'a, (Vec<T>, bool)> {
        self.parse_unspanned_seq(
            &token::OpenDelim(delim),
            &token::CloseDelim(delim),
            SeqSep::trailing_allowed(token::Comma),
            f,
        )
    }

    fn parse_paren_comma_seq<T>(
        &mut self,
        f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
    ) -> PResult<'a, (Vec<T>, bool)> {
        self.parse_delim_comma_seq(token::Paren, f)
    }
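
    // Illustrative sketch (not part of the parser): the sequence helpers factor out the
    // "delimited, comma-separated, trailing comma allowed" pattern. A caller parsing a
    // call-like argument list such as `(a, b, c,)` could be written roughly as:
    //
    //     let (args, _trailing) = parser.parse_paren_comma_seq(|p| p.parse_expr())?;
    //     // `args: Vec<P<Expr>>`; recovery for missing or similar separators is
    //     // handled inside `parse_seq_to_before_tokens`.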

    /// Advances the parser by one token, using the provided token as the next one.
    fn bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) {
        // Bumping after EOF is a bad sign, usually an infinite loop.
        if self.prev_token.kind == TokenKind::Eof {
            let msg = "attempted to bump the parser past EOF (may be stuck in a loop)";
            self.span_bug(self.token.span, msg);
        }

        // Update the current and previous tokens.
        self.prev_token = mem::replace(&mut self.token, next_token);
        self.token_spacing = next_spacing;

        // Diagnostics.
        self.expected_tokens.clear();
    }

    /// Advances the parser by one token.
    pub fn bump(&mut self) {
        let next_token = self.next_tok(self.token.span);
        self.bump_with(next_token);
    }

    /// Looks ahead `dist` tokens of `self.token` and gets access to that token there.
    /// When `dist == 0`, the current token is looked at.
    pub fn look_ahead<R>(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R {
        if dist == 0 {
            return looker(&self.token);
        }

        let frame = &self.token_cursor.frame;
        match frame.tree_cursor.look_ahead(dist - 1) {
            Some(tree) => match tree {
                TokenTree::Token(token) => looker(token),
                TokenTree::Delimited(dspan, delim, _) => {
                    looker(&Token::new(token::OpenDelim(*delim), dspan.open))
                }
            },
            None => looker(&Token::new(token::CloseDelim(frame.delim), frame.span.close)),
        }
    }

    /// Returns whether any of the given keywords are `dist` tokens ahead of the current one.
    fn is_keyword_ahead(&self, dist: usize, kws: &[Symbol]) -> bool {
        self.look_ahead(dist, |t| kws.iter().any(|&kw| t.is_keyword(kw)))
    }
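
    // Illustrative sketch (not part of the parser): lookahead is used to disambiguate
    // constructs before committing to a parse. For example, `check_inline_const` above
    // only treats `const` as the start of an inline const expression when the token
    // after it opens a block:
    //
    //     // current token: `const`, next token: `{`
    //     if parser.check_inline_const(0) {
    //         // parse `const { ... }` as an expression rather than a `const` item
    //     }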

    /// Parses asyncness: `async` or nothing.
    fn parse_asyncness(&mut self) -> Async {
        if self.eat_keyword(kw::Async) {
            let span = self.prev_token.uninterpolated_span();
            Async::Yes { span, closure_id: DUMMY_NODE_ID, return_impl_trait_id: DUMMY_NODE_ID }
        } else {
            Async::No
        }
    }

    /// Parses unsafety: `unsafe` or nothing.
    fn parse_unsafety(&mut self) -> Unsafe {
        if self.eat_keyword(kw::Unsafe) {
            Unsafe::Yes(self.prev_token.uninterpolated_span())
        } else {
            Unsafe::No
        }
    }

    /// Parses constness: `const` or nothing.
    fn parse_constness(&mut self) -> Const {
        // Avoid parsing const blocks as const items.
        if self.look_ahead(1, |t| t != &token::OpenDelim(DelimToken::Brace))
            && self.eat_keyword(kw::Const)
        {
            Const::Yes(self.prev_token.uninterpolated_span())
        } else {
            Const::No
        }
    }

    /// Parses inline const expressions.
    fn parse_const_block(&mut self, span: Span) -> PResult<'a, P<Expr>> {
        self.sess.gated_spans.gate(sym::inline_const, span);
        self.eat_keyword(kw::Const);
        let blk = self.parse_block()?;
        let anon_const = AnonConst {
            id: DUMMY_NODE_ID,
            value: self.mk_expr(blk.span, ExprKind::Block(blk, None), AttrVec::new()),
        };
        let blk_span = anon_const.value.span;
        Ok(self.mk_expr(span.to(blk_span), ExprKind::ConstBlock(anon_const), AttrVec::new()))
    }
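
    // Illustrative sketch (not part of the parser): an inline const expression such as
    //
    //     let n = const { 1 + 2 };
    //
    // is parsed here as `ExprKind::ConstBlock(AnonConst { value: ExprKind::Block(..), .. })`,
    // with the whole `const { ... }` span gated behind the `inline_const` feature.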

    /// Parses mutability (`mut` or nothing).
    fn parse_mutability(&mut self) -> Mutability {
        if self.eat_keyword(kw::Mut) { Mutability::Mut } else { Mutability::Not }
    }

    /// Possibly parses mutability (`const` or `mut`).
    fn parse_const_or_mut(&mut self) -> Option<Mutability> {
        if self.eat_keyword(kw::Mut) {
            Some(Mutability::Mut)
        } else if self.eat_keyword(kw::Const) {
            Some(Mutability::Not)
        } else {
            None
        }
    }

    fn parse_field_name(&mut self) -> PResult<'a, Ident> {
        if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) = self.token.kind
        {
            self.expect_no_suffix(self.token.span, "a tuple index", suffix);
            self.bump();
            Ok(Ident::new(symbol, self.prev_token.span))
        } else {
            self.parse_ident_common(false)
        }
    }

    fn parse_mac_args(&mut self) -> PResult<'a, P<MacArgs>> {
        self.parse_mac_args_common(true).map(P)
    }

    fn parse_attr_args(&mut self) -> PResult<'a, MacArgs> {
        self.parse_mac_args_common(false)
    }

    fn parse_mac_args_common(&mut self, delimited_only: bool) -> PResult<'a, MacArgs> {
        Ok(
            if self.check(&token::OpenDelim(DelimToken::Paren))
                || self.check(&token::OpenDelim(DelimToken::Bracket))
                || self.check(&token::OpenDelim(DelimToken::Brace))
            {
                match self.parse_token_tree() {
                    TokenTree::Delimited(dspan, delim, tokens) =>
                    // We've confirmed above that there is a delimiter so unwrapping is OK.
                    {
                        MacArgs::Delimited(dspan, MacDelimiter::from_token(delim).unwrap(), tokens)
                    }
                    _ => unreachable!(),
                }
            } else if !delimited_only {
                if self.eat(&token::Eq) {
                    let eq_span = self.prev_token.span;
                    let mut is_interpolated_expr = false;
                    if let token::Interpolated(nt) = &self.token.kind {
                        if let token::NtExpr(..) = **nt {
                            is_interpolated_expr = true;
                        }
                    }

                    // Collect tokens because they are used during lowering to HIR.
                    let expr = self.collect_tokens(|this| this.parse_expr())?;
                    let span = expr.span;

                    match &expr.kind {
                        // Not gated to support things like `doc = $expr` that work on stable.
                        _ if is_interpolated_expr => {}
                        ExprKind::Lit(lit) if lit.kind.is_unsuffixed() => {}
                        _ => self.sess.gated_spans.gate(sym::extended_key_value_attributes, span),
                    }

                    let token_kind = token::Interpolated(Lrc::new(token::NtExpr(expr)));
                    MacArgs::Eq(eq_span, Token::new(token_kind, span))
                } else {
                    MacArgs::Empty
                }
            } else {
                return self.unexpected();
            },
        )
    }
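
    // Illustrative sketch (not part of the parser): the three `MacArgs` shapes produced
    // here correspond to the following surface forms (the attribute examples are only
    // illustrative):
    //
    //     m!(a, b + c);      // MacArgs::Delimited(.., MacDelimiter::Parenthesis, ..)
    //     #[doc = "text"]    // MacArgs::Eq(.., interpolated-expression token)
    //     #[inline]          // MacArgs::Empty
    //
    // Macro calls (`delimited_only == true`) accept only the delimited form; attributes
    // also accept the `= expr` and empty forms.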

    fn parse_or_use_outer_attributes(
        &mut self,
        already_parsed_attrs: Option<AttrVec>,
    ) -> PResult<'a, AttrVec> {
        if let Some(attrs) = already_parsed_attrs {
            Ok(attrs)
        } else {
            self.parse_outer_attributes().map(|a| a.into())
        }
    }

    /// Parses a single token tree from the input.
    pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
        match self.token.kind {
            token::OpenDelim(..) => {
                let depth = self.token_cursor.stack.len();

                // We keep advancing the token cursor until we hit
                // the matching `CloseDelim` token.
                while !(depth == self.token_cursor.stack.len()
                    && matches!(self.token.kind, token::CloseDelim(_)))
                {
                    // Advance one token at a time, so `TokenCursor::next()`
                    // can capture these tokens if necessary.
                    self.bump();
                }
                // We are still inside the frame corresponding
                // to the delimited stream we captured, so grab
                // the tokens from this frame.
                let frame = &self.token_cursor.frame;
                let stream = frame.tree_cursor.stream.clone();
                let span = frame.span;
                let delim = frame.delim;
                // Consume the close delimiter.
                self.bump();
                TokenTree::Delimited(span, delim, stream)
            }
            token::CloseDelim(_) | token::Eof => unreachable!(),
            _ => {
                self.bump();
                TokenTree::Token(self.prev_token.clone())
            }
        }
    }
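
    // Illustrative sketch (not part of the parser): for the input `foo!(a, [b])`, repeated
    // calls to `parse_token_tree` yield roughly:
    //
    //     TokenTree::Token(`foo`)
    //     TokenTree::Token(`!`)
    //     TokenTree::Delimited(.., Paren, stream: `a`, `,`, Delimited(.., Bracket, `b`))
    //
    // i.e. a delimited group is returned as a single tree containing its inner stream.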

    /// Parses a stream of tokens into a list of `TokenTree`s, up to EOF.
    pub fn parse_all_token_trees(&mut self) -> PResult<'a, Vec<TokenTree>> {
        let mut tts = Vec::new();
        while self.token != token::Eof {
            tts.push(self.parse_token_tree());
        }
        Ok(tts)
    }

    pub fn parse_tokens(&mut self) -> TokenStream {
        let mut result = Vec::new();
        loop {
            match self.token.kind {
                token::Eof | token::CloseDelim(..) => break,
                _ => result.push(self.parse_token_tree().into()),
            }
        }
        TokenStream::new(result)
    }

    /// Evaluates the closure with restrictions in place.
    ///
    /// After the closure is evaluated, restrictions are reset.
    fn with_res<T>(&mut self, res: Restrictions, f: impl FnOnce(&mut Self) -> T) -> T {
        let old = self.restrictions;
        self.restrictions = res;
        let res = f(self);
        self.restrictions = old;
        res
    }
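
    // Illustrative sketch (not part of the parser): `with_res` scopes a set of
    // `Restrictions` to a single sub-parse and restores the previous set afterwards,
    // e.g. forbidding struct literals while parsing an `if` condition:
    //
    //     let cond = self.with_res(Restrictions::NO_STRUCT_LITERAL, |this| {
    //         this.parse_expr()
    //     })?;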

    fn is_crate_vis(&self) -> bool {
        self.token.is_keyword(kw::Crate) && self.look_ahead(1, |t| t != &token::ModSep)
    }
|
|
|
|
|
2019-08-11 16:34:42 +00:00
|
|
|
/// Parses `pub`, `pub(crate)` and `pub(in path)` plus shortcuts `crate` for `pub(crate)`,
|
|
|
|
/// `pub(self)` for `pub(in self)` and `pub(super)` for `pub(in super)`.
|
|
|
|
/// If the following element can't be a tuple (i.e., it's a function definition), then
|
|
|
|
/// it's not a tuple struct field), and the contents within the parentheses isn't valid,
|
|
|
|
/// so emit a proper diagnostic.
|
2020-08-30 18:04:36 +00:00
|
|
|
// Public for rustfmt usage.
|
|
|
|
pub fn parse_visibility(&mut self, fbt: FollowedByType) -> PResult<'a, Visibility> {
|
2019-08-11 16:34:42 +00:00
|
|
|
maybe_whole!(self, NtVis, |x| x);
|
2012-08-16 00:10:23 +00:00
|
|
|
|
2019-08-11 16:34:42 +00:00
|
|
|
self.expected_tokens.push(TokenType::Keyword(kw::Crate));
|
|
|
|
if self.is_crate_vis() {
|
|
|
|
self.bump(); // `crate`
|
2020-02-29 11:56:15 +00:00
|
|
|
self.sess.gated_spans.gate(sym::crate_visibility_modifier, self.prev_token.span);
|
2020-08-21 23:11:00 +00:00
|
|
|
return Ok(Visibility {
|
|
|
|
span: self.prev_token.span,
|
|
|
|
kind: VisibilityKind::Crate(CrateSugar::JustCrate),
|
|
|
|
tokens: None,
|
|
|
|
});
|
2019-08-11 16:34:42 +00:00
|
|
|
}
|
2014-08-11 16:32:26 +00:00
|
|
|
|
2019-08-11 16:34:42 +00:00
|
|
|
if !self.eat_keyword(kw::Pub) {
|
|
|
|
// We need a span for our `Spanned<VisibilityKind>`, but there's inherently no
|
|
|
|
// keyword to grab a span from for inherited visibility; an empty span at the
|
|
|
|
// beginning of the current token would seem to be the "Schelling span".
|
2020-08-21 23:11:00 +00:00
|
|
|
return Ok(Visibility {
|
|
|
|
span: self.token.span.shrink_to_lo(),
|
|
|
|
kind: VisibilityKind::Inherited,
|
|
|
|
tokens: None,
|
|
|
|
});
|
2019-08-11 16:34:42 +00:00
|
|
|
}
|
2020-02-29 11:56:15 +00:00
|
|
|
let lo = self.prev_token.span;
|
2017-03-07 23:50:13 +00:00
|
|
|
|
|
|
|
if self.check(&token::OpenDelim(token::Paren)) {
|
2017-03-18 04:13:00 +00:00
|
|
|
// We don't `self.bump()` the `(` yet because this might be a struct definition where
|
|
|
|
// `()` or a tuple might be allowed. For example, `struct Struct(pub (), pub (usize));`.
|
|
|
|
// Because of this, we only `bump` the `(` if we're assured it is appropriate to do so
|
|
|
|
// by the following tokens.
|
2019-12-22 22:42:04 +00:00
|
|
|
if self.is_keyword_ahead(1, &[kw::Crate]) && self.look_ahead(2, |t| t != &token::ModSep)
|
|
|
|
// account for `pub(crate::foo)`
|
2019-05-01 00:48:18 +00:00
|
|
|
{
|
2019-10-01 03:53:23 +00:00
|
|
|
// Parse `pub(crate)`.
|
|
|
|
self.bump(); // `(`
|
|
|
|
self.bump(); // `crate`
|
|
|
|
self.expect(&token::CloseDelim(token::Paren))?; // `)`
|
|
|
|
let vis = VisibilityKind::Crate(CrateSugar::PubCrate);
|
2020-08-21 23:11:00 +00:00
|
|
|
return Ok(Visibility {
|
|
|
|
span: lo.to(self.prev_token.span),
|
|
|
|
kind: vis,
|
|
|
|
tokens: None,
|
|
|
|
});
|
2019-05-29 15:58:44 +00:00
|
|
|
} else if self.is_keyword_ahead(1, &[kw::In]) {
|
2019-10-01 03:53:23 +00:00
|
|
|
// Parse `pub(in path)`.
|
|
|
|
self.bump(); // `(`
|
|
|
|
self.bump(); // `in`
|
|
|
|
let path = self.parse_path(PathStyle::Mod)?; // `path`
|
|
|
|
self.expect(&token::CloseDelim(token::Paren))?; // `)`
|
2019-12-22 22:42:04 +00:00
|
|
|
let vis = VisibilityKind::Restricted { path: P(path), id: ast::DUMMY_NODE_ID };
|
2020-08-21 23:11:00 +00:00
|
|
|
return Ok(Visibility {
|
|
|
|
span: lo.to(self.prev_token.span),
|
|
|
|
kind: vis,
|
|
|
|
tokens: None,
|
|
|
|
});
|
2019-09-30 04:42:56 +00:00
|
|
|
} else if self.look_ahead(2, |t| t == &token::CloseDelim(token::Paren))
|
|
|
|
&& self.is_keyword_ahead(1, &[kw::Super, kw::SelfLower])
|
2018-01-27 07:13:50 +00:00
|
|
|
{
|
2019-10-01 03:53:23 +00:00
|
|
|
// Parse `pub(self)` or `pub(super)`.
|
|
|
|
self.bump(); // `(`
|
|
|
|
let path = self.parse_path(PathStyle::Mod)?; // `super`/`self`
|
|
|
|
self.expect(&token::CloseDelim(token::Paren))?; // `)`
|
2019-12-22 22:42:04 +00:00
|
|
|
let vis = VisibilityKind::Restricted { path: P(path), id: ast::DUMMY_NODE_ID };
|
2020-08-21 23:11:00 +00:00
|
|
|
return Ok(Visibility {
|
|
|
|
span: lo.to(self.prev_token.span),
|
|
|
|
kind: vis,
|
|
|
|
tokens: None,
|
|
|
|
});
|
2019-11-07 10:26:36 +00:00
|
|
|
} else if let FollowedByType::No = fbt {
|
|
|
|
// Provide this diagnostic if a type cannot follow;
|
|
|
|
// in particular, if this is not a tuple struct.
|
2019-09-30 04:42:56 +00:00
|
|
|
self.recover_incorrect_vis_restriction()?;
|
|
|
|
// Emit diagnostic, but continue with public visibility.
|
2016-04-23 05:40:55 +00:00
|
|
|
}
|
2016-04-11 00:39:35 +00:00
|
|
|
}
|
2017-03-07 23:50:13 +00:00
|
|
|
|
2020-08-21 23:11:00 +00:00
|
|
|
Ok(Visibility { span: lo, kind: VisibilityKind::Public, tokens: None })
|
2012-02-23 05:47:23 +00:00
|
|
|
}
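// Illustrative inputs (assumed examples, not exhaustive) and the visibilities the
// code above produces for them:
//
//     pub fn f() {}               // VisibilityKind::Public
//     crate fn f() {}             // VisibilityKind::Crate(CrateSugar::JustCrate), feature-gated
//     pub(crate) fn f() {}        // VisibilityKind::Crate(CrateSugar::PubCrate)
//     pub(in crate::m) fn f() {}  // VisibilityKind::Restricted { path, .. }
//     pub(super) fn f() {}        // VisibilityKind::Restricted { path, .. }
//     fn f() {}                   // VisibilityKind::Inherited (empty span)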
|
2013-03-22 19:56:10 +00:00
|
|
|
|
2019-09-30 04:42:56 +00:00
|
|
|
/// Recovery for e.g. `pub(something) fn ...` or `struct X { pub(something) y: Z }`
|
|
|
|
fn recover_incorrect_vis_restriction(&mut self) -> PResult<'a, ()> {
|
|
|
|
self.bump(); // `(`
|
|
|
|
let path = self.parse_path(PathStyle::Mod)?;
|
2019-12-22 22:42:04 +00:00
|
|
|
self.expect(&token::CloseDelim(token::Paren))?; // `)`
|
2019-09-30 04:42:56 +00:00
|
|
|
|
|
|
|
let msg = "incorrect visibility restriction";
|
|
|
|
let suggestion = r##"some possible visibility restrictions are:
|
|
|
|
`pub(crate)`: visible only on the current crate
|
|
|
|
`pub(super)`: visible only in the current module's parent
|
|
|
|
`pub(in path::to::module)`: visible only on the specified path"##;
|
|
|
|
|
2019-10-08 20:17:46 +00:00
|
|
|
let path_str = pprust::path_to_string(&path);
|
|
|
|
|
2019-09-30 04:42:56 +00:00
|
|
|
struct_span_err!(self.sess.span_diagnostic, path.span, E0704, "{}", msg)
|
|
|
|
.help(suggestion)
|
|
|
|
.span_suggestion(
|
|
|
|
path.span,
|
2019-10-08 20:17:46 +00:00
|
|
|
&format!("make this visible only to module `{}` with `in`", path_str),
|
|
|
|
format!("in {}", path_str),
|
2019-09-30 04:42:56 +00:00
|
|
|
Applicability::MachineApplicable,
|
|
|
|
)
|
|
|
|
.emit();
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
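// For example (hypothetical input), `pub(foo::bar) struct S;` hits this recovery:
// E0704 is emitted with a machine-applicable suggestion to write `pub(in foo::bar)`,
// and parsing continues as if the item were simply `pub`.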
|
|
|
|
|
2019-10-27 23:29:23 +00:00
|
|
|
/// Parses `extern string_literal?`.
|
2019-11-09 19:05:20 +00:00
|
|
|
fn parse_extern(&mut self) -> PResult<'a, Extern> {
|
2019-10-27 22:14:35 +00:00
|
|
|
Ok(if self.eat_keyword(kw::Extern) {
|
2019-11-10 14:04:12 +00:00
|
|
|
Extern::from_abi(self.parse_abi())
|
2019-09-29 23:22:18 +00:00
|
|
|
} else {
|
2019-11-09 19:05:20 +00:00
|
|
|
Extern::None
|
2019-10-27 22:14:35 +00:00
|
|
|
})
|
2019-09-29 23:22:18 +00:00
|
|
|
}
|
|
|
|
|
2019-10-27 23:29:23 +00:00
|
|
|
/// Parses a string literal as an ABI spec.
|
2019-11-10 14:04:12 +00:00
|
|
|
fn parse_abi(&mut self) -> Option<StrLit> {
|
|
|
|
match self.parse_str_lit() {
|
|
|
|
Ok(str_lit) => Some(str_lit),
|
|
|
|
Err(Some(lit)) => match lit.kind {
|
|
|
|
ast::LitKind::Err(_) => None,
|
2019-10-27 23:29:23 +00:00
|
|
|
_ => {
|
2019-11-10 14:04:12 +00:00
|
|
|
self.struct_span_err(lit.span, "non-string ABI literal")
|
2019-10-27 23:29:23 +00:00
|
|
|
.span_suggestion(
|
2019-11-10 14:04:12 +00:00
|
|
|
lit.span,
|
2019-10-27 23:29:23 +00:00
|
|
|
"specify the ABI with a string literal",
|
|
|
|
"\"C\"".to_string(),
|
|
|
|
Applicability::MaybeIncorrect,
|
|
|
|
)
|
|
|
|
.emit();
|
2019-11-10 14:04:12 +00:00
|
|
|
None
|
2019-10-27 23:29:23 +00:00
|
|
|
}
|
2019-12-22 22:42:04 +00:00
|
|
|
},
|
2019-11-10 14:04:12 +00:00
|
|
|
Err(None) => None,
|
2019-11-09 19:05:20 +00:00
|
|
|
}
|
2013-03-14 02:25:28 +00:00
|
|
|
}
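// A rough sketch of how these two helpers classify their input (assumed examples):
//
//     extern "C" fn f();    // parse_extern -> Extern::Explicit, ABI "C"
//     extern fn f();        // parse_extern -> Extern::Implicit (no ABI literal)
//     fn f();               // parse_extern -> Extern::None
//     extern 123 fn f();    // parse_abi: "non-string ABI literal", suggests "\"C\""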
|
|
|
|
|
2021-01-14 15:42:01 +00:00
|
|
|
pub fn collect_tokens<R: HasTokens>(
|
|
|
|
&mut self,
|
|
|
|
f: impl FnOnce(&mut Self) -> PResult<'a, R>,
|
|
|
|
) -> PResult<'a, R> {
|
|
|
|
self.collect_tokens_trailing_token(|this| Ok((f(this)?, TrailingToken::None)))
|
|
|
|
}
|
|
|
|
|
2020-05-19 20:54:20 +00:00
|
|
|
/// Records all tokens consumed by the provided callback,
|
|
|
|
/// including the current token. These tokens are collected
|
2020-10-22 14:09:08 +00:00
|
|
|
/// into a `LazyTokenStream`, and returned along with the result
|
2020-11-04 13:27:11 +00:00
|
|
|
/// of the callback.
|
2020-05-19 20:54:20 +00:00
|
|
|
///
|
|
|
|
/// Note: If your callback consumes an opening delimiter
|
|
|
|
/// (including the case where you call `collect_tokens`
|
|
|
|
/// when the current token is an opening delimiter),
|
|
|
|
/// you must also consume the corresponding closing delimiter.
|
|
|
|
///
|
|
|
|
/// That is, you can consume
|
|
|
|
/// `something ([{ }])` or `([{}])`, but not `([{}]`
|
|
|
|
///
|
|
|
|
/// This restriction shouldn't be an issue in practice,
|
|
|
|
/// since this function is used to record the tokens for
|
|
|
|
/// a parsed AST item, which always has matching delimiters.
|
2021-01-14 15:42:01 +00:00
|
|
|
pub fn collect_tokens_trailing_token<R: HasTokens>(
|
2019-09-30 04:21:30 +00:00
|
|
|
&mut self,
|
2021-01-14 15:42:01 +00:00
|
|
|
f: impl FnOnce(&mut Self) -> PResult<'a, (R, TrailingToken)>,
|
2021-01-13 21:28:57 +00:00
|
|
|
) -> PResult<'a, R> {
|
2020-09-27 01:56:29 +00:00
|
|
|
let start_token = (self.token.clone(), self.token_spacing);
|
2021-01-28 14:47:59 +00:00
|
|
|
let cursor_snapshot = self.token_cursor.clone();
|
2020-09-27 01:56:29 +00:00
|
|
|
|
2021-01-14 15:42:01 +00:00
|
|
|
let (mut ret, trailing_token) = f(self)?;
|
2020-09-27 01:56:29 +00:00
|
|
|
|
|
|
|
// Produces a `TokenStream` on-demand. Using `cursor_snapshot`
|
|
|
|
// and `num_calls`, we can reconstruct the `TokenStream` seen
|
|
|
|
// by the callback. This allows us to avoid producing a `TokenStream`
|
|
|
|
// if it is never needed - for example, a captured `macro_rules!`
|
|
|
|
// argument that is never passed to a proc macro.
|
2020-11-04 13:27:11 +00:00
|
|
|
// In practice token stream creation happens rarely compared to
|
|
|
|
// calls to `collect_tokens` (see some statistics in #78736),
|
|
|
|
// so we are doing as little up-front work as possible.
|
2020-09-27 01:56:29 +00:00
|
|
|
//
|
|
|
|
// This also makes `Parser` very cheap to clone, since
|
|
|
|
// there is no intermediate collection buffer to clone.
|
2020-11-17 19:27:44 +00:00
|
|
|
#[derive(Clone)]
|
2020-10-30 21:40:41 +00:00
|
|
|
struct LazyTokenStreamImpl {
|
|
|
|
start_token: (Token, Spacing),
|
|
|
|
cursor_snapshot: TokenCursor,
|
|
|
|
num_calls: usize,
|
|
|
|
desugar_doc_comments: bool,
|
2020-12-12 20:20:22 +00:00
|
|
|
append_unglued_token: Option<TreeAndSpacing>,
|
2020-10-30 21:40:41 +00:00
|
|
|
}
|
|
|
|
impl CreateTokenStream for LazyTokenStreamImpl {
|
|
|
|
fn create_token_stream(&self) -> TokenStream {
|
|
|
|
// The token produced by the final call to `next` or `next_desugared`
|
|
|
|
// was not actually consumed by the callback. The combination
|
|
|
|
// of chaining the initial token and using `take` produces the desired
|
|
|
|
// result - we produce an empty `TokenStream` if no calls were made,
|
|
|
|
// and omit the final token otherwise.
|
|
|
|
let mut cursor_snapshot = self.cursor_snapshot.clone();
|
|
|
|
let tokens = std::iter::once(self.start_token.clone())
|
2021-01-14 15:42:01 +00:00
|
|
|
.chain((0..self.num_calls).map(|_| {
|
2020-10-30 21:40:41 +00:00
|
|
|
if self.desugar_doc_comments {
|
|
|
|
cursor_snapshot.next_desugared()
|
|
|
|
} else {
|
|
|
|
cursor_snapshot.next()
|
|
|
|
}
|
|
|
|
}))
|
2021-01-14 15:42:01 +00:00
|
|
|
.take(self.num_calls);
|
2020-05-19 20:54:20 +00:00
|
|
|
|
2020-12-12 20:20:22 +00:00
|
|
|
make_token_stream(tokens, self.append_unglued_token.clone())
|
2020-10-30 21:40:41 +00:00
|
|
|
}
|
2021-01-14 15:42:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
let mut num_calls = self.token_cursor.num_next_calls - cursor_snapshot.num_next_calls;
|
|
|
|
match trailing_token {
|
|
|
|
TrailingToken::None => {}
|
|
|
|
TrailingToken::Semi => {
|
|
|
|
assert_eq!(self.token.kind, token::Semi);
|
|
|
|
num_calls += 1;
|
2020-11-17 19:27:44 +00:00
|
|
|
}
|
2020-10-30 21:40:41 +00:00
|
|
|
}
|
2018-07-22 15:48:29 +00:00
|
|
|
|
2020-10-30 21:40:41 +00:00
|
|
|
let lazy_impl = LazyTokenStreamImpl {
|
|
|
|
start_token,
|
2021-01-14 15:42:01 +00:00
|
|
|
num_calls,
|
2020-10-30 21:40:41 +00:00
|
|
|
cursor_snapshot,
|
|
|
|
desugar_doc_comments: self.desugar_doc_comments,
|
2020-12-12 20:20:22 +00:00
|
|
|
append_unglued_token: self.token_cursor.append_unglued_token.clone(),
|
2020-10-30 21:40:41 +00:00
|
|
|
};
|
2021-01-13 21:28:57 +00:00
|
|
|
ret.finalize_tokens(LazyTokenStream::new(lazy_impl));
|
|
|
|
Ok(ret)
|
2017-07-12 16:50:05 +00:00
|
|
|
}
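// A hedged sketch of a caller (helper names assumed, not taken from this file):
//
//     let stmt = self.collect_tokens_trailing_token(|this| {
//         let stmt = this.parse_stmt_without_recovery()?; // hypothetical callee
//         Ok((stmt, TrailingToken::Semi))
//     })?;
//
// Returning `TrailingToken::Semi` tells the collector that the trailing `;` the
// callback left in place should be counted into the captured token stream.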
|
|
|
|
|
2018-03-10 15:44:44 +00:00
|
|
|
/// `::{` or `::*`
|
|
|
|
fn is_import_coupler(&mut self) -> bool {
|
2019-12-22 22:42:04 +00:00
|
|
|
self.check(&token::ModSep)
|
|
|
|
&& self.look_ahead(1, |t| {
|
|
|
|
*t == token::OpenDelim(token::Brace) || *t == token::BinOp(token::Star)
|
|
|
|
})
|
2016-04-17 00:48:40 +00:00
|
|
|
}
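// E.g. (illustrative) in `use foo::{bar, baz};` or `use foo::*;`, this returns true
// when the parser is positioned on the `::` that precedes the `{` or `*`.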
|
2020-08-31 09:45:50 +00:00
|
|
|
|
|
|
|
pub fn clear_expected_tokens(&mut self) {
|
|
|
|
self.expected_tokens.clear();
|
|
|
|
}
|
2011-01-11 02:18:16 +00:00
|
|
|
}
|
2019-02-05 09:35:25 +00:00
|
|
|
|
2019-10-26 01:30:02 +00:00
|
|
|
crate fn make_unclosed_delims_error(
|
|
|
|
unmatched: UnmatchedBrace,
|
2019-10-29 00:44:20 +00:00
|
|
|
sess: &ParseSess,
|
2019-10-26 01:30:02 +00:00
|
|
|
) -> Option<DiagnosticBuilder<'_>> {
|
|
|
|
// `None` here means an `Eof` was found. We already emit those errors elsewhere; we add them to
|
|
|
|
// `unmatched_braces` only for error recovery in the `Parser`.
|
|
|
|
let found_delim = unmatched.found_delim?;
|
2019-12-22 22:42:04 +00:00
|
|
|
let mut err = sess.span_diagnostic.struct_span_err(
|
|
|
|
unmatched.found_span,
|
|
|
|
&format!(
|
2020-01-03 13:40:15 +00:00
|
|
|
"mismatched closing delimiter: `{}`",
|
2019-12-22 22:42:04 +00:00
|
|
|
pprust::token_kind_to_string(&token::CloseDelim(found_delim)),
|
|
|
|
),
|
|
|
|
);
|
2020-01-03 13:40:15 +00:00
|
|
|
err.span_label(unmatched.found_span, "mismatched closing delimiter");
|
2019-10-26 01:30:02 +00:00
|
|
|
if let Some(sp) = unmatched.candidate_span {
|
2020-01-03 13:40:15 +00:00
|
|
|
err.span_label(sp, "closing delimiter possibly meant for this");
|
2019-10-26 01:30:02 +00:00
|
|
|
}
|
|
|
|
if let Some(sp) = unmatched.unclosed_span {
|
2020-01-03 13:40:15 +00:00
|
|
|
err.span_label(sp, "unclosed delimiter");
|
2019-10-26 01:30:02 +00:00
|
|
|
}
|
|
|
|
Some(err)
|
|
|
|
}
|
|
|
|
|
2019-10-29 00:44:20 +00:00
|
|
|
pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &ParseSess) {
|
2019-12-22 22:42:04 +00:00
|
|
|
*sess.reached_eof.borrow_mut() |=
|
|
|
|
unclosed_delims.iter().any(|unmatched_delim| unmatched_delim.found_delim.is_none());
|
2019-10-26 01:30:02 +00:00
|
|
|
for unmatched in unclosed_delims.drain(..) {
|
2020-04-24 20:58:41 +00:00
|
|
|
if let Some(mut e) = make_unclosed_delims_error(unmatched, sess) {
|
2020-02-01 23:47:58 +00:00
|
|
|
e.emit();
|
2020-04-24 20:58:41 +00:00
|
|
|
}
|
2019-02-05 09:35:25 +00:00
|
|
|
}
|
2019-02-05 10:26:26 +00:00
|
|
|
}
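// For instance (hypothetical input), a file containing `fn f() { (]` would produce a
// "mismatched closing delimiter: `]`" error here, with labels pointing at the `]`,
// at the unclosed `(`, and at any delimiter the `]` might have been meant for.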
|
2020-09-27 01:56:29 +00:00
|
|
|
|
|
|
|
/// Converts a flattened iterator of tokens (including open and close delimiter tokens)
|
|
|
|
/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
|
|
|
|
/// of open and close delims.
|
2020-12-12 20:20:22 +00:00
|
|
|
fn make_token_stream(
|
|
|
|
tokens: impl Iterator<Item = (Token, Spacing)>,
|
|
|
|
append_unglued_token: Option<TreeAndSpacing>,
|
|
|
|
) -> TokenStream {
|
2020-09-27 01:56:29 +00:00
|
|
|
#[derive(Debug)]
|
|
|
|
struct FrameData {
|
|
|
|
open: Span,
|
|
|
|
inner: Vec<(TokenTree, Spacing)>,
|
|
|
|
}
|
|
|
|
let mut stack = vec![FrameData { open: DUMMY_SP, inner: vec![] }];
|
|
|
|
for (token, spacing) in tokens {
|
|
|
|
match token {
|
|
|
|
Token { kind: TokenKind::OpenDelim(_), span } => {
|
|
|
|
stack.push(FrameData { open: span, inner: vec![] });
|
|
|
|
}
|
|
|
|
Token { kind: TokenKind::CloseDelim(delim), span } => {
|
|
|
|
let frame_data = stack.pop().expect("Token stack was empty!");
|
|
|
|
let dspan = DelimSpan::from_pair(frame_data.open, span);
|
|
|
|
let stream = TokenStream::new(frame_data.inner);
|
|
|
|
let delimited = TokenTree::Delimited(dspan, delim, stream);
|
|
|
|
stack
|
|
|
|
.last_mut()
|
|
|
|
.unwrap_or_else(|| panic!("Bottom token frame is missing for tokens!"))
|
|
|
|
.inner
|
|
|
|
.push((delimited, Spacing::Alone));
|
|
|
|
}
|
2020-12-12 20:20:22 +00:00
|
|
|
token => {
|
|
|
|
stack
|
|
|
|
.last_mut()
|
|
|
|
.expect("Bottom token frame is missing!")
|
|
|
|
.inner
|
|
|
|
.push((TokenTree::Token(token), spacing));
|
|
|
|
}
|
2020-09-27 01:56:29 +00:00
|
|
|
}
|
|
|
|
}
|
2020-12-12 20:20:22 +00:00
|
|
|
let mut final_buf = stack.pop().expect("Missing final buf!");
|
|
|
|
final_buf.inner.extend(append_unglued_token);
|
2020-09-27 01:56:29 +00:00
|
|
|
assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
|
|
|
|
TokenStream::new(final_buf.inner)
|
|
|
|
}
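// A small worked example (assumed input): fed the flat sequence `a ( b ) c`, the loop
// above pushes `a`, opens a frame at `(`, pushes `b`, and on `)` pops that frame into a
// single `TokenTree::Delimited(.., DelimToken::Paren, ..)` containing `b`, leaving the
// outer buffer as `a`, the delimited group, then `c`.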
|
2021-01-18 21:47:37 +00:00
|
|
|
|
|
|
|
#[macro_export]
|
|
|
|
macro_rules! maybe_collect_tokens {
|
|
|
|
($self:ident, $force_collect:expr, $attrs:expr, $f:expr) => {
|
|
|
|
if matches!($force_collect, ForceCollect::Yes)
|
|
|
|
|| $crate::parser::attr::maybe_needs_tokens($attrs)
|
|
|
|
{
|
2021-01-14 15:42:01 +00:00
|
|
|
$self.collect_tokens_trailing_token($f)
|
2021-01-18 21:47:37 +00:00
|
|
|
} else {
|
2021-01-14 15:42:01 +00:00
|
|
|
Ok($f($self)?.0)
|
2021-01-18 21:47:37 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
}
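// A hypothetical expansion site (names assumed, not taken from this file):
//
//     maybe_collect_tokens!(self, force_collect, &attrs, |this: &mut Self| {
//         let item = this.parse_item_kind_inner()?; // hypothetical callee
//         Ok((item, TrailingToken::None))
//     })
//
// Tokens are captured only when `ForceCollect::Yes` is passed or when the attributes
// may later need them (e.g. a possible proc-macro attribute).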
|