Mirror of https://github.com/rust-lang/rust.git (synced 2024-11-25 16:24:46 +00:00)
Fix `break_last_token`.
It currently doesn't handle the three-char tokens `>>=` and `<<=` correctly. These can be broken twice, resulting in three individual tokens. This is a latent bug that currently doesn't cause any problems, but does cause problems for #124141, because that PR increases the usage of lazy token streams.
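To illustrate the double break in isolation (a minimal standalone sketch, not rustc's actual types; `break_op` is a hypothetical helper that splits an operator's text after its first `n` characters):

    // Hypothetical stand-in for token breaking: split an operator token's
    // text after its first `n` characters, if that leaves a non-empty rest.
    fn break_op(op: &str, n: usize) -> Option<(&str, &str)> {
        (n > 0 && n < op.len()).then(|| op.split_at(n))
    }

    fn main() {
        let (first, rest) = break_op(">>=", 1).unwrap(); // `>` + `>=`
        let (second, third) = break_op(rest, 1).unwrap(); // `>` + `=`
        assert_eq!([first, second, third], [">", ">", "="]);
    }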
parent f79a912d9e
commit 73cc575177
@@ -385,35 +385,41 @@ impl TokenKind {
         Literal(Lit::new(kind, symbol, suffix))
     }
 
-    /// An approximation to proc-macro-style single-character operators used by rustc parser.
-    /// If the operator token can be broken into two tokens, the first of which is single-character,
-    /// then this function performs that operation, otherwise it returns `None`.
-    pub fn break_two_token_op(&self) -> Option<(TokenKind, TokenKind)> {
-        Some(match *self {
-            Le => (Lt, Eq),
-            EqEq => (Eq, Eq),
-            Ne => (Not, Eq),
-            Ge => (Gt, Eq),
-            AndAnd => (BinOp(And), BinOp(And)),
-            OrOr => (BinOp(Or), BinOp(Or)),
-            BinOp(Shl) => (Lt, Lt),
-            BinOp(Shr) => (Gt, Gt),
-            BinOpEq(Plus) => (BinOp(Plus), Eq),
-            BinOpEq(Minus) => (BinOp(Minus), Eq),
-            BinOpEq(Star) => (BinOp(Star), Eq),
-            BinOpEq(Slash) => (BinOp(Slash), Eq),
-            BinOpEq(Percent) => (BinOp(Percent), Eq),
-            BinOpEq(Caret) => (BinOp(Caret), Eq),
-            BinOpEq(And) => (BinOp(And), Eq),
-            BinOpEq(Or) => (BinOp(Or), Eq),
-            BinOpEq(Shl) => (Lt, Le),
-            BinOpEq(Shr) => (Gt, Ge),
-            DotDot => (Dot, Dot),
-            DotDotDot => (Dot, DotDot),
-            PathSep => (Colon, Colon),
-            RArrow => (BinOp(Minus), Gt),
-            LArrow => (Lt, BinOp(Minus)),
-            FatArrow => (Eq, Gt),
+    /// An approximation to proc-macro-style single-character operators used by
+    /// rustc parser. If the operator token can be broken into two tokens, the
+    /// first of which has `n` (1 or 2) chars, then this function performs that
+    /// operation, otherwise it returns `None`.
+    pub fn break_two_token_op(&self, n: u32) -> Option<(TokenKind, TokenKind)> {
+        assert!(n == 1 || n == 2);
+        Some(match (self, n) {
+            (Le, 1) => (Lt, Eq),
+            (EqEq, 1) => (Eq, Eq),
+            (Ne, 1) => (Not, Eq),
+            (Ge, 1) => (Gt, Eq),
+            (AndAnd, 1) => (BinOp(And), BinOp(And)),
+            (OrOr, 1) => (BinOp(Or), BinOp(Or)),
+            (BinOp(Shl), 1) => (Lt, Lt),
+            (BinOp(Shr), 1) => (Gt, Gt),
+            (BinOpEq(Plus), 1) => (BinOp(Plus), Eq),
+            (BinOpEq(Minus), 1) => (BinOp(Minus), Eq),
+            (BinOpEq(Star), 1) => (BinOp(Star), Eq),
+            (BinOpEq(Slash), 1) => (BinOp(Slash), Eq),
+            (BinOpEq(Percent), 1) => (BinOp(Percent), Eq),
+            (BinOpEq(Caret), 1) => (BinOp(Caret), Eq),
+            (BinOpEq(And), 1) => (BinOp(And), Eq),
+            (BinOpEq(Or), 1) => (BinOp(Or), Eq),
+            (BinOpEq(Shl), 1) => (Lt, Le),         // `<` + `<=`
+            (BinOpEq(Shl), 2) => (BinOp(Shl), Eq), // `<<` + `=`
+            (BinOpEq(Shr), 1) => (Gt, Ge),         // `>` + `>=`
+            (BinOpEq(Shr), 2) => (BinOp(Shr), Eq), // `>>` + `=`
+            (DotDot, 1) => (Dot, Dot),
+            (DotDotDot, 1) => (Dot, DotDot), // `.` + `..`
+            (DotDotDot, 2) => (DotDot, Dot), // `..` + `.`
+            (DotDotEq, 2) => (DotDot, Eq),
+            (PathSep, 1) => (Colon, Colon),
+            (RArrow, 1) => (BinOp(Minus), Gt),
+            (LArrow, 1) => (Lt, BinOp(Minus)),
+            (FatArrow, 1) => (Eq, Gt),
             _ => return None,
         })
     }
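A property worth noting in the new signature: the first returned token always has exactly `n` characters, which the span arithmetic in `make_attr_token_stream` (below) relies on. A rough standalone check of that property, using string stand-ins rather than rustc's real `TokenKind`:

    fn main() {
        // (token text, n, expected first fragment) for the new two-way cases
        let cases = [(">>=", 1, ">"), (">>=", 2, ">>"),
                     ("<<=", 1, "<"), ("<<=", 2, "<<"),
                     ("...", 1, "."), ("...", 2, "..")];
        for (op, n, first) in cases {
            // like break_two_token_op(n): the first fragment has n chars
            assert_eq!(&op[..n], first);
            assert_eq!(first.len(), n);
        }
    }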
@@ -108,7 +108,7 @@ struct LazyAttrTokenStreamImpl {
     start_token: (Token, Spacing),
     cursor_snapshot: TokenCursor,
     num_calls: u32,
-    break_last_token: bool,
+    break_last_token: u32,
     node_replacements: Box<[NodeReplacement]>,
 }
 
@@ -339,17 +339,20 @@ impl<'a> Parser<'a> {
         let parser_replacements_end = self.capture_state.parser_replacements.len();
 
         assert!(
-            !(self.break_last_token && matches!(capture_trailing, Trailing::Yes)),
-            "Cannot set break_last_token and have trailing token"
+            !(self.break_last_token > 0 && matches!(capture_trailing, Trailing::Yes)),
+            "Cannot have break_last_token > 0 and have trailing token"
         );
+        assert!(self.break_last_token <= 2, "cannot break token more than twice");
 
         let end_pos = self.num_bump_calls
             + capture_trailing as u32
-            // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens), then
-            // extend the range of captured tokens to include it, since the parser was not actually
-            // bumped past it. When the `LazyAttrTokenStream` gets converted into an
-            // `AttrTokenStream`, we will create the proper token.
-            + self.break_last_token as u32;
+            // If we "broke" the last token (e.g. breaking a `>>` token once into `>` + `>`, or
+            // breaking a `>>=` token twice into `>` + `>` + `=`), then extend the range of
+            // captured tokens to include it, because the parser was not actually bumped past it.
+            // (Even if we broke twice, it was still just one token originally, hence the `1`.)
+            // When the `LazyAttrTokenStream` gets converted into an `AttrTokenStream`, we will
+            // rebreak that final token once or twice.
+            + if self.break_last_token == 0 { 0 } else { 1 };
 
         let num_calls = end_pos - collect_pos.start_pos;
 
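The key point of the new `end_pos` computation is that a once- or twice-broken token still occupies a single position in the original stream. A minimal sketch of just that arithmetic (assumed standalone values, not the parser's real state):

    fn end_pos(num_bump_calls: u32, trailing: bool, break_last_token: u32) -> u32 {
        num_bump_calls + trailing as u32 + if break_last_token == 0 { 0 } else { 1 }
    }

    fn main() {
        assert_eq!(end_pos(10, false, 0), 10); // nothing broken
        assert_eq!(end_pos(10, false, 1), 11); // `>>` broken once into `>` + `>`
        assert_eq!(end_pos(10, false, 2), 11); // `>>=` broken twice: still one token
    }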
@@ -425,7 +428,7 @@ impl<'a> Parser<'a> {
         // for the `#[cfg]` and/or `#[cfg_attr]` attrs. This allows us to run
         // eager cfg-expansion on the captured token stream.
         if definite_capture_mode {
-            assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");
+            assert!(self.break_last_token == 0, "Should not have unglued last token with cfg attr");
 
             // What is the status here when parsing the example code at the top of this method?
             //
@@ -471,7 +474,7 @@ impl<'a> Parser<'a> {
 /// close delims.
 fn make_attr_token_stream(
     iter: impl Iterator<Item = FlatToken>,
-    break_last_token: bool,
+    break_last_token: u32,
 ) -> AttrTokenStream {
     #[derive(Debug)]
     struct FrameData {
@@ -513,18 +516,17 @@ fn make_attr_token_stream(
         }
     }
 
-    if break_last_token {
+    if break_last_token > 0 {
         let last_token = stack_top.inner.pop().unwrap();
         if let AttrTokenTree::Token(last_token, spacing) = last_token {
-            let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
+            let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap();
 
-            // An 'unglued' token is always two ASCII characters
+            // Tokens are always ASCII chars, so we can use byte arithmetic here.
             let mut first_span = last_token.span.shrink_to_lo();
-            first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
+            first_span =
+                first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token));
 
-            stack_top
-                .inner
-                .push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
+            stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing));
         } else {
            panic!("Unexpected last token {last_token:?}")
         }
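The switch to `BytePos(break_last_token)` is what makes the retained fragment's span cover one byte after a single break and two bytes after a double break. A sketch of that byte arithmetic with assumed offsets (plain integers standing in for rustc_span's `Span`/`BytePos`):

    fn first_fragment_span(token_lo: u32, break_last_token: u32) -> (u32, u32) {
        // shrink_to_lo() gives (lo, lo); with_hi(lo + n) widens it to n bytes
        (token_lo, token_lo + break_last_token)
    }

    fn main() {
        assert_eq!(first_fragment_span(5, 1), (5, 6)); // `>` kept from `>>`
        assert_eq!(first_fragment_span(5, 2), (5, 7)); // `>>` kept from `>>=`
    }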
@@ -146,21 +146,25 @@ pub struct Parser<'a> {
     token_cursor: TokenCursor,
     // The number of calls to `bump`, i.e. the position in the token stream.
     num_bump_calls: u32,
-    // During parsing we may sometimes need to 'unglue' a glued token into two
-    // component tokens (e.g. '>>' into '>' and '>'), so the parser can consume
-    // them one at a time. This process bypasses the normal capturing mechanism
-    // (e.g. `num_bump_calls` will not be incremented), since the 'unglued'
-    // tokens due not exist in the original `TokenStream`.
+    // During parsing we may sometimes need to "unglue" a glued token into two
+    // or three component tokens (e.g. `>>` into `>` and `>`, or `>>=` into `>`
+    // and `>` and `=`), so the parser can consume them one at a time. This
+    // process bypasses the normal capturing mechanism (e.g. `num_bump_calls`
+    // will not be incremented), since the "unglued" tokens do not exist in
+    // the original `TokenStream`.
     //
-    // If we end up consuming both unglued tokens, this is not an issue. We'll
-    // end up capturing the single 'glued' token.
+    // If we end up consuming all the component tokens, this is not an issue,
+    // because we'll end up capturing the single "glued" token.
     //
-    // However, sometimes we may want to capture just the first 'unglued'
+    // However, sometimes we may want to capture not all of the original
     // token. For example, capturing the `Vec<u8>` in `Option<Vec<u8>>`
     // requires us to unglue the trailing `>>` token. The `break_last_token`
-    // field is used to track this token. It gets appended to the captured
+    // field is used to track these tokens. They get appended to the captured
     // stream when we evaluate a `LazyAttrTokenStream`.
-    break_last_token: bool,
+    //
+    // This value is always 0, 1, or 2. It can only reach 2 when splitting
+    // `>>=` or `<<=`.
+    break_last_token: u32,
     /// This field is used to keep track of how many left angle brackets we have seen. This is
     /// required in order to detect extra leading left angle brackets (`<` characters) and error
     /// appropriately.
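As a concrete illustration of the comment's `Option<Vec<u8>>` example (a hypothetical macro; the point is only that the nested generics end in a glued `>>` that the parser must split while parsing the inner type):

    macro_rules! capture_ty {
        ($t:ty) => { let _v: $t = None; };
    }

    fn main() {
        // Parsing the inner `Vec<u8>` requires breaking the trailing `>>`
        // into `>` (closing `Vec<`) and `>` (closing `Option<`).
        capture_ty!(Option<Vec<u8>>);
    }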
@@ -453,7 +457,7 @@ impl<'a> Parser<'a> {
             expected_tokens: Vec::new(),
             token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() },
             num_bump_calls: 0,
-            break_last_token: false,
+            break_last_token: 0,
             unmatched_angle_bracket_count: 0,
             angle_bracket_nesting: 0,
             last_unexpected_token_span: None,
@@ -773,7 +777,7 @@ impl<'a> Parser<'a> {
                     self.bump();
                     return true;
                 }
-                match self.token.kind.break_two_token_op() {
+                match self.token.kind.break_two_token_op(1) {
                     Some((first, second)) if first == expected => {
                         let first_span = self.psess.source_map().start_point(self.token.span);
                         let second_span = self.token.span.with_lo(first_span.hi());
@@ -783,8 +787,8 @@ impl<'a> Parser<'a> {
                         //
                         // If we consume any additional tokens, then this token
                         // is not needed (we'll capture the entire 'glued' token),
-                        // and `bump` will set this field to `None`
-                        self.break_last_token = true;
+                        // and `bump` will set this field to 0.
+                        self.break_last_token += 1;
                         // Use the spacing of the glued token as the spacing of the
                         // unglued second token.
                         self.bump_with((Token::new(second, second_span), self.token_spacing));
@@ -1148,10 +1152,9 @@ impl<'a> Parser<'a> {
        // than `.0`/`.1` access.
        let mut next = self.token_cursor.inlined_next();
        self.num_bump_calls += 1;
-        // We've retrieved an token from the underlying
-        // cursor, so we no longer need to worry about
-        // an unglued token. See `break_and_eat` for more details
-        self.break_last_token = false;
+        // We got a token from the underlying cursor and no longer need to
+        // worry about an unglued token. See `break_and_eat` for more details.
+        self.break_last_token = 0;
        if next.0.span.is_dummy() {
            // Tweak the location for better diagnostics, but keep syntactic context intact.
            let fallback_span = self.token.span;
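Taken together, `break_and_eat` increments the counter on each break and `bump` clears it. A toy trace of the field across the `>>=` case (an assumed simplification, with a plain variable standing in for the parser state):

    fn main() {
        let mut break_last_token = 0u32;
        break_last_token += 1; // eat `>` from `>>=`, leaving glued `>=`
        break_last_token += 1; // eat `>` from `>=`, leaving `=`
        assert_eq!(break_last_token, 2); // only `>>=`/`<<=` can reach 2
        break_last_token = 0; // a real `bump` past the `=` resets the field
        assert_eq!(break_last_token, 0);
    }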
tests/ui/macros/break-last-token-twice.rs (new file, 16 lines)
@@ -0,0 +1,16 @@
+//@ check-pass
+
+macro_rules! m {
+    (static $name:ident: $t:ty = $e:expr) => {
+        let $name: $t = $e;
+    }
+}
+
+fn main() {
+    m! {
+        // Tricky: the trailing `>>=` token here is broken twice:
+        // - into `>` and `>=`
+        // - then the `>=` is broken into `>` and `=`
+        static _x: Vec<Vec<u32>>= vec![]
+    }
+}
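For reference, once both breaks succeed the macro arm matches with `$t` = `Vec<Vec<u32>>` and `$e` = `vec![]`, so the test effectively expands to:

    fn main() {
        let _x: Vec<Vec<u32>> = vec![];
    }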