Auto merge of #63709 - matklad:decomposed-tokens, r=petrochenkov

Move token gluing to token stream parsing

Work towards #63689: this moves token gluing from the lexer to the token-tree layer. It's only a minimal step, but I like the negative diff here.
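For context, "gluing" combines two adjacent punctuation tokens into one compound token, but only when nothing separates them in the source. A minimal standalone sketch of the idea, with hypothetical stand-in types rather than rustc's real `Token` (the real version is `Token::glue`, shown further down in this diff):

```rust
// Hypothetical mini-model of token gluing.
#[derive(Debug, PartialEq)]
enum Tok {
    Lt,  // `<`
    Shl, // `<<`
}

// Try to combine two tokens that were adjacent ("joint") in the source.
fn glue(first: &Tok, second: &Tok) -> Option<Tok> {
    match (first, second) {
        (Tok::Lt, Tok::Lt) => Some(Tok::Shl),
        _ => None,
    }
}

fn main() {
    // `<<` lexes as two joint `<` tokens and glues into a single `Shl`;
    // `< <` stays two separate `Lt` tokens because they are not joint.
    assert_eq!(glue(&Tok::Lt, &Tok::Lt), Some(Tok::Shl));
}
```

After this PR the lexer always emits the decomposed single-character tokens, and the recombination happens while token trees are assembled.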

r? @petrochenkov
bors 2019-08-20 07:44:44 +00:00
commit 14890954ce
6 changed files with 68 additions and 209 deletions


@@ -23,9 +23,6 @@ pub enum TokenKind {
Lifetime { starts_with_number: bool },
Semi,
Comma,
DotDotDot,
DotDotEq,
DotDot,
Dot,
OpenParen,
CloseParen,
@@ -37,41 +34,19 @@ pub enum TokenKind {
Pound,
Tilde,
Question,
ColonColon,
Colon,
Dollar,
EqEq,
Eq,
FatArrow,
Ne,
Not,
Le,
LArrow,
Lt,
ShlEq,
Shl,
Ge,
Gt,
ShrEq,
Shr,
RArrow,
Minus,
MinusEq,
And,
AndAnd,
AndEq,
Or,
OrOr,
OrEq,
PlusEq,
Plus,
StarEq,
Star,
SlashEq,
Slash,
CaretEq,
Caret,
PercentEq,
Percent,
Unknown,
}
@@ -135,13 +110,7 @@ impl Cursor<'_> {
'/' => match self.nth_char(0) {
'/' => self.line_comment(),
'*' => self.block_comment(),
_ => {
if self.eat_assign() {
SlashEq
} else {
Slash
}
}
_ => Slash,
},
c if character_properties::is_whitespace(c) => self.whitespace(),
'r' => match (self.nth_char(0), self.nth_char(1)) {
@@ -199,22 +168,7 @@ impl Cursor<'_> {
}
';' => Semi,
',' => Comma,
'.' => {
if self.nth_char(0) == '.' {
self.bump();
if self.nth_char(0) == '.' {
self.bump();
DotDotDot
} else if self.nth_char(0) == '=' {
self.bump();
DotDotEq
} else {
DotDot
}
} else {
Dot
}
}
'.' => Dot,
'(' => OpenParen,
')' => CloseParen,
'{' => OpenBrace,
@@ -225,112 +179,19 @@ impl Cursor<'_> {
'#' => Pound,
'~' => Tilde,
'?' => Question,
':' => {
if self.nth_char(0) == ':' {
self.bump();
ColonColon
} else {
Colon
}
}
':' => Colon,
'$' => Dollar,
'=' => {
if self.nth_char(0) == '=' {
self.bump();
EqEq
} else if self.nth_char(0) == '>' {
self.bump();
FatArrow
} else {
Eq
}
}
'!' => {
if self.nth_char(0) == '=' {
self.bump();
Ne
} else {
Not
}
}
'<' => match self.nth_char(0) {
'=' => {
self.bump();
Le
}
'<' => {
self.bump();
if self.eat_assign() { ShlEq } else { Shl }
}
'-' => {
self.bump();
LArrow
}
_ => Lt,
},
'>' => match self.nth_char(0) {
'=' => {
self.bump();
Ge
}
'>' => {
self.bump();
if self.eat_assign() { ShrEq } else { Shr }
}
_ => Gt,
},
'-' => {
if self.nth_char(0) == '>' {
self.bump();
RArrow
} else {
if self.eat_assign() { MinusEq } else { Minus }
}
}
'&' => {
if self.nth_char(0) == '&' {
self.bump();
AndAnd
} else {
if self.eat_assign() { AndEq } else { And }
}
}
'|' => {
if self.nth_char(0) == '|' {
self.bump();
OrOr
} else {
if self.eat_assign() { OrEq } else { Or }
}
}
'+' => {
if self.eat_assign() {
PlusEq
} else {
Plus
}
}
'*' => {
if self.eat_assign() {
StarEq
} else {
Star
}
}
'^' => {
if self.eat_assign() {
CaretEq
} else {
Caret
}
}
'%' => {
if self.eat_assign() {
PercentEq
} else {
Percent
}
}
'=' => Eq,
'!' => Not,
'<' => Lt,
'>' => Gt,
'-' => Minus,
'&' => And,
'|' => Or,
'+' => Plus,
'*' => Star,
'^' => Caret,
'%' => Percent,
'\'' => self.lifetime_or_char(),
'"' => {
let terminated = self.double_quoted_string();
@@ -643,15 +504,6 @@ impl Cursor<'_> {
self.bump();
}
}
fn eat_assign(&mut self) -> bool {
if self.nth_char(0) == '=' {
self.bump();
true
} else {
false
}
}
}
pub mod character_properties {
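The net effect of the lexer changes above: `rustc_lexer` now emits only single-character punctuation and never looks ahead to build compound operators. A standalone sketch of the resulting behavior (illustrative table, not rustc's API):

```rust
// Illustrative model of the simplified dispatch above: one character,
// one TokenKind, with no lookahead and no `eat_assign` helper.
fn punct_kind(c: char) -> Option<&'static str> {
    Some(match c {
        '.' => "Dot",
        ':' => "Colon",
        '=' => "Eq",
        '<' => "Lt",
        '>' => "Gt",
        _ => return None,
    })
}

fn main() {
    // `..=` now lexes as three tokens; the old lexer produced one `DotDotEq`.
    let toks: Vec<_> = "..=".chars().filter_map(punct_kind).collect();
    assert_eq!(toks, ["Dot", "Dot", "Eq"]);
}
```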


@@ -273,9 +273,6 @@ impl<'a> StringReader<'a> {
}
rustc_lexer::TokenKind::Semi => token::Semi,
rustc_lexer::TokenKind::Comma => token::Comma,
rustc_lexer::TokenKind::DotDotDot => token::DotDotDot,
rustc_lexer::TokenKind::DotDotEq => token::DotDotEq,
rustc_lexer::TokenKind::DotDot => token::DotDot,
rustc_lexer::TokenKind::Dot => token::Dot,
rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
@@ -287,42 +284,20 @@ impl<'a> StringReader<'a> {
rustc_lexer::TokenKind::Pound => token::Pound,
rustc_lexer::TokenKind::Tilde => token::Tilde,
rustc_lexer::TokenKind::Question => token::Question,
rustc_lexer::TokenKind::ColonColon => token::ModSep,
rustc_lexer::TokenKind::Colon => token::Colon,
rustc_lexer::TokenKind::Dollar => token::Dollar,
rustc_lexer::TokenKind::EqEq => token::EqEq,
rustc_lexer::TokenKind::Eq => token::Eq,
rustc_lexer::TokenKind::FatArrow => token::FatArrow,
rustc_lexer::TokenKind::Ne => token::Ne,
rustc_lexer::TokenKind::Not => token::Not,
rustc_lexer::TokenKind::Le => token::Le,
rustc_lexer::TokenKind::LArrow => token::LArrow,
rustc_lexer::TokenKind::Lt => token::Lt,
rustc_lexer::TokenKind::ShlEq => token::BinOpEq(token::Shl),
rustc_lexer::TokenKind::Shl => token::BinOp(token::Shl),
rustc_lexer::TokenKind::Ge => token::Ge,
rustc_lexer::TokenKind::Gt => token::Gt,
rustc_lexer::TokenKind::ShrEq => token::BinOpEq(token::Shr),
rustc_lexer::TokenKind::Shr => token::BinOp(token::Shr),
rustc_lexer::TokenKind::RArrow => token::RArrow,
rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
rustc_lexer::TokenKind::MinusEq => token::BinOpEq(token::Minus),
rustc_lexer::TokenKind::And => token::BinOp(token::And),
rustc_lexer::TokenKind::AndEq => token::BinOpEq(token::And),
rustc_lexer::TokenKind::AndAnd => token::AndAnd,
rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
rustc_lexer::TokenKind::OrEq => token::BinOpEq(token::Or),
rustc_lexer::TokenKind::OrOr => token::OrOr,
rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
rustc_lexer::TokenKind::PlusEq => token::BinOpEq(token::Plus),
rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
rustc_lexer::TokenKind::StarEq => token::BinOpEq(token::Star),
rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
rustc_lexer::TokenKind::SlashEq => token::BinOpEq(token::Slash),
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
rustc_lexer::TokenKind::CaretEq => token::BinOpEq(token::Caret),
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
rustc_lexer::TokenKind::PercentEq => token::BinOpEq(token::Percent),
rustc_lexer::TokenKind::Unknown => {
let c = self.str_from(start).chars().next().unwrap();
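With the compound kinds gone, the punctuation arm of this mapping is strictly one-to-one: `token::ModSep`, `token::DotDot`, `token::BinOpEq(..)` and friends are no longer produced here at all, and only reappear when the token-tree reader glues joint tokens back together (see `TokenStreamBuilder` below).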


@@ -75,42 +75,50 @@ fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind
}
#[test]
fn doublecolonparsing() {
fn doublecolon_parsing() {
with_default_globals(|| {
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
let sh = mk_sess(sm.clone());
check_tokenization(setup(&sm, &sh, "a b".to_string()),
vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
check_tokenization(
setup(&sm, &sh, "a b".to_string()),
vec![mk_ident("a"), token::Whitespace, mk_ident("b")],
);
})
}
#[test]
fn dcparsing_2() {
fn doublecolon_parsing_2() {
with_default_globals(|| {
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
let sh = mk_sess(sm.clone());
check_tokenization(setup(&sm, &sh, "a::b".to_string()),
vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
check_tokenization(
setup(&sm, &sh, "a::b".to_string()),
vec![mk_ident("a"), token::Colon, token::Colon, mk_ident("b")],
);
})
}
#[test]
fn dcparsing_3() {
fn doublecolon_parsing_3() {
with_default_globals(|| {
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
let sh = mk_sess(sm.clone());
check_tokenization(setup(&sm, &sh, "a ::b".to_string()),
vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
check_tokenization(
setup(&sm, &sh, "a ::b".to_string()),
vec![mk_ident("a"), token::Whitespace, token::Colon, token::Colon, mk_ident("b")],
);
})
}
#[test]
fn dcparsing_4() {
fn doublecolon_parsing_4() {
with_default_globals(|| {
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
let sh = mk_sess(sm.clone());
check_tokenization(setup(&sm, &sh, "a:: b".to_string()),
vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
check_tokenization(
setup(&sm, &sh, "a:: b".to_string()),
vec![mk_ident("a"), token::Colon, token::Colon, token::Whitespace, mk_ident("b")],
);
})
}
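These renamed tests encode the observable half of the change: at the `StringReader` level, `a::b` is now reported as `[ident, Colon, Colon, ident]` rather than `[ident, ModSep, ident]`. `ModSep` still reaches the parser, but only after the token-tree pass below glues the two joint colons.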


@@ -39,29 +39,29 @@ struct TokenTreesReader<'a> {
impl<'a> TokenTreesReader<'a> {
// Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`.
fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> {
let mut tts = Vec::new();
let mut buf = TokenStreamBuilder::default();
self.real_token();
while self.token != token::Eof {
tts.push(self.parse_token_tree()?);
buf.push(self.parse_token_tree()?);
}
Ok(TokenStream::new(tts))
Ok(buf.into_token_stream())
}
// Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`.
fn parse_token_trees_until_close_delim(&mut self) -> TokenStream {
let mut tts = vec![];
let mut buf = TokenStreamBuilder::default();
loop {
if let token::CloseDelim(..) = self.token.kind {
return TokenStream::new(tts);
return buf.into_token_stream();
}
match self.parse_token_tree() {
Ok(tree) => tts.push(tree),
Ok(tree) => buf.push(tree),
Err(mut e) => {
e.emit();
return TokenStream::new(tts);
return buf.into_token_stream();
}
}
}
@@ -223,8 +223,32 @@ impl<'a> TokenTreesReader<'a> {
_ => {
self.token = token;
return;
},
}
}
}
}
}
#[derive(Default)]
struct TokenStreamBuilder {
buf: Vec<TreeAndJoint>,
}
impl TokenStreamBuilder {
fn push(&mut self, (tree, joint): TreeAndJoint) {
if let Some((TokenTree::Token(prev_token), Joint)) = self.buf.last() {
if let TokenTree::Token(token) = &tree {
if let Some(glued) = prev_token.glue(token) {
self.buf.pop();
self.buf.push((TokenTree::Token(glued), joint));
return;
}
}
}
self.buf.push((tree, joint))
}
fn into_token_stream(self) -> TokenStream {
TokenStream::new(self.buf)
}
}
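A minimal standalone model of what `push` does, with hypothetical stand-ins for `TokenTree`/`TreeAndJoint` (in rustc the joint flag is the `Joint` variant of `tokenstream::IsJoint`; the jointness of the identifiers here is simplified away):

```rust
#[derive(Debug, PartialEq)]
enum Tok { Ident, Colon, ModSep }

#[derive(Debug, PartialEq, Clone, Copy)]
enum IsJoint { Joint, NonJoint }

// Stand-in for `Token::glue`, covering just the `::` case from the tests.
fn glue(a: &Tok, b: &Tok) -> Option<Tok> {
    match (a, b) {
        (Tok::Colon, Tok::Colon) => Some(Tok::ModSep),
        _ => None,
    }
}

#[derive(Default)]
struct Builder {
    buf: Vec<(Tok, IsJoint)>,
}

impl Builder {
    // Mirrors `TokenStreamBuilder::push` above: if the previous token is
    // joint and can glue with the incoming one, replace it in place.
    fn push(&mut self, (tok, joint): (Tok, IsJoint)) {
        if let Some((prev, IsJoint::Joint)) = self.buf.last() {
            if let Some(glued) = glue(prev, &tok) {
                self.buf.pop();
                self.buf.push((glued, joint));
                return;
            }
        }
        self.buf.push((tok, joint));
    }
}

fn main() {
    // `a::b` arrives as ident, joint colon, colon, ident...
    let mut b = Builder::default();
    b.push((Tok::Ident, IsJoint::NonJoint));
    b.push((Tok::Colon, IsJoint::Joint));
    b.push((Tok::Colon, IsJoint::NonJoint));
    b.push((Tok::Ident, IsJoint::NonJoint));
    // ...and leaves as ident, `::`, ident.
    assert_eq!(
        b.buf,
        vec![
            (Tok::Ident, IsJoint::NonJoint),
            (Tok::ModSep, IsJoint::NonJoint),
            (Tok::Ident, IsJoint::NonJoint),
        ]
    );
}
```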


@@ -551,7 +551,7 @@ impl Token {
}
}
crate fn glue(self, joint: Token) -> Option<Token> {
crate fn glue(&self, joint: &Token) -> Option<Token> {
let kind = match self.kind {
Eq => match joint.kind {
Eq => EqEq,
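The switch from `glue(self, joint: Token)` to `glue(&self, joint: &Token)` is what lets the new `TokenStreamBuilder` try `prev_token.glue(token)` on the borrowed result of `buf.last()`, popping the buffer only once a glue actually succeeds.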


@@ -414,7 +414,7 @@ impl TokenStreamBuilder {
let last_tree_if_joint = self.0.last().and_then(TokenStream::last_tree_if_joint);
if let Some(TokenTree::Token(last_token)) = last_tree_if_joint {
if let Some((TokenTree::Token(token), is_joint)) = stream.first_tree_and_joint() {
if let Some(glued_tok) = last_token.glue(token) {
if let Some(glued_tok) = last_token.glue(&token) {
let last_stream = self.0.pop().unwrap();
self.push_all_but_last_tree(&last_stream);
let glued_tt = TokenTree::Token(glued_tok);
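The pre-existing stream-level builder here only needs a borrow at the call site (`last_token.glue(&token)`); its gluing logic is otherwise unchanged.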