mirror of
https://github.com/rust-lang/rust.git
synced 2024-12-29 08:54:50 +00:00
Merge #1224
1224: Remove unused multi-char punct code and add space between puncts r=matklad a=edwin0cheng After #1213, the parser only needs single-char puncts. This PR does the following things: * Removes the code which handles multi-char puncts * Removes the code which handles backward traversal in `SubtreeSource`, because the result is cached as of #1195 * Adds a space between two consecutive puncts during the `tt` to `SyntaxNode` conversion. Note that the space should only be added if neither punct is a delimiter. Co-authored-by: Edwin Cheng <edwin0cheng@gmail.com>
This commit is contained in:
commit
b0e7022afe
@ -45,20 +45,6 @@ impl<'a> TokenSeq<'a> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
match self {
|
||||
TokenSeq::Subtree(subtree) => subtree.token_trees.len() + 2,
|
||||
TokenSeq::Seq(tokens) => tokens.len(),
|
||||
}
|
||||
}
|
||||
|
||||
fn child_slice(&self, pos: usize) -> &[tt::TokenTree] {
|
||||
match self {
|
||||
TokenSeq::Subtree(subtree) => &subtree.token_trees[pos - 1..],
|
||||
TokenSeq::Seq(tokens) => &tokens[pos..],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
@ -66,7 +52,6 @@ struct TtToken {
|
||||
pub kind: SyntaxKind,
|
||||
pub is_joint_to_next: bool,
|
||||
pub text: SmolStr,
|
||||
pub n_tokens: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
@ -80,19 +65,12 @@ struct SubTreeWalker<'a> {
|
||||
pos: usize,
|
||||
stack: Vec<(TokenSeq<'a>, usize)>,
|
||||
cursor: WalkCursor,
|
||||
last_steps: Vec<usize>,
|
||||
ts: TokenSeq<'a>,
|
||||
}
|
||||
|
||||
impl<'a> SubTreeWalker<'a> {
|
||||
fn new(ts: TokenSeq<'a>) -> SubTreeWalker {
|
||||
let mut res = SubTreeWalker {
|
||||
pos: 0,
|
||||
stack: vec![],
|
||||
cursor: WalkCursor::Eof,
|
||||
last_steps: vec![],
|
||||
ts,
|
||||
};
|
||||
let mut res = SubTreeWalker { pos: 0, stack: vec![], cursor: WalkCursor::Eof, ts };
|
||||
|
||||
res.reset();
|
||||
res
|
||||
@ -105,7 +83,6 @@ impl<'a> SubTreeWalker<'a> {
|
||||
fn reset(&mut self) {
|
||||
self.pos = 0;
|
||||
self.stack = vec![];
|
||||
self.last_steps = vec![];
|
||||
|
||||
self.cursor = match self.ts.get(0) {
|
||||
DelimToken::Token(token) => match token {
|
||||
@ -114,10 +91,7 @@ impl<'a> SubTreeWalker<'a> {
|
||||
self.stack.push((ts, 0));
|
||||
WalkCursor::Token(0, convert_delim(subtree.delimiter, false))
|
||||
}
|
||||
tt::TokenTree::Leaf(leaf) => {
|
||||
let next_tokens = self.ts.child_slice(0);
|
||||
WalkCursor::Token(0, convert_leaf(&next_tokens, leaf))
|
||||
}
|
||||
tt::TokenTree::Leaf(leaf) => WalkCursor::Token(0, convert_leaf(leaf)),
|
||||
},
|
||||
DelimToken::Delim(delim, is_end) => {
|
||||
assert!(!is_end);
|
||||
@ -138,24 +112,6 @@ impl<'a> SubTreeWalker<'a> {
|
||||
self.stack.last().map(|(t, _)| t).unwrap_or(&self.ts)
|
||||
}
|
||||
|
||||
/// Move cursor backward by 1 step
|
||||
fn backward(&mut self) {
|
||||
if self.last_steps.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
self.pos -= 1;
|
||||
let last_step = self.last_steps.pop().unwrap();
|
||||
|
||||
self.cursor = match self.cursor {
|
||||
WalkCursor::Token(idx, _) => self.walk_token(idx, last_step, true),
|
||||
WalkCursor::Eof => {
|
||||
let len = self.top().len();
|
||||
self.walk_token(len, last_step, true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Move cursor forward by 1 step
|
||||
fn forward(&mut self) {
|
||||
if self.is_eof() {
|
||||
@ -163,37 +119,24 @@ impl<'a> SubTreeWalker<'a> {
|
||||
}
|
||||
self.pos += 1;
|
||||
|
||||
let step = self.current().map(|x| x.n_tokens).unwrap_or(1);
|
||||
self.last_steps.push(step);
|
||||
|
||||
if let WalkCursor::Token(u, _) = self.cursor {
|
||||
self.cursor = self.walk_token(u, step, false)
|
||||
self.cursor = self.walk_token(u)
|
||||
}
|
||||
}
|
||||
|
||||
/// Traversal child token
|
||||
fn walk_token(&mut self, pos: usize, offset: usize, backward: bool) -> WalkCursor {
|
||||
fn walk_token(&mut self, pos: usize) -> WalkCursor {
|
||||
let top = self.stack.last().map(|(t, _)| t).unwrap_or(&self.ts);
|
||||
|
||||
if backward && pos < offset {
|
||||
let (_, last_idx) = self.stack.pop().unwrap();
|
||||
return self.walk_token(last_idx, offset, backward);
|
||||
}
|
||||
|
||||
let pos = if backward { pos - offset } else { pos + offset };
|
||||
let pos = pos + 1;
|
||||
|
||||
match top.get(pos) {
|
||||
DelimToken::Token(token) => match token {
|
||||
tt::TokenTree::Subtree(subtree) => {
|
||||
let ts = TokenSeq::from(subtree);
|
||||
let new_idx = if backward { ts.len() - 1 } else { 0 };
|
||||
self.stack.push((ts, pos));
|
||||
WalkCursor::Token(new_idx, convert_delim(subtree.delimiter, backward))
|
||||
}
|
||||
tt::TokenTree::Leaf(leaf) => {
|
||||
let next_tokens = top.child_slice(pos);
|
||||
WalkCursor::Token(pos, convert_leaf(&next_tokens, leaf))
|
||||
WalkCursor::Token(0, convert_delim(subtree.delimiter, false))
|
||||
}
|
||||
tt::TokenTree::Leaf(leaf) => WalkCursor::Token(pos, convert_leaf(leaf)),
|
||||
},
|
||||
DelimToken::Delim(delim, is_end) => {
|
||||
WalkCursor::Token(pos, convert_delim(*delim, is_end))
|
||||
@ -201,8 +144,7 @@ impl<'a> SubTreeWalker<'a> {
|
||||
DelimToken::End => {
|
||||
// it is the top level
|
||||
if let Some((_, last_idx)) = self.stack.pop() {
|
||||
assert!(!backward);
|
||||
self.walk_token(last_idx, offset, backward)
|
||||
self.walk_token(last_idx)
|
||||
} else {
|
||||
WalkCursor::Eof
|
||||
}
|
||||
@ -237,12 +179,9 @@ impl<'a> WalkerOwner<'a> {
|
||||
}
|
||||
|
||||
while pos >= cached.len() {
|
||||
let len = cached.len();
|
||||
cached.push({
|
||||
self.set_pos(len);
|
||||
let walker = self.walker.borrow();
|
||||
walker.current().cloned()
|
||||
});
|
||||
self.set_pos(cached.len());
|
||||
let walker = self.walker.borrow();
|
||||
cached.push(walker.current().cloned());
|
||||
}
|
||||
|
||||
return cached[pos].clone();
|
||||
@ -250,12 +189,11 @@ impl<'a> WalkerOwner<'a> {
|
||||
|
||||
fn set_pos(&self, pos: usize) {
|
||||
let mut walker = self.walker.borrow_mut();
|
||||
assert!(walker.pos <= pos);
|
||||
|
||||
while pos > walker.pos && !walker.is_eof() {
|
||||
walker.forward();
|
||||
}
|
||||
while pos < walker.pos {
|
||||
walker.backward();
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_token_trees(&mut self, n: usize) -> Vec<&tt::TokenTree> {
|
||||
@ -264,15 +202,16 @@ impl<'a> WalkerOwner<'a> {
|
||||
walker.reset();
|
||||
|
||||
while walker.pos < n {
|
||||
if let WalkCursor::Token(u, tt) = &walker.cursor {
|
||||
if let WalkCursor::Token(u, _) = &walker.cursor {
|
||||
// We only collect the topmost child
|
||||
if walker.stack.len() == 0 {
|
||||
for i in 0..tt.n_tokens {
|
||||
if let DelimToken::Token(token) = walker.ts.get(u + i) {
|
||||
res.push(token);
|
||||
}
|
||||
if let DelimToken::Token(token) = walker.ts.get(*u) {
|
||||
res.push(token);
|
||||
}
|
||||
} else if walker.stack.len() == 1 {
|
||||
}
|
||||
// Check whether the second level is a subtree
|
||||
// if so, collect its parent which is topmost child
|
||||
else if walker.stack.len() == 1 {
|
||||
if let DelimToken::Delim(_, is_end) = walker.top().get(*u) {
|
||||
if !is_end {
|
||||
let (_, last_idx) = &walker.stack[0];
|
||||
@ -343,78 +282,6 @@ impl<'a> TokenSource for SubtreeTokenSource<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct TokenPeek<'a, I>
|
||||
where
|
||||
I: Iterator<Item = &'a tt::TokenTree>,
|
||||
{
|
||||
iter: itertools::MultiPeek<I>,
|
||||
}
|
||||
|
||||
// helper function
|
||||
fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> {
|
||||
if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt {
|
||||
return Some(pp);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
impl<'a, I> TokenPeek<'a, I>
|
||||
where
|
||||
I: Iterator<Item = &'a tt::TokenTree>,
|
||||
{
|
||||
pub fn new(iter: I) -> Self {
|
||||
TokenPeek { iter: itertools::multipeek(iter) }
|
||||
}
|
||||
|
||||
pub fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> {
|
||||
if p.spacing != tt::Spacing::Joint {
|
||||
return None;
|
||||
}
|
||||
|
||||
self.iter.reset_peek();
|
||||
let p1 = to_punct(self.iter.peek()?)?;
|
||||
Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint))
|
||||
}
|
||||
|
||||
pub fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> {
|
||||
self.current_punct2(p).and_then(|((p0, p1), last_joint)| {
|
||||
if !last_joint {
|
||||
None
|
||||
} else {
|
||||
let p2 = to_punct(*self.iter.peek()?)?;
|
||||
Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: Remove this function
|
||||
fn convert_multi_char_punct<'b, I>(
|
||||
p: &tt::Punct,
|
||||
iter: &mut TokenPeek<'b, I>,
|
||||
) -> Option<(SyntaxKind, bool, &'static str, usize)>
|
||||
where
|
||||
I: Iterator<Item = &'b tt::TokenTree>,
|
||||
{
|
||||
if let Some((m, is_joint_to_next)) = iter.current_punct3(p) {
|
||||
if let Some((kind, text)) = match m {
|
||||
_ => None,
|
||||
} {
|
||||
return Some((kind, is_joint_to_next, text, 3));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((m, is_joint_to_next)) = iter.current_punct2(p) {
|
||||
if let Some((kind, text)) = match m {
|
||||
_ => None,
|
||||
} {
|
||||
return Some((kind, is_joint_to_next, text, 2));
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn convert_delim(d: tt::Delimiter, closing: bool) -> TtToken {
|
||||
let (kinds, texts) = match d {
|
||||
tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"),
|
||||
@ -426,7 +293,7 @@ fn convert_delim(d: tt::Delimiter, closing: bool) -> TtToken {
|
||||
let idx = closing as usize;
|
||||
let kind = kinds[idx];
|
||||
let text = if texts.len() > 0 { &texts[idx..texts.len() - (1 - idx)] } else { "" };
|
||||
TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }
|
||||
TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text) }
|
||||
}
|
||||
|
||||
fn convert_literal(l: &tt::Literal) -> TtToken {
|
||||
@ -437,7 +304,7 @@ fn convert_literal(l: &tt::Literal) -> TtToken {
|
||||
_ => panic!("Fail to convert given literal {:#?}", &l),
|
||||
});
|
||||
|
||||
TtToken { kind, is_joint_to_next: false, text: l.text.clone(), n_tokens: 1 }
|
||||
TtToken { kind, is_joint_to_next: false, text: l.text.clone() }
|
||||
}
|
||||
|
||||
fn convert_ident(ident: &tt::Ident) -> TtToken {
|
||||
@ -447,39 +314,31 @@ fn convert_ident(ident: &tt::Ident) -> TtToken {
|
||||
SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT)
|
||||
};
|
||||
|
||||
TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 }
|
||||
TtToken { kind, is_joint_to_next: false, text: ident.text.clone() }
|
||||
}
|
||||
|
||||
fn convert_punct(p: &tt::Punct, next_tokens: &[tt::TokenTree]) -> TtToken {
|
||||
let mut iter = next_tokens.iter();
|
||||
iter.next();
|
||||
let mut peek = TokenPeek::new(iter);
|
||||
|
||||
if let Some((kind, is_joint_to_next, text, size)) = convert_multi_char_punct(p, &mut peek) {
|
||||
TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size }
|
||||
} else {
|
||||
let kind = match p.char {
|
||||
// lexer may produce combpund tokens for these ones
|
||||
'.' => DOT,
|
||||
':' => COLON,
|
||||
'=' => EQ,
|
||||
'!' => EXCL,
|
||||
'-' => MINUS,
|
||||
c => SyntaxKind::from_char(c).unwrap(),
|
||||
};
|
||||
let text = {
|
||||
let mut buf = [0u8; 4];
|
||||
let s: &str = p.char.encode_utf8(&mut buf);
|
||||
SmolStr::new(s)
|
||||
};
|
||||
TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text, n_tokens: 1 }
|
||||
}
|
||||
fn convert_punct(p: &tt::Punct) -> TtToken {
|
||||
let kind = match p.char {
|
||||
// lexer may produce combpund tokens for these ones
|
||||
'.' => DOT,
|
||||
':' => COLON,
|
||||
'=' => EQ,
|
||||
'!' => EXCL,
|
||||
'-' => MINUS,
|
||||
c => SyntaxKind::from_char(c).unwrap(),
|
||||
};
|
||||
let text = {
|
||||
let mut buf = [0u8; 4];
|
||||
let s: &str = p.char.encode_utf8(&mut buf);
|
||||
SmolStr::new(s)
|
||||
};
|
||||
TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text }
|
||||
}
|
||||
|
||||
fn convert_leaf(tokens: &[tt::TokenTree], leaf: &tt::Leaf) -> TtToken {
|
||||
fn convert_leaf(leaf: &tt::Leaf) -> TtToken {
|
||||
match leaf {
|
||||
tt::Leaf::Literal(l) => convert_literal(l),
|
||||
tt::Leaf::Ident(ident) => convert_ident(ident),
|
||||
tt::Leaf::Punct(punct) => convert_punct(punct, tokens),
|
||||
tt::Leaf::Punct(punct) => convert_punct(punct),
|
||||
}
|
||||
}
|
||||
|
@ -148,30 +148,21 @@ fn convert_tt(
|
||||
match child {
|
||||
SyntaxElement::Token(token) => {
|
||||
if token.kind().is_punct() {
|
||||
let mut prev = None;
|
||||
for char in token.text().chars() {
|
||||
if let Some(char) = prev {
|
||||
token_trees.push(
|
||||
tt::Leaf::from(tt::Punct { char, spacing: tt::Spacing::Joint })
|
||||
.into(),
|
||||
);
|
||||
}
|
||||
prev = Some(char)
|
||||
}
|
||||
if let Some(char) = prev {
|
||||
let spacing = match child_iter.peek() {
|
||||
Some(SyntaxElement::Token(token)) => {
|
||||
if token.kind().is_punct() {
|
||||
tt::Spacing::Joint
|
||||
} else {
|
||||
tt::Spacing::Alone
|
||||
}
|
||||
}
|
||||
_ => tt::Spacing::Alone,
|
||||
};
|
||||
assert!(token.text().len() == 1, "Input ast::token punct must be single char.");
|
||||
let char = token.text().chars().next().unwrap();
|
||||
|
||||
token_trees.push(tt::Leaf::from(tt::Punct { char, spacing }).into());
|
||||
}
|
||||
let spacing = match child_iter.peek() {
|
||||
Some(SyntaxElement::Token(token)) => {
|
||||
if token.kind().is_punct() {
|
||||
tt::Spacing::Joint
|
||||
} else {
|
||||
tt::Spacing::Alone
|
||||
}
|
||||
}
|
||||
_ => tt::Spacing::Alone,
|
||||
};
|
||||
|
||||
token_trees.push(tt::Leaf::from(tt::Punct { char, spacing }).into());
|
||||
} else {
|
||||
let child: tt::TokenTree = if token.kind() == SyntaxKind::TRUE_KW
|
||||
|| token.kind() == SyntaxKind::FALSE_KW
|
||||
@ -224,6 +215,15 @@ impl<'a, Q: Querier> TtTreeSink<'a, Q> {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_delimiter(kind: SyntaxKind) -> bool {
|
||||
use SyntaxKind::*;
|
||||
|
||||
match kind {
|
||||
L_PAREN | L_BRACK | L_CURLY | R_PAREN | R_BRACK | R_CURLY => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, Q: Querier> TreeSink for TtTreeSink<'a, Q> {
|
||||
fn token(&mut self, kind: SyntaxKind, n_tokens: u8) {
|
||||
if kind == L_DOLLAR || kind == R_DOLLAR {
|
||||
@ -240,14 +240,18 @@ impl<'a, Q: Querier> TreeSink for TtTreeSink<'a, Q> {
|
||||
self.buf.clear();
|
||||
self.inner.token(kind, text);
|
||||
|
||||
// // Add a white space to token
|
||||
// let (last_kind, _, last_joint_to_next ) = self.src_querier.token(self.token_pos-n_tokens as usize);
|
||||
// if !last_joint_to_next && last_kind.is_punct() {
|
||||
// let (cur_kind, _, _ ) = self.src_querier.token(self.token_pos);
|
||||
// if cur_kind.is_punct() {
|
||||
// self.inner.token(WHITESPACE, " ".into());
|
||||
// }
|
||||
// }
|
||||
// Add a white space between tokens, only if both are not delimiters
|
||||
if !is_delimiter(kind) {
|
||||
let (last_kind, _, last_joint_to_next) = self.src_querier.token(self.token_pos - 1);
|
||||
if !last_joint_to_next && last_kind.is_punct() {
|
||||
let (cur_kind, _, _) = self.src_querier.token(self.token_pos);
|
||||
if !is_delimiter(cur_kind) {
|
||||
if cur_kind.is_punct() {
|
||||
self.inner.token(WHITESPACE, " ".into());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn start_node(&mut self, kind: SyntaxKind) {
|
||||
|
@ -1,6 +1,5 @@
|
||||
use crate::ParseError;
|
||||
use crate::subtree_parser::Parser;
|
||||
use crate::subtree_source::TokenPeek;
|
||||
use smallvec::{SmallVec, smallvec};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@ -262,3 +261,48 @@ impl<'a> TtCursor<'a> {
|
||||
self.pos = memento.pos;
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct TokenPeek<'a, I>
|
||||
where
|
||||
I: Iterator<Item = &'a tt::TokenTree>,
|
||||
{
|
||||
iter: itertools::MultiPeek<I>,
|
||||
}
|
||||
|
||||
// helper function
|
||||
fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> {
|
||||
if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt {
|
||||
return Some(pp);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
impl<'a, I> TokenPeek<'a, I>
|
||||
where
|
||||
I: Iterator<Item = &'a tt::TokenTree>,
|
||||
{
|
||||
pub fn new(iter: I) -> Self {
|
||||
TokenPeek { iter: itertools::multipeek(iter) }
|
||||
}
|
||||
|
||||
pub fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> {
|
||||
if p.spacing != tt::Spacing::Joint {
|
||||
return None;
|
||||
}
|
||||
|
||||
self.iter.reset_peek();
|
||||
let p1 = to_punct(self.iter.peek()?)?;
|
||||
Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint))
|
||||
}
|
||||
|
||||
pub fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> {
|
||||
self.current_punct2(p).and_then(|((p0, p1), last_joint)| {
|
||||
if !last_joint {
|
||||
None
|
||||
} else {
|
||||
let p2 = to_punct(*self.iter.peek()?)?;
|
||||
Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user