1224: Remove unused multi-char punct code and add space between puncts r=matklad a=edwin0cheng

After #1213, the parser only needs single-char puncts, so this PR does the following things:

* Remove the code which handles multi-char puncts
* Remove the code which handles backward traversal in `SubtreeSource`, because the result has been cached since #1195
* Add a space between two consecutive puncts during the `tt` to `SyntaxNode` conversion.

Note that the spaces should only be added if both puncts are not delimiters. 

Co-authored-by: Edwin Cheng <edwin0cheng@gmail.com>
This commit is contained in:
bors[bot] 2019-05-02 06:32:42 +00:00
commit b0e7022afe
3 changed files with 121 additions and 214 deletions

View File

@ -45,20 +45,6 @@ impl<'a> TokenSeq<'a> {
}
}
}
fn len(&self) -> usize {
match self {
TokenSeq::Subtree(subtree) => subtree.token_trees.len() + 2,
TokenSeq::Seq(tokens) => tokens.len(),
}
}
fn child_slice(&self, pos: usize) -> &[tt::TokenTree] {
match self {
TokenSeq::Subtree(subtree) => &subtree.token_trees[pos - 1..],
TokenSeq::Seq(tokens) => &tokens[pos..],
}
}
}
#[derive(Debug, Clone, Eq, PartialEq)]
@ -66,7 +52,6 @@ struct TtToken {
pub kind: SyntaxKind,
pub is_joint_to_next: bool,
pub text: SmolStr,
pub n_tokens: usize,
}
#[derive(Debug, Clone, Eq, PartialEq)]
@ -80,19 +65,12 @@ struct SubTreeWalker<'a> {
pos: usize,
stack: Vec<(TokenSeq<'a>, usize)>,
cursor: WalkCursor,
last_steps: Vec<usize>,
ts: TokenSeq<'a>,
}
impl<'a> SubTreeWalker<'a> {
fn new(ts: TokenSeq<'a>) -> SubTreeWalker {
let mut res = SubTreeWalker {
pos: 0,
stack: vec![],
cursor: WalkCursor::Eof,
last_steps: vec![],
ts,
};
let mut res = SubTreeWalker { pos: 0, stack: vec![], cursor: WalkCursor::Eof, ts };
res.reset();
res
@ -105,7 +83,6 @@ impl<'a> SubTreeWalker<'a> {
fn reset(&mut self) {
self.pos = 0;
self.stack = vec![];
self.last_steps = vec![];
self.cursor = match self.ts.get(0) {
DelimToken::Token(token) => match token {
@ -114,10 +91,7 @@ impl<'a> SubTreeWalker<'a> {
self.stack.push((ts, 0));
WalkCursor::Token(0, convert_delim(subtree.delimiter, false))
}
tt::TokenTree::Leaf(leaf) => {
let next_tokens = self.ts.child_slice(0);
WalkCursor::Token(0, convert_leaf(&next_tokens, leaf))
}
tt::TokenTree::Leaf(leaf) => WalkCursor::Token(0, convert_leaf(leaf)),
},
DelimToken::Delim(delim, is_end) => {
assert!(!is_end);
@ -138,24 +112,6 @@ impl<'a> SubTreeWalker<'a> {
self.stack.last().map(|(t, _)| t).unwrap_or(&self.ts)
}
/// Move cursor backward by 1 step
fn backward(&mut self) {
if self.last_steps.is_empty() {
return;
}
self.pos -= 1;
let last_step = self.last_steps.pop().unwrap();
self.cursor = match self.cursor {
WalkCursor::Token(idx, _) => self.walk_token(idx, last_step, true),
WalkCursor::Eof => {
let len = self.top().len();
self.walk_token(len, last_step, true)
}
}
}
/// Move cursor forward by 1 step
fn forward(&mut self) {
if self.is_eof() {
@ -163,37 +119,24 @@ impl<'a> SubTreeWalker<'a> {
}
self.pos += 1;
let step = self.current().map(|x| x.n_tokens).unwrap_or(1);
self.last_steps.push(step);
if let WalkCursor::Token(u, _) = self.cursor {
self.cursor = self.walk_token(u, step, false)
self.cursor = self.walk_token(u)
}
}
/// Traversal child token
fn walk_token(&mut self, pos: usize, offset: usize, backward: bool) -> WalkCursor {
fn walk_token(&mut self, pos: usize) -> WalkCursor {
let top = self.stack.last().map(|(t, _)| t).unwrap_or(&self.ts);
if backward && pos < offset {
let (_, last_idx) = self.stack.pop().unwrap();
return self.walk_token(last_idx, offset, backward);
}
let pos = if backward { pos - offset } else { pos + offset };
let pos = pos + 1;
match top.get(pos) {
DelimToken::Token(token) => match token {
tt::TokenTree::Subtree(subtree) => {
let ts = TokenSeq::from(subtree);
let new_idx = if backward { ts.len() - 1 } else { 0 };
self.stack.push((ts, pos));
WalkCursor::Token(new_idx, convert_delim(subtree.delimiter, backward))
}
tt::TokenTree::Leaf(leaf) => {
let next_tokens = top.child_slice(pos);
WalkCursor::Token(pos, convert_leaf(&next_tokens, leaf))
WalkCursor::Token(0, convert_delim(subtree.delimiter, false))
}
tt::TokenTree::Leaf(leaf) => WalkCursor::Token(pos, convert_leaf(leaf)),
},
DelimToken::Delim(delim, is_end) => {
WalkCursor::Token(pos, convert_delim(*delim, is_end))
@ -201,8 +144,7 @@ impl<'a> SubTreeWalker<'a> {
DelimToken::End => {
// it is the top level
if let Some((_, last_idx)) = self.stack.pop() {
assert!(!backward);
self.walk_token(last_idx, offset, backward)
self.walk_token(last_idx)
} else {
WalkCursor::Eof
}
@ -237,12 +179,9 @@ impl<'a> WalkerOwner<'a> {
}
while pos >= cached.len() {
let len = cached.len();
cached.push({
self.set_pos(len);
let walker = self.walker.borrow();
walker.current().cloned()
});
self.set_pos(cached.len());
let walker = self.walker.borrow();
cached.push(walker.current().cloned());
}
return cached[pos].clone();
@ -250,12 +189,11 @@ impl<'a> WalkerOwner<'a> {
fn set_pos(&self, pos: usize) {
let mut walker = self.walker.borrow_mut();
assert!(walker.pos <= pos);
while pos > walker.pos && !walker.is_eof() {
walker.forward();
}
while pos < walker.pos {
walker.backward();
}
}
fn collect_token_trees(&mut self, n: usize) -> Vec<&tt::TokenTree> {
@ -264,15 +202,16 @@ impl<'a> WalkerOwner<'a> {
walker.reset();
while walker.pos < n {
if let WalkCursor::Token(u, tt) = &walker.cursor {
if let WalkCursor::Token(u, _) = &walker.cursor {
// We only collect the topmost child
if walker.stack.len() == 0 {
for i in 0..tt.n_tokens {
if let DelimToken::Token(token) = walker.ts.get(u + i) {
res.push(token);
}
if let DelimToken::Token(token) = walker.ts.get(*u) {
res.push(token);
}
} else if walker.stack.len() == 1 {
}
// Check whether the second level is a subtree
// if so, collect its parent which is topmost child
else if walker.stack.len() == 1 {
if let DelimToken::Delim(_, is_end) = walker.top().get(*u) {
if !is_end {
let (_, last_idx) = &walker.stack[0];
@ -343,78 +282,6 @@ impl<'a> TokenSource for SubtreeTokenSource<'a> {
}
}
pub(crate) struct TokenPeek<'a, I>
where
I: Iterator<Item = &'a tt::TokenTree>,
{
iter: itertools::MultiPeek<I>,
}
// helper function
fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> {
if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt {
return Some(pp);
}
None
}
impl<'a, I> TokenPeek<'a, I>
where
I: Iterator<Item = &'a tt::TokenTree>,
{
pub fn new(iter: I) -> Self {
TokenPeek { iter: itertools::multipeek(iter) }
}
pub fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> {
if p.spacing != tt::Spacing::Joint {
return None;
}
self.iter.reset_peek();
let p1 = to_punct(self.iter.peek()?)?;
Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint))
}
pub fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> {
self.current_punct2(p).and_then(|((p0, p1), last_joint)| {
if !last_joint {
None
} else {
let p2 = to_punct(*self.iter.peek()?)?;
Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint))
}
})
}
}
// FIXME: Remove this function
fn convert_multi_char_punct<'b, I>(
p: &tt::Punct,
iter: &mut TokenPeek<'b, I>,
) -> Option<(SyntaxKind, bool, &'static str, usize)>
where
I: Iterator<Item = &'b tt::TokenTree>,
{
if let Some((m, is_joint_to_next)) = iter.current_punct3(p) {
if let Some((kind, text)) = match m {
_ => None,
} {
return Some((kind, is_joint_to_next, text, 3));
}
}
if let Some((m, is_joint_to_next)) = iter.current_punct2(p) {
if let Some((kind, text)) = match m {
_ => None,
} {
return Some((kind, is_joint_to_next, text, 2));
}
}
None
}
fn convert_delim(d: tt::Delimiter, closing: bool) -> TtToken {
let (kinds, texts) = match d {
tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"),
@ -426,7 +293,7 @@ fn convert_delim(d: tt::Delimiter, closing: bool) -> TtToken {
let idx = closing as usize;
let kind = kinds[idx];
let text = if texts.len() > 0 { &texts[idx..texts.len() - (1 - idx)] } else { "" };
TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }
TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text) }
}
fn convert_literal(l: &tt::Literal) -> TtToken {
@ -437,7 +304,7 @@ fn convert_literal(l: &tt::Literal) -> TtToken {
_ => panic!("Fail to convert given literal {:#?}", &l),
});
TtToken { kind, is_joint_to_next: false, text: l.text.clone(), n_tokens: 1 }
TtToken { kind, is_joint_to_next: false, text: l.text.clone() }
}
fn convert_ident(ident: &tt::Ident) -> TtToken {
@ -447,39 +314,31 @@ fn convert_ident(ident: &tt::Ident) -> TtToken {
SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT)
};
TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 }
TtToken { kind, is_joint_to_next: false, text: ident.text.clone() }
}
fn convert_punct(p: &tt::Punct, next_tokens: &[tt::TokenTree]) -> TtToken {
let mut iter = next_tokens.iter();
iter.next();
let mut peek = TokenPeek::new(iter);
if let Some((kind, is_joint_to_next, text, size)) = convert_multi_char_punct(p, &mut peek) {
TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size }
} else {
let kind = match p.char {
// lexer may produce combpund tokens for these ones
'.' => DOT,
':' => COLON,
'=' => EQ,
'!' => EXCL,
'-' => MINUS,
c => SyntaxKind::from_char(c).unwrap(),
};
let text = {
let mut buf = [0u8; 4];
let s: &str = p.char.encode_utf8(&mut buf);
SmolStr::new(s)
};
TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text, n_tokens: 1 }
}
/// Converts a single punctuation leaf into a `TtToken`.
///
/// The token kind is derived from the punct's character, the text is that
/// character as a one-character string, and `is_joint_to_next` is set when the
/// punct's spacing is `tt::Spacing::Joint`.
///
/// Panics (via `unwrap`) when `SyntaxKind::from_char` has no kind for a
/// character that is not covered by the explicit arms below.
fn convert_punct(p: &tt::Punct) -> TtToken {
    let kind = match p.char {
        // lexer may produce compound tokens for these ones
        '.' => DOT,
        ':' => COLON,
        '=' => EQ,
        '!' => EXCL,
        '-' => MINUS,
        other => SyntaxKind::from_char(other).unwrap(),
    };

    // Encode the character into a stack buffer so the text can be built
    // without going through an intermediate `String`.
    let mut utf8 = [0u8; 4];
    let encoded: &str = p.char.encode_utf8(&mut utf8);
    let text = SmolStr::new(encoded);

    let is_joint_to_next = p.spacing == tt::Spacing::Joint;
    TtToken { kind, is_joint_to_next, text }
}
fn convert_leaf(tokens: &[tt::TokenTree], leaf: &tt::Leaf) -> TtToken {
fn convert_leaf(leaf: &tt::Leaf) -> TtToken {
match leaf {
tt::Leaf::Literal(l) => convert_literal(l),
tt::Leaf::Ident(ident) => convert_ident(ident),
tt::Leaf::Punct(punct) => convert_punct(punct, tokens),
tt::Leaf::Punct(punct) => convert_punct(punct),
}
}

View File

@ -148,30 +148,21 @@ fn convert_tt(
match child {
SyntaxElement::Token(token) => {
if token.kind().is_punct() {
let mut prev = None;
for char in token.text().chars() {
if let Some(char) = prev {
token_trees.push(
tt::Leaf::from(tt::Punct { char, spacing: tt::Spacing::Joint })
.into(),
);
}
prev = Some(char)
}
if let Some(char) = prev {
let spacing = match child_iter.peek() {
Some(SyntaxElement::Token(token)) => {
if token.kind().is_punct() {
tt::Spacing::Joint
} else {
tt::Spacing::Alone
}
}
_ => tt::Spacing::Alone,
};
assert!(token.text().len() == 1, "Input ast::token punct must be single char.");
let char = token.text().chars().next().unwrap();
token_trees.push(tt::Leaf::from(tt::Punct { char, spacing }).into());
}
let spacing = match child_iter.peek() {
Some(SyntaxElement::Token(token)) => {
if token.kind().is_punct() {
tt::Spacing::Joint
} else {
tt::Spacing::Alone
}
}
_ => tt::Spacing::Alone,
};
token_trees.push(tt::Leaf::from(tt::Punct { char, spacing }).into());
} else {
let child: tt::TokenTree = if token.kind() == SyntaxKind::TRUE_KW
|| token.kind() == SyntaxKind::FALSE_KW
@ -224,6 +215,15 @@ impl<'a, Q: Querier> TtTreeSink<'a, Q> {
}
}
/// Returns `true` when `kind` is one of the six opening/closing delimiter
/// kinds (`L_PAREN`/`R_PAREN`, `L_BRACK`/`R_BRACK`, `L_CURLY`/`R_CURLY`),
/// and `false` for every other syntax kind.
fn is_delimiter(kind: SyntaxKind) -> bool {
    use SyntaxKind::*;

    match kind {
        L_PAREN | R_PAREN => true,
        L_BRACK | R_BRACK => true,
        L_CURLY | R_CURLY => true,
        _ => false,
    }
}
impl<'a, Q: Querier> TreeSink for TtTreeSink<'a, Q> {
fn token(&mut self, kind: SyntaxKind, n_tokens: u8) {
if kind == L_DOLLAR || kind == R_DOLLAR {
@ -240,14 +240,18 @@ impl<'a, Q: Querier> TreeSink for TtTreeSink<'a, Q> {
self.buf.clear();
self.inner.token(kind, text);
// // Add a white space to token
// let (last_kind, _, last_joint_to_next ) = self.src_querier.token(self.token_pos-n_tokens as usize);
// if !last_joint_to_next && last_kind.is_punct() {
// let (cur_kind, _, _ ) = self.src_querier.token(self.token_pos);
// if cur_kind.is_punct() {
// self.inner.token(WHITESPACE, " ".into());
// }
// }
// Add a white space between tokens, only if both are not delimiters
if !is_delimiter(kind) {
let (last_kind, _, last_joint_to_next) = self.src_querier.token(self.token_pos - 1);
if !last_joint_to_next && last_kind.is_punct() {
let (cur_kind, _, _) = self.src_querier.token(self.token_pos);
if !is_delimiter(cur_kind) {
if cur_kind.is_punct() {
self.inner.token(WHITESPACE, " ".into());
}
}
}
}
}
fn start_node(&mut self, kind: SyntaxKind) {

View File

@ -1,6 +1,5 @@
use crate::ParseError;
use crate::subtree_parser::Parser;
use crate::subtree_source::TokenPeek;
use smallvec::{SmallVec, smallvec};
#[derive(Debug, Clone)]
@ -262,3 +261,48 @@ impl<'a> TtCursor<'a> {
self.pos = memento.pos;
}
}
/// Look-ahead wrapper around an iterator of token trees.
///
/// The wrapped iterator is held in an `itertools::MultiPeek`, so several
/// upcoming items can be inspected without being consumed.
pub(crate) struct TokenPeek<'a, I: Iterator<Item = &'a tt::TokenTree>> {
    /// Underlying token-tree iterator with multi-item peeking.
    iter: itertools::MultiPeek<I>,
}
// helper function
/// Returns the punctuation leaf held by `tt`, or `None` when `tt` is any
/// other kind of token tree.
fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> {
    match tt {
        tt::TokenTree::Leaf(tt::Leaf::Punct(punct)) => Some(punct),
        _ => None,
    }
}
impl<'a, I> TokenPeek<'a, I>
where
    I: Iterator<Item = &'a tt::TokenTree>,
{
    /// Wraps `iter` in a multi-peek adaptor.
    pub fn new(iter: I) -> Self {
        let iter = itertools::multipeek(iter);
        TokenPeek { iter }
    }

    /// Pairs `p` with the next upcoming punct.
    ///
    /// Returns `None` when `p` is not `Joint`, when no further token is
    /// available, or when the next token is not a punct. Otherwise returns the
    /// two characters plus a flag telling whether the second punct is itself
    /// `Joint`.
    pub fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> {
        if p.spacing == tt::Spacing::Joint {
            // Start peeking from the first not-yet-consumed item.
            self.iter.reset_peek();
            let second = to_punct(self.iter.peek()?)?;
            let second_is_joint = second.spacing == tt::Spacing::Joint;
            Some(((p.char, second.char), second_is_joint))
        } else {
            None
        }
    }

    /// Extends `current_punct2` by one more punct.
    ///
    /// Returns `None` when no joint pair starts at `p`, when the second punct
    /// is not `Joint`, or when the following token is missing or not a punct.
    /// Otherwise returns the three characters plus a flag telling whether the
    /// third punct is `Joint`.
    pub fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> {
        let ((first, second), second_is_joint) = self.current_punct2(p)?;
        if !second_is_joint {
            return None;
        }
        // `current_punct2` has already peeked once, so this peek continues
        // from the item after the second punct.
        let third = to_punct(*self.iter.peek()?)?;
        Some(((first, second, third.char), third.spacing == tt::Spacing::Joint))
    }
}