1224: Remove unused multi-char puncts code and add space between puncts r=matklad a=edwin0cheng

After #1213, the parser only needs single-char puncts, so this PR does the following things:

* Remove the code which handles multi-char puncts
* Remove the code which handles backward traversal in `SubtreeSource`, because the result has been cached since #1195
* Add a space between two consecutive puncts during the `tt` to `SyntaxNode` conversion.

Note that a space should only be added if neither of the two puncts is a delimiter.

Co-authored-by: Edwin Cheng <edwin0cheng@gmail.com>
This commit is contained in:
bors[bot] 2019-05-02 06:32:42 +00:00
commit b0e7022afe
3 changed files with 121 additions and 214 deletions

View File

@ -45,20 +45,6 @@ impl<'a> TokenSeq<'a> {
} }
} }
} }
fn len(&self) -> usize {
match self {
TokenSeq::Subtree(subtree) => subtree.token_trees.len() + 2,
TokenSeq::Seq(tokens) => tokens.len(),
}
}
fn child_slice(&self, pos: usize) -> &[tt::TokenTree] {
match self {
TokenSeq::Subtree(subtree) => &subtree.token_trees[pos - 1..],
TokenSeq::Seq(tokens) => &tokens[pos..],
}
}
} }
#[derive(Debug, Clone, Eq, PartialEq)] #[derive(Debug, Clone, Eq, PartialEq)]
@ -66,7 +52,6 @@ struct TtToken {
pub kind: SyntaxKind, pub kind: SyntaxKind,
pub is_joint_to_next: bool, pub is_joint_to_next: bool,
pub text: SmolStr, pub text: SmolStr,
pub n_tokens: usize,
} }
#[derive(Debug, Clone, Eq, PartialEq)] #[derive(Debug, Clone, Eq, PartialEq)]
@ -80,19 +65,12 @@ struct SubTreeWalker<'a> {
pos: usize, pos: usize,
stack: Vec<(TokenSeq<'a>, usize)>, stack: Vec<(TokenSeq<'a>, usize)>,
cursor: WalkCursor, cursor: WalkCursor,
last_steps: Vec<usize>,
ts: TokenSeq<'a>, ts: TokenSeq<'a>,
} }
impl<'a> SubTreeWalker<'a> { impl<'a> SubTreeWalker<'a> {
fn new(ts: TokenSeq<'a>) -> SubTreeWalker { fn new(ts: TokenSeq<'a>) -> SubTreeWalker {
let mut res = SubTreeWalker { let mut res = SubTreeWalker { pos: 0, stack: vec![], cursor: WalkCursor::Eof, ts };
pos: 0,
stack: vec![],
cursor: WalkCursor::Eof,
last_steps: vec![],
ts,
};
res.reset(); res.reset();
res res
@ -105,7 +83,6 @@ impl<'a> SubTreeWalker<'a> {
fn reset(&mut self) { fn reset(&mut self) {
self.pos = 0; self.pos = 0;
self.stack = vec![]; self.stack = vec![];
self.last_steps = vec![];
self.cursor = match self.ts.get(0) { self.cursor = match self.ts.get(0) {
DelimToken::Token(token) => match token { DelimToken::Token(token) => match token {
@ -114,10 +91,7 @@ impl<'a> SubTreeWalker<'a> {
self.stack.push((ts, 0)); self.stack.push((ts, 0));
WalkCursor::Token(0, convert_delim(subtree.delimiter, false)) WalkCursor::Token(0, convert_delim(subtree.delimiter, false))
} }
tt::TokenTree::Leaf(leaf) => { tt::TokenTree::Leaf(leaf) => WalkCursor::Token(0, convert_leaf(leaf)),
let next_tokens = self.ts.child_slice(0);
WalkCursor::Token(0, convert_leaf(&next_tokens, leaf))
}
}, },
DelimToken::Delim(delim, is_end) => { DelimToken::Delim(delim, is_end) => {
assert!(!is_end); assert!(!is_end);
@ -138,24 +112,6 @@ impl<'a> SubTreeWalker<'a> {
self.stack.last().map(|(t, _)| t).unwrap_or(&self.ts) self.stack.last().map(|(t, _)| t).unwrap_or(&self.ts)
} }
/// Move cursor backward by 1 step
fn backward(&mut self) {
if self.last_steps.is_empty() {
return;
}
self.pos -= 1;
let last_step = self.last_steps.pop().unwrap();
self.cursor = match self.cursor {
WalkCursor::Token(idx, _) => self.walk_token(idx, last_step, true),
WalkCursor::Eof => {
let len = self.top().len();
self.walk_token(len, last_step, true)
}
}
}
/// Move cursor forward by 1 step /// Move cursor forward by 1 step
fn forward(&mut self) { fn forward(&mut self) {
if self.is_eof() { if self.is_eof() {
@ -163,37 +119,24 @@ impl<'a> SubTreeWalker<'a> {
} }
self.pos += 1; self.pos += 1;
let step = self.current().map(|x| x.n_tokens).unwrap_or(1);
self.last_steps.push(step);
if let WalkCursor::Token(u, _) = self.cursor { if let WalkCursor::Token(u, _) = self.cursor {
self.cursor = self.walk_token(u, step, false) self.cursor = self.walk_token(u)
} }
} }
/// Traversal child token /// Traversal child token
fn walk_token(&mut self, pos: usize, offset: usize, backward: bool) -> WalkCursor { fn walk_token(&mut self, pos: usize) -> WalkCursor {
let top = self.stack.last().map(|(t, _)| t).unwrap_or(&self.ts); let top = self.stack.last().map(|(t, _)| t).unwrap_or(&self.ts);
let pos = pos + 1;
if backward && pos < offset {
let (_, last_idx) = self.stack.pop().unwrap();
return self.walk_token(last_idx, offset, backward);
}
let pos = if backward { pos - offset } else { pos + offset };
match top.get(pos) { match top.get(pos) {
DelimToken::Token(token) => match token { DelimToken::Token(token) => match token {
tt::TokenTree::Subtree(subtree) => { tt::TokenTree::Subtree(subtree) => {
let ts = TokenSeq::from(subtree); let ts = TokenSeq::from(subtree);
let new_idx = if backward { ts.len() - 1 } else { 0 };
self.stack.push((ts, pos)); self.stack.push((ts, pos));
WalkCursor::Token(new_idx, convert_delim(subtree.delimiter, backward)) WalkCursor::Token(0, convert_delim(subtree.delimiter, false))
}
tt::TokenTree::Leaf(leaf) => {
let next_tokens = top.child_slice(pos);
WalkCursor::Token(pos, convert_leaf(&next_tokens, leaf))
} }
tt::TokenTree::Leaf(leaf) => WalkCursor::Token(pos, convert_leaf(leaf)),
}, },
DelimToken::Delim(delim, is_end) => { DelimToken::Delim(delim, is_end) => {
WalkCursor::Token(pos, convert_delim(*delim, is_end)) WalkCursor::Token(pos, convert_delim(*delim, is_end))
@ -201,8 +144,7 @@ impl<'a> SubTreeWalker<'a> {
DelimToken::End => { DelimToken::End => {
// it is the top level // it is the top level
if let Some((_, last_idx)) = self.stack.pop() { if let Some((_, last_idx)) = self.stack.pop() {
assert!(!backward); self.walk_token(last_idx)
self.walk_token(last_idx, offset, backward)
} else { } else {
WalkCursor::Eof WalkCursor::Eof
} }
@ -237,12 +179,9 @@ impl<'a> WalkerOwner<'a> {
} }
while pos >= cached.len() { while pos >= cached.len() {
let len = cached.len(); self.set_pos(cached.len());
cached.push({ let walker = self.walker.borrow();
self.set_pos(len); cached.push(walker.current().cloned());
let walker = self.walker.borrow();
walker.current().cloned()
});
} }
return cached[pos].clone(); return cached[pos].clone();
@ -250,12 +189,11 @@ impl<'a> WalkerOwner<'a> {
fn set_pos(&self, pos: usize) { fn set_pos(&self, pos: usize) {
let mut walker = self.walker.borrow_mut(); let mut walker = self.walker.borrow_mut();
assert!(walker.pos <= pos);
while pos > walker.pos && !walker.is_eof() { while pos > walker.pos && !walker.is_eof() {
walker.forward(); walker.forward();
} }
while pos < walker.pos {
walker.backward();
}
} }
fn collect_token_trees(&mut self, n: usize) -> Vec<&tt::TokenTree> { fn collect_token_trees(&mut self, n: usize) -> Vec<&tt::TokenTree> {
@ -264,15 +202,16 @@ impl<'a> WalkerOwner<'a> {
walker.reset(); walker.reset();
while walker.pos < n { while walker.pos < n {
if let WalkCursor::Token(u, tt) = &walker.cursor { if let WalkCursor::Token(u, _) = &walker.cursor {
// We only collect the topmost child // We only collect the topmost child
if walker.stack.len() == 0 { if walker.stack.len() == 0 {
for i in 0..tt.n_tokens { if let DelimToken::Token(token) = walker.ts.get(*u) {
if let DelimToken::Token(token) = walker.ts.get(u + i) { res.push(token);
res.push(token);
}
} }
} else if walker.stack.len() == 1 { }
// Check whether the second level is a subtree
// if so, collect its parent which is topmost child
else if walker.stack.len() == 1 {
if let DelimToken::Delim(_, is_end) = walker.top().get(*u) { if let DelimToken::Delim(_, is_end) = walker.top().get(*u) {
if !is_end { if !is_end {
let (_, last_idx) = &walker.stack[0]; let (_, last_idx) = &walker.stack[0];
@ -343,78 +282,6 @@ impl<'a> TokenSource for SubtreeTokenSource<'a> {
} }
} }
/// Look-ahead helper over a sequence of borrowed `tt::TokenTree`s.
///
/// Wraps the source iterator in an `itertools::MultiPeek` so that more than
/// one upcoming token can be inspected without consuming any of them.
pub(crate) struct TokenPeek<'a, I>
where
I: Iterator<Item = &'a tt::TokenTree>,
{
// Multi-peek adapter around the underlying token tree iterator.
iter: itertools::MultiPeek<I>,
}
// helper function
/// Extracts the `tt::Punct` from a token tree.
///
/// Returns `Some` only when `tt` is a leaf holding a punct; every other
/// token tree yields `None`.
fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> {
    match tt {
        tt::TokenTree::Leaf(tt::Leaf::Punct(punct)) => Some(punct),
        _ => None,
    }
}
impl<'a, I> TokenPeek<'a, I>
where
I: Iterator<Item = &'a tt::TokenTree>,
{
/// Creates a `TokenPeek` by wrapping `iter` with `itertools::multipeek`.
pub fn new(iter: I) -> Self {
TokenPeek { iter: itertools::multipeek(iter) }
}
/// Tries to pair `p` with the next token of the iterator.
///
/// Returns `None` when `p` is not `Spacing::Joint`, when the iterator has
/// no further token, or when the next token is not a punct. Otherwise
/// returns the two chars together with a flag telling whether the second
/// punct is itself `Spacing::Joint`.
pub fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> {
if p.spacing != tt::Spacing::Joint {
return None;
}
// Rewind the peek cursor so the peek below looks at the very next token.
self.iter.reset_peek();
let p1 = to_punct(self.iter.peek()?)?;
Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint))
}
/// Tries to form a triple from `p` and the next two tokens of the iterator.
///
/// Builds on `current_punct2`: in addition to its conditions, the second
/// punct must be `Spacing::Joint` and the token after it must be a punct.
/// The returned flag tells whether the third punct is `Spacing::Joint`.
pub fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> {
self.current_punct2(p).and_then(|((p0, p1), last_joint)| {
if !last_joint {
None
} else {
// `current_punct2` already peeked once, so this peek is issued
// without a reset and is meant to look one token further
// (relies on `MultiPeek::peek` advancing its peek cursor).
let p2 = to_punct(*self.iter.peek()?)?;
Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint))
}
})
}
}
// FIXME: Remove this function
//
// Attempts to combine `p` with the following puncts seen through `iter` into
// a single multi-char token. A three-char combination is tried first, then a
// two-char one.
//
// On success the tuple holds the syntax kind, the "joint to next" flag of the
// last punct, the token text, and the literal 3 or 2 (presumably the number
// of puncts the token is made of; confirm against the caller).
//
// NOTE: both `match m` expressions below only contain the `_ => None` arm,
// so no combination is ever recognised and the function always returns `None`.
fn convert_multi_char_punct<'b, I>(
p: &tt::Punct,
iter: &mut TokenPeek<'b, I>,
) -> Option<(SyntaxKind, bool, &'static str, usize)>
where
I: Iterator<Item = &'b tt::TokenTree>,
{
// Three-char candidates.
if let Some((m, is_joint_to_next)) = iter.current_punct3(p) {
if let Some((kind, text)) = match m {
_ => None,
} {
return Some((kind, is_joint_to_next, text, 3));
}
}
// Two-char candidates.
if let Some((m, is_joint_to_next)) = iter.current_punct2(p) {
if let Some((kind, text)) = match m {
_ => None,
} {
return Some((kind, is_joint_to_next, text, 2));
}
}
None
}
fn convert_delim(d: tt::Delimiter, closing: bool) -> TtToken { fn convert_delim(d: tt::Delimiter, closing: bool) -> TtToken {
let (kinds, texts) = match d { let (kinds, texts) = match d {
tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"),
@ -426,7 +293,7 @@ fn convert_delim(d: tt::Delimiter, closing: bool) -> TtToken {
let idx = closing as usize; let idx = closing as usize;
let kind = kinds[idx]; let kind = kinds[idx];
let text = if texts.len() > 0 { &texts[idx..texts.len() - (1 - idx)] } else { "" }; let text = if texts.len() > 0 { &texts[idx..texts.len() - (1 - idx)] } else { "" };
TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 } TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text) }
} }
fn convert_literal(l: &tt::Literal) -> TtToken { fn convert_literal(l: &tt::Literal) -> TtToken {
@ -437,7 +304,7 @@ fn convert_literal(l: &tt::Literal) -> TtToken {
_ => panic!("Fail to convert given literal {:#?}", &l), _ => panic!("Fail to convert given literal {:#?}", &l),
}); });
TtToken { kind, is_joint_to_next: false, text: l.text.clone(), n_tokens: 1 } TtToken { kind, is_joint_to_next: false, text: l.text.clone() }
} }
fn convert_ident(ident: &tt::Ident) -> TtToken { fn convert_ident(ident: &tt::Ident) -> TtToken {
@ -447,39 +314,31 @@ fn convert_ident(ident: &tt::Ident) -> TtToken {
SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT) SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT)
}; };
TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 } TtToken { kind, is_joint_to_next: false, text: ident.text.clone() }
} }
fn convert_punct(p: &tt::Punct, next_tokens: &[tt::TokenTree]) -> TtToken { fn convert_punct(p: &tt::Punct) -> TtToken {
let mut iter = next_tokens.iter(); let kind = match p.char {
iter.next(); // lexer may produce combpund tokens for these ones
let mut peek = TokenPeek::new(iter); '.' => DOT,
':' => COLON,
if let Some((kind, is_joint_to_next, text, size)) = convert_multi_char_punct(p, &mut peek) { '=' => EQ,
TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size } '!' => EXCL,
} else { '-' => MINUS,
let kind = match p.char { c => SyntaxKind::from_char(c).unwrap(),
// lexer may produce combpund tokens for these ones };
'.' => DOT, let text = {
':' => COLON, let mut buf = [0u8; 4];
'=' => EQ, let s: &str = p.char.encode_utf8(&mut buf);
'!' => EXCL, SmolStr::new(s)
'-' => MINUS, };
c => SyntaxKind::from_char(c).unwrap(), TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text }
};
let text = {
let mut buf = [0u8; 4];
let s: &str = p.char.encode_utf8(&mut buf);
SmolStr::new(s)
};
TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text, n_tokens: 1 }
}
} }
fn convert_leaf(tokens: &[tt::TokenTree], leaf: &tt::Leaf) -> TtToken { fn convert_leaf(leaf: &tt::Leaf) -> TtToken {
match leaf { match leaf {
tt::Leaf::Literal(l) => convert_literal(l), tt::Leaf::Literal(l) => convert_literal(l),
tt::Leaf::Ident(ident) => convert_ident(ident), tt::Leaf::Ident(ident) => convert_ident(ident),
tt::Leaf::Punct(punct) => convert_punct(punct, tokens), tt::Leaf::Punct(punct) => convert_punct(punct),
} }
} }

View File

@ -148,30 +148,21 @@ fn convert_tt(
match child { match child {
SyntaxElement::Token(token) => { SyntaxElement::Token(token) => {
if token.kind().is_punct() { if token.kind().is_punct() {
let mut prev = None; assert!(token.text().len() == 1, "Input ast::token punct must be single char.");
for char in token.text().chars() { let char = token.text().chars().next().unwrap();
if let Some(char) = prev {
token_trees.push(
tt::Leaf::from(tt::Punct { char, spacing: tt::Spacing::Joint })
.into(),
);
}
prev = Some(char)
}
if let Some(char) = prev {
let spacing = match child_iter.peek() {
Some(SyntaxElement::Token(token)) => {
if token.kind().is_punct() {
tt::Spacing::Joint
} else {
tt::Spacing::Alone
}
}
_ => tt::Spacing::Alone,
};
token_trees.push(tt::Leaf::from(tt::Punct { char, spacing }).into()); let spacing = match child_iter.peek() {
} Some(SyntaxElement::Token(token)) => {
if token.kind().is_punct() {
tt::Spacing::Joint
} else {
tt::Spacing::Alone
}
}
_ => tt::Spacing::Alone,
};
token_trees.push(tt::Leaf::from(tt::Punct { char, spacing }).into());
} else { } else {
let child: tt::TokenTree = if token.kind() == SyntaxKind::TRUE_KW let child: tt::TokenTree = if token.kind() == SyntaxKind::TRUE_KW
|| token.kind() == SyntaxKind::FALSE_KW || token.kind() == SyntaxKind::FALSE_KW
@ -224,6 +215,15 @@ impl<'a, Q: Querier> TtTreeSink<'a, Q> {
} }
} }
/// Tells whether `kind` is one of the six delimiter kinds: an opening or
/// closing parenthesis, bracket or curly brace.
fn is_delimiter(kind: SyntaxKind) -> bool {
    use SyntaxKind::*;
    matches!(kind, L_PAREN | L_BRACK | L_CURLY | R_PAREN | R_BRACK | R_CURLY)
}
impl<'a, Q: Querier> TreeSink for TtTreeSink<'a, Q> { impl<'a, Q: Querier> TreeSink for TtTreeSink<'a, Q> {
fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { fn token(&mut self, kind: SyntaxKind, n_tokens: u8) {
if kind == L_DOLLAR || kind == R_DOLLAR { if kind == L_DOLLAR || kind == R_DOLLAR {
@ -240,14 +240,18 @@ impl<'a, Q: Querier> TreeSink for TtTreeSink<'a, Q> {
self.buf.clear(); self.buf.clear();
self.inner.token(kind, text); self.inner.token(kind, text);
// // Add a white space to token // Add a white space between tokens, only if both are not delimiters
// let (last_kind, _, last_joint_to_next ) = self.src_querier.token(self.token_pos-n_tokens as usize); if !is_delimiter(kind) {
// if !last_joint_to_next && last_kind.is_punct() { let (last_kind, _, last_joint_to_next) = self.src_querier.token(self.token_pos - 1);
// let (cur_kind, _, _ ) = self.src_querier.token(self.token_pos); if !last_joint_to_next && last_kind.is_punct() {
// if cur_kind.is_punct() { let (cur_kind, _, _) = self.src_querier.token(self.token_pos);
// self.inner.token(WHITESPACE, " ".into()); if !is_delimiter(cur_kind) {
// } if cur_kind.is_punct() {
// } self.inner.token(WHITESPACE, " ".into());
}
}
}
}
} }
fn start_node(&mut self, kind: SyntaxKind) { fn start_node(&mut self, kind: SyntaxKind) {

View File

@ -1,6 +1,5 @@
use crate::ParseError; use crate::ParseError;
use crate::subtree_parser::Parser; use crate::subtree_parser::Parser;
use crate::subtree_source::TokenPeek;
use smallvec::{SmallVec, smallvec}; use smallvec::{SmallVec, smallvec};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -262,3 +261,48 @@ impl<'a> TtCursor<'a> {
self.pos = memento.pos; self.pos = memento.pos;
} }
} }
/// Look-ahead helper over a sequence of borrowed `tt::TokenTree`s.
///
/// Wraps the source iterator in an `itertools::MultiPeek` so that more than
/// one upcoming token can be inspected without consuming any of them.
pub(crate) struct TokenPeek<'a, I>
where
I: Iterator<Item = &'a tt::TokenTree>,
{
// Multi-peek adapter around the underlying token tree iterator.
iter: itertools::MultiPeek<I>,
}
// helper function
/// Extracts the `tt::Punct` from a token tree.
///
/// Returns `Some` only when `tt` is a leaf holding a punct; every other
/// token tree yields `None`.
fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> {
    match tt {
        tt::TokenTree::Leaf(tt::Leaf::Punct(punct)) => Some(punct),
        _ => None,
    }
}
impl<'a, I> TokenPeek<'a, I>
where
I: Iterator<Item = &'a tt::TokenTree>,
{
/// Creates a `TokenPeek` by wrapping `iter` with `itertools::multipeek`.
pub fn new(iter: I) -> Self {
TokenPeek { iter: itertools::multipeek(iter) }
}
/// Tries to pair `p` with the next token of the iterator.
///
/// Returns `None` when `p` is not `Spacing::Joint`, when the iterator has
/// no further token, or when the next token is not a punct. Otherwise
/// returns the two chars together with a flag telling whether the second
/// punct is itself `Spacing::Joint`.
pub fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> {
if p.spacing != tt::Spacing::Joint {
return None;
}
// Rewind the peek cursor so the peek below looks at the very next token.
self.iter.reset_peek();
let p1 = to_punct(self.iter.peek()?)?;
Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint))
}
/// Tries to form a triple from `p` and the next two tokens of the iterator.
///
/// Builds on `current_punct2`: in addition to its conditions, the second
/// punct must be `Spacing::Joint` and the token after it must be a punct.
/// The returned flag tells whether the third punct is `Spacing::Joint`.
pub fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> {
self.current_punct2(p).and_then(|((p0, p1), last_joint)| {
if !last_joint {
None
} else {
// `current_punct2` already peeked once, so this peek is issued
// without a reset and is meant to look one token further
// (relies on `MultiPeek::peek` advancing its peek cursor).
let p2 = to_punct(*self.iter.peek()?)?;
Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint))
}
})
}
}