mirror of
https://github.com/rust-lang/rust.git
synced 2025-05-02 21:17:39 +00:00

This wasn't a right decision in the first place, the feature flag was broken in the last rustfmt release, and syntax highlighting of imports is more important anyway
296 lines
9.9 KiB
Rust
296 lines
9.9 KiB
Rust
//! Syntax Tree library used throughout the rust analyzer.
|
|
//!
|
|
//! Properties:
|
|
//! - easy and fast incremental re-parsing
|
|
//! - graceful handling of errors
|
|
//! - full-fidelity representation (*any* text can be precisely represented as
|
|
//! a syntax tree)
|
|
//!
|
|
//! For more information, see the [RFC]. Current implementation is inspired by
|
|
//! the [Swift] one.
|
|
//!
|
|
//! The most interesting modules here are `syntax_node` (which defines concrete
|
|
//! syntax tree) and `ast` (which defines abstract syntax tree on top of the
|
|
//! CST). The actual parser live in a separate `ra_parser` crate, though the
|
|
//! lexer lives in this crate.
|
|
//!
|
|
//! See `api_walkthrough` test in this file for a quick API tour!
|
|
//!
|
|
//! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
|
|
//! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>
|
|
|
|
mod syntax_node;
|
|
mod syntax_text;
|
|
mod syntax_error;
|
|
mod parsing;
|
|
mod validation;
|
|
mod ptr;
|
|
|
|
pub mod algo;
|
|
pub mod ast;
|
|
#[doc(hidden)]
|
|
pub mod fuzz;
|
|
|
|
use std::{fmt::Write, sync::Arc};
|
|
|
|
use ra_text_edit::AtomTextEdit;
|
|
|
|
use crate::syntax_node::GreenNode;
|
|
|
|
pub use crate::{
|
|
ast::AstNode,
|
|
parsing::{classify_literal, tokenize, Token},
|
|
ptr::{AstPtr, SyntaxNodePtr},
|
|
syntax_error::{Location, SyntaxError, SyntaxErrorKind},
|
|
syntax_node::{
|
|
Direction, InsertPosition, SyntaxElement, SyntaxNode, SyntaxToken, SyntaxTreeBuilder,
|
|
TreeArc, WalkEvent,
|
|
},
|
|
syntax_text::SyntaxText,
|
|
};
|
|
pub use ra_parser::SyntaxKind;
|
|
pub use ra_parser::T;
|
|
pub use rowan::{SmolStr, TextRange, TextUnit};
|
|
|
|
/// `Parse` is the result of the parsing: a syntax tree and a collection of
|
|
/// errors.
|
|
///
|
|
/// Note that we always produce a syntax tree, even for completely invalid
|
|
/// files.
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub struct Parse {
|
|
pub tree: TreeArc<SourceFile>,
|
|
pub errors: Arc<Vec<SyntaxError>>,
|
|
}
|
|
|
|
impl Parse {
|
|
pub fn ok(self) -> Result<TreeArc<SourceFile>, Arc<Vec<SyntaxError>>> {
|
|
if self.errors.is_empty() {
|
|
Ok(self.tree)
|
|
} else {
|
|
Err(self.errors)
|
|
}
|
|
}
|
|
|
|
pub fn reparse(&self, edit: &AtomTextEdit) -> Parse {
|
|
self.incremental_reparse(edit).unwrap_or_else(|| self.full_reparse(edit))
|
|
}
|
|
|
|
pub fn debug_dump(&self) -> String {
|
|
let mut buf = self.tree.syntax().debug_dump();
|
|
for err in self.errors.iter() {
|
|
writeln!(buf, "error {:?}: {}", err.location(), err.kind()).unwrap();
|
|
}
|
|
buf
|
|
}
|
|
|
|
fn incremental_reparse(&self, edit: &AtomTextEdit) -> Option<Parse> {
|
|
// FIXME: validation errors are not handled here
|
|
parsing::incremental_reparse(self.tree.syntax(), edit, self.errors.to_vec()).map(
|
|
|(green_node, errors, _reparsed_range)| Parse {
|
|
tree: SourceFile::new(green_node),
|
|
errors: Arc::new(errors),
|
|
},
|
|
)
|
|
}
|
|
|
|
fn full_reparse(&self, edit: &AtomTextEdit) -> Parse {
|
|
let text = edit.apply(self.tree.syntax().text().to_string());
|
|
SourceFile::parse(&text)
|
|
}
|
|
}
|
|
|
|
/// `SourceFile` represents a parse tree for a single Rust file.
|
|
pub use crate::ast::SourceFile;
|
|
|
|
impl SourceFile {
|
|
fn new(green: GreenNode) -> TreeArc<SourceFile> {
|
|
let root = SyntaxNode::new(green);
|
|
if cfg!(debug_assertions) {
|
|
validation::validate_block_structure(&root);
|
|
}
|
|
assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
|
|
TreeArc::cast(root)
|
|
}
|
|
|
|
pub fn parse(text: &str) -> Parse {
|
|
let (green, mut errors) = parsing::parse_text(text);
|
|
let tree = SourceFile::new(green);
|
|
errors.extend(validation::validate(&tree));
|
|
Parse { tree, errors: Arc::new(errors) }
|
|
}
|
|
}
|
|
|
|
/// This test does not assert anything and instead just shows off the crate's
|
|
/// API.
|
|
#[test]
|
|
fn api_walkthrough() {
|
|
use ast::{ModuleItemOwner, NameOwner};
|
|
|
|
let source_code = "
|
|
fn foo() {
|
|
1 + 1
|
|
}
|
|
";
|
|
// `SourceFile` is the main entry point.
|
|
//
|
|
// The `parse` method returns a `Parse` -- a pair of syntax tree and a list
|
|
// of errors. That is, syntax tree is constructed even in presence of errors.
|
|
let parse = SourceFile::parse(source_code);
|
|
assert!(parse.errors.is_empty());
|
|
|
|
// Due to the way ownership is set up, owned syntax Nodes always live behind
|
|
// a `TreeArc` smart pointer. `TreeArc` is roughly an `std::sync::Arc` which
|
|
// points to the whole file instead of an individual node.
|
|
let file: TreeArc<SourceFile> = parse.tree;
|
|
|
|
// `SourceFile` is the root of the syntax tree. We can iterate file's items:
|
|
let mut func = None;
|
|
for item in file.items() {
|
|
match item.kind() {
|
|
ast::ModuleItemKind::FnDef(f) => func = Some(f),
|
|
_ => unreachable!(),
|
|
}
|
|
}
|
|
// The returned items are always references.
|
|
let func: &ast::FnDef = func.unwrap();
|
|
|
|
// All nodes implement `ToOwned` trait, with `Owned = TreeArc<Self>`.
|
|
// `to_owned` is a cheap operation: atomic increment.
|
|
let _owned_func: TreeArc<ast::FnDef> = func.to_owned();
|
|
|
|
// Each AST node has a bunch of getters for children. All getters return
|
|
// `Option`s though, to account for incomplete code. Some getters are common
|
|
// for several kinds of node. In this case, a trait like `ast::NameOwner`
|
|
// usually exists. By convention, all ast types should be used with `ast::`
|
|
// qualifier.
|
|
let name: Option<&ast::Name> = func.name();
|
|
let name = name.unwrap();
|
|
assert_eq!(name.text(), "foo");
|
|
|
|
// Let's get the `1 + 1` expression!
|
|
let block: &ast::Block = func.body().unwrap();
|
|
let expr: &ast::Expr = block.expr().unwrap();
|
|
|
|
// "Enum"-like nodes are represented using the "kind" pattern. It allows us
|
|
// to match exhaustively against all flavors of nodes, while maintaining
|
|
// internal representation flexibility. The drawback is that one can't write
|
|
// nested matches as one pattern.
|
|
let bin_expr: &ast::BinExpr = match expr.kind() {
|
|
ast::ExprKind::BinExpr(e) => e,
|
|
_ => unreachable!(),
|
|
};
|
|
|
|
// Besides the "typed" AST API, there's an untyped CST one as well.
|
|
// To switch from AST to CST, call `.syntax()` method:
|
|
let expr_syntax: &SyntaxNode = expr.syntax();
|
|
|
|
// Note how `expr` and `bin_expr` are in fact the same node underneath:
|
|
assert!(std::ptr::eq(expr_syntax, bin_expr.syntax()));
|
|
|
|
// To go from CST to AST, `AstNode::cast` function is used:
|
|
let expr = match ast::Expr::cast(expr_syntax) {
|
|
Some(e) => e,
|
|
None => unreachable!(),
|
|
};
|
|
|
|
// Note how expr is also a reference!
|
|
let expr: &ast::Expr = expr;
|
|
|
|
// This is possible because the underlying representation is the same:
|
|
assert_eq!(
|
|
expr as *const ast::Expr as *const u8,
|
|
expr_syntax as *const SyntaxNode as *const u8
|
|
);
|
|
|
|
// The two properties each syntax node has is a `SyntaxKind`:
|
|
assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);
|
|
|
|
// And text range:
|
|
assert_eq!(expr_syntax.range(), TextRange::from_to(32.into(), 37.into()));
|
|
|
|
// You can get node's text as a `SyntaxText` object, which will traverse the
|
|
// tree collecting token's text:
|
|
let text: SyntaxText<'_> = expr_syntax.text();
|
|
assert_eq!(text.to_string(), "1 + 1");
|
|
|
|
// There's a bunch of traversal methods on `SyntaxNode`:
|
|
assert_eq!(expr_syntax.parent(), Some(block.syntax()));
|
|
assert_eq!(block.syntax().first_child_or_token().map(|it| it.kind()), Some(T!['{']));
|
|
assert_eq!(
|
|
expr_syntax.next_sibling_or_token().map(|it| it.kind()),
|
|
Some(SyntaxKind::WHITESPACE)
|
|
);
|
|
|
|
// As well as some iterator helpers:
|
|
let f = expr_syntax.ancestors().find_map(ast::FnDef::cast);
|
|
assert_eq!(f, Some(&*func));
|
|
assert!(expr_syntax.siblings_with_tokens(Direction::Next).any(|it| it.kind() == T!['}']));
|
|
assert_eq!(
|
|
expr_syntax.descendants_with_tokens().count(),
|
|
8, // 5 tokens `1`, ` `, `+`, ` `, `!`
|
|
// 2 child literal expressions: `1`, `1`
|
|
// 1 the node itself: `1 + 1`
|
|
);
|
|
|
|
// There's also a `preorder` method with a more fine-grained iteration control:
|
|
let mut buf = String::new();
|
|
let mut indent = 0;
|
|
for event in expr_syntax.preorder_with_tokens() {
|
|
match event {
|
|
WalkEvent::Enter(node) => {
|
|
let text = match node {
|
|
SyntaxElement::Node(it) => it.text().to_string(),
|
|
SyntaxElement::Token(it) => it.text().to_string(),
|
|
};
|
|
buf += &format!("{:indent$}{:?} {:?}\n", " ", text, node.kind(), indent = indent);
|
|
indent += 2;
|
|
}
|
|
WalkEvent::Leave(_) => indent -= 2,
|
|
}
|
|
}
|
|
assert_eq!(indent, 0);
|
|
assert_eq!(
|
|
buf.trim(),
|
|
r#"
|
|
"1 + 1" BIN_EXPR
|
|
"1" LITERAL
|
|
"1" INT_NUMBER
|
|
" " WHITESPACE
|
|
"+" PLUS
|
|
" " WHITESPACE
|
|
"1" LITERAL
|
|
"1" INT_NUMBER
|
|
"#
|
|
.trim()
|
|
);
|
|
|
|
// To recursively process the tree, there are three approaches:
|
|
// 1. explicitly call getter methods on AST nodes.
|
|
// 2. use descendants and `AstNode::cast`.
|
|
// 3. use descendants and the visitor.
|
|
//
|
|
// Here's how the first one looks like:
|
|
let exprs_cast: Vec<String> = file
|
|
.syntax()
|
|
.descendants()
|
|
.filter_map(ast::Expr::cast)
|
|
.map(|expr| expr.syntax().text().to_string())
|
|
.collect();
|
|
|
|
// An alternative is to use a visitor. The visitor does not do traversal
|
|
// automatically (so it's more akin to a generic lambda) and is constructed
|
|
// from closures. This seems more flexible than a single generated visitor
|
|
// trait.
|
|
use algo::visit::{visitor, Visitor};
|
|
let mut exprs_visit = Vec::new();
|
|
for node in file.syntax().descendants() {
|
|
if let Some(result) =
|
|
visitor().visit::<ast::Expr, _>(|expr| expr.syntax().text().to_string()).accept(node)
|
|
{
|
|
exprs_visit.push(result);
|
|
}
|
|
}
|
|
assert_eq!(exprs_cast, exprs_visit);
|
|
}
|