rust/crates/ra_syntax/src/lib.rs

//! Syntax Tree library used throughout the rust analyzer.
//!
//! Properties:
//!   - easy and fast incremental re-parsing
//!   - graceful handling of errors
//!   - full-fidelity representation (*any* text can be precisely represented as
//!     a syntax tree)
//!
//! For more information, see the [RFC]. Current implementation is inspired by
//! the [Swift] one.
//!
//! The most interesting modules here are `syntax_node` (which defines concrete
//! syntax tree) and `ast` (which defines abstract syntax tree on top of the
//! CST). The actual parser live in a separate `ra_parser` crate, though the
//! lexer lives in this crate.
//!
//! See `api_walkthrough` test in this file for a quick API tour!
//!
//! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
//! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>

mod syntax_node;
mod syntax_text;
mod syntax_error;
mod parsing;
mod validation;
mod ptr;

pub mod algo;
pub mod ast;
#[doc(hidden)]
pub mod fuzz;

use std::{fmt::Write, sync::Arc};

use ra_text_edit::AtomTextEdit;

use crate::syntax_node::GreenNode;

pub use crate::{
    ast::AstNode,
    parsing::{classify_literal, tokenize, Token},
    ptr::{AstPtr, SyntaxNodePtr},
    syntax_error::{Location, SyntaxError, SyntaxErrorKind},
    syntax_node::{
        Direction, InsertPosition, SyntaxElement, SyntaxNode, SyntaxToken, SyntaxTreeBuilder,
        TreeArc, WalkEvent,
    },
    syntax_text::SyntaxText,
};
pub use ra_parser::SyntaxKind;
pub use ra_parser::T;
pub use rowan::{SmolStr, TextRange, TextUnit};

/// `Parse` is the result of the parsing: a syntax tree and a collection of
/// errors.
///
/// Note that we always produce a syntax tree, even for completely invalid
/// files.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parse {
    pub tree: TreeArc<SourceFile>,
    pub errors: Arc<Vec<SyntaxError>>,
}

impl Parse {
    pub fn ok(self) -> Result<TreeArc<SourceFile>, Arc<Vec<SyntaxError>>> {
        if self.errors.is_empty() {
            Ok(self.tree)
        } else {
            Err(self.errors)
        }
    }

    pub fn reparse(&self, edit: &AtomTextEdit) -> Parse {
        self.incremental_reparse(edit).unwrap_or_else(|| self.full_reparse(edit))
    }

    pub fn debug_dump(&self) -> String {
        let mut buf = self.tree.syntax().debug_dump();
        for err in self.errors.iter() {
            writeln!(buf, "error {:?}: {}", err.location(), err.kind()).unwrap();
        }
        buf
    }

    fn incremental_reparse(&self, edit: &AtomTextEdit) -> Option<Parse> {
        // FIXME: validation errors are not handled here
        parsing::incremental_reparse(self.tree.syntax(), edit, self.errors.to_vec()).map(
            |(green_node, errors, _reparsed_range)| Parse {
                tree: SourceFile::new(green_node),
                errors: Arc::new(errors),
            },
        )
    }

    fn full_reparse(&self, edit: &AtomTextEdit) -> Parse {
        let text = edit.apply(self.tree.syntax().text().to_string());
        SourceFile::parse(&text)
    }
}

/// `SourceFile` represents a parse tree for a single Rust file.
pub use crate::ast::SourceFile;

impl SourceFile {
    fn new(green: GreenNode) -> TreeArc<SourceFile> {
        let root = SyntaxNode::new(green);
        if cfg!(debug_assertions) {
            validation::validate_block_structure(&root);
        }
        assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
        TreeArc::cast(root)
    }

    pub fn parse(text: &str) -> Parse {
        let (green, mut errors) = parsing::parse_text(text);
        let tree = SourceFile::new(green);
        errors.extend(validation::validate(&tree));
        Parse { tree, errors: Arc::new(errors) }
    }
}

/// This test does not assert anything and instead just shows off the crate's
/// API.
#[test]
fn api_walkthrough() {
    use ast::{ModuleItemOwner, NameOwner};

    let source_code = "
        fn foo() {
            1 + 1
        }
    ";
    // `SourceFile` is the main entry point.
    //
    // The `parse` method returns a `Parse` -- a pair of syntax tree and a list
    // of errors. That is, syntax tree is constructed even in presence of errors.
    let parse = SourceFile::parse(source_code);
    assert!(parse.errors.is_empty());

    // Due to the way ownership is set up, owned syntax Nodes always live behind
    // a `TreeArc` smart pointer. `TreeArc` is roughly an `std::sync::Arc` which
    // points to the whole file instead of an individual node.
    let file: TreeArc<SourceFile> = parse.tree;

    // `SourceFile` is the root of the syntax tree. We can iterate file's items:
    let mut func = None;
    for item in file.items() {
        match item.kind() {
            ast::ModuleItemKind::FnDef(f) => func = Some(f),
            _ => unreachable!(),
        }
    }
    // The returned items are always references.
    let func: &ast::FnDef = func.unwrap();

    // All nodes implement `ToOwned` trait, with `Owned = TreeArc<Self>`.
    // `to_owned` is a cheap operation: atomic increment.
    let _owned_func: TreeArc<ast::FnDef> = func.to_owned();

    // Each AST node has a bunch of getters for children. All getters return
    // `Option`s though, to account for incomplete code. Some getters are common
    // for several kinds of node. In this case, a trait like `ast::NameOwner`
    // usually exists. By convention, all ast types should be used with `ast::`
    // qualifier.
    let name: Option<&ast::Name> = func.name();
    let name = name.unwrap();
    assert_eq!(name.text(), "foo");

    // Let's get the `1 + 1` expression!
    let block: &ast::Block = func.body().unwrap();
    let expr: &ast::Expr = block.expr().unwrap();

    // "Enum"-like nodes are represented using the "kind" pattern. It allows us
    // to match exhaustively against all flavors of nodes, while maintaining
    // internal representation flexibility. The drawback is that one can't write
    // nested matches as one pattern.
    let bin_expr: &ast::BinExpr = match expr.kind() {
        ast::ExprKind::BinExpr(e) => e,
        _ => unreachable!(),
    };

    // Besides the "typed" AST API, there's an untyped CST one as well.
    // To switch from AST to CST, call `.syntax()` method:
    let expr_syntax: &SyntaxNode = expr.syntax();

    // Note how `expr` and `bin_expr` are in fact the same node underneath:
    assert!(std::ptr::eq(expr_syntax, bin_expr.syntax()));

    // To go from CST to AST, `AstNode::cast` function is used:
    let expr = match ast::Expr::cast(expr_syntax) {
        Some(e) => e,
        None => unreachable!(),
    };

    // Note how expr is also a reference!
    let expr: &ast::Expr = expr;

    // This is possible because the underlying representation is the same:
    assert_eq!(
        expr as *const ast::Expr as *const u8,
        expr_syntax as *const SyntaxNode as *const u8
    );

    // The two properties each syntax node has is a `SyntaxKind`:
    assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);

    // And text range:
    assert_eq!(expr_syntax.range(), TextRange::from_to(32.into(), 37.into()));

    // You can get node's text as a `SyntaxText` object, which will traverse the
    // tree collecting token's text:
    let text: SyntaxText<'_> = expr_syntax.text();
    assert_eq!(text.to_string(), "1 + 1");

    // There's a bunch of traversal methods on `SyntaxNode`:
    assert_eq!(expr_syntax.parent(), Some(block.syntax()));
    assert_eq!(block.syntax().first_child_or_token().map(|it| it.kind()), Some(T!['{']));
    assert_eq!(
        expr_syntax.next_sibling_or_token().map(|it| it.kind()),
        Some(SyntaxKind::WHITESPACE)
    );

    // As well as some iterator helpers:
    let f = expr_syntax.ancestors().find_map(ast::FnDef::cast);
    assert_eq!(f, Some(&*func));
    assert!(expr_syntax.siblings_with_tokens(Direction::Next).any(|it| it.kind() == T!['}']));
    assert_eq!(
        expr_syntax.descendants_with_tokens().count(),
        8, // 5 tokens `1`, ` `, `+`, ` `, `!`
           // 2 child literal expressions: `1`, `1`
           // 1 the node itself: `1 + 1`
    );

    // There's also a `preorder` method with a more fine-grained iteration control:
    let mut buf = String::new();
    let mut indent = 0;
    for event in expr_syntax.preorder_with_tokens() {
        match event {
            WalkEvent::Enter(node) => {
                let text = match node {
                    SyntaxElement::Node(it) => it.text().to_string(),
                    SyntaxElement::Token(it) => it.text().to_string(),
                };
                buf += &format!("{:indent$}{:?} {:?}\n", " ", text, node.kind(), indent = indent);
                indent += 2;
            }
            WalkEvent::Leave(_) => indent -= 2,
        }
    }
    assert_eq!(indent, 0);
    assert_eq!(
        buf.trim(),
        r#"
"1 + 1" BIN_EXPR
  "1" LITERAL
    "1" INT_NUMBER
  " " WHITESPACE
  "+" PLUS
  " " WHITESPACE
  "1" LITERAL
    "1" INT_NUMBER
"#
        .trim()
    );

    // To recursively process the tree, there are three approaches:
    // 1. explicitly call getter methods on AST nodes.
    // 2. use descendants and `AstNode::cast`.
    // 3. use descendants and the visitor.
    //
    // Here's how the first one looks like:
    let exprs_cast: Vec<String> = file
        .syntax()
        .descendants()
        .filter_map(ast::Expr::cast)
        .map(|expr| expr.syntax().text().to_string())
        .collect();

    // An alternative is to use a visitor. The visitor does not do traversal
    // automatically (so it's more akin to a generic lambda) and is constructed
    // from closures. This seems more flexible than a single generated visitor
    // trait.
    use algo::visit::{visitor, Visitor};
    let mut exprs_visit = Vec::new();
    for node in file.syntax().descendants() {
        if let Some(result) =
            visitor().visit::<ast::Expr, _>(|expr| expr.syntax().text().to_string()).accept(node)
        {
            exprs_visit.push(result);
        }
    }
    assert_eq!(exprs_cast, exprs_visit);
}