mirror of
https://github.com/rust-lang/rust.git
synced 2025-01-20 19:52:48 +00:00
add rustc_lexer
This commit is contained in:
parent
d690249bc8
commit
75761c0e47
10
Cargo.lock
generated
10
Cargo.lock
generated
@ -1308,12 +1308,21 @@ dependencies = [
|
||||
"serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ra_rustc_lexer"
|
||||
version = "0.1.0-pre.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ra_syntax"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ra_parser 0.1.0",
|
||||
"ra_rustc_lexer 0.1.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ra_text_edit 0.1.0",
|
||||
"rowan 0.6.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"smol_str 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -2250,6 +2259,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum proptest 0.9.4 (registry+https://github.com/rust-lang/crates.io-index)" = "cf147e022eacf0c8a054ab864914a7602618adba841d800a9a9868a5237a529f"
|
||||
"checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0"
|
||||
"checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1"
|
||||
"checksum ra_rustc_lexer 0.1.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e8d92772f822978a6c9c4657aa61af439e4e635180628b3354049b283b749f1e"
|
||||
"checksum ra_vfs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fb7cd4e302032c5ab514f1c01c89727cd96fd950dd36f9ebee9252df45d9fb1a"
|
||||
"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
|
||||
"checksum rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c"
|
||||
|
@ -11,6 +11,7 @@ repository = "https://github.com/rust-analyzer/rust-analyzer"
|
||||
unicode-xid = "0.1.0"
|
||||
itertools = "0.8.0"
|
||||
rowan = "0.6.0-pre.1"
|
||||
ra_rustc_lexer = { version = "0.1.0-pre.1", features = [ "unicode-xid" ] }
|
||||
|
||||
# ideally, `serde` should be enabled by `ra_lsp_server`, but we enable it here
|
||||
# to reduce number of compilations
|
||||
|
@ -30,19 +30,119 @@ pub struct Token {
|
||||
|
||||
/// Break a string up into its component tokens
|
||||
pub fn tokenize(text: &str) -> Vec<Token> {
|
||||
if text.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
let mut text = text;
|
||||
let mut acc = Vec::new();
|
||||
if let Some(len) = ra_rustc_lexer::strip_shebang(text) {
|
||||
acc.push(Token { kind: SHEBANG, len: TextUnit::from_usize(len) });
|
||||
text = &text[len..];
|
||||
}
|
||||
while !text.is_empty() {
|
||||
let token = next_token(text);
|
||||
let rustc_token = ra_rustc_lexer::first_token(text);
|
||||
macro_rules! decompose {
|
||||
($t1:expr, $t2:expr) => {{
|
||||
acc.push(Token { kind: $t1, len: 1.into() });
|
||||
acc.push(Token { kind: $t2, len: 1.into() });
|
||||
text = &text[2..];
|
||||
continue;
|
||||
}};
|
||||
($t1:expr, $t2:expr, $t3:expr) => {{
|
||||
acc.push(Token { kind: $t1, len: 1.into() });
|
||||
acc.push(Token { kind: $t2, len: 1.into() });
|
||||
acc.push(Token { kind: $t3, len: 1.into() });
|
||||
text = &text[3..];
|
||||
continue;
|
||||
}};
|
||||
}
|
||||
let kind = match rustc_token.kind {
|
||||
ra_rustc_lexer::TokenKind::LineComment => COMMENT,
|
||||
ra_rustc_lexer::TokenKind::BlockComment { .. } => COMMENT,
|
||||
ra_rustc_lexer::TokenKind::Whitespace => WHITESPACE,
|
||||
ra_rustc_lexer::TokenKind::Ident => {
|
||||
let token_text = &text[..rustc_token.len];
|
||||
if token_text == "_" {
|
||||
UNDERSCORE
|
||||
} else {
|
||||
SyntaxKind::from_keyword(&text[..rustc_token.len]).unwrap_or(IDENT)
|
||||
}
|
||||
}
|
||||
ra_rustc_lexer::TokenKind::RawIdent => IDENT,
|
||||
ra_rustc_lexer::TokenKind::Literal { kind, .. } => match kind {
|
||||
ra_rustc_lexer::LiteralKind::Int { .. } => INT_NUMBER,
|
||||
ra_rustc_lexer::LiteralKind::Float { .. } => FLOAT_NUMBER,
|
||||
ra_rustc_lexer::LiteralKind::Char { .. } => CHAR,
|
||||
ra_rustc_lexer::LiteralKind::Byte { .. } => BYTE,
|
||||
ra_rustc_lexer::LiteralKind::Str { .. } => STRING,
|
||||
ra_rustc_lexer::LiteralKind::ByteStr { .. } => BYTE_STRING,
|
||||
ra_rustc_lexer::LiteralKind::RawStr { .. } => RAW_STRING,
|
||||
ra_rustc_lexer::LiteralKind::RawByteStr { .. } => RAW_BYTE_STRING,
|
||||
},
|
||||
ra_rustc_lexer::TokenKind::Lifetime { .. } => LIFETIME,
|
||||
ra_rustc_lexer::TokenKind::Semi => SEMI,
|
||||
ra_rustc_lexer::TokenKind::Comma => COMMA,
|
||||
ra_rustc_lexer::TokenKind::DotDotDot => decompose!(DOT, DOT, DOT),
|
||||
ra_rustc_lexer::TokenKind::DotDotEq => decompose!(DOT, DOT, EQ),
|
||||
ra_rustc_lexer::TokenKind::DotDot => decompose!(DOT, DOT),
|
||||
ra_rustc_lexer::TokenKind::Dot => DOT,
|
||||
ra_rustc_lexer::TokenKind::OpenParen => L_PAREN,
|
||||
ra_rustc_lexer::TokenKind::CloseParen => R_PAREN,
|
||||
ra_rustc_lexer::TokenKind::OpenBrace => L_CURLY,
|
||||
ra_rustc_lexer::TokenKind::CloseBrace => R_CURLY,
|
||||
ra_rustc_lexer::TokenKind::OpenBracket => L_BRACK,
|
||||
ra_rustc_lexer::TokenKind::CloseBracket => R_BRACK,
|
||||
ra_rustc_lexer::TokenKind::At => AT,
|
||||
ra_rustc_lexer::TokenKind::Pound => POUND,
|
||||
ra_rustc_lexer::TokenKind::Tilde => TILDE,
|
||||
ra_rustc_lexer::TokenKind::Question => QUESTION,
|
||||
ra_rustc_lexer::TokenKind::ColonColon => decompose!(COLON, COLON),
|
||||
ra_rustc_lexer::TokenKind::Colon => COLON,
|
||||
ra_rustc_lexer::TokenKind::Dollar => DOLLAR,
|
||||
ra_rustc_lexer::TokenKind::EqEq => decompose!(EQ, EQ),
|
||||
ra_rustc_lexer::TokenKind::Eq => EQ,
|
||||
ra_rustc_lexer::TokenKind::FatArrow => decompose!(EQ, R_ANGLE),
|
||||
ra_rustc_lexer::TokenKind::Ne => decompose!(EXCL, EQ),
|
||||
ra_rustc_lexer::TokenKind::Not => EXCL,
|
||||
ra_rustc_lexer::TokenKind::Le => decompose!(L_ANGLE, EQ),
|
||||
ra_rustc_lexer::TokenKind::LArrow => decompose!(COLON, MINUS),
|
||||
ra_rustc_lexer::TokenKind::Lt => L_ANGLE,
|
||||
ra_rustc_lexer::TokenKind::ShlEq => decompose!(L_ANGLE, L_ANGLE, EQ),
|
||||
ra_rustc_lexer::TokenKind::Shl => decompose!(L_ANGLE, L_ANGLE),
|
||||
ra_rustc_lexer::TokenKind::Ge => decompose!(R_ANGLE, EQ),
|
||||
ra_rustc_lexer::TokenKind::Gt => R_ANGLE,
|
||||
ra_rustc_lexer::TokenKind::ShrEq => decompose!(R_ANGLE, R_ANGLE, EQ),
|
||||
ra_rustc_lexer::TokenKind::Shr => decompose!(R_ANGLE, R_ANGLE),
|
||||
ra_rustc_lexer::TokenKind::RArrow => decompose!(MINUS, R_ANGLE),
|
||||
ra_rustc_lexer::TokenKind::Minus => MINUS,
|
||||
ra_rustc_lexer::TokenKind::MinusEq => decompose!(MINUS, EQ),
|
||||
ra_rustc_lexer::TokenKind::And => AMP,
|
||||
ra_rustc_lexer::TokenKind::AndAnd => decompose!(AMP, AMP),
|
||||
ra_rustc_lexer::TokenKind::AndEq => decompose!(AMP, EQ),
|
||||
ra_rustc_lexer::TokenKind::Or => PIPE,
|
||||
ra_rustc_lexer::TokenKind::OrOr => decompose!(PIPE, PIPE),
|
||||
ra_rustc_lexer::TokenKind::OrEq => decompose!(PIPE, EQ),
|
||||
ra_rustc_lexer::TokenKind::PlusEq => decompose!(PLUS, EQ),
|
||||
ra_rustc_lexer::TokenKind::Plus => PLUS,
|
||||
ra_rustc_lexer::TokenKind::StarEq => decompose!(STAR, EQ),
|
||||
ra_rustc_lexer::TokenKind::Star => STAR,
|
||||
ra_rustc_lexer::TokenKind::SlashEq => decompose!(SLASH, EQ),
|
||||
ra_rustc_lexer::TokenKind::Slash => SLASH,
|
||||
ra_rustc_lexer::TokenKind::CaretEq => decompose!(CARET, EQ),
|
||||
ra_rustc_lexer::TokenKind::Caret => CARET,
|
||||
ra_rustc_lexer::TokenKind::PercentEq => decompose!(PERCENT, EQ),
|
||||
ra_rustc_lexer::TokenKind::Percent => PERCENT,
|
||||
ra_rustc_lexer::TokenKind::Unknown => ERROR,
|
||||
};
|
||||
let token = Token { kind, len: TextUnit::from_usize(rustc_token.len) };
|
||||
acc.push(token);
|
||||
let len: u32 = token.len.into();
|
||||
text = &text[len as usize..];
|
||||
text = &text[rustc_token.len..];
|
||||
}
|
||||
acc
|
||||
}
|
||||
|
||||
/// Get the next token from a string
|
||||
pub fn next_token(text: &str) -> Token {
|
||||
fn next_token(text: &str) -> Token {
|
||||
assert!(!text.is_empty());
|
||||
let mut ptr = Ptr::new(text);
|
||||
let c = ptr.bump().unwrap();
|
||||
|
@ -12,9 +12,9 @@ INT_NUMBER 2 "0_"
|
||||
WHITESPACE 1 " "
|
||||
FLOAT_NUMBER 2 "0."
|
||||
WHITESPACE 1 " "
|
||||
INT_NUMBER 2 "0e"
|
||||
FLOAT_NUMBER 2 "0e"
|
||||
WHITESPACE 1 " "
|
||||
INT_NUMBER 2 "0E"
|
||||
FLOAT_NUMBER 2 "0E"
|
||||
WHITESPACE 1 " "
|
||||
INT_NUMBER 2 "0z"
|
||||
WHITESPACE 1 "\n"
|
||||
@ -32,9 +32,9 @@ INT_NUMBER 6 "0_1279"
|
||||
WHITESPACE 1 " "
|
||||
FLOAT_NUMBER 6 "0.1279"
|
||||
WHITESPACE 1 " "
|
||||
INT_NUMBER 6 "0e1279"
|
||||
FLOAT_NUMBER 6 "0e1279"
|
||||
WHITESPACE 1 " "
|
||||
INT_NUMBER 6 "0E1279"
|
||||
FLOAT_NUMBER 6 "0E1279"
|
||||
WHITESPACE 1 "\n"
|
||||
INT_NUMBER 1 "0"
|
||||
DOT 1 "."
|
||||
@ -47,9 +47,7 @@ IDENT 3 "foo"
|
||||
L_PAREN 1 "("
|
||||
R_PAREN 1 ")"
|
||||
WHITESPACE 1 "\n"
|
||||
INT_NUMBER 2 "0e"
|
||||
PLUS 1 "+"
|
||||
INT_NUMBER 1 "1"
|
||||
FLOAT_NUMBER 4 "0e+1"
|
||||
WHITESPACE 1 "\n"
|
||||
INT_NUMBER 1 "0"
|
||||
DOT 1 "."
|
||||
|
@ -1 +1 @@
|
||||
CHAR 2 "\'1"
|
||||
LIFETIME 2 "\'1"
|
||||
|
@ -1,7 +1,39 @@
|
||||
SOURCE_FILE@[0; 42)
|
||||
SHEBANG@[0; 20) "#!/use/bin/env rusti"
|
||||
WHITESPACE@[20; 21) "\n"
|
||||
ERROR@[21; 41)
|
||||
SHEBANG@[21; 41) "#!/use/bin/env rusti"
|
||||
ATTR@[21; 23)
|
||||
POUND@[21; 22) "#"
|
||||
EXCL@[22; 23) "!"
|
||||
ERROR@[23; 24)
|
||||
SLASH@[23; 24) "/"
|
||||
USE_ITEM@[24; 28)
|
||||
USE_KW@[24; 27) "use"
|
||||
ERROR@[27; 28)
|
||||
SLASH@[27; 28) "/"
|
||||
MACRO_CALL@[28; 31)
|
||||
PATH@[28; 31)
|
||||
PATH_SEGMENT@[28; 31)
|
||||
NAME_REF@[28; 31)
|
||||
IDENT@[28; 31) "bin"
|
||||
ERROR@[31; 32)
|
||||
SLASH@[31; 32) "/"
|
||||
MACRO_CALL@[32; 41)
|
||||
PATH@[32; 35)
|
||||
PATH_SEGMENT@[32; 35)
|
||||
NAME_REF@[32; 35)
|
||||
IDENT@[32; 35) "env"
|
||||
WHITESPACE@[35; 36) " "
|
||||
NAME@[36; 41)
|
||||
IDENT@[36; 41) "rusti"
|
||||
WHITESPACE@[41; 42) "\n"
|
||||
error 21: expected an item
|
||||
error 23: expected `[`
|
||||
error 23: expected an item
|
||||
error 27: expected one of `*`, `::`, `{`, `self`, `super` or an indentifier
|
||||
error 28: expected SEMI
|
||||
error 31: expected EXCL
|
||||
error 31: expected `{`, `[`, `(`
|
||||
error 31: expected SEMI
|
||||
error 31: expected an item
|
||||
error 35: expected EXCL
|
||||
error 41: expected `{`, `[`, `(`
|
||||
error 41: expected SEMI
|
||||
|
@ -11,7 +11,7 @@ SOURCE_FILE@[0; 112)
|
||||
BLOCK@[10; 111)
|
||||
L_CURLY@[10; 11) "{"
|
||||
WHITESPACE@[11; 16) "\n "
|
||||
LET_STMT@[16; 27)
|
||||
LET_STMT@[16; 31)
|
||||
LET_KW@[16; 19) "let"
|
||||
WHITESPACE@[19; 20) " "
|
||||
PLACEHOLDER_PAT@[20; 21)
|
||||
@ -19,14 +19,8 @@ SOURCE_FILE@[0; 112)
|
||||
WHITESPACE@[21; 22) " "
|
||||
EQ@[22; 23) "="
|
||||
WHITESPACE@[23; 24) " "
|
||||
LITERAL@[24; 27)
|
||||
CHAR@[24; 27) "\'c\'"
|
||||
EXPR_STMT@[27; 31)
|
||||
PATH_EXPR@[27; 30)
|
||||
PATH@[27; 30)
|
||||
PATH_SEGMENT@[27; 30)
|
||||
NAME_REF@[27; 30)
|
||||
IDENT@[27; 30) "u32"
|
||||
LITERAL@[24; 30)
|
||||
CHAR@[24; 30) "\'c\'u32"
|
||||
SEMI@[30; 31) ";"
|
||||
WHITESPACE@[31; 36) "\n "
|
||||
LET_STMT@[36; 60)
|
||||
@ -67,4 +61,3 @@ SOURCE_FILE@[0; 112)
|
||||
WHITESPACE@[109; 110) "\n"
|
||||
R_CURLY@[110; 111) "}"
|
||||
WHITESPACE@[111; 112) "\n"
|
||||
error 27: expected SEMI
|
Loading…
Reference in New Issue
Block a user