Make rustc_lexer::cursor::Cursor public.
`Cursor` is currently hidden, and the main tokenization path uses `rustc_lexer::first_token`, which constructs a new `Cursor` for every single token, which is weird. Also, `first_token` can't handle empty input, so callers have to check for that first. This commit makes `Cursor` public, so `StringReader` can contain a `Cursor`, which results in a simpler structure. The commit also changes `Cursor::advance_token` so it returns an `Option<Token>`, simplifying the empty input case.
This commit is contained in:
parent
33516ac09a
commit
aa6bfaf04b
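
To illustrate what the newly public API allows, here is a minimal sketch based only on the signatures visible in this diff; the `lex_all` driver is a hypothetical name, not code from the commit:

    use rustc_lexer::cursor::Cursor;

    // Hypothetical driver: hold one Cursor for the whole input instead of
    // rebuilding one per token via the removed `first_token`.
    fn lex_all(input: &str) {
        let mut cursor = Cursor::new(input);
        // advance_token() now returns None once the input is exhausted,
        // so callers no longer need to special-case empty input.
        while let Some(token) = cursor.advance_token() {
            println!("{:?} ({} bytes)", token.kind, token.len);
        }
    }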
compiler/rustc_lexer/src/cursor.rs

@@ -4,7 +4,7 @@ use std::str::Chars;
 ///
 /// Next characters can be peeked via `first` method,
 /// and position can be shifted forward via `bump` method.
-pub(crate) struct Cursor<'a> {
+pub struct Cursor<'a> {
     initial_len: usize,
     /// Iterator over chars. Slightly faster than a &str.
     chars: Chars<'a>,

@@ -15,7 +15,7 @@ pub(crate) struct Cursor<'a> {
 pub(crate) const EOF_CHAR: char = '\0';
 
 impl<'a> Cursor<'a> {
-    pub(crate) fn new(input: &'a str) -> Cursor<'a> {
+    pub fn new(input: &'a str) -> Cursor<'a> {
        Cursor {
            initial_len: input.len(),
            chars: input.chars(),
compiler/rustc_lexer/src/lib.rs

@@ -23,7 +23,7 @@
 // We want to be able to build this crate with a stable compiler, so no
 // `#![feature]` attributes should be added.
 
-mod cursor;
+pub mod cursor;
 pub mod unescape;
 
 #[cfg(test)]
@@ -219,13 +219,6 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
     None
 }
 
-/// Parses the first token from the provided input string.
-#[inline]
-pub fn first_token(input: &str) -> Token {
-    debug_assert!(!input.is_empty());
-    Cursor::new(input).advance_token()
-}
-
 /// Validates a raw string literal. Used for getting more information about a
 /// problem with a `RawStr`/`RawByteStr` with a `None` field.
 #[inline]
@@ -242,14 +235,7 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
 /// Creates an iterator that produces tokens from the input string.
 pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
     let mut cursor = Cursor::new(input);
-    std::iter::from_fn(move || {
-        if cursor.is_eof() {
-            None
-        } else {
-            cursor.reset_len_consumed();
-            Some(cursor.advance_token())
-        }
-    })
+    std::iter::from_fn(move || cursor.advance_token())
 }
 
 /// True if `c` is considered a whitespace according to Rust language definition.
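
The `tokenize` body collapses to a single call because `advance_token` now performs the eof check and the length reset itself, and `std::iter::from_fn` stops at the first `None`. A hedged usage example (the input string is illustrative; observable behavior is unchanged by this refactor):

    fn main() {
        // tokenize still yields every token, trivia included.
        for token in rustc_lexer::tokenize("let x = 42;") {
            println!("{:?} ({} bytes)", token.kind, token.len);
        }
    }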
@@ -311,8 +297,8 @@ pub fn is_ident(string: &str) -> bool {
 
 impl Cursor<'_> {
     /// Parses a token from the input string.
-    fn advance_token(&mut self) -> Token {
-        let first_char = self.bump().unwrap();
+    pub fn advance_token(&mut self) -> Option<Token> {
+        let first_char = self.bump()?;
        let token_kind = match first_char {
            // Slash, comment or block comment.
            '/' => match self.first() {
@@ -433,7 +419,9 @@ impl Cursor<'_> {
             }
             _ => Unknown,
         };
-        Token::new(token_kind, self.len_consumed())
+        let res = Some(Token::new(token_kind, self.len_consumed()));
+        self.reset_len_consumed();
+        res
     }
 
     fn line_comment(&mut self) -> TokenKind {
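Because `advance_token` now calls `reset_len_consumed` before returning, each `Token::len` measures exactly one token. A small sketch of that invariant, assuming the usual lexing of `fn main` into identifier, whitespace, identifier:

    use rustc_lexer::cursor::Cursor;

    fn main() {
        let mut cursor = Cursor::new("fn main");
        let t1 = cursor.advance_token().unwrap(); // "fn"   -> len 2
        let t2 = cursor.advance_token().unwrap(); // " "    -> len 1
        let t3 = cursor.advance_token().unwrap(); // "main" -> len 4
        // The per-token lengths tile the input exactly.
        assert_eq!(t1.len + t2.len + t3.len, "fn main".len() as u32);
    }
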
compiler/rustc_parse/src/lexer/mod.rs

@@ -4,6 +4,7 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
 use rustc_errors::{error_code, Applicability, DiagnosticBuilder, ErrorGuaranteed, PResult};
+use rustc_lexer::cursor::Cursor;
 use rustc_lexer::unescape::{self, Mode};
 use rustc_lexer::{Base, DocStyle, RawStrError};
 use rustc_session::lint::builtin::{
@@ -48,7 +49,9 @@ pub(crate) fn parse_token_trees<'a>(
         start_pos = start_pos + BytePos::from_usize(shebang_len);
     }
 
-    let string_reader = StringReader { sess, start_pos, pos: start_pos, src, override_span };
+    let cursor = Cursor::new(src);
+    let string_reader =
+        StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span };
     tokentrees::TokenTreesReader::parse_token_trees(string_reader)
 }
 
@@ -60,6 +63,8 @@ struct StringReader<'a> {
     pos: BytePos,
     /// Source text to tokenize.
     src: &'a str,
+    /// Cursor for getting lexer tokens.
+    cursor: Cursor<'a>,
     override_span: Option<Span>,
 }
 
@@ -75,15 +80,13 @@ impl<'a> StringReader<'a> {
 
         // Skip trivial (whitespace & comments) tokens
         loop {
-            let start_src_index = self.src_index(self.pos);
-            let text: &str = &self.src[start_src_index..];
-
-            if text.is_empty() {
-                let span = self.mk_sp(self.pos, self.pos);
-                return (Token::new(token::Eof, span), preceded_by_whitespace);
-            }
-
-            let token = rustc_lexer::first_token(text);
+            let token = match self.cursor.advance_token() {
+                Some(token) => token,
+                None => {
+                    let span = self.mk_sp(self.pos, self.pos);
+                    return (Token::new(token::Eof, span), preceded_by_whitespace);
+                }
+            };
 
            let start = self.pos;
            self.pos = self.pos + BytePos(token.len);
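The `StringReader` loop now matches on the returned `Option` instead of testing for empty input up front. A reduced sketch of that shape, not the real `StringReader::next_token`: `next_nontrivial` is a hypothetical helper, and only whitespace is skipped here, whereas the real loop also skips comments:

    use rustc_lexer::cursor::Cursor;
    use rustc_lexer::{Token, TokenKind};

    // None from advance_token means end of input; the real caller then
    // builds its own Eof token with an appropriate span.
    fn next_nontrivial(cursor: &mut Cursor<'_>) -> Option<Token> {
        loop {
            let token = cursor.advance_token()?;
            if token.kind != TokenKind::Whitespace {
                return Some(token);
            }
        }
    }
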
src/librustdoc/html/highlight.rs

@@ -13,6 +13,7 @@ use std::collections::VecDeque;
 use std::fmt::{Display, Write};
 
 use rustc_data_structures::fx::FxHashMap;
+use rustc_lexer::cursor::Cursor;
 use rustc_lexer::{LiteralKind, TokenKind};
 use rustc_span::edition::Edition;
 use rustc_span::symbol::Symbol;
@@ -408,15 +409,13 @@ enum Highlight<'a> {
 
 struct TokenIter<'a> {
     src: &'a str,
+    cursor: Cursor<'a>,
 }
 
 impl<'a> Iterator for TokenIter<'a> {
     type Item = (TokenKind, &'a str);
     fn next(&mut self) -> Option<(TokenKind, &'a str)> {
-        if self.src.is_empty() {
-            return None;
-        }
-        let token = rustc_lexer::first_token(self.src);
+        let token = self.cursor.advance_token()?;
        let (text, rest) = self.src.split_at(token.len as usize);
        self.src = rest;
        Some((token.kind, text))
@@ -525,7 +524,7 @@ impl<'a> Classifier<'a> {
     /// Takes as argument the source code to HTML-ify, the rust edition to use and the source code
     /// file span which will be used later on by the `span_correspondance_map`.
     fn new(src: &str, file_span: Span, decoration_info: Option<DecorationInfo>) -> Classifier<'_> {
-        let tokens = PeekIter::new(TokenIter { src });
+        let tokens = PeekIter::new(TokenIter { src, cursor: Cursor::new(src) });
        let decorations = decoration_info.map(Decorations::new);
        Classifier {
            tokens,
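
On the rustdoc side, `TokenIter::next` now pairs each lexer token with the exact source text it spans. An illustrative drain of the iterator, assuming it runs inside highlight.rs (where the private `TokenIter` is visible); `dump` is a hypothetical helper:

    use rustc_lexer::cursor::Cursor;

    fn dump(src: &str) {
        let iter = TokenIter { src, cursor: Cursor::new(src) };
        for (kind, text) in iter {
            println!("{:?}: {:?}", kind, text); // e.g. Ident: "let"
        }
    }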