339: Fix assertion error for literals with suffixes r=matklad a=matklad



Co-authored-by: Aleksey Kladov <aleksey.kladov@gmail.com>
This commit is contained in:
bors[bot] 2018-12-27 12:12:14 +00:00
commit 55ab0c602e
14 changed files with 354 additions and 376 deletions

View File

@ -1,13 +1,7 @@
mod parser; mod parser;
mod byte;
mod byte_string;
mod char;
mod string; mod string;
pub use self::{ pub use self::{
byte::parse_byte_literal, parser::{StringComponent, StringComponentKind},
byte_string::parse_byte_string_literal, string::{parse_string_literal, parse_char_literal, parse_byte_literal, parse_byte_string_literal},
char::parse_char_literal,
parser::{CharComponent, CharComponentKind, StringComponent, StringComponentKind},
string::parse_string_literal,
}; };

View File

@ -1,51 +0,0 @@
use super::parser::Parser;
use super::CharComponent;
/// Builds a [`ByteComponentIterator`] over the text of a byte literal.
///
/// Nothing is parsed here: the returned iterator wraps a fresh parser over
/// `src` and has `has_closing_quote` unset; all work happens as the iterator
/// is driven.
pub fn parse_byte_literal(src: &str) -> ByteComponentIterator {
    let parser = Parser::new(src);
    ByteComponentIterator {
        parser,
        has_closing_quote: false,
    }
}
/// Iterator over the components of a byte literal, created by
/// `parse_byte_literal`.
pub struct ByteComponentIterator<'a> {
/// Cursor over the literal's source text.
parser: Parser<'a>,
/// Becomes `true` once iteration has consumed the closing `'`; stays
/// `false` when the literal is unclosed.
pub has_closing_quote: bool,
}
impl<'a> Iterator for ByteComponentIterator<'a> {
    type Item = CharComponent;

    /// Yields the next component of the byte literal, or `None` when done.
    ///
    /// On the first call (parser position 0) the leading `b'` is consumed.
    /// Once no component is left, a closing `'` (if present) is consumed and
    /// recorded in `has_closing_quote`.
    ///
    /// # Panics
    ///
    /// Panics if the literal does not start with `b'`, or if any input
    /// remains after the optional closing quote (e.g. trailing text).
    fn next(&mut self) -> Option<CharComponent> {
        let at_start = self.parser.pos == 0;
        if at_start {
            let prefix = self.parser.advance();
            assert!(prefix == 'b', "Byte literal should start with a `b`");
            let quote = self.parser.advance();
            assert!(
                quote == '\'',
                "Byte literal should start with a `b`, followed by a quote"
            );
        }

        let component = self.parser.parse_char_component();
        if component.is_some() {
            return component;
        }

        // No components left: only an optional closing quote may remain.
        if let Some('\'') = self.parser.peek() {
            self.parser.advance();
            self.has_closing_quote = true;
        }

        let leftover = self.parser.peek();
        assert!(
            leftover.is_none(),
            "byte literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
            self.parser.src,
            self.parser.pos,
            self.parser.src.len()
        );
        None
    }
}

View File

@ -1,51 +0,0 @@
use super::parser::Parser;
use super::StringComponent;
/// Builds a [`ByteStringComponentIterator`] over the text of a byte string
/// literal.
///
/// Parsing is lazy: this only wraps `src` in a fresh parser and clears
/// `has_closing_quote`.
pub fn parse_byte_string_literal(src: &str) -> ByteStringComponentIterator {
    let parser = Parser::new(src);
    ByteStringComponentIterator {
        parser,
        has_closing_quote: false,
    }
}
/// Iterator over the components of a byte string literal, created by
/// `parse_byte_string_literal`.
pub struct ByteStringComponentIterator<'a> {
/// Cursor over the literal's source text.
parser: Parser<'a>,
/// Becomes `true` once iteration has consumed the closing `"`; stays
/// `false` when the literal is unclosed.
pub has_closing_quote: bool,
}
impl<'a> Iterator for ByteStringComponentIterator<'a> {
    type Item = StringComponent;

    /// Yields the next component of the byte string literal, or `None` when
    /// done.
    ///
    /// The first call (parser position 0) consumes the leading `b"`. When no
    /// component is left, a closing `"` (if present) is consumed and recorded
    /// in `has_closing_quote`.
    ///
    /// # Panics
    ///
    /// Panics if the literal does not start with `b"`, or if any input
    /// remains after the optional closing quote.
    fn next(&mut self) -> Option<StringComponent> {
        let at_start = self.parser.pos == 0;
        if at_start {
            let prefix = self.parser.advance();
            assert!(
                prefix == 'b',
                "byte string literal should start with a `b`"
            );
            let quote = self.parser.advance();
            assert!(
                quote == '"',
                "byte string literal should start with a `b`, followed by double quotes"
            );
        }

        let component = self.parser.parse_string_component();
        if component.is_some() {
            return component;
        }

        // No components left: only an optional closing quote may remain.
        if let Some('"') = self.parser.peek() {
            self.parser.advance();
            self.has_closing_quote = true;
        }

        let leftover = self.parser.peek();
        assert!(
            leftover.is_none(),
            "byte string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
            self.parser.src,
            self.parser.pos,
            self.parser.src.len()
        );
        None
    }
}

View File

@ -1,176 +0,0 @@
use super::parser::Parser;
use super::CharComponent;
/// Builds a [`CharComponentIterator`] over the text of a char literal.
///
/// Parsing is lazy: this only wraps `src` in a fresh parser and clears
/// `has_closing_quote`.
pub fn parse_char_literal(src: &str) -> CharComponentIterator {
    let parser = Parser::new(src);
    CharComponentIterator {
        parser,
        has_closing_quote: false,
    }
}
/// Iterator over the components of a char literal, created by
/// `parse_char_literal`.
pub struct CharComponentIterator<'a> {
/// Cursor over the literal's source text.
parser: Parser<'a>,
/// Becomes `true` once iteration has consumed the closing `'`; stays
/// `false` when the literal is unclosed.
pub has_closing_quote: bool,
}
impl<'a> Iterator for CharComponentIterator<'a> {
    type Item = CharComponent;

    /// Yields the next component of the char literal, or `None` when done.
    ///
    /// The first call (parser position 0) consumes the opening `'`. When no
    /// component is left, a closing `'` (if present) is consumed and recorded
    /// in `has_closing_quote`.
    ///
    /// # Panics
    ///
    /// Panics if the literal does not start with `'`, or if any input remains
    /// after the optional closing quote.
    fn next(&mut self) -> Option<CharComponent> {
        let at_start = self.parser.pos == 0;
        if at_start {
            let quote = self.parser.advance();
            assert!(quote == '\'', "char literal should start with a quote");
        }

        let component = self.parser.parse_char_component();
        if component.is_some() {
            return component;
        }

        // No components left: only an optional closing quote may remain.
        if let Some('\'') = self.parser.peek() {
            self.parser.advance();
            self.has_closing_quote = true;
        }

        let leftover = self.parser.peek();
        assert!(
            leftover.is_none(),
            "char literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
            self.parser.src,
            self.parser.pos,
            self.parser.src.len()
        );
        None
    }
}
#[cfg(test)]
mod tests {
    use rowan::TextRange;

    use crate::string_lexing::{
        CharComponent,
        CharComponentKind::*,
    };

    /// Drives the char-literal parser over `src` to completion and returns
    /// whether a closing quote was seen, together with every component.
    fn parse(src: &str) -> (bool, Vec<CharComponent>) {
        let component_iterator = &mut super::parse_char_literal(src);
        let components: Vec<_> = component_iterator.collect();
        (component_iterator.has_closing_quote, components)
    }

    /// Parses `src`, expecting an unclosed literal with exactly one component.
    fn unclosed_char_component(src: &str) -> CharComponent {
        let (has_closing_quote, components) = parse(src);
        assert!(!has_closing_quote, "char should not have closing quote");
        assert_eq!(components.len(), 1);
        components[0].clone()
    }

    /// Parses `src`, expecting a closed literal with exactly one component.
    fn closed_char_component(src: &str) -> CharComponent {
        let (has_closing_quote, components) = parse(src);
        assert!(has_closing_quote, "char should have closing quote");
        assert_eq!(
            components.len(),
            1,
            "Literal: {}\nComponents: {:#?}",
            src,
            components
        );
        components[0].clone()
    }

    /// Parses `src`, expecting a closed literal; returns all components.
    fn closed_char_components(src: &str) -> Vec<CharComponent> {
        let (has_closing_quote, components) = parse(src);
        assert!(has_closing_quote, "char should have closing quote");
        components
    }

    /// Range between the opening and closing quote of `src`.
    fn range_closed(src: &str) -> TextRange {
        TextRange::from_to(1.into(), (src.len() as u32 - 1).into())
    }

    /// Range from just after the opening quote to the end of `src`.
    fn range_unclosed(src: &str) -> TextRange {
        TextRange::from_to(1.into(), (src.len() as u32).into())
    }

    #[test]
    fn test_unicode_escapes() {
        let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
        for escape in unicode_escapes {
            let escape_sequence = format!(r"'\u{}'", escape);
            let component = closed_char_component(&escape_sequence);
            let expected_range = range_closed(&escape_sequence);
            assert_eq!(component.kind, UnicodeEscape);
            assert_eq!(component.range, expected_range);
        }
    }

    #[test]
    fn test_unicode_escapes_unclosed() {
        let unicode_escapes = &["{DEAD", "{BEEF", "{FF"];
        for escape in unicode_escapes {
            let escape_sequence = format!(r"'\u{}'", escape);
            let component = unclosed_char_component(&escape_sequence);
            let expected_range = range_unclosed(&escape_sequence);
            assert_eq!(component.kind, UnicodeEscape);
            assert_eq!(component.range, expected_range);
        }
    }

    #[test]
    fn test_empty_char() {
        let (has_closing_quote, components) = parse("''");
        assert!(has_closing_quote, "char should have closing quote");
        assert!(components.is_empty());
    }

    #[test]
    fn test_unclosed_char() {
        let component = unclosed_char_component("'a");
        assert_eq!(component.kind, CodePoint);
        assert_eq!(component.range, TextRange::from_to(1.into(), 2.into()));
    }

    #[test]
    fn test_digit_escapes() {
        let literals = &[r"", r"5", r"55"];
        for literal in literals {
            let lit_text = format!(r"'\x{}'", literal);
            let component = closed_char_component(&lit_text);
            assert_eq!(component.kind, AsciiCodeEscape);
            assert_eq!(component.range, range_closed(&lit_text));
        }

        // More than 2 digits starts a new codepoint
        let components = closed_char_components(r"'\x555'");
        assert_eq!(components.len(), 2);
        assert_eq!(components[1].kind, CodePoint);
    }

    #[test]
    fn test_ascii_escapes() {
        let literals = &[
            r"\'", "\\\"", // equivalent to \"
            r"\n", r"\r", r"\t", r"\\", r"\0",
        ];
        for literal in literals {
            let lit_text = format!("'{}'", literal);
            let component = closed_char_component(&lit_text);
            assert_eq!(component.kind, AsciiEscape);
            assert_eq!(component.range, range_closed(&lit_text));
        }
    }

    #[test]
    fn test_no_escapes() {
        let literals = &['"', 'n', 'r', 't', '0', 'x', 'u'];
        for &literal in literals {
            let lit_text = format!("'{}'", literal);
            let component = closed_char_component(&lit_text);
            assert_eq!(component.kind, CodePoint);
            assert_eq!(component.range, range_closed(&lit_text));
        }
    }
}

View File

@ -1,15 +1,16 @@
use rowan::{TextRange, TextUnit}; use rowan::{TextRange, TextUnit};
use self::CharComponentKind::*; use self::StringComponentKind::*;
pub struct Parser<'a> { pub struct Parser<'a> {
pub(super) quote: u8,
pub(super) src: &'a str, pub(super) src: &'a str,
pub(super) pos: usize, pub(super) pos: usize,
} }
impl<'a> Parser<'a> { impl<'a> Parser<'a> {
pub fn new(src: &'a str) -> Parser<'a> { pub fn new(src: &'a str, quote: u8) -> Parser<'a> {
Parser { src, pos: 0 } Parser { quote, src, pos: 0 }
} }
// Utility methods // Utility methods
@ -42,7 +43,7 @@ impl<'a> Parser<'a> {
// Char parsing methods // Char parsing methods
fn parse_unicode_escape(&mut self, start: TextUnit) -> CharComponent { fn parse_unicode_escape(&mut self, start: TextUnit) -> StringComponent {
match self.peek() { match self.peek() {
Some('{') => { Some('{') => {
self.advance(); self.advance();
@ -56,16 +57,16 @@ impl<'a> Parser<'a> {
} }
let end = self.get_pos(); let end = self.get_pos();
CharComponent::new(TextRange::from_to(start, end), UnicodeEscape) StringComponent::new(TextRange::from_to(start, end), UnicodeEscape)
} }
Some(_) | None => { Some(_) | None => {
let end = self.get_pos(); let end = self.get_pos();
CharComponent::new(TextRange::from_to(start, end), UnicodeEscape) StringComponent::new(TextRange::from_to(start, end), UnicodeEscape)
} }
} }
} }
fn parse_ascii_code_escape(&mut self, start: TextUnit) -> CharComponent { fn parse_ascii_code_escape(&mut self, start: TextUnit) -> StringComponent {
let code_start = self.get_pos(); let code_start = self.get_pos();
while let Some(next) = self.peek() { while let Some(next) = self.peek() {
if next == '\'' || (self.get_pos() - code_start == 2.into()) { if next == '\'' || (self.get_pos() - code_start == 2.into()) {
@ -76,12 +77,12 @@ impl<'a> Parser<'a> {
} }
let end = self.get_pos(); let end = self.get_pos();
CharComponent::new(TextRange::from_to(start, end), AsciiCodeEscape) StringComponent::new(TextRange::from_to(start, end), AsciiCodeEscape)
} }
fn parse_escape(&mut self, start: TextUnit) -> CharComponent { fn parse_escape(&mut self, start: TextUnit) -> StringComponent {
if self.peek().is_none() { if self.peek().is_none() {
return CharComponent::new(TextRange::from_to(start, start), AsciiEscape); return StringComponent::new(TextRange::from_to(start, start), AsciiEscape);
} }
let next = self.advance(); let next = self.advance();
@ -90,29 +91,7 @@ impl<'a> Parser<'a> {
match next { match next {
'x' => self.parse_ascii_code_escape(start), 'x' => self.parse_ascii_code_escape(start),
'u' => self.parse_unicode_escape(start), 'u' => self.parse_unicode_escape(start),
_ => CharComponent::new(range, AsciiEscape), _ => StringComponent::new(range, AsciiEscape),
}
}
pub fn parse_char_component(&mut self) -> Option<CharComponent> {
let next = self.peek()?;
// Ignore character close
if next == '\'' {
return None;
}
let start = self.get_pos();
self.advance();
if next == '\\' {
Some(self.parse_escape(start))
} else {
let end = self.get_pos();
Some(CharComponent::new(
TextRange::from_to(start, end),
CodePoint,
))
} }
} }
@ -131,11 +110,11 @@ impl<'a> Parser<'a> {
} }
} }
pub fn parse_string_component(&mut self) -> Option<StringComponent> { pub fn parse_component(&mut self) -> Option<StringComponent> {
let next = self.peek()?; let next = self.peek()?;
// Ignore string close // Ignore string close
if next == '"' { if next == self.quote as char {
return None; return None;
} }
@ -145,21 +124,31 @@ impl<'a> Parser<'a> {
if next == '\\' { if next == '\\' {
// Strings can use `\` to ignore newlines, so we first try to parse one of those // Strings can use `\` to ignore newlines, so we first try to parse one of those
// before falling back to parsing char escapes // before falling back to parsing char escapes
self.parse_ignore_newline(start).or_else(|| { if self.quote == b'"' {
let char_component = self.parse_escape(start); if let Some(component) = self.parse_ignore_newline(start) {
Some(StringComponent::new( return Some(component);
char_component.range, }
StringComponentKind::Char(char_component.kind), }
))
}) Some(self.parse_escape(start))
} else { } else {
let end = self.get_pos(); let end = self.get_pos();
Some(StringComponent::new( Some(StringComponent::new(
TextRange::from_to(start, end), TextRange::from_to(start, end),
StringComponentKind::Char(CodePoint), CodePoint,
)) ))
} }
} }
pub fn parse_suffix(&mut self) -> Option<TextRange> {
let start = self.get_pos();
let _ = self.peek()?;
while let Some(_) = self.peek() {
self.advance();
}
let end = self.get_pos();
Some(TextRange::from_to(start, end))
}
} }
#[derive(Debug, Eq, PartialEq, Clone)] #[derive(Debug, Eq, PartialEq, Clone)]
@ -177,23 +166,6 @@ impl StringComponent {
#[derive(Debug, Eq, PartialEq, Clone)] #[derive(Debug, Eq, PartialEq, Clone)]
pub enum StringComponentKind { pub enum StringComponentKind {
IgnoreNewline, IgnoreNewline,
Char(CharComponentKind),
}
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct CharComponent {
pub range: TextRange,
pub kind: CharComponentKind,
}
impl CharComponent {
fn new(range: TextRange, kind: CharComponentKind) -> CharComponent {
CharComponent { range, kind }
}
}
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum CharComponentKind {
CodePoint, CodePoint,
AsciiEscape, AsciiEscape,
AsciiCodeEscape, AsciiCodeEscape,

View File

@ -1,41 +1,92 @@
use super::parser::Parser; use crate::{
use super::StringComponent; TextRange,
string_lexing::{
parser::Parser,
StringComponent,
}};
pub fn parse_string_literal(src: &str) -> StringComponentIterator { pub fn parse_string_literal(src: &str) -> StringComponentIterator {
StringComponentIterator { StringComponentIterator {
parser: Parser::new(src), parser: Parser::new(src, b'"'),
has_closing_quote: false, has_closing_quote: false,
suffix: None,
prefix: None,
quote: b'"',
}
}
pub fn parse_byte_string_literal(src: &str) -> StringComponentIterator {
StringComponentIterator {
parser: Parser::new(src, b'"'),
has_closing_quote: false,
suffix: None,
prefix: Some(b'b'),
quote: b'"',
}
}
pub fn parse_char_literal(src: &str) -> StringComponentIterator {
StringComponentIterator {
parser: Parser::new(src, b'\''),
has_closing_quote: false,
suffix: None,
prefix: None,
quote: b'\'',
}
}
pub fn parse_byte_literal(src: &str) -> StringComponentIterator {
StringComponentIterator {
parser: Parser::new(src, b'\''),
has_closing_quote: false,
suffix: None,
prefix: Some(b'b'),
quote: b'\'',
} }
} }
pub struct StringComponentIterator<'a> { pub struct StringComponentIterator<'a> {
parser: Parser<'a>, parser: Parser<'a>,
pub has_closing_quote: bool, pub has_closing_quote: bool,
pub suffix: Option<TextRange>,
prefix: Option<u8>,
quote: u8,
} }
impl<'a> Iterator for StringComponentIterator<'a> { impl<'a> Iterator for StringComponentIterator<'a> {
type Item = StringComponent; type Item = StringComponent;
fn next(&mut self) -> Option<StringComponent> { fn next(&mut self) -> Option<StringComponent> {
if self.parser.pos == 0 { if self.parser.pos == 0 {
if let Some(prefix) = self.prefix {
assert!(
self.parser.advance() == prefix as char,
"literal should start with a {:?}",
prefix as char,
);
}
assert!( assert!(
self.parser.advance() == '"', self.parser.advance() == self.quote as char,
"string literal should start with double quotes" "literal should start with a {:?}",
self.quote as char,
); );
} }
if let Some(component) = self.parser.parse_string_component() { if let Some(component) = self.parser.parse_component() {
return Some(component); return Some(component);
} }
// We get here when there are no char components left to parse // We get here when there are no char components left to parse
if self.parser.peek() == Some('"') { if self.parser.peek() == Some(self.quote as char) {
self.parser.advance(); self.parser.advance();
self.has_closing_quote = true; self.has_closing_quote = true;
if let Some(range) = self.parser.parse_suffix() {
self.suffix = Some(range);
}
} }
assert!( assert!(
self.parser.peek() == None, self.parser.peek() == None,
"string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
self.parser.src, self.parser.src,
self.parser.pos, self.parser.pos,
self.parser.src.len() self.parser.src.len()
@ -44,3 +95,133 @@ impl<'a> Iterator for StringComponentIterator<'a> {
None None
} }
} }
#[cfg(test)]
mod tests {
    use rowan::TextRange;

    use crate::string_lexing::{
        StringComponent,
        StringComponentKind::*,
    };

    /// Drives `parse_char_literal` over `src` to completion and returns
    /// whether a closing quote was seen, together with every component.
    fn parse(src: &str) -> (bool, Vec<StringComponent>) {
        let component_iterator = &mut super::parse_char_literal(src);
        let components: Vec<_> = component_iterator.collect();
        (component_iterator.has_closing_quote, components)
    }

    /// Parses `src`, expecting an unclosed literal with exactly one component.
    fn unclosed_char_component(src: &str) -> StringComponent {
        let (has_closing_quote, components) = parse(src);
        assert!(!has_closing_quote, "char should not have closing quote");
        assert_eq!(components.len(), 1);
        components[0].clone()
    }

    /// Parses `src`, expecting a closed literal with exactly one component.
    fn closed_char_component(src: &str) -> StringComponent {
        let (has_closing_quote, components) = parse(src);
        assert!(has_closing_quote, "char should have closing quote");
        assert_eq!(
            components.len(),
            1,
            "Literal: {}\nComponents: {:#?}",
            src,
            components
        );
        components[0].clone()
    }

    /// Parses `src`, expecting a closed literal; returns all components.
    fn closed_char_components(src: &str) -> Vec<StringComponent> {
        let (has_closing_quote, components) = parse(src);
        assert!(has_closing_quote, "char should have closing quote");
        components
    }

    /// Range between the opening and closing quote of `src`.
    fn range_closed(src: &str) -> TextRange {
        TextRange::from_to(1.into(), (src.len() as u32 - 1).into())
    }

    /// Range from just after the opening quote to the end of `src`.
    fn range_unclosed(src: &str) -> TextRange {
        TextRange::from_to(1.into(), (src.len() as u32).into())
    }

    #[test]
    fn test_unicode_escapes() {
        let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
        for escape in unicode_escapes {
            let escape_sequence = format!(r"'\u{}'", escape);
            let component = closed_char_component(&escape_sequence);
            let expected_range = range_closed(&escape_sequence);
            assert_eq!(component.kind, UnicodeEscape);
            assert_eq!(component.range, expected_range);
        }
    }

    #[test]
    fn test_unicode_escapes_unclosed() {
        let unicode_escapes = &["{DEAD", "{BEEF", "{FF"];
        for escape in unicode_escapes {
            let escape_sequence = format!(r"'\u{}'", escape);
            let component = unclosed_char_component(&escape_sequence);
            let expected_range = range_unclosed(&escape_sequence);
            assert_eq!(component.kind, UnicodeEscape);
            assert_eq!(component.range, expected_range);
        }
    }

    #[test]
    fn test_empty_char() {
        let (has_closing_quote, components) = parse("''");
        assert!(has_closing_quote, "char should have closing quote");
        assert!(components.is_empty());
    }

    #[test]
    fn test_unclosed_char() {
        let component = unclosed_char_component("'a");
        assert_eq!(component.kind, CodePoint);
        assert_eq!(component.range, TextRange::from_to(1.into(), 2.into()));
    }

    #[test]
    fn test_digit_escapes() {
        let literals = &[r"", r"5", r"55"];
        for literal in literals {
            let lit_text = format!(r"'\x{}'", literal);
            let component = closed_char_component(&lit_text);
            assert_eq!(component.kind, AsciiCodeEscape);
            assert_eq!(component.range, range_closed(&lit_text));
        }

        // More than 2 digits starts a new codepoint
        let components = closed_char_components(r"'\x555'");
        assert_eq!(components.len(), 2);
        assert_eq!(components[1].kind, CodePoint);
    }

    #[test]
    fn test_ascii_escapes() {
        let literals = &[
            r"\'", "\\\"", // equivalent to \"
            r"\n", r"\r", r"\t", r"\\", r"\0",
        ];
        for literal in literals {
            let lit_text = format!("'{}'", literal);
            let component = closed_char_component(&lit_text);
            assert_eq!(component.kind, AsciiEscape);
            assert_eq!(component.range, range_closed(&lit_text));
        }
    }

    #[test]
    fn test_no_escapes() {
        let literals = &['"', 'n', 'r', 't', '0', 'x', 'u'];
        for &literal in literals {
            let lit_text = format!("'{}'", literal);
            let component = closed_char_component(&lit_text);
            assert_eq!(component.kind, CodePoint);
            assert_eq!(component.range, range_closed(&lit_text));
        }
    }
}

View File

@ -1,10 +1,13 @@
use crate::{SourceFileNode, SyntaxKind, SyntaxNodeRef, WalkEvent}; use crate::{SourceFileNode, SyntaxKind, SyntaxNodeRef, WalkEvent, AstNode};
use std::fmt::Write; use std::fmt::Write;
use std::str; use std::str;
/// Parse a file and create a string representation of the resulting parse tree. /// Parse a file and create a string representation of the resulting parse tree.
pub fn dump_tree(syntax: SyntaxNodeRef) -> String { pub fn dump_tree(syntax: SyntaxNodeRef) -> String {
let mut errors: Vec<_> = syntax.root_data().to_vec(); let mut errors: Vec<_> = match syntax.ancestors().find_map(SourceFileNode::cast) {
Some(file) => file.owned().errors(),
None => syntax.root_data().to_vec(),
};
errors.sort_by_key(|e| e.offset()); errors.sort_by_key(|e| e.offset());
let mut err_pos = 0; let mut err_pos = 0;
let mut level = 0; let mut level = 0;

View File

@ -2,7 +2,7 @@
use crate::{ use crate::{
ast::{self, AstNode}, ast::{self, AstNode},
string_lexing::{self, CharComponentKind}, string_lexing::{self, StringComponentKind},
TextRange, TextRange,
validation::char, validation::char,
yellow::{ yellow::{
@ -27,6 +27,13 @@ pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec<SyntaxError>)
errors.push(SyntaxError::new(UnclosedByte, literal_range)); errors.push(SyntaxError::new(UnclosedByte, literal_range));
} }
if let Some(range) = components.suffix {
errors.push(SyntaxError::new(
InvalidSuffix,
range + literal_range.start(),
));
}
if len == 0 { if len == 0 {
errors.push(SyntaxError::new(EmptyByte, literal_range)); errors.push(SyntaxError::new(EmptyByte, literal_range));
} }
@ -38,11 +45,11 @@ pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec<SyntaxError>)
pub(super) fn validate_byte_component( pub(super) fn validate_byte_component(
text: &str, text: &str,
kind: CharComponentKind, kind: StringComponentKind,
range: TextRange, range: TextRange,
errors: &mut Vec<SyntaxError>, errors: &mut Vec<SyntaxError>,
) { ) {
use self::CharComponentKind::*; use self::StringComponentKind::*;
match kind { match kind {
AsciiEscape => validate_byte_escape(text, range, errors), AsciiEscape => validate_byte_escape(text, range, errors),
AsciiCodeEscape => validate_byte_code_escape(text, range, errors), AsciiCodeEscape => validate_byte_code_escape(text, range, errors),
@ -63,6 +70,7 @@ pub(super) fn validate_byte_component(
errors.push(SyntaxError::new(ByteOutOfRange, range)); errors.push(SyntaxError::new(ByteOutOfRange, range));
} }
} }
IgnoreNewline => { /* always valid */ }
} }
} }

View File

@ -17,21 +17,28 @@ pub(crate) fn validate_byte_string_node(node: ast::ByteString, errors: &mut Vec<
let range = component.range + literal_range.start(); let range = component.range + literal_range.start();
match component.kind { match component.kind {
StringComponentKind::Char(kind) => { StringComponentKind::IgnoreNewline => { /* always valid */ }
_ => {
// Chars must escape \t, \n and \r codepoints, but strings don't // Chars must escape \t, \n and \r codepoints, but strings don't
let text = &literal_text[component.range]; let text = &literal_text[component.range];
match text { match text {
"\t" | "\n" | "\r" => { /* always valid */ } "\t" | "\n" | "\r" => { /* always valid */ }
_ => byte::validate_byte_component(text, kind, range, errors), _ => byte::validate_byte_component(text, component.kind, range, errors),
} }
} }
StringComponentKind::IgnoreNewline => { /* always valid */ }
} }
} }
if !components.has_closing_quote { if !components.has_closing_quote {
errors.push(SyntaxError::new(UnclosedString, literal_range)); errors.push(SyntaxError::new(UnclosedString, literal_range));
} }
if let Some(range) = components.suffix {
errors.push(SyntaxError::new(
InvalidSuffix,
range + literal_range.start(),
));
}
} }
#[cfg(test)] #[cfg(test)]

View File

@ -6,7 +6,7 @@ use arrayvec::ArrayString;
use crate::{ use crate::{
ast::{self, AstNode}, ast::{self, AstNode},
string_lexing::{self, CharComponentKind}, string_lexing::{self, StringComponentKind},
TextRange, TextRange,
yellow::{ yellow::{
SyntaxError, SyntaxError,
@ -30,6 +30,13 @@ pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>)
errors.push(SyntaxError::new(UnclosedChar, literal_range)); errors.push(SyntaxError::new(UnclosedChar, literal_range));
} }
if let Some(range) = components.suffix {
errors.push(SyntaxError::new(
InvalidSuffix,
range + literal_range.start(),
));
}
if len == 0 { if len == 0 {
errors.push(SyntaxError::new(EmptyChar, literal_range)); errors.push(SyntaxError::new(EmptyChar, literal_range));
} }
@ -41,12 +48,12 @@ pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>)
pub(super) fn validate_char_component( pub(super) fn validate_char_component(
text: &str, text: &str,
kind: CharComponentKind, kind: StringComponentKind,
range: TextRange, range: TextRange,
errors: &mut Vec<SyntaxError>, errors: &mut Vec<SyntaxError>,
) { ) {
// Validate escapes // Validate escapes
use self::CharComponentKind::*; use self::StringComponentKind::*;
match kind { match kind {
AsciiEscape => validate_ascii_escape(text, range, errors), AsciiEscape => validate_ascii_escape(text, range, errors),
AsciiCodeEscape => validate_ascii_code_escape(text, range, errors), AsciiCodeEscape => validate_ascii_code_escape(text, range, errors),
@ -57,6 +64,7 @@ pub(super) fn validate_char_component(
errors.push(SyntaxError::new(UnescapedCodepoint, range)); errors.push(SyntaxError::new(UnescapedCodepoint, range));
} }
} }
StringComponentKind::IgnoreNewline => { /* always valid */ }
} }
} }

View File

@ -1,6 +1,6 @@
use crate::{ use crate::{
ast::{self, AstNode}, ast::{self, AstNode},
string_lexing::{self, StringComponentKind}, string_lexing,
yellow::{ yellow::{
SyntaxError, SyntaxError,
SyntaxErrorKind::*, SyntaxErrorKind::*,
@ -16,22 +16,24 @@ pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec<SyntaxErr
for component in &mut components { for component in &mut components {
let range = component.range + literal_range.start(); let range = component.range + literal_range.start();
match component.kind { // Chars must escape \t, \n and \r codepoints, but strings don't
StringComponentKind::Char(kind) => { let text = &literal_text[component.range];
// Chars must escape \t, \n and \r codepoints, but strings don't match text {
let text = &literal_text[component.range]; "\t" | "\n" | "\r" => { /* always valid */ }
match text { _ => char::validate_char_component(text, component.kind, range, errors),
"\t" | "\n" | "\r" => { /* always valid */ }
_ => char::validate_char_component(text, kind, range, errors),
}
}
StringComponentKind::IgnoreNewline => { /* always valid */ }
} }
} }
if !components.has_closing_quote { if !components.has_closing_quote {
errors.push(SyntaxError::new(UnclosedString, literal_range)); errors.push(SyntaxError::new(UnclosedString, literal_range));
} }
if let Some(range) = components.suffix {
errors.push(SyntaxError::new(
InvalidSuffix,
range + literal_range.start(),
));
}
} }
#[cfg(test)] #[cfg(test)]

View File

@ -93,6 +93,7 @@ pub enum SyntaxErrorKind {
OverlongUnicodeEscape, OverlongUnicodeEscape,
UnicodeEscapeOutOfRange, UnicodeEscapeOutOfRange,
UnclosedString, UnclosedString,
InvalidSuffix,
} }
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
@ -134,6 +135,7 @@ impl fmt::Display for SyntaxErrorKind {
} }
UnicodeEscapeOutOfRange => write!(f, "Unicode escape code should be at most 0x10FFFF"), UnicodeEscapeOutOfRange => write!(f, "Unicode escape code should be at most 0x10FFFF"),
UnclosedString => write!(f, "Unclosed string literal"), UnclosedString => write!(f, "Unclosed string literal"),
InvalidSuffix => write!(f, "Invalid literal suffix"),
ParseError(msg) => write!(f, "{}", msg.0), ParseError(msg) => write!(f, "{}", msg.0),
} }
} }

View File

@ -0,0 +1,6 @@
fn main() {
let _ = 'c'u32;
let _ = "string"invalid;
let _ = b'b'_suff;
let _ = b"bs"invalid;
}

View File

@ -0,0 +1,73 @@
SOURCE_FILE@[0; 112)
FN_DEF@[0; 111)
FN_KW@[0; 2)
WHITESPACE@[2; 3)
NAME@[3; 7)
IDENT@[3; 7) "main"
PARAM_LIST@[7; 9)
L_PAREN@[7; 8)
R_PAREN@[8; 9)
WHITESPACE@[9; 10)
BLOCK@[10; 111)
L_CURLY@[10; 11)
WHITESPACE@[11; 16)
LET_STMT@[16; 27)
LET_KW@[16; 19)
WHITESPACE@[19; 20)
PLACEHOLDER_PAT@[20; 21)
UNDERSCORE@[20; 21)
WHITESPACE@[21; 22)
EQ@[22; 23)
WHITESPACE@[23; 24)
LITERAL@[24; 27)
CHAR@[24; 27)
err: `expected SEMI`
EXPR_STMT@[27; 31)
PATH_EXPR@[27; 30)
PATH@[27; 30)
PATH_SEGMENT@[27; 30)
NAME_REF@[27; 30)
IDENT@[27; 30) "u32"
SEMI@[30; 31)
WHITESPACE@[31; 36)
LET_STMT@[36; 60)
LET_KW@[36; 39)
WHITESPACE@[39; 40)
PLACEHOLDER_PAT@[40; 41)
UNDERSCORE@[40; 41)
WHITESPACE@[41; 42)
EQ@[42; 43)
WHITESPACE@[43; 44)
LITERAL@[44; 59)
STRING@[44; 59)
err: `Invalid literal suffix`
SEMI@[59; 60)
WHITESPACE@[60; 65)
LET_STMT@[65; 83)
LET_KW@[65; 68)
WHITESPACE@[68; 69)
PLACEHOLDER_PAT@[69; 70)
UNDERSCORE@[69; 70)
WHITESPACE@[70; 71)
EQ@[71; 72)
WHITESPACE@[72; 73)
LITERAL@[73; 82)
BYTE@[73; 82)
err: `Invalid literal suffix`
SEMI@[82; 83)
WHITESPACE@[83; 88)
LET_STMT@[88; 109)
LET_KW@[88; 91)
WHITESPACE@[91; 92)
PLACEHOLDER_PAT@[92; 93)
UNDERSCORE@[92; 93)
WHITESPACE@[93; 94)
EQ@[94; 95)
WHITESPACE@[95; 96)
LITERAL@[96; 108)
BYTE_STRING@[96; 108)
err: `Invalid literal suffix`
SEMI@[108; 109)
WHITESPACE@[109; 110)
R_CURLY@[110; 111)
WHITESPACE@[111; 112)