share literal validation logic with compiler

2024-11-30 02:33:55 +00:00 · 2019-05-07 19:38:26 +03:00 · 2019-05-07 19:38:26 +03:00 · 313314e14b
commit 313314e14b
parent ef782adc29
10 changed files with 620 additions and 1201 deletions
--- a/crates/ra_syntax/src/lib.rs
+++ b/crates/ra_syntax/src/lib.rs
@ -23,7 +23,6 @@ mod syntax_node;
 mod syntax_text;
 mod syntax_error;
 mod parsing;
 mod string_lexing;
 mod validation;
 mod ptr;
--- a/crates/ra_syntax/src/string_lexing.rs
+++ b/crates/ra_syntax/src/string_lexing.rs
@ -1,333 +0,0 @@
 use crate::{TextRange, TextUnit};
 use self::StringComponentKind::*;
 /// A single lexed piece of a quoted literal: its location plus its lexical category.
 #[derive(Debug, Eq, PartialEq, Clone)]
 pub(crate) struct StringComponent {
    /// Where the component sits, as offsets into the literal text handed to the lexer.
    pub(crate) range: TextRange,
    /// What sort of component this is (plain code point, escape, ...).
    pub(crate) kind: StringComponentKind,
 }
 /// Lexical categories the string lexer assigns to the pieces of a quoted literal.
 ///
 /// The lexer classifies by shape only; it does not check that an escape is valid.
 #[derive(Debug, Eq, PartialEq, Clone)]
 pub(crate) enum StringComponentKind {
    /// A `\` directly followed by a newline, together with the whitespace run after it
    /// (only produced for `"`-quoted literals).
    IgnoreNewline,
    /// Any single character that does not start an escape.
    CodePoint,
    /// A `\` followed by one character other than `x` or `u`, or a lone trailing `\`.
    AsciiEscape,
    /// `\x` followed by the characters the lexer takes as its code.
    AsciiCodeEscape,
    /// `\u`, optionally followed by a `{...}` group.
    UnicodeEscape,
 }
 /// Creates a component iterator over the quoted literal `src`.
 ///
 /// `prefix` is the optional literal prefix (only `b` is accepted) and `quote` is the
 /// delimiter (`'` or `"`). Nothing is lexed here; the iterator starts at offset 0 and
 /// checks the prefix and opening quote on its first `next` call.
 ///
 /// # Panics
 ///
 /// Panics on any other prefix or quote character.
 pub(crate) fn parse_quoted_literal(
    prefix: Option<char>,
    quote: char,
    src: &str,
 ) -> StringComponentIter {
    let prefix_byte = match prefix {
        None => None,
        Some('b') => Some(b'b'),
        Some(_) => panic!("invalid prefix"),
    };
    let quote_byte = if quote == '\'' {
        b'\''
    } else if quote == '"' {
        b'"'
    } else {
        panic!("invalid quote")
    };
    StringComponentIter {
        src,
        prefix: prefix_byte,
        quote: quote_byte,
        pos: 0,
        has_closing_quote: false,
        suffix: None,
    }
 }
 /// Iterator over the components of one quoted literal.
 ///
 /// `has_closing_quote` and `suffix` are only filled in once iteration has run to the end.
 pub(crate) struct StringComponentIter<'a> {
    /// Full literal text, including prefix and quotes.
    src: &'a str,
    /// Expected prefix byte (`b`), if any.
    prefix: Option<u8>,
    /// Quote byte delimiting the literal: `'` or `"`.
    quote: u8,
    /// Current byte offset into `src`.
    pos: usize,
    /// Set when a closing quote was found.
    pub(crate) has_closing_quote: bool,
    /// Range of any text following the closing quote.
    pub(crate) suffix: Option<TextRange>,
 }
 /// Yields the literal's components one by one; once they are exhausted it records the
 /// closing quote and any trailing suffix on the iterator itself.
 impl<'a> Iterator for StringComponentIter<'a> {
    type Item = StringComponent;
    /// Returns the next component, or `None` when only the closing quote / suffix remain.
    ///
    /// Panics if the literal does not start with the expected prefix and quote, or if input
    /// is left over after the components, closing quote and suffix have been consumed.
    fn next(&mut self) -> Option<StringComponent> {
        let quote = self.quote as char;
        // First call: step over the optional prefix and the opening quote.
        if self.pos == 0 {
            if let Some(prefix) = self.prefix.map(|byte| byte as char) {
                let found = self.advance();
                assert!(found == prefix, "literal should start with a {:?}", prefix,);
            }
            let found = self.advance();
            assert!(found == quote, "literal should start with a {:?}", quote,);
        }
        let component = self.parse_component();
        if component.is_some() {
            return component;
        }
        // We get here when there are no char components left to parse
        if self.peek() == Some(quote) {
            self.advance();
            self.has_closing_quote = true;
            let suffix = self.parse_suffix();
            if suffix.is_some() {
                self.suffix = suffix;
            }
        }
        assert!(
            self.peek().is_none(),
            "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
            self.src,
            self.pos,
            self.src.len()
        );
        None
    }
 }
 /// Cursor helpers and per-component lexing for `StringComponentIter`.
 impl<'a> StringComponentIter<'a> {
    /// Returns the character at the cursor without consuming it, or `None` at end of input.
    fn peek(&self) -> Option<char> {
        self.src[self.pos..].chars().next()
    }
    /// Consumes and returns the character at the cursor.
    ///
    /// Panics if the cursor is already at the end of input.
    fn advance(&mut self) -> char {
        let next = self.peek().expect("cannot advance if end of input is reached");
        self.pos += next.len_utf8();
        next
    }
    /// Lexes the next component; returns `None` at end of input or at the closing quote.
    fn parse_component(&mut self) -> Option<StringComponent> {
        let next = self.peek()?;
        // The closing quote is not a component; the caller deals with it.
        if next == self.quote as char {
            return None;
        }
        let start = self.start_range();
        self.advance();
        if next != '\\' {
            return Some(self.finish_component(start, CodePoint));
        }
        // Strings can use `\` to ignore newlines, so we first try to parse one of those
        // before falling back to parsing char escapes
        if self.quote == b'"' {
            if let Some(component) = self.parse_ignore_newline(start) {
                return Some(component);
            }
        }
        Some(self.parse_escape(start))
    }
    /// Lexes a `\`-newline continuation; the leading `\` has already been consumed.
    ///
    /// Returns `None` (consuming nothing) when the next character is not `\n` or `\r`.
    fn parse_ignore_newline(&mut self, start: TextUnit) -> Option<StringComponent> {
        // In string literals, when a `\` occurs immediately before the newline, the `\`,
        // the newline, and all whitespace at the beginning of the next line are ignored
        match self.peek() {
            Some('\n') | Some('\r') => {
                self.skip_whitespace();
                Some(self.finish_component(start, IgnoreNewline))
            }
            _ => None,
        }
    }
    /// Consumes characters while they are whitespace.
    fn skip_whitespace(&mut self) {
        while self.peek().map_or(false, char::is_whitespace) {
            self.advance();
        }
    }
    /// Lexes an escape whose leading `\` has already been consumed.
    ///
    /// A `\` at end of input is reported as an `AsciiEscape` containing only the backslash.
    fn parse_escape(&mut self, start: TextUnit) -> StringComponent {
        if self.peek().is_none() {
            return self.finish_component(start, AsciiEscape);
        }
        match self.advance() {
            'x' => self.parse_ascii_code_escape(start),
            'u' => self.parse_unicode_escape(start),
            _ => self.finish_component(start, AsciiEscape),
        }
    }
    /// Lexes the remainder of a `\u` escape: an optional `{...}` group.
    ///
    /// An unclosed `{` consumes everything up to the end of input, closing quote included.
    fn parse_unicode_escape(&mut self, start: TextUnit) -> StringComponent {
        if self.peek() == Some('{') {
            self.advance();
            // Parse anything until we reach `}`
            while let Some(next) = self.peek() {
                self.advance();
                if next == '}' {
                    break;
                }
            }
        }
        self.finish_component(start, UnicodeEscape)
    }
    /// Lexes the remainder of a `\x` escape: at most two code characters.
    ///
    /// Stops at the literal's own quote, so an incomplete escape never swallows the closing
    /// quote of a `"`-quoted literal.
    fn parse_ascii_code_escape(&mut self, start: TextUnit) -> StringComponent {
        let code_start = self.pos;
        let quote = self.quote as char;
        while let Some(next) = self.peek() {
            // The budget is measured in bytes; `>=` (rather than `==`) keeps a multi-byte
            // character from stepping over the limit and making the escape run on.
            if next == quote || self.pos - code_start >= 2 {
                break;
            }
            self.advance();
        }
        self.finish_component(start, AsciiCodeEscape)
    }
    /// Consumes everything up to the end of input as the literal's suffix.
    ///
    /// Returns `None` when nothing follows the cursor.
    fn parse_suffix(&mut self) -> Option<TextRange> {
        let start = self.start_range();
        self.peek()?;
        // The suffix always extends to the end of the source text.
        self.pos = self.src.len();
        Some(self.finish_range(start))
    }
    /// Current cursor position as a `TextUnit`.
    fn start_range(&self) -> TextUnit {
        TextUnit::from_usize(self.pos)
    }
    /// Range from `start` up to the current cursor position.
    fn finish_range(&self, start: TextUnit) -> TextRange {
        TextRange::from_to(start, TextUnit::from_usize(self.pos))
    }
    /// Builds a component of `kind` spanning from `start` to the current cursor position.
    fn finish_component(&self, start: TextUnit, kind: StringComponentKind) -> StringComponent {
        let range = self.finish_range(start);
        StringComponent { range, kind }
    }
 }
 // Unit tests for the component lexer; every case lexes a `'`-quoted literal without prefix.
 #[cfg(test)]
 mod tests {
    use super::*;
    // Lexes `src` as a char literal and returns (closing quote seen, all components).
    fn parse(src: &str) -> (bool, Vec<StringComponent>) {
        let component_iterator = &mut parse_quoted_literal(None, '\'', src);
        let components: Vec<_> = component_iterator.collect();
        (component_iterator.has_closing_quote, components)
    }
    // Returns the single component of a literal that must NOT have a closing quote.
    fn unclosed_char_component(src: &str) -> StringComponent {
        let (has_closing_quote, components) = parse(src);
        assert!(!has_closing_quote, "char should not have closing quote");
        assert!(components.len() == 1);
        components[0].clone()
    }
    // Returns the single component of a literal that must have a closing quote.
    fn closed_char_component(src: &str) -> StringComponent {
        let (has_closing_quote, components) = parse(src);
        assert!(has_closing_quote, "char should have closing quote");
        assert!(components.len() == 1, "Literal: {}\nComponents: {:#?}", src, components);
        components[0].clone()
    }
    // Returns all components of a literal that must have a closing quote.
    fn closed_char_components(src: &str) -> Vec<StringComponent> {
        let (has_closing_quote, components) = parse(src);
        assert!(has_closing_quote, "char should have closing quote");
        components
    }
    // Range of everything between the opening and the closing quote of `src`.
    fn range_closed(src: &str) -> TextRange {
        TextRange::from_to(1.into(), (src.len() as u32 - 1).into())
    }
    // Range of everything after the opening quote of `src`, up to its end.
    fn range_unclosed(src: &str) -> TextRange {
        TextRange::from_to(1.into(), (src.len() as u32).into())
    }
    // `\u` with a braced group, an empty group, or no group at all is one UnicodeEscape.
    #[test]
    fn test_unicode_escapes() {
        let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
        for escape in unicode_escapes {
            let escape_sequence = format!(r"'\u{}'", escape);
            let component = closed_char_component(&escape_sequence);
            let expected_range = range_closed(&escape_sequence);
            assert_eq!(component.kind, UnicodeEscape);
            assert_eq!(component.range, expected_range);
        }
    }
    // A `\u{` without `}` swallows the rest of the input, closing quote included.
    #[test]
    fn test_unicode_escapes_unclosed() {
        let unicode_escapes = &["{DEAD", "{BEEF", "{FF"];
        for escape in unicode_escapes {
            let escape_sequence = format!(r"'\u{}'", escape);
            let component = unclosed_char_component(&escape_sequence);
            let expected_range = range_unclosed(&escape_sequence);
            assert_eq!(component.kind, UnicodeEscape);
            assert_eq!(component.range, expected_range);
        }
    }
    // `''` is closed but has no components.
    #[test]
    fn test_empty_char() {
        let (has_closing_quote, components) = parse("''");
        assert!(has_closing_quote, "char should have closing quote");
        assert!(components.len() == 0);
    }
    // A literal without closing quote still yields its code point.
    #[test]
    fn test_unclosed_char() {
        let component = unclosed_char_component("'a");
        assert!(component.kind == CodePoint);
        assert!(component.range == TextRange::from_to(1.into(), 2.into()));
    }
    // `\x` takes zero, one or two following characters as its code.
    #[test]
    fn test_digit_escapes() {
        let literals = &[r"", r"5", r"55"];
        for literal in literals {
            let lit_text = format!(r"'\x{}'", literal);
            let component = closed_char_component(&lit_text);
            assert!(component.kind == AsciiCodeEscape);
            assert!(component.range == range_closed(&lit_text));
        }
        // More than 2 digits starts a new codepoint
        let components = closed_char_components(r"'\x555'");
        assert!(components.len() == 2);
        assert!(components[1].kind == CodePoint);
    }
    // A backslash followed by a single non-`x`/`u` character is one AsciiEscape.
    #[test]
    fn test_ascii_escapes() {
        let literals = &[
            r"\'", "\\\"", // equivalent to \"
            r"\n", r"\r", r"\t", r"\\", r"\0",
        ];
        for literal in literals {
            let lit_text = format!("'{}'", literal);
            let component = closed_char_component(&lit_text);
            assert!(component.kind == AsciiEscape);
            assert!(component.range == range_closed(&lit_text));
        }
    }
    // Without a leading backslash the same characters are plain code points.
    #[test]
    fn test_no_escapes() {
        let literals = &['"', 'n', 'r', 't', '0', 'x', 'u'];
        for &literal in literals {
            let lit_text = format!("'{}'", literal);
            let component = closed_char_component(&lit_text);
            assert!(component.kind == CodePoint);
            assert!(component.range == range_closed(&lit_text));
        }
    }
 }
--- a/crates/ra_syntax/src/syntax_error.rs
+++ b/crates/ra_syntax/src/syntax_error.rs
@ -2,7 +2,10 @@ use std::fmt;
 use ra_parser::ParseError;
-use crate::{TextRange, TextUnit};
+use crate::{
    TextRange, TextUnit,
    validation::EscapeError,
 };
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct SyntaxError {
@ -67,32 +70,7 @@ impl fmt::Display for SyntaxError {
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum SyntaxErrorKind {
    ParseError(ParseError),
-    UnescapedCodepoint,
+    EscapeError(EscapeError),
    EmptyChar,
    UnclosedChar,
    OverlongChar,
    EmptyByte,
    UnclosedByte,
    OverlongByte,
    ByteOutOfRange,
    UnescapedByte,
    EmptyByteEscape,
    InvalidByteEscape,
    TooShortByteCodeEscape,
    MalformedByteCodeEscape,
    UnicodeEscapeForbidden,
    EmptyAsciiEscape,
    InvalidAsciiEscape,
    TooShortAsciiCodeEscape,
    AsciiCodeEscapeOutOfRange,
    MalformedAsciiCodeEscape,
    UnclosedUnicodeEscape,
    MalformedUnicodeEscape,
    EmptyUnicodeEcape,
    OverlongUnicodeEscape,
    UnicodeEscapeOutOfRange,
    UnclosedString,
    InvalidSuffix,
    InvalidBlockAttr,
    InvalidMatchInnerAttr,
    InvalidTupleIndexFormat,
@ -102,38 +80,6 @@ impl fmt::Display for SyntaxErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::SyntaxErrorKind::*;
        match self {
            UnescapedCodepoint => write!(f, "This codepoint should always be escaped"),
            EmptyAsciiEscape => write!(f, "Empty escape sequence"),
            InvalidAsciiEscape => write!(f, "Invalid escape sequence"),
            EmptyChar => write!(f, "Empty char literal"),
            UnclosedChar => write!(f, "Unclosed char literal"),
            OverlongChar => write!(f, "Char literal should be one character long"),
            EmptyByte => write!(f, "Empty byte literal"),
            UnclosedByte => write!(f, "Unclosed byte literal"),
            OverlongByte => write!(f, "Byte literal should be one character long"),
            ByteOutOfRange => write!(f, "Byte should be a valid ASCII character"),
            UnescapedByte => write!(f, "This byte should always be escaped"),
            EmptyByteEscape => write!(f, "Empty escape sequence"),
            InvalidByteEscape => write!(f, "Invalid escape sequence"),
            TooShortByteCodeEscape => write!(f, "Escape sequence should have two digits"),
            MalformedByteCodeEscape => write!(f, "Escape sequence should be a hexadecimal number"),
            UnicodeEscapeForbidden => {
                write!(f, "Unicode escapes are not allowed in byte literals or byte strings")
            }
            TooShortAsciiCodeEscape => write!(f, "Escape sequence should have two digits"),
            AsciiCodeEscapeOutOfRange => {
                write!(f, "Escape sequence should be between \\x00 and \\x7F")
            }
            MalformedAsciiCodeEscape => write!(f, "Escape sequence should be a hexadecimal number"),
            UnclosedUnicodeEscape => write!(f, "Missing `}}`"),
            MalformedUnicodeEscape => write!(f, "Malformed unicode escape sequence"),
            EmptyUnicodeEcape => write!(f, "Empty unicode escape sequence"),
            OverlongUnicodeEscape => {
                write!(f, "Unicode escape sequence should have at most 6 digits")
            }
            UnicodeEscapeOutOfRange => write!(f, "Unicode escape code should be at most 0x10FFFF"),
            UnclosedString => write!(f, "Unclosed string literal"),
            InvalidSuffix => write!(f, "Invalid literal suffix"),
            InvalidBlockAttr => {
                write!(f, "A block in this position cannot accept inner attributes")
            }
@ -144,6 +90,46 @@ impl fmt::Display for SyntaxErrorKind {
                write!(f, "Tuple (struct) field access is only allowed through decimal integers with no underscores or suffix")
            }
            ParseError(msg) => write!(f, "{}", msg.0),
            EscapeError(err) => write!(f, "{}", err),
        }
    }
 }
 /// User-facing messages for the escape errors reported while validating literals.
 impl fmt::Display for EscapeError {
    /// Writes the fixed message that belongs to this error variant.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let msg = match self {
            EscapeError::ZeroChars => "Empty literal",
            EscapeError::MoreThanOneChar => "Literal should be one character long",
            EscapeError::LoneSlash => "Character must be escaped: '\\'",
            EscapeError::InvalidEscape => "Invalid escape sequence",
            // The backslash is doubled so the message shows the two characters `\r`
            // instead of embedding a raw carriage return in the diagnostic text.
            EscapeError::BareCarriageReturn => "Character must be escaped: '\\r'",
            EscapeError::EscapeOnlyChar => "Character must be escaped",
            EscapeError::TooShortHexEscape => "Escape sequence should have two digits",
            EscapeError::InvalidCharInHexEscape => "Escape sequence should be a hexadecimal number",
            EscapeError::OutOfRangeHexEscape => "Escape sequence should be ASCII",
            EscapeError::NoBraceInUnicodeEscape => "Invalid escape sequence",
            EscapeError::InvalidCharInUnicodeEscape => "Invalid escape sequence",
            EscapeError::EmptyUnicodeEscape => "Invalid escape sequence",
            EscapeError::UnclosedUnicodeEscape => "Missing '}'",
            EscapeError::LeadingUnderscoreUnicodeEscape => "Invalid escape sequence",
            EscapeError::OverlongUnicodeEscape => {
                "Unicode escape sequence should have at most 6 digits"
            }
            EscapeError::LoneSurrogateUnicodeEscape => {
                "Unicode escape code should not be a surrogate"
            }
            EscapeError::OutOfRangeUnicodeEscape => {
                "Unicode escape code should be at most 0x10FFFF"
            }
            EscapeError::UnicodeEscapeInByte => "Unicode escapes are not allowed in bytes",
            EscapeError::NonAsciiCharInByte => "Non ASCII characters are not allowed in bytes",
        };
        f.write_str(msg)
    }
 }
 /// Lets an escape error be turned into a syntax error kind with `.into()`.
 impl From<EscapeError> for SyntaxErrorKind {
    /// Wraps `escape_error` in the `EscapeError` variant.
    fn from(escape_error: EscapeError) -> SyntaxErrorKind {
        SyntaxErrorKind::EscapeError(escape_error)
    }
 }
--- a/crates/ra_syntax/src/validation.rs
+++ b/crates/ra_syntax/src/validation.rs
@ -1,17 +1,17 @@
-mod byte;
+mod unescape;
-mod byte_string;
+
 mod char;
 mod string;
 mod block;
 mod field_expr;
 use crate::{
-    SourceFile, SyntaxError, AstNode, SyntaxNode,
+    SourceFile, SyntaxError, AstNode, SyntaxNode, TextUnit,
    SyntaxKind::{L_CURLY, R_CURLY, BYTE, BYTE_STRING, STRING, CHAR},
    ast,
    algo::visit::{visitor_ctx, VisitorCtx},
 };
 pub(crate) use unescape::EscapeError;
 pub(crate) fn validate(file: &SourceFile) -> Vec<SyntaxError> {
    let mut errors = Vec::new();
    for node in file.syntax().descendants() {
@ -26,11 +26,55 @@ pub(crate) fn validate(file: &SourceFile) -> Vec<SyntaxError> {
 // FIXME: kill duplication
 /// Validates the escapes inside a byte, char, byte-string or string literal token.
 ///
 /// For each literal kind, the text between the prefix/opening quote and the last closing
 /// quote is handed to the matching `unescape` routine; every error it reports is pushed
 /// onto `acc` at an offset relative to the start of the token. Other token kinds are ignored.
 fn validate_literal(literal: &ast::Literal, acc: &mut Vec<SyntaxError>) {
-    match literal.token().kind() {
+    let token = literal.token();
-        BYTE => byte::validate_byte_node(literal.token(), acc),
+    let text = token.text().as_str();
-        BYTE_STRING => byte_string::validate_byte_string_node(literal.token(), acc),
+    match token.kind() {
-        STRING => string::validate_string_node(literal.token(), acc),
+        BYTE => {
-        CHAR => char::validate_char_node(literal.token(), acc),
            // Contents start after the two-byte `b'` and end before the last `'`.
            // `get` yields `None` instead of panicking when that range is not valid.
+            if let Some(end) = text.rfind('\'') {
                if let Some(without_quotes) = text.get(2..end) {
                    if let Err((off, err)) = unescape::unescape_byte(without_quotes) {
                        // `off` is relative to the contents; add the skipped `b'` and the token start.
                        let off = token.range().start() + TextUnit::from_usize(off + 2);
                        acc.push(SyntaxError::new(err.into(), off))
                    }
                }
            }
        }
        CHAR => {
            // Same as above, but only the one-byte opening `'` is skipped.
            if let Some(end) = text.rfind('\'') {
                if let Some(without_quotes) = text.get(1..end) {
                    if let Err((off, err)) = unescape::unescape_char(without_quotes) {
                        let off = token.range().start() + TextUnit::from_usize(off + 1);
                        acc.push(SyntaxError::new(err.into(), off))
                    }
                }
            }
        }
        BYTE_STRING => {
            // Contents start after the two-byte `b"`; the callback is invoked with a range
            // and a result, and only the `Err` results are recorded.
            if let Some(end) = text.rfind('\"') {
                if let Some(without_quotes) = text.get(2..end) {
                    unescape::unescape_byte_str(without_quotes, &mut |range, char| {
                        if let Err(err) = char {
                            let off = range.start;
                            let off = token.range().start() + TextUnit::from_usize(off + 2);
                            acc.push(SyntaxError::new(err.into(), off))
                        }
                    })
                }
            }
        }
        STRING => {
            // Same as the byte-string case, but only the one-byte opening `"` is skipped.
            if let Some(end) = text.rfind('\"') {
                if let Some(without_quotes) = text.get(1..end) {
                    unescape::unescape_str(without_quotes, &mut |range, char| {
                        if let Err(err) = char {
                            let off = range.start;
                            let off = token.range().start() + TextUnit::from_usize(off + 1);
                            acc.push(SyntaxError::new(err.into(), off))
                        }
                    })
                }
            }
        }
        _ => (),
    }
 }
--- a/crates/ra_syntax/src/validation/byte.rs
+++ b/crates/ra_syntax/src/validation/byte.rs
@ -1,199 +0,0 @@
 //! Validation of byte literals
 use crate::{
    string_lexing::{self, StringComponentKind},
    TextRange,
    validation::char,
    SyntaxError,
    SyntaxErrorKind::*,
    SyntaxToken,
 };
 /// Validates a byte literal token (`b'…'`), appending every problem found to `errors`.
 ///
 /// Each lexed component is checked on its own; afterwards the literal as a whole is checked
 /// for a missing closing quote, a trailing suffix, and a component count other than one.
 pub(super) fn validate_byte_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) {
    let token_text = node.text();
    let token_range = node.range();
    let mut lexer = string_lexing::parse_quoted_literal(Some('b'), '\'', token_text);
    let mut component_count = 0;
    // The lexer is driven by hand because its closing-quote and suffix state is read afterwards.
    while let Some(component) = lexer.next() {
        component_count += 1;
        let component_text = &token_text[component.range];
        // Component ranges are relative to the token; shift them to file offsets.
        let absolute_range = component.range + token_range.start();
        validate_byte_component(component_text, component.kind, absolute_range, errors);
    }
    if !lexer.has_closing_quote {
        errors.push(SyntaxError::new(UnclosedByte, token_range));
    }
    if let Some(suffix_range) = lexer.suffix {
        errors.push(SyntaxError::new(InvalidSuffix, suffix_range + token_range.start()));
    }
    match component_count {
        0 => errors.push(SyntaxError::new(EmptyByte, token_range)),
        1 => {}
        _ => errors.push(SyntaxError::new(OverlongByte, token_range)),
    }
 }
 /// Validates one lexed component of a byte literal or byte string.
 ///
 /// `text` is the component's source text, `kind` its lexical category and `range` the
 /// location to attach to any error pushed onto `errors`.
 pub(super) fn validate_byte_component(
    text: &str,
    kind: StringComponentKind,
    range: TextRange,
    errors: &mut Vec<SyntaxError>,
 ) {
    use self::StringComponentKind::*;
    match kind {
        IgnoreNewline => { /* always valid */ }
        UnicodeEscape => errors.push(SyntaxError::new(UnicodeEscapeForbidden, range)),
        AsciiCodeEscape => validate_byte_code_escape(text, range, errors),
        AsciiEscape => validate_byte_escape(text, range, errors),
        CodePoint => {
            let code_point =
                text.chars().next().expect("Code points should be one character long");
            // These bytes must always be escaped
            match code_point {
                '\t' | '\r' | '\n' => errors.push(SyntaxError::new(UnescapedByte, range)),
                _ => {}
            }
            // Only ASCII bytes are allowed
            if !code_point.is_ascii() {
                errors.push(SyntaxError::new(ByteOutOfRange, range));
            }
        }
    }
 }
 /// Validates a single-character escape (`\n`, `\\`, ...) inside a byte literal.
 ///
 /// `text` is the whole escape including its leading `\`.
 fn validate_byte_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
    // Escape sequence consists only of leading `\`
    if text.len() == 1 {
        errors.push(SyntaxError::new(EmptyByteEscape, range));
        return;
    }
    let escape_code = text.chars().nth(1).unwrap();
    if !char::is_ascii_escape(escape_code) {
        errors.push(SyntaxError::new(InvalidByteEscape, range));
    }
 }
 /// Validates a `\xDD` escape inside a byte literal or byte string.
 ///
 /// `text` is the whole escape including the leading `\x`. A well-formed escape is exactly
 /// four ASCII characters, the last two being hexadecimal digits; anything else is reported
 /// on `errors` at `range`.
 fn validate_byte_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
    // A ByteCodeEscape has 4 chars, example: `\xDD`
    if !text.is_ascii() {
        errors.push(SyntaxError::new(MalformedByteCodeEscape, range));
        return;
    }
    // `text` is ASCII from here on, so its byte length equals its character count.
    if text.len() < 4 {
        errors.push(SyntaxError::new(TooShortByteCodeEscape, range));
        return;
    }
    assert!(text.len() == 4, "ByteCodeEscape cannot be longer than 4 chars");
    // Check the digits directly: an integer parser such as `u8::from_str_radix` would also
    // accept a leading `+`, letting `\x+5` through as if it were valid.
    let all_hex_digits = text[2..].bytes().all(|byte| byte.is_ascii_hexdigit());
    if !all_hex_digits {
        errors.push(SyntaxError::new(MalformedByteCodeEscape, range));
    }
 }
 // End-to-end tests: each case is spliced into `const C: u8 = b'…';`, parsed as a whole
 // file, and judged only by whether the parse reports any error.
 #[cfg(test)]
 mod test {
    use crate::{SourceFile, TreeArc};
    // Parses a source file whose only item is a byte constant with `literal` between the quotes.
    fn build_file(literal: &str) -> TreeArc<SourceFile> {
        let src = format!("const C: u8 = b'{}';", literal);
        SourceFile::parse(&src)
    }
    // Passes when the file built from `literal` has no errors at all.
    fn assert_valid_byte(literal: &str) {
        let file = build_file(literal);
        assert!(file.errors().len() == 0, "Errors for literal '{}': {:?}", literal, file.errors());
    }
    // Passes when the file built from `literal` has at least one error (of any kind).
    fn assert_invalid_byte(literal: &str) {
        let file = build_file(literal);
        assert!(file.errors().len() > 0);
    }
    // ASCII is accepted except raw newline/CR/tab; everything from 128 up is rejected.
    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..128 {
            match byte {
                b'\n' | b'\r' | b'\t' => assert_invalid_byte(&(byte as char).to_string()),
                b'\'' | b'\\' => { /* Ignore character close and backslash */ }
                _ => assert_valid_byte(&(byte as char).to_string()),
            }
        }
        for byte in 128..=255u8 {
            assert_invalid_byte(&(byte as char).to_string());
        }
    }
    // Single non-ASCII characters are rejected.
    #[test]
    fn test_unicode_codepoints() {
        let invalid = ["Ƒ", "バ", "メ", "﷽"];
        for c in &invalid {
            assert_invalid_byte(c);
        }
    }
    // Sequences of several non-ASCII code points are rejected.
    #[test]
    fn test_unicode_multiple_codepoints() {
        let invalid = ["नी", "👨‍👨‍"];
        for c in &invalid {
            assert_invalid_byte(c);
        }
    }
    // The recognised single-character escapes, plus an unescaped `"`, are accepted.
    #[test]
    fn test_valid_byte_escape() {
        let valid = [r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0"];
        for c in &valid {
            assert_valid_byte(c);
        }
    }
    // Unknown escapes and a lone backslash are rejected.
    #[test]
    fn test_invalid_byte_escape() {
        let invalid = [r"\a", r"\?", r"\"];
        for c in &invalid {
            assert_invalid_byte(c);
        }
    }
    // Two-digit `\x` escapes are accepted, including values above 0x7F.
    #[test]
    fn test_valid_byte_code_escape() {
        let valid = [r"\x00", r"\x7F", r"\x55", r"\xF0"];
        for c in &valid {
            assert_valid_byte(c);
        }
    }
    // `\x` escapes with fewer than two digits are rejected.
    #[test]
    fn test_invalid_byte_code_escape() {
        let invalid = [r"\x", r"\x7"];
        for c in &invalid {
            assert_invalid_byte(c);
        }
    }
    // Unicode escapes are rejected in byte literals whether or not they are well formed.
    #[test]
    fn test_invalid_unicode_escape() {
        let well_formed = [r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"];
        for c in &well_formed {
            assert_invalid_byte(c);
        }
        let invalid = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        for c in &invalid {
            assert_invalid_byte(c);
        }
    }
 }
--- a/crates/ra_syntax/src/validation/byte_string.rs
+++ b/crates/ra_syntax/src/validation/byte_string.rs
@ -1,169 +0,0 @@
 use crate::{
    string_lexing::{self, StringComponentKind},
    SyntaxError,
    SyntaxErrorKind::*,
    SyntaxToken,
 };
 use super::byte;
 /// Validates a byte string token (`b"…"`), appending every problem found to `errors`.
 ///
 /// Components are checked with the byte-literal rules, except that raw tab, newline and
 /// carriage return are allowed. A missing closing quote and a trailing suffix are reported
 /// for the literal as a whole.
 pub(crate) fn validate_byte_string_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) {
    let token_text = node.text();
    let token_range = node.range();
    let mut lexer = string_lexing::parse_quoted_literal(Some('b'), '"', token_text);
    for component in lexer.by_ref() {
        // A `\`-newline continuation is always valid.
        if component.kind == StringComponentKind::IgnoreNewline {
            continue;
        }
        // Chars must escape \t, \n and \r codepoints, but strings don't
        let component_text = &token_text[component.range];
        if component_text == "\t" || component_text == "\n" || component_text == "\r" {
            continue;
        }
        let absolute_range = component.range + token_range.start();
        byte::validate_byte_component(component_text, component.kind, absolute_range, errors);
    }
    if !lexer.has_closing_quote {
        errors.push(SyntaxError::new(UnclosedString, token_range));
    }
    if let Some(suffix_range) = lexer.suffix {
        errors.push(SyntaxError::new(InvalidSuffix, suffix_range + token_range.start()));
    }
 }
 // End-to-end tests: each case is spliced into `const S: &'static [u8] = b"…";`, parsed as
 // a whole file, and judged only by whether the parse reports any error.
 #[cfg(test)]
 mod test {
    use crate::{SourceFile, TreeArc};
    // Parses a source file whose only item is a byte-string constant containing `literal`;
    // the generated source is also printed to stdout.
    fn build_file(literal: &str) -> TreeArc<SourceFile> {
        let src = format!(r#"const S: &'static [u8] = b"{}";"#, literal);
        println!("Source: {}", src);
        SourceFile::parse(&src)
    }
    // Passes when the file built from `literal` has no errors at all.
    fn assert_valid_str(literal: &str) {
        let file = build_file(literal);
        assert!(file.errors().len() == 0, "Errors for literal '{}': {:?}", literal, file.errors());
    }
    // Passes when the file built from `literal` has at least one error (of any kind).
    fn assert_invalid_str(literal: &str) {
        let file = build_file(literal);
        assert!(file.errors().len() > 0);
    }
    // All ASCII is accepted (raw newline/CR/tab included); everything from 128 up is rejected.
    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..128 {
            match byte {
                b'\"' | b'\\' => { /* Ignore string close and backslash */ }
                _ => assert_valid_str(&(byte as char).to_string()),
            }
        }
        for byte in 128..=255u8 {
            assert_invalid_str(&(byte as char).to_string());
        }
    }
    // Single non-ASCII characters are rejected.
    #[test]
    fn test_unicode_codepoints() {
        let invalid = ["Ƒ", "バ", "メ", "﷽"];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }
    // Sequences of several non-ASCII code points are rejected.
    #[test]
    fn test_unicode_multiple_codepoints() {
        let invalid = ["नी", "👨‍👨‍"];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }
    // The recognised single-character escapes and plain ASCII letters are accepted.
    #[test]
    fn test_valid_ascii_escape() {
        let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
        for c in &valid {
            assert_valid_str(c);
        }
    }
    // Unknown escapes and a lone backslash are rejected.
    #[test]
    fn test_invalid_ascii_escape() {
        let invalid = [r"\a", r"\?", r"\"];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }
    // Two-digit `\x` escapes are accepted, including values above 0x7F.
    #[test]
    fn test_valid_ascii_code_escape() {
        let valid = [r"\x00", r"\x7F", r"\x55", r"\xF0"];
        for c in &valid {
            assert_valid_str(c);
        }
    }
    // `\x` escapes with fewer than two digits are rejected.
    #[test]
    fn test_invalid_ascii_code_escape() {
        let invalid = [r"\x", r"\x7"];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }
    // Unicode escapes are rejected in byte strings whether or not they are well formed.
    #[test]
    fn test_invalid_unicode_escape() {
        let well_formed = [r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"];
        for c in &well_formed {
            assert_invalid_str(c);
        }
        let invalid = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }
    // A multi-line literal containing emoji, a unicode escape and other non-ASCII text is rejected.
    #[test]
    fn test_mixed_invalid() {
        assert_invalid_str(
            r"This is the tale of a string
 with a newline in between, some emoji (👨‍👨‍) here and there,
 unicode escapes like this: \u{1FFBB} and weird stuff like
 this ﷽",
        );
    }
    // A multi-line, ASCII-only literal without escapes is accepted.
    #[test]
    fn test_mixed_valid() {
        assert_valid_str(
            r"This is the tale of a string
 with a newline in between, no emoji at all,
 nor unicode escapes or weird stuff",
        );
    }
    // Checks that a literal built from a line-continued Rust string is accepted.
    #[test]
    fn test_ignore_newline() {
        assert_valid_str(
            "Hello \
             World",
        );
    }
 }
--- a/crates/ra_syntax/src/validation/char.rs
+++ b/crates/ra_syntax/src/validation/char.rs
@ -1,273 +0,0 @@
 //! Validation of char literals
 use std::u32;
 use arrayvec::ArrayString;
 use crate::{
    string_lexing::{self, StringComponentKind},
    TextRange,
    SyntaxError,
    SyntaxErrorKind::*,
    SyntaxToken,
 };
 /// Validates a char literal token, appending every problem found to `errors`.
 ///
 /// Each component of the literal is checked individually; afterwards the literal
 /// as a whole is checked for a closing quote, a suffix, and for containing
 /// exactly one component.
 pub(super) fn validate_char_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) {
    let literal_text = node.text();
    let literal_range = node.range();
    let mut components = string_lexing::parse_quoted_literal(None, '\'', literal_text);
    let mut len = 0;
    while let Some(component) = components.next() {
        // Component ranges are relative to the literal; shift them to file offsets.
        let range = component.range + literal_range.start();
        let text = &literal_text[component.range];
        validate_char_component(text, component.kind, range, errors);
        len += 1;
    }
    // These fields are read only after the iterator has been driven to the end.
    if !components.has_closing_quote {
        errors.push(SyntaxError::new(UnclosedChar, literal_range));
    }
    if let Some(range) = components.suffix {
        errors.push(SyntaxError::new(InvalidSuffix, range + literal_range.start()));
    }
    match len {
        0 => errors.push(SyntaxError::new(EmptyChar, literal_range)),
        1 => {}
        _ => errors.push(SyntaxError::new(OverlongChar, literal_range)),
    }
 }
 pub(super) fn validate_char_component(
    text: &str,
    kind: StringComponentKind,
    range: TextRange,
    errors: &mut Vec<SyntaxError>,
 ) {
    // Validate escapes
    use self::StringComponentKind::*;
    match kind {
        AsciiEscape => validate_ascii_escape(text, range, errors),
        AsciiCodeEscape => validate_ascii_code_escape(text, range, errors),
        UnicodeEscape => validate_unicode_escape(text, range, errors),
        CodePoint => {
            // These code points must always be escaped
            if text == "\t" || text == "\r" || text == "\n" {
                errors.push(SyntaxError::new(UnescapedCodepoint, range));
            }
        }
        StringComponentKind::IgnoreNewline => { /* always valid */ }
    }
 }
 /// Validates a simple backslash escape such as `\n`.
 fn validate_ascii_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
    if text.len() == 1 {
        // Only the leading `\` is present. This happens only at EOF; otherwise e.g. '\'
        // is treated as an unclosed char containing a single quote `'`.
        errors.push(SyntaxError::new(EmptyAsciiEscape, range));
        return;
    }
    // The character right after the backslash decides whether the escape is known.
    let escape_code = text.chars().nth(1).unwrap();
    if is_ascii_escape(escape_code) {
        return;
    }
    errors.push(SyntaxError::new(InvalidAsciiEscape, range));
 }
 /// Returns `true` if `code` is a character that may follow `\` in a simple escape:
 /// a backslash, either quote, `n`, `r`, `t` or `0`.
 pub(super) fn is_ascii_escape(code: char) -> bool {
    const ESCAPABLE: [char; 7] = ['\\', '\'', '"', 'n', 'r', 't', '0'];
    ESCAPABLE.contains(&code)
 }
 /// Validates a `\xDD` escape: it must be ASCII-only text of exactly four
 /// characters whose two hex digits encode a value below 128.
 fn validate_ascii_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
    if !text.is_ascii() {
        // FIXME: Give a more precise error message (say what the invalid character was)
        errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range));
        return;
    }
    let n_chars = text.chars().count();
    if n_chars < 4 {
        errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
        return;
    }
    assert_eq!(
        n_chars,
        4,
        "AsciiCodeEscape cannot be longer than 4 chars, but text '{}' is",
        text,
    );
    // Skip the leading `\x` and parse the two remaining characters as hex.
    let kind = match u8::from_str_radix(&text[2..], 16) {
        Ok(code) if code < 128 => return,
        Ok(_) => AsciiCodeEscapeOutOfRange,
        Err(_) => MalformedAsciiCodeEscape,
    };
    errors.push(SyntaxError::new(kind, range));
 }
 /// Validates a `\u{...}` escape and pushes any problem found to `errors`.
 ///
 /// The escape must start with `\u{`, contain between one and six hex digits
 /// (underscores are allowed anywhere except before the first digit), be closed
 /// by `}`, and denote a valid `char` value. Surrogate code points
 /// (`D800`..=`DFFF`) are rejected together with values above `10FFFF`, because
 /// neither can be represented as a `char`.
 ///
 /// # Panics
 ///
 /// Panics if `text` does not start with `\u`, or if any character follows the
 /// closing `}`.
 fn validate_unicode_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
    assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
    if text.len() == 2 {
        // No starting `{`
        errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
        return;
    }
    if text.len() == 3 {
        // Only starting `{`
        errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
        return;
    }
    // Collects the hex digits only; underscores are dropped.
    let mut code = ArrayString::<[_; 6]>::new();
    let mut closed = false;
    for c in text[3..].chars() {
        assert!(!closed, "no characters after escape is closed");
        if c.is_digit(16) {
            if code.len() == 6 {
                errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
                return;
            }
            code.push(c);
        } else if c == '_' {
            // Reject leading _
            if code.is_empty() {
                errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
                return;
            }
        } else if c == '}' {
            closed = true;
        } else {
            errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
            return;
        }
    }
    if !closed {
        errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
    }
    if code.is_empty() {
        errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
        return;
    }
    match u32::from_str_radix(&code, 16) {
        // `char::from_u32` fails both for values above 0x10FFFF and for surrogates.
        Ok(code_u32) if std::char::from_u32(code_u32).is_none() => {
            errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
        }
        Ok(_) => {
            // Valid escape code
        }
        Err(_) => {
            errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
        }
    }
 }
 #[cfg(test)]
 mod test {
    use crate::{SourceFile, TreeArc};
    /// Parses a `const` item whose initializer is the char literal `'<literal>'`.
    fn build_file(literal: &str) -> TreeArc<SourceFile> {
        let src = format!("const C: char = '{}';", literal);
        SourceFile::parse(&src)
    }
    /// Asserts that parsing `'<literal>'` reports no errors.
    fn assert_valid_char(literal: &str) {
        let file = build_file(literal);
        let errors = file.errors();
        assert!(errors.is_empty(), "Errors for literal '{}': {:?}", literal, errors);
    }
    /// Asserts that parsing `'<literal>'` reports at least one error.
    fn assert_invalid_char(literal: &str) {
        let file = build_file(literal);
        // Name the offending literal so a failure inside a loop can be pinpointed.
        assert!(!file.errors().is_empty(), "Expected errors for literal '{}'", literal);
    }
    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..=255u8 {
            match byte {
                b'\n' | b'\r' | b'\t' => assert_invalid_char(&(byte as char).to_string()),
                b'\'' | b'\\' => { /* Ignore character close and backslash */ }
                _ => assert_valid_char(&(byte as char).to_string()),
            }
        }
    }
    #[test]
    fn test_unicode_codepoints() {
        let valid = ["Ƒ", "バ", "メ", "﷽"];
        for c in &valid {
            assert_valid_char(c);
        }
    }
    #[test]
    fn test_unicode_multiple_codepoints() {
        let invalid = ["नी", "👨‍👨‍"];
        for c in &invalid {
            assert_invalid_char(c);
        }
    }
    #[test]
    fn test_valid_ascii_escape() {
        let valid = [r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0"];
        for c in &valid {
            assert_valid_char(c);
        }
    }
    #[test]
    fn test_invalid_ascii_escape() {
        let invalid = [r"\a", r"\?", r"\"];
        for c in &invalid {
            assert_invalid_char(c);
        }
    }
    #[test]
    fn test_valid_ascii_code_escape() {
        let valid = [r"\x00", r"\x7F", r"\x55"];
        for c in &valid {
            assert_valid_char(c);
        }
    }
    #[test]
    fn test_invalid_ascii_code_escape() {
        let invalid = [r"\x", r"\x7", r"\xF0"];
        for c in &invalid {
            assert_invalid_char(c);
        }
    }
    #[test]
    fn test_valid_unicode_escape() {
        let valid = [r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"];
        for c in &valid {
            assert_valid_char(c);
        }
    }
    #[test]
    fn test_invalid_unicode_escape() {
        let invalid = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        for c in &invalid {
            assert_invalid_char(c);
        }
    }
 }
--- a/crates/ra_syntax/src/validation/string.rs
+++ b/crates/ra_syntax/src/validation/string.rs
@ -1,154 +0,0 @@
 use crate::{
    string_lexing,
    SyntaxError,
    SyntaxErrorKind::*,
    SyntaxToken,
 };
 use super::char;
 /// Validates a string literal token, appending every problem found to `errors`.
 ///
 /// Components are checked with the char-literal rules, except that raw tab,
 /// line feed and carriage return are accepted. The literal as a whole is then
 /// checked for a closing quote and for a suffix.
 pub(crate) fn validate_string_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) {
    let literal_text = node.text();
    let literal_range = node.range();
    let mut components = string_lexing::parse_quoted_literal(None, '"', literal_text);
    while let Some(component) = components.next() {
        let text = &literal_text[component.range];
        // Chars must escape \t, \n and \r codepoints, but strings don't
        let is_raw_whitespace = text == "\t" || text == "\n" || text == "\r";
        if !is_raw_whitespace {
            let range = component.range + literal_range.start();
            char::validate_char_component(text, component.kind, range, errors);
        }
    }
    if !components.has_closing_quote {
        errors.push(SyntaxError::new(UnclosedString, literal_range));
    }
    if let Some(range) = components.suffix {
        errors.push(SyntaxError::new(InvalidSuffix, range + literal_range.start()));
    }
 }
 #[cfg(test)]
 mod test {
    use crate::{SourceFile, TreeArc};
    /// Parses a `const` item whose initializer is the string literal `"<literal>"`.
    fn build_file(literal: &str) -> TreeArc<SourceFile> {
        let src = format!(r#"const S: &'static str = "{}";"#, literal);
        SourceFile::parse(&src)
    }
    /// Asserts that parsing `"<literal>"` reports no errors.
    fn assert_valid_str(literal: &str) {
        let file = build_file(literal);
        let errors = file.errors();
        assert!(errors.is_empty(), "Errors for literal '{}': {:?}", literal, errors);
    }
    /// Asserts that parsing `"<literal>"` reports at least one error.
    fn assert_invalid_str(literal: &str) {
        let file = build_file(literal);
        // Name the offending literal so a failure inside a loop can be pinpointed.
        assert!(!file.errors().is_empty(), "Expected errors for literal '{}'", literal);
    }
    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..=255u8 {
            match byte {
                b'\"' | b'\\' => { /* Ignore string close and backslash */ }
                _ => assert_valid_str(&(byte as char).to_string()),
            }
        }
    }
    #[test]
    fn test_unicode_codepoints() {
        let valid = ["Ƒ", "バ", "メ", "﷽"];
        for c in &valid {
            assert_valid_str(c);
        }
    }
    #[test]
    fn test_unicode_multiple_codepoints() {
        let valid = ["नी", "👨‍👨‍"];
        for c in &valid {
            assert_valid_str(c);
        }
    }
    #[test]
    fn test_valid_ascii_escape() {
        let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
        for c in &valid {
            assert_valid_str(c);
        }
    }
    #[test]
    fn test_invalid_ascii_escape() {
        let invalid = [r"\a", r"\?", r"\"];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }
    #[test]
    fn test_valid_ascii_code_escape() {
        let valid = [r"\x00", r"\x7F", r"\x55"];
        for c in &valid {
            assert_valid_str(c);
        }
    }
    #[test]
    fn test_invalid_ascii_code_escape() {
        let invalid = [r"\x", r"\x7", r"\xF0"];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }
    #[test]
    fn test_valid_unicode_escape() {
        let valid = [r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"];
        for c in &valid {
            assert_valid_str(c);
        }
    }
    #[test]
    fn test_invalid_unicode_escape() {
        let invalid = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }
    #[test]
    fn test_mixed() {
        assert_valid_str(
            r"This is the tale of a string
 with a newline in between, some emoji (👨‍👨‍) here and there,
 unicode escapes like this: \u{1FFBB} and weird stuff like
 this ﷽",
        );
    }
    #[test]
    fn test_ignore_newline() {
        // The literal must be a raw string: in a regular string literal rustc itself
        // processes the backslash-newline continuation, so the helper would only ever
        // see "Hello World" and the escaped newline would never be exercised.
        assert_valid_str(
            r"Hello \
             World",
        );
    }
 }
--- a/crates/ra_syntax/src/validation/unescape.rs
+++ b/crates/ra_syntax/src/validation/unescape.rs
@ -0,0 +1,521 @@
 //! Utilities for validating string and char literals and turning them into
 //! the values they represent.
 //!
 //! This file is copy-pasted from the compiler
 //!
 //! https://github.com/rust-lang/rust/blob/c6ac57564852cb6e2d0db60f7b46d9eb98d4b449/src/libsyntax/parse/unescape.rs
 //!
 //! Hopefully, we'll share this code in a proper way some day
 use std::str::Chars;
 use std::ops::Range;
 /// Errors that can be reported while unescaping the contents of a char, byte,
 /// string or byte string literal.
 #[derive(Debug, PartialEq, Eq, Clone, Hash)]
 pub enum EscapeError {
    /// A char or byte literal contains no characters.
    ZeroChars,
    /// A char or byte literal contains more than one character or escape.
    MoreThanOneChar,
    /// A backslash is the last character of the literal.
    LoneSlash,
    /// The character after a backslash does not start a known escape.
    InvalidEscape,
    /// A raw `\r` that is not followed by `\n`.
    BareCarriageReturn,
    /// A character that must be escaped appears raw: tab, newline, `\r\n`,
    /// or the quote character that delimits the literal.
    EscapeOnlyChar,
    /// `\x` is followed by fewer than two characters.
    TooShortHexEscape,
    /// A character in a `\x` escape is not a hex digit.
    InvalidCharInHexEscape,
    /// A `\x` escape above `0x7F` in a char or string literal.
    OutOfRangeHexEscape,
    /// `\u` is not followed by `{`.
    NoBraceInUnicodeEscape,
    /// A character inside `\u{...}` is neither a hex digit nor `_`.
    InvalidCharInUnicodeEscape,
    /// `\u{}`: the braces contain nothing.
    EmptyUnicodeEscape,
    /// The literal ends before the closing `}` of a `\u{...}` escape.
    UnclosedUnicodeEscape,
    /// The first character inside `\u{...}` is `_`.
    LeadingUnderscoreUnicodeEscape,
    /// A `\u{...}` escape contains more than six hex digits.
    OverlongUnicodeEscape,
    /// A `\u{...}` escape whose value is at most `0x10FFFF` but is not a valid `char`.
    LoneSurrogateUnicodeEscape,
    /// A `\u{...}` escape whose value is above `0x10FFFF`.
    OutOfRangeUnicodeEscape,
    /// A `\u{...}` escape inside a byte or byte string literal.
    UnicodeEscapeInByte,
    /// A raw non-ASCII character inside a byte or byte string literal.
    NonAsciiCharInByte,
 }
 /// Unescapes the contents of a char literal (the text between the quotes).
 ///
 /// Returns the resulting `char`, or the error together with the number of bytes
 /// of `literal_text` that had been consumed when the error was detected.
 pub(crate) fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> {
    let mut rest = literal_text.chars();
    match unescape_char_or_byte(&mut rest, Mode::Char) {
        Ok(c) => Ok(c),
        Err(err) => Err((literal_text.len() - rest.as_str().len(), err)),
    }
 }
 /// Takes the contents of a string literal (without quotes) and produces a
 /// sequence of unescaped characters or errors.
 ///
 /// Results are not returned; `callback` is handed each one together with a
 /// byte range into `literal_text`.
 pub(crate) fn unescape_str<F>(literal_text: &str, callback: &mut F)
 where
    F: FnMut(Range<usize>, Result<char, EscapeError>),
 {
    unescape_str_or_byte_str(literal_text, Mode::Str, callback)
 }
 pub(crate) fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
    let mut chars = literal_text.chars();
    unescape_char_or_byte(&mut chars, Mode::Byte)
        .map(byte_from_char)
        .map_err(|err| (literal_text.len() - chars.as_str().len(), err))
 }
 /// Takes the contents of a byte string literal (without quotes) and produces a
 /// sequence of unescaped bytes or errors.
 ///
 /// Results are not returned; `callback` is handed each one together with a
 /// byte range into `literal_text`.
 pub(crate) fn unescape_byte_str<F>(literal_text: &str, callback: &mut F)
 where
    F: FnMut(Range<usize>, Result<u8, EscapeError>),
 {
    // Unescape as chars in `ByteStr` mode, then narrow every successful result to a byte.
    unescape_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
        callback(range, char.map(byte_from_char))
    })
 }
 /// The kind of literal being unescaped.
 #[derive(Debug, Clone, Copy)]
 pub(crate) enum Mode {
    /// A char literal.
    Char,
    /// A string literal.
    Str,
    /// A byte literal.
    Byte,
    /// A byte string literal.
    ByteStr,
 }
 impl Mode {
    /// `true` for the single-character literal kinds (`Char` and `Byte`).
    fn in_single_quotes(self) -> bool {
        !self.in_double_quotes()
    }
    /// `true` for the string-like literal kinds (`Str` and `ByteStr`).
    pub(crate) fn in_double_quotes(self) -> bool {
        match self {
            Mode::Str | Mode::ByteStr => true,
            Mode::Char | Mode::Byte => false,
        }
    }
    /// `true` for the byte-oriented literal kinds (`Byte` and `ByteStr`).
    pub(crate) fn is_bytes(self) -> bool {
        match self {
            Mode::Char | Mode::Str => false,
            Mode::Byte | Mode::ByteStr => true,
        }
    }
 }
 /// Unescapes a single character or escape sequence.
 ///
 /// `first_char` has already been taken from `chars`; the remaining characters
 /// of an escape sequence are consumed from `chars`. `mode` selects the
 /// literal-specific rules: which quote must be escaped, whether non-ASCII
 /// characters and `\u{...}` are allowed, and the permitted range of `\x`.
 ///
 /// Returns the unescaped character or the first error encountered. On error,
 /// `chars` is left wherever scanning stopped.
 fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
    if first_char != '\\' {
        // Not an escape: either a character that must not appear raw, or the character itself.
        return match first_char {
            '\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
            // Peek (via a clone) at the next character without consuming it.
            '\r' => Err(if chars.clone().next() == Some('\n') {
                EscapeError::EscapeOnlyChar
            } else {
                EscapeError::BareCarriageReturn
            }),
            // Only the quote that delimits this kind of literal has to be escaped.
            '\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
            '"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
            _ => {
                if mode.is_bytes() && !first_char.is_ascii() {
                    return Err(EscapeError::NonAsciiCharInByte);
                }
                Ok(first_char)
            }
        };
    }
    // `first_char` is a backslash; the next character selects the escape.
    let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;
    let res = match second_char {
        '"' => '"',
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        '\\' => '\\',
        '\'' => '\'',
        '0' => '\0',
        'x' => {
            // `\xHL`: exactly two hex digits.
            let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
            let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
            let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
            let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
            let value = hi * 16 + lo;
            // Outside byte literals the value has to be ASCII.
            if !mode.is_bytes() && !is_ascii(value) {
                return Err(EscapeError::OutOfRangeHexEscape);
            }
            // Two hex digits are at most 0xFF, so the narrowing cast is lossless.
            let value = value as u8;
            value as char
        }
        'u' => {
            if chars.next() != Some('{') {
                return Err(EscapeError::NoBraceInUnicodeEscape);
            }
            // The first character after `{` must be a hex digit.
            let mut n_digits = 1;
            let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
                '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
                '}' => return Err(EscapeError::EmptyUnicodeEscape),
                c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
            };
            loop {
                match chars.next() {
                    None => return Err(EscapeError::UnclosedUnicodeEscape),
                    // Underscores after the first digit are ignored.
                    Some('_') => continue,
                    Some('}') => {
                        // Errors are checked in this order: overlong, then not allowed
                        // in bytes, then the value itself.
                        if n_digits > 6 {
                            return Err(EscapeError::OverlongUnicodeEscape);
                        }
                        if mode.is_bytes() {
                            return Err(EscapeError::UnicodeEscapeInByte);
                        }
                        break std::char::from_u32(value).ok_or_else(|| {
                            if value > 0x10FFFF {
                                EscapeError::OutOfRangeUnicodeEscape
                            } else {
                                EscapeError::LoneSurrogateUnicodeEscape
                            }
                        })?;
                    }
                    Some(c) => {
                        let digit =
                            c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
                        n_digits += 1;
                        // Digits beyond the sixth are counted but not accumulated; the
                        // overlong error is reported once `}` is reached.
                        if n_digits > 6 {
                            continue;
                        }
                        let digit = digit as u32;
                        value = value * 16 + digit;
                    }
                };
            }
        }
        _ => return Err(EscapeError::InvalidEscape),
    };
    Ok(res)
 }
 /// Unescapes a literal that must consist of exactly one character or escape.
 ///
 /// Fails with `ZeroChars` when `chars` is empty and with `MoreThanOneChar`
 /// when anything is left after the first character or escape.
 fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
    let head = match chars.next() {
        Some(c) => c,
        None => return Err(EscapeError::ZeroChars),
    };
    let unescaped = scan_escape(head, chars, mode)?;
    match chars.next() {
        Some(_) => Err(EscapeError::MoreThanOneChar),
        None => Ok(unescaped),
    }
 }
 /// Takes the contents of a string or byte string literal (without quotes) and
 /// produces a sequence of unescaped characters or errors.
 ///
 /// `callback` is invoked once per character or escape with the byte range it
 /// occupies in `src` and the unescaping result. A backslash immediately
 /// followed by a line break produces no callback: the line break and the ASCII
 /// whitespace after it are skipped.
 ///
 /// # Panics
 ///
 /// Panics if `mode` is not one of the double-quoted modes.
 fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
 where
    F: FnMut(Range<usize>, Result<char, EscapeError>),
 {
    assert!(mode.in_double_quotes());
    let initial_len = src.len();
    let mut chars = src.chars();
    while let Some(first_char) = chars.next() {
        // Byte offset of `first_char`: bytes consumed so far minus its own width.
        let start = initial_len - chars.as_str().len() - first_char.len_utf8();
        let unescaped_char = match first_char {
            '\\' => {
                // Look ahead two characters on a clone so nothing is consumed yet.
                let (second_char, third_char) = {
                    let mut chars = chars.clone();
                    (chars.next(), chars.next())
                };
                match (second_char, third_char) {
                    // Backslash followed by `\n` or `\r\n`: skip the line break and the
                    // whitespace after it, reporting nothing.
                    (Some('\n'), _) | (Some('\r'), Some('\n')) => {
                        skip_ascii_whitespace(&mut chars);
                        continue;
                    }
                    _ => scan_escape(first_char, &mut chars, mode),
                }
            }
            '\r' => {
                let second_char = chars.clone().next();
                if second_char == Some('\n') {
                    // `\r\n` is reported as a single `\n`.
                    chars.next();
                    Ok('\n')
                } else {
                    scan_escape(first_char, &mut chars, mode)
                }
            }
            // Raw newlines and tabs are accepted here, unlike in `scan_escape`.
            '\n' => Ok('\n'),
            '\t' => Ok('\t'),
            _ => scan_escape(first_char, &mut chars, mode),
        };
        let end = initial_len - chars.as_str().len();
        callback(start..end, unescaped_char);
    }
    /// Advances `chars` past any leading spaces, tabs, `\n` and `\r`.
    fn skip_ascii_whitespace(chars: &mut Chars<'_>) {
        let str = chars.as_str();
        let first_non_space = str
            .bytes()
            .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
            .unwrap_or(str.len());
        *chars = str[first_non_space..].chars()
    }
 }
 /// Narrows a `char` to a byte.
 ///
 /// # Panics
 ///
 /// Panics if the code point of `c` does not fit into a `u8`.
 fn byte_from_char(c: char) -> u8 {
    let code_point = u32::from(c);
    assert!(code_point <= 0xFF, "guaranteed because of Mode::Byte");
    code_point as u8
 }
 /// Returns `true` if `x` lies in the ASCII range (`0x00..=0x7F`).
 fn is_ascii(x: u32) -> bool {
    x < 0x80
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    // Char literals: each input must fail with exactly the given error kind.
    #[test]
    fn test_unescape_char_bad() {
        // Compares only the error kind; the reported offset is ignored.
        fn check(literal_text: &str, expected_error: EscapeError) {
            let actual_result = unescape_char(literal_text).map_err(|(_offset, err)| err);
            assert_eq!(actual_result, Err(expected_error));
        }
        check("", EscapeError::ZeroChars);
        check(r"\", EscapeError::LoneSlash);
        check("\n", EscapeError::EscapeOnlyChar);
        check("\r\n", EscapeError::EscapeOnlyChar);
        check("\t", EscapeError::EscapeOnlyChar);
        check("'", EscapeError::EscapeOnlyChar);
        check("\r", EscapeError::BareCarriageReturn);
        check("spam", EscapeError::MoreThanOneChar);
        check(r"\x0ff", EscapeError::MoreThanOneChar);
        check(r#"\"a"#, EscapeError::MoreThanOneChar);
        check(r"\na", EscapeError::MoreThanOneChar);
        check(r"\ra", EscapeError::MoreThanOneChar);
        check(r"\ta", EscapeError::MoreThanOneChar);
        check(r"\\a", EscapeError::MoreThanOneChar);
        check(r"\'a", EscapeError::MoreThanOneChar);
        check(r"\0a", EscapeError::MoreThanOneChar);
        check(r"\u{0}x", EscapeError::MoreThanOneChar);
        check(r"\u{1F63b}}", EscapeError::MoreThanOneChar);
        check(r"\v", EscapeError::InvalidEscape);
        check(r"\💩", EscapeError::InvalidEscape);
        check(r"\●", EscapeError::InvalidEscape);
        check(r"\x", EscapeError::TooShortHexEscape);
        check(r"\x0", EscapeError::TooShortHexEscape);
        check(r"\xf", EscapeError::TooShortHexEscape);
        check(r"\xa", EscapeError::TooShortHexEscape);
        check(r"\xx", EscapeError::InvalidCharInHexEscape);
        check(r"\xы", EscapeError::InvalidCharInHexEscape);
        check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
        check(r"\xtt", EscapeError::InvalidCharInHexEscape);
        check(r"\xff", EscapeError::OutOfRangeHexEscape);
        check(r"\xFF", EscapeError::OutOfRangeHexEscape);
        check(r"\x80", EscapeError::OutOfRangeHexEscape);
        check(r"\u", EscapeError::NoBraceInUnicodeEscape);
        check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
        check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
        check(r"\u{", EscapeError::UnclosedUnicodeEscape);
        check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
        check(r"\u{}", EscapeError::EmptyUnicodeEscape);
        check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
        check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);
        check(r"\u{FFFFFF}", EscapeError::OutOfRangeUnicodeEscape);
        check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape);
        check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape);
        check(r"\u{DC00}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{DDDD}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{DFFF}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{D800}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{DAAA}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{DBFF}", EscapeError::LoneSurrogateUnicodeEscape);
    }
    // Char literals: each input must unescape to the given character.
    #[test]
    fn test_unescape_char_good() {
        fn check(literal_text: &str, expected_char: char) {
            let actual_result = unescape_char(literal_text);
            assert_eq!(actual_result, Ok(expected_char));
        }
        check("a", 'a');
        check("ы", 'ы');
        check("🦀", '🦀');
        check(r#"\""#, '"');
        check(r"\n", '\n');
        check(r"\r", '\r');
        check(r"\t", '\t');
        check(r"\\", '\\');
        check(r"\'", '\'');
        check(r"\0", '\0');
        check(r"\x00", '\0');
        check(r"\x5a", 'Z');
        check(r"\x5A", 'Z');
        check(r"\x7f", 127 as char);
        check(r"\u{0}", '\0');
        check(r"\u{000000}", '\0');
        check(r"\u{41}", 'A');
        check(r"\u{0041}", 'A');
        check(r"\u{00_41}", 'A');
        check(r"\u{4__1__}", 'A');
        check(r"\u{1F63b}", '😻');
    }
    // String literals: each input must unescape to the given text.
    #[test]
    fn test_unescape_str_good() {
        // Collects the unescaped characters, keeping only the first error if one occurs.
        fn check(literal_text: &str, expected: &str) {
            let mut buf = Ok(String::with_capacity(literal_text.len()));
            unescape_str(literal_text, &mut |range, c| {
                if let Ok(b) = &mut buf {
                    match c {
                        Ok(c) => b.push(c),
                        Err(e) => buf = Err((range, e)),
                    }
                }
            });
            let buf = buf.as_ref().map(|it| it.as_ref());
            assert_eq!(buf, Ok(expected))
        }
        check("foo", "foo");
        check("", "");
        check(" \t\n\r\n", " \t\n\n");
        check("hello \\\n     world", "hello world");
        check("hello \\\r\n     world", "hello world");
        check("thread's", "thread's")
    }
    // Byte literals: each input must fail with exactly the given error kind.
    #[test]
    fn test_unescape_byte_bad() {
        // Compares only the error kind; the reported offset is ignored.
        fn check(literal_text: &str, expected_error: EscapeError) {
            let actual_result = unescape_byte(literal_text).map_err(|(_offset, err)| err);
            assert_eq!(actual_result, Err(expected_error));
        }
        check("", EscapeError::ZeroChars);
        check(r"\", EscapeError::LoneSlash);
        check("\n", EscapeError::EscapeOnlyChar);
        check("\r\n", EscapeError::EscapeOnlyChar);
        check("\t", EscapeError::EscapeOnlyChar);
        check("'", EscapeError::EscapeOnlyChar);
        check("\r", EscapeError::BareCarriageReturn);
        check("spam", EscapeError::MoreThanOneChar);
        check(r"\x0ff", EscapeError::MoreThanOneChar);
        check(r#"\"a"#, EscapeError::MoreThanOneChar);
        check(r"\na", EscapeError::MoreThanOneChar);
        check(r"\ra", EscapeError::MoreThanOneChar);
        check(r"\ta", EscapeError::MoreThanOneChar);
        check(r"\\a", EscapeError::MoreThanOneChar);
        check(r"\'a", EscapeError::MoreThanOneChar);
        check(r"\0a", EscapeError::MoreThanOneChar);
        check(r"\v", EscapeError::InvalidEscape);
        check(r"\💩", EscapeError::InvalidEscape);
        check(r"\●", EscapeError::InvalidEscape);
        check(r"\x", EscapeError::TooShortHexEscape);
        check(r"\x0", EscapeError::TooShortHexEscape);
        check(r"\xa", EscapeError::TooShortHexEscape);
        check(r"\xf", EscapeError::TooShortHexEscape);
        check(r"\xx", EscapeError::InvalidCharInHexEscape);
        check(r"\xы", EscapeError::InvalidCharInHexEscape);
        check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
        check(r"\xtt", EscapeError::InvalidCharInHexEscape);
        check(r"\u", EscapeError::NoBraceInUnicodeEscape);
        check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
        check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
        check(r"\u{", EscapeError::UnclosedUnicodeEscape);
        check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
        check(r"\u{}", EscapeError::EmptyUnicodeEscape);
        check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
        check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);
        check("ы", EscapeError::NonAsciiCharInByte);
        check("🦀", EscapeError::NonAsciiCharInByte);
        // Closed unicode escapes of at most six digits are rejected in byte literals
        // regardless of their value.
        check(r"\u{0}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{000000}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{41}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{0041}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{00_41}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{4__1__}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{1F63b}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{0}x", EscapeError::UnicodeEscapeInByte);
        check(r"\u{1F63b}}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{FFFFFF}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DC00}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DDDD}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DFFF}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{D800}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DAAA}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DBFF}", EscapeError::UnicodeEscapeInByte);
    }
    // Byte literals: each input must unescape to the given byte.
    #[test]
    fn test_unescape_byte_good() {
        fn check(literal_text: &str, expected_byte: u8) {
            let actual_result = unescape_byte(literal_text);
            assert_eq!(actual_result, Ok(expected_byte));
        }
        check("a", b'a');
        check(r#"\""#, b'"');
        check(r"\n", b'\n');
        check(r"\r", b'\r');
        check(r"\t", b'\t');
        check(r"\\", b'\\');
        check(r"\'", b'\'');
        check(r"\0", b'\0');
        check(r"\x00", b'\0');
        check(r"\x5a", b'Z');
        check(r"\x5A", b'Z');
        check(r"\x7f", 127);
        // Unlike in char literals, hex escapes above 0x7F are accepted.
        check(r"\x80", 128);
        check(r"\xff", 255);
        check(r"\xFF", 255);
    }
    // Byte string literals: each input must unescape to the given bytes.
    #[test]
    fn test_unescape_byte_str_good() {
        // Collects the unescaped bytes, keeping only the first error if one occurs.
        fn check(literal_text: &str, expected: &[u8]) {
            let mut buf = Ok(Vec::with_capacity(literal_text.len()));
            unescape_byte_str(literal_text, &mut |range, c| {
                if let Ok(b) = &mut buf {
                    match c {
                        Ok(c) => b.push(c),
                        Err(e) => buf = Err((range, e)),
                    }
                }
            });
            let buf = buf.as_ref().map(|it| it.as_ref());
            assert_eq!(buf, Ok(expected))
        }
        check("foo", b"foo");
        check("", b"");
        check(" \t\n\r\n", b" \t\n\n");
        check("hello \\\n     world", b"hello world");
        check("hello \\\r\n     world", b"hello world");
        check("thread's", b"thread's")
    }
 }
--- a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt
+++ b/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt
@ -40,7 +40,6 @@ SOURCE_FILE@[0; 112)
        WHITESPACE@[43; 44) " "
        LITERAL@[44; 59)
          STRING@[44; 59) "\"string\"invalid"
          err: `Invalid literal suffix`
        SEMI@[59; 60) ";"
      WHITESPACE@[60; 65) "\n    "
      LET_STMT@[65; 83)
@ -53,7 +52,6 @@ SOURCE_FILE@[0; 112)
        WHITESPACE@[72; 73) " "
        LITERAL@[73; 82)
          BYTE@[73; 82) "b\'b\'_suff"
          err: `Invalid literal suffix`
        SEMI@[82; 83) ";"
      WHITESPACE@[83; 88) "\n    "
      LET_STMT@[88; 109)
@ -66,7 +64,6 @@ SOURCE_FILE@[0; 112)
        WHITESPACE@[95; 96) " "
        LITERAL@[96; 108)
          BYTE_STRING@[96; 108) "b\"bs\"invalid"
          err: `Invalid literal suffix`
        SEMI@[108; 109) ";"
      WHITESPACE@[109; 110) "\n"
      R_CURLY@[110; 111) "}"