mirror of
https://github.com/rust-lang/rust.git
synced 2025-04-28 02:57:37 +00:00
Rollup merge of #123769 - dtolnay:literal, r=fee1-dead
Improve escaping of byte, byte str, and c str proc-macro literals

This PR changes the behavior of `proc_macro::Literal::byte_character` (https://github.com/rust-lang/rust/issues/115268), `byte_string`, and `c_string` (https://github.com/rust-lang/rust/issues/119750) to improve their choice of escape sequences. 3 categories of changes are made:

1. Never use `\x00`. Always prefer `\0`, which is supported in all the same places.
2. Never escape `\'` inside double quotes and `\"` inside single quotes.
3. Never use `\x` for valid UTF-8 in literals that permit `\u`.

The second commit adds tests covering these cases, asserting the **old** behavior. The third commit implements the behavior change and simultaneously updates the tests to assert the **new** behavior.
This commit is contained in:
commit
20ca54b6a6
57
library/proc_macro/src/escape.rs
Normal file
57
library/proc_macro/src/escape.rs
Normal file
@ -0,0 +1,57 @@
|
||||
/// Selects which escape sequences [`escape_bytes`] emits.
///
/// Each kind of literal (string, char, byte, byte string, C string) picks the
/// combination that is valid, and least noisy, inside its own delimiters.
#[derive(Copy, Clone)]
pub(crate) struct EscapeOptions {
    /// Produce \'.
    pub escape_single_quote: bool,
    /// Produce \".
    pub escape_double_quote: bool,
    /// Produce \x escapes for non-ASCII, and use \x rather than \u for ASCII
    /// control characters.
    pub escape_nonascii: bool,
}

/// Renders `bytes` as the body of a literal (without delimiters or prefix),
/// escaping according to `opt`.
///
/// With `opt.escape_nonascii` set, every byte is escaped on its own, so
/// non-ASCII bytes and ASCII control bytes become `\x##`. Otherwise the input
/// is split into valid-UTF-8 runs and invalid bytes: valid text is escaped per
/// `char` (using `\u{...}` where an escape is needed) and only the bytes that
/// are not valid UTF-8 fall back to `\x##`.
pub(crate) fn escape_bytes(bytes: &[u8], opt: EscapeOptions) -> String {
    // Every input byte produces at least one output byte, so the input length
    // is a safe lower bound for the allocation.
    let mut repr = String::with_capacity(bytes.len());

    if opt.escape_nonascii {
        for &byte in bytes {
            escape_single_byte(byte, opt, &mut repr);
        }
    } else {
        for chunk in bytes.utf8_chunks() {
            for ch in chunk.valid().chars() {
                escape_single_char(ch, opt, &mut repr);
            }
            for &byte in chunk.invalid() {
                escape_single_byte(byte, opt, &mut repr);
            }
        }
    }

    repr
}

/// Appends the escaped form of a single byte to `repr`.
fn escape_single_byte(byte: u8, opt: EscapeOptions, repr: &mut String) {
    match byte {
        // `escape_ascii` would produce `\x00`; `\0` is the shorter spelling.
        b'\0' => repr.push_str("\\0"),
        // A quote only needs escaping inside its own kind of delimiter.
        b'\'' if !opt.escape_single_quote => repr.push('\''),
        b'"' if !opt.escape_double_quote => repr.push('"'),
        // Escapes \t, \r, \n, \\, \', \", and uses \x## for non-ASCII and
        // for ASCII control characters.
        _ => repr.extend(byte.escape_ascii().map(char::from)),
    }
}

/// Appends the escaped form of a single `char` to `repr`.
fn escape_single_char(ch: char, opt: EscapeOptions, repr: &mut String) {
    match ch {
        // A quote only needs escaping inside its own kind of delimiter.
        '\'' if !opt.escape_single_quote => repr.push(ch),
        '"' if !opt.escape_double_quote => repr.push(ch),
        // Escapes \0, \t, \r, \n, \\, \', \", and uses \u{...} for
        // non-printable characters and for Grapheme_Extend characters, which
        // includes things like U+0300 "Combining Grave Accent".
        _ => repr.extend(ch.escape_debug()),
    }
}
|
@ -43,10 +43,12 @@
|
||||
pub mod bridge;
|
||||
|
||||
mod diagnostic;
|
||||
mod escape;
|
||||
|
||||
#[unstable(feature = "proc_macro_diagnostic", issue = "54140")]
|
||||
pub use diagnostic::{Diagnostic, Level, MultiSpan};
|
||||
|
||||
use crate::escape::{escape_bytes, EscapeOptions};
|
||||
use std::ffi::CStr;
|
||||
use std::ops::{Range, RangeBounds};
|
||||
use std::path::PathBuf;
|
||||
@ -1356,40 +1358,61 @@ impl Literal {
|
||||
/// String literal.
|
||||
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
|
||||
pub fn string(string: &str) -> Literal {
|
||||
let quoted = format!("{:?}", string);
|
||||
assert!(quoted.starts_with('"') && quoted.ends_with('"'));
|
||||
let symbol = "ed[1..quoted.len() - 1];
|
||||
Literal::new(bridge::LitKind::Str, symbol, None)
|
||||
let escape = EscapeOptions {
|
||||
escape_single_quote: false,
|
||||
escape_double_quote: true,
|
||||
escape_nonascii: false,
|
||||
};
|
||||
let repr = escape_bytes(string.as_bytes(), escape);
|
||||
Literal::new(bridge::LitKind::Str, &repr, None)
|
||||
}
|
||||
|
||||
/// Character literal.
|
||||
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
|
||||
pub fn character(ch: char) -> Literal {
|
||||
let quoted = format!("{:?}", ch);
|
||||
assert!(quoted.starts_with('\'') && quoted.ends_with('\''));
|
||||
let symbol = "ed[1..quoted.len() - 1];
|
||||
Literal::new(bridge::LitKind::Char, symbol, None)
|
||||
let escape = EscapeOptions {
|
||||
escape_single_quote: true,
|
||||
escape_double_quote: false,
|
||||
escape_nonascii: false,
|
||||
};
|
||||
let repr = escape_bytes(ch.encode_utf8(&mut [0u8; 4]).as_bytes(), escape);
|
||||
Literal::new(bridge::LitKind::Char, &repr, None)
|
||||
}
|
||||
|
||||
/// Byte character literal.
|
||||
#[stable(feature = "proc_macro_byte_character", since = "1.79.0")]
|
||||
pub fn byte_character(byte: u8) -> Literal {
|
||||
let string = [byte].escape_ascii().to_string();
|
||||
Literal::new(bridge::LitKind::Byte, &string, None)
|
||||
let escape = EscapeOptions {
|
||||
escape_single_quote: true,
|
||||
escape_double_quote: false,
|
||||
escape_nonascii: true,
|
||||
};
|
||||
let repr = escape_bytes(&[byte], escape);
|
||||
Literal::new(bridge::LitKind::Byte, &repr, None)
|
||||
}
|
||||
|
||||
/// Byte string literal.
|
||||
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
|
||||
pub fn byte_string(bytes: &[u8]) -> Literal {
|
||||
let string = bytes.escape_ascii().to_string();
|
||||
Literal::new(bridge::LitKind::ByteStr, &string, None)
|
||||
let escape = EscapeOptions {
|
||||
escape_single_quote: false,
|
||||
escape_double_quote: true,
|
||||
escape_nonascii: true,
|
||||
};
|
||||
let repr = escape_bytes(bytes, escape);
|
||||
Literal::new(bridge::LitKind::ByteStr, &repr, None)
|
||||
}
|
||||
|
||||
/// C string literal.
|
||||
#[stable(feature = "proc_macro_c_str_literals", since = "1.79.0")]
|
||||
pub fn c_string(string: &CStr) -> Literal {
|
||||
let string = string.to_bytes().escape_ascii().to_string();
|
||||
Literal::new(bridge::LitKind::CStr, &string, None)
|
||||
let escape = EscapeOptions {
|
||||
escape_single_quote: false,
|
||||
escape_double_quote: true,
|
||||
escape_nonascii: false,
|
||||
};
|
||||
let repr = escape_bytes(string.to_bytes(), escape);
|
||||
Literal::new(bridge::LitKind::CStr, &repr, None)
|
||||
}
|
||||
|
||||
/// Returns the span encompassing this literal.
|
||||
|
83
tests/ui/proc-macro/auxiliary/api/literal.rs
Normal file
83
tests/ui/proc-macro/auxiliary/api/literal.rs
Normal file
@ -0,0 +1,83 @@
|
||||
// ignore-tidy-linelength
|
||||
|
||||
use proc_macro::Literal;
|
||||
|
||||
pub fn test() {
|
||||
test_display_literal();
|
||||
test_parse_literal();
|
||||
}
|
||||
|
||||
fn test_display_literal() {
|
||||
assert_eq!(Literal::isize_unsuffixed(-10).to_string(), "-10");
|
||||
assert_eq!(Literal::isize_suffixed(-10).to_string(), "-10isize");
|
||||
assert_eq!(Literal::f32_unsuffixed(-10.0).to_string(), "-10.0");
|
||||
assert_eq!(Literal::f32_suffixed(-10.0).to_string(), "-10f32");
|
||||
assert_eq!(Literal::f64_unsuffixed(-10.0).to_string(), "-10.0");
|
||||
assert_eq!(Literal::f64_suffixed(-10.0).to_string(), "-10f64");
|
||||
assert_eq!(
|
||||
Literal::f64_unsuffixed(1e100).to_string(),
|
||||
"10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0",
|
||||
);
|
||||
|
||||
assert_eq!(Literal::string("aA").to_string(), r#" "aA" "#.trim());
|
||||
assert_eq!(Literal::string("\t").to_string(), r#" "\t" "#.trim());
|
||||
assert_eq!(Literal::string("❤").to_string(), r#" "❤" "#.trim());
|
||||
assert_eq!(Literal::string("'").to_string(), r#" "'" "#.trim());
|
||||
assert_eq!(Literal::string("\"").to_string(), r#" "\"" "#.trim());
|
||||
assert_eq!(Literal::string("\0").to_string(), r#" "\0" "#.trim());
|
||||
assert_eq!(Literal::string("\u{1}").to_string(), r#" "\u{1}" "#.trim());
|
||||
|
||||
assert_eq!(Literal::byte_string(b"aA").to_string(), r#" b"aA" "#.trim());
|
||||
assert_eq!(Literal::byte_string(b"\t").to_string(), r#" b"\t" "#.trim());
|
||||
assert_eq!(Literal::byte_string(b"'").to_string(), r#" b"'" "#.trim());
|
||||
assert_eq!(Literal::byte_string(b"\"").to_string(), r#" b"\"" "#.trim());
|
||||
assert_eq!(Literal::byte_string(b"\0").to_string(), r#" b"\0" "#.trim());
|
||||
assert_eq!(Literal::byte_string(b"\x01").to_string(), r#" b"\x01" "#.trim());
|
||||
|
||||
assert_eq!(Literal::c_string(c"aA").to_string(), r#" c"aA" "#.trim());
|
||||
assert_eq!(Literal::c_string(c"\t").to_string(), r#" c"\t" "#.trim());
|
||||
assert_eq!(Literal::c_string(c"❤").to_string(), r#" c"❤" "#.trim());
|
||||
assert_eq!(Literal::c_string(c"\'").to_string(), r#" c"'" "#.trim());
|
||||
assert_eq!(Literal::c_string(c"\"").to_string(), r#" c"\"" "#.trim());
|
||||
assert_eq!(Literal::c_string(c"\x7f\xff\xfe\u{333}").to_string(), r#" c"\u{7f}\xff\xfe\u{333}" "#.trim());
|
||||
|
||||
assert_eq!(Literal::character('a').to_string(), r#" 'a' "#.trim());
|
||||
assert_eq!(Literal::character('\t').to_string(), r#" '\t' "#.trim());
|
||||
assert_eq!(Literal::character('❤').to_string(), r#" '❤' "#.trim());
|
||||
assert_eq!(Literal::character('\'').to_string(), r#" '\'' "#.trim());
|
||||
assert_eq!(Literal::character('"').to_string(), r#" '"' "#.trim());
|
||||
assert_eq!(Literal::character('\0').to_string(), r#" '\0' "#.trim());
|
||||
assert_eq!(Literal::character('\u{1}').to_string(), r#" '\u{1}' "#.trim());
|
||||
|
||||
assert_eq!(Literal::byte_character(b'a').to_string(), r#" b'a' "#.trim());
|
||||
assert_eq!(Literal::byte_character(b'\t').to_string(), r#" b'\t' "#.trim());
|
||||
assert_eq!(Literal::byte_character(b'\'').to_string(), r#" b'\'' "#.trim());
|
||||
assert_eq!(Literal::byte_character(b'"').to_string(), r#" b'"' "#.trim());
|
||||
assert_eq!(Literal::byte_character(0).to_string(), r#" b'\0' "#.trim());
|
||||
assert_eq!(Literal::byte_character(1).to_string(), r#" b'\x01' "#.trim());
|
||||
}
|
||||
|
||||
fn test_parse_literal() {
|
||||
assert_eq!("1".parse::<Literal>().unwrap().to_string(), "1");
|
||||
assert_eq!("1.0".parse::<Literal>().unwrap().to_string(), "1.0");
|
||||
assert_eq!("'a'".parse::<Literal>().unwrap().to_string(), "'a'");
|
||||
assert_eq!("b'a'".parse::<Literal>().unwrap().to_string(), "b'a'");
|
||||
assert_eq!("\"\n\"".parse::<Literal>().unwrap().to_string(), "\"\n\"");
|
||||
assert_eq!("b\"\"".parse::<Literal>().unwrap().to_string(), "b\"\"");
|
||||
assert_eq!("c\"\"".parse::<Literal>().unwrap().to_string(), "c\"\"");
|
||||
assert_eq!("r##\"\"##".parse::<Literal>().unwrap().to_string(), "r##\"\"##");
|
||||
assert_eq!("10ulong".parse::<Literal>().unwrap().to_string(), "10ulong");
|
||||
assert_eq!("-10ulong".parse::<Literal>().unwrap().to_string(), "-10ulong");
|
||||
|
||||
assert!("true".parse::<Literal>().is_err());
|
||||
assert!(".8".parse::<Literal>().is_err());
|
||||
assert!("0 1".parse::<Literal>().is_err());
|
||||
assert!("'a".parse::<Literal>().is_err());
|
||||
assert!(" 0".parse::<Literal>().is_err());
|
||||
assert!("0 ".parse::<Literal>().is_err());
|
||||
assert!("/* comment */0".parse::<Literal>().is_err());
|
||||
assert!("0/* comment */".parse::<Literal>().is_err());
|
||||
assert!("0// comment".parse::<Literal>().is_err());
|
||||
assert!("- 10".parse::<Literal>().is_err());
|
||||
assert!("-'x'".parse::<Literal>().is_err());
|
||||
}
|
@ -10,7 +10,7 @@
|
||||
extern crate proc_macro;
|
||||
|
||||
mod cmp;
|
||||
mod parse;
|
||||
mod literal;
|
||||
|
||||
use proc_macro::TokenStream;
|
||||
|
||||
@ -19,7 +19,7 @@ pub fn run(input: TokenStream) -> TokenStream {
|
||||
assert!(input.is_empty());
|
||||
|
||||
cmp::test();
|
||||
parse::test();
|
||||
literal::test();
|
||||
|
||||
TokenStream::new()
|
||||
}
|
||||
|
@ -1,58 +0,0 @@
|
||||
// ignore-tidy-linelength
|
||||
|
||||
use proc_macro::Literal;
|
||||
|
||||
pub fn test() {
|
||||
test_display_literal();
|
||||
test_parse_literal();
|
||||
}
|
||||
|
||||
fn test_display_literal() {
|
||||
assert_eq!(Literal::isize_unsuffixed(-10).to_string(), "-10");
|
||||
assert_eq!(Literal::isize_suffixed(-10).to_string(), "-10isize");
|
||||
assert_eq!(Literal::f32_unsuffixed(-10.0).to_string(), "-10.0");
|
||||
assert_eq!(Literal::f32_suffixed(-10.0).to_string(), "-10f32");
|
||||
assert_eq!(Literal::f64_unsuffixed(-10.0).to_string(), "-10.0");
|
||||
assert_eq!(Literal::f64_suffixed(-10.0).to_string(), "-10f64");
|
||||
assert_eq!(
|
||||
Literal::f64_unsuffixed(1e100).to_string(),
|
||||
"10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.0",
|
||||
);
|
||||
|
||||
assert_eq!(Literal::string("a \t ❤ ' \" \u{1}").to_string(), "\"a \\t ❤ ' \\\" \\u{1}\"",);
|
||||
assert_eq!(Literal::c_string(c"\'\"\x7f\u{7fff}").to_string(), r#"c"\'\"\x7f\xe7\xbf\xbf""#);
|
||||
assert_eq!(Literal::character('a').to_string(), "'a'");
|
||||
assert_eq!(Literal::character('\t').to_string(), "'\\t'");
|
||||
assert_eq!(Literal::character('❤').to_string(), "'❤'");
|
||||
assert_eq!(Literal::character('\'').to_string(), "'\\''");
|
||||
assert_eq!(Literal::character('"').to_string(), "'\"'");
|
||||
assert_eq!(Literal::character('\u{1}').to_string(), "'\\u{1}'");
|
||||
|
||||
assert_eq!(Literal::byte_character(b'a').to_string(), "b'a'");
|
||||
assert_eq!(Literal::byte_character(0).to_string(), "b'\\x00'");
|
||||
}
|
||||
|
||||
fn test_parse_literal() {
|
||||
assert_eq!("1".parse::<Literal>().unwrap().to_string(), "1");
|
||||
assert_eq!("1.0".parse::<Literal>().unwrap().to_string(), "1.0");
|
||||
assert_eq!("'a'".parse::<Literal>().unwrap().to_string(), "'a'");
|
||||
assert_eq!("b'a'".parse::<Literal>().unwrap().to_string(), "b'a'");
|
||||
assert_eq!("\"\n\"".parse::<Literal>().unwrap().to_string(), "\"\n\"");
|
||||
assert_eq!("b\"\"".parse::<Literal>().unwrap().to_string(), "b\"\"");
|
||||
assert_eq!("c\"\"".parse::<Literal>().unwrap().to_string(), "c\"\"");
|
||||
assert_eq!("r##\"\"##".parse::<Literal>().unwrap().to_string(), "r##\"\"##");
|
||||
assert_eq!("10ulong".parse::<Literal>().unwrap().to_string(), "10ulong");
|
||||
assert_eq!("-10ulong".parse::<Literal>().unwrap().to_string(), "-10ulong");
|
||||
|
||||
assert!("true".parse::<Literal>().is_err());
|
||||
assert!(".8".parse::<Literal>().is_err());
|
||||
assert!("0 1".parse::<Literal>().is_err());
|
||||
assert!("'a".parse::<Literal>().is_err());
|
||||
assert!(" 0".parse::<Literal>().is_err());
|
||||
assert!("0 ".parse::<Literal>().is_err());
|
||||
assert!("/* comment */0".parse::<Literal>().is_err());
|
||||
assert!("0/* comment */".parse::<Literal>().is_err());
|
||||
assert!("0// comment".parse::<Literal>().is_err());
|
||||
assert!("- 10".parse::<Literal>().is_err());
|
||||
assert!("-'x'".parse::<Literal>().is_err());
|
||||
}
|
Loading…
Reference in New Issue
Block a user