mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-25 16:24:46 +00:00
Auto merge of #113476 - fee1-dead-contrib:c-str-lit, r=petrochenkov
Reimplement C-str literals. This reverts #113334, cc `@fmease`. While converting lexer tokens to AST tokens in `rustc_parse`, we check the edition of the span of the token. If the edition is earlier than 2021, we split the token into two: one being the identifier and the other being the string literal.
This commit is contained in:
commit
23405bb123
@ -24,6 +24,10 @@ impl<'a> Cursor<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &'a str {
|
||||
self.chars.as_str()
|
||||
}
|
||||
|
||||
/// Returns the last eaten symbol (or `'\0'` in release builds).
|
||||
/// (For debug assertions only.)
|
||||
pub(crate) fn prev(&self) -> char {
|
||||
|
@ -367,6 +367,13 @@ impl Cursor<'_> {
|
||||
Some(|terminated| Byte { terminated }),
|
||||
),
|
||||
|
||||
// c-string literal, raw c-string literal or identifier.
|
||||
'c' => self.c_or_byte_string(
|
||||
|terminated| CStr { terminated },
|
||||
|n_hashes| RawCStr { n_hashes },
|
||||
None,
|
||||
),
|
||||
|
||||
// Identifier (this should be checked after other variant that can
|
||||
// start as identifier).
|
||||
c if is_id_start(c) => self.ident_or_unknown_prefix(),
|
||||
|
@ -9,8 +9,8 @@ use rustc_ast::tokenstream::TokenStream;
|
||||
use rustc_ast::util::unicode::contains_text_flow_control_chars;
|
||||
use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
|
||||
use rustc_lexer::unescape::{self, EscapeError, Mode};
|
||||
use rustc_lexer::Cursor;
|
||||
use rustc_lexer::{Base, DocStyle, RawStrError};
|
||||
use rustc_lexer::{Cursor, LiteralKind};
|
||||
use rustc_session::lint::builtin::{
|
||||
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
|
||||
};
|
||||
@ -118,6 +118,7 @@ impl<'a> StringReader<'a> {
|
||||
let mut swallow_next_invalid = 0;
|
||||
// Skip trivial (whitespace & comments) tokens
|
||||
loop {
|
||||
let str_before = self.cursor.as_str();
|
||||
let token = self.cursor.advance_token();
|
||||
let start = self.pos;
|
||||
self.pos = self.pos + BytePos(token.len);
|
||||
@ -165,10 +166,7 @@ impl<'a> StringReader<'a> {
|
||||
continue;
|
||||
}
|
||||
rustc_lexer::TokenKind::Ident => {
|
||||
let sym = nfc_normalize(self.str_from(start));
|
||||
let span = self.mk_sp(start, self.pos);
|
||||
self.sess.symbol_gallery.insert(sym, span);
|
||||
token::Ident(sym, false)
|
||||
self.ident(start)
|
||||
}
|
||||
rustc_lexer::TokenKind::RawIdent => {
|
||||
let sym = nfc_normalize(self.str_from(start + BytePos(2)));
|
||||
@ -182,10 +180,7 @@ impl<'a> StringReader<'a> {
|
||||
}
|
||||
rustc_lexer::TokenKind::UnknownPrefix => {
|
||||
self.report_unknown_prefix(start);
|
||||
let sym = nfc_normalize(self.str_from(start));
|
||||
let span = self.mk_sp(start, self.pos);
|
||||
self.sess.symbol_gallery.insert(sym, span);
|
||||
token::Ident(sym, false)
|
||||
self.ident(start)
|
||||
}
|
||||
rustc_lexer::TokenKind::InvalidIdent
|
||||
// Do not recover an identifier with emoji if the codepoint is a confusable
|
||||
@ -203,6 +198,27 @@ impl<'a> StringReader<'a> {
|
||||
.push(span);
|
||||
token::Ident(sym, false)
|
||||
}
|
||||
// split up (raw) c string literals to an ident and a string literal when edition < 2021.
|
||||
rustc_lexer::TokenKind::Literal {
|
||||
kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }),
|
||||
suffix_start: _,
|
||||
} if !self.mk_sp(start, self.pos).edition().at_least_rust_2021() => {
|
||||
let prefix_len = match kind {
|
||||
LiteralKind::CStr { .. } => 1,
|
||||
LiteralKind::RawCStr { .. } => 2,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// reset the state so that only the prefix ("c" or "cr")
|
||||
// was consumed.
|
||||
let lit_start = start + BytePos(prefix_len);
|
||||
self.pos = lit_start;
|
||||
self.cursor = Cursor::new(&str_before[prefix_len as usize..]);
|
||||
|
||||
self.report_unknown_prefix(start);
|
||||
let prefix_span = self.mk_sp(start, lit_start);
|
||||
return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
|
||||
}
|
||||
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
|
||||
let suffix_start = start + BytePos(suffix_start);
|
||||
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
|
||||
@ -317,6 +333,13 @@ impl<'a> StringReader<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn ident(&self, start: BytePos) -> TokenKind {
|
||||
let sym = nfc_normalize(self.str_from(start));
|
||||
let span = self.mk_sp(start, self.pos);
|
||||
self.sess.symbol_gallery.insert(sym, span);
|
||||
token::Ident(sym, false)
|
||||
}
|
||||
|
||||
fn struct_fatal_span_char(
|
||||
&self,
|
||||
from_pos: BytePos,
|
||||
|
14
tests/ui/rfcs/rfc-3348-c-string-literals/auxiliary/count.rs
Normal file
14
tests/ui/rfcs/rfc-3348-c-string-literals/auxiliary/count.rs
Normal file
@ -0,0 +1,14 @@
|
||||
// force-host
|
||||
// edition: 2018
|
||||
// no-prefer-dynamic
|
||||
#![crate_type = "proc-macro"]
|
||||
|
||||
extern crate proc_macro;
|
||||
|
||||
use proc_macro::TokenStream;
|
||||
use std::str::FromStr;
|
||||
|
||||
#[proc_macro]
|
||||
pub fn number_of_tokens(_: TokenStream) -> TokenStream {
|
||||
TokenStream::from_str("c\"\"").unwrap().into_iter().count().to_string().parse().unwrap()
|
||||
}
|
@ -1,5 +1,4 @@
|
||||
// FIXME(c_str_literals): This should be `run-pass`
|
||||
// known-bug: #113333
|
||||
// run-pass
|
||||
// edition: 2021
|
||||
|
||||
#![feature(c_str_literals)]
|
||||
|
@ -1,25 +0,0 @@
|
||||
error: prefix `c` is unknown
|
||||
--> $DIR/basic.rs:8:27
|
||||
|
|
||||
LL | assert_eq!(b"test\0", c"test".to_bytes_with_nul());
|
||||
| ^ unknown prefix
|
||||
|
|
||||
= note: prefixed identifiers and literals are reserved since Rust 2021
|
||||
help: consider inserting whitespace here
|
||||
|
|
||||
LL | assert_eq!(b"test\0", c "test".to_bytes_with_nul());
|
||||
| +
|
||||
|
||||
error: no rules expected the token `"test"`
|
||||
--> $DIR/basic.rs:8:28
|
||||
|
|
||||
LL | assert_eq!(b"test\0", c"test".to_bytes_with_nul());
|
||||
| -^^^^^
|
||||
| |
|
||||
| no rules expected this token in macro call
|
||||
| help: missing comma here
|
||||
|
|
||||
= note: while trying to match sequence start
|
||||
|
||||
error: aborting due to 2 previous errors
|
||||
|
16
tests/ui/rfcs/rfc-3348-c-string-literals/edition-spans.rs
Normal file
16
tests/ui/rfcs/rfc-3348-c-string-literals/edition-spans.rs
Normal file
@ -0,0 +1,16 @@
|
||||
// even if this crate is edition 2021, proc macros compiled using older
|
||||
// editions should still be able to observe the pre-2021 token behavior
|
||||
//
|
||||
// adapted from tests/ui/rust-2021/reserved-prefixes-via-macro.rs
|
||||
|
||||
// edition: 2021
|
||||
// check-pass
|
||||
|
||||
// aux-build: count.rs
|
||||
extern crate count;
|
||||
|
||||
const _: () = {
|
||||
assert!(count::number_of_tokens!() == 2);
|
||||
};
|
||||
|
||||
fn main() {}
|
@ -1,32 +1,21 @@
|
||||
error: prefix `c` is unknown
|
||||
error[E0658]: `c".."` literals are experimental
|
||||
--> $DIR/gate.rs:10:5
|
||||
|
|
||||
LL | c"foo";
|
||||
| ^ unknown prefix
|
||||
| ^^^^^^
|
||||
|
|
||||
= note: prefixed identifiers and literals are reserved since Rust 2021
|
||||
help: consider inserting whitespace here
|
||||
|
|
||||
LL | c "foo";
|
||||
| +
|
||||
= note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
|
||||
= help: add `#![feature(c_str_literals)]` to the crate attributes to enable
|
||||
|
||||
error: prefix `c` is unknown
|
||||
error[E0658]: `c".."` literals are experimental
|
||||
--> $DIR/gate.rs:13:8
|
||||
|
|
||||
LL | m!(c"test");
|
||||
| ^ unknown prefix
|
||||
| ^^^^^^^
|
||||
|
|
||||
= note: prefixed identifiers and literals are reserved since Rust 2021
|
||||
help: consider inserting whitespace here
|
||||
|
|
||||
LL | m!(c "test");
|
||||
| +
|
||||
= note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
|
||||
= help: add `#![feature(c_str_literals)]` to the crate attributes to enable
|
||||
|
||||
error: expected one of `!`, `.`, `::`, `;`, `?`, `{`, `}`, or an operator, found `"foo"`
|
||||
--> $DIR/gate.rs:10:6
|
||||
|
|
||||
LL | c"foo";
|
||||
| ^^^^^ expected one of 8 possible tokens
|
||||
|
||||
error: aborting due to 3 previous errors
|
||||
error: aborting due to 2 previous errors
|
||||
|
||||
For more information about this error, try `rustc --explain E0658`.
|
||||
|
Binary file not shown.
Binary file not shown.
@ -1,5 +1,4 @@
|
||||
// FIXME(c_str_literals): This should be `run-pass`
|
||||
// known-bug: #113333
|
||||
// run-pass
|
||||
// edition: 2021
|
||||
|
||||
#![feature(c_str_literals)]
|
||||
|
@ -1,38 +0,0 @@
|
||||
error: prefix `c` is unknown
|
||||
--> $DIR/non-ascii.rs:9:9
|
||||
|
|
||||
LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
|
||||
| ^ unknown prefix
|
||||
|
|
||||
= note: prefixed identifiers and literals are reserved since Rust 2021
|
||||
help: consider inserting whitespace here
|
||||
|
|
||||
LL | c "\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
|
||||
| +
|
||||
|
||||
error: out of range hex escape
|
||||
--> $DIR/non-ascii.rs:9:11
|
||||
|
|
||||
LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
|
||||
| ^^^^ must be a character in the range [\x00-\x7f]
|
||||
|
||||
error: out of range hex escape
|
||||
--> $DIR/non-ascii.rs:9:15
|
||||
|
|
||||
LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
|
||||
| ^^^^ must be a character in the range [\x00-\x7f]
|
||||
|
||||
error: no rules expected the token `"\xEF\x80🦀\u{1F980}"`
|
||||
--> $DIR/non-ascii.rs:9:10
|
||||
|
|
||||
LL | c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
|
||||
| -^^^^^^^^^^^^^^^^^^^^
|
||||
| |
|
||||
| no rules expected this token in macro call
|
||||
| help: missing comma here
|
||||
|
|
||||
note: while trying to match `,`
|
||||
--> $SRC_DIR/core/src/macros/mod.rs:LL:COL
|
||||
|
||||
error: aborting due to 4 previous errors
|
||||
|
Loading…
Reference in New Issue
Block a user