From a7f35c42d474f893c56b6e0f7df3f8bb965f2650 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 29 Nov 2022 13:35:44 +1100 Subject: [PATCH] Add `StrStyle` to `ast::LitKind::ByteStr`. This is required to distinguish between cooked and raw byte string literals in an `ast::LitKind`, without referring to an adjacent `token::Lit`. It's a prerequisite for the next commit. --- compiler/rustc_ast/src/ast.rs | 7 ++++--- compiler/rustc_ast/src/util/literal.rs | 16 +++++++++++----- compiler/rustc_ast_lowering/src/expr.rs | 2 +- .../rustc_ast_pretty/src/pprust/state/expr.rs | 3 ++- .../rustc_builtin_macros/src/concat_bytes.rs | 6 +++--- compiler/rustc_expand/src/base.rs | 2 +- compiler/rustc_expand/src/build.rs | 2 +- compiler/rustc_expand/src/proc_macro_server.rs | 3 ++- compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs | 2 +- compiler/rustc_hir_typeck/src/pat.rs | 2 +- .../src/build/expr/as_constant.rs | 4 ++-- compiler/rustc_mir_build/src/thir/constant.rs | 4 ++-- .../src/invalid_utf8_in_unchecked.rs | 2 +- .../clippy_lints/src/large_include_file.rs | 2 +- .../clippy_lints/src/matches/match_same_arms.rs | 2 +- .../clippy/clippy_lints/src/utils/author.rs | 2 +- .../clippy/clippy_utils/src/check_proc_macro.rs | 4 +++- src/tools/clippy/clippy_utils/src/consts.rs | 2 +- 18 files changed, 39 insertions(+), 28 deletions(-) diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index 6a2f1f0c574..b869b2f8af9 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -1796,8 +1796,9 @@ pub enum LitKind { /// A string literal (`"foo"`). The symbol is unescaped, and so may differ /// from the original token's symbol. Str(Symbol, StrStyle), - /// A byte string (`b"foo"`). - ByteStr(Lrc<[u8]>), + /// A byte string (`b"foo"`). Not stored as a symbol because it might be + /// non-utf8, and symbols only allow utf8 strings. + ByteStr(Lrc<[u8]>, StrStyle), /// A byte char (`b'f'`). Byte(u8), /// A character literal (`'a'`). @@ -1822,7 +1823,7 @@ impl LitKind { /// Returns `true` if this literal is byte literal string. pub fn is_bytestr(&self) -> bool { - matches!(self, LitKind::ByteStr(_)) + matches!(self, LitKind::ByteStr(..)) } /// Returns `true` if this is a numeric literal. diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 5e6c94f1e6f..9f6fdf44ac0 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -1,11 +1,12 @@ //! Code related to parsing literals. -use crate::ast::{self, LitKind, MetaItemLit}; +use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; use crate::token::{self, Token}; use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode}; use rustc_span::symbol::{kw, sym, Symbol}; use rustc_span::Span; use std::ascii; +use std::str; #[derive(Debug)] pub enum LitError { @@ -115,9 +116,9 @@ impl LitKind { } }); error?; - LitKind::ByteStr(buf.into()) + LitKind::ByteStr(buf.into(), StrStyle::Cooked) } - token::ByteStrRaw(_) => { + token::ByteStrRaw(n) => { let s = symbol.as_str(); let bytes = if s.contains('\r') { let mut buf = Vec::with_capacity(s.len()); @@ -136,7 +137,7 @@ impl LitKind { symbol.to_string().into_bytes() }; - LitKind::ByteStr(bytes.into()) + LitKind::ByteStr(bytes.into(), StrStyle::Raw(n)) } token::Err => LitKind::Err, }) @@ -155,10 +156,15 @@ impl LitKind { (token::Str, symbol, None) } LitKind::Str(symbol, ast::StrStyle::Raw(n)) => (token::StrRaw(n), symbol, None), - LitKind::ByteStr(ref bytes) => { + LitKind::ByteStr(ref bytes, ast::StrStyle::Cooked) => { let string = bytes.escape_ascii().to_string(); (token::ByteStr, Symbol::intern(&string), None) } + LitKind::ByteStr(ref bytes, ast::StrStyle::Raw(n)) => { + // Unwrap because raw byte string literals can only contain ASCII. + let string = str::from_utf8(bytes).unwrap(); + (token::ByteStrRaw(n), Symbol::intern(&string), None) + } LitKind::Byte(byte) => { let string: String = ascii::escape_default(byte).map(Into::::into).collect(); (token::Byte, Symbol::intern(&string), None) diff --git a/compiler/rustc_ast_lowering/src/expr.rs b/compiler/rustc_ast_lowering/src/expr.rs index 82912a733d5..e18bbcf65e7 100644 --- a/compiler/rustc_ast_lowering/src/expr.rs +++ b/compiler/rustc_ast_lowering/src/expr.rs @@ -97,7 +97,7 @@ impl<'hir> LoweringContext<'_, 'hir> { } ExprKind::IncludedBytes(bytes) => hir::ExprKind::Lit(respan( self.lower_span(e.span), - LitKind::ByteStr(bytes.clone()), + LitKind::ByteStr(bytes.clone(), StrStyle::Cooked), )), ExprKind::Cast(expr, ty) => { let expr = self.lower_expr(expr); diff --git a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs index 828b9d5ad5f..7306b10d60f 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs @@ -323,7 +323,8 @@ impl<'a> State<'a> { self.print_token_literal(*token_lit, expr.span); } ast::ExprKind::IncludedBytes(bytes) => { - let lit = ast::LitKind::ByteStr(bytes.clone()).synthesize_token_lit(); + let lit = ast::LitKind::ByteStr(bytes.clone(), ast::StrStyle::Cooked) + .synthesize_token_lit(); self.print_token_literal(lit, expr.span) } ast::ExprKind::Cast(expr, ty) => { diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index 161e3499584..56b77fdf580 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -69,7 +69,7 @@ fn invalid_type_err( Ok(ast::LitKind::Int(_, _)) => { cx.span_err(span, "numeric literal is not a `u8`"); } - Ok(ast::LitKind::ByteStr(_) | ast::LitKind::Byte(_)) => unreachable!(), + Ok(ast::LitKind::ByteStr(..) | ast::LitKind::Byte(_)) => unreachable!(), Err(err) => { report_lit_error(&cx.sess.parse_sess, err, token_lit, span); } @@ -97,7 +97,7 @@ fn handle_array_element( )) if val <= u8::MAX.into() => Some(val as u8), Ok(ast::LitKind::Byte(val)) => Some(val), - Ok(ast::LitKind::ByteStr(_)) => { + Ok(ast::LitKind::ByteStr(..)) => { if !*has_errors { cx.struct_span_err(expr.span, "cannot concatenate doubly nested array") .note("byte strings are treated as arrays of bytes") @@ -174,7 +174,7 @@ pub fn expand_concat_bytes( Ok(ast::LitKind::Byte(val)) => { accumulator.push(val); } - Ok(ast::LitKind::ByteStr(ref bytes)) => { + Ok(ast::LitKind::ByteStr(ref bytes, _)) => { accumulator.extend_from_slice(&bytes); } _ => { diff --git a/compiler/rustc_expand/src/base.rs b/compiler/rustc_expand/src/base.rs index 13e2d1ebbe7..d491e9e34a7 100644 --- a/compiler/rustc_expand/src/base.rs +++ b/compiler/rustc_expand/src/base.rs @@ -1234,7 +1234,7 @@ pub fn expr_to_spanned_string<'a>( Err(match expr.kind { ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)), - Ok(ast::LitKind::ByteStr(_)) => { + Ok(ast::LitKind::ByteStr(..)) => { let mut err = cx.struct_span_err(expr.span, err_msg); let span = expr.span.shrink_to_lo(); err.span_suggestion( diff --git a/compiler/rustc_expand/src/build.rs b/compiler/rustc_expand/src/build.rs index b56e1a24834..d8245ff613a 100644 --- a/compiler/rustc_expand/src/build.rs +++ b/compiler/rustc_expand/src/build.rs @@ -361,7 +361,7 @@ impl<'a> ExtCtxt<'a> { } pub fn expr_byte_str(&self, sp: Span, bytes: Vec) -> P { - self.expr_lit(sp, ast::LitKind::ByteStr(Lrc::from(bytes))) + self.expr_lit(sp, ast::LitKind::ByteStr(Lrc::from(bytes), ast::StrStyle::Cooked)) } /// `[expr1, expr2, ...]` diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index 57f66758ef0..255e5105ff4 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -526,7 +526,8 @@ impl server::TokenStream for Rustc<'_, '_> { Ok(tokenstream::TokenStream::token_alone(token::Literal(*token_lit), expr.span)) } ast::ExprKind::IncludedBytes(bytes) => { - let lit = ast::LitKind::ByteStr(bytes.clone()).synthesize_token_lit(); + let lit = ast::LitKind::ByteStr(bytes.clone(), ast::StrStyle::Cooked) + .synthesize_token_lit(); Ok(tokenstream::TokenStream::token_alone(token::TokenKind::Literal(lit), expr.span)) } ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind { diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs index 86384c7b93e..0d6b0175406 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs @@ -1169,7 +1169,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { match lit.node { ast::LitKind::Str(..) => tcx.mk_static_str(), - ast::LitKind::ByteStr(ref v) => { + ast::LitKind::ByteStr(ref v, _) => { tcx.mk_imm_ref(tcx.lifetimes.re_static, tcx.mk_array(tcx.types.u8, v.len() as u64)) } ast::LitKind::Byte(_) => tcx.types.u8, diff --git a/compiler/rustc_hir_typeck/src/pat.rs b/compiler/rustc_hir_typeck/src/pat.rs index decd317d9fc..6810353f9e7 100644 --- a/compiler/rustc_hir_typeck/src/pat.rs +++ b/compiler/rustc_hir_typeck/src/pat.rs @@ -386,7 +386,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { // Byte string patterns behave the same way as array patterns // They can denote both statically and dynamically-sized byte arrays. let mut pat_ty = ty; - if let hir::ExprKind::Lit(Spanned { node: ast::LitKind::ByteStr(_), .. }) = lt.kind { + if let hir::ExprKind::Lit(Spanned { node: ast::LitKind::ByteStr(..), .. }) = lt.kind { let expected = self.structurally_resolved_type(span, expected); if let ty::Ref(_, inner_ty, _) = expected.kind() && matches!(inner_ty.kind(), ty::Slice(_)) diff --git a/compiler/rustc_mir_build/src/build/expr/as_constant.rs b/compiler/rustc_mir_build/src/build/expr/as_constant.rs index 717c6231574..3b7ed818dc9 100644 --- a/compiler/rustc_mir_build/src/build/expr/as_constant.rs +++ b/compiler/rustc_mir_build/src/build/expr/as_constant.rs @@ -135,14 +135,14 @@ pub(crate) fn lit_to_mir_constant<'tcx>( let allocation = tcx.intern_const_alloc(allocation); ConstValue::Slice { data: allocation, start: 0, end: s.len() } } - (ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) + (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Slice(_)) => { let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8]); let allocation = tcx.intern_const_alloc(allocation); ConstValue::Slice { data: allocation, start: 0, end: data.len() } } - (ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { + (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { let id = tcx.allocate_bytes(data); ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx)) } diff --git a/compiler/rustc_mir_build/src/thir/constant.rs b/compiler/rustc_mir_build/src/thir/constant.rs index a9ed945d4a1..57ae6a3652d 100644 --- a/compiler/rustc_mir_build/src/thir/constant.rs +++ b/compiler/rustc_mir_build/src/thir/constant.rs @@ -33,13 +33,13 @@ pub(crate) fn lit_to_const<'tcx>( let str_bytes = s.as_str().as_bytes(); ty::ValTree::from_raw_bytes(tcx, str_bytes) } - (ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) + (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Slice(_)) => { let bytes = data as &[u8]; ty::ValTree::from_raw_bytes(tcx, bytes) } - (ast::LitKind::ByteStr(data), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { + (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { let bytes = data as &[u8]; ty::ValTree::from_raw_bytes(tcx, bytes) } diff --git a/src/tools/clippy/clippy_lints/src/invalid_utf8_in_unchecked.rs b/src/tools/clippy/clippy_lints/src/invalid_utf8_in_unchecked.rs index e0a607f9a95..6a4861747d2 100644 --- a/src/tools/clippy/clippy_lints/src/invalid_utf8_in_unchecked.rs +++ b/src/tools/clippy/clippy_lints/src/invalid_utf8_in_unchecked.rs @@ -33,7 +33,7 @@ impl<'tcx> LateLintPass<'tcx> for InvalidUtf8InUnchecked { if let Some([arg]) = match_function_call(cx, expr, &paths::STR_FROM_UTF8_UNCHECKED) { match &arg.kind { ExprKind::Lit(Spanned { node: lit, .. }) => { - if let LitKind::ByteStr(bytes) = &lit + if let LitKind::ByteStr(bytes, _) = &lit && std::str::from_utf8(bytes).is_err() { lint(cx, expr.span); diff --git a/src/tools/clippy/clippy_lints/src/large_include_file.rs b/src/tools/clippy/clippy_lints/src/large_include_file.rs index 84dd61a1e4b..424c0d9e798 100644 --- a/src/tools/clippy/clippy_lints/src/large_include_file.rs +++ b/src/tools/clippy/clippy_lints/src/large_include_file.rs @@ -60,7 +60,7 @@ impl LateLintPass<'_> for LargeIncludeFile { then { let len = match &lit.node { // include_bytes - LitKind::ByteStr(bstr) => bstr.len(), + LitKind::ByteStr(bstr, _) => bstr.len(), // include_str LitKind::Str(sym, _) => sym.as_str().len(), _ => return, diff --git a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs index 168c1e4d2e6..158e6caa4de 100644 --- a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs +++ b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs @@ -282,7 +282,7 @@ impl<'a> NormalizedPat<'a> { // TODO: Handle negative integers. They're currently treated as a wild match. ExprKind::Lit(lit) => match lit.node { LitKind::Str(sym, _) => Self::LitStr(sym), - LitKind::ByteStr(ref bytes) => Self::LitBytes(bytes), + LitKind::ByteStr(ref bytes, _) => Self::LitBytes(bytes), LitKind::Byte(val) => Self::LitInt(val.into()), LitKind::Char(val) => Self::LitInt(val.into()), LitKind::Int(val, _) => Self::LitInt(val), diff --git a/src/tools/clippy/clippy_lints/src/utils/author.rs b/src/tools/clippy/clippy_lints/src/utils/author.rs index 0c052d86eda..bd7daf0773c 100644 --- a/src/tools/clippy/clippy_lints/src/utils/author.rs +++ b/src/tools/clippy/clippy_lints/src/utils/author.rs @@ -299,7 +299,7 @@ impl<'a, 'tcx> PrintVisitor<'a, 'tcx> { }; kind!("Float(_, {float_ty})"); }, - LitKind::ByteStr(ref vec) => { + LitKind::ByteStr(ref vec, _) => { bind!(self, vec); kind!("ByteStr(ref {vec})"); chain!(self, "let [{:?}] = **{vec}", vec.value); diff --git a/src/tools/clippy/clippy_utils/src/check_proc_macro.rs b/src/tools/clippy/clippy_utils/src/check_proc_macro.rs index c6bf98b7b8b..43f0df145f0 100644 --- a/src/tools/clippy/clippy_utils/src/check_proc_macro.rs +++ b/src/tools/clippy/clippy_utils/src/check_proc_macro.rs @@ -69,7 +69,9 @@ fn lit_search_pat(lit: &LitKind) -> (Pat, Pat) { LitKind::Str(_, StrStyle::Cooked) => (Pat::Str("\""), Pat::Str("\"")), LitKind::Str(_, StrStyle::Raw(0)) => (Pat::Str("r"), Pat::Str("\"")), LitKind::Str(_, StrStyle::Raw(_)) => (Pat::Str("r#"), Pat::Str("#")), - LitKind::ByteStr(_) => (Pat::Str("b\""), Pat::Str("\"")), + LitKind::ByteStr(_, StrStyle::Cooked) => (Pat::Str("b\""), Pat::Str("\"")), + LitKind::ByteStr(_, StrStyle::Raw(0)) => (Pat::Str("br\""), Pat::Str("\"")), + LitKind::ByteStr(_, StrStyle::Raw(_)) => (Pat::Str("br#\""), Pat::Str("#")), LitKind::Byte(_) => (Pat::Str("b'"), Pat::Str("'")), LitKind::Char(_) => (Pat::Str("'"), Pat::Str("'")), LitKind::Int(_, LitIntType::Signed(IntTy::Isize)) => (Pat::Num, Pat::Str("isize")), diff --git a/src/tools/clippy/clippy_utils/src/consts.rs b/src/tools/clippy/clippy_utils/src/consts.rs index 315aea9aa09..7a637d32bab 100644 --- a/src/tools/clippy/clippy_utils/src/consts.rs +++ b/src/tools/clippy/clippy_utils/src/consts.rs @@ -210,7 +210,7 @@ pub fn lit_to_mir_constant(lit: &LitKind, ty: Option>) -> Constant { match *lit { LitKind::Str(ref is, _) => Constant::Str(is.to_string()), LitKind::Byte(b) => Constant::Int(u128::from(b)), - LitKind::ByteStr(ref s) => Constant::Binary(Lrc::clone(s)), + LitKind::ByteStr(ref s, _) => Constant::Binary(Lrc::clone(s)), LitKind::Char(c) => Constant::Char(c), LitKind::Int(n, _) => Constant::Int(n), LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty {