From 7f8846a9ef4426d18b911783ab83b83a80a852d1 Mon Sep 17 00:00:00 2001 From: Urgau Date: Sat, 13 May 2023 17:12:45 +0200 Subject: [PATCH] Uplift clippy::invalid_utf8_in_unchecked as invalid_from_utf8_unchecked --- compiler/rustc_lint/messages.ftl | 4 + compiler/rustc_lint/src/invalid_from_utf8.rs | 85 ++++++++++++++++++++ compiler/rustc_lint/src/lib.rs | 3 + compiler/rustc_lint/src/lints.rs | 10 +++ compiler/rustc_span/src/symbol.rs | 2 + library/core/src/str/converts.rs | 2 + tests/ui/lint/invalid_from_utf8.rs | 49 +++++++++++ tests/ui/lint/invalid_from_utf8.stderr | 56 +++++++++++++ 8 files changed, 211 insertions(+) create mode 100644 compiler/rustc_lint/src/invalid_from_utf8.rs create mode 100644 tests/ui/lint/invalid_from_utf8.rs create mode 100644 tests/ui/lint/invalid_from_utf8.stderr diff --git a/compiler/rustc_lint/messages.ftl b/compiler/rustc_lint/messages.ftl index d34a3afcba5..35edb458478 100644 --- a/compiler/rustc_lint/messages.ftl +++ b/compiler/rustc_lint/messages.ftl @@ -304,6 +304,10 @@ lint_improper_ctypes_union_layout_help = consider adding a `#[repr(C)]` or `#[re lint_improper_ctypes_union_layout_reason = this union has unspecified layout lint_improper_ctypes_union_non_exhaustive = this union is non-exhaustive +# FIXME: we should ordinalize $valid_up_to when we add support for doing so +lint_invalid_from_utf8_unchecked = calls to `{$method}` with a invalid literal are undefined behavior + .label = the literal was valid UTF-8 up to the {$valid_up_to} bytes + lint_lintpass_by_hand = implementing `LintPass` by hand .help = try using `declare_lint_pass!` or `impl_lint_pass!` instead diff --git a/compiler/rustc_lint/src/invalid_from_utf8.rs b/compiler/rustc_lint/src/invalid_from_utf8.rs new file mode 100644 index 00000000000..2118deba5c7 --- /dev/null +++ b/compiler/rustc_lint/src/invalid_from_utf8.rs @@ -0,0 +1,85 @@ +use std::str::Utf8Error; + +use rustc_ast::{BorrowKind, LitKind}; +use rustc_hir::{Expr, ExprKind}; +use rustc_span::source_map::Spanned; +use rustc_span::sym; + +use crate::lints::InvalidFromUtf8UncheckedDiag; +use crate::{LateContext, LateLintPass, LintContext}; + +declare_lint! { + /// The `invalid_from_utf8_unchecked` lint checks for calls to + /// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut` + /// with an invalid UTF-8 literal. + /// + /// ### Example + /// + /// ```rust,compile_fail + /// # #[allow(unused)] + /// unsafe { + /// std::str::from_utf8_unchecked(b"Ru\x82st"); + /// } + /// ``` + /// + /// {{produces}} + /// + /// ### Explanation + /// + /// Creating such a `str` would result in undefined behavior as per documentation + /// for `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`. + pub INVALID_FROM_UTF8_UNCHECKED, + Deny, + "using a non UTF-8 literal in `std::str::from_utf8_unchecked`" +} + +declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED]); + +impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 { + fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) { + if let ExprKind::Call(path, [arg]) = expr.kind + && let ExprKind::Path(ref qpath) = path.kind + && let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id() + && let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id) + && [sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item) + { + let lint = |utf8_error: Utf8Error| { + let method = diag_item.as_str().strip_prefix("str_").unwrap(); + cx.emit_spanned_lint(INVALID_FROM_UTF8_UNCHECKED, expr.span, InvalidFromUtf8UncheckedDiag { + method: format!("std::str::{method}"), + valid_up_to: utf8_error.valid_up_to(), + label: arg.span, + }) + }; + + match &arg.kind { + ExprKind::Lit(Spanned { node: lit, .. }) => { + if let LitKind::ByteStr(bytes, _) = &lit + && let Err(utf8_error) = std::str::from_utf8(bytes) + { + lint(utf8_error); + } + }, + ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => { + let elements = args.iter().map(|e|{ + match &e.kind { + ExprKind::Lit(Spanned { node: lit, .. }) => match lit { + LitKind::Byte(b) => Some(*b), + LitKind::Int(b, _) => Some(*b as u8), + _ => None + } + _ => None + } + }).collect::>>(); + + if let Some(elements) = elements + && let Err(utf8_error) = std::str::from_utf8(&elements) + { + lint(utf8_error); + } + } + _ => {} + } + } + } +} diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs index dfddfe09ab3..c62109b2986 100644 --- a/compiler/rustc_lint/src/lib.rs +++ b/compiler/rustc_lint/src/lib.rs @@ -60,6 +60,7 @@ mod expect; mod for_loops_over_fallibles; pub mod hidden_unicode_codepoints; mod internal; +mod invalid_from_utf8; mod late; mod let_underscore; mod levels; @@ -102,6 +103,7 @@ use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums; use for_loops_over_fallibles::*; use hidden_unicode_codepoints::*; use internal::*; +use invalid_from_utf8::*; use let_underscore::*; use map_unit_fn::*; use methods::*; @@ -207,6 +209,7 @@ late_lint_methods!( HardwiredLints: HardwiredLints, ImproperCTypesDeclarations: ImproperCTypesDeclarations, ImproperCTypesDefinitions: ImproperCTypesDefinitions, + InvalidFromUtf8: InvalidFromUtf8, VariantSizeDifferences: VariantSizeDifferences, BoxPointers: BoxPointers, PathStatements: PathStatements, diff --git a/compiler/rustc_lint/src/lints.rs b/compiler/rustc_lint/src/lints.rs index de1c2be2875..5969bc5ca5a 100644 --- a/compiler/rustc_lint/src/lints.rs +++ b/compiler/rustc_lint/src/lints.rs @@ -699,6 +699,16 @@ pub struct ForgetCopyDiag<'a> { pub label: Span, } +// invalid_from_utf8.rs +#[derive(LintDiagnostic)] +#[diag(lint_invalid_from_utf8_unchecked)] +pub struct InvalidFromUtf8UncheckedDiag { + pub method: String, + pub valid_up_to: usize, + #[label] + pub label: Span, +} + // hidden_unicode_codepoints.rs #[derive(LintDiagnostic)] #[diag(lint_hidden_unicode_codepoints)] diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 874d578fe1d..4fc73c4ae86 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1454,6 +1454,8 @@ symbols! { stop_after_dataflow, store, str, + str_from_utf8_unchecked, + str_from_utf8_unchecked_mut, str_split_whitespace, str_trim, str_trim_end, diff --git a/library/core/src/str/converts.rs b/library/core/src/str/converts.rs index 5f8748206d7..12fac0567cb 100644 --- a/library/core/src/str/converts.rs +++ b/library/core/src/str/converts.rs @@ -167,6 +167,7 @@ pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> { #[must_use] #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_stable(feature = "const_str_from_utf8_unchecked", since = "1.55.0")] +#[rustc_diagnostic_item = "str_from_utf8_unchecked"] pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str { // SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8. // Also relies on `&str` and `&[u8]` having the same layout. @@ -194,6 +195,7 @@ pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str { #[must_use] #[stable(feature = "str_mut_extras", since = "1.20.0")] #[rustc_const_unstable(feature = "const_str_from_utf8_unchecked_mut", issue = "91005")] +#[rustc_diagnostic_item = "str_from_utf8_unchecked_mut"] pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { // SAFETY: the caller must guarantee that the bytes `v` // are valid UTF-8, thus the cast to `*mut str` is safe. diff --git a/tests/ui/lint/invalid_from_utf8.rs b/tests/ui/lint/invalid_from_utf8.rs new file mode 100644 index 00000000000..4a27c659c73 --- /dev/null +++ b/tests/ui/lint/invalid_from_utf8.rs @@ -0,0 +1,49 @@ +// check-pass + +#![feature(concat_bytes)] +#![warn(invalid_from_utf8_unchecked)] + +pub fn from_utf8_unchecked_mut() { + // Valid + unsafe { + std::str::from_utf8_unchecked_mut(&mut [99, 108, 105, 112, 112, 121]); + std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'i', b'p', b'p', b'y']); + + let x = 0xA0; + std::str::from_utf8_unchecked_mut(&mut [0xC0, x]); + } + + // Invalid + unsafe { + std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]); + //~^ WARN calls to `std::str::from_utf8_unchecked_mut` + std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); + //~^ WARN calls to `std::str::from_utf8_unchecked_mut` + } +} + +pub fn from_utf8_unchecked() { + // Valid + unsafe { + std::str::from_utf8_unchecked(&[99, 108, 105, 112, 112, 121]); + std::str::from_utf8_unchecked(&[b'c', b'l', b'i', b'p', b'p', b'y']); + std::str::from_utf8_unchecked(b"clippy"); + + let x = 0xA0; + std::str::from_utf8_unchecked(&[0xC0, x]); + } + + // Invalid + unsafe { + std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]); + //~^ WARN calls to `std::str::from_utf8_unchecked` + std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); + //~^ WARN calls to `std::str::from_utf8_unchecked` + std::str::from_utf8_unchecked(b"cl\x82ippy"); + //~^ WARN calls to `std::str::from_utf8_unchecked` + std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy")); + //~^ WARN calls to `std::str::from_utf8_unchecked` + } +} + +fn main() {} diff --git a/tests/ui/lint/invalid_from_utf8.stderr b/tests/ui/lint/invalid_from_utf8.stderr new file mode 100644 index 00000000000..63cd906237d --- /dev/null +++ b/tests/ui/lint/invalid_from_utf8.stderr @@ -0,0 +1,56 @@ +warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior + --> $DIR/invalid_from_utf8.rs:18:9 + | +LL | std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + | +note: the lint level is defined here + --> $DIR/invalid_from_utf8.rs:4:9 + | +LL | #![warn(invalid_from_utf8_unchecked)] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior + --> $DIR/invalid_from_utf8.rs:20:9 + | +LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + +warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior + --> $DIR/invalid_from_utf8.rs:38:9 + | +LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + +warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior + --> $DIR/invalid_from_utf8.rs:40:9 + | +LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + +warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior + --> $DIR/invalid_from_utf8.rs:42:9 + | +LL | std::str::from_utf8_unchecked(b"cl\x82ippy"); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + +warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior + --> $DIR/invalid_from_utf8.rs:44:9 + | +LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy")); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^ + | | + | the literal was valid UTF-8 up to the 2 bytes + +warning: 6 warnings emitted +