Uplift clippy::invalid_utf8_in_unchecked as invalid_from_utf8_unchecked

This commit is contained in:
Urgau 2023-05-13 17:12:45 +02:00
parent 1a5f8bce74
commit 7f8846a9ef
8 changed files with 211 additions and 0 deletions

View File

@ -304,6 +304,10 @@ lint_improper_ctypes_union_layout_help = consider adding a `#[repr(C)]` or `#[re
lint_improper_ctypes_union_layout_reason = this union has unspecified layout
lint_improper_ctypes_union_non_exhaustive = this union is non-exhaustive
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
lint_invalid_from_utf8_unchecked = calls to `{$method}` with a invalid literal are undefined behavior
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
lint_lintpass_by_hand = implementing `LintPass` by hand
.help = try using `declare_lint_pass!` or `impl_lint_pass!` instead

View File

@ -0,0 +1,85 @@
use std::str::Utf8Error;
use rustc_ast::{BorrowKind, LitKind};
use rustc_hir::{Expr, ExprKind};
use rustc_span::source_map::Spanned;
use rustc_span::sym;
use crate::lints::InvalidFromUtf8UncheckedDiag;
use crate::{LateContext, LateLintPass, LintContext};
declare_lint! {
/// The `invalid_from_utf8_unchecked` lint checks for calls to
/// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`
/// with an invalid UTF-8 literal.
///
/// ### Example
///
/// ```rust,compile_fail
/// # #[allow(unused)]
/// unsafe {
///     std::str::from_utf8_unchecked(b"Ru\x82st");
/// }
/// ```
///
/// {{produces}}
///
/// ### Explanation
///
/// Creating such a `str` would result in undefined behavior as per documentation
/// for `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`.
///
/// Only arguments written as a byte string literal, or as a borrowed array
/// whose elements are all byte or integer literals, are checked. Any other
/// argument is left alone because its contents are not known to the lint.
pub INVALID_FROM_UTF8_UNCHECKED,
Deny,
"using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
}
// Declares the `InvalidFromUtf8` lint pass and registers `INVALID_FROM_UTF8_UNCHECKED`
// as the (only) lint it can emit.
declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED]);
impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
    /// Reports calls to `std::str::from_utf8_unchecked` and
    /// `std::str::from_utf8_unchecked_mut` whose single argument is a literal
    /// that is not valid UTF-8.
    ///
    /// Two argument shapes are inspected: a byte string literal, and a borrowed
    /// array whose elements are all byte or integer literals. Everything else is
    /// ignored.
    fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
        // Guard clauses: only a one-argument call through a path that resolves to
        // one of the two targeted diagnostic items is of interest.
        let ExprKind::Call(callee, [arg]) = expr.kind else { return };
        let ExprKind::Path(ref qpath) = callee.kind else { return };
        let Some(def_id) = cx.qpath_res(qpath, callee.hir_id).opt_def_id() else { return };
        let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id) else { return };
        if diag_item != sym::str_from_utf8_unchecked
            && diag_item != sym::str_from_utf8_unchecked_mut
        {
            return;
        }

        // Emits the lint on the whole call, labelling the offending argument.
        let report = |err: Utf8Error| {
            // Both diagnostic item names are the function name prefixed with `str_`
            // (checked just above), so stripping the prefix cannot fail.
            let method = diag_item.as_str().strip_prefix("str_").unwrap();
            let diag = InvalidFromUtf8UncheckedDiag {
                method: format!("std::str::{method}"),
                valid_up_to: err.valid_up_to(),
                label: arg.span,
            };
            cx.emit_spanned_lint(INVALID_FROM_UTF8_UNCHECKED, expr.span, diag)
        };

        match &arg.kind {
            // `b"..."`: the bytes are directly available from the literal.
            ExprKind::Lit(Spanned { node: LitKind::ByteStr(bytes, _), .. }) => {
                if let Err(err) = std::str::from_utf8(bytes) {
                    report(err);
                }
            }
            // `&[..]` / `&mut [..]`: usable only if every element is a literal.
            ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(elems), .. }) => {
                let mut bytes = Vec::with_capacity(elems.len());
                for elem in elems.iter() {
                    // A single non-literal element makes the contents unknown;
                    // nothing follows the `match`, so bailing out here is the
                    // same as skipping this arm.
                    let ExprKind::Lit(Spanned { node, .. }) = &elem.kind else { return };
                    match node {
                        LitKind::Byte(b) => bytes.push(*b),
                        // Truncating cast kept as-is: elements are typed `u8`.
                        LitKind::Int(v, _) => bytes.push(*v as u8),
                        _ => return,
                    }
                }
                if let Err(err) = std::str::from_utf8(&bytes) {
                    report(err);
                }
            }
            _ => {}
        }
    }
}

View File

@ -60,6 +60,7 @@ mod expect;
mod for_loops_over_fallibles;
pub mod hidden_unicode_codepoints;
mod internal;
mod invalid_from_utf8;
mod late;
mod let_underscore;
mod levels;
@ -102,6 +103,7 @@ use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums;
use for_loops_over_fallibles::*;
use hidden_unicode_codepoints::*;
use internal::*;
use invalid_from_utf8::*;
use let_underscore::*;
use map_unit_fn::*;
use methods::*;
@ -207,6 +209,7 @@ late_lint_methods!(
HardwiredLints: HardwiredLints,
ImproperCTypesDeclarations: ImproperCTypesDeclarations,
ImproperCTypesDefinitions: ImproperCTypesDefinitions,
InvalidFromUtf8: InvalidFromUtf8,
VariantSizeDifferences: VariantSizeDifferences,
BoxPointers: BoxPointers,
PathStatements: PathStatements,

View File

@ -699,6 +699,16 @@ pub struct ForgetCopyDiag<'a> {
pub label: Span,
}
// invalid_from_utf8.rs
// Diagnostic payload for the `invalid_from_utf8_unchecked` lint, rendered with the
// `lint_invalid_from_utf8_unchecked` message.
#[derive(LintDiagnostic)]
#[diag(lint_invalid_from_utf8_unchecked)]
pub struct InvalidFromUtf8UncheckedDiag {
// Path of the called function as shown to the user; fills `{$method}` in the message.
pub method: String,
// Value of `Utf8Error::valid_up_to()` for the literal; fills `{$valid_up_to}` in the label.
pub valid_up_to: usize,
// Span that receives the `.label` text (the lint pass passes the argument's span).
#[label]
pub label: Span,
}
// hidden_unicode_codepoints.rs
#[derive(LintDiagnostic)]
#[diag(lint_hidden_unicode_codepoints)]

View File

@ -1454,6 +1454,8 @@ symbols! {
stop_after_dataflow,
store,
str,
str_from_utf8_unchecked,
str_from_utf8_unchecked_mut,
str_split_whitespace,
str_trim,
str_trim_end,

View File

@ -167,6 +167,7 @@ pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
#[must_use]
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_str_from_utf8_unchecked", since = "1.55.0")]
#[rustc_diagnostic_item = "str_from_utf8_unchecked"]
pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
// SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8.
// Also relies on `&str` and `&[u8]` having the same layout.
@ -194,6 +195,7 @@ pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
#[must_use]
#[stable(feature = "str_mut_extras", since = "1.20.0")]
#[rustc_const_unstable(feature = "const_str_from_utf8_unchecked_mut", issue = "91005")]
#[rustc_diagnostic_item = "str_from_utf8_unchecked_mut"]
pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
// SAFETY: the caller must guarantee that the bytes `v`
// are valid UTF-8, thus the cast to `*mut str` is safe.

View File

@ -0,0 +1,49 @@
// check-pass
#![feature(concat_bytes)]
#![warn(invalid_from_utf8_unchecked)]
pub fn from_utf8_unchecked_mut() { // exercises the lint on `std::str::from_utf8_unchecked_mut`
// Valid: no warning is expected for any call in this block.
unsafe {
std::str::from_utf8_unchecked_mut(&mut [99, 108, 105, 112, 112, 121]);
std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'i', b'p', b'p', b'y']);
let x = 0xA0; // a variable, so the array below is not made of literals only
std::str::from_utf8_unchecked_mut(&mut [0xC0, x]); // not linted: contents unknown to the lint (the bytes are in fact not valid UTF-8)
}
// Invalid: every call in this block must produce the warning (byte 130 / 0x82 is a stray continuation byte).
unsafe {
std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
//~^ WARN calls to `std::str::from_utf8_unchecked_mut`
std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
//~^ WARN calls to `std::str::from_utf8_unchecked_mut`
}
}
pub fn from_utf8_unchecked() { // exercises the lint on `std::str::from_utf8_unchecked`
// Valid: no warning is expected for any call in this block.
unsafe {
std::str::from_utf8_unchecked(&[99, 108, 105, 112, 112, 121]);
std::str::from_utf8_unchecked(&[b'c', b'l', b'i', b'p', b'p', b'y']);
std::str::from_utf8_unchecked(b"clippy");
let x = 0xA0; // a variable, so the array below is not made of literals only
std::str::from_utf8_unchecked(&[0xC0, x]); // not linted: contents unknown to the lint (the bytes are in fact not valid UTF-8)
}
// Invalid: every call in this block must produce the warning (byte 130 / 0x82 is a stray continuation byte).
unsafe {
std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
//~^ WARN calls to `std::str::from_utf8_unchecked`
std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
//~^ WARN calls to `std::str::from_utf8_unchecked`
std::str::from_utf8_unchecked(b"cl\x82ippy");
//~^ WARN calls to `std::str::from_utf8_unchecked`
std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
//~^ WARN calls to `std::str::from_utf8_unchecked`
}
}
fn main() {} // intentionally empty: this test only checks compile-time diagnostics (`check-pass`)

View File

@ -0,0 +1,56 @@
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:18:9
|
LL | std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
|
note: the lint level is defined here
--> $DIR/invalid_from_utf8.rs:4:9
|
LL | #![warn(invalid_from_utf8_unchecked)]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:20:9
|
LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:38:9
|
LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:40:9
|
LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:42:9
|
LL | std::str::from_utf8_unchecked(b"cl\x82ippy");
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:44:9
|
LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
warning: 6 warnings emitted