diff --git a/Cargo.lock b/Cargo.lock index 17856e1f52a..b1bdaef81ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3670,6 +3670,7 @@ dependencies = [ "rustc_metadata", "rustc_middle", "rustc_query_system", + "rustc_sanitizers", "rustc_session", "rustc_span", "rustc_symbol_mangling", @@ -4558,6 +4559,21 @@ dependencies = [ "tracing", ] +[[package]] +name = "rustc_sanitizers" +version = "0.0.0" +dependencies = [ + "bitflags 2.5.0", + "rustc_data_structures", + "rustc_hir", + "rustc_middle", + "rustc_span", + "rustc_target", + "rustc_trait_selection", + "tracing", + "twox-hash", +] + [[package]] name = "rustc_serialize" version = "0.0.0" @@ -4633,7 +4649,6 @@ dependencies = [ name = "rustc_symbol_mangling" version = "0.0.0" dependencies = [ - "bitflags 2.5.0", "punycode", "rustc-demangle", "rustc_data_structures", @@ -4643,9 +4658,7 @@ dependencies = [ "rustc_session", "rustc_span", "rustc_target", - "rustc_trait_selection", "tracing", - "twox-hash", ] [[package]] diff --git a/compiler/rustc_codegen_llvm/Cargo.toml b/compiler/rustc_codegen_llvm/Cargo.toml index 3fda59e8b52..bb5045ec872 100644 --- a/compiler/rustc_codegen_llvm/Cargo.toml +++ b/compiler/rustc_codegen_llvm/Cargo.toml @@ -28,6 +28,7 @@ rustc_macros = { path = "../rustc_macros" } rustc_metadata = { path = "../rustc_metadata" } rustc_middle = { path = "../rustc_middle" } rustc_query_system = { path = "../rustc_query_system" } +rustc_sanitizers = { path = "../rustc_sanitizers" } rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } rustc_symbol_mangling = { path = "../rustc_symbol_mangling" } diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 1a32958d362..b7235972204 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -20,12 +20,9 @@ use rustc_middle::ty::layout::{ FnAbiError, FnAbiOfHelpers, FnAbiRequest, LayoutError, LayoutOfHelpers, TyAndLayout, }; use rustc_middle::ty::{self, Instance, Ty, TyCtxt}; +use rustc_sanitizers::{cfi, kcfi}; use rustc_session::config::OptLevel; use rustc_span::Span; -use rustc_symbol_mangling::typeid::{ - kcfi_typeid_for_fnabi, kcfi_typeid_for_instance, typeid_for_fnabi, typeid_for_instance, - TypeIdOptions, -}; use rustc_target::abi::{self, call::FnAbi, Align, Size, WrappingRange}; use rustc_target::spec::{HasTargetSpec, SanitizerSet, Target}; use smallvec::SmallVec; @@ -1632,18 +1629,18 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> { return; } - let mut options = TypeIdOptions::empty(); + let mut options = cfi::TypeIdOptions::empty(); if self.tcx.sess.is_sanitizer_cfi_generalize_pointers_enabled() { - options.insert(TypeIdOptions::GENERALIZE_POINTERS); + options.insert(cfi::TypeIdOptions::GENERALIZE_POINTERS); } if self.tcx.sess.is_sanitizer_cfi_normalize_integers_enabled() { - options.insert(TypeIdOptions::NORMALIZE_INTEGERS); + options.insert(cfi::TypeIdOptions::NORMALIZE_INTEGERS); } let typeid = if let Some(instance) = instance { - typeid_for_instance(self.tcx, instance, options) + cfi::typeid_for_instance(self.tcx, instance, options) } else { - typeid_for_fnabi(self.tcx, fn_abi, options) + cfi::typeid_for_fnabi(self.tcx, fn_abi, options) }; let typeid_metadata = self.cx.typeid_metadata(typeid).unwrap(); @@ -1680,18 +1677,18 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> { return None; } - let mut options = TypeIdOptions::empty(); + let mut options = kcfi::TypeIdOptions::empty(); if self.tcx.sess.is_sanitizer_cfi_generalize_pointers_enabled() { - options.insert(TypeIdOptions::GENERALIZE_POINTERS); + options.insert(kcfi::TypeIdOptions::GENERALIZE_POINTERS); } if self.tcx.sess.is_sanitizer_cfi_normalize_integers_enabled() { - options.insert(TypeIdOptions::NORMALIZE_INTEGERS); + options.insert(kcfi::TypeIdOptions::NORMALIZE_INTEGERS); } let kcfi_typeid = if let Some(instance) = instance { - kcfi_typeid_for_instance(self.tcx, instance, options) + kcfi::typeid_for_instance(self.tcx, instance, options) } else { - kcfi_typeid_for_fnabi(self.tcx, fn_abi, options) + kcfi::typeid_for_fnabi(self.tcx, fn_abi, options) }; Some(llvm::OperandBundleDef::new("kcfi", &[self.const_u32(kcfi_typeid)])) diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs index f86cdcaa6f7..7117c4a0ed9 100644 --- a/compiler/rustc_codegen_llvm/src/declare.rs +++ b/compiler/rustc_codegen_llvm/src/declare.rs @@ -22,10 +22,7 @@ use itertools::Itertools; use rustc_codegen_ssa::traits::TypeMembershipMethods; use rustc_data_structures::fx::FxIndexSet; use rustc_middle::ty::{Instance, Ty}; -use rustc_symbol_mangling::typeid::{ - kcfi_typeid_for_fnabi, kcfi_typeid_for_instance, typeid_for_fnabi, typeid_for_instance, - TypeIdOptions, -}; +use rustc_sanitizers::{cfi, kcfi}; use smallvec::SmallVec; /// Declare a function. @@ -145,27 +142,29 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { if let Some(instance) = instance { let mut typeids = FxIndexSet::default(); for options in [ - TypeIdOptions::GENERALIZE_POINTERS, - TypeIdOptions::NORMALIZE_INTEGERS, - TypeIdOptions::USE_CONCRETE_SELF, + cfi::TypeIdOptions::GENERALIZE_POINTERS, + cfi::TypeIdOptions::NORMALIZE_INTEGERS, + cfi::TypeIdOptions::USE_CONCRETE_SELF, ] .into_iter() .powerset() - .map(TypeIdOptions::from_iter) + .map(cfi::TypeIdOptions::from_iter) { - let typeid = typeid_for_instance(self.tcx, instance, options); + let typeid = cfi::typeid_for_instance(self.tcx, instance, options); if typeids.insert(typeid.clone()) { self.add_type_metadata(llfn, typeid); } } } else { - for options in - [TypeIdOptions::GENERALIZE_POINTERS, TypeIdOptions::NORMALIZE_INTEGERS] - .into_iter() - .powerset() - .map(TypeIdOptions::from_iter) + for options in [ + cfi::TypeIdOptions::GENERALIZE_POINTERS, + cfi::TypeIdOptions::NORMALIZE_INTEGERS, + ] + .into_iter() + .powerset() + .map(cfi::TypeIdOptions::from_iter) { - let typeid = typeid_for_fnabi(self.tcx, fn_abi, options); + let typeid = cfi::typeid_for_fnabi(self.tcx, fn_abi, options); self.add_type_metadata(llfn, typeid); } } @@ -173,19 +172,19 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { if self.tcx.sess.is_sanitizer_kcfi_enabled() { // LLVM KCFI does not support multiple !kcfi_type attachments - let mut options = TypeIdOptions::empty(); + let mut options = kcfi::TypeIdOptions::empty(); if self.tcx.sess.is_sanitizer_cfi_generalize_pointers_enabled() { - options.insert(TypeIdOptions::GENERALIZE_POINTERS); + options.insert(kcfi::TypeIdOptions::GENERALIZE_POINTERS); } if self.tcx.sess.is_sanitizer_cfi_normalize_integers_enabled() { - options.insert(TypeIdOptions::NORMALIZE_INTEGERS); + options.insert(kcfi::TypeIdOptions::NORMALIZE_INTEGERS); } if let Some(instance) = instance { - let kcfi_typeid = kcfi_typeid_for_instance(self.tcx, instance, options); + let kcfi_typeid = kcfi::typeid_for_instance(self.tcx, instance, options); self.set_kcfi_type_metadata(llfn, kcfi_typeid); } else { - let kcfi_typeid = kcfi_typeid_for_fnabi(self.tcx, fn_abi, options); + let kcfi_typeid = kcfi::typeid_for_fnabi(self.tcx, fn_abi, options); self.set_kcfi_type_metadata(llfn, kcfi_typeid); } } diff --git a/compiler/rustc_sanitizers/Cargo.toml b/compiler/rustc_sanitizers/Cargo.toml new file mode 100644 index 00000000000..aea2f7dda7f --- /dev/null +++ b/compiler/rustc_sanitizers/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "rustc_sanitizers" +version = "0.0.0" +edition = "2021" + +[dependencies] +bitflags = "2.5.0" +tracing = "0.1" +twox-hash = "1.6.3" +rustc_data_structures = { path = "../rustc_data_structures" } +rustc_hir = { path = "../rustc_hir" } +rustc_middle = { path = "../rustc_middle" } +rustc_span = { path = "../rustc_span" } +rustc_target = { path = "../rustc_target" } +rustc_trait_selection = { path = "../rustc_trait_selection" } diff --git a/compiler/rustc_sanitizers/README.md b/compiler/rustc_sanitizers/README.md new file mode 100644 index 00000000000..d2e8f5d3a97 --- /dev/null +++ b/compiler/rustc_sanitizers/README.md @@ -0,0 +1,2 @@ +The `rustc_sanitizers` crate contains the source code for providing support for +the [sanitizers](https://github.com/google/sanitizers) to the Rust compiler. diff --git a/compiler/rustc_sanitizers/src/cfi/mod.rs b/compiler/rustc_sanitizers/src/cfi/mod.rs new file mode 100644 index 00000000000..90dab5e0333 --- /dev/null +++ b/compiler/rustc_sanitizers/src/cfi/mod.rs @@ -0,0 +1,6 @@ +//! LLVM Control Flow Integrity (CFI) and cross-language LLVM CFI support for the Rust compiler. +//! +//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler, +//! see design document in the tracking issue #89653. +pub mod typeid; +pub use crate::cfi::typeid::{typeid_for_fnabi, typeid_for_instance, TypeIdOptions}; diff --git a/compiler/rustc_symbol_mangling/src/typeid/typeid_itanium_cxx_abi.rs b/compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/encode.rs similarity index 54% rename from compiler/rustc_symbol_mangling/src/typeid/typeid_itanium_cxx_abi.rs rename to compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/encode.rs index 6078d901711..ed7cd8c2da7 100644 --- a/compiler/rustc_symbol_mangling/src/typeid/typeid_itanium_cxx_abi.rs +++ b/compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/encode.rs @@ -1,76 +1,46 @@ -/// Type metadata identifiers (using Itanium C++ ABI mangling for encoding) for LLVM Control Flow -/// Integrity (CFI) and cross-language LLVM CFI support. -/// -/// Encodes type metadata identifiers for LLVM CFI and cross-language LLVM CFI support using Itanium -/// C++ ABI mangling for encoding with vendor extended type qualifiers and types for Rust types that -/// are not used across the FFI boundary. -/// -/// For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler, -/// see design document in the tracking issue #89653. +//! Encodes type metadata identifiers for LLVM CFI and cross-language LLVM CFI support using Itanium +//! C++ ABI mangling for encoding with vendor extended type qualifiers and types for Rust types that +//! are not used across the FFI boundary. +//! +//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler, +//! see design document in the tracking issue #89653. use rustc_data_structures::base_n; use rustc_data_structures::fx::FxHashMap; use rustc_hir as hir; -use rustc_hir::lang_items::LangItem; -use rustc_middle::ty::fold::{TypeFolder, TypeSuperFoldable}; +use rustc_middle::bug; use rustc_middle::ty::layout::IntegerExt; use rustc_middle::ty::{ - self, Const, ExistentialPredicate, FloatTy, FnSig, Instance, IntTy, List, Region, RegionKind, - TermKind, Ty, TyCtxt, UintTy, + self, Const, ExistentialPredicate, FloatTy, FnSig, GenericArg, GenericArgKind, GenericArgsRef, + IntTy, List, Region, RegionKind, TermKind, Ty, TyCtxt, TypeFoldable, UintTy, }; -use rustc_middle::ty::{GenericArg, GenericArgKind, GenericArgsRef}; -use rustc_middle::ty::{TypeFoldable, TypeVisitableExt}; use rustc_span::def_id::DefId; use rustc_span::sym; -use rustc_target::abi::call::{Conv, FnAbi, PassMode}; use rustc_target::abi::Integer; use rustc_target::spec::abi::Abi; -use rustc_trait_selection::traits; use std::fmt::Write as _; -use std::iter; +use tracing::instrument; -use crate::typeid::TypeIdOptions; +use crate::cfi::typeid::itanium_cxx_abi::transform::{TransformTy, TransformTyOptions}; +use crate::cfi::typeid::TypeIdOptions; -/// Type and extended type qualifiers. -#[derive(Eq, Hash, PartialEq)] -enum TyQ { - None, - Const, - Mut, -} +/// Options for encode_ty. +pub type EncodeTyOptions = TypeIdOptions; /// Substitution dictionary key. #[derive(Eq, Hash, PartialEq)] -enum DictKey<'tcx> { +pub enum DictKey<'tcx> { Ty(Ty<'tcx>, TyQ), Region(Region<'tcx>), Const(Const<'tcx>), Predicate(ExistentialPredicate<'tcx>), } -/// Options for encode_ty. -type EncodeTyOptions = TypeIdOptions; - -/// Options for transform_ty. -type TransformTyOptions = TypeIdOptions; - -/// Converts a number to a disambiguator (see -/// ). -fn to_disambiguator(num: u64) -> String { - if let Some(num) = num.checked_sub(1) { - format!("s{}_", base_n::encode(num as u128, 62)) - } else { - "s_".to_string() - } -} - -/// Converts a number to a sequence number (see -/// ). -fn to_seq_id(num: usize) -> String { - if let Some(num) = num.checked_sub(1) { - base_n::encode(num as u128, 36).to_uppercase() - } else { - "".to_string() - } +/// Type and extended type qualifiers. +#[derive(Eq, Hash, PartialEq)] +pub enum TyQ { + None, + Const, + Mut, } /// Substitutes a component if found in the substitution dictionary (see @@ -91,6 +61,37 @@ fn compress<'tcx>( } } +/// Encodes args using the Itanium C++ ABI with vendor extended type qualifiers and types for Rust +/// types that are not used at the FFI boundary. +fn encode_args<'tcx>( + tcx: TyCtxt<'tcx>, + args: GenericArgsRef<'tcx>, + dict: &mut FxHashMap, usize>, + options: EncodeTyOptions, +) -> String { + // [IE] as part of vendor extended type + let mut s = String::new(); + let args: Vec> = args.iter().collect(); + if !args.is_empty() { + s.push('I'); + for arg in args { + match arg.unpack() { + GenericArgKind::Lifetime(region) => { + s.push_str(&encode_region(region, dict)); + } + GenericArgKind::Type(ty) => { + s.push_str(&encode_ty(tcx, ty, dict, options)); + } + GenericArgKind::Const(c) => { + s.push_str(&encode_const(tcx, c, dict, options)); + } + } + } + s.push('E'); + } + s +} + /// Encodes a const using the Itanium C++ ABI as a literal argument (see /// ). fn encode_const<'tcx>( @@ -159,7 +160,6 @@ fn encode_const<'tcx>( /// Encodes a FnSig using the Itanium C++ ABI with vendor extended type qualifiers and types for /// Rust types that are not used at the FFI boundary. -#[instrument(level = "trace", skip(tcx, dict))] fn encode_fnsig<'tcx>( tcx: TyCtxt<'tcx>, fn_sig: &FnSig<'tcx>, @@ -299,137 +299,10 @@ fn encode_region<'tcx>(region: Region<'tcx>, dict: &mut FxHashMap, s } -/// Encodes args using the Itanium C++ ABI with vendor extended type qualifiers and types for Rust -/// types that are not used at the FFI boundary. -fn encode_args<'tcx>( - tcx: TyCtxt<'tcx>, - args: GenericArgsRef<'tcx>, - dict: &mut FxHashMap, usize>, - options: EncodeTyOptions, -) -> String { - // [IE] as part of vendor extended type - let mut s = String::new(); - let args: Vec> = args.iter().collect(); - if !args.is_empty() { - s.push('I'); - for arg in args { - match arg.unpack() { - GenericArgKind::Lifetime(region) => { - s.push_str(&encode_region(region, dict)); - } - GenericArgKind::Type(ty) => { - s.push_str(&encode_ty(tcx, ty, dict, options)); - } - GenericArgKind::Const(c) => { - s.push_str(&encode_const(tcx, c, dict, options)); - } - } - } - s.push('E'); - } - s -} - -/// Encodes a ty:Ty name, including its crate and path disambiguators and names. -fn encode_ty_name(tcx: TyCtxt<'_>, def_id: DefId) -> String { - // Encode for use in u[IE], where - // is , using v0's without v0's extended form of paths: - // - // N..N - // C - // .. - // - // With additional tags for DefPathData::Impl and DefPathData::ForeignMod. For instance: - // - // pub type Type1 = impl Send; - // let _: Type1 = >::foo; - // fn foo1(_: Type1) { } - // - // pub type Type2 = impl Send; - // let _: Type2 = >::foo; - // fn foo2(_: Type2) { } - // - // pub type Type3 = impl Send; - // let _: Type3 = >::foo; - // fn foo3(_: Type3) { } - // - // pub type Type4 = impl Send; - // let _: Type4 = as Trait1>::foo; - // fn foo3(_: Type4) { } - // - // Are encoded as: - // - // _ZTSFvu29NvNIC1234_5crate8{{impl}}3fooIu3i32EE - // _ZTSFvu27NvNtC1234_5crate6Trait13fooIu3dynIu21NtC1234_5crate6Trait1Iu3i32Eu6regionES_EE - // _ZTSFvu27NvNtC1234_5crate6Trait13fooIu3i32S_EE - // _ZTSFvu27NvNtC1234_5crate6Trait13fooIu22NtC1234_5crate7Struct1Iu3i32ES_EE - // - // The reason for not using v0's extended form of paths is to use a consistent and simpler - // encoding, as the reasoning for using it isn't relevant for type metadata identifiers (i.e., - // keep symbol names close to how methods are represented in error messages). See - // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html#methods. - let mut s = String::new(); - - // Start and namespace tags - let mut def_path = tcx.def_path(def_id); - def_path.data.reverse(); - for disambiguated_data in &def_path.data { - s.push('N'); - s.push_str(match disambiguated_data.data { - hir::definitions::DefPathData::Impl => "I", // Not specified in v0's - hir::definitions::DefPathData::ForeignMod => "F", // Not specified in v0's - hir::definitions::DefPathData::TypeNs(..) => "t", - hir::definitions::DefPathData::ValueNs(..) => "v", - hir::definitions::DefPathData::Closure => "C", - hir::definitions::DefPathData::Ctor => "c", - hir::definitions::DefPathData::AnonConst => "k", - hir::definitions::DefPathData::OpaqueTy => "i", - hir::definitions::DefPathData::CrateRoot - | hir::definitions::DefPathData::Use - | hir::definitions::DefPathData::GlobalAsm - | hir::definitions::DefPathData::MacroNs(..) - | hir::definitions::DefPathData::LifetimeNs(..) - | hir::definitions::DefPathData::AnonAdt => { - bug!("encode_ty_name: unexpected `{:?}`", disambiguated_data.data); - } - }); - } - - // Crate disambiguator and name - s.push('C'); - s.push_str(&to_disambiguator(tcx.stable_crate_id(def_path.krate).as_u64())); - let crate_name = tcx.crate_name(def_path.krate).to_string(); - let _ = write!(s, "{}{}", crate_name.len(), &crate_name); - - // Disambiguators and names - def_path.data.reverse(); - for disambiguated_data in &def_path.data { - let num = disambiguated_data.disambiguator as u64; - if num > 0 { - s.push_str(&to_disambiguator(num)); - } - - let name = disambiguated_data.data.to_string(); - let _ = write!(s, "{}", name.len()); - - // Prepend a '_' if name starts with a digit or '_' - if let Some(first) = name.as_bytes().first() { - if first.is_ascii_digit() || *first == b'_' { - s.push('_'); - } - } else { - bug!("encode_ty_name: invalid name `{:?}`", name); - } - - s.push_str(&name); - } - - s -} - /// Encodes a ty:Ty using the Itanium C++ ABI with vendor extended type qualifiers and types for /// Rust types that are not used at the FFI boundary. -fn encode_ty<'tcx>( +#[instrument(level = "trace", skip(tcx, dict))] +pub fn encode_ty<'tcx>( tcx: TyCtxt<'tcx>, ty: Ty<'tcx>, dict: &mut FxHashMap, usize>, @@ -762,486 +635,119 @@ fn encode_ty<'tcx>( typeid } -struct TransformTy<'tcx> { - tcx: TyCtxt<'tcx>, - options: TransformTyOptions, - parents: Vec>, -} +/// Encodes a ty:Ty name, including its crate and path disambiguators and names. +fn encode_ty_name(tcx: TyCtxt<'_>, def_id: DefId) -> String { + // Encode for use in u[IE], where + // is , using v0's without v0's extended form of paths: + // + // N..N + // C + // .. + // + // With additional tags for DefPathData::Impl and DefPathData::ForeignMod. For instance: + // + // pub type Type1 = impl Send; + // let _: Type1 = >::foo; + // fn foo1(_: Type1) { } + // + // pub type Type2 = impl Send; + // let _: Type2 = >::foo; + // fn foo2(_: Type2) { } + // + // pub type Type3 = impl Send; + // let _: Type3 = >::foo; + // fn foo3(_: Type3) { } + // + // pub type Type4 = impl Send; + // let _: Type4 = as Trait1>::foo; + // fn foo3(_: Type4) { } + // + // Are encoded as: + // + // _ZTSFvu29NvNIC1234_5crate8{{impl}}3fooIu3i32EE + // _ZTSFvu27NvNtC1234_5crate6Trait13fooIu3dynIu21NtC1234_5crate6Trait1Iu3i32Eu6regionES_EE + // _ZTSFvu27NvNtC1234_5crate6Trait13fooIu3i32S_EE + // _ZTSFvu27NvNtC1234_5crate6Trait13fooIu22NtC1234_5crate7Struct1Iu3i32ES_EE + // + // The reason for not using v0's extended form of paths is to use a consistent and simpler + // encoding, as the reasoning for using it isn't relevant for type metadata identifiers (i.e., + // keep symbol names close to how methods are represented in error messages). See + // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html#methods. + let mut s = String::new(); -impl<'tcx> TransformTy<'tcx> { - fn new(tcx: TyCtxt<'tcx>, options: TransformTyOptions) -> Self { - TransformTy { tcx, options, parents: Vec::new() } - } -} - -impl<'tcx> TypeFolder> for TransformTy<'tcx> { - // Transforms a ty:Ty for being encoded and used in the substitution dictionary. It transforms - // all c_void types into unit types unconditionally, generalizes pointers if - // TransformTyOptions::GENERALIZE_POINTERS option is set, and normalizes integers if - // TransformTyOptions::NORMALIZE_INTEGERS option is set. - fn fold_ty(&mut self, t: Ty<'tcx>) -> Ty<'tcx> { - match t.kind() { - ty::Array(..) - | ty::Closure(..) - | ty::Coroutine(..) - | ty::CoroutineClosure(..) - | ty::CoroutineWitness(..) - | ty::Dynamic(..) - | ty::Float(..) - | ty::FnDef(..) - | ty::Foreign(..) - | ty::Never - | ty::Slice(..) - | ty::Pat(..) - | ty::Str - | ty::Tuple(..) => t.super_fold_with(self), - - ty::Bool => { - if self.options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) { - // Note: on all platforms that Rust's currently supports, its size and alignment - // are 1, and its ABI class is INTEGER - see Rust Layout and ABIs. - // - // (See https://rust-lang.github.io/unsafe-code-guidelines/layout/scalars.html#bool.) - // - // Clang represents bool as an 8-bit unsigned integer. - self.tcx.types.u8 - } else { - t - } + // Start and namespace tags + let mut def_path = tcx.def_path(def_id); + def_path.data.reverse(); + for disambiguated_data in &def_path.data { + s.push('N'); + s.push_str(match disambiguated_data.data { + hir::definitions::DefPathData::Impl => "I", // Not specified in v0's + hir::definitions::DefPathData::ForeignMod => "F", // Not specified in v0's + hir::definitions::DefPathData::TypeNs(..) => "t", + hir::definitions::DefPathData::ValueNs(..) => "v", + hir::definitions::DefPathData::Closure => "C", + hir::definitions::DefPathData::Ctor => "c", + hir::definitions::DefPathData::AnonConst => "k", + hir::definitions::DefPathData::OpaqueTy => "i", + hir::definitions::DefPathData::CrateRoot + | hir::definitions::DefPathData::Use + | hir::definitions::DefPathData::GlobalAsm + | hir::definitions::DefPathData::MacroNs(..) + | hir::definitions::DefPathData::LifetimeNs(..) + | hir::definitions::DefPathData::AnonAdt => { + bug!("encode_ty_name: unexpected `{:?}`", disambiguated_data.data); } - - ty::Char => { - if self.options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) { - // Since #118032, char is guaranteed to have the same size, alignment, and - // function call ABI as u32 on all platforms. - self.tcx.types.u32 - } else { - t - } - } - - ty::Int(..) | ty::Uint(..) => { - if self.options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) { - // Note: C99 7.18.2.4 requires uintptr_t and intptr_t to be at least 16-bit - // wide. All platforms we currently support have a C platform, and as a - // consequence, isize/usize are at least 16-bit wide for all of them. - // - // (See https://rust-lang.github.io/unsafe-code-guidelines/layout/scalars.html#isize-and-usize.) - match t.kind() { - ty::Int(IntTy::Isize) => match self.tcx.sess.target.pointer_width { - 16 => self.tcx.types.i16, - 32 => self.tcx.types.i32, - 64 => self.tcx.types.i64, - 128 => self.tcx.types.i128, - _ => bug!( - "fold_ty: unexpected pointer width `{}`", - self.tcx.sess.target.pointer_width - ), - }, - ty::Uint(UintTy::Usize) => match self.tcx.sess.target.pointer_width { - 16 => self.tcx.types.u16, - 32 => self.tcx.types.u32, - 64 => self.tcx.types.u64, - 128 => self.tcx.types.u128, - _ => bug!( - "fold_ty: unexpected pointer width `{}`", - self.tcx.sess.target.pointer_width - ), - }, - _ => t, - } - } else { - t - } - } - - ty::Adt(..) if t.is_c_void(self.tcx) => self.tcx.types.unit, - - ty::Adt(adt_def, args) => { - if adt_def.repr().transparent() && adt_def.is_struct() && !self.parents.contains(&t) - { - // Don't transform repr(transparent) types with an user-defined CFI encoding to - // preserve the user-defined CFI encoding. - if let Some(_) = self.tcx.get_attr(adt_def.did(), sym::cfi_encoding) { - return t; - } - let variant = adt_def.non_enum_variant(); - let param_env = self.tcx.param_env(variant.def_id); - let field = variant.fields.iter().find(|field| { - let ty = self.tcx.type_of(field.did).instantiate_identity(); - let is_zst = self - .tcx - .layout_of(param_env.and(ty)) - .is_ok_and(|layout| layout.is_zst()); - !is_zst - }); - if let Some(field) = field { - let ty0 = self.tcx.type_of(field.did).instantiate(self.tcx, args); - // Generalize any repr(transparent) user-defined type that is either a - // pointer or reference, and either references itself or any other type that - // contains or references itself, to avoid a reference cycle. - - // If the self reference is not through a pointer, for example, due - // to using `PhantomData`, need to skip normalizing it if we hit it again. - self.parents.push(t); - let ty = if ty0.is_any_ptr() && ty0.contains(t) { - let options = self.options; - self.options |= TransformTyOptions::GENERALIZE_POINTERS; - let ty = ty0.fold_with(self); - self.options = options; - ty - } else { - ty0.fold_with(self) - }; - self.parents.pop(); - ty - } else { - // Transform repr(transparent) types without non-ZST field into () - self.tcx.types.unit - } - } else { - t.super_fold_with(self) - } - } - - ty::Ref(..) => { - if self.options.contains(TransformTyOptions::GENERALIZE_POINTERS) { - if t.is_mutable_ptr() { - Ty::new_mut_ref(self.tcx, self.tcx.lifetimes.re_static, self.tcx.types.unit) - } else { - Ty::new_imm_ref(self.tcx, self.tcx.lifetimes.re_static, self.tcx.types.unit) - } - } else { - t.super_fold_with(self) - } - } - - ty::RawPtr(..) => { - if self.options.contains(TransformTyOptions::GENERALIZE_POINTERS) { - if t.is_mutable_ptr() { - Ty::new_mut_ptr(self.tcx, self.tcx.types.unit) - } else { - Ty::new_imm_ptr(self.tcx, self.tcx.types.unit) - } - } else { - t.super_fold_with(self) - } - } - - ty::FnPtr(..) => { - if self.options.contains(TransformTyOptions::GENERALIZE_POINTERS) { - Ty::new_imm_ptr(self.tcx, self.tcx.types.unit) - } else { - t.super_fold_with(self) - } - } - - ty::Alias(..) => { - self.fold_ty(self.tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), t)) - } - - ty::Bound(..) | ty::Error(..) | ty::Infer(..) | ty::Param(..) | ty::Placeholder(..) => { - bug!("fold_ty: unexpected `{:?}`", t.kind()); - } - } - } - - fn interner(&self) -> TyCtxt<'tcx> { - self.tcx - } -} - -/// Returns a type metadata identifier for the specified FnAbi using the Itanium C++ ABI with vendor -/// extended type qualifiers and types for Rust types that are not used at the FFI boundary. -#[instrument(level = "trace", skip(tcx))] -pub fn typeid_for_fnabi<'tcx>( - tcx: TyCtxt<'tcx>, - fn_abi: &FnAbi<'tcx, Ty<'tcx>>, - options: TypeIdOptions, -) -> String { - // A name is mangled by prefixing "_Z" to an encoding of its name, and in the case of functions - // its type. - let mut typeid = String::from("_Z"); - - // Clang uses the Itanium C++ ABI's virtual tables and RTTI typeinfo structure name as type - // metadata identifiers for function pointers. The typeinfo name encoding is a two-character - // code (i.e., 'TS') prefixed to the type encoding for the function. - typeid.push_str("TS"); - - // Function types are delimited by an "F..E" pair - typeid.push('F'); - - // A dictionary of substitution candidates used for compression (see - // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-compression). - let mut dict: FxHashMap, usize> = FxHashMap::default(); - - let mut encode_ty_options = EncodeTyOptions::from_bits(options.bits()) - .unwrap_or_else(|| bug!("typeid_for_fnabi: invalid option(s) `{:?}`", options.bits())); - match fn_abi.conv { - Conv::C => { - encode_ty_options.insert(EncodeTyOptions::GENERALIZE_REPR_C); - } - _ => { - encode_ty_options.remove(EncodeTyOptions::GENERALIZE_REPR_C); - } - } - - // Encode the return type - let transform_ty_options = TransformTyOptions::from_bits(options.bits()) - .unwrap_or_else(|| bug!("typeid_for_fnabi: invalid option(s) `{:?}`", options.bits())); - let mut type_folder = TransformTy::new(tcx, transform_ty_options); - let ty = fn_abi.ret.layout.ty.fold_with(&mut type_folder); - typeid.push_str(&encode_ty(tcx, ty, &mut dict, encode_ty_options)); - - // Encode the parameter types - - // We erase ZSTs as we go if the argument is skipped. This is an implementation detail of how - // MIR is currently treated by rustc, and subject to change in the future. Specifically, MIR - // interpretation today will allow skipped arguments to simply not be passed at a call-site. - if !fn_abi.c_variadic { - let mut pushed_arg = false; - for arg in fn_abi.args.iter().filter(|arg| arg.mode != PassMode::Ignore) { - pushed_arg = true; - let ty = arg.layout.ty.fold_with(&mut type_folder); - typeid.push_str(&encode_ty(tcx, ty, &mut dict, encode_ty_options)); - } - if !pushed_arg { - // Empty parameter lists, whether declared as () or conventionally as (void), are - // encoded with a void parameter specifier "v". - typeid.push('v'); - } - } else { - for n in 0..fn_abi.fixed_count as usize { - if fn_abi.args[n].mode == PassMode::Ignore { - continue; - } - let ty = fn_abi.args[n].layout.ty.fold_with(&mut type_folder); - typeid.push_str(&encode_ty(tcx, ty, &mut dict, encode_ty_options)); - } - - typeid.push('z'); - } - - // Close the "F..E" pair - typeid.push('E'); - - // Add encoding suffixes - if options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) { - typeid.push_str(".normalized"); - } - - if options.contains(EncodeTyOptions::GENERALIZE_POINTERS) { - typeid.push_str(".generalized"); - } - - typeid -} - -/// Returns a type metadata identifier for the specified Instance using the Itanium C++ ABI with -/// vendor extended type qualifiers and types for Rust types that are not used at the FFI boundary. -pub fn typeid_for_instance<'tcx>( - tcx: TyCtxt<'tcx>, - mut instance: Instance<'tcx>, - options: TypeIdOptions, -) -> String { - if (matches!(instance.def, ty::InstanceDef::Virtual(..)) - && Some(instance.def_id()) == tcx.lang_items().drop_in_place_fn()) - || matches!(instance.def, ty::InstanceDef::DropGlue(..)) - { - // Adjust the type ids of DropGlues - // - // DropGlues may have indirect calls to one or more given types drop function. Rust allows - // for types to be erased to any trait object and retains the drop function for the original - // type, which means at the indirect call sites in DropGlues, when typeid_for_fnabi is - // called a second time, it only has information after type erasure and it could be a call - // on any arbitrary trait object. Normalize them to a synthesized Drop trait object, both on - // declaration/definition, and during code generation at call sites so they have the same - // type id and match. - // - // FIXME(rcvalle): This allows a drop call on any trait object to call the drop function of - // any other type. - // - let def_id = tcx - .lang_items() - .drop_trait() - .unwrap_or_else(|| bug!("typeid_for_instance: couldn't get drop_trait lang item")); - let predicate = ty::ExistentialPredicate::Trait(ty::ExistentialTraitRef { - def_id: def_id, - args: List::empty(), }); - let predicates = tcx.mk_poly_existential_predicates(&[ty::Binder::dummy(predicate)]); - let self_ty = Ty::new_dynamic(tcx, predicates, tcx.lifetimes.re_erased, ty::Dyn); - instance.args = tcx.mk_args_trait(self_ty, List::empty()); - } else if let ty::InstanceDef::Virtual(def_id, _) = instance.def { - let upcast_ty = match tcx.trait_of_item(def_id) { - Some(trait_id) => trait_object_ty( - tcx, - ty::Binder::dummy(ty::TraitRef::from_method(tcx, trait_id, instance.args)), - ), - // drop_in_place won't have a defining trait, skip the upcast - None => instance.args.type_at(0), - }; - let stripped_ty = strip_receiver_auto(tcx, upcast_ty); - instance.args = tcx.mk_args_trait(stripped_ty, instance.args.into_iter().skip(1)); - } else if let ty::InstanceDef::VTableShim(def_id) = instance.def - && let Some(trait_id) = tcx.trait_of_item(def_id) - { - // VTableShims may have a trait method, but a concrete Self. This is not suitable for a vtable, - // as the caller will not know the concrete Self. - let trait_ref = ty::TraitRef::new(tcx, trait_id, instance.args); - let invoke_ty = trait_object_ty(tcx, ty::Binder::dummy(trait_ref)); - instance.args = tcx.mk_args_trait(invoke_ty, trait_ref.args.into_iter().skip(1)); } - if !options.contains(EncodeTyOptions::USE_CONCRETE_SELF) { - if let Some(impl_id) = tcx.impl_of_method(instance.def_id()) - && let Some(trait_ref) = tcx.impl_trait_ref(impl_id) - { - let impl_method = tcx.associated_item(instance.def_id()); - let method_id = impl_method - .trait_item_def_id - .expect("Part of a trait implementation, but not linked to the def_id?"); - let trait_method = tcx.associated_item(method_id); - let trait_id = trait_ref.skip_binder().def_id; - if traits::is_vtable_safe_method(tcx, trait_id, trait_method) - && tcx.object_safety_violations(trait_id).is_empty() - { - // Trait methods will have a Self polymorphic parameter, where the concreteized - // implementatation will not. We need to walk back to the more general trait method - let trait_ref = tcx.instantiate_and_normalize_erasing_regions( - instance.args, - ty::ParamEnv::reveal_all(), - trait_ref, - ); - let invoke_ty = trait_object_ty(tcx, ty::Binder::dummy(trait_ref)); + // Crate disambiguator and name + s.push('C'); + s.push_str(&to_disambiguator(tcx.stable_crate_id(def_path.krate).as_u64())); + let crate_name = tcx.crate_name(def_path.krate).to_string(); + let _ = write!(s, "{}{}", crate_name.len(), &crate_name); - // At the call site, any call to this concrete function through a vtable will be - // `Virtual(method_id, idx)` with appropriate arguments for the method. Since we have the - // original method id, and we've recovered the trait arguments, we can make the callee - // instance we're computing the alias set for match the caller instance. - // - // Right now, our code ignores the vtable index everywhere, so we use 0 as a placeholder. - // If we ever *do* start encoding the vtable index, we will need to generate an alias set - // based on which vtables we are putting this method into, as there will be more than one - // index value when supertraits are involved. - instance.def = ty::InstanceDef::Virtual(method_id, 0); - let abstract_trait_args = - tcx.mk_args_trait(invoke_ty, trait_ref.args.into_iter().skip(1)); - instance.args = instance.args.rebase_onto(tcx, impl_id, abstract_trait_args); - } - } else if tcx.is_closure_like(instance.def_id()) { - // We're either a closure or a coroutine. Our goal is to find the trait we're defined on, - // instantiate it, and take the type of its only method as our own. - let closure_ty = instance.ty(tcx, ty::ParamEnv::reveal_all()); - let (trait_id, inputs) = match closure_ty.kind() { - ty::Closure(..) => { - let closure_args = instance.args.as_closure(); - let trait_id = tcx.fn_trait_kind_to_def_id(closure_args.kind()).unwrap(); - let tuple_args = - tcx.instantiate_bound_regions_with_erased(closure_args.sig()).inputs()[0]; - (trait_id, Some(tuple_args)) - } - ty::Coroutine(..) => match tcx.coroutine_kind(instance.def_id()).unwrap() { - hir::CoroutineKind::Coroutine(..) => ( - tcx.require_lang_item(LangItem::Coroutine, None), - Some(instance.args.as_coroutine().resume_ty()), - ), - hir::CoroutineKind::Desugared(desugaring, _) => { - let lang_item = match desugaring { - hir::CoroutineDesugaring::Async => LangItem::Future, - hir::CoroutineDesugaring::AsyncGen => LangItem::AsyncIterator, - hir::CoroutineDesugaring::Gen => LangItem::Iterator, - }; - (tcx.require_lang_item(lang_item, None), None) - } - }, - ty::CoroutineClosure(..) => ( - tcx.require_lang_item(LangItem::FnOnce, None), - Some( - tcx.instantiate_bound_regions_with_erased( - instance.args.as_coroutine_closure().coroutine_closure_sig(), - ) - .tupled_inputs_ty, - ), - ), - x => bug!("Unexpected type kind for closure-like: {x:?}"), - }; - let concrete_args = tcx.mk_args_trait(closure_ty, inputs.map(Into::into)); - let trait_ref = ty::TraitRef::new(tcx, trait_id, concrete_args); - let invoke_ty = trait_object_ty(tcx, ty::Binder::dummy(trait_ref)); - let abstract_args = tcx.mk_args_trait(invoke_ty, trait_ref.args.into_iter().skip(1)); - // There should be exactly one method on this trait, and it should be the one we're - // defining. - let call = tcx - .associated_items(trait_id) - .in_definition_order() - .find(|it| it.kind == ty::AssocKind::Fn) - .expect("No call-family function on closure-like Fn trait?") - .def_id; - - instance.def = ty::InstanceDef::Virtual(call, 0); - instance.args = abstract_args; + // Disambiguators and names + def_path.data.reverse(); + for disambiguated_data in &def_path.data { + let num = disambiguated_data.disambiguator as u64; + if num > 0 { + s.push_str(&to_disambiguator(num)); } + + let name = disambiguated_data.data.to_string(); + let _ = write!(s, "{}", name.len()); + + // Prepend a '_' if name starts with a digit or '_' + if let Some(first) = name.as_bytes().first() { + if first.is_ascii_digit() || *first == b'_' { + s.push('_'); + } + } else { + bug!("encode_ty_name: invalid name `{:?}`", name); + } + + s.push_str(&name); } - let fn_abi = tcx - .fn_abi_of_instance(tcx.param_env(instance.def_id()).and((instance, ty::List::empty()))) - .unwrap_or_else(|error| { - bug!("typeid_for_instance: couldn't get fn_abi of instance {instance:?}: {error:?}") - }); - - typeid_for_fnabi(tcx, fn_abi, options) + s } -fn strip_receiver_auto<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Ty<'tcx> { - let ty::Dynamic(preds, lifetime, kind) = ty.kind() else { - bug!("Tried to strip auto traits from non-dynamic type {ty}"); - }; - if preds.principal().is_some() { - let filtered_preds = - tcx.mk_poly_existential_predicates_from_iter(preds.into_iter().filter(|pred| { - !matches!(pred.skip_binder(), ty::ExistentialPredicate::AutoTrait(..)) - })); - Ty::new_dynamic(tcx, filtered_preds, *lifetime, *kind) +/// Converts a number to a disambiguator (see +/// ). +fn to_disambiguator(num: u64) -> String { + if let Some(num) = num.checked_sub(1) { + format!("s{}_", base_n::encode(num as u128, 62)) } else { - // If there's no principal type, re-encode it as a unit, since we don't know anything - // about it. This technically discards the knowledge that it was a type that was made - // into a trait object at some point, but that's not a lot. - tcx.types.unit + "s_".to_string() } } -#[instrument(skip(tcx), ret)] -fn trait_object_ty<'tcx>(tcx: TyCtxt<'tcx>, poly_trait_ref: ty::PolyTraitRef<'tcx>) -> Ty<'tcx> { - assert!(!poly_trait_ref.has_non_region_param()); - let principal_pred = poly_trait_ref.map_bound(|trait_ref| { - ty::ExistentialPredicate::Trait(ty::ExistentialTraitRef::erase_self_ty(tcx, trait_ref)) - }); - let mut assoc_preds: Vec<_> = traits::supertraits(tcx, poly_trait_ref) - .flat_map(|super_poly_trait_ref| { - tcx.associated_items(super_poly_trait_ref.def_id()) - .in_definition_order() - .filter(|item| item.kind == ty::AssocKind::Type) - .map(move |assoc_ty| { - super_poly_trait_ref.map_bound(|super_trait_ref| { - let alias_ty = ty::AliasTy::new(tcx, assoc_ty.def_id, super_trait_ref.args); - let resolved = tcx.normalize_erasing_regions( - ty::ParamEnv::reveal_all(), - alias_ty.to_ty(tcx), - ); - debug!("Resolved {:?} -> {resolved}", alias_ty.to_ty(tcx)); - ty::ExistentialPredicate::Projection(ty::ExistentialProjection { - def_id: assoc_ty.def_id, - args: ty::ExistentialTraitRef::erase_self_ty(tcx, super_trait_ref).args, - term: resolved.into(), - }) - }) - }) - }) - .collect(); - assoc_preds.sort_by(|a, b| a.skip_binder().stable_cmp(tcx, &b.skip_binder())); - let preds = tcx.mk_poly_existential_predicates_from_iter( - iter::once(principal_pred).chain(assoc_preds.into_iter()), - ); - Ty::new_dynamic(tcx, preds, tcx.lifetimes.re_erased, ty::Dyn) +/// Converts a number to a sequence number (see +/// ). +fn to_seq_id(num: usize) -> String { + if let Some(num) = num.checked_sub(1) { + base_n::encode(num as u128, 36).to_uppercase() + } else { + "".to_string() + } } diff --git a/compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/mod.rs b/compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/mod.rs new file mode 100644 index 00000000000..b6182dc4e63 --- /dev/null +++ b/compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/mod.rs @@ -0,0 +1,123 @@ +//! Type metadata identifiers (using Itanium C++ ABI mangling for encoding) for LLVM Control Flow +//! Integrity (CFI) and cross-language LLVM CFI support. +//! +//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler, +//! see design document in the tracking issue #89653. +use rustc_data_structures::fx::FxHashMap; +use rustc_middle::bug; +use rustc_middle::ty::{self, Instance, Ty, TyCtxt, TypeFoldable}; +use rustc_target::abi::call::{Conv, FnAbi, PassMode}; +use tracing::instrument; + +mod encode; +mod transform; +use crate::cfi::typeid::itanium_cxx_abi::encode::{encode_ty, DictKey, EncodeTyOptions}; +use crate::cfi::typeid::itanium_cxx_abi::transform::{ + transform_instance, TransformTy, TransformTyOptions, +}; +use crate::cfi::typeid::TypeIdOptions; + +/// Returns a type metadata identifier for the specified FnAbi using the Itanium C++ ABI with vendor +/// extended type qualifiers and types for Rust types that are not used at the FFI boundary. +#[instrument(level = "trace", skip(tcx))] +pub fn typeid_for_fnabi<'tcx>( + tcx: TyCtxt<'tcx>, + fn_abi: &FnAbi<'tcx, Ty<'tcx>>, + options: TypeIdOptions, +) -> String { + // A name is mangled by prefixing "_Z" to an encoding of its name, and in the case of functions + // its type. + let mut typeid = String::from("_Z"); + + // Clang uses the Itanium C++ ABI's virtual tables and RTTI typeinfo structure name as type + // metadata identifiers for function pointers. The typeinfo name encoding is a two-character + // code (i.e., 'TS') prefixed to the type encoding for the function. + typeid.push_str("TS"); + + // Function types are delimited by an "F..E" pair + typeid.push('F'); + + // A dictionary of substitution candidates used for compression (see + // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-compression). + let mut dict: FxHashMap, usize> = FxHashMap::default(); + + let mut encode_ty_options = EncodeTyOptions::from_bits(options.bits()) + .unwrap_or_else(|| bug!("typeid_for_fnabi: invalid option(s) `{:?}`", options.bits())); + match fn_abi.conv { + Conv::C => { + encode_ty_options.insert(EncodeTyOptions::GENERALIZE_REPR_C); + } + _ => { + encode_ty_options.remove(EncodeTyOptions::GENERALIZE_REPR_C); + } + } + + // Encode the return type + let transform_ty_options = TransformTyOptions::from_bits(options.bits()) + .unwrap_or_else(|| bug!("typeid_for_fnabi: invalid option(s) `{:?}`", options.bits())); + let mut type_folder = TransformTy::new(tcx, transform_ty_options); + let ty = fn_abi.ret.layout.ty.fold_with(&mut type_folder); + typeid.push_str(&encode_ty(tcx, ty, &mut dict, encode_ty_options)); + + // Encode the parameter types + + // We erase ZSTs as we go if the argument is skipped. This is an implementation detail of how + // MIR is currently treated by rustc, and subject to change in the future. Specifically, MIR + // interpretation today will allow skipped arguments to simply not be passed at a call-site. + if !fn_abi.c_variadic { + let mut pushed_arg = false; + for arg in fn_abi.args.iter().filter(|arg| arg.mode != PassMode::Ignore) { + pushed_arg = true; + let ty = arg.layout.ty.fold_with(&mut type_folder); + typeid.push_str(&encode_ty(tcx, ty, &mut dict, encode_ty_options)); + } + if !pushed_arg { + // Empty parameter lists, whether declared as () or conventionally as (void), are + // encoded with a void parameter specifier "v". + typeid.push('v'); + } + } else { + for n in 0..fn_abi.fixed_count as usize { + if fn_abi.args[n].mode == PassMode::Ignore { + continue; + } + let ty = fn_abi.args[n].layout.ty.fold_with(&mut type_folder); + typeid.push_str(&encode_ty(tcx, ty, &mut dict, encode_ty_options)); + } + + typeid.push('z'); + } + + // Close the "F..E" pair + typeid.push('E'); + + // Add encoding suffixes + if options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) { + typeid.push_str(".normalized"); + } + + if options.contains(EncodeTyOptions::GENERALIZE_POINTERS) { + typeid.push_str(".generalized"); + } + + typeid +} + +/// Returns a type metadata identifier for the specified Instance using the Itanium C++ ABI with +/// vendor extended type qualifiers and types for Rust types that are not used at the FFI boundary. +#[instrument(level = "trace", skip(tcx))] +pub fn typeid_for_instance<'tcx>( + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + options: TypeIdOptions, +) -> String { + let transform_ty_options = TransformTyOptions::from_bits(options.bits()) + .unwrap_or_else(|| bug!("typeid_for_instance: invalid option(s) `{:?}`", options.bits())); + let instance = transform_instance(tcx, instance, transform_ty_options); + let fn_abi = tcx + .fn_abi_of_instance(tcx.param_env(instance.def_id()).and((instance, ty::List::empty()))) + .unwrap_or_else(|error| { + bug!("typeid_for_instance: couldn't get fn_abi of instance {instance:?}: {error:?}") + }); + typeid_for_fnabi(tcx, fn_abi, options) +} diff --git a/compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/transform.rs b/compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/transform.rs new file mode 100644 index 00000000000..7141c6c9bb0 --- /dev/null +++ b/compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/transform.rs @@ -0,0 +1,450 @@ +//! Transforms instances and types for LLVM CFI and cross-language LLVM CFI support using Itanium +//! C++ ABI mangling. +//! +//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler, +//! see design document in the tracking issue #89653. +use rustc_hir as hir; +use rustc_hir::LangItem; +use rustc_middle::bug; +use rustc_middle::ty::fold::{TypeFolder, TypeSuperFoldable}; +use rustc_middle::ty::{ + self, Instance, IntTy, List, Ty, TyCtxt, TypeFoldable, TypeVisitableExt, UintTy, +}; +use rustc_span::sym; +use rustc_trait_selection::traits; +use std::iter; +use tracing::{debug, instrument}; + +use crate::cfi::typeid::itanium_cxx_abi::encode::EncodeTyOptions; +use crate::cfi::typeid::TypeIdOptions; + +/// Options for transform_ty. +pub type TransformTyOptions = TypeIdOptions; + +pub struct TransformTy<'tcx> { + tcx: TyCtxt<'tcx>, + options: TransformTyOptions, + parents: Vec>, +} + +impl<'tcx> TransformTy<'tcx> { + pub fn new(tcx: TyCtxt<'tcx>, options: TransformTyOptions) -> Self { + TransformTy { tcx, options, parents: Vec::new() } + } +} + +/// Transforms a ty:Ty for being encoded and used in the substitution dictionary. +/// +/// * Transforms all c_void types into unit types. +/// * Generalizes pointers if TransformTyOptions::GENERALIZE_POINTERS option is set. +/// * Normalizes integers if TransformTyOptions::NORMALIZE_INTEGERS option is set. +/// * Generalizes any repr(transparent) user-defined type that is either a pointer or reference, and +/// either references itself or any other type that contains or references itself, to avoid a +/// reference cycle. +/// * Transforms repr(transparent) types without non-ZST field into (). +/// +impl<'tcx> TypeFolder> for TransformTy<'tcx> { + // Transforms a ty:Ty for being encoded and used in the substitution dictionary. + fn fold_ty(&mut self, t: Ty<'tcx>) -> Ty<'tcx> { + match t.kind() { + ty::Array(..) + | ty::Closure(..) + | ty::Coroutine(..) + | ty::CoroutineClosure(..) + | ty::CoroutineWitness(..) + | ty::Dynamic(..) + | ty::Float(..) + | ty::FnDef(..) + | ty::Foreign(..) + | ty::Never + | ty::Pat(..) + | ty::Slice(..) + | ty::Str + | ty::Tuple(..) => t.super_fold_with(self), + + ty::Bool => { + if self.options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) { + // Note: on all platforms that Rust's currently supports, its size and alignment + // are 1, and its ABI class is INTEGER - see Rust Layout and ABIs. + // + // (See https://rust-lang.github.io/unsafe-code-guidelines/layout/scalars.html#bool.) + // + // Clang represents bool as an 8-bit unsigned integer. + self.tcx.types.u8 + } else { + t + } + } + + ty::Char => { + if self.options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) { + // Since #118032, char is guaranteed to have the same size, alignment, and + // function call ABI as u32 on all platforms. + self.tcx.types.u32 + } else { + t + } + } + + ty::Int(..) | ty::Uint(..) => { + if self.options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) { + // Note: C99 7.18.2.4 requires uintptr_t and intptr_t to be at least 16-bit + // wide. All platforms we currently support have a C platform, and as a + // consequence, isize/usize are at least 16-bit wide for all of them. + // + // (See https://rust-lang.github.io/unsafe-code-guidelines/layout/scalars.html#isize-and-usize.) + match t.kind() { + ty::Int(IntTy::Isize) => match self.tcx.sess.target.pointer_width { + 16 => self.tcx.types.i16, + 32 => self.tcx.types.i32, + 64 => self.tcx.types.i64, + 128 => self.tcx.types.i128, + _ => bug!( + "fold_ty: unexpected pointer width `{}`", + self.tcx.sess.target.pointer_width + ), + }, + ty::Uint(UintTy::Usize) => match self.tcx.sess.target.pointer_width { + 16 => self.tcx.types.u16, + 32 => self.tcx.types.u32, + 64 => self.tcx.types.u64, + 128 => self.tcx.types.u128, + _ => bug!( + "fold_ty: unexpected pointer width `{}`", + self.tcx.sess.target.pointer_width + ), + }, + _ => t, + } + } else { + t + } + } + + ty::Adt(..) if t.is_c_void(self.tcx) => self.tcx.types.unit, + + ty::Adt(adt_def, args) => { + if adt_def.repr().transparent() && adt_def.is_struct() && !self.parents.contains(&t) + { + // Don't transform repr(transparent) types with an user-defined CFI encoding to + // preserve the user-defined CFI encoding. + if let Some(_) = self.tcx.get_attr(adt_def.did(), sym::cfi_encoding) { + return t; + } + let variant = adt_def.non_enum_variant(); + let param_env = self.tcx.param_env(variant.def_id); + let field = variant.fields.iter().find(|field| { + let ty = self.tcx.type_of(field.did).instantiate_identity(); + let is_zst = self + .tcx + .layout_of(param_env.and(ty)) + .is_ok_and(|layout| layout.is_zst()); + !is_zst + }); + if let Some(field) = field { + let ty0 = self.tcx.type_of(field.did).instantiate(self.tcx, args); + // Generalize any repr(transparent) user-defined type that is either a + // pointer or reference, and either references itself or any other type that + // contains or references itself, to avoid a reference cycle. + + // If the self reference is not through a pointer, for example, due + // to using `PhantomData`, need to skip normalizing it if we hit it again. + self.parents.push(t); + let ty = if ty0.is_any_ptr() && ty0.contains(t) { + let options = self.options; + self.options |= TransformTyOptions::GENERALIZE_POINTERS; + let ty = ty0.fold_with(self); + self.options = options; + ty + } else { + ty0.fold_with(self) + }; + self.parents.pop(); + ty + } else { + // Transform repr(transparent) types without non-ZST field into () + self.tcx.types.unit + } + } else { + t.super_fold_with(self) + } + } + + ty::Ref(..) => { + if self.options.contains(TransformTyOptions::GENERALIZE_POINTERS) { + if t.is_mutable_ptr() { + Ty::new_mut_ref(self.tcx, self.tcx.lifetimes.re_static, self.tcx.types.unit) + } else { + Ty::new_imm_ref(self.tcx, self.tcx.lifetimes.re_static, self.tcx.types.unit) + } + } else { + t.super_fold_with(self) + } + } + + ty::RawPtr(..) => { + if self.options.contains(TransformTyOptions::GENERALIZE_POINTERS) { + if t.is_mutable_ptr() { + Ty::new_mut_ptr(self.tcx, self.tcx.types.unit) + } else { + Ty::new_imm_ptr(self.tcx, self.tcx.types.unit) + } + } else { + t.super_fold_with(self) + } + } + + ty::FnPtr(..) => { + if self.options.contains(TransformTyOptions::GENERALIZE_POINTERS) { + Ty::new_imm_ptr(self.tcx, self.tcx.types.unit) + } else { + t.super_fold_with(self) + } + } + + ty::Alias(..) => { + self.fold_ty(self.tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), t)) + } + + ty::Bound(..) | ty::Error(..) | ty::Infer(..) | ty::Param(..) | ty::Placeholder(..) => { + bug!("fold_ty: unexpected `{:?}`", t.kind()); + } + } + } + + fn interner(&self) -> TyCtxt<'tcx> { + self.tcx + } +} + +#[instrument(skip(tcx), ret)] +fn trait_object_ty<'tcx>(tcx: TyCtxt<'tcx>, poly_trait_ref: ty::PolyTraitRef<'tcx>) -> Ty<'tcx> { + assert!(!poly_trait_ref.has_non_region_param()); + let principal_pred = poly_trait_ref.map_bound(|trait_ref| { + ty::ExistentialPredicate::Trait(ty::ExistentialTraitRef::erase_self_ty(tcx, trait_ref)) + }); + let mut assoc_preds: Vec<_> = traits::supertraits(tcx, poly_trait_ref) + .flat_map(|super_poly_trait_ref| { + tcx.associated_items(super_poly_trait_ref.def_id()) + .in_definition_order() + .filter(|item| item.kind == ty::AssocKind::Type) + .map(move |assoc_ty| { + super_poly_trait_ref.map_bound(|super_trait_ref| { + let alias_ty = ty::AliasTy::new(tcx, assoc_ty.def_id, super_trait_ref.args); + let resolved = tcx.normalize_erasing_regions( + ty::ParamEnv::reveal_all(), + alias_ty.to_ty(tcx), + ); + debug!("Resolved {:?} -> {resolved}", alias_ty.to_ty(tcx)); + ty::ExistentialPredicate::Projection(ty::ExistentialProjection { + def_id: assoc_ty.def_id, + args: ty::ExistentialTraitRef::erase_self_ty(tcx, super_trait_ref).args, + term: resolved.into(), + }) + }) + }) + }) + .collect(); + assoc_preds.sort_by(|a, b| a.skip_binder().stable_cmp(tcx, &b.skip_binder())); + let preds = tcx.mk_poly_existential_predicates_from_iter( + iter::once(principal_pred).chain(assoc_preds.into_iter()), + ); + Ty::new_dynamic(tcx, preds, tcx.lifetimes.re_erased, ty::Dyn) +} + +/// Transforms an instance for LLVM CFI and cross-language LLVM CFI support using Itanium C++ ABI +/// mangling. +/// +/// typeid_for_instance is called at two locations, initially when declaring/defining functions and +/// methods, and later during code generation at call sites, after type erasure might have ocurred. +/// +/// In the first call (i.e., when declaring/defining functions and methods), it encodes type ids for +/// an FnAbi or Instance, and these type ids are attached to functions and methods. (These type ids +/// are used later by the LowerTypeTests LLVM pass to aggregate functions in groups derived from +/// these type ids.) +/// +/// In the second call (i.e., during code generation at call sites), it encodes a type id for an +/// FnAbi or Instance, after type erasure might have occured, and this type id is used for testing +/// if a function is member of the group derived from this type id. Therefore, in the first call to +/// typeid_for_fnabi (when type ids are attached to functions and methods), it can only include at +/// most as much information that would be available in the second call (i.e., during code +/// generation at call sites); otherwise, the type ids would not not match. +/// +/// For this, it: +/// +/// * Adjust the type ids of DropGlues (see below). +/// * Adjusts the type ids of VTableShims to the type id expected in the call sites for the +/// entry in the vtable (i.e., by using the signature of the closure passed as an argument to the +/// shim, or by just removing self). +/// * Performs type erasure for calls on trait objects by transforming self into a trait object of +/// the trait that defines the method. +/// * Performs type erasure for closures call methods by transforming self into a trait object of +/// the Fn trait that defines the method (for being attached as a secondary type id). +/// +#[instrument(level = "trace", skip(tcx))] +pub fn transform_instance<'tcx>( + tcx: TyCtxt<'tcx>, + mut instance: Instance<'tcx>, + options: TransformTyOptions, +) -> Instance<'tcx> { + if (matches!(instance.def, ty::InstanceDef::Virtual(..)) + && Some(instance.def_id()) == tcx.lang_items().drop_in_place_fn()) + || matches!(instance.def, ty::InstanceDef::DropGlue(..)) + { + // Adjust the type ids of DropGlues + // + // DropGlues may have indirect calls to one or more given types drop function. Rust allows + // for types to be erased to any trait object and retains the drop function for the original + // type, which means at the indirect call sites in DropGlues, when typeid_for_fnabi is + // called a second time, it only has information after type erasure and it could be a call + // on any arbitrary trait object. Normalize them to a synthesized Drop trait object, both on + // declaration/definition, and during code generation at call sites so they have the same + // type id and match. + // + // FIXME(rcvalle): This allows a drop call on any trait object to call the drop function of + // any other type. + // + let def_id = tcx + .lang_items() + .drop_trait() + .unwrap_or_else(|| bug!("typeid_for_instance: couldn't get drop_trait lang item")); + let predicate = ty::ExistentialPredicate::Trait(ty::ExistentialTraitRef { + def_id: def_id, + args: List::empty(), + }); + let predicates = tcx.mk_poly_existential_predicates(&[ty::Binder::dummy(predicate)]); + let self_ty = Ty::new_dynamic(tcx, predicates, tcx.lifetimes.re_erased, ty::Dyn); + instance.args = tcx.mk_args_trait(self_ty, List::empty()); + } else if let ty::InstanceDef::Virtual(def_id, _) = instance.def { + // Transform self into a trait object of the trait that defines the method for virtual + // functions to match the type erasure done below. + let upcast_ty = match tcx.trait_of_item(def_id) { + Some(trait_id) => trait_object_ty( + tcx, + ty::Binder::dummy(ty::TraitRef::from_method(tcx, trait_id, instance.args)), + ), + // drop_in_place won't have a defining trait, skip the upcast + None => instance.args.type_at(0), + }; + let ty::Dynamic(preds, lifetime, kind) = upcast_ty.kind() else { + bug!("Tried to remove autotraits from non-dynamic type {upcast_ty}"); + }; + let self_ty = if preds.principal().is_some() { + let filtered_preds = + tcx.mk_poly_existential_predicates_from_iter(preds.into_iter().filter(|pred| { + !matches!(pred.skip_binder(), ty::ExistentialPredicate::AutoTrait(..)) + })); + Ty::new_dynamic(tcx, filtered_preds, *lifetime, *kind) + } else { + // If there's no principal type, re-encode it as a unit, since we don't know anything + // about it. This technically discards the knowledge that it was a type that was made + // into a trait object at some point, but that's not a lot. + tcx.types.unit + }; + instance.args = tcx.mk_args_trait(self_ty, instance.args.into_iter().skip(1)); + } else if let ty::InstanceDef::VTableShim(def_id) = instance.def + && let Some(trait_id) = tcx.trait_of_item(def_id) + { + // Adjust the type ids of VTableShims to the type id expected in the call sites for the + // entry in the vtable (i.e., by using the signature of the closure passed as an argument + // to the shim, or by just removing self). + let trait_ref = ty::TraitRef::new(tcx, trait_id, instance.args); + let invoke_ty = trait_object_ty(tcx, ty::Binder::dummy(trait_ref)); + instance.args = tcx.mk_args_trait(invoke_ty, trait_ref.args.into_iter().skip(1)); + } + + if !options.contains(TransformTyOptions::USE_CONCRETE_SELF) { + // Perform type erasure for calls on trait objects by transforming self into a trait object + // of the trait that defines the method. + if let Some(impl_id) = tcx.impl_of_method(instance.def_id()) + && let Some(trait_ref) = tcx.impl_trait_ref(impl_id) + { + let impl_method = tcx.associated_item(instance.def_id()); + let method_id = impl_method + .trait_item_def_id + .expect("Part of a trait implementation, but not linked to the def_id?"); + let trait_method = tcx.associated_item(method_id); + let trait_id = trait_ref.skip_binder().def_id; + if traits::is_vtable_safe_method(tcx, trait_id, trait_method) + && tcx.object_safety_violations(trait_id).is_empty() + { + // Trait methods will have a Self polymorphic parameter, where the concreteized + // implementatation will not. We need to walk back to the more general trait method + let trait_ref = tcx.instantiate_and_normalize_erasing_regions( + instance.args, + ty::ParamEnv::reveal_all(), + trait_ref, + ); + let invoke_ty = trait_object_ty(tcx, ty::Binder::dummy(trait_ref)); + + // At the call site, any call to this concrete function through a vtable will be + // `Virtual(method_id, idx)` with appropriate arguments for the method. Since we have the + // original method id, and we've recovered the trait arguments, we can make the callee + // instance we're computing the alias set for match the caller instance. + // + // Right now, our code ignores the vtable index everywhere, so we use 0 as a placeholder. + // If we ever *do* start encoding the vtable index, we will need to generate an alias set + // based on which vtables we are putting this method into, as there will be more than one + // index value when supertraits are involved. + instance.def = ty::InstanceDef::Virtual(method_id, 0); + let abstract_trait_args = + tcx.mk_args_trait(invoke_ty, trait_ref.args.into_iter().skip(1)); + instance.args = instance.args.rebase_onto(tcx, impl_id, abstract_trait_args); + } + } else if tcx.is_closure_like(instance.def_id()) { + // We're either a closure or a coroutine. Our goal is to find the trait we're defined on, + // instantiate it, and take the type of its only method as our own. + let closure_ty = instance.ty(tcx, ty::ParamEnv::reveal_all()); + let (trait_id, inputs) = match closure_ty.kind() { + ty::Closure(..) => { + let closure_args = instance.args.as_closure(); + let trait_id = tcx.fn_trait_kind_to_def_id(closure_args.kind()).unwrap(); + let tuple_args = + tcx.instantiate_bound_regions_with_erased(closure_args.sig()).inputs()[0]; + (trait_id, Some(tuple_args)) + } + ty::Coroutine(..) => match tcx.coroutine_kind(instance.def_id()).unwrap() { + hir::CoroutineKind::Coroutine(..) => ( + tcx.require_lang_item(LangItem::Coroutine, None), + Some(instance.args.as_coroutine().resume_ty()), + ), + hir::CoroutineKind::Desugared(desugaring, _) => { + let lang_item = match desugaring { + hir::CoroutineDesugaring::Async => LangItem::Future, + hir::CoroutineDesugaring::AsyncGen => LangItem::AsyncIterator, + hir::CoroutineDesugaring::Gen => LangItem::Iterator, + }; + (tcx.require_lang_item(lang_item, None), None) + } + }, + ty::CoroutineClosure(..) => ( + tcx.require_lang_item(LangItem::FnOnce, None), + Some( + tcx.instantiate_bound_regions_with_erased( + instance.args.as_coroutine_closure().coroutine_closure_sig(), + ) + .tupled_inputs_ty, + ), + ), + x => bug!("Unexpected type kind for closure-like: {x:?}"), + }; + let concrete_args = tcx.mk_args_trait(closure_ty, inputs.map(Into::into)); + let trait_ref = ty::TraitRef::new(tcx, trait_id, concrete_args); + let invoke_ty = trait_object_ty(tcx, ty::Binder::dummy(trait_ref)); + let abstract_args = tcx.mk_args_trait(invoke_ty, trait_ref.args.into_iter().skip(1)); + // There should be exactly one method on this trait, and it should be the one we're + // defining. + let call = tcx + .associated_items(trait_id) + .in_definition_order() + .find(|it| it.kind == ty::AssocKind::Fn) + .expect("No call-family function on closure-like Fn trait?") + .def_id; + + instance.def = ty::InstanceDef::Virtual(call, 0); + instance.args = abstract_args; + } + } + + instance +} diff --git a/compiler/rustc_sanitizers/src/cfi/typeid/mod.rs b/compiler/rustc_sanitizers/src/cfi/typeid/mod.rs new file mode 100644 index 00000000000..ad8b1804439 --- /dev/null +++ b/compiler/rustc_sanitizers/src/cfi/typeid/mod.rs @@ -0,0 +1,54 @@ +//! Type metadata identifiers for LLVM Control Flow Integrity (CFI) and cross-language LLVM CFI +//! support for the Rust compiler. +//! +//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler, +//! see design document in the tracking issue #89653. +use bitflags::bitflags; +use rustc_middle::ty::{Instance, Ty, TyCtxt}; +use rustc_target::abi::call::FnAbi; + +bitflags! { + /// Options for typeid_for_fnabi. + #[derive(Clone, Copy, Debug)] + pub struct TypeIdOptions: u32 { + /// Generalizes pointers for compatibility with Clang + /// `-fsanitize-cfi-icall-generalize-pointers` option for cross-language LLVM CFI and KCFI + /// support. + const GENERALIZE_POINTERS = 1; + /// Generalizes repr(C) user-defined type for extern function types with the "C" calling + /// convention (or extern types) for cross-language LLVM CFI and KCFI support. + const GENERALIZE_REPR_C = 2; + /// Normalizes integers for compatibility with Clang + /// `-fsanitize-cfi-icall-experimental-normalize-integers` option for cross-language LLVM + /// CFI and KCFI support. + const NORMALIZE_INTEGERS = 4; + /// Do not perform self type erasure for attaching a secondary type id to methods with their + /// concrete self so they can be used as function pointers. + /// + /// (This applies to typeid_for_instance only and should be used to attach a secondary type + /// id to methods during their declaration/definition so they match the type ids returned by + /// either typeid_for_instance or typeid_for_fnabi at call sites during code generation for + /// type membership tests when methods are used as function pointers.) + const USE_CONCRETE_SELF = 8; + } +} + +pub mod itanium_cxx_abi; + +/// Returns a type metadata identifier for the specified FnAbi. +pub fn typeid_for_fnabi<'tcx>( + tcx: TyCtxt<'tcx>, + fn_abi: &FnAbi<'tcx, Ty<'tcx>>, + options: TypeIdOptions, +) -> String { + itanium_cxx_abi::typeid_for_fnabi(tcx, fn_abi, options) +} + +/// Returns a type metadata identifier for the specified Instance. +pub fn typeid_for_instance<'tcx>( + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + options: TypeIdOptions, +) -> String { + itanium_cxx_abi::typeid_for_instance(tcx, instance, options) +} diff --git a/compiler/rustc_sanitizers/src/kcfi/mod.rs b/compiler/rustc_sanitizers/src/kcfi/mod.rs new file mode 100644 index 00000000000..a8b632940b0 --- /dev/null +++ b/compiler/rustc_sanitizers/src/kcfi/mod.rs @@ -0,0 +1,7 @@ +//! LLVM Kernel Control Flow Integrity (KCFI) and cross-language LLVM KCFI support for the Rust +//! compiler. +//! +//! For more information about LLVM KCFI and cross-language LLVM KCFI support for the Rust compiler, +//! see the tracking issue #123479. +pub mod typeid; +pub use crate::kcfi::typeid::{typeid_for_fnabi, typeid_for_instance, TypeIdOptions}; diff --git a/compiler/rustc_sanitizers/src/kcfi/typeid/mod.rs b/compiler/rustc_sanitizers/src/kcfi/typeid/mod.rs new file mode 100644 index 00000000000..436c398e39b --- /dev/null +++ b/compiler/rustc_sanitizers/src/kcfi/typeid/mod.rs @@ -0,0 +1,55 @@ +//! Type metadata identifiers for LLVM Kernel Control Flow Integrity (KCFI) and cross-language LLVM +//! KCFI support for the Rust compiler. +//! +//! For more information about LLVM KCFI and cross-language LLVM KCFI support for the Rust compiler, +//! see the tracking issue #123479. +use rustc_middle::ty::{Instance, InstanceDef, ReifyReason, Ty, TyCtxt}; +use rustc_target::abi::call::FnAbi; +use std::hash::Hasher; +use twox_hash::XxHash64; + +pub use crate::cfi::typeid::{itanium_cxx_abi, TypeIdOptions}; + +/// Returns a KCFI type metadata identifier for the specified FnAbi. +pub fn typeid_for_fnabi<'tcx>( + tcx: TyCtxt<'tcx>, + fn_abi: &FnAbi<'tcx, Ty<'tcx>>, + options: TypeIdOptions, +) -> u32 { + // A KCFI type metadata identifier is a 32-bit constant produced by taking the lower half of the + // xxHash64 of the type metadata identifier. (See llvm/llvm-project@cff5bef.) + let mut hash: XxHash64 = Default::default(); + hash.write(itanium_cxx_abi::typeid_for_fnabi(tcx, fn_abi, options).as_bytes()); + hash.finish() as u32 +} + +/// Returns a KCFI type metadata identifier for the specified Instance. +pub fn typeid_for_instance<'tcx>( + tcx: TyCtxt<'tcx>, + instance: Instance<'tcx>, + mut options: TypeIdOptions, +) -> u32 { + // KCFI support for Rust shares most of its implementation with the CFI support, with some key + // differences: + // + // 1. KCFI performs type tests differently and are implemented as different LLVM passes than CFI + // to not require LTO. + // 2. KCFI has the limitation that a function or method may have one type id assigned only. + // + // Because of the limitation listed above (2), the current KCFI implementation (not CFI) does + // reifying of types (i.e., adds shims/trampolines for indirect calls in these cases) for: + // + // * Supporting casting between function items, closures, and Fn trait objects. + // * Supporting methods being cast as function pointers. + // + // This was implemented for KCFI support in #123106 and #123052 (which introduced the + // ReifyReason). The tracking issue for KCFI support for Rust is #123479. + if matches!(instance.def, InstanceDef::ReifyShim(_, Some(ReifyReason::FnPtr))) { + options.insert(TypeIdOptions::USE_CONCRETE_SELF); + } + // A KCFI type metadata identifier is a 32-bit constant produced by taking the lower half of the + // xxHash64 of the type metadata identifier. (See llvm/llvm-project@cff5bef.) + let mut hash: XxHash64 = Default::default(); + hash.write(itanium_cxx_abi::typeid_for_instance(tcx, instance, options).as_bytes()); + hash.finish() as u32 +} diff --git a/compiler/rustc_sanitizers/src/lib.rs b/compiler/rustc_sanitizers/src/lib.rs new file mode 100644 index 00000000000..1f73e255490 --- /dev/null +++ b/compiler/rustc_sanitizers/src/lib.rs @@ -0,0 +1,7 @@ +#![feature(let_chains)] +//! Sanitizers support for the Rust compiler. +//! +//! This crate contains the source code for providing support for the sanitizers to the Rust +//! compiler. +pub mod cfi; +pub mod kcfi; diff --git a/compiler/rustc_symbol_mangling/Cargo.toml b/compiler/rustc_symbol_mangling/Cargo.toml index 1c8f1d03670..65aa9e40c8b 100644 --- a/compiler/rustc_symbol_mangling/Cargo.toml +++ b/compiler/rustc_symbol_mangling/Cargo.toml @@ -5,7 +5,6 @@ edition = "2021" [dependencies] # tidy-alphabetical-start -bitflags = "2.4.1" punycode = "0.4.0" rustc-demangle = "0.1.21" rustc_data_structures = { path = "../rustc_data_structures" } @@ -15,7 +14,5 @@ rustc_middle = { path = "../rustc_middle" } rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } rustc_target = { path = "../rustc_target" } -rustc_trait_selection = { path = "../rustc_trait_selection" } tracing = "0.1" -twox-hash = "1.6.3" # tidy-alphabetical-end diff --git a/compiler/rustc_symbol_mangling/src/lib.rs b/compiler/rustc_symbol_mangling/src/lib.rs index 0588af9bda7..b9509478702 100644 --- a/compiler/rustc_symbol_mangling/src/lib.rs +++ b/compiler/rustc_symbol_mangling/src/lib.rs @@ -114,7 +114,6 @@ mod v0; pub mod errors; pub mod test; -pub mod typeid; /// This function computes the symbol name for the given `instance` and the /// given instantiating crate. That is, if you know that instance X is diff --git a/compiler/rustc_symbol_mangling/src/typeid.rs b/compiler/rustc_symbol_mangling/src/typeid.rs deleted file mode 100644 index 7bd998294dd..00000000000 --- a/compiler/rustc_symbol_mangling/src/typeid.rs +++ /dev/null @@ -1,100 +0,0 @@ -/// Type metadata identifiers for LLVM Control Flow Integrity (CFI) and cross-language LLVM CFI -/// support. -/// -/// For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler, -/// see design document in the tracking issue #89653. -use bitflags::bitflags; -use rustc_middle::ty::{Instance, InstanceDef, ReifyReason, Ty, TyCtxt}; -use rustc_target::abi::call::FnAbi; -use std::hash::Hasher; -use twox_hash::XxHash64; - -bitflags! { - /// Options for typeid_for_fnabi. - #[derive(Clone, Copy, Debug)] - pub struct TypeIdOptions: u32 { - /// Generalizes pointers for compatibility with Clang - /// `-fsanitize-cfi-icall-generalize-pointers` option for cross-language LLVM CFI and KCFI - /// support. - const GENERALIZE_POINTERS = 1; - /// Generalizes repr(C) user-defined type for extern function types with the "C" calling - /// convention (or extern types) for cross-language LLVM CFI and KCFI support. - const GENERALIZE_REPR_C = 2; - /// Normalizes integers for compatibility with Clang - /// `-fsanitize-cfi-icall-experimental-normalize-integers` option for cross-language LLVM - /// CFI and KCFI support. - const NORMALIZE_INTEGERS = 4; - /// Do not perform self type erasure for attaching a secondary type id to methods with their - /// concrete self so they can be used as function pointers. - /// - /// (This applies to typeid_for_instance only and should be used to attach a secondary type - /// id to methods during their declaration/definition so they match the type ids returned by - /// either typeid_for_instance or typeid_for_fnabi at call sites during code generation for - /// type membership tests when methods are used as function pointers.) - const USE_CONCRETE_SELF = 8; - } -} - -mod typeid_itanium_cxx_abi; - -/// Returns a type metadata identifier for the specified FnAbi. -pub fn typeid_for_fnabi<'tcx>( - tcx: TyCtxt<'tcx>, - fn_abi: &FnAbi<'tcx, Ty<'tcx>>, - options: TypeIdOptions, -) -> String { - typeid_itanium_cxx_abi::typeid_for_fnabi(tcx, fn_abi, options) -} - -/// Returns a type metadata identifier for the specified Instance. -pub fn typeid_for_instance<'tcx>( - tcx: TyCtxt<'tcx>, - instance: Instance<'tcx>, - options: TypeIdOptions, -) -> String { - typeid_itanium_cxx_abi::typeid_for_instance(tcx, instance, options) -} - -/// Returns a KCFI type metadata identifier for the specified FnAbi. -pub fn kcfi_typeid_for_fnabi<'tcx>( - tcx: TyCtxt<'tcx>, - fn_abi: &FnAbi<'tcx, Ty<'tcx>>, - options: TypeIdOptions, -) -> u32 { - // A KCFI type metadata identifier is a 32-bit constant produced by taking the lower half of the - // xxHash64 of the type metadata identifier. (See llvm/llvm-project@cff5bef.) - let mut hash: XxHash64 = Default::default(); - hash.write(typeid_itanium_cxx_abi::typeid_for_fnabi(tcx, fn_abi, options).as_bytes()); - hash.finish() as u32 -} - -/// Returns a KCFI type metadata identifier for the specified Instance. -pub fn kcfi_typeid_for_instance<'tcx>( - tcx: TyCtxt<'tcx>, - instance: Instance<'tcx>, - mut options: TypeIdOptions, -) -> u32 { - // KCFI support for Rust shares most of its implementation with the CFI support, with some key - // differences: - // - // 1. KCFI performs type tests differently and are implemented as different LLVM passes than CFI - // to not require LTO. - // 2. KCFI has the limitation that a function or method may have one type id assigned only. - // - // Because of the limitation listed above (2), the current KCFI implementation (not CFI) does - // reifying of types (i.e., adds shims/trampolines for indirect calls in these cases) for: - // - // * Supporting casting between function items, closures, and Fn trait objects. - // * Supporting methods being cast as function pointers. - // - // This was implemented for KCFI support in #123106 and #123052 (which introduced the - // ReifyReason). The tracking issue for KCFI support for Rust is #123479. - if matches!(instance.def, InstanceDef::ReifyShim(_, Some(ReifyReason::FnPtr))) { - options.insert(TypeIdOptions::USE_CONCRETE_SELF); - } - // A KCFI type metadata identifier is a 32-bit constant produced by taking the lower half of the - // xxHash64 of the type metadata identifier. (See llvm/llvm-project@cff5bef.) - let mut hash: XxHash64 = Default::default(); - hash.write(typeid_itanium_cxx_abi::typeid_for_instance(tcx, instance, options).as_bytes()); - hash.finish() as u32 -}