Rollup merge of #123620 - rcvalle:rust-create-rustc-sanitizers, r=davidtwco

sanitizers: Create the rustc_sanitizers crate

Create the `rustc_sanitizers` crate and move the source code for the CFI and KCFI sanitizers to it. The tracking issue for reviewing and moving sanitizers into a compiler crate is #123619. This is part of our work to organize and stabilize support for the sanitizers. (See our roadmap at https://hackmd.io/@rcvalle/S1Ou9K6H6.)
This commit is contained in:
Matthias Krüger 2024-04-09 06:02:21 +02:00 committed by GitHub
commit b809c4264b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 920 additions and 789 deletions

View File

@ -3670,6 +3670,7 @@ dependencies = [
"rustc_metadata", "rustc_metadata",
"rustc_middle", "rustc_middle",
"rustc_query_system", "rustc_query_system",
"rustc_sanitizers",
"rustc_session", "rustc_session",
"rustc_span", "rustc_span",
"rustc_symbol_mangling", "rustc_symbol_mangling",
@ -4558,6 +4559,21 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "rustc_sanitizers"
version = "0.0.0"
dependencies = [
"bitflags 2.5.0",
"rustc_data_structures",
"rustc_hir",
"rustc_middle",
"rustc_span",
"rustc_target",
"rustc_trait_selection",
"tracing",
"twox-hash",
]
[[package]] [[package]]
name = "rustc_serialize" name = "rustc_serialize"
version = "0.0.0" version = "0.0.0"
@ -4633,7 +4649,6 @@ dependencies = [
name = "rustc_symbol_mangling" name = "rustc_symbol_mangling"
version = "0.0.0" version = "0.0.0"
dependencies = [ dependencies = [
"bitflags 2.5.0",
"punycode", "punycode",
"rustc-demangle", "rustc-demangle",
"rustc_data_structures", "rustc_data_structures",
@ -4643,9 +4658,7 @@ dependencies = [
"rustc_session", "rustc_session",
"rustc_span", "rustc_span",
"rustc_target", "rustc_target",
"rustc_trait_selection",
"tracing", "tracing",
"twox-hash",
] ]
[[package]] [[package]]

View File

@ -28,6 +28,7 @@ rustc_macros = { path = "../rustc_macros" }
rustc_metadata = { path = "../rustc_metadata" } rustc_metadata = { path = "../rustc_metadata" }
rustc_middle = { path = "../rustc_middle" } rustc_middle = { path = "../rustc_middle" }
rustc_query_system = { path = "../rustc_query_system" } rustc_query_system = { path = "../rustc_query_system" }
rustc_sanitizers = { path = "../rustc_sanitizers" }
rustc_session = { path = "../rustc_session" } rustc_session = { path = "../rustc_session" }
rustc_span = { path = "../rustc_span" } rustc_span = { path = "../rustc_span" }
rustc_symbol_mangling = { path = "../rustc_symbol_mangling" } rustc_symbol_mangling = { path = "../rustc_symbol_mangling" }

View File

@ -20,12 +20,9 @@ use rustc_middle::ty::layout::{
FnAbiError, FnAbiOfHelpers, FnAbiRequest, LayoutError, LayoutOfHelpers, TyAndLayout, FnAbiError, FnAbiOfHelpers, FnAbiRequest, LayoutError, LayoutOfHelpers, TyAndLayout,
}; };
use rustc_middle::ty::{self, Instance, Ty, TyCtxt}; use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
use rustc_sanitizers::{cfi, kcfi};
use rustc_session::config::OptLevel; use rustc_session::config::OptLevel;
use rustc_span::Span; use rustc_span::Span;
use rustc_symbol_mangling::typeid::{
kcfi_typeid_for_fnabi, kcfi_typeid_for_instance, typeid_for_fnabi, typeid_for_instance,
TypeIdOptions,
};
use rustc_target::abi::{self, call::FnAbi, Align, Size, WrappingRange}; use rustc_target::abi::{self, call::FnAbi, Align, Size, WrappingRange};
use rustc_target::spec::{HasTargetSpec, SanitizerSet, Target}; use rustc_target::spec::{HasTargetSpec, SanitizerSet, Target};
use smallvec::SmallVec; use smallvec::SmallVec;
@ -1632,18 +1629,18 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
return; return;
} }
let mut options = TypeIdOptions::empty(); let mut options = cfi::TypeIdOptions::empty();
if self.tcx.sess.is_sanitizer_cfi_generalize_pointers_enabled() { if self.tcx.sess.is_sanitizer_cfi_generalize_pointers_enabled() {
options.insert(TypeIdOptions::GENERALIZE_POINTERS); options.insert(cfi::TypeIdOptions::GENERALIZE_POINTERS);
} }
if self.tcx.sess.is_sanitizer_cfi_normalize_integers_enabled() { if self.tcx.sess.is_sanitizer_cfi_normalize_integers_enabled() {
options.insert(TypeIdOptions::NORMALIZE_INTEGERS); options.insert(cfi::TypeIdOptions::NORMALIZE_INTEGERS);
} }
let typeid = if let Some(instance) = instance { let typeid = if let Some(instance) = instance {
typeid_for_instance(self.tcx, instance, options) cfi::typeid_for_instance(self.tcx, instance, options)
} else { } else {
typeid_for_fnabi(self.tcx, fn_abi, options) cfi::typeid_for_fnabi(self.tcx, fn_abi, options)
}; };
let typeid_metadata = self.cx.typeid_metadata(typeid).unwrap(); let typeid_metadata = self.cx.typeid_metadata(typeid).unwrap();
@ -1680,18 +1677,18 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
return None; return None;
} }
let mut options = TypeIdOptions::empty(); let mut options = kcfi::TypeIdOptions::empty();
if self.tcx.sess.is_sanitizer_cfi_generalize_pointers_enabled() { if self.tcx.sess.is_sanitizer_cfi_generalize_pointers_enabled() {
options.insert(TypeIdOptions::GENERALIZE_POINTERS); options.insert(kcfi::TypeIdOptions::GENERALIZE_POINTERS);
} }
if self.tcx.sess.is_sanitizer_cfi_normalize_integers_enabled() { if self.tcx.sess.is_sanitizer_cfi_normalize_integers_enabled() {
options.insert(TypeIdOptions::NORMALIZE_INTEGERS); options.insert(kcfi::TypeIdOptions::NORMALIZE_INTEGERS);
} }
let kcfi_typeid = if let Some(instance) = instance { let kcfi_typeid = if let Some(instance) = instance {
kcfi_typeid_for_instance(self.tcx, instance, options) kcfi::typeid_for_instance(self.tcx, instance, options)
} else { } else {
kcfi_typeid_for_fnabi(self.tcx, fn_abi, options) kcfi::typeid_for_fnabi(self.tcx, fn_abi, options)
}; };
Some(llvm::OperandBundleDef::new("kcfi", &[self.const_u32(kcfi_typeid)])) Some(llvm::OperandBundleDef::new("kcfi", &[self.const_u32(kcfi_typeid)]))

View File

@ -22,10 +22,7 @@ use itertools::Itertools;
use rustc_codegen_ssa::traits::TypeMembershipMethods; use rustc_codegen_ssa::traits::TypeMembershipMethods;
use rustc_data_structures::fx::FxIndexSet; use rustc_data_structures::fx::FxIndexSet;
use rustc_middle::ty::{Instance, Ty}; use rustc_middle::ty::{Instance, Ty};
use rustc_symbol_mangling::typeid::{ use rustc_sanitizers::{cfi, kcfi};
kcfi_typeid_for_fnabi, kcfi_typeid_for_instance, typeid_for_fnabi, typeid_for_instance,
TypeIdOptions,
};
use smallvec::SmallVec; use smallvec::SmallVec;
/// Declare a function. /// Declare a function.
@ -145,27 +142,29 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
if let Some(instance) = instance { if let Some(instance) = instance {
let mut typeids = FxIndexSet::default(); let mut typeids = FxIndexSet::default();
for options in [ for options in [
TypeIdOptions::GENERALIZE_POINTERS, cfi::TypeIdOptions::GENERALIZE_POINTERS,
TypeIdOptions::NORMALIZE_INTEGERS, cfi::TypeIdOptions::NORMALIZE_INTEGERS,
TypeIdOptions::USE_CONCRETE_SELF, cfi::TypeIdOptions::USE_CONCRETE_SELF,
] ]
.into_iter() .into_iter()
.powerset() .powerset()
.map(TypeIdOptions::from_iter) .map(cfi::TypeIdOptions::from_iter)
{ {
let typeid = typeid_for_instance(self.tcx, instance, options); let typeid = cfi::typeid_for_instance(self.tcx, instance, options);
if typeids.insert(typeid.clone()) { if typeids.insert(typeid.clone()) {
self.add_type_metadata(llfn, typeid); self.add_type_metadata(llfn, typeid);
} }
} }
} else { } else {
for options in for options in [
[TypeIdOptions::GENERALIZE_POINTERS, TypeIdOptions::NORMALIZE_INTEGERS] cfi::TypeIdOptions::GENERALIZE_POINTERS,
.into_iter() cfi::TypeIdOptions::NORMALIZE_INTEGERS,
.powerset() ]
.map(TypeIdOptions::from_iter) .into_iter()
.powerset()
.map(cfi::TypeIdOptions::from_iter)
{ {
let typeid = typeid_for_fnabi(self.tcx, fn_abi, options); let typeid = cfi::typeid_for_fnabi(self.tcx, fn_abi, options);
self.add_type_metadata(llfn, typeid); self.add_type_metadata(llfn, typeid);
} }
} }
@ -173,19 +172,19 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
if self.tcx.sess.is_sanitizer_kcfi_enabled() { if self.tcx.sess.is_sanitizer_kcfi_enabled() {
// LLVM KCFI does not support multiple !kcfi_type attachments // LLVM KCFI does not support multiple !kcfi_type attachments
let mut options = TypeIdOptions::empty(); let mut options = kcfi::TypeIdOptions::empty();
if self.tcx.sess.is_sanitizer_cfi_generalize_pointers_enabled() { if self.tcx.sess.is_sanitizer_cfi_generalize_pointers_enabled() {
options.insert(TypeIdOptions::GENERALIZE_POINTERS); options.insert(kcfi::TypeIdOptions::GENERALIZE_POINTERS);
} }
if self.tcx.sess.is_sanitizer_cfi_normalize_integers_enabled() { if self.tcx.sess.is_sanitizer_cfi_normalize_integers_enabled() {
options.insert(TypeIdOptions::NORMALIZE_INTEGERS); options.insert(kcfi::TypeIdOptions::NORMALIZE_INTEGERS);
} }
if let Some(instance) = instance { if let Some(instance) = instance {
let kcfi_typeid = kcfi_typeid_for_instance(self.tcx, instance, options); let kcfi_typeid = kcfi::typeid_for_instance(self.tcx, instance, options);
self.set_kcfi_type_metadata(llfn, kcfi_typeid); self.set_kcfi_type_metadata(llfn, kcfi_typeid);
} else { } else {
let kcfi_typeid = kcfi_typeid_for_fnabi(self.tcx, fn_abi, options); let kcfi_typeid = kcfi::typeid_for_fnabi(self.tcx, fn_abi, options);
self.set_kcfi_type_metadata(llfn, kcfi_typeid); self.set_kcfi_type_metadata(llfn, kcfi_typeid);
} }
} }

View File

@ -0,0 +1,15 @@
[package]
name = "rustc_sanitizers"
version = "0.0.0"
edition = "2021"
[dependencies]
bitflags = "2.5.0"
tracing = "0.1"
twox-hash = "1.6.3"
rustc_data_structures = { path = "../rustc_data_structures" }
rustc_hir = { path = "../rustc_hir" }
rustc_middle = { path = "../rustc_middle" }
rustc_span = { path = "../rustc_span" }
rustc_target = { path = "../rustc_target" }
rustc_trait_selection = { path = "../rustc_trait_selection" }

View File

@ -0,0 +1,2 @@
The `rustc_sanitizers` crate contains the source code for providing support for
the [sanitizers](https://github.com/google/sanitizers) to the Rust compiler.

View File

@ -0,0 +1,6 @@
//! LLVM Control Flow Integrity (CFI) and cross-language LLVM CFI support for the Rust compiler.
//!
//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler,
//! see design document in the tracking issue #89653.
pub mod typeid;
pub use crate::cfi::typeid::{typeid_for_fnabi, typeid_for_instance, TypeIdOptions};

View File

@ -1,76 +1,46 @@
/// Type metadata identifiers (using Itanium C++ ABI mangling for encoding) for LLVM Control Flow //! Encodes type metadata identifiers for LLVM CFI and cross-language LLVM CFI support using Itanium
/// Integrity (CFI) and cross-language LLVM CFI support. //! C++ ABI mangling for encoding with vendor extended type qualifiers and types for Rust types that
/// //! are not used across the FFI boundary.
/// Encodes type metadata identifiers for LLVM CFI and cross-language LLVM CFI support using Itanium //!
/// C++ ABI mangling for encoding with vendor extended type qualifiers and types for Rust types that //! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler,
/// are not used across the FFI boundary. //! see design document in the tracking issue #89653.
///
/// For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler,
/// see design document in the tracking issue #89653.
use rustc_data_structures::base_n; use rustc_data_structures::base_n;
use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxHashMap;
use rustc_hir as hir; use rustc_hir as hir;
use rustc_hir::lang_items::LangItem; use rustc_middle::bug;
use rustc_middle::ty::fold::{TypeFolder, TypeSuperFoldable};
use rustc_middle::ty::layout::IntegerExt; use rustc_middle::ty::layout::IntegerExt;
use rustc_middle::ty::{ use rustc_middle::ty::{
self, Const, ExistentialPredicate, FloatTy, FnSig, Instance, IntTy, List, Region, RegionKind, self, Const, ExistentialPredicate, FloatTy, FnSig, GenericArg, GenericArgKind, GenericArgsRef,
TermKind, Ty, TyCtxt, UintTy, IntTy, List, Region, RegionKind, TermKind, Ty, TyCtxt, TypeFoldable, UintTy,
}; };
use rustc_middle::ty::{GenericArg, GenericArgKind, GenericArgsRef};
use rustc_middle::ty::{TypeFoldable, TypeVisitableExt};
use rustc_span::def_id::DefId; use rustc_span::def_id::DefId;
use rustc_span::sym; use rustc_span::sym;
use rustc_target::abi::call::{Conv, FnAbi, PassMode};
use rustc_target::abi::Integer; use rustc_target::abi::Integer;
use rustc_target::spec::abi::Abi; use rustc_target::spec::abi::Abi;
use rustc_trait_selection::traits;
use std::fmt::Write as _; use std::fmt::Write as _;
use std::iter; use tracing::instrument;
use crate::typeid::TypeIdOptions; use crate::cfi::typeid::itanium_cxx_abi::transform::{TransformTy, TransformTyOptions};
use crate::cfi::typeid::TypeIdOptions;
/// Type and extended type qualifiers. /// Options for encode_ty.
#[derive(Eq, Hash, PartialEq)] pub type EncodeTyOptions = TypeIdOptions;
enum TyQ {
None,
Const,
Mut,
}
/// Substitution dictionary key. /// Substitution dictionary key.
#[derive(Eq, Hash, PartialEq)] #[derive(Eq, Hash, PartialEq)]
enum DictKey<'tcx> { pub enum DictKey<'tcx> {
Ty(Ty<'tcx>, TyQ), Ty(Ty<'tcx>, TyQ),
Region(Region<'tcx>), Region(Region<'tcx>),
Const(Const<'tcx>), Const(Const<'tcx>),
Predicate(ExistentialPredicate<'tcx>), Predicate(ExistentialPredicate<'tcx>),
} }
/// Options for encode_ty. /// Type and extended type qualifiers.
type EncodeTyOptions = TypeIdOptions; #[derive(Eq, Hash, PartialEq)]
pub enum TyQ {
/// Options for transform_ty. None,
type TransformTyOptions = TypeIdOptions; Const,
Mut,
/// Renders `num` as a v0-style disambiguator (see
/// <https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html>).
///
/// Zero is rendered as the bare `s_`; any other value is rendered as `s`, followed by
/// `num - 1` in base 62, followed by `_`.
fn to_disambiguator(num: u64) -> String {
    match num.checked_sub(1) {
        // `u64` always fits in `u128`, so the widening conversion is lossless.
        Some(index) => format!("s{}_", base_n::encode(u128::from(index), 62)),
        None => String::from("s_"),
    }
}
/// Converts a number to a sequence number (see
/// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle.seq-id>).
fn to_seq_id(num: usize) -> String {
if let Some(num) = num.checked_sub(1) {
base_n::encode(num as u128, 36).to_uppercase()
} else {
"".to_string()
}
} }
/// Substitutes a component if found in the substitution dictionary (see /// Substitutes a component if found in the substitution dictionary (see
@ -91,6 +61,37 @@ fn compress<'tcx>(
} }
} }
/// Encodes args using the Itanium C++ ABI with vendor extended type qualifiers and types for Rust
/// types that are not used at the FFI boundary.
///
/// Returns an empty string when `args` is empty; otherwise returns the encodings of every
/// argument, in order, wrapped in an `I..E` pair. Lifetimes, types, and consts are dispatched to
/// `encode_region`, `encode_ty`, and `encode_const` respectively, all sharing the substitution
/// dictionary `dict`.
fn encode_args<'tcx>(
    tcx: TyCtxt<'tcx>,
    args: GenericArgsRef<'tcx>,
    dict: &mut FxHashMap<DictKey<'tcx>, usize>,
    options: EncodeTyOptions,
) -> String {
    // [I<subst1..substN>E] as part of vendor extended type
    //
    // The argument list is queried and iterated directly; no intermediate `Vec` is needed just
    // to check for emptiness.
    if args.is_empty() {
        return String::new();
    }

    let mut s = String::from("I");
    for arg in args.iter() {
        let encoded = match arg.unpack() {
            GenericArgKind::Lifetime(region) => encode_region(region, dict),
            GenericArgKind::Type(ty) => encode_ty(tcx, ty, dict, options),
            GenericArgKind::Const(c) => encode_const(tcx, c, dict, options),
        };
        s.push_str(&encoded);
    }
    s.push('E');
    s
}
/// Encodes a const using the Itanium C++ ABI as a literal argument (see /// Encodes a const using the Itanium C++ ABI as a literal argument (see
/// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling.literal>). /// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling.literal>).
fn encode_const<'tcx>( fn encode_const<'tcx>(
@ -159,7 +160,6 @@ fn encode_const<'tcx>(
/// Encodes a FnSig using the Itanium C++ ABI with vendor extended type qualifiers and types for /// Encodes a FnSig using the Itanium C++ ABI with vendor extended type qualifiers and types for
/// Rust types that are not used at the FFI boundary. /// Rust types that are not used at the FFI boundary.
#[instrument(level = "trace", skip(tcx, dict))]
fn encode_fnsig<'tcx>( fn encode_fnsig<'tcx>(
tcx: TyCtxt<'tcx>, tcx: TyCtxt<'tcx>,
fn_sig: &FnSig<'tcx>, fn_sig: &FnSig<'tcx>,
@ -299,137 +299,10 @@ fn encode_region<'tcx>(region: Region<'tcx>, dict: &mut FxHashMap<DictKey<'tcx>,
s s
} }
/// Encodes args using the Itanium C++ ABI with vendor extended type qualifiers and types for Rust
/// types that are not used at the FFI boundary.
///
/// Returns an empty string when `args` is empty; otherwise returns the encodings of every
/// argument, in order, wrapped in an `I..E` pair. Lifetimes, types, and consts are dispatched to
/// `encode_region`, `encode_ty`, and `encode_const` respectively, all sharing the substitution
/// dictionary `dict`.
fn encode_args<'tcx>(
    tcx: TyCtxt<'tcx>,
    args: GenericArgsRef<'tcx>,
    dict: &mut FxHashMap<DictKey<'tcx>, usize>,
    options: EncodeTyOptions,
) -> String {
    // [I<subst1..substN>E] as part of vendor extended type
    //
    // The argument list is queried and iterated directly; no intermediate `Vec` is needed just
    // to check for emptiness.
    if args.is_empty() {
        return String::new();
    }

    let mut s = String::from("I");
    for arg in args.iter() {
        let encoded = match arg.unpack() {
            GenericArgKind::Lifetime(region) => encode_region(region, dict),
            GenericArgKind::Type(ty) => encode_ty(tcx, ty, dict, options),
            GenericArgKind::Const(c) => encode_const(tcx, c, dict, options),
        };
        s.push_str(&encoded);
    }
    s.push('E');
    s
}
/// Encodes a ty:Ty name, including its crate and path disambiguators and names.
fn encode_ty_name(tcx: TyCtxt<'_>, def_id: DefId) -> String {
// Encode <name> for use in u<length><name>[I<element-type1..element-typeN>E], where
// <element-type> is <subst>, using v0's <path> without v0's extended form of paths:
//
// N<namespace-tagN>..N<namespace-tag1>
// C<crate-disambiguator><crate-name>
// <path-disambiguator1><path-name1>..<path-disambiguatorN><path-nameN>
//
// With additional tags for DefPathData::Impl and DefPathData::ForeignMod. For instance:
//
// pub type Type1 = impl Send;
// let _: Type1 = <Struct1<i32>>::foo;
// fn foo1(_: Type1) { }
//
// pub type Type2 = impl Send;
// let _: Type2 = <Trait1<i32>>::foo;
// fn foo2(_: Type2) { }
//
// pub type Type3 = impl Send;
// let _: Type3 = <i32 as Trait1<i32>>::foo;
// fn foo3(_: Type3) { }
//
// pub type Type4 = impl Send;
// let _: Type4 = <Struct1<i32> as Trait1<i32>>::foo;
// fn foo3(_: Type4) { }
//
// Are encoded as:
//
// _ZTSFvu29NvNIC1234_5crate8{{impl}}3fooIu3i32EE
// _ZTSFvu27NvNtC1234_5crate6Trait13fooIu3dynIu21NtC1234_5crate6Trait1Iu3i32Eu6regionES_EE
// _ZTSFvu27NvNtC1234_5crate6Trait13fooIu3i32S_EE
// _ZTSFvu27NvNtC1234_5crate6Trait13fooIu22NtC1234_5crate7Struct1Iu3i32ES_EE
//
// The reason for not using v0's extended form of paths is to use a consistent and simpler
// encoding, as the reasoning for using it isn't relevant for type metadata identifiers (i.e.,
// keep symbol names close to how methods are represented in error messages). See
// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html#methods.
let mut s = String::new();
// Start and namespace tags
let mut def_path = tcx.def_path(def_id);
def_path.data.reverse();
for disambiguated_data in &def_path.data {
s.push('N');
s.push_str(match disambiguated_data.data {
hir::definitions::DefPathData::Impl => "I", // Not specified in v0's <namespace>
hir::definitions::DefPathData::ForeignMod => "F", // Not specified in v0's <namespace>
hir::definitions::DefPathData::TypeNs(..) => "t",
hir::definitions::DefPathData::ValueNs(..) => "v",
hir::definitions::DefPathData::Closure => "C",
hir::definitions::DefPathData::Ctor => "c",
hir::definitions::DefPathData::AnonConst => "k",
hir::definitions::DefPathData::OpaqueTy => "i",
hir::definitions::DefPathData::CrateRoot
| hir::definitions::DefPathData::Use
| hir::definitions::DefPathData::GlobalAsm
| hir::definitions::DefPathData::MacroNs(..)
| hir::definitions::DefPathData::LifetimeNs(..)
| hir::definitions::DefPathData::AnonAdt => {
bug!("encode_ty_name: unexpected `{:?}`", disambiguated_data.data);
}
});
}
// Crate disambiguator and name
s.push('C');
s.push_str(&to_disambiguator(tcx.stable_crate_id(def_path.krate).as_u64()));
let crate_name = tcx.crate_name(def_path.krate).to_string();
let _ = write!(s, "{}{}", crate_name.len(), &crate_name);
// Disambiguators and names
def_path.data.reverse();
for disambiguated_data in &def_path.data {
let num = disambiguated_data.disambiguator as u64;
if num > 0 {
s.push_str(&to_disambiguator(num));
}
let name = disambiguated_data.data.to_string();
let _ = write!(s, "{}", name.len());
// Prepend a '_' if name starts with a digit or '_'
if let Some(first) = name.as_bytes().first() {
if first.is_ascii_digit() || *first == b'_' {
s.push('_');
}
} else {
bug!("encode_ty_name: invalid name `{:?}`", name);
}
s.push_str(&name);
}
s
}
/// Encodes a ty:Ty using the Itanium C++ ABI with vendor extended type qualifiers and types for /// Encodes a ty:Ty using the Itanium C++ ABI with vendor extended type qualifiers and types for
/// Rust types that are not used at the FFI boundary. /// Rust types that are not used at the FFI boundary.
fn encode_ty<'tcx>( #[instrument(level = "trace", skip(tcx, dict))]
pub fn encode_ty<'tcx>(
tcx: TyCtxt<'tcx>, tcx: TyCtxt<'tcx>,
ty: Ty<'tcx>, ty: Ty<'tcx>,
dict: &mut FxHashMap<DictKey<'tcx>, usize>, dict: &mut FxHashMap<DictKey<'tcx>, usize>,
@ -762,486 +635,119 @@ fn encode_ty<'tcx>(
typeid typeid
} }
struct TransformTy<'tcx> { /// Encodes a ty:Ty name, including its crate and path disambiguators and names.
tcx: TyCtxt<'tcx>, fn encode_ty_name(tcx: TyCtxt<'_>, def_id: DefId) -> String {
options: TransformTyOptions, // Encode <name> for use in u<length><name>[I<element-type1..element-typeN>E], where
parents: Vec<Ty<'tcx>>, // <element-type> is <subst>, using v0's <path> without v0's extended form of paths:
} //
// N<namespace-tagN>..N<namespace-tag1>
// C<crate-disambiguator><crate-name>
// <path-disambiguator1><path-name1>..<path-disambiguatorN><path-nameN>
//
// With additional tags for DefPathData::Impl and DefPathData::ForeignMod. For instance:
//
// pub type Type1 = impl Send;
// let _: Type1 = <Struct1<i32>>::foo;
// fn foo1(_: Type1) { }
//
// pub type Type2 = impl Send;
// let _: Type2 = <Trait1<i32>>::foo;
// fn foo2(_: Type2) { }
//
// pub type Type3 = impl Send;
// let _: Type3 = <i32 as Trait1<i32>>::foo;
// fn foo3(_: Type3) { }
//
// pub type Type4 = impl Send;
// let _: Type4 = <Struct1<i32> as Trait1<i32>>::foo;
// fn foo3(_: Type4) { }
//
// Are encoded as:
//
// _ZTSFvu29NvNIC1234_5crate8{{impl}}3fooIu3i32EE
// _ZTSFvu27NvNtC1234_5crate6Trait13fooIu3dynIu21NtC1234_5crate6Trait1Iu3i32Eu6regionES_EE
// _ZTSFvu27NvNtC1234_5crate6Trait13fooIu3i32S_EE
// _ZTSFvu27NvNtC1234_5crate6Trait13fooIu22NtC1234_5crate7Struct1Iu3i32ES_EE
//
// The reason for not using v0's extended form of paths is to use a consistent and simpler
// encoding, as the reasoning for using it isn't relevant for type metadata identifiers (i.e.,
// keep symbol names close to how methods are represented in error messages). See
// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html#methods.
let mut s = String::new();
impl<'tcx> TransformTy<'tcx> { // Start and namespace tags
fn new(tcx: TyCtxt<'tcx>, options: TransformTyOptions) -> Self { let mut def_path = tcx.def_path(def_id);
TransformTy { tcx, options, parents: Vec::new() } def_path.data.reverse();
} for disambiguated_data in &def_path.data {
} s.push('N');
s.push_str(match disambiguated_data.data {
impl<'tcx> TypeFolder<TyCtxt<'tcx>> for TransformTy<'tcx> { hir::definitions::DefPathData::Impl => "I", // Not specified in v0's <namespace>
// Transforms a ty:Ty for being encoded and used in the substitution dictionary. It transforms hir::definitions::DefPathData::ForeignMod => "F", // Not specified in v0's <namespace>
// all c_void types into unit types unconditionally, generalizes pointers if hir::definitions::DefPathData::TypeNs(..) => "t",
// TransformTyOptions::GENERALIZE_POINTERS option is set, and normalizes integers if hir::definitions::DefPathData::ValueNs(..) => "v",
// TransformTyOptions::NORMALIZE_INTEGERS option is set. hir::definitions::DefPathData::Closure => "C",
fn fold_ty(&mut self, t: Ty<'tcx>) -> Ty<'tcx> { hir::definitions::DefPathData::Ctor => "c",
match t.kind() { hir::definitions::DefPathData::AnonConst => "k",
ty::Array(..) hir::definitions::DefPathData::OpaqueTy => "i",
| ty::Closure(..) hir::definitions::DefPathData::CrateRoot
| ty::Coroutine(..) | hir::definitions::DefPathData::Use
| ty::CoroutineClosure(..) | hir::definitions::DefPathData::GlobalAsm
| ty::CoroutineWitness(..) | hir::definitions::DefPathData::MacroNs(..)
| ty::Dynamic(..) | hir::definitions::DefPathData::LifetimeNs(..)
| ty::Float(..) | hir::definitions::DefPathData::AnonAdt => {
| ty::FnDef(..) bug!("encode_ty_name: unexpected `{:?}`", disambiguated_data.data);
| ty::Foreign(..)
| ty::Never
| ty::Slice(..)
| ty::Pat(..)
| ty::Str
| ty::Tuple(..) => t.super_fold_with(self),
ty::Bool => {
if self.options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) {
// Note: on all platforms that Rust's currently supports, its size and alignment
// are 1, and its ABI class is INTEGER - see Rust Layout and ABIs.
//
// (See https://rust-lang.github.io/unsafe-code-guidelines/layout/scalars.html#bool.)
//
// Clang represents bool as an 8-bit unsigned integer.
self.tcx.types.u8
} else {
t
}
} }
ty::Char => {
if self.options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) {
// Since #118032, char is guaranteed to have the same size, alignment, and
// function call ABI as u32 on all platforms.
self.tcx.types.u32
} else {
t
}
}
ty::Int(..) | ty::Uint(..) => {
if self.options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) {
// Note: C99 7.18.2.4 requires uintptr_t and intptr_t to be at least 16-bit
// wide. All platforms we currently support have a C platform, and as a
// consequence, isize/usize are at least 16-bit wide for all of them.
//
// (See https://rust-lang.github.io/unsafe-code-guidelines/layout/scalars.html#isize-and-usize.)
match t.kind() {
ty::Int(IntTy::Isize) => match self.tcx.sess.target.pointer_width {
16 => self.tcx.types.i16,
32 => self.tcx.types.i32,
64 => self.tcx.types.i64,
128 => self.tcx.types.i128,
_ => bug!(
"fold_ty: unexpected pointer width `{}`",
self.tcx.sess.target.pointer_width
),
},
ty::Uint(UintTy::Usize) => match self.tcx.sess.target.pointer_width {
16 => self.tcx.types.u16,
32 => self.tcx.types.u32,
64 => self.tcx.types.u64,
128 => self.tcx.types.u128,
_ => bug!(
"fold_ty: unexpected pointer width `{}`",
self.tcx.sess.target.pointer_width
),
},
_ => t,
}
} else {
t
}
}
ty::Adt(..) if t.is_c_void(self.tcx) => self.tcx.types.unit,
ty::Adt(adt_def, args) => {
if adt_def.repr().transparent() && adt_def.is_struct() && !self.parents.contains(&t)
{
// Don't transform repr(transparent) types with an user-defined CFI encoding to
// preserve the user-defined CFI encoding.
if let Some(_) = self.tcx.get_attr(adt_def.did(), sym::cfi_encoding) {
return t;
}
let variant = adt_def.non_enum_variant();
let param_env = self.tcx.param_env(variant.def_id);
let field = variant.fields.iter().find(|field| {
let ty = self.tcx.type_of(field.did).instantiate_identity();
let is_zst = self
.tcx
.layout_of(param_env.and(ty))
.is_ok_and(|layout| layout.is_zst());
!is_zst
});
if let Some(field) = field {
let ty0 = self.tcx.type_of(field.did).instantiate(self.tcx, args);
// Generalize any repr(transparent) user-defined type that is either a
// pointer or reference, and either references itself or any other type that
// contains or references itself, to avoid a reference cycle.
// If the self reference is not through a pointer, for example, due
// to using `PhantomData`, need to skip normalizing it if we hit it again.
self.parents.push(t);
let ty = if ty0.is_any_ptr() && ty0.contains(t) {
let options = self.options;
self.options |= TransformTyOptions::GENERALIZE_POINTERS;
let ty = ty0.fold_with(self);
self.options = options;
ty
} else {
ty0.fold_with(self)
};
self.parents.pop();
ty
} else {
// Transform repr(transparent) types without non-ZST field into ()
self.tcx.types.unit
}
} else {
t.super_fold_with(self)
}
}
ty::Ref(..) => {
if self.options.contains(TransformTyOptions::GENERALIZE_POINTERS) {
if t.is_mutable_ptr() {
Ty::new_mut_ref(self.tcx, self.tcx.lifetimes.re_static, self.tcx.types.unit)
} else {
Ty::new_imm_ref(self.tcx, self.tcx.lifetimes.re_static, self.tcx.types.unit)
}
} else {
t.super_fold_with(self)
}
}
ty::RawPtr(..) => {
if self.options.contains(TransformTyOptions::GENERALIZE_POINTERS) {
if t.is_mutable_ptr() {
Ty::new_mut_ptr(self.tcx, self.tcx.types.unit)
} else {
Ty::new_imm_ptr(self.tcx, self.tcx.types.unit)
}
} else {
t.super_fold_with(self)
}
}
ty::FnPtr(..) => {
if self.options.contains(TransformTyOptions::GENERALIZE_POINTERS) {
Ty::new_imm_ptr(self.tcx, self.tcx.types.unit)
} else {
t.super_fold_with(self)
}
}
ty::Alias(..) => {
self.fold_ty(self.tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), t))
}
ty::Bound(..) | ty::Error(..) | ty::Infer(..) | ty::Param(..) | ty::Placeholder(..) => {
bug!("fold_ty: unexpected `{:?}`", t.kind());
}
}
}
fn interner(&self) -> TyCtxt<'tcx> {
self.tcx
}
}
/// Returns a type metadata identifier for the specified FnAbi using the Itanium C++ ABI with vendor
/// extended type qualifiers and types for Rust types that are not used at the FFI boundary.
#[instrument(level = "trace", skip(tcx))]
pub fn typeid_for_fnabi<'tcx>(
tcx: TyCtxt<'tcx>,
fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
options: TypeIdOptions,
) -> String {
// A name is mangled by prefixing "_Z" to an encoding of its name, and in the case of functions
// its type.
let mut typeid = String::from("_Z");
// Clang uses the Itanium C++ ABI's virtual tables and RTTI typeinfo structure name as type
// metadata identifiers for function pointers. The typeinfo name encoding is a two-character
// code (i.e., 'TS') prefixed to the type encoding for the function.
typeid.push_str("TS");
// Function types are delimited by an "F..E" pair
typeid.push('F');
// A dictionary of substitution candidates used for compression (see
// https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-compression).
let mut dict: FxHashMap<DictKey<'tcx>, usize> = FxHashMap::default();
let mut encode_ty_options = EncodeTyOptions::from_bits(options.bits())
.unwrap_or_else(|| bug!("typeid_for_fnabi: invalid option(s) `{:?}`", options.bits()));
match fn_abi.conv {
Conv::C => {
encode_ty_options.insert(EncodeTyOptions::GENERALIZE_REPR_C);
}
_ => {
encode_ty_options.remove(EncodeTyOptions::GENERALIZE_REPR_C);
}
}
// Encode the return type
let transform_ty_options = TransformTyOptions::from_bits(options.bits())
.unwrap_or_else(|| bug!("typeid_for_fnabi: invalid option(s) `{:?}`", options.bits()));
let mut type_folder = TransformTy::new(tcx, transform_ty_options);
let ty = fn_abi.ret.layout.ty.fold_with(&mut type_folder);
typeid.push_str(&encode_ty(tcx, ty, &mut dict, encode_ty_options));
// Encode the parameter types
// We erase ZSTs as we go if the argument is skipped. This is an implementation detail of how
// MIR is currently treated by rustc, and subject to change in the future. Specifically, MIR
// interpretation today will allow skipped arguments to simply not be passed at a call-site.
if !fn_abi.c_variadic {
let mut pushed_arg = false;
for arg in fn_abi.args.iter().filter(|arg| arg.mode != PassMode::Ignore) {
pushed_arg = true;
let ty = arg.layout.ty.fold_with(&mut type_folder);
typeid.push_str(&encode_ty(tcx, ty, &mut dict, encode_ty_options));
}
if !pushed_arg {
// Empty parameter lists, whether declared as () or conventionally as (void), are
// encoded with a void parameter specifier "v".
typeid.push('v');
}
} else {
for n in 0..fn_abi.fixed_count as usize {
if fn_abi.args[n].mode == PassMode::Ignore {
continue;
}
let ty = fn_abi.args[n].layout.ty.fold_with(&mut type_folder);
typeid.push_str(&encode_ty(tcx, ty, &mut dict, encode_ty_options));
}
typeid.push('z');
}
// Close the "F..E" pair
typeid.push('E');
// Add encoding suffixes
if options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) {
typeid.push_str(".normalized");
}
if options.contains(EncodeTyOptions::GENERALIZE_POINTERS) {
typeid.push_str(".generalized");
}
typeid
}
/// Returns a type metadata identifier for the specified Instance using the Itanium C++ ABI with
/// vendor extended type qualifiers and types for Rust types that are not used at the FFI boundary.
pub fn typeid_for_instance<'tcx>(
tcx: TyCtxt<'tcx>,
mut instance: Instance<'tcx>,
options: TypeIdOptions,
) -> String {
if (matches!(instance.def, ty::InstanceDef::Virtual(..))
&& Some(instance.def_id()) == tcx.lang_items().drop_in_place_fn())
|| matches!(instance.def, ty::InstanceDef::DropGlue(..))
{
// Adjust the type ids of DropGlues
//
// DropGlues may have indirect calls to one or more given types drop function. Rust allows
// for types to be erased to any trait object and retains the drop function for the original
// type, which means at the indirect call sites in DropGlues, when typeid_for_fnabi is
// called a second time, it only has information after type erasure and it could be a call
// on any arbitrary trait object. Normalize them to a synthesized Drop trait object, both on
// declaration/definition, and during code generation at call sites so they have the same
// type id and match.
//
// FIXME(rcvalle): This allows a drop call on any trait object to call the drop function of
// any other type.
//
let def_id = tcx
.lang_items()
.drop_trait()
.unwrap_or_else(|| bug!("typeid_for_instance: couldn't get drop_trait lang item"));
let predicate = ty::ExistentialPredicate::Trait(ty::ExistentialTraitRef {
def_id: def_id,
args: List::empty(),
}); });
let predicates = tcx.mk_poly_existential_predicates(&[ty::Binder::dummy(predicate)]);
let self_ty = Ty::new_dynamic(tcx, predicates, tcx.lifetimes.re_erased, ty::Dyn);
instance.args = tcx.mk_args_trait(self_ty, List::empty());
} else if let ty::InstanceDef::Virtual(def_id, _) = instance.def {
let upcast_ty = match tcx.trait_of_item(def_id) {
Some(trait_id) => trait_object_ty(
tcx,
ty::Binder::dummy(ty::TraitRef::from_method(tcx, trait_id, instance.args)),
),
// drop_in_place won't have a defining trait, skip the upcast
None => instance.args.type_at(0),
};
let stripped_ty = strip_receiver_auto(tcx, upcast_ty);
instance.args = tcx.mk_args_trait(stripped_ty, instance.args.into_iter().skip(1));
} else if let ty::InstanceDef::VTableShim(def_id) = instance.def
&& let Some(trait_id) = tcx.trait_of_item(def_id)
{
// VTableShims may have a trait method, but a concrete Self. This is not suitable for a vtable,
// as the caller will not know the concrete Self.
let trait_ref = ty::TraitRef::new(tcx, trait_id, instance.args);
let invoke_ty = trait_object_ty(tcx, ty::Binder::dummy(trait_ref));
instance.args = tcx.mk_args_trait(invoke_ty, trait_ref.args.into_iter().skip(1));
} }
if !options.contains(EncodeTyOptions::USE_CONCRETE_SELF) { // Crate disambiguator and name
if let Some(impl_id) = tcx.impl_of_method(instance.def_id()) s.push('C');
&& let Some(trait_ref) = tcx.impl_trait_ref(impl_id) s.push_str(&to_disambiguator(tcx.stable_crate_id(def_path.krate).as_u64()));
{ let crate_name = tcx.crate_name(def_path.krate).to_string();
let impl_method = tcx.associated_item(instance.def_id()); let _ = write!(s, "{}{}", crate_name.len(), &crate_name);
let method_id = impl_method
.trait_item_def_id
.expect("Part of a trait implementation, but not linked to the def_id?");
let trait_method = tcx.associated_item(method_id);
let trait_id = trait_ref.skip_binder().def_id;
if traits::is_vtable_safe_method(tcx, trait_id, trait_method)
&& tcx.object_safety_violations(trait_id).is_empty()
{
// Trait methods will have a Self polymorphic parameter, where the concreteized
// implementatation will not. We need to walk back to the more general trait method
let trait_ref = tcx.instantiate_and_normalize_erasing_regions(
instance.args,
ty::ParamEnv::reveal_all(),
trait_ref,
);
let invoke_ty = trait_object_ty(tcx, ty::Binder::dummy(trait_ref));
// At the call site, any call to this concrete function through a vtable will be // Disambiguators and names
// `Virtual(method_id, idx)` with appropriate arguments for the method. Since we have the def_path.data.reverse();
// original method id, and we've recovered the trait arguments, we can make the callee for disambiguated_data in &def_path.data {
// instance we're computing the alias set for match the caller instance. let num = disambiguated_data.disambiguator as u64;
// if num > 0 {
// Right now, our code ignores the vtable index everywhere, so we use 0 as a placeholder. s.push_str(&to_disambiguator(num));
// If we ever *do* start encoding the vtable index, we will need to generate an alias set
// based on which vtables we are putting this method into, as there will be more than one
// index value when supertraits are involved.
instance.def = ty::InstanceDef::Virtual(method_id, 0);
let abstract_trait_args =
tcx.mk_args_trait(invoke_ty, trait_ref.args.into_iter().skip(1));
instance.args = instance.args.rebase_onto(tcx, impl_id, abstract_trait_args);
}
} else if tcx.is_closure_like(instance.def_id()) {
// We're either a closure or a coroutine. Our goal is to find the trait we're defined on,
// instantiate it, and take the type of its only method as our own.
let closure_ty = instance.ty(tcx, ty::ParamEnv::reveal_all());
let (trait_id, inputs) = match closure_ty.kind() {
ty::Closure(..) => {
let closure_args = instance.args.as_closure();
let trait_id = tcx.fn_trait_kind_to_def_id(closure_args.kind()).unwrap();
let tuple_args =
tcx.instantiate_bound_regions_with_erased(closure_args.sig()).inputs()[0];
(trait_id, Some(tuple_args))
}
ty::Coroutine(..) => match tcx.coroutine_kind(instance.def_id()).unwrap() {
hir::CoroutineKind::Coroutine(..) => (
tcx.require_lang_item(LangItem::Coroutine, None),
Some(instance.args.as_coroutine().resume_ty()),
),
hir::CoroutineKind::Desugared(desugaring, _) => {
let lang_item = match desugaring {
hir::CoroutineDesugaring::Async => LangItem::Future,
hir::CoroutineDesugaring::AsyncGen => LangItem::AsyncIterator,
hir::CoroutineDesugaring::Gen => LangItem::Iterator,
};
(tcx.require_lang_item(lang_item, None), None)
}
},
ty::CoroutineClosure(..) => (
tcx.require_lang_item(LangItem::FnOnce, None),
Some(
tcx.instantiate_bound_regions_with_erased(
instance.args.as_coroutine_closure().coroutine_closure_sig(),
)
.tupled_inputs_ty,
),
),
x => bug!("Unexpected type kind for closure-like: {x:?}"),
};
let concrete_args = tcx.mk_args_trait(closure_ty, inputs.map(Into::into));
let trait_ref = ty::TraitRef::new(tcx, trait_id, concrete_args);
let invoke_ty = trait_object_ty(tcx, ty::Binder::dummy(trait_ref));
let abstract_args = tcx.mk_args_trait(invoke_ty, trait_ref.args.into_iter().skip(1));
// There should be exactly one method on this trait, and it should be the one we're
// defining.
let call = tcx
.associated_items(trait_id)
.in_definition_order()
.find(|it| it.kind == ty::AssocKind::Fn)
.expect("No call-family function on closure-like Fn trait?")
.def_id;
instance.def = ty::InstanceDef::Virtual(call, 0);
instance.args = abstract_args;
} }
let name = disambiguated_data.data.to_string();
let _ = write!(s, "{}", name.len());
// Prepend a '_' if name starts with a digit or '_'
if let Some(first) = name.as_bytes().first() {
if first.is_ascii_digit() || *first == b'_' {
s.push('_');
}
} else {
bug!("encode_ty_name: invalid name `{:?}`", name);
}
s.push_str(&name);
} }
let fn_abi = tcx s
.fn_abi_of_instance(tcx.param_env(instance.def_id()).and((instance, ty::List::empty())))
.unwrap_or_else(|error| {
bug!("typeid_for_instance: couldn't get fn_abi of instance {instance:?}: {error:?}")
});
typeid_for_fnabi(tcx, fn_abi, options)
} }
fn strip_receiver_auto<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Ty<'tcx> { /// Converts a number to a disambiguator (see
let ty::Dynamic(preds, lifetime, kind) = ty.kind() else { /// <https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html>).
bug!("Tried to strip auto traits from non-dynamic type {ty}"); fn to_disambiguator(num: u64) -> String {
}; if let Some(num) = num.checked_sub(1) {
if preds.principal().is_some() { format!("s{}_", base_n::encode(num as u128, 62))
let filtered_preds =
tcx.mk_poly_existential_predicates_from_iter(preds.into_iter().filter(|pred| {
!matches!(pred.skip_binder(), ty::ExistentialPredicate::AutoTrait(..))
}));
Ty::new_dynamic(tcx, filtered_preds, *lifetime, *kind)
} else { } else {
// If there's no principal type, re-encode it as a unit, since we don't know anything "s_".to_string()
// about it. This technically discards the knowledge that it was a type that was made
// into a trait object at some point, but that's not a lot.
tcx.types.unit
} }
} }
#[instrument(skip(tcx), ret)] /// Converts a number to a sequence number (see
fn trait_object_ty<'tcx>(tcx: TyCtxt<'tcx>, poly_trait_ref: ty::PolyTraitRef<'tcx>) -> Ty<'tcx> { /// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle.seq-id>).
assert!(!poly_trait_ref.has_non_region_param()); fn to_seq_id(num: usize) -> String {
let principal_pred = poly_trait_ref.map_bound(|trait_ref| { if let Some(num) = num.checked_sub(1) {
ty::ExistentialPredicate::Trait(ty::ExistentialTraitRef::erase_self_ty(tcx, trait_ref)) base_n::encode(num as u128, 36).to_uppercase()
}); } else {
let mut assoc_preds: Vec<_> = traits::supertraits(tcx, poly_trait_ref) "".to_string()
.flat_map(|super_poly_trait_ref| { }
tcx.associated_items(super_poly_trait_ref.def_id())
.in_definition_order()
.filter(|item| item.kind == ty::AssocKind::Type)
.map(move |assoc_ty| {
super_poly_trait_ref.map_bound(|super_trait_ref| {
let alias_ty = ty::AliasTy::new(tcx, assoc_ty.def_id, super_trait_ref.args);
let resolved = tcx.normalize_erasing_regions(
ty::ParamEnv::reveal_all(),
alias_ty.to_ty(tcx),
);
debug!("Resolved {:?} -> {resolved}", alias_ty.to_ty(tcx));
ty::ExistentialPredicate::Projection(ty::ExistentialProjection {
def_id: assoc_ty.def_id,
args: ty::ExistentialTraitRef::erase_self_ty(tcx, super_trait_ref).args,
term: resolved.into(),
})
})
})
})
.collect();
assoc_preds.sort_by(|a, b| a.skip_binder().stable_cmp(tcx, &b.skip_binder()));
let preds = tcx.mk_poly_existential_predicates_from_iter(
iter::once(principal_pred).chain(assoc_preds.into_iter()),
);
Ty::new_dynamic(tcx, preds, tcx.lifetimes.re_erased, ty::Dyn)
} }

View File

@ -0,0 +1,123 @@
//! Type metadata identifiers (using Itanium C++ ABI mangling for encoding) for LLVM Control Flow
//! Integrity (CFI) and cross-language LLVM CFI support.
//!
//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler,
//! see design document in the tracking issue #89653.
use rustc_data_structures::fx::FxHashMap;
use rustc_middle::bug;
use rustc_middle::ty::{self, Instance, Ty, TyCtxt, TypeFoldable};
use rustc_target::abi::call::{Conv, FnAbi, PassMode};
use tracing::instrument;
mod encode;
mod transform;
use crate::cfi::typeid::itanium_cxx_abi::encode::{encode_ty, DictKey, EncodeTyOptions};
use crate::cfi::typeid::itanium_cxx_abi::transform::{
transform_instance, TransformTy, TransformTyOptions,
};
use crate::cfi::typeid::TypeIdOptions;
/// Builds the type metadata identifier for a `FnAbi` using Itanium C++ ABI mangling, with vendor
/// extended type qualifiers and types for Rust types that are not used at the FFI boundary.
///
/// The result has the shape `_ZTSF<return type><parameter types>E`, optionally followed by the
/// `.normalized` and/or `.generalized` suffixes selected through `options`.
#[instrument(level = "trace", skip(tcx))]
pub fn typeid_for_fnabi<'tcx>(
    tcx: TyCtxt<'tcx>,
    fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
    options: TypeIdOptions,
) -> String {
    // Mangled names start with "_Z", followed by an encoding of the name (and, for functions, of
    // the type).
    let mut typeid = String::from("_Z");

    // Clang uses the Itanium C++ ABI's virtual tables and RTTI typeinfo structure name as type
    // metadata identifiers for function pointers: the two-character code "TS" prefixed to the
    // encoding of the function type.
    typeid.push_str("TS");

    // Open the "F..E" pair that delimits a function type.
    typeid.push('F');

    // Substitution candidates used for compression (see
    // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-compression).
    let mut dict: FxHashMap<DictKey<'tcx>, usize> = FxHashMap::default();

    let mut encode_ty_options = match EncodeTyOptions::from_bits(options.bits()) {
        Some(opts) => opts,
        None => bug!("typeid_for_fnabi: invalid option(s) `{:?}`", options.bits()),
    };
    // repr(C) generalization is tied to the calling convention rather than to the caller's choice.
    if matches!(fn_abi.conv, Conv::C) {
        encode_ty_options.insert(EncodeTyOptions::GENERALIZE_REPR_C);
    } else {
        encode_ty_options.remove(EncodeTyOptions::GENERALIZE_REPR_C);
    }

    // Return type comes first.
    let transform_ty_options = match TransformTyOptions::from_bits(options.bits()) {
        Some(opts) => opts,
        None => bug!("typeid_for_fnabi: invalid option(s) `{:?}`", options.bits()),
    };
    let mut type_folder = TransformTy::new(tcx, transform_ty_options);
    let ret_ty = fn_abi.ret.layout.ty.fold_with(&mut type_folder);
    typeid.push_str(&encode_ty(tcx, ret_ty, &mut dict, encode_ty_options));

    // Then the parameter types. Arguments whose mode is `PassMode::Ignore` are erased as we go.
    // This is an implementation detail of how MIR is currently treated by rustc, and subject to
    // change in the future. Specifically, MIR interpretation today will allow skipped arguments to
    // simply not be passed at a call-site.
    //
    // For C-variadic functions only the fixed arguments take part in the encoding.
    let fixed_args = if fn_abi.c_variadic {
        &fn_abi.args[..fn_abi.fixed_count as usize]
    } else {
        &fn_abi.args[..]
    };
    let mut pushed_arg = false;
    for arg in fixed_args.iter().filter(|arg| arg.mode != PassMode::Ignore) {
        pushed_arg = true;
        let arg_ty = arg.layout.ty.fold_with(&mut type_folder);
        typeid.push_str(&encode_ty(tcx, arg_ty, &mut dict, encode_ty_options));
    }
    if fn_abi.c_variadic {
        // Variadic parameter lists are terminated with "z".
        typeid.push('z');
    } else if !pushed_arg {
        // Empty parameter lists, whether declared as () or conventionally as (void), are
        // encoded with a void parameter specifier "v".
        typeid.push('v');
    }

    // Close the "F..E" pair.
    typeid.push('E');

    // Encoding suffixes, in a fixed order.
    let suffixes = [
        (EncodeTyOptions::NORMALIZE_INTEGERS, ".normalized"),
        (EncodeTyOptions::GENERALIZE_POINTERS, ".generalized"),
    ];
    for (flag, suffix) in suffixes {
        if options.contains(flag) {
            typeid.push_str(suffix);
        }
    }

    typeid
}
/// Builds the type metadata identifier for an `Instance` using Itanium C++ ABI mangling, with
/// vendor extended type qualifiers and types for Rust types that are not used at the FFI boundary.
///
/// The instance is first passed through `transform_instance`; the `FnAbi` of the transformed
/// instance is then encoded with `typeid_for_fnabi`.
#[instrument(level = "trace", skip(tcx))]
pub fn typeid_for_instance<'tcx>(
    tcx: TyCtxt<'tcx>,
    instance: Instance<'tcx>,
    options: TypeIdOptions,
) -> String {
    let Some(transform_ty_options) = TransformTyOptions::from_bits(options.bits()) else {
        bug!("typeid_for_instance: invalid option(s) `{:?}`", options.bits())
    };
    let instance = transform_instance(tcx, instance, transform_ty_options);

    // Query the ABI of the transformed instance, with no extra (variadic) argument types.
    let query_input = tcx.param_env(instance.def_id()).and((instance, ty::List::empty()));
    match tcx.fn_abi_of_instance(query_input) {
        Ok(fn_abi) => typeid_for_fnabi(tcx, fn_abi, options),
        Err(error) => {
            bug!("typeid_for_instance: couldn't get fn_abi of instance {instance:?}: {error:?}")
        }
    }
}

View File

@ -0,0 +1,450 @@
//! Transforms instances and types for LLVM CFI and cross-language LLVM CFI support using Itanium
//! C++ ABI mangling.
//!
//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler,
//! see design document in the tracking issue #89653.
use rustc_hir as hir;
use rustc_hir::LangItem;
use rustc_middle::bug;
use rustc_middle::ty::fold::{TypeFolder, TypeSuperFoldable};
use rustc_middle::ty::{
self, Instance, IntTy, List, Ty, TyCtxt, TypeFoldable, TypeVisitableExt, UintTy,
};
use rustc_span::sym;
use rustc_trait_selection::traits;
use std::iter;
use tracing::{debug, instrument};
use crate::cfi::typeid::itanium_cxx_abi::encode::EncodeTyOptions;
use crate::cfi::typeid::TypeIdOptions;
/// Options for transform_ty.
///
/// This is an alias of `TypeIdOptions`, so the same flag values are shared between both names.
pub type TransformTyOptions = TypeIdOptions;
/// State carried while folding a type into the form that is encoded into a type id.
///
/// Created through `TransformTy::new` and driven through its `TypeFolder` implementation.
pub struct TransformTy<'tcx> {
// Type context used for all queries and for building replacement types.
tcx: TyCtxt<'tcx>,
// Currently active options; `fold_ty` temporarily adds `GENERALIZE_POINTERS` to this while
// folding a self-referential repr(transparent) pointer field, then restores the saved value.
options: TransformTyOptions,
// Stack of repr(transparent) struct types whose field is currently being folded; `fold_ty`
// checks it so that a type already on the stack is not unwrapped a second time.
parents: Vec<Ty<'tcx>>,
}
impl<'tcx> TransformTy<'tcx> {
pub fn new(tcx: TyCtxt<'tcx>, options: TransformTyOptions) -> Self {
TransformTy { tcx, options, parents: Vec::new() }
}
}
/// Transforms a `ty::Ty` for being encoded and used in the substitution dictionary.
///
/// * Transforms all c_void types into unit types.
/// * Generalizes pointers if TransformTyOptions::GENERALIZE_POINTERS option is set.
/// * Normalizes integers if TransformTyOptions::NORMALIZE_INTEGERS option is set.
/// * Generalizes any repr(transparent) user-defined type that is either a pointer or reference, and
///   either references itself or any other type that contains or references itself, to avoid a
///   reference cycle.
/// * Transforms repr(transparent) types without non-ZST field into ().
impl<'tcx> TypeFolder<TyCtxt<'tcx>> for TransformTy<'tcx> {
    /// Transforms a single type for being encoded and used in the substitution dictionary.
    ///
    /// Returns the transformed type. Bound, error, inference, parameter and placeholder types are
    /// not expected here and trigger a `bug!`.
    fn fold_ty(&mut self, t: Ty<'tcx>) -> Ty<'tcx> {
        match t.kind() {
            // These kinds have no transformation of their own; only their components are folded.
            ty::Array(..)
            | ty::Closure(..)
            | ty::Coroutine(..)
            | ty::CoroutineClosure(..)
            | ty::CoroutineWitness(..)
            | ty::Dynamic(..)
            | ty::Float(..)
            | ty::FnDef(..)
            | ty::Foreign(..)
            | ty::Never
            | ty::Pat(..)
            | ty::Slice(..)
            | ty::Str
            | ty::Tuple(..) => t.super_fold_with(self),

            ty::Bool => {
                if self.options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) {
                    // Note: on all platforms that Rust currently supports, its size and alignment
                    // are 1, and its ABI class is INTEGER - see Rust Layout and ABIs.
                    //
                    // (See https://rust-lang.github.io/unsafe-code-guidelines/layout/scalars.html#bool.)
                    //
                    // Clang represents bool as an 8-bit unsigned integer.
                    self.tcx.types.u8
                } else {
                    t
                }
            }

            ty::Char => {
                if self.options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) {
                    // Since #118032, char is guaranteed to have the same size, alignment, and
                    // function call ABI as u32 on all platforms.
                    self.tcx.types.u32
                } else {
                    t
                }
            }

            ty::Int(..) | ty::Uint(..) => {
                if self.options.contains(EncodeTyOptions::NORMALIZE_INTEGERS) {
                    // Note: C99 7.18.2.4 requires uintptr_t and intptr_t to be at least 16-bit
                    // wide. All platforms we currently support have a C platform, and as a
                    // consequence, isize/usize are at least 16-bit wide for all of them.
                    //
                    // (See https://rust-lang.github.io/unsafe-code-guidelines/layout/scalars.html#isize-and-usize.)
                    //
                    // Only the pointer-sized integers are replaced; fixed-width integers are kept.
                    match t.kind() {
                        ty::Int(IntTy::Isize) => match self.tcx.sess.target.pointer_width {
                            16 => self.tcx.types.i16,
                            32 => self.tcx.types.i32,
                            64 => self.tcx.types.i64,
                            128 => self.tcx.types.i128,
                            _ => bug!(
                                "fold_ty: unexpected pointer width `{}`",
                                self.tcx.sess.target.pointer_width
                            ),
                        },
                        ty::Uint(UintTy::Usize) => match self.tcx.sess.target.pointer_width {
                            16 => self.tcx.types.u16,
                            32 => self.tcx.types.u32,
                            64 => self.tcx.types.u64,
                            128 => self.tcx.types.u128,
                            _ => bug!(
                                "fold_ty: unexpected pointer width `{}`",
                                self.tcx.sess.target.pointer_width
                            ),
                        },
                        _ => t,
                    }
                } else {
                    t
                }
            }

            ty::Adt(..) if t.is_c_void(self.tcx) => self.tcx.types.unit,

            ty::Adt(adt_def, args) => {
                // A repr(transparent) struct is replaced by its (transformed) non-ZST field,
                // unless it is already being unwrapped further up the stack.
                if adt_def.repr().transparent() && adt_def.is_struct() && !self.parents.contains(&t)
                {
                    // Don't transform repr(transparent) types with an user-defined CFI encoding to
                    // preserve the user-defined CFI encoding.
                    if self.tcx.get_attr(adt_def.did(), sym::cfi_encoding).is_some() {
                        return t;
                    }
                    let variant = adt_def.non_enum_variant();
                    let param_env = self.tcx.param_env(variant.def_id);
                    // First field that is not known to be zero-sized. A field whose layout cannot
                    // be computed is treated as non-ZST.
                    let field = variant.fields.iter().find(|field| {
                        let ty = self.tcx.type_of(field.did).instantiate_identity();
                        let is_zst = self
                            .tcx
                            .layout_of(param_env.and(ty))
                            .is_ok_and(|layout| layout.is_zst());
                        !is_zst
                    });
                    if let Some(field) = field {
                        let ty0 = self.tcx.type_of(field.did).instantiate(self.tcx, args);
                        // Generalize any repr(transparent) user-defined type that is either a
                        // pointer or reference, and either references itself or any other type that
                        // contains or references itself, to avoid a reference cycle.

                        // If the self reference is not through a pointer, for example, due
                        // to using `PhantomData`, need to skip normalizing it if we hit it again.
                        self.parents.push(t);
                        let ty = if ty0.is_any_ptr() && ty0.contains(t) {
                            // Force pointer generalization for this field only, then restore the
                            // caller's options.
                            let options = self.options;
                            self.options |= TransformTyOptions::GENERALIZE_POINTERS;
                            let ty = ty0.fold_with(self);
                            self.options = options;
                            ty
                        } else {
                            ty0.fold_with(self)
                        };
                        self.parents.pop();
                        ty
                    } else {
                        // Transform repr(transparent) types without non-ZST field into ()
                        self.tcx.types.unit
                    }
                } else {
                    t.super_fold_with(self)
                }
            }

            ty::Ref(..) => {
                if self.options.contains(TransformTyOptions::GENERALIZE_POINTERS) {
                    // Keep only the mutability; the pointee becomes `()`.
                    if t.is_mutable_ptr() {
                        Ty::new_mut_ref(self.tcx, self.tcx.lifetimes.re_static, self.tcx.types.unit)
                    } else {
                        Ty::new_imm_ref(self.tcx, self.tcx.lifetimes.re_static, self.tcx.types.unit)
                    }
                } else {
                    t.super_fold_with(self)
                }
            }

            ty::RawPtr(..) => {
                if self.options.contains(TransformTyOptions::GENERALIZE_POINTERS) {
                    // Keep only the mutability; the pointee becomes `()`.
                    if t.is_mutable_ptr() {
                        Ty::new_mut_ptr(self.tcx, self.tcx.types.unit)
                    } else {
                        Ty::new_imm_ptr(self.tcx, self.tcx.types.unit)
                    }
                } else {
                    t.super_fold_with(self)
                }
            }

            ty::FnPtr(..) => {
                if self.options.contains(TransformTyOptions::GENERALIZE_POINTERS) {
                    Ty::new_imm_ptr(self.tcx, self.tcx.types.unit)
                } else {
                    t.super_fold_with(self)
                }
            }

            // Normalize the alias first, then transform whatever it resolves to.
            ty::Alias(..) => {
                self.fold_ty(self.tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), t))
            }

            ty::Bound(..) | ty::Error(..) | ty::Infer(..) | ty::Param(..) | ty::Placeholder(..) => {
                bug!("fold_ty: unexpected `{:?}`", t.kind());
            }
        }
    }

    /// Returns the type context this folder operates on.
    fn interner(&self) -> TyCtxt<'tcx> {
        self.tcx
    }
}
/// Builds the `dyn Trait` type corresponding to `poly_trait_ref`.
///
/// The resulting trait object has the trait itself (with its self type erased) as principal,
/// followed by one projection predicate for every associated type of the trait and its
/// supertraits, each resolved to its normalized type. The projections are sorted with
/// `stable_cmp`. The object uses an erased lifetime and the `ty::Dyn` kind.
///
/// Asserts that `poly_trait_ref` has no non-region parameters.
#[instrument(skip(tcx), ret)]
fn trait_object_ty<'tcx>(tcx: TyCtxt<'tcx>, poly_trait_ref: ty::PolyTraitRef<'tcx>) -> Ty<'tcx> {
    assert!(!poly_trait_ref.has_non_region_param());

    let principal_pred = poly_trait_ref.map_bound(|trait_ref| {
        let erased = ty::ExistentialTraitRef::erase_self_ty(tcx, trait_ref);
        ty::ExistentialPredicate::Trait(erased)
    });

    // One projection predicate per associated type, walking the supertraits in the order
    // `traits::supertraits` yields them and each trait's items in definition order.
    let mut assoc_preds = Vec::new();
    for super_poly_trait_ref in traits::supertraits(tcx, poly_trait_ref) {
        let assoc_tys = tcx
            .associated_items(super_poly_trait_ref.def_id())
            .in_definition_order()
            .filter(|item| item.kind == ty::AssocKind::Type);
        for assoc_ty in assoc_tys {
            let projection_pred = super_poly_trait_ref.map_bound(|super_trait_ref| {
                let alias_ty = ty::AliasTy::new(tcx, assoc_ty.def_id, super_trait_ref.args);
                let resolved = tcx
                    .normalize_erasing_regions(ty::ParamEnv::reveal_all(), alias_ty.to_ty(tcx));
                debug!("Resolved {:?} -> {resolved}", alias_ty.to_ty(tcx));
                ty::ExistentialPredicate::Projection(ty::ExistentialProjection {
                    def_id: assoc_ty.def_id,
                    args: ty::ExistentialTraitRef::erase_self_ty(tcx, super_trait_ref).args,
                    term: resolved.into(),
                })
            });
            assoc_preds.push(projection_pred);
        }
    }
    assoc_preds.sort_by(|a, b| a.skip_binder().stable_cmp(tcx, &b.skip_binder()));

    // The principal goes first, followed by the sorted projections.
    let all_preds = iter::once(principal_pred).chain(assoc_preds);
    let preds = tcx.mk_poly_existential_predicates_from_iter(all_preds);
    Ty::new_dynamic(tcx, preds, tcx.lifetimes.re_erased, ty::Dyn)
}
/// Transforms an instance for LLVM CFI and cross-language LLVM CFI support using Itanium C++ ABI
/// mangling.
///
/// typeid_for_instance is called at two locations, initially when declaring/defining functions and
/// methods, and later during code generation at call sites, after type erasure might have occurred.
///
/// In the first call (i.e., when declaring/defining functions and methods), it encodes type ids for
/// an FnAbi or Instance, and these type ids are attached to functions and methods. (These type ids
/// are used later by the LowerTypeTests LLVM pass to aggregate functions in groups derived from
/// these type ids.)
///
/// In the second call (i.e., during code generation at call sites), it encodes a type id for an
/// FnAbi or Instance, after type erasure might have occurred, and this type id is used for testing
/// if a function is member of the group derived from this type id. Therefore, in the first call to
/// typeid_for_fnabi (when type ids are attached to functions and methods), it can only include at
/// most as much information that would be available in the second call (i.e., during code
/// generation at call sites); otherwise, the type ids would not match.
///
/// For this, it:
///
/// * Adjusts the type ids of DropGlues (see below).
/// * Adjusts the type ids of VTableShims to the type id expected in the call sites for the
///   entry in the vtable (i.e., by using the signature of the closure passed as an argument to the
///   shim, or by just removing self).
/// * Performs type erasure for calls on trait objects by transforming self into a trait object of
///   the trait that defines the method.
/// * Performs type erasure for closure call methods by transforming self into a trait object of
///   the Fn trait that defines the method (for being attached as a secondary type id).
///
/// The last two steps are skipped when `options` contains `USE_CONCRETE_SELF`. Returns the
/// (possibly modified) instance.
#[instrument(level = "trace", skip(tcx))]
pub fn transform_instance<'tcx>(
    tcx: TyCtxt<'tcx>,
    mut instance: Instance<'tcx>,
    options: TransformTyOptions,
) -> Instance<'tcx> {
    if (matches!(instance.def, ty::InstanceDef::Virtual(..))
        && Some(instance.def_id()) == tcx.lang_items().drop_in_place_fn())
        || matches!(instance.def, ty::InstanceDef::DropGlue(..))
    {
        // Adjust the type ids of DropGlues
        //
        // DropGlues may have indirect calls to one or more given types drop function. Rust allows
        // for types to be erased to any trait object and retains the drop function for the original
        // type, which means at the indirect call sites in DropGlues, when typeid_for_fnabi is
        // called a second time, it only has information after type erasure and it could be a call
        // on any arbitrary trait object. Normalize them to a synthesized Drop trait object, both on
        // declaration/definition, and during code generation at call sites so they have the same
        // type id and match.
        //
        // FIXME(rcvalle): This allows a drop call on any trait object to call the drop function of
        //   any other type.
        //
        let def_id = tcx
            .lang_items()
            .drop_trait()
            .unwrap_or_else(|| bug!("typeid_for_instance: couldn't get drop_trait lang item"));
        let predicate = ty::ExistentialPredicate::Trait(ty::ExistentialTraitRef {
            def_id,
            args: List::empty(),
        });
        let predicates = tcx.mk_poly_existential_predicates(&[ty::Binder::dummy(predicate)]);
        let self_ty = Ty::new_dynamic(tcx, predicates, tcx.lifetimes.re_erased, ty::Dyn);
        instance.args = tcx.mk_args_trait(self_ty, List::empty());
    } else if let ty::InstanceDef::Virtual(def_id, _) = instance.def {
        // Transform self into a trait object of the trait that defines the method for virtual
        // functions to match the type erasure done below.
        let upcast_ty = match tcx.trait_of_item(def_id) {
            Some(trait_id) => trait_object_ty(
                tcx,
                ty::Binder::dummy(ty::TraitRef::from_method(tcx, trait_id, instance.args)),
            ),
            // drop_in_place won't have a defining trait, skip the upcast
            None => instance.args.type_at(0),
        };
        let ty::Dynamic(preds, lifetime, kind) = upcast_ty.kind() else {
            bug!("Tried to remove autotraits from non-dynamic type {upcast_ty}");
        };
        let self_ty = if preds.principal().is_some() {
            // Drop the auto trait predicates and keep everything else.
            let filtered_preds =
                tcx.mk_poly_existential_predicates_from_iter(preds.into_iter().filter(|pred| {
                    !matches!(pred.skip_binder(), ty::ExistentialPredicate::AutoTrait(..))
                }));
            Ty::new_dynamic(tcx, filtered_preds, *lifetime, *kind)
        } else {
            // If there's no principal type, re-encode it as a unit, since we don't know anything
            // about it. This technically discards the knowledge that it was a type that was made
            // into a trait object at some point, but that's not a lot.
            tcx.types.unit
        };
        instance.args = tcx.mk_args_trait(self_ty, instance.args.into_iter().skip(1));
    } else if let ty::InstanceDef::VTableShim(def_id) = instance.def
        && let Some(trait_id) = tcx.trait_of_item(def_id)
    {
        // Adjust the type ids of VTableShims to the type id expected in the call sites for the
        // entry in the vtable (i.e., by using the signature of the closure passed as an argument
        // to the shim, or by just removing self).
        let trait_ref = ty::TraitRef::new(tcx, trait_id, instance.args);
        let invoke_ty = trait_object_ty(tcx, ty::Binder::dummy(trait_ref));
        instance.args = tcx.mk_args_trait(invoke_ty, trait_ref.args.into_iter().skip(1));
    }

    if !options.contains(TransformTyOptions::USE_CONCRETE_SELF) {
        // Perform type erasure for calls on trait objects by transforming self into a trait object
        // of the trait that defines the method.
        if let Some(impl_id) = tcx.impl_of_method(instance.def_id())
            && let Some(trait_ref) = tcx.impl_trait_ref(impl_id)
        {
            let impl_method = tcx.associated_item(instance.def_id());
            let method_id = impl_method
                .trait_item_def_id
                .expect("Part of a trait implementation, but not linked to the def_id?");
            let trait_method = tcx.associated_item(method_id);
            let trait_id = trait_ref.skip_binder().def_id;
            if traits::is_vtable_safe_method(tcx, trait_id, trait_method)
                && tcx.object_safety_violations(trait_id).is_empty()
            {
                // Trait methods will have a Self polymorphic parameter, where the concretized
                // implementation will not. We need to walk back to the more general trait method
                let trait_ref = tcx.instantiate_and_normalize_erasing_regions(
                    instance.args,
                    ty::ParamEnv::reveal_all(),
                    trait_ref,
                );
                let invoke_ty = trait_object_ty(tcx, ty::Binder::dummy(trait_ref));

                // At the call site, any call to this concrete function through a vtable will be
                // `Virtual(method_id, idx)` with appropriate arguments for the method. Since we
                // have the original method id, and we've recovered the trait arguments, we can
                // make the callee instance we're computing the alias set for match the caller
                // instance.
                //
                // Right now, our code ignores the vtable index everywhere, so we use 0 as a
                // placeholder. If we ever *do* start encoding the vtable index, we will need to
                // generate an alias set based on which vtables we are putting this method into, as
                // there will be more than one index value when supertraits are involved.
                instance.def = ty::InstanceDef::Virtual(method_id, 0);
                let abstract_trait_args =
                    tcx.mk_args_trait(invoke_ty, trait_ref.args.into_iter().skip(1));
                instance.args = instance.args.rebase_onto(tcx, impl_id, abstract_trait_args);
            }
        } else if tcx.is_closure_like(instance.def_id()) {
            // We're either a closure or a coroutine. Our goal is to find the trait we're defined
            // on, instantiate it, and take the type of its only method as our own.
            let closure_ty = instance.ty(tcx, ty::ParamEnv::reveal_all());
            let (trait_id, inputs) = match closure_ty.kind() {
                ty::Closure(..) => {
                    let closure_args = instance.args.as_closure();
                    let trait_id = tcx.fn_trait_kind_to_def_id(closure_args.kind()).unwrap();
                    let tuple_args =
                        tcx.instantiate_bound_regions_with_erased(closure_args.sig()).inputs()[0];
                    (trait_id, Some(tuple_args))
                }
                ty::Coroutine(..) => match tcx.coroutine_kind(instance.def_id()).unwrap() {
                    hir::CoroutineKind::Coroutine(..) => (
                        tcx.require_lang_item(LangItem::Coroutine, None),
                        Some(instance.args.as_coroutine().resume_ty()),
                    ),
                    hir::CoroutineKind::Desugared(desugaring, _) => {
                        let lang_item = match desugaring {
                            hir::CoroutineDesugaring::Async => LangItem::Future,
                            hir::CoroutineDesugaring::AsyncGen => LangItem::AsyncIterator,
                            hir::CoroutineDesugaring::Gen => LangItem::Iterator,
                        };
                        (tcx.require_lang_item(lang_item, None), None)
                    }
                },
                ty::CoroutineClosure(..) => (
                    tcx.require_lang_item(LangItem::FnOnce, None),
                    Some(
                        tcx.instantiate_bound_regions_with_erased(
                            instance.args.as_coroutine_closure().coroutine_closure_sig(),
                        )
                        .tupled_inputs_ty,
                    ),
                ),
                x => bug!("Unexpected type kind for closure-like: {x:?}"),
            };
            let concrete_args = tcx.mk_args_trait(closure_ty, inputs.map(Into::into));
            let trait_ref = ty::TraitRef::new(tcx, trait_id, concrete_args);
            let invoke_ty = trait_object_ty(tcx, ty::Binder::dummy(trait_ref));
            let abstract_args = tcx.mk_args_trait(invoke_ty, trait_ref.args.into_iter().skip(1));
            // There should be exactly one method on this trait, and it should be the one we're
            // defining.
            let call = tcx
                .associated_items(trait_id)
                .in_definition_order()
                .find(|it| it.kind == ty::AssocKind::Fn)
                .expect("No call-family function on closure-like Fn trait?")
                .def_id;

            instance.def = ty::InstanceDef::Virtual(call, 0);
            instance.args = abstract_args;
        }
    }

    instance
}

View File

@ -0,0 +1,54 @@
//! Type metadata identifiers for LLVM Control Flow Integrity (CFI) and cross-language LLVM CFI
//! support for the Rust compiler.
//!
//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler,
//! see design document in the tracking issue #89653.
use bitflags::bitflags;
use rustc_middle::ty::{Instance, Ty, TyCtxt};
use rustc_target::abi::call::FnAbi;
bitflags! {
    /// Flags controlling how `typeid_for_fnabi` and `typeid_for_instance` build a type id.
    #[derive(Clone, Copy, Debug)]
    pub struct TypeIdOptions: u32 {
        /// Generalizes pointers, for compatibility with the Clang
        /// `-fsanitize-cfi-icall-generalize-pointers` option (cross-language LLVM CFI and KCFI
        /// support).
        const GENERALIZE_POINTERS = 1 << 0;
        /// Generalizes repr(C) user-defined types for extern function types with the "C" calling
        /// convention (or extern types), for cross-language LLVM CFI and KCFI support.
        const GENERALIZE_REPR_C = 1 << 1;
        /// Normalizes integers, for compatibility with the Clang
        /// `-fsanitize-cfi-icall-experimental-normalize-integers` option (cross-language LLVM CFI
        /// and KCFI support).
        const NORMALIZE_INTEGERS = 1 << 2;
        /// Skips self type erasure, so that a secondary type id can be attached to methods with
        /// their concrete self and they can be used as function pointers.
        ///
        /// (This applies to typeid_for_instance only and should be used to attach a secondary type
        /// id to methods during their declaration/definition so they match the type ids returned by
        /// either typeid_for_instance or typeid_for_fnabi at call sites during code generation for
        /// type membership tests when methods are used as function pointers.)
        const USE_CONCRETE_SELF = 1 << 3;
    }
}
pub mod itanium_cxx_abi;
/// Returns a type metadata identifier for the specified FnAbi.
///
/// This is a thin wrapper: `tcx`, `fn_abi`, and `options` are forwarded unchanged to
/// `itanium_cxx_abi::typeid_for_fnabi`, and the `String` it produces is returned as is.
pub fn typeid_for_fnabi<'tcx>(
tcx: TyCtxt<'tcx>,
fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
options: TypeIdOptions,
) -> String {
itanium_cxx_abi::typeid_for_fnabi(tcx, fn_abi, options)
}
/// Returns a type metadata identifier for the specified Instance.
///
/// This is a thin wrapper: `tcx`, `instance`, and `options` are forwarded unchanged to
/// `itanium_cxx_abi::typeid_for_instance`, and the `String` it produces is returned as is.
pub fn typeid_for_instance<'tcx>(
tcx: TyCtxt<'tcx>,
instance: Instance<'tcx>,
options: TypeIdOptions,
) -> String {
itanium_cxx_abi::typeid_for_instance(tcx, instance, options)
}

View File

@ -0,0 +1,7 @@
//! LLVM Kernel Control Flow Integrity (KCFI) and cross-language LLVM KCFI support for the Rust
//! compiler.
//!
//! For more information about LLVM KCFI and cross-language LLVM KCFI support for the Rust compiler,
//! see the tracking issue #123479.
pub mod typeid;
pub use crate::kcfi::typeid::{typeid_for_fnabi, typeid_for_instance, TypeIdOptions};

View File

@ -0,0 +1,55 @@
//! Type metadata identifiers for LLVM Kernel Control Flow Integrity (KCFI) and cross-language LLVM
//! KCFI support for the Rust compiler.
//!
//! For more information about LLVM KCFI and cross-language LLVM KCFI support for the Rust compiler,
//! see the tracking issue #123479.
use rustc_middle::ty::{Instance, InstanceDef, ReifyReason, Ty, TyCtxt};
use rustc_target::abi::call::FnAbi;
use std::hash::Hasher;
use twox_hash::XxHash64;
pub use crate::cfi::typeid::{itanium_cxx_abi, TypeIdOptions};
/// Returns a KCFI type metadata identifier for the specified FnAbi.
pub fn typeid_for_fnabi<'tcx>(
tcx: TyCtxt<'tcx>,
fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
options: TypeIdOptions,
) -> u32 {
// A KCFI type metadata identifier is a 32-bit constant produced by taking the lower half of the
// xxHash64 of the type metadata identifier. (See llvm/llvm-project@cff5bef.)
let mut hash: XxHash64 = Default::default();
hash.write(itanium_cxx_abi::typeid_for_fnabi(tcx, fn_abi, options).as_bytes());
hash.finish() as u32
}
/// Returns a KCFI type metadata identifier for the specified Instance.
pub fn typeid_for_instance<'tcx>(
tcx: TyCtxt<'tcx>,
instance: Instance<'tcx>,
mut options: TypeIdOptions,
) -> u32 {
// KCFI support for Rust shares most of its implementation with the CFI support, with some key
// differences:
//
// 1. KCFI performs type tests differently and are implemented as different LLVM passes than CFI
// to not require LTO.
// 2. KCFI has the limitation that a function or method may have one type id assigned only.
//
// Because of the limitation listed above (2), the current KCFI implementation (not CFI) does
// reifying of types (i.e., adds shims/trampolines for indirect calls in these cases) for:
//
// * Supporting casting between function items, closures, and Fn trait objects.
// * Supporting methods being cast as function pointers.
//
// This was implemented for KCFI support in #123106 and #123052 (which introduced the
// ReifyReason). The tracking issue for KCFI support for Rust is #123479.
if matches!(instance.def, InstanceDef::ReifyShim(_, Some(ReifyReason::FnPtr))) {
options.insert(TypeIdOptions::USE_CONCRETE_SELF);
}
// A KCFI type metadata identifier is a 32-bit constant produced by taking the lower half of the
// xxHash64 of the type metadata identifier. (See llvm/llvm-project@cff5bef.)
let mut hash: XxHash64 = Default::default();
hash.write(itanium_cxx_abi::typeid_for_instance(tcx, instance, options).as_bytes());
hash.finish() as u32
}

View File

@ -0,0 +1,7 @@
#![feature(let_chains)]
//! Sanitizers support for the Rust compiler.
//!
//! This crate contains the source code for providing support for the sanitizers to the Rust
//! compiler.
pub mod cfi;
pub mod kcfi;

View File

@ -5,7 +5,6 @@ edition = "2021"
[dependencies] [dependencies]
# tidy-alphabetical-start # tidy-alphabetical-start
bitflags = "2.4.1"
punycode = "0.4.0" punycode = "0.4.0"
rustc-demangle = "0.1.21" rustc-demangle = "0.1.21"
rustc_data_structures = { path = "../rustc_data_structures" } rustc_data_structures = { path = "../rustc_data_structures" }
@ -15,7 +14,5 @@ rustc_middle = { path = "../rustc_middle" }
rustc_session = { path = "../rustc_session" } rustc_session = { path = "../rustc_session" }
rustc_span = { path = "../rustc_span" } rustc_span = { path = "../rustc_span" }
rustc_target = { path = "../rustc_target" } rustc_target = { path = "../rustc_target" }
rustc_trait_selection = { path = "../rustc_trait_selection" }
tracing = "0.1" tracing = "0.1"
twox-hash = "1.6.3"
# tidy-alphabetical-end # tidy-alphabetical-end

View File

@ -114,7 +114,6 @@ mod v0;
pub mod errors; pub mod errors;
pub mod test; pub mod test;
pub mod typeid;
/// This function computes the symbol name for the given `instance` and the /// This function computes the symbol name for the given `instance` and the
/// given instantiating crate. That is, if you know that instance X is /// given instantiating crate. That is, if you know that instance X is

View File

@ -1,100 +0,0 @@
/// Type metadata identifiers for LLVM Control Flow Integrity (CFI) and cross-language LLVM CFI
/// support.
///
/// For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler,
/// see the design document in the tracking issue #89653.
use bitflags::bitflags;
use rustc_middle::ty::{Instance, InstanceDef, ReifyReason, Ty, TyCtxt};
use rustc_target::abi::call::FnAbi;
use std::hash::Hasher;
use twox_hash::XxHash64;
bitflags! {
    /// Options accepted by the `typeid_for_*` and `kcfi_typeid_for_*` functions that adjust how
    /// a type metadata identifier is encoded.
    #[derive(Clone, Copy, Debug)]
    pub struct TypeIdOptions: u32 {
        /// Generalize pointers, for compatibility with the Clang
        /// `-fsanitize-cfi-icall-generalize-pointers` option (cross-language LLVM CFI and KCFI
        /// support).
        const GENERALIZE_POINTERS = 1 << 0;
        /// Generalize repr(C) user-defined types for extern function types with the "C" calling
        /// convention (or extern types), for cross-language LLVM CFI and KCFI support.
        const GENERALIZE_REPR_C = 1 << 1;
        /// Normalize integers, for compatibility with the Clang
        /// `-fsanitize-cfi-icall-experimental-normalize-integers` option (cross-language LLVM
        /// CFI and KCFI support).
        const NORMALIZE_INTEGERS = 1 << 2;
        /// Skip self type erasure, so that a secondary type id can be attached to methods with
        /// their concrete self and they can be used as function pointers.
        ///
        /// (Only meaningful for typeid_for_instance. It should be used to attach a secondary
        /// type id to methods during their declaration/definition so that they match the type
        /// ids returned by either typeid_for_instance or typeid_for_fnabi at call sites during
        /// code generation for type membership tests when methods are used as function
        /// pointers.)
        const USE_CONCRETE_SELF = 1 << 3;
    }
}
mod typeid_itanium_cxx_abi;
/// Returns a type metadata identifier for the specified FnAbi.
///
/// This is a thin wrapper: `tcx`, `fn_abi`, and `options` are forwarded unchanged to
/// `typeid_itanium_cxx_abi::typeid_for_fnabi`, and the `String` it produces is returned as is.
pub fn typeid_for_fnabi<'tcx>(
tcx: TyCtxt<'tcx>,
fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
options: TypeIdOptions,
) -> String {
typeid_itanium_cxx_abi::typeid_for_fnabi(tcx, fn_abi, options)
}
/// Returns a type metadata identifier for the specified Instance.
///
/// This is a thin wrapper: `tcx`, `instance`, and `options` are forwarded unchanged to
/// `typeid_itanium_cxx_abi::typeid_for_instance`, and the `String` it produces is returned as
/// is.
pub fn typeid_for_instance<'tcx>(
tcx: TyCtxt<'tcx>,
instance: Instance<'tcx>,
options: TypeIdOptions,
) -> String {
typeid_itanium_cxx_abi::typeid_for_instance(tcx, instance, options)
}
/// Returns a KCFI type metadata identifier for the specified FnAbi.
pub fn kcfi_typeid_for_fnabi<'tcx>(
tcx: TyCtxt<'tcx>,
fn_abi: &FnAbi<'tcx, Ty<'tcx>>,
options: TypeIdOptions,
) -> u32 {
// A KCFI type metadata identifier is a 32-bit constant produced by taking the lower half of the
// xxHash64 of the type metadata identifier. (See llvm/llvm-project@cff5bef.)
let mut hash: XxHash64 = Default::default();
hash.write(typeid_itanium_cxx_abi::typeid_for_fnabi(tcx, fn_abi, options).as_bytes());
hash.finish() as u32
}
/// Returns a KCFI type metadata identifier for the specified Instance.
pub fn kcfi_typeid_for_instance<'tcx>(
tcx: TyCtxt<'tcx>,
instance: Instance<'tcx>,
mut options: TypeIdOptions,
) -> u32 {
// KCFI support for Rust shares most of its implementation with the CFI support, with some key
// differences:
//
// 1. KCFI performs type tests differently and are implemented as different LLVM passes than CFI
// to not require LTO.
// 2. KCFI has the limitation that a function or method may have one type id assigned only.
//
// Because of the limitation listed above (2), the current KCFI implementation (not CFI) does
// reifying of types (i.e., adds shims/trampolines for indirect calls in these cases) for:
//
// * Supporting casting between function items, closures, and Fn trait objects.
// * Supporting methods being cast as function pointers.
//
// This was implemented for KCFI support in #123106 and #123052 (which introduced the
// ReifyReason). The tracking issue for KCFI support for Rust is #123479.
if matches!(instance.def, InstanceDef::ReifyShim(_, Some(ReifyReason::FnPtr))) {
options.insert(TypeIdOptions::USE_CONCRETE_SELF);
}
// A KCFI type metadata identifier is a 32-bit constant produced by taking the lower half of the
// xxHash64 of the type metadata identifier. (See llvm/llvm-project@cff5bef.)
let mut hash: XxHash64 = Default::default();
hash.write(typeid_itanium_cxx_abi::typeid_for_instance(tcx, instance, options).as_bytes());
hash.finish() as u32
}