2023-08-28 22:14:18 +00:00
|
|
|
use std::{collections::BTreeMap, env, sync::atomic::Ordering};
|
2016-09-22 02:14:38 +00:00
|
|
|
|
|
|
|
fn main() {
|
2016-09-26 20:55:11 +00:00
|
|
|
println!("cargo:rerun-if-changed=build.rs");
|
|
|
|
|
|
|
|
let target = env::var("TARGET").unwrap();
|
2018-11-21 20:55:06 +00:00
|
|
|
let cwd = env::current_dir().unwrap();
|
|
|
|
|
|
|
|
println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display());
|
2017-02-06 20:11:09 +00:00
|
|
|
|
2019-09-06 21:20:05 +00:00
|
|
|
// Activate libm's unstable features to make full use of Nightly.
|
2019-09-07 00:02:11 +00:00
|
|
|
println!("cargo:rustc-cfg=feature=\"unstable\"");
|
2019-09-06 21:20:05 +00:00
|
|
|
|
2017-02-06 20:11:09 +00:00
|
|
|
// Emscripten's runtime includes all the builtins
|
|
|
|
if target.contains("emscripten") {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-05-12 07:19:28 +00:00
|
|
|
// OpenBSD provides compiler_rt by default, use it instead of rebuilding it from source
|
|
|
|
if target.contains("openbsd") {
|
|
|
|
println!("cargo:rustc-link-search=native=/usr/lib");
|
2018-07-16 04:17:38 +00:00
|
|
|
println!("cargo:rustc-link-lib=compiler_rt");
|
2018-05-12 07:19:28 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-10-28 17:29:37 +00:00
|
|
|
// Forcibly enable memory intrinsics on wasm & SGX as we don't have a libc to
|
2017-10-25 00:03:39 +00:00
|
|
|
// provide them.
|
2021-10-28 17:29:37 +00:00
|
|
|
if (target.contains("wasm") && !target.contains("wasi"))
|
2019-05-14 21:33:08 +00:00
|
|
|
|| (target.contains("sgx") && target.contains("fortanix"))
|
2020-05-22 21:12:06 +00:00
|
|
|
|| target.contains("-none")
|
|
|
|
|| target.contains("nvptx")
|
2022-06-28 14:46:07 +00:00
|
|
|
|| target.contains("uefi")
|
2023-08-10 05:50:04 +00:00
|
|
|
|| target.contains("xous")
|
2019-05-14 21:33:08 +00:00
|
|
|
{
|
2017-10-25 00:03:39 +00:00
|
|
|
println!("cargo:rustc-cfg=feature=\"mem\"");
|
|
|
|
}
|
|
|
|
|
2021-08-21 02:45:38 +00:00
|
|
|
// These targets have hardware unaligned access support.
|
2021-11-21 06:54:00 +00:00
|
|
|
if target.contains("x86_64")
|
|
|
|
|| target.contains("i686")
|
|
|
|
|| target.contains("aarch64")
|
|
|
|
|| target.contains("bpf")
|
|
|
|
{
|
2021-08-21 02:45:38 +00:00
|
|
|
println!("cargo:rustc-cfg=feature=\"mem-unaligned\"");
|
|
|
|
}
|
|
|
|
|
2016-09-26 20:55:11 +00:00
|
|
|
// NOTE we are going to assume that llvm-target, what determines our codegen option, matches the
|
|
|
|
// target triple. This is usually correct for our built-in targets but can break in presence of
|
|
|
|
// custom targets, which can have arbitrary names.
|
|
|
|
let llvm_target = target.split('-').collect::<Vec<_>>();
|
|
|
|
|
2017-07-07 18:20:04 +00:00
|
|
|
// Build missing intrinsics from compiler-rt C source code. If we're
|
|
|
|
// mangling names though we assume that we're also in test mode so we don't
|
|
|
|
// build anything and we rely on the upstream implementation of compiler-rt
|
|
|
|
// functions
|
2017-10-25 00:03:39 +00:00
|
|
|
if !cfg!(feature = "mangled-names") && cfg!(feature = "c") {
|
2019-05-14 21:29:29 +00:00
|
|
|
// Don't use a C compiler for these targets:
|
|
|
|
//
|
2021-10-28 17:29:37 +00:00
|
|
|
// * wasm - clang for wasm is somewhat hard to come by and it's
|
2019-05-14 21:29:29 +00:00
|
|
|
// unlikely that the C is really that much better than our own Rust.
|
|
|
|
// * nvptx - everything is bitcode, not compatible with mixed C/Rust
|
|
|
|
// * riscv - the rust-lang/rust distribution container doesn't have a C
|
2022-06-09 00:40:15 +00:00
|
|
|
// compiler.
|
|
|
|
if !target.contains("wasm")
|
|
|
|
&& !target.contains("nvptx")
|
|
|
|
&& (!target.starts_with("riscv") || target.contains("xous"))
|
|
|
|
{
|
2017-10-25 00:03:39 +00:00
|
|
|
#[cfg(feature = "c")]
|
2020-02-28 19:01:22 +00:00
|
|
|
c::compile(&llvm_target, &target);
|
2017-10-25 00:03:39 +00:00
|
|
|
}
|
2017-07-07 18:20:04 +00:00
|
|
|
}
|
2016-09-26 20:55:11 +00:00
|
|
|
|
2017-04-10 16:19:16 +00:00
|
|
|
// To compile intrinsics.rs for thumb targets, where there is no libc
|
|
|
|
if llvm_target[0].starts_with("thumb") {
|
|
|
|
println!("cargo:rustc-cfg=thumb")
|
|
|
|
}
|
2016-09-26 20:55:11 +00:00
|
|
|
|
2019-03-07 19:24:15 +00:00
|
|
|
// compiler-rt `cfg`s away some intrinsics for thumbv6m and thumbv8m.base because
|
|
|
|
// these targets do not have full Thumb-2 support but only original Thumb-1.
|
|
|
|
// We have to cfg our code accordingly.
|
|
|
|
if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" {
|
|
|
|
println!("cargo:rustc-cfg=thumb_1")
|
2017-04-10 16:19:16 +00:00
|
|
|
}
|
2016-10-31 16:03:46 +00:00
|
|
|
|
2021-05-31 14:32:46 +00:00
|
|
|
// Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This
|
|
|
|
// includes the old androideabi. It is deprecated but it is available as a
|
|
|
|
// rustc target (arm-linux-androideabi).
|
2021-06-03 21:59:34 +00:00
|
|
|
if llvm_target[0] == "armv4t"
|
|
|
|
|| llvm_target[0] == "armv5te"
|
2022-01-27 19:53:50 +00:00
|
|
|
|| target == "arm-linux-androideabi"
|
2021-05-31 14:32:46 +00:00
|
|
|
{
|
2017-12-26 15:01:02 +00:00
|
|
|
println!("cargo:rustc-cfg=kernel_user_helpers")
|
2016-10-31 16:03:46 +00:00
|
|
|
}
|
Port outline-atomics to rust
This has a very long history, summarized in
https://github.com/rust-lang/rust/issues/109064. This port is a very
minimal subset of `aarch64/lse.S` from LLVM's compiler-rt. In
particular, it is missing the following:
1. Any form of runtime dispatch between LL/SC and LSE.
Determining which version of the intrinsics to use
requires one of the following:
i) `getauxval` from glibc. It's unclear whether `compiler_builtins` is
allowed to depend on libc at all, and musl doesn't even support
getauxval. Don't enshrine the requirement "de-facto" by making it
required for outline-atomics.
ii) kernel support. Linux and FreeBSD have limited support, but it
requires an extremely recent kernel version and doesn't work at all under QEMU (https://github.com/rust-lang/rust/issues/109064#issuecomment-1494939904).
Instead, we hard-code LL/SC intrinsics. Users who want LSE support
should use the LLVM compiler-rt (if you're building from source in
rust-lang/rust, make sure you have `src/llvm-project` checked out
locally. the goal is to soon add a new `optimized-compiler-builtins`
option so this is easier to discover).
2. The global `___aarch64_have_lse_atomics` CTOR, required to do runtime
dispatch. Thom Chiviolani has this to say about global CTORs:
> static ctors are problems because we are pretty eager about dead code elim
> in general if you have a module that isnt directly reference we will probably not have its static ctors
> also, while llvm has a super robust way to have a static ctor (theres s special "appending global" to use for c++), we dont use that and just have people make a #[used] static in a special section
> 1. the robust way kinda requires rust knowing that the argument is a static ctor (maybe a #[rustc_static_ctor] attribute). it also would be... finnicky, since on windows we actually care beyond being a static ctor, that we run as part in a specific group of ctors, which means a very specific section (one for TLS and the other for, uh, i dont remember)
> 2. we still actually have to codegen the cgu that isn't referenced. but maybe we could remember that it has that attribute and use that
So while this is possible in theory, it's decidedly non-trivial, and
needs invasive changes to rust itself. In any case, it doesn't matter
until we decide the story around libc.
3. The 16-byte (i128) version of compare_and_swap. This wouldn't be
*too* hard to add, but it would be hard to test. The way I tested the
existing code was not just with unit tests but also by loading it as a
path dependency and running `x test core` - the latter caught several
bugs the unit tests didn't catch (because I originally wrote the tests
wrong). So I am slightly nervous about adding a 16-byte version that is
much more poorly tested than the other intrinsics.
2023-06-25 21:07:00 +00:00
|
|
|
|
2023-07-08 05:18:27 +00:00
|
|
|
if llvm_target[0].starts_with("aarch64") {
|
Port outline-atomics to rust
This has a very long history, summarized in
https://github.com/rust-lang/rust/issues/109064. This port is a very
minimal subset of `aarch64/lse.S` from LLVM's compiler-rt. In
particular, it is missing the following:
1. Any form of runtime dispatch between LL/SC and LSE.
Determining which version of the intrinsics to use
requires one of the following:
i) `getauxval` from glibc. It's unclear whether `compiler_builtins` is
allowed to depend on libc at all, and musl doesn't even support
getauxval. Don't enshrine the requirement "de-facto" by making it
required for outline-atomics.
ii) kernel support. Linux and FreeBSD have limited support, but it
requires an extremely recent kernel version and doesn't work at all under QEMU (https://github.com/rust-lang/rust/issues/109064#issuecomment-1494939904).
Instead, we hard-code LL/SC intrinsics. Users who want LSE support
should use the LLVM compiler-rt (if you're building from source in
rust-lang/rust, make sure you have `src/llvm-project` checked out
locally. the goal is to soon add a new `optimized-compiler-builtins`
option so this is easier to discover).
2. The global `___aarch64_have_lse_atomics` CTOR, required to do runtime
dispatch. Thom Chiviolani has this to say about global CTORs:
> static ctors are problems because we are pretty eager about dead code elim
> in general if you have a module that isnt directly reference we will probably not have its static ctors
> also, while llvm has a super robust way to have a static ctor (theres s special "appending global" to use for c++), we dont use that and just have people make a #[used] static in a special section
> 1. the robust way kinda requires rust knowing that the argument is a static ctor (maybe a #[rustc_static_ctor] attribute). it also would be... finnicky, since on windows we actually care beyond being a static ctor, that we run as part in a specific group of ctors, which means a very specific section (one for TLS and the other for, uh, i dont remember)
> 2. we still actually have to codegen the cgu that isn't referenced. but maybe we could remember that it has that attribute and use that
So while this is possible in theory, it's decidedly non-trivial, and
needs invasive changes to rust itself. In any case, it doesn't matter
until we decide the story around libc.
3. The 16-byte (i128) version of compare_and_swap. This wouldn't be
*too* hard to add, but it would be hard to test. The way I tested the
existing code was not just with unit tests but also by loading it as a
path dependency and running `x test core` - the latter caught several
bugs the unit tests didn't catch (because I originally wrote the tests
wrong). So I am slightly nervous about adding a 16-byte version that is
much more poorly tested than the other intrinsics.
2023-06-25 21:07:00 +00:00
|
|
|
generate_aarch64_outlined_atomics();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the memory-ordering suffix used in the names of the generated
/// `__aarch64_*` outlined-atomic symbols.
///
/// # Panics
///
/// Panics for any ordering other than `Relaxed`, `Acquire`, `Release` or
/// `AcqRel`.
fn aarch64_symbol(ordering: Ordering) -> &'static str {
    // Ordering -> symbol suffix lookup table.
    const SUFFIXES: [(Ordering, &str); 4] = [
        (Ordering::Relaxed, "relax"),
        (Ordering::Acquire, "acq"),
        (Ordering::Release, "rel"),
        (Ordering::AcqRel, "acq_rel"),
    ];

    SUFFIXES
        .iter()
        .find(|(candidate, _)| *candidate == ordering)
        .map(|&(_, suffix)| suffix)
        .unwrap_or_else(|| panic!("unknown symbol for {:?}", ordering))
}
|
|
|
|
|
|
|
|
/// The `concat_idents` macro is extremely annoying and doesn't allow us to define new items.
|
|
|
|
/// Define them from the build script instead.
|
|
|
|
/// Note that the majority of the code is still defined in `aarch64.rs` through inline macros.
|
|
|
|
fn generate_aarch64_outlined_atomics() {
|
|
|
|
use std::fmt::Write;
|
|
|
|
// #[macro_export] so that we can use this in tests
|
|
|
|
let gen_macro =
|
|
|
|
|name| format!("#[macro_export] macro_rules! foreach_{name} {{ ($macro:path) => {{\n");
|
|
|
|
|
|
|
|
// Generate different macros for add/clr/eor/set so that we can test them separately.
|
|
|
|
let sym_names = ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"];
|
2023-08-28 22:14:18 +00:00
|
|
|
let mut macros = BTreeMap::new();
|
Port outline-atomics to rust
This has a very long history, summarized in
https://github.com/rust-lang/rust/issues/109064. This port is a very
minimal subset of `aarch64/lse.S` from LLVM's compiler-rt. In
particular, it is missing the following:
1. Any form of runtime dispatch between LL/SC and LSE.
Determining which version of the intrinsics to use
requires one of the following:
i) `getauxval` from glibc. It's unclear whether `compiler_builtins` is
allowed to depend on libc at all, and musl doesn't even support
getauxval. Don't enshrine the requirement "de-facto" by making it
required for outline-atomics.
ii) kernel support. Linux and FreeBSD have limited support, but it
requires an extremely recent kernel version and doesn't work at all under QEMU (https://github.com/rust-lang/rust/issues/109064#issuecomment-1494939904).
Instead, we hard-code LL/SC intrinsics. Users who want LSE support
should use the LLVM compiler-rt (if you're building from source in
rust-lang/rust, make sure you have `src/llvm-project` checked out
locally. the goal is to soon add a new `optimized-compiler-builtins`
option so this is easier to discover).
2. The global `___aarch64_have_lse_atomics` CTOR, required to do runtime
dispatch. Thom Chiviolani has this to say about global CTORs:
> static ctors are problems because we are pretty eager about dead code elim
> in general if you have a module that isnt directly reference we will probably not have its static ctors
> also, while llvm has a super robust way to have a static ctor (theres s special "appending global" to use for c++), we dont use that and just have people make a #[used] static in a special section
> 1. the robust way kinda requires rust knowing that the argument is a static ctor (maybe a #[rustc_static_ctor] attribute). it also would be... finnicky, since on windows we actually care beyond being a static ctor, that we run as part in a specific group of ctors, which means a very specific section (one for TLS and the other for, uh, i dont remember)
> 2. we still actually have to codegen the cgu that isn't referenced. but maybe we could remember that it has that attribute and use that
So while this is possible in theory, it's decidedly non-trivial, and
needs invasive changes to rust itself. In any case, it doesn't matter
until we decide the story around libc.
3. The 16-byte (i128) version of compare_and_swap. This wouldn't be
*too* hard to add, but it would be hard to test. The way I tested the
existing code was not just with unit tests but also by loading it as a
path dependency and running `x test core` - the latter caught several
bugs the unit tests didn't catch (because I originally wrote the tests
wrong). So I am slightly nervous about adding a 16-byte version that is
much more poorly tested than the other intrinsics.
2023-06-25 21:07:00 +00:00
|
|
|
for sym in sym_names {
|
|
|
|
macros.insert(sym, gen_macro(sym));
|
|
|
|
}
|
|
|
|
|
2023-06-26 13:54:47 +00:00
|
|
|
// Only CAS supports 16 bytes, and it has a different implementation that uses a different macro.
|
|
|
|
let mut cas16 = gen_macro("cas16");
|
|
|
|
|
Port outline-atomics to rust
This has a very long history, summarized in
https://github.com/rust-lang/rust/issues/109064. This port is a very
minimal subset of `aarch64/lse.S` from LLVM's compiler-rt. In
particular, it is missing the following:
1. Any form of runtime dispatch between LL/SC and LSE.
Determining which version of the intrinsics to use
requires one of the following:
i) `getauxval` from glibc. It's unclear whether `compiler_builtins` is
allowed to depend on libc at all, and musl doesn't even support
getauxval. Don't enshrine the requirement "de-facto" by making it
required for outline-atomics.
ii) kernel support. Linux and FreeBSD have limited support, but it
requires an extremely recent kernel version and doesn't work at all under QEMU (https://github.com/rust-lang/rust/issues/109064#issuecomment-1494939904).
Instead, we hard-code LL/SC intrinsics. Users who want LSE support
should use the LLVM compiler-rt (if you're building from source in
rust-lang/rust, make sure you have `src/llvm-project` checked out
locally. the goal is to soon add a new `optimized-compiler-builtins`
option so this is easier to discover).
2. The global `___aarch64_have_lse_atomics` CTOR, required to do runtime
dispatch. Thom Chiviolani has this to say about global CTORs:
> static ctors are problems because we are pretty eager about dead code elim
> in general if you have a module that isnt directly reference we will probably not have its static ctors
> also, while llvm has a super robust way to have a static ctor (theres s special "appending global" to use for c++), we dont use that and just have people make a #[used] static in a special section
> 1. the robust way kinda requires rust knowing that the argument is a static ctor (maybe a #[rustc_static_ctor] attribute). it also would be... finnicky, since on windows we actually care beyond being a static ctor, that we run as part in a specific group of ctors, which means a very specific section (one for TLS and the other for, uh, i dont remember)
> 2. we still actually have to codegen the cgu that isn't referenced. but maybe we could remember that it has that attribute and use that
So while this is possible in theory, it's decidedly non-trivial, and
needs invasive changes to rust itself. In any case, it doesn't matter
until we decide the story around libc.
3. The 16-byte (i128) version of compare_and_swap. This wouldn't be
*too* hard to add, but it would be hard to test. The way I tested the
existing code was not just with unit tests but also by loading it as a
path dependency and running `x test core` - the latter caught several
bugs the unit tests didn't catch (because I originally wrote the tests
wrong). So I am slightly nervous about adding a 16-byte version that is
much more poorly tested than the other intrinsics.
2023-06-25 21:07:00 +00:00
|
|
|
for ordering in [
|
|
|
|
Ordering::Relaxed,
|
|
|
|
Ordering::Acquire,
|
|
|
|
Ordering::Release,
|
|
|
|
Ordering::AcqRel,
|
|
|
|
] {
|
|
|
|
let sym_ordering = aarch64_symbol(ordering);
|
2023-06-26 13:54:47 +00:00
|
|
|
for size in [1, 2, 4, 8] {
|
Port outline-atomics to rust
This has a very long history, summarized in
https://github.com/rust-lang/rust/issues/109064. This port is a very
minimal subset of `aarch64/lse.S` from LLVM's compiler-rt. In
particular, it is missing the following:
1. Any form of runtime dispatch between LL/SC and LSE.
Determining which version of the intrinsics to use
requires one of the following:
i) `getauxval` from glibc. It's unclear whether `compiler_builtins` is
allowed to depend on libc at all, and musl doesn't even support
getauxval. Don't enshrine the requirement "de-facto" by making it
required for outline-atomics.
ii) kernel support. Linux and FreeBSD have limited support, but it
requires an extremely recent kernel version and doesn't work at all under QEMU (https://github.com/rust-lang/rust/issues/109064#issuecomment-1494939904).
Instead, we hard-code LL/SC intrinsics. Users who want LSE support
should use the LLVM compiler-rt (if you're building from source in
rust-lang/rust, make sure you have `src/llvm-project` checked out
locally. the goal is to soon add a new `optimized-compiler-builtins`
option so this is easier to discover).
2. The global `___aarch64_have_lse_atomics` CTOR, required to do runtime
dispatch. Thom Chiviolani has this to say about global CTORs:
> static ctors are problems because we are pretty eager about dead code elim
> in general if you have a module that isnt directly reference we will probably not have its static ctors
> also, while llvm has a super robust way to have a static ctor (theres s special "appending global" to use for c++), we dont use that and just have people make a #[used] static in a special section
> 1. the robust way kinda requires rust knowing that the argument is a static ctor (maybe a #[rustc_static_ctor] attribute). it also would be... finnicky, since on windows we actually care beyond being a static ctor, that we run as part in a specific group of ctors, which means a very specific section (one for TLS and the other for, uh, i dont remember)
> 2. we still actually have to codegen the cgu that isn't referenced. but maybe we could remember that it has that attribute and use that
So while this is possible in theory, it's decidedly non-trivial, and
needs invasive changes to rust itself. In any case, it doesn't matter
until we decide the story around libc.
3. The 16-byte (i128) version of compare_and_swap. This wouldn't be
*too* hard to add, but it would be hard to test. The way I tested the
existing code was not just with unit tests but also by loading it as a
path dependency and running `x test core` - the latter caught several
bugs the unit tests didn't catch (because I originally wrote the tests
wrong). So I am slightly nervous about adding a 16-byte version that is
much more poorly tested than the other intrinsics.
2023-06-25 21:07:00 +00:00
|
|
|
for (sym, macro_) in &mut macros {
|
|
|
|
let name = format!("__aarch64_{sym}{size}_{sym_ordering}");
|
|
|
|
writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap();
|
|
|
|
}
|
|
|
|
}
|
2023-06-26 13:54:47 +00:00
|
|
|
let name = format!("__aarch64_cas16_{sym_ordering}");
|
|
|
|
writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap();
|
Port outline-atomics to rust
This has a very long history, summarized in
https://github.com/rust-lang/rust/issues/109064. This port is a very
minimal subset of `aarch64/lse.S` from LLVM's compiler-rt. In
particular, it is missing the following:
1. Any form of runtime dispatch between LL/SC and LSE.
Determining which version of the intrinsics to use
requires one of the following:
i) `getauxval` from glibc. It's unclear whether `compiler_builtins` is
allowed to depend on libc at all, and musl doesn't even support
getauxval. Don't enshrine the requirement "de-facto" by making it
required for outline-atomics.
ii) kernel support. Linux and FreeBSD have limited support, but it
requires an extremely recent kernel version and doesn't work at all under QEMU (https://github.com/rust-lang/rust/issues/109064#issuecomment-1494939904).
Instead, we hard-code LL/SC intrinsics. Users who want LSE support
should use the LLVM compiler-rt (if you're building from source in
rust-lang/rust, make sure you have `src/llvm-project` checked out
locally. the goal is to soon add a new `optimized-compiler-builtins`
option so this is easier to discover).
2. The global `___aarch64_have_lse_atomics` CTOR, required to do runtime
dispatch. Thom Chiviolani has this to say about global CTORs:
> static ctors are problems because we are pretty eager about dead code elim
> in general if you have a module that isnt directly reference we will probably not have its static ctors
> also, while llvm has a super robust way to have a static ctor (theres s special "appending global" to use for c++), we dont use that and just have people make a #[used] static in a special section
> 1. the robust way kinda requires rust knowing that the argument is a static ctor (maybe a #[rustc_static_ctor] attribute). it also would be... finnicky, since on windows we actually care beyond being a static ctor, that we run as part in a specific group of ctors, which means a very specific section (one for TLS and the other for, uh, i dont remember)
> 2. we still actually have to codegen the cgu that isn't referenced. but maybe we could remember that it has that attribute and use that
So while this is possible in theory, it's decidedly non-trivial, and
needs invasive changes to rust itself. In any case, it doesn't matter
until we decide the story around libc.
3. The 16-byte (i128) version of compare_and_swap. This wouldn't be
*too* hard to add, but it would be hard to test. The way I tested the
existing code was not just with unit tests but also by loading it as a
path dependency and running `x test core` - the latter caught several
bugs the unit tests didn't catch (because I originally wrote the tests
wrong). So I am slightly nervous about adding a 16-byte version that is
much more poorly tested than the other intrinsics.
2023-06-25 21:07:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
let mut buf = String::new();
|
2023-06-26 13:54:47 +00:00
|
|
|
for macro_def in macros.values().chain(std::iter::once(&cas16)) {
|
Port outline-atomics to rust
This has a very long history, summarized in
https://github.com/rust-lang/rust/issues/109064. This port is a very
minimal subset of `aarch64/lse.S` from LLVM's compiler-rt. In
particular, it is missing the following:
1. Any form of runtime dispatch between LL/SC and LSE.
Determining which version of the intrinsics to use
requires one of the following:
i) `getauxval` from glibc. It's unclear whether `compiler_builtins` is
allowed to depend on libc at all, and musl doesn't even support
getauxval. Don't enshrine the requirement "de-facto" by making it
required for outline-atomics.
ii) kernel support. Linux and FreeBSD have limited support, but it
requires an extremely recent kernel version and doesn't work at all under QEMU (https://github.com/rust-lang/rust/issues/109064#issuecomment-1494939904).
Instead, we hard-code LL/SC intrinsics. Users who want LSE support
should use the LLVM compiler-rt (if you're building from source in
rust-lang/rust, make sure you have `src/llvm-project` checked out
locally. the goal is to soon add a new `optimized-compiler-builtins`
option so this is easier to discover).
2. The global `___aarch64_have_lse_atomics` CTOR, required to do runtime
dispatch. Thom Chiviolani has this to say about global CTORs:
> static ctors are problems because we are pretty eager about dead code elim
> in general if you have a module that isnt directly reference we will probably not have its static ctors
> also, while llvm has a super robust way to have a static ctor (theres s special "appending global" to use for c++), we dont use that and just have people make a #[used] static in a special section
> 1. the robust way kinda requires rust knowing that the argument is a static ctor (maybe a #[rustc_static_ctor] attribute). it also would be... finnicky, since on windows we actually care beyond being a static ctor, that we run as part in a specific group of ctors, which means a very specific section (one for TLS and the other for, uh, i dont remember)
> 2. we still actually have to codegen the cgu that isn't referenced. but maybe we could remember that it has that attribute and use that
So while this is possible in theory, it's decidedly non-trivial, and
needs invasive changes to rust itself. In any case, it doesn't matter
until we decide the story around libc.
3. The 16-byte (i128) version of compare_and_swap. This wouldn't be
*too* hard to add, but it would be hard to test. The way I tested the
existing code was not just with unit tests but also by loading it as a
path dependency and running `x test core` - the latter caught several
bugs the unit tests didn't catch (because I originally wrote the tests
wrong). So I am slightly nervous about adding a 16-byte version that is
much more poorly tested than the other intrinsics.
2023-06-25 21:07:00 +00:00
|
|
|
buf += macro_def;
|
2023-08-28 22:14:18 +00:00
|
|
|
buf += "}; }\n";
|
Port outline-atomics to rust
This has a very long history, summarized in
https://github.com/rust-lang/rust/issues/109064. This port is a very
minimal subset of `aarch64/lse.S` from LLVM's compiler-rt. In
particular, it is missing the following:
1. Any form of runtime dispatch between LL/SC and LSE.
Determining which version of the intrinsics to use
requires one of the following:
i) `getauxval` from glibc. It's unclear whether `compiler_builtins` is
allowed to depend on libc at all, and musl doesn't even support
getauxval. Don't enshrine the requirement "de-facto" by making it
required for outline-atomics.
ii) kernel support. Linux and FreeBSD have limited support, but it
requires an extremely recent kernel version and doesn't work at all under QEMU (https://github.com/rust-lang/rust/issues/109064#issuecomment-1494939904).
Instead, we hard-code LL/SC intrinsics. Users who want LSE support
should use the LLVM compiler-rt (if you're building from source in
rust-lang/rust, make sure you have `src/llvm-project` checked out
locally. the goal is to soon add a new `optimized-compiler-builtins`
option so this is easier to discover).
2. The global `___aarch64_have_lse_atomics` CTOR, required to do runtime
dispatch. Thom Chiviolani has this to say about global CTORs:
> static ctors are problems because we are pretty eager about dead code elim
> in general if you have a module that isnt directly reference we will probably not have its static ctors
> also, while llvm has a super robust way to have a static ctor (theres s special "appending global" to use for c++), we dont use that and just have people make a #[used] static in a special section
> 1. the robust way kinda requires rust knowing that the argument is a static ctor (maybe a #[rustc_static_ctor] attribute). it also would be... finnicky, since on windows we actually care beyond being a static ctor, that we run as part in a specific group of ctors, which means a very specific section (one for TLS and the other for, uh, i dont remember)
> 2. we still actually have to codegen the cgu that isn't referenced. but maybe we could remember that it has that attribute and use that
So while this is possible in theory, it's decidedly non-trivial, and
needs invasive changes to rust itself. In any case, it doesn't matter
until we decide the story around libc.
3. The 16-byte (i128) version of compare_and_swap. This wouldn't be
*too* hard to add, but it would be hard to test. The way I tested the
existing code was not just with unit tests but also by loading it as a
path dependency and running `x test core` - the latter caught several
bugs the unit tests didn't catch (because I originally wrote the tests
wrong). So I am slightly nervous about adding a 16-byte version that is
much more poorly tested than the other intrinsics.
2023-06-25 21:07:00 +00:00
|
|
|
}
|
|
|
|
let dst = std::env::var("OUT_DIR").unwrap() + "/outlined_atomics.rs";
|
|
|
|
std::fs::write(dst, buf).unwrap();
|
2017-04-10 16:19:16 +00:00
|
|
|
}
|
2016-09-26 20:55:11 +00:00
|
|
|
|
2017-04-10 16:19:16 +00:00
|
|
|
#[cfg(feature = "c")]
|
|
|
|
mod c {
|
2017-09-23 04:30:12 +00:00
|
|
|
extern crate cc;
|
2016-09-26 20:55:11 +00:00
|
|
|
|
2021-07-14 04:11:12 +00:00
|
|
|
use std::collections::{BTreeMap, HashSet};
|
2017-04-10 16:19:16 +00:00
|
|
|
use std::env;
|
2022-11-16 22:52:46 +00:00
|
|
|
use std::fs::{self, File};
|
2021-11-28 12:50:21 +00:00
|
|
|
use std::io::Write;
|
2021-03-02 23:50:09 +00:00
|
|
|
use std::path::{Path, PathBuf};
|
2016-09-26 20:55:11 +00:00
|
|
|
|
2017-04-10 16:19:16 +00:00
|
|
|
/// Table of C source files to compile, keyed by the symbol each file provides.
struct Sources {
    // SYMBOL -> PATH TO SOURCE
    map: BTreeMap<&'static str, &'static str>,
}

impl Sources {
    /// Creates an empty table.
    fn new() -> Sources {
        Sources {
            map: BTreeMap::new(),
        }
    }

    /// Adds `(symbol, source path)` pairs to the table.
    ///
    /// NOTE Some intrinsics have both a generic implementation (e.g.
    /// `floatdidf.c`) and an arch optimized implementation
    /// (`x86_64/floatdidf.c`). In those cases, we keep the arch optimized
    /// implementation and discard the generic implementation. If we don't
    /// and keep both implementations, the linker will yell at us about
    /// duplicate symbols!
    ///
    /// A path containing `/` is treated as arch optimized and always replaces
    /// the current entry; any other path is only recorded when the symbol has
    /// no entry yet.
    fn extend(&mut self, sources: &[(&'static str, &'static str)]) {
        for &(symbol, src) in sources {
            if src.contains('/') {
                // Arch-optimized implementation (preferred)
                self.map.insert(symbol, src);
            } else {
                // Generic implementation: never displaces an existing entry.
                self.map.entry(symbol).or_insert(src);
            }
        }
    }

    /// Removes every symbol in `symbols` from the table.
    ///
    /// # Panics
    ///
    /// Panics, naming the offending symbol, if one of them is not present.
    fn remove(&mut self, symbols: &[&str]) {
        for symbol in symbols {
            if self.map.remove(*symbol).is_none() {
                panic!("cannot remove `{symbol}`: no source is registered for this symbol");
            }
        }
    }
}
|
2017-02-19 20:49:59 +00:00
|
|
|
|
2017-04-10 16:19:16 +00:00
|
|
|
/// Compile intrinsics from the compiler-rt C source code
|
2020-02-28 19:01:22 +00:00
|
|
|
pub fn compile(llvm_target: &[&str], target: &String) {
|
2017-04-10 16:19:16 +00:00
|
|
|
let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
|
|
|
|
let target_env = env::var("CARGO_CFG_TARGET_ENV").unwrap();
|
|
|
|
let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap();
|
|
|
|
let target_vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap();
|
2020-02-28 19:01:22 +00:00
|
|
|
let mut consider_float_intrinsics = true;
|
2017-09-23 04:30:12 +00:00
|
|
|
let cfg = &mut cc::Build::new();
|
2017-09-07 05:49:34 +00:00
|
|
|
|
2020-02-28 19:01:22 +00:00
|
|
|
// AArch64 GCCs exit with an error condition when they encounter any kind of floating point
|
|
|
|
// code if the `nofp` and/or `nosimd` compiler flags have been set.
|
|
|
|
//
|
|
|
|
// Therefore, evaluate if those flags are present and set a boolean that causes any
|
|
|
|
// compiler-rt intrinsics that contain floating point source to be excluded for this target.
|
|
|
|
if target_arch == "aarch64" {
|
|
|
|
let cflags_key = String::from("CFLAGS_") + &(target.to_owned().replace("-", "_"));
|
|
|
|
if let Ok(cflags_value) = env::var(cflags_key) {
|
|
|
|
if cflags_value.contains("+nofp") || cflags_value.contains("+nosimd") {
|
|
|
|
consider_float_intrinsics = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-09-07 05:49:34 +00:00
|
|
|
cfg.warnings(false);
|
2017-04-10 16:19:16 +00:00
|
|
|
|
|
|
|
if target_env == "msvc" {
|
|
|
|
// Don't pull in extra libraries on MSVC
|
|
|
|
cfg.flag("/Zl");
|
|
|
|
|
|
|
|
// Emulate C99 and C++11's __func__ for MSVC prior to 2013 CTP
|
|
|
|
cfg.define("__func__", Some("__FUNCTION__"));
|
|
|
|
} else {
|
|
|
|
// Turn off various features of gcc and such, mostly copying
|
|
|
|
// compiler-rt's build system already
|
|
|
|
cfg.flag("-fno-builtin");
|
|
|
|
cfg.flag("-fvisibility=hidden");
|
|
|
|
cfg.flag("-ffreestanding");
|
2017-11-27 21:13:22 +00:00
|
|
|
// Avoid the following warning appearing once **per file**:
|
|
|
|
// clang: warning: optimization flag '-fomit-frame-pointer' is not supported for target 'armv7' [-Wignored-optimization-argument]
|
|
|
|
//
|
|
|
|
// Note that compiler-rt's build system also checks
|
|
|
|
//
|
|
|
|
// `check_cxx_compiler_flag(-fomit-frame-pointer COMPILER_RT_HAS_FOMIT_FRAME_POINTER_FLAG)`
|
|
|
|
//
|
|
|
|
// in https://github.com/rust-lang/compiler-rt/blob/c8fbcb3/cmake/config-ix.cmake#L19.
|
|
|
|
cfg.flag_if_supported("-fomit-frame-pointer");
|
2017-04-10 16:19:16 +00:00
|
|
|
cfg.define("VISIBILITY_HIDDEN", None);
|
|
|
|
}
|
2017-02-19 20:49:59 +00:00
|
|
|
|
2022-11-16 22:52:46 +00:00
|
|
|
// int_util.c tries to include stdlib.h if `_WIN32` is defined,
|
|
|
|
// which it is when compiling UEFI targets with clang. This is
|
|
|
|
// at odds with compiling with `-ffreestanding`, as the header
|
|
|
|
// may be incompatible or not present. Create a minimal stub
|
|
|
|
// header to use instead.
|
|
|
|
if target_os == "uefi" {
|
|
|
|
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
|
|
|
|
let include_dir = out_dir.join("include");
|
|
|
|
if !include_dir.exists() {
|
|
|
|
fs::create_dir(&include_dir).unwrap();
|
|
|
|
}
|
|
|
|
fs::write(include_dir.join("stdlib.h"), "#include <stddef.h>").unwrap();
|
|
|
|
cfg.flag(&format!("-I{}", include_dir.to_str().unwrap()));
|
|
|
|
}
|
|
|
|
|
2017-04-10 16:19:16 +00:00
|
|
|
let mut sources = Sources::new();
|
2019-05-14 21:33:08 +00:00
|
|
|
sources.extend(&[
|
2019-05-15 19:57:36 +00:00
|
|
|
("__absvdi2", "absvdi2.c"),
|
|
|
|
("__absvsi2", "absvsi2.c"),
|
|
|
|
("__addvdi3", "addvdi3.c"),
|
|
|
|
("__addvsi3", "addvsi3.c"),
|
|
|
|
("__clzdi2", "clzdi2.c"),
|
|
|
|
("__clzsi2", "clzsi2.c"),
|
|
|
|
("__cmpdi2", "cmpdi2.c"),
|
|
|
|
("__ctzdi2", "ctzdi2.c"),
|
|
|
|
("__ctzsi2", "ctzsi2.c"),
|
|
|
|
("__int_util", "int_util.c"),
|
|
|
|
("__mulvdi3", "mulvdi3.c"),
|
|
|
|
("__mulvsi3", "mulvsi3.c"),
|
|
|
|
("__negdi2", "negdi2.c"),
|
|
|
|
("__negvdi2", "negvdi2.c"),
|
|
|
|
("__negvsi2", "negvsi2.c"),
|
|
|
|
("__paritydi2", "paritydi2.c"),
|
|
|
|
("__paritysi2", "paritysi2.c"),
|
|
|
|
("__popcountdi2", "popcountdi2.c"),
|
|
|
|
("__popcountsi2", "popcountsi2.c"),
|
|
|
|
("__subvdi3", "subvdi3.c"),
|
|
|
|
("__subvsi3", "subvsi3.c"),
|
|
|
|
("__ucmpdi2", "ucmpdi2.c"),
|
2019-05-14 21:33:08 +00:00
|
|
|
]);
|
2017-04-10 16:19:16 +00:00
|
|
|
|
2020-02-28 19:01:22 +00:00
|
|
|
if consider_float_intrinsics {
|
|
|
|
sources.extend(&[
|
|
|
|
("__divdc3", "divdc3.c"),
|
|
|
|
("__divsc3", "divsc3.c"),
|
|
|
|
("__divxc3", "divxc3.c"),
|
|
|
|
("__extendhfsf2", "extendhfsf2.c"),
|
|
|
|
("__muldc3", "muldc3.c"),
|
|
|
|
("__mulsc3", "mulsc3.c"),
|
|
|
|
("__mulxc3", "mulxc3.c"),
|
|
|
|
("__negdf2", "negdf2.c"),
|
|
|
|
("__negsf2", "negsf2.c"),
|
|
|
|
("__powixf2", "powixf2.c"),
|
|
|
|
("__truncdfhf2", "truncdfhf2.c"),
|
|
|
|
("__truncsfhf2", "truncsfhf2.c"),
|
|
|
|
]);
|
|
|
|
}
|
|
|
|
|
2017-07-03 21:57:42 +00:00
|
|
|
// When compiling in rustbuild (the rust-lang/rust repo) this library
|
|
|
|
// also needs to satisfy intrinsics that jemalloc or C in general may
|
|
|
|
// need, so include a few more that aren't typically needed by
|
|
|
|
// LLVM/Rust.
|
2017-09-12 23:09:43 +00:00
|
|
|
if cfg!(feature = "rustbuild") {
|
2019-05-15 19:57:36 +00:00
|
|
|
sources.extend(&[("__ffsdi2", "ffsdi2.c")]);
|
2017-09-12 23:09:43 +00:00
|
|
|
}
|
2017-07-03 21:57:42 +00:00
|
|
|
|
2017-09-17 22:24:36 +00:00
|
|
|
// On iOS and 32-bit OSX these are all just empty intrinsics, no need to
|
|
|
|
// include them.
|
2022-03-08 10:18:07 +00:00
|
|
|
if target_os != "ios"
|
|
|
|
&& target_os != "watchos"
|
2023-10-06 21:35:20 +00:00
|
|
|
&& target_os != "tvos"
|
2022-03-08 10:18:07 +00:00
|
|
|
&& (target_vendor != "apple" || target_arch != "x86")
|
|
|
|
{
|
2019-05-14 21:33:08 +00:00
|
|
|
sources.extend(&[
|
2019-05-15 19:57:36 +00:00
|
|
|
("__absvti2", "absvti2.c"),
|
|
|
|
("__addvti3", "addvti3.c"),
|
|
|
|
("__clzti2", "clzti2.c"),
|
|
|
|
("__cmpti2", "cmpti2.c"),
|
|
|
|
("__ctzti2", "ctzti2.c"),
|
|
|
|
("__ffsti2", "ffsti2.c"),
|
|
|
|
("__mulvti3", "mulvti3.c"),
|
|
|
|
("__negti2", "negti2.c"),
|
|
|
|
("__parityti2", "parityti2.c"),
|
|
|
|
("__popcountti2", "popcountti2.c"),
|
|
|
|
("__subvti3", "subvti3.c"),
|
|
|
|
("__ucmpti2", "ucmpti2.c"),
|
2019-05-14 21:33:08 +00:00
|
|
|
]);
|
2020-02-28 19:01:22 +00:00
|
|
|
|
|
|
|
if consider_float_intrinsics {
|
|
|
|
sources.extend(&[("__negvti2", "negvti2.c")]);
|
|
|
|
}
|
2017-04-10 16:19:16 +00:00
|
|
|
}
|
2016-09-26 20:55:11 +00:00
|
|
|
|
2017-04-10 16:19:16 +00:00
|
|
|
if target_vendor == "apple" {
|
2019-05-14 21:33:08 +00:00
|
|
|
sources.extend(&[
|
2019-05-15 19:57:36 +00:00
|
|
|
("atomic_flag_clear", "atomic_flag_clear.c"),
|
|
|
|
("atomic_flag_clear_explicit", "atomic_flag_clear_explicit.c"),
|
|
|
|
("atomic_flag_test_and_set", "atomic_flag_test_and_set.c"),
|
|
|
|
(
|
|
|
|
"atomic_flag_test_and_set_explicit",
|
|
|
|
"atomic_flag_test_and_set_explicit.c",
|
|
|
|
),
|
|
|
|
("atomic_signal_fence", "atomic_signal_fence.c"),
|
|
|
|
("atomic_thread_fence", "atomic_thread_fence.c"),
|
2019-05-14 21:33:08 +00:00
|
|
|
]);
|
2017-04-10 16:19:16 +00:00
|
|
|
}
|
2016-09-26 20:55:11 +00:00
|
|
|
|
2017-04-10 16:19:16 +00:00
|
|
|
if target_env == "msvc" {
|
|
|
|
if target_arch == "x86_64" {
|
2022-05-20 14:25:18 +00:00
|
|
|
sources.extend(&[("__floatdixf", "x86_64/floatdixf.c")]);
|
2017-04-10 16:19:16 +00:00
|
|
|
}
|
|
|
|
} else {
|
2017-06-24 04:09:24 +00:00
|
|
|
// None of these seem to be used on x86_64 windows, and they've all
|
|
|
|
// got the wrong ABI anyway, so we want to avoid them.
|
|
|
|
if target_os != "windows" {
|
|
|
|
if target_arch == "x86_64" {
|
2019-05-14 21:33:08 +00:00
|
|
|
sources.extend(&[
|
2019-05-15 19:57:36 +00:00
|
|
|
("__floatdixf", "x86_64/floatdixf.c"),
|
|
|
|
("__floatundixf", "x86_64/floatundixf.S"),
|
2019-05-14 21:33:08 +00:00
|
|
|
]);
|
2017-06-24 04:09:24 +00:00
|
|
|
}
|
2017-02-19 20:49:59 +00:00
|
|
|
}
|
2016-09-26 20:55:11 +00:00
|
|
|
|
2017-04-10 16:19:16 +00:00
|
|
|
if target_arch == "x86" {
|
2019-05-14 21:33:08 +00:00
|
|
|
sources.extend(&[
|
2019-05-15 19:57:36 +00:00
|
|
|
("__ashldi3", "i386/ashldi3.S"),
|
|
|
|
("__ashrdi3", "i386/ashrdi3.S"),
|
|
|
|
("__divdi3", "i386/divdi3.S"),
|
|
|
|
("__floatdixf", "i386/floatdixf.S"),
|
|
|
|
("__floatundixf", "i386/floatundixf.S"),
|
|
|
|
("__lshrdi3", "i386/lshrdi3.S"),
|
|
|
|
("__moddi3", "i386/moddi3.S"),
|
|
|
|
("__muldi3", "i386/muldi3.S"),
|
|
|
|
("__udivdi3", "i386/udivdi3.S"),
|
|
|
|
("__umoddi3", "i386/umoddi3.S"),
|
2019-05-14 21:33:08 +00:00
|
|
|
]);
|
2017-02-19 20:49:59 +00:00
|
|
|
}
|
2017-04-10 16:19:16 +00:00
|
|
|
}
|
2016-09-26 20:55:11 +00:00
|
|
|
|
2022-03-08 10:18:07 +00:00
|
|
|
if target_arch == "arm"
|
|
|
|
&& target_os != "ios"
|
|
|
|
&& target_os != "watchos"
|
2023-10-06 21:35:20 +00:00
|
|
|
&& target_os != "tvos"
|
2022-03-08 10:18:07 +00:00
|
|
|
&& target_env != "msvc"
|
|
|
|
{
|
2019-05-14 21:33:08 +00:00
|
|
|
sources.extend(&[
|
2019-05-15 19:57:36 +00:00
|
|
|
("__aeabi_div0", "arm/aeabi_div0.c"),
|
|
|
|
("__aeabi_drsub", "arm/aeabi_drsub.c"),
|
|
|
|
("__aeabi_frsub", "arm/aeabi_frsub.c"),
|
|
|
|
("__bswapdi2", "arm/bswapdi2.S"),
|
|
|
|
("__bswapsi2", "arm/bswapsi2.S"),
|
|
|
|
("__clzdi2", "arm/clzdi2.S"),
|
|
|
|
("__clzsi2", "arm/clzsi2.S"),
|
|
|
|
("__divmodsi4", "arm/divmodsi4.S"),
|
|
|
|
("__divsi3", "arm/divsi3.S"),
|
|
|
|
("__modsi3", "arm/modsi3.S"),
|
|
|
|
("__switch16", "arm/switch16.S"),
|
|
|
|
("__switch32", "arm/switch32.S"),
|
|
|
|
("__switch8", "arm/switch8.S"),
|
|
|
|
("__switchu8", "arm/switchu8.S"),
|
|
|
|
("__sync_synchronize", "arm/sync_synchronize.S"),
|
|
|
|
("__udivmodsi4", "arm/udivmodsi4.S"),
|
|
|
|
("__udivsi3", "arm/udivsi3.S"),
|
|
|
|
("__umodsi3", "arm/umodsi3.S"),
|
2019-05-14 21:33:08 +00:00
|
|
|
]);
|
2018-01-19 08:04:57 +00:00
|
|
|
|
2019-02-02 18:52:41 +00:00
|
|
|
if target_os == "freebsd" {
|
2019-05-15 19:57:36 +00:00
|
|
|
sources.extend(&[("__clear_cache", "clear_cache.c")]);
|
2019-02-02 18:52:41 +00:00
|
|
|
}
|
2019-03-13 15:19:30 +00:00
|
|
|
|
2018-01-19 18:27:25 +00:00
|
|
|
// First of all aeabi_cdcmp and aeabi_cfcmp are never called by LLVM.
|
2018-01-19 08:04:57 +00:00
|
|
|
// Second are little-endian only, so build fail on big-endian targets.
|
|
|
|
// Temporally workaround: exclude these files for big-endian targets.
|
2019-05-14 21:33:08 +00:00
|
|
|
if !llvm_target[0].starts_with("thumbeb") && !llvm_target[0].starts_with("armeb") {
|
|
|
|
sources.extend(&[
|
2019-05-15 19:57:36 +00:00
|
|
|
("__aeabi_cdcmp", "arm/aeabi_cdcmp.S"),
|
|
|
|
("__aeabi_cdcmpeq_check_nan", "arm/aeabi_cdcmpeq_check_nan.c"),
|
|
|
|
("__aeabi_cfcmp", "arm/aeabi_cfcmp.S"),
|
|
|
|
("__aeabi_cfcmpeq_check_nan", "arm/aeabi_cfcmpeq_check_nan.c"),
|
2019-05-14 21:33:08 +00:00
|
|
|
]);
|
2018-01-19 18:27:25 +00:00
|
|
|
}
|
2017-04-10 16:19:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if llvm_target[0] == "armv7" {
|
2019-05-14 21:33:08 +00:00
|
|
|
sources.extend(&[
|
2019-05-15 19:57:36 +00:00
|
|
|
("__sync_fetch_and_add_4", "arm/sync_fetch_and_add_4.S"),
|
|
|
|
("__sync_fetch_and_add_8", "arm/sync_fetch_and_add_8.S"),
|
|
|
|
("__sync_fetch_and_and_4", "arm/sync_fetch_and_and_4.S"),
|
|
|
|
("__sync_fetch_and_and_8", "arm/sync_fetch_and_and_8.S"),
|
|
|
|
("__sync_fetch_and_max_4", "arm/sync_fetch_and_max_4.S"),
|
|
|
|
("__sync_fetch_and_max_8", "arm/sync_fetch_and_max_8.S"),
|
|
|
|
("__sync_fetch_and_min_4", "arm/sync_fetch_and_min_4.S"),
|
|
|
|
("__sync_fetch_and_min_8", "arm/sync_fetch_and_min_8.S"),
|
|
|
|
("__sync_fetch_and_nand_4", "arm/sync_fetch_and_nand_4.S"),
|
|
|
|
("__sync_fetch_and_nand_8", "arm/sync_fetch_and_nand_8.S"),
|
|
|
|
("__sync_fetch_and_or_4", "arm/sync_fetch_and_or_4.S"),
|
|
|
|
("__sync_fetch_and_or_8", "arm/sync_fetch_and_or_8.S"),
|
|
|
|
("__sync_fetch_and_sub_4", "arm/sync_fetch_and_sub_4.S"),
|
|
|
|
("__sync_fetch_and_sub_8", "arm/sync_fetch_and_sub_8.S"),
|
|
|
|
("__sync_fetch_and_umax_4", "arm/sync_fetch_and_umax_4.S"),
|
|
|
|
("__sync_fetch_and_umax_8", "arm/sync_fetch_and_umax_8.S"),
|
|
|
|
("__sync_fetch_and_umin_4", "arm/sync_fetch_and_umin_4.S"),
|
|
|
|
("__sync_fetch_and_umin_8", "arm/sync_fetch_and_umin_8.S"),
|
|
|
|
("__sync_fetch_and_xor_4", "arm/sync_fetch_and_xor_4.S"),
|
|
|
|
("__sync_fetch_and_xor_8", "arm/sync_fetch_and_xor_8.S"),
|
2019-05-14 21:33:08 +00:00
|
|
|
]);
|
2017-04-10 16:19:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if llvm_target.last().unwrap().ends_with("eabihf") {
|
2019-05-14 21:33:08 +00:00
|
|
|
if !llvm_target[0].starts_with("thumbv7em")
|
|
|
|
&& !llvm_target[0].starts_with("thumbv8m.main")
|
|
|
|
{
|
2019-03-07 19:30:39 +00:00
|
|
|
// The FPU option chosen for these architectures in cc-rs, ie:
|
|
|
|
// -mfpu=fpv4-sp-d16 for thumbv7em
|
|
|
|
// -mfpu=fpv5-sp-d16 for thumbv8m.main
|
|
|
|
// do not support double precision floating points conversions so the files
|
|
|
|
// that include such instructions are not included for these targets.
|
2019-05-14 21:33:08 +00:00
|
|
|
sources.extend(&[
|
2019-05-15 19:57:36 +00:00
|
|
|
("__fixdfsivfp", "arm/fixdfsivfp.S"),
|
|
|
|
("__fixunsdfsivfp", "arm/fixunsdfsivfp.S"),
|
|
|
|
("__floatsidfvfp", "arm/floatsidfvfp.S"),
|
|
|
|
("__floatunssidfvfp", "arm/floatunssidfvfp.S"),
|
2019-05-14 21:33:08 +00:00
|
|
|
]);
|
2017-02-19 20:49:59 +00:00
|
|
|
}
|
2016-09-26 20:55:11 +00:00
|
|
|
|
2019-05-14 21:33:08 +00:00
|
|
|
sources.extend(&[
|
2019-05-15 19:57:36 +00:00
|
|
|
("__fixsfsivfp", "arm/fixsfsivfp.S"),
|
|
|
|
("__fixunssfsivfp", "arm/fixunssfsivfp.S"),
|
|
|
|
("__floatsisfvfp", "arm/floatsisfvfp.S"),
|
|
|
|
("__floatunssisfvfp", "arm/floatunssisfvfp.S"),
|
|
|
|
("__floatunssisfvfp", "arm/floatunssisfvfp.S"),
|
|
|
|
("__restore_vfp_d8_d15_regs", "arm/restore_vfp_d8_d15_regs.S"),
|
|
|
|
("__save_vfp_d8_d15_regs", "arm/save_vfp_d8_d15_regs.S"),
|
|
|
|
("__negdf2vfp", "arm/negdf2vfp.S"),
|
|
|
|
("__negsf2vfp", "arm/negsf2vfp.S"),
|
2019-05-14 21:33:08 +00:00
|
|
|
]);
|
2016-09-26 20:55:11 +00:00
|
|
|
}
|
|
|
|
|
2020-02-28 19:01:22 +00:00
|
|
|
if target_arch == "aarch64" && consider_float_intrinsics {
|
2019-05-14 21:33:08 +00:00
|
|
|
sources.extend(&[
|
2019-05-15 19:57:36 +00:00
|
|
|
("__comparetf2", "comparetf2.c"),
|
|
|
|
("__extenddftf2", "extenddftf2.c"),
|
|
|
|
("__extendsftf2", "extendsftf2.c"),
|
|
|
|
("__fixtfdi", "fixtfdi.c"),
|
|
|
|
("__fixtfsi", "fixtfsi.c"),
|
|
|
|
("__fixtfti", "fixtfti.c"),
|
|
|
|
("__fixunstfdi", "fixunstfdi.c"),
|
|
|
|
("__fixunstfsi", "fixunstfsi.c"),
|
|
|
|
("__fixunstfti", "fixunstfti.c"),
|
|
|
|
("__floatditf", "floatditf.c"),
|
|
|
|
("__floatsitf", "floatsitf.c"),
|
|
|
|
("__floatunditf", "floatunditf.c"),
|
|
|
|
("__floatunsitf", "floatunsitf.c"),
|
|
|
|
("__trunctfdf2", "trunctfdf2.c"),
|
|
|
|
("__trunctfsf2", "trunctfsf2.c"),
|
2021-07-14 04:11:12 +00:00
|
|
|
("__addtf3", "addtf3.c"),
|
|
|
|
("__multf3", "multf3.c"),
|
|
|
|
("__subtf3", "subtf3.c"),
|
|
|
|
("__divtf3", "divtf3.c"),
|
|
|
|
("__powitf2", "powitf2.c"),
|
|
|
|
("__fe_getround", "fp_mode.c"),
|
|
|
|
("__fe_raise_inexact", "fp_mode.c"),
|
2019-05-14 21:33:08 +00:00
|
|
|
]);
|
2018-07-16 23:37:35 +00:00
|
|
|
|
|
|
|
if target_os != "windows" {
|
2019-05-15 19:57:36 +00:00
|
|
|
sources.extend(&[("__multc3", "multc3.c")]);
|
2018-07-16 23:37:35 +00:00
|
|
|
}
|
2017-04-10 16:19:16 +00:00
|
|
|
}
|
2016-09-26 20:55:11 +00:00
|
|
|
|
2020-02-12 02:44:10 +00:00
|
|
|
if target_arch == "mips" {
|
|
|
|
sources.extend(&[("__bswapsi2", "bswapsi2.c")]);
|
|
|
|
}
|
|
|
|
|
|
|
|
if target_arch == "mips64" {
|
|
|
|
sources.extend(&[
|
|
|
|
("__extenddftf2", "extenddftf2.c"),
|
|
|
|
("__netf2", "comparetf2.c"),
|
|
|
|
("__addtf3", "addtf3.c"),
|
|
|
|
("__multf3", "multf3.c"),
|
|
|
|
("__subtf3", "subtf3.c"),
|
|
|
|
("__fixtfsi", "fixtfsi.c"),
|
|
|
|
("__floatsitf", "floatsitf.c"),
|
|
|
|
("__fixunstfsi", "fixunstfsi.c"),
|
|
|
|
("__floatunsitf", "floatunsitf.c"),
|
|
|
|
("__fe_getround", "fp_mode.c"),
|
2021-03-24 05:49:12 +00:00
|
|
|
("__divtf3", "divtf3.c"),
|
|
|
|
("__trunctfdf2", "trunctfdf2.c"),
|
2022-07-29 20:52:23 +00:00
|
|
|
("__trunctfsf2", "trunctfsf2.c"),
|
2020-02-12 02:44:10 +00:00
|
|
|
]);
|
|
|
|
}
|
|
|
|
|
2017-04-10 16:19:16 +00:00
|
|
|
// Remove the assembly implementations that won't compile for the target
|
2022-11-16 02:01:21 +00:00
|
|
|
if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" || target_os == "uefi"
|
|
|
|
{
|
2019-05-15 19:57:36 +00:00
|
|
|
let mut to_remove = Vec::new();
|
|
|
|
for (k, v) in sources.map.iter() {
|
|
|
|
if v.ends_with(".S") {
|
|
|
|
to_remove.push(*k);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
sources.remove(&to_remove);
|
2017-04-10 16:19:16 +00:00
|
|
|
|
|
|
|
// But use some generic implementations where possible
|
2019-05-15 19:57:36 +00:00
|
|
|
sources.extend(&[("__clzdi2", "clzdi2.c"), ("__clzsi2", "clzsi2.c")])
|
2017-04-10 16:19:16 +00:00
|
|
|
}
|
2016-09-26 20:55:11 +00:00
|
|
|
|
2017-04-10 16:19:16 +00:00
|
|
|
if llvm_target[0] == "thumbv7m" || llvm_target[0] == "thumbv7em" {
|
2019-05-15 19:57:36 +00:00
|
|
|
sources.remove(&["__aeabi_cdcmp", "__aeabi_cfcmp"]);
|
2017-04-10 16:19:16 +00:00
|
|
|
}
|
|
|
|
|
2022-03-23 02:06:50 +00:00
|
|
|
// Android uses emulated TLS so we need a runtime support function.
|
|
|
|
if target_os == "android" {
|
|
|
|
sources.extend(&[("__emutls_get_address", "emutls.c")]);
|
|
|
|
|
|
|
|
// Work around a bug in the NDK headers (fixed in
|
|
|
|
// https://r.android.com/2038949 which will be released in a future
|
|
|
|
// NDK version) by providing a definition of LONG_BIT.
|
|
|
|
cfg.define("LONG_BIT", "(8 * sizeof(long))");
|
|
|
|
}
|
|
|
|
|
2023-03-10 19:59:23 +00:00
|
|
|
// OpenHarmony also uses emulated TLS.
|
|
|
|
if target_env == "ohos" {
|
|
|
|
sources.extend(&[("__emutls_get_address", "emutls.c")]);
|
|
|
|
}
|
|
|
|
|
2019-05-16 14:30:36 +00:00
|
|
|
// When compiling the C code we require the user to tell us where the
|
|
|
|
// source code is, and this is largely done so when we're compiling as
|
|
|
|
// part of rust-lang/rust we can use the same llvm-project repository as
|
|
|
|
// rust-lang/rust.
|
|
|
|
let root = match env::var_os("RUST_COMPILER_RT_ROOT") {
|
|
|
|
Some(s) => PathBuf::from(s),
|
|
|
|
None => panic!("RUST_COMPILER_RT_ROOT is not set"),
|
2017-04-10 16:19:16 +00:00
|
|
|
};
|
2019-05-16 14:30:36 +00:00
|
|
|
if !root.exists() {
|
|
|
|
panic!("RUST_COMPILER_RT_ROOT={} does not exist", root.display());
|
|
|
|
}
|
2017-04-10 16:19:16 +00:00
|
|
|
|
2019-08-19 16:30:45 +00:00
|
|
|
// Support deterministic builds by remapping the __FILE__ prefix if the
|
2019-08-19 22:12:07 +00:00
|
|
|
// compiler supports it. This fixes the nondeterminism caused by the
|
|
|
|
// use of that macro in lib/builtins/int_util.h in compiler-rt.
|
2019-08-19 16:30:45 +00:00
|
|
|
cfg.flag_if_supported(&format!("-ffile-prefix-map={}=.", root.display()));
|
|
|
|
|
2021-03-02 23:50:09 +00:00
|
|
|
// Include out-of-line atomics for aarch64, which are all generated by supplying different
|
|
|
|
// sets of flags to the same source file.
|
2021-07-28 17:44:49 +00:00
|
|
|
// Note: Out-of-line aarch64 atomics are not supported by the msvc toolchain (#430).
|
2019-05-16 14:30:36 +00:00
|
|
|
let src_dir = root.join("lib/builtins");
|
2021-07-28 17:44:49 +00:00
|
|
|
if target_arch == "aarch64" && target_env != "msvc" {
|
2021-05-13 20:35:34 +00:00
|
|
|
// See below for why we're building these as separate libraries.
|
|
|
|
build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg);
|
|
|
|
|
|
|
|
// Some run-time CPU feature detection is necessary, as well.
|
|
|
|
sources.extend(&[("__aarch64_have_lse_atomics", "cpu_model.c")]);
|
2021-03-02 23:50:09 +00:00
|
|
|
}
|
|
|
|
|
2021-07-14 04:11:12 +00:00
|
|
|
let mut added_sources = HashSet::new();
|
2019-05-15 19:57:36 +00:00
|
|
|
for (sym, src) in sources.map.iter() {
|
2017-04-10 16:19:16 +00:00
|
|
|
let src = src_dir.join(src);
|
2021-07-14 04:11:12 +00:00
|
|
|
if added_sources.insert(src.clone()) {
|
|
|
|
cfg.file(&src);
|
|
|
|
println!("cargo:rerun-if-changed={}", src.display());
|
|
|
|
}
|
2019-05-15 19:57:36 +00:00
|
|
|
println!("cargo:rustc-cfg={}=\"optimized-c\"", sym);
|
2017-04-10 16:19:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
cfg.compile("libcompiler-rt.a");
|
2016-09-26 20:55:11 +00:00
|
|
|
}
|
2021-03-02 23:50:09 +00:00
|
|
|
|
2021-11-28 12:50:21 +00:00
|
|
|
fn build_aarch64_out_of_line_atomics_libraries(builtins_dir: &Path, cfg: &mut cc::Build) {
|
|
|
|
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
|
2021-03-02 23:50:09 +00:00
|
|
|
let outlined_atomics_file = builtins_dir.join("aarch64/lse.S");
|
|
|
|
println!("cargo:rerun-if-changed={}", outlined_atomics_file.display());
|
2021-04-30 09:39:07 +00:00
|
|
|
|
2021-11-28 12:50:21 +00:00
|
|
|
cfg.include(&builtins_dir);
|
|
|
|
|
2021-04-30 09:38:39 +00:00
|
|
|
for instruction_type in &["cas", "swp", "ldadd", "ldclr", "ldeor", "ldset"] {
|
2021-03-02 23:50:09 +00:00
|
|
|
for size in &[1, 2, 4, 8, 16] {
|
|
|
|
if *size == 16 && *instruction_type != "cas" {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (model_number, model_name) in
|
|
|
|
&[(1, "relax"), (2, "acq"), (3, "rel"), (4, "acq_rel")]
|
|
|
|
{
|
2021-11-28 12:50:21 +00:00
|
|
|
// The original compiler-rt build system compiles the same
|
|
|
|
// source file multiple times with different compiler
|
|
|
|
// options. Here we do something slightly different: we
|
|
|
|
// create multiple .S files with the proper #defines and
|
|
|
|
// then include the original file.
|
|
|
|
//
|
|
|
|
// This is needed because the cc crate doesn't allow us to
|
|
|
|
// override the name of object files and libtool requires
|
|
|
|
// all objects in an archive to have unique names.
|
|
|
|
let path =
|
|
|
|
out_dir.join(format!("lse_{}{}_{}.S", instruction_type, size, model_name));
|
|
|
|
let mut file = File::create(&path).unwrap();
|
|
|
|
writeln!(file, "#define L_{}", instruction_type).unwrap();
|
|
|
|
writeln!(file, "#define SIZE {}", size).unwrap();
|
|
|
|
writeln!(file, "#define MODEL {}", model_number).unwrap();
|
|
|
|
writeln!(
|
|
|
|
file,
|
|
|
|
"#include \"{}\"",
|
|
|
|
outlined_atomics_file.canonicalize().unwrap().display()
|
2021-12-09 23:51:18 +00:00
|
|
|
)
|
|
|
|
.unwrap();
|
2021-11-28 12:50:21 +00:00
|
|
|
drop(file);
|
|
|
|
cfg.file(path);
|
2021-03-02 23:50:09 +00:00
|
|
|
|
2021-11-28 12:50:21 +00:00
|
|
|
let sym = format!("__aarch64_{}{}_{}", instruction_type, size, model_name);
|
2021-03-02 23:50:09 +00:00
|
|
|
println!("cargo:rustc-cfg={}=\"optimized-c\"", sym);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2016-09-22 02:14:38 +00:00
|
|
|
}
|