mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-23 07:14:28 +00:00
Auto merge of #117124 - bjorn3:sync_cg_clif-2023-10-24, r=bjorn3
Sync rustc_codegen_cranelift This contains fixes for the last two remaining known miscompilations. One is the lack of stack alignment support in Cranelift, which has been worked around by dynamically realigning stack slots at runtime. This fixed rayon and, by extension, Wasmtime. The other is the lack of zero/sign extension of small arguments when the ABI requires it. This is completely fine when only using cg_clif-compiled code, but LLVM depends on the extension, resulting in weird behavior in binaries that mix LLVM- and Cranelift-compiled code. The update to Cranelift 0.101.1 fixes this. In addition, I have implemented all x86_64 SIMD intrinsics required by the image and rav1e crates. r? `@ghost` `@rustbot` label +A-codegen +A-cranelift +T-compiler
This commit is contained in:
commit
642bfb254a
@ -33,7 +33,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"sysroot_src": "./download/sysroot/sysroot_src/library",
|
||||
"sysroot_src": "./build/stdlib/library",
|
||||
"crates": [
|
||||
{
|
||||
"root_module": "./example/std_example.rs",
|
||||
|
@ -45,18 +45,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-bforest"
|
||||
version = "0.101.0"
|
||||
version = "0.101.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e5e1df0da8488dd03b34afc134ba84b754d61862cc465932a9e5d07952f661e"
|
||||
checksum = "c1512c3bb6b13018e7109fc3ac964bc87b329eaf3a77825d337558d0c7f6f1be"
|
||||
dependencies = [
|
||||
"cranelift-entity",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-codegen"
|
||||
version = "0.101.0"
|
||||
version = "0.101.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77a17ca4e699a0aaf49a0c88f6311a864f321048aa63f6b787cab20eb5f93f10"
|
||||
checksum = "16cb8fb9220a6ea7a226705a273ab905309ee546267bdf34948d57932d7f0396"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"cranelift-bforest",
|
||||
@ -75,39 +75,39 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-codegen-meta"
|
||||
version = "0.101.0"
|
||||
version = "0.101.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "022f2793cdade1d37a1f755ac42938a3f832f533eac6cafc8b26b209544c3c06"
|
||||
checksum = "ab3a8d3b0d4745b183da5ea0792b13d79f5c23d6e69ac04761728e2532b56649"
|
||||
dependencies = [
|
||||
"cranelift-codegen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-codegen-shared"
|
||||
version = "0.101.0"
|
||||
version = "0.101.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4d72dbb83c2ad788dec4ad0843070973cb48c35a3ca19b1e7437ac40834fd9c"
|
||||
checksum = "524141c8e68f2abc2043de4c2b31f6d9dd42432738c246431d0572a1422a4a84"
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-control"
|
||||
version = "0.101.0"
|
||||
version = "0.101.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae07cf26dcc90d546826d747ac63b6c40c916f34b03e92a6ae0422c28d771b8a"
|
||||
checksum = "97513b57c961c713789a03886a57b43e14ebcd204cbaa8ae50ca6c70a8e716b3"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-entity"
|
||||
version = "0.101.0"
|
||||
version = "0.101.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2fe6b7e49820893691aea497f36257e9d6f52061d8c4758d61d802d5f101a3d"
|
||||
checksum = "e3f23d3cf3afa7e45f239702612c76d87964f652a55e28d13ed6d7e20f3479dd"
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-frontend"
|
||||
version = "0.101.0"
|
||||
version = "0.101.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "44f497576ca3674581581601b6a55ccc1b43447217648c880e5bce70db3cf659"
|
||||
checksum = "554cd4947ec9209b58bf9ae5bf83581b5ddf9128bd967208e334b504a57db54e"
|
||||
dependencies = [
|
||||
"cranelift-codegen",
|
||||
"log",
|
||||
@ -117,15 +117,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-isle"
|
||||
version = "0.101.0"
|
||||
version = "0.101.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b96aa02eac00fffee13b0cd37d17874ccdb3d5458983041accd825ef78ce6454"
|
||||
checksum = "6c1892a439696b6413cb54083806f5fd9fc431768b8de74864b3d9e8b93b124f"
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-jit"
|
||||
version = "0.101.0"
|
||||
version = "0.101.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1d6e0e308c873eefc185745a6b21daec2a10f7554c9fb67e334c2d7d756d979"
|
||||
checksum = "32209252fb38acaf1662ccd0397907bbe0e92bdb13b6ddbfd2f74e437f83e685"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cranelift-codegen",
|
||||
@ -143,9 +143,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-module"
|
||||
version = "0.101.0"
|
||||
version = "0.101.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1aa8ebb06eced4e478c3f94f1d65d4e7c93493f4640057912b27a3e34b84841"
|
||||
checksum = "bf42656f5f6df7bfafc4dd7b63a1888b0627c07b43b2cb9aa54e13843fed39eb"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cranelift-codegen",
|
||||
@ -154,9 +154,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-native"
|
||||
version = "0.101.0"
|
||||
version = "0.101.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2870170ca44054b202c737626607b87be6e35655084bd94a6ff807a5812ba7df"
|
||||
checksum = "e0c2d3badd4b9690865f5bb68a71fa94de592fa2df3f3d11a5a062c60c0a107a"
|
||||
dependencies = [
|
||||
"cranelift-codegen",
|
||||
"libc",
|
||||
@ -165,9 +165,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cranelift-object"
|
||||
version = "0.101.0"
|
||||
version = "0.101.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20647761742d17dabac8205da958910ede78599550e06418a16711a3ee2fc897"
|
||||
checksum = "88eca54bbecea3170035168357306e9c779d4a63d8bf036c9e16bd21fdaa69b5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cranelift-codegen",
|
||||
@ -374,9 +374,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "wasmtime-jit-icache-coherence"
|
||||
version = "14.0.0"
|
||||
version = "14.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a3a5dda53ad6993f9b0a2d65fb49e0348a7232a27a8794064122870d6ee19eb2"
|
||||
checksum = "9aaf2fa8fd2d6b65abae9b92edfe69254cc5d6b166e342364036c3e347de8da9"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
|
@ -8,12 +8,12 @@ crate-type = ["dylib"]
|
||||
|
||||
[dependencies]
|
||||
# These have to be in sync with each other
|
||||
cranelift-codegen = { version = "0.101", features = ["unwind", "all-arch"] }
|
||||
cranelift-frontend = { version = "0.101" }
|
||||
cranelift-module = { version = "0.101" }
|
||||
cranelift-native = { version = "0.101" }
|
||||
cranelift-jit = { version = "0.101", optional = true }
|
||||
cranelift-object = { version = "0.101" }
|
||||
cranelift-codegen = { version = "0.101.1", features = ["unwind", "all-arch"] }
|
||||
cranelift-frontend = { version = "0.101.1" }
|
||||
cranelift-module = { version = "0.101.1" }
|
||||
cranelift-native = { version = "0.101.1" }
|
||||
cranelift-jit = { version = "0.101.1", optional = true }
|
||||
cranelift-object = { version = "0.101.1" }
|
||||
target-lexicon = "0.12.0"
|
||||
gimli = { version = "0.28", default-features = false, features = ["write"]}
|
||||
object = { version = "0.32", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] }
|
||||
|
@ -8,7 +8,7 @@ If not please open an issue.
|
||||
## Building and testing
|
||||
|
||||
```bash
|
||||
$ git clone https://github.com/bjorn3/rustc_codegen_cranelift
|
||||
$ git clone https://github.com/rust-lang/rustc_codegen_cranelift
|
||||
$ cd rustc_codegen_cranelift
|
||||
$ ./y.sh prepare
|
||||
$ ./y.sh build
|
||||
@ -29,7 +29,7 @@ Extract the `dist` directory in the archive anywhere you want.
|
||||
If you want to use `cargo clif build` instead of having to specify the full path to the `cargo-clif` executable, you can add the `bin` subdirectory of the extracted `dist` directory to your `PATH`.
|
||||
(tutorial [for Windows](https://stackoverflow.com/a/44272417), and [for Linux/MacOS](https://unix.stackexchange.com/questions/26047/how-to-correctly-add-a-path-to-path/26059#26059)).
|
||||
|
||||
[releases]: https://github.com/bjorn3/rustc_codegen_cranelift/releases/tag/dev
|
||||
[releases]: https://github.com/rust-lang/rustc_codegen_cranelift/releases/tag/dev
|
||||
|
||||
## Usage
|
||||
|
||||
@ -78,7 +78,7 @@ configuration options.
|
||||
|
||||
* Inline assembly ([no cranelift support](https://github.com/bytecodealliance/wasmtime/issues/1041))
|
||||
* On UNIX there is support for invoking an external assembler for `global_asm!` and `asm!`.
|
||||
* SIMD ([tracked here](https://github.com/bjorn3/rustc_codegen_cranelift/issues/171), `std::simd` fully works, `std::arch` is partially supported)
|
||||
* SIMD ([tracked here](https://github.com/rust-lang/rustc_codegen_cranelift/issues/171), `std::simd` fully works, `std::arch` is partially supported)
|
||||
* Unwinding on panics ([no cranelift support](https://github.com/bytecodealliance/wasmtime/issues/1677), `-Cpanic=abort` is enabled by default)
|
||||
|
||||
## License
|
||||
|
@ -353,6 +353,17 @@ fn main() {
|
||||
|
||||
let f = V([0.0, 1.0]);
|
||||
let _a = f.0[0];
|
||||
|
||||
stack_val_align();
|
||||
}
|
||||
|
||||
/// Regression check for over-aligned stack values: a local whose type demands 8192-byte
/// alignment must end up at an address that is a multiple of 8192.
///
/// Kept out-of-line so the value lives in this function's own stack frame.
#[inline(never)]
fn stack_val_align() {
    #[repr(align(8192))]
    struct Foo(u8);

    let value = Foo(0);
    let addr = &value as *const Foo as usize;
    assert_eq!(addr % 8192, 0);
}
|
||||
|
||||
#[cfg(all(
|
||||
|
@ -1,25 +0,0 @@
|
||||
From 5d4afb8d807d181038b6a004d17ed055a8d191b2 Mon Sep 17 00:00:00 2001
|
||||
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
|
||||
Date: Mon, 2 Oct 2023 13:59:00 +0000
|
||||
Subject: [PATCH] Ignore test which gets miscompiled with llvm sysroot
|
||||
|
||||
---
|
||||
regex-automata/src/util/pool.rs | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/regex-automata/src/util/pool.rs b/regex-automata/src/util/pool.rs
|
||||
index c03d7b0..28b233b 100644
|
||||
--- a/regex-automata/src/util/pool.rs
|
||||
+++ b/regex-automata/src/util/pool.rs
|
||||
@@ -1081,6 +1081,8 @@ mod tests {
|
||||
// into the pool. This in turn resulted in this test producing a data race.
|
||||
#[cfg(feature = "std")]
|
||||
#[test]
|
||||
+ // FIXME(rustc_codegen_cranelift#1395) miscompilation of thread::scope with LLVM sysroot
|
||||
+ #[ignore]
|
||||
fn thread_owner_sync() {
|
||||
let pool = Pool::new(|| vec!['a']);
|
||||
{
|
||||
--
|
||||
2.34.1
|
||||
|
@ -120,32 +120,25 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> {
|
||||
args: &[Value],
|
||||
) -> Cow<'_, [Value]> {
|
||||
if self.tcx.sess.target.is_like_windows {
|
||||
let (mut params, mut args): (Vec<_>, Vec<_>) =
|
||||
params
|
||||
.into_iter()
|
||||
.zip(args)
|
||||
.map(|(param, &arg)| {
|
||||
if param.value_type == types::I128 {
|
||||
let arg_ptr = Pointer::stack_slot(self.bcx.create_sized_stack_slot(
|
||||
StackSlotData { kind: StackSlotKind::ExplicitSlot, size: 16 },
|
||||
));
|
||||
arg_ptr.store(self, arg, MemFlags::trusted());
|
||||
(AbiParam::new(self.pointer_type), arg_ptr.get_addr(self))
|
||||
} else {
|
||||
(param, arg)
|
||||
}
|
||||
})
|
||||
.unzip();
|
||||
let (mut params, mut args): (Vec<_>, Vec<_>) = params
|
||||
.into_iter()
|
||||
.zip(args)
|
||||
.map(|(param, &arg)| {
|
||||
if param.value_type == types::I128 {
|
||||
let arg_ptr = self.create_stack_slot(16, 16);
|
||||
arg_ptr.store(self, arg, MemFlags::trusted());
|
||||
(AbiParam::new(self.pointer_type), arg_ptr.get_addr(self))
|
||||
} else {
|
||||
(param, arg)
|
||||
}
|
||||
})
|
||||
.unzip();
|
||||
|
||||
let indirect_ret_val = returns.len() == 1 && returns[0].value_type == types::I128;
|
||||
|
||||
if indirect_ret_val {
|
||||
params.insert(0, AbiParam::new(self.pointer_type));
|
||||
let ret_ptr =
|
||||
Pointer::stack_slot(self.bcx.create_sized_stack_slot(StackSlotData {
|
||||
kind: StackSlotKind::ExplicitSlot,
|
||||
size: 16,
|
||||
}));
|
||||
let ret_ptr = self.create_stack_slot(16, 16);
|
||||
args.insert(0, ret_ptr.get_addr(self));
|
||||
self.lib_call_unadjusted(name, params, vec![], &args);
|
||||
return Cow::Owned(vec![ret_ptr.load(self, types::I128, MemFlags::trusted())]);
|
||||
|
@ -189,16 +189,13 @@ pub(super) fn from_casted_value<'tcx>(
|
||||
let abi_params = cast_target_to_abi_params(cast);
|
||||
let abi_param_size: u32 = abi_params.iter().map(|param| param.value_type.bytes()).sum();
|
||||
let layout_size = u32::try_from(layout.size.bytes()).unwrap();
|
||||
let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData {
|
||||
kind: StackSlotKind::ExplicitSlot,
|
||||
// FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
|
||||
// specify stack slot alignment.
|
||||
let ptr = fx.create_stack_slot(
|
||||
// Stack slot size may be bigger for example `[u8; 3]` which is packed into an `i32`.
|
||||
// It may also be smaller for example when the type is a wrapper around an integer with a
|
||||
// larger alignment than the integer.
|
||||
size: (std::cmp::max(abi_param_size, layout_size) + 15) / 16 * 16,
|
||||
});
|
||||
let ptr = Pointer::stack_slot(stack_slot);
|
||||
std::cmp::max(abi_param_size, layout_size),
|
||||
u32::try_from(layout.align.pref.bytes()).unwrap(),
|
||||
);
|
||||
let mut offset = 0;
|
||||
let mut block_params_iter = block_params.iter().copied();
|
||||
for param in abi_params {
|
||||
|
@ -104,11 +104,7 @@ pub(crate) fn clif_int_or_float_cast(
|
||||
&[from],
|
||||
)[0];
|
||||
// FIXME(bytecodealliance/wasmtime#6104) use bitcast instead of store to get from i64x2 to i128
|
||||
let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData {
|
||||
kind: StackSlotKind::ExplicitSlot,
|
||||
size: 16,
|
||||
});
|
||||
let ret_ptr = Pointer::stack_slot(stack_slot);
|
||||
let ret_ptr = fx.create_stack_slot(16, 16);
|
||||
ret_ptr.store(fx, ret, MemFlags::trusted());
|
||||
ret_ptr.load(fx, types::I128, MemFlags::trusted())
|
||||
} else {
|
||||
|
@ -383,6 +383,25 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> {
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn create_stack_slot(&mut self, size: u32, align: u32) -> Pointer {
|
||||
if align <= 16 {
|
||||
let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData {
|
||||
kind: StackSlotKind::ExplicitSlot,
|
||||
// FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
|
||||
// specify stack slot alignment.
|
||||
size: (size + 15) / 16 * 16,
|
||||
});
|
||||
Pointer::stack_slot(stack_slot)
|
||||
} else {
|
||||
// Alignment is too big to handle using the above hack. Dynamically realign a stack slot
|
||||
// instead. This wastes some space for the realignment.
|
||||
let base_ptr = self.create_stack_slot(size + align, 16).get_addr(self);
|
||||
let misalign_offset = self.bcx.ins().urem_imm(base_ptr, i64::from(align));
|
||||
let realign_offset = self.bcx.ins().irsub_imm(misalign_offset, i64::from(align));
|
||||
Pointer::new(self.bcx.ins().iadd(base_ptr, realign_offset))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn set_debug_loc(&mut self, source_info: mir::SourceInfo) {
|
||||
if let Some(debug_context) = &mut self.cx.debug_context {
|
||||
let (file, line, column) =
|
||||
|
@ -361,12 +361,26 @@ pub(crate) fn run_aot(
|
||||
metadata: EncodedMetadata,
|
||||
need_metadata_module: bool,
|
||||
) -> Box<OngoingCodegen> {
|
||||
// FIXME handle `-Ctarget-cpu=native`
|
||||
let target_cpu = match tcx.sess.opts.cg.target_cpu {
|
||||
Some(ref name) => name,
|
||||
None => tcx.sess.target.cpu.as_ref(),
|
||||
}
|
||||
.to_owned();
|
||||
|
||||
let cgus = if tcx.sess.opts.output_types.should_codegen() {
|
||||
tcx.collect_and_partition_mono_items(()).1
|
||||
} else {
|
||||
// If only `--emit metadata` is used, we shouldn't perform any codegen.
|
||||
// Also `tcx.collect_and_partition_mono_items` may panic in that case.
|
||||
&[]
|
||||
return Box::new(OngoingCodegen {
|
||||
modules: vec![],
|
||||
allocator_module: None,
|
||||
metadata_module: None,
|
||||
metadata,
|
||||
crate_info: CrateInfo::new(tcx, target_cpu),
|
||||
concurrency_limiter: ConcurrencyLimiter::new(tcx.sess, 0),
|
||||
});
|
||||
};
|
||||
|
||||
if tcx.dep_graph.is_fully_enabled() {
|
||||
@ -481,13 +495,6 @@ pub(crate) fn run_aot(
|
||||
None
|
||||
};
|
||||
|
||||
// FIXME handle `-Ctarget-cpu=native`
|
||||
let target_cpu = match tcx.sess.opts.cg.target_cpu {
|
||||
Some(ref name) => name,
|
||||
None => tcx.sess.target.cpu.as_ref(),
|
||||
}
|
||||
.to_owned();
|
||||
|
||||
Box::new(OngoingCodegen {
|
||||
modules,
|
||||
allocator_module,
|
||||
|
@ -878,13 +878,7 @@ fn call_inline_asm<'tcx>(
|
||||
inputs: Vec<(Size, Value)>,
|
||||
outputs: Vec<(Size, CPlace<'tcx>)>,
|
||||
) {
|
||||
let stack_slot = fx.bcx.func.create_sized_stack_slot(StackSlotData {
|
||||
kind: StackSlotKind::ExplicitSlot,
|
||||
size: u32::try_from(slot_size.bytes()).unwrap(),
|
||||
});
|
||||
if fx.clif_comments.enabled() {
|
||||
fx.add_comment(stack_slot, "inline asm scratch slot");
|
||||
}
|
||||
let stack_slot = fx.create_stack_slot(u32::try_from(slot_size.bytes()).unwrap(), 16);
|
||||
|
||||
let inline_asm_func = fx
|
||||
.module
|
||||
@ -904,15 +898,23 @@ fn call_inline_asm<'tcx>(
|
||||
}
|
||||
|
||||
for (offset, value) in inputs {
|
||||
fx.bcx.ins().stack_store(value, stack_slot, i32::try_from(offset.bytes()).unwrap());
|
||||
stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).store(
|
||||
fx,
|
||||
value,
|
||||
MemFlags::trusted(),
|
||||
);
|
||||
}
|
||||
|
||||
let stack_slot_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0);
|
||||
let stack_slot_addr = stack_slot.get_addr(fx);
|
||||
fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]);
|
||||
|
||||
for (offset, place) in outputs {
|
||||
let ty = fx.clif_type(place.layout().ty).unwrap();
|
||||
let value = fx.bcx.ins().stack_load(ty, stack_slot, i32::try_from(offset.bytes()).unwrap());
|
||||
let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load(
|
||||
fx,
|
||||
ty,
|
||||
MemFlags::trusted(),
|
||||
);
|
||||
place.write_cvalue(fx, CValue::by_val(value, place.layout()));
|
||||
}
|
||||
}
|
||||
|
@ -310,6 +310,143 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
|
||||
let val = CValue::by_val_pair(cb_out, c, layout);
|
||||
ret.write_cvalue(fx, val);
|
||||
}
|
||||
"llvm.x86.sse2.pavg.b" | "llvm.x86.sse2.pavg.w" => {
|
||||
intrinsic_args!(fx, args => (a, b); intrinsic);
|
||||
|
||||
// FIXME use vector instructions when possible
|
||||
simd_pair_for_each_lane(
|
||||
fx,
|
||||
a,
|
||||
b,
|
||||
ret,
|
||||
&|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| {
|
||||
// (a + b + 1) >> 1
|
||||
let lane_ty = fx.bcx.func.dfg.value_type(a_lane);
|
||||
let a_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), a_lane);
|
||||
let b_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), b_lane);
|
||||
let sum = fx.bcx.ins().iadd(a_lane, b_lane);
|
||||
let num_plus_one = fx.bcx.ins().iadd_imm(sum, 1);
|
||||
let res = fx.bcx.ins().ushr_imm(num_plus_one, 1);
|
||||
fx.bcx.ins().ireduce(lane_ty, res)
|
||||
},
|
||||
);
|
||||
}
|
||||
"llvm.x86.sse2.psra.w" => {
|
||||
intrinsic_args!(fx, args => (a, count); intrinsic);
|
||||
|
||||
let count_lane = count.force_stack(fx).0.load(fx, types::I64, MemFlags::trusted());
|
||||
let lane_ty = fx.clif_type(a.layout().ty.simd_size_and_type(fx.tcx).1).unwrap();
|
||||
let max_count = fx.bcx.ins().iconst(types::I64, i64::from(lane_ty.bits() - 1));
|
||||
let saturated_count = fx.bcx.ins().umin(count_lane, max_count);
|
||||
|
||||
// FIXME use vector instructions when possible
|
||||
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, a_lane| {
|
||||
fx.bcx.ins().sshr(a_lane, saturated_count)
|
||||
});
|
||||
}
|
||||
"llvm.x86.sse2.psad.bw" => {
    // Sum of absolute differences: every u64 output lane receives the sum of
    // `|a[i] - b[i]|` over the 8 consecutive unsigned byte lanes it covers.
    intrinsic_args!(fx, args => (a, b); intrinsic);

    assert_eq!(a.layout(), b.layout());
    let layout = a.layout();

    let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
    assert_eq!(lane_ty, fx.tcx.types.u8);
    assert_eq!(ret_lane_ty, fx.tcx.types.u64);
    assert_eq!(lane_count, ret_lane_count * 8);

    let ret_lane_layout = fx.layout_of(fx.tcx.types.u64);
    for out_lane_idx in 0..lane_count / 8 {
        let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0);

        // Each output lane accumulates all 8 input byte lanes belonging to it.
        for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 8 {
            // Widen the unsigned bytes before subtracting: at 8-bit width a difference
            // of 128 or more would be interpreted as a negative value by `iabs`.
            let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx);
            let a_lane = fx.bcx.ins().uextend(types::I16, a_lane);
            let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx);
            let b_lane = fx.bcx.ins().uextend(types::I16, b_lane);

            let lane_diff = fx.bcx.ins().isub(a_lane, b_lane);
            let abs_lane_diff = fx.bcx.ins().iabs(lane_diff);
            let abs_lane_diff = fx.bcx.ins().uextend(types::I64, abs_lane_diff);
            lane_diff_acc = fx.bcx.ins().iadd(lane_diff_acc, abs_lane_diff);
        }

        let res_lane = CValue::by_val(lane_diff_acc, ret_lane_layout);

        ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
    }
}
|
||||
"llvm.x86.ssse3.pmadd.ub.sw.128" => {
|
||||
intrinsic_args!(fx, args => (a, b); intrinsic);
|
||||
|
||||
let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
|
||||
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
|
||||
assert_eq!(lane_ty, fx.tcx.types.u8);
|
||||
assert_eq!(ret_lane_ty, fx.tcx.types.i16);
|
||||
assert_eq!(lane_count, ret_lane_count * 2);
|
||||
|
||||
let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
|
||||
for out_lane_idx in 0..lane_count / 2 {
|
||||
let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
|
||||
let a_lane0 = fx.bcx.ins().uextend(types::I16, a_lane0);
|
||||
let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
|
||||
let b_lane0 = fx.bcx.ins().sextend(types::I16, b_lane0);
|
||||
|
||||
let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
|
||||
let a_lane1 = fx.bcx.ins().uextend(types::I16, a_lane1);
|
||||
let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
|
||||
let b_lane1 = fx.bcx.ins().sextend(types::I16, b_lane1);
|
||||
|
||||
let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
|
||||
let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);
|
||||
|
||||
let (val, has_overflow) = fx.bcx.ins().sadd_overflow(mul0, mul1);
|
||||
|
||||
let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, mul1, 0);
|
||||
|
||||
let min = fx.bcx.ins().iconst(types::I16, i64::from(i16::MIN as u16));
|
||||
let max = fx.bcx.ins().iconst(types::I16, i64::from(i16::MAX as u16));
|
||||
|
||||
let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min);
|
||||
let res_lane = fx.bcx.ins().select(has_overflow, sat_val, val);
|
||||
|
||||
let res_lane = CValue::by_val(res_lane, ret_lane_layout);
|
||||
|
||||
ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
|
||||
}
|
||||
}
|
||||
"llvm.x86.sse2.pmadd.wd" => {
    // Multiply-add of signed 16-bit lanes: each i32 output lane is
    // `a[2i] * b[2i] + a[2i + 1] * b[2i + 1]`, computed at 32-bit width.
    intrinsic_args!(fx, args => (a, b); intrinsic);

    assert_eq!(a.layout(), b.layout());
    let layout = a.layout();

    let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
    assert_eq!(lane_ty, fx.tcx.types.i16);
    assert_eq!(ret_lane_ty, fx.tcx.types.i32);
    assert_eq!(lane_count, ret_lane_count * 2);

    let ret_lane_layout = fx.layout_of(fx.tcx.types.i32);
    for out_lane_idx in 0..lane_count / 2 {
        // Both operands are signed, so both must be sign-extended before multiplying.
        let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
        let a_lane0 = fx.bcx.ins().sextend(types::I32, a_lane0);
        let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
        let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0);

        let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
        let a_lane1 = fx.bcx.ins().sextend(types::I32, a_lane1);
        let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
        let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1);

        let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
        let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);

        // Wrapping add; no saturation is applied to the sum.
        let res_lane = fx.bcx.ins().iadd(mul0, mul1);
        let res_lane = CValue::by_val(res_lane, ret_lane_layout);

        ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
    }
}
|
||||
_ => {
|
||||
fx.tcx
|
||||
.sess
|
||||
|
@ -132,18 +132,11 @@ impl<'tcx> CValue<'tcx> {
|
||||
(ptr.get_addr(fx), vtable)
|
||||
}
|
||||
CValueInner::ByValPair(data, vtable) => {
|
||||
let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData {
|
||||
kind: StackSlotKind::ExplicitSlot,
|
||||
// FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
|
||||
// specify stack slot alignment.
|
||||
size: (u32::try_from(fx.target_config.pointer_type().bytes()).unwrap() + 15)
|
||||
/ 16
|
||||
* 16,
|
||||
});
|
||||
let data_ptr = Pointer::stack_slot(stack_slot);
|
||||
let mut flags = MemFlags::new();
|
||||
flags.set_notrap();
|
||||
data_ptr.store(fx, data, flags);
|
||||
let data_ptr = fx.create_stack_slot(
|
||||
u32::try_from(fx.target_config.pointer_type().bytes()).unwrap(),
|
||||
u32::try_from(fx.target_config.pointer_type().bytes()).unwrap(),
|
||||
);
|
||||
data_ptr.store(fx, data, MemFlags::trusted());
|
||||
|
||||
(data_ptr.get_addr(fx), vtable)
|
||||
}
|
||||
@ -372,13 +365,11 @@ impl<'tcx> CPlace<'tcx> {
|
||||
.fatal(format!("values of type {} are too big to store on the stack", layout.ty));
|
||||
}
|
||||
|
||||
let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData {
|
||||
kind: StackSlotKind::ExplicitSlot,
|
||||
// FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
|
||||
// specify stack slot alignment.
|
||||
size: (u32::try_from(layout.size.bytes()).unwrap() + 15) / 16 * 16,
|
||||
});
|
||||
CPlace { inner: CPlaceInner::Addr(Pointer::stack_slot(stack_slot), None), layout }
|
||||
let stack_slot = fx.create_stack_slot(
|
||||
u32::try_from(layout.size.bytes()).unwrap(),
|
||||
u32::try_from(layout.align.pref.bytes()).unwrap(),
|
||||
);
|
||||
CPlace { inner: CPlaceInner::Addr(stack_slot, None), layout }
|
||||
}
|
||||
|
||||
pub(crate) fn new_var(
|
||||
@ -543,13 +534,7 @@ impl<'tcx> CPlace<'tcx> {
|
||||
_ if src_ty.is_vector() && dst_ty.is_vector() => codegen_bitcast(fx, dst_ty, data),
|
||||
_ if src_ty.is_vector() || dst_ty.is_vector() => {
|
||||
// FIXME(bytecodealliance/wasmtime#6104) do something more efficient for transmutes between vectors and integers.
|
||||
let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData {
|
||||
kind: StackSlotKind::ExplicitSlot,
|
||||
// FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
|
||||
// specify stack slot alignment.
|
||||
size: (src_ty.bytes() + 15) / 16 * 16,
|
||||
});
|
||||
let ptr = Pointer::stack_slot(stack_slot);
|
||||
let ptr = fx.create_stack_slot(src_ty.bytes(), src_ty.bytes());
|
||||
ptr.store(fx, data, MemFlags::trusted());
|
||||
ptr.load(fx, dst_ty, MemFlags::trusted())
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user