Mirror of https://github.com/rust-lang/rust.git

Auto merge of #58556 - oli-obk:imperative_recursion, r=pnkfelix

Optimize copying large ranges of undefmask blocks. Hopefully fixes #58523.

Commit 52e885628e
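
The change replaces the per-bit copy of the source's undef (definedness) mask with a run-length compressed form: the mask is scanned once into a list of run lengths, where each entry toggles between defined and undefined, and those runs are then replayed `repeat` times into the destination. A minimal standalone sketch of the compression step, using the `0000010010001110 -> [5, 1, 2, 1, 3, 3, 1]` example from the diff comments (the `compress` helper below is illustrative only, not part of this commit):

    // Run-length compress a definedness bitmap: return the first state plus the
    // lengths of the alternating runs, mirroring the loop added in copy_undef_mask.
    fn compress(bits: &[bool]) -> (bool, Vec<u64>) {
        let first = bits[0];
        let (mut cur, mut cur_len) = (first, 1u64);
        let mut ranges = Vec::new();
        for &b in &bits[1..] {
            if b == cur {
                cur_len += 1;
            } else {
                ranges.push(cur_len);
                cur_len = 1;
                cur = !cur;
            }
        }
        // remember to push the trailing run
        ranges.push(cur_len);
        (first, ranges)
    }

    fn main() {
        let bits: Vec<bool> = "0000010010001110".chars().map(|c| c == '1').collect();
        let (first, ranges) = compress(&bits);
        assert!(!first);
        assert_eq!(ranges, vec![5, 1, 2, 1, 3, 3, 1]);
    }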
@@ -101,8 +101,7 @@ impl AllocationExtra<(), ()> for () {
 impl<Tag, Extra> Allocation<Tag, Extra> {
     /// Creates a read-only allocation initialized by the given bytes
     pub fn from_bytes(slice: &[u8], align: Align, extra: Extra) -> Self {
-        let mut undef_mask = UndefMask::new(Size::ZERO);
-        undef_mask.grow(Size::from_bytes(slice.len() as u64), true);
+        let undef_mask = UndefMask::new(Size::from_bytes(slice.len() as u64), true);
         Self {
             bytes: slice.to_owned(),
             relocations: Relocations::new(),
@@ -122,7 +121,7 @@ impl<Tag, Extra> Allocation<Tag, Extra> {
         Allocation {
             bytes: vec![0; size.bytes() as usize],
             relocations: Relocations::new(),
-            undef_mask: UndefMask::new(size),
+            undef_mask: UndefMask::new(size, false),
             align,
             mutability: Mutability::Mutable,
             extra,
@@ -614,8 +613,9 @@ impl<Tag> DerefMut for Relocations<Tag> {
 ////////////////////////////////////////////////////////////////////////////////

 type Block = u64;
-const BLOCK_SIZE: u64 = 64;

+/// A bitmask where each bit refers to the byte with the same index. If the bit is `true`, the byte
+/// is defined. If it is `false` the byte is undefined.
 #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, RustcEncodable, RustcDecodable)]
 pub struct UndefMask {
     blocks: Vec<Block>,
@@ -625,12 +625,14 @@ pub struct UndefMask {
 impl_stable_hash_for!(struct mir::interpret::UndefMask{blocks, len});

 impl UndefMask {
-    pub fn new(size: Size) -> Self {
+    pub const BLOCK_SIZE: u64 = 64;
+
+    pub fn new(size: Size, state: bool) -> Self {
         let mut m = UndefMask {
             blocks: vec![],
             len: Size::ZERO,
         };
-        m.grow(size, false);
+        m.grow(size, state);
         m
     }

@@ -644,6 +646,7 @@ impl UndefMask {
             return Err(self.len);
         }

+        // FIXME(oli-obk): optimize this for allocations larger than a block.
         let idx = (start.bytes()..end.bytes())
             .map(|i| Size::from_bytes(i))
             .find(|&i| !self.get(i));
@@ -663,20 +666,63 @@ impl UndefMask {
     }

     pub fn set_range_inbounds(&mut self, start: Size, end: Size, new_state: bool) {
-        for i in start.bytes()..end.bytes() {
-            self.set(Size::from_bytes(i), new_state);
+        let (blocka, bita) = bit_index(start);
+        let (blockb, bitb) = bit_index(end);
+        if blocka == blockb {
+            // first set all bits but the first `bita`
+            // then unset the last `64 - bitb` bits
+            let range = if bitb == 0 {
+                u64::max_value() << bita
+            } else {
+                (u64::max_value() << bita) & (u64::max_value() >> (64 - bitb))
+            };
+            if new_state {
+                self.blocks[blocka] |= range;
+            } else {
+                self.blocks[blocka] &= !range;
+            }
+            return;
+        }
+        // across block boundaries
+        if new_state {
+            // set bita..64 to 1
+            self.blocks[blocka] |= u64::max_value() << bita;
+            // set 0..bitb to 1
+            if bitb != 0 {
+                self.blocks[blockb] |= u64::max_value() >> (64 - bitb);
+            }
+            // fill in all the other blocks (much faster than one bit at a time)
+            for block in (blocka + 1) .. blockb {
+                self.blocks[block] = u64::max_value();
+            }
+        } else {
+            // set bita..64 to 0
+            self.blocks[blocka] &= !(u64::max_value() << bita);
+            // set 0..bitb to 0
+            if bitb != 0 {
+                self.blocks[blockb] &= !(u64::max_value() >> (64 - bitb));
+            }
+            // fill in all the other blocks (much faster than one bit at a time)
+            for block in (blocka + 1) .. blockb {
+                self.blocks[block] = 0;
+            }
         }
     }

     #[inline]
     pub fn get(&self, i: Size) -> bool {
         let (block, bit) = bit_index(i);
-        (self.blocks[block] & 1 << bit) != 0
+        (self.blocks[block] & (1 << bit)) != 0
     }

     #[inline]
     pub fn set(&mut self, i: Size, new_state: bool) {
         let (block, bit) = bit_index(i);
+        self.set_bit(block, bit, new_state);
+    }
+
+    #[inline]
+    fn set_bit(&mut self, block: usize, bit: usize, new_state: bool) {
         if new_state {
             self.blocks[block] |= 1 << bit;
         } else {
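
When the whole range falls inside one 64-bit block, the new `set_range_inbounds` builds a single mask for bits `bita..bitb` and applies it with one `|=` or `&= !`. A small standalone sketch of that mask arithmetic (the `range_mask` name is illustrative, not from this diff):

    // Mask with bits bita..bitb set; bitb == 0 means "to the end of the block",
    // matching the single-block branch of set_range_inbounds above.
    fn range_mask(bita: u32, bitb: u32) -> u64 {
        if bitb == 0 {
            u64::max_value() << bita
        } else {
            (u64::max_value() << bita) & (u64::max_value() >> (64 - bitb))
        }
    }

    fn main() {
        assert_eq!(range_mask(3, 10), 0b11_1111_1000);   // bits 3..10
        assert_eq!(range_mask(0, 0), u64::max_value());  // the whole block
        let mut block = 0u64;
        block |= range_mask(3, 10);    // new_state == true: set the range
        block &= !range_mask(3, 10);   // new_state == false: clear it again
        assert_eq!(block, 0);
    }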
@@ -685,11 +731,15 @@ impl UndefMask {
     }

     pub fn grow(&mut self, amount: Size, new_state: bool) {
-        let unused_trailing_bits = self.blocks.len() as u64 * BLOCK_SIZE - self.len.bytes();
+        if amount.bytes() == 0 {
+            return;
+        }
+        let unused_trailing_bits = self.blocks.len() as u64 * Self::BLOCK_SIZE - self.len.bytes();
         if amount.bytes() > unused_trailing_bits {
-            let additional_blocks = amount.bytes() / BLOCK_SIZE + 1;
+            let additional_blocks = amount.bytes() / Self::BLOCK_SIZE + 1;
             assert_eq!(additional_blocks as usize as u64, additional_blocks);
             self.blocks.extend(
+                // FIXME(oli-obk): optimize this by repeating `new_state as Block`
                 iter::repeat(0).take(additional_blocks as usize),
             );
         }
@@ -702,8 +752,8 @@ impl UndefMask {
 #[inline]
 fn bit_index(bits: Size) -> (usize, usize) {
     let bits = bits.bytes();
-    let a = bits / BLOCK_SIZE;
-    let b = bits % BLOCK_SIZE;
+    let a = bits / UndefMask::BLOCK_SIZE;
+    let b = bits % UndefMask::BLOCK_SIZE;
     assert_eq!(a as usize as u64, a);
     assert_eq!(b as usize as u64, b);
     (a as usize, b as usize)
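
`bit_index` splits a byte offset into a block index and a bit position by dividing and taking the remainder by `UndefMask::BLOCK_SIZE` (64); a tiny illustrative check:

    fn main() {
        const BLOCK_SIZE: u64 = 64;
        let byte: u64 = 499;
        // 499 = 7 * 64 + 51, so byte 499 lives in block 7 at bit 51
        assert_eq!((byte / BLOCK_SIZE, byte % BLOCK_SIZE), (7, 51));
    }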
@@ -700,24 +700,29 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         // relocations overlapping the edges; those would not be handled correctly).
         let relocations = {
             let relocations = self.get(src.alloc_id)?.relocations(self, src, size);
-            let mut new_relocations = Vec::with_capacity(relocations.len() * (length as usize));
-            for i in 0..length {
-                new_relocations.extend(
-                    relocations
-                    .iter()
-                    .map(|&(offset, reloc)| {
-                        // compute offset for current repetition
-                        let dest_offset = dest.offset + (i * size);
-                        (
-                            // shift offsets from source allocation to destination allocation
-                            offset + dest_offset - src.offset,
-                            reloc,
-                        )
-                    })
-                );
-            }
+            if relocations.is_empty() {
+                // nothing to copy, ignore even the `length` loop
+                Vec::new()
+            } else {
+                let mut new_relocations = Vec::with_capacity(relocations.len() * (length as usize));
+                for i in 0..length {
+                    new_relocations.extend(
+                        relocations
+                        .iter()
+                        .map(|&(offset, reloc)| {
+                            // compute offset for current repetition
+                            let dest_offset = dest.offset + (i * size);
+                            (
+                                // shift offsets from source allocation to destination allocation
+                                offset + dest_offset - src.offset,
+                                reloc,
+                            )
+                        })
+                    );
+                }

-            new_relocations
+                new_relocations
+            }
         };

         let tcx = self.tcx.tcx;
@@ -784,20 +789,65 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
         // The bits have to be saved locally before writing to dest in case src and dest overlap.
         assert_eq!(size.bytes() as usize as u64, size.bytes());

-        let undef_mask = self.get(src.alloc_id)?.undef_mask.clone();
-        let dest_allocation = self.get_mut(dest.alloc_id)?;
+        let undef_mask = &self.get(src.alloc_id)?.undef_mask;

-        for i in 0..size.bytes() {
-            let defined = undef_mask.get(src.offset + Size::from_bytes(i));
-
-            for j in 0..repeat {
-                dest_allocation.undef_mask.set(
-                    dest.offset + Size::from_bytes(i + (size.bytes() * j)),
-                    defined
-                );
+        // Since we are copying `size` bytes from `src` to `dest + i * size` (`for i in 0..repeat`),
+        // a naive undef mask copying algorithm would repeatedly have to read the undef mask from
+        // the source and write it to the destination. Even if we optimized the memory accesses,
+        // we'd be doing all of this `repeat` times.
+        // Therefore we precompute a compressed version of the undef mask of the source value and
+        // then write it back `repeat` times without computing any more information from the source.
+
+        // a precomputed cache for ranges of defined/undefined bits
+        // 0000010010001110 will become
+        // [5, 1, 2, 1, 3, 3, 1]
+        // where each element toggles the state
+        let mut ranges = smallvec::SmallVec::<[u64; 1]>::new();
+        let first = undef_mask.get(src.offset);
+        let mut cur_len = 1;
+        let mut cur = first;
+        for i in 1..size.bytes() {
+            // FIXME: optimize to bitshift the current undef block's bits and read the top bit
+            if undef_mask.get(src.offset + Size::from_bytes(i)) == cur {
+                cur_len += 1;
+            } else {
+                ranges.push(cur_len);
+                cur_len = 1;
+                cur = !cur;
             }
         }

+        // now fill in all the data
+        let dest_allocation = self.get_mut(dest.alloc_id)?;
+        // an optimization where we can just overwrite an entire range of definedness bits if
+        // they are going to be uniformly `1` or `0`.
+        if ranges.is_empty() {
+            dest_allocation.undef_mask.set_range_inbounds(
+                dest.offset,
+                dest.offset + size * repeat,
+                first,
+            );
+            return Ok(())
+        }
+
+        // remember to fill in the trailing bits
+        ranges.push(cur_len);
+
+        for mut j in 0..repeat {
+            j *= size.bytes();
+            j += dest.offset.bytes();
+            let mut cur = first;
+            for range in &ranges {
+                let old_j = j;
+                j += range;
+                dest_allocation.undef_mask.set_range_inbounds(
+                    Size::from_bytes(old_j),
+                    Size::from_bytes(j),
+                    cur,
+                );
+                cur = !cur;
+            }
+        }
         Ok(())
     }
 }
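
The write-back loop above replays the compressed runs: each length toggles the state, so expanding the runs reproduces the original mask, and an empty `ranges` means the whole copy is uniformly `first` and can be written with a single `set_range_inbounds` call. A round-trip sketch (the `expand` helper is illustrative only, not part of this commit):

    // Expand (first, run lengths) back into per-byte definedness bits,
    // the inverse of the compression loop in copy_undef_mask.
    fn expand(first: bool, ranges: &[u64]) -> Vec<bool> {
        let mut out = Vec::new();
        let mut cur = first;
        for &len in ranges {
            out.extend(std::iter::repeat(cur).take(len as usize));
            cur = !cur;
        }
        out
    }

    fn main() {
        let bits: Vec<bool> = "0000010010001110".chars().map(|c| c == '1').collect();
        // the runs from the diff comment, starting from `false`
        assert_eq!(expand(false, &[5, 1, 2, 1, 3, 3, 1]), bits);
    }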
src/test/run-pass-fulldeps/undef_mask.rs (new file, 26 lines)
@@ -0,0 +1,26 @@
+// ignore-cross-compile
+// ignore-stage1
+
+#![feature(rustc_private)]
+
+extern crate rustc;
+
+use rustc::mir::interpret::UndefMask;
+use rustc::ty::layout::Size;
+
+fn main() {
+    let mut mask = UndefMask::new(Size::from_bytes(500), false);
+    assert!(!mask.get(Size::from_bytes(499)));
+    mask.set(Size::from_bytes(499), true);
+    assert!(mask.get(Size::from_bytes(499)));
+    mask.set_range_inbounds(Size::from_bytes(100), Size::from_bytes(256), true);
+    for i in 0..100 {
+        assert!(!mask.get(Size::from_bytes(i)));
+    }
+    for i in 100..256 {
+        assert!(mask.get(Size::from_bytes(i)));
+    }
+    for i in 256..499 {
+        assert!(!mask.get(Size::from_bytes(i)));
+    }
+}