Rollup merge of #63034 - tmandry:reduce-generator-size-regressions, r=cramertj

Fix generator size regressions due to optimization

I tested the generator optimizations in #60187 and #61922 on the Fuchsia
build, and noticed that some small generators (about 8% of the async fns
in our build) increased in size slightly.

This is because in #60187 we split the fields into two groups, a
"prefix" non-overlap region and an overlap region, and lay them out
separately. This can introduce unnecessary padding bytes between the two
groups.

In every single case in the Fuchsia build, it was due to there being
only a single variant being used in the overlap region. This means that
we aren't doing any overlapping, period. So it's better to combine the
two regions into one and lay out all the fields at once, which is what
this change does.

r? @cramertj
cc @eddyb @Zoxc
This commit is contained in:
Mazdak Farrokhzad 2019-08-07 01:39:25 +02:00 committed by GitHub
commit fb1f57e0e5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 75 additions and 10 deletions

View File

@ -209,6 +209,8 @@ use crate::mem::ManuallyDrop;
/// guarantee may evolve.
#[allow(missing_debug_implementations)]
#[stable(feature = "maybe_uninit", since = "1.36.0")]
// Lang item so we can wrap other types in it. This is useful for generators.
#[cfg_attr(not(bootstrap), lang = "maybe_uninit")]
#[derive(Copy)]
#[repr(transparent)]
pub union MaybeUninit<T> {

View File

@ -365,6 +365,8 @@ language_item_table! {
ManuallyDropItem, "manually_drop", manually_drop, Target::Struct;
MaybeUninitLangItem, "maybe_uninit", maybe_uninit, Target::Union;
DebugTraitLangItem, "debug_trait", debug_trait, Target::Trait;
// Align offset for stride != 1, must not panic.

View File

@ -2347,10 +2347,9 @@ impl<'tcx> TyCtxt<'tcx> {
self.mk_ty(Foreign(def_id))
}
pub fn mk_box(self, ty: Ty<'tcx>) -> Ty<'tcx> {
let def_id = self.require_lang_item(lang_items::OwnedBoxLangItem);
let adt_def = self.adt_def(def_id);
let substs = InternalSubsts::for_item(self, def_id, |param, substs| {
fn mk_generic_adt(self, wrapper_def_id: DefId, ty_param: Ty<'tcx>) -> Ty<'tcx> {
let adt_def = self.adt_def(wrapper_def_id);
let substs = InternalSubsts::for_item(self, wrapper_def_id, |param, substs| {
match param.kind {
GenericParamDefKind::Lifetime |
GenericParamDefKind::Const => {
@ -2358,7 +2357,7 @@ impl<'tcx> TyCtxt<'tcx> {
}
GenericParamDefKind::Type { has_default, .. } => {
if param.index == 0 {
ty.into()
ty_param.into()
} else {
assert!(has_default);
self.type_of(param.def_id).subst(self, substs).into()
@ -2369,6 +2368,18 @@ impl<'tcx> TyCtxt<'tcx> {
self.mk_ty(Adt(adt_def, substs))
}
#[inline]
pub fn mk_box(self, ty: Ty<'tcx>) -> Ty<'tcx> {
let def_id = self.require_lang_item(lang_items::OwnedBoxLangItem);
self.mk_generic_adt(def_id, ty)
}
#[inline]
pub fn mk_maybe_uninit(self, ty: Ty<'tcx>) -> Ty<'tcx> {
let def_id = self.require_lang_item(lang_items::MaybeUninitLangItem);
self.mk_generic_adt(def_id, ty)
}
#[inline]
pub fn mk_ptr(self, tm: TypeAndMut<'tcx>) -> Ty<'tcx> {
self.mk_ty(RawPtr(tm))

View File

@ -1368,6 +1368,27 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
}
}
// Count the number of variants in use. If only one of them, then it is
// impossible to overlap any locals in our layout. In this case it's
// always better to make the remaining locals ineligible, so we can
// lay them out with the other locals in the prefix and eliminate
// unnecessary padding bytes.
{
let mut used_variants = BitSet::new_empty(info.variant_fields.len());
for assignment in &assignments {
match assignment {
Assigned(idx) => { used_variants.insert(*idx); }
_ => {}
}
}
if used_variants.count() < 2 {
for assignment in assignments.iter_mut() {
*assignment = Ineligible(None);
}
ineligible_locals.insert_all();
}
}
// Write down the order of our locals that will be promoted to the prefix.
{
let mut idx = 0u32;
@ -1406,24 +1427,21 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
Abi::Scalar(s) => s.clone(),
_ => bug!(),
};
// FIXME(eddyb) wrap each promoted type in `MaybeUninit` so that they
// don't poison the `largest_niche` or `abi` fields of `prefix`.
let promoted_layouts = ineligible_locals.iter()
.map(|local| subst_field(info.field_tys[local]))
.map(|ty| tcx.mk_maybe_uninit(ty))
.map(|ty| self.layout_of(ty));
let prefix_layouts = substs.prefix_tys(def_id, tcx)
.map(|ty| self.layout_of(ty))
.chain(iter::once(Ok(discr_layout)))
.chain(promoted_layouts)
.collect::<Result<Vec<_>, _>>()?;
let mut prefix = self.univariant_uninterned(
let prefix = self.univariant_uninterned(
ty,
&prefix_layouts,
&ReprOptions::default(),
StructKind::AlwaysSized,
)?;
// FIXME(eddyb) need `MaybeUninit` around promoted types (see above).
prefix.largest_niche = None;
let (prefix_size, prefix_align) = (prefix.size, prefix.align);

View File

@ -0,0 +1,17 @@
// Test that niche finding works with captured generator upvars.
#![feature(generators)]
use std::mem::size_of_val;
fn take<T>(_: T) {}
fn main() {
let x = false;
let gen1 = || {
yield;
take(x);
};
assert_eq!(size_of_val(&gen1), size_of_val(&Some(gen1)));
}

View File

@ -1,3 +1,7 @@
// Incorrect handling of uninhabited types could cause us to mark generator
// types as entirely uninhabited, when they were in fact constructible. This
// caused us to hit "unreachable" code (illegal instruction on x86).
// run-pass
// compile-flags: --edition=2018
@ -19,7 +23,18 @@ async fn contains_never() {
let error2 = error;
}
#[allow(unused)]
async fn overlap_never() {
let error1 = uninhabited_async();
noop().await;
let error2 = uninhabited_async();
drop(error1);
noop().await;
drop(error2);
}
#[allow(unused_must_use)]
fn main() {
contains_never();
overlap_never();
}