use crate::stable_hasher::{HashStable, StableHasher};
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::ptr;

use crate::fingerprint::Fingerprint;

mod private {
    #[derive(Clone, Copy, Debug)]
    pub struct PrivateZst;
}

/// A reference to a value that is interned, and is known to be unique.
///
/// Note that it is possible to have a `T` and an `Interned<T>` that are (or
/// refer to) equal but different values. But if you have two different
/// `Interned<T>`s, they both refer to the same value, at a single location in
/// memory. This means that equality and hashing can be done on the value's
/// address rather than the value's contents, which can improve performance.
///
/// The `PrivateZst` field means you can pattern match with `Interned(v, _)`,
/// but you can only construct an `Interned` with `new_unchecked`, not
/// directly.
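///
/// As an illustrative sketch only (the `value` below stands in for a
/// reference handed out by some arena-backed interner; it is not part of
/// this module), the intended usage pattern looks like this:
///
/// ```ignore (illustrative sketch, not a doctest)
/// // Both handles wrap the same unique, interned reference...
/// let a = Interned::new_unchecked(value);
/// let b = Interned::new_unchecked(value);
/// // ...so equality (and hashing) reduces to a pointer comparison.
/// assert!(a == b);
/// ```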
#[derive(Debug)]
#[rustc_pass_by_value]
pub struct Interned<'a, T>(pub &'a T, pub private::PrivateZst);

impl<'a, T> Interned<'a, T> {
    /// Create a new `Interned` value. The value referred to *must* be interned
    /// and thus be unique, and it *must* remain unique in the future. This
    /// function has `_unchecked` in the name but is not `unsafe`, because if
    /// the uniqueness condition is violated it will cause incorrect behaviour,
    /// but it will not affect memory safety.
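    ///
    /// For example (a hypothetical misuse, shown only to illustrate the
    /// uniqueness requirement), wrapping two distinct but equal values makes
    /// the pointer-based `PartialEq` impl report them as unequal:
    ///
    /// ```ignore (illustrative sketch, not a doctest)
    /// let s1 = String::from("x");
    /// let s2 = String::from("x");
    /// // Equal contents, different addresses: this is the "incorrect
    /// // behaviour" referred to above, though it is still memory safe.
    /// assert!(Interned::new_unchecked(&s1) != Interned::new_unchecked(&s2));
    /// ```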
    #[inline]
    pub const fn new_unchecked(t: &'a T) -> Self {
        Interned(t, private::PrivateZst)
    }
}

impl<'a, T> Clone for Interned<'a, T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<'a, T> Copy for Interned<'a, T> {}

impl<'a, T> Deref for Interned<'a, T> {
    type Target = T;

    #[inline]
    fn deref(&self) -> &T {
        self.0
    }
}

impl<'a, T> PartialEq for Interned<'a, T> {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        // Pointer equality implies equality, due to the uniqueness constraint.
        ptr::eq(self.0, other.0)
    }
}

impl<'a, T> Eq for Interned<'a, T> {}

impl<'a, T: PartialOrd> PartialOrd for Interned<'a, T> {
    fn partial_cmp(&self, other: &Interned<'a, T>) -> Option<Ordering> {
        // Pointer equality implies equality, due to the uniqueness constraint,
        // but the contents must be compared otherwise.
        if ptr::eq(self.0, other.0) {
            Some(Ordering::Equal)
        } else {
            let res = self.0.partial_cmp(&other.0);
            debug_assert_ne!(res, Some(Ordering::Equal));
            res
        }
    }
}

impl<'a, T: Ord> Ord for Interned<'a, T> {
    fn cmp(&self, other: &Interned<'a, T>) -> Ordering {
        // Pointer equality implies equality, due to the uniqueness constraint,
        // but the contents must be compared otherwise.
        if ptr::eq(self.0, other.0) {
            Ordering::Equal
        } else {
            let res = self.0.cmp(&other.0);
            debug_assert_ne!(res, Ordering::Equal);
            res
        }
    }
}

impl<'a, T> Hash for Interned<'a, T> {
    #[inline]
    fn hash<H: Hasher>(&self, s: &mut H) {
        // Pointer hashing is sufficient, due to the uniqueness constraint.
        ptr::hash(self.0, s)
    }
}

impl<T, CTX> HashStable<CTX> for Interned<'_, T>
where
    T: HashStable<CTX>,
{
    fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
        self.0.hash_stable(hcx, hasher);
    }
}

/// A helper type that you can wrap round your own type in order to automatically
/// cache the stable hash on creation and not recompute it whenever the stable hash
/// of the type is computed.
///
/// This is only done in incremental mode. You can also opt out of caching by using
/// `Fingerprint::ZERO` for the hash, in which case the hash gets computed each time.
/// This is useful if you have values that you intern but never (or cannot) use for
/// stable hashing.
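///
/// A construction sketch (illustrative only; the values bound to `internee`
/// and `stable_hash` below are hypothetical stand-ins for whatever the
/// caller has):
///
/// ```ignore (illustrative sketch, not a doctest)
/// // In incremental mode, cache the stable hash at creation time...
/// let cached = WithStableHash { internee: interned_value, stable_hash: precomputed_hash };
/// // ...or opt out of caching, in which case the hash is recomputed on demand.
/// let uncached = WithStableHash { internee: other_value, stable_hash: Fingerprint::ZERO };
/// ```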
#[derive(Copy, Clone)]
pub struct WithStableHash<T> {
    pub internee: T,
    pub stable_hash: Fingerprint,
}

impl<T: PartialEq> PartialEq for WithStableHash<T> {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.internee.eq(&other.internee)
    }
}

impl<T: Eq> Eq for WithStableHash<T> {}

impl<T: Ord> PartialOrd for WithStableHash<T> {
    fn partial_cmp(&self, other: &WithStableHash<T>) -> Option<Ordering> {
        Some(self.internee.cmp(&other.internee))
    }
}

impl<T: Ord> Ord for WithStableHash<T> {
    fn cmp(&self, other: &WithStableHash<T>) -> Ordering {
        self.internee.cmp(&other.internee)
    }
}

impl<T> Deref for WithStableHash<T> {
    type Target = T;

    #[inline]
    fn deref(&self) -> &T {
        &self.internee
    }
}

impl<T: Hash> Hash for WithStableHash<T> {
    #[inline]
    fn hash<H: Hasher>(&self, s: &mut H) {
        if self.stable_hash != Fingerprint::ZERO {
            self.stable_hash.hash(s)
        } else {
            self.internee.hash(s)
        }
    }
}

impl<T: HashStable<CTX>, CTX> HashStable<CTX> for WithStableHash<T> {
    fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
        if self.stable_hash == Fingerprint::ZERO || cfg!(debug_assertions) {
            // Either no cached hash is available, which can only mean that incremental is
            // disabled (we don't cache stable hashes in non-incremental mode, because they
            // are used so rarely that the performance actually suffers), or this is a
            // debug build, where we recompute the hash to verify the cached one below.

            // We need to build the hash as if we cached it and then hash that hash, as
            // otherwise the hashes will differ between cached and non-cached mode.
            let stable_hash: Fingerprint = {
                let mut hasher = StableHasher::new();
                self.internee.hash_stable(hcx, &mut hasher);
                hasher.finish()
            };
            if cfg!(debug_assertions) && self.stable_hash != Fingerprint::ZERO {
                assert_eq!(
                    stable_hash, self.stable_hash,
                    "cached stable hash does not match freshly computed stable hash"
                );
            }
            stable_hash.hash_stable(hcx, hasher);
        } else {
            self.stable_hash.hash_stable(hcx, hasher);
        }
    }
}

#[cfg(test)]
mod tests;