Rollup merge of #80764 - CAD97:weak-unsized-as-ptr-again, r=RalfJung

Re-stabilize Weak::as_ptr and friends for unsized T

As per [T-lang consensus](https://hackmd.io/7r3_is6uTz-163fsOV8Vfg), this uses a branch to handle the dangling case. The discussed optimization of only doing the branch in the T: ?Sized case is left for a followup patch, as doing so is not trivial (as it requires specialization) and not _obviously_ better (as it requires using `wrapping_offset` rather than `offset` more).

<details><summary>Basically said optimization</summary>

Specialize on `T: Sized`:

```rust
fn as_ptr(&self) -> *const T {
    if [ T is Sized ] || !is_dangling(ptr) {
        (ptr as *mut T).set_ptr_value( (ptr as *mut u8).wrapping_offset(data_offset) )
    } else {
        ptr::null()
    }
}

fn from_raw(*const T) -> Self {
    if [ T is Sized ] || !ptr.is_null() {
        let ptr = (ptr as *mut RcBox).set_ptr_value( (ptr as *mut u8).wrapping_offset(-data_offset) );
        Weak { ptr }
    } else {
        Weak::new()
    }
}
```

(but with more `set_ptr_value` to avoid `Sized` restrictions and maintain metadata.)

Written in this fashion, this is not a correctness-critical specialization (i.e. so long as `[ T is Sized ]` is false for unsized `T`, it can be `rand()` for sized `T` without breaking correctness), but it's still touchy, so I'd rather do it in another PR with separate review.

---
</details>

This effectively reverts #80422 and re-establishes #74160. T-libs [previously signed off](https://github.com/rust-lang/rust/pull/74160#issuecomment-660539373) on this stable API change in #74160.
This commit is contained in:
Mara Bos 2021-01-16 17:29:56 +00:00 committed by GitHub
commit 5702cfa255
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 153 additions and 95 deletions

View File

@ -120,6 +120,7 @@
#![feature(receiver_trait)]
#![cfg_attr(bootstrap, feature(min_const_generics))]
#![feature(min_specialization)]
#![feature(set_ptr_value)]
#![feature(slice_ptr_get)]
#![feature(slice_ptr_len)]
#![feature(staged_api)]

View File

@ -829,8 +829,8 @@ impl<T: ?Sized> Rc<T> {
let offset = unsafe { data_offset(ptr) };
// Reverse the offset to find the original RcBox.
let fake_ptr = ptr as *mut RcBox<T>;
let rc_ptr = unsafe { set_data_ptr(fake_ptr, (ptr as *mut u8).offset(-offset)) };
let rc_ptr =
unsafe { (ptr as *mut RcBox<T>).set_ptr_value((ptr as *mut u8).offset(-offset)) };
unsafe { Self::from_ptr(rc_ptr) }
}
@ -850,7 +850,7 @@ impl<T: ?Sized> Rc<T> {
pub fn downgrade(this: &Self) -> Weak<T> {
this.inner().inc_weak();
// Make sure we do not create a dangling Weak
debug_assert!(!is_dangling(this.ptr));
debug_assert!(!is_dangling(this.ptr.as_ptr()));
Weak { ptr: this.ptr }
}
@ -1164,7 +1164,7 @@ impl<T: ?Sized> Rc<T> {
Self::allocate_for_layout(
Layout::for_value(&*ptr),
|layout| Global.allocate(layout),
|mem| set_data_ptr(ptr as *mut T, mem) as *mut RcBox<T>,
|mem| (ptr as *mut RcBox<T>).set_ptr_value(mem),
)
}
}
@ -1203,20 +1203,7 @@ impl<T> Rc<[T]> {
)
}
}
}
/// Sets the data pointer of a `?Sized` raw pointer.
///
/// For a slice/trait object, this sets the `data` field and leaves the rest
/// unchanged. For a sized raw pointer, this simply sets the pointer.
unsafe fn set_data_ptr<T: ?Sized, U>(mut ptr: *mut T, data: *mut U) -> *mut T {
unsafe {
ptr::write(&mut ptr as *mut _ as *mut *mut u8, data as *mut u8);
}
ptr
}
impl<T> Rc<[T]> {
/// Copy elements from slice into newly allocated Rc<\[T\]>
///
/// Unsafe because the caller must either take ownership or bind `T: Copy`
@ -1860,8 +1847,8 @@ impl<T> Weak<T> {
}
}
pub(crate) fn is_dangling<T: ?Sized>(ptr: NonNull<T>) -> bool {
let address = ptr.as_ptr() as *mut () as usize;
pub(crate) fn is_dangling<T: ?Sized>(ptr: *mut T) -> bool {
let address = ptr as *mut () as usize;
address == usize::MAX
}
@ -1872,7 +1859,7 @@ struct WeakInner<'a> {
strong: &'a Cell<usize>,
}
impl<T> Weak<T> {
impl<T: ?Sized> Weak<T> {
/// Returns a raw pointer to the object `T` pointed to by this `Weak<T>`.
///
/// The pointer is valid only if there are some strong references. The pointer may be dangling,
@ -1902,15 +1889,15 @@ impl<T> Weak<T> {
pub fn as_ptr(&self) -> *const T {
let ptr: *mut RcBox<T> = NonNull::as_ptr(self.ptr);
// SAFETY: we must offset the pointer manually, and said pointer may be
// a dangling weak (usize::MAX) if T is sized. data_offset is safe to call,
// because we know that a pointer to unsized T was derived from a real
// unsized T, as dangling weaks are only created for sized T. wrapping_offset
// is used so that we can use the same code path for the non-dangling
// unsized case and the potentially dangling sized case.
unsafe {
let offset = data_offset(ptr as *mut T);
set_data_ptr(ptr as *mut T, (ptr as *mut u8).wrapping_offset(offset))
if is_dangling(ptr) {
// If the pointer is dangling, we return the sentinel directly. This cannot be
// a valid payload address, as the payload is at least as aligned as RcBox (usize).
ptr as *const T
} else {
// SAFETY: if is_dangling returns false, then the pointer is dereferencable.
// The payload may be dropped at this point, and we have to maintain provenance,
// so use raw pointer manipulation.
unsafe { &raw const (*ptr).value }
}
}
@ -1992,22 +1979,24 @@ impl<T> Weak<T> {
/// [`new`]: Weak::new
#[stable(feature = "weak_into_raw", since = "1.45.0")]
pub unsafe fn from_raw(ptr: *const T) -> Self {
// SAFETY: data_offset is safe to call, because this pointer originates from a Weak.
// See Weak::as_ptr for context on how the input pointer is derived.
let offset = unsafe { data_offset(ptr) };
// Reverse the offset to find the original RcBox.
// SAFETY: we use wrapping_offset here because the pointer may be dangling (but only if T: Sized).
let ptr = unsafe {
set_data_ptr(ptr as *mut RcBox<T>, (ptr as *mut u8).wrapping_offset(-offset))
let ptr = if is_dangling(ptr as *mut T) {
// This is a dangling Weak.
ptr as *mut RcBox<T>
} else {
// Otherwise, we're guaranteed the pointer came from a nondangling Weak.
// SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T.
let offset = unsafe { data_offset(ptr) };
// Thus, we reverse the offset to get the whole RcBox.
// SAFETY: the pointer originated from a Weak, so this offset is safe.
unsafe { (ptr as *mut RcBox<T>).set_ptr_value((ptr as *mut u8).offset(-offset)) }
};
// SAFETY: we now have recovered the original Weak pointer, so can create the Weak.
Weak { ptr: unsafe { NonNull::new_unchecked(ptr) } }
}
}
impl<T: ?Sized> Weak<T> {
/// Attempts to upgrade the `Weak` pointer to an [`Rc`], delaying
/// dropping of the inner value if successful.
///
@ -2070,7 +2059,7 @@ impl<T: ?Sized> Weak<T> {
/// (i.e., when this `Weak` was created by `Weak::new`).
#[inline]
fn inner(&self) -> Option<WeakInner<'_>> {
if is_dangling(self.ptr) {
if is_dangling(self.ptr.as_ptr()) {
None
} else {
// We are careful to *not* create a reference covering the "data" field, as
@ -2325,21 +2314,19 @@ impl<T: ?Sized> AsRef<T> for Rc<T> {
#[stable(feature = "pin", since = "1.33.0")]
impl<T: ?Sized> Unpin for Rc<T> {}
/// Get the offset within an `RcBox` for
/// a payload of type described by a pointer.
/// Get the offset within an `RcBox` for the payload behind a pointer.
///
/// # Safety
///
/// This has the same safety requirements as `align_of_val_raw`. In effect:
///
/// - This function is safe for any argument if `T` is sized, and
/// - if `T` is unsized, the pointer must have appropriate pointer metadata
/// acquired from the real instance that you are getting this offset for.
/// The pointer must point to (and have valid metadata for) a previously
/// valid instance of T, but the T is allowed to be dropped.
unsafe fn data_offset<T: ?Sized>(ptr: *const T) -> isize {
// Align the unsized value to the end of the `RcBox`.
// Because it is ?Sized, it will always be the last field in memory.
// Note: This is a detail of the current implementation of the compiler,
// and is not a guaranteed language detail. Do not rely on it outside of std.
// Align the unsized value to the end of the RcBox.
// Because RcBox is repr(C), it will always be the last field in memory.
// SAFETY: since the only unsized types possible are slices, trait objects,
// and extern types, the input safety requirement is currently enough to
// satisfy the requirements of align_of_val_raw; this is an implementation
// detail of the language that may not be relied upon outside of std.
unsafe { data_offset_align(align_of_val_raw(ptr)) }
}

View File

@ -208,6 +208,30 @@ fn into_from_weak_raw() {
}
}
#[test]
fn test_into_from_weak_raw_unsized() {
use std::fmt::Display;
use std::string::ToString;
let arc: Rc<str> = Rc::from("foo");
let weak: Weak<str> = Rc::downgrade(&arc);
let ptr = Weak::into_raw(weak.clone());
let weak2 = unsafe { Weak::from_raw(ptr) };
assert_eq!(unsafe { &*ptr }, "foo");
assert!(weak.ptr_eq(&weak2));
let arc: Rc<dyn Display> = Rc::new(123);
let weak: Weak<dyn Display> = Rc::downgrade(&arc);
let ptr = Weak::into_raw(weak.clone());
let weak2 = unsafe { Weak::from_raw(ptr) };
assert_eq!(unsafe { &*ptr }.to_string(), "123");
assert!(weak.ptr_eq(&weak2));
}
#[test]
fn get_mut() {
let mut x = Rc::new(3);
@ -294,6 +318,23 @@ fn test_unsized() {
assert_eq!(foo, foo.clone());
}
#[test]
fn test_maybe_thin_unsized() {
// If/when custom thin DSTs exist, this test should be updated to use one
use std::ffi::{CStr, CString};
let x: Rc<CStr> = Rc::from(CString::new("swordfish").unwrap().into_boxed_c_str());
assert_eq!(format!("{:?}", x), "\"swordfish\"");
let y: Weak<CStr> = Rc::downgrade(&x);
drop(x);
// At this point, the weak points to a dropped DST
assert!(y.upgrade().is_none());
// But we still need to be able to get the alloc layout to drop.
// CStr has no drop glue, but custom DSTs might, and need to work.
drop(y);
}
#[test]
fn test_from_owned() {
let foo = 123;

View File

@ -846,8 +846,7 @@ impl<T: ?Sized> Arc<T> {
let offset = data_offset(ptr);
// Reverse the offset to find the original ArcInner.
let fake_ptr = ptr as *mut ArcInner<T>;
let arc_ptr = set_data_ptr(fake_ptr, (ptr as *mut u8).offset(-offset));
let arc_ptr = (ptr as *mut ArcInner<T>).set_ptr_value((ptr as *mut u8).offset(-offset));
Self::from_ptr(arc_ptr)
}
@ -888,7 +887,7 @@ impl<T: ?Sized> Arc<T> {
match this.inner().weak.compare_exchange_weak(cur, cur + 1, Acquire, Relaxed) {
Ok(_) => {
// Make sure we do not create a dangling Weak
debug_assert!(!is_dangling(this.ptr));
debug_assert!(!is_dangling(this.ptr.as_ptr()));
return Weak { ptr: this.ptr };
}
Err(old) => cur = old,
@ -1131,7 +1130,7 @@ impl<T: ?Sized> Arc<T> {
Self::allocate_for_layout(
Layout::for_value(&*ptr),
|layout| Global.allocate(layout),
|mem| set_data_ptr(ptr as *mut T, mem) as *mut ArcInner<T>,
|mem| (ptr as *mut ArcInner<T>).set_ptr_value(mem) as *mut ArcInner<T>,
)
}
}
@ -1170,20 +1169,7 @@ impl<T> Arc<[T]> {
)
}
}
}
/// Sets the data pointer of a `?Sized` raw pointer.
///
/// For a slice/trait object, this sets the `data` field and leaves the rest
/// unchanged. For a sized raw pointer, this simply sets the pointer.
unsafe fn set_data_ptr<T: ?Sized, U>(mut ptr: *mut T, data: *mut U) -> *mut T {
unsafe {
ptr::write(&mut ptr as *mut _ as *mut *mut u8, data as *mut u8);
}
ptr
}
impl<T> Arc<[T]> {
/// Copy elements from slice into newly allocated Arc<\[T\]>
///
/// Unsafe because the caller must either take ownership or bind `T: Copy`.
@ -1653,7 +1639,7 @@ struct WeakInner<'a> {
strong: &'a atomic::AtomicUsize,
}
impl<T> Weak<T> {
impl<T: ?Sized> Weak<T> {
/// Returns a raw pointer to the object `T` pointed to by this `Weak<T>`.
///
/// The pointer is valid only if there are some strong references. The pointer may be dangling,
@ -1683,15 +1669,15 @@ impl<T> Weak<T> {
pub fn as_ptr(&self) -> *const T {
let ptr: *mut ArcInner<T> = NonNull::as_ptr(self.ptr);
// SAFETY: we must offset the pointer manually, and said pointer may be
// a dangling weak (usize::MAX) if T is sized. data_offset is safe to call,
// because we know that a pointer to unsized T was derived from a real
// unsized T, as dangling weaks are only created for sized T. wrapping_offset
// is used so that we can use the same code path for the non-dangling
// unsized case and the potentially dangling sized case.
unsafe {
let offset = data_offset(ptr as *mut T);
set_data_ptr(ptr as *mut T, (ptr as *mut u8).wrapping_offset(offset))
if is_dangling(ptr) {
// If the pointer is dangling, we return the sentinel directly. This cannot be
// a valid payload address, as the payload is at least as aligned as ArcInner (usize).
ptr as *const T
} else {
// SAFETY: if is_dangling returns false, then the pointer is dereferencable.
// The payload may be dropped at this point, and we have to maintain provenance,
// so use raw pointer manipulation.
unsafe { &raw mut (*ptr).data }
}
}
@ -1773,18 +1759,22 @@ impl<T> Weak<T> {
/// [`forget`]: std::mem::forget
#[stable(feature = "weak_into_raw", since = "1.45.0")]
pub unsafe fn from_raw(ptr: *const T) -> Self {
// SAFETY: data_offset is safe to call, because this pointer originates from a Weak.
// See Weak::as_ptr for context on how the input pointer is derived.
let offset = unsafe { data_offset(ptr) };
// Reverse the offset to find the original ArcInner.
// SAFETY: we use wrapping_offset here because the pointer may be dangling (but only if T: Sized)
let ptr = unsafe {
set_data_ptr(ptr as *mut ArcInner<T>, (ptr as *mut u8).wrapping_offset(-offset))
let ptr = if is_dangling(ptr as *mut T) {
// This is a dangling Weak.
ptr as *mut ArcInner<T>
} else {
// Otherwise, we're guaranteed the pointer came from a nondangling Weak.
// SAFETY: data_offset is safe to call, as ptr references a real (potentially dropped) T.
let offset = unsafe { data_offset(ptr) };
// Thus, we reverse the offset to get the whole RcBox.
// SAFETY: the pointer originated from a Weak, so this offset is safe.
unsafe { (ptr as *mut ArcInner<T>).set_ptr_value((ptr as *mut u8).offset(-offset)) }
};
// SAFETY: we now have recovered the original Weak pointer, so can create the Weak.
unsafe { Weak { ptr: NonNull::new_unchecked(ptr) } }
Weak { ptr: unsafe { NonNull::new_unchecked(ptr) } }
}
}
@ -1889,7 +1879,7 @@ impl<T: ?Sized> Weak<T> {
/// (i.e., when this `Weak` was created by `Weak::new`).
#[inline]
fn inner(&self) -> Option<WeakInner<'_>> {
if is_dangling(self.ptr) {
if is_dangling(self.ptr.as_ptr()) {
None
} else {
// We are careful to *not* create a reference covering the "data" field, as
@ -2469,21 +2459,19 @@ impl<T: ?Sized> AsRef<T> for Arc<T> {
#[stable(feature = "pin", since = "1.33.0")]
impl<T: ?Sized> Unpin for Arc<T> {}
/// Get the offset within an `ArcInner` for
/// a payload of type described by a pointer.
/// Get the offset within an `ArcInner` for the payload behind a pointer.
///
/// # Safety
///
/// This has the same safety requirements as `align_of_val_raw`. In effect:
///
/// - This function is safe for any argument if `T` is sized, and
/// - if `T` is unsized, the pointer must have appropriate pointer metadata
/// acquired from the real instance that you are getting this offset for.
/// The pointer must point to (and have valid metadata for) a previously
/// valid instance of T, but the T is allowed to be dropped.
unsafe fn data_offset<T: ?Sized>(ptr: *const T) -> isize {
// Align the unsized value to the end of the `ArcInner`.
// Because it is `?Sized`, it will always be the last field in memory.
// Note: This is a detail of the current implementation of the compiler,
// and is not a guaranteed language detail. Do not rely on it outside of std.
// Align the unsized value to the end of the ArcInner.
// Because RcBox is repr(C), it will always be the last field in memory.
// SAFETY: since the only unsized types possible are slices, trait objects,
// and extern types, the input safety requirement is currently enough to
// satisfy the requirements of align_of_val_raw; this is an implementation
// detail of the language that may not be relied upon outside of std.
unsafe { data_offset_align(align_of_val_raw(ptr)) }
}

View File

@ -158,6 +158,30 @@ fn into_from_weak_raw() {
}
}
#[test]
fn test_into_from_weak_raw_unsized() {
use std::fmt::Display;
use std::string::ToString;
let arc: Arc<str> = Arc::from("foo");
let weak: Weak<str> = Arc::downgrade(&arc);
let ptr = Weak::into_raw(weak.clone());
let weak2 = unsafe { Weak::from_raw(ptr) };
assert_eq!(unsafe { &*ptr }, "foo");
assert!(weak.ptr_eq(&weak2));
let arc: Arc<dyn Display> = Arc::new(123);
let weak: Weak<dyn Display> = Arc::downgrade(&arc);
let ptr = Weak::into_raw(weak.clone());
let weak2 = unsafe { Weak::from_raw(ptr) };
assert_eq!(unsafe { &*ptr }.to_string(), "123");
assert!(weak.ptr_eq(&weak2));
}
#[test]
fn test_cowarc_clone_make_mut() {
let mut cow0 = Arc::new(75);
@ -329,6 +353,23 @@ fn test_unsized() {
assert!(y.upgrade().is_none());
}
#[test]
fn test_maybe_thin_unsized() {
// If/when custom thin DSTs exist, this test should be updated to use one
use std::ffi::{CStr, CString};
let x: Arc<CStr> = Arc::from(CString::new("swordfish").unwrap().into_boxed_c_str());
assert_eq!(format!("{:?}", x), "\"swordfish\"");
let y: Weak<CStr> = Arc::downgrade(&x);
drop(x);
// At this point, the weak points to a dropped DST
assert!(y.upgrade().is_none());
// But we still need to be able to get the alloc layout to drop.
// CStr has no drop glue, but custom DSTs might, and need to work.
drop(y);
}
#[test]
fn test_from_owned() {
let foo = 123;