auto merge of #17381 : tbu-/rust/pr_mapinplace2, r=aturon

Additionally, support zero-sized types. Now there isn't a safe interface of `PartialVec` anymore, it's just a bare data structure with destructor that assumes you handled everything correctly before.
2025-02-20 02:43:45 +00:00 · 2014-10-02 01:22:20 +00:00 · 2014-10-02 01:22:20 +00:00 · d53874eccf
commit d53874eccf
parent a70a0374e2 454d91d3d2
1 changed files with 222 additions and 208 deletions
--- a/src/libcollections/vec.rs
+++ b/src/libcollections/vec.rs
@ -18,6 +18,7 @@ use alloc::heap::{EMPTY, allocate, reallocate, deallocate};
 use core::cmp::max;
 use core::default::Default;
 use core::fmt;
+use core::kinds::marker::InvariantType;
 use core::mem;
 use core::num;
 use core::ops;
@ -1818,56 +1819,15 @@ pub mod raw {
    }
 }

-/// An owned, partially type-converted vector.
+/// An owned, partially type-converted vector of elements with non-zero size.
 ///
-/// This struct takes two type parameters `T` and `U` which must be of the
-/// same, non-zero size having the same minimal alignment.
+/// `T` and `U` must have the same, non-zero size. They must also have the same
+/// alignment.
 ///
-/// No allocations are performed by usage, only a deallocation happens in the
-/// destructor which should only run when unwinding.
-///
-/// It can be used to convert a vector of `T`s into a vector of `U`s, by
-/// converting the individual elements one-by-one.
-///
-/// You may call the `push` method as often as you get a `Some(t)` from `pop`.
-/// After pushing the same number of `U`s as you got `T`s, you can `unwrap` the
-/// vector.
-///
-/// # Example
-///
-/// ```ignore
-/// let pv = PartialVec::from_vec(vec![0u32, 1]);
-/// assert_eq!(pv.pop(), Some(0));
-/// assert_eq!(pv.pop(), Some(1));
-/// assert_eq!(pv.pop(), None);
-/// pv.push(2u32);
-/// pv.push(3);
-/// assert_eq!(pv.into_vec().as_slice(), &[2, 3]);
-/// ```
-//
-// Upheld invariants:
-//
-// (a) `vec` isn't modified except when the `PartialVec` goes out of scope, the
-//     only thing it is used for is keeping the memory which the `PartialVec`
-//     uses for the inplace conversion.
-//
-// (b) `start_u` points to the start of the vector.
-//
-// (c) `end_u` points to one element beyond the vector.
-//
-// (d) `start_u` <= `end_u` <= `start_t` <= `end_t`.
-//
-// (e) From `start_u` (incl.) to `end_u` (excl.) there are sequential instances
-//     of type `U`.
-//
-// (f) From `start_t` (incl.) to `end_t` (excl.) there are sequential instances
-//     of type `T`.
-//
-// (g) The size of `T` and `U` is equal and non-zero.
-//
-// (h) The `min_align_of` of `T` and `U` is equal.
-
-struct PartialVec<T,U> {
+/// When the destructor of this struct runs, all `U`s from `start_u` (incl.) to
+/// `end_u` (excl.) and all `T`s from `start_t` (incl.) to `end_t` (excl.) are
+/// destructed. Additionally the underlying storage of `vec` will be freed.
+struct PartialVecNonZeroSized<T,U> {
    vec: Vec<T>,

    start_u: *mut U,
@ -1876,166 +1836,34 @@ struct PartialVec<T,U> {
    end_t: *mut T,
 }

-impl<T,U> PartialVec<T,U> {
-    /// Creates a `PartialVec` from a `Vec`.
-    ///
-    /// # Failure
-    ///
-    /// Fails if `T` and `U` have differing sizes, are zero-sized or have
-    /// differing minimal alignments.
-    fn from_vec(mut vec: Vec<T>) -> PartialVec<T,U> {
-        // FIXME: Assert statically that the types `T` and `U` have the same
-        // size.
-        //
-        // These asserts make sure (g) and (h) are satisfied.
-        assert!(mem::size_of::<T>() != 0);
-        assert!(mem::size_of::<U>() != 0);
-        assert!(mem::size_of::<T>() == mem::size_of::<U>());
-        assert!(mem::min_align_of::<T>() == mem::min_align_of::<U>());
-
-        let start = vec.as_mut_ptr();
-
-        // This `as int` cast is safe, because the size of the elements of the
-        // vector is not 0, and:
-        //
-        // 1) If the size of the elements in the vector is 1, the `int` may
-        //    overflow, but it has the correct bit pattern so that the
-        //    `.offset()` function will work.
-        //
-        //    Example:
-        //        Address space 0x0-0xF.
-        //        `u8` array at: 0x1.
-        //        Size of `u8` array: 0x8.
-        //        Calculated `offset`: -0x8.
-        //        After `array.offset(offset)`: 0x9.
-        //        (0x1 + 0x8 = 0x1 - 0x8)
-        //
-        // 2) If the size of the elements in the vector is >1, the `uint` ->
-        //    `int` conversion can't overflow.
-        let offset = vec.len() as int;
-
-        let start_u = start as *mut U;
-        let end_u = start as *mut U;
-        let start_t = start;
-
-        // This points inside the vector, as the vector has length `offset`.
-        let end_t = unsafe { start_t.offset(offset) };
-
-        // (b) is satisfied, `start_u` points to the start of `vec`.
-        //
-        // (c) is also satisfied, `end_t` points to the end of `vec`.
-        //
-        // `start_u == end_u == start_t <= end_t`, so also `start_u <= end_u <=
-        // start_t <= end_t`, thus (b).
-        //
-        // As `start_u == end_u`, it is represented correctly that there are no
-        // instances of `U` in `vec`, thus (e) is satisfied.
-        //
-        // At start, there are only elements of type `T` in `vec`, so (f) is
-        // satisfied, as `start_t` points to the start of `vec` and `end_t` to
-        // the end of it.
-
-        PartialVec {
-            // (a) is satisfied, `vec` isn't modified in the function.
-            vec: vec,
-            start_u: start_u,
-            end_u: end_u,
-            start_t: start_t,
-            end_t: end_t,
-        }
-    }
-
-    /// Pops a `T` from the `PartialVec`.
-    ///
-    /// Removes the next `T` from the vector and returns it as `Some(T)`, or
-    /// `None` if there are none left.
-    fn pop(&mut self) -> Option<T> {
-        // The `if` ensures that there are more `T`s in `vec`.
-        if self.start_t < self.end_t {
-            let result;
-            unsafe {
-                // (f) is satisfied before, so in this if branch there actually
-                // is a `T` at `start_t`.  After shifting the pointer by one,
-                // (f) is again satisfied.
-                result = ptr::read(self.start_t as *const T);
-                self.start_t = self.start_t.offset(1);
-            }
-            Some(result)
-        } else {
-            None
-        }
-    }
-
-    /// Pushes a new `U` to the `PartialVec`.
-    ///
-    /// # Failure
-    ///
-    /// Fails if not enough `T`s were popped to have enough space for the new
-    /// `U`.
-    fn push(&mut self, value: U) {
-        // The assert assures that still `end_u <= start_t` (d) after
-        // the function.
-        assert!(self.end_u as *const () < self.start_t as *const (),
-            "writing more elements to PartialVec than reading from it")
-        unsafe {
-            // (e) is satisfied before, and after writing one `U`
-            // to `end_u` and shifting it by one, it's again
-            // satisfied.
-            ptr::write(self.end_u, value);
-            self.end_u = self.end_u.offset(1);
-        }
-    }
-
-    /// Unwraps the new `Vec` of `U`s after having pushed enough `U`s and
-    /// popped all `T`s.
-    ///
-    /// # Failure
-    ///
-    /// Fails if not all `T`s were popped, also fails if not the same amount of
-    /// `U`s was pushed before calling `unwrap`.
-    fn into_vec(mut self) -> Vec<U> {
-        // If `self.end_u == self.end_t`, we know from (e) that there are no
-        // more `T`s in `vec`, we also know that the whole length of `vec` is
-        // now used by `U`s, thus we can just interpret `vec` as a vector of
-        // `U` safely.
-
-        assert!(self.end_u as *const () == self.end_t as *const (),
-            "trying to unwrap a PartialVec before completing the writes to it");
-
-        // Extract `vec` and prevent the destructor of `PartialVec` from
-        // running. Note that none of the function calls can fail, thus no
-        // resources can be leaked (as the `vec` member of `PartialVec` is the
-        // only one which holds allocations -- and it is returned from this
-        // function.
-        unsafe {
-            let vec_len = self.vec.len();
-            let vec_cap = self.vec.capacity();
-            let vec_ptr = self.vec.as_mut_ptr() as *mut U;
-            mem::forget(self);
-            Vec::from_raw_parts(vec_len, vec_cap, vec_ptr)
-        }
-    }
+/// An owned, partially type-converted vector of zero-sized elements.
+///
+/// When the destructor of this struct runs, all `num_t` `T`s and `num_u` `U`s
+/// are destructed.
+struct PartialVecZeroSized<T,U> {
+    num_t: uint,
+    num_u: uint,
+    marker_t: InvariantType<T>,
+    marker_u: InvariantType<U>,
 }

 #[unsafe_destructor]
-impl<T,U> Drop for PartialVec<T,U> {
+impl<T,U> Drop for PartialVecNonZeroSized<T,U> {
    fn drop(&mut self) {
        unsafe {
-            // As per (a) `vec` hasn't been modified until now. As it has a
-            // length currently, this would run destructors of `T`s which might
-            // not be there. So at first, set `vec`s length to `0`. This must
-            // be done at first to remain memory-safe as the destructors of `U`
-            // or `T` might cause unwinding where `vec`s destructor would be
-            // executed.
+            // `vec` hasn't been modified until now. As it has a length
+            // currently, this would run destructors of `T`s which might not be
+            // there. So at first, set `vec`s length to `0`. This must be done
+            // at first to remain memory-safe as the destructors of `U` or `T`
+            // might cause unwinding where `vec`s destructor would be executed.
            self.vec.set_len(0);

-            // As per (e) and (f) we have instances of `U`s and `T`s in `vec`.
-            // Destruct them.
-            while self.start_u < self.end_u {
+            // We have instances of `U`s and `T`s in `vec`. Destruct them.
+            while self.start_u != self.end_u {
                let _ = ptr::read(self.start_u as *const U); // Run a `U` destructor.
                self.start_u = self.start_u.offset(1);
            }
-            while self.start_t < self.end_t {
+            while self.start_t != self.end_t {
                let _ = ptr::read(self.start_t as *const T); // Run a `T` destructor.
                self.start_t = self.start_t.offset(1);
            }
@ -2045,14 +1873,31 @@ impl<T,U> Drop for PartialVec<T,U> {
    }
 }

+#[unsafe_destructor]
+impl<T,U> Drop for PartialVecZeroSized<T,U> {
+    fn drop(&mut self) {
+        unsafe {
+            // Destruct the instances of `T` and `U` this struct owns.
+            while self.num_t != 0 {
+                let _: T = mem::uninitialized(); // Run a `T` destructor.
+                self.num_t -= 1;
+            }
+            while self.num_u != 0 {
+                let _: U = mem::uninitialized(); // Run a `U` destructor.
+                self.num_u -= 1;
+            }
+        }
+    }
+}
+
 impl<T> Vec<T> {
    /// Converts a `Vec<T>` to a `Vec<U>` where `T` and `U` have the same
-    /// non-zero size and the same minimal alignment.
+    /// size and in case they are not zero-sized the same minimal alignment.
    ///
    /// # Failure
    ///
-    /// Fails if `T` and `U` have differing sizes, are zero-sized or have
-    /// differing minimal alignments.
+    /// Fails if `T` and `U` have differing sizes or are not zero-sized and
+    /// have differing minimal alignments.
    ///
    /// # Example
    ///
@ -2068,13 +1913,174 @@ impl<T> Vec<T> {
    /// assert_eq!(newtyped_bytes.as_slice(), [Newtype(0x11), Newtype(0x22)].as_slice());
    /// ```
    pub fn map_in_place<U>(self, f: |T| -> U) -> Vec<U> {
-        let mut pv = PartialVec::from_vec(self);
-        loop {
-            let maybe_t = pv.pop();
-            match maybe_t {
-                Some(t) => pv.push(f(t)),
-                None => return pv.into_vec(),
+        // FIXME: Assert statically that the types `T` and `U` have the same
+        // size.
+        assert!(mem::size_of::<T>() == mem::size_of::<U>());
+
+        let mut vec = self;
+
+        if mem::size_of::<T>() != 0 {
+            // FIXME: Assert statically that the types `T` and `U` have the
+            // same minimal alignment in case they are not zero-sized.
+
+            // These asserts are necessary because the `min_align_of` of the
+            // types are passed to the allocator by `Vec`.
+            assert!(mem::min_align_of::<T>() == mem::min_align_of::<U>());
+
+            // This `as int` cast is safe, because the size of the elements of the
+            // vector is not 0, and:
+            //
+            // 1) If the size of the elements in the vector is 1, the `int` may
+            //    overflow, but it has the correct bit pattern so that the
+            //    `.offset()` function will work.
+            //
+            //    Example:
+            //        Address space 0x0-0xF.
+            //        `u8` array at: 0x1.
+            //        Size of `u8` array: 0x8.
+            //        Calculated `offset`: -0x8.
+            //        After `array.offset(offset)`: 0x9.
+            //        (0x1 + 0x8 = 0x1 - 0x8)
+            //
+            // 2) If the size of the elements in the vector is >1, the `uint` ->
+            //    `int` conversion can't overflow.
+            let offset = vec.len() as int;
+            let start = vec.as_mut_ptr();
+
+            let mut pv = PartialVecNonZeroSized {
+                vec: vec,
+
+                start_t: start,
+                // This points inside the vector, as the vector has length
+                // `offset`.
+                end_t: unsafe { start.offset(offset) },
+                start_u: start as *mut U,
+                end_u: start as *mut U,
            };
+            //  start_t
+            //  start_u
+            //  |
+            // +-+-+-+-+-+-+
+            // |T|T|T|...|T|
+            // +-+-+-+-+-+-+
+            //  |           |
+            //  end_u       end_t
+
+            while pv.end_u as *mut T != pv.end_t {
+                unsafe {
+                    //  start_u start_t
+                    //  |       |
+                    // +-+-+-+-+-+-+-+-+-+
+                    // |U|...|U|T|T|...|T|
+                    // +-+-+-+-+-+-+-+-+-+
+                    //          |         |
+                    //          end_u     end_t
+
+                    let t = ptr::read(pv.start_t as *const T);
+                    //  start_u start_t
+                    //  |       |
+                    // +-+-+-+-+-+-+-+-+-+
+                    // |U|...|U|X|T|...|T|
+                    // +-+-+-+-+-+-+-+-+-+
+                    //          |         |
+                    //          end_u     end_t
+                    // We must not fail here, one cell is marked as `T`
+                    // although it is not `T`.
+
+                    pv.start_t = pv.start_t.offset(1);
+                    //  start_u   start_t
+                    //  |         |
+                    // +-+-+-+-+-+-+-+-+-+
+                    // |U|...|U|X|T|...|T|
+                    // +-+-+-+-+-+-+-+-+-+
+                    //          |         |
+                    //          end_u     end_t
+                    // We may fail again.
+
+                    // The function given by the user might fail.
+                    let u = f(t);
+
+                    ptr::write(pv.end_u, u);
+                    //  start_u   start_t
+                    //  |         |
+                    // +-+-+-+-+-+-+-+-+-+
+                    // |U|...|U|U|T|...|T|
+                    // +-+-+-+-+-+-+-+-+-+
+                    //          |         |
+                    //          end_u     end_t
+                    // We should not fail here, because that would leak the `U`
+                    // pointed to by `end_u`.
+
+                    pv.end_u = pv.end_u.offset(1);
+                    //  start_u   start_t
+                    //  |         |
+                    // +-+-+-+-+-+-+-+-+-+
+                    // |U|...|U|U|T|...|T|
+                    // +-+-+-+-+-+-+-+-+-+
+                    //            |       |
+                    //            end_u   end_t
+                    // We may fail again.
+                }
+            }
+
+            //  start_u     start_t
+            //  |           |
+            // +-+-+-+-+-+-+
+            // |U|...|U|U|U|
+            // +-+-+-+-+-+-+
+            //              |
+            //              end_t
+            //              end_u
+            // Extract `vec` and prevent the destructor of
+            // `PartialVecNonZeroSized` from running. Note that none of the
+            // function calls can fail, thus no resources can be leaked (as the
+            // `vec` member of `PartialVec` is the only one which holds
+            // allocations -- and it is returned from this function. None of
+            // this can fail.
+            unsafe {
+                let vec_len = pv.vec.len();
+                let vec_cap = pv.vec.capacity();
+                let vec_ptr = pv.vec.as_mut_ptr() as *mut U;
+                mem::forget(pv);
+                Vec::from_raw_parts(vec_len, vec_cap, vec_ptr)
+            }
+        } else {
+            // Put the `Vec` into the `PartialVecZeroSized` structure and
+            // prevent the destructor of the `Vec` from running. Since the
+            // `Vec` contained zero-sized objects, it did not allocate, so we
+            // are not leaking memory here.
+            let mut pv = PartialVecZeroSized::<T,U> {
+                num_t: vec.len(),
+                num_u: 0,
+                marker_t: InvariantType,
+                marker_u: InvariantType,
+            };
+            unsafe { mem::forget(vec); }
+
+            while pv.num_t != 0 {
+                unsafe {
+                    // Create a `T` out of thin air and decrement `num_t`. This
+                    // must not fail between these steps, as otherwise a
+                    // destructor of `T` which doesn't exist runs.
+                    let t = mem::uninitialized();
+                    pv.num_t -= 1;
+
+                    // The function given by the user might fail.
+                    let u = f(t);
+
+                    // Forget the `U` and increment `num_u`. This increment
+                    // cannot overflow the `uint` as we only do this for a
+                    // number of times that fits into a `uint` (and start with
+                    // `0`). Again, we should not fail between these steps.
+                    mem::forget(u);
+                    pv.num_u += 1;
+                }
+            }
+            // Create a `Vec` from our `PartialVecZeroSized` and make sure the
+            // destructor of the latter will not run. None of this can fail.
+            let mut result = Vec::new();
+            unsafe { result.set_len(pv.num_u); }
+            result
        }
    }
 }
@ -2448,7 +2454,7 @@ mod tests {

    #[test]
    #[should_fail]
-    fn test_map_inp_lace_incompatible_types_fail() {
+    fn test_map_in_place_incompatible_types_fail() {
        let v = vec![0u, 1, 2];
        v.map_in_place(|_| ());
    }
@ -2459,6 +2465,14 @@ mod tests {
        assert_eq!(v.map_in_place(|i: uint| i as int - 1).as_slice(), [-1i, 0, 1].as_slice());
    }

+    #[test]
+    fn test_map_in_place_zero_sized() {
+        let v = vec![(), ()];
+        #[deriving(PartialEq, Show)]
+        struct ZeroSized;
+        assert_eq!(v.map_in_place(|_| ZeroSized).as_slice(), [ZeroSized, ZeroSized].as_slice());
+    }
+
    #[bench]
    fn bench_new(b: &mut Bencher) {
        b.iter(|| {