Auto merge of #68254 - Dylan-DPC:rollup-9vhc59u, r=Dylan-DPC

Rollup of 6 pull requests Successful merges: - #68123 (Implement Cursor for linked lists. (RFC 2570).) - #68212 (Suggest to shorten temporary lifetime during method call inside generator) - #68232 (Optimize size/speed of Unicode datasets) - #68236 (Add some regression tests) - #68237 (Account for `Path`s in `is_suggestable_infer_ty`) - #68252 (remove redundant clones, found by clippy) Failed merges: r? @ghost
2025-02-25 21:34:18 +00:00 · 2020-01-15 19:40:45 +00:00 · 2020-01-15 19:40:45 +00:00 · 3291ae3390
commit 3291ae3390
parent faf45c5dad 4ff6195929
37 changed files with 3903 additions and 3294 deletions
--- a/.gitignore
+++ b/.gitignore
@ -34,14 +34,7 @@ __pycache__/
 # Created by default with `src/ci/docker/run.sh`:
 /obj/
 /rustllvm/
-/src/libcore/unicode/DerivedCoreProperties.txt
-/src/libcore/unicode/DerivedNormalizationProps.txt
-/src/libcore/unicode/PropList.txt
-/src/libcore/unicode/ReadMe.txt
-/src/libcore/unicode/Scripts.txt
-/src/libcore/unicode/SpecialCasing.txt
-/src/libcore/unicode/UnicodeData.txt
-/src/libcore/unicode/downloaded
+/unicode-downloads
 /target/
 # Generated by compiletest for incremental:
 /tmp/
--- a/Cargo.lock
+++ b/Cargo.lock
@ -4953,6 +4953,16 @@ version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169"

+[[package]]
+name = "ucd-parse"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca6b52bf4da6512f0f07785a04769222e50d29639e7ecd016b7806fd2de306b4"
+dependencies = [
+ "lazy_static 1.3.0",
+ "regex",
+]
+
 [[package]]
 name = "ucd-trie"
 version = "0.1.1"
@ -4974,6 +4984,13 @@ dependencies = [
 "version_check 0.1.5",
 ]

+[[package]]
+name = "unicode-bdd"
+version = "0.1.0"
+dependencies = [
+ "ucd-parse",
+]
+
 [[package]]
 name = "unicode-bidi"
 version = "0.3.4"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -23,6 +23,7 @@ members = [
  "src/tools/rustfmt",
  "src/tools/miri",
  "src/tools/rustdoc-themes",
+  "src/tools/unicode-table-generator",
 ]
 exclude = [
  "build",
--- a/src/liballoc/collections/linked_list.rs
+++ b/src/liballoc/collections/linked_list.rs
@ -242,6 +242,121 @@ impl<T> LinkedList<T> {

        self.len -= 1;
    }
+
+    /// Splices the node chain `splice_start..=splice_end` in between `existing_prev` and `existing_next`.
+    ///
+    /// Warning: nothing here checks that the existing nodes belong to this list or that `splice_length` is the chain's real length.
+    #[inline]
+    unsafe fn splice_nodes(
+        &mut self,
+        existing_prev: Option<NonNull<Node<T>>>, // node to precede the chain; `None` makes the chain the new head
+        existing_next: Option<NonNull<Node<T>>>, // node to follow the chain; `None` makes the chain the new tail
+        mut splice_start: NonNull<Node<T>>, // first node of the chain being inserted
+        mut splice_end: NonNull<Node<T>>, // last node of the chain being inserted
+        splice_length: usize, // added to `self.len` as-is
+    ) {
+        // This method takes care not to create multiple mutable references to whole nodes at the same time,
+        // to maintain validity of aliasing pointers into `element`.
+        if let Some(mut existing_prev) = existing_prev {
+            existing_prev.as_mut().next = Some(splice_start);
+        } else {
+            self.head = Some(splice_start); // no predecessor: the chain now starts the list
+        }
+        if let Some(mut existing_next) = existing_next {
+            existing_next.as_mut().prev = Some(splice_end);
+        } else {
+            self.tail = Some(splice_end); // no successor: the chain now ends the list
+        }
+        splice_start.as_mut().prev = existing_prev; // back-link from the chain to the list
+        splice_end.as_mut().next = existing_next; // forward-link from the chain to the list
+
+        self.len += splice_length;
+    }
+
+    /// Consumes the list and returns its `(head, tail, len)` as a raw node chain, or `None` if the list has no head.
+    #[inline]
+    fn detach_all_nodes(mut self) -> Option<(NonNull<Node<T>>, NonNull<Node<T>>, usize)> {
+        let head = self.head.take(); // `self` keeps no pointer to the nodes once this function returns
+        let tail = self.tail.take();
+        let len = mem::replace(&mut self.len, 0);
+        if let Some(head) = head {
+            // Relies on the list invariant that a head implies a tail; reaching the closure would be undefined behavior.
+            let tail = tail.unwrap_or_else(|| unsafe { core::hint::unreachable_unchecked() });
+            Some((head, tail, len))
+        } else {
+            None // nothing to detach
+        }
+    }
+
+    #[inline]
+    unsafe fn split_off_before_node(
+        &mut self,
+        split_node: Option<NonNull<Node<T>>>, // first node that stays in `self`; `None` moves the whole list out
+        at: usize, // number of elements before `split_node`; trusted, not verified here
+    ) -> Self {
+        // Returns everything before `split_node` as a new list; the split node is the new head node of the second part, which stays in `self`.
+        if let Some(mut split_node) = split_node {
+            let first_part_head;
+            let first_part_tail;
+            first_part_tail = split_node.as_mut().prev.take(); // also cuts the back-link of the split node
+            if let Some(mut tail) = first_part_tail {
+                tail.as_mut().next = None; // cut the forward link into the second part
+                first_part_head = self.head;
+            } else {
+                first_part_head = None; // the split node had no predecessor, so the first part is empty
+            }
+
+            let first_part = LinkedList {
+                head: first_part_head,
+                tail: first_part_tail,
+                len: at,
+                marker: PhantomData,
+            };
+
+            // Fix the head ptr of the second part; `self.tail` is left as it was
+            self.head = Some(split_node);
+            self.len = self.len - at;
+
+            first_part
+        } else {
+            mem::replace(self, LinkedList::new()) // no split node: return the whole list, leave `self` freshly constructed
+        }
+    }
+
+    #[inline]
+    unsafe fn split_off_after_node(
+        &mut self,
+        split_node: Option<NonNull<Node<T>>>, // last node that stays in `self`; `None` moves the whole list out
+        at: usize, // number of elements kept in `self`; trusted, not verified here
+    ) -> Self {
+        // Returns everything after `split_node` as a new list. The split node is the new tail
+        // node of the first part (kept in `self`) and owns the head of the second part.
+        if let Some(mut split_node) = split_node {
+            let second_part_head;
+            let second_part_tail;
+            second_part_head = split_node.as_mut().next.take(); // also cuts the forward link of the split node
+            if let Some(mut head) = second_part_head {
+                head.as_mut().prev = None; // cut the back-link into the first part
+                second_part_tail = self.tail;
+            } else {
+                second_part_tail = None; // the split node had no successor, so the second part is empty
+            }
+
+            let second_part = LinkedList {
+                head: second_part_head,
+                tail: second_part_tail,
+                len: self.len - at,
+                marker: PhantomData,
+            };
+
+            // Fix the tail ptr of the first part; `self.head` is left as it was
+            self.tail = Some(split_node);
+            self.len = at;
+
+            second_part
+        } else {
+            mem::replace(self, LinkedList::new()) // no split node: return the whole list, leave `self` freshly constructed
+        }
+    }
 }

 #[stable(feature = "rust1", since = "1.0.0")]
@ -319,6 +434,27 @@ impl<T> LinkedList<T> {
        }
    }

+    /// Moves all elements from `other` to the beginning of the list by relinking nodes; no element is copied.
+    #[unstable(feature = "linked_list_prepend", issue = "none")]
+    pub fn prepend(&mut self, other: &mut Self) {
+        match self.head {
+            None => mem::swap(self, other), // `self` has no head: simply exchange the two lists
+            Some(mut head) => {
+                // `as_mut` is okay here because we have exclusive access to the entirety
+                // of both lists.
+                if let Some(mut other_tail) = other.tail.take() {
+                    unsafe {
+                        head.as_mut().prev = Some(other_tail); // old head of `self` now follows the tail of `other`
+                        other_tail.as_mut().next = Some(head);
+                    }
+
+                    self.head = other.head.take(); // `other` keeps neither head nor tail
+                    self.len += mem::replace(&mut other.len, 0);
+                }
+            }
+        }
+    }
+
    /// Provides a forward iterator.
    ///
    /// # Examples
@ -373,6 +509,42 @@ impl<T> LinkedList<T> {
        IterMut { head: self.head, tail: self.tail, len: self.len, list: self }
    }

+    /// Provides a read-only cursor positioned at the front element (index 0).
+    ///
+    /// If the list is empty there is no front element, so the cursor points to the "ghost" non-element.
+    #[inline]
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn cursor_front(&self) -> Cursor<'_, T> {
+        Cursor { index: 0, current: self.head, list: self }
+    }
+
+    /// Provides a cursor with editing operations positioned at the front element (index 0).
+    ///
+    /// If the list is empty there is no front element, so the cursor points to the "ghost" non-element.
+    #[inline]
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn cursor_front_mut(&mut self) -> CursorMut<'_, T> {
+        CursorMut { index: 0, current: self.head, list: self }
+    }
+
+    /// Provides a read-only cursor positioned at the back element (index `len - 1`).
+    ///
+    /// If the list is empty the cursor points to the "ghost" non-element and its stored index is 0.
+    #[inline]
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn cursor_back(&self) -> Cursor<'_, T> {
+        Cursor { index: self.len.checked_sub(1).unwrap_or(0), current: self.tail, list: self }
+    }
+
+    /// Provides a cursor with editing operations positioned at the back element (index `len - 1`).
+    ///
+    /// If the list is empty the cursor points to the "ghost" non-element and its stored index is 0.
+    #[inline]
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn cursor_back_mut(&mut self) -> CursorMut<'_, T> {
+        CursorMut { index: self.len.checked_sub(1).unwrap_or(0), current: self.tail, list: self }
+    }
+
    /// Returns `true` if the `LinkedList` is empty.
    ///
    /// This operation should compute in O(1) time.
@ -703,30 +875,7 @@ impl<T> LinkedList<T> {
            }
            iter.tail
        };
-
-        // The split node is the new tail node of the first part and owns
-        // the head of the second part.
-        let second_part_head;
-
-        unsafe {
-            second_part_head = split_node.unwrap().as_mut().next.take();
-            if let Some(mut head) = second_part_head {
-                head.as_mut().prev = None;
-            }
-        }
-
-        let second_part = LinkedList {
-            head: second_part_head,
-            tail: self.tail,
-            len: len - at,
-            marker: PhantomData,
-        };
-
-        // Fix the tail ptr of the first part
-        self.tail = split_node;
-        self.len = at;
-
-        second_part
+        unsafe { self.split_off_after_node(split_node, at) }
    }

    /// Creates an iterator which uses a closure to determine if an element should be removed.
@ -986,6 +1135,388 @@ impl<T> IterMut<'_, T> {
    }
 }

+/// A read-only cursor over a `LinkedList`.
+///
+/// A `Cursor` is like an iterator, except that it can freely seek back-and-forth.
+///
+/// A cursor always points at a single position of the list, and moves in a logically circular way.
+/// To accommodate this, there is a "ghost" non-element that yields `None` between the tail and
+/// the head of the list.
+///
+/// `cursor_front` and `cursor_back` create a cursor at the first or last element, or at the "ghost" non-element if the list is empty.
+#[unstable(feature = "linked_list_cursors", issue = "58533")]
+pub struct Cursor<'a, T: 'a> {
+    index: usize, // position of `current`; only reported by `index()` while on a real element
+    current: Option<NonNull<Node<T>>>, // node under the cursor; `None` while on the "ghost" non-element
+    list: &'a LinkedList<T>, // the list being traversed
+}
+
+#[unstable(feature = "linked_list_cursors", issue = "58533")]
+impl<T: fmt::Debug> fmt::Debug for Cursor<'_, T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("Cursor").field(&self.list).field(&self.index()).finish() // prints the whole list, then the `Option` index
+    }
+}
+
+/// A cursor over a `LinkedList` with editing operations.
+///
+/// A `CursorMut` is like an iterator, except that it can freely seek back-and-forth, and can
+/// safely mutate the list during iteration. This is because the lifetime of its yielded
+/// references is tied to its own lifetime, instead of just the underlying list. This means
+/// cursors cannot yield multiple elements at once.
+///
+/// A cursor always points at a single position of the list, and moves in a logically circular way.
+/// To accommodate this, there is a "ghost" non-element that yields `None` between the tail and
+/// the head of the list.
+#[unstable(feature = "linked_list_cursors", issue = "58533")]
+pub struct CursorMut<'a, T: 'a> {
+    index: usize, // position of `current`; only reported by `index()` while on a real element
+    current: Option<NonNull<Node<T>>>, // node under the cursor; `None` while on the "ghost" non-element
+    list: &'a mut LinkedList<T>, // exclusive borrow of the list being traversed and edited
+}
+
+#[unstable(feature = "linked_list_cursors", issue = "58533")]
+impl<T: fmt::Debug> fmt::Debug for CursorMut<'_, T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("CursorMut").field(&self.list).field(&self.index()).finish() // prints the whole list, then the `Option` index
+    }
+}
+
+impl<'a, T> Cursor<'a, T> {
+    /// Returns the cursor position index within the `LinkedList`.
+    ///
+    /// This returns `None` if the cursor is currently pointing to the
+    /// "ghost" non-element.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn index(&self) -> Option<usize> {
+        let _ = self.current?; // early-return `None` while on the "ghost" non-element
+        Some(self.index)
+    }
+
+    /// Moves the cursor to the next element of the `LinkedList`.
+    ///
+    /// If the cursor is pointing to the "ghost" non-element then this will move it to
+    /// the first element of the `LinkedList`. If it is pointing to the last
+    /// element of the `LinkedList` then this will move it to the "ghost" non-element.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn move_next(&mut self) {
+        match self.current.take() {
+            // No current element: the cursor was on the "ghost" non-element.
+            // The next position is the head of the list (still `None` if the list has no head).
+            None => {
+                self.current = self.list.head;
+                self.index = 0;
+            }
+            // The cursor was on a real element, so step to its `next` (`None` means the "ghost").
+            Some(current) => unsafe {
+                self.current = current.as_ref().next;
+                self.index += 1;
+            },
+        }
+    }
+
+    /// Moves the cursor to the previous element of the `LinkedList`.
+    ///
+    /// If the cursor is pointing to the "ghost" non-element then this will move it to
+    /// the last element of the `LinkedList`. If it is pointing to the first
+    /// element of the `LinkedList` then this will move it to the "ghost" non-element.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn move_prev(&mut self) {
+        match self.current.take() {
+            // No current element: the cursor was on the "ghost". Jump to the tail, at index `len - 1` (0 if empty).
+            None => {
+                self.current = self.list.tail;
+                self.index = self.list.len().checked_sub(1).unwrap_or(0);
+            }
+            // On a real element: step to its `prev`. Stepping back from index 0 stores `len` as the index.
+            Some(current) => unsafe {
+                self.current = current.as_ref().prev;
+                self.index = self.index.checked_sub(1).unwrap_or_else(|| self.list.len());
+            },
+        }
+    }
+
+    /// Returns a reference to the element that the cursor is currently
+    /// pointing to.
+    ///
+    /// This returns `None` if the cursor is currently pointing to the
+    /// "ghost" non-element.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn current(&self) -> Option<&'a T> {
+        unsafe { self.current.map(|current| &(*current.as_ptr()).element) }
+    }
+
+    /// Returns a reference to the next element without moving the cursor.
+    ///
+    /// If the cursor is pointing to the "ghost" non-element then this returns
+    /// the first element of the `LinkedList`. If it is pointing to the last
+    /// element of the `LinkedList` then this returns `None`.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn peek_next(&self) -> Option<&'a T> {
+        unsafe {
+            let next = match self.current {
+                None => self.list.head, // from the "ghost", the next position is the head
+                Some(current) => current.as_ref().next,
+            };
+            next.map(|next| &(*next.as_ptr()).element)
+        }
+    }
+
+    /// Returns a reference to the previous element without moving the cursor.
+    ///
+    /// If the cursor is pointing to the "ghost" non-element then this returns
+    /// the last element of the `LinkedList`. If it is pointing to the first
+    /// element of the `LinkedList` then this returns `None`.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn peek_prev(&self) -> Option<&'a T> {
+        unsafe {
+            let prev = match self.current {
+                None => self.list.tail, // from the "ghost", the previous position is the tail
+                Some(current) => current.as_ref().prev,
+            };
+            prev.map(|prev| &(*prev.as_ptr()).element)
+        }
+    }
+}
+
+impl<'a, T> CursorMut<'a, T> {
+    /// Returns the cursor position index within the `LinkedList`.
+    ///
+    /// This returns `None` if the cursor is currently pointing to the
+    /// "ghost" non-element.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn index(&self) -> Option<usize> {
+        let _ = self.current?; // early-return `None` while on the "ghost" non-element
+        Some(self.index)
+    }
+
+    /// Moves the cursor to the next element of the `LinkedList`.
+    ///
+    /// If the cursor is pointing to the "ghost" non-element then this will move it to
+    /// the first element of the `LinkedList`. If it is pointing to the last
+    /// element of the `LinkedList` then this will move it to the "ghost" non-element.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn move_next(&mut self) {
+        match self.current.take() {
+            // No current element: the cursor was on the "ghost" non-element.
+            // The next position is the head of the list (still `None` if the list has no head).
+            None => {
+                self.current = self.list.head;
+                self.index = 0;
+            }
+            // The cursor was on a real element, so step to its `next` (`None` means the "ghost").
+            Some(current) => unsafe {
+                self.current = current.as_ref().next;
+                self.index += 1;
+            },
+        }
+    }
+
+    /// Moves the cursor to the previous element of the `LinkedList`.
+    ///
+    /// If the cursor is pointing to the "ghost" non-element then this will move it to
+    /// the last element of the `LinkedList`. If it is pointing to the first
+    /// element of the `LinkedList` then this will move it to the "ghost" non-element.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn move_prev(&mut self) {
+        match self.current.take() {
+            // No current element: the cursor was on the "ghost". Jump to the tail, at index `len - 1` (0 if empty).
+            None => {
+                self.current = self.list.tail;
+                self.index = self.list.len().checked_sub(1).unwrap_or(0);
+            }
+            // On a real element: step to its `prev`. Stepping back from index 0 stores `len` as the index.
+            Some(current) => unsafe {
+                self.current = current.as_ref().prev;
+                self.index = self.index.checked_sub(1).unwrap_or_else(|| self.list.len());
+            },
+        }
+    }
+
+    /// Returns a mutable reference to the element that the cursor is currently
+    /// pointing to.
+    ///
+    /// This returns `None` if the cursor is currently pointing to the
+    /// "ghost" non-element.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn current(&mut self) -> Option<&mut T> {
+        unsafe { self.current.map(|current| &mut (*current.as_ptr()).element) }
+    }
+
+    /// Returns a mutable reference to the next element without moving the cursor.
+    ///
+    /// If the cursor is pointing to the "ghost" non-element then this returns
+    /// the first element of the `LinkedList`. If it is pointing to the last
+    /// element of the `LinkedList` then this returns `None`.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn peek_next(&mut self) -> Option<&mut T> {
+        unsafe {
+            let next = match self.current {
+                None => self.list.head, // from the "ghost", the next position is the head
+                Some(current) => current.as_ref().next,
+            };
+            next.map(|next| &mut (*next.as_ptr()).element)
+        }
+    }
+
+    /// Returns a mutable reference to the previous element without moving the cursor.
+    ///
+    /// If the cursor is pointing to the "ghost" non-element then this returns
+    /// the last element of the `LinkedList`. If it is pointing to the first
+    /// element of the `LinkedList` then this returns `None`.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn peek_prev(&mut self) -> Option<&mut T> {
+        unsafe {
+            let prev = match self.current {
+                None => self.list.tail, // from the "ghost", the previous position is the tail
+                Some(current) => current.as_ref().prev,
+            };
+            prev.map(|prev| &mut (*prev.as_ptr()).element)
+        }
+    }
+
+    /// Returns a read-only cursor pointing to the current element.
+    ///
+    /// The lifetime of the returned `Cursor` is bound to that of the
+    /// `CursorMut`, which means it cannot outlive the `CursorMut` and that the
+    /// `CursorMut` is frozen for the lifetime of the `Cursor`.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn as_cursor<'cm>(&'cm self) -> Cursor<'cm, T> {
+        Cursor { list: self.list, current: self.current, index: self.index } // same position, shared borrow of the list
+    }
+}
+
+// Now the list editing operations
+
+impl<'a, T> CursorMut<'a, T> {
+    /// Inserts a new element into the `LinkedList` after the current one.
+    ///
+    /// If the cursor is pointing at the "ghost" non-element then the new element is
+    /// inserted at the front of the `LinkedList`.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn insert_after(&mut self, item: T) {
+        unsafe {
+            let spliced_node = Box::into_raw_non_null(Box::new(Node::new(item)));
+            let node_next = match self.current {
+                None => self.list.head, // after the "ghost" comes the head
+                Some(node) => node.as_ref().next,
+            };
+            self.list.splice_nodes(self.current, node_next, spliced_node, spliced_node, 1); // one-node chain: start and end are the same node
+            if self.current.is_none() {
+                // The "ghost" non-element's index has changed.
+                self.index = self.list.len;
+            }
+        }
+    }
+
+    /// Inserts a new element into the `LinkedList` before the current one.
+    ///
+    /// If the cursor is pointing at the "ghost" non-element then the new element is
+    /// inserted at the end of the `LinkedList`.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn insert_before(&mut self, item: T) {
+        unsafe {
+            let spliced_node = Box::into_raw_non_null(Box::new(Node::new(item)));
+            let node_prev = match self.current {
+                None => self.list.tail, // before the "ghost" comes the tail
+                Some(node) => node.as_ref().prev,
+            };
+            self.list.splice_nodes(node_prev, self.current, spliced_node, spliced_node, 1); // one-node chain: start and end are the same node
+            self.index += 1; // one more element now sits before the cursor position
+        }
+    }
+
+    /// Removes the current element from the `LinkedList`.
+    ///
+    /// The element that was removed is returned, and the cursor is
+    /// moved to point to the next element in the `LinkedList`.
+    ///
+    /// If the cursor is currently pointing to the "ghost" non-element then no element
+    /// is removed and `None` is returned.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn remove_current(&mut self) -> Option<T> {
+        let unlinked_node = self.current?;
+        unsafe {
+            self.current = unlinked_node.as_ref().next; // advance before unlinking; `index` is left unchanged
+            self.list.unlink_node(unlinked_node);
+            let unlinked_node = Box::from_raw(unlinked_node.as_ptr()); // take the node back into a `Box` so it is freed
+            Some(unlinked_node.element)
+        }
+    }
+
+    /// Inserts the elements from the given `LinkedList` after the current one.
+    ///
+    /// If the cursor is pointing at the "ghost" non-element then the new elements are
+    /// inserted at the start of the `LinkedList`.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn splice_after(&mut self, list: LinkedList<T>) {
+        unsafe {
+            let (splice_head, splice_tail, splice_len) = match list.detach_all_nodes() {
+                Some(parts) => parts,
+                _ => return, // nothing to insert
+            };
+            let node_next = match self.current {
+                None => self.list.head, // after the "ghost" comes the head
+                Some(node) => node.as_ref().next,
+            };
+            self.list.splice_nodes(self.current, node_next, splice_head, splice_tail, splice_len);
+            if self.current.is_none() {
+                // The "ghost" non-element's index has changed.
+                self.index = self.list.len;
+            }
+        }
+    }
+
+    /// Inserts the elements from the given `LinkedList` before the current one.
+    ///
+    /// If the cursor is pointing at the "ghost" non-element then the new elements are
+    /// inserted at the end of the `LinkedList`.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn splice_before(&mut self, list: LinkedList<T>) {
+        unsafe {
+            let (splice_head, splice_tail, splice_len) = match list.detach_all_nodes() {
+                Some(parts) => parts,
+                _ => return, // nothing to insert
+            };
+            let node_prev = match self.current {
+                None => self.list.tail, // before the "ghost" comes the tail
+                Some(node) => node.as_ref().prev,
+            };
+            self.list.splice_nodes(node_prev, self.current, splice_head, splice_tail, splice_len);
+            self.index += splice_len; // `splice_len` more elements now sit before the cursor position
+        }
+    }
+
+    /// Splits the list into two after the current element. This will return a
+    /// new list consisting of everything after the cursor, with the original
+    /// list retaining everything before.
+    ///
+    /// If the cursor is pointing at the "ghost" non-element then the entire contents
+    /// of the `LinkedList` are moved.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn split_after(&mut self) -> LinkedList<T> {
+        let split_off_idx = if self.index == self.list.len { 0 } else { self.index + 1 }; // number of elements that stay in the list
+        if self.index == self.list.len {
+            // The "ghost" non-element's index has changed to 0.
+            self.index = 0;
+        }
+        unsafe { self.list.split_off_after_node(self.current, split_off_idx) }
+    }
+
+    /// Splits the list into two before the current element. This will return a
+    /// new list consisting of everything before the cursor, with the original
+    /// list retaining everything after.
+    ///
+    /// If the cursor is pointing at the "ghost" non-element then the entire contents
+    /// of the `LinkedList` are moved.
+    #[unstable(feature = "linked_list_cursors", issue = "58533")]
+    pub fn split_before(&mut self) -> LinkedList<T> {
+        let split_off_idx = self.index; // number of elements that move to the returned list
+        self.index = 0; // nothing remains before the cursor position afterwards
+        unsafe { self.list.split_off_before_node(self.current, split_off_idx) }
+    }
+}
+
 /// An iterator produced by calling `drain_filter` on LinkedList.
 #[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")]
 pub struct DrainFilter<'a, T: 'a, F: 'a>
--- a/src/liballoc/collections/linked_list/tests.rs
+++ b/src/liballoc/collections/linked_list/tests.rs
@ -304,3 +304,155 @@ fn drain_to_empty_test() {
    assert_eq!(deleted, &[1, 2, 3, 4, 5, 6]);
    assert_eq!(m.into_iter().collect::<Vec<_>>(), &[]);
 }
+
+#[test]
+fn test_cursor_move_peek() {
+    let mut m: LinkedList<u32> = LinkedList::new();
+    m.extend(&[1, 2, 3, 4, 5, 6]);
+    let mut cursor = m.cursor_front(); // read-only cursor, starting at the front
+    assert_eq!(cursor.current(), Some(&1));
+    assert_eq!(cursor.peek_next(), Some(&2));
+    assert_eq!(cursor.peek_prev(), None);
+    assert_eq!(cursor.index(), Some(0));
+    cursor.move_prev(); // front -> "ghost"
+    assert_eq!(cursor.current(), None);
+    assert_eq!(cursor.peek_next(), Some(&1));
+    assert_eq!(cursor.peek_prev(), Some(&6));
+    assert_eq!(cursor.index(), None);
+    cursor.move_next(); // "ghost" -> front
+    cursor.move_next();
+    assert_eq!(cursor.current(), Some(&2));
+    assert_eq!(cursor.peek_next(), Some(&3));
+    assert_eq!(cursor.peek_prev(), Some(&1));
+    assert_eq!(cursor.index(), Some(1));
+
+    let mut cursor = m.cursor_back(); // read-only cursor, starting at the back
+    assert_eq!(cursor.current(), Some(&6));
+    assert_eq!(cursor.peek_next(), None);
+    assert_eq!(cursor.peek_prev(), Some(&5));
+    assert_eq!(cursor.index(), Some(5));
+    cursor.move_next(); // back -> "ghost"
+    assert_eq!(cursor.current(), None);
+    assert_eq!(cursor.peek_next(), Some(&1));
+    assert_eq!(cursor.peek_prev(), Some(&6));
+    assert_eq!(cursor.index(), None);
+    cursor.move_prev(); // "ghost" -> back
+    cursor.move_prev();
+    assert_eq!(cursor.current(), Some(&5));
+    assert_eq!(cursor.peek_next(), Some(&6));
+    assert_eq!(cursor.peek_prev(), Some(&4));
+    assert_eq!(cursor.index(), Some(4));
+
+    let mut m: LinkedList<u32> = LinkedList::new();
+    m.extend(&[1, 2, 3, 4, 5, 6]);
+    let mut cursor = m.cursor_front_mut(); // same walk as above, with the mutable cursor
+    assert_eq!(cursor.current(), Some(&mut 1));
+    assert_eq!(cursor.peek_next(), Some(&mut 2));
+    assert_eq!(cursor.peek_prev(), None);
+    assert_eq!(cursor.index(), Some(0));
+    cursor.move_prev(); // front -> "ghost"
+    assert_eq!(cursor.current(), None);
+    assert_eq!(cursor.peek_next(), Some(&mut 1));
+    assert_eq!(cursor.peek_prev(), Some(&mut 6));
+    assert_eq!(cursor.index(), None);
+    cursor.move_next(); // "ghost" -> front
+    cursor.move_next();
+    assert_eq!(cursor.current(), Some(&mut 2));
+    assert_eq!(cursor.peek_next(), Some(&mut 3));
+    assert_eq!(cursor.peek_prev(), Some(&mut 1));
+    assert_eq!(cursor.index(), Some(1));
+    let mut cursor2 = cursor.as_cursor(); // read-only view starting at the same position
+    assert_eq!(cursor2.current(), Some(&2));
+    assert_eq!(cursor2.index(), Some(1));
+    cursor2.move_next(); // moving the view must not move the original cursor
+    assert_eq!(cursor2.current(), Some(&3));
+    assert_eq!(cursor2.index(), Some(2));
+    assert_eq!(cursor.current(), Some(&mut 2));
+    assert_eq!(cursor.index(), Some(1));
+
+    let mut m: LinkedList<u32> = LinkedList::new();
+    m.extend(&[1, 2, 3, 4, 5, 6]);
+    let mut cursor = m.cursor_back_mut(); // mutable cursor, starting at the back
+    assert_eq!(cursor.current(), Some(&mut 6));
+    assert_eq!(cursor.peek_next(), None);
+    assert_eq!(cursor.peek_prev(), Some(&mut 5));
+    assert_eq!(cursor.index(), Some(5));
+    cursor.move_next(); // back -> "ghost"
+    assert_eq!(cursor.current(), None);
+    assert_eq!(cursor.peek_next(), Some(&mut 1));
+    assert_eq!(cursor.peek_prev(), Some(&mut 6));
+    assert_eq!(cursor.index(), None);
+    cursor.move_prev(); // "ghost" -> back
+    cursor.move_prev();
+    assert_eq!(cursor.current(), Some(&mut 5));
+    assert_eq!(cursor.peek_next(), Some(&mut 6));
+    assert_eq!(cursor.peek_prev(), Some(&mut 4));
+    assert_eq!(cursor.index(), Some(4));
+    let mut cursor2 = cursor.as_cursor(); // read-only view starting at the same position
+    assert_eq!(cursor2.current(), Some(&5));
+    assert_eq!(cursor2.index(), Some(4));
+    cursor2.move_prev(); // moving the view must not move the original cursor
+    assert_eq!(cursor2.current(), Some(&4));
+    assert_eq!(cursor2.index(), Some(3));
+    assert_eq!(cursor.current(), Some(&mut 5));
+    assert_eq!(cursor.index(), Some(4));
+}
+
+#[test]
+fn test_cursor_mut_insert() {
+    let mut m: LinkedList<u32> = LinkedList::new();
+    m.extend(&[1, 2, 3, 4, 5, 6]);
+    let mut cursor = m.cursor_front_mut(); // cursor on element 1
+    cursor.insert_before(7);
+    cursor.insert_after(8);
+    check_links(&m);
+    assert_eq!(m.iter().cloned().collect::<Vec<_>>(), &[7, 1, 8, 2, 3, 4, 5, 6]);
+    let mut cursor = m.cursor_front_mut();
+    cursor.move_prev(); // onto the "ghost": insert_before appends, insert_after prepends
+    cursor.insert_before(9);
+    cursor.insert_after(10);
+    check_links(&m);
+    assert_eq!(m.iter().cloned().collect::<Vec<_>>(), &[10, 7, 1, 8, 2, 3, 4, 5, 6, 9]);
+    let mut cursor = m.cursor_front_mut();
+    cursor.move_prev(); // onto the "ghost": nothing to remove there
+    assert_eq!(cursor.remove_current(), None);
+    cursor.move_next();
+    cursor.move_next();
+    assert_eq!(cursor.remove_current(), Some(7));
+    cursor.move_prev();
+    cursor.move_prev();
+    cursor.move_prev();
+    assert_eq!(cursor.remove_current(), Some(9));
+    cursor.move_next();
+    assert_eq!(cursor.remove_current(), Some(10));
+    check_links(&m);
+    assert_eq!(m.iter().cloned().collect::<Vec<_>>(), &[1, 8, 2, 3, 4, 5, 6]);
+    let mut cursor = m.cursor_front_mut(); // cursor on element 1 for the splice checks
+    let mut p: LinkedList<u32> = LinkedList::new();
+    p.extend(&[100, 101, 102, 103]);
+    let mut q: LinkedList<u32> = LinkedList::new();
+    q.extend(&[200, 201, 202, 203]);
+    cursor.splice_after(p);
+    cursor.splice_before(q);
+    check_links(&m);
+    assert_eq!(
+        m.iter().cloned().collect::<Vec<_>>(),
+        &[200, 201, 202, 203, 1, 100, 101, 102, 103, 8, 2, 3, 4, 5, 6]
+    );
+    let mut cursor = m.cursor_front_mut();
+    cursor.move_prev(); // onto the "ghost": split_before moves the entire contents out
+    let tmp = cursor.split_before();
+    assert_eq!(m.into_iter().collect::<Vec<_>>(), &[]);
+    m = tmp;
+    let mut cursor = m.cursor_front_mut();
+    cursor.move_next();
+    cursor.move_next();
+    cursor.move_next();
+    cursor.move_next();
+    cursor.move_next();
+    cursor.move_next(); // six steps from the front: the cursor is now on 101
+    let tmp = cursor.split_after();
+    assert_eq!(tmp.into_iter().collect::<Vec<_>>(), &[102, 103, 8, 2, 3, 4, 5, 6]);
+    check_links(&m);
+    assert_eq!(m.iter().cloned().collect::<Vec<_>>(), &[200, 201, 202, 203, 1, 100, 101]);
+}
--- a/src/libcore/char/methods.rs
+++ b/src/libcore/char/methods.rs
@ -3,7 +3,7 @@
 use crate::slice;
 use crate::str::from_utf8_unchecked_mut;
 use crate::unicode::printable::is_printable;
-use crate::unicode::tables::{conversions, derived_property, general_category, property};
+use crate::unicode::{self, conversions};

 use super::*;

@ -552,7 +552,7 @@ impl char {
    pub fn is_alphabetic(self) -> bool {
        match self {
            'a'..='z' | 'A'..='Z' => true,
-            c => c > '\x7f' && derived_property::Alphabetic(c),
+            c => c > '\x7f' && unicode::Alphabetic(c),
        }
    }

@ -583,7 +583,7 @@ impl char {
    pub fn is_lowercase(self) -> bool {
        match self {
            'a'..='z' => true,
-            c => c > '\x7f' && derived_property::Lowercase(c),
+            c => c > '\x7f' && unicode::Lowercase(c),
        }
    }

@ -614,7 +614,7 @@ impl char {
    pub fn is_uppercase(self) -> bool {
        match self {
            'A'..='Z' => true,
-            c => c > '\x7f' && derived_property::Uppercase(c),
+            c => c > '\x7f' && unicode::Uppercase(c),
        }
    }

@ -642,7 +642,7 @@ impl char {
    pub fn is_whitespace(self) -> bool {
        match self {
            ' ' | '\x09'..='\x0d' => true,
-            c => c > '\x7f' && property::White_Space(c),
+            c => c > '\x7f' && unicode::White_Space(c),
        }
    }

@ -693,7 +693,7 @@ impl char {
    #[stable(feature = "rust1", since = "1.0.0")]
    #[inline]
    pub fn is_control(self) -> bool {
-        general_category::Cc(self)
+        unicode::Cc(self)
    }

    /// Returns `true` if this `char` has the `Grapheme_Extend` property.
@ -707,7 +707,7 @@ impl char {
    /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
    #[inline]
    pub(crate) fn is_grapheme_extended(self) -> bool {
-        derived_property::Grapheme_Extend(self)
+        unicode::Grapheme_Extend(self)
    }

    /// Returns `true` if this `char` has one of the general categories for numbers.
@ -739,7 +739,7 @@ impl char {
    pub fn is_numeric(self) -> bool {
        match self {
            '0'..='9' => true,
-            c => c > '\x7f' && general_category::N(c),
+            c => c > '\x7f' && unicode::N(c),
        }
    }

--- a/src/libcore/char/mod.rs
+++ b/src/libcore/char/mod.rs
@ -37,9 +37,9 @@ pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error};

 // unstable re-exports
 #[unstable(feature = "unicode_version", issue = "49726")]
-pub use crate::unicode::tables::UNICODE_VERSION;
-#[unstable(feature = "unicode_version", issue = "49726")]
 pub use crate::unicode::version::UnicodeVersion;
+#[unstable(feature = "unicode_version", issue = "49726")]
+pub use crate::unicode::UNICODE_VERSION;

 use crate::fmt::{self, Write};
 use crate::iter::FusedIterator;
--- a/src/libcore/unicode/bool_trie.rs
+++ b/src/libcore/unicode/bool_trie.rs
@ -1,66 +0,0 @@
-/// BoolTrie is a trie for representing a set of Unicode codepoints. It is
-/// implemented with postfix compression (sharing of identical child nodes),
-/// which gives both compact size and fast lookup.
-///
-/// The space of Unicode codepoints is divided into 3 subareas, each
-/// represented by a trie with different depth. In the first (0..0x800), there
-/// is no trie structure at all; each u64 entry corresponds to a bitvector
-/// effectively holding 64 bool values.
-///
-/// In the second (0x800..0x10000), each child of the root node represents a
-/// 64-wide subrange, but instead of storing the full 64-bit value of the leaf,
-/// the trie stores an 8-bit index into a shared table of leaf values. This
-/// exploits the fact that in reasonable sets, many such leaves can be shared.
-///
-/// In the third (0x10000..0x110000), each child of the root node represents a
-/// 4096-wide subrange, and the trie stores an 8-bit index into a 64-byte slice
-/// of a child tree. Each of these 64 bytes represents an index into the table
-/// of shared 64-bit leaf values. This exploits the sparse structure in the
-/// non-BMP range of most Unicode sets.
-pub struct BoolTrie {
-    // 0..0x800 (corresponding to 1 and 2 byte utf-8 sequences)
-    pub r1: [u64; 32], // leaves
-
-    // 0x800..0x10000 (corresponding to 3 byte utf-8 sequences)
-    pub r2: [u8; 992],      // first level
-    pub r3: &'static [u64], // leaves
-
-    // 0x10000..0x110000 (corresponding to 4 byte utf-8 sequences)
-    pub r4: [u8; 256],      // first level
-    pub r5: &'static [u8],  // second level
-    pub r6: &'static [u64], // leaves
-}
-impl BoolTrie {
-    pub fn lookup(&self, c: char) -> bool {
-        let c = c as u32;
-        if c < 0x800 {
-            trie_range_leaf(c, self.r1[(c >> 6) as usize])
-        } else if c < 0x10000 {
-            let child = self.r2[(c >> 6) as usize - 0x20];
-            trie_range_leaf(c, self.r3[child as usize])
-        } else {
-            let child = self.r4[(c >> 12) as usize - 0x10];
-            let leaf = self.r5[((child as usize) << 6) + ((c >> 6) as usize & 0x3f)];
-            trie_range_leaf(c, self.r6[leaf as usize])
-        }
-    }
-}
-
-pub struct SmallBoolTrie {
-    pub(crate) r1: &'static [u8],  // first level
-    pub(crate) r2: &'static [u64], // leaves
-}
-
-impl SmallBoolTrie {
-    pub fn lookup(&self, c: char) -> bool {
-        let c = c as u32;
-        match self.r1.get((c >> 6) as usize) {
-            Some(&child) => trie_range_leaf(c, self.r2[child as usize]),
-            None => false,
-        }
-    }
-}
-
-fn trie_range_leaf(c: u32, bitmap_chunk: u64) -> bool {
-    ((bitmap_chunk >> (c & 63)) & 1) != 0
-}
--- a/src/libcore/unicode/mod.rs
+++ b/src/libcore/unicode/mod.rs
@ -1,15 +1,59 @@
 #![unstable(feature = "unicode_internals", issue = "none")]
 #![allow(missing_docs)]

-mod bool_trie;
 pub(crate) mod printable;
-pub(crate) mod tables;
+mod unicode_data;
 pub(crate) mod version;

+use version::UnicodeVersion;
+
+/// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of
+/// `char` and `str` methods are based on.
+#[unstable(feature = "unicode_version", issue = "49726")]
+pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion {
+    major: unicode_data::UNICODE_VERSION.0,
+    minor: unicode_data::UNICODE_VERSION.1,
+    micro: unicode_data::UNICODE_VERSION.2,
+    _priv: (),
+};
+
 // For use in liballoc, not re-exported in libstd.
 pub mod derived_property {
-    pub use crate::unicode::tables::derived_property::{Case_Ignorable, Cased};
+    pub use super::{Case_Ignorable, Cased};
 }
-pub mod conversions {
-    pub use crate::unicode::tables::conversions::{to_lower, to_upper};
+
+pub use unicode_data::alphabetic::lookup as Alphabetic;
+pub use unicode_data::case_ignorable::lookup as Case_Ignorable;
+pub use unicode_data::cased::lookup as Cased;
+pub use unicode_data::cc::lookup as Cc;
+pub use unicode_data::conversions;
+pub use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
+pub use unicode_data::lowercase::lookup as Lowercase;
+pub use unicode_data::n::lookup as N;
+pub use unicode_data::uppercase::lookup as Uppercase;
+pub use unicode_data::white_space::lookup as White_Space;
+
+/// Tests whether the bit for `needle` is set in a three-level bitset table.
+///
+/// The lookup walks three tables:
+///
+/// 1. `chunk_idx_map` is indexed by `needle / 1024` (one entry per run of
+///    16 words of 64 bits each) and yields a row index into
+///    `bitset_chunk_idx`.
+/// 2. `bitset_chunk_idx[row]` holds 16 entries, one per 64-bit word of that
+///    run; the selected entry is an index into `bitset`.
+/// 3. `bitset[idx]` is the 64-bit word whose bit `needle % 64` is tested.
+///
+/// `(last_chunk_idx, last_chunk_mapping)` describes a single extra chunk
+/// that lies past the end of `chunk_idx_map`: if the computed chunk position
+/// equals `last_chunk_idx`, `last_chunk_mapping` is used as the row index.
+/// Any other position at or beyond `N` returns `false` without touching the
+/// tables.
+///
+/// NOTE(review): presumably the tables come from the generated
+/// `unicode_data` module and share identical rows/words to save space --
+/// confirm against the generator. All table accesses use ordinary
+/// bounds-checked indexing, so an inconsistent table would panic rather than
+/// read out of bounds.
+#[inline(always)]
+fn range_search<const N: usize, const N1: usize, const N2: usize>(
+    needle: u32,
+    chunk_idx_map: &[u8; N],
+    (last_chunk_idx, last_chunk_mapping): (u16, u8),
+    bitset_chunk_idx: &[[u8; 16]; N1],
+    bitset: &[u64; N2],
+) -> bool {
+    // Position of the 64-bit word that would contain `needle`.
+    let bucket_idx = (needle / 64) as usize;
+    // Words are grouped 16 to a chunk: which chunk, and which word within it.
+    let chunk_map_idx = bucket_idx / 16;
+    let chunk_piece = bucket_idx % 16;
+    let chunk_idx = if chunk_map_idx >= N {
+        // Past the end of the map: only the one explicitly described
+        // trailing chunk can match; everything else is reported as unset.
+        if chunk_map_idx == last_chunk_idx as usize {
+            last_chunk_mapping
+        } else {
+            return false;
+        }
+    } else {
+        chunk_idx_map[chunk_map_idx]
+    };
+    let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece];
+    let word = bitset[(idx as usize)];
+    // Test the bit for `needle` within the selected word.
+    (word & (1 << (needle % 64) as u64)) != 0
 }
--- a/src/libcore/unicode/tables.rs
+++ b/src/libcore/unicode/tables.rs
--- a/src/libcore/unicode/unicode.py
+++ b/src/libcore/unicode/unicode.py
@ -1,878 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Regenerate Unicode tables (tables.rs).
-"""
-
-# This script uses the Unicode tables as defined
-# in the UnicodeFiles class.
-
-# Since this should not require frequent updates, we just store this
-# out-of-line and check the tables.rs file into git.
-
-# Note that the "curl" program is required for operation.
-# This script is compatible with Python 2.7 and 3.x.
-
-import argparse
-import datetime
-import fileinput
-import itertools
-import os
-import re
-import textwrap
-import subprocess
-
-from collections import defaultdict, namedtuple
-
-try:
-    # Python 3
-    from itertools import zip_longest
-    from io import StringIO
-except ImportError:
-    # Python 2 compatibility
-    zip_longest = itertools.izip_longest
-    from StringIO import StringIO
-
-try:
-    # Completely optional type hinting
-    # (Python 2 compatible using comments,
-    # see: https://mypy.readthedocs.io/en/latest/python2.html)
-    # This is very helpful in typing-aware IDE like PyCharm.
-    from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Set, Tuple
-except ImportError:
-    pass
-
-
-# We don't use enum.Enum because of Python 2.7 compatibility.
-class UnicodeFiles(object):
-    # ReadMe does not contain any Unicode data, we
-    # only use it to extract versions.
-    README = "ReadMe.txt"
-
-    DERIVED_CORE_PROPERTIES = "DerivedCoreProperties.txt"
-    DERIVED_NORMALIZATION_PROPS = "DerivedNormalizationProps.txt"
-    PROPS = "PropList.txt"
-    SCRIPTS = "Scripts.txt"
-    SPECIAL_CASING = "SpecialCasing.txt"
-    UNICODE_DATA = "UnicodeData.txt"
-
-
-# The order doesn't really matter (Python < 3.6 won't preserve it),
-# we only want to aggregate all the file names.
-ALL_UNICODE_FILES = tuple(
-    value for name, value in UnicodeFiles.__dict__.items()
-    if not name.startswith("_")
-)
-
-assert len(ALL_UNICODE_FILES) == 7, "Unexpected number of unicode files"
-
-# The directory this file is located in.
-THIS_DIR = os.path.dirname(os.path.realpath(__file__))
-
-# Where to download the Unicode data.  The downloaded files
-# will be placed in sub-directories named after Unicode version.
-FETCH_DIR = os.path.join(THIS_DIR, "downloaded")
-
-FETCH_URL_LATEST = "ftp://ftp.unicode.org/Public/UNIDATA/{filename}"
-FETCH_URL_VERSION = "ftp://ftp.unicode.org/Public/{version}/ucd/{filename}"
-
-PREAMBLE = """\
-// NOTE: The following code was generated by "./unicode.py", do not edit directly
-
-#![allow(missing_docs, non_upper_case_globals, non_snake_case, clippy::unreadable_literal)]
-
-use crate::unicode::bool_trie::{{BoolTrie, SmallBoolTrie}};
-use crate::unicode::version::UnicodeVersion;
-""".format(year=datetime.datetime.now().year)
-
-# Mapping taken from Table 12 from:
-# http://www.unicode.org/reports/tr44/#General_Category_Values
-EXPANDED_CATEGORIES = {
-    "Lu": ["LC", "L"], "Ll": ["LC", "L"], "Lt": ["LC", "L"],
-    "Lm": ["L"], "Lo": ["L"],
-    "Mn": ["M"], "Mc": ["M"], "Me": ["M"],
-    "Nd": ["N"], "Nl": ["N"], "No": ["N"],
-    "Pc": ["P"], "Pd": ["P"], "Ps": ["P"], "Pe": ["P"],
-    "Pi": ["P"], "Pf": ["P"], "Po": ["P"],
-    "Sm": ["S"], "Sc": ["S"], "Sk": ["S"], "So": ["S"],
-    "Zs": ["Z"], "Zl": ["Z"], "Zp": ["Z"],
-    "Cc": ["C"], "Cf": ["C"], "Cs": ["C"], "Co": ["C"], "Cn": ["C"],
-}
-
-# This is the (inclusive) range of surrogate codepoints.
-# These are not valid Rust characters.
-SURROGATE_CODEPOINTS_RANGE = (0xd800, 0xdfff)
-
-UnicodeData = namedtuple(
-    "UnicodeData", (
-        # Conversions:
-        "to_upper", "to_lower", "to_title",
-
-        # Decompositions: canonical decompositions, compatibility decomp
-        "canon_decomp", "compat_decomp",
-
-        # Grouped: general categories and combining characters
-        "general_categories", "combines",
-    )
-)
-
-UnicodeVersion = namedtuple(
-    "UnicodeVersion", ("major", "minor", "micro", "as_str")
-)
-
-
-def fetch_files(version=None):
-    # type: (str) -> UnicodeVersion
-    """
-    Fetch all the Unicode files from unicode.org.
-
-    This will use cached files (stored in `FETCH_DIR`) if they exist,
-    creating them if they don't.  In any case, the Unicode version
-    is always returned.
-
-    :param version: The desired Unicode version, as string.
-        (If None, defaults to latest final release available,
-         querying the unicode.org service).
-    """
-    have_version = check_stored_version(version)
-    if have_version:
-        return have_version
-
-    if version:
-        # Check if the desired version exists on the server.
-        get_fetch_url = lambda name: FETCH_URL_VERSION.format(version=version, filename=name)
-    else:
-        # Extract the latest version.
-        get_fetch_url = lambda name: FETCH_URL_LATEST.format(filename=name)
-
-    readme_url = get_fetch_url(UnicodeFiles.README)
-
-    print("Fetching: {}".format(readme_url))
-    readme_content = subprocess.check_output(("curl", readme_url))
-
-    unicode_version = parse_readme_unicode_version(
-        readme_content.decode("utf8")
-    )
-
-    download_dir = get_unicode_dir(unicode_version)
-    if not os.path.exists(download_dir):
-        # For 2.7 compat, we don't use `exist_ok=True`.
-        os.makedirs(download_dir)
-
-    for filename in ALL_UNICODE_FILES:
-        file_path = get_unicode_file_path(unicode_version, filename)
-
-        if os.path.exists(file_path):
-            # Assume file on the server didn't change if it's been saved before.
-            continue
-
-        if filename == UnicodeFiles.README:
-            with open(file_path, "wb") as fd:
-                fd.write(readme_content)
-        else:
-            url = get_fetch_url(filename)
-            print("Fetching: {}".format(url))
-            subprocess.check_call(("curl", "-o", file_path, url))
-
-    return unicode_version
-
-
-def check_stored_version(version):
-    # type: (Optional[str]) -> Optional[UnicodeVersion]
-    """
-    Given desired Unicode version, return the version
-    if stored files are all present, and `None` otherwise.
-    """
-    if not version:
-        # If no desired version specified, we should check what's the latest
-        # version, skipping stored version checks.
-        return None
-
-    fetch_dir = os.path.join(FETCH_DIR, version)
-
-    for filename in ALL_UNICODE_FILES:
-        file_path = os.path.join(fetch_dir, filename)
-
-        if not os.path.exists(file_path):
-            return None
-
-    with open(os.path.join(fetch_dir, UnicodeFiles.README)) as fd:
-        return parse_readme_unicode_version(fd.read())
-
-
-def parse_readme_unicode_version(readme_content):
-    # type: (str) -> UnicodeVersion
-    """
-    Parse the Unicode version contained in their `ReadMe.txt` file.
-    """
-    # "Raw string" is necessary for \d not being treated as escape char
-    # (for the sake of compat with future Python versions).
-    # See: https://docs.python.org/3.6/whatsnew/3.6.html#deprecated-python-behavior
-    pattern = r"for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
-    groups = re.search(pattern, readme_content).groups()
-
-    return UnicodeVersion(*map(int, groups), as_str=".".join(groups))
-
-
-def get_unicode_dir(unicode_version):
-    # type: (UnicodeVersion) -> str
-    """
-    Indicate in which parent dir the Unicode data files should be stored.
-
-    This returns a full, absolute path.
-    """
-    return os.path.join(FETCH_DIR, unicode_version.as_str)
-
-
-def get_unicode_file_path(unicode_version, filename):
-    # type: (UnicodeVersion, str) -> str
-    """
-    Indicate where the Unicode data file should be stored.
-    """
-    return os.path.join(get_unicode_dir(unicode_version), filename)
-
-
-def is_surrogate(n):
-    # type: (int) -> bool
-    """
-    Tell if given codepoint is a surrogate (not a valid Rust character).
-    """
-    return SURROGATE_CODEPOINTS_RANGE[0] <= n <= SURROGATE_CODEPOINTS_RANGE[1]
-
-
-def load_unicode_data(file_path):
-    # type: (str) -> UnicodeData
-    """
-    Load main Unicode data.
-    """
-    # Conversions
-    to_lower = {}   # type: Dict[int, Tuple[int, int, int]]
-    to_upper = {}   # type: Dict[int, Tuple[int, int, int]]
-    to_title = {}   # type: Dict[int, Tuple[int, int, int]]
-
-    # Decompositions
-    compat_decomp = {}   # type: Dict[int, List[int]]
-    canon_decomp = {}    # type: Dict[int, List[int]]
-
-    # Combining characters
-    # FIXME: combines are not used
-    combines = defaultdict(set)   # type: Dict[str, Set[int]]
-
-    # Categories
-    general_categories = defaultdict(set)   # type: Dict[str, Set[int]]
-    category_assigned_codepoints = set()    # type: Set[int]
-
-    all_codepoints = {}
-
-    range_start = -1
-
-    for line in fileinput.input(file_path):
-        data = line.split(";")
-        if len(data) != 15:
-            continue
-        codepoint = int(data[0], 16)
-        if is_surrogate(codepoint):
-            continue
-        if range_start >= 0:
-            for i in range(range_start, codepoint):
-                all_codepoints[i] = data
-            range_start = -1
-        if data[1].endswith(", First>"):
-            range_start = codepoint
-            continue
-        all_codepoints[codepoint] = data
-
-    for code, data in all_codepoints.items():
-        (code_org, name, gencat, combine, bidi,
-         decomp, deci, digit, num, mirror,
-         old, iso, upcase, lowcase, titlecase) = data
-
-        # Generate char to char direct common and simple conversions:
-
-        # Uppercase to lowercase
-        if lowcase != "" and code_org != lowcase:
-            to_lower[code] = (int(lowcase, 16), 0, 0)
-
-        # Lowercase to uppercase
-        if upcase != "" and code_org != upcase:
-            to_upper[code] = (int(upcase, 16), 0, 0)
-
-        # Title case
-        if titlecase.strip() != "" and code_org != titlecase:
-            to_title[code] = (int(titlecase, 16), 0, 0)
-
-        # Store decomposition, if given
-        if decomp:
-            decompositions = decomp.split()[1:]
-            decomp_code_points = [int(i, 16) for i in decompositions]
-
-            if decomp.startswith("<"):
-                # Compatibility decomposition
-                compat_decomp[code] = decomp_code_points
-            else:
-                # Canonical decomposition
-                canon_decomp[code] = decomp_code_points
-
-        # Place letter in categories as appropriate.
-        for cat in itertools.chain((gencat, ), EXPANDED_CATEGORIES.get(gencat, [])):
-            general_categories[cat].add(code)
-            category_assigned_codepoints.add(code)
-
-        # Record combining class, if any.
-        if combine != "0":
-            combines[combine].add(code)
-
-    # Generate Not_Assigned from Assigned.
-    general_categories["Cn"] = get_unassigned_codepoints(category_assigned_codepoints)
-
-    # Other contains Not_Assigned
-    general_categories["C"].update(general_categories["Cn"])
-
-    grouped_categories = group_categories(general_categories)
-
-    # FIXME: combines are not used
-    return UnicodeData(
-        to_lower=to_lower, to_upper=to_upper, to_title=to_title,
-        compat_decomp=compat_decomp, canon_decomp=canon_decomp,
-        general_categories=grouped_categories, combines=combines,
-    )
-
-
-def load_special_casing(file_path, unicode_data):
-    # type: (str, UnicodeData) -> None
-    """
-    Load special casing data and enrich given Unicode data.
-    """
-    for line in fileinput.input(file_path):
-        data = line.split("#")[0].split(";")
-        if len(data) == 5:
-            code, lower, title, upper, _comment = data
-        elif len(data) == 6:
-            code, lower, title, upper, condition, _comment = data
-            if condition.strip():  # Only keep unconditional mappings
-                continue
-        else:
-            continue
-        code = code.strip()
-        lower = lower.strip()
-        title = title.strip()
-        upper = upper.strip()
-        key = int(code, 16)
-        for (map_, values) in ((unicode_data.to_lower, lower),
-                               (unicode_data.to_upper, upper),
-                               (unicode_data.to_title, title)):
-            if values != code:
-                split = values.split()
-
-                codepoints = list(itertools.chain(
-                    (int(i, 16) for i in split),
-                    (0 for _ in range(len(split), 3))
-                ))
-
-                assert len(codepoints) == 3
-                map_[key] = codepoints
-
-
-def group_categories(mapping):
-    # type: (Dict[Any, Iterable[int]]) -> Dict[str, List[Tuple[int, int]]]
-    """
-    Group codepoints mapped in "categories".
-    """
-    return {category: group_codepoints(codepoints)
-            for category, codepoints in mapping.items()}
-
-
-def group_codepoints(codepoints):
-    # type: (Iterable[int]) -> List[Tuple[int, int]]
-    """
-    Group integral values into continuous, disjoint value ranges.
-
-    Performs value deduplication.
-
-    :return: sorted list of pairs denoting start and end of codepoint
-        group values, both ends inclusive.
-
-    >>> group_codepoints([1, 2, 10, 11, 12, 3, 4])
-    [(1, 4), (10, 12)]
-    >>> group_codepoints([1])
-    [(1, 1)]
-    >>> group_codepoints([1, 5, 6])
-    [(1, 1), (5, 6)]
-    >>> group_codepoints([])
-    []
-    """
-    sorted_codes = sorted(set(codepoints))
-    result = []     # type: List[Tuple[int, int]]
-
-    if not sorted_codes:
-        return result
-
-    next_codes = sorted_codes[1:]
-    start_code = sorted_codes[0]
-
-    for code, next_code in zip_longest(sorted_codes, next_codes, fillvalue=None):
-        if next_code is None or next_code - code != 1:
-            result.append((start_code, code))
-            start_code = next_code
-
-    return result
-
-
-def ungroup_codepoints(codepoint_pairs):
-    # type: (Iterable[Tuple[int, int]]) -> List[int]
-    """
-    The inverse of group_codepoints -- produce a flat list of values
-    from value range pairs.
-
-    >>> ungroup_codepoints([(1, 4), (10, 12)])
-    [1, 2, 3, 4, 10, 11, 12]
-    >>> ungroup_codepoints([(1, 1), (5, 6)])
-    [1, 5, 6]
-    >>> ungroup_codepoints(group_codepoints([1, 2, 7, 8]))
-    [1, 2, 7, 8]
-    >>> ungroup_codepoints([])
-    []
-    """
-    return list(itertools.chain.from_iterable(
-        range(lo, hi + 1) for lo, hi in codepoint_pairs
-    ))
-
-
-def get_unassigned_codepoints(assigned_codepoints):
-    # type: (Set[int]) -> Set[int]
-    """
-    Given a set of "assigned" codepoints, return the set of all
-    codepoints that are neither assigned nor surrogates.
-    """
-    return {i for i in range(0, 0x110000)
-            if i not in assigned_codepoints and not is_surrogate(i)}
-
-
-def generate_table_lines(items, indent, wrap=98):
-    # type: (Iterable[str], int, int) -> Iterator[str]
-    """
-    Given table items, generate wrapped lines of text with comma-separated items.
-
-    This is a generator function.
-
-    :param wrap: soft wrap limit (characters per line), integer.
-    """
-    line = " " * indent
-    first = True
-    for item in items:
-        if len(line) + len(item) < wrap:
-            if first:
-                line += item
-            else:
-                line += ", " + item
-            first = False
-        else:
-            yield line + ",\n"
-            line = " " * indent + item
-
-    yield line
-
-
-def load_properties(file_path, interesting_props):
-    # type: (str, Iterable[str]) -> Dict[str, List[Tuple[int, int]]]
-    """
-    Load properties data and return in grouped form.
-    """
-    props = defaultdict(list)   # type: Dict[str, List[Tuple[int, int]]]
-    # "Raw string" is necessary for `\.` and `\w` not to be treated as escape chars
-    # (for the sake of compat with future Python versions).
-    # See: https://docs.python.org/3.6/whatsnew/3.6.html#deprecated-python-behavior
-    re1 = re.compile(r"^ *([0-9A-F]+) *; *(\w+)")
-    re2 = re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
-
-    for line in fileinput.input(file_path):
-        match = re1.match(line) or re2.match(line)
-        if match:
-            groups = match.groups()
-
-            if len(groups) == 2:
-                # `re1` matched (2 groups).
-                d_lo, prop = groups
-                d_hi = d_lo
-            else:
-                d_lo, d_hi, prop = groups
-        else:
-            continue
-
-        if interesting_props and prop not in interesting_props:
-            continue
-
-        lo_value = int(d_lo, 16)
-        hi_value = int(d_hi, 16)
-
-        props[prop].append((lo_value, hi_value))
-
-    # Optimize if possible.
-    for prop in props:
-        props[prop] = group_codepoints(ungroup_codepoints(props[prop]))
-
-    return props
-
-
-def escape_char(c):
-    # type: (int) -> str
-    r"""
-    Escape a codepoint for use as Rust char literal.
-
-    Outputs are OK to use as Rust source code as char literals
-    and they also include necessary quotes.
-
-    >>> escape_char(97)
-    "'\\u{61}'"
-    >>> escape_char(0)
-    "'\\0'"
-    """
-    return r"'\u{%x}'" % c if c != 0 else r"'\0'"
-
-
-def format_char_pair(pair):
-    # type: (Tuple[int, int]) -> str
-    """
-    Format a pair of two Rust chars.
-    """
-    return "(%s,%s)" % (escape_char(pair[0]), escape_char(pair[1]))
-
-
-def generate_table(
-    name,   # type: str
-    items,  # type: List[Tuple[int, int]]
-    decl_type="&[(char, char)]",    # type: str
-    is_pub=True,                    # type: bool
-    format_item=format_char_pair,   # type: Callable[[Tuple[int, int]], str]
-):
-    # type: (...) -> Iterator[str]
-    """
-    Generate a nicely formatted Rust constant "table" array.
-
-    This generates actual Rust code.
-    """
-    pub_string = ""
-    if is_pub:
-        pub_string = "pub "
-
-    yield "\n"
-    yield "    #[rustfmt::skip]\n"
-    yield "    %sconst %s: %s = &[\n" % (pub_string, name, decl_type)
-
-    data = []
-    first = True
-    for item in items:
-        if not first:
-            data.append(",")
-        first = False
-        data.extend(format_item(item))
-
-    for table_line in generate_table_lines("".join(data).split(","), 8):
-        yield table_line
-
-    yield "\n    ];\n"
-
-
-def compute_trie(raw_data, chunk_size):
-    # type: (List[int], int) -> Tuple[List[int], List[int]]
-    """
-    Compute postfix-compressed trie.
-
-    See: bool_trie.rs for more details.
-
-    >>> compute_trie([1, 2, 3, 1, 2, 3, 4, 5, 6], 3)
-    ([0, 0, 1], [1, 2, 3, 4, 5, 6])
-    >>> compute_trie([1, 2, 3, 1, 2, 4, 4, 5, 6], 3)
-    ([0, 1, 2], [1, 2, 3, 1, 2, 4, 4, 5, 6])
-    """
-    root = []
-    childmap = {}       # type: Dict[Tuple[int, ...], int]
-    child_data = []
-
-    assert len(raw_data) % chunk_size == 0, "Chunks must be equally sized"
-
-    for i in range(len(raw_data) // chunk_size):
-        data = raw_data[i * chunk_size : (i + 1) * chunk_size]
-
-        # Postfix compression of child nodes (data chunks)
-        # (identical child nodes are shared).
-
-        # Make a tuple out of the list so it's hashable.
-        child = tuple(data)
-        if child not in childmap:
-            childmap[child] = len(childmap)
-            child_data.extend(data)
-
-        root.append(childmap[child])
-
-    return root, child_data
-
-
-def generate_bool_trie(name, codepoint_ranges, is_pub=False):
-    # type: (str, List[Tuple[int, int]], bool) -> Iterator[str]
-    """
-    Generate Rust code for BoolTrie struct.
-
-    This yields string fragments that should be joined to produce
-    the final string.
-
-    See: `bool_trie.rs`.
-    """
-    chunk_size = 64
-    rawdata = [False] * 0x110000
-    for (lo, hi) in codepoint_ranges:
-        for cp in range(lo, hi + 1):
-            rawdata[cp] = True
-
-    # Convert to bitmap chunks of `chunk_size` bits each.
-    chunks = []
-    for i in range(0x110000 // chunk_size):
-        chunk = 0
-        for j in range(chunk_size):
-            if rawdata[i * chunk_size + j]:
-                chunk |= 1 << j
-        chunks.append(chunk)
-
-    pub_string = ""
-    if is_pub:
-        pub_string = "pub "
-
-    yield "\n"
-    yield "    #[rustfmt::skip]\n"
-    yield "    %sconst %s: &super::BoolTrie = &super::BoolTrie {\n" % (pub_string, name)
-    yield "        r1: [\n"
-    data = ("0x%016x" % chunk for chunk in chunks[:0x800 // chunk_size])
-    for fragment in generate_table_lines(data, 12):
-        yield fragment
-    yield "\n        ],\n"
-
-    # 0x800..0x10000 trie
-    (r2, r3) = compute_trie(chunks[0x800 // chunk_size : 0x10000 // chunk_size], 64 // chunk_size)
-    yield "        r2: [\n"
-    data = map(str, r2)
-    for fragment in generate_table_lines(data, 12):
-        yield fragment
-    yield "\n        ],\n"
-
-    yield "        r3: &[\n"
-    data = ("0x%016x" % node for node in r3)
-    for fragment in generate_table_lines(data, 12):
-        yield fragment
-    yield "\n        ],\n"
-
-    # 0x10000..0x110000 trie
-    (mid, r6) = compute_trie(chunks[0x10000 // chunk_size : 0x110000 // chunk_size],
-                             64 // chunk_size)
-    (r4, r5) = compute_trie(mid, 64)
-
-    yield "        r4: [\n"
-    data = map(str, r4)
-    for fragment in generate_table_lines(data, 12):
-        yield fragment
-    yield "\n        ],\n"
-
-    yield "        r5: &[\n"
-    data = map(str, r5)
-    for fragment in generate_table_lines(data, 12):
-        yield fragment
-    yield "\n        ],\n"
-
-    yield "        r6: &[\n"
-    data = ("0x%016x" % node for node in r6)
-    for fragment in generate_table_lines(data, 12):
-        yield fragment
-    yield "\n        ],\n"
-
-    yield "    };\n"
-
-
-def generate_small_bool_trie(name, codepoint_ranges, is_pub=False):
-    # type: (str, List[Tuple[int, int]], bool) -> Iterator[str]
-    """
-    Generate Rust code for `SmallBoolTrie` struct.
-
-    See: `bool_trie.rs`.
-    """
-    last_chunk = max(hi // 64 for (lo, hi) in codepoint_ranges)
-    n_chunks = last_chunk + 1
-    chunks = [0] * n_chunks
-    for (lo, hi) in codepoint_ranges:
-        for cp in range(lo, hi + 1):
-            assert cp // 64 < len(chunks)
-            chunks[cp // 64] |= 1 << (cp & 63)
-
-    pub_string = ""
-    if is_pub:
-        pub_string = "pub "
-
-    yield "\n"
-    yield "    #[rustfmt::skip]\n"
-    yield ("    %sconst %s: &super::SmallBoolTrie = &super::SmallBoolTrie {\n"
-           % (pub_string, name))
-
-    (r1, r2) = compute_trie(chunks, 1)
-
-    yield "        r1: &[\n"
-    data = (str(node) for node in r1)
-    for fragment in generate_table_lines(data, 12):
-        yield fragment
-    yield "\n        ],\n"
-
-    yield "        r2: &[\n"
-    data = ("0x%016x" % node for node in r2)
-    for fragment in generate_table_lines(data, 12):
-        yield fragment
-    yield "\n        ],\n"
-
-    yield "    };\n"
-
-
-def generate_property_module(mod, grouped_categories, category_subset):
-    # type: (str, Dict[str, List[Tuple[int, int]]], Iterable[str]) -> Iterator[str]
-    """
-    Generate Rust code for module defining properties.
-    """
-
-    yield "pub(crate) mod %s {" % mod
-    for cat in sorted(category_subset):
-        if cat in ("Cc", "White_Space"):
-            generator = generate_small_bool_trie("%s_table" % cat, grouped_categories[cat])
-        else:
-            generator = generate_bool_trie("%s_table" % cat, grouped_categories[cat])
-
-        for fragment in generator:
-            yield fragment
-
-        yield "\n"
-        yield "    pub fn %s(c: char) -> bool {\n" % cat
-        yield "        %s_table.lookup(c)\n" % cat
-        yield "    }\n"
-
-    yield "}\n\n"
-
-
-def generate_conversions_module(unicode_data):
-    # type: (UnicodeData) -> Iterator[str]
-    """
-    Generate Rust code for module defining conversions.
-    """
-
-    yield "pub(crate) mod conversions {"
-    yield """
-    pub fn to_lower(c: char) -> [char; 3] {
-        match bsearch_case_table(c, to_lowercase_table) {
-            None => [c, '\\0', '\\0'],
-            Some(index) => to_lowercase_table[index].1,
-        }
-    }
-
-    pub fn to_upper(c: char) -> [char; 3] {
-        match bsearch_case_table(c, to_uppercase_table) {
-            None => [c, '\\0', '\\0'],
-            Some(index) => to_uppercase_table[index].1,
-        }
-    }
-
-    fn bsearch_case_table(c: char, table: &[(char, [char; 3])]) -> Option<usize> {
-        table.binary_search_by(|&(key, _)| key.cmp(&c)).ok()
-    }\n"""
-
-    decl_type = "&[(char, [char; 3])]"
-    format_conversion = lambda x: "({},[{},{},{}])".format(*(
-        escape_char(c) for c in (x[0], x[1][0], x[1][1], x[1][2])
-    ))
-
-    for fragment in generate_table(
-        name="to_lowercase_table",
-        items=sorted(unicode_data.to_lower.items(), key=lambda x: x[0]),
-        decl_type=decl_type,
-        is_pub=False,
-        format_item=format_conversion
-    ):
-        yield fragment
-
-    for fragment in generate_table(
-        name="to_uppercase_table",
-        items=sorted(unicode_data.to_upper.items(), key=lambda x: x[0]),
-        decl_type=decl_type,
-        is_pub=False,
-        format_item=format_conversion
-    ):
-        yield fragment
-
-    yield "}\n"
-
-
-def parse_args():
-    # type: () -> argparse.Namespace
-    """
-    Parse command line arguments.
-    """
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("-v", "--version", default=None, type=str,
-                        help="Unicode version to use (if not specified,"
-                             " defaults to latest release).")
-
-    return parser.parse_args()
-
-
-def main():
-    # type: () -> None
-    """
-    Script entry point.
-    """
-    args = parse_args()
-
-    unicode_version = fetch_files(args.version)
-    print("Using Unicode version: {}".format(unicode_version.as_str))
-
-    # All the writing happens entirely in memory, we only write to file
-    # once we have generated the file content (it's not very large, <1 MB).
-    buf = StringIO()
-    buf.write(PREAMBLE)
-
-    unicode_version_notice = textwrap.dedent("""
-    /// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of
-    /// `char` and `str` methods are based on.
-    #[unstable(feature = "unicode_version", issue = "49726")]
-    pub const UNICODE_VERSION: UnicodeVersion =
-        UnicodeVersion {{ major: {v.major}, minor: {v.minor}, micro: {v.micro}, _priv: () }};
-    """).format(v=unicode_version)
-    buf.write(unicode_version_notice)
-
-    get_path = lambda f: get_unicode_file_path(unicode_version, f)
-
-    unicode_data = load_unicode_data(get_path(UnicodeFiles.UNICODE_DATA))
-    load_special_casing(get_path(UnicodeFiles.SPECIAL_CASING), unicode_data)
-
-    want_derived = {"Alphabetic", "Lowercase", "Uppercase",
-                    "Cased", "Case_Ignorable", "Grapheme_Extend"}
-    derived = load_properties(get_path(UnicodeFiles.DERIVED_CORE_PROPERTIES), want_derived)
-
-    props = load_properties(get_path(UnicodeFiles.PROPS),
-                            {"White_Space", "Join_Control", "Noncharacter_Code_Point"})
-
-    # Category tables
-    for (name, categories, category_subset) in (
-            ("general_category", unicode_data.general_categories, ["N", "Cc"]),
-            ("derived_property", derived, want_derived),
-            ("property", props, ["White_Space"])
-    ):
-        for fragment in generate_property_module(name, categories, category_subset):
-            buf.write(fragment)
-
-    for fragment in generate_conversions_module(unicode_data):
-        buf.write(fragment)
-
-    tables_rs_path = os.path.join(THIS_DIR, "tables.rs")
-
-    # Actually write out the file content.
-    # Will overwrite the file if it exists.
-    with open(tables_rs_path, "w") as fd:
-        fd.write(buf.getvalue())
-
-    print("Regenerated tables.rs.")
-
-
-if __name__ == "__main__":
-    main()
--- a/src/libcore/unicode/unicode_data.rs
+++ b/src/libcore/unicode/unicode_data.rs
--- a/src/librustc/traits/error_reporting.rs
+++ b/src/librustc/traits/error_reporting.rs
@ -2479,19 +2479,21 @@ impl<'a, 'tcx> InferCtxt<'a, 'tcx> {
                );
                eq
            })
-            .map(|ty::GeneratorInteriorTypeCause { span, scope_span, .. }| {
-                (span, source_map.span_to_snippet(*span), scope_span)
+            .map(|ty::GeneratorInteriorTypeCause { span, scope_span, expr, .. }| {
+                (span, source_map.span_to_snippet(*span), scope_span, expr)
            });
+
        debug!(
            "maybe_note_obligation_cause_for_async_await: target_ty={:?} \
                generator_interior_types={:?} target_span={:?}",
            target_ty, tables.generator_interior_types, target_span
        );
-        if let Some((target_span, Ok(snippet), scope_span)) = target_span {
+        if let Some((target_span, Ok(snippet), scope_span, expr)) = target_span {
            self.note_obligation_cause_for_async_await(
                err,
                *target_span,
                scope_span,
+                *expr,
                snippet,
                generator_did,
                last_generator,
@ -2514,6 +2516,7 @@ impl<'a, 'tcx> InferCtxt<'a, 'tcx> {
        err: &mut DiagnosticBuilder<'_>,
        target_span: Span,
        scope_span: &Option<Span>,
+        expr: Option<hir::HirId>,
        snippet: String,
        first_generator: DefId,
        last_generator: Option<DefId>,
@ -2549,6 +2552,7 @@ impl<'a, 'tcx> InferCtxt<'a, 'tcx> {
        // not implemented.
        let is_send = self.tcx.is_diagnostic_item(sym::send_trait, trait_ref.def_id);
        let is_sync = self.tcx.is_diagnostic_item(sym::sync_trait, trait_ref.def_id);
+        let hir = self.tcx.hir();
        let trait_explanation = if is_send || is_sync {
            let (trait_name, trait_verb) =
                if is_send { ("`Send`", "sent") } else { ("`Sync`", "shared") };
@ -2564,8 +2568,8 @@ impl<'a, 'tcx> InferCtxt<'a, 'tcx> {

            let message = if let Some(name) = last_generator
                .and_then(|generator_did| self.tcx.parent(generator_did))
-                .and_then(|parent_did| self.tcx.hir().as_local_hir_id(parent_did))
-                .and_then(|parent_hir_id| self.tcx.hir().opt_name(parent_hir_id))
+                .and_then(|parent_did| hir.as_local_hir_id(parent_did))
+                .and_then(|parent_hir_id| hir.opt_name(parent_hir_id))
            {
                format!("future returned by `{}` is not {}", name, trait_name)
            } else {
@ -2581,7 +2585,7 @@ impl<'a, 'tcx> InferCtxt<'a, 'tcx> {
        };

        // Look at the last interior type to get a span for the `.await`.
-        let await_span = tables.generator_interior_types.iter().map(|i| i.span).last().unwrap();
+        let await_span = tables.generator_interior_types.iter().map(|t| t.span).last().unwrap();
        let mut span = MultiSpan::from_span(await_span);
        span.push_span_label(
            await_span,
@ -2606,6 +2610,22 @@ impl<'a, 'tcx> InferCtxt<'a, 'tcx> {
            ),
        );

+        if let Some(expr_id) = expr {
+            let expr = hir.expect_expr(expr_id);
+            let is_ref = tables.expr_adjustments(expr).iter().any(|adj| adj.is_region_borrow());
+            let parent = hir.get_parent_node(expr_id);
+            if let Some(hir::Node::Expr(e)) = hir.find(parent) {
+                let method_span = hir.span(parent);
+                if tables.is_method_call(e) && is_ref {
+                    err.span_help(
+                        method_span,
+                        "consider moving this method call into a `let` \
+                        binding to create a shorter lived borrow",
+                    );
+                }
+            }
+        }
+
        // Add a note for the item obligation that remains - normally a note pointing to the
        // bound that introduced the obligation (e.g. `T: Send`).
        debug!("note_obligation_cause_for_async_await: next_code={:?}", next_code);
--- a/src/librustc/ty/adjustment.rs
+++ b/src/librustc/ty/adjustment.rs
@ -81,6 +81,15 @@ pub struct Adjustment<'tcx> {
    pub target: Ty<'tcx>,
 }

+impl Adjustment<'tcx> {
+    /// Returns `true` if this adjustment's `kind` is
+    /// `Adjust::Borrow(AutoBorrow::Ref(..))`, and `false` for every other kind.
+    pub fn is_region_borrow(&self) -> bool {
+        match self.kind {
+            Adjust::Borrow(AutoBorrow::Ref(..)) => true,
+            _ => false,
+        }
+    }
+}
+
 #[derive(Clone, Debug, RustcEncodable, RustcDecodable, HashStable, TypeFoldable)]
 pub enum Adjust<'tcx> {
    /// Go from ! to any type.
--- a/src/librustc/ty/context.rs
+++ b/src/librustc/ty/context.rs
@ -315,8 +315,7 @@ pub struct ResolvedOpaqueTy<'tcx> {
 ///
 /// Here, we would store the type `T`, the span of the value `x`, and the "scope-span" for
 /// the scope that contains `x`.
-#[derive(RustcEncodable, RustcDecodable, Clone, Debug, Eq, Hash, PartialEq)]
-#[derive(HashStable, TypeFoldable)]
+#[derive(RustcEncodable, RustcDecodable, Clone, Debug, Eq, Hash, PartialEq, HashStable)]
 pub struct GeneratorInteriorTypeCause<'tcx> {
    /// Type of the captured binding.
    pub ty: Ty<'tcx>,
@ -324,6 +323,8 @@ pub struct GeneratorInteriorTypeCause<'tcx> {
    pub span: Span,
    /// Span of the scope of the captured binding.
    pub scope_span: Option<Span>,
+    /// `HirId` of the expression the type was evaluated from, if there is one.
+    pub expr: Option<hir::HirId>,
 }

 #[derive(RustcEncodable, RustcDecodable, Debug)]
@ -436,7 +437,7 @@ pub struct TypeckTables<'tcx> {
    /// entire variable.
    pub upvar_list: ty::UpvarListMap,

-    /// Stores the type, span and optional scope span of all types
+    /// Stores the type, expression, span and optional scope span of all types
    /// that are live across the yield of this generator (if a generator).
    pub generator_interior_types: Vec<GeneratorInteriorTypeCause<'tcx>>,
 }
--- a/src/librustc_driver/pretty.rs
+++ b/src/librustc_driver/pretty.rs
@ -429,7 +429,6 @@ pub fn print_after_hir_lowering<'tcx>(
        PpmSource(s) => {
            // Silently ignores an identified node.
            let out = &mut out;
-            let src = src.clone();
            call_with_pp_support(&s, tcx.sess, Some(tcx), move |annotation| {
                debug!("pretty printing source code {:?}", s);
                let sess = annotation.sess();
@ -447,7 +446,6 @@ pub fn print_after_hir_lowering<'tcx>(

        PpmHir(s) => {
            let out = &mut out;
-            let src = src.clone();
            call_with_pp_support_hir(&s, tcx, move |annotation, krate| {
                debug!("pretty printing source code {:?}", s);
                let sess = annotation.sess();
--- a/src/librustc_parse/parser/ty.rs
+++ b/src/librustc_parse/parser/ty.rs
@ -500,7 +500,7 @@ impl<'a> Parser<'a> {
            err.span_suggestion_short(
                lo.to(self.prev_span),
                "remove the parentheses",
-                snippet.to_owned(),
+                snippet,
                Applicability::MachineApplicable,
            );
        }
--- a/src/librustc_resolve/imports.rs
+++ b/src/librustc_resolve/imports.rs
@ -718,7 +718,7 @@ impl<'a, 'b> ImportResolver<'a, 'b> {
        }

        if !errors.is_empty() {
-            self.throw_unresolved_import_error(errors.clone(), None);
+            self.throw_unresolved_import_error(errors, None);
        }
    }

--- a/src/librustc_typeck/check/generator_interior.rs
+++ b/src/librustc_typeck/check/generator_interior.rs
@ -97,6 +97,7 @@ impl<'a, 'tcx> InteriorVisitor<'a, 'tcx> {
                        span: source_span,
                        ty: &ty,
                        scope_span,
+                        expr: expr.map(|e| e.hir_id),
                    })
                    .or_insert(entries);
            }
@ -164,17 +165,25 @@ pub fn resolve_interior<'a, 'tcx>(
    // which means that none of the regions inside relate to any other, even if
    // typeck had previously found constraints that would cause them to be related.
    let mut counter = 0;
-    let types = fcx.tcx.fold_regions(&types, &mut false, |_, current_depth| {
+    let fold_types: Vec<_> = types.iter().map(|(t, _)| t.ty).collect();
+    let folded_types = fcx.tcx.fold_regions(&fold_types, &mut false, |_, current_depth| {
        counter += 1;
        fcx.tcx.mk_region(ty::ReLateBound(current_depth, ty::BrAnon(counter)))
    });

    // Store the generator types and spans into the tables for this generator.
-    let interior_types = types.iter().map(|t| t.0.clone()).collect::<Vec<_>>();
-    visitor.fcx.inh.tables.borrow_mut().generator_interior_types = interior_types;
+    let types = types
+        .into_iter()
+        .zip(&folded_types)
+        .map(|((mut interior_cause, _), ty)| {
+            interior_cause.ty = ty;
+            interior_cause
+        })
+        .collect();
+    visitor.fcx.inh.tables.borrow_mut().generator_interior_types = types;

    // Extract type components
-    let type_list = fcx.tcx.mk_type_list(types.into_iter().map(|t| (t.0).ty));
+    let type_list = fcx.tcx.mk_type_list(folded_types.iter());

    let witness = fcx.tcx.mk_generator_witness(ty::Binder::bind(type_list));

--- a/src/librustc_typeck/collect.rs
+++ b/src/librustc_typeck/collect.rs
@ -1806,6 +1806,16 @@ fn find_opaque_ty_constraints(tcx: TyCtxt<'_>, def_id: DefId) -> Ty<'_> {
    }
 }

+/// Whether any type argument in `generic_args` satisfies `is_suggestable_infer_ty`.
+/// Generic arguments that are not `hir::GenericArg::Type` are ignored.
+fn are_suggestable_generic_args(generic_args: &[hir::GenericArg<'_>]) -> bool {
+    generic_args
+        .iter()
+        .filter_map(|arg| match arg {
+            hir::GenericArg::Type(ty) => Some(ty),
+            _ => None,
+        })
+        .any(is_suggestable_infer_ty)
+}
+
 /// Whether `ty` is a type with `_` placeholders that can be infered. Used in diagnostics only to
 /// use inference to provide suggestions for the appropriate type if possible.
 fn is_suggestable_infer_ty(ty: &hir::Ty<'_>) -> bool {
@ -1815,13 +1825,16 @@ fn is_suggestable_infer_ty(ty: &hir::Ty<'_>) -> bool {
        Slice(ty) | Array(ty, _) => is_suggestable_infer_ty(ty),
        Tup(tys) => tys.iter().any(is_suggestable_infer_ty),
        Ptr(mut_ty) | Rptr(_, mut_ty) => is_suggestable_infer_ty(mut_ty.ty),
-        Def(_, generic_args) => generic_args
-            .iter()
-            .filter_map(|arg| match arg {
-                hir::GenericArg::Type(ty) => Some(ty),
-                _ => None,
-            })
-            .any(is_suggestable_infer_ty),
+        Def(_, generic_args) => are_suggestable_generic_args(generic_args),
+        Path(hir::QPath::TypeRelative(ty, segment)) => {
+            is_suggestable_infer_ty(ty) || are_suggestable_generic_args(segment.generic_args().args)
+        }
+        Path(hir::QPath::Resolved(ty_opt, hir::Path { segments, .. })) => {
+            ty_opt.map_or(false, is_suggestable_infer_ty)
+                || segments
+                    .iter()
+                    .any(|segment| are_suggestable_generic_args(segment.generic_args().args))
+        }
        _ => false,
    }
 }
--- a/src/librustdoc/test.rs
+++ b/src/librustdoc/test.rs
@ -704,7 +704,7 @@ impl Tester for Collector {
        debug!("creating test {}: {}", name, test);
        self.tests.push(testing::TestDescAndFn {
            desc: testing::TestDesc {
-                name: testing::DynTestName(name.clone()),
+                name: testing::DynTestName(name),
                ignore: match config.ignore {
                    Ignore::All => true,
                    Ignore::None => false,
--- a/src/libtest/lib.rs
+++ b/src/libtest/lib.rs
@ -553,7 +553,7 @@ fn run_test_in_process(
        Err(e) => calc_result(&desc, Err(e.as_ref()), &time_opts, &exec_time),
    };
    let stdout = data.lock().unwrap().to_vec();
-    let message = CompletedTest::new(desc.clone(), test_result, exec_time, stdout);
+    let message = CompletedTest::new(desc, test_result, exec_time, stdout);
    monitor_ch.send(message).unwrap();
 }

@ -602,7 +602,7 @@ fn spawn_test_subprocess(
        (result, test_output, exec_time)
    })();

-    let message = CompletedTest::new(desc.clone(), result, exec_time, test_output);
+    let message = CompletedTest::new(desc, result, exec_time, test_output);
    monitor_ch.send(message).unwrap();
 }

--- a/src/test/mir-opt/inline/inline-into-box-place.rs
+++ b/src/test/mir-opt/inline/inline-into-box-place.rs
@ -1,5 +1,6 @@
 // ignore-tidy-linelength
 // ignore-wasm32-bare compiled with panic=abort by default
+// compile-flags: -Z mir-opt-level=3
 #![feature(box_syntax)]

 fn main() {
--- a/src/test/ui/associated-types/issue-64848.rs
+++ b/src/test/ui/associated-types/issue-64848.rs
@ -0,0 +1,29 @@
+// build-pass
+
+trait AssociatedConstant {
+    const DATA: ();
+}
+
+impl<F, T> AssociatedConstant for F
+where
+    F: FnOnce() -> T,
+    T: AssociatedConstant,
+{
+    const DATA: () = T::DATA;
+}
+
+impl AssociatedConstant for () {
+    const DATA: () = ();
+}
+
+fn foo() -> impl AssociatedConstant {
+    ()
+}
+
+fn get_data<T: AssociatedConstant>(_: T) -> &'static () {
+    &T::DATA
+}
+
+fn main() {
+    get_data(foo);
+}
--- a/src/test/ui/async-await/issue-64130-4-async-move.stderr
+++ b/src/test/ui/async-await/issue-64130-4-async-move.stderr
@ -16,6 +16,11 @@ LL |                 let _x = get().await;
 ...
 LL |     }
   |     - `client` is later dropped here
+help: consider moving this method call into a `let` binding to create a shorter lived borrow
+  --> $DIR/issue-64130-4-async-move.rs:19:15
+   |
+LL |         match client.status() {
+   |               ^^^^^^^^^^^^^^^
   = note: the return type of a function must have a statically known size

 error: aborting due to previous error
--- a/src/test/ui/generator/not-send-sync.stderr
+++ b/src/test/ui/generator/not-send-sync.stderr
@ -20,7 +20,7 @@ LL |     fn assert_sync<T: Sync>(_: T) {}
 LL |     assert_sync(|| {
   |     ^^^^^^^^^^^ future returned by `main` is not `Sync`
   |
-   = help: within `[generator@$DIR/not-send-sync.rs:9:17: 13:6 {std::cell::Cell<i32>, ()}]`, the trait `std::marker::Sync` is not implemented for `std::cell::Cell<i32>`
+   = help: within `[generator@$DIR/not-send-sync.rs:9:17: 13:6 {std::cell::Cell<i32>, (), ()}]`, the trait `std::marker::Sync` is not implemented for `std::cell::Cell<i32>`
 note: future is not `Sync` as this value is used across an yield
  --> $DIR/not-send-sync.rs:12:9
   |
--- a/src/test/ui/impl-trait/recursive-impl-trait-type-indirect.stderr
+++ b/src/test/ui/impl-trait/recursive-impl-trait-type-indirect.stderr
@ -76,7 +76,7 @@ error[E0720]: opaque type expands to a recursive type
 LL | fn generator_capture() -> impl Sized {
   |                           ^^^^^^^^^^ expands to a recursive type
   |
-   = note: expanded type is `[generator@$DIR/recursive-impl-trait-type-indirect.rs:50:5: 50:26 x:impl Sized {()}]`
+   = note: expanded type is `[generator@$DIR/recursive-impl-trait-type-indirect.rs:50:5: 50:26 x:impl Sized {(), ()}]`

 error[E0720]: opaque type expands to a recursive type
  --> $DIR/recursive-impl-trait-type-indirect.rs:53:26
@ -92,7 +92,7 @@ error[E0720]: opaque type expands to a recursive type
 LL | fn generator_hold() -> impl Sized {
   |                        ^^^^^^^^^^ expands to a recursive type
   |
-   = note: expanded type is `[generator@$DIR/recursive-impl-trait-type-indirect.rs:58:5: 62:6 {impl Sized, ()}]`
+   = note: expanded type is `[generator@$DIR/recursive-impl-trait-type-indirect.rs:58:5: 62:6 {impl Sized, (), ()}]`

 error[E0720]: opaque type expands to a recursive type
  --> $DIR/recursive-impl-trait-type-indirect.rs:69:26
--- a/src/test/ui/issues/issue-66473.rs
+++ b/src/test/ui/issues/issue-66473.rs
--- a/src/test/ui/issues/issue-66473.stderr
+++ b/src/test/ui/issues/issue-66473.stderr
--- a/src/test/ui/type-alias-impl-trait/issue-65918.rs
+++ b/src/test/ui/type-alias-impl-trait/issue-65918.rs
@ -0,0 +1,49 @@
+// build-pass
+
+#![feature(type_alias_impl_trait)]
+
+use std::marker::PhantomData;
+
+/* copied Index and TryFrom for convenience (and simplicity) */
+trait MyIndex<T> {
+    type O;
+    fn my_index(self) -> Self::O;
+}
+trait MyFrom<T>: Sized {
+    type Error;
+    fn my_from(value: T) -> Result<Self, Self::Error>;
+}
+
+/* MCVE starts here */
+trait F {}
+impl F for () {}
+type DummyT<T> = impl F;
+fn _dummy_t<T>() -> DummyT<T> {}
+
+struct Phantom1<T>(PhantomData<T>);
+struct Phantom2<T>(PhantomData<T>);
+struct Scope<T>(Phantom2<DummyT<T>>);
+
+impl<T> Scope<T> {
+    fn new() -> Self {
+        unimplemented!()
+    }
+}
+
+impl<T> MyFrom<Phantom2<T>> for Phantom1<T> {
+    type Error = ();
+    fn my_from(_: Phantom2<T>) -> Result<Self, Self::Error> {
+        unimplemented!()
+    }
+}
+
+impl<T: MyFrom<Phantom2<DummyT<U>>>, U> MyIndex<Phantom1<T>> for Scope<U> {
+    type O = T;
+    fn my_index(self) -> Self::O {
+        MyFrom::my_from(self.0).ok().unwrap()
+    }
+}
+
+fn main() {
+    let _pos: Phantom1<DummyT<()>> = Scope::new().my_index();
+}
--- a/src/test/ui/typeck/typeck_type_placeholder_item.rs
+++ b/src/test/ui/typeck/typeck_type_placeholder_item.rs
@ -68,6 +68,13 @@ struct Test10 {
 }

 pub fn main() {
+    static A = 42;
+    //~^ ERROR missing type for `static` item
+    static B: _ = 42;
+    //~^ ERROR the type placeholder `_` is not allowed within types on item signatures
+    static C: Option<_> = Some(42);
+    //~^ ERROR the type placeholder `_` is not allowed within types on item signatures
+
    fn fn_test() -> _ { 5 }
    //~^ ERROR the type placeholder `_` is not allowed within types on item signatures

--- a/src/test/ui/typeck/typeck_type_placeholder_item.stderr
+++ b/src/test/ui/typeck/typeck_type_placeholder_item.stderr
@ -1,35 +1,35 @@
 error: expected identifier, found reserved identifier `_`
-  --> $DIR/typeck_type_placeholder_item.rs:146:18
+  --> $DIR/typeck_type_placeholder_item.rs:153:18
   |
 LL | struct BadStruct<_>(_);
   |                  ^ expected identifier, found reserved identifier

 error: expected identifier, found reserved identifier `_`
-  --> $DIR/typeck_type_placeholder_item.rs:149:16
+  --> $DIR/typeck_type_placeholder_item.rs:156:16
   |
 LL | trait BadTrait<_> {}
   |                ^ expected identifier, found reserved identifier

 error: expected identifier, found reserved identifier `_`
-  --> $DIR/typeck_type_placeholder_item.rs:159:19
+  --> $DIR/typeck_type_placeholder_item.rs:166:19
   |
 LL | struct BadStruct1<_, _>(_);
   |                   ^ expected identifier, found reserved identifier

 error: expected identifier, found reserved identifier `_`
-  --> $DIR/typeck_type_placeholder_item.rs:159:22
+  --> $DIR/typeck_type_placeholder_item.rs:166:22
   |
 LL | struct BadStruct1<_, _>(_);
   |                      ^ expected identifier, found reserved identifier

 error: expected identifier, found reserved identifier `_`
-  --> $DIR/typeck_type_placeholder_item.rs:164:19
+  --> $DIR/typeck_type_placeholder_item.rs:171:19
   |
 LL | struct BadStruct2<_, T>(_, T);
   |                   ^ expected identifier, found reserved identifier

 error[E0403]: the name `_` is already used for a generic parameter in this item's generic parameters
-  --> $DIR/typeck_type_placeholder_item.rs:159:22
+  --> $DIR/typeck_type_placeholder_item.rs:166:22
   |
 LL | struct BadStruct1<_, _>(_);
   |                   -  ^ already used
@ -177,8 +177,29 @@ LL |
 LL |     b: (T, T),
   |

+error: missing type for `static` item
+  --> $DIR/typeck_type_placeholder_item.rs:71:12
+   |
+LL |     static A = 42;
+   |            ^ help: provide a type for the item: `A: i32`
+
 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:71:21
+  --> $DIR/typeck_type_placeholder_item.rs:73:15
+   |
+LL |     static B: _ = 42;
+   |               ^
+   |               |
+   |               not allowed in type signatures
+   |               help: replace `_` with the correct type: `i32`
+
+error[E0121]: the type placeholder `_` is not allowed within types on item signatures
+  --> $DIR/typeck_type_placeholder_item.rs:75:15
+   |
+LL |     static C: Option<_> = Some(42);
+   |               ^^^^^^^^^ not allowed in type signatures
+
+error[E0121]: the type placeholder `_` is not allowed within types on item signatures
+  --> $DIR/typeck_type_placeholder_item.rs:78:21
   |
 LL |     fn fn_test() -> _ { 5 }
   |                     ^
@ -187,7 +208,7 @@ LL |     fn fn_test() -> _ { 5 }
   |                     help: replace with the correct return type: `i32`

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:74:23
+  --> $DIR/typeck_type_placeholder_item.rs:81:23
   |
 LL |     fn fn_test2() -> (_, _) { (5, 5) }
   |                      -^--^-
@ -197,7 +218,7 @@ LL |     fn fn_test2() -> (_, _) { (5, 5) }
   |                      help: replace with the correct return type: `(i32, i32)`

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:77:22
+  --> $DIR/typeck_type_placeholder_item.rs:84:22
   |
 LL |     static FN_TEST3: _ = "test";
   |                      ^
@ -206,7 +227,7 @@ LL |     static FN_TEST3: _ = "test";
   |                      help: replace `_` with the correct type: `&'static str`

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:80:22
+  --> $DIR/typeck_type_placeholder_item.rs:87:22
   |
 LL |     static FN_TEST4: _ = 145;
   |                      ^
@ -215,13 +236,13 @@ LL |     static FN_TEST4: _ = 145;
   |                      help: replace `_` with the correct type: `i32`

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:83:22
+  --> $DIR/typeck_type_placeholder_item.rs:90:22
   |
 LL |     static FN_TEST5: (_, _) = (1, 2);
   |                      ^^^^^^ not allowed in type signatures

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:86:20
+  --> $DIR/typeck_type_placeholder_item.rs:93:20
   |
 LL |     fn fn_test6(_: _) { }
   |                    ^ not allowed in type signatures
@ -232,7 +253,7 @@ LL |     fn fn_test6<T>(_: T) { }
   |                ^^^    ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:89:20
+  --> $DIR/typeck_type_placeholder_item.rs:96:20
   |
 LL |     fn fn_test7(x: _) { let _x: usize = x; }
   |                    ^ not allowed in type signatures
@ -243,13 +264,13 @@ LL |     fn fn_test7<T>(x: T) { let _x: usize = x; }
   |                ^^^    ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:92:29
+  --> $DIR/typeck_type_placeholder_item.rs:99:29
   |
 LL |     fn fn_test8(_f: fn() -> _) { }
   |                             ^ not allowed in type signatures

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:92:29
+  --> $DIR/typeck_type_placeholder_item.rs:99:29
   |
 LL |     fn fn_test8(_f: fn() -> _) { }
   |                             ^ not allowed in type signatures
@ -260,7 +281,7 @@ LL |     fn fn_test8<T>(_f: fn() -> T) { }
   |                ^^^             ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:115:12
+  --> $DIR/typeck_type_placeholder_item.rs:122:12
   |
 LL |         a: _,
   |            ^ not allowed in type signatures
@ -279,13 +300,13 @@ LL |         b: (T, T),
   |

 error[E0282]: type annotations needed
-  --> $DIR/typeck_type_placeholder_item.rs:120:27
+  --> $DIR/typeck_type_placeholder_item.rs:127:27
   |
 LL |     fn fn_test11(_: _) -> (_, _) { panic!() }
   |                           ^^^^^^ cannot infer type

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:120:28
+  --> $DIR/typeck_type_placeholder_item.rs:127:28
   |
 LL |     fn fn_test11(_: _) -> (_, _) { panic!() }
   |                            ^  ^ not allowed in type signatures
@ -293,7 +314,7 @@ LL |     fn fn_test11(_: _) -> (_, _) { panic!() }
   |                            not allowed in type signatures

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:124:30
+  --> $DIR/typeck_type_placeholder_item.rs:131:30
   |
 LL |     fn fn_test12(x: i32) -> (_, _) { (x, x) }
   |                             -^--^-
@ -303,7 +324,7 @@ LL |     fn fn_test12(x: i32) -> (_, _) { (x, x) }
   |                             help: replace with the correct return type: `(i32, i32)`

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:127:33
+  --> $DIR/typeck_type_placeholder_item.rs:134:33
   |
 LL |     fn fn_test13(x: _) -> (i32, _) { (x, x) }
   |                           ------^-
@ -312,7 +333,7 @@ LL |     fn fn_test13(x: _) -> (i32, _) { (x, x) }
   |                           help: replace with the correct return type: `(i32, i32)`

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:146:21
+  --> $DIR/typeck_type_placeholder_item.rs:153:21
   |
 LL | struct BadStruct<_>(_);
   |                     ^ not allowed in type signatures
@ -323,7 +344,7 @@ LL | struct BadStruct<T>(T);
   |                  ^  ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:151:15
+  --> $DIR/typeck_type_placeholder_item.rs:158:15
   |
 LL | impl BadTrait<_> for BadStruct<_> {}
   |               ^                ^ not allowed in type signatures
@ -336,13 +357,13 @@ LL | impl<T> BadTrait<T> for BadStruct<T> {}
   |     ^^^          ^                ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:154:34
+  --> $DIR/typeck_type_placeholder_item.rs:161:34
   |
 LL | fn impl_trait() -> impl BadTrait<_> {
   |                                  ^ not allowed in type signatures

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:159:25
+  --> $DIR/typeck_type_placeholder_item.rs:166:25
   |
 LL | struct BadStruct1<_, _>(_);
   |                         ^ not allowed in type signatures
@ -353,7 +374,7 @@ LL | struct BadStruct1<T, _>(T);
   |                   ^     ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:164:25
+  --> $DIR/typeck_type_placeholder_item.rs:171:25
   |
 LL | struct BadStruct2<_, T>(_, T);
   |                         ^ not allowed in type signatures
@ -364,7 +385,7 @@ LL | struct BadStruct2<K, T>(K, T);
   |                   ^     ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:168:14
+  --> $DIR/typeck_type_placeholder_item.rs:175:14
   |
 LL | type X = Box<_>;
   |              ^ not allowed in type signatures
@ -381,7 +402,7 @@ LL |     fn test10<T>(&self, _x : T) { }
   |              ^^^             ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:132:31
+  --> $DIR/typeck_type_placeholder_item.rs:139:31
   |
 LL |     fn method_test1(&self, x: _);
   |                               ^ not allowed in type signatures
@ -392,7 +413,7 @@ LL |     fn method_test1<T>(&self, x: T);
   |                    ^^^           ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:134:31
+  --> $DIR/typeck_type_placeholder_item.rs:141:31
   |
 LL |     fn method_test2(&self, x: _) -> _;
   |                               ^     ^ not allowed in type signatures
@ -405,7 +426,7 @@ LL |     fn method_test2<T>(&self, x: T) -> T;
   |                    ^^^           ^     ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:136:31
+  --> $DIR/typeck_type_placeholder_item.rs:143:31
   |
 LL |     fn method_test3(&self) -> _;
   |                               ^ not allowed in type signatures
@ -416,7 +437,7 @@ LL |     fn method_test3<T>(&self) -> T;
   |                    ^^^           ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:138:26
+  --> $DIR/typeck_type_placeholder_item.rs:145:26
   |
 LL |     fn assoc_fn_test1(x: _);
   |                          ^ not allowed in type signatures
@ -427,7 +448,7 @@ LL |     fn assoc_fn_test1<T>(x: T);
   |                      ^^^    ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:140:26
+  --> $DIR/typeck_type_placeholder_item.rs:147:26
   |
 LL |     fn assoc_fn_test2(x: _) -> _;
   |                          ^     ^ not allowed in type signatures
@ -440,7 +461,7 @@ LL |     fn assoc_fn_test2<T>(x: T) -> T;
   |                      ^^^    ^     ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:142:28
+  --> $DIR/typeck_type_placeholder_item.rs:149:28
   |
 LL |     fn assoc_fn_test3() -> _;
   |                            ^ not allowed in type signatures
@ -462,7 +483,7 @@ LL |     fn clone_from<T>(&mut self, other: T) { *self = Test9; }
   |                  ^^^                   ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:102:34
+  --> $DIR/typeck_type_placeholder_item.rs:109:34
   |
 LL |         fn fn_test10(&self, _x : _) { }
   |                                  ^ not allowed in type signatures
@ -473,7 +494,7 @@ LL |         fn fn_test10<T>(&self, _x : T) { }
   |                     ^^^             ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:110:41
+  --> $DIR/typeck_type_placeholder_item.rs:117:41
   |
 LL |         fn clone_from(&mut self, other: _) { *self = FnTest9; }
   |                                         ^ not allowed in type signatures
@ -484,7 +505,7 @@ LL |         fn clone_from<T>(&mut self, other: T) { *self = FnTest9; }
   |                      ^^^                   ^

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:174:21
+  --> $DIR/typeck_type_placeholder_item.rs:181:21
   |
 LL | type Y = impl Trait<_>;
   |                     ^ not allowed in type signatures
@ -508,7 +529,7 @@ LL |     fn clone(&self) -> _ { Test9 }
   |                        help: replace with the correct return type: `Test9`

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:99:31
+  --> $DIR/typeck_type_placeholder_item.rs:106:31
   |
 LL |         fn fn_test9(&self) -> _ { () }
   |                               ^
@ -517,7 +538,7 @@ LL |         fn fn_test9(&self) -> _ { () }
   |                               help: replace with the correct return type: `()`

 error[E0121]: the type placeholder `_` is not allowed within types on item signatures
-  --> $DIR/typeck_type_placeholder_item.rs:107:28
+  --> $DIR/typeck_type_placeholder_item.rs:114:28
   |
 LL |         fn clone(&self) -> _ { FnTest9 }
   |                            ^
@ -525,7 +546,7 @@ LL |         fn clone(&self) -> _ { FnTest9 }
   |                            not allowed in type signatures
   |                            help: replace with the correct return type: `main::FnTest9`

-error: aborting due to 55 previous errors
+error: aborting due to 58 previous errors

 Some errors have detailed explanations: E0121, E0282, E0403.
 For more information about an error, try `rustc --explain E0121`.
--- a/src/tools/unicode-table-generator/Cargo.toml
+++ b/src/tools/unicode-table-generator/Cargo.toml
@ -0,0 +1,10 @@
+[package]
+name = "unicode-bdd"
+version = "0.1.0"
+authors = ["Mark Rousskov <mark.simulacrum@gmail.com>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+ucd-parse = "0.1.3"
--- a/src/tools/unicode-table-generator/src/case_mapping.rs
+++ b/src/tools/unicode-table-generator/src/case_mapping.rs
@ -0,0 +1,62 @@
+use crate::{fmt_list, UnicodeData};
+use std::fmt;
+
+/// Renders the case-conversion source: the lookup functions from `HEADER`
+/// followed by the `LOWERCASE_TABLE` and `UPPERCASE_TABLE` statics built from
+/// `data.to_lower` and `data.to_upper`.
+pub(crate) fn generate_case_mapping(data: &UnicodeData) -> String {
+    let decl_type = "&[(char, [char; 3])]";
+
+    let lowercase = format!(
+        "static LOWERCASE_TABLE: {} = &[{}];",
+        decl_type,
+        fmt_list(data.to_lower.iter().map(to_mapping))
+    );
+    let uppercase = format!(
+        "static UPPERCASE_TABLE: {} = &[{}];",
+        decl_type,
+        fmt_list(data.to_upper.iter().map(to_mapping))
+    );
+
+    // Header first, then the two tables separated by a blank line.
+    let mut rendered = String::from(HEADER.trim_start());
+    rendered += &lowercase;
+    rendered += "\n\n";
+    rendered += &uppercase;
+    rendered
+}
+
+/// Converts one `(codepoint, (a, b, c))` table entry into printable `CharEscape`s.
+///
+/// Panics if any of the four values is not a valid `char`.
+fn to_mapping((key, (a, b, c)): (&u32, &(u32, u32, u32))) -> (CharEscape, [CharEscape; 3]) {
+    let escape = |value: &u32| CharEscape(std::char::from_u32(*value).unwrap());
+    (escape(key), [escape(a), escape(b), escape(c)])
+}
+
+/// Wrapper whose `Debug` output is a single-quoted Rust `char` literal using
+/// `char::escape_default` for the contents.
+struct CharEscape(char);
+
+impl fmt::Debug for CharEscape {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let CharEscape(ch) = self;
+        let escaped: String = ch.escape_default().collect();
+        f.write_str("'")?;
+        f.write_str(&escaped)?;
+        f.write_str("'")
+    }
+}
+
+// Rust source placed (after trimming leading whitespace) in front of the
+// generated case tables by `generate_case_mapping`. It defines lookup functions
+// that binary-search LOWERCASE_TABLE / UPPERCASE_TABLE and, when a character has
+// no entry, return the character itself followed by two '\0' slots.
+static HEADER: &str = "
+pub fn to_lower(c: char) -> [char; 3] {
+    match bsearch_case_table(c, LOWERCASE_TABLE) {
+        None => [c, '\\0', '\\0'],
+        Some(index) => LOWERCASE_TABLE[index].1,
+    }
+}
+
+pub fn to_upper(c: char) -> [char; 3] {
+    match bsearch_case_table(c, UPPERCASE_TABLE) {
+        None => [c, '\\0', '\\0'],
+        Some(index) => UPPERCASE_TABLE[index].1,
+    }
+}
+
+fn bsearch_case_table(c: char, table: &[(char, [char; 3])]) -> Option<usize> {
+    table.binary_search_by(|&(key, _)| key.cmp(&c)).ok()
+}
+";
--- a/src/tools/unicode-table-generator/src/main.rs
+++ b/src/tools/unicode-table-generator/src/main.rs
@ -0,0 +1,261 @@
+use std::collections::{BTreeMap, HashMap};
+use std::ops::Range;
+use ucd_parse::Codepoints;
+
+mod case_mapping;
+mod raw_emitter;
+mod unicode_download;
+
+use raw_emitter::{emit_codepoints, RawEmitter};
+
+/// Names of the character sets for which lookup tables are generated.
+///
+/// `load_data` keeps only rows whose property name (or, for
+/// `ucd_parse::UnicodeData` rows, general category) appears in this list; the
+/// categories `Nd`, `Nl` and `No` are folded into the single entry `"N"`.
+static PROPERTIES: &[&str] = &[
+    "Alphabetic",
+    "Lowercase",
+    "Uppercase",
+    "Cased",
+    "Case_Ignorable",
+    "Grapheme_Extend",
+    "White_Space",
+    "Cc",
+    "N",
+];
+
+/// Everything `load_data` extracts from the Unicode data files.
+struct UnicodeData {
+    /// `(property name, half-open codepoint ranges)` pairs, sorted by property name.
+    ranges: Vec<(&'static str, Vec<Range<u32>>)>,
+    /// Uppercase mappings keyed by source codepoint; unused trailing slots are 0.
+    to_upper: BTreeMap<u32, (u32, u32, u32)>,
+    /// Lowercase mappings keyed by source codepoint; unused trailing slots are 0.
+    to_lower: BTreeMap<u32, (u32, u32, u32)>,
+}
+
+/// Packs the codepoints of a case mapping into a fixed `(a, b, c)` triple, using 0
+/// for unused trailing slots.
+///
+/// Returns `None` as soon as a mapped codepoint equals `origin`. Panics if
+/// `codepoints` is empty or if a fourth codepoint is reached.
+fn to_mapping(origin: u32, codepoints: Vec<ucd_parse::Codepoint>) -> Option<(u32, u32, u32)> {
+    let mut slots: [Option<u32>; 3] = [None; 3];
+
+    for (position, codepoint) in codepoints.into_iter().enumerate() {
+        let value = codepoint.value();
+        if value == origin {
+            return None;
+        }
+
+        match slots.get_mut(position) {
+            Some(slot) => *slot = Some(value),
+            None => panic!("more than 3 mapped codepoints"),
+        }
+    }
+
+    let [a, b, c] = slots;
+    Some((a.unwrap(), b.unwrap_or(0), c.unwrap_or(0)))
+}
+
+/// Directory (a relative path) where the downloaded Unicode data files are
+/// stored and from which they are parsed.
+static UNICODE_DIRECTORY: &str = "unicode-downloads";
+
+/// Downloads the Unicode data files and collects everything the generator needs.
+///
+/// Gathers the codepoints of every set listed in `PROPERTIES` and builds the
+/// lowercase/uppercase mapping tables. Panics if any data file fails to parse.
+fn load_data() -> UnicodeData {
+    unicode_download::fetch_latest();
+
+    // Property name -> codepoint sets, in the order the rows were encountered.
+    let mut properties = HashMap::new();
+    for row in ucd_parse::parse::<_, ucd_parse::CoreProperty>(&UNICODE_DIRECTORY).unwrap() {
+        if let Some(name) = PROPERTIES.iter().find(|prop| **prop == row.property.as_str()) {
+            properties.entry(*name).or_insert_with(Vec::new).push(row.codepoints);
+        }
+    }
+    for row in ucd_parse::parse::<_, ucd_parse::Property>(&UNICODE_DIRECTORY).unwrap() {
+        if let Some(name) = PROPERTIES.iter().find(|prop| **prop == row.property.as_str()) {
+            properties.entry(*name).or_insert_with(Vec::new).push(row.codepoints);
+        }
+    }
+
+    let mut to_lower = BTreeMap::new();
+    let mut to_upper = BTreeMap::new();
+    for row in ucd_parse::UnicodeDataExpander::new(
+        ucd_parse::parse::<_, ucd_parse::UnicodeData>(&UNICODE_DIRECTORY).unwrap(),
+    ) {
+        // The three numeric general categories are tracked as one set named "N".
+        let general_category = if ["Nd", "Nl", "No"].contains(&row.general_category.as_str()) {
+            "N"
+        } else {
+            row.general_category.as_str()
+        };
+        if let Some(name) = PROPERTIES.iter().find(|prop| **prop == general_category) {
+            properties
+                .entry(*name)
+                .or_insert_with(Vec::new)
+                .push(Codepoints::Single(row.codepoint));
+        }
+
+        // Simple (single-codepoint) mappings; identity mappings are not recorded.
+        if let Some(mapped) = row.simple_lowercase_mapping {
+            if mapped != row.codepoint {
+                to_lower.insert(row.codepoint.value(), (mapped.value(), 0, 0));
+            }
+        }
+        if let Some(mapped) = row.simple_uppercase_mapping {
+            if mapped != row.codepoint {
+                to_upper.insert(row.codepoint.value(), (mapped.value(), 0, 0));
+            }
+        }
+    }
+
+    for row in ucd_parse::parse::<_, ucd_parse::SpecialCaseMapping>(&UNICODE_DIRECTORY).unwrap() {
+        if !row.conditions.is_empty() {
+            // Skip conditional case mappings
+            continue;
+        }
+
+        // An unconditional special mapping replaces any simple mapping recorded
+        // above for the same codepoint (`insert` overwrites the existing entry).
+        let key = row.codepoint.value();
+        if let Some(lower) = to_mapping(key, row.lowercase) {
+            to_lower.insert(key, lower);
+        }
+        if let Some(upper) = to_mapping(key, row.uppercase) {
+            to_upper.insert(key, upper);
+        }
+    }
+
+    // Expand every codepoint set into single-codepoint ranges, dropping entries
+    // for which `scalar()` yields nothing.
+    let mut properties: HashMap<&'static str, Vec<Range<u32>>> = properties
+        .into_iter()
+        .map(|(k, v)| {
+            (
+                k,
+                v.into_iter()
+                    .flat_map(|codepoints| match codepoints {
+                        Codepoints::Single(c) => c
+                            .scalar()
+                            .map(|ch| (ch as u32..ch as u32 + 1))
+                            .into_iter()
+                            .collect::<Vec<_>>(),
+                        Codepoints::Range(c) => c
+                            .into_iter()
+                            .flat_map(|c| c.scalar().map(|ch| (ch as u32..ch as u32 + 1)))
+                            .collect::<Vec<_>>(),
+                    })
+                    .collect::<Vec<Range<u32>>>(),
+            )
+        })
+        .collect();
+
+    // Coalesce neighbouring single-codepoint ranges into longer ranges.
+    for ranges in properties.values_mut() {
+        merge_ranges(ranges);
+    }
+
+    // Sort by property name; HashMap iteration order is unspecified.
+    let mut properties = properties.into_iter().collect::<Vec<_>>();
+    properties.sort_by_key(|p| p.0);
+    UnicodeData { ranges: properties, to_lower, to_upper }
+}
+
+/// Entry point: generates the Unicode tables and writes them to the file named
+/// by the first command-line argument.
+///
+/// Prints a usage message and exits with status 1 if no path is given.
+fn main() {
+    let write_location = std::env::args().nth(1).unwrap_or_else(|| {
+        eprintln!("Must provide path to write unicode tables to");
+        eprintln!(
+            "e.g. {} src/libcore/unicode/unicode_data.rs",
+            std::env::args().nth(0).unwrap_or_default()
+        );
+        std::process::exit(1);
+    });
+
+    let unicode_data = load_data();
+    let ranges_by_property = &unicode_data.ranges;
+
+    // One generated module per property, named after the lowercased property.
+    let mut total_bytes = 0;
+    let mut modules = Vec::new();
+    for (property, ranges) in ranges_by_property {
+        let datapoints = ranges.iter().map(|r| r.end - r.start).sum::<u32>();
+        let mut emitter = RawEmitter::new();
+        emit_codepoints(&mut emitter, &ranges);
+
+        modules.push((property.to_lowercase().to_string(), emitter.file));
+        println!("{:15}: {} bytes, {} codepoints", property, emitter.bytes_used, datapoints,);
+        total_bytes += emitter.bytes_used;
+    }
+
+    let mut table_file = String::new();
+
+    table_file.push_str(
+        "///! This file is generated by src/tools/unicode-table-generator; do not edit manually!\n",
+    );
+
+    table_file.push_str("use super::range_search;\n\n");
+
+    table_file.push_str(&version());
+
+    table_file.push('\n');
+
+    // The case-conversion tables go into their own `conversions` module.
+    modules.push((String::from("conversions"), case_mapping::generate_case_mapping(&unicode_data)));
+
+    for (name, contents) in modules {
+        table_file.push_str("#[rustfmt::skip]\n");
+        table_file.push_str(&format!("pub mod {} {{\n", name));
+        // Indent the module contents by four spaces, leaving blank lines empty.
+        for line in contents.lines() {
+            if !line.trim().is_empty() {
+                table_file.push_str("    ");
+                table_file.push_str(&line);
+            }
+            table_file.push('\n');
+        }
+        table_file.push_str("}\n\n");
+    }
+
+    // Trailing whitespace is trimmed so the file ends with exactly one newline.
+    std::fs::write(&write_location, format!("{}\n", table_file.trim_end())).unwrap();
+
+    println!("Total table sizes: {} bytes", total_bytes);
+}
+
+/// Builds the `UNICODE_VERSION` constant declaration from the version number
+/// found in the downloaded `ReadMe.txt`.
+///
+/// Panics if the file cannot be read, the expected sentence is missing, a
+/// component is not a number, or fewer than three components are present.
+fn version() -> String {
+    let readme_path = std::path::Path::new(UNICODE_DIRECTORY).join("ReadMe.txt");
+    let readme = std::fs::read_to_string(readme_path).unwrap();
+
+    // The version sits between these two markers in the read-me text.
+    let prefix = "for Version ";
+    let start = readme.find(prefix).unwrap() + prefix.len();
+    let end = readme.find(" of the Unicode Standard.").unwrap();
+
+    let mut components = Vec::new();
+    for part in readme[start..end].split('.') {
+        components.push(part.parse::<u32>().expect(part));
+    }
+    let (major, minor, micro) = (components[0], components[1], components[2]);
+
+    let declaration = "pub const UNICODE_VERSION: (u32, u32, u32) = ";
+    let tuple = format!("({}, {}, {});\n", major, minor, micro);
+    [declaration, tuple.as_str()].concat()
+}
+
+/// Formats `values` with `{:?}` as a comma-separated list, indented by four
+/// spaces and wrapped so that lines stay under 98 bytes.
+///
+/// The result begins with a newline and ends with a newline, so it can be
+/// placed directly between `[` and `]`.
+fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
+    const INDENT: &str = "    ";
+    const WIDTH_LIMIT: usize = 98;
+
+    let mut rendered = String::new();
+    // The leading newline is part of the first line's length, as before.
+    let mut current = String::from("\n");
+    current.push_str(INDENT);
+
+    for value in values {
+        let item = format!("{:?}, ", value);
+        if current.len() + item.len() >= WIDTH_LIMIT {
+            // Line is full: flush it and start a fresh indented line.
+            rendered.push_str(current.trim_end());
+            rendered.push('\n');
+            current.clear();
+            current.push_str(INDENT);
+        }
+        current.push_str(&item);
+    }
+
+    rendered.push_str(current.trim_end());
+    rendered.push('\n');
+    rendered
+}
+
+/// Coalesces neighbouring ranges in place: whenever a range starts exactly where
+/// the previous one ends, the two are replaced by a single range covering both.
+///
+/// The relative order of the ranges is preserved; only directly adjacent
+/// entries are considered, so callers wanting a fully merged result must pass
+/// ranges in ascending order. An empty vector is left untouched.
+fn merge_ranges(ranges: &mut Vec<Range<u32>>) {
+    let mut merged: Vec<Range<u32>> = Vec::with_capacity(ranges.len());
+    for range in ranges.drain(..) {
+        match merged.last_mut() {
+            // Extend the previous range instead of pushing a new one. Doing this
+            // in a single pass also guarantees the final range is never emitted
+            // twice when it was merged into its predecessor.
+            Some(previous) if previous.end == range.start => previous.end = range.end,
+            _ => merged.push(range),
+        }
+    }
+    *ranges = merged;
+}
--- a/src/tools/unicode-table-generator/src/raw_emitter.rs
+++ b/src/tools/unicode-table-generator/src/raw_emitter.rs
@ -0,0 +1,170 @@
+//! This implements the core logic of the compression scheme used to compactly
+//! encode the Unicode character classes.
+//!
+//! The primary idea is that we 'flatten' the Unicode ranges into an enormous
+//! bitset. To represent any arbitrary codepoint in a raw bitset, we would need
+//! over 17 thousand 64-bit words (roughly 136 kilobytes) of data per character
+//! set -- way too much for our purposes.
+//!
+//! We have two primary goals with the encoding: we want to be compact, because
+//! these tables often end up in ~every Rust program (especially the
+//! grapheme_extend table, used for str debugging), including those for embedded
+//! targets (where space is important). We also want to be relatively fast,
+//! though this is more of a nice to have rather than a key design constraint.
+//! In practice, due to modern processor design these two are closely related.
+//!
+//! The encoding scheme here compresses the bitset by first deduplicating the
+//! "words" (64 bits on all platforms). In practice very few words are present
+//! in most data sets.
+//!
+//! This gives us an array that maps `u8 -> word` (if we ever went beyond 256
+//! words, we could go to u16 -> word or have some dual compression scheme
+//! mapping into two separate sets; currently this is not dealt with).
+//!
+//! With that scheme, we now have a single byte for every 64 codepoints. We
+//! further group these by 16 (arbitrarily chosen), and again deduplicate and
+//! store in an array (u8 -> [u8; 16]).
+//!
+//! The indices into this array represent ranges of 64*16 = 1024 codepoints.
+//!
+//! This already reduces the top-level array to roughly 1,088 bytes at most
+//! (0x110000 / 1024), but in practice we usually can encode in far fewer (the
+//! first couple Unicode planes are dense).
+//!
+//! The last byte of this top-level array is pulled out to a separate static
+//! and trailing zeros are dropped; this is simply because grapheme_extend and
+//! case_ignorable have a single entry in the 896th entry, so this shrinks them
+//! down considerably.
+
+use crate::fmt_list;
+use std::collections::{BTreeSet, HashMap};
+use std::convert::TryFrom;
+use std::fmt::Write;
+use std::ops::Range;
+
+/// Accumulates the generated source for one character set.
+pub struct RawEmitter {
+    /// Rust source emitted so far.
+    pub file: String,
+    /// Running total of the byte sizes of the emitted statics.
+    pub bytes_used: usize,
+}
+
+impl RawEmitter {
+    /// Creates an emitter with no output and a zero byte count.
+    pub fn new() -> RawEmitter {
+        let file = String::new();
+        let bytes_used = 0;
+        Self { file, bytes_used }
+    }
+
+    /// Appends a newline unless the output is empty or already ends with a
+    /// blank line.
+    fn blank_line(&mut self) {
+        let already_separated = self.file.is_empty() || self.file.ends_with("\n\n");
+        if !already_separated {
+            self.file.push('\n');
+        }
+    }
+
+    /// Emits the compressed lookup statics for the bitset `words`.
+    ///
+    /// Writes `BITSET_LAST_CHUNK_MAP`, `BITSET_CHUNKS_MAP`, `BITSET_INDEX_CHUNKS`
+    /// and `BITSET` to `self.file` and adds their sizes to `self.bytes_used`.
+    ///
+    /// Panics if there are more than 255 distinct words, or if the smallest
+    /// distinct word is not zero.
+    fn emit_bitset(&mut self, words: &[u64]) {
+        // Distinct words in ascending order; a word's position becomes its u8 index.
+        let unique_words =
+            words.iter().cloned().collect::<BTreeSet<_>>().into_iter().collect::<Vec<_>>();
+        if unique_words.len() > u8::max_value() as usize {
+            panic!("cannot pack {} into 8 bits", unique_words.len());
+        }
+
+        let word_indices = unique_words
+            .iter()
+            .cloned()
+            .enumerate()
+            .map(|(idx, word)| (word, u8::try_from(idx).unwrap()))
+            .collect::<HashMap<_, _>>();
+
+        // Replace every word by its index into `unique_words`.
+        let mut idx = words.iter().map(|w| word_indices[w]).collect::<Vec<u8>>();
+        let chunk_length = 16;
+        // NOTE(review): this always appends between 1 and 16 entries, i.e. a whole
+        // extra chunk when the length is already a multiple of 16 -- confirm intended.
+        for _ in 0..(chunk_length - (idx.len() % chunk_length)) {
+            assert_eq!(unique_words[0], 0, "first word is all zeros");
+            // pad out bitset index with zero words so we have all chunks of 16
+            idx.push(0);
+        }
+
+        // Deduplicate the 16-entry chunks; a chunk's position in sorted order is
+        // its index.
+        let mut chunks = BTreeSet::new();
+        for chunk in idx.chunks(chunk_length) {
+            chunks.insert(chunk);
+        }
+        let chunk_map = chunks
+            .clone()
+            .into_iter()
+            .enumerate()
+            .map(|(idx, chunk)| (chunk, idx))
+            .collect::<HashMap<_, _>>();
+        let mut chunk_indices = Vec::new();
+        for chunk in idx.chunks(chunk_length) {
+            chunk_indices.push(chunk_map[chunk]);
+        }
+        // The final entry is stored separately as (position, chunk index); the
+        // position is computed before `pop()` removes the entry.
+        writeln!(
+            &mut self.file,
+            "static BITSET_LAST_CHUNK_MAP: (u16, u8) = ({}, {});",
+            chunk_indices.len() - 1,
+            chunk_indices.pop().unwrap(),
+        )
+        .unwrap();
+        self.bytes_used += 3;
+        // Strip out the empty pieces, presuming our above pop() made us now
+        // have some trailing zeros.
+        // NOTE(review): this drops trailing entries equal to chunk index 0, which
+        // presumes chunk 0 is the all-zero chunk; the assert below only checks
+        // that word 0 is zero -- confirm against `range_search`.
+        assert_eq!(unique_words[0], 0, "first word is all zeros");
+        while let Some(0) = chunk_indices.last() {
+            chunk_indices.pop();
+        }
+        writeln!(
+            &mut self.file,
+            "static BITSET_CHUNKS_MAP: [u8; {}] = [{}];",
+            chunk_indices.len(),
+            fmt_list(&chunk_indices),
+        )
+        .unwrap();
+        self.bytes_used += chunk_indices.len();
+        writeln!(
+            &mut self.file,
+            "static BITSET_INDEX_CHUNKS: [[u8; 16]; {}] = [{}];",
+            chunks.len(),
+            fmt_list(chunks.iter()),
+        )
+        .unwrap();
+        self.bytes_used += 16 * chunks.len();
+        writeln!(
+            &mut self.file,
+            "static BITSET: [u64; {}] = [{}];",
+            unique_words.len(),
+            fmt_list(&unique_words),
+        )
+        .unwrap();
+        self.bytes_used += 8 * unique_words.len();
+    }
+
+    /// Appends the `lookup` function, which forwards to `super::range_search`
+    /// with the statics written by `emit_bitset`.
+    pub fn emit_lookup(&mut self) {
+        const LOOKUP_SOURCE: &[&str] = &[
+            "pub fn lookup(c: char) -> bool {",
+            "    super::range_search(",
+            "        c as u32,",
+            "        &BITSET_CHUNKS_MAP,",
+            "        BITSET_LAST_CHUNK_MAP,",
+            "        &BITSET_INDEX_CHUNKS,",
+            "        &BITSET,",
+            "    )",
+            "}",
+        ];
+        for line in LOOKUP_SOURCE {
+            self.file.push_str(line);
+            self.file.push('\n');
+        }
+    }
+}
+
+/// Emits the lookup tables and `lookup` function for the codepoints covered by
+/// `ranges`.
+///
+/// The bitset is sized from the end of the last range, so `ranges` must be
+/// non-empty and its last range must have the largest end; otherwise this panics.
+pub fn emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
+    emitter.blank_line();
+
+    let upper_bound = ranges.last().unwrap().end as usize;
+    // One bit per codepoint, 64 per word.
+    //
+    // + 2 to ensure an all zero word to use for padding
+    let mut words = vec![0u64; upper_bound / 64 + 2];
+    for codepoint in ranges.iter().cloned().flatten() {
+        let word = (codepoint / 64) as usize;
+        words[word] |= 1u64 << (codepoint % 64);
+    }
+
+    emitter.emit_bitset(&words);
+    emitter.blank_line();
+    emitter.emit_lookup();
+}
--- a/src/tools/unicode-table-generator/src/unicode_download.rs
+++ b/src/tools/unicode-table-generator/src/unicode_download.rs
@ -0,0 +1,42 @@
+use crate::UNICODE_DIRECTORY;
+use std::path::Path;
+use std::process::Command;
+
+/// Base URL the files are downloaded from (the `latest` UCD directory).
+static URL_PREFIX: &str = "https://www.unicode.org/Public/UCD/latest/ucd/";
+
+/// Name of the read-me file; `fetch_latest` downloads it before the data files.
+static README: &str = "ReadMe.txt";
+
+/// Data files that `fetch_latest` downloads and overwrites on every run.
+static RESOURCES: &[&str] =
+    &["DerivedCoreProperties.txt", "PropList.txt", "UnicodeData.txt", "SpecialCasing.txt"];
+
+/// Downloads the read-me and every file in `RESOURCES` into `UNICODE_DIRECTORY`
+/// using `curl`.
+///
+/// The read-me is only rewritten when its contents changed; the data files are
+/// always overwritten. Panics if the directory cannot be created, if `curl`
+/// cannot be started or reports a failure, or if a file cannot be written.
+pub fn fetch_latest() {
+    let directory = Path::new(UNICODE_DIRECTORY);
+    if let Err(e) = std::fs::create_dir_all(directory) {
+        if e.kind() != std::io::ErrorKind::AlreadyExists {
+            panic!("Failed to create {:?}: {}", UNICODE_DIRECTORY, e);
+        }
+    }
+
+    let readme = download(README, "readme");
+    let current = std::fs::read_to_string(directory.join(README)).unwrap_or_default();
+    if current.as_bytes() != &readme[..] {
+        std::fs::write(directory.join(README), readme).unwrap();
+    }
+
+    for resource in RESOURCES {
+        let contents = download(resource, resource);
+        std::fs::write(directory.join(resource), contents).unwrap();
+    }
+}
+
+/// Fetches `URL_PREFIX` + `resource` with `curl` and returns the response body.
+///
+/// `label` is the name used for the resource in the failure message.
+fn download(resource: &str, label: &str) -> Vec<u8> {
+    // `--fail` makes curl exit non-zero on HTTP errors (e.g. 404); without it an
+    // error page would be accepted and written out as if it were Unicode data.
+    let output = Command::new("curl")
+        .arg("--fail")
+        .arg(URL_PREFIX.to_owned() + resource)
+        .output()
+        .unwrap_or_else(|e| panic!("Failed to spawn curl: {}", e));
+    if !output.status.success() {
+        panic!(
+            "Failed to run curl to fetch {}: stderr: {}",
+            label,
+            String::from_utf8_lossy(&output.stderr)
+        );
+    }
+    output.stdout
+}