Auto merge of #68254 - Dylan-DPC:rollup-9vhc59u, r=Dylan-DPC
Rollup of 6 pull requests

Successful merges:

- #68123 (Implement Cursor for linked lists. (RFC 2570).)
- #68212 (Suggest to shorten temporary lifetime during method call inside generator)
- #68232 (Optimize size/speed of Unicode datasets)
- #68236 (Add some regression tests)
- #68237 (Account for `Path`s in `is_suggestable_infer_ty`)
- #68252 (remove redundant clones, found by clippy)

Failed merges:

r? @ghost
Commit: 3291ae3390

.gitignore (vendored): 9 lines changed
@@ -34,14 +34,7 @@ __pycache__/
 # Created by default with `src/ci/docker/run.sh`:
 /obj/
 /rustllvm/
-/src/libcore/unicode/DerivedCoreProperties.txt
-/src/libcore/unicode/DerivedNormalizationProps.txt
-/src/libcore/unicode/PropList.txt
-/src/libcore/unicode/ReadMe.txt
-/src/libcore/unicode/Scripts.txt
-/src/libcore/unicode/SpecialCasing.txt
-/src/libcore/unicode/UnicodeData.txt
-/src/libcore/unicode/downloaded
+/unicode-downloads
 /target/
 # Generated by compiletest for incremental:
 /tmp/
Cargo.lock: 17 lines changed
@@ -4953,6 +4953,16 @@ version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169"

+[[package]]
+name = "ucd-parse"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca6b52bf4da6512f0f07785a04769222e50d29639e7ecd016b7806fd2de306b4"
+dependencies = [
+ "lazy_static 1.3.0",
+ "regex",
+]
+
 [[package]]
 name = "ucd-trie"
 version = "0.1.1"

@@ -4974,6 +4984,13 @@ dependencies = [
  "version_check 0.1.5",
 ]

+[[package]]
+name = "unicode-bdd"
+version = "0.1.0"
+dependencies = [
+ "ucd-parse",
+]
+
 [[package]]
 name = "unicode-bidi"
 version = "0.3.4"
Cargo.toml

@@ -23,6 +23,7 @@ members = [
   "src/tools/rustfmt",
   "src/tools/miri",
   "src/tools/rustdoc-themes",
+  "src/tools/unicode-table-generator",
 ]
 exclude = [
   "build",
src/liballoc/collections/linked_list.rs

@@ -242,6 +242,121 @@ impl<T> LinkedList<T> {

        self.len -= 1;
    }

    /// Splices a series of nodes between two existing nodes.
    ///
    /// Warning: this will not check that the provided node belongs to the two existing lists.
    #[inline]
    unsafe fn splice_nodes(
        &mut self,
        existing_prev: Option<NonNull<Node<T>>>,
        existing_next: Option<NonNull<Node<T>>>,
        mut splice_start: NonNull<Node<T>>,
        mut splice_end: NonNull<Node<T>>,
        splice_length: usize,
    ) {
        // This method takes care not to create multiple mutable references to whole nodes at the same time,
        // to maintain validity of aliasing pointers into `element`.
        if let Some(mut existing_prev) = existing_prev {
            existing_prev.as_mut().next = Some(splice_start);
        } else {
            self.head = Some(splice_start);
        }
        if let Some(mut existing_next) = existing_next {
            existing_next.as_mut().prev = Some(splice_end);
        } else {
            self.tail = Some(splice_end);
        }
        splice_start.as_mut().prev = existing_prev;
        splice_end.as_mut().next = existing_next;

        self.len += splice_length;
    }

    /// Detaches all nodes from a linked list as a series of nodes.
    #[inline]
    fn detach_all_nodes(mut self) -> Option<(NonNull<Node<T>>, NonNull<Node<T>>, usize)> {
        let head = self.head.take();
        let tail = self.tail.take();
        let len = mem::replace(&mut self.len, 0);
        if let Some(head) = head {
            let tail = tail.unwrap_or_else(|| unsafe { core::hint::unreachable_unchecked() });
            Some((head, tail, len))
        } else {
            None
        }
    }

    #[inline]
    unsafe fn split_off_before_node(
        &mut self,
        split_node: Option<NonNull<Node<T>>>,
        at: usize,
    ) -> Self {
        // The split node is the new head node of the second part
        if let Some(mut split_node) = split_node {
            let first_part_head;
            let first_part_tail;
            first_part_tail = split_node.as_mut().prev.take();
            if let Some(mut tail) = first_part_tail {
                tail.as_mut().next = None;
                first_part_head = self.head;
            } else {
                first_part_head = None;
            }

            let first_part = LinkedList {
                head: first_part_head,
                tail: first_part_tail,
                len: at,
                marker: PhantomData,
            };

            // Fix the head ptr of the second part
            self.head = Some(split_node);
            self.len = self.len - at;

            first_part
        } else {
            mem::replace(self, LinkedList::new())
        }
    }

    #[inline]
    unsafe fn split_off_after_node(
        &mut self,
        split_node: Option<NonNull<Node<T>>>,
        at: usize,
    ) -> Self {
        // The split node is the new tail node of the first part and owns
        // the head of the second part.
        if let Some(mut split_node) = split_node {
            let second_part_head;
            let second_part_tail;
            second_part_head = split_node.as_mut().next.take();
            if let Some(mut head) = second_part_head {
                head.as_mut().prev = None;
                second_part_tail = self.tail;
            } else {
                second_part_tail = None;
            }

            let second_part = LinkedList {
                head: second_part_head,
                tail: second_part_tail,
                len: self.len - at,
                marker: PhantomData,
            };

            // Fix the tail ptr of the first part
            self.tail = Some(split_node);
            self.len = at;

            second_part
        } else {
            mem::replace(self, LinkedList::new())
        }
    }
}

#[stable(feature = "rust1", since = "1.0.0")]
@@ -319,6 +434,27 @@ impl<T> LinkedList<T> {
        }
    }

    /// Moves all elements from `other` to the beginning of the list.
    #[unstable(feature = "linked_list_prepend", issue = "none")]
    pub fn prepend(&mut self, other: &mut Self) {
        match self.head {
            None => mem::swap(self, other),
            Some(mut head) => {
                // `as_mut` is okay here because we have exclusive access to the entirety
                // of both lists.
                if let Some(mut other_tail) = other.tail.take() {
                    unsafe {
                        head.as_mut().prev = Some(other_tail);
                        other_tail.as_mut().next = Some(head);
                    }

                    self.head = other.head.take();
                    self.len += mem::replace(&mut other.len, 0);
                }
            }
        }
    }
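Not part of the diff: a minimal usage sketch of the new method, assuming the unstable `linked_list_prepend` feature gate introduced above.

#![feature(linked_list_prepend)]
use std::collections::LinkedList;

fn main() {
    let mut a: LinkedList<u32> = (3..=4).collect();
    let mut b: LinkedList<u32> = (1..=2).collect();
    // O(1): b's nodes are relinked at the front of a, and b is left empty.
    a.prepend(&mut b);
    assert!(b.is_empty());
    assert_eq!(a.into_iter().collect::<Vec<_>>(), vec![1, 2, 3, 4]);
}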
    /// Provides a forward iterator.
    ///
    /// # Examples
@@ -373,6 +509,42 @@ impl<T> LinkedList<T> {
        IterMut { head: self.head, tail: self.tail, len: self.len, list: self }
    }

    /// Provides a cursor at the front element.
    ///
    /// The cursor is pointing to the "ghost" non-element if the list is empty.
    #[inline]
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn cursor_front(&self) -> Cursor<'_, T> {
        Cursor { index: 0, current: self.head, list: self }
    }

    /// Provides a cursor with editing operations at the front element.
    ///
    /// The cursor is pointing to the "ghost" non-element if the list is empty.
    #[inline]
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn cursor_front_mut(&mut self) -> CursorMut<'_, T> {
        CursorMut { index: 0, current: self.head, list: self }
    }

    /// Provides a cursor at the back element.
    ///
    /// The cursor is pointing to the "ghost" non-element if the list is empty.
    #[inline]
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn cursor_back(&self) -> Cursor<'_, T> {
        Cursor { index: self.len.checked_sub(1).unwrap_or(0), current: self.tail, list: self }
    }

    /// Provides a cursor with editing operations at the back element.
    ///
    /// The cursor is pointing to the "ghost" non-element if the list is empty.
    #[inline]
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn cursor_back_mut(&mut self) -> CursorMut<'_, T> {
        CursorMut { index: self.len.checked_sub(1).unwrap_or(0), current: self.tail, list: self }
    }
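For a feel of these constructors and the "ghost" position (an illustrative sketch, not part of the diff; requires the unstable `linked_list_cursors` feature):

#![feature(linked_list_cursors)]
use std::collections::LinkedList;

fn main() {
    let list: LinkedList<u32> = (1..=3).collect();
    let mut cursor = list.cursor_front();
    assert_eq!(cursor.current(), Some(&1));
    cursor.move_prev(); // steps onto the "ghost" non-element
    assert_eq!(cursor.current(), None);
    assert_eq!(cursor.index(), None);
    cursor.move_next(); // wraps back around to the front
    assert_eq!(cursor.current(), Some(&1));
}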
    /// Returns `true` if the `LinkedList` is empty.
    ///
    /// This operation should compute in O(1) time.
@@ -703,30 +875,7 @@ impl<T> LinkedList<T> {
            }
            iter.tail
        };

-        // The split node is the new tail node of the first part and owns
-        // the head of the second part.
-        let second_part_head;
-
-        unsafe {
-            second_part_head = split_node.unwrap().as_mut().next.take();
-            if let Some(mut head) = second_part_head {
-                head.as_mut().prev = None;
-            }
-        }
-
-        let second_part = LinkedList {
-            head: second_part_head,
-            tail: self.tail,
-            len: len - at,
-            marker: PhantomData,
-        };
-
-        // Fix the tail ptr of the first part
-        self.tail = split_node;
-        self.len = at;
-
-        second_part
+        unsafe { self.split_off_after_node(split_node, at) }
    }
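For reference (not part of the diff): the stable `split_off` behavior that now goes through `split_off_after_node`.

use std::collections::LinkedList;

fn main() {
    let mut list: LinkedList<u32> = (1..=5).collect();
    // Returns everything from index 2 onward; `list` keeps the rest.
    let back = list.split_off(2);
    assert_eq!(list.into_iter().collect::<Vec<_>>(), vec![1, 2]);
    assert_eq!(back.into_iter().collect::<Vec<_>>(), vec![3, 4, 5]);
}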

    /// Creates an iterator which uses a closure to determine if an element should be removed.
@@ -986,6 +1135,388 @@ impl<T> IterMut<'_, T> {
    }
}

/// A cursor over a `LinkedList`.
///
/// A `Cursor` is like an iterator, except that it can freely seek back-and-forth.
///
/// Cursors always rest between two elements in the list, and index in a logically circular way.
/// To accommodate this, there is a "ghost" non-element that yields `None` between the head and
/// tail of the list.
///
/// When created, cursors start at the front of the list, or the "ghost" non-element if the list is empty.
#[unstable(feature = "linked_list_cursors", issue = "58533")]
pub struct Cursor<'a, T: 'a> {
    index: usize,
    current: Option<NonNull<Node<T>>>,
    list: &'a LinkedList<T>,
}

#[unstable(feature = "linked_list_cursors", issue = "58533")]
impl<T: fmt::Debug> fmt::Debug for Cursor<'_, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_tuple("Cursor").field(&self.list).field(&self.index()).finish()
    }
}

/// A cursor over a `LinkedList` with editing operations.
///
/// A `CursorMut` is like an iterator, except that it can freely seek back-and-forth, and can
/// safely mutate the list during iteration. This is because the lifetime of its yielded
/// references is tied to its own lifetime, instead of just the underlying list. This means
/// cursors cannot yield multiple elements at once.
///
/// Cursors always rest between two elements in the list, and index in a logically circular way.
/// To accommodate this, there is a "ghost" non-element that yields `None` between the head and
/// tail of the list.
#[unstable(feature = "linked_list_cursors", issue = "58533")]
pub struct CursorMut<'a, T: 'a> {
    index: usize,
    current: Option<NonNull<Node<T>>>,
    list: &'a mut LinkedList<T>,
}

#[unstable(feature = "linked_list_cursors", issue = "58533")]
impl<T: fmt::Debug> fmt::Debug for CursorMut<'_, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_tuple("CursorMut").field(&self.list).field(&self.index()).finish()
    }
}
impl<'a, T> Cursor<'a, T> {
    /// Returns the cursor position index within the `LinkedList`.
    ///
    /// This returns `None` if the cursor is currently pointing to the
    /// "ghost" non-element.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn index(&self) -> Option<usize> {
        let _ = self.current?;
        Some(self.index)
    }

    /// Moves the cursor to the next element of the `LinkedList`.
    ///
    /// If the cursor is pointing to the "ghost" non-element then this will move it to
    /// the first element of the `LinkedList`. If it is pointing to the last
    /// element of the `LinkedList` then this will move it to the "ghost" non-element.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn move_next(&mut self) {
        match self.current.take() {
            // We had no current element; the cursor was sitting at the start position
            // Next element should be the head of the list
            None => {
                self.current = self.list.head;
                self.index = 0;
            }
            // We had a previous element, so let's go to its next
            Some(current) => unsafe {
                self.current = current.as_ref().next;
                self.index += 1;
            },
        }
    }

    /// Moves the cursor to the previous element of the `LinkedList`.
    ///
    /// If the cursor is pointing to the "ghost" non-element then this will move it to
    /// the last element of the `LinkedList`. If it is pointing to the first
    /// element of the `LinkedList` then this will move it to the "ghost" non-element.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn move_prev(&mut self) {
        match self.current.take() {
            // No current. We're at the start of the list. Yield None and jump to the end.
            None => {
                self.current = self.list.tail;
                self.index = self.list.len().checked_sub(1).unwrap_or(0);
            }
            // Have a prev. Yield it and go to the previous element.
            Some(current) => unsafe {
                self.current = current.as_ref().prev;
                self.index = self.index.checked_sub(1).unwrap_or_else(|| self.list.len());
            },
        }
    }

    /// Returns a reference to the element that the cursor is currently
    /// pointing to.
    ///
    /// This returns `None` if the cursor is currently pointing to the
    /// "ghost" non-element.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn current(&self) -> Option<&'a T> {
        unsafe { self.current.map(|current| &(*current.as_ptr()).element) }
    }

    /// Returns a reference to the next element.
    ///
    /// If the cursor is pointing to the "ghost" non-element then this returns
    /// the first element of the `LinkedList`. If it is pointing to the last
    /// element of the `LinkedList` then this returns `None`.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn peek_next(&self) -> Option<&'a T> {
        unsafe {
            let next = match self.current {
                None => self.list.head,
                Some(current) => current.as_ref().next,
            };
            next.map(|next| &(*next.as_ptr()).element)
        }
    }

    /// Returns a reference to the previous element.
    ///
    /// If the cursor is pointing to the "ghost" non-element then this returns
    /// the last element of the `LinkedList`. If it is pointing to the first
    /// element of the `LinkedList` then this returns `None`.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn peek_prev(&self) -> Option<&'a T> {
        unsafe {
            let prev = match self.current {
                None => self.list.tail,
                Some(current) => current.as_ref().prev,
            };
            prev.map(|prev| &(*prev.as_ptr()).element)
        }
    }
}
impl<'a, T> CursorMut<'a, T> {
    /// Returns the cursor position index within the `LinkedList`.
    ///
    /// This returns `None` if the cursor is currently pointing to the
    /// "ghost" non-element.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn index(&self) -> Option<usize> {
        let _ = self.current?;
        Some(self.index)
    }

    /// Moves the cursor to the next element of the `LinkedList`.
    ///
    /// If the cursor is pointing to the "ghost" non-element then this will move it to
    /// the first element of the `LinkedList`. If it is pointing to the last
    /// element of the `LinkedList` then this will move it to the "ghost" non-element.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn move_next(&mut self) {
        match self.current.take() {
            // We had no current element; the cursor was sitting at the start position
            // Next element should be the head of the list
            None => {
                self.current = self.list.head;
                self.index = 0;
            }
            // We had a previous element, so let's go to its next
            Some(current) => unsafe {
                self.current = current.as_ref().next;
                self.index += 1;
            },
        }
    }

    /// Moves the cursor to the previous element of the `LinkedList`.
    ///
    /// If the cursor is pointing to the "ghost" non-element then this will move it to
    /// the last element of the `LinkedList`. If it is pointing to the first
    /// element of the `LinkedList` then this will move it to the "ghost" non-element.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn move_prev(&mut self) {
        match self.current.take() {
            // No current. We're at the start of the list. Yield None and jump to the end.
            None => {
                self.current = self.list.tail;
                self.index = self.list.len().checked_sub(1).unwrap_or(0);
            }
            // Have a prev. Yield it and go to the previous element.
            Some(current) => unsafe {
                self.current = current.as_ref().prev;
                self.index = self.index.checked_sub(1).unwrap_or_else(|| self.list.len());
            },
        }
    }

    /// Returns a reference to the element that the cursor is currently
    /// pointing to.
    ///
    /// This returns `None` if the cursor is currently pointing to the
    /// "ghost" non-element.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn current(&mut self) -> Option<&mut T> {
        unsafe { self.current.map(|current| &mut (*current.as_ptr()).element) }
    }

    /// Returns a reference to the next element.
    ///
    /// If the cursor is pointing to the "ghost" non-element then this returns
    /// the first element of the `LinkedList`. If it is pointing to the last
    /// element of the `LinkedList` then this returns `None`.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn peek_next(&mut self) -> Option<&mut T> {
        unsafe {
            let next = match self.current {
                None => self.list.head,
                Some(current) => current.as_ref().next,
            };
            next.map(|next| &mut (*next.as_ptr()).element)
        }
    }

    /// Returns a reference to the previous element.
    ///
    /// If the cursor is pointing to the "ghost" non-element then this returns
    /// the last element of the `LinkedList`. If it is pointing to the first
    /// element of the `LinkedList` then this returns `None`.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn peek_prev(&mut self) -> Option<&mut T> {
        unsafe {
            let prev = match self.current {
                None => self.list.tail,
                Some(current) => current.as_ref().prev,
            };
            prev.map(|prev| &mut (*prev.as_ptr()).element)
        }
    }

    /// Returns a read-only cursor pointing to the current element.
    ///
    /// The lifetime of the returned `Cursor` is bound to that of the
    /// `CursorMut`, which means it cannot outlive the `CursorMut` and that the
    /// `CursorMut` is frozen for the lifetime of the `Cursor`.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn as_cursor<'cm>(&'cm self) -> Cursor<'cm, T> {
        Cursor { list: self.list, current: self.current, index: self.index }
    }
}

// Now the list editing operations
impl<'a, T> CursorMut<'a, T> {
    /// Inserts a new element into the `LinkedList` after the current one.
    ///
    /// If the cursor is pointing at the "ghost" non-element then the new element is
    /// inserted at the front of the `LinkedList`.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn insert_after(&mut self, item: T) {
        unsafe {
            let spliced_node = Box::into_raw_non_null(Box::new(Node::new(item)));
            let node_next = match self.current {
                None => self.list.head,
                Some(node) => node.as_ref().next,
            };
            self.list.splice_nodes(self.current, node_next, spliced_node, spliced_node, 1);
            if self.current.is_none() {
                // The "ghost" non-element's index has changed.
                self.index = self.list.len;
            }
        }
    }

    /// Inserts a new element into the `LinkedList` before the current one.
    ///
    /// If the cursor is pointing at the "ghost" non-element then the new element is
    /// inserted at the end of the `LinkedList`.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn insert_before(&mut self, item: T) {
        unsafe {
            let spliced_node = Box::into_raw_non_null(Box::new(Node::new(item)));
            let node_prev = match self.current {
                None => self.list.tail,
                Some(node) => node.as_ref().prev,
            };
            self.list.splice_nodes(node_prev, self.current, spliced_node, spliced_node, 1);
            self.index += 1;
        }
    }

    /// Removes the current element from the `LinkedList`.
    ///
    /// The element that was removed is returned, and the cursor is
    /// moved to point to the next element in the `LinkedList`.
    ///
    /// If the cursor is currently pointing to the "ghost" non-element then no element
    /// is removed and `None` is returned.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn remove_current(&mut self) -> Option<T> {
        let unlinked_node = self.current?;
        unsafe {
            self.current = unlinked_node.as_ref().next;
            self.list.unlink_node(unlinked_node);
            let unlinked_node = Box::from_raw(unlinked_node.as_ptr());
            Some(unlinked_node.element)
        }
    }

    /// Inserts the elements from the given `LinkedList` after the current one.
    ///
    /// If the cursor is pointing at the "ghost" non-element then the new elements are
    /// inserted at the start of the `LinkedList`.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn splice_after(&mut self, list: LinkedList<T>) {
        unsafe {
            let (splice_head, splice_tail, splice_len) = match list.detach_all_nodes() {
                Some(parts) => parts,
                _ => return,
            };
            let node_next = match self.current {
                None => self.list.head,
                Some(node) => node.as_ref().next,
            };
            self.list.splice_nodes(self.current, node_next, splice_head, splice_tail, splice_len);
            if self.current.is_none() {
                // The "ghost" non-element's index has changed.
                self.index = self.list.len;
            }
        }
    }

    /// Inserts the elements from the given `LinkedList` before the current one.
    ///
    /// If the cursor is pointing at the "ghost" non-element then the new elements are
    /// inserted at the end of the `LinkedList`.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn splice_before(&mut self, list: LinkedList<T>) {
        unsafe {
            let (splice_head, splice_tail, splice_len) = match list.detach_all_nodes() {
                Some(parts) => parts,
                _ => return,
            };
            let node_prev = match self.current {
                None => self.list.tail,
                Some(node) => node.as_ref().prev,
            };
            self.list.splice_nodes(node_prev, self.current, splice_head, splice_tail, splice_len);
            self.index += splice_len;
        }
    }

    /// Splits the list into two after the current element. This will return a
    /// new list consisting of everything after the cursor, with the original
    /// list retaining everything before.
    ///
    /// If the cursor is pointing at the "ghost" non-element then the entire contents
    /// of the `LinkedList` are moved.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn split_after(&mut self) -> LinkedList<T> {
        let split_off_idx = if self.index == self.list.len { 0 } else { self.index + 1 };
        if self.index == self.list.len {
            // The "ghost" non-element's index has changed to 0.
            self.index = 0;
        }
        unsafe { self.list.split_off_after_node(self.current, split_off_idx) }
    }

    /// Splits the list into two before the current element. This will return a
    /// new list consisting of everything before the cursor, with the original
    /// list retaining everything after.
    ///
    /// If the cursor is pointing at the "ghost" non-element then the entire contents
    /// of the `LinkedList` are moved.
    #[unstable(feature = "linked_list_cursors", issue = "58533")]
    pub fn split_before(&mut self) -> LinkedList<T> {
        let split_off_idx = self.index;
        self.index = 0;
        unsafe { self.list.split_off_before_node(self.current, split_off_idx) }
    }
}
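Putting the editing operations together (an illustrative sketch, not part of the diff; unstable `linked_list_cursors` feature):

#![feature(linked_list_cursors)]
use std::collections::LinkedList;

fn main() {
    let mut list: LinkedList<u32> = (1..=3).collect();
    let mut cursor = list.cursor_front_mut(); // on 1
    cursor.insert_after(10);  // 1, 10, 2, 3 -- cursor stays on 1
    cursor.insert_before(20); // 20, 1, 10, 2, 3
    assert_eq!(cursor.remove_current(), Some(1)); // cursor moves onto 10
    assert_eq!(list.into_iter().collect::<Vec<_>>(), vec![20, 10, 2, 3]);
}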
/// An iterator produced by calling `drain_filter` on `LinkedList`.
#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")]
pub struct DrainFilter<'a, T: 'a, F: 'a>
src/liballoc/tests/linked_list.rs

@@ -304,3 +304,155 @@ fn drain_to_empty_test() {
    assert_eq!(deleted, &[1, 2, 3, 4, 5, 6]);
    assert_eq!(m.into_iter().collect::<Vec<_>>(), &[]);
}

#[test]
fn test_cursor_move_peek() {
    let mut m: LinkedList<u32> = LinkedList::new();
    m.extend(&[1, 2, 3, 4, 5, 6]);
    let mut cursor = m.cursor_front();
    assert_eq!(cursor.current(), Some(&1));
    assert_eq!(cursor.peek_next(), Some(&2));
    assert_eq!(cursor.peek_prev(), None);
    assert_eq!(cursor.index(), Some(0));
    cursor.move_prev();
    assert_eq!(cursor.current(), None);
    assert_eq!(cursor.peek_next(), Some(&1));
    assert_eq!(cursor.peek_prev(), Some(&6));
    assert_eq!(cursor.index(), None);
    cursor.move_next();
    cursor.move_next();
    assert_eq!(cursor.current(), Some(&2));
    assert_eq!(cursor.peek_next(), Some(&3));
    assert_eq!(cursor.peek_prev(), Some(&1));
    assert_eq!(cursor.index(), Some(1));

    let mut cursor = m.cursor_back();
    assert_eq!(cursor.current(), Some(&6));
    assert_eq!(cursor.peek_next(), None);
    assert_eq!(cursor.peek_prev(), Some(&5));
    assert_eq!(cursor.index(), Some(5));
    cursor.move_next();
    assert_eq!(cursor.current(), None);
    assert_eq!(cursor.peek_next(), Some(&1));
    assert_eq!(cursor.peek_prev(), Some(&6));
    assert_eq!(cursor.index(), None);
    cursor.move_prev();
    cursor.move_prev();
    assert_eq!(cursor.current(), Some(&5));
    assert_eq!(cursor.peek_next(), Some(&6));
    assert_eq!(cursor.peek_prev(), Some(&4));
    assert_eq!(cursor.index(), Some(4));

    let mut m: LinkedList<u32> = LinkedList::new();
    m.extend(&[1, 2, 3, 4, 5, 6]);
    let mut cursor = m.cursor_front_mut();
    assert_eq!(cursor.current(), Some(&mut 1));
    assert_eq!(cursor.peek_next(), Some(&mut 2));
    assert_eq!(cursor.peek_prev(), None);
    assert_eq!(cursor.index(), Some(0));
    cursor.move_prev();
    assert_eq!(cursor.current(), None);
    assert_eq!(cursor.peek_next(), Some(&mut 1));
    assert_eq!(cursor.peek_prev(), Some(&mut 6));
    assert_eq!(cursor.index(), None);
    cursor.move_next();
    cursor.move_next();
    assert_eq!(cursor.current(), Some(&mut 2));
    assert_eq!(cursor.peek_next(), Some(&mut 3));
    assert_eq!(cursor.peek_prev(), Some(&mut 1));
    assert_eq!(cursor.index(), Some(1));
    let mut cursor2 = cursor.as_cursor();
    assert_eq!(cursor2.current(), Some(&2));
    assert_eq!(cursor2.index(), Some(1));
    cursor2.move_next();
    assert_eq!(cursor2.current(), Some(&3));
    assert_eq!(cursor2.index(), Some(2));
    assert_eq!(cursor.current(), Some(&mut 2));
    assert_eq!(cursor.index(), Some(1));

    let mut m: LinkedList<u32> = LinkedList::new();
    m.extend(&[1, 2, 3, 4, 5, 6]);
    let mut cursor = m.cursor_back_mut();
    assert_eq!(cursor.current(), Some(&mut 6));
    assert_eq!(cursor.peek_next(), None);
    assert_eq!(cursor.peek_prev(), Some(&mut 5));
    assert_eq!(cursor.index(), Some(5));
    cursor.move_next();
    assert_eq!(cursor.current(), None);
    assert_eq!(cursor.peek_next(), Some(&mut 1));
    assert_eq!(cursor.peek_prev(), Some(&mut 6));
    assert_eq!(cursor.index(), None);
    cursor.move_prev();
    cursor.move_prev();
    assert_eq!(cursor.current(), Some(&mut 5));
    assert_eq!(cursor.peek_next(), Some(&mut 6));
    assert_eq!(cursor.peek_prev(), Some(&mut 4));
    assert_eq!(cursor.index(), Some(4));
    let mut cursor2 = cursor.as_cursor();
    assert_eq!(cursor2.current(), Some(&5));
    assert_eq!(cursor2.index(), Some(4));
    cursor2.move_prev();
    assert_eq!(cursor2.current(), Some(&4));
    assert_eq!(cursor2.index(), Some(3));
    assert_eq!(cursor.current(), Some(&mut 5));
    assert_eq!(cursor.index(), Some(4));
}

#[test]
fn test_cursor_mut_insert() {
    let mut m: LinkedList<u32> = LinkedList::new();
    m.extend(&[1, 2, 3, 4, 5, 6]);
    let mut cursor = m.cursor_front_mut();
    cursor.insert_before(7);
    cursor.insert_after(8);
    check_links(&m);
    assert_eq!(m.iter().cloned().collect::<Vec<_>>(), &[7, 1, 8, 2, 3, 4, 5, 6]);
    let mut cursor = m.cursor_front_mut();
    cursor.move_prev();
    cursor.insert_before(9);
    cursor.insert_after(10);
    check_links(&m);
    assert_eq!(m.iter().cloned().collect::<Vec<_>>(), &[10, 7, 1, 8, 2, 3, 4, 5, 6, 9]);
    let mut cursor = m.cursor_front_mut();
    cursor.move_prev();
    assert_eq!(cursor.remove_current(), None);
    cursor.move_next();
    cursor.move_next();
    assert_eq!(cursor.remove_current(), Some(7));
    cursor.move_prev();
    cursor.move_prev();
    cursor.move_prev();
    assert_eq!(cursor.remove_current(), Some(9));
    cursor.move_next();
    assert_eq!(cursor.remove_current(), Some(10));
    check_links(&m);
    assert_eq!(m.iter().cloned().collect::<Vec<_>>(), &[1, 8, 2, 3, 4, 5, 6]);
    let mut cursor = m.cursor_front_mut();
    let mut p: LinkedList<u32> = LinkedList::new();
    p.extend(&[100, 101, 102, 103]);
    let mut q: LinkedList<u32> = LinkedList::new();
    q.extend(&[200, 201, 202, 203]);
    cursor.splice_after(p);
    cursor.splice_before(q);
    check_links(&m);
    assert_eq!(
        m.iter().cloned().collect::<Vec<_>>(),
        &[200, 201, 202, 203, 1, 100, 101, 102, 103, 8, 2, 3, 4, 5, 6]
    );
    let mut cursor = m.cursor_front_mut();
    cursor.move_prev();
    let tmp = cursor.split_before();
    assert_eq!(m.into_iter().collect::<Vec<_>>(), &[]);
    m = tmp;
    let mut cursor = m.cursor_front_mut();
    cursor.move_next();
    cursor.move_next();
    cursor.move_next();
    cursor.move_next();
    cursor.move_next();
    cursor.move_next();
    let tmp = cursor.split_after();
    assert_eq!(tmp.into_iter().collect::<Vec<_>>(), &[102, 103, 8, 2, 3, 4, 5, 6]);
    check_links(&m);
    assert_eq!(m.iter().cloned().collect::<Vec<_>>(), &[200, 201, 202, 203, 1, 100, 101]);
}
src/libcore/char/methods.rs

@@ -3,7 +3,7 @@
 use crate::slice;
 use crate::str::from_utf8_unchecked_mut;
 use crate::unicode::printable::is_printable;
-use crate::unicode::tables::{conversions, derived_property, general_category, property};
+use crate::unicode::{self, conversions};

 use super::*;

@@ -552,7 +552,7 @@ impl char {
     pub fn is_alphabetic(self) -> bool {
         match self {
             'a'..='z' | 'A'..='Z' => true,
-            c => c > '\x7f' && derived_property::Alphabetic(c),
+            c => c > '\x7f' && unicode::Alphabetic(c),
         }
     }

@@ -583,7 +583,7 @@ impl char {
     pub fn is_lowercase(self) -> bool {
         match self {
             'a'..='z' => true,
-            c => c > '\x7f' && derived_property::Lowercase(c),
+            c => c > '\x7f' && unicode::Lowercase(c),
         }
     }

@@ -614,7 +614,7 @@ impl char {
     pub fn is_uppercase(self) -> bool {
         match self {
             'A'..='Z' => true,
-            c => c > '\x7f' && derived_property::Uppercase(c),
+            c => c > '\x7f' && unicode::Uppercase(c),
         }
     }

@@ -642,7 +642,7 @@ impl char {
     pub fn is_whitespace(self) -> bool {
         match self {
             ' ' | '\x09'..='\x0d' => true,
-            c => c > '\x7f' && property::White_Space(c),
+            c => c > '\x7f' && unicode::White_Space(c),
         }
     }

@@ -693,7 +693,7 @@ impl char {
     #[stable(feature = "rust1", since = "1.0.0")]
     #[inline]
     pub fn is_control(self) -> bool {
-        general_category::Cc(self)
+        unicode::Cc(self)
     }

     /// Returns `true` if this `char` has the `Grapheme_Extend` property.

@@ -707,7 +707,7 @@ impl char {
     /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
     #[inline]
     pub(crate) fn is_grapheme_extended(self) -> bool {
-        derived_property::Grapheme_Extend(self)
+        unicode::Grapheme_Extend(self)
     }

     /// Returns `true` if this `char` has one of the general categories for numbers.

@@ -739,7 +739,7 @@ impl char {
     pub fn is_numeric(self) -> bool {
         match self {
             '0'..='9' => true,
-            c => c > '\x7f' && general_category::N(c),
+            c => c > '\x7f' && unicode::N(c),
         }
     }
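Each of these predicates keeps the same shape: an ASCII fast path decided inline, with only non-ASCII characters falling through to the generated tables. The pattern in isolation (a sketch; `table_lookup` is a placeholder closure, not a std item):

fn has_property(c: char, table_lookup: impl Fn(char) -> bool) -> bool {
    match c {
        // ASCII fast path: answered without touching the tables
        'a'..='z' | 'A'..='Z' => true,
        // non-ASCII falls through to the generated lookup
        c => c > '\x7f' && table_lookup(c),
    }
}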
src/libcore/char/mod.rs

@@ -37,9 +37,9 @@ pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error};

 // unstable re-exports
 #[unstable(feature = "unicode_version", issue = "49726")]
-pub use crate::unicode::tables::UNICODE_VERSION;
-#[unstable(feature = "unicode_version", issue = "49726")]
 pub use crate::unicode::version::UnicodeVersion;
+#[unstable(feature = "unicode_version", issue = "49726")]
+pub use crate::unicode::UNICODE_VERSION;

 use crate::fmt::{self, Write};
 use crate::iter::FusedIterator;
src/libcore/unicode/bool_trie.rs (deleted)

@@ -1,66 +0,0 @@
/// BoolTrie is a trie for representing a set of Unicode codepoints. It is
/// implemented with postfix compression (sharing of identical child nodes),
/// which gives both compact size and fast lookup.
///
/// The space of Unicode codepoints is divided into 3 subareas, each
/// represented by a trie with different depth. In the first (0..0x800), there
/// is no trie structure at all; each u64 entry corresponds to a bitvector
/// effectively holding 64 bool values.
///
/// In the second (0x800..0x10000), each child of the root node represents a
/// 64-wide subrange, but instead of storing the full 64-bit value of the leaf,
/// the trie stores an 8-bit index into a shared table of leaf values. This
/// exploits the fact that in reasonable sets, many such leaves can be shared.
///
/// In the third (0x10000..0x110000), each child of the root node represents a
/// 4096-wide subrange, and the trie stores an 8-bit index into a 64-byte slice
/// of a child tree. Each of these 64 bytes represents an index into the table
/// of shared 64-bit leaf values. This exploits the sparse structure in the
/// non-BMP range of most Unicode sets.
pub struct BoolTrie {
    // 0..0x800 (corresponding to 1 and 2 byte utf-8 sequences)
    pub r1: [u64; 32], // leaves

    // 0x800..0x10000 (corresponding to 3 byte utf-8 sequences)
    pub r2: [u8; 992], // first level
    pub r3: &'static [u64], // leaves

    // 0x10000..0x110000 (corresponding to 4 byte utf-8 sequences)
    pub r4: [u8; 256], // first level
    pub r5: &'static [u8], // second level
    pub r6: &'static [u64], // leaves
}

impl BoolTrie {
    pub fn lookup(&self, c: char) -> bool {
        let c = c as u32;
        if c < 0x800 {
            trie_range_leaf(c, self.r1[(c >> 6) as usize])
        } else if c < 0x10000 {
            let child = self.r2[(c >> 6) as usize - 0x20];
            trie_range_leaf(c, self.r3[child as usize])
        } else {
            let child = self.r4[(c >> 12) as usize - 0x10];
            let leaf = self.r5[((child as usize) << 6) + ((c >> 6) as usize & 0x3f)];
            trie_range_leaf(c, self.r6[leaf as usize])
        }
    }
}

pub struct SmallBoolTrie {
    pub(crate) r1: &'static [u8], // first level
    pub(crate) r2: &'static [u64], // leaves
}

impl SmallBoolTrie {
    pub fn lookup(&self, c: char) -> bool {
        let c = c as u32;
        match self.r1.get((c >> 6) as usize) {
            Some(&child) => trie_range_leaf(c, self.r2[child as usize]),
            None => false,
        }
    }
}

fn trie_range_leaf(c: u32, bitmap_chunk: u64) -> bool {
    ((bitmap_chunk >> (c & 63)) & 1) != 0
}
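To make the removed layout concrete, a worked lookup for one codepoint (illustrative; the actual r2/r3 contents are generated data, so only the index arithmetic comes from the code above):

// Membership test for c = U+0939, which falls in the middle area (0x800..0x10000):
//   first-level slot: (0x939 >> 6) - 0x20 = 0x24 - 0x20 = 4, so child = r2[4]
//   shared leaf:      r3[child as usize], one u64 covering 64 codepoints
//   bit within leaf:  0x939 & 63 = 0x39
// giving: ((r3[r2[4] as usize] >> 0x39) & 1) != 0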
src/libcore/unicode/mod.rs

@@ -1,15 +1,59 @@
 #![unstable(feature = "unicode_internals", issue = "none")]
 #![allow(missing_docs)]

-mod bool_trie;
 pub(crate) mod printable;
-pub(crate) mod tables;
+mod unicode_data;
 pub(crate) mod version;

+use version::UnicodeVersion;
+
+/// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of
+/// `char` and `str` methods are based on.
+#[unstable(feature = "unicode_version", issue = "49726")]
+pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion {
+    major: unicode_data::UNICODE_VERSION.0,
+    minor: unicode_data::UNICODE_VERSION.1,
+    micro: unicode_data::UNICODE_VERSION.2,
+    _priv: (),
+};
+
 // For use in liballoc, not re-exported in libstd.
 pub mod derived_property {
-    pub use crate::unicode::tables::derived_property::{Case_Ignorable, Cased};
+    pub use super::{Case_Ignorable, Cased};
 }
-pub mod conversions {
-    pub use crate::unicode::tables::conversions::{to_lower, to_upper};
-}

+pub use unicode_data::alphabetic::lookup as Alphabetic;
+pub use unicode_data::case_ignorable::lookup as Case_Ignorable;
+pub use unicode_data::cased::lookup as Cased;
+pub use unicode_data::cc::lookup as Cc;
+pub use unicode_data::conversions;
+pub use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
+pub use unicode_data::lowercase::lookup as Lowercase;
+pub use unicode_data::n::lookup as N;
+pub use unicode_data::uppercase::lookup as Uppercase;
+pub use unicode_data::white_space::lookup as White_Space;
+
+#[inline(always)]
+fn range_search<const N: usize, const N1: usize, const N2: usize>(
+    needle: u32,
+    chunk_idx_map: &[u8; N],
+    (last_chunk_idx, last_chunk_mapping): (u16, u8),
+    bitset_chunk_idx: &[[u8; 16]; N1],
+    bitset: &[u64; N2],
+) -> bool {
+    let bucket_idx = (needle / 64) as usize;
+    let chunk_map_idx = bucket_idx / 16;
+    let chunk_piece = bucket_idx % 16;
+    let chunk_idx = if chunk_map_idx >= N {
+        if chunk_map_idx == last_chunk_idx as usize {
+            last_chunk_mapping
+        } else {
+            return false;
+        }
+    } else {
+        chunk_idx_map[chunk_map_idx]
+    };
+    let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece];
+    let word = bitset[(idx as usize)];
+    (word & (1 << (needle % 64) as u64)) != 0
+}
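To see how the new `range_search` resolves a query, a hedged walk-through for needle = 0x1F600 (the table values named here are placeholders; only the index arithmetic comes from the function above):

// bucket_idx    = 0x1F600 / 64 = 2008   (which 64-bit word would hold the bit)
// chunk_map_idx = 2008 / 16    = 125    (which 16-entry chunk of buckets)
// chunk_piece   = 2008 % 16    = 8      (position inside that chunk)
// If 125 < N: chunk_idx = chunk_idx_map[125]; otherwise the needle is only in
// range when 125 == last_chunk_idx, in which case last_chunk_mapping is used.
// Finally: word = bitset[bitset_chunk_idx[chunk_idx as usize][8] as usize],
// and the answer is bit (0x1F600 % 64) == bit 0 of `word`.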
(File diff suppressed because it is too large.)
src/libcore/unicode/unicode.py (deleted)

@@ -1,878 +0,0 @@
#!/usr/bin/env python

"""
Regenerate Unicode tables (tables.rs).
"""

# This script uses the Unicode tables as defined
# in the UnicodeFiles class.

# Since this should not require frequent updates, we just store this
# out-of-line and check the tables.rs file into git.

# Note that the "curl" program is required for operation.
# This script is compatible with Python 2.7 and 3.x.

import argparse
import datetime
import fileinput
import itertools
import os
import re
import textwrap
import subprocess

from collections import defaultdict, namedtuple

try:
    # Python 3
    from itertools import zip_longest
    from io import StringIO
except ImportError:
    # Python 2 compatibility
    zip_longest = itertools.izip_longest
    from StringIO import StringIO

try:
    # Completely optional type hinting
    # (Python 2 compatible using comments,
    # see: https://mypy.readthedocs.io/en/latest/python2.html)
    # This is very helpful in typing-aware IDE like PyCharm.
    from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Set, Tuple
except ImportError:
    pass

# We don't use enum.Enum because of Python 2.7 compatibility.
class UnicodeFiles(object):
    # ReadMe does not contain any Unicode data, we
    # only use it to extract versions.
    README = "ReadMe.txt"

    DERIVED_CORE_PROPERTIES = "DerivedCoreProperties.txt"
    DERIVED_NORMALIZATION_PROPS = "DerivedNormalizationProps.txt"
    PROPS = "PropList.txt"
    SCRIPTS = "Scripts.txt"
    SPECIAL_CASING = "SpecialCasing.txt"
    UNICODE_DATA = "UnicodeData.txt"


# The order doesn't really matter (Python < 3.6 won't preserve it),
# we only want to aggregate all the file names.
ALL_UNICODE_FILES = tuple(
    value for name, value in UnicodeFiles.__dict__.items()
    if not name.startswith("_")
)

assert len(ALL_UNICODE_FILES) == 7, "Unexpected number of unicode files"

# The directory this file is located in.
THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# Where to download the Unicode data. The downloaded files
# will be placed in sub-directories named after Unicode version.
FETCH_DIR = os.path.join(THIS_DIR, "downloaded")

FETCH_URL_LATEST = "ftp://ftp.unicode.org/Public/UNIDATA/{filename}"
FETCH_URL_VERSION = "ftp://ftp.unicode.org/Public/{version}/ucd/{filename}"

PREAMBLE = """\
// NOTE: The following code was generated by "./unicode.py", do not edit directly

#![allow(missing_docs, non_upper_case_globals, non_snake_case, clippy::unreadable_literal)]

use crate::unicode::bool_trie::{{BoolTrie, SmallBoolTrie}};
use crate::unicode::version::UnicodeVersion;
""".format(year=datetime.datetime.now().year)

# Mapping taken from Table 12 from:
# http://www.unicode.org/reports/tr44/#General_Category_Values
EXPANDED_CATEGORIES = {
    "Lu": ["LC", "L"], "Ll": ["LC", "L"], "Lt": ["LC", "L"],
    "Lm": ["L"], "Lo": ["L"],
    "Mn": ["M"], "Mc": ["M"], "Me": ["M"],
    "Nd": ["N"], "Nl": ["N"], "No": ["N"],
    "Pc": ["P"], "Pd": ["P"], "Ps": ["P"], "Pe": ["P"],
    "Pi": ["P"], "Pf": ["P"], "Po": ["P"],
    "Sm": ["S"], "Sc": ["S"], "Sk": ["S"], "So": ["S"],
    "Zs": ["Z"], "Zl": ["Z"], "Zp": ["Z"],
    "Cc": ["C"], "Cf": ["C"], "Cs": ["C"], "Co": ["C"], "Cn": ["C"],
}

# This is the (inclusive) range of surrogate codepoints.
# These are not valid Rust characters.
SURROGATE_CODEPOINTS_RANGE = (0xd800, 0xdfff)

UnicodeData = namedtuple(
    "UnicodeData", (
        # Conversions:
        "to_upper", "to_lower", "to_title",

        # Decompositions: canonical decompositions, compatibility decomp
        "canon_decomp", "compat_decomp",

        # Grouped: general categories and combining characters
        "general_categories", "combines",
    )
)

UnicodeVersion = namedtuple(
    "UnicodeVersion", ("major", "minor", "micro", "as_str")
)

def fetch_files(version=None):
    # type: (str) -> UnicodeVersion
    """
    Fetch all the Unicode files from unicode.org.

    This will use cached files (stored in `FETCH_DIR`) if they exist,
    creating them if they don't. In any case, the Unicode version
    is always returned.

    :param version: The desired Unicode version, as string.
        (If None, defaults to latest final release available,
        querying the unicode.org service).
    """
    have_version = check_stored_version(version)
    if have_version:
        return have_version

    if version:
        # Check if the desired version exists on the server.
        get_fetch_url = lambda name: FETCH_URL_VERSION.format(version=version, filename=name)
    else:
        # Extract the latest version.
        get_fetch_url = lambda name: FETCH_URL_LATEST.format(filename=name)

    readme_url = get_fetch_url(UnicodeFiles.README)

    print("Fetching: {}".format(readme_url))
    readme_content = subprocess.check_output(("curl", readme_url))

    unicode_version = parse_readme_unicode_version(
        readme_content.decode("utf8")
    )

    download_dir = get_unicode_dir(unicode_version)
    if not os.path.exists(download_dir):
        # For 2.7 compat, we don't use `exist_ok=True`.
        os.makedirs(download_dir)

    for filename in ALL_UNICODE_FILES:
        file_path = get_unicode_file_path(unicode_version, filename)

        if os.path.exists(file_path):
            # Assume file on the server didn't change if it's been saved before.
            continue

        if filename == UnicodeFiles.README:
            with open(file_path, "wb") as fd:
                fd.write(readme_content)
        else:
            url = get_fetch_url(filename)
            print("Fetching: {}".format(url))
            subprocess.check_call(("curl", "-o", file_path, url))

    return unicode_version


def check_stored_version(version):
    # type: (Optional[str]) -> Optional[UnicodeVersion]
    """
    Given desired Unicode version, return the version
    if stored files are all present, and `None` otherwise.
    """
    if not version:
        # If no desired version specified, we should check what's the latest
        # version, skipping stored version checks.
        return None

    fetch_dir = os.path.join(FETCH_DIR, version)

    for filename in ALL_UNICODE_FILES:
        file_path = os.path.join(fetch_dir, filename)

        if not os.path.exists(file_path):
            return None

    with open(os.path.join(fetch_dir, UnicodeFiles.README)) as fd:
        return parse_readme_unicode_version(fd.read())

def parse_readme_unicode_version(readme_content):
    # type: (str) -> UnicodeVersion
    """
    Parse the Unicode version contained in their `ReadMe.txt` file.
    """
    # "Raw string" is necessary for \d not being treated as escape char
    # (for the sake of compat with future Python versions).
    # See: https://docs.python.org/3.6/whatsnew/3.6.html#deprecated-python-behavior
    pattern = r"for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
    groups = re.search(pattern, readme_content).groups()

    return UnicodeVersion(*map(int, groups), as_str=".".join(groups))


def get_unicode_dir(unicode_version):
    # type: (UnicodeVersion) -> str
    """
    Indicate in which parent dir the Unicode data files should be stored.

    This returns a full, absolute path.
    """
    return os.path.join(FETCH_DIR, unicode_version.as_str)


def get_unicode_file_path(unicode_version, filename):
    # type: (UnicodeVersion, str) -> str
    """
    Indicate where the Unicode data file should be stored.
    """
    return os.path.join(get_unicode_dir(unicode_version), filename)


def is_surrogate(n):
    # type: (int) -> bool
    """
    Tell if given codepoint is a surrogate (not a valid Rust character).
    """
    return SURROGATE_CODEPOINTS_RANGE[0] <= n <= SURROGATE_CODEPOINTS_RANGE[1]

def load_unicode_data(file_path):
    # type: (str) -> UnicodeData
    """
    Load main Unicode data.
    """
    # Conversions
    to_lower = {}  # type: Dict[int, Tuple[int, int, int]]
    to_upper = {}  # type: Dict[int, Tuple[int, int, int]]
    to_title = {}  # type: Dict[int, Tuple[int, int, int]]

    # Decompositions
    compat_decomp = {}  # type: Dict[int, List[int]]
    canon_decomp = {}  # type: Dict[int, List[int]]

    # Combining characters
    # FIXME: combines are not used
    combines = defaultdict(set)  # type: Dict[str, Set[int]]

    # Categories
    general_categories = defaultdict(set)  # type: Dict[str, Set[int]]
    category_assigned_codepoints = set()  # type: Set[int]

    all_codepoints = {}

    range_start = -1

    for line in fileinput.input(file_path):
        data = line.split(";")
        if len(data) != 15:
            continue
        codepoint = int(data[0], 16)
        if is_surrogate(codepoint):
            continue
        if range_start >= 0:
            for i in range(range_start, codepoint):
                all_codepoints[i] = data
            range_start = -1
        if data[1].endswith(", First>"):
            range_start = codepoint
            continue
        all_codepoints[codepoint] = data

    for code, data in all_codepoints.items():
        (code_org, name, gencat, combine, bidi,
         decomp, deci, digit, num, mirror,
         old, iso, upcase, lowcase, titlecase) = data

        # Generate char to char direct common and simple conversions:

        # Uppercase to lowercase
        if lowcase != "" and code_org != lowcase:
            to_lower[code] = (int(lowcase, 16), 0, 0)

        # Lowercase to uppercase
        if upcase != "" and code_org != upcase:
            to_upper[code] = (int(upcase, 16), 0, 0)

        # Title case
        if titlecase.strip() != "" and code_org != titlecase:
            to_title[code] = (int(titlecase, 16), 0, 0)

        # Store decomposition, if given
        if decomp:
            decompositions = decomp.split()[1:]
            decomp_code_points = [int(i, 16) for i in decompositions]

            if decomp.startswith("<"):
                # Compatibility decomposition
                compat_decomp[code] = decomp_code_points
            else:
                # Canonical decomposition
                canon_decomp[code] = decomp_code_points

        # Place letter in categories as appropriate.
        for cat in itertools.chain((gencat, ), EXPANDED_CATEGORIES.get(gencat, [])):
            general_categories[cat].add(code)
            category_assigned_codepoints.add(code)

        # Record combining class, if any.
        if combine != "0":
            combines[combine].add(code)

    # Generate Not_Assigned from Assigned.
    general_categories["Cn"] = get_unassigned_codepoints(category_assigned_codepoints)

    # Other contains Not_Assigned
    general_categories["C"].update(general_categories["Cn"])

    grouped_categories = group_categories(general_categories)

    # FIXME: combines are not used
    return UnicodeData(
        to_lower=to_lower, to_upper=to_upper, to_title=to_title,
        compat_decomp=compat_decomp, canon_decomp=canon_decomp,
        general_categories=grouped_categories, combines=combines,
    )

def load_special_casing(file_path, unicode_data):
    # type: (str, UnicodeData) -> None
    """
    Load special casing data and enrich given Unicode data.
    """
    for line in fileinput.input(file_path):
        data = line.split("#")[0].split(";")
        if len(data) == 5:
            code, lower, title, upper, _comment = data
        elif len(data) == 6:
            code, lower, title, upper, condition, _comment = data
            if condition.strip():  # Only keep unconditional mappings
                continue
        else:
            continue
        code = code.strip()
        lower = lower.strip()
        title = title.strip()
        upper = upper.strip()
        key = int(code, 16)
        for (map_, values) in ((unicode_data.to_lower, lower),
                               (unicode_data.to_upper, upper),
                               (unicode_data.to_title, title)):
            if values != code:
                split = values.split()

                codepoints = list(itertools.chain(
                    (int(i, 16) for i in split),
                    (0 for _ in range(len(split), 3))
                ))

                assert len(codepoints) == 3
                map_[key] = codepoints


def group_categories(mapping):
    # type: (Dict[Any, Iterable[int]]) -> Dict[str, List[Tuple[int, int]]]
    """
    Group codepoints mapped in "categories".
    """
    return {category: group_codepoints(codepoints)
            for category, codepoints in mapping.items()}

def group_codepoints(codepoints):
    # type: (Iterable[int]) -> List[Tuple[int, int]]
    """
    Group integral values into continuous, disjoint value ranges.

    Performs value deduplication.

    :return: sorted list of pairs denoting start and end of codepoint
        group values, both ends inclusive.

    >>> group_codepoints([1, 2, 10, 11, 12, 3, 4])
    [(1, 4), (10, 12)]
    >>> group_codepoints([1])
    [(1, 1)]
    >>> group_codepoints([1, 5, 6])
    [(1, 1), (5, 6)]
    >>> group_codepoints([])
    []
    """
    sorted_codes = sorted(set(codepoints))
    result = []  # type: List[Tuple[int, int]]

    if not sorted_codes:
        return result

    next_codes = sorted_codes[1:]
    start_code = sorted_codes[0]

    for code, next_code in zip_longest(sorted_codes, next_codes, fillvalue=None):
        if next_code is None or next_code - code != 1:
            result.append((start_code, code))
            start_code = next_code

    return result


def ungroup_codepoints(codepoint_pairs):
    # type: (Iterable[Tuple[int, int]]) -> List[int]
    """
    The inverse of group_codepoints -- produce a flat list of values
    from value range pairs.

    >>> ungroup_codepoints([(1, 4), (10, 12)])
    [1, 2, 3, 4, 10, 11, 12]
    >>> ungroup_codepoints([(1, 1), (5, 6)])
    [1, 5, 6]
    >>> ungroup_codepoints(group_codepoints([1, 2, 7, 8]))
    [1, 2, 7, 8]
    >>> ungroup_codepoints([])
    []
    """
    return list(itertools.chain.from_iterable(
        range(lo, hi + 1) for lo, hi in codepoint_pairs
    ))


def get_unassigned_codepoints(assigned_codepoints):
    # type: (Set[int]) -> Set[int]
    """
    Given a set of "assigned" codepoints, return a set
    of these that are not in assigned and not surrogate.
    """
    return {i for i in range(0, 0x110000)
            if i not in assigned_codepoints and not is_surrogate(i)}

def generate_table_lines(items, indent, wrap=98):
    # type: (Iterable[str], int, int) -> Iterator[str]
    """
    Given table items, generate wrapped lines of text with comma-separated items.

    This is a generator function.

    :param wrap: soft wrap limit (characters per line), integer.
    """
    line = " " * indent
    first = True
    for item in items:
        if len(line) + len(item) < wrap:
            if first:
                line += item
            else:
                line += ", " + item
            first = False
        else:
            yield line + ",\n"
            line = " " * indent + item

    yield line


def load_properties(file_path, interesting_props):
    # type: (str, Iterable[str]) -> Dict[str, List[Tuple[int, int]]]
    """
    Load properties data and return in grouped form.
    """
    props = defaultdict(list)  # type: Dict[str, List[Tuple[int, int]]]
    # "Raw string" is necessary for `\.` and `\w` not to be treated as escape chars
    # (for the sake of compat with future Python versions).
    # See: https://docs.python.org/3.6/whatsnew/3.6.html#deprecated-python-behavior
    re1 = re.compile(r"^ *([0-9A-F]+) *; *(\w+)")
    re2 = re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")

    for line in fileinput.input(file_path):
        match = re1.match(line) or re2.match(line)
        if match:
            groups = match.groups()

            if len(groups) == 2:
                # `re1` matched (2 groups).
                d_lo, prop = groups
                d_hi = d_lo
            else:
                d_lo, d_hi, prop = groups
        else:
            continue

        if interesting_props and prop not in interesting_props:
            continue

        lo_value = int(d_lo, 16)
        hi_value = int(d_hi, 16)

        props[prop].append((lo_value, hi_value))

    # Optimize if possible.
    for prop in props:
        props[prop] = group_codepoints(ungroup_codepoints(props[prop]))

    return props


def escape_char(c):
    # type: (int) -> str
    r"""
    Escape a codepoint for use as a Rust char literal.

    Outputs are OK to use as Rust source code as char literals,
    and they also include the necessary quotes.

    >>> escape_char(97)
    "'\\u{61}'"
    >>> escape_char(0)
    "'\\0'"
    """
    return r"'\u{%x}'" % c if c != 0 else r"'\0'"


def format_char_pair(pair):
    # type: (Tuple[int, int]) -> str
    """
    Format a pair of two Rust chars.
    """
    return "(%s,%s)" % (escape_char(pair[0]), escape_char(pair[1]))


def generate_table(
    name,  # type: str
    items,  # type: List[Tuple[int, int]]
    decl_type="&[(char, char)]",  # type: str
    is_pub=True,  # type: bool
    format_item=format_char_pair,  # type: Callable[[Tuple[int, int]], str]
):
    # type: (...) -> Iterator[str]
    """
    Generate a nicely formatted Rust constant "table" array.

    This generates actual Rust code.
    """
    pub_string = ""
    if is_pub:
        pub_string = "pub "

    yield "\n"
    yield "    #[rustfmt::skip]\n"
    yield "    %sconst %s: %s = &[\n" % (pub_string, name, decl_type)

    data = []
    first = True
    for item in items:
        if not first:
            data.append(",")
        first = False
        data.extend(format_item(item))

    for table_line in generate_table_lines("".join(data).split(","), 8):
        yield table_line

    yield "\n    ];\n"


def compute_trie(raw_data, chunk_size):
    # type: (List[int], int) -> Tuple[List[int], List[int]]
    """
    Compute postfix-compressed trie.

    See: bool_trie.rs for more details.

    >>> compute_trie([1, 2, 3, 1, 2, 3, 4, 5, 6], 3)
    ([0, 0, 1], [1, 2, 3, 4, 5, 6])
    >>> compute_trie([1, 2, 3, 1, 2, 4, 4, 5, 6], 3)
    ([0, 1, 2], [1, 2, 3, 1, 2, 4, 4, 5, 6])
    """
    root = []
    childmap = {}  # type: Dict[Tuple[int, ...], int]
    child_data = []

    assert len(raw_data) % chunk_size == 0, "Chunks must be equally sized"

    for i in range(len(raw_data) // chunk_size):
        data = raw_data[i * chunk_size : (i + 1) * chunk_size]

        # Postfix compression of child nodes (data chunks)
        # (identical child nodes are shared).

        # Make a tuple out of the list so it's hashable.
        child = tuple(data)
        if child not in childmap:
            childmap[child] = len(childmap)
            child_data.extend(data)

        root.append(childmap[child])

    return root, child_data


def generate_bool_trie(name, codepoint_ranges, is_pub=False):
    # type: (str, List[Tuple[int, int]], bool) -> Iterator[str]
    """
    Generate Rust code for a BoolTrie struct.

    This yields string fragments that should be joined to produce
    the final string.

    See: `bool_trie.rs`.
    """
    chunk_size = 64
    rawdata = [False] * 0x110000
    for (lo, hi) in codepoint_ranges:
        for cp in range(lo, hi + 1):
            rawdata[cp] = True

    # Convert to bitmap chunks of `chunk_size` bits each.
    chunks = []
    for i in range(0x110000 // chunk_size):
        chunk = 0
        for j in range(chunk_size):
            if rawdata[i * chunk_size + j]:
                chunk |= 1 << j
        chunks.append(chunk)

    pub_string = ""
    if is_pub:
        pub_string = "pub "

    yield "\n"
    yield "    #[rustfmt::skip]\n"
    yield "    %sconst %s: &super::BoolTrie = &super::BoolTrie {\n" % (pub_string, name)
    yield "        r1: [\n"
    data = ("0x%016x" % chunk for chunk in chunks[:0x800 // chunk_size])
    for fragment in generate_table_lines(data, 12):
        yield fragment
    yield "\n        ],\n"

    # 0x800..0x10000 trie
    (r2, r3) = compute_trie(chunks[0x800 // chunk_size : 0x10000 // chunk_size], 64 // chunk_size)
    yield "        r2: [\n"
    data = map(str, r2)
    for fragment in generate_table_lines(data, 12):
        yield fragment
    yield "\n        ],\n"

    yield "        r3: &[\n"
    data = ("0x%016x" % node for node in r3)
    for fragment in generate_table_lines(data, 12):
        yield fragment
    yield "\n        ],\n"

    # 0x10000..0x110000 trie
    (mid, r6) = compute_trie(chunks[0x10000 // chunk_size : 0x110000 // chunk_size],
                             64 // chunk_size)
    (r4, r5) = compute_trie(mid, 64)

    yield "        r4: [\n"
    data = map(str, r4)
    for fragment in generate_table_lines(data, 12):
        yield fragment
    yield "\n        ],\n"

    yield "        r5: &[\n"
    data = map(str, r5)
    for fragment in generate_table_lines(data, 12):
        yield fragment
    yield "\n        ],\n"

    yield "        r6: &[\n"
    data = ("0x%016x" % node for node in r6)
    for fragment in generate_table_lines(data, 12):
        yield fragment
    yield "\n        ],\n"

    yield "    };\n"


def generate_small_bool_trie(name, codepoint_ranges, is_pub=False):
    # type: (str, List[Tuple[int, int]], bool) -> Iterator[str]
    """
    Generate Rust code for a `SmallBoolTrie` struct.

    See: `bool_trie.rs`.
    """
    last_chunk = max(hi // 64 for (lo, hi) in codepoint_ranges)
    n_chunks = last_chunk + 1
    chunks = [0] * n_chunks
    for (lo, hi) in codepoint_ranges:
        for cp in range(lo, hi + 1):
            assert cp // 64 < len(chunks)
            chunks[cp // 64] |= 1 << (cp & 63)

    pub_string = ""
    if is_pub:
        pub_string = "pub "

    yield "\n"
    yield "    #[rustfmt::skip]\n"
    yield ("    %sconst %s: &super::SmallBoolTrie = &super::SmallBoolTrie {\n"
           % (pub_string, name))

    (r1, r2) = compute_trie(chunks, 1)

    yield "        r1: &[\n"
    data = (str(node) for node in r1)
    for fragment in generate_table_lines(data, 12):
        yield fragment
    yield "\n        ],\n"

    yield "        r2: &[\n"
    data = ("0x%016x" % node for node in r2)
    for fragment in generate_table_lines(data, 12):
        yield fragment
    yield "\n        ],\n"

    yield "    };\n"


def generate_property_module(mod, grouped_categories, category_subset):
    # type: (str, Dict[str, List[Tuple[int, int]]], Iterable[str]) -> Iterator[str]
    """
    Generate Rust code for a module defining properties.
    """

    yield "pub(crate) mod %s {" % mod
    for cat in sorted(category_subset):
        if cat in ("Cc", "White_Space"):
            generator = generate_small_bool_trie("%s_table" % cat, grouped_categories[cat])
        else:
            generator = generate_bool_trie("%s_table" % cat, grouped_categories[cat])

        for fragment in generator:
            yield fragment

        yield "\n"
        yield "    pub fn %s(c: char) -> bool {\n" % cat
        yield "        %s_table.lookup(c)\n" % cat
        yield "    }\n"

    yield "}\n\n"


def generate_conversions_module(unicode_data):
    # type: (UnicodeData) -> Iterator[str]
    """
    Generate Rust code for a module defining case conversions.
    """

    yield "pub(crate) mod conversions {"
    yield """
    pub fn to_lower(c: char) -> [char; 3] {
        match bsearch_case_table(c, to_lowercase_table) {
            None => [c, '\\0', '\\0'],
            Some(index) => to_lowercase_table[index].1,
        }
    }

    pub fn to_upper(c: char) -> [char; 3] {
        match bsearch_case_table(c, to_uppercase_table) {
            None => [c, '\\0', '\\0'],
            Some(index) => to_uppercase_table[index].1,
        }
    }

    fn bsearch_case_table(c: char, table: &[(char, [char; 3])]) -> Option<usize> {
        table.binary_search_by(|&(key, _)| key.cmp(&c)).ok()
    }\n"""

    decl_type = "&[(char, [char; 3])]"
    format_conversion = lambda x: "({},[{},{},{}])".format(*(
        escape_char(c) for c in (x[0], x[1][0], x[1][1], x[1][2])
    ))

    for fragment in generate_table(
        name="to_lowercase_table",
        items=sorted(unicode_data.to_lower.items(), key=lambda x: x[0]),
        decl_type=decl_type,
        is_pub=False,
        format_item=format_conversion
    ):
        yield fragment

    for fragment in generate_table(
        name="to_uppercase_table",
        items=sorted(unicode_data.to_upper.items(), key=lambda x: x[0]),
        decl_type=decl_type,
        is_pub=False,
        format_item=format_conversion
    ):
        yield fragment

    yield "}\n"


def parse_args():
    # type: () -> argparse.Namespace
    """
    Parse command line arguments.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("-v", "--version", default=None, type=str,
                        help="Unicode version to use (if not specified,"
                             " defaults to latest release).")

    return parser.parse_args()


def main():
    # type: () -> None
    """
    Script entry point.
    """
    args = parse_args()

    unicode_version = fetch_files(args.version)
    print("Using Unicode version: {}".format(unicode_version.as_str))

    # All the writing happens entirely in memory; we only write to the file
    # once we have generated the file content (it's not very large, <1 MB).
    buf = StringIO()
    buf.write(PREAMBLE)

    unicode_version_notice = textwrap.dedent("""
    /// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of
    /// `char` and `str` methods are based on.
    #[unstable(feature = "unicode_version", issue = "49726")]
    pub const UNICODE_VERSION: UnicodeVersion =
        UnicodeVersion {{ major: {v.major}, minor: {v.minor}, micro: {v.micro}, _priv: () }};
    """).format(v=unicode_version)
    buf.write(unicode_version_notice)

    get_path = lambda f: get_unicode_file_path(unicode_version, f)

    unicode_data = load_unicode_data(get_path(UnicodeFiles.UNICODE_DATA))
    load_special_casing(get_path(UnicodeFiles.SPECIAL_CASING), unicode_data)

    want_derived = {"Alphabetic", "Lowercase", "Uppercase",
                    "Cased", "Case_Ignorable", "Grapheme_Extend"}
    derived = load_properties(get_path(UnicodeFiles.DERIVED_CORE_PROPERTIES), want_derived)

    props = load_properties(get_path(UnicodeFiles.PROPS),
                            {"White_Space", "Join_Control", "Noncharacter_Code_Point"})

    # Category tables
    for (name, categories, category_subset) in (
        ("general_category", unicode_data.general_categories, ["N", "Cc"]),
        ("derived_property", derived, want_derived),
        ("property", props, ["White_Space"])
    ):
        for fragment in generate_property_module(name, categories, category_subset):
            buf.write(fragment)

    for fragment in generate_conversions_module(unicode_data):
        buf.write(fragment)

    tables_rs_path = os.path.join(THIS_DIR, "tables.rs")

    # Actually write out the file content.
    # This will overwrite the file if it exists.
    with open(tables_rs_path, "w") as fd:
        fd.write(buf.getvalue())

    print("Regenerated tables.rs.")


if __name__ == "__main__":
    main()
2343 src/libcore/unicode/unicode_data.rs (new file)
File diff suppressed because it is too large
@@ -2479,19 +2479,21 @@ impl<'a, 'tcx> InferCtxt<'a, 'tcx> {
                );
                eq
            })
            .map(|ty::GeneratorInteriorTypeCause { span, scope_span, .. }| {
                (span, source_map.span_to_snippet(*span), scope_span)
            .map(|ty::GeneratorInteriorTypeCause { span, scope_span, expr, .. }| {
                (span, source_map.span_to_snippet(*span), scope_span, expr)
            });

        debug!(
            "maybe_note_obligation_cause_for_async_await: target_ty={:?} \
             generator_interior_types={:?} target_span={:?}",
            target_ty, tables.generator_interior_types, target_span
        );
        if let Some((target_span, Ok(snippet), scope_span)) = target_span {
        if let Some((target_span, Ok(snippet), scope_span, expr)) = target_span {
            self.note_obligation_cause_for_async_await(
                err,
                *target_span,
                scope_span,
                *expr,
                snippet,
                generator_did,
                last_generator,
@@ -2514,6 +2516,7 @@ impl<'a, 'tcx> InferCtxt<'a, 'tcx> {
        err: &mut DiagnosticBuilder<'_>,
        target_span: Span,
        scope_span: &Option<Span>,
        expr: Option<hir::HirId>,
        snippet: String,
        first_generator: DefId,
        last_generator: Option<DefId>,
@@ -2549,6 +2552,7 @@ impl<'a, 'tcx> InferCtxt<'a, 'tcx> {
        // not implemented.
        let is_send = self.tcx.is_diagnostic_item(sym::send_trait, trait_ref.def_id);
        let is_sync = self.tcx.is_diagnostic_item(sym::sync_trait, trait_ref.def_id);
        let hir = self.tcx.hir();
        let trait_explanation = if is_send || is_sync {
            let (trait_name, trait_verb) =
                if is_send { ("`Send`", "sent") } else { ("`Sync`", "shared") };
@@ -2564,8 +2568,8 @@ impl<'a, 'tcx> InferCtxt<'a, 'tcx> {

        let message = if let Some(name) = last_generator
            .and_then(|generator_did| self.tcx.parent(generator_did))
            .and_then(|parent_did| self.tcx.hir().as_local_hir_id(parent_did))
            .and_then(|parent_hir_id| self.tcx.hir().opt_name(parent_hir_id))
            .and_then(|parent_did| hir.as_local_hir_id(parent_did))
            .and_then(|parent_hir_id| hir.opt_name(parent_hir_id))
        {
            format!("future returned by `{}` is not {}", name, trait_name)
        } else {
@@ -2581,7 +2585,7 @@ impl<'a, 'tcx> InferCtxt<'a, 'tcx> {
        };

        // Look at the last interior type to get a span for the `.await`.
        let await_span = tables.generator_interior_types.iter().map(|i| i.span).last().unwrap();
        let await_span = tables.generator_interior_types.iter().map(|t| t.span).last().unwrap();
        let mut span = MultiSpan::from_span(await_span);
        span.push_span_label(
            await_span,
@@ -2606,6 +2610,22 @@ impl<'a, 'tcx> InferCtxt<'a, 'tcx> {
            ),
        );

        if let Some(expr_id) = expr {
            let expr = hir.expect_expr(expr_id);
            let is_ref = tables.expr_adjustments(expr).iter().any(|adj| adj.is_region_borrow());
            let parent = hir.get_parent_node(expr_id);
            if let Some(hir::Node::Expr(e)) = hir.find(parent) {
                let method_span = hir.span(parent);
                if tables.is_method_call(e) && is_ref {
                    err.span_help(
                        method_span,
                        "consider moving this method call into a `let` \
                         binding to create a shorter lived borrow",
                    );
                }
            }
        }

        // Add a note for the item obligation that remains - normally a note pointing to the
        // bound that introduced the obligation (e.g. `T: Send`).
        debug!("note_obligation_cause_for_async_await: next_code={:?}", next_code);
@@ -81,6 +81,15 @@ pub struct Adjustment<'tcx> {
    pub target: Ty<'tcx>,
}

impl Adjustment<'tcx> {
    pub fn is_region_borrow(&self) -> bool {
        match self.kind {
            Adjust::Borrow(AutoBorrow::Ref(..)) => true,
            _ => false,
        }
    }
}

#[derive(Clone, Debug, RustcEncodable, RustcDecodable, HashStable, TypeFoldable)]
pub enum Adjust<'tcx> {
    /// Go from ! to any type.
@@ -315,8 +315,7 @@ pub struct ResolvedOpaqueTy<'tcx> {
///
/// Here, we would store the type `T`, the span of the value `x`, and the "scope-span" for
/// the scope that contains `x`.
#[derive(RustcEncodable, RustcDecodable, Clone, Debug, Eq, Hash, PartialEq)]
#[derive(HashStable, TypeFoldable)]
#[derive(RustcEncodable, RustcDecodable, Clone, Debug, Eq, Hash, PartialEq, HashStable)]
pub struct GeneratorInteriorTypeCause<'tcx> {
    /// Type of the captured binding.
    pub ty: Ty<'tcx>,
@@ -324,6 +323,8 @@ pub struct GeneratorInteriorTypeCause<'tcx> {
    pub span: Span,
    /// Span of the scope of the captured binding.
    pub scope_span: Option<Span>,
    /// Expr from which the type was evaluated.
    pub expr: Option<hir::HirId>,
}

#[derive(RustcEncodable, RustcDecodable, Debug)]
@@ -436,7 +437,7 @@ pub struct TypeckTables<'tcx> {
    /// entire variable.
    pub upvar_list: ty::UpvarListMap,

    /// Stores the type, span and optional scope span of all types
    /// Stores the type, expression, span and optional scope span of all types
    /// that are live across the yield of this generator (if a generator).
    pub generator_interior_types: Vec<GeneratorInteriorTypeCause<'tcx>>,
}
@@ -429,7 +429,6 @@ pub fn print_after_hir_lowering<'tcx>(
        PpmSource(s) => {
            // Silently ignores an identified node.
            let out = &mut out;
            let src = src.clone();
            call_with_pp_support(&s, tcx.sess, Some(tcx), move |annotation| {
                debug!("pretty printing source code {:?}", s);
                let sess = annotation.sess();
@@ -447,7 +446,6 @@ pub fn print_after_hir_lowering<'tcx>(

        PpmHir(s) => {
            let out = &mut out;
            let src = src.clone();
            call_with_pp_support_hir(&s, tcx, move |annotation, krate| {
                debug!("pretty printing source code {:?}", s);
                let sess = annotation.sess();
@@ -500,7 +500,7 @@ impl<'a> Parser<'a> {
                err.span_suggestion_short(
                    lo.to(self.prev_span),
                    "remove the parentheses",
                    snippet.to_owned(),
                    snippet,
                    Applicability::MachineApplicable,
                );
            }
@@ -718,7 +718,7 @@ impl<'a, 'b> ImportResolver<'a, 'b> {
        }

        if !errors.is_empty() {
            self.throw_unresolved_import_error(errors.clone(), None);
            self.throw_unresolved_import_error(errors, None);
        }
    }

@@ -97,6 +97,7 @@ impl<'a, 'tcx> InteriorVisitor<'a, 'tcx> {
                span: source_span,
                ty: &ty,
                scope_span,
                expr: expr.map(|e| e.hir_id),
            })
            .or_insert(entries);
    }
@@ -164,17 +165,25 @@ pub fn resolve_interior<'a, 'tcx>(
    // which means that none of the regions inside relate to any other, even if
    // typeck had previously found constraints that would cause them to be related.
    let mut counter = 0;
    let types = fcx.tcx.fold_regions(&types, &mut false, |_, current_depth| {
    let fold_types: Vec<_> = types.iter().map(|(t, _)| t.ty).collect();
    let folded_types = fcx.tcx.fold_regions(&fold_types, &mut false, |_, current_depth| {
        counter += 1;
        fcx.tcx.mk_region(ty::ReLateBound(current_depth, ty::BrAnon(counter)))
    });

    // Store the generator types and spans into the tables for this generator.
    let interior_types = types.iter().map(|t| t.0.clone()).collect::<Vec<_>>();
    visitor.fcx.inh.tables.borrow_mut().generator_interior_types = interior_types;
    let types = types
        .into_iter()
        .zip(&folded_types)
        .map(|((mut interior_cause, _), ty)| {
            interior_cause.ty = ty;
            interior_cause
        })
        .collect();
    visitor.fcx.inh.tables.borrow_mut().generator_interior_types = types;

    // Extract type components
    let type_list = fcx.tcx.mk_type_list(types.into_iter().map(|t| (t.0).ty));
    let type_list = fcx.tcx.mk_type_list(folded_types.iter());

    let witness = fcx.tcx.mk_generator_witness(ty::Binder::bind(type_list));

@@ -1806,6 +1806,16 @@ fn find_opaque_ty_constraints(tcx: TyCtxt<'_>, def_id: DefId) -> Ty<'_> {
    }
}

fn are_suggestable_generic_args(generic_args: &[hir::GenericArg<'_>]) -> bool {
    generic_args
        .iter()
        .filter_map(|arg| match arg {
            hir::GenericArg::Type(ty) => Some(ty),
            _ => None,
        })
        .any(is_suggestable_infer_ty)
}

/// Whether `ty` is a type with `_` placeholders that can be inferred. Used in diagnostics only to
/// use inference to provide suggestions for the appropriate type if possible.
fn is_suggestable_infer_ty(ty: &hir::Ty<'_>) -> bool {
@@ -1815,13 +1825,16 @@ fn is_suggestable_infer_ty(ty: &hir::Ty<'_>) -> bool {
        Slice(ty) | Array(ty, _) => is_suggestable_infer_ty(ty),
        Tup(tys) => tys.iter().any(is_suggestable_infer_ty),
        Ptr(mut_ty) | Rptr(_, mut_ty) => is_suggestable_infer_ty(mut_ty.ty),
        Def(_, generic_args) => generic_args
            .iter()
            .filter_map(|arg| match arg {
                hir::GenericArg::Type(ty) => Some(ty),
                _ => None,
            })
            .any(is_suggestable_infer_ty),
        Def(_, generic_args) => are_suggestable_generic_args(generic_args),
        Path(hir::QPath::TypeRelative(ty, segment)) => {
            is_suggestable_infer_ty(ty) || are_suggestable_generic_args(segment.generic_args().args)
        }
        Path(hir::QPath::Resolved(ty_opt, hir::Path { segments, .. })) => {
            ty_opt.map_or(false, is_suggestable_infer_ty)
                || segments
                    .iter()
                    .any(|segment| are_suggestable_generic_args(segment.generic_args().args))
        }
        _ => false,
    }
}
@@ -704,7 +704,7 @@ impl Tester for Collector {
        debug!("creating test {}: {}", name, test);
        self.tests.push(testing::TestDescAndFn {
            desc: testing::TestDesc {
                name: testing::DynTestName(name.clone()),
                name: testing::DynTestName(name),
                ignore: match config.ignore {
                    Ignore::All => true,
                    Ignore::None => false,
@@ -553,7 +553,7 @@ fn run_test_in_process(
        Err(e) => calc_result(&desc, Err(e.as_ref()), &time_opts, &exec_time),
    };
    let stdout = data.lock().unwrap().to_vec();
    let message = CompletedTest::new(desc.clone(), test_result, exec_time, stdout);
    let message = CompletedTest::new(desc, test_result, exec_time, stdout);
    monitor_ch.send(message).unwrap();
}

@@ -602,7 +602,7 @@ fn spawn_test_subprocess(
        (result, test_output, exec_time)
    })();

    let message = CompletedTest::new(desc.clone(), result, exec_time, test_output);
    let message = CompletedTest::new(desc, result, exec_time, test_output);
    monitor_ch.send(message).unwrap();
}

@@ -1,5 +1,6 @@
// ignore-tidy-linelength
// ignore-wasm32-bare compiled with panic=abort by default
// compile-flags: -Z mir-opt-level=3
#![feature(box_syntax)]

fn main() {
29 src/test/ui/associated-types/issue-64848.rs (new file)
@@ -0,0 +1,29 @@
// build-pass

trait AssociatedConstant {
    const DATA: ();
}

impl<F, T> AssociatedConstant for F
where
    F: FnOnce() -> T,
    T: AssociatedConstant,
{
    const DATA: () = T::DATA;
}

impl AssociatedConstant for () {
    const DATA: () = ();
}

fn foo() -> impl AssociatedConstant {
    ()
}

fn get_data<T: AssociatedConstant>(_: T) -> &'static () {
    &T::DATA
}

fn main() {
    get_data(foo);
}
@@ -16,6 +16,11 @@ LL | let _x = get().await;
...
LL | }
| - `client` is later dropped here
help: consider moving this method call into a `let` binding to create a shorter lived borrow
--> $DIR/issue-64130-4-async-move.rs:19:15
|
LL | match client.status() {
| ^^^^^^^^^^^^^^^
= note: the return type of a function must have a statically known size

error: aborting due to previous error

@@ -20,7 +20,7 @@ LL | fn assert_sync<T: Sync>(_: T) {}
LL | assert_sync(|| {
| ^^^^^^^^^^^ future returned by `main` is not `Sync`
|
= help: within `[generator@$DIR/not-send-sync.rs:9:17: 13:6 {std::cell::Cell<i32>, ()}]`, the trait `std::marker::Sync` is not implemented for `std::cell::Cell<i32>`
= help: within `[generator@$DIR/not-send-sync.rs:9:17: 13:6 {std::cell::Cell<i32>, (), ()}]`, the trait `std::marker::Sync` is not implemented for `std::cell::Cell<i32>`
note: future is not `Sync` as this value is used across an yield
--> $DIR/not-send-sync.rs:12:9
|
@@ -76,7 +76,7 @@ error[E0720]: opaque type expands to a recursive type
LL | fn generator_capture() -> impl Sized {
| ^^^^^^^^^^ expands to a recursive type
|
= note: expanded type is `[generator@$DIR/recursive-impl-trait-type-indirect.rs:50:5: 50:26 x:impl Sized {()}]`
= note: expanded type is `[generator@$DIR/recursive-impl-trait-type-indirect.rs:50:5: 50:26 x:impl Sized {(), ()}]`

error[E0720]: opaque type expands to a recursive type
--> $DIR/recursive-impl-trait-type-indirect.rs:53:26
@@ -92,7 +92,7 @@ error[E0720]: opaque type expands to a recursive type
LL | fn generator_hold() -> impl Sized {
| ^^^^^^^^^^ expands to a recursive type
|
= note: expanded type is `[generator@$DIR/recursive-impl-trait-type-indirect.rs:58:5: 62:6 {impl Sized, ()}]`
= note: expanded type is `[generator@$DIR/recursive-impl-trait-type-indirect.rs:58:5: 62:6 {impl Sized, (), ()}]`

error[E0720]: opaque type expands to a recursive type
--> $DIR/recursive-impl-trait-type-indirect.rs:69:26
BIN src/test/ui/issues/issue-66473.rs (new file, binary file not shown)
BIN src/test/ui/issues/issue-66473.stderr (new file, binary file not shown)
49 src/test/ui/type-alias-impl-trait/issue-65918.rs (new file)
@@ -0,0 +1,49 @@
// build-pass

#![feature(type_alias_impl_trait)]

use std::marker::PhantomData;

/* copied Index and TryFrom for convenience (and simplicity) */
trait MyIndex<T> {
    type O;
    fn my_index(self) -> Self::O;
}
trait MyFrom<T>: Sized {
    type Error;
    fn my_from(value: T) -> Result<Self, Self::Error>;
}

/* MCVE starts here */
trait F {}
impl F for () {}
type DummyT<T> = impl F;
fn _dummy_t<T>() -> DummyT<T> {}

struct Phantom1<T>(PhantomData<T>);
struct Phantom2<T>(PhantomData<T>);
struct Scope<T>(Phantom2<DummyT<T>>);

impl<T> Scope<T> {
    fn new() -> Self {
        unimplemented!()
    }
}

impl<T> MyFrom<Phantom2<T>> for Phantom1<T> {
    type Error = ();
    fn my_from(_: Phantom2<T>) -> Result<Self, Self::Error> {
        unimplemented!()
    }
}

impl<T: MyFrom<Phantom2<DummyT<U>>>, U> MyIndex<Phantom1<T>> for Scope<U> {
    type O = T;
    fn my_index(self) -> Self::O {
        MyFrom::my_from(self.0).ok().unwrap()
    }
}

fn main() {
    let _pos: Phantom1<DummyT<()>> = Scope::new().my_index();
}
@@ -68,6 +68,13 @@ struct Test10 {
}

pub fn main() {
    static A = 42;
    //~^ ERROR missing type for `static` item
    static B: _ = 42;
    //~^ ERROR the type placeholder `_` is not allowed within types on item signatures
    static C: Option<_> = Some(42);
    //~^ ERROR the type placeholder `_` is not allowed within types on item signatures

    fn fn_test() -> _ { 5 }
    //~^ ERROR the type placeholder `_` is not allowed within types on item signatures

@@ -1,35 +1,35 @@
error: expected identifier, found reserved identifier `_`
--> $DIR/typeck_type_placeholder_item.rs:146:18
--> $DIR/typeck_type_placeholder_item.rs:153:18
|
LL | struct BadStruct<_>(_);
| ^ expected identifier, found reserved identifier

error: expected identifier, found reserved identifier `_`
--> $DIR/typeck_type_placeholder_item.rs:149:16
--> $DIR/typeck_type_placeholder_item.rs:156:16
|
LL | trait BadTrait<_> {}
| ^ expected identifier, found reserved identifier

error: expected identifier, found reserved identifier `_`
--> $DIR/typeck_type_placeholder_item.rs:159:19
--> $DIR/typeck_type_placeholder_item.rs:166:19
|
LL | struct BadStruct1<_, _>(_);
| ^ expected identifier, found reserved identifier

error: expected identifier, found reserved identifier `_`
--> $DIR/typeck_type_placeholder_item.rs:159:22
--> $DIR/typeck_type_placeholder_item.rs:166:22
|
LL | struct BadStruct1<_, _>(_);
| ^ expected identifier, found reserved identifier

error: expected identifier, found reserved identifier `_`
--> $DIR/typeck_type_placeholder_item.rs:164:19
--> $DIR/typeck_type_placeholder_item.rs:171:19
|
LL | struct BadStruct2<_, T>(_, T);
| ^ expected identifier, found reserved identifier

error[E0403]: the name `_` is already used for a generic parameter in this item's generic parameters
--> $DIR/typeck_type_placeholder_item.rs:159:22
--> $DIR/typeck_type_placeholder_item.rs:166:22
|
LL | struct BadStruct1<_, _>(_);
| - ^ already used
@@ -177,8 +177,29 @@ LL |
LL | b: (T, T),
|

error: missing type for `static` item
--> $DIR/typeck_type_placeholder_item.rs:71:12
|
LL | static A = 42;
| ^ help: provide a type for the item: `A: i32`

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:71:21
--> $DIR/typeck_type_placeholder_item.rs:73:15
|
LL | static B: _ = 42;
| ^
| |
| not allowed in type signatures
| help: replace `_` with the correct type: `i32`

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:75:15
|
LL | static C: Option<_> = Some(42);
| ^^^^^^^^^ not allowed in type signatures

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:78:21
|
LL | fn fn_test() -> _ { 5 }
| ^
@@ -187,7 +208,7 @@ LL | fn fn_test() -> _ { 5 }
| help: replace with the correct return type: `i32`

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:74:23
--> $DIR/typeck_type_placeholder_item.rs:81:23
|
LL | fn fn_test2() -> (_, _) { (5, 5) }
| -^--^-
@@ -197,7 +218,7 @@ LL | fn fn_test2() -> (_, _) { (5, 5) }
| help: replace with the correct return type: `(i32, i32)`

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:77:22
--> $DIR/typeck_type_placeholder_item.rs:84:22
|
LL | static FN_TEST3: _ = "test";
| ^
@@ -206,7 +227,7 @@ LL | static FN_TEST3: _ = "test";
| help: replace `_` with the correct type: `&'static str`

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:80:22
--> $DIR/typeck_type_placeholder_item.rs:87:22
|
LL | static FN_TEST4: _ = 145;
| ^
@@ -215,13 +236,13 @@ LL | static FN_TEST4: _ = 145;
| help: replace `_` with the correct type: `i32`

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:83:22
--> $DIR/typeck_type_placeholder_item.rs:90:22
|
LL | static FN_TEST5: (_, _) = (1, 2);
| ^^^^^^ not allowed in type signatures

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:86:20
--> $DIR/typeck_type_placeholder_item.rs:93:20
|
LL | fn fn_test6(_: _) { }
| ^ not allowed in type signatures
@@ -232,7 +253,7 @@ LL | fn fn_test6<T>(_: T) { }
| ^^^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:89:20
--> $DIR/typeck_type_placeholder_item.rs:96:20
|
LL | fn fn_test7(x: _) { let _x: usize = x; }
| ^ not allowed in type signatures
@@ -243,13 +264,13 @@ LL | fn fn_test7<T>(x: T) { let _x: usize = x; }
| ^^^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:92:29
--> $DIR/typeck_type_placeholder_item.rs:99:29
|
LL | fn fn_test8(_f: fn() -> _) { }
| ^ not allowed in type signatures

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:92:29
--> $DIR/typeck_type_placeholder_item.rs:99:29
|
LL | fn fn_test8(_f: fn() -> _) { }
| ^ not allowed in type signatures
@@ -260,7 +281,7 @@ LL | fn fn_test8<T>(_f: fn() -> T) { }
| ^^^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:115:12
--> $DIR/typeck_type_placeholder_item.rs:122:12
|
LL | a: _,
| ^ not allowed in type signatures
@@ -279,13 +300,13 @@ LL | b: (T, T),
|

error[E0282]: type annotations needed
--> $DIR/typeck_type_placeholder_item.rs:120:27
--> $DIR/typeck_type_placeholder_item.rs:127:27
|
LL | fn fn_test11(_: _) -> (_, _) { panic!() }
| ^^^^^^ cannot infer type

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:120:28
--> $DIR/typeck_type_placeholder_item.rs:127:28
|
LL | fn fn_test11(_: _) -> (_, _) { panic!() }
| ^ ^ not allowed in type signatures
@@ -293,7 +314,7 @@ LL | fn fn_test11(_: _) -> (_, _) { panic!() }
| not allowed in type signatures

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:124:30
--> $DIR/typeck_type_placeholder_item.rs:131:30
|
LL | fn fn_test12(x: i32) -> (_, _) { (x, x) }
| -^--^-
@@ -303,7 +324,7 @@ LL | fn fn_test12(x: i32) -> (_, _) { (x, x) }
| help: replace with the correct return type: `(i32, i32)`

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:127:33
--> $DIR/typeck_type_placeholder_item.rs:134:33
|
LL | fn fn_test13(x: _) -> (i32, _) { (x, x) }
| ------^-
@@ -312,7 +333,7 @@ LL | fn fn_test13(x: _) -> (i32, _) { (x, x) }
| help: replace with the correct return type: `(i32, i32)`

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:146:21
--> $DIR/typeck_type_placeholder_item.rs:153:21
|
LL | struct BadStruct<_>(_);
| ^ not allowed in type signatures
@@ -323,7 +344,7 @@ LL | struct BadStruct<T>(T);
| ^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:151:15
--> $DIR/typeck_type_placeholder_item.rs:158:15
|
LL | impl BadTrait<_> for BadStruct<_> {}
| ^ ^ not allowed in type signatures
@@ -336,13 +357,13 @@ LL | impl<T> BadTrait<T> for BadStruct<T> {}
| ^^^ ^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:154:34
--> $DIR/typeck_type_placeholder_item.rs:161:34
|
LL | fn impl_trait() -> impl BadTrait<_> {
| ^ not allowed in type signatures

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:159:25
--> $DIR/typeck_type_placeholder_item.rs:166:25
|
LL | struct BadStruct1<_, _>(_);
| ^ not allowed in type signatures
@@ -353,7 +374,7 @@ LL | struct BadStruct1<T, _>(T);
| ^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:164:25
--> $DIR/typeck_type_placeholder_item.rs:171:25
|
LL | struct BadStruct2<_, T>(_, T);
| ^ not allowed in type signatures
@@ -364,7 +385,7 @@ LL | struct BadStruct2<K, T>(K, T);
| ^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:168:14
--> $DIR/typeck_type_placeholder_item.rs:175:14
|
LL | type X = Box<_>;
| ^ not allowed in type signatures
@@ -381,7 +402,7 @@ LL | fn test10<T>(&self, _x : T) { }
| ^^^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:132:31
--> $DIR/typeck_type_placeholder_item.rs:139:31
|
LL | fn method_test1(&self, x: _);
| ^ not allowed in type signatures
@@ -392,7 +413,7 @@ LL | fn method_test1<T>(&self, x: T);
| ^^^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:134:31
--> $DIR/typeck_type_placeholder_item.rs:141:31
|
LL | fn method_test2(&self, x: _) -> _;
| ^ ^ not allowed in type signatures
@@ -405,7 +426,7 @@ LL | fn method_test2<T>(&self, x: T) -> T;
| ^^^ ^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:136:31
--> $DIR/typeck_type_placeholder_item.rs:143:31
|
LL | fn method_test3(&self) -> _;
| ^ not allowed in type signatures
@@ -416,7 +437,7 @@ LL | fn method_test3<T>(&self) -> T;
| ^^^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:138:26
--> $DIR/typeck_type_placeholder_item.rs:145:26
|
LL | fn assoc_fn_test1(x: _);
| ^ not allowed in type signatures
@@ -427,7 +448,7 @@ LL | fn assoc_fn_test1<T>(x: T);
| ^^^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:140:26
--> $DIR/typeck_type_placeholder_item.rs:147:26
|
LL | fn assoc_fn_test2(x: _) -> _;
| ^ ^ not allowed in type signatures
@@ -440,7 +461,7 @@ LL | fn assoc_fn_test2<T>(x: T) -> T;
| ^^^ ^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:142:28
--> $DIR/typeck_type_placeholder_item.rs:149:28
|
LL | fn assoc_fn_test3() -> _;
| ^ not allowed in type signatures
@@ -462,7 +483,7 @@ LL | fn clone_from<T>(&mut self, other: T) { *self = Test9; }
| ^^^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:102:34
--> $DIR/typeck_type_placeholder_item.rs:109:34
|
LL | fn fn_test10(&self, _x : _) { }
| ^ not allowed in type signatures
@@ -473,7 +494,7 @@ LL | fn fn_test10<T>(&self, _x : T) { }
| ^^^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:110:41
--> $DIR/typeck_type_placeholder_item.rs:117:41
|
LL | fn clone_from(&mut self, other: _) { *self = FnTest9; }
| ^ not allowed in type signatures
@@ -484,7 +505,7 @@ LL | fn clone_from<T>(&mut self, other: T) { *self = FnTest9; }
| ^^^ ^

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:174:21
--> $DIR/typeck_type_placeholder_item.rs:181:21
|
LL | type Y = impl Trait<_>;
| ^ not allowed in type signatures
@@ -508,7 +529,7 @@ LL | fn clone(&self) -> _ { Test9 }
| help: replace with the correct return type: `Test9`

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:99:31
--> $DIR/typeck_type_placeholder_item.rs:106:31
|
LL | fn fn_test9(&self) -> _ { () }
| ^
@@ -517,7 +538,7 @@ LL | fn fn_test9(&self) -> _ { () }
| help: replace with the correct return type: `()`

error[E0121]: the type placeholder `_` is not allowed within types on item signatures
--> $DIR/typeck_type_placeholder_item.rs:107:28
--> $DIR/typeck_type_placeholder_item.rs:114:28
|
LL | fn clone(&self) -> _ { FnTest9 }
| ^
@@ -525,7 +546,7 @@ LL | fn clone(&self) -> _ { FnTest9 }
| not allowed in type signatures
| help: replace with the correct return type: `main::FnTest9`

error: aborting due to 55 previous errors
error: aborting due to 58 previous errors

Some errors have detailed explanations: E0121, E0282, E0403.
For more information about an error, try `rustc --explain E0121`.
10 src/tools/unicode-table-generator/Cargo.toml (new file)
@@ -0,0 +1,10 @@
[package]
name = "unicode-bdd"
version = "0.1.0"
authors = ["Mark Rousskov <mark.simulacrum@gmail.com>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
ucd-parse = "0.1.3"
62 src/tools/unicode-table-generator/src/case_mapping.rs (new file)
@@ -0,0 +1,62 @@
use crate::{fmt_list, UnicodeData};
use std::fmt;

pub(crate) fn generate_case_mapping(data: &UnicodeData) -> String {
    let mut file = String::new();

    file.push_str(HEADER.trim_start());

    let decl_type = "&[(char, [char; 3])]";

    file.push_str(&format!(
        "static LOWERCASE_TABLE: {} = &[{}];",
        decl_type,
        fmt_list(data.to_lower.iter().map(to_mapping))
    ));
    file.push_str("\n\n");
    file.push_str(&format!(
        "static UPPERCASE_TABLE: {} = &[{}];",
        decl_type,
        fmt_list(data.to_upper.iter().map(to_mapping))
    ));
    file
}

fn to_mapping((key, (a, b, c)): (&u32, &(u32, u32, u32))) -> (CharEscape, [CharEscape; 3]) {
    (
        CharEscape(std::char::from_u32(*key).unwrap()),
        [
            CharEscape(std::char::from_u32(*a).unwrap()),
            CharEscape(std::char::from_u32(*b).unwrap()),
            CharEscape(std::char::from_u32(*c).unwrap()),
        ],
    )
}

struct CharEscape(char);

impl fmt::Debug for CharEscape {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "'{}'", self.0.escape_default())
    }
}

static HEADER: &str = "
pub fn to_lower(c: char) -> [char; 3] {
    match bsearch_case_table(c, LOWERCASE_TABLE) {
        None => [c, '\\0', '\\0'],
        Some(index) => LOWERCASE_TABLE[index].1,
    }
}

pub fn to_upper(c: char) -> [char; 3] {
    match bsearch_case_table(c, UPPERCASE_TABLE) {
        None => [c, '\\0', '\\0'],
        Some(index) => UPPERCASE_TABLE[index].1,
    }
}

fn bsearch_case_table(c: char, table: &[(char, [char; 3])]) -> Option<usize> {
    table.binary_search_by(|&(key, _)| key.cmp(&c)).ok()
}
";
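// Editor's note, not part of the original file: bsearch_case_table relies on
// the emitted tables being sorted by key. That holds here because to_lower and
// to_upper are BTreeMaps, whose iteration yields keys in ascending order, and
// char ordering matches u32 ordering for valid scalar values.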
261 src/tools/unicode-table-generator/src/main.rs (new file)
@@ -0,0 +1,261 @@
use std::collections::{BTreeMap, HashMap};
use std::ops::Range;
use ucd_parse::Codepoints;

mod case_mapping;
mod raw_emitter;
mod unicode_download;

use raw_emitter::{emit_codepoints, RawEmitter};

static PROPERTIES: &[&str] = &[
    "Alphabetic",
    "Lowercase",
    "Uppercase",
    "Cased",
    "Case_Ignorable",
    "Grapheme_Extend",
    "White_Space",
    "Cc",
    "N",
];

struct UnicodeData {
    ranges: Vec<(&'static str, Vec<Range<u32>>)>,
    to_upper: BTreeMap<u32, (u32, u32, u32)>,
    to_lower: BTreeMap<u32, (u32, u32, u32)>,
}

fn to_mapping(origin: u32, codepoints: Vec<ucd_parse::Codepoint>) -> Option<(u32, u32, u32)> {
    let mut a = None;
    let mut b = None;
    let mut c = None;

    for codepoint in codepoints {
        if origin == codepoint.value() {
            return None;
        }

        if a.is_none() {
            a = Some(codepoint.value());
        } else if b.is_none() {
            b = Some(codepoint.value());
        } else if c.is_none() {
            c = Some(codepoint.value());
        } else {
            panic!("more than 3 mapped codepoints")
        }
    }

    Some((a.unwrap(), b.unwrap_or(0), c.unwrap_or(0)))
}
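// Editor's note, not part of the original file: SpecialCasing.txt rows pack
// into this fixed-width triple. For example, the uppercase mapping of 'ß'
// (U+00DF) is "SS", which packs as Some((0x53, 0x53, 0)); a row that maps a
// codepoint back to itself yields None and is dropped, since the identity
// mapping is the default.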

static UNICODE_DIRECTORY: &str = "unicode-downloads";

fn load_data() -> UnicodeData {
    unicode_download::fetch_latest();

    let mut properties = HashMap::new();
    for row in ucd_parse::parse::<_, ucd_parse::CoreProperty>(&UNICODE_DIRECTORY).unwrap() {
        if let Some(name) = PROPERTIES.iter().find(|prop| **prop == row.property.as_str()) {
            properties.entry(*name).or_insert_with(Vec::new).push(row.codepoints);
        }
    }
    for row in ucd_parse::parse::<_, ucd_parse::Property>(&UNICODE_DIRECTORY).unwrap() {
        if let Some(name) = PROPERTIES.iter().find(|prop| **prop == row.property.as_str()) {
            properties.entry(*name).or_insert_with(Vec::new).push(row.codepoints);
        }
    }

    let mut to_lower = BTreeMap::new();
    let mut to_upper = BTreeMap::new();
    for row in ucd_parse::UnicodeDataExpander::new(
        ucd_parse::parse::<_, ucd_parse::UnicodeData>(&UNICODE_DIRECTORY).unwrap(),
    ) {
        let general_category = if ["Nd", "Nl", "No"].contains(&row.general_category.as_str()) {
            "N"
        } else {
            row.general_category.as_str()
        };
        if let Some(name) = PROPERTIES.iter().find(|prop| **prop == general_category) {
            properties
                .entry(*name)
                .or_insert_with(Vec::new)
                .push(Codepoints::Single(row.codepoint));
        }

        if let Some(mapped) = row.simple_lowercase_mapping {
            if mapped != row.codepoint {
                to_lower.insert(row.codepoint.value(), (mapped.value(), 0, 0));
            }
        }
        if let Some(mapped) = row.simple_uppercase_mapping {
            if mapped != row.codepoint {
                to_upper.insert(row.codepoint.value(), (mapped.value(), 0, 0));
            }
        }
    }

    for row in ucd_parse::parse::<_, ucd_parse::SpecialCaseMapping>(&UNICODE_DIRECTORY).unwrap() {
        if !row.conditions.is_empty() {
            // Skip conditional case mappings
            continue;
        }

        let key = row.codepoint.value();
        if let Some(lower) = to_mapping(key, row.lowercase) {
            to_lower.insert(key, lower);
        }
        if let Some(upper) = to_mapping(key, row.uppercase) {
            to_upper.insert(key, upper);
        }
    }

    let mut properties: HashMap<&'static str, Vec<Range<u32>>> = properties
        .into_iter()
        .map(|(k, v)| {
            (
                k,
                v.into_iter()
                    .flat_map(|codepoints| match codepoints {
                        Codepoints::Single(c) => c
                            .scalar()
                            .map(|ch| (ch as u32..ch as u32 + 1))
                            .into_iter()
                            .collect::<Vec<_>>(),
                        Codepoints::Range(c) => c
                            .into_iter()
                            .flat_map(|c| c.scalar().map(|ch| (ch as u32..ch as u32 + 1)))
                            .collect::<Vec<_>>(),
                    })
                    .collect::<Vec<Range<u32>>>(),
            )
        })
        .collect();

    for ranges in properties.values_mut() {
        merge_ranges(ranges);
    }

    let mut properties = properties.into_iter().collect::<Vec<_>>();
    properties.sort_by_key(|p| p.0);
    UnicodeData { ranges: properties, to_lower, to_upper }
}

fn main() {
    let write_location = std::env::args().nth(1).unwrap_or_else(|| {
        eprintln!("Must provide path to write unicode tables to");
        eprintln!(
            "e.g. {} src/libcore/unicode/unicode_data.rs",
            std::env::args().nth(0).unwrap_or_default()
        );
        std::process::exit(1);
    });

    let unicode_data = load_data();
    let ranges_by_property = &unicode_data.ranges;

    let mut total_bytes = 0;
    let mut modules = Vec::new();
    for (property, ranges) in ranges_by_property {
        let datapoints = ranges.iter().map(|r| r.end - r.start).sum::<u32>();
        let mut emitter = RawEmitter::new();
        emit_codepoints(&mut emitter, &ranges);

        modules.push((property.to_lowercase().to_string(), emitter.file));
        println!("{:15}: {} bytes, {} codepoints", property, emitter.bytes_used, datapoints,);
        total_bytes += emitter.bytes_used;
    }

    let mut table_file = String::new();

    table_file.push_str(
        "///! This file is generated by src/tools/unicode-table-generator; do not edit manually!\n",
    );

    table_file.push_str("use super::range_search;\n\n");

    table_file.push_str(&version());

    table_file.push('\n');

    modules.push((String::from("conversions"), case_mapping::generate_case_mapping(&unicode_data)));

    for (name, contents) in modules {
        table_file.push_str("#[rustfmt::skip]\n");
        table_file.push_str(&format!("pub mod {} {{\n", name));
        for line in contents.lines() {
            if !line.trim().is_empty() {
                table_file.push_str("    ");
                table_file.push_str(&line);
            }
            table_file.push('\n');
        }
        table_file.push_str("}\n\n");
    }

    std::fs::write(&write_location, format!("{}\n", table_file.trim_end())).unwrap();

    println!("Total table sizes: {} bytes", total_bytes);
}

fn version() -> String {
    let mut out = String::new();
    out.push_str("pub const UNICODE_VERSION: (u32, u32, u32) = ");

    let readme =
        std::fs::read_to_string(std::path::Path::new(UNICODE_DIRECTORY).join("ReadMe.txt"))
            .unwrap();

    let prefix = "for Version ";
    let start = readme.find(prefix).unwrap() + prefix.len();
    let end = readme.find(" of the Unicode Standard.").unwrap();
    let version =
        readme[start..end].split('.').map(|v| v.parse::<u32>().expect(&v)).collect::<Vec<_>>();
    let [major, minor, micro] = [version[0], version[1], version[2]];

    out.push_str(&format!("({}, {}, {});\n", major, minor, micro));
    out
}

fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
    let pieces = values.into_iter().map(|b| format!("{:?}, ", b)).collect::<Vec<_>>();
    let mut out = String::new();
    let mut line = format!("\n    ");
    for piece in pieces {
        if line.len() + piece.len() < 98 {
            line.push_str(&piece);
        } else {
            out.push_str(line.trim_end());
            out.push('\n');
            line = format!("    {}", piece);
        }
    }
    out.push_str(line.trim_end());
    out.push('\n');
    out
}

fn merge_ranges(ranges: &mut Vec<Range<u32>>) {
    loop {
        let mut new_ranges = Vec::new();
        let mut idx_iter = 0..(ranges.len() - 1);
        while let Some(idx) = idx_iter.next() {
            let cur = ranges[idx].clone();
            let next = ranges[idx + 1].clone();
            if cur.end == next.start {
                let _ = idx_iter.next(); // skip next as we're merging it in
                new_ranges.push(cur.start..next.end);
            } else {
                new_ranges.push(cur);
            }
        }
        new_ranges.push(ranges.last().unwrap().clone());
        if new_ranges.len() == ranges.len() {
            *ranges = new_ranges;
            break;
        } else {
            *ranges = new_ranges;
        }
    }
}
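// Editor's note, not part of the original file: merge_ranges collapses ranges
// that touch end-to-start, repeating until a pass makes no further progress,
// e.g. [0..3, 3..5, 7..9] becomes [0..5, 7..9]. The input ranges are assumed
// to be sorted and non-overlapping, as produced by load_data above.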
170 src/tools/unicode-table-generator/src/raw_emitter.rs (new file)
@ -0,0 +1,170 @@
|
||||
//! This implements the core logic of the compression scheme used to compactly
//! encode the Unicode character classes.
//!
//! The primary idea is that we 'flatten' the Unicode ranges into an enormous
//! bitset. To represent any arbitrary codepoint in a raw bitset, we would need
//! over 17 kilobytes of data per character set -- way too much for our
//! purposes.
//!
//! We have two primary goals with the encoding: we want to be compact, because
//! these tables often end up in ~every Rust program (especially the
//! grapheme_extend table, used for str debugging), including those for embedded
//! targets (where space is important). We also want to be relatively fast,
//! though this is more of a nice to have rather than a key design constraint.
//! In practice, due to modern processor design these two are closely related.
//!
//! The encoding scheme here compresses the bitset by first deduplicating the
//! "words" (64 bits on all platforms). In practice very few words are present
//! in most data sets.
//!
//! This gives us an array that maps `u8 -> word` (if we ever went beyond 256
//! words, we could go to u16 -> word or have some dual compression scheme
//! mapping into two separate sets; currently this is not dealt with).
//!
//! With that scheme, we now have a single byte for every 64 codepoints. We
//! further group these by 16 (arbitrarily chosen), and again deduplicate and
//! store in an array (u8 -> [u8; 16]).
//!
//! The indices into this array represent ranges of 64*16 = 1024 codepoints.
//!
//! This already reduces the top-level array to at most 1,086 bytes, but in
//! practice we usually can encode in far fewer (the first couple Unicode planes
//! are dense).
//!
//! The last byte of this top-level array is pulled out to a separate static
//! and trailing zeros are dropped; this is simply because grapheme_extend and
//! case_ignorable have a single entry in the 896th entry, so this shrinks them
//! down considerably.
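//!
//! As a rough sketch of how a lookup consumes these tables (illustrative
//! only: the real decoder is `super::range_search` in libcore, and the
//! split-out last chunk and stripped trailing zeros are glossed over here):
//!
//! ```ignore
//! fn lookup_sketch(c: u32) -> bool {
//!     // Each BITSET_CHUNKS_MAP entry covers 16 * 64 = 1024 codepoints.
//!     let chunk_idx = BITSET_CHUNKS_MAP[(c / 1024) as usize] as usize;
//!     // Each chunk holds 16 word indices; pick the one for this codepoint.
//!     let word_idx = BITSET_INDEX_CHUNKS[chunk_idx][(c / 64 % 16) as usize] as usize;
//!     // Test this codepoint's bit within its deduplicated 64-bit word.
//!     ((BITSET[word_idx] >> (c % 64)) & 1) != 0
//! }
//! ```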

use crate::fmt_list;
use std::collections::{BTreeSet, HashMap};
use std::convert::TryFrom;
use std::fmt::Write;
use std::ops::Range;

pub struct RawEmitter {
    pub file: String,
    pub bytes_used: usize,
}

impl RawEmitter {
    pub fn new() -> RawEmitter {
        RawEmitter { file: String::new(), bytes_used: 0 }
    }

    fn blank_line(&mut self) {
        if self.file.is_empty() || self.file.ends_with("\n\n") {
            return;
        }
        writeln!(&mut self.file).unwrap();
    }

    fn emit_bitset(&mut self, words: &[u64]) {
        // Deduplicate the 64-bit words; the BTreeSet also sorts them, so the
        // all-zero word (guaranteed by the caller's padding) ends up at index 0.
        let unique_words =
            words.iter().cloned().collect::<BTreeSet<_>>().into_iter().collect::<Vec<_>>();
        if unique_words.len() > u8::max_value() as usize {
            panic!("cannot pack {} into 8 bits", unique_words.len());
        }

        let word_indices = unique_words
            .iter()
            .cloned()
            .enumerate()
            .map(|(idx, word)| (word, u8::try_from(idx).unwrap()))
            .collect::<HashMap<_, _>>();

        // Map each 64-codepoint block to the index of its deduplicated word.
        let mut idx = words.iter().map(|w| word_indices[w]).collect::<Vec<u8>>();
        let chunk_length = 16;
        // Pad out the bitset index with zero words so we have whole chunks of 16.
        assert_eq!(unique_words[0], 0, "first word is all zeros");
        while idx.len() % chunk_length != 0 {
            idx.push(0);
        }

        let mut chunks = BTreeSet::new();
        for chunk in idx.chunks(chunk_length) {
            chunks.insert(chunk);
        }
        let chunk_map = chunks
            .clone()
            .into_iter()
            .enumerate()
            .map(|(idx, chunk)| (chunk, idx))
            .collect::<HashMap<_, _>>();
        let mut chunk_indices = Vec::new();
        for chunk in idx.chunks(chunk_length) {
            chunk_indices.push(chunk_map[chunk]);
        }
        writeln!(
            &mut self.file,
            "static BITSET_LAST_CHUNK_MAP: (u16, u8) = ({}, {});",
            chunk_indices.len() - 1,
            chunk_indices.pop().unwrap(),
        )
        .unwrap();
        self.bytes_used += 3;
        // Strip out the empty pieces, presuming our above pop() left us with
        // some trailing zeros.
        assert_eq!(unique_words[0], 0, "first word is all zeros");
        while let Some(0) = chunk_indices.last() {
            chunk_indices.pop();
        }
        writeln!(
            &mut self.file,
            "static BITSET_CHUNKS_MAP: [u8; {}] = [{}];",
            chunk_indices.len(),
            fmt_list(&chunk_indices),
        )
        .unwrap();
        self.bytes_used += chunk_indices.len();
        writeln!(
            &mut self.file,
            "static BITSET_INDEX_CHUNKS: [[u8; 16]; {}] = [{}];",
            chunks.len(),
            fmt_list(chunks.iter()),
        )
        .unwrap();
        self.bytes_used += 16 * chunks.len();
        writeln!(
            &mut self.file,
            "static BITSET: [u64; {}] = [{}];",
            unique_words.len(),
            fmt_list(&unique_words),
        )
        .unwrap();
        self.bytes_used += 8 * unique_words.len();
    }

    pub fn emit_lookup(&mut self) {
        writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
        writeln!(&mut self.file, "    super::range_search(").unwrap();
        writeln!(&mut self.file, "        c as u32,").unwrap();
        writeln!(&mut self.file, "        &BITSET_CHUNKS_MAP,").unwrap();
        writeln!(&mut self.file, "        BITSET_LAST_CHUNK_MAP,").unwrap();
        writeln!(&mut self.file, "        &BITSET_INDEX_CHUNKS,").unwrap();
        writeln!(&mut self.file, "        &BITSET,").unwrap();
        writeln!(&mut self.file, "    )").unwrap();
        writeln!(&mut self.file, "}}").unwrap();
    }
}
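
// For reference, `emit_lookup` above generates the following function
// (indentation reconstructed from the writeln! calls):
//
//     pub fn lookup(c: char) -> bool {
//         super::range_search(
//             c as u32,
//             &BITSET_CHUNKS_MAP,
//             BITSET_LAST_CHUNK_MAP,
//             &BITSET_INDEX_CHUNKS,
//             &BITSET,
//         )
//     }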

pub fn emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
    emitter.blank_line();

    let last_code_point = ranges.last().unwrap().end;
    // Bitset with one bit per codepoint up to the last one in `ranges`;
    // + 2 words to guarantee an all-zero word exists for padding.
    let mut buckets = vec![0u64; (last_code_point as usize / 64) + 2];
    for range in ranges {
        for codepoint in range.clone() {
            let bucket = codepoint as usize / 64;
            let bit = codepoint as u64 % 64;
            buckets[bucket] |= 1 << bit;
        }
    }

    emitter.emit_bitset(&buckets);
    emitter.blank_line();
    emitter.emit_lookup();
}
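
// A minimal sketch of how this emitter is driven (the ranges below are made
// up for illustration; the real call sites live in main.rs and pass merged,
// sorted ranges parsed from the UCD files):
//
//     let mut emitter = RawEmitter::new();
//     emit_codepoints(&mut emitter, &[0x300..0x370, 0x483..0x48a]);
//     assert!(emitter.file.contains("pub fn lookup"));
//     println!("{} bytes used", emitter.bytes_used);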

42 src/tools/unicode-table-generator/src/unicode_download.rs Normal file
@ -0,0 +1,42 @@

use crate::UNICODE_DIRECTORY;
use std::path::Path;
use std::process::Command;

static URL_PREFIX: &str = "https://www.unicode.org/Public/UCD/latest/ucd/";

static README: &str = "ReadMe.txt";

static RESOURCES: &[&str] =
    &["DerivedCoreProperties.txt", "PropList.txt", "UnicodeData.txt", "SpecialCasing.txt"];

pub fn fetch_latest() {
    let directory = Path::new(UNICODE_DIRECTORY);
    if let Err(e) = std::fs::create_dir_all(directory) {
        if e.kind() != std::io::ErrorKind::AlreadyExists {
            panic!("Failed to create {:?}: {}", UNICODE_DIRECTORY, e);
        }
    }
    let output = Command::new("curl").arg(URL_PREFIX.to_owned() + README).output().unwrap();
    if !output.status.success() {
        panic!(
            "Failed to run curl to fetch readme: stderr: {}",
            String::from_utf8_lossy(&output.stderr)
        );
    }
    // Write ReadMe.txt only if its contents changed, to avoid needless writes.
    let current = std::fs::read_to_string(directory.join(README)).unwrap_or_default();
    if current.as_bytes() != &output.stdout[..] {
        std::fs::write(directory.join(README), output.stdout).unwrap();
    }

    for resource in RESOURCES {
        let output = Command::new("curl").arg(URL_PREFIX.to_owned() + resource).output().unwrap();
        if !output.status.success() {
            panic!(
                "Failed to run curl to fetch {}: stderr: {}",
                resource,
                String::from_utf8_lossy(&output.stderr)
            );
        }
        std::fs::write(directory.join(resource), output.stdout).unwrap();
    }
}