From d4ed1e6fa4978141408ef01d0d35c7bd142dd164 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 6 Apr 2018 10:24:01 +0200 Subject: [PATCH] Merge unstable Utf16Encoder into EncodeUtf16 --- src/liballoc/str.rs | 27 ++++++++++++++---- src/liballoc/tests/str.rs | 3 +- src/libcore/unicode/mod.rs | 58 -------------------------------------- 3 files changed, 23 insertions(+), 65 deletions(-) diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 0b961c2c186..65df93bd3bb 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -45,7 +45,6 @@ use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher}; use core::mem; use core::ptr; use core::iter::FusedIterator; -use core::unicode::Utf16Encoder; use vec_deque::VecDeque; use borrow::{Borrow, ToOwned}; @@ -146,7 +145,8 @@ impl> SliceConcatExt for [S] { #[derive(Clone)] #[stable(feature = "encode_utf16", since = "1.8.0")] pub struct EncodeUtf16<'a> { - encoder: Utf16Encoder>, + chars: Chars<'a>, + extra: u16, } #[stable(feature = "collection_debug", since = "1.17.0")] @@ -162,12 +162,29 @@ impl<'a> Iterator for EncodeUtf16<'a> { #[inline] fn next(&mut self) -> Option { - self.encoder.next() + if self.extra != 0 { + let tmp = self.extra; + self.extra = 0; + return Some(tmp); + } + + let mut buf = [0; 2]; + self.chars.next().map(|ch| { + let n = ch.encode_utf16(&mut buf).len(); + if n == 2 { + self.extra = buf[1]; + } + buf[0] + }) } #[inline] fn size_hint(&self) -> (usize, Option) { - self.encoder.size_hint() + let (low, high) = self.chars.size_hint(); + // every char gets either one u16 or two u16, + // so this iterator is between 1 or 2 times as + // long as the underlying iterator. + (low, high.and_then(|n| n.checked_mul(2))) } } @@ -870,7 +887,7 @@ impl str { /// ``` #[stable(feature = "encode_utf16", since = "1.8.0")] pub fn encode_utf16(&self) -> EncodeUtf16 { - EncodeUtf16 { encoder: Utf16Encoder::new(self[..].chars()) } + EncodeUtf16 { chars: self[..].chars(), extra: 0 } } /// Returns `true` if the given pattern matches a sub-slice of diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index 2df8ca63a3e..a3f4c385fe2 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -1204,8 +1204,7 @@ fn test_rev_split_char_iterator_no_trailing() { #[test] fn test_utf16_code_units() { - use core::unicode::Utf16Encoder; - assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::>(), + assert_eq!("é\u{1F4A9}".encode_utf16().collect::>(), [0xE9, 0xD83D, 0xDCA9]) } diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index 3413476fd22..9ab8cb748b1 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -24,61 +24,3 @@ pub mod derived_property { pub mod property { pub use unicode::tables::property::Pattern_White_Space; } - -use iter::FusedIterator; - -/// Iterator adaptor for encoding `char`s to UTF-16. -#[derive(Clone)] -#[allow(missing_debug_implementations)] -pub struct Utf16Encoder { - chars: I, - extra: u16, -} - -impl Utf16Encoder { - /// Create a UTF-16 encoder from any `char` iterator. - pub fn new(chars: I) -> Utf16Encoder - where I: Iterator - { - Utf16Encoder { - chars, - extra: 0, - } - } -} - -impl Iterator for Utf16Encoder - where I: Iterator -{ - type Item = u16; - - #[inline] - fn next(&mut self) -> Option { - if self.extra != 0 { - let tmp = self.extra; - self.extra = 0; - return Some(tmp); - } - - let mut buf = [0; 2]; - self.chars.next().map(|ch| { - let n = ch.encode_utf16(&mut buf).len(); - if n == 2 { - self.extra = buf[1]; - } - buf[0] - }) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - let (low, high) = self.chars.size_hint(); - // every char gets either one u16 or two u16, - // so this iterator is between 1 or 2 times as - // long as the underlying iterator. - (low, high.and_then(|n| n.checked_mul(2))) - } -} - -impl FusedIterator for Utf16Encoder - where I: FusedIterator {}