mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-26 00:34:06 +00:00
rustfmt librustc_unicode
This commit is contained in:
parent
0b9edc77c5
commit
1bb7205082
@ -313,14 +313,19 @@ def escape_char(c):
|
||||
|
||||
def emit_bsearch_range_table(f):
|
||||
f.write("""
|
||||
fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool {
|
||||
fn bsearch_range_table(c: char, r: &'static [(char, char)]) -> bool {
|
||||
use core::cmp::Ordering::{Equal, Less, Greater};
|
||||
use core::slice::SliceExt;
|
||||
r.binary_search_by(|&(lo,hi)| {
|
||||
if lo <= c && c <= hi { Equal }
|
||||
else if hi < c { Less }
|
||||
else { Greater }
|
||||
}).is_ok()
|
||||
r.binary_search_by(|&(lo, hi)| {
|
||||
if lo <= c && c <= hi {
|
||||
Equal
|
||||
} else if hi < c {
|
||||
Less
|
||||
} else {
|
||||
Greater
|
||||
}
|
||||
})
|
||||
.is_ok()
|
||||
}\n
|
||||
""")
|
||||
|
||||
|
@ -49,7 +49,9 @@ pub struct ToLowercase(CaseMappingIter);
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl Iterator for ToLowercase {
|
||||
type Item = char;
|
||||
fn next(&mut self) -> Option<char> { self.0.next() }
|
||||
fn next(&mut self) -> Option<char> {
|
||||
self.0.next()
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over the uppercase mapping of a given character, returned from
|
||||
@ -61,7 +63,9 @@ pub struct ToUppercase(CaseMappingIter);
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl Iterator for ToUppercase {
|
||||
type Item = char;
|
||||
fn next(&mut self) -> Option<char> { self.0.next() }
|
||||
fn next(&mut self) -> Option<char> {
|
||||
self.0.next()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -69,7 +73,7 @@ enum CaseMappingIter {
|
||||
Three(char, char, char),
|
||||
Two(char, char),
|
||||
One(char),
|
||||
Zero
|
||||
Zero,
|
||||
}
|
||||
|
||||
impl CaseMappingIter {
|
||||
@ -165,7 +169,9 @@ impl char {
|
||||
/// ```
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
#[inline]
|
||||
pub fn is_digit(self, radix: u32) -> bool { C::is_digit(self, radix) }
|
||||
pub fn is_digit(self, radix: u32) -> bool {
|
||||
C::is_digit(self, radix)
|
||||
}
|
||||
|
||||
/// Converts a `char` to a digit in the given radix.
|
||||
///
|
||||
@ -229,7 +235,9 @@ impl char {
|
||||
/// ```
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
#[inline]
|
||||
pub fn to_digit(self, radix: u32) -> Option<u32> { C::to_digit(self, radix) }
|
||||
pub fn to_digit(self, radix: u32) -> Option<u32> {
|
||||
C::to_digit(self, radix)
|
||||
}
|
||||
|
||||
/// Returns an iterator that yields the hexadecimal Unicode escape of a
|
||||
/// character, as `char`s.
|
||||
@ -262,7 +270,9 @@ impl char {
|
||||
/// ```
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
#[inline]
|
||||
pub fn escape_unicode(self) -> EscapeUnicode { C::escape_unicode(self) }
|
||||
pub fn escape_unicode(self) -> EscapeUnicode {
|
||||
C::escape_unicode(self)
|
||||
}
|
||||
|
||||
/// Returns an iterator that yields the literal escape code of a `char`.
|
||||
///
|
||||
@ -309,7 +319,9 @@ impl char {
|
||||
/// ```
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
#[inline]
|
||||
pub fn escape_default(self) -> EscapeDefault { C::escape_default(self) }
|
||||
pub fn escape_default(self) -> EscapeDefault {
|
||||
C::escape_default(self)
|
||||
}
|
||||
|
||||
/// Returns the number of bytes this `char` would need if encoded in UTF-8.
|
||||
///
|
||||
@ -358,7 +370,9 @@ impl char {
|
||||
/// ```
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
#[inline]
|
||||
pub fn len_utf8(self) -> usize { C::len_utf8(self) }
|
||||
pub fn len_utf8(self) -> usize {
|
||||
C::len_utf8(self)
|
||||
}
|
||||
|
||||
/// Returns the number of 16-bit code units this `char` would need if
|
||||
/// encoded in UTF-16.
|
||||
@ -378,7 +392,9 @@ impl char {
|
||||
/// ```
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
#[inline]
|
||||
pub fn len_utf16(self) -> usize { C::len_utf16(self) }
|
||||
pub fn len_utf16(self) -> usize {
|
||||
C::len_utf16(self)
|
||||
}
|
||||
|
||||
/// Encodes this character as UTF-8 into the provided byte buffer, and then
|
||||
/// returns the number of bytes written.
|
||||
@ -482,9 +498,9 @@ impl char {
|
||||
#[inline]
|
||||
pub fn is_alphabetic(self) -> bool {
|
||||
match self {
|
||||
'a' ... 'z' | 'A' ... 'Z' => true,
|
||||
'a'...'z' | 'A'...'Z' => true,
|
||||
c if c > '\x7f' => derived_property::Alphabetic(c),
|
||||
_ => false
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
@ -498,7 +514,9 @@ impl char {
|
||||
reason = "mainly needed for compiler internals",
|
||||
issue = "0")]
|
||||
#[inline]
|
||||
pub fn is_xid_start(self) -> bool { derived_property::XID_Start(self) }
|
||||
pub fn is_xid_start(self) -> bool {
|
||||
derived_property::XID_Start(self)
|
||||
}
|
||||
|
||||
/// Returns true if this `char` satisfies the 'XID_Continue' Unicode property, and false
|
||||
/// otherwise.
|
||||
@ -510,7 +528,9 @@ impl char {
|
||||
reason = "mainly needed for compiler internals",
|
||||
issue = "0")]
|
||||
#[inline]
|
||||
pub fn is_xid_continue(self) -> bool { derived_property::XID_Continue(self) }
|
||||
pub fn is_xid_continue(self) -> bool {
|
||||
derived_property::XID_Continue(self)
|
||||
}
|
||||
|
||||
/// Returns true if this `char` is lowercase, and false otherwise.
|
||||
///
|
||||
@ -542,9 +562,9 @@ impl char {
|
||||
#[inline]
|
||||
pub fn is_lowercase(self) -> bool {
|
||||
match self {
|
||||
'a' ... 'z' => true,
|
||||
'a'...'z' => true,
|
||||
c if c > '\x7f' => derived_property::Lowercase(c),
|
||||
_ => false
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
@ -578,9 +598,9 @@ impl char {
|
||||
#[inline]
|
||||
pub fn is_uppercase(self) -> bool {
|
||||
match self {
|
||||
'A' ... 'Z' => true,
|
||||
'A'...'Z' => true,
|
||||
c if c > '\x7f' => derived_property::Uppercase(c),
|
||||
_ => false
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
@ -608,9 +628,9 @@ impl char {
|
||||
#[inline]
|
||||
pub fn is_whitespace(self) -> bool {
|
||||
match self {
|
||||
' ' | '\x09' ... '\x0d' => true,
|
||||
' ' | '\x09'...'\x0d' => true,
|
||||
c if c > '\x7f' => property::White_Space(c),
|
||||
_ => false
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
@ -673,7 +693,9 @@ impl char {
|
||||
/// ```
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
#[inline]
|
||||
pub fn is_control(self) -> bool { general_category::Cc(self) }
|
||||
pub fn is_control(self) -> bool {
|
||||
general_category::Cc(self)
|
||||
}
|
||||
|
||||
/// Returns true if this `char` is numeric, and false otherwise.
|
||||
///
|
||||
@ -713,9 +735,9 @@ impl char {
|
||||
#[inline]
|
||||
pub fn is_numeric(self) -> bool {
|
||||
match self {
|
||||
'0' ... '9' => true,
|
||||
'0'...'9' => true,
|
||||
c if c > '\x7f' => general_category::N(c),
|
||||
_ => false
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
@ -823,7 +845,9 @@ impl char {
|
||||
/// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s.
|
||||
#[unstable(feature = "decode_utf16", reason = "recently exposed", issue = "27830")]
|
||||
#[derive(Clone)]
|
||||
pub struct DecodeUtf16<I> where I: Iterator<Item=u16> {
|
||||
pub struct DecodeUtf16<I>
|
||||
where I: Iterator<Item = u16>
|
||||
{
|
||||
iter: I,
|
||||
buf: Option<u16>,
|
||||
}
|
||||
@ -874,7 +898,7 @@ pub struct DecodeUtf16<I> where I: Iterator<Item=u16> {
|
||||
/// ```
|
||||
#[unstable(feature = "decode_utf16", reason = "recently exposed", issue = "27830")]
|
||||
#[inline]
|
||||
pub fn decode_utf16<I: IntoIterator<Item=u16>>(iterable: I) -> DecodeUtf16<I::IntoIter> {
|
||||
pub fn decode_utf16<I: IntoIterator<Item = u16>>(iterable: I) -> DecodeUtf16<I::IntoIter> {
|
||||
DecodeUtf16 {
|
||||
iter: iterable.into_iter(),
|
||||
buf: None,
|
||||
@ -890,8 +914,8 @@ impl<I: Iterator<Item=u16>> Iterator for DecodeUtf16<I> {
|
||||
Some(buf) => buf,
|
||||
None => match self.iter.next() {
|
||||
Some(u) => u,
|
||||
None => return None
|
||||
}
|
||||
None => return None,
|
||||
},
|
||||
};
|
||||
|
||||
if u < 0xD800 || 0xDFFF < u {
|
||||
@ -904,13 +928,13 @@ impl<I: Iterator<Item=u16>> Iterator for DecodeUtf16<I> {
|
||||
let u2 = match self.iter.next() {
|
||||
Some(u2) => u2,
|
||||
// eof
|
||||
None => return Some(Err(u))
|
||||
None => return Some(Err(u)),
|
||||
};
|
||||
if u2 < 0xDC00 || u2 > 0xDFFF {
|
||||
// not a trailing surrogate so we're not a valid
|
||||
// surrogate pair, so rewind to redecode u2 next time.
|
||||
self.buf = Some(u2);
|
||||
return Some(Err(u))
|
||||
return Some(Err(u));
|
||||
}
|
||||
|
||||
// all ok, so lets decode it.
|
||||
|
@ -10,8 +10,8 @@
|
||||
|
||||
//! Unicode-intensive string manipulations.
|
||||
//!
|
||||
//! This module provides functionality to `str` that requires the Unicode methods provided by the
|
||||
//! unicode parts of the CharExt trait.
|
||||
//! This module provides functionality to `str` that requires the Unicode
|
||||
//! methods provided by the unicode parts of the CharExt trait.
|
||||
|
||||
use char::{DecodeUtf16, decode_utf16};
|
||||
use core::char;
|
||||
@ -40,20 +40,28 @@ pub trait UnicodeStr {
|
||||
impl UnicodeStr for str {
|
||||
#[inline]
|
||||
fn split_whitespace(&self) -> SplitWhitespace {
|
||||
fn is_not_empty(s: &&str) -> bool { !s.is_empty() }
|
||||
fn is_not_empty(s: &&str) -> bool {
|
||||
!s.is_empty()
|
||||
}
|
||||
let is_not_empty: fn(&&str) -> bool = is_not_empty; // coerce to fn pointer
|
||||
|
||||
fn is_whitespace(c: char) -> bool { c.is_whitespace() }
|
||||
fn is_whitespace(c: char) -> bool {
|
||||
c.is_whitespace()
|
||||
}
|
||||
let is_whitespace: fn(char) -> bool = is_whitespace; // coerce to fn pointer
|
||||
|
||||
SplitWhitespace { inner: self.split(is_whitespace).filter(is_not_empty) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_whitespace(&self) -> bool { self.chars().all(|c| c.is_whitespace()) }
|
||||
fn is_whitespace(&self) -> bool {
|
||||
self.chars().all(|c| c.is_whitespace())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_alphanumeric(&self) -> bool { self.chars().all(|c| c.is_alphanumeric()) }
|
||||
fn is_alphanumeric(&self) -> bool {
|
||||
self.chars().all(|c| c.is_alphanumeric())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn trim(&self) -> &str {
|
||||
@ -111,8 +119,9 @@ pub fn is_utf16(v: &[u16]) -> bool {
|
||||
Some(_) => {}
|
||||
None => {
|
||||
let u2 = next!(false);
|
||||
if u < 0xD7FF || u > 0xDBFF ||
|
||||
u2 < 0xDC00 || u2 > 0xDFFF { return false; }
|
||||
if u < 0xD7FF || u > 0xDBFF || u2 < 0xDC00 || u2 > 0xDFFF {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -125,7 +134,7 @@ pub fn is_utf16(v: &[u16]) -> bool {
|
||||
#[allow(deprecated)]
|
||||
#[derive(Clone)]
|
||||
pub struct Utf16Items<'a> {
|
||||
decoder: DecodeUtf16<Cloned<slice::Iter<'a, u16>>>
|
||||
decoder: DecodeUtf16<Cloned<slice::Iter<'a, u16>>>,
|
||||
}
|
||||
|
||||
/// The possibilities for values decoded from a `u16` stream.
|
||||
@ -137,7 +146,7 @@ pub enum Utf16Item {
|
||||
/// A valid codepoint.
|
||||
ScalarValue(char),
|
||||
/// An invalid surrogate without its pair.
|
||||
LoneSurrogate(u16)
|
||||
LoneSurrogate(u16),
|
||||
}
|
||||
|
||||
#[allow(deprecated)]
|
||||
@ -148,7 +157,7 @@ impl Utf16Item {
|
||||
pub fn to_char_lossy(&self) -> char {
|
||||
match *self {
|
||||
Utf16Item::ScalarValue(c) => c,
|
||||
Utf16Item::LoneSurrogate(_) => '\u{FFFD}'
|
||||
Utf16Item::LoneSurrogate(_) => '\u{FFFD}',
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -160,9 +169,11 @@ impl<'a> Iterator for Utf16Items<'a> {
|
||||
type Item = Utf16Item;
|
||||
|
||||
fn next(&mut self) -> Option<Utf16Item> {
|
||||
self.decoder.next().map(|result| match result {
|
||||
Ok(c) => Utf16Item::ScalarValue(c),
|
||||
Err(s) => Utf16Item::LoneSurrogate(s),
|
||||
self.decoder.next().map(|result| {
|
||||
match result {
|
||||
Ok(c) => Utf16Item::ScalarValue(c),
|
||||
Err(s) => Utf16Item::LoneSurrogate(s),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@ -209,13 +220,18 @@ pub fn utf16_items<'a>(v: &'a [u16]) -> Utf16Items<'a> {
|
||||
#[derive(Clone)]
|
||||
pub struct Utf16Encoder<I> {
|
||||
chars: I,
|
||||
extra: u16
|
||||
extra: u16,
|
||||
}
|
||||
|
||||
impl<I> Utf16Encoder<I> {
|
||||
/// Create a UTF-16 encoder from any `char` iterator.
|
||||
pub fn new(chars: I) -> Utf16Encoder<I> where I: Iterator<Item=char> {
|
||||
Utf16Encoder { chars: chars, extra: 0 }
|
||||
pub fn new(chars: I) -> Utf16Encoder<I>
|
||||
where I: Iterator<Item = char>
|
||||
{
|
||||
Utf16Encoder {
|
||||
chars: chars,
|
||||
extra: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -233,7 +249,9 @@ impl<I> Iterator for Utf16Encoder<I> where I: Iterator<Item=char> {
|
||||
let mut buf = [0; 2];
|
||||
self.chars.next().map(|ch| {
|
||||
let n = CharExt::encode_utf16(ch, &mut buf).unwrap_or(0);
|
||||
if n == 2 { self.extra = buf[1]; }
|
||||
if n == 2 {
|
||||
self.extra = buf[1];
|
||||
}
|
||||
buf[0]
|
||||
})
|
||||
}
|
||||
@ -251,8 +269,12 @@ impl<I> Iterator for Utf16Encoder<I> where I: Iterator<Item=char> {
|
||||
impl<'a> Iterator for SplitWhitespace<'a> {
|
||||
type Item = &'a str;
|
||||
|
||||
fn next(&mut self) -> Option<&'a str> { self.inner.next() }
|
||||
fn next(&mut self) -> Option<&'a str> {
|
||||
self.inner.next()
|
||||
}
|
||||
}
|
||||
impl<'a> DoubleEndedIterator for SplitWhitespace<'a> {
|
||||
fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() }
|
||||
fn next_back(&mut self) -> Option<&'a str> {
|
||||
self.inner.next_back()
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user