mirror of
https://github.com/rust-lang/rust.git
synced 2025-05-14 02:49:40 +00:00
Auto merge of #21488 - aturon:os-str, r=alexcrichton
Per [RFC 517](https://github.com/rust-lang/rfcs/pull/575/), this commit introduces platform-native strings. The API is essentially as described in the RFC. The WTF-8 implementation is adapted from @SimonSapin's [implementation](https://github.com/SimonSapin/rust-wtf8). To make this work, some encoding and decoding functionality in `libcore` is now exported in a "raw" fashion reusable for WTF-8. These exports are *not* reexported in `std`, nor are they stable.
This commit is contained in:
commit
bb7cc4eb26
@ -258,49 +258,69 @@ impl CharExt for char {
|
||||
#[inline]
|
||||
#[unstable = "pending decision about Iterator/Writer/Reader"]
|
||||
fn encode_utf8(self, dst: &mut [u8]) -> Option<uint> {
|
||||
// Marked #[inline] to allow llvm optimizing it away
|
||||
let code = self as u32;
|
||||
if code < MAX_ONE_B && dst.len() >= 1 {
|
||||
dst[0] = code as u8;
|
||||
Some(1)
|
||||
} else if code < MAX_TWO_B && dst.len() >= 2 {
|
||||
dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B;
|
||||
dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT;
|
||||
Some(2)
|
||||
} else if code < MAX_THREE_B && dst.len() >= 3 {
|
||||
dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B;
|
||||
dst[1] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
|
||||
dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT;
|
||||
Some(3)
|
||||
} else if dst.len() >= 4 {
|
||||
dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B;
|
||||
dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT;
|
||||
dst[2] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
|
||||
dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT;
|
||||
Some(4)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
encode_utf8_raw(self as u32, dst)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[unstable = "pending decision about Iterator/Writer/Reader"]
|
||||
fn encode_utf16(self, dst: &mut [u16]) -> Option<uint> {
|
||||
// Marked #[inline] to allow llvm optimizing it away
|
||||
let mut ch = self as u32;
|
||||
if (ch & 0xFFFF_u32) == ch && dst.len() >= 1 {
|
||||
// The BMP falls through (assuming non-surrogate, as it should)
|
||||
dst[0] = ch as u16;
|
||||
Some(1)
|
||||
} else if dst.len() >= 2 {
|
||||
// Supplementary planes break into surrogates.
|
||||
ch -= 0x1_0000_u32;
|
||||
dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
|
||||
dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
|
||||
Some(2)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
encode_utf16_raw(self as u32, dst)
|
||||
}
|
||||
}
|
||||
|
||||
/// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
|
||||
/// and then returns the number of bytes written.
|
||||
///
|
||||
/// If the buffer is not large enough, nothing will be written into it
|
||||
/// and a `None` will be returned.
|
||||
#[inline]
|
||||
#[unstable]
|
||||
pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> Option<uint> {
|
||||
// Marked #[inline] to allow llvm optimizing it away
|
||||
if code < MAX_ONE_B && dst.len() >= 1 {
|
||||
dst[0] = code as u8;
|
||||
Some(1)
|
||||
} else if code < MAX_TWO_B && dst.len() >= 2 {
|
||||
dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B;
|
||||
dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT;
|
||||
Some(2)
|
||||
} else if code < MAX_THREE_B && dst.len() >= 3 {
|
||||
dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B;
|
||||
dst[1] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
|
||||
dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT;
|
||||
Some(3)
|
||||
} else if dst.len() >= 4 {
|
||||
dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B;
|
||||
dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT;
|
||||
dst[2] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
|
||||
dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT;
|
||||
Some(4)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
|
||||
/// and then returns the number of `u16`s written.
|
||||
///
|
||||
/// If the buffer is not large enough, nothing will be written into it
|
||||
/// and a `None` will be returned.
|
||||
#[inline]
|
||||
#[unstable]
|
||||
pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option<uint> {
|
||||
// Marked #[inline] to allow llvm optimizing it away
|
||||
if (ch & 0xFFFF_u32) == ch && dst.len() >= 1 {
|
||||
// The BMP falls through (assuming non-surrogate, as it should)
|
||||
dst[0] = ch as u16;
|
||||
Some(1)
|
||||
} else if dst.len() >= 2 {
|
||||
// Supplementary planes break into surrogates.
|
||||
ch -= 0x1_0000_u32;
|
||||
dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
|
||||
dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
|
||||
Some(2)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -305,43 +305,52 @@ fn unwrap_or_0(opt: Option<&u8>) -> u8 {
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads the next code point out of a byte iterator (assuming a
|
||||
/// UTF-8-like encoding).
|
||||
#[unstable]
|
||||
pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
|
||||
// Decode UTF-8
|
||||
let x = match bytes.next() {
|
||||
None => return None,
|
||||
Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
|
||||
Some(&next_byte) => next_byte,
|
||||
};
|
||||
|
||||
// Multibyte case follows
|
||||
// Decode from a byte combination out of: [[[x y] z] w]
|
||||
// NOTE: Performance is sensitive to the exact formulation here
|
||||
let init = utf8_first_byte!(x, 2);
|
||||
let y = unwrap_or_0(bytes.next());
|
||||
let mut ch = utf8_acc_cont_byte!(init, y);
|
||||
if x >= 0xE0 {
|
||||
// [[x y z] w] case
|
||||
// 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
|
||||
let z = unwrap_or_0(bytes.next());
|
||||
let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
|
||||
ch = init << 12 | y_z;
|
||||
if x >= 0xF0 {
|
||||
// [x y z w] case
|
||||
// use only the lower 3 bits of `init`
|
||||
let w = unwrap_or_0(bytes.next());
|
||||
ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
|
||||
}
|
||||
}
|
||||
|
||||
Some(ch)
|
||||
}
|
||||
|
||||
#[stable]
|
||||
impl<'a> Iterator for Chars<'a> {
|
||||
type Item = char;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<char> {
|
||||
// Decode UTF-8, using the valid UTF-8 invariant
|
||||
let x = match self.iter.next() {
|
||||
None => return None,
|
||||
Some(&next_byte) if next_byte < 128 => return Some(next_byte as char),
|
||||
Some(&next_byte) => next_byte,
|
||||
};
|
||||
|
||||
// Multibyte case follows
|
||||
// Decode from a byte combination out of: [[[x y] z] w]
|
||||
// NOTE: Performance is sensitive to the exact formulation here
|
||||
let init = utf8_first_byte!(x, 2);
|
||||
let y = unwrap_or_0(self.iter.next());
|
||||
let mut ch = utf8_acc_cont_byte!(init, y);
|
||||
if x >= 0xE0 {
|
||||
// [[x y z] w] case
|
||||
// 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
|
||||
let z = unwrap_or_0(self.iter.next());
|
||||
let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
|
||||
ch = init << 12 | y_z;
|
||||
if x >= 0xF0 {
|
||||
// [x y z w] case
|
||||
// use only the lower 3 bits of `init`
|
||||
let w = unwrap_or_0(self.iter.next());
|
||||
ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
|
||||
next_code_point(&mut self.iter).map(|ch| {
|
||||
// str invariant says `ch` is a valid Unicode Scalar Value
|
||||
unsafe {
|
||||
mem::transmute(ch)
|
||||
}
|
||||
}
|
||||
|
||||
// str invariant says `ch` is a valid Unicode Scalar Value
|
||||
unsafe {
|
||||
Some(mem::transmute(ch))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@ -1517,25 +1526,8 @@ impl StrExt for str {
|
||||
|
||||
#[inline]
|
||||
fn char_range_at(&self, i: uint) -> CharRange {
|
||||
if self.as_bytes()[i] < 128u8 {
|
||||
return CharRange {ch: self.as_bytes()[i] as char, next: i + 1 };
|
||||
}
|
||||
|
||||
// Multibyte case is a fn to allow char_range_at to inline cleanly
|
||||
fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
|
||||
let mut val = s.as_bytes()[i] as u32;
|
||||
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
|
||||
assert!((w != 0));
|
||||
|
||||
val = utf8_first_byte!(val, w);
|
||||
val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
|
||||
if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
|
||||
if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }
|
||||
|
||||
return CharRange {ch: unsafe { mem::transmute(val) }, next: i + w};
|
||||
}
|
||||
|
||||
return multibyte_char_range_at(self, i);
|
||||
let (c, n) = char_range_at_raw(self.as_bytes(), i);
|
||||
CharRange { ch: unsafe { mem::transmute(c) }, next: n }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@ -1653,6 +1645,32 @@ impl StrExt for str {
|
||||
fn parse<T: FromStr>(&self) -> Option<T> { FromStr::from_str(self) }
|
||||
}
|
||||
|
||||
/// Pluck a code point out of a UTF-8-like byte slice and return the
|
||||
/// index of the next code point.
|
||||
#[inline]
|
||||
#[unstable]
|
||||
pub fn char_range_at_raw(bytes: &[u8], i: uint) -> (u32, usize) {
|
||||
if bytes[i] < 128u8 {
|
||||
return (bytes[i] as u32, i + 1);
|
||||
}
|
||||
|
||||
// Multibyte case is a fn to allow char_range_at to inline cleanly
|
||||
fn multibyte_char_range_at(bytes: &[u8], i: uint) -> (u32, usize) {
|
||||
let mut val = bytes[i] as u32;
|
||||
let w = UTF8_CHAR_WIDTH[val as uint] as uint;
|
||||
assert!((w != 0));
|
||||
|
||||
val = utf8_first_byte!(val, w);
|
||||
val = utf8_acc_cont_byte!(val, bytes[i + 1]);
|
||||
if w > 2 { val = utf8_acc_cont_byte!(val, bytes[i + 2]); }
|
||||
if w > 3 { val = utf8_acc_cont_byte!(val, bytes[i + 3]); }
|
||||
|
||||
return (val, i + w);
|
||||
}
|
||||
|
||||
multibyte_char_range_at(bytes, i)
|
||||
}
|
||||
|
||||
#[stable]
|
||||
impl<'a> Default for &'a str {
|
||||
#[stable]
|
||||
|
@ -17,4 +17,9 @@ pub use self::c_str::CString;
|
||||
pub use self::c_str::c_str_to_bytes;
|
||||
pub use self::c_str::c_str_to_bytes_with_nul;
|
||||
|
||||
pub use self::os_str::OsString;
|
||||
pub use self::os_str::OsStr;
|
||||
pub use self::os_str::AsOsStr;
|
||||
|
||||
mod c_str;
|
||||
mod os_str;
|
||||
|
259
src/libstd/ffi/os_str.rs
Normal file
259
src/libstd/ffi/os_str.rs
Normal file
@ -0,0 +1,259 @@
|
||||
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
//! A type that can represent all platform-native strings, but is cheaply
|
||||
//! interconvertible with Rust strings.
|
||||
//!
|
||||
//! The need for this type arises from the fact that:
|
||||
//!
|
||||
//! * On Unix systems, strings are often arbitrary sequences of non-zero
|
||||
//! bytes, in many cases interpreted as UTF-8.
|
||||
//!
|
||||
//! * On Windows, strings are often arbitrary sequences of non-zero 16-bit
|
||||
//! values, interpreted as UTF-16 when it is valid to do so.
|
||||
//!
|
||||
//! * In Rust, strings are always valid UTF-8, but may contain zeros.
|
||||
//!
|
||||
//! The types in this module bridge this gap by simultaneously representing Rust
|
||||
//! and platform-native string values, and in particular allowing a Rust string
|
||||
//! to be converted into an "OS" string with no cost.
|
||||
//!
|
||||
//! **Note**: At the moment, these types are extremely bare-bones, usable only
|
||||
//! for conversion to/from various other string types. Eventually these types
|
||||
//! will offer a full-fledged string API.
|
||||
|
||||
#![unstable = "recently added as part of path/io reform"]
|
||||
|
||||
use core::prelude::*;
|
||||
|
||||
use core::borrow::{BorrowFrom, ToOwned};
|
||||
use fmt::{self, Debug};
|
||||
use mem;
|
||||
use string::{String, CowString};
|
||||
use ops;
|
||||
use cmp;
|
||||
use hash::{Hash, Hasher, Writer};
|
||||
|
||||
use sys::os_str::{Buf, Slice};
|
||||
use sys_common::{AsInner, IntoInner, FromInner};
|
||||
|
||||
/// Owned, mutable OS strings.
|
||||
#[derive(Clone)]
|
||||
pub struct OsString {
|
||||
inner: Buf
|
||||
}
|
||||
|
||||
/// Slices into OS strings.
|
||||
pub struct OsStr {
|
||||
inner: Slice
|
||||
}
|
||||
|
||||
impl OsString {
|
||||
/// Constructs an `OsString` at no cost by consuming a `String`.
|
||||
pub fn from_string(s: String) -> OsString {
|
||||
OsString { inner: Buf::from_string(s) }
|
||||
}
|
||||
|
||||
/// Constructs an `OsString` by copying from a `&str` slice.
|
||||
///
|
||||
/// Equivalent to: `OsString::from_string(String::from_str(s))`.
|
||||
pub fn from_str(s: &str) -> OsString {
|
||||
OsString { inner: Buf::from_str(s) }
|
||||
}
|
||||
|
||||
/// Convert the `OsString` into a `String` if it contains valid Unicode data.
|
||||
///
|
||||
/// On failure, ownership of the original `OsString` is returned.
|
||||
pub fn into_string(self) -> Result<String, OsString> {
|
||||
self.inner.into_string().map_err(|buf| OsString { inner: buf} )
|
||||
}
|
||||
|
||||
/// Extend the string with the given `&OsStr` slice.
|
||||
pub fn push_os_str(&mut self, s: &OsStr) {
|
||||
self.inner.push_slice(&s.inner)
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Index<ops::FullRange> for OsString {
|
||||
type Output = OsStr;
|
||||
|
||||
#[inline]
|
||||
fn index(&self, _index: &ops::FullRange) -> &OsStr {
|
||||
unsafe { mem::transmute(self.inner.as_slice()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Deref for OsString {
|
||||
type Target = OsStr;
|
||||
|
||||
#[inline]
|
||||
fn deref(&self) -> &OsStr {
|
||||
&self[]
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for OsString {
|
||||
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
fmt::Debug::fmt(&**self, formatter)
|
||||
}
|
||||
}
|
||||
|
||||
impl OsStr {
|
||||
/// Coerce directly from a `&str` slice to a `&OsStr` slice.
|
||||
pub fn from_str(s: &str) -> &OsStr {
|
||||
unsafe { mem::transmute(Slice::from_str(s)) }
|
||||
}
|
||||
|
||||
/// Yield a `&str` slice if the `OsStr` is valid unicode.
|
||||
///
|
||||
/// This conversion may entail doing a check for UTF-8 validity.
|
||||
pub fn to_str(&self) -> Option<&str> {
|
||||
self.inner.to_str()
|
||||
}
|
||||
|
||||
/// Convert an `OsStr` to a `CowString`.
|
||||
///
|
||||
/// Any non-Unicode sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
|
||||
pub fn to_string_lossy(&self) -> CowString {
|
||||
self.inner.to_string_lossy()
|
||||
}
|
||||
|
||||
/// Copy the slice into an owned `OsString`.
|
||||
pub fn to_os_string(&self) -> OsString {
|
||||
OsString { inner: self.inner.to_owned() }
|
||||
}
|
||||
|
||||
/// Get the underlying byte representation.
|
||||
///
|
||||
/// Note: it is *crucial* that this API is private, to avoid
|
||||
/// revealing the internal, platform-specific encodings.
|
||||
fn bytes(&self) -> &[u8] {
|
||||
unsafe { mem::transmute(&self.inner) }
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for OsStr {
|
||||
fn eq(&self, other: &OsStr) -> bool {
|
||||
self.bytes().eq(other.bytes())
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<str> for OsStr {
|
||||
fn eq(&self, other: &str) -> bool {
|
||||
*self == *OsStr::from_str(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<OsStr> for str {
|
||||
fn eq(&self, other: &OsStr) -> bool {
|
||||
*other == *OsStr::from_str(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for OsStr {}
|
||||
|
||||
impl PartialOrd for OsStr {
|
||||
#[inline]
|
||||
fn partial_cmp(&self, other: &OsStr) -> Option<cmp::Ordering> {
|
||||
self.bytes().partial_cmp(other.bytes())
|
||||
}
|
||||
#[inline]
|
||||
fn lt(&self, other: &OsStr) -> bool { self.bytes().lt(other.bytes()) }
|
||||
#[inline]
|
||||
fn le(&self, other: &OsStr) -> bool { self.bytes().le(other.bytes()) }
|
||||
#[inline]
|
||||
fn gt(&self, other: &OsStr) -> bool { self.bytes().gt(other.bytes()) }
|
||||
#[inline]
|
||||
fn ge(&self, other: &OsStr) -> bool { self.bytes().ge(other.bytes()) }
|
||||
}
|
||||
|
||||
impl PartialOrd<str> for OsStr {
|
||||
#[inline]
|
||||
fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> {
|
||||
self.partial_cmp(OsStr::from_str(other))
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME (#19470): cannot provide PartialOrd<OsStr> for str until we
|
||||
// have more flexible coherence rules.
|
||||
|
||||
impl Ord for OsStr {
|
||||
#[inline]
|
||||
fn cmp(&self, other: &OsStr) -> cmp::Ordering { self.bytes().cmp(other.bytes()) }
|
||||
}
|
||||
|
||||
impl<'a, S: Hasher + Writer> Hash<S> for OsStr {
|
||||
#[inline]
|
||||
fn hash(&self, state: &mut S) {
|
||||
self.bytes().hash(state)
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for OsStr {
|
||||
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
self.inner.fmt(formatter)
|
||||
}
|
||||
}
|
||||
|
||||
impl BorrowFrom<OsString> for OsStr {
|
||||
fn borrow_from(owned: &OsString) -> &OsStr { &owned[] }
|
||||
}
|
||||
|
||||
impl ToOwned<OsString> for OsStr {
|
||||
fn to_owned(&self) -> OsString { self.to_os_string() }
|
||||
}
|
||||
|
||||
/// Freely convertible to an `&OsStr` slice.
|
||||
pub trait AsOsStr {
|
||||
/// Convert to an `&OsStr` slice.
|
||||
fn as_os_str(&self) -> &OsStr;
|
||||
}
|
||||
|
||||
impl AsOsStr for OsStr {
|
||||
fn as_os_str(&self) -> &OsStr {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl AsOsStr for OsString {
|
||||
fn as_os_str(&self) -> &OsStr {
|
||||
&self[]
|
||||
}
|
||||
}
|
||||
|
||||
impl AsOsStr for str {
|
||||
fn as_os_str(&self) -> &OsStr {
|
||||
OsStr::from_str(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsOsStr for String {
|
||||
fn as_os_str(&self) -> &OsStr {
|
||||
OsStr::from_str(&self[])
|
||||
}
|
||||
}
|
||||
|
||||
impl FromInner<Buf> for OsString {
|
||||
fn from_inner(buf: Buf) -> OsString {
|
||||
OsString { inner: buf }
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoInner<Buf> for OsString {
|
||||
fn into_inner(self) -> Buf {
|
||||
self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl AsInner<Slice> for OsStr {
|
||||
fn as_inner(&self) -> &Slice {
|
||||
&self.inner
|
||||
}
|
||||
}
|
@ -29,6 +29,7 @@ pub mod stack;
|
||||
pub mod thread;
|
||||
pub mod thread_info;
|
||||
pub mod thread_local;
|
||||
pub mod wtf8;
|
||||
|
||||
// common error constructors
|
||||
|
||||
@ -93,11 +94,21 @@ pub fn keep_going<F>(data: &[u8], mut f: F) -> i64 where
|
||||
return (origamt - amt) as i64;
|
||||
}
|
||||
|
||||
// A trait for extracting representations from std::io types
|
||||
pub trait AsInner<Inner> {
|
||||
/// A trait for viewing representations from std types
|
||||
pub trait AsInner<Inner: ?Sized> {
|
||||
fn as_inner(&self) -> &Inner;
|
||||
}
|
||||
|
||||
/// A trait for extracting representations from std types
|
||||
pub trait IntoInner<Inner> {
|
||||
fn into_inner(self) -> Inner;
|
||||
}
|
||||
|
||||
/// A trait for creating std types from internal representations
|
||||
pub trait FromInner<Inner> {
|
||||
fn from_inner(inner: Inner) -> Self;
|
||||
}
|
||||
|
||||
pub trait ProcessConfig<K: BytesContainer, V: BytesContainer> {
|
||||
fn program(&self) -> &CString;
|
||||
fn args(&self) -> &[CString];
|
||||
|
1212
src/libstd/sys/common/wtf8.rs
Normal file
1212
src/libstd/sys/common/wtf8.rs
Normal file
File diff suppressed because it is too large
Load Diff
@ -31,7 +31,10 @@
|
||||
|
||||
#![unstable]
|
||||
|
||||
use sys_common::AsInner;
|
||||
use vec::Vec;
|
||||
use sys::os_str::Buf;
|
||||
use sys_common::{AsInner, IntoInner, FromInner};
|
||||
use ffi::{OsStr, OsString};
|
||||
use libc;
|
||||
|
||||
use io;
|
||||
@ -99,6 +102,36 @@ impl AsRawFd for io::net::udp::UdpSocket {
|
||||
}
|
||||
}
|
||||
|
||||
// Unix-specific extensions to `OsString`.
|
||||
pub trait OsStringExt {
|
||||
/// Create an `OsString` from a byte vector.
|
||||
fn from_vec(vec: Vec<u8>) -> Self;
|
||||
|
||||
/// Yield the underlying byte vector of this `OsString`.
|
||||
fn into_vec(self) -> Vec<u8>;
|
||||
}
|
||||
|
||||
impl OsStringExt for OsString {
|
||||
fn from_vec(vec: Vec<u8>) -> OsString {
|
||||
FromInner::from_inner(Buf { inner: vec })
|
||||
}
|
||||
|
||||
fn into_vec(self) -> Vec<u8> {
|
||||
self.into_inner().inner
|
||||
}
|
||||
}
|
||||
|
||||
// Unix-specific extensions to `OsStr`.
|
||||
pub trait OsStrExt {
|
||||
fn as_byte_slice(&self) -> &[u8];
|
||||
}
|
||||
|
||||
impl OsStrExt for OsStr {
|
||||
fn as_byte_slice(&self) -> &[u8] {
|
||||
&self.as_inner().inner
|
||||
}
|
||||
}
|
||||
|
||||
/// A prelude for conveniently writing platform-specific code.
|
||||
///
|
||||
/// Includes all extension traits, and some important type definitions.
|
||||
|
@ -44,6 +44,7 @@ pub mod fs;
|
||||
pub mod helper_signal;
|
||||
pub mod mutex;
|
||||
pub mod os;
|
||||
pub mod os_str;
|
||||
pub mod pipe;
|
||||
pub mod process;
|
||||
pub mod rwlock;
|
||||
|
86
src/libstd/sys/unix/os_str.rs
Normal file
86
src/libstd/sys/unix/os_str.rs
Normal file
@ -0,0 +1,86 @@
|
||||
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
/// The underlying OsString/OsStr implementation on Unix systems: just
|
||||
/// a `Vec<u8>`/`[u8]`.
|
||||
|
||||
use core::prelude::*;
|
||||
|
||||
use fmt::{self, Debug};
|
||||
use vec::Vec;
|
||||
use slice::SliceExt as StdSliceExt;
|
||||
use str;
|
||||
use string::{String, CowString};
|
||||
use mem;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Buf {
|
||||
pub inner: Vec<u8>
|
||||
}
|
||||
|
||||
pub struct Slice {
|
||||
pub inner: [u8]
|
||||
}
|
||||
|
||||
impl Debug for Slice {
|
||||
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
self.to_string_lossy().fmt(formatter)
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Buf {
|
||||
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
self.as_slice().fmt(formatter)
|
||||
}
|
||||
}
|
||||
|
||||
impl Buf {
|
||||
pub fn from_string(s: String) -> Buf {
|
||||
Buf { inner: s.into_bytes() }
|
||||
}
|
||||
|
||||
pub fn from_str(s: &str) -> Buf {
|
||||
Buf { inner: s.as_bytes().to_vec() }
|
||||
}
|
||||
|
||||
pub fn as_slice(&self) -> &Slice {
|
||||
unsafe { mem::transmute(self.inner.as_slice()) }
|
||||
}
|
||||
|
||||
pub fn into_string(self) -> Result<String, Buf> {
|
||||
String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() } )
|
||||
}
|
||||
|
||||
pub fn push_slice(&mut self, s: &Slice) {
|
||||
self.inner.push_all(&s.inner)
|
||||
}
|
||||
}
|
||||
|
||||
impl Slice {
|
||||
fn from_u8_slice(s: &[u8]) -> &Slice {
|
||||
unsafe { mem::transmute(s) }
|
||||
}
|
||||
|
||||
pub fn from_str(s: &str) -> &Slice {
|
||||
unsafe { mem::transmute(s.as_bytes()) }
|
||||
}
|
||||
|
||||
pub fn to_str(&self) -> Option<&str> {
|
||||
str::from_utf8(&self.inner).ok()
|
||||
}
|
||||
|
||||
pub fn to_string_lossy(&self) -> CowString {
|
||||
String::from_utf8_lossy(&self.inner)
|
||||
}
|
||||
|
||||
pub fn to_owned(&self) -> Buf {
|
||||
Buf { inner: self.inner.to_vec() }
|
||||
}
|
||||
}
|
@ -16,7 +16,11 @@
|
||||
|
||||
#![unstable]
|
||||
|
||||
use sys_common::AsInner;
|
||||
pub use sys_common::wtf8::{Wtf8Buf, EncodeWide};
|
||||
|
||||
use sys::os_str::Buf;
|
||||
use sys_common::{AsInner, FromInner};
|
||||
use ffi::{OsStr, OsString};
|
||||
use libc;
|
||||
|
||||
use io;
|
||||
@ -92,9 +96,35 @@ impl AsRawSocket for io::net::udp::UdpSocket {
|
||||
}
|
||||
}
|
||||
|
||||
// Windows-specific extensions to `OsString`.
|
||||
pub trait OsStringExt {
|
||||
/// Create an `OsString` from a potentially ill-formed UTF-16 slice of 16-bit code units.
|
||||
///
|
||||
/// This is lossless: calling `.encode_wide()` on the resulting string
|
||||
/// will always return the original code units.
|
||||
fn from_wide(wide: &[u16]) -> Self;
|
||||
}
|
||||
|
||||
impl OsStringExt for OsString {
|
||||
fn from_wide(wide: &[u16]) -> OsString {
|
||||
FromInner::from_inner(Buf { inner: Wtf8Buf::from_wide(wide) })
|
||||
}
|
||||
}
|
||||
|
||||
// Windows-specific extensions to `OsStr`.
|
||||
pub trait OsStrExt {
|
||||
fn encode_wide(&self) -> EncodeWide;
|
||||
}
|
||||
|
||||
impl OsStrExt for OsStr {
|
||||
fn encode_wide(&self) -> EncodeWide {
|
||||
self.as_inner().inner.encode_wide()
|
||||
}
|
||||
}
|
||||
|
||||
/// A prelude for conveniently writing platform-specific code.
|
||||
///
|
||||
/// Includes all extension traits, and some important type definitions.
|
||||
pub mod prelude {
|
||||
pub use super::{Socket, Handle, AsRawSocket, AsRawHandle};
|
||||
pub use super::{Socket, Handle, AsRawSocket, AsRawHandle, OsStrExt, OsStringExt};
|
||||
}
|
||||
|
@ -44,6 +44,7 @@ pub mod fs;
|
||||
pub mod helper_signal;
|
||||
pub mod mutex;
|
||||
pub mod os;
|
||||
pub mod os_str;
|
||||
pub mod pipe;
|
||||
pub mod process;
|
||||
pub mod rwlock;
|
||||
|
82
src/libstd/sys/windows/os_str.rs
Normal file
82
src/libstd/sys/windows/os_str.rs
Normal file
@ -0,0 +1,82 @@
|
||||
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
/// The underlying OsString/OsStr implementation on Windows is a
|
||||
/// wrapper around the "WTF-8" encoding; see the `wtf8` module for more.
|
||||
|
||||
use fmt::{self, Debug};
|
||||
use sys_common::wtf8::{Wtf8, Wtf8Buf};
|
||||
use string::{String, CowString};
|
||||
use result::Result;
|
||||
use option::Option;
|
||||
use mem;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Buf {
|
||||
pub inner: Wtf8Buf
|
||||
}
|
||||
|
||||
impl Debug for Buf {
|
||||
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
self.as_slice().fmt(formatter)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Slice {
|
||||
pub inner: Wtf8
|
||||
}
|
||||
|
||||
impl Debug for Slice {
|
||||
fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
|
||||
self.inner.fmt(formatter)
|
||||
}
|
||||
}
|
||||
|
||||
impl Buf {
|
||||
pub fn from_string(s: String) -> Buf {
|
||||
Buf { inner: Wtf8Buf::from_string(s) }
|
||||
}
|
||||
|
||||
pub fn from_str(s: &str) -> Buf {
|
||||
Buf { inner: Wtf8Buf::from_str(s) }
|
||||
}
|
||||
|
||||
pub fn as_slice(&self) -> &Slice {
|
||||
unsafe { mem::transmute(self.inner.as_slice()) }
|
||||
}
|
||||
|
||||
pub fn into_string(self) -> Result<String, Buf> {
|
||||
self.inner.into_string().map_err(|buf| Buf { inner: buf })
|
||||
}
|
||||
|
||||
pub fn push_slice(&mut self, s: &Slice) {
|
||||
self.inner.push_wtf8(&s.inner)
|
||||
}
|
||||
}
|
||||
|
||||
impl Slice {
|
||||
pub fn from_str(s: &str) -> &Slice {
|
||||
unsafe { mem::transmute(Wtf8::from_str(s)) }
|
||||
}
|
||||
|
||||
pub fn to_str(&self) -> Option<&str> {
|
||||
self.inner.as_str()
|
||||
}
|
||||
|
||||
pub fn to_string_lossy(&self) -> CowString {
|
||||
self.inner.to_string_lossy()
|
||||
}
|
||||
|
||||
pub fn to_owned(&self) -> Buf {
|
||||
let mut buf = Wtf8Buf::with_capacity(self.inner.len());
|
||||
buf.push_wtf8(&self.inner);
|
||||
Buf { inner: buf }
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user