auto merge of #14213 : kballard/rust/str_from_utf8_result, r=cmr

Change `str::from_utf8_owned()` and `StrBuf::from_utf8()` to return `Result`.

This allows the vector to be recovered when it contains invalid UTF-8.
This commit is contained in:
bors 2014-05-15 21:41:29 -07:00
commit 84406d438c
6 changed files with 26 additions and 20 deletions

View File

@@ -60,7 +60,7 @@ To return an Owned String (~str) use the str helper function [`from_utf8_owned`]
 ~~~
 use std::str;
-let x: Option<~str> = str::from_utf8_owned(~[104u8,105u8]);
+let x: Result<~str,~[u8]> = str::from_utf8_owned(~[104u8,105u8]);
 let y: ~str = x.unwrap();
 ~~~

View File

@@ -181,9 +181,8 @@ impl<'a> FromBase64 for &'a str {
 * Convert any base64 encoded string (literal, `@`, `&`, or `~`)
 * to the byte values it encodes.
 *
-* You can use the `from_utf8_owned` function in `std::str`
-* to turn a `[u8]` into a string with characters corresponding to those
-* values.
+* You can use the `StrBuf::from_utf8` function in `std::strbuf` to turn a
+* `Vec<u8>` into a string with characters corresponding to those values.
 *
 * # Example
 *
@@ -199,7 +198,7 @@ impl<'a> FromBase64 for &'a str {
 * let res = hello_str.from_base64();
 * if res.is_ok() {
 * let opt_bytes = StrBuf::from_utf8(res.unwrap());
-* if opt_bytes.is_some() {
+* if opt_bytes.is_ok() {
 * println!("decoded from base64: {}", opt_bytes.unwrap());
 * }
 * }

View File

@@ -80,9 +80,8 @@ impl<'a> FromHex for &'a str {
 * Convert any hexadecimal encoded string (literal, `@`, `&`, or `~`)
 * to the byte values it encodes.
 *
-* You can use the `from_utf8_owned` function in `std::str`
-* to turn a `[u8]` into a string with characters corresponding to those
-* values.
+* You can use the `StrBuf::from_utf8` function in `std::strbuf` to turn a
+* `Vec<u8>` into a string with characters corresponding to those values.
 *
 * # Example
 *

View File

@@ -19,6 +19,7 @@ use num::{Float, FPNaN, FPInfinite, ToPrimitive};
 use num;
 use ops::{Add, Sub, Mul, Div, Rem, Neg};
 use option::{None, Option, Some};
+use result::ResultUnwrap;
 use slice::{CloneableVector, ImmutableVector, MutableVector};
 use std::cmp::{Ord, Eq};
 use str::{StrAllocating, StrSlice};

View File

@@ -87,6 +87,7 @@ use iter::{Iterator, range, AdditiveIterator};
 use mem::transmute;
 use mem;
 use option::{None, Option, Some};
+use result::{Result, Ok, Err};
 use slice::Vector;
 use slice::{ImmutableVector, MutableVector, CloneableVector};
 use strbuf::StrBuf;
@@ -105,12 +106,14 @@ Section: Creating a string
 */
 /// Consumes a vector of bytes to create a new utf-8 string.
-/// Returns None if the vector contains invalid UTF-8.
-pub fn from_utf8_owned(vv: ~[u8]) -> Option<~str> {
+///
+/// Returns `Err` with the original vector if the vector contains invalid
+/// UTF-8.
+pub fn from_utf8_owned(vv: ~[u8]) -> Result<~str, ~[u8]> {
 if is_utf8(vv) {
-Some(unsafe { raw::from_utf8_owned(vv) })
+Ok(unsafe { raw::from_utf8_owned(vv) })
 } else {
-None
+Err(vv)
 }
 }
@@ -2120,13 +2123,13 @@ mod tests {
 #[test]
 fn test_str_from_utf8_owned() {
 let xs = bytes!("hello").to_owned();
-assert_eq!(from_utf8_owned(xs), Some("hello".to_owned()));
+assert_eq!(from_utf8_owned(xs), Ok("hello".to_owned()));
 let xs = bytes!("ศไทย中华Việt Nam").to_owned();
-assert_eq!(from_utf8_owned(xs), Some("ศไทย中华Việt Nam".to_owned()));
+assert_eq!(from_utf8_owned(xs), Ok("ศไทย中华Việt Nam".to_owned()));
 let xs = bytes!("hello", 0xff).to_owned();
-assert_eq!(from_utf8_owned(xs), None);
+assert_eq!(from_utf8_owned(xs), Err(bytes!("hello", 0xff).to_owned()));
 }
 #[test]

View File

@@ -20,6 +20,7 @@ use mem;
 use option::{None, Option, Some};
 use ptr::RawPtr;
 use ptr;
+use result::{Result, Ok, Err};
 use slice::{OwnedVector, Vector, CloneableVector};
 use str::{CharRange, OwnedStr, Str, StrSlice, StrAllocating};
 use str;
@@ -72,14 +73,17 @@ impl StrBuf {
 }
 }
-/// Tries to create a new string buffer from the given byte
-/// vector, validating that the vector is UTF-8 encoded.
+/// Returns the vector as a string buffer, if possible, taking care not to
+/// copy it.
+///
+/// Returns `Err` with the original vector if the vector contains invalid
+/// UTF-8.
 #[inline]
-pub fn from_utf8(vec: Vec<u8>) -> Option<StrBuf> {
+pub fn from_utf8(vec: Vec<u8>) -> Result<StrBuf, Vec<u8>> {
 if str::is_utf8(vec.as_slice()) {
-Some(StrBuf { vec: vec })
+Ok(StrBuf { vec: vec })
 } else {
-None
+Err(vec)
 }
 }