From d0f3cb05df41b14b58553fab6a533e0e4c947b06 Mon Sep 17 00:00:00 2001 From: Kevin Ballard Date: Wed, 14 May 2014 16:48:05 -0700 Subject: [PATCH 1/2] Change str::from_utf8_owned() to return Result This allows the original vector to be recovered in the event that it is not valid UTF-8. [breaking-change] --- src/doc/complement-cheatsheet.md | 2 +- src/libstd/str.rs | 17 ++++++++++------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/doc/complement-cheatsheet.md b/src/doc/complement-cheatsheet.md index 804d878398b..5cd555cad8e 100644 --- a/src/doc/complement-cheatsheet.md +++ b/src/doc/complement-cheatsheet.md @@ -60,7 +60,7 @@ To return an Owned String (~str) use the str helper function [`from_utf8_owned`] ~~~ use std::str; -let x: Option<~str> = str::from_utf8_owned(~[104u8,105u8]); +let x: Result<~str,~[u8]> = str::from_utf8_owned(~[104u8,105u8]); let y: ~str = x.unwrap(); ~~~ diff --git a/src/libstd/str.rs b/src/libstd/str.rs index fa4cf8e4427..5f117ca0821 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -87,6 +87,7 @@ use iter::{Iterator, range, AdditiveIterator}; use mem::transmute; use mem; use option::{None, Option, Some}; +use result::{Result, Ok, Err}; use slice::Vector; use slice::{ImmutableVector, MutableVector, CloneableVector}; use strbuf::StrBuf; @@ -105,12 +106,14 @@ Section: Creating a string */ /// Consumes a vector of bytes to create a new utf-8 string. -/// Returns None if the vector contains invalid UTF-8. -pub fn from_utf8_owned(vv: ~[u8]) -> Option<~str> { +/// +/// Returns `Err` with the original vector if the vector contains invalid +/// UTF-8. 
+pub fn from_utf8_owned(vv: ~[u8]) -> Result<~str, ~[u8]> { if is_utf8(vv) { - Some(unsafe { raw::from_utf8_owned(vv) }) + Ok(unsafe { raw::from_utf8_owned(vv) }) } else { - None + Err(vv) } } @@ -2115,13 +2118,13 @@ mod tests { #[test] fn test_str_from_utf8_owned() { let xs = bytes!("hello").to_owned(); - assert_eq!(from_utf8_owned(xs), Some("hello".to_owned())); + assert_eq!(from_utf8_owned(xs), Ok("hello".to_owned())); let xs = bytes!("ศไทย中华Việt Nam").to_owned(); - assert_eq!(from_utf8_owned(xs), Some("ศไทย中华Việt Nam".to_owned())); + assert_eq!(from_utf8_owned(xs), Ok("ศไทย中华Việt Nam".to_owned())); let xs = bytes!("hello", 0xff).to_owned(); - assert_eq!(from_utf8_owned(xs), None); + assert_eq!(from_utf8_owned(xs), Err(bytes!("hello", 0xff).to_owned())); } #[test] From ba7844a7fff0061e5b4528c2ecd5adf765145b70 Mon Sep 17 00:00:00 2001 From: Kevin Ballard Date: Wed, 14 May 2014 16:55:24 -0700 Subject: [PATCH 2/2] Change StrBuf::from_utf8() to return Result This allows the original vector to be recovered in the event that it is not UTF-8. [breaking-change] --- src/libserialize/base64.rs | 7 +++---- src/libserialize/hex.rs | 5 ++--- src/libstd/num/strconv.rs | 1 + src/libstd/strbuf.rs | 14 +++++++++----- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/libserialize/base64.rs b/src/libserialize/base64.rs index 4709365ebff..5702557526f 100644 --- a/src/libserialize/base64.rs +++ b/src/libserialize/base64.rs @@ -181,9 +181,8 @@ impl<'a> FromBase64 for &'a str { * Convert any base64 encoded string (literal, `@`, `&`, or `~`) * to the byte values it encodes. * - * You can use the `from_utf8_owned` function in `std::str` - * to turn a `[u8]` into a string with characters corresponding to those - * values. + * You can use the `StrBuf::from_utf8` function in `std::strbuf` to turn a + * `Vec<u8>` into a string with characters corresponding to those values. 
 * * # Example * @@ -199,7 +198,7 @@ impl<'a> FromBase64 for &'a str { * let res = hello_str.from_base64(); * if res.is_ok() { * let opt_bytes = StrBuf::from_utf8(res.unwrap()); - * if opt_bytes.is_some() { + * if opt_bytes.is_ok() { * println!("decoded from base64: {}", opt_bytes.unwrap()); * } * } diff --git a/src/libserialize/hex.rs b/src/libserialize/hex.rs index c463d97dba4..2b9ba763b2e 100644 --- a/src/libserialize/hex.rs +++ b/src/libserialize/hex.rs @@ -80,9 +80,8 @@ impl<'a> FromHex for &'a str { * Convert any hexadecimal encoded string (literal, `@`, `&`, or `~`) * to the byte values it encodes. * - * You can use the `from_utf8_owned` function in `std::str` - * to turn a `[u8]` into a string with characters corresponding to those - * values. + * You can use the `StrBuf::from_utf8` function in `std::strbuf` to turn a + * `Vec<u8>` into a string with characters corresponding to those values. * * # Example * diff --git a/src/libstd/num/strconv.rs b/src/libstd/num/strconv.rs index 4769b17fb2b..63d6219ab8a 100644 --- a/src/libstd/num/strconv.rs +++ b/src/libstd/num/strconv.rs @@ -19,6 +19,7 @@ use num::{Float, FPNaN, FPInfinite, ToPrimitive}; use num; use ops::{Add, Sub, Mul, Div, Rem, Neg}; use option::{None, Option, Some}; +use result::ResultUnwrap; use slice::{CloneableVector, ImmutableVector, MutableVector}; use std::cmp::{Ord, Eq}; use str::{StrAllocating, StrSlice}; diff --git a/src/libstd/strbuf.rs b/src/libstd/strbuf.rs index 575de89fae2..de480ef1b7f 100644 --- a/src/libstd/strbuf.rs +++ b/src/libstd/strbuf.rs @@ -20,6 +20,7 @@ use mem; use option::{None, Option, Some}; use ptr::RawPtr; use ptr; +use result::{Result, Ok, Err}; use slice::{OwnedVector, Vector, CloneableVector}; use str::{CharRange, OwnedStr, Str, StrSlice, StrAllocating}; use str; @@ -72,14 +73,17 @@ impl StrBuf { } } - /// Tries to create a new string buffer from the given byte - /// vector, validating that the vector is UTF-8 encoded. 
+ /// Returns the vector as a string buffer, if possible, taking care not to + /// copy it. + /// + /// Returns `Err` with the original vector if the vector contains invalid + /// UTF-8. #[inline] - pub fn from_utf8(vec: Vec<u8>) -> Option<StrBuf> { + pub fn from_utf8(vec: Vec<u8>) -> Result<StrBuf, Vec<u8>> { if str::is_utf8(vec.as_slice()) { - Some(StrBuf { vec: vec }) + Ok(StrBuf { vec: vec }) } else { - None + Err(vec) } }