Use str::from_utf8_lossy() in os::args(), add os::args_as_bytes()

os::args() was using str::raw::from_c_str(), which would assert if the
C-string wasn't valid UTF-8. Switch to using str::from_utf8_lossy() instead,
and add a separate function, os::args_as_bytes(), that returns the
arguments as ~[u8] byte vectors without any UTF-8 conversion.
This commit is contained in:
Kevin Ballard 2014-02-14 14:42:40 -08:00
parent 8cc8eb7b8e
commit c73d5ce8ab
2 changed files with 50 additions and 25 deletions

View File

@ -53,6 +53,8 @@ use ptr::RawPtr;
#[cfg(unix)]
use c_str::ToCStr;
#[cfg(windows)]
use str::OwnedStr;
/// Delegates to the libc close() function, returning the same return value.
pub fn close(fd: int) -> int {
@ -722,10 +724,12 @@ pub fn get_exit_status() -> int {
}
#[cfg(target_os = "macos")]
unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] {
unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~[u8]] {
use c_str::CString;
let mut args = ~[];
for i in range(0u, argc as uint) {
args.push(str::raw::from_c_str(*argv.offset(i as int)));
args.push(CString::new(*argv.offset(i as int), false).as_bytes_no_nul().to_owned())
}
args
}
@ -736,7 +740,7 @@ unsafe fn load_argc_and_argv(argc: int, argv: **c_char) -> ~[~str] {
* Returns a list of the command line arguments.
*/
#[cfg(target_os = "macos")]
fn real_args() -> ~[~str] {
fn real_args_as_bytes() -> ~[~[u8]] {
unsafe {
let (argc, argv) = (*_NSGetArgc() as int,
*_NSGetArgv() as **c_char);
@ -747,7 +751,7 @@ fn real_args() -> ~[~str] {
#[cfg(target_os = "linux")]
#[cfg(target_os = "android")]
#[cfg(target_os = "freebsd")]
fn real_args() -> ~[~str] {
fn real_args_as_bytes() -> ~[~[u8]] {
use rt;
match rt::args::clone() {
@ -756,6 +760,11 @@ fn real_args() -> ~[~str] {
}
}
/// Non-Windows implementation of `real_args`.
///
/// Fetches the arguments as byte vectors from `real_args_as_bytes` and decodes
/// each one with `str::from_utf8_lossy`, producing an owned string per argument.
#[cfg(not(windows))]
fn real_args() -> ~[~str] {
    let mut decoded = ~[];
    for raw in real_args_as_bytes().move_iter() {
        // Lossy decoding never fails; the result is copied into an owned ~str.
        decoded.push(str::from_utf8_lossy(raw).into_owned());
    }
    decoded
}
#[cfg(windows)]
fn real_args() -> ~[~str] {
use vec;
@ -786,6 +795,11 @@ fn real_args() -> ~[~str] {
return args;
}
/// Windows implementation of `real_args_as_bytes`.
///
/// Built on top of `real_args`: every argument string is consumed and turned
/// into its underlying byte vector via `into_bytes`.
#[cfg(windows)]
fn real_args_as_bytes() -> ~[~[u8]] {
    let mut encoded = ~[];
    for arg in real_args().move_iter() {
        encoded.push(arg.into_bytes());
    }
    encoded
}
type LPCWSTR = *u16;
#[cfg(windows)]
@ -803,10 +817,19 @@ extern "system" {
/// Returns the arguments which this program was started with (normally passed
/// via the command line).
///
/// The arguments are interpreted as UTF-8, with invalid bytes replaced with
/// U+FFFD (the Unicode replacement character). See `str::from_utf8_lossy` for
/// details.
///
/// This simply forwards to the platform-specific `real_args`. To obtain the
/// arguments as byte vectors instead of strings, use `args_as_bytes`.
pub fn args() -> ~[~str] {
real_args()
}
/// Returns the arguments which this program was started with (normally passed
/// via the command line) as byte vectors.
///
/// This simply forwards to the platform-specific `real_args_as_bytes`. On
/// Windows that function is built from the strings returned by `real_args`,
/// so the bytes there are the contents of those strings.
pub fn args_as_bytes() -> ~[~[u8]] {
real_args_as_bytes()
}
#[cfg(target_os = "macos")]
extern {
// These functions are in crt_externs.h.

View File

@ -36,8 +36,8 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) }
#[cfg(test)] pub unsafe fn cleanup() { realargs::cleanup() }
/// Take the global arguments from global storage.
#[cfg(not(test))] pub fn take() -> Option<~[~str]> { imp::take() }
#[cfg(test)] pub fn take() -> Option<~[~str]> {
#[cfg(not(test))] pub fn take() -> Option<~[~[u8]]> { imp::take() }
#[cfg(test)] pub fn take() -> Option<~[~[u8]]> {
match realargs::take() {
realstd::option::Some(a) => Some(a),
realstd::option::None => None,
@ -47,12 +47,12 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) }
/// Give the global arguments to global storage.
///
/// It is an error if the arguments already exist.
#[cfg(not(test))] pub fn put(args: ~[~str]) { imp::put(args) }
#[cfg(test)] pub fn put(args: ~[~str]) { realargs::put(args) }
#[cfg(not(test))] pub fn put(args: ~[~[u8]]) { imp::put(args) }
#[cfg(test)] pub fn put(args: ~[~[u8]]) { realargs::put(args) }
/// Make a clone of the global arguments.
#[cfg(not(test))] pub fn clone() -> Option<~[~str]> { imp::clone() }
#[cfg(test)] pub fn clone() -> Option<~[~str]> {
#[cfg(not(test))] pub fn clone() -> Option<~[~[u8]]> { imp::clone() }
#[cfg(test)] pub fn clone() -> Option<~[~[u8]]> {
match realargs::clone() {
realstd::option::Some(a) => Some(a),
realstd::option::None => None,
@ -65,15 +65,12 @@ pub unsafe fn init(argc: int, argv: **u8) { realargs::init(argc, argv) }
mod imp {
use cast;
use clone::Clone;
#[cfg(not(test))] use libc;
use option::{Option, Some, None};
use ptr::RawPtr;
use iter::Iterator;
#[cfg(not(test))] use str;
use unstable::finally::Finally;
use unstable::mutex::{Mutex, MUTEX_INIT};
use mem;
#[cfg(not(test))] use vec;
static mut global_args_ptr: uint = 0;
static mut lock: Mutex = MUTEX_INIT;
@ -90,15 +87,15 @@ mod imp {
lock.destroy();
}
pub fn take() -> Option<~[~str]> {
pub fn take() -> Option<~[~[u8]]> {
with_lock(|| unsafe {
let ptr = get_global_ptr();
let val = mem::replace(&mut *ptr, None);
val.as_ref().map(|s: &~~[~str]| (**s).clone())
val.as_ref().map(|s: &~~[~[u8]]| (**s).clone())
})
}
pub fn put(args: ~[~str]) {
pub fn put(args: ~[~[u8]]) {
with_lock(|| unsafe {
let ptr = get_global_ptr();
rtassert!((*ptr).is_none());
@ -106,10 +103,10 @@ mod imp {
})
}
pub fn clone() -> Option<~[~str]> {
pub fn clone() -> Option<~[~[u8]]> {
with_lock(|| unsafe {
let ptr = get_global_ptr();
(*ptr).as_ref().map(|s: &~~[~str]| (**s).clone())
(*ptr).as_ref().map(|s: &~~[~[u8]]| (**s).clone())
})
}
@ -126,15 +123,20 @@ mod imp {
})
}
fn get_global_ptr() -> *mut Option<~~[~str]> {
fn get_global_ptr() -> *mut Option<~~[~[u8]]> {
unsafe { cast::transmute(&global_args_ptr) }
}
// Copied from `os`.
#[cfg(not(test))]
unsafe fn load_argc_and_argv(argc: int, argv: **u8) -> ~[~str] {
unsafe fn load_argc_and_argv(argc: int, argv: **u8) -> ~[~[u8]] {
use c_str::CString;
use {vec, libc};
use vec::CloneableVector;
vec::from_fn(argc as uint, |i| {
str::raw::from_c_str(*(argv as **libc::c_char).offset(i as int))
let cs = CString::new(*(argv as **libc::c_char).offset(i as int), false);
cs.as_bytes_no_nul().to_owned()
})
}
@ -149,7 +151,7 @@ mod imp {
// Preserve the actual global state.
let saved_value = take();
let expected = ~[~"happy", ~"today?"];
let expected = ~[bytes!("happy").to_owned(), bytes!("today?").to_owned()];
put(expected.clone());
assert!(clone() == Some(expected.clone()));
@ -179,15 +181,15 @@ mod imp {
// Deliberately empty: calling this does nothing.
// NOTE(review): the sibling `take`/`put`/`clone` in this module all `fail!()`,
// so this looks like the stub `imp` for targets without global argument
// storage -- confirm against the module's `#[cfg]` attribute.
pub fn cleanup() {
}
pub fn take() -> Option<~[~str]> {
pub fn take() -> Option<~[~[u8]]> {
fail!()
}
pub fn put(_args: ~[~str]) {
pub fn put(_args: ~[~[u8]]) {
fail!()
}
pub fn clone() -> Option<~[~str]> {
pub fn clone() -> Option<~[~[u8]]> {
fail!()
}
}