Sophisticate Windows path encoding

This commit is contained in:
Armin Sander 2020-08-14 01:19:46 +02:00
parent f1f73649a6
commit 8fc254597f

View File

@ -57,23 +57,42 @@ impl VfsPath {
}; };
buf.push(tag); buf.push(tag);
match &self.0 { match &self.0 {
VfsPathRepr::PathBuf(it) => { VfsPathRepr::PathBuf(path) => {
let path: &std::ffi::OsStr = it.as_os_str();
#[cfg(windows)] #[cfg(windows)]
{ {
use std::os::windows::ffi::OsStrExt; use windows_paths::Encode;
for wchar in path.encode_wide() { let components = path.components();
buf.extend(wchar.to_le_bytes().iter().copied()); let mut add_sep = false;
for component in components {
if add_sep {
windows_paths::SEP.encode(buf);
}
let len_before = buf.len();
match component {
std::path::Component::Prefix(prefix) => {
// kind() returns a normalized and comparable path prefix.
prefix.kind().encode(buf);
}
std::path::Component::RootDir => {
if !add_sep {
component.as_os_str().encode(buf);
}
}
_ => component.as_os_str().encode(buf),
}
// some components may be encoded empty
add_sep = len_before != buf.len();
} }
} }
#[cfg(unix)] #[cfg(unix)]
{ {
use std::os::unix::ffi::OsStrExt; use std::os::unix::ffi::OsStrExt;
buf.extend(path.as_bytes()); buf.extend(path.as_os_str().as_bytes());
} }
#[cfg(not(any(windows, unix)))] #[cfg(not(any(windows, unix)))]
{ {
buf.extend(path.to_string_lossy().as_bytes()); buf.extend(path.as_os_str().to_string_lossy().as_bytes());
} }
} }
VfsPathRepr::VirtualPath(VirtualPath(s)) => buf.extend(s.as_bytes()), VfsPathRepr::VirtualPath(VirtualPath(s)) => buf.extend(s.as_bytes()),
@ -81,6 +100,112 @@ impl VfsPath {
} }
} }
#[cfg(windows)]
mod windows_paths {
pub trait Encode {
fn encode(&self, buf: &mut Vec<u8>);
}
impl Encode for std::ffi::OsStr {
fn encode(&self, buf: &mut Vec<u8>) {
use std::os::windows::ffi::OsStrExt;
for wchar in self.encode_wide() {
buf.extend(wchar.to_le_bytes().iter().copied());
}
}
}
impl Encode for u8 {
fn encode(&self, buf: &mut Vec<u8>) {
let wide = *self as u16;
buf.extend(wide.to_le_bytes().iter().copied())
}
}
impl Encode for &str {
fn encode(&self, buf: &mut Vec<u8>) {
debug_assert!(self.is_ascii());
for b in self.as_bytes() {
b.encode(buf)
}
}
}
pub const SEP: &str = "\\";
const VERBATIM: &str = "\\\\?\\";
const UNC: &str = "UNC";
const DEVICE: &str = "\\\\.\\";
const COLON: &str = ":";
impl Encode for std::path::Prefix<'_> {
fn encode(&self, buf: &mut Vec<u8>) {
match self {
std::path::Prefix::Verbatim(c) => {
VERBATIM.encode(buf);
c.encode(buf);
}
std::path::Prefix::VerbatimUNC(server, share) => {
VERBATIM.encode(buf);
UNC.encode(buf);
SEP.encode(buf);
server.encode(buf);
SEP.encode(buf);
share.encode(buf);
}
std::path::Prefix::VerbatimDisk(d) => {
VERBATIM.encode(buf);
d.encode(buf);
COLON.encode(buf);
}
std::path::Prefix::DeviceNS(device) => {
DEVICE.encode(buf);
device.encode(buf);
}
std::path::Prefix::UNC(server, share) => {
SEP.encode(buf);
SEP.encode(buf);
server.encode(buf);
SEP.encode(buf);
share.encode(buf);
}
std::path::Prefix::Disk(d) => {
d.encode(buf);
COLON.encode(buf);
}
}
}
}
#[test]
fn paths_encoding() {
// drive letter casing agnostic
test_eq("C:/x.rs", "c:/x.rs");
// separator agnostic
test_eq("C:/x/y.rs", "C:\\x\\y.rs");
fn test_eq(a: &str, b: &str) {
let mut b1 = Vec::new();
let mut b2 = Vec::new();
vfs(a).encode(&mut b1);
vfs(b).encode(&mut b2);
assert_eq!(b1, b2);
}
}
#[test]
fn test_sep_root_dir_encoding() {
let mut buf = Vec::new();
vfs("C:/x/y").encode(&mut buf);
assert_eq!(&buf, &[0, 67, 0, 58, 0, 92, 0, 120, 0, 92, 0, 121, 0])
}
#[cfg(test)]
fn vfs(str: &str) -> super::VfsPath {
use super::{AbsPathBuf, VfsPath};
use std::convert::TryFrom;
VfsPath::from(AbsPathBuf::try_from(str).unwrap())
}
}
#[derive(Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] #[derive(Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
enum VfsPathRepr { enum VfsPathRepr {
PathBuf(AbsPathBuf), PathBuf(AbsPathBuf),