mirror of
https://github.com/rust-lang/rust.git
synced 2025-01-25 22:22:44 +00:00
Improve Windows path prefix parsing
This commit is contained in:
parent
2c28b0eaf9
commit
e87082293e
@ -240,6 +240,7 @@
|
|||||||
#![feature(exhaustive_patterns)]
|
#![feature(exhaustive_patterns)]
|
||||||
#![feature(intra_doc_pointers)]
|
#![feature(intra_doc_pointers)]
|
||||||
#![feature(lang_items)]
|
#![feature(lang_items)]
|
||||||
|
#![feature(let_chains)]
|
||||||
#![feature(linkage)]
|
#![feature(linkage)]
|
||||||
#![feature(min_specialization)]
|
#![feature(min_specialization)]
|
||||||
#![feature(must_not_suspend)]
|
#![feature(must_not_suspend)]
|
||||||
|
@ -168,8 +168,8 @@ pub enum Prefix<'a> {
|
|||||||
|
|
||||||
/// Device namespace prefix, e.g., `\\.\COM42`.
|
/// Device namespace prefix, e.g., `\\.\COM42`.
|
||||||
///
|
///
|
||||||
/// Device namespace prefixes consist of `\\.\` immediately followed by the
|
/// Device namespace prefixes consist of `\\.\` (possibly using `/`
|
||||||
/// device name.
|
/// instead of `\`), immediately followed by the device name.
|
||||||
#[stable(feature = "rust1", since = "1.0.0")]
|
#[stable(feature = "rust1", since = "1.0.0")]
|
||||||
DeviceNS(#[stable(feature = "rust1", since = "1.0.0")] &'a OsStr),
|
DeviceNS(#[stable(feature = "rust1", since = "1.0.0")] &'a OsStr),
|
||||||
|
|
||||||
|
@ -971,15 +971,15 @@ pub fn test_decompositions_windows() {
|
|||||||
file_prefix: None
|
file_prefix: None
|
||||||
);
|
);
|
||||||
|
|
||||||
t!("\\\\?\\C:/foo",
|
t!("\\\\?\\C:/foo/bar",
|
||||||
iter: ["\\\\?\\C:/foo"],
|
iter: ["\\\\?\\C:", "\\", "foo/bar"],
|
||||||
has_root: true,
|
has_root: true,
|
||||||
is_absolute: true,
|
is_absolute: true,
|
||||||
parent: None,
|
parent: Some("\\\\?\\C:/"),
|
||||||
file_name: None,
|
file_name: Some("foo/bar"),
|
||||||
file_stem: None,
|
file_stem: Some("foo/bar"),
|
||||||
extension: None,
|
extension: None,
|
||||||
file_prefix: None
|
file_prefix: Some("foo/bar")
|
||||||
);
|
);
|
||||||
|
|
||||||
t!("\\\\.\\foo\\bar",
|
t!("\\\\.\\foo\\bar",
|
||||||
|
@ -190,6 +190,10 @@ where
|
|||||||
{
|
{
|
||||||
// Start off with a stack buf but then spill over to the heap if we end up
|
// Start off with a stack buf but then spill over to the heap if we end up
|
||||||
// needing more space.
|
// needing more space.
|
||||||
|
//
|
||||||
|
// This initial size also works around `GetFullPathNameW` returning
|
||||||
|
// incorrect size hints for some short paths:
|
||||||
|
// https://github.com/dylni/normpath/issues/5
|
||||||
let mut stack_buf = [0u16; 512];
|
let mut stack_buf = [0u16; 512];
|
||||||
let mut heap_buf = Vec::new();
|
let mut heap_buf = Vec::new();
|
||||||
unsafe {
|
unsafe {
|
||||||
|
@ -50,37 +50,101 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
|
|||||||
path.into()
|
path.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct PrefixParser<'a, const LEN: usize> {
|
||||||
|
path: &'a OsStr,
|
||||||
|
prefix: [u8; LEN],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
|
||||||
|
#[inline]
|
||||||
|
fn get_prefix(path: &OsStr) -> [u8; LEN] {
|
||||||
|
let mut prefix = [0; LEN];
|
||||||
|
// SAFETY: Only ASCII characters are modified.
|
||||||
|
for (i, &ch) in path.bytes().iter().take(LEN).enumerate() {
|
||||||
|
prefix[i] = if ch == b'/' { b'\\' } else { ch };
|
||||||
|
}
|
||||||
|
prefix
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new(path: &'a OsStr) -> Self {
|
||||||
|
Self { path, prefix: Self::get_prefix(path) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
|
||||||
|
PrefixParserSlice {
|
||||||
|
path: self.path,
|
||||||
|
prefix: &self.prefix[..LEN.min(self.path.len())],
|
||||||
|
index: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct PrefixParserSlice<'a, 'b> {
|
||||||
|
path: &'a OsStr,
|
||||||
|
prefix: &'b [u8],
|
||||||
|
index: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> PrefixParserSlice<'a, '_> {
|
||||||
|
fn strip_prefix(&self, prefix: &str) -> Option<Self> {
|
||||||
|
self.prefix[self.index..]
|
||||||
|
.starts_with(prefix.as_bytes())
|
||||||
|
.then(|| Self { index: self.index + prefix.len(), ..*self })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prefix_bytes(&self) -> &'a [u8] {
|
||||||
|
&self.path.bytes()[..self.index]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn finish(self) -> &'a OsStr {
|
||||||
|
// SAFETY: The unsafety here stems from converting between &OsStr and
|
||||||
|
// &[u8] and back. This is safe to do because (1) we only look at ASCII
|
||||||
|
// contents of the encoding and (2) new &OsStr values are produced only
|
||||||
|
// from ASCII-bounded slices of existing &OsStr values.
|
||||||
|
unsafe { bytes_as_os_str(&self.path.bytes()[self.index..]) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
|
pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
|
||||||
use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC};
|
use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC};
|
||||||
|
|
||||||
if let Some(path) = strip_prefix(path, r"\\") {
|
let parser = PrefixParser::<8>::new(path);
|
||||||
|
let parser = parser.as_slice();
|
||||||
|
if let Some(parser) = parser.strip_prefix(r"\\") {
|
||||||
// \\
|
// \\
|
||||||
if let Some(path) = strip_prefix(path, r"?\") {
|
|
||||||
|
// The meaning of verbatim paths can change when they use a different
|
||||||
|
// separator.
|
||||||
|
if let Some(parser) = parser.strip_prefix(r"?\") && !parser.prefix_bytes().iter().any(|&x| x == b'/') {
|
||||||
// \\?\
|
// \\?\
|
||||||
if let Some(path) = strip_prefix(path, r"UNC\") {
|
if let Some(parser) = parser.strip_prefix(r"UNC\") {
|
||||||
// \\?\UNC\server\share
|
// \\?\UNC\server\share
|
||||||
|
|
||||||
|
let path = parser.finish();
|
||||||
let (server, path) = parse_next_component(path, true);
|
let (server, path) = parse_next_component(path, true);
|
||||||
let (share, _) = parse_next_component(path, true);
|
let (share, _) = parse_next_component(path, true);
|
||||||
|
|
||||||
Some(VerbatimUNC(server, share))
|
Some(VerbatimUNC(server, share))
|
||||||
} else {
|
} else {
|
||||||
let (prefix, _) = parse_next_component(path, true);
|
let path = parser.finish();
|
||||||
|
|
||||||
// in verbatim paths only recognize an exact drive prefix
|
// in verbatim paths only recognize an exact drive prefix
|
||||||
if let Some(drive) = parse_drive_exact(prefix) {
|
if let Some(drive) = parse_drive_exact(path) {
|
||||||
// \\?\C:
|
// \\?\C:
|
||||||
Some(VerbatimDisk(drive))
|
Some(VerbatimDisk(drive))
|
||||||
} else {
|
} else {
|
||||||
// \\?\prefix
|
// \\?\prefix
|
||||||
|
let (prefix, _) = parse_next_component(path, true);
|
||||||
Some(Verbatim(prefix))
|
Some(Verbatim(prefix))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if let Some(path) = strip_prefix(path, r".\") {
|
} else if let Some(parser) = parser.strip_prefix(r".\") {
|
||||||
// \\.\COM42
|
// \\.\COM42
|
||||||
|
let path = parser.finish();
|
||||||
let (prefix, _) = parse_next_component(path, false);
|
let (prefix, _) = parse_next_component(path, false);
|
||||||
Some(DeviceNS(prefix))
|
Some(DeviceNS(prefix))
|
||||||
} else {
|
} else {
|
||||||
|
let path = parser.finish();
|
||||||
let (server, path) = parse_next_component(path, false);
|
let (server, path) = parse_next_component(path, false);
|
||||||
let (share, _) = parse_next_component(path, false);
|
let (share, _) = parse_next_component(path, false);
|
||||||
|
|
||||||
@ -102,31 +166,26 @@ pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Parses a drive prefix, e.g. "C:" and "C:\whatever"
|
// Parses a drive prefix, e.g. "C:" and "C:\whatever"
|
||||||
fn parse_drive(prefix: &OsStr) -> Option<u8> {
|
fn parse_drive(path: &OsStr) -> Option<u8> {
|
||||||
// In most DOS systems, it is not possible to have more than 26 drive letters.
|
// In most DOS systems, it is not possible to have more than 26 drive letters.
|
||||||
// See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
|
// See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
|
||||||
fn is_valid_drive_letter(drive: &u8) -> bool {
|
fn is_valid_drive_letter(drive: &u8) -> bool {
|
||||||
drive.is_ascii_alphabetic()
|
drive.is_ascii_alphabetic()
|
||||||
}
|
}
|
||||||
|
|
||||||
match prefix.bytes() {
|
match path.bytes() {
|
||||||
[drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),
|
[drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parses a drive prefix exactly, e.g. "C:"
|
// Parses a drive prefix exactly, e.g. "C:"
|
||||||
fn parse_drive_exact(prefix: &OsStr) -> Option<u8> {
|
fn parse_drive_exact(path: &OsStr) -> Option<u8> {
|
||||||
// only parse two bytes: the drive letter and the drive separator
|
// only parse two bytes: the drive letter and the drive separator
|
||||||
if prefix.len() == 2 { parse_drive(prefix) } else { None }
|
if path.bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
|
||||||
}
|
parse_drive(path)
|
||||||
|
} else {
|
||||||
fn strip_prefix<'a>(path: &'a OsStr, prefix: &str) -> Option<&'a OsStr> {
|
None
|
||||||
// `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]`
|
|
||||||
// is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice.
|
|
||||||
match path.bytes().strip_prefix(prefix.as_bytes()) {
|
|
||||||
Some(path) => unsafe { Some(bytes_as_os_str(path)) },
|
|
||||||
None => None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -219,15 +278,7 @@ pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {
|
|||||||
// SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
|
// SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
|
||||||
// `lpfilename` is a pointer to a null terminated string that is not
|
// `lpfilename` is a pointer to a null terminated string that is not
|
||||||
// invalidated until after `GetFullPathNameW` returns successfully.
|
// invalidated until after `GetFullPathNameW` returns successfully.
|
||||||
|buffer, size| unsafe {
|
|buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) },
|
||||||
// While the docs for `GetFullPathNameW` have the standard note
|
|
||||||
// about needing a `\\?\` path for a long lpfilename, this does not
|
|
||||||
// appear to be true in practice.
|
|
||||||
// See:
|
|
||||||
// https://stackoverflow.com/questions/38036943/getfullpathnamew-and-long-windows-file-paths
|
|
||||||
// https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
|
|
||||||
c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut())
|
|
||||||
},
|
|
||||||
|mut absolute| {
|
|mut absolute| {
|
||||||
path.clear();
|
path.clear();
|
||||||
|
|
||||||
@ -263,9 +314,20 @@ pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {
|
|||||||
|
|
||||||
/// Make a Windows path absolute.
|
/// Make a Windows path absolute.
|
||||||
pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
|
pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
|
||||||
if path.as_os_str().bytes().starts_with(br"\\?\") {
|
let path = path.as_os_str();
|
||||||
return Ok(path.into());
|
let prefix = parse_prefix(path);
|
||||||
|
// Verbatim paths should not be modified.
|
||||||
|
if prefix.map(|x| x.is_verbatim()).unwrap_or(false) {
|
||||||
|
// NULs in verbatim paths are rejected for consistency.
|
||||||
|
if path.bytes().contains(&0) {
|
||||||
|
return Err(io::const_io_error!(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
"strings passed to WinAPI cannot contain NULs",
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
return Ok(path.to_owned().into());
|
||||||
|
}
|
||||||
|
|
||||||
let path = to_u16s(path)?;
|
let path = to_u16s(path)?;
|
||||||
let lpfilename = path.as_ptr();
|
let lpfilename = path.as_ptr();
|
||||||
fill_utf16_buf(
|
fill_utf16_buf(
|
||||||
|
@ -94,3 +94,23 @@ fn verbatim() {
|
|||||||
// A path that contains null is not a valid path.
|
// A path that contains null is not a valid path.
|
||||||
assert!(maybe_verbatim(Path::new("\0")).is_err());
|
assert!(maybe_verbatim(Path::new("\0")).is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_prefix(path: &str) -> Option<Prefix<'_>> {
|
||||||
|
super::parse_prefix(OsStr::new(path))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parse_prefix_verbatim() {
|
||||||
|
let prefix = Some(Prefix::VerbatimDisk(b'C'));
|
||||||
|
assert_eq!(prefix, parse_prefix(r"\\?\C:/windows/system32/notepad.exe"));
|
||||||
|
assert_eq!(prefix, parse_prefix(r"\\?\C:\windows\system32\notepad.exe"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parse_prefix_verbatim_device() {
|
||||||
|
let prefix = Some(Prefix::UNC(OsStr::new("?"), OsStr::new("C:")));
|
||||||
|
assert_eq!(prefix, parse_prefix(r"//?/C:/windows/system32/notepad.exe"));
|
||||||
|
assert_eq!(prefix, parse_prefix(r"//?/C:\windows\system32\notepad.exe"));
|
||||||
|
assert_eq!(prefix, parse_prefix(r"/\?\C:\windows\system32\notepad.exe"));
|
||||||
|
assert_eq!(prefix, parse_prefix(r"\\?/C:\windows\system32\notepad.exe"));
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user