From aaf102dfe4d749b80aa5a012e019a4e204670887 Mon Sep 17 00:00:00 2001 From: Christopher Serr Date: Mon, 29 Jan 2024 22:57:50 +0100 Subject: [PATCH] Support Windows Device Paths in WASI (#766) Turns out that Windows paths are very complicated. There are all sorts of relative paths and various device paths and UNC paths. Here's a TL;DR of how it all works: 1. Win32 paths may be in one of various relative forms that depend on hidden environment variables, such as `C:\Windows` (absolute on C drive), `Windows\System32\user32.dll` (relative to current dir on current drive), `C:Windows` (relative to current dir on C drive), or `\Windows` (absolute on current drive). There are also `\\server\share` paths that are called UNC paths. 2. These paths then get converted into a form that's rooted in a device (called device path) starting with `\\.\` followed by a device name. UNC paths get mapped to `\\.\UNC\server\share`. If a reserved device is part of the path, it takes precedence and becomes the device path (`C:\some\dir\COM1.txt` -> `\\.\COM1`). 3. The paths then get normalized / canonicalized (forward slashes converted, .. and . resolved, some spaces and dots get removed) into the `\\?\` form (normalized device path). Because this is the step that replaces forward slashes by backward slashes, all previous forms mentioned may use forward slashes instead. `Path::canonicalize` handles all three steps, meaning a path returned by it starts with `\\?\` and skips all these three steps when used. 4. The `\\?\` form gets passed almost directly to NT, though it gets replaced with `\??\`. At this point it is an NT path. The NT path `\??\` matches `\GLOBAL??\` where the devices are then looked up. The device names may actually be "symbolic links" in the NT object namespace to other devices (so a symbolic link from one NT path to another). So for example `C:` is actually a symbolic link at `\GLOBAL??\C:` to `\Device\HarddiskVolume1` (or any other number). 
Various other forms of NT paths are also possible, but you can't get to them from a Win32 path (except via the device symlink called `GLOBALROOT`). The driver is then chosen based on the device that it resolves to. Depending on what kind of Win32 path you have, you may skip some of the steps on the way. References: - https://chrisdenton.github.io/omnipath/ - https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html - https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file - https://stackoverflow.com/a/46019856 - https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats - https://medium.com/walmartglobaltech/dos-file-path-magic-tricks-5eda7a7a85fa - https://reverseengineering.stackexchange.com/a/3799 We now map the `\\?\` paths, which are the lowest level before the NT paths, directly to `/mnt/device` in WASI. This should allow you to access everything that's currently not accessible. --- crates/livesplit-auto-splitting/Cargo.toml | 6 +- crates/livesplit-auto-splitting/README.md | 7 +- crates/livesplit-auto-splitting/src/lib.rs | 3 +- .../src/runtime/api/wasi.rs | 67 ++++- .../livesplit-auto-splitting/src/wasi_path.rs | 247 +++++++++++++++--- src/auto_splitting/mod.rs | 3 +- 6 files changed, 293 insertions(+), 40 deletions(-) diff --git a/crates/livesplit-auto-splitting/Cargo.toml b/crates/livesplit-auto-splitting/Cargo.toml index b0cfab70..77eb1d96 100644 --- a/crates/livesplit-auto-splitting/Cargo.toml +++ b/crates/livesplit-auto-splitting/Cargo.toml @@ -23,14 +23,14 @@ sysinfo = { version = "0.30.0", default-features = false, features = [ "multithread", ] } time = { version = "0.3.3", default-features = false } -wasmtime = { version = "16.0.0", default-features = false, features = [ +wasmtime = { version = "17.0.0", default-features = false, features = [ "cranelift", "parallel-compilation", ] } -wasmtime-wasi = { version = "16.0.0", default-features = false, features = [ +wasmtime-wasi = { version = "17.0.0", 
default-features = false, features = [ "sync", ] } -wasi-common = "16.0.0" +wasi-common = "17.0.0" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.52.0", features = ["Win32_Storage_FileSystem"] } diff --git a/crates/livesplit-auto-splitting/README.md b/crates/livesplit-auto-splitting/README.md index 12491561..93946cd1 100644 --- a/crates/livesplit-auto-splitting/README.md +++ b/crates/livesplit-auto-splitting/README.md @@ -532,9 +532,10 @@ support: - `stdout` / `stderr` / `stdin` are unbound. Those streams currently do nothing. -- The file system is currently almost entirely empty. The host's file system - is accessible through `/mnt`. It is entirely read-only. Windows paths are - mapped to `/mnt/c`, `/mnt/d`, etc. to match WSL. +- The file system is currently almost entirely empty. The host's file system is + accessible through `/mnt`. It is entirely read-only. Windows paths are mapped + to `/mnt/c`, `/mnt/d`, etc. to match WSL. Additionally `/mnt/device` maps to + `\\?\` on Windows to access additional paths. - There are no environment variables. - There are no command line arguments. - There is no networking. diff --git a/crates/livesplit-auto-splitting/src/lib.rs b/crates/livesplit-auto-splitting/src/lib.rs index ec44b3c2..6eba8184 100644 --- a/crates/livesplit-auto-splitting/src/lib.rs +++ b/crates/livesplit-auto-splitting/src/lib.rs @@ -534,7 +534,8 @@ //! nothing. //! - The file system is currently almost entirely empty. The host's file system //! is accessible through `/mnt`. It is entirely read-only. Windows paths are -//! mapped to `/mnt/c`, `/mnt/d`, etc. to match WSL. +//! mapped to `/mnt/c`, `/mnt/d`, etc. to match WSL. Additionally +//! `/mnt/device` maps to `\\?\` on Windows to access additional paths. //! - There are no environment variables. //! - There are no command line arguments. //! - There is no networking. 
diff --git a/crates/livesplit-auto-splitting/src/runtime/api/wasi.rs b/crates/livesplit-auto-splitting/src/runtime/api/wasi.rs index b433c03b..133b58c0 100644 --- a/crates/livesplit-auto-splitting/src/runtime/api/wasi.rs +++ b/crates/livesplit-auto-splitting/src/runtime/api/wasi.rs @@ -33,7 +33,7 @@ pub fn build(script_path: Option<&Path>) -> WasiCtx { drives &= !(1 << drive_idx); let drive = drive_idx as u8 + b'a'; if let Ok(path) = wasmtime_wasi::Dir::open_ambient_dir( - str::from_utf8(&[drive, b':', b'\\']).unwrap(), + str::from_utf8(&[b'\\', b'\\', b'?', b'\\', drive, b':', b'\\']).unwrap(), ambient_authority(), ) { wasi.push_dir( @@ -43,6 +43,9 @@ pub fn build(script_path: Option<&Path>) -> WasiCtx { .unwrap(); } } + + wasi.push_dir(Box::new(DeviceDir), PathBuf::from("/mnt/device")) + .unwrap(); } #[cfg(not(windows))] { @@ -128,3 +131,65 @@ impl WasiDir for ReadOnlyDir { self.0.get_path_filestat(path, follow_symlinks).await } } + +#[cfg(windows)] +struct DeviceDir; + +#[cfg(windows)] +#[async_trait::async_trait] +impl WasiDir for DeviceDir { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + async fn open_file( + &self, + symlink_follow: bool, + path: &str, + oflags: OFlags, + read: bool, + write: bool, + fdflags: FdFlags, + ) -> Result { + let (dir, file) = device_path(path)?; + dir.open_file(symlink_follow, file, oflags, read, write, fdflags) + .await + } + + // FIXME: cap-primitives/src/windows/fs/get_path tries to strip `\\?\`, + // which breaks paths that aren't valid without it, such as UNC paths: + // https://github.com/bytecodealliance/cap-std/issues/348 + + async fn read_link(&self, path: &str) -> Result { + let (dir, file) = device_path(path)?; + dir.read_link(file).await + } + + async fn get_path_filestat( + &self, + path: &str, + follow_symlinks: bool, + ) -> Result { + let (dir, file) = device_path(path)?; + dir.get_path_filestat(file, follow_symlinks).await + } +} + +#[cfg(windows)] +fn device_path(path: &str) -> Result<(ReadOnlyDir, 
&str), wasi_common::Error> { + let (parent, file) = path + .strip_suffix('/') + .unwrap_or(path) + .rsplit_once('/') + .ok_or_else(wasi_common::Error::not_supported)?; + + let parent = wasi_path::to_native(&format!("/mnt/device/{parent}"), true) + .ok_or_else(wasi_common::Error::not_supported)?; + + let dir = wasmtime_wasi::dir::Dir::from_cap_std( + wasmtime_wasi::Dir::open_ambient_dir(parent, ambient_authority()) + .map_err(|_| wasi_common::Error::not_supported())?, + ); + + Ok((ReadOnlyDir(dir), file)) +} diff --git a/crates/livesplit-auto-splitting/src/wasi_path.rs b/crates/livesplit-auto-splitting/src/wasi_path.rs index a3b2c635..fe1b1cc9 100644 --- a/crates/livesplit-auto-splitting/src/wasi_path.rs +++ b/crates/livesplit-auto-splitting/src/wasi_path.rs @@ -1,59 +1,176 @@ //! Translating WASI Paths -use std::path::{Component, Path, PathBuf, Prefix}; +use std::path::{Component, Path, PathBuf}; -/// Translates `original_path` into a path that -/// is accessible through the WASI file system, -/// so a Windows path of `C:\foo\bar.exe` would -/// be returned as `/mnt/c/foo/bar.exe`. +// Windows Paths Documentation: +// https://chrisdenton.github.io/omnipath/ +// https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html +// https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file +// https://stackoverflow.com/a/46019856 +// https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats +// https://medium.com/walmartglobaltech/dos-file-path-magic-tricks-5eda7a7a85fa +// https://reverseengineering.stackexchange.com/a/3799 + +// TL;DR: +// 1. Win32 paths may be in one of various relative forms that depend on hidden +// environment variables, such as `C:\Windows` (absolute on C drive), +// `Windows\System32\user32.dll` (relative to current dir on current drive), +// `C:Windows` (relative to current dir on C drive), or `\Windows` (absolute +// on current drive). 
There's also `\\server\share` paths that are called UNC +// paths. +// 2. These paths then get converted into a form that's rooted in a device +// (called device path) starting with `\\.\` followed by a device name. UNC +// paths get mapped to `\\.\UNC\server\share`. If a reserved device is part +// of the path, it takes precedence and becomes the device path +// (`C:\some\dir\COM1.txt` -> `\\.\COM1`). +// 3. The paths then get normalized / canonicalized (forward slashes converted, +// .. and . resolved, some spaces and dots get removed) into the `\\?\` form +// (normalized device path). Because this is the step that replaces forward +// slashes by backward slashes, all previous forms mentioned may use forward +// slashes instead. `Path::canonicalize` handles all three steps, meaning a +// path returned by it starts with `\\?\` and skips all these three steps +// when used. +// 4. The `\\?\` form gets passed almost directly to NT, though it gets replaced +// with `\??\`. At this point it is an NT path. The NT path `\??\` matches +// `\GLOBAL??\` where the devices are then looked up. The device names may +// actually be "symbolic links" in the NT object namespace to other devices +// (so a symbolic link from one NT path to another). So for example `C:` is +// actually a symbolic link at `\GLOBAL??\C:` to `\Device\HarddiskVolume1` +// (or any other number). Various other forms of NT paths are also possible, +// but you can't get to them from a Win32 path (except via the device symlink +// called `GLOBALROOT`). The driver is then chosen based on the device that +// it resolves to. +// +// Depending on what kind of Win32 path you have, you may skip some of the steps +// on the way. + +// Notes on Rust's handling of the paths: +// +// Rust does not really treat `\\.\` and `\\?\` the same, which means that +// `\\.\C:\` is parsed as a raw `DeviceNS` prefix, while `\\?\C:\` is parsed as +// a `VerbatimDisk` prefix, special handling is needed to treat them the same. 
+ +/// Translates `original_path` into a path that is accessible through the WASI +/// file system, so a Windows path of `C:\foo\bar.exe` would be returned as +/// `/mnt/c/foo/bar.exe`. The original path should be canonicalized or at least +/// absolute. pub fn from_native(original_path: &Path) -> Option> { - let mut path = String::from("/mnt"); + const BASE: &str = "/mnt"; + let mut path = String::from(BASE); + for component in original_path.components() { if !path.ends_with('/') { path.push('/'); } match component { - Component::Prefix(prefix) => match prefix.kind() { - Prefix::VerbatimDisk(disk) | Prefix::Disk(disk) => { - path.push(disk.to_ascii_lowercase() as char) + Component::Prefix(_prefix) => { + #[cfg(windows)] + use std::path::Prefix; + #[cfg(windows)] + match _prefix.kind() { + Prefix::VerbatimDisk(disk) | Prefix::Disk(disk) => { + path.push(disk.to_ascii_lowercase() as char) + } + Prefix::VerbatimUNC(server, share) | Prefix::UNC(server, share) => { + path.push_str("device/"); + path.push_str("UNC/"); + path.push_str(server.to_str()?); + path.push('/'); + path.push_str(share.to_str()?); + } + Prefix::Verbatim(value) | Prefix::DeviceNS(value) => { + if let [c @ b'A'..=b'Z' | c @ b'a'..=b'z', b':'] = value.as_encoded_bytes() + { + path.push(c.to_ascii_lowercase() as char); + } else { + path.push_str("device/"); + path.push_str(value.to_str()?); + } + } + } + } + Component::Normal(c) => path.push_str(c.to_str()?), + Component::RootDir => + { + #[cfg(windows)] + if path.len() == BASE.len() { + return None; } - _ => return None, - }, - Component::Normal(c) => { - path.push_str(c.to_str()?); } - Component::RootDir => {} Component::CurDir => path.push('.'), Component::ParentDir => path.push_str(".."), } } + Some(path.into_boxed_str()) } -/// Translates from a path accessible through the WASI -/// file system to a path accessible outside that, -/// so a WASI path of `/mnt/c/foo/bar.exe` would -/// be translated on Windows to `C:\foo\bar.exe`. 
-pub fn to_native(wasi_path_str: &str) -> Option { +/// Translates from a path accessible through the WASI file system to a path +/// accessible outside that, so a WASI path of `/mnt/c/foo/bar.exe` would be +/// translated on Windows to `C:\foo\bar.exe`. If `supports_device_path` is +/// true, then the path will be translated to a path that uses the `\\?\` +/// prefix. The DOS device path syntax allows for longer paths, but not every +/// application may support it. The parameter is ignored on non-Windows +/// platforms. +pub fn to_native(wasi_path_str: &str, supports_device_path: bool) -> Option { let path = wasi_path_str.strip_prefix("/mnt")?; let _after_slash = path.strip_prefix('/')?; #[cfg(windows)] { - let mut path_buf = String::with_capacity(path.len()); - let [c @ b'a'..=b'z', b'/', ..] = _after_slash.as_bytes() else { + // Backslashes would mess up the path, so we don't allow them. + if _after_slash.contains('\\') { return None; + } + + let mut path_buf = String::with_capacity(path.len() + supports_device_path as usize * 4); + if supports_device_path { + path_buf.push_str(r"\\?\"); + } + + let rem = match _after_slash.as_bytes() { + [c @ b'a'..=b'z', b'/', rem @ ..] => { + let drive = c.to_ascii_uppercase(); + path_buf.push(drive as char); + path_buf.push(':'); + rem + } + [b'd', b'e', b'v', b'i', b'c', b'e', b'/', rem @ ..] => { + if supports_device_path { + path_buf.pop(); + rem + } else { + match rem { + [b'U', b'N', b'C', b'/', rem @ ..] => { + path_buf.push('\\'); + rem + } + [c @ b'a'..=b'z' | c @ b'A'..=b'Z', b':', b'/', rem @ ..] 
=> { + let drive = c.to_ascii_uppercase(); + path_buf.push(drive as char); + path_buf.push(':'); + rem + } + _ => return None, + } + } + } + _ => return None, }; - let drive = c.to_ascii_uppercase(); - path_buf.push(drive as char); - path_buf.push(':'); - _after_slash[2..].split('/').for_each(|segment| { + + // SAFETY: We know that the path is valid UTF-8 because it was + // originally a WASI path, which is valid UTF-8 and we split after a + // slash. + let rem = unsafe { std::str::from_utf8_unchecked(rem) }; + rem.split('/').for_each(|segment| { path_buf.push('\\'); path_buf.push_str(segment); }); + Some(path_buf.into()) } #[cfg(not(windows))] { + _ = supports_device_path; Some(PathBuf::from(path)) } } @@ -66,12 +183,43 @@ mod tests { #[test] fn test_windows_to_wasi() { assert_eq!( - from_native(Path::new(r"C:\foo\bar.exe")), - Some(r"/mnt/c/foo/bar.exe".into()) + from_native(Path::new(r"C:\Windows\System32\user32.dll")), + Some(r"/mnt/c/Windows/System32/user32.dll".into()) + ); + assert_eq!( + from_native(Path::new(r"\\?\C:\Windows\System32\user32.dll")), + Some(r"/mnt/c/Windows/System32/user32.dll".into()) + ); + + assert_eq!( + from_native(Path::new(r"C:Windows\System32\user32.dll")), + Some(r"/mnt/c/Windows/System32/user32.dll".into()) + ); + + assert_eq!( + from_native(Path::new(r"\\server\share\bar.exe")), + Some(r"/mnt/device/UNC/server/share/bar.exe".into()) ); assert_eq!( - from_native(Path::new(r"\\?\C:\foo\bar.exe")), - Some(r"/mnt/c/foo/bar.exe".into()) + from_native(Path::new(r"\\?\UNC\server\share\bar.exe")), + Some(r"/mnt/device/UNC/server/share/bar.exe".into()) + ); + + assert_eq!( + from_native(Path::new(r"\\.\C:\Test\Foo.txt")), + Some(r"/mnt/c/Test/Foo.txt".into()) + ); + assert_eq!( + from_native(Path::new( + r"\\.\Volume{b75e2c83-0000-0000-0000-602f00000000}\Test\Foo.txt" + )), + Some(r"/mnt/device/Volume{b75e2c83-0000-0000-0000-602f00000000}/Test/Foo.txt".into()) + ); + assert_eq!( + from_native(Path::new( + 
r"\\?\Volume{b75e2c83-0000-0000-0000-602f00000000}\Test\Foo.txt" + )), + Some(r"/mnt/device/Volume{b75e2c83-0000-0000-0000-602f00000000}/Test/Foo.txt".into()) ); } @@ -79,8 +227,45 @@ mod tests { #[test] fn test_wasi_to_windows() { assert_eq!( - to_native(r"/mnt/c/foo/bar.exe"), - Some(r"C:\foo\bar.exe".into()) + to_native(r"/mnt/c/Windows/System32/user32.dll", false), + Some(r"C:\Windows\System32\user32.dll".into()) + ); + assert_eq!( + to_native(r"/mnt/c/Windows/System32/user32.dll", true), + Some(r"\\?\C:\Windows\System32\user32.dll".into()) + ); + + assert_eq!( + to_native(r"/mnt/device/UNC/server/share/bar.exe", false), + Some(r"\\server\share\bar.exe".into()) + ); + assert_eq!( + to_native(r"/mnt/device/UNC/server/share/bar.exe", true), + Some(r"\\?\UNC\server\share\bar.exe".into()) + ); + + assert_eq!( + to_native(r"/mnt/device/C:/Windows/System32/user32.dll", false), + Some(r"C:\Windows\System32\user32.dll".into()) + ); + assert_eq!( + to_native(r"/mnt/device/C:/Windows/System32/user32.dll", true), + Some(r"\\?\C:\Windows\System32\user32.dll".into()) + ); + + assert_eq!( + to_native( + r"/mnt/device/Volume{b75e2c83-0000-0000-0000-602f00000000}/Test/Foo.txt", + false + ), + None, + ); + assert_eq!( + to_native( + r"/mnt/device/Volume{b75e2c83-0000-0000-0000-602f00000000}/Test/Foo.txt", + true + ), + Some(r"\\?\Volume{b75e2c83-0000-0000-0000-602f00000000}\Test\Foo.txt".into()), ); } diff --git a/src/auto_splitting/mod.rs b/src/auto_splitting/mod.rs index dcf8cd02..8d4e6bc4 100644 --- a/src/auto_splitting/mod.rs +++ b/src/auto_splitting/mod.rs @@ -534,7 +534,8 @@ //! nothing. //! - The file system is currently almost entirely empty. The host's file system //! is accessible through `/mnt`. It is entirely read-only. Windows paths are -//! mapped to `/mnt/c`, `/mnt/d`, etc. to match WSL. +//! mapped to `/mnt/c`, `/mnt/d`, etc. to match WSL. Additionally +//! `/mnt/device` maps to `\\?\` on Windows to access additional paths. //! 
- There are no environment variables. //! - There are no command line arguments. //! - There is no networking.