diff --git a/src/linux/init/WSLCInit.cpp b/src/linux/init/WSLCInit.cpp index c85efe79e..ab823e027 100644 --- a/src/linux/init/WSLCInit.cpp +++ b/src/linux/init/WSLCInit.cpp @@ -17,6 +17,7 @@ Module Name: #include "message.h" #include "localhost.h" #include "common.h" +#include "drvfs.h" #include #include #include @@ -652,8 +653,22 @@ void HandleMessageImpl( // Chroot without OverlayFs is not supported — the chroot logic depends on the overlay target path. THROW_ERRNO_IF(EINVAL, WI_IsFlagSet(Message.Flags, WSLC_MOUNT::Chroot) && !WI_IsFlagSet(Message.Flags, WSLC_MOUNT::OverlayFs)); - auto type = readField(Message.TypeIndex); - THROW_LAST_ERROR_IF(UtilMount(source, target, type, options.MountFlags, options.StringOptions.c_str(), c_defaultRetryTimeout) < 0); + const char* type = readField(Message.TypeIndex); + const char* subname = readField(Message.SubnameIndex); + if (*subname != '\0' && strcmp(type, "virtiofs") == 0) + { + // + // Windows-folder share routed through the single aggregate + // virtio-fs device: bind-mount the child identified by Subname + // instead of mounting a dedicated device. The helper re-parses + // the raw option string itself, so pass it through unparsed. + // + THROW_LAST_ERROR_IF(MountVirtioFsAggregateChild(source, subname, target, readField(Message.OptionsIndex)) < 0); + } + else + { + THROW_LAST_ERROR_IF(UtilMount(source, target, type, options.MountFlags, options.StringOptions.c_str(), c_defaultRetryTimeout) < 0); + } // Workaround for a Linux bug where virtiofs permissions aren't properly propagated when an overlay is mounted on top of a virtiofs share before the permissions have been fetched. // TODO: Remove once fixed upstream. diff --git a/src/linux/init/config.cpp b/src/linux/init/config.cpp index 3d3b59bfd..cd2e8fa83 100644 --- a/src/linux/init/config.cpp +++ b/src/linux/init/config.cpp @@ -1706,10 +1706,23 @@ Return Value: while (MountEnum.Next()) { // - // Do not consider bind mounts. 
+ // Skip non-root mounts (bind mounts), with one exception: aggregate + // virtio-fs shares are always bind-mounts from the aggregate device's + // synthetic-root child, so virtiofs entries with Root != "/" are + // legitimate drvfs mounts whose subname is encoded in Root. // - if (strcmp(MountEnum.Current().Root, "/") != 0) + const bool IsVirtioFs = (strcmp(MountEnum.Current().FileSystemType, VIRTIO_FS_TYPE) == 0); + if (!IsVirtioFs && strcmp(MountEnum.Current().Root, "/") != 0) + { + continue; + } + + // + // The aggregate-device root mounts under VIRTIOFS_AGGREGATE_ROOT_DIR + // are internal infrastructure (not user-visible drvfs targets); skip. + // + if (IsVirtioFs && wsl::shared::string::StartsWith(MountEnum.Current().MountPoint, VIRTIOFS_AGGREGATE_ROOT_DIR "/")) { continue; } @@ -1732,9 +1745,15 @@ Return Value: MountSource = MountEnum.Current().Source; UtilCanonicalisePathSeparator(MountSource, PATH_SEP_NT); } - else if (strcmp(MountEnum.Current().FileSystemType, VIRTIO_FS_TYPE) == 0) + else if (IsVirtioFs) { - MountSource = QueryVirtiofsMountSource(MountEnum.Current().Source); + // + // For aggregate shares, derive the subname from Root (strip leading "/"). + // Legacy direct-mount shares have Root == "/" → empty subname. + // + const char* Root = MountEnum.Current().Root; + const char* Subname = (Root && Root[0] == '/') ? Root + 1 : (Root ? Root : ""); + MountSource = QueryVirtiofsMountSource(MountEnum.Current().Source, Subname); } else { @@ -2327,10 +2346,26 @@ try // // Bind mounts which have a root other than / are currently not supported. // - // TODO_LX: Support bind mounts. + // + // Skip non-root mounts (bind mounts), except for aggregate virtio-fs + // shares which are themselves bind-mounts from the aggregate device's + // synthetic-root child. For those, Root encodes "/". + // + // TODO_LX: Support arbitrary bind mounts. 
// - if (strcmp(MountEntry.Root, "/") != 0) + const bool IsVirtioFs = (strcmp(MountEntry.FileSystemType, VIRTIO_FS_TYPE) == 0); + if (!IsVirtioFs && strcmp(MountEntry.Root, "/") != 0) + { + continue; + } + + // + // The aggregate-device root mounts under VIRTIOFS_AGGREGATE_ROOT_DIR + // are internal infrastructure (not user-visible drvfs targets); skip + // them so they're not torn down and remounted incorrectly. + // + if (IsVirtioFs && wsl::shared::string::StartsWith(MountEntry.MountPoint, VIRTIOFS_AGGREGATE_ROOT_DIR "/")) { continue; } @@ -2447,7 +2482,13 @@ try } else if (strcmp(MountEntry.FileSystemType, VIRTIO_FS_TYPE) == 0) { - RemountVirtioFs(MountEntry.Source, MountEntry.MountPoint, MountEntry.MountOptions, Message->Admin); + // + // Derive aggregate-child subname from Root ("/", or "/" + // for legacy direct-mount shares). + // + const char* Root = MountEntry.Root; + const char* Subname = (Root && Root[0] == '/') ? Root + 1 : (Root ? Root : ""); + RemountVirtioFs(MountEntry.Source, Subname, MountEntry.MountPoint, MountEntry.MountOptions, Message->Admin); } else { diff --git a/src/linux/init/drvfs.cpp b/src/linux/init/drvfs.cpp index 128157e57..1e4ed8a3f 100644 --- a/src/linux/init/drvfs.cpp +++ b/src/linux/init/drvfs.cpp @@ -20,9 +20,12 @@ Module Name: #include "drvfs.h" #include "config.h" #include "message.h" +#include #include +#include #include #include +#include using namespace std::chrono_literals; @@ -44,19 +47,23 @@ int MountFilesystem(const char* FsType, const char* Source, const char* Target, int MountWithRetry(const char* Source, const char* Target, const char* FsType, const char* Options, int* ExitCode = nullptr); -void SaveVirtiofsTagMapping(const char* Tag, const char* Source) +void SaveVirtiofsTagMapping(const char* Tag, const char* Subname, const char* Source) /*++ Routine Description: - This routine creates a symlink in VIRTIOFS_TAG_DIR that maps a virtiofs tag - to its Windows mount source path. 
This allows QueryVirtiofsMountSource to - resolve tags without talking to the service. + This routine creates a symlink in VIRTIOFS_TAG_DIR that maps a virtiofs + (aggregate) tag and child subname to its Windows mount source path. + This allows QueryVirtiofsMountSource to resolve tags without talking to + the service. Arguments: - Tag - Supplies the virtiofs tag. + Tag - Supplies the virtiofs (aggregate) tag. + + Subname - Supplies the child subname inside the aggregate's synthetic + root. Empty for legacy direct-mount shares. Source - Supplies the Windows path the tag refers to. @@ -78,6 +85,23 @@ Return Value: return; } + // + // Validate the subname (when provided) is 32 lowercase hex chars so + // we can use it as a path component without escaping concerns. + // + + const bool hasSubname = Subname && *Subname != '\0'; + if (hasSubname) + { + std::string_view sv{Subname}; + if (sv.size() != 32 || + !std::all_of(sv.begin(), sv.end(), [](char c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); })) + { + LOG_WARNING("Invalid virtiofs subname {}", Subname); + return; + } + } + // // Canonicalize path separators to backslashes before persisting. // @@ -87,7 +111,21 @@ Return Value: UtilMkdirPath(VIRTIOFS_TAG_DIR, 0755); - auto LinkPath = std::format("{}/{}", VIRTIOFS_TAG_DIR, Tag); + // + // For aggregate shares, the layout is VIRTIOFS_TAG_DIR//; + // for legacy direct-mount shares, it stays VIRTIOFS_TAG_DIR/. + // + std::string LinkPath; + if (hasSubname) + { + const auto TagDir = std::format("{}/{}", VIRTIOFS_TAG_DIR, Tag); + UtilMkdirPath(TagDir.c_str(), 0755); + LinkPath = std::format("{}/{}", TagDir, Subname); + } + else + { + LinkPath = std::format("{}/{}", VIRTIOFS_TAG_DIR, Tag); + } // // Remove any existing symlink for this tag before creating a new one. 
@@ -543,6 +581,181 @@ try } CATCH_RETURN_ERRNO() +int MountVirtioFs(const char* Source, const char* Target, const char* Options, std::optional Admin, const wsl::linux::WslDistributionConfig& Config, int* ExitCode); +int RemountVirtioFs(const char* Tag, const char* Subname, const char* Target, const char* Options, bool Admin); + +namespace { + +// +// Check whether the aggregate device for Tag is already mounted at +// VIRTIOFS_AGGREGATE_ROOT_DIR/ by scanning /proc/self/mountinfo. +// +bool IsAggregateRootMounted(const char* Tag, const std::string& Target) +{ + try + { + mountutil::MountEnum MountEnum; + while (MountEnum.Next()) + { + const auto& Entry = MountEnum.Current(); + if (strcmp(Entry.FileSystemType, VIRTIO_FS_TYPE) == 0 && strcmp(Entry.Source, Tag) == 0 && Target == Entry.MountPoint) + { + return true; + } + } + } + catch (...) + { + LOG_CAUGHT_EXCEPTION(); + } + + return false; +} + +// +// Ensure the aggregate virtiofs device for Tag is mounted at the +// well-known root VIRTIOFS_AGGREGATE_ROOT_DIR/. The synthetic-root +// FUSE directory exposes one child entry per share registered against +// this tag. Idempotent across processes via a /proc/self/mountinfo scan +// instead of any in-memory state. +// +int EnsureAggregateRootMounted(const char* Tag) +try +{ + // + // Validate the tag is a GUID; it becomes a path component. + // + + const auto Guid = wsl::shared::string::ToGuid(Tag); + if (!Guid) + { + LOG_ERROR("Invalid virtiofs aggregate tag {}", Tag); + errno = EINVAL; + return -1; + } + + UtilMkdirPath(VIRTIOFS_AGGREGATE_ROOT_DIR, 0755); + const auto Target = std::format("{}/{}", VIRTIOFS_AGGREGATE_ROOT_DIR, Tag); + UtilMkdirPath(Target.c_str(), 0755); + + if (IsAggregateRootMounted(Tag, Target)) + { + return 0; + } + + if (MountWithRetry(Tag, Target.c_str(), VIRTIO_FS_TYPE, "") < 0) + { + // + // Another thread may have raced us to the mount; recheck before + // failing to keep mount idempotent. 
+ // + if (IsAggregateRootMounted(Tag, Target)) + { + return 0; + } + return -1; + } + + return 0; +} +CATCH_RETURN_ERRNO() + +} // namespace + +// +// Bind-mount an aggregate child onto the user's requested target, +// then apply MountOptions (if any) via a remount. Bounded retry around +// the initial bind absorbs the brief lag between AddSharePath returning +// on the host and the FUSE LOOKUP on the synthetic root resolving the +// new child name. +// +int MountVirtioFsAggregateChild(const char* Tag, const char* Subname, const char* Target, const char* MountOptions) +try +{ + if (EnsureAggregateRootMounted(Tag) < 0) + { + return -1; + } + + // + // Validate Subname is 32 lowercase hex chars before using it as a + // path component, to guard against path traversal from untrusted + // message input. + // + + if (Subname == nullptr) + { + errno = EINVAL; + return -1; + } + + std::string_view SubnameView{Subname}; + if (SubnameView.size() != 32 || !std::all_of(SubnameView.begin(), SubnameView.end(), [](char c) { + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); + })) + { + LOG_ERROR("Invalid virtiofs aggregate subname {}", Subname); + errno = EINVAL; + return -1; + } + + const auto BindSource = std::format("{}/{}/{}", VIRTIOFS_AGGREGATE_ROOT_DIR, Tag, Subname); + + // + // Ensure target exists; bind-mount the child. 
+ // + if (UtilMkdirPath(Target, 0755) < 0) + { + return -1; + } + + constexpr int c_maxAttempts = 5; + constexpr auto c_retryDelay = std::chrono::milliseconds{50}; + int LastErrno = 0; + for (int attempt = 0; attempt < c_maxAttempts; ++attempt) + { + if (mount(BindSource.c_str(), Target, nullptr, MS_BIND, nullptr) == 0) + { + LastErrno = 0; + break; + } + + LastErrno = errno; + if (LastErrno != ENOENT) + { + break; + } + + std::this_thread::sleep_for(c_retryDelay); + } + + if (LastErrno != 0) + { + errno = LastErrno; + LOG_ERROR("bind-mount {} -> {} failed {}", BindSource, Target, LastErrno); + return -1; + } + + // + // Apply caller-supplied mount options via remount-bind, if any. + // + if (MountOptions != nullptr && *MountOptions != '\0') + { + auto Parsed = mountutil::MountParseFlags(MountOptions); + if (mount(nullptr, Target, nullptr, MS_REMOUNT | MS_BIND | Parsed.MountFlags, Parsed.StringOptions.c_str()) < 0) + { + const int RemountErrno = errno; + LOG_ERROR("remount-bind {} flags={:#x} options=\"{}\" failed {}", Target, Parsed.MountFlags, Parsed.StringOptions, RemountErrno); + umount2(Target, MNT_DETACH); + errno = RemountErrno; + return -1; + } + } + + return 0; +} +CATCH_RETURN_ERRNO() + int MountVirtioFs(const char* Source, const char* Target, const char* Options, std::optional Admin, const wsl::linux::WslDistributionConfig& Config, int* ExitCode) /*++ @@ -621,12 +834,41 @@ try } // - // Perform the mount operation. + // Perform the mount operation. For aggregate shares the response + // carries a non-empty Subname identifying the child entry inside the + // device's synthetic root; bind-mount that child onto the user's + // requested target. Legacy direct-mount responses (empty Subname) + // still mount the tag directly. 
// auto* Tag = wsl::shared::string::FromSpan(ResponseSpan, Response.TagOffset); auto* ResponseSource = wsl::shared::string::FromSpan(ResponseSpan, Response.SourceOffset); - THROW_LAST_ERROR_IF(MountWithRetry(Tag, Target, VIRTIO_FS_TYPE, MountOptions.c_str(), ExitCode) < 0); + + // + // SubnameOffset is an appended trailing field. Two guards are required: + // 1. The message must be large enough to contain it. + // 2. The decoded offset itself must land within the message payload. + // If either fails, treat as an empty Subname (legacy direct-mount). This + // protects new-init talking to an old service whose wire payload happens + // to be large enough to satisfy the SocketChannel sizeof(TMessage) check + // but whose SubnameOffset slot overlaps stale buffer bytes (garbage). + // + + const char* Subname = ""; + if (Response.Header.MessageSize >= offsetof(LX_INIT_ADD_VIRTIOFS_SHARE_RESPONSE_MESSAGE, SubnameOffset) + sizeof(unsigned int) && + Response.SubnameOffset >= sizeof(LX_INIT_ADD_VIRTIOFS_SHARE_RESPONSE_MESSAGE) && Response.SubnameOffset < Response.Header.MessageSize) + { + Subname = wsl::shared::string::FromSpan(ResponseSpan, Response.SubnameOffset); + } + + if (Subname != nullptr && *Subname != '\0') + { + THROW_LAST_ERROR_IF(MountVirtioFsAggregateChild(Tag, Subname, Target, MountOptions.c_str()) < 0); + } + else + { + THROW_LAST_ERROR_IF(MountWithRetry(Tag, Target, VIRTIO_FS_TYPE, MountOptions.c_str(), ExitCode) < 0); + } // // Save the tag mapping. @@ -634,13 +876,18 @@ try // N.B. Use the source path from the response since the service canonicalizes it. 
// - SaveVirtiofsTagMapping(Tag, ResponseSource); + SaveVirtiofsTagMapping(Tag, Subname, ResponseSource); + + if (ExitCode) + { + *ExitCode = 0; + } return 0; } CATCH_RETURN_ERRNO() -int RemountVirtioFs(const char* Tag, const char* Target, const char* Options, bool Admin) +int RemountVirtioFs(const char* Tag, const char* Subname, const char* Target, const char* Options, bool Admin) /*++ @@ -651,7 +898,10 @@ Routine Description: Arguments: - Tag - Supplies the virtiofs tag to remount. + Tag - Supplies the (aggregate) virtiofs tag to remount. + + Subname - Supplies the child subname inside the aggregate's synthetic + root, or an empty string for legacy direct-mount shares. Target - Supplies the mount target. @@ -672,6 +922,7 @@ try wsl::shared::MessageWriter RemountShare(LxInitMessageRemountVirtioFsDevice); RemountShare->Admin = Admin; RemountShare.WriteString(RemountShare->TagOffset, Tag); + RemountShare.WriteString(RemountShare->SubnameOffset, Subname ? Subname : ""); // // Connect to the host and send the remount request. 
@@ -693,26 +944,44 @@ try auto* NewTag = wsl::shared::string::FromSpan(ResponseSpan, Response.TagOffset); auto* Source = wsl::shared::string::FromSpan(ResponseSpan, Response.SourceOffset); - THROW_LAST_ERROR_IF(MountWithRetry(NewTag, Target, VIRTIO_FS_TYPE, Options) < 0); + const char* NewSubname = ""; + if (Response.Header.MessageSize >= offsetof(LX_INIT_ADD_VIRTIOFS_SHARE_RESPONSE_MESSAGE, SubnameOffset) + sizeof(unsigned int) && + Response.SubnameOffset >= sizeof(LX_INIT_ADD_VIRTIOFS_SHARE_RESPONSE_MESSAGE) && Response.SubnameOffset < Response.Header.MessageSize) + { + NewSubname = wsl::shared::string::FromSpan(ResponseSpan, Response.SubnameOffset); + } + + if (NewSubname != nullptr && *NewSubname != '\0') + { + THROW_LAST_ERROR_IF(MountVirtioFsAggregateChild(NewTag, NewSubname, Target, Options) < 0); + } + else + { + THROW_LAST_ERROR_IF(MountWithRetry(NewTag, Target, VIRTIO_FS_TYPE, Options) < 0); + } - SaveVirtiofsTagMapping(NewTag, Source); + SaveVirtiofsTagMapping(NewTag, NewSubname, Source); return 0; } CATCH_RETURN_ERRNO() -std::string QueryVirtiofsMountSource(const char* Tag) +std::string QueryVirtiofsMountSource(const char* Tag, const char* Subname) /*++ Routine Description: - This routine takes a virtiofs tag and determines the Windows path it refers to - by reading the symlink created during mount. + This routine takes a virtiofs tag and (optional) child subname and + determines the Windows path it refers to by reading the symlink + created during mount. Arguments: - Tag - Supplies the virtiofs tag to query. + Tag - Supplies the virtiofs (aggregate) tag to query. + + Subname - Supplies the child subname inside the aggregate's synthetic + root. Empty for legacy direct-mount shares. Return Value: @@ -738,11 +1007,43 @@ try } // - // Read the symlink that maps this tag to its Windows source path. + // Read the symlink that maps this tag (and optional subname) to its + // Windows source path. 
Aggregate shares live nested under the tag + // directory; legacy shares are direct children of VIRTIOFS_TAG_DIR. + // + + std::string LinkPath; + if (Subname != nullptr && *Subname != '\0') + { + std::string_view sv{Subname}; + if (sv.size() != 32 || + !std::all_of(sv.begin(), sv.end(), [](char c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); })) + { + return {}; + } + LinkPath = std::format("{}/{}/{}", VIRTIOFS_TAG_DIR, Tag, Subname); + } + else + { + LinkPath = std::format("{}/{}", VIRTIOFS_TAG_DIR, Tag); + } + + // + // Use the error-code overload to avoid throwing for the common case + // where this path is the aggregate device's root directory (which + // contains per-subname symlinks for its children, but is not itself + // a symlink). A legacy direct-mount share IS a symlink here, so the + // read succeeds; the aggregate device's raw root mount returns + // empty silently, letting the caller skip it. // - auto LinkPath = std::format("{}/{}", VIRTIOFS_TAG_DIR, Tag); - return std::filesystem::read_symlink(LinkPath).string(); + std::error_code ec; + auto target = std::filesystem::read_symlink(LinkPath, ec); + if (ec) + { + return {}; + } + return target.string(); } catch (...) { diff --git a/src/linux/init/drvfs.h b/src/linux/init/drvfs.h index 682e08d1f..619ef5b45 100644 --- a/src/linux/init/drvfs.h +++ b/src/linux/init/drvfs.h @@ -19,6 +19,14 @@ Module Name: #define DRVFS_FS_TYPE "drvfs" #define MOUNT_DRVFS_NAME "mount.drvfs" +// +// Directory under which aggregate virtio-fs devices are mounted by their +// tag (one subdirectory per tag). Aggregate child bind-mounts source from +// VIRTIOFS_AGGREGATE_ROOT_DIR//. Exposed here so that other +// init components can filter these internal mounts out of drvfs enumeration. 
+// +#define VIRTIOFS_AGGREGATE_ROOT_DIR "/run/wsl/virtiofs-root" + int MountDrvfs(const char* Source, const char* Target, const char* Options, std::optional Admin, const wsl::linux::WslDistributionConfig& Config, int* ExitCode = nullptr); int MountDrvfsEntry(int Argc, char* Argv[]); @@ -29,6 +37,24 @@ int MountPlan9(const char* Source, const char* Target, const char* Options, std: int MountVirtioFs(const char* Source, const char* Target, const char* Options, std::optional Admin, const wsl::linux::WslDistributionConfig& Config, int* ExitCode = nullptr); -int RemountVirtioFs(const char* Tag, const char* Target, const char* Options, bool Admin); - -std::string QueryVirtiofsMountSource(const char* Tag); +int RemountVirtioFs(const char* Tag, const char* Subname, const char* Target, const char* Options, bool Admin); + +// +// Look up the Windows path mounted by a virtio-fs share. +// +// Tag identifies the (aggregate) virtio-fs device tag and Subname +// identifies the child entry inside the device's synthetic root. +// Subname must be empty for legacy direct-mount shares. +// +std::string QueryVirtiofsMountSource(const char* Tag, const char* Subname = ""); + +// +// Bind-mount one child of an aggregate virtio-fs device onto Target. +// +// Tag is the aggregate device tag (GUID formatted) and Subname is the +// child entry inside the device's synthetic root (32 lowercase hex +// chars). Ensures the device is mounted at VIRTIOFS_AGGREGATE_ROOT_DIR/ +// , then bind-mounts / onto Target and applies +// MountOptions. Used by both drvfs and the WSLc Windows-folder path. 
+// +int MountVirtioFsAggregateChild(const char* Tag, const char* Subname, const char* Target, const char* MountOptions); diff --git a/src/linux/init/util.cpp b/src/linux/init/util.cpp index 962e426a2..f269aac16 100644 --- a/src/linux/init/util.cpp +++ b/src/linux/init/util.cpp @@ -923,13 +923,30 @@ try } else if (strcmp(MountEnum.Current().FileSystemType, VIRTIO_FS_TYPE) == 0) { - MountSource = QueryVirtiofsMountSource(MountEnum.Current().Source); + // + // For aggregate virtio-fs shares the user-visible mount is a + // bind from the aggregate device's child entry, with Root set + // to "/". Strip the leading "/" to recover the + // subname; "/" itself means a legacy direct-mount share. + // + // After QueryVirtiofsMountSource resolves to the Windows path, + // overwrite Root to "/" so the bind-mount concatenation below + // does not append "/" onto the Windows source path. + // + const char* Root = MountEnum.Current().Root; + const char* Subname = (Root && Root[0] == '/') ? Root + 1 : (Root ? Root : ""); + MountSource = QueryVirtiofsMountSource(MountEnum.Current().Source, Subname); if (MountSource.empty()) { continue; } MountEnum.Current().Source = MountSource.data(); + if (Subname != nullptr && *Subname != '\0') + { + MountEnum.Current().Root[0] = '/'; + MountEnum.Current().Root[1] = '\0'; + } } else if (strcmp(MountEnum.Current().FileSystemType, DRVFS_FS_TYPE) == 0) { diff --git a/src/shared/inc/lxinitshared.h b/src/shared/inc/lxinitshared.h index 9c9e3fde9..d5af396a4 100644 --- a/src/shared/inc/lxinitshared.h +++ b/src/shared/inc/lxinitshared.h @@ -1156,9 +1156,17 @@ typedef struct _LX_INIT_ADD_VIRTIOFS_SHARE_RESPONSE_MESSAGE int Result; unsigned int TagOffset; unsigned int SourceOffset; + // + // Offset of the per-share "subname" (child name within an aggregate + // virtio-fs root) inside Buffer. Empty for legacy direct-mount + // shares. 
Appended for wire compatibility: consumers MUST size-gate + // Header.MessageSize >= offsetof(SubnameOffset) + sizeof(unsigned int) + // before reading this field. + // + unsigned int SubnameOffset; char Buffer[]; - PRETTY_PRINT(FIELD(Header), FIELD(Result), STRING_FIELD(TagOffset), STRING_FIELD(SourceOffset)); + PRETTY_PRINT(FIELD(Header), FIELD(Result), STRING_FIELD(TagOffset), STRING_FIELD(SourceOffset), STRING_FIELD(SubnameOffset)); } LX_INIT_ADD_VIRTIOFS_SHARE_RESPONSE_MESSAGE, *PLX_INIT_ADD_VIRTIOFS_SHARE_RESPONSE_MESSAGE; typedef struct _LX_INIT_ADD_VIRTIOFS_SHARE_MESSAGE @@ -1183,9 +1191,17 @@ typedef struct _LX_INIT_REMOUNT_VIRTIOFS_SHARE_MESSAGE MESSAGE_HEADER Header; bool Admin; unsigned int TagOffset; + // + // Offset of the original child "subname" inside Buffer. Empty when + // the share was a legacy direct-mount virtiofs. Appended for wire + // compatibility: consumers MUST size-gate + // Header.MessageSize >= offsetof(SubnameOffset) + sizeof(unsigned int) + // before reading this field. + // + unsigned int SubnameOffset; char Buffer[]; - PRETTY_PRINT(FIELD(Header), FIELD(Admin), STRING_FIELD(TagOffset)); + PRETTY_PRINT(FIELD(Header), FIELD(Admin), STRING_FIELD(TagOffset), STRING_FIELD(SubnameOffset)); } LX_INIT_REMOUNT_VIRTIOFS_SHARE_MESSAGE, *PLX_INIT_REMOUNT_VIRTIOFS_SHARE_MESSAGE; // @@ -1594,6 +1610,26 @@ struct WSLC_MOUNT_RESULT PRETTY_PRINT(FIELD(Header), FIELD(Result)); }; +// +// Fixed tags identifying the aggregate virtio-fs devices used to share +// Windows folders into a WSLc container. Unlike drvfs (which mints a +// random per-VM tag and returns it to its caller), WSLc has no return +// channel for AddShare, so host and guest agree on these compile-time +// constants. Each WSLc VM is a distinct guest, so there is no cross-VM +// collision on VIRTIOFS_AGGREGATE_ROOT_DIR/. Per-share entries are +// addressed by a subname derived independently on both sides from the +// share's GUID (see GuidToHexString). 
+// +// There are two aggregates because the device host enforces a single +// readonly setting per aggregate device: read-write shares go in the +// read-write aggregate and read-only shares go in the read-only aggregate, +// whose virtio-fs backend rejects writes (EROFS) regardless of the guest's +// mount options. The guest selects the matching tag from the share's +// read-only flag, so no return channel is needed. +// +inline constexpr GUID c_wslcVirtioFsAggregateTag = {0x3f2a7c84, 0x1b6d, 0x4e29, {0x9a, 0x55, 0x6c, 0x0d, 0x8e, 0x1f, 0x2b, 0x73}}; +inline constexpr GUID c_wslcVirtioFsAggregateReadOnlyTag = {0xa43bebe3, 0xd067, 0x4e0b, {0x96, 0xc8, 0xa2, 0x4e, 0x33, 0xc4, 0x15, 0x6e}}; + struct WSLC_MOUNT { static inline auto Type = LxMessageWSLCMount; @@ -1607,6 +1643,7 @@ struct WSLC_MOUNT unsigned int TypeIndex{}; unsigned int OptionsIndex{}; unsigned int Flags{}; + unsigned int SubnameIndex{}; enum MountType : uint8_t { @@ -1619,7 +1656,7 @@ struct WSLC_MOUNT char Buffer[]; - PRETTY_PRINT(FIELD(Header), STRING_FIELD(SourceIndex), STRING_FIELD(DestinationIndex), STRING_FIELD(TypeIndex), STRING_FIELD(OptionsIndex)); + PRETTY_PRINT(FIELD(Header), STRING_FIELD(SourceIndex), STRING_FIELD(DestinationIndex), STRING_FIELD(TypeIndex), STRING_FIELD(OptionsIndex), STRING_FIELD(SubnameIndex)); }; struct WSLC_EXEC diff --git a/src/shared/inc/stringshared.h b/src/shared/inc/stringshared.h index 978bd9cbd..b4e38fd3a 100644 --- a/src/shared/inc/stringshared.h +++ b/src/shared/inc/stringshared.h @@ -13,6 +13,7 @@ Module Name: --*/ #pragma once +#include #include #include #include @@ -492,6 +493,21 @@ inline std::basic_string GuidToString(const GUID& guid, GuidToStringFlags return output; } +// +// Format a GUID as 32 lowercase hexadecimal characters (no braces or +// dashes). Used to derive a virtio-fs aggregate subname that is safe to +// use directly as a path component. 
Both host and guest format the same +// GUID through this helper to agree on the subname without transmitting +// it. +// +template +inline std::basic_string GuidToHexString(const GUID& guid) +{ + auto output = GuidToString(guid, GuidToStringFlags::None); + output.erase(std::remove(output.begin(), output.end(), static_cast('-')), output.end()); + return output; +} + template inline std::optional ToGuid(const TChar* string, std::optional length = {}) { diff --git a/src/windows/common/GuestDeviceManager.cpp b/src/windows/common/GuestDeviceManager.cpp index aec0231c0..72bd72902 100644 --- a/src/windows/common/GuestDeviceManager.cpp +++ b/src/windows/common/GuestDeviceManager.cpp @@ -69,6 +69,37 @@ GUID GuestDeviceManager::AddNewDevice(_In_ const GUID& deviceId, _In_ const wil: return m_deviceHostSupport->AddNewDevice(deviceId, server, tag); } +_Requires_lock_not_held_(m_lock) +void GuestDeviceManager::ExtendVirtioFsAggregate( + _In_ const GUID& ImplementationClsid, _In_ PCWSTR AccessName, _In_opt_ PCWSTR Options, _In_ PCWSTR Path, _In_ HANDLE UserToken) +{ + auto guestDeviceLock = m_lock.lock_exclusive(); + + // Options are appended to the name with a semi-colon separator. + // "name;subname=;key1=value1;key2=value2" + // N.B. A ";vm_id=" option is always appended so the device host can + // identify the owning VM. This mirrors AddHdvShareWithOptions and is + // required because device-host control configs (e.g. ";swiotlb=...", + // forwarded here for aggregate children) are rejected without it. + std::wstring nameWithOptions{AccessName}; + if (ARGUMENT_PRESENT(Options) && Options[0] != L'\0') + { + nameWithOptions += L";"; + nameWithOptions += Options; + } + + nameWithOptions += std::format(L";vm_id={}", m_machineId); + + auto revert = wil::impersonate_token(UserToken); + + // The aggregate must already exist; the original AddNewDevice call for + // this AccessName populated the COM server. 
+ auto server = GetRemoteFileSystem(ImplementationClsid, c_defaultDeviceTag); + THROW_HR_IF_NULL(E_UNEXPECTED, server.get()); + + THROW_IF_FAILED(server->AddSharePath(nameWithOptions.c_str(), Path, VIRTIO_FS_FLAGS_TYPE_AGGREGATE)); +} + void GuestDeviceManager::AddRemoteFileSystem(_In_ REFCLSID clsid, _In_ PCWSTR tag, _In_ const wil::com_ptr& server) { m_deviceHostSupport->AddRemoteFileSystem(clsid, tag, server); diff --git a/src/windows/common/GuestDeviceManager.h b/src/windows/common/GuestDeviceManager.h index 41ef10df5..ff9a20503 100644 --- a/src/windows/common/GuestDeviceManager.h +++ b/src/windows/common/GuestDeviceManager.h @@ -7,6 +7,7 @@ // Flags for virtiofs vdev device creation. #define VIRTIO_FS_FLAGS_TYPE_FILES 0x8000 #define VIRTIO_FS_FLAGS_TYPE_SECTIONS 0x4000 +#define VIRTIO_FS_FLAGS_TYPE_AGGREGATE 0x2000 inline const std::wstring c_defaultDeviceTag = L"default"; @@ -43,6 +44,15 @@ class GuestDeviceManager _Requires_lock_not_held_(m_lock) GUID AddNewDevice(_In_ const GUID& deviceId, _In_ const wil::com_ptr& server, _In_ PCWSTR tag); + // + // Add a new child to an existing aggregate virtiofs device. Routes + // an AddSharePath call (with the VIRTIO_FS_FLAGS_TYPE_AGGREGATE flag + // set) to the same wsldevicehost COM server that hosts the existing + // aggregate tag, without creating a new PCI device. 
+ // + _Requires_lock_not_held_(m_lock) + void ExtendVirtioFsAggregate(_In_ const GUID& ImplementationClsid, _In_ PCWSTR AccessName, _In_opt_ PCWSTR Options, _In_ PCWSTR Path, _In_ HANDLE UserToken); + void AddRemoteFileSystem(_In_ REFCLSID clsid, _In_ PCWSTR tag, _In_ const wil::com_ptr& server); void AddSharedMemoryDevice(_In_ const GUID& ImplementationClsid, _In_ PCWSTR Tag, _In_ PCWSTR Path, _In_ UINT32 SizeMb, _In_ HANDLE UserToken); diff --git a/src/windows/service/exe/HcsVirtualMachine.cpp b/src/windows/service/exe/HcsVirtualMachine.cpp index c512eabb3..73dd8f46b 100644 --- a/src/windows/service/exe/HcsVirtualMachine.cpp +++ b/src/windows/service/exe/HcsVirtualMachine.cpp @@ -584,25 +584,61 @@ try } else { - std::wstring options = ReadOnly ? L"ro" : L""; - if (!m_swiotlbOption.empty()) + // + // Share Windows folders through aggregate virtio-fs devices instead + // of one PCI device per share. Each share becomes a child of an + // aggregate's synthetic root, addressed by a subname derived from the + // share GUID (see GuidToHexString). The aggregate tags are the fixed + // compile-time constants c_wslcVirtioFsAggregateTag (read-write) and + // c_wslcVirtioFsAggregateReadOnlyTag (read-only) so the guest can + // agree on the device name without AddShare needing a return channel. + // + // ReadOnly is enforced host-side: read-only shares are placed in a + // separate read-only aggregate whose virtio-fs backend rejects writes + // (EROFS) regardless of the guest's mount options. The device host + // requires all children of one aggregate to share the same readonly + // setting, so read-only and read-write shares cannot share a device. + // + const auto subname = wsl::shared::string::GuidToHexString(shareIdLocal); + auto options = L"subname=" + subname; + if (ReadOnly) { - if (!options.empty()) - { - options += L";"; - } + options += L";ro"; + } + // Append the swiotlb token so the aggregate device gets the swiotlb pool reservation. 
+ // The options string always begins with "subname=", so it is never empty here. + if (!m_swiotlbOption.empty()) + { + options += L";"; options += m_swiotlbOption; } - it->second = m_guestDeviceManager->AddGuestDevice( - VIRTIO_FS_DEVICE_ID, - m_virtioFsClassId, - shareName.c_str(), - options.c_str(), - WindowsPath, - VIRTIO_FS_FLAGS_TYPE_FILES, - m_userToken.get()); + const auto& aggregateTag = ReadOnly ? c_wslcVirtioFsAggregateReadOnlyTag : c_wslcVirtioFsAggregateTag; + const auto tag = wsl::shared::string::GuidToString(aggregateTag, wsl::shared::string::None); + + bool& aggregateCreated = ReadOnly ? m_virtioFsAggregateReadOnlyCreated : m_virtioFsAggregateCreated; + GUID& aggregateDevice = ReadOnly ? m_virtioFsAggregateReadOnlyDevice : m_virtioFsAggregateDevice; + + if (!aggregateCreated) + { + aggregateDevice = m_guestDeviceManager->AddGuestDevice( + VIRTIO_FS_DEVICE_ID, + m_virtioFsClassId, + tag.c_str(), + options.c_str(), + WindowsPath, + VIRTIO_FS_FLAGS_TYPE_AGGREGATE, + m_userToken.get()); + aggregateCreated = true; + } + else + { + m_guestDeviceManager->ExtendVirtioFsAggregate( + m_virtioFsClassId, tag.c_str(), options.c_str(), WindowsPath, m_userToken.get()); + } + + it->second = aggregateDevice; } cleanup.release(); @@ -627,7 +663,14 @@ try } else { - m_guestDeviceManager->RemoveGuestDevice(VIRTIO_FS_DEVICE_ID, it->second.value()); + // + // Virtio-fs shares are children of a single aggregate device that + // is torn down with the VM. Removing the guest device here would + // destroy the device shared by every other Windows-folder share, + // so aggregate children are append-only: just drop the bookkeeping + // entry. (The WSLc consumer never calls RemoveShare in virtio-fs + // mode.) 
+ // } m_shares.erase(it); diff --git a/src/windows/service/exe/HcsVirtualMachine.h b/src/windows/service/exe/HcsVirtualMachine.h index b8eba9490..0d7e010eb 100644 --- a/src/windows/service/exe/HcsVirtualMachine.h +++ b/src/windows/service/exe/HcsVirtualMachine.h @@ -97,6 +97,17 @@ class HcsVirtualMachine // Shares: key is ShareId, value is nullopt for Plan9 or DeviceInstanceId for VirtioFS std::map, wsl::windows::common::helpers::GuidLess> m_shares; + // Aggregate virtio-fs devices shared by every Windows-folder share. + // There are two because the device host enforces a single readonly + // setting per aggregate: read-write shares are registered as children + // of the read-write device and read-only shares as children of the + // read-only device (whose backend rejects writes). Each is created + // lazily on its first share and torn down with the VM. + bool m_virtioFsAggregateCreated{false}; + GUID m_virtioFsAggregateDevice{}; + bool m_virtioFsAggregateReadOnlyCreated{false}; + GUID m_virtioFsAggregateReadOnlyDevice{}; + std::filesystem::path m_vmSavedStateFile; std::filesystem::path m_crashDumpFolder; std::atomic m_vmSavedStateCaptured = false; diff --git a/src/windows/service/exe/WslCoreVm.cpp b/src/windows/service/exe/WslCoreVm.cpp index b658ce9ab..b635ad1ca 100644 --- a/src/windows/service/exe/WslCoreVm.cpp +++ b/src/windows/service/exe/WslCoreVm.cpp @@ -73,6 +73,43 @@ RequiredExtraMmioSpaceForPmemFileInMb(_In_ PCWSTR FilePath) // Convert from bytes to megabytes. Ensure that we don't truncate a 512kb file to 0mb. return std::max(fileSizeBytes.QuadPart / static_cast(_1MB), 1i64); } + +// +// Compute a deterministic subname (child name) for a virtio-fs share +// inside its aggregate device's synthetic root. The hash inputs are: +// <canonical path> NUL <normalized options> NUL <admin byte> +// The first 128 bits of the SHA-256 digest are encoded as 32 lowercase +// hex chars. A deterministic subname keeps the per-share guest-side path +// stable across service-internal state rebuilds.
+// +std::wstring ComputeAggregateSubname(const std::wstring& CanonicalPath, const std::wstring& NormalizedOptions, bool Admin) +{ + std::string buffer; + const auto pathUtf8 = wsl::shared::string::WideToMultiByte(CanonicalPath); + const auto optsUtf8 = wsl::shared::string::WideToMultiByte(NormalizedOptions); + buffer.append(pathUtf8); + buffer.push_back('\0'); + buffer.append(optsUtf8); + buffer.push_back('\0'); + buffer.push_back(Admin ? '\x01' : '\x00'); + + wil::unique_bcrypt_hash hash; + THROW_IF_NTSTATUS_FAILED(BCryptCreateHash(BCRYPT_SHA256_ALG_HANDLE, &hash, nullptr, 0, nullptr, 0, 0)); + THROW_IF_NTSTATUS_FAILED(BCryptHashData(hash.get(), reinterpret_cast(buffer.data()), gsl::narrow(buffer.size()), 0)); + + std::array digest{}; + THROW_IF_NTSTATUS_FAILED(BCryptFinishHash(hash.get(), digest.data(), gsl::narrow(digest.size()), 0)); + + static constexpr char hexChars[] = "0123456789abcdef"; + std::wstring out; + out.reserve(32); + for (size_t i = 0; i < 16; ++i) + { + out.push_back(static_cast(hexChars[digest[i] >> 4])); + out.push_back(static_cast(hexChars[digest[i] & 0x0F])); + } + return out; +} } // namespace WslCoreVm::WslCoreVm(_In_ wsl::core::Config&& VmConfig) : @@ -1389,12 +1426,35 @@ void WslCoreVm::EjectVhdLockHeld(_In_ PCWSTR VhdPath) } _Requires_lock_held_(m_guestDeviceLock) -std::optional WslCoreVm::FindVirtioFsShare(_In_ PCWSTR tag, _In_ std::optional Admin) const +std::optional WslCoreVm::FindVirtioFsShare(_In_ PCWSTR tag, _In_ PCWSTR Subname, _In_ std::optional Admin) const { - for (const auto& share : m_virtioFsShares) + // + // Each aggregate tag lives in one of the two Admin buckets (index 0 + // = non-admin, index 1 = admin). Identify the matching bucket by tag + // and (optionally) Admin, then look up the share by Subname within + // that bucket. The subname encodes any RO/RW distinction (via the + // hashed options string), so matching by subname alone is enough. 
+ // + for (size_t bucket = 0; bucket < m_drvfsAggregateTag.size(); ++bucket) { - if ((share.second == tag) && (!Admin.has_value() || Admin.value() == share.first.Admin)) + const bool bucketAdmin = (bucket != 0); + if (Admin.has_value() && Admin.value() != bucketAdmin) { + continue; + } + + if (m_drvfsAggregateTag[bucket].empty() || m_drvfsAggregateTag[bucket] != tag) + { + continue; + } + + for (const auto& share : m_virtioFsShares) + { + if (share.first.Admin != bucketAdmin || share.second != Subname) + { + continue; + } + return share.first; } } @@ -2165,7 +2225,7 @@ void WslCoreVm::WaitForPmemDeviceInVm(_In_ ULONG PmemId) } _Requires_lock_held_(m_guestDeviceLock) -std::pair WslCoreVm::AddVirtioFsShare(_In_ bool Admin, _In_ PCWSTR Path, _In_ PCWSTR Options, _In_opt_ HANDLE UserToken) +std::tuple WslCoreVm::AddVirtioFsShare(_In_ bool Admin, _In_ PCWSTR Path, _In_ PCWSTR Options, _In_opt_ HANDLE UserToken) { WI_ASSERT(m_vmConfig.EnableVirtioFs); @@ -2199,48 +2259,99 @@ std::pair WslCoreVm::AddVirtioFsShare(_In_ bool Admi effectiveOptions += m_swiotlbOption; } - // Check if a matching share already exists. - bool created = false; - std::wstring tag; + // + // Each share lives inside an aggregate virtio-fs device bucketed by + // Admin. A single bucket exposes one PCI device whose synthetic root + // holds one child entry per share. + // + // ReadOnly is not a bucket axis: the standard drvfs path routes "ro" + // through the Linux-side bind mount (drvfs.cpp's + // ConvertDrvfsMountOptionsToPlan9 classifies "ro" as a + // StandardOption, not a Plan9Option), so the host-side aggregate + // device never observes "ro" in normal use. If a caller does + // explicitly pass "ro" in the plan9 options, the deterministic + // subname (which hashes the full normalized options string) still + // gives that share its own child entry within the bucket. 
Aggregate + // children are device-RW at the FUSE level; per-share write + // protection comes from the guest's bind-mount options. + // VirtioFsShare key(sharePath.c_str(), effectiveOptions.c_str(), Admin); - if (!m_virtioFsShares.contains(key)) - { - // Generate a new unique tag for the share. - // - // N.B. The tag can be maximum 36 characters long so a GUID without braces fits perfectly. - GUID tagGuid{}; - THROW_IF_FAILED(CoCreateGuid(&tagGuid)); + const bool readOnly = key.Options.contains(L"ro"); + const size_t bucket = Admin ? 1u : 0u; - tag = wsl::shared::string::GuidToString(tagGuid, wsl::shared::string::None); - WI_ASSERT(!FindVirtioFsShare(tag.c_str(), Admin)); + bool created = false; + std::wstring subname; - (void)m_guestDeviceManager->AddGuestDevice( - VIRTIO_FS_DEVICE_ID, - Admin ? VIRTIO_FS_ADMIN_CLASS_ID : VIRTIO_FS_CLASS_ID, - tag.c_str(), - key.OptionsString().c_str(), - sharePath.c_str(), - VIRTIO_FS_FLAGS_TYPE_FILES, - UserToken); - - m_virtioFsShares.emplace(std::move(key), tag); - created = true; + auto existing = m_virtioFsShares.find(key); + if (existing != m_virtioFsShares.end()) + { + subname = existing->second; } else { - tag = m_virtioFsShares[key]; + const auto normalizedOptions = key.OptionsString(); + subname = ComputeAggregateSubname(sharePath, normalizedOptions, Admin); + + std::wstring optionsWithSubname{L"subname="}; + optionsWithSubname += subname; + if (!normalizedOptions.empty()) + { + optionsWithSubname += L';'; + optionsWithSubname += normalizedOptions; + } + + const GUID classId = Admin ? VIRTIO_FS_ADMIN_CLASS_ID : VIRTIO_FS_CLASS_ID; + + if (m_drvfsAggregateTag[bucket].empty()) + { + // + // First share in this bucket: allocate a fresh aggregate + // tag and create the PCI device. The tag is shared by every + // subsequent share in the same bucket. + // + // N.B. The tag can be maximum 36 characters long so a GUID + // without braces fits perfectly. 
+ // + GUID tagGuid{}; + THROW_IF_FAILED(CoCreateGuid(&tagGuid)); + + std::wstring aggregateTag = wsl::shared::string::GuidToString(tagGuid, wsl::shared::string::None); + WI_ASSERT(!FindVirtioFsShare(aggregateTag.c_str(), subname.c_str(), Admin)); + + (void)m_guestDeviceManager->AddGuestDevice( + VIRTIO_FS_DEVICE_ID, classId, aggregateTag.c_str(), optionsWithSubname.c_str(), sharePath.c_str(), VIRTIO_FS_FLAGS_TYPE_AGGREGATE, UserToken); + + m_drvfsAggregateTag[bucket] = std::move(aggregateTag); + } + else + { + // + // Aggregate device already exists for this bucket: extend + // it in-place by registering a new child under the existing + // PCI device. No new PCI device is created. + // + m_guestDeviceManager->ExtendVirtioFsAggregate( + classId, m_drvfsAggregateTag[bucket].c_str(), optionsWithSubname.c_str(), sharePath.c_str(), UserToken); + } + + m_virtioFsShares.emplace(std::move(key), subname); + created = true; } + const std::wstring& tag = m_drvfsAggregateTag[bucket]; + WSL_LOG( "WslCoreVmAddVirtioFsShare", TraceLoggingValue(Admin, "admin"), + TraceLoggingValue(readOnly, "readOnly"), TraceLoggingValue(sharePath.c_str(), "path"), TraceLoggingValue(effectiveOptions.c_str(), "options"), TraceLoggingValue(tag.c_str(), "tag"), + TraceLoggingValue(subname.c_str(), "subname"), TraceLoggingValue(created, "created"), TraceLoggingValue(m_virtioFsShares.size(), "shareCount")); - return {tag, sharePath}; + return {tag, subname, sharePath}; } void WslCoreVm::OnCrash(_In_ LPCWSTR Details) @@ -2640,13 +2751,14 @@ try return; } - auto respondWithTag = [&](const std::wstring& tag, const std::wstring& source, HRESULT result) { - // Respond to the guest with the tag that should be used to mount the device. 
- + auto respondWithTag = [&](const std::wstring& tag, const std::wstring& subname, const std::wstring& source, HRESULT result) { + // Respond to the guest with the aggregate tag, child subname (inside the + // aggregate's synthetic root), and canonicalized source the device maps. wsl::shared::MessageWriter response(LxInitMessageAddVirtioFsDeviceResponse); response->Result = SUCCEEDED(result) ? 0 : EINVAL; // TODO: Improved HRESULT -> errno mapping. response.WriteString(response->TagOffset, tag); response.WriteString(response->SourceOffset, source); + response.WriteString(response->SubnameOffset, subname); transaction.Send(response.Span()); }; @@ -2654,8 +2766,9 @@ try if (message->MessageType == LxInitMessageAddVirtioFsDevice) { std::wstring tag; + std::wstring subname; std::wstring source; - const auto result = wil::ResultFromException([this, span, &tag, &source]() { + const auto result = wil::ResultFromException([this, span, &tag, &subname, &source]() { const auto* addShare = gslhelpers::try_get_struct(span); THROW_HR_IF(E_UNEXPECTED, !addShare); @@ -2666,32 +2779,53 @@ try // Acquire the lock and attempt to add the device. 
auto guestDeviceLock = m_guestDeviceLock.lock_exclusive(); - std::tie(tag, source) = AddVirtioFsShare(addShare->Admin, pathWide.c_str(), optionsWide.c_str()); + std::tie(tag, subname, source) = AddVirtioFsShare(addShare->Admin, pathWide.c_str(), optionsWide.c_str()); }); - respondWithTag(tag, source, result); + respondWithTag(tag, subname, source, result); } else if (message->MessageType == LxInitMessageRemountVirtioFsDevice) { std::wstring newTag; + std::wstring newSubname; std::wstring source; - const auto result = wil::ResultFromException([this, span, &newTag, &source]() { + const auto result = wil::ResultFromException([this, span, message, &newTag, &newSubname, &source]() { const auto* remountShare = gslhelpers::try_get_struct(span); THROW_HR_IF(E_UNEXPECTED, !remountShare); const std::string tag = wsl::shared::string::FromSpan(span, remountShare->TagOffset); const auto tagWide = wsl::shared::string::MultiByteToWide(tag); + + // + // The aggregate remount protocol always appends the child SubnameOffset, and + // guest init ships in lockstep with the service, so a well-formed message + // includes it. The size and bounds checks below only guard against a + // malformed or truncated payload (so a bogus offset is never dereferenced). + // A single aggregate tag is shared by every child, so the subname is required + // to identify the specific share -- there is no tag-only fallback. 
+ // + std::wstring subnameWide; + if (message->MessageSize >= offsetof(LX_INIT_REMOUNT_VIRTIOFS_SHARE_MESSAGE, SubnameOffset) + sizeof(unsigned int) && + remountShare->SubnameOffset >= sizeof(LX_INIT_REMOUNT_VIRTIOFS_SHARE_MESSAGE) && + remountShare->SubnameOffset < message->MessageSize) + { + const std::string subname = wsl::shared::string::FromSpan(span, remountShare->SubnameOffset); + subnameWide = wsl::shared::string::MultiByteToWide(subname); + } + + THROW_HR_IF_MSG(E_INVALIDARG, subnameWide.empty(), "Remount message missing virtio-fs subname"); + auto guestDeviceLock = m_guestDeviceLock.lock_exclusive(); - const auto foundShare = FindVirtioFsShare(tagWide.c_str(), !remountShare->Admin); + const auto foundShare = FindVirtioFsShare(tagWide.c_str(), subnameWide.c_str(), !remountShare->Admin); THROW_HR_IF_MSG(E_UNEXPECTED, !foundShare.has_value(), "Unknown tag %ls", tagWide.c_str()); - std::tie(newTag, source) = + std::tie(newTag, newSubname, source) = AddVirtioFsShare(remountShare->Admin, foundShare->Path.c_str(), foundShare->OptionsString().c_str()); WI_ASSERT(source == foundShare->Path); }); - respondWithTag(newTag, source, result); + respondWithTag(newTag, newSubname, source, result); } else { diff --git a/src/windows/service/exe/WslCoreVm.h b/src/windows/service/exe/WslCoreVm.h index b3bb810b6..85657985c 100644 --- a/src/windows/service/exe/WslCoreVm.h +++ b/src/windows/service/exe/WslCoreVm.h @@ -182,8 +182,22 @@ class WslCoreVm _Requires_lock_held_(m_guestDeviceLock) void AddPlan9Share(_In_ PCWSTR AccessName, _In_ PCWSTR Path, _In_ UINT32 Port, _In_ wsl::windows::common::hcs::Plan9ShareFlags Flags, _In_ HANDLE UserToken, _In_ PCWSTR VirtIoTag); + // + // Add (or look up) a virtio-fs share. Returns {aggregate tag, subname, + // canonicalized source path}. 
The aggregate tag is shared by all + // shares in the same Admin bucket on this VM (ReadOnly is not a bucket + // axis here -- it is folded into the subname via the normalized + // options); the subname is deterministic per (canonical path, + // normalized options, Admin) + // and identifies the child entry inside the aggregate's synthetic + // root directory. The first share added per bucket creates the PCI + // device via GuestDeviceManager::AddGuestDevice; subsequent shares + // call GuestDeviceManager::ExtendVirtioFsAggregate, which extends + // the existing device in-place without creating a new PCI device. + // _Requires_lock_held_(m_guestDeviceLock) - std::pair AddVirtioFsShare(_In_ bool Admin, _In_ PCWSTR Path, _In_ PCWSTR Options, _In_opt_ HANDLE UserToken = nullptr); + std::tuple AddVirtioFsShare( + _In_ bool Admin, _In_ PCWSTR Path, _In_ PCWSTR Options, _In_opt_ HANDLE UserToken = nullptr); _Requires_lock_held_(m_lock) ULONG AttachDiskLockHeld(_In_ PCWSTR Disk, _In_ DiskType Type, _In_ MountFlags Flags, _In_ std::optional Lun, _In_ bool IsUserDisk, _In_ HANDLE UserToken); @@ -203,7 +217,7 @@ class WslCoreVm void EjectVhdLockHeld(_In_ PCWSTR VhdPath); _Requires_lock_held_(m_guestDeviceLock) - std::optional FindVirtioFsShare(_In_ PCWSTR tag, _In_ std::optional Admin = {}) const; + std::optional FindVirtioFsShare(_In_ PCWSTR tag, _In_ PCWSTR Subname, _In_ std::optional Admin = {}) const; _Requires_lock_held_(m_lock) void FreeLun(_In_ ULONG Lun); @@ -262,6 +276,26 @@ class WslCoreVm _Guarded_by_(m_guestDeviceLock) std::future m_drvfsInitialResult; _Guarded_by_(m_guestDeviceLock) wil::unique_handle m_drvfsToken; _Guarded_by_(m_guestDeviceLock) wil::unique_handle m_adminDrvfsToken; + // + // Aggregate virtio-fs tag per Admin bucket: index 0 = non-admin, + // index 1 = admin. Empty until the first share is added to that + // bucket; populated lazily by AddVirtioFsShare. + // + // ReadOnly is NOT a separate bucket axis. 
The standard drvfs path + // routes mount-level "ro" through the Linux-side bind mount (see + // drvfs.cpp ConvertDrvfsMountOptionsToPlan9, which classifies "ro" + // as a StandardOption rather than a Plan9Option), so the host-side + // aggregate device never observes "ro" in normal use. Per-share + // distinctions (including any explicitly-supplied "ro" plan9 + // option) are still preserved within a bucket via + // ComputeAggregateSubname, which hashes the full normalized + // options string into the synthetic-root child name. + // + _Guarded_by_(m_guestDeviceLock) std::array m_drvfsAggregateTag; + // + // Map from share key to deterministic subname (32 hex chars) inside + // the aggregate device's synthetic root directory. + // _Guarded_by_(m_guestDeviceLock) std::map m_virtioFsShares; _Guarded_by_(m_guestDeviceLock) std::map> m_plan9Servers; wil::srwlock m_lock; diff --git a/src/windows/wslcsession/WSLCVirtualMachine.cpp b/src/windows/wslcsession/WSLCVirtualMachine.cpp index c2b5e4f6a..60c40b97c 100644 --- a/src/windows/wslcsession/WSLCVirtualMachine.cpp +++ b/src/windows/wslcsession/WSLCVirtualMachine.cpp @@ -799,7 +799,7 @@ void WSLCVirtualMachine::Mount(LPCSTR Source, LPCSTR Target, LPCSTR Type, LPCSTR Mount(m_initChannel, Source, Target, Type, Options, Flags); } -void WSLCVirtualMachine::Mount(shared::SocketChannel& Channel, LPCSTR Source, LPCSTR Target, LPCSTR Type, LPCSTR Options, ULONG Flags) +void WSLCVirtualMachine::Mount(shared::SocketChannel& Channel, LPCSTR Source, LPCSTR Target, LPCSTR Type, LPCSTR Options, ULONG Flags, LPCSTR Subname) { static_assert(WSLCMountFlagsNone == WSLC_MOUNT::None); static_assert(WSLCMountFlagsReadOnly == WSLC_MOUNT::ReadOnly); @@ -819,6 +819,7 @@ void WSLCVirtualMachine::Mount(shared::SocketChannel& Channel, LPCSTR Source, LP optionalAdd(Target, message->DestinationIndex); optionalAdd(Type, message->TypeIndex); optionalAdd(Options, message->OptionsIndex); + optionalAdd(Subname, message->SubnameIndex); message->Flags = 
Flags; const auto& response = Channel.Transaction(message.Span()); @@ -829,6 +830,7 @@ void WSLCVirtualMachine::Mount(shared::SocketChannel& Channel, LPCSTR Source, LP TraceLoggingValue(Target == nullptr ? "" : Target, "Target"), TraceLoggingValue(Type == nullptr ? "" : Type, "Type"), TraceLoggingValue(Options == nullptr ? "" : Options, "Options"), + TraceLoggingValue(Subname == nullptr ? "" : Subname, "Subname"), TraceLoggingValue(Flags, "Flags"), TraceLoggingValue(response.Result, "Result")); @@ -1080,8 +1082,21 @@ try } else { + // + // Mount the share as a child of an aggregate virtio-fs device. The + // Source is the fixed aggregate tag selected by the share's access + // mode (read-only shares use a separate aggregate whose backend + // enforces read-only); the per-share entry is selected by Subname, + // derived from the same share GUID the host used (see + // GuidToHexString). ReadOnly is also applied here as the bind-mount + // option, but enforcement is host-side: the read-only aggregate's + // FUSE backend rejects writes regardless of the guest mount flags. + // std::string options = readOnly ? "ro" : "rw"; - Mount(m_initChannel, shareName.c_str(), LinuxPath, "virtiofs", options.c_str(), Flags); + const auto& aggregateTag = readOnly ? c_wslcVirtioFsAggregateReadOnlyTag : c_wslcVirtioFsAggregateTag; + auto tag = shared::string::GuidToString(aggregateTag, shared::string::None); + auto subname = shared::string::GuidToHexString(shareGuid); + Mount(m_initChannel, tag.c_str(), LinuxPath, "virtiofs", options.c_str(), Flags, subname.c_str()); } deleteOnFailure.release(); diff --git a/src/windows/wslcsession/WSLCVirtualMachine.h b/src/windows/wslcsession/WSLCVirtualMachine.h index 1c2aed268..b091d85a3 100644 --- a/src/windows/wslcsession/WSLCVirtualMachine.h +++ b/src/windows/wslcsession/WSLCVirtualMachine.h @@ -182,7 +182,7 @@ class WSLCVirtualMachine // can include the swiotlb token. Called after the root filesystem is mounted. 
void ReadGuestCapabilities(); - static void Mount(wsl::shared::SocketChannel& Channel, LPCSTR Source, _In_ LPCSTR Target, _In_ LPCSTR Type, _In_ LPCSTR Options, _In_ ULONG Flags); + static void Mount(wsl::shared::SocketChannel& Channel, LPCSTR Source, _In_ LPCSTR Target, _In_ LPCSTR Type, _In_ LPCSTR Options, _In_ ULONG Flags, _In_opt_ LPCSTR Subname = nullptr); void MountGpuLibraries(_In_ LPCSTR LibrariesMountPoint, _In_ LPCSTR DriversMountpoint); Microsoft::WRL::ComPtr CreateLinuxProcessImpl( @@ -255,9 +255,21 @@ class WSLCVirtualMachine std::map m_attachedDisks; std::map m_mountedWindowsFolders; + // Case-insensitive ordering for the Windows-path component of the VirtioFs + // share-cache key. Windows paths are case-insensitive, so e.g. "C:\Foo" and + // "C:\foo" must map to the same cached share rather than two distinct ones. + struct VirtioFsShareKeyCompare + { + bool operator()(const std::pair& left, const std::pair& right) const + { + const int cmp = _wcsicmp(left.first.c_str(), right.first.c_str()); + return (cmp != 0) ? (cmp < 0) : (left.second < right.second); + } + }; + // VirtioFs share cache: maps (normalized WindowsPath, readOnly) to share GUID. // Shares are kept alive after unmount for reuse on subsequent mounts of the same folder. - std::map, GUID> m_virtioFsShares; + std::map, GUID, VirtioFsShareKeyCompare> m_virtioFsShares; std::recursive_mutex m_lock; std::mutex m_portRelaylock; diff --git a/test/linux/unit_tests/drvfs.c b/test/linux/unit_tests/drvfs.c index 704cfe19a..401e5c7c7 100644 --- a/test/linux/unit_tests/drvfs.c +++ b/test/linux/unit_tests/drvfs.c @@ -1724,11 +1724,14 @@ Return Value: // "remembers" the first case used after the file is closed because the // directory entries are cached. // - // N.B. This is not the case with Plan 9 because Linux doesn't know the - // file system is case-insensitive. + // N.B. This is not the case with Plan 9 or virtiofs because Linux doesn't + // know the file system is case-insensitive. 
The fuse/virtiofs guest + // driver has no case-insensitive dentry operations, so opening "FOO" + // creates a distinct "FOO" dentry rather than reusing the cached + // "foo" one, and /proc/self/fd reports the as-opened case. // - if (g_LxtFsInfo.FsType != LxtFsTypePlan9) + if (g_LxtFsInfo.FsType != LxtFsTypePlan9 && g_LxtFsInfo.FsType != LxtFsTypeVirtioFs) { LxtCheckErrno(Fd = open(DRVFS_CASE_INSENSITIVE_TEST_DIR "/foo", O_RDONLY)); LxtCheckErrno(Fd2 = open(DRVFS_CASE_INSENSITIVE_TEST_DIR "/FOO", O_RDONLY)); @@ -1739,10 +1742,14 @@ Return Value: // // Listing the directory shows the file with the correct case. // - // N.B. As remarked above, for SMB on Plan 9, the case will have changed. + // N.B. As remarked above, for SMB over Plan 9 or virtiofs the case will + // have changed, because Linux doesn't know the file system is + // case-insensitive and NTFS lets you change the case on rename. (FAT + // over virtiofs keeps the original case because a case-only rename is + // a no-op on FAT at the NT level.) // - if ((g_LxtFsInfo.FsType == LxtFsTypePlan9) && (DrvFsTestMode == DRVFS_SMB_TEST_MODE)) + if (((g_LxtFsInfo.FsType == LxtFsTypePlan9) || (g_LxtFsInfo.FsType == LxtFsTypeVirtioFs)) && (DrvFsTestMode == DRVFS_SMB_TEST_MODE)) { LxtCheckResult(LxtCheckDirectoryContentsEx(DRVFS_CASE_INSENSITIVE_TEST_DIR, ChildrenPlan9Smb, LXT_COUNT_OF(Children), 0)); @@ -1792,11 +1799,13 @@ Return Value: int Result; // - // This test does not apply to VM mode because Plan 9 doesn't support - // junction point symlinks. + // This test does not apply to VM mode because the "drvfs" filesystem type + // is not registered in the guest kernel (drvfs mounts are set up by the WSL + // mount helper over Plan 9 or virtiofs), so a raw mount() syscall fails with + // ENODEV. Plan 9 additionally doesn't support junction point symlinks. 
// - if (g_LxtFsInfo.FsType == LxtFsTypePlan9) + if (g_LxtFsInfo.FsType == LxtFsTypePlan9 || g_LxtFsInfo.FsType == LxtFsTypeVirtioFs) { LxtLogInfo("This test is not relevant in VM mode."); Result = 0; diff --git a/test/linux/unit_tests/lxtmount.c b/test/linux/unit_tests/lxtmount.c index 007ab0dc3..24667bb61 100644 --- a/test/linux/unit_tests/lxtmount.c +++ b/test/linux/unit_tests/lxtmount.c @@ -59,11 +59,13 @@ Return Value: { + const char* ActualRoot; int Direction; const char* ExpectedSourceActual; struct libmnt_fs* FileSystem; char LocalPath[PATH_MAX]; int MountId; + const char* NextSlash; int Result; struct stat Stat; struct libmnt_table* Table; @@ -124,7 +126,22 @@ Return Value: strcat(LocalPath, "//deleted"); } - LxtCheckStringEqual(LocalPath, mnt_fs_get_root(FileSystem)); + // + // Aggregate virtiofs exposes each share as a child of a synthetic + // read-only root, so the mount root reported in mountinfo is prefixed with + // an opaque per-share component (for example "/<subname>"). Strip that leading + // component before comparing so the checks remain agnostic to the + // opaque share name. + // + + ActualRoot = mnt_fs_get_root(FileSystem); + if ((strcmp(ExpectedFsType, "virtiofs") == 0) && (strcmp(ActualRoot, LocalPath) != 0) && (ActualRoot[0] == '/')) + { + NextSlash = strchr(ActualRoot + 1, '/'); + ActualRoot = (NextSlash != NULL) ?
NextSlash : "/"; + } + + LxtCheckStringEqual(LocalPath, ActualRoot); LxtCheckStringEqual(ExpectedMountOptions, mnt_fs_get_vfs_options(FileSystem)); if (ExpectedFsOptions != NULL) { diff --git a/test/windows/DrvFsTests.cpp b/test/windows/DrvFsTests.cpp index 9025ba6a3..f8b0222f8 100644 --- a/test/windows/DrvFsTests.cpp +++ b/test/windows/DrvFsTests.cpp @@ -239,12 +239,6 @@ class DrvFsTests { SKIP_TEST_ARM64(); - if (Mode == DrvFsMode::VirtioFs) - { - LogSkipped("TODO: debug virtiofs handling of //localhost/C$ style paths"); - return; - } - VERIFY_NO_THROW( LxsstuRunTest((L"bash -c '" + SkipUnstableTestEnvVar + L" /data/test/wsl_unit_tests drvfs -m 4'").c_str(), L"drvfs4")); } diff --git a/test/windows/WSLCTests.cpp b/test/windows/WSLCTests.cpp index 526520735..779c086da 100644 --- a/test/windows/WSLCTests.cpp +++ b/test/windows/WSLCTests.cpp @@ -3375,19 +3375,28 @@ class WSLCTests VERIFY_SUCCEEDED(session->MountWindowsFolder(testFolder.c_str(), "/win-path", true)); ExpectMount(session.get(), "/win-path", expectedMountOptions(true)); - // Capture the mount source and type, unmount, then remount without read-only. + // Aggregate virtio-fs children are bind-mounts of a subdirectory of the + // shared aggregate root, so the mount SOURCE is not a standalone + // virtio-fs source that can be remounted fresh. Instead, bind the share + // elsewhere and clear read-only on the bind to confirm the guest cannot + // regain write access: the read-only aggregate's backend rejects writes + // (EROFS) regardless of the guest's bind-mount flags. The findmnt check + // proves the bind really is read-write so the write failure can only come + // from host-side enforcement. 
ExpectCommandResult( session.get(), {"/bin/sh", "-c", - "src=$(findmnt -n -o SOURCE /win-path) && " - "fstype=$(findmnt -n -o FSTYPE /win-path) && " - "umount /win-path && " - "mount -t $fstype $src /win-path"}, + "mkdir -p /win-path-rw && " + "mount --bind /win-path /win-path-rw && " + "mount -o remount,bind,rw /win-path-rw && " + "findmnt -n -o VFS-OPTIONS /win-path-rw | grep -qE '(^|,)rw(,|$)'"}, 0); - // Verify the folder is still not writeable. - ExpectCommandResult(session.get(), {"/bin/sh", "-c", "echo -n content > /win-path/file.txt"}, 1); + // Verify the folder is still not writeable through the read-write bind. + ExpectCommandResult(session.get(), {"/bin/sh", "-c", "echo -n content > /win-path-rw/file.txt"}, 1); + + ExpectCommandResult(session.get(), {"/bin/sh", "-c", "umount /win-path-rw && rmdir /win-path-rw"}, 0); VERIFY_SUCCEEDED(session->UnmountWindowsFolder("/win-path")); ExpectMount(session.get(), "/win-path", {});