Add Windows support to Hybrid Memory Manager

This commit adds a Windows-specific implementation of the fault-managed memory system, providing functionality similar to the existing Linux/Android implementation.

Key changes:
- Added Windows-specific memory management using VirtualAlloc/VirtualFree
- Implemented a Windows vectored exception handler for page fault handling
- Added proper memory protection and page fault handling on Windows
- Updated memory snapshot functionality to work on Windows
- Added proper cleanup of Windows-specific resources
- Fixed type conversion issues in memory management code
- Added proper error handling for Windows memory operations
- Fixed VRAM Memory Layout Mode to allow up to 12 GB

The implementation uses Windows-specific APIs:
- VirtualAlloc/VirtualFree for memory management
- AddVectoredExceptionHandler for page fault handling
- VirtualProtect for memory protection management

This change maintains feature parity with the Linux/Android implementation while using Windows-native APIs for better performance and reliability.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
2025-05-13 03:10:36 +01:00 · 2025-04-12 16:15:51 +10:00 · 2025-04-12 16:15:51 +10:00 · a4088f3a1e
commit a4088f3a1e
parent b66b3ca639
4 changed files with 236 additions and 86 deletions
--- a/src/common/settings.h
+++ b/src/common/settings.h
@ -198,9 +198,7 @@ struct Values {
                                                             MemoryLayout::Memory_4Gb,
                                                             MemoryLayout::Memory_12Gb,
                                                             "memory_layout_mode",
-                                                             Category::Core,
-                                                             Specialization::Default,
-                                                             false};
+                                                             Category::Core};
    SwitchableSetting<bool> use_speed_limit{
        linkage, true, "use_speed_limit", Category::Core, Specialization::Paired, false, true};
    SwitchableSetting<u16, true> speed_limit{linkage,
@ -213,11 +211,11 @@ struct Values {
                                             true,
                                             true,
                                             &use_speed_limit};
-    SwitchableSetting<bool> use_nce{linkage, true, "use_nce", Category::Core};
+    SwitchableSetting<bool> use_nce{linkage, true, "Use Native Code Execution", Category::Core};

    // Memory
-    SwitchableSetting<bool> use_gpu_memory_manager{linkage, false, "use_gpu_memory_manager", Category::Core};
-    SwitchableSetting<bool> enable_memory_snapshots{linkage, false, "enable_memory_snapshots", Category::Core};
+    SwitchableSetting<bool> use_gpu_memory_manager{linkage, false, "Use GPU Memory Manager", Category::Core};
+    SwitchableSetting<bool> enable_memory_snapshots{linkage, false, "Enable Memory Snapshots", Category::Core};

    // Cpu
    SwitchableSetting<CpuBackend, true> cpu_backend{linkage,
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@ -9,6 +9,8 @@
 #include <optional>
 #include <string>
 #include <vector>
+#include <fstream>
+#include <filesystem>

 #include <fmt/ranges.h>

@ -136,15 +138,37 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,

    // Initialize HybridMemory system
    if (Settings::values.use_gpu_memory_manager.GetValue()) {
-#if defined(__linux__) || defined(__ANDROID__)
+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
        try {
-            void* guest_memory_base = std::aligned_alloc(4096, 64 * 1024 * 1024);
-            if (guest_memory_base) {
+            // Define memory size with explicit types to avoid conversion warnings
+            constexpr size_t memory_size_mb = 64;
+            constexpr size_t memory_size_bytes = memory_size_mb * 1024 * 1024;
+
+            void* guest_memory_base = nullptr;
+#if defined(_WIN32)
+            // On Windows, use VirtualAlloc to reserve (but not commit) memory
+            const SIZE_T win_size = static_cast<SIZE_T>(memory_size_bytes);
+            LPVOID result = VirtualAlloc(nullptr, win_size, MEM_RESERVE, PAGE_NOACCESS);
+            if (result != nullptr) {
+                guest_memory_base = result;
+            }
+#else
+            // On Linux/Android, use aligned_alloc
+            guest_memory_base = std::aligned_alloc(4096, memory_size_bytes);
+#endif
+            if (guest_memory_base != nullptr) {
                try {
-                    hybrid_memory->InitializeGuestMemory(guest_memory_base, 64 * 1024 * 1024);
-                    LOG_INFO(Render_Vulkan, "HybridMemory initialized with {} MB of fault-managed memory", 64);
-                } catch (const std::exception& e) {
+                    hybrid_memory->InitializeGuestMemory(guest_memory_base, memory_size_bytes);
+                    LOG_INFO(Render_Vulkan, "HybridMemory initialized with {} MB of fault-managed memory", memory_size_mb);
+                } catch (const std::exception&) {
+#if defined(_WIN32)
+                    if (guest_memory_base != nullptr) {
+                        const LPVOID win_ptr = static_cast<LPVOID>(guest_memory_base);
+                        VirtualFree(win_ptr, 0, MEM_RELEASE);
+                    }
+#else
                    std::free(guest_memory_base);
+#endif
                    throw;
                }
            }
@ -168,10 +192,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,

        // Add paths to common shaders that should be preloaded
        // These will be compiled in parallel for faster startup
-        if (std::filesystem::exists(shader_dir)) {
        try {
+            if (std::filesystem::exists(shader_dir)) {
                for (const auto& entry : std::filesystem::directory_iterator(shader_dir)) {
-                    if (entry.path().extension() == ".spv") {
+                    if (entry.is_regular_file() && entry.path().extension() == ".spv") {
                        common_shaders.push_back(entry.path().string());
                    }
                }
@ -180,12 +204,12 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
                    LOG_INFO(Render_Vulkan, "Preloading {} common shaders", common_shaders.size());
                    shader_manager.PreloadShaders(common_shaders);
                }
-            } catch (const std::exception& e) {
-                LOG_ERROR(Render_Vulkan, "Error during shader preloading: {}", e.what());
-            }
            } else {
                LOG_INFO(Render_Vulkan, "Shader directory not found at {}", shader_dir);
            }
+        } catch (const std::exception& e) {
+            LOG_ERROR(Render_Vulkan, "Error during shader preloading: {}", e.what());
+        }
    }

    Report();
--- a/src/video_core/vulkan_common/hybrid_memory.cpp
+++ b/src/video_core/vulkan_common/hybrid_memory.cpp
@ -15,6 +15,8 @@
 #include <sys/syscall.h>
 #include <linux/userfaultfd.h>
 #include <sys/ioctl.h>
+#elif defined(_WIN32)
+#include <windows.h>
 #endif

 namespace Vulkan {
@ -72,8 +74,136 @@ void PredictiveReuseManager::ClearHistory() {
    current_timestamp = 0;
 }

+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
+/// Marks the page keyed by `addr` as dirty and as the most recently used one.
+void FaultManagedAllocator::Touch(size_t addr) {
+    // Every touch is recorded in the dirty set.
+    dirty_set.insert(addr);
+
+    // Re-seat the key at the head of the LRU list (eviction pops from the back).
+    lru.remove(addr);
+    lru.push_front(addr);
+}
+
+/// Evicts pages from the back of the LRU list until at most MaxPages keys remain.
+/// A dirty page is copied into `compressed_store` before its backing memory is released.
+void FaultManagedAllocator::EnforceLimit() {
+    while (lru.size() > MaxPages) {
+        const size_t victim = lru.back();
+        lru.pop_back();
+
+        const auto entry = page_map.find(victim);
+        if (entry == page_map.end()) {
+            // Key has no resident page; nothing to release.
+            continue;
+        }
+        void* const backing = entry->second;
+
+        const auto dirty_entry = dirty_set.find(victim);
+        if (dirty_entry != dirty_set.end()) {
+            // Keep the page contents (a plain byte copy) so GetOrAlloc can restore them
+            const u8* const first = static_cast<const u8*>(backing);
+            compressed_store[victim] = std::vector<u8>(first, first + PageSize);
+            dirty_set.erase(dirty_entry);
+        }
+
 #if defined(__linux__) || defined(__ANDROID__)
+        munmap(backing, PageSize);
+#elif defined(_WIN32)
+        VirtualFree(backing, 0, MEM_RELEASE);
+#endif
+        page_map.erase(entry);
+    }
+}
+
+/// Returns the backing page for the key `addr`, allocating one when none is resident.
+/// A newly allocated page is filled from `compressed_store` when an entry exists there,
+/// otherwise zeroed. Returns nullptr when the OS allocation fails.
+void* FaultManagedAllocator::GetOrAlloc(size_t addr) {
+    std::lock_guard<std::mutex> guard(lock);
+
+    // Fast path: the page is already resident.
+    if (const auto resident = page_map.find(addr); resident != page_map.end()) {
+        Touch(addr);
+        return resident->second;
+    }
+
+#if defined(__linux__) || defined(__ANDROID__)
+    void* const fresh = mmap(nullptr, PageSize, PROT_READ | PROT_WRITE,
+                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (fresh == MAP_FAILED) {
+        LOG_ERROR(Render_Vulkan, "Failed to mmap memory for fault handler");
+        return nullptr;
+    }
+#elif defined(_WIN32)
+    void* const fresh = VirtualAlloc(nullptr, PageSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+    if (fresh == nullptr) {
+        LOG_ERROR(Render_Vulkan, "Failed to VirtualAlloc memory for fault handler");
+        return nullptr;
+    }
+#endif
+
+    const auto saved = compressed_store.find(addr);
+    if (saved == compressed_store.end()) {
+        std::memset(fresh, 0, PageSize);
+    } else {
+        // Restore the contents that were stored when this page was evicted
+        const std::vector<u8>& bytes = saved->second;
+        std::memcpy(fresh, bytes.data(), bytes.size());
+        compressed_store.erase(saved);
+    }
+
+    // Register the page, then trim the resident set back to the limit.
+    page_map[addr] = fresh;
+    lru.push_front(addr);
+    dirty_set.insert(addr);
+    EnforceLimit();
+
+    return fresh;
+}
+
+#if defined(_WIN32)
+// Static member initialization
+FaultManagedAllocator* FaultManagedAllocator::current_instance = nullptr;
+
+/**
+ * Vectored exception handler that services access violations inside the managed range.
+ *
+ * For a read/write fault whose address lies in [base_address, base_address + memory_size)
+ * of `current_instance`, the faulting page is committed as read/write, filled from the
+ * page returned by GetOrAlloc, and execution is resumed. Every other exception is passed on.
+ *
+ * @param exception_info Exception record and context supplied by the OS.
+ * @return EXCEPTION_CONTINUE_EXECUTION when the fault was serviced,
+ *         EXCEPTION_CONTINUE_SEARCH otherwise.
+ */
+LONG WINAPI FaultManagedAllocator::VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info) {
+    const EXCEPTION_RECORD* const record = exception_info->ExceptionRecord;
+
+    // Only handle access violations (page faults)
+    if (record->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    // For access violations ExceptionInformation[0] is the access kind (0 = read,
+    // 1 = write, 8 = DEP/execute) and [1] is the faulting address. Execute faults
+    // cannot be satisfied by a read/write data page, so they are not handled here.
+    constexpr ULONG_PTR dep_violation = 8;
+    if (record->NumberParameters < 2 || record->ExceptionInformation[0] == dep_violation) {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    // Read the instance pointer once so the checks and the use agree.
+    FaultManagedAllocator* const instance = current_instance;
+    if (!instance) {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    const ULONG_PTR fault_addr = record->ExceptionInformation[1];
+    const ULONG_PTR base_addr = reinterpret_cast<ULONG_PTR>(instance->base_address);
+
+    // Check if the address is within our managed range (subtraction form cannot overflow)
+    if (fault_addr < base_addr ||
+        (fault_addr - base_addr) >= static_cast<ULONG_PTR>(instance->memory_size)) {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    // Calculate the base address of the page
+    const ULONG_PTR page_addr = fault_addr & ~(static_cast<ULONG_PTR>(PageSize) - 1);
+    const size_t relative_addr = static_cast<size_t>(page_addr - base_addr);
+    void* const target_addr = reinterpret_cast<void*>(page_addr);
+
+    // If the page is already committed read/write (e.g. another thread serviced the same
+    // fault first), just retry the access instead of overwriting the page contents.
+    // NOTE(review): this narrows but does not close the window between two concurrent
+    // faults on the same page; full serialization would need a lock around the copy.
+    MEMORY_BASIC_INFORMATION region_info{};
+    if (VirtualQuery(target_addr, &region_info, sizeof(region_info)) != 0 &&
+        region_info.State == MEM_COMMIT && region_info.Protect == PAGE_READWRITE) {
+        return EXCEPTION_CONTINUE_EXECUTION;
+    }
+
+    // Handle the fault by allocating memory
+    void* const page = instance->GetOrAlloc(relative_addr);
+    if (!page) {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    // The guest range is reserved without being committed, and VirtualProtect only works
+    // on committed pages, so the page has to be committed before it can be made accessible.
+    if (!VirtualAlloc(target_addr, PageSize, MEM_COMMIT, PAGE_READWRITE)) {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    // Covers the case where the page was already committed with a stricter protection.
+    DWORD old_protect = 0;
+    if (!VirtualProtect(target_addr, PageSize, PAGE_READWRITE, &old_protect)) {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    // Copy the page data to the faulting address
+    std::memcpy(target_addr, page, PageSize);
+
+    // Leave the page read/write: restoring the previous protection here would make the
+    // retried instruction fault again immediately.
+    return EXCEPTION_CONTINUE_EXECUTION;
+}
+
+/// Thread body that idles in 10 ms sleeps until `running` becomes false.
+void FaultManagedAllocator::ExceptionHandlerThread() {
+    // No work is done in this loop; it only returns once `running` is cleared.
+    for (;;) {
+        if (!running) {
+            return;
+        }
+        // Sleep to avoid busy waiting
+        Sleep(10);
+    }
+}
+#endif
+
 void FaultManagedAllocator::Initialize(void* base, size_t size) {
+#if defined(__linux__) || defined(__ANDROID__)
    uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (uffd < 0) {
        LOG_ERROR(Render_Vulkan, "Failed to create userfaultfd, fault handling disabled");
@ -97,66 +227,28 @@ void FaultManagedAllocator::Initialize(void* base, size_t size) {

    running = true;
    fault_handler = std::thread(&FaultManagedAllocator::FaultThread, this);
+#elif defined(_WIN32)
+    // Setup Windows memory for fault handling
+    base_address = base;
+    memory_size = size;
+
+    // Reserve memory range but don't commit it yet - it will be demand-paged
+    DWORD oldProtect;
+    VirtualProtect(base, size, PAGE_NOACCESS, &oldProtect);
+
+    // Install a vectored exception handler
+    current_instance = this;
+    AddVectoredExceptionHandler(1, VectoredExceptionHandler);
+
+    running = true;
+    exception_handler = std::thread(&FaultManagedAllocator::ExceptionHandlerThread, this);
+
+    LOG_INFO(Render_Vulkan, "Windows fault-managed memory initialized at {:p}, size: {}",
+             base, size);
+#endif
 }

-void FaultManagedAllocator::Touch(size_t addr) {
-    lru.remove(addr);
-    lru.push_front(addr);
-    dirty_set.insert(addr);
-}
-
-void FaultManagedAllocator::EnforceLimit() {
-    while (lru.size() > MaxPages) {
-        size_t evict = lru.back();
-        lru.pop_back();
-
-        auto it = page_map.find(evict);
-        if (it != page_map.end()) {
-            if (dirty_set.count(evict)) {
-                // Compress and store dirty page before evicting
-                std::vector<u8> compressed((u8*)it->second, (u8*)it->second + PageSize);
-                compressed_store[evict] = std::move(compressed);
-                dirty_set.erase(evict);
-            }
-
-            munmap(it->second, PageSize);
-            page_map.erase(it);
-        }
-    }
-}
-
-void* FaultManagedAllocator::GetOrAlloc(size_t addr) {
-    std::lock_guard<std::mutex> guard(lock);
-
-    if (page_map.count(addr)) {
-        Touch(addr);
-        return page_map[addr];
-    }
-
-    void* mem = mmap(nullptr, PageSize, PROT_READ | PROT_WRITE,
-                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-
-    if (mem == MAP_FAILED) {
-        LOG_ERROR(Render_Vulkan, "Failed to mmap memory for fault handler");
-        return nullptr;
-    }
-
-    if (compressed_store.count(addr)) {
-        // Decompress stored page data
-        std::memcpy(mem, compressed_store[addr].data(), compressed_store[addr].size());
-        compressed_store.erase(addr);
-    } else {
-        std::memset(mem, 0, PageSize);
-    }
-
-    page_map[addr] = mem;
-    lru.push_front(addr);
-    dirty_set.insert(addr);
-    EnforceLimit();
-
-    return mem;
-}
-
+#if defined(__linux__) || defined(__ANDROID__)
 void FaultManagedAllocator::FaultThread() {
    struct pollfd pfd = { uffd, POLLIN, 0 };

@ -183,6 +275,7 @@ void FaultManagedAllocator::FaultThread() {
        }
    }
 }
+#endif

 void* FaultManagedAllocator::Translate(size_t addr) {
    std::lock_guard<std::mutex> guard(lock);
@ -244,6 +337,7 @@ void FaultManagedAllocator::ClearDirtySet() {
 FaultManagedAllocator::~FaultManagedAllocator() {
    running = false;

+#if defined(__linux__) || defined(__ANDROID__)
    if (fault_handler.joinable()) {
        fault_handler.join();
    }
@ -255,8 +349,27 @@ FaultManagedAllocator::~FaultManagedAllocator() {
    if (uffd != -1) {
        close(uffd);
    }
+#elif defined(_WIN32)
+    if (exception_handler.joinable()) {
+        exception_handler.join();
+    }
+
+    // Remove the vectored exception handler
+    RemoveVectoredExceptionHandler(VectoredExceptionHandler);
+    current_instance = nullptr;
+
+    for (auto& [addr, mem] : page_map) {
+        VirtualFree(mem, 0, MEM_RELEASE);
+    }
+
+    // Free the base memory if needed
+    if (base_address) {
+        VirtualFree(base_address, 0, MEM_RELEASE);
+        base_address = nullptr;
+    }
+#endif
 }
-#endif // defined(__linux__) || defined(__ANDROID__)
+#endif // defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)

 HybridMemory::HybridMemory(const Device& device_, MemoryAllocator& allocator, size_t reuse_history)
    : device(device_), memory_allocator(allocator), reuse_manager(reuse_history) {
@ -265,7 +378,7 @@ HybridMemory::HybridMemory(const Device& device_, MemoryAllocator& allocator, si
 HybridMemory::~HybridMemory() = default;

 void HybridMemory::InitializeGuestMemory(void* base, size_t size) {
-#if defined(__linux__) || defined(__ANDROID__)
+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
    fmaa.Initialize(base, size);
    LOG_INFO(Render_Vulkan, "Initialized fault-managed guest memory at {:p}, size: {}",
             base, size);
@ -275,7 +388,7 @@ void HybridMemory::InitializeGuestMemory(void* base, size_t size) {
 }

 void* HybridMemory::TranslateAddress(size_t addr) {
-#if defined(__linux__) || defined(__ANDROID__)
+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
    return fmaa.Translate(addr);
 #else
    return nullptr;
@ -308,7 +421,7 @@ ComputeBuffer HybridMemory::CreateComputeBuffer(VkDeviceSize size, VkBufferUsage
 }

 void HybridMemory::SaveSnapshot(const std::string& path) {
-#if defined(__linux__) || defined(__ANDROID__)
+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
    fmaa.SaveSnapshot(path);
 #else
    LOG_ERROR(Render_Vulkan, "Memory snapshots not supported on this platform");
@ -316,7 +429,7 @@ void HybridMemory::SaveSnapshot(const std::string& path) {
 }

 void HybridMemory::SaveDifferentialSnapshot(const std::string& path) {
-#if defined(__linux__) || defined(__ANDROID__)
+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
    fmaa.SaveDifferentialSnapshot(path);
 #else
    LOG_ERROR(Render_Vulkan, "Differential memory snapshots not supported on this platform");
@ -324,7 +437,7 @@ void HybridMemory::SaveDifferentialSnapshot(const std::string& path) {
 }

 void HybridMemory::ResetDirtyTracking() {
-#if defined(__linux__) || defined(__ANDROID__)
+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
    fmaa.ClearDirtySet();
 #endif
 }
--- a/src/video_core/vulkan_common/hybrid_memory.h
+++ b/src/video_core/vulkan_common/hybrid_memory.h
@ -10,6 +10,9 @@
 #include <mutex>
 #include <atomic>
 #include <functional>
+#include <list>
+#include <set>
+#include <map>

 #include "common/common_types.h"
 #include "video_core/vulkan_common/vulkan_device.h"
@ -46,7 +49,7 @@ private:
    mutable std::mutex mutex;
 };

-#if defined(__linux__) || defined(__ANDROID__)
+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
 class FaultManagedAllocator {
 public:
    static constexpr size_t PageSize = 0x1000;
@ -65,14 +68,26 @@ private:
    std::set<size_t> dirty_set;
    std::unordered_map<size_t, std::vector<u8>> compressed_store;
    std::mutex lock;
+
+#if defined(__linux__) || defined(__ANDROID__)
    int uffd = -1;
    std::atomic<bool> running{false};
    std::thread fault_handler;
+    void FaultThread();
+#elif defined(_WIN32)
+    void* base_address = nullptr;
+    size_t memory_size = 0;
+    HANDLE exception_port = nullptr;
+    std::atomic<bool> running{false};
+    std::thread exception_handler;
+    void ExceptionHandlerThread();
+    static LONG WINAPI VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info);
+    static FaultManagedAllocator* current_instance;
+#endif

    void Touch(size_t addr);
    void EnforceLimit();
    void* GetOrAlloc(size_t addr);
-    void FaultThread();
 };
 #endif

@ -95,7 +110,7 @@ private:
    MemoryAllocator& memory_allocator;
    PredictiveReuseManager reuse_manager;

-#if defined(__linux__) || defined(__ANDROID__)
+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
    FaultManagedAllocator fmaa;
 #endif
 };