Add Windows support to Hybrid Memory Manager

This commit adds Windows-specific implementation of the fault-managed memory
system, providing similar functionality to the existing Linux/Android implementation.

Key changes:
- Added Windows-specific memory management using VirtualAlloc/VirtualFree
- Implemented Windows vectored exception handler for page fault handling
- Added proper memory protection and page fault handling on Windows
- Updated memory snapshot functionality to work on Windows
- Added proper cleanup of Windows-specific resources
- Fixed type conversion issues in memory management code
- Added proper error handling for Windows memory operations
- Fixed VRAM Memory Layout Mode to allow up to 12Gb

The implementation uses Windows-specific APIs:
- VirtualAlloc/VirtualFree for memory management
- AddVectoredExceptionHandler for page fault handling
- VirtualProtect for memory protection management

This change maintains feature parity with the Linux/Android implementation
while using Windows-native APIs for better performance and reliability.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
This commit is contained in:
Zephyron 2025-04-12 16:15:51 +10:00
parent b66b3ca639
commit a4088f3a1e
4 changed files with 236 additions and 86 deletions

View file

@ -198,9 +198,7 @@ struct Values {
MemoryLayout::Memory_4Gb, MemoryLayout::Memory_4Gb,
MemoryLayout::Memory_12Gb, MemoryLayout::Memory_12Gb,
"memory_layout_mode", "memory_layout_mode",
Category::Core, Category::Core};
Specialization::Default,
false};
SwitchableSetting<bool> use_speed_limit{ SwitchableSetting<bool> use_speed_limit{
linkage, true, "use_speed_limit", Category::Core, Specialization::Paired, false, true}; linkage, true, "use_speed_limit", Category::Core, Specialization::Paired, false, true};
SwitchableSetting<u16, true> speed_limit{linkage, SwitchableSetting<u16, true> speed_limit{linkage,
@ -213,11 +211,11 @@ struct Values {
true, true,
true, true,
&use_speed_limit}; &use_speed_limit};
SwitchableSetting<bool> use_nce{linkage, true, "use_nce", Category::Core}; SwitchableSetting<bool> use_nce{linkage, true, "Use Native Code Execution", Category::Core};
// Memory // Memory
SwitchableSetting<bool> use_gpu_memory_manager{linkage, false, "use_gpu_memory_manager", Category::Core}; SwitchableSetting<bool> use_gpu_memory_manager{linkage, false, "Use GPU Memory Manager", Category::Core};
SwitchableSetting<bool> enable_memory_snapshots{linkage, false, "enable_memory_snapshots", Category::Core}; SwitchableSetting<bool> enable_memory_snapshots{linkage, false, "Enable Memory Snapshots", Category::Core};
// Cpu // Cpu
SwitchableSetting<CpuBackend, true> cpu_backend{linkage, SwitchableSetting<CpuBackend, true> cpu_backend{linkage,

View file

@ -9,6 +9,8 @@
#include <optional> #include <optional>
#include <string> #include <string>
#include <vector> #include <vector>
#include <fstream>
#include <filesystem>
#include <fmt/ranges.h> #include <fmt/ranges.h>
@ -136,15 +138,37 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
// Initialize HybridMemory system // Initialize HybridMemory system
if (Settings::values.use_gpu_memory_manager.GetValue()) { if (Settings::values.use_gpu_memory_manager.GetValue()) {
#if defined(__linux__) || defined(__ANDROID__) #if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
try { try {
void* guest_memory_base = std::aligned_alloc(4096, 64 * 1024 * 1024); // Define memory size with explicit types to avoid conversion warnings
if (guest_memory_base) { constexpr size_t memory_size_mb = 64;
constexpr size_t memory_size_bytes = memory_size_mb * 1024 * 1024;
void* guest_memory_base = nullptr;
#if defined(_WIN32)
// On Windows, use VirtualAlloc to reserve (but not commit) memory
const SIZE_T win_size = static_cast<SIZE_T>(memory_size_bytes);
LPVOID result = VirtualAlloc(nullptr, win_size, MEM_RESERVE, PAGE_NOACCESS);
if (result != nullptr) {
guest_memory_base = result;
}
#else
// On Linux/Android, use aligned_alloc
guest_memory_base = std::aligned_alloc(4096, memory_size_bytes);
#endif
if (guest_memory_base != nullptr) {
try { try {
hybrid_memory->InitializeGuestMemory(guest_memory_base, 64 * 1024 * 1024); hybrid_memory->InitializeGuestMemory(guest_memory_base, memory_size_bytes);
LOG_INFO(Render_Vulkan, "HybridMemory initialized with {} MB of fault-managed memory", 64); LOG_INFO(Render_Vulkan, "HybridMemory initialized with {} MB of fault-managed memory", memory_size_mb);
} catch (const std::exception& e) { } catch (const std::exception&) {
#if defined(_WIN32)
if (guest_memory_base != nullptr) {
const LPVOID win_ptr = static_cast<LPVOID>(guest_memory_base);
VirtualFree(win_ptr, 0, MEM_RELEASE);
}
#else
std::free(guest_memory_base); std::free(guest_memory_base);
#endif
throw; throw;
} }
} }
@ -168,10 +192,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
// Add paths to common shaders that should be preloaded // Add paths to common shaders that should be preloaded
// These will be compiled in parallel for faster startup // These will be compiled in parallel for faster startup
if (std::filesystem::exists(shader_dir)) {
try { try {
if (std::filesystem::exists(shader_dir)) {
for (const auto& entry : std::filesystem::directory_iterator(shader_dir)) { for (const auto& entry : std::filesystem::directory_iterator(shader_dir)) {
if (entry.path().extension() == ".spv") { if (entry.is_regular_file() && entry.path().extension() == ".spv") {
common_shaders.push_back(entry.path().string()); common_shaders.push_back(entry.path().string());
} }
} }
@ -180,12 +204,12 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
LOG_INFO(Render_Vulkan, "Preloading {} common shaders", common_shaders.size()); LOG_INFO(Render_Vulkan, "Preloading {} common shaders", common_shaders.size());
shader_manager.PreloadShaders(common_shaders); shader_manager.PreloadShaders(common_shaders);
} }
} catch (const std::exception& e) {
LOG_ERROR(Render_Vulkan, "Error during shader preloading: {}", e.what());
}
} else { } else {
LOG_INFO(Render_Vulkan, "Shader directory not found at {}", shader_dir); LOG_INFO(Render_Vulkan, "Shader directory not found at {}", shader_dir);
} }
} catch (const std::exception& e) {
LOG_ERROR(Render_Vulkan, "Error during shader preloading: {}", e.what());
}
} }
Report(); Report();

View file

@ -15,6 +15,8 @@
#include <sys/syscall.h> #include <sys/syscall.h>
#include <linux/userfaultfd.h> #include <linux/userfaultfd.h>
#include <sys/ioctl.h> #include <sys/ioctl.h>
#elif defined(_WIN32)
#include <windows.h>
#endif #endif
namespace Vulkan { namespace Vulkan {
@ -72,8 +74,136 @@ void PredictiveReuseManager::ClearHistory() {
current_timestamp = 0; current_timestamp = 0;
} }
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
void FaultManagedAllocator::Touch(size_t addr) {
lru.remove(addr);
lru.push_front(addr);
dirty_set.insert(addr);
}
void FaultManagedAllocator::EnforceLimit() {
while (lru.size() > MaxPages) {
size_t evict = lru.back();
lru.pop_back();
auto it = page_map.find(evict);
if (it != page_map.end()) {
if (dirty_set.count(evict)) {
// Compress and store dirty page before evicting
std::vector<u8> compressed((u8*)it->second, (u8*)it->second + PageSize);
compressed_store[evict] = std::move(compressed);
dirty_set.erase(evict);
}
#if defined(__linux__) || defined(__ANDROID__) #if defined(__linux__) || defined(__ANDROID__)
munmap(it->second, PageSize);
#elif defined(_WIN32)
VirtualFree(it->second, 0, MEM_RELEASE);
#endif
page_map.erase(it);
}
}
}
void* FaultManagedAllocator::GetOrAlloc(size_t addr) {
std::lock_guard<std::mutex> guard(lock);
if (page_map.count(addr)) {
Touch(addr);
return page_map[addr];
}
#if defined(__linux__) || defined(__ANDROID__)
void* mem = mmap(nullptr, PageSize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) {
LOG_ERROR(Render_Vulkan, "Failed to mmap memory for fault handler");
return nullptr;
}
#elif defined(_WIN32)
void* mem = VirtualAlloc(nullptr, PageSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
if (!mem) {
LOG_ERROR(Render_Vulkan, "Failed to VirtualAlloc memory for fault handler");
return nullptr;
}
#endif
if (compressed_store.count(addr)) {
// Decompress stored page data
std::memcpy(mem, compressed_store[addr].data(), compressed_store[addr].size());
compressed_store.erase(addr);
} else {
std::memset(mem, 0, PageSize);
}
page_map[addr] = mem;
lru.push_front(addr);
dirty_set.insert(addr);
EnforceLimit();
return mem;
}
#if defined(_WIN32)
// Static member initialization
FaultManagedAllocator* FaultManagedAllocator::current_instance = nullptr;
LONG WINAPI FaultManagedAllocator::VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info) {
// Only handle access violations (page faults)
if (exception_info->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) {
return EXCEPTION_CONTINUE_SEARCH;
}
if (!current_instance) {
return EXCEPTION_CONTINUE_SEARCH;
}
// Get the faulting address - use ULONG_PTR for Windows
const ULONG_PTR fault_addr = static_cast<ULONG_PTR>(exception_info->ExceptionRecord->ExceptionInformation[1]);
const ULONG_PTR base_addr = reinterpret_cast<ULONG_PTR>(current_instance->base_address);
// Check if the address is within our managed range
if (fault_addr < base_addr ||
fault_addr >= (base_addr + static_cast<ULONG_PTR>(current_instance->memory_size))) {
return EXCEPTION_CONTINUE_SEARCH;
}
// Calculate the base address of the page
const ULONG_PTR page_addr = fault_addr & ~(static_cast<ULONG_PTR>(PageSize) - 1);
const size_t relative_addr = static_cast<size_t>(page_addr - base_addr);
// Handle the fault by allocating memory
void* page = current_instance->GetOrAlloc(relative_addr);
if (!page) {
return EXCEPTION_CONTINUE_SEARCH;
}
// Copy the page data to the faulting address
DWORD old_protect;
void* target_addr = reinterpret_cast<void*>(page_addr);
// Make the target page writable
if (VirtualProtect(target_addr, PageSize, PAGE_READWRITE, &old_protect)) {
std::memcpy(target_addr, page, PageSize);
// Restore original protection
VirtualProtect(target_addr, PageSize, old_protect, &old_protect);
return EXCEPTION_CONTINUE_EXECUTION;
}
return EXCEPTION_CONTINUE_SEARCH;
}
void FaultManagedAllocator::ExceptionHandlerThread() {
while (running) {
// Sleep to avoid busy waiting
Sleep(10);
}
}
#endif
void FaultManagedAllocator::Initialize(void* base, size_t size) { void FaultManagedAllocator::Initialize(void* base, size_t size) {
#if defined(__linux__) || defined(__ANDROID__)
uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK); uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (uffd < 0) { if (uffd < 0) {
LOG_ERROR(Render_Vulkan, "Failed to create userfaultfd, fault handling disabled"); LOG_ERROR(Render_Vulkan, "Failed to create userfaultfd, fault handling disabled");
@ -97,66 +227,28 @@ void FaultManagedAllocator::Initialize(void* base, size_t size) {
running = true; running = true;
fault_handler = std::thread(&FaultManagedAllocator::FaultThread, this); fault_handler = std::thread(&FaultManagedAllocator::FaultThread, this);
#elif defined(_WIN32)
// Setup Windows memory for fault handling
base_address = base;
memory_size = size;
// Reserve memory range but don't commit it yet - it will be demand-paged
DWORD oldProtect;
VirtualProtect(base, size, PAGE_NOACCESS, &oldProtect);
// Install a vectored exception handler
current_instance = this;
AddVectoredExceptionHandler(1, VectoredExceptionHandler);
running = true;
exception_handler = std::thread(&FaultManagedAllocator::ExceptionHandlerThread, this);
LOG_INFO(Render_Vulkan, "Windows fault-managed memory initialized at {:p}, size: {}",
base, size);
#endif
} }
void FaultManagedAllocator::Touch(size_t addr) { #if defined(__linux__) || defined(__ANDROID__)
lru.remove(addr);
lru.push_front(addr);
dirty_set.insert(addr);
}
void FaultManagedAllocator::EnforceLimit() {
while (lru.size() > MaxPages) {
size_t evict = lru.back();
lru.pop_back();
auto it = page_map.find(evict);
if (it != page_map.end()) {
if (dirty_set.count(evict)) {
// Compress and store dirty page before evicting
std::vector<u8> compressed((u8*)it->second, (u8*)it->second + PageSize);
compressed_store[evict] = std::move(compressed);
dirty_set.erase(evict);
}
munmap(it->second, PageSize);
page_map.erase(it);
}
}
}
void* FaultManagedAllocator::GetOrAlloc(size_t addr) {
std::lock_guard<std::mutex> guard(lock);
if (page_map.count(addr)) {
Touch(addr);
return page_map[addr];
}
void* mem = mmap(nullptr, PageSize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) {
LOG_ERROR(Render_Vulkan, "Failed to mmap memory for fault handler");
return nullptr;
}
if (compressed_store.count(addr)) {
// Decompress stored page data
std::memcpy(mem, compressed_store[addr].data(), compressed_store[addr].size());
compressed_store.erase(addr);
} else {
std::memset(mem, 0, PageSize);
}
page_map[addr] = mem;
lru.push_front(addr);
dirty_set.insert(addr);
EnforceLimit();
return mem;
}
void FaultManagedAllocator::FaultThread() { void FaultManagedAllocator::FaultThread() {
struct pollfd pfd = { uffd, POLLIN, 0 }; struct pollfd pfd = { uffd, POLLIN, 0 };
@ -183,6 +275,7 @@ void FaultManagedAllocator::FaultThread() {
} }
} }
} }
#endif
void* FaultManagedAllocator::Translate(size_t addr) { void* FaultManagedAllocator::Translate(size_t addr) {
std::lock_guard<std::mutex> guard(lock); std::lock_guard<std::mutex> guard(lock);
@ -244,6 +337,7 @@ void FaultManagedAllocator::ClearDirtySet() {
FaultManagedAllocator::~FaultManagedAllocator() { FaultManagedAllocator::~FaultManagedAllocator() {
running = false; running = false;
#if defined(__linux__) || defined(__ANDROID__)
if (fault_handler.joinable()) { if (fault_handler.joinable()) {
fault_handler.join(); fault_handler.join();
} }
@ -255,8 +349,27 @@ FaultManagedAllocator::~FaultManagedAllocator() {
if (uffd != -1) { if (uffd != -1) {
close(uffd); close(uffd);
} }
#elif defined(_WIN32)
if (exception_handler.joinable()) {
exception_handler.join();
}
// Remove the vectored exception handler
RemoveVectoredExceptionHandler(VectoredExceptionHandler);
current_instance = nullptr;
for (auto& [addr, mem] : page_map) {
VirtualFree(mem, 0, MEM_RELEASE);
}
// Free the base memory if needed
if (base_address) {
VirtualFree(base_address, 0, MEM_RELEASE);
base_address = nullptr;
}
#endif
} }
#endif // defined(__linux__) || defined(__ANDROID__) #endif // defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
HybridMemory::HybridMemory(const Device& device_, MemoryAllocator& allocator, size_t reuse_history) HybridMemory::HybridMemory(const Device& device_, MemoryAllocator& allocator, size_t reuse_history)
: device(device_), memory_allocator(allocator), reuse_manager(reuse_history) { : device(device_), memory_allocator(allocator), reuse_manager(reuse_history) {
@ -265,7 +378,7 @@ HybridMemory::HybridMemory(const Device& device_, MemoryAllocator& allocator, si
HybridMemory::~HybridMemory() = default; HybridMemory::~HybridMemory() = default;
void HybridMemory::InitializeGuestMemory(void* base, size_t size) { void HybridMemory::InitializeGuestMemory(void* base, size_t size) {
#if defined(__linux__) || defined(__ANDROID__) #if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
fmaa.Initialize(base, size); fmaa.Initialize(base, size);
LOG_INFO(Render_Vulkan, "Initialized fault-managed guest memory at {:p}, size: {}", LOG_INFO(Render_Vulkan, "Initialized fault-managed guest memory at {:p}, size: {}",
base, size); base, size);
@ -275,7 +388,7 @@ void HybridMemory::InitializeGuestMemory(void* base, size_t size) {
} }
void* HybridMemory::TranslateAddress(size_t addr) { void* HybridMemory::TranslateAddress(size_t addr) {
#if defined(__linux__) || defined(__ANDROID__) #if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
return fmaa.Translate(addr); return fmaa.Translate(addr);
#else #else
return nullptr; return nullptr;
@ -308,7 +421,7 @@ ComputeBuffer HybridMemory::CreateComputeBuffer(VkDeviceSize size, VkBufferUsage
} }
void HybridMemory::SaveSnapshot(const std::string& path) { void HybridMemory::SaveSnapshot(const std::string& path) {
#if defined(__linux__) || defined(__ANDROID__) #if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
fmaa.SaveSnapshot(path); fmaa.SaveSnapshot(path);
#else #else
LOG_ERROR(Render_Vulkan, "Memory snapshots not supported on this platform"); LOG_ERROR(Render_Vulkan, "Memory snapshots not supported on this platform");
@ -316,7 +429,7 @@ void HybridMemory::SaveSnapshot(const std::string& path) {
} }
void HybridMemory::SaveDifferentialSnapshot(const std::string& path) { void HybridMemory::SaveDifferentialSnapshot(const std::string& path) {
#if defined(__linux__) || defined(__ANDROID__) #if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
fmaa.SaveDifferentialSnapshot(path); fmaa.SaveDifferentialSnapshot(path);
#else #else
LOG_ERROR(Render_Vulkan, "Differential memory snapshots not supported on this platform"); LOG_ERROR(Render_Vulkan, "Differential memory snapshots not supported on this platform");
@ -324,7 +437,7 @@ void HybridMemory::SaveDifferentialSnapshot(const std::string& path) {
} }
void HybridMemory::ResetDirtyTracking() { void HybridMemory::ResetDirtyTracking() {
#if defined(__linux__) || defined(__ANDROID__) #if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
fmaa.ClearDirtySet(); fmaa.ClearDirtySet();
#endif #endif
} }

View file

@ -10,6 +10,9 @@
#include <mutex> #include <mutex>
#include <atomic> #include <atomic>
#include <functional> #include <functional>
#include <list>
#include <set>
#include <map>
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_device.h"
@ -46,7 +49,7 @@ private:
mutable std::mutex mutex; mutable std::mutex mutex;
}; };
#if defined(__linux__) || defined(__ANDROID__) #if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
class FaultManagedAllocator { class FaultManagedAllocator {
public: public:
static constexpr size_t PageSize = 0x1000; static constexpr size_t PageSize = 0x1000;
@ -65,14 +68,26 @@ private:
std::set<size_t> dirty_set; std::set<size_t> dirty_set;
std::unordered_map<size_t, std::vector<u8>> compressed_store; std::unordered_map<size_t, std::vector<u8>> compressed_store;
std::mutex lock; std::mutex lock;
#if defined(__linux__) || defined(__ANDROID__)
int uffd = -1; int uffd = -1;
std::atomic<bool> running{false}; std::atomic<bool> running{false};
std::thread fault_handler; std::thread fault_handler;
void FaultThread();
#elif defined(_WIN32)
void* base_address = nullptr;
size_t memory_size = 0;
HANDLE exception_port = nullptr;
std::atomic<bool> running{false};
std::thread exception_handler;
void ExceptionHandlerThread();
static LONG WINAPI VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info);
static FaultManagedAllocator* current_instance;
#endif
void Touch(size_t addr); void Touch(size_t addr);
void EnforceLimit(); void EnforceLimit();
void* GetOrAlloc(size_t addr); void* GetOrAlloc(size_t addr);
void FaultThread();
}; };
#endif #endif
@ -95,7 +110,7 @@ private:
MemoryAllocator& memory_allocator; MemoryAllocator& memory_allocator;
PredictiveReuseManager reuse_manager; PredictiveReuseManager reuse_manager;
#if defined(__linux__) || defined(__ANDROID__) #if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
FaultManagedAllocator fmaa; FaultManagedAllocator fmaa;
#endif #endif
}; };