diff --git a/src/common/settings.h b/src/common/settings.h
index 9177d9b2d..47ab9d131 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -198,9 +198,7 @@ struct Values {
                                                      MemoryLayout::Memory_4Gb,
                                                      MemoryLayout::Memory_12Gb,
                                                      "memory_layout_mode",
-                                                     Category::Core,
-                                                     Specialization::Default,
-                                                     false};
+                                                     Category::Core};
     SwitchableSetting<bool, false> use_speed_limit{
         linkage, true, "use_speed_limit", Category::Core, Specialization::Paired, false, true};
     SwitchableSetting<u16, true> speed_limit{linkage,
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index eafbbf324..a15de9539 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -9,6 +9,8 @@
 #include <filesystem>
 #include <memory>
 #include <vector>
+#include <cstdlib>
+#if defined(_WIN32)
+#include <windows.h>
+#endif
 
@@ -136,15 +138,37 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
 
     // Initialize HybridMemory system
     if (Settings::values.use_gpu_memory_manager.GetValue()) {
-#if defined(__linux__) || defined(__ANDROID__)
+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
         try {
-            void* guest_memory_base = std::aligned_alloc(4096, 64 * 1024 * 1024);
-            if (guest_memory_base) {
+            // Define the memory size with explicit types to avoid conversion warnings
+            constexpr size_t memory_size_mb = 64;
+            constexpr size_t memory_size_bytes = memory_size_mb * 1024 * 1024;
+
+            void* guest_memory_base = nullptr;
+#if defined(_WIN32)
+            // On Windows, reserve (but do not commit) the range; pages are
+            // committed on demand by the fault handler
+            LPVOID result = VirtualAlloc(nullptr, static_cast<SIZE_T>(memory_size_bytes),
+                                         MEM_RESERVE, PAGE_NOACCESS);
+            if (result != nullptr) {
+                guest_memory_base = result;
+            }
+#else
+            // On Linux/Android, use aligned_alloc
+            guest_memory_base = std::aligned_alloc(4096, memory_size_bytes);
+#endif
+            if (guest_memory_base != nullptr) {
                 try {
-                    hybrid_memory->InitializeGuestMemory(guest_memory_base, 64 * 1024 * 1024);
-                    LOG_INFO(Render_Vulkan, "HybridMemory initialized with {} MB of fault-managed memory", 64);
-                } catch (const std::exception& e) {
+                    hybrid_memory->InitializeGuestMemory(guest_memory_base, memory_size_bytes);
+                    LOG_INFO(Render_Vulkan,
+                             "HybridMemory initialized with {} MB of fault-managed memory",
+                             memory_size_mb);
+                } catch (const std::exception&) {
+#if defined(_WIN32)
+                    VirtualFree(guest_memory_base, 0, MEM_RELEASE);
+#else
                     std::free(guest_memory_base);
+#endif
                     throw;
                 }
             }
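
The hunk above reduces to a reserve-then-fault pattern: on Windows nothing is committed up front, and a fault handler populates pages on first touch. A minimal standalone sketch of that pattern, assuming only the Win32 API and the C standard library (ReserveGuestRange and kGuestSize are illustrative names, not part of the patch):

// Minimal sketch of the reserve-then-fault allocation pattern used above.
#include <cstdio>
#include <cstdlib>
#if defined(_WIN32)
#include <windows.h>
#endif

static void* ReserveGuestRange(std::size_t bytes) {
#if defined(_WIN32)
    // Reserve address space only; no physical pages are committed, so any
    // access faults until a handler (or a later VirtualAlloc) commits them.
    return VirtualAlloc(nullptr, bytes, MEM_RESERVE, PAGE_NOACCESS);
#else
    // POSIX path: committed, page-aligned memory up front.
    return std::aligned_alloc(4096, bytes);
#endif
}

int main() {
    constexpr std::size_t kGuestSize = 64 * 1024 * 1024;
    void* base = ReserveGuestRange(kGuestSize);
    std::printf("guest base: %p\n", base);
#if defined(_WIN32)
    if (base != nullptr) {
        VirtualFree(base, 0, MEM_RELEASE);
    }
#else
    std::free(base);
#endif
    return 0;
}
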
@@ -168,10 +192,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
 
         // Add paths to common shaders that should be preloaded
         // These will be compiled in parallel for faster startup
-        if (std::filesystem::exists(shader_dir)) {
-            try {
+        try {
+            if (std::filesystem::exists(shader_dir)) {
                 for (const auto& entry : std::filesystem::directory_iterator(shader_dir)) {
-                    if (entry.path().extension() == ".spv") {
+                    if (entry.is_regular_file() && entry.path().extension() == ".spv") {
                         common_shaders.push_back(entry.path().string());
                     }
                 }
@@ -180,11 +204,11 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
                     LOG_INFO(Render_Vulkan, "Preloading {} common shaders", common_shaders.size());
                     shader_manager.PreloadShaders(common_shaders);
                 }
-        } catch (const std::exception& e) {
-            LOG_ERROR(Render_Vulkan, "Error during shader preloading: {}", e.what());
+            } else {
+                LOG_INFO(Render_Vulkan, "Shader directory not found at {}", shader_dir);
             }
-        } else {
-            LOG_INFO(Render_Vulkan, "Shader directory not found at {}", shader_dir);
+        } catch (const std::exception& e) {
+            LOG_ERROR(Render_Vulkan, "Error during shader preloading: {}", e.what());
         }
     }
diff --git a/src/video_core/vulkan_common/hybrid_memory.cpp b/src/video_core/vulkan_common/hybrid_memory.cpp
index bbc001b01..60c4ec345 100644
--- a/src/video_core/vulkan_common/hybrid_memory.cpp
+++ b/src/video_core/vulkan_common/hybrid_memory.cpp
@@ -15,6 +15,8 @@
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <unistd.h>
+#elif defined(_WIN32)
+#include <windows.h>
 #endif
 
 namespace Vulkan {
@@ -72,8 +74,136 @@ void PredictiveReuseManager::ClearHistory() {
     current_timestamp = 0;
 }
 
+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
+void FaultManagedAllocator::Touch(size_t addr) {
+    lru.remove(addr);
+    lru.push_front(addr);
+    dirty_set.insert(addr);
+}
+
+void FaultManagedAllocator::EnforceLimit() {
+    while (lru.size() > MaxPages) {
+        size_t evict = lru.back();
+        lru.pop_back();
+
+        auto it = page_map.find(evict);
+        if (it != page_map.end()) {
+            if (dirty_set.count(evict)) {
+                // Stash a copy of the dirty page before evicting it (stored
+                // uncompressed for now, despite the container's name)
+                std::vector<u8> compressed((u8*)it->second, (u8*)it->second + PageSize);
+                compressed_store[evict] = std::move(compressed);
+                dirty_set.erase(evict);
+            }
+#if defined(__linux__) || defined(__ANDROID__)
+            munmap(it->second, PageSize);
+#elif defined(_WIN32)
+            VirtualFree(it->second, 0, MEM_RELEASE);
+#endif
+            page_map.erase(it);
+        }
+    }
+}
+
+void* FaultManagedAllocator::GetOrAlloc(size_t addr) {
+    std::lock_guard<std::mutex> guard(lock);
+
+    if (page_map.count(addr)) {
+        Touch(addr);
+        return page_map[addr];
+    }
+
+#if defined(__linux__) || defined(__ANDROID__)
+    void* mem = mmap(nullptr, PageSize, PROT_READ | PROT_WRITE,
+                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (mem == MAP_FAILED) {
+        LOG_ERROR(Render_Vulkan, "Failed to mmap memory for fault handler");
+        return nullptr;
+    }
+#elif defined(_WIN32)
+    void* mem = VirtualAlloc(nullptr, PageSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+    if (!mem) {
+        LOG_ERROR(Render_Vulkan, "Failed to VirtualAlloc memory for fault handler");
+        return nullptr;
+    }
+#endif
+
+    if (compressed_store.count(addr)) {
+        // Restore the stashed page contents
+        std::memcpy(mem, compressed_store[addr].data(), compressed_store[addr].size());
+        compressed_store.erase(addr);
+    } else {
+        std::memset(mem, 0, PageSize);
+    }
+
+    page_map[addr] = mem;
+    lru.push_front(addr);
+    dirty_set.insert(addr);
+    EnforceLimit();
+
+    return mem;
+}
+
+#if defined(_WIN32)
+// Static member initialization
+FaultManagedAllocator* FaultManagedAllocator::current_instance = nullptr;
+
+LONG WINAPI FaultManagedAllocator::VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info) {
+    // Only handle access violations (page faults)
+    if (exception_info->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    if (!current_instance) {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    // Get the faulting address; ExceptionInformation[1] already holds a ULONG_PTR
+    const ULONG_PTR fault_addr =
+        static_cast<ULONG_PTR>(exception_info->ExceptionRecord->ExceptionInformation[1]);
+    const ULONG_PTR base_addr = reinterpret_cast<ULONG_PTR>(current_instance->base_address);
+
+    // Check if the address is within our managed range
+    if (fault_addr < base_addr ||
+        fault_addr >= (base_addr + static_cast<ULONG_PTR>(current_instance->memory_size))) {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    // Calculate the base address of the faulting page
+    const ULONG_PTR page_addr = fault_addr & ~(static_cast<ULONG_PTR>(PageSize) - 1);
+    const size_t relative_addr = static_cast<size_t>(page_addr - base_addr);
+
+    // Handle the fault by allocating backing memory
+    void* page = current_instance->GetOrAlloc(relative_addr);
+    if (!page) {
+        return EXCEPTION_CONTINUE_SEARCH;
+    }
+
+    // Commit the faulting page (VirtualProtect cannot be used here, since the
+    // range is only reserved) and copy the backing data into place
+    void* target_addr = reinterpret_cast<void*>(page_addr);
+    if (VirtualAlloc(target_addr, PageSize, MEM_COMMIT, PAGE_READWRITE) != nullptr) {
+        std::memcpy(target_addr, page, PageSize);
+        return EXCEPTION_CONTINUE_EXECUTION;
+    }
+
+    return EXCEPTION_CONTINUE_SEARCH;
+}
+
+void FaultManagedAllocator::ExceptionHandlerThread() {
+    while (running) {
+        // Sleep to avoid busy waiting; faults are resolved synchronously by
+        // the vectored exception handler, this thread only keeps state alive
+        Sleep(10);
+    }
+}
+#endif
+
 void FaultManagedAllocator::Initialize(void* base, size_t size) {
+#if defined(__linux__) || defined(__ANDROID__)
     uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
     if (uffd < 0) {
         LOG_ERROR(Render_Vulkan, "Failed to create userfaultfd, fault handling disabled");
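
GetOrAlloc and EnforceLimit above implement a small resident-page cache: hot pages stay mapped, the coldest page is evicted once MaxPages is exceeded, and dirty evictees are stashed by page index for later restore. The same policy in a self-contained sketch that uses std::vector pages instead of mmap/VirtualAlloc (PageCache and its members are invented names):

// Self-contained model of the page-cache policy above.
#include <cstddef>
#include <cstdint>
#include <list>
#include <map>
#include <unordered_map>
#include <vector>

struct PageCache {
    static constexpr std::size_t kPageSize = 0x1000;
    static constexpr std::size_t kMaxPages = 4;

    std::map<std::size_t, std::vector<std::uint8_t>> resident;
    std::list<std::size_t> lru;  // front = most recently used
    std::unordered_map<std::size_t, std::vector<std::uint8_t>> stashed;

    std::vector<std::uint8_t>& GetOrAlloc(std::size_t page) {
        if (auto it = resident.find(page); it != resident.end()) {
            lru.remove(page);  // re-mark as most recently used
            lru.push_front(page);
            return it->second;
        }
        // Restore a previously evicted page, or start from a zeroed one.
        std::vector<std::uint8_t> data = stashed.count(page)
                                             ? std::move(stashed[page])
                                             : std::vector<std::uint8_t>(kPageSize, 0);
        stashed.erase(page);
        lru.push_front(page);
        std::vector<std::uint8_t>& slot = resident[page] = std::move(data);
        while (lru.size() > kMaxPages) {  // evict the coldest page
            const std::size_t victim = lru.back();
            lru.pop_back();
            stashed[victim] = std::move(resident[victim]);
            resident.erase(victim);
        }
        return slot;
    }
};
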
@@ -97,66 +227,28 @@ void FaultManagedAllocator::Initialize(void* base, size_t size) {
 
     running = true;
     fault_handler = std::thread(&FaultManagedAllocator::FaultThread, this);
+#elif defined(_WIN32)
+    // Set up Windows memory for fault handling
+    base_address = base;
+    memory_size = size;
+
+    // Keep the reserved range inaccessible; pages are committed on demand by
+    // the vectored exception handler
+    DWORD old_protect;
+    VirtualProtect(base, size, PAGE_NOACCESS, &old_protect);
+
+    // Install a vectored exception handler, keeping its handle for removal
+    current_instance = this;
+    veh_handle = AddVectoredExceptionHandler(1, VectoredExceptionHandler);
+
+    running = true;
+    exception_handler = std::thread(&FaultManagedAllocator::ExceptionHandlerThread, this);
+
+    LOG_INFO(Render_Vulkan, "Windows fault-managed memory initialized at {:p}, size: {}",
+             base, size);
+#endif
 }
 
-void FaultManagedAllocator::Touch(size_t addr) {
-    lru.remove(addr);
-    lru.push_front(addr);
-    dirty_set.insert(addr);
-}
-
-void FaultManagedAllocator::EnforceLimit() {
-    while (lru.size() > MaxPages) {
-        size_t evict = lru.back();
-        lru.pop_back();
-
-        auto it = page_map.find(evict);
-        if (it != page_map.end()) {
-            if (dirty_set.count(evict)) {
-                // Compress and store dirty page before evicting
-                std::vector<u8> compressed((u8*)it->second, (u8*)it->second + PageSize);
-                compressed_store[evict] = std::move(compressed);
-                dirty_set.erase(evict);
-            }
-
-            munmap(it->second, PageSize);
-            page_map.erase(it);
-        }
-    }
-}
-
-void* FaultManagedAllocator::GetOrAlloc(size_t addr) {
-    std::lock_guard<std::mutex> guard(lock);
-
-    if (page_map.count(addr)) {
-        Touch(addr);
-        return page_map[addr];
-    }
-
-    void* mem = mmap(nullptr, PageSize, PROT_READ | PROT_WRITE,
-                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-
-    if (mem == MAP_FAILED) {
-        LOG_ERROR(Render_Vulkan, "Failed to mmap memory for fault handler");
-        return nullptr;
-    }
-
-    if (compressed_store.count(addr)) {
-        // Decompress stored page data
-        std::memcpy(mem, compressed_store[addr].data(), compressed_store[addr].size());
-        compressed_store.erase(addr);
-    } else {
-        std::memset(mem, 0, PageSize);
-    }
-
-    page_map[addr] = mem;
-    lru.push_front(addr);
-    dirty_set.insert(addr);
-    EnforceLimit();
-
-    return mem;
-}
-
+#if defined(__linux__) || defined(__ANDROID__)
 void FaultManagedAllocator::FaultThread() {
     struct pollfd pfd = { uffd, POLLIN, 0 };
 
@@ -183,6 +275,7 @@ void FaultManagedAllocator::FaultThread() {
         }
     }
 }
+#endif
 
 void* FaultManagedAllocator::Translate(size_t addr) {
     std::lock_guard<std::mutex> guard(lock);
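
The Linux branch of Initialize above rests on the standard userfaultfd flow: create the descriptor, negotiate the API, register the range, then serve fault events from a polling thread. A compressed, Linux-only sketch of that flow with error handling omitted (independent of the patch, whose FaultThread routes faults through its page cache rather than UFFDIO_ZEROPAGE):

// Linux-only sketch: create a userfaultfd, register a mapping, satisfy one
// missing-page fault with zeros.
#include <cstddef>
#include <cstdint>
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main() {
    const int uffd = static_cast<int>(syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK));

    uffdio_api api{};
    api.api = UFFD_API;
    ioctl(uffd, UFFDIO_API, &api);  // negotiate the API version

    const std::size_t len = 16 * 0x1000;
    void* region = mmap(nullptr, len, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    uffdio_register reg{};
    reg.range.start = reinterpret_cast<std::uintptr_t>(region);
    reg.range.len = len;
    reg.mode = UFFDIO_REGISTER_MODE_MISSING;  // report missing-page faults
    ioctl(uffd, UFFDIO_REGISTER, &reg);

    // Normally a dedicated thread: poll the descriptor, read one event, and
    // resolve it (another thread would be touching `region` to trigger it).
    pollfd pfd{uffd, POLLIN, 0};
    if (poll(&pfd, 1, 0) > 0) {
        uffd_msg msg{};
        if (read(uffd, &msg, sizeof(msg)) == sizeof(msg) &&
            msg.event == UFFD_EVENT_PAGEFAULT) {
            uffdio_zeropage zero{};
            zero.range.start = msg.arg.pagefault.address & ~0xFFFull;
            zero.range.len = 0x1000;
            ioctl(uffd, UFFDIO_ZEROPAGE, &zero);  // back the page with zeros
        }
    }

    munmap(region, len);
    close(uffd);
    return 0;
}
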
handler"); - return nullptr; - } - - if (compressed_store.count(addr)) { - // Decompress stored page data - std::memcpy(mem, compressed_store[addr].data(), compressed_store[addr].size()); - compressed_store.erase(addr); - } else { - std::memset(mem, 0, PageSize); - } - - page_map[addr] = mem; - lru.push_front(addr); - dirty_set.insert(addr); - EnforceLimit(); - - return mem; -} - +#if defined(__linux__) || defined(__ANDROID__) void FaultManagedAllocator::FaultThread() { struct pollfd pfd = { uffd, POLLIN, 0 }; @@ -183,6 +275,7 @@ void FaultManagedAllocator::FaultThread() { } } } +#endif void* FaultManagedAllocator::Translate(size_t addr) { std::lock_guard guard(lock); @@ -244,6 +337,7 @@ void FaultManagedAllocator::ClearDirtySet() { FaultManagedAllocator::~FaultManagedAllocator() { running = false; +#if defined(__linux__) || defined(__ANDROID__) if (fault_handler.joinable()) { fault_handler.join(); } @@ -255,8 +349,27 @@ FaultManagedAllocator::~FaultManagedAllocator() { if (uffd != -1) { close(uffd); } +#elif defined(_WIN32) + if (exception_handler.joinable()) { + exception_handler.join(); + } + + // Remove the vectored exception handler + RemoveVectoredExceptionHandler(VectoredExceptionHandler); + current_instance = nullptr; + + for (auto& [addr, mem] : page_map) { + VirtualFree(mem, 0, MEM_RELEASE); + } + + // Free the base memory if needed + if (base_address) { + VirtualFree(base_address, 0, MEM_RELEASE); + base_address = nullptr; + } +#endif } -#endif // defined(__linux__) || defined(__ANDROID__) +#endif // defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) HybridMemory::HybridMemory(const Device& device_, MemoryAllocator& allocator, size_t reuse_history) : device(device_), memory_allocator(allocator), reuse_manager(reuse_history) { @@ -265,7 +378,7 @@ HybridMemory::HybridMemory(const Device& device_, MemoryAllocator& allocator, si HybridMemory::~HybridMemory() = default; void HybridMemory::InitializeGuestMemory(void* base, size_t size) { -#if defined(__linux__) || defined(__ANDROID__) +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) fmaa.Initialize(base, size); LOG_INFO(Render_Vulkan, "Initialized fault-managed guest memory at {:p}, size: {}", base, size); @@ -275,7 +388,7 @@ void HybridMemory::InitializeGuestMemory(void* base, size_t size) { } void* HybridMemory::TranslateAddress(size_t addr) { -#if defined(__linux__) || defined(__ANDROID__) +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) return fmaa.Translate(addr); #else return nullptr; @@ -308,7 +421,7 @@ ComputeBuffer HybridMemory::CreateComputeBuffer(VkDeviceSize size, VkBufferUsage } void HybridMemory::SaveSnapshot(const std::string& path) { -#if defined(__linux__) || defined(__ANDROID__) +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) fmaa.SaveSnapshot(path); #else LOG_ERROR(Render_Vulkan, "Memory snapshots not supported on this platform"); @@ -316,7 +429,7 @@ void HybridMemory::SaveSnapshot(const std::string& path) { } void HybridMemory::SaveDifferentialSnapshot(const std::string& path) { -#if defined(__linux__) || defined(__ANDROID__) +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) fmaa.SaveDifferentialSnapshot(path); #else LOG_ERROR(Render_Vulkan, "Differential memory snapshots not supported on this platform"); @@ -324,7 +437,7 @@ void HybridMemory::SaveDifferentialSnapshot(const std::string& path) { } void HybridMemory::ResetDirtyTracking() { -#if defined(__linux__) || defined(__ANDROID__) +#if defined(__linux__) || defined(__ANDROID__) 
diff --git a/src/video_core/vulkan_common/hybrid_memory.h b/src/video_core/vulkan_common/hybrid_memory.h
index c258d4f21..182c1c589 100644
--- a/src/video_core/vulkan_common/hybrid_memory.h
+++ b/src/video_core/vulkan_common/hybrid_memory.h
@@ -10,6 +10,9 @@
 #include <mutex>
 #include <set>
 #include <thread>
+#if defined(_WIN32)
+#include <windows.h>
+#endif
 
 #include "common/common_types.h"
 #include "video_core/vulkan_common/vulkan_device.h"
@@ -46,7 +49,7 @@ private:
     mutable std::mutex mutex;
 };
 
-#if defined(__linux__) || defined(__ANDROID__)
+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
 class FaultManagedAllocator {
 public:
     static constexpr size_t PageSize = 0x1000;
@@ -65,14 +68,26 @@ private:
     std::set<size_t> dirty_set;
     std::unordered_map<size_t, std::vector<u8>> compressed_store;
     std::mutex lock;
+
+#if defined(__linux__) || defined(__ANDROID__)
     int uffd = -1;
     std::atomic<bool> running{false};
     std::thread fault_handler;
+    void FaultThread();
+#elif defined(_WIN32)
+    void* base_address = nullptr;
+    size_t memory_size = 0;
+    PVOID veh_handle = nullptr;
+    std::atomic<bool> running{false};
+    std::thread exception_handler;
+    void ExceptionHandlerThread();
+    static LONG WINAPI VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info);
+    static FaultManagedAllocator* current_instance;
+#endif
 
     void Touch(size_t addr);
     void EnforceLimit();
     void* GetOrAlloc(size_t addr);
-    void FaultThread();
 };
 #endif
 
@@ -95,7 +110,7 @@ private:
     MemoryAllocator& memory_allocator;
     PredictiveReuseManager reuse_manager;
 
-#if defined(__linux__) || defined(__ANDROID__)
+#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
     FaultManagedAllocator fmaa;
 #endif
 };
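
Taken together, a hypothetical caller of the interfaces declared in this header, mirroring what RendererVulkan does when use_gpu_memory_manager is enabled (ExampleBoot, the offset, and the file names are illustrative, not patch code):

// Hypothetical driver of the HybridMemory API declared above.
#include <cstddef>

#include "video_core/vulkan_common/hybrid_memory.h"

void ExampleBoot(Vulkan::HybridMemory& hybrid_memory, void* base, std::size_t size) {
    // Hand the reserved guest range to the fault-managed allocator.
    hybrid_memory.InitializeGuestMemory(base, size);

    // Resolve a guest offset to its backing host page, faulting it in on
    // first touch (0x2000 is an arbitrary example offset).
    void* host_page = hybrid_memory.TranslateAddress(0x2000);
    (void)host_page;

    // Persist state, gated in practice by enable_memory_snapshots.
    hybrid_memory.SaveSnapshot("boot.snapshot");
    hybrid_memory.ResetDirtyTracking();
    hybrid_memory.SaveDifferentialSnapshot("frame.diff");
}
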