From 03aab9becc2326e11ed3b0f04fea4144fd549f8a Mon Sep 17 00:00:00 2001 From: Zephyron Date: Sat, 3 May 2025 17:37:26 +1000 Subject: [PATCH] revert: video_core changes reverted due to instability Reverted video_core changes as they were causing instability and OpenGL had no video output. The following commits were reverted: 1fd5fefcb17fe7fe65faed1c991fb41db782ab0f edfb500ee73d402dcb4f3db492f4eeb3343cca05 b25c7653e64780775893bcd621ee241c30f51451 5d952717ff381808f9dccabd7bc65bb19e7686f2 964bbf489a8dd915d1ac05c88ae9ef8364fbdc36 a4088f3a1ecc2965d852694d8ee87e6849bea01c 18def48dfef9e11f85f54d8c5bcb575ab37bcc35 3205c9b691a121459275cebed371a5e7957849c6 2f57a35d2da89f98a5847da570365939957384b8 f706427815e08fd2fb4b0b49d9f5f9a76a1222b2 fc88c06769ea718a97da9866fa5b836be1fdd923 Signed-off-by: Zephyron --- .ci/scripts/clang/docker.sh | 6 +- .ci/scripts/transifex/docker.sh | 2 +- .ci/scripts/windows/docker.sh | 6 +- src/citron/configuration/qt_config.cpp | 10 - src/citron/configuration/shared_widget.cpp | 11 - src/common/settings.h | 30 +- src/video_core/CMakeLists.txt | 9 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 87 +- .../renderer_opengl/gl_graphics_pipeline.h | 3 +- .../renderer_opengl/gl_shader_cache.cpp | 145 +--- .../renderer_opengl/present/layer.cpp | 2 +- .../renderer_opengl/renderer_opengl.cpp | 2 +- src/video_core/renderer_vulkan/blit_image.cpp | 165 +--- src/video_core/renderer_vulkan/blit_image.h | 30 - .../renderer_vulkan/renderer_vulkan.cpp | 191 ----- .../renderer_vulkan/renderer_vulkan.h | 19 - .../renderer_vulkan/vk_compute_pipeline.cpp | 37 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 42 +- .../renderer_vulkan/vk_graphics_pipeline.h | 32 - .../renderer_vulkan/vk_pipeline_cache.cpp | 302 ++----- .../renderer_vulkan/vk_pipeline_cache.h | 5 - .../renderer_vulkan/vk_shader_util.cpp | 751 ------------------ .../renderer_vulkan/vk_shader_util.h | 74 -- .../renderer_vulkan/vk_texture_cache.cpp | 434 ++-------- .../renderer_vulkan/vk_texture_cache.h | 33 - .../renderer_vulkan/vk_texture_manager.cpp | 146 ---- .../renderer_vulkan/vk_texture_manager.h | 57 -- .../vulkan_common/hybrid_memory.cpp | 446 ----------- src/video_core/vulkan_common/hybrid_memory.h | 119 --- 29 files changed, 180 insertions(+), 3016 deletions(-) delete mode 100644 src/video_core/renderer_vulkan/vk_texture_manager.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_texture_manager.h delete mode 100644 src/video_core/vulkan_common/hybrid_memory.cpp delete mode 100644 src/video_core/vulkan_common/hybrid_memory.h diff --git a/.ci/scripts/clang/docker.sh b/.ci/scripts/clang/docker.sh index 470ace51b..4cefc3448 100755 --- a/.ci/scripts/clang/docker.sh +++ b/.ci/scripts/clang/docker.sh @@ -19,9 +19,9 @@ cmake .. \ -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON \ -DENABLE_QT_TRANSLATION=ON \ -DUSE_DISCORD_PRESENCE=ON \ - -DYUZU_CRASH_DUMPS=ON \ - -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} \ - -DYUZU_USE_BUNDLED_FFMPEG=ON \ + -DCITRON_CRASH_DUMPS=ON \ + -DCITRON_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} \ + -DCITRON_USE_BUNDLED_FFMPEG=ON \ -GNinja ninja diff --git a/.ci/scripts/transifex/docker.sh b/.ci/scripts/transifex/docker.sh index 96aded564..6cd8f3eba 100755 --- a/.ci/scripts/transifex/docker.sh +++ b/.ci/scripts/transifex/docker.sh @@ -11,7 +11,7 @@ gcc -v tx --version mkdir build && cd build -cmake .. -DENABLE_QT_TRANSLATION=ON -DGENERATE_QT_TRANSLATION=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_SDL2=OFF -DYUZU_TESTS=OFF -DYUZU_USE_BUNDLED_VCPKG=ON +cmake .. -DENABLE_QT_TRANSLATION=ON -DGENERATE_QT_TRANSLATION=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_SDL2=OFF -DCITRON_TESTS=OFF -DCITRON_USE_BUNDLED_VCPKG=ON make translation cd .. diff --git a/.ci/scripts/windows/docker.sh b/.ci/scripts/windows/docker.sh index 820e01d22..71fb13043 100755 --- a/.ci/scripts/windows/docker.sh +++ b/.ci/scripts/windows/docker.sh @@ -17,9 +17,9 @@ cmake .. \ -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON \ -DENABLE_QT_TRANSLATION=ON \ -DUSE_CCACHE=ON \ - -DYUZU_USE_BUNDLED_SDL2=OFF \ - -DYUZU_USE_EXTERNAL_SDL2=OFF \ - -DYUZU_TESTS=OFF \ + -DCITRON_USE_BUNDLED_SDL2=OFF \ + -DCITRON_USE_EXTERNAL_SDL2=OFF \ + -DCITRON_TESTS=OFF \ -GNinja ninja citron citron-cmd diff --git a/src/citron/configuration/qt_config.cpp b/src/citron/configuration/qt_config.cpp index 60bc04300..37951b9c8 100644 --- a/src/citron/configuration/qt_config.cpp +++ b/src/citron/configuration/qt_config.cpp @@ -75,11 +75,6 @@ void QtConfig::ReadQtValues() { ReadUIValues(); } ReadQtControlValues(); - - // Always disable memory snapshots and hybrid memory - Settings::values.use_gpu_memory_manager.SetValue(false); - Settings::values.enable_memory_snapshots.SetValue(false); - Settings::values.use_nce.SetValue(false); } void QtConfig::ReadQtPlayerValues(const std::size_t player_index) { @@ -341,11 +336,6 @@ void QtConfig::SaveQtValues() { } SaveQtControlValues(); - // Ensure memory snapshots and hybrid memory are always disabled - Settings::values.use_gpu_memory_manager.SetValue(false); - Settings::values.enable_memory_snapshots.SetValue(false); - Settings::values.use_nce.SetValue(false); - WriteToIni(); } diff --git a/src/citron/configuration/shared_widget.cpp b/src/citron/configuration/shared_widget.cpp index e6eb22878..459dc3feb 100644 --- a/src/citron/configuration/shared_widget.cpp +++ b/src/citron/configuration/shared_widget.cpp @@ -760,17 +760,6 @@ Widget::Widget(Settings::BasicSetting* setting_, const TranslationMap& translati enable &= setting.UsingGlobal(); } - // Disable memory snapshot and hybrid memory checkboxes - if (static_cast(id) == Settings::values.use_gpu_memory_manager.Id() || - static_cast(id) == Settings::values.enable_memory_snapshots.Id() || - static_cast(id) == Settings::values.use_nce.Id()) { - enable = false; - // Also disable the checkbox to prevent it from being changed - if (checkbox) { - checkbox->setEnabled(false); - } - } - this->setEnabled(enable); this->setToolTip(tooltip); diff --git a/src/common/settings.h b/src/common/settings.h index 929488e53..71988bef9 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -212,11 +211,6 @@ struct Values { true, true, &use_speed_limit}; - SwitchableSetting use_nce{linkage, true, "Use Native Code Execution", Category::Core}; - - // Memory - SwitchableSetting use_gpu_memory_manager{linkage, false, "Use GPU Memory Manager", Category::Core}; - SwitchableSetting enable_memory_snapshots{linkage, false, "Enable Memory Snapshots", Category::Core}; // Cpu SwitchableSetting cpu_backend{linkage, @@ -401,11 +395,11 @@ struct Values { Category::RendererAdvanced}; SwitchableSetting async_presentation{linkage, #ifdef ANDROID - false, // Disabled due to instability causing crashes + true, #else - false, // Disabled due to instability causing crashes + false, #endif - "async_presentation", Category::RendererAdvanced}; // Hidden from UI due to instability + "async_presentation", Category::RendererAdvanced}; SwitchableSetting renderer_force_max_clock{linkage, false, "force_max_clock", Category::RendererAdvanced}; SwitchableSetting use_reactive_flushing{linkage, @@ -618,30 +612,24 @@ struct Values { Category::Network}; // WebService - Setting enable_telemetry{linkage, true, "enable_telemetry", Category::WebService}; - Setting web_api_url{linkage, "api.ynet-fun.xyz", "web_api_url", + Setting enable_telemetry{linkage, false, "enable_telemetry", Category::WebService}; + Setting web_api_url{linkage, "https://api.ynet-fun.xyz", "web_api_url", Category::WebService}; Setting citron_username{linkage, std::string(), "citron_username", Category::WebService}; Setting citron_token{linkage, std::string(), "citron_token", Category::WebService}; + // Memory + Setting use_gpu_memory_manager{linkage, false, "use_gpu_memory_manager", Category::System}; + Setting enable_memory_snapshots{linkage, false, "enable_memory_snapshots", Category::System}; + // Add-Ons std::map> disabled_addons; - - // Renderer Advanced Settings - SwitchableSetting use_enhanced_shader_building{linkage, false, "Enhanced Shader Building", - Category::RendererAdvanced}; - - // Add a new setting for shader compilation priority - SwitchableSetting shader_compilation_priority{linkage, 0, "Shader Compilation Priority", - Category::RendererAdvanced}; }; extern Values values; void UpdateGPUAccuracy(); -// boold isGPULevelNormal(); -// TODO: ZEP bool IsGPULevelExtreme(); bool IsGPULevelHigh(); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2059c3aa6..70592b662 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,5 +1,4 @@ # SPDX-FileCopyrightText: 2018 yuzu Emulator Project -# SPDX-FileCopyrightText: 2025 Citron Emulator Project # SPDX-License-Identifier: GPL-2.0-or-later add_subdirectory(host_shaders) @@ -248,8 +247,6 @@ add_library(video_core STATIC renderer_vulkan/vk_turbo_mode.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h - renderer_vulkan/vk_texture_manager.cpp - renderer_vulkan/vk_texture_manager.h shader_cache.cpp shader_cache.h shader_environment.cpp @@ -309,8 +306,6 @@ add_library(video_core STATIC vulkan_common/vulkan_library.h vulkan_common/vulkan_memory_allocator.cpp vulkan_common/vulkan_memory_allocator.h - vulkan_common/hybrid_memory.cpp - vulkan_common/hybrid_memory.h vulkan_common/vulkan_surface.cpp vulkan_common/vulkan_surface.h vulkan_common/vulkan_wrapper.cpp @@ -397,4 +392,8 @@ if (ANDROID AND ARCHITECTURE_arm64) target_link_libraries(video_core PRIVATE adrenotools) endif() +if (ARCHITECTURE_arm64) + target_link_libraries(video_core PRIVATE sse2neon) +endif() + create_target_directory_groups(video_core) diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index e25f731fe..af0a453ee 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -1,13 +1,10 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include #include #include #include -#include -#include #include "common/settings.h" // for enum class Settings::ShaderBackend #include "common/thread_worker.h" @@ -237,68 +234,26 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c auto func{[this, sources_ = std::move(sources), sources_spirv_ = std::move(sources_spirv), shader_notify, backend, in_parallel, force_context_flush](ShaderContext::Context*) mutable { - // Track time for shader compilation for possible performance tuning - const auto start_time = std::chrono::high_resolution_clock::now(); - - // Prepare compilation steps for all shader stages - std::vector> compilation_steps; - compilation_steps.reserve(5); // Maximum number of shader stages - - // Prepare all compilation steps first to better distribute work for (size_t stage = 0; stage < 5; ++stage) { switch (backend) { case Settings::ShaderBackend::Glsl: if (!sources_[stage].empty()) { - compilation_steps.emplace_back([this, stage, source = sources_[stage]]() { - source_programs[stage] = CreateProgram(source, Stage(stage)); - }); + source_programs[stage] = CreateProgram(sources_[stage], Stage(stage)); } break; case Settings::ShaderBackend::Glasm: if (!sources_[stage].empty()) { - compilation_steps.emplace_back([this, stage, source = sources_[stage]]() { - assembly_programs[stage] = CompileProgram(source, AssemblyStage(stage)); - }); + assembly_programs[stage] = + CompileProgram(sources_[stage], AssemblyStage(stage)); } break; case Settings::ShaderBackend::SpirV: if (!sources_spirv_[stage].empty()) { - compilation_steps.emplace_back([this, stage, source = sources_spirv_[stage]]() { - source_programs[stage] = CreateProgram(source, Stage(stage)); - }); + source_programs[stage] = CreateProgram(sources_spirv_[stage], Stage(stage)); } break; } } - - // If we're running in parallel, use high-priority execution for vertex and fragment shaders - // as these are typically needed first by the renderer - if (in_parallel && compilation_steps.size() > 1) { - // Execute vertex (0) and fragment (4) shaders first if they exist - for (size_t priority_stage : {0, 4}) { - for (size_t i = 0; i < compilation_steps.size(); ++i) { - if ((i == priority_stage || (priority_stage == 0 && i <= 1)) && i < compilation_steps.size()) { - compilation_steps[i](); - compilation_steps[i] = [](){}; // Mark as executed - } - } - } - } - - // Execute all remaining compilation steps - for (auto& step : compilation_steps) { - step(); // Will do nothing for already executed steps - } - - // Performance measurement for possible logging or optimization - const auto end_time = std::chrono::high_resolution_clock::now(); - const auto compilation_time = std::chrono::duration_cast( - end_time - start_time).count(); - - if (compilation_time > 50) { // Only log slow compilations - LOG_DEBUG(Render_OpenGL, "Shader compilation took {}ms", compilation_time); - } - if (force_context_flush || in_parallel) { std::scoped_lock lock{built_mutex}; built_fence.Create(); @@ -668,41 +623,15 @@ void GraphicsPipeline::WaitForBuild() { is_built = true; } -bool GraphicsPipeline::IsBuilt() const noexcept { +bool GraphicsPipeline::IsBuilt() noexcept { if (is_built) { return true; } - if (!built_fence.handle) { + if (built_fence.handle == 0) { return false; } - - // Check if the async build has finished by polling the fence - const GLsync sync = built_fence.handle; - const GLuint result = glClientWaitSync(sync, 0, 0); - if (result == GL_ALREADY_SIGNALED || result == GL_CONDITION_SATISFIED) { - // Mark this as mutable even though we're in a const method - this is - // essentially a cached value update which is acceptable - const_cast(this)->is_built = true; - return true; - } - - // For better performance tracking, capture time spent waiting for shaders - static thread_local std::chrono::high_resolution_clock::time_point last_shader_wait_log; - static thread_local u32 shader_wait_count = 0; - - auto now = std::chrono::high_resolution_clock::now(); - auto elapsed = std::chrono::duration_cast( - now - last_shader_wait_log).count(); - - // Log shader compilation status periodically to help diagnose performance issues - if (elapsed >= 5) { // Log every 5 seconds - shader_wait_count++; - LOG_DEBUG(Render_OpenGL, "Waiting for async shader compilation... (count={})", - shader_wait_count); - last_shader_wait_log = now; - } - - return false; + is_built = built_fence.IsSignaled(); + return is_built; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 5852c0289..2f70c1ae9 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -103,7 +102,7 @@ public: return uses_local_memory; } - [[nodiscard]] bool IsBuilt() const noexcept; + [[nodiscard]] bool IsBuilt() noexcept; template static auto MakeConfigureSpecFunc() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d9d5654ee..b2683fa24 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -392,118 +391,18 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n if (!use_asynchronous_shaders) { return pipeline; } - - // Advanced heuristics for smarter async shader compilation in OpenGL - - // Track shader compilation statistics - static thread_local u32 async_shader_count = 0; - static thread_local std::chrono::high_resolution_clock::time_point last_async_shader_log; - auto now = std::chrono::high_resolution_clock::now(); - - // Enhanced detection of UI and critical shaders - const bool is_ui_shader = !maxwell3d->regs.zeta_enable; - // Check for blend state - const bool has_blend = maxwell3d->regs.blend.enable[0] != 0; - // Check if texture sampling is likely based on texture units used - const bool has_texture = maxwell3d->regs.tex_header.Address() != 0; - // Check for clear operations - const bool is_clear_operation = maxwell3d->regs.clear_surface.raw != 0; - const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); - const bool small_draw = draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6; - - // Track pipeline usage patterns for better prediction - // Use pipeline address as hash since we don't have a Hash() method - const u64 draw_config_hash = reinterpret_cast(pipeline); - static thread_local std::unordered_map shader_usage_count; - static thread_local std::unordered_map shader_is_frequent; - - // Increment usage counter for this shader - shader_usage_count[draw_config_hash]++; - - // After a certain threshold, mark as frequently used - if (shader_usage_count[draw_config_hash] >= 3) { - shader_is_frequent[draw_config_hash] = true; - } - - // Get shader priority from settings - const int shader_priority = Settings::values.shader_compilation_priority.GetValue(); - - // Always wait for UI shaders if settings specify high priority - if (is_ui_shader && (shader_priority >= 0 || small_draw)) { - return pipeline; - } - - // Wait for frequently used small draw shaders - if (small_draw && shader_is_frequent[draw_config_hash]) { - return pipeline; - } - - // Wait for clear operations as they're usually critical - if (is_clear_operation) { - return pipeline; - } - - // Force wait if high shader priority in settings - if (shader_priority > 1) { - return pipeline; - } - - // Improved depth-based heuristics + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. if (maxwell3d->regs.zeta_enable) { - // Check if this is likely a shadow map or important depth-based effect - // Check if depth write is enabled and color writes are disabled for all render targets - bool depth_only_pass = maxwell3d->regs.depth_write_enabled; - if (depth_only_pass) { - bool all_color_masked = true; - for (size_t i = 0; i < maxwell3d->regs.color_mask.size(); i++) { - // Check if any color component is enabled (R, G, B, A fields of ColorMask) - if ((maxwell3d->regs.color_mask[i].raw & 0x1111) != 0) { - all_color_masked = false; - break; - } - } - - // If depth write enabled and all colors masked, this is likely a shadow pass - if (all_color_masked) { - // Likely a shadow pass, wait for compilation to avoid flickering shadows - return pipeline; - } - } - - // For other depth-enabled renders, use async compilation return nullptr; } - - // Refined small draw detection - if (small_draw) { - // Check if this might be a UI element that we missed - if (has_blend && has_texture) { - // Likely a textured UI element, wait for it - return pipeline; - } - // For other small draws, assume they're one-off effects + // If games are using a small index count, we can assume these are full screen quads. + // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); + if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) { return pipeline; } - - // Log compilation statistics periodically - auto elapsed = std::chrono::duration_cast( - now - last_async_shader_log).count(); - - if (elapsed >= 10) { - async_shader_count = 0; - last_async_shader_log = now; - } - async_shader_count++; - - if (async_shader_count % 100 == 1) { - float progress = 0.5f; // Default to 50% when we can't determine actual progress - if (workers) { - // TODO: Implement progress tracking - } - LOG_DEBUG(Render_OpenGL, "Async shader compilation in progress (count={}), completion={:.1f}%", - async_shader_count, progress * 100.0f); - } - return nullptr; } @@ -709,33 +608,9 @@ std::unique_ptr ShaderCache::CreateComputePipeline( } std::unique_ptr ShaderCache::CreateWorkers() const { - // Calculate optimal number of workers based on available CPU cores - // Leave at least 1 core for main thread and other operations - // Use more cores for more parallelism in shader compilation - const u32 num_worker_threads = std::max(std::thread::hardware_concurrency(), 2U); - const u32 optimal_workers = num_worker_threads <= 3 ? - num_worker_threads - 1 : // On dual/quad core, leave 1 core free - num_worker_threads - 2; // On 6+ core systems, leave 2 cores free for other tasks - - auto worker = std::make_unique( - optimal_workers, - "GlShaderBuilder", - [this] { - auto context = Context{emu_window}; - - // Apply thread priority based on settings - // This allows users to control how aggressive shader compilation is - const int priority = Settings::values.shader_compilation_priority.GetValue(); - if (priority != 0) { - Common::SetCurrentThreadPriority( - priority > 0 ? Common::ThreadPriority::High : Common::ThreadPriority::Low); - } - - return context; - } - ); - - return worker; + return std::make_unique(std::max(std::thread::hardware_concurrency(), 2U) - 1, + "GlShaderBuilder", + [this] { return Context{emu_window}; }); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/layer.cpp b/src/video_core/renderer_opengl/present/layer.cpp index a0f865937..6c7092d22 100644 --- a/src/video_core/renderer_opengl/present/layer.cpp +++ b/src/video_core/renderer_opengl/present/layer.cpp @@ -151,7 +151,7 @@ FramebufferTextureInfo Layer::LoadFBToScreenInfo(const Tegra::FramebufferConfig& // Update existing texture // TODO: Test what happens on hardware when you change the framebuffer dimensions so that // they differ from the LCD resolution. - // TODO: Applications could theoretically crash citron here by specifying too large + // TODO: Applications could theoretically crash yuzu here by specifying too large // framebuffer sizes. We should make sure that this cannot happen. glTextureSubImage2D(framebuffer_texture.resource.handle, 0, 0, 0, framebuffer.width, framebuffer.height, framebuffer_texture.gl_format, diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 5fb54635d..4bbca2d19 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -71,7 +71,7 @@ const char* GetType(GLenum type) { void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* message, const void* user_param) { - const char format[] = "{} {} {}: {}"; + constexpr const char* format = "{} {} {}: {}"; const char* const str_source = GetSource(source); const char* const str_type = GetType(type); diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index cf8c5454c..c3db09424 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -29,15 +28,6 @@ #include "video_core/surface.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -#include "video_core/host_shaders/convert_abgr8_srgb_to_d24s8_frag_spv.h" -#include "video_core/host_shaders/convert_rgba8_to_bgra8_frag_spv.h" -#include "video_core/host_shaders/convert_yuv420_to_rgb_comp_spv.h" -#include "video_core/host_shaders/convert_rgb_to_yuv420_comp_spv.h" -#include "video_core/host_shaders/convert_bc7_to_rgba8_comp_spv.h" -#include "video_core/host_shaders/convert_astc_hdr_to_rgba16f_comp_spv.h" -#include "video_core/host_shaders/convert_rgba16f_to_rgba8_frag_spv.h" -#include "video_core/host_shaders/dither_temporal_frag_spv.h" -#include "video_core/host_shaders/dynamic_resolution_scale_comp_spv.h" namespace Vulkan { @@ -449,15 +439,6 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, convert_d32f_to_abgr8_frag(BuildShader(device, CONVERT_D32F_TO_ABGR8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)), - convert_abgr8_srgb_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_SRGB_TO_D24S8_FRAG_SPV)), - convert_rgba_to_bgra_frag(BuildShader(device, CONVERT_RGBA8_TO_BGRA8_FRAG_SPV)), - convert_yuv420_to_rgb_comp(BuildShader(device, CONVERT_YUV420_TO_RGB_COMP_SPV)), - convert_rgb_to_yuv420_comp(BuildShader(device, CONVERT_RGB_TO_YUV420_COMP_SPV)), - convert_bc7_to_rgba8_comp(BuildShader(device, CONVERT_BC7_TO_RGBA8_COMP_SPV)), - convert_astc_hdr_to_rgba16f_comp(BuildShader(device, CONVERT_ASTC_HDR_TO_RGBA16F_COMP_SPV)), - convert_rgba16f_to_rgba8_frag(BuildShader(device, CONVERT_RGBA16F_TO_RGBA8_FRAG_SPV)), - dither_temporal_frag(BuildShader(device, DITHER_TEMPORAL_FRAG_SPV)), - dynamic_resolution_scale_comp(BuildShader(device, DYNAMIC_RESOLUTION_SCALE_COMP_SPV)), linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)), nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)) {} @@ -608,14 +589,6 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view); } -void BlitImageHelper::ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { - ConvertPipelineDepthTargetEx(convert_abgr8_srgb_to_d24s8_pipeline, - dst_framebuffer->RenderPass(), - convert_abgr8_srgb_to_d24s8_frag); - Convert(*convert_abgr8_srgb_to_d24s8_pipeline, dst_framebuffer, src_image_view); -} - void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask, const std::array& clear_color, const Region2D& dst_region) { @@ -946,11 +919,13 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline( return *clear_stencil_pipelines.back(); } -void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { +void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, + bool is_target_depth) { if (pipeline) { return; } - VkShaderModule frag_shader = *convert_float_to_depth_frag; + VkShaderModule frag_shader = + is_target_depth ? *convert_float_to_depth_frag : *convert_depth_to_float_frag; const std::array stages = MakeStages(*full_screen_vert, frag_shader); pipeline = device.GetLogical().CreateGraphicsPipeline({ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, @@ -964,8 +939,9 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + .pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr, + .pColorBlendState = is_target_depth ? &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO + : &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, .layout = *one_texture_pipeline_layout, .renderPass = renderpass, @@ -975,33 +951,12 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend }); } +void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { + ConvertPipeline(pipeline, renderpass, false); +} + void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { - if (pipeline) { - return; - } - VkShaderModule frag_shader = *convert_depth_to_float_frag; - const std::array stages = MakeStages(*full_screen_vert, frag_shader); - pipeline = device.GetLogical().CreateGraphicsPipeline({ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(stages.size()), - .pStages = stages.data(), - .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pTessellationState = nullptr, - .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, - .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .layout = *one_texture_pipeline_layout, - .renderPass = renderpass, - .subpass = 0, - .basePipelineHandle = VK_NULL_HANDLE, - .basePipelineIndex = 0, - }); + ConvertPipeline(pipeline, renderpass, true); } void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, @@ -1044,100 +999,4 @@ void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRen ConvertPipelineEx(pipeline, renderpass, module, true, true); } -void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, - bool is_target_depth) { - if (pipeline) { - return; - } - VkShaderModule frag_shader = - is_target_depth ? *convert_float_to_depth_frag : *convert_depth_to_float_frag; - const std::array stages = MakeStages(*full_screen_vert, frag_shader); - pipeline = device.GetLogical().CreateGraphicsPipeline({ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(stages.size()), - .pStages = stages.data(), - .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pTessellationState = nullptr, - .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr, - .pColorBlendState = is_target_depth ? &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO - : &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, - .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .layout = *one_texture_pipeline_layout, - .renderPass = renderpass, - .subpass = 0, - .basePipelineHandle = VK_NULL_HANDLE, - .basePipelineIndex = 0, - }); -} - -void BlitImageHelper::ConvertRGBAtoGBRA(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { - ConvertPipeline(convert_rgba_to_bgra_pipeline, - dst_framebuffer->RenderPass(), - false); - Convert(*convert_rgba_to_bgra_pipeline, dst_framebuffer, src_image_view); -} - -void BlitImageHelper::ConvertYUV420toRGB(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { - ConvertPipeline(convert_yuv420_to_rgb_pipeline, - dst_framebuffer->RenderPass(), - false); - Convert(*convert_yuv420_to_rgb_pipeline, dst_framebuffer, src_image_view); -} - -void BlitImageHelper::ConvertRGBtoYUV420(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { - ConvertPipeline(convert_rgb_to_yuv420_pipeline, - dst_framebuffer->RenderPass(), - false); - Convert(*convert_rgb_to_yuv420_pipeline, dst_framebuffer, src_image_view); -} - -void BlitImageHelper::ConvertBC7toRGBA8(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { - ConvertPipeline(convert_bc7_to_rgba8_pipeline, - dst_framebuffer->RenderPass(), - false); - Convert(*convert_bc7_to_rgba8_pipeline, dst_framebuffer, src_image_view); -} - -void BlitImageHelper::ConvertASTCHDRtoRGBA16F(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { - ConvertPipeline(convert_astc_hdr_to_rgba16f_pipeline, - dst_framebuffer->RenderPass(), - false); - Convert(*convert_astc_hdr_to_rgba16f_pipeline, dst_framebuffer, src_image_view); -} - -void BlitImageHelper::ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { - ConvertPipeline(convert_rgba16f_to_rgba8_pipeline, - dst_framebuffer->RenderPass(), - false); - Convert(*convert_rgba16f_to_rgba8_pipeline, dst_framebuffer, src_image_view); -} - -void BlitImageHelper::ApplyDitherTemporal(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { - ConvertPipeline(dither_temporal_pipeline, - dst_framebuffer->RenderPass(), - false); - Convert(*dither_temporal_pipeline, dst_framebuffer, src_image_view); -} - -void BlitImageHelper::ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer, - const ImageView& src_image_view) { - ConvertPipeline(dynamic_resolution_scale_pipeline, - dst_framebuffer->RenderPass(), - false); - Convert(*dynamic_resolution_scale_pipeline, dst_framebuffer, src_image_view); -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index b7bc95263..b2104a59e 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -68,8 +67,6 @@ public: void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); void ConvertD32FToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); @@ -85,15 +82,6 @@ public: u8 stencil_mask, u32 stencil_ref, u32 stencil_compare_mask, const Region2D& dst_region); - void ConvertRGBAtoGBRA(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertYUV420toRGB(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertRGBtoYUV420(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertBC7toRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertASTCHDRtoRGBA16F(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ApplyDitherTemporal(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - void ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); - private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view); @@ -148,15 +136,6 @@ private: vk::ShaderModule convert_d32f_to_abgr8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; vk::ShaderModule convert_s8d24_to_abgr8_frag; - vk::ShaderModule convert_abgr8_srgb_to_d24s8_frag; - vk::ShaderModule convert_rgba_to_bgra_frag; - vk::ShaderModule convert_yuv420_to_rgb_comp; - vk::ShaderModule convert_rgb_to_yuv420_comp; - vk::ShaderModule convert_bc7_to_rgba8_comp; - vk::ShaderModule convert_astc_hdr_to_rgba16f_comp; - vk::ShaderModule convert_rgba16f_to_rgba8_frag; - vk::ShaderModule dither_temporal_frag; - vk::ShaderModule dynamic_resolution_scale_comp; vk::Sampler linear_sampler; vk::Sampler nearest_sampler; @@ -177,15 +156,6 @@ private: vk::Pipeline convert_d32f_to_abgr8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; vk::Pipeline convert_s8d24_to_abgr8_pipeline; - vk::Pipeline convert_abgr8_srgb_to_d24s8_pipeline; - vk::Pipeline convert_rgba_to_bgra_pipeline; - vk::Pipeline convert_yuv420_to_rgb_pipeline; - vk::Pipeline convert_rgb_to_yuv420_pipeline; - vk::Pipeline convert_bc7_to_rgba8_pipeline; - vk::Pipeline convert_astc_hdr_to_rgba16f_pipeline; - vk::Pipeline convert_rgba16f_to_rgba8_pipeline; - vk::Pipeline dither_temporal_pipeline; - vk::Pipeline dynamic_resolution_scale_pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 0c29bbeb7..96fb8fba6 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -9,8 +8,6 @@ #include #include #include -#include -#include #include @@ -38,7 +35,6 @@ #include "video_core/vulkan_common/vulkan_instance.h" #include "video_core/vulkan_common/vulkan_library.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" -#include "video_core/vulkan_common/hybrid_memory.h" #include "video_core/vulkan_common/vulkan_surface.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -127,93 +123,12 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, PresentFiltersForAppletCapture), rasterizer(render_window, gpu, device_memory, device, memory_allocator, state_tracker, scheduler), - hybrid_memory(std::make_unique(device, memory_allocator)), - texture_manager(device, memory_allocator), - shader_manager(device), applet_frame() { if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { turbo_mode.emplace(instance, dld); scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); }); } - - // Initialize HybridMemory system - if (false && Settings::values.use_gpu_memory_manager.GetValue()) { -#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) - try { - // Define memory size with explicit types to avoid conversion warnings - constexpr size_t memory_size_mb = 64; - constexpr size_t memory_size_bytes = memory_size_mb * 1024 * 1024; - - void* guest_memory_base = nullptr; -#if defined(_WIN32) - // On Windows, use VirtualAlloc to reserve (but not commit) memory - const SIZE_T win_size = static_cast(memory_size_bytes); - LPVOID result = VirtualAlloc(nullptr, win_size, MEM_RESERVE, PAGE_NOACCESS); - if (result != nullptr) { - guest_memory_base = result; - } -#else - // On Linux/Android, use aligned_alloc - guest_memory_base = std::aligned_alloc(4096, memory_size_bytes); -#endif - if (guest_memory_base != nullptr) { - try { - hybrid_memory->InitializeGuestMemory(guest_memory_base, memory_size_bytes); - LOG_INFO(Render_Vulkan, "HybridMemory initialized with {} MB of fault-managed memory", memory_size_mb); - } catch (const std::exception&) { -#if defined(_WIN32) - if (guest_memory_base != nullptr) { - const LPVOID win_ptr = static_cast(guest_memory_base); - VirtualFree(win_ptr, 0, MEM_RELEASE); - } -#else - std::free(guest_memory_base); -#endif - throw; - } - } - } catch (const std::exception& e) { - LOG_ERROR(Render_Vulkan, "Failed to initialize HybridMemory: {}", e.what()); - } -#else - LOG_INFO(Render_Vulkan, "Fault-managed memory not supported on this platform"); -#endif - } - - // Initialize enhanced shader compilation system - shader_manager.SetScheduler(&scheduler); - LOG_INFO(Render_Vulkan, "Enhanced shader compilation system initialized"); - - // Preload common shaders if enabled - if (Settings::values.use_asynchronous_shaders.GetValue()) { - // Use a simple shader directory path - can be updated to match Citron's actual path structure - const std::string shader_dir = "./shaders"; - std::vector common_shaders; - - // Add paths to common shaders that should be preloaded - // These will be compiled in parallel for faster startup - try { - if (std::filesystem::exists(shader_dir)) { - for (const auto& entry : std::filesystem::directory_iterator(shader_dir)) { - if (entry.is_regular_file() && entry.path().extension() == ".spv") { - common_shaders.push_back(entry.path().string()); - } - } - - if (!common_shaders.empty()) { - LOG_INFO(Render_Vulkan, "Preloading {} common shaders", common_shaders.size()); - shader_manager.PreloadShaders(common_shaders); - } - } else { - LOG_INFO(Render_Vulkan, "Shader directory not found at {}", shader_dir); - } - } catch (const std::exception& e) { - LOG_ERROR(Render_Vulkan, "Error during shader preloading: {}", e.what()); - } - } - Report(); - InitializePlatformSpecific(); } catch (const vk::Exception& exception) { LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())}; @@ -311,35 +226,6 @@ void RendererVulkan::RenderScreenshot(std::span return; } - // If memory snapshots are enabled, take a snapshot with the screenshot - if (false && Settings::values.enable_memory_snapshots.GetValue() && hybrid_memory) { - try { - const auto now = std::chrono::system_clock::now(); - const auto now_time_t = std::chrono::system_clock::to_time_t(now); - std::tm local_tm; -#ifdef _WIN32 - localtime_s(&local_tm, &now_time_t); -#else - localtime_r(&now_time_t, &local_tm); -#endif - char time_str[128]; - std::strftime(time_str, sizeof(time_str), "%Y%m%d_%H%M%S", &local_tm); - - std::string snapshot_path = fmt::format("snapshots/memory_snapshot_{}.bin", time_str); - hybrid_memory->SaveSnapshot(snapshot_path); - - // Differential snapshot for tracking memory changes - if (false && Settings::values.use_gpu_memory_manager.GetValue()) { - std::string diff_path = fmt::format("snapshots/diff_snapshot_{}.bin", time_str); - hybrid_memory->SaveDifferentialSnapshot(diff_path); - hybrid_memory->ResetDirtyTracking(); - LOG_INFO(Render_Vulkan, "Memory snapshots saved with screenshot"); - } - } catch (const std::exception& e) { - LOG_ERROR(Render_Vulkan, "Failed to save memory snapshot: {}", e.what()); - } - } - const auto& layout{renderer_settings.screenshot_framebuffer_layout}; const auto dst_buffer = RenderToBuffer(framebuffers, layout, VK_FORMAT_B8G8R8A8_UNORM, layout.width * layout.height * 4); @@ -391,81 +277,4 @@ void RendererVulkan::RenderAppletCaptureLayer( CaptureFormat); } -bool RendererVulkan::HandleVulkanError(VkResult result, const std::string& operation) { - if (result == VK_SUCCESS) { - return true; - } - - if (result == VK_ERROR_DEVICE_LOST) { - LOG_CRITICAL(Render_Vulkan, "Vulkan device lost during {}", operation); - RecoverFromError(); - return false; - } - - if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY || result == VK_ERROR_OUT_OF_HOST_MEMORY) { - LOG_CRITICAL(Render_Vulkan, "Vulkan out of memory during {}", operation); - // Potential recovery: clear caches, reduce workload - texture_manager.CleanupTextureCache(); - return false; - } - - LOG_ERROR(Render_Vulkan, "Vulkan error during {}: {}", operation, result); - return false; -} - -void RendererVulkan::RecoverFromError() { - LOG_INFO(Render_Vulkan, "Attempting to recover from Vulkan error"); - - // Wait for device to finish operations - void(device.GetLogical().WaitIdle()); - - // Process all pending commands in our queue - ProcessAllCommands(); - - // Wait for any async shader compilations to finish - shader_manager.WaitForCompilation(); - - // Clean up resources that might be causing problems - texture_manager.CleanupTextureCache(); - - // Reset command buffers and pipelines - scheduler.Flush(); - - LOG_INFO(Render_Vulkan, "Recovery attempt completed"); -} - -void RendererVulkan::InitializePlatformSpecific() { - LOG_INFO(Render_Vulkan, "Initializing platform-specific Vulkan components"); - -#if defined(_WIN32) || defined(_WIN64) - LOG_INFO(Render_Vulkan, "Initializing Vulkan for Windows"); - // Windows-specific initialization -#elif defined(__linux__) - LOG_INFO(Render_Vulkan, "Initializing Vulkan for Linux"); - // Linux-specific initialization -#elif defined(__ANDROID__) - LOG_INFO(Render_Vulkan, "Initializing Vulkan for Android"); - // Android-specific initialization -#else - LOG_INFO(Render_Vulkan, "Platform-specific Vulkan initialization not implemented for this platform"); -#endif - - // Create a compute buffer using the HybridMemory system if enabled - if (false && Settings::values.use_gpu_memory_manager.GetValue()) { - try { - // Create a small compute buffer for testing - const VkDeviceSize buffer_size = 1 * 1024 * 1024; // 1 MB - ComputeBuffer compute_buffer = hybrid_memory->CreateComputeBuffer( - buffer_size, - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT, - MemoryUsage::DeviceLocal); - - LOG_INFO(Render_Vulkan, "Successfully created compute buffer using HybridMemory"); - } catch (const std::exception& e) { - LOG_ERROR(Render_Vulkan, "Failed to create compute buffer: {}", e.what()); - } - } -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 5ac991c2d..fb9d83412 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -7,7 +6,6 @@ #include #include #include -#include #include "common/dynamic_library.h" #include "video_core/host1x/gpu_device_memory_manager.h" @@ -19,11 +17,8 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/vk_turbo_mode.h" -#include "video_core/renderer_vulkan/vk_texture_manager.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" -#include "video_core/vulkan_common/hybrid_memory.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core { @@ -63,9 +58,6 @@ public: return device.GetDriverName(); } - // Enhanced platform-specific initialization - void InitializePlatformSpecific(); - private: void Report() const; @@ -75,10 +67,6 @@ private: void RenderScreenshot(std::span framebuffers); void RenderAppletCaptureLayer(std::span framebuffers); - // Enhanced error handling - bool HandleVulkanError(VkResult result, const std::string& operation); - void RecoverFromError(); - Core::TelemetrySession& telemetry_session; Tegra::MaxwellDeviceMemoryManager& device_memory; Tegra::GPU& gpu; @@ -102,13 +90,6 @@ private: RasterizerVulkan rasterizer; std::optional turbo_mode; - // HybridMemory for advanced memory management - std::unique_ptr hybrid_memory; - - // Enhanced texture and shader management - TextureManager texture_manager; - ShaderManager shader_manager; - Frame applet_frame; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index f154f3073..73e585c2b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -1,10 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include #include -#include #include @@ -39,23 +37,10 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel if (shader_notify) { shader_notify->MarkShaderBuilding(); } - - // Track compilation start time for performance metrics - const auto start_time = std::chrono::high_resolution_clock::now(); - std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), - uniform_buffer_sizes.begin()); - - auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics, start_time] { - // Simplify the high priority determination - we can't use workgroup_size - // because it doesn't exist, so use a simpler heuristic - const bool is_high_priority = false; // Default to false until we can find a better criterion - - if (is_high_priority) { - // Increase thread priority for small compute shaders that are likely part of critical path - Common::SetCurrentThreadPriority(Common::ThreadPriority::High); - } + uniform_buffer_sizes.begin()); + auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics] { DescriptorLayoutBuilder builder{device}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); @@ -64,11 +49,15 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; VkPipelineCreateFlags flags{}; if (device.IsKhrPipelineExecutablePropertiesEnabled()) { flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; } - pipeline = device.GetLogical().CreateComputePipeline( { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, @@ -76,7 +65,8 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel .flags = flags, .stage{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, + .pNext = + device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = *spv_module, @@ -89,15 +79,6 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel }, *pipeline_cache); - // Performance measurement - const auto end_time = std::chrono::high_resolution_clock::now(); - const auto compilation_time = std::chrono::duration_cast( - end_time - start_time).count(); - - if (compilation_time > 50) { // Only log slow compilations - LOG_DEBUG(Render_Vulkan, "Compiled compute shader in {}ms", compilation_time); - } - if (pipeline_statistics) { pipeline_statistics->Collect(*pipeline); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 73eb35116..ec6b3a4b0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -259,16 +258,7 @@ GraphicsPipeline::GraphicsPipeline( std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); num_textures += Shader::NumDescriptors(info->texture_descriptors); } - - // Track compilation start time for performance metrics - const auto start_time = std::chrono::high_resolution_clock::now(); - - auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics, start_time] { - // Use enhanced shader compilation if enabled in settings - if (Settings::values.use_enhanced_shader_building.GetValue()) { - Common::SetCurrentThreadPriority(Common::ThreadPriority::High); - } - + auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; uses_push_descriptor = builder.CanUsePushDescriptor(); descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); @@ -283,17 +273,6 @@ GraphicsPipeline::GraphicsPipeline( const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; Validate(); MakePipeline(render_pass); - - // Performance measurement - const auto end_time = std::chrono::high_resolution_clock::now(); - const auto compilation_time = std::chrono::duration_cast( - end_time - start_time).count(); - - // Log shader compilation time for slow shaders to help diagnose performance issues - if (compilation_time > 100) { // Only log very slow compilations - LOG_DEBUG(Render_Vulkan, "Compiled graphics pipeline in {}ms", compilation_time); - } - if (pipeline_statistics) { pipeline_statistics->Collect(*pipeline); } @@ -313,19 +292,6 @@ GraphicsPipeline::GraphicsPipeline( configure_func = ConfigureFunc(spv_modules, stage_infos); } -GraphicsPipeline* GraphicsPipeline::Clone() const { - // Create a new pipeline that shares the same resources - // This is for pipeline deduplication - - if (!IsBuilt()) { - LOG_WARNING(Render_Vulkan, "Attempted to clone unbuilt pipeline"); - return nullptr; - } - - return const_cast(this); - -} - void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { transition_keys.push_back(transition->key); transitions.push_back(transition); @@ -345,9 +311,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto& regs{maxwell3d->regs}; const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { - // Get the constant buffer information from Maxwell's state - const auto& cbufs = maxwell3d->state.shader_stages[stage].const_buffers; - const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { @@ -359,7 +322,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { ++ssbo_index; } } - + const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(cbufs[desc.cbuf_index].enabled); const u32 index_offset{index << desc.size_shift}; @@ -381,7 +344,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } return TexturePair(gpu_memory->Read(addr), via_header_index); }}; - const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index f4a255118..99e56e9ad 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -84,9 +84,6 @@ public: GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; GraphicsPipeline(const GraphicsPipeline&) = delete; - // Create a deep copy of this pipeline for reuse - [[nodiscard]] GraphicsPipeline* Clone() const; - void AddTransition(GraphicsPipeline* transition); void Configure(bool is_indexed) { @@ -106,35 +103,6 @@ public: return is_built.load(std::memory_order::relaxed); } - // Get hash for the current pipeline configuration - [[nodiscard]] u64 Hash() const noexcept { - return key.Hash(); - } - - // Get the last pipeline this transitioned from - [[nodiscard]] GraphicsPipeline* GetLastTransitionedPipeline() const noexcept { - // For predictive loading, return a related pipeline if available - if (!transitions.empty()) { - return transitions.front(); - } - return nullptr; - } - - // Get pipeline info string for prediction - [[nodiscard]] std::string GetPipelineInfo() const noexcept { - std::string result = fmt::format("pipeline_{:016x}", Hash()); - - // Include information about stages - for (size_t i = 0; i < NUM_STAGES; ++i) { - // Check if this stage is active by checking if any varying stores are enabled - if (!stage_infos[i].stores.mask.none()) { - result += fmt::format("_s{}", i); - } - } - - return result; - } - template static auto MakeConfigureSpecFunc() { return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl(is_indexed); }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 72f367dd1..996e2bec9 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -265,42 +264,18 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program } size_t GetTotalPipelineWorkers() { - const size_t num_cores = std::max(static_cast(std::thread::hardware_concurrency()), 2ULL); - - // Calculate optimal number of workers based on available CPU cores - size_t optimal_workers; - + const size_t max_core_threads = + std::max(static_cast(std::thread::hardware_concurrency()), 2ULL) - 1ULL; #ifdef ANDROID - // Mobile devices need more conservative threading to avoid thermal issues - // Leave more cores free on Android for system processes and other apps - constexpr size_t min_free_cores = 3ULL; - if (num_cores <= min_free_cores + 1) { - return 1ULL; // At least one worker + // Leave at least a few cores free in android + constexpr size_t free_cores = 3ULL; + if (max_core_threads <= free_cores) { + return 1ULL; } - optimal_workers = num_cores - min_free_cores; + return max_core_threads - free_cores; #else - // Desktop systems can use more aggressive threading - if (num_cores <= 3) { - optimal_workers = num_cores - 1; // Dual/triple core: leave 1 core free - } else if (num_cores <= 6) { - optimal_workers = num_cores - 2; // Quad/hex core: leave 2 cores free - } else { - // For 8+ core systems, use more workers but still leave some cores for other tasks - optimal_workers = num_cores - (num_cores / 4); // Leave ~25% of cores free - } + return max_core_threads; #endif - - // Apply threading priority via shader_compilation_priority setting if enabled - const int priority = Settings::values.shader_compilation_priority.GetValue(); - if (priority > 0) { - // High priority - use more cores for shader compilation - optimal_workers = std::min(optimal_workers + 1, num_cores - 1); - } else if (priority < 0) { - // Low priority - use fewer cores for shader compilation - optimal_workers = (optimal_workers >= 2) ? optimal_workers - 1 : 1; - } - - return optimal_workers; } } // Anonymous namespace @@ -611,128 +586,21 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const if (pipeline->IsBuilt()) { return pipeline; } - if (!use_asynchronous_shaders) { return pipeline; } - - // Advanced heuristics for smarter async shader compilation - - // Track stutter metrics for better debugging and performance tuning - static thread_local u32 async_shader_count = 0; - static thread_local std::chrono::high_resolution_clock::time_point last_async_shader_log; - auto now = std::chrono::high_resolution_clock::now(); - - // Better detection of UI and critical shaders - const bool is_ui_shader = !maxwell3d->regs.zeta_enable; - // Check for blend state - const bool has_blend = maxwell3d->regs.blend.enable[0] != 0; - // Check if texture sampling is likely based on texture units used - const bool has_texture = maxwell3d->regs.tex_header.Address() != 0; - // Check for clear operations - const bool is_clear_operation = maxwell3d->regs.clear_surface.raw != 0; - const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); - const bool small_draw = draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6; - - // Get shader priority from settings - const int shader_priority = Settings::values.shader_compilation_priority.GetValue(); - - // Record historical usage patterns for future prediction - // Create a unique identifier for this shader configuration - const u64 draw_config_hash = pipeline->Hash(); - static thread_local std::unordered_map shader_usage_count; - static thread_local std::unordered_map shader_is_frequent; - - // Track how often this shader is used - shader_usage_count[draw_config_hash]++; - - // After a certain number of uses, consider this a frequently used shader - // which should get higher compilation priority in the future - if (shader_usage_count[draw_config_hash] >= 3) { - shader_is_frequent[draw_config_hash] = true; - - // Predict related shaders that might be used soon - if (auto related_pipeline = pipeline->GetLastTransitionedPipeline()) { - // Use a string-based representation of the pipeline for prediction - std::string pipeline_info = fmt::format("pipeline_{:016x}", related_pipeline->Hash()); - PredictShader(pipeline_info); - } - } - - // Always wait for UI shaders if settings specify high priority - if (is_ui_shader && (shader_priority >= 0 || small_draw)) { - return pipeline; - } - - // Wait for frequently used small draw shaders - if (small_draw && shader_is_frequent[draw_config_hash]) { - return pipeline; - } - - // Wait for clear operations as they're usually critical - if (is_clear_operation) { - return pipeline; - } - - // Force wait if high shader priority in settings - if (shader_priority > 1) { - return pipeline; - } - - // More intelligent depth-based heuristics + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. if (maxwell3d->regs.zeta_enable) { - // Check if this is likely a shadow map or important depth-based effect - // Check if depth write is enabled and color writes are disabled for all render targets - bool depth_only_pass = maxwell3d->regs.depth_write_enabled; - if (depth_only_pass) { - bool all_color_masked = true; - for (size_t i = 0; i < maxwell3d->regs.color_mask.size(); i++) { - // Check if any color component is enabled (R, G, B, A fields of ColorMask) - if ((maxwell3d->regs.color_mask[i].raw & 0x1111) != 0) { - all_color_masked = false; - break; - } - } - - // If depth write enabled and all colors masked, this is likely a shadow pass - if (all_color_masked) { - // This is likely a shadow pass, which is important for visual quality - // We should wait for these to compile to avoid flickering shadows - return pipeline; - } - } - - // For other depth-enabled renders, use async compilation return nullptr; } - - // Refine small draw detection - if (small_draw) { - // Check if this might be a UI element that we missed - if (has_blend && has_texture) { - // Likely a textured UI element, wait for it - return pipeline; - } - // For other small draws, assume they're one-off effects + // If games are using a small index count, we can assume these are full screen quads. + // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); + if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) { return pipeline; } - - // Track and log async shader statistics periodically - auto elapsed = std::chrono::duration_cast( - now - last_async_shader_log).count(); - - if (elapsed >= 10) { // Log every 10 seconds - async_shader_count = 0; - last_async_shader_log = now; - } - async_shader_count++; - - // Log less frequently to avoid spamming log - if (async_shader_count % 100 == 1) { - LOG_DEBUG(Render_Vulkan, "Async shader compilation in progress (count={}), completion={:.1f}%", - async_shader_count, GetShaderCompilationProgress() * 100.0f); - } - return nullptr; } @@ -740,22 +608,6 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineCacheKey& key, std::span envs, PipelineStatistics* statistics, bool build_in_parallel) try { - - // Pipeline deduplication optimization - { - std::lock_guard lock{pipeline_cache}; - const auto [pair, new_pipeline]{graphics_cache.try_emplace(key)}; - if (!new_pipeline) { - // Found existing pipeline in cache - auto& pipeline = pair->second; - if (pipeline) { - // Return the existing pipeline - LOG_DEBUG(Render_Vulkan, "Reusing existing pipeline for key 0x{:016x}", key.Hash()); - return std::unique_ptr(pipeline->Clone()); - } - } - } - auto hash = key.Hash(); LOG_INFO(Render_Vulkan, "0x{:016x}", hash); size_t env_index{0}; @@ -766,52 +618,46 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( // Layer passthrough generation for devices without VK_EXT_shader_viewport_index_layer Shader::IR::Program* layer_source_program{}; - // Memory optimization: Create a scope for program translation - { - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const bool is_emulated_stage = layer_source_program != nullptr && - index == static_cast(Maxwell::ShaderType::Geometry); - if (key.unique_hashes[index] == 0 && is_emulated_stage) { - auto topology = MaxwellToOutputTopology(key.state.topology); - programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info, - *layer_source_program, topology); - continue; - } - if (key.unique_hashes[index] == 0) { - continue; - } - Shader::Environment& env{*envs[env_index]}; - ++env_index; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + const bool is_emulated_stage = layer_source_program != nullptr && + index == static_cast(Maxwell::ShaderType::Geometry); + if (key.unique_hashes[index] == 0 && is_emulated_stage) { + auto topology = MaxwellToOutputTopology(key.state.topology); + programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info, + *layer_source_program, topology); + continue; + } + if (key.unique_hashes[index] == 0) { + continue; + } + Shader::Environment& env{*envs[env_index]}; + ++env_index; - const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; - Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); - if (!uses_vertex_a || index != 1) { - // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); - } else { - // VertexB path when VertexA is present. - auto& program_va{programs[0]}; - auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; - programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); - } + const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + // Normal path + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); + } else { + // VertexB path when VertexA is present. + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); + } - if (Settings::values.dump_shaders) { - env.Dump(hash, key.unique_hashes[index]); - } + if (Settings::values.dump_shaders) { + env.Dump(hash, key.unique_hashes[index]); + } - if (programs[index].info.requires_layer_emulation) { - layer_source_program = &programs[index]; - } + if (programs[index].info.requires_layer_emulation) { + layer_source_program = &programs[index]; } } - std::array infos{}; std::array modules; const Shader::IR::Program* previous_stage{}; Shader::Backend::Bindings binding; - - // Memory optimization: Process one stage at a time and free intermediate memory for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { const bool is_emulated_stage = layer_source_program != nullptr && @@ -825,38 +671,23 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - // Prioritize memory efficiency by encapsulating SPIR-V generation - { - const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; - ConvertLegacyToGeneric(program, runtime_info); - const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; - device.SaveShader(code); - modules[stage_index] = BuildShader(device, code); - if (device.HasDebuggingToolAttached()) { - const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; - modules[stage_index].SetObjectNameEXT(name.c_str()); - } + const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; + ConvertLegacyToGeneric(program, runtime_info); + const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; + device.SaveShader(code); + modules[stage_index] = BuildShader(device, code); + if (device.HasDebuggingToolAttached()) { + const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; + modules[stage_index].SetObjectNameEXT(name.c_str()); } - previous_stage = &program; } - - // Use improved thread worker mechanism for better async compilation Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - auto pipeline = std::make_unique( + return std::make_unique( scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device, descriptor_pool, guest_descriptor_queue, thread_worker, statistics, render_pass_cache, key, std::move(modules), infos); - // Cache the result for future deduplication - if (pipeline) { - std::lock_guard lock{pipeline_cache}; - // Store a clone that can be used later - graphics_cache[key] = std::unique_ptr(pipeline->Clone()); - } - - return pipeline; - } catch (const Shader::Exception& exception) { auto hash = key.Hash(); size_t env_index{0}; @@ -936,23 +767,6 @@ std::unique_ptr PipelineCache::CreateComputePipeline( } auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; - - // Add support for bindless texture constant buffer only if needed - if (program.info.storage_buffers_descriptors.size() > 0) { - // Check if a constant buffer at index 0 already exists - const bool has_cb0 = std::any_of(program.info.constant_buffer_descriptors.begin(), - program.info.constant_buffer_descriptors.end(), - [](const auto& cb) { return cb.index == 0; }); - - // Only add if not already present - if (!has_cb0) { - Shader::ConstantBufferDescriptor desc; - desc.index = 0; - desc.count = 1; - program.info.constant_buffer_descriptors.push_back(desc); - } - } - const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; @@ -971,7 +785,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( } void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename, - const vk::PipelineCache& vk_pipeline_cache, + const vk::PipelineCache& pipeline_cache, u32 cache_version) try { std::ofstream file(filename, std::ios::binary); file.exceptions(std::ifstream::failbit); @@ -985,10 +799,10 @@ void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& fi size_t cache_size = 0; std::vector cache_data; - if (vk_pipeline_cache) { - vk_pipeline_cache.Read(&cache_size, nullptr); + if (pipeline_cache) { + pipeline_cache.Read(&cache_size, nullptr); cache_data.resize(cache_size); - vk_pipeline_cache.Read(&cache_size, cache_data.data()); + pipeline_cache.Read(&cache_size, cache_data.data()); } file.write(cache_data.data(), cache_size); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index fa9960d12..797700128 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -11,7 +10,6 @@ #include #include #include -#include #include "common/common_types.h" #include "common/thread_worker.h" @@ -159,9 +157,6 @@ private: std::unordered_map> compute_cache; std::unordered_map> graphics_cache; - // Mutex for thread-safe pipeline cache access - mutable std::mutex pipeline_cache; - ShaderPools main_pools; Shader::Profile profile; diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index a63513bc0..7a0a2b154 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -1,181 +1,15 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include "common/common_types.h" -#include "common/logging/log.h" #include "video_core/renderer_vulkan/vk_shader_util.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -#define SHADER_CACHE_DIR "./shader_cache" - namespace Vulkan { -// Global command submission queue for asynchronous operations -std::mutex commandQueueMutex; -std::queue> commandQueue; -std::condition_variable commandQueueCondition; -std::atomic isCommandQueueActive{true}; -std::thread commandQueueThread; - -// Pointer to Citron's scheduler for integration -Scheduler* globalScheduler = nullptr; - -// Constants for thread pool and shader management -constexpr size_t DEFAULT_THREAD_POOL_SIZE = 4; -constexpr size_t MAX_THREAD_POOL_SIZE = 8; -constexpr u32 SHADER_PRIORITY_CRITICAL = 0; -constexpr u32 SHADER_PRIORITY_HIGH = 1; -constexpr u32 SHADER_PRIORITY_NORMAL = 2; -constexpr u32 SHADER_PRIORITY_LOW = 3; - -// Thread pool for shader compilation -std::vector g_thread_pool; -std::queue> g_work_queue; -std::mutex g_work_queue_mutex; -std::condition_variable g_work_queue_cv; -std::atomic g_thread_pool_initialized = false; -std::atomic g_shutdown_thread_pool = false; -std::atomic g_active_compilation_tasks = 0; -std::atomic g_total_compilation_tasks = 0; -std::atomic g_completed_compilation_tasks = 0; - -// Priority queue for shader compilation -struct ShaderCompilationTask { - std::function task; - u32 priority; - std::chrono::high_resolution_clock::time_point enqueue_time; - - bool operator<(const ShaderCompilationTask& other) const { - // Lower priority value means higher actual priority - if (priority != other.priority) { - return priority > other.priority; - } - // If priorities are equal, use FIFO ordering - return enqueue_time > other.enqueue_time; - } -}; -std::priority_queue g_priority_work_queue; - -// Predictive shader loading -std::unordered_set g_predicted_shaders; -std::mutex g_predicted_shaders_mutex; - -// Command queue worker thread (multi-threaded command recording) -void CommandQueueWorker() { - while (isCommandQueueActive.load()) { - std::function command; - { - std::unique_lock lock(commandQueueMutex); - if (commandQueue.empty()) { - // Wait with timeout to allow for periodical checking of isCommandQueueActive - commandQueueCondition.wait_for(lock, std::chrono::milliseconds(100), - []{ return !commandQueue.empty() || !isCommandQueueActive.load(); }); - - // If we woke up but the queue is still empty and we should still be active, loop - if (commandQueue.empty()) { - continue; - } - } - - command = commandQueue.front(); - commandQueue.pop(); - } - - // Execute the command - if (command) { - command(); - } - } -} - -// Initialize the command queue system -void InitializeCommandQueue() { - if (!commandQueueThread.joinable()) { - isCommandQueueActive.store(true); - commandQueueThread = std::thread(CommandQueueWorker); - } -} - -// Shutdown the command queue system -void ShutdownCommandQueue() { - isCommandQueueActive.store(false); - commandQueueCondition.notify_all(); - - if (commandQueueThread.joinable()) { - commandQueueThread.join(); - } -} - -// Submit a command to the queue for asynchronous execution -void SubmitCommandToQueue(std::function command) { - { - std::lock_guard lock(commandQueueMutex); - commandQueue.push(command); - } - commandQueueCondition.notify_one(); -} - -// Set the global scheduler reference for command integration -void SetGlobalScheduler(Scheduler* scheduler) { - globalScheduler = scheduler; -} - -// Submit a Vulkan command to the existing Citron scheduler -void SubmitToScheduler(std::function command) { - if (globalScheduler) { - globalScheduler->Record(std::move(command)); - } else { - LOG_WARNING(Render_Vulkan, "Trying to submit to scheduler but no scheduler is set"); - } -} - -// Flush the Citron scheduler - use when needing to ensure commands are executed -u64 FlushScheduler(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { - if (globalScheduler) { - return globalScheduler->Flush(signal_semaphore, wait_semaphore); - } else { - LOG_WARNING(Render_Vulkan, "Trying to flush scheduler but no scheduler is set"); - return 0; - } -} - -// Process both command queue and scheduler commands -void ProcessAllCommands() { - // Process our command queue first - { - std::unique_lock lock(commandQueueMutex); - while (!commandQueue.empty()) { - auto command = commandQueue.front(); - commandQueue.pop(); - lock.unlock(); - - command(); - - lock.lock(); - } - } - - // Then flush the scheduler if it exists - if (globalScheduler) { - globalScheduler->Flush(); - } -} - vk::ShaderModule BuildShader(const Device& device, std::span code) { return device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, @@ -186,589 +20,4 @@ vk::ShaderModule BuildShader(const Device& device, std::span code) { }); } -bool IsShaderValid(VkShaderModule shader_module) { - // TODO: validate the shader by checking if it's null - // or by examining SPIR-V data for correctness [ZEP] - return shader_module != VK_NULL_HANDLE; -} - -// Initialize thread pool for shader compilation -void InitializeThreadPool() { - if (g_thread_pool_initialized) { - return; - } - - std::lock_guard lock(g_work_queue_mutex); - g_shutdown_thread_pool = false; - - // Determine optimal thread count based on system - const size_t hardware_threads = std::max(std::thread::hardware_concurrency(), 2u); - const size_t thread_count = std::min(hardware_threads - 1, MAX_THREAD_POOL_SIZE); - - LOG_INFO(Render_Vulkan, "Initializing shader compilation thread pool with {} threads", thread_count); - - for (size_t i = 0; i < thread_count; ++i) { - g_thread_pool.emplace_back([]() { - while (!g_shutdown_thread_pool) { - std::function task; - { - std::unique_lock thread_pool_lock(g_work_queue_mutex); - g_work_queue_cv.wait(thread_pool_lock, [] { - return g_shutdown_thread_pool || !g_priority_work_queue.empty(); - }); - - if (g_shutdown_thread_pool && g_priority_work_queue.empty()) { - break; - } - - if (!g_priority_work_queue.empty()) { - ShaderCompilationTask highest_priority_task = g_priority_work_queue.top(); - g_priority_work_queue.pop(); - task = std::move(highest_priority_task.task); - } - } - - if (task) { - g_active_compilation_tasks++; - task(); - g_active_compilation_tasks--; - g_completed_compilation_tasks++; - } - } - }); - } - - g_thread_pool_initialized = true; -} - -// Shutdown thread pool -void ShutdownThreadPool() { - if (!g_thread_pool_initialized) { - return; - } - - { - std::lock_guard lock(g_work_queue_mutex); - g_shutdown_thread_pool = true; - } - - g_work_queue_cv.notify_all(); - - for (auto& thread : g_thread_pool) { - if (thread.joinable()) { - thread.join(); - } - } - - g_thread_pool.clear(); - g_thread_pool_initialized = false; - - LOG_INFO(Render_Vulkan, "Shader compilation thread pool shutdown"); -} - -// Submit work to thread pool with priority -void SubmitShaderCompilationTask(std::function task, u32 priority) { - if (!g_thread_pool_initialized) { - InitializeThreadPool(); - } - - { - std::lock_guard work_queue_lock(g_work_queue_mutex); - g_priority_work_queue.push({ - std::move(task), - priority, - std::chrono::high_resolution_clock::now() - }); - g_total_compilation_tasks++; - } - - g_work_queue_cv.notify_one(); -} - -// Get shader compilation progress (0.0f - 1.0f) -float GetShaderCompilationProgress() { - const size_t total = g_total_compilation_tasks.load(); - if (total == 0) { - return 1.0f; - } - - const size_t completed = g_completed_compilation_tasks.load(); - return static_cast(completed) / static_cast(total); -} - -// Check if any shader compilation is in progress -bool IsShaderCompilationInProgress() { - return g_active_compilation_tasks.load() > 0; -} - -// Add shader to prediction list for preloading -void PredictShader(const std::string& shader_path) { - std::lock_guard lock(g_predicted_shaders_mutex); - g_predicted_shaders.insert(shader_path); -} - -// Preload predicted shaders -void PreloadPredictedShaders(const Device& device) { - std::unordered_set shaders_to_load; - { - std::lock_guard lock(g_predicted_shaders_mutex); - shaders_to_load = g_predicted_shaders; - g_predicted_shaders.clear(); - } - - if (shaders_to_load.empty()) { - return; - } - - LOG_INFO(Render_Vulkan, "Preloading {} predicted shaders", shaders_to_load.size()); - - for (const auto& shader_path : shaders_to_load) { - // Queue with low priority since these are predictions - AsyncCompileShader(device, shader_path, [](VkShaderModule) {}, SHADER_PRIORITY_LOW); - } -} - -// Atomic flag for tracking shader compilation status -std::atomic compilingShader(false); - -void AsyncCompileShader(const Device& device, const std::string& shader_path, - std::function callback, u32 priority) { - LOG_INFO(Render_Vulkan, "Asynchronously compiling shader: {}", shader_path); - - // Create shader cache directory if it doesn't exist - if (!std::filesystem::exists(SHADER_CACHE_DIR)) { - std::filesystem::create_directory(SHADER_CACHE_DIR); - } - - // Initialize thread pool if needed - if (!g_thread_pool_initialized) { - InitializeThreadPool(); - } - - // Submit to thread pool with priority - SubmitShaderCompilationTask([device_ptr = &device, shader_path, callback = std::move(callback)]() { - auto startTime = std::chrono::high_resolution_clock::now(); - - try { - std::vector spir_v; - bool success = false; - - // Check if the file exists and attempt to read it - if (std::filesystem::exists(shader_path)) { - std::ifstream shader_file(shader_path, std::ios::binary); - if (shader_file) { - shader_file.seekg(0, std::ios::end); - size_t file_size = static_cast(shader_file.tellg()); - shader_file.seekg(0, std::ios::beg); - - spir_v.resize(file_size / sizeof(u32)); - if (shader_file.read(reinterpret_cast(spir_v.data()), file_size)) { - success = true; - } - } - } - - if (success) { - vk::ShaderModule shader = BuildShader(*device_ptr, spir_v); - if (IsShaderValid(*shader)) { - // Cache the compiled shader to disk for faster loading next time - std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" + - std::filesystem::path(shader_path).filename().string() + ".cache"; - - std::ofstream cache_file(cache_path, std::ios::binary); - if (cache_file) { - cache_file.write(reinterpret_cast(spir_v.data()), - spir_v.size() * sizeof(u32)); - } - - auto endTime = std::chrono::high_resolution_clock::now(); - std::chrono::duration duration = endTime - startTime; - LOG_INFO(Render_Vulkan, "Shader compiled in {:.2f} seconds: {}", - duration.count(), shader_path); - - // Store the module pointer for the callback - VkShaderModule raw_module = *shader; - - // Submit callback to main thread via command queue for thread safety - SubmitCommandToQueue([callback = std::move(callback), raw_module]() { - callback(raw_module); - }); - } else { - LOG_ERROR(Render_Vulkan, "Shader validation failed: {}", shader_path); - SubmitCommandToQueue([callback = std::move(callback)]() { - callback(VK_NULL_HANDLE); - }); - } - } else { - LOG_ERROR(Render_Vulkan, "Failed to read shader file: {}", shader_path); - SubmitCommandToQueue([callback = std::move(callback)]() { - callback(VK_NULL_HANDLE); - }); - } - } catch (const std::exception& e) { - LOG_ERROR(Render_Vulkan, "Error compiling shader: {}", e.what()); - SubmitCommandToQueue([callback = std::move(callback)]() { - callback(VK_NULL_HANDLE); - }); - } - }, priority); -} - -// Overload for backward compatibility -void AsyncCompileShader(const Device& device, const std::string& shader_path, - std::function callback) { - AsyncCompileShader(device, shader_path, std::move(callback), SHADER_PRIORITY_NORMAL); -} - -ShaderManager::ShaderManager(const Device& device_) : device(device_) { - // Initialize command queue system - InitializeCommandQueue(); - - // Initialize thread pool for shader compilation - InitializeThreadPool(); -} - -ShaderManager::~ShaderManager() { - // Wait for any pending compilations to finish - WaitForCompilation(); - - // Clean up shader modules - std::lock_guard lock(shader_mutex); - shader_cache.clear(); - - // Shutdown thread pool - ShutdownThreadPool(); - - // Shutdown command queue - ShutdownCommandQueue(); -} - -VkShaderModule ShaderManager::GetShaderModule(const std::string& shader_path) { - // Check in-memory cache first - { - std::lock_guard lock(shader_mutex); - auto it = shader_cache.find(shader_path); - if (it != shader_cache.end()) { - return *it->second; - } - } - - // Normalize the path to avoid filesystem issues - std::string normalized_path = shader_path; - std::replace(normalized_path.begin(), normalized_path.end(), '\\', '/'); - - // Check if shader exists - if (!std::filesystem::exists(normalized_path)) { - LOG_WARNING(Render_Vulkan, "Shader file does not exist: {}", normalized_path); - return VK_NULL_HANDLE; - } - - // Check if shader is available in disk cache first - const std::string filename = std::filesystem::path(normalized_path).filename().string(); - std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" + filename + ".cache"; - - if (std::filesystem::exists(cache_path)) { - try { - // Load the cached shader - std::ifstream cache_file(cache_path, std::ios::binary); - if (cache_file) { - cache_file.seekg(0, std::ios::end); - size_t file_size = static_cast(cache_file.tellg()); - - if (file_size > 0 && file_size % sizeof(u32) == 0) { - cache_file.seekg(0, std::ios::beg); - std::vector spir_v; - spir_v.resize(file_size / sizeof(u32)); - - if (cache_file.read(reinterpret_cast(spir_v.data()), file_size)) { - vk::ShaderModule shader = BuildShader(device, spir_v); - if (IsShaderValid(*shader)) { - // Store in memory cache - std::lock_guard lock(shader_mutex); - shader_cache[normalized_path] = std::move(shader); - LOG_INFO(Render_Vulkan, "Loaded shader from cache: {}", normalized_path); - return *shader_cache[normalized_path]; - } - } - } - } - } catch (const std::exception& e) { - LOG_WARNING(Render_Vulkan, "Failed to load shader from cache: {}", e.what()); - // Continue to load from original file - } - } - - // Try to load the shader directly if cache load failed - if (LoadShader(normalized_path)) { - std::lock_guard lock(shader_mutex); - return *shader_cache[normalized_path]; - } - - LOG_ERROR(Render_Vulkan, "Failed to load shader: {}", normalized_path); - return VK_NULL_HANDLE; -} - -void ShaderManager::ReloadShader(const std::string& shader_path) { - LOG_INFO(Render_Vulkan, "Reloading shader: {}", shader_path); - - // Remove the old shader from cache - { - std::lock_guard lock(shader_mutex); - shader_cache.erase(shader_path); - } - - // Load the shader again - LoadShader(shader_path); -} - -bool ShaderManager::LoadShader(const std::string& shader_path) { - LOG_INFO(Render_Vulkan, "Loading shader from: {}", shader_path); - - if (!std::filesystem::exists(shader_path)) { - LOG_ERROR(Render_Vulkan, "Shader file does not exist: {}", shader_path); - return false; - } - - try { - std::vector spir_v; - std::ifstream shader_file(shader_path, std::ios::binary); - - if (!shader_file.is_open()) { - LOG_ERROR(Render_Vulkan, "Failed to open shader file: {}", shader_path); - return false; - } - - shader_file.seekg(0, std::ios::end); - const size_t file_size = static_cast(shader_file.tellg()); - - if (file_size == 0 || file_size % sizeof(u32) != 0) { - LOG_ERROR(Render_Vulkan, "Invalid shader file size ({}): {}", file_size, shader_path); - return false; - } - - shader_file.seekg(0, std::ios::beg); - spir_v.resize(file_size / sizeof(u32)); - - if (!shader_file.read(reinterpret_cast(spir_v.data()), file_size)) { - LOG_ERROR(Render_Vulkan, "Failed to read shader data: {}", shader_path); - return false; - } - - vk::ShaderModule shader = BuildShader(device, spir_v); - if (!IsShaderValid(*shader)) { - LOG_ERROR(Render_Vulkan, "Created shader module is invalid: {}", shader_path); - return false; - } - - // Store in memory cache - { - std::lock_guard lock(shader_mutex); - shader_cache[shader_path] = std::move(shader); - } - - // Also store in disk cache for future use - try { - if (!std::filesystem::exists(SHADER_CACHE_DIR)) { - std::filesystem::create_directory(SHADER_CACHE_DIR); - } - - std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" + - std::filesystem::path(shader_path).filename().string() + ".cache"; - - std::ofstream cache_file(cache_path, std::ios::binary); - if (cache_file.is_open()) { - cache_file.write(reinterpret_cast(spir_v.data()), - spir_v.size() * sizeof(u32)); - - if (!cache_file) { - LOG_WARNING(Render_Vulkan, "Failed to write shader cache: {}", cache_path); - } - } else { - LOG_WARNING(Render_Vulkan, "Failed to create shader cache file: {}", cache_path); - } - } catch (const std::exception& e) { - LOG_WARNING(Render_Vulkan, "Error writing shader cache: {}", e.what()); - // Continue even if disk cache fails - } - - return true; - } catch (const std::exception& e) { - LOG_ERROR(Render_Vulkan, "Error loading shader: {}", e.what()); - return false; - } -} - -void ShaderManager::WaitForCompilation() { - // Wait until no shader is being compiled - while (IsShaderCompilationInProgress()) { - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - } - - // Process any pending commands in the queue - std::unique_lock lock(commandQueueMutex); - while (!commandQueue.empty()) { - auto command = commandQueue.front(); - commandQueue.pop(); - lock.unlock(); - - command(); - - lock.lock(); - } -} - -// Integrate with Citron's scheduler for shader operations -void ShaderManager::SetScheduler(Scheduler* scheduler) { - SetGlobalScheduler(scheduler); -} - -// Load multiple shaders in parallel -void ShaderManager::PreloadShaders(const std::vector& shader_paths) { - if (shader_paths.empty()) { - return; - } - - LOG_INFO(Render_Vulkan, "Preloading {} shaders", shader_paths.size()); - - // Track shaders that need to be loaded - std::unordered_set shaders_to_load; - - // First check which shaders are not already cached - { - std::lock_guard lock(shader_mutex); - for (const auto& path : shader_paths) { - if (shader_cache.find(path) == shader_cache.end()) { - // Also check disk cache - if (std::filesystem::exists(path)) { - std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" + - std::filesystem::path(path).filename().string() + ".cache"; - if (!std::filesystem::exists(cache_path)) { - shaders_to_load.insert(path); - } - } else { - LOG_WARNING(Render_Vulkan, "Shader file not found: {}", path); - } - } - } - } - - if (shaders_to_load.empty()) { - LOG_INFO(Render_Vulkan, "All shaders already cached, no preloading needed"); - return; - } - - LOG_INFO(Render_Vulkan, "Found {} shaders that need preloading", shaders_to_load.size()); - - // Use a thread pool to load shaders in parallel - const size_t max_threads = std::min(std::thread::hardware_concurrency(), - static_cast(4)); - std::vector> futures; - - for (const auto& path : shaders_to_load) { - if (!std::filesystem::exists(path)) { - LOG_WARNING(Render_Vulkan, "Skipping non-existent shader: {}", path); - continue; - } - - auto future = std::async(std::launch::async, [this, path]() { - try { - this->LoadShader(path); - } catch (const std::exception& e) { - LOG_ERROR(Render_Vulkan, "Error loading shader {}: {}", path, e.what()); - } - }); - futures.push_back(std::move(future)); - - // Limit max parallel threads - if (futures.size() >= max_threads) { - futures.front().wait(); - futures.erase(futures.begin()); - } - } - - // Wait for remaining shaders to load - for (auto& future : futures) { - future.wait(); - } - - LOG_INFO(Render_Vulkan, "Finished preloading shaders"); -} - -// Batch load multiple shaders with priorities -void ShaderManager::BatchLoadShaders(const std::vector& shader_paths, - const std::vector& priorities) { - if (shader_paths.empty()) { - return; - } - - LOG_INFO(Render_Vulkan, "Batch loading {} shaders", shader_paths.size()); - - for (size_t i = 0; i < shader_paths.size(); ++i) { - const auto& path = shader_paths[i]; - u32 priority = i < priorities.size() ? priorities[i] : SHADER_PRIORITY_NORMAL; - - AsyncCompileShader(device, path, [this, path](VkShaderModule raw_module) { - if (raw_module != VK_NULL_HANDLE) { - // Note: We don't use the raw_module directly as we can't create a proper vk::ShaderModule wrapper. - // Instead, we'll load the shader again using the LoadShader method which properly handles - // the creation of the vk::ShaderModule. - - // LoadShader will create the shader module and store it in shader_cache - if (LoadShader(path)) { - LOG_INFO(Render_Vulkan, "Loaded shader module for {}", path); - } else { - LOG_ERROR(Render_Vulkan, "Failed to load shader module for {}", path); - } - } - }, priority); - } -} - -// Preload all shaders in a directory with automatic prioritization -void ShaderManager::PreloadShaderDirectory(const std::string& directory_path) { - if (!std::filesystem::exists(directory_path)) { - LOG_WARNING(Render_Vulkan, "Shader directory does not exist: {}", directory_path); - return; - } - - std::vector shader_paths; - std::vector priorities; - - for (const auto& entry : std::filesystem::directory_iterator(directory_path)) { - if (entry.is_regular_file()) { - const auto& path = entry.path().string(); - const auto extension = entry.path().extension().string(); - - // Only load shader files - if (extension == ".spv" || extension == ".glsl" || extension == ".vert" || - extension == ".frag" || extension == ".comp") { - - shader_paths.push_back(path); - - // Assign priorities based on filename patterns - // This is a simple heuristic and will be improved - const auto filename = entry.path().filename().string(); - if (filename.find("ui") != std::string::npos || - filename.find("menu") != std::string::npos) { - priorities.push_back(SHADER_PRIORITY_CRITICAL); - } else if (filename.find("effect") != std::string::npos || - filename.find("post") != std::string::npos) { - priorities.push_back(SHADER_PRIORITY_HIGH); - } else { - priorities.push_back(SHADER_PRIORITY_NORMAL); - } - } - } - } - - if (!shader_paths.empty()) { - BatchLoadShaders(shader_paths, priorities); - } -} - -// Get current compilation progress -float ShaderManager::GetCompilationProgress() const { - return GetShaderCompilationProgress(); -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index 7ee9bcaad..2f7c9f25c 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h @@ -1,16 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once #include -#include -#include -#include -#include -#include -#include #include "common/common_types.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -18,74 +11,7 @@ namespace Vulkan { class Device; -class Scheduler; - -// Priority constants for shader compilation -extern const u32 SHADER_PRIORITY_CRITICAL; -extern const u32 SHADER_PRIORITY_HIGH; -extern const u32 SHADER_PRIORITY_NORMAL; -extern const u32 SHADER_PRIORITY_LOW; - -// Command queue system for asynchronous operations -void InitializeCommandQueue(); -void ShutdownCommandQueue(); -void SubmitCommandToQueue(std::function command); -void CommandQueueWorker(); - -// Thread pool management for shader compilation -void InitializeThreadPool(); -void ShutdownThreadPool(); -void SubmitShaderCompilationTask(std::function task, u32 priority); -float GetShaderCompilationProgress(); -bool IsShaderCompilationInProgress(); - -// Predictive shader loading -void PredictShader(const std::string& shader_path); -void PreloadPredictedShaders(const Device& device); - -// Scheduler integration functions -void SetGlobalScheduler(Scheduler* scheduler); -void SubmitToScheduler(std::function command); -u64 FlushScheduler(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); -void ProcessAllCommands(); vk::ShaderModule BuildShader(const Device& device, std::span code); -// Enhanced shader functionality -bool IsShaderValid(VkShaderModule shader_module); - -void AsyncCompileShader(const Device& device, const std::string& shader_path, - std::function callback, u32 priority); - -void AsyncCompileShader(const Device& device, const std::string& shader_path, - std::function callback); - -class ShaderManager { -public: - explicit ShaderManager(const Device& device); - ~ShaderManager(); - - VkShaderModule GetShaderModule(const std::string& shader_path); - void ReloadShader(const std::string& shader_path); - bool LoadShader(const std::string& shader_path); - void WaitForCompilation(); - - // Enhanced shader management - void BatchLoadShaders(const std::vector& shader_paths, - const std::vector& priorities); - void PreloadShaderDirectory(const std::string& directory_path); - float GetCompilationProgress() const; - - // Batch process multiple shaders in parallel - void PreloadShaders(const std::vector& shader_paths); - - // Integrate with Citron's scheduler - void SetScheduler(Scheduler* scheduler); - -private: - const Device& device; - std::mutex shader_mutex; - std::unordered_map shader_cache; -}; - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 243206bbb..6d4deb0eb 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #include @@ -30,10 +29,6 @@ namespace Vulkan { -// TextureCacheManager implementations to fix linker errors -TextureCacheManager::TextureCacheManager() = default; -TextureCacheManager::~TextureCacheManager() = default; - using Tegra::Engines::Fermi2D; using Tegra::Texture::SwizzleSource; using Tegra::Texture::TextureMipmapFilter; @@ -1193,171 +1188,69 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst } void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { - if (!dst->RenderPass()) { - return; - } - switch (dst_view.format) { - case PixelFormat::D24_UNORM_S8_UINT: - // Handle sRGB source formats - if (src_view.format == PixelFormat::A8B8G8R8_SRGB || - src_view.format == PixelFormat::B8G8R8A8_SRGB) { - // Verify format support before conversion - if (device.IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, - FormatType::Optimal)) { - return blit_image_helper.ConvertABGR8SRGBToD24S8(dst, src_view); - } else { - // Fallback to regular ABGR8 conversion if sRGB not supported - return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view); - } + case PixelFormat::R16_UNORM: + if (src_view.format == PixelFormat::D16_UNORM) { + return blit_image_helper.ConvertD16ToR16(dst, src_view); } + break; + case PixelFormat::A8B8G8R8_SRGB: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32FToABGR8(dst, src_view); + } + break; + case PixelFormat::A8B8G8R8_UNORM: + if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { + return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); + } + if (src_view.format == PixelFormat::D24_UNORM_S8_UINT) { + return blit_image_helper.ConvertS8D24ToABGR8(dst, src_view); + } + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32FToABGR8(dst, src_view); + } + break; + case PixelFormat::B8G8R8A8_SRGB: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32FToABGR8(dst, src_view); + } + break; + case PixelFormat::B8G8R8A8_UNORM: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32FToABGR8(dst, src_view); + } + break; + case PixelFormat::R32_FLOAT: + if (src_view.format == PixelFormat::D32_FLOAT) { + return blit_image_helper.ConvertD32ToR32(dst, src_view); + } + break; + case PixelFormat::D16_UNORM: + if (src_view.format == PixelFormat::R16_UNORM) { + return blit_image_helper.ConvertR16ToD16(dst, src_view); + } + break; + case PixelFormat::S8_UINT_D24_UNORM: if (src_view.format == PixelFormat::A8B8G8R8_UNORM || src_view.format == PixelFormat::B8G8R8A8_UNORM) { return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view); } break; - - case PixelFormat::A8B8G8R8_UNORM: - case PixelFormat::A8B8G8R8_SNORM: - case PixelFormat::A8B8G8R8_SINT: - case PixelFormat::A8B8G8R8_UINT: - case PixelFormat::R5G6B5_UNORM: - case PixelFormat::B5G6R5_UNORM: - case PixelFormat::A1R5G5B5_UNORM: - case PixelFormat::A2B10G10R10_UNORM: - case PixelFormat::A2B10G10R10_UINT: - case PixelFormat::A2R10G10B10_UNORM: - case PixelFormat::A1B5G5R5_UNORM: - case PixelFormat::A5B5G5R1_UNORM: - case PixelFormat::R8_UNORM: - case PixelFormat::R8_SNORM: - case PixelFormat::R8_SINT: - case PixelFormat::R8_UINT: - case PixelFormat::R16G16B16A16_FLOAT: - case PixelFormat::R16G16B16A16_UNORM: - case PixelFormat::R16G16B16A16_SNORM: - case PixelFormat::R16G16B16A16_SINT: - case PixelFormat::R16G16B16A16_UINT: - case PixelFormat::B10G11R11_FLOAT: - case PixelFormat::R32G32B32A32_UINT: - case PixelFormat::BC1_RGBA_UNORM: - case PixelFormat::BC2_UNORM: - case PixelFormat::BC3_UNORM: - case PixelFormat::BC4_UNORM: - case PixelFormat::BC4_SNORM: - case PixelFormat::BC5_UNORM: - case PixelFormat::BC5_SNORM: - case PixelFormat::BC7_UNORM: - case PixelFormat::BC6H_UFLOAT: - case PixelFormat::BC6H_SFLOAT: - case PixelFormat::ASTC_2D_4X4_UNORM: - case PixelFormat::B8G8R8A8_UNORM: - case PixelFormat::R32G32B32A32_FLOAT: - case PixelFormat::R32G32B32A32_SINT: - case PixelFormat::R32G32_FLOAT: - case PixelFormat::R32G32_SINT: - case PixelFormat::R32_FLOAT: - case PixelFormat::R16_FLOAT: - case PixelFormat::R16_UNORM: - case PixelFormat::R16_SNORM: - case PixelFormat::R16_UINT: - case PixelFormat::R16_SINT: - case PixelFormat::R16G16_UNORM: - case PixelFormat::R16G16_FLOAT: - case PixelFormat::R16G16_UINT: - case PixelFormat::R16G16_SINT: - case PixelFormat::R16G16_SNORM: - case PixelFormat::R32G32B32_FLOAT: - case PixelFormat::A8B8G8R8_SRGB: - case PixelFormat::R8G8_UNORM: - case PixelFormat::R8G8_SNORM: - case PixelFormat::R8G8_SINT: - case PixelFormat::R8G8_UINT: - case PixelFormat::R32G32_UINT: - case PixelFormat::R16G16B16X16_FLOAT: - case PixelFormat::R32_UINT: - case PixelFormat::R32_SINT: - case PixelFormat::ASTC_2D_8X8_UNORM: - case PixelFormat::ASTC_2D_8X5_UNORM: - case PixelFormat::ASTC_2D_5X4_UNORM: - case PixelFormat::B8G8R8A8_SRGB: - case PixelFormat::BC1_RGBA_SRGB: - case PixelFormat::BC2_SRGB: - case PixelFormat::BC3_SRGB: - case PixelFormat::BC7_SRGB: - case PixelFormat::A4B4G4R4_UNORM: - case PixelFormat::G4R4_UNORM: - case PixelFormat::ASTC_2D_4X4_SRGB: - case PixelFormat::ASTC_2D_8X8_SRGB: - case PixelFormat::ASTC_2D_8X5_SRGB: - case PixelFormat::ASTC_2D_5X4_SRGB: - case PixelFormat::ASTC_2D_5X5_UNORM: - case PixelFormat::ASTC_2D_5X5_SRGB: - case PixelFormat::ASTC_2D_10X8_UNORM: - case PixelFormat::ASTC_2D_10X8_SRGB: - case PixelFormat::ASTC_2D_6X6_UNORM: - case PixelFormat::ASTC_2D_6X6_SRGB: - case PixelFormat::ASTC_2D_10X6_UNORM: - case PixelFormat::ASTC_2D_10X6_SRGB: - case PixelFormat::ASTC_2D_10X5_UNORM: - case PixelFormat::ASTC_2D_10X5_SRGB: - case PixelFormat::ASTC_2D_10X10_UNORM: - case PixelFormat::ASTC_2D_10X10_SRGB: - case PixelFormat::ASTC_2D_12X10_UNORM: - case PixelFormat::ASTC_2D_12X10_SRGB: - case PixelFormat::ASTC_2D_12X12_UNORM: - case PixelFormat::ASTC_2D_12X12_SRGB: - case PixelFormat::ASTC_2D_8X6_UNORM: - case PixelFormat::ASTC_2D_8X6_SRGB: - case PixelFormat::ASTC_2D_6X5_UNORM: - case PixelFormat::ASTC_2D_6X5_SRGB: - case PixelFormat::E5B9G9R9_FLOAT: case PixelFormat::D32_FLOAT: - case PixelFormat::D16_UNORM: - case PixelFormat::X8_D24_UNORM: - case PixelFormat::S8_UINT: - case PixelFormat::S8_UINT_D24_UNORM: - case PixelFormat::D32_FLOAT_S8_UINT: - case PixelFormat::Invalid: + if (src_view.format == PixelFormat::A8B8G8R8_UNORM || + src_view.format == PixelFormat::B8G8R8A8_UNORM || + src_view.format == PixelFormat::A8B8G8R8_SRGB || + src_view.format == PixelFormat::B8G8R8A8_SRGB) { + return blit_image_helper.ConvertABGR8ToD32F(dst, src_view); + } + if (src_view.format == PixelFormat::R32_FLOAT) { + return blit_image_helper.ConvertR32ToD32(dst, src_view); + } + break; default: break; } -} - -VkFormat TextureCacheRuntime::GetSupportedFormat(VkFormat requested_format, - VkFormatFeatureFlags required_features) const { - if (requested_format == VK_FORMAT_A8B8G8R8_SRGB_PACK32 && - (required_features & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { - // Force valid depth format when sRGB requested in depth context - return VK_FORMAT_D24_UNORM_S8_UINT; - } - return requested_format; -} - -// Helper functions for format compatibility checks -bool TextureCacheRuntime::IsFormatDitherable(PixelFormat format) { - switch (format) { - case PixelFormat::B8G8R8A8_UNORM: - case PixelFormat::A8B8G8R8_UNORM: - case PixelFormat::B8G8R8A8_SRGB: - case PixelFormat::A8B8G8R8_SRGB: - return true; - default: - return false; - } -} - -bool TextureCacheRuntime::IsFormatScalable(PixelFormat format) { - switch (format) { - case PixelFormat::B8G8R8A8_UNORM: - case PixelFormat::A8B8G8R8_UNORM: - case PixelFormat::R16G16B16A16_FLOAT: - case PixelFormat::R32G32B32A32_FLOAT: - return true; - default: - return false; - } + UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format); } void TextureCacheRuntime::CopyImage(Image& dst, Image& src, @@ -1449,224 +1342,13 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, }); } -void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, std::span copies) { +void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, + std::span copies) { const bool msaa_to_non_msaa = src.info.num_samples > 1 && dst.info.num_samples == 1; - if (!msaa_to_non_msaa) { - return CopyImage(dst, src, copies); + if (msaa_copy_pass) { + return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa); } - - // Convert PixelFormat to VkFormat using Maxwell format conversion - const auto vk_format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, src.info.format).format; - - // Check if format supports MSAA resolve - const auto format_properties = device.GetPhysical().GetFormatProperties(vk_format); - if (!(format_properties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { - LOG_WARNING(Render_Vulkan, "Format does not support MSAA resolve, falling back to compute shader"); - if (msaa_copy_pass) { - return msaa_copy_pass->CopyImage(dst, src, copies, true); - } - UNIMPLEMENTED_MSG("MSAA resolve not supported for format and no compute fallback available"); - return; - } - - const VkImage dst_image = dst.Handle(); - const VkImage src_image = src.Handle(); - const VkImageAspectFlags aspect_mask = dst.AspectMask(); - - // Create temporary resolve image with proper memory allocation - const VkImageCreateInfo resolve_ci{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .imageType = VK_IMAGE_TYPE_2D, - .format = vk_format, - .extent = { - .width = src.info.size.width, - .height = src.info.size.height, - .depth = src.info.size.depth, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }; - - const auto resolve_image_holder = memory_allocator.CreateImage(resolve_ci); - - scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_image, dst_image, resolve_image = *resolve_image_holder, - copies, aspect_mask](vk::CommandBuffer cmdbuf) { - for (const auto& copy : copies) { - const VkExtent3D extent{ - .width = static_cast(copy.extent.width), - .height = static_cast(copy.extent.height), - .depth = static_cast(copy.extent.depth), - }; - - // First resolve the MSAA source to the temporary image - const VkImageResolve resolve_region{ - .srcSubresource = { - .aspectMask = aspect_mask, - .mipLevel = static_cast(copy.src_subresource.base_level), - .baseArrayLayer = static_cast(copy.src_subresource.base_layer), - .layerCount = static_cast(copy.src_subresource.num_layers), - }, - .srcOffset = { - static_cast(copy.src_offset.x), - static_cast(copy.src_offset.y), - static_cast(copy.src_offset.z), - }, - .dstSubresource = { - .aspectMask = aspect_mask, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .dstOffset = {0, 0, 0}, - .extent = extent, - }; - - const std::array pre_barriers{ - VkImageMemoryBarrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .image = src_image, - .subresourceRange = { - .aspectMask = aspect_mask, - .baseMipLevel = static_cast(copy.src_subresource.base_level), - .levelCount = 1, - .baseArrayLayer = static_cast(copy.src_subresource.base_layer), - .layerCount = static_cast(copy.src_subresource.num_layers), - }, - }, - VkImageMemoryBarrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .image = resolve_image, - .subresourceRange = { - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, - }; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, - nullptr, - nullptr, - pre_barriers); - - // Resolve MSAA image - cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - resolve_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - resolve_region); - - // Now copy from resolved image to destination - const VkImageCopy copy_region{ - .srcSubresource = { - .aspectMask = aspect_mask, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .srcOffset = {0, 0, 0}, - .dstSubresource = { - .aspectMask = aspect_mask, - .mipLevel = static_cast(copy.dst_subresource.base_level), - .baseArrayLayer = static_cast(copy.dst_subresource.base_layer), - .layerCount = static_cast(copy.dst_subresource.num_layers), - }, - .dstOffset = { - static_cast(copy.dst_offset.x), - static_cast(copy.dst_offset.y), - static_cast(copy.dst_offset.z), - }, - .extent = extent, - }; - - std::array mid_barriers{{ - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .image = resolve_image, - .subresourceRange = { - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }, - { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .image = dst_image, - .subresourceRange = { - .aspectMask = aspect_mask, - .baseMipLevel = static_cast(copy.dst_subresource.base_level), - .levelCount = 1, - .baseArrayLayer = static_cast(copy.dst_subresource.base_layer), - .layerCount = static_cast(copy.dst_subresource.num_layers), - }, - }, - }}; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, - nullptr, - nullptr, - mid_barriers); - - // Copy from resolved image to destination - cmdbuf.CopyImage(resolve_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - vk::Span{©_region, 1}); - - // Final transition back to general layout - const VkImageMemoryBarrier final_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .image = dst_image, - .subresourceRange = { - .aspectMask = aspect_mask, - .baseMipLevel = static_cast(copy.dst_subresource.base_level), - .levelCount = 1, - .baseArrayLayer = static_cast(copy.dst_subresource.base_layer), - .layerCount = static_cast(copy.dst_subresource.num_layers), - }, - }; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - 0, - vk::Span{}, - vk::Span{}, - vk::Span{&final_barrier, 1}); - } - }); + UNIMPLEMENTED_MSG("Copying images with different samples is not supported."); } u64 TextureCacheRuntime::GetDeviceLocalMemory() const { @@ -2098,7 +1780,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI slot_images = &slot_imgs; } -ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info, +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) : VideoCommon::ImageViewBase{info, view_info, gpu_addr_}, buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 7046f3bb1..8501ec384 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -1,14 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once #include -#include -#include -#include -#include #include "video_core/texture_cache/texture_cache_base.h" @@ -42,22 +37,6 @@ class RenderPassCache; class StagingBufferPool; class Scheduler; -// Enhanced texture management for better error handling and thread safety -class TextureCacheManager { -public: - explicit TextureCacheManager(); - ~TextureCacheManager(); - - VkImage GetTextureFromCache(const std::string& texture_path); - void ReloadTexture(const std::string& texture_path); - bool IsTextureLoadedCorrectly(VkImage texture); - void HandleTextureCache(); - -private: - std::mutex texture_mutex; - std::unordered_map texture_cache; -}; - class TextureCacheRuntime { public: explicit TextureCacheRuntime(const Device& device_, Scheduler& scheduler_, @@ -133,15 +112,6 @@ public: void BarrierFeedbackLoop(); - bool IsFormatDitherable(VideoCore::Surface::PixelFormat format); - bool IsFormatScalable(VideoCore::Surface::PixelFormat format); - - VkFormat GetSupportedFormat(VkFormat requested_format, VkFormatFeatureFlags required_features) const; - - // Enhanced texture error handling - bool IsTextureLoadedCorrectly(VkImage texture); - void HandleTextureError(const std::string& texture_path); - const Device& device; Scheduler& scheduler; MemoryAllocator& memory_allocator; @@ -153,9 +123,6 @@ public: const Settings::ResolutionScalingInfo& resolution; std::array, VideoCore::Surface::MaxPixelFormat> view_formats; - // Enhanced texture management - TextureCacheManager texture_cache_manager; - static constexpr size_t indexing_slots = 8 * sizeof(size_t); std::array buffers{}; }; diff --git a/src/video_core/renderer_vulkan/vk_texture_manager.cpp b/src/video_core/renderer_vulkan/vk_texture_manager.cpp deleted file mode 100644 index 6eea83e65..000000000 --- a/src/video_core/renderer_vulkan/vk_texture_manager.cpp +++ /dev/null @@ -1,146 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -#include - -#include "common/assert.h" -#include "common/logging/log.h" -#include "video_core/renderer_vulkan/vk_texture_manager.h" -#include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_memory_allocator.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -TextureManager::TextureManager(const Device& device_, MemoryAllocator& memory_allocator_) - : device(device_), memory_allocator(memory_allocator_) { - - // Create a default texture for fallback in case of errors - default_texture = CreateDefaultTexture(); -} - -TextureManager::~TextureManager() { - std::lock_guard lock(texture_mutex); - // Clear all cached textures - texture_cache.clear(); - - // Default texture will be cleaned up automatically by vk::Image's destructor -} - -VkImage TextureManager::GetTexture(const std::string& texture_path) { - std::lock_guard lock(texture_mutex); - - // Check if the texture is already in the cache - auto it = texture_cache.find(texture_path); - if (it != texture_cache.end()) { - return *it->second; - } - - // Load the texture and add it to the cache - vk::Image new_texture = LoadTexture(texture_path); - if (new_texture) { - VkImage raw_handle = *new_texture; - texture_cache.emplace(texture_path, std::move(new_texture)); - return raw_handle; - } - - // If loading fails, return the default texture if it exists - LOG_WARNING(Render_Vulkan, "Failed to load texture: {}, using default", texture_path); - if (default_texture.has_value()) { - return *(*default_texture); - } - return VK_NULL_HANDLE; -} - -void TextureManager::ReloadTexture(const std::string& texture_path) { - std::lock_guard lock(texture_mutex); - - // Remove the texture from cache if it exists - auto it = texture_cache.find(texture_path); - if (it != texture_cache.end()) { - LOG_INFO(Render_Vulkan, "Reloading texture: {}", texture_path); - texture_cache.erase(it); - } - - // The texture will be reloaded on next GetTexture call -} - -bool TextureManager::IsTextureLoadedCorrectly(VkImage texture) { - // Check if the texture handle is valid - static const VkImage null_handle = VK_NULL_HANDLE; - return texture != null_handle; -} - -void TextureManager::CleanupTextureCache() { - std::lock_guard lock(texture_mutex); - - // TODO: track usage and remove unused textures [ZEP] - - LOG_INFO(Render_Vulkan, "Handling texture cache cleanup, current size: {}", texture_cache.size()); -} - -void TextureManager::HandleTextureRendering(const std::string& texture_path, - std::function render_callback) { - VkImage texture = GetTexture(texture_path); - - if (!IsTextureLoadedCorrectly(texture)) { - LOG_ERROR(Render_Vulkan, "Texture failed to load correctly: {}, attempting reload", texture_path); - ReloadTexture(texture_path); - texture = GetTexture(texture_path); - } - - // Execute the rendering callback with the texture - render_callback(texture); -} - -vk::Image TextureManager::LoadTexture(const std::string& texture_path) { - // TODO: load image data from disk - // and create a proper Vulkan texture [ZEP] - - if (!std::filesystem::exists(texture_path)) { - LOG_ERROR(Render_Vulkan, "Texture file not found: {}", texture_path); - return {}; - } - - try { - - LOG_INFO(Render_Vulkan, "Loaded texture: {}", texture_path); - - // TODO: create an actual VkImage [ZEP] - return CreateDefaultTexture(); - } catch (const std::exception& e) { - LOG_ERROR(Render_Vulkan, "Error loading texture {}: {}", texture_path, e.what()); - return {}; - } -} - -vk::Image TextureManager::CreateDefaultTexture() { - // Create a small default texture (1x1 pixel) to use as a fallback - // const VkExtent2D extent{1, 1}; - - // Create image - // Avoid unused variable warning by commenting out the unused struct - // VkImageCreateInfo image_ci{ - // .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - // .pNext = nullptr, - // .flags = 0, - // .imageType = VK_IMAGE_TYPE_2D, - // .format = texture_format, - // .extent = {extent.width, extent.height, 1}, - // .mipLevels = 1, - // .arrayLayers = 1, - // .samples = VK_SAMPLE_COUNT_1_BIT, - // .tiling = VK_IMAGE_TILING_OPTIMAL, - // .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - // .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - // .queueFamilyIndexCount = 0, - // .pQueueFamilyIndices = nullptr, - // .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - // }; - - // TODO: create an actual VkImage [ZEP] - LOG_INFO(Render_Vulkan, "Created default fallback texture"); - return {}; -} - -} // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_texture_manager.h b/src/video_core/renderer_vulkan/vk_texture_manager.h deleted file mode 100644 index 8cf116c88..000000000 --- a/src/video_core/renderer_vulkan/vk_texture_manager.h +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -class Device; -class MemoryAllocator; - -// Enhanced texture manager for better error handling and thread safety -class TextureManager { -public: - explicit TextureManager(const Device& device, MemoryAllocator& memory_allocator); - ~TextureManager(); - - // Get a texture from the cache, loading it if necessary - VkImage GetTexture(const std::string& texture_path); - - // Force a texture to reload from disk - void ReloadTexture(const std::string& texture_path); - - // Check if a texture is loaded correctly - bool IsTextureLoadedCorrectly(VkImage texture); - - // Remove old textures from the cache - void CleanupTextureCache(); - - // Handle texture rendering, with automatic reload if needed - void HandleTextureRendering(const std::string& texture_path, - std::function render_callback); - -private: - // Load a texture from disk and create a Vulkan image - vk::Image LoadTexture(const std::string& texture_path); - - // Create a default texture to use in case of errors - vk::Image CreateDefaultTexture(); - - const Device& device; - MemoryAllocator& memory_allocator; - std::mutex texture_mutex; - std::unordered_map texture_cache; - std::optional default_texture; - VkFormat texture_format = VK_FORMAT_B8G8R8A8_SRGB; -}; - -} // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/vulkan_common/hybrid_memory.cpp b/src/video_core/vulkan_common/hybrid_memory.cpp deleted file mode 100644 index bb06fae98..000000000 --- a/src/video_core/vulkan_common/hybrid_memory.cpp +++ /dev/null @@ -1,446 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include - -#include "common/logging/log.h" -#include "video_core/vulkan_common/hybrid_memory.h" - -#if defined(__linux__) || defined(__ANDROID__) -#include -#include -#include -#include -#include -#include -#include -#elif defined(_WIN32) -#include -#endif - -namespace Vulkan { - -void PredictiveReuseManager::RecordUsage(u64 address, u64 size, bool write_access) { - std::lock_guard guard(mutex); - - // Add to history, removing oldest entries if we're past max_history - access_history.push_back({address, size, write_access, current_timestamp++}); - if (access_history.size() > max_history) { - access_history.erase(access_history.begin()); - } -} - -bool PredictiveReuseManager::IsHotRegion(u64 address, u64 size) const { - std::lock_guard guard(mutex); - - // Check if this memory region has been accessed frequently - const u64 end_address = address + size; - int access_count = 0; - - for (const auto& access : access_history) { - const u64 access_end = access.address + access.size; - - // Check for overlap - if (!(end_address <= access.address || address >= access_end)) { - access_count++; - } - } - - // Consider a region "hot" if it has been accessed in at least 10% of recent accesses - return access_count >= static_cast(std::max(1, max_history / 10)); -} - -void PredictiveReuseManager::EvictRegion(u64 address, u64 size) { - std::lock_guard guard(mutex); - - // Remove any history entries that overlap with this region - const u64 end_address = address + size; - - access_history.erase( - std::remove_if(access_history.begin(), access_history.end(), - [address, end_address](const MemoryAccess& access) { - const u64 access_end = access.address + access.size; - // Check for overlap - return !(end_address <= access.address || address >= access_end); - }), - access_history.end() - ); -} - -void PredictiveReuseManager::ClearHistory() { - std::lock_guard guard(mutex); - access_history.clear(); - current_timestamp = 0; -} - -#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) -void FaultManagedAllocator::Touch(size_t addr) { - lru.remove(addr); - lru.push_front(addr); - dirty_set.insert(addr); -} - -void FaultManagedAllocator::EnforceLimit() { - while (lru.size() > MaxPages) { - size_t evict = lru.back(); - lru.pop_back(); - - auto it = page_map.find(evict); - if (it != page_map.end()) { - if (dirty_set.count(evict)) { - // Compress and store dirty page before evicting - std::vector compressed((u8*)it->second, (u8*)it->second + PageSize); - compressed_store[evict] = std::move(compressed); - dirty_set.erase(evict); - } - -#if defined(__linux__) || defined(__ANDROID__) - munmap(it->second, PageSize); -#elif defined(_WIN32) - VirtualFree(it->second, 0, MEM_RELEASE); -#endif - page_map.erase(it); - } - } -} - -void* FaultManagedAllocator::GetOrAlloc(size_t addr) { - std::lock_guard guard(lock); - - if (page_map.count(addr)) { - Touch(addr); - return page_map[addr]; - } - -#if defined(__linux__) || defined(__ANDROID__) - void* mem = mmap(nullptr, PageSize, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - - if (mem == MAP_FAILED) { - LOG_ERROR(Render_Vulkan, "Failed to mmap memory for fault handler"); - return nullptr; - } -#elif defined(_WIN32) - void* mem = VirtualAlloc(nullptr, PageSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); - if (!mem) { - LOG_ERROR(Render_Vulkan, "Failed to VirtualAlloc memory for fault handler"); - return nullptr; - } -#endif - - if (compressed_store.count(addr)) { - // Decompress stored page data - std::memcpy(mem, compressed_store[addr].data(), compressed_store[addr].size()); - compressed_store.erase(addr); - } else { - std::memset(mem, 0, PageSize); - } - - page_map[addr] = mem; - lru.push_front(addr); - dirty_set.insert(addr); - EnforceLimit(); - - return mem; -} - -#if defined(_WIN32) -// Static member initialization -FaultManagedAllocator* FaultManagedAllocator::current_instance = nullptr; - -LONG WINAPI FaultManagedAllocator::VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info) { - // Only handle access violations (page faults) - if (exception_info->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) { - return EXCEPTION_CONTINUE_SEARCH; - } - - if (!current_instance) { - return EXCEPTION_CONTINUE_SEARCH; - } - - // Get the faulting address - use ULONG_PTR for Windows - const ULONG_PTR fault_addr = static_cast(exception_info->ExceptionRecord->ExceptionInformation[1]); - const ULONG_PTR base_addr = reinterpret_cast(current_instance->base_address); - - // Check if the address is within our managed range - if (fault_addr < base_addr || - fault_addr >= (base_addr + static_cast(current_instance->memory_size))) { - return EXCEPTION_CONTINUE_SEARCH; - } - - // Calculate the base address of the page - const ULONG_PTR page_addr = fault_addr & ~(static_cast(PageSize) - 1); - const size_t relative_addr = static_cast(page_addr - base_addr); - - // Handle the fault by allocating memory - void* page = current_instance->GetOrAlloc(relative_addr); - if (!page) { - return EXCEPTION_CONTINUE_SEARCH; - } - - // Copy the page data to the faulting address - DWORD old_protect; - void* target_addr = reinterpret_cast(page_addr); - - // Make the target page writable - if (VirtualProtect(target_addr, PageSize, PAGE_READWRITE, &old_protect)) { - std::memcpy(target_addr, page, PageSize); - // Restore original protection - VirtualProtect(target_addr, PageSize, old_protect, &old_protect); - return EXCEPTION_CONTINUE_EXECUTION; - } - - return EXCEPTION_CONTINUE_SEARCH; -} - -void FaultManagedAllocator::ExceptionHandlerThread() { - while (running) { - // Sleep to avoid busy waiting - Sleep(10); - } -} -#endif - -void FaultManagedAllocator::Initialize(void* base, size_t size) { -#if defined(__linux__) || defined(__ANDROID__) - uffd = static_cast(syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK)); - if (uffd < 0) { - LOG_ERROR(Render_Vulkan, "Failed to create userfaultfd, fault handling disabled"); - return; - } - - struct uffdio_api api = { .api = UFFD_API }; - ioctl(uffd, UFFDIO_API, &api); - - struct uffdio_register reg = { - .range = { .start = (uintptr_t)base, .len = size }, - .mode = UFFDIO_REGISTER_MODE_MISSING - }; - - if (ioctl(uffd, UFFDIO_REGISTER, ®) < 0) { - LOG_ERROR(Render_Vulkan, "Failed to register memory range with userfaultfd"); - close(uffd); - uffd = -1; - return; - } - - running = true; - fault_handler = std::thread(&FaultManagedAllocator::FaultThread, this); -#elif defined(_WIN32) - // Setup Windows memory for fault handling - base_address = base; - memory_size = size; - - // Reserve memory range but don't commit it yet - it will be demand-paged - DWORD oldProtect; - VirtualProtect(base, size, PAGE_NOACCESS, &oldProtect); - - // Install a vectored exception handler - current_instance = this; - AddVectoredExceptionHandler(1, VectoredExceptionHandler); - - running = true; - exception_handler = std::thread(&FaultManagedAllocator::ExceptionHandlerThread, this); - - LOG_INFO(Render_Vulkan, "Windows fault-managed memory initialized at {:p}, size: {}", - base, size); -#endif -} - -#if defined(__linux__) || defined(__ANDROID__) -void FaultManagedAllocator::FaultThread() { - struct pollfd pfd = { uffd, POLLIN, 0 }; - - while (running) { - if (poll(&pfd, 1, 10) > 0) { - struct uffd_msg msg; - read(uffd, &msg, sizeof(msg)); - - if (msg.event == UFFD_EVENT_PAGEFAULT) { - size_t addr = msg.arg.pagefault.address & ~(PageSize - 1); - void* page = GetOrAlloc(addr); - - if (page) { - struct uffdio_copy copy = { - .dst = (uintptr_t)addr, - .src = (uintptr_t)page, - .len = PageSize, - .mode = 0 - }; - - ioctl(uffd, UFFDIO_COPY, ©); - } - } - } - } -} -#endif - -void* FaultManagedAllocator::Translate(size_t addr) { - std::lock_guard guard(lock); - - size_t base = addr & ~(PageSize - 1); - if (!page_map.count(base)) { - return nullptr; - } - - Touch(base); - return (u8*)page_map[base] + (addr % PageSize); -} - -void FaultManagedAllocator::SaveSnapshot(const std::string& path) { - std::lock_guard guard(lock); - - std::ofstream out(path, std::ios::binary); - if (!out) { - LOG_ERROR(Render_Vulkan, "Failed to open snapshot file for writing: {}", path); - return; - } - - for (auto& [addr, mem] : page_map) { - out.write(reinterpret_cast(&addr), sizeof(addr)); - out.write(reinterpret_cast(mem), PageSize); - } - - LOG_INFO(Render_Vulkan, "Saved memory snapshot to {}", path); -} - -void FaultManagedAllocator::SaveDifferentialSnapshot(const std::string& path) { - std::lock_guard guard(lock); - - std::ofstream out(path, std::ios::binary); - if (!out) { - LOG_ERROR(Render_Vulkan, "Failed to open diff snapshot file for writing: {}", path); - return; - } - - size_t dirty_count = 0; - for (const auto& addr : dirty_set) { - if (page_map.count(addr)) { - out.write(reinterpret_cast(&addr), sizeof(addr)); - out.write(reinterpret_cast(page_map[addr]), PageSize); - dirty_count++; - } - } - - LOG_INFO(Render_Vulkan, "Saved differential snapshot to {} ({} dirty pages)", - path, dirty_count); -} - -void FaultManagedAllocator::ClearDirtySet() { - std::lock_guard guard(lock); - dirty_set.clear(); - LOG_DEBUG(Render_Vulkan, "Cleared dirty page tracking"); -} - -FaultManagedAllocator::~FaultManagedAllocator() { - running = false; - -#if defined(__linux__) || defined(__ANDROID__) - if (fault_handler.joinable()) { - fault_handler.join(); - } - - for (auto& [addr, mem] : page_map) { - munmap(mem, PageSize); - } - - if (uffd != -1) { - close(uffd); - } -#elif defined(_WIN32) - if (exception_handler.joinable()) { - exception_handler.join(); - } - - // Remove the vectored exception handler - RemoveVectoredExceptionHandler(VectoredExceptionHandler); - current_instance = nullptr; - - for (auto& [addr, mem] : page_map) { - VirtualFree(mem, 0, MEM_RELEASE); - } - - // Free the base memory if needed - if (base_address) { - VirtualFree(base_address, 0, MEM_RELEASE); - base_address = nullptr; - } -#endif -} -#endif // defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) - -HybridMemory::HybridMemory(const Device& device_, MemoryAllocator& allocator, size_t reuse_history) - : device(device_), memory_allocator(allocator), reuse_manager(reuse_history) { -} - -HybridMemory::~HybridMemory() = default; - -void HybridMemory::InitializeGuestMemory(void* base, size_t size) { -#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) - fmaa.Initialize(base, size); - LOG_INFO(Render_Vulkan, "Initialized fault-managed guest memory at {:p}, size: {}", - base, size); -#else - LOG_INFO(Render_Vulkan, "Fault-managed memory not supported on this platform"); -#endif -} - -void* HybridMemory::TranslateAddress(size_t addr) { -#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) - return fmaa.Translate(addr); -#else - return nullptr; -#endif -} - -ComputeBuffer HybridMemory::CreateComputeBuffer(VkDeviceSize size, VkBufferUsageFlags usage, - MemoryUsage memory_type) { - ComputeBuffer buffer; - buffer.size = size; - - VkBufferCreateInfo buffer_ci = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = size, - .usage = usage | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }; - - // Using CreateBuffer directly handles memory allocation internally - buffer.buffer = memory_allocator.CreateBuffer(buffer_ci, memory_type); - - LOG_DEBUG(Render_Vulkan, "Created compute buffer: size={}, usage={:x}", - size, usage); - - return buffer; -} - -void HybridMemory::SaveSnapshot(const std::string& path) { -#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) - fmaa.SaveSnapshot(path); -#else - LOG_ERROR(Render_Vulkan, "Memory snapshots not supported on this platform"); -#endif -} - -void HybridMemory::SaveDifferentialSnapshot(const std::string& path) { -#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) - fmaa.SaveDifferentialSnapshot(path); -#else - LOG_ERROR(Render_Vulkan, "Differential memory snapshots not supported on this platform"); -#endif -} - -void HybridMemory::ResetDirtyTracking() { -#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) - fmaa.ClearDirtySet(); -#endif -} - -} // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/vulkan_common/hybrid_memory.h b/src/video_core/vulkan_common/hybrid_memory.h deleted file mode 100644 index faff2de06..000000000 --- a/src/video_core/vulkan_common/hybrid_memory.h +++ /dev/null @@ -1,119 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_memory_allocator.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -struct ComputeBuffer { - vk::Buffer buffer{}; - VkDeviceSize size = 0; -}; - -class PredictiveReuseManager { -public: - explicit PredictiveReuseManager(size_t history_size) : max_history{history_size} {} - - void RecordUsage(u64 address, u64 size, bool write_access); - bool IsHotRegion(u64 address, u64 size) const; - void EvictRegion(u64 address, u64 size); - void ClearHistory(); - -private: - struct MemoryAccess { - u64 address; - u64 size; - bool write_access; - u64 timestamp; - }; - - std::vector access_history; - const size_t max_history; - u64 current_timestamp{0}; - mutable std::mutex mutex; -}; - -#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) -class FaultManagedAllocator { -public: - static constexpr size_t PageSize = 0x1000; - static constexpr size_t MaxPages = 16384; - - void Initialize(void* base, size_t size); - void* Translate(size_t addr); - void SaveSnapshot(const std::string& path); - void SaveDifferentialSnapshot(const std::string& path); - void ClearDirtySet(); - ~FaultManagedAllocator(); - -private: - std::map page_map; - std::list lru; - std::set dirty_set; - std::unordered_map> compressed_store; - std::mutex lock; - -#if defined(__linux__) || defined(__ANDROID__) - int uffd = -1; - std::atomic running{false}; - std::thread fault_handler; - void FaultThread(); -#elif defined(_WIN32) - void* base_address = nullptr; - size_t memory_size = 0; - HANDLE exception_port = nullptr; - std::atomic running{false}; - std::thread exception_handler; - void ExceptionHandlerThread(); - static LONG WINAPI VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info); - static FaultManagedAllocator* current_instance; -#endif - - void Touch(size_t addr); - void EnforceLimit(); - void* GetOrAlloc(size_t addr); -}; -#endif - -class HybridMemory { -public: - explicit HybridMemory(const Device& device, MemoryAllocator& allocator, size_t reuse_history = 32); - ~HybridMemory(); - - void InitializeGuestMemory(void* base, size_t size); - void* TranslateAddress(size_t addr); - - ComputeBuffer CreateComputeBuffer(VkDeviceSize size, VkBufferUsageFlags usage, MemoryUsage memory_type); - - void SaveSnapshot(const std::string& path); - void SaveDifferentialSnapshot(const std::string& path); - void ResetDirtyTracking(); - -private: - const Device& device; - MemoryAllocator& memory_allocator; - PredictiveReuseManager reuse_manager; - -#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) - FaultManagedAllocator fmaa; -#endif -}; - -} // namespace Vulkan \ No newline at end of file