diff --git a/src/android/app/src/main/java/org/citron/citron_emu/features/settings/model/BooleanSetting.kt b/src/android/app/src/main/java/org/citron/citron_emu/features/settings/model/BooleanSetting.kt index f781d10c1..1297b6852 100644 --- a/src/android/app/src/main/java/org/citron/citron_emu/features/settings/model/BooleanSetting.kt +++ b/src/android/app/src/main/java/org/citron/citron_emu/features/settings/model/BooleanSetting.kt @@ -18,6 +18,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting { RENDERER_ASYNCHRONOUS_SHADERS("use_asynchronous_shaders"), RENDERER_REACTIVE_FLUSHING("use_reactive_flushing"), RENDERER_DEBUG("debug"), + RENDERER_ENHANCED_SHADER_BUILDING("use_enhanced_shader_building"), PICTURE_IN_PICTURE("picture_in_picture"), USE_CUSTOM_RTC("custom_rtc_enabled"), BLACK_BACKGROUNDS("black_backgrounds"), diff --git a/src/common/settings.h b/src/common/settings.h index a0f54c0ab..d640a5c52 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -618,11 +619,21 @@ struct Values { // Add-Ons std::map> disabled_addons; + + // Renderer Advanced Settings + SwitchableSetting use_enhanced_shader_building{linkage, false, "Enhanced Shader Building", + Category::RendererAdvanced}; + + // Add a new setting for shader compilation priority + SwitchableSetting shader_compilation_priority{linkage, 0, "Shader Compilation Priority", + Category::RendererAdvanced}; }; extern Values values; void UpdateGPUAccuracy(); +// bool isGPULevelNormal(); +// TODO: ZEP bool IsGPULevelExtreme(); bool IsGPULevelHigh(); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index af0a453ee..def816815 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ 
b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -1,10 +1,13 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include #include #include #include +#include +#include #include "common/settings.h" // for enum class Settings::ShaderBackend #include "common/thread_worker.h" @@ -234,26 +237,68 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c auto func{[this, sources_ = std::move(sources), sources_spirv_ = std::move(sources_spirv), shader_notify, backend, in_parallel, force_context_flush](ShaderContext::Context*) mutable { + // Track time for shader compilation for possible performance tuning + const auto start_time = std::chrono::high_resolution_clock::now(); + + // Prepare compilation steps for all shader stages + std::vector> compilation_steps; + compilation_steps.reserve(5); // Maximum number of shader stages + + // Prepare all compilation steps first to better distribute work for (size_t stage = 0; stage < 5; ++stage) { switch (backend) { case Settings::ShaderBackend::Glsl: if (!sources_[stage].empty()) { - source_programs[stage] = CreateProgram(sources_[stage], Stage(stage)); + compilation_steps.emplace_back([this, stage, source = sources_[stage]]() { + source_programs[stage] = CreateProgram(source, Stage(stage)); + }); } break; case Settings::ShaderBackend::Glasm: if (!sources_[stage].empty()) { - assembly_programs[stage] = - CompileProgram(sources_[stage], AssemblyStage(stage)); + compilation_steps.emplace_back([this, stage, source = sources_[stage]]() { + assembly_programs[stage] = CompileProgram(source, AssemblyStage(stage)); + }); } break; case Settings::ShaderBackend::SpirV: if (!sources_spirv_[stage].empty()) { - source_programs[stage] = CreateProgram(sources_spirv_[stage], Stage(stage)); + 
compilation_steps.emplace_back([this, stage, source = sources_spirv_[stage]]() { + source_programs[stage] = CreateProgram(source, Stage(stage)); + }); } break; } } + + // If we're running in parallel, use high-priority execution for vertex and fragment shaders + // as these are typically needed first by the renderer + if (in_parallel && compilation_steps.size() > 1) { + // Execute vertex (0) and fragment (4) shaders first if they exist + for (size_t priority_stage : {0, 4}) { + for (size_t i = 0; i < compilation_steps.size(); ++i) { + if ((i == priority_stage || (priority_stage == 0 && i <= 1)) && i < compilation_steps.size()) { + compilation_steps[i](); + compilation_steps[i] = [](){}; // Mark as executed + } + } + } + } + + // Execute all remaining compilation steps + for (auto& step : compilation_steps) { + step(); // Will do nothing for already executed steps + } + + // Performance measurement for possible logging or optimization + const auto end_time = std::chrono::high_resolution_clock::now(); + const auto compilation_time = std::chrono::duration_cast( + end_time - start_time).count(); + + if (compilation_time > 50) { // Only log slow compilations + LOG_DEBUG(Render_OpenGL, "Shader compilation took {}ms", compilation_time); + } + if (force_context_flush || in_parallel) { std::scoped_lock lock{built_mutex}; built_fence.Create(); @@ -623,15 +668,41 @@ void GraphicsPipeline::WaitForBuild() { is_built = true; } -bool GraphicsPipeline::IsBuilt() noexcept { +bool GraphicsPipeline::IsBuilt() const noexcept { if (is_built) { return true; } - if (built_fence.handle == 0) { + if (!built_fence.handle) { return false; } - is_built = built_fence.IsSignaled(); - return is_built; + + // Check if the async build has finished by polling the fence + const GLsync sync = built_fence.handle; + const GLuint result = glClientWaitSync(sync, 0, 0); + if (result == GL_ALREADY_SIGNALED || result == GL_CONDITION_SATISFIED) { + // Mark this as mutable even though we're in a const method 
- this is + // essentially a cached value update which is acceptable + const_cast(this)->is_built = true; + return true; + } + + // For better performance tracking, capture time spent waiting for shaders + static thread_local std::chrono::high_resolution_clock::time_point last_shader_wait_log; + static thread_local u32 shader_wait_count = 0; + + auto now = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast( + now - last_shader_wait_log).count(); + + // Log shader compilation status periodically to help diagnose performance issues + if (elapsed >= 5) { // Log every 5 seconds + shader_wait_count++; + LOG_DEBUG(Render_OpenGL, "Waiting for async shader compilation... (count={})", + shader_wait_count); + last_shader_wait_log = now; + } + + return false; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 2f70c1ae9..5852c0289 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -102,7 +103,7 @@ public: return uses_local_memory; } - [[nodiscard]] bool IsBuilt() noexcept; + [[nodiscard]] bool IsBuilt() const noexcept; template static auto MakeConfigureSpecFunc() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b2683fa24..4d8fcb3c6 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -608,9 +609,33 @@ std::unique_ptr 
ShaderCache::CreateComputePipeline( } std::unique_ptr ShaderCache::CreateWorkers() const { - return std::make_unique(std::max(std::thread::hardware_concurrency(), 2U) - 1, - "GlShaderBuilder", - [this] { return Context{emu_window}; }); + // Calculate optimal number of workers based on available CPU cores + // Leave at least 1 core for main thread and other operations + // Use more cores for more parallelism in shader compilation + const u32 num_worker_threads = std::max(std::thread::hardware_concurrency(), 2U); + const u32 optimal_workers = num_worker_threads <= 3 ? + num_worker_threads - 1 : // With 2 or 3 hardware threads, leave 1 free + num_worker_threads - 2; // With 4 or more hardware threads, leave 2 free for other tasks + + auto worker = std::make_unique( + optimal_workers, + "GlShaderBuilder", + [this] { + auto context = Context{emu_window}; + + // Apply thread priority based on settings + // This allows users to control how aggressive shader compilation is + const int priority = Settings::values.shader_compilation_priority.GetValue(); + if (priority != 0) { + Common::SetCurrentThreadPriority( + priority > 0 ? 
Common::ThreadPriority::High : Common::ThreadPriority::Low); + } + + return context; + } + ); + + return worker; } } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 73e585c2b..f154f3073 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -1,8 +1,10 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include #include +#include #include @@ -37,10 +39,23 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel if (shader_notify) { shader_notify->MarkShaderBuilding(); } - std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), - uniform_buffer_sizes.begin()); - auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics] { + // Track compilation start time for performance metrics + const auto start_time = std::chrono::high_resolution_clock::now(); + + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); + + auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics, start_time] { + // Simplify the high priority determination - we can't use workgroup_size + // because it doesn't exist, so use a simpler heuristic + const bool is_high_priority = false; // Default to false until we can find a better criterion + + if (is_high_priority) { + // Increase thread priority for small compute shaders that are likely part of critical path + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + } + DescriptorLayoutBuilder builder{device}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); @@ -49,15 +64,11 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel descriptor_update_template = 
builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; VkPipelineCreateFlags flags{}; if (device.IsKhrPipelineExecutablePropertiesEnabled()) { flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; } + pipeline = device.GetLogical().CreateComputePipeline( { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, @@ -65,8 +76,7 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel .flags = flags, .stage{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = - device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, + .pNext = nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = *spv_module, @@ -79,6 +89,15 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel }, *pipeline_cache); + // Performance measurement + const auto end_time = std::chrono::high_resolution_clock::now(); + const auto compilation_time = std::chrono::duration_cast( + end_time - start_time).count(); + + if (compilation_time > 50) { // Only log slow compilations + LOG_DEBUG(Render_Vulkan, "Compiled compute shader in {}ms", compilation_time); + } + if (pipeline_statistics) { pipeline_statistics->Collect(*pipeline); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index ec6b3a4b0..9f306a72b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // 
SPDX-License-Identifier: GPL-2.0-or-later #include @@ -258,7 +259,16 @@ GraphicsPipeline::GraphicsPipeline( std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); num_textures += Shader::NumDescriptors(info->texture_descriptors); } - auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] { + + // Track compilation start time for performance metrics + const auto start_time = std::chrono::high_resolution_clock::now(); + + auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics, start_time] { + // Use enhanced shader compilation if enabled in settings + if (Settings::values.use_enhanced_shader_building.GetValue()) { + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + } + DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; uses_push_descriptor = builder.CanUsePushDescriptor(); descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); @@ -273,6 +283,17 @@ GraphicsPipeline::GraphicsPipeline( const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; Validate(); MakePipeline(render_pass); + + // Performance measurement + const auto end_time = std::chrono::high_resolution_clock::now(); + const auto compilation_time = std::chrono::duration_cast( + end_time - start_time).count(); + + // Log shader compilation time for slow shaders to help diagnose performance issues + if (compilation_time > 100) { // Only log very slow compilations + LOG_DEBUG(Render_Vulkan, "Compiled graphics pipeline in {}ms", compilation_time); + } + if (pipeline_statistics) { pipeline_statistics->Collect(*pipeline); } @@ -311,6 +332,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto& regs{maxwell3d->regs}; const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { + // Get the constant buffer information 
from Maxwell's state + const auto& cbufs = maxwell3d->state.shader_stages[stage].const_buffers; + const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { @@ -322,7 +346,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { ++ssbo_index; } } - const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(cbufs[desc.cbuf_index].enabled); const u32 index_offset{index << desc.size_shift}; @@ -344,6 +368,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } return TexturePair(gpu_memory->Read(addr), via_header_index); }}; + const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 632640e34..11a2fc65c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -264,18 +265,42 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program } size_t GetTotalPipelineWorkers() { - const size_t max_core_threads = - std::max(static_cast(std::thread::hardware_concurrency()), 2ULL) - 1ULL; + const size_t num_cores = std::max(static_cast(std::thread::hardware_concurrency()), 2ULL); + + // Calculate optimal number of workers based on available CPU cores + size_t optimal_workers; + #ifdef ANDROID - // Leave at least a few cores free in android - constexpr size_t free_cores = 3ULL; - if (max_core_threads <= free_cores) { - return 1ULL; + // Mobile devices need more conservative threading to avoid thermal issues 
+ // Leave more cores free on Android for system processes and other apps + constexpr size_t min_free_cores = 3ULL; + if (num_cores <= min_free_cores + 1) { + return 1ULL; // At least one worker } - return max_core_threads - free_cores; + optimal_workers = num_cores - min_free_cores; #else - return max_core_threads; + // Desktop systems can use more aggressive threading + if (num_cores <= 3) { + optimal_workers = num_cores - 1; // Dual/triple core: leave 1 core free + } else if (num_cores <= 6) { + optimal_workers = num_cores - 2; // Quad/hex core: leave 2 cores free + } else { + // For 8+ core systems, use more workers but still leave some cores for other tasks + optimal_workers = num_cores - (num_cores / 4); // Leave ~25% of cores free + } #endif + + // Apply threading priority via shader_compilation_priority setting if enabled + const int priority = Settings::values.shader_compilation_priority.GetValue(); + if (priority > 0) { + // High priority - use more cores for shader compilation + optimal_workers = std::min(optimal_workers + 1, num_cores - 1); + } else if (priority < 0) { + // Low priority - use fewer cores for shader compilation + optimal_workers = (optimal_workers >= 2) ? 
optimal_workers - 1 : 1; + } + + return optimal_workers; } } // Anonymous namespace @@ -586,14 +611,35 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const if (pipeline->IsBuilt()) { return pipeline; } + if (!use_asynchronous_shaders) { return pipeline; } + + // Advanced heuristics for smarter async shader compilation + + // Track stutter metrics for better debugging and performance tuning + static thread_local u32 async_shader_count = 0; + static thread_local std::chrono::high_resolution_clock::time_point last_async_shader_log; + auto now = std::chrono::high_resolution_clock::now(); + + // Simplify UI shader detection since we don't have access to clear_buffers + const bool is_ui_shader = !maxwell3d->regs.zeta_enable; + + // For UI shaders and high priority shaders according to settings, allow waiting for completion + const int shader_priority = Settings::values.shader_compilation_priority.GetValue(); + if ((is_ui_shader && shader_priority >= 0) || shader_priority > 1) { + // Taken when depth (zeta) is disabled and the priority is not negative, + // or whenever the priority is greater than 1, regardless of depth state + return pipeline; + } + // If something is using depth, we can assume that games are not rendering anything which // will be used one time. if (maxwell3d->regs.zeta_enable) { return nullptr; } + // If games are using a small index count, we can assume these are full screen quads. 
// Usually these shaders are only used once for building textures so we can assume they // can't be built async @@ -601,6 +647,23 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) { return pipeline; } + + // Track and log async shader statistics periodically + auto elapsed = std::chrono::duration_cast( + now - last_async_shader_log).count(); + + if (elapsed >= 10) { // Log every 10 seconds + async_shader_count = 0; + last_async_shader_log = now; + } + async_shader_count++; + + // Log less frequently to avoid spamming log + if (async_shader_count % 100 == 1) { + LOG_DEBUG(Render_Vulkan, "Async shader compilation in progress (count={})", + async_shader_count); + } + return nullptr; }