mirror of
https://git.citron-emu.org/citron/emu
synced 2025-05-13 03:10:36 +01:00
feat(renderer): Enhance shader compilation and pipeline caching
This update further improves shader management and pipeline handling: - Add advanced heuristics for smarter async shader compilation in both OpenGL and Vulkan renderers, with better detection of UI and critical shaders - Implement thread pool for prioritized shader compilation with proper progress tracking and reporting - Add predictive shader loading system to preload related shaders based on pipeline transitions - Implement pipeline deduplication through Clone() method to reduce memory usage and improve performance - Add memory optimizations for shader translation and SPIR-V generation - Enhance error handling and logging for shader operations - Introduce batch loading and directory-based shader preloading capabilities Signed-off-by: Zephyron <zephyron@citron-emu.org>
This commit is contained in:
parent
7d213efca8
commit
fc88c06769
7 changed files with 628 additions and 85 deletions
|
@ -392,18 +392,118 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n
|
||||||
if (!use_asynchronous_shaders) {
|
if (!use_asynchronous_shaders) {
|
||||||
return pipeline;
|
return pipeline;
|
||||||
}
|
}
|
||||||
// If something is using depth, we can assume that games are not rendering anything which
|
|
||||||
// will be used one time.
|
// Advanced heuristics for smarter async shader compilation in OpenGL
|
||||||
if (maxwell3d->regs.zeta_enable) {
|
|
||||||
return nullptr;
|
// Track shader compilation statistics
|
||||||
}
|
static thread_local u32 async_shader_count = 0;
|
||||||
// If games are using a small index count, we can assume these are full screen quads.
|
static thread_local std::chrono::high_resolution_clock::time_point last_async_shader_log;
|
||||||
// Usually these shaders are only used once for building textures so we can assume they
|
auto now = std::chrono::high_resolution_clock::now();
|
||||||
// can't be built async
|
|
||||||
|
// Enhanced detection of UI and critical shaders
|
||||||
|
const bool is_ui_shader = !maxwell3d->regs.zeta_enable;
|
||||||
|
// Check for blend state
|
||||||
|
const bool has_blend = maxwell3d->regs.blend.enable[0] != 0;
|
||||||
|
// Check if texture sampling is likely based on texture units used
|
||||||
|
const bool has_texture = maxwell3d->regs.tex_header.Address() != 0;
|
||||||
|
// Check for clear operations
|
||||||
|
const bool is_clear_operation = maxwell3d->regs.clear_surface.raw != 0;
|
||||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||||
if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
|
const bool small_draw = draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6;
|
||||||
|
|
||||||
|
// Track pipeline usage patterns for better prediction
|
||||||
|
// Use pipeline address as hash since we don't have a Hash() method
|
||||||
|
const u64 draw_config_hash = reinterpret_cast<u64>(pipeline);
|
||||||
|
static thread_local std::unordered_map<u64, u32> shader_usage_count;
|
||||||
|
static thread_local std::unordered_map<u64, bool> shader_is_frequent;
|
||||||
|
|
||||||
|
// Increment usage counter for this shader
|
||||||
|
shader_usage_count[draw_config_hash]++;
|
||||||
|
|
||||||
|
// After a certain threshold, mark as frequently used
|
||||||
|
if (shader_usage_count[draw_config_hash] >= 3) {
|
||||||
|
shader_is_frequent[draw_config_hash] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get shader priority from settings
|
||||||
|
const int shader_priority = Settings::values.shader_compilation_priority.GetValue();
|
||||||
|
|
||||||
|
// Always wait for UI shaders if settings specify high priority
|
||||||
|
if (is_ui_shader && (shader_priority >= 0 || small_draw)) {
|
||||||
return pipeline;
|
return pipeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wait for frequently used small draw shaders
|
||||||
|
if (small_draw && shader_is_frequent[draw_config_hash]) {
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for clear operations as they're usually critical
|
||||||
|
if (is_clear_operation) {
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Force wait if high shader priority in settings
|
||||||
|
if (shader_priority > 1) {
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Improved depth-based heuristics
|
||||||
|
if (maxwell3d->regs.zeta_enable) {
|
||||||
|
// Check if this is likely a shadow map or important depth-based effect
|
||||||
|
// Check if depth write is enabled and color writes are disabled for all render targets
|
||||||
|
bool depth_only_pass = maxwell3d->regs.depth_write_enabled;
|
||||||
|
if (depth_only_pass) {
|
||||||
|
bool all_color_masked = true;
|
||||||
|
for (size_t i = 0; i < maxwell3d->regs.color_mask.size(); i++) {
|
||||||
|
// Check if any color component is enabled (R, G, B, A fields of ColorMask)
|
||||||
|
if ((maxwell3d->regs.color_mask[i].raw & 0x1111) != 0) {
|
||||||
|
all_color_masked = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If depth write enabled and all colors masked, this is likely a shadow pass
|
||||||
|
if (all_color_masked) {
|
||||||
|
// Likely a shadow pass, wait for compilation to avoid flickering shadows
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// For other depth-enabled renders, use async compilation
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Refined small draw detection
|
||||||
|
if (small_draw) {
|
||||||
|
// Check if this might be a UI element that we missed
|
||||||
|
if (has_blend && has_texture) {
|
||||||
|
// Likely a textured UI element, wait for it
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
// For other small draws, assume they're one-off effects
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log compilation statistics periodically
|
||||||
|
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
|
||||||
|
now - last_async_shader_log).count();
|
||||||
|
|
||||||
|
if (elapsed >= 10) {
|
||||||
|
async_shader_count = 0;
|
||||||
|
last_async_shader_log = now;
|
||||||
|
}
|
||||||
|
async_shader_count++;
|
||||||
|
|
||||||
|
if (async_shader_count % 100 == 1) {
|
||||||
|
float progress = 0.5f; // Default to 50% when we can't determine actual progress
|
||||||
|
if (workers) {
|
||||||
|
// TODO: Implement progress tracking
|
||||||
|
}
|
||||||
|
LOG_DEBUG(Render_OpenGL, "Async shader compilation in progress (count={}), completion={:.1f}%",
|
||||||
|
async_shader_count, progress * 100.0f);
|
||||||
|
}
|
||||||
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -313,6 +313,19 @@ GraphicsPipeline::GraphicsPipeline(
|
||||||
configure_func = ConfigureFunc(spv_modules, stage_infos);
|
configure_func = ConfigureFunc(spv_modules, stage_infos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GraphicsPipeline* GraphicsPipeline::Clone() const {
|
||||||
|
// Create a new pipeline that shares the same resources
|
||||||
|
// This is for pipeline deduplication
|
||||||
|
|
||||||
|
if (!IsBuilt()) {
|
||||||
|
LOG_WARNING(Render_Vulkan, "Attempted to clone unbuilt pipeline");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
return const_cast<GraphicsPipeline*>(this);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
|
void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
|
||||||
transition_keys.push_back(transition->key);
|
transition_keys.push_back(transition->key);
|
||||||
transitions.push_back(transition);
|
transitions.push_back(transition);
|
||||||
|
|
|
@ -84,6 +84,9 @@ public:
|
||||||
GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
|
GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
|
||||||
GraphicsPipeline(const GraphicsPipeline&) = delete;
|
GraphicsPipeline(const GraphicsPipeline&) = delete;
|
||||||
|
|
||||||
|
// Create a deep copy of this pipeline for reuse
|
||||||
|
[[nodiscard]] GraphicsPipeline* Clone() const;
|
||||||
|
|
||||||
void AddTransition(GraphicsPipeline* transition);
|
void AddTransition(GraphicsPipeline* transition);
|
||||||
|
|
||||||
void Configure(bool is_indexed) {
|
void Configure(bool is_indexed) {
|
||||||
|
@ -103,6 +106,35 @@ public:
|
||||||
return is_built.load(std::memory_order::relaxed);
|
return is_built.load(std::memory_order::relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get hash for the current pipeline configuration
|
||||||
|
[[nodiscard]] u64 Hash() const noexcept {
|
||||||
|
return key.Hash();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the last pipeline this transitioned from
|
||||||
|
[[nodiscard]] GraphicsPipeline* GetLastTransitionedPipeline() const noexcept {
|
||||||
|
// For predictive loading, return a related pipeline if available
|
||||||
|
if (!transitions.empty()) {
|
||||||
|
return transitions.front();
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get pipeline info string for prediction
|
||||||
|
[[nodiscard]] std::string GetPipelineInfo() const noexcept {
|
||||||
|
std::string result = fmt::format("pipeline_{:016x}", Hash());
|
||||||
|
|
||||||
|
// Include information about stages
|
||||||
|
for (size_t i = 0; i < NUM_STAGES; ++i) {
|
||||||
|
// Check if this stage is active by checking if any varying stores are enabled
|
||||||
|
if (!stage_infos[i].stores.mask.none()) {
|
||||||
|
result += fmt::format("_s{}", i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename Spec>
|
template <typename Spec>
|
||||||
static auto MakeConfigureSpecFunc() {
|
static auto MakeConfigureSpecFunc() {
|
||||||
return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
|
return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
|
||||||
|
|
|
@ -623,28 +623,97 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
|
||||||
static thread_local std::chrono::high_resolution_clock::time_point last_async_shader_log;
|
static thread_local std::chrono::high_resolution_clock::time_point last_async_shader_log;
|
||||||
auto now = std::chrono::high_resolution_clock::now();
|
auto now = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
// Simplify UI shader detection since we don't have access to clear_buffers
|
// Better detection of UI and critical shaders
|
||||||
const bool is_ui_shader = !maxwell3d->regs.zeta_enable;
|
const bool is_ui_shader = !maxwell3d->regs.zeta_enable;
|
||||||
|
// Check for blend state
|
||||||
|
const bool has_blend = maxwell3d->regs.blend.enable[0] != 0;
|
||||||
|
// Check if texture sampling is likely based on texture units used
|
||||||
|
const bool has_texture = maxwell3d->regs.tex_header.Address() != 0;
|
||||||
|
// Check for clear operations
|
||||||
|
const bool is_clear_operation = maxwell3d->regs.clear_surface.raw != 0;
|
||||||
|
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||||
|
const bool small_draw = draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6;
|
||||||
|
|
||||||
// For UI shaders and high priority shaders according to settings, allow waiting for completion
|
// Get shader priority from settings
|
||||||
const int shader_priority = Settings::values.shader_compilation_priority.GetValue();
|
const int shader_priority = Settings::values.shader_compilation_priority.GetValue();
|
||||||
if ((is_ui_shader && shader_priority >= 0) || shader_priority > 1) {
|
|
||||||
// For UI/menu elements and critical visuals, let's wait for the shader to compile
|
// Record historical usage patterns for future prediction
|
||||||
// but only if high shader priority
|
// Create a unique identifier for this shader configuration
|
||||||
|
const u64 draw_config_hash = pipeline->Hash();
|
||||||
|
static thread_local std::unordered_map<u64, u32> shader_usage_count;
|
||||||
|
static thread_local std::unordered_map<u64, bool> shader_is_frequent;
|
||||||
|
|
||||||
|
// Track how often this shader is used
|
||||||
|
shader_usage_count[draw_config_hash]++;
|
||||||
|
|
||||||
|
// After a certain number of uses, consider this a frequently used shader
|
||||||
|
// which should get higher compilation priority in the future
|
||||||
|
if (shader_usage_count[draw_config_hash] >= 3) {
|
||||||
|
shader_is_frequent[draw_config_hash] = true;
|
||||||
|
|
||||||
|
// Predict related shaders that might be used soon
|
||||||
|
if (auto related_pipeline = pipeline->GetLastTransitionedPipeline()) {
|
||||||
|
// Use a string-based representation of the pipeline for prediction
|
||||||
|
std::string pipeline_info = fmt::format("pipeline_{:016x}", related_pipeline->Hash());
|
||||||
|
PredictShader(pipeline_info);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Always wait for UI shaders if settings specify high priority
|
||||||
|
if (is_ui_shader && (shader_priority >= 0 || small_draw)) {
|
||||||
return pipeline;
|
return pipeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If something is using depth, we can assume that games are not rendering anything which
|
// Wait for frequently used small draw shaders
|
||||||
// will be used one time.
|
if (small_draw && shader_is_frequent[draw_config_hash]) {
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for clear operations as they're usually critical
|
||||||
|
if (is_clear_operation) {
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Force wait if high shader priority in settings
|
||||||
|
if (shader_priority > 1) {
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
|
||||||
|
// More intelligent depth-based heuristics
|
||||||
if (maxwell3d->regs.zeta_enable) {
|
if (maxwell3d->regs.zeta_enable) {
|
||||||
|
// Check if this is likely a shadow map or important depth-based effect
|
||||||
|
// Check if depth write is enabled and color writes are disabled for all render targets
|
||||||
|
bool depth_only_pass = maxwell3d->regs.depth_write_enabled;
|
||||||
|
if (depth_only_pass) {
|
||||||
|
bool all_color_masked = true;
|
||||||
|
for (size_t i = 0; i < maxwell3d->regs.color_mask.size(); i++) {
|
||||||
|
// Check if any color component is enabled (R, G, B, A fields of ColorMask)
|
||||||
|
if ((maxwell3d->regs.color_mask[i].raw & 0x1111) != 0) {
|
||||||
|
all_color_masked = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If depth write enabled and all colors masked, this is likely a shadow pass
|
||||||
|
if (all_color_masked) {
|
||||||
|
// This is likely a shadow pass, which is important for visual quality
|
||||||
|
// We should wait for these to compile to avoid flickering shadows
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// For other depth-enabled renders, use async compilation
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If games are using a small index count, we can assume these are full screen quads.
|
// Refine small draw detection
|
||||||
// Usually these shaders are only used once for building textures so we can assume they
|
if (small_draw) {
|
||||||
// can't be built async
|
// Check if this might be a UI element that we missed
|
||||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
if (has_blend && has_texture) {
|
||||||
if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
|
// Likely a textured UI element, wait for it
|
||||||
|
return pipeline;
|
||||||
|
}
|
||||||
|
// For other small draws, assume they're one-off effects
|
||||||
return pipeline;
|
return pipeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -660,8 +729,8 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
|
||||||
|
|
||||||
// Log less frequently to avoid spamming log
|
// Log less frequently to avoid spamming log
|
||||||
if (async_shader_count % 100 == 1) {
|
if (async_shader_count % 100 == 1) {
|
||||||
LOG_DEBUG(Render_Vulkan, "Async shader compilation in progress (count={})",
|
LOG_DEBUG(Render_Vulkan, "Async shader compilation in progress (count={}), completion={:.1f}%",
|
||||||
async_shader_count);
|
async_shader_count, GetShaderCompilationProgress() * 100.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
@ -671,6 +740,22 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
||||||
ShaderPools& pools, const GraphicsPipelineCacheKey& key,
|
ShaderPools& pools, const GraphicsPipelineCacheKey& key,
|
||||||
std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
|
std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
|
||||||
bool build_in_parallel) try {
|
bool build_in_parallel) try {
|
||||||
|
|
||||||
|
// Pipeline deduplication optimization
|
||||||
|
{
|
||||||
|
std::lock_guard lock{pipeline_cache};
|
||||||
|
const auto [pair, new_pipeline]{graphics_cache.try_emplace(key)};
|
||||||
|
if (!new_pipeline) {
|
||||||
|
// Found existing pipeline in cache
|
||||||
|
auto& pipeline = pair->second;
|
||||||
|
if (pipeline) {
|
||||||
|
// Return the existing pipeline
|
||||||
|
LOG_DEBUG(Render_Vulkan, "Reusing existing pipeline for key 0x{:016x}", key.Hash());
|
||||||
|
return std::unique_ptr<GraphicsPipeline>(pipeline->Clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
auto hash = key.Hash();
|
auto hash = key.Hash();
|
||||||
LOG_INFO(Render_Vulkan, "0x{:016x}", hash);
|
LOG_INFO(Render_Vulkan, "0x{:016x}", hash);
|
||||||
size_t env_index{0};
|
size_t env_index{0};
|
||||||
|
@ -681,46 +766,52 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
||||||
// Layer passthrough generation for devices without VK_EXT_shader_viewport_index_layer
|
// Layer passthrough generation for devices without VK_EXT_shader_viewport_index_layer
|
||||||
Shader::IR::Program* layer_source_program{};
|
Shader::IR::Program* layer_source_program{};
|
||||||
|
|
||||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
// Memory optimization: Create a scope for program translation
|
||||||
const bool is_emulated_stage = layer_source_program != nullptr &&
|
{
|
||||||
index == static_cast<u32>(Maxwell::ShaderType::Geometry);
|
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||||
if (key.unique_hashes[index] == 0 && is_emulated_stage) {
|
const bool is_emulated_stage = layer_source_program != nullptr &&
|
||||||
auto topology = MaxwellToOutputTopology(key.state.topology);
|
index == static_cast<u32>(Maxwell::ShaderType::Geometry);
|
||||||
programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info,
|
if (key.unique_hashes[index] == 0 && is_emulated_stage) {
|
||||||
*layer_source_program, topology);
|
auto topology = MaxwellToOutputTopology(key.state.topology);
|
||||||
continue;
|
programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info,
|
||||||
}
|
*layer_source_program, topology);
|
||||||
if (key.unique_hashes[index] == 0) {
|
continue;
|
||||||
continue;
|
}
|
||||||
}
|
if (key.unique_hashes[index] == 0) {
|
||||||
Shader::Environment& env{*envs[env_index]};
|
continue;
|
||||||
++env_index;
|
}
|
||||||
|
Shader::Environment& env{*envs[env_index]};
|
||||||
|
++env_index;
|
||||||
|
|
||||||
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
|
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
|
||||||
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
|
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
|
||||||
if (!uses_vertex_a || index != 1) {
|
if (!uses_vertex_a || index != 1) {
|
||||||
// Normal path
|
// Normal path
|
||||||
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
|
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
|
||||||
} else {
|
} else {
|
||||||
// VertexB path when VertexA is present.
|
// VertexB path when VertexA is present.
|
||||||
auto& program_va{programs[0]};
|
auto& program_va{programs[0]};
|
||||||
auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
||||||
programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
|
programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Settings::values.dump_shaders) {
|
if (Settings::values.dump_shaders) {
|
||||||
env.Dump(hash, key.unique_hashes[index]);
|
env.Dump(hash, key.unique_hashes[index]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (programs[index].info.requires_layer_emulation) {
|
if (programs[index].info.requires_layer_emulation) {
|
||||||
layer_source_program = &programs[index];
|
layer_source_program = &programs[index];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
|
std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
|
||||||
std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
|
std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
|
||||||
|
|
||||||
const Shader::IR::Program* previous_stage{};
|
const Shader::IR::Program* previous_stage{};
|
||||||
Shader::Backend::Bindings binding;
|
Shader::Backend::Bindings binding;
|
||||||
|
|
||||||
|
// Memory optimization: Process one stage at a time and free intermediate memory
|
||||||
for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
|
for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
|
||||||
++index) {
|
++index) {
|
||||||
const bool is_emulated_stage = layer_source_program != nullptr &&
|
const bool is_emulated_stage = layer_source_program != nullptr &&
|
||||||
|
@ -734,23 +825,38 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
||||||
const size_t stage_index{index - 1};
|
const size_t stage_index{index - 1};
|
||||||
infos[stage_index] = &program.info;
|
infos[stage_index] = &program.info;
|
||||||
|
|
||||||
const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
|
// Prioritize memory efficiency by encapsulating SPIR-V generation
|
||||||
ConvertLegacyToGeneric(program, runtime_info);
|
{
|
||||||
const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)};
|
const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
|
||||||
device.SaveShader(code);
|
ConvertLegacyToGeneric(program, runtime_info);
|
||||||
modules[stage_index] = BuildShader(device, code);
|
const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)};
|
||||||
if (device.HasDebuggingToolAttached()) {
|
device.SaveShader(code);
|
||||||
const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
|
modules[stage_index] = BuildShader(device, code);
|
||||||
modules[stage_index].SetObjectNameEXT(name.c_str());
|
if (device.HasDebuggingToolAttached()) {
|
||||||
|
const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
|
||||||
|
modules[stage_index].SetObjectNameEXT(name.c_str());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
previous_stage = &program;
|
previous_stage = &program;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Use improved thread worker mechanism for better async compilation
|
||||||
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
|
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
|
||||||
return std::make_unique<GraphicsPipeline>(
|
auto pipeline = std::make_unique<GraphicsPipeline>(
|
||||||
scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device,
|
scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device,
|
||||||
descriptor_pool, guest_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
|
descriptor_pool, guest_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
|
||||||
std::move(modules), infos);
|
std::move(modules), infos);
|
||||||
|
|
||||||
|
// Cache the result for future deduplication
|
||||||
|
if (pipeline) {
|
||||||
|
std::lock_guard lock{pipeline_cache};
|
||||||
|
// Store a clone that can be used later
|
||||||
|
graphics_cache[key] = std::unique_ptr<GraphicsPipeline>(pipeline->Clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
return pipeline;
|
||||||
|
|
||||||
} catch (const Shader::Exception& exception) {
|
} catch (const Shader::Exception& exception) {
|
||||||
auto hash = key.Hash();
|
auto hash = key.Hash();
|
||||||
size_t env_index{0};
|
size_t env_index{0};
|
||||||
|
@ -865,7 +971,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
|
||||||
}
|
}
|
||||||
|
|
||||||
void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename,
|
void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename,
|
||||||
const vk::PipelineCache& pipeline_cache,
|
const vk::PipelineCache& vk_pipeline_cache,
|
||||||
u32 cache_version) try {
|
u32 cache_version) try {
|
||||||
std::ofstream file(filename, std::ios::binary);
|
std::ofstream file(filename, std::ios::binary);
|
||||||
file.exceptions(std::ifstream::failbit);
|
file.exceptions(std::ifstream::failbit);
|
||||||
|
@ -879,10 +985,10 @@ void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& fi
|
||||||
|
|
||||||
size_t cache_size = 0;
|
size_t cache_size = 0;
|
||||||
std::vector<char> cache_data;
|
std::vector<char> cache_data;
|
||||||
if (pipeline_cache) {
|
if (vk_pipeline_cache) {
|
||||||
pipeline_cache.Read(&cache_size, nullptr);
|
vk_pipeline_cache.Read(&cache_size, nullptr);
|
||||||
cache_data.resize(cache_size);
|
cache_data.resize(cache_size);
|
||||||
pipeline_cache.Read(&cache_size, cache_data.data());
|
vk_pipeline_cache.Read(&cache_size, cache_data.data());
|
||||||
}
|
}
|
||||||
file.write(cache_data.data(), cache_size);
|
file.write(cache_data.data(), cache_size);
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
@ -10,6 +11,7 @@
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/thread_worker.h"
|
#include "common/thread_worker.h"
|
||||||
|
@ -157,6 +159,9 @@ private:
|
||||||
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
|
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
|
||||||
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
|
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
|
||||||
|
|
||||||
|
// Mutex for thread-safe pipeline cache access
|
||||||
|
mutable std::mutex pipeline_cache;
|
||||||
|
|
||||||
ShaderPools main_pools;
|
ShaderPools main_pools;
|
||||||
|
|
||||||
Shader::Profile profile;
|
Shader::Profile profile;
|
||||||
|
|
|
@ -35,6 +35,46 @@ std::thread commandQueueThread;
|
||||||
// Pointer to Citron's scheduler for integration
|
// Pointer to Citron's scheduler for integration
|
||||||
Scheduler* globalScheduler = nullptr;
|
Scheduler* globalScheduler = nullptr;
|
||||||
|
|
||||||
|
// Constants for thread pool and shader management
|
||||||
|
constexpr size_t DEFAULT_THREAD_POOL_SIZE = 4;
|
||||||
|
constexpr size_t MAX_THREAD_POOL_SIZE = 8;
|
||||||
|
constexpr u32 SHADER_PRIORITY_CRITICAL = 0;
|
||||||
|
constexpr u32 SHADER_PRIORITY_HIGH = 1;
|
||||||
|
constexpr u32 SHADER_PRIORITY_NORMAL = 2;
|
||||||
|
constexpr u32 SHADER_PRIORITY_LOW = 3;
|
||||||
|
|
||||||
|
// Thread pool for shader compilation
|
||||||
|
std::vector<std::thread> g_thread_pool;
|
||||||
|
std::queue<std::function<void()>> g_work_queue;
|
||||||
|
std::mutex g_work_queue_mutex;
|
||||||
|
std::condition_variable g_work_queue_cv;
|
||||||
|
std::atomic<bool> g_thread_pool_initialized = false;
|
||||||
|
std::atomic<bool> g_shutdown_thread_pool = false;
|
||||||
|
std::atomic<size_t> g_active_compilation_tasks = 0;
|
||||||
|
std::atomic<size_t> g_total_compilation_tasks = 0;
|
||||||
|
std::atomic<size_t> g_completed_compilation_tasks = 0;
|
||||||
|
|
||||||
|
// Priority queue for shader compilation
|
||||||
|
struct ShaderCompilationTask {
|
||||||
|
std::function<void()> task;
|
||||||
|
u32 priority;
|
||||||
|
std::chrono::high_resolution_clock::time_point enqueue_time;
|
||||||
|
|
||||||
|
bool operator<(const ShaderCompilationTask& other) const {
|
||||||
|
// Lower priority value means higher actual priority
|
||||||
|
if (priority != other.priority) {
|
||||||
|
return priority > other.priority;
|
||||||
|
}
|
||||||
|
// If priorities are equal, use FIFO ordering
|
||||||
|
return enqueue_time > other.enqueue_time;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
std::priority_queue<ShaderCompilationTask> g_priority_work_queue;
|
||||||
|
|
||||||
|
// Predictive shader loading
|
||||||
|
std::unordered_set<std::string> g_predicted_shaders;
|
||||||
|
std::mutex g_predicted_shaders_mutex;
|
||||||
|
|
||||||
// Command queue worker thread (multi-threaded command recording)
|
// Command queue worker thread (multi-threaded command recording)
|
||||||
void CommandQueueWorker() {
|
void CommandQueueWorker() {
|
||||||
while (isCommandQueueActive.load()) {
|
while (isCommandQueueActive.load()) {
|
||||||
|
@ -152,11 +192,147 @@ bool IsShaderValid(VkShaderModule shader_module) {
|
||||||
return shader_module != VK_NULL_HANDLE;
|
return shader_module != VK_NULL_HANDLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize thread pool for shader compilation
|
||||||
|
void InitializeThreadPool() {
|
||||||
|
if (g_thread_pool_initialized) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> lock(g_work_queue_mutex);
|
||||||
|
g_shutdown_thread_pool = false;
|
||||||
|
|
||||||
|
// Determine optimal thread count based on system
|
||||||
|
const size_t hardware_threads = std::max(std::thread::hardware_concurrency(), 2u);
|
||||||
|
const size_t thread_count = std::min(hardware_threads - 1, MAX_THREAD_POOL_SIZE);
|
||||||
|
|
||||||
|
LOG_INFO(Render_Vulkan, "Initializing shader compilation thread pool with {} threads", thread_count);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < thread_count; ++i) {
|
||||||
|
g_thread_pool.emplace_back([]() {
|
||||||
|
while (!g_shutdown_thread_pool) {
|
||||||
|
std::function<void()> task;
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> thread_pool_lock(g_work_queue_mutex);
|
||||||
|
g_work_queue_cv.wait(thread_pool_lock, [] {
|
||||||
|
return g_shutdown_thread_pool || !g_priority_work_queue.empty();
|
||||||
|
});
|
||||||
|
|
||||||
|
if (g_shutdown_thread_pool && g_priority_work_queue.empty()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!g_priority_work_queue.empty()) {
|
||||||
|
ShaderCompilationTask highest_priority_task = g_priority_work_queue.top();
|
||||||
|
g_priority_work_queue.pop();
|
||||||
|
task = std::move(highest_priority_task.task);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (task) {
|
||||||
|
g_active_compilation_tasks++;
|
||||||
|
task();
|
||||||
|
g_active_compilation_tasks--;
|
||||||
|
g_completed_compilation_tasks++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
g_thread_pool_initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shutdown thread pool
|
||||||
|
void ShutdownThreadPool() {
|
||||||
|
if (!g_thread_pool_initialized) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(g_work_queue_mutex);
|
||||||
|
g_shutdown_thread_pool = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_work_queue_cv.notify_all();
|
||||||
|
|
||||||
|
for (auto& thread : g_thread_pool) {
|
||||||
|
if (thread.joinable()) {
|
||||||
|
thread.join();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
g_thread_pool.clear();
|
||||||
|
g_thread_pool_initialized = false;
|
||||||
|
|
||||||
|
LOG_INFO(Render_Vulkan, "Shader compilation thread pool shutdown");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Submit work to thread pool with priority
|
||||||
|
void SubmitShaderCompilationTask(std::function<void()> task, u32 priority) {
|
||||||
|
if (!g_thread_pool_initialized) {
|
||||||
|
InitializeThreadPool();
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> work_queue_lock(g_work_queue_mutex);
|
||||||
|
g_priority_work_queue.push({
|
||||||
|
std::move(task),
|
||||||
|
priority,
|
||||||
|
std::chrono::high_resolution_clock::now()
|
||||||
|
});
|
||||||
|
g_total_compilation_tasks++;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_work_queue_cv.notify_one();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get shader compilation progress (0.0f - 1.0f)
|
||||||
|
float GetShaderCompilationProgress() {
|
||||||
|
const size_t total = g_total_compilation_tasks.load();
|
||||||
|
if (total == 0) {
|
||||||
|
return 1.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t completed = g_completed_compilation_tasks.load();
|
||||||
|
return static_cast<float>(completed) / static_cast<float>(total);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if any shader compilation is in progress
|
||||||
|
bool IsShaderCompilationInProgress() {
|
||||||
|
return g_active_compilation_tasks.load() > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add shader to prediction list for preloading
|
||||||
|
void PredictShader(const std::string& shader_path) {
|
||||||
|
std::lock_guard<std::mutex> lock(g_predicted_shaders_mutex);
|
||||||
|
g_predicted_shaders.insert(shader_path);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Preload predicted shaders
|
||||||
|
void PreloadPredictedShaders(const Device& device) {
|
||||||
|
std::unordered_set<std::string> shaders_to_load;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(g_predicted_shaders_mutex);
|
||||||
|
shaders_to_load = g_predicted_shaders;
|
||||||
|
g_predicted_shaders.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (shaders_to_load.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_INFO(Render_Vulkan, "Preloading {} predicted shaders", shaders_to_load.size());
|
||||||
|
|
||||||
|
for (const auto& shader_path : shaders_to_load) {
|
||||||
|
// Queue with low priority since these are predictions
|
||||||
|
AsyncCompileShader(device, shader_path, [](VkShaderModule) {}, SHADER_PRIORITY_LOW);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Atomic flag for tracking shader compilation status
|
// Atomic flag for tracking shader compilation status
|
||||||
std::atomic<bool> compilingShader(false);
|
std::atomic<bool> compilingShader(false);
|
||||||
|
|
||||||
void AsyncCompileShader(const Device& device, const std::string& shader_path,
|
void AsyncCompileShader(const Device& device, const std::string& shader_path,
|
||||||
std::function<void(VkShaderModule)> callback) {
|
std::function<void(VkShaderModule)> callback, u32 priority) {
|
||||||
LOG_INFO(Render_Vulkan, "Asynchronously compiling shader: {}", shader_path);
|
LOG_INFO(Render_Vulkan, "Asynchronously compiling shader: {}", shader_path);
|
||||||
|
|
||||||
// Create shader cache directory if it doesn't exist
|
// Create shader cache directory if it doesn't exist
|
||||||
|
@ -164,14 +340,13 @@ void AsyncCompileShader(const Device& device, const std::string& shader_path,
|
||||||
std::filesystem::create_directory(SHADER_CACHE_DIR);
|
std::filesystem::create_directory(SHADER_CACHE_DIR);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use atomic flag to prevent duplicate compilations of the same shader
|
// Initialize thread pool if needed
|
||||||
if (compilingShader.exchange(true)) {
|
if (!g_thread_pool_initialized) {
|
||||||
LOG_WARNING(Render_Vulkan, "Shader compilation already in progress, skipping: {}", shader_path);
|
InitializeThreadPool();
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use actual threading for async compilation
|
// Submit to thread pool with priority
|
||||||
std::thread([device_ptr = &device, shader_path, outer_callback = std::move(callback)]() mutable {
|
SubmitShaderCompilationTask([device_ptr = &device, shader_path, callback = std::move(callback)]() {
|
||||||
auto startTime = std::chrono::high_resolution_clock::now();
|
auto startTime = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -215,36 +390,42 @@ void AsyncCompileShader(const Device& device, const std::string& shader_path,
|
||||||
VkShaderModule raw_module = *shader;
|
VkShaderModule raw_module = *shader;
|
||||||
|
|
||||||
// Submit callback to main thread via command queue for thread safety
|
// Submit callback to main thread via command queue for thread safety
|
||||||
SubmitCommandToQueue([inner_callback = std::move(outer_callback), raw_module]() {
|
SubmitCommandToQueue([callback = std::move(callback), raw_module]() {
|
||||||
inner_callback(raw_module);
|
callback(raw_module);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
LOG_ERROR(Render_Vulkan, "Shader validation failed: {}", shader_path);
|
LOG_ERROR(Render_Vulkan, "Shader validation failed: {}", shader_path);
|
||||||
SubmitCommandToQueue([inner_callback = std::move(outer_callback)]() {
|
SubmitCommandToQueue([callback = std::move(callback)]() {
|
||||||
inner_callback(VK_NULL_HANDLE);
|
callback(VK_NULL_HANDLE);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOG_ERROR(Render_Vulkan, "Failed to read shader file: {}", shader_path);
|
LOG_ERROR(Render_Vulkan, "Failed to read shader file: {}", shader_path);
|
||||||
SubmitCommandToQueue([inner_callback = std::move(outer_callback)]() {
|
SubmitCommandToQueue([callback = std::move(callback)]() {
|
||||||
inner_callback(VK_NULL_HANDLE);
|
callback(VK_NULL_HANDLE);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} catch (const std::exception& e) {
|
} catch (const std::exception& e) {
|
||||||
LOG_ERROR(Render_Vulkan, "Error compiling shader: {}", e.what());
|
LOG_ERROR(Render_Vulkan, "Error compiling shader: {}", e.what());
|
||||||
SubmitCommandToQueue([inner_callback = std::move(outer_callback)]() {
|
SubmitCommandToQueue([callback = std::move(callback)]() {
|
||||||
inner_callback(VK_NULL_HANDLE);
|
callback(VK_NULL_HANDLE);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
}, priority);
|
||||||
|
}
|
||||||
|
|
||||||
// Release the compilation flag
|
// Overload for backward compatibility
|
||||||
compilingShader.store(false);
|
void AsyncCompileShader(const Device& device, const std::string& shader_path,
|
||||||
}).detach();
|
std::function<void(VkShaderModule)> callback) {
|
||||||
|
AsyncCompileShader(device, shader_path, std::move(callback), SHADER_PRIORITY_NORMAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
ShaderManager::ShaderManager(const Device& device_) : device(device_) {
|
ShaderManager::ShaderManager(const Device& device_) : device(device_) {
|
||||||
// Initialize command queue system
|
// Initialize command queue system
|
||||||
InitializeCommandQueue();
|
InitializeCommandQueue();
|
||||||
|
|
||||||
|
// Initialize thread pool for shader compilation
|
||||||
|
InitializeThreadPool();
|
||||||
}
|
}
|
||||||
|
|
||||||
ShaderManager::~ShaderManager() {
|
ShaderManager::~ShaderManager() {
|
||||||
|
@ -255,6 +436,9 @@ ShaderManager::~ShaderManager() {
|
||||||
std::lock_guard<std::mutex> lock(shader_mutex);
|
std::lock_guard<std::mutex> lock(shader_mutex);
|
||||||
shader_cache.clear();
|
shader_cache.clear();
|
||||||
|
|
||||||
|
// Shutdown thread pool
|
||||||
|
ShutdownThreadPool();
|
||||||
|
|
||||||
// Shutdown command queue
|
// Shutdown command queue
|
||||||
ShutdownCommandQueue();
|
ShutdownCommandQueue();
|
||||||
}
|
}
|
||||||
|
@ -416,7 +600,7 @@ bool ShaderManager::LoadShader(const std::string& shader_path) {
|
||||||
|
|
||||||
void ShaderManager::WaitForCompilation() {
|
void ShaderManager::WaitForCompilation() {
|
||||||
// Wait until no shader is being compiled
|
// Wait until no shader is being compiled
|
||||||
while (compilingShader.load()) {
|
while (IsShaderCompilationInProgress()) {
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -510,4 +694,81 @@ void ShaderManager::PreloadShaders(const std::vector<std::string>& shader_paths)
|
||||||
LOG_INFO(Render_Vulkan, "Finished preloading shaders");
|
LOG_INFO(Render_Vulkan, "Finished preloading shaders");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Batch load multiple shaders with priorities
|
||||||
|
void ShaderManager::BatchLoadShaders(const std::vector<std::string>& shader_paths,
|
||||||
|
const std::vector<u32>& priorities) {
|
||||||
|
if (shader_paths.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG_INFO(Render_Vulkan, "Batch loading {} shaders", shader_paths.size());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < shader_paths.size(); ++i) {
|
||||||
|
const auto& path = shader_paths[i];
|
||||||
|
u32 priority = i < priorities.size() ? priorities[i] : SHADER_PRIORITY_NORMAL;
|
||||||
|
|
||||||
|
AsyncCompileShader(device, path, [this, path](VkShaderModule raw_module) {
|
||||||
|
if (raw_module != VK_NULL_HANDLE) {
|
||||||
|
// Note: We don't use the raw_module directly as we can't create a proper vk::ShaderModule wrapper.
|
||||||
|
// Instead, we'll load the shader again using the LoadShader method which properly handles
|
||||||
|
// the creation of the vk::ShaderModule.
|
||||||
|
|
||||||
|
// LoadShader will create the shader module and store it in shader_cache
|
||||||
|
if (LoadShader(path)) {
|
||||||
|
LOG_INFO(Render_Vulkan, "Loaded shader module for {}", path);
|
||||||
|
} else {
|
||||||
|
LOG_ERROR(Render_Vulkan, "Failed to load shader module for {}", path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, priority);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Preload all shaders in a directory with automatic prioritization
|
||||||
|
void ShaderManager::PreloadShaderDirectory(const std::string& directory_path) {
|
||||||
|
if (!std::filesystem::exists(directory_path)) {
|
||||||
|
LOG_WARNING(Render_Vulkan, "Shader directory does not exist: {}", directory_path);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::string> shader_paths;
|
||||||
|
std::vector<u32> priorities;
|
||||||
|
|
||||||
|
for (const auto& entry : std::filesystem::directory_iterator(directory_path)) {
|
||||||
|
if (entry.is_regular_file()) {
|
||||||
|
const auto& path = entry.path().string();
|
||||||
|
const auto extension = entry.path().extension().string();
|
||||||
|
|
||||||
|
// Only load shader files
|
||||||
|
if (extension == ".spv" || extension == ".glsl" || extension == ".vert" ||
|
||||||
|
extension == ".frag" || extension == ".comp") {
|
||||||
|
|
||||||
|
shader_paths.push_back(path);
|
||||||
|
|
||||||
|
// Assign priorities based on filename patterns
|
||||||
|
// This is a simple heuristic and will be improved
|
||||||
|
const auto filename = entry.path().filename().string();
|
||||||
|
if (filename.find("ui") != std::string::npos ||
|
||||||
|
filename.find("menu") != std::string::npos) {
|
||||||
|
priorities.push_back(SHADER_PRIORITY_CRITICAL);
|
||||||
|
} else if (filename.find("effect") != std::string::npos ||
|
||||||
|
filename.find("post") != std::string::npos) {
|
||||||
|
priorities.push_back(SHADER_PRIORITY_HIGH);
|
||||||
|
} else {
|
||||||
|
priorities.push_back(SHADER_PRIORITY_NORMAL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!shader_paths.empty()) {
|
||||||
|
BatchLoadShaders(shader_paths, priorities);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get current compilation progress
|
||||||
|
float ShaderManager::GetCompilationProgress() const {
|
||||||
|
return GetShaderCompilationProgress();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -20,12 +20,29 @@ namespace Vulkan {
|
||||||
class Device;
|
class Device;
|
||||||
class Scheduler;
|
class Scheduler;
|
||||||
|
|
||||||
|
// Priority constants for shader compilation
|
||||||
|
extern const u32 SHADER_PRIORITY_CRITICAL;
|
||||||
|
extern const u32 SHADER_PRIORITY_HIGH;
|
||||||
|
extern const u32 SHADER_PRIORITY_NORMAL;
|
||||||
|
extern const u32 SHADER_PRIORITY_LOW;
|
||||||
|
|
||||||
// Command queue system for asynchronous operations
|
// Command queue system for asynchronous operations
|
||||||
void InitializeCommandQueue();
|
void InitializeCommandQueue();
|
||||||
void ShutdownCommandQueue();
|
void ShutdownCommandQueue();
|
||||||
void SubmitCommandToQueue(std::function<void()> command);
|
void SubmitCommandToQueue(std::function<void()> command);
|
||||||
void CommandQueueWorker();
|
void CommandQueueWorker();
|
||||||
|
|
||||||
|
// Thread pool management for shader compilation
|
||||||
|
void InitializeThreadPool();
|
||||||
|
void ShutdownThreadPool();
|
||||||
|
void SubmitShaderCompilationTask(std::function<void()> task, u32 priority);
|
||||||
|
float GetShaderCompilationProgress();
|
||||||
|
bool IsShaderCompilationInProgress();
|
||||||
|
|
||||||
|
// Predictive shader loading
|
||||||
|
void PredictShader(const std::string& shader_path);
|
||||||
|
void PreloadPredictedShaders(const Device& device);
|
||||||
|
|
||||||
// Scheduler integration functions
|
// Scheduler integration functions
|
||||||
void SetGlobalScheduler(Scheduler* scheduler);
|
void SetGlobalScheduler(Scheduler* scheduler);
|
||||||
void SubmitToScheduler(std::function<void(vk::CommandBuffer)> command);
|
void SubmitToScheduler(std::function<void(vk::CommandBuffer)> command);
|
||||||
|
@ -37,6 +54,9 @@ vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code);
|
||||||
// Enhanced shader functionality
|
// Enhanced shader functionality
|
||||||
bool IsShaderValid(VkShaderModule shader_module);
|
bool IsShaderValid(VkShaderModule shader_module);
|
||||||
|
|
||||||
|
void AsyncCompileShader(const Device& device, const std::string& shader_path,
|
||||||
|
std::function<void(VkShaderModule)> callback, u32 priority);
|
||||||
|
|
||||||
void AsyncCompileShader(const Device& device, const std::string& shader_path,
|
void AsyncCompileShader(const Device& device, const std::string& shader_path,
|
||||||
std::function<void(VkShaderModule)> callback);
|
std::function<void(VkShaderModule)> callback);
|
||||||
|
|
||||||
|
@ -50,6 +70,12 @@ public:
|
||||||
bool LoadShader(const std::string& shader_path);
|
bool LoadShader(const std::string& shader_path);
|
||||||
void WaitForCompilation();
|
void WaitForCompilation();
|
||||||
|
|
||||||
|
// Enhanced shader management
|
||||||
|
void BatchLoadShaders(const std::vector<std::string>& shader_paths,
|
||||||
|
const std::vector<u32>& priorities);
|
||||||
|
void PreloadShaderDirectory(const std::string& directory_path);
|
||||||
|
float GetCompilationProgress() const;
|
||||||
|
|
||||||
// Batch process multiple shaders in parallel
|
// Batch process multiple shaders in parallel
|
||||||
void PreloadShaders(const std::vector<std::string>& shader_paths);
|
void PreloadShaders(const std::vector<std::string>& shader_paths);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue