revert: video_core changes reverted due to instability

Reverted the video_core changes because they were causing instability and
the OpenGL backend produced no video output. The following commits were
reverted (see the command sketch after the list):

1fd5fefcb1
edfb500ee7
b25c7653e6
5d952717ff
964bbf489a
a4088f3a1e
18def48dfe
3205c9b691
2f57a35d2d
f706427815
fc88c06769
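
A minimal sketch of how a combined revert of the commits above could be
reproduced locally, assuming a clean working tree and that the hashes resolve
in the current history (not necessarily the exact commands used for this
commit):

    # Stage the inverse of each listed commit without committing; the order
    # may need to be newest-first to apply without conflicts.
    git revert --no-commit \
        1fd5fefcb1 edfb500ee7 b25c7653e6 5d952717ff 964bbf489a \
        a4088f3a1e 18def48dfe 3205c9b691 2f57a35d2d f706427815 \
        fc88c06769
    # Record everything as a single signed-off revert commit.
    git commit -s -m "revert: video_core changes reverted due to instability"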

Signed-off-by: Zephyron <zephyron@citron-emu.org>
commit 03aab9becc
parent ba98d0f15c
Author: Zephyron
Date:   2025-05-03 17:37:26 +10:00
29 changed files with 180 additions and 3016 deletions

@ -19,9 +19,9 @@ cmake .. \
-DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON \
-DENABLE_QT_TRANSLATION=ON \
-DUSE_DISCORD_PRESENCE=ON \
-DYUZU_CRASH_DUMPS=ON \
-DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} \
-DYUZU_USE_BUNDLED_FFMPEG=ON \
-DCITRON_CRASH_DUMPS=ON \
-DCITRON_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} \
-DCITRON_USE_BUNDLED_FFMPEG=ON \
-GNinja
ninja

@ -11,7 +11,7 @@ gcc -v
tx --version
mkdir build && cd build
cmake .. -DENABLE_QT_TRANSLATION=ON -DGENERATE_QT_TRANSLATION=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_SDL2=OFF -DYUZU_TESTS=OFF -DYUZU_USE_BUNDLED_VCPKG=ON
cmake .. -DENABLE_QT_TRANSLATION=ON -DGENERATE_QT_TRANSLATION=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_SDL2=OFF -DCITRON_TESTS=OFF -DCITRON_USE_BUNDLED_VCPKG=ON
make translation
cd ..

@ -17,9 +17,9 @@ cmake .. \
-DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON \
-DENABLE_QT_TRANSLATION=ON \
-DUSE_CCACHE=ON \
-DYUZU_USE_BUNDLED_SDL2=OFF \
-DYUZU_USE_EXTERNAL_SDL2=OFF \
-DYUZU_TESTS=OFF \
-DCITRON_USE_BUNDLED_SDL2=OFF \
-DCITRON_USE_EXTERNAL_SDL2=OFF \
-DCITRON_TESTS=OFF \
-GNinja
ninja citron citron-cmd

@ -75,11 +75,6 @@ void QtConfig::ReadQtValues() {
ReadUIValues();
}
ReadQtControlValues();
// Always disable memory snapshots and hybrid memory
Settings::values.use_gpu_memory_manager.SetValue(false);
Settings::values.enable_memory_snapshots.SetValue(false);
Settings::values.use_nce.SetValue(false);
}
void QtConfig::ReadQtPlayerValues(const std::size_t player_index) {
@ -341,11 +336,6 @@ void QtConfig::SaveQtValues() {
}
SaveQtControlValues();
// Ensure memory snapshots and hybrid memory are always disabled
Settings::values.use_gpu_memory_manager.SetValue(false);
Settings::values.enable_memory_snapshots.SetValue(false);
Settings::values.use_nce.SetValue(false);
WriteToIni();
}

@ -760,17 +760,6 @@ Widget::Widget(Settings::BasicSetting* setting_, const TranslationMap& translati
enable &= setting.UsingGlobal();
}
// Disable memory snapshot and hybrid memory checkboxes
if (static_cast<u32>(id) == Settings::values.use_gpu_memory_manager.Id() ||
static_cast<u32>(id) == Settings::values.enable_memory_snapshots.Id() ||
static_cast<u32>(id) == Settings::values.use_nce.Id()) {
enable = false;
// Also disable the checkbox to prevent it from being changed
if (checkbox) {
checkbox->setEnabled(false);
}
}
this->setEnabled(enable);
this->setToolTip(tooltip);

@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@ -212,11 +211,6 @@ struct Values {
true,
true,
&use_speed_limit};
SwitchableSetting<bool> use_nce{linkage, true, "Use Native Code Execution", Category::Core};
// Memory
SwitchableSetting<bool> use_gpu_memory_manager{linkage, false, "Use GPU Memory Manager", Category::Core};
SwitchableSetting<bool> enable_memory_snapshots{linkage, false, "Enable Memory Snapshots", Category::Core};
// Cpu
SwitchableSetting<CpuBackend, true> cpu_backend{linkage,
@ -401,11 +395,11 @@ struct Values {
Category::RendererAdvanced};
SwitchableSetting<bool> async_presentation{linkage,
#ifdef ANDROID
false, // Disabled due to instability causing crashes
true,
#else
false, // Disabled due to instability causing crashes
false,
#endif
"async_presentation", Category::RendererAdvanced}; // Hidden from UI due to instability
"async_presentation", Category::RendererAdvanced};
SwitchableSetting<bool> renderer_force_max_clock{linkage, false, "force_max_clock",
Category::RendererAdvanced};
SwitchableSetting<bool> use_reactive_flushing{linkage,
@ -618,30 +612,24 @@ struct Values {
Category::Network};
// WebService
Setting<bool> enable_telemetry{linkage, true, "enable_telemetry", Category::WebService};
Setting<std::string> web_api_url{linkage, "api.ynet-fun.xyz", "web_api_url",
Setting<bool> enable_telemetry{linkage, false, "enable_telemetry", Category::WebService};
Setting<std::string> web_api_url{linkage, "https://api.ynet-fun.xyz", "web_api_url",
Category::WebService};
Setting<std::string> citron_username{linkage, std::string(), "citron_username",
Category::WebService};
Setting<std::string> citron_token{linkage, std::string(), "citron_token", Category::WebService};
// Memory
Setting<bool> use_gpu_memory_manager{linkage, false, "use_gpu_memory_manager", Category::System};
Setting<bool> enable_memory_snapshots{linkage, false, "enable_memory_snapshots", Category::System};
// Add-Ons
std::map<u64, std::vector<std::string>> disabled_addons;
// Renderer Advanced Settings
SwitchableSetting<bool> use_enhanced_shader_building{linkage, false, "Enhanced Shader Building",
Category::RendererAdvanced};
// Add a new setting for shader compilation priority
SwitchableSetting<int> shader_compilation_priority{linkage, 0, "Shader Compilation Priority",
Category::RendererAdvanced};
};
extern Values values;
void UpdateGPUAccuracy();
// boold isGPULevelNormal();
// TODO: ZEP
bool IsGPULevelExtreme();
bool IsGPULevelHigh();

@ -1,5 +1,4 @@
# SPDX-FileCopyrightText: 2018 yuzu Emulator Project
# SPDX-FileCopyrightText: 2025 Citron Emulator Project
# SPDX-License-Identifier: GPL-2.0-or-later
add_subdirectory(host_shaders)
@ -248,8 +247,6 @@ add_library(video_core STATIC
renderer_vulkan/vk_turbo_mode.h
renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h
renderer_vulkan/vk_texture_manager.cpp
renderer_vulkan/vk_texture_manager.h
shader_cache.cpp
shader_cache.h
shader_environment.cpp
@ -309,8 +306,6 @@ add_library(video_core STATIC
vulkan_common/vulkan_library.h
vulkan_common/vulkan_memory_allocator.cpp
vulkan_common/vulkan_memory_allocator.h
vulkan_common/hybrid_memory.cpp
vulkan_common/hybrid_memory.h
vulkan_common/vulkan_surface.cpp
vulkan_common/vulkan_surface.h
vulkan_common/vulkan_wrapper.cpp
@ -397,4 +392,8 @@ if (ANDROID AND ARCHITECTURE_arm64)
target_link_libraries(video_core PRIVATE adrenotools)
endif()
if (ARCHITECTURE_arm64)
target_link_libraries(video_core PRIVATE sse2neon)
endif()
create_target_directory_groups(video_core)

@ -1,13 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <string>
#include <vector>
#include <chrono>
#include <functional>
#include "common/settings.h" // for enum class Settings::ShaderBackend
#include "common/thread_worker.h"
@ -237,68 +234,26 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
auto func{[this, sources_ = std::move(sources), sources_spirv_ = std::move(sources_spirv),
shader_notify, backend, in_parallel,
force_context_flush](ShaderContext::Context*) mutable {
// Track time for shader compilation for possible performance tuning
const auto start_time = std::chrono::high_resolution_clock::now();
// Prepare compilation steps for all shader stages
std::vector<std::function<void()>> compilation_steps;
compilation_steps.reserve(5); // Maximum number of shader stages
// Prepare all compilation steps first to better distribute work
for (size_t stage = 0; stage < 5; ++stage) {
switch (backend) {
case Settings::ShaderBackend::Glsl:
if (!sources_[stage].empty()) {
compilation_steps.emplace_back([this, stage, source = sources_[stage]]() {
source_programs[stage] = CreateProgram(source, Stage(stage));
});
source_programs[stage] = CreateProgram(sources_[stage], Stage(stage));
}
break;
case Settings::ShaderBackend::Glasm:
if (!sources_[stage].empty()) {
compilation_steps.emplace_back([this, stage, source = sources_[stage]]() {
assembly_programs[stage] = CompileProgram(source, AssemblyStage(stage));
});
assembly_programs[stage] =
CompileProgram(sources_[stage], AssemblyStage(stage));
}
break;
case Settings::ShaderBackend::SpirV:
if (!sources_spirv_[stage].empty()) {
compilation_steps.emplace_back([this, stage, source = sources_spirv_[stage]]() {
source_programs[stage] = CreateProgram(source, Stage(stage));
});
source_programs[stage] = CreateProgram(sources_spirv_[stage], Stage(stage));
}
break;
}
}
// If we're running in parallel, use high-priority execution for vertex and fragment shaders
// as these are typically needed first by the renderer
if (in_parallel && compilation_steps.size() > 1) {
// Execute vertex (0) and fragment (4) shaders first if they exist
for (size_t priority_stage : {0, 4}) {
for (size_t i = 0; i < compilation_steps.size(); ++i) {
if ((i == priority_stage || (priority_stage == 0 && i <= 1)) && i < compilation_steps.size()) {
compilation_steps[i]();
compilation_steps[i] = [](){}; // Mark as executed
}
}
}
}
// Execute all remaining compilation steps
for (auto& step : compilation_steps) {
step(); // Will do nothing for already executed steps
}
// Performance measurement for possible logging or optimization
const auto end_time = std::chrono::high_resolution_clock::now();
const auto compilation_time = std::chrono::duration_cast<std::chrono::milliseconds>(
end_time - start_time).count();
if (compilation_time > 50) { // Only log slow compilations
LOG_DEBUG(Render_OpenGL, "Shader compilation took {}ms", compilation_time);
}
if (force_context_flush || in_parallel) {
std::scoped_lock lock{built_mutex};
built_fence.Create();
@ -668,41 +623,15 @@ void GraphicsPipeline::WaitForBuild() {
is_built = true;
}
bool GraphicsPipeline::IsBuilt() const noexcept {
bool GraphicsPipeline::IsBuilt() noexcept {
if (is_built) {
return true;
}
if (!built_fence.handle) {
if (built_fence.handle == 0) {
return false;
}
// Check if the async build has finished by polling the fence
const GLsync sync = built_fence.handle;
const GLuint result = glClientWaitSync(sync, 0, 0);
if (result == GL_ALREADY_SIGNALED || result == GL_CONDITION_SATISFIED) {
// Mark this as mutable even though we're in a const method - this is
// essentially a cached value update which is acceptable
const_cast<GraphicsPipeline*>(this)->is_built = true;
return true;
}
// For better performance tracking, capture time spent waiting for shaders
static thread_local std::chrono::high_resolution_clock::time_point last_shader_wait_log;
static thread_local u32 shader_wait_count = 0;
auto now = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
now - last_shader_wait_log).count();
// Log shader compilation status periodically to help diagnose performance issues
if (elapsed >= 5) { // Log every 5 seconds
shader_wait_count++;
LOG_DEBUG(Render_OpenGL, "Waiting for async shader compilation... (count={})",
shader_wait_count);
last_shader_wait_log = now;
}
return false;
is_built = built_fence.IsSignaled();
return is_built;
}
} // namespace OpenGL

@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@ -103,7 +102,7 @@ public:
return uses_local_memory;
}
[[nodiscard]] bool IsBuilt() const noexcept;
[[nodiscard]] bool IsBuilt() noexcept;
template <typename Spec>
static auto MakeConfigureSpecFunc() {

@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <atomic>
@ -392,118 +391,18 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n
if (!use_asynchronous_shaders) {
return pipeline;
}
// Advanced heuristics for smarter async shader compilation in OpenGL
// Track shader compilation statistics
static thread_local u32 async_shader_count = 0;
static thread_local std::chrono::high_resolution_clock::time_point last_async_shader_log;
auto now = std::chrono::high_resolution_clock::now();
// Enhanced detection of UI and critical shaders
const bool is_ui_shader = !maxwell3d->regs.zeta_enable;
// Check for blend state
const bool has_blend = maxwell3d->regs.blend.enable[0] != 0;
// Check if texture sampling is likely based on texture units used
const bool has_texture = maxwell3d->regs.tex_header.Address() != 0;
// Check for clear operations
const bool is_clear_operation = maxwell3d->regs.clear_surface.raw != 0;
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const bool small_draw = draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6;
// Track pipeline usage patterns for better prediction
// Use pipeline address as hash since we don't have a Hash() method
const u64 draw_config_hash = reinterpret_cast<u64>(pipeline);
static thread_local std::unordered_map<u64, u32> shader_usage_count;
static thread_local std::unordered_map<u64, bool> shader_is_frequent;
// Increment usage counter for this shader
shader_usage_count[draw_config_hash]++;
// After a certain threshold, mark as frequently used
if (shader_usage_count[draw_config_hash] >= 3) {
shader_is_frequent[draw_config_hash] = true;
}
// Get shader priority from settings
const int shader_priority = Settings::values.shader_compilation_priority.GetValue();
// Always wait for UI shaders if settings specify high priority
if (is_ui_shader && (shader_priority >= 0 || small_draw)) {
return pipeline;
}
// Wait for frequently used small draw shaders
if (small_draw && shader_is_frequent[draw_config_hash]) {
return pipeline;
}
// Wait for clear operations as they're usually critical
if (is_clear_operation) {
return pipeline;
}
// Force wait if high shader priority in settings
if (shader_priority > 1) {
return pipeline;
}
// Improved depth-based heuristics
// If something is using depth, we can assume that games are not rendering anything which
// will be used one time.
if (maxwell3d->regs.zeta_enable) {
// Check if this is likely a shadow map or important depth-based effect
// Check if depth write is enabled and color writes are disabled for all render targets
bool depth_only_pass = maxwell3d->regs.depth_write_enabled;
if (depth_only_pass) {
bool all_color_masked = true;
for (size_t i = 0; i < maxwell3d->regs.color_mask.size(); i++) {
// Check if any color component is enabled (R, G, B, A fields of ColorMask)
if ((maxwell3d->regs.color_mask[i].raw & 0x1111) != 0) {
all_color_masked = false;
break;
}
}
// If depth write enabled and all colors masked, this is likely a shadow pass
if (all_color_masked) {
// Likely a shadow pass, wait for compilation to avoid flickering shadows
return pipeline;
}
}
// For other depth-enabled renders, use async compilation
return nullptr;
}
// Refined small draw detection
if (small_draw) {
// Check if this might be a UI element that we missed
if (has_blend && has_texture) {
// Likely a textured UI element, wait for it
// If games are using a small index count, we can assume these are full screen quads.
// Usually these shaders are only used once for building textures so we can assume they
// can't be built async
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
return pipeline;
}
// For other small draws, assume they're one-off effects
return pipeline;
}
// Log compilation statistics periodically
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
now - last_async_shader_log).count();
if (elapsed >= 10) {
async_shader_count = 0;
last_async_shader_log = now;
}
async_shader_count++;
if (async_shader_count % 100 == 1) {
float progress = 0.5f; // Default to 50% when we can't determine actual progress
if (workers) {
// TODO: Implement progress tracking
}
LOG_DEBUG(Render_OpenGL, "Async shader compilation in progress (count={}), completion={:.1f}%",
async_shader_count, progress * 100.0f);
}
return nullptr;
}
@ -709,33 +608,9 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
}
std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const {
// Calculate optimal number of workers based on available CPU cores
// Leave at least 1 core for main thread and other operations
// Use more cores for more parallelism in shader compilation
const u32 num_worker_threads = std::max(std::thread::hardware_concurrency(), 2U);
const u32 optimal_workers = num_worker_threads <= 3 ?
num_worker_threads - 1 : // On dual/quad core, leave 1 core free
num_worker_threads - 2; // On 6+ core systems, leave 2 cores free for other tasks
auto worker = std::make_unique<ShaderWorker>(
optimal_workers,
return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1,
"GlShaderBuilder",
[this] {
auto context = Context{emu_window};
// Apply thread priority based on settings
// This allows users to control how aggressive shader compilation is
const int priority = Settings::values.shader_compilation_priority.GetValue();
if (priority != 0) {
Common::SetCurrentThreadPriority(
priority > 0 ? Common::ThreadPriority::High : Common::ThreadPriority::Low);
}
return context;
}
);
return worker;
[this] { return Context{emu_window}; });
}
} // namespace OpenGL

@ -151,7 +151,7 @@ FramebufferTextureInfo Layer::LoadFBToScreenInfo(const Tegra::FramebufferConfig&
// Update existing texture
// TODO: Test what happens on hardware when you change the framebuffer dimensions so that
// they differ from the LCD resolution.
// TODO: Applications could theoretically crash citron here by specifying too large
// TODO: Applications could theoretically crash yuzu here by specifying too large
// framebuffer sizes. We should make sure that this cannot happen.
glTextureSubImage2D(framebuffer_texture.resource.handle, 0, 0, 0, framebuffer.width,
framebuffer.height, framebuffer_texture.gl_format,

@ -71,7 +71,7 @@ const char* GetType(GLenum type) {
void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
const GLchar* message, const void* user_param) {
const char format[] = "{} {} {}: {}";
constexpr const char* format = "{} {} {}: {}";
const char* const str_source = GetSource(source);
const char* const str_type = GetType(type);

@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
@ -29,15 +28,6 @@
#include "video_core/surface.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
#include "video_core/host_shaders/convert_abgr8_srgb_to_d24s8_frag_spv.h"
#include "video_core/host_shaders/convert_rgba8_to_bgra8_frag_spv.h"
#include "video_core/host_shaders/convert_yuv420_to_rgb_comp_spv.h"
#include "video_core/host_shaders/convert_rgb_to_yuv420_comp_spv.h"
#include "video_core/host_shaders/convert_bc7_to_rgba8_comp_spv.h"
#include "video_core/host_shaders/convert_astc_hdr_to_rgba16f_comp_spv.h"
#include "video_core/host_shaders/convert_rgba16f_to_rgba8_frag_spv.h"
#include "video_core/host_shaders/dither_temporal_frag_spv.h"
#include "video_core/host_shaders/dynamic_resolution_scale_comp_spv.h"
namespace Vulkan {
@ -449,15 +439,6 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
convert_d32f_to_abgr8_frag(BuildShader(device, CONVERT_D32F_TO_ABGR8_FRAG_SPV)),
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
convert_abgr8_srgb_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_SRGB_TO_D24S8_FRAG_SPV)),
convert_rgba_to_bgra_frag(BuildShader(device, CONVERT_RGBA8_TO_BGRA8_FRAG_SPV)),
convert_yuv420_to_rgb_comp(BuildShader(device, CONVERT_YUV420_TO_RGB_COMP_SPV)),
convert_rgb_to_yuv420_comp(BuildShader(device, CONVERT_RGB_TO_YUV420_COMP_SPV)),
convert_bc7_to_rgba8_comp(BuildShader(device, CONVERT_BC7_TO_RGBA8_COMP_SPV)),
convert_astc_hdr_to_rgba16f_comp(BuildShader(device, CONVERT_ASTC_HDR_TO_RGBA16F_COMP_SPV)),
convert_rgba16f_to_rgba8_frag(BuildShader(device, CONVERT_RGBA16F_TO_RGBA8_FRAG_SPV)),
dither_temporal_frag(BuildShader(device, DITHER_TEMPORAL_FRAG_SPV)),
dynamic_resolution_scale_comp(BuildShader(device, DYNAMIC_RESOLUTION_SCALE_COMP_SPV)),
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {}
@ -608,14 +589,6 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipelineDepthTargetEx(convert_abgr8_srgb_to_d24s8_pipeline,
dst_framebuffer->RenderPass(),
convert_abgr8_srgb_to_d24s8_frag);
Convert(*convert_abgr8_srgb_to_d24s8_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask,
const std::array<f32, 4>& clear_color,
const Region2D& dst_region) {
@ -946,11 +919,13 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline(
return *clear_stencil_pipelines.back();
}
void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass,
bool is_target_depth) {
if (pipeline) {
return;
}
VkShaderModule frag_shader = *convert_float_to_depth_frag;
VkShaderModule frag_shader =
is_target_depth ? *convert_float_to_depth_frag : *convert_depth_to_float_frag;
const std::array stages = MakeStages(*full_screen_vert, frag_shader);
pipeline = device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
@ -964,8 +939,9 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
.pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr,
.pColorBlendState = is_target_depth ? &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO
: &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.layout = *one_texture_pipeline_layout,
.renderPass = renderpass,
@ -975,33 +951,12 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend
});
}
void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
ConvertPipeline(pipeline, renderpass, false);
}
void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
if (pipeline) {
return;
}
VkShaderModule frag_shader = *convert_depth_to_float_frag;
const std::array stages = MakeStages(*full_screen_vert, frag_shader);
pipeline = device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(stages.size()),
.pStages = stages.data(),
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pTessellationState = nullptr,
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.layout = *one_texture_pipeline_layout,
.renderPass = renderpass,
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
});
ConvertPipeline(pipeline, renderpass, true);
}
void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
@ -1044,100 +999,4 @@ void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRen
ConvertPipelineEx(pipeline, renderpass, module, true, true);
}
void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass,
bool is_target_depth) {
if (pipeline) {
return;
}
VkShaderModule frag_shader =
is_target_depth ? *convert_float_to_depth_frag : *convert_depth_to_float_frag;
const std::array stages = MakeStages(*full_screen_vert, frag_shader);
pipeline = device.GetLogical().CreateGraphicsPipeline({
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stageCount = static_cast<u32>(stages.size()),
.pStages = stages.data(),
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pTessellationState = nullptr,
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr,
.pColorBlendState = is_target_depth ? &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO
: &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.layout = *one_texture_pipeline_layout,
.renderPass = renderpass,
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
.basePipelineIndex = 0,
});
}
void BlitImageHelper::ConvertRGBAtoGBRA(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(convert_rgba_to_bgra_pipeline,
dst_framebuffer->RenderPass(),
false);
Convert(*convert_rgba_to_bgra_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertYUV420toRGB(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(convert_yuv420_to_rgb_pipeline,
dst_framebuffer->RenderPass(),
false);
Convert(*convert_yuv420_to_rgb_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertRGBtoYUV420(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(convert_rgb_to_yuv420_pipeline,
dst_framebuffer->RenderPass(),
false);
Convert(*convert_rgb_to_yuv420_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertBC7toRGBA8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(convert_bc7_to_rgba8_pipeline,
dst_framebuffer->RenderPass(),
false);
Convert(*convert_bc7_to_rgba8_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertASTCHDRtoRGBA16F(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(convert_astc_hdr_to_rgba16f_pipeline,
dst_framebuffer->RenderPass(),
false);
Convert(*convert_astc_hdr_to_rgba16f_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(convert_rgba16f_to_rgba8_pipeline,
dst_framebuffer->RenderPass(),
false);
Convert(*convert_rgba16f_to_rgba8_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ApplyDitherTemporal(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(dither_temporal_pipeline,
dst_framebuffer->RenderPass(),
false);
Convert(*dither_temporal_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(dynamic_resolution_scale_pipeline,
dst_framebuffer->RenderPass(),
false);
Convert(*dynamic_resolution_scale_pipeline, dst_framebuffer, src_image_view);
}
} // namespace Vulkan

@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@ -68,8 +67,6 @@ public:
void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertD32FToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
@ -85,15 +82,6 @@ public:
u8 stencil_mask, u32 stencil_ref, u32 stencil_compare_mask,
const Region2D& dst_region);
void ConvertRGBAtoGBRA(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertYUV420toRGB(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertRGBtoYUV420(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertBC7toRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertASTCHDRtoRGBA16F(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ApplyDitherTemporal(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
void ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
@ -148,15 +136,6 @@ private:
vk::ShaderModule convert_d32f_to_abgr8_frag;
vk::ShaderModule convert_d24s8_to_abgr8_frag;
vk::ShaderModule convert_s8d24_to_abgr8_frag;
vk::ShaderModule convert_abgr8_srgb_to_d24s8_frag;
vk::ShaderModule convert_rgba_to_bgra_frag;
vk::ShaderModule convert_yuv420_to_rgb_comp;
vk::ShaderModule convert_rgb_to_yuv420_comp;
vk::ShaderModule convert_bc7_to_rgba8_comp;
vk::ShaderModule convert_astc_hdr_to_rgba16f_comp;
vk::ShaderModule convert_rgba16f_to_rgba8_frag;
vk::ShaderModule dither_temporal_frag;
vk::ShaderModule dynamic_resolution_scale_comp;
vk::Sampler linear_sampler;
vk::Sampler nearest_sampler;
@ -177,15 +156,6 @@ private:
vk::Pipeline convert_d32f_to_abgr8_pipeline;
vk::Pipeline convert_d24s8_to_abgr8_pipeline;
vk::Pipeline convert_s8d24_to_abgr8_pipeline;
vk::Pipeline convert_abgr8_srgb_to_d24s8_pipeline;
vk::Pipeline convert_rgba_to_bgra_pipeline;
vk::Pipeline convert_yuv420_to_rgb_pipeline;
vk::Pipeline convert_rgb_to_yuv420_pipeline;
vk::Pipeline convert_bc7_to_rgba8_pipeline;
vk::Pipeline convert_astc_hdr_to_rgba16f_pipeline;
vk::Pipeline convert_rgba16f_to_rgba8_pipeline;
vk::Pipeline dither_temporal_pipeline;
vk::Pipeline dynamic_resolution_scale_pipeline;
};
} // namespace Vulkan

@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
@ -9,8 +8,6 @@
#include <optional>
#include <string>
#include <vector>
#include <fstream>
#include <filesystem>
#include <fmt/ranges.h>
@ -38,7 +35,6 @@
#include "video_core/vulkan_common/vulkan_instance.h"
#include "video_core/vulkan_common/vulkan_library.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/hybrid_memory.h"
#include "video_core/vulkan_common/vulkan_surface.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@ -127,93 +123,12 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
PresentFiltersForAppletCapture),
rasterizer(render_window, gpu, device_memory, device, memory_allocator, state_tracker,
scheduler),
hybrid_memory(std::make_unique<HybridMemory>(device, memory_allocator)),
texture_manager(device, memory_allocator),
shader_manager(device),
applet_frame() {
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
turbo_mode.emplace(instance, dld);
scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
}
// Initialize HybridMemory system
if (false && Settings::values.use_gpu_memory_manager.GetValue()) {
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
try {
// Define memory size with explicit types to avoid conversion warnings
constexpr size_t memory_size_mb = 64;
constexpr size_t memory_size_bytes = memory_size_mb * 1024 * 1024;
void* guest_memory_base = nullptr;
#if defined(_WIN32)
// On Windows, use VirtualAlloc to reserve (but not commit) memory
const SIZE_T win_size = static_cast<SIZE_T>(memory_size_bytes);
LPVOID result = VirtualAlloc(nullptr, win_size, MEM_RESERVE, PAGE_NOACCESS);
if (result != nullptr) {
guest_memory_base = result;
}
#else
// On Linux/Android, use aligned_alloc
guest_memory_base = std::aligned_alloc(4096, memory_size_bytes);
#endif
if (guest_memory_base != nullptr) {
try {
hybrid_memory->InitializeGuestMemory(guest_memory_base, memory_size_bytes);
LOG_INFO(Render_Vulkan, "HybridMemory initialized with {} MB of fault-managed memory", memory_size_mb);
} catch (const std::exception&) {
#if defined(_WIN32)
if (guest_memory_base != nullptr) {
const LPVOID win_ptr = static_cast<LPVOID>(guest_memory_base);
VirtualFree(win_ptr, 0, MEM_RELEASE);
}
#else
std::free(guest_memory_base);
#endif
throw;
}
}
} catch (const std::exception& e) {
LOG_ERROR(Render_Vulkan, "Failed to initialize HybridMemory: {}", e.what());
}
#else
LOG_INFO(Render_Vulkan, "Fault-managed memory not supported on this platform");
#endif
}
// Initialize enhanced shader compilation system
shader_manager.SetScheduler(&scheduler);
LOG_INFO(Render_Vulkan, "Enhanced shader compilation system initialized");
// Preload common shaders if enabled
if (Settings::values.use_asynchronous_shaders.GetValue()) {
// Use a simple shader directory path - can be updated to match Citron's actual path structure
const std::string shader_dir = "./shaders";
std::vector<std::string> common_shaders;
// Add paths to common shaders that should be preloaded
// These will be compiled in parallel for faster startup
try {
if (std::filesystem::exists(shader_dir)) {
for (const auto& entry : std::filesystem::directory_iterator(shader_dir)) {
if (entry.is_regular_file() && entry.path().extension() == ".spv") {
common_shaders.push_back(entry.path().string());
}
}
if (!common_shaders.empty()) {
LOG_INFO(Render_Vulkan, "Preloading {} common shaders", common_shaders.size());
shader_manager.PreloadShaders(common_shaders);
}
} else {
LOG_INFO(Render_Vulkan, "Shader directory not found at {}", shader_dir);
}
} catch (const std::exception& e) {
LOG_ERROR(Render_Vulkan, "Error during shader preloading: {}", e.what());
}
}
Report();
InitializePlatformSpecific();
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
@ -311,35 +226,6 @@ void RendererVulkan::RenderScreenshot(std::span<const Tegra::FramebufferConfig>
return;
}
// If memory snapshots are enabled, take a snapshot with the screenshot
if (false && Settings::values.enable_memory_snapshots.GetValue() && hybrid_memory) {
try {
const auto now = std::chrono::system_clock::now();
const auto now_time_t = std::chrono::system_clock::to_time_t(now);
std::tm local_tm;
#ifdef _WIN32
localtime_s(&local_tm, &now_time_t);
#else
localtime_r(&now_time_t, &local_tm);
#endif
char time_str[128];
std::strftime(time_str, sizeof(time_str), "%Y%m%d_%H%M%S", &local_tm);
std::string snapshot_path = fmt::format("snapshots/memory_snapshot_{}.bin", time_str);
hybrid_memory->SaveSnapshot(snapshot_path);
// Differential snapshot for tracking memory changes
if (false && Settings::values.use_gpu_memory_manager.GetValue()) {
std::string diff_path = fmt::format("snapshots/diff_snapshot_{}.bin", time_str);
hybrid_memory->SaveDifferentialSnapshot(diff_path);
hybrid_memory->ResetDirtyTracking();
LOG_INFO(Render_Vulkan, "Memory snapshots saved with screenshot");
}
} catch (const std::exception& e) {
LOG_ERROR(Render_Vulkan, "Failed to save memory snapshot: {}", e.what());
}
}
const auto& layout{renderer_settings.screenshot_framebuffer_layout};
const auto dst_buffer = RenderToBuffer(framebuffers, layout, VK_FORMAT_B8G8R8A8_UNORM,
layout.width * layout.height * 4);
@ -391,81 +277,4 @@ void RendererVulkan::RenderAppletCaptureLayer(
CaptureFormat);
}
bool RendererVulkan::HandleVulkanError(VkResult result, const std::string& operation) {
if (result == VK_SUCCESS) {
return true;
}
if (result == VK_ERROR_DEVICE_LOST) {
LOG_CRITICAL(Render_Vulkan, "Vulkan device lost during {}", operation);
RecoverFromError();
return false;
}
if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY || result == VK_ERROR_OUT_OF_HOST_MEMORY) {
LOG_CRITICAL(Render_Vulkan, "Vulkan out of memory during {}", operation);
// Potential recovery: clear caches, reduce workload
texture_manager.CleanupTextureCache();
return false;
}
LOG_ERROR(Render_Vulkan, "Vulkan error during {}: {}", operation, result);
return false;
}
void RendererVulkan::RecoverFromError() {
LOG_INFO(Render_Vulkan, "Attempting to recover from Vulkan error");
// Wait for device to finish operations
void(device.GetLogical().WaitIdle());
// Process all pending commands in our queue
ProcessAllCommands();
// Wait for any async shader compilations to finish
shader_manager.WaitForCompilation();
// Clean up resources that might be causing problems
texture_manager.CleanupTextureCache();
// Reset command buffers and pipelines
scheduler.Flush();
LOG_INFO(Render_Vulkan, "Recovery attempt completed");
}
void RendererVulkan::InitializePlatformSpecific() {
LOG_INFO(Render_Vulkan, "Initializing platform-specific Vulkan components");
#if defined(_WIN32) || defined(_WIN64)
LOG_INFO(Render_Vulkan, "Initializing Vulkan for Windows");
// Windows-specific initialization
#elif defined(__linux__)
LOG_INFO(Render_Vulkan, "Initializing Vulkan for Linux");
// Linux-specific initialization
#elif defined(__ANDROID__)
LOG_INFO(Render_Vulkan, "Initializing Vulkan for Android");
// Android-specific initialization
#else
LOG_INFO(Render_Vulkan, "Platform-specific Vulkan initialization not implemented for this platform");
#endif
// Create a compute buffer using the HybridMemory system if enabled
if (false && Settings::values.use_gpu_memory_manager.GetValue()) {
try {
// Create a small compute buffer for testing
const VkDeviceSize buffer_size = 1 * 1024 * 1024; // 1 MB
ComputeBuffer compute_buffer = hybrid_memory->CreateComputeBuffer(
buffer_size,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
MemoryUsage::DeviceLocal);
LOG_INFO(Render_Vulkan, "Successfully created compute buffer using HybridMemory");
} catch (const std::exception& e) {
LOG_ERROR(Render_Vulkan, "Failed to create compute buffer: {}", e.what());
}
}
}
} // namespace Vulkan

@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@ -7,7 +6,6 @@
#include <memory>
#include <string>
#include <variant>
#include <functional>
#include "common/dynamic_library.h"
#include "video_core/host1x/gpu_device_memory_manager.h"
@ -19,11 +17,8 @@
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_turbo_mode.h"
#include "video_core/renderer_vulkan/vk_texture_manager.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/hybrid_memory.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Core {
@ -63,9 +58,6 @@ public:
return device.GetDriverName();
}
// Enhanced platform-specific initialization
void InitializePlatformSpecific();
private:
void Report() const;
@ -75,10 +67,6 @@ private:
void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers);
void RenderAppletCaptureLayer(std::span<const Tegra::FramebufferConfig> framebuffers);
// Enhanced error handling
bool HandleVulkanError(VkResult result, const std::string& operation);
void RecoverFromError();
Core::TelemetrySession& telemetry_session;
Tegra::MaxwellDeviceMemoryManager& device_memory;
Tegra::GPU& gpu;
@ -102,13 +90,6 @@ private:
RasterizerVulkan rasterizer;
std::optional<TurboMode> turbo_mode;
// HybridMemory for advanced memory management
std::unique_ptr<HybridMemory> hybrid_memory;
// Enhanced texture and shader management
TextureManager texture_manager;
ShaderManager shader_manager;
Frame applet_frame;
};

@ -1,10 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <vector>
#include <chrono>
#include <boost/container/small_vector.hpp>
@ -39,23 +37,10 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel
if (shader_notify) {
shader_notify->MarkShaderBuilding();
}
// Track compilation start time for performance metrics
const auto start_time = std::chrono::high_resolution_clock::now();
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
uniform_buffer_sizes.begin());
auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics, start_time] {
// Simplify the high priority determination - we can't use workgroup_size
// because it doesn't exist, so use a simpler heuristic
const bool is_high_priority = false; // Default to false until we can find a better criterion
if (is_high_priority) {
// Increase thread priority for small compute shaders that are likely part of critical path
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
}
auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics] {
DescriptorLayoutBuilder builder{device};
builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
@ -64,11 +49,15 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel
descriptor_update_template =
builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false);
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
.pNext = nullptr,
.requiredSubgroupSize = GuestWarpSize,
};
VkPipelineCreateFlags flags{};
if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
}
pipeline = device.GetLogical().CreateComputePipeline(
{
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
@ -76,7 +65,8 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel
.flags = flags,
.stage{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.pNext =
device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *spv_module,
@ -89,15 +79,6 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel
},
*pipeline_cache);
// Performance measurement
const auto end_time = std::chrono::high_resolution_clock::now();
const auto compilation_time = std::chrono::duration_cast<std::chrono::milliseconds>(
end_time - start_time).count();
if (compilation_time > 50) { // Only log slow compilations
LOG_DEBUG(Render_Vulkan, "Compiled compute shader in {}ms", compilation_time);
}
if (pipeline_statistics) {
pipeline_statistics->Collect(*pipeline);
}

@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
@ -259,16 +258,7 @@ GraphicsPipeline::GraphicsPipeline(
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
num_textures += Shader::NumDescriptors(info->texture_descriptors);
}
// Track compilation start time for performance metrics
const auto start_time = std::chrono::high_resolution_clock::now();
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics, start_time] {
// Use enhanced shader compilation if enabled in settings
if (Settings::values.use_enhanced_shader_building.GetValue()) {
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
}
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
uses_push_descriptor = builder.CanUsePushDescriptor();
descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor);
@ -283,17 +273,6 @@ GraphicsPipeline::GraphicsPipeline(
const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
Validate();
MakePipeline(render_pass);
// Performance measurement
const auto end_time = std::chrono::high_resolution_clock::now();
const auto compilation_time = std::chrono::duration_cast<std::chrono::milliseconds>(
end_time - start_time).count();
// Log shader compilation time for slow shaders to help diagnose performance issues
if (compilation_time > 100) { // Only log very slow compilations
LOG_DEBUG(Render_Vulkan, "Compiled graphics pipeline in {}ms", compilation_time);
}
if (pipeline_statistics) {
pipeline_statistics->Collect(*pipeline);
}
@ -313,19 +292,6 @@ GraphicsPipeline::GraphicsPipeline(
configure_func = ConfigureFunc(spv_modules, stage_infos);
}
GraphicsPipeline* GraphicsPipeline::Clone() const {
// Create a new pipeline that shares the same resources
// This is for pipeline deduplication
if (!IsBuilt()) {
LOG_WARNING(Render_Vulkan, "Attempted to clone unbuilt pipeline");
return nullptr;
}
return const_cast<GraphicsPipeline*>(this);
}
void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
transition_keys.push_back(transition->key);
transitions.push_back(transition);
@ -345,9 +311,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto& regs{maxwell3d->regs};
const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
// Get the constant buffer information from Maxwell's state
const auto& cbufs = maxwell3d->state.shader_stages[stage].const_buffers;
const Shader::Info& info{stage_infos[stage]};
buffer_cache.UnbindGraphicsStorageBuffers(stage);
if constexpr (Spec::has_storage_buffers) {
@ -359,7 +322,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
++ssbo_index;
}
}
const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers};
const auto read_handle{[&](const auto& desc, u32 index) {
ASSERT(cbufs[desc.cbuf_index].enabled);
const u32 index_offset{index << desc.size_shift};
@ -381,7 +344,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
return TexturePair(gpu_memory->Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};

@ -84,9 +84,6 @@ public:
GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
GraphicsPipeline(const GraphicsPipeline&) = delete;
// Create a deep copy of this pipeline for reuse
[[nodiscard]] GraphicsPipeline* Clone() const;
void AddTransition(GraphicsPipeline* transition);
void Configure(bool is_indexed) {
@ -106,35 +103,6 @@ public:
return is_built.load(std::memory_order::relaxed);
}
// Get hash for the current pipeline configuration
[[nodiscard]] u64 Hash() const noexcept {
return key.Hash();
}
// Get the last pipeline this transitioned from
[[nodiscard]] GraphicsPipeline* GetLastTransitionedPipeline() const noexcept {
// For predictive loading, return a related pipeline if available
if (!transitions.empty()) {
return transitions.front();
}
return nullptr;
}
// Get pipeline info string for prediction
[[nodiscard]] std::string GetPipelineInfo() const noexcept {
std::string result = fmt::format("pipeline_{:016x}", Hash());
// Include information about stages
for (size_t i = 0; i < NUM_STAGES; ++i) {
// Check if this stage is active by checking if any varying stores are enabled
if (!stage_infos[i].stores.mask.none()) {
result += fmt::format("_s{}", i);
}
}
return result;
}
template <typename Spec>
static auto MakeConfigureSpecFunc() {
return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };

@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
@ -265,42 +264,18 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
}
size_t GetTotalPipelineWorkers() {
const size_t num_cores = std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL);
// Calculate optimal number of workers based on available CPU cores
size_t optimal_workers;
const size_t max_core_threads =
std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
#ifdef ANDROID
// Mobile devices need more conservative threading to avoid thermal issues
// Leave more cores free on Android for system processes and other apps
constexpr size_t min_free_cores = 3ULL;
if (num_cores <= min_free_cores + 1) {
return 1ULL; // At least one worker
// Leave at least a few cores free in android
constexpr size_t free_cores = 3ULL;
if (max_core_threads <= free_cores) {
return 1ULL;
}
optimal_workers = num_cores - min_free_cores;
return max_core_threads - free_cores;
#else
// Desktop systems can use more aggressive threading
if (num_cores <= 3) {
optimal_workers = num_cores - 1; // Dual/triple core: leave 1 core free
} else if (num_cores <= 6) {
optimal_workers = num_cores - 2; // Quad/hex core: leave 2 cores free
} else {
// For 8+ core systems, use more workers but still leave some cores for other tasks
optimal_workers = num_cores - (num_cores / 4); // Leave ~25% of cores free
}
return max_core_threads;
#endif
// Apply threading priority via shader_compilation_priority setting if enabled
const int priority = Settings::values.shader_compilation_priority.GetValue();
if (priority > 0) {
// High priority - use more cores for shader compilation
optimal_workers = std::min(optimal_workers + 1, num_cores - 1);
} else if (priority < 0) {
// Low priority - use fewer cores for shader compilation
optimal_workers = (optimal_workers >= 2) ? optimal_workers - 1 : 1;
}
return optimal_workers;
}
} // Anonymous namespace
@ -611,128 +586,21 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
if (pipeline->IsBuilt()) {
return pipeline;
}
if (!use_asynchronous_shaders) {
return pipeline;
}
// Advanced heuristics for smarter async shader compilation
// Track stutter metrics for better debugging and performance tuning
static thread_local u32 async_shader_count = 0;
static thread_local std::chrono::high_resolution_clock::time_point last_async_shader_log;
auto now = std::chrono::high_resolution_clock::now();
// Better detection of UI and critical shaders
const bool is_ui_shader = !maxwell3d->regs.zeta_enable;
// Check for blend state
const bool has_blend = maxwell3d->regs.blend.enable[0] != 0;
// Check if texture sampling is likely based on texture units used
const bool has_texture = maxwell3d->regs.tex_header.Address() != 0;
// Check for clear operations
const bool is_clear_operation = maxwell3d->regs.clear_surface.raw != 0;
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const bool small_draw = draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6;
// Get shader priority from settings
const int shader_priority = Settings::values.shader_compilation_priority.GetValue();
// Record historical usage patterns for future prediction
// Create a unique identifier for this shader configuration
const u64 draw_config_hash = pipeline->Hash();
static thread_local std::unordered_map<u64, u32> shader_usage_count;
static thread_local std::unordered_map<u64, bool> shader_is_frequent;
// Track how often this shader is used
shader_usage_count[draw_config_hash]++;
// After a certain number of uses, consider this a frequently used shader
// which should get higher compilation priority in the future
if (shader_usage_count[draw_config_hash] >= 3) {
shader_is_frequent[draw_config_hash] = true;
// Predict related shaders that might be used soon
if (auto related_pipeline = pipeline->GetLastTransitionedPipeline()) {
// Use a string-based representation of the pipeline for prediction
std::string pipeline_info = fmt::format("pipeline_{:016x}", related_pipeline->Hash());
PredictShader(pipeline_info);
}
}
// Always wait for UI shaders if settings specify high priority
if (is_ui_shader && (shader_priority >= 0 || small_draw)) {
return pipeline;
}
// Wait for frequently used small draw shaders
if (small_draw && shader_is_frequent[draw_config_hash]) {
return pipeline;
}
// Wait for clear operations as they're usually critical
if (is_clear_operation) {
return pipeline;
}
// Force wait if high shader priority in settings
if (shader_priority > 1) {
return pipeline;
}
// More intelligent depth-based heuristics
// If something is using depth, we can assume that games are not rendering anything which
// will be used one time.
if (maxwell3d->regs.zeta_enable) {
// Check if this is likely a shadow map or important depth-based effect
// Check if depth write is enabled and color writes are disabled for all render targets
bool depth_only_pass = maxwell3d->regs.depth_write_enabled;
if (depth_only_pass) {
bool all_color_masked = true;
for (size_t i = 0; i < maxwell3d->regs.color_mask.size(); i++) {
// Check if any color component is enabled (R, G, B, A fields of ColorMask)
if ((maxwell3d->regs.color_mask[i].raw & 0x1111) != 0) {
all_color_masked = false;
break;
}
}
// If depth write enabled and all colors masked, this is likely a shadow pass
if (all_color_masked) {
// This is likely a shadow pass, which is important for visual quality
// We should wait for these to compile to avoid flickering shadows
return pipeline;
}
}
// For other depth-enabled renders, use async compilation
return nullptr;
}
// Refine small draw detection
if (small_draw) {
// Check if this might be a UI element that we missed
if (has_blend && has_texture) {
// Likely a textured UI element, wait for it
// If games are using a small index count, we can assume these are full screen quads.
// Usually these shaders are only used once for building textures so we can assume they
// can't be built async
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
return pipeline;
}
// For other small draws, assume they're one-off effects
return pipeline;
}
// Track and log async shader statistics periodically
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
now - last_async_shader_log).count();
if (elapsed >= 10) { // Log every 10 seconds
async_shader_count = 0;
last_async_shader_log = now;
}
async_shader_count++;
// Log less frequently to avoid spamming log
if (async_shader_count % 100 == 1) {
LOG_DEBUG(Render_Vulkan, "Async shader compilation in progress (count={}), completion={:.1f}%",
async_shader_count, GetShaderCompilationProgress() * 100.0f);
}
return nullptr;
}
@ -740,22 +608,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
ShaderPools& pools, const GraphicsPipelineCacheKey& key,
std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
bool build_in_parallel) try {
// Pipeline deduplication optimization
{
std::lock_guard lock{pipeline_cache};
const auto [pair, new_pipeline]{graphics_cache.try_emplace(key)};
if (!new_pipeline) {
// Found existing pipeline in cache
auto& pipeline = pair->second;
if (pipeline) {
// Return the existing pipeline
LOG_DEBUG(Render_Vulkan, "Reusing existing pipeline for key 0x{:016x}", key.Hash());
return std::unique_ptr<GraphicsPipeline>(pipeline->Clone());
}
}
}
auto hash = key.Hash();
LOG_INFO(Render_Vulkan, "0x{:016x}", hash);
size_t env_index{0};
@ -766,8 +618,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
// Layer passthrough generation for devices without VK_EXT_shader_viewport_index_layer
Shader::IR::Program* layer_source_program{};
// Memory optimization: Create a scope for program translation
{
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const bool is_emulated_stage = layer_source_program != nullptr &&
index == static_cast<u32>(Maxwell::ShaderType::Geometry);
@ -803,15 +653,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
layer_source_program = &programs[index];
}
}
}
std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
const Shader::IR::Program* previous_stage{};
Shader::Backend::Bindings binding;
// Memory optimization: Process one stage at a time and free intermediate memory
for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
++index) {
const bool is_emulated_stage = layer_source_program != nullptr &&
@ -825,8 +671,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
const size_t stage_index{index - 1};
infos[stage_index] = &program.info;
// Prioritize memory efficiency by encapsulating SPIR-V generation
{
const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
ConvertLegacyToGeneric(program, runtime_info);
const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)};
@ -836,27 +680,14 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
modules[stage_index].SetObjectNameEXT(name.c_str());
}
}
previous_stage = &program;
}
// Use improved thread worker mechanism for better async compilation
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
auto pipeline = std::make_unique<GraphicsPipeline>(
return std::make_unique<GraphicsPipeline>(
scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device,
descriptor_pool, guest_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
std::move(modules), infos);
// Cache the result for future deduplication
if (pipeline) {
std::lock_guard lock{pipeline_cache};
// Store a clone that can be used later
graphics_cache[key] = std::unique_ptr<GraphicsPipeline>(pipeline->Clone());
}
return pipeline;
} catch (const Shader::Exception& exception) {
auto hash = key.Hash();
size_t env_index{0};
@ -936,23 +767,6 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
}
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
// Add support for bindless texture constant buffer only if needed
if (program.info.storage_buffers_descriptors.size() > 0) {
// Check if a constant buffer at index 0 already exists
const bool has_cb0 = std::any_of(program.info.constant_buffer_descriptors.begin(),
program.info.constant_buffer_descriptors.end(),
[](const auto& cb) { return cb.index == 0; });
// Only add if not already present
if (!has_cb0) {
Shader::ConstantBufferDescriptor desc;
desc.index = 0;
desc.count = 1;
program.info.constant_buffer_descriptors.push_back(desc);
}
}
const std::vector<u32> code{EmitSPIRV(profile, program)};
device.SaveShader(code);
vk::ShaderModule spv_module{BuildShader(device, code)};
@ -971,7 +785,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
}
void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename,
const vk::PipelineCache& vk_pipeline_cache,
const vk::PipelineCache& pipeline_cache,
u32 cache_version) try {
std::ofstream file(filename, std::ios::binary);
file.exceptions(std::ifstream::failbit);
@ -985,10 +799,10 @@ void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& fi
size_t cache_size = 0;
std::vector<char> cache_data;
if (vk_pipeline_cache) {
vk_pipeline_cache.Read(&cache_size, nullptr);
if (pipeline_cache) {
pipeline_cache.Read(&cache_size, nullptr);
cache_data.resize(cache_size);
vk_pipeline_cache.Read(&cache_size, cache_data.data());
pipeline_cache.Read(&cache_size, cache_data.data());
}
file.write(cache_data.data(), cache_size);

View file

@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@ -11,7 +10,6 @@
#include <type_traits>
#include <unordered_map>
#include <vector>
#include <mutex>
#include "common/common_types.h"
#include "common/thread_worker.h"
@ -159,9 +157,6 @@ private:
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
// Mutex for thread-safe pipeline cache access
mutable std::mutex pipeline_cache;
ShaderPools main_pools;
Shader::Profile profile;

View file

@ -1,181 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include <thread>
#include <filesystem>
#include <fstream>
#include <vector>
#include <atomic>
#include <queue>
#include <condition_variable>
#include <future>
#include <chrono>
#include <unordered_set>
#include "common/common_types.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
#define SHADER_CACHE_DIR "./shader_cache"
namespace Vulkan {
// Global command submission queue for asynchronous operations
std::mutex commandQueueMutex;
std::queue<std::function<void()>> commandQueue;
std::condition_variable commandQueueCondition;
std::atomic<bool> isCommandQueueActive{true};
std::thread commandQueueThread;
// Pointer to Citron's scheduler for integration
Scheduler* globalScheduler = nullptr;
// Constants for thread pool and shader management
constexpr size_t DEFAULT_THREAD_POOL_SIZE = 4;
constexpr size_t MAX_THREAD_POOL_SIZE = 8;
constexpr u32 SHADER_PRIORITY_CRITICAL = 0;
constexpr u32 SHADER_PRIORITY_HIGH = 1;
constexpr u32 SHADER_PRIORITY_NORMAL = 2;
constexpr u32 SHADER_PRIORITY_LOW = 3;
// Thread pool for shader compilation
std::vector<std::thread> g_thread_pool;
std::queue<std::function<void()>> g_work_queue;
std::mutex g_work_queue_mutex;
std::condition_variable g_work_queue_cv;
std::atomic<bool> g_thread_pool_initialized = false;
std::atomic<bool> g_shutdown_thread_pool = false;
std::atomic<size_t> g_active_compilation_tasks = 0;
std::atomic<size_t> g_total_compilation_tasks = 0;
std::atomic<size_t> g_completed_compilation_tasks = 0;
// Priority queue for shader compilation
struct ShaderCompilationTask {
std::function<void()> task;
u32 priority;
std::chrono::high_resolution_clock::time_point enqueue_time;
bool operator<(const ShaderCompilationTask& other) const {
// Lower priority value means higher actual priority
if (priority != other.priority) {
return priority > other.priority;
}
// If priorities are equal, use FIFO ordering
return enqueue_time > other.enqueue_time;
}
};
std::priority_queue<ShaderCompilationTask> g_priority_work_queue;
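std::priority_queue pops the largest element under operator<, so inverting the comparison as above makes the numerically lowest priority value (most urgent) surface first, with enqueue order breaking ties. A small self-contained check of that ordering, using an illustrative Task type rather than ShaderCompilationTask:

#include <cassert>
#include <queue>

struct Task {
    int priority;   // lower value = more urgent
    long sequence;  // enqueue order, used as the FIFO tie-breaker
    bool operator<(const Task& other) const {
        if (priority != other.priority) {
            return priority > other.priority;  // invert: smallest priority value wins
        }
        return sequence > other.sequence;      // earlier enqueue wins on ties
    }
};

int main() {
    std::priority_queue<Task> q;
    q.push({2, 0});
    q.push({0, 1});
    q.push({0, 2});
    assert(q.top().priority == 0 && q.top().sequence == 1);  // most urgent, enqueued first
    q.pop();
    assert(q.top().priority == 0 && q.top().sequence == 2);
    q.pop();
    assert(q.top().priority == 2);
    return 0;
}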
// Predictive shader loading
std::unordered_set<std::string> g_predicted_shaders;
std::mutex g_predicted_shaders_mutex;
// Command queue worker thread (multi-threaded command recording)
void CommandQueueWorker() {
while (isCommandQueueActive.load()) {
std::function<void()> command;
{
std::unique_lock<std::mutex> lock(commandQueueMutex);
if (commandQueue.empty()) {
// Wait with a timeout to allow periodic checks of isCommandQueueActive
commandQueueCondition.wait_for(lock, std::chrono::milliseconds(100),
[]{ return !commandQueue.empty() || !isCommandQueueActive.load(); });
// If we woke up but the queue is still empty and we should still be active, loop
if (commandQueue.empty()) {
continue;
}
}
command = commandQueue.front();
commandQueue.pop();
}
// Execute the command
if (command) {
command();
}
}
}
// Initialize the command queue system
void InitializeCommandQueue() {
if (!commandQueueThread.joinable()) {
isCommandQueueActive.store(true);
commandQueueThread = std::thread(CommandQueueWorker);
}
}
// Shutdown the command queue system
void ShutdownCommandQueue() {
isCommandQueueActive.store(false);
commandQueueCondition.notify_all();
if (commandQueueThread.joinable()) {
commandQueueThread.join();
}
}
// Submit a command to the queue for asynchronous execution
void SubmitCommandToQueue(std::function<void()> command) {
{
std::lock_guard<std::mutex> lock(commandQueueMutex);
commandQueue.push(command);
}
commandQueueCondition.notify_one();
}
// Set the global scheduler reference for command integration
void SetGlobalScheduler(Scheduler* scheduler) {
globalScheduler = scheduler;
}
// Submit a Vulkan command to the existing Citron scheduler
void SubmitToScheduler(std::function<void(vk::CommandBuffer)> command) {
if (globalScheduler) {
globalScheduler->Record(std::move(command));
} else {
LOG_WARNING(Render_Vulkan, "Trying to submit to scheduler but no scheduler is set");
}
}
// Flush the Citron scheduler - use when needing to ensure commands are executed
u64 FlushScheduler(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
if (globalScheduler) {
return globalScheduler->Flush(signal_semaphore, wait_semaphore);
} else {
LOG_WARNING(Render_Vulkan, "Trying to flush scheduler but no scheduler is set");
return 0;
}
}
// Process both command queue and scheduler commands
void ProcessAllCommands() {
// Process our command queue first
{
std::unique_lock<std::mutex> lock(commandQueueMutex);
while (!commandQueue.empty()) {
auto command = commandQueue.front();
commandQueue.pop();
lock.unlock();
command();
lock.lock();
}
}
// Then flush the scheduler if it exists
if (globalScheduler) {
globalScheduler->Flush();
}
}
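The worker above is a conventional single-consumer queue: wait_for with a predicate bounds each sleep to 100 ms so the shutdown flag is re-checked even when no work arrives. A minimal self-contained version of the same pattern (not the emulator's code):

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstdio>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>

int main() {
    std::mutex m;
    std::condition_variable cv;
    std::queue<std::function<void()>> work;
    std::atomic<bool> active{true};

    std::thread consumer([&] {
        while (active.load()) {
            std::function<void()> job;
            {
                std::unique_lock lock{m};
                // Bounded wait so the loop can notice active == false.
                cv.wait_for(lock, std::chrono::milliseconds(100),
                            [&] { return !work.empty() || !active.load(); });
                if (work.empty()) {
                    continue;
                }
                job = std::move(work.front());
                work.pop();
            }
            job();
        }
    });

    {
        std::lock_guard lock{m};
        work.push([] { std::puts("command executed"); });
    }
    cv.notify_one();

    std::this_thread::sleep_for(std::chrono::milliseconds(200));
    active.store(false);
    cv.notify_all();
    consumer.join();
    return 0;
}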
vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code) {
return device.GetLogical().CreateShaderModule({
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
@ -186,589 +20,4 @@ vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code) {
});
}
bool IsShaderValid(VkShaderModule shader_module) {
// TODO: validate the shader beyond a null-handle check,
// e.g. by examining the SPIR-V data for correctness [ZEP]
return shader_module != VK_NULL_HANDLE;
}
// Initialize thread pool for shader compilation
void InitializeThreadPool() {
if (g_thread_pool_initialized) {
return;
}
std::lock_guard<std::mutex> lock(g_work_queue_mutex);
g_shutdown_thread_pool = false;
// Determine optimal thread count based on system
const size_t hardware_threads = std::max(std::thread::hardware_concurrency(), 2u);
const size_t thread_count = std::min(hardware_threads - 1, MAX_THREAD_POOL_SIZE);
LOG_INFO(Render_Vulkan, "Initializing shader compilation thread pool with {} threads", thread_count);
for (size_t i = 0; i < thread_count; ++i) {
g_thread_pool.emplace_back([]() {
while (!g_shutdown_thread_pool) {
std::function<void()> task;
{
std::unique_lock<std::mutex> thread_pool_lock(g_work_queue_mutex);
g_work_queue_cv.wait(thread_pool_lock, [] {
return g_shutdown_thread_pool || !g_priority_work_queue.empty();
});
if (g_shutdown_thread_pool && g_priority_work_queue.empty()) {
break;
}
if (!g_priority_work_queue.empty()) {
ShaderCompilationTask highest_priority_task = g_priority_work_queue.top();
g_priority_work_queue.pop();
task = std::move(highest_priority_task.task);
}
}
if (task) {
g_active_compilation_tasks++;
task();
g_active_compilation_tasks--;
g_completed_compilation_tasks++;
}
}
});
}
g_thread_pool_initialized = true;
}
// Shutdown thread pool
void ShutdownThreadPool() {
if (!g_thread_pool_initialized) {
return;
}
{
std::lock_guard<std::mutex> lock(g_work_queue_mutex);
g_shutdown_thread_pool = true;
}
g_work_queue_cv.notify_all();
for (auto& thread : g_thread_pool) {
if (thread.joinable()) {
thread.join();
}
}
g_thread_pool.clear();
g_thread_pool_initialized = false;
LOG_INFO(Render_Vulkan, "Shader compilation thread pool shutdown");
}
// Submit work to thread pool with priority
void SubmitShaderCompilationTask(std::function<void()> task, u32 priority) {
if (!g_thread_pool_initialized) {
InitializeThreadPool();
}
{
std::lock_guard<std::mutex> work_queue_lock(g_work_queue_mutex);
g_priority_work_queue.push({
std::move(task),
priority,
std::chrono::high_resolution_clock::now()
});
g_total_compilation_tasks++;
}
g_work_queue_cv.notify_one();
}
// Get shader compilation progress (0.0f - 1.0f)
float GetShaderCompilationProgress() {
const size_t total = g_total_compilation_tasks.load();
if (total == 0) {
return 1.0f;
}
const size_t completed = g_completed_compilation_tasks.load();
return static_cast<float>(completed) / static_cast<float>(total);
}
// Check if any shader compilation is in progress
bool IsShaderCompilationInProgress() {
return g_active_compilation_tasks.load() > 0;
}
// Add shader to prediction list for preloading
void PredictShader(const std::string& shader_path) {
std::lock_guard<std::mutex> lock(g_predicted_shaders_mutex);
g_predicted_shaders.insert(shader_path);
}
// Preload predicted shaders
void PreloadPredictedShaders(const Device& device) {
std::unordered_set<std::string> shaders_to_load;
{
std::lock_guard<std::mutex> lock(g_predicted_shaders_mutex);
shaders_to_load = g_predicted_shaders;
g_predicted_shaders.clear();
}
if (shaders_to_load.empty()) {
return;
}
LOG_INFO(Render_Vulkan, "Preloading {} predicted shaders", shaders_to_load.size());
for (const auto& shader_path : shaders_to_load) {
// Queue with low priority since these are predictions
AsyncCompileShader(device, shader_path, [](VkShaderModule) {}, SHADER_PRIORITY_LOW);
}
}
// Atomic flag for tracking shader compilation status
std::atomic<bool> compilingShader(false);
void AsyncCompileShader(const Device& device, const std::string& shader_path,
std::function<void(VkShaderModule)> callback, u32 priority) {
LOG_INFO(Render_Vulkan, "Asynchronously compiling shader: {}", shader_path);
// Create shader cache directory if it doesn't exist
if (!std::filesystem::exists(SHADER_CACHE_DIR)) {
std::filesystem::create_directory(SHADER_CACHE_DIR);
}
// Initialize thread pool if needed
if (!g_thread_pool_initialized) {
InitializeThreadPool();
}
// Submit to thread pool with priority
SubmitShaderCompilationTask([device_ptr = &device, shader_path, callback = std::move(callback)]() {
auto startTime = std::chrono::high_resolution_clock::now();
try {
std::vector<u32> spir_v;
bool success = false;
// Check if the file exists and attempt to read it
if (std::filesystem::exists(shader_path)) {
std::ifstream shader_file(shader_path, std::ios::binary);
if (shader_file) {
shader_file.seekg(0, std::ios::end);
size_t file_size = static_cast<size_t>(shader_file.tellg());
shader_file.seekg(0, std::ios::beg);
spir_v.resize(file_size / sizeof(u32));
if (shader_file.read(reinterpret_cast<char*>(spir_v.data()), file_size)) {
success = true;
}
}
}
if (success) {
vk::ShaderModule shader = BuildShader(*device_ptr, spir_v);
if (IsShaderValid(*shader)) {
// Cache the compiled shader to disk for faster loading next time
std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" +
std::filesystem::path(shader_path).filename().string() + ".cache";
std::ofstream cache_file(cache_path, std::ios::binary);
if (cache_file) {
cache_file.write(reinterpret_cast<const char*>(spir_v.data()),
spir_v.size() * sizeof(u32));
}
auto endTime = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> duration = endTime - startTime;
LOG_INFO(Render_Vulkan, "Shader compiled in {:.2f} seconds: {}",
duration.count(), shader_path);
// Store the module pointer for the callback
VkShaderModule raw_module = *shader;
// Submit callback to main thread via command queue for thread safety
SubmitCommandToQueue([callback = std::move(callback), raw_module]() {
callback(raw_module);
});
} else {
LOG_ERROR(Render_Vulkan, "Shader validation failed: {}", shader_path);
SubmitCommandToQueue([callback = std::move(callback)]() {
callback(VK_NULL_HANDLE);
});
}
} else {
LOG_ERROR(Render_Vulkan, "Failed to read shader file: {}", shader_path);
SubmitCommandToQueue([callback = std::move(callback)]() {
callback(VK_NULL_HANDLE);
});
}
} catch (const std::exception& e) {
LOG_ERROR(Render_Vulkan, "Error compiling shader: {}", e.what());
SubmitCommandToQueue([callback = std::move(callback)]() {
callback(VK_NULL_HANDLE);
});
}
}, priority);
}
// Overload for backward compatibility
void AsyncCompileShader(const Device& device, const std::string& shader_path,
std::function<void(VkShaderModule)> callback) {
AsyncCompileShader(device, shader_path, std::move(callback), SHADER_PRIORITY_NORMAL);
}
ShaderManager::ShaderManager(const Device& device_) : device(device_) {
// Initialize command queue system
InitializeCommandQueue();
// Initialize thread pool for shader compilation
InitializeThreadPool();
}
ShaderManager::~ShaderManager() {
// Wait for any pending compilations to finish
WaitForCompilation();
// Clean up shader modules
std::lock_guard<std::mutex> lock(shader_mutex);
shader_cache.clear();
// Shutdown thread pool
ShutdownThreadPool();
// Shutdown command queue
ShutdownCommandQueue();
}
VkShaderModule ShaderManager::GetShaderModule(const std::string& shader_path) {
// Check in-memory cache first
{
std::lock_guard<std::mutex> lock(shader_mutex);
auto it = shader_cache.find(shader_path);
if (it != shader_cache.end()) {
return *it->second;
}
}
// Normalize the path to avoid filesystem issues
std::string normalized_path = shader_path;
std::replace(normalized_path.begin(), normalized_path.end(), '\\', '/');
// Check if shader exists
if (!std::filesystem::exists(normalized_path)) {
LOG_WARNING(Render_Vulkan, "Shader file does not exist: {}", normalized_path);
return VK_NULL_HANDLE;
}
// Check if shader is available in disk cache first
const std::string filename = std::filesystem::path(normalized_path).filename().string();
std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" + filename + ".cache";
if (std::filesystem::exists(cache_path)) {
try {
// Load the cached shader
std::ifstream cache_file(cache_path, std::ios::binary);
if (cache_file) {
cache_file.seekg(0, std::ios::end);
size_t file_size = static_cast<size_t>(cache_file.tellg());
if (file_size > 0 && file_size % sizeof(u32) == 0) {
cache_file.seekg(0, std::ios::beg);
std::vector<u32> spir_v;
spir_v.resize(file_size / sizeof(u32));
if (cache_file.read(reinterpret_cast<char*>(spir_v.data()), file_size)) {
vk::ShaderModule shader = BuildShader(device, spir_v);
if (IsShaderValid(*shader)) {
// Store in memory cache
std::lock_guard<std::mutex> lock(shader_mutex);
shader_cache[normalized_path] = std::move(shader);
LOG_INFO(Render_Vulkan, "Loaded shader from cache: {}", normalized_path);
return *shader_cache[normalized_path];
}
}
}
}
} catch (const std::exception& e) {
LOG_WARNING(Render_Vulkan, "Failed to load shader from cache: {}", e.what());
// Continue to load from original file
}
}
// Try to load the shader directly if cache load failed
if (LoadShader(normalized_path)) {
std::lock_guard<std::mutex> lock(shader_mutex);
return *shader_cache[normalized_path];
}
LOG_ERROR(Render_Vulkan, "Failed to load shader: {}", normalized_path);
return VK_NULL_HANDLE;
}
void ShaderManager::ReloadShader(const std::string& shader_path) {
LOG_INFO(Render_Vulkan, "Reloading shader: {}", shader_path);
// Remove the old shader from cache
{
std::lock_guard<std::mutex> lock(shader_mutex);
shader_cache.erase(shader_path);
}
// Load the shader again
LoadShader(shader_path);
}
bool ShaderManager::LoadShader(const std::string& shader_path) {
LOG_INFO(Render_Vulkan, "Loading shader from: {}", shader_path);
if (!std::filesystem::exists(shader_path)) {
LOG_ERROR(Render_Vulkan, "Shader file does not exist: {}", shader_path);
return false;
}
try {
std::vector<u32> spir_v;
std::ifstream shader_file(shader_path, std::ios::binary);
if (!shader_file.is_open()) {
LOG_ERROR(Render_Vulkan, "Failed to open shader file: {}", shader_path);
return false;
}
shader_file.seekg(0, std::ios::end);
const size_t file_size = static_cast<size_t>(shader_file.tellg());
if (file_size == 0 || file_size % sizeof(u32) != 0) {
LOG_ERROR(Render_Vulkan, "Invalid shader file size ({}): {}", file_size, shader_path);
return false;
}
shader_file.seekg(0, std::ios::beg);
spir_v.resize(file_size / sizeof(u32));
if (!shader_file.read(reinterpret_cast<char*>(spir_v.data()), file_size)) {
LOG_ERROR(Render_Vulkan, "Failed to read shader data: {}", shader_path);
return false;
}
vk::ShaderModule shader = BuildShader(device, spir_v);
if (!IsShaderValid(*shader)) {
LOG_ERROR(Render_Vulkan, "Created shader module is invalid: {}", shader_path);
return false;
}
// Store in memory cache
{
std::lock_guard<std::mutex> lock(shader_mutex);
shader_cache[shader_path] = std::move(shader);
}
// Also store in disk cache for future use
try {
if (!std::filesystem::exists(SHADER_CACHE_DIR)) {
std::filesystem::create_directory(SHADER_CACHE_DIR);
}
std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" +
std::filesystem::path(shader_path).filename().string() + ".cache";
std::ofstream cache_file(cache_path, std::ios::binary);
if (cache_file.is_open()) {
cache_file.write(reinterpret_cast<const char*>(spir_v.data()),
spir_v.size() * sizeof(u32));
if (!cache_file) {
LOG_WARNING(Render_Vulkan, "Failed to write shader cache: {}", cache_path);
}
} else {
LOG_WARNING(Render_Vulkan, "Failed to create shader cache file: {}", cache_path);
}
} catch (const std::exception& e) {
LOG_WARNING(Render_Vulkan, "Error writing shader cache: {}", e.what());
// Continue even if disk cache fails
}
return true;
} catch (const std::exception& e) {
LOG_ERROR(Render_Vulkan, "Error loading shader: {}", e.what());
return false;
}
}
void ShaderManager::WaitForCompilation() {
// Wait until no shader is being compiled
while (IsShaderCompilationInProgress()) {
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
// Process any pending commands in the queue
std::unique_lock<std::mutex> lock(commandQueueMutex);
while (!commandQueue.empty()) {
auto command = commandQueue.front();
commandQueue.pop();
lock.unlock();
command();
lock.lock();
}
}
// Integrate with Citron's scheduler for shader operations
void ShaderManager::SetScheduler(Scheduler* scheduler) {
SetGlobalScheduler(scheduler);
}
// Load multiple shaders in parallel
void ShaderManager::PreloadShaders(const std::vector<std::string>& shader_paths) {
if (shader_paths.empty()) {
return;
}
LOG_INFO(Render_Vulkan, "Preloading {} shaders", shader_paths.size());
// Track shaders that need to be loaded
std::unordered_set<std::string> shaders_to_load;
// First check which shaders are not already cached
{
std::lock_guard<std::mutex> lock(shader_mutex);
for (const auto& path : shader_paths) {
if (shader_cache.find(path) == shader_cache.end()) {
// Also check disk cache
if (std::filesystem::exists(path)) {
std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" +
std::filesystem::path(path).filename().string() + ".cache";
if (!std::filesystem::exists(cache_path)) {
shaders_to_load.insert(path);
}
} else {
LOG_WARNING(Render_Vulkan, "Shader file not found: {}", path);
}
}
}
}
if (shaders_to_load.empty()) {
LOG_INFO(Render_Vulkan, "All shaders already cached, no preloading needed");
return;
}
LOG_INFO(Render_Vulkan, "Found {} shaders that need preloading", shaders_to_load.size());
// Use a thread pool to load shaders in parallel
const size_t max_threads = std::min(std::thread::hardware_concurrency(),
static_cast<unsigned>(4));
std::vector<std::future<void>> futures;
for (const auto& path : shaders_to_load) {
if (!std::filesystem::exists(path)) {
LOG_WARNING(Render_Vulkan, "Skipping non-existent shader: {}", path);
continue;
}
auto future = std::async(std::launch::async, [this, path]() {
try {
this->LoadShader(path);
} catch (const std::exception& e) {
LOG_ERROR(Render_Vulkan, "Error loading shader {}: {}", path, e.what());
}
});
futures.push_back(std::move(future));
// Limit max parallel threads
if (futures.size() >= max_threads) {
futures.front().wait();
futures.erase(futures.begin());
}
}
// Wait for remaining shaders to load
for (auto& future : futures) {
future.wait();
}
LOG_INFO(Render_Vulkan, "Finished preloading shaders");
}
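PreloadShaders caps parallelism by waiting on the oldest future once max_threads tasks are in flight. The same bounded std::async pattern in isolation, with a sleep standing in for LoadShader (illustrative only):

#include <algorithm>
#include <chrono>
#include <future>
#include <iostream>
#include <thread>
#include <vector>

int main() {
    const std::size_t max_threads =
        std::max(1u, std::min(std::thread::hardware_concurrency(), 4u));
    std::vector<std::future<void>> futures;

    for (int task = 0; task < 10; ++task) {
        futures.push_back(std::async(std::launch::async, [task] {
            std::this_thread::sleep_for(std::chrono::milliseconds(50));  // stand-in for real work
            std::cout << "task " << task << " done\n";
        }));
        // Keep at most max_threads tasks in flight.
        if (futures.size() >= max_threads) {
            futures.front().wait();
            futures.erase(futures.begin());
        }
    }
    for (auto& f : futures) {
        f.wait();
    }
    return 0;
}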
// Batch load multiple shaders with priorities
void ShaderManager::BatchLoadShaders(const std::vector<std::string>& shader_paths,
const std::vector<u32>& priorities) {
if (shader_paths.empty()) {
return;
}
LOG_INFO(Render_Vulkan, "Batch loading {} shaders", shader_paths.size());
for (size_t i = 0; i < shader_paths.size(); ++i) {
const auto& path = shader_paths[i];
u32 priority = i < priorities.size() ? priorities[i] : SHADER_PRIORITY_NORMAL;
AsyncCompileShader(device, path, [this, path](VkShaderModule raw_module) {
if (raw_module != VK_NULL_HANDLE) {
// Note: the raw VkShaderModule can't be wrapped into a proper vk::ShaderModule here,
// so reload the shader through LoadShader, which creates the module and stores it
// in shader_cache.
if (LoadShader(path)) {
LOG_INFO(Render_Vulkan, "Loaded shader module for {}", path);
} else {
LOG_ERROR(Render_Vulkan, "Failed to load shader module for {}", path);
}
}
}, priority);
}
}
// Preload all shaders in a directory with automatic prioritization
void ShaderManager::PreloadShaderDirectory(const std::string& directory_path) {
if (!std::filesystem::exists(directory_path)) {
LOG_WARNING(Render_Vulkan, "Shader directory does not exist: {}", directory_path);
return;
}
std::vector<std::string> shader_paths;
std::vector<u32> priorities;
for (const auto& entry : std::filesystem::directory_iterator(directory_path)) {
if (entry.is_regular_file()) {
const auto& path = entry.path().string();
const auto extension = entry.path().extension().string();
// Only load shader files
if (extension == ".spv" || extension == ".glsl" || extension == ".vert" ||
extension == ".frag" || extension == ".comp") {
shader_paths.push_back(path);
// Assign priorities based on filename patterns
// This is a simple heuristic and will be improved
const auto filename = entry.path().filename().string();
if (filename.find("ui") != std::string::npos ||
filename.find("menu") != std::string::npos) {
priorities.push_back(SHADER_PRIORITY_CRITICAL);
} else if (filename.find("effect") != std::string::npos ||
filename.find("post") != std::string::npos) {
priorities.push_back(SHADER_PRIORITY_HIGH);
} else {
priorities.push_back(SHADER_PRIORITY_NORMAL);
}
}
}
}
if (!shader_paths.empty()) {
BatchLoadShaders(shader_paths, priorities);
}
}
// Get current compilation progress
float ShaderManager::GetCompilationProgress() const {
return GetShaderCompilationProgress();
}
} // namespace Vulkan
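LoadShader, GetShaderModule and AsyncCompileShader all read SPIR-V the same way: open the file in binary mode, require a non-zero size that is a multiple of sizeof(u32), then read the words into a vector. A standalone sketch of just that read step; the function name and error handling here are assumptions, not the emulator's API:

#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

// Reads a SPIR-V binary; returns an empty vector on any failure.
std::vector<std::uint32_t> ReadSpirv(const std::string& path) {
    std::ifstream file(path, std::ios::binary | std::ios::ate);
    if (!file) {
        return {};
    }
    const auto size = static_cast<std::size_t>(file.tellg());
    if (size == 0 || size % sizeof(std::uint32_t) != 0) {
        return {};  // SPIR-V words are 32 bits; anything else is corrupt
    }
    std::vector<std::uint32_t> words(size / sizeof(std::uint32_t));
    file.seekg(0, std::ios::beg);
    if (!file.read(reinterpret_cast<char*>(words.data()), size)) {
        return {};
    }
    return words;
}

int main() {
    const auto spirv = ReadSpirv("example.frag.spv");
    std::cout << "read " << spirv.size() << " SPIR-V words\n";
    return 0;
}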

View file

@ -1,16 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <span>
#include <string>
#include <unordered_map>
#include <mutex>
#include <atomic>
#include <functional>
#include <vector>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@ -18,74 +11,7 @@
namespace Vulkan {
class Device;
class Scheduler;
// Priority constants for shader compilation
extern const u32 SHADER_PRIORITY_CRITICAL;
extern const u32 SHADER_PRIORITY_HIGH;
extern const u32 SHADER_PRIORITY_NORMAL;
extern const u32 SHADER_PRIORITY_LOW;
// Command queue system for asynchronous operations
void InitializeCommandQueue();
void ShutdownCommandQueue();
void SubmitCommandToQueue(std::function<void()> command);
void CommandQueueWorker();
// Thread pool management for shader compilation
void InitializeThreadPool();
void ShutdownThreadPool();
void SubmitShaderCompilationTask(std::function<void()> task, u32 priority);
float GetShaderCompilationProgress();
bool IsShaderCompilationInProgress();
// Predictive shader loading
void PredictShader(const std::string& shader_path);
void PreloadPredictedShaders(const Device& device);
// Scheduler integration functions
void SetGlobalScheduler(Scheduler* scheduler);
void SubmitToScheduler(std::function<void(vk::CommandBuffer)> command);
u64 FlushScheduler(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
void ProcessAllCommands();
vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code);
// Enhanced shader functionality
bool IsShaderValid(VkShaderModule shader_module);
void AsyncCompileShader(const Device& device, const std::string& shader_path,
std::function<void(VkShaderModule)> callback, u32 priority);
void AsyncCompileShader(const Device& device, const std::string& shader_path,
std::function<void(VkShaderModule)> callback);
class ShaderManager {
public:
explicit ShaderManager(const Device& device);
~ShaderManager();
VkShaderModule GetShaderModule(const std::string& shader_path);
void ReloadShader(const std::string& shader_path);
bool LoadShader(const std::string& shader_path);
void WaitForCompilation();
// Enhanced shader management
void BatchLoadShaders(const std::vector<std::string>& shader_paths,
const std::vector<u32>& priorities);
void PreloadShaderDirectory(const std::string& directory_path);
float GetCompilationProgress() const;
// Batch process multiple shaders in parallel
void PreloadShaders(const std::vector<std::string>& shader_paths);
// Integrate with Citron's scheduler
void SetScheduler(Scheduler* scheduler);
private:
const Device& device;
std::mutex shader_mutex;
std::unordered_map<std::string, vk::ShaderModule> shader_cache;
};
} // namespace Vulkan

View file

@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
@ -30,10 +29,6 @@
namespace Vulkan {
// TextureCacheManager implementations to fix linker errors
TextureCacheManager::TextureCacheManager() = default;
TextureCacheManager::~TextureCacheManager() = default;
using Tegra::Engines::Fermi2D;
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureMipmapFilter;
@ -1193,171 +1188,69 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
}
void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
if (!dst->RenderPass()) {
return;
}
switch (dst_view.format) {
case PixelFormat::D24_UNORM_S8_UINT:
// Handle sRGB source formats
if (src_view.format == PixelFormat::A8B8G8R8_SRGB ||
src_view.format == PixelFormat::B8G8R8A8_SRGB) {
// Verify format support before conversion
if (device.IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT,
FormatType::Optimal)) {
return blit_image_helper.ConvertABGR8SRGBToD24S8(dst, src_view);
} else {
// Fallback to regular ABGR8 conversion if sRGB not supported
return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view);
}
}
break;
case PixelFormat::R16_UNORM:
if (src_view.format == PixelFormat::D16_UNORM) {
return blit_image_helper.ConvertD16ToR16(dst, src_view);
}
break;
case PixelFormat::A8B8G8R8_SRGB:
if (src_view.format == PixelFormat::D32_FLOAT) {
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
}
break;
case PixelFormat::A8B8G8R8_UNORM:
if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
}
if (src_view.format == PixelFormat::D24_UNORM_S8_UINT) {
return blit_image_helper.ConvertS8D24ToABGR8(dst, src_view);
}
if (src_view.format == PixelFormat::D32_FLOAT) {
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
}
break;
case PixelFormat::B8G8R8A8_SRGB:
if (src_view.format == PixelFormat::D32_FLOAT) {
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
}
break;
case PixelFormat::B8G8R8A8_UNORM:
if (src_view.format == PixelFormat::D32_FLOAT) {
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
}
break;
case PixelFormat::R32_FLOAT:
if (src_view.format == PixelFormat::D32_FLOAT) {
return blit_image_helper.ConvertD32ToR32(dst, src_view);
}
break;
case PixelFormat::D16_UNORM:
if (src_view.format == PixelFormat::R16_UNORM) {
return blit_image_helper.ConvertR16ToD16(dst, src_view);
}
break;
case PixelFormat::S8_UINT_D24_UNORM:
if (src_view.format == PixelFormat::A8B8G8R8_UNORM ||
src_view.format == PixelFormat::B8G8R8A8_UNORM) {
return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view);
}
break;
case PixelFormat::A8B8G8R8_UNORM:
case PixelFormat::A8B8G8R8_SNORM:
case PixelFormat::A8B8G8R8_SINT:
case PixelFormat::A8B8G8R8_UINT:
case PixelFormat::R5G6B5_UNORM:
case PixelFormat::B5G6R5_UNORM:
case PixelFormat::A1R5G5B5_UNORM:
case PixelFormat::A2B10G10R10_UNORM:
case PixelFormat::A2B10G10R10_UINT:
case PixelFormat::A2R10G10B10_UNORM:
case PixelFormat::A1B5G5R5_UNORM:
case PixelFormat::A5B5G5R1_UNORM:
case PixelFormat::R8_UNORM:
case PixelFormat::R8_SNORM:
case PixelFormat::R8_SINT:
case PixelFormat::R8_UINT:
case PixelFormat::R16G16B16A16_FLOAT:
case PixelFormat::R16G16B16A16_UNORM:
case PixelFormat::R16G16B16A16_SNORM:
case PixelFormat::R16G16B16A16_SINT:
case PixelFormat::R16G16B16A16_UINT:
case PixelFormat::B10G11R11_FLOAT:
case PixelFormat::R32G32B32A32_UINT:
case PixelFormat::BC1_RGBA_UNORM:
case PixelFormat::BC2_UNORM:
case PixelFormat::BC3_UNORM:
case PixelFormat::BC4_UNORM:
case PixelFormat::BC4_SNORM:
case PixelFormat::BC5_UNORM:
case PixelFormat::BC5_SNORM:
case PixelFormat::BC7_UNORM:
case PixelFormat::BC6H_UFLOAT:
case PixelFormat::BC6H_SFLOAT:
case PixelFormat::ASTC_2D_4X4_UNORM:
case PixelFormat::B8G8R8A8_UNORM:
case PixelFormat::R32G32B32A32_FLOAT:
case PixelFormat::R32G32B32A32_SINT:
case PixelFormat::R32G32_FLOAT:
case PixelFormat::R32G32_SINT:
case PixelFormat::R32_FLOAT:
case PixelFormat::R16_FLOAT:
case PixelFormat::R16_UNORM:
case PixelFormat::R16_SNORM:
case PixelFormat::R16_UINT:
case PixelFormat::R16_SINT:
case PixelFormat::R16G16_UNORM:
case PixelFormat::R16G16_FLOAT:
case PixelFormat::R16G16_UINT:
case PixelFormat::R16G16_SINT:
case PixelFormat::R16G16_SNORM:
case PixelFormat::R32G32B32_FLOAT:
case PixelFormat::A8B8G8R8_SRGB:
case PixelFormat::R8G8_UNORM:
case PixelFormat::R8G8_SNORM:
case PixelFormat::R8G8_SINT:
case PixelFormat::R8G8_UINT:
case PixelFormat::R32G32_UINT:
case PixelFormat::R16G16B16X16_FLOAT:
case PixelFormat::R32_UINT:
case PixelFormat::R32_SINT:
case PixelFormat::ASTC_2D_8X8_UNORM:
case PixelFormat::ASTC_2D_8X5_UNORM:
case PixelFormat::ASTC_2D_5X4_UNORM:
case PixelFormat::B8G8R8A8_SRGB:
case PixelFormat::BC1_RGBA_SRGB:
case PixelFormat::BC2_SRGB:
case PixelFormat::BC3_SRGB:
case PixelFormat::BC7_SRGB:
case PixelFormat::A4B4G4R4_UNORM:
case PixelFormat::G4R4_UNORM:
case PixelFormat::ASTC_2D_4X4_SRGB:
case PixelFormat::ASTC_2D_8X8_SRGB:
case PixelFormat::ASTC_2D_8X5_SRGB:
case PixelFormat::ASTC_2D_5X4_SRGB:
case PixelFormat::ASTC_2D_5X5_UNORM:
case PixelFormat::ASTC_2D_5X5_SRGB:
case PixelFormat::ASTC_2D_10X8_UNORM:
case PixelFormat::ASTC_2D_10X8_SRGB:
case PixelFormat::ASTC_2D_6X6_UNORM:
case PixelFormat::ASTC_2D_6X6_SRGB:
case PixelFormat::ASTC_2D_10X6_UNORM:
case PixelFormat::ASTC_2D_10X6_SRGB:
case PixelFormat::ASTC_2D_10X5_UNORM:
case PixelFormat::ASTC_2D_10X5_SRGB:
case PixelFormat::ASTC_2D_10X10_UNORM:
case PixelFormat::ASTC_2D_10X10_SRGB:
case PixelFormat::ASTC_2D_12X10_UNORM:
case PixelFormat::ASTC_2D_12X10_SRGB:
case PixelFormat::ASTC_2D_12X12_UNORM:
case PixelFormat::ASTC_2D_12X12_SRGB:
case PixelFormat::ASTC_2D_8X6_UNORM:
case PixelFormat::ASTC_2D_8X6_SRGB:
case PixelFormat::ASTC_2D_6X5_UNORM:
case PixelFormat::ASTC_2D_6X5_SRGB:
case PixelFormat::E5B9G9R9_FLOAT:
case PixelFormat::D32_FLOAT:
case PixelFormat::D16_UNORM:
case PixelFormat::X8_D24_UNORM:
case PixelFormat::S8_UINT:
case PixelFormat::S8_UINT_D24_UNORM:
case PixelFormat::D32_FLOAT_S8_UINT:
case PixelFormat::Invalid:
if (src_view.format == PixelFormat::A8B8G8R8_UNORM ||
src_view.format == PixelFormat::B8G8R8A8_UNORM ||
src_view.format == PixelFormat::A8B8G8R8_SRGB ||
src_view.format == PixelFormat::B8G8R8A8_SRGB) {
return blit_image_helper.ConvertABGR8ToD32F(dst, src_view);
}
if (src_view.format == PixelFormat::R32_FLOAT) {
return blit_image_helper.ConvertR32ToD32(dst, src_view);
}
break;
default:
break;
}
}
VkFormat TextureCacheRuntime::GetSupportedFormat(VkFormat requested_format,
VkFormatFeatureFlags required_features) const {
if (requested_format == VK_FORMAT_A8B8G8R8_SRGB_PACK32 &&
(required_features & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
// Force valid depth format when sRGB requested in depth context
return VK_FORMAT_D24_UNORM_S8_UINT;
}
return requested_format;
}
// Helper functions for format compatibility checks
bool TextureCacheRuntime::IsFormatDitherable(PixelFormat format) {
switch (format) {
case PixelFormat::B8G8R8A8_UNORM:
case PixelFormat::A8B8G8R8_UNORM:
case PixelFormat::B8G8R8A8_SRGB:
case PixelFormat::A8B8G8R8_SRGB:
return true;
default:
return false;
}
}
bool TextureCacheRuntime::IsFormatScalable(PixelFormat format) {
switch (format) {
case PixelFormat::B8G8R8A8_UNORM:
case PixelFormat::A8B8G8R8_UNORM:
case PixelFormat::R16G16B16A16_FLOAT:
case PixelFormat::R32G32B32A32_FLOAT:
return true;
default:
return false;
}
UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format);
}
void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
@ -1449,224 +1342,13 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
});
}
void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies) {
void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
const bool msaa_to_non_msaa = src.info.num_samples > 1 && dst.info.num_samples == 1;
if (!msaa_to_non_msaa) {
return CopyImage(dst, src, copies);
}
// Convert PixelFormat to VkFormat using Maxwell format conversion
const auto vk_format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, src.info.format).format;
// Check if format supports MSAA resolve
const auto format_properties = device.GetPhysical().GetFormatProperties(vk_format);
if (!(format_properties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
LOG_WARNING(Render_Vulkan, "Format does not support MSAA resolve, falling back to compute shader");
if (msaa_copy_pass) {
return msaa_copy_pass->CopyImage(dst, src, copies, true);
return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa);
}
UNIMPLEMENTED_MSG("MSAA resolve not supported for format and no compute fallback available");
return;
}
const VkImage dst_image = dst.Handle();
const VkImage src_image = src.Handle();
const VkImageAspectFlags aspect_mask = dst.AspectMask();
// Create temporary resolve image with proper memory allocation
const VkImageCreateInfo resolve_ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.imageType = VK_IMAGE_TYPE_2D,
.format = vk_format,
.extent = {
.width = src.info.size.width,
.height = src.info.size.height,
.depth = src.info.size.depth,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
const auto resolve_image_holder = memory_allocator.CreateImage(resolve_ci);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_image, dst_image, resolve_image = *resolve_image_holder,
copies, aspect_mask](vk::CommandBuffer cmdbuf) {
for (const auto& copy : copies) {
const VkExtent3D extent{
.width = static_cast<u32>(copy.extent.width),
.height = static_cast<u32>(copy.extent.height),
.depth = static_cast<u32>(copy.extent.depth),
};
// First resolve the MSAA source to the temporary image
const VkImageResolve resolve_region{
.srcSubresource = {
.aspectMask = aspect_mask,
.mipLevel = static_cast<u32>(copy.src_subresource.base_level),
.baseArrayLayer = static_cast<u32>(copy.src_subresource.base_layer),
.layerCount = static_cast<u32>(copy.src_subresource.num_layers),
},
.srcOffset = {
static_cast<s32>(copy.src_offset.x),
static_cast<s32>(copy.src_offset.y),
static_cast<s32>(copy.src_offset.z),
},
.dstSubresource = {
.aspectMask = aspect_mask,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.dstOffset = {0, 0, 0},
.extent = extent,
};
const std::array pre_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.image = src_image,
.subresourceRange = {
.aspectMask = aspect_mask,
.baseMipLevel = static_cast<u32>(copy.src_subresource.base_level),
.levelCount = 1,
.baseArrayLayer = static_cast<u32>(copy.src_subresource.base_layer),
.layerCount = static_cast<u32>(copy.src_subresource.num_layers),
},
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.image = resolve_image,
.subresourceRange = {
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0,
nullptr,
nullptr,
pre_barriers);
// Resolve MSAA image
cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
resolve_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
resolve_region);
// Now copy from resolved image to destination
const VkImageCopy copy_region{
.srcSubresource = {
.aspectMask = aspect_mask,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.srcOffset = {0, 0, 0},
.dstSubresource = {
.aspectMask = aspect_mask,
.mipLevel = static_cast<u32>(copy.dst_subresource.base_level),
.baseArrayLayer = static_cast<u32>(copy.dst_subresource.base_layer),
.layerCount = static_cast<u32>(copy.dst_subresource.num_layers),
},
.dstOffset = {
static_cast<s32>(copy.dst_offset.x),
static_cast<s32>(copy.dst_offset.y),
static_cast<s32>(copy.dst_offset.z),
},
.extent = extent,
};
std::array<VkImageMemoryBarrier, 2> mid_barriers{{
{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.image = resolve_image,
.subresourceRange = {
.aspectMask = aspect_mask,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
},
{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.image = dst_image,
.subresourceRange = {
.aspectMask = aspect_mask,
.baseMipLevel = static_cast<u32>(copy.dst_subresource.base_level),
.levelCount = 1,
.baseArrayLayer = static_cast<u32>(copy.dst_subresource.base_layer),
.layerCount = static_cast<u32>(copy.dst_subresource.num_layers),
},
},
}};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0,
nullptr,
nullptr,
mid_barriers);
// Copy from resolved image to destination
cmdbuf.CopyImage(resolve_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
vk::Span{&copy_region, 1});
// Final transition back to general layout
const VkImageMemoryBarrier final_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.image = dst_image,
.subresourceRange = {
.aspectMask = aspect_mask,
.baseMipLevel = static_cast<u32>(copy.dst_subresource.base_level),
.levelCount = 1,
.baseArrayLayer = static_cast<u32>(copy.dst_subresource.base_layer),
.layerCount = static_cast<u32>(copy.dst_subresource.num_layers),
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0,
vk::Span<VkMemoryBarrier>{},
vk::Span<VkBufferMemoryBarrier>{},
vk::Span{&final_barrier, 1});
}
});
UNIMPLEMENTED_MSG("Copying images with different samples is not supported.");
}
u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
@ -2098,7 +1780,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
slot_images = &slot_imgs;
}
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info,
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
: VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}

View file

@ -1,14 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <span>
#include <mutex>
#include <atomic>
#include <string>
#include <unordered_map>
#include "video_core/texture_cache/texture_cache_base.h"
@ -42,22 +37,6 @@ class RenderPassCache;
class StagingBufferPool;
class Scheduler;
// Enhanced texture management for better error handling and thread safety
class TextureCacheManager {
public:
explicit TextureCacheManager();
~TextureCacheManager();
VkImage GetTextureFromCache(const std::string& texture_path);
void ReloadTexture(const std::string& texture_path);
bool IsTextureLoadedCorrectly(VkImage texture);
void HandleTextureCache();
private:
std::mutex texture_mutex;
std::unordered_map<std::string, VkImage> texture_cache;
};
class TextureCacheRuntime {
public:
explicit TextureCacheRuntime(const Device& device_, Scheduler& scheduler_,
@ -133,15 +112,6 @@ public:
void BarrierFeedbackLoop();
bool IsFormatDitherable(VideoCore::Surface::PixelFormat format);
bool IsFormatScalable(VideoCore::Surface::PixelFormat format);
VkFormat GetSupportedFormat(VkFormat requested_format, VkFormatFeatureFlags required_features) const;
// Enhanced texture error handling
bool IsTextureLoadedCorrectly(VkImage texture);
void HandleTextureError(const std::string& texture_path);
const Device& device;
Scheduler& scheduler;
MemoryAllocator& memory_allocator;
@ -153,9 +123,6 @@ public:
const Settings::ResolutionScalingInfo& resolution;
std::array<std::vector<VkFormat>, VideoCore::Surface::MaxPixelFormat> view_formats;
// Enhanced texture management
TextureCacheManager texture_cache_manager;
static constexpr size_t indexing_slots = 8 * sizeof(size_t);
std::array<vk::Buffer, indexing_slots> buffers{};
};

View file

@ -1,146 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#include <filesystem>
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_texture_manager.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
TextureManager::TextureManager(const Device& device_, MemoryAllocator& memory_allocator_)
: device(device_), memory_allocator(memory_allocator_) {
// Create a default texture for fallback in case of errors
default_texture = CreateDefaultTexture();
}
TextureManager::~TextureManager() {
std::lock_guard<std::mutex> lock(texture_mutex);
// Clear all cached textures
texture_cache.clear();
// Default texture will be cleaned up automatically by vk::Image's destructor
}
VkImage TextureManager::GetTexture(const std::string& texture_path) {
std::lock_guard<std::mutex> lock(texture_mutex);
// Check if the texture is already in the cache
auto it = texture_cache.find(texture_path);
if (it != texture_cache.end()) {
return *it->second;
}
// Load the texture and add it to the cache
vk::Image new_texture = LoadTexture(texture_path);
if (new_texture) {
VkImage raw_handle = *new_texture;
texture_cache.emplace(texture_path, std::move(new_texture));
return raw_handle;
}
// If loading fails, return the default texture if it exists
LOG_WARNING(Render_Vulkan, "Failed to load texture: {}, using default", texture_path);
if (default_texture.has_value()) {
return *(*default_texture);
}
return VK_NULL_HANDLE;
}
void TextureManager::ReloadTexture(const std::string& texture_path) {
std::lock_guard<std::mutex> lock(texture_mutex);
// Remove the texture from cache if it exists
auto it = texture_cache.find(texture_path);
if (it != texture_cache.end()) {
LOG_INFO(Render_Vulkan, "Reloading texture: {}", texture_path);
texture_cache.erase(it);
}
// The texture will be reloaded on next GetTexture call
}
bool TextureManager::IsTextureLoadedCorrectly(VkImage texture) {
// Check if the texture handle is valid
static const VkImage null_handle = VK_NULL_HANDLE;
return texture != null_handle;
}
void TextureManager::CleanupTextureCache() {
std::lock_guard<std::mutex> lock(texture_mutex);
// TODO: track usage and remove unused textures [ZEP]
LOG_INFO(Render_Vulkan, "Handling texture cache cleanup, current size: {}", texture_cache.size());
}
void TextureManager::HandleTextureRendering(const std::string& texture_path,
std::function<void(VkImage)> render_callback) {
VkImage texture = GetTexture(texture_path);
if (!IsTextureLoadedCorrectly(texture)) {
LOG_ERROR(Render_Vulkan, "Texture failed to load correctly: {}, attempting reload", texture_path);
ReloadTexture(texture_path);
texture = GetTexture(texture_path);
}
// Execute the rendering callback with the texture
render_callback(texture);
}
vk::Image TextureManager::LoadTexture(const std::string& texture_path) {
// TODO: load image data from disk
// and create a proper Vulkan texture [ZEP]
if (!std::filesystem::exists(texture_path)) {
LOG_ERROR(Render_Vulkan, "Texture file not found: {}", texture_path);
return {};
}
try {
LOG_INFO(Render_Vulkan, "Loaded texture: {}", texture_path);
// TODO: create an actual VkImage [ZEP]
return CreateDefaultTexture();
} catch (const std::exception& e) {
LOG_ERROR(Render_Vulkan, "Error loading texture {}: {}", texture_path, e.what());
return {};
}
}
vk::Image TextureManager::CreateDefaultTexture() {
// Create a small default texture (1x1 pixel) to use as a fallback
// const VkExtent2D extent{1, 1};
// Create image
// Avoid unused variable warning by commenting out the unused struct
// VkImageCreateInfo image_ci{
// .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
// .pNext = nullptr,
// .flags = 0,
// .imageType = VK_IMAGE_TYPE_2D,
// .format = texture_format,
// .extent = {extent.width, extent.height, 1},
// .mipLevels = 1,
// .arrayLayers = 1,
// .samples = VK_SAMPLE_COUNT_1_BIT,
// .tiling = VK_IMAGE_TILING_OPTIMAL,
// .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
// .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
// .queueFamilyIndexCount = 0,
// .pQueueFamilyIndices = nullptr,
// .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
// };
// TODO: create an actual VkImage [ZEP]
LOG_INFO(Render_Vulkan, "Created default fallback texture");
return {};
}
} // namespace Vulkan
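GetTexture above follows a get-or-load-or-fallback shape: return a cached handle, load on a miss, and fall back to the default texture when loading fails. A self-contained sketch of that shape with plain strings standing in for Vulkan handles (purely illustrative):

#include <iostream>
#include <optional>
#include <string>
#include <unordered_map>

class ResourceCache {
public:
    // Returns the cached value, loads it on a miss, or falls back to a default.
    const std::string& Get(const std::string& key) {
        if (auto it = cache.find(key); it != cache.end()) {
            return it->second;
        }
        if (auto loaded = Load(key)) {
            return cache.emplace(key, std::move(*loaded)).first->second;
        }
        return fallback;  // never fails, mirrors the default texture above
    }

private:
    static std::optional<std::string> Load(const std::string& key) {
        if (key.empty()) {
            return std::nullopt;  // simulate a load failure
        }
        return "loaded:" + key;
    }

    std::unordered_map<std::string, std::string> cache;
    std::string fallback{"default"};
};

int main() {
    ResourceCache cache;
    std::cout << cache.Get("ui/button.png") << '\n';  // loaded:ui/button.png
    std::cout << cache.Get("") << '\n';               // default
    return 0;
}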

View file

@ -1,57 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <mutex>
#include <string>
#include <unordered_map>
#include <functional>
#include <atomic>
#include <optional>
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
class Device;
class MemoryAllocator;
// Enhanced texture manager for better error handling and thread safety
class TextureManager {
public:
explicit TextureManager(const Device& device, MemoryAllocator& memory_allocator);
~TextureManager();
// Get a texture from the cache, loading it if necessary
VkImage GetTexture(const std::string& texture_path);
// Force a texture to reload from disk
void ReloadTexture(const std::string& texture_path);
// Check if a texture is loaded correctly
bool IsTextureLoadedCorrectly(VkImage texture);
// Remove old textures from the cache
void CleanupTextureCache();
// Handle texture rendering, with automatic reload if needed
void HandleTextureRendering(const std::string& texture_path,
std::function<void(VkImage)> render_callback);
private:
// Load a texture from disk and create a Vulkan image
vk::Image LoadTexture(const std::string& texture_path);
// Create a default texture to use in case of errors
vk::Image CreateDefaultTexture();
const Device& device;
MemoryAllocator& memory_allocator;
std::mutex texture_mutex;
std::unordered_map<std::string, vk::Image> texture_cache;
std::optional<vk::Image> default_texture;
VkFormat texture_format = VK_FORMAT_B8G8R8A8_SRGB;
};
} // namespace Vulkan

View file

@ -1,446 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include <fstream>
#include <algorithm>
#include "common/logging/log.h"
#include "video_core/vulkan_common/hybrid_memory.h"
#if defined(__linux__) || defined(__ANDROID__)
#include <sys/mman.h>
#include <unistd.h>
#include <poll.h>
#include <sys/syscall.h>
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#elif defined(_WIN32)
#include <windows.h>
#endif
namespace Vulkan {
void PredictiveReuseManager::RecordUsage(u64 address, u64 size, bool write_access) {
std::lock_guard<std::mutex> guard(mutex);
// Add to history, removing oldest entries if we're past max_history
access_history.push_back({address, size, write_access, current_timestamp++});
if (access_history.size() > max_history) {
access_history.erase(access_history.begin());
}
}
bool PredictiveReuseManager::IsHotRegion(u64 address, u64 size) const {
std::lock_guard<std::mutex> guard(mutex);
// Check if this memory region has been accessed frequently
const u64 end_address = address + size;
int access_count = 0;
for (const auto& access : access_history) {
const u64 access_end = access.address + access.size;
// Check for overlap
if (!(end_address <= access.address || address >= access_end)) {
access_count++;
}
}
// Consider a region "hot" if it has been accessed in at least 10% of recent accesses
return access_count >= static_cast<int>(std::max<size_t>(1, max_history / 10));
}
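For the threshold above: with, say, max_history = 100, a region counts as hot once at least max(1, 100 / 10) = 10 recorded accesses overlap it. The overlap test itself is the usual half-open interval check, shown here in isolation (illustrative only):

#include <cassert>
#include <cstdint>

// Two half-open ranges [a, a + a_size) and [b, b + b_size) overlap unless one
// ends at or before the start of the other.
bool Overlaps(std::uint64_t a, std::uint64_t a_size, std::uint64_t b, std::uint64_t b_size) {
    return !(a + a_size <= b || a >= b + b_size);
}

int main() {
    assert(Overlaps(0x1000, 0x100, 0x10F0, 0x10));   // hits the tail of the first range
    assert(!Overlaps(0x1000, 0x100, 0x1100, 0x10));  // adjacent, no overlap
    return 0;
}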
void PredictiveReuseManager::EvictRegion(u64 address, u64 size) {
std::lock_guard<std::mutex> guard(mutex);
// Remove any history entries that overlap with this region
const u64 end_address = address + size;
access_history.erase(
std::remove_if(access_history.begin(), access_history.end(),
[address, end_address](const MemoryAccess& access) {
const u64 access_end = access.address + access.size;
// Check for overlap
return !(end_address <= access.address || address >= access_end);
}),
access_history.end()
);
}
void PredictiveReuseManager::ClearHistory() {
std::lock_guard<std::mutex> guard(mutex);
access_history.clear();
current_timestamp = 0;
}
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
void FaultManagedAllocator::Touch(size_t addr) {
lru.remove(addr);
lru.push_front(addr);
dirty_set.insert(addr);
}
void FaultManagedAllocator::EnforceLimit() {
while (lru.size() > MaxPages) {
size_t evict = lru.back();
lru.pop_back();
auto it = page_map.find(evict);
if (it != page_map.end()) {
if (dirty_set.count(evict)) {
// Compress and store dirty page before evicting
std::vector<u8> compressed((u8*)it->second, (u8*)it->second + PageSize);
compressed_store[evict] = std::move(compressed);
dirty_set.erase(evict);
}
#if defined(__linux__) || defined(__ANDROID__)
munmap(it->second, PageSize);
#elif defined(_WIN32)
VirtualFree(it->second, 0, MEM_RELEASE);
#endif
page_map.erase(it);
}
}
}
void* FaultManagedAllocator::GetOrAlloc(size_t addr) {
std::lock_guard<std::mutex> guard(lock);
if (page_map.count(addr)) {
Touch(addr);
return page_map[addr];
}
#if defined(__linux__) || defined(__ANDROID__)
void* mem = mmap(nullptr, PageSize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) {
LOG_ERROR(Render_Vulkan, "Failed to mmap memory for fault handler");
return nullptr;
}
#elif defined(_WIN32)
void* mem = VirtualAlloc(nullptr, PageSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
if (!mem) {
LOG_ERROR(Render_Vulkan, "Failed to VirtualAlloc memory for fault handler");
return nullptr;
}
#endif
if (compressed_store.count(addr)) {
// Decompress stored page data
std::memcpy(mem, compressed_store[addr].data(), compressed_store[addr].size());
compressed_store.erase(addr);
} else {
std::memset(mem, 0, PageSize);
}
page_map[addr] = mem;
lru.push_front(addr);
dirty_set.insert(addr);
EnforceLimit();
return mem;
}
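// Demand-paging flow: pages are keyed by their page-aligned offset. A page that
// was never touched is zero-filled, a previously evicted dirty page is restored
// from compressed_store, and every page handed out is marked dirty and counted
// against the LRU cap enforced by EnforceLimit().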
#if defined(_WIN32)
// Static member initialization
FaultManagedAllocator* FaultManagedAllocator::current_instance = nullptr;
LONG WINAPI FaultManagedAllocator::VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info) {
// Only handle access violations (page faults)
if (exception_info->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) {
return EXCEPTION_CONTINUE_SEARCH;
}
if (!current_instance) {
return EXCEPTION_CONTINUE_SEARCH;
}
// Get the faulting address - use ULONG_PTR for Windows
const ULONG_PTR fault_addr = static_cast<ULONG_PTR>(exception_info->ExceptionRecord->ExceptionInformation[1]);
const ULONG_PTR base_addr = reinterpret_cast<ULONG_PTR>(current_instance->base_address);
// Check if the address is within our managed range
if (fault_addr < base_addr ||
fault_addr >= (base_addr + static_cast<ULONG_PTR>(current_instance->memory_size))) {
return EXCEPTION_CONTINUE_SEARCH;
}
// Calculate the base address of the page
const ULONG_PTR page_addr = fault_addr & ~(static_cast<ULONG_PTR>(PageSize) - 1);
const size_t relative_addr = static_cast<size_t>(page_addr - base_addr);
// Handle the fault by allocating memory
void* page = current_instance->GetOrAlloc(relative_addr);
if (!page) {
return EXCEPTION_CONTINUE_SEARCH;
}
// Copy the backing page into the faulting address
DWORD old_protect;
void* target_addr = reinterpret_cast<void*>(page_addr);
// Make the target page accessible before copying into it
if (VirtualProtect(target_addr, PageSize, PAGE_READWRITE, &old_protect)) {
std::memcpy(target_addr, page, PageSize);
// Leave the page readable/writable: restoring the previous PAGE_NOACCESS
// protection here would make the retried instruction fault again and loop.
return EXCEPTION_CONTINUE_EXECUTION;
}
return EXCEPTION_CONTINUE_SEARCH;
}
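// Windows fault path summary: Initialize() marks the guest range PAGE_NOACCESS,
// so the first touch of a page raises an access violation. The handler above
// allocates (or restores) a backing page, copies it into place, makes the page
// readable/writable and resumes the faulting instruction.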
void FaultManagedAllocator::ExceptionHandlerThread() {
// All fault handling happens in the vectored exception handler; this thread
// only idles so shutdown can be signalled through `running`.
while (running) {
Sleep(10);
}
}
#endif
void FaultManagedAllocator::Initialize(void* base, size_t size) {
#if defined(__linux__) || defined(__ANDROID__)
uffd = static_cast<int>(syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK));
if (uffd < 0) {
LOG_ERROR(Render_Vulkan, "Failed to create userfaultfd, fault handling disabled");
return;
}
struct uffdio_api api = { .api = UFFD_API };
if (ioctl(uffd, UFFDIO_API, &api) < 0) {
LOG_ERROR(Render_Vulkan, "userfaultfd API handshake failed, fault handling disabled");
close(uffd);
uffd = -1;
return;
}
struct uffdio_register reg = {
.range = { .start = (uintptr_t)base, .len = size },
.mode = UFFDIO_REGISTER_MODE_MISSING
};
if (ioctl(uffd, UFFDIO_REGISTER, &reg) < 0) {
LOG_ERROR(Render_Vulkan, "Failed to register memory range with userfaultfd");
close(uffd);
uffd = -1;
return;
}
running = true;
fault_handler = std::thread(&FaultManagedAllocator::FaultThread, this);
#elif defined(_WIN32)
// Set up Windows demand paging over the (assumed already committed) guest range
base_address = base;
memory_size = size;
// Mark the whole range PAGE_NOACCESS so the first touch of each page raises an
// access violation that the vectored handler below services. VirtualProtect only
// changes protection; it does not reserve or commit memory.
DWORD old_protect;
if (!VirtualProtect(base, size, PAGE_NOACCESS, &old_protect)) {
LOG_ERROR(Render_Vulkan, "Failed to protect guest range for fault handling");
return;
}
// Install a vectored exception handler and keep its handle for removal
current_instance = this;
veh_handle = AddVectoredExceptionHandler(1, VectoredExceptionHandler);
running = true;
exception_handler = std::thread(&FaultManagedAllocator::ExceptionHandlerThread, this);
LOG_INFO(Render_Vulkan, "Windows fault-managed memory initialized at {:p}, size: {}",
base, size);
#endif
}
#if defined(__linux__) || defined(__ANDROID__)
void FaultManagedAllocator::FaultThread() {
struct pollfd pfd = { uffd, POLLIN, 0 };
while (running) {
if (poll(&pfd, 1, 10) > 0) {
struct uffd_msg msg;
if (read(uffd, &msg, sizeof(msg)) != static_cast<ssize_t>(sizeof(msg))) {
continue;
}
if (msg.event == UFFD_EVENT_PAGEFAULT) {
size_t addr = msg.arg.pagefault.address & ~(PageSize - 1);
void* page = GetOrAlloc(addr);
if (page) {
struct uffdio_copy copy = {
.dst = (uintptr_t)addr,
.src = (uintptr_t)page,
.len = PageSize,
.mode = 0
};
ioctl(uffd, UFFDIO_COPY, &copy);
}
}
}
}
}
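// UFFDIO_COPY resolves the missing-page fault by copying the backing page into
// the faulting range and waking the thread that triggered the fault.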
#endif
void* FaultManagedAllocator::Translate(size_t addr) {
std::lock_guard<std::mutex> guard(lock);
size_t base = addr & ~(PageSize - 1);
if (!page_map.count(base)) {
return nullptr;
}
Touch(base);
return (u8*)page_map[base] + (addr % PageSize);
}
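// Translate() only succeeds for pages that are currently resident: offsets that
// were never faulted in, or whose page has been evicted, return nullptr.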
void FaultManagedAllocator::SaveSnapshot(const std::string& path) {
std::lock_guard<std::mutex> guard(lock);
std::ofstream out(path, std::ios::binary);
if (!out) {
LOG_ERROR(Render_Vulkan, "Failed to open snapshot file for writing: {}", path);
return;
}
for (auto& [addr, mem] : page_map) {
out.write(reinterpret_cast<const char*>(&addr), sizeof(addr));
out.write(reinterpret_cast<const char*>(mem), PageSize);
}
LOG_INFO(Render_Vulkan, "Saved memory snapshot to {}", path);
}
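// Snapshot format: a flat sequence of (page address, raw 4 KiB page) records for
// the pages resident at the time of the call; pages that were evicted into
// compressed_store are not included.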
void FaultManagedAllocator::SaveDifferentialSnapshot(const std::string& path) {
std::lock_guard<std::mutex> guard(lock);
std::ofstream out(path, std::ios::binary);
if (!out) {
LOG_ERROR(Render_Vulkan, "Failed to open diff snapshot file for writing: {}", path);
return;
}
size_t dirty_count = 0;
for (const auto& addr : dirty_set) {
if (page_map.count(addr)) {
out.write(reinterpret_cast<const char*>(&addr), sizeof(addr));
out.write(reinterpret_cast<const char*>(page_map[addr]), PageSize);
dirty_count++;
}
}
LOG_INFO(Render_Vulkan, "Saved differential snapshot to {} ({} dirty pages)",
path, dirty_count);
}
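// The differential snapshot uses the same record format but only writes pages
// touched since the last ClearDirtySet() call.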
void FaultManagedAllocator::ClearDirtySet() {
std::lock_guard<std::mutex> guard(lock);
dirty_set.clear();
LOG_DEBUG(Render_Vulkan, "Cleared dirty page tracking");
}
FaultManagedAllocator::~FaultManagedAllocator() {
running = false;
#if defined(__linux__) || defined(__ANDROID__)
if (fault_handler.joinable()) {
fault_handler.join();
}
for (auto& [addr, mem] : page_map) {
munmap(mem, PageSize);
}
if (uffd != -1) {
close(uffd);
}
#elif defined(_WIN32)
if (exception_handler.joinable()) {
exception_handler.join();
}
// Remove the vectored exception handler using the handle returned at install time
if (veh_handle) {
RemoveVectoredExceptionHandler(veh_handle);
veh_handle = nullptr;
}
current_instance = nullptr;
for (auto& [addr, mem] : page_map) {
VirtualFree(mem, 0, MEM_RELEASE);
}
// Release the guest range; this assumes it was reserved with VirtualAlloc and
// that ownership was handed to the allocator via Initialize()
if (base_address) {
VirtualFree(base_address, 0, MEM_RELEASE);
base_address = nullptr;
}
#endif
}
#endif // defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
HybridMemory::HybridMemory(const Device& device_, MemoryAllocator& allocator, size_t reuse_history)
: device(device_), memory_allocator(allocator), reuse_manager(reuse_history) {
}
HybridMemory::~HybridMemory() = default;
void HybridMemory::InitializeGuestMemory(void* base, size_t size) {
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
fmaa.Initialize(base, size);
LOG_INFO(Render_Vulkan, "Initialized fault-managed guest memory at {:p}, size: {}",
base, size);
#else
LOG_INFO(Render_Vulkan, "Fault-managed memory not supported on this platform");
#endif
}
void* HybridMemory::TranslateAddress(size_t addr) {
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
return fmaa.Translate(addr);
#else
return nullptr;
#endif
}
ComputeBuffer HybridMemory::CreateComputeBuffer(VkDeviceSize size, VkBufferUsageFlags usage,
MemoryUsage memory_type) {
ComputeBuffer buffer;
buffer.size = size;
VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = size,
.usage = usage | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
// MemoryAllocator::CreateBuffer allocates and binds the backing memory internally
buffer.buffer = memory_allocator.CreateBuffer(buffer_ci, memory_type);
LOG_DEBUG(Render_Vulkan, "Created compute buffer: size={}, usage={:x}",
size, usage);
return buffer;
}
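// Illustrative use only (buffer size, usage flags and memory type are examples,
// assuming the allocator exposes MemoryUsage::DeviceLocal):
//   ComputeBuffer scratch = hybrid_memory.CreateComputeBuffer(
//       1024 * 1024, VK_BUFFER_USAGE_TRANSFER_DST_BIT, MemoryUsage::DeviceLocal);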
void HybridMemory::SaveSnapshot(const std::string& path) {
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
fmaa.SaveSnapshot(path);
#else
LOG_ERROR(Render_Vulkan, "Memory snapshots not supported on this platform");
#endif
}
void HybridMemory::SaveDifferentialSnapshot(const std::string& path) {
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
fmaa.SaveDifferentialSnapshot(path);
#else
LOG_ERROR(Render_Vulkan, "Differential memory snapshots not supported on this platform");
#endif
}
void HybridMemory::ResetDirtyTracking() {
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
fmaa.ClearDirtySet();
#endif
}
} // namespace Vulkan

View file

@ -1,119 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include <string>
#include <vector>
#include <unordered_map>
#include <mutex>
#include <atomic>
#include <functional>
#include <list>
#include <set>
#include <map>
#include <thread>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
struct ComputeBuffer {
vk::Buffer buffer{};
VkDeviceSize size = 0;
};
class PredictiveReuseManager {
public:
explicit PredictiveReuseManager(size_t history_size) : max_history{history_size} {}
void RecordUsage(u64 address, u64 size, bool write_access);
bool IsHotRegion(u64 address, u64 size) const;
void EvictRegion(u64 address, u64 size);
void ClearHistory();
private:
struct MemoryAccess {
u64 address;
u64 size;
bool write_access;
u64 timestamp;
};
std::vector<MemoryAccess> access_history;
const size_t max_history;
u64 current_timestamp{0};
mutable std::mutex mutex;
};
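// Minimal usage sketch (caller names are hypothetical): a cache can record every
// access and consult the reuse manager before evicting a backing region.
//   reuse_manager.RecordUsage(gpu_addr, size, /*write_access=*/true);
//   if (!reuse_manager.IsHotRegion(gpu_addr, size)) {
//       reuse_manager.EvictRegion(gpu_addr, size);
//   }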
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
class FaultManagedAllocator {
public:
static constexpr size_t PageSize = 0x1000;
static constexpr size_t MaxPages = 16384;
void Initialize(void* base, size_t size);
void* Translate(size_t addr);
void SaveSnapshot(const std::string& path);
void SaveDifferentialSnapshot(const std::string& path);
void ClearDirtySet();
~FaultManagedAllocator();
private:
std::map<size_t, void*> page_map;
std::list<size_t> lru;
std::set<size_t> dirty_set;
std::unordered_map<size_t, std::vector<u8>> compressed_store;
std::mutex lock;
#if defined(__linux__) || defined(__ANDROID__)
int uffd = -1;
std::atomic<bool> running{false};
std::thread fault_handler;
void FaultThread();
#elif defined(_WIN32)
void* base_address = nullptr;
size_t memory_size = 0;
void* veh_handle = nullptr; // handle returned by AddVectoredExceptionHandler
std::atomic<bool> running{false};
std::thread exception_handler;
void ExceptionHandlerThread();
static LONG WINAPI VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info);
static FaultManagedAllocator* current_instance;
#endif
void Touch(size_t addr);
void EnforceLimit();
void* GetOrAlloc(size_t addr);
};
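// Platform notes: on Linux/Android faults are served by a userfaultfd worker
// thread (FaultThread); on Windows a vectored exception handler intercepts
// access violations on a PAGE_NOACCESS range. Both paths demand-allocate 4 KiB
// pages and cap residency at MaxPages via an LRU list.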
#endif
class HybridMemory {
public:
explicit HybridMemory(const Device& device, MemoryAllocator& allocator, size_t reuse_history = 32);
~HybridMemory();
void InitializeGuestMemory(void* base, size_t size);
void* TranslateAddress(size_t addr);
ComputeBuffer CreateComputeBuffer(VkDeviceSize size, VkBufferUsageFlags usage, MemoryUsage memory_type);
void SaveSnapshot(const std::string& path);
void SaveDifferentialSnapshot(const std::string& path);
void ResetDirtyTracking();
private:
const Device& device;
MemoryAllocator& memory_allocator;
PredictiveReuseManager reuse_manager;
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
FaultManagedAllocator fmaa;
#endif
};
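// Minimal usage sketch (paths and sizes are illustrative):
//   HybridMemory hybrid{device, allocator};
//   hybrid.InitializeGuestMemory(guest_base, guest_size);
//   void* host_ptr = hybrid.TranslateAddress(page_aligned_offset);
//   hybrid.SaveDifferentialSnapshot("snapshots/frame_0001.bin");
//   hybrid.ResetDirtyTracking();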
} // namespace Vulkan