mirror of
https://git.citron-emu.org/citron/emu
synced 2025-05-12 19:00:37 +01:00
revert: video_core changes reverted due to instability
Reverted video_core changes as they were causing instability and OpenGL had no video output. The following commits were reverted:1fd5fefcb1
edfb500ee7
b25c7653e6
5d952717ff
964bbf489a
a4088f3a1e
18def48dfe
3205c9b691
2f57a35d2d
f706427815
fc88c06769
Signed-off-by: Zephyron <zephyron@citron-emu.org>
This commit is contained in:
parent
ba98d0f15c
commit
03aab9becc
29 changed files with 180 additions and 3016 deletions
|
@ -19,9 +19,9 @@ cmake .. \
|
|||
-DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON \
|
||||
-DENABLE_QT_TRANSLATION=ON \
|
||||
-DUSE_DISCORD_PRESENCE=ON \
|
||||
-DYUZU_CRASH_DUMPS=ON \
|
||||
-DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} \
|
||||
-DYUZU_USE_BUNDLED_FFMPEG=ON \
|
||||
-DCITRON_CRASH_DUMPS=ON \
|
||||
-DCITRON_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} \
|
||||
-DCITRON_USE_BUNDLED_FFMPEG=ON \
|
||||
-GNinja
|
||||
|
||||
ninja
|
||||
|
|
|
@ -11,7 +11,7 @@ gcc -v
|
|||
tx --version
|
||||
|
||||
mkdir build && cd build
|
||||
cmake .. -DENABLE_QT_TRANSLATION=ON -DGENERATE_QT_TRANSLATION=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_SDL2=OFF -DYUZU_TESTS=OFF -DYUZU_USE_BUNDLED_VCPKG=ON
|
||||
cmake .. -DENABLE_QT_TRANSLATION=ON -DGENERATE_QT_TRANSLATION=ON -DCMAKE_BUILD_TYPE=Release -DENABLE_SDL2=OFF -DCITRON_TESTS=OFF -DCITRON_USE_BUNDLED_VCPKG=ON
|
||||
make translation
|
||||
cd ..
|
||||
|
||||
|
|
|
@ -17,9 +17,9 @@ cmake .. \
|
|||
-DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON \
|
||||
-DENABLE_QT_TRANSLATION=ON \
|
||||
-DUSE_CCACHE=ON \
|
||||
-DYUZU_USE_BUNDLED_SDL2=OFF \
|
||||
-DYUZU_USE_EXTERNAL_SDL2=OFF \
|
||||
-DYUZU_TESTS=OFF \
|
||||
-DCITRON_USE_BUNDLED_SDL2=OFF \
|
||||
-DCITRON_USE_EXTERNAL_SDL2=OFF \
|
||||
-DCITRON_TESTS=OFF \
|
||||
-GNinja
|
||||
ninja citron citron-cmd
|
||||
|
||||
|
|
|
@ -75,11 +75,6 @@ void QtConfig::ReadQtValues() {
|
|||
ReadUIValues();
|
||||
}
|
||||
ReadQtControlValues();
|
||||
|
||||
// Always disable memory snapshots and hybrid memory
|
||||
Settings::values.use_gpu_memory_manager.SetValue(false);
|
||||
Settings::values.enable_memory_snapshots.SetValue(false);
|
||||
Settings::values.use_nce.SetValue(false);
|
||||
}
|
||||
|
||||
void QtConfig::ReadQtPlayerValues(const std::size_t player_index) {
|
||||
|
@ -341,11 +336,6 @@ void QtConfig::SaveQtValues() {
|
|||
}
|
||||
SaveQtControlValues();
|
||||
|
||||
// Ensure memory snapshots and hybrid memory are always disabled
|
||||
Settings::values.use_gpu_memory_manager.SetValue(false);
|
||||
Settings::values.enable_memory_snapshots.SetValue(false);
|
||||
Settings::values.use_nce.SetValue(false);
|
||||
|
||||
WriteToIni();
|
||||
}
|
||||
|
||||
|
|
|
@ -760,17 +760,6 @@ Widget::Widget(Settings::BasicSetting* setting_, const TranslationMap& translati
|
|||
enable &= setting.UsingGlobal();
|
||||
}
|
||||
|
||||
// Disable memory snapshot and hybrid memory checkboxes
|
||||
if (static_cast<u32>(id) == Settings::values.use_gpu_memory_manager.Id() ||
|
||||
static_cast<u32>(id) == Settings::values.enable_memory_snapshots.Id() ||
|
||||
static_cast<u32>(id) == Settings::values.use_nce.Id()) {
|
||||
enable = false;
|
||||
// Also disable the checkbox to prevent it from being changed
|
||||
if (checkbox) {
|
||||
checkbox->setEnabled(false);
|
||||
}
|
||||
}
|
||||
|
||||
this->setEnabled(enable);
|
||||
|
||||
this->setToolTip(tooltip);
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
@ -212,11 +211,6 @@ struct Values {
|
|||
true,
|
||||
true,
|
||||
&use_speed_limit};
|
||||
SwitchableSetting<bool> use_nce{linkage, true, "Use Native Code Execution", Category::Core};
|
||||
|
||||
// Memory
|
||||
SwitchableSetting<bool> use_gpu_memory_manager{linkage, false, "Use GPU Memory Manager", Category::Core};
|
||||
SwitchableSetting<bool> enable_memory_snapshots{linkage, false, "Enable Memory Snapshots", Category::Core};
|
||||
|
||||
// Cpu
|
||||
SwitchableSetting<CpuBackend, true> cpu_backend{linkage,
|
||||
|
@ -401,11 +395,11 @@ struct Values {
|
|||
Category::RendererAdvanced};
|
||||
SwitchableSetting<bool> async_presentation{linkage,
|
||||
#ifdef ANDROID
|
||||
false, // Disabled due to instability causing crashes
|
||||
true,
|
||||
#else
|
||||
false, // Disabled due to instability causing crashes
|
||||
false,
|
||||
#endif
|
||||
"async_presentation", Category::RendererAdvanced}; // Hidden from UI due to instability
|
||||
"async_presentation", Category::RendererAdvanced};
|
||||
SwitchableSetting<bool> renderer_force_max_clock{linkage, false, "force_max_clock",
|
||||
Category::RendererAdvanced};
|
||||
SwitchableSetting<bool> use_reactive_flushing{linkage,
|
||||
|
@ -618,30 +612,24 @@ struct Values {
|
|||
Category::Network};
|
||||
|
||||
// WebService
|
||||
Setting<bool> enable_telemetry{linkage, true, "enable_telemetry", Category::WebService};
|
||||
Setting<std::string> web_api_url{linkage, "api.ynet-fun.xyz", "web_api_url",
|
||||
Setting<bool> enable_telemetry{linkage, false, "enable_telemetry", Category::WebService};
|
||||
Setting<std::string> web_api_url{linkage, "https://api.ynet-fun.xyz", "web_api_url",
|
||||
Category::WebService};
|
||||
Setting<std::string> citron_username{linkage, std::string(), "citron_username",
|
||||
Category::WebService};
|
||||
Setting<std::string> citron_token{linkage, std::string(), "citron_token", Category::WebService};
|
||||
|
||||
// Memory
|
||||
Setting<bool> use_gpu_memory_manager{linkage, false, "use_gpu_memory_manager", Category::System};
|
||||
Setting<bool> enable_memory_snapshots{linkage, false, "enable_memory_snapshots", Category::System};
|
||||
|
||||
// Add-Ons
|
||||
std::map<u64, std::vector<std::string>> disabled_addons;
|
||||
|
||||
// Renderer Advanced Settings
|
||||
SwitchableSetting<bool> use_enhanced_shader_building{linkage, false, "Enhanced Shader Building",
|
||||
Category::RendererAdvanced};
|
||||
|
||||
// Add a new setting for shader compilation priority
|
||||
SwitchableSetting<int> shader_compilation_priority{linkage, 0, "Shader Compilation Priority",
|
||||
Category::RendererAdvanced};
|
||||
};
|
||||
|
||||
extern Values values;
|
||||
|
||||
void UpdateGPUAccuracy();
|
||||
// boold isGPULevelNormal();
|
||||
// TODO: ZEP
|
||||
bool IsGPULevelExtreme();
|
||||
bool IsGPULevelHigh();
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
# SPDX-FileCopyrightText: 2018 yuzu Emulator Project
|
||||
# SPDX-FileCopyrightText: 2025 Citron Emulator Project
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
add_subdirectory(host_shaders)
|
||||
|
@ -248,8 +247,6 @@ add_library(video_core STATIC
|
|||
renderer_vulkan/vk_turbo_mode.h
|
||||
renderer_vulkan/vk_update_descriptor.cpp
|
||||
renderer_vulkan/vk_update_descriptor.h
|
||||
renderer_vulkan/vk_texture_manager.cpp
|
||||
renderer_vulkan/vk_texture_manager.h
|
||||
shader_cache.cpp
|
||||
shader_cache.h
|
||||
shader_environment.cpp
|
||||
|
@ -309,8 +306,6 @@ add_library(video_core STATIC
|
|||
vulkan_common/vulkan_library.h
|
||||
vulkan_common/vulkan_memory_allocator.cpp
|
||||
vulkan_common/vulkan_memory_allocator.h
|
||||
vulkan_common/hybrid_memory.cpp
|
||||
vulkan_common/hybrid_memory.h
|
||||
vulkan_common/vulkan_surface.cpp
|
||||
vulkan_common/vulkan_surface.h
|
||||
vulkan_common/vulkan_wrapper.cpp
|
||||
|
@ -397,4 +392,8 @@ if (ANDROID AND ARCHITECTURE_arm64)
|
|||
target_link_libraries(video_core PRIVATE adrenotools)
|
||||
endif()
|
||||
|
||||
if (ARCHITECTURE_arm64)
|
||||
target_link_libraries(video_core PRIVATE sse2neon)
|
||||
endif()
|
||||
|
||||
create_target_directory_groups(video_core)
|
||||
|
|
|
@ -1,13 +1,10 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
#include <functional>
|
||||
|
||||
#include "common/settings.h" // for enum class Settings::ShaderBackend
|
||||
#include "common/thread_worker.h"
|
||||
|
@ -237,68 +234,26 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
|
|||
auto func{[this, sources_ = std::move(sources), sources_spirv_ = std::move(sources_spirv),
|
||||
shader_notify, backend, in_parallel,
|
||||
force_context_flush](ShaderContext::Context*) mutable {
|
||||
// Track time for shader compilation for possible performance tuning
|
||||
const auto start_time = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// Prepare compilation steps for all shader stages
|
||||
std::vector<std::function<void()>> compilation_steps;
|
||||
compilation_steps.reserve(5); // Maximum number of shader stages
|
||||
|
||||
// Prepare all compilation steps first to better distribute work
|
||||
for (size_t stage = 0; stage < 5; ++stage) {
|
||||
switch (backend) {
|
||||
case Settings::ShaderBackend::Glsl:
|
||||
if (!sources_[stage].empty()) {
|
||||
compilation_steps.emplace_back([this, stage, source = sources_[stage]]() {
|
||||
source_programs[stage] = CreateProgram(source, Stage(stage));
|
||||
});
|
||||
source_programs[stage] = CreateProgram(sources_[stage], Stage(stage));
|
||||
}
|
||||
break;
|
||||
case Settings::ShaderBackend::Glasm:
|
||||
if (!sources_[stage].empty()) {
|
||||
compilation_steps.emplace_back([this, stage, source = sources_[stage]]() {
|
||||
assembly_programs[stage] = CompileProgram(source, AssemblyStage(stage));
|
||||
});
|
||||
assembly_programs[stage] =
|
||||
CompileProgram(sources_[stage], AssemblyStage(stage));
|
||||
}
|
||||
break;
|
||||
case Settings::ShaderBackend::SpirV:
|
||||
if (!sources_spirv_[stage].empty()) {
|
||||
compilation_steps.emplace_back([this, stage, source = sources_spirv_[stage]]() {
|
||||
source_programs[stage] = CreateProgram(source, Stage(stage));
|
||||
});
|
||||
source_programs[stage] = CreateProgram(sources_spirv_[stage], Stage(stage));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If we're running in parallel, use high-priority execution for vertex and fragment shaders
|
||||
// as these are typically needed first by the renderer
|
||||
if (in_parallel && compilation_steps.size() > 1) {
|
||||
// Execute vertex (0) and fragment (4) shaders first if they exist
|
||||
for (size_t priority_stage : {0, 4}) {
|
||||
for (size_t i = 0; i < compilation_steps.size(); ++i) {
|
||||
if ((i == priority_stage || (priority_stage == 0 && i <= 1)) && i < compilation_steps.size()) {
|
||||
compilation_steps[i]();
|
||||
compilation_steps[i] = [](){}; // Mark as executed
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Execute all remaining compilation steps
|
||||
for (auto& step : compilation_steps) {
|
||||
step(); // Will do nothing for already executed steps
|
||||
}
|
||||
|
||||
// Performance measurement for possible logging or optimization
|
||||
const auto end_time = std::chrono::high_resolution_clock::now();
|
||||
const auto compilation_time = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
end_time - start_time).count();
|
||||
|
||||
if (compilation_time > 50) { // Only log slow compilations
|
||||
LOG_DEBUG(Render_OpenGL, "Shader compilation took {}ms", compilation_time);
|
||||
}
|
||||
|
||||
if (force_context_flush || in_parallel) {
|
||||
std::scoped_lock lock{built_mutex};
|
||||
built_fence.Create();
|
||||
|
@ -668,41 +623,15 @@ void GraphicsPipeline::WaitForBuild() {
|
|||
is_built = true;
|
||||
}
|
||||
|
||||
bool GraphicsPipeline::IsBuilt() const noexcept {
|
||||
bool GraphicsPipeline::IsBuilt() noexcept {
|
||||
if (is_built) {
|
||||
return true;
|
||||
}
|
||||
if (!built_fence.handle) {
|
||||
if (built_fence.handle == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if the async build has finished by polling the fence
|
||||
const GLsync sync = built_fence.handle;
|
||||
const GLuint result = glClientWaitSync(sync, 0, 0);
|
||||
if (result == GL_ALREADY_SIGNALED || result == GL_CONDITION_SATISFIED) {
|
||||
// Mark this as mutable even though we're in a const method - this is
|
||||
// essentially a cached value update which is acceptable
|
||||
const_cast<GraphicsPipeline*>(this)->is_built = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// For better performance tracking, capture time spent waiting for shaders
|
||||
static thread_local std::chrono::high_resolution_clock::time_point last_shader_wait_log;
|
||||
static thread_local u32 shader_wait_count = 0;
|
||||
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
|
||||
now - last_shader_wait_log).count();
|
||||
|
||||
// Log shader compilation status periodically to help diagnose performance issues
|
||||
if (elapsed >= 5) { // Log every 5 seconds
|
||||
shader_wait_count++;
|
||||
LOG_DEBUG(Render_OpenGL, "Waiting for async shader compilation... (count={})",
|
||||
shader_wait_count);
|
||||
last_shader_wait_log = now;
|
||||
}
|
||||
|
||||
return false;
|
||||
is_built = built_fence.IsSignaled();
|
||||
return is_built;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
@ -103,7 +102,7 @@ public:
|
|||
return uses_local_memory;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsBuilt() const noexcept;
|
||||
[[nodiscard]] bool IsBuilt() noexcept;
|
||||
|
||||
template <typename Spec>
|
||||
static auto MakeConfigureSpecFunc() {
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <atomic>
|
||||
|
@ -392,118 +391,18 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n
|
|||
if (!use_asynchronous_shaders) {
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// Advanced heuristics for smarter async shader compilation in OpenGL
|
||||
|
||||
// Track shader compilation statistics
|
||||
static thread_local u32 async_shader_count = 0;
|
||||
static thread_local std::chrono::high_resolution_clock::time_point last_async_shader_log;
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// Enhanced detection of UI and critical shaders
|
||||
const bool is_ui_shader = !maxwell3d->regs.zeta_enable;
|
||||
// Check for blend state
|
||||
const bool has_blend = maxwell3d->regs.blend.enable[0] != 0;
|
||||
// Check if texture sampling is likely based on texture units used
|
||||
const bool has_texture = maxwell3d->regs.tex_header.Address() != 0;
|
||||
// Check for clear operations
|
||||
const bool is_clear_operation = maxwell3d->regs.clear_surface.raw != 0;
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
const bool small_draw = draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6;
|
||||
|
||||
// Track pipeline usage patterns for better prediction
|
||||
// Use pipeline address as hash since we don't have a Hash() method
|
||||
const u64 draw_config_hash = reinterpret_cast<u64>(pipeline);
|
||||
static thread_local std::unordered_map<u64, u32> shader_usage_count;
|
||||
static thread_local std::unordered_map<u64, bool> shader_is_frequent;
|
||||
|
||||
// Increment usage counter for this shader
|
||||
shader_usage_count[draw_config_hash]++;
|
||||
|
||||
// After a certain threshold, mark as frequently used
|
||||
if (shader_usage_count[draw_config_hash] >= 3) {
|
||||
shader_is_frequent[draw_config_hash] = true;
|
||||
}
|
||||
|
||||
// Get shader priority from settings
|
||||
const int shader_priority = Settings::values.shader_compilation_priority.GetValue();
|
||||
|
||||
// Always wait for UI shaders if settings specify high priority
|
||||
if (is_ui_shader && (shader_priority >= 0 || small_draw)) {
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// Wait for frequently used small draw shaders
|
||||
if (small_draw && shader_is_frequent[draw_config_hash]) {
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// Wait for clear operations as they're usually critical
|
||||
if (is_clear_operation) {
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// Force wait if high shader priority in settings
|
||||
if (shader_priority > 1) {
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// Improved depth-based heuristics
|
||||
// If something is using depth, we can assume that games are not rendering anything which
|
||||
// will be used one time.
|
||||
if (maxwell3d->regs.zeta_enable) {
|
||||
// Check if this is likely a shadow map or important depth-based effect
|
||||
// Check if depth write is enabled and color writes are disabled for all render targets
|
||||
bool depth_only_pass = maxwell3d->regs.depth_write_enabled;
|
||||
if (depth_only_pass) {
|
||||
bool all_color_masked = true;
|
||||
for (size_t i = 0; i < maxwell3d->regs.color_mask.size(); i++) {
|
||||
// Check if any color component is enabled (R, G, B, A fields of ColorMask)
|
||||
if ((maxwell3d->regs.color_mask[i].raw & 0x1111) != 0) {
|
||||
all_color_masked = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If depth write enabled and all colors masked, this is likely a shadow pass
|
||||
if (all_color_masked) {
|
||||
// Likely a shadow pass, wait for compilation to avoid flickering shadows
|
||||
return pipeline;
|
||||
}
|
||||
}
|
||||
|
||||
// For other depth-enabled renders, use async compilation
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Refined small draw detection
|
||||
if (small_draw) {
|
||||
// Check if this might be a UI element that we missed
|
||||
if (has_blend && has_texture) {
|
||||
// Likely a textured UI element, wait for it
|
||||
// If games are using a small index count, we can assume these are full screen quads.
|
||||
// Usually these shaders are only used once for building textures so we can assume they
|
||||
// can't be built async
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
|
||||
return pipeline;
|
||||
}
|
||||
// For other small draws, assume they're one-off effects
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// Log compilation statistics periodically
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
|
||||
now - last_async_shader_log).count();
|
||||
|
||||
if (elapsed >= 10) {
|
||||
async_shader_count = 0;
|
||||
last_async_shader_log = now;
|
||||
}
|
||||
async_shader_count++;
|
||||
|
||||
if (async_shader_count % 100 == 1) {
|
||||
float progress = 0.5f; // Default to 50% when we can't determine actual progress
|
||||
if (workers) {
|
||||
// TODO: Implement progress tracking
|
||||
}
|
||||
LOG_DEBUG(Render_OpenGL, "Async shader compilation in progress (count={}), completion={:.1f}%",
|
||||
async_shader_count, progress * 100.0f);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -709,33 +608,9 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
|
|||
}
|
||||
|
||||
std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const {
|
||||
// Calculate optimal number of workers based on available CPU cores
|
||||
// Leave at least 1 core for main thread and other operations
|
||||
// Use more cores for more parallelism in shader compilation
|
||||
const u32 num_worker_threads = std::max(std::thread::hardware_concurrency(), 2U);
|
||||
const u32 optimal_workers = num_worker_threads <= 3 ?
|
||||
num_worker_threads - 1 : // On dual/quad core, leave 1 core free
|
||||
num_worker_threads - 2; // On 6+ core systems, leave 2 cores free for other tasks
|
||||
|
||||
auto worker = std::make_unique<ShaderWorker>(
|
||||
optimal_workers,
|
||||
return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1,
|
||||
"GlShaderBuilder",
|
||||
[this] {
|
||||
auto context = Context{emu_window};
|
||||
|
||||
// Apply thread priority based on settings
|
||||
// This allows users to control how aggressive shader compilation is
|
||||
const int priority = Settings::values.shader_compilation_priority.GetValue();
|
||||
if (priority != 0) {
|
||||
Common::SetCurrentThreadPriority(
|
||||
priority > 0 ? Common::ThreadPriority::High : Common::ThreadPriority::Low);
|
||||
}
|
||||
|
||||
return context;
|
||||
}
|
||||
);
|
||||
|
||||
return worker;
|
||||
[this] { return Context{emu_window}; });
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -151,7 +151,7 @@ FramebufferTextureInfo Layer::LoadFBToScreenInfo(const Tegra::FramebufferConfig&
|
|||
// Update existing texture
|
||||
// TODO: Test what happens on hardware when you change the framebuffer dimensions so that
|
||||
// they differ from the LCD resolution.
|
||||
// TODO: Applications could theoretically crash citron here by specifying too large
|
||||
// TODO: Applications could theoretically crash yuzu here by specifying too large
|
||||
// framebuffer sizes. We should make sure that this cannot happen.
|
||||
glTextureSubImage2D(framebuffer_texture.resource.handle, 0, 0, 0, framebuffer.width,
|
||||
framebuffer.height, framebuffer_texture.gl_format,
|
||||
|
|
|
@ -71,7 +71,7 @@ const char* GetType(GLenum type) {
|
|||
|
||||
void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
|
||||
const GLchar* message, const void* user_param) {
|
||||
const char format[] = "{} {} {}: {}";
|
||||
constexpr const char* format = "{} {} {}: {}";
|
||||
const char* const str_source = GetSource(source);
|
||||
const char* const str_type = GetType(type);
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
|
@ -29,15 +28,6 @@
|
|||
#include "video_core/surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
#include "video_core/host_shaders/convert_abgr8_srgb_to_d24s8_frag_spv.h"
|
||||
#include "video_core/host_shaders/convert_rgba8_to_bgra8_frag_spv.h"
|
||||
#include "video_core/host_shaders/convert_yuv420_to_rgb_comp_spv.h"
|
||||
#include "video_core/host_shaders/convert_rgb_to_yuv420_comp_spv.h"
|
||||
#include "video_core/host_shaders/convert_bc7_to_rgba8_comp_spv.h"
|
||||
#include "video_core/host_shaders/convert_astc_hdr_to_rgba16f_comp_spv.h"
|
||||
#include "video_core/host_shaders/convert_rgba16f_to_rgba8_frag_spv.h"
|
||||
#include "video_core/host_shaders/dither_temporal_frag_spv.h"
|
||||
#include "video_core/host_shaders/dynamic_resolution_scale_comp_spv.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
|
@ -449,15 +439,6 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
|
|||
convert_d32f_to_abgr8_frag(BuildShader(device, CONVERT_D32F_TO_ABGR8_FRAG_SPV)),
|
||||
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
|
||||
convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
|
||||
convert_abgr8_srgb_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_SRGB_TO_D24S8_FRAG_SPV)),
|
||||
convert_rgba_to_bgra_frag(BuildShader(device, CONVERT_RGBA8_TO_BGRA8_FRAG_SPV)),
|
||||
convert_yuv420_to_rgb_comp(BuildShader(device, CONVERT_YUV420_TO_RGB_COMP_SPV)),
|
||||
convert_rgb_to_yuv420_comp(BuildShader(device, CONVERT_RGB_TO_YUV420_COMP_SPV)),
|
||||
convert_bc7_to_rgba8_comp(BuildShader(device, CONVERT_BC7_TO_RGBA8_COMP_SPV)),
|
||||
convert_astc_hdr_to_rgba16f_comp(BuildShader(device, CONVERT_ASTC_HDR_TO_RGBA16F_COMP_SPV)),
|
||||
convert_rgba16f_to_rgba8_frag(BuildShader(device, CONVERT_RGBA16F_TO_RGBA8_FRAG_SPV)),
|
||||
dither_temporal_frag(BuildShader(device, DITHER_TEMPORAL_FRAG_SPV)),
|
||||
dynamic_resolution_scale_comp(BuildShader(device, DYNAMIC_RESOLUTION_SCALE_COMP_SPV)),
|
||||
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
|
||||
nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {}
|
||||
|
||||
|
@ -608,14 +589,6 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
|
|||
ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
ConvertPipelineDepthTargetEx(convert_abgr8_srgb_to_d24s8_pipeline,
|
||||
dst_framebuffer->RenderPass(),
|
||||
convert_abgr8_srgb_to_d24s8_frag);
|
||||
Convert(*convert_abgr8_srgb_to_d24s8_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask,
|
||||
const std::array<f32, 4>& clear_color,
|
||||
const Region2D& dst_region) {
|
||||
|
@ -946,11 +919,13 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline(
|
|||
return *clear_stencil_pipelines.back();
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
|
||||
void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass,
|
||||
bool is_target_depth) {
|
||||
if (pipeline) {
|
||||
return;
|
||||
}
|
||||
VkShaderModule frag_shader = *convert_float_to_depth_frag;
|
||||
VkShaderModule frag_shader =
|
||||
is_target_depth ? *convert_float_to_depth_frag : *convert_depth_to_float_frag;
|
||||
const std::array stages = MakeStages(*full_screen_vert, frag_shader);
|
||||
pipeline = device.GetLogical().CreateGraphicsPipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
|
@ -964,8 +939,9 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend
|
|||
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
|
||||
.pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr,
|
||||
.pColorBlendState = is_target_depth ? &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO
|
||||
: &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
|
||||
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||||
.layout = *one_texture_pipeline_layout,
|
||||
.renderPass = renderpass,
|
||||
|
@ -975,33 +951,12 @@ void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRend
|
|||
});
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
|
||||
ConvertPipeline(pipeline, renderpass, false);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
|
||||
if (pipeline) {
|
||||
return;
|
||||
}
|
||||
VkShaderModule frag_shader = *convert_depth_to_float_frag;
|
||||
const std::array stages = MakeStages(*full_screen_vert, frag_shader);
|
||||
pipeline = device.GetLogical().CreateGraphicsPipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stageCount = static_cast<u32>(stages.size()),
|
||||
.pStages = stages.data(),
|
||||
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.pTessellationState = nullptr,
|
||||
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
|
||||
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||||
.layout = *one_texture_pipeline_layout,
|
||||
.renderPass = renderpass,
|
||||
.subpass = 0,
|
||||
.basePipelineHandle = VK_NULL_HANDLE,
|
||||
.basePipelineIndex = 0,
|
||||
});
|
||||
ConvertPipeline(pipeline, renderpass, true);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass,
|
||||
|
@ -1044,100 +999,4 @@ void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRen
|
|||
ConvertPipelineEx(pipeline, renderpass, module, true, true);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass,
|
||||
bool is_target_depth) {
|
||||
if (pipeline) {
|
||||
return;
|
||||
}
|
||||
VkShaderModule frag_shader =
|
||||
is_target_depth ? *convert_float_to_depth_frag : *convert_depth_to_float_frag;
|
||||
const std::array stages = MakeStages(*full_screen_vert, frag_shader);
|
||||
pipeline = device.GetLogical().CreateGraphicsPipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stageCount = static_cast<u32>(stages.size()),
|
||||
.pStages = stages.data(),
|
||||
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.pTessellationState = nullptr,
|
||||
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr,
|
||||
.pColorBlendState = is_target_depth ? &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO
|
||||
: &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
|
||||
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||||
.layout = *one_texture_pipeline_layout,
|
||||
.renderPass = renderpass,
|
||||
.subpass = 0,
|
||||
.basePipelineHandle = VK_NULL_HANDLE,
|
||||
.basePipelineIndex = 0,
|
||||
});
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertRGBAtoGBRA(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
ConvertPipeline(convert_rgba_to_bgra_pipeline,
|
||||
dst_framebuffer->RenderPass(),
|
||||
false);
|
||||
Convert(*convert_rgba_to_bgra_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertYUV420toRGB(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
ConvertPipeline(convert_yuv420_to_rgb_pipeline,
|
||||
dst_framebuffer->RenderPass(),
|
||||
false);
|
||||
Convert(*convert_yuv420_to_rgb_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertRGBtoYUV420(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
ConvertPipeline(convert_rgb_to_yuv420_pipeline,
|
||||
dst_framebuffer->RenderPass(),
|
||||
false);
|
||||
Convert(*convert_rgb_to_yuv420_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertBC7toRGBA8(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
ConvertPipeline(convert_bc7_to_rgba8_pipeline,
|
||||
dst_framebuffer->RenderPass(),
|
||||
false);
|
||||
Convert(*convert_bc7_to_rgba8_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertASTCHDRtoRGBA16F(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
ConvertPipeline(convert_astc_hdr_to_rgba16f_pipeline,
|
||||
dst_framebuffer->RenderPass(),
|
||||
false);
|
||||
Convert(*convert_astc_hdr_to_rgba16f_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
ConvertPipeline(convert_rgba16f_to_rgba8_pipeline,
|
||||
dst_framebuffer->RenderPass(),
|
||||
false);
|
||||
Convert(*convert_rgba16f_to_rgba8_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ApplyDitherTemporal(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
ConvertPipeline(dither_temporal_pipeline,
|
||||
dst_framebuffer->RenderPass(),
|
||||
false);
|
||||
Convert(*dither_temporal_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
ConvertPipeline(dynamic_resolution_scale_pipeline,
|
||||
dst_framebuffer->RenderPass(),
|
||||
false);
|
||||
Convert(*dynamic_resolution_scale_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
@ -68,8 +67,6 @@ public:
|
|||
|
||||
void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertD32FToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
|
||||
|
@ -85,15 +82,6 @@ public:
|
|||
u8 stencil_mask, u32 stencil_ref, u32 stencil_compare_mask,
|
||||
const Region2D& dst_region);
|
||||
|
||||
void ConvertRGBAtoGBRA(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
void ConvertYUV420toRGB(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
void ConvertRGBtoYUV420(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
void ConvertBC7toRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
void ConvertASTCHDRtoRGBA16F(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
void ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
void ApplyDitherTemporal(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
void ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
private:
|
||||
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view);
|
||||
|
@ -148,15 +136,6 @@ private:
|
|||
vk::ShaderModule convert_d32f_to_abgr8_frag;
|
||||
vk::ShaderModule convert_d24s8_to_abgr8_frag;
|
||||
vk::ShaderModule convert_s8d24_to_abgr8_frag;
|
||||
vk::ShaderModule convert_abgr8_srgb_to_d24s8_frag;
|
||||
vk::ShaderModule convert_rgba_to_bgra_frag;
|
||||
vk::ShaderModule convert_yuv420_to_rgb_comp;
|
||||
vk::ShaderModule convert_rgb_to_yuv420_comp;
|
||||
vk::ShaderModule convert_bc7_to_rgba8_comp;
|
||||
vk::ShaderModule convert_astc_hdr_to_rgba16f_comp;
|
||||
vk::ShaderModule convert_rgba16f_to_rgba8_frag;
|
||||
vk::ShaderModule dither_temporal_frag;
|
||||
vk::ShaderModule dynamic_resolution_scale_comp;
|
||||
vk::Sampler linear_sampler;
|
||||
vk::Sampler nearest_sampler;
|
||||
|
||||
|
@ -177,15 +156,6 @@ private:
|
|||
vk::Pipeline convert_d32f_to_abgr8_pipeline;
|
||||
vk::Pipeline convert_d24s8_to_abgr8_pipeline;
|
||||
vk::Pipeline convert_s8d24_to_abgr8_pipeline;
|
||||
vk::Pipeline convert_abgr8_srgb_to_d24s8_pipeline;
|
||||
vk::Pipeline convert_rgba_to_bgra_pipeline;
|
||||
vk::Pipeline convert_yuv420_to_rgb_pipeline;
|
||||
vk::Pipeline convert_rgb_to_yuv420_pipeline;
|
||||
vk::Pipeline convert_bc7_to_rgba8_pipeline;
|
||||
vk::Pipeline convert_astc_hdr_to_rgba16f_pipeline;
|
||||
vk::Pipeline convert_rgba16f_to_rgba8_pipeline;
|
||||
vk::Pipeline dither_temporal_pipeline;
|
||||
vk::Pipeline dynamic_resolution_scale_pipeline;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
|
@ -9,8 +8,6 @@
|
|||
#include <optional>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include <filesystem>
|
||||
|
||||
#include <fmt/ranges.h>
|
||||
|
||||
|
@ -38,7 +35,6 @@
|
|||
#include "video_core/vulkan_common/vulkan_instance.h"
|
||||
#include "video_core/vulkan_common/vulkan_library.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/hybrid_memory.h"
|
||||
#include "video_core/vulkan_common/vulkan_surface.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
|
@ -127,93 +123,12 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
|
|||
PresentFiltersForAppletCapture),
|
||||
rasterizer(render_window, gpu, device_memory, device, memory_allocator, state_tracker,
|
||||
scheduler),
|
||||
hybrid_memory(std::make_unique<HybridMemory>(device, memory_allocator)),
|
||||
texture_manager(device, memory_allocator),
|
||||
shader_manager(device),
|
||||
applet_frame() {
|
||||
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
|
||||
turbo_mode.emplace(instance, dld);
|
||||
scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
|
||||
}
|
||||
|
||||
// Initialize HybridMemory system
|
||||
if (false && Settings::values.use_gpu_memory_manager.GetValue()) {
|
||||
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
|
||||
try {
|
||||
// Define memory size with explicit types to avoid conversion warnings
|
||||
constexpr size_t memory_size_mb = 64;
|
||||
constexpr size_t memory_size_bytes = memory_size_mb * 1024 * 1024;
|
||||
|
||||
void* guest_memory_base = nullptr;
|
||||
#if defined(_WIN32)
|
||||
// On Windows, use VirtualAlloc to reserve (but not commit) memory
|
||||
const SIZE_T win_size = static_cast<SIZE_T>(memory_size_bytes);
|
||||
LPVOID result = VirtualAlloc(nullptr, win_size, MEM_RESERVE, PAGE_NOACCESS);
|
||||
if (result != nullptr) {
|
||||
guest_memory_base = result;
|
||||
}
|
||||
#else
|
||||
// On Linux/Android, use aligned_alloc
|
||||
guest_memory_base = std::aligned_alloc(4096, memory_size_bytes);
|
||||
#endif
|
||||
if (guest_memory_base != nullptr) {
|
||||
try {
|
||||
hybrid_memory->InitializeGuestMemory(guest_memory_base, memory_size_bytes);
|
||||
LOG_INFO(Render_Vulkan, "HybridMemory initialized with {} MB of fault-managed memory", memory_size_mb);
|
||||
} catch (const std::exception&) {
|
||||
#if defined(_WIN32)
|
||||
if (guest_memory_base != nullptr) {
|
||||
const LPVOID win_ptr = static_cast<LPVOID>(guest_memory_base);
|
||||
VirtualFree(win_ptr, 0, MEM_RELEASE);
|
||||
}
|
||||
#else
|
||||
std::free(guest_memory_base);
|
||||
#endif
|
||||
throw;
|
||||
}
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to initialize HybridMemory: {}", e.what());
|
||||
}
|
||||
#else
|
||||
LOG_INFO(Render_Vulkan, "Fault-managed memory not supported on this platform");
|
||||
#endif
|
||||
}
|
||||
|
||||
// Initialize enhanced shader compilation system
|
||||
shader_manager.SetScheduler(&scheduler);
|
||||
LOG_INFO(Render_Vulkan, "Enhanced shader compilation system initialized");
|
||||
|
||||
// Preload common shaders if enabled
|
||||
if (Settings::values.use_asynchronous_shaders.GetValue()) {
|
||||
// Use a simple shader directory path - can be updated to match Citron's actual path structure
|
||||
const std::string shader_dir = "./shaders";
|
||||
std::vector<std::string> common_shaders;
|
||||
|
||||
// Add paths to common shaders that should be preloaded
|
||||
// These will be compiled in parallel for faster startup
|
||||
try {
|
||||
if (std::filesystem::exists(shader_dir)) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(shader_dir)) {
|
||||
if (entry.is_regular_file() && entry.path().extension() == ".spv") {
|
||||
common_shaders.push_back(entry.path().string());
|
||||
}
|
||||
}
|
||||
|
||||
if (!common_shaders.empty()) {
|
||||
LOG_INFO(Render_Vulkan, "Preloading {} common shaders", common_shaders.size());
|
||||
shader_manager.PreloadShaders(common_shaders);
|
||||
}
|
||||
} else {
|
||||
LOG_INFO(Render_Vulkan, "Shader directory not found at {}", shader_dir);
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
LOG_ERROR(Render_Vulkan, "Error during shader preloading: {}", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
Report();
|
||||
InitializePlatformSpecific();
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
|
||||
throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())};
|
||||
|
@ -311,35 +226,6 @@ void RendererVulkan::RenderScreenshot(std::span<const Tegra::FramebufferConfig>
|
|||
return;
|
||||
}
|
||||
|
||||
// If memory snapshots are enabled, take a snapshot with the screenshot
|
||||
if (false && Settings::values.enable_memory_snapshots.GetValue() && hybrid_memory) {
|
||||
try {
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
const auto now_time_t = std::chrono::system_clock::to_time_t(now);
|
||||
std::tm local_tm;
|
||||
#ifdef _WIN32
|
||||
localtime_s(&local_tm, &now_time_t);
|
||||
#else
|
||||
localtime_r(&now_time_t, &local_tm);
|
||||
#endif
|
||||
char time_str[128];
|
||||
std::strftime(time_str, sizeof(time_str), "%Y%m%d_%H%M%S", &local_tm);
|
||||
|
||||
std::string snapshot_path = fmt::format("snapshots/memory_snapshot_{}.bin", time_str);
|
||||
hybrid_memory->SaveSnapshot(snapshot_path);
|
||||
|
||||
// Differential snapshot for tracking memory changes
|
||||
if (false && Settings::values.use_gpu_memory_manager.GetValue()) {
|
||||
std::string diff_path = fmt::format("snapshots/diff_snapshot_{}.bin", time_str);
|
||||
hybrid_memory->SaveDifferentialSnapshot(diff_path);
|
||||
hybrid_memory->ResetDirtyTracking();
|
||||
LOG_INFO(Render_Vulkan, "Memory snapshots saved with screenshot");
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to save memory snapshot: {}", e.what());
|
||||
}
|
||||
}
|
||||
|
||||
const auto& layout{renderer_settings.screenshot_framebuffer_layout};
|
||||
const auto dst_buffer = RenderToBuffer(framebuffers, layout, VK_FORMAT_B8G8R8A8_UNORM,
|
||||
layout.width * layout.height * 4);
|
||||
|
@ -391,81 +277,4 @@ void RendererVulkan::RenderAppletCaptureLayer(
|
|||
CaptureFormat);
|
||||
}
|
||||
|
||||
bool RendererVulkan::HandleVulkanError(VkResult result, const std::string& operation) {
|
||||
if (result == VK_SUCCESS) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (result == VK_ERROR_DEVICE_LOST) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Vulkan device lost during {}", operation);
|
||||
RecoverFromError();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY || result == VK_ERROR_OUT_OF_HOST_MEMORY) {
|
||||
LOG_CRITICAL(Render_Vulkan, "Vulkan out of memory during {}", operation);
|
||||
// Potential recovery: clear caches, reduce workload
|
||||
texture_manager.CleanupTextureCache();
|
||||
return false;
|
||||
}
|
||||
|
||||
LOG_ERROR(Render_Vulkan, "Vulkan error during {}: {}", operation, result);
|
||||
return false;
|
||||
}
|
||||
|
||||
void RendererVulkan::RecoverFromError() {
|
||||
LOG_INFO(Render_Vulkan, "Attempting to recover from Vulkan error");
|
||||
|
||||
// Wait for device to finish operations
|
||||
void(device.GetLogical().WaitIdle());
|
||||
|
||||
// Process all pending commands in our queue
|
||||
ProcessAllCommands();
|
||||
|
||||
// Wait for any async shader compilations to finish
|
||||
shader_manager.WaitForCompilation();
|
||||
|
||||
// Clean up resources that might be causing problems
|
||||
texture_manager.CleanupTextureCache();
|
||||
|
||||
// Reset command buffers and pipelines
|
||||
scheduler.Flush();
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Recovery attempt completed");
|
||||
}
|
||||
|
||||
void RendererVulkan::InitializePlatformSpecific() {
|
||||
LOG_INFO(Render_Vulkan, "Initializing platform-specific Vulkan components");
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
LOG_INFO(Render_Vulkan, "Initializing Vulkan for Windows");
|
||||
// Windows-specific initialization
|
||||
#elif defined(__linux__)
|
||||
LOG_INFO(Render_Vulkan, "Initializing Vulkan for Linux");
|
||||
// Linux-specific initialization
|
||||
#elif defined(__ANDROID__)
|
||||
LOG_INFO(Render_Vulkan, "Initializing Vulkan for Android");
|
||||
// Android-specific initialization
|
||||
#else
|
||||
LOG_INFO(Render_Vulkan, "Platform-specific Vulkan initialization not implemented for this platform");
|
||||
#endif
|
||||
|
||||
// Create a compute buffer using the HybridMemory system if enabled
|
||||
if (false && Settings::values.use_gpu_memory_manager.GetValue()) {
|
||||
try {
|
||||
// Create a small compute buffer for testing
|
||||
const VkDeviceSize buffer_size = 1 * 1024 * 1024; // 1 MB
|
||||
ComputeBuffer compute_buffer = hybrid_memory->CreateComputeBuffer(
|
||||
buffer_size,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
MemoryUsage::DeviceLocal);
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Successfully created compute buffer using HybridMemory");
|
||||
} catch (const std::exception& e) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to create compute buffer: {}", e.what());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
@ -7,7 +6,6 @@
|
|||
#include <memory>
|
||||
#include <string>
|
||||
#include <variant>
|
||||
#include <functional>
|
||||
|
||||
#include "common/dynamic_library.h"
|
||||
#include "video_core/host1x/gpu_device_memory_manager.h"
|
||||
|
@ -19,11 +17,8 @@
|
|||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/renderer_vulkan/vk_turbo_mode.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
|
||||
#include "video_core/vulkan_common/hybrid_memory.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Core {
|
||||
|
@ -63,9 +58,6 @@ public:
|
|||
return device.GetDriverName();
|
||||
}
|
||||
|
||||
// Enhanced platform-specific initialization
|
||||
void InitializePlatformSpecific();
|
||||
|
||||
private:
|
||||
void Report() const;
|
||||
|
||||
|
@ -75,10 +67,6 @@ private:
|
|||
void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers);
|
||||
void RenderAppletCaptureLayer(std::span<const Tegra::FramebufferConfig> framebuffers);
|
||||
|
||||
// Enhanced error handling
|
||||
bool HandleVulkanError(VkResult result, const std::string& operation);
|
||||
void RecoverFromError();
|
||||
|
||||
Core::TelemetrySession& telemetry_session;
|
||||
Tegra::MaxwellDeviceMemoryManager& device_memory;
|
||||
Tegra::GPU& gpu;
|
||||
|
@ -102,13 +90,6 @@ private:
|
|||
RasterizerVulkan rasterizer;
|
||||
std::optional<TurboMode> turbo_mode;
|
||||
|
||||
// HybridMemory for advanced memory management
|
||||
std::unique_ptr<HybridMemory> hybrid_memory;
|
||||
|
||||
// Enhanced texture and shader management
|
||||
TextureManager texture_manager;
|
||||
ShaderManager shader_manager;
|
||||
|
||||
Frame applet_frame;
|
||||
};
|
||||
|
||||
|
|
|
@ -1,10 +1,8 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
|
@ -39,23 +37,10 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel
|
|||
if (shader_notify) {
|
||||
shader_notify->MarkShaderBuilding();
|
||||
}
|
||||
|
||||
// Track compilation start time for performance metrics
|
||||
const auto start_time = std::chrono::high_resolution_clock::now();
|
||||
|
||||
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
|
||||
uniform_buffer_sizes.begin());
|
||||
|
||||
auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics, start_time] {
|
||||
// Simplify the high priority determination - we can't use workgroup_size
|
||||
// because it doesn't exist, so use a simpler heuristic
|
||||
const bool is_high_priority = false; // Default to false until we can find a better criterion
|
||||
|
||||
if (is_high_priority) {
|
||||
// Increase thread priority for small compute shaders that are likely part of critical path
|
||||
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
|
||||
}
|
||||
|
||||
auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics] {
|
||||
DescriptorLayoutBuilder builder{device};
|
||||
builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
|
@ -64,11 +49,15 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel
|
|||
descriptor_update_template =
|
||||
builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false);
|
||||
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
|
||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
.requiredSubgroupSize = GuestWarpSize,
|
||||
};
|
||||
VkPipelineCreateFlags flags{};
|
||||
if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
|
||||
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
|
||||
}
|
||||
|
||||
pipeline = device.GetLogical().CreateComputePipeline(
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
|
@ -76,7 +65,8 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel
|
|||
.flags = flags,
|
||||
.stage{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.pNext =
|
||||
device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
|
||||
.flags = 0,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = *spv_module,
|
||||
|
@ -89,15 +79,6 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel
|
|||
},
|
||||
*pipeline_cache);
|
||||
|
||||
// Performance measurement
|
||||
const auto end_time = std::chrono::high_resolution_clock::now();
|
||||
const auto compilation_time = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
end_time - start_time).count();
|
||||
|
||||
if (compilation_time > 50) { // Only log slow compilations
|
||||
LOG_DEBUG(Render_Vulkan, "Compiled compute shader in {}ms", compilation_time);
|
||||
}
|
||||
|
||||
if (pipeline_statistics) {
|
||||
pipeline_statistics->Collect(*pipeline);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
|
@ -259,16 +258,7 @@ GraphicsPipeline::GraphicsPipeline(
|
|||
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
|
||||
num_textures += Shader::NumDescriptors(info->texture_descriptors);
|
||||
}
|
||||
|
||||
// Track compilation start time for performance metrics
|
||||
const auto start_time = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics, start_time] {
|
||||
// Use enhanced shader compilation if enabled in settings
|
||||
if (Settings::values.use_enhanced_shader_building.GetValue()) {
|
||||
Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
|
||||
}
|
||||
|
||||
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
|
||||
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
|
||||
uses_push_descriptor = builder.CanUsePushDescriptor();
|
||||
descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor);
|
||||
|
@ -283,17 +273,6 @@ GraphicsPipeline::GraphicsPipeline(
|
|||
const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
|
||||
Validate();
|
||||
MakePipeline(render_pass);
|
||||
|
||||
// Performance measurement
|
||||
const auto end_time = std::chrono::high_resolution_clock::now();
|
||||
const auto compilation_time = std::chrono::duration_cast<std::chrono::milliseconds>(
|
||||
end_time - start_time).count();
|
||||
|
||||
// Log shader compilation time for slow shaders to help diagnose performance issues
|
||||
if (compilation_time > 100) { // Only log very slow compilations
|
||||
LOG_DEBUG(Render_Vulkan, "Compiled graphics pipeline in {}ms", compilation_time);
|
||||
}
|
||||
|
||||
if (pipeline_statistics) {
|
||||
pipeline_statistics->Collect(*pipeline);
|
||||
}
|
||||
|
@ -313,19 +292,6 @@ GraphicsPipeline::GraphicsPipeline(
|
|||
configure_func = ConfigureFunc(spv_modules, stage_infos);
|
||||
}
|
||||
|
||||
GraphicsPipeline* GraphicsPipeline::Clone() const {
|
||||
// Create a new pipeline that shares the same resources
|
||||
// This is for pipeline deduplication
|
||||
|
||||
if (!IsBuilt()) {
|
||||
LOG_WARNING(Render_Vulkan, "Attempted to clone unbuilt pipeline");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return const_cast<GraphicsPipeline*>(this);
|
||||
|
||||
}
|
||||
|
||||
void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
|
||||
transition_keys.push_back(transition->key);
|
||||
transitions.push_back(transition);
|
||||
|
@ -345,9 +311,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
const auto& regs{maxwell3d->regs};
|
||||
const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
|
||||
const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
|
||||
// Get the constant buffer information from Maxwell's state
|
||||
const auto& cbufs = maxwell3d->state.shader_stages[stage].const_buffers;
|
||||
|
||||
const Shader::Info& info{stage_infos[stage]};
|
||||
buffer_cache.UnbindGraphicsStorageBuffers(stage);
|
||||
if constexpr (Spec::has_storage_buffers) {
|
||||
|
@ -359,7 +322,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
++ssbo_index;
|
||||
}
|
||||
}
|
||||
|
||||
const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers};
|
||||
const auto read_handle{[&](const auto& desc, u32 index) {
|
||||
ASSERT(cbufs[desc.cbuf_index].enabled);
|
||||
const u32 index_offset{index << desc.size_shift};
|
||||
|
@ -381,7 +344,6 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
}
|
||||
return TexturePair(gpu_memory->Read<u32>(addr), via_header_index);
|
||||
}};
|
||||
|
||||
const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE {
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
const auto handle{read_handle(desc, index)};
|
||||
|
|
|
@@ -84,9 +84,6 @@ public:
    GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
    GraphicsPipeline(const GraphicsPipeline&) = delete;

    // Create a deep copy of this pipeline for reuse
    [[nodiscard]] GraphicsPipeline* Clone() const;

    void AddTransition(GraphicsPipeline* transition);

    void Configure(bool is_indexed) {

@@ -106,35 +103,6 @@ public:
        return is_built.load(std::memory_order::relaxed);
    }

    // Get hash for the current pipeline configuration
    [[nodiscard]] u64 Hash() const noexcept {
        return key.Hash();
    }

    // Get the last pipeline this transitioned from
    [[nodiscard]] GraphicsPipeline* GetLastTransitionedPipeline() const noexcept {
        // For predictive loading, return a related pipeline if available
        if (!transitions.empty()) {
            return transitions.front();
        }
        return nullptr;
    }

    // Get pipeline info string for prediction
    [[nodiscard]] std::string GetPipelineInfo() const noexcept {
        std::string result = fmt::format("pipeline_{:016x}", Hash());

        // Include information about stages
        for (size_t i = 0; i < NUM_STAGES; ++i) {
            // Check if this stage is active by checking if any varying stores are enabled
            if (!stage_infos[i].stores.mask.none()) {
                result += fmt::format("_s{}", i);
            }
        }

        return result;
    }

    template <typename Spec>
    static auto MakeConfigureSpecFunc() {
        return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };

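The accessors removed above (Hash, GetLastTransitionedPipeline, GetPipelineInfo) fed the predictive loading path. As a hedged reading aid only, a caller could have consumed them roughly like this; the map and the PrewarmRelated helper are hypothetical names, not code from the repository:

    // Sketch: pre-warm the pipeline a draw is most likely to transition into.
    void PrewarmRelated(GraphicsPipeline& current,
                        std::unordered_map<u64, GraphicsPipeline*>& pipeline_lookup) {
        if (GraphicsPipeline* next = current.GetLastTransitionedPipeline()) {
            // Key the lookup by the 64-bit configuration hash exposed by Hash().
            pipeline_lookup.emplace(next->Hash(), next);
            LOG_DEBUG(Render_Vulkan, "Predicted next pipeline: {}", next->GetPipelineInfo());
        }
    }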
@@ -1,5 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <algorithm>

@@ -265,42 +264,18 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
}

size_t GetTotalPipelineWorkers() {
    const size_t num_cores = std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL);

    // Calculate optimal number of workers based on available CPU cores
    size_t optimal_workers;

    const size_t max_core_threads =
        std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL;
#ifdef ANDROID
    // Mobile devices need more conservative threading to avoid thermal issues
    // Leave more cores free on Android for system processes and other apps
    constexpr size_t min_free_cores = 3ULL;
    if (num_cores <= min_free_cores + 1) {
        return 1ULL; // At least one worker
    // Leave at least a few cores free in android
    constexpr size_t free_cores = 3ULL;
    if (max_core_threads <= free_cores) {
        return 1ULL;
    }
    optimal_workers = num_cores - min_free_cores;
    return max_core_threads - free_cores;
#else
    // Desktop systems can use more aggressive threading
    if (num_cores <= 3) {
        optimal_workers = num_cores - 1; // Dual/triple core: leave 1 core free
    } else if (num_cores <= 6) {
        optimal_workers = num_cores - 2; // Quad/hex core: leave 2 cores free
    } else {
        // For 8+ core systems, use more workers but still leave some cores for other tasks
        optimal_workers = num_cores - (num_cores / 4); // Leave ~25% of cores free
    }
    return max_core_threads;
#endif

    // Apply threading priority via shader_compilation_priority setting if enabled
    const int priority = Settings::values.shader_compilation_priority.GetValue();
    if (priority > 0) {
        // High priority - use more cores for shader compilation
        optimal_workers = std::min(optimal_workers + 1, num_cores - 1);
    } else if (priority < 0) {
        // Low priority - use fewer cores for shader compilation
        optimal_workers = (optimal_workers >= 2) ? optimal_workers - 1 : 1;
    }

    return optimal_workers;
}

} // Anonymous namespace

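Because the hunk above interleaves the removed and the restored versions of GetTotalPipelineWorkers without +/- markers, the control flow is hard to follow. As a reading aid only, the desktop-side worker heuristic of the removed code reduces to roughly the following; this is a hedged sketch, not code from the repository:

    // Assumes hardware_concurrency() has already been clamped to >= 2.
    size_t DesktopWorkerHeuristic(size_t num_cores) {
        if (num_cores <= 3) {
            return num_cores - 1;            // dual/triple core: leave 1 core free
        }
        if (num_cores <= 6) {
            return num_cores - 2;            // quad/hex core: leave 2 cores free
        }
        return num_cores - (num_cores / 4);  // 8+ cores: leave ~25% of cores free
    }
    // e.g. 4 cores -> 2 workers, 8 cores -> 6 workers, 16 cores -> 12 workers.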
@ -611,128 +586,21 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
|
|||
if (pipeline->IsBuilt()) {
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
if (!use_asynchronous_shaders) {
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// Advanced heuristics for smarter async shader compilation
|
||||
|
||||
// Track stutter metrics for better debugging and performance tuning
|
||||
static thread_local u32 async_shader_count = 0;
|
||||
static thread_local std::chrono::high_resolution_clock::time_point last_async_shader_log;
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// Better detection of UI and critical shaders
|
||||
const bool is_ui_shader = !maxwell3d->regs.zeta_enable;
|
||||
// Check for blend state
|
||||
const bool has_blend = maxwell3d->regs.blend.enable[0] != 0;
|
||||
// Check if texture sampling is likely based on texture units used
|
||||
const bool has_texture = maxwell3d->regs.tex_header.Address() != 0;
|
||||
// Check for clear operations
|
||||
const bool is_clear_operation = maxwell3d->regs.clear_surface.raw != 0;
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
const bool small_draw = draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6;
|
||||
|
||||
// Get shader priority from settings
|
||||
const int shader_priority = Settings::values.shader_compilation_priority.GetValue();
|
||||
|
||||
// Record historical usage patterns for future prediction
|
||||
// Create a unique identifier for this shader configuration
|
||||
const u64 draw_config_hash = pipeline->Hash();
|
||||
static thread_local std::unordered_map<u64, u32> shader_usage_count;
|
||||
static thread_local std::unordered_map<u64, bool> shader_is_frequent;
|
||||
|
||||
// Track how often this shader is used
|
||||
shader_usage_count[draw_config_hash]++;
|
||||
|
||||
// After a certain number of uses, consider this a frequently used shader
|
||||
// which should get higher compilation priority in the future
|
||||
if (shader_usage_count[draw_config_hash] >= 3) {
|
||||
shader_is_frequent[draw_config_hash] = true;
|
||||
|
||||
// Predict related shaders that might be used soon
|
||||
if (auto related_pipeline = pipeline->GetLastTransitionedPipeline()) {
|
||||
// Use a string-based representation of the pipeline for prediction
|
||||
std::string pipeline_info = fmt::format("pipeline_{:016x}", related_pipeline->Hash());
|
||||
PredictShader(pipeline_info);
|
||||
}
|
||||
}
|
||||
|
||||
// Always wait for UI shaders if settings specify high priority
|
||||
if (is_ui_shader && (shader_priority >= 0 || small_draw)) {
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// Wait for frequently used small draw shaders
|
||||
if (small_draw && shader_is_frequent[draw_config_hash]) {
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// Wait for clear operations as they're usually critical
|
||||
if (is_clear_operation) {
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// Force wait if high shader priority in settings
|
||||
if (shader_priority > 1) {
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// More intelligent depth-based heuristics
|
||||
// If something is using depth, we can assume that games are not rendering anything which
|
||||
// will be used one time.
|
||||
if (maxwell3d->regs.zeta_enable) {
|
||||
// Check if this is likely a shadow map or important depth-based effect
|
||||
// Check if depth write is enabled and color writes are disabled for all render targets
|
||||
bool depth_only_pass = maxwell3d->regs.depth_write_enabled;
|
||||
if (depth_only_pass) {
|
||||
bool all_color_masked = true;
|
||||
for (size_t i = 0; i < maxwell3d->regs.color_mask.size(); i++) {
|
||||
// Check if any color component is enabled (R, G, B, A fields of ColorMask)
|
||||
if ((maxwell3d->regs.color_mask[i].raw & 0x1111) != 0) {
|
||||
all_color_masked = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If depth write enabled and all colors masked, this is likely a shadow pass
|
||||
if (all_color_masked) {
|
||||
// This is likely a shadow pass, which is important for visual quality
|
||||
// We should wait for these to compile to avoid flickering shadows
|
||||
return pipeline;
|
||||
}
|
||||
}
|
||||
|
||||
// For other depth-enabled renders, use async compilation
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Refine small draw detection
|
||||
if (small_draw) {
|
||||
// Check if this might be a UI element that we missed
|
||||
if (has_blend && has_texture) {
|
||||
// Likely a textured UI element, wait for it
|
||||
// If games are using a small index count, we can assume these are full screen quads.
|
||||
// Usually these shaders are only used once for building textures so we can assume they
|
||||
// can't be built async
|
||||
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
|
||||
if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) {
|
||||
return pipeline;
|
||||
}
|
||||
// For other small draws, assume they're one-off effects
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
// Track and log async shader statistics periodically
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
|
||||
now - last_async_shader_log).count();
|
||||
|
||||
if (elapsed >= 10) { // Log every 10 seconds
|
||||
async_shader_count = 0;
|
||||
last_async_shader_log = now;
|
||||
}
|
||||
async_shader_count++;
|
||||
|
||||
// Log less frequently to avoid spamming log
|
||||
if (async_shader_count % 100 == 1) {
|
||||
LOG_DEBUG(Render_Vulkan, "Async shader compilation in progress (count={}), completion={:.1f}%",
|
||||
async_shader_count, GetShaderCompilationProgress() * 100.0f);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
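The BuiltPipeline heuristic above keeps thread-local per-shader usage counters and only blocks on compilation for shaders that have proven to be reused (and for UI, clear, and shadow passes). A condensed sketch of that counting pattern, with hypothetical names rather than the emulator's API:

    // Decide whether a draw should wait for its pipeline instead of skipping it.
    static thread_local std::unordered_map<u64, u32> usage_count;

    bool ShouldWaitForPipeline(u64 pipeline_hash, bool small_draw) {
        const u32 uses = ++usage_count[pipeline_hash];
        // After a few uses the shader is considered "hot": waiting avoids visible
        // pop-in, while cold one-off shaders stay asynchronous.
        return small_draw && uses >= 3;
    }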
@@ -740,22 +608,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
    ShaderPools& pools, const GraphicsPipelineCacheKey& key,
    std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
    bool build_in_parallel) try {

    // Pipeline deduplication optimization
    {
        std::lock_guard lock{pipeline_cache};
        const auto [pair, new_pipeline]{graphics_cache.try_emplace(key)};
        if (!new_pipeline) {
            // Found existing pipeline in cache
            auto& pipeline = pair->second;
            if (pipeline) {
                // Return the existing pipeline
                LOG_DEBUG(Render_Vulkan, "Reusing existing pipeline for key 0x{:016x}", key.Hash());
                return std::unique_ptr<GraphicsPipeline>(pipeline->Clone());
            }
        }
    }

    auto hash = key.Hash();
    LOG_INFO(Render_Vulkan, "0x{:016x}", hash);
    size_t env_index{0};

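The try_emplace block above is a lock-then-lookup deduplication pattern: inserting an empty slot means the pipeline must be built, while finding an existing entry means it can be reused. A generic, hedged sketch of that pattern (GetOrBuild and BuildFn are illustrative placeholders):

    template <typename Key, typename Value, typename BuildFn>
    Value* GetOrBuild(std::unordered_map<Key, Value>& cache, std::mutex& mutex,
                      const Key& key, BuildFn&& Build) {
        std::lock_guard lock{mutex};
        auto [it, inserted] = cache.try_emplace(key);
        if (inserted) {
            it->second = Build(key);  // first caller pays the build cost
        }
        return &it->second;           // later callers reuse the cached value
    }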
@ -766,8 +618,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
|||
// Layer passthrough generation for devices without VK_EXT_shader_viewport_index_layer
|
||||
Shader::IR::Program* layer_source_program{};
|
||||
|
||||
// Memory optimization: Create a scope for program translation
|
||||
{
|
||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
const bool is_emulated_stage = layer_source_program != nullptr &&
|
||||
index == static_cast<u32>(Maxwell::ShaderType::Geometry);
|
||||
|
@ -803,15 +653,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
|||
layer_source_program = &programs[index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
|
||||
std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
|
||||
|
||||
const Shader::IR::Program* previous_stage{};
|
||||
Shader::Backend::Bindings binding;
|
||||
|
||||
// Memory optimization: Process one stage at a time and free intermediate memory
|
||||
for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
|
||||
++index) {
|
||||
const bool is_emulated_stage = layer_source_program != nullptr &&
|
||||
|
@ -825,8 +671,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
|||
const size_t stage_index{index - 1};
|
||||
infos[stage_index] = &program.info;
|
||||
|
||||
// Prioritize memory efficiency by encapsulating SPIR-V generation
|
||||
{
|
||||
const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
|
||||
ConvertLegacyToGeneric(program, runtime_info);
|
||||
const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)};
|
||||
|
@@ -836,27 +680,14 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
            const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
            modules[stage_index].SetObjectNameEXT(name.c_str());
        }
    }

    previous_stage = &program;
    }

    // Use improved thread worker mechanism for better async compilation
    Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
    auto pipeline = std::make_unique<GraphicsPipeline>(
    return std::make_unique<GraphicsPipeline>(
        scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device,
        descriptor_pool, guest_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
        std::move(modules), infos);

    // Cache the result for future deduplication
    if (pipeline) {
        std::lock_guard lock{pipeline_cache};
        // Store a clone that can be used later
        graphics_cache[key] = std::unique_ptr<GraphicsPipeline>(pipeline->Clone());
    }

    return pipeline;

} catch (const Shader::Exception& exception) {
    auto hash = key.Hash();
    size_t env_index{0};

@ -936,23 +767,6 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
|
|||
}
|
||||
|
||||
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
||||
|
||||
// Add support for bindless texture constant buffer only if needed
|
||||
if (program.info.storage_buffers_descriptors.size() > 0) {
|
||||
// Check if a constant buffer at index 0 already exists
|
||||
const bool has_cb0 = std::any_of(program.info.constant_buffer_descriptors.begin(),
|
||||
program.info.constant_buffer_descriptors.end(),
|
||||
[](const auto& cb) { return cb.index == 0; });
|
||||
|
||||
// Only add if not already present
|
||||
if (!has_cb0) {
|
||||
Shader::ConstantBufferDescriptor desc;
|
||||
desc.index = 0;
|
||||
desc.count = 1;
|
||||
program.info.constant_buffer_descriptors.push_back(desc);
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<u32> code{EmitSPIRV(profile, program)};
|
||||
device.SaveShader(code);
|
||||
vk::ShaderModule spv_module{BuildShader(device, code)};
|
||||
|
@@ -971,7 +785,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
}

void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename,
                                                 const vk::PipelineCache& vk_pipeline_cache,
                                                 const vk::PipelineCache& pipeline_cache,
                                                 u32 cache_version) try {
    std::ofstream file(filename, std::ios::binary);
    file.exceptions(std::ifstream::failbit);

@@ -985,10 +799,10 @@ void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& fi

    size_t cache_size = 0;
    std::vector<char> cache_data;
    if (vk_pipeline_cache) {
        vk_pipeline_cache.Read(&cache_size, nullptr);
    if (pipeline_cache) {
        pipeline_cache.Read(&cache_size, nullptr);
        cache_data.resize(cache_size);
        vk_pipeline_cache.Read(&cache_size, cache_data.data());
        pipeline_cache.Read(&cache_size, cache_data.data());
    }
    file.write(cache_data.data(), cache_size);

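The paired Read() calls above follow the standard Vulkan two-call pattern behind vkGetPipelineCacheData: the first call queries the blob size, the second fetches the payload. A minimal raw-API sketch of the same idea, for reference only:

    std::vector<char> ReadPipelineCacheBlob(VkDevice device, VkPipelineCache cache) {
        size_t data_size = 0;
        vkGetPipelineCacheData(device, cache, &data_size, nullptr);     // 1st call: size only
        std::vector<char> blob(data_size);
        vkGetPipelineCacheData(device, cache, &data_size, blob.data()); // 2nd call: payload
        return blob;
    }
    // The blob can be written to disk and later passed back through
    // VkPipelineCacheCreateInfo::pInitialData to warm the cache on startup.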
@ -1,5 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
@ -11,7 +10,6 @@
|
|||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/thread_worker.h"
|
||||
|
@ -159,9 +157,6 @@ private:
|
|||
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
|
||||
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
|
||||
|
||||
// Mutex for thread-safe pipeline cache access
|
||||
mutable std::mutex pipeline_cache;
|
||||
|
||||
ShaderPools main_pools;
|
||||
|
||||
Shader::Profile profile;
|
||||
|
|
|
@ -1,181 +1,15 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <cstring>
|
||||
#include <thread>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <atomic>
|
||||
#include <queue>
|
||||
#include <condition_variable>
|
||||
#include <future>
|
||||
#include <chrono>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
#define SHADER_CACHE_DIR "./shader_cache"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
// Global command submission queue for asynchronous operations
|
||||
std::mutex commandQueueMutex;
|
||||
std::queue<std::function<void()>> commandQueue;
|
||||
std::condition_variable commandQueueCondition;
|
||||
std::atomic<bool> isCommandQueueActive{true};
|
||||
std::thread commandQueueThread;
|
||||
|
||||
// Pointer to Citron's scheduler for integration
|
||||
Scheduler* globalScheduler = nullptr;
|
||||
|
||||
// Constants for thread pool and shader management
|
||||
constexpr size_t DEFAULT_THREAD_POOL_SIZE = 4;
|
||||
constexpr size_t MAX_THREAD_POOL_SIZE = 8;
|
||||
constexpr u32 SHADER_PRIORITY_CRITICAL = 0;
|
||||
constexpr u32 SHADER_PRIORITY_HIGH = 1;
|
||||
constexpr u32 SHADER_PRIORITY_NORMAL = 2;
|
||||
constexpr u32 SHADER_PRIORITY_LOW = 3;
|
||||
|
||||
// Thread pool for shader compilation
|
||||
std::vector<std::thread> g_thread_pool;
|
||||
std::queue<std::function<void()>> g_work_queue;
|
||||
std::mutex g_work_queue_mutex;
|
||||
std::condition_variable g_work_queue_cv;
|
||||
std::atomic<bool> g_thread_pool_initialized = false;
|
||||
std::atomic<bool> g_shutdown_thread_pool = false;
|
||||
std::atomic<size_t> g_active_compilation_tasks = 0;
|
||||
std::atomic<size_t> g_total_compilation_tasks = 0;
|
||||
std::atomic<size_t> g_completed_compilation_tasks = 0;
|
||||
|
||||
// Priority queue for shader compilation
struct ShaderCompilationTask {
    std::function<void()> task;
    u32 priority;
    std::chrono::high_resolution_clock::time_point enqueue_time;

    bool operator<(const ShaderCompilationTask& other) const {
        // Lower priority value means higher actual priority
        if (priority != other.priority) {
            return priority > other.priority;
        }
        // If priorities are equal, use FIFO ordering
        return enqueue_time > other.enqueue_time;
    }
};
std::priority_queue<ShaderCompilationTask> g_priority_work_queue;

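A small usage example of the queue declared above: because operator< inverts the comparison, the numerically lowest priority value pops first, and equal priorities pop in submission (FIFO) order. In the real code the queue is only touched under g_work_queue_mutex via SubmitShaderCompilationTask; the direct pushes here are for illustration only:

    g_priority_work_queue.push({[] { /* compile critical UI shader */ }, SHADER_PRIORITY_CRITICAL,
                                std::chrono::high_resolution_clock::now()});
    g_priority_work_queue.push({[] { /* compile background shader */ }, SHADER_PRIORITY_LOW,
                                std::chrono::high_resolution_clock::now()});
    // g_priority_work_queue.top() is now the SHADER_PRIORITY_CRITICAL task:
    // std::priority_queue is a max-heap, and operator< ranks smaller priority
    // values as "greater", i.e. more urgent.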
// Predictive shader loading
|
||||
std::unordered_set<std::string> g_predicted_shaders;
|
||||
std::mutex g_predicted_shaders_mutex;
|
||||
|
||||
// Command queue worker thread (multi-threaded command recording)
void CommandQueueWorker() {
    while (isCommandQueueActive.load()) {
        std::function<void()> command;
        {
            std::unique_lock<std::mutex> lock(commandQueueMutex);
            if (commandQueue.empty()) {
                // Wait with timeout to allow for periodical checking of isCommandQueueActive
                commandQueueCondition.wait_for(lock, std::chrono::milliseconds(100),
                    []{ return !commandQueue.empty() || !isCommandQueueActive.load(); });

                // If we woke up but the queue is still empty and we should still be active, loop
                if (commandQueue.empty()) {
                    continue;
                }
            }

            command = commandQueue.front();
            commandQueue.pop();
        }

        // Execute the command
        if (command) {
            command();
        }
    }
}

// Initialize the command queue system
|
||||
void InitializeCommandQueue() {
|
||||
if (!commandQueueThread.joinable()) {
|
||||
isCommandQueueActive.store(true);
|
||||
commandQueueThread = std::thread(CommandQueueWorker);
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown the command queue system
|
||||
void ShutdownCommandQueue() {
|
||||
isCommandQueueActive.store(false);
|
||||
commandQueueCondition.notify_all();
|
||||
|
||||
if (commandQueueThread.joinable()) {
|
||||
commandQueueThread.join();
|
||||
}
|
||||
}
|
||||
|
||||
// Submit a command to the queue for asynchronous execution
|
||||
void SubmitCommandToQueue(std::function<void()> command) {
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(commandQueueMutex);
|
||||
commandQueue.push(command);
|
||||
}
|
||||
commandQueueCondition.notify_one();
|
||||
}
|
||||
|
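Usage sketch for the submission path above: work queued here runs later on the CommandQueueWorker thread, so callers must not assume it has completed by the time SubmitCommandToQueue returns. Illustrative only:

    SubmitCommandToQueue([] {
        LOG_DEBUG(Render_Vulkan, "Deferred work executed on the command queue thread");
    });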
||||
// Set the global scheduler reference for command integration
|
||||
void SetGlobalScheduler(Scheduler* scheduler) {
|
||||
globalScheduler = scheduler;
|
||||
}
|
||||
|
||||
// Submit a Vulkan command to the existing Citron scheduler
|
||||
void SubmitToScheduler(std::function<void(vk::CommandBuffer)> command) {
|
||||
if (globalScheduler) {
|
||||
globalScheduler->Record(std::move(command));
|
||||
} else {
|
||||
LOG_WARNING(Render_Vulkan, "Trying to submit to scheduler but no scheduler is set");
|
||||
}
|
||||
}
|
||||
|
||||
// Flush the Citron scheduler - use when needing to ensure commands are executed
|
||||
u64 FlushScheduler(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
|
||||
if (globalScheduler) {
|
||||
return globalScheduler->Flush(signal_semaphore, wait_semaphore);
|
||||
} else {
|
||||
LOG_WARNING(Render_Vulkan, "Trying to flush scheduler but no scheduler is set");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Process both command queue and scheduler commands
|
||||
void ProcessAllCommands() {
|
||||
// Process our command queue first
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(commandQueueMutex);
|
||||
while (!commandQueue.empty()) {
|
||||
auto command = commandQueue.front();
|
||||
commandQueue.pop();
|
||||
lock.unlock();
|
||||
|
||||
command();
|
||||
|
||||
lock.lock();
|
||||
}
|
||||
}
|
||||
|
||||
// Then flush the scheduler if it exists
|
||||
if (globalScheduler) {
|
||||
globalScheduler->Flush();
|
||||
}
|
||||
}
|
||||
|
||||
vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code) {
|
||||
return device.GetLogical().CreateShaderModule({
|
||||
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
||||
|
@ -186,589 +20,4 @@ vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code) {
|
|||
});
|
||||
}
|
||||
|
||||
bool IsShaderValid(VkShaderModule shader_module) {
|
||||
// TODO: validate the shader by checking if it's null
|
||||
// or by examining SPIR-V data for correctness [ZEP]
|
||||
return shader_module != VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
// Initialize thread pool for shader compilation
|
||||
void InitializeThreadPool() {
|
||||
if (g_thread_pool_initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(g_work_queue_mutex);
|
||||
g_shutdown_thread_pool = false;
|
||||
|
||||
// Determine optimal thread count based on system
|
||||
const size_t hardware_threads = std::max(std::thread::hardware_concurrency(), 2u);
|
||||
const size_t thread_count = std::min(hardware_threads - 1, MAX_THREAD_POOL_SIZE);
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Initializing shader compilation thread pool with {} threads", thread_count);
|
||||
|
||||
for (size_t i = 0; i < thread_count; ++i) {
|
||||
g_thread_pool.emplace_back([]() {
|
||||
while (!g_shutdown_thread_pool) {
|
||||
std::function<void()> task;
|
||||
{
|
||||
std::unique_lock<std::mutex> thread_pool_lock(g_work_queue_mutex);
|
||||
g_work_queue_cv.wait(thread_pool_lock, [] {
|
||||
return g_shutdown_thread_pool || !g_priority_work_queue.empty();
|
||||
});
|
||||
|
||||
if (g_shutdown_thread_pool && g_priority_work_queue.empty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!g_priority_work_queue.empty()) {
|
||||
ShaderCompilationTask highest_priority_task = g_priority_work_queue.top();
|
||||
g_priority_work_queue.pop();
|
||||
task = std::move(highest_priority_task.task);
|
||||
}
|
||||
}
|
||||
|
||||
if (task) {
|
||||
g_active_compilation_tasks++;
|
||||
task();
|
||||
g_active_compilation_tasks--;
|
||||
g_completed_compilation_tasks++;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
g_thread_pool_initialized = true;
|
||||
}
|
||||
|
||||
// Shutdown thread pool
|
||||
void ShutdownThreadPool() {
|
||||
if (!g_thread_pool_initialized) {
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(g_work_queue_mutex);
|
||||
g_shutdown_thread_pool = true;
|
||||
}
|
||||
|
||||
g_work_queue_cv.notify_all();
|
||||
|
||||
for (auto& thread : g_thread_pool) {
|
||||
if (thread.joinable()) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
g_thread_pool.clear();
|
||||
g_thread_pool_initialized = false;
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Shader compilation thread pool shutdown");
|
||||
}
|
||||
|
||||
// Submit work to thread pool with priority
|
||||
void SubmitShaderCompilationTask(std::function<void()> task, u32 priority) {
|
||||
if (!g_thread_pool_initialized) {
|
||||
InitializeThreadPool();
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> work_queue_lock(g_work_queue_mutex);
|
||||
g_priority_work_queue.push({
|
||||
std::move(task),
|
||||
priority,
|
||||
std::chrono::high_resolution_clock::now()
|
||||
});
|
||||
g_total_compilation_tasks++;
|
||||
}
|
||||
|
||||
g_work_queue_cv.notify_one();
|
||||
}
|
||||
|
||||
// Get shader compilation progress (0.0f - 1.0f)
float GetShaderCompilationProgress() {
    const size_t total = g_total_compilation_tasks.load();
    if (total == 0) {
        return 1.0f;
    }

    const size_t completed = g_completed_compilation_tasks.load();
    return static_cast<float>(completed) / static_cast<float>(total);
}

// Check if any shader compilation is in progress
bool IsShaderCompilationInProgress() {
    return g_active_compilation_tasks.load() > 0;
}

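Worked example of the value above: with 120 tasks submitted and 30 completed, GetShaderCompilationProgress() returns 30 / 120 = 0.25. A caller polling it could look like this hedged sketch:

    while (IsShaderCompilationInProgress()) {
        LOG_DEBUG(Render_Vulkan, "Shader compilation {:.0f}% complete",
                  GetShaderCompilationProgress() * 100.0f);
        std::this_thread::sleep_for(std::chrono::milliseconds(250));
    }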
// Add shader to prediction list for preloading
|
||||
void PredictShader(const std::string& shader_path) {
|
||||
std::lock_guard<std::mutex> lock(g_predicted_shaders_mutex);
|
||||
g_predicted_shaders.insert(shader_path);
|
||||
}
|
||||
|
||||
// Preload predicted shaders
|
||||
void PreloadPredictedShaders(const Device& device) {
|
||||
std::unordered_set<std::string> shaders_to_load;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(g_predicted_shaders_mutex);
|
||||
shaders_to_load = g_predicted_shaders;
|
||||
g_predicted_shaders.clear();
|
||||
}
|
||||
|
||||
if (shaders_to_load.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Preloading {} predicted shaders", shaders_to_load.size());
|
||||
|
||||
for (const auto& shader_path : shaders_to_load) {
|
||||
// Queue with low priority since these are predictions
|
||||
AsyncCompileShader(device, shader_path, [](VkShaderModule) {}, SHADER_PRIORITY_LOW);
|
||||
}
|
||||
}
|
||||
|
||||
// Atomic flag for tracking shader compilation status
|
||||
std::atomic<bool> compilingShader(false);
|
||||
|
||||
void AsyncCompileShader(const Device& device, const std::string& shader_path,
|
||||
std::function<void(VkShaderModule)> callback, u32 priority) {
|
||||
LOG_INFO(Render_Vulkan, "Asynchronously compiling shader: {}", shader_path);
|
||||
|
||||
// Create shader cache directory if it doesn't exist
|
||||
if (!std::filesystem::exists(SHADER_CACHE_DIR)) {
|
||||
std::filesystem::create_directory(SHADER_CACHE_DIR);
|
||||
}
|
||||
|
||||
// Initialize thread pool if needed
|
||||
if (!g_thread_pool_initialized) {
|
||||
InitializeThreadPool();
|
||||
}
|
||||
|
||||
// Submit to thread pool with priority
|
||||
SubmitShaderCompilationTask([device_ptr = &device, shader_path, callback = std::move(callback)]() {
|
||||
auto startTime = std::chrono::high_resolution_clock::now();
|
||||
|
||||
try {
|
||||
std::vector<u32> spir_v;
|
||||
bool success = false;
|
||||
|
||||
// Check if the file exists and attempt to read it
|
||||
if (std::filesystem::exists(shader_path)) {
|
||||
std::ifstream shader_file(shader_path, std::ios::binary);
|
||||
if (shader_file) {
|
||||
shader_file.seekg(0, std::ios::end);
|
||||
size_t file_size = static_cast<size_t>(shader_file.tellg());
|
||||
shader_file.seekg(0, std::ios::beg);
|
||||
|
||||
spir_v.resize(file_size / sizeof(u32));
|
||||
if (shader_file.read(reinterpret_cast<char*>(spir_v.data()), file_size)) {
|
||||
success = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (success) {
|
||||
vk::ShaderModule shader = BuildShader(*device_ptr, spir_v);
|
||||
if (IsShaderValid(*shader)) {
|
||||
// Cache the compiled shader to disk for faster loading next time
|
||||
std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" +
|
||||
std::filesystem::path(shader_path).filename().string() + ".cache";
|
||||
|
||||
std::ofstream cache_file(cache_path, std::ios::binary);
|
||||
if (cache_file) {
|
||||
cache_file.write(reinterpret_cast<const char*>(spir_v.data()),
|
||||
spir_v.size() * sizeof(u32));
|
||||
}
|
||||
|
||||
auto endTime = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double> duration = endTime - startTime;
|
||||
LOG_INFO(Render_Vulkan, "Shader compiled in {:.2f} seconds: {}",
|
||||
duration.count(), shader_path);
|
||||
|
||||
// Store the module pointer for the callback
|
||||
VkShaderModule raw_module = *shader;
|
||||
|
||||
// Submit callback to main thread via command queue for thread safety
|
||||
SubmitCommandToQueue([callback = std::move(callback), raw_module]() {
|
||||
callback(raw_module);
|
||||
});
|
||||
} else {
|
||||
LOG_ERROR(Render_Vulkan, "Shader validation failed: {}", shader_path);
|
||||
SubmitCommandToQueue([callback = std::move(callback)]() {
|
||||
callback(VK_NULL_HANDLE);
|
||||
});
|
||||
}
|
||||
} else {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to read shader file: {}", shader_path);
|
||||
SubmitCommandToQueue([callback = std::move(callback)]() {
|
||||
callback(VK_NULL_HANDLE);
|
||||
});
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
LOG_ERROR(Render_Vulkan, "Error compiling shader: {}", e.what());
|
||||
SubmitCommandToQueue([callback = std::move(callback)]() {
|
||||
callback(VK_NULL_HANDLE);
|
||||
});
|
||||
}
|
||||
}, priority);
|
||||
}
|
||||
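Usage sketch for the asynchronous path above; the callback is marshalled back through SubmitCommandToQueue, so it runs on the command-queue thread rather than the caller's thread. The shader path is a hypothetical example, not a file shipped by the project:

    AsyncCompileShader(device, "shaders/post_fx.spv",
                       [](VkShaderModule module) {
                           if (module != VK_NULL_HANDLE) {
                               LOG_INFO(Render_Vulkan, "post_fx.spv ready");
                           }
                       },
                       SHADER_PRIORITY_HIGH);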
|
||||
// Overload for backward compatibility
|
||||
void AsyncCompileShader(const Device& device, const std::string& shader_path,
|
||||
std::function<void(VkShaderModule)> callback) {
|
||||
AsyncCompileShader(device, shader_path, std::move(callback), SHADER_PRIORITY_NORMAL);
|
||||
}
|
||||
|
||||
ShaderManager::ShaderManager(const Device& device_) : device(device_) {
|
||||
// Initialize command queue system
|
||||
InitializeCommandQueue();
|
||||
|
||||
// Initialize thread pool for shader compilation
|
||||
InitializeThreadPool();
|
||||
}
|
||||
|
||||
ShaderManager::~ShaderManager() {
|
||||
// Wait for any pending compilations to finish
|
||||
WaitForCompilation();
|
||||
|
||||
// Clean up shader modules
|
||||
std::lock_guard<std::mutex> lock(shader_mutex);
|
||||
shader_cache.clear();
|
||||
|
||||
// Shutdown thread pool
|
||||
ShutdownThreadPool();
|
||||
|
||||
// Shutdown command queue
|
||||
ShutdownCommandQueue();
|
||||
}
|
||||
|
||||
VkShaderModule ShaderManager::GetShaderModule(const std::string& shader_path) {
|
||||
// Check in-memory cache first
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(shader_mutex);
|
||||
auto it = shader_cache.find(shader_path);
|
||||
if (it != shader_cache.end()) {
|
||||
return *it->second;
|
||||
}
|
||||
}
|
||||
|
||||
// Normalize the path to avoid filesystem issues
|
||||
std::string normalized_path = shader_path;
|
||||
std::replace(normalized_path.begin(), normalized_path.end(), '\\', '/');
|
||||
|
||||
// Check if shader exists
|
||||
if (!std::filesystem::exists(normalized_path)) {
|
||||
LOG_WARNING(Render_Vulkan, "Shader file does not exist: {}", normalized_path);
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
// Check if shader is available in disk cache first
|
||||
const std::string filename = std::filesystem::path(normalized_path).filename().string();
|
||||
std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" + filename + ".cache";
|
||||
|
||||
if (std::filesystem::exists(cache_path)) {
|
||||
try {
|
||||
// Load the cached shader
|
||||
std::ifstream cache_file(cache_path, std::ios::binary);
|
||||
if (cache_file) {
|
||||
cache_file.seekg(0, std::ios::end);
|
||||
size_t file_size = static_cast<size_t>(cache_file.tellg());
|
||||
|
||||
if (file_size > 0 && file_size % sizeof(u32) == 0) {
|
||||
cache_file.seekg(0, std::ios::beg);
|
||||
std::vector<u32> spir_v;
|
||||
spir_v.resize(file_size / sizeof(u32));
|
||||
|
||||
if (cache_file.read(reinterpret_cast<char*>(spir_v.data()), file_size)) {
|
||||
vk::ShaderModule shader = BuildShader(device, spir_v);
|
||||
if (IsShaderValid(*shader)) {
|
||||
// Store in memory cache
|
||||
std::lock_guard<std::mutex> lock(shader_mutex);
|
||||
shader_cache[normalized_path] = std::move(shader);
|
||||
LOG_INFO(Render_Vulkan, "Loaded shader from cache: {}", normalized_path);
|
||||
return *shader_cache[normalized_path];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
LOG_WARNING(Render_Vulkan, "Failed to load shader from cache: {}", e.what());
|
||||
// Continue to load from original file
|
||||
}
|
||||
}
|
||||
|
||||
// Try to load the shader directly if cache load failed
|
||||
if (LoadShader(normalized_path)) {
|
||||
std::lock_guard<std::mutex> lock(shader_mutex);
|
||||
return *shader_cache[normalized_path];
|
||||
}
|
||||
|
||||
LOG_ERROR(Render_Vulkan, "Failed to load shader: {}", normalized_path);
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
|
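The cache-loading branch above boils down to one read pattern: SPIR-V is a stream of 32-bit words, so the file size must be a non-zero multiple of sizeof(u32) before the bytes are reinterpreted. A standalone, hedged sketch of that pattern (assumes <optional> is also included):

    std::optional<std::vector<u32>> ReadSpirvFile(const std::filesystem::path& path) {
        std::ifstream file(path, std::ios::binary | std::ios::ate);
        const size_t size = file ? static_cast<size_t>(file.tellg()) : 0;
        if (size == 0 || size % sizeof(u32) != 0) {
            return std::nullopt; // empty or not word-aligned: reject
        }
        std::vector<u32> words(size / sizeof(u32));
        file.seekg(0, std::ios::beg);
        if (!file.read(reinterpret_cast<char*>(words.data()), size)) {
            return std::nullopt;
        }
        return words;
    }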
||||
void ShaderManager::ReloadShader(const std::string& shader_path) {
|
||||
LOG_INFO(Render_Vulkan, "Reloading shader: {}", shader_path);
|
||||
|
||||
// Remove the old shader from cache
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(shader_mutex);
|
||||
shader_cache.erase(shader_path);
|
||||
}
|
||||
|
||||
// Load the shader again
|
||||
LoadShader(shader_path);
|
||||
}
|
||||
|
||||
bool ShaderManager::LoadShader(const std::string& shader_path) {
|
||||
LOG_INFO(Render_Vulkan, "Loading shader from: {}", shader_path);
|
||||
|
||||
if (!std::filesystem::exists(shader_path)) {
|
||||
LOG_ERROR(Render_Vulkan, "Shader file does not exist: {}", shader_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
std::vector<u32> spir_v;
|
||||
std::ifstream shader_file(shader_path, std::ios::binary);
|
||||
|
||||
if (!shader_file.is_open()) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to open shader file: {}", shader_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
shader_file.seekg(0, std::ios::end);
|
||||
const size_t file_size = static_cast<size_t>(shader_file.tellg());
|
||||
|
||||
if (file_size == 0 || file_size % sizeof(u32) != 0) {
|
||||
LOG_ERROR(Render_Vulkan, "Invalid shader file size ({}): {}", file_size, shader_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
shader_file.seekg(0, std::ios::beg);
|
||||
spir_v.resize(file_size / sizeof(u32));
|
||||
|
||||
if (!shader_file.read(reinterpret_cast<char*>(spir_v.data()), file_size)) {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to read shader data: {}", shader_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
vk::ShaderModule shader = BuildShader(device, spir_v);
|
||||
if (!IsShaderValid(*shader)) {
|
||||
LOG_ERROR(Render_Vulkan, "Created shader module is invalid: {}", shader_path);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Store in memory cache
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(shader_mutex);
|
||||
shader_cache[shader_path] = std::move(shader);
|
||||
}
|
||||
|
||||
// Also store in disk cache for future use
|
||||
try {
|
||||
if (!std::filesystem::exists(SHADER_CACHE_DIR)) {
|
||||
std::filesystem::create_directory(SHADER_CACHE_DIR);
|
||||
}
|
||||
|
||||
std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" +
|
||||
std::filesystem::path(shader_path).filename().string() + ".cache";
|
||||
|
||||
std::ofstream cache_file(cache_path, std::ios::binary);
|
||||
if (cache_file.is_open()) {
|
||||
cache_file.write(reinterpret_cast<const char*>(spir_v.data()),
|
||||
spir_v.size() * sizeof(u32));
|
||||
|
||||
if (!cache_file) {
|
||||
LOG_WARNING(Render_Vulkan, "Failed to write shader cache: {}", cache_path);
|
||||
}
|
||||
} else {
|
||||
LOG_WARNING(Render_Vulkan, "Failed to create shader cache file: {}", cache_path);
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
LOG_WARNING(Render_Vulkan, "Error writing shader cache: {}", e.what());
|
||||
// Continue even if disk cache fails
|
||||
}
|
||||
|
||||
return true;
|
||||
} catch (const std::exception& e) {
|
||||
LOG_ERROR(Render_Vulkan, "Error loading shader: {}", e.what());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderManager::WaitForCompilation() {
|
||||
// Wait until no shader is being compiled
|
||||
while (IsShaderCompilationInProgress()) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
||||
}
|
||||
|
||||
// Process any pending commands in the queue
|
||||
std::unique_lock<std::mutex> lock(commandQueueMutex);
|
||||
while (!commandQueue.empty()) {
|
||||
auto command = commandQueue.front();
|
||||
commandQueue.pop();
|
||||
lock.unlock();
|
||||
|
||||
command();
|
||||
|
||||
lock.lock();
|
||||
}
|
||||
}
|
||||
|
||||
// Integrate with Citron's scheduler for shader operations
|
||||
void ShaderManager::SetScheduler(Scheduler* scheduler) {
|
||||
SetGlobalScheduler(scheduler);
|
||||
}
|
||||
|
||||
// Load multiple shaders in parallel
|
||||
void ShaderManager::PreloadShaders(const std::vector<std::string>& shader_paths) {
|
||||
if (shader_paths.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Preloading {} shaders", shader_paths.size());
|
||||
|
||||
// Track shaders that need to be loaded
|
||||
std::unordered_set<std::string> shaders_to_load;
|
||||
|
||||
// First check which shaders are not already cached
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(shader_mutex);
|
||||
for (const auto& path : shader_paths) {
|
||||
if (shader_cache.find(path) == shader_cache.end()) {
|
||||
// Also check disk cache
|
||||
if (std::filesystem::exists(path)) {
|
||||
std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" +
|
||||
std::filesystem::path(path).filename().string() + ".cache";
|
||||
if (!std::filesystem::exists(cache_path)) {
|
||||
shaders_to_load.insert(path);
|
||||
}
|
||||
} else {
|
||||
LOG_WARNING(Render_Vulkan, "Shader file not found: {}", path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (shaders_to_load.empty()) {
|
||||
LOG_INFO(Render_Vulkan, "All shaders already cached, no preloading needed");
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Found {} shaders that need preloading", shaders_to_load.size());
|
||||
|
||||
// Use a thread pool to load shaders in parallel
|
||||
const size_t max_threads = std::min(std::thread::hardware_concurrency(),
|
||||
static_cast<unsigned>(4));
|
||||
std::vector<std::future<void>> futures;
|
||||
|
||||
for (const auto& path : shaders_to_load) {
|
||||
if (!std::filesystem::exists(path)) {
|
||||
LOG_WARNING(Render_Vulkan, "Skipping non-existent shader: {}", path);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto future = std::async(std::launch::async, [this, path]() {
|
||||
try {
|
||||
this->LoadShader(path);
|
||||
} catch (const std::exception& e) {
|
||||
LOG_ERROR(Render_Vulkan, "Error loading shader {}: {}", path, e.what());
|
||||
}
|
||||
});
|
||||
futures.push_back(std::move(future));
|
||||
|
||||
// Limit max parallel threads
|
||||
if (futures.size() >= max_threads) {
|
||||
futures.front().wait();
|
||||
futures.erase(futures.begin());
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for remaining shaders to load
|
||||
for (auto& future : futures) {
|
||||
future.wait();
|
||||
}
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Finished preloading shaders");
|
||||
}
|
||||
|
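The preload loop above caps concurrency by waiting on the oldest future once max_threads tasks are in flight. A generic, hedged sketch of that throttling pattern (names are illustrative):

    template <typename Jobs, typename Fn>
    void RunThrottled(const Jobs& jobs, size_t max_in_flight, Fn&& fn) {
        std::vector<std::future<void>> in_flight;
        for (const auto& job : jobs) {
            in_flight.push_back(std::async(std::launch::async, fn, job));
            if (in_flight.size() >= max_in_flight) {
                in_flight.front().wait();           // block on the oldest task
                in_flight.erase(in_flight.begin());
            }
        }
        for (auto& f : in_flight) {
            f.wait();                               // drain the remainder
        }
    }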
||||
// Batch load multiple shaders with priorities
|
||||
void ShaderManager::BatchLoadShaders(const std::vector<std::string>& shader_paths,
|
||||
const std::vector<u32>& priorities) {
|
||||
if (shader_paths.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_INFO(Render_Vulkan, "Batch loading {} shaders", shader_paths.size());
|
||||
|
||||
for (size_t i = 0; i < shader_paths.size(); ++i) {
|
||||
const auto& path = shader_paths[i];
|
||||
u32 priority = i < priorities.size() ? priorities[i] : SHADER_PRIORITY_NORMAL;
|
||||
|
||||
AsyncCompileShader(device, path, [this, path](VkShaderModule raw_module) {
|
||||
if (raw_module != VK_NULL_HANDLE) {
|
||||
// Note: We don't use the raw_module directly as we can't create a proper vk::ShaderModule wrapper.
|
||||
// Instead, we'll load the shader again using the LoadShader method which properly handles
|
||||
// the creation of the vk::ShaderModule.
|
||||
|
||||
// LoadShader will create the shader module and store it in shader_cache
|
||||
if (LoadShader(path)) {
|
||||
LOG_INFO(Render_Vulkan, "Loaded shader module for {}", path);
|
||||
} else {
|
||||
LOG_ERROR(Render_Vulkan, "Failed to load shader module for {}", path);
|
||||
}
|
||||
}
|
||||
}, priority);
|
||||
}
|
||||
}
|
||||
|
||||
// Preload all shaders in a directory with automatic prioritization
|
||||
void ShaderManager::PreloadShaderDirectory(const std::string& directory_path) {
|
||||
if (!std::filesystem::exists(directory_path)) {
|
||||
LOG_WARNING(Render_Vulkan, "Shader directory does not exist: {}", directory_path);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<std::string> shader_paths;
|
||||
std::vector<u32> priorities;
|
||||
|
||||
for (const auto& entry : std::filesystem::directory_iterator(directory_path)) {
|
||||
if (entry.is_regular_file()) {
|
||||
const auto& path = entry.path().string();
|
||||
const auto extension = entry.path().extension().string();
|
||||
|
||||
// Only load shader files
|
||||
if (extension == ".spv" || extension == ".glsl" || extension == ".vert" ||
|
||||
extension == ".frag" || extension == ".comp") {
|
||||
|
||||
shader_paths.push_back(path);
|
||||
|
||||
// Assign priorities based on filename patterns
|
||||
// This is a simple heuristic and will be improved
|
||||
const auto filename = entry.path().filename().string();
|
||||
if (filename.find("ui") != std::string::npos ||
|
||||
filename.find("menu") != std::string::npos) {
|
||||
priorities.push_back(SHADER_PRIORITY_CRITICAL);
|
||||
} else if (filename.find("effect") != std::string::npos ||
|
||||
filename.find("post") != std::string::npos) {
|
||||
priorities.push_back(SHADER_PRIORITY_HIGH);
|
||||
} else {
|
||||
priorities.push_back(SHADER_PRIORITY_NORMAL);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!shader_paths.empty()) {
|
||||
BatchLoadShaders(shader_paths, priorities);
|
||||
}
|
||||
}
|
||||
|
||||
// Get current compilation progress
|
||||
float ShaderManager::GetCompilationProgress() const {
|
||||
return GetShaderCompilationProgress();
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -1,16 +1,9 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <mutex>
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
@ -18,74 +11,7 @@
|
|||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class Scheduler;
|
||||
|
||||
// Priority constants for shader compilation
|
||||
extern const u32 SHADER_PRIORITY_CRITICAL;
|
||||
extern const u32 SHADER_PRIORITY_HIGH;
|
||||
extern const u32 SHADER_PRIORITY_NORMAL;
|
||||
extern const u32 SHADER_PRIORITY_LOW;
|
||||
|
||||
// Command queue system for asynchronous operations
|
||||
void InitializeCommandQueue();
|
||||
void ShutdownCommandQueue();
|
||||
void SubmitCommandToQueue(std::function<void()> command);
|
||||
void CommandQueueWorker();
|
||||
|
||||
// Thread pool management for shader compilation
|
||||
void InitializeThreadPool();
|
||||
void ShutdownThreadPool();
|
||||
void SubmitShaderCompilationTask(std::function<void()> task, u32 priority);
|
||||
float GetShaderCompilationProgress();
|
||||
bool IsShaderCompilationInProgress();
|
||||
|
||||
// Predictive shader loading
|
||||
void PredictShader(const std::string& shader_path);
|
||||
void PreloadPredictedShaders(const Device& device);
|
||||
|
||||
// Scheduler integration functions
|
||||
void SetGlobalScheduler(Scheduler* scheduler);
|
||||
void SubmitToScheduler(std::function<void(vk::CommandBuffer)> command);
|
||||
u64 FlushScheduler(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
|
||||
void ProcessAllCommands();
|
||||
|
||||
vk::ShaderModule BuildShader(const Device& device, std::span<const u32> code);
|
||||
|
||||
// Enhanced shader functionality
|
||||
bool IsShaderValid(VkShaderModule shader_module);
|
||||
|
||||
void AsyncCompileShader(const Device& device, const std::string& shader_path,
|
||||
std::function<void(VkShaderModule)> callback, u32 priority);
|
||||
|
||||
void AsyncCompileShader(const Device& device, const std::string& shader_path,
|
||||
std::function<void(VkShaderModule)> callback);
|
||||
|
||||
class ShaderManager {
|
||||
public:
|
||||
explicit ShaderManager(const Device& device);
|
||||
~ShaderManager();
|
||||
|
||||
VkShaderModule GetShaderModule(const std::string& shader_path);
|
||||
void ReloadShader(const std::string& shader_path);
|
||||
bool LoadShader(const std::string& shader_path);
|
||||
void WaitForCompilation();
|
||||
|
||||
// Enhanced shader management
|
||||
void BatchLoadShaders(const std::vector<std::string>& shader_paths,
|
||||
const std::vector<u32>& priorities);
|
||||
void PreloadShaderDirectory(const std::string& directory_path);
|
||||
float GetCompilationProgress() const;
|
||||
|
||||
// Batch process multiple shaders in parallel
|
||||
void PreloadShaders(const std::vector<std::string>& shader_paths);
|
||||
|
||||
// Integrate with Citron's scheduler
|
||||
void SetScheduler(Scheduler* scheduler);
|
||||
|
||||
private:
|
||||
const Device& device;
|
||||
std::mutex shader_mutex;
|
||||
std::unordered_map<std::string, vk::ShaderModule> shader_cache;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include <algorithm>
|
||||
|
@ -30,10 +29,6 @@
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
// TextureCacheManager implementations to fix linker errors
|
||||
TextureCacheManager::TextureCacheManager() = default;
|
||||
TextureCacheManager::~TextureCacheManager() = default;
|
||||
|
||||
using Tegra::Engines::Fermi2D;
|
||||
using Tegra::Texture::SwizzleSource;
|
||||
using Tegra::Texture::TextureMipmapFilter;
|
||||
|
@ -1193,171 +1188,69 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
|
|||
}
|
||||
|
||||
void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
|
||||
if (!dst->RenderPass()) {
|
||||
return;
|
||||
}
|
||||
|
||||
switch (dst_view.format) {
|
||||
case PixelFormat::D24_UNORM_S8_UINT:
|
||||
// Handle sRGB source formats
|
||||
if (src_view.format == PixelFormat::A8B8G8R8_SRGB ||
|
||||
src_view.format == PixelFormat::B8G8R8A8_SRGB) {
|
||||
// Verify format support before conversion
|
||||
if (device.IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
|
||||
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT,
|
||||
FormatType::Optimal)) {
|
||||
return blit_image_helper.ConvertABGR8SRGBToD24S8(dst, src_view);
|
||||
} else {
|
||||
// Fallback to regular ABGR8 conversion if sRGB not supported
|
||||
return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view);
|
||||
case PixelFormat::R16_UNORM:
|
||||
if (src_view.format == PixelFormat::D16_UNORM) {
|
||||
return blit_image_helper.ConvertD16ToR16(dst, src_view);
|
||||
}
|
||||
break;
|
||||
case PixelFormat::A8B8G8R8_SRGB:
|
||||
if (src_view.format == PixelFormat::D32_FLOAT) {
|
||||
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
|
||||
}
|
||||
break;
|
||||
case PixelFormat::A8B8G8R8_UNORM:
|
||||
if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
|
||||
return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
|
||||
}
|
||||
if (src_view.format == PixelFormat::D24_UNORM_S8_UINT) {
|
||||
return blit_image_helper.ConvertS8D24ToABGR8(dst, src_view);
|
||||
}
|
||||
if (src_view.format == PixelFormat::D32_FLOAT) {
|
||||
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
|
||||
}
|
||||
break;
|
||||
case PixelFormat::B8G8R8A8_SRGB:
|
||||
if (src_view.format == PixelFormat::D32_FLOAT) {
|
||||
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
|
||||
}
|
||||
break;
|
||||
case PixelFormat::B8G8R8A8_UNORM:
|
||||
if (src_view.format == PixelFormat::D32_FLOAT) {
|
||||
return blit_image_helper.ConvertD32FToABGR8(dst, src_view);
|
||||
}
|
||||
break;
|
||||
case PixelFormat::R32_FLOAT:
|
||||
if (src_view.format == PixelFormat::D32_FLOAT) {
|
||||
return blit_image_helper.ConvertD32ToR32(dst, src_view);
|
||||
}
|
||||
break;
|
||||
case PixelFormat::D16_UNORM:
|
||||
if (src_view.format == PixelFormat::R16_UNORM) {
|
||||
return blit_image_helper.ConvertR16ToD16(dst, src_view);
|
||||
}
|
||||
break;
|
||||
case PixelFormat::S8_UINT_D24_UNORM:
|
||||
if (src_view.format == PixelFormat::A8B8G8R8_UNORM ||
|
||||
src_view.format == PixelFormat::B8G8R8A8_UNORM) {
|
||||
return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view);
|
||||
}
|
||||
break;
|
||||
|
||||
case PixelFormat::A8B8G8R8_UNORM:
|
||||
case PixelFormat::A8B8G8R8_SNORM:
|
||||
case PixelFormat::A8B8G8R8_SINT:
|
||||
case PixelFormat::A8B8G8R8_UINT:
|
||||
case PixelFormat::R5G6B5_UNORM:
|
||||
case PixelFormat::B5G6R5_UNORM:
|
||||
case PixelFormat::A1R5G5B5_UNORM:
|
||||
case PixelFormat::A2B10G10R10_UNORM:
|
||||
case PixelFormat::A2B10G10R10_UINT:
|
||||
case PixelFormat::A2R10G10B10_UNORM:
|
||||
case PixelFormat::A1B5G5R5_UNORM:
|
||||
case PixelFormat::A5B5G5R1_UNORM:
|
||||
case PixelFormat::R8_UNORM:
|
||||
case PixelFormat::R8_SNORM:
|
||||
case PixelFormat::R8_SINT:
|
||||
case PixelFormat::R8_UINT:
|
||||
case PixelFormat::R16G16B16A16_FLOAT:
|
||||
case PixelFormat::R16G16B16A16_UNORM:
|
||||
case PixelFormat::R16G16B16A16_SNORM:
|
||||
case PixelFormat::R16G16B16A16_SINT:
|
||||
case PixelFormat::R16G16B16A16_UINT:
|
||||
case PixelFormat::B10G11R11_FLOAT:
|
||||
case PixelFormat::R32G32B32A32_UINT:
|
||||
case PixelFormat::BC1_RGBA_UNORM:
|
||||
case PixelFormat::BC2_UNORM:
|
||||
case PixelFormat::BC3_UNORM:
|
||||
case PixelFormat::BC4_UNORM:
|
||||
case PixelFormat::BC4_SNORM:
|
||||
case PixelFormat::BC5_UNORM:
|
||||
case PixelFormat::BC5_SNORM:
|
||||
case PixelFormat::BC7_UNORM:
|
||||
case PixelFormat::BC6H_UFLOAT:
|
||||
case PixelFormat::BC6H_SFLOAT:
|
||||
case PixelFormat::ASTC_2D_4X4_UNORM:
|
||||
case PixelFormat::B8G8R8A8_UNORM:
|
||||
case PixelFormat::R32G32B32A32_FLOAT:
|
||||
case PixelFormat::R32G32B32A32_SINT:
|
||||
case PixelFormat::R32G32_FLOAT:
|
||||
case PixelFormat::R32G32_SINT:
|
||||
case PixelFormat::R32_FLOAT:
|
||||
case PixelFormat::R16_FLOAT:
|
||||
case PixelFormat::R16_UNORM:
|
||||
case PixelFormat::R16_SNORM:
|
||||
case PixelFormat::R16_UINT:
|
||||
case PixelFormat::R16_SINT:
|
||||
case PixelFormat::R16G16_UNORM:
|
||||
case PixelFormat::R16G16_FLOAT:
|
||||
case PixelFormat::R16G16_UINT:
|
||||
case PixelFormat::R16G16_SINT:
|
||||
case PixelFormat::R16G16_SNORM:
|
||||
case PixelFormat::R32G32B32_FLOAT:
|
||||
case PixelFormat::A8B8G8R8_SRGB:
|
||||
case PixelFormat::R8G8_UNORM:
|
||||
case PixelFormat::R8G8_SNORM:
|
||||
case PixelFormat::R8G8_SINT:
|
||||
case PixelFormat::R8G8_UINT:
|
||||
case PixelFormat::R32G32_UINT:
|
||||
case PixelFormat::R16G16B16X16_FLOAT:
|
||||
case PixelFormat::R32_UINT:
|
||||
case PixelFormat::R32_SINT:
|
||||
case PixelFormat::ASTC_2D_8X8_UNORM:
|
||||
case PixelFormat::ASTC_2D_8X5_UNORM:
|
||||
case PixelFormat::ASTC_2D_5X4_UNORM:
|
||||
case PixelFormat::B8G8R8A8_SRGB:
|
||||
case PixelFormat::BC1_RGBA_SRGB:
|
||||
case PixelFormat::BC2_SRGB:
|
||||
case PixelFormat::BC3_SRGB:
|
||||
case PixelFormat::BC7_SRGB:
|
||||
case PixelFormat::A4B4G4R4_UNORM:
|
||||
case PixelFormat::G4R4_UNORM:
|
||||
case PixelFormat::ASTC_2D_4X4_SRGB:
|
||||
case PixelFormat::ASTC_2D_8X8_SRGB:
|
||||
case PixelFormat::ASTC_2D_8X5_SRGB:
|
||||
case PixelFormat::ASTC_2D_5X4_SRGB:
|
||||
case PixelFormat::ASTC_2D_5X5_UNORM:
|
||||
case PixelFormat::ASTC_2D_5X5_SRGB:
|
||||
case PixelFormat::ASTC_2D_10X8_UNORM:
|
||||
case PixelFormat::ASTC_2D_10X8_SRGB:
|
||||
case PixelFormat::ASTC_2D_6X6_UNORM:
|
||||
case PixelFormat::ASTC_2D_6X6_SRGB:
|
||||
case PixelFormat::ASTC_2D_10X6_UNORM:
|
||||
case PixelFormat::ASTC_2D_10X6_SRGB:
|
||||
case PixelFormat::ASTC_2D_10X5_UNORM:
|
||||
case PixelFormat::ASTC_2D_10X5_SRGB:
|
||||
case PixelFormat::ASTC_2D_10X10_UNORM:
|
||||
case PixelFormat::ASTC_2D_10X10_SRGB:
|
||||
case PixelFormat::ASTC_2D_12X10_UNORM:
|
||||
case PixelFormat::ASTC_2D_12X10_SRGB:
|
||||
case PixelFormat::ASTC_2D_12X12_UNORM:
|
||||
case PixelFormat::ASTC_2D_12X12_SRGB:
|
||||
case PixelFormat::ASTC_2D_8X6_UNORM:
|
||||
case PixelFormat::ASTC_2D_8X6_SRGB:
|
||||
case PixelFormat::ASTC_2D_6X5_UNORM:
|
||||
case PixelFormat::ASTC_2D_6X5_SRGB:
|
||||
case PixelFormat::E5B9G9R9_FLOAT:
|
||||
case PixelFormat::D32_FLOAT:
|
||||
case PixelFormat::D16_UNORM:
|
||||
case PixelFormat::X8_D24_UNORM:
|
||||
case PixelFormat::S8_UINT:
|
||||
case PixelFormat::S8_UINT_D24_UNORM:
|
||||
case PixelFormat::D32_FLOAT_S8_UINT:
|
||||
case PixelFormat::Invalid:
|
||||
if (src_view.format == PixelFormat::A8B8G8R8_UNORM ||
|
||||
src_view.format == PixelFormat::B8G8R8A8_UNORM ||
|
||||
src_view.format == PixelFormat::A8B8G8R8_SRGB ||
|
||||
src_view.format == PixelFormat::B8G8R8A8_SRGB) {
|
||||
return blit_image_helper.ConvertABGR8ToD32F(dst, src_view);
|
||||
}
|
||||
if (src_view.format == PixelFormat::R32_FLOAT) {
|
||||
return blit_image_helper.ConvertR32ToD32(dst, src_view);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
VkFormat TextureCacheRuntime::GetSupportedFormat(VkFormat requested_format,
                                                 VkFormatFeatureFlags required_features) const {
    if (requested_format == VK_FORMAT_A8B8G8R8_SRGB_PACK32 &&
        (required_features & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
        // Force valid depth format when sRGB requested in depth context
        return VK_FORMAT_D24_UNORM_S8_UINT;
    }
    return requested_format;
}

// Helper functions for format compatibility checks
bool TextureCacheRuntime::IsFormatDitherable(PixelFormat format) {
    switch (format) {
    case PixelFormat::B8G8R8A8_UNORM:
    case PixelFormat::A8B8G8R8_UNORM:
    case PixelFormat::B8G8R8A8_SRGB:
    case PixelFormat::A8B8G8R8_SRGB:
        return true;
    default:
        return false;
    }
}

bool TextureCacheRuntime::IsFormatScalable(PixelFormat format) {
    switch (format) {
    case PixelFormat::B8G8R8A8_UNORM:
    case PixelFormat::A8B8G8R8_UNORM:
    case PixelFormat::R16G16B16A16_FLOAT:
    case PixelFormat::R32G32B32A32_FLOAT:
        return true;
    default:
        return false;
    }
    UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format);
}

void TextureCacheRuntime::CopyImage(Image& dst, Image& src,

@ -1449,224 +1342,13 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
    });
}

void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies) {
void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
                                        std::span<const VideoCommon::ImageCopy> copies) {
    const bool msaa_to_non_msaa = src.info.num_samples > 1 && dst.info.num_samples == 1;
    if (!msaa_to_non_msaa) {
        return CopyImage(dst, src, copies);
    }

    // Convert PixelFormat to VkFormat using Maxwell format conversion
    const auto vk_format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, src.info.format).format;

    // Check if format supports MSAA resolve
    const auto format_properties = device.GetPhysical().GetFormatProperties(vk_format);
    if (!(format_properties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
        LOG_WARNING(Render_Vulkan, "Format does not support MSAA resolve, falling back to compute shader");
        if (msaa_copy_pass) {
            return msaa_copy_pass->CopyImage(dst, src, copies, true);
            return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa);
        }
        UNIMPLEMENTED_MSG("MSAA resolve not supported for format and no compute fallback available");
        return;
    }

    const VkImage dst_image = dst.Handle();
    const VkImage src_image = src.Handle();
    const VkImageAspectFlags aspect_mask = dst.AspectMask();

    // Create temporary resolve image with proper memory allocation
    const VkImageCreateInfo resolve_ci{
        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .imageType = VK_IMAGE_TYPE_2D,
        .format = vk_format,
        .extent = {
            .width = src.info.size.width,
            .height = src.info.size.height,
            .depth = src.info.size.depth,
        },
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = VK_SAMPLE_COUNT_1_BIT,
        .tiling = VK_IMAGE_TILING_OPTIMAL,
        .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
    };

    const auto resolve_image_holder = memory_allocator.CreateImage(resolve_ci);

    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([src_image, dst_image, resolve_image = *resolve_image_holder,
                      copies, aspect_mask](vk::CommandBuffer cmdbuf) {
        for (const auto& copy : copies) {
            const VkExtent3D extent{
                .width = static_cast<u32>(copy.extent.width),
                .height = static_cast<u32>(copy.extent.height),
                .depth = static_cast<u32>(copy.extent.depth),
            };

            // First resolve the MSAA source to the temporary image
            const VkImageResolve resolve_region{
                .srcSubresource = {
                    .aspectMask = aspect_mask,
                    .mipLevel = static_cast<u32>(copy.src_subresource.base_level),
                    .baseArrayLayer = static_cast<u32>(copy.src_subresource.base_layer),
                    .layerCount = static_cast<u32>(copy.src_subresource.num_layers),
                },
                .srcOffset = {
                    static_cast<s32>(copy.src_offset.x),
                    static_cast<s32>(copy.src_offset.y),
                    static_cast<s32>(copy.src_offset.z),
                },
                .dstSubresource = {
                    .aspectMask = aspect_mask,
                    .mipLevel = 0,
                    .baseArrayLayer = 0,
                    .layerCount = 1,
                },
                .dstOffset = {0, 0, 0},
                .extent = extent,
            };

            const std::array pre_barriers{
                VkImageMemoryBarrier{
                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                    .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
                    .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
                    .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                    .image = src_image,
                    .subresourceRange = {
                        .aspectMask = aspect_mask,
                        .baseMipLevel = static_cast<u32>(copy.src_subresource.base_level),
                        .levelCount = 1,
                        .baseArrayLayer = static_cast<u32>(copy.src_subresource.base_layer),
                        .layerCount = static_cast<u32>(copy.src_subresource.num_layers),
                    },
                },
                VkImageMemoryBarrier{
                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                    .srcAccessMask = 0,
                    .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
                    .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
                    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                    .image = resolve_image,
                    .subresourceRange = {
                        .aspectMask = aspect_mask,
                        .baseMipLevel = 0,
                        .levelCount = 1,
                        .baseArrayLayer = 0,
                        .layerCount = 1,
                    },
                },
            };

            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                   VK_PIPELINE_STAGE_TRANSFER_BIT,
                                   0,
                                   nullptr,
                                   nullptr,
                                   pre_barriers);

            // Resolve MSAA image
            cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                                resolve_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                resolve_region);

            // Now copy from resolved image to destination
            const VkImageCopy copy_region{
                .srcSubresource = {
                    .aspectMask = aspect_mask,
                    .mipLevel = 0,
                    .baseArrayLayer = 0,
                    .layerCount = 1,
                },
                .srcOffset = {0, 0, 0},
                .dstSubresource = {
                    .aspectMask = aspect_mask,
                    .mipLevel = static_cast<u32>(copy.dst_subresource.base_level),
                    .baseArrayLayer = static_cast<u32>(copy.dst_subresource.base_layer),
                    .layerCount = static_cast<u32>(copy.dst_subresource.num_layers),
                },
                .dstOffset = {
                    static_cast<s32>(copy.dst_offset.x),
                    static_cast<s32>(copy.dst_offset.y),
                    static_cast<s32>(copy.dst_offset.z),
                },
                .extent = extent,
            };

            std::array<VkImageMemoryBarrier, 2> mid_barriers{{
                {
                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                    .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
                    .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
                    .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                    .image = resolve_image,
                    .subresourceRange = {
                        .aspectMask = aspect_mask,
                        .baseMipLevel = 0,
                        .levelCount = 1,
                        .baseArrayLayer = 0,
                        .layerCount = 1,
                    },
                },
                {
                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                    .srcAccessMask = 0,
                    .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
                    .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                    .image = dst_image,
                    .subresourceRange = {
                        .aspectMask = aspect_mask,
                        .baseMipLevel = static_cast<u32>(copy.dst_subresource.base_level),
                        .levelCount = 1,
                        .baseArrayLayer = static_cast<u32>(copy.dst_subresource.base_layer),
                        .layerCount = static_cast<u32>(copy.dst_subresource.num_layers),
                    },
                },
            }};

            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
                                   VK_PIPELINE_STAGE_TRANSFER_BIT,
                                   0,
                                   nullptr,
                                   nullptr,
                                   mid_barriers);

            // Copy from resolved image to destination
            cmdbuf.CopyImage(resolve_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                             dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                             vk::Span{&copy_region, 1});

            // Final transition back to general layout
            const VkImageMemoryBarrier final_barrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
                .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
                .image = dst_image,
                .subresourceRange = {
                    .aspectMask = aspect_mask,
                    .baseMipLevel = static_cast<u32>(copy.dst_subresource.base_level),
                    .levelCount = 1,
                    .baseArrayLayer = static_cast<u32>(copy.dst_subresource.base_layer),
                    .layerCount = static_cast<u32>(copy.dst_subresource.num_layers),
                },
            };

            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
                                   VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                   0,
                                   vk::Span<VkMemoryBarrier>{},
                                   vk::Span<VkBufferMemoryBarrier>{},
                                   vk::Span{&final_barrier, 1});
        }
    });
    UNIMPLEMENTED_MSG("Copying images with different samples is not supported.");
}

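As a reading aid, a hypothetical caller of this path would look roughly like the sketch below; the routing check mirrors the one the function itself performs, so this is illustrative only.

// Hypothetical caller: route a copy through the MSAA path only when the sample counts differ;
// CopyImageMSAA itself falls back to the plain CopyImage path otherwise.
if (src.info.num_samples > 1 && dst.info.num_samples == 1) {
    runtime.CopyImageMSAA(dst, src, copies); // resolve to 1 sample, then copy
} else {
    runtime.CopyImage(dst, src, copies);
}
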
u64 TextureCacheRuntime::GetDeviceLocalMemory() const {

@ -2098,7 +1780,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
    slot_images = &slot_imgs;
}

ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info,
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
                     const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
    : VideoCommon::ImageViewBase{info, view_info, gpu_addr_},
      buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}

@ -1,14 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#include <span>
#include <mutex>
#include <atomic>
#include <string>
#include <unordered_map>

#include "video_core/texture_cache/texture_cache_base.h"

@ -42,22 +37,6 @@ class RenderPassCache;
class StagingBufferPool;
class Scheduler;

// Enhanced texture management for better error handling and thread safety
class TextureCacheManager {
public:
    explicit TextureCacheManager();
    ~TextureCacheManager();

    VkImage GetTextureFromCache(const std::string& texture_path);
    void ReloadTexture(const std::string& texture_path);
    bool IsTextureLoadedCorrectly(VkImage texture);
    void HandleTextureCache();

private:
    std::mutex texture_mutex;
    std::unordered_map<std::string, VkImage> texture_cache;
};

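A minimal sketch of how this declared interface might be driven follows; the texture path and call sequence are illustrative assumptions, not part of the diff.

// Hypothetical driver code for the TextureCacheManager interface declared above.
TextureCacheManager manager;
const VkImage image = manager.GetTextureFromCache("textures/example.dds"); // path is illustrative
if (!manager.IsTextureLoadedCorrectly(image)) {
    manager.ReloadTexture("textures/example.dds"); // drop the cached entry and retry
}
manager.HandleTextureCache(); // periodic cache maintenance
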
class TextureCacheRuntime {
public:
    explicit TextureCacheRuntime(const Device& device_, Scheduler& scheduler_,
@ -133,15 +112,6 @@ public:

    void BarrierFeedbackLoop();

    bool IsFormatDitherable(VideoCore::Surface::PixelFormat format);
    bool IsFormatScalable(VideoCore::Surface::PixelFormat format);

    VkFormat GetSupportedFormat(VkFormat requested_format, VkFormatFeatureFlags required_features) const;

    // Enhanced texture error handling
    bool IsTextureLoadedCorrectly(VkImage texture);
    void HandleTextureError(const std::string& texture_path);

    const Device& device;
    Scheduler& scheduler;
    MemoryAllocator& memory_allocator;

@ -153,9 +123,6 @@ public:
    const Settings::ResolutionScalingInfo& resolution;
    std::array<std::vector<VkFormat>, VideoCore::Surface::MaxPixelFormat> view_formats;

    // Enhanced texture management
    TextureCacheManager texture_cache_manager;

    static constexpr size_t indexing_slots = 8 * sizeof(size_t);
    std::array<vk::Buffer, indexing_slots> buffers{};
};

@ -1,146 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#include <filesystem>

#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_texture_manager.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"

namespace Vulkan {

TextureManager::TextureManager(const Device& device_, MemoryAllocator& memory_allocator_)
    : device(device_), memory_allocator(memory_allocator_) {

    // Create a default texture for fallback in case of errors
    default_texture = CreateDefaultTexture();
}

TextureManager::~TextureManager() {
    std::lock_guard<std::mutex> lock(texture_mutex);
    // Clear all cached textures
    texture_cache.clear();

    // Default texture will be cleaned up automatically by vk::Image's destructor
}

VkImage TextureManager::GetTexture(const std::string& texture_path) {
    std::lock_guard<std::mutex> lock(texture_mutex);

    // Check if the texture is already in the cache
    auto it = texture_cache.find(texture_path);
    if (it != texture_cache.end()) {
        return *it->second;
    }

    // Load the texture and add it to the cache
    vk::Image new_texture = LoadTexture(texture_path);
    if (new_texture) {
        VkImage raw_handle = *new_texture;
        texture_cache.emplace(texture_path, std::move(new_texture));
        return raw_handle;
    }

    // If loading fails, return the default texture if it exists
    LOG_WARNING(Render_Vulkan, "Failed to load texture: {}, using default", texture_path);
    if (default_texture.has_value()) {
        return *(*default_texture);
    }
    return VK_NULL_HANDLE;
}

void TextureManager::ReloadTexture(const std::string& texture_path) {
    std::lock_guard<std::mutex> lock(texture_mutex);

    // Remove the texture from cache if it exists
    auto it = texture_cache.find(texture_path);
    if (it != texture_cache.end()) {
        LOG_INFO(Render_Vulkan, "Reloading texture: {}", texture_path);
        texture_cache.erase(it);
    }

    // The texture will be reloaded on next GetTexture call
}

bool TextureManager::IsTextureLoadedCorrectly(VkImage texture) {
    // Check if the texture handle is valid
    static const VkImage null_handle = VK_NULL_HANDLE;
    return texture != null_handle;
}

void TextureManager::CleanupTextureCache() {
    std::lock_guard<std::mutex> lock(texture_mutex);

    // TODO: track usage and remove unused textures [ZEP]

    LOG_INFO(Render_Vulkan, "Handling texture cache cleanup, current size: {}", texture_cache.size());
}

void TextureManager::HandleTextureRendering(const std::string& texture_path,
                                            std::function<void(VkImage)> render_callback) {
    VkImage texture = GetTexture(texture_path);

    if (!IsTextureLoadedCorrectly(texture)) {
        LOG_ERROR(Render_Vulkan, "Texture failed to load correctly: {}, attempting reload", texture_path);
        ReloadTexture(texture_path);
        texture = GetTexture(texture_path);
    }

    // Execute the rendering callback with the texture
    render_callback(texture);
}

vk::Image TextureManager::LoadTexture(const std::string& texture_path) {
    // TODO: load image data from disk
    // and create a proper Vulkan texture [ZEP]

    if (!std::filesystem::exists(texture_path)) {
        LOG_ERROR(Render_Vulkan, "Texture file not found: {}", texture_path);
        return {};
    }

    try {

        LOG_INFO(Render_Vulkan, "Loaded texture: {}", texture_path);

        // TODO: create an actual VkImage [ZEP]
        return CreateDefaultTexture();
    } catch (const std::exception& e) {
        LOG_ERROR(Render_Vulkan, "Error loading texture {}: {}", texture_path, e.what());
        return {};
    }
}

vk::Image TextureManager::CreateDefaultTexture() {
    // Create a small default texture (1x1 pixel) to use as a fallback
    // const VkExtent2D extent{1, 1};

    // Create image
    // Avoid unused variable warning by commenting out the unused struct
    // VkImageCreateInfo image_ci{
    //     .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
    //     .pNext = nullptr,
    //     .flags = 0,
    //     .imageType = VK_IMAGE_TYPE_2D,
    //     .format = texture_format,
    //     .extent = {extent.width, extent.height, 1},
    //     .mipLevels = 1,
    //     .arrayLayers = 1,
    //     .samples = VK_SAMPLE_COUNT_1_BIT,
    //     .tiling = VK_IMAGE_TILING_OPTIMAL,
    //     .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
    //     .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    //     .queueFamilyIndexCount = 0,
    //     .pQueueFamilyIndices = nullptr,
    //     .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
    // };

    // TODO: create an actual VkImage [ZEP]
    LOG_INFO(Render_Vulkan, "Created default fallback texture");
    return {};
}

} // namespace Vulkan
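A short usage sketch for the removed TextureManager, assuming a Device and MemoryAllocator are already available; the path and callback body are illustrative.

// Hypothetical usage of the removed TextureManager.
TextureManager texture_manager(device, memory_allocator);
texture_manager.HandleTextureRendering("ui/replacement.png", [](VkImage image) {
    // image is either the loaded texture or the 1x1 default fallback;
    // record draw commands that sample it here
});
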
@ -1,57 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#pragma once

#include <mutex>
#include <string>
#include <unordered_map>
#include <functional>
#include <atomic>
#include <optional>

#include "video_core/vulkan_common/vulkan_wrapper.h"

namespace Vulkan {

class Device;
class MemoryAllocator;

// Enhanced texture manager for better error handling and thread safety
class TextureManager {
public:
    explicit TextureManager(const Device& device, MemoryAllocator& memory_allocator);
    ~TextureManager();

    // Get a texture from the cache, loading it if necessary
    VkImage GetTexture(const std::string& texture_path);

    // Force a texture to reload from disk
    void ReloadTexture(const std::string& texture_path);

    // Check if a texture is loaded correctly
    bool IsTextureLoadedCorrectly(VkImage texture);

    // Remove old textures from the cache
    void CleanupTextureCache();

    // Handle texture rendering, with automatic reload if needed
    void HandleTextureRendering(const std::string& texture_path,
                                std::function<void(VkImage)> render_callback);

private:
    // Load a texture from disk and create a Vulkan image
    vk::Image LoadTexture(const std::string& texture_path);

    // Create a default texture to use in case of errors
    vk::Image CreateDefaultTexture();

    const Device& device;
    MemoryAllocator& memory_allocator;
    std::mutex texture_mutex;
    std::unordered_map<std::string, vk::Image> texture_cache;
    std::optional<vk::Image> default_texture;
    VkFormat texture_format = VK_FORMAT_B8G8R8A8_SRGB;
};

} // namespace Vulkan
@ -1,446 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <cstring>
#include <fstream>
#include <algorithm>

#include "common/logging/log.h"
#include "video_core/vulkan_common/hybrid_memory.h"

#if defined(__linux__) || defined(__ANDROID__)
#include <sys/mman.h>
#include <unistd.h>
#include <poll.h>
#include <sys/syscall.h>
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#elif defined(_WIN32)
#include <windows.h>
#endif

namespace Vulkan {

void PredictiveReuseManager::RecordUsage(u64 address, u64 size, bool write_access) {
    std::lock_guard<std::mutex> guard(mutex);

    // Add to history, removing oldest entries if we're past max_history
    access_history.push_back({address, size, write_access, current_timestamp++});
    if (access_history.size() > max_history) {
        access_history.erase(access_history.begin());
    }
}

bool PredictiveReuseManager::IsHotRegion(u64 address, u64 size) const {
    std::lock_guard<std::mutex> guard(mutex);

    // Check if this memory region has been accessed frequently
    const u64 end_address = address + size;
    int access_count = 0;

    for (const auto& access : access_history) {
        const u64 access_end = access.address + access.size;

        // Check for overlap
        if (!(end_address <= access.address || address >= access_end)) {
            access_count++;
        }
    }

    // Consider a region "hot" if it has been accessed in at least 10% of recent accesses
    return access_count >= static_cast<int>(std::max<size_t>(1, max_history / 10));
}

void PredictiveReuseManager::EvictRegion(u64 address, u64 size) {
    std::lock_guard<std::mutex> guard(mutex);

    // Remove any history entries that overlap with this region
    const u64 end_address = address + size;

    access_history.erase(
        std::remove_if(access_history.begin(), access_history.end(),
                       [address, end_address](const MemoryAccess& access) {
                           const u64 access_end = access.address + access.size;
                           // Check for overlap
                           return !(end_address <= access.address || address >= access_end);
                       }),
        access_history.end()
    );
}

void PredictiveReuseManager::ClearHistory() {
    std::lock_guard<std::mutex> guard(mutex);
    access_history.clear();
    current_timestamp = 0;
}

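A small sketch of the intended access-tracking flow, assuming callers record each guest memory access; with the default 32-entry history, a region counts as hot once it overlaps at least max(1, 32 / 10) = 3 recorded accesses.

// Hypothetical caller of the reuse tracker implemented above.
PredictiveReuseManager reuse{32};
reuse.RecordUsage(0x1000, 0x100, /*write_access=*/true);
reuse.RecordUsage(0x1040, 0x40, false);
reuse.RecordUsage(0x1080, 0x80, false);
const bool hot = reuse.IsHotRegion(0x1000, 0x100); // true: all three accesses overlap the query
reuse.EvictRegion(0x1000, 0x100);                  // drops those overlapping entries from the history
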
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
void FaultManagedAllocator::Touch(size_t addr) {
    lru.remove(addr);
    lru.push_front(addr);
    dirty_set.insert(addr);
}

void FaultManagedAllocator::EnforceLimit() {
    while (lru.size() > MaxPages) {
        size_t evict = lru.back();
        lru.pop_back();

        auto it = page_map.find(evict);
        if (it != page_map.end()) {
            if (dirty_set.count(evict)) {
                // Compress and store dirty page before evicting
                std::vector<u8> compressed((u8*)it->second, (u8*)it->second + PageSize);
                compressed_store[evict] = std::move(compressed);
                dirty_set.erase(evict);
            }

#if defined(__linux__) || defined(__ANDROID__)
            munmap(it->second, PageSize);
#elif defined(_WIN32)
            VirtualFree(it->second, 0, MEM_RELEASE);
#endif
            page_map.erase(it);
        }
    }
}

void* FaultManagedAllocator::GetOrAlloc(size_t addr) {
    std::lock_guard<std::mutex> guard(lock);

    if (page_map.count(addr)) {
        Touch(addr);
        return page_map[addr];
    }

#if defined(__linux__) || defined(__ANDROID__)
    void* mem = mmap(nullptr, PageSize, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (mem == MAP_FAILED) {
        LOG_ERROR(Render_Vulkan, "Failed to mmap memory for fault handler");
        return nullptr;
    }
#elif defined(_WIN32)
    void* mem = VirtualAlloc(nullptr, PageSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
    if (!mem) {
        LOG_ERROR(Render_Vulkan, "Failed to VirtualAlloc memory for fault handler");
        return nullptr;
    }
#endif

    if (compressed_store.count(addr)) {
        // Decompress stored page data
        std::memcpy(mem, compressed_store[addr].data(), compressed_store[addr].size());
        compressed_store.erase(addr);
    } else {
        std::memset(mem, 0, PageSize);
    }

    page_map[addr] = mem;
    lru.push_front(addr);
    dirty_set.insert(addr);
    EnforceLimit();

    return mem;
}

#if defined(_WIN32)
// Static member initialization
FaultManagedAllocator* FaultManagedAllocator::current_instance = nullptr;

LONG WINAPI FaultManagedAllocator::VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info) {
    // Only handle access violations (page faults)
    if (exception_info->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) {
        return EXCEPTION_CONTINUE_SEARCH;
    }

    if (!current_instance) {
        return EXCEPTION_CONTINUE_SEARCH;
    }

    // Get the faulting address - use ULONG_PTR for Windows
    const ULONG_PTR fault_addr = static_cast<ULONG_PTR>(exception_info->ExceptionRecord->ExceptionInformation[1]);
    const ULONG_PTR base_addr = reinterpret_cast<ULONG_PTR>(current_instance->base_address);

    // Check if the address is within our managed range
    if (fault_addr < base_addr ||
        fault_addr >= (base_addr + static_cast<ULONG_PTR>(current_instance->memory_size))) {
        return EXCEPTION_CONTINUE_SEARCH;
    }

    // Calculate the base address of the page
    const ULONG_PTR page_addr = fault_addr & ~(static_cast<ULONG_PTR>(PageSize) - 1);
    const size_t relative_addr = static_cast<size_t>(page_addr - base_addr);

    // Handle the fault by allocating memory
    void* page = current_instance->GetOrAlloc(relative_addr);
    if (!page) {
        return EXCEPTION_CONTINUE_SEARCH;
    }

    // Copy the page data to the faulting address
    DWORD old_protect;
    void* target_addr = reinterpret_cast<void*>(page_addr);

    // Make the target page writable
    if (VirtualProtect(target_addr, PageSize, PAGE_READWRITE, &old_protect)) {
        std::memcpy(target_addr, page, PageSize);
        // Restore original protection
        VirtualProtect(target_addr, PageSize, old_protect, &old_protect);
        return EXCEPTION_CONTINUE_EXECUTION;
    }

    return EXCEPTION_CONTINUE_SEARCH;
}

void FaultManagedAllocator::ExceptionHandlerThread() {
    while (running) {
        // Sleep to avoid busy waiting
        Sleep(10);
    }
}
#endif

void FaultManagedAllocator::Initialize(void* base, size_t size) {
#if defined(__linux__) || defined(__ANDROID__)
    uffd = static_cast<int>(syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK));
    if (uffd < 0) {
        LOG_ERROR(Render_Vulkan, "Failed to create userfaultfd, fault handling disabled");
        return;
    }

    struct uffdio_api api = { .api = UFFD_API };
    ioctl(uffd, UFFDIO_API, &api);

    struct uffdio_register reg = {
        .range = { .start = (uintptr_t)base, .len = size },
        .mode = UFFDIO_REGISTER_MODE_MISSING
    };

    if (ioctl(uffd, UFFDIO_REGISTER, &reg) < 0) {
        LOG_ERROR(Render_Vulkan, "Failed to register memory range with userfaultfd");
        close(uffd);
        uffd = -1;
        return;
    }

    running = true;
    fault_handler = std::thread(&FaultManagedAllocator::FaultThread, this);
#elif defined(_WIN32)
    // Setup Windows memory for fault handling
    base_address = base;
    memory_size = size;

    // Reserve memory range but don't commit it yet - it will be demand-paged
    DWORD oldProtect;
    VirtualProtect(base, size, PAGE_NOACCESS, &oldProtect);

    // Install a vectored exception handler
    current_instance = this;
    AddVectoredExceptionHandler(1, VectoredExceptionHandler);

    running = true;
    exception_handler = std::thread(&FaultManagedAllocator::ExceptionHandlerThread, this);

    LOG_INFO(Render_Vulkan, "Windows fault-managed memory initialized at {:p}, size: {}",
             base, size);
#endif
}

#if defined(__linux__) || defined(__ANDROID__)
void FaultManagedAllocator::FaultThread() {
    struct pollfd pfd = { uffd, POLLIN, 0 };

    while (running) {
        if (poll(&pfd, 1, 10) > 0) {
            struct uffd_msg msg;
            read(uffd, &msg, sizeof(msg));

            if (msg.event == UFFD_EVENT_PAGEFAULT) {
                size_t addr = msg.arg.pagefault.address & ~(PageSize - 1);
                void* page = GetOrAlloc(addr);

                if (page) {
                    struct uffdio_copy copy = {
                        .dst = (uintptr_t)addr,
                        .src = (uintptr_t)page,
                        .len = PageSize,
                        .mode = 0
                    };

                    ioctl(uffd, UFFDIO_COPY, &copy);
                }
            }
        }
    }
}
#endif

void* FaultManagedAllocator::Translate(size_t addr) {
    std::lock_guard<std::mutex> guard(lock);

    size_t base = addr & ~(PageSize - 1);
    if (!page_map.count(base)) {
        return nullptr;
    }

    Touch(base);
    return (u8*)page_map[base] + (addr % PageSize);
}

void FaultManagedAllocator::SaveSnapshot(const std::string& path) {
    std::lock_guard<std::mutex> guard(lock);

    std::ofstream out(path, std::ios::binary);
    if (!out) {
        LOG_ERROR(Render_Vulkan, "Failed to open snapshot file for writing: {}", path);
        return;
    }

    for (auto& [addr, mem] : page_map) {
        out.write(reinterpret_cast<const char*>(&addr), sizeof(addr));
        out.write(reinterpret_cast<const char*>(mem), PageSize);
    }

    LOG_INFO(Render_Vulkan, "Saved memory snapshot to {}", path);
}

void FaultManagedAllocator::SaveDifferentialSnapshot(const std::string& path) {
    std::lock_guard<std::mutex> guard(lock);

    std::ofstream out(path, std::ios::binary);
    if (!out) {
        LOG_ERROR(Render_Vulkan, "Failed to open diff snapshot file for writing: {}", path);
        return;
    }

    size_t dirty_count = 0;
    for (const auto& addr : dirty_set) {
        if (page_map.count(addr)) {
            out.write(reinterpret_cast<const char*>(&addr), sizeof(addr));
            out.write(reinterpret_cast<const char*>(page_map[addr]), PageSize);
            dirty_count++;
        }
    }

    LOG_INFO(Render_Vulkan, "Saved differential snapshot to {} ({} dirty pages)",
             path, dirty_count);
}

void FaultManagedAllocator::ClearDirtySet() {
    std::lock_guard<std::mutex> guard(lock);
    dirty_set.clear();
    LOG_DEBUG(Render_Vulkan, "Cleared dirty page tracking");
}

FaultManagedAllocator::~FaultManagedAllocator() {
    running = false;

#if defined(__linux__) || defined(__ANDROID__)
    if (fault_handler.joinable()) {
        fault_handler.join();
    }

    for (auto& [addr, mem] : page_map) {
        munmap(mem, PageSize);
    }

    if (uffd != -1) {
        close(uffd);
    }
#elif defined(_WIN32)
    if (exception_handler.joinable()) {
        exception_handler.join();
    }

    // Remove the vectored exception handler
    RemoveVectoredExceptionHandler(VectoredExceptionHandler);
    current_instance = nullptr;

    for (auto& [addr, mem] : page_map) {
        VirtualFree(mem, 0, MEM_RELEASE);
    }

    // Free the base memory if needed
    if (base_address) {
        VirtualFree(base_address, 0, MEM_RELEASE);
        base_address = nullptr;
    }
#endif
}
#endif // defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)

HybridMemory::HybridMemory(const Device& device_, MemoryAllocator& allocator, size_t reuse_history)
    : device(device_), memory_allocator(allocator), reuse_manager(reuse_history) {
}

HybridMemory::~HybridMemory() = default;

void HybridMemory::InitializeGuestMemory(void* base, size_t size) {
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
    fmaa.Initialize(base, size);
    LOG_INFO(Render_Vulkan, "Initialized fault-managed guest memory at {:p}, size: {}",
             base, size);
#else
    LOG_INFO(Render_Vulkan, "Fault-managed memory not supported on this platform");
#endif
}

void* HybridMemory::TranslateAddress(size_t addr) {
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
    return fmaa.Translate(addr);
#else
    return nullptr;
#endif
}

ComputeBuffer HybridMemory::CreateComputeBuffer(VkDeviceSize size, VkBufferUsageFlags usage,
                                                MemoryUsage memory_type) {
    ComputeBuffer buffer;
    buffer.size = size;

    VkBufferCreateInfo buffer_ci = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .size = size,
        .usage = usage | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = nullptr,
    };

    // Using CreateBuffer directly handles memory allocation internally
    buffer.buffer = memory_allocator.CreateBuffer(buffer_ci, memory_type);

    LOG_DEBUG(Render_Vulkan, "Created compute buffer: size={}, usage={:x}",
              size, usage);

    return buffer;
}

void HybridMemory::SaveSnapshot(const std::string& path) {
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
    fmaa.SaveSnapshot(path);
#else
    LOG_ERROR(Render_Vulkan, "Memory snapshots not supported on this platform");
#endif
}

void HybridMemory::SaveDifferentialSnapshot(const std::string& path) {
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
    fmaa.SaveDifferentialSnapshot(path);
#else
    LOG_ERROR(Render_Vulkan, "Differential memory snapshots not supported on this platform");
#endif
}

void HybridMemory::ResetDirtyTracking() {
#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
    fmaa.ClearDirtySet();
#endif
}

} // namespace Vulkan
@ -1,119 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include <memory>
#include <string>
#include <vector>
#include <unordered_map>
#include <mutex>
#include <atomic>
#include <functional>
#include <list>
#include <set>
#include <map>
#include <thread>

#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"

namespace Vulkan {

struct ComputeBuffer {
    vk::Buffer buffer{};
    VkDeviceSize size = 0;
};

class PredictiveReuseManager {
public:
    explicit PredictiveReuseManager(size_t history_size) : max_history{history_size} {}

    void RecordUsage(u64 address, u64 size, bool write_access);
    bool IsHotRegion(u64 address, u64 size) const;
    void EvictRegion(u64 address, u64 size);
    void ClearHistory();

private:
    struct MemoryAccess {
        u64 address;
        u64 size;
        bool write_access;
        u64 timestamp;
    };

    std::vector<MemoryAccess> access_history;
    const size_t max_history;
    u64 current_timestamp{0};
    mutable std::mutex mutex;
};

#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
class FaultManagedAllocator {
public:
    static constexpr size_t PageSize = 0x1000;
    static constexpr size_t MaxPages = 16384;

    void Initialize(void* base, size_t size);
    void* Translate(size_t addr);
    void SaveSnapshot(const std::string& path);
    void SaveDifferentialSnapshot(const std::string& path);
    void ClearDirtySet();
    ~FaultManagedAllocator();

private:
    std::map<size_t, void*> page_map;
    std::list<size_t> lru;
    std::set<size_t> dirty_set;
    std::unordered_map<size_t, std::vector<u8>> compressed_store;
    std::mutex lock;

#if defined(__linux__) || defined(__ANDROID__)
    int uffd = -1;
    std::atomic<bool> running{false};
    std::thread fault_handler;
    void FaultThread();
#elif defined(_WIN32)
    void* base_address = nullptr;
    size_t memory_size = 0;
    HANDLE exception_port = nullptr;
    std::atomic<bool> running{false};
    std::thread exception_handler;
    void ExceptionHandlerThread();
    static LONG WINAPI VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info);
    static FaultManagedAllocator* current_instance;
#endif

    void Touch(size_t addr);
    void EnforceLimit();
    void* GetOrAlloc(size_t addr);
};
#endif

class HybridMemory {
public:
    explicit HybridMemory(const Device& device, MemoryAllocator& allocator, size_t reuse_history = 32);
    ~HybridMemory();

    void InitializeGuestMemory(void* base, size_t size);
    void* TranslateAddress(size_t addr);

    ComputeBuffer CreateComputeBuffer(VkDeviceSize size, VkBufferUsageFlags usage, MemoryUsage memory_type);

    void SaveSnapshot(const std::string& path);
    void SaveDifferentialSnapshot(const std::string& path);
    void ResetDirtyTracking();

private:
    const Device& device;
    MemoryAllocator& memory_allocator;
    PredictiveReuseManager reuse_manager;

#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32)
    FaultManagedAllocator fmaa;
#endif
};

} // namespace Vulkan
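To close, a hedged sketch of how the removed HybridMemory facade was presumably meant to be driven; the guest base/size, the snapshot path, and the MemoryUsage value are assumptions for illustration.

// Hypothetical usage of the removed HybridMemory interface.
HybridMemory hybrid(device, memory_allocator);
hybrid.InitializeGuestMemory(guest_base, guest_size);           // starts fault-managed paging where supported
ComputeBuffer staging = hybrid.CreateComputeBuffer(
    64 * 1024, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, MemoryUsage::Upload); // MemoryUsage value is an assumption
hybrid.SaveDifferentialSnapshot("snapshot.diff");                // writes only pages marked dirty
hybrid.ResetDirtyTracking();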