mirror of
https://git.citron-emu.org/citron/emu
synced 2025-05-12 19:00:37 +01:00
video_core: Improve texture cache memory management to prevent leaks
Implement several improvements to the texture cache memory management system to address memory leaks that occur in memory-intensive games like TOTK (Title ID 0100F2C0115B6000). These changes prevent the gradual memory increase that eventually leads to crashes or undefined behavior. Key improvements: - Enhance garbage collection with more aggressive cleanup thresholds - Add emergency resource cleanup for persistent high memory usage - Improve DeleteImage to ensure proper resource deallocation - Make DelayedDestructionRing thread-safe with proper mutex protection - Track consecutive high-memory frames to detect potential leaks - Add emergency cleanup mechanism for extreme memory pressure situations - Use proper type casting in std::max to fix compilation errors This should significantly improve stability during extended gameplay sessions with memory-intensive titles. Signed-off-by: Zephyron <zephyron@citron-emu.org>
This commit is contained in:
parent
e72d695115
commit
ff9c61e7c7
2 changed files with 135 additions and 15 deletions
|
@ -1,12 +1,15 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: 2025 citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "common/logging/log.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
|
@ -14,18 +17,59 @@ namespace VideoCommon {
|
|||
template <typename T, size_t TICKS_TO_DESTROY>
|
||||
class DelayedDestructionRing {
|
||||
public:
|
||||
DelayedDestructionRing() = default;
|
||||
~DelayedDestructionRing() {
|
||||
// Ensure all resources are properly released when ring is destroyed
|
||||
for (auto& element_list : elements) {
|
||||
element_list.clear();
|
||||
}
|
||||
}
|
||||
|
||||
void Tick() {
|
||||
std::scoped_lock lock{ring_mutex};
|
||||
|
||||
// Move to next position in the ring
|
||||
index = (index + 1) % TICKS_TO_DESTROY;
|
||||
elements[index].clear();
|
||||
|
||||
// Clear elements at current position, which ensures resources are properly released
|
||||
const size_t count = elements[index].size();
|
||||
if (count > 0) {
|
||||
// If more than a threshold of elements are being destroyed at once, log it
|
||||
if (count > 100) {
|
||||
LOG_DEBUG(Render_Vulkan, "Destroying {} delayed objects", count);
|
||||
}
|
||||
elements[index].clear();
|
||||
}
|
||||
}
|
||||
|
||||
void Push(T&& object) {
|
||||
std::scoped_lock lock{ring_mutex};
|
||||
elements[index].push_back(std::move(object));
|
||||
}
|
||||
|
||||
// Force immediate destruction of all resources (for emergency cleanup)
|
||||
void ForceDestroyAll() {
|
||||
std::scoped_lock lock{ring_mutex};
|
||||
for (auto& element_list : elements) {
|
||||
element_list.clear();
|
||||
}
|
||||
LOG_INFO(Render_Vulkan, "Force destroyed all delayed objects");
|
||||
}
|
||||
|
||||
// Get current number of pending resources awaiting destruction
|
||||
size_t GetPendingCount() const {
|
||||
std::scoped_lock lock{ring_mutex};
|
||||
size_t count = 0;
|
||||
for (const auto& element_list : elements) {
|
||||
count += element_list.size();
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t index = 0;
|
||||
std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
|
||||
mutable std::mutex ring_mutex;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
|
||||
// SPDX-FileCopyrightText: 2025 citron Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
@ -80,8 +81,10 @@ void TextureCache<P>::RunGarbageCollector() {
|
|||
const auto Configure = [&](bool allow_aggressive) {
|
||||
high_priority_mode = total_used_memory >= expected_memory;
|
||||
aggressive_mode = allow_aggressive && total_used_memory >= critical_memory;
|
||||
ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
|
||||
num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
|
||||
// Reduce ticks_to_destroy to be more aggressive in freeing memory
|
||||
ticks_to_destroy = aggressive_mode ? 5ULL : high_priority_mode ? 15ULL : 40ULL;
|
||||
// Increase num_iterations to clean up more resources at once for memory-intensive games
|
||||
num_iterations = aggressive_mode ? 60 : (high_priority_mode ? 30 : 15);
|
||||
};
|
||||
const auto Cleanup = [this, &num_iterations, &high_priority_mode,
|
||||
&aggressive_mode](ImageId image_id) {
|
||||
|
@ -95,7 +98,8 @@ void TextureCache<P>::RunGarbageCollector() {
|
|||
// used by the async decoder thread.
|
||||
return false;
|
||||
}
|
||||
if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
|
||||
// Be more aggressive with cleanup for memory-intensive games
|
||||
if (!aggressive_mode && !high_priority_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
|
||||
return false;
|
||||
}
|
||||
const bool must_download =
|
||||
|
@ -118,19 +122,20 @@ void TextureCache<P>::RunGarbageCollector() {
|
|||
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
|
||||
if (total_used_memory < critical_memory) {
|
||||
if (aggressive_mode) {
|
||||
// Sink the aggresiveness.
|
||||
num_iterations >>= 2;
|
||||
// Sink the aggresiveness more gradually to prevent oscillation
|
||||
num_iterations = num_iterations * 3 / 4;
|
||||
aggressive_mode = false;
|
||||
return false;
|
||||
}
|
||||
if (high_priority_mode && total_used_memory < expected_memory) {
|
||||
num_iterations >>= 1;
|
||||
num_iterations = num_iterations * 3 / 4;
|
||||
high_priority_mode = false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
// Run garbage collection more frequently for memory-intensive games
|
||||
// Try to remove anything old enough and not high priority.
|
||||
Configure(false);
|
||||
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
|
||||
|
@ -138,19 +143,67 @@ void TextureCache<P>::RunGarbageCollector() {
|
|||
// If pressure is still too high, prune aggressively.
|
||||
if (total_used_memory >= critical_memory) {
|
||||
Configure(true);
|
||||
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
|
||||
// Make a more thorough sweep with more aggressive settings
|
||||
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy / 2, Cleanup);
|
||||
|
||||
// If we're still in a critical memory situation, do emergency cleanup
|
||||
if (total_used_memory >= critical_memory + 50_MiB) {
|
||||
// Last resort emergency cleanup - reduce thresholds dramatically
|
||||
ticks_to_destroy = 1;
|
||||
num_iterations = 100;
|
||||
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::TickFrame() {
|
||||
static u64 consecutive_high_memory_frames = 0;
|
||||
static constexpr u64 EMERGENCY_CLEANUP_THRESHOLD = 120; // ~2 seconds at 60 FPS
|
||||
|
||||
// If we can obtain the memory info, use it instead of the estimate.
|
||||
if (runtime.CanReportMemoryUsage()) {
|
||||
total_used_memory = runtime.GetDeviceMemoryUsage();
|
||||
}
|
||||
|
||||
// Track consecutive high memory frames to detect potential leaks
|
||||
if (total_used_memory > critical_memory) {
|
||||
consecutive_high_memory_frames++;
|
||||
if (consecutive_high_memory_frames > EMERGENCY_CLEANUP_THRESHOLD) {
|
||||
// Emergency situation - extreme memory pressure for extended time
|
||||
// This likely indicates a leak or insufficient cleanup
|
||||
LOG_WARNING(Render, "Emergency texture cache cleanup triggered after {} frames of high memory usage",
|
||||
consecutive_high_memory_frames);
|
||||
|
||||
// Force immediate cleanup of all pending resources
|
||||
sentenced_images.ForceDestroyAll();
|
||||
sentenced_framebuffers.ForceDestroyAll();
|
||||
sentenced_image_view.ForceDestroyAll();
|
||||
|
||||
// Do a forced garbage collection pass
|
||||
bool saved_value = has_deleted_images;
|
||||
RunGarbageCollector();
|
||||
has_deleted_images = saved_value;
|
||||
|
||||
// Reset counter but keep some pressure
|
||||
consecutive_high_memory_frames = 30;
|
||||
}
|
||||
else if (consecutive_high_memory_frames > 60) { // If high memory for >60 frames (~1 second)
|
||||
// Force a more aggressive cleanup cycle
|
||||
RunGarbageCollector();
|
||||
consecutive_high_memory_frames = 45; // Reset but keep some pressure
|
||||
}
|
||||
} else if (total_used_memory > expected_memory) {
|
||||
// Use u64(1) to ensure type compatibility, avoiding the ULL suffix
|
||||
consecutive_high_memory_frames = std::max(u64(1), consecutive_high_memory_frames / 2);
|
||||
} else {
|
||||
consecutive_high_memory_frames = 0;
|
||||
}
|
||||
|
||||
if (total_used_memory > minimum_memory) {
|
||||
RunGarbageCollector();
|
||||
}
|
||||
|
||||
sentenced_images.Tick();
|
||||
sentenced_framebuffers.Tick();
|
||||
sentenced_image_view.Tick();
|
||||
|
@ -2165,27 +2218,35 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
|
|||
if (image.HasScaled()) {
|
||||
total_used_memory -= GetScaledImageSizeBytes(image);
|
||||
}
|
||||
|
||||
// Calculate accurate memory usage for this image
|
||||
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
|
||||
if ((IsPixelFormatASTC(image.info.format) &&
|
||||
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
|
||||
True(image.flags & ImageFlagBits::Converted)) {
|
||||
tentative_size = TranscodedAstcSize(tentative_size, image.info.format);
|
||||
}
|
||||
|
||||
// Ensure memory usage is properly accounted for
|
||||
total_used_memory -= Common::AlignUp(tentative_size, 1024);
|
||||
|
||||
const GPUVAddr gpu_addr = image.gpu_addr;
|
||||
const auto alloc_it = image_allocs_table.find(gpu_addr);
|
||||
if (alloc_it == image_allocs_table.end()) {
|
||||
ASSERT_MSG(false, "Trying to delete an image alloc that does not exist in address 0x{:x}",
|
||||
gpu_addr);
|
||||
LOG_ERROR(HW_GPU, "Trying to delete an image alloc that does not exist in address 0x{:x}",
|
||||
gpu_addr);
|
||||
return;
|
||||
}
|
||||
|
||||
const ImageAllocId alloc_id = alloc_it->second;
|
||||
std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
|
||||
const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
|
||||
if (alloc_image_it == alloc_images.end()) {
|
||||
ASSERT_MSG(false, "Trying to delete an image that does not exist");
|
||||
LOG_ERROR(HW_GPU, "Trying to delete an image that does not exist");
|
||||
return;
|
||||
}
|
||||
|
||||
// Ensure image is properly untracked and unregistered before deletion
|
||||
ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
|
||||
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
|
||||
|
||||
|
@ -2196,6 +2257,8 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
|
|||
for (size_t rt = 0; rt < NUM_RT; ++rt) {
|
||||
dirty[Dirty::ColorBuffer0 + rt] = true;
|
||||
}
|
||||
|
||||
// Clear render target references
|
||||
const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
|
||||
for (const ImageViewId image_view_id : image_view_ids) {
|
||||
std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
|
||||
|
@ -2203,9 +2266,12 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
|
|||
render_targets.depth_buffer_id = ImageViewId{};
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up references and dependencies
|
||||
RemoveImageViewReferences(image_view_ids);
|
||||
RemoveFramebuffers(image_view_ids);
|
||||
|
||||
// Handle aliased images
|
||||
for (const AliasedImage& alias : image.aliased_images) {
|
||||
ImageBase& other_image = slot_images[alias.id];
|
||||
[[maybe_unused]] const size_t num_removed_aliases =
|
||||
|
@ -2213,33 +2279,43 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
|
|||
return other_alias.id == image_id;
|
||||
});
|
||||
other_image.CheckAliasState();
|
||||
ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
|
||||
num_removed_aliases);
|
||||
if (num_removed_aliases != 1) {
|
||||
LOG_WARNING(HW_GPU, "Invalid number of removed aliases: {}", num_removed_aliases);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle overlapping images
|
||||
for (const ImageId overlap_id : image.overlapping_images) {
|
||||
ImageBase& other_image = slot_images[overlap_id];
|
||||
[[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
|
||||
other_image.overlapping_images,
|
||||
[image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
|
||||
other_image.CheckBadOverlapState();
|
||||
ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}",
|
||||
num_removed_overlaps);
|
||||
if (num_removed_overlaps != 1) {
|
||||
LOG_WARNING(HW_GPU, "Invalid number of removed overlaps: {}", num_removed_overlaps);
|
||||
}
|
||||
}
|
||||
|
||||
// Free resources - either immediately or queue for delayed destruction
|
||||
for (const ImageViewId image_view_id : image_view_ids) {
|
||||
if (!immediate_delete) {
|
||||
sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
|
||||
}
|
||||
slot_image_views.erase(image_view_id);
|
||||
}
|
||||
|
||||
if (!immediate_delete) {
|
||||
sentenced_images.Push(std::move(slot_images[image_id]));
|
||||
}
|
||||
slot_images.erase(image_id);
|
||||
|
||||
// Clean up allocation table
|
||||
alloc_images.erase(alloc_image_it);
|
||||
if (alloc_images.empty()) {
|
||||
image_allocs_table.erase(alloc_it);
|
||||
}
|
||||
|
||||
// Mark tables as invalidated
|
||||
for (size_t c : active_channel_ids) {
|
||||
auto& channel_info = channel_storage[c];
|
||||
if constexpr (ENABLE_VALIDATION) {
|
||||
|
|
Loading…
Reference in a new issue