video_core: Improve texture cache memory management to prevent leaks

Implement several improvements to the texture cache memory management system
to address memory leaks that occur in memory-intensive games like TOTK
(Title ID 0100F2C0115B6000). These changes prevent the gradual memory
increase that eventually leads to crashes or undefined behavior.

Key improvements:
- Enhance garbage collection with more aggressive cleanup thresholds
- Add emergency resource cleanup for persistent high memory usage
- Improve DeleteImage to ensure proper resource deallocation
- Make DelayedDestructionRing thread-safe with proper mutex protection
- Track consecutive high-memory frames to detect potential leaks
- Add emergency cleanup mechanism for extreme memory pressure situations
- Use proper type casting in std::max to fix compilation errors (see the sketch below)

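As context for the last bullet, a minimal sketch of the mismatch (illustration only, not taken from the codebase; it assumes u64 is an alias for std::uint64_t, as in the project's common type definitions):

    #include <algorithm>
    #include <cstdint>

    using u64 = std::uint64_t; // assumption: mirrors the project's common type alias

    int main() {
        u64 frames = 10;
        // std::max deduces a single type for both arguments. Mixing a 1ULL literal
        // (unsigned long long) with a u64 (unsigned long on LP64 targets) can
        // therefore fail to compile:
        //     auto bad = std::max(1ULL, frames / 2);
        // Casting the literal keeps both arguments the same type:
        const u64 ok = std::max(u64(1), frames / 2); // equivalently: std::max<u64>(1, frames / 2)
        return static_cast<int>(ok);
    }
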
This should significantly improve stability during extended gameplay
sessions with memory-intensive titles.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
Zephyron 2025-04-20 17:39:14 +10:00
parent e72d695115
commit ff9c61e7c7
2 changed files with 135 additions and 15 deletions

@@ -1,12 +1,15 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-FileCopyrightText: 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
#include <cstddef>
#include <mutex>
#include <utility>
#include <vector>
#include "common/logging/log.h"
namespace VideoCommon {
@@ -14,18 +17,59 @@ namespace VideoCommon {
template <typename T, size_t TICKS_TO_DESTROY>
class DelayedDestructionRing {
public:
DelayedDestructionRing() = default;
~DelayedDestructionRing() {
// Ensure all resources are properly released when ring is destroyed
for (auto& element_list : elements) {
element_list.clear();
}
}
void Tick() {
std::scoped_lock lock{ring_mutex};
// Move to next position in the ring
index = (index + 1) % TICKS_TO_DESTROY;
elements[index].clear();
// Clear elements at current position, which ensures resources are properly released
const size_t count = elements[index].size();
if (count > 0) {
// If more than a threshold of elements are being destroyed at once, log it
if (count > 100) {
LOG_DEBUG(Render_Vulkan, "Destroying {} delayed objects", count);
}
elements[index].clear();
}
}
void Push(T&& object) {
std::scoped_lock lock{ring_mutex};
elements[index].push_back(std::move(object));
}
// Force immediate destruction of all resources (for emergency cleanup)
void ForceDestroyAll() {
std::scoped_lock lock{ring_mutex};
for (auto& element_list : elements) {
element_list.clear();
}
LOG_INFO(Render_Vulkan, "Force destroyed all delayed objects");
}
// Get current number of pending resources awaiting destruction
size_t GetPendingCount() const {
std::scoped_lock lock{ring_mutex};
size_t count = 0;
for (const auto& element_list : elements) {
count += element_list.size();
}
return count;
}
private:
size_t index = 0;
std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
mutable std::mutex ring_mutex;
};
} // namespace VideoCommon

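A self-contained sketch of how the updated ring is intended to behave (illustration only, not part of the diff): Push and Tick can now be called from different threads without racing on the ring state, an object queued in the current slot is released TICKS_TO_DESTROY ticks later, and ForceDestroyAll releases everything immediately. Logging and the batch-size check are omitted, and Probe is a placeholder resource type.

// Simplified stand-in for the ring above (logging omitted).
#include <array>
#include <cstddef>
#include <cstdio>
#include <mutex>
#include <utility>
#include <vector>

template <typename T, std::size_t TICKS_TO_DESTROY>
class DelayedDestructionRing {
public:
    void Tick() {
        std::scoped_lock lock{ring_mutex};
        index = (index + 1) % TICKS_TO_DESTROY;
        elements[index].clear(); // objects pushed TICKS_TO_DESTROY ticks ago die here
    }
    void Push(T&& object) {
        std::scoped_lock lock{ring_mutex};
        elements[index].push_back(std::move(object));
    }
    void ForceDestroyAll() { // emergency path: release everything immediately
        std::scoped_lock lock{ring_mutex};
        for (auto& element_list : elements) {
            element_list.clear();
        }
    }
private:
    std::size_t index = 0;
    std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
    std::mutex ring_mutex; // added by this commit: Push/Tick could otherwise race
};

// Placeholder resource whose destructor reports when the live instance is released.
struct Probe {
    bool alive = true;
    Probe() = default;
    Probe(Probe&& other) noexcept : alive(std::exchange(other.alive, false)) {}
    Probe& operator=(Probe&& other) noexcept {
        alive = std::exchange(other.alive, false);
        return *this;
    }
    ~Probe() {
        if (alive) {
            std::puts("resource released");
        }
    }
};

int main() {
    DelayedDestructionRing<Probe, 3> ring;
    ring.Push(Probe{});      // queued, not released yet
    ring.Tick();             // tick 1: a different slot is cleared, object survives
    ring.Tick();             // tick 2: still alive
    ring.Tick();             // tick 3: ring wraps back to its slot -> "resource released"
    ring.ForceDestroyAll();  // emergency path from this commit; a no-op here, nothing pending
    return 0;
}
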

@@ -1,4 +1,5 @@
// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
// SPDX-FileCopyrightText: 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
@@ -80,8 +81,10 @@ void TextureCache<P>::RunGarbageCollector() {
const auto Configure = [&](bool allow_aggressive) {
high_priority_mode = total_used_memory >= expected_memory;
aggressive_mode = allow_aggressive && total_used_memory >= critical_memory;
ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
// Reduce ticks_to_destroy to be more aggressive in freeing memory
ticks_to_destroy = aggressive_mode ? 5ULL : high_priority_mode ? 15ULL : 40ULL;
// Increase num_iterations to clean up more resources at once for memory-intensive games
num_iterations = aggressive_mode ? 60 : (high_priority_mode ? 30 : 15);
};
const auto Cleanup = [this, &num_iterations, &high_priority_mode,
&aggressive_mode](ImageId image_id) {
@@ -95,7 +98,8 @@ void TextureCache<P>::RunGarbageCollector() {
// used by the async decoder thread.
return false;
}
if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
// Be more aggressive with cleanup for memory-intensive games
if (!aggressive_mode && !high_priority_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
return false;
}
const bool must_download =
@@ -118,19 +122,20 @@ void TextureCache<P>::RunGarbageCollector() {
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
if (total_used_memory < critical_memory) {
if (aggressive_mode) {
// Sink the aggresiveness.
num_iterations >>= 2;
// Sink the aggressiveness more gradually to prevent oscillation
num_iterations = num_iterations * 3 / 4;
aggressive_mode = false;
return false;
}
if (high_priority_mode && total_used_memory < expected_memory) {
num_iterations >>= 1;
num_iterations = num_iterations * 3 / 4;
high_priority_mode = false;
}
}
return false;
};
// Run garbage collection more frequently for memory-intensive games
// Try to remove anything old enough and not high priority.
Configure(false);
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
@@ -138,19 +143,67 @@ void TextureCache<P>::RunGarbageCollector() {
// If pressure is still too high, prune aggressively.
if (total_used_memory >= critical_memory) {
Configure(true);
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
// Make a more thorough sweep with more aggressive settings
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy / 2, Cleanup);
// If we're still in a critical memory situation, do emergency cleanup
if (total_used_memory >= critical_memory + 50_MiB) {
// Last resort emergency cleanup - reduce thresholds dramatically
ticks_to_destroy = 1;
num_iterations = 100;
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
}
}
}
template <class P>
void TextureCache<P>::TickFrame() {
static u64 consecutive_high_memory_frames = 0;
static constexpr u64 EMERGENCY_CLEANUP_THRESHOLD = 120; // ~2 seconds at 60 FPS
// If we can obtain the memory info, use it instead of the estimate.
if (runtime.CanReportMemoryUsage()) {
total_used_memory = runtime.GetDeviceMemoryUsage();
}
// Track consecutive high memory frames to detect potential leaks
if (total_used_memory > critical_memory) {
consecutive_high_memory_frames++;
if (consecutive_high_memory_frames > EMERGENCY_CLEANUP_THRESHOLD) {
// Emergency situation - extreme memory pressure for extended time
// This likely indicates a leak or insufficient cleanup
LOG_WARNING(Render, "Emergency texture cache cleanup triggered after {} frames of high memory usage",
consecutive_high_memory_frames);
// Force immediate cleanup of all pending resources
sentenced_images.ForceDestroyAll();
sentenced_framebuffers.ForceDestroyAll();
sentenced_image_view.ForceDestroyAll();
// Do a forced garbage collection pass
bool saved_value = has_deleted_images;
RunGarbageCollector();
has_deleted_images = saved_value;
// Reset counter but keep some pressure
consecutive_high_memory_frames = 30;
}
else if (consecutive_high_memory_frames > 60) { // If high memory for >60 frames (~1 second)
// Force a more aggressive cleanup cycle
RunGarbageCollector();
consecutive_high_memory_frames = 45; // Reset but keep some pressure
}
} else if (total_used_memory > expected_memory) {
// Use u64(1) to ensure type compatibility, avoiding the ULL suffix
consecutive_high_memory_frames = std::max(u64(1), consecutive_high_memory_frames / 2);
} else {
consecutive_high_memory_frames = 0;
}
if (total_used_memory > minimum_memory) {
RunGarbageCollector();
}
sentenced_images.Tick();
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
@@ -2165,27 +2218,35 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
if (image.HasScaled()) {
total_used_memory -= GetScaledImageSizeBytes(image);
}
// Calculate accurate memory usage for this image
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
if ((IsPixelFormatASTC(image.info.format) &&
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
True(image.flags & ImageFlagBits::Converted)) {
tentative_size = TranscodedAstcSize(tentative_size, image.info.format);
}
// Ensure memory usage is properly accounted for
total_used_memory -= Common::AlignUp(tentative_size, 1024);
const GPUVAddr gpu_addr = image.gpu_addr;
const auto alloc_it = image_allocs_table.find(gpu_addr);
if (alloc_it == image_allocs_table.end()) {
ASSERT_MSG(false, "Trying to delete an image alloc that does not exist in address 0x{:x}",
gpu_addr);
LOG_ERROR(HW_GPU, "Trying to delete an image alloc that does not exist in address 0x{:x}",
gpu_addr);
return;
}
const ImageAllocId alloc_id = alloc_it->second;
std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
if (alloc_image_it == alloc_images.end()) {
ASSERT_MSG(false, "Trying to delete an image that does not exist");
LOG_ERROR(HW_GPU, "Trying to delete an image that does not exist");
return;
}
// Ensure image is properly untracked and unregistered before deletion
ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
@@ -2196,6 +2257,8 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
for (size_t rt = 0; rt < NUM_RT; ++rt) {
dirty[Dirty::ColorBuffer0 + rt] = true;
}
// Clear render target references
const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
for (const ImageViewId image_view_id : image_view_ids) {
std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
@@ -2203,9 +2266,12 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
render_targets.depth_buffer_id = ImageViewId{};
}
}
// Clean up references and dependencies
RemoveImageViewReferences(image_view_ids);
RemoveFramebuffers(image_view_ids);
// Handle aliased images
for (const AliasedImage& alias : image.aliased_images) {
ImageBase& other_image = slot_images[alias.id];
[[maybe_unused]] const size_t num_removed_aliases =
@@ -2213,33 +2279,43 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
return other_alias.id == image_id;
});
other_image.CheckAliasState();
ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
num_removed_aliases);
if (num_removed_aliases != 1) {
LOG_WARNING(HW_GPU, "Invalid number of removed aliases: {}", num_removed_aliases);
}
}
// Handle overlapping images
for (const ImageId overlap_id : image.overlapping_images) {
ImageBase& other_image = slot_images[overlap_id];
[[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
other_image.overlapping_images,
[image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
other_image.CheckBadOverlapState();
ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}",
num_removed_overlaps);
if (num_removed_overlaps != 1) {
LOG_WARNING(HW_GPU, "Invalid number of removed overlaps: {}", num_removed_overlaps);
}
}
// Free resources - either immediately or queue for delayed destruction
for (const ImageViewId image_view_id : image_view_ids) {
if (!immediate_delete) {
sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
}
slot_image_views.erase(image_view_id);
}
if (!immediate_delete) {
sentenced_images.Push(std::move(slot_images[image_id]));
}
slot_images.erase(image_id);
// Clean up allocation table
alloc_images.erase(alloc_image_it);
if (alloc_images.empty()) {
image_allocs_table.erase(alloc_it);
}
// Mark tables as invalidated
for (size_t c : active_channel_ids) {
auto& channel_info = channel_storage[c];
if constexpr (ENABLE_VALIDATION) {