From 7e58599d694e82751f7a2da966ca71c9e462bb0e Mon Sep 17 00:00:00 2001 From: Zephyron Date: Mon, 5 May 2025 18:05:39 +1000 Subject: [PATCH] feat: downgrade fmt requirement to version 10 - Change fmt requirement from 11 to 10 in CMakeLists.txt and vcpkg.json - Remove null pointer handling in memory operations - Remove memory region management features - Remove unused host shader files and code - Simplify buffer cache and texture cache memory management - Remove thread safety and emergency cleanup from DelayedDestructionRing - Revert storage buffer optimizations and safety fallbacks - Remove Citron copyright notices from multiple files Signed-off-by: Zephyron --- CMakeLists.txt | 2 +- src/core/memory.cpp | 99 +---------------- src/core/memory.h | 74 ------------- .../global_memory_to_storage_buffer_pass.cpp | 33 ++---- src/video_core/buffer_cache/buffer_cache.h | 31 +----- src/video_core/delayed_destruction_ring.h | 46 +------- src/video_core/host_shaders/CMakeLists.txt | 10 -- .../convert_abgr8_srgb_to_d24s8.frag | 46 -------- .../convert_astc_hdr_to_rgba16f.comp | 28 ----- .../host_shaders/convert_bc7_to_rgba8.comp | 29 ----- .../host_shaders/convert_rgb_to_yuv420.comp | 29 ----- .../convert_rgba16f_to_rgba8.frag | 31 ------ .../host_shaders/convert_rgba8_to_bgra8.frag | 11 -- .../host_shaders/convert_yuv420_to_rgb.comp | 30 ----- .../host_shaders/dither_temporal.frag | 29 ----- .../dynamic_resolution_scale.comp | 68 ------------ .../opengl_present_scaleforce.frag | 2 +- src/video_core/texture_cache/texture_cache.h | 104 +++--------------- vcpkg.json | 12 +- 19 files changed, 42 insertions(+), 672 deletions(-) delete mode 100644 src/video_core/host_shaders/convert_abgr8_srgb_to_d24s8.frag delete mode 100644 src/video_core/host_shaders/convert_astc_hdr_to_rgba16f.comp delete mode 100644 src/video_core/host_shaders/convert_bc7_to_rgba8.comp delete mode 100644 src/video_core/host_shaders/convert_rgb_to_yuv420.comp delete mode 100644 src/video_core/host_shaders/convert_rgba16f_to_rgba8.frag delete mode 100644 src/video_core/host_shaders/convert_rgba8_to_bgra8.frag delete mode 100644 src/video_core/host_shaders/convert_yuv420_to_rgb.comp delete mode 100644 src/video_core/host_shaders/dither_temporal.frag delete mode 100644 src/video_core/host_shaders/dynamic_resolution_scale.comp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9cde23779..a53aded3d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -369,7 +369,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin) # Enforce the search mode of non-required packages for better and shorter failure messages find_package(Boost REQUIRED context) find_package(enet MODULE) -find_package(fmt 11 REQUIRED) +find_package(fmt 10 REQUIRED) if (CITRON_USE_LLVM_DEMANGLE) find_package(LLVM MODULE COMPONENTS Demangle) endif() diff --git a/src/core/memory.cpp b/src/core/memory.cpp index b92698945..62437415b 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -1,6 +1,5 @@ // SPDX-FileCopyrightText: 2015 Citra Emulator Project // SPDX-FileCopyrightText: 2018 yuzu Emulator Project -// SPDX-FileCopyrightText: 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -738,21 +737,12 @@ struct Memory::Impl { const u8* const ptr = GetPointerImpl( GetInteger(vaddr), [vaddr]() { - // Add special handling for null pointer reads - if (GetInteger(vaddr) == 0 || GetInteger(vaddr) < 0x1000) { - LOG_ERROR(HW_Memory, "Null pointer Read{} @ 0x{:016X}", sizeof(T) * 8, - GetInteger(vaddr)); - return; - } LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, GetInteger(vaddr)); }, [&]() { HandleRasterizerDownload(GetInteger(vaddr), sizeof(T)); }); if (ptr) { std::memcpy(&result, ptr, sizeof(T)); - } else if (GetInteger(vaddr) == 0) { - // Return 0 for null pointer reads instead of random memory - result = 0; } return result; } @@ -771,12 +761,6 @@ struct Memory::Impl { u8* const ptr = GetPointerImpl( GetInteger(vaddr), [vaddr, data]() { - // Add special handling for null pointer writes - if (GetInteger(vaddr) == 0 || GetInteger(vaddr) < 0x1000) { - LOG_ERROR(HW_Memory, "Null pointer Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, - GetInteger(vaddr), static_cast(data)); - return; - } LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, GetInteger(vaddr), static_cast(data)); }, @@ -784,7 +768,6 @@ struct Memory::Impl { if (ptr) { std::memcpy(ptr, &data, sizeof(T)); } - // Silently ignore writes to null pointer } template @@ -910,9 +893,7 @@ struct Memory::Impl { #endif }; -Memory::Memory(Core::System& system_) : system(system_), impl(std::make_unique(system_)), gen(rd()) { - // Initialize the random number distribution - dis = std::uniform_int_distribution(0, std::numeric_limits::max()); +Memory::Memory(Core::System& system_) : system{system_} { Reset(); } @@ -1096,54 +1077,28 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) } bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { - // Add detailed debug logging - LOG_DEBUG(HW_Memory, "JIT requesting NCE invalidation - Address: 0x{:016X}, Size: {} bytes", - GetInteger(vaddr), size); - - // First check if the memory region is valid and executable - if (!IsValidVirtualAddressRange(vaddr, size)) { - LOG_WARNING(HW_Memory, "Skipping InvalidateNCE: Invalid address range - {} bytes @ 0x{:016X}", - size, GetInteger(vaddr)); - return false; - } - [[maybe_unused]] bool mapped = true; [[maybe_unused]] bool rasterizer = false; - // Get pointer and check memory type u8* const ptr = impl->GetPointerImpl( GetInteger(vaddr), [&] { - LOG_WARNING(HW_Memory, - "Skipping InvalidateNCE: Unmapped memory region - {} bytes @ 0x{:016X}", - size, GetInteger(vaddr)); + LOG_ERROR(HW_Memory, "Unmapped InvalidateNCE for {} bytes @ {:#x}", size, + GetInteger(vaddr)); mapped = false; }, [&] { rasterizer = true; }); - - // Handle rasterizer memory separately if (rasterizer) { - LOG_DEBUG(HW_Memory, "Invalidating rasterizer memory region - {} bytes @ 0x{:016X}", - size, GetInteger(vaddr)); impl->InvalidateGPUMemory(ptr, size); } #ifdef __linux__ - // Handle separate heap mapping on Linux - if (!rasterizer && mapped && ptr) { - LOG_DEBUG(HW_Memory, "Handling separate heap mapping for NCE region"); + if (!rasterizer && mapped) { impl->buffer->DeferredMapSeparateHeap(GetInteger(vaddr)); } #endif - // Return success only if we have a valid pointer and the region was mapped - const bool success = mapped && ptr != nullptr; - if (!success) { - LOG_WARNING(HW_Memory, "NCE invalidation failed - Address: 0x{:016X}, Size: {} bytes", - GetInteger(vaddr), size); - } - - return success; + return mapped && ptr != nullptr; } bool Memory::InvalidateSeparateHeap(void* fault_address) { @@ -1154,48 +1109,4 @@ bool Memory::InvalidateSeparateHeap(void* fault_address) { #endif } -Common::ProcessAddress Memory::GenerateRandomBaseAddress() { - u64 random_bits = dis(gen); - return Common::ProcessAddress((random_bits & ~NRO_BASE_ADDRESS_RANDOMIZATION_MASK) | - (random_bits & NRO_BASE_ADDRESS_RANDOMIZATION_MASK)); -} - -Memory::MemoryRegion* Memory::FindRegion(Common::ProcessAddress address) { - for (auto& entry : memory_regions) { - if (address >= entry.second.start_address && - address < entry.second.start_address + entry.second.size) { - return &entry.second; - } - } - return nullptr; -} - -void Memory::MapMemoryRegion(Common::ProcessAddress start_address, u64 size, MemoryRegionType type, - bool exec, bool write) { - if (start_address + size > EMULATED_MEMORY_SIZE) { - LOG_ERROR(HW_Memory, "Memory mapping exceeds emulated memory boundaries at address {:016X}", - GetInteger(start_address)); - return; - } - - // Create the memory region - memory_regions[start_address] = MemoryRegion(start_address, size, type, exec, write); - - // Map the region in the page table - Common::MemoryPermission perms{}; - if (exec) perms |= Common::MemoryPermission::Execute; - if (write) perms |= Common::MemoryPermission::Write; - perms |= Common::MemoryPermission::Read; - - // Using the MapMemoryRegion method defined in the Impl struct - impl->MapMemoryRegion(*impl->current_page_table, start_address, size, - Common::PhysicalAddress(GetInteger(start_address)), perms, false); -} - -Common::ProcessAddress Memory::MapBinary(u64 size) { - Common::ProcessAddress base_address = GenerateRandomBaseAddress(); - MapMemoryRegion(base_address, size, MemoryRegionType::BinaryMemory, true, true); - return base_address; -} - } // namespace Core::Memory diff --git a/src/core/memory.h b/src/core/memory.h index 45189c426..3f315ff7a 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: 2014 Citra Emulator Project -// SPDX-FileCopyrightText: 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -10,8 +9,6 @@ #include #include #include -#include -#include #include "common/scratch_buffer.h" #include "common/typed_address.h" @@ -46,9 +43,6 @@ constexpr std::size_t CITRON_PAGEBITS = 12; constexpr u64 CITRON_PAGESIZE = 1ULL << CITRON_PAGEBITS; constexpr u64 CITRON_PAGEMASK = CITRON_PAGESIZE - 1; -/// Emulated memory size (4GB) -constexpr u64 EMULATED_MEMORY_SIZE = 4ULL * 1024 * 1024 * 1024; - /// Virtual user-space memory regions enum : u64 { /// TLS (Thread-Local Storage) related. @@ -56,18 +50,6 @@ enum : u64 { /// Application stack DEFAULT_STACK_SIZE = 0x100000, - - /// Mask to randomize bits 37-12 for NRO base address - NRO_BASE_ADDRESS_RANDOMIZATION_MASK = 0xFFFFFFFFFFFFF000, -}; - -/// Types of memory regions in the system -enum class MemoryRegionType { - SystemMemory, - GraphicsMemory, - IOMemory, - BinaryMemory, - Undefined }; /// Central class that handles all memory operations and state. @@ -82,55 +64,6 @@ public: Memory(Memory&&) = default; Memory& operator=(Memory&&) = delete; - /** - * Structure representing a memory region with its properties - */ - struct MemoryRegion { - Common::ProcessAddress start_address; - u64 size; - std::unique_ptr data; - bool is_mapped; - MemoryRegionType type; - bool is_executable; - bool is_writable; - - // Default constructor needed for STL containers - MemoryRegion() : start_address(0), size(0), data(nullptr), is_mapped(false), - type(MemoryRegionType::Undefined), is_executable(false), is_writable(false) {} - - MemoryRegion(Common::ProcessAddress start, u64 sz, MemoryRegionType t, bool exec = false, bool write = false) - : start_address(start), size(sz), data(std::make_unique(sz)), is_mapped(false), - type(t), is_executable(exec), is_writable(write) {} - }; - - /** - * Maps a memory region with the specified properties - * - * @param start_address The starting address of the region - * @param size The size of the region in bytes - * @param type The type of memory region - * @param exec Whether the region is executable - * @param write Whether the region is writable - */ - void MapMemoryRegion(Common::ProcessAddress start_address, u64 size, MemoryRegionType type, - bool exec = false, bool write = false); - - /** - * Maps a binary with a randomized base address - * - * @param size The size of the binary in bytes - * @returns The base address where the binary was mapped - */ - Common::ProcessAddress MapBinary(u64 size); - - /** - * Finds a memory region containing the specified address - * - * @param address The address to search for - * @returns Pointer to the memory region if found, nullptr otherwise - */ - MemoryRegion* FindRegion(Common::ProcessAddress address); - /** * Resets the state of the Memory system. */ @@ -564,13 +497,6 @@ private: struct Impl; std::unique_ptr impl; - - std::unordered_map memory_regions; - std::random_device rd; - std::mt19937 gen; - std::uniform_int_distribution dis; - - Common::ProcessAddress GenerateRandomBaseAddress(); }; template diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 8849a7a73..0cea79945 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -275,15 +274,8 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { /// Returns true when a storage buffer address satisfies a bias bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept { - // For performance, strongly prefer addresses that meet the bias criteria - // and have optimal alignment - if (storage_buffer.index == bias.index && - storage_buffer.offset >= bias.offset_begin && - storage_buffer.offset < bias.offset_end) { - return true; - } - // Only fall back to other addresses if absolutely necessary - return false; + return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin && + storage_buffer.offset < bias.offset_end; } struct LowAddrInfo { @@ -359,7 +351,7 @@ std::optional Track(const IR::Value& value, const Bias* bias) .index = index.U32(), .offset = offset.U32(), }; - const u32 alignment{bias ? bias->alignment : 16U}; + const u32 alignment{bias ? bias->alignment : 8U}; if (!Common::IsAligned(storage_buffer.offset, alignment)) { // The SSBO pointer has to be aligned return std::nullopt; @@ -380,9 +372,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) // avoid getting false positives static constexpr Bias nvn_bias{ .index = 0, - .offset_begin = 0x100, // Expanded from 0x110 to catch more potential storage buffers - .offset_end = 0x1000, // Substantially expanded to include all TOTK storage buffers - .alignment = 32, // Increased from 16 to optimize memory access patterns + .offset_begin = 0x110, + .offset_end = 0x610, + .alignment = 16, }; // Track the low address of the instruction const std::optional low_addr_info{TrackLowAddress(&inst)}; @@ -394,16 +386,15 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) const IR::U32 low_addr{low_addr_info->value}; std::optional storage_buffer{Track(low_addr, &nvn_bias)}; if (!storage_buffer) { - // If it fails, track without a bias but with higher alignment requirements - // for better performance + // If it fails, track without a bias storage_buffer = Track(low_addr, nullptr); if (!storage_buffer) { // If that also fails, use NVN fallbacks LOG_WARNING(Shader, "Storage buffer failed to track, using global memory fallbacks"); return; } - LOG_DEBUG(Shader, "Storage buffer tracked without bias, index {} offset 0x{:X}", - storage_buffer->index, storage_buffer->offset); + LOG_WARNING(Shader, "Storage buffer tracked without bias, index {} offset {}", + storage_buffer->index, storage_buffer->offset); } // Collect storage buffer and the instruction if (IsGlobalMemoryWrite(inst)) { @@ -434,12 +425,8 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; // Align the offset base to match the host alignment requirements - // Use a more aggressive alignment mask for better performance low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))); - - // Also align the resulting offset for optimal memory access - IR::U32 result = ir.ISub(offset, low_cbuf); - return result; + return ir.ISub(offset, low_cbuf); } /// Replace a global memory load instruction with its storage buffer equivalent diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7351e2fc3..af237703d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project -// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once @@ -7,7 +6,6 @@ #include #include #include -#include #include "common/range_sets.inc" #include "video_core/buffer_cache/buffer_cache_base.h" @@ -20,7 +18,7 @@ using Core::DEVICE_PAGESIZE; template BufferCache

::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_) - : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory}, immediate_buffer_alloc{} { + : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); gpu_modified_ranges.Clear(); @@ -1721,31 +1719,8 @@ Binding BufferCache

::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, const std::optional aligned_device_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); if (!aligned_device_addr || size == 0) { - // Use a static counter to track and limit warnings - static std::unordered_map warning_counts; - - // Increment the warning count for this cbuf_index - warning_counts[cbuf_index]++; - - // Only log the first warning for each cbuf_index - if (warning_counts[cbuf_index] == 1) { - LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}. Using fallback.", - cbuf_index); - } else if (warning_counts[cbuf_index] % 1000 == 0) { - // Log occasional reminder warnings - LOG_DEBUG(HW_GPU, "Still using fallback for storage buffer cbuf index {} (count: {})", - cbuf_index, warning_counts[cbuf_index]); - } - - // Create a dummy binding with non-zero values to avoid potential crashes - static DAddr safe_device_addr = 0x1000; - static const u32 safe_size = 16 * 1024; // 16KB should be adequate for most cases - - return Binding{ - .device_addr = safe_device_addr, - .size = safe_size, - .buffer_id = const_cast*>(this)->FindBuffer(safe_device_addr, safe_size), - }; + LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); + return NULL_BINDING; } const std::optional device_addr = gpu_memory->GpuToCpuAddress(gpu_addr); ASSERT_MSG(device_addr, "Unaligned storage buffer address not found for cbuf index {}", diff --git a/src/video_core/delayed_destruction_ring.h b/src/video_core/delayed_destruction_ring.h index e02a14faa..d13ee622b 100644 --- a/src/video_core/delayed_destruction_ring.h +++ b/src/video_core/delayed_destruction_ring.h @@ -1,15 +1,12 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-FileCopyrightText: 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once #include #include -#include #include #include -#include "common/logging/log.h" namespace VideoCommon { @@ -17,59 +14,18 @@ namespace VideoCommon { template class DelayedDestructionRing { public: - DelayedDestructionRing() = default; - ~DelayedDestructionRing() { - // Ensure all resources are properly released when ring is destroyed - for (auto& element_list : elements) { - element_list.clear(); - } - } - void Tick() { - std::scoped_lock lock{ring_mutex}; - - // Move to next position in the ring index = (index + 1) % TICKS_TO_DESTROY; - - // Clear elements at current position, which ensures resources are properly released - const size_t count = elements[index].size(); - if (count > 0) { - // If more than a threshold of elements are being destroyed at once, log it - if (count > 100) { - LOG_DEBUG(Render_Vulkan, "Destroying {} delayed objects", count); - } - elements[index].clear(); - } + elements[index].clear(); } void Push(T&& object) { - std::scoped_lock lock{ring_mutex}; elements[index].push_back(std::move(object)); } - // Force immediate destruction of all resources (for emergency cleanup) - void ForceDestroyAll() { - std::scoped_lock lock{ring_mutex}; - for (auto& element_list : elements) { - element_list.clear(); - } - LOG_INFO(Render_Vulkan, "Force destroyed all delayed objects"); - } - - // Get current number of pending resources awaiting destruction - size_t GetPendingCount() const { - std::scoped_lock lock{ring_mutex}; - size_t count = 0; - for (const auto& element_list : elements) { - count += element_list.size(); - } - return count; - } - private: size_t index = 0; std::array, TICKS_TO_DESTROY> elements; - mutable std::mutex ring_mutex; }; } // namespace VideoCommon diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 7e808780d..969f21d50 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,5 +1,4 @@ # SPDX-FileCopyrightText: 2018 yuzu Emulator Project -# SPDX-FileCopyrightText: 2025 citron Emulator Project # SPDX-License-Identifier: GPL-2.0-or-later set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr) @@ -19,7 +18,6 @@ set(SHADER_FILES blit_color_float.frag block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp - convert_abgr8_srgb_to_d24s8.frag convert_abgr8_to_d24s8.frag convert_abgr8_to_d32f.frag convert_d32f_to_abgr8.frag @@ -70,14 +68,6 @@ set(SHADER_FILES vulkan_quad_indexed.comp vulkan_turbo_mode.comp vulkan_uint8.comp - convert_rgba8_to_bgra8.frag - convert_yuv420_to_rgb.comp - convert_rgb_to_yuv420.comp - convert_bc7_to_rgba8.comp - convert_astc_hdr_to_rgba16f.comp - convert_rgba16f_to_rgba8.frag - dither_temporal.frag - dynamic_resolution_scale.comp ) find_program(GLSLANGVALIDATOR "glslangValidator") diff --git a/src/video_core/host_shaders/convert_abgr8_srgb_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_srgb_to_d24s8.frag deleted file mode 100644 index 96c08aae4..000000000 --- a/src/video_core/host_shaders/convert_abgr8_srgb_to_d24s8.frag +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-FileCopyrightText: 2025 citron Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#version 450 -#extension GL_ARB_shader_stencil_export : require - -layout(binding = 0) uniform sampler2D color_texture; - -// More accurate sRGB to linear conversion -float srgbToLinear(float srgb) { - if (srgb <= 0.04045) { - return srgb / 12.92; - } else { - return pow((srgb + 0.055) / 1.055, 2.4); - } -} - -void main() { - ivec2 coord = ivec2(gl_FragCoord.xy); - vec4 srgbColor = texelFetch(color_texture, coord, 0); - - // Convert sRGB to linear space with proper gamma correction - vec3 linearColor = vec3( - srgbToLinear(srgbColor.r), - srgbToLinear(srgbColor.g), - srgbToLinear(srgbColor.b) - ); - - // Use standard luminance coefficients - float luminance = dot(linearColor, vec3(0.2126, 0.7152, 0.0722)); - - // Ensure proper depth range - luminance = clamp(luminance, 0.0, 1.0); - - // Convert to 24-bit depth value - uint depth_val = uint(luminance * float(0xFFFFFF)); - - // Extract 8-bit stencil from alpha - uint stencil_val = uint(srgbColor.a * 255.0); - - // Pack values efficiently - uint depth_stencil = (stencil_val << 24) | (depth_val & 0x00FFFFFF); - - gl_FragDepth = float(depth_val) / float(0xFFFFFF); - gl_FragStencilRefARB = int(stencil_val); -} \ No newline at end of file diff --git a/src/video_core/host_shaders/convert_astc_hdr_to_rgba16f.comp b/src/video_core/host_shaders/convert_astc_hdr_to_rgba16f.comp deleted file mode 100644 index 8d4b1825b..000000000 --- a/src/video_core/host_shaders/convert_astc_hdr_to_rgba16f.comp +++ /dev/null @@ -1,28 +0,0 @@ -#version 450 - -layout(local_size_x = 8, local_size_y = 8) in; - -layout(binding = 0) uniform samplerBuffer astc_data; -layout(binding = 1, rgba16f) uniform writeonly image2D output_image; - -// Note: This is a simplified version. Real ASTC HDR decompression is more complex -void main() { - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - ivec2 size = imageSize(output_image); - - if (pos.x >= size.x || pos.y >= size.y) { - return; - } - - // Calculate block and pixel within block - ivec2 block = pos / 8; // Assuming 8x8 ASTC blocks - ivec2 pixel = pos % 8; - - // Each ASTC block is 16 bytes - int block_index = block.y * (size.x / 8) + block.x; - - // Simplified ASTC HDR decoding - you'll need to implement full ASTC decoding - vec4 color = texelFetch(astc_data, block_index * 8 + pixel.y * 8 + pixel.x); - - imageStore(output_image, pos, color); -} \ No newline at end of file diff --git a/src/video_core/host_shaders/convert_bc7_to_rgba8.comp b/src/video_core/host_shaders/convert_bc7_to_rgba8.comp deleted file mode 100644 index a0842e175..000000000 --- a/src/video_core/host_shaders/convert_bc7_to_rgba8.comp +++ /dev/null @@ -1,29 +0,0 @@ -#version 450 -#extension GL_ARB_shader_ballot : require - -layout(local_size_x = 8, local_size_y = 8) in; - -layout(binding = 0) uniform samplerBuffer bc7_data; -layout(binding = 1, rgba8) uniform writeonly image2D output_image; - -// Note: This is a simplified version. Real BC7 decompression is more complex -void main() { - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - ivec2 size = imageSize(output_image); - - if (pos.x >= size.x || pos.y >= size.y) { - return; - } - - // Calculate block and pixel within block - ivec2 block = pos / 4; - ivec2 pixel = pos % 4; - - // Each BC7 block is 16 bytes - int block_index = block.y * (size.x / 4) + block.x; - - // Simplified BC7 decoding - you'll need to implement full BC7 decoding - vec4 color = texelFetch(bc7_data, block_index * 4 + pixel.y * 4 + pixel.x); - - imageStore(output_image, pos, color); -} \ No newline at end of file diff --git a/src/video_core/host_shaders/convert_rgb_to_yuv420.comp b/src/video_core/host_shaders/convert_rgb_to_yuv420.comp deleted file mode 100644 index 0a5cfab39..000000000 --- a/src/video_core/host_shaders/convert_rgb_to_yuv420.comp +++ /dev/null @@ -1,29 +0,0 @@ -#version 450 - -layout(local_size_x = 8, local_size_y = 8) in; - -layout(binding = 0) uniform sampler2D input_texture; -layout(binding = 1, r8) uniform writeonly image2D y_output; -layout(binding = 2, r8) uniform writeonly image2D u_output; -layout(binding = 3, r8) uniform writeonly image2D v_output; - -void main() { - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - ivec2 size = imageSize(y_output); - - if (pos.x >= size.x || pos.y >= size.y) { - return; - } - - vec2 tex_coord = vec2(pos) / vec2(size); - vec3 rgb = texture(input_texture, tex_coord).rgb; - - // RGB to YUV conversion - float y = 0.299 * rgb.r + 0.587 * rgb.g + 0.114 * rgb.b; - float u = -0.147 * rgb.r - 0.289 * rgb.g + 0.436 * rgb.b + 0.5; - float v = 0.615 * rgb.r - 0.515 * rgb.g - 0.100 * rgb.b + 0.5; - - imageStore(y_output, pos, vec4(y)); - imageStore(u_output, pos / 2, vec4(u)); - imageStore(v_output, pos / 2, vec4(v)); -} \ No newline at end of file diff --git a/src/video_core/host_shaders/convert_rgba16f_to_rgba8.frag b/src/video_core/host_shaders/convert_rgba16f_to_rgba8.frag deleted file mode 100644 index 9e430f504..000000000 --- a/src/video_core/host_shaders/convert_rgba16f_to_rgba8.frag +++ /dev/null @@ -1,31 +0,0 @@ -#version 450 - -layout(location = 0) in vec2 texcoord; -layout(location = 0) out vec4 color; - -layout(binding = 0) uniform sampler2D input_texture; - -layout(push_constant) uniform PushConstants { - float exposure; - float gamma; -} constants; - -vec3 tonemap(vec3 hdr) { - // Reinhard tonemapping - return hdr / (hdr + vec3(1.0)); -} - -void main() { - vec4 hdr = texture(input_texture, texcoord); - - // Apply exposure - vec3 exposed = hdr.rgb * constants.exposure; - - // Tonemap - vec3 tonemapped = tonemap(exposed); - - // Gamma correction - vec3 gamma_corrected = pow(tonemapped, vec3(1.0 / constants.gamma)); - - color = vec4(gamma_corrected, hdr.a); -} \ No newline at end of file diff --git a/src/video_core/host_shaders/convert_rgba8_to_bgra8.frag b/src/video_core/host_shaders/convert_rgba8_to_bgra8.frag deleted file mode 100644 index 6f7d24798..000000000 --- a/src/video_core/host_shaders/convert_rgba8_to_bgra8.frag +++ /dev/null @@ -1,11 +0,0 @@ -#version 450 - -layout(location = 0) in vec2 texcoord; -layout(location = 0) out vec4 color; - -layout(binding = 0) uniform sampler2D input_texture; - -void main() { - vec4 rgba = texture(input_texture, texcoord); - color = rgba.bgra; // Swap red and blue channels -} \ No newline at end of file diff --git a/src/video_core/host_shaders/convert_yuv420_to_rgb.comp b/src/video_core/host_shaders/convert_yuv420_to_rgb.comp deleted file mode 100644 index b1f1536f8..000000000 --- a/src/video_core/host_shaders/convert_yuv420_to_rgb.comp +++ /dev/null @@ -1,30 +0,0 @@ -#version 450 - -layout(local_size_x = 8, local_size_y = 8) in; - -layout(binding = 0) uniform sampler2D y_texture; -layout(binding = 1) uniform sampler2D u_texture; -layout(binding = 2) uniform sampler2D v_texture; -layout(binding = 3, rgba8) uniform writeonly image2D output_image; - -void main() { - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - ivec2 size = imageSize(output_image); - - if (pos.x >= size.x || pos.y >= size.y) { - return; - } - - vec2 tex_coord = vec2(pos) / vec2(size); - float y = texture(y_texture, tex_coord).r; - float u = texture(u_texture, tex_coord).r - 0.5; - float v = texture(v_texture, tex_coord).r - 0.5; - - // YUV to RGB conversion - vec3 rgb; - rgb.r = y + 1.402 * v; - rgb.g = y - 0.344 * u - 0.714 * v; - rgb.b = y + 1.772 * u; - - imageStore(output_image, pos, vec4(rgb, 1.0)); -} \ No newline at end of file diff --git a/src/video_core/host_shaders/dither_temporal.frag b/src/video_core/host_shaders/dither_temporal.frag deleted file mode 100644 index feaddc9aa..000000000 --- a/src/video_core/host_shaders/dither_temporal.frag +++ /dev/null @@ -1,29 +0,0 @@ -#version 450 - -layout(location = 0) in vec2 texcoord; -layout(location = 0) out vec4 color; - -layout(binding = 0) uniform sampler2D input_texture; - -layout(push_constant) uniform PushConstants { - float frame_count; - float dither_strength; -} constants; - -// Pseudo-random number generator -float rand(vec2 co) { - return fract(sin(dot(co.xy ,vec2(12.9898,78.233))) * 43758.5453); -} - -void main() { - vec4 input_color = texture(input_texture, texcoord); - - // Generate temporal noise based on frame count - vec2 noise_coord = gl_FragCoord.xy + vec2(constants.frame_count); - float noise = rand(noise_coord) * 2.0 - 1.0; - - // Apply dithering - vec3 dithered = input_color.rgb + noise * constants.dither_strength; - - color = vec4(dithered, input_color.a); -} \ No newline at end of file diff --git a/src/video_core/host_shaders/dynamic_resolution_scale.comp b/src/video_core/host_shaders/dynamic_resolution_scale.comp deleted file mode 100644 index 88f0a41c1..000000000 --- a/src/video_core/host_shaders/dynamic_resolution_scale.comp +++ /dev/null @@ -1,68 +0,0 @@ -#version 450 - -layout(local_size_x = 8, local_size_y = 8) in; - -layout(binding = 0) uniform sampler2D input_texture; -layout(binding = 1, rgba8) uniform writeonly image2D output_image; - -layout(push_constant) uniform PushConstants { - vec2 scale_factor; - vec2 input_size; -} constants; - -vec4 cubic(float v) { - vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; - vec4 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = s.w - 4.0 * s.z + 6.0 * s.y - 4.0 * s.x; - return vec4(x, y, z, w) * (1.0/6.0); -} - -vec4 bicubic_sample(sampler2D tex, vec2 tex_coord) { - vec2 tex_size = constants.input_size; - vec2 inv_tex_size = 1.0 / tex_size; - - tex_coord = tex_coord * tex_size - 0.5; - - vec2 fxy = fract(tex_coord); - tex_coord -= fxy; - - vec4 xcubic = cubic(fxy.x); - vec4 ycubic = cubic(fxy.y); - - vec4 c = tex_coord.xxyy + vec2(-0.5, +1.5).xyxy; - vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); - vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; - - offset *= inv_tex_size.xxyy; - - vec4 sample0 = texture(tex, offset.xz); - vec4 sample1 = texture(tex, offset.yz); - vec4 sample2 = texture(tex, offset.xw); - vec4 sample3 = texture(tex, offset.yw); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return mix( - mix(sample3, sample2, sx), - mix(sample1, sample0, sx), - sy - ); -} - -void main() { - ivec2 pos = ivec2(gl_GlobalInvocationID.xy); - ivec2 size = imageSize(output_image); - - if (pos.x >= size.x || pos.y >= size.y) { - return; - } - - vec2 tex_coord = vec2(pos) / vec2(size); - vec4 color = bicubic_sample(input_texture, tex_coord); - - imageStore(output_image, pos, color); -} \ No newline at end of file diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag index 5558b676c..01948e591 100644 --- a/src/video_core/host_shaders/opengl_present_scaleforce.frag +++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag @@ -10,7 +10,7 @@ #ifdef CITRON_USE_FP16 #extension GL_AMD_gpu_shader_half_float : enable -#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +#extension GL_NV_gpu_shader5 : enable #define lfloat float16_t #define lvec2 f16vec2 diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 00f997259..2a911a359 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1,5 +1,4 @@ // SPDX-FileCopyrightText: 2023 yuzu Emulator Project -// SPDX-FileCopyrightText: 2025 citron Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once @@ -81,10 +80,8 @@ void TextureCache

::RunGarbageCollector() { const auto Configure = [&](bool allow_aggressive) { high_priority_mode = total_used_memory >= expected_memory; aggressive_mode = allow_aggressive && total_used_memory >= critical_memory; - // Reduce ticks_to_destroy to be more aggressive in freeing memory - ticks_to_destroy = aggressive_mode ? 5ULL : high_priority_mode ? 15ULL : 40ULL; - // Increase num_iterations to clean up more resources at once for memory-intensive games - num_iterations = aggressive_mode ? 60 : (high_priority_mode ? 30 : 15); + ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; + num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); }; const auto Cleanup = [this, &num_iterations, &high_priority_mode, &aggressive_mode](ImageId image_id) { @@ -98,8 +95,7 @@ void TextureCache

::RunGarbageCollector() { // used by the async decoder thread. return false; } - // Be more aggressive with cleanup for memory-intensive games - if (!aggressive_mode && !high_priority_mode && True(image.flags & ImageFlagBits::CostlyLoad)) { + if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) { return false; } const bool must_download = @@ -122,20 +118,19 @@ void TextureCache

::RunGarbageCollector() { DeleteImage(image_id, image.scale_tick > frame_tick + 5); if (total_used_memory < critical_memory) { if (aggressive_mode) { - // Sink the aggresiveness more gradually to prevent oscillation - num_iterations = num_iterations * 3 / 4; + // Sink the aggresiveness. + num_iterations >>= 2; aggressive_mode = false; return false; } if (high_priority_mode && total_used_memory < expected_memory) { - num_iterations = num_iterations * 3 / 4; + num_iterations >>= 1; high_priority_mode = false; } } return false; }; - // Run garbage collection more frequently for memory-intensive games // Try to remove anything old enough and not high priority. Configure(false); lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); @@ -143,67 +138,19 @@ void TextureCache

::RunGarbageCollector() { // If pressure is still too high, prune aggressively. if (total_used_memory >= critical_memory) { Configure(true); - // Make a more thorough sweep with more aggressive settings - lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy / 2, Cleanup); - - // If we're still in a critical memory situation, do emergency cleanup - if (total_used_memory >= critical_memory + 50_MiB) { - // Last resort emergency cleanup - reduce thresholds dramatically - ticks_to_destroy = 1; - num_iterations = 100; - lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); - } + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup); } } template void TextureCache

::TickFrame() { - static u64 consecutive_high_memory_frames = 0; - static constexpr u64 EMERGENCY_CLEANUP_THRESHOLD = 120; // ~2 seconds at 60 FPS - // If we can obtain the memory info, use it instead of the estimate. if (runtime.CanReportMemoryUsage()) { total_used_memory = runtime.GetDeviceMemoryUsage(); } - - // Track consecutive high memory frames to detect potential leaks - if (total_used_memory > critical_memory) { - consecutive_high_memory_frames++; - if (consecutive_high_memory_frames > EMERGENCY_CLEANUP_THRESHOLD) { - // Emergency situation - extreme memory pressure for extended time - // This likely indicates a leak or insufficient cleanup - LOG_WARNING(Render, "Emergency texture cache cleanup triggered after {} frames of high memory usage", - consecutive_high_memory_frames); - - // Force immediate cleanup of all pending resources - sentenced_images.ForceDestroyAll(); - sentenced_framebuffers.ForceDestroyAll(); - sentenced_image_view.ForceDestroyAll(); - - // Do a forced garbage collection pass - bool saved_value = has_deleted_images; - RunGarbageCollector(); - has_deleted_images = saved_value; - - // Reset counter but keep some pressure - consecutive_high_memory_frames = 30; - } - else if (consecutive_high_memory_frames > 60) { // If high memory for >60 frames (~1 second) - // Force a more aggressive cleanup cycle - RunGarbageCollector(); - consecutive_high_memory_frames = 45; // Reset but keep some pressure - } - } else if (total_used_memory > expected_memory) { - // Use u64(1) to ensure type compatibility, avoiding the ULL suffix - consecutive_high_memory_frames = std::max(u64(1), consecutive_high_memory_frames / 2); - } else { - consecutive_high_memory_frames = 0; - } - if (total_used_memory > minimum_memory) { RunGarbageCollector(); } - sentenced_images.Tick(); sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); @@ -2218,35 +2165,27 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { if (image.HasScaled()) { total_used_memory -= GetScaledImageSizeBytes(image); } - - // Calculate accurate memory usage for this image u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); if ((IsPixelFormatASTC(image.info.format) && True(image.flags & ImageFlagBits::AcceleratedUpload)) || True(image.flags & ImageFlagBits::Converted)) { tentative_size = TranscodedAstcSize(tentative_size, image.info.format); } - - // Ensure memory usage is properly accounted for total_used_memory -= Common::AlignUp(tentative_size, 1024); - const GPUVAddr gpu_addr = image.gpu_addr; const auto alloc_it = image_allocs_table.find(gpu_addr); if (alloc_it == image_allocs_table.end()) { - LOG_ERROR(HW_GPU, "Trying to delete an image alloc that does not exist in address 0x{:x}", - gpu_addr); + ASSERT_MSG(false, "Trying to delete an image alloc that does not exist in address 0x{:x}", + gpu_addr); return; } - const ImageAllocId alloc_id = alloc_it->second; std::vector& alloc_images = slot_image_allocs[alloc_id].images; const auto alloc_image_it = std::ranges::find(alloc_images, image_id); if (alloc_image_it == alloc_images.end()) { - LOG_ERROR(HW_GPU, "Trying to delete an image that does not exist"); + ASSERT_MSG(false, "Trying to delete an image that does not exist"); return; } - - // Ensure image is properly untracked and unregistered before deletion ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); @@ -2257,8 +2196,6 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { for (size_t rt = 0; rt < NUM_RT; ++rt) { dirty[Dirty::ColorBuffer0 + rt] = true; } - - // Clear render target references const std::span image_view_ids = image.image_view_ids; for (const ImageViewId image_view_id : image_view_ids) { std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); @@ -2266,12 +2203,9 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { render_targets.depth_buffer_id = ImageViewId{}; } } - - // Clean up references and dependencies RemoveImageViewReferences(image_view_ids); RemoveFramebuffers(image_view_ids); - // Handle aliased images for (const AliasedImage& alias : image.aliased_images) { ImageBase& other_image = slot_images[alias.id]; [[maybe_unused]] const size_t num_removed_aliases = @@ -2279,43 +2213,33 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { return other_alias.id == image_id; }); other_image.CheckAliasState(); - if (num_removed_aliases != 1) { - LOG_WARNING(HW_GPU, "Invalid number of removed aliases: {}", num_removed_aliases); - } + ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", + num_removed_aliases); } - - // Handle overlapping images for (const ImageId overlap_id : image.overlapping_images) { ImageBase& other_image = slot_images[overlap_id]; [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( other_image.overlapping_images, [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); other_image.CheckBadOverlapState(); - if (num_removed_overlaps != 1) { - LOG_WARNING(HW_GPU, "Invalid number of removed overlaps: {}", num_removed_overlaps); - } + ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", + num_removed_overlaps); } - - // Free resources - either immediately or queue for delayed destruction for (const ImageViewId image_view_id : image_view_ids) { if (!immediate_delete) { sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); } slot_image_views.erase(image_view_id); } - if (!immediate_delete) { sentenced_images.Push(std::move(slot_images[image_id])); } slot_images.erase(image_id); - // Clean up allocation table alloc_images.erase(alloc_image_it); if (alloc_images.empty()) { image_allocs_table.erase(alloc_it); } - - // Mark tables as invalidated for (size_t c : active_channel_ids) { auto& channel_info = channel_storage[c]; if constexpr (ENABLE_VALIDATION) { diff --git a/vcpkg.json b/vcpkg.json index 3c57e651d..7334202b5 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,7 +1,7 @@ { "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg-tool/main/docs/vcpkg.schema.json", "name": "citron", - "builtin-baseline": "c82f74667287d3dc386bce81e44964370c91a289", + "builtin-baseline": "1318ab14aae14db20085441cd71366891a9c9d0c", "version": "1.0", "dependencies": [ "boost-algorithm", @@ -31,7 +31,9 @@ "features": { "citron-tests": { "description": "Compile tests", - "dependencies": [ "catch2" ] + "dependencies": [ + "catch2" + ] }, "web-service": { "description": "Enable web services (telemetry, etc.)", @@ -55,11 +57,11 @@ "overrides": [ { "name": "catch2", - "version": "3.8.0" + "version": "3.3.1" }, { "name": "fmt", - "version": "11.0.2" + "version": "10.1.1" } ] -} +} \ No newline at end of file