feat: downgrade fmt requirement to version 10

- Change fmt requirement from 11 to 10 in CMakeLists.txt and vcpkg.json
- Remove null pointer handling in memory operations
- Remove memory region management features
- Remove unused host shader files and code
- Simplify buffer cache and texture cache memory management
- Remove thread safety and emergency cleanup from DelayedDestructionRing
- Revert storage buffer optimizations and safety fallbacks
- Remove Citron copyright notices from multiple files

Signed-off-by: Zephyron <zephyron@citron-emu.org>
Zephyron 2025-05-05 18:05:39 +10:00
parent 6969005c54
commit 7e58599d69
19 changed files with 42 additions and 672 deletions

CMakeLists.txt

@@ -369,7 +369,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
 # Enforce the search mode of non-required packages for better and shorter failure messages
 find_package(Boost REQUIRED context)
 find_package(enet MODULE)
-find_package(fmt 11 REQUIRED)
+find_package(fmt 10 REQUIRED)
 if (CITRON_USE_LLVM_DEMANGLE)
     find_package(LLVM MODULE COMPONENTS Demangle)
 endif()

src/core/memory.cpp

@@ -1,6 +1,5 @@
 // SPDX-FileCopyrightText: 2015 Citra Emulator Project
 // SPDX-FileCopyrightText: 2018 yuzu Emulator Project
-// SPDX-FileCopyrightText: 2025 Citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <algorithm>
@@ -738,21 +737,12 @@ struct Memory::Impl {
         const u8* const ptr = GetPointerImpl(
             GetInteger(vaddr),
             [vaddr]() {
-                // Add special handling for null pointer reads
-                if (GetInteger(vaddr) == 0 || GetInteger(vaddr) < 0x1000) {
-                    LOG_ERROR(HW_Memory, "Null pointer Read{} @ 0x{:016X}", sizeof(T) * 8,
-                              GetInteger(vaddr));
-                    return;
-                }
                 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8,
                           GetInteger(vaddr));
             },
             [&]() { HandleRasterizerDownload(GetInteger(vaddr), sizeof(T)); });
         if (ptr) {
             std::memcpy(&result, ptr, sizeof(T));
-        } else if (GetInteger(vaddr) == 0) {
-            // Return 0 for null pointer reads instead of random memory
-            result = 0;
         }
         return result;
     }
@@ -771,12 +761,6 @@ struct Memory::Impl {
         u8* const ptr = GetPointerImpl(
             GetInteger(vaddr),
             [vaddr, data]() {
-                // Add special handling for null pointer writes
-                if (GetInteger(vaddr) == 0 || GetInteger(vaddr) < 0x1000) {
-                    LOG_ERROR(HW_Memory, "Null pointer Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
-                              GetInteger(vaddr), static_cast<u64>(data));
-                    return;
-                }
                 LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
                           GetInteger(vaddr), static_cast<u64>(data));
             },
@@ -784,7 +768,6 @@ struct Memory::Impl {
         if (ptr) {
             std::memcpy(ptr, &data, sizeof(T));
         }
-        // Silently ignore writes to null pointer
     }
 
     template <typename T>
@@ -910,9 +893,7 @@ struct Memory::Impl {
 #endif
 };
 
-Memory::Memory(Core::System& system_) : system(system_), impl(std::make_unique<Impl>(system_)), gen(rd()) {
-    // Initialize the random number distribution
-    dis = std::uniform_int_distribution<u64>(0, std::numeric_limits<u64>::max());
+Memory::Memory(Core::System& system_) : system{system_} {
     Reset();
 }
@@ -1096,54 +1077,28 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug)
 }
 
 bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
-    // Add detailed debug logging
-    LOG_DEBUG(HW_Memory, "JIT requesting NCE invalidation - Address: 0x{:016X}, Size: {} bytes",
-              GetInteger(vaddr), size);
-
-    // First check if the memory region is valid and executable
-    if (!IsValidVirtualAddressRange(vaddr, size)) {
-        LOG_WARNING(HW_Memory, "Skipping InvalidateNCE: Invalid address range - {} bytes @ 0x{:016X}",
-                    size, GetInteger(vaddr));
-        return false;
-    }
-
     [[maybe_unused]] bool mapped = true;
     [[maybe_unused]] bool rasterizer = false;
 
-    // Get pointer and check memory type
     u8* const ptr = impl->GetPointerImpl(
         GetInteger(vaddr),
         [&] {
-            LOG_WARNING(HW_Memory,
-                        "Skipping InvalidateNCE: Unmapped memory region - {} bytes @ 0x{:016X}",
-                        size, GetInteger(vaddr));
+            LOG_ERROR(HW_Memory, "Unmapped InvalidateNCE for {} bytes @ {:#x}", size,
+                      GetInteger(vaddr));
             mapped = false;
         },
        [&] { rasterizer = true; });
-
-    // Handle rasterizer memory separately
     if (rasterizer) {
-        LOG_DEBUG(HW_Memory, "Invalidating rasterizer memory region - {} bytes @ 0x{:016X}",
-                  size, GetInteger(vaddr));
         impl->InvalidateGPUMemory(ptr, size);
     }
 
 #ifdef __linux__
-    // Handle separate heap mapping on Linux
-    if (!rasterizer && mapped && ptr) {
-        LOG_DEBUG(HW_Memory, "Handling separate heap mapping for NCE region");
+    if (!rasterizer && mapped) {
        impl->buffer->DeferredMapSeparateHeap(GetInteger(vaddr));
     }
 #endif
-
-    // Return success only if we have a valid pointer and the region was mapped
-    const bool success = mapped && ptr != nullptr;
-    if (!success) {
-        LOG_WARNING(HW_Memory, "NCE invalidation failed - Address: 0x{:016X}, Size: {} bytes",
-                    GetInteger(vaddr), size);
-    }
-
-    return success;
+    return mapped && ptr != nullptr;
 }
 
 bool Memory::InvalidateSeparateHeap(void* fault_address) {
@@ -1154,48 +1109,4 @@ bool Memory::InvalidateSeparateHeap(void* fault_address) {
 #endif
 }
 
-Common::ProcessAddress Memory::GenerateRandomBaseAddress() {
-    u64 random_bits = dis(gen);
-    return Common::ProcessAddress((random_bits & ~NRO_BASE_ADDRESS_RANDOMIZATION_MASK) |
-                                  (random_bits & NRO_BASE_ADDRESS_RANDOMIZATION_MASK));
-}
-
-Memory::MemoryRegion* Memory::FindRegion(Common::ProcessAddress address) {
-    for (auto& entry : memory_regions) {
-        if (address >= entry.second.start_address &&
-            address < entry.second.start_address + entry.second.size) {
-            return &entry.second;
-        }
-    }
-    return nullptr;
-}
-
-void Memory::MapMemoryRegion(Common::ProcessAddress start_address, u64 size, MemoryRegionType type,
-                             bool exec, bool write) {
-    if (start_address + size > EMULATED_MEMORY_SIZE) {
-        LOG_ERROR(HW_Memory, "Memory mapping exceeds emulated memory boundaries at address {:016X}",
-                  GetInteger(start_address));
-        return;
-    }
-
-    // Create the memory region
-    memory_regions[start_address] = MemoryRegion(start_address, size, type, exec, write);
-
-    // Map the region in the page table
-    Common::MemoryPermission perms{};
-    if (exec) perms |= Common::MemoryPermission::Execute;
-    if (write) perms |= Common::MemoryPermission::Write;
-    perms |= Common::MemoryPermission::Read;
-
-    // Using the MapMemoryRegion method defined in the Impl struct
-    impl->MapMemoryRegion(*impl->current_page_table, start_address, size,
-                          Common::PhysicalAddress(GetInteger(start_address)), perms, false);
-}
-
-Common::ProcessAddress Memory::MapBinary(u64 size) {
-    Common::ProcessAddress base_address = GenerateRandomBaseAddress();
-    MapMemoryRegion(base_address, size, MemoryRegionType::BinaryMemory, true, true);
-    return base_address;
-}
-
 } // namespace Core::Memory
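One property of the removed randomization is worth recording: because `(x & ~m) | (x & m)` splits a value into the bits outside and inside the mask and then reassembles them, the removed GenerateRandomBaseAddress always returned its raw random value unchanged, so no alignment or range restriction was actually applied. A minimal standalone check of that identity (a sketch, not project code):

```cpp
#include <cassert>
#include <cstdint>

int main() {
    constexpr std::uint64_t mask = 0xFFFFFFFFFFFFF000ULL; // NRO_BASE_ADDRESS_RANDOMIZATION_MASK
    const std::uint64_t random_bits = 0x1234'5678'9ABC'DEF0ULL; // any sample value
    // (x & ~m) | (x & m) partitions x's bits by the mask and reassembles them,
    // so the result is always x itself.
    const std::uint64_t base = (random_bits & ~mask) | (random_bits & mask);
    assert(base == random_bits);
}
```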

src/core/memory.h

@@ -1,5 +1,4 @@
 // SPDX-FileCopyrightText: 2014 Citra Emulator Project
-// SPDX-FileCopyrightText: 2025 Citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
@@ -10,8 +9,6 @@
 #include <span>
 #include <string>
 #include <vector>
-#include <unordered_map>
-#include <random>
 
 #include "common/scratch_buffer.h"
 #include "common/typed_address.h"
@@ -46,9 +43,6 @@ constexpr std::size_t CITRON_PAGEBITS = 12;
 constexpr u64 CITRON_PAGESIZE = 1ULL << CITRON_PAGEBITS;
 constexpr u64 CITRON_PAGEMASK = CITRON_PAGESIZE - 1;
 
-/// Emulated memory size (4GB)
-constexpr u64 EMULATED_MEMORY_SIZE = 4ULL * 1024 * 1024 * 1024;
-
 /// Virtual user-space memory regions
 enum : u64 {
     /// TLS (Thread-Local Storage) related.
@@ -56,18 +50,6 @@ enum : u64 {
     /// Application stack
     DEFAULT_STACK_SIZE = 0x100000,
-
-    /// Mask to randomize bits 37-12 for NRO base address
-    NRO_BASE_ADDRESS_RANDOMIZATION_MASK = 0xFFFFFFFFFFFFF000,
 };
 
-/// Types of memory regions in the system
-enum class MemoryRegionType {
-    SystemMemory,
-    GraphicsMemory,
-    IOMemory,
-    BinaryMemory,
-    Undefined
-};
-
 /// Central class that handles all memory operations and state.
@@ -82,55 +64,6 @@ public:
     Memory(Memory&&) = default;
     Memory& operator=(Memory&&) = delete;
 
-    /**
-     * Structure representing a memory region with its properties
-     */
-    struct MemoryRegion {
-        Common::ProcessAddress start_address;
-        u64 size;
-        std::unique_ptr<u8[]> data;
-        bool is_mapped;
-        MemoryRegionType type;
-        bool is_executable;
-        bool is_writable;
-
-        // Default constructor needed for STL containers
-        MemoryRegion() : start_address(0), size(0), data(nullptr), is_mapped(false),
-                         type(MemoryRegionType::Undefined), is_executable(false), is_writable(false) {}
-
-        MemoryRegion(Common::ProcessAddress start, u64 sz, MemoryRegionType t, bool exec = false, bool write = false)
-            : start_address(start), size(sz), data(std::make_unique<u8[]>(sz)), is_mapped(false),
-              type(t), is_executable(exec), is_writable(write) {}
-    };
-
-    /**
-     * Maps a memory region with the specified properties
-     *
-     * @param start_address The starting address of the region
-     * @param size The size of the region in bytes
-     * @param type The type of memory region
-     * @param exec Whether the region is executable
-     * @param write Whether the region is writable
-     */
-    void MapMemoryRegion(Common::ProcessAddress start_address, u64 size, MemoryRegionType type,
-                         bool exec = false, bool write = false);
-
-    /**
-     * Maps a binary with a randomized base address
-     *
-     * @param size The size of the binary in bytes
-     * @returns The base address where the binary was mapped
-     */
-    Common::ProcessAddress MapBinary(u64 size);
-
-    /**
-     * Finds a memory region containing the specified address
-     *
-     * @param address The address to search for
-     * @returns Pointer to the memory region if found, nullptr otherwise
-     */
-    MemoryRegion* FindRegion(Common::ProcessAddress address);
-
     /**
      * Resets the state of the Memory system.
      */
@@ -564,13 +497,6 @@
     struct Impl;
     std::unique_ptr<Impl> impl;
-
-    std::unordered_map<Common::ProcessAddress, MemoryRegion> memory_regions;
-    std::random_device rd;
-    std::mt19937 gen;
-    std::uniform_int_distribution<u64> dis;
-
-    Common::ProcessAddress GenerateRandomBaseAddress();
 };
 
 template <typename T, GuestMemoryFlags FLAGS>

src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp

@@ -1,5 +1,4 @@
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
-// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <optional>
@@ -275,15 +274,8 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
 /// Returns true when a storage buffer address satisfies a bias
 bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
-    // For performance, strongly prefer addresses that meet the bias criteria
-    // and have optimal alignment
-    if (storage_buffer.index == bias.index &&
-        storage_buffer.offset >= bias.offset_begin &&
-        storage_buffer.offset < bias.offset_end) {
-        return true;
-    }
-
-    // Only fall back to other addresses if absolutely necessary
-    return false;
+    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
+           storage_buffer.offset < bias.offset_end;
 }
 
 struct LowAddrInfo {
@@ -359,7 +351,7 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
         .index = index.U32(),
         .offset = offset.U32(),
     };
-    const u32 alignment{bias ? bias->alignment : 16U};
+    const u32 alignment{bias ? bias->alignment : 8U};
     if (!Common::IsAligned(storage_buffer.offset, alignment)) {
         // The SSBO pointer has to be aligned
         return std::nullopt;
@@ -380,9 +372,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     // avoid getting false positives
     static constexpr Bias nvn_bias{
         .index = 0,
-        .offset_begin = 0x100, // Expanded from 0x110 to catch more potential storage buffers
-        .offset_end = 0x1000,  // Substantially expanded to include all TOTK storage buffers
-        .alignment = 32,       // Increased from 16 to optimize memory access patterns
+        .offset_begin = 0x110,
+        .offset_end = 0x610,
+        .alignment = 16,
     };
     // Track the low address of the instruction
     const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
@@ -394,15 +386,14 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     const IR::U32 low_addr{low_addr_info->value};
     std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
     if (!storage_buffer) {
-        // If it fails, track without a bias but with higher alignment requirements
-        // for better performance
+        // If it fails, track without a bias
         storage_buffer = Track(low_addr, nullptr);
         if (!storage_buffer) {
             // If that also fails, use NVN fallbacks
-            LOG_WARNING(Shader, "Storage buffer failed to track, using global memory fallbacks");
             return;
         }
-        LOG_DEBUG(Shader, "Storage buffer tracked without bias, index {} offset 0x{:X}",
+        LOG_WARNING(Shader, "Storage buffer tracked without bias, index {} offset {}",
                     storage_buffer->index, storage_buffer->offset);
     }
     // Collect storage buffer and the instruction
@@ -434,12 +425,8 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
     IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
 
     // Align the offset base to match the host alignment requirements
-    // Use a more aggressive alignment mask for better performance
     low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
-
-    // Also align the resulting offset for optimal memory access
-    IR::U32 result = ir.ISub(offset, low_cbuf);
-    return result;
+    return ir.ISub(offset, low_cbuf);
 }
 
 /// Replace a global memory load instruction with its storage buffer equivalent
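Stripped of the IR plumbing, the restored heuristic reduces to a pure predicate: a (cbuf index, offset) pair is accepted as an NVN storage-buffer slot only when it lies inside the bias window and is suitably aligned. A self-contained sketch with the restored constants (IsAligned reimplemented here as the usual power-of-two check; names are illustrative):

```cpp
#include <cstdint>

struct Bias {
    std::uint32_t index, offset_begin, offset_end, alignment;
};

// Restored NVN bias window from the pass above.
constexpr Bias nvn_bias{.index = 0, .offset_begin = 0x110, .offset_end = 0x610, .alignment = 16};

constexpr bool IsAligned(std::uint32_t value, std::uint32_t alignment) {
    return (value & (alignment - 1)) == 0; // alignment must be a power of two
}

// Mirrors MeetsBias() combined with the alignment gate in Track().
constexpr bool AcceptsAsStorageBuffer(std::uint32_t index, std::uint32_t offset) {
    return index == nvn_bias.index && offset >= nvn_bias.offset_begin &&
           offset < nvn_bias.offset_end && IsAligned(offset, nvn_bias.alignment);
}

static_assert(AcceptsAsStorageBuffer(0, 0x110));  // first slot in the window
static_assert(!AcceptsAsStorageBuffer(0, 0x610)); // one past the window
static_assert(!AcceptsAsStorageBuffer(0, 0x118)); // in range but only 8-byte aligned
```

When no bias is supplied, Track() falls back to the bare 8-byte alignment requirement restored above.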

src/video_core/buffer_cache/buffer_cache.h

@@ -1,5 +1,4 @@
 // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
-// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
@@ -7,7 +6,6 @@
 #include <algorithm>
 #include <memory>
 #include <numeric>
-#include <unordered_map>
 
 #include "common/range_sets.inc"
 #include "video_core/buffer_cache/buffer_cache_base.h"
@@ -20,7 +18,7 @@ using Core::DEVICE_PAGESIZE;
 template <class P>
 BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_)
-    : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory}, immediate_buffer_alloc{} {
+    : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
     // Ensure the first slot is used for the null buffer
     void(slot_buffers.insert(runtime, NullBufferParams{}));
     gpu_modified_ranges.Clear();
@@ -1721,31 +1719,8 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
     const std::optional<DAddr> aligned_device_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
     if (!aligned_device_addr || size == 0) {
-        // Use a static counter to track and limit warnings
-        static std::unordered_map<u32, u32> warning_counts;
-
-        // Increment the warning count for this cbuf_index
-        warning_counts[cbuf_index]++;
-
-        // Only log the first warning for each cbuf_index
-        if (warning_counts[cbuf_index] == 1) {
-            LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}. Using fallback.",
-                        cbuf_index);
-        } else if (warning_counts[cbuf_index] % 1000 == 0) {
-            // Log occasional reminder warnings
-            LOG_DEBUG(HW_GPU, "Still using fallback for storage buffer cbuf index {} (count: {})",
-                      cbuf_index, warning_counts[cbuf_index]);
-        }
-
-        // Create a dummy binding with non-zero values to avoid potential crashes
-        static DAddr safe_device_addr = 0x1000;
-        static const u32 safe_size = 16 * 1024; // 16KB should be adequate for most cases
-
-        return Binding{
-            .device_addr = safe_device_addr,
-            .size = safe_size,
-            .buffer_id = const_cast<BufferCache<P>*>(this)->FindBuffer(safe_device_addr, safe_size),
-        };
+        LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
+        return NULL_BINDING;
     }
     const std::optional<DAddr> device_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     ASSERT_MSG(device_addr, "Unaligned storage buffer address not found for cbuf index {}",

src/video_core/delayed_destruction_ring.h

@@ -1,15 +1,12 @@
 // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-FileCopyrightText: 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
 #include <array>
 #include <cstddef>
-#include <mutex>
 #include <utility>
 #include <vector>
 
-#include "common/logging/log.h"
-
 namespace VideoCommon {
@@ -17,59 +14,18 @@ namespace VideoCommon {
 template <typename T, size_t TICKS_TO_DESTROY>
 class DelayedDestructionRing {
 public:
-    DelayedDestructionRing() = default;
-
-    ~DelayedDestructionRing() {
-        // Ensure all resources are properly released when ring is destroyed
-        for (auto& element_list : elements) {
-            element_list.clear();
-        }
-    }
-
     void Tick() {
-        std::scoped_lock lock{ring_mutex};
-
-        // Move to next position in the ring
         index = (index + 1) % TICKS_TO_DESTROY;
-
-        // Clear elements at current position, which ensures resources are properly released
-        const size_t count = elements[index].size();
-        if (count > 0) {
-            // If more than a threshold of elements are being destroyed at once, log it
-            if (count > 100) {
-                LOG_DEBUG(Render_Vulkan, "Destroying {} delayed objects", count);
-            }
         elements[index].clear();
-        }
     }
 
     void Push(T&& object) {
-        std::scoped_lock lock{ring_mutex};
         elements[index].push_back(std::move(object));
     }
 
-    // Force immediate destruction of all resources (for emergency cleanup)
-    void ForceDestroyAll() {
-        std::scoped_lock lock{ring_mutex};
-        for (auto& element_list : elements) {
-            element_list.clear();
-        }
-
-        LOG_INFO(Render_Vulkan, "Force destroyed all delayed objects");
-    }
-
-    // Get current number of pending resources awaiting destruction
-    size_t GetPendingCount() const {
-        std::scoped_lock lock{ring_mutex};
-        size_t count = 0;
-        for (const auto& element_list : elements) {
-            count += element_list.size();
-        }
-        return count;
-    }
-
 private:
     size_t index = 0;
     std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
-    mutable std::mutex ring_mutex;
 };
 
 } // namespace VideoCommon
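With the locking and emergency paths removed, the contract is again simple: Push() objects whose GPU work may still be in flight, Tick() once per frame from the owning thread, and each object is freed when the tick cycle returns to its slot, i.e. TICKS_TO_DESTROY ticks later. A minimal usage sketch (Resource is a stand-in type; include path as in this tree):

```cpp
#include "video_core/delayed_destruction_ring.h"

// Stand-in for a GPU-side object that must outlive in-flight frames.
struct Resource {};

void Example() {
    VideoCommon::DelayedDestructionRing<Resource, 3> ring;
    ring.Push(Resource{}); // lands in the current slot
    ring.Tick();           // frame 1: object still alive
    ring.Tick();           // frame 2: object still alive
    ring.Tick();           // frame 3: the ring wraps back to the slot and clears it
}
```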

src/video_core/host_shaders/CMakeLists.txt

@@ -1,5 +1,4 @@
 # SPDX-FileCopyrightText: 2018 yuzu Emulator Project
-# SPDX-FileCopyrightText: 2025 citron Emulator Project
 # SPDX-License-Identifier: GPL-2.0-or-later
 
 set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr)
@@ -19,7 +18,6 @@ set(SHADER_FILES
     blit_color_float.frag
     block_linear_unswizzle_2d.comp
     block_linear_unswizzle_3d.comp
-    convert_abgr8_srgb_to_d24s8.frag
     convert_abgr8_to_d24s8.frag
     convert_abgr8_to_d32f.frag
     convert_d32f_to_abgr8.frag
@@ -70,14 +68,6 @@ set(SHADER_FILES
     vulkan_quad_indexed.comp
     vulkan_turbo_mode.comp
     vulkan_uint8.comp
-    convert_rgba8_to_bgra8.frag
-    convert_yuv420_to_rgb.comp
-    convert_rgb_to_yuv420.comp
-    convert_bc7_to_rgba8.comp
-    convert_astc_hdr_to_rgba16f.comp
-    convert_rgba16f_to_rgba8.frag
-    dither_temporal.frag
-    dynamic_resolution_scale.comp
 )
 
 find_program(GLSLANGVALIDATOR "glslangValidator")

src/video_core/host_shaders/convert_abgr8_srgb_to_d24s8.frag (deleted)

@@ -1,46 +0,0 @@
-// SPDX-FileCopyrightText: 2025 citron Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#version 450
-#extension GL_ARB_shader_stencil_export : require
-
-layout(binding = 0) uniform sampler2D color_texture;
-
-// More accurate sRGB to linear conversion
-float srgbToLinear(float srgb) {
-    if (srgb <= 0.04045) {
-        return srgb / 12.92;
-    } else {
-        return pow((srgb + 0.055) / 1.055, 2.4);
-    }
-}
-
-void main() {
-    ivec2 coord = ivec2(gl_FragCoord.xy);
-    vec4 srgbColor = texelFetch(color_texture, coord, 0);
-
-    // Convert sRGB to linear space with proper gamma correction
-    vec3 linearColor = vec3(
-        srgbToLinear(srgbColor.r),
-        srgbToLinear(srgbColor.g),
-        srgbToLinear(srgbColor.b)
-    );
-
-    // Use standard luminance coefficients
-    float luminance = dot(linearColor, vec3(0.2126, 0.7152, 0.0722));
-
-    // Ensure proper depth range
-    luminance = clamp(luminance, 0.0, 1.0);
-
-    // Convert to 24-bit depth value
-    uint depth_val = uint(luminance * float(0xFFFFFF));
-
-    // Extract 8-bit stencil from alpha
-    uint stencil_val = uint(srgbColor.a * 255.0);
-
-    // Pack values efficiently
-    uint depth_stencil = (stencil_val << 24) | (depth_val & 0x00FFFFFF);
-
-    gl_FragDepth = float(depth_val) / float(0xFFFFFF);
-    gl_FragStencilRefARB = int(stencil_val);
-}

src/video_core/host_shaders/convert_astc_hdr_to_rgba16f.comp (deleted)

@@ -1,28 +0,0 @@
-#version 450
-
-layout(local_size_x = 8, local_size_y = 8) in;
-
-layout(binding = 0) uniform samplerBuffer astc_data;
-layout(binding = 1, rgba16f) uniform writeonly image2D output_image;
-
-// Note: This is a simplified version. Real ASTC HDR decompression is more complex
-void main() {
-    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
-    ivec2 size = imageSize(output_image);
-
-    if (pos.x >= size.x || pos.y >= size.y) {
-        return;
-    }
-
-    // Calculate block and pixel within block
-    ivec2 block = pos / 8; // Assuming 8x8 ASTC blocks
-    ivec2 pixel = pos % 8;
-
-    // Each ASTC block is 16 bytes
-    int block_index = block.y * (size.x / 8) + block.x;
-
-    // Simplified ASTC HDR decoding - you'll need to implement full ASTC decoding
-    vec4 color = texelFetch(astc_data, block_index * 8 + pixel.y * 8 + pixel.x);
-
-    imageStore(output_image, pos, color);
-}

src/video_core/host_shaders/convert_bc7_to_rgba8.comp (deleted)

@@ -1,29 +0,0 @@
-#version 450
-#extension GL_ARB_shader_ballot : require
-
-layout(local_size_x = 8, local_size_y = 8) in;
-
-layout(binding = 0) uniform samplerBuffer bc7_data;
-layout(binding = 1, rgba8) uniform writeonly image2D output_image;
-
-// Note: This is a simplified version. Real BC7 decompression is more complex
-void main() {
-    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
-    ivec2 size = imageSize(output_image);
-
-    if (pos.x >= size.x || pos.y >= size.y) {
-        return;
-    }
-
-    // Calculate block and pixel within block
-    ivec2 block = pos / 4;
-    ivec2 pixel = pos % 4;
-
-    // Each BC7 block is 16 bytes
-    int block_index = block.y * (size.x / 4) + block.x;
-
-    // Simplified BC7 decoding - you'll need to implement full BC7 decoding
-    vec4 color = texelFetch(bc7_data, block_index * 4 + pixel.y * 4 + pixel.x);
-
-    imageStore(output_image, pos, color);
-}

src/video_core/host_shaders/convert_rgb_to_yuv420.comp (deleted)

@@ -1,29 +0,0 @@
-#version 450
-
-layout(local_size_x = 8, local_size_y = 8) in;
-
-layout(binding = 0) uniform sampler2D input_texture;
-layout(binding = 1, r8) uniform writeonly image2D y_output;
-layout(binding = 2, r8) uniform writeonly image2D u_output;
-layout(binding = 3, r8) uniform writeonly image2D v_output;
-
-void main() {
-    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
-    ivec2 size = imageSize(y_output);
-
-    if (pos.x >= size.x || pos.y >= size.y) {
-        return;
-    }
-
-    vec2 tex_coord = vec2(pos) / vec2(size);
-    vec3 rgb = texture(input_texture, tex_coord).rgb;
-
-    // RGB to YUV conversion
-    float y = 0.299 * rgb.r + 0.587 * rgb.g + 0.114 * rgb.b;
-    float u = -0.147 * rgb.r - 0.289 * rgb.g + 0.436 * rgb.b + 0.5;
-    float v = 0.615 * rgb.r - 0.515 * rgb.g - 0.100 * rgb.b + 0.5;
-
-    imageStore(y_output, pos, vec4(y));
-    imageStore(u_output, pos / 2, vec4(u));
-    imageStore(v_output, pos / 2, vec4(v));
-}

src/video_core/host_shaders/convert_rgba16f_to_rgba8.frag (deleted)

@@ -1,31 +0,0 @@
-#version 450
-
-layout(location = 0) in vec2 texcoord;
-layout(location = 0) out vec4 color;
-
-layout(binding = 0) uniform sampler2D input_texture;
-
-layout(push_constant) uniform PushConstants {
-    float exposure;
-    float gamma;
-} constants;
-
-vec3 tonemap(vec3 hdr) {
-    // Reinhard tonemapping
-    return hdr / (hdr + vec3(1.0));
-}
-
-void main() {
-    vec4 hdr = texture(input_texture, texcoord);
-
-    // Apply exposure
-    vec3 exposed = hdr.rgb * constants.exposure;
-
-    // Tonemap
-    vec3 tonemapped = tonemap(exposed);
-
-    // Gamma correction
-    vec3 gamma_corrected = pow(tonemapped, vec3(1.0 / constants.gamma));
-
-    color = vec4(gamma_corrected, hdr.a);
-}

src/video_core/host_shaders/convert_rgba8_to_bgra8.frag (deleted)

@@ -1,11 +0,0 @@
-#version 450
-
-layout(location = 0) in vec2 texcoord;
-layout(location = 0) out vec4 color;
-
-layout(binding = 0) uniform sampler2D input_texture;
-
-void main() {
-    vec4 rgba = texture(input_texture, texcoord);
-    color = rgba.bgra; // Swap red and blue channels
-}

src/video_core/host_shaders/convert_yuv420_to_rgb.comp (deleted)

@@ -1,30 +0,0 @@
-#version 450
-
-layout(local_size_x = 8, local_size_y = 8) in;
-
-layout(binding = 0) uniform sampler2D y_texture;
-layout(binding = 1) uniform sampler2D u_texture;
-layout(binding = 2) uniform sampler2D v_texture;
-layout(binding = 3, rgba8) uniform writeonly image2D output_image;
-
-void main() {
-    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
-    ivec2 size = imageSize(output_image);
-
-    if (pos.x >= size.x || pos.y >= size.y) {
-        return;
-    }
-
-    vec2 tex_coord = vec2(pos) / vec2(size);
-    float y = texture(y_texture, tex_coord).r;
-    float u = texture(u_texture, tex_coord).r - 0.5;
-    float v = texture(v_texture, tex_coord).r - 0.5;
-
-    // YUV to RGB conversion
-    vec3 rgb;
-    rgb.r = y + 1.402 * v;
-    rgb.g = y - 0.344 * u - 0.714 * v;
-    rgb.b = y + 1.772 * u;
-
-    imageStore(output_image, pos, vec4(rgb, 1.0));
-}

src/video_core/host_shaders/dither_temporal.frag (deleted)

@@ -1,29 +0,0 @@
-#version 450
-
-layout(location = 0) in vec2 texcoord;
-layout(location = 0) out vec4 color;
-
-layout(binding = 0) uniform sampler2D input_texture;
-
-layout(push_constant) uniform PushConstants {
-    float frame_count;
-    float dither_strength;
-} constants;
-
-// Pseudo-random number generator
-float rand(vec2 co) {
-    return fract(sin(dot(co.xy ,vec2(12.9898,78.233))) * 43758.5453);
-}
-
-void main() {
-    vec4 input_color = texture(input_texture, texcoord);
-
-    // Generate temporal noise based on frame count
-    vec2 noise_coord = gl_FragCoord.xy + vec2(constants.frame_count);
-    float noise = rand(noise_coord) * 2.0 - 1.0;
-
-    // Apply dithering
-    vec3 dithered = input_color.rgb + noise * constants.dither_strength;
-
-    color = vec4(dithered, input_color.a);
-}

src/video_core/host_shaders/dynamic_resolution_scale.comp (deleted)

@@ -1,68 +0,0 @@
-#version 450
-
-layout(local_size_x = 8, local_size_y = 8) in;
-
-layout(binding = 0) uniform sampler2D input_texture;
-layout(binding = 1, rgba8) uniform writeonly image2D output_image;
-
-layout(push_constant) uniform PushConstants {
-    vec2 scale_factor;
-    vec2 input_size;
-} constants;
-
-vec4 cubic(float v) {
-    vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v;
-    vec4 s = n * n * n;
-    float x = s.x;
-    float y = s.y - 4.0 * s.x;
-    float z = s.z - 4.0 * s.y + 6.0 * s.x;
-    float w = s.w - 4.0 * s.z + 6.0 * s.y - 4.0 * s.x;
-    return vec4(x, y, z, w) * (1.0/6.0);
-}
-
-vec4 bicubic_sample(sampler2D tex, vec2 tex_coord) {
-    vec2 tex_size = constants.input_size;
-    vec2 inv_tex_size = 1.0 / tex_size;
-
-    tex_coord = tex_coord * tex_size - 0.5;
-
-    vec2 fxy = fract(tex_coord);
-    tex_coord -= fxy;
-
-    vec4 xcubic = cubic(fxy.x);
-    vec4 ycubic = cubic(fxy.y);
-
-    vec4 c = tex_coord.xxyy + vec2(-0.5, +1.5).xyxy;
-    vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw);
-
-    vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s;
-    offset *= inv_tex_size.xxyy;
-
-    vec4 sample0 = texture(tex, offset.xz);
-    vec4 sample1 = texture(tex, offset.yz);
-    vec4 sample2 = texture(tex, offset.xw);
-    vec4 sample3 = texture(tex, offset.yw);
-
-    float sx = s.x / (s.x + s.y);
-    float sy = s.z / (s.z + s.w);
-
-    return mix(
-        mix(sample3, sample2, sx),
-        mix(sample1, sample0, sx),
-        sy
-    );
-}
-
-void main() {
-    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
-    ivec2 size = imageSize(output_image);
-
-    if (pos.x >= size.x || pos.y >= size.y) {
-        return;
-    }
-
-    vec2 tex_coord = vec2(pos) / vec2(size);
-    vec4 color = bicubic_sample(input_texture, tex_coord);
-
-    imageStore(output_image, pos, color);
-}

src/video_core/host_shaders/opengl_present_scaleforce.frag

@@ -10,7 +10,7 @@
 #ifdef CITRON_USE_FP16
 
 #extension GL_AMD_gpu_shader_half_float : enable
-#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+#extension GL_NV_gpu_shader5 : enable
 
 #define lfloat float16_t
 #define lvec2 f16vec2

src/video_core/texture_cache/texture_cache.h

@@ -1,5 +1,4 @@
 // SPDX-FileCopyrightText: 2023 yuzu Emulator Project
-// SPDX-FileCopyrightText: 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 #pragma once
@@ -81,10 +80,8 @@ void TextureCache<P>::RunGarbageCollector() {
     const auto Configure = [&](bool allow_aggressive) {
         high_priority_mode = total_used_memory >= expected_memory;
         aggressive_mode = allow_aggressive && total_used_memory >= critical_memory;
-        // Reduce ticks_to_destroy to be more aggressive in freeing memory
-        ticks_to_destroy = aggressive_mode ? 5ULL : high_priority_mode ? 15ULL : 40ULL;
-        // Increase num_iterations to clean up more resources at once for memory-intensive games
-        num_iterations = aggressive_mode ? 60 : (high_priority_mode ? 30 : 15);
+        ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
+        num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
     };
     const auto Cleanup = [this, &num_iterations, &high_priority_mode,
                           &aggressive_mode](ImageId image_id) {
@@ -98,8 +95,7 @@ void TextureCache<P>::RunGarbageCollector() {
             // used by the async decoder thread.
             return false;
         }
-        // Be more aggressive with cleanup for memory-intensive games
-        if (!aggressive_mode && !high_priority_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
+        if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
            return false;
         }
         const bool must_download =
@@ -122,20 +118,19 @@ void TextureCache<P>::RunGarbageCollector() {
         DeleteImage(image_id, image.scale_tick > frame_tick + 5);
         if (total_used_memory < critical_memory) {
             if (aggressive_mode) {
-                // Sink the aggresiveness more gradually to prevent oscillation
-                num_iterations = num_iterations * 3 / 4;
+                // Sink the aggresiveness.
+                num_iterations >>= 2;
                 aggressive_mode = false;
                 return false;
             }
             if (high_priority_mode && total_used_memory < expected_memory) {
-                num_iterations = num_iterations * 3 / 4;
+                num_iterations >>= 1;
                 high_priority_mode = false;
             }
         }
         return false;
     };
-    // Run garbage collection more frequently for memory-intensive games
+    // Try to remove anything old enough and not high priority.
     Configure(false);
     lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
@@ -143,67 +138,19 @@ void TextureCache<P>::RunGarbageCollector() {
     // If pressure is still too high, prune aggressively.
     if (total_used_memory >= critical_memory) {
         Configure(true);
-        // Make a more thorough sweep with more aggressive settings
-        lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy / 2, Cleanup);
-
-        // If we're still in a critical memory situation, do emergency cleanup
-        if (total_used_memory >= critical_memory + 50_MiB) {
-            // Last resort emergency cleanup - reduce thresholds dramatically
-            ticks_to_destroy = 1;
-            num_iterations = 100;
         lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
-        }
     }
 }
 
 template <class P>
 void TextureCache<P>::TickFrame() {
-    static u64 consecutive_high_memory_frames = 0;
-    static constexpr u64 EMERGENCY_CLEANUP_THRESHOLD = 120; // ~2 seconds at 60 FPS
-
     // If we can obtain the memory info, use it instead of the estimate.
     if (runtime.CanReportMemoryUsage()) {
         total_used_memory = runtime.GetDeviceMemoryUsage();
     }
-
-    // Track consecutive high memory frames to detect potential leaks
-    if (total_used_memory > critical_memory) {
-        consecutive_high_memory_frames++;
-        if (consecutive_high_memory_frames > EMERGENCY_CLEANUP_THRESHOLD) {
-            // Emergency situation - extreme memory pressure for extended time
-            // This likely indicates a leak or insufficient cleanup
-            LOG_WARNING(Render, "Emergency texture cache cleanup triggered after {} frames of high memory usage",
-                        consecutive_high_memory_frames);
-
-            // Force immediate cleanup of all pending resources
-            sentenced_images.ForceDestroyAll();
-            sentenced_framebuffers.ForceDestroyAll();
-            sentenced_image_view.ForceDestroyAll();
-
-            // Do a forced garbage collection pass
-            bool saved_value = has_deleted_images;
-            RunGarbageCollector();
-            has_deleted_images = saved_value;
-
-            // Reset counter but keep some pressure
-            consecutive_high_memory_frames = 30;
-        }
-        else if (consecutive_high_memory_frames > 60) { // If high memory for >60 frames (~1 second)
-            // Force a more aggressive cleanup cycle
-            RunGarbageCollector();
-            consecutive_high_memory_frames = 45; // Reset but keep some pressure
-        }
-    } else if (total_used_memory > expected_memory) {
-        // Use u64(1) to ensure type compatibility, avoiding the ULL suffix
-        consecutive_high_memory_frames = std::max(u64(1), consecutive_high_memory_frames / 2);
-    } else {
-        consecutive_high_memory_frames = 0;
-    }
-
     if (total_used_memory > minimum_memory) {
         RunGarbageCollector();
     }
     sentenced_images.Tick();
     sentenced_framebuffers.Tick();
     sentenced_image_view.Tick();
@@ -2218,35 +2165,27 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
     if (image.HasScaled()) {
         total_used_memory -= GetScaledImageSizeBytes(image);
     }
-    // Calculate accurate memory usage for this image
     u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
     if ((IsPixelFormatASTC(image.info.format) &&
          True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
         True(image.flags & ImageFlagBits::Converted)) {
         tentative_size = TranscodedAstcSize(tentative_size, image.info.format);
     }
-    // Ensure memory usage is properly accounted for
     total_used_memory -= Common::AlignUp(tentative_size, 1024);
     const GPUVAddr gpu_addr = image.gpu_addr;
     const auto alloc_it = image_allocs_table.find(gpu_addr);
     if (alloc_it == image_allocs_table.end()) {
-        LOG_ERROR(HW_GPU, "Trying to delete an image alloc that does not exist in address 0x{:x}",
+        ASSERT_MSG(false, "Trying to delete an image alloc that does not exist in address 0x{:x}",
                    gpu_addr);
         return;
     }
     const ImageAllocId alloc_id = alloc_it->second;
     std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
     const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
     if (alloc_image_it == alloc_images.end()) {
-        LOG_ERROR(HW_GPU, "Trying to delete an image that does not exist");
+        ASSERT_MSG(false, "Trying to delete an image that does not exist");
         return;
     }
-    // Ensure image is properly untracked and unregistered before deletion
     ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
@@ -2257,8 +2196,6 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
     for (size_t rt = 0; rt < NUM_RT; ++rt) {
         dirty[Dirty::ColorBuffer0 + rt] = true;
     }
-
-    // Clear render target references
     const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
     for (const ImageViewId image_view_id : image_view_ids) {
         std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
@@ -2266,12 +2203,9 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
             render_targets.depth_buffer_id = ImageViewId{};
         }
     }
-    // Clean up references and dependencies
     RemoveImageViewReferences(image_view_ids);
     RemoveFramebuffers(image_view_ids);
 
-    // Handle aliased images
     for (const AliasedImage& alias : image.aliased_images) {
         ImageBase& other_image = slot_images[alias.id];
         [[maybe_unused]] const size_t num_removed_aliases =
@@ -2279,43 +2213,33 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
             return other_alias.id == image_id;
         });
         other_image.CheckAliasState();
-        if (num_removed_aliases != 1) {
-            LOG_WARNING(HW_GPU, "Invalid number of removed aliases: {}", num_removed_aliases);
+        ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
+                   num_removed_aliases);
-        }
     }
-    // Handle overlapping images
     for (const ImageId overlap_id : image.overlapping_images) {
         ImageBase& other_image = slot_images[overlap_id];
         [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
             other_image.overlapping_images,
             [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
         other_image.CheckBadOverlapState();
-        if (num_removed_overlaps != 1) {
-            LOG_WARNING(HW_GPU, "Invalid number of removed overlaps: {}", num_removed_overlaps);
+        ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}",
+                   num_removed_overlaps);
-        }
     }
-    // Free resources - either immediately or queue for delayed destruction
     for (const ImageViewId image_view_id : image_view_ids) {
         if (!immediate_delete) {
             sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
         }
         slot_image_views.erase(image_view_id);
     }
     if (!immediate_delete) {
         sentenced_images.Push(std::move(slot_images[image_id]));
     }
     slot_images.erase(image_id);
-    // Clean up allocation table
     alloc_images.erase(alloc_image_it);
     if (alloc_images.empty()) {
         image_allocs_table.erase(alloc_it);
     }
-    // Mark tables as invalidated
     for (size_t c : active_channel_ids) {
         auto& channel_info = channel_storage[c];
         if constexpr (ENABLE_VALIDATION) {
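The restored Configure schedule is a pure mapping from memory pressure to collection parameters, which is easier to read outside the lambda. A standalone sketch of the reverted values (struct and function names are illustrative, not project API):

```cpp
#include <cstdint>

struct GcParams {
    std::uint64_t ticks_to_destroy; // minimum age, in frames, before an image may be evicted
    int num_iterations;             // rough budget of entries a cleanup pass may process
};

// Mirrors the reverted Configure lambda: higher pressure means younger images
// become eligible and each pass is allowed to do more work.
constexpr GcParams Configure(bool high_priority_mode, bool aggressive_mode) {
    return {
        aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL,
        aggressive_mode ? 40 : (high_priority_mode ? 20 : 10),
    };
}

static_assert(Configure(false, false).ticks_to_destroy == 50); // idle: evict only old images
static_assert(Configure(true, true).num_iterations == 40);     // critical: largest budget
```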

vcpkg.json

@@ -1,7 +1,7 @@
 {
     "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg-tool/main/docs/vcpkg.schema.json",
     "name": "citron",
-    "builtin-baseline": "c82f74667287d3dc386bce81e44964370c91a289",
+    "builtin-baseline": "1318ab14aae14db20085441cd71366891a9c9d0c",
     "version": "1.0",
     "dependencies": [
         "boost-algorithm",
@@ -31,7 +31,9 @@
     "features": {
         "citron-tests": {
             "description": "Compile tests",
-            "dependencies": [ "catch2" ]
+            "dependencies": [
+                "catch2"
+            ]
         },
         "web-service": {
             "description": "Enable web services (telemetry, etc.)",
@@ -55,11 +57,11 @@
     "overrides": [
         {
             "name": "catch2",
-            "version": "3.8.0"
+            "version": "3.3.1"
         },
         {
             "name": "fmt",
-            "version": "11.0.2"
+            "version": "10.1.1"
         }
     ]
 }