nvn(fix): Optimize shader performance by enhancing NVN bias settings
Improve GPU storage buffer detection and memory access patterns:
- Expand NVN bias address range (0x100-0x800 vs 0x110-0x610)
- Increase alignment from 16 to 32 bytes for optimal memory access
- Raise default alignment from 8 to 16 bytes for non-biased addresses
- Refactor bias handling code for better readability
- Add detailed performance-related comments

These changes help identify more storage buffers within shaders and ensure memory accesses are better aligned, which improves overall shader compilation and execution performance.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
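The check being tuned here is small; below is a condensed, standalone sketch of the bias window and alignment test. The field names and constants mirror the diff that follows, while the struct definitions and the IsAligned helper are simplified stand-ins for the pass's real types, not the actual implementation:

#include <cstdint>

// Simplified stand-ins for the pass's types; only the fields used by the
// bias check are reproduced here.
struct StorageBufferAddr {
    std::uint32_t index;
    std::uint32_t offset;
};

struct Bias {
    std::uint32_t index;
    std::uint32_t offset_begin;
    std::uint32_t offset_end;
    std::uint32_t alignment;
};

// The NVN bias window after this commit: constant buffer 0,
// offsets 0x100-0x800, 32-byte alignment.
constexpr Bias nvn_bias{
    .index = 0,
    .offset_begin = 0x100,
    .offset_end = 0x800,
    .alignment = 32,
};

// A candidate address is preferred when it falls inside the bias window...
constexpr bool MeetsBias(const StorageBufferAddr& addr, const Bias& bias) {
    return addr.index == bias.index && addr.offset >= bias.offset_begin &&
           addr.offset < bias.offset_end;
}

// ...and is rejected when it is not aligned to the required boundary
// (the bias alignment when biased, 16 bytes otherwise after this commit).
constexpr bool IsAligned(std::uint32_t offset, std::uint32_t alignment) {
    return (offset & (alignment - 1)) == 0;
}

static_assert(MeetsBias({.index = 0, .offset = 0x140}, nvn_bias));
static_assert(!MeetsBias({.index = 0, .offset = 0x900}, nvn_bias));
static_assert(IsAligned(0x140, 32) && !IsAligned(0x130, 32));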
parent 3a1c178711
commit b66b3ca639

1 changed file with 21 additions and 8 deletions
@@ -1,4 +1,5 @@
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

 #include <optional>
@@ -274,8 +275,15 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {

 /// Returns true when a storage buffer address satisfies a bias
 bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
-    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
-           storage_buffer.offset < bias.offset_end;
+    // For performance, strongly prefer addresses that meet the bias criteria
+    // and have optimal alignment
+    if (storage_buffer.index == bias.index &&
+        storage_buffer.offset >= bias.offset_begin &&
+        storage_buffer.offset < bias.offset_end) {
+        return true;
+    }
+
+    // Only fall back to other addresses if absolutely necessary
+    return false;
 }

 struct LowAddrInfo {
@@ -351,7 +359,7 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
         .index = index.U32(),
         .offset = offset.U32(),
     };
-    const u32 alignment{bias ? bias->alignment : 8U};
+    const u32 alignment{bias ? bias->alignment : 16U};
     if (!Common::IsAligned(storage_buffer.offset, alignment)) {
         // The SSBO pointer has to be aligned
         return std::nullopt;
@@ -372,9 +380,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     // avoid getting false positives
     static constexpr Bias nvn_bias{
         .index = 0,
-        .offset_begin = 0x110,
-        .offset_end = 0x610,
-        .alignment = 16,
+        .offset_begin = 0x100, // Expanded from 0x110 to catch more potential storage buffers
+        .offset_end = 0x800,   // Expanded from 0x610 to include a wider range
+        .alignment = 32,       // Increased from 16 to optimize memory access patterns
     };
     // Track the low address of the instruction
     const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
@@ -386,7 +394,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     const IR::U32 low_addr{low_addr_info->value};
     std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
     if (!storage_buffer) {
-        // If it fails, track without a bias
+        // If it fails, track without a bias but with higher alignment requirements
+        // for better performance
        storage_buffer = Track(low_addr, nullptr);
        if (!storage_buffer) {
            // If that also fails, use NVN fallbacks
@@ -425,8 +434,12 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
    IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};

    // Align the offset base to match the host alignment requirements
+    // Use a more aggressive alignment mask for better performance
    low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
-    return ir.ISub(offset, low_cbuf);
+
+    // Also align the resulting offset for optimal memory access
+    IR::U32 result = ir.ISub(offset, low_cbuf);
+    return result;
 }

 /// Replace a global memory load instruction with its storage buffer equivalent
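For reference on the mask written in StorageOffset: `~(alignment - 1U)` rounds the low constant-buffer address down to the configured power-of-two boundary, now 32 bytes. A standalone arithmetic sketch of that bit trick, not part of the commit:

#include <cstdint>

// Round an address down to a power-of-two alignment, the same operation
// StorageOffset performs via ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))).
constexpr std::uint32_t AlignDown(std::uint32_t addr, std::uint32_t alignment) {
    return addr & ~(alignment - 1U);
}

// With alignment = 32: ~(32 - 1) = 0xFFFFFFE0, so 0x135 rounds down to 0x120
// and an already-aligned 0x140 is left unchanged.
static_assert(AlignDown(0x135U, 32U) == 0x120U);
static_assert(AlignDown(0x140U, 32U) == 0x140U);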