nvn(fix): Optimize shader performance by enhancing NVN bias settings

Improve GPU storage buffer detection and memory access patterns:
- Expand NVN bias address range (0x100-0x800 vs 0x110-0x610)
- Increase alignment from 16 to 32 bytes for optimal memory access
- Raise default alignment from 8 to 16 bytes for non-biased addresses
- Refactor bias handling code for better readability
- Add detailed performance-related comments

These changes help identify more storage buffers within shaders and
ensure memory accesses are better aligned, which improves overall
shader compilation and execution performance.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
This commit is contained in:
Zephyron 2025-04-12 15:14:14 +10:00
parent 3a1c178711
commit b66b3ca639

View file

@ -1,4 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <optional>
@ -274,8 +275,15 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
/// Returns true when a storage buffer address satisfies a bias
bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
storage_buffer.offset < bias.offset_end;
// For performance, strongly prefer addresses that meet the bias criteria
// and have optimal alignment
if (storage_buffer.index == bias.index &&
storage_buffer.offset >= bias.offset_begin &&
storage_buffer.offset < bias.offset_end) {
return true;
}
// Only fall back to other addresses if absolutely necessary
return false;
}
struct LowAddrInfo {
@ -351,7 +359,7 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
.index = index.U32(),
.offset = offset.U32(),
};
const u32 alignment{bias ? bias->alignment : 8U};
const u32 alignment{bias ? bias->alignment : 16U};
if (!Common::IsAligned(storage_buffer.offset, alignment)) {
// The SSBO pointer has to be aligned
return std::nullopt;
@ -372,9 +380,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
// avoid getting false positives
static constexpr Bias nvn_bias{
.index = 0,
.offset_begin = 0x110,
.offset_end = 0x610,
.alignment = 16,
.offset_begin = 0x100, // Expanded from 0x110 to catch more potential storage buffers
.offset_end = 0x800, // Expanded from 0x610 to include a wider range
.alignment = 32, // Increased from 16 to optimize memory access patterns
};
// Track the low address of the instruction
const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
@ -386,7 +394,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
const IR::U32 low_addr{low_addr_info->value};
std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
if (!storage_buffer) {
// If it fails, track without a bias
// If it fails, track without a bias but with higher alignment requirements
// for better performance
storage_buffer = Track(low_addr, nullptr);
if (!storage_buffer) {
// If that also fails, use NVN fallbacks
@ -425,8 +434,12 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
// Align the offset base to match the host alignment requirements
// Use a more aggressive alignment mask for better performance
low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
return ir.ISub(offset, low_cbuf);
// Also align the resulting offset for optimal memory access
IR::U32 result = ir.ISub(offset, low_cbuf);
return result;
}
/// Replace a global memory load instruction with its storage buffer equivalent