Merge pull request #3347 from ReinUsesLisp/local-mem

shader/memory: Implement LDL.S16, LDS.S16, STL.S16 and STS.S16
2024-11-15 17:32:47 +00:00 · 2020-01-30 10:59:52 -05:00 · 2020-01-30 10:59:52 -05:00 · c593e45dbd
parent 2db7adc42a d26e74f0a3
commit c593e45dbd
1 changed files with 55 additions and 30 deletions
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@ -24,6 +24,7 @@ using Tegra::Shader::GlobalAtomicType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
 using Tegra::Shader::StoreType;
 namespace {
@ -63,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
    }
 }
 /// Builds a node that extracts a bitfield of `size` bits from `value`.
 ///
 /// The bit offset of the field is computed as `(address & mask) << 3`, i.e. the masked low
 /// bits of the address multiplied by 8 (presumably a byte offset inside the loaded word
 /// turned into a bit offset - confirm against callers).
 ///
 /// @param value   Node holding the loaded word to extract from.
 /// @param address Node holding the address whose low bits select the field.
 /// @param mask    Mask applied to the address before it is converted to a bit offset.
 /// @param size    Width in bits of the extracted field.
 /// @return A UBitfieldExtract operation node.
 Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
    // `address` is an owned by-value parameter that is not used again, so hand it over instead
    // of copying it (matches what InsertUnaligned does).
    Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
    return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
                     Immediate(size));
 }
 /// Builds a node that inserts the low `size` bits of `value` into `dest`.
 ///
 /// The insertion position is `(address & mask) << 3`: the masked address bits multiplied by 8.
 ///
 /// @param dest    Node holding the word that receives the bitfield.
 /// @param value   Node holding the bits to insert.
 /// @param address Node holding the address whose low bits select the insertion position.
 /// @param mask    Mask applied to the address before it is converted to a bit offset.
 /// @param size    Width in bits of the inserted field.
 /// @return A UBitfieldInsert operation node.
 Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
    // Keep only the address bits selected by the mask.
    Node masked_address =
        Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
    // Multiply by 8 to obtain the position of the field in bits.
    Node bit_position =
        Operation(OperationCode::ULogicalShiftLeft, std::move(masked_address), Immediate(3));
    return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
                     std::move(bit_position), Immediate(size));
 }
 /// Builds a node that sign-extends a 16-bit quantity held in `value` to 32 bits.
 ///
 /// Bit 15 of `value` is tested; when it is set, the upper 16 bits are ORed in as ones,
 /// otherwise `value` is ORed with zero and passes through unchanged.
 ///
 /// NOTE(review): the upper 16 bits are never cleared here, so this appears to assume the
 /// caller passes a value whose upper half is already zero (e.g. the result of a 16-bit
 /// bitfield extract) - confirm against callers.
 ///
 /// @param value Node holding the 16-bit quantity in its low bits.
 /// @return A UBitwiseOr operation node combining `value` with the extension mask.
 Node Sign16Extend(Node value) {
    // `auto` keeps the exact literal types the immediates are built from.
    constexpr auto sign_bit = 1U << 15;
    constexpr auto upper_half = 0xFFFF0000;

    // `value` is still needed for the final OR, so it is copied here on purpose.
    Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(sign_bit));
    Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(sign_bit));
    // `is_sign` has no further use, so move it into the select instead of copying it.
    Node extend = Operation(OperationCode::Select, std::move(is_sign), Immediate(upper_half),
                            Immediate(0));
    return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
 }
 } // Anonymous namespace
 u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@ -138,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));
        [[fallthrough]];
    case OpCode::Id::LD_S: {
-        const auto GetMemory = [&](s32 offset) {
+        const auto GetAddress = [&](s32 offset) {
            ASSERT(offset % 4 == 0);
            const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
-            const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
+            return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
-                                           immediate_offset);
+        };
-            return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address)
+        const auto GetMemory = [&](s32 offset) {
-                                                             : GetLocalMemory(address);
+            return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
                                                             : GetLocalMemory(GetAddress(offset));
        };
        switch (instr.ldst_sl.type.Value()) {
-        case Tegra::Shader::StoreType::Bits32:
+        case StoreType::Signed16:
-        case Tegra::Shader::StoreType::Bits64:
+            SetRegister(bb, instr.gpr0,
-        case Tegra::Shader::StoreType::Bits128: {
+                        Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
-            const u32 count = [&]() {
+            break;
        case StoreType::Bits32:
        case StoreType::Bits64:
        case StoreType::Bits128: {
            const u32 count = [&] {
                switch (instr.ldst_sl.type.Value()) {
-                case Tegra::Shader::StoreType::Bits32:
+                case StoreType::Bits32:
                    return 1;
-                case Tegra::Shader::StoreType::Bits64:
+                case StoreType::Bits64:
                    return 2;
-                case Tegra::Shader::StoreType::Bits128:
+                case StoreType::Bits128:
                    return 4;
                default:
                    UNREACHABLE();
@ -214,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
            // To handle unaligned loads get the bytes used to dereference global memory and extract
            // those bytes from the loaded u32.
            if (IsUnaligned(type)) {
-                Node mask = Immediate(GetUnalignedMask(type));
+                gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
                Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
                offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
                gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
                                 std::move(offset), Immediate(size));
            }
            SetTemporary(bb, i, gmem);
@ -271,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
            return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
        };
-        const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L
+        const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
-                                    ? &ShaderIR::SetLocalMemory
+        const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
-                                    : &ShaderIR::SetSharedMemory;
+        const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
        switch (instr.ldst_sl.type.Value()) {
-        case Tegra::Shader::StoreType::Bits128:
+        case StoreType::Bits128:
            (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
            (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
            [[fallthrough]];
-        case Tegra::Shader::StoreType::Bits64:
+        case StoreType::Bits64:
            (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
            [[fallthrough]];
-        case Tegra::Shader::StoreType::Bits32:
+        case StoreType::Bits32:
            (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
            break;
        case StoreType::Signed16: {
            Node address = GetAddress(0);
            Node memory = (this->*get_memory)(address);
            (this->*set_memory)(
                bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
                              static_cast<u32>(instr.ldst_sl.type.Value()));
@ -325,12 +354,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
            Node value = GetRegister(instr.gpr0.Value() + i);
            if (IsUnaligned(type)) {
-                Node mask = Immediate(GetUnalignedMask(type));
+                const u32 mask = GetUnalignedMask(type);
-                Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
+                value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
                offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
                value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
                                  Immediate(size));
            }
            bb.push_back(Operation(OperationCode::Assign, gmem, value));