From a841ce6451a92bfceea2266f1a48c31fa6b72888 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 19:02:06 +0100 Subject: [PATCH 1/3] xbyak_abi: Register indexes should be unsigned --- src/common/x64/xbyak_abi.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 636a5c0f96..c0c25ac95e 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -11,7 +11,7 @@ namespace Common::X64 { -inline int RegToIndex(const Xbyak::Reg& reg) { +inline std::size_t RegToIndex(const Xbyak::Reg& reg) { using Kind = Xbyak::Reg::Kind; ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, "RegSet only support GPRs and XMM registers."); @@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) { return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); } -inline Xbyak::Reg64 IndexToReg64(int reg_index) { +inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { ASSERT(reg_index < 16); - return Xbyak::Reg64(reg_index); + return Xbyak::Reg64(static_cast(reg_index)); } -inline Xbyak::Xmm IndexToXmm(int reg_index) { +inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) { ASSERT(reg_index >= 16 && reg_index < 32); - return Xbyak::Xmm(reg_index - 16); + return Xbyak::Xmm(static_cast(reg_index - 16)); } -inline Xbyak::Reg IndexToReg(int reg_index) { +inline Xbyak::Reg IndexToReg(std::size_t reg_index) { if (reg_index < 16) { return IndexToReg64(reg_index); } else { From 045d20e076a83e921b6fe5bbfd0f789695a5da6c Mon Sep 17 00:00:00 2001 From: MerryMage Date: Mon, 15 Jun 2020 19:04:29 +0100 Subject: [PATCH 2/3] xbyak_abi: Prefer returning a struct to using out parameters in ABI_CalculateFrameSize --- src/common/x64/xbyak_abi.h | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index c0c25ac95e..5c0f9d7fff 100644 --- a/src/common/x64/xbyak_abi.h +++ 
b/src/common/x64/xbyak_abi.h @@ -151,9 +151,13 @@ constexpr std::size_t ABI_SHADOW_SPACE = 0; #endif -inline void ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment, - std::size_t needed_frame_size, s32* out_subtraction, - s32* out_xmm_offset) { +struct ABIFrameInfo { + s32 subtraction; + s32 xmm_offset; +}; + +inline ABIFrameInfo ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment, + std::size_t needed_frame_size) { int count = (regs & ABI_ALL_GPRS).Count(); rsp_alignment -= count * 8; std::size_t subtraction = 0; @@ -170,27 +174,26 @@ inline void ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment, rsp_alignment -= subtraction; subtraction += rsp_alignment & 0xF; - *out_subtraction = (s32)subtraction; - *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); + return ABIFrameInfo{static_cast(subtraction), + static_cast(subtraction - xmm_base_subtraction)}; } inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, std::size_t rsp_alignment, std::size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); for (int reg_index : (regs & ABI_ALL_GPRS)) { code.push(IndexToReg64(reg_index)); } - if (subtraction != 0) { - code.sub(code.rsp, subtraction); + if (frame_info.subtraction != 0) { + code.sub(code.rsp, frame_info.subtraction); } for (int reg_index : (regs & ABI_ALL_XMMS)) { - code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(reg_index)); - xmm_offset += 0x10; + code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(reg_index)); + frame_info.xmm_offset += 0x10; } return ABI_SHADOW_SPACE; @@ -199,16 +202,15 @@ inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, B inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, std::size_t 
rsp_alignment, std::size_t needed_frame_size = 0) { - s32 subtraction, xmm_offset; - ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); + auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); for (int reg_index : (regs & ABI_ALL_XMMS)) { - code.movaps(IndexToXmm(reg_index), code.xword[code.rsp + xmm_offset]); - xmm_offset += 0x10; + code.movaps(IndexToXmm(reg_index), code.xword[code.rsp + frame_info.xmm_offset]); + frame_info.xmm_offset += 0x10; } - if (subtraction != 0) { - code.add(code.rsp, subtraction); + if (frame_info.subtraction != 0) { + code.add(code.rsp, frame_info.subtraction); } // GPRs need to be popped in reverse order From 9c08409e0ea5a531c34eb09c4e3afc00acd32762 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Sun, 30 Aug 2020 04:58:45 -0400 Subject: [PATCH 3/3] externals: Update Xbyak to 5.96 I made a request on the Xbyak issue tracker to allow some constructors to be constexpr in order to prevent static constructors from needing to execute for some of our register constants. This request was implemented, so this updates Xbyak so that we can make use of it. 
--- externals/xbyak | 2 +- src/common/x64/xbyak_abi.h | 78 ++++++++++--------- .../shader/shader_jit_x64_compiler.cpp | 36 ++++----- .../shader/shader_jit_x64_compiler.h | 3 +- 4 files changed, 63 insertions(+), 56 deletions(-) diff --git a/externals/xbyak b/externals/xbyak index 18c9caaa0a..c306b8e578 160000 --- a/externals/xbyak +++ b/externals/xbyak @@ -1 +1 @@ -Subproject commit 18c9caaa0a3ed5706c39f5aa86cce0db6e65b174 +Subproject commit c306b8e5786eeeb87b8925a8af5c3bf057ff5a90 diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 5c0f9d7fff..0139db2326 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h @@ -4,14 +4,14 @@ #pragma once +#include #include #include #include "common/assert.h" -#include "common/bit_set.h" namespace Common::X64 { -inline std::size_t RegToIndex(const Xbyak::Reg& reg) { +constexpr std::size_t RegToIndex(const Xbyak::Reg& reg) { using Kind = Xbyak::Reg::Kind; ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, "RegSet only support GPRs and XMM registers."); @@ -19,17 +19,17 @@ inline std::size_t RegToIndex(const Xbyak::Reg& reg) { return reg.getIdx() + (reg.getKind() == Kind::REG ? 
0 : 16); } -inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { +constexpr Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { ASSERT(reg_index < 16); return Xbyak::Reg64(static_cast(reg_index)); } -inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) { +constexpr Xbyak::Xmm IndexToXmm(std::size_t reg_index) { ASSERT(reg_index >= 16 && reg_index < 32); return Xbyak::Xmm(static_cast(reg_index - 16)); } -inline Xbyak::Reg IndexToReg(std::size_t reg_index) { +constexpr Xbyak::Reg IndexToReg(std::size_t reg_index) { if (reg_index < 16) { return IndexToReg64(reg_index); } else { @@ -37,27 +37,27 @@ inline Xbyak::Reg IndexToReg(std::size_t reg_index) { } } -inline BitSet32 BuildRegSet(std::initializer_list regs) { - BitSet32 bits; +inline std::bitset<32> BuildRegSet(std::initializer_list regs) { + std::bitset<32> bits; for (const Xbyak::Reg& reg : regs) { bits[RegToIndex(reg)] = true; } return bits; } -const BitSet32 ABI_ALL_GPRS(0x0000FFFF); -const BitSet32 ABI_ALL_XMMS(0xFFFF0000); +constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF); +constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000); #ifdef _WIN32 // Microsoft x64 ABI -const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; -const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx; -const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; -const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; -const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; +constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax; +constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx; +constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; +constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; +constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; -const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ +const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ // GPRs Xbyak::util::rcx, Xbyak::util::rdx, @@ -74,7 +74,7 @@ const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ Xbyak::util::xmm5, }); -const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({ 
+const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ // GPRs Xbyak::util::rbx, Xbyak::util::rsi, @@ -102,13 +102,13 @@ constexpr std::size_t ABI_SHADOW_SPACE = 0x20; #else // System V x86-64 ABI -const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; -const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi; -const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; -const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; -const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; +constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax; +constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi; +constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; +constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; +constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; -const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ +const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ // GPRs Xbyak::util::rcx, Xbyak::util::rdx, @@ -137,7 +137,7 @@ const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ Xbyak::util::xmm15, }); -const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({ +const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ // GPRs Xbyak::util::rbx, Xbyak::util::rbp, @@ -156,12 +156,12 @@ struct ABIFrameInfo { s32 xmm_offset; }; -inline ABIFrameInfo ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment, +inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, std::size_t rsp_alignment, std::size_t needed_frame_size) { - int count = (regs & ABI_ALL_GPRS).Count(); + int count = (regs & ABI_ALL_GPRS).count(); rsp_alignment -= count * 8; std::size_t subtraction = 0; - int xmm_count = (regs & ABI_ALL_XMMS).Count(); + int xmm_count = (regs & ABI_ALL_XMMS).count(); if (xmm_count) { // If we have any XMMs to save, we must align the stack here. 
subtraction = rsp_alignment & 0xF; @@ -178,35 +178,41 @@ inline ABIFrameInfo ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignm static_cast(subtraction - xmm_base_subtraction)}; } -inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, +inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, std::size_t rsp_alignment, std::size_t needed_frame_size = 0) { auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); - for (int reg_index : (regs & ABI_ALL_GPRS)) { - code.push(IndexToReg64(reg_index)); + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_GPRS[i]) { + code.push(IndexToReg64(i)); + } } if (frame_info.subtraction != 0) { code.sub(code.rsp, frame_info.subtraction); } - for (int reg_index : (regs & ABI_ALL_XMMS)) { - code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(reg_index)); - frame_info.xmm_offset += 0x10; + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_XMMS[i]) { + code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i)); + frame_info.xmm_offset += 0x10; + } } return ABI_SHADOW_SPACE; } -inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, +inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, std::size_t rsp_alignment, std::size_t needed_frame_size = 0) { auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); - for (int reg_index : (regs & ABI_ALL_XMMS)) { - code.movaps(IndexToXmm(reg_index), code.xword[code.rsp + frame_info.xmm_offset]); - frame_info.xmm_offset += 0x10; + for (std::size_t i = 0; i < regs.size(); ++i) { + if (regs[i] && ABI_ALL_XMMS[i]) { + code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]); + frame_info.xmm_offset += 0x10; + } } if (frame_info.subtraction != 0) { diff --git 
a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 5286c5905a..6067627889 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -102,40 +102,40 @@ const JitFunction instr_table[64] = { // purposes, as documented below: /// Pointer to the uniform memory -static const Reg64 UNIFORMS = r9; +constexpr Reg64 UNIFORMS = r9; /// The two 32-bit VS address offset registers set by the MOVA instruction -static const Reg64 ADDROFFS_REG_0 = r10; -static const Reg64 ADDROFFS_REG_1 = r11; +constexpr Reg64 ADDROFFS_REG_0 = r10; +constexpr Reg64 ADDROFFS_REG_1 = r11; /// VS loop count register (Multiplied by 16) -static const Reg32 LOOPCOUNT_REG = r12d; +constexpr Reg32 LOOPCOUNT_REG = r12d; /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker) -static const Reg32 LOOPCOUNT = esi; +constexpr Reg32 LOOPCOUNT = esi; /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16) -static const Reg32 LOOPINC = edi; +constexpr Reg32 LOOPINC = edi; /// Result of the previous CMP instruction for the X-component comparison -static const Reg64 COND0 = r13; +constexpr Reg64 COND0 = r13; /// Result of the previous CMP instruction for the Y-component comparison -static const Reg64 COND1 = r14; +constexpr Reg64 COND1 = r14; /// Pointer to the UnitState instance for the current VS unit -static const Reg64 STATE = r15; +constexpr Reg64 STATE = r15; /// SIMD scratch register -static const Xmm SCRATCH = xmm0; +constexpr Xmm SCRATCH = xmm0; /// Loaded with the first swizzled source register, otherwise can be used as a scratch register -static const Xmm SRC1 = xmm1; +constexpr Xmm SRC1 = xmm1; /// Loaded with the second swizzled source register, otherwise can be used as a scratch register -static const Xmm SRC2 = xmm2; +constexpr Xmm SRC2 = xmm2; /// Loaded with the third swizzled source register, otherwise can be used 
as a scratch register -static const Xmm SRC3 = xmm3; +constexpr Xmm SRC3 = xmm3; /// Additional scratch register -static const Xmm SCRATCH2 = xmm4; +constexpr Xmm SCRATCH2 = xmm4; /// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one -static const Xmm ONE = xmm14; +constexpr Xmm ONE = xmm14; /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR -static const Xmm NEGBIT = xmm15; +constexpr Xmm NEGBIT = xmm15; // State registers that must not be modified by external functions calls // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed -static const BitSet32 persistent_regs = BuildRegSet({ +static const std::bitset<32> persistent_regs = BuildRegSet({ // Pointers to register blocks UNIFORMS, STATE, @@ -356,7 +356,7 @@ void JitShader::Compile_UniformCondition(Instruction instr) { cmp(byte[UNIFORMS + offset], 0); } -BitSet32 JitShader::PersistentCallerSavedRegs() { +std::bitset<32> JitShader::PersistentCallerSavedRegs() { return persistent_regs & ABI_ALL_CALLER_SAVED; } diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index bdf569632f..507cd0ff36 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include @@ -91,7 +92,7 @@ private: */ void Compile_Return(); - BitSet32 PersistentCallerSavedRegs(); + std::bitset<32> PersistentCallerSavedRegs(); /** * Assertion evaluated at compile-time, but only triggered if executed at runtime.