From efec8fe51311bdc6ce63f206f06b6c4ac4e1d2be Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sun, 10 Dec 2017 22:00:04 +0000 Subject: [PATCH 1/2] shader_jit_x64_compiler: Use haddps for horizontal summation --- .../shader/shader_jit_x64_compiler.cpp | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index c8afdd543c..9f50b18a7c 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -387,13 +387,18 @@ void JitShader::Compile_DP4(Instruction instr) { Compile_SanitizedMul(SRC1, SRC2, SCRATCH); - movaps(SRC2, SRC1); - shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY - addps(SRC1, SRC2); + if (Common::GetCPUCaps().sse3) { + haddps(SRC1, SRC1); + haddps(SRC1, SRC1); + } else { + movaps(SRC2, SRC1); + shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY + addps(SRC1, SRC2); - movaps(SRC2, SRC1); - shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX - addps(SRC1, SRC2); + movaps(SRC2, SRC1); + shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX + addps(SRC1, SRC2); + } Compile_DestEnable(instr, SRC1); } @@ -419,13 +424,18 @@ void JitShader::Compile_DPH(Instruction instr) { Compile_SanitizedMul(SRC1, SRC2, SCRATCH); - movaps(SRC2, SRC1); - shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY - addps(SRC1, SRC2); + if (Common::GetCPUCaps().sse3) { + haddps(SRC1, SRC1); + haddps(SRC1, SRC1); + } else { + movaps(SRC2, SRC1); + shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY + addps(SRC1, SRC2); - movaps(SRC2, SRC1); - shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX - addps(SRC1, SRC2); + movaps(SRC2, SRC1); + shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX + addps(SRC1, SRC2); + } Compile_DestEnable(instr, SRC1); } From 6c199e469971ffacce16cc24c87bb2e3d9ebcf7c Mon Sep 17 00:00:00 2001 From: MerryMage Date: Tue, 12 Dec 2017 15:37:00 +0000 Subject: [PATCH 2/2] fixup! shader_jit_x64_compiler: Use haddps for horizontal summation --- .../shader/shader_jit_x64_compiler.cpp | 28 +++---------------- 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 9f50b18a7c..fff9abbf76 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -387,18 +387,8 @@ void JitShader::Compile_DP4(Instruction instr) { Compile_SanitizedMul(SRC1, SRC2, SCRATCH); - if (Common::GetCPUCaps().sse3) { - haddps(SRC1, SRC1); - haddps(SRC1, SRC1); - } else { - movaps(SRC2, SRC1); - shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY - addps(SRC1, SRC2); - - movaps(SRC2, SRC1); - shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX - addps(SRC1, SRC2); - } + haddps(SRC1, SRC1); + haddps(SRC1, SRC1); Compile_DestEnable(instr, SRC1); } @@ -424,18 +414,8 @@ void JitShader::Compile_DPH(Instruction instr) { Compile_SanitizedMul(SRC1, SRC2, SCRATCH); - if (Common::GetCPUCaps().sse3) { - haddps(SRC1, SRC1); - haddps(SRC1, SRC1); - } else { - movaps(SRC2, SRC1); - shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY - addps(SRC1, SRC2); - - movaps(SRC2, SRC1); - shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX - addps(SRC1, SRC2); - } + haddps(SRC1, SRC1); + haddps(SRC1, SRC1); Compile_DestEnable(instr, SRC1); }