mirror of
https://git.h3cjp.net/H3cJP/citra.git
synced 2024-12-28 14:16:57 +00:00
Merge pull request #2512 from ReinUsesLisp/comp-indexing
gl_shader_decompiler: Pessimize uniform buffer access on AMD's prorpietary driver
This commit is contained in:
commit
0bcc305797
|
@ -2,11 +2,14 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
|
@ -24,6 +27,7 @@ Device::Device() {
|
|||
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
||||
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
|
||||
has_variable_aoffi = TestVariableAoffi();
|
||||
has_component_indexing_bug = TestComponentIndexingBug();
|
||||
}
|
||||
|
||||
Device::Device(std::nullptr_t) {
|
||||
|
@ -31,6 +35,7 @@ Device::Device(std::nullptr_t) {
|
|||
max_vertex_attributes = 16;
|
||||
max_varyings = 15;
|
||||
has_variable_aoffi = true;
|
||||
has_component_indexing_bug = false;
|
||||
}
|
||||
|
||||
bool Device::TestVariableAoffi() {
|
||||
|
@ -52,4 +57,53 @@ void main() {
|
|||
return supported;
|
||||
}
|
||||
|
||||
bool Device::TestComponentIndexingBug() {
|
||||
constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}";
|
||||
const GLchar* COMPONENT_TEST = R"(#version 430 core
|
||||
layout (std430, binding = 0) buffer OutputBuffer {
|
||||
uint output_value;
|
||||
};
|
||||
layout (std140, binding = 0) uniform InputBuffer {
|
||||
uvec4 input_value[4096];
|
||||
};
|
||||
layout (location = 0) uniform uint idx;
|
||||
void main() {
|
||||
output_value = input_value[idx >> 2][idx & 3];
|
||||
})";
|
||||
const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)};
|
||||
SCOPE_EXIT({ glDeleteProgram(shader); });
|
||||
glUseProgram(shader);
|
||||
|
||||
OGLVertexArray vao;
|
||||
vao.Create();
|
||||
glBindVertexArray(vao.handle);
|
||||
|
||||
constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432};
|
||||
OGLBuffer ubo;
|
||||
ubo.Create();
|
||||
glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW);
|
||||
glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle);
|
||||
|
||||
OGLBuffer ssbo;
|
||||
ssbo.Create();
|
||||
glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT);
|
||||
|
||||
for (GLuint index = 4; index < 8; ++index) {
|
||||
glInvalidateBufferData(ssbo.handle);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle);
|
||||
|
||||
glProgramUniform1ui(shader, 0, index);
|
||||
glDrawArrays(GL_POINTS, 0, 1);
|
||||
|
||||
GLuint result;
|
||||
glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result);
|
||||
if (result != values.at(index)) {
|
||||
LOG_INFO(Render_OpenGL, log_message, true);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
LOG_INFO(Render_OpenGL, log_message, false);
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -30,13 +30,19 @@ public:
|
|||
return has_variable_aoffi;
|
||||
}
|
||||
|
||||
bool HasComponentIndexingBug() const {
|
||||
return has_component_indexing_bug;
|
||||
}
|
||||
|
||||
private:
|
||||
static bool TestVariableAoffi();
|
||||
static bool TestComponentIndexingBug();
|
||||
|
||||
std::size_t uniform_buffer_alignment{};
|
||||
u32 max_vertex_attributes{};
|
||||
u32 max_varyings{};
|
||||
bool has_variable_aoffi{};
|
||||
bool has_component_indexing_bug{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -577,11 +577,28 @@ private:
|
|||
if (std::holds_alternative<OperationNode>(*offset)) {
|
||||
// Indirect access
|
||||
const std::string final_offset = code.GenerateTemporary();
|
||||
code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset));
|
||||
return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
|
||||
code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset));
|
||||
|
||||
if (!device.HasComponentIndexingBug()) {
|
||||
return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
|
||||
final_offset, final_offset);
|
||||
}
|
||||
|
||||
// AMD's proprietary GLSL compiler emits ill code for variable component access.
|
||||
// To bypass this driver bug generate 4 ifs, one per each component.
|
||||
const std::string pack = code.GenerateTemporary();
|
||||
code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
|
||||
final_offset);
|
||||
|
||||
const std::string result = code.GenerateTemporary();
|
||||
code.AddLine("float {};", result);
|
||||
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
|
||||
code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
|
||||
pack, GetSwizzle(swizzle));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
UNREACHABLE_MSG("Unmanaged offset node type");
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue