mirror of
https://git.h3cjp.net/H3cJP/citra.git
synced 2025-01-13 19:27:20 +00:00
VideoCore: Extract swrast-specific data from OutputVertex
This commit is contained in:
parent
8ed9f9d49f
commit
dcdffabfe6
|
@ -18,6 +18,8 @@
|
||||||
#include "video_core/rasterizer.h"
|
#include "video_core/rasterizer.h"
|
||||||
#include "video_core/shader/shader.h"
|
#include "video_core/shader/shader.h"
|
||||||
|
|
||||||
|
using Pica::Rasterizer::Vertex;
|
||||||
|
|
||||||
namespace Pica {
|
namespace Pica {
|
||||||
|
|
||||||
namespace Clipper {
|
namespace Clipper {
|
||||||
|
@ -29,20 +31,20 @@ public:
|
||||||
float24::FromFloat32(0), float24::FromFloat32(0)))
|
float24::FromFloat32(0), float24::FromFloat32(0)))
|
||||||
: coeffs(coeffs), bias(bias) {}
|
: coeffs(coeffs), bias(bias) {}
|
||||||
|
|
||||||
bool IsInside(const OutputVertex& vertex) const {
|
bool IsInside(const Vertex& vertex) const {
|
||||||
return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
|
return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsOutSide(const OutputVertex& vertex) const {
|
bool IsOutSide(const Vertex& vertex) const {
|
||||||
return !IsInside(vertex);
|
return !IsInside(vertex);
|
||||||
}
|
}
|
||||||
|
|
||||||
OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const {
|
Vertex GetIntersection(const Vertex& v0, const Vertex& v1) const {
|
||||||
float24 dp = Math::Dot(v0.pos + bias, coeffs);
|
float24 dp = Math::Dot(v0.pos + bias, coeffs);
|
||||||
float24 dp_prev = Math::Dot(v1.pos + bias, coeffs);
|
float24 dp_prev = Math::Dot(v1.pos + bias, coeffs);
|
||||||
float24 factor = dp_prev / (dp_prev - dp);
|
float24 factor = dp_prev / (dp_prev - dp);
|
||||||
|
|
||||||
return OutputVertex::Lerp(factor, v0, v1);
|
return Vertex::Lerp(factor, v0, v1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -51,7 +53,7 @@ private:
|
||||||
Math::Vec4<float24> bias;
|
Math::Vec4<float24> bias;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void InitScreenCoordinates(OutputVertex& vtx) {
|
static void InitScreenCoordinates(Vertex& vtx) {
|
||||||
struct {
|
struct {
|
||||||
float24 halfsize_x;
|
float24 halfsize_x;
|
||||||
float24 offset_x;
|
float24 offset_x;
|
||||||
|
@ -91,8 +93,8 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
|
||||||
// introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a
|
// introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a
|
||||||
// fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.
|
// fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.
|
||||||
static const size_t MAX_VERTICES = 9;
|
static const size_t MAX_VERTICES = 9;
|
||||||
static_vector<OutputVertex, MAX_VERTICES> buffer_a = {v0, v1, v2};
|
static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2};
|
||||||
static_vector<OutputVertex, MAX_VERTICES> buffer_b;
|
static_vector<Vertex, MAX_VERTICES> buffer_b;
|
||||||
auto* output_list = &buffer_a;
|
auto* output_list = &buffer_a;
|
||||||
auto* input_list = &buffer_b;
|
auto* input_list = &buffer_b;
|
||||||
|
|
||||||
|
@ -123,7 +125,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
|
||||||
std::swap(input_list, output_list);
|
std::swap(input_list, output_list);
|
||||||
output_list->clear();
|
output_list->clear();
|
||||||
|
|
||||||
const OutputVertex* reference_vertex = &input_list->back();
|
const Vertex* reference_vertex = &input_list->back();
|
||||||
|
|
||||||
for (const auto& vertex : *input_list) {
|
for (const auto& vertex : *input_list) {
|
||||||
// NOTE: This algorithm changes vertex order in some cases!
|
// NOTE: This algorithm changes vertex order in some cases!
|
||||||
|
@ -148,9 +150,9 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
|
||||||
InitScreenCoordinates((*output_list)[1]);
|
InitScreenCoordinates((*output_list)[1]);
|
||||||
|
|
||||||
for (size_t i = 0; i < output_list->size() - 2; i++) {
|
for (size_t i = 0; i < output_list->size() - 2; i++) {
|
||||||
OutputVertex& vtx0 = (*output_list)[0];
|
Vertex& vtx0 = (*output_list)[0];
|
||||||
OutputVertex& vtx1 = (*output_list)[i + 1];
|
Vertex& vtx1 = (*output_list)[i + 1];
|
||||||
OutputVertex& vtx2 = (*output_list)[i + 2];
|
Vertex& vtx2 = (*output_list)[i + 2];
|
||||||
|
|
||||||
InitScreenCoordinates(vtx2);
|
InitScreenCoordinates(vtx2);
|
||||||
|
|
||||||
|
|
|
@ -307,8 +307,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24
|
||||||
* Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
|
* Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
|
||||||
* culling via recursion.
|
* culling via recursion.
|
||||||
*/
|
*/
|
||||||
static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
|
static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Vertex& v2,
|
||||||
const Shader::OutputVertex& v2, bool reversed = false) {
|
bool reversed = false) {
|
||||||
const auto& regs = g_state.regs;
|
const auto& regs = g_state.regs;
|
||||||
MICROPROFILE_SCOPE(GPU_Rasterization);
|
MICROPROFILE_SCOPE(GPU_Rasterization);
|
||||||
|
|
||||||
|
@ -1276,8 +1276,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
|
void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2) {
|
||||||
const Shader::OutputVertex& v2) {
|
|
||||||
ProcessTriangleInternal(v0, v1, v2);
|
ProcessTriangleInternal(v0, v1, v2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,16 +4,44 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
namespace Pica {
|
#include "video_core/shader/shader.h"
|
||||||
|
|
||||||
namespace Shader {
|
namespace Pica {
|
||||||
struct OutputVertex;
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace Rasterizer {
|
namespace Rasterizer {
|
||||||
|
|
||||||
void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
|
struct Vertex : Shader::OutputVertex {
|
||||||
const Shader::OutputVertex& v2);
|
Vertex(const OutputVertex& v) : OutputVertex(v) {}
|
||||||
|
|
||||||
|
// Attributes used to store intermediate results
|
||||||
|
// position after perspective divide
|
||||||
|
Math::Vec3<float24> screenpos;
|
||||||
|
|
||||||
|
// Linear interpolation
|
||||||
|
// factor: 0=this, 1=vtx
|
||||||
|
void Lerp(float24 factor, const Vertex& vtx) {
|
||||||
|
pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
|
||||||
|
|
||||||
|
// TODO: Should perform perspective correct interpolation here...
|
||||||
|
tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
|
||||||
|
tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);
|
||||||
|
tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);
|
||||||
|
|
||||||
|
screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
|
||||||
|
|
||||||
|
color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Linear interpolation
|
||||||
|
// factor: 0=v0, 1=v1
|
||||||
|
static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) {
|
||||||
|
Vertex ret = v0;
|
||||||
|
ret.Lerp(factor, v1);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2);
|
||||||
|
|
||||||
} // namespace Rasterizer
|
} // namespace Rasterizer
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ OutputVertex OutputVertex::FromAttributeBuffer(const Regs& regs, AttributeBuffer
|
||||||
OutputVertex ret{};
|
OutputVertex ret{};
|
||||||
std::array<float24, 24> vertex_slots;
|
std::array<float24, 24> vertex_slots;
|
||||||
};
|
};
|
||||||
static_assert(sizeof(vertex_slots) <= sizeof(ret), "Struct and array have different sizes.");
|
static_assert(sizeof(vertex_slots) == sizeof(ret), "Struct and array have different sizes.");
|
||||||
|
|
||||||
unsigned int num_attributes = regs.vs_output_total;
|
unsigned int num_attributes = regs.vs_output_total;
|
||||||
ASSERT(num_attributes <= 7);
|
ASSERT(num_attributes <= 7);
|
||||||
|
|
|
@ -28,9 +28,6 @@ struct AttributeBuffer {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct OutputVertex {
|
struct OutputVertex {
|
||||||
OutputVertex() = default;
|
|
||||||
|
|
||||||
// VS output attributes
|
|
||||||
Math::Vec4<float24> pos;
|
Math::Vec4<float24> pos;
|
||||||
Math::Vec4<float24> quat;
|
Math::Vec4<float24> quat;
|
||||||
Math::Vec4<float24> color;
|
Math::Vec4<float24> color;
|
||||||
|
@ -42,42 +39,22 @@ struct OutputVertex {
|
||||||
INSERT_PADDING_WORDS(1);
|
INSERT_PADDING_WORDS(1);
|
||||||
Math::Vec2<float24> tc2;
|
Math::Vec2<float24> tc2;
|
||||||
|
|
||||||
// Padding for optimal alignment
|
|
||||||
INSERT_PADDING_WORDS(4);
|
|
||||||
|
|
||||||
// Attributes used to store intermediate results
|
|
||||||
|
|
||||||
// position after perspective divide
|
|
||||||
Math::Vec3<float24> screenpos;
|
|
||||||
INSERT_PADDING_WORDS(1);
|
|
||||||
|
|
||||||
// Linear interpolation
|
|
||||||
// factor: 0=this, 1=vtx
|
|
||||||
void Lerp(float24 factor, const OutputVertex& vtx) {
|
|
||||||
pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
|
|
||||||
|
|
||||||
// TODO: Should perform perspective correct interpolation here...
|
|
||||||
tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
|
|
||||||
tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);
|
|
||||||
tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);
|
|
||||||
|
|
||||||
screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
|
|
||||||
|
|
||||||
color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Linear interpolation
|
|
||||||
// factor: 0=v0, 1=v1
|
|
||||||
static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) {
|
|
||||||
OutputVertex ret = v0;
|
|
||||||
ret.Lerp(factor, v1);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static OutputVertex FromAttributeBuffer(const Regs& regs, AttributeBuffer& output);
|
static OutputVertex FromAttributeBuffer(const Regs& regs, AttributeBuffer& output);
|
||||||
};
|
};
|
||||||
|
#define ASSERT_POS(var, pos) \
|
||||||
|
static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \
|
||||||
|
"offset.")
|
||||||
|
ASSERT_POS(pos, Regs::VSOutputAttributes::POSITION_X);
|
||||||
|
ASSERT_POS(quat, Regs::VSOutputAttributes::QUATERNION_X);
|
||||||
|
ASSERT_POS(color, Regs::VSOutputAttributes::COLOR_R);
|
||||||
|
ASSERT_POS(tc0, Regs::VSOutputAttributes::TEXCOORD0_U);
|
||||||
|
ASSERT_POS(tc1, Regs::VSOutputAttributes::TEXCOORD1_U);
|
||||||
|
ASSERT_POS(tc0_w, Regs::VSOutputAttributes::TEXCOORD0_W);
|
||||||
|
ASSERT_POS(view, Regs::VSOutputAttributes::VIEW_X);
|
||||||
|
ASSERT_POS(tc2, Regs::VSOutputAttributes::TEXCOORD2_U);
|
||||||
|
#undef ASSERT_POS
|
||||||
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
|
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
|
||||||
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
|
static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This structure contains the state information that needs to be unique for a shader unit. The 3DS
|
* This structure contains the state information that needs to be unique for a shader unit. The 3DS
|
||||||
|
|
Loading…
Reference in a new issue