citra/src/video_core/renderer_vulkan/vk_rasterizer.cpp

// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/alignment.h"
#include "common/logging/log.h"
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/settings.h"
#include "video_core/pica_state.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_pipeline.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture/texture_decode.h"
namespace Vulkan {
namespace {
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_GS, "Vulkan", "Geometry Shader Setup", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192));
using TriangleTopology = Pica::PipelineRegs::TriangleTopology;
using VideoCore::SurfaceType;
using namespace Pica::Shader::Generator;
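// Sizes of the streaming buffers used for vertex/index data, uniform blocks and
// texel-buffer LUT uploads.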
constexpr u64 STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
constexpr u64 UNIFORM_BUFFER_SIZE = 4 * 1024 * 1024;
constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024;
constexpr vk::BufferUsageFlags BUFFER_USAGE =
vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer;
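// Draw parameters captured by value so the deferred scheduler command can bind the
// vertex buffers and issue the draw when the command buffer is actually recorded.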
struct DrawParams {
u32 vertex_count;
s32 vertex_offset;
u32 binding_count;
std::array<u32, 16> bindings;
bool is_indexed;
};
[[nodiscard]] u64 TextureBufferSize(const Instance& instance) {
// Use the smallest texel size among the texel buffer views,
// which corresponds to eR32G32Sfloat (8 bytes per texel).
const u64 max_size = instance.MaxTexelBufferElements() * 8;
return std::min(max_size, TEXTURE_BUFFER_SIZE);
}
} // Anonymous namespace
RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory,
VideoCore::CustomTexManager& custom_tex_manager,
VideoCore::RendererBase& renderer,
Frontend::EmuWindow& emu_window, const Instance& instance,
Scheduler& scheduler, DescriptorPool& pool,
RenderpassCache& renderpass_cache, u32 image_count)
: RasterizerAccelerated{memory}, instance{instance}, scheduler{scheduler},
renderpass_cache{renderpass_cache}, pipeline_cache{instance, scheduler, renderpass_cache,
pool},
runtime{instance, scheduler, renderpass_cache, pool, pipeline_cache.TextureProvider(),
image_count},
res_cache{memory, custom_tex_manager, runtime, regs, renderer},
stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE},
uniform_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformBuffer,
UNIFORM_BUFFER_SIZE},
texture_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformTexelBuffer,
TextureBufferSize(instance)},
texture_lf_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformTexelBuffer,
TextureBufferSize(instance)},
async_shaders{Settings::values.async_shader_compilation.GetValue()} {
vertex_buffers.fill(stream_buffer.Handle());
uniform_buffer_alignment = instance.UniformMinAlignment();
uniform_size_aligned_vs_pica =
Common::AlignUp(sizeof(VSPicaUniformData), uniform_buffer_alignment);
uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment);
uniform_size_aligned_fs = Common::AlignUp(sizeof(FSUniformData), uniform_buffer_alignment);
// Define vertex layout for software shaders
MakeSoftwareVertexLayout();
pipeline_info.vertex_layout = software_layout;
const vk::Device device = instance.GetDevice();
texture_lf_view = device.createBufferViewUnique({
.buffer = texture_lf_buffer.Handle(),
.format = vk::Format::eR32G32Sfloat,
.offset = 0,
.range = VK_WHOLE_SIZE,
});
texture_rg_view = device.createBufferViewUnique({
.buffer = texture_buffer.Handle(),
.format = vk::Format::eR32G32Sfloat,
.offset = 0,
.range = VK_WHOLE_SIZE,
});
texture_rgba_view = device.createBufferViewUnique({
.buffer = texture_buffer.Handle(),
.format = vk::Format::eR32G32B32A32Sfloat,
.offset = 0,
.range = VK_WHOLE_SIZE,
});
// Since we don't have access to VK_EXT_descriptor_indexing we need to initialize
// all descriptor sets, even the ones we don't use.
pipeline_cache.BindBuffer(0, uniform_buffer.Handle(), 0, sizeof(VSPicaUniformData));
pipeline_cache.BindBuffer(1, uniform_buffer.Handle(), 0, sizeof(VSUniformData));
pipeline_cache.BindBuffer(2, uniform_buffer.Handle(), 0, sizeof(FSUniformData));
pipeline_cache.BindTexelBuffer(3, *texture_lf_view);
pipeline_cache.BindTexelBuffer(4, *texture_rg_view);
pipeline_cache.BindTexelBuffer(5, *texture_rgba_view);
Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
Surface& null_cube_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_CUBE_ID);
Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID);
for (u32 i = 0; i < 3; i++) {
pipeline_cache.BindTexture(i, null_surface.ImageView(), null_sampler.Handle());
}
pipeline_cache.BindTexture(3, null_cube_surface.ImageView(), null_sampler.Handle());
for (u32 i = 0; i < 7; i++) {
pipeline_cache.BindStorageImage(i, null_surface.StorageView());
}
SyncEntireState();
}
RasterizerVulkan::~RasterizerVulkan() = default;
void RasterizerVulkan::TickFrame() {
res_cache.TickFrame();
}
void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
pipeline_cache.LoadDiskCache();
}
void RasterizerVulkan::SyncFixedState() {
SyncCullMode();
SyncBlendEnabled();
SyncBlendFuncs();
SyncBlendColor();
SyncLogicOp();
SyncStencilTest();
SyncDepthTest();
SyncColorWriteMask();
SyncStencilWriteMask();
SyncDepthWriteMask();
}
void RasterizerVulkan::SetupVertexArray() {
const auto [vs_input_index_min, vs_input_index_max, vs_input_size] = vertex_info;
auto [array_ptr, array_offset, invalidate] = stream_buffer.Map(vs_input_size, 16);
/**
 * The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
 * how to interpret vertex data. The program first sets GPUREG_ATTR_BUF_BASE to the base
 * address containing the vertex array data. The data for each attribute loader (i) can be
 * found by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be
 * thought of as something analogous to Vulkan bindings. The user can store attributes in
 * separate loaders or interleave them in the same loader.
 **/
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
const PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE
const u32 stride_alignment = instance.GetMinVertexStrideAlignment();
VertexLayout& layout = pipeline_info.vertex_layout;
layout.binding_count = 0;
layout.attribute_count = 16;
enable_attributes.fill(false);
u32 buffer_offset = 0;
for (const auto& loader : vertex_attributes.attribute_loaders) {
if (loader.component_count == 0 || loader.byte_count == 0) {
continue;
}
// Analyze the attribute loader by checking which attributes it provides
u32 offset = 0;
for (u32 comp = 0; comp < loader.component_count && comp < 12; comp++) {
const u32 attribute_index = loader.GetComponent(comp);
if (attribute_index >= 12) {
// Attribute IDs 12 to 15 signify 4, 8, 12 and 16-byte padding respectively.
offset = Common::AlignUp(offset, 4);
offset += (attribute_index - 11) * 4;
continue;
}
const u32 size = vertex_attributes.GetNumElements(attribute_index);
if (size == 0) {
continue;
}
offset =
Common::AlignUp(offset, vertex_attributes.GetElementSizeInBytes(attribute_index));
const u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index);
const auto format = vertex_attributes.GetFormat(attribute_index);
VertexAttribute& attribute = layout.attributes[input_reg];
attribute.binding.Assign(layout.binding_count);
attribute.location.Assign(input_reg);
attribute.offset.Assign(offset);
attribute.type.Assign(format);
attribute.size.Assign(size);
enable_attributes[input_reg] = true;
offset += vertex_attributes.GetStride(attribute_index);
}
const PAddr data_addr =
base_address + loader.data_offset + (vs_input_index_min * loader.byte_count);
const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1;
u32 data_size = loader.byte_count * vertex_num;
res_cache.FlushRegion(data_addr, data_size);
const MemoryRef src_ref = memory.GetPhysicalRef(data_addr);
if (src_ref.GetSize() < data_size) {
LOG_ERROR(Render_Vulkan,
"Vertex buffer size {} exceeds available space {} at address {:#016X}",
data_size, src_ref.GetSize(), data_addr);
}
const u8* src_ptr = src_ref.GetPtr();
u8* dst_ptr = array_ptr + buffer_offset;
// Align stride up if required by Vulkan implementation.
const u32 aligned_stride =
Common::AlignUp(static_cast<u32>(loader.byte_count), stride_alignment);
if (aligned_stride == loader.byte_count) {
std::memcpy(dst_ptr, src_ptr, data_size);
} else {
for (size_t vertex = 0; vertex < vertex_num; vertex++) {
std::memcpy(dst_ptr + vertex * aligned_stride, src_ptr + vertex * loader.byte_count,
loader.byte_count);
}
}
// Create the binding associated with this loader
VertexBinding& binding = layout.bindings[layout.binding_count];
binding.binding.Assign(layout.binding_count);
binding.fixed.Assign(0);
binding.stride.Assign(aligned_stride);
// Keep track of the binding offsets so we can bind the vertex buffer later
binding_offsets[layout.binding_count++] = static_cast<u32>(array_offset + buffer_offset);
buffer_offset += Common::AlignUp(aligned_stride * vertex_num, 4);
}
stream_buffer.Commit(buffer_offset);
// Assign the rest of the attributes to the last binding
SetupFixedAttribs();
}
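// Writes fixed and default vertex attributes into a final, dedicated binding.
// The default attribute (0, 0, 0, 1) is stored at offset zero so that any attribute
// not provided by a loader can simply point at it.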
void RasterizerVulkan::SetupFixedAttribs() {
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
VertexLayout& layout = pipeline_info.vertex_layout;
auto [fixed_ptr, fixed_offset, _] = stream_buffer.Map(16 * sizeof(Common::Vec4f), 0);
binding_offsets[layout.binding_count] = static_cast<u32>(fixed_offset);
// Reserve the last binding for fixed and default attributes
// Place the default attrib at offset zero for easy access
static const Common::Vec4f default_attrib{0.f, 0.f, 0.f, 1.f};
std::memcpy(fixed_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));
// Find all fixed attributes and assign them to the last binding
u32 offset = sizeof(Common::Vec4f);
for (std::size_t i = 0; i < 16; i++) {
if (vertex_attributes.IsDefaultAttribute(i)) {
const u32 reg = regs.vs.GetRegisterForAttribute(i);
if (!enable_attributes[reg]) {
const auto& attr = Pica::g_state.input_default_attributes.attr[i];
const std::array data = {attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(),
attr.w.ToFloat32()};
const u32 data_size = sizeof(float) * static_cast<u32>(data.size());
std::memcpy(fixed_ptr + offset, data.data(), data_size);
VertexAttribute& attribute = layout.attributes[reg];
attribute.binding.Assign(layout.binding_count);
attribute.location.Assign(reg);
attribute.offset.Assign(offset);
attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
attribute.size.Assign(4);
offset += data_size;
enable_attributes[reg] = true;
}
}
}
// Loop one more time to find unused attributes and point them at the default attribute.
// Even when an attribute is merely disabled, supplying the default value avoids
// errors if the shader ever decides to read it.
for (u32 i = 0; i < 16; i++) {
if (!enable_attributes[i]) {
VertexAttribute& attribute = layout.attributes[i];
attribute.binding.Assign(layout.binding_count);
attribute.location.Assign(i);
attribute.offset.Assign(0);
attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
attribute.size.Assign(4);
}
}
// Define the fixed+default binding
VertexBinding& binding = layout.bindings[layout.binding_count];
binding.binding.Assign(layout.binding_count++);
binding.fixed.Assign(1);
binding.stride.Assign(offset);
stream_buffer.Commit(offset);
}
bool RasterizerVulkan::SetupVertexShader() {
MICROPROFILE_SCOPE(Vulkan_VS);
return pipeline_cache.UseProgrammableVertexShader(regs, Pica::g_state.vs,
pipeline_info.vertex_layout);
}
bool RasterizerVulkan::SetupGeometryShader() {
MICROPROFILE_SCOPE(Vulkan_GS);
if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
LOG_ERROR(Render_Vulkan, "Accelerated draw doesn't support geometry shaders");
return false;
}
return pipeline_cache.UseFixedGeometryShader(regs);
}
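// Attempts to perform the draw entirely on the GPU using hardware vertex shaders.
// Returns false when the configuration is unsupported so the caller can fall back
// to the software vertex pipeline.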
bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) {
if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
if (regs.pipeline.gs_config.mode != Pica::PipelineRegs::GSMode::Point) {
return false;
}
if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) {
return false;
}
}
pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology);
if (regs.pipeline.triangle_topology == TriangleTopology::Fan &&
!instance.IsTriangleFanSupported()) {
LOG_DEBUG(Render_Vulkan,
"Skipping accelerated draw with unsupported triangle fan topology");
return false;
}
// Vertex data setup might involve scheduler flushes, so perform it
// early to avoid invalidating our state in the middle of the draw.
vertex_info = AnalyzeVertexArray(is_indexed, instance.GetMinVertexStrideAlignment());
SetupVertexArray();
if (!SetupVertexShader()) {
return false;
}
if (!SetupGeometryShader()) {
return false;
}
return Draw(true, is_indexed);
}
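// Records the vertex buffer bindings and the draw/drawIndexed call for an accelerated draw.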
bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
if (is_indexed) {
SetupIndexArray();
}
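// Only stall for pipeline compilation on very small draws or when async shader
// compilation is disabled; otherwise the draw is dropped until the pipeline is ready.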
const bool wait_built = !async_shaders || regs.pipeline.num_vertices <= 6;
if (!pipeline_cache.BindPipeline(pipeline_info, wait_built)) {
return true;
}
const DrawParams params = {
.vertex_count = regs.pipeline.num_vertices,
.vertex_offset = -static_cast<s32>(vertex_info.vs_input_index_min),
.binding_count = pipeline_info.vertex_layout.binding_count,
.bindings = binding_offsets,
.is_indexed = is_indexed,
};
scheduler.Record([this, params](vk::CommandBuffer cmdbuf) {
std::array<vk::DeviceSize, 16> offsets;
std::transform(params.bindings.begin(), params.bindings.end(), offsets.begin(),
[](u32 offset) { return static_cast<vk::DeviceSize>(offset); });
cmdbuf.bindVertexBuffers(0, params.binding_count, vertex_buffers.data(), offsets.data());
if (params.is_indexed) {
cmdbuf.drawIndexed(params.vertex_count, 1, 0, params.vertex_offset, 0);
} else {
cmdbuf.draw(params.vertex_count, 1, 0, 0);
}
});
return true;
}
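// Copies the index buffer into the stream buffer, widening 8-bit indices to 16-bit
// when VK_EXT_index_type_uint8 is not supported by the device.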
void RasterizerVulkan::SetupIndexArray() {
const bool index_u8 = regs.pipeline.index_array.format == 0;
const bool native_u8 = index_u8 && instance.IsIndexTypeUint8Supported();
const u32 index_buffer_size = regs.pipeline.num_vertices * (native_u8 ? 1 : 2);
const vk::IndexType index_type = native_u8 ? vk::IndexType::eUint8EXT : vk::IndexType::eUint16;
const u8* index_data =
memory.GetPhysicalPointer(regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
regs.pipeline.index_array.offset);
auto [index_ptr, index_offset, _] = stream_buffer.Map(index_buffer_size, 2);
if (index_u8 && !native_u8) {
u16* index_ptr_u16 = reinterpret_cast<u16*>(index_ptr);
for (u32 i = 0; i < regs.pipeline.num_vertices; i++) {
index_ptr_u16[i] = index_data[i];
}
} else {
std::memcpy(index_ptr, index_data, index_buffer_size);
}
stream_buffer.Commit(index_buffer_size);
scheduler.Record(
[this, index_offset = index_offset, index_type = index_type](vk::CommandBuffer cmdbuf) {
cmdbuf.bindIndexBuffer(stream_buffer.Handle(), index_offset, index_type);
});
}
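// Software vertex path: draws the CPU-shaded vertex batch using the trivial
// vertex and geometry shaders.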
void RasterizerVulkan::DrawTriangles() {
if (vertex_batch.empty()) {
return;
}
pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List);
pipeline_info.vertex_layout = software_layout;
pipeline_cache.UseTrivialVertexShader();
pipeline_cache.UseTrivialGeometryShader();
Draw(false, false);
}
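// Common draw path: resolves the target framebuffer and attachment formats, syncs
// textures, LUTs and uniforms, then records either an accelerated or a software-vertex
// draw inside the current render pass.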
bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
MICROPROFILE_SCOPE(Vulkan_Drawing);
const bool shadow_rendering = regs.framebuffer.IsShadowRendering();
const bool has_stencil = regs.framebuffer.HasStencil();
const bool write_color_fb = shadow_rendering || pipeline_info.blending.color_write_mask;
const bool write_depth_fb = pipeline_info.IsDepthWriteEnabled();
const bool using_color_fb =
regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb;
const bool using_depth_fb =
!shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
(write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 ||
(has_stencil && pipeline_info.depth_stencil.stencil_test_enable));
const auto fb_helper = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb);
const Framebuffer* framebuffer = fb_helper.Framebuffer();
if (!framebuffer->Handle()) {
return true;
}
pipeline_info.attachments.color = framebuffer->Format(SurfaceType::Color);
pipeline_info.attachments.depth = framebuffer->Format(SurfaceType::Depth);
if (shadow_rendering) {
pipeline_cache.BindStorageImage(6, framebuffer->ImageView(SurfaceType::Color));
} else {
Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
pipeline_cache.BindStorageImage(6, null_surface.StorageView());
}
// Update scissor uniforms
const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor();
if (fs_uniform_block_data.data.scissor_x1 != scissor_x1 ||
fs_uniform_block_data.data.scissor_x2 != scissor_x2 ||
fs_uniform_block_data.data.scissor_y1 != scissor_y1 ||
fs_uniform_block_data.data.scissor_y2 != scissor_y2) {
fs_uniform_block_data.data.scissor_x1 = scissor_x1;
fs_uniform_block_data.data.scissor_x2 = scissor_x2;
fs_uniform_block_data.data.scissor_y1 = scissor_y1;
fs_uniform_block_data.data.scissor_y2 = scissor_y2;
fs_uniform_block_data.dirty = true;
}
// Sync and bind the texture surfaces
SyncTextureUnits(framebuffer);
// Sync and bind the shader
if (shader_dirty) {
pipeline_cache.UseFragmentShader(regs);
shader_dirty = false;
}
// Sync the LUTs within the texture buffer
SyncAndUploadLUTs();
SyncAndUploadLUTsLF();
UploadUniforms(accelerate);
// Begin rendering
const auto draw_rect = fb_helper.DrawRect();
renderpass_cache.BeginRendering(framebuffer, draw_rect);
// Configure viewport and scissor
const auto viewport = fb_helper.Viewport();
pipeline_info.dynamic.viewport = Common::Rectangle<s32>{
viewport.x,
viewport.y,
viewport.x + viewport.width,
viewport.y + viewport.height,
};
pipeline_info.dynamic.scissor = draw_rect;
// Draw the vertex batch
bool succeeded = true;
if (accelerate) {
succeeded = AccelerateDrawBatchInternal(is_indexed);
} else {
pipeline_cache.BindPipeline(pipeline_info, true);
const u64 vertex_size = vertex_batch.size() * sizeof(HardwareVertex);
const u32 vertex_count = static_cast<u32>(vertex_batch.size());
const auto [buffer, offset, _] = stream_buffer.Map(vertex_size, sizeof(HardwareVertex));
std::memcpy(buffer, vertex_batch.data(), vertex_size);
stream_buffer.Commit(vertex_size);
scheduler.Record([this, offset = offset, vertex_count](vk::CommandBuffer cmdbuf) {
cmdbuf.bindVertexBuffers(0, stream_buffer.Handle(), offset);
cmdbuf.draw(vertex_count, 1, 0, 0);
});
}
vertex_batch.clear();
return succeeded;
}
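// Syncs and binds all three PICA texture units. Texture unit 0 requires special
// handling for the shadow and cube texture types.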
void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) {
using TextureType = Pica::TexturingRegs::TextureConfig::TextureType;
const auto pica_textures = regs.texturing.GetTextures();
for (u32 texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
const auto& texture = pica_textures[texture_index];
// If the texture unit is disabled, bind a null surface to it
if (!texture.enabled) {
const Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
const Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID);
pipeline_cache.BindTexture(texture_index, null_surface.ImageView(),
null_sampler.Handle());
continue;
}
// Handle special tex0 configurations
if (texture_index == 0) {
switch (texture.config.type.Value()) {
case TextureType::Shadow2D: {
Surface& surface = res_cache.GetTextureSurface(texture);
surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap;
pipeline_cache.BindStorageImage(0, surface.StorageView());
continue;
}
case TextureType::ShadowCube: {
BindShadowCube(texture);
continue;
}
case TextureType::TextureCube: {
BindTextureCube(texture);
continue;
}
default:
UnbindSpecial();
break;
}
}
// Bind the texture provided by the rasterizer cache
Surface& surface = res_cache.GetTextureSurface(texture);
Sampler& sampler = res_cache.GetSampler(texture.config);
if (!IsFeedbackLoop(texture_index, framebuffer, surface, sampler)) {
pipeline_cache.BindTexture(texture_index, surface.ImageView(), sampler.Handle());
}
}
}
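// Binds each face of tex0's shadow cube to its own storage image slot (bindings 0-5)
// and tags the face surfaces as shadow maps.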
void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture) {
using CubeFace = Pica::TexturingRegs::CubeFace;
auto info = Pica::Texture::TextureInfo::FromPicaRegister(texture.config, texture.format);
constexpr std::array faces = {
CubeFace::PositiveX, CubeFace::NegativeX, CubeFace::PositiveY,
CubeFace::NegativeY, CubeFace::PositiveZ, CubeFace::NegativeZ,
};
for (CubeFace face : faces) {
const u32 binding = static_cast<u32>(face);
info.physical_address = regs.texturing.GetCubePhysicalAddress(face);
const VideoCore::SurfaceId surface_id = res_cache.GetTextureSurface(info);
Surface& surface = res_cache.GetSurface(surface_id);
surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap;
pipeline_cache.BindStorageImage(binding, surface.StorageView());
}
}
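// Binds the full texture cube for tex0; the rasterizer cache provides a cube surface
// assembled from the six per-face physical addresses.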
void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture) {
using CubeFace = Pica::TexturingRegs::CubeFace;
const VideoCore::TextureCubeConfig config = {
.px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX),
.nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX),
.py = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY),
.ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY),
.pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ),
.nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ),
.width = texture.config.width,
.levels = texture.config.lod.max_level + 1,
.format = texture.format,
};
Surface& surface = res_cache.GetTextureCube(config);
Sampler& sampler = res_cache.GetSampler(texture.config);
pipeline_cache.BindTexture(3, surface.ImageView(), sampler.Handle());
}
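// Returns true when the sampled surface is also the bound color attachment.
// In that case a copy of the image is bound instead to avoid a feedback loop.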
bool RasterizerVulkan::IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer,
Surface& surface, Sampler& sampler) {
const vk::ImageView color_view = framebuffer->ImageView(SurfaceType::Color);
const bool is_feedback_loop = color_view == surface.ImageView();
if (!is_feedback_loop) {
return false;
}
// Make a temporary copy of the framebuffer to sample from
pipeline_cache.BindTexture(texture_index, surface.CopyImageView(), sampler.Handle());
return true;
}
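// Restores the null cube texture and null storage image bindings used by the
// special tex0 configurations.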
void RasterizerVulkan::UnbindSpecial() {
Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
const Surface& null_cube_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_CUBE_ID);
const Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID);
pipeline_cache.BindTexture(3, null_cube_surface.ImageView(), null_sampler.Handle());
for (u32 i = 0; i < 6; i++) {
pipeline_cache.BindStorageImage(i, null_surface.StorageView());
}
}
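// Dispatches a dirty fixed-function PICA register to the sync helpers that depend on it.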
void RasterizerVulkan::NotifyFixedFunctionPicaRegisterChanged(u32 id) {
switch (id) {
// Culling
case PICA_REG_INDEX(rasterizer.cull_mode):
SyncCullMode();
break;
// Blending
case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
SyncBlendEnabled();
// Update since logic op emulation depends on alpha blend enable.
SyncLogicOp();
SyncColorWriteMask();
break;
case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending):
SyncBlendFuncs();
break;
case PICA_REG_INDEX(framebuffer.output_merger.blend_const):
SyncBlendColor();
break;
// Sync VK stencil test + stencil write mask
// (Pica stencil test function register also contains a stencil write mask)
case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func):
SyncStencilTest();
SyncStencilWriteMask();
break;
case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op):
case PICA_REG_INDEX(framebuffer.framebuffer.depth_format):
SyncStencilTest();
break;
// Sync VK depth test + depth and color write mask
// (Pica depth test function register also contains a depth and color write mask)
case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable):
SyncDepthTest();
SyncDepthWriteMask();
SyncColorWriteMask();
break;
// Sync VK depth and stencil write mask
// (This is a dedicated combined depth / stencil write-enable register)
case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write):
SyncDepthWriteMask();
SyncStencilWriteMask();
break;
// Sync VK color write mask
// (This is a dedicated color write-enable register)
case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write):
SyncColorWriteMask();
break;
// Logic op
case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
SyncLogicOp();
// Update since color write mask is used to emulate no-op.
SyncColorWriteMask();
break;
}
}
void RasterizerVulkan::FlushAll() {
res_cache.FlushAll();
}
void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) {
res_cache.FlushRegion(addr, size);
}
void RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) {
res_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) {
res_cache.FlushRegion(addr, size);
res_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::ClearAll(bool flush) {
res_cache.ClearAll(flush);
}
bool RasterizerVulkan::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
return res_cache.AccelerateDisplayTransfer(config);
}
bool RasterizerVulkan::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) {
return res_cache.AccelerateTextureCopy(config);
}
bool RasterizerVulkan::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
return res_cache.AccelerateFill(config);
}
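// Looks up the surface backing the given framebuffer in the rasterizer cache and fills
// screen_info with its image view and normalized texture coordinates. Returns false if
// the surface is not cached so presentation can fall back to a CPU upload.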
bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
PAddr framebuffer_addr, u32 pixel_stride,
ScreenInfo& screen_info) {
if (framebuffer_addr == 0) [[unlikely]] {
return false;
}
VideoCore::SurfaceParams src_params;
src_params.addr = framebuffer_addr;
src_params.width = std::min(config.width.Value(), pixel_stride);
src_params.height = config.height;
src_params.stride = pixel_stride;
src_params.is_tiled = false;
src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.color_format);
src_params.UpdateParams();
const auto [src_surface_id, src_rect] =
res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true);
if (!src_surface_id) {
return false;
}
const Surface& src_surface = res_cache.GetSurface(src_surface_id);
const u32 scaled_width = src_surface.GetScaledWidth();
const u32 scaled_height = src_surface.GetScaledHeight();
screen_info.texcoords = Common::Rectangle<f32>(
(float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
(float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
screen_info.image_view = src_surface.ImageView();
return true;
}
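// Defines the interleaved layout used by the software vertex path: a single binding
// whose eight float attributes (4, 4, 2, 2, 2, 1, 4 and 3 components) mirror the
// HardwareVertex structure.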
void RasterizerVulkan::MakeSoftwareVertexLayout() {
constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3};
software_layout = VertexLayout{
.binding_count = 1,
.attribute_count = 8,
};
for (u32 i = 0; i < software_layout.binding_count; i++) {
VertexBinding& binding = software_layout.bindings[i];
binding.binding.Assign(i);
binding.fixed.Assign(0);
binding.stride.Assign(sizeof(HardwareVertex));
}
u32 offset = 0;
for (u32 i = 0; i < 8; i++) {
VertexAttribute& attribute = software_layout.attributes[i];
attribute.binding.Assign(0);
attribute.location.Assign(i);
attribute.offset.Assign(offset);
attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
attribute.size.Assign(sizes[i]);
offset += sizes[i] * sizeof(float);
}
}
void RasterizerVulkan::SyncCullMode() {
pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode);
}
void RasterizerVulkan::SyncBlendEnabled() {
pipeline_info.blending.blend_enable = regs.framebuffer.output_merger.alphablend_enable;
}
void RasterizerVulkan::SyncBlendFuncs() {
pipeline_info.blending.color_blend_eq.Assign(
regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb);
pipeline_info.blending.alpha_blend_eq.Assign(
regs.framebuffer.output_merger.alpha_blending.blend_equation_a);
pipeline_info.blending.src_color_blend_factor.Assign(
regs.framebuffer.output_merger.alpha_blending.factor_source_rgb);
pipeline_info.blending.dst_color_blend_factor.Assign(
regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb);
pipeline_info.blending.src_alpha_blend_factor.Assign(
regs.framebuffer.output_merger.alpha_blending.factor_source_a);
pipeline_info.blending.dst_alpha_blend_factor.Assign(
regs.framebuffer.output_merger.alpha_blending.factor_dest_a);
}
void RasterizerVulkan::SyncBlendColor() {
pipeline_info.dynamic.blend_color = regs.framebuffer.output_merger.blend_const.raw;
}
void RasterizerVulkan::SyncLogicOp() {
if (instance.NeedsLogicOpEmulation()) {
// We need this in the fragment shader to emulate logic operations
shader_dirty = true;
}
pipeline_info.blending.logic_op = regs.framebuffer.output_merger.logic_op;
const bool is_logic_op_emulated =
instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable;
const bool is_logic_op_noop =
regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp;
if (is_logic_op_emulated && is_logic_op_noop) {
// Color output is disabled by the logic operation. Use the color write mask to skip
// color writes while still allowing depth writes.
pipeline_info.blending.color_write_mask = 0;
}
}
void RasterizerVulkan::SyncColorWriteMask() {
const u32 color_mask = regs.framebuffer.framebuffer.allow_color_write != 0
? (regs.framebuffer.output_merger.depth_color_mask >> 8) & 0xF
: 0;
const bool is_logic_op_emulated =
instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable;
const bool is_logic_op_noop =
regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp;
if (is_logic_op_emulated && is_logic_op_noop) {
// Color output is disabled by the logic operation. Use the color write mask to skip
// color writes while still allowing depth writes. Return early to avoid overwriting this.
return;
}
pipeline_info.blending.color_write_mask = color_mask;
}
void RasterizerVulkan::SyncStencilWriteMask() {
pipeline_info.dynamic.stencil_write_mask =
(regs.framebuffer.framebuffer.allow_depth_stencil_write != 0)
? static_cast<u32>(regs.framebuffer.output_merger.stencil_test.write_mask)
: 0;
}
void RasterizerVulkan::SyncDepthWriteMask() {
const bool write_enable = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 &&
regs.framebuffer.output_merger.depth_write_enable);
pipeline_info.depth_stencil.depth_write_enable.Assign(write_enable);
}
void RasterizerVulkan::SyncStencilTest() {
const auto& stencil_test = regs.framebuffer.output_merger.stencil_test;
const bool test_enable = stencil_test.enable && regs.framebuffer.framebuffer.depth_format ==
Pica::FramebufferRegs::DepthFormat::D24S8;
pipeline_info.depth_stencil.stencil_test_enable.Assign(test_enable);
pipeline_info.depth_stencil.stencil_fail_op.Assign(stencil_test.action_stencil_fail);
pipeline_info.depth_stencil.stencil_pass_op.Assign(stencil_test.action_depth_pass);
pipeline_info.depth_stencil.stencil_depth_fail_op.Assign(stencil_test.action_depth_fail);
pipeline_info.depth_stencil.stencil_compare_op.Assign(stencil_test.func);
pipeline_info.dynamic.stencil_reference = stencil_test.reference_value;
pipeline_info.dynamic.stencil_compare_mask = stencil_test.input_mask;
}
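// Vulkan only performs depth writes when the depth test is enabled, so the test is
// forced on (with an Always compare op) when the game requests depth writes without
// depth testing.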
void RasterizerVulkan::SyncDepthTest() {
const bool test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 ||
regs.framebuffer.output_merger.depth_write_enable == 1;
const auto compare_op = regs.framebuffer.output_merger.depth_test_enable == 1
? regs.framebuffer.output_merger.depth_test_func.Value()
: Pica::FramebufferRegs::CompareFunc::Always;
pipeline_info.depth_stencil.depth_test_enable.Assign(test_enabled);
pipeline_info.depth_stencil.depth_compare_op.Assign(compare_op);
}
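// Uploads dirty lighting and fog LUTs into the R32G32 texel buffer ("LF" = lighting + fog)
// and records their texel offsets in the fragment uniform block.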
void RasterizerVulkan::SyncAndUploadLUTsLF() {
constexpr std::size_t max_size =
sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler +
sizeof(Common::Vec2f) * 128; // fog
if (!fs_uniform_block_data.lighting_lut_dirty_any && !fs_uniform_block_data.fog_lut_dirty) {
return;
}
std::size_t bytes_used = 0;
auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f));
// Sync the lighting luts
if (fs_uniform_block_data.lighting_lut_dirty_any || invalidate) {
for (unsigned index = 0; index < fs_uniform_block_data.lighting_lut_dirty.size(); index++) {
if (fs_uniform_block_data.lighting_lut_dirty[index] || invalidate) {
std::array<Common::Vec2f, 256> new_data;
const auto& source_lut = Pica::g_state.lighting.luts[index];
std::transform(source_lut.begin(), source_lut.end(), new_data.begin(),
[](const auto& entry) {
return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()};
});
if (new_data != lighting_lut_data[index] || invalidate) {
lighting_lut_data[index] = new_data;
std::memcpy(buffer + bytes_used, new_data.data(),
new_data.size() * sizeof(Common::Vec2f));
fs_uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] =
static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
fs_uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(Common::Vec2f);
}
fs_uniform_block_data.lighting_lut_dirty[index] = false;
}
}
fs_uniform_block_data.lighting_lut_dirty_any = false;
}
// Sync the fog lut
if (fs_uniform_block_data.fog_lut_dirty || invalidate) {
std::array<Common::Vec2f, 128> new_data;
std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(),
[](const auto& entry) {
return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()};
});
if (new_data != fog_lut_data || invalidate) {
fog_lut_data = new_data;
std::memcpy(buffer + bytes_used, new_data.data(),
new_data.size() * sizeof(Common::Vec2f));
fs_uniform_block_data.data.fog_lut_offset =
static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
fs_uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(Common::Vec2f);
}
fs_uniform_block_data.fog_lut_dirty = false;
}
texture_lf_buffer.Commit(static_cast<u32>(bytes_used));
}
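// Uploads dirty proctex LUTs (noise, color map, alpha map, color and color-diff tables)
// into the shared texel buffer, which is read through the RG32F and RGBA32F views.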
void RasterizerVulkan::SyncAndUploadLUTs() {
const auto& proctex = Pica::g_state.proctex;
constexpr std::size_t max_size =
sizeof(Common::Vec2f) * 128 * 3 + // proctex: noise + color + alpha
sizeof(Common::Vec4f) * 256 + // proctex
sizeof(Common::Vec4f) * 256; // proctex diff
if (!fs_uniform_block_data.proctex_noise_lut_dirty &&
!fs_uniform_block_data.proctex_color_map_dirty &&
!fs_uniform_block_data.proctex_alpha_map_dirty &&
!fs_uniform_block_data.proctex_lut_dirty && !fs_uniform_block_data.proctex_diff_lut_dirty) {
return;
}
std::size_t bytes_used = 0;
auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f));
// Helper for syncing the proctex noise, color map and alpha map value LUTs below
auto sync_proctex_value_lut =
[this, buffer = buffer, offset = offset, invalidate = invalidate,
&bytes_used](const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut,
std::array<Common::Vec2f, 128>& lut_data, int& lut_offset) {
std::array<Common::Vec2f, 128> new_data;
std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) {
return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()};
});
if (new_data != lut_data || invalidate) {
lut_data = new_data;
std::memcpy(buffer + bytes_used, new_data.data(),
new_data.size() * sizeof(Common::Vec2f));
lut_offset = static_cast<int>((offset + bytes_used) / sizeof(Common::Vec2f));
fs_uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(Common::Vec2f);
}
};
// Sync the proctex noise lut
if (fs_uniform_block_data.proctex_noise_lut_dirty || invalidate) {
sync_proctex_value_lut(proctex.noise_table, proctex_noise_lut_data,
fs_uniform_block_data.data.proctex_noise_lut_offset);
fs_uniform_block_data.proctex_noise_lut_dirty = false;
}
// Sync the proctex color map
if (fs_uniform_block_data.proctex_color_map_dirty || invalidate) {
sync_proctex_value_lut(proctex.color_map_table, proctex_color_map_data,
fs_uniform_block_data.data.proctex_color_map_offset);
fs_uniform_block_data.proctex_color_map_dirty = false;
}
// Sync the proctex alpha map
if (fs_uniform_block_data.proctex_alpha_map_dirty || invalidate) {
sync_proctex_value_lut(proctex.alpha_map_table, proctex_alpha_map_data,
fs_uniform_block_data.data.proctex_alpha_map_offset);
fs_uniform_block_data.proctex_alpha_map_dirty = false;
}
// Sync the proctex lut
if (fs_uniform_block_data.proctex_lut_dirty || invalidate) {
std::array<Common::Vec4f, 256> new_data;
std::transform(proctex.color_table.begin(), proctex.color_table.end(), new_data.begin(),
[](const auto& entry) {
auto rgba = entry.ToVector() / 255.0f;
return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
});
if (new_data != proctex_lut_data || invalidate) {
proctex_lut_data = new_data;
std::memcpy(buffer + bytes_used, new_data.data(),
new_data.size() * sizeof(Common::Vec4f));
fs_uniform_block_data.data.proctex_lut_offset =
static_cast<int>((offset + bytes_used) / sizeof(Common::Vec4f));
fs_uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(Common::Vec4f);
}
fs_uniform_block_data.proctex_lut_dirty = false;
}
// Sync the proctex difference lut
if (fs_uniform_block_data.proctex_diff_lut_dirty || invalidate) {
std::array<Common::Vec4f, 256> new_data;
std::transform(proctex.color_diff_table.begin(), proctex.color_diff_table.end(),
new_data.begin(), [](const auto& entry) {
auto rgba = entry.ToVector() / 255.0f;
return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
});
if (new_data != proctex_diff_lut_data || invalidate) {
proctex_diff_lut_data = new_data;
std::memcpy(buffer + bytes_used, new_data.data(),
new_data.size() * sizeof(Common::Vec4f));
fs_uniform_block_data.data.proctex_diff_lut_offset =
static_cast<int>((offset + bytes_used) / sizeof(Common::Vec4f));
fs_uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(Common::Vec4f);
}
fs_uniform_block_data.proctex_diff_lut_dirty = false;
}
texture_buffer.Commit(static_cast<u32>(bytes_used));
}
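// Uploads whichever uniform blocks are dirty (PICA VS uniforms, VS data, FS data) into
// the uniform stream buffer and points the corresponding descriptor offsets at the
// fresh copies.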
void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
const bool sync_vs_pica = accelerate_draw;
const bool sync_vs = vs_uniform_block_data.dirty;
const bool sync_fs = fs_uniform_block_data.dirty;
if (!sync_vs_pica && !sync_vs && !sync_fs) {
return;
}
const u64 uniform_size =
uniform_size_aligned_vs_pica + uniform_size_aligned_vs + uniform_size_aligned_fs;
auto [uniforms, offset, invalidate] =
uniform_buffer.Map(uniform_size, uniform_buffer_alignment);
u32 used_bytes = 0;
if (sync_vs || invalidate) {
std::memcpy(uniforms + used_bytes, &vs_uniform_block_data.data,
sizeof(vs_uniform_block_data.data));
pipeline_cache.SetBufferOffset(1, offset + used_bytes);
vs_uniform_block_data.dirty = false;
used_bytes += static_cast<u32>(uniform_size_aligned_vs);
}
if (sync_fs || invalidate) {
std::memcpy(uniforms + used_bytes, &fs_uniform_block_data.data,
sizeof(fs_uniform_block_data.data));
pipeline_cache.SetBufferOffset(2, offset + used_bytes);
fs_uniform_block_data.dirty = false;
used_bytes += static_cast<u32>(uniform_size_aligned_fs);
}
if (sync_vs_pica) {
VSPicaUniformData vs_uniforms;
vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs);
std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));
pipeline_cache.SetBufferOffset(0, offset + used_bytes);
used_bytes += static_cast<u32>(uniform_size_aligned_vs_pica);
}
uniform_buffer.Commit(used_bytes);
}
} // namespace Vulkan