renderer_vulkan: Rewrite descriptor set management and various small fixes (#77)

* renderer_vulkan: Remove vulkan prefix in SetObjectName

* renderer_vulkan: Rename renderpass cache to render manager

* It is no longer just a cache

* renderer_vulkan: Rewrite descriptor management

* Switch to batched vkUpdateDescriptorSets from cached descriptor sets with templates

* vk_master_semaphore: Remove waitable atomic

* These are buggy on some platforms and regular condition_variables are faster most of the time

* vk_texture_runtime.cpp: remove outdated references

* vk_render_manager: Minor cleanups and rename to RenderManager

* It is no longer just a renderpass cache
* Revert variable name change from render_manager back to renderpass_cache

---------

Co-authored-by: GPUCode <geoster3d@gmail.com>
This commit is contained in:
Reg Tiangha 2024-04-22 14:43:36 -06:00 committed by GitHub
parent e26ceabfd1
commit de65b15dde
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
41 changed files with 879 additions and 856 deletions

@ -1 +1 @@
Subproject commit 217e93c664ec6704ec2d8c36fa116c1a4a1e2d40
Subproject commit 5a5c9a643484d888873e32c5d7d484fae8e71d3d

View File

@ -160,8 +160,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_blit_helper.h
renderer_vulkan/vk_common.cpp
renderer_vulkan/vk_common.h
renderer_vulkan/vk_descriptor_pool.cpp
renderer_vulkan/vk_descriptor_pool.h
renderer_vulkan/vk_descriptor_update_queue.cpp
renderer_vulkan/vk_descriptor_update_queue.h
renderer_vulkan/vk_graphics_pipeline.cpp
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_master_semaphore.cpp
@ -183,8 +183,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_platform.h
renderer_vulkan/vk_present_window.cpp
renderer_vulkan/vk_present_window.h
renderer_vulkan/vk_renderpass_cache.cpp
renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_render_manager.cpp
renderer_vulkan/vk_render_manager.h
renderer_vulkan/vk_shader_util.cpp
renderer_vulkan/vk_shader_util.h
renderer_vulkan/vk_stream_buffer.cpp

View File

@ -385,7 +385,7 @@ std::vector<FileUtil::FSTEntry> CustomTexManager::GetTextures(u64 title_id) {
}
void CustomTexManager::CreateWorkers() {
const std::size_t num_workers = std::max(std::thread::hardware_concurrency(), 2U) - 1;
const std::size_t num_workers = std::max(std::thread::hardware_concurrency(), 2U) >> 1;
workers = std::make_unique<Common::ThreadWorker>(num_workers, "Custom textures");
}

View File

@ -176,15 +176,15 @@ struct TexturingRegs {
INSERT_PADDING_WORDS(0x9);
struct FullTextureConfig {
const bool enabled;
const u32 enabled;
const TextureConfig config;
const TextureFormat format;
};
const std::array<FullTextureConfig, 3> GetTextures() const {
return {{
{static_cast<bool>(main_config.texture0_enable), texture0, texture0_format},
{static_cast<bool>(main_config.texture1_enable), texture1, texture1_format},
{static_cast<bool>(main_config.texture2_enable), texture2, texture2_format},
{main_config.texture0_enable, texture0, texture0_format},
{main_config.texture1_enable, texture1, texture1_format},
{main_config.texture2_enable, texture2, texture2_format},
}};
}

View File

@ -600,14 +600,43 @@ typename T::Surface& RasterizerCache<T>::GetTextureCube(const TextureCubeConfig&
auto [it, new_surface] = texture_cube_cache.try_emplace(config);
TextureCube& cube = it->second;
const std::array addresses = {config.px, config.nx, config.py, config.ny, config.pz, config.nz};
if (new_surface) {
Pica::Texture::TextureInfo info = {
.width = config.width,
.height = config.width,
.format = config.format,
};
info.SetDefaultStride();
u32 res_scale = 1;
for (u32 i = 0; i < addresses.size(); i++) {
if (!addresses[i]) {
continue;
}
SurfaceId& face_id = cube.face_ids[i];
if (!face_id) {
info.physical_address = addresses[i];
face_id = GetTextureSurface(info, config.levels - 1);
Surface& surface = slot_surfaces[face_id];
ASSERT_MSG(
surface.levels >= config.levels,
"Texture cube face levels are not enough to validate the levels requested");
surface.flags |= SurfaceFlagBits::Tracked;
}
Surface& surface = slot_surfaces[face_id];
res_scale = std::max(surface.res_scale, res_scale);
}
SurfaceParams cube_params = {
.addr = config.px,
.width = config.width,
.height = config.width,
.stride = config.width,
.levels = config.levels,
.res_scale = filter != Settings::TextureFilter::None ? resolution_scale_factor : 1,
.res_scale = res_scale,
.texture_type = TextureType::CubeMap,
.pixel_format = PixelFormatFromTextureFormat(config.format),
.type = SurfaceType::Texture,
@ -616,38 +645,20 @@ typename T::Surface& RasterizerCache<T>::GetTextureCube(const TextureCubeConfig&
cube.surface_id = CreateSurface(cube_params);
}
const u32 scaled_size = slot_surfaces[cube.surface_id].GetScaledWidth();
const std::array addresses = {config.px, config.nx, config.py, config.ny, config.pz, config.nz};
Pica::Texture::TextureInfo info = {
.width = config.width,
.height = config.width,
.format = config.format,
};
info.SetDefaultStride();
Surface& cube_surface = slot_surfaces[cube.surface_id];
for (u32 i = 0; i < addresses.size(); i++) {
if (!addresses[i]) {
continue;
}
SurfaceId& face_id = cube.face_ids[i];
if (!face_id) {
info.physical_address = addresses[i];
face_id = GetTextureSurface(info, config.levels - 1);
ASSERT_MSG(slot_surfaces[face_id].levels >= config.levels,
"Texture cube face levels are not enough to validate the levels requested");
}
Surface& surface = slot_surfaces[face_id];
surface.flags |= SurfaceFlagBits::Tracked;
Surface& surface = slot_surfaces[cube.face_ids[i]];
if (cube.ticks[i] == surface.modification_tick) {
continue;
}
cube.ticks[i] = surface.modification_tick;
Surface& cube_surface = slot_surfaces[cube.surface_id];
boost::container::small_vector<TextureCopy, 8> upload_copies;
for (u32 level = 0; level < config.levels; level++) {
const u32 width_lod = scaled_size >> level;
const TextureCopy texture_copy = {
const u32 width_lod = surface.GetScaledWidth() >> level;
upload_copies.push_back({
.src_level = level,
.dst_level = level,
.src_layer = 0,
@ -655,9 +666,9 @@ typename T::Surface& RasterizerCache<T>::GetTextureCube(const TextureCubeConfig&
.src_offset = {0, 0},
.dst_offset = {0, 0},
.extent = {width_lod, width_lod},
};
runtime.CopyTextures(surface, cube_surface, texture_copy);
});
}
runtime.CopyTextures(surface, cube_surface, upload_copies);
}
return slot_surfaces[cube.surface_id];

View File

@ -260,16 +260,19 @@ void TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
}
bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
const VideoCore::TextureCopy& copy) {
std::span<const VideoCore::TextureCopy> copies) {
const GLenum src_textarget = source.texture_type == VideoCore::TextureType::CubeMap
? GL_TEXTURE_CUBE_MAP
: GL_TEXTURE_2D;
const GLenum dest_textarget =
dest.texture_type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D;
glCopyImageSubData(source.Handle(), src_textarget, copy.src_level, copy.src_offset.x,
copy.src_offset.y, copy.src_layer, dest.Handle(), dest_textarget,
copy.dst_level, copy.dst_offset.x, copy.dst_offset.y, copy.dst_layer,
copy.extent.width, copy.extent.height, 1);
for (const auto& copy : copies) {
glCopyImageSubData(source.Handle(), src_textarget, copy.src_level, copy.src_offset.x,
copy.src_offset.y, copy.src_layer, dest.Handle(), dest_textarget,
copy.dst_level, copy.dst_offset.x, copy.dst_offset.y, copy.dst_layer,
copy.extent.width, copy.extent.height, 1);
}
return true;
}

View File

@ -65,7 +65,12 @@ public:
void ClearTexture(Surface& surface, const VideoCore::TextureClear& clear);
/// Copies a rectangle of source to another rectangle of dest
bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy);
bool CopyTextures(Surface& source, Surface& dest,
std::span<const VideoCore::TextureCopy> copies);
/// Convenience overload for a single copy: wraps it in a one-element array and forwards to the
/// span-based CopyTextures overload.
bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) {
    const std::array single_copy{copy};
    return CopyTextures(source, dest, single_copy);
}
/// Blits a rectangle of source to another rectangle of dest
bool BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);

View File

@ -54,21 +54,14 @@ RendererVulkan::RendererVulkan(Core::System& system, Pica::PicaCore& pica_,
Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window)
: RendererBase{system, window, secondary_window}, memory{system.Memory()}, pica{pica_},
instance{window, Settings::values.physical_device.GetValue()}, scheduler{instance},
renderpass_cache{instance, scheduler}, pool{instance}, main_window{window, instance,
scheduler},
renderpass_cache{instance, scheduler}, main_window{window, instance, scheduler},
vertex_buffer{instance, scheduler, vk::BufferUsageFlagBits::eVertexBuffer,
VERTEX_BUFFER_SIZE},
rasterizer{memory,
pica,
system.CustomTexManager(),
*this,
render_window,
instance,
scheduler,
pool,
renderpass_cache,
main_window.ImageCount()},
present_set_provider{instance, pool, PRESENT_BINDINGS} {
update_queue{instance},
rasterizer{
memory, pica, system.CustomTexManager(), *this, render_window,
instance, scheduler, renderpass_cache, update_queue, main_window.ImageCount()},
present_heap{instance, scheduler.GetMasterSemaphore(), PRESENT_BINDINGS, 32} {
CompileShaders();
BuildLayouts();
BuildPipelines();
@ -127,16 +120,14 @@ void RendererVulkan::PrepareRendertarget() {
void RendererVulkan::PrepareDraw(Frame* frame, const Layout::FramebufferLayout& layout) {
const auto sampler = present_samplers[!Settings::values.filter_mode.GetValue()];
std::transform(screen_infos.begin(), screen_infos.end(), present_textures.begin(),
[&](auto& info) {
return DescriptorData{vk::DescriptorImageInfo{sampler, info.image_view,
vk::ImageLayout::eGeneral}};
});
const auto descriptor_set = present_set_provider.Acquire(present_textures);
const auto present_set = present_heap.Commit();
for (u32 index = 0; index < screen_infos.size(); index++) {
update_queue.AddImageSampler(present_set, 0, index, screen_infos[index].image_view,
sampler);
}
renderpass_cache.EndRendering();
scheduler.Record([this, layout, frame, descriptor_set, renderpass = main_window.Renderpass(),
scheduler.Record([this, layout, frame, present_set, renderpass = main_window.Renderpass(),
index = current_pipeline](vk::CommandBuffer cmdbuf) {
const vk::Viewport viewport = {
.x = 0.0f,
@ -171,7 +162,7 @@ void RendererVulkan::PrepareDraw(Frame* frame, const Layout::FramebufferLayout&
cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, present_pipelines[index]);
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, descriptor_set, {});
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, present_set, {});
});
}
@ -264,7 +255,7 @@ void RendererVulkan::BuildLayouts() {
.size = sizeof(PresentUniformData),
};
const auto descriptor_set_layout = present_set_provider.Layout();
const auto descriptor_set_layout = present_heap.Layout();
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 1,
.pSetLayouts = &descriptor_set_layout,
@ -809,29 +800,7 @@ void RendererVulkan::DrawScreens(Frame* frame, const Layout::FramebufferLayout&
}
}
scheduler.Record([image = frame->image](vk::CommandBuffer cmdbuf) {
const vk::ImageMemoryBarrier render_barrier = {
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
.oldLayout = vk::ImageLayout::eTransferSrcOptimal,
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.endRenderPass();
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, render_barrier);
});
scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.endRenderPass(); });
}
void RendererVulkan::SwapBuffers() {

View File

@ -7,11 +7,10 @@
#include "common/common_types.h"
#include "common/math_util.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_present_window.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_render_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Core {
@ -118,15 +117,15 @@ private:
Instance instance;
Scheduler scheduler;
RenderpassCache renderpass_cache;
DescriptorPool pool;
RenderManager renderpass_cache;
PresentWindow main_window;
StreamBuffer vertex_buffer;
DescriptorUpdateQueue update_queue;
RasterizerVulkan rasterizer;
std::unique_ptr<PresentWindow> second_window;
DescriptorHeap present_heap;
vk::UniquePipelineLayout present_pipeline_layout;
DescriptorSetProvider present_set_provider;
std::array<vk::Pipeline, PRESENT_PIPELINES> present_pipelines;
std::array<vk::ShaderModule, PRESENT_PIPELINES> present_shaders;
std::array<vk::Sampler, 2> present_samplers;
@ -134,7 +133,6 @@ private:
u32 current_pipeline = 0;
std::array<ScreenInfo, 3> screen_infos{};
std::array<DescriptorData, 3> present_textures{};
PresentUniformData draw_info{};
vk::ClearColorValue clear_color{};
};

View File

@ -4,8 +4,9 @@
#include "common/vector_math.h"
#include "video_core/renderer_vulkan/vk_blit_helper.h"
#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_render_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
@ -177,12 +178,13 @@ constexpr vk::PipelineShaderStageCreateInfo MakeStages(vk::ShaderModule compute_
} // Anonymous namespace
BlitHelper::BlitHelper(const Instance& instance_, Scheduler& scheduler_, DescriptorPool& pool,
RenderpassCache& renderpass_cache_)
BlitHelper::BlitHelper(const Instance& instance_, Scheduler& scheduler_,
RenderManager& renderpass_cache_, DescriptorUpdateQueue& update_queue_)
: instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_},
device{instance.GetDevice()}, compute_provider{instance, pool, COMPUTE_BINDINGS},
compute_buffer_provider{instance, pool, COMPUTE_BUFFER_BINDINGS},
two_textures_provider{instance, pool, TWO_TEXTURES_BINDINGS},
update_queue{update_queue_}, device{instance.GetDevice()},
compute_provider{instance, scheduler.GetMasterSemaphore(), COMPUTE_BINDINGS},
compute_buffer_provider{instance, scheduler.GetMasterSemaphore(), COMPUTE_BUFFER_BINDINGS},
two_textures_provider{instance, scheduler.GetMasterSemaphore(), TWO_TEXTURES_BINDINGS, 16},
compute_pipeline_layout{
device.createPipelineLayout(PipelineLayoutCreateInfo(&compute_provider.Layout(), true))},
compute_buffer_pipeline_layout{device.createPipelineLayout(
@ -286,19 +288,9 @@ bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest,
.extent = {dest.GetScaledWidth(), dest.GetScaledHeight()},
};
std::array<DescriptorData, 2> textures{};
textures[0].image_info = vk::DescriptorImageInfo{
.sampler = nearest_sampler,
.imageView = source.DepthView(),
.imageLayout = vk::ImageLayout::eGeneral,
};
textures[1].image_info = vk::DescriptorImageInfo{
.sampler = nearest_sampler,
.imageView = source.StencilView(),
.imageLayout = vk::ImageLayout::eGeneral,
};
const auto descriptor_set = two_textures_provider.Acquire(textures);
const auto descriptor_set = two_textures_provider.Commit();
update_queue.AddImageSampler(descriptor_set, 0, 0, source.DepthView(), nearest_sampler);
update_queue.AddImageSampler(descriptor_set, 1, 0, source.StencilView(), nearest_sampler);
const RenderPass depth_pass = {
.framebuffer = dest.Framebuffer(),
@ -322,21 +314,12 @@ bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest,
bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest,
const VideoCore::TextureCopy& copy) {
std::array<DescriptorData, 3> textures{};
textures[0].image_info = vk::DescriptorImageInfo{
.imageView = source.DepthView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
};
textures[1].image_info = vk::DescriptorImageInfo{
.imageView = source.StencilView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
};
textures[2].image_info = vk::DescriptorImageInfo{
.imageView = dest.ImageView(),
.imageLayout = vk::ImageLayout::eGeneral,
};
const auto descriptor_set = compute_provider.Acquire(textures);
const auto descriptor_set = compute_provider.Commit();
update_queue.AddImageSampler(descriptor_set, 0, 0, source.DepthView(), VK_NULL_HANDLE,
vk::ImageLayout::eDepthStencilReadOnlyOptimal);
update_queue.AddImageSampler(descriptor_set, 1, 0, source.StencilView(), VK_NULL_HANDLE,
vk::ImageLayout::eDepthStencilReadOnlyOptimal);
update_queue.AddStorageImage(descriptor_set, 2, dest.ImageView());
renderpass_cache.EndRendering();
scheduler.Record([this, descriptor_set, copy, src_image = source.Image(),
@ -442,24 +425,13 @@ bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest,
bool BlitHelper::DepthToBuffer(Surface& source, vk::Buffer buffer,
const VideoCore::BufferTextureCopy& copy) {
std::array<DescriptorData, 3> textures{};
textures[0].image_info = vk::DescriptorImageInfo{
.sampler = nearest_sampler,
.imageView = source.DepthView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
};
textures[1].image_info = vk::DescriptorImageInfo{
.sampler = nearest_sampler,
.imageView = source.StencilView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
};
textures[2].buffer_info = vk::DescriptorBufferInfo{
.buffer = buffer,
.offset = copy.buffer_offset,
.range = copy.buffer_size,
};
const auto descriptor_set = compute_buffer_provider.Acquire(textures);
const auto descriptor_set = compute_buffer_provider.Commit();
update_queue.AddImageSampler(descriptor_set, 0, 0, source.DepthView(), nearest_sampler,
vk::ImageLayout::eDepthStencilReadOnlyOptimal);
update_queue.AddImageSampler(descriptor_set, 1, 0, source.StencilView(), nearest_sampler,
vk::ImageLayout::eDepthStencilReadOnlyOptimal);
update_queue.AddBuffer(descriptor_set, 2, buffer, copy.buffer_offset, copy.buffer_size,
vk::DescriptorType::eStorageBuffer);
renderpass_cache.EndRendering();
scheduler.Record([this, descriptor_set, copy, src_image = source.Image(),

View File

@ -4,7 +4,7 @@
#pragma once
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
namespace VideoCore {
struct TextureBlit;
@ -15,16 +15,17 @@ struct BufferTextureCopy;
namespace Vulkan {
class Instance;
class RenderpassCache;
class RenderManager;
class Scheduler;
class Surface;
class DescriptorUpdateQueue;
class BlitHelper {
friend class TextureRuntime;
public:
BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorPool& pool,
RenderpassCache& renderpass_cache);
explicit BlitHelper(const Instance& instance, Scheduler& scheduler,
RenderManager& renderpass_cache, DescriptorUpdateQueue& update_queue);
~BlitHelper();
bool BlitDepthStencil(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
@ -41,14 +42,15 @@ private:
private:
const Instance& instance;
Scheduler& scheduler;
RenderpassCache& renderpass_cache;
RenderManager& renderpass_cache;
DescriptorUpdateQueue& update_queue;
vk::Device device;
vk::RenderPass r32_renderpass;
DescriptorSetProvider compute_provider;
DescriptorSetProvider compute_buffer_provider;
DescriptorSetProvider two_textures_provider;
DescriptorHeap compute_provider;
DescriptorHeap compute_buffer_provider;
DescriptorHeap two_textures_provider;
vk::PipelineLayout compute_pipeline_layout;
vk::PipelineLayout compute_buffer_pipeline_layout;
vk::PipelineLayout two_textures_pipeline_layout;

View File

@ -9,7 +9,6 @@
#define VK_NO_PROTOTYPES
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
#define VULKAN_HPP_NO_CONSTRUCTORS
#define VULKAN_HPP_NO_UNION_CONSTRUCTORS
#define VULKAN_HPP_NO_STRUCT_SETTERS
#include <vulkan/vulkan.hpp>

View File

@ -1,141 +0,0 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
// Profiling token for the scope entered in DescriptorSetProvider::Acquire.
// NOTE(review): the blue component is 256, which presumably exceeds an 8-bit colour channel —
// confirm the intended value.
MICROPROFILE_DEFINE(Vulkan_DescriptorSetAcquire, "Vulkan", "Descriptor Set Acquire",
MP_RGB(64, 128, 256));
// Number of descriptor sets DescriptorSetProvider requests from the pool whenever its free list
// is empty.
constexpr u32 MAX_BATCH_SIZE = 8;
/// Builds the allocator and eagerly creates the first Vulkan descriptor pool, so Allocate always
/// has at least one pool to try.
DescriptorPool::DescriptorPool(const Instance& instance_) : instance{instance_} {
    pools.push_back(CreatePool());
}

/// The pools are held through unique handles, so no manual cleanup is written here.
DescriptorPool::~DescriptorPool() = default;
/**
 * Allocates `num_sets` descriptor sets that all use `layout`.
 *
 * Pools are tried in creation order. When a pool reports vk::OutOfPoolMemoryError the next pool is
 * tried, and a brand new pool is created once every existing one has been exhausted.
 *
 * @param layout   Layout shared by every requested set.
 * @param num_sets Number of sets to allocate.
 * @return The newly allocated descriptor sets.
 */
std::vector<vk::DescriptorSet> DescriptorPool::Allocate(vk::DescriptorSetLayout layout,
                                                        u32 num_sets) {
    // pSetLayouts must reference descriptorSetCount entries. Size the array from num_sets rather
    // than a fixed batch size so a larger request can never make the driver read out of bounds.
    const std::vector<vk::DescriptorSetLayout> layouts(num_sets, layout);

    u32 current_pool = 0;
    vk::DescriptorSetAllocateInfo alloc_info = {
        .descriptorPool = *pools[current_pool],
        .descriptorSetCount = num_sets,
        .pSetLayouts = layouts.data(),
    };

    while (true) {
        try {
            return instance.GetDevice().allocateDescriptorSets(alloc_info);
        } catch (const vk::OutOfPoolMemoryError&) {
            // This pool is full: advance to the next one, growing the pool list if needed.
            current_pool++;
            if (current_pool == pools.size()) {
                LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!");
                auto& pool = pools.emplace_back();
                pool = CreatePool();
            }
            alloc_info.descriptorPool = *pools[current_pool];
        }
    }
}
/// Allocates exactly one descriptor set of the given layout by delegating to the batched overload.
vk::DescriptorSet DescriptorPool::Allocate(vk::DescriptorSetLayout layout) {
    return Allocate(layout, 1).front();
}
/// Creates one descriptor pool with fixed per-type capacities and returns it as a unique handle.
vk::UniqueDescriptorPool DescriptorPool::CreatePool() {
    // Capacities are a fixed guess meant to be sufficient for most games.
    static constexpr std::array<vk::DescriptorPoolSize, 6> type_capacities = {{
        {vk::DescriptorType::eUniformBufferDynamic, 64},
        {vk::DescriptorType::eUniformTexelBuffer, 64},
        {vk::DescriptorType::eCombinedImageSampler, 4096},
        {vk::DescriptorType::eSampledImage, 256},
        {vk::DescriptorType::eStorageImage, 256},
        {vk::DescriptorType::eStorageBuffer, 32},
    }};

    const vk::DescriptorPoolCreateInfo create_info = {
        .maxSets = 4098,
        .poolSizeCount = static_cast<u32>(type_capacities.size()),
        .pPoolSizes = type_capacities.data(),
    };

    const vk::Device device = instance.GetDevice();
    return device.createDescriptorPoolUnique(create_info);
}
/**
 * Creates the descriptor set layout described by `bindings` together with an update template
 * that reads its data from a packed array of DescriptorData, one element per binding.
 *
 * @param instance Provides the Vulkan device.
 * @param pool_    Pool allocator that descriptor sets are later taken from.
 * @param bindings Layout bindings; the temporary entry array holds MAX_DESCRIPTORS elements.
 */
DescriptorSetProvider::DescriptorSetProvider(
    const Instance& instance, DescriptorPool& pool_,
    std::span<const vk::DescriptorSetLayoutBinding> bindings)
    : pool{pool_}, device{instance.GetDevice()} {
    // Entry `slot` of the template sources its data from element `slot` of the DescriptorData
    // array handed to Acquire.
    std::array<vk::DescriptorUpdateTemplateEntry, MAX_DESCRIPTORS> update_entries;
    u32 slot = 0;
    for (const vk::DescriptorSetLayoutBinding& binding : bindings) {
        update_entries[slot] = vk::DescriptorUpdateTemplateEntry{
            .dstBinding = binding.binding,
            .dstArrayElement = 0,
            .descriptorCount = binding.descriptorCount,
            .descriptorType = binding.descriptorType,
            .offset = slot * sizeof(DescriptorData),
            .stride = sizeof(DescriptorData),
        };
        ++slot;
    }

    const u32 binding_count = static_cast<u32>(bindings.size());

    const vk::DescriptorSetLayoutCreateInfo layout_info = {
        .bindingCount = binding_count,
        .pBindings = bindings.data(),
    };
    layout = device.createDescriptorSetLayoutUnique(layout_info);

    const vk::DescriptorUpdateTemplateCreateInfo template_info = {
        .descriptorUpdateEntryCount = binding_count,
        .pDescriptorUpdateEntries = update_entries.data(),
        .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
        .descriptorSetLayout = *layout,
    };
    update_template = device.createDescriptorUpdateTemplateUnique(template_info);
}

/// Layout and update template are unique handles, so no manual cleanup is written here.
DescriptorSetProvider::~DescriptorSetProvider() = default;
/**
 * Returns a descriptor set whose contents match `data`.
 *
 * The raw bytes of `data` form the cache key. On a hit the cached set is returned untouched. On
 * a miss a set is taken from the free list (refilled from the pool in batches of MAX_BATCH_SIZE
 * when empty), written through the update template and remembered under the key.
 *
 * @param data Descriptor payloads, one per binding, in template order.
 * @return The descriptor set holding the requested descriptors.
 */
vk::DescriptorSet DescriptorSetProvider::Acquire(std::span<const DescriptorData> data) {
    MICROPROFILE_SCOPE(Vulkan_DescriptorSetAcquire);

    // Unused trailing slots stay zeroed so equal inputs always produce equal keys.
    DescriptorSetData key{};
    std::memcpy(key.data(), data.data(), data.size_bytes());

    const auto lookup = descriptor_set_map.try_emplace(key);
    const auto entry = lookup.first;
    const bool inserted = lookup.second;

    if (inserted) {
        if (free_sets.empty()) {
            free_sets = pool.Allocate(*layout, MAX_BATCH_SIZE);
        }
        entry.value() = free_sets.back();
        free_sets.pop_back();
        device.updateDescriptorSetWithTemplate(entry->second, *update_template, data[0]);
    }
    return entry->second;
}
/**
 * Drops every cached descriptor set that references `image_view` and puts the set back on the
 * free list so it can be rewritten by a later Acquire.
 */
void DescriptorSetProvider::FreeWithImage(vk::ImageView image_view) {
    const auto references_view = [image_view](auto& info) {
        return info.image_info.imageView == image_view;
    };

    auto it = descriptor_set_map.begin();
    while (it != descriptor_set_map.end()) {
        const auto& [data, set] = *it;
        if (!std::any_of(data.begin(), data.end(), references_view)) {
            ++it;
            continue;
        }
        // Recycle the set, then erase the entry; erase yields the next valid iterator.
        free_sets.push_back(set);
        it = descriptor_set_map.erase(it);
    }
}
} // namespace Vulkan

View File

@ -1,92 +0,0 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include <vector>
#include <tsl/robin_map.h>
#include "common/hash.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
/// Number of DescriptorData slots in a DescriptorSetData cache key.
constexpr u32 MAX_DESCRIPTORS = 7;

/// Payload for one descriptor: image info, buffer info or a texel buffer view.
union DescriptorData {
    vk::DescriptorImageInfo image_info;
    vk::DescriptorBufferInfo buffer_info;
    vk::BufferView buffer_view;

    /// Bytewise comparison over the full size of the union.
    bool operator==(const DescriptorData& other) const noexcept {
        const int difference = std::memcmp(this, &other, sizeof(DescriptorData));
        return difference == 0;
    }
};
/// Fixed-size key describing the contents of one descriptor set.
using DescriptorSetData = std::array<DescriptorData, MAX_DESCRIPTORS>;

/// Hash functor for DescriptorSetData that hashes the raw bytes of the whole array.
struct DataHasher {
    u64 operator()(const DescriptorSetData& data) const noexcept {
        const auto byte_count = sizeof(data);
        return Common::ComputeHash64(data.data(), byte_count);
    }
};
/**
 * Descriptor set allocator backed by a growing list of Vulkan descriptor pools.
 */
class DescriptorPool {
public:
    explicit DescriptorPool(const Instance& instance);
    ~DescriptorPool();

    /// Allocates `num_sets` descriptor sets that all use `layout`.
    std::vector<vk::DescriptorSet> Allocate(vk::DescriptorSetLayout layout, u32 num_sets);

    /// Allocates a single descriptor set of `layout`.
    vk::DescriptorSet Allocate(vk::DescriptorSetLayout layout);

private:
    /// Creates one more Vulkan descriptor pool.
    vk::UniqueDescriptorPool CreatePool();

    const Instance& instance;
    std::vector<vk::UniqueDescriptorPool> pools;
};
/**
 * Owns one descriptor set layout and hands out descriptor sets of that layout, caching them by
 * their contents.
 */
class DescriptorSetProvider {
public:
    explicit DescriptorSetProvider(const Instance& instance, DescriptorPool& pool,
                                   std::span<const vk::DescriptorSetLayoutBinding> bindings);
    ~DescriptorSetProvider();

    /// Returns a descriptor set whose contents match `data`.
    vk::DescriptorSet Acquire(std::span<const DescriptorData> data);

    /// Releases every cached set that references `image_view`.
    void FreeWithImage(vk::ImageView image_view);

    /// Descriptor set layout handle, returned by value.
    [[nodiscard]] vk::DescriptorSetLayout Layout() const noexcept {
        return layout.get();
    }

    /// Descriptor set layout handle, returned by reference so callers can take its address.
    [[nodiscard]] vk::DescriptorSetLayout& Layout() noexcept {
        return *layout;
    }

    /// Update template matching the layout.
    [[nodiscard]] vk::DescriptorUpdateTemplate UpdateTemplate() const noexcept {
        return update_template.get();
    }

private:
    DescriptorPool& pool;
    vk::Device device;
    vk::UniqueDescriptorSetLayout layout;
    vk::UniqueDescriptorUpdateTemplate update_template;
    std::vector<vk::DescriptorSet> free_sets; ///< Allocated sets not currently in the cache
    tsl::robin_map<DescriptorSetData, vk::DescriptorSet, DataHasher> descriptor_set_map;
};
} // namespace Vulkan

View File

@ -0,0 +1,109 @@
// Copyright 2024 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
/**
 * Allocates storage for the queue up front.
 *
 * @param instance              Provides the Vulkan device the writes are submitted to.
 * @param descriptor_write_max_ Capacity of the queue, in descriptor writes.
 */
DescriptorUpdateQueue::DescriptorUpdateQueue(const Instance& instance, u32 descriptor_write_max_)
    : device{instance.GetDevice()}, descriptor_write_max{descriptor_write_max_},
      descriptor_infos{std::make_unique<DescriptorInfoUnion[]>(descriptor_write_max_)},
      descriptor_writes{std::make_unique<vk::WriteDescriptorSet[]>(descriptor_write_max_)} {}
void DescriptorUpdateQueue::Flush() {
if (descriptor_write_end == 0) {
return;
}
device.updateDescriptorSets({std::span(descriptor_writes.get(), descriptor_write_end)}, {});
descriptor_write_end = 0;
}
/**
 * Queues a storage image write to `binding` (array element 0) of `target`.
 * The queue is flushed first when it is already full.
 */
void DescriptorUpdateQueue::AddStorageImage(vk::DescriptorSet target, u8 binding,
                                            vk::ImageView image_view,
                                            vk::ImageLayout image_layout) {
    if (descriptor_write_end >= descriptor_write_max) [[unlikely]] {
        Flush();
    }

    // The write record stores a pointer into descriptor_infos, so both use the same slot.
    const u32 slot = descriptor_write_end;
    vk::DescriptorImageInfo& info = descriptor_infos[slot].image_info;
    info.imageLayout = image_layout;
    info.imageView = image_view;
    info.sampler = VK_NULL_HANDLE;

    descriptor_writes[slot] = vk::WriteDescriptorSet{
        .dstSet = target,
        .dstBinding = binding,
        .dstArrayElement = 0,
        .descriptorCount = 1,
        .descriptorType = vk::DescriptorType::eStorageImage,
        .pImageInfo = &info,
    };
    descriptor_write_end = slot + 1;
}
/**
 * Queues an image write to `binding[array_index]` of `target`.
 *
 * The descriptor type is eCombinedImageSampler when `sampler` is a non-null handle and
 * eSampledImage otherwise. The queue is flushed first when it is already full.
 */
void DescriptorUpdateQueue::AddImageSampler(vk::DescriptorSet target, u8 binding, u8 array_index,
                                            vk::ImageView image_view, vk::Sampler sampler,
                                            vk::ImageLayout image_layout) {
    if (descriptor_write_end >= descriptor_write_max) [[unlikely]] {
        Flush();
    }

    // The write record stores a pointer into descriptor_infos, so both use the same slot.
    const u32 slot = descriptor_write_end;
    vk::DescriptorImageInfo& info = descriptor_infos[slot].image_info;
    info.imageLayout = image_layout;
    info.imageView = image_view;
    info.sampler = sampler;

    vk::DescriptorType descriptor_type = vk::DescriptorType::eSampledImage;
    if (sampler) {
        descriptor_type = vk::DescriptorType::eCombinedImageSampler;
    }

    descriptor_writes[slot] = vk::WriteDescriptorSet{
        .dstSet = target,
        .dstBinding = binding,
        .dstArrayElement = array_index,
        .descriptorCount = 1,
        .descriptorType = descriptor_type,
        .pImageInfo = &info,
    };
    descriptor_write_end = slot + 1;
}
/**
 * Queues a buffer write of descriptor type `type` to `binding` (array element 0) of `target`,
 * covering `size` bytes of `buffer` starting at `offset`.
 * The queue is flushed first when it is already full.
 */
void DescriptorUpdateQueue::AddBuffer(vk::DescriptorSet target, u8 binding, vk::Buffer buffer,
                                      vk::DeviceSize offset, vk::DeviceSize size,
                                      vk::DescriptorType type) {
    if (descriptor_write_end >= descriptor_write_max) [[unlikely]] {
        Flush();
    }

    // The write record stores a pointer into descriptor_infos, so both use the same slot.
    const u32 slot = descriptor_write_end;
    vk::DescriptorBufferInfo& info = descriptor_infos[slot].buffer_info;
    info.range = size;
    info.offset = offset;
    info.buffer = buffer;

    descriptor_writes[slot] = vk::WriteDescriptorSet{
        .dstSet = target,
        .dstBinding = binding,
        .dstArrayElement = 0,
        .descriptorCount = 1,
        .descriptorType = type,
        .pBufferInfo = &info,
    };
    descriptor_write_end = slot + 1;
}
/**
 * Queues a uniform texel buffer write to `binding` (array element 0) of `target`.
 * The queue is flushed first when it is already full.
 */
void DescriptorUpdateQueue::AddTexelBuffer(vk::DescriptorSet target, u8 binding,
                                           vk::BufferView buffer_view) {
    if (descriptor_write_end >= descriptor_write_max) [[unlikely]] {
        Flush();
    }

    // The write record stores a pointer into descriptor_infos, so both use the same slot.
    const u32 slot = descriptor_write_end;
    vk::BufferView& stored_view = descriptor_infos[slot].buffer_view;
    stored_view = buffer_view;

    descriptor_writes[slot] = vk::WriteDescriptorSet{
        .dstSet = target,
        .dstBinding = binding,
        .dstArrayElement = 0,
        .descriptorCount = 1,
        .descriptorType = vk::DescriptorType::eUniformTexelBuffer,
        .pTexelBufferView = &stored_view,
    };
    descriptor_write_end = slot + 1;
}
} // namespace Vulkan

View File

@ -0,0 +1,53 @@
// Copyright 2024 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once

#include <memory>
#include <variant>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"

namespace Vulkan {

class Instance;

/**
 * Storage for the payload of one queued descriptor write. Only the member matching the
 * descriptor type of the write is meaningful.
 */
struct DescriptorInfoUnion {
    // Deliberately empty: no union member is initialised here; the queue fills in the member it
    // needs when a write is added.
    DescriptorInfoUnion() {}

    union {
        vk::DescriptorImageInfo image_info;
        vk::DescriptorBufferInfo buffer_info;
        vk::BufferView buffer_view;
    };
};

/**
 * Collects descriptor writes and submits them in batches.
 *
 * Every Add* call records one write. When the queue already holds `descriptor_write_max` writes,
 * the pending batch is flushed before the new write is recorded.
 */
class DescriptorUpdateQueue {
public:
    /**
     * @param instance             Provides the Vulkan device the writes are submitted to.
     * @param descriptor_write_max Capacity of the queue, in descriptor writes.
     */
    explicit DescriptorUpdateQueue(const Instance& instance, u32 descriptor_write_max = 2048);
    ~DescriptorUpdateQueue() = default;

    /// Submits all queued writes with a single updateDescriptorSets call and empties the queue.
    void Flush();

    /// Queues a storage image write to `binding` (array element 0) of `target`.
    void AddStorageImage(vk::DescriptorSet target, u8 binding, vk::ImageView image_view,
                         vk::ImageLayout image_layout = vk::ImageLayout::eGeneral);

    /// Queues an image write to `binding[array_index]` of `target`. The descriptor type is
    /// eCombinedImageSampler when `sampler` is non-null and eSampledImage otherwise.
    void AddImageSampler(vk::DescriptorSet target, u8 binding, u8 array_index,
                         vk::ImageView image_view, vk::Sampler sampler,
                         vk::ImageLayout image_layout = vk::ImageLayout::eGeneral);

    /// Queues a buffer write of descriptor type `type` to `binding` (array element 0) of `target`.
    void AddBuffer(vk::DescriptorSet target, u8 binding, vk::Buffer buffer, vk::DeviceSize offset,
                   vk::DeviceSize size = VK_WHOLE_SIZE,
                   vk::DescriptorType type = vk::DescriptorType::eUniformBufferDynamic);

    /// Queues a uniform texel buffer write to `binding` (array element 0) of `target`.
    void AddTexelBuffer(vk::DescriptorSet target, u8 binding, vk::BufferView buffer_view);

private:
    const vk::Device device;
    const u32 descriptor_write_max;
    // Parallel arrays: write i points at the payload stored in descriptor_infos[i].
    std::unique_ptr<DescriptorInfoUnion[]> descriptor_infos;
    std::unique_ptr<vk::WriteDescriptorSet[]> descriptor_writes;
    u32 descriptor_write_end = 0; ///< Number of writes currently queued
};

} // namespace Vulkan

View File

@ -9,7 +9,7 @@
#include "video_core/renderer_vulkan/pica_to_vk.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_render_manager.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
namespace Vulkan {
@ -64,7 +64,7 @@ Shader::~Shader() {
}
}
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, RenderpassCache& renderpass_cache_,
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, RenderManager& renderpass_cache_,
const PipelineInfo& info_, vk::PipelineCache pipeline_cache_,
vk::PipelineLayout layout_, std::array<Shader*, 3> stages_,
Common::ThreadWorker* worker_)

View File

@ -40,7 +40,7 @@ private:
namespace Vulkan {
class Instance;
class RenderpassCache;
class RenderManager;
constexpr u32 MAX_SHADER_STAGES = 3;
constexpr u32 MAX_VERTEX_ATTRIBUTES = 16;
@ -126,7 +126,7 @@ struct AttachmentInfo {
};
/**
* Information about a graphics/compute pipeline
* Information about a graphics pipeline
*/
struct PipelineInfo {
BlendingState blending;
@ -165,7 +165,7 @@ struct Shader : public Common::AsyncHandle {
class GraphicsPipeline : public Common::AsyncHandle {
public:
explicit GraphicsPipeline(const Instance& instance, RenderpassCache& renderpass_cache,
explicit GraphicsPipeline(const Instance& instance, RenderManager& renderpass_cache,
const PipelineInfo& info, vk::PipelineCache pipeline_cache,
vk::PipelineLayout layout, std::array<Shader*, 3> stages,
Common::ThreadWorker* worker);
@ -181,7 +181,7 @@ public:
private:
const Instance& instance;
RenderpassCache& renderpass_cache;
RenderManager& renderpass_cache;
Common::ThreadWorker* worker;
vk::UniquePipeline pipeline;

View File

@ -5,7 +5,6 @@
#include <mutex>
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
@ -99,8 +98,7 @@ void MasterSemaphoreTimeline::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore
try {
instance.GetGraphicsQueue().submit(submit_info);
} catch (vk::DeviceLostError& err) {
LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what());
UNREACHABLE();
UNREACHABLE_MSG("Device lost during submit: {}", err.what());
}
}
@ -109,23 +107,21 @@ constexpr u64 FENCE_RESERVE = 8;
MasterSemaphoreFence::MasterSemaphoreFence(const Instance& instance_) : instance{instance_} {
const vk::Device device{instance.GetDevice()};
for (u64 i = 0; i < FENCE_RESERVE; i++) {
free_queue.push(device.createFenceUnique({}));
free_queue.push_back(device.createFence({}));
}
wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); });
}
MasterSemaphoreFence::~MasterSemaphoreFence() = default;
/// Destroys the fences that are currently idle in the free queue.
MasterSemaphoreFence::~MasterSemaphoreFence() {
    // The wait thread pushes recycled fences into free_queue. Members are only destroyed after
    // this body has run, so without an explicit stop + join the thread could still be
    // modifying the queue while it is being iterated below.
    if (wait_thread.joinable()) {
        wait_thread.request_stop();
        wait_thread.join();
    }

    const vk::Device device{instance.GetDevice()};
    for (const vk::Fence fence : free_queue) {
        device.destroyFence(fence);
    }
}
void MasterSemaphoreFence::Refresh() {}
void MasterSemaphoreFence::Wait(u64 tick) {
while (true) {
u64 current_value = gpu_tick.load(std::memory_order_relaxed);
if (current_value >= tick) {
return;
}
gpu_tick.wait(current_value);
}
std::unique_lock lk{free_mutex};
free_cv.wait(lk, [&] { return gpu_tick.load(std::memory_order_relaxed) >= tick; });
}
void MasterSemaphoreFence::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait,
@ -149,59 +145,56 @@ void MasterSemaphoreFence::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wa
.pSignalSemaphores = &signal,
};
vk::UniqueFence fence{GetFreeFence()};
const vk::Fence fence = GetFreeFence();
try {
instance.GetGraphicsQueue().submit(submit_info, *fence);
instance.GetGraphicsQueue().submit(submit_info, fence);
} catch (vk::DeviceLostError& err) {
LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what());
UNREACHABLE();
UNREACHABLE_MSG("Device lost during submit: {}", err.what());
}
std::scoped_lock lock{wait_mutex};
wait_queue.push({
.handle = std::move(fence),
.signal_value = signal_value,
});
wait_queue.emplace(fence, signal_value);
wait_cv.notify_one();
}
void MasterSemaphoreFence::WaitThread(std::stop_token token) {
const vk::Device device{instance.GetDevice()};
while (!token.stop_requested()) {
Fence fence;
vk::Fence fence;
u64 signal_value;
{
std::unique_lock lock{wait_mutex};
Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); });
if (token.stop_requested()) {
return;
}
fence = std::move(wait_queue.front());
std::tie(fence, signal_value) = wait_queue.front();
wait_queue.pop();
}
const vk::Result result = device.waitForFences(*fence.handle, true, WAIT_TIMEOUT);
const vk::Result result = device.waitForFences(fence, true, WAIT_TIMEOUT);
if (result != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Fence wait failed with error {}", vk::to_string(result));
UNREACHABLE();
UNREACHABLE_MSG("Fence wait failed with error {}", vk::to_string(result));
}
device.resetFences(*fence.handle);
gpu_tick.store(fence.signal_value);
gpu_tick.notify_all();
device.resetFences(fence);
gpu_tick.store(signal_value);
std::scoped_lock lock{free_mutex};
free_queue.push(std::move(fence.handle));
free_queue.push_back(fence);
free_cv.notify_all();
}
}
vk::UniqueFence MasterSemaphoreFence::GetFreeFence() {
vk::Fence MasterSemaphoreFence::GetFreeFence() {
std::scoped_lock lock{free_mutex};
if (free_queue.empty()) {
return instance.GetDevice().createFenceUnique({});
return instance.GetDevice().createFence({});
}
vk::UniqueFence fence{std::move(free_queue.front())};
free_queue.pop();
const vk::Fence fence = free_queue.front();
free_queue.pop_front();
return fence;
}

View File

@ -72,6 +72,8 @@ private:
};
class MasterSemaphoreFence : public MasterSemaphore {
using Waitable = std::pair<vk::Fence, u64>;
public:
explicit MasterSemaphoreFence(const Instance& instance);
~MasterSemaphoreFence() override;
@ -86,20 +88,15 @@ public:
private:
void WaitThread(std::stop_token token);
vk::UniqueFence GetFreeFence();
vk::Fence GetFreeFence();
private:
const Instance& instance;
struct Fence {
vk::UniqueFence handle;
u64 signal_value;
};
std::queue<vk::UniqueFence> free_queue;
std::queue<Fence> wait_queue;
std::deque<vk::Fence> free_queue;
std::queue<Waitable> wait_queue;
std::mutex free_mutex;
std::mutex wait_mutex;
std::condition_variable free_cv;
std::condition_variable_any wait_cv;
std::jthread wait_thread;
};

View File

@ -11,9 +11,10 @@
#include "common/scope_exit.h"
#include "common/settings.h"
#include "video_core/renderer_vulkan/pica_to_vk.h"
#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_render_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/shader/generator/glsl_fs_shader_gen.h"
@ -62,34 +63,34 @@ constexpr std::array<vk::DescriptorSetLayoutBinding, 6> BUFFER_BINDINGS = {{
{5, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment},
}};
template <u32 NumTex0>
constexpr std::array<vk::DescriptorSetLayoutBinding, 3> TEXTURE_BINDINGS = {{
{0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
{1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
{2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment},
{0, vk::DescriptorType::eCombinedImageSampler, NumTex0,
vk::ShaderStageFlagBits::eFragment}, // tex0
{1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, // tex1
{2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, // tex2
}};
// TODO: Use descriptor array for shadow cube
constexpr std::array<vk::DescriptorSetLayoutBinding, 7> SHADOW_BINDINGS = {{
{0, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
{1, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
{2, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
{3, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
{4, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
{5, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
{6, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment},
constexpr std::array<vk::DescriptorSetLayoutBinding, 2> UTILITY_BINDINGS = {{
{0, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, // shadow_buffer
{1, vk::DescriptorType::eCombinedImageSampler, 1,
vk::ShaderStageFlagBits::eFragment}, // tex_normal
}};
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
RenderpassCache& renderpass_cache_, DescriptorPool& pool_)
: instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_}, pool{pool_},
num_worker_threads{std::max(std::thread::hardware_concurrency(), 2U)},
RenderManager& renderpass_cache_, DescriptorUpdateQueue& update_queue_)
: instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_},
update_queue{update_queue_},
num_worker_threads{std::max(std::thread::hardware_concurrency(), 2U) >> 1},
workers{num_worker_threads, "Pipeline workers"},
descriptor_set_providers{DescriptorSetProvider{instance, pool, BUFFER_BINDINGS},
DescriptorSetProvider{instance, pool, TEXTURE_BINDINGS},
DescriptorSetProvider{instance, pool, SHADOW_BINDINGS}},
descriptor_heaps{
DescriptorHeap{instance, scheduler.GetMasterSemaphore(), BUFFER_BINDINGS, 32},
DescriptorHeap{instance, scheduler.GetMasterSemaphore(), TEXTURE_BINDINGS<1>},
DescriptorHeap{instance, scheduler.GetMasterSemaphore(), UTILITY_BINDINGS, 32}},
trivial_vertex_shader{
instance, vk::ShaderStageFlagBits::eVertex,
GLSL::GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported(), true)} {
scheduler.RegisterOnDispatch([this] { update_queue.Flush(); });
profile = Pica::Shader::Profile{
.has_separable_shaders = true,
.has_clip_planes = instance.IsShaderClipDistanceSupported(),
@ -106,13 +107,13 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
}
void PipelineCache::BuildLayout() {
std::array<vk::DescriptorSetLayout, NUM_RASTERIZER_SETS> descriptor_set_layouts;
std::transform(descriptor_set_providers.begin(), descriptor_set_providers.end(),
descriptor_set_layouts.begin(),
[](const auto& provider) { return provider.Layout(); });
std::array<vk::DescriptorSetLayout, NumRasterizerSets> descriptor_set_layouts;
descriptor_set_layouts[0] = descriptor_heaps[0].Layout();
descriptor_set_layouts[1] = descriptor_heaps[1].Layout();
descriptor_set_layouts[2] = descriptor_heaps[2].Layout();
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = NUM_RASTERIZER_SETS,
.setLayoutCount = NumRasterizerSets,
.pSetLayouts = descriptor_set_layouts.data(),
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr,
@ -214,55 +215,11 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) {
return false;
}
u32 new_descriptors_start = 0;
std::span<vk::DescriptorSet> new_descriptors_span{};
std::span<u32> new_offsets_span{};
// Ensure all the descriptor sets are set at least once at the beginning.
if (scheduler.IsStateDirty(StateFlags::DescriptorSets)) {
set_dirty.set();
}
if (set_dirty.any()) {
for (u32 i = 0; i < NUM_RASTERIZER_SETS; i++) {
if (!set_dirty.test(i)) {
continue;
}
bound_descriptor_sets[i] = descriptor_set_providers[i].Acquire(update_data[i]);
}
new_descriptors_span = bound_descriptor_sets;
// Only send new offsets if the buffer descriptor-set changed.
if (set_dirty.test(0)) {
new_offsets_span = offsets;
}
// Try to compact the number of updated descriptor-set slots to the ones that have actually
// changed
if (!set_dirty.all()) {
const u64 dirty_mask = set_dirty.to_ulong();
new_descriptors_start = static_cast<u32>(std::countr_zero(dirty_mask));
const u32 new_descriptors_end = 64u - static_cast<u32>(std::countl_zero(dirty_mask));
const u32 new_descriptors_size = new_descriptors_end - new_descriptors_start;
new_descriptors_span =
new_descriptors_span.subspan(new_descriptors_start, new_descriptors_size);
}
set_dirty.reset();
}
boost::container::static_vector<vk::DescriptorSet, NUM_RASTERIZER_SETS> new_descriptors(
new_descriptors_span.begin(), new_descriptors_span.end());
boost::container::static_vector<u32, NUM_DYNAMIC_OFFSETS> new_offsets(new_offsets_span.begin(),
new_offsets_span.end());
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
const bool pipeline_dirty = (current_pipeline != pipeline) || is_dirty;
scheduler.Record([this, is_dirty, pipeline_dirty, pipeline,
current_dynamic = current_info.dynamic, dynamic = info.dynamic,
new_descriptors_start, descriptor_sets = std::move(new_descriptors),
offsets = std::move(new_offsets),
descriptor_sets = bound_descriptor_sets, offsets = offsets,
current_rasterization = current_info.rasterization,
current_depth_stencil = current_info.depth_stencil,
rasterization = info.rasterization,
@ -364,10 +321,8 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) {
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
}
if (descriptor_sets.size()) {
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout,
new_descriptors_start, descriptor_sets, offsets);
}
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
descriptor_sets, offsets);
});
current_info = info;
@ -385,7 +340,6 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::RegsInternal& regs,
// We also don't need the geometry shader if we have the barycentric extension.
const bool use_geometry_shader = instance.UseGeometryShaders() && !regs.lighting.disable &&
!instance.IsFragmentShaderBarycentricSupported();
PicaVSConfig config{regs, setup, instance.IsShaderClipDistanceSupported(), use_geometry_shader};
for (u32 i = 0; i < layout.attribute_count; i++) {
@ -402,7 +356,7 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::RegsInternal& regs,
}
}
auto [it, new_config] = programmable_vertex_map.try_emplace(config);
const auto [it, new_config] = programmable_vertex_map.try_emplace(config);
if (new_config) {
auto program = GLSL::GenerateVertexShader(setup, config, true);
if (program.empty()) {
@ -497,59 +451,6 @@ void PipelineCache::UseFragmentShader(const Pica::RegsInternal& regs,
shader_hashes[ProgramType::FS] = fs_config.Hash();
}
void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler) {
auto& info = update_data[1][binding].image_info;
if (info.imageView == image_view && info.sampler == sampler) {
return;
}
set_dirty[1] = true;
info = vk::DescriptorImageInfo{
.sampler = sampler,
.imageView = image_view,
.imageLayout = vk::ImageLayout::eGeneral,
};
}
void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) {
auto& info = update_data[2][binding].image_info;
if (info.imageView == image_view) {
return;
}
set_dirty[2] = true;
info = vk::DescriptorImageInfo{
.imageView = image_view,
.imageLayout = vk::ImageLayout::eGeneral,
};
}
void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) {
auto& info = update_data[0][binding].buffer_info;
if (info.buffer == buffer && info.offset == offset && info.range == size) {
return;
}
set_dirty[0] = true;
info = vk::DescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
.range = size,
};
}
void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) {
auto& view = update_data[0][binding].buffer_view;
if (view != buffer_view) {
set_dirty[0] = true;
view = buffer_view;
}
}
void PipelineCache::SetBufferOffset(u32 binding, std::size_t offset) {
if (offsets[binding] != static_cast<u32>(offset)) {
offsets[binding] = static_cast<u32>(offset);
set_dirty[0] = true;
}
}
bool PipelineCache::IsCacheValid(std::span<const u8> data) const {
if (data.size() < sizeof(vk::PipelineCacheHeaderVersionOne)) {
LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header");

View File

@ -7,8 +7,8 @@
#include <bitset>
#include <tsl/robin_map.h>
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/shader/generator/pica_fs_config.h"
#include "video_core/shader/generator/profile.h"
#include "video_core/shader/generator/shader_gen.h"
@ -22,23 +22,39 @@ namespace Vulkan {
class Instance;
class Scheduler;
class RenderpassCache;
class DescriptorPool;
class RenderManager;
class DescriptorUpdateQueue;
constexpr u32 NUM_RASTERIZER_SETS = 3;
constexpr u32 NUM_DYNAMIC_OFFSETS = 3;
/// Selects one of the descriptor heaps owned by PipelineCache.
/// The underlying value is used directly as an array index by PipelineCache::Acquire, so the
/// enumerator order has to match the order in which the heaps are constructed
/// (buffer bindings, texture bindings, utility bindings).
enum class DescriptorHeapType : u32 {
Buffer,
Texture,
Utility,
};
/**
* Stores a collection of rasterizer pipelines used during rendering.
*/
class PipelineCache {
static constexpr u32 NumRasterizerSets = 3;
static constexpr u32 NumDescriptorHeaps = 3;
static constexpr u32 NumDynamicOffsets = 3;
public:
explicit PipelineCache(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorPool& pool);
RenderManager& renderpass_cache, DescriptorUpdateQueue& update_queue);
~PipelineCache();
[[nodiscard]] DescriptorSetProvider& TextureProvider() noexcept {
return descriptor_set_providers[1];
/// Takes a fresh descriptor set from the heap selected by `type` and remembers it as the
/// currently bound set for that slot.
/// @return The descriptor set that was committed from the heap.
vk::DescriptorSet Acquire(DescriptorHeapType type) {
    const auto heap_index = static_cast<u32>(type);
    vk::DescriptorSet& bound_set = bound_descriptor_sets[heap_index];
    bound_set = descriptor_heaps[heap_index].Commit();
    return bound_set;
}
/// Sets the dynamic offset for the uniform buffer at binding.
/// The stored offsets are handed to bindDescriptorSets the next time a pipeline is bound.
/// `binding` is used as an unchecked index into `offsets`, so it must be smaller than
/// NumDynamicOffsets.
void UpdateRange(u8 binding, u32 offset) {
offsets[binding] = offset;
}
/// Loads the pipeline cache stored to disk
@ -66,21 +82,6 @@ public:
/// Binds a fragment shader generated from PICA state
void UseFragmentShader(const Pica::RegsInternal& regs, const Pica::Shader::UserConfig& user);
/// Binds a texture to the specified binding
void BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler);
/// Binds a storage image to the specified binding
void BindStorageImage(u32 binding, vk::ImageView image_view);
/// Binds a buffer to the specified binding
void BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size);
/// Binds a buffer to the specified binding
void BindTexelBuffer(u32 binding, vk::BufferView buffer_view);
/// Sets the dynamic offset for the uniform buffer at binding
void SetBufferOffset(u32 binding, std::size_t offset);
private:
/// Builds the rasterizer pipeline layout
void BuildLayout();
@ -97,8 +98,8 @@ private:
private:
const Instance& instance;
Scheduler& scheduler;
RenderpassCache& renderpass_cache;
DescriptorPool& pool;
RenderManager& renderpass_cache;
DescriptorUpdateQueue& update_queue;
Pica::Shader::Profile profile{};
vk::UniquePipelineCache pipeline_cache;
@ -110,11 +111,9 @@ private:
tsl::robin_map<u64, std::unique_ptr<GraphicsPipeline>, Common::IdentityHash<u64>>
graphics_pipelines;
std::array<DescriptorSetProvider, NUM_RASTERIZER_SETS> descriptor_set_providers;
std::array<DescriptorSetData, NUM_RASTERIZER_SETS> update_data{};
std::array<vk::DescriptorSet, NUM_RASTERIZER_SETS> bound_descriptor_sets{};
std::array<u32, NUM_DYNAMIC_OFFSETS> offsets{};
std::bitset<NUM_RASTERIZER_SETS> set_dirty{};
std::array<DescriptorHeap, NumDescriptorHeaps> descriptor_heaps;
std::array<vk::DescriptorSet, NumRasterizerSets> bound_descriptor_sets{};
std::array<u32, NumDynamicOffsets> offsets{};
std::array<u64, MAX_SHADER_STAGES> shader_hashes;
std::array<Shader*, MAX_SHADER_STAGES> current_shaders;

View File

@ -32,8 +32,9 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugUtilsCallback(
VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type,
const VkDebugUtilsMessengerCallbackDataEXT* callback_data, void* user_data) {
switch (callback_data->messageIdNumber) {
switch (static_cast<u32>(callback_data->messageIdNumber)) {
case 0x609a13b: // Vertex attribute at location not consumed by shader
case 0xc81ad50e:
return VK_FALSE;
default:
break;

View File

@ -138,11 +138,11 @@ PresentWindow::PresentWindow(Frontend::EmuWindow& emu_window_, const Instance& i
if (instance.HasDebuggingToolAttached()) {
for (u32 i = 0; i < num_images; ++i) {
Vulkan::SetObjectName(device, swap_chain[i].cmdbuf, "Swapchain Command Buffer {}", i);
Vulkan::SetObjectName(device, swap_chain[i].render_ready,
"Swapchain Semaphore: render_ready {}", i);
Vulkan::SetObjectName(device, swap_chain[i].present_done,
"Swapchain Fence: present_done {}", i);
SetObjectName(device, swap_chain[i].cmdbuf, "Swapchain Command Buffer {}", i);
SetObjectName(device, swap_chain[i].render_ready,
"Swapchain Semaphore: render_ready {}", i);
SetObjectName(device, swap_chain[i].present_done, "Swapchain Fence: present_done {}",
i);
}
}

View File

@ -20,7 +20,7 @@ namespace Vulkan {
class Instance;
class Swapchain;
class Scheduler;
class RenderpassCache;
class RenderManager;
struct Frame {
u32 width;

View File

@ -58,13 +58,15 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore&
VideoCore::CustomTexManager& custom_tex_manager,
VideoCore::RendererBase& renderer,
Frontend::EmuWindow& emu_window, const Instance& instance,
Scheduler& scheduler, DescriptorPool& pool,
RenderpassCache& renderpass_cache, u32 image_count)
Scheduler& scheduler, RenderManager& renderpass_cache,
DescriptorUpdateQueue& update_queue_, u32 image_count)
: RasterizerAccelerated{memory, pica}, instance{instance}, scheduler{scheduler},
renderpass_cache{renderpass_cache}, pipeline_cache{instance, scheduler, renderpass_cache,
pool},
runtime{instance, scheduler, renderpass_cache, pool, pipeline_cache.TextureProvider(),
image_count},
renderpass_cache{renderpass_cache}, update_queue{update_queue_},
pipeline_cache{instance, scheduler, renderpass_cache, update_queue}, runtime{instance,
scheduler,
renderpass_cache,
update_queue,
image_count},
res_cache{memory, custom_tex_manager, runtime, regs, renderer},
stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE},
uniform_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformBuffer,
@ -77,11 +79,12 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore&
vertex_buffers.fill(stream_buffer.Handle());
// Query uniform buffer alignment.
uniform_buffer_alignment = instance.UniformMinAlignment();
uniform_size_aligned_vs_pica =
Common::AlignUp(sizeof(VSPicaUniformData), uniform_buffer_alignment);
uniform_size_aligned_vs = Common::AlignUp(sizeof(VSUniformData), uniform_buffer_alignment);
uniform_size_aligned_fs = Common::AlignUp(sizeof(FSUniformData), uniform_buffer_alignment);
Common::AlignUp<u32>(sizeof(VSPicaUniformData), uniform_buffer_alignment);
uniform_size_aligned_vs = Common::AlignUp<u32>(sizeof(VSUniformData), uniform_buffer_alignment);
uniform_size_aligned_fs = Common::AlignUp<u32>(sizeof(FSUniformData), uniform_buffer_alignment);
// Define vertex layout for software shaders
MakeSoftwareVertexLayout();
@ -107,24 +110,32 @@ RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore&
.range = VK_WHOLE_SIZE,
});
// Since we don't have access to VK_EXT_descriptor_indexing we need to initialize
// all descriptor sets even the ones we don't use.
pipeline_cache.BindBuffer(0, uniform_buffer.Handle(), 0, sizeof(VSPicaUniformData));
pipeline_cache.BindBuffer(1, uniform_buffer.Handle(), 0, sizeof(VSUniformData));
pipeline_cache.BindBuffer(2, uniform_buffer.Handle(), 0, sizeof(FSUniformData));
pipeline_cache.BindTexelBuffer(3, *texture_lf_view);
pipeline_cache.BindTexelBuffer(4, *texture_rg_view);
pipeline_cache.BindTexelBuffer(5, *texture_rgba_view);
scheduler.RegisterOnSubmit([&renderpass_cache] { renderpass_cache.EndRendering(); });
// Prepare the static buffer descriptor set.
const auto buffer_set = pipeline_cache.Acquire(DescriptorHeapType::Buffer);
update_queue.AddBuffer(buffer_set, 0, uniform_buffer.Handle(), 0, sizeof(VSPicaUniformData));
update_queue.AddBuffer(buffer_set, 1, uniform_buffer.Handle(), 0, sizeof(VSUniformData));
update_queue.AddBuffer(buffer_set, 2, uniform_buffer.Handle(), 0, sizeof(FSUniformData));
update_queue.AddTexelBuffer(buffer_set, 3, *texture_lf_view);
update_queue.AddTexelBuffer(buffer_set, 4, *texture_rg_view);
update_queue.AddTexelBuffer(buffer_set, 5, *texture_rgba_view);
const auto texture_set = pipeline_cache.Acquire(DescriptorHeapType::Texture);
Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID);
// Prepare texture and utility descriptor sets.
for (u32 i = 0; i < 3; i++) {
pipeline_cache.BindTexture(i, null_surface.ImageView(), null_sampler.Handle());
update_queue.AddImageSampler(texture_set, i, 0, null_surface.ImageView(),
null_sampler.Handle());
}
for (u32 i = 0; i < 7; i++) {
pipeline_cache.BindStorageImage(i, null_surface.StorageView());
}
const auto utility_set = pipeline_cache.Acquire(DescriptorHeapType::Utility);
update_queue.AddStorageImage(utility_set, 0, null_surface.StorageView());
update_queue.AddImageSampler(utility_set, 1, 0, null_surface.ImageView(),
null_sampler.Handle());
update_queue.Flush();
SyncEntireState();
}
@ -477,13 +488,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
pipeline_info.attachments.color = framebuffer->Format(SurfaceType::Color);
pipeline_info.attachments.depth = framebuffer->Format(SurfaceType::Depth);
if (shadow_rendering) {
pipeline_cache.BindStorageImage(6, framebuffer->ImageView(SurfaceType::Color));
} else {
Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
pipeline_cache.BindStorageImage(6, null_surface.StorageView());
}
// Update scissor uniforms
const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor();
if (fs_uniform_block_data.data.scissor_x1 != scissor_x1 ||
@ -500,6 +504,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
// Sync and bind the texture surfaces
SyncTextureUnits(framebuffer);
SyncUtilityTextures(framebuffer);
// Sync and bind the shader
if (shader_dirty) {
@ -533,8 +538,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
} else {
pipeline_cache.BindPipeline(pipeline_info, true);
const u64 vertex_size = vertex_batch.size() * sizeof(HardwareVertex);
const u32 vertex_count = static_cast<u32>(vertex_batch.size());
const u32 vertex_size = vertex_count * sizeof(HardwareVertex);
const auto [buffer, offset, _] = stream_buffer.Map(vertex_size, sizeof(HardwareVertex));
std::memcpy(buffer, vertex_batch.data(), vertex_size);
@ -554,6 +559,11 @@ void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) {
using TextureType = Pica::TexturingRegs::TextureConfig::TextureType;
const auto pica_textures = regs.texturing.GetTextures();
const bool use_cube_heap =
pica_textures[0].enabled && pica_textures[0].config.type == TextureType::ShadowCube;
const auto texture_set = pipeline_cache.Acquire(use_cube_heap ? DescriptorHeapType::Texture
: DescriptorHeapType::Texture);
for (u32 texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
const auto& texture = pica_textures[texture_index];
@ -561,8 +571,8 @@ void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) {
if (!texture.enabled) {
const Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
const Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID);
pipeline_cache.BindTexture(texture_index, null_surface.ImageView(),
null_sampler.Handle());
update_queue.AddImageSampler(texture_set, texture_index, 0, null_surface.ImageView(),
null_sampler.Handle());
continue;
}
@ -571,20 +581,21 @@ void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) {
switch (texture.config.type.Value()) {
case TextureType::Shadow2D: {
Surface& surface = res_cache.GetTextureSurface(texture);
Sampler& sampler = res_cache.GetSampler(texture.config);
surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap;
pipeline_cache.BindStorageImage(0, surface.StorageView());
update_queue.AddImageSampler(texture_set, texture_index, 0, surface.StorageView(),
sampler.Handle());
continue;
}
case TextureType::ShadowCube: {
BindShadowCube(texture);
BindShadowCube(texture, texture_set);
continue;
}
case TextureType::TextureCube: {
BindTextureCube(texture);
BindTextureCube(texture, texture_set);
continue;
}
default:
UnbindSpecial();
break;
}
}
@ -592,13 +603,26 @@ void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) {
// Bind the texture provided by the rasterizer cache
Surface& surface = res_cache.GetTextureSurface(texture);
Sampler& sampler = res_cache.GetSampler(texture.config);
if (!IsFeedbackLoop(texture_index, framebuffer, surface, sampler)) {
pipeline_cache.BindTexture(texture_index, surface.ImageView(), sampler.Handle());
}
const vk::ImageView color_view = framebuffer->ImageView(SurfaceType::Color);
const bool is_feedback_loop = color_view == surface.ImageView();
const vk::ImageView texture_view =
is_feedback_loop ? surface.CopyImageView() : surface.ImageView();
update_queue.AddImageSampler(texture_set, texture_index, 0, texture_view, sampler.Handle());
}
}
void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture) {
void RasterizerVulkan::SyncUtilityTextures(const Framebuffer* framebuffer) {
const bool shadow_rendering = regs.framebuffer.IsShadowRendering();
if (!shadow_rendering) {
return;
}
const auto utility_set = pipeline_cache.Acquire(DescriptorHeapType::Utility);
update_queue.AddStorageImage(utility_set, 0, framebuffer->ImageView(SurfaceType::Color));
}
void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture,
vk::DescriptorSet texture_set) {
using CubeFace = Pica::TexturingRegs::CubeFace;
auto info = Pica::Texture::TextureInfo::FromPicaRegister(texture.config, texture.format);
constexpr std::array faces = {
@ -606,6 +630,8 @@ void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConf
CubeFace::NegativeY, CubeFace::PositiveZ, CubeFace::NegativeZ,
};
Sampler& sampler = res_cache.GetSampler(texture.config);
for (CubeFace face : faces) {
const u32 binding = static_cast<u32>(face);
info.physical_address = regs.texturing.GetCubePhysicalAddress(face);
@ -613,11 +639,13 @@ void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConf
const VideoCore::SurfaceId surface_id = res_cache.GetTextureSurface(info);
Surface& surface = res_cache.GetSurface(surface_id);
surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap;
pipeline_cache.BindStorageImage(binding, surface.StorageView());
update_queue.AddImageSampler(texture_set, 0, binding, surface.StorageView(),
sampler.Handle());
}
}
void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture) {
void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture,
vk::DescriptorSet texture_set) {
using CubeFace = Pica::TexturingRegs::CubeFace;
const VideoCore::TextureCubeConfig config = {
.px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX),
@ -633,27 +661,7 @@ void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureCon
Surface& surface = res_cache.GetTextureCube(config);
Sampler& sampler = res_cache.GetSampler(texture.config);
pipeline_cache.BindTexture(0, surface.ImageView(), sampler.Handle());
}
bool RasterizerVulkan::IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer,
Surface& surface, Sampler& sampler) {
const vk::ImageView color_view = framebuffer->ImageView(SurfaceType::Color);
const bool is_feedback_loop = color_view == surface.ImageView();
if (!is_feedback_loop) {
return false;
}
// Make a temporary copy of the framebuffer to sample from
pipeline_cache.BindTexture(texture_index, surface.CopyImageView(), sampler.Handle());
return true;
}
void RasterizerVulkan::UnbindSpecial() {
Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID);
for (u32 i = 0; i < 6; i++) {
pipeline_cache.BindStorageImage(i, null_surface.StorageView());
}
update_queue.AddImageSampler(texture_set, 0, 0, surface.ImageView(), sampler.Handle());
}
void RasterizerVulkan::NotifyFixedFunctionPicaRegisterChanged(u32 id) {
@ -1091,7 +1099,7 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
return;
}
const u64 uniform_size =
const u32 uniform_size =
uniform_size_aligned_vs_pica + uniform_size_aligned_vs + uniform_size_aligned_fs;
auto [uniforms, offset, invalidate] =
uniform_buffer.Map(uniform_size, uniform_buffer_alignment);
@ -1102,18 +1110,18 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
std::memcpy(uniforms + used_bytes, &vs_uniform_block_data.data,
sizeof(vs_uniform_block_data.data));
pipeline_cache.SetBufferOffset(1, offset + used_bytes);
pipeline_cache.UpdateRange(1, offset + used_bytes);
vs_uniform_block_data.dirty = false;
used_bytes += static_cast<u32>(uniform_size_aligned_vs);
used_bytes += uniform_size_aligned_vs;
}
if (sync_fs || invalidate) {
std::memcpy(uniforms + used_bytes, &fs_uniform_block_data.data,
sizeof(fs_uniform_block_data.data));
pipeline_cache.SetBufferOffset(2, offset + used_bytes);
pipeline_cache.UpdateRange(2, offset + used_bytes);
fs_uniform_block_data.dirty = false;
used_bytes += static_cast<u32>(uniform_size_aligned_fs);
used_bytes += uniform_size_aligned_fs;
}
if (sync_vs_pica) {
@ -1121,8 +1129,8 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
vs_uniforms.uniforms.SetFromRegs(regs.vs, pica.vs_setup);
std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));
pipeline_cache.SetBufferOffset(0, offset + used_bytes);
used_bytes += static_cast<u32>(uniform_size_aligned_vs_pica);
pipeline_cache.UpdateRange(0, offset + used_bytes);
used_bytes += uniform_size_aligned_vs_pica;
}
uniform_buffer.Commit(used_bytes);

View File

@ -5,8 +5,9 @@
#pragma once
#include "video_core/rasterizer_accelerated.h"
#include "video_core/renderer_vulkan/vk_descriptor_update_queue.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_render_manager.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
@ -31,16 +32,16 @@ struct ScreenInfo;
class Instance;
class Scheduler;
class RenderpassCache;
class DescriptorPool;
class RenderManager;
class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerVulkan(Memory::MemorySystem& memory, Pica::PicaCore& pica,
VideoCore::CustomTexManager& custom_tex_manager,
VideoCore::RendererBase& renderer, Frontend::EmuWindow& emu_window,
const Instance& instance, Scheduler& scheduler, DescriptorPool& pool,
RenderpassCache& renderpass_cache, u32 image_count);
const Instance& instance, Scheduler& scheduler,
RenderManager& renderpass_cache, DescriptorUpdateQueue& update_queue,
u32 image_count);
~RasterizerVulkan() override;
void TickFrame();
@ -102,18 +103,16 @@ private:
/// Syncs all enabled PICA texture units
void SyncTextureUnits(const Framebuffer* framebuffer);
/// Syncs all utility textures in the fragment shader.
void SyncUtilityTextures(const Framebuffer* framebuffer);
/// Binds the PICA shadow cube required for shadow mapping
void BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture);
void BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture,
vk::DescriptorSet texture_set);
/// Binds a texture cube to texture unit 0
void BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture);
/// Makes a temporary copy of the framebuffer if a feedback loop is detected
bool IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, Surface& surface,
Sampler& sampler);
/// Unbinds all special texture unit 0 texture configurations
void UnbindSpecial();
void BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture,
vk::DescriptorSet texture_set);
/// Upload the uniform blocks to the uniform buffer object
void UploadUniforms(bool accelerate_draw);
@ -145,7 +144,8 @@ private:
private:
const Instance& instance;
Scheduler& scheduler;
RenderpassCache& renderpass_cache;
RenderManager& renderpass_cache;
DescriptorUpdateQueue& update_queue;
PipelineCache pipeline_cache;
TextureRuntime runtime;
RasterizerCache res_cache;
@ -164,10 +164,10 @@ private:
vk::UniqueBufferView texture_lf_view;
vk::UniqueBufferView texture_rg_view;
vk::UniqueBufferView texture_rgba_view;
u64 uniform_buffer_alignment;
u64 uniform_size_aligned_vs_pica;
u64 uniform_size_aligned_vs;
u64 uniform_size_aligned_fs;
vk::DeviceSize uniform_buffer_alignment;
u32 uniform_size_aligned_vs_pica;
u32 uniform_size_aligned_vs;
u32 uniform_size_aligned_fs;
bool async_shaders{false};
};

View File

@ -1,4 +1,4 @@
// Copyright 2023 Citra Emulator Project
// Copyright 2024 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -6,24 +6,24 @@
#include "common/assert.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_render_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Vulkan {
constexpr u32 MIN_DRAWS_TO_FLUSH = 20;
constexpr u32 MinDrawsToFlush = 20;
using VideoCore::PixelFormat;
using VideoCore::SurfaceType;
RenderpassCache::RenderpassCache(const Instance& instance, Scheduler& scheduler)
RenderManager::RenderManager(const Instance& instance, Scheduler& scheduler)
: instance{instance}, scheduler{scheduler} {}
RenderpassCache::~RenderpassCache() = default;
RenderManager::~RenderManager() = default;
void RenderpassCache::BeginRendering(const Framebuffer* framebuffer,
Common::Rectangle<u32> draw_rect) {
void RenderManager::BeginRendering(const Framebuffer* framebuffer,
Common::Rectangle<u32> draw_rect) {
const vk::Rect2D render_area = {
.offset{
.x = static_cast<s32>(draw_rect.left),
@ -46,7 +46,7 @@ void RenderpassCache::BeginRendering(const Framebuffer* framebuffer,
BeginRendering(new_pass);
}
void RenderpassCache::BeginRendering(const RenderPass& new_pass) {
void RenderManager::BeginRendering(const RenderPass& new_pass) {
if (pass == new_pass) [[likely]] {
num_draws++;
return;
@ -67,12 +67,11 @@ void RenderpassCache::BeginRendering(const RenderPass& new_pass) {
pass = new_pass;
}
void RenderpassCache::EndRendering() {
void RenderManager::EndRendering() {
if (!pass.render_pass) {
return;
}
pass.render_pass = vk::RenderPass{};
scheduler.Record([images = images, aspects = aspects](vk::CommandBuffer cmdbuf) {
u32 num_barriers = 0;
vk::PipelineStageFlags pipeline_flags{};
@ -108,6 +107,9 @@ void RenderpassCache::EndRendering() {
};
}
cmdbuf.endRenderPass();
if (num_barriers == 0) {
return;
}
cmdbuf.pipelineBarrier(pipeline_flags,
vk::PipelineStageFlagBits::eFragmentShader |
vk::PipelineStageFlagBits::eTransfer,
@ -115,25 +117,29 @@ void RenderpassCache::EndRendering() {
num_barriers, barriers.data());
});
// Reset state.
pass.render_pass = VK_NULL_HANDLE;
images = {};
aspects = {};
// The Mali guide recommends flushing at the end of each major renderpass
// Testing has shown this has a significant effect on rendering performance
if (num_draws > MIN_DRAWS_TO_FLUSH && instance.ShouldFlush()) {
if (num_draws > MinDrawsToFlush && instance.ShouldFlush()) {
scheduler.Flush();
num_draws = 0;
}
}
vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color,
VideoCore::PixelFormat depth, bool is_clear) {
vk::RenderPass RenderManager::GetRenderpass(VideoCore::PixelFormat color,
VideoCore::PixelFormat depth, bool is_clear) {
std::scoped_lock lock{cache_mutex};
const u32 color_index =
color == VideoCore::PixelFormat::Invalid ? MAX_COLOR_FORMATS : static_cast<u32>(color);
const u32 depth_index = depth == VideoCore::PixelFormat::Invalid
? MAX_DEPTH_FORMATS
: (static_cast<u32>(depth) - 14);
color == VideoCore::PixelFormat::Invalid ? NumColorFormats : static_cast<u32>(color);
const u32 depth_index =
depth == VideoCore::PixelFormat::Invalid ? NumDepthFormats : (static_cast<u32>(depth) - 14);
ASSERT_MSG(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS,
ASSERT_MSG(color_index <= NumColorFormats && depth_index <= NumDepthFormats,
"Invalid color index {} and/or depth_index {}", color_index, depth_index);
vk::UniqueRenderPass& renderpass = cached_renderpasses[color_index][depth_index][is_clear];
@ -148,8 +154,8 @@ vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color,
return *renderpass;
}
vk::UniqueRenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format depth,
vk::AttachmentLoadOp load_op) const {
vk::UniqueRenderPass RenderManager::CreateRenderPass(vk::Format color, vk::Format depth,
vk::AttachmentLoadOp load_op) const {
u32 attachment_count = 0;
std::array<vk::AttachmentDescription, 2> attachments;

View File

@ -1,4 +1,4 @@
// Copyright 2023 Citra Emulator Project
// Copyright 2024 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -24,7 +24,7 @@ struct RenderPass {
vk::RenderPass render_pass;
vk::Rect2D render_area;
vk::ClearValue clear;
bool do_clear;
u32 do_clear;
bool operator==(const RenderPass& other) const noexcept {
return std::tie(framebuffer, render_pass, render_area, do_clear) ==
@ -34,13 +34,13 @@ struct RenderPass {
}
};
class RenderpassCache {
static constexpr std::size_t MAX_COLOR_FORMATS = 13;
static constexpr std::size_t MAX_DEPTH_FORMATS = 4;
class RenderManager {
static constexpr u32 NumColorFormats = 13;
static constexpr u32 NumDepthFormats = 4;
public:
explicit RenderpassCache(const Instance& instance, Scheduler& scheduler);
~RenderpassCache();
explicit RenderManager(const Instance& instance, Scheduler& scheduler);
~RenderManager();
/// Begins a new renderpass with the provided framebuffer as render target.
void BeginRendering(const Framebuffer* framebuffer, Common::Rectangle<u32> draw_rect);
@ -63,7 +63,7 @@ private:
private:
const Instance& instance;
Scheduler& scheduler;
vk::UniqueRenderPass cached_renderpasses[MAX_COLOR_FORMATS + 1][MAX_DEPTH_FORMATS + 1][2];
vk::UniqueRenderPass cached_renderpasses[NumColorFormats + 1][NumDepthFormats + 1][2];
std::mutex cache_mutex;
std::array<vk::Image, 2> images;
std::array<vk::ImageAspectFlags, 2> aspects;

View File

@ -4,6 +4,7 @@
#include <cstddef>
#include <optional>
#include <unordered_map>
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
@ -14,9 +15,7 @@ ResourcePool::ResourcePool(MasterSemaphore* master_semaphore_, std::size_t grow_
: master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
std::size_t ResourcePool::CommitResource() {
// Refresh semaphore to query updated results
master_semaphore->Refresh();
const u64 gpu_tick = master_semaphore->KnownGpuTick();
u64 gpu_tick = master_semaphore->KnownGpuTick();
const auto search = [this, gpu_tick](std::size_t begin,
std::size_t end) -> std::optional<std::size_t> {
for (std::size_t iterator = begin; iterator < end; ++iterator) {
@ -29,7 +28,13 @@ std::size_t ResourcePool::CommitResource() {
};
// Try to find a free resource from the hinted position to the end.
std::optional<std::size_t> found = search(hint_iterator, ticks.size());
auto found = search(hint_iterator, ticks.size());
if (!found) {
// Refresh semaphore to query updated results
master_semaphore->Refresh();
gpu_tick = master_semaphore->KnownGpuTick();
found = search(hint_iterator, ticks.size());
}
if (!found) {
// Search from beginning to the hinted position.
found = search(0, hint_iterator);
@ -48,75 +53,137 @@ std::size_t ResourcePool::CommitResource() {
}
std::size_t ResourcePool::ManageOverflow() {
const std::size_t old_capacity = ticks.size();
Grow();
// The last entry is guaranteed to be free, since it's the first element of the freshly
// allocated resources.
return old_capacity;
}
void ResourcePool::Grow() {
const std::size_t old_capacity = ticks.size();
ticks.resize(old_capacity + grow_step);
Allocate(old_capacity, old_capacity + grow_step);
return old_capacity;
}
constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 4;
struct CommandPool::Pool {
vk::CommandPool handle;
std::array<vk::CommandBuffer, COMMAND_BUFFER_POOL_SIZE> cmdbufs;
};
CommandPool::CommandPool(const Instance& instance, MasterSemaphore* master_semaphore)
: ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance} {}
CommandPool::~CommandPool() {
vk::Device device = instance.GetDevice();
for (Pool& pool : pools) {
device.destroyCommandPool(pool.handle);
}
}
void CommandPool::Allocate(std::size_t begin, std::size_t end) {
// Command buffers are going to be committed, recorded, executed every single usage cycle.
// They are also going to be reset when committed.
Pool& pool = pools.emplace_back();
: ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance} {
const vk::CommandPoolCreateInfo pool_create_info = {
.flags = vk::CommandPoolCreateFlagBits::eTransient |
vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(),
};
const vk::Device device = instance.GetDevice();
cmd_pool = device.createCommandPoolUnique(pool_create_info);
if (instance.HasDebuggingToolAttached()) {
SetObjectName(device, *cmd_pool, "CommandPool");
}
}
vk::Device device = instance.GetDevice();
pool.handle = device.createCommandPool(pool_create_info);
CommandPool::~CommandPool() = default;
void CommandPool::Allocate(std::size_t begin, std::size_t end) {
cmd_buffers.resize(end);
const vk::CommandBufferAllocateInfo buffer_alloc_info = {
.commandPool = pool.handle,
.commandPool = *cmd_pool,
.level = vk::CommandBufferLevel::ePrimary,
.commandBufferCount = COMMAND_BUFFER_POOL_SIZE,
};
auto buffers = device.allocateCommandBuffers(buffer_alloc_info);
std::copy(buffers.begin(), buffers.end(), pool.cmdbufs.begin());
const vk::Device device = instance.GetDevice();
const auto result =
device.allocateCommandBuffers(&buffer_alloc_info, cmd_buffers.data() + begin);
ASSERT(result == vk::Result::eSuccess);
if (instance.HasDebuggingToolAttached()) {
Vulkan::SetObjectName(device, pool.handle, "CommandPool: Pool({})",
COMMAND_BUFFER_POOL_SIZE);
for (u32 i = 0; i < pool.cmdbufs.size(); ++i) {
Vulkan::SetObjectName(device, pool.cmdbufs[i], "CommandPool: Command Buffer {}", i);
for (std::size_t i = begin; i < end; ++i) {
SetObjectName(device, cmd_buffers[i], "CommandPool: Command Buffer {}", i);
}
}
}
vk::CommandBuffer CommandPool::Commit() {
const std::size_t index = CommitResource();
const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
return pools[pool_index].cmdbufs[sub_index];
return cmd_buffers[index];
}
/// Number of descriptor sets allocated at once by DescriptorHeap::Allocate.
/// Also passed to ResourcePool as the grow step of the heap.
constexpr u32 DESCRIPTOR_SET_BATCH = 32;
/// Creates the descriptor set layout described by `bindings`, derives the per-type pool sizes
/// required to hold `descriptor_heap_count_` sets of that layout and creates the first pool.
///
/// @param instance               Vulkan instance wrapper providing the device.
/// @param master_semaphore       Semaphore forwarded to the ResourcePool base.
/// @param bindings               Bindings that make up the descriptor set layout.
/// @param descriptor_heap_count_ Maximum number of descriptor sets each pool can hold.
DescriptorHeap::DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore,
                               std::span<const vk::DescriptorSetLayoutBinding> bindings,
                               u32 descriptor_heap_count_)
    : ResourcePool{master_semaphore, DESCRIPTOR_SET_BATCH}, device{instance.GetDevice()},
      descriptor_heap_count{descriptor_heap_count_} {
    // Create descriptor set layout.
    const vk::DescriptorSetLayoutCreateInfo layout_ci = {
        .bindingCount = static_cast<u32>(bindings.size()),
        .pBindings = bindings.data(),
    };
    descriptor_set_layout = device.createDescriptorSetLayoutUnique(layout_ci);
    if (instance.HasDebuggingToolAttached()) {
        SetObjectName(device, *descriptor_set_layout, "DescriptorSetLayout");
    }

    // Build descriptor set pool counts. descriptorCount is a 32-bit value, so accumulate in
    // 32 bits as well to avoid silently truncating large binding arrays.
    std::unordered_map<vk::DescriptorType, u32> descriptor_type_counts;
    for (const auto& binding : bindings) {
        descriptor_type_counts[binding.descriptorType] += binding.descriptorCount;
    }
    pool_sizes.reserve(descriptor_type_counts.size());
    for (const auto& [type, count] : descriptor_type_counts) {
        auto& pool_size = pool_sizes.emplace_back();
        pool_size.descriptorCount = count * descriptor_heap_count;
        pool_size.type = type;
    }

    // Create descriptor pool
    AppendDescriptorPool();
}
DescriptorHeap::~DescriptorHeap() = default;
void DescriptorHeap::Allocate(std::size_t begin, std::size_t end) {
ASSERT(end - begin == DESCRIPTOR_SET_BATCH);
descriptor_sets.resize(end);
std::array<vk::DescriptorSetLayout, DESCRIPTOR_SET_BATCH> layouts;
layouts.fill(*descriptor_set_layout);
u32 current_pool = 0;
vk::DescriptorSetAllocateInfo alloc_info = {
.descriptorPool = *pools[current_pool],
.descriptorSetCount = DESCRIPTOR_SET_BATCH,
.pSetLayouts = layouts.data(),
};
// Attempt to allocate the descriptor set batch. If the pool has run out of space, use a new
// one.
while (true) {
const auto result =
device.allocateDescriptorSets(&alloc_info, descriptor_sets.data() + begin);
if (result == vk::Result::eSuccess) {
break;
}
if (result == vk::Result::eErrorOutOfPoolMemory) {
current_pool++;
if (current_pool == pools.size()) {
LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!");
AppendDescriptorPool();
}
alloc_info.descriptorPool = *pools[current_pool];
}
}
}
/// Commits a resource slot and returns the descriptor set stored in that slot.
vk::DescriptorSet DescriptorHeap::Commit() {
    return descriptor_sets[CommitResource()];
}
void DescriptorHeap::AppendDescriptorPool() {
const vk::DescriptorPoolCreateInfo pool_info = {
.flags = vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet,
.maxSets = descriptor_heap_count,
.poolSizeCount = static_cast<u32>(pool_sizes.size()),
.pPoolSizes = pool_sizes.data(),
};
auto& pool = pools.emplace_back();
pool = device.createDescriptorPoolUnique(pool_info);
}
} // namespace Vulkan

View File

@ -39,9 +39,6 @@ private:
/// Manages pool overflow allocating new resources.
std::size_t ManageOverflow();
/// Allocates a new page of resources.
void Grow();
protected:
MasterSemaphore* master_semaphore{nullptr};
std::size_t grow_step = 0; ///< Number of new resources created after an overflow
@ -59,9 +56,36 @@ public:
vk::CommandBuffer Commit();
private:
struct Pool;
const Instance& instance;
std::vector<Pool> pools;
vk::UniqueCommandPool cmd_pool;
std::vector<vk::CommandBuffer> cmd_buffers;
};
/// Resource pool that hands out descriptor sets which all share one descriptor set layout.
/// Sets are allocated in batches from a list of descriptor pools that grows on demand.
class DescriptorHeap final : public ResourcePool {
public:
/// Creates the descriptor set layout described by `bindings` and the first descriptor pool.
/// `descriptor_heap_count` is the maximum number of sets a single descriptor pool can hold.
explicit DescriptorHeap(const Instance& instance, MasterSemaphore* master_semaphore,
std::span<const vk::DescriptorSetLayoutBinding> bindings,
u32 descriptor_heap_count = 1024);
~DescriptorHeap() override;
/// Returns the descriptor set layout shared by every set of this heap.
const vk::DescriptorSetLayout& Layout() const {
return *descriptor_set_layout;
}
/// Allocates a batch of descriptor sets for the resource slots in [begin, end).
void Allocate(std::size_t begin, std::size_t end) override;
/// Commits a resource slot and returns the descriptor set stored in it.
vk::DescriptorSet Commit();
private:
/// Creates a new descriptor pool and appends it to `pools`.
void AppendDescriptorPool();
private:
vk::Device device; ///< Device obtained from the instance at construction.
vk::UniqueDescriptorSetLayout descriptor_set_layout; ///< Layout used for every allocated set.
u32 descriptor_heap_count; ///< Maximum number of sets per descriptor pool.
std::vector<vk::DescriptorPoolSize> pool_sizes; ///< Per-type descriptor counts of each pool.
std::vector<vk::UniqueDescriptorPool> pools; ///< Descriptor pools created so far.
std::vector<vk::DescriptorSet> descriptor_sets; ///< Allocated sets, indexed by resource slot.
};
} // namespace Vulkan

View File

@ -5,10 +5,8 @@
#include <mutex>
#include <utility>
#include "common/microprofile.h"
#include "common/settings.h"
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
@ -98,6 +96,8 @@ void Scheduler::DispatchWork() {
return;
}
on_dispatch();
{
std::scoped_lock ql{queue_mutex};
work_queue.push(std::move(chunk));
@ -173,12 +173,16 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa
state = StateFlags::AllDirty;
const u64 signal_value = master_semaphore->NextTick();
on_submit();
Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
MICROPROFILE_SCOPE(Vulkan_Submit);
std::scoped_lock lock{submit_mutex};
master_semaphore->SubmitWork(cmdbuf, wait_semaphore, signal_semaphore, signal_value);
});
master_semaphore->Refresh();
if (!use_worker_thread) {
AllocateWorkerCommandBuffers();
} else {

View File

@ -4,6 +4,7 @@
#pragma once
#include <functional>
#include <memory>
#include <utility>
#include "common/alignment.h"
@ -49,11 +50,6 @@ public:
/// Records the command to the current chunk.
template <typename T>
void Record(T&& command) {
if (!use_worker_thread) {
command(current_cmdbuf);
return;
}
if (chunk->Record(command)) {
return;
}
@ -76,6 +72,16 @@ public:
return False(state & flag);
}
/// Registers a callback to perform on queue submission.
/// Any previously registered submission callback is replaced.
void RegisterOnSubmit(std::function<void()>&& func) {
on_submit = std::move(func);
}
/// Registers a callback to perform when recorded work is dispatched.
/// Any previously registered dispatch callback is replaced.
void RegisterOnDispatch(std::function<void()>&& func) {
on_dispatch = std::move(func);
}
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return master_semaphore->CurrentTick();
@ -194,6 +200,8 @@ private:
std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
vk::CommandBuffer current_cmdbuf;
StateFlags state{};
std::function<void()> on_submit;
std::function<void()> on_dispatch;
std::mutex execution_mutex;
std::mutex reserve_mutex;
std::mutex queue_mutex;

View File

@ -184,6 +184,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
includer)) [[unlikely]] {
LOG_INFO(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(),
shader->getInfoDebugLog());
LOG_INFO(Render_Vulkan, "Shader Source:\n{}", code);
return {};
}

View File

@ -82,7 +82,7 @@ StreamBuffer::~StreamBuffer() {
device.freeMemory(memory);
}
std::tuple<u8*, u64, bool> StreamBuffer::Map(u64 size, u64 alignment) {
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u64 alignment) {
if (!is_coherent && type == BufferType::Stream) {
size = Common::AlignUp(size, instance.NonCoherentAtomSize());
}
@ -114,7 +114,7 @@ std::tuple<u8*, u64, bool> StreamBuffer::Map(u64 size, u64 alignment) {
return std::make_tuple(mapped + offset, offset, invalidate);
}
void StreamBuffer::Commit(u64 size) {
void StreamBuffer::Commit(u32 size) {
if (!is_coherent && type == BufferType::Stream) {
size = Common::AlignUp(size, instance.NonCoherentAtomSize());
}
@ -200,11 +200,10 @@ void StreamBuffer::CreateBuffers(u64 prefered_size) {
mapped = reinterpret_cast<u8*>(device.mapMemory(memory, 0, VK_WHOLE_SIZE));
if (instance.HasDebuggingToolAttached()) {
Vulkan::SetObjectName(device, buffer, "StreamBuffer({}): {} KiB {}", BufferTypeName(type),
stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags));
Vulkan::SetObjectName(device, memory, "StreamBufferMemory({}): {} Kib {}",
BufferTypeName(type), stream_buffer_size / 1024,
vk::to_string(mem_type.propertyFlags));
SetObjectName(device, buffer, "StreamBuffer({}): {} KiB {}", BufferTypeName(type),
stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags));
SetObjectName(device, memory, "StreamBufferMemory({}): {} Kib {}", BufferTypeName(type),
stream_buffer_size / 1024, vk::to_string(mem_type.propertyFlags));
}
}

View File

@ -35,10 +35,10 @@ public:
* @param size Size to reserve.
* @returns A pair of a raw memory pointer (with offset added), and the buffer offset
*/
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
std::tuple<u8*, u32, bool> Map(u32 size, u64 alignment);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Commit(u64 size);
void Commit(u32 size);
vk::Buffer Handle() const noexcept {
return buffer;
@ -70,8 +70,8 @@ private:
vk::BufferUsageFlags usage{};
BufferType type;
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
u32 offset{}; ///< Buffer iterator.
u32 mapped_size{}; ///< Size reserved for the current copy.
bool is_coherent{}; ///< True if the buffer is coherent
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.

View File

@ -250,10 +250,8 @@ void Swapchain::RefreshSemaphores() {
if (instance.HasDebuggingToolAttached()) {
for (u32 i = 0; i < image_count; ++i) {
Vulkan::SetObjectName(device, image_acquired[i],
"Swapchain Semaphore: image_acquired {}", i);
Vulkan::SetObjectName(device, present_ready[i], "Swapchain Semaphore: present_ready {}",
i);
SetObjectName(device, image_acquired[i], "Swapchain Semaphore: image_acquired {}", i);
SetObjectName(device, present_ready[i], "Swapchain Semaphore: present_ready {}", i);
}
}
}
@ -265,7 +263,7 @@ void Swapchain::SetupImages() {
if (instance.HasDebuggingToolAttached()) {
for (u32 i = 0; i < image_count; ++i) {
Vulkan::SetObjectName(device, images[i], "Swapchain Image {}", i);
SetObjectName(device, images[i], "Swapchain Image {}", i);
}
}
}

View File

@ -12,9 +12,8 @@
#include "video_core/rasterizer_cache/texture_codec.h"
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/renderer_vulkan/pica_to_vk.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_render_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
@ -207,9 +206,9 @@ Handle MakeHandle(const Instance* instance, u32 width, u32 height, u32 levels, T
vk::UniqueImageView image_view = instance->GetDevice().createImageViewUnique(view_info);
if (!debug_name.empty() && instance->HasDebuggingToolAttached()) {
Vulkan::SetObjectName(instance->GetDevice(), image, debug_name);
Vulkan::SetObjectName(instance->GetDevice(), image_view.get(), "{} View({})", debug_name,
vk::to_string(aspect));
SetObjectName(instance->GetDevice(), image, debug_name);
SetObjectName(instance->GetDevice(), image_view.get(), "{} View({})", debug_name,
vk::to_string(aspect));
}
return Handle{
@ -249,10 +248,10 @@ constexpr u64 DOWNLOAD_BUFFER_SIZE = 16_MiB;
} // Anonymous namespace
TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorPool& pool,
DescriptorSetProvider& texture_provider_, u32 num_swapchain_images_)
RenderManager& renderpass_cache, DescriptorUpdateQueue& update_queue,
u32 num_swapchain_images_)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
texture_provider{texture_provider_}, blit_helper{instance, scheduler, pool, renderpass_cache},
blit_helper{instance, scheduler, renderpass_cache, update_queue},
upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE,
BufferType::Upload},
download_buffer{instance, scheduler,
@ -268,7 +267,7 @@ VideoCore::StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
const auto [data, offset, invalidate] = buffer.Map(size, 16);
return VideoCore::StagingData{
.size = size,
.offset = static_cast<u32>(offset),
.offset = offset,
.mapped = std::span{data, size},
};
}
@ -453,7 +452,7 @@ void TextureRuntime::ClearTextureWithRenderpass(Surface& surface,
}
bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
const VideoCore::TextureCopy& copy) {
std::span<const VideoCore::TextureCopy> copies) {
renderpass_cache.EndRendering();
const RecordParams params = {
@ -466,8 +465,9 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
.dst_image = dest.Image(),
};
scheduler.Record([params, copy](vk::CommandBuffer cmdbuf) {
const vk::ImageCopy image_copy = {
boost::container::small_vector<vk::ImageCopy, 2> vk_copies;
std::ranges::transform(copies, std::back_inserter(vk_copies), [&](const auto& copy) {
return vk::ImageCopy{
.srcSubresource{
.aspectMask = params.aspect,
.mipLevel = copy.src_level,
@ -486,7 +486,9 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
0},
.extent = {copy.extent.width, copy.extent.height, 1},
};
});
scheduler.Record([params, copies = std::move(vk_copies)](vk::CommandBuffer cmdbuf) {
const bool self_copy = params.src_image == params.dst_image;
const vk::ImageLayout new_src_layout =
self_copy ? vk::ImageLayout::eGeneral : vk::ImageLayout::eTransferSrcOptimal;
@ -502,7 +504,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = params.src_image,
.subresourceRange = MakeSubresourceRange(params.aspect, copy.src_level),
.subresourceRange = MakeSubresourceRange(params.aspect, 0, VK_REMAINING_MIP_LEVELS),
},
vk::ImageMemoryBarrier{
.srcAccessMask = params.dst_access,
@ -512,7 +514,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = params.dst_image,
.subresourceRange = MakeSubresourceRange(params.aspect, copy.dst_level),
.subresourceRange = MakeSubresourceRange(params.aspect, 0, VK_REMAINING_MIP_LEVELS),
},
};
const std::array post_barriers = {
@ -524,7 +526,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = params.src_image,
.subresourceRange = MakeSubresourceRange(params.aspect, copy.src_level),
.subresourceRange = MakeSubresourceRange(params.aspect, 0, VK_REMAINING_MIP_LEVELS),
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
@ -534,7 +536,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = params.dst_image,
.subresourceRange = MakeSubresourceRange(params.aspect, copy.dst_level),
.subresourceRange = MakeSubresourceRange(params.aspect, 0, VK_REMAINING_MIP_LEVELS),
},
};
@ -542,7 +544,7 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
cmdbuf.copyImage(params.src_image, new_src_layout, params.dst_image, new_dst_layout,
image_copy);
copies);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
@ -694,13 +696,6 @@ bool TextureRuntime::NeedsConversion(VideoCore::PixelFormat format) const {
traits.aspect != (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil);
}
void TextureRuntime::FreeDescriptorSetsWithImage(vk::ImageView image_view) {
texture_provider.FreeWithImage(image_view);
blit_helper.compute_provider.FreeWithImage(image_view);
blit_helper.compute_buffer_provider.FreeWithImage(image_view);
blit_helper.two_textures_provider.FreeWithImage(image_view);
}
Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceParams& params)
: SurfaceBase{params}, runtime{&runtime_}, instance{&runtime_.GetInstance()},
scheduler{&runtime_.GetScheduler()}, traits{instance->GetTraits(pixel_format)} {
@ -798,9 +793,6 @@ Surface::~Surface() {
return;
}
for (const auto& [alloc, image, image_view] : handles) {
if (image_view) {
runtime->FreeDescriptorSetsWithImage(*image_view);
}
if (image) {
vmaDestroyImage(instance->GetAllocator(), image, alloc);
}
@ -902,7 +894,7 @@ void Surface::UploadCustom(const VideoCore::Material* material, u32 level) {
const Common::Rectangle rect{0U, height, width, 0U};
const auto upload = [&](u32 index, VideoCore::CustomTexture* texture) {
const u64 custom_size = texture->data.size();
const u32 custom_size = static_cast<u32>(texture->data.size());
const RecordParams params = {
.aspect = vk::ImageAspectFlagBits::eColor,
.pipeline_flags = PipelineStageFlags(),
@ -1088,7 +1080,7 @@ void Surface::ScaleUp(u32 new_scale) {
vk::PipelineStageFlagBits::eTopOfPipe,
vk::DependencyFlagBits::eByRegion, {}, {}, barriers);
});
LOG_INFO(HW_GPU, "Surface scale up!");
for (u32 level = 0; level < levels; level++) {
const VideoCore::TextureBlit blit = {
.src_level = level,
@ -1514,7 +1506,7 @@ Sampler::Sampler(TextureRuntime& runtime, const VideoCore::SamplerParams& params
instance.IsCustomBorderColorSupported() && (params.wrap_s == TextureConfig::ClampToBorder ||
params.wrap_t == TextureConfig::ClampToBorder);
const Common::Vec4f color = PicaToVK::ColorRGBA8(params.border_color);
const auto color = PicaToVK::ColorRGBA8(params.border_color);
const vk::SamplerCustomBorderColorCreateInfoEXT border_color_info = {
.customBorderColor = MakeClearColorValue(color),
.format = vk::Format::eUndefined,

View File

@ -22,10 +22,9 @@ struct Material;
namespace Vulkan {
class Instance;
class RenderpassCache;
class DescriptorPool;
class DescriptorSetProvider;
class RenderManager;
class Surface;
class DescriptorUpdateQueue;
struct Handle {
VmaAllocation alloc;
@ -42,8 +41,8 @@ class TextureRuntime {
public:
explicit TextureRuntime(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorPool& pool,
DescriptorSetProvider& texture_provider, u32 num_swapchain_images);
RenderManager& renderpass_cache, DescriptorUpdateQueue& update_queue,
u32 num_swapchain_images);
~TextureRuntime();
const Instance& GetInstance() const {
@ -54,7 +53,7 @@ public:
return scheduler;
}
RenderpassCache& GetRenderpassCache() {
RenderManager& GetRenderpassCache() {
return renderpass_cache;
}
@ -74,7 +73,12 @@ public:
bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear);
/// Copies a rectangle of src_tex to another rectangle of dst_rect
bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy);
bool CopyTextures(Surface& source, Surface& dest,
std::span<const VideoCore::TextureCopy> copies);
/// Single-region convenience overload: wraps the copy in a one-element array and forwards it
/// to the span-based overload, returning that overload's result.
bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy) {
    const std::array single_copy{copy};
    return CopyTextures(source, dest, single_copy);
}
/// Blits a rectangle of src_tex to another rectangle of dst_rect
bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit);
@ -85,9 +89,6 @@ public:
/// Returns true if the provided pixel format needs conversion
bool NeedsConversion(VideoCore::PixelFormat format) const;
/// Removes any descriptor sets that contain the provided image view.
void FreeDescriptorSetsWithImage(vk::ImageView image_view);
private:
/// Clears a partial texture rect using a clear rectangle
void ClearTextureWithRenderpass(Surface& surface, const VideoCore::TextureClear& clear);
@ -95,8 +96,7 @@ private:
private:
const Instance& instance;
Scheduler& scheduler;
RenderpassCache& renderpass_cache;
DescriptorSetProvider& texture_provider;
RenderManager& renderpass_cache;
BlitHelper blit_helper;
StreamBuffer upload_buffer;
StreamBuffer download_buffer;

View File

@ -106,7 +106,11 @@ FragmentModule::FragmentModule(const FSConfig& config_, const Profile& profile_)
out.reserve(RESERVE_SIZE);
DefineExtensions();
DefineInterface();
DefineBindings();
if (profile.is_vulkan) {
DefineBindingsVK();
} else {
DefineBindingsGL();
}
DefineHelpers();
DefineShadowHelpers();
DefineLightingHelpers();
@ -1272,7 +1276,43 @@ void FragmentModule::DefineInterface() {
out += "layout (location = 0) out vec4 color;\n\n";
}
void FragmentModule::DefineBindings() {
void FragmentModule::DefineBindingsVK() {
// Uniform and texture buffers
out += FSUniformBlockDef;
out += "layout(set = 0, binding = 3) uniform samplerBuffer texture_buffer_lut_lf;\n";
out += "layout(set = 0, binding = 4) uniform samplerBuffer texture_buffer_lut_rg;\n";
out += "layout(set = 0, binding = 5) uniform samplerBuffer texture_buffer_lut_rgba;\n\n";
// Texture samplers
const auto texture_type = config.texture.texture0_type.Value();
const auto sampler_tex0 = [&] {
switch (texture_type) {
case TextureType::Shadow2D:
case TextureType::ShadowCube:
return "usampler2D";
case TextureType::TextureCube:
return "samplerCube";
default:
return "sampler2D";
}
}();
for (u32 i = 0; i < 3; i++) {
const auto sampler = i == 0 ? sampler_tex0 : "sampler2D";
const auto num_descriptors = i == 0 && texture_type == TextureType::ShadowCube ? "[6]" : "";
out += fmt::format("layout(set = 1, binding = {0}) uniform {1} tex{0}{2};\n", i, sampler,
num_descriptors);
}
// Utility textures
if (config.framebuffer.shadow_rendering) {
out += "layout(set = 2, binding = 0, r32ui) uniform uimage2D shadow_buffer;\n\n";
}
if (config.user.use_custom_normal) {
out += "layout(set = 2, binding = 1) uniform sampler2D tex_normal;\n";
}
}
void FragmentModule::DefineBindingsGL() {
// Uniform and texture buffers
out += FSUniformBlockDef;
out += "layout(binding = 3) uniform samplerBuffer texture_buffer_lut_lf;\n";
@ -1280,33 +1320,32 @@ void FragmentModule::DefineBindings() {
out += "layout(binding = 5) uniform samplerBuffer texture_buffer_lut_rgba;\n\n";
// Texture samplers
const auto texunit_set = profile.is_vulkan ? "set = 1, " : "";
const auto texture_type = config.texture.texture0_type.Value();
for (u32 i = 0; i < 3; i++) {
const auto sampler =
i == 0 && texture_type == TextureType::TextureCube ? "samplerCube" : "sampler2D";
out +=
fmt::format("layout({0}binding = {1}) uniform {2} tex{1};\n", texunit_set, i, sampler);
out += fmt::format("layout(binding = {0}) uniform {1} tex{0};\n", i, sampler);
}
if (config.user.use_custom_normal && !profile.is_vulkan) {
// Utility textures
if (config.user.use_custom_normal) {
out += "layout(binding = 6) uniform sampler2D tex_normal;\n";
}
if (use_blend_fallback && !profile.is_vulkan) {
if (use_blend_fallback) {
out += "layout(location = 7) uniform sampler2D tex_color;\n";
}
// Storage images
static constexpr std::array postfixes = {"px", "nx", "py", "ny", "pz", "nz"};
const auto shadow_set = profile.is_vulkan ? "set = 2, " : "";
for (u32 i = 0; i < postfixes.size(); i++) {
out += fmt::format(
"layout({}binding = {}, r32ui) uniform readonly uimage2D shadow_texture_{};\n",
shadow_set, i, postfixes[i]);
// Shadow textures
if (texture_type == TextureType::Shadow2D || texture_type == TextureType::ShadowCube) {
static constexpr std::array postfixes = {"px", "nx", "py", "ny", "pz", "nz"};
for (u32 i = 0; i < postfixes.size(); i++) {
out += fmt::format(
"layout(binding = {}, r32ui) uniform readonly uimage2D shadow_texture_{};\n", i,
postfixes[i]);
}
}
if (config.framebuffer.shadow_rendering) {
out += fmt::format("layout({}binding = 6, r32ui) uniform uimage2D shadow_buffer;\n\n",
shadow_set);
out += "layout(binding = 6, r32ui) uniform uimage2D shadow_buffer;\n\n";
}
}
@ -1414,19 +1453,48 @@ float mix2(vec4 s, vec2 a) {
)";
if (config.texture.texture0_type == TexturingRegs::TextureConfig::Shadow2D) {
out += R"(
if (profile.is_vulkan) {
out += R"(
float SampleShadow2D(ivec2 uv, uint z) {
if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) )))
if (any(bvec4(lessThan(uv, ivec2(0)), greaterThanEqual(uv, textureSize(tex0, 0)))))
return 1.0;
return CompareShadow(texelFetch(tex0, uv, 0).x, z);
}
vec4 shadowTexture(vec2 uv, float w) {
)";
if (!config.texture.shadow_texture_orthographic) {
out += "uv /= w;";
}
out += R"(
uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias));
vec2 coord = vec2(textureSize(tex0, 0)) * uv - vec2(0.5);
vec2 coord_floor = floor(coord);
vec2 f = coord - coord_floor;
ivec2 i = ivec2(coord_floor);
vec4 s = vec4(
SampleShadow2D(i , z),
SampleShadow2D(i + ivec2(1, 0), z),
SampleShadow2D(i + ivec2(0, 1), z),
SampleShadow2D(i + ivec2(1, 1), z));
return vec4(mix2(s, f));
}
)";
} else {
out += R"(
float SampleShadow2D(ivec2 uv, uint z) {
if (any(bvec4(lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)))))
return 1.0;
return CompareShadow(imageLoad(shadow_texture_px, uv).x, z);
}
vec4 shadowTexture(vec2 uv, float w) {
)";
if (!config.texture.shadow_texture_orthographic) {
out += "uv /= w;";
}
out += R"(
if (!config.texture.shadow_texture_orthographic) {
out += "uv /= w;";
}
out += R"(
uint z = uint(max(0, int(min(abs(w), 1.0) * float(0xFFFFFF)) - shadow_texture_bias));
vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5);
vec2 coord_floor = floor(coord);
@ -1440,8 +1508,75 @@ vec4 shadowTexture(vec2 uv, float w) {
return vec4(mix2(s, f));
}
)";
}
} else if (config.texture.texture0_type == TexturingRegs::TextureConfig::ShadowCube) {
out += R"(
if (profile.is_vulkan) {
out += R"(
uvec4 SampleShadowCube(int face, ivec2 i00, ivec2 i10, ivec2 i01, ivec2 i11) {
return uvec4(
texelFetch(tex0[face], i00, 0).r,
texelFetch(tex0[face], i10, 0).r,
texelFetch(tex0[face], i01, 0).r,
texelFetch(tex0[face], i11, 0).r);
}
vec4 shadowTextureCube(vec2 uv, float w) {
ivec2 size = textureSize(tex0[0], 0);
vec3 c = vec3(uv, w);
vec3 a = abs(c);
if (a.x > a.y && a.x > a.z) {
w = a.x;
uv = -c.zy;
if (c.x < 0.0) uv.x = -uv.x;
} else if (a.y > a.z) {
w = a.y;
uv = c.xz;
if (c.y < 0.0) uv.y = -uv.y;
} else {
w = a.z;
uv = -c.xy;
if (c.z > 0.0) uv.x = -uv.x;
}
uint z = uint(max(0, int(min(w, 1.0) * float(0xFFFFFF)) - shadow_texture_bias));
vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5);
vec2 coord_floor = floor(coord);
vec2 f = coord - coord_floor;
ivec2 i00 = ivec2(coord_floor);
ivec2 i10 = i00 + ivec2(1, 0);
ivec2 i01 = i00 + ivec2(0, 1);
ivec2 i11 = i00 + ivec2(1, 1);
ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1);
i00 = clamp(i00, cmin, cmax);
i10 = clamp(i10, cmin, cmax);
i01 = clamp(i01, cmin, cmax);
i11 = clamp(i11, cmin, cmax);
uvec4 pixels;
if (a.x > a.y && a.x > a.z) {
if (c.x > 0.0)
pixels = SampleShadowCube(0, i00, i10, i01, i11);
else
pixels = SampleShadowCube(1, i00, i10, i01, i11);
} else if (a.y > a.z) {
if (c.y > 0.0)
pixels = SampleShadowCube(2, i00, i10, i01, i11);
else
pixels = SampleShadowCube(3, i00, i10, i01, i11);
} else {
if (c.z > 0.0)
pixels = SampleShadowCube(4, i00, i10, i01, i11);
else
pixels = SampleShadowCube(5, i00, i10, i01, i11);
}
vec4 s = vec4(
CompareShadow(pixels.x, z),
CompareShadow(pixels.y, z),
CompareShadow(pixels.z, z),
CompareShadow(pixels.w, z));
return vec4(mix2(s, f));
}
)";
} else {
out += R"(
vec4 shadowTextureCube(vec2 uv, float w) {
ivec2 size = imageSize(shadow_texture_px);
vec3 c = vec3(uv, w);
@ -1523,6 +1658,7 @@ vec4 shadowTextureCube(vec2 uv, float w) {
return vec4(mix2(s, f));
}
)";
}
}
}
}

View File

@ -74,7 +74,8 @@ private:
void DefineExtensions();
void DefineInterface();
void DefineBindings();
void DefineBindingsVK();
void DefineBindingsGL();
void DefineHelpers();
void DefineLightingHelpers();
void DefineShadowHelpers();