mirror of
				https://git.h3cjp.net/H3cJP/citra.git
				synced 2025-10-31 15:04:53 +00:00 
			
		
		
		
	shader_recompiler: add gl_Layer translation GS for older hardware
This commit is contained in:
		
							parent
							
								
									d6b63239ae
								
							
						
					
					
						commit
						3ef006b5ab
					
				|  | @ -221,6 +221,7 @@ add_library(shader_recompiler STATIC | |||
|     ir_opt/dual_vertex_pass.cpp | ||||
|     ir_opt/global_memory_to_storage_buffer_pass.cpp | ||||
|     ir_opt/identity_removal_pass.cpp | ||||
|     ir_opt/layer_pass.cpp | ||||
|     ir_opt/lower_fp16_to_fp32.cpp | ||||
|     ir_opt/lower_int64_to_int32.cpp | ||||
|     ir_opt/passes.h | ||||
|  |  | |||
|  | @ -9,6 +9,7 @@ | |||
| #include "common/settings.h" | ||||
| #include "shader_recompiler/exception.h" | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||||
| #include "shader_recompiler/frontend/ir/post_order.h" | ||||
| #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||||
|  | @ -233,6 +234,8 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
|         Optimization::VerificationPass(program); | ||||
|     } | ||||
|     Optimization::CollectShaderInfoPass(env, program); | ||||
|     Optimization::LayerPass(program, host_info); | ||||
| 
 | ||||
|     CollectInterpolationInfo(env, program); | ||||
|     AddNVNStorageBuffers(program); | ||||
|     return program; | ||||
|  | @ -331,4 +334,82 @@ void ConvertLegacyToGeneric(IR::Program& program, const Shader::RuntimeInfo& run | |||
|     } | ||||
| } | ||||
| 
 | ||||
/// Builds a synthetic passthrough geometry shader for devices that cannot set
/// gl_Layer from earlier pipeline stages. The generated GS copies every generic
/// attribute and the position from each input vertex, then writes gl_Layer from
/// the generic attribute the LayerPass redirected the layer store into
/// (source_program.info.emulated_layer).
///
/// @param inst_pool        Pool used to allocate the generated IR instructions
/// @param block_pool       Pool used to allocate the generated IR blocks
/// @param host_info        Host capabilities (currently unused here — TODO confirm
///                         whether future passthrough variants need it)
/// @param source_program   The stage whose outputs are forwarded; must have
///                         requires_layer_emulation set by LayerPass
/// @param output_topology  Output topology selecting 1/2/3 emitted vertices
/// @return A complete, SSA-rewritten geometry-stage IR::Program
IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool,
                                        ObjectPool<IR::Block>& block_pool,
                                        const HostTranslateInfo& host_info,
                                        IR::Program& source_program,
                                        Shader::OutputTopology output_topology) {
    IR::Program program;
    program.stage = Stage::Geometry;
    program.output_topology = output_topology;
    // One vertex per input vertex: points=1, lines=2, triangles=3.
    switch (output_topology) {
    case OutputTopology::PointList:
        program.output_vertices = 1;
        break;
    case OutputTopology::LineStrip:
        program.output_vertices = 2;
        break;
    default:
        program.output_vertices = 3;
        break;
    }

    // Not hardware "passthrough GS" — this shader does real attribute copies.
    program.is_geometry_passthrough = false;
    // The GS consumes exactly what the source stage stores, and re-stores it.
    program.info.loads.mask = source_program.info.stores.mask;
    program.info.stores.mask = source_program.info.stores.mask;
    // The GS writes the real Layer output; the emulation generic is consumed
    // here and must not be forwarded further down the pipeline.
    program.info.stores.Set(IR::Attribute::Layer, true);
    program.info.stores.Set(source_program.info.emulated_layer, false);

    // Single body block holding all copy/emit instructions.
    IR::Block* current_block = block_pool.Create(inst_pool);
    auto& node{program.syntax_list.emplace_back()};
    node.type = IR::AbstractSyntaxNode::Type::Block;
    node.data.block = current_block;

    IR::IREmitter ir{*current_block};
    for (u32 i = 0; i < program.output_vertices; i++) {
        // Assign generics from input vertex i (all four components).
        for (u32 j = 0; j < 32; j++) {
            if (!program.info.stores.Generic(j)) {
                continue;
            }

            const IR::Attribute attr = IR::Attribute::Generic0X + (j * 4);
            ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
            ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
            ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
            ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));
        }

        // Assign position from input vertex i.
        const IR::Attribute attr = IR::Attribute::PositionX;
        ir.SetAttribute(attr + 0, ir.GetAttribute(attr + 0, ir.Imm32(i)), ir.Imm32(0));
        ir.SetAttribute(attr + 1, ir.GetAttribute(attr + 1, ir.Imm32(i)), ir.Imm32(0));
        ir.SetAttribute(attr + 2, ir.GetAttribute(attr + 2, ir.Imm32(i)), ir.Imm32(0));
        ir.SetAttribute(attr + 3, ir.GetAttribute(attr + 3, ir.Imm32(i)), ir.Imm32(0));

        // Assign layer from the emulation generic written by the source stage.
        ir.SetAttribute(IR::Attribute::Layer, ir.GetAttribute(source_program.info.emulated_layer),
                        ir.Imm32(0));

        // Emit vertex
        ir.EmitVertex(ir.Imm32(0));
    }
    ir.EndPrimitive(ir.Imm32(0));

    // Terminate with an epilogue block followed by a Return node.
    IR::Block* return_block{block_pool.Create(inst_pool)};
    IR::IREmitter{*return_block}.Epilogue();
    current_block->AddBranch(return_block);

    auto& merge{program.syntax_list.emplace_back()};
    merge.type = IR::AbstractSyntaxNode::Type::Block;
    merge.data.block = return_block;
    program.syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;

    // Derive block lists and run SSA rewrite last, after all IR is emitted.
    program.blocks = GenerateBlocks(program.syntax_list);
    program.post_order_blocks = PostOrder(program.syntax_list.front());
    Optimization::SsaRewritePass(program);

    return program;
}
| 
 | ||||
| } // namespace Shader::Maxwell
 | ||||
|  |  | |||
|  | @ -25,4 +25,13 @@ namespace Shader::Maxwell { | |||
| 
 | ||||
| void ConvertLegacyToGeneric(IR::Program& program, const RuntimeInfo& runtime_info); | ||||
| 
 | ||||
| // Maxwell v1 and older Nvidia cards don't support setting gl_Layer from non-geometry stages.
 | ||||
| // This creates a workaround by setting the layer as a generic output and creating a
 | ||||
| // passthrough geometry shader that reads the generic and sets the layer.
 | ||||
| [[nodiscard]] IR::Program GenerateGeometryPassthrough(ObjectPool<IR::Inst>& inst_pool, | ||||
|                                                       ObjectPool<IR::Block>& block_pool, | ||||
|                                                       const HostTranslateInfo& host_info, | ||||
|                                                       IR::Program& source_program, | ||||
|                                                       Shader::OutputTopology output_topology); | ||||
| 
 | ||||
| } // namespace Shader::Maxwell
 | ||||
|  |  | |||
|  | @ -14,6 +14,7 @@ struct HostTranslateInfo { | |||
|     bool support_int64{};        ///< True when the device supports 64-bit integers
 | ||||
|     bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
 | ||||
|     bool support_snorm_render_buffer{};  ///< True when the device supports SNORM render buffers
 | ||||
|     bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
 | ||||
| }; | ||||
| 
 | ||||
| } // namespace Shader
 | ||||
|  |  | |||
							
								
								
									
										68
									
								
								src/shader_recompiler/ir_opt/layer_pass.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								src/shader_recompiler/ir_opt/layer_pass.cpp
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,68 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <bit> | ||||
| #include <optional> | ||||
| 
 | ||||
| #include <boost/container/small_vector.hpp> | ||||
| 
 | ||||
| #include "shader_recompiler/environment.h" | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/breadth_first_search.h" | ||||
| #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||||
| #include "shader_recompiler/host_translate_info.h" | ||||
| #include "shader_recompiler/ir_opt/passes.h" | ||||
| #include "shader_recompiler/shader_info.h" | ||||
| 
 | ||||
| namespace Shader::Optimization { | ||||
| 
 | ||||
| static IR::Attribute EmulatedLayerAttribute(VaryingState& stores) { | ||||
|     for (u32 i = 0; i < 32; i++) { | ||||
|         if (!stores.Generic(i)) { | ||||
|             return IR::Attribute::Generic0X + (i * 4); | ||||
|         } | ||||
|     } | ||||
|     return IR::Attribute::Layer; | ||||
| } | ||||
| 
 | ||||
| static bool PermittedProgramStage(Stage stage) { | ||||
|     switch (stage) { | ||||
|     case Stage::VertexA: | ||||
|     case Stage::VertexB: | ||||
|     case Stage::TessellationControl: | ||||
|     case Stage::TessellationEval: | ||||
|         return true; | ||||
|     default: | ||||
|         return false; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
/// Redirects gl_Layer stores to a spare generic attribute on hosts that cannot
/// write the layer from non-geometry stages, so a generated passthrough GS can
/// read the generic and perform the real layer write later.
///
/// @param program    IR program to rewrite in place
/// @param host_info  Host capabilities; pass is a no-op when the host supports
///                   viewport/layer output from this stage
void LayerPass(IR::Program& program, const HostTranslateInfo& host_info) {
    // Nothing to do when the host can set gl_Layer directly, or when this
    // stage cannot participate in the GS-based emulation.
    if (host_info.support_viewport_index_layer || !PermittedProgramStage(program.stage)) {
        return;
    }

    const auto end{program.post_order_blocks.end()};
    const auto layer_attribute = EmulatedLayerAttribute(program.info.stores);
    bool requires_layer_emulation = false;

    // Rewrite every SetAttribute(Layer, ...) to store into the chosen generic
    // instead. Note: if no generic was free, layer_attribute is Layer itself
    // and the rewrite is a no-op — but the flags below are still set; TODO
    // confirm that downstream consumers tolerate emulated_layer == Layer.
    for (auto block = program.post_order_blocks.begin(); block != end; ++block) {
        for (IR::Inst& inst : (*block)->Instructions()) {
            if (inst.GetOpcode() == IR::Opcode::SetAttribute &&
                inst.Arg(0).Attribute() == IR::Attribute::Layer) {
                requires_layer_emulation = true;
                inst.SetArg(0, IR::Value{layer_attribute});
            }
        }
    }

    // Record the redirection in the shader info so pipeline construction knows
    // to generate the passthrough GS, and fix up the output-store mask.
    if (requires_layer_emulation) {
        program.info.requires_layer_emulation = true;
        program.info.emulated_layer = layer_attribute;
        program.info.stores.Set(IR::Attribute::Layer, false);
        program.info.stores.Set(layer_attribute, true);
    }
}
| 
 | ||||
| } // namespace Shader::Optimization
 | ||||
|  | @ -23,6 +23,7 @@ void RescalingPass(IR::Program& program); | |||
| void SsaRewritePass(IR::Program& program); | ||||
| void PositionPass(Environment& env, IR::Program& program); | ||||
| void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info); | ||||
| void LayerPass(IR::Program& program, const HostTranslateInfo& host_info); | ||||
| void VerificationPass(const IR::Program& program); | ||||
| 
 | ||||
| // Dual Vertex
 | ||||
|  |  | |||
|  | @ -204,6 +204,9 @@ struct Info { | |||
|     u32 nvn_buffer_base{}; | ||||
|     std::bitset<16> nvn_buffer_used{}; | ||||
| 
 | ||||
|     bool requires_layer_emulation{}; | ||||
|     IR::Attribute emulated_layer{}; | ||||
| 
 | ||||
|     boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> | ||||
|         constant_buffer_descriptors; | ||||
|     boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; | ||||
|  |  | |||
|  | @ -39,6 +39,7 @@ using Shader::Backend::GLASM::EmitGLASM; | |||
| using Shader::Backend::GLSL::EmitGLSL; | ||||
| using Shader::Backend::SPIRV::EmitSPIRV; | ||||
| using Shader::Maxwell::ConvertLegacyToGeneric; | ||||
| using Shader::Maxwell::GenerateGeometryPassthrough; | ||||
| using Shader::Maxwell::MergeDualVertexPrograms; | ||||
| using Shader::Maxwell::TranslateProgram; | ||||
| using VideoCommon::ComputeEnvironment; | ||||
|  | @ -56,6 +57,17 @@ auto MakeSpan(Container& container) { | |||
|     return std::span(container.data(), container.size()); | ||||
| } | ||||
| 
 | ||||
| Shader::OutputTopology MaxwellToOutputTopology(Maxwell::PrimitiveTopology topology) { | ||||
|     switch (topology) { | ||||
|     case Maxwell::PrimitiveTopology::Points: | ||||
|         return Shader::OutputTopology::PointList; | ||||
|     case Maxwell::PrimitiveTopology::LineStrip: | ||||
|         return Shader::OutputTopology::LineStrip; | ||||
|     default: | ||||
|         return Shader::OutputTopology::TriangleStrip; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, | ||||
|                                     const Shader::IR::Program& program, | ||||
|                                     const Shader::IR::Program* previous_program, | ||||
|  | @ -220,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
|           .support_int64 = device.HasShaderInt64(), | ||||
|           .needs_demote_reorder = device.IsAmd(), | ||||
|           .support_snorm_render_buffer = false, | ||||
|           .support_viewport_index_layer = device.HasVertexViewportLayer(), | ||||
|       } { | ||||
|     if (use_asynchronous_shaders) { | ||||
|         workers = CreateWorkers(); | ||||
|  | @ -314,9 +327,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { | |||
|     const auto& regs{maxwell3d->regs}; | ||||
|     graphics_key.raw = 0; | ||||
|     graphics_key.early_z.Assign(regs.mandated_early_z != 0 ? 1 : 0); | ||||
|     graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 | ||||
|                                               ? regs.draw.topology.Value() | ||||
|                                               : Maxwell::PrimitiveTopology{}); | ||||
|     graphics_key.gs_input_topology.Assign(regs.draw.topology.Value()); | ||||
|     graphics_key.tessellation_primitive.Assign(regs.tessellation.params.domain_type.Value()); | ||||
|     graphics_key.tessellation_spacing.Assign(regs.tessellation.params.spacing.Value()); | ||||
|     graphics_key.tessellation_clockwise.Assign( | ||||
|  | @ -415,7 +426,19 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
|     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; | ||||
|     const bool uses_vertex_a{key.unique_hashes[0] != 0}; | ||||
|     const bool uses_vertex_b{key.unique_hashes[1] != 0}; | ||||
| 
 | ||||
|     // Layer passthrough generation for devices without GL_ARB_shader_viewport_layer_array
 | ||||
|     Shader::IR::Program* layer_source_program{}; | ||||
| 
 | ||||
|     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|         const bool is_emulated_stage = layer_source_program != nullptr && | ||||
|                                        index == static_cast<u32>(Maxwell::ShaderType::Geometry); | ||||
|         if (key.unique_hashes[index] == 0 && is_emulated_stage) { | ||||
|             auto topology = MaxwellToOutputTopology(key.gs_input_topology); | ||||
|             programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info, | ||||
|                                                           *layer_source_program, topology); | ||||
|             continue; | ||||
|         } | ||||
|         if (key.unique_hashes[index] == 0) { | ||||
|             continue; | ||||
|         } | ||||
|  | @ -443,6 +466,10 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
|                 Shader::NumDescriptors(program_vb.info.storage_buffers_descriptors); | ||||
|             programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); | ||||
|         } | ||||
| 
 | ||||
|         if (programs[index].info.requires_layer_emulation) { | ||||
|             layer_source_program = &programs[index]; | ||||
|         } | ||||
|     } | ||||
|     const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; | ||||
|     const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit}; | ||||
|  | @ -456,7 +483,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
|     const bool use_glasm{device.UseAssemblyShaders()}; | ||||
|     const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; | ||||
|     for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|         if (key.unique_hashes[index] == 0) { | ||||
|         const bool is_emulated_stage = layer_source_program != nullptr && | ||||
|                                        index == static_cast<u32>(Maxwell::ShaderType::Geometry); | ||||
|         if (key.unique_hashes[index] == 0 && !is_emulated_stage) { | ||||
|             continue; | ||||
|         } | ||||
|         UNIMPLEMENTED_IF(index == 0); | ||||
|  |  | |||
|  | @ -46,6 +46,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); | |||
| namespace { | ||||
| using Shader::Backend::SPIRV::EmitSPIRV; | ||||
| using Shader::Maxwell::ConvertLegacyToGeneric; | ||||
| using Shader::Maxwell::GenerateGeometryPassthrough; | ||||
| using Shader::Maxwell::MergeDualVertexPrograms; | ||||
| using Shader::Maxwell::TranslateProgram; | ||||
| using VideoCommon::ComputeEnvironment; | ||||
|  | @ -60,6 +61,17 @@ auto MakeSpan(Container& container) { | |||
|     return std::span(container.data(), container.size()); | ||||
| } | ||||
| 
 | ||||
| Shader::OutputTopology MaxwellToOutputTopology(Maxwell::PrimitiveTopology topology) { | ||||
|     switch (topology) { | ||||
|     case Maxwell::PrimitiveTopology::Points: | ||||
|         return Shader::OutputTopology::PointList; | ||||
|     case Maxwell::PrimitiveTopology::LineStrip: | ||||
|         return Shader::OutputTopology::LineStrip; | ||||
|     default: | ||||
|         return Shader::OutputTopology::TriangleStrip; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { | ||||
|     switch (comparison) { | ||||
|     case Maxwell::ComparisonOp::Never_D3D: | ||||
|  | @ -327,6 +339,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
|         .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR || | ||||
|                                 driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR, | ||||
|         .support_snorm_render_buffer = true, | ||||
|         .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), | ||||
|     }; | ||||
| } | ||||
| 
 | ||||
|  | @ -509,7 +522,19 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
|     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; | ||||
|     const bool uses_vertex_a{key.unique_hashes[0] != 0}; | ||||
|     const bool uses_vertex_b{key.unique_hashes[1] != 0}; | ||||
| 
 | ||||
|     // Layer passthrough generation for devices without VK_EXT_shader_viewport_index_layer
 | ||||
|     Shader::IR::Program* layer_source_program{}; | ||||
| 
 | ||||
|     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|         const bool is_emulated_stage = layer_source_program != nullptr && | ||||
|                                        index == static_cast<u32>(Maxwell::ShaderType::Geometry); | ||||
|         if (key.unique_hashes[index] == 0 && is_emulated_stage) { | ||||
|             auto topology = MaxwellToOutputTopology(key.state.topology); | ||||
|             programs[index] = GenerateGeometryPassthrough(pools.inst, pools.block, host_info, | ||||
|                                                           *layer_source_program, topology); | ||||
|             continue; | ||||
|         } | ||||
|         if (key.unique_hashes[index] == 0) { | ||||
|             continue; | ||||
|         } | ||||
|  | @ -530,6 +555,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
|             auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; | ||||
|             programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); | ||||
|         } | ||||
| 
 | ||||
|         if (programs[index].info.requires_layer_emulation) { | ||||
|             layer_source_program = &programs[index]; | ||||
|         } | ||||
|     } | ||||
|     std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; | ||||
|     std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; | ||||
|  | @ -538,7 +567,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
|     Shader::Backend::Bindings binding; | ||||
|     for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; | ||||
|          ++index) { | ||||
|         if (key.unique_hashes[index] == 0) { | ||||
|         const bool is_emulated_stage = layer_source_program != nullptr && | ||||
|                                        index == static_cast<u32>(Maxwell::ShaderType::Geometry); | ||||
|         if (key.unique_hashes[index] == 0 && !is_emulated_stage) { | ||||
|             continue; | ||||
|         } | ||||
|         UNIMPLEMENTED_IF(index == 0); | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue