diff --git a/.gitmodules b/.gitmodules index cd47903699..0311d459d8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -82,3 +82,6 @@ [submodule "library-headers"] path = externals/library-headers/library-headers url = https://github.com/citra-emu/ext-library-headers.git +[submodule "libadrenotools"] + path = externals/libadrenotools + url = https://github.com/bylaws/libadrenotools diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 44882d6cfe..7777a6125f 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -242,3 +242,11 @@ target_include_directories(vma SYSTEM INTERFACE ./vma/include) # vulkan-headers add_library(vulkan-headers INTERFACE) target_include_directories(vulkan-headers SYSTEM INTERFACE ./vulkan-headers/include) +if (APPLE) + target_include_directories(vulkan-headers SYSTEM INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/MoltenVK) +endif() + +# adrenotools +if (ANDROID) + add_subdirectory(libadrenotools) +endif() diff --git a/externals/libadrenotools b/externals/libadrenotools new file mode 160000 index 0000000000..deec5f75ee --- /dev/null +++ b/externals/libadrenotools @@ -0,0 +1 @@ +Subproject commit deec5f75ee1a8ccbe32c8780b1d17284fc87b0f1 diff --git a/externals/moltenvk/mvk_config.h b/externals/moltenvk/mvk_config.h new file mode 100644 index 0000000000..360007e167 --- /dev/null +++ b/externals/moltenvk/mvk_config.h @@ -0,0 +1,1071 @@ +/* + * mvk_config.h + * + * Copyright (c) 2015-2023 The Brenwill Workshop Ltd. (http://www.brenwill.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +#ifndef __mvk_config_h_ +#define __mvk_config_h_ 1 + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#include + + +/** This header contains the public configuration API for MoltenVK. */ + + +/** + * The version number of MoltenVK is a single integer value, derived from the Major, Minor, + * and Patch version values, where each of the Major, Minor, and Patch components is allocated + * two decimal digits, in the format MjMnPt. This creates a version number that is both human + * readable and allows efficient computational comparisons to a single integer number. + * + * The following examples illustrate how the MoltenVK version number is built from its components: + * - 002000 (version 0.20.0) + * - 010000 (version 1.0.0) + * - 030104 (version 3.1.4) + * - 401215 (version 4.12.15) + */ +#define MVK_VERSION_MAJOR 1 +#define MVK_VERSION_MINOR 2 +#define MVK_VERSION_PATCH 5 + +#define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch)) +#define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH) + + +#define MVK_CONFIGURATION_API_VERSION 37 + +/** Identifies the level of logging MoltenVK should be limited to outputting. */ +typedef enum MVKConfigLogLevel { + MVK_CONFIG_LOG_LEVEL_NONE = 0, /**< No logging. */ + MVK_CONFIG_LOG_LEVEL_ERROR = 1, /**< Log errors only. */ + MVK_CONFIG_LOG_LEVEL_WARNING = 2, /**< Log errors and warning messages. */ + MVK_CONFIG_LOG_LEVEL_INFO = 3, /**< Log errors, warnings and informational messages. */ + MVK_CONFIG_LOG_LEVEL_DEBUG = 4, /**< Log errors, warnings, infos and debug messages. */ + MVK_CONFIG_LOG_LEVEL_MAX_ENUM = 0x7FFFFFFF +} MVKConfigLogLevel; + +/** Identifies the level of Vulkan call trace logging MoltenVK should perform. 
*/ +typedef enum MVKConfigTraceVulkanCalls { + MVK_CONFIG_TRACE_VULKAN_CALLS_NONE = 0, /**< No Vulkan call logging. */ + MVK_CONFIG_TRACE_VULKAN_CALLS_ENTER = 1, /**< Log the name of each Vulkan call when the call is entered. */ + MVK_CONFIG_TRACE_VULKAN_CALLS_ENTER_THREAD_ID = 2, /**< Log the name and thread ID of each Vulkan call when the call is entered. */ + MVK_CONFIG_TRACE_VULKAN_CALLS_ENTER_EXIT = 3, /**< Log the name of each Vulkan call when the call is entered and exited. This effectively brackets any other logging activity within the scope of the Vulkan call. */ + MVK_CONFIG_TRACE_VULKAN_CALLS_ENTER_EXIT_THREAD_ID = 4, /**< Log the name and thread ID of each Vulkan call when the call is entered and name when exited. This effectively brackets any other logging activity within the scope of the Vulkan call. */ + MVK_CONFIG_TRACE_VULKAN_CALLS_DURATION = 5, /**< Same as MVK_CONFIG_TRACE_VULKAN_CALLS_ENTER_EXIT, plus logs the time spent inside the Vulkan function. */ + MVK_CONFIG_TRACE_VULKAN_CALLS_DURATION_THREAD_ID = 6, /**< Same as MVK_CONFIG_TRACE_VULKAN_CALLS_ENTER_EXIT_THREAD_ID, plus logs the time spent inside the Vulkan function. */ + MVK_CONFIG_TRACE_VULKAN_CALLS_MAX_ENUM = 0x7FFFFFFF +} MVKConfigTraceVulkanCalls; + +/** Identifies the scope for Metal to run an automatic GPU capture for diagnostic debugging purposes. */ +typedef enum MVKConfigAutoGPUCaptureScope { + MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_NONE = 0, /**< No automatic GPU capture. */ + MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE = 1, /**< Automatically capture all GPU activity during the lifetime of a VkDevice. */ + MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME = 2, /**< Automatically capture all GPU activity during the rendering and presentation of the first frame. */ + MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_MAX_ENUM = 0x7FFFFFFF +} MVKConfigAutoGPUCaptureScope; + +/** Identifies extensions to advertise as part of MoltenVK configuration. 
*/ +typedef enum MVKConfigAdvertiseExtensionBits { + MVK_CONFIG_ADVERTISE_EXTENSIONS_ALL = 0x00000001, /**< All supported extensions. */ + MVK_CONFIG_ADVERTISE_EXTENSIONS_WSI = 0x00000002, /**< WSI extensions supported on the platform. */ + MVK_CONFIG_ADVERTISE_EXTENSIONS_PORTABILITY = 0x00000004, /**< Vulkan Portability Subset extensions. */ + MVK_CONFIG_ADVERTISE_EXTENSIONS_MAX_ENUM = 0x7FFFFFFF +} MVKConfigAdvertiseExtensionBits; +typedef VkFlags MVKConfigAdvertiseExtensions; + +/** Identifies the use of Metal Argument Buffers. */ +typedef enum MVKUseMetalArgumentBuffers { + MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER = 0, /**< Don't use Metal Argument Buffers. */ + MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS = 1, /**< Use Metal Argument Buffers for all pipelines. */ + MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_DESCRIPTOR_INDEXING = 2, /**< Use Metal Argument Buffers only if VK_EXT_descriptor_indexing extension is enabled. */ + MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_MAX_ENUM = 0x7FFFFFFF +} MVKUseMetalArgumentBuffers; + +/** Identifies the Metal functionality used to support Vulkan semaphore functionality (VkSemaphore). */ +typedef enum MVKVkSemaphoreSupportStyle { + MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE = 0, /**< Limit Vulkan to a single queue, with no explicit semaphore synchronization, and use Metal's implicit guarantees that all operations submitted to a queue will give the same result as if they had been run in submission order. */ + MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE = 1, /**< Use Metal events (MTLEvent) when available on the platform, and where safe. This will revert to same as MVK_CONFIG_VK_SEMAPHORE_USE_SINGLE_QUEUE on some NVIDIA GPUs and Rosetta2, due to potential challenges with MTLEvents on those platforms, or in older environments where MTLEvents are not supported. */ + MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS = 2, /**< Always use Metal events (MTLEvent) when available on the platform. 
This will revert to same as MVK_CONFIG_VK_SEMAPHORE_USE_SINGLE_QUEUE in older environments where MTLEvents are not supported. */ + MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK = 3, /**< Use CPU callbacks upon GPU submission completion. This is the slowest technique, but allows multiple queues, compared to MVK_CONFIG_VK_SEMAPHORE_USE_SINGLE_QUEUE. */ + MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_MAX_ENUM = 0x7FFFFFFF +} MVKVkSemaphoreSupportStyle; + +/** Identifies the style of Metal command buffer pre-filling to be used. */ +typedef enum MVKPrefillMetalCommandBuffersStyle { + MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL = 0, /**< During Vulkan command buffer filling, do not prefill a Metal command buffer for each Vulkan command buffer. A single Metal command buffer is created and encoded for all the Vulkan command buffers included when vkQueueSubmit() is called. MoltenVK automatically creates and drains a single Metal object autorelease pool when vkQueueSubmit() is called. This is the fastest option, but potentially has the largest memory footprint. */ + MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_DEFERRED_ENCODING = 1, /**< During Vulkan command buffer filling, encode to the Metal command buffer when vkEndCommandBuffer() is called. MoltenVK automatically creates and drains a single Metal object autorelease pool when vkEndCommandBuffer() is called. This option has the fastest performance, and the largest memory footprint, of the prefilling options using autorelease pools. */ + MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING = 2, /**< During Vulkan command buffer filling, immediately encode to the Metal command buffer, as each command is submitted to the Vulkan command buffer, and do not retain any command content in the Vulkan command buffer. MoltenVK automatically creates and drains a Metal object autorelease pool for each and every command added to the Vulkan command buffer. 
This option has the smallest memory footprint, and the slowest performance, of the prefilling options using autorelease pools. */ + MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING_NO_AUTORELEASE = 3, /**< During Vulkan command buffer filling, immediately encode to the Metal command buffer, as each command is submitted to the Vulkan command buffer, do not retain any command content in the Vulkan command buffer, and assume the app will ensure that each thread that fills commands into a Vulkan command buffer has a Metal autorelease pool. MoltenVK will not create and drain any autorelease pools during encoding. This is the fastest prefilling option, and generally has a small memory footprint, depending on when the app-provided autorelease pool drains. */ + MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_MAX_ENUM = 0x7FFFFFFF +} MVKPrefillMetalCommandBuffersStyle; + +/** Identifies when Metal shaders will be compiled with the fast math option. */ +typedef enum MVKConfigFastMath { + MVK_CONFIG_FAST_MATH_NEVER = 0, /**< Metal shaders will never be compiled with the fast math option. */ + MVK_CONFIG_FAST_MATH_ALWAYS = 1, /**< Metal shaders will always be compiled with the fast math option. */ + MVK_CONFIG_FAST_MATH_ON_DEMAND = 2, /**< Metal shaders will be compiled with the fast math option, unless the shader includes execution modes that require it to be compiled without fast math. */ + MVK_CONFIG_FAST_MATH_MAX_ENUM = 0x7FFFFFFF +} MVKConfigFastMath; + +/** Identifies available system data compression algorithms. */ +typedef enum MVKConfigCompressionAlgorithm { + MVK_CONFIG_COMPRESSION_ALGORITHM_NONE = 0, /**< No compression. */ + MVK_CONFIG_COMPRESSION_ALGORITHM_LZFSE = 1, /**< Apple proprietary. Good balance of high performance and small compression size, particularly for larger data content. */ + MVK_CONFIG_COMPRESSION_ALGORITHM_ZLIB = 2, /**< Open cross-platform ZLib format. For smaller data content, has better performance and smaller size than LZFSE. 
*/ + MVK_CONFIG_COMPRESSION_ALGORITHM_LZ4 = 3, /**< Fastest performance. Largest compression size. */ + MVK_CONFIG_COMPRESSION_ALGORITHM_LZMA = 4, /**< Slowest performance. Smallest compression size, particular with larger content. */ + MVK_CONFIG_COMPRESSION_ALGORITHM_MAX_ENUM = 0x7FFFFFFF, +} MVKConfigCompressionAlgorithm; + +/** Identifies the style of activity performance logging to use. */ +typedef enum MVKConfigActivityPerformanceLoggingStyle { + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT = 0, /**< Repeatedly log performance after a configured number of frames. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE = 1, /**< Log immediately after each performance measurement. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME = 2, /**< Log at the end of the VkDevice lifetime. This is useful for one-shot apps such as testing frameworks. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_MAX_ENUM = 0x7FFFFFFF, +} MVKConfigActivityPerformanceLoggingStyle; + +/** + * MoltenVK configuration settings. + * + * To be active, some configuration settings must be set before a VkDevice is created. + * See the description of the individual configuration structure members for more information. + * + * There are three mechanisms for setting the values of the MoltenVK configuration parameters: + * - Runtime API via the vkGetMoltenVKConfigurationMVK()/vkSetMoltenVKConfigurationMVK() functions. + * - Application runtime environment variables. + * - Build settings at MoltenVK build time. + * + * To change the MoltenVK configuration settings at runtime using a programmatic API, + * use the vkGetMoltenVKConfigurationMVK() and vkSetMoltenVKConfigurationMVK() functions + * to retrieve, modify, and set a copy of the MVKConfiguration structure. To be active, + * some configuration settings must be set before a VkInstance or VkDevice is created. + * See the description of each member for more information. 
+ * + * The initial value of each of the configuration settings can established at runtime + * by a corresponding environment variable, or if the environment variable is not set, + * by a corresponding build setting at the time MoltenVK is compiled. The environment + * variable and build setting for each configuration parameter share the same name. + * + * For example, the initial value of the shaderConversionFlipVertexY configuration setting + * is set by the MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y at runtime, or by the + * MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y build setting when MoltenVK is compiled. + * + * This structure may be extended as new features are added to MoltenVK. If you are linking to + * an implementation of MoltenVK that was compiled from a different MVK_CONFIGURATION_API_VERSION + * than your app was, the size of this structure in your app may be larger or smaller than the + * struct in MoltenVK. See the description of the vkGetMoltenVKConfigurationMVK() and + * vkSetMoltenVKConfigurationMVK() functions for information about how to handle this. + * + * TO SUPPORT DYNAMIC LINKING TO THIS STRUCTURE AS DESCRIBED ABOVE, THIS STRUCTURE SHOULD NOT + * BE CHANGED EXCEPT TO ADD ADDITIONAL MEMBERS ON THE END. EXISTING MEMBERS, AND THEIR ORDER, + * SHOULD NOT BE CHANGED. + */ +typedef struct { + + /** + * If enabled, debugging capabilities will be enabled, including logging + * shader code during runtime shader conversion. + * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will immediately effect subsequent MoltenVK behaviour. + * + * The initial value or this parameter is set by the + * MVK_DEBUG + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter is false if MoltenVK was + * built in Release mode, and true if MoltenVK was built in Debug mode. 
+ */ + VkBool32 debugMode; + + /** + * If enabled, MSL vertex shader code created during runtime shader conversion will + * flip the Y-axis of each vertex, as the Vulkan Y-axis is the inverse of OpenGL. + * + * An alternate way to reverse the Y-axis is to employ a negative Y-axis value on + * the viewport, in which case this parameter can be disabled. + * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will immediately effect subsequent MoltenVK behaviour. + * Specifically, this parameter can be enabled when compiling some pipelines, + * and disabled when compiling others. Existing pipelines are not automatically + * re-compiled when this parameter is changed. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to true. + */ + VkBool32 shaderConversionFlipVertexY; + + /** + * If enabled, queue command submissions (vkQueueSubmit() & vkQueuePresentKHR()) will be + * processed on the thread that called the submission function. If disabled, processing + * will be dispatched to a GCD dispatch_queue whose priority is determined by + * VkDeviceQueueCreateInfo::pQueuePriorities during vkCreateDevice(). + * + * The value of this parameter must be changed before creating a VkDevice, + * for the change to take effect. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to true for macOS 10.14 + * and above or iOS 12 and above, and false otherwise. The reason for this distinction + * is that this feature should be disabled when emulation is required to support VkEvents + * because native support for events (MTLEvent) is not available. 
+ */ + VkBool32 synchronousQueueSubmits; + + /** + * If set to MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL, a single Metal + * command buffer will be created and filled when the Vulkan command buffers are submitted + * to the Vulkan queue. This allows a single Metal command buffer to be used for all of the + * Vulkan command buffers in a queue submission. The Metal command buffer is filled on the + * thread that processes the command queue submission. + * + * If set to any value other than MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL, + * where possible, a Metal command buffer will be created and filled when each Vulkan + * command buffer is filled. For applications that parallelize the filling of Vulkan + * commmand buffers across multiple threads, this allows the Metal command buffers to also + * be filled on the same parallel thread. Because each command buffer is filled separately, + * this requires that each Vulkan command buffer have a dedicated Metal command buffer. + * + * See the definition of the MVKPrefillMetalCommandBuffersStyle enumeration above for + * descriptions of the various values that can be used for this setting. The differences + * are primarily distinguished by how memory recovery is handled for autoreleased Metal + * objects that are created under the covers as the commands added to the Vulkan command + * buffer are encoded into the corresponding Metal command buffer. You can decide whether + * your app will recover all autoreleased Metal objects, or how agressively MoltenVK should + * recover autoreleased Metal objects, based on your approach to command buffer filling. + * + * Depending on the nature of your application, you may find performance is improved by filling + * the Metal command buffers on parallel threads, or you may find that performance is improved by + * consolidating all Vulkan command buffers onto a single Metal command buffer during queue submission. 
+ * + * When enabling this feature, be aware that one Metal command buffer is required for each Vulkan + * command buffer. Depending on the number of command buffers that you use, you may also need to + * change the value of the maxActiveMetalCommandBuffersPerQueue setting. + * + * If this feature is enabled, be aware that if you have recorded commands to a Vulkan command buffer, + * and then choose to reset that command buffer instead of submitting it, the corresponding prefilled + * Metal command buffer will still be submitted. This is because Metal command buffers do not support + * the concept of being reset after being filled. Depending on when and how often you do this, + * it may cause unexpected visual artifacts and unnecessary GPU load. + * + * Prefilling of a Metal command buffer will not occur during the filling of secondary command + * buffers (VK_COMMAND_BUFFER_LEVEL_SECONDARY), or for primary command buffers that are intended + * to be submitted to multiple queues concurrently (VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT). + * + * This feature is incompatible with updating descriptors after binding. If any of the + * *UpdateAfterBind feature flags of VkPhysicalDeviceDescriptorIndexingFeatures or + * VkPhysicalDeviceInlineUniformBlockFeatures have been enabled, the value of this + * setting will be ignored and treated as if it is false. + * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will immediately effect subsequent MoltenVK behaviour. + * Specifically, this parameter can be enabled when filling some command buffers, + * and disabled when later filling others. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to + * MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_NO_PREFILL. 
+ */ + MVKPrefillMetalCommandBuffersStyle prefillMetalCommandBuffers; + + /** + * The maximum number of Metal command buffers that can be concurrently active per Vulkan queue. + * The number of active Metal command buffers required depends on the prefillMetalCommandBuffers + * setting. If prefillMetalCommandBuffers is enabled, one Metal command buffer is required per + * Vulkan command buffer. If prefillMetalCommandBuffers is disabled, one Metal command buffer + * is required per command buffer queue submission, which may be significantly less than the + * number of Vulkan command buffers. + * + * The value of this parameter must be changed before creating a VkDevice, + * for the change to take effect. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_QUEUE + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to 64. + */ + uint32_t maxActiveMetalCommandBuffersPerQueue; + + /** + * Depending on the GPU, Metal allows 8192 or 32768 occlusion queries per MTLBuffer. + * If enabled, MoltenVK allocates a MTLBuffer for each query pool, allowing each query + * pool to support that permitted number of queries. This may slow performance or cause + * unexpected behaviour if the query pool is not established prior to a Metal renderpass, + * or if the query pool is changed within a renderpass. If disabled, one MTLBuffer will + * be shared by all query pools, which improves performance, but limits the total device + * queries to the permitted number. + * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will immediately effect subsequent MoltenVK behaviour. + * Specifically, this parameter can be enabled when creating some query pools, + * and disabled when creating others. 
+ * + * The initial value or this parameter is set by the + * MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to true. + */ + VkBool32 supportLargeQueryPools; + + /** Obsolete, ignored, and deprecated. All surface presentations are performed with a command buffer. */ + VkBool32 presentWithCommandBuffer; + + /** + * If enabled, swapchain images will use simple Nearest sampling when minifying or magnifying + * the swapchain image to fit a physical display surface. If disabled, swapchain images will + * use Linear sampling when magnifying the swapchain image to fit a physical display surface. + * Enabling this setting avoids smearing effects when swapchain images are simple interger + * multiples of display pixels (eg- macOS Retina, and typical of graphics apps and games), + * but may cause aliasing effects when using non-integer display scaling. + * + * The value of this parameter must be changed before creating a VkSwapchain, + * for the change to take effect. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to true. + */ + VkBool32 swapchainMinMagFilterUseNearest; +#define swapchainMagFilterUseNearest swapchainMinMagFilterUseNearest + + /** + * The maximum amount of time, in nanoseconds, to wait for a Metal library, function, or + * pipeline state object to be compiled and created by the Metal compiler. An internal error + * within the Metal compiler can stall the thread for up to 30 seconds. Setting this value + * limits that delay to a specified amount of time, allowing shader compilations to fail fast. 
+ * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will immediately effect subsequent MoltenVK behaviour. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_METAL_COMPILE_TIMEOUT + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to infinite. + */ + uint64_t metalCompileTimeout; + + /** + * If enabled, performance statistics, as defined by the MVKPerformanceStatistics structure, + * are collected, and can be retrieved via the vkGetPerformanceStatisticsMVK() function. + * + * You can also use the activityPerformanceLoggingStyle and performanceLoggingFrameCount + * parameters to configure when to log the performance statistics collected by this parameter. + * + * The value of this parameter must be changed before creating a VkDevice, + * for the change to take effect. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_PERFORMANCE_TRACKING + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to false. + */ + VkBool32 performanceTracking; + + /** + * If non-zero, performance statistics, frame-based statistics will be logged, on a + * repeating cycle, once per this many frames. The performanceTracking parameter must + * also be enabled. If this parameter is zero, or the performanceTracking parameter + * is disabled, no frame-based performance statistics will be logged. + * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will immediately effect subsequent MoltenVK behaviour. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_PERFORMANCE_LOGGING_FRAME_COUNT + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to zero. 
+ */ + uint32_t performanceLoggingFrameCount; + + /** + * If enabled, a MoltenVK logo watermark will be rendered on top of the scene. + * This can be enabled for publicity during demos. + * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will immediately effect subsequent MoltenVK behaviour. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_DISPLAY_WATERMARK + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to false. + */ + VkBool32 displayWatermark; + + /** + * Metal does not distinguish functionality between queues, which would normally mean only + * a single general-purpose queue family with multiple queues is needed. However, Vulkan + * associates command buffers with a queue family, whereas Metal associates command buffers + * with a specific Metal queue. In order to allow a Metal command buffer to be prefilled + * before is is formally submitted to a Vulkan queue, each Vulkan queue family can support + * only a single Metal queue. As a result, in order to provide parallel queue operations, + * MoltenVK provides multiple queue families, each with a single queue. + * + * If this parameter is disabled, all queue families will be advertised as having general-purpose + * graphics + compute + transfer functionality, which is how the actual Metal queues behave. + * + * If this parameter is enabled, one queue family will be advertised as having general-purpose + * graphics + compute + transfer functionality, and the remaining queue families will be advertised + * as having specialized graphics OR compute OR transfer functionality, to make it easier for some + * apps to select a queue family with the appropriate requirements. 
+ * + * The value of this parameter must be changed before creating a VkDevice, and before + * querying a VkPhysicalDevice for queue family properties, for the change to take effect. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_SPECIALIZED_QUEUE_FAMILIES + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to false. + */ + VkBool32 specializedQueueFamilies; + + /** + * If enabled, when the app creates a VkDevice from a VkPhysicalDevice (GPU) that is neither + * headless nor low-power, and is different than the GPU used by the windowing system, the + * windowing system will be forced to switch to use the GPU selected by the Vulkan app. + * When the Vulkan app is ended, the windowing system will automatically switch back to + * using the previous GPU, depending on the usage requirements of other running apps. + * + * If disabled, the Vulkan app will render using its selected GPU, and if the windowing + * system uses a different GPU, the windowing system compositor will automatically copy + * framebuffer content from the app GPU to the windowing system GPU. + * + * The value of this parmeter has no effect on systems with a single GPU, or when the + * Vulkan app creates a VkDevice from a low-power or headless VkPhysicalDevice (GPU). + * + * Switching the windowing system GPU to match the Vulkan app GPU maximizes app performance, + * because it avoids the windowing system compositor from having to copy framebuffer content + * between GPUs on each rendered frame. However, doing so forces the entire system to + * potentially switch to using a GPU that may consume more power while the app is running. + * + * Some Vulkan apps may want to render using a high-power GPU, but leave it up to the + * system window compositor to determine how best to blend content with the windowing + * system, and as a result, may want to disable this parameter. 
+ * + * The value of this parameter must be changed before creating a VkDevice, + * for the change to take effect. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_SWITCH_SYSTEM_GPU + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to true. + */ + VkBool32 switchSystemGPU; + + /** + * Older versions of Metal do not natively support per-texture swizzling. When running on + * such a system, and this parameter is enabled, arbitrary VkImageView component swizzles + * are supported, as defined in VkImageViewCreateInfo::components when creating a VkImageView. + * + * If disabled, and native Metal per-texture swizzling is not available on the platform, + * a very limited set of VkImageView component swizzles are supported via format substitutions. + * + * If Metal supports native per-texture swizzling, this parameter is ignored. + * + * When running on an older version of Metal that does not support native per-texture + * swizzling, if this parameter is enabled, both when a VkImageView is created, and + * when any pipeline that uses that VkImageView is compiled, VkImageView swizzling is + * automatically performed in the converted Metal shader code during all texture sampling + * and reading operations, regardless of whether a swizzle is required for the VkImageView + * associated with the Metal texture. This may result in reduced performance. + * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will immediately effect subsequent MoltenVK behaviour. + * Specifically, this parameter can be enabled when creating VkImageViews that need it, + * and compiling pipelines that use those VkImageViews, and can be disabled when creating + * VkImageViews that don't need it, and compiling pipelines that use those VkImageViews. 
+ * + * Existing pipelines are not automatically re-compiled when this parameter is changed. + * + * An error is logged and returned during VkImageView creation if that VkImageView + * requires full image view swizzling and this feature is not enabled. An error is + * also logged when a pipeline that was not compiled with full image view swizzling + * is presented with a VkImageView that is expecting it. + * + * An error is also retuned and logged when a VkPhysicalDeviceImageFormatInfo2KHR is passed + * in a call to vkGetPhysicalDeviceImageFormatProperties2KHR() to query for an VkImageView + * format that will require full swizzling to be enabled, and this feature is not enabled. + * + * If this parameter is disabled, and native Metal per-texture swizzling is not available + * on the platform, the following limited set of VkImageView swizzles are supported by + * MoltenVK, via automatic format substitution: + * + * Texture format Swizzle + * -------------- ------- + * VK_FORMAT_R8_UNORM ZERO, ANY, ANY, RED + * VK_FORMAT_A8_UNORM ALPHA, ANY, ANY, ZERO + * VK_FORMAT_R8G8B8A8_UNORM BLUE, GREEN, RED, ALPHA + * VK_FORMAT_R8G8B8A8_SRGB BLUE, GREEN, RED, ALPHA + * VK_FORMAT_B8G8R8A8_UNORM BLUE, GREEN, RED, ALPHA + * VK_FORMAT_B8G8R8A8_SRGB BLUE, GREEN, RED, ALPHA + * VK_FORMAT_D32_SFLOAT_S8_UINT RED, ANY, ANY, ANY (stencil only) + * VK_FORMAT_D24_UNORM_S8_UINT RED, ANY, ANY, ANY (stencil only) + * + * The initial value or this parameter is set by the + * MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to false. + */ + VkBool32 fullImageViewSwizzle; + + /** + * The index of the queue family whose presentation submissions will + * be used as the default GPU Capture Scope during debugging in Xcode. + * + * The value of this parameter must be changed before creating a VkDevice, + * for the change to take effect. 
+ * + * The initial value or this parameter is set by the + * MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to zero (the first queue family). + */ + uint32_t defaultGPUCaptureScopeQueueFamilyIndex; + + /** + * The index of the queue, within the queue family identified by the + * defaultGPUCaptureScopeQueueFamilyIndex parameter, whose presentation submissions + * will be used as the default GPU Capture Scope during debugging in Xcode. + * + * The value of this parameter must be changed before creating a VkDevice, + * for the change to take effect. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to zero (the first queue). + */ + uint32_t defaultGPUCaptureScopeQueueIndex; + + /** + * Identifies when Metal shaders will be compiled with the Metal fastMathEnabled property + * enabled. For shaders compiled with the Metal fastMathEnabled property enabled, shader + * floating point math is significantly faster, but it may cause the Metal Compiler to + * optimize floating point operations in ways that may violate the IEEE 754 standard. + * + * Enabling Metal fast math can dramatically improve shader performance, and has little + * practical effect on the numerical accuracy of most shaders. As such, disabling fast + * math should be done carefully and deliberately. For most applications, always enabling + * fast math, by setting the value of this property to MVK_CONFIG_FAST_MATH_ALWAYS, + * is the preferred choice. 
 + * + * Apps that have specific accuracy and handling needs for particular shaders, may elect to + * set the value of this property to MVK_CONFIG_FAST_MATH_ON_DEMAND, so that fast math will + * be disabled when compiling shaders that request capabilities such as SignedZeroInfNanPreserve. + * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will be applied to future Metal shader compilations. + * + * The initial value of this parameter is set by the + * MVK_CONFIG_FAST_MATH_ENABLED + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to MVK_CONFIG_FAST_MATH_ALWAYS. + */ + MVKConfigFastMath fastMathEnabled; + + /** + * Controls the level of logging performed by MoltenVK. + * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will immediately effect subsequent MoltenVK behaviour. + * + * The initial value of this parameter is set by the + * MVK_CONFIG_LOG_LEVEL + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, errors and informational messages are logged. + */ + MVKConfigLogLevel logLevel; + + /** + * Causes MoltenVK to log the name of each Vulkan call made by the application, + * along with the Mach thread ID, global system thread ID, and thread name. + * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will immediately effect subsequent MoltenVK behaviour. + * + * The initial value of this parameter is set by the + * MVK_CONFIG_TRACE_VULKAN_CALLS + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, no Vulkan call logging will occur. + */ + MVKConfigTraceVulkanCalls traceVulkanCalls; + + /** + * Force MoltenVK to use a low-power GPU, if one is available on the device. 
 + * + * The value of this parameter must be changed before creating a VkInstance, + * for the change to take effect. + * + * The initial value of this parameter is set by the + * MVK_CONFIG_FORCE_LOW_POWER_GPU + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, this setting is disabled by default, allowing both + * low-power and high-power GPU's to be used. + */ + VkBool32 forceLowPowerGPU; + + /** Deprecated. Vulkan semaphores using MTLFence are no longer supported. Use semaphoreSupportStyle instead. */ + VkBool32 semaphoreUseMTLFence; + + /** + * Determines the style used to implement Vulkan semaphore (VkSemaphore) functionality in Metal. + * See the documentation of the MVKVkSemaphoreSupportStyle for the options. + * + * In the special case of VK_SEMAPHORE_TYPE_TIMELINE semaphores, MoltenVK will always use + * MTLSharedEvent if it is available on the platform, regardless of the value of this parameter. + * + * The value of this parameter must be changed before creating a VkInstance, + * for the change to take effect. + * + * The initial value of this parameter is set by the + * MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, this setting is set to + * MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE by default, + * and MoltenVK will use MTLEvent, except on NVIDIA GPU and Rosetta2 environments, + * or where MTLEvents are not supported, where it will use a single queue with + * implicit synchronization (as if this parameter was set to + * MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE). + * + * This parameter interacts with the deprecated legacy parameters semaphoreUseMTLEvent + * and semaphoreUseMTLFence. If semaphoreUseMTLEvent is enabled, this parameter will be + * set to MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE. 
+ * If semaphoreUseMTLEvent is disabled, this parameter will be set to + * MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE if semaphoreUseMTLFence is enabled, + * or MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_CALLBACK if semaphoreUseMTLFence is disabled. + * Structurally, this parameter replaces, and is aliased by, semaphoreUseMTLEvent. + */ + MVKVkSemaphoreSupportStyle semaphoreSupportStyle; +#define semaphoreUseMTLEvent semaphoreSupportStyle + + /** + * Controls whether Metal should run an automatic GPU capture without the user having to + * trigger it manually via the Xcode user interface, and controls the scope under which + * that GPU capture will occur. This is useful when trying to capture a one-shot GPU trace, + * such as when running a Vulkan CTS test case. For the automatic GPU capture to occur, the + * Xcode scheme under which the app is run must have the Metal GPU capture option enabled. + * This parameter should not be set to manually trigger a GPU capture via the Xcode user interface. + * + * When the value of this parameter is MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME, + * the queue for which the GPU activity is captured is identifed by the values of + * the defaultGPUCaptureScopeQueueFamilyIndex and defaultGPUCaptureScopeQueueIndex + * configuration parameters. + * + * The value of this parameter must be changed before creating a VkDevice, + * for the change to take effect. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, no automatic GPU capture will occur. + */ + MVKConfigAutoGPUCaptureScope autoGPUCaptureScope; + + /** + * The path to a file where the automatic GPU capture should be saved, if autoGPUCaptureScope + * is enabled. In this case, the Xcode scheme need not have Metal GPU capture enabled, and in + * fact the app need not be run under Xcode's control at all. 
This is useful in case the app + * cannot be run under Xcode's control. A path starting with '~' can be used to place it in a + * user's home directory, as in the shell. This feature requires Metal 3.0 (macOS 10.15, iOS 13). + * + * If this parameter is NULL or an empty string, and autoGPUCaptureScope is enabled, automatic + * GPU capture will be handled by the Xcode user interface. + * + * The value of this parameter must be changed before creating a VkDevice, + * for the change to take effect. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, automatic GPU capture will be handled by the Xcode user interface. + */ + const char* autoGPUCaptureOutputFilepath; + + /** + * Controls whether MoltenVK should use a Metal 2D texture with a height of 1 for a + * Vulkan 1D image, or use a native Metal 1D texture. Metal imposes significant restrictions + * on native 1D textures, including not being renderable, clearable, or permitting mipmaps. + * Using a Metal 2D texture allows Vulkan 1D textures to support this additional functionality. + * + * The value of this parameter should only be changed before creating the VkInstance. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_TEXTURE_1D_AS_2D + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, this setting is enabled by default, and MoltenVK will + * use a Metal 2D texture for each Vulkan 1D image. + */ + VkBool32 texture1DAs2D; + + /** + * Controls whether MoltenVK should preallocate memory in each VkDescriptorPool according + * to the values of the VkDescriptorPoolSize parameters. Doing so may improve descriptor set + * allocation performance and memory stability at a cost of preallocated application memory. 
+ * If this setting is disabled, the descriptors required for a descriptor set will be individually + * dynamically allocated in application memory when the descriptor set itself is allocated. + * + * The value of this parameter may be changed at any time during application runtime, and the + * changed value will affect the behavior of VkDescriptorPools created after the value is changed. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_PREALLOCATE_DESCRIPTORS + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, this setting is enabled by default, and MoltenVK will + * allocate a pool of descriptors when a VkDescriptorPool is created. + */ + VkBool32 preallocateDescriptors; + + /** + * Controls whether MoltenVK should use pools to manage memory used when adding commands + * to command buffers. If this setting is enabled, MoltenVK will use a pool to hold command + * resources for reuse during command execution. If this setting is disabled, command memory + * is allocated and destroyed each time a command is executed. This is a classic time-space + * trade off. When command pooling is active, the memory in the pool can be cleared via a + * call to the vkTrimCommandPoolKHR() command. + * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will immediately effect behavior of VkCommandPools created + * after the setting is changed. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_USE_COMMAND_POOLING + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, this setting is enabled by default, and MoltenVK will pool command memory. + */ + VkBool32 useCommandPooling; + + /** + * Controls whether MoltenVK should use MTLHeaps for allocating textures and buffers + * from device memory. 
 If this setting is enabled, and placement MTLHeaps are + * available on the platform, MoltenVK will allocate a placement MTLHeap for each VkDeviceMemory + * instance, and allocate textures and buffers from that placement heap. If this environment + * variable is disabled, MoltenVK will allocate textures and buffers from general device memory. + * + * Apple recommends that MTLHeaps should only be used for specific requirements such as aliasing + * or hazard tracking, and MoltenVK testing has shown that allocating multiple textures of + * different types or usages from one MTLHeap can occasionally cause corruption issues under + * certain circumstances. + * + * The value of this parameter must be changed before creating a VkInstance, + * for the change to take effect. + * + * The initial value of this parameter is set by the + * MVK_CONFIG_USE_MTLHEAP + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, this setting is disabled by default, and MoltenVK + * will allocate textures and buffers from general device memory. + */ + VkBool32 useMTLHeap; + + /** + * Controls when MoltenVK should log activity performance events. + * + * The value of this parameter must be changed before creating a VkDevice, + * for the change to take effect. + * + * The initial value of this parameter is set by the + * MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, this setting is set to + * MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT by default, + * and activity performance will be logged when frame activity is logged. + */ + MVKConfigActivityPerformanceLoggingStyle activityPerformanceLoggingStyle; +#define logActivityPerformanceInline activityPerformanceLoggingStyle + + /** + * Controls the Vulkan API version that MoltenVK should advertise in vkEnumerateInstanceVersion(). 
+ * When reading this value, it will be one of the VK_API_VERSION_1_* values, including the latest + * VK_HEADER_VERSION component. When setting this value, it should be set to one of: + * + * VK_API_VERSION_1_2 (equivalent decimal number 4202496) + * VK_API_VERSION_1_1 (equivalent decimal number 4198400) + * VK_API_VERSION_1_0 (equivalent decimal number 4194304) + * + * MoltenVK will automatically add the VK_HEADER_VERSION component. + * + * The value of this parameter must be changed before creating a VkInstance, + * for the change to take effect. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_API_VERSION_TO_ADVERTISE + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this parameter defaults to the highest API version + * currently supported by MoltenVK, including the latest VK_HEADER_VERSION component. + */ + uint32_t apiVersionToAdvertise; + + /** + * Controls which extensions MoltenVK should advertise it supports in + * vkEnumerateInstanceExtensionProperties() and vkEnumerateDeviceExtensionProperties(). + * The value of this parameter is a bitwise OR of values from the MVKConfigAdvertiseExtensionBits + * enumeration. Any prerequisite extensions are also advertised. + * If the flag MVK_CONFIG_ADVERTISE_EXTENSIONS_ALL is included, all supported extensions + * will be advertised. A value of zero means no extensions will be advertised. + * + * The value of this parameter must be changed before creating a VkInstance, + * for the change to take effect. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_ADVERTISE_EXTENSIONS + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, the value of this setting defaults to + * MVK_CONFIG_ADVERTISE_EXTENSIONS_ALL, and all supported extensions will be advertised. 
+ */ + MVKConfigAdvertiseExtensions advertiseExtensions; + + /** + * Controls whether MoltenVK should treat a lost VkDevice as resumable, unless the + * corresponding VkPhysicalDevice has also been lost. The VK_ERROR_DEVICE_LOST error has + * a broad definitional range, and can mean anything from a GPU hiccup on the current + * command buffer submission, to a physically removed GPU. In the case where this error does + * not impact the VkPhysicalDevice, Vulkan requires that the app destroy and re-create a new + * VkDevice. However, not all apps (including CTS) respect that requirement, leading to what + * might be a transient command submission failure causing an unexpected catastrophic app failure. + * + * If this setting is enabled, in the case of a VK_ERROR_DEVICE_LOST error that does NOT impact + * the VkPhysicalDevice, MoltenVK will log the error, but will not mark the VkDevice as lost, + * allowing the VkDevice to continue to be used. If this setting is disabled, MoltenVK will + * mark the VkDevice as lost, and subsequent use of that VkDevice will be reduced or prohibited. + * + * The value of this parameter may be changed at any time during application runtime, + * and the changed value will affect the error behavior of subsequent command submissions. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_RESUME_LOST_DEVICE + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, this setting is disabled by default, and MoltenVK + * will mark the VkDevice as lost when a command submission failure occurs. + */ + VkBool32 resumeLostDevice; + + /** + * Controls whether MoltenVK should use Metal argument buffers for resources defined in + * descriptor sets, if Metal argument buffers are supported on the platform. Using Metal + * argument buffers dramatically increases the number of buffers, textures and samplers + * that can be bound to a pipeline shader, and in most cases improves performance. 
+ * This setting is an enumeration that specifies the conditions under which MoltenVK + * will use Metal argument buffers. + * + * NOTE: Currently, Metal argument buffer support is in beta stage, and is only supported + * on macOS 11.0 (Big Sur) or later, or on older versions of macOS using an Intel GPU. + * Metal argument buffers support is not available on iOS. Development to support iOS + * and a wider combination of GPU's on older macOS versions is under way. + * + * The value of this parameter must be changed before creating a VkDevice, + * for the change to take effect. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, this setting is set to + * MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER by default, + * and MoltenVK will not use Metal argument buffers. + */ + MVKUseMetalArgumentBuffers useMetalArgumentBuffers; + + /** + * Controls the type of compression to use on the MSL source code that is stored in memory + * for use in a pipeline cache. After being converted from SPIR-V, or loaded directly into + * a VkShaderModule, and then compiled into a MTLLibrary, the MSL source code is no longer + * needed for operation, but it is retained so it can be written out as part of a pipeline + * cache export. When a large number of shaders are loaded, this can consume significant + * memory. In such a case, this parameter can be used to compress the MSL source code that + * is awaiting export as part of a pipeline cache. + * + * Pipeline cache compression is available for macOS 10.15 and above, and iOS/tvOS 13.0 and above. + * + * The value of this parameter can be changed at any time, and will affect the size of + * the cached MSL from subsequent shader compilations. 
+ * + * The initial value or this parameter is set by the + * MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM + * runtime environment variable or MoltenVK compile-time build setting. + * If neither is set, this setting is set to + * MVK_CONFIG_COMPRESSION_ALGORITHM_NONE by default, + * and MoltenVK will not compress the MSL source code after compilation into a MTLLibrary. + */ + MVKConfigCompressionAlgorithm shaderSourceCompressionAlgorithm; + + /** + * Maximize the concurrent executing compilation tasks. + * + * The initial value or this parameter is set by the + * MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION + * runtime environment variable or MoltenVK compile-time build setting. + * This setting requires macOS 13.3 & is disabled by default. + */ + VkBool32 shouldMaximizeConcurrentCompilation; + +} MVKConfiguration; + + + +#pragma mark - +#pragma mark Function types + + typedef VkResult (VKAPI_PTR *PFN_vkGetMoltenVKConfigurationMVK)(VkInstance ignored, MVKConfiguration* pConfiguration, size_t* pConfigurationSize); + typedef VkResult (VKAPI_PTR *PFN_vkSetMoltenVKConfigurationMVK)(VkInstance ignored, const MVKConfiguration* pConfiguration, size_t* pConfigurationSize); + + +#pragma mark - +#pragma mark Function prototypes + +#ifndef VK_NO_PROTOTYPES + +/** + * Populates the pConfiguration structure with the current MoltenVK configuration settings. + * + * To change a specific configuration value, call vkGetMoltenVKConfigurationMVK() to retrieve + * the current configuration, make changes, and call vkSetMoltenVKConfigurationMVK() to + * update all of the values. + * + * The VkInstance object you provide here is ignored, and a VK_NULL_HANDLE value can be provided. + * This function can be called before the VkInstance has been created. It is safe to call this function + * with a VkInstance retrieved from a different layer in the Vulkan SDK Loader and Layers framework. 
+ * + * To be active, some configuration settings must be set before a VkInstance or VkDevice + * is created. See the description of the MVKConfiguration members for more information. + * + * If you are linking to an implementation of MoltenVK that was compiled from a different + * MVK_CONFIGURATION_API_VERSION than your app was, the size of the MVKConfiguration structure + * in your app may be larger or smaller than the same struct as expected by MoltenVK. + * + * When calling this function, set the value of *pConfigurationSize to sizeof(MVKConfiguration), + * to tell MoltenVK the limit of the size of your MVKConfiguration structure. Upon return from + * this function, the value of *pConfigurationSize will hold the actual number of bytes copied + * into your passed MVKConfiguration structure, which will be the smaller of what your app + * thinks is the size of MVKConfiguration, and what MoltenVK thinks it is. This represents the + * safe access area within the structure for both MoltenVK and your app. + * + * If the size that MoltenVK expects for MVKConfiguration is different than the value passed in + * *pConfigurationSize, this function will return VK_INCOMPLETE, otherwise it will return VK_SUCCESS. + * + * Although it is not necessary, you can use this function to determine in advance the value + * that MoltenVK expects the size of MVKConfiguration to be by setting the value of pConfiguration + * to NULL. In that case, this function will set *pConfigurationSize to the size that MoltenVK + * expects MVKConfiguration to be. + */ +VKAPI_ATTR VkResult VKAPI_CALL vkGetMoltenVKConfigurationMVK( + VkInstance ignored, + MVKConfiguration* pConfiguration, + size_t* pConfigurationSize); + +/** + * Sets the MoltenVK configuration settings to those found in the pConfiguration structure. 
+ * + * To change a specific configuration value, call vkGetMoltenVKConfigurationMVK() + * to retrieve the current configuration, make changes, and call + * vkSetMoltenVKConfigurationMVK() to update all of the values. + * + * The VkInstance object you provide here is ignored, and a VK_NULL_HANDLE value can be provided. + * This function can be called before the VkInstance has been created. It is safe to call this function + * with a VkInstance retrieved from a different layer in the Vulkan SDK Loader and Layers framework. + * + * To be active, some configuration settings must be set before a VkInstance or VkDevice + * is created. See the description of the MVKConfiguration members for more information. + * + * If you are linking to an implementation of MoltenVK that was compiled from a different + * MVK_CONFIGURATION_API_VERSION than your app was, the size of the MVKConfiguration structure + * in your app may be larger or smaller than the same struct as expected by MoltenVK. + * + * When calling this function, set the value of *pConfigurationSize to sizeof(MVKConfiguration), + * to tell MoltenVK the limit of the size of your MVKConfiguration structure. Upon return from + * this function, the value of *pConfigurationSize will hold the actual number of bytes copied + * out of your passed MVKConfiguration structure, which will be the smaller of what your app + * thinks is the size of MVKConfiguration, and what MoltenVK thinks it is. This represents the + * safe access area within the structure for both MoltenVK and your app. + * + * If the size that MoltenVK expects for MVKConfiguration is different than the value passed in + * *pConfigurationSize, this function will return VK_INCOMPLETE, otherwise it will return VK_SUCCESS. + * + * Although it is not necessary, you can use this function to determine in advance the value + * that MoltenVK expects the size of MVKConfiguration to be by setting the value of pConfiguration + * to NULL. 
In that case, this function will set *pConfigurationSize to the size that MoltenVK + * expects MVKConfiguration to be. + */ +VKAPI_ATTR VkResult VKAPI_CALL vkSetMoltenVKConfigurationMVK( + VkInstance ignored, + const MVKConfiguration* pConfiguration, + size_t* pConfigurationSize); + + +#pragma mark - +#pragma mark Shaders + + /** + * NOTE: Shader code should be submitted as SPIR-V. Although some simple direct MSL shaders may work, + * direct loading of MSL source code or compiled MSL code is not officially supported at this time. + * Future versions of MoltenVK may support direct MSL submission again. + * + * Enumerates the magic number values to set in the MVKMSLSPIRVHeader when + * submitting a SPIR-V stream that contains either Metal Shading Language source + * code or Metal Shading Language compiled binary code in place of SPIR-V code. + */ + typedef enum { + kMVKMagicNumberSPIRVCode = 0x07230203, /**< SPIR-V stream contains standard SPIR-V code. */ + kMVKMagicNumberMSLSourceCode = 0x19960412, /**< SPIR-V stream contains Metal Shading Language source code. */ + kMVKMagicNumberMSLCompiledCode = 0x19981215, /**< SPIR-V stream contains Metal Shading Language compiled binary code. */ + } MVKMSLMagicNumber; + + /** + * NOTE: Shader code should be submitted as SPIR-V. Although some simple direct MSL shaders may work, + * direct loading of MSL source code or compiled MSL code is not officially supported at this time. + * Future versions of MoltenVK may support direct MSL submission again. + * + * Describes the header at the start of an SPIR-V stream, when it contains either + * Metal Shading Language source code or Metal Shading Language compiled binary code. + * + * To submit MSL source code to the vkCreateShaderModule() function in place of SPIR-V + * code, prepend a MVKMSLSPIRVHeader containing the kMVKMagicNumberMSLSourceCode magic + * number to the MSL source code. The MSL source code must be null-terminated. 
+ * + * To submit MSL compiled binary code to the vkCreateShaderModule() function in place of + * SPIR-V code, prepend a MVKMSLSPIRVHeader containing the kMVKMagicNumberMSLCompiledCode + * magic number to the MSL compiled binary code. + * + * In both cases, the pCode element of VkShaderModuleCreateInfo should pointer to the + * location of the MVKMSLSPIRVHeader, and the MSL code should start at the byte immediately + * after the MVKMSLSPIRVHeader. + * + * The codeSize element of VkShaderModuleCreateInfo should be set to the entire size of + * the submitted code memory, including the additional sizeof(MVKMSLSPIRVHeader) bytes + * taken up by the MVKMSLSPIRVHeader, and, in the case of MSL source code, including + * the null-terminator byte. + */ + typedef uint32_t MVKMSLSPIRVHeader; + + +#endif // VK_NO_PROTOTYPES + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif diff --git a/externals/vulkan-headers b/externals/vulkan-headers index bae9700cd9..85c2334e92 160000 --- a/externals/vulkan-headers +++ b/externals/vulkan-headers @@ -1 +1 @@ -Subproject commit bae9700cd9425541a0f6029957f005e5ad3ef660 +Subproject commit 85c2334e92e215cce34e8e0ed8b2dce4700f4a50 diff --git a/src/android/app/src/main/java/org/citra/citra_emu/features/settings/ui/SettingsFragmentPresenter.java b/src/android/app/src/main/java/org/citra/citra_emu/features/settings/ui/SettingsFragmentPresenter.java index d791931b3b..9f73e1ff25 100644 --- a/src/android/app/src/main/java/org/citra/citra_emu/features/settings/ui/SettingsFragmentPresenter.java +++ b/src/android/app/src/main/java/org/citra/citra_emu/features/settings/ui/SettingsFragmentPresenter.java @@ -359,6 +359,8 @@ public final class SettingsFragmentPresenter { SettingSection rendererSection = mSettings.getSection(Settings.SECTION_RENDERER); Setting graphicsApi = rendererSection.getSetting(SettingsFile.KEY_GRAPHICS_API); + Setting spirvShaderGen = rendererSection.getSetting(SettingsFile.KEY_SPIRV_SHADER_GEN); + Setting asyncShaders = 
rendererSection.getSetting(SettingsFile.KEY_ASYNC_SHADERS); Setting resolutionFactor = rendererSection.getSetting(SettingsFile.KEY_RESOLUTION_FACTOR); Setting filterMode = rendererSection.getSetting(SettingsFile.KEY_FILTER_MODE); Setting shadersAccurateMul = rendererSection.getSetting(SettingsFile.KEY_SHADERS_ACCURATE_MUL); @@ -377,6 +379,8 @@ public final class SettingsFragmentPresenter { sl.add(new HeaderSetting(null, null, R.string.renderer, 0)); sl.add(new SingleChoiceSetting(SettingsFile.KEY_GRAPHICS_API, Settings.SECTION_RENDERER, R.string.graphics_api, 0, R.array.graphicsApiNames, R.array.graphicsApiValues, 0, graphicsApi)); + sl.add(new CheckBoxSetting(SettingsFile.KEY_SPIRV_SHADER_GEN, Settings.SECTION_RENDERER, R.string.spirv_shader_gen, R.string.spirv_shader_gen_description, true, spirvShaderGen)); + sl.add(new CheckBoxSetting(SettingsFile.KEY_ASYNC_SHADERS, Settings.SECTION_RENDERER, R.string.async_shaders, R.string.async_shaders_description, false, asyncShaders)); sl.add(new SliderSetting(SettingsFile.KEY_RESOLUTION_FACTOR, Settings.SECTION_RENDERER, R.string.internal_resolution, R.string.internal_resolution_description, 1, 4, "x", 1, resolutionFactor)); sl.add(new CheckBoxSetting(SettingsFile.KEY_FILTER_MODE, Settings.SECTION_RENDERER, R.string.linear_filtering, R.string.linear_filtering_description, true, filterMode)); sl.add(new CheckBoxSetting(SettingsFile.KEY_SHADERS_ACCURATE_MUL, Settings.SECTION_RENDERER, R.string.shaders_accurate_mul, R.string.shaders_accurate_mul_description, false, shadersAccurateMul)); @@ -424,6 +428,6 @@ public final class SettingsFragmentPresenter { sl.add(new CheckBoxSetting(SettingsFile.KEY_CPU_JIT, Settings.SECTION_CORE, R.string.cpu_jit, R.string.cpu_jit_description, true, useCpuJit, true, mView)); sl.add(new CheckBoxSetting(SettingsFile.KEY_HW_SHADER, Settings.SECTION_RENDERER, R.string.hw_shaders, R.string.hw_shaders_description, true, hardwareShader, true, mView)); sl.add(new 
CheckBoxSetting(SettingsFile.KEY_USE_VSYNC, Settings.SECTION_RENDERER, R.string.vsync, R.string.vsync_description, true, vsyncEnable)); - sl.add(new CheckBoxSetting(SettingsFile.KEY_RENDERER_DEBUG, Settings.SECTION_RENDERER, R.string.renderer_debug, R.string.renderer_debug_description, false, rendererDebug)); + sl.add(new CheckBoxSetting(SettingsFile.KEY_RENDERER_DEBUG, Settings.SECTION_DEBUG, R.string.renderer_debug, R.string.renderer_debug_description, false, rendererDebug)); } } diff --git a/src/android/app/src/main/java/org/citra/citra_emu/features/settings/utils/SettingsFile.java b/src/android/app/src/main/java/org/citra/citra_emu/features/settings/utils/SettingsFile.java index f1adea163f..fec8f32826 100644 --- a/src/android/app/src/main/java/org/citra/citra_emu/features/settings/utils/SettingsFile.java +++ b/src/android/app/src/main/java/org/citra/citra_emu/features/settings/utils/SettingsFile.java @@ -45,6 +45,8 @@ public final class SettingsFile { public static final String KEY_PREMIUM = "premium"; public static final String KEY_GRAPHICS_API = "graphics_api"; + public static final String KEY_SPIRV_SHADER_GEN = "spirv_shader_gen"; + public static final String KEY_ASYNC_SHADERS = "async_shader_compilation"; public static final String KEY_RENDERER_DEBUG = "renderer_debug"; public static final String KEY_HW_SHADER = "use_hw_shader"; public static final String KEY_SHADERS_ACCURATE_MUL = "shaders_accurate_mul"; diff --git a/src/android/app/src/main/jni/CMakeLists.txt b/src/android/app/src/main/jni/CMakeLists.txt index 531704490c..7cc6304c1c 100644 --- a/src/android/app/src/main/jni/CMakeLists.txt +++ b/src/android/app/src/main/jni/CMakeLists.txt @@ -19,6 +19,10 @@ add_library(citra-android SHARED default_ini.h emu_window/emu_window.cpp emu_window/emu_window.h + emu_window/emu_window_gl.cpp + emu_window/emu_window_gl.h + emu_window/emu_window_vk.cpp + emu_window/emu_window_vk.h game_info.cpp game_settings.cpp game_settings.h @@ -30,7 +34,7 @@ 
add_library(citra-android SHARED ndk_motion.h ) -target_link_libraries(citra-android PRIVATE audio_core citra_common citra_core input_common network) +target_link_libraries(citra-android PRIVATE audio_core citra_common citra_core input_common network adrenotools) target_link_libraries(citra-android PRIVATE android camera2ndk EGL glad inih jnigraphics log mediandk yuv) set(CPACK_PACKAGE_EXECUTABLES ${CPACK_PACKAGE_EXECUTABLES} citra-android) diff --git a/src/android/app/src/main/jni/config.cpp b/src/android/app/src/main/jni/config.cpp index 7efdeefcde..e4c25fdf2c 100644 --- a/src/android/app/src/main/jni/config.cpp +++ b/src/android/app/src/main/jni/config.cpp @@ -147,6 +147,9 @@ void Config::ReadValues() { Settings::values.shaders_accurate_mul = sdl2_config->GetBoolean("Renderer", "shaders_accurate_mul", false); ReadSetting("Renderer", Settings::values.graphics_api); + ReadSetting("Renderer", Settings::values.async_presentation); + ReadSetting("Renderer", Settings::values.async_shader_compilation); + ReadSetting("Renderer", Settings::values.spirv_shader_gen); ReadSetting("Renderer", Settings::values.use_hw_shader); ReadSetting("Renderer", Settings::values.use_shader_jit); ReadSetting("Renderer", Settings::values.resolution_factor); diff --git a/src/android/app/src/main/jni/default_ini.h b/src/android/app/src/main/jni/default_ini.h index 68e356b6f8..8718b264bd 100644 --- a/src/android/app/src/main/jni/default_ini.h +++ b/src/android/app/src/main/jni/default_ini.h @@ -99,9 +99,17 @@ cpu_clock_percentage = [Renderer] # Whether to render using OpenGL -# 1: OpenGLES (default) +# 1: OpenGL ES (default), 2: Vulkan graphics_api = +# Whether to compile shaders on multiple worker threads (Vulkan only) +# 0: Off, 1: On (default) +async_shader_compilation = + +# Whether to emit PICA fragment shader using SPIRV or GLSL (Vulkan only) +# 0: GLSL, 1: SPIR-V (default) +spirv_shader_gen = + # Whether to use hardware shaders to emulate 3DS shaders # 0: Software, 1 (default): Hardware 
use_hw_shader = diff --git a/src/android/app/src/main/jni/emu_window/emu_window.cpp b/src/android/app/src/main/jni/emu_window/emu_window.cpp index c2cc91ef56..5e53a92828 100644 --- a/src/android/app/src/main/jni/emu_window/emu_window.cpp +++ b/src/android/app/src/main/jni/emu_window/emu_window.cpp @@ -6,10 +6,7 @@ #include #include #include - #include -#include - #include "common/logging/log.h" #include "common/settings.h" #include "input_common/main.h" @@ -20,52 +17,6 @@ #include "video_core/renderer_base.h" #include "video_core/video_core.h" -static constexpr std::array egl_attribs{EGL_SURFACE_TYPE, - EGL_WINDOW_BIT, - EGL_RENDERABLE_TYPE, - EGL_OPENGL_ES3_BIT_KHR, - EGL_BLUE_SIZE, - 8, - EGL_GREEN_SIZE, - 8, - EGL_RED_SIZE, - 8, - EGL_DEPTH_SIZE, - 0, - EGL_STENCIL_SIZE, - 0, - EGL_NONE}; -static constexpr std::array egl_empty_attribs{EGL_WIDTH, 1, EGL_HEIGHT, 1, EGL_NONE}; -static constexpr std::array egl_context_attribs{EGL_CONTEXT_CLIENT_VERSION, 3, EGL_NONE}; - -SharedContext_Android::SharedContext_Android(EGLDisplay egl_display, EGLConfig egl_config, - EGLContext egl_share_context) - : egl_display{egl_display}, egl_surface{eglCreatePbufferSurface(egl_display, egl_config, - egl_empty_attribs.data())}, - egl_context{eglCreateContext(egl_display, egl_config, egl_share_context, - egl_context_attribs.data())} { - ASSERT_MSG(egl_surface, "eglCreatePbufferSurface() failed!"); - ASSERT_MSG(egl_context, "eglCreateContext() failed!"); -} - -SharedContext_Android::~SharedContext_Android() { - if (!eglDestroySurface(egl_display, egl_surface)) { - LOG_CRITICAL(Frontend, "eglDestroySurface() failed"); - } - - if (!eglDestroyContext(egl_display, egl_context)) { - LOG_CRITICAL(Frontend, "eglDestroySurface() failed"); - } -} - -void SharedContext_Android::MakeCurrent() { - eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context); -} - -void SharedContext_Android::DoneCurrent() { - eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); -} - 
static bool IsPortraitMode() { return JNI_FALSE != IDCache::GetEnvForThread()->CallStaticBooleanMethod( IDCache::GetNativeLibraryClass(), IDCache::GetIsPortraitMode()); @@ -79,7 +30,12 @@ static void UpdateLandscapeScreenLayout() { void EmuWindow_Android::OnSurfaceChanged(ANativeWindow* surface) { render_window = surface; + + window_info.type = Frontend::WindowSystemType::Android; + window_info.render_surface = surface; + StopPresenting(); + OnFramebufferSizeChanged(); } bool EmuWindow_Android::OnTouchEvent(int x, int y, bool pressed) { @@ -98,6 +54,7 @@ void EmuWindow_Android::OnTouchMoved(int x, int y) { void EmuWindow_Android::OnFramebufferSizeChanged() { UpdateLandscapeScreenLayout(); const bool is_portrait_mode{IsPortraitMode()}; + const int bigger{window_width > window_height ? window_width : window_height}; const int smaller{window_width < window_height ? window_width : window_height}; if (is_portrait_mode) { @@ -107,7 +64,7 @@ void EmuWindow_Android::OnFramebufferSizeChanged() { } } -EmuWindow_Android::EmuWindow_Android(ANativeWindow* surface) { +EmuWindow_Android::EmuWindow_Android(ANativeWindow* surface) : host_window{surface} { LOG_DEBUG(Frontend, "Initializing EmuWindow_Android"); if (!surface) { @@ -115,108 +72,10 @@ EmuWindow_Android::EmuWindow_Android(ANativeWindow* surface) { return; } + window_width = ANativeWindow_getWidth(surface); + window_height = ANativeWindow_getHeight(surface); + Network::Init(); - - host_window = surface; - - if (egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY); egl_display == EGL_NO_DISPLAY) { - LOG_CRITICAL(Frontend, "eglGetDisplay() failed"); - return; - } - if (eglInitialize(egl_display, 0, 0) != EGL_TRUE) { - LOG_CRITICAL(Frontend, "eglInitialize() failed"); - return; - } - if (EGLint egl_num_configs{}; eglChooseConfig(egl_display, egl_attribs.data(), &egl_config, 1, - &egl_num_configs) != EGL_TRUE) { - LOG_CRITICAL(Frontend, "eglChooseConfig() failed"); - return; - } - - CreateWindowSurface(); - - if 
(eglQuerySurface(egl_display, egl_surface, EGL_WIDTH, &window_width) != EGL_TRUE) { - return; - } - if (eglQuerySurface(egl_display, egl_surface, EGL_HEIGHT, &window_height) != EGL_TRUE) { - return; - } - - if (egl_context = eglCreateContext(egl_display, egl_config, 0, egl_context_attribs.data()); - egl_context == EGL_NO_CONTEXT) { - LOG_CRITICAL(Frontend, "eglCreateContext() failed"); - return; - } - if (eglSurfaceAttrib(egl_display, egl_surface, EGL_SWAP_BEHAVIOR, EGL_BUFFER_DESTROYED) != - EGL_TRUE) { - LOG_CRITICAL(Frontend, "eglSurfaceAttrib() failed"); - return; - } - if (core_context = CreateSharedContext(); !core_context) { - LOG_CRITICAL(Frontend, "CreateSharedContext() failed"); - return; - } - if (eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context) != EGL_TRUE) { - LOG_CRITICAL(Frontend, "eglMakeCurrent() failed"); - return; - } - if (!gladLoadGLES2Loader((GLADloadproc)eglGetProcAddress)) { - LOG_CRITICAL(Frontend, "gladLoadGLES2Loader() failed"); - return; - } - if (!eglSwapInterval(egl_display, Settings::values.use_vsync_new ? 
1 : 0)) { - LOG_CRITICAL(Frontend, "eglSwapInterval() failed"); - return; - } - - OnFramebufferSizeChanged(); -} - -bool EmuWindow_Android::CreateWindowSurface() { - if (!host_window) { - return true; - } - - EGLint format{}; - eglGetConfigAttrib(egl_display, egl_config, EGL_NATIVE_VISUAL_ID, &format); - ANativeWindow_setBuffersGeometry(host_window, 0, 0, format); - - if (egl_surface = eglCreateWindowSurface(egl_display, egl_config, host_window, 0); - egl_surface == EGL_NO_SURFACE) { - return {}; - } - - return !!egl_surface; -} - -void EmuWindow_Android::DestroyWindowSurface() { - if (!egl_surface) { - return; - } - if (eglGetCurrentSurface(EGL_DRAW) == egl_surface) { - eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); - } - if (!eglDestroySurface(egl_display, egl_surface)) { - LOG_CRITICAL(Frontend, "eglDestroySurface() failed"); - } - egl_surface = EGL_NO_SURFACE; -} - -void EmuWindow_Android::DestroyContext() { - if (!egl_context) { - return; - } - if (eglGetCurrentContext() == egl_context) { - eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); - } - if (!eglDestroyContext(egl_display, egl_context)) { - LOG_CRITICAL(Frontend, "eglDestroySurface() failed"); - } - if (!eglTerminate(egl_display)) { - LOG_CRITICAL(Frontend, "eglTerminate() failed"); - } - egl_context = EGL_NO_CONTEXT; - egl_display = EGL_NO_DISPLAY; } EmuWindow_Android::~EmuWindow_Android() { @@ -224,48 +83,6 @@ EmuWindow_Android::~EmuWindow_Android() { DestroyContext(); } -std::unique_ptr EmuWindow_Android::CreateSharedContext() const { - return std::make_unique(egl_display, egl_config, egl_context); -} - -void EmuWindow_Android::StopPresenting() { - if (presenting_state == PresentingState::Running) { - eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); - } - presenting_state = PresentingState::Stopped; -} - -void EmuWindow_Android::TryPresenting() { - if (presenting_state != PresentingState::Running) { - if 
(presenting_state == PresentingState::Initial) { - eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); - presenting_state = PresentingState::Running; - } else { - return; - } - } - eglSwapInterval(egl_display, Settings::values.use_vsync_new ? 1 : 0); - if (VideoCore::g_renderer) { - VideoCore::g_renderer->TryPresent(0); - eglSwapBuffers(egl_display, egl_surface); - } -} - -void EmuWindow_Android::PollEvents() { - if (!render_window) { - return; - } - - host_window = render_window; - render_window = nullptr; - - DestroyWindowSurface(); - CreateWindowSurface(); - OnFramebufferSizeChanged(); - presenting_state = PresentingState::Initial; -} - void EmuWindow_Android::MakeCurrent() { core_context->MakeCurrent(); } diff --git a/src/android/app/src/main/jni/emu_window/emu_window.h b/src/android/app/src/main/jni/emu_window/emu_window.h index 10a293c969..3dd9f30df2 100644 --- a/src/android/app/src/main/jni/emu_window/emu_window.h +++ b/src/android/app/src/main/jni/emu_window/emu_window.h @@ -5,38 +5,13 @@ #pragma once #include - -#include -#include - #include "core/frontend/emu_window.h" -struct ANativeWindow; - -class SharedContext_Android : public Frontend::GraphicsContext { -public: - SharedContext_Android(EGLDisplay egl_display, EGLConfig egl_config, - EGLContext egl_share_context); - - ~SharedContext_Android() override; - - void MakeCurrent() override; - - void DoneCurrent() override; - -private: - EGLDisplay egl_display{}; - EGLSurface egl_surface{}; - EGLContext egl_context{}; -}; - class EmuWindow_Android : public Frontend::EmuWindow { public: EmuWindow_Android(ANativeWindow* surface); ~EmuWindow_Android(); - void Present(); - /// Called by the onSurfaceChanges() method to change the surface void OnSurfaceChanged(ANativeWindow* surface); @@ -46,38 +21,34 @@ public: /// Handles movement of touch pointer void OnTouchMoved(int x, int y); - void PollEvents() override; void MakeCurrent() override; + void 
DoneCurrent() override; - void TryPresenting(); - void StopPresenting(); + virtual void TryPresenting() {} - std::unique_ptr CreateSharedContext() const override; + virtual void StopPresenting() {} -private: +protected: void OnFramebufferSizeChanged(); - bool CreateWindowSurface(); - void DestroyWindowSurface(); - void DestroyContext(); + /// Creates the API specific window surface + virtual bool CreateWindowSurface() { + return false; + } + + /// Destroys the API specific window surface + virtual void DestroyWindowSurface() {} + + /// Destroys the graphics context + virtual void DestroyContext() {} + +protected: ANativeWindow* render_window{}; ANativeWindow* host_window{}; int window_width{}; int window_height{}; - EGLConfig egl_config; - EGLSurface egl_surface{}; - EGLContext egl_context{}; - EGLDisplay egl_display{}; - std::unique_ptr core_context; - - enum class PresentingState { - Initial, - Running, - Stopped, - }; - PresentingState presenting_state{}; }; diff --git a/src/android/app/src/main/jni/emu_window/emu_window_gl.cpp b/src/android/app/src/main/jni/emu_window/emu_window_gl.cpp new file mode 100644 index 0000000000..b910c3184a --- /dev/null +++ b/src/android/app/src/main/jni/emu_window/emu_window_gl.cpp @@ -0,0 +1,215 @@ +// Copyright 2019 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include + +#include +#include + +#include "common/logging/log.h" +#include "common/settings.h" +#include "input_common/main.h" +#include "jni/emu_window/emu_window_gl.h" +#include "video_core/renderer_base.h" +#include "video_core/video_core.h" + +static constexpr std::array egl_attribs{EGL_SURFACE_TYPE, + EGL_WINDOW_BIT, + EGL_RENDERABLE_TYPE, + EGL_OPENGL_ES3_BIT_KHR, + EGL_BLUE_SIZE, + 8, + EGL_GREEN_SIZE, + 8, + EGL_RED_SIZE, + 8, + EGL_DEPTH_SIZE, + 0, + EGL_STENCIL_SIZE, + 0, + EGL_NONE}; +static constexpr std::array egl_empty_attribs{EGL_WIDTH, 1, EGL_HEIGHT, 1, EGL_NONE}; +static constexpr std::array egl_context_attribs{EGL_CONTEXT_CLIENT_VERSION, 3, EGL_NONE}; + +class SharedContext_Android : public Frontend::GraphicsContext { +public: + SharedContext_Android(EGLDisplay egl_display, EGLConfig egl_config, + EGLContext egl_share_context) + : egl_display{egl_display}, egl_surface{eglCreatePbufferSurface(egl_display, egl_config, + egl_empty_attribs.data())}, + egl_context{eglCreateContext(egl_display, egl_config, egl_share_context, + egl_context_attribs.data())} { + ASSERT_MSG(egl_surface, "eglCreatePbufferSurface() failed!"); + ASSERT_MSG(egl_context, "eglCreateContext() failed!"); + } + + ~SharedContext_Android() override { + if (!eglDestroySurface(egl_display, egl_surface)) { + LOG_CRITICAL(Frontend, "eglDestroySurface() failed"); + } + + if (!eglDestroyContext(egl_display, egl_context)) { + LOG_CRITICAL(Frontend, "eglDestroySurface() failed"); + } + } + + void MakeCurrent() override { + eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context); + } + + void DoneCurrent() override { + eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + } + +private: + EGLDisplay egl_display{}; + EGLSurface egl_surface{}; + EGLContext egl_context{}; +}; + +EmuWindow_Android_OpenGL::EmuWindow_Android_OpenGL(ANativeWindow* surface) + : EmuWindow_Android{surface} { + if (egl_display = 
eglGetDisplay(EGL_DEFAULT_DISPLAY); egl_display == EGL_NO_DISPLAY) { + LOG_CRITICAL(Frontend, "eglGetDisplay() failed"); + return; + } + if (eglInitialize(egl_display, 0, 0) != EGL_TRUE) { + LOG_CRITICAL(Frontend, "eglInitialize() failed"); + return; + } + if (EGLint egl_num_configs{}; eglChooseConfig(egl_display, egl_attribs.data(), &egl_config, 1, + &egl_num_configs) != EGL_TRUE) { + LOG_CRITICAL(Frontend, "eglChooseConfig() failed"); + return; + } + + CreateWindowSurface(); + + if (eglQuerySurface(egl_display, egl_surface, EGL_WIDTH, &window_width) != EGL_TRUE) { + return; + } + if (eglQuerySurface(egl_display, egl_surface, EGL_HEIGHT, &window_height) != EGL_TRUE) { + return; + } + + if (egl_context = eglCreateContext(egl_display, egl_config, 0, egl_context_attribs.data()); + egl_context == EGL_NO_CONTEXT) { + LOG_CRITICAL(Frontend, "eglCreateContext() failed"); + return; + } + if (eglSurfaceAttrib(egl_display, egl_surface, EGL_SWAP_BEHAVIOR, EGL_BUFFER_DESTROYED) != + EGL_TRUE) { + LOG_CRITICAL(Frontend, "eglSurfaceAttrib() failed"); + return; + } + if (core_context = CreateSharedContext(); !core_context) { + LOG_CRITICAL(Frontend, "CreateSharedContext() failed"); + return; + } + if (eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context) != EGL_TRUE) { + LOG_CRITICAL(Frontend, "eglMakeCurrent() failed"); + return; + } + if (!gladLoadGLES2Loader((GLADloadproc)eglGetProcAddress)) { + LOG_CRITICAL(Frontend, "gladLoadGLES2Loader() failed"); + return; + } + if (!eglSwapInterval(egl_display, Settings::values.use_vsync_new ? 
1 : 0)) { + LOG_CRITICAL(Frontend, "eglSwapInterval() failed"); + return; + } + + OnFramebufferSizeChanged(); +} + +bool EmuWindow_Android_OpenGL::CreateWindowSurface() { + if (!host_window) { + return true; + } + + EGLint format{}; + eglGetConfigAttrib(egl_display, egl_config, EGL_NATIVE_VISUAL_ID, &format); + ANativeWindow_setBuffersGeometry(host_window, 0, 0, format); + + if (egl_surface = eglCreateWindowSurface(egl_display, egl_config, host_window, 0); + egl_surface == EGL_NO_SURFACE) { + return {}; + } + + return egl_surface; +} + +void EmuWindow_Android_OpenGL::DestroyWindowSurface() { + if (!egl_surface) { + return; + } + if (eglGetCurrentSurface(EGL_DRAW) == egl_surface) { + eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + } + if (!eglDestroySurface(egl_display, egl_surface)) { + LOG_CRITICAL(Frontend, "eglDestroySurface() failed"); + } + egl_surface = EGL_NO_SURFACE; +} + +void EmuWindow_Android_OpenGL::DestroyContext() { + if (!egl_context) { + return; + } + if (eglGetCurrentContext() == egl_context) { + eglMakeCurrent(egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + } + if (!eglDestroyContext(egl_display, egl_context)) { + LOG_CRITICAL(Frontend, "eglDestroySurface() failed"); + } + if (!eglTerminate(egl_display)) { + LOG_CRITICAL(Frontend, "eglTerminate() failed"); + } + egl_context = EGL_NO_CONTEXT; + egl_display = EGL_NO_DISPLAY; +} + +std::unique_ptr EmuWindow_Android_OpenGL::CreateSharedContext() const { + return std::make_unique(egl_display, egl_config, egl_context); +} + +void EmuWindow_Android_OpenGL::PollEvents() { + if (!render_window) { + return; + } + + host_window = render_window; + render_window = nullptr; + + DestroyWindowSurface(); + CreateWindowSurface(); + OnFramebufferSizeChanged(); + presenting_state = PresentingState::Initial; +} + +void EmuWindow_Android_OpenGL::StopPresenting() { + if (presenting_state == PresentingState::Running) { + eglMakeCurrent(egl_display, EGL_NO_SURFACE, 
EGL_NO_SURFACE, EGL_NO_CONTEXT); + } + presenting_state = PresentingState::Stopped; +} + +void EmuWindow_Android_OpenGL::TryPresenting() { + if (presenting_state == PresentingState::Initial) [[unlikely]] { + eglMakeCurrent(egl_display, egl_surface, egl_surface, egl_context); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + presenting_state = PresentingState::Running; + } + if (presenting_state != PresentingState::Running) [[unlikely]] { + return; + } + eglSwapInterval(egl_display, Settings::values.use_vsync_new ? 1 : 0); + if (VideoCore::g_renderer) { + VideoCore::g_renderer->TryPresent(0); + eglSwapBuffers(egl_display, egl_surface); + } +} diff --git a/src/android/app/src/main/jni/emu_window/emu_window_gl.h b/src/android/app/src/main/jni/emu_window/emu_window_gl.h new file mode 100644 index 0000000000..f92950b94d --- /dev/null +++ b/src/android/app/src/main/jni/emu_window/emu_window_gl.h @@ -0,0 +1,44 @@ +// Copyright 2019 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include +#include + +#include "jni/emu_window/emu_window.h" + +struct ANativeWindow; + +class EmuWindow_Android_OpenGL : public EmuWindow_Android { +public: + EmuWindow_Android_OpenGL(ANativeWindow* surface); + ~EmuWindow_Android_OpenGL() override = default; + + void TryPresenting() override; + void StopPresenting() override; + void PollEvents() override; + + std::unique_ptr CreateSharedContext() const override; + +private: + bool CreateWindowSurface() override; + void DestroyWindowSurface() override; + void DestroyContext() override; + +private: + EGLConfig egl_config; + EGLSurface egl_surface{}; + EGLContext egl_context{}; + EGLDisplay egl_display{}; + + enum class PresentingState { + Initial, + Running, + Stopped, + }; + PresentingState presenting_state{}; +}; diff --git a/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp b/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp new file mode 100644 index 0000000000..81dc69c4a8 --- /dev/null +++ b/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp @@ -0,0 +1,53 @@ +// Copyright 2019 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include "common/logging/log.h" +#include "common/settings.h" +#include "jni/emu_window/emu_window_vk.h" +#include "video_core/video_core.h" + +class GraphicsContext_Android final : public Frontend::GraphicsContext { +public: + explicit GraphicsContext_Android(std::shared_ptr driver_library_) + : driver_library{driver_library_} {} + + ~GraphicsContext_Android() = default; + + std::shared_ptr GetDriverLibrary() override { + return driver_library; + } + +private: + std::shared_ptr driver_library; +}; + +EmuWindow_Android_Vulkan::EmuWindow_Android_Vulkan( + ANativeWindow* surface, std::shared_ptr driver_library_) + : EmuWindow_Android{surface}, driver_library{driver_library_} { + CreateWindowSurface(); + + if (core_context = CreateSharedContext(); !core_context) { + LOG_CRITICAL(Frontend, "CreateSharedContext() failed"); + return; + } + + OnFramebufferSizeChanged(); +} + +bool EmuWindow_Android_Vulkan::CreateWindowSurface() { + if (!host_window) { + return true; + } + + window_info.type = Frontend::WindowSystemType::Android; + window_info.render_surface = host_window; + + return true; +} + +std::unique_ptr EmuWindow_Android_Vulkan::CreateSharedContext() const { + return std::make_unique(driver_library); +} diff --git a/src/android/app/src/main/jni/emu_window/emu_window_vk.h b/src/android/app/src/main/jni/emu_window/emu_window_vk.h new file mode 100644 index 0000000000..58bbd30924 --- /dev/null +++ b/src/android/app/src/main/jni/emu_window/emu_window_vk.h @@ -0,0 +1,26 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "jni/emu_window/emu_window.h" + +struct ANativeWindow; + +class EmuWindow_Android_Vulkan : public EmuWindow_Android { +public: + EmuWindow_Android_Vulkan(ANativeWindow* surface, + std::shared_ptr driver_library); + ~EmuWindow_Android_Vulkan() override = default; + + void PollEvents() override {} + + std::unique_ptr CreateSharedContext() const override; + +private: + bool CreateWindowSurface() override; + +private: + std::shared_ptr driver_library; +}; diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp index 2a722f37d8..220151d117 100644 --- a/src/android/app/src/main/jni/native.cpp +++ b/src/android/app/src/main/jni/native.cpp @@ -4,13 +4,16 @@ #include #include +#include #include #include #include "audio_core/dsp_interface.h" #include "common/aarch64/cpu_detect.h" +#include "common/arch.h" #include "common/common_paths.h" +#include "common/dynamic_library/dynamic_library.h" #include "common/file_util.h" #include "common/logging/backend.h" #include "common/logging/log.h" @@ -33,7 +36,8 @@ #include "jni/camera/ndk_camera.h" #include "jni/camera/still_image_camera.h" #include "jni/config.h" -#include "jni/emu_window/emu_window.h" +#include "jni/emu_window/emu_window_gl.h" +#include "jni/emu_window/emu_window_vk.h" #include "jni/game_settings.h" #include "jni/id_cache.h" #include "jni/input_manager.h" @@ -42,10 +46,15 @@ #include "video_core/renderer_base.h" #include "video_core/video_core.h" +#if CITRA_ARCH(arm64) +#include +#endif + namespace { ANativeWindow* s_surf; +std::shared_ptr vulkan_library{}; std::unique_ptr window; std::atomic stop_run{true}; @@ -123,11 +132,14 @@ static Core::System::ResultStatus RunCitra(const std::string& filepath) { const auto graphics_api = Settings::values.graphics_api.GetValue(); switch (graphics_api) { case Settings::GraphicsAPI::OpenGL: - window = std::make_unique(s_surf); + window = std::make_unique(s_surf); + break; + case Settings::GraphicsAPI::Vulkan: + 
window = std::make_unique(s_surf, vulkan_library); break; default: - LOG_CRITICAL(Frontend, "Unknown graphics API {}, using OpenGL", graphics_api); - window = std::make_unique(s_surf); + LOG_CRITICAL(Frontend, "Unknown graphics API {}, using Vulkan", graphics_api); + window = std::make_unique(s_surf, vulkan_library); } Core::System& system{Core::System::GetInstance()}; @@ -228,6 +240,37 @@ static Core::System::ResultStatus RunCitra(const std::string& filepath) { return Core::System::ResultStatus::Success; } +void InitializeGpuDriver(const std::string& hook_lib_dir, const std::string& custom_driver_dir, + const std::string& custom_driver_name, + const std::string& file_redirect_dir) { +#if CITRA_ARCH(arm64) + void* handle{}; + const char* file_redirect_dir_{}; + int featureFlags{}; + + // Enable driver file redirection when renderer debugging is enabled. + if (Settings::values.renderer_debug && file_redirect_dir.size()) { + featureFlags |= ADRENOTOOLS_DRIVER_FILE_REDIRECT; + file_redirect_dir_ = file_redirect_dir.c_str(); + } + + // Try to load a custom driver. + if (custom_driver_name.size()) { + handle = adrenotools_open_libvulkan( + RTLD_NOW, featureFlags | ADRENOTOOLS_DRIVER_CUSTOM, nullptr, hook_lib_dir.c_str(), + custom_driver_dir.c_str(), custom_driver_name.c_str(), file_redirect_dir_, nullptr); + } + + // Try to load the system driver. 
+ if (!handle) { + handle = adrenotools_open_libvulkan(RTLD_NOW, featureFlags, nullptr, hook_lib_dir.c_str(), + nullptr, nullptr, file_redirect_dir_, nullptr); + } + + vulkan_library = std::make_shared(handle); +#endif +} + extern "C" { void Java_org_citra_citra_1emu_NativeLibrary_SurfaceChanged(JNIEnv* env, @@ -238,6 +281,9 @@ void Java_org_citra_citra_1emu_NativeLibrary_SurfaceChanged(JNIEnv* env, if (window) { window->OnSurfaceChanged(s_surf); } + if (VideoCore::g_renderer) { + VideoCore::g_renderer->NotifySurfaceChanged(); + } LOG_INFO(Frontend, "Surface changed"); } @@ -258,6 +304,15 @@ void Java_org_citra_citra_1emu_NativeLibrary_DoFrame(JNIEnv* env, [[maybe_unused window->TryPresenting(); } +void JNICALL Java_org_citra_citra_1emu_NativeLibrary_InitializeGpuDriver(JNIEnv* env, jclass clazz, + jstring hook_lib_dir, + jstring custom_driver_dir, + jstring custom_driver_name, + jstring file_redirect_dir) { + InitializeGpuDriver(GetJString(env, hook_lib_dir), GetJString(env, custom_driver_dir), + GetJString(env, custom_driver_name), GetJString(env, file_redirect_dir)); +} + void Java_org_citra_citra_1emu_NativeLibrary_NotifyOrientationChange(JNIEnv* env, [[maybe_unused]] jclass clazz, jint layout_option, diff --git a/src/android/app/src/main/jniLibs/arm64-v8a/libVkLayer_khronos_validation.so b/src/android/app/src/main/jniLibs/arm64-v8a/libVkLayer_khronos_validation.so new file mode 100644 index 0000000000..b999455c4f Binary files /dev/null and b/src/android/app/src/main/jniLibs/arm64-v8a/libVkLayer_khronos_validation.so differ diff --git a/src/android/app/src/main/jniLibs/arm64-v8a/libc++_shared.so b/src/android/app/src/main/jniLibs/arm64-v8a/libc++_shared.so new file mode 100644 index 0000000000..65ac529d1d Binary files /dev/null and b/src/android/app/src/main/jniLibs/arm64-v8a/libc++_shared.so differ diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml index 69ae0e0e93..444abd6cb2 100644 --- 
a/src/android/app/src/main/res/values/arrays.xml +++ b/src/android/app/src/main/res/values/arrays.xml @@ -177,11 +177,13 @@ - OpenGLES + OpenGL ES + Vulkan 1 + 2 diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index d5def8d9f7..79af0abfac 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -74,6 +74,10 @@ Renderer Graphics API + Enable SPIR-V shader generation + Emits the fragment shader used to emulate PICA using SPIR-V instead of GLSL + Enable asynchronous shader compilation + Compiles shaders in the background to reduce stuttering during gameplay. When enabled expect temporary graphical glitches Debug Renderer Log additional graphics related debug information. When enabled, game performance will be significantly reduced. Enable V-Sync diff --git a/src/citra/CMakeLists.txt b/src/citra/CMakeLists.txt index 4e20b14d4d..80cfc46e45 100644 --- a/src/citra/CMakeLists.txt +++ b/src/citra/CMakeLists.txt @@ -12,6 +12,8 @@ add_executable(citra emu_window/emu_window_sdl2_gl.h emu_window/emu_window_sdl2_sw.cpp emu_window/emu_window_sdl2_sw.h + emu_window/emu_window_sdl2_vk.cpp + emu_window/emu_window_sdl2_vk.h precompiled_headers.h resource.h ) diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp index 588f46403c..4029dd1e37 100644 --- a/src/citra/citra.cpp +++ b/src/citra/citra.cpp @@ -15,6 +15,7 @@ #include "citra/emu_window/emu_window_sdl2.h" #include "citra/emu_window/emu_window_sdl2_gl.h" #include "citra/emu_window/emu_window_sdl2_sw.h" +#include "citra/emu_window/emu_window_sdl2_vk.h" #include "common/common_paths.h" #include "common/detached_tasks.h" #include "common/file_util.h" @@ -351,6 +352,8 @@ int main(int argc, char** argv) { switch (Settings::values.graphics_api.GetValue()) { case Settings::GraphicsAPI::OpenGL: return std::make_unique(system, fullscreen, is_secondary); + case Settings::GraphicsAPI::Vulkan: + return 
std::make_unique(system, fullscreen, is_secondary); case Settings::GraphicsAPI::Software: return std::make_unique(system, fullscreen, is_secondary); } diff --git a/src/citra/config.cpp b/src/citra/config.cpp index 5edaf42da5..9520a8f6db 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -133,6 +133,10 @@ void Config::ReadValues() { // Renderer ReadSetting("Renderer", Settings::values.graphics_api); + ReadSetting("Renderer", Settings::values.physical_device); + ReadSetting("Renderer", Settings::values.spirv_shader_gen); + ReadSetting("Renderer", Settings::values.async_shader_compilation); + ReadSetting("Renderer", Settings::values.async_presentation); ReadSetting("Renderer", Settings::values.use_gles); ReadSetting("Renderer", Settings::values.use_hw_shader); ReadSetting("Renderer", Settings::values.shaders_accurate_mul); diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h index 941e17733f..6dd52b3723 100644 --- a/src/citra/default_ini.h +++ b/src/citra/default_ini.h @@ -99,7 +99,7 @@ cpu_clock_percentage = [Renderer] # Whether to render using OpenGL or Software -# 0: Software, 1: OpenGL (default) +# 0: Software, 1: OpenGL (default), 2: Vulkan graphics_api = # Whether to render using GLES or OpenGL diff --git a/src/citra/emu_window/emu_window_sdl2_vk.cpp b/src/citra/emu_window/emu_window_sdl2_vk.cpp new file mode 100644 index 0000000000..b7e46e1a36 --- /dev/null +++ b/src/citra/emu_window/emu_window_sdl2_vk.cpp @@ -0,0 +1,90 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include +#include +#include +#include "citra/emu_window/emu_window_sdl2_vk.h" +#include "common/logging/log.h" +#include "common/scm_rev.h" +#include "core/frontend/emu_window.h" + +class DummyContext : public Frontend::GraphicsContext {}; + +EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen, bool is_secondary) + : EmuWindow_SDL2{system, is_secondary} { + const std::string window_title = fmt::format("Citra {} | {}-{}", Common::g_build_fullname, + Common::g_scm_branch, Common::g_scm_desc); + render_window = + SDL_CreateWindow(window_title.c_str(), + SDL_WINDOWPOS_UNDEFINED, // x position + SDL_WINDOWPOS_UNDEFINED, // y position + Core::kScreenTopWidth, Core::kScreenTopHeight + Core::kScreenBottomHeight, + SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI); + SDL_SysWMinfo wm; + SDL_VERSION(&wm.version); + if (SDL_GetWindowWMInfo(render_window, &wm) == SDL_FALSE) { + LOG_CRITICAL(Frontend, "Failed to get information from the window manager"); + std::exit(EXIT_FAILURE); + } + + if (fullscreen) { + Fullscreen(); + SDL_ShowCursor(false); + } + + switch (wm.subsystem) { +#ifdef SDL_VIDEO_DRIVER_WINDOWS + case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS: + window_info.type = Frontend::WindowSystemType::Windows; + window_info.render_surface = reinterpret_cast(wm.info.win.window); + break; +#endif +#ifdef SDL_VIDEO_DRIVER_X11 + case SDL_SYSWM_TYPE::SDL_SYSWM_X11: + window_info.type = Frontend::WindowSystemType::X11; + window_info.display_connection = wm.info.x11.display; + window_info.render_surface = reinterpret_cast(wm.info.x11.window); + break; +#endif +#ifdef SDL_VIDEO_DRIVER_WAYLAND + case SDL_SYSWM_TYPE::SDL_SYSWM_WAYLAND: + window_info.type = Frontend::WindowSystemType::Wayland; + window_info.display_connection = wm.info.wl.display; + window_info.render_surface = wm.info.wl.surface; + break; +#endif +#ifdef SDL_VIDEO_DRIVER_COCOA + case SDL_SYSWM_TYPE::SDL_SYSWM_COCOA: + window_info.type = 
Frontend::WindowSystemType::MacOS; + window_info.render_surface = SDL_Metal_GetLayer(SDL_Metal_CreateView(render_window)); + break; +#endif +#ifdef SDL_VIDEO_DRIVER_ANDROID + case SDL_SYSWM_TYPE::SDL_SYSWM_ANDROID: + window_info.type = Frontend::WindowSystemType::Android; + window_info.render_surface = reinterpret_cast(wm.info.android.window); + break; +#endif + default: + LOG_CRITICAL(Frontend, "Window manager subsystem {} not implemented", wm.subsystem); + std::exit(EXIT_FAILURE); + break; + } + + render_window_id = SDL_GetWindowID(render_window); + + OnResize(); + OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); + SDL_PumpEvents(); +} + +EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() = default; + +std::unique_ptr EmuWindow_SDL2_VK::CreateSharedContext() const { + return std::make_unique(); +} diff --git a/src/citra/emu_window/emu_window_sdl2_vk.h b/src/citra/emu_window/emu_window_sdl2_vk.h new file mode 100644 index 0000000000..be1cd13520 --- /dev/null +++ b/src/citra/emu_window/emu_window_sdl2_vk.h @@ -0,0 +1,24 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include "citra/emu_window/emu_window_sdl2.h" + +namespace Frontend { +class GraphicsContext; +} + +namespace Core { +class System; +} + +class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 { +public: + explicit EmuWindow_SDL2_VK(Core::System& system_, bool fullscreen, bool is_secondary); + ~EmuWindow_SDL2_VK() override; + + std::unique_ptr CreateSharedContext() const override; +}; diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index e04b71783b..b33729ffbc 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt @@ -185,6 +185,8 @@ add_executable(citra-qt util/spinbox.h util/util.cpp util/util.h + util/vk_device_info.cpp + util/vk_device_info.h ) file(GLOB COMPAT_LIST diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp index 157ecea41b..e055332fe8 100644 --- a/src/citra_qt/bootmanager.cpp +++ b/src/citra_qt/bootmanager.cpp @@ -229,20 +229,10 @@ class RenderWidget : public QWidget { public: RenderWidget(GRenderWindow* parent) : QWidget(parent) { setMouseTracking(true); - } - - virtual ~RenderWidget() = default; - - virtual void Present() {} - - void paintEvent(QPaintEvent* event) override { - Present(); update(); } - std::pair GetSize() const { - return std::make_pair(width(), height()); - } + virtual ~RenderWidget() = default; }; #ifdef HAS_OPENGL @@ -262,7 +252,7 @@ public: context = std::move(context_); } - void Present() override { + void Present() { if (!isVisible()) { return; } @@ -278,6 +268,11 @@ public: glFinish(); } + void paintEvent(QPaintEvent* event) override { + Present(); + update(); + } + QPaintEngine* paintEngine() const override { return nullptr; } @@ -289,11 +284,27 @@ private: }; #endif +class VulkanRenderWidget : public RenderWidget { +public: + explicit VulkanRenderWidget(GRenderWindow* parent) : RenderWidget(parent) { + setAttribute(Qt::WA_NativeWindow); + setAttribute(Qt::WA_PaintOnScreen); + if (GetWindowSystemType() == 
Frontend::WindowSystemType::Wayland) { + setAttribute(Qt::WA_DontCreateNativeAncestors); + } + windowHandle()->setSurfaceType(QWindow::VulkanSurface); + } + + QPaintEngine* paintEngine() const override { + return nullptr; + } +}; + struct SoftwareRenderWidget : public RenderWidget { explicit SoftwareRenderWidget(GRenderWindow* parent, Core::System& system_) : RenderWidget(parent), system(system_) {} - void Present() override { + void Present() { if (!isVisible()) { return; } @@ -323,6 +334,11 @@ struct SoftwareRenderWidget : public RenderWidget { painter.end(); } + void paintEvent(QPaintEvent* event) override { + Present(); + update(); + } + QImage LoadFramebuffer(VideoCore::ScreenId screen_id) { const auto& renderer = static_cast(system.Renderer()); const auto& info = renderer.Screen(screen_id); @@ -601,6 +617,9 @@ bool GRenderWindow::InitRenderTarget() { return false; } break; + case Settings::GraphicsAPI::Vulkan: + InitializeVulkan(); + break; } // Update the Window System information with the new render target @@ -686,6 +705,13 @@ bool GRenderWindow::InitializeOpenGL() { #endif } +void GRenderWindow::InitializeVulkan() { + auto child = new VulkanRenderWidget(this); + child_widget = child; + child_widget->windowHandle()->create(); + main_context = std::make_unique(); +} + void GRenderWindow::InitializeSoftware() { child_widget = new SoftwareRenderWidget(this, system); main_context = std::make_unique(); diff --git a/src/citra_qt/bootmanager.h b/src/citra_qt/bootmanager.h index 1689eebb7a..ce96b313f3 100644 --- a/src/citra_qt/bootmanager.h +++ b/src/citra_qt/bootmanager.h @@ -187,6 +187,7 @@ private: void OnMinimalClientAreaChangeRequest(std::pair minimal_size) override; bool InitializeOpenGL(); + void InitializeVulkan(); void InitializeSoftware(); bool LoadOpenGL(); diff --git a/src/citra_qt/configuration/config.cpp b/src/citra_qt/configuration/config.cpp index 7f2059cc24..ce9fcadd1f 100644 --- a/src/citra_qt/configuration/config.cpp +++ 
b/src/citra_qt/configuration/config.cpp @@ -483,6 +483,7 @@ void Config::ReadDebuggingValues() { ReadBasicSetting(Settings::values.use_gdbstub); ReadBasicSetting(Settings::values.gdbstub_port); ReadBasicSetting(Settings::values.renderer_debug); + ReadBasicSetting(Settings::values.dump_command_buffers); qt_config->beginGroup(QStringLiteral("LLE")); for (const auto& service_module : Service::service_module_map) { @@ -627,6 +628,10 @@ void Config::ReadRendererValues() { qt_config->beginGroup(QStringLiteral("Renderer")); ReadGlobalSetting(Settings::values.graphics_api); + ReadGlobalSetting(Settings::values.physical_device); + ReadGlobalSetting(Settings::values.spirv_shader_gen); + ReadGlobalSetting(Settings::values.async_shader_compilation); + ReadGlobalSetting(Settings::values.async_presentation); ReadGlobalSetting(Settings::values.use_hw_shader); ReadGlobalSetting(Settings::values.shaders_accurate_mul); ReadGlobalSetting(Settings::values.use_disk_shader_cache); @@ -1107,6 +1112,10 @@ void Config::SaveRendererValues() { qt_config->beginGroup(QStringLiteral("Renderer")); WriteGlobalSetting(Settings::values.graphics_api); + WriteGlobalSetting(Settings::values.physical_device); + WriteGlobalSetting(Settings::values.spirv_shader_gen); + WriteGlobalSetting(Settings::values.async_shader_compilation); + WriteGlobalSetting(Settings::values.async_presentation); WriteGlobalSetting(Settings::values.use_hw_shader); WriteGlobalSetting(Settings::values.shaders_accurate_mul); WriteGlobalSetting(Settings::values.use_disk_shader_cache); diff --git a/src/citra_qt/configuration/configure_debug.cpp b/src/citra_qt/configuration/configure_debug.cpp index 3e166834ba..98d95c9e03 100644 --- a/src/citra_qt/configuration/configure_debug.cpp +++ b/src/citra_qt/configuration/configure_debug.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include +#include #include #include "citra_qt/configuration/configuration_shared.h" #include "citra_qt/configuration/configure_debug.h" @@ -12,6 +13,7 @@ #include "common/logging/backend.h" #include "common/settings.h" #include "ui_configure_debug.h" +#include "video_core/renderer_vulkan/vk_instance.h" // The QSlider doesn't have an easy way to set a custom step amount, // so we can just convert from the sliders range (0 - 79) to the expected @@ -34,8 +36,39 @@ ConfigureDebug::ConfigureDebug(bool is_powered_on_, QWidget* parent) QDesktopServices::openUrl(QUrl::fromLocalFile(path)); }); + connect(ui->toggle_renderer_debug, &QCheckBox::clicked, this, [this](bool checked) { + if (checked && Settings::values.graphics_api.GetValue() == Settings::GraphicsAPI::Vulkan) { + try { + Vulkan::Instance debug_inst{true}; + } catch (vk::LayerNotPresentError&) { + ui->toggle_renderer_debug->toggle(); + QMessageBox::warning(this, tr("Validation layer not available"), + tr("Unable to enable debug renderer because the layer " + "VK_LAYER_KHRONOS_validation is missing. " + "Please install the Vulkan SDK or the appropriate package " + "of your distribution")); + } + } + }); + + connect(ui->toggle_dump_command_buffers, &QCheckBox::clicked, this, [this](bool checked) { + if (checked && Settings::values.graphics_api.GetValue() == Settings::GraphicsAPI::Vulkan) { + try { + Vulkan::Instance debug_inst{false, true}; + } catch (vk::LayerNotPresentError&) { + ui->toggle_dump_command_buffers->toggle(); + QMessageBox::warning(this, tr("Command buffer dumping not available"), + tr("Unable to enable command buffer dumping because the layer " + "VK_LAYER_LUNARG_api_dump is missing. 
" + "Please install the Vulkan SDK or the appropriate package " + "of your distribution")); + } + } + }); + ui->toggle_cpu_jit->setEnabled(!is_powered_on); ui->toggle_renderer_debug->setEnabled(!is_powered_on); + ui->toggle_dump_command_buffers->setEnabled(!is_powered_on); // Set a minimum width for the label to prevent the slider from changing size. // This scales across DPIs. (This value should be enough for "xxx%") @@ -62,6 +95,7 @@ void ConfigureDebug::SetConfiguration() { ui->log_filter_edit->setText(QString::fromStdString(Settings::values.log_filter.GetValue())); ui->toggle_cpu_jit->setChecked(Settings::values.use_cpu_jit.GetValue()); ui->toggle_renderer_debug->setChecked(Settings::values.renderer_debug.GetValue()); + ui->toggle_dump_command_buffers->setChecked(Settings::values.dump_command_buffers.GetValue()); if (!Settings::IsConfiguringGlobal()) { if (Settings::values.cpu_clock_percentage.UsingGlobal()) { @@ -92,6 +126,7 @@ void ConfigureDebug::ApplyConfiguration() { Common::Log::SetGlobalFilter(filter); Settings::values.use_cpu_jit = ui->toggle_cpu_jit->isChecked(); Settings::values.renderer_debug = ui->toggle_renderer_debug->isChecked(); + Settings::values.dump_command_buffers = ui->toggle_dump_command_buffers->isChecked(); ConfigurationShared::ApplyPerGameSetting( &Settings::values.cpu_clock_percentage, ui->clock_speed_combo, diff --git a/src/citra_qt/configuration/configure_debug.ui b/src/citra_qt/configuration/configure_debug.ui index afdbc2b8fc..20b84441cb 100644 --- a/src/citra_qt/configuration/configure_debug.ui +++ b/src/citra_qt/configuration/configure_debug.ui @@ -7,7 +7,7 @@ 0 0 523 - 447 + 458 @@ -112,16 +112,6 @@ CPU - - - - <html><head/><body><p>Enables the use of the ARM JIT compiler for emulating the 3DS CPUs. Don't disable unless for debugging purposes</p></body></html> - - - Enable CPU JIT - - - @@ -202,6 +192,16 @@ + + + + <html><head/><body><p>Enables the use of the ARM JIT compiler for emulating the 3DS CPUs. 
Don't disable unless for debugging purposes</p></body></html> + + + Enable CPU JIT + + + @@ -209,6 +209,13 @@ + + + + Dump command buffers + + + diff --git a/src/citra_qt/configuration/configure_dialog.cpp b/src/citra_qt/configuration/configure_dialog.cpp index ee46ff5fbe..1e19f5b3c2 100644 --- a/src/citra_qt/configuration/configure_dialog.cpp +++ b/src/citra_qt/configuration/configure_dialog.cpp @@ -23,14 +23,14 @@ #include "ui_configure.h" ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry_, Core::System& system_, - bool enable_web_config) + std::span physical_devices, bool enable_web_config) : QDialog(parent), ui{std::make_unique()}, registry{registry_}, system{system_}, is_powered_on{system.IsPoweredOn()}, general_tab{std::make_unique(this)}, system_tab{std::make_unique(system, this)}, input_tab{std::make_unique(this)}, hotkeys_tab{std::make_unique(this)}, - graphics_tab{std::make_unique(is_powered_on, this)}, + graphics_tab{std::make_unique(physical_devices, is_powered_on, this)}, enhancements_tab{std::make_unique(this)}, audio_tab{std::make_unique(is_powered_on, this)}, camera_tab{std::make_unique(this)}, diff --git a/src/citra_qt/configuration/configure_dialog.h b/src/citra_qt/configuration/configure_dialog.h index 7a4ae96d2e..8f8170fe09 100644 --- a/src/citra_qt/configuration/configure_dialog.h +++ b/src/citra_qt/configuration/configure_dialog.h @@ -5,7 +5,9 @@ #pragma once #include +#include #include +#include class HotkeyRegistry; @@ -35,6 +37,7 @@ class ConfigureDialog : public QDialog { public: explicit ConfigureDialog(QWidget* parent, HotkeyRegistry& registry, Core::System& system, + std::span physical_devices, bool enable_web_config = true); ~ConfigureDialog() override; diff --git a/src/citra_qt/configuration/configure_graphics.cpp b/src/citra_qt/configuration/configure_graphics.cpp index 3951728e96..6424e3af1e 100644 --- a/src/citra_qt/configuration/configure_graphics.cpp +++ b/src/citra_qt/configuration/configure_graphics.cpp 
@@ -7,16 +7,34 @@ #include "citra_qt/configuration/configure_graphics.h" #include "common/settings.h" #include "ui_configure_graphics.h" +#include "video_core/renderer_vulkan/vk_instance.h" -ConfigureGraphics::ConfigureGraphics(bool is_powered_on, QWidget* parent) +ConfigureGraphics::ConfigureGraphics(std::span physical_devices, bool is_powered_on, + QWidget* parent) : QWidget(parent), ui(std::make_unique()) { ui->setupUi(this); + SetupPerGameUI(); + + for (const QString& name : physical_devices) { + ui->physical_device_combo->addItem(name); + } + ui->toggle_vsync_new->setEnabled(!is_powered_on); ui->graphics_api_combo->setEnabled(!is_powered_on); + ui->physical_device_combo->setEnabled(!is_powered_on); + ui->toggle_async_shaders->setEnabled(!is_powered_on); + ui->toggle_async_present->setEnabled(!is_powered_on); // Set the index to -1 to ensure the below lambda is called with setCurrentIndex ui->graphics_api_combo->setCurrentIndex(-1); + if (physical_devices.empty()) { + const u32 index = static_cast(Settings::GraphicsAPI::Vulkan); + ui->graphics_api_combo->removeItem(index); + ui->physical_device_combo->setVisible(false); + ui->spirv_shader_gen->setVisible(false); + } + connect(ui->graphics_api_combo, qOverload(&QComboBox::currentIndexChanged), this, [this](int index) { const auto graphics_api = @@ -35,7 +53,9 @@ ConfigureGraphics::ConfigureGraphics(bool is_powered_on, QWidget* parent) ui->toggle_disk_shader_cache->setEnabled(checked && enabled); }); - SetupPerGameUI(); + connect(ui->graphics_api_combo, qOverload(&QComboBox::currentIndexChanged), this, + &ConfigureGraphics::SetPhysicalDeviceComboVisibility); + SetConfiguration(); } @@ -47,15 +67,24 @@ void ConfigureGraphics::SetConfiguration() { !Settings::values.graphics_api.UsingGlobal()); ConfigurationShared::SetPerGameSetting(ui->graphics_api_combo, &Settings::values.graphics_api); + ConfigurationShared::SetHighlight(ui->physical_device_group, + !Settings::values.physical_device.UsingGlobal()); + 
ConfigurationShared::SetPerGameSetting(ui->physical_device_combo, + &Settings::values.physical_device); } else { ui->graphics_api_combo->setCurrentIndex( static_cast(Settings::values.graphics_api.GetValue())); + ui->physical_device_combo->setCurrentIndex( + static_cast(Settings::values.physical_device.GetValue())); } ui->toggle_hw_shader->setChecked(Settings::values.use_hw_shader.GetValue()); ui->toggle_accurate_mul->setChecked(Settings::values.shaders_accurate_mul.GetValue()); ui->toggle_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); ui->toggle_vsync_new->setChecked(Settings::values.use_vsync_new.GetValue()); + ui->spirv_shader_gen->setChecked(Settings::values.spirv_shader_gen.GetValue()); + ui->toggle_async_shaders->setChecked(Settings::values.async_shader_compilation.GetValue()); + ui->toggle_async_present->setChecked(Settings::values.async_presentation.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit.GetValue()); @@ -65,6 +94,14 @@ void ConfigureGraphics::SetConfiguration() { void ConfigureGraphics::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.graphics_api, ui->graphics_api_combo); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.physical_device, + ui->physical_device_combo); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_shader_compilation, + ui->toggle_async_shaders, async_shader_compilation); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_presentation, + ui->toggle_async_present, async_presentation); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.spirv_shader_gen, + ui->spirv_shader_gen, spirv_shader_gen); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_hw_shader, ui->toggle_hw_shader, use_hw_shader); ConfigurationShared::ApplyPerGameSetting(&Settings::values.shaders_accurate_mul, @@ -93,6 +130,11 @@ void ConfigureGraphics::SetupPerGameUI() 
{ Settings::values.use_disk_shader_cache.UsingGlobal()); ui->toggle_vsync_new->setEnabled(ui->toggle_vsync_new->isEnabled() && Settings::values.use_vsync_new.UsingGlobal()); + ui->toggle_async_shaders->setEnabled( + Settings::values.async_shader_compilation.UsingGlobal()); + ui->toggle_async_present->setEnabled(Settings::values.async_presentation.UsingGlobal()); + ui->graphics_api_combo->setEnabled(Settings::values.graphics_api.UsingGlobal()); + ui->physical_device_combo->setEnabled(Settings::values.physical_device.UsingGlobal()); return; } @@ -102,6 +144,10 @@ void ConfigureGraphics::SetupPerGameUI() { ui->graphics_api_combo, ui->graphics_api_group, static_cast(Settings::values.graphics_api.GetValue(true))); + ConfigurationShared::SetColoredComboBox( + ui->physical_device_combo, ui->physical_device_group, + static_cast(Settings::values.physical_device.GetValue(true))); + ConfigurationShared::SetColoredTristate(ui->toggle_hw_shader, Settings::values.use_hw_shader, use_hw_shader); ConfigurationShared::SetColoredTristate( @@ -111,4 +157,34 @@ void ConfigureGraphics::SetupPerGameUI() { use_disk_shader_cache); ConfigurationShared::SetColoredTristate(ui->toggle_vsync_new, Settings::values.use_vsync_new, use_vsync_new); + ConfigurationShared::SetColoredTristate(ui->toggle_async_shaders, + Settings::values.async_shader_compilation, + async_shader_compilation); + ConfigurationShared::SetColoredTristate( + ui->toggle_async_present, Settings::values.async_presentation, async_presentation); + ConfigurationShared::SetColoredTristate(ui->spirv_shader_gen, Settings::values.spirv_shader_gen, + spirv_shader_gen); +} + +void ConfigureGraphics::SetPhysicalDeviceComboVisibility(int index) { + bool is_visible{}; + + // When configuring per-game the physical device combo should be + // shown either when the global api is used and that is Vulkan or + // Vulkan is set as the per-game api. 
+ if (!Settings::IsConfiguringGlobal()) { + const auto global_graphics_api = Settings::values.graphics_api.GetValue(true); + const bool using_global = index == 0; + if (!using_global) { + index -= ConfigurationShared::USE_GLOBAL_OFFSET; + } + const auto graphics_api = static_cast(index); + is_visible = (using_global && global_graphics_api == Settings::GraphicsAPI::Vulkan) || + graphics_api == Settings::GraphicsAPI::Vulkan; + } else { + const auto graphics_api = static_cast(index); + is_visible = graphics_api == Settings::GraphicsAPI::Vulkan; + } + ui->physical_device_group->setVisible(is_visible); + ui->spirv_shader_gen->setVisible(is_visible); } diff --git a/src/citra_qt/configuration/configure_graphics.h b/src/citra_qt/configuration/configure_graphics.h index 44b12cfa96..97a3fe0aab 100644 --- a/src/citra_qt/configuration/configure_graphics.h +++ b/src/citra_qt/configuration/configure_graphics.h @@ -5,6 +5,8 @@ #pragma once #include +#include +#include #include namespace Ui { @@ -19,7 +21,8 @@ class ConfigureGraphics : public QWidget { Q_OBJECT public: - explicit ConfigureGraphics(bool is_powered_on, QWidget* parent = nullptr); + explicit ConfigureGraphics(std::span physical_devices, bool is_powered_on, + QWidget* parent = nullptr); ~ConfigureGraphics() override; void ApplyConfiguration(); @@ -30,11 +33,15 @@ public: private: void SetupPerGameUI(); + void SetPhysicalDeviceComboVisibility(int index); ConfigurationShared::CheckState use_hw_shader; ConfigurationShared::CheckState shaders_accurate_mul; ConfigurationShared::CheckState use_disk_shader_cache; ConfigurationShared::CheckState use_vsync_new; + ConfigurationShared::CheckState async_shader_compilation; + ConfigurationShared::CheckState async_presentation; + ConfigurationShared::CheckState spirv_shader_gen; std::unique_ptr ui; QColor bg_color; }; diff --git a/src/citra_qt/configuration/configure_graphics.ui b/src/citra_qt/configuration/configure_graphics.ui index 4235759cb1..bd77e6d6ba 100644 --- 
a/src/citra_qt/configuration/configure_graphics.ui +++ b/src/citra_qt/configuration/configure_graphics.ui @@ -7,7 +7,7 @@ 0 0 400 - 443 + 509 @@ -63,11 +63,51 @@ OpenGL + + + Vulkan + + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Physical Device + + + + + + + + + + + + + SPIR-V Shader Generation + + + @@ -95,7 +135,7 @@ - <html><head/><body><p>Use OpenGL to accelerate shader emulation.</p><p>Requires a relatively powerful GPU for better performance.</p></body></html> + <html><head/><body><p>Use the selected graphics API to accelerate shader emulation.</p><p>Requires a relatively powerful GPU for better performance.</p></body></html> Enable Hardware Shader @@ -143,6 +183,26 @@ + + + + <html><head/><body><p>Compile shaders using background threads to avoid shader compilation stutter. Expect temporary graphical glitches</p></body></html> + + + Enable Async Shader Compilation + + + + + + + <html><head/><body><p>Perform presentation on separate threads. Improves performance when using Vulkan in most games.</p></body></html> + + + Enable Async Presentation + + + diff --git a/src/citra_qt/configuration/configure_per_game.cpp b/src/citra_qt/configuration/configure_per_game.cpp index a9896e0add..c5aa3f5ca6 100644 --- a/src/citra_qt/configuration/configure_per_game.cpp +++ b/src/citra_qt/configuration/configure_per_game.cpp @@ -24,7 +24,7 @@ #include "ui_configure_per_game.h" ConfigurePerGame::ConfigurePerGame(QWidget* parent, u64 title_id_, const QString& file_name, - Core::System& system_) + std::span physical_devices, Core::System& system_) : QDialog(parent), ui(std::make_unique()), filename{file_name.toStdString()}, title_id{title_id_}, system{system_} { const auto config_file_name = title_id == 0 ? 
std::string(FileUtil::GetFilename(filename)) @@ -35,7 +35,7 @@ ConfigurePerGame::ConfigurePerGame(QWidget* parent, u64 title_id_, const QString audio_tab = std::make_unique(is_powered_on, this); general_tab = std::make_unique(this); enhancements_tab = std::make_unique(this); - graphics_tab = std::make_unique(is_powered_on, this); + graphics_tab = std::make_unique(physical_devices, is_powered_on, this); system_tab = std::make_unique(system, this); debug_tab = std::make_unique(is_powered_on, this); cheat_tab = std::make_unique(system, title_id, this); diff --git a/src/citra_qt/configuration/configure_per_game.h b/src/citra_qt/configuration/configure_per_game.h index e3953fcb77..ef5ea731d0 100644 --- a/src/citra_qt/configuration/configure_per_game.h +++ b/src/citra_qt/configuration/configure_per_game.h @@ -4,9 +4,11 @@ #pragma once #include +#include #include #include #include +#include #include "citra_qt/configuration/config.h" namespace Core { @@ -35,9 +37,8 @@ class ConfigurePerGame : public QDialog { Q_OBJECT public: - // Cannot use std::filesystem::path due to https://bugreports.qt.io/browse/QTBUG-73263 explicit ConfigurePerGame(QWidget* parent, u64 title_id_, const QString& file_name, - Core::System& system_); + std::span physical_devices, Core::System& system_); ~ConfigurePerGame() override; /// Loads all button configurations to settings file diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index 202be08c3b..65d88770b2 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp @@ -62,6 +62,7 @@ #include "citra_qt/uisettings.h" #include "citra_qt/updater/updater.h" #include "citra_qt/util/clickable_label.h" +#include "citra_qt/util/vk_device_info.h" #include "common/arch.h" #include "common/common_paths.h" #include "common/detached_tasks.h" @@ -263,6 +264,14 @@ GMainWindow::GMainWindow(Core::System& system_) connect(&mouse_hide_timer, &QTimer::timeout, this, &GMainWindow::HideMouseCursor); connect(ui->menubar, &QMenuBar::hovered, this, 
&GMainWindow::OnMouseActivity); + physical_devices = GetVulkanPhysicalDevices(); + if (physical_devices.empty()) { + QMessageBox::warning(this, tr("No Suitable Vulkan Devices Detected"), + tr("Vulkan initialization failed during boot.
" + "Your GPU may not support Vulkan 1.1, or you do not " + "have the latest graphics driver.")); + } + #if ENABLE_QT_UPDATER if (UISettings::values.check_for_update_on_start) { CheckForUpdates(); @@ -2010,7 +2019,7 @@ void GMainWindow::OnLoadState() { void GMainWindow::OnConfigure() { game_list->SetDirectoryWatcherEnabled(false); Settings::SetConfiguringGlobal(true); - ConfigureDialog configureDialog(this, hotkey_registry, system, + ConfigureDialog configureDialog(this, hotkey_registry, system, physical_devices, !multiplayer_state->IsHostingPublicRoom()); connect(&configureDialog, &ConfigureDialog::LanguageChanged, this, &GMainWindow::OnLanguageChanged); @@ -2470,9 +2479,11 @@ void GMainWindow::ShowMouseCursor() { } void GMainWindow::UpdateAPIIndicator(bool update) { - static std::array graphics_apis = {QStringLiteral("SOFTWARE"), QStringLiteral("OPENGL")}; + static std::array graphics_apis = {QStringLiteral("SOFTWARE"), QStringLiteral("OPENGL"), + QStringLiteral("VULKAN")}; - static std::array graphics_api_colors = {QStringLiteral("#3ae400"), QStringLiteral("#00ccdd")}; + static std::array graphics_api_colors = {QStringLiteral("#3ae400"), QStringLiteral("#00ccdd"), + QStringLiteral("#91242a")}; u32 api_index = static_cast(Settings::values.graphics_api.GetValue()); if (update) { @@ -2764,7 +2775,7 @@ void GMainWindow::OnConfigurePerGame() { void GMainWindow::OpenPerGameConfiguration(u64 title_id, const QString& file_name) { Settings::SetConfiguringGlobal(false); - ConfigurePerGame dialog(this, title_id, file_name, system); + ConfigurePerGame dialog(this, title_id, file_name, physical_devices, system); const auto result = dialog.exec(); if (result != QDialog::Accepted) { diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h index d3c4d03dc5..c5de701e19 100644 --- a/src/citra_qt/main.h +++ b/src/citra_qt/main.h @@ -6,8 +6,10 @@ #include #include +#include #include #include +#include #include #include #include "citra_qt/compatibility_list.h" @@ -326,6 +328,8 @@ 
private: // Whether game was paused due to stopping video dumping bool game_paused_for_dumping = false; + std::vector physical_devices; + // Debugger panes ProfilerWidget* profilerWidget; MicroProfileDialog* microProfileDialog; diff --git a/src/citra_qt/util/vk_device_info.cpp b/src/citra_qt/util/vk_device_info.cpp new file mode 100644 index 0000000000..db6ccef077 --- /dev/null +++ b/src/citra_qt/util/vk_device_info.cpp @@ -0,0 +1,23 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "citra_qt/util/vk_device_info.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +std::vector GetVulkanPhysicalDevices() { + std::vector result; + try { + Vulkan::Instance instance{}; + const auto physical_devices = instance.GetPhysicalDevices(); + + for (const vk::PhysicalDevice physical_device : physical_devices) { + const QString name = QString::fromUtf8(physical_device.getProperties().deviceName, -1); + result.push_back(name); + } + } catch (const std::runtime_error& err) { + LOG_ERROR(Frontend, "Error occured while querying for physical devices: {}", err.what()); + } + + return result; +} diff --git a/src/citra_qt/util/vk_device_info.h b/src/citra_qt/util/vk_device_info.h new file mode 100644 index 0000000000..c8ef6343b0 --- /dev/null +++ b/src/citra_qt/util/vk_device_info.h @@ -0,0 +1,11 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +/// Returns a list of all available vulkan GPUs. 
+std::vector GetVulkanPhysicalDevices(); \ No newline at end of file diff --git a/src/common/dynamic_library/dynamic_library.cpp b/src/common/dynamic_library/dynamic_library.cpp index f5b3485375..016e27f4c2 100644 --- a/src/common/dynamic_library/dynamic_library.cpp +++ b/src/common/dynamic_library/dynamic_library.cpp @@ -12,6 +12,10 @@ namespace Common { +DynamicLibrary::DynamicLibrary() = default; + +DynamicLibrary::DynamicLibrary(void* handle_) : handle{handle_} {} + DynamicLibrary::DynamicLibrary(std::string_view name, int major, int minor) { auto full_name = GetLibraryName(name, major, minor); void(Load(full_name)); diff --git a/src/common/dynamic_library/dynamic_library.h b/src/common/dynamic_library/dynamic_library.h index 2fbb7a1b8f..9a0c2083fb 100644 --- a/src/common/dynamic_library/dynamic_library.h +++ b/src/common/dynamic_library/dynamic_library.h @@ -11,6 +11,7 @@ namespace Common { class DynamicLibrary { public: explicit DynamicLibrary(); + explicit DynamicLibrary(void* handle); explicit DynamicLibrary(std::string_view name, int major = -1, int minor = -1); ~DynamicLibrary(); diff --git a/src/common/hash.h b/src/common/hash.h index fde248de91..d0bb1413d8 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -37,8 +37,8 @@ static inline u64 ComputeStructHash64(const T& data) noexcept { * Combines the seed parameter with the provided hash, producing a new unique hash * Implementation from: http://boost.sourceforge.net/doc/html/boost/hash_combine.html */ -inline u64 HashCombine(std::size_t& seed, const u64 hash) { - return seed ^= hash + 0x9e3779b9 + (seed << 6) + (seed >> 2); +inline u64 HashCombine(std::size_t seed, const u64 hash) { + return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); } template diff --git a/src/common/settings.cpp b/src/common/settings.cpp index f0356b00a2..d0b9af6588 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -31,6 +31,8 @@ std::string_view GetGraphicsAPIName(GraphicsAPI api) { return 
"Software"; case GraphicsAPI::OpenGL: return "OpenGL"; + case GraphicsAPI::Vulkan: + return "Vulkan"; default: return "Invalid"; } @@ -72,6 +74,9 @@ void LogSettings() { log_setting("Core_CPUClockPercentage", values.cpu_clock_percentage.GetValue()); log_setting("Renderer_UseGLES", values.use_gles.GetValue()); log_setting("Renderer_GraphicsAPI", GetGraphicsAPIName(values.graphics_api.GetValue())); + log_setting("Renderer_AsyncShaders", values.async_shader_compilation.GetValue()); + log_setting("Renderer_AsyncPresentation", values.async_presentation.GetValue()); + log_setting("Renderer_SpirvShaderGen", values.spirv_shader_gen.GetValue()); log_setting("Renderer_Debug", values.renderer_debug.GetValue()); log_setting("Renderer_UseHwShader", values.use_hw_shader.GetValue()); log_setting("Renderer_ShadersAccurateMul", values.shaders_accurate_mul.GetValue()); @@ -159,6 +164,10 @@ void RestoreGlobalState(bool is_powered_on) { // Renderer values.graphics_api.SetGlobal(true); + values.physical_device.SetGlobal(true); + values.spirv_shader_gen.SetGlobal(true); + values.async_shader_compilation.SetGlobal(true); + values.async_presentation.SetGlobal(true); values.use_hw_shader.SetGlobal(true); values.use_disk_shader_cache.SetGlobal(true); values.shaders_accurate_mul.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 6d3cbd98a6..fcc10aa24f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -17,10 +17,10 @@ namespace Settings { -constexpr u32 GraphicsAPICount = 2; enum class GraphicsAPI { Software = 0, OpenGL = 1, + Vulkan = 2, }; enum class InitClock : u32 { @@ -430,12 +430,15 @@ struct Values { Setting allow_plugin_loader{true, "allow_plugin_loader"}; // Renderer - SwitchableSetting graphics_api{ - GraphicsAPI::OpenGL, GraphicsAPI::Software, static_cast(GraphicsAPICount - 1), - "graphics_api"}; + SwitchableSetting graphics_api{GraphicsAPI::OpenGL, GraphicsAPI::Software, + GraphicsAPI::Vulkan, "graphics_api"}; + SwitchableSetting 
physical_device{0, "physical_device"}; Setting use_gles{false, "use_gles"}; Setting renderer_debug{false, "renderer_debug"}; Setting dump_command_buffers{false, "dump_command_buffers"}; + SwitchableSetting spirv_shader_gen{true, "spirv_shader_gen"}; + SwitchableSetting async_shader_compilation{false, "async_shader_compilation"}; + SwitchableSetting async_presentation{true, "async_presentation"}; SwitchableSetting use_hw_shader{true, "use_hw_shader"}; SwitchableSetting use_disk_shader_cache{true, "use_disk_shader_cache"}; SwitchableSetting shaders_accurate_mul{true, "shaders_accurate_mul"}; diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 61b09583b5..766449110d 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -12,6 +12,10 @@ #include "core/3ds.h" #include "core/frontend/framebuffer_layout.h" +namespace Common { +class DynamicLibrary; +} + namespace Frontend { /// Information for the Graphics Backends signifying what type of screen pointer is in @@ -82,6 +86,11 @@ public: /// Releases (dunno if this is the "right" word) the context from the caller thread virtual void DoneCurrent(){}; + /// Gets the GPU driver library (used by Android only) + virtual std::shared_ptr GetDriverLibrary() { + return {}; + } + class Scoped { public: explicit Scoped(GraphicsContext& context_) : context(context_) { diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 900d9b3d08..00b58d5d46 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -101,18 +101,47 @@ add_library(video_core STATIC renderer_software/sw_texturing.cpp renderer_software/sw_texturing.h renderer_vulkan/pica_to_vk.h + renderer_vulkan/renderer_vulkan.cpp + renderer_vulkan/renderer_vulkan.h + renderer_vulkan/vk_blit_helper.cpp + renderer_vulkan/vk_blit_helper.h renderer_vulkan/vk_common.cpp renderer_vulkan/vk_common.h + renderer_vulkan/vk_descriptor_pool.cpp + renderer_vulkan/vk_descriptor_pool.h + 
renderer_vulkan/vk_graphics_pipeline.cpp + renderer_vulkan/vk_graphics_pipeline.h + renderer_vulkan/vk_master_semaphore.cpp + renderer_vulkan/vk_master_semaphore.h + renderer_vulkan/vk_rasterizer.cpp + renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_rasterizer_cache.cpp + renderer_vulkan/vk_scheduler.cpp + renderer_vulkan/vk_scheduler.h + renderer_vulkan/vk_resource_pool.cpp + renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_instance.cpp renderer_vulkan/vk_instance.h + renderer_vulkan/vk_pipeline_cache.cpp + renderer_vulkan/vk_pipeline_cache.h renderer_vulkan/vk_platform.cpp renderer_vulkan/vk_platform.h + renderer_vulkan/vk_present_window.cpp + renderer_vulkan/vk_present_window.h + renderer_vulkan/vk_renderpass_cache.cpp + renderer_vulkan/vk_renderpass_cache.h renderer_vulkan/vk_shader_gen.cpp renderer_vulkan/vk_shader_gen.h renderer_vulkan/vk_shader_gen_spv.cpp renderer_vulkan/vk_shader_gen_spv.h renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.h + renderer_vulkan/vk_stream_buffer.cpp + renderer_vulkan/vk_stream_buffer.h + renderer_vulkan/vk_swapchain.cpp + renderer_vulkan/vk_swapchain.h + renderer_vulkan/vk_texture_runtime.cpp + renderer_vulkan/vk_texture_runtime.h shader/debug_data.h shader/shader.cpp shader/shader.h diff --git a/src/video_core/host_shaders/format_reinterpreter/vulkan_d24s8_to_rgba8.comp b/src/video_core/host_shaders/format_reinterpreter/vulkan_d24s8_to_rgba8.comp index a3e4c40371..c70748c219 100644 --- a/src/video_core/host_shaders/format_reinterpreter/vulkan_d24s8_to_rgba8.comp +++ b/src/video_core/host_shaders/format_reinterpreter/vulkan_d24s8_to_rgba8.comp @@ -12,15 +12,17 @@ layout(set = 0, binding = 2, rgba8) uniform highp writeonly image2D color; layout(push_constant, std140) uniform ComputeInfo { mediump ivec2 src_offset; + mediump ivec2 dst_offset; mediump ivec2 extent; }; void main() { - ivec2 tex_coord = src_offset + ivec2(gl_GlobalInvocationID.xy); + ivec2 src_coord = src_offset + 
ivec2(gl_GlobalInvocationID.xy); + ivec2 dst_coord = dst_offset + ivec2(gl_GlobalInvocationID.xy); highp uint depth_val = - uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0)); - lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; + uint(texelFetch(depth, src_coord, 0).x * (exp2(32.0) - 1.0)); + lowp uint stencil_val = texelFetch(stencil, src_coord, 0).x; highp uvec4 components = uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); - imageStore(color, tex_coord, vec4(components) / (exp2(8.0) - 1.0)); + imageStore(color, dst_coord, vec4(components) / (exp2(8.0) - 1.0)); } diff --git a/src/video_core/host_shaders/vulkan_depth_to_buffer.comp b/src/video_core/host_shaders/vulkan_depth_to_buffer.comp index f88209c665..05f6561839 100644 --- a/src/video_core/host_shaders/vulkan_depth_to_buffer.comp +++ b/src/video_core/host_shaders/vulkan_depth_to_buffer.comp @@ -14,6 +14,7 @@ layout(binding = 2) writeonly buffer OutputBuffer{ layout(push_constant, std140) uniform ComputeInfo { mediump ivec2 src_offset; + mediump ivec2 dst_offset; mediump ivec2 extent; }; diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index bcd40354dd..18b93d6027 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -244,6 +244,11 @@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) { } break; + // Fragment operation mode + case PICA_REG_INDEX(framebuffer.output_merger.fragment_operation_mode): + shader_dirty = true; + break; + // Alpha test case PICA_REG_INDEX(framebuffer.output_merger.alpha_test): SyncAlphaTest(); @@ -617,11 +622,10 @@ void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX(rasterizer.clip_coef[3]): SyncClipCoef(); break; - - default: - // Forward registers that map to fixed function API features to the video backend - NotifyFixedFunctionPicaRegisterChanged(id); } + + // Forward registers that map to 
fixed function API features to the video backend + NotifyFixedFunctionPicaRegisterChanged(id); } void RasterizerAccelerated::SyncDepthScale() { diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index cebf543776..648d5e4f0a 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -708,8 +708,8 @@ FramebufferHelper RasterizerCache::GetFramebufferSurfaces(bool using_color fb_rect = depth_rect; } - const Surface* color_surface = color_id ? &slot_surfaces[color_id] : nullptr; - const Surface* depth_surface = depth_id ? &slot_surfaces[depth_id] : nullptr; + Surface* color_surface = color_id ? &slot_surfaces[color_id] : nullptr; + Surface* depth_surface = depth_id ? &slot_surfaces[depth_id] : nullptr; if (color_id) { color_level = color_surface->LevelOf(color_params.addr); @@ -722,7 +722,7 @@ FramebufferHelper RasterizerCache::GetFramebufferSurfaces(bool using_color boost::icl::length(depth_vp_interval)); } - fb_params = FramebufferParams{ + const FramebufferParams fb_params = { .color_id = color_id, .depth_id = depth_id, .color_level = color_level, @@ -1147,11 +1147,14 @@ bool RasterizerCache::ValidateByReinterpretation(Surface& surface, SurfacePar } const PAddr addr = boost::icl::lower(interval); const SurfaceParams copy_params = surface.FromInterval(copy_interval); - const TextureBlit reinterpret = { + const auto src_rect = src_surface.GetScaledSubRect(copy_params); + const auto dst_rect = surface.GetScaledSubRect(copy_params); + const TextureCopy reinterpret = { .src_level = src_surface.LevelOf(addr), .dst_level = surface.LevelOf(addr), - .src_rect = src_surface.GetScaledSubRect(copy_params), - .dst_rect = surface.GetScaledSubRect(copy_params), + .src_offset = {src_rect.left, src_rect.bottom}, + .dst_offset = {dst_rect.left, dst_rect.bottom}, + .extent = {src_rect.GetWidth(), src_rect.GetHeight()}, }; return 
runtime.Reinterpret(src_surface, surface, reinterpret); } @@ -1300,11 +1303,6 @@ void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, SurfaceId region for (const SurfaceId surface_id : remove_surfaces) { UnregisterSurface(surface_id); - if (slot_surfaces[surface_id].type != SurfaceType::Fill) { - sentenced.emplace_back(surface_id, frame_tick); - } else { - slot_surfaces.erase(surface_id); - } } } @@ -1365,7 +1363,13 @@ void RasterizerCache::UnregisterSurface(SurfaceId surface_id) { surfaces.erase(vector_it); }); - RemoveTextureCubeFace(surface_id); + if (surface.type != SurfaceType::Fill) { + RemoveTextureCubeFace(surface_id); + sentenced.emplace_back(surface_id, frame_tick); + return; + } + + slot_surfaces.erase(surface_id); } template @@ -1376,7 +1380,9 @@ void RasterizerCache::UnregisterAll() { UnregisterSurface(surfaces.back()); } } - texture_cube_cache.clear(); + runtime.Finish(); + frame_tick += runtime.RemoveThreshold(); + RunGarbageCollector(); } template diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 3a86d1eb4c..5bb55b7bb2 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -26,7 +26,7 @@ using DiskResourceLoadCallback = std::function depth_test_enable; BitField<4, 3, CompareFunc> depth_test_func; BitField<8, 1, u32> red_enable; diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 6e1f327e4e..d6958ec1e2 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -63,6 +63,9 @@ public: /// Synchronizes fixed function renderer state virtual void Sync() {} + /// This is called to notify the rendering backend of a surface change + virtual void NotifySurfaceChanged() {} + /// Returns the resolution scale factor relative to the native 3DS screen resolution u32 GetResolutionScaleFactor(); diff --git a/src/video_core/renderer_opengl/gl_blit_helper.cpp b/src/video_core/renderer_opengl/gl_blit_helper.cpp index 
7472b4cc8c..830d28e32f 100644 --- a/src/video_core/renderer_opengl/gl_blit_helper.cpp +++ b/src/video_core/renderer_opengl/gl_blit_helper.cpp @@ -84,7 +84,7 @@ BlitHelper::BlitHelper(const Driver& driver_) BlitHelper::~BlitHelper() = default; bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, - const VideoCore::TextureBlit& blit) { + const VideoCore::TextureCopy& copy) { OpenGLState prev_state = OpenGLState::GetCurState(); SCOPE_EXIT({ prev_state.Apply(); }); @@ -99,32 +99,35 @@ bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, 1); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - } else if (blit.src_rect.top > temp_rect.top || blit.src_rect.right > temp_rect.right) { + } else if (copy.extent.width > temp_extent.width || copy.extent.height > temp_extent.height) { + temp_extent = copy.extent; temp_tex.Release(); temp_tex.Create(); state.texture_units[1].texture_2d = temp_tex.handle; state.Apply(); glActiveTexture(GL_TEXTURE1); - glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, blit.src_rect.right, - blit.src_rect.top); + glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, temp_extent.width, + temp_extent.height); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - temp_rect = blit.src_rect; } state.texture_units[1].texture_2d = temp_tex.handle; state.Apply(); glActiveTexture(GL_TEXTURE1); if (!use_texture_view) { - glCopyImageSubData(source.Handle(), GL_TEXTURE_2D, 0, blit.src_rect.left, - blit.src_rect.bottom, 0, temp_tex.handle, GL_TEXTURE_2D, 0, - blit.src_rect.left, blit.src_rect.bottom, 0, blit.src_rect.GetWidth(), - blit.src_rect.GetHeight(), 1); + glCopyImageSubData(source.Handle(), GL_TEXTURE_2D, 0, copy.src_offset.x, copy.src_offset.y, + 0, temp_tex.handle, GL_TEXTURE_2D, 0, copy.src_offset.x, + copy.src_offset.y, 0, copy.extent.width, 
copy.extent.height, 1); } glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); - SetParams(d24s8_to_rgba8, source.RealExtent(), blit.src_rect); - Draw(d24s8_to_rgba8, dest.Handle(), draw_fbo.handle, 0, blit.dst_rect); + const Common::Rectangle src_rect{copy.src_offset.x, copy.src_offset.y + copy.extent.height, + copy.src_offset.x + copy.extent.width, copy.src_offset.x}; + const Common::Rectangle dst_rect{copy.dst_offset.x, copy.dst_offset.y + copy.extent.height, + copy.dst_offset.x + copy.extent.width, copy.dst_offset.x}; + SetParams(d24s8_to_rgba8, source.RealExtent(), src_rect); + Draw(d24s8_to_rgba8, dest.Handle(), draw_fbo.handle, 0, dst_rect); if (use_texture_view) { temp_tex.Release(); @@ -138,14 +141,18 @@ bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, } bool BlitHelper::ConvertRGBA4ToRGB5A1(Surface& source, Surface& dest, - const VideoCore::TextureBlit& blit) { + const VideoCore::TextureCopy& copy) { OpenGLState prev_state = OpenGLState::GetCurState(); SCOPE_EXIT({ prev_state.Apply(); }); state.texture_units[0].texture_2d = source.Handle(); - SetParams(rgba4_to_rgb5a1, source.RealExtent(), blit.src_rect); - Draw(rgba4_to_rgb5a1, dest.Handle(), draw_fbo.handle, 0, blit.dst_rect); + const Common::Rectangle src_rect{copy.src_offset.x, copy.src_offset.y + copy.extent.height, + copy.src_offset.x + copy.extent.width, copy.src_offset.x}; + const Common::Rectangle dst_rect{copy.dst_offset.x, copy.dst_offset.y + copy.extent.height, + copy.dst_offset.x + copy.extent.width, copy.dst_offset.x}; + SetParams(rgba4_to_rgb5a1, source.RealExtent(), src_rect); + Draw(rgba4_to_rgb5a1, dest.Handle(), draw_fbo.handle, 0, dst_rect); return true; } diff --git a/src/video_core/renderer_opengl/gl_blit_helper.h b/src/video_core/renderer_opengl/gl_blit_helper.h index 01ce770cb0..e6ba9cce59 100644 --- a/src/video_core/renderer_opengl/gl_blit_helper.h +++ b/src/video_core/renderer_opengl/gl_blit_helper.h @@ -5,12 +5,14 @@ #pragma once 
#include "common/math_util.h" +#include "video_core/rasterizer_cache/utils.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_state.h" namespace VideoCore { struct Extent; struct TextureBlit; +struct TextureCopy; } // namespace VideoCore namespace OpenGL { @@ -25,9 +27,9 @@ public: bool Filter(Surface& surface, const VideoCore::TextureBlit& blit); - bool ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); + bool ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy); - bool ConvertRGBA4ToRGB5A1(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); + bool ConvertRGBA4ToRGB5A1(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy); private: void FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& blit); @@ -68,7 +70,7 @@ private: OGLProgram rgba4_to_rgb5a1; OGLTexture temp_tex; - Common::Rectangle temp_rect{}; + VideoCore::Extent temp_extent{}; bool use_texture_view{true}; }; diff --git a/src/video_core/renderer_opengl/gl_texture_mailbox.cpp b/src/video_core/renderer_opengl/gl_texture_mailbox.cpp index 11a8b6eb9a..d32a464554 100644 --- a/src/video_core/renderer_opengl/gl_texture_mailbox.cpp +++ b/src/video_core/renderer_opengl/gl_texture_mailbox.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "common/logging/log.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_mailbox.h" diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index a45ecc5702..f24fe10acb 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -170,14 +170,14 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::CustomPixelFormat p } bool TextureRuntime::Reinterpret(Surface& source, Surface& dest, - const VideoCore::TextureBlit& blit) { + const VideoCore::TextureCopy& copy) { const PixelFormat src_format = source.pixel_format; const PixelFormat dst_format = dest.pixel_format; ASSERT_MSG(src_format != dst_format, "Reinterpretation with the same format is invalid"); if (src_format == PixelFormat::D24S8 && dst_format == PixelFormat::RGBA8) { - blit_helper.ConvertDS24S8ToRGBA8(source, dest, blit); + blit_helper.ConvertDS24S8ToRGBA8(source, dest, copy); } else if (src_format == PixelFormat::RGBA4 && dst_format == PixelFormat::RGB5A1) { - blit_helper.ConvertRGBA4ToRGB5A1(source, dest, blit); + blit_helper.ConvertRGBA4ToRGB5A1(source, dest, copy); } else { LOG_WARNING(Render_OpenGL, "Unimplemented reinterpretation {} -> {}", VideoCore::PixelFormatAsString(src_format), diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index b9a971c2d5..9fdc77bc59 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -45,6 +45,9 @@ public: /// Returns the removal threshold ticks for the garbage collector u32 RemoveThreshold(); + /// Submits and waits for current GPU work. + void Finish() {} + /// Returns true if the provided pixel format cannot be used natively by the runtime. 
bool NeedsConversion(VideoCore::PixelFormat pixel_format) const; @@ -56,7 +59,7 @@ public: const FormatTuple& GetFormatTuple(VideoCore::CustomPixelFormat pixel_format); /// Attempts to reinterpret a rectangle of source to another rectangle of dest - bool Reinterpret(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); + bool Reinterpret(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy); /// Fills the rectangle of the texture with the clear value provided void ClearTexture(Surface& surface, const VideoCore::TextureClear& clear); diff --git a/src/video_core/renderer_vulkan/pica_to_vk.h b/src/video_core/renderer_vulkan/pica_to_vk.h index a5546f1ddf..96776ce707 100644 --- a/src/video_core/renderer_vulkan/pica_to_vk.h +++ b/src/video_core/renderer_vulkan/pica_to_vk.h @@ -6,6 +6,7 @@ #include "common/logging/log.h" #include "core/core.h" +#include "core/telemetry_session.h" #include "video_core/regs.h" #include "video_core/renderer_vulkan/vk_common.h" @@ -172,7 +173,10 @@ inline vk::PrimitiveTopology PrimitiveTopology(Pica::PipelineRegs::TriangleTopol return vk::PrimitiveTopology::eTriangleList; case Pica::PipelineRegs::TriangleTopology::Strip: return vk::PrimitiveTopology::eTriangleStrip; + default: + UNREACHABLE_MSG("Unknown triangle topology {}", topology); } + return vk::PrimitiveTopology::eTriangleList; } inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) { @@ -182,7 +186,10 @@ inline vk::CullModeFlags CullMode(Pica::RasterizerRegs::CullMode mode) { case Pica::RasterizerRegs::CullMode::KeepClockWise: case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: return vk::CullModeFlagBits::eBack; + default: + UNREACHABLE_MSG("Unknown cull mode {}", mode); } + return vk::CullModeFlagBits::eNone; } inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) { @@ -192,7 +199,16 @@ inline vk::FrontFace FrontFace(Pica::RasterizerRegs::CullMode mode) { return vk::FrontFace::eCounterClockwise; case 
Pica::RasterizerRegs::CullMode::KeepCounterClockWise: return vk::FrontFace::eClockwise; + default: + UNREACHABLE_MSG("Unknown cull mode {}", mode); } + return vk::FrontFace::eClockwise; +} + +inline Common::Vec4f ColorRGBA8(const u32 color) { + const auto rgba = + Common::Vec4u{color >> 0 & 0xFF, color >> 8 & 0xFF, color >> 16 & 0xFF, color >> 24 & 0xFF}; + return rgba / 255.0f; } } // namespace PicaToVK diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp new file mode 100644 index 0000000000..51932a8b65 --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -0,0 +1,1112 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/settings.h" +#include "core/core.h" +#include "core/frontend/emu_window.h" +#include "core/hw/gpu.h" +#include "core/hw/hw.h" +#include "core/hw/lcd.h" +#include "core/telemetry_session.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" + +#include "video_core/host_shaders/vulkan_present_anaglyph_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_interlaced_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_vert_spv.h" + +#include + +MICROPROFILE_DEFINE(Vulkan_RenderFrame, "Vulkan", "Render Frame", MP_RGB(128, 128, 64)); + +namespace Vulkan { + +/** + * Vertex structure that the drawn screen rectangles are composed of. 
+ */ +struct ScreenRectVertex { + ScreenRectVertex() = default; + ScreenRectVertex(float x, float y, float u, float v) + : position{Common::MakeVec(x, y)}, tex_coord{Common::MakeVec(u, v)} {} + + Common::Vec2f position; + Common::Vec2f tex_coord; +}; + +constexpr u32 VERTEX_BUFFER_SIZE = sizeof(ScreenRectVertex) * 8192; + +constexpr std::array MakeOrthographicMatrix(u32 width, u32 height) { + // clang-format off + return { 2.f / width, 0.f, 0.f, -1.f, + 0.f, 2.f / height, 0.f, -1.f, + 0.f, 0.f, 1.f, 0.f, + 0.f, 0.f, 0.f, 1.f}; + // clang-format on +} + +namespace { + +std::string GetReadableVersion(u32 version) { + return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), + VK_VERSION_PATCH(version)); +} + +std::string GetDriverVersion(const Instance& instance) { + // Extracted from + // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 + const u32 version = instance.GetDriverVersion(); + if (instance.GetDriverID() == vk::DriverId::eNvidiaProprietary) { + const u32 major = (version >> 22) & 0x3ff; + const u32 minor = (version >> 14) & 0x0ff; + const u32 secondary = (version >> 6) & 0x0ff; + const u32 tertiary = version & 0x003f; + return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary); + } + if (instance.GetDriverID() == vk::DriverId::eIntelProprietaryWindows) { + const u32 major = version >> 14; + const u32 minor = version & 0x3fff; + return fmt::format("{}.{}", major, minor); + } + return GetReadableVersion(version); +} + +constexpr std::array PRESENT_BINDINGS = {{ + {0, vk::DescriptorType::eCombinedImageSampler, 3, vk::ShaderStageFlagBits::eFragment}, +}}; + +} // Anonymous namespace + +RendererVulkan::RendererVulkan(Core::System& system, Frontend::EmuWindow& window, + Frontend::EmuWindow* secondary_window) + : RendererBase{system, window, secondary_window}, memory{system.Memory()}, + telemetry_session{system.TelemetrySession()}, + instance{window, 
Settings::values.physical_device.GetValue()}, scheduler{instance, + renderpass_cache}, + renderpass_cache{instance, scheduler}, pool{instance}, main_window{window, instance, + scheduler}, + vertex_buffer{instance, scheduler, vk::BufferUsageFlagBits::eVertexBuffer, + VERTEX_BUFFER_SIZE}, + rasterizer{memory, + system.CustomTexManager(), + *this, + render_window, + instance, + scheduler, + pool, + renderpass_cache, + main_window.ImageCount()}, + present_set_provider{instance, pool, PRESENT_BINDINGS} { + ReportDriver(); + CompileShaders(); + BuildLayouts(); + BuildPipelines(); + if (secondary_window) { + second_window = std::make_unique(*secondary_window, instance, scheduler); + } +} + +RendererVulkan::~RendererVulkan() { + vk::Device device = instance.GetDevice(); + scheduler.Finish(); + device.waitIdle(); + + device.destroyShaderModule(present_vertex_shader); + for (u32 i = 0; i < PRESENT_PIPELINES; i++) { + device.destroyPipeline(present_pipelines[i]); + device.destroyShaderModule(present_shaders[i]); + } + + for (auto& sampler : present_samplers) { + device.destroySampler(sampler); + } + + for (auto& info : screen_infos) { + device.destroyImageView(info.texture.image_view); + vmaDestroyImage(instance.GetAllocator(), info.texture.image, info.texture.allocation); + } +} + +void RendererVulkan::Sync() { + rasterizer.SyncEntireState(); +} + +void RendererVulkan::PrepareRendertarget() { + for (u32 i = 0; i < 3; i++) { + const u32 fb_id = i == 2 ? 1 : 0; + const auto& framebuffer = GPU::g_regs.framebuffer_config[fb_id]; + + // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 + u32 lcd_color_addr = + (fb_id == 0) ? 
LCD_REG_INDEX(color_fill_top) : LCD_REG_INDEX(color_fill_bottom); + lcd_color_addr = HW::VADDR_LCD + 4 * lcd_color_addr; + LCD::Regs::ColorFill color_fill{0}; + LCD::Read(color_fill.raw, lcd_color_addr); + + if (color_fill.is_enabled) { + LoadColorToActiveVkTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, + screen_infos[i].texture); + } else { + TextureInfo& texture = screen_infos[i].texture; + if (texture.width != framebuffer.width || texture.height != framebuffer.height || + texture.format != framebuffer.color_format) { + + // Reallocate texture if the framebuffer size has changed. + // This is expected to not happen very often and hence should not be a + // performance problem. + ConfigureFramebufferTexture(texture, framebuffer); + } + + LoadFBToScreenInfo(framebuffer, screen_infos[i], i == 1); + + // Resize the texture in case the framebuffer size has changed + texture.width = framebuffer.width; + texture.height = framebuffer.height; + } + } +} + +void RendererVulkan::PrepareDraw(Frame* frame, const Layout::FramebufferLayout& layout) { + const auto sampler = present_samplers[!Settings::values.filter_mode.GetValue()]; + std::transform(screen_infos.begin(), screen_infos.end(), present_textures.begin(), + [&](auto& info) { + return DescriptorData{vk::DescriptorImageInfo{sampler, info.image_view, + vk::ImageLayout::eGeneral}}; + }); + + const auto descriptor_set = present_set_provider.Acquire(present_textures); + + renderpass_cache.EndRendering(); + scheduler.Record([this, layout, frame, descriptor_set, renderpass = main_window.Renderpass(), + index = current_pipeline](vk::CommandBuffer cmdbuf) { + const vk::Viewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = static_cast(layout.width), + .height = static_cast(layout.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + + const vk::Rect2D scissor = { + .offset = {0, 0}, + .extent = {layout.width, layout.height}, + }; + + cmdbuf.setViewport(0, viewport); + cmdbuf.setScissor(0, scissor); + 
+ const vk::ClearValue clear{.color = clear_color}; + const vk::PipelineLayout layout{*present_pipeline_layout}; + const vk::RenderPassBeginInfo renderpass_begin_info = { + .renderPass = renderpass, + .framebuffer = frame->framebuffer, + .renderArea = + vk::Rect2D{ + .offset = {0, 0}, + .extent = {frame->width, frame->height}, + }, + .clearValueCount = 1, + .pClearValues = &clear, + }; + + cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, present_pipelines[index]); + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, descriptor_set, {}); + }); +} + +void RendererVulkan::RenderToWindow(PresentWindow& window, const Layout::FramebufferLayout& layout, + bool flipped) { + Frame* frame = window.GetRenderFrame(); + + if (layout.width != frame->width || layout.height != frame->height) { + window.WaitPresent(); + scheduler.Finish(); + window.RecreateFrame(frame, layout.width, layout.height); + } + + DrawScreens(frame, layout, flipped); + scheduler.Flush(frame->render_ready); + + window.Present(frame); +} + +void RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, + ScreenInfo& screen_info, bool right_eye) { + + if (framebuffer.address_right1 == 0 || framebuffer.address_right2 == 0) { + right_eye = false; + } + + const PAddr framebuffer_addr = + framebuffer.active_fb == 0 + ? (right_eye ? framebuffer.address_right1 : framebuffer.address_left1) + : (right_eye ? 
framebuffer.address_right2 : framebuffer.address_left2); + + LOG_TRACE(Render_Vulkan, "0x{:08x} bytes from 0x{:08x}({}x{}), fmt {:x}", + framebuffer.stride * framebuffer.height, framebuffer_addr, framebuffer.width.Value(), + framebuffer.height.Value(), framebuffer.format); + + const int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); + const std::size_t pixel_stride = framebuffer.stride / bpp; + + ASSERT(pixel_stride * bpp == framebuffer.stride); + ASSERT(pixel_stride % 4 == 0); + + if (!rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, static_cast(pixel_stride), + screen_info)) { + // Reset the screen info's display texture to its own permanent texture + screen_info.image_view = screen_info.texture.image_view; + screen_info.texcoords = {0.f, 0.f, 1.f, 1.f}; + + ASSERT(false); + } +} + +void RendererVulkan::CompileShaders() { + vk::Device device = instance.GetDevice(); + present_vertex_shader = CompileSPV(VULKAN_PRESENT_VERT_SPV, device); + present_shaders[0] = CompileSPV(VULKAN_PRESENT_FRAG_SPV, device); + present_shaders[1] = CompileSPV(VULKAN_PRESENT_ANAGLYPH_FRAG_SPV, device); + present_shaders[2] = CompileSPV(VULKAN_PRESENT_INTERLACED_FRAG_SPV, device); + + auto properties = instance.GetPhysicalDevice().getProperties(); + for (std::size_t i = 0; i < present_samplers.size(); i++) { + const vk::Filter filter_mode = i == 0 ? 
vk::Filter::eLinear : vk::Filter::eNearest; + const vk::SamplerCreateInfo sampler_info = { + .magFilter = filter_mode, + .minFilter = filter_mode, + .mipmapMode = vk::SamplerMipmapMode::eLinear, + .addressModeU = vk::SamplerAddressMode::eClampToEdge, + .addressModeV = vk::SamplerAddressMode::eClampToEdge, + .anisotropyEnable = instance.IsAnisotropicFilteringSupported(), + .maxAnisotropy = properties.limits.maxSamplerAnisotropy, + .compareEnable = false, + .compareOp = vk::CompareOp::eAlways, + .borderColor = vk::BorderColor::eIntOpaqueBlack, + .unnormalizedCoordinates = false, + }; + + present_samplers[i] = device.createSampler(sampler_info); + } +} + +void RendererVulkan::BuildLayouts() { + const vk::PushConstantRange push_range = { + .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, + .offset = 0, + .size = sizeof(PresentUniformData), + }; + + const auto descriptor_set_layout = present_set_provider.Layout(); + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = 1, + .pSetLayouts = &descriptor_set_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_range, + }; + present_pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); +} + +void RendererVulkan::BuildPipelines() { + const vk::VertexInputBindingDescription binding = { + .binding = 0, + .stride = sizeof(ScreenRectVertex), + .inputRate = vk::VertexInputRate::eVertex, + }; + + const std::array attributes = { + vk::VertexInputAttributeDescription{ + .location = 0, + .binding = 0, + .format = vk::Format::eR32G32Sfloat, + .offset = offsetof(ScreenRectVertex, position), + }, + vk::VertexInputAttributeDescription{ + .location = 1, + .binding = 0, + .format = vk::Format::eR32G32Sfloat, + .offset = offsetof(ScreenRectVertex, tex_coord), + }, + }; + + const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &binding, + 
.vertexAttributeDescriptionCount = static_cast(attributes.size()), + .pVertexAttributeDescriptions = attributes.data(), + }; + + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { + .topology = vk::PrimitiveTopology::eTriangleStrip, + .primitiveRestartEnable = false, + }; + + const vk::PipelineRasterizationStateCreateInfo raster_state = { + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .cullMode = vk::CullModeFlagBits::eNone, + .frontFace = vk::FrontFace::eClockwise, + .depthBiasEnable = false, + .lineWidth = 1.0f, + }; + + const vk::PipelineMultisampleStateCreateInfo multisampling = { + .rasterizationSamples = vk::SampleCountFlagBits::e1, + .sampleShadingEnable = false, + }; + + const vk::PipelineColorBlendAttachmentState colorblend_attachment = { + .blendEnable = false, + .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA, + }; + + const vk::PipelineColorBlendStateCreateInfo color_blending = { + .logicOpEnable = false, + .attachmentCount = 1, + .pAttachments = &colorblend_attachment, + .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}, + }; + + const vk::Viewport placeholder_viewport = vk::Viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; + const vk::Rect2D placeholder_scissor = vk::Rect2D{{0, 0}, {1, 1}}; + const vk::PipelineViewportStateCreateInfo viewport_info = { + .viewportCount = 1, + .pViewports = &placeholder_viewport, + .scissorCount = 1, + .pScissors = &placeholder_scissor, + }; + + const std::array dynamic_states = { + vk::DynamicState::eViewport, + vk::DynamicState::eScissor, + }; + + const vk::PipelineDynamicStateCreateInfo dynamic_info = { + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + + const vk::PipelineDepthStencilStateCreateInfo depth_info = { + .depthTestEnable = false, + .depthWriteEnable = false, + .depthCompareOp = vk::CompareOp::eAlways, + 
.depthBoundsTestEnable = false, + .stencilTestEnable = false, + }; + + for (u32 i = 0; i < PRESENT_PIPELINES; i++) { + const std::array shader_stages = { + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eVertex, + .module = present_vertex_shader, + .pName = "main", + }, + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eFragment, + .module = present_shaders[i], + .pName = "main", + }, + }; + + const vk::GraphicsPipelineCreateInfo pipeline_info = { + .stageCount = static_cast(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_info, + .pRasterizationState = &raster_state, + .pMultisampleState = &multisampling, + .pDepthStencilState = &depth_info, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_info, + .layout = *present_pipeline_layout, + .renderPass = main_window.Renderpass(), + }; + + const auto [result, pipeline] = + instance.GetDevice().createGraphicsPipeline({}, pipeline_info); + ASSERT_MSG(result == vk::Result::eSuccess, "Unable to build present pipelines"); + present_pipelines[i] = pipeline; + } +} + +void RendererVulkan::ConfigureFramebufferTexture(TextureInfo& texture, + const GPU::Regs::FramebufferConfig& framebuffer) { + vk::Device device = instance.GetDevice(); + if (texture.image_view) { + device.destroyImageView(texture.image_view); + } + if (texture.image) { + vmaDestroyImage(instance.GetAllocator(), texture.image, texture.allocation); + } + + const VideoCore::PixelFormat pixel_format = + VideoCore::PixelFormatFromGPUPixelFormat(framebuffer.color_format); + const vk::Format format = instance.GetTraits(pixel_format).native; + const vk::ImageCreateInfo image_info = { + .imageType = vk::ImageType::e2D, + .format = format, + .extent = {framebuffer.width, framebuffer.height, 1}, + .mipLevels = 1, + .arrayLayers = 1, + .samples = vk::SampleCountFlagBits::e1, + .usage = 
vk::ImageUsageFlagBits::eSampled, + }; + + const VmaAllocationCreateInfo alloc_info = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .requiredFlags = 0, + .preferredFlags = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + }; + + VkImage unsafe_image{}; + VkImageCreateInfo unsafe_image_info = static_cast(image_info); + + VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info, + &unsafe_image, &texture.allocation, nullptr); + if (result != VK_SUCCESS) [[unlikely]] { + LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result); + UNREACHABLE(); + } + texture.image = vk::Image{unsafe_image}; + + const vk::ImageViewCreateInfo view_info = { + .image = texture.image, + .viewType = vk::ImageViewType::e2D, + .format = format, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + texture.image_view = device.createImageView(view_info); + + texture.width = framebuffer.width; + texture.height = framebuffer.height; + texture.format = framebuffer.color_format; +} + +void RendererVulkan::LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b, + const TextureInfo& texture) { + const vk::ClearColorValue clear_color = { + .float32 = + std::array{ + color_r / 255.0f, + color_g / 255.0f, + color_b / 255.0f, + 1.0f, + }, + }; + + renderpass_cache.EndRendering(); + scheduler.Record([image = texture.image, clear_color](vk::CommandBuffer cmdbuf) { + const vk::ImageSubresourceRange range = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }; + + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + 
.oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = range, + }; + + const vk::ImageMemoryBarrier post_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = range, + }; + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eFragmentShader, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + + cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear_color, range); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + }); +} + +void RendererVulkan::ReloadPipeline() { + const Settings::StereoRenderOption render_3d = Settings::values.render_3d.GetValue(); + switch (render_3d) { + case Settings::StereoRenderOption::Anaglyph: + current_pipeline = 1; + break; + case Settings::StereoRenderOption::Interlaced: + case Settings::StereoRenderOption::ReverseInterlaced: + current_pipeline = 2; + draw_info.reverse_interlaced = render_3d == Settings::StereoRenderOption::ReverseInterlaced; + break; + default: + current_pipeline = 0; + break; + } +} + +void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w, float h, + Layout::DisplayOrientation orientation) { + const ScreenInfo& screen_info = screen_infos[screen_id]; + const auto& texcoords = screen_info.texcoords; + + std::array vertices; + switch (orientation) { + case Layout::DisplayOrientation::Landscape: + vertices = 
{{ + ScreenRectVertex(x, y, texcoords.bottom, texcoords.left), + ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.right), + ScreenRectVertex(x, y + h, texcoords.top, texcoords.left), + ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.right), + }}; + break; + case Layout::DisplayOrientation::Portrait: + vertices = {{ + ScreenRectVertex(x, y, texcoords.bottom, texcoords.right), + ScreenRectVertex(x + w, y, texcoords.top, texcoords.right), + ScreenRectVertex(x, y + h, texcoords.bottom, texcoords.left), + ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.left), + }}; + std::swap(h, w); + break; + case Layout::DisplayOrientation::LandscapeFlipped: + vertices = {{ + ScreenRectVertex(x, y, texcoords.top, texcoords.right), + ScreenRectVertex(x + w, y, texcoords.top, texcoords.left), + ScreenRectVertex(x, y + h, texcoords.bottom, texcoords.right), + ScreenRectVertex(x + w, y + h, texcoords.bottom, texcoords.left), + }}; + break; + case Layout::DisplayOrientation::PortraitFlipped: + vertices = {{ + ScreenRectVertex(x, y, texcoords.top, texcoords.left), + ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.left), + ScreenRectVertex(x, y + h, texcoords.top, texcoords.right), + ScreenRectVertex(x + w, y + h, texcoords.bottom, texcoords.right), + }}; + std::swap(h, w); + break; + default: + LOG_ERROR(Render_Vulkan, "Unknown DisplayOrientation: {}", orientation); + break; + } + + const u64 size = sizeof(ScreenRectVertex) * vertices.size(); + auto [data, offset, invalidate] = vertex_buffer.Map(size, 16); + std::memcpy(data, vertices.data(), size); + vertex_buffer.Commit(size); + + const u32 scale_factor = GetResolutionScaleFactor(); + draw_info.i_resolution = + Common::MakeVec(static_cast(screen_info.texture.width * scale_factor), + static_cast(screen_info.texture.height * scale_factor), + 1.0f / static_cast(screen_info.texture.width * scale_factor), + 1.0f / static_cast(screen_info.texture.height * scale_factor)); + draw_info.o_resolution = 
Common::MakeVec(h, w, 1.0f / h, 1.0f / w); + draw_info.screen_id_l = screen_id; + + scheduler.Record([this, offset = offset, info = draw_info](vk::CommandBuffer cmdbuf) { + const u32 first_vertex = static_cast(offset) / sizeof(ScreenRectVertex); + cmdbuf.pushConstants(*present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, + 0, sizeof(info), &info); + + cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0}); + cmdbuf.draw(4, 1, first_vertex, 0); + }); +} + +void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y, + float w, float h, + Layout::DisplayOrientation orientation) { + const ScreenInfo& screen_info_l = screen_infos[screen_id_l]; + const auto& texcoords = screen_info_l.texcoords; + + std::array vertices; + switch (orientation) { + case Layout::DisplayOrientation::Landscape: + vertices = {{ + ScreenRectVertex(x, y, texcoords.bottom, texcoords.left), + ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.right), + ScreenRectVertex(x, y + h, texcoords.top, texcoords.left), + ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.right), + }}; + break; + case Layout::DisplayOrientation::Portrait: + vertices = {{ + ScreenRectVertex(x, y, texcoords.bottom, texcoords.right), + ScreenRectVertex(x + w, y, texcoords.top, texcoords.right), + ScreenRectVertex(x, y + h, texcoords.bottom, texcoords.left), + ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.left), + }}; + std::swap(h, w); + break; + case Layout::DisplayOrientation::LandscapeFlipped: + vertices = {{ + ScreenRectVertex(x, y, texcoords.top, texcoords.right), + ScreenRectVertex(x + w, y, texcoords.top, texcoords.left), + ScreenRectVertex(x, y + h, texcoords.bottom, texcoords.right), + ScreenRectVertex(x + w, y + h, texcoords.bottom, texcoords.left), + }}; + break; + case Layout::DisplayOrientation::PortraitFlipped: + vertices = {{ + ScreenRectVertex(x, y, texcoords.top, texcoords.left), + ScreenRectVertex(x + w, y, 
texcoords.bottom, texcoords.left), + ScreenRectVertex(x, y + h, texcoords.top, texcoords.right), + ScreenRectVertex(x + w, y + h, texcoords.bottom, texcoords.right), + }}; + std::swap(h, w); + break; + default: + LOG_ERROR(Render_Vulkan, "Unknown DisplayOrientation: {}", orientation); + break; + } + + const u64 size = sizeof(ScreenRectVertex) * vertices.size(); + auto [data, offset, invalidate] = vertex_buffer.Map(size, 16); + std::memcpy(data, vertices.data(), size); + vertex_buffer.Commit(size); + + const u32 scale_factor = GetResolutionScaleFactor(); + draw_info.i_resolution = + Common::MakeVec(static_cast(screen_info_l.texture.width * scale_factor), + static_cast(screen_info_l.texture.height * scale_factor), + 1.0f / static_cast(screen_info_l.texture.width * scale_factor), + 1.0f / static_cast(screen_info_l.texture.height * scale_factor)); + draw_info.o_resolution = Common::MakeVec(h, w, 1.0f / h, 1.0f / w); + draw_info.screen_id_l = screen_id_l; + draw_info.screen_id_r = screen_id_r; + + scheduler.Record([this, offset = offset, info = draw_info](vk::CommandBuffer cmdbuf) { + const u32 first_vertex = static_cast(offset) / sizeof(ScreenRectVertex); + cmdbuf.pushConstants(*present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, + 0, sizeof(info), &info); + + cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0}); + cmdbuf.draw(4, 1, first_vertex, 0); + }); +} + +void RendererVulkan::DrawTopScreen(const Layout::FramebufferLayout& layout, + const Common::Rectangle& top_screen) { + if (!layout.top_screen_enabled) { + return; + } + + const float top_screen_left = static_cast(top_screen.left); + const float top_screen_top = static_cast(top_screen.top); + const float top_screen_width = static_cast(top_screen.GetWidth()); + const float top_screen_height = static_cast(top_screen.GetHeight()); + + const auto orientation = layout.is_rotated ? 
Layout::DisplayOrientation::Landscape + : Layout::DisplayOrientation::Portrait; + switch (Settings::values.render_3d.GetValue()) { + case Settings::StereoRenderOption::Off: { + const int eye = static_cast(Settings::values.mono_render_option.GetValue()); + DrawSingleScreen(eye, top_screen_left, top_screen_top, top_screen_width, top_screen_height, + orientation); + break; + } + case Settings::StereoRenderOption::SideBySide: { + DrawSingleScreen(0, top_screen_left / 2, top_screen_top, top_screen_width / 2, + top_screen_height, orientation); + draw_info.layer = 1; + DrawSingleScreen(1, static_cast((top_screen_left / 2) + (layout.width / 2)), + top_screen_top, top_screen_width / 2, top_screen_height, orientation); + break; + } + case Settings::StereoRenderOption::CardboardVR: { + DrawSingleScreen(0, top_screen_left, top_screen_top, top_screen_width, top_screen_height, + orientation); + draw_info.layer = 1; + DrawSingleScreen( + 1, static_cast(layout.cardboard.top_screen_right_eye + (layout.width / 2)), + top_screen_top, top_screen_width, top_screen_height, orientation); + break; + } + case Settings::StereoRenderOption::Anaglyph: + case Settings::StereoRenderOption::Interlaced: + case Settings::StereoRenderOption::ReverseInterlaced: { + DrawSingleScreenStereo(0, 1, top_screen_left, top_screen_top, top_screen_width, + top_screen_height, orientation); + break; + } + } +} + +void RendererVulkan::DrawBottomScreen(const Layout::FramebufferLayout& layout, + const Common::Rectangle& bottom_screen) { + if (!layout.bottom_screen_enabled) { + return; + } + + const float bottom_screen_left = static_cast(bottom_screen.left); + const float bottom_screen_top = static_cast(bottom_screen.top); + const float bottom_screen_width = static_cast(bottom_screen.GetWidth()); + const float bottom_screen_height = static_cast(bottom_screen.GetHeight()); + + const auto orientation = layout.is_rotated ? 
Layout::DisplayOrientation::Landscape + : Layout::DisplayOrientation::Portrait; + + switch (Settings::values.render_3d.GetValue()) { + case Settings::StereoRenderOption::Off: { + DrawSingleScreen(2, bottom_screen_left, bottom_screen_top, bottom_screen_width, + bottom_screen_height, orientation); + break; + } + case Settings::StereoRenderOption::SideBySide: { + DrawSingleScreen(2, bottom_screen_left / 2, bottom_screen_top, bottom_screen_width / 2, + bottom_screen_height, orientation); + draw_info.layer = 1; + DrawSingleScreen(2, static_cast((bottom_screen_left / 2) + (layout.width / 2)), + bottom_screen_top, bottom_screen_width / 2, bottom_screen_height, + orientation); + break; + } + case Settings::StereoRenderOption::CardboardVR: { + DrawSingleScreen(2, bottom_screen_left, bottom_screen_top, bottom_screen_width, + bottom_screen_height, orientation); + draw_info.layer = 1; + DrawSingleScreen( + 2, static_cast(layout.cardboard.bottom_screen_right_eye + (layout.width / 2)), + bottom_screen_top, bottom_screen_width, bottom_screen_height, orientation); + break; + } + case Settings::StereoRenderOption::Anaglyph: + case Settings::StereoRenderOption::Interlaced: + case Settings::StereoRenderOption::ReverseInterlaced: { + DrawSingleScreenStereo(2, 2, bottom_screen_left, bottom_screen_top, bottom_screen_width, + bottom_screen_height, orientation); + break; + } + } +} + +void RendererVulkan::DrawScreens(Frame* frame, const Layout::FramebufferLayout& layout, + bool flipped) { + if (settings.bg_color_update_requested.exchange(false)) { + clear_color.float32[0] = Settings::values.bg_red.GetValue(); + clear_color.float32[1] = Settings::values.bg_green.GetValue(); + clear_color.float32[2] = Settings::values.bg_blue.GetValue(); + } + if (settings.shader_update_requested.exchange(false)) { + ReloadPipeline(); + } + + PrepareDraw(frame, layout); + + const auto& top_screen = layout.top_screen; + const auto& bottom_screen = layout.bottom_screen; + draw_info.modelview = 
MakeOrthographicMatrix(layout.width, layout.height); + + draw_info.layer = 0; + if (!Settings::values.swap_screen.GetValue()) { + DrawTopScreen(layout, top_screen); + draw_info.layer = 0; + DrawBottomScreen(layout, bottom_screen); + } else { + DrawBottomScreen(layout, bottom_screen); + draw_info.layer = 0; + DrawTopScreen(layout, top_screen); + } + + if (layout.additional_screen_enabled) { + const auto& additional_screen = layout.additional_screen; + if (!Settings::values.swap_screen.GetValue()) { + DrawTopScreen(layout, additional_screen); + } else { + DrawBottomScreen(layout, additional_screen); + } + } + + scheduler.Record([image = frame->image](vk::CommandBuffer cmdbuf) { + const vk::ImageMemoryBarrier render_barrier = { + .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + + cmdbuf.endRenderPass(); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, render_barrier); + }); +} + +void RendererVulkan::SwapBuffers() { + const Layout::FramebufferLayout& layout = render_window.GetFramebufferLayout(); + PrepareRendertarget(); + RenderScreenshot(); + RenderToWindow(main_window, layout, false); +#ifndef ANDROID + if (Settings::values.layout_option.GetValue() == Settings::LayoutOption::SeparateWindows) { + ASSERT(secondary_window); + const auto& secondary_layout = secondary_window->GetFramebufferLayout(); + if (!second_window) { + second_window = std::make_unique(*secondary_window, instance, 
scheduler); + } + RenderToWindow(*second_window, secondary_layout, false); + secondary_window->PollEvents(); + } +#endif + rasterizer.TickFrame(); + EndFrame(); +} + +void RendererVulkan::RenderScreenshot() { + if (!settings.screenshot_requested.exchange(false)) { + return; + } + + const Layout::FramebufferLayout layout{settings.screenshot_framebuffer_layout}; + const u32 width = layout.width; + const u32 height = layout.height; + + const vk::ImageCreateInfo staging_image_info = { + .imageType = vk::ImageType::e2D, + .format = vk::Format::eB8G8R8A8Unorm, + .extent{ + .width = width, + .height = height, + .depth = 1, + }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = vk::SampleCountFlagBits::e1, + .tiling = vk::ImageTiling::eLinear, + .usage = vk::ImageUsageFlagBits::eTransferDst, + .initialLayout = vk::ImageLayout::eUndefined, + }; + + const VmaAllocationCreateInfo alloc_create_info = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST, + .requiredFlags = 0, + .preferredFlags = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + }; + + VkImage unsafe_image{}; + VmaAllocation allocation{}; + VmaAllocationInfo alloc_info; + VkImageCreateInfo unsafe_image_info = static_cast(staging_image_info); + + VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, + &alloc_create_info, &unsafe_image, &allocation, &alloc_info); + if (result != VK_SUCCESS) [[unlikely]] { + LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result); + UNREACHABLE(); + } + vk::Image staging_image{unsafe_image}; + + Frame frame{}; + main_window.RecreateFrame(&frame, width, height); + + DrawScreens(&frame, layout, false); + + scheduler.Record( + [width, height, source_image = frame.image, staging_image](vk::CommandBuffer cmdbuf) { + const std::array read_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = 
vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = source_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = staging_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + const std::array write_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = source_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + 
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = staging_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + static constexpr vk::MemoryBarrier memory_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + + const std::array src_offsets = { + vk::Offset3D{0, 0, 0}, + vk::Offset3D{static_cast(width), static_cast(height), 1}, + }; + + const std::array dst_offsets = { + vk::Offset3D{0, static_cast(height), 0}, + vk::Offset3D{static_cast(width), 0, 1}, + }; + + const vk::ImageBlit blit_area = { + .srcSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .srcOffsets = src_offsets, + .dstSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .dstOffsets = dst_offsets, + }; + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barriers); + cmdbuf.blitImage(source_image, vk::ImageLayout::eTransferSrcOptimal, staging_image, + vk::ImageLayout::eTransferDstOptimal, blit_area, vk::Filter::eNearest); + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, memory_write_barrier, {}, write_barriers); + }); + + // Ensure the copy is fully completed before saving the screenshot + scheduler.Finish(); + + const vk::Device device = instance.GetDevice(); + + // Get layout of the image (including row pitch) + const vk::ImageSubresource subresource = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .arrayLayer = 0, + }; + + const vk::SubresourceLayout 
subresource_layout = + device.getImageSubresourceLayout(staging_image, subresource); + + // Copy backing image data to the QImage screenshot buffer + const u8* data = reinterpret_cast(alloc_info.pMappedData); + std::memcpy(settings.screenshot_bits, data + subresource_layout.offset, + subresource_layout.size); + + // Destroy allocated resources + vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation); + device.destroyFramebuffer(frame.framebuffer); + device.destroyImageView(frame.image_view); + + settings.screenshot_complete_callback(); +} + +void RendererVulkan::ReportDriver() const { + const std::string vendor_name{instance.GetVendorName()}; + const std::string model_name{instance.GetModelName()}; + const std::string driver_version = GetDriverVersion(instance); + const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); + + const std::string api_version = GetReadableVersion(instance.ApiVersion()); + + const std::string extensions = + fmt::format("{}", fmt::join(instance.GetAvailableExtensions(), ", ")); + + LOG_INFO(Render_Vulkan, "VK_DRIVER: {}", driver_name); + LOG_INFO(Render_Vulkan, "VK_DEVICE: {}", model_name); + LOG_INFO(Render_Vulkan, "VK_VERSION: {}", api_version); + + static constexpr auto field = Common::Telemetry::FieldType::UserSystem; + telemetry_session.AddField(field, "GPU_Vendor", vendor_name); + telemetry_session.AddField(field, "GPU_Model", model_name); + telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name); + telemetry_session.AddField(field, "GPU_Vulkan_Version", api_version); + telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h new file mode 100644 index 0000000000..165e11e4c7 --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -0,0 +1,139 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 
or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include "common/common_types.h" +#include "common/math_util.h" +#include "core/hw/gpu.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_present_window.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" + +namespace Core { +class System; +class TelemetrySession; +} // namespace Core + +namespace Memory { +class MemorySystem; +} + +namespace Layout { +struct FramebufferLayout; +} + +namespace Vulkan { + +struct TextureInfo { + u32 width; + u32 height; + GPU::Regs::PixelFormat format; + vk::Image image; + vk::ImageView image_view; + VmaAllocation allocation; +}; + +struct ScreenInfo { + TextureInfo texture; + Common::Rectangle texcoords; + vk::ImageView image_view; +}; + +struct PresentUniformData { + std::array modelview; + Common::Vec4f i_resolution; + Common::Vec4f o_resolution; + int screen_id_l = 0; + int screen_id_r = 0; + int layer = 0; + int reverse_interlaced = 0; +}; +static_assert(sizeof(PresentUniformData) == 112, + "PresentUniformData does not structure in shader!"); + +class RendererVulkan : public VideoCore::RendererBase { + static constexpr std::size_t PRESENT_PIPELINES = 3; + +public: + explicit RendererVulkan(Core::System& system, Frontend::EmuWindow& window, + Frontend::EmuWindow* secondary_window); + ~RendererVulkan() override; + + [[nodiscard]] VideoCore::RasterizerInterface* Rasterizer() override { + return &rasterizer; + } + + void NotifySurfaceChanged() override { + main_window.NotifySurfaceChanged(); + } + + void SwapBuffers() override; + void TryPresent(int timeout_ms, bool is_secondary) override {} + void Sync() 
override; + +private: + void ReportDriver() const; + void ReloadPipeline(); + void CompileShaders(); + void BuildLayouts(); + void BuildPipelines(); + void ConfigureFramebufferTexture(TextureInfo& texture, + const GPU::Regs::FramebufferConfig& framebuffer); + void ConfigureRenderPipeline(); + void PrepareRendertarget(); + void RenderScreenshot(); + void PrepareDraw(Frame* frame, const Layout::FramebufferLayout& layout); + void RenderToWindow(PresentWindow& window, const Layout::FramebufferLayout& layout, + bool flipped); + + void DrawScreens(Frame* frame, const Layout::FramebufferLayout& layout, bool flipped); + void DrawBottomScreen(const Layout::FramebufferLayout& layout, + const Common::Rectangle& bottom_screen); + void DrawTopScreen(const Layout::FramebufferLayout& layout, + const Common::Rectangle& top_screen); + void DrawSingleScreen(u32 screen_id, float x, float y, float w, float h, + Layout::DisplayOrientation orientation); + void DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y, float w, + float h, Layout::DisplayOrientation orientation); + void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, + ScreenInfo& screen_info, bool right_eye); + void LoadColorToActiveVkTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture); + +private: + Memory::MemorySystem& memory; + Core::TelemetrySession& telemetry_session; + + Instance instance; + Scheduler scheduler; + RenderpassCache renderpass_cache; + DescriptorPool pool; + PresentWindow main_window; + StreamBuffer vertex_buffer; + RasterizerVulkan rasterizer; + std::unique_ptr second_window; + + vk::UniquePipelineLayout present_pipeline_layout; + DescriptorSetProvider present_set_provider; + std::array present_pipelines; + std::array present_shaders; + std::array present_samplers; + vk::ShaderModule present_vertex_shader; + u32 current_pipeline = 0; + + std::array screen_infos{}; + std::array present_textures{}; + PresentUniformData draw_info{}; + 
vk::ClearColorValue clear_color{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.cpp b/src/video_core/renderer_vulkan/vk_blit_helper.cpp new file mode 100644 index 0000000000..b9b0875dac --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_blit_helper.cpp @@ -0,0 +1,554 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/vector_math.h" +#include "video_core/renderer_vulkan/vk_blit_helper.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +#include "video_core/host_shaders/format_reinterpreter/vulkan_d24s8_to_rgba8_comp_spv.h" +#include "video_core/host_shaders/full_screen_triangle_vert_spv.h" +#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" +#include "video_core/host_shaders/vulkan_depth_to_buffer_comp_spv.h" + +namespace Vulkan { + +using VideoCore::PixelFormat; + +namespace { +struct PushConstants { + std::array tex_scale; + std::array tex_offset; +}; + +struct ComputeInfo { + Common::Vec2i src_offset; + Common::Vec2i dst_offset; + Common::Vec2i src_extent; +}; + +inline constexpr vk::PushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ + .stageFlags = vk::ShaderStageFlagBits::eCompute, + .offset = 0, + .size = sizeof(ComputeInfo), +}; + +constexpr std::array COMPUTE_BINDINGS = {{ + {0, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eCompute}, + {1, vk::DescriptorType::eSampledImage, 1, vk::ShaderStageFlagBits::eCompute}, + {2, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eCompute}, +}}; + +constexpr std::array COMPUTE_BUFFER_BINDINGS = {{ + {0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eCompute}, + {1, 
vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eCompute}, + {2, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute}, +}}; + +constexpr std::array TWO_TEXTURES_BINDINGS = {{ + {0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, + {1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, +}}; + +inline constexpr vk::PushConstantRange PUSH_CONSTANT_RANGE{ + .stageFlags = vk::ShaderStageFlagBits::eVertex, + .offset = 0, + .size = sizeof(PushConstants), +}; +constexpr vk::PipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{ + .vertexBindingDescriptionCount = 0, + .pVertexBindingDescriptions = nullptr, + .vertexAttributeDescriptionCount = 0, + .pVertexAttributeDescriptions = nullptr, +}; +constexpr vk::PipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{ + .topology = vk::PrimitiveTopology::eTriangleList, + .primitiveRestartEnable = VK_FALSE, +}; +constexpr vk::PipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{ + .viewportCount = 1, + .pViewports = nullptr, + .scissorCount = 1, + .pScissors = nullptr, +}; +constexpr vk::PipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{ + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = vk::PolygonMode::eFill, + .cullMode = vk::CullModeFlagBits::eBack, + .frontFace = vk::FrontFace::eClockwise, + .depthBiasEnable = VK_FALSE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, +}; +constexpr vk::PipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{ + .rasterizationSamples = vk::SampleCountFlagBits::e1, + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, +}; +constexpr std::array DYNAMIC_STATES{ + 
vk::DynamicState::eViewport, + vk::DynamicState::eScissor, +}; +constexpr vk::PipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ + .dynamicStateCount = static_cast(DYNAMIC_STATES.size()), + .pDynamicStates = DYNAMIC_STATES.data(), +}; +constexpr vk::PipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{ + .logicOpEnable = VK_FALSE, + .logicOp = vk::LogicOp::eClear, + .attachmentCount = 0, + .pAttachments = nullptr, + .blendConstants = std::array{0.0f, 0.0f, 0.0f, 0.0f}, +}; +constexpr vk::PipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{ + .depthTestEnable = VK_TRUE, + .depthWriteEnable = VK_TRUE, + .depthCompareOp = vk::CompareOp::eAlways, + .depthBoundsTestEnable = VK_FALSE, + .stencilTestEnable = VK_FALSE, + .front = vk::StencilOpState{}, + .back = vk::StencilOpState{}, + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, +}; + +template +inline constexpr vk::SamplerCreateInfo SAMPLER_CREATE_INFO{ + .magFilter = filter, + .minFilter = filter, + .mipmapMode = vk::SamplerMipmapMode::eNearest, + .addressModeU = vk::SamplerAddressMode::eClampToBorder, + .addressModeV = vk::SamplerAddressMode::eClampToBorder, + .addressModeW = vk::SamplerAddressMode::eClampToBorder, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = vk::CompareOp::eNever, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = vk::BorderColor::eFloatOpaqueWhite, + .unnormalizedCoordinates = VK_FALSE, +}; + +constexpr vk::PipelineLayoutCreateInfo PipelineLayoutCreateInfo( + const vk::DescriptorSetLayout* set_layout, bool compute = false) { + return vk::PipelineLayoutCreateInfo{ + .setLayoutCount = 1, + .pSetLayouts = set_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = (compute ? 
&COMPUTE_PUSH_CONSTANT_RANGE : &PUSH_CONSTANT_RANGE), + }; +} + +constexpr std::array MakeStages( + vk::ShaderModule vertex_shader, vk::ShaderModule fragment_shader) { + return std::array{ + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eVertex, + .module = vertex_shader, + .pName = "main", + }, + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eFragment, + .module = fragment_shader, + .pName = "main", + }, + }; +} + +constexpr vk::PipelineShaderStageCreateInfo MakeStages(vk::ShaderModule compute_shader) { + return vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eCompute, + .module = compute_shader, + .pName = "main", + }; +} + +} // Anonymous namespace + +BlitHelper::BlitHelper(const Instance& instance_, Scheduler& scheduler_, DescriptorPool& pool, + RenderpassCache& renderpass_cache_) + : instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_}, + device{instance.GetDevice()}, compute_provider{instance, pool, COMPUTE_BINDINGS}, + compute_buffer_provider{instance, pool, COMPUTE_BUFFER_BINDINGS}, + two_textures_provider{instance, pool, TWO_TEXTURES_BINDINGS}, + compute_pipeline_layout{ + device.createPipelineLayout(PipelineLayoutCreateInfo(&compute_provider.Layout(), true))}, + compute_buffer_pipeline_layout{device.createPipelineLayout( + PipelineLayoutCreateInfo(&compute_buffer_provider.Layout(), true))}, + two_textures_pipeline_layout{ + device.createPipelineLayout(PipelineLayoutCreateInfo(&two_textures_provider.Layout()))}, + full_screen_vert{CompileSPV(FULL_SCREEN_TRIANGLE_VERT_SPV, device)}, + d24s8_to_rgba8_comp{CompileSPV(VULKAN_D24S8_TO_RGBA8_COMP_SPV, device)}, + depth_to_buffer_comp{CompileSPV(VULKAN_DEPTH_TO_BUFFER_COMP_SPV, device)}, + blit_depth_stencil_frag{CompileSPV(VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV, device)}, + d24s8_to_rgba8_pipeline{MakeComputePipeline(d24s8_to_rgba8_comp, compute_pipeline_layout)}, + depth_to_buffer_pipeline{ + 
MakeComputePipeline(depth_to_buffer_comp, compute_buffer_pipeline_layout)}, + depth_blit_pipeline{MakeDepthStencilBlitPipeline()}, + linear_sampler{device.createSampler(SAMPLER_CREATE_INFO)}, + nearest_sampler{device.createSampler(SAMPLER_CREATE_INFO)} {} + +BlitHelper::~BlitHelper() { + device.destroyPipelineLayout(compute_pipeline_layout); + device.destroyPipelineLayout(compute_buffer_pipeline_layout); + device.destroyPipelineLayout(two_textures_pipeline_layout); + device.destroyShaderModule(full_screen_vert); + device.destroyShaderModule(d24s8_to_rgba8_comp); + device.destroyShaderModule(depth_to_buffer_comp); + device.destroyShaderModule(blit_depth_stencil_frag); + device.destroyPipeline(depth_to_buffer_pipeline); + device.destroyPipeline(d24s8_to_rgba8_pipeline); + device.destroyPipeline(depth_blit_pipeline); + device.destroySampler(linear_sampler); + device.destroySampler(nearest_sampler); +} + +void BindBlitState(vk::CommandBuffer cmdbuf, vk::PipelineLayout layout, + const VideoCore::TextureBlit& blit) { + const vk::Offset2D offset{ + .x = std::min(blit.dst_rect.left, blit.dst_rect.right), + .y = std::min(blit.dst_rect.bottom, blit.dst_rect.top), + }; + const vk::Extent2D extent{ + .width = blit.dst_rect.GetWidth(), + .height = blit.dst_rect.GetHeight(), + }; + const vk::Viewport viewport{ + .x = static_cast(offset.x), + .y = static_cast(offset.y), + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + const vk::Rect2D scissor{ + .offset = offset, + .extent = extent, + }; + const float scale_x = static_cast(blit.src_rect.GetWidth()); + const float scale_y = static_cast(blit.src_rect.GetHeight()); + const PushConstants push_constants{ + .tex_scale = {scale_x, scale_y}, + .tex_offset = {static_cast(blit.src_rect.left), + static_cast(blit.src_rect.bottom)}, + }; + cmdbuf.setViewport(0, viewport); + cmdbuf.setScissor(0, scissor); + cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eVertex, 
0, sizeof(push_constants), + &push_constants); +} + +bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + if (!instance.IsShaderStencilExportSupported()) { + LOG_ERROR(Render_Vulkan, "Unable to emulate depth stencil images"); + return false; + } + + const vk::Rect2D dst_render_area = { + .offset = {0, 0}, + .extent = {dest.GetScaledWidth(), dest.GetScaledHeight()}, + }; + + std::array textures{}; + textures[0].image_info = vk::DescriptorImageInfo{ + .sampler = nearest_sampler, + .imageView = source.DepthView(), + .imageLayout = vk::ImageLayout::eGeneral, + }; + textures[1].image_info = vk::DescriptorImageInfo{ + .sampler = nearest_sampler, + .imageView = source.StencilView(), + .imageLayout = vk::ImageLayout::eGeneral, + }; + + const auto descriptor_set = two_textures_provider.Acquire(textures); + + const RenderPass depth_pass = { + .framebuffer = dest.Framebuffer(), + .render_pass = + renderpass_cache.GetRenderpass(PixelFormat::Invalid, dest.pixel_format, false), + .render_area = dst_render_area, + }; + renderpass_cache.BeginRendering(depth_pass); + + scheduler.Record([blit, descriptor_set, this](vk::CommandBuffer cmdbuf) { + const vk::PipelineLayout layout = two_textures_pipeline_layout; + + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, depth_blit_pipeline); + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, descriptor_set, {}); + BindBlitState(cmdbuf, layout, blit); + cmdbuf.draw(3, 1, 0, 0); + }); + scheduler.MakeDirty(StateFlags::Pipeline); + return true; +} + +bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, + const VideoCore::TextureCopy& copy) { + std::array textures{}; + textures[0].image_info = vk::DescriptorImageInfo{ + .imageView = source.DepthView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + }; + textures[1].image_info = vk::DescriptorImageInfo{ + .imageView = source.StencilView(), + .imageLayout = 
vk::ImageLayout::eDepthStencilReadOnlyOptimal, + }; + textures[2].image_info = vk::DescriptorImageInfo{ + .imageView = dest.ImageView(), + .imageLayout = vk::ImageLayout::eGeneral, + }; + + const auto descriptor_set = compute_provider.Acquire(textures); + + renderpass_cache.EndRendering(); + scheduler.Record([this, descriptor_set, copy, src_image = source.Image(), + dst_image = dest.Image()](vk::CommandBuffer cmdbuf) { + const std::array pre_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = + vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eShaderWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + const std::array post_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderRead, + .dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentRead, + .oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = + vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }}; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests, + vk::PipelineStageFlagBits::eComputeShader, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); + + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, + descriptor_set, {}); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, d24s8_to_rgba8_pipeline); + + const ComputeInfo info = { + .src_offset = Common::Vec2i{static_cast(copy.src_offset.x), + static_cast(copy.src_offset.y)}, + .dst_offset = Common::Vec2i{static_cast(copy.dst_offset.x), + static_cast(copy.dst_offset.y)}, + }; + cmdbuf.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, + sizeof(info), &info); + + cmdbuf.dispatch(copy.extent.width / 8, copy.extent.height / 8, 1); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests | + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); + 
}); + return true; +} + +bool BlitHelper::DepthToBuffer(Surface& source, vk::Buffer buffer, + const VideoCore::BufferTextureCopy& copy) { + std::array textures{}; + textures[0].image_info = vk::DescriptorImageInfo{ + .sampler = nearest_sampler, + .imageView = source.DepthView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + }; + textures[1].image_info = vk::DescriptorImageInfo{ + .sampler = nearest_sampler, + .imageView = source.StencilView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + }; + textures[2].buffer_info = vk::DescriptorBufferInfo{ + .buffer = buffer, + .offset = copy.buffer_offset, + .range = copy.buffer_size, + }; + + const auto descriptor_set = compute_buffer_provider.Acquire(textures); + + renderpass_cache.EndRendering(); + scheduler.Record([this, descriptor_set, copy, src_image = source.Image(), + extent = source.RealExtent(false)](vk::CommandBuffer cmdbuf) { + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const vk::ImageMemoryBarrier post_barrier = { + .srcAccessMask = vk::AccessFlagBits::eShaderRead, + .dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentRead, + .oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + 
.image = src_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests, + vk::PipelineStageFlagBits::eComputeShader, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_buffer_pipeline_layout, + 0, descriptor_set, {}); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, depth_to_buffer_pipeline); + + const ComputeInfo info = { + .src_offset = Common::Vec2i{static_cast(copy.texture_rect.left), + static_cast(copy.texture_rect.bottom)}, + .src_extent = + Common::Vec2i{static_cast(extent.width), static_cast(extent.height)}, + }; + cmdbuf.pushConstants(compute_buffer_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, + sizeof(ComputeInfo), &info); + + cmdbuf.dispatch(copy.texture_rect.GetWidth() / 8, copy.texture_rect.GetHeight() / 8, 1); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests | + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + }); + return true; +} + +vk::Pipeline BlitHelper::MakeComputePipeline(vk::ShaderModule shader, vk::PipelineLayout layout) { + const vk::ComputePipelineCreateInfo compute_info = { + .stage = MakeStages(shader), + .layout = layout, + }; + + if (const auto result = device.createComputePipeline({}, compute_info); + result.result == vk::Result::eSuccess) { + return result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "Compute pipeline creation failed!"); + UNREACHABLE(); + } +} + +vk::Pipeline BlitHelper::MakeDepthStencilBlitPipeline() { + if 
(!instance.IsShaderStencilExportSupported()) { + return VK_NULL_HANDLE; + } + + const std::array stages = MakeStages(full_screen_vert, blit_depth_stencil_frag); + const auto renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid, + VideoCore::PixelFormat::D24S8, false); + vk::GraphicsPipelineCreateInfo depth_stencil_info = { + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = two_textures_pipeline_layout, + .renderPass = renderpass, + }; + + if (const auto result = device.createGraphicsPipeline({}, depth_stencil_info); + result.result == vk::Result::eSuccess) { + return result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "Depth stencil blit pipeline creation failed!"); + UNREACHABLE(); + } + return VK_NULL_HANDLE; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.h b/src/video_core/renderer_vulkan/vk_blit_helper.h new file mode 100644 index 0000000000..b7735fcc96 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_blit_helper.h @@ -0,0 +1,71 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" + +namespace VideoCore { +struct TextureBlit; +struct TextureCopy; +struct BufferTextureCopy; +} // namespace VideoCore + +namespace Vulkan { + +class Instance; +class RenderpassCache; +class Scheduler; +class Surface; + +class BlitHelper { + friend class TextureRuntime; + +public: + BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorPool& pool, + RenderpassCache& renderpass_cache); + ~BlitHelper(); + + bool BlitDepthStencil(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); + + bool ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy); + + bool DepthToBuffer(Surface& source, vk::Buffer buffer, + const VideoCore::BufferTextureCopy& copy); + +private: + /// Creates compute pipelines used for blit + vk::Pipeline MakeComputePipeline(vk::ShaderModule shader, vk::PipelineLayout layout); + + /// Creates graphics pipelines used for blit + vk::Pipeline MakeDepthStencilBlitPipeline(); + +private: + const Instance& instance; + Scheduler& scheduler; + RenderpassCache& renderpass_cache; + + vk::Device device; + vk::RenderPass r32_renderpass; + + DescriptorSetProvider compute_provider; + DescriptorSetProvider compute_buffer_provider; + DescriptorSetProvider two_textures_provider; + vk::PipelineLayout compute_pipeline_layout; + vk::PipelineLayout compute_buffer_pipeline_layout; + vk::PipelineLayout two_textures_pipeline_layout; + + vk::ShaderModule full_screen_vert; + vk::ShaderModule d24s8_to_rgba8_comp; + vk::ShaderModule depth_to_buffer_comp; + vk::ShaderModule blit_depth_stencil_frag; + + vk::Pipeline d24s8_to_rgba8_pipeline; + vk::Pipeline depth_to_buffer_pipeline; + vk::Pipeline depth_blit_pipeline; + vk::Sampler linear_sampler; + vk::Sampler nearest_sampler; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h index a8147acbe4..3fd6bc45c6 100644 --- 
a/src/video_core/renderer_vulkan/vk_common.h +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -9,6 +9,7 @@ #define VK_NO_PROTOTYPES #define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 #define VULKAN_HPP_NO_CONSTRUCTORS +#define VULKAN_HPP_NO_UNION_CONSTRUCTORS #define VULKAN_HPP_NO_STRUCT_SETTERS #include diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp new file mode 100644 index 0000000000..3909da2372 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -0,0 +1,141 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/microprofile.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace Vulkan { + +MICROPROFILE_DEFINE(Vulkan_DescriptorSetAcquire, "Vulkan", "Descriptor Set Acquire", + MP_RGB(64, 128, 256)); + +constexpr u32 MAX_BATCH_SIZE = 8; + +DescriptorPool::DescriptorPool(const Instance& instance_) : instance{instance_} { + auto& pool = pools.emplace_back(); + pool = CreatePool(); +} + +DescriptorPool::~DescriptorPool() = default; + +std::vector DescriptorPool::Allocate(vk::DescriptorSetLayout layout, + u32 num_sets) { + std::array layouts; + layouts.fill(layout); + + u32 current_pool = 0; + vk::DescriptorSetAllocateInfo alloc_info = { + .descriptorPool = *pools[current_pool], + .descriptorSetCount = num_sets, + .pSetLayouts = layouts.data(), + }; + + while (true) { + try { + return instance.GetDevice().allocateDescriptorSets(alloc_info); + } catch (const vk::OutOfPoolMemoryError&) { + current_pool++; + if (current_pool == pools.size()) { + LOG_INFO(Render_Vulkan, "Run out of pools, creating new one!"); + auto& pool = pools.emplace_back(); + pool = CreatePool(); + } + alloc_info.descriptorPool = *pools[current_pool]; + } + } +} + +vk::DescriptorSet DescriptorPool::Allocate(vk::DescriptorSetLayout 
layout) { + const auto sets = Allocate(layout, 1); + return sets[0]; +} + +vk::UniqueDescriptorPool DescriptorPool::CreatePool() { + // Choose a sane pool size good for most games + static constexpr std::array pool_sizes = {{ + {vk::DescriptorType::eUniformBufferDynamic, 64}, + {vk::DescriptorType::eUniformTexelBuffer, 64}, + {vk::DescriptorType::eCombinedImageSampler, 4096}, + {vk::DescriptorType::eSampledImage, 256}, + {vk::DescriptorType::eStorageImage, 256}, + {vk::DescriptorType::eStorageBuffer, 32}, + }}; + + const vk::DescriptorPoolCreateInfo descriptor_pool_info = { + .maxSets = 4098, + .poolSizeCount = static_cast(pool_sizes.size()), + .pPoolSizes = pool_sizes.data(), + }; + + return instance.GetDevice().createDescriptorPoolUnique(descriptor_pool_info); +} + +DescriptorSetProvider::DescriptorSetProvider( + const Instance& instance, DescriptorPool& pool_, + std::span bindings) + : pool{pool_}, device{instance.GetDevice()} { + std::array update_entries; + + for (u32 i = 0; i < bindings.size(); i++) { + update_entries[i] = vk::DescriptorUpdateTemplateEntry{ + .dstBinding = bindings[i].binding, + .dstArrayElement = 0, + .descriptorCount = bindings[i].descriptorCount, + .descriptorType = bindings[i].descriptorType, + .offset = i * sizeof(DescriptorData), + .stride = sizeof(DescriptorData), + }; + } + + const vk::DescriptorSetLayoutCreateInfo layout_info = { + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }; + layout = device.createDescriptorSetLayoutUnique(layout_info); + + const vk::DescriptorUpdateTemplateCreateInfo template_info = { + .descriptorUpdateEntryCount = static_cast(bindings.size()), + .pDescriptorUpdateEntries = update_entries.data(), + .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, + .descriptorSetLayout = *layout, + }; + update_template = device.createDescriptorUpdateTemplateUnique(template_info); +} + +DescriptorSetProvider::~DescriptorSetProvider() = default; + +vk::DescriptorSet 
DescriptorSetProvider::Acquire(std::span data) { + MICROPROFILE_SCOPE(Vulkan_DescriptorSetAcquire); + DescriptorSetData key{}; + std::memcpy(key.data(), data.data(), data.size_bytes()); + const auto [it, new_set] = descriptor_set_map.try_emplace(key); + if (!new_set) { + return it->second; + } + if (free_sets.empty()) { + free_sets = pool.Allocate(*layout, MAX_BATCH_SIZE); + } + it.value() = free_sets.back(); + free_sets.pop_back(); + device.updateDescriptorSetWithTemplate(it->second, *update_template, data[0]); + return it->second; +} + +void DescriptorSetProvider::FreeWithImage(vk::ImageView image_view) { + for (auto it = descriptor_set_map.begin(); it != descriptor_set_map.end();) { + const auto& [data, set] = *it; + const bool has_image = std::any_of(data.begin(), data.end(), [image_view](auto& info) { + return info.image_info.imageView == image_view; + }); + if (has_image) { + free_sets.push_back(set); + it = descriptor_set_map.erase(it); + } else { + it++; + } + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h new file mode 100644 index 0000000000..2990cd2945 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -0,0 +1,92 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include + +#include "common/hash.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; + +constexpr u32 MAX_DESCRIPTORS = 7; + +union DescriptorData { + vk::DescriptorImageInfo image_info; + vk::DescriptorBufferInfo buffer_info; + vk::BufferView buffer_view; + + bool operator==(const DescriptorData& other) const noexcept { + return std::memcmp(this, &other, sizeof(DescriptorData)) == 0; + } +}; + +using DescriptorSetData = std::array; + +struct DataHasher { + u64 operator()(const DescriptorSetData& data) const noexcept { + return Common::ComputeHash64(data.data(), sizeof(data)); + } +}; + +/** + * An interface for allocating descriptor sets that manages a collection of descriptor pools. + */ +class DescriptorPool { +public: + explicit DescriptorPool(const Instance& instance); + ~DescriptorPool(); + + std::vector Allocate(vk::DescriptorSetLayout layout, u32 num_sets); + + vk::DescriptorSet Allocate(vk::DescriptorSetLayout layout); + +private: + vk::UniqueDescriptorPool CreatePool(); + +private: + const Instance& instance; + std::vector pools; +}; + +/** + * Allocates and caches descriptor sets of a specific layout. 
+ */ +class DescriptorSetProvider { +public: + explicit DescriptorSetProvider(const Instance& instance, DescriptorPool& pool, + std::span bindings); + ~DescriptorSetProvider(); + + vk::DescriptorSet Acquire(std::span data); + + void FreeWithImage(vk::ImageView image_view); + + [[nodiscard]] vk::DescriptorSetLayout Layout() const noexcept { + return *layout; + } + + [[nodiscard]] vk::DescriptorSetLayout& Layout() noexcept { + return layout.get(); + } + + [[nodiscard]] vk::DescriptorUpdateTemplate UpdateTemplate() const noexcept { + return *update_template; + } + +private: + DescriptorPool& pool; + vk::Device device; + vk::UniqueDescriptorSetLayout layout; + vk::UniqueDescriptorUpdateTemplate update_template; + std::vector free_sets; + tsl::robin_map descriptor_set_map; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp new file mode 100644 index 0000000000..5ef3eb513f --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -0,0 +1,288 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/hash.h" +#include "common/microprofile.h" +#include "video_core/renderer_vulkan/pica_to_vk.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" + +namespace Vulkan { + +MICROPROFILE_DEFINE(Vulkan_Pipeline, "Vulkan", "Pipeline Building", MP_RGB(0, 192, 32)); + +vk::ShaderStageFlagBits MakeShaderStage(std::size_t index) { + switch (index) { + case 0: + return vk::ShaderStageFlagBits::eVertex; + case 1: + return vk::ShaderStageFlagBits::eFragment; + case 2: + return vk::ShaderStageFlagBits::eGeometry; + default: + LOG_CRITICAL(Render_Vulkan, "Invalid shader stage index!"); + UNREACHABLE(); + } + return vk::ShaderStageFlagBits::eVertex; +} + +u64 PipelineInfo::Hash(const Instance& instance) const { + u64 info_hash = 0; + const auto append_hash = [&info_hash](const auto& data) { + const u64 data_hash = Common::ComputeStructHash64(data); + info_hash = Common::HashCombine(info_hash, data_hash); + }; + + append_hash(vertex_layout); + append_hash(attachments); + append_hash(blending); + + if (!instance.IsExtendedDynamicStateSupported()) { + append_hash(rasterization); + append_hash(depth_stencil); + } + + return info_hash; +} + +Shader::Shader(const Instance& instance) : device{instance.GetDevice()} {} + +Shader::Shader(const Instance& instance, vk::ShaderStageFlagBits stage, std::string code) + : Shader{instance} { + module = Compile(code, stage, instance.GetDevice()); + MarkDone(); +} + +Shader::~Shader() { + if (device && module) { + device.destroyShaderModule(module); + } +} + +GraphicsPipeline::GraphicsPipeline(const Instance& instance_, RenderpassCache& renderpass_cache_, + const PipelineInfo& info_, vk::PipelineCache pipeline_cache_, + vk::PipelineLayout layout_, std::array stages_, + Common::ThreadWorker* worker_) + : instance{instance_}, 
renderpass_cache{renderpass_cache_}, worker{worker_}, + pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, info{info_}, stages{stages_} {} + +GraphicsPipeline::~GraphicsPipeline() = default; + +bool GraphicsPipeline::TryBuild(bool wait_built) { + // The pipeline is currently being compiled. We can either wait for it + // or skip the draw. + if (is_pending) { + return wait_built; + } + + // If the shaders haven't been compiled yet, we cannot proceed. + const bool shaders_pending = std::any_of( + stages.begin(), stages.end(), [](Shader* shader) { return shader && !shader->IsDone(); }); + if (!wait_built && shaders_pending) { + return false; + } + + // Ask the driver if it can give us the pipeline quickly. + if (!shaders_pending && instance.IsPipelineCreationCacheControlSupported() && Build(true)) { + return true; + } + + // Fallback to (a)synchronous compilation + worker->QueueWork([this] { Build(); }); + is_pending = true; + return wait_built; +} + +bool GraphicsPipeline::Build(bool fail_on_compile_required) { + MICROPROFILE_SCOPE(Vulkan_Pipeline); + const vk::Device device = instance.GetDevice(); + + std::array bindings; + for (u32 i = 0; i < info.vertex_layout.binding_count; i++) { + const auto& binding = info.vertex_layout.bindings[i]; + bindings[i] = vk::VertexInputBindingDescription{ + .binding = binding.binding, + .stride = binding.stride, + .inputRate = binding.fixed.Value() ? 
vk::VertexInputRate::eInstance + : vk::VertexInputRate::eVertex, + }; + } + + std::array attributes; + for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) { + const auto& attr = info.vertex_layout.attributes[i]; + const FormatTraits& traits = instance.GetTraits(attr.type, attr.size); + attributes[i] = vk::VertexInputAttributeDescription{ + .location = attr.location, + .binding = attr.binding, + .format = traits.native, + .offset = attr.offset, + }; + + // At the end there's always the fixed binding which takes up + // at least 16 bytes so we should always be able to alias. + if (traits.needs_emulation) { + const FormatTraits& comp_four_traits = instance.GetTraits(attr.type, 4); + attributes[i].format = comp_four_traits.native; + } + } + + const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { + .vertexBindingDescriptionCount = info.vertex_layout.binding_count, + .pVertexBindingDescriptions = bindings.data(), + .vertexAttributeDescriptionCount = info.vertex_layout.attribute_count, + .pVertexAttributeDescriptions = attributes.data(), + }; + + const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { + .topology = PicaToVK::PrimitiveTopology(info.rasterization.topology), + .primitiveRestartEnable = false, + }; + + const vk::PipelineRasterizationStateCreateInfo raster_state = { + .depthClampEnable = false, + .rasterizerDiscardEnable = false, + .cullMode = PicaToVK::CullMode(info.rasterization.cull_mode), + .frontFace = PicaToVK::FrontFace(info.rasterization.cull_mode), + .depthBiasEnable = false, + .lineWidth = 1.0f, + }; + + const vk::PipelineMultisampleStateCreateInfo multisampling = { + .rasterizationSamples = vk::SampleCountFlagBits::e1, + .sampleShadingEnable = false, + }; + + const vk::PipelineColorBlendAttachmentState colorblend_attachment = { + .blendEnable = info.blending.blend_enable, + .srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor), + .dstColorBlendFactor = 
PicaToVK::BlendFunc(info.blending.dst_color_blend_factor), + .colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq), + .srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor), + .dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor), + .alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq), + .colorWriteMask = static_cast(info.blending.color_write_mask), + }; + + const vk::PipelineColorBlendStateCreateInfo color_blending = { + .logicOpEnable = !info.blending.blend_enable && !instance.NeedsLogicOpEmulation(), + .logicOp = PicaToVK::LogicOp(info.blending.logic_op), + .attachmentCount = 1, + .pAttachments = &colorblend_attachment, + .blendConstants = std::array{1.0f, 1.0f, 1.0f, 1.0f}, + }; + + const vk::Viewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = 1.0f, + .height = 1.0f, + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + + const vk::Rect2D scissor = { + .offset = {0, 0}, + .extent = {1, 1}, + }; + + const vk::PipelineViewportStateCreateInfo viewport_info = { + .viewportCount = 1, + .pViewports = &viewport, + .scissorCount = 1, + .pScissors = &scissor, + }; + + boost::container::static_vector dynamic_states = { + vk::DynamicState::eViewport, vk::DynamicState::eScissor, + vk::DynamicState::eStencilCompareMask, vk::DynamicState::eStencilWriteMask, + vk::DynamicState::eStencilReference, vk::DynamicState::eBlendConstants, + }; + + if (instance.IsExtendedDynamicStateSupported()) { + constexpr std::array extended = { + vk::DynamicState::eCullModeEXT, vk::DynamicState::eDepthCompareOpEXT, + vk::DynamicState::eDepthTestEnableEXT, vk::DynamicState::eDepthWriteEnableEXT, + vk::DynamicState::eFrontFaceEXT, vk::DynamicState::ePrimitiveTopologyEXT, + vk::DynamicState::eStencilOpEXT, vk::DynamicState::eStencilTestEnableEXT, + }; + dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); + } + + const vk::PipelineDynamicStateCreateInfo dynamic_info = { + 
.dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + + const vk::StencilOpState stencil_op_state = { + .failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op), + .passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op), + .depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op), + .compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op), + }; + + const vk::PipelineDepthStencilStateCreateInfo depth_info = { + .depthTestEnable = static_cast(info.depth_stencil.depth_test_enable.Value()), + .depthWriteEnable = static_cast(info.depth_stencil.depth_write_enable.Value()), + .depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op), + .depthBoundsTestEnable = false, + .stencilTestEnable = static_cast(info.depth_stencil.stencil_test_enable.Value()), + .front = stencil_op_state, + .back = stencil_op_state, + }; + + u32 shader_count = 0; + std::array shader_stages; + for (std::size_t i = 0; i < stages.size(); i++) { + Shader* shader = stages[i]; + if (!shader) { + continue; + } + + shader->WaitDone(); + shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ + .stage = MakeShaderStage(i), + .module = shader->Handle(), + .pName = "main", + }; + } + + vk::GraphicsPipelineCreateInfo pipeline_info = { + .stageCount = shader_count, + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_info, + .pInputAssemblyState = &input_assembly, + .pViewportState = &viewport_info, + .pRasterizationState = &raster_state, + .pMultisampleState = &multisampling, + .pDepthStencilState = &depth_info, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_info, + .layout = pipeline_layout, + .renderPass = + renderpass_cache.GetRenderpass(info.attachments.color, info.attachments.depth, false), + }; + + if (fail_on_compile_required) { + pipeline_info.flags |= vk::PipelineCreateFlagBits::eFailOnPipelineCompileRequiredEXT; + } + + auto 
result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info); + if (result.result == vk::Result::eSuccess) { + pipeline = std::move(result.value); + } else if (result.result == vk::Result::eErrorPipelineCompileRequiredEXT) { + return false; + } else { + UNREACHABLE_MSG("Graphics pipeline creation failed!"); + } + + MarkDone(); + return true; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h new file mode 100644 index 0000000000..13267b59b2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -0,0 +1,192 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/thread_worker.h" +#include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/renderer_vulkan/vk_shader_gen.h" + +namespace Common { + +struct AsyncHandle { +public: + AsyncHandle(bool is_done_ = false) : is_done{is_done_} {} + + [[nodiscard]] bool IsDone() noexcept { + return is_done.load(std::memory_order::relaxed); + } + + void WaitDone() noexcept { + std::unique_lock lock{mutex}; + condvar.wait(lock, [this] { return is_done.load(std::memory_order::relaxed); }); + } + + void MarkDone(bool done = true) noexcept { + std::scoped_lock lock{mutex}; + is_done = done; + condvar.notify_all(); + } + +private: + std::condition_variable condvar; + std::mutex mutex; + std::atomic_bool is_done{false}; +}; + +} // namespace Common + +namespace Vulkan { + +class Instance; +class RenderpassCache; + +constexpr u32 MAX_SHADER_STAGES = 3; +constexpr u32 MAX_VERTEX_ATTRIBUTES = 16; +constexpr u32 MAX_VERTEX_BINDINGS = 13; + +/** + * The pipeline state is tightly packed with bitfields to reduce + * the overhead of hashing as much as possible + */ +union RasterizationState { + u8 value = 0; + BitField<0, 2, 
Pica::PipelineRegs::TriangleTopology> topology; + BitField<4, 2, Pica::RasterizerRegs::CullMode> cull_mode; +}; + +union DepthStencilState { + u32 value = 0; + BitField<0, 1, u32> depth_test_enable; + BitField<1, 1, u32> depth_write_enable; + BitField<2, 1, u32> stencil_test_enable; + BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op; + BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op; + BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op; + BitField<12, 3, Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op; + BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op; +}; + +struct BlendingState { + u16 blend_enable; + u16 color_write_mask; + Pica::FramebufferRegs::LogicOp logic_op; + union { + u32 value = 0; + BitField<0, 4, Pica::FramebufferRegs::BlendFactor> src_color_blend_factor; + BitField<4, 4, Pica::FramebufferRegs::BlendFactor> dst_color_blend_factor; + BitField<8, 3, Pica::FramebufferRegs::BlendEquation> color_blend_eq; + BitField<11, 4, Pica::FramebufferRegs::BlendFactor> src_alpha_blend_factor; + BitField<15, 4, Pica::FramebufferRegs::BlendFactor> dst_alpha_blend_factor; + BitField<19, 3, Pica::FramebufferRegs::BlendEquation> alpha_blend_eq; + }; +}; + +struct DynamicState { + u32 blend_color = 0; + u8 stencil_reference; + u8 stencil_compare_mask; + u8 stencil_write_mask; + + bool operator==(const DynamicState& other) const noexcept { + return std::memcmp(this, &other, sizeof(DynamicState)) == 0; + } +}; + +union VertexBinding { + u16 value = 0; + BitField<0, 4, u16> binding; + BitField<4, 1, u16> fixed; + BitField<5, 11, u16> stride; +}; + +union VertexAttribute { + u32 value = 0; + BitField<0, 4, u32> binding; + BitField<4, 4, u32> location; + BitField<8, 3, Pica::PipelineRegs::VertexAttributeFormat> type; + BitField<11, 3, u32> size; + BitField<14, 11, u32> offset; +}; + +struct VertexLayout { + u8 binding_count; + u8 attribute_count; + std::array bindings; + std::array 
attributes; +}; + +struct AttachmentInfo { + VideoCore::PixelFormat color; + VideoCore::PixelFormat depth; +}; + +/** + * Information about a graphics/compute pipeline + */ +struct PipelineInfo { + BlendingState blending; + AttachmentInfo attachments; + RasterizationState rasterization; + DepthStencilState depth_stencil; + DynamicState dynamic; + VertexLayout vertex_layout; + + [[nodiscard]] u64 Hash(const Instance& instance) const; + + [[nodiscard]] bool IsDepthWriteEnabled() const noexcept { + const bool has_stencil = attachments.depth == VideoCore::PixelFormat::D24S8; + const bool depth_write = + depth_stencil.depth_test_enable && depth_stencil.depth_write_enable; + const bool stencil_write = + has_stencil && depth_stencil.stencil_test_enable && dynamic.stencil_write_mask != 0; + + return depth_write || stencil_write; + } +}; + +struct Shader : public Common::AsyncHandle { + explicit Shader(const Instance& instance); + explicit Shader(const Instance& instance, vk::ShaderStageFlagBits stage, std::string code); + ~Shader(); + + [[nodiscard]] vk::ShaderModule Handle() const noexcept { + return module; + } + + vk::ShaderModule module; + vk::Device device; + std::string program; +}; + +class GraphicsPipeline : public Common::AsyncHandle { +public: + explicit GraphicsPipeline(const Instance& instance, RenderpassCache& renderpass_cache, + const PipelineInfo& info, vk::PipelineCache pipeline_cache, + vk::PipelineLayout layout, std::array stages, + Common::ThreadWorker* worker); + ~GraphicsPipeline(); + + bool TryBuild(bool wait_built); + + bool Build(bool fail_on_compile_required = false); + + [[nodiscard]] vk::Pipeline Handle() const noexcept { + return *pipeline; + } + +private: + const Instance& instance; + RenderpassCache& renderpass_cache; + Common::ThreadWorker* worker; + + vk::UniquePipeline pipeline; + vk::PipelineLayout pipeline_layout; + vk::PipelineCache pipeline_cache; + + PipelineInfo info; + std::array stages; + bool is_pending{}; +}; + +} // namespace 
Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 9d1b1935e0..01debd8980 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -14,6 +14,10 @@ #include +#ifdef __APPLE__ +#include +#endif + namespace Vulkan { namespace { @@ -130,12 +134,12 @@ Instance::Instance(bool enable_validation, bool dump_command_buffers) physical_devices{instance->enumeratePhysicalDevices()} {} Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index) - : library{OpenLibrary()}, instance{CreateInstance( - *library, window.GetWindowInfo().type, - Settings::values.renderer_debug.GetValue(), - Settings::values.dump_command_buffers.GetValue())}, - debug_callback{CreateDebugCallback(*instance)}, physical_devices{ - instance->enumeratePhysicalDevices()} { + : library{OpenLibrary(&window)}, instance{CreateInstance( + *library, window.GetWindowInfo().type, + Settings::values.renderer_debug.GetValue(), + Settings::values.dump_command_buffers.GetValue())}, + debug_callback{CreateDebugCallback(*instance, debug_utils_supported)}, + physical_devices{instance->enumeratePhysicalDevices()} { const std::size_t num_physical_devices = static_cast(physical_devices.size()); ASSERT_MSG(physical_device_index < num_physical_devices, "Invalid physical device index {} provided when only {} devices exist", @@ -146,6 +150,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index) CollectTelemetryParameters(); CreateDevice(); + CollectToolingInfo(); CreateFormatTable(); CreateCustomFormatTable(); CreateAttribTable(); @@ -209,12 +214,16 @@ FormatTraits Instance::DetermineTraits(VideoCore::PixelFormat pixel_format, vk:: best_usage |= vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eTransferSrc; } - if (supports_attachment) { + // Attachment flag is only needed for color and depth formats. 
+ if (supports_attachment && + VideoCore::GetFormatType(pixel_format) != VideoCore::SurfaceType::Texture) { best_usage |= (format_aspect & vk::ImageAspectFlagBits::eDepth) ? vk::ImageUsageFlagBits::eDepthStencilAttachment : vk::ImageUsageFlagBits::eColorAttachment; } - if (supports_storage) { + // Storage flag is only needed for shadow rendering with RGBA8 texture. + // Keeping it disables can boost performance on mobile drivers. + if (supports_storage && pixel_format == VideoCore::PixelFormat::RGBA8) { best_usage |= vk::ImageUsageFlagBits::eStorage; } @@ -364,6 +373,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT, vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR, vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT, + vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT, vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>(); const vk::StructureChain properties_chain = physical_device.getProperties2(); } + if (has_fragment_shader_interlock) { + FEAT_SET(vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT, fragmentShaderPixelInterlock, + fragment_shader_interlock) + } else { + device_chain.unlink(); + } + if (has_extended_dynamic_state) { FEAT_SET(vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, extendedDynamicState, extended_dynamic_state) @@ -533,6 +556,12 @@ bool Instance::CreateDevice() { #undef PROP_GET #undef FEAT_SET +#ifdef __APPLE__ + if (!SetMoltenVkConfig()) { + LOG_WARNING(Render_Vulkan, "Unable to set MoltenVK configuration"); + } +#endif + try { device = physical_device.createDeviceUnique(device_chain.get()); } catch (vk::ExtensionNotPresentError& err) { @@ -580,4 +609,55 @@ void Instance::CollectTelemetryParameters() { vendor_name = driver.driverName.data(); } +void Instance::CollectToolingInfo() { + if (!tooling_info) { + return; + } + const auto tools = physical_device.getToolProperties(); + for (const vk::PhysicalDeviceToolProperties& tool : tools) { + const 
std::string_view name = tool.name; + LOG_INFO(Render_Vulkan, "Attached debugging tool: {}", name); + has_renderdoc = has_renderdoc || name == "RenderDoc"; + has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics"; + } +} + +bool Instance::SetMoltenVkConfig() { +#ifdef __APPLE__ + size_t mvk_config_size = sizeof(MVKConfiguration); + MVKConfiguration mvk_config{}; + + const auto _vkGetMoltenVKConfigurationMVK = + library->GetSymbol("vkGetMoltenVKConfigurationMVK"); + if (!_vkGetMoltenVKConfigurationMVK) { + return false; + } + + const auto _vkSetMoltenVKConfigurationMVK = + library->GetSymbol("vkSetMoltenVKConfigurationMVK"); + if (!_vkSetMoltenVKConfigurationMVK) { + return false; + } + + if (_vkGetMoltenVKConfigurationMVK(VK_NULL_HANDLE, &mvk_config, &mvk_config_size) != + VK_SUCCESS) { + return false; + } + + // Use synchronous queue submits if async presentation is enabled, to avoid threading + // indirection. + mvk_config.synchronousQueueSubmits = Settings::values.async_presentation.GetValue(); + // If the device is lost, make an attempt to resume if possible to avoid crashes. + mvk_config.resumeLostDevice = true; + // Maximize concurrency to improve shader compilation performance. + mvk_config.shouldMaximizeConcurrentCompilation = true; + + if (_vkSetMoltenVKConfigurationMVK(VK_NULL_HANDLE, &mvk_config, &mvk_config_size) != + VK_SUCCESS) { + return false; + } +#endif + return true; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index b75bb1d3ee..4d7d7f55e5 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -90,6 +90,16 @@ public: return present_queue; } + /// Returns true when a known debugging tool is attached. + bool HasDebuggingToolAttached() const { + return has_renderdoc || has_nsight_graphics; + } + + /// Returns true when VK_EXT_debug_utils is supported. 
+ bool IsExtDebugUtilsSupported() const { + return debug_utils_supported; + } + /// Returns true if logic operations need shader emulation bool NeedsLogicOpEmulation() const { return !features.logicOp; @@ -130,6 +140,11 @@ public: return index_type_uint8; } + /// Returns true when VK_EXT_fragment_shader_interlock is supported + bool IsFragmentShaderInterlockSupported() const { + return fragment_shader_interlock; + } + /// Returns true when VK_KHR_image_format_list is supported bool IsImageFormatListSupported() const { return image_format_list; @@ -145,11 +160,6 @@ public: return shader_stencil_export; } - /// Returns true if VK_EXT_debug_utils is supported - bool IsExtDebugUtilsSupported() const { - return debug_messenger_supported; - } - /// Returns the vendor ID of the physical device u32 GetVendorID() const { return properties.vendorID; @@ -200,6 +210,11 @@ public: return properties.limits.minUniformBufferOffsetAlignment; } + /// Returns the minimum alignemt required for accessing host-mapped device memory + vk::DeviceSize NonCoherentAtomSize() const { + return properties.limits.nonCoherentAtomSize; + } + /// Returns the maximum supported elements in a texel buffer u32 MaxTexelBufferElements() const { return properties.limits.maxTexelBufferElements; @@ -249,6 +264,10 @@ private: /// Collects telemetry information from the device. void CollectTelemetryParameters(); + void CollectToolingInfo(); + + /// Sets MoltenVK configuration to the desired state. 
+ bool SetMoltenVkConfig(); private: std::shared_ptr library; @@ -277,10 +296,14 @@ private: bool extended_dynamic_state{}; bool custom_border_color{}; bool index_type_uint8{}; + bool fragment_shader_interlock{}; bool image_format_list{}; bool pipeline_creation_cache_control{}; bool shader_stencil_export{}; - bool debug_messenger_supported{}; + bool tooling_info{}; + bool debug_utils_supported{}; + bool has_nsight_graphics{}; + bool has_renderdoc{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp new file mode 100644 index 0000000000..4ab4c9cd99 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -0,0 +1,207 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Vulkan { + +constexpr u64 WAIT_TIMEOUT = std::numeric_limits::max(); + +MasterSemaphoreTimeline::MasterSemaphoreTimeline(const Instance& instance_) : instance{instance_} { + const vk::StructureChain semaphore_chain = { + vk::SemaphoreCreateInfo{}, + vk::SemaphoreTypeCreateInfoKHR{ + .semaphoreType = vk::SemaphoreType::eTimeline, + .initialValue = 0, + }, + }; + semaphore = instance.GetDevice().createSemaphoreUnique(semaphore_chain.get()); +} + +MasterSemaphoreTimeline::~MasterSemaphoreTimeline() = default; + +void MasterSemaphoreTimeline::Refresh() { + u64 this_tick{}; + u64 counter{}; + do { + this_tick = gpu_tick.load(std::memory_order_acquire); + counter = instance.GetDevice().getSemaphoreCounterValueKHR(*semaphore); + if (counter < this_tick) { + return; + } + } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release, + std::memory_order_relaxed)); +} + +void MasterSemaphoreTimeline::Wait(u64 tick) { + 
// No need to wait if the GPU is ahead of the tick + if (IsFree(tick)) { + return; + } + // Update the GPU tick and try again + Refresh(); + if (IsFree(tick)) { + return; + } + + // If none of the above is hit, fallback to a regular wait + const vk::SemaphoreWaitInfoKHR wait_info = { + .semaphoreCount = 1, + .pSemaphores = &semaphore.get(), + .pValues = &tick, + }; + + while (instance.GetDevice().waitSemaphoresKHR(&wait_info, WAIT_TIMEOUT) != + vk::Result::eSuccess) { + } + Refresh(); +} + +void MasterSemaphoreTimeline::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, + vk::Semaphore signal, u64 signal_value) { + cmdbuf.end(); + + const u32 num_signal_semaphores = signal ? 2U : 1U; + const std::array signal_values{signal_value, u64(0)}; + const std::array signal_semaphores{Handle(), signal}; + + const u32 num_wait_semaphores = wait ? 2U : 1U; + const std::array wait_values{signal_value - 1, u64(1)}; + const std::array wait_semaphores{Handle(), wait}; + + static constexpr std::array wait_stage_masks = { + vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + }; + + const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = { + .waitSemaphoreValueCount = num_wait_semaphores, + .pWaitSemaphoreValues = wait_values.data(), + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; + + const vk::SubmitInfo submit_info = { + .pNext = &timeline_si, + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1u, + .pCommandBuffers = &cmdbuf, + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), + }; + + try { + instance.GetGraphicsQueue().submit(submit_info); + } catch (vk::DeviceLostError& err) { + LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what()); + UNREACHABLE(); + } +} + +constexpr u64 FENCE_RESERVE = 8; + 
+MasterSemaphoreFence::MasterSemaphoreFence(const Instance& instance_) : instance{instance_} { + const vk::Device device{instance.GetDevice()}; + for (u64 i = 0; i < FENCE_RESERVE; i++) { + free_queue.push(device.createFenceUnique({})); + } + wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); }); +} + +MasterSemaphoreFence::~MasterSemaphoreFence() = default; + +void MasterSemaphoreFence::Refresh() {} + +void MasterSemaphoreFence::Wait(u64 tick) { + while (true) { + u64 current_value = gpu_tick.load(std::memory_order_relaxed); + if (current_value >= tick) { + return; + } + gpu_tick.wait(current_value); + } +} + +void MasterSemaphoreFence::SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, + vk::Semaphore signal, u64 signal_value) { + cmdbuf.end(); + + const u32 num_signal_semaphores = signal ? 1U : 0U; + const u32 num_wait_semaphores = wait ? 1U : 0U; + + static constexpr std::array wait_stage_masks = { + vk::PipelineStageFlagBits::eColorAttachmentOutput, + }; + + const vk::SubmitInfo submit_info = { + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = &wait, + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1u, + .pCommandBuffers = &cmdbuf, + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = &signal, + }; + + vk::UniqueFence fence{GetFreeFence()}; + try { + instance.GetGraphicsQueue().submit(submit_info, *fence); + } catch (vk::DeviceLostError& err) { + LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what()); + UNREACHABLE(); + } + + std::scoped_lock lock{wait_mutex}; + wait_queue.push({ + .handle = std::move(fence), + .signal_value = signal_value, + }); + wait_cv.notify_one(); +} + +void MasterSemaphoreFence::WaitThread(std::stop_token token) { + const vk::Device device{instance.GetDevice()}; + while (!token.stop_requested()) { + Fence fence; + { + std::unique_lock lock{wait_mutex}; + Common::CondvarWait(wait_cv, lock, token, [this] { return 
!wait_queue.empty(); }); + if (token.stop_requested()) { + return; + } + fence = std::move(wait_queue.front()); + wait_queue.pop(); + } + + const vk::Result result = device.waitForFences(*fence.handle, true, WAIT_TIMEOUT); + if (result != vk::Result::eSuccess) { + LOG_CRITICAL(Render_Vulkan, "Fence wait failed with error {}", vk::to_string(result)); + UNREACHABLE(); + } + device.resetFences(*fence.handle); + + gpu_tick.store(fence.signal_value); + gpu_tick.notify_all(); + + std::scoped_lock lock{free_mutex}; + free_queue.push(std::move(fence.handle)); + } +} + +vk::UniqueFence MasterSemaphoreFence::GetFreeFence() { + std::scoped_lock lock{free_mutex}; + if (free_queue.empty()) { + return instance.GetDevice().createFenceUnique({}); + } + + vk::UniqueFence fence{std::move(free_queue.front())}; + free_queue.pop(); + return fence; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h new file mode 100644 index 0000000000..875e1b8d45 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -0,0 +1,107 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "common/common_types.h" +#include "common/polyfill_thread.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; +class Scheduler; + +class MasterSemaphore { +public: + virtual ~MasterSemaphore() = default; + + [[nodiscard]] u64 CurrentTick() const noexcept { + return current_tick.load(std::memory_order_acquire); + } + + [[nodiscard]] u64 KnownGpuTick() const noexcept { + return gpu_tick.load(std::memory_order_acquire); + } + + [[nodiscard]] bool IsFree(u64 tick) const noexcept { + return KnownGpuTick() >= tick; + } + + [[nodiscard]] u64 NextTick() noexcept { + return current_tick.fetch_add(1, std::memory_order_release); + } + + /// Refresh the known 
GPU tick + virtual void Refresh() = 0; + + /// Waits for a tick to be hit on the GPU + virtual void Wait(u64 tick) = 0; + + /// Submits the provided command buffer for execution + virtual void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal, + u64 signal_value) = 0; + +protected: + std::atomic gpu_tick{0}; ///< Current known GPU tick. + std::atomic current_tick{1}; ///< Current logical tick. +}; + +class MasterSemaphoreTimeline : public MasterSemaphore { +public: + explicit MasterSemaphoreTimeline(const Instance& instance); + ~MasterSemaphoreTimeline() override; + + [[nodiscard]] vk::Semaphore Handle() const noexcept { + return semaphore.get(); + } + + void Refresh() override; + + void Wait(u64 tick) override; + + void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal, + u64 signal_value) override; + +private: + const Instance& instance; + vk::UniqueSemaphore semaphore; ///< Timeline semaphore. +}; + +class MasterSemaphoreFence : public MasterSemaphore { +public: + explicit MasterSemaphoreFence(const Instance& instance); + ~MasterSemaphoreFence() override; + + void Refresh() override; + + void Wait(u64 tick) override; + + void SubmitWork(vk::CommandBuffer cmdbuf, vk::Semaphore wait, vk::Semaphore signal, + u64 signal_value) override; + +private: + void WaitThread(std::stop_token token); + + vk::UniqueFence GetFreeFence(); + +private: + const Instance& instance; + + struct Fence { + vk::UniqueFence handle; + u64 signal_value; + }; + + std::queue free_queue; + std::queue wait_queue; + std::mutex free_mutex; + std::mutex wait_mutex; + std::condition_variable_any wait_cv; + std::jthread wait_thread; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp new file mode 100644 index 0000000000..6d4da71270 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -0,0 +1,524 @@ +// Copyright 2023 Citra 
Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/common_paths.h" +#include "common/file_util.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/settings.h" +#include "video_core/renderer_vulkan/pica_to_vk.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_gen_spv.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" + +MICROPROFILE_DEFINE(Vulkan_Bind, "Vulkan", "Pipeline Bind", MP_RGB(192, 32, 32)); + +namespace Vulkan { + +enum ProgramType : u32 { + VS = 0, + GS = 2, + FS = 1, +}; + +u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) { + switch (format) { + case Pica::PipelineRegs::VertexAttributeFormat::FLOAT: + return sizeof(float) * size; + case Pica::PipelineRegs::VertexAttributeFormat::SHORT: + return sizeof(u16) * size; + case Pica::PipelineRegs::VertexAttributeFormat::BYTE: + case Pica::PipelineRegs::VertexAttributeFormat::UBYTE: + return sizeof(u8) * size; + } + return 0; +} + +AttribLoadFlags MakeAttribLoadFlag(Pica::PipelineRegs::VertexAttributeFormat format) { + switch (format) { + case Pica::PipelineRegs::VertexAttributeFormat::BYTE: + case Pica::PipelineRegs::VertexAttributeFormat::SHORT: + return AttribLoadFlags::Sint; + case Pica::PipelineRegs::VertexAttributeFormat::UBYTE: + return AttribLoadFlags::Uint; + default: + return AttribLoadFlags::Float; + } +} + +constexpr std::array BUFFER_BINDINGS = {{ + {0, vk::DescriptorType::eUniformBufferDynamic, 1, vk::ShaderStageFlagBits::eVertex}, + {1, vk::DescriptorType::eUniformBufferDynamic, 1, + vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eGeometry | + vk::ShaderStageFlagBits::eFragment}, + {2, 
vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment}, + {3, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment}, + {4, vk::DescriptorType::eUniformTexelBuffer, 1, vk::ShaderStageFlagBits::eFragment}, +}}; + +constexpr std::array TEXTURE_BINDINGS = {{ + {0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, + {1, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, + {2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, + {3, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment}, +}}; + +// TODO: Use descriptor array for shadow cube +constexpr std::array SHADOW_BINDINGS = {{ + {0, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, + {1, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, + {2, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, + {3, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, + {4, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, + {5, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, + {6, vk::DescriptorType::eStorageImage, 1, vk::ShaderStageFlagBits::eFragment}, +}}; + +PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, + RenderpassCache& renderpass_cache_, DescriptorPool& pool_) + : instance{instance_}, scheduler{scheduler_}, renderpass_cache{renderpass_cache_}, pool{pool_}, + num_worker_threads{std::max(std::thread::hardware_concurrency(), 2U)}, + workers{num_worker_threads, "Pipeline workers"}, + descriptor_set_providers{DescriptorSetProvider{instance, pool, BUFFER_BINDINGS}, + DescriptorSetProvider{instance, pool, TEXTURE_BINDINGS}, + DescriptorSetProvider{instance, pool, SHADOW_BINDINGS}}, + trivial_vertex_shader{instance, vk::ShaderStageFlagBits::eVertex, + 
GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported())} { + BuildLayout(); +} + +void PipelineCache::BuildLayout() { + std::array descriptor_set_layouts; + std::transform(descriptor_set_providers.begin(), descriptor_set_providers.end(), + descriptor_set_layouts.begin(), + [](const auto& provider) { return provider.Layout(); }); + + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = NUM_RASTERIZER_SETS, + .pSetLayouts = descriptor_set_layouts.data(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; + pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); +} + +PipelineCache::~PipelineCache() { + SaveDiskCache(); +} + +void PipelineCache::LoadDiskCache() { + if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) { + return; + } + + const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(), + instance.GetVendorID(), instance.GetDeviceID()); + vk::PipelineCacheCreateInfo cache_info = { + .initialDataSize = 0, + .pInitialData = nullptr, + }; + + std::vector cache_data; + FileUtil::IOFile cache_file{cache_file_path, "r"}; + if (cache_file.IsOpen()) { + LOG_INFO(Render_Vulkan, "Loading pipeline cache"); + + const u64 cache_file_size = cache_file.GetSize(); + cache_data.resize(cache_file_size); + if (cache_file.ReadBytes(cache_data.data(), cache_file_size)) { + if (!IsCacheValid(cache_data)) { + LOG_WARNING(Render_Vulkan, "Pipeline cache provided invalid, ignoring"); + } else { + cache_info.initialDataSize = cache_file_size; + cache_info.pInitialData = cache_data.data(); + } + } + + cache_file.Close(); + } + + vk::Device device = instance.GetDevice(); + pipeline_cache = device.createPipelineCacheUnique(cache_info); +} + +void PipelineCache::SaveDiskCache() { + if (!Settings::values.use_disk_shader_cache || !EnsureDirectories() || !pipeline_cache) { + return; + } + + const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", 
GetPipelineCacheDir(), + instance.GetVendorID(), instance.GetDeviceID()); + FileUtil::IOFile cache_file{cache_file_path, "wb"}; + if (!cache_file.IsOpen()) { + LOG_ERROR(Render_Vulkan, "Unable to open pipeline cache for writing"); + return; + } + + vk::Device device = instance.GetDevice(); + auto cache_data = device.getPipelineCacheData(*pipeline_cache); + if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) { + LOG_ERROR(Render_Vulkan, "Error during pipeline cache write"); + return; + } + + cache_file.Close(); +} + +bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) { + MICROPROFILE_SCOPE(Vulkan_Bind); + + u64 shader_hash = 0; + for (u32 i = 0; i < MAX_SHADER_STAGES; i++) { + shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]); + } + + const u64 info_hash = info.Hash(instance); + const u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash); + + auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash); + if (new_pipeline) { + it.value() = + std::make_unique(instance, renderpass_cache, info, *pipeline_cache, + *pipeline_layout, current_shaders, &workers); + } + + GraphicsPipeline* const pipeline{it->second.get()}; + if (!pipeline->IsDone() && !pipeline->TryBuild(wait_built)) { + return false; + } + + for (u32 i = 0; i < NUM_RASTERIZER_SETS; i++) { + if (!set_dirty[i]) { + continue; + } + bound_descriptor_sets[i] = descriptor_set_providers[i].Acquire(update_data[i]); + set_dirty[i] = false; + } + + const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline); + const bool pipeline_dirty = (current_pipeline != pipeline) || is_dirty; + scheduler.Record([this, is_dirty, pipeline_dirty, pipeline, + current_dynamic = current_info.dynamic, dynamic = info.dynamic, + descriptor_sets = bound_descriptor_sets, offsets = offsets, + current_rasterization = current_info.rasterization, + current_depth_stencil = current_info.depth_stencil, + rasterization = info.rasterization, + depth_stencil = 
info.depth_stencil](vk::CommandBuffer cmdbuf) { + if (dynamic.stencil_compare_mask != current_dynamic.stencil_compare_mask || is_dirty) { + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, + dynamic.stencil_compare_mask); + } + + if (dynamic.stencil_write_mask != current_dynamic.stencil_write_mask || is_dirty) { + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, + dynamic.stencil_write_mask); + } + + if (dynamic.stencil_reference != current_dynamic.stencil_reference || is_dirty) { + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, + dynamic.stencil_reference); + } + + if (dynamic.blend_color != current_dynamic.blend_color || is_dirty) { + const Common::Vec4f color = PicaToVK::ColorRGBA8(dynamic.blend_color); + cmdbuf.setBlendConstants(color.AsArray()); + } + + if (instance.IsExtendedDynamicStateSupported()) { + if (rasterization.cull_mode != current_rasterization.cull_mode || is_dirty) { + cmdbuf.setCullModeEXT(PicaToVK::CullMode(rasterization.cull_mode)); + cmdbuf.setFrontFaceEXT(PicaToVK::FrontFace(rasterization.cull_mode)); + } + + if (depth_stencil.depth_compare_op != current_depth_stencil.depth_compare_op || + is_dirty) { + cmdbuf.setDepthCompareOpEXT(PicaToVK::CompareFunc(depth_stencil.depth_compare_op)); + } + + if (depth_stencil.depth_test_enable != current_depth_stencil.depth_test_enable || + is_dirty) { + cmdbuf.setDepthTestEnableEXT(depth_stencil.depth_test_enable); + } + + if (depth_stencil.depth_write_enable != current_depth_stencil.depth_write_enable || + is_dirty) { + cmdbuf.setDepthWriteEnableEXT(depth_stencil.depth_write_enable); + } + + if (rasterization.topology != current_rasterization.topology || is_dirty) { + cmdbuf.setPrimitiveTopologyEXT(PicaToVK::PrimitiveTopology(rasterization.topology)); + } + + if (depth_stencil.stencil_test_enable != current_depth_stencil.stencil_test_enable || + is_dirty) { + cmdbuf.setStencilTestEnableEXT(depth_stencil.stencil_test_enable); + } + + if 
(depth_stencil.stencil_fail_op != current_depth_stencil.stencil_fail_op || + depth_stencil.stencil_pass_op != current_depth_stencil.stencil_pass_op || + depth_stencil.stencil_depth_fail_op != + current_depth_stencil.stencil_depth_fail_op || + depth_stencil.stencil_compare_op != current_depth_stencil.stencil_compare_op || + is_dirty) { + cmdbuf.setStencilOpEXT(vk::StencilFaceFlagBits::eFrontAndBack, + PicaToVK::StencilOp(depth_stencil.stencil_fail_op), + PicaToVK::StencilOp(depth_stencil.stencil_pass_op), + PicaToVK::StencilOp(depth_stencil.stencil_depth_fail_op), + PicaToVK::CompareFunc(depth_stencil.stencil_compare_op)); + } + } + + if (pipeline_dirty) { + if (!pipeline->IsDone()) { + pipeline->WaitDone(); + } + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); + } + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, + descriptor_sets, offsets); + }); + + current_info = info; + current_pipeline = pipeline; + scheduler.MarkStateNonDirty(StateFlags::Pipeline); + + return true; +} + +bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, + Pica::Shader::ShaderSetup& setup, + const VertexLayout& layout) { + PicaVSConfig config{regs.rasterizer, regs.vs, setup, instance}; + config.state.use_geometry_shader = instance.UseGeometryShaders(); + + for (u32 i = 0; i < layout.attribute_count; i++) { + const VertexAttribute& attr = layout.attributes[i]; + const FormatTraits& traits = instance.GetTraits(attr.type, attr.size); + const u32 location = attr.location.Value(); + AttribLoadFlags& flags = config.state.load_flags[location]; + + if (traits.needs_conversion) { + flags = MakeAttribLoadFlag(attr.type); + } + if (traits.needs_emulation) { + flags |= AttribLoadFlags::ZeroW; + } + } + + auto [it, new_config] = programmable_vertex_map.try_emplace(config); + if (new_config) { + auto code = GenerateVertexShader(setup, config); + if (!code) { + LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex 
shader"); + programmable_vertex_map[config] = nullptr; + return false; + } + + std::string& program = code.value(); + auto [iter, new_program] = programmable_vertex_cache.try_emplace(program, instance); + auto& shader = iter->second; + + if (new_program) { + shader.program = std::move(program); + const vk::Device device = instance.GetDevice(); + workers.QueueWork([device, &shader] { + shader.module = Compile(shader.program, vk::ShaderStageFlagBits::eVertex, device); + shader.MarkDone(); + }); + } + + it->second = &shader; + } + + Shader* const shader{it->second}; + if (!shader) { + LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader"); + return false; + } + + current_shaders[ProgramType::VS] = shader; + shader_hashes[ProgramType::VS] = config.Hash(); + + return true; +} + +void PipelineCache::UseTrivialVertexShader() { + current_shaders[ProgramType::VS] = &trivial_vertex_shader; + shader_hashes[ProgramType::VS] = 0; +} + +bool PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { + if (!instance.UseGeometryShaders()) { + UseTrivialGeometryShader(); + return true; + } + + const PicaFixedGSConfig gs_config{regs, instance}; + auto [it, new_shader] = fixed_geometry_shaders.try_emplace(gs_config, instance); + auto& shader = it->second; + + if (new_shader) { + workers.QueueWork([gs_config, device = instance.GetDevice(), &shader]() { + const std::string code = GenerateFixedGeometryShader(gs_config); + shader.module = Compile(code, vk::ShaderStageFlagBits::eGeometry, device); + shader.MarkDone(); + }); + } + + current_shaders[ProgramType::GS] = &shader; + shader_hashes[ProgramType::GS] = gs_config.Hash(); + + return true; +} + +void PipelineCache::UseTrivialGeometryShader() { + current_shaders[ProgramType::GS] = nullptr; + shader_hashes[ProgramType::GS] = 0; +} + +void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { + const PicaFSConfig config{regs, instance}; + + const auto [it, new_shader] = fragment_shaders.try_emplace(config, 
instance); + auto& shader = it->second; + + if (new_shader) { + const bool use_spirv = Settings::values.spirv_shader_gen.GetValue(); + const auto texture0_type = config.state.texture0_type.Value(); + const bool is_shadow = texture0_type == Pica::TexturingRegs::TextureConfig::Shadow2D || + texture0_type == Pica::TexturingRegs::TextureConfig::ShadowCube || + config.state.shadow_rendering.Value(); + if (use_spirv && !is_shadow) { + const std::vector code = GenerateFragmentShaderSPV(config); + shader.module = CompileSPV(code, instance.GetDevice()); + shader.MarkDone(); + } else { + workers.QueueWork([config, device = instance.GetDevice(), &shader]() { + const std::string code = GenerateFragmentShader(config); + shader.module = Compile(code, vk::ShaderStageFlagBits::eFragment, device); + shader.MarkDone(); + }); + } + } + + current_shaders[ProgramType::FS] = &shader; + shader_hashes[ProgramType::FS] = config.Hash(); +} + +void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler) { + auto& info = update_data[1][binding].image_info; + if (info.imageView == image_view && info.sampler == sampler) { + return; + } + set_dirty[1] = true; + info = vk::DescriptorImageInfo{ + .sampler = sampler, + .imageView = image_view, + .imageLayout = vk::ImageLayout::eGeneral, + }; +} + +void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) { + auto& info = update_data[2][binding].image_info; + if (info.imageView == image_view) { + return; + } + set_dirty[2] = true; + info = vk::DescriptorImageInfo{ + .imageView = image_view, + .imageLayout = vk::ImageLayout::eGeneral, + }; +} + +void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) { + auto& info = update_data[0][binding].buffer_info; + if (info.buffer == buffer && info.offset == offset && info.range == size) { + return; + } + set_dirty[0] = true; + info = vk::DescriptorBufferInfo{ + .buffer = buffer, + .offset = offset, + .range = size, + }; +} + +void 
PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) { + auto& view = update_data[0][binding].buffer_view; + if (view != buffer_view) { + set_dirty[0] = true; + view = buffer_view; + } +} + +void PipelineCache::SetBufferOffset(u32 binding, size_t offset) { + offsets[binding] = static_cast(offset); +} + +bool PipelineCache::IsCacheValid(std::span data) const { + if (data.size() < sizeof(vk::PipelineCacheHeaderVersionOne)) { + LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header"); + return false; + } + + vk::PipelineCacheHeaderVersionOne header; + std::memcpy(&header, data.data(), sizeof(header)); + if (header.headerSize < sizeof(header)) { + LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header length"); + return false; + } + + if (header.headerVersion != vk::PipelineCacheHeaderVersion::eOne) { + LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Invalid header version"); + return false; + } + + if (u32 vendor_id = instance.GetVendorID(); header.vendorID != vendor_id) { + LOG_ERROR( + Render_Vulkan, + "Pipeline cache failed validation: Incorrect vendor ID (file: {:#X}, device: {:#X})", + header.vendorID, vendor_id); + return false; + } + + if (u32 device_id = instance.GetDeviceID(); header.deviceID != device_id) { + LOG_ERROR( + Render_Vulkan, + "Pipeline cache failed validation: Incorrect device ID (file: {:#X}, device: {:#X})", + header.deviceID, device_id); + return false; + } + + if (header.pipelineCacheUUID != instance.GetPipelineCacheUUID()) { + LOG_ERROR(Render_Vulkan, "Pipeline cache failed validation: Incorrect UUID"); + return false; + } + + return true; +} + +bool PipelineCache::EnsureDirectories() const { + const auto create_dir = [](const std::string& dir) { + if (!FileUtil::CreateDir(dir)) { + LOG_ERROR(Render_Vulkan, "Failed to create directory={}", dir); + return false; + } + + return true; + }; + + return create_dir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) && + 
create_dir(GetPipelineCacheDir()); +} + +std::string PipelineCache::GetPipelineCacheDir() const { + return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + "vulkan" + DIR_SEP; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h new file mode 100644 index 0000000000..954f4b3e9c --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -0,0 +1,123 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" + +namespace Pica { +struct Regs; +} + +namespace Vulkan { + +class Instance; +class Scheduler; +class RenderpassCache; +class DescriptorPool; + +constexpr u32 NUM_RASTERIZER_SETS = 3; +constexpr u32 NUM_DYNAMIC_OFFSETS = 2; + +/** + * Stores a collection of rasterizer pipelines used during rendering. 
+ */ +class PipelineCache { +public: + explicit PipelineCache(const Instance& instance, Scheduler& scheduler, + RenderpassCache& renderpass_cache, DescriptorPool& pool); + ~PipelineCache(); + + [[nodiscard]] DescriptorSetProvider& TextureProvider() noexcept { + return descriptor_set_providers[1]; + } + + /// Loads the pipeline cache stored to disk + void LoadDiskCache(); + + /// Stores the generated pipeline cache to disk + void SaveDiskCache(); + + /// Binds a pipeline using the provided information + bool BindPipeline(const PipelineInfo& info, bool wait_built = false); + + /// Binds a PICA decompiled vertex shader + bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, + const VertexLayout& layout); + + /// Binds a passthrough vertex shader + void UseTrivialVertexShader(); + + /// Binds a PICA decompiled geometry shader + bool UseFixedGeometryShader(const Pica::Regs& regs); + + /// Binds a passthrough geometry shader + void UseTrivialGeometryShader(); + + /// Binds a fragment shader generated from PICA state + void UseFragmentShader(const Pica::Regs& regs); + + /// Binds a texture to the specified binding + void BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler); + + /// Binds a storage image to the specified binding + void BindStorageImage(u32 binding, vk::ImageView image_view); + + /// Binds a buffer to the specified binding + void BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size); + + /// Binds a buffer to the specified binding + void BindTexelBuffer(u32 binding, vk::BufferView buffer_view); + + /// Sets the dynamic offset for the uniform buffer at binding + void SetBufferOffset(u32 binding, size_t offset); + +private: + /// Builds the rasterizer pipeline layout + void BuildLayout(); + + /// Returns true when the disk data can be used by the current driver + bool IsCacheValid(std::span cache_data) const; + + /// Create shader disk cache directories. Returns true on success. 
+ bool EnsureDirectories() const; + + /// Returns the pipeline cache storage dir + std::string GetPipelineCacheDir() const; + +private: + const Instance& instance; + Scheduler& scheduler; + RenderpassCache& renderpass_cache; + DescriptorPool& pool; + + vk::UniquePipelineCache pipeline_cache; + vk::UniquePipelineLayout pipeline_layout; + std::size_t num_worker_threads; + Common::ThreadWorker workers; + PipelineInfo current_info{}; + GraphicsPipeline* current_pipeline{}; + tsl::robin_map, Common::IdentityHash> + graphics_pipelines; + + std::array descriptor_set_providers; + std::array update_data{}; + std::array bound_descriptor_sets{}; + std::array offsets{}; + std::bitset set_dirty{}; + + std::array shader_hashes; + std::array current_shaders; + std::unordered_map programmable_vertex_map; + std::unordered_map programmable_vertex_cache; + std::unordered_map fixed_geometry_shaders; + std::unordered_map fragment_shaders; + Shader trivial_vertex_shader; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 25beb60f9e..7b4c6e230d 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -95,7 +95,14 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugReportCallback(VkDebugReportFlagsEXT } } // Anonymous namespace -std::shared_ptr OpenLibrary() { +std::shared_ptr OpenLibrary( + [[maybe_unused]] Frontend::GraphicsContext* context) { +#ifdef ANDROID + // Android may override the Vulkan driver from the frontend. 
+ if (auto library = context->GetDriverLibrary(); library) { + return library; + } +#endif auto library = std::make_shared(); #ifdef __APPLE__ const std::string filename = Common::DynamicLibrary::GetLibraryName("vulkan"); @@ -273,16 +280,14 @@ vk::UniqueInstance CreateInstance(const Common::DynamicLibrary& library, const auto vkGetInstanceProcAddr = library.GetSymbol("vkGetInstanceProcAddr"); if (!vkGetInstanceProcAddr) { - LOG_CRITICAL(Render_Vulkan, "Failed GetSymbol vkGetInstanceProcAddr"); - return {}; + throw std::runtime_error("Failed GetSymbol vkGetInstanceProcAddr"); } VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); const auto extensions = GetInstanceExtensions(window_type, enable_validation); const u32 available_version = vk::enumerateInstanceVersion(); if (available_version < VK_API_VERSION_1_1) { - LOG_CRITICAL(Render_Vulkan, "Vulkan 1.0 is not supported, 1.1 is required!"); - return {}; + throw std::runtime_error("Vulkan 1.0 is not supported, 1.1 is required!"); } const vk::ApplicationInfo application_info = { @@ -343,7 +348,7 @@ vk::UniqueDebugReportCallbackEXT CreateDebugReportCallback(vk::Instance instance return instance.createDebugReportCallbackEXTUnique(callback_ci); } -DebugCallback CreateDebugCallback(vk::Instance instance) { +DebugCallback CreateDebugCallback(vk::Instance instance, bool& debug_utils_supported) { if (!Settings::values.renderer_debug) { return {}; } @@ -356,7 +361,8 @@ DebugCallback CreateDebugCallback(vk::Instance instance) { return std::strcmp(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, prop.extensionName) == 0; }); // Prefer debug util messenger if available. - if (it != properties.end()) { + debug_utils_supported = it != properties.end(); + if (debug_utils_supported) { return CreateDebugMessenger(instance); } // Otherwise fallback to debug report callback. 
diff --git a/src/video_core/renderer_vulkan/vk_platform.h b/src/video_core/renderer_vulkan/vk_platform.h index f4b69e6c2e..48ebcc90d2 100644 --- a/src/video_core/renderer_vulkan/vk_platform.h +++ b/src/video_core/renderer_vulkan/vk_platform.h @@ -13,6 +13,7 @@ namespace Frontend { class EmuWindow; +class GraphicsContext; enum class WindowSystemType : u8; } // namespace Frontend @@ -21,7 +22,8 @@ namespace Vulkan { using DebugCallback = std::variant; -std::shared_ptr OpenLibrary(); +std::shared_ptr OpenLibrary( + [[maybe_unused]] Frontend::GraphicsContext* context = nullptr); vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& emu_window); @@ -29,6 +31,6 @@ vk::UniqueInstance CreateInstance(const Common::DynamicLibrary& library, Frontend::WindowSystemType window_type, bool enable_validation, bool dump_command_buffers); -DebugCallback CreateDebugCallback(vk::Instance instance); +DebugCallback CreateDebugCallback(vk::Instance instance, bool& debug_utils_supported); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_present_window.cpp b/src/video_core/renderer_vulkan/vk_present_window.cpp new file mode 100644 index 0000000000..82111d0b0a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_present_window.cpp @@ -0,0 +1,515 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/microprofile.h" +#include "common/settings.h" +#include "common/thread.h" +#include "core/frontend/emu_window.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_platform.h" +#include "video_core/renderer_vulkan/vk_present_window.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" + +#include + +MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128)); + +namespace Vulkan { + +namespace { + +bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, vk::Format format) { + const vk::FormatProperties props{physical_device.getFormatProperties(format)}; + return static_cast(props.optimalTilingFeatures & vk::FormatFeatureFlagBits::eBlitDst); +} + +[[nodiscard]] vk::ImageSubresourceLayers MakeImageSubresourceLayers() { + return vk::ImageSubresourceLayers{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }; +} + +[[nodiscard]] vk::ImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width, + s32 swapchain_height) { + return vk::ImageBlit{ + .srcSubresource = MakeImageSubresourceLayers(), + .srcOffsets = + std::array{ + vk::Offset3D{ + .x = 0, + .y = 0, + .z = 0, + }, + vk::Offset3D{ + .x = frame_width, + .y = frame_height, + .z = 1, + }, + }, + .dstSubresource = MakeImageSubresourceLayers(), + .dstOffsets = + std::array{ + vk::Offset3D{ + .x = 0, + .y = 0, + .z = 0, + }, + vk::Offset3D{ + .x = swapchain_width, + .y = swapchain_height, + .z = 1, + }, + }, + }; +} + +[[nodiscard]] vk::ImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, u32 swapchain_width, + u32 swapchain_height) { + return vk::ImageCopy{ + .srcSubresource = MakeImageSubresourceLayers(), + .srcOffset = + vk::Offset3D{ + .x = 0, + .y = 0, + .z = 0, + }, + .dstSubresource = MakeImageSubresourceLayers(), + .dstOffset = + vk::Offset3D{ + .x = 0, + .y = 0, + .z 
= 0, + }, + .extent = + vk::Extent3D{ + .width = std::min(frame_width, swapchain_width), + .height = std::min(frame_height, swapchain_height), + .depth = 1, + }, + }; +} + +} // Anonymous namespace + +PresentWindow::PresentWindow(Frontend::EmuWindow& emu_window_, const Instance& instance_, + Scheduler& scheduler_) + : emu_window{emu_window_}, instance{instance_}, scheduler{scheduler_}, + surface{CreateSurface(instance.GetInstance(), emu_window)}, + next_surface{surface}, swapchain{instance, emu_window.GetFramebufferLayout().width, + emu_window.GetFramebufferLayout().height, surface}, + graphics_queue{instance.GetGraphicsQueue()}, present_renderpass{CreateRenderpass()}, + vsync_enabled{Settings::values.use_vsync_new.GetValue()}, + blit_supported{ + CanBlitToSwapchain(instance.GetPhysicalDevice(), swapchain.GetSurfaceFormat().format)}, + use_present_thread{Settings::values.async_presentation.GetValue()}, + last_render_surface{emu_window.GetWindowInfo().render_surface} { + + const u32 num_images = swapchain.GetImageCount(); + const vk::Device device = instance.GetDevice(); + + const vk::CommandPoolCreateInfo pool_info = { + .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer | + vk::CommandPoolCreateFlagBits::eTransient, + .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(), + }; + command_pool = device.createCommandPool(pool_info); + + const vk::CommandBufferAllocateInfo alloc_info = { + .commandPool = command_pool, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = num_images, + }; + const std::vector command_buffers = device.allocateCommandBuffers(alloc_info); + + swap_chain.resize(num_images); + for (u32 i = 0; i < num_images; i++) { + Frame& frame = swap_chain[i]; + frame.cmdbuf = command_buffers[i]; + frame.render_ready = device.createSemaphore({}); + frame.present_done = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled}); + free_queue.push(&frame); + } + + if (use_present_thread) { + present_thread = 
std::jthread([this](std::stop_token token) { PresentThread(token); }); + } +} + +PresentWindow::~PresentWindow() { + scheduler.Finish(); + const vk::Device device = instance.GetDevice(); + device.destroyCommandPool(command_pool); + device.destroyRenderPass(present_renderpass); + for (auto& frame : swap_chain) { + device.destroyImageView(frame.image_view); + device.destroyFramebuffer(frame.framebuffer); + device.destroySemaphore(frame.render_ready); + device.destroyFence(frame.present_done); + vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation); + } +} + +void PresentWindow::RecreateFrame(Frame* frame, u32 width, u32 height) { + vk::Device device = instance.GetDevice(); + if (frame->framebuffer) { + device.destroyFramebuffer(frame->framebuffer); + } + if (frame->image_view) { + device.destroyImageView(frame->image_view); + } + if (frame->image) { + vmaDestroyImage(instance.GetAllocator(), frame->image, frame->allocation); + } + + const vk::Format format = swapchain.GetSurfaceFormat().format; + const vk::ImageCreateInfo image_info = { + .imageType = vk::ImageType::e2D, + .format = format, + .extent = {width, height, 1}, + .mipLevels = 1, + .arrayLayers = 1, + .samples = vk::SampleCountFlagBits::e1, + .usage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc, + }; + + const VmaAllocationCreateInfo alloc_info = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .requiredFlags = 0, + .preferredFlags = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + }; + + VkImage unsafe_image{}; + VkImageCreateInfo unsafe_image_info = static_cast(image_info); + + VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info, + &unsafe_image, &frame->allocation, nullptr); + if (result != VK_SUCCESS) [[unlikely]] { + LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result); + UNREACHABLE(); + } + frame->image = 
vk::Image{unsafe_image}; + + const vk::ImageViewCreateInfo view_info = { + .image = frame->image, + .viewType = vk::ImageViewType::e2D, + .format = format, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + frame->image_view = device.createImageView(view_info); + + const vk::FramebufferCreateInfo framebuffer_info = { + .renderPass = present_renderpass, + .attachmentCount = 1, + .pAttachments = &frame->image_view, + .width = width, + .height = height, + .layers = 1, + }; + frame->framebuffer = instance.GetDevice().createFramebuffer(framebuffer_info); + + frame->width = width; + frame->height = height; +} + +Frame* PresentWindow::GetRenderFrame() { + MICROPROFILE_SCOPE(Vulkan_WaitPresent); + + // Wait for free presentation frames + std::unique_lock lock{free_mutex}; + free_cv.wait(lock, [this] { return !free_queue.empty(); }); + + // Take the frame from the queue + Frame* frame = free_queue.front(); + free_queue.pop(); + + vk::Device device = instance.GetDevice(); + vk::Result result{}; + + const auto wait = [&]() { + result = device.waitForFences(frame->present_done, false, std::numeric_limits::max()); + return result; + }; + + // Wait for the presentation to be finished so all frame resources are free + while (wait() != vk::Result::eSuccess) { + // Retry if the waiting times out + if (result == vk::Result::eTimeout) { + continue; + } + + // eErrorInitializationFailed occurs on Mali GPU drivers due to them + // using the ppoll() syscall which isn't correctly restarted after a signal, + // we need to manually retry waiting in that case + if (result == vk::Result::eErrorInitializationFailed) { + continue; + } + } + + device.resetFences(frame->present_done); + return frame; +} + +void PresentWindow::Present(Frame* frame) { + if (!use_present_thread) { + scheduler.WaitWorker(); + CopyToSwapchain(frame); + free_queue.push(frame); + return; + } + + 
scheduler.Record([this, frame](vk::CommandBuffer) { + std::unique_lock lock{queue_mutex}; + present_queue.push(frame); + frame_cv.notify_one(); + }); +} + +void PresentWindow::WaitPresent() { + if (!use_present_thread) { + return; + } + + // Wait for the present queue to be empty + { + std::unique_lock queue_lock{queue_mutex}; + frame_cv.wait(queue_lock, [this] { return present_queue.empty(); }); + } + + // The above condition will be satisfied when the last frame is taken from the queue. + // To ensure that frame has been presented as well take hold of the swapchain + // mutex. + std::scoped_lock swapchain_lock{swapchain_mutex}; +} + +void PresentWindow::PresentThread(std::stop_token token) { + Common::SetCurrentThreadName("VulkanPresent"); + while (!token.stop_requested()) { + std::unique_lock lock{queue_mutex}; + + // Wait for presentation frames + Common::CondvarWait(frame_cv, lock, token, [this] { return !present_queue.empty(); }); + if (token.stop_requested()) { + return; + } + + // Take the frame and notify anyone waiting + Frame* frame = present_queue.front(); + present_queue.pop(); + frame_cv.notify_one(); + + // By exchanging the lock ownership we take the swapchain lock + // before the queue lock goes out of scope. This way the swapchain + // lock in WaitPresent is guaranteed to occur after here. 
+ std::exchange(lock, std::unique_lock{swapchain_mutex}); + + CopyToSwapchain(frame); + + // Free the frame for reuse + std::scoped_lock fl{free_mutex}; + free_queue.push(frame); + free_cv.notify_one(); + } +} + +void PresentWindow::NotifySurfaceChanged() { +#ifdef ANDROID + std::scoped_lock lock{recreate_surface_mutex}; + next_surface = CreateSurface(instance.GetInstance(), emu_window); + recreate_surface_cv.notify_one(); +#endif +} + +void PresentWindow::CopyToSwapchain(Frame* frame) { + const auto recreate_swapchain = [&] { +#ifdef ANDROID + { + std::unique_lock lock{recreate_surface_mutex}; + recreate_surface_cv.wait(lock, [this]() { return surface != next_surface; }); + surface = next_surface; + } +#endif + std::scoped_lock submit_lock{scheduler.submit_mutex}; + graphics_queue.waitIdle(); + swapchain.Create(frame->width, frame->height, surface); + }; + +#ifndef ANDROID + const bool use_vsync = Settings::values.use_vsync_new.GetValue(); + const bool size_changed = + swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height; + const bool vsync_changed = vsync_enabled != use_vsync; + if (vsync_changed || size_changed) [[unlikely]] { + vsync_enabled = use_vsync; + recreate_swapchain(); + } +#endif + + while (!swapchain.AcquireNextImage()) { + recreate_swapchain(); + } + + const vk::Image swapchain_image = swapchain.Image(); + + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, + }; + const vk::CommandBuffer cmdbuf = frame->cmdbuf; + cmdbuf.begin(begin_info); + + const vk::Extent2D extent = swapchain.GetExtent(); + const std::array pre_barriers{ + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = 
swapchain_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = frame->image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + const vk::ImageMemoryBarrier post_barrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::ePresentSrcKHR, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = swapchain_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, + {}, {}, pre_barriers); + + if (blit_supported) { + cmdbuf.blitImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image, + vk::ImageLayout::eTransferDstOptimal, + MakeImageBlit(frame->width, frame->height, extent.width, extent.height), + vk::Filter::eLinear); + } else { + cmdbuf.copyImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image, + vk::ImageLayout::eTransferDstOptimal, + MakeImageCopy(frame->width, frame->height, extent.width, extent.height)); + } + + 
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + + cmdbuf.end(); + + static constexpr std::array wait_stage_masks = { + vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eAllGraphics, + }; + + const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore(); + const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore(); + const std::array wait_semaphores = {image_acquired, frame->render_ready}; + + vk::SubmitInfo submit_info = { + .waitSemaphoreCount = static_cast(wait_semaphores.size()), + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1u, + .pCommandBuffers = &cmdbuf, + .signalSemaphoreCount = 1, + .pSignalSemaphores = &present_ready, + }; + + std::scoped_lock submit_lock{scheduler.submit_mutex}; + + try { + graphics_queue.submit(submit_info, frame->present_done); + } catch (vk::DeviceLostError& err) { + LOG_CRITICAL(Render_Vulkan, "Device lost during present submit: {}", err.what()); + UNREACHABLE(); + } + + swapchain.Present(); +} + +vk::RenderPass PresentWindow::CreateRenderpass() { + const vk::AttachmentReference color_ref = { + .attachment = 0, + .layout = vk::ImageLayout::eGeneral, + }; + + const vk::SubpassDescription subpass = { + .pipelineBindPoint = vk::PipelineBindPoint::eGraphics, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = 1u, + .pColorAttachments = &color_ref, + .pResolveAttachments = 0, + .pDepthStencilAttachment = nullptr, + }; + + const vk::AttachmentDescription color_attachment = { + .format = swapchain.GetSurfaceFormat().format, + .loadOp = vk::AttachmentLoadOp::eClear, + .storeOp = vk::AttachmentStoreOp::eStore, + .stencilLoadOp = vk::AttachmentLoadOp::eDontCare, + .stencilStoreOp = vk::AttachmentStoreOp::eDontCare, + .initialLayout = vk::ImageLayout::eUndefined, + 
.finalLayout = vk::ImageLayout::eTransferSrcOptimal, + }; + + const vk::RenderPassCreateInfo renderpass_info = { + .attachmentCount = 1, + .pAttachments = &color_attachment, + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }; + + return instance.GetDevice().createRenderPass(renderpass_info); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_present_window.h b/src/video_core/renderer_vulkan/vk_present_window.h new file mode 100644 index 0000000000..f5e9844e7d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_present_window.h @@ -0,0 +1,100 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include "common/polyfill_thread.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" + +VK_DEFINE_HANDLE(VmaAllocation) + +namespace Frontend { +class EmuWindow; +} + +namespace Vulkan { + +class Instance; +class Swapchain; +class Scheduler; +class RenderpassCache; + +struct Frame { + u32 width; + u32 height; + VmaAllocation allocation; + vk::Framebuffer framebuffer; + vk::Image image; + vk::ImageView image_view; + vk::Semaphore render_ready; + vk::Fence present_done; + vk::CommandBuffer cmdbuf; +}; + +class PresentWindow final { +public: + explicit PresentWindow(Frontend::EmuWindow& emu_window, const Instance& instance, + Scheduler& scheduler); + ~PresentWindow(); + + /// Waits for all queued frames to finish presenting. + void WaitPresent(); + + /// Returns the last used render frame. + Frame* GetRenderFrame(); + + /// Recreates the render frame to match provided parameters. + void RecreateFrame(Frame* frame, u32 width, u32 height); + + /// Queues the provided frame for presentation. 
+ void Present(Frame* frame); + + /// This is called to notify the rendering backend of a surface change + void NotifySurfaceChanged(); + + [[nodiscard]] vk::RenderPass Renderpass() const noexcept { + return present_renderpass; + } + + u32 ImageCount() const noexcept { + return swapchain.GetImageCount(); + } + +private: + void PresentThread(std::stop_token token); + + void CopyToSwapchain(Frame* frame); + + vk::RenderPass CreateRenderpass(); + +private: + Frontend::EmuWindow& emu_window; + const Instance& instance; + Scheduler& scheduler; + vk::SurfaceKHR surface; + vk::SurfaceKHR next_surface{}; + Swapchain swapchain; + vk::CommandPool command_pool; + vk::Queue graphics_queue; + vk::RenderPass present_renderpass; + std::vector swap_chain; + std::queue free_queue; + std::queue present_queue; + std::condition_variable free_cv; + std::condition_variable recreate_surface_cv; + std::condition_variable_any frame_cv; + std::mutex swapchain_mutex; + std::mutex recreate_surface_mutex; + std::mutex queue_mutex; + std::mutex free_mutex; + std::jthread present_thread; + bool vsync_enabled{}; + bool blit_supported; + bool use_present_thread{true}; + void* last_render_surface{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp new file mode 100644 index 0000000000..5511f43354 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -0,0 +1,1145 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/alignment.h" +#include "common/logging/log.h" +#include "common/math_util.h" +#include "common/microprofile.h" +#include "common/settings.h" +#include "video_core/pica_state.h" +#include "video_core/regs_framebuffer.h" +#include "video_core/regs_pipeline.h" +#include "video_core/regs_rasterizer.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/texture/texture_decode.h" + +namespace Vulkan { + +namespace { + +MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_GS, "Vulkan", "Geometry Shader Setup", MP_RGB(128, 192, 128)); +MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192)); + +using TriangleTopology = Pica::PipelineRegs::TriangleTopology; +using VideoCore::SurfaceType; + +constexpr u64 STREAM_BUFFER_SIZE = 64 * 1024 * 1024; +constexpr u64 UNIFORM_BUFFER_SIZE = 4 * 1024 * 1024; +constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024; + +constexpr vk::BufferUsageFlags BUFFER_USAGE = + vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer; + +struct DrawParams { + u32 vertex_count; + s32 vertex_offset; + u32 binding_count; + std::array bindings; + bool is_indexed; +}; + +[[nodiscard]] u64 TextureBufferSize(const Instance& instance) { + // Use the smallest texel size from the texel views + // which corresponds to eR32G32Sfloat + const u64 max_size = instance.MaxTexelBufferElements() * 8; + return std::min(max_size, TEXTURE_BUFFER_SIZE); +} + +} // Anonymous namespace + +RasterizerVulkan::RasterizerVulkan(Memory::MemorySystem& memory, + VideoCore::CustomTexManager& custom_tex_manager, + VideoCore::RendererBase& renderer, + Frontend::EmuWindow& emu_window, const Instance& instance, + Scheduler& scheduler, DescriptorPool& pool, + RenderpassCache& 
renderpass_cache, u32 image_count) + : RasterizerAccelerated{memory}, instance{instance}, scheduler{scheduler}, + renderpass_cache{renderpass_cache}, pipeline_cache{instance, scheduler, renderpass_cache, + pool}, + runtime{instance, scheduler, renderpass_cache, pool, pipeline_cache.TextureProvider(), + image_count}, + res_cache{memory, custom_tex_manager, runtime, regs, renderer}, + stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE}, + uniform_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformBuffer, + UNIFORM_BUFFER_SIZE}, + texture_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformTexelBuffer, + TextureBufferSize(instance)}, + texture_lf_buffer{instance, scheduler, vk::BufferUsageFlagBits::eUniformTexelBuffer, + TextureBufferSize(instance)}, + async_shaders{Settings::values.async_shader_compilation.GetValue()} { + + vertex_buffers.fill(stream_buffer.Handle()); + + uniform_buffer_alignment = instance.UniformMinAlignment(); + uniform_size_aligned_vs = + Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment); + uniform_size_aligned_fs = + Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment); + + // Define vertex layout for software shaders + MakeSoftwareVertexLayout(); + pipeline_info.vertex_layout = software_layout; + + const vk::Device device = instance.GetDevice(); + texture_lf_view = device.createBufferViewUnique({ + .buffer = texture_lf_buffer.Handle(), + .format = vk::Format::eR32G32Sfloat, + .offset = 0, + .range = VK_WHOLE_SIZE, + }); + texture_rg_view = device.createBufferViewUnique({ + .buffer = texture_buffer.Handle(), + .format = vk::Format::eR32G32Sfloat, + .offset = 0, + .range = VK_WHOLE_SIZE, + }); + texture_rgba_view = device.createBufferViewUnique({ + .buffer = texture_buffer.Handle(), + .format = vk::Format::eR32G32B32A32Sfloat, + .offset = 0, + .range = VK_WHOLE_SIZE, + }); + + // Since we don't have access to VK_EXT_descriptor_indexing we need to intiallize + 
// all descriptor sets even the ones we don't use. + pipeline_cache.BindBuffer(0, uniform_buffer.Handle(), 0, sizeof(Pica::Shader::VSUniformData)); + pipeline_cache.BindBuffer(1, uniform_buffer.Handle(), 0, sizeof(Pica::Shader::UniformData)); + pipeline_cache.BindTexelBuffer(2, *texture_lf_view); + pipeline_cache.BindTexelBuffer(3, *texture_rg_view); + pipeline_cache.BindTexelBuffer(4, *texture_rgba_view); + + Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); + Surface& null_cube_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_CUBE_ID); + Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID); + for (u32 i = 0; i < 3; i++) { + pipeline_cache.BindTexture(i, null_surface.ImageView(), null_sampler.Handle()); + } + pipeline_cache.BindTexture(3, null_cube_surface.ImageView(), null_sampler.Handle()); + + for (u32 i = 0; i < 7; i++) { + pipeline_cache.BindStorageImage(i, null_surface.StorageView()); + } + + SyncEntireState(); +} + +RasterizerVulkan::~RasterizerVulkan() = default; + +void RasterizerVulkan::TickFrame() { + res_cache.TickFrame(); +} + +void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + pipeline_cache.LoadDiskCache(); +} + +void RasterizerVulkan::SyncFixedState() { + SyncClipEnabled(); + SyncCullMode(); + SyncBlendEnabled(); + SyncBlendFuncs(); + SyncBlendColor(); + SyncLogicOp(); + SyncStencilTest(); + SyncDepthTest(); + SyncColorWriteMask(); + SyncStencilWriteMask(); + SyncDepthWriteMask(); +} + +void RasterizerVulkan::SetupVertexArray() { + const auto [vs_input_index_min, vs_input_index_max, vs_input_size] = vertex_info; + auto [array_ptr, array_offset, invalidate] = stream_buffer.Map(vs_input_size, 16); + + /** + * The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU + * how to interpret vertex data. The program firsts sets GPUREG_ATTR_BUF_BASE to the base + * address containing the vertex array data. 
The data for each attribute loader (i) can be found + * by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought + * as something analogous to Vulkan bindings. The user can store attributes in separate loaders + * or interleave them in the same loader. + **/ + const auto& vertex_attributes = regs.pipeline.vertex_attributes; + const PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE + const u32 stride_alignment = instance.GetMinVertexStrideAlignment(); + + VertexLayout& layout = pipeline_info.vertex_layout; + layout.binding_count = 0; + layout.attribute_count = 16; + enable_attributes.fill(false); + + u32 buffer_offset = 0; + for (const auto& loader : vertex_attributes.attribute_loaders) { + if (loader.component_count == 0 || loader.byte_count == 0) { + continue; + } + + // Analyze the attribute loader by checking which attributes it provides + u32 offset = 0; + for (u32 comp = 0; comp < loader.component_count && comp < 12; comp++) { + const u32 attribute_index = loader.GetComponent(comp); + if (attribute_index >= 12) { + // Attribute ids 12, to 15 signify 4, 8, 12 and 16-byte paddings respectively. 
+ offset = Common::AlignUp(offset, 4); + offset += (attribute_index - 11) * 4; + continue; + } + + const u32 size = vertex_attributes.GetNumElements(attribute_index); + if (size == 0) { + continue; + } + + offset = + Common::AlignUp(offset, vertex_attributes.GetElementSizeInBytes(attribute_index)); + + const u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index); + const auto format = vertex_attributes.GetFormat(attribute_index); + + VertexAttribute& attribute = layout.attributes[input_reg]; + attribute.binding.Assign(layout.binding_count); + attribute.location.Assign(input_reg); + attribute.offset.Assign(offset); + attribute.type.Assign(format); + attribute.size.Assign(size); + + enable_attributes[input_reg] = true; + offset += vertex_attributes.GetStride(attribute_index); + } + + const PAddr data_addr = + base_address + loader.data_offset + (vs_input_index_min * loader.byte_count); + const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1; + u32 data_size = loader.byte_count * vertex_num; + res_cache.FlushRegion(data_addr, data_size); + + const MemoryRef src_ref = memory.GetPhysicalRef(data_addr); + if (src_ref.GetSize() < data_size) { + LOG_ERROR(Render_Vulkan, + "Vertex buffer size {} exceeds available space {} at address {:#016X}", + data_size, src_ref.GetSize(), data_addr); + } + + const u8* src_ptr = src_ref.GetPtr(); + u8* dst_ptr = array_ptr + buffer_offset; + + // Align stride up if required by Vulkan implementation. 
+ const u32 aligned_stride = + Common::AlignUp(static_cast(loader.byte_count), stride_alignment); + if (aligned_stride == loader.byte_count) { + std::memcpy(dst_ptr, src_ptr, data_size); + } else { + for (size_t vertex = 0; vertex < vertex_num; vertex++) { + std::memcpy(dst_ptr + vertex * aligned_stride, src_ptr + vertex * loader.byte_count, + loader.byte_count); + } + } + + // Create the binding associated with this loader + VertexBinding& binding = layout.bindings[layout.binding_count]; + binding.binding.Assign(layout.binding_count); + binding.fixed.Assign(0); + binding.stride.Assign(aligned_stride); + + // Keep track of the binding offsets so we can bind the vertex buffer later + binding_offsets[layout.binding_count++] = static_cast(array_offset + buffer_offset); + buffer_offset += Common::AlignUp(aligned_stride * vertex_num, 4); + } + + stream_buffer.Commit(buffer_offset); + + // Assign the rest of the attributes to the last binding + SetupFixedAttribs(); +} + +void RasterizerVulkan::SetupFixedAttribs() { + const auto& vertex_attributes = regs.pipeline.vertex_attributes; + VertexLayout& layout = pipeline_info.vertex_layout; + + auto [fixed_ptr, fixed_offset, _] = stream_buffer.Map(16 * sizeof(Common::Vec4f), 0); + binding_offsets[layout.binding_count] = static_cast(fixed_offset); + + // Reserve the last binding for fixed and default attributes + // Place the default attrib at offset zero for easy access + static const Common::Vec4f default_attrib{0.f, 0.f, 0.f, 1.f}; + std::memcpy(fixed_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f)); + + // Find all fixed attributes and assign them to the last binding + u32 offset = sizeof(Common::Vec4f); + for (std::size_t i = 0; i < 16; i++) { + if (vertex_attributes.IsDefaultAttribute(i)) { + const u32 reg = regs.vs.GetRegisterForAttribute(i); + if (!enable_attributes[reg]) { + const auto& attr = Pica::g_state.input_default_attributes.attr[i]; + const std::array data = {attr.x.ToFloat32(), attr.y.ToFloat32(), 
attr.z.ToFloat32(), + attr.w.ToFloat32()}; + + const u32 data_size = sizeof(float) * static_cast(data.size()); + std::memcpy(fixed_ptr + offset, data.data(), data_size); + + VertexAttribute& attribute = layout.attributes[reg]; + attribute.binding.Assign(layout.binding_count); + attribute.location.Assign(reg); + attribute.offset.Assign(offset); + attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT); + attribute.size.Assign(4); + + offset += data_size; + enable_attributes[reg] = true; + } + } + } + + // Loop one more time to find unused attributes and assign them to the default one + // If the attribute is just disabled, shove the default attribute to avoid + // errors if the shader ever decides to use it. + for (u32 i = 0; i < 16; i++) { + if (!enable_attributes[i]) { + VertexAttribute& attribute = layout.attributes[i]; + attribute.binding.Assign(layout.binding_count); + attribute.location.Assign(i); + attribute.offset.Assign(0); + attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT); + attribute.size.Assign(4); + } + } + + // Define the fixed+default binding + VertexBinding& binding = layout.bindings[layout.binding_count]; + binding.binding.Assign(layout.binding_count++); + binding.fixed.Assign(1); + binding.stride.Assign(offset); + + stream_buffer.Commit(offset); +} + +bool RasterizerVulkan::SetupVertexShader() { + MICROPROFILE_SCOPE(Vulkan_VS); + return pipeline_cache.UseProgrammableVertexShader(regs, Pica::g_state.vs, + pipeline_info.vertex_layout); +} + +bool RasterizerVulkan::SetupGeometryShader() { + MICROPROFILE_SCOPE(Vulkan_GS); + + if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { + LOG_ERROR(Render_Vulkan, "Accelerate draw doesn't support geometry shader"); + return false; + } + + return pipeline_cache.UseFixedGeometryShader(regs); +} + +bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) { + if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) { + if (regs.pipeline.gs_config.mode != 
Pica::PipelineRegs::GSMode::Point) { + return false; + } + if (regs.pipeline.triangle_topology != Pica::PipelineRegs::TriangleTopology::Shader) { + return false; + } + } + + pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology); + if (regs.pipeline.triangle_topology == TriangleTopology::Fan && + !instance.IsTriangleFanSupported()) { + LOG_DEBUG(Render_Vulkan, + "Skipping accelerated draw with unsupported triangle fan topology"); + return false; + } + + // Vertex data setup might involve scheduler flushes so perform it + // early to avoid invalidating our state in the middle of the draw. + vertex_info = AnalyzeVertexArray(is_indexed, instance.GetMinVertexStrideAlignment()); + SetupVertexArray(); + + if (!SetupVertexShader()) { + return false; + } + if (!SetupGeometryShader()) { + return false; + } + + return Draw(true, is_indexed); +} + +bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) { + if (is_indexed) { + SetupIndexArray(); + } + + const bool wait_built = !async_shaders || regs.pipeline.num_vertices <= 6; + if (!pipeline_cache.BindPipeline(pipeline_info, wait_built)) { + return true; + } + + const DrawParams params = { + .vertex_count = regs.pipeline.num_vertices, + .vertex_offset = -static_cast(vertex_info.vs_input_index_min), + .binding_count = pipeline_info.vertex_layout.binding_count, + .bindings = binding_offsets, + .is_indexed = is_indexed, + }; + + scheduler.Record([this, params](vk::CommandBuffer cmdbuf) { + std::array offsets; + std::transform(params.bindings.begin(), params.bindings.end(), offsets.begin(), + [](u32 offset) { return static_cast(offset); }); + cmdbuf.bindVertexBuffers(0, params.binding_count, vertex_buffers.data(), offsets.data()); + if (params.is_indexed) { + cmdbuf.drawIndexed(params.vertex_count, 1, 0, params.vertex_offset, 0); + } else { + cmdbuf.draw(params.vertex_count, 1, 0, 0); + } + }); + + return true; +} + +void RasterizerVulkan::SetupIndexArray() { + const bool index_u8 = 
regs.pipeline.index_array.format == 0; + const bool native_u8 = index_u8 && instance.IsIndexTypeUint8Supported(); + const u32 index_buffer_size = regs.pipeline.num_vertices * (native_u8 ? 1 : 2); + const vk::IndexType index_type = native_u8 ? vk::IndexType::eUint8EXT : vk::IndexType::eUint16; + + const u8* index_data = + memory.GetPhysicalPointer(regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() + + regs.pipeline.index_array.offset); + + auto [index_ptr, index_offset, _] = stream_buffer.Map(index_buffer_size, 2); + + if (index_u8 && !native_u8) { + u16* index_ptr_u16 = reinterpret_cast(index_ptr); + for (u32 i = 0; i < regs.pipeline.num_vertices; i++) { + index_ptr_u16[i] = index_data[i]; + } + } else { + std::memcpy(index_ptr, index_data, index_buffer_size); + } + + stream_buffer.Commit(index_buffer_size); + + scheduler.Record( + [this, index_offset = index_offset, index_type = index_type](vk::CommandBuffer cmdbuf) { + cmdbuf.bindIndexBuffer(stream_buffer.Handle(), index_offset, index_type); + }); +} + +void RasterizerVulkan::DrawTriangles() { + if (vertex_batch.empty()) { + return; + } + + pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List); + pipeline_info.vertex_layout = software_layout; + + pipeline_cache.UseTrivialVertexShader(); + pipeline_cache.UseTrivialGeometryShader(); + + Draw(false, false); +} + +bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { + MICROPROFILE_SCOPE(Vulkan_Drawing); + + const bool shadow_rendering = regs.framebuffer.IsShadowRendering(); + const bool has_stencil = regs.framebuffer.HasStencil(); + + const bool write_color_fb = shadow_rendering || pipeline_info.blending.color_write_mask; + const bool write_depth_fb = pipeline_info.IsDepthWriteEnabled(); + const bool using_color_fb = + regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb; + const bool using_depth_fb = + !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 
0 && + (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 || + (has_stencil && pipeline_info.depth_stencil.stencil_test_enable)); + + const auto fb_helper = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); + const Framebuffer* framebuffer = fb_helper.Framebuffer(); + if (!framebuffer->Handle()) { + return true; + } + + pipeline_info.attachments.color = framebuffer->Format(SurfaceType::Color); + pipeline_info.attachments.depth = framebuffer->Format(SurfaceType::Depth); + + if (shadow_rendering) { + pipeline_cache.BindStorageImage(6, framebuffer->ImageView(SurfaceType::Color)); + } else { + Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); + pipeline_cache.BindStorageImage(6, null_surface.StorageView()); + } + + // Update scissor uniforms + const auto [scissor_x1, scissor_y2, scissor_x2, scissor_y1] = fb_helper.Scissor(); + if (uniform_block_data.data.scissor_x1 != scissor_x1 || + uniform_block_data.data.scissor_x2 != scissor_x2 || + uniform_block_data.data.scissor_y1 != scissor_y1 || + uniform_block_data.data.scissor_y2 != scissor_y2) { + + uniform_block_data.data.scissor_x1 = scissor_x1; + uniform_block_data.data.scissor_x2 = scissor_x2; + uniform_block_data.data.scissor_y1 = scissor_y1; + uniform_block_data.data.scissor_y2 = scissor_y2; + uniform_block_data.dirty = true; + } + + // Sync and bind the texture surfaces + SyncTextureUnits(framebuffer); + + // Sync and bind the shader + if (shader_dirty) { + pipeline_cache.UseFragmentShader(regs); + shader_dirty = false; + } + + // Sync the LUTs within the texture buffer + SyncAndUploadLUTs(); + SyncAndUploadLUTsLF(); + UploadUniforms(accelerate); + + // Begin rendering + const auto draw_rect = fb_helper.DrawRect(); + renderpass_cache.BeginRendering(framebuffer, draw_rect); + + // Configure viewport and scissor + const auto viewport = fb_helper.Viewport(); + scheduler.Record([viewport, draw_rect](vk::CommandBuffer cmdbuf) { + const vk::Viewport vk_viewport 
= { + .x = static_cast(viewport.x), + .y = static_cast(viewport.y), + .width = static_cast(viewport.width), + .height = static_cast(viewport.height), + .minDepth = 0.f, + .maxDepth = 1.f, + }; + + const vk::Rect2D scissor = { + .offset{ + .x = static_cast(draw_rect.left), + .y = static_cast(draw_rect.bottom), + }, + .extent{ + .width = draw_rect.GetWidth(), + .height = draw_rect.GetHeight(), + }, + }; + + cmdbuf.setViewport(0, vk_viewport); + cmdbuf.setScissor(0, scissor); + }); + + // Draw the vertex batch + bool succeeded = true; + if (accelerate) { + succeeded = AccelerateDrawBatchInternal(is_indexed); + } else { + pipeline_cache.BindPipeline(pipeline_info, true); + + const u64 vertex_size = vertex_batch.size() * sizeof(HardwareVertex); + const u32 vertex_count = static_cast(vertex_batch.size()); + const auto [buffer, offset, _] = stream_buffer.Map(vertex_size, sizeof(HardwareVertex)); + + std::memcpy(buffer, vertex_batch.data(), vertex_size); + stream_buffer.Commit(vertex_size); + + scheduler.Record([this, offset = offset, vertex_count](vk::CommandBuffer cmdbuf) { + cmdbuf.bindVertexBuffers(0, stream_buffer.Handle(), offset); + cmdbuf.draw(vertex_count, 1, 0, 0); + }); + } + + vertex_batch.clear(); + return succeeded; +} + +void RasterizerVulkan::SyncTextureUnits(const Framebuffer* framebuffer) { + using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; + + const auto pica_textures = regs.texturing.GetTextures(); + for (u32 texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { + const auto& texture = pica_textures[texture_index]; + + // If the texture unit is disabled bind a null surface to it + if (!texture.enabled) { + const Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); + const Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID); + pipeline_cache.BindTexture(texture_index, null_surface.ImageView(), + null_sampler.Handle()); + continue; + } + + // Handle special tex0 
configurations + if (texture_index == 0) { + switch (texture.config.type.Value()) { + case TextureType::Shadow2D: { + Surface& surface = res_cache.GetTextureSurface(texture); + surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap; + pipeline_cache.BindStorageImage(0, surface.StorageView()); + continue; + } + case TextureType::ShadowCube: { + BindShadowCube(texture); + continue; + } + case TextureType::TextureCube: { + BindTextureCube(texture); + continue; + } + default: + UnbindSpecial(); + break; + } + } + + // Bind the texture provided by the rasterizer cache + Surface& surface = res_cache.GetTextureSurface(texture); + Sampler& sampler = res_cache.GetSampler(texture.config); + if (!IsFeedbackLoop(texture_index, framebuffer, surface, sampler)) { + pipeline_cache.BindTexture(texture_index, surface.ImageView(), sampler.Handle()); + } + } +} + +void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture) { + using CubeFace = Pica::TexturingRegs::CubeFace; + auto info = Pica::Texture::TextureInfo::FromPicaRegister(texture.config, texture.format); + constexpr std::array faces = { + CubeFace::PositiveX, CubeFace::NegativeX, CubeFace::PositiveY, + CubeFace::NegativeY, CubeFace::PositiveZ, CubeFace::NegativeZ, + }; + + for (CubeFace face : faces) { + const u32 binding = static_cast(face); + info.physical_address = regs.texturing.GetCubePhysicalAddress(face); + + const VideoCore::SurfaceId surface_id = res_cache.GetTextureSurface(info); + Surface& surface = res_cache.GetSurface(surface_id); + surface.flags |= VideoCore::SurfaceFlagBits::ShadowMap; + pipeline_cache.BindStorageImage(binding, surface.StorageView()); + } +} + +void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture) { + using CubeFace = Pica::TexturingRegs::CubeFace; + const VideoCore::TextureCubeConfig config = { + .px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX), + .nx = 
regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX), + .py = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY), + .ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY), + .pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ), + .nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ), + .width = texture.config.width, + .levels = texture.config.lod.max_level + 1, + .format = texture.format, + }; + + Surface& surface = res_cache.GetTextureCube(config); + Sampler& sampler = res_cache.GetSampler(texture.config); + pipeline_cache.BindTexture(3, surface.ImageView(), sampler.Handle()); +} + +bool RasterizerVulkan::IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, + Surface& surface, Sampler& sampler) { + const vk::ImageView color_view = framebuffer->ImageView(SurfaceType::Color); + const bool is_feedback_loop = color_view == surface.ImageView(); + if (!is_feedback_loop) { + return false; + } + + // Make a temporary copy of the framebuffer to sample from + pipeline_cache.BindTexture(texture_index, surface.CopyImageView(), sampler.Handle()); + return true; +} + +void RasterizerVulkan::UnbindSpecial() { + Surface& null_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_ID); + const Surface& null_cube_surface = res_cache.GetSurface(VideoCore::NULL_SURFACE_CUBE_ID); + const Sampler& null_sampler = res_cache.GetSampler(VideoCore::NULL_SAMPLER_ID); + pipeline_cache.BindTexture(3, null_cube_surface.ImageView(), null_sampler.Handle()); + for (u32 i = 0; i < 6; i++) { + pipeline_cache.BindStorageImage(i, null_surface.StorageView()); + } +} + +void RasterizerVulkan::NotifyFixedFunctionPicaRegisterChanged(u32 id) { + switch (id) { + // Clipping plane + case PICA_REG_INDEX(rasterizer.clip_enable): + SyncClipEnabled(); + break; + + // Culling + case PICA_REG_INDEX(rasterizer.cull_mode): + SyncCullMode(); + break; + + // Blending + case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable): + SyncBlendEnabled(); + 
// Update since logic op emulation depends on alpha blend enable. + SyncLogicOp(); + SyncColorWriteMask(); + break; + case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending): + SyncBlendFuncs(); + break; + case PICA_REG_INDEX(framebuffer.output_merger.blend_const): + SyncBlendColor(); + break; + + // Sync VK stencil test + stencil write mask + // (Pica stencil test function register also contains a stencil write mask) + case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func): + SyncStencilTest(); + SyncStencilWriteMask(); + break; + case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op): + case PICA_REG_INDEX(framebuffer.framebuffer.depth_format): + SyncStencilTest(); + break; + + // Sync VK depth test + depth and color write mask + // (Pica depth test function register also contains a depth and color write mask) + case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable): + SyncDepthTest(); + SyncDepthWriteMask(); + SyncColorWriteMask(); + break; + + // Sync VK depth and stencil write mask + // (This is a dedicated combined depth / stencil write-enable register) + case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write): + SyncDepthWriteMask(); + SyncStencilWriteMask(); + break; + + // Sync VK color write mask + // (This is a dedicated color write-enable register) + case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write): + SyncColorWriteMask(); + break; + + // Logic op + case PICA_REG_INDEX(framebuffer.output_merger.logic_op): + SyncLogicOp(); + // Update since color write mask is used to emulate no-op. 
+ SyncColorWriteMask(); + break; + } +} + +void RasterizerVulkan::FlushAll() { + res_cache.FlushAll(); +} + +void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) { + res_cache.FlushRegion(addr, size); +} + +void RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) { + res_cache.InvalidateRegion(addr, size); +} + +void RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) { + res_cache.FlushRegion(addr, size); + res_cache.InvalidateRegion(addr, size); +} + +void RasterizerVulkan::ClearAll(bool flush) { + res_cache.ClearAll(flush); +} + +bool RasterizerVulkan::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { + return res_cache.AccelerateDisplayTransfer(config); +} + +bool RasterizerVulkan::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { + return res_cache.AccelerateTextureCopy(config); +} + +bool RasterizerVulkan::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { + return res_cache.AccelerateFill(config); +} + +bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, + PAddr framebuffer_addr, u32 pixel_stride, + ScreenInfo& screen_info) { + if (framebuffer_addr == 0) [[unlikely]] { + return false; + } + + VideoCore::SurfaceParams src_params; + src_params.addr = framebuffer_addr; + src_params.width = std::min(config.width.Value(), pixel_stride); + src_params.height = config.height; + src_params.stride = pixel_stride; + src_params.is_tiled = false; + src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.color_format); + src_params.UpdateParams(); + + const auto [src_surface_id, src_rect] = + res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true); + + if (!src_surface_id) { + return false; + } + + const Surface& src_surface = res_cache.GetSurface(src_surface_id); + const u32 scaled_width = src_surface.GetScaledWidth(); + const u32 scaled_height = src_surface.GetScaledHeight(); + + screen_info.texcoords = Common::Rectangle( 
+ (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, + (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); + + screen_info.image_view = src_surface.ImageView(); + + return true; +} + +void RasterizerVulkan::MakeSoftwareVertexLayout() { + constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3}; + + software_layout = VertexLayout{ + .binding_count = 1, + .attribute_count = 8, + }; + + for (u32 i = 0; i < software_layout.binding_count; i++) { + VertexBinding& binding = software_layout.bindings[i]; + binding.binding.Assign(i); + binding.fixed.Assign(0); + binding.stride.Assign(sizeof(HardwareVertex)); + } + + u32 offset = 0; + for (u32 i = 0; i < 8; i++) { + VertexAttribute& attribute = software_layout.attributes[i]; + attribute.binding.Assign(0); + attribute.location.Assign(i); + attribute.offset.Assign(offset); + attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT); + attribute.size.Assign(sizes[i]); + offset += sizes[i] * sizeof(float); + } +} + +void RasterizerVulkan::SyncClipEnabled() { + bool clip_enabled = regs.rasterizer.clip_enable != 0; + if (clip_enabled != uniform_block_data.data.enable_clip1) { + uniform_block_data.data.enable_clip1 = clip_enabled; + uniform_block_data.dirty = true; + } +} + +void RasterizerVulkan::SyncCullMode() { + pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode); +} + +void RasterizerVulkan::SyncBlendEnabled() { + pipeline_info.blending.blend_enable = regs.framebuffer.output_merger.alphablend_enable; +} + +void RasterizerVulkan::SyncBlendFuncs() { + pipeline_info.blending.color_blend_eq.Assign( + regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb); + pipeline_info.blending.alpha_blend_eq.Assign( + regs.framebuffer.output_merger.alpha_blending.blend_equation_a); + pipeline_info.blending.src_color_blend_factor.Assign( + regs.framebuffer.output_merger.alpha_blending.factor_source_rgb); + 
pipeline_info.blending.dst_color_blend_factor.Assign( + regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb); + pipeline_info.blending.src_alpha_blend_factor.Assign( + regs.framebuffer.output_merger.alpha_blending.factor_source_a); + pipeline_info.blending.dst_alpha_blend_factor.Assign( + regs.framebuffer.output_merger.alpha_blending.factor_dest_a); +} + +void RasterizerVulkan::SyncBlendColor() { + pipeline_info.dynamic.blend_color = regs.framebuffer.output_merger.blend_const.raw; +} + +void RasterizerVulkan::SyncLogicOp() { + if (instance.NeedsLogicOpEmulation()) { + // We need this in the fragment shader to emulate logic operations + shader_dirty = true; + } + + pipeline_info.blending.logic_op = regs.framebuffer.output_merger.logic_op; + + const bool is_logic_op_emulated = + instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable; + const bool is_logic_op_noop = + regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp; + if (is_logic_op_emulated && is_logic_op_noop) { + // Color output is disabled by logic operation. We use color write mask to skip + // color but allow depth write. + pipeline_info.blending.color_write_mask = 0; + } +} + +void RasterizerVulkan::SyncColorWriteMask() { + const u32 color_mask = regs.framebuffer.framebuffer.allow_color_write != 0 + ? (regs.framebuffer.output_merger.depth_color_mask >> 8) & 0xF + : 0; + + const bool is_logic_op_emulated = + instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable; + const bool is_logic_op_noop = + regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp; + if (is_logic_op_emulated && is_logic_op_noop) { + // Color output is disabled by logic operation. We use color write mask to skip + // color but allow depth write. Return early to avoid overwriting this. 
+ return; + } + + pipeline_info.blending.color_write_mask = color_mask; +} + +void RasterizerVulkan::SyncStencilWriteMask() { + pipeline_info.dynamic.stencil_write_mask = + (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) + ? static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) + : 0; +} + +void RasterizerVulkan::SyncDepthWriteMask() { + const bool write_enable = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && + regs.framebuffer.output_merger.depth_write_enable); + pipeline_info.depth_stencil.depth_write_enable.Assign(write_enable); +} + +void RasterizerVulkan::SyncStencilTest() { + const auto& stencil_test = regs.framebuffer.output_merger.stencil_test; + const bool test_enable = stencil_test.enable && regs.framebuffer.framebuffer.depth_format == + Pica::FramebufferRegs::DepthFormat::D24S8; + + pipeline_info.depth_stencil.stencil_test_enable.Assign(test_enable); + pipeline_info.depth_stencil.stencil_fail_op.Assign(stencil_test.action_stencil_fail); + pipeline_info.depth_stencil.stencil_pass_op.Assign(stencil_test.action_depth_pass); + pipeline_info.depth_stencil.stencil_depth_fail_op.Assign(stencil_test.action_depth_fail); + pipeline_info.depth_stencil.stencil_compare_op.Assign(stencil_test.func); + pipeline_info.dynamic.stencil_reference = stencil_test.reference_value; + pipeline_info.dynamic.stencil_compare_mask = stencil_test.input_mask; +} + +void RasterizerVulkan::SyncDepthTest() { + const bool test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || + regs.framebuffer.output_merger.depth_write_enable == 1; + const auto compare_op = regs.framebuffer.output_merger.depth_test_enable == 1 + ? 
regs.framebuffer.output_merger.depth_test_func.Value() + : Pica::FramebufferRegs::CompareFunc::Always; + + pipeline_info.depth_stencil.depth_test_enable.Assign(test_enabled); + pipeline_info.depth_stencil.depth_compare_op.Assign(compare_op); +} + +void RasterizerVulkan::SyncAndUploadLUTsLF() { + constexpr std::size_t max_size = + sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler + + sizeof(Common::Vec2f) * 128; // fog + + if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) { + return; + } + + std::size_t bytes_used = 0; + auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f)); + + // Sync the lighting luts + if (uniform_block_data.lighting_lut_dirty_any || invalidate) { + for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) { + if (uniform_block_data.lighting_lut_dirty[index] || invalidate) { + std::array new_data; + const auto& source_lut = Pica::g_state.lighting.luts[index]; + std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), + [](const auto& entry) { + return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lighting_lut_data[index] || invalidate) { + lighting_lut_data[index] = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec2f)); + uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] = + static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec2f); + } + uniform_block_data.lighting_lut_dirty[index] = false; + } + } + uniform_block_data.lighting_lut_dirty_any = false; + } + + // Sync the fog lut + if (uniform_block_data.fog_lut_dirty || invalidate) { + std::array new_data; + + std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), + [](const auto& entry) { + return Common::Vec2f{entry.ToFloat(), 
entry.DiffToFloat()}; + }); + + if (new_data != fog_lut_data || invalidate) { + fog_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec2f)); + uniform_block_data.data.fog_lut_offset = + static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec2f); + } + uniform_block_data.fog_lut_dirty = false; + } + + texture_lf_buffer.Commit(static_cast(bytes_used)); +} + +void RasterizerVulkan::SyncAndUploadLUTs() { + const auto& proctex = Pica::g_state.proctex; + constexpr std::size_t max_size = + sizeof(Common::Vec2f) * 128 * 3 + // proctex: noise + color + alpha + sizeof(Common::Vec4f) * 256 + // proctex + sizeof(Common::Vec4f) * 256; // proctex diff + + if (!uniform_block_data.proctex_noise_lut_dirty && + !uniform_block_data.proctex_color_map_dirty && + !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty && + !uniform_block_data.proctex_diff_lut_dirty) { + return; + } + + std::size_t bytes_used = 0; + auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f)); + + // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap + auto sync_proctex_value_lut = + [this, buffer = buffer, offset = offset, invalidate = invalidate, + &bytes_used](const std::array& lut, + std::array& lut_data, int& lut_offset) { + std::array new_data; + std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) { + return Common::Vec2f{entry.ToFloat(), entry.DiffToFloat()}; + }); + + if (new_data != lut_data || invalidate) { + lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec2f)); + lut_offset = static_cast((offset + bytes_used) / sizeof(Common::Vec2f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec2f); + } + }; + + // Sync the proctex noise lut + if 
(uniform_block_data.proctex_noise_lut_dirty || invalidate) { + sync_proctex_value_lut(proctex.noise_table, proctex_noise_lut_data, + uniform_block_data.data.proctex_noise_lut_offset); + uniform_block_data.proctex_noise_lut_dirty = false; + } + + // Sync the proctex color map + if (uniform_block_data.proctex_color_map_dirty || invalidate) { + sync_proctex_value_lut(proctex.color_map_table, proctex_color_map_data, + uniform_block_data.data.proctex_color_map_offset); + uniform_block_data.proctex_color_map_dirty = false; + } + + // Sync the proctex alpha map + if (uniform_block_data.proctex_alpha_map_dirty || invalidate) { + sync_proctex_value_lut(proctex.alpha_map_table, proctex_alpha_map_data, + uniform_block_data.data.proctex_alpha_map_offset); + uniform_block_data.proctex_alpha_map_dirty = false; + } + + // Sync the proctex lut + if (uniform_block_data.proctex_lut_dirty || invalidate) { + std::array new_data; + + std::transform(proctex.color_table.begin(), proctex.color_table.end(), new_data.begin(), + [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_lut_data || invalidate) { + proctex_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec4f)); + uniform_block_data.data.proctex_lut_offset = + static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec4f); + } + uniform_block_data.proctex_lut_dirty = false; + } + + // Sync the proctex difference lut + if (uniform_block_data.proctex_diff_lut_dirty || invalidate) { + std::array new_data; + + std::transform(proctex.color_diff_table.begin(), proctex.color_diff_table.end(), + new_data.begin(), [](const auto& entry) { + auto rgba = entry.ToVector() / 255.0f; + return Common::Vec4f{rgba.r(), rgba.g(), rgba.b(), rgba.a()}; + }); + + if (new_data != proctex_diff_lut_data 
|| invalidate) { + proctex_diff_lut_data = new_data; + std::memcpy(buffer + bytes_used, new_data.data(), + new_data.size() * sizeof(Common::Vec4f)); + uniform_block_data.data.proctex_diff_lut_offset = + static_cast((offset + bytes_used) / sizeof(Common::Vec4f)); + uniform_block_data.dirty = true; + bytes_used += new_data.size() * sizeof(Common::Vec4f); + } + uniform_block_data.proctex_diff_lut_dirty = false; + } + + texture_buffer.Commit(static_cast(bytes_used)); +} + +void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { + const bool sync_vs = accelerate_draw; + const bool sync_fs = uniform_block_data.dirty; + + if (!sync_vs && !sync_fs) { + return; + } + + const u64 uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs; + auto [uniforms, offset, invalidate] = + uniform_buffer.Map(uniform_size, uniform_buffer_alignment); + + u32 used_bytes = 0; + if (sync_vs) { + Pica::Shader::VSUniformData vs_uniforms; + vs_uniforms.uniforms.SetFromRegs(regs.vs, Pica::g_state.vs); + std::memcpy(uniforms, &vs_uniforms, sizeof(vs_uniforms)); + + pipeline_cache.SetBufferOffset(0, offset); + used_bytes += static_cast(uniform_size_aligned_vs); + } + + if (sync_fs || invalidate) { + std::memcpy(uniforms + used_bytes, &uniform_block_data.data, + sizeof(Pica::Shader::UniformData)); + + pipeline_cache.SetBufferOffset(1, offset + used_bytes); + uniform_block_data.dirty = false; + used_bytes += static_cast(uniform_size_aligned_fs); + } + + uniform_buffer.Commit(used_bytes); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h new file mode 100644 index 0000000000..4d5faee605 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -0,0 +1,171 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "core/hw/gpu.h" +#include "video_core/rasterizer_accelerated.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +namespace Frontend { +class EmuWindow; +} + +namespace VideoCore { +class CustomTexManager; +class RendererBase; +} // namespace VideoCore + +namespace Vulkan { + +struct ScreenInfo; + +class Instance; +class Scheduler; +class RenderpassCache; +class DescriptorPool; + +class RasterizerVulkan : public VideoCore::RasterizerAccelerated { +public: + explicit RasterizerVulkan(Memory::MemorySystem& memory, + VideoCore::CustomTexManager& custom_tex_manager, + VideoCore::RendererBase& renderer, Frontend::EmuWindow& emu_window, + const Instance& instance, Scheduler& scheduler, DescriptorPool& pool, + RenderpassCache& renderpass_cache, u32 image_count); + ~RasterizerVulkan() override; + + void TickFrame(); + void LoadDiskResources(const std::atomic_bool& stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) override; + + void DrawTriangles() override; + void FlushAll() override; + void FlushRegion(PAddr addr, u32 size) override; + void InvalidateRegion(PAddr addr, u32 size) override; + void FlushAndInvalidateRegion(PAddr addr, u32 size) override; + void ClearAll(bool flush) override; + bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; + bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; + bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; + bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, + u32 pixel_stride, ScreenInfo& screen_info); + bool AccelerateDrawBatch(bool is_indexed) override; + + void SyncFixedState() override; + +private: + void NotifyFixedFunctionPicaRegisterChanged(u32 id) override; + 
+ /// Syncs the clip enabled status to match the PICA register + void SyncClipEnabled(); + + /// Syncs the cull mode to match the PICA register + void SyncCullMode(); + + /// Syncs the blend enabled status to match the PICA register + void SyncBlendEnabled(); + + /// Syncs the blend functions to match the PICA register + void SyncBlendFuncs(); + + /// Syncs the blend color to match the PICA register + void SyncBlendColor(); + + /// Syncs the logic op states to match the PICA register + void SyncLogicOp(); + + /// Syncs the color write mask to match the PICA register state + void SyncColorWriteMask(); + + /// Syncs the stencil write mask to match the PICA register state + void SyncStencilWriteMask(); + + /// Syncs the depth write mask to match the PICA register state + void SyncDepthWriteMask(); + + /// Syncs the stencil test states to match the PICA register + void SyncStencilTest(); + + /// Syncs the depth test states to match the PICA register + void SyncDepthTest(); + + /// Syncs and uploads the lighting, fog and proctex LUTs + void SyncAndUploadLUTs(); + void SyncAndUploadLUTsLF(); + + /// Syncs all enabled PICA texture units + void SyncTextureUnits(const Framebuffer* framebuffer); + + /// Binds the PICA shadow cube required for shadow mapping + void BindShadowCube(const Pica::TexturingRegs::FullTextureConfig& texture); + + /// Binds a texture cube to texture unit 0 + void BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture); + + /// Makes a temporary copy of the framebuffer if a feedback loop is detected + bool IsFeedbackLoop(u32 texture_index, const Framebuffer* framebuffer, Surface& surface, + Sampler& sampler); + + /// Unbinds all special texture unit 0 texture configurations + void UnbindSpecial(); + + /// Upload the uniform blocks to the uniform buffer object + void UploadUniforms(bool accelerate_draw); + + /// Generic draw function for DrawTriangles and AccelerateDrawBatch + bool Draw(bool accelerate, bool is_indexed); + + /// Internal 
implementation for AccelerateDrawBatch + bool AccelerateDrawBatchInternal(bool is_indexed); + + /// Setup index array for AccelerateDrawBatch + void SetupIndexArray(); + + /// Setup vertex array for AccelerateDrawBatch + void SetupVertexArray(); + + /// Setup the fixed attribute emulation in vulkan + void SetupFixedAttribs(); + + /// Setup vertex shader for AccelerateDrawBatch + bool SetupVertexShader(); + + /// Setup geometry shader for AccelerateDrawBatch + bool SetupGeometryShader(); + + /// Creates the vertex layout struct used for software shader pipelines + void MakeSoftwareVertexLayout(); + +private: + const Instance& instance; + Scheduler& scheduler; + RenderpassCache& renderpass_cache; + PipelineCache pipeline_cache; + TextureRuntime runtime; + RasterizerCache res_cache; + + VertexLayout software_layout; + std::array binding_offsets{}; + std::array enable_attributes{}; + std::array vertex_buffers; + VertexArrayInfo vertex_info; + PipelineInfo pipeline_info{}; + + StreamBuffer stream_buffer; ///< Vertex+Index buffer + StreamBuffer uniform_buffer; ///< Uniform buffer + StreamBuffer texture_buffer; ///< Texture buffer + StreamBuffer texture_lf_buffer; ///< Texture Light-Fog buffer + vk::UniqueBufferView texture_lf_view; + vk::UniqueBufferView texture_rg_view; + vk::UniqueBufferView texture_rgba_view; + u64 uniform_buffer_alignment; + u64 uniform_size_aligned_vs; + u64 uniform_size_aligned_fs; + bool async_shaders{false}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp new file mode 100644 index 0000000000..21eb7ce6b6 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp @@ -0,0 +1,10 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "video_core/rasterizer_cache/rasterizer_cache.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +namespace VideoCore { +template class RasterizerCache; +} // namespace VideoCore diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp new file mode 100644 index 0000000000..f9fc22bc59 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -0,0 +1,221 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include "common/assert.h" +#include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +namespace Vulkan { + +constexpr u32 MIN_DRAWS_TO_FLUSH = 20; + +using VideoCore::PixelFormat; +using VideoCore::SurfaceType; + +RenderpassCache::RenderpassCache(const Instance& instance, Scheduler& scheduler) + : instance{instance}, scheduler{scheduler} {} + +RenderpassCache::~RenderpassCache() = default; + +void RenderpassCache::BeginRendering(const Framebuffer* framebuffer, + Common::Rectangle draw_rect) { + const vk::Rect2D render_area = { + .offset{ + .x = static_cast(draw_rect.left), + .y = static_cast(draw_rect.bottom), + }, + .extent{ + .width = draw_rect.GetWidth(), + .height = draw_rect.GetHeight(), + }, + }; + const RenderPass new_pass = { + .framebuffer = framebuffer->Handle(), + .render_pass = framebuffer->RenderPass(), + .render_area = render_area, + .clear = {}, + .do_clear = false, + }; + images = framebuffer->Images(); + aspects = framebuffer->Aspects(); + BeginRendering(new_pass); +} + +void RenderpassCache::BeginRendering(const RenderPass& new_pass) { + if (pass == new_pass) [[likely]] { + num_draws++; + return; + } + + 
EndRendering(); + scheduler.Record([info = new_pass](vk::CommandBuffer cmdbuf) { + const vk::RenderPassBeginInfo renderpass_begin_info = { + .renderPass = info.render_pass, + .framebuffer = info.framebuffer, + .renderArea = info.render_area, + .clearValueCount = info.do_clear ? 1u : 0u, + .pClearValues = &info.clear, + }; + cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline); + }); + + pass = new_pass; +} + +void RenderpassCache::EndRendering() { + if (!pass.render_pass) { + return; + } + + pass.render_pass = vk::RenderPass{}; + scheduler.Record([images = images, aspects = aspects](vk::CommandBuffer cmdbuf) { + u32 num_barriers = 0; + vk::PipelineStageFlags pipeline_flags{}; + std::array barriers; + for (u32 i = 0; i < images.size(); i++) { + if (!images[i]) { + continue; + } + const bool is_color = static_cast(aspects[i] & vk::ImageAspectFlagBits::eColor); + if (is_color) { + pipeline_flags |= vk::PipelineStageFlagBits::eColorAttachmentOutput; + } else { + pipeline_flags |= vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests; + } + barriers[num_barriers++] = vk::ImageMemoryBarrier{ + .srcAccessMask = is_color ? 
vk::AccessFlagBits::eColorAttachmentWrite + : vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = images[i], + .subresourceRange{ + .aspectMask = aspects[i], + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + } + cmdbuf.endRenderPass(); + cmdbuf.pipelineBarrier(pipeline_flags, + vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, 0, nullptr, 0, nullptr, + num_barriers, barriers.data()); + }); + + // The Mali guide recommends flushing at the end of each major renderpass + // Testing has shown this has a significant effect on rendering performance + if (num_draws > MIN_DRAWS_TO_FLUSH && instance.ShouldFlush()) { + scheduler.Flush(); + num_draws = 0; + } +} + +vk::RenderPass RenderpassCache::GetRenderpass(VideoCore::PixelFormat color, + VideoCore::PixelFormat depth, bool is_clear) { + std::scoped_lock lock{cache_mutex}; + + const u32 color_index = + color == VideoCore::PixelFormat::Invalid ? MAX_COLOR_FORMATS : static_cast(color); + const u32 depth_index = depth == VideoCore::PixelFormat::Invalid + ? MAX_DEPTH_FORMATS + : (static_cast(depth) - 14); + + ASSERT_MSG(color_index <= MAX_COLOR_FORMATS && depth_index <= MAX_DEPTH_FORMATS, + "Invalid color index {} and/or depth_index {}", color_index, depth_index); + + vk::UniqueRenderPass& renderpass = cached_renderpasses[color_index][depth_index][is_clear]; + if (!renderpass) { + const vk::Format color_format = instance.GetTraits(color).native; + const vk::Format depth_format = instance.GetTraits(depth).native; + const vk::AttachmentLoadOp load_op = + is_clear ? 
vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad; + renderpass = CreateRenderPass(color_format, depth_format, load_op); + } + + return *renderpass; +} + +vk::UniqueRenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format depth, + vk::AttachmentLoadOp load_op) const { + u32 attachment_count = 0; + std::array attachments; + + bool use_color = false; + vk::AttachmentReference color_attachment_ref{}; + bool use_depth = false; + vk::AttachmentReference depth_attachment_ref{}; + + if (color != vk::Format::eUndefined) { + attachments[attachment_count] = vk::AttachmentDescription{ + .format = color, + .loadOp = load_op, + .storeOp = vk::AttachmentStoreOp::eStore, + .stencilLoadOp = vk::AttachmentLoadOp::eDontCare, + .stencilStoreOp = vk::AttachmentStoreOp::eDontCare, + .initialLayout = vk::ImageLayout::eGeneral, + .finalLayout = vk::ImageLayout::eGeneral, + }; + + color_attachment_ref = vk::AttachmentReference{ + .attachment = attachment_count++, + .layout = vk::ImageLayout::eGeneral, + }; + + use_color = true; + } + + if (depth != vk::Format::eUndefined) { + attachments[attachment_count] = vk::AttachmentDescription{ + .format = depth, + .loadOp = load_op, + .storeOp = vk::AttachmentStoreOp::eStore, + .stencilLoadOp = load_op, + .stencilStoreOp = vk::AttachmentStoreOp::eStore, + .initialLayout = vk::ImageLayout::eGeneral, + .finalLayout = vk::ImageLayout::eGeneral, + }; + + depth_attachment_ref = vk::AttachmentReference{ + .attachment = attachment_count++, + .layout = vk::ImageLayout::eGeneral, + }; + + use_depth = true; + } + + const vk::SubpassDescription subpass = { + .pipelineBindPoint = vk::PipelineBindPoint::eGraphics, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = use_color ? 1u : 0u, + .pColorAttachments = &color_attachment_ref, + .pResolveAttachments = 0, + .pDepthStencilAttachment = use_depth ? 
&depth_attachment_ref : nullptr, + }; + + const vk::RenderPassCreateInfo renderpass_info = { + .attachmentCount = attachment_count, + .pAttachments = attachments.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }; + + return instance.GetDevice().createRenderPassUnique(renderpass_info); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h new file mode 100644 index 0000000000..64f9931a22 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -0,0 +1,74 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/math_util.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace VideoCore { +enum class PixelFormat : u32; +} + +namespace Vulkan { + +class Instance; +class Scheduler; +class Framebuffer; + +struct RenderPass { + vk::Framebuffer framebuffer; + vk::RenderPass render_pass; + vk::Rect2D render_area; + vk::ClearValue clear; + bool do_clear; + + bool operator==(const RenderPass& other) const noexcept { + return std::tie(framebuffer, render_pass, render_area, do_clear) == + std::tie(other.framebuffer, other.render_pass, other.render_area, + other.do_clear) && + std::memcmp(&clear, &other.clear, sizeof(vk::ClearValue)) == 0; + } +}; + +class RenderpassCache { + static constexpr size_t MAX_COLOR_FORMATS = 13; + static constexpr size_t MAX_DEPTH_FORMATS = 4; + +public: + explicit RenderpassCache(const Instance& instance, Scheduler& scheduler); + ~RenderpassCache(); + + /// Begins a new renderpass with the provided framebuffer as render target. + void BeginRendering(const Framebuffer* framebuffer, Common::Rectangle draw_rect); + + /// Begins a new renderpass with the provided render state. 
+ void BeginRendering(const RenderPass& new_pass); + + /// Exits from any currently active renderpass instance + void EndRendering(); + + /// Returns the renderpass associated with the color-depth format pair + vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth, + bool is_clear); + +private: + /// Creates a renderpass configured appropriately and stores it in cached_renderpasses + vk::UniqueRenderPass CreateRenderPass(vk::Format color, vk::Format depth, + vk::AttachmentLoadOp load_op) const; + +private: + const Instance& instance; + Scheduler& scheduler; + vk::UniqueRenderPass cached_renderpasses[MAX_COLOR_FORMATS + 1][MAX_DEPTH_FORMATS + 1][2]; + std::mutex cache_mutex; + std::array images; + std::array aspects; + RenderPass pass{}; + u32 num_draws{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp new file mode 100644 index 0000000000..02a5e22b7f --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -0,0 +1,113 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" + +namespace Vulkan { + +ResourcePool::ResourcePool(MasterSemaphore* master_semaphore_, size_t grow_step_) + : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} + +std::size_t ResourcePool::CommitResource() { + // Refresh semaphore to query updated results + master_semaphore->Refresh(); + const u64 gpu_tick = master_semaphore->KnownGpuTick(); + const auto search = [this, gpu_tick](std::size_t begin, + std::size_t end) -> std::optional { + for (std::size_t iterator = begin; iterator < end; ++iterator) { + if (gpu_tick >= ticks[iterator]) { + ticks[iterator] = master_semaphore->CurrentTick(); + return iterator; + } + } + return std::nullopt; + }; + + // Try to find a free resource from the hinted position to the end. + std::optional found = search(hint_iterator, ticks.size()); + if (!found) { + // Search from beginning to the hinted position. + found = search(0, hint_iterator); + if (!found) { + // Both searches failed, the pool is full; handle it. + const std::size_t free_resource = ManageOverflow(); + + ticks[free_resource] = master_semaphore->CurrentTick(); + found = free_resource; + } + } + + // Free iterator is hinted to the resource after the one that's been commited. + hint_iterator = (*found + 1) % ticks.size(); + return *found; +} + +std::size_t ResourcePool::ManageOverflow() { + const std::size_t old_capacity = ticks.size(); + Grow(); + + // The last entry is guaranted to be free, since it's the first element of the freshly + // allocated resources. 
+ return old_capacity; +} + +void ResourcePool::Grow() { + const size_t old_capacity = ticks.size(); + ticks.resize(old_capacity + grow_step); + Allocate(old_capacity, old_capacity + grow_step); +} + +constexpr size_t COMMAND_BUFFER_POOL_SIZE = 4; + +struct CommandPool::Pool { + vk::CommandPool handle; + std::array cmdbufs; +}; + +CommandPool::CommandPool(const Instance& instance, MasterSemaphore* master_semaphore) + : ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance} {} + +CommandPool::~CommandPool() { + vk::Device device = instance.GetDevice(); + for (Pool& pool : pools) { + device.destroyCommandPool(pool.handle); + } +} + +void CommandPool::Allocate(std::size_t begin, std::size_t end) { + // Command buffers are going to be commited, recorded, executed every single usage cycle. + // They are also going to be reseted when commited. + Pool& pool = pools.emplace_back(); + + const vk::CommandPoolCreateInfo pool_create_info = { + .flags = vk::CommandPoolCreateFlagBits::eTransient | + vk::CommandPoolCreateFlagBits::eResetCommandBuffer, + .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(), + }; + + vk::Device device = instance.GetDevice(); + pool.handle = device.createCommandPool(pool_create_info); + + const vk::CommandBufferAllocateInfo buffer_alloc_info = { + .commandPool = pool.handle, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = COMMAND_BUFFER_POOL_SIZE, + }; + + auto buffers = device.allocateCommandBuffers(buffer_alloc_info); + std::copy(buffers.begin(), buffers.end(), pool.cmdbufs.begin()); +} + +vk::CommandBuffer CommandPool::Commit() { + const std::size_t index = CommitResource(); + const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; + const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; + return pools[pool_index].cmdbufs[sub_index]; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h new file mode 100644 
index 0000000000..81fc549e7d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -0,0 +1,67 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; +class MasterSemaphore; + +/** + * Handles a pool of resources protected by fences. Manages resource overflow allocating more + * resources. + */ +class ResourcePool { +public: + explicit ResourcePool() = default; + explicit ResourcePool(MasterSemaphore* master_semaphore, std::size_t grow_step); + virtual ~ResourcePool() = default; + + ResourcePool& operator=(ResourcePool&&) noexcept = default; + ResourcePool(ResourcePool&&) noexcept = default; + + ResourcePool& operator=(const ResourcePool&) = default; + ResourcePool(const ResourcePool&) = default; + +protected: + std::size_t CommitResource(); + + /// Called when a chunk of resources have to be allocated. + virtual void Allocate(std::size_t begin, std::size_t end) = 0; + +private: + /// Manages pool overflow allocating new resources. + std::size_t ManageOverflow(); + + /// Allocates a new page of resources. 
+ void Grow(); + +protected: + MasterSemaphore* master_semaphore{nullptr}; + std::size_t grow_step = 0; ///< Number of new resources created after an overflow + std::size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found + std::vector ticks; ///< Ticks for each resource +}; + +class CommandPool final : public ResourcePool { +public: + explicit CommandPool(const Instance& instance, MasterSemaphore* master_semaphore); + ~CommandPool() override; + + void Allocate(std::size_t begin, std::size_t end) override; + + vk::CommandBuffer Commit(); + +private: + struct Pool; + const Instance& instance; + std::vector pools; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp new file mode 100644 index 0000000000..bc72f00c95 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -0,0 +1,202 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include "common/microprofile.h" +#include "common/settings.h" +#include "common/thread.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); +MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Exectution", MP_RGB(255, 192, 255)); + +namespace Vulkan { + +namespace { + +std::unique_ptr MakeMasterSemaphore(const Instance& instance) { + if (instance.IsTimelineSemaphoreSupported()) { + return std::make_unique(instance); + } else { + return std::make_unique(instance); + } +} + +} // Anonymous namespace + +void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { + auto command = first; + while (command != nullptr) { + auto next = command->GetNext(); + command->Execute(cmdbuf); + command->~Command(); + command = next; + } + submit = false; + command_offset = 0; + first = nullptr; + last = nullptr; +} + +Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache) + : renderpass_cache{renderpass_cache}, master_semaphore{MakeMasterSemaphore(instance)}, + command_pool{instance, master_semaphore.get()}, use_worker_thread{true} { + AllocateWorkerCommandBuffers(); + if (use_worker_thread) { + AcquireNewChunk(); + worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); }); + } +} + +Scheduler::~Scheduler() = default; + +void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) { + // When flushing, we only send data to the worker thread; no waiting is necessary. + SubmitExecution(signal, wait); +} + +void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) { + // When finishing, we need to wait for the submission to have executed on the device. 
+ const u64 presubmit_tick = CurrentTick(); + SubmitExecution(signal, wait); + Wait(presubmit_tick); +} + +void Scheduler::WaitWorker() { + if (!use_worker_thread) { + return; + } + + MICROPROFILE_SCOPE(Vulkan_WaitForWorker); + DispatchWork(); + + // Ensure the queue is drained. + { + std::unique_lock ql{queue_mutex}; + event_cv.wait(ql, [this] { return work_queue.empty(); }); + } + + // Now wait for execution to finish. + // This needs to be done in the same order as WorkerThread. + std::scoped_lock el{execution_mutex}; +} + +void Scheduler::Wait(u64 tick) { + if (tick >= master_semaphore->CurrentTick()) { + // Make sure we are not waiting for the current tick without signalling + Flush(); + } + master_semaphore->Wait(tick); +} + +void Scheduler::DispatchWork() { + if (!use_worker_thread || chunk->Empty()) { + return; + } + + { + std::scoped_lock ql{queue_mutex}; + work_queue.push(std::move(chunk)); + } + + event_cv.notify_all(); + AcquireNewChunk(); +} + +void Scheduler::WorkerThread(std::stop_token stop_token) { + Common::SetCurrentThreadName("VulkanWorker"); + + const auto TryPopQueue{[this](auto& work) -> bool { + if (work_queue.empty()) { + return false; + } + + work = std::move(work_queue.front()); + work_queue.pop(); + event_cv.notify_all(); + return true; + }}; + + while (!stop_token.stop_requested()) { + std::unique_ptr work; + + { + std::unique_lock lk{queue_mutex}; + + // Wait for work. + Common::CondvarWait(event_cv, lk, stop_token, [&] { return TryPopQueue(work); }); + + // If we've been asked to stop, we're done. + if (stop_token.stop_requested()) { + return; + } + + // Exchange lock ownership so that we take the execution lock before + // the queue lock goes out of scope. This allows us to force execution + // to complete in the next step. + std::exchange(lk, std::unique_lock{execution_mutex}); + + // Perform the work, tracking whether the chunk was a submission + // before executing. 
+ const bool has_submit = work->HasSubmit(); + work->ExecuteAll(current_cmdbuf); + + // If the chunk was a submission, reallocate the command buffer. + if (has_submit) { + AllocateWorkerCommandBuffers(); + } + } + + { + std::scoped_lock rl{reserve_mutex}; + + // Recycle the chunk back to the reserve. + chunk_reserve.emplace_back(std::move(work)); + } + } +} + +void Scheduler::AllocateWorkerCommandBuffers() { + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, + }; + + current_cmdbuf = command_pool.Commit(); + current_cmdbuf.begin(begin_info); +} + +void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) { + state = StateFlags::AllDirty; + const u64 signal_value = master_semaphore->NextTick(); + + renderpass_cache.EndRendering(); + Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + MICROPROFILE_SCOPE(Vulkan_Submit); + std::scoped_lock lock{submit_mutex}; + master_semaphore->SubmitWork(cmdbuf, wait_semaphore, signal_semaphore, signal_value); + }); + + if (!use_worker_thread) { + AllocateWorkerCommandBuffers(); + } else { + chunk->MarkSubmit(); + DispatchWork(); + } +} + +void Scheduler::AcquireNewChunk() { + std::scoped_lock lock{reserve_mutex}; + if (chunk_reserve.empty()) { + chunk = std::make_unique(); + return; + } + + chunk = std::move(chunk_reserve.back()); + chunk_reserve.pop_back(); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h new file mode 100644 index 0000000000..faffd22e8b --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -0,0 +1,210 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include "common/alignment.h" +#include "common/common_funcs.h" +#include "common/logging/log.h" +#include "common/polyfill_thread.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" + +namespace Vulkan { + +enum class StateFlags { + AllDirty = 0, + Renderpass = 1 << 0, + Pipeline = 1 << 1, + DescriptorSets = 1 << 2 +}; + +DECLARE_ENUM_FLAG_OPERATORS(StateFlags) + +class Instance; +class RenderpassCache; + +/// The scheduler abstracts command buffer and fence management with an interface that's able to do +/// OpenGL-like operations on Vulkan command buffers. +class Scheduler { +public: + explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache); + ~Scheduler(); + + /// Sends the current execution context to the GPU. + void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr); + + /// Sends the current execution context to the GPU and waits for it to complete. + void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr); + + /// Waits for the worker thread to finish executing everything. After this function returns it's + /// safe to touch worker resources. + void WaitWorker(); + + /// Waits for the given tick to trigger on the GPU. + void Wait(u64 tick); + + /// Sends currently recorded work to the worker thread. + void DispatchWork(); + + /// Records the command to the current chunk. 
+ template + void Record(T&& command) { + if (!use_worker_thread) { + command(current_cmdbuf); + return; + } + + if (chunk->Record(command)) { + return; + } + DispatchWork(); + (void)chunk->Record(command); + } + + /// Marks the provided state as non dirty + void MarkStateNonDirty(StateFlags flag) noexcept { + state |= flag; + } + + /// Marks the provided state as dirty + void MakeDirty(StateFlags flag) noexcept { + state &= ~flag; + } + + /// Returns true if the state is dirty + [[nodiscard]] bool IsStateDirty(StateFlags flag) const noexcept { + return False(state & flag); + } + + /// Returns the current command buffer tick. + [[nodiscard]] u64 CurrentTick() const noexcept { + return master_semaphore->CurrentTick(); + } + + /// Returns true when a tick has been triggered by the GPU. + [[nodiscard]] bool IsFree(u64 tick) const noexcept { + return master_semaphore->IsFree(tick); + } + + /// Returns the master timeline semaphore. + [[nodiscard]] MasterSemaphore* GetMasterSemaphore() noexcept { + return master_semaphore.get(); + } + + std::mutex submit_mutex; + +private: + class Command { + public: + virtual ~Command() = default; + + virtual void Execute(vk::CommandBuffer cmdbuf) const = 0; + + Command* GetNext() const { + return next; + } + + void SetNext(Command* next_) { + next = next_; + } + + private: + Command* next = nullptr; + }; + + template + class TypedCommand final : public Command { + public: + explicit TypedCommand(T&& command_) : command{std::move(command_)} {} + ~TypedCommand() override = default; + + TypedCommand(TypedCommand&&) = delete; + TypedCommand& operator=(TypedCommand&&) = delete; + + void Execute(vk::CommandBuffer cmdbuf) const override { + command(cmdbuf); + } + + private: + T command; + }; + + class CommandChunk final { + public: + void ExecuteAll(vk::CommandBuffer cmdbuf); + + template + bool Record(T& command) { + using FuncType = TypedCommand; + static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large"); + + 
recorded_counts++; + command_offset = Common::AlignUp(command_offset, alignof(FuncType)); + if (command_offset > sizeof(data) - sizeof(FuncType)) { + return false; + } + Command* const current_last = last; + last = new (data.data() + command_offset) FuncType(std::move(command)); + + if (current_last) { + current_last->SetNext(last); + } else { + first = last; + } + command_offset += sizeof(FuncType); + return true; + } + + void MarkSubmit() { + submit = true; + } + + bool Empty() const { + return recorded_counts == 0; + } + + bool HasSubmit() const { + return submit; + } + + private: + Command* first = nullptr; + Command* last = nullptr; + + std::size_t recorded_counts = 0; + std::size_t command_offset = 0; + bool submit = false; + alignas(std::max_align_t) std::array data{}; + }; + +private: + void WorkerThread(std::stop_token stop_token); + + void AllocateWorkerCommandBuffers(); + + void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore); + + void AcquireNewChunk(); + +private: + RenderpassCache& renderpass_cache; + std::unique_ptr master_semaphore; + CommandPool command_pool; + std::unique_ptr chunk; + std::queue> work_queue; + std::vector> chunk_reserve; + vk::CommandBuffer current_cmdbuf; + StateFlags state{}; + std::mutex execution_mutex; + std::mutex reserve_mutex; + std::mutex queue_mutex; + std::condition_variable_any event_cv; + std::jthread worker_thread; + bool use_worker_thread; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index 049366b59d..37aac4dd9f 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -46,7 +46,7 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool use_clip_p if (is_output) { // gl_PerVertex redeclaration is required for separate shader object out += "out gl_PerVertex {\n"; - out += " vec4 gl_Position;\n"; + out += " invariant 
vec4 gl_Position;\n"; if (use_clip_planes) { out += " float gl_ClipDistance[2];\n"; } @@ -236,6 +236,10 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) { state.shadow_rendering.Assign(regs.framebuffer.output_merger.fragment_operation_mode == FramebufferRegs::FragmentOperationMode::Shadow); state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0); + + // We only need fragment shader interlock when shadow rendering. + state.use_fragment_shader_interlock.Assign(state.shadow_rendering && + instance.IsFragmentShaderInterlockSupported()); } void PicaShaderConfigCommon::Init(const Pica::RasterizerRegs& rasterizer, @@ -1194,8 +1198,31 @@ float ProcTexNoiseCoef(vec2 x) { std::string GenerateFragmentShader(const PicaFSConfig& config) { const auto& state = config.state; - std::string out = "#version 450 core\n" - "#extension GL_ARB_separate_shader_objects : enable\n\n"; + std::string out = R"( +#version 450 core +#extension GL_ARB_separate_shader_objects : enable +)"; + + if (state.use_fragment_shader_interlock) { + out += R"( +#if defined(GL_ARB_fragment_shader_interlock) +#extension GL_ARB_fragment_shader_interlock : enable +#define beginInvocationInterlock beginInvocationInterlockARB +#define endInvocationInterlock endInvocationInterlockARB +#elif defined(GL_NV_fragment_shader_interlock) +#extension GL_NV_fragment_shader_interlock : enable +#define beginInvocationInterlock beginInvocationInterlockNV +#define endInvocationInterlock endInvocationInterlockNV +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define beginInvocationInterlock beginFragmentShaderOrderingINTEL +#define endInvocationInterlock +#endif + +layout(pixel_interlock_ordered) in; +)"; + } + out += GetVertexInterfaceDeclaration(false); out += R"( @@ -1280,6 +1307,19 @@ uint EncodeShadow(uvec2 pixel) { return (pixel.x << 8) | pixel.y; } +uint UpdateShadow(uint pixel, uint d, uint s) { + uvec2 ref 
= DecodeShadow(pixel); + if (d < ref.x) { + if (s == 0u) { + ref.x = d; + } else { + s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x))); + ref.y = min(s, ref.y); + } + } + return EncodeShadow(ref); +} + float CompareShadow(uint pixel, uint z) { uvec2 p = DecodeShadow(pixel); return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z); @@ -1511,10 +1551,11 @@ vec4 secondary_fragment_color = vec4(0.0); "gl_FragCoord.y < float(scissor_y2))) discard;\n"; } - // After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use - // default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then - // do our own transformation according to PICA specification. - out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n" + // The PICA depth range is [-1, 0] while in Vulkan that range is [0, 1]. + // Thus in the vertex shader we flip the sign of the z component to place + // it in the correct range. Here we undo the transformation to get the original z_over_w, + // then do our own transformation according to PICA specification. 
+ out += "float z_over_w = -gl_FragCoord.z;\n" "float depth = z_over_w * depth_scale + depth_offset;\n"; if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) { out += "depth /= gl_FragCoord.w;\n"; @@ -1577,26 +1618,26 @@ vec4 secondary_fragment_color = vec4(0.0); uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF)); uint s = uint(last_tex_env_out.g * float(0xFF)); ivec2 image_coord = ivec2(gl_FragCoord.xy); - +)"; + if (state.use_fragment_shader_interlock) { + out += R"( +beginInvocationInterlock(); +uint old_shadow = imageLoad(shadow_buffer, image_coord).x; +uint new_shadow = UpdateShadow(old_shadow, d, s); +imageStore(shadow_buffer, image_coord, uvec4(new_shadow)); +endInvocationInterlock(); +)"; + } else { + out += R"( uint old = imageLoad(shadow_buffer, image_coord).x; uint new1; uint old2; do { old2 = old; - - uvec2 ref = DecodeShadow(old); - if (d < ref.x) { - if (s == 0u) { - ref.x = d; - } else { - s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x))); - ref.y = min(s, ref.y); - } - } - new1 = EncodeShadow(ref); - + new1 = UpdateShadow(old, d, s); } while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new1)) != old2); )"; + } } else { out += "gl_FragDepth = depth;\n"; // Round the final fragment color to maintain the PICA's 8 bits of precision @@ -1652,6 +1693,7 @@ std::string GenerateTrivialVertexShader(bool use_clip_planes) { out += UniformBlockDef; out += R"( +const float EPSILON_Z = 0.00000001f; void main() { primary_color = vert_color; @@ -1661,14 +1703,17 @@ void main() { texcoord0_w = vert_texcoord0_w; normquat = vert_normquat; view = vert_view; - gl_Position = vert_position; - gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0; + vec4 vtx_pos = vert_position; + if (abs(vtx_pos.z) < EPSILON_Z) { + vtx_pos.z = 0.f; + } + gl_Position = vec4(vtx_pos.x, vtx_pos.y, -vtx_pos.z, vtx_pos.w); )"; if (use_clip_planes) { out += R"( - gl_ClipDistance[0] = -vert_position.z; // fixed PICA 
clipping plane z <= 0 + gl_ClipDistance[0] = -vtx_pos.z; // fixed PICA clipping plane z <= 0 if (enable_clip1) { - gl_ClipDistance[1] = dot(clip_coef, vert_position); + gl_ClipDistance[1] = dot(clip_coef, vtx_pos); } else { gl_ClipDistance[1] = 0; } @@ -1768,6 +1813,7 @@ layout (set = 0, binding = 0, std140) uniform vs_config { return "0.0"; }; + out += "const float EPSILON_Z = 0.00000001f;\n\n"; out += "vec4 GetVertexQuaternion() {\n"; out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " + semantic(VSOutputAttributes::QUATERNION_Y) + ", " + @@ -1780,8 +1826,10 @@ layout (set = 0, binding = 0, std140) uniform vs_config { semantic(VSOutputAttributes::POSITION_Y) + ", " + semantic(VSOutputAttributes::POSITION_Z) + ", " + semantic(VSOutputAttributes::POSITION_W) + ");\n"; - out += " gl_Position = vtx_pos;\n"; - out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n"; + out += " if (abs(vtx_pos.z) < EPSILON_Z) {\n"; + out += " vtx_pos.z = 0.f;\n"; + out += " }\n"; + out += " gl_Position = vec4(vtx_pos.x, vtx_pos.y, -vtx_pos.z, vtx_pos.w);\n"; if (config.use_clip_planes) { out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 out += " if (enable_clip1) {\n"; @@ -1855,6 +1903,7 @@ struct Vertex { return "0.0"; }; + out += "const float EPSILON_Z = 0.00000001f;\n\n"; out += "vec4 GetVertexQuaternion(Vertex vtx) {\n"; out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " + semantic(VSOutputAttributes::QUATERNION_Y) + ", " + @@ -1867,8 +1916,10 @@ struct Vertex { semantic(VSOutputAttributes::POSITION_Y) + ", " + semantic(VSOutputAttributes::POSITION_Z) + ", " + semantic(VSOutputAttributes::POSITION_W) + ");\n"; - out += " gl_Position = vtx_pos;\n"; - out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n"; + out += " if (abs(vtx_pos.z) < EPSILON_Z) {\n"; + out += " vtx_pos.z = 0.f;\n"; + out += " }\n"; + out += " gl_Position = vec4(vtx_pos.x, vtx_pos.y, -vtx_pos.z, vtx_pos.w);\n"; if 
(use_clip_planes) { out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 out += " if (enable_clip1) {\n"; diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h index 52fa832ff6..7ccc01969b 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -55,6 +55,7 @@ struct PicaFSConfigState { BitField<23, 4, Pica::FramebufferRegs::LogicOp> logic_op; BitField<27, 1, u32> shadow_rendering; BitField<28, 1, u32> shadow_texture_orthographic; + BitField<29, 1, u32> use_fragment_shader_interlock; }; union { diff --git a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp b/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp index 3320219448..f1efcf86f3 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp @@ -53,7 +53,7 @@ void FragmentModule::Generate() { combiner_buffer = ConstF32(0.f, 0.f, 0.f, 0.f); next_combiner_buffer = GetShaderDataMember(vec_ids.Get(4), ConstS32(27)); - last_tex_env_out = ConstF32(0.f, 0.f, 0.f, 0.f); + last_tex_env_out = rounded_primary_color; // Write shader bytecode to emulate PICA TEV stages for (std::size_t index = 0; index < config.state.tev_stages.size(); ++index) { @@ -115,7 +115,7 @@ void FragmentModule::WriteDepth() { const Id input_pointer_id{TypePointer(spv::StorageClass::Input, f32_id)}; const Id gl_frag_coord_z{ OpLoad(f32_id, OpAccessChain(input_pointer_id, gl_frag_coord_id, ConstU32(2u)))}; - const Id z_over_w{OpFma(f32_id, ConstF32(2.f), gl_frag_coord_z, ConstF32(-1.f))}; + const Id z_over_w{OpFNegate(f32_id, gl_frag_coord_z)}; const Id depth_scale{GetShaderDataMember(f32_id, ConstS32(2))}; const Id depth_offset{GetShaderDataMember(f32_id, ConstS32(3))}; depth = OpFma(f32_id, z_over_w, depth_scale, depth_offset); diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp 
b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 2d84a76b95..ba5c5f8670 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -160,7 +160,7 @@ bool InitializeCompiler() { vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device) { if (!InitializeCompiler()) { - return VK_NULL_HANDLE; + return {}; } EProfile profile = ECoreProfile; @@ -182,7 +182,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v includer)) [[unlikely]] { LOG_INFO(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog()); - return VK_NULL_HANDLE; + return {}; } // Even though there's only a single shader, we still need to link it to generate SPV @@ -191,7 +191,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v if (!program->link(messages)) { LOG_INFO(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog()); - return VK_NULL_HANDLE; + return {}; } glslang::TIntermediate* intermediate = program->getIntermediate(lang); @@ -227,7 +227,7 @@ vk::ShaderModule CompileSPV(std::span code, vk::Device device) { UNREACHABLE_MSG("{}", err.what()); } - return VK_NULL_HANDLE; + return {}; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp new file mode 100644 index 0000000000..27b1314427 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -0,0 +1,209 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include "common/alignment.h" +#include "common/assert.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" + +namespace Vulkan { + +namespace { + +std::string_view BufferTypeName(BufferType type) { + switch (type) { + case BufferType::Upload: + return "Upload"; + case BufferType::Download: + return "Download"; + case BufferType::Stream: + return "Stream"; + default: + return "Invalid"; + } +} + +vk::MemoryPropertyFlags MakePropertyFlags(BufferType type) { + switch (type) { + case BufferType::Upload: + return vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent; + case BufferType::Download: + return vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached; + case BufferType::Stream: + return vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent; + default: + UNREACHABLE_MSG("Unknown buffer type {}", type); + return vk::MemoryPropertyFlagBits::eHostVisible; + } +} + +/// Find a memory type with the passed requirements +std::optional FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, + vk::MemoryPropertyFlags wanted) { + for (u32 i = 0; i < properties.memoryTypeCount; ++i) { + const auto flags = properties.memoryTypes[i].propertyFlags; + if ((flags & wanted) == wanted) { + return i; + } + } + return std::nullopt; +} + +/// Get the preferred host visible memory type. 
+u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, BufferType type) { + vk::MemoryPropertyFlags flags = MakePropertyFlags(type); + std::optional preferred_type = FindMemoryType(properties, flags); + + constexpr std::array remove_flags = { + vk::MemoryPropertyFlagBits::eHostCached, + vk::MemoryPropertyFlagBits::eHostCoherent, + }; + + for (u32 i = 0; i < remove_flags.size() && !preferred_type; i++) { + flags &= ~remove_flags[i]; + preferred_type = FindMemoryType(properties, flags); + } + ASSERT_MSG(preferred_type, "No suitable memory type found"); + return preferred_type.value(); +} + +constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; +constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; + +} // Anonymous namespace + +StreamBuffer::StreamBuffer(const Instance& instance_, Scheduler& scheduler_, + vk::BufferUsageFlags usage_, u64 size, BufferType type_) + : instance{instance_}, scheduler{scheduler_}, device{instance.GetDevice()}, + stream_buffer_size{size}, usage{usage_}, type{type_} { + CreateBuffers(size); + ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); + ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); +} + +StreamBuffer::~StreamBuffer() { + device.unmapMemory(memory); + device.destroyBuffer(buffer); + device.freeMemory(memory); +} + +std::tuple StreamBuffer::Map(u64 size, u64 alignment) { + if (!is_coherent && type == BufferType::Stream) { + size = Common::AlignUp(size, instance.NonCoherentAtomSize()); + } + + ASSERT(size <= stream_buffer_size); + mapped_size = size; + + if (alignment > 0) { + offset = Common::AlignUp(offset, alignment); + } + + bool invalidate{false}; + if (offset + size > stream_buffer_size) { + // The buffer would overflow, save the amount of used watches and reset the state. + invalidate = true; + invalidation_mark = current_watch_cursor; + current_watch_cursor = 0; + offset = 0; + + // Swap watches and reset waiting cursors. 
+ std::swap(previous_watches, current_watches); + wait_cursor = 0; + wait_bound = 0; + } + + const u64 mapped_upper_bound = offset + size; + WaitPendingOperations(mapped_upper_bound); + + return std::make_tuple(mapped + offset, offset, invalidate); +} + +void StreamBuffer::Commit(u64 size) { + if (!is_coherent && type == BufferType::Stream) { + size = Common::AlignUp(size, instance.NonCoherentAtomSize()); + } + + ASSERT_MSG(size <= mapped_size, "Reserved size {} is too small compared to {}", mapped_size, + size); + + const vk::MappedMemoryRange range = { + .memory = memory, + .offset = offset, + .size = size, + }; + + if (!is_coherent && type == BufferType::Download) { + device.invalidateMappedMemoryRanges(range); + } else if (!is_coherent) { + device.flushMappedMemoryRanges(range); + } + + offset += size; + + if (current_watch_cursor + 1 >= current_watches.size()) { + // Ensure that there are enough watches. + ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK); + } + auto& watch = current_watches[current_watch_cursor++]; + watch.upper_bound = offset; + watch.tick = scheduler.CurrentTick(); +} + +void StreamBuffer::CreateBuffers(u64 prefered_size) { + const vk::Device device = instance.GetDevice(); + const auto memory_properties = instance.GetPhysicalDevice().getMemoryProperties(); + const u32 preferred_type = GetMemoryType(memory_properties, type); + const vk::MemoryType mem_type = memory_properties.memoryTypes[preferred_type]; + const u32 preferred_heap = mem_type.heapIndex; + is_coherent = + static_cast(mem_type.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent); + + // Substract from the preferred heap size some bytes to avoid getting out of memory. 
+ const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; + // As per DXVK's example, using `heap_size / 2` + const VkDeviceSize allocable_size = heap_size / 2; + buffer = device.createBuffer({ + .size = std::min(prefered_size, allocable_size), + .usage = usage, + }); + + const auto requirements = device.getBufferMemoryRequirements(buffer); + stream_buffer_size = static_cast(requirements.size); + + LOG_INFO(Render_Vulkan, "Creating {} buffer with size {} KB with flags {}", + BufferTypeName(type), stream_buffer_size / 1024, + vk::to_string(mem_type.propertyFlags)); + + memory = device.allocateMemory({ + .allocationSize = requirements.size, + .memoryTypeIndex = preferred_type, + }); + + device.bindBufferMemory(buffer, memory, 0); + mapped = reinterpret_cast(device.mapMemory(memory, 0, VK_WHOLE_SIZE)); +} + +void StreamBuffer::ReserveWatches(std::vector& watches, std::size_t grow_size) { + watches.resize(watches.size() + grow_size); +} + +void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { + if (!invalidation_mark) { + return; + } + while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) { + auto& watch = previous_watches[wait_cursor]; + wait_bound = watch.upper_bound; + scheduler.Wait(watch.tick); + ++wait_cursor; + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h new file mode 100644 index 0000000000..2b14c78a72 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -0,0 +1,86 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +enum class BufferType : u32 { + Upload = 0, + Download = 1, + Stream = 2, +}; + +class Instance; +class Scheduler; + +class StreamBuffer final { + static constexpr std::size_t MAX_BUFFER_VIEWS = 3; + +public: + explicit StreamBuffer(const Instance& instance, Scheduler& scheduler, + vk::BufferUsageFlags usage, u64 size, + BufferType type = BufferType::Stream); + ~StreamBuffer(); + + /** + * Reserves a region of memory from the stream buffer. + * @param size Size to reserve. + * @returns A pair of a raw memory pointer (with offset added), and the buffer offset + */ + std::tuple Map(u64 size, u64 alignment); + + /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. + void Commit(u64 size); + + vk::Buffer Handle() const noexcept { + return buffer; + } + +private: + struct Watch { + u64 tick{}; + u64 upper_bound{}; + }; + + /// Creates Vulkan buffer handles committing the required the required memory. + void CreateBuffers(u64 prefered_size); + + /// Increases the amount of watches available. + void ReserveWatches(std::vector& watches, std::size_t grow_size); + + void WaitPendingOperations(u64 requested_upper_bound); + +private: + const Instance& instance; ///< Vulkan instance. + Scheduler& scheduler; ///< Command scheduler. + + vk::Device device; + vk::Buffer buffer; ///< Mapped buffer. + vk::DeviceMemory memory; ///< Memory allocation. + u8* mapped{}; ///< Pointer to the mapped memory + u64 stream_buffer_size{}; ///< Stream buffer size. + vk::BufferUsageFlags usage{}; + BufferType type; + + u64 offset{}; ///< Buffer iterator. + u64 mapped_size{}; ///< Size reserved for the current copy. + bool is_coherent{}; ///< True if the buffer is coherent + + std::vector current_watches; ///< Watches recorded in the current iteration. 
+ std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation. + std::optional invalidation_mark; ///< Number of watches used in the previous cycle. + + std::vector previous_watches; ///< Watches used in the previous iteration. + std::size_t wait_cursor{}; ///< Last watch being waited for completion. + u64 wait_bound{}; ///< Highest offset being watched for completion. +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp new file mode 100644 index 0000000000..260839c8fb --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -0,0 +1,259 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "common/settings.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" + +MICROPROFILE_DEFINE(Vulkan_Acquire, "Vulkan", "Swapchain Acquire", MP_RGB(185, 66, 245)); +MICROPROFILE_DEFINE(Vulkan_Present, "Vulkan", "Swapchain Present", MP_RGB(66, 185, 245)); + +namespace Vulkan { + +Swapchain::Swapchain(const Instance& instance_, u32 width, u32 height, vk::SurfaceKHR surface_) + : instance{instance_}, surface{surface_} { + FindPresentFormat(); + SetPresentMode(); + Create(width, height, surface); +} + +Swapchain::~Swapchain() { + Destroy(); + instance.GetInstance().destroySurfaceKHR(surface); +} + +void Swapchain::Create(u32 width_, u32 height_, vk::SurfaceKHR surface_) { + width = width_; + height = height_; + surface = surface_; + needs_recreation = false; + + Destroy(); + + SetPresentMode(); + SetSurfaceProperties(); + + const std::array queue_family_indices = { + instance.GetGraphicsQueueFamilyIndex(), + instance.GetPresentQueueFamilyIndex(), + }; + + const bool exclusive = queue_family_indices[0] == queue_family_indices[1]; + 
const u32 queue_family_indices_count = exclusive ? 1u : 2u; + const vk::SharingMode sharing_mode = + exclusive ? vk::SharingMode::eExclusive : vk::SharingMode::eConcurrent; + const vk::SwapchainCreateInfoKHR swapchain_info = { + .surface = surface, + .minImageCount = image_count, + .imageFormat = surface_format.format, + .imageColorSpace = surface_format.colorSpace, + .imageExtent = extent, + .imageArrayLayers = 1, + .imageUsage = vk::ImageUsageFlagBits::eColorAttachment | + vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst, + .imageSharingMode = sharing_mode, + .queueFamilyIndexCount = queue_family_indices_count, + .pQueueFamilyIndices = queue_family_indices.data(), + .preTransform = transform, + .compositeAlpha = composite_alpha, + .presentMode = present_mode, + .clipped = true, + .oldSwapchain = nullptr, + }; + + try { + swapchain = instance.GetDevice().createSwapchainKHR(swapchain_info); + } catch (vk::SystemError& err) { + LOG_CRITICAL(Render_Vulkan, "{}", err.what()); + UNREACHABLE(); + } + + SetupImages(); + RefreshSemaphores(); +} + +bool Swapchain::AcquireNextImage() { + MICROPROFILE_SCOPE(Vulkan_Acquire); + vk::Device device = instance.GetDevice(); + vk::Result result = + device.acquireNextImageKHR(swapchain, std::numeric_limits::max(), + image_acquired[frame_index], VK_NULL_HANDLE, &image_index); + + switch (result) { + case vk::Result::eSuccess: + break; + case vk::Result::eSuboptimalKHR: + case vk::Result::eErrorSurfaceLostKHR: + case vk::Result::eErrorOutOfDateKHR: + needs_recreation = true; + break; + default: + LOG_CRITICAL(Render_Vulkan, "Swapchain acquire returned unknown result {}", result); + UNREACHABLE(); + break; + } + + return !needs_recreation; +} + +void Swapchain::Present() { + if (needs_recreation) { + return; + } + + const vk::PresentInfoKHR present_info = { + .waitSemaphoreCount = 1, + .pWaitSemaphores = &present_ready[image_index], + .swapchainCount = 1, + .pSwapchains = &swapchain, + .pImageIndices = 
&image_index, + }; + + MICROPROFILE_SCOPE(Vulkan_Present); + try { + [[maybe_unused]] vk::Result result = instance.GetPresentQueue().presentKHR(present_info); + } catch (vk::OutOfDateKHRError&) { + needs_recreation = true; + } catch (const vk::SystemError& err) { + LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed {}", err.what()); + UNREACHABLE(); + } + + frame_index = (frame_index + 1) % image_count; +} + +void Swapchain::FindPresentFormat() { + const auto formats = instance.GetPhysicalDevice().getSurfaceFormatsKHR(surface); + + // If there is a single undefined surface format, the device doesn't care, so we'll just use + // RGBA. + if (formats[0].format == vk::Format::eUndefined) { + surface_format.format = vk::Format::eR8G8B8A8Unorm; + surface_format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear; + return; + } + + // Try to find a suitable format. + for (const vk::SurfaceFormatKHR& sformat : formats) { + vk::Format format = sformat.format; + if (format != vk::Format::eR8G8B8A8Unorm && format != vk::Format::eB8G8R8A8Unorm) { + continue; + } + + surface_format.format = format; + surface_format.colorSpace = sformat.colorSpace; + return; + } + + LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!"); + UNREACHABLE(); +} + +void Swapchain::SetPresentMode() { + const auto modes = instance.GetPhysicalDevice().getSurfacePresentModesKHR(surface); + const bool use_vsync = Settings::values.use_vsync_new.GetValue(); + const auto find_mode = [&modes](vk::PresentModeKHR requested) { + const auto it = + std::find_if(modes.begin(), modes.end(), + [&requested](vk::PresentModeKHR mode) { return mode == requested; }); + + return it != modes.end(); + }; + + present_mode = vk::PresentModeKHR::eFifo; + const bool has_immediate = find_mode(vk::PresentModeKHR::eImmediate); + const bool has_mailbox = find_mode(vk::PresentModeKHR::eMailbox); + if (!has_immediate && !has_mailbox) { + LOG_WARNING(Render_Vulkan, "Forcing Fifo present mode as no alternatives are 
available"); + return; + } + + // If the user has disabled vsync use immediate mode for the least latency. + // This may have screen tearing. + if (!use_vsync) { + present_mode = + has_immediate ? vk::PresentModeKHR::eImmediate : vk::PresentModeKHR::eMailbox; + return; + } + // If vsync is enabled attempt to use mailbox mode in case the user wants to speedup/slowdown + // the game. If mailbox is not available use immediate and warn about it. + if (use_vsync && Settings::values.frame_limit.GetValue() > 100) { + present_mode = has_mailbox ? vk::PresentModeKHR::eMailbox : vk::PresentModeKHR::eImmediate; + if (!has_mailbox) { + LOG_WARNING( + Render_Vulkan, + "Vsync enabled while frame limiting and no mailbox support, expect tearing"); + } + return; + } +} + +void Swapchain::SetSurfaceProperties() { + const vk::SurfaceCapabilitiesKHR capabilities = + instance.GetPhysicalDevice().getSurfaceCapabilitiesKHR(surface); + + extent = capabilities.currentExtent; + if (capabilities.currentExtent.width == std::numeric_limits::max()) { + extent.width = std::max(capabilities.minImageExtent.width, + std::min(capabilities.maxImageExtent.width, width)); + extent.height = std::max(capabilities.minImageExtent.height, + std::min(capabilities.maxImageExtent.height, height)); + } + + // Select number of images in swap chain, we prefer one buffer in the background to work on + image_count = capabilities.minImageCount + 1; + if (capabilities.maxImageCount > 0) { + image_count = std::min(image_count, capabilities.maxImageCount); + } + + // Prefer identity transform if possible + transform = vk::SurfaceTransformFlagBitsKHR::eIdentity; + if (!(capabilities.supportedTransforms & transform)) { + transform = capabilities.currentTransform; + } + + // Opaque is not supported everywhere. 
+ composite_alpha = vk::CompositeAlphaFlagBitsKHR::eOpaque; + if (!(capabilities.supportedCompositeAlpha & vk::CompositeAlphaFlagBitsKHR::eOpaque)) { + composite_alpha = vk::CompositeAlphaFlagBitsKHR::eInherit; + } +} + +void Swapchain::Destroy() { + vk::Device device = instance.GetDevice(); + if (swapchain) { + device.destroySwapchainKHR(swapchain); + } + for (u32 i = 0; i < image_count; i++) { + device.destroySemaphore(image_acquired[i]); + device.destroySemaphore(present_ready[i]); + } + image_acquired.clear(); + present_ready.clear(); +} + +void Swapchain::RefreshSemaphores() { + const vk::Device device = instance.GetDevice(); + image_acquired.resize(image_count); + present_ready.resize(image_count); + + for (vk::Semaphore& semaphore : image_acquired) { + semaphore = device.createSemaphore({}); + } + for (vk::Semaphore& semaphore : present_ready) { + semaphore = device.createSemaphore({}); + } +} + +void Swapchain::SetupImages() { + vk::Device device = instance.GetDevice(); + images = device.getSwapchainImagesKHR(swapchain); + image_count = static_cast(images.size()); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h new file mode 100644 index 0000000000..c3f6c17d0b --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -0,0 +1,110 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; +class Scheduler; + +class Swapchain { +public: + explicit Swapchain(const Instance& instance, u32 width, u32 height, vk::SurfaceKHR surface); + ~Swapchain(); + + /// Creates (or recreates) the swapchain with a given size. + void Create(u32 width, u32 height, vk::SurfaceKHR surface); + + /// Acquires the next image in the swapchain. 
+ bool AcquireNextImage(); + + /// Presents the current image and move to the next one + void Present(); + + vk::SurfaceKHR GetSurface() const { + return surface; + } + + vk::Image Image() const { + return images[image_index]; + } + + vk::SurfaceFormatKHR GetSurfaceFormat() const { + return surface_format; + } + + vk::SwapchainKHR GetHandle() const { + return swapchain; + } + + u32 GetWidth() const { + return width; + } + + u32 GetHeight() const { + return height; + } + + u32 GetImageCount() const { + return image_count; + } + + vk::Extent2D GetExtent() const { + return extent; + } + + [[nodiscard]] vk::Semaphore GetImageAcquiredSemaphore() const { + return image_acquired[frame_index]; + } + + [[nodiscard]] vk::Semaphore GetPresentReadySemaphore() const { + return present_ready[image_index]; + } + +private: + /// Selects the best available swapchain image format + void FindPresentFormat(); + + /// Sets the best available present mode + void SetPresentMode(); + + /// Sets the surface properties according to device capabilities + void SetSurfaceProperties(); + + /// Destroys current swapchain resources + void Destroy(); + + /// Performs creation of image views and framebuffers from the swapchain images + void SetupImages(); + + /// Creates the image acquired and present ready semaphores + void RefreshSemaphores(); + +private: + const Instance& instance; + vk::SwapchainKHR swapchain{}; + vk::SurfaceKHR surface{}; + vk::SurfaceFormatKHR surface_format; + vk::PresentModeKHR present_mode; + vk::Extent2D extent; + vk::SurfaceTransformFlagBitsKHR transform; + vk::CompositeAlphaFlagBitsKHR composite_alpha; + std::vector images; + std::vector image_acquired; + std::vector present_ready; + u32 width = 0; + u32 height = 0; + u32 image_count = 0; + u32 image_index = 0; + u32 frame_index = 0; + bool needs_recreation = true; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp new 
file mode 100644 index 0000000000..4dc222c1e2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -0,0 +1,1581 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/microprofile.h" +#include "common/scope_exit.h" +#include "video_core/custom_textures/material.h" +#include "video_core/rasterizer_cache/texture_codec.h" +#include "video_core/rasterizer_cache/utils.h" +#include "video_core/renderer_vulkan/pica_to_vk.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" + +#include +#include + +// Ignore the -Wclass-memaccess warning on memcpy for non-trivially default constructible objects. +#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#endif + +MICROPROFILE_DEFINE(Vulkan_ImageAlloc, "Vulkan", "Texture Allocation", MP_RGB(192, 52, 235)); + +namespace Vulkan { + +namespace { + +using VideoCore::MapType; +using VideoCore::PixelFormat; +using VideoCore::SurfaceType; +using VideoCore::TextureType; + +struct RecordParams { + vk::ImageAspectFlags aspect; + vk::Filter filter; + vk::PipelineStageFlags pipeline_flags; + vk::AccessFlags src_access; + vk::AccessFlags dst_access; + vk::Image src_image; + vk::Image dst_image; +}; + +vk::Filter MakeFilter(VideoCore::PixelFormat pixel_format) { + switch (pixel_format) { + case VideoCore::PixelFormat::D16: + case VideoCore::PixelFormat::D24: + case VideoCore::PixelFormat::D24S8: + return vk::Filter::eNearest; + default: + return vk::Filter::eLinear; + } +} + +[[nodiscard]] vk::ClearValue MakeClearValue(VideoCore::ClearValue clear) { + 
static_assert(sizeof(VideoCore::ClearValue) == sizeof(vk::ClearValue)); + + vk::ClearValue value{}; + std::memcpy(&value, &clear, sizeof(vk::ClearValue)); + return value; +} + +[[nodiscard]] vk::ClearColorValue MakeClearColorValue(Common::Vec4f color) { + return vk::ClearColorValue{ + .float32 = std::array{color[0], color[1], color[2], color[3]}, + }; +} + +[[nodiscard]] vk::ClearDepthStencilValue MakeClearDepthStencilValue(VideoCore::ClearValue clear) { + return vk::ClearDepthStencilValue{ + .depth = clear.depth, + .stencil = clear.stencil, + }; +} + +u32 UnpackDepthStencil(const VideoCore::StagingData& data, vk::Format dest) { + u32 depth_offset = 0; + u32 stencil_offset = 4 * data.size / 5; + const auto& mapped = data.mapped; + + switch (dest) { + case vk::Format::eD24UnormS8Uint: { + for (; stencil_offset < data.size; depth_offset += 4) { + u8* ptr = mapped.data() + depth_offset; + const u32 d24s8 = VideoCore::MakeInt(ptr); + const u32 d24 = d24s8 >> 8; + mapped[stencil_offset] = d24s8 & 0xFF; + std::memcpy(ptr, &d24, 4); + stencil_offset++; + } + break; + } + case vk::Format::eD32SfloatS8Uint: { + for (; stencil_offset < data.size; depth_offset += 4) { + u8* ptr = mapped.data() + depth_offset; + const u32 d24s8 = VideoCore::MakeInt(ptr); + const float d32 = (d24s8 >> 8) / 16777215.f; + mapped[stencil_offset] = d24s8 & 0xFF; + std::memcpy(ptr, &d32, 4); + stencil_offset++; + } + break; + } + default: + LOG_ERROR(Render_Vulkan, "Unimplemented convertion for depth format {}", + vk::to_string(dest)); + UNREACHABLE(); + } + + ASSERT(depth_offset == 4 * data.size / 5); + return depth_offset; +} + +boost::container::small_vector MakeInitBarriers( + vk::ImageAspectFlags aspect, std::span images, size_t num_images) { + boost::container::small_vector barriers; + for (size_t i = 0; i < num_images; i++) { + barriers.push_back(vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eNone, + .oldLayout = 
vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = images[i], + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }); + } + return barriers; +} + +Handle MakeHandle(const Instance* instance, u32 width, u32 height, u32 levels, TextureType type, + vk::Format format, vk::ImageUsageFlags usage, vk::ImageCreateFlags flags, + vk::ImageAspectFlags aspect, bool need_format_list, + std::string_view debug_name = {}) { + const u32 layers = type == TextureType::CubeMap ? 6 : 1; + + const std::array format_list = { + vk::Format::eR8G8B8A8Unorm, + vk::Format::eR32Uint, + }; + const vk::ImageFormatListCreateInfo image_format_list = { + .viewFormatCount = static_cast(format_list.size()), + .pViewFormats = format_list.data(), + }; + + const vk::ImageCreateInfo image_info = { + .pNext = need_format_list ? 
&image_format_list : nullptr, + .flags = flags, + .imageType = vk::ImageType::e2D, + .format = format, + .extent = {width, height, 1}, + .mipLevels = levels, + .arrayLayers = layers, + .samples = vk::SampleCountFlagBits::e1, + .usage = usage, + }; + + const VmaAllocationCreateInfo alloc_info = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .requiredFlags = 0, + .preferredFlags = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + }; + + VkImage unsafe_image{}; + VkImageCreateInfo unsafe_image_info = static_cast(image_info); + VmaAllocation allocation{}; + + VkResult result = vmaCreateImage(instance->GetAllocator(), &unsafe_image_info, &alloc_info, + &unsafe_image, &allocation, nullptr); + if (result != VK_SUCCESS) [[unlikely]] { + LOG_CRITICAL(Render_Vulkan, "Failed allocating image with error {}", result); + UNREACHABLE(); + } + + if (!debug_name.empty() && instance->HasDebuggingToolAttached()) { + const vk::DebugUtilsObjectNameInfoEXT name_info = { + .objectType = vk::ObjectType::eImage, + .objectHandle = reinterpret_cast(unsafe_image), + .pObjectName = debug_name.data(), + }; + instance->GetDevice().setDebugUtilsObjectNameEXT(name_info); + } + + const vk::Image image{unsafe_image}; + const vk::ImageViewCreateInfo view_info = { + .image = image, + .viewType = + type == TextureType::CubeMap ? 
vk::ImageViewType::eCube : vk::ImageViewType::e2D, + .format = format, + .subresourceRange{ + .aspectMask = aspect, + .baseMipLevel = 0, + .levelCount = levels, + .baseArrayLayer = 0, + .layerCount = layers, + }, + }; + vk::UniqueImageView image_view = instance->GetDevice().createImageViewUnique(view_info); + + return Handle{ + .alloc = allocation, + .image = image, + .image_view = std::move(image_view), + }; +} + +vk::UniqueFramebuffer MakeFramebuffer(vk::Device device, vk::RenderPass render_pass, u32 width, + u32 height, std::span attachments, + u32 num_attachments) { + const vk::FramebufferCreateInfo framebuffer_info = { + .renderPass = render_pass, + .attachmentCount = num_attachments, + .pAttachments = attachments.data(), + .width = width, + .height = height, + .layers = 1, + }; + return device.createFramebufferUnique(framebuffer_info); +} + +vk::ImageSubresourceRange MakeSubresourceRange(vk::ImageAspectFlags aspect, u32 level = 0, + u32 levels = 1, u32 layer = 0) { + return vk::ImageSubresourceRange{ + .aspectMask = aspect, + .baseMipLevel = level, + .levelCount = levels, + .baseArrayLayer = layer, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }; +} + +constexpr u64 UPLOAD_BUFFER_SIZE = 512 * 1024 * 1024; +constexpr u64 DOWNLOAD_BUFFER_SIZE = 16 * 1024 * 1024; + +} // Anonymous namespace + +TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler, + RenderpassCache& renderpass_cache, DescriptorPool& pool, + DescriptorSetProvider& texture_provider_, u32 num_swapchain_images_) + : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, + texture_provider{texture_provider_}, blit_helper{instance, scheduler, pool, renderpass_cache}, + upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE, + BufferType::Upload}, + download_buffer{instance, scheduler, + vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eStorageBuffer, + DOWNLOAD_BUFFER_SIZE, BufferType::Download}, + 
num_swapchain_images{num_swapchain_images_} {} + +TextureRuntime::~TextureRuntime() = default; + +VideoCore::StagingData TextureRuntime::FindStaging(u32 size, bool upload) { + StreamBuffer& buffer = upload ? upload_buffer : download_buffer; + const auto [data, offset, invalidate] = buffer.Map(size, 16); + return VideoCore::StagingData{ + .size = size, + .offset = static_cast(offset), + .mapped = std::span{data, size}, + }; +} + +u32 TextureRuntime::RemoveThreshold() { + return num_swapchain_images; +} + +void TextureRuntime::Finish() { + scheduler.Finish(); +} + +bool TextureRuntime::Reinterpret(Surface& source, Surface& dest, + const VideoCore::TextureCopy& copy) { + const PixelFormat src_format = source.pixel_format; + const PixelFormat dst_format = dest.pixel_format; + ASSERT_MSG(src_format != dst_format, "Reinterpretation with the same format is invalid"); + + if (!source.traits.needs_conversion && !dest.traits.needs_conversion && + source.type == dest.type) { + CopyTextures(source, dest, copy); + return true; + } + + if (src_format == PixelFormat::D24S8 && dst_format == PixelFormat::RGBA8) { + blit_helper.ConvertDS24S8ToRGBA8(source, dest, copy); + } else { + LOG_WARNING(Render_Vulkan, "Unimplemented reinterpretation {} -> {}", + VideoCore::PixelFormatAsString(src_format), + VideoCore::PixelFormatAsString(dst_format)); + return false; + } + return true; +} + +bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear) { + renderpass_cache.EndRendering(); + + const RecordParams params = { + .aspect = surface.Aspect(), + .pipeline_flags = surface.PipelineStageFlags(), + .src_access = surface.AccessFlags(), + .src_image = surface.Image(), + }; + + if (clear.texture_rect == surface.GetScaledRect()) { + scheduler.Record([params, clear](vk::CommandBuffer cmdbuf) { + const vk::ImageSubresourceRange range = { + .aspectMask = params.aspect, + .baseMipLevel = clear.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 
VK_REMAINING_ARRAY_LAYERS, + }; + + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = params.src_access, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = range, + }; + + const vk::ImageMemoryBarrier post_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = params.src_access, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = range, + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + + const bool is_color = + static_cast(params.aspect & vk::ImageAspectFlagBits::eColor); + if (is_color) { + cmdbuf.clearColorImage(params.src_image, vk::ImageLayout::eTransferDstOptimal, + MakeClearColorValue(clear.value.color), range); + } else { + cmdbuf.clearDepthStencilImage(params.src_image, + vk::ImageLayout::eTransferDstOptimal, + MakeClearDepthStencilValue(clear.value), range); + } + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + }); + return true; + } + + ClearTextureWithRenderpass(surface, clear); + return true; +} + +void TextureRuntime::ClearTextureWithRenderpass(Surface& surface, + const VideoCore::TextureClear& clear) { + const bool is_color = surface.type != VideoCore::SurfaceType::Depth && + surface.type != VideoCore::SurfaceType::DepthStencil; + + const auto color_format = is_color ? surface.pixel_format : PixelFormat::Invalid; + const auto depth_format = is_color ? 
PixelFormat::Invalid : surface.pixel_format; + const auto render_pass = renderpass_cache.GetRenderpass(color_format, depth_format, true); + + const RecordParams params = { + .aspect = surface.Aspect(), + .pipeline_flags = surface.PipelineStageFlags(), + .src_access = surface.AccessFlags(), + .src_image = surface.Image(), + }; + + scheduler.Record([params, is_color, clear, render_pass, + framebuffer = surface.Framebuffer()](vk::CommandBuffer cmdbuf) { + const vk::AccessFlags access_flag = + is_color ? vk::AccessFlagBits::eColorAttachmentRead | + vk::AccessFlagBits::eColorAttachmentWrite + : vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite; + + const vk::PipelineStageFlags pipeline_flags = + is_color ? vk::PipelineStageFlagBits::eColorAttachmentOutput + : vk::PipelineStageFlagBits::eEarlyFragmentTests; + + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = params.src_access, + .dstAccessMask = access_flag, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, clear.texture_level), + }; + + const vk::ImageMemoryBarrier post_barrier = { + .srcAccessMask = access_flag, + .dstAccessMask = params.src_access, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, clear.texture_level), + }; + + const vk::Rect2D render_area = { + .offset{ + .x = static_cast(clear.texture_rect.left), + .y = static_cast(clear.texture_rect.bottom), + }, + .extent{ + .width = clear.texture_rect.GetWidth(), + .height = clear.texture_rect.GetHeight(), + }, + }; + + const auto clear_value = 
MakeClearValue(clear.value); + + const vk::RenderPassBeginInfo renderpass_begin_info = { + .renderPass = render_pass, + .framebuffer = framebuffer, + .renderArea = render_area, + .clearValueCount = 1, + .pClearValues = &clear_value, + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + + cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline); + cmdbuf.endRenderPass(); + + cmdbuf.pipelineBarrier(pipeline_flags, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + }); +} + +bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, + const VideoCore::TextureCopy& copy) { + renderpass_cache.EndRendering(); + + const RecordParams params = { + .aspect = source.Aspect(), + .filter = MakeFilter(source.pixel_format), + .pipeline_flags = source.PipelineStageFlags() | dest.PipelineStageFlags(), + .src_access = source.AccessFlags(), + .dst_access = dest.AccessFlags(), + .src_image = source.Image(), + .dst_image = dest.Image(), + }; + + scheduler.Record([params, copy](vk::CommandBuffer cmdbuf) { + const vk::ImageCopy image_copy = { + .srcSubresource{ + .aspectMask = params.aspect, + .mipLevel = copy.src_level, + .baseArrayLayer = copy.src_layer, + .layerCount = 1, + }, + .srcOffset = {static_cast(copy.src_offset.x), static_cast(copy.src_offset.y), + 0}, + .dstSubresource{ + .aspectMask = params.aspect, + .mipLevel = copy.dst_level, + .baseArrayLayer = copy.dst_layer, + .layerCount = 1, + }, + .dstOffset = {static_cast(copy.dst_offset.x), static_cast(copy.dst_offset.y), + 0}, + .extent = {copy.extent.width, copy.extent.height, 1}, + }; + + const bool self_copy = params.src_image == params.dst_image; + const vk::ImageLayout new_src_layout = + self_copy ? vk::ImageLayout::eGeneral : vk::ImageLayout::eTransferSrcOptimal; + const vk::ImageLayout new_dst_layout = + self_copy ? 
vk::ImageLayout::eGeneral : vk::ImageLayout::eTransferDstOptimal; + + const std::array pre_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = params.src_access, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = new_src_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, copy.src_level), + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = params.dst_access, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = new_dst_layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.dst_image, + .subresourceRange = MakeSubresourceRange(params.aspect, copy.dst_level), + }, + }; + const std::array post_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eNone, + .oldLayout = new_src_layout, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, copy.src_level), + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = params.dst_access, + .oldLayout = new_dst_layout, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.dst_image, + .subresourceRange = MakeSubresourceRange(params.aspect, copy.dst_level), + }, + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); + + cmdbuf.copyImage(params.src_image, new_src_layout, params.dst_image, new_dst_layout, + 
image_copy); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); + }); + + return true; +} + +bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + const bool is_depth_stencil = source.type == VideoCore::SurfaceType::DepthStencil; + const auto& depth_traits = instance.GetTraits(source.pixel_format); + if (is_depth_stencil && !depth_traits.blit_support) { + return blit_helper.BlitDepthStencil(source, dest, blit); + } + + renderpass_cache.EndRendering(); + + const RecordParams params = { + .aspect = source.Aspect(), + .filter = MakeFilter(source.pixel_format), + .pipeline_flags = source.PipelineStageFlags() | dest.PipelineStageFlags(), + .src_access = source.AccessFlags(), + .dst_access = dest.AccessFlags(), + .src_image = source.Image(), + .dst_image = dest.Image(), + }; + + scheduler.Record([params, blit](vk::CommandBuffer cmdbuf) { + const std::array source_offsets = { + vk::Offset3D{static_cast(blit.src_rect.left), + static_cast(blit.src_rect.bottom), 0}, + vk::Offset3D{static_cast(blit.src_rect.right), static_cast(blit.src_rect.top), + 1}, + }; + + const std::array dest_offsets = { + vk::Offset3D{static_cast(blit.dst_rect.left), + static_cast(blit.dst_rect.bottom), 0}, + vk::Offset3D{static_cast(blit.dst_rect.right), static_cast(blit.dst_rect.top), + 1}, + }; + + const vk::ImageBlit blit_area = { + .srcSubresource{ + .aspectMask = params.aspect, + .mipLevel = blit.src_level, + .baseArrayLayer = blit.src_layer, + .layerCount = 1, + }, + .srcOffsets = source_offsets, + .dstSubresource{ + .aspectMask = params.aspect, + .mipLevel = blit.dst_level, + .baseArrayLayer = blit.dst_layer, + .layerCount = 1, + }, + .dstOffsets = dest_offsets, + }; + + const std::array read_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = params.src_access, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = 
vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, blit.src_level), + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = params.dst_access, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.dst_image, + .subresourceRange = MakeSubresourceRange(params.aspect, blit.dst_level), + }, + }; + const std::array write_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferRead, + .dstAccessMask = params.src_access, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, blit.src_level), + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = params.dst_access, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.dst_image, + .subresourceRange = MakeSubresourceRange(params.aspect, blit.dst_level), + }, + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barriers); + + cmdbuf.blitImage(params.src_image, vk::ImageLayout::eTransferSrcOptimal, params.dst_image, + vk::ImageLayout::eTransferDstOptimal, blit_area, params.filter); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, 
params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, write_barriers); + }); + + return true; +} + +void TextureRuntime::GenerateMipmaps(Surface& surface) { + if (VideoCore::IsCustomFormatCompressed(surface.custom_format)) { + LOG_ERROR(Render_Vulkan, "Generating mipmaps for compressed formats unsupported!"); + return; + } + + renderpass_cache.EndRendering(); + + auto [width, height] = surface.RealExtent(); + const u32 levels = surface.levels; + for (u32 i = 1; i < levels; i++) { + const Common::Rectangle src_rect{0, height, width, 0}; + width = width > 1 ? width >> 1 : 1; + height = height > 1 ? height >> 1 : 1; + const Common::Rectangle dst_rect{0, height, width, 0}; + + const VideoCore::TextureBlit blit = { + .src_level = i - 1, + .dst_level = i, + .src_rect = src_rect, + .dst_rect = dst_rect, + }; + BlitTextures(surface, surface, blit); + } +} + +bool TextureRuntime::NeedsConversion(VideoCore::PixelFormat format) const { + const FormatTraits traits = instance.GetTraits(format); + return traits.needs_conversion && + // DepthStencil formats are handled elsewhere due to de-interleaving. 
+ traits.aspect != (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil); +} + +void TextureRuntime::FreeDescriptorSetsWithImage(vk::ImageView image_view) { + texture_provider.FreeWithImage(image_view); + blit_helper.compute_provider.FreeWithImage(image_view); + blit_helper.compute_buffer_provider.FreeWithImage(image_view); + blit_helper.two_textures_provider.FreeWithImage(image_view); +} + +Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceParams& params) + : SurfaceBase{params}, runtime{&runtime_}, instance{&runtime_.GetInstance()}, + scheduler{&runtime_.GetScheduler()}, traits{instance->GetTraits(pixel_format)} { + + if (pixel_format == VideoCore::PixelFormat::Invalid) { + return; + } + + const bool is_mutable = pixel_format == VideoCore::PixelFormat::RGBA8; + const vk::Format format = traits.native; + + ASSERT_MSG(format != vk::Format::eUndefined && levels >= 1, + "Image allocation parameters are invalid"); + + u32 num_images = 0; + std::array raw_images; + + vk::ImageCreateFlags flags{}; + if (texture_type == VideoCore::TextureType::CubeMap) { + flags |= vk::ImageCreateFlagBits::eCubeCompatible; + } + if (is_mutable) { + flags |= vk::ImageCreateFlagBits::eMutableFormat; + } + + const bool need_format_list = is_mutable && instance->IsImageFormatListSupported(); + handles[0] = MakeHandle(instance, width, height, levels, texture_type, format, traits.usage, + flags, traits.aspect, need_format_list, DebugName(false)); + raw_images[num_images++] = handles[0].image; + + if (res_scale != 1) { + handles[1] = + MakeHandle(instance, GetScaledWidth(), GetScaledHeight(), levels, texture_type, format, + traits.usage, flags, traits.aspect, need_format_list, DebugName(true)); + raw_images[num_images++] = handles[1].image; + } + + runtime->renderpass_cache.EndRendering(); + scheduler->Record([raw_images, num_images, aspect = traits.aspect](vk::CommandBuffer cmdbuf) { + const auto barriers = MakeInitBarriers(aspect, raw_images, num_images); + 
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eTopOfPipe, + vk::DependencyFlagBits::eByRegion, {}, {}, barriers); + }); +} + +Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceBase& surface, + const VideoCore::Material* mat) + : SurfaceBase{surface}, runtime{&runtime_}, instance{&runtime_.GetInstance()}, + scheduler{&runtime_.GetScheduler()}, traits{instance->GetTraits(mat->format)} { + if (!traits.transfer_support) { + return; + } + + const bool has_normal = mat && mat->Map(MapType::Normal); + const vk::Format format = traits.native; + + u32 num_images = 0; + std::array raw_images; + + vk::ImageCreateFlags flags{}; + if (texture_type == VideoCore::TextureType::CubeMap) { + flags |= vk::ImageCreateFlagBits::eCubeCompatible; + } + + const std::string debug_name = DebugName(false, true); + handles[0] = MakeHandle(instance, mat->width, mat->height, levels, texture_type, format, + traits.usage, flags, traits.aspect, false, debug_name); + raw_images[num_images++] = handles[0].image; + + if (res_scale != 1) { + handles[1] = MakeHandle(instance, mat->width, mat->height, levels, texture_type, + vk::Format::eR8G8B8A8Unorm, traits.usage, flags, traits.aspect, + false, debug_name); + raw_images[num_images++] = handles[1].image; + } + if (has_normal) { + handles[2] = MakeHandle(instance, mat->width, mat->height, levels, texture_type, format, + traits.usage, flags, traits.aspect, false, debug_name); + raw_images[num_images++] = handles[2].image; + } + + runtime->renderpass_cache.EndRendering(); + scheduler->Record([raw_images, num_images, aspect = traits.aspect](vk::CommandBuffer cmdbuf) { + const auto barriers = MakeInitBarriers(aspect, raw_images, num_images); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eTopOfPipe, + vk::DependencyFlagBits::eByRegion, {}, {}, barriers); + }); + + custom_format = mat->format; + material = mat; +} + +Surface::~Surface() { + if 
(!handles[0].image_view) { + return; + } + for (const auto& [alloc, image, image_view] : handles) { + if (image_view) { + runtime->FreeDescriptorSetsWithImage(*image_view); + } + if (image) { + vmaDestroyImage(instance->GetAllocator(), image, alloc); + } + } + if (copy_handle.image_view) { + vmaDestroyImage(instance->GetAllocator(), copy_handle.image, copy_handle.alloc); + } +} + +void Surface::Upload(const VideoCore::BufferTextureCopy& upload, + const VideoCore::StagingData& staging) { + runtime->renderpass_cache.EndRendering(); + + const RecordParams params = { + .aspect = Aspect(), + .pipeline_flags = PipelineStageFlags(), + .src_access = AccessFlags(), + .src_image = Image(0), + }; + + scheduler->Record([buffer = runtime->upload_buffer.Handle(), format = traits.native, params, + staging, upload](vk::CommandBuffer cmdbuf) { + u32 num_copies = 1; + std::array buffer_image_copies; + + const auto rect = upload.texture_rect; + buffer_image_copies[0] = vk::BufferImageCopy{ + .bufferOffset = upload.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource{ + .aspectMask = params.aspect, + .mipLevel = upload.texture_level, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}, + }; + + if (params.aspect & vk::ImageAspectFlagBits::eStencil) { + buffer_image_copies[0].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; + vk::BufferImageCopy& stencil_copy = buffer_image_copies[1]; + stencil_copy = buffer_image_copies[0]; + stencil_copy.bufferOffset += UnpackDepthStencil(staging, format); + stencil_copy.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; + num_copies++; + } + + const vk::ImageMemoryBarrier read_barrier = { + .srcAccessMask = params.src_access, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = 
vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, upload.texture_level), + }; + const vk::ImageMemoryBarrier write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = params.src_access, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, upload.texture_level), + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier); + + cmdbuf.copyBufferToImage(buffer, params.src_image, vk::ImageLayout::eTransferDstOptimal, + num_copies, buffer_image_copies.data()); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier); + }); + + runtime->upload_buffer.Commit(staging.size); + + if (res_scale != 1) { + const VideoCore::TextureBlit blit = { + .src_level = upload.texture_level, + .dst_level = upload.texture_level, + .src_rect = upload.texture_rect, + .dst_rect = upload.texture_rect * res_scale, + }; + + BlitScale(blit, true); + } +} + +void Surface::UploadCustom(const VideoCore::Material* material, u32 level) { + const u32 width = material->width; + const u32 height = material->height; + const auto color = material->textures[0]; + const Common::Rectangle rect{0U, height, width, 0U}; + + const auto upload = [&](u32 index, VideoCore::CustomTexture* texture) { + const u64 custom_size = texture->data.size(); + const RecordParams params = { + .aspect = vk::ImageAspectFlagBits::eColor, + .pipeline_flags = PipelineStageFlags(), + .src_access = AccessFlags(), + .src_image = 
Image(index), + }; + + const auto [data, offset, invalidate] = runtime->upload_buffer.Map(custom_size, 0); + std::memcpy(data, texture->data.data(), custom_size); + runtime->upload_buffer.Commit(custom_size); + + scheduler->Record([buffer = runtime->upload_buffer.Handle(), level, params, rect, + offset = offset](vk::CommandBuffer cmdbuf) { + const vk::BufferImageCopy buffer_image_copy = { + .bufferOffset = offset, + .bufferRowLength = 0, + .bufferImageHeight = rect.GetHeight(), + .imageSubresource{ + .aspectMask = params.aspect, + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}, + }; + + const vk::ImageMemoryBarrier read_barrier = { + .srcAccessMask = params.src_access, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, level), + }; + const vk::ImageMemoryBarrier write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = params.src_access, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, level), + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier); + + cmdbuf.copyBufferToImage(buffer, params.src_image, vk::ImageLayout::eTransferDstOptimal, + buffer_image_copy); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, 
{}, {}, write_barrier); + }); + }; + + upload(0, color); + + for (u32 i = 1; i < VideoCore::MAX_MAPS; i++) { + const auto texture = material->textures[i]; + if (!texture) { + continue; + } + upload(i + 1, texture); + } +} + +void Surface::Download(const VideoCore::BufferTextureCopy& download, + const VideoCore::StagingData& staging) { + SCOPE_EXIT({ + scheduler->Finish(); + runtime->download_buffer.Commit(staging.size); + }); + + runtime->renderpass_cache.EndRendering(); + + if (pixel_format == PixelFormat::D24S8) { + runtime->blit_helper.DepthToBuffer(*this, runtime->download_buffer.Handle(), download); + return; + } + + if (res_scale != 1) { + const VideoCore::TextureBlit blit = { + .src_level = download.texture_level, + .dst_level = download.texture_level, + .src_rect = download.texture_rect * res_scale, + .dst_rect = download.texture_rect, + }; + + BlitScale(blit, false); + } + + const RecordParams params = { + .aspect = Aspect(), + .pipeline_flags = PipelineStageFlags(), + .src_access = AccessFlags(), + .src_image = Image(0), + }; + + scheduler->Record( + [buffer = runtime->download_buffer.Handle(), params, download](vk::CommandBuffer cmdbuf) { + const auto rect = download.texture_rect; + const vk::BufferImageCopy buffer_image_copy = { + .bufferOffset = download.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource{ + .aspectMask = params.aspect, + .mipLevel = download.texture_level, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}, + }; + + const vk::ImageMemoryBarrier read_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, download.texture_level), + }; + const vk::ImageMemoryBarrier image_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, download.texture_level), + }; + const vk::MemoryBarrier memory_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barrier); + + cmdbuf.copyImageToBuffer(params.src_image, vk::ImageLayout::eTransferSrcOptimal, buffer, + buffer_image_copy); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, memory_write_barrier, {}, + image_write_barrier); + }); +} + +void Surface::ScaleUp(u32 new_scale) { + if (res_scale == new_scale || new_scale == 1) { + return; + } + + res_scale = new_scale; + + const bool is_mutable = pixel_format == VideoCore::PixelFormat::RGBA8; + + vk::ImageCreateFlags flags{}; + if (texture_type == VideoCore::TextureType::CubeMap) { + flags |= vk::ImageCreateFlagBits::eCubeCompatible; + } + if (is_mutable) { + flags |= vk::ImageCreateFlagBits::eMutableFormat; + } + + handles[1] = + MakeHandle(instance, GetScaledWidth(), GetScaledHeight(), levels, texture_type, + traits.native, traits.usage, flags, traits.aspect, false, DebugName(true)); + + runtime->renderpass_cache.EndRendering(); + scheduler->Record( + [raw_images = std::array{Image()}, aspect = 
traits.aspect](vk::CommandBuffer cmdbuf) { + const auto barriers = MakeInitBarriers(aspect, raw_images, raw_images.size()); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eTopOfPipe, + vk::DependencyFlagBits::eByRegion, {}, {}, barriers); + }); + LOG_INFO(HW_GPU, "Surface scale up!"); + for (u32 level = 0; level < levels; level++) { + const VideoCore::TextureBlit blit = { + .src_level = level, + .dst_level = level, + .src_rect = GetRect(level), + .dst_rect = GetScaledRect(level), + }; + BlitScale(blit, true); + } +} + +u32 Surface::GetInternalBytesPerPixel() const { + // Request 5 bytes for D24S8 as well because we can use the + // extra space when deinterleaving the data during upload + if (traits.native == vk::Format::eD24UnormS8Uint) { + return 5; + } + + return vk::blockSize(traits.native); +} + +vk::AccessFlags Surface::AccessFlags() const noexcept { + const bool is_color = static_cast(Aspect() & vk::ImageAspectFlagBits::eColor); + const vk::AccessFlags attachment_flags = + is_color + ? vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite + : vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite; + + return vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eTransferRead | + vk::AccessFlagBits::eTransferWrite | + (is_framebuffer ? attachment_flags : vk::AccessFlagBits::eNone) | + (is_storage ? vk::AccessFlagBits::eShaderWrite : vk::AccessFlagBits::eNone); +} + +vk::PipelineStageFlags Surface::PipelineStageFlags() const noexcept { + const bool is_color = static_cast(Aspect() & vk::ImageAspectFlagBits::eColor); + const vk::PipelineStageFlags attachment_flags = + is_color ? vk::PipelineStageFlagBits::eColorAttachmentOutput + : vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests; + + return vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eFragmentShader | + (is_framebuffer ? 
attachment_flags : vk::PipelineStageFlagBits::eNone) | + (is_storage ? vk::PipelineStageFlagBits::eComputeShader + : vk::PipelineStageFlagBits::eNone); +} + +vk::Image Surface::Image(u32 index) const noexcept { + const vk::Image image = handles[index].image; + if (!image) { + return handles[0].image; + } + return image; +} + +vk::ImageView Surface::CopyImageView() noexcept { + vk::ImageLayout copy_layout = vk::ImageLayout::eGeneral; + if (!copy_handle.image) { + vk::ImageCreateFlags flags{}; + if (texture_type == VideoCore::TextureType::CubeMap) { + flags |= vk::ImageCreateFlagBits::eCubeCompatible; + } + copy_handle = + MakeHandle(instance, GetScaledWidth(), GetScaledHeight(), levels, texture_type, + traits.native, traits.usage, flags, traits.aspect, false); + copy_layout = vk::ImageLayout::eUndefined; + } + + runtime->renderpass_cache.EndRendering(); + + const RecordParams params = { + .aspect = Aspect(), + .pipeline_flags = PipelineStageFlags(), + .src_access = AccessFlags(), + .src_image = Image(), + .dst_image = copy_handle.image, + }; + + scheduler->Record([params, copy_layout, levels = this->levels, width = GetScaledWidth(), + height = GetScaledHeight()](vk::CommandBuffer cmdbuf) { + std::array pre_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, 0, levels), + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderRead, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = copy_layout, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + 
.image = params.dst_image, + .subresourceRange = MakeSubresourceRange(params.aspect, 0, levels), + }, + }; + std::array post_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange = MakeSubresourceRange(params.aspect, 0, levels), + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.dst_image, + .subresourceRange = MakeSubresourceRange(params.aspect, 0, levels), + }, + }; + + boost::container::small_vector image_copies; + for (u32 level = 0; level < levels; level++) { + image_copies.push_back(vk::ImageCopy{ + .srcSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .srcOffset = {0, 0, 0}, + .dstSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .dstOffset = {0, 0, 0}, + .extent = {width >> level, height >> level, 1}, + }); + } + + cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); + + cmdbuf.copyImage(params.src_image, vk::ImageLayout::eTransferSrcOptimal, params.dst_image, + vk::ImageLayout::eTransferDstOptimal, image_copies); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); + }); + + 
return copy_handle.image_view.get(); +} + +vk::ImageView Surface::ImageView(u32 index) const noexcept { + const auto& image_view = handles[index].image_view.get(); + if (!image_view) { + return handles[0].image_view.get(); + } + return image_view; +} + +vk::ImageView Surface::FramebufferView() noexcept { + is_framebuffer = true; + return ImageView(); +} + +vk::ImageView Surface::DepthView() noexcept { + if (depth_view) { + return depth_view.get(); + } + + const vk::ImageViewCreateInfo view_info = { + .image = Image(), + .viewType = vk::ImageViewType::e2D, + .format = instance->GetTraits(pixel_format).native, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eDepth, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + + depth_view = instance->GetDevice().createImageViewUnique(view_info); + return depth_view.get(); +} + +vk::ImageView Surface::StencilView() noexcept { + if (stencil_view) { + return stencil_view.get(); + } + + const vk::ImageViewCreateInfo view_info = { + .image = Image(), + .viewType = vk::ImageViewType::e2D, + .format = instance->GetTraits(pixel_format).native, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + + stencil_view = instance->GetDevice().createImageViewUnique(view_info); + return stencil_view.get(); +} + +vk::ImageView Surface::StorageView() noexcept { + if (storage_view) { + return storage_view.get(); + } + + if (pixel_format != VideoCore::PixelFormat::RGBA8) { + LOG_WARNING(Render_Vulkan, + "Attempted to retrieve storage view from unsupported surface with format {}", + VideoCore::PixelFormatAsString(pixel_format)); + return ImageView(); + } + + is_storage = true; + + const vk::ImageViewCreateInfo storage_view_info = { + .image = Image(), + .viewType = vk::ImageViewType::e2D, + 
.format = vk::Format::eR32Uint, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + storage_view = instance->GetDevice().createImageViewUnique(storage_view_info); + return storage_view.get(); +} + +vk::Framebuffer Surface::Framebuffer() noexcept { + const u32 index = res_scale == 1 ? 0u : 1u; + if (framebuffers[index]) { + return framebuffers[index].get(); + } + + const bool is_depth = type == SurfaceType::Depth || type == SurfaceType::DepthStencil; + const auto color_format = is_depth ? PixelFormat::Invalid : pixel_format; + const auto depth_format = is_depth ? pixel_format : PixelFormat::Invalid; + const auto render_pass = + runtime->renderpass_cache.GetRenderpass(color_format, depth_format, false); + const auto attachments = std::array{ImageView()}; + framebuffers[index] = MakeFramebuffer(instance->GetDevice(), render_pass, GetScaledWidth(), + GetScaledHeight(), attachments, 1); + return framebuffers[index].get(); +} + +void Surface::BlitScale(const VideoCore::TextureBlit& blit, bool up_scale) { + const FormatTraits& depth_traits = instance->GetTraits(pixel_format); + const bool is_depth_stencil = pixel_format == PixelFormat::D24S8; + if (is_depth_stencil && !depth_traits.blit_support) { + LOG_WARNING(Render_Vulkan, "Depth scale unsupported by hardware"); + return; + } + + scheduler->Record([src_image = Image(!up_scale), aspect = Aspect(), + filter = MakeFilter(pixel_format), dst_image = Image(up_scale), + blit](vk::CommandBuffer render_cmdbuf) { + const std::array source_offsets = { + vk::Offset3D{static_cast(blit.src_rect.left), + static_cast(blit.src_rect.bottom), 0}, + vk::Offset3D{static_cast(blit.src_rect.right), static_cast(blit.src_rect.top), + 1}, + }; + + const std::array dest_offsets = { + vk::Offset3D{static_cast(blit.dst_rect.left), + static_cast(blit.dst_rect.bottom), 0}, + 
vk::Offset3D{static_cast(blit.dst_rect.right), static_cast(blit.dst_rect.top), + 1}, + }; + + const vk::ImageBlit blit_area = { + .srcSubresource{ + .aspectMask = aspect, + .mipLevel = blit.src_level, + .baseArrayLayer = blit.src_layer, + .layerCount = 1, + }, + .srcOffsets = source_offsets, + .dstSubresource{ + .aspectMask = aspect, + .mipLevel = blit.dst_level, + .baseArrayLayer = blit.dst_layer, + .layerCount = 1, + }, + .dstOffsets = dest_offsets, + }; + + const std::array read_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = MakeSubresourceRange(aspect, blit.src_level), + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderRead | + vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eColorAttachmentRead | + vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = MakeSubresourceRange(aspect, blit.dst_level), + }, + }; + const std::array write_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite | vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange = MakeSubresourceRange(aspect, blit.src_level), + }, + vk::ImageMemoryBarrier{ 
+ .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite | vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange = MakeSubresourceRange(aspect, blit.dst_level), + }, + }; + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barriers); + + render_cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, + vk::ImageLayout::eTransferDstOptimal, blit_area, filter); + + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, {}, {}, write_barriers); + }); +} + +Framebuffer::Framebuffer(TextureRuntime& runtime, const VideoCore::FramebufferParams& params, + Surface* color, Surface* depth) + : VideoCore::FramebufferParams{params}, res_scale{color ? color->res_scale + : (depth ? depth->res_scale : 1u)} { + auto& renderpass_cache = runtime.GetRenderpassCache(); + if (shadow_rendering && !color) { + return; + } + + width = height = std::numeric_limits::max(); + + const auto prepare = [&](u32 index, Surface* surface) { + const VideoCore::Extent extent = surface->RealExtent(); + width = std::min(width, extent.width); + height = std::min(height, extent.height); + if (!shadow_rendering) { + formats[index] = surface->pixel_format; + } + images[index] = surface->Image(); + aspects[index] = surface->Aspect(); + image_views[index] = shadow_rendering ? 
surface->StorageView() : surface->FramebufferView(); + }; + + u32 num_attachments = 0; + std::array attachments; + + if (color) { + prepare(0, color); + attachments[num_attachments++] = image_views[0]; + } + + if (depth) { + prepare(1, depth); + attachments[num_attachments++] = image_views[1]; + } + + const vk::Device device = runtime.GetInstance().GetDevice(); + if (shadow_rendering) { + render_pass = + renderpass_cache.GetRenderpass(PixelFormat::Invalid, PixelFormat::Invalid, false); + framebuffer = MakeFramebuffer(device, render_pass, color->GetScaledWidth(), + color->GetScaledHeight(), {}, 0); + } else { + render_pass = renderpass_cache.GetRenderpass(formats[0], formats[1], false); + framebuffer = + MakeFramebuffer(device, render_pass, width, height, attachments, num_attachments); + } +} + +Framebuffer::~Framebuffer() = default; + +Sampler::Sampler(TextureRuntime& runtime, const VideoCore::SamplerParams& params) { + using TextureConfig = VideoCore::SamplerParams::TextureConfig; + + const Instance& instance = runtime.GetInstance(); + const vk::PhysicalDeviceProperties properties = instance.GetPhysicalDevice().getProperties(); + const bool use_border_color = + instance.IsCustomBorderColorSupported() && (params.wrap_s == TextureConfig::ClampToBorder || + params.wrap_t == TextureConfig::ClampToBorder); + + const Common::Vec4f color = PicaToVK::ColorRGBA8(params.border_color); + const vk::SamplerCustomBorderColorCreateInfoEXT border_color_info = { + .customBorderColor = MakeClearColorValue(color), + .format = vk::Format::eUndefined, + }; + + const vk::Filter mag_filter = PicaToVK::TextureFilterMode(params.mag_filter); + const vk::Filter min_filter = PicaToVK::TextureFilterMode(params.min_filter); + const vk::SamplerMipmapMode mipmap_mode = PicaToVK::TextureMipFilterMode(params.mip_filter); + const vk::SamplerAddressMode wrap_u = PicaToVK::WrapMode(params.wrap_s); + const vk::SamplerAddressMode wrap_v = PicaToVK::WrapMode(params.wrap_t); + const float lod_min = 
static_cast(params.lod_min); + const float lod_max = static_cast(params.lod_max); + + const vk::SamplerCreateInfo sampler_info = { + .pNext = use_border_color ? &border_color_info : nullptr, + .magFilter = mag_filter, + .minFilter = min_filter, + .mipmapMode = mipmap_mode, + .addressModeU = wrap_u, + .addressModeV = wrap_v, + .mipLodBias = 0, + .anisotropyEnable = instance.IsAnisotropicFilteringSupported(), + .maxAnisotropy = properties.limits.maxSamplerAnisotropy, + .compareEnable = false, + .compareOp = vk::CompareOp::eAlways, + .minLod = lod_min, + .maxLod = lod_max, + .borderColor = + use_border_color ? vk::BorderColor::eFloatCustomEXT : vk::BorderColor::eIntOpaqueBlack, + .unnormalizedCoordinates = false, + }; + sampler = instance.GetDevice().createSamplerUnique(sampler_info); +} + +Sampler::~Sampler() = default; + +DebugScope::DebugScope(TextureRuntime& runtime, Common::Vec4f color, std::string_view label) + : scheduler{runtime.GetScheduler()}, has_debug_tool{ + runtime.GetInstance().HasDebuggingToolAttached()} { + if (!has_debug_tool) { + return; + } + scheduler.Record([color, label](vk::CommandBuffer cmdbuf) { + const vk::DebugUtilsLabelEXT debug_label = { + .pLabelName = label.data(), + .color = std::array{color[0], color[1], color[2], color[3]}, + }; + cmdbuf.beginDebugUtilsLabelEXT(debug_label); + }); +} + +DebugScope::~DebugScope() { + if (!has_debug_tool) { + return; + } + scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.endDebugUtilsLabelEXT(); }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h new file mode 100644 index 0000000000..2bef63dabf --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -0,0 +1,298 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include "video_core/rasterizer_cache/framebuffer_base.h" +#include "video_core/rasterizer_cache/rasterizer_cache_base.h" +#include "video_core/rasterizer_cache/surface_base.h" +#include "video_core/renderer_vulkan/vk_blit_helper.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" + +VK_DEFINE_HANDLE(VmaAllocation) + +namespace VideoCore { +struct Material; +} + +namespace Vulkan { + +class Instance; +class RenderpassCache; +class DescriptorPool; +class DescriptorSetProvider; +class Surface; + +struct Handle { + VmaAllocation alloc; + vk::Image image; + vk::UniqueImageView image_view; +}; + +/** + * Provides texture manipulation functions to the rasterizer cache + * Separating this into a class makes it easier to abstract graphics API code + */ +class TextureRuntime { + friend class Surface; + +public: + explicit TextureRuntime(const Instance& instance, Scheduler& scheduler, + RenderpassCache& renderpass_cache, DescriptorPool& pool, + DescriptorSetProvider& texture_provider, u32 num_swapchain_images); + ~TextureRuntime(); + + const Instance& GetInstance() const { + return instance; + } + + Scheduler& GetScheduler() const { + return scheduler; + } + + RenderpassCache& GetRenderpassCache() { + return renderpass_cache; + } + + /// Returns the removal threshold ticks for the garbage collector + u32 RemoveThreshold(); + + /// Submits and waits for current GPU work. 
+ void Finish(); + + /// Maps an internal staging buffer of the provided size for pixel uploads/downloads + VideoCore::StagingData FindStaging(u32 size, bool upload); + + /// Attempts to reinterpret a rectangle of source to another rectangle of dest + bool Reinterpret(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy); + + /// Fills the rectangle of the texture with the clear value provided + bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear); + + /// Copies a rectangle of src_tex to another rectange of dst_rect + bool CopyTextures(Surface& source, Surface& dest, const VideoCore::TextureCopy& copy); + + /// Blits a rectangle of src_tex to another rectange of dst_rect + bool BlitTextures(Surface& surface, Surface& dest, const VideoCore::TextureBlit& blit); + + /// Generates mipmaps for all the available levels of the texture + void GenerateMipmaps(Surface& surface); + + /// Returns true if the provided pixel format needs convertion + bool NeedsConversion(VideoCore::PixelFormat format) const; + + /// Removes any descriptor sets that contain the provided image view. 
+ void FreeDescriptorSetsWithImage(vk::ImageView image_view); + +private: + /// Clears a partial texture rect using a clear rectangle + void ClearTextureWithRenderpass(Surface& surface, const VideoCore::TextureClear& clear); + +private: + const Instance& instance; + Scheduler& scheduler; + RenderpassCache& renderpass_cache; + DescriptorSetProvider& texture_provider; + BlitHelper blit_helper; + StreamBuffer upload_buffer; + StreamBuffer download_buffer; + u32 num_swapchain_images; +}; + +class Surface : public VideoCore::SurfaceBase { + friend class TextureRuntime; + +public: + explicit Surface(TextureRuntime& runtime, const VideoCore::SurfaceParams& params); + explicit Surface(TextureRuntime& runtime, const VideoCore::SurfaceBase& surface, + const VideoCore::Material* materal); + ~Surface(); + + Surface(const Surface&) = delete; + Surface& operator=(const Surface&) = delete; + + Surface(Surface&& o) noexcept = default; + Surface& operator=(Surface&& o) noexcept = default; + + vk::ImageAspectFlags Aspect() const noexcept { + return traits.aspect; + } + + /// Returns the image at index, otherwise the base image + vk::Image Image(u32 index = 1) const noexcept; + + /// Returns the image view at index, otherwise the base view + vk::ImageView ImageView(u32 index = 1) const noexcept; + + /// Returns a copy of the upscaled image handle, used for feedback loops. 
+ vk::ImageView CopyImageView() noexcept; + + /// Returns the framebuffer view of the surface image + vk::ImageView FramebufferView() noexcept; + + /// Returns the depth view of the surface image + vk::ImageView DepthView() noexcept; + + /// Returns the stencil view of the surface image + vk::ImageView StencilView() noexcept; + + /// Returns the R32 image view used for atomic load/store + vk::ImageView StorageView() noexcept; + + /// Returns a framebuffer handle for rendering to this surface + vk::Framebuffer Framebuffer() noexcept; + + /// Uploads pixel data in staging to a rectangle region of the surface texture + void Upload(const VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging); + + /// Uploads the custom material to the surface allocation. + void UploadCustom(const VideoCore::Material* material, u32 level); + + /// Downloads pixel data to staging from a rectangle region of the surface texture + void Download(const VideoCore::BufferTextureCopy& download, + const VideoCore::StagingData& staging); + + /// Scales up the surface to match the new resolution scale. 
+ void ScaleUp(u32 new_scale); + + /// Returns the bpp of the internal surface format + u32 GetInternalBytesPerPixel() const; + + /// Returns the access flags indicative of the surface + vk::AccessFlags AccessFlags() const noexcept; + + /// Returns the pipeline stage flags indicative of the surface + vk::PipelineStageFlags PipelineStageFlags() const noexcept; + +private: + /// Performs blit between the scaled/unscaled images + void BlitScale(const VideoCore::TextureBlit& blit, bool up_scale); + + /// Downloads scaled depth stencil data + void DepthStencilDownload(const VideoCore::BufferTextureCopy& download, + const VideoCore::StagingData& staging); + +public: + TextureRuntime* runtime; + const Instance* instance; + Scheduler* scheduler; + FormatTraits traits; + std::array handles{}; + std::array framebuffers{}; + Handle copy_handle; + vk::UniqueImageView depth_view; + vk::UniqueImageView stencil_view; + vk::UniqueImageView storage_view; + bool is_framebuffer{}; + bool is_storage{}; +}; + +class Framebuffer : public VideoCore::FramebufferParams { +public: + explicit Framebuffer(TextureRuntime& runtime, const VideoCore::FramebufferParams& params, + Surface* color, Surface* depth_stencil); + ~Framebuffer(); + + Framebuffer(const Framebuffer&) = delete; + Framebuffer& operator=(const Framebuffer&) = delete; + + Framebuffer(Framebuffer&& o) noexcept = default; + Framebuffer& operator=(Framebuffer&& o) noexcept = default; + + VideoCore::PixelFormat Format(VideoCore::SurfaceType type) const noexcept { + return formats[Index(type)]; + } + + [[nodiscard]] vk::ImageView ImageView(VideoCore::SurfaceType type) const noexcept { + return image_views[Index(type)]; + } + + [[nodiscard]] vk::Framebuffer Handle() const noexcept { + return framebuffer.get(); + } + + [[nodiscard]] std::array Images() const noexcept { + return images; + } + + [[nodiscard]] std::array Aspects() const noexcept { + return aspects; + } + + [[nodiscard]] vk::RenderPass RenderPass() const noexcept { + 
return render_pass; + } + + u32 Scale() const noexcept { + return res_scale; + } + + u32 Width() const noexcept { + return width; + } + + u32 Height() const noexcept { + return height; + } + +private: + std::array images{}; + std::array image_views{}; + vk::UniqueFramebuffer framebuffer; + vk::RenderPass render_pass; + std::array aspects{}; + std::array formats{VideoCore::PixelFormat::Invalid, + VideoCore::PixelFormat::Invalid}; + u32 width{}; + u32 height{}; + u32 res_scale{1}; +}; + +class Sampler { +public: + Sampler(TextureRuntime& runtime, const VideoCore::SamplerParams& params); + ~Sampler(); + + Sampler(const Sampler&) = delete; + Sampler& operator=(const Sampler&) = delete; + + Sampler(Sampler&& o) noexcept = default; + Sampler& operator=(Sampler&& o) noexcept = default; + + [[nodiscard]] vk::Sampler Handle() const noexcept { + return sampler.get(); + } + +private: + vk::UniqueSampler sampler; +}; + +class DebugScope { +public: + template + explicit DebugScope(TextureRuntime& runtime, Common::Vec4f color, + fmt::format_string format, T... 
args) + : DebugScope{runtime, color, fmt::format(format, std::forward(args)...)} {} + explicit DebugScope(TextureRuntime& runtime, Common::Vec4f color, std::string_view label); + ~DebugScope(); + +private: + Scheduler& scheduler; + bool has_debug_tool; +}; + +struct Traits { + using Runtime = Vulkan::TextureRuntime; + using Surface = Vulkan::Surface; + using Sampler = Vulkan::Sampler; + using Framebuffer = Vulkan::Framebuffer; + using DebugScope = Vulkan::DebugScope; +}; + +using RasterizerCache = VideoCore::RasterizerCache; + +} // namespace Vulkan diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 8b33aab295..c836e7372f 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -14,6 +14,7 @@ #include "video_core/renderer_opengl/gl_vars.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_software/renderer_software.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/video_core.h" namespace VideoCore { @@ -39,6 +40,9 @@ void Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window case Settings::GraphicsAPI::Software: g_renderer = std::make_unique(system, emu_window); break; + case Settings::GraphicsAPI::Vulkan: + g_renderer = std::make_unique(system, emu_window, secondary_window); + break; case Settings::GraphicsAPI::OpenGL: g_renderer = std::make_unique(system, emu_window, secondary_window); break;