forked from eden-emu/eden
		
	Merge pull request #2520 from ReinUsesLisp/vulkan-refresh
vk_device,vk_shader_decompiler: Miscellaneous changes
This commit is contained in:
		
						commit
						72f09c55a8
					
				
					 5 changed files with 219 additions and 89 deletions
				
			
		
							
								
								
									
										2
									
								
								externals/Vulkan-Headers
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								externals/Vulkan-Headers
									
										
									
									
										vendored
									
									
								
							|  | @ -1 +1 @@ | |||
| Subproject commit 15e5c4db7500b936ae758236f2e72fc1aec22020 | ||||
| Subproject commit d05c8df88da98ec1ab3bc600d7f5783b4060895b | ||||
|  | @ -18,6 +18,7 @@ constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = { | |||
|     vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; | ||||
| constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { | ||||
|     vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; | ||||
| constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}}; | ||||
| 
 | ||||
| } // namespace Alternatives
 | ||||
| 
 | ||||
|  | @ -51,15 +52,19 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy | |||
|     : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { | ||||
|     SetupFamilies(dldi, surface); | ||||
|     SetupProperties(dldi); | ||||
|     SetupFeatures(dldi); | ||||
| } | ||||
| 
 | ||||
| VKDevice::~VKDevice() = default; | ||||
| 
 | ||||
| bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { | ||||
|     const auto queue_cis = GetDeviceQueueCreateInfos(); | ||||
|     vk::PhysicalDeviceFeatures device_features{}; | ||||
|     vk::PhysicalDeviceFeatures device_features; | ||||
|     device_features.vertexPipelineStoresAndAtomics = true; | ||||
|     device_features.independentBlend = true; | ||||
|     device_features.textureCompressionASTC_LDR = is_optimal_astc_supported; | ||||
| 
 | ||||
|     const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; | ||||
|     const auto queue_cis = GetDeviceQueueCreateInfos(); | ||||
|     const std::vector<const char*> extensions = LoadExtensions(dldi); | ||||
|     const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), | ||||
|                                          0, nullptr, static_cast<u32>(extensions.size()), | ||||
|                                          extensions.data(), &device_features); | ||||
|  | @ -90,7 +95,7 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, | |||
|         LOG_CRITICAL(Render_Vulkan, | ||||
|                      "Format={} with usage={} and type={} has no defined alternatives and host " | ||||
|                      "hardware does not support it", | ||||
|                      static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), | ||||
|                      vk::to_string(wanted_format), vk::to_string(wanted_usage), | ||||
|                      static_cast<u32>(format_type)); | ||||
|         UNREACHABLE(); | ||||
|         return wanted_format; | ||||
|  | @ -118,6 +123,30 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, | |||
|     return wanted_format; | ||||
| } | ||||
| 
 | ||||
| bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, | ||||
|                                       const vk::DispatchLoaderDynamic& dldi) const { | ||||
|     if (!features.textureCompressionASTC_LDR) { | ||||
|         return false; | ||||
|     } | ||||
|     const auto format_feature_usage{ | ||||
|         vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | | ||||
|         vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | | ||||
|         vk::FormatFeatureFlagBits::eTransferDst}; | ||||
|     constexpr std::array<vk::Format, 9> astc_formats = { | ||||
|         vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, | ||||
|         vk::Format::eAstc8x8SrgbBlock,  vk::Format::eAstc8x6SrgbBlock, | ||||
|         vk::Format::eAstc5x4SrgbBlock,  vk::Format::eAstc5x5UnormBlock, | ||||
|         vk::Format::eAstc5x5SrgbBlock,  vk::Format::eAstc10x8UnormBlock, | ||||
|         vk::Format::eAstc10x8SrgbBlock}; | ||||
|     for (const auto format : astc_formats) { | ||||
|         const auto format_properties{physical.getFormatProperties(format, dldi)}; | ||||
|         if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { | ||||
|             return false; | ||||
|         } | ||||
|     } | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | ||||
|                                  FormatType format_type) const { | ||||
|     const auto it = format_properties.find(wanted_format); | ||||
|  | @ -132,11 +161,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag | |||
| 
 | ||||
| bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | ||||
|                           vk::SurfaceKHR surface) { | ||||
|     const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME; | ||||
| 
 | ||||
|     bool has_swapchain{}; | ||||
|     for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | ||||
|         has_swapchain |= prop.extensionName == swapchain_extension; | ||||
|         has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | ||||
|     } | ||||
|     if (!has_swapchain) { | ||||
|         // The device doesn't support creating swapchains.
 | ||||
|  | @ -160,8 +187,14 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev | |||
|     } | ||||
| 
 | ||||
|     // TODO(Rodrigo): Check if the device matches all requirements.
 | ||||
|     const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); | ||||
|     if (props.limits.maxUniformBufferRange < 65536) { | ||||
|     const auto properties{physical.getProperties(dldi)}; | ||||
|     const auto limits{properties.limits}; | ||||
|     if (limits.maxUniformBufferRange < 65536) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)}; | ||||
|     if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|  | @ -169,6 +202,30 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev | |||
|     return true; | ||||
| } | ||||
| 
 | ||||
| std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { | ||||
|     std::vector<const char*> extensions; | ||||
|     extensions.reserve(2); | ||||
|     extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | ||||
| 
 | ||||
|     const auto Test = [&](const vk::ExtensionProperties& extension, | ||||
|                           std::optional<std::reference_wrapper<bool>> status, const char* name, | ||||
|                           u32 revision) { | ||||
|         if (extension.extensionName != std::string(name)) { | ||||
|             return; | ||||
|         } | ||||
|         extensions.push_back(name); | ||||
|         if (status) { | ||||
|             status->get() = true; | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | ||||
|         Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1); | ||||
|     } | ||||
| 
 | ||||
|     return extensions; | ||||
| } | ||||
| 
 | ||||
| void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { | ||||
|     std::optional<u32> graphics_family_, present_family_; | ||||
| 
 | ||||
|  | @ -196,10 +253,16 @@ void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { | |||
|     const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); | ||||
|     device_type = props.deviceType; | ||||
|     uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); | ||||
|     max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange); | ||||
| } | ||||
| 
 | ||||
| void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { | ||||
|     const auto supported_features{physical.getFeatures(dldi)}; | ||||
|     is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); | ||||
| } | ||||
| 
 | ||||
| std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { | ||||
|     static const float QUEUE_PRIORITY = 1.f; | ||||
|     static const float QUEUE_PRIORITY = 1.0f; | ||||
| 
 | ||||
|     std::set<u32> unique_queue_families = {graphics_family, present_family}; | ||||
|     std::vector<vk::DeviceQueueCreateInfo> queue_cis; | ||||
|  | @ -212,26 +275,43 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con | |||
| 
 | ||||
| std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( | ||||
|     const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { | ||||
|     static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, | ||||
|                                         vk::Format::eB5G6R5UnormPack16, | ||||
|                                         vk::Format::eA2B10G10R10UnormPack32, | ||||
|                                         vk::Format::eR32G32B32A32Sfloat, | ||||
|                                         vk::Format::eR16G16Unorm, | ||||
|                                         vk::Format::eR16G16Snorm, | ||||
|                                         vk::Format::eR8G8B8A8Srgb, | ||||
|                                         vk::Format::eR8Unorm, | ||||
|                                         vk::Format::eB10G11R11UfloatPack32, | ||||
|                                         vk::Format::eR32Sfloat, | ||||
|                                         vk::Format::eR16Sfloat, | ||||
|                                         vk::Format::eR16G16B16A16Sfloat, | ||||
|                                         vk::Format::eD32Sfloat, | ||||
|                                         vk::Format::eD16Unorm, | ||||
|                                         vk::Format::eD16UnormS8Uint, | ||||
|                                         vk::Format::eD24UnormS8Uint, | ||||
|                                         vk::Format::eD32SfloatS8Uint, | ||||
|                                         vk::Format::eBc1RgbaUnormBlock, | ||||
|                                         vk::Format::eBc2UnormBlock, | ||||
|                                         vk::Format::eBc3UnormBlock, | ||||
|                                         vk::Format::eBc4UnormBlock, | ||||
|                                         vk::Format::eBc5UnormBlock, | ||||
|                                         vk::Format::eBc5SnormBlock, | ||||
|                                         vk::Format::eBc7UnormBlock, | ||||
|                                         vk::Format::eAstc4x4UnormBlock, | ||||
|                                         vk::Format::eAstc4x4SrgbBlock, | ||||
|                                         vk::Format::eAstc8x8SrgbBlock, | ||||
|                                         vk::Format::eAstc8x6SrgbBlock, | ||||
|                                         vk::Format::eAstc5x4SrgbBlock, | ||||
|                                         vk::Format::eAstc5x5UnormBlock, | ||||
|                                         vk::Format::eAstc5x5SrgbBlock, | ||||
|                                         vk::Format::eAstc10x8UnormBlock, | ||||
|                                         vk::Format::eAstc10x8SrgbBlock}; | ||||
|     std::map<vk::Format, vk::FormatProperties> format_properties; | ||||
| 
 | ||||
|     const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) { | ||||
|     for (const auto format : formats) { | ||||
|         format_properties.emplace(format, physical.getFormatProperties(format, dldi)); | ||||
|     }; | ||||
|     AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); | ||||
|     AddFormatQuery(vk::Format::eB5G6R5UnormPack16); | ||||
|     AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32); | ||||
|     AddFormatQuery(vk::Format::eR8G8B8A8Srgb); | ||||
|     AddFormatQuery(vk::Format::eR8Unorm); | ||||
|     AddFormatQuery(vk::Format::eD32Sfloat); | ||||
|     AddFormatQuery(vk::Format::eD16Unorm); | ||||
|     AddFormatQuery(vk::Format::eD16UnormS8Uint); | ||||
|     AddFormatQuery(vk::Format::eD24UnormS8Uint); | ||||
|     AddFormatQuery(vk::Format::eD32SfloatS8Uint); | ||||
|     AddFormatQuery(vk::Format::eBc1RgbaUnormBlock); | ||||
|     AddFormatQuery(vk::Format::eBc2UnormBlock); | ||||
|     AddFormatQuery(vk::Format::eBc3UnormBlock); | ||||
|     AddFormatQuery(vk::Format::eBc4UnormBlock); | ||||
| 
 | ||||
|     } | ||||
|     return format_properties; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -11,7 +11,7 @@ | |||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| /// Format usage descriptor
 | ||||
| /// Format usage descriptor.
 | ||||
| enum class FormatType { Linear, Optimal, Buffer }; | ||||
| 
 | ||||
| /// Handles data specific to a physical device.
 | ||||
|  | @ -34,12 +34,12 @@ public: | |||
|     vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | ||||
|                                   FormatType format_type) const; | ||||
| 
 | ||||
|     /// Returns the dispatch loader with direct function pointers of the device
 | ||||
|     /// Returns the dispatch loader with direct function pointers of the device.
 | ||||
|     const vk::DispatchLoaderDynamic& GetDispatchLoader() const { | ||||
|         return dld; | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the logical device
 | ||||
|     /// Returns the logical device.
 | ||||
|     vk::Device GetLogical() const { | ||||
|         return logical.get(); | ||||
|     } | ||||
|  | @ -69,30 +69,55 @@ public: | |||
|         return present_family; | ||||
|     } | ||||
| 
 | ||||
|     /// Returns if the device is integrated with the host CPU
 | ||||
|     /// Returns if the device is integrated with the host CPU.
 | ||||
|     bool IsIntegrated() const { | ||||
|         return device_type == vk::PhysicalDeviceType::eIntegratedGpu; | ||||
|     } | ||||
| 
 | ||||
|     /// Returns uniform buffer alignment requirement
 | ||||
|     /// Returns uniform buffer alignment requirement.
 | ||||
|     u64 GetUniformBufferAlignment() const { | ||||
|         return uniform_buffer_alignment; | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the maximum range for storage buffers.
 | ||||
|     u64 GetMaxStorageBufferRange() const { | ||||
|         return max_storage_buffer_range; | ||||
|     } | ||||
| 
 | ||||
|     /// Returns true if ASTC is natively supported.
 | ||||
|     bool IsOptimalAstcSupported() const { | ||||
|         return is_optimal_astc_supported; | ||||
|     } | ||||
| 
 | ||||
|     /// Returns true if the device supports VK_EXT_scalar_block_layout.
 | ||||
|     bool IsExtScalarBlockLayoutSupported() const { | ||||
|         return ext_scalar_block_layout; | ||||
|     } | ||||
| 
 | ||||
|     /// Checks if the physical device is suitable.
 | ||||
|     static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | ||||
|                            vk::SurfaceKHR surface); | ||||
| 
 | ||||
| private: | ||||
|     /// Loads extensions into a vector and stores available ones in this object.
 | ||||
|     std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi); | ||||
| 
 | ||||
|     /// Sets up queue families.
 | ||||
|     void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); | ||||
| 
 | ||||
|     /// Sets up device properties.
 | ||||
|     void SetupProperties(const vk::DispatchLoaderDynamic& dldi); | ||||
| 
 | ||||
|     /// Sets up device features.
 | ||||
|     void SetupFeatures(const vk::DispatchLoaderDynamic& dldi); | ||||
| 
 | ||||
|     /// Returns a list of queue initialization descriptors.
 | ||||
|     std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; | ||||
| 
 | ||||
|     /// Returns true if ASTC textures are natively supported.
 | ||||
|     bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, | ||||
|                                 const vk::DispatchLoaderDynamic& dldi) const; | ||||
| 
 | ||||
|     /// Returns true if a format is supported.
 | ||||
|     bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | ||||
|                            FormatType format_type) const; | ||||
|  | @ -101,16 +126,19 @@ private: | |||
|     static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( | ||||
|         const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); | ||||
| 
 | ||||
|     const vk::PhysicalDevice physical;  ///< Physical device
 | ||||
|     vk::DispatchLoaderDynamic dld;      ///< Device function pointers
 | ||||
|     UniqueDevice logical;               ///< Logical device
 | ||||
|     vk::Queue graphics_queue;           ///< Main graphics queue
 | ||||
|     vk::Queue present_queue;            ///< Main present queue
 | ||||
|     u32 graphics_family{};              ///< Main graphics queue family index
 | ||||
|     u32 present_family{};               ///< Main present queue family index
 | ||||
|     vk::PhysicalDeviceType device_type; ///< Physical device type
 | ||||
|     u64 uniform_buffer_alignment{};     ///< Uniform buffer alignment requirement
 | ||||
|     std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary
 | ||||
|     const vk::PhysicalDevice physical;  ///< Physical device.
 | ||||
|     vk::DispatchLoaderDynamic dld;      ///< Device function pointers.
 | ||||
|     UniqueDevice logical;               ///< Logical device.
 | ||||
|     vk::Queue graphics_queue;           ///< Main graphics queue.
 | ||||
|     vk::Queue present_queue;            ///< Main present queue.
 | ||||
|     u32 graphics_family{};              ///< Main graphics queue family index.
 | ||||
|     u32 present_family{};               ///< Main present queue family index.
 | ||||
|     vk::PhysicalDeviceType device_type; ///< Physical device type.
 | ||||
|     u64 uniform_buffer_alignment{};     ///< Uniform buffer alignment requirement.
 | ||||
|     u64 max_storage_buffer_range{};     ///< Max storage buffer size.
 | ||||
|     bool is_optimal_astc_supported{};   ///< Support for native ASTC.
 | ||||
|     bool ext_scalar_block_layout{};     ///< Support for VK_EXT_scalar_block_layout.
 | ||||
|     std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary.
 | ||||
| }; | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
|  |  | |||
|  | @ -17,6 +17,7 @@ | |||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/engines/shader_bytecode.h" | ||||
| #include "video_core/engines/shader_header.h" | ||||
| #include "video_core/renderer_vulkan/vk_device.h" | ||||
| #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||||
| #include "video_core/shader/shader_ir.h" | ||||
| 
 | ||||
|  | @ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | |||
| using Operation = const OperationNode&; | ||||
| 
 | ||||
| // TODO(Rodrigo): Use rasterizer's value
 | ||||
| constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; | ||||
| constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000; | ||||
| constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4; | ||||
| constexpr u32 STAGE_BINDING_STRIDE = 0x100; | ||||
| 
 | ||||
| enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||||
|  | @ -87,8 +89,8 @@ bool IsPrecise(Operation operand) { | |||
| 
 | ||||
| class SPIRVDecompiler : public Sirit::Module { | ||||
| public: | ||||
|     explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage) | ||||
|         : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { | ||||
|     explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage) | ||||
|         : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} { | ||||
|         AddCapability(spv::Capability::Shader); | ||||
|         AddExtension("SPV_KHR_storage_buffer_storage_class"); | ||||
|         AddExtension("SPV_KHR_variable_pointers"); | ||||
|  | @ -195,8 +197,10 @@ public: | |||
|             entries.samplers.emplace_back(sampler); | ||||
|         } | ||||
|         for (const auto& attribute : ir.GetInputAttributes()) { | ||||
|             if (IsGenericAttribute(attribute)) { | ||||
|                 entries.attributes.insert(GetGenericAttributeLocation(attribute)); | ||||
|             } | ||||
|         } | ||||
|         entries.clip_distances = ir.GetClipDistances(); | ||||
|         entries.shader_length = ir.GetLength(); | ||||
|         entries.entry_function = execute_function; | ||||
|  | @ -210,7 +214,6 @@ private: | |||
|         std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | ||||
| 
 | ||||
|     static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); | ||||
|     static constexpr u32 CBUF_STRIDE = 16; | ||||
| 
 | ||||
|     void AllocateBindings() { | ||||
|         const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; | ||||
|  | @ -315,6 +318,7 @@ private: | |||
|         constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", | ||||
|                                                                          "overflow"}; | ||||
|         for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { | ||||
|             const auto flag_code = static_cast<InternalFlag>(flag); | ||||
|             const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | ||||
|             internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); | ||||
|         } | ||||
|  | @ -374,7 +378,9 @@ private: | |||
|         u32 binding = const_buffers_base_binding; | ||||
|         for (const auto& entry : ir.GetConstantBuffers()) { | ||||
|             const auto [index, size] = entry; | ||||
|             const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform); | ||||
|             const Id type = | ||||
|                 device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo; | ||||
|             const Id id = OpVariable(type, spv::StorageClass::Uniform); | ||||
|             AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); | ||||
| 
 | ||||
|             Decorate(id, spv::Decoration::Binding, binding++); | ||||
|  | @ -569,33 +575,35 @@ private: | |||
|             const Node offset = cbuf->GetOffset(); | ||||
|             const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); | ||||
| 
 | ||||
|             Id pointer{}; | ||||
|             if (device.IsExtScalarBlockLayoutSupported()) { | ||||
|                 const Id buffer_offset = Emit(OpShiftRightLogical( | ||||
|                     t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u))); | ||||
|                 pointer = Emit( | ||||
|                     OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset)); | ||||
|             } else { | ||||
|                 Id buffer_index{}; | ||||
|                 Id buffer_element{}; | ||||
| 
 | ||||
|                 if (const auto immediate = std::get_if<ImmediateNode>(offset)) { | ||||
|                     // Direct access
 | ||||
|                     const u32 offset_imm = immediate->GetValue(); | ||||
|                     ASSERT(offset_imm % 4 == 0); | ||||
|                     buffer_index = Constant(t_uint, offset_imm / 16); | ||||
|                     buffer_element = Constant(t_uint, (offset_imm / 4) % 4); | ||||
| 
 | ||||
|                 } else if (std::holds_alternative<OperationNode>(*offset)) { | ||||
|                     // Indirect access
 | ||||
|                 // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which
 | ||||
|                 // emits sub-optimal code on GLSL from my testing).
 | ||||
|                     const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); | ||||
|                     const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); | ||||
|                 const Id final_offset = Emit( | ||||
|                     OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); | ||||
|                     const Id final_offset = Emit(OpUMod( | ||||
|                         t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); | ||||
|                     buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); | ||||
|                     buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); | ||||
| 
 | ||||
|                 } else { | ||||
|                     UNREACHABLE_MSG("Unmanaged offset node type"); | ||||
|                 } | ||||
| 
 | ||||
|             const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), | ||||
|                 pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), | ||||
|                                              buffer_index, buffer_element)); | ||||
|             } | ||||
|             return Emit(OpLoad(t_float, pointer)); | ||||
| 
 | ||||
|         } else if (const auto gmem = std::get_if<GmemNode>(node)) { | ||||
|  | @ -612,7 +620,9 @@ private: | |||
|             // It's invalid to call conditional on nested nodes, use an operation instead
 | ||||
|             const Id true_label = OpLabel(); | ||||
|             const Id skip_label = OpLabel(); | ||||
|             Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label)); | ||||
|             const Id condition = Visit(conditional->GetCondition()); | ||||
|             Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone)); | ||||
|             Emit(OpBranchConditional(condition, true_label, skip_label)); | ||||
|             Emit(true_label); | ||||
| 
 | ||||
|             VisitBasicBlock(conditional->GetCode()); | ||||
|  | @ -968,11 +978,11 @@ private: | |||
|         case ShaderStage::Vertex: { | ||||
|             // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
 | ||||
|             // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it.
 | ||||
|             const Id position = AccessElement(t_float4, per_vertex, position_index); | ||||
|             Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2))); | ||||
|             const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u); | ||||
|             Id depth = Emit(OpLoad(t_float, z_pointer)); | ||||
|             depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); | ||||
|             depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); | ||||
|             Emit(OpStore(AccessElement(t_out_float, position, 2), depth)); | ||||
|             Emit(OpStore(z_pointer, depth)); | ||||
|             break; | ||||
|         } | ||||
|         case ShaderStage::Fragment: { | ||||
|  | @ -1311,6 +1321,7 @@ private: | |||
|         &SPIRVDecompiler::WorkGroupId<2>, | ||||
|     }; | ||||
| 
 | ||||
|     const VKDevice& device; | ||||
|     const ShaderIR& ir; | ||||
|     const ShaderStage stage; | ||||
|     const Tegra::Shader::Header header; | ||||
|  | @ -1349,12 +1360,18 @@ private: | |||
|     const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); | ||||
| 
 | ||||
|     const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); | ||||
|     const Id t_cbuf_array = | ||||
|         Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"), | ||||
|                  spv::Decoration::ArrayStride, CBUF_STRIDE); | ||||
|     const Id t_cbuf_struct = MemberDecorate( | ||||
|         Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||||
|     const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct); | ||||
|     const Id t_cbuf_std140 = Decorate( | ||||
|         Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"), | ||||
|         spv::Decoration::ArrayStride, 16u); | ||||
|     const Id t_cbuf_scalar = Decorate( | ||||
|         Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"), | ||||
|         spv::Decoration::ArrayStride, 4u); | ||||
|     const Id t_cbuf_std140_struct = MemberDecorate( | ||||
|         Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||||
|     const Id t_cbuf_scalar_struct = MemberDecorate( | ||||
|         Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||||
|     const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); | ||||
|     const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); | ||||
| 
 | ||||
|     const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); | ||||
|     const Id t_gmem_array = | ||||
|  | @ -1403,8 +1420,9 @@ private: | |||
|     std::map<u32, Id> labels; | ||||
| }; | ||||
| 
 | ||||
| DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { | ||||
|     auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage); | ||||
| DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | ||||
|                            Maxwell::ShaderStage stage) { | ||||
|     auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage); | ||||
|     decompiler->Decompile(); | ||||
|     return {std::move(decompiler), decompiler->GetShaderEntries()}; | ||||
| } | ||||
|  |  | |||
|  | @ -20,10 +20,13 @@ namespace VideoCommon::Shader { | |||
| class ShaderIR; | ||||
| } | ||||
| 
 | ||||
| namespace Vulkan { | ||||
| class VKDevice; | ||||
| } | ||||
| 
 | ||||
| namespace Vulkan::VKShader { | ||||
| 
 | ||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
| 
 | ||||
| using SamplerEntry = VideoCommon::Shader::Sampler; | ||||
| 
 | ||||
| constexpr u32 DESCRIPTOR_SET = 0; | ||||
|  | @ -75,6 +78,7 @@ struct ShaderEntries { | |||
| 
 | ||||
| using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; | ||||
| 
 | ||||
| DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage); | ||||
| DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | ||||
|                            Maxwell::ShaderStage stage); | ||||
| 
 | ||||
| } // namespace Vulkan::VKShader
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei