forked from eden-emu/eden
		
	Merge pull request #11789 from Kelebek1/spirv_shift_right
Manually robust on Maxwell and earlier
This commit is contained in:
		
						commit
						4b06bcc82c
					
				
					 6 changed files with 97 additions and 25 deletions
				
			
		|  | @ -111,16 +111,33 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, | ||||||
|     } else if (element_size > 1) { |     } else if (element_size > 1) { | ||||||
|         const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; |         const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; | ||||||
|         const Id shift{ctx.Const(log2_element_size)}; |         const Id shift{ctx.Const(log2_element_size)}; | ||||||
|         buffer_offset = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); |         buffer_offset = ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), shift); | ||||||
|     } else { |     } else { | ||||||
|         buffer_offset = ctx.Def(offset); |         buffer_offset = ctx.Def(offset); | ||||||
|     } |     } | ||||||
|     if (!binding.IsImmediate()) { |     if (!binding.IsImmediate()) { | ||||||
|         return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset); |         return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset); | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|     const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; |     const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; | ||||||
|     const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)}; |     const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)}; | ||||||
|     return ctx.OpLoad(result_type, access_chain); |     const Id val = ctx.OpLoad(result_type, access_chain); | ||||||
|  | 
 | ||||||
|  |     if (offset.IsImmediate() || !ctx.profile.has_broken_robust) { | ||||||
|  |         return val; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const auto is_float = UniformDefinitions::IsFloat(member_ptr); | ||||||
|  |     const auto num_elements = UniformDefinitions::NumElements(member_ptr); | ||||||
|  |     const std::array zero_vec{ | ||||||
|  |         is_float ? ctx.Const(0.0f) : ctx.Const(0u), | ||||||
|  |         is_float ? ctx.Const(0.0f) : ctx.Const(0u), | ||||||
|  |         is_float ? ctx.Const(0.0f) : ctx.Const(0u), | ||||||
|  |         is_float ? ctx.Const(0.0f) : ctx.Const(0u), | ||||||
|  |     }; | ||||||
|  |     const Id cond = ctx.OpULessThanEqual(ctx.TypeBool(), buffer_offset, ctx.Const(0xFFFFu)); | ||||||
|  |     const Id zero = ctx.OpCompositeConstruct(result_type, std::span(zero_vec.data(), num_elements)); | ||||||
|  |     return ctx.OpSelect(result_type, cond, val, zero); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||||||
|  | @ -138,7 +155,7 @@ Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 inde | ||||||
|         const u32 element{(offset.U32() / 4) % 4 + index_offset}; |         const u32 element{(offset.U32() / 4) % 4 + index_offset}; | ||||||
|         return ctx.OpCompositeExtract(ctx.U32[1], vector, element); |         return ctx.OpCompositeExtract(ctx.U32[1], vector, element); | ||||||
|     } |     } | ||||||
|     const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; |     const Id shift{ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; | ||||||
|     Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; |     Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; | ||||||
|     if (index_offset > 0) { |     if (index_offset > 0) { | ||||||
|         element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); |         element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); | ||||||
|  |  | ||||||
|  | @ -64,6 +64,42 @@ struct UniformDefinitions { | ||||||
|     Id F32{}; |     Id F32{}; | ||||||
|     Id U32x2{}; |     Id U32x2{}; | ||||||
|     Id U32x4{}; |     Id U32x4{}; | ||||||
|  | 
 | ||||||
|  |     /// Returns the component count of the uniform type selected by member_ptr:
 | ||||||
|  |     /// 4 for U32x4, 2 for U32x2 and 1 for U8, S8, U16, S16, U32 and F32.
 | ||||||
|  |     /// Any other pointer trips ASSERT(false) and falls back to 1.
 | ||||||
|  |     constexpr static size_t NumElements(Id UniformDefinitions::*member_ptr) { | ||||||
|  |         // Vector members first, they are the only ones with more than one component.
 | ||||||
|  |         if (member_ptr == &UniformDefinitions::U32x4) { | ||||||
|  |             return 4; | ||||||
|  |         } | ||||||
|  |         if (member_ptr == &UniformDefinitions::U32x2) { | ||||||
|  |             return 2; | ||||||
|  |         } | ||||||
|  |         const bool is_known_scalar = member_ptr == &UniformDefinitions::U8 || | ||||||
|  |                                      member_ptr == &UniformDefinitions::S8 || | ||||||
|  |                                      member_ptr == &UniformDefinitions::U16 || | ||||||
|  |                                      member_ptr == &UniformDefinitions::S16 || | ||||||
|  |                                      member_ptr == &UniformDefinitions::U32 || | ||||||
|  |                                      member_ptr == &UniformDefinitions::F32; | ||||||
|  |         if (!is_known_scalar) { | ||||||
|  |             // Unrecognised member: report it, then treat it as a scalar.
 | ||||||
|  |             ASSERT(false); | ||||||
|  |         } | ||||||
|  |         return 1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Returns true only when member_ptr selects the F32 member; every other
 | ||||||
|  |     /// member is reported as non-float.
 | ||||||
|  |     constexpr static bool IsFloat(Id UniformDefinitions::*member_ptr) { | ||||||
|  |         return member_ptr == &UniformDefinitions::F32; | ||||||
|  |     } | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| struct StorageTypeDefinition { | struct StorageTypeDefinition { | ||||||
|  |  | ||||||
|  | @ -9,7 +9,6 @@ namespace Shader { | ||||||
| 
 | 
 | ||||||
| struct Profile { | struct Profile { | ||||||
|     u32 supported_spirv{0x00010000}; |     u32 supported_spirv{0x00010000}; | ||||||
| 
 |  | ||||||
|     bool unified_descriptor_binding{}; |     bool unified_descriptor_binding{}; | ||||||
|     bool support_descriptor_aliasing{}; |     bool support_descriptor_aliasing{}; | ||||||
|     bool support_int8{}; |     bool support_int8{}; | ||||||
|  | @ -82,6 +81,9 @@ struct Profile { | ||||||
|     bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{}; |     bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{}; | ||||||
| 
 | 
 | ||||||
|     u32 gl_max_compute_smem_size{}; |     u32 gl_max_compute_smem_size{}; | ||||||
|  | 
 | ||||||
|  |     /// Maxwell and earlier NVIDIA architectures have broken robustness support, so the SPIR-V emitter guards dynamically indexed constant buffer loads itself
 | ||||||
|  |     bool has_broken_robust{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Shader
 | } // namespace Shader
 | ||||||
|  |  | ||||||
|  | @ -356,7 +356,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | ||||||
|         .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, |         .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, | ||||||
|         .ignore_nan_fp_comparisons = false, |         .ignore_nan_fp_comparisons = false, | ||||||
|         .has_broken_spirv_subgroup_mask_vector_extract_dynamic = |         .has_broken_spirv_subgroup_mask_vector_extract_dynamic = | ||||||
|             driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; |             driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, | ||||||
|  |         .has_broken_robust = | ||||||
|  |             device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Maxwell, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|     host_info = Shader::HostTranslateInfo{ |     host_info = Shader::HostTranslateInfo{ | ||||||
|         .support_float64 = device.IsFloat64Supported(), |         .support_float64 = device.IsFloat64Supported(), | ||||||
|         .support_float16 = device.IsFloat16Supported(), |         .support_float16 = device.IsFloat16Supported(), | ||||||
|  |  | ||||||
|  | @ -83,15 +83,6 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{ | ||||||
| 
 | 
 | ||||||
| } // namespace Alternatives
 | } // namespace Alternatives
 | ||||||
| 
 | 
 | ||||||
| enum class NvidiaArchitecture { |  | ||||||
|     KeplerOrOlder, |  | ||||||
|     Maxwell, |  | ||||||
|     Pascal, |  | ||||||
|     Volta, |  | ||||||
|     Turing, |  | ||||||
|     AmpereOrNewer, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| template <typename T> | template <typename T> | ||||||
| void SetNext(void**& next, T& data) { | void SetNext(void**& next, T& data) { | ||||||
|     *next = &data; |     *next = &data; | ||||||
|  | @ -326,9 +317,9 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, | ||||||
|         if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { |         if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { | ||||||
|             // Only Ampere and newer support this feature
 |             // Only Ampere and newer support this feature
 | ||||||
|             // TODO: Find a way to differentiate Ampere and Ada
 |             // TODO: Find a way to differentiate Ampere and Ada
 | ||||||
|             return NvidiaArchitecture::AmpereOrNewer; |             return NvidiaArchitecture::Arch_AmpereOrNewer; | ||||||
|         } |         } | ||||||
|         return NvidiaArchitecture::Turing; |         return NvidiaArchitecture::Arch_Turing; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) { |     if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) { | ||||||
|  | @ -340,7 +331,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, | ||||||
|         physical_properties.pNext = &advanced_blending_props; |         physical_properties.pNext = &advanced_blending_props; | ||||||
|         physical.GetProperties2(physical_properties); |         physical.GetProperties2(physical_properties); | ||||||
|         if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) { |         if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) { | ||||||
|             return NvidiaArchitecture::Maxwell; |             return NvidiaArchitecture::Arch_Maxwell; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) { |         if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) { | ||||||
|  | @ -350,13 +341,13 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, | ||||||
|             physical_properties.pNext = &conservative_raster_props; |             physical_properties.pNext = &conservative_raster_props; | ||||||
|             physical.GetProperties2(physical_properties); |             physical.GetProperties2(physical_properties); | ||||||
|             if (conservative_raster_props.degenerateLinesRasterized) { |             if (conservative_raster_props.degenerateLinesRasterized) { | ||||||
|                 return NvidiaArchitecture::Volta; |                 return NvidiaArchitecture::Arch_Volta; | ||||||
|             } |             } | ||||||
|             return NvidiaArchitecture::Pascal; |             return NvidiaArchitecture::Arch_Pascal; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return NvidiaArchitecture::KeplerOrOlder; |     return NvidiaArchitecture::Arch_KeplerOrOlder; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::vector<const char*> ExtensionListForVulkan( | std::vector<const char*> ExtensionListForVulkan( | ||||||
|  | @ -436,6 +427,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | ||||||
|         throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); |         throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     if (is_nvidia) { | ||||||
|  |         nvidia_arch = GetNvidiaArchitecture(physical, supported_extensions); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     SetupFamilies(surface); |     SetupFamilies(surface); | ||||||
|     const auto queue_cis = GetDeviceQueueCreateInfos(); |     const auto queue_cis = GetDeviceQueueCreateInfos(); | ||||||
| 
 | 
 | ||||||
|  | @ -532,11 +527,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | ||||||
| 
 | 
 | ||||||
|     if (is_nvidia) { |     if (is_nvidia) { | ||||||
|         const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; |         const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; | ||||||
|         const auto arch = GetNvidiaArchitecture(physical, supported_extensions); |         const auto arch = GetNvidiaArch(); | ||||||
|         if (arch >= NvidiaArchitecture::AmpereOrNewer) { |         if (arch >= NvidiaArchitecture::Arch_AmpereOrNewer) { | ||||||
|             LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math"); |             LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math"); | ||||||
|             features.shader_float16_int8.shaderFloat16 = false; |             features.shader_float16_int8.shaderFloat16 = false; | ||||||
|         } else if (arch <= NvidiaArchitecture::Volta) { |         } else if (arch <= NvidiaArchitecture::Arch_Volta) { | ||||||
|             if (nv_major_version < 527) { |             if (nv_major_version < 527) { | ||||||
|                 LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor"); |                 LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor"); | ||||||
|                 RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); |                 RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); | ||||||
|  | @ -686,8 +681,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | ||||||
|             RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); |             RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); | ||||||
|         } |         } | ||||||
|     } else if (extensions.push_descriptor && is_nvidia) { |     } else if (extensions.push_descriptor && is_nvidia) { | ||||||
|         const auto arch = GetNvidiaArchitecture(physical, supported_extensions); |         const auto arch = GetNvidiaArch(); | ||||||
|         if (arch <= NvidiaArchitecture::Pascal) { |         if (arch <= NvidiaArchitecture::Arch_Pascal) { | ||||||
|             LOG_WARNING(Render_Vulkan, |             LOG_WARNING(Render_Vulkan, | ||||||
|                         "Pascal and older architectures have broken VK_KHR_push_descriptor"); |                         "Pascal and older architectures have broken VK_KHR_push_descriptor"); | ||||||
|             RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); |             RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); | ||||||
|  |  | ||||||
|  | @ -177,6 +177,15 @@ enum class FormatType { Linear, Optimal, Buffer }; | ||||||
| /// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
 | /// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
 | ||||||
| const u32 GuestWarpSize = 32; | const u32 GuestWarpSize = 32; | ||||||
| 
 | 
 | ||||||
|  | /// NVIDIA GPU architecture generations, declared from oldest to newest.
 | ||||||
|  | /// The declaration order is significant: callers compare enumerators with
 | ||||||
|  | /// relational operators (e.g. `arch <= Arch_Maxwell`, `arch >= Arch_AmpereOrNewer`),
 | ||||||
|  | /// so new entries must be inserted at their chronological position.
 | ||||||
|  | enum class NvidiaArchitecture { | ||||||
|  |     Arch_KeplerOrOlder, | ||||||
|  |     Arch_Maxwell, | ||||||
|  |     Arch_Pascal, | ||||||
|  |     Arch_Volta, | ||||||
|  |     Arch_Turing, | ||||||
|  |     Arch_AmpereOrNewer, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| /// Handles data specific to a physical device.
 | /// Handles data specific to a physical device.
 | ||||||
| class Device { | class Device { | ||||||
| public: | public: | ||||||
|  | @ -670,6 +679,14 @@ public: | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     /// Returns true when the driver ID reported in the device properties is
 | ||||||
|  |     /// VK_DRIVER_ID_NVIDIA_PROPRIETARY.
 | ||||||
|  |     bool IsNvidia() const noexcept { | ||||||
|  |         return properties.driver.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Returns the stored NVIDIA architecture. The member defaults to
 | ||||||
|  |     /// Arch_AmpereOrNewer and the constructor only overwrites it when is_nvidia
 | ||||||
|  |     /// is set, so the value is only meaningful on NVIDIA devices.
 | ||||||
|  |     NvidiaArchitecture GetNvidiaArch() const noexcept { | ||||||
|  |         return nvidia_arch; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     /// Checks if the physical device is suitable and configures the object state
 |     /// Checks if the physical device is suitable and configures the object state
 | ||||||
|     /// with all necessary info about its properties.
 |     /// with all necessary info about its properties.
 | ||||||
|  | @ -788,6 +805,7 @@ private: | ||||||
|     bool supports_conditional_barriers{};      ///< Allows barriers in conditional control flow.
 |     bool supports_conditional_barriers{};      ///< Allows barriers in conditional control flow.
 | ||||||
|     u64 device_access_memory{};                ///< Total size of device local memory in bytes.
 |     u64 device_access_memory{};                ///< Total size of device local memory in bytes.
 | ||||||
|     u32 sets_per_pool{};                       ///< Sets per Description Pool
 |     u32 sets_per_pool{};                       ///< Sets per Description Pool
 | ||||||
|  |     NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer}; | ||||||
| 
 | 
 | ||||||
|     // Telemetry parameters
 |     // Telemetry parameters
 | ||||||
|     std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions.
 |     std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions.
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 liamwhite
						liamwhite