forked from eden-emu/eden
		
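	Avoid using OpVectorExtractDynamic for the subgroup mask on Adreno GPUs
OpVectorExtractDynamic on the subgroup mask crashes the shader compiler of the Qualcomm proprietary driver (the cause is not yet understood), so on that driver the mask component is instead picked with per-component OpSelect results combined by OpBitwiseOr.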
This commit is contained in:
		
							parent
							
								
									2beb3051c1
								
							
						
					
					
						commit
						cfbe4b09eb
					
				
					 3 changed files with 19 additions and 1 deletion
				
			
		|  | @ -17,8 +17,23 @@ Id GetThreadId(EmitContext& ctx) { | ||||||
| Id WarpExtract(EmitContext& ctx, Id value) { | Id WarpExtract(EmitContext& ctx, Id value) { | ||||||
|     const Id thread_id{GetThreadId(ctx)}; |     const Id thread_id{GetThreadId(ctx)}; | ||||||
|     const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))}; |     const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))}; | ||||||
|  |     if (ctx.profile.has_broken_spirv_subgroup_mask_vector_extract_dynamic) { | ||||||
|  |         const Id c0_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(0U)), | ||||||
|  |                                      ctx.OpCompositeExtract(ctx.U32[1], value, 0U), ctx.Const(0U))}; | ||||||
|  |         const Id c1_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(1U)), | ||||||
|  |                                      ctx.OpCompositeExtract(ctx.U32[1], value, 1U), ctx.Const(0U))}; | ||||||
|  |         const Id c2_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(2U)), | ||||||
|  |                                      ctx.OpCompositeExtract(ctx.U32[1], value, 2U), ctx.Const(0U))}; | ||||||
|  |         const Id c3_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(3U)), | ||||||
|  |                                      ctx.OpCompositeExtract(ctx.U32[1], value, 3U), ctx.Const(0U))}; | ||||||
|  |         const Id c0_or_c1{ctx.OpBitwiseOr(ctx.U32[1], c0_sel, c1_sel)}; | ||||||
|  |         const Id c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c2_sel, c3_sel)}; | ||||||
|  |         const Id c0_or_c1_or_c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c0_or_c1, c2_or_c3)}; | ||||||
|  |         return c0_or_c1_or_c2_or_c3; | ||||||
|  |     } else { | ||||||
|         return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); |         return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); | ||||||
|     } |     } | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| Id LoadMask(EmitContext& ctx, Id mask) { | Id LoadMask(EmitContext& ctx, Id mask) { | ||||||
|     const Id value{ctx.OpLoad(ctx.U32[4], mask)}; |     const Id value{ctx.OpLoad(ctx.U32[4], mask)}; | ||||||
|  |  | ||||||
|  | @ -78,6 +78,8 @@ struct Profile { | ||||||
|     bool has_gl_bool_ref_bug{}; |     bool has_gl_bool_ref_bug{}; | ||||||
|     /// Ignores SPIR-V ordered vs unordered using GLSL semantics
 |     /// Ignores SPIR-V ordered vs unordered using GLSL semantics
 | ||||||
|     bool ignore_nan_fp_comparisons{}; |     bool ignore_nan_fp_comparisons{}; | ||||||
|  |     /// Some drivers have broken support for OpVectorExtractDynamic on subgroup mask inputs
 | ||||||
|  |     bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{}; | ||||||
| 
 | 
 | ||||||
|     u32 gl_max_compute_smem_size{}; |     u32 gl_max_compute_smem_size{}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -351,6 +351,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | ||||||
|         .has_broken_signed_operations = false, |         .has_broken_signed_operations = false, | ||||||
|         .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, |         .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, | ||||||
|         .ignore_nan_fp_comparisons = false, |         .ignore_nan_fp_comparisons = false, | ||||||
|  |         .has_broken_spirv_subgroup_mask_vector_extract_dynamic = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY | ||||||
|     }; |     }; | ||||||
|     host_info = Shader::HostTranslateInfo{ |     host_info = Shader::HostTranslateInfo{ | ||||||
|         .support_float16 = device.IsFloat16Supported(), |         .support_float16 = device.IsFloat16Supported(), | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Billy Laws
						Billy Laws