forked from eden-emu/eden
		
	shader: Initial support for textures and TEX
This commit is contained in:
		
							parent
							
								
									449374cb13
								
							
						
					
					
						commit
						a956d3cdb3
					
				
					 33 changed files with 1489 additions and 342 deletions
				
			
		|  | @ -9,6 +9,7 @@ add_library(shader_recompiler STATIC | ||||||
|     backend/spirv/emit_spirv_control_flow.cpp |     backend/spirv/emit_spirv_control_flow.cpp | ||||||
|     backend/spirv/emit_spirv_convert.cpp |     backend/spirv/emit_spirv_convert.cpp | ||||||
|     backend/spirv/emit_spirv_floating_point.cpp |     backend/spirv/emit_spirv_floating_point.cpp | ||||||
|  |     backend/spirv/emit_spirv_image.cpp | ||||||
|     backend/spirv/emit_spirv_integer.cpp |     backend/spirv/emit_spirv_integer.cpp | ||||||
|     backend/spirv/emit_spirv_logical.cpp |     backend/spirv/emit_spirv_logical.cpp | ||||||
|     backend/spirv/emit_spirv_memory.cpp |     backend/spirv/emit_spirv_memory.cpp | ||||||
|  | @ -100,6 +101,7 @@ add_library(shader_recompiler STATIC | ||||||
|     frontend/maxwell/translate/impl/predicate_set_predicate.cpp |     frontend/maxwell/translate/impl/predicate_set_predicate.cpp | ||||||
|     frontend/maxwell/translate/impl/predicate_set_register.cpp |     frontend/maxwell/translate/impl/predicate_set_register.cpp | ||||||
|     frontend/maxwell/translate/impl/select_source_with_predicate.cpp |     frontend/maxwell/translate/impl/select_source_with_predicate.cpp | ||||||
|  |     frontend/maxwell/translate/impl/texture_sample.cpp | ||||||
|     frontend/maxwell/translate/translate.cpp |     frontend/maxwell/translate/translate.cpp | ||||||
|     frontend/maxwell/translate/translate.h |     frontend/maxwell/translate/translate.h | ||||||
|     ir_opt/collect_shader_info_pass.cpp |     ir_opt/collect_shader_info_pass.cpp | ||||||
|  | @ -110,6 +112,7 @@ add_library(shader_recompiler STATIC | ||||||
|     ir_opt/lower_fp16_to_fp32.cpp |     ir_opt/lower_fp16_to_fp32.cpp | ||||||
|     ir_opt/passes.h |     ir_opt/passes.h | ||||||
|     ir_opt/ssa_rewrite_pass.cpp |     ir_opt/ssa_rewrite_pass.cpp | ||||||
|  |     ir_opt/texture_pass.cpp | ||||||
|     ir_opt/verification_pass.cpp |     ir_opt/verification_pass.cpp | ||||||
|     object_pool.h |     object_pool.h | ||||||
|     profile.h |     profile.h | ||||||
|  |  | ||||||
|  | @ -12,6 +12,43 @@ | ||||||
| #include "shader_recompiler/backend/spirv/emit_context.h" | #include "shader_recompiler/backend/spirv/emit_context.h" | ||||||
| 
 | 
 | ||||||
| namespace Shader::Backend::SPIRV { | namespace Shader::Backend::SPIRV { | ||||||
|  | namespace { | ||||||
|  | Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { | ||||||
|  |     const spv::ImageFormat format{spv::ImageFormat::Unknown}; | ||||||
|  |     const Id type{ctx.F32[1]}; | ||||||
|  |     switch (desc.type) { | ||||||
|  |     case TextureType::Color1D: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 1, format); | ||||||
|  |     case TextureType::ColorArray1D: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 1, format); | ||||||
|  |     case TextureType::Color2D: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 1, format); | ||||||
|  |     case TextureType::ColorArray2D: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 1, format); | ||||||
|  |     case TextureType::Color3D: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 1, format); | ||||||
|  |     case TextureType::ColorCube: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Cube, false, false, false, 1, format); | ||||||
|  |     case TextureType::ColorArrayCube: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format); | ||||||
|  |     case TextureType::Shadow1D: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Dim1D, true, false, false, 1, format); | ||||||
|  |     case TextureType::ShadowArray1D: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Dim1D, true, true, false, 1, format); | ||||||
|  |     case TextureType::Shadow2D: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Dim2D, true, false, false, 1, format); | ||||||
|  |     case TextureType::ShadowArray2D: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Dim2D, true, true, false, 1, format); | ||||||
|  |     case TextureType::Shadow3D: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Dim3D, true, false, false, 1, format); | ||||||
|  |     case TextureType::ShadowCube: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format); | ||||||
|  |     case TextureType::ShadowArrayCube: | ||||||
|  |         return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format); | ||||||
|  |     } | ||||||
|  |     throw InvalidArgument("Invalid texture type {}", desc.type); | ||||||
|  | } | ||||||
|  | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { | void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { | ||||||
|     defs[0] = sirit_ctx.Name(base_type, name); |     defs[0] = sirit_ctx.Name(base_type, name); | ||||||
|  | @ -35,6 +72,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program) | ||||||
|     u32 binding{}; |     u32 binding{}; | ||||||
|     DefineConstantBuffers(program.info, binding); |     DefineConstantBuffers(program.info, binding); | ||||||
|     DefineStorageBuffers(program.info, binding); |     DefineStorageBuffers(program.info, binding); | ||||||
|  |     DefineTextures(program.info, binding); | ||||||
| 
 | 
 | ||||||
|     DefineLabels(program); |     DefineLabels(program); | ||||||
| } | } | ||||||
|  | @ -46,6 +84,10 @@ Id EmitContext::Def(const IR::Value& value) { | ||||||
|         return value.Inst()->Definition<Id>(); |         return value.Inst()->Definition<Id>(); | ||||||
|     } |     } | ||||||
|     switch (value.Type()) { |     switch (value.Type()) { | ||||||
|  |     case IR::Type::Void: | ||||||
|  |         // Void instructions are used for optional arguments (e.g. texture offsets)
 | ||||||
|  |         // They are not meant to be used in the SPIR-V module
 | ||||||
|  |         return Id{}; | ||||||
|     case IR::Type::U1: |     case IR::Type::U1: | ||||||
|         return value.U1() ? true_value : false_value; |         return value.U1() ? true_value : false_value; | ||||||
|     case IR::Type::U32: |     case IR::Type::U32: | ||||||
|  | @ -122,7 +164,7 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { | ||||||
|     uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); |     uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); | ||||||
| 
 | 
 | ||||||
|     u32 index{}; |     u32 index{}; | ||||||
|     for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { |     for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { | ||||||
|         const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; |         const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; | ||||||
|         Decorate(id, spv::Decoration::Binding, binding); |         Decorate(id, spv::Decoration::Binding, binding); | ||||||
|         Decorate(id, spv::Decoration::DescriptorSet, 0U); |         Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||||||
|  | @ -152,7 +194,7 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { | ||||||
|     storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); |     storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); | ||||||
| 
 | 
 | ||||||
|     u32 index{}; |     u32 index{}; | ||||||
|     for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { |     for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { | ||||||
|         const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; |         const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; | ||||||
|         Decorate(id, spv::Decoration::Binding, binding); |         Decorate(id, spv::Decoration::Binding, binding); | ||||||
|         Decorate(id, spv::Decoration::DescriptorSet, 0U); |         Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||||||
|  | @ -163,6 +205,29 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void EmitContext::DefineTextures(const Info& info, u32& binding) { | ||||||
|  |     textures.reserve(info.texture_descriptors.size()); | ||||||
|  |     for (const TextureDescriptor& desc : info.texture_descriptors) { | ||||||
|  |         if (desc.count != 1) { | ||||||
|  |             throw NotImplementedException("Array of textures"); | ||||||
|  |         } | ||||||
|  |         const Id type{TypeSampledImage(ImageType(*this, desc))}; | ||||||
|  |         const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, type)}; | ||||||
|  |         const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; | ||||||
|  |         Decorate(id, spv::Decoration::Binding, binding); | ||||||
|  |         Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||||||
|  |         Name(id, fmt::format("tex{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); | ||||||
|  |         for (u32 index = 0; index < desc.count; ++index) { | ||||||
|  |             // TODO: Pass count info
 | ||||||
|  |             textures.push_back(TextureDefinition{ | ||||||
|  |                 .id{id}, | ||||||
|  |                 .type{type}, | ||||||
|  |             }); | ||||||
|  |         } | ||||||
|  |         binding += desc.count; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void EmitContext::DefineLabels(IR::Program& program) { | void EmitContext::DefineLabels(IR::Program& program) { | ||||||
|     for (const IR::Function& function : program.functions) { |     for (const IR::Function& function : program.functions) { | ||||||
|         for (IR::Block* const block : function.blocks) { |         for (IR::Block* const block : function.blocks) { | ||||||
|  |  | ||||||
|  | @ -29,6 +29,11 @@ private: | ||||||
|     std::array<Id, 4> defs{}; |     std::array<Id, 4> defs{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | struct TextureDefinition { | ||||||
|  |     Id id; | ||||||
|  |     Id type; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| class EmitContext final : public Sirit::Module { | class EmitContext final : public Sirit::Module { | ||||||
| public: | public: | ||||||
|     explicit EmitContext(const Profile& profile, IR::Program& program); |     explicit EmitContext(const Profile& profile, IR::Program& program); | ||||||
|  | @ -56,6 +61,7 @@ public: | ||||||
| 
 | 
 | ||||||
|     std::array<Id, Info::MAX_CBUFS> cbufs{}; |     std::array<Id, Info::MAX_CBUFS> cbufs{}; | ||||||
|     std::array<Id, Info::MAX_SSBOS> ssbos{}; |     std::array<Id, Info::MAX_SSBOS> ssbos{}; | ||||||
|  |     std::vector<TextureDefinition> textures; | ||||||
| 
 | 
 | ||||||
|     Id workgroup_id{}; |     Id workgroup_id{}; | ||||||
|     Id local_invocation_id{}; |     Id local_invocation_id{}; | ||||||
|  | @ -66,6 +72,7 @@ private: | ||||||
|     void DefineSpecialVariables(const Info& info); |     void DefineSpecialVariables(const Info& info); | ||||||
|     void DefineConstantBuffers(const Info& info, u32& binding); |     void DefineConstantBuffers(const Info& info, u32& binding); | ||||||
|     void DefineStorageBuffers(const Info& info, u32& binding); |     void DefineStorageBuffers(const Info& info, u32& binding); | ||||||
|  |     void DefineTextures(const Info& info, u32& binding); | ||||||
|     void DefineLabels(IR::Program& program); |     void DefineLabels(IR::Program& program); | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -221,6 +221,14 @@ std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program | ||||||
|                          workgroup_size[2]); |                          workgroup_size[2]); | ||||||
| 
 | 
 | ||||||
|     SetupDenormControl(profile, program, ctx, func); |     SetupDenormControl(profile, program, ctx, func); | ||||||
|  |     if (info.uses_sampled_1d) { | ||||||
|  |         ctx.AddCapability(spv::Capability::Sampled1D); | ||||||
|  |     } | ||||||
|  |     if (info.uses_sparse_residency) { | ||||||
|  |         ctx.AddCapability(spv::Capability::SparseResidency); | ||||||
|  |     } | ||||||
|  |     // TODO: Track this usage
 | ||||||
|  |     ctx.AddCapability(spv::Capability::ImageGatherExtended); | ||||||
| 
 | 
 | ||||||
|     return ctx.Assemble(); |     return ctx.Assemble(); | ||||||
| } | } | ||||||
|  | @ -259,4 +267,8 @@ void EmitGetOverflowFromOp(EmitContext&) { | ||||||
|     throw LogicError("Unreachable instruction"); |     throw LogicError("Unreachable instruction"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void EmitGetSparseFromOp(EmitContext&) { | ||||||
|  |     throw LogicError("Unreachable instruction"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Shader::Backend::SPIRV
 | } // namespace Shader::Backend::SPIRV
 | ||||||
|  |  | ||||||
|  | @ -83,7 +83,8 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Va | ||||||
|                         Id value); |                         Id value); | ||||||
| void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||||
|                         Id value); |                         Id value); | ||||||
| void EmitWriteStorage128(EmitContext& ctx); | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||||
|  |                          Id value); | ||||||
| Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); | ||||||
| Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | ||||||
| Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | ||||||
|  | @ -145,6 +146,7 @@ void EmitGetZeroFromOp(EmitContext& ctx); | ||||||
| void EmitGetSignFromOp(EmitContext& ctx); | void EmitGetSignFromOp(EmitContext& ctx); | ||||||
| void EmitGetCarryFromOp(EmitContext& ctx); | void EmitGetCarryFromOp(EmitContext& ctx); | ||||||
| void EmitGetOverflowFromOp(EmitContext& ctx); | void EmitGetOverflowFromOp(EmitContext& ctx); | ||||||
|  | void EmitGetSparseFromOp(EmitContext& ctx); | ||||||
| Id EmitFPAbs16(EmitContext& ctx, Id value); | Id EmitFPAbs16(EmitContext& ctx, Id value); | ||||||
| Id EmitFPAbs32(EmitContext& ctx, Id value); | Id EmitFPAbs32(EmitContext& ctx, Id value); | ||||||
| Id EmitFPAbs64(EmitContext& ctx, Id value); | Id EmitFPAbs64(EmitContext& ctx, Id value); | ||||||
|  | @ -291,5 +293,33 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value); | ||||||
| Id EmitConvertF32F16(EmitContext& ctx, Id value); | Id EmitConvertF32F16(EmitContext& ctx, Id value); | ||||||
| Id EmitConvertF32F64(EmitContext& ctx, Id value); | Id EmitConvertF32F64(EmitContext& ctx, Id value); | ||||||
| Id EmitConvertF64F32(EmitContext& ctx, Id value); | Id EmitConvertF64F32(EmitContext& ctx, Id value); | ||||||
|  | Id EmitConvertF16S32(EmitContext& ctx, Id value); | ||||||
|  | Id EmitConvertF16S64(EmitContext& ctx, Id value); | ||||||
|  | Id EmitConvertF16U32(EmitContext& ctx, Id value); | ||||||
|  | Id EmitConvertF16U64(EmitContext& ctx, Id value); | ||||||
|  | Id EmitConvertF32S32(EmitContext& ctx, Id value); | ||||||
|  | Id EmitConvertF32S64(EmitContext& ctx, Id value); | ||||||
|  | Id EmitConvertF32U32(EmitContext& ctx, Id value); | ||||||
|  | Id EmitConvertF32U64(EmitContext& ctx, Id value); | ||||||
|  | Id EmitConvertF64S32(EmitContext& ctx, Id value); | ||||||
|  | Id EmitConvertF64S64(EmitContext& ctx, Id value); | ||||||
|  | Id EmitConvertF64U32(EmitContext& ctx, Id value); | ||||||
|  | Id EmitConvertF64U64(EmitContext& ctx, Id value); | ||||||
|  | Id EmitBindlessImageSampleImplicitLod(EmitContext&); | ||||||
|  | Id EmitBindlessImageSampleExplicitLod(EmitContext&); | ||||||
|  | Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); | ||||||
|  | Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); | ||||||
|  | Id EmitBoundImageSampleImplicitLod(EmitContext&); | ||||||
|  | Id EmitBoundImageSampleExplicitLod(EmitContext&); | ||||||
|  | Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); | ||||||
|  | Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); | ||||||
|  | Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||||||
|  |                               Id bias_lc, Id offset); | ||||||
|  | Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||||||
|  |                               Id lod_lc, Id offset); | ||||||
|  | Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||||||
|  |                                   Id coords, Id dref, Id bias_lc, Id offset); | ||||||
|  | Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||||||
|  |                                   Id coords, Id dref, Id lod_lc, Id offset); | ||||||
| 
 | 
 | ||||||
| } // namespace Shader::Backend::SPIRV
 | } // namespace Shader::Backend::SPIRV
 | ||||||
|  |  | ||||||
|  | @ -102,4 +102,52 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) { | ||||||
|     return ctx.OpFConvert(ctx.F64[1], value); |     return ctx.OpFConvert(ctx.F64[1], value); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | Id EmitConvertF16S32(EmitContext& ctx, Id value) { | ||||||
|  |     return ctx.OpConvertSToF(ctx.F16[1], value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitConvertF16S64(EmitContext& ctx, Id value) { | ||||||
|  |     return ctx.OpConvertSToF(ctx.F16[1], value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitConvertF16U32(EmitContext& ctx, Id value) { | ||||||
|  |     return ctx.OpConvertUToF(ctx.F16[1], value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitConvertF16U64(EmitContext& ctx, Id value) { | ||||||
|  |     return ctx.OpConvertUToF(ctx.F16[1], value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitConvertF32S32(EmitContext& ctx, Id value) { | ||||||
|  |     return ctx.OpConvertSToF(ctx.F32[1], value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitConvertF32S64(EmitContext& ctx, Id value) { | ||||||
|  |     return ctx.OpConvertSToF(ctx.F32[1], value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitConvertF32U32(EmitContext& ctx, Id value) { | ||||||
|  |     return ctx.OpConvertUToF(ctx.F32[1], value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitConvertF32U64(EmitContext& ctx, Id value) { | ||||||
|  |     return ctx.OpConvertUToF(ctx.F32[1], value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitConvertF64S32(EmitContext& ctx, Id value) { | ||||||
|  |     return ctx.OpConvertSToF(ctx.F64[1], value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitConvertF64S64(EmitContext& ctx, Id value) { | ||||||
|  |     return ctx.OpConvertSToF(ctx.F64[1], value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitConvertF64U32(EmitContext& ctx, Id value) { | ||||||
|  |     return ctx.OpConvertUToF(ctx.F64[1], value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitConvertF64U64(EmitContext& ctx, Id value) { | ||||||
|  |     return ctx.OpConvertUToF(ctx.F64[1], value); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Shader::Backend::SPIRV
 | } // namespace Shader::Backend::SPIRV
 | ||||||
|  |  | ||||||
							
								
								
									
										146
									
								
								src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										146
									
								
								src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,146 @@ | ||||||
|  | // Copyright 2021 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include <boost/container/static_vector.hpp> | ||||||
|  | 
 | ||||||
|  | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||||||
|  | #include "shader_recompiler/frontend/ir/modifiers.h" | ||||||
|  | 
 | ||||||
|  | namespace Shader::Backend::SPIRV { | ||||||
|  | namespace { | ||||||
|  | class ImageOperands { | ||||||
|  | public: | ||||||
|  |     explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, | ||||||
|  |                            Id lod, Id offset) { | ||||||
|  |         if (has_bias) { | ||||||
|  |             const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; | ||||||
|  |             Add(spv::ImageOperandsMask::Bias, bias); | ||||||
|  |         } | ||||||
|  |         if (has_lod) { | ||||||
|  |             const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; | ||||||
|  |             Add(spv::ImageOperandsMask::Lod, lod_value); | ||||||
|  |         } | ||||||
|  |         if (Sirit::ValidId(offset)) { | ||||||
|  |             Add(spv::ImageOperandsMask::Offset, offset); | ||||||
|  |         } | ||||||
|  |         if (has_lod_clamp) { | ||||||
|  |             const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod}; | ||||||
|  |             Add(spv::ImageOperandsMask::MinLod, lod_clamp); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void Add(spv::ImageOperandsMask new_mask, Id value) { | ||||||
|  |         mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | | ||||||
|  |                                                    static_cast<unsigned>(new_mask)); | ||||||
|  |         operands.push_back(value); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::span<const Id> Span() const noexcept { | ||||||
|  |         return std::span{operands.data(), operands.size()}; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     spv::ImageOperandsMask Mask() const noexcept { | ||||||
|  |         return mask; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     boost::container::static_vector<Id, 3> operands; | ||||||
|  |     spv::ImageOperandsMask mask{}; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | Id Texture(EmitContext& ctx, const IR::Value& index) { | ||||||
|  |     if (index.IsImmediate()) { | ||||||
|  |         const TextureDefinition def{ctx.textures.at(index.U32())}; | ||||||
|  |         return ctx.OpLoad(def.type, def.id); | ||||||
|  |     } | ||||||
|  |     throw NotImplementedException("Indirect texture sample"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template <typename MethodPtrType, typename... Args> | ||||||
|  | Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst, | ||||||
|  |         Id result_type, Args&&... args) { | ||||||
|  |     IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; | ||||||
|  |     if (!sparse) { | ||||||
|  |         return (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...); | ||||||
|  |     } | ||||||
|  |     const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)}; | ||||||
|  |     const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)}; | ||||||
|  |     const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)}; | ||||||
|  |     sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code)); | ||||||
|  |     sparse->Invalidate(); | ||||||
|  |     return ctx.OpCompositeExtract(result_type, sample, 1U); | ||||||
|  | } | ||||||
|  | } // Anonymous namespace
 | ||||||
|  | 
 | ||||||
|  | Id EmitBindlessImageSampleImplicitLod(EmitContext&) { | ||||||
|  |     throw LogicError("Unreachable instruction"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitBindlessImageSampleExplicitLod(EmitContext&) { | ||||||
|  |     throw LogicError("Unreachable instruction"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { | ||||||
|  |     throw LogicError("Unreachable instruction"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { | ||||||
|  |     throw LogicError("Unreachable instruction"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitBoundImageSampleImplicitLod(EmitContext&) { | ||||||
|  |     throw LogicError("Unreachable instruction"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitBoundImageSampleExplicitLod(EmitContext&) { | ||||||
|  |     throw LogicError("Unreachable instruction"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) { | ||||||
|  |     throw LogicError("Unreachable instruction"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) { | ||||||
|  |     throw LogicError("Unreachable instruction"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||||||
|  |                               Id bias_lc, Id offset) { | ||||||
|  |     const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||||||
|  |     const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, | ||||||
|  |                                  offset); | ||||||
|  |     return Emit(&EmitContext::OpImageSparseSampleImplicitLod, | ||||||
|  |                 &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), | ||||||
|  |                 coords, operands.Mask(), operands.Span()); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||||||
|  |                               Id lod_lc, Id offset) { | ||||||
|  |     const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||||||
|  |     const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); | ||||||
|  |     return Emit(&EmitContext::OpImageSparseSampleExplicitLod, | ||||||
|  |                 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), | ||||||
|  |                 coords, operands.Mask(), operands.Span()); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||||||
|  |                                   Id coords, Id dref, Id bias_lc, Id offset) { | ||||||
|  |     const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||||||
|  |     const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, | ||||||
|  |                                  offset); | ||||||
|  |     return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, | ||||||
|  |                 &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], | ||||||
|  |                 Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||||||
|  |                                   Id coords, Id dref, Id lod_lc, Id offset) { | ||||||
|  |     const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||||||
|  |     const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); | ||||||
|  |     return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, | ||||||
|  |                 &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], | ||||||
|  |                 Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace Shader::Backend::SPIRV
 | ||||||
|  | @ -154,8 +154,22 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va | ||||||
|     ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); |     ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void EmitWriteStorage128(EmitContext&) { | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||||
|     throw NotImplementedException("SPIR-V Instruction"); |                          Id value) { | ||||||
|  |     if (!binding.IsImmediate()) { | ||||||
|  |         throw NotImplementedException("Dynamic storage buffer indexing"); | ||||||
|  |     } | ||||||
|  |     // TODO: Support reinterpreting bindings, guaranteed to be aligned
 | ||||||
|  |     const Id ssbo{ctx.ssbos[binding.U32()]}; | ||||||
|  |     const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||||||
|  |     for (u32 element = 0; element < 4; ++element) { | ||||||
|  |         Id index = base_index; | ||||||
|  |         if (element > 0) { | ||||||
|  |             index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); | ||||||
|  |         } | ||||||
|  |         const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; | ||||||
|  |         ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, element)); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace Shader::Backend::SPIRV
 | } // namespace Shader::Backend::SPIRV
 | ||||||
|  |  | ||||||
|  | @ -12,6 +12,8 @@ public: | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; |     [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; | ||||||
| 
 | 
 | ||||||
|  |     [[nodiscard]] virtual u32 TextureBoundBuffer() = 0; | ||||||
|  | 
 | ||||||
|     [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0; |     [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -39,6 +39,10 @@ u64 FileEnvironment::ReadInstruction(u32 offset) { | ||||||
|     return data[offset / 8]; |     return data[offset / 8]; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | u32 FileEnvironment::TextureBoundBuffer() { | ||||||
|  |     throw NotImplementedException("Texture bound buffer serialization"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| std::array<u32, 3> FileEnvironment::WorkgroupSize() { | std::array<u32, 3> FileEnvironment::WorkgroupSize() { | ||||||
|     return {1, 1, 1}; |     return {1, 1, 1}; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -3,7 +3,7 @@ | ||||||
| #include <vector> | #include <vector> | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "environment.h" | #include "shader_recompiler/environment.h" | ||||||
| 
 | 
 | ||||||
| namespace Shader { | namespace Shader { | ||||||
| 
 | 
 | ||||||
|  | @ -14,6 +14,8 @@ public: | ||||||
| 
 | 
 | ||||||
|     u64 ReadInstruction(u32 offset) override; |     u64 ReadInstruction(u32 offset) override; | ||||||
| 
 | 
 | ||||||
|  |     u32 TextureBoundBuffer() override; | ||||||
|  | 
 | ||||||
|     std::array<u32, 3> WorkgroupSize() override; |     std::array<u32, 3> WorkgroupSize() override; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|  |  | ||||||
|  | @ -7,11 +7,24 @@ | ||||||
| #include "shader_recompiler/frontend/ir/value.h" | #include "shader_recompiler/frontend/ir/value.h" | ||||||
| 
 | 
 | ||||||
| namespace Shader::IR { | namespace Shader::IR { | ||||||
| 
 | namespace { | ||||||
| [[noreturn]] static void ThrowInvalidType(Type type) { | [[noreturn]] void ThrowInvalidType(Type type) { | ||||||
|     throw InvalidArgument("Invalid type {}", type); |     throw InvalidArgument("Invalid type {}", type); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { | ||||||
|  |     if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { | ||||||
|  |         return ir.CompositeConstruct(bias_lod, lod_clamp); | ||||||
|  |     } else if (!bias_lod.IsEmpty()) { | ||||||
|  |         return bias_lod; | ||||||
|  |     } else if (!lod_clamp.IsEmpty()) { | ||||||
|  |         return lod_clamp; | ||||||
|  |     } else { | ||||||
|  |         return Value{}; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | } // Anonymous namespace
 | ||||||
|  | 
 | ||||||
| U1 IREmitter::Imm1(bool value) const { | U1 IREmitter::Imm1(bool value) const { | ||||||
|     return U1{Value{value}}; |     return U1{Value{value}}; | ||||||
| } | } | ||||||
|  | @ -261,6 +274,10 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { | ||||||
|     return Inst<U1>(Opcode::GetOverflowFromOp, op); |     return Inst<U1>(Opcode::GetOverflowFromOp, op); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | U1 IREmitter::GetSparseFromOp(const Value& op) { | ||||||
|  |     return Inst<U1>(Opcode::GetSparseFromOp, op); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { | F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { | ||||||
|     if (a.Type() != a.Type()) { |     if (a.Type() != a.Type()) { | ||||||
|         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); |         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||||||
|  | @ -1035,6 +1052,82 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) { | ||||||
|  |     switch (bitsize) { | ||||||
|  |     case 16: | ||||||
|  |         switch (value.Type()) { | ||||||
|  |         case Type::U32: | ||||||
|  |             return Inst<F16>(Opcode::ConvertF16S32, value); | ||||||
|  |         case Type::U64: | ||||||
|  |             return Inst<F16>(Opcode::ConvertF16S64, value); | ||||||
|  |         default: | ||||||
|  |             ThrowInvalidType(value.Type()); | ||||||
|  |         } | ||||||
|  |     case 32: | ||||||
|  |         switch (value.Type()) { | ||||||
|  |         case Type::U32: | ||||||
|  |             return Inst<F32>(Opcode::ConvertF32S32, value); | ||||||
|  |         case Type::U64: | ||||||
|  |             return Inst<F32>(Opcode::ConvertF32S64, value); | ||||||
|  |         default: | ||||||
|  |             ThrowInvalidType(value.Type()); | ||||||
|  |         } | ||||||
|  |     case 64: | ||||||
|  |         switch (value.Type()) { | ||||||
|  |         case Type::U32: | ||||||
|  |             return Inst<F16>(Opcode::ConvertF64S32, value); | ||||||
|  |         case Type::U64: | ||||||
|  |             return Inst<F16>(Opcode::ConvertF64S64, value); | ||||||
|  |         default: | ||||||
|  |             ThrowInvalidType(value.Type()); | ||||||
|  |         } | ||||||
|  |     default: | ||||||
|  |         throw InvalidArgument("Invalid destination bitsize {}", bitsize); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) { | ||||||
|  |     switch (bitsize) { | ||||||
|  |     case 16: | ||||||
|  |         switch (value.Type()) { | ||||||
|  |         case Type::U32: | ||||||
|  |             return Inst<F16>(Opcode::ConvertF16U32, value); | ||||||
|  |         case Type::U64: | ||||||
|  |             return Inst<F16>(Opcode::ConvertF16U64, value); | ||||||
|  |         default: | ||||||
|  |             ThrowInvalidType(value.Type()); | ||||||
|  |         } | ||||||
|  |     case 32: | ||||||
|  |         switch (value.Type()) { | ||||||
|  |         case Type::U32: | ||||||
|  |             return Inst<F32>(Opcode::ConvertF32U32, value); | ||||||
|  |         case Type::U64: | ||||||
|  |             return Inst<F32>(Opcode::ConvertF32U64, value); | ||||||
|  |         default: | ||||||
|  |             ThrowInvalidType(value.Type()); | ||||||
|  |         } | ||||||
|  |     case 64: | ||||||
|  |         switch (value.Type()) { | ||||||
|  |         case Type::U32: | ||||||
|  |             return Inst<F16>(Opcode::ConvertF64U32, value); | ||||||
|  |         case Type::U64: | ||||||
|  |             return Inst<F16>(Opcode::ConvertF64U64, value); | ||||||
|  |         default: | ||||||
|  |             ThrowInvalidType(value.Type()); | ||||||
|  |         } | ||||||
|  |     default: | ||||||
|  |         throw InvalidArgument("Invalid destination bitsize {}", bitsize); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) { | ||||||
|  |     if (is_signed) { | ||||||
|  |         return ConvertSToF(bitsize, value); | ||||||
|  |     } else { | ||||||
|  |         return ConvertUToF(bitsize, value); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { | U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { | ||||||
|     switch (result_bitsize) { |     switch (result_bitsize) { | ||||||
|     case 32: |     case 32: | ||||||
|  | @ -1107,4 +1200,40 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { | ||||||
|     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); |     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, | ||||||
|  |                                         const Value& offset, const F32& lod_clamp, | ||||||
|  |                                         TextureInstInfo info) { | ||||||
|  |     const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; | ||||||
|  |     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod | ||||||
|  |                                          : Opcode::BindlessImageSampleImplicitLod}; | ||||||
|  |     return Inst(op, Flags{info}, handle, coords, bias_lc, offset); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, | ||||||
|  |                                         const Value& offset, const F32& lod_clamp, | ||||||
|  |                                         TextureInstInfo info) { | ||||||
|  |     const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; | ||||||
|  |     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod | ||||||
|  |                                          : Opcode::BindlessImageSampleExplicitLod}; | ||||||
|  |     return Inst(op, Flags{info}, handle, coords, lod_lc, offset); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, | ||||||
|  |                                           const F32& bias, const Value& offset, | ||||||
|  |                                           const F32& lod_clamp, TextureInstInfo info) { | ||||||
|  |     const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; | ||||||
|  |     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod | ||||||
|  |                                          : Opcode::BindlessImageSampleDrefImplicitLod}; | ||||||
|  |     return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, | ||||||
|  |                                           const F32& lod, const Value& offset, const F32& lod_clamp, | ||||||
|  |                                           TextureInstInfo info) { | ||||||
|  |     const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; | ||||||
|  |     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod | ||||||
|  |                                          : Opcode::BindlessImageSampleDrefExplicitLod}; | ||||||
|  |     return Inst<F32>(op, Flags{info}, handle, coords, dref, lod_lc, offset); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Shader::IR
 | } // namespace Shader::IR
 | ||||||
|  |  | ||||||
|  | @ -91,6 +91,7 @@ public: | ||||||
|     [[nodiscard]] U1 GetSignFromOp(const Value& op); |     [[nodiscard]] U1 GetSignFromOp(const Value& op); | ||||||
|     [[nodiscard]] U1 GetCarryFromOp(const Value& op); |     [[nodiscard]] U1 GetCarryFromOp(const Value& op); | ||||||
|     [[nodiscard]] U1 GetOverflowFromOp(const Value& op); |     [[nodiscard]] U1 GetOverflowFromOp(const Value& op); | ||||||
|  |     [[nodiscard]] U1 GetSparseFromOp(const Value& op); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); |     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); | ||||||
|     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); |     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); | ||||||
|  | @ -159,7 +160,7 @@ public: | ||||||
|     [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, |     [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, | ||||||
|                                      const U32& count); |                                      const U32& count); | ||||||
|     [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, |     [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, | ||||||
|                                       bool is_signed); |                                       bool is_signed = false); | ||||||
|     [[nodiscard]] U32 BitReverse(const U32& value); |     [[nodiscard]] U32 BitReverse(const U32& value); | ||||||
|     [[nodiscard]] U32 BitCount(const U32& value); |     [[nodiscard]] U32 BitCount(const U32& value); | ||||||
|     [[nodiscard]] U32 BitwiseNot(const U32& value); |     [[nodiscard]] U32 BitwiseNot(const U32& value); | ||||||
|  | @ -186,10 +187,28 @@ public: | ||||||
|     [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); |     [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); | ||||||
|     [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); |     [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); | ||||||
|     [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); |     [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); | ||||||
|  |     [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value); | ||||||
|  |     [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value); | ||||||
|  |     [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); |     [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); | ||||||
|     [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); |     [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); | ||||||
| 
 | 
 | ||||||
|  |     [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, | ||||||
|  |                                                const F32& bias, const Value& offset, | ||||||
|  |                                                const F32& lod_clamp, TextureInstInfo info); | ||||||
|  |     [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, | ||||||
|  |                                                const F32& lod, const Value& offset, | ||||||
|  |                                                const F32& lod_clamp, TextureInstInfo info); | ||||||
|  |     [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, | ||||||
|  |                                                  const F32& dref, const F32& bias, | ||||||
|  |                                                  const Value& offset, const F32& lod_clamp, | ||||||
|  |                                                  TextureInstInfo info); | ||||||
|  |     [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, | ||||||
|  |                                                  const F32& dref, const F32& lod, | ||||||
|  |                                                  const Value& offset, const F32& lod_clamp, | ||||||
|  |                                                  TextureInstInfo info); | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     IR::Block::iterator insertion_point; |     IR::Block::iterator insertion_point; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -10,26 +10,27 @@ | ||||||
| #include "shader_recompiler/frontend/ir/type.h" | #include "shader_recompiler/frontend/ir/type.h" | ||||||
| 
 | 
 | ||||||
| namespace Shader::IR { | namespace Shader::IR { | ||||||
| 
 | namespace { | ||||||
| static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { | void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { | ||||||
|     if (inst && inst->Opcode() != opcode) { |     if (inst && inst->Opcode() != opcode) { | ||||||
|         throw LogicError("Invalid pseudo-instruction"); |         throw LogicError("Invalid pseudo-instruction"); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { | void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { | ||||||
|     if (dest_inst) { |     if (dest_inst) { | ||||||
|         throw LogicError("Only one of each type of pseudo-op allowed"); |         throw LogicError("Only one of each type of pseudo-op allowed"); | ||||||
|     } |     } | ||||||
|     dest_inst = pseudo_inst; |     dest_inst = pseudo_inst; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { | void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { | ||||||
|     if (inst->Opcode() != expected_opcode) { |     if (inst->Opcode() != expected_opcode) { | ||||||
|         throw LogicError("Undoing use of invalid pseudo-op"); |         throw LogicError("Undoing use of invalid pseudo-op"); | ||||||
|     } |     } | ||||||
|     inst = nullptr; |     inst = nullptr; | ||||||
| } | } | ||||||
|  | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { | Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { | ||||||
|     if (op == Opcode::Phi) { |     if (op == Opcode::Phi) { | ||||||
|  | @ -82,6 +83,7 @@ bool Inst::IsPseudoInstruction() const noexcept { | ||||||
|     case Opcode::GetSignFromOp: |     case Opcode::GetSignFromOp: | ||||||
|     case Opcode::GetCarryFromOp: |     case Opcode::GetCarryFromOp: | ||||||
|     case Opcode::GetOverflowFromOp: |     case Opcode::GetOverflowFromOp: | ||||||
|  |     case Opcode::GetSparseFromOp: | ||||||
|         return true; |         return true; | ||||||
|     default: |     default: | ||||||
|         return false; |         return false; | ||||||
|  | @ -96,25 +98,26 @@ bool Inst::AreAllArgsImmediates() const { | ||||||
|                        [](const IR::Value& value) { return value.IsImmediate(); }); |                        [](const IR::Value& value) { return value.IsImmediate(); }); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool Inst::HasAssociatedPseudoOperation() const noexcept { |  | ||||||
|     return zero_inst || sign_inst || carry_inst || overflow_inst; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { | Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { | ||||||
|     // This is faster than doing a search through the block.
 |     if (!associated_insts) { | ||||||
|  |         return nullptr; | ||||||
|  |     } | ||||||
|     switch (opcode) { |     switch (opcode) { | ||||||
|     case Opcode::GetZeroFromOp: |     case Opcode::GetZeroFromOp: | ||||||
|         CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp); |         CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp); | ||||||
|         return zero_inst; |         return associated_insts->zero_inst; | ||||||
|     case Opcode::GetSignFromOp: |     case Opcode::GetSignFromOp: | ||||||
|         CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp); |         CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp); | ||||||
|         return sign_inst; |         return associated_insts->sign_inst; | ||||||
|     case Opcode::GetCarryFromOp: |     case Opcode::GetCarryFromOp: | ||||||
|         CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp); |         CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp); | ||||||
|         return carry_inst; |         return associated_insts->carry_inst; | ||||||
|     case Opcode::GetOverflowFromOp: |     case Opcode::GetOverflowFromOp: | ||||||
|         CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); |         CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp); | ||||||
|         return overflow_inst; |         return associated_insts->overflow_inst; | ||||||
|  |     case Opcode::GetSparseFromOp: | ||||||
|  |         CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); | ||||||
|  |         return associated_insts->sparse_inst; | ||||||
|     default: |     default: | ||||||
|         throw InvalidArgument("{} is not a pseudo-instruction", opcode); |         throw InvalidArgument("{} is not a pseudo-instruction", opcode); | ||||||
|     } |     } | ||||||
|  | @ -220,22 +223,37 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { | ||||||
|     op = opcode; |     op = opcode; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) { | ||||||
|  |     if (!associated_insts) { | ||||||
|  |         associated_insts = std::make_unique<AssociatedInsts>(); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void Inst::Use(const Value& value) { | void Inst::Use(const Value& value) { | ||||||
|     Inst* const inst{value.Inst()}; |     Inst* const inst{value.Inst()}; | ||||||
|     ++inst->use_count; |     ++inst->use_count; | ||||||
| 
 | 
 | ||||||
|  |     std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts}; | ||||||
|     switch (op) { |     switch (op) { | ||||||
|     case Opcode::GetZeroFromOp: |     case Opcode::GetZeroFromOp: | ||||||
|         SetPseudoInstruction(inst->zero_inst, this); |         AllocAssociatedInsts(assoc_inst); | ||||||
|  |         SetPseudoInstruction(assoc_inst->zero_inst, this); | ||||||
|         break; |         break; | ||||||
|     case Opcode::GetSignFromOp: |     case Opcode::GetSignFromOp: | ||||||
|         SetPseudoInstruction(inst->sign_inst, this); |         AllocAssociatedInsts(assoc_inst); | ||||||
|  |         SetPseudoInstruction(assoc_inst->sign_inst, this); | ||||||
|         break; |         break; | ||||||
|     case Opcode::GetCarryFromOp: |     case Opcode::GetCarryFromOp: | ||||||
|         SetPseudoInstruction(inst->carry_inst, this); |         AllocAssociatedInsts(assoc_inst); | ||||||
|  |         SetPseudoInstruction(assoc_inst->carry_inst, this); | ||||||
|         break; |         break; | ||||||
|     case Opcode::GetOverflowFromOp: |     case Opcode::GetOverflowFromOp: | ||||||
|         SetPseudoInstruction(inst->overflow_inst, this); |         AllocAssociatedInsts(assoc_inst); | ||||||
|  |         SetPseudoInstruction(assoc_inst->overflow_inst, this); | ||||||
|  |         break; | ||||||
|  |     case Opcode::GetSparseFromOp: | ||||||
|  |         AllocAssociatedInsts(assoc_inst); | ||||||
|  |         SetPseudoInstruction(assoc_inst->sparse_inst, this); | ||||||
|         break; |         break; | ||||||
|     default: |     default: | ||||||
|         break; |         break; | ||||||
|  | @ -246,18 +264,23 @@ void Inst::UndoUse(const Value& value) { | ||||||
|     Inst* const inst{value.Inst()}; |     Inst* const inst{value.Inst()}; | ||||||
|     --inst->use_count; |     --inst->use_count; | ||||||
| 
 | 
 | ||||||
|  |     std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts}; | ||||||
|     switch (op) { |     switch (op) { | ||||||
|     case Opcode::GetZeroFromOp: |     case Opcode::GetZeroFromOp: | ||||||
|         RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp); |         AllocAssociatedInsts(assoc_inst); | ||||||
|  |         RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp); | ||||||
|         break; |         break; | ||||||
|     case Opcode::GetSignFromOp: |     case Opcode::GetSignFromOp: | ||||||
|         RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp); |         AllocAssociatedInsts(assoc_inst); | ||||||
|  |         RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp); | ||||||
|         break; |         break; | ||||||
|     case Opcode::GetCarryFromOp: |     case Opcode::GetCarryFromOp: | ||||||
|         RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp); |         AllocAssociatedInsts(assoc_inst); | ||||||
|  |         RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp); | ||||||
|         break; |         break; | ||||||
|     case Opcode::GetOverflowFromOp: |     case Opcode::GetOverflowFromOp: | ||||||
|         RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp); |         AllocAssociatedInsts(assoc_inst); | ||||||
|  |         RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); | ||||||
|         break; |         break; | ||||||
|     default: |     default: | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|  | @ -22,7 +22,7 @@ namespace Shader::IR { | ||||||
| 
 | 
 | ||||||
| class Block; | class Block; | ||||||
| 
 | 
 | ||||||
| constexpr size_t MAX_ARG_COUNT = 4; | struct AssociatedInsts; | ||||||
| 
 | 
 | ||||||
| class Inst : public boost::intrusive::list_base_hook<> { | class Inst : public boost::intrusive::list_base_hook<> { | ||||||
| public: | public: | ||||||
|  | @ -50,6 +50,11 @@ public: | ||||||
|         return op; |         return op; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     /// Determines if there is a pseudo-operation associated with this instruction.
 | ||||||
|  |     [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { | ||||||
|  |         return associated_insts != nullptr; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     /// Determines whether or not this instruction may have side effects.
 |     /// Determines whether or not this instruction may have side effects.
 | ||||||
|     [[nodiscard]] bool MayHaveSideEffects() const noexcept; |     [[nodiscard]] bool MayHaveSideEffects() const noexcept; | ||||||
| 
 | 
 | ||||||
|  | @ -60,8 +65,6 @@ public: | ||||||
|     /// Determines if all arguments of this instruction are immediates.
 |     /// Determines if all arguments of this instruction are immediates.
 | ||||||
|     [[nodiscard]] bool AreAllArgsImmediates() const; |     [[nodiscard]] bool AreAllArgsImmediates() const; | ||||||
| 
 | 
 | ||||||
|     /// Determines if there is a pseudo-operation associated with this instruction.
 |  | ||||||
|     [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; |  | ||||||
|     /// Gets a pseudo-operation associated with this instruction
 |     /// Gets a pseudo-operation associated with this instruction
 | ||||||
|     [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); |     [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); | ||||||
| 
 | 
 | ||||||
|  | @ -122,14 +125,21 @@ private: | ||||||
|     u32 definition{}; |     u32 definition{}; | ||||||
|     union { |     union { | ||||||
|         NonTriviallyDummy dummy{}; |         NonTriviallyDummy dummy{}; | ||||||
|         std::array<Value, MAX_ARG_COUNT> args; |  | ||||||
|         std::vector<std::pair<Block*, Value>> phi_args; |         std::vector<std::pair<Block*, Value>> phi_args; | ||||||
|  |         std::array<Value, 5> args; | ||||||
|  |     }; | ||||||
|  |     std::unique_ptr<AssociatedInsts> associated_insts; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); | ||||||
|  | 
 | ||||||
|  | struct AssociatedInsts { | ||||||
|  |     union { | ||||||
|  |         Inst* sparse_inst; | ||||||
|  |         Inst* zero_inst{}; | ||||||
|     }; |     }; | ||||||
|     Inst* zero_inst{}; |  | ||||||
|     Inst* sign_inst{}; |     Inst* sign_inst{}; | ||||||
|     Inst* carry_inst{}; |     Inst* carry_inst{}; | ||||||
|     Inst* overflow_inst{}; |     Inst* overflow_inst{}; | ||||||
| }; | }; | ||||||
| static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size"); |  | ||||||
| 
 | 
 | ||||||
| } // namespace Shader::IR
 | } // namespace Shader::IR
 | ||||||
|  |  | ||||||
|  | @ -4,7 +4,9 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
|  | #include "common/bit_field.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "shader_recompiler/shader_info.h" | ||||||
| 
 | 
 | ||||||
| namespace Shader::IR { | namespace Shader::IR { | ||||||
| 
 | 
 | ||||||
|  | @ -30,4 +32,12 @@ struct FpControl { | ||||||
| }; | }; | ||||||
| static_assert(sizeof(FpControl) <= sizeof(u32)); | static_assert(sizeof(FpControl) <= sizeof(u32)); | ||||||
| 
 | 
 | ||||||
|  | union TextureInstInfo { | ||||||
|  |     u32 raw; | ||||||
|  |     BitField<0, 8, TextureType> type; | ||||||
|  |     BitField<8, 1, u32> has_bias; | ||||||
|  |     BitField<16, 1, u32> has_lod_clamp; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); | ||||||
|  | 
 | ||||||
| } // namespace Shader::IR
 | } // namespace Shader::IR
 | ||||||
|  |  | ||||||
|  | @ -14,7 +14,7 @@ namespace { | ||||||
| struct OpcodeMeta { | struct OpcodeMeta { | ||||||
|     std::string_view name; |     std::string_view name; | ||||||
|     Type type; |     Type type; | ||||||
|     std::array<Type, 4> arg_types; |     std::array<Type, 5> arg_types; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| using enum Type; | using enum Type; | ||||||
|  |  | ||||||
|  | @ -2,301 +2,330 @@ | ||||||
| // Licensed under GPLv2 or any later version
 | // Licensed under GPLv2 or any later version
 | ||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
| //     opcode name,                                         return type,    arg1 type,      arg2 type,      arg3 type,      arg4 type,      ...
 | //     opcode name,                                         return type,    arg1 type,      arg2 type,      arg3 type,      arg4 type,      arg4 type,      ...
 | ||||||
| OPCODE(Phi,                                                 Opaque,                                                                         ) | OPCODE(Phi,                                                 Opaque,                                                                                         ) | ||||||
| OPCODE(Identity,                                            Opaque,         Opaque,                                                         ) | OPCODE(Identity,                                            Opaque,         Opaque,                                                                         ) | ||||||
| OPCODE(Void,                                                Void,                                                                           ) | OPCODE(Void,                                                Void,                                                                                           ) | ||||||
| 
 | 
 | ||||||
| // Control flow
 | // Control flow
 | ||||||
| OPCODE(Branch,                                              Void,           Label,                                                          ) | OPCODE(Branch,                                              Void,           Label,                                                                          ) | ||||||
| OPCODE(BranchConditional,                                   Void,           U1,             Label,          Label,                          ) | OPCODE(BranchConditional,                                   Void,           U1,             Label,          Label,                                          ) | ||||||
| OPCODE(LoopMerge,                                           Void,           Label,          Label,                                          ) | OPCODE(LoopMerge,                                           Void,           Label,          Label,                                                          ) | ||||||
| OPCODE(SelectionMerge,                                      Void,           Label,                                                          ) | OPCODE(SelectionMerge,                                      Void,           Label,                                                                          ) | ||||||
| OPCODE(Return,                                              Void,                                                                           ) | OPCODE(Return,                                              Void,                                                                                           ) | ||||||
| 
 | 
 | ||||||
| // Context getters/setters
 | // Context getters/setters
 | ||||||
| OPCODE(GetRegister,                                         U32,            Reg,                                                            ) | OPCODE(GetRegister,                                         U32,            Reg,                                                                            ) | ||||||
| OPCODE(SetRegister,                                         Void,           Reg,            U32,                                            ) | OPCODE(SetRegister,                                         Void,           Reg,            U32,                                                            ) | ||||||
| OPCODE(GetPred,                                             U1,             Pred,                                                           ) | OPCODE(GetPred,                                             U1,             Pred,                                                                           ) | ||||||
| OPCODE(SetPred,                                             Void,           Pred,           U1,                                             ) | OPCODE(SetPred,                                             Void,           Pred,           U1,                                                             ) | ||||||
| OPCODE(GetGotoVariable,                                     U1,             U32,                                                            ) | OPCODE(GetGotoVariable,                                     U1,             U32,                                                                            ) | ||||||
| OPCODE(SetGotoVariable,                                     Void,           U32,            U1,                                             ) | OPCODE(SetGotoVariable,                                     Void,           U32,            U1,                                                             ) | ||||||
| OPCODE(GetCbuf,                                             U32,            U32,            U32,                                            ) | OPCODE(GetCbuf,                                             U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(GetAttribute,                                        U32,            Attribute,                                                      ) | OPCODE(GetAttribute,                                        U32,            Attribute,                                                                      ) | ||||||
| OPCODE(SetAttribute,                                        Void,           Attribute,      U32,                                            ) | OPCODE(SetAttribute,                                        Void,           Attribute,      U32,                                                            ) | ||||||
| OPCODE(GetAttributeIndexed,                                 U32,            U32,                                                            ) | OPCODE(GetAttributeIndexed,                                 U32,            U32,                                                                            ) | ||||||
| OPCODE(SetAttributeIndexed,                                 Void,           U32,            U32,                                            ) | OPCODE(SetAttributeIndexed,                                 Void,           U32,            U32,                                                            ) | ||||||
| OPCODE(GetZFlag,                                            U1,             Void,                                                           ) | OPCODE(GetZFlag,                                            U1,             Void,                                                                           ) | ||||||
| OPCODE(GetSFlag,                                            U1,             Void,                                                           ) | OPCODE(GetSFlag,                                            U1,             Void,                                                                           ) | ||||||
| OPCODE(GetCFlag,                                            U1,             Void,                                                           ) | OPCODE(GetCFlag,                                            U1,             Void,                                                                           ) | ||||||
| OPCODE(GetOFlag,                                            U1,             Void,                                                           ) | OPCODE(GetOFlag,                                            U1,             Void,                                                                           ) | ||||||
| OPCODE(SetZFlag,                                            Void,           U1,                                                             ) | OPCODE(SetZFlag,                                            Void,           U1,                                                                             ) | ||||||
| OPCODE(SetSFlag,                                            Void,           U1,                                                             ) | OPCODE(SetSFlag,                                            Void,           U1,                                                                             ) | ||||||
| OPCODE(SetCFlag,                                            Void,           U1,                                                             ) | OPCODE(SetCFlag,                                            Void,           U1,                                                                             ) | ||||||
| OPCODE(SetOFlag,                                            Void,           U1,                                                             ) | OPCODE(SetOFlag,                                            Void,           U1,                                                                             ) | ||||||
| OPCODE(WorkgroupId,                                         U32x3,                                                                          ) | OPCODE(WorkgroupId,                                         U32x3,                                                                                          ) | ||||||
| OPCODE(LocalInvocationId,                                   U32x3,                                                                          ) | OPCODE(LocalInvocationId,                                   U32x3,                                                                                          ) | ||||||
| 
 | 
 | ||||||
| // Undefined
 | // Undefined
 | ||||||
| OPCODE(UndefU1,                                             U1,                                                                             ) | OPCODE(UndefU1,                                             U1,                                                                                             ) | ||||||
| OPCODE(UndefU8,                                             U8,                                                                             ) | OPCODE(UndefU8,                                             U8,                                                                                             ) | ||||||
| OPCODE(UndefU16,                                            U16,                                                                            ) | OPCODE(UndefU16,                                            U16,                                                                                            ) | ||||||
| OPCODE(UndefU32,                                            U32,                                                                            ) | OPCODE(UndefU32,                                            U32,                                                                                            ) | ||||||
| OPCODE(UndefU64,                                            U64,                                                                            ) | OPCODE(UndefU64,                                            U64,                                                                                            ) | ||||||
| 
 | 
 | ||||||
| // Memory operations
 | // Memory operations
 | ||||||
| OPCODE(LoadGlobalU8,                                        U32,            U64,                                                            ) | OPCODE(LoadGlobalU8,                                        U32,            U64,                                                                            ) | ||||||
| OPCODE(LoadGlobalS8,                                        U32,            U64,                                                            ) | OPCODE(LoadGlobalS8,                                        U32,            U64,                                                                            ) | ||||||
| OPCODE(LoadGlobalU16,                                       U32,            U64,                                                            ) | OPCODE(LoadGlobalU16,                                       U32,            U64,                                                                            ) | ||||||
| OPCODE(LoadGlobalS16,                                       U32,            U64,                                                            ) | OPCODE(LoadGlobalS16,                                       U32,            U64,                                                                            ) | ||||||
| OPCODE(LoadGlobal32,                                        U32,            U64,                                                            ) | OPCODE(LoadGlobal32,                                        U32,            U64,                                                                            ) | ||||||
| OPCODE(LoadGlobal64,                                        U32x2,          U64,                                                            ) | OPCODE(LoadGlobal64,                                        U32x2,          U64,                                                                            ) | ||||||
| OPCODE(LoadGlobal128,                                       U32x4,          U64,                                                            ) | OPCODE(LoadGlobal128,                                       U32x4,          U64,                                                                            ) | ||||||
| OPCODE(WriteGlobalU8,                                       Void,           U64,            U32,                                            ) | OPCODE(WriteGlobalU8,                                       Void,           U64,            U32,                                                            ) | ||||||
| OPCODE(WriteGlobalS8,                                       Void,           U64,            U32,                                            ) | OPCODE(WriteGlobalS8,                                       Void,           U64,            U32,                                                            ) | ||||||
| OPCODE(WriteGlobalU16,                                      Void,           U64,            U32,                                            ) | OPCODE(WriteGlobalU16,                                      Void,           U64,            U32,                                                            ) | ||||||
| OPCODE(WriteGlobalS16,                                      Void,           U64,            U32,                                            ) | OPCODE(WriteGlobalS16,                                      Void,           U64,            U32,                                                            ) | ||||||
| OPCODE(WriteGlobal32,                                       Void,           U64,            U32,                                            ) | OPCODE(WriteGlobal32,                                       Void,           U64,            U32,                                                            ) | ||||||
| OPCODE(WriteGlobal64,                                       Void,           U64,            U32x2,                                          ) | OPCODE(WriteGlobal64,                                       Void,           U64,            U32x2,                                                          ) | ||||||
| OPCODE(WriteGlobal128,                                      Void,           U64,            U32x4,                                          ) | OPCODE(WriteGlobal128,                                      Void,           U64,            U32x4,                                                          ) | ||||||
| 
 | 
 | ||||||
| // Storage buffer operations
 | // Storage buffer operations
 | ||||||
| OPCODE(LoadStorageU8,                                       U32,            U32,            U32,                                            ) | OPCODE(LoadStorageU8,                                       U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(LoadStorageS8,                                       U32,            U32,            U32,                                            ) | OPCODE(LoadStorageS8,                                       U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(LoadStorageU16,                                      U32,            U32,            U32,                                            ) | OPCODE(LoadStorageU16,                                      U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(LoadStorageS16,                                      U32,            U32,            U32,                                            ) | OPCODE(LoadStorageS16,                                      U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(LoadStorage32,                                       U32,            U32,            U32,                                            ) | OPCODE(LoadStorage32,                                       U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(LoadStorage64,                                       U32x2,          U32,            U32,                                            ) | OPCODE(LoadStorage64,                                       U32x2,          U32,            U32,                                                            ) | ||||||
| OPCODE(LoadStorage128,                                      U32x4,          U32,            U32,                                            ) | OPCODE(LoadStorage128,                                      U32x4,          U32,            U32,                                                            ) | ||||||
| OPCODE(WriteStorageU8,                                      Void,           U32,            U32,            U32,                            ) | OPCODE(WriteStorageU8,                                      Void,           U32,            U32,            U32,                                            ) | ||||||
| OPCODE(WriteStorageS8,                                      Void,           U32,            U32,            U32,                            ) | OPCODE(WriteStorageS8,                                      Void,           U32,            U32,            U32,                                            ) | ||||||
| OPCODE(WriteStorageU16,                                     Void,           U32,            U32,            U32,                            ) | OPCODE(WriteStorageU16,                                     Void,           U32,            U32,            U32,                                            ) | ||||||
| OPCODE(WriteStorageS16,                                     Void,           U32,            U32,            U32,                            ) | OPCODE(WriteStorageS16,                                     Void,           U32,            U32,            U32,                                            ) | ||||||
| OPCODE(WriteStorage32,                                      Void,           U32,            U32,            U32,                            ) | OPCODE(WriteStorage32,                                      Void,           U32,            U32,            U32,                                            ) | ||||||
| OPCODE(WriteStorage64,                                      Void,           U32,            U32,            U32x2,                          ) | OPCODE(WriteStorage64,                                      Void,           U32,            U32,            U32x2,                                          ) | ||||||
| OPCODE(WriteStorage128,                                     Void,           U32,            U32,            U32x4,                          ) | OPCODE(WriteStorage128,                                     Void,           U32,            U32,            U32x4,                                          ) | ||||||
| 
 | 
 | ||||||
| // Vector utility
 | // Vector utility
 | ||||||
| OPCODE(CompositeConstructU32x2,                             U32x2,          U32,            U32,                                            ) | OPCODE(CompositeConstructU32x2,                             U32x2,          U32,            U32,                                                            ) | ||||||
| OPCODE(CompositeConstructU32x3,                             U32x3,          U32,            U32,            U32,                            ) | OPCODE(CompositeConstructU32x3,                             U32x3,          U32,            U32,            U32,                                            ) | ||||||
| OPCODE(CompositeConstructU32x4,                             U32x4,          U32,            U32,            U32,            U32,            ) | OPCODE(CompositeConstructU32x4,                             U32x4,          U32,            U32,            U32,            U32,                            ) | ||||||
| OPCODE(CompositeExtractU32x2,                               U32,            U32x2,          U32,                                            ) | OPCODE(CompositeExtractU32x2,                               U32,            U32x2,          U32,                                                            ) | ||||||
| OPCODE(CompositeExtractU32x3,                               U32,            U32x3,          U32,                                            ) | OPCODE(CompositeExtractU32x3,                               U32,            U32x3,          U32,                                                            ) | ||||||
| OPCODE(CompositeExtractU32x4,                               U32,            U32x4,          U32,                                            ) | OPCODE(CompositeExtractU32x4,                               U32,            U32x4,          U32,                                                            ) | ||||||
| OPCODE(CompositeInsertU32x2,                                U32x2,          U32x2,          U32,            U32,                            ) | OPCODE(CompositeInsertU32x2,                                U32x2,          U32x2,          U32,            U32,                                            ) | ||||||
| OPCODE(CompositeInsertU32x3,                                U32x3,          U32x3,          U32,            U32,                            ) | OPCODE(CompositeInsertU32x3,                                U32x3,          U32x3,          U32,            U32,                                            ) | ||||||
| OPCODE(CompositeInsertU32x4,                                U32x4,          U32x4,          U32,            U32,                            ) | OPCODE(CompositeInsertU32x4,                                U32x4,          U32x4,          U32,            U32,                                            ) | ||||||
| OPCODE(CompositeConstructF16x2,                             F16x2,          F16,            F16,                                            ) | OPCODE(CompositeConstructF16x2,                             F16x2,          F16,            F16,                                                            ) | ||||||
| OPCODE(CompositeConstructF16x3,                             F16x3,          F16,            F16,            F16,                            ) | OPCODE(CompositeConstructF16x3,                             F16x3,          F16,            F16,            F16,                                            ) | ||||||
| OPCODE(CompositeConstructF16x4,                             F16x4,          F16,            F16,            F16,            F16,            ) | OPCODE(CompositeConstructF16x4,                             F16x4,          F16,            F16,            F16,            F16,                            ) | ||||||
| OPCODE(CompositeExtractF16x2,                               F16,            F16x2,          U32,                                            ) | OPCODE(CompositeExtractF16x2,                               F16,            F16x2,          U32,                                                            ) | ||||||
| OPCODE(CompositeExtractF16x3,                               F16,            F16x3,          U32,                                            ) | OPCODE(CompositeExtractF16x3,                               F16,            F16x3,          U32,                                                            ) | ||||||
| OPCODE(CompositeExtractF16x4,                               F16,            F16x4,          U32,                                            ) | OPCODE(CompositeExtractF16x4,                               F16,            F16x4,          U32,                                                            ) | ||||||
| OPCODE(CompositeInsertF16x2,                                F16x2,          F16x2,          F16,            U32,                            ) | OPCODE(CompositeInsertF16x2,                                F16x2,          F16x2,          F16,            U32,                                            ) | ||||||
| OPCODE(CompositeInsertF16x3,                                F16x3,          F16x3,          F16,            U32,                            ) | OPCODE(CompositeInsertF16x3,                                F16x3,          F16x3,          F16,            U32,                                            ) | ||||||
| OPCODE(CompositeInsertF16x4,                                F16x4,          F16x4,          F16,            U32,                            ) | OPCODE(CompositeInsertF16x4,                                F16x4,          F16x4,          F16,            U32,                                            ) | ||||||
| OPCODE(CompositeConstructF32x2,                             F32x2,          F32,            F32,                                            ) | OPCODE(CompositeConstructF32x2,                             F32x2,          F32,            F32,                                                            ) | ||||||
| OPCODE(CompositeConstructF32x3,                             F32x3,          F32,            F32,            F32,                            ) | OPCODE(CompositeConstructF32x3,                             F32x3,          F32,            F32,            F32,                                            ) | ||||||
| OPCODE(CompositeConstructF32x4,                             F32x4,          F32,            F32,            F32,            F32,            ) | OPCODE(CompositeConstructF32x4,                             F32x4,          F32,            F32,            F32,            F32,                            ) | ||||||
| OPCODE(CompositeExtractF32x2,                               F32,            F32x2,          U32,                                            ) | OPCODE(CompositeExtractF32x2,                               F32,            F32x2,          U32,                                                            ) | ||||||
| OPCODE(CompositeExtractF32x3,                               F32,            F32x3,          U32,                                            ) | OPCODE(CompositeExtractF32x3,                               F32,            F32x3,          U32,                                                            ) | ||||||
| OPCODE(CompositeExtractF32x4,                               F32,            F32x4,          U32,                                            ) | OPCODE(CompositeExtractF32x4,                               F32,            F32x4,          U32,                                                            ) | ||||||
| OPCODE(CompositeInsertF32x2,                                F32x2,          F32x2,          F32,            U32,                            ) | OPCODE(CompositeInsertF32x2,                                F32x2,          F32x2,          F32,            U32,                                            ) | ||||||
| OPCODE(CompositeInsertF32x3,                                F32x3,          F32x3,          F32,            U32,                            ) | OPCODE(CompositeInsertF32x3,                                F32x3,          F32x3,          F32,            U32,                                            ) | ||||||
| OPCODE(CompositeInsertF32x4,                                F32x4,          F32x4,          F32,            U32,                            ) | OPCODE(CompositeInsertF32x4,                                F32x4,          F32x4,          F32,            U32,                                            ) | ||||||
| OPCODE(CompositeConstructF64x2,                             F64x2,          F64,            F64,                                            ) | OPCODE(CompositeConstructF64x2,                             F64x2,          F64,            F64,                                                            ) | ||||||
| OPCODE(CompositeConstructF64x3,                             F64x3,          F64,            F64,            F64,                            ) | OPCODE(CompositeConstructF64x3,                             F64x3,          F64,            F64,            F64,                                            ) | ||||||
| OPCODE(CompositeConstructF64x4,                             F64x4,          F64,            F64,            F64,            F64,            ) | OPCODE(CompositeConstructF64x4,                             F64x4,          F64,            F64,            F64,            F64,                            ) | ||||||
| OPCODE(CompositeExtractF64x2,                               F64,            F64x2,          U32,                                            ) | OPCODE(CompositeExtractF64x2,                               F64,            F64x2,          U32,                                                            ) | ||||||
| OPCODE(CompositeExtractF64x3,                               F64,            F64x3,          U32,                                            ) | OPCODE(CompositeExtractF64x3,                               F64,            F64x3,          U32,                                                            ) | ||||||
| OPCODE(CompositeExtractF64x4,                               F64,            F64x4,          U32,                                            ) | OPCODE(CompositeExtractF64x4,                               F64,            F64x4,          U32,                                                            ) | ||||||
| OPCODE(CompositeInsertF64x2,                                F64x2,          F64x2,          F64,            U32,                            ) | OPCODE(CompositeInsertF64x2,                                F64x2,          F64x2,          F64,            U32,                                            ) | ||||||
| OPCODE(CompositeInsertF64x3,                                F64x3,          F64x3,          F64,            U32,                            ) | OPCODE(CompositeInsertF64x3,                                F64x3,          F64x3,          F64,            U32,                                            ) | ||||||
| OPCODE(CompositeInsertF64x4,                                F64x4,          F64x4,          F64,            U32,                            ) | OPCODE(CompositeInsertF64x4,                                F64x4,          F64x4,          F64,            U32,                                            ) | ||||||
| 
 | 
 | ||||||
| // Select operations
 | // Select operations
 | ||||||
| OPCODE(SelectU1,                                            U1,             U1,             U1,             U1,                             ) | OPCODE(SelectU1,                                            U1,             U1,             U1,             U1,                                             ) | ||||||
| OPCODE(SelectU8,                                            U8,             U1,             U8,             U8,                             ) | OPCODE(SelectU8,                                            U8,             U1,             U8,             U8,                                             ) | ||||||
| OPCODE(SelectU16,                                           U16,            U1,             U16,            U16,                            ) | OPCODE(SelectU16,                                           U16,            U1,             U16,            U16,                                            ) | ||||||
| OPCODE(SelectU32,                                           U32,            U1,             U32,            U32,                            ) | OPCODE(SelectU32,                                           U32,            U1,             U32,            U32,                                            ) | ||||||
| OPCODE(SelectU64,                                           U64,            U1,             U64,            U64,                            ) | OPCODE(SelectU64,                                           U64,            U1,             U64,            U64,                                            ) | ||||||
| OPCODE(SelectF16,                                           F16,            U1,             F16,            F16,                            ) | OPCODE(SelectF16,                                           F16,            U1,             F16,            F16,                                            ) | ||||||
| OPCODE(SelectF32,                                           F32,            U1,             F32,            F32,                            ) | OPCODE(SelectF32,                                           F32,            U1,             F32,            F32,                                            ) | ||||||
| 
 | 
 | ||||||
| // Bitwise conversions
 | // Bitwise conversions
 | ||||||
| OPCODE(BitCastU16F16,                                       U16,            F16,                                                            ) | OPCODE(BitCastU16F16,                                       U16,            F16,                                                                            ) | ||||||
| OPCODE(BitCastU32F32,                                       U32,            F32,                                                            ) | OPCODE(BitCastU32F32,                                       U32,            F32,                                                                            ) | ||||||
| OPCODE(BitCastU64F64,                                       U64,            F64,                                                            ) | OPCODE(BitCastU64F64,                                       U64,            F64,                                                                            ) | ||||||
| OPCODE(BitCastF16U16,                                       F16,            U16,                                                            ) | OPCODE(BitCastF16U16,                                       F16,            U16,                                                                            ) | ||||||
| OPCODE(BitCastF32U32,                                       F32,            U32,                                                            ) | OPCODE(BitCastF32U32,                                       F32,            U32,                                                                            ) | ||||||
| OPCODE(BitCastF64U64,                                       F64,            U64,                                                            ) | OPCODE(BitCastF64U64,                                       F64,            U64,                                                                            ) | ||||||
| OPCODE(PackUint2x32,                                        U64,            U32x2,                                                          ) | OPCODE(PackUint2x32,                                        U64,            U32x2,                                                                          ) | ||||||
| OPCODE(UnpackUint2x32,                                      U32x2,          U64,                                                            ) | OPCODE(UnpackUint2x32,                                      U32x2,          U64,                                                                            ) | ||||||
| OPCODE(PackFloat2x16,                                       U32,            F16x2,                                                          ) | OPCODE(PackFloat2x16,                                       U32,            F16x2,                                                                          ) | ||||||
| OPCODE(UnpackFloat2x16,                                     F16x2,          U32,                                                            ) | OPCODE(UnpackFloat2x16,                                     F16x2,          U32,                                                                            ) | ||||||
| OPCODE(PackHalf2x16,                                        U32,            F32x2,                                                          ) | OPCODE(PackHalf2x16,                                        U32,            F32x2,                                                                          ) | ||||||
| OPCODE(UnpackHalf2x16,                                      F32x2,          U32,                                                            ) | OPCODE(UnpackHalf2x16,                                      F32x2,          U32,                                                                            ) | ||||||
| OPCODE(PackDouble2x32,                                      F64,            U32x2,                                                          ) | OPCODE(PackDouble2x32,                                      F64,            U32x2,                                                                          ) | ||||||
| OPCODE(UnpackDouble2x32,                                    U32x2,          F64,                                                            ) | OPCODE(UnpackDouble2x32,                                    U32x2,          F64,                                                                            ) | ||||||
| 
 | 
 | ||||||
| // Pseudo-operation, handled specially at final emit
 | // Pseudo-operation, handled specially at final emit
 | ||||||
| OPCODE(GetZeroFromOp,                                       U1,             Opaque,                                                         ) | OPCODE(GetZeroFromOp,                                       U1,             Opaque,                                                                         ) | ||||||
| OPCODE(GetSignFromOp,                                       U1,             Opaque,                                                         ) | OPCODE(GetSignFromOp,                                       U1,             Opaque,                                                                         ) | ||||||
| OPCODE(GetCarryFromOp,                                      U1,             Opaque,                                                         ) | OPCODE(GetCarryFromOp,                                      U1,             Opaque,                                                                         ) | ||||||
| OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                         ) | OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                                         ) | ||||||
|  | OPCODE(GetSparseFromOp,                                     U1,             Opaque,                                                                         ) | ||||||
| 
 | 
 | ||||||
| // Floating-point operations
 | // Floating-point operations
 | ||||||
| OPCODE(FPAbs16,                                             F16,            F16,                                                            ) | OPCODE(FPAbs16,                                             F16,            F16,                                                                            ) | ||||||
| OPCODE(FPAbs32,                                             F32,            F32,                                                            ) | OPCODE(FPAbs32,                                             F32,            F32,                                                                            ) | ||||||
| OPCODE(FPAbs64,                                             F64,            F64,                                                            ) | OPCODE(FPAbs64,                                             F64,            F64,                                                                            ) | ||||||
| OPCODE(FPAdd16,                                             F16,            F16,            F16,                                            ) | OPCODE(FPAdd16,                                             F16,            F16,            F16,                                                            ) | ||||||
| OPCODE(FPAdd32,                                             F32,            F32,            F32,                                            ) | OPCODE(FPAdd32,                                             F32,            F32,            F32,                                                            ) | ||||||
| OPCODE(FPAdd64,                                             F64,            F64,            F64,                                            ) | OPCODE(FPAdd64,                                             F64,            F64,            F64,                                                            ) | ||||||
| OPCODE(FPFma16,                                             F16,            F16,            F16,            F16,                            ) | OPCODE(FPFma16,                                             F16,            F16,            F16,            F16,                                            ) | ||||||
| OPCODE(FPFma32,                                             F32,            F32,            F32,            F32,                            ) | OPCODE(FPFma32,                                             F32,            F32,            F32,            F32,                                            ) | ||||||
| OPCODE(FPFma64,                                             F64,            F64,            F64,            F64,                            ) | OPCODE(FPFma64,                                             F64,            F64,            F64,            F64,                                            ) | ||||||
| OPCODE(FPMax32,                                             F32,            F32,            F32,                                            ) | OPCODE(FPMax32,                                             F32,            F32,            F32,                                                            ) | ||||||
| OPCODE(FPMax64,                                             F64,            F64,            F64,                                            ) | OPCODE(FPMax64,                                             F64,            F64,            F64,                                                            ) | ||||||
| OPCODE(FPMin32,                                             F32,            F32,            F32,                                            ) | OPCODE(FPMin32,                                             F32,            F32,            F32,                                                            ) | ||||||
| OPCODE(FPMin64,                                             F64,            F64,            F64,                                            ) | OPCODE(FPMin64,                                             F64,            F64,            F64,                                                            ) | ||||||
| OPCODE(FPMul16,                                             F16,            F16,            F16,                                            ) | OPCODE(FPMul16,                                             F16,            F16,            F16,                                                            ) | ||||||
| OPCODE(FPMul32,                                             F32,            F32,            F32,                                            ) | OPCODE(FPMul32,                                             F32,            F32,            F32,                                                            ) | ||||||
| OPCODE(FPMul64,                                             F64,            F64,            F64,                                            ) | OPCODE(FPMul64,                                             F64,            F64,            F64,                                                            ) | ||||||
| OPCODE(FPNeg16,                                             F16,            F16,                                                            ) | OPCODE(FPNeg16,                                             F16,            F16,                                                                            ) | ||||||
| OPCODE(FPNeg32,                                             F32,            F32,                                                            ) | OPCODE(FPNeg32,                                             F32,            F32,                                                                            ) | ||||||
| OPCODE(FPNeg64,                                             F64,            F64,                                                            ) | OPCODE(FPNeg64,                                             F64,            F64,                                                                            ) | ||||||
| OPCODE(FPRecip32,                                           F32,            F32,                                                            ) | OPCODE(FPRecip32,                                           F32,            F32,                                                                            ) | ||||||
| OPCODE(FPRecip64,                                           F64,            F64,                                                            ) | OPCODE(FPRecip64,                                           F64,            F64,                                                                            ) | ||||||
| OPCODE(FPRecipSqrt32,                                       F32,            F32,                                                            ) | OPCODE(FPRecipSqrt32,                                       F32,            F32,                                                                            ) | ||||||
| OPCODE(FPRecipSqrt64,                                       F64,            F64,                                                            ) | OPCODE(FPRecipSqrt64,                                       F64,            F64,                                                                            ) | ||||||
| OPCODE(FPSqrt,                                              F32,            F32,                                                            ) | OPCODE(FPSqrt,                                              F32,            F32,                                                                            ) | ||||||
| OPCODE(FPSin,                                               F32,            F32,                                                            ) | OPCODE(FPSin,                                               F32,            F32,                                                                            ) | ||||||
| OPCODE(FPExp2,                                              F32,            F32,                                                            ) | OPCODE(FPExp2,                                              F32,            F32,                                                                            ) | ||||||
| OPCODE(FPCos,                                               F32,            F32,                                                            ) | OPCODE(FPCos,                                               F32,            F32,                                                                            ) | ||||||
| OPCODE(FPLog2,                                              F32,            F32,                                                            ) | OPCODE(FPLog2,                                              F32,            F32,                                                                            ) | ||||||
| OPCODE(FPSaturate16,                                        F16,            F16,                                                            ) | OPCODE(FPSaturate16,                                        F16,            F16,                                                                            ) | ||||||
| OPCODE(FPSaturate32,                                        F32,            F32,                                                            ) | OPCODE(FPSaturate32,                                        F32,            F32,                                                                            ) | ||||||
| OPCODE(FPSaturate64,                                        F64,            F64,                                                            ) | OPCODE(FPSaturate64,                                        F64,            F64,                                                                            ) | ||||||
| OPCODE(FPRoundEven16,                                       F16,            F16,                                                            ) | OPCODE(FPRoundEven16,                                       F16,            F16,                                                                            ) | ||||||
| OPCODE(FPRoundEven32,                                       F32,            F32,                                                            ) | OPCODE(FPRoundEven32,                                       F32,            F32,                                                                            ) | ||||||
| OPCODE(FPRoundEven64,                                       F64,            F64,                                                            ) | OPCODE(FPRoundEven64,                                       F64,            F64,                                                                            ) | ||||||
| OPCODE(FPFloor16,                                           F16,            F16,                                                            ) | OPCODE(FPFloor16,                                           F16,            F16,                                                                            ) | ||||||
| OPCODE(FPFloor32,                                           F32,            F32,                                                            ) | OPCODE(FPFloor32,                                           F32,            F32,                                                                            ) | ||||||
| OPCODE(FPFloor64,                                           F64,            F64,                                                            ) | OPCODE(FPFloor64,                                           F64,            F64,                                                                            ) | ||||||
| OPCODE(FPCeil16,                                            F16,            F16,                                                            ) | OPCODE(FPCeil16,                                            F16,            F16,                                                                            ) | ||||||
| OPCODE(FPCeil32,                                            F32,            F32,                                                            ) | OPCODE(FPCeil32,                                            F32,            F32,                                                                            ) | ||||||
| OPCODE(FPCeil64,                                            F64,            F64,                                                            ) | OPCODE(FPCeil64,                                            F64,            F64,                                                                            ) | ||||||
| OPCODE(FPTrunc16,                                           F16,            F16,                                                            ) | OPCODE(FPTrunc16,                                           F16,            F16,                                                                            ) | ||||||
| OPCODE(FPTrunc32,                                           F32,            F32,                                                            ) | OPCODE(FPTrunc32,                                           F32,            F32,                                                                            ) | ||||||
| OPCODE(FPTrunc64,                                           F64,            F64,                                                            ) | OPCODE(FPTrunc64,                                           F64,            F64,                                                                            ) | ||||||
| 
 | 
 | ||||||
| OPCODE(FPOrdEqual16,                                        U1,             F16,            F16,                                            ) | OPCODE(FPOrdEqual16,                                        U1,             F16,            F16,                                                            ) | ||||||
| OPCODE(FPOrdEqual32,                                        U1,             F32,            F32,                                            ) | OPCODE(FPOrdEqual32,                                        U1,             F32,            F32,                                                            ) | ||||||
| OPCODE(FPOrdEqual64,                                        U1,             F64,            F64,                                            ) | OPCODE(FPOrdEqual64,                                        U1,             F64,            F64,                                                            ) | ||||||
| OPCODE(FPUnordEqual16,                                      U1,             F16,            F16,                                            ) | OPCODE(FPUnordEqual16,                                      U1,             F16,            F16,                                                            ) | ||||||
| OPCODE(FPUnordEqual32,                                      U1,             F32,            F32,                                            ) | OPCODE(FPUnordEqual32,                                      U1,             F32,            F32,                                                            ) | ||||||
| OPCODE(FPUnordEqual64,                                      U1,             F64,            F64,                                            ) | OPCODE(FPUnordEqual64,                                      U1,             F64,            F64,                                                            ) | ||||||
| OPCODE(FPOrdNotEqual16,                                     U1,             F16,            F16,                                            ) | OPCODE(FPOrdNotEqual16,                                     U1,             F16,            F16,                                                            ) | ||||||
| OPCODE(FPOrdNotEqual32,                                     U1,             F32,            F32,                                            ) | OPCODE(FPOrdNotEqual32,                                     U1,             F32,            F32,                                                            ) | ||||||
| OPCODE(FPOrdNotEqual64,                                     U1,             F64,            F64,                                            ) | OPCODE(FPOrdNotEqual64,                                     U1,             F64,            F64,                                                            ) | ||||||
| OPCODE(FPUnordNotEqual16,                                   U1,             F16,            F16,                                            ) | OPCODE(FPUnordNotEqual16,                                   U1,             F16,            F16,                                                            ) | ||||||
| OPCODE(FPUnordNotEqual32,                                   U1,             F32,            F32,                                            ) | OPCODE(FPUnordNotEqual32,                                   U1,             F32,            F32,                                                            ) | ||||||
| OPCODE(FPUnordNotEqual64,                                   U1,             F64,            F64,                                            ) | OPCODE(FPUnordNotEqual64,                                   U1,             F64,            F64,                                                            ) | ||||||
| OPCODE(FPOrdLessThan16,                                     U1,             F16,            F16,                                            ) | OPCODE(FPOrdLessThan16,                                     U1,             F16,            F16,                                                            ) | ||||||
| OPCODE(FPOrdLessThan32,                                     U1,             F32,            F32,                                            ) | OPCODE(FPOrdLessThan32,                                     U1,             F32,            F32,                                                            ) | ||||||
| OPCODE(FPOrdLessThan64,                                     U1,             F64,            F64,                                            ) | OPCODE(FPOrdLessThan64,                                     U1,             F64,            F64,                                                            ) | ||||||
| OPCODE(FPUnordLessThan16,                                   U1,             F16,            F16,                                            ) | OPCODE(FPUnordLessThan16,                                   U1,             F16,            F16,                                                            ) | ||||||
| OPCODE(FPUnordLessThan32,                                   U1,             F32,            F32,                                            ) | OPCODE(FPUnordLessThan32,                                   U1,             F32,            F32,                                                            ) | ||||||
| OPCODE(FPUnordLessThan64,                                   U1,             F64,            F64,                                            ) | OPCODE(FPUnordLessThan64,                                   U1,             F64,            F64,                                                            ) | ||||||
| OPCODE(FPOrdGreaterThan16,                                  U1,             F16,            F16,                                            ) | OPCODE(FPOrdGreaterThan16,                                  U1,             F16,            F16,                                                            ) | ||||||
| OPCODE(FPOrdGreaterThan32,                                  U1,             F32,            F32,                                            ) | OPCODE(FPOrdGreaterThan32,                                  U1,             F32,            F32,                                                            ) | ||||||
| OPCODE(FPOrdGreaterThan64,                                  U1,             F64,            F64,                                            ) | OPCODE(FPOrdGreaterThan64,                                  U1,             F64,            F64,                                                            ) | ||||||
| OPCODE(FPUnordGreaterThan16,                                U1,             F16,            F16,                                            ) | OPCODE(FPUnordGreaterThan16,                                U1,             F16,            F16,                                                            ) | ||||||
| OPCODE(FPUnordGreaterThan32,                                U1,             F32,            F32,                                            ) | OPCODE(FPUnordGreaterThan32,                                U1,             F32,            F32,                                                            ) | ||||||
| OPCODE(FPUnordGreaterThan64,                                U1,             F64,            F64,                                            ) | OPCODE(FPUnordGreaterThan64,                                U1,             F64,            F64,                                                            ) | ||||||
| OPCODE(FPOrdLessThanEqual16,                                U1,             F16,            F16,                                            ) | OPCODE(FPOrdLessThanEqual16,                                U1,             F16,            F16,                                                            ) | ||||||
| OPCODE(FPOrdLessThanEqual32,                                U1,             F32,            F32,                                            ) | OPCODE(FPOrdLessThanEqual32,                                U1,             F32,            F32,                                                            ) | ||||||
| OPCODE(FPOrdLessThanEqual64,                                U1,             F64,            F64,                                            ) | OPCODE(FPOrdLessThanEqual64,                                U1,             F64,            F64,                                                            ) | ||||||
| OPCODE(FPUnordLessThanEqual16,                              U1,             F16,            F16,                                            ) | OPCODE(FPUnordLessThanEqual16,                              U1,             F16,            F16,                                                            ) | ||||||
| OPCODE(FPUnordLessThanEqual32,                              U1,             F32,            F32,                                            ) | OPCODE(FPUnordLessThanEqual32,                              U1,             F32,            F32,                                                            ) | ||||||
| OPCODE(FPUnordLessThanEqual64,                              U1,             F64,            F64,                                            ) | OPCODE(FPUnordLessThanEqual64,                              U1,             F64,            F64,                                                            ) | ||||||
| OPCODE(FPOrdGreaterThanEqual16,                             U1,             F16,            F16,                                            ) | OPCODE(FPOrdGreaterThanEqual16,                             U1,             F16,            F16,                                                            ) | ||||||
| OPCODE(FPOrdGreaterThanEqual32,                             U1,             F32,            F32,                                            ) | OPCODE(FPOrdGreaterThanEqual32,                             U1,             F32,            F32,                                                            ) | ||||||
| OPCODE(FPOrdGreaterThanEqual64,                             U1,             F64,            F64,                                            ) | OPCODE(FPOrdGreaterThanEqual64,                             U1,             F64,            F64,                                                            ) | ||||||
| OPCODE(FPUnordGreaterThanEqual16,                           U1,             F16,            F16,                                            ) | OPCODE(FPUnordGreaterThanEqual16,                           U1,             F16,            F16,                                                            ) | ||||||
| OPCODE(FPUnordGreaterThanEqual32,                           U1,             F32,            F32,                                            ) | OPCODE(FPUnordGreaterThanEqual32,                           U1,             F32,            F32,                                                            ) | ||||||
| OPCODE(FPUnordGreaterThanEqual64,                           U1,             F64,            F64,                                            ) | OPCODE(FPUnordGreaterThanEqual64,                           U1,             F64,            F64,                                                            ) | ||||||
| 
 | 
 | ||||||
| // Integer operations
 | // Integer operations
 | ||||||
| OPCODE(IAdd32,                                              U32,            U32,            U32,                                            ) | OPCODE(IAdd32,                                              U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(IAdd64,                                              U64,            U64,            U64,                                            ) | OPCODE(IAdd64,                                              U64,            U64,            U64,                                                            ) | ||||||
| OPCODE(ISub32,                                              U32,            U32,            U32,                                            ) | OPCODE(ISub32,                                              U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(ISub64,                                              U64,            U64,            U64,                                            ) | OPCODE(ISub64,                                              U64,            U64,            U64,                                                            ) | ||||||
| OPCODE(IMul32,                                              U32,            U32,            U32,                                            ) | OPCODE(IMul32,                                              U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(INeg32,                                              U32,            U32,                                                            ) | OPCODE(INeg32,                                              U32,            U32,                                                                            ) | ||||||
| OPCODE(INeg64,                                              U64,            U64,                                                            ) | OPCODE(INeg64,                                              U64,            U64,                                                                            ) | ||||||
| OPCODE(IAbs32,                                              U32,            U32,                                                            ) | OPCODE(IAbs32,                                              U32,            U32,                                                                            ) | ||||||
| OPCODE(ShiftLeftLogical32,                                  U32,            U32,            U32,                                            ) | OPCODE(ShiftLeftLogical32,                                  U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(ShiftLeftLogical64,                                  U64,            U64,            U32,                                            ) | OPCODE(ShiftLeftLogical64,                                  U64,            U64,            U32,                                                            ) | ||||||
| OPCODE(ShiftRightLogical32,                                 U32,            U32,            U32,                                            ) | OPCODE(ShiftRightLogical32,                                 U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(ShiftRightLogical64,                                 U64,            U64,            U32,                                            ) | OPCODE(ShiftRightLogical64,                                 U64,            U64,            U32,                                                            ) | ||||||
| OPCODE(ShiftRightArithmetic32,                              U32,            U32,            U32,                                            ) | OPCODE(ShiftRightArithmetic32,                              U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(ShiftRightArithmetic64,                              U64,            U64,            U32,                                            ) | OPCODE(ShiftRightArithmetic64,                              U64,            U64,            U32,                                                            ) | ||||||
| OPCODE(BitwiseAnd32,                                        U32,            U32,            U32,                                            ) | OPCODE(BitwiseAnd32,                                        U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(BitwiseOr32,                                         U32,            U32,            U32,                                            ) | OPCODE(BitwiseOr32,                                         U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(BitwiseXor32,                                        U32,            U32,            U32,                                            ) | OPCODE(BitwiseXor32,                                        U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(BitFieldInsert,                                      U32,            U32,            U32,            U32,            U32,            ) | OPCODE(BitFieldInsert,                                      U32,            U32,            U32,            U32,            U32,                            ) | ||||||
| OPCODE(BitFieldSExtract,                                    U32,            U32,            U32,            U32,                            ) | OPCODE(BitFieldSExtract,                                    U32,            U32,            U32,            U32,                                            ) | ||||||
| OPCODE(BitFieldUExtract,                                    U32,            U32,            U32,            U32,                            ) | OPCODE(BitFieldUExtract,                                    U32,            U32,            U32,            U32,                                            ) | ||||||
| OPCODE(BitReverse32,                                        U32,            U32,                                                            ) | OPCODE(BitReverse32,                                        U32,            U32,                                                                            ) | ||||||
| OPCODE(BitCount32,                                          U32,            U32,                                                            ) | OPCODE(BitCount32,                                          U32,            U32,                                                                            ) | ||||||
| OPCODE(BitwiseNot32,                                        U32,            U32,                                                            ) | OPCODE(BitwiseNot32,                                        U32,            U32,                                                                            ) | ||||||
| 
 | 
 | ||||||
| OPCODE(FindSMsb32,                                          U32,            U32,                                                            ) | OPCODE(FindSMsb32,                                          U32,            U32,                                                                            ) | ||||||
| OPCODE(FindUMsb32,                                          U32,            U32,                                                            ) | OPCODE(FindUMsb32,                                          U32,            U32,                                                                            ) | ||||||
| OPCODE(SMin32,                                              U32,            U32,            U32,                                            ) | OPCODE(SMin32,                                              U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(UMin32,                                              U32,            U32,            U32,                                            ) | OPCODE(UMin32,                                              U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(SMax32,                                              U32,            U32,            U32,                                            ) | OPCODE(SMax32,                                              U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(UMax32,                                              U32,            U32,            U32,                                            ) | OPCODE(UMax32,                                              U32,            U32,            U32,                                                            ) | ||||||
| OPCODE(SLessThan,                                           U1,             U32,            U32,                                            ) | OPCODE(SLessThan,                                           U1,             U32,            U32,                                                            ) | ||||||
| OPCODE(ULessThan,                                           U1,             U32,            U32,                                            ) | OPCODE(ULessThan,                                           U1,             U32,            U32,                                                            ) | ||||||
| OPCODE(IEqual,                                              U1,             U32,            U32,                                            ) | OPCODE(IEqual,                                              U1,             U32,            U32,                                                            ) | ||||||
| OPCODE(SLessThanEqual,                                      U1,             U32,            U32,                                            ) | OPCODE(SLessThanEqual,                                      U1,             U32,            U32,                                                            ) | ||||||
| OPCODE(ULessThanEqual,                                      U1,             U32,            U32,                                            ) | OPCODE(ULessThanEqual,                                      U1,             U32,            U32,                                                            ) | ||||||
| OPCODE(SGreaterThan,                                        U1,             U32,            U32,                                            ) | OPCODE(SGreaterThan,                                        U1,             U32,            U32,                                                            ) | ||||||
| OPCODE(UGreaterThan,                                        U1,             U32,            U32,                                            ) | OPCODE(UGreaterThan,                                        U1,             U32,            U32,                                                            ) | ||||||
| OPCODE(INotEqual,                                           U1,             U32,            U32,                                            ) | OPCODE(INotEqual,                                           U1,             U32,            U32,                                                            ) | ||||||
| OPCODE(SGreaterThanEqual,                                   U1,             U32,            U32,                                            ) | OPCODE(SGreaterThanEqual,                                   U1,             U32,            U32,                                                            ) | ||||||
| OPCODE(UGreaterThanEqual,                                   U1,             U32,            U32,                                            ) | OPCODE(UGreaterThanEqual,                                   U1,             U32,            U32,                                                            ) | ||||||
| 
 | 
 | ||||||
| // Logical operations
 | // Logical operations
 | ||||||
| OPCODE(LogicalOr,                                           U1,             U1,             U1,                                             ) | OPCODE(LogicalOr,                                           U1,             U1,             U1,                                                             ) | ||||||
| OPCODE(LogicalAnd,                                          U1,             U1,             U1,                                             ) | OPCODE(LogicalAnd,                                          U1,             U1,             U1,                                                             ) | ||||||
| OPCODE(LogicalXor,                                          U1,             U1,             U1,                                             ) | OPCODE(LogicalXor,                                          U1,             U1,             U1,                                                             ) | ||||||
| OPCODE(LogicalNot,                                          U1,             U1,                                                             ) | OPCODE(LogicalNot,                                          U1,             U1,                                                                             ) | ||||||
| 
 | 
 | ||||||
| // Conversion operations
 | // Conversion operations
 | ||||||
| OPCODE(ConvertS16F16,                                       U32,            F16,                                                            ) | OPCODE(ConvertS16F16,                                       U32,            F16,                                                                            ) | ||||||
| OPCODE(ConvertS16F32,                                       U32,            F32,                                                            ) | OPCODE(ConvertS16F32,                                       U32,            F32,                                                                            ) | ||||||
| OPCODE(ConvertS16F64,                                       U32,            F64,                                                            ) | OPCODE(ConvertS16F64,                                       U32,            F64,                                                                            ) | ||||||
| OPCODE(ConvertS32F16,                                       U32,            F16,                                                            ) | OPCODE(ConvertS32F16,                                       U32,            F16,                                                                            ) | ||||||
| OPCODE(ConvertS32F32,                                       U32,            F32,                                                            ) | OPCODE(ConvertS32F32,                                       U32,            F32,                                                                            ) | ||||||
| OPCODE(ConvertS32F64,                                       U32,            F64,                                                            ) | OPCODE(ConvertS32F64,                                       U32,            F64,                                                                            ) | ||||||
| OPCODE(ConvertS64F16,                                       U64,            F16,                                                            ) | OPCODE(ConvertS64F16,                                       U64,            F16,                                                                            ) | ||||||
| OPCODE(ConvertS64F32,                                       U64,            F32,                                                            ) | OPCODE(ConvertS64F32,                                       U64,            F32,                                                                            ) | ||||||
| OPCODE(ConvertS64F64,                                       U64,            F64,                                                            ) | OPCODE(ConvertS64F64,                                       U64,            F64,                                                                            ) | ||||||
| OPCODE(ConvertU16F16,                                       U32,            F16,                                                            ) | OPCODE(ConvertU16F16,                                       U32,            F16,                                                                            ) | ||||||
| OPCODE(ConvertU16F32,                                       U32,            F32,                                                            ) | OPCODE(ConvertU16F32,                                       U32,            F32,                                                                            ) | ||||||
| OPCODE(ConvertU16F64,                                       U32,            F64,                                                            ) | OPCODE(ConvertU16F64,                                       U32,            F64,                                                                            ) | ||||||
| OPCODE(ConvertU32F16,                                       U32,            F16,                                                            ) | OPCODE(ConvertU32F16,                                       U32,            F16,                                                                            ) | ||||||
| OPCODE(ConvertU32F32,                                       U32,            F32,                                                            ) | OPCODE(ConvertU32F32,                                       U32,            F32,                                                                            ) | ||||||
| OPCODE(ConvertU32F64,                                       U32,            F64,                                                            ) | OPCODE(ConvertU32F64,                                       U32,            F64,                                                                            ) | ||||||
| OPCODE(ConvertU64F16,                                       U64,            F16,                                                            ) | OPCODE(ConvertU64F16,                                       U64,            F16,                                                                            ) | ||||||
| OPCODE(ConvertU64F32,                                       U64,            F32,                                                            ) | OPCODE(ConvertU64F32,                                       U64,            F32,                                                                            ) | ||||||
| OPCODE(ConvertU64F64,                                       U64,            F64,                                                            ) | OPCODE(ConvertU64F64,                                       U64,            F64,                                                                            ) | ||||||
| OPCODE(ConvertU64U32,                                       U64,            U32,                                                            ) | OPCODE(ConvertU64U32,                                       U64,            U32,                                                                            ) | ||||||
| OPCODE(ConvertU32U64,                                       U32,            U64,                                                            ) | OPCODE(ConvertU32U64,                                       U32,            U64,                                                                            ) | ||||||
| OPCODE(ConvertF16F32,                                       F16,            F32,                                                            ) | OPCODE(ConvertF16F32,                                       F16,            F32,                                                                            ) | ||||||
| OPCODE(ConvertF32F16,                                       F32,            F16,                                                            ) | OPCODE(ConvertF32F16,                                       F32,            F16,                                                                            ) | ||||||
| OPCODE(ConvertF32F64,                                       F32,            F64,                                                            ) | OPCODE(ConvertF32F64,                                       F32,            F64,                                                                            ) | ||||||
| OPCODE(ConvertF64F32,                                       F64,            F32,                                                            ) | OPCODE(ConvertF64F32,                                       F64,            F32,                                                                            ) | ||||||
|  | OPCODE(ConvertF16S32,                                       F16,            U32,                                                                            ) | ||||||
|  | OPCODE(ConvertF16S64,                                       F16,            U64,                                                                            ) | ||||||
|  | OPCODE(ConvertF16U32,                                       F16,            U32,                                                                            ) | ||||||
|  | OPCODE(ConvertF16U64,                                       F16,            U64,                                                                            ) | ||||||
|  | OPCODE(ConvertF32S32,                                       F32,            U32,                                                                            ) | ||||||
|  | OPCODE(ConvertF32S64,                                       F32,            U64,                                                                            ) | ||||||
|  | OPCODE(ConvertF32U32,                                       F32,            U32,                                                                            ) | ||||||
|  | OPCODE(ConvertF32U64,                                       F32,            U64,                                                                            ) | ||||||
|  | OPCODE(ConvertF64S32,                                       F64,            U32,                                                                            ) | ||||||
|  | OPCODE(ConvertF64S64,                                       F64,            U64,                                                                            ) | ||||||
|  | OPCODE(ConvertF64U32,                                       F64,            U32,                                                                            ) | ||||||
|  | OPCODE(ConvertF64U64,                                       F64,            U64,                                                                            ) | ||||||
|  | 
 | ||||||
|  | // Image operations
 | ||||||
|  | OPCODE(BindlessImageSampleImplicitLod,                      F32x4,          U32,            Opaque,         Opaque,         Opaque,                         ) | ||||||
|  | OPCODE(BindlessImageSampleExplicitLod,                      F32x4,          U32,            Opaque,         Opaque,         Opaque,                         ) | ||||||
|  | OPCODE(BindlessImageSampleDrefImplicitLod,                  F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||||
|  | OPCODE(BindlessImageSampleDrefExplicitLod,                  F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||||
|  | 
 | ||||||
|  | OPCODE(BoundImageSampleImplicitLod,                         F32x4,          U32,            Opaque,         Opaque,         Opaque,                         ) | ||||||
|  | OPCODE(BoundImageSampleExplicitLod,                         F32x4,          U32,            Opaque,         Opaque,         Opaque,                         ) | ||||||
|  | OPCODE(BoundImageSampleDrefImplicitLod,                     F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||||
|  | OPCODE(BoundImageSampleDrefExplicitLod,                     F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||||
|  | 
 | ||||||
|  | OPCODE(ImageSampleImplicitLod,                              F32x4,          U32,            Opaque,         Opaque,         Opaque,                         ) | ||||||
|  | OPCODE(ImageSampleExplicitLod,                              F32x4,          U32,            Opaque,         Opaque,         Opaque,                         ) | ||||||
|  | OPCODE(ImageSampleDrefImplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||||
|  | OPCODE(ImageSampleDrefExplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||||
|  |  | ||||||
|  | @ -293,6 +293,17 @@ constexpr size_t NUM_REGS = 256; | ||||||
|     return reg + (-num); |     return reg + (-num); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | [[nodiscard]] constexpr Reg operator++(Reg& reg) { | ||||||
|  |     reg = reg + 1; | ||||||
|  |     return reg; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | [[nodiscard]] constexpr Reg operator++(Reg& reg, int) { | ||||||
|  |     const Reg copy{reg}; | ||||||
|  |     reg = reg + 1; | ||||||
|  |     return copy; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { | [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { | ||||||
|     return static_cast<size_t>(reg); |     return static_cast<size_t>(reg); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -75,6 +75,7 @@ private: | ||||||
|         f64 imm_f64; |         f64 imm_f64; | ||||||
|     }; |     }; | ||||||
| }; | }; | ||||||
|  | static_assert(std::is_trivially_copyable_v<Value>); | ||||||
| 
 | 
 | ||||||
| template <IR::Type type_> | template <IR::Type type_> | ||||||
| class TypedValue : public Value { | class TypedValue : public Value { | ||||||
|  |  | ||||||
|  | @ -249,8 +249,8 @@ INST(SULD,         "SULD",           "1110 1011 000- ----") | ||||||
| INST(SURED,        "SURED",          "1110 1011 010- ----") | INST(SURED,        "SURED",          "1110 1011 010- ----") | ||||||
| INST(SUST,         "SUST",           "1110 1011 001- ----") | INST(SUST,         "SUST",           "1110 1011 001- ----") | ||||||
| INST(SYNC,         "SYNC",           "1111 0000 1111 1---") | INST(SYNC,         "SYNC",           "1111 0000 1111 1---") | ||||||
| INST(TEX,          "TEX",            "1100 00-- --11 1---") | INST(TEX,          "TEX",            "1100 0--- ---- ----") | ||||||
| INST(TEX_b,        "TEX (b)",        "1101 1110 1011 1---") | INST(TEX_b,        "TEX (b)",        "1101 1110 10-- ----") | ||||||
| INST(TEXS,         "TEXS",           "1101 -00- ---- ----") | INST(TEXS,         "TEXS",           "1101 -00- ---- ----") | ||||||
| INST(TLD,          "TLD",            "1101 1100 --11 1---") | INST(TLD,          "TLD",            "1101 1100 --11 1---") | ||||||
| INST(TLD_b,        "TLD (b)",        "1101 1101 --11 1---") | INST(TLD_b,        "TLD (b)",        "1101 1101 --11 1---") | ||||||
|  |  | ||||||
|  | @ -62,6 +62,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | ||||||
|         Optimization::SsaRewritePass(function.post_order_blocks); |         Optimization::SsaRewritePass(function.post_order_blocks); | ||||||
|     } |     } | ||||||
|     Optimization::GlobalMemoryToStorageBufferPass(program); |     Optimization::GlobalMemoryToStorageBufferPass(program); | ||||||
|  |     Optimization::TexturePass(env, program); | ||||||
|     for (IR::Function& function : functions) { |     for (IR::Function& function : functions) { | ||||||
|         Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); |         Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); | ||||||
|         Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function); |         Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function); | ||||||
|  |  | ||||||
|  | @ -585,14 +585,6 @@ void TranslatorVisitor::SYNC(u64) { | ||||||
|     ThrowNotImplemented(Opcode::SYNC); |     ThrowNotImplemented(Opcode::SYNC); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void TranslatorVisitor::TEX(u64) { |  | ||||||
|     ThrowNotImplemented(Opcode::TEX); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void TranslatorVisitor::TEX_b(u64) { |  | ||||||
|     ThrowNotImplemented(Opcode::TEX_b); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void TranslatorVisitor::TEXS(u64) { | void TranslatorVisitor::TEXS(u64) { | ||||||
|     ThrowNotImplemented(Opcode::TEXS); |     ThrowNotImplemented(Opcode::TEXS); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -0,0 +1,232 @@ | ||||||
|  | // Copyright 2021 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include <optional> | ||||||
|  | 
 | ||||||
|  | #include "common/bit_field.h" | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "shader_recompiler/frontend/ir/modifiers.h" | ||||||
|  | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||||
|  | 
 | ||||||
|  | namespace Shader::Maxwell { | ||||||
|  | namespace { | ||||||
|  | enum class Blod : u64 { | ||||||
|  |     None, | ||||||
|  |     LZ, | ||||||
|  |     LB, | ||||||
|  |     LL, | ||||||
|  |     INVALIDBLOD4, | ||||||
|  |     INVALIDBLOD5, | ||||||
|  |     LBA, | ||||||
|  |     LLA, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | enum class TextureType : u64 { | ||||||
|  |     _1D, | ||||||
|  |     ARRAY_1D, | ||||||
|  |     _2D, | ||||||
|  |     ARRAY_2D, | ||||||
|  |     _3D, | ||||||
|  |     ARRAY_3D, | ||||||
|  |     CUBE, | ||||||
|  |     ARRAY_CUBE, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | Shader::TextureType GetType(TextureType type, bool dc) { | ||||||
|  |     switch (type) { | ||||||
|  |     case TextureType::_1D: | ||||||
|  |         return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; | ||||||
|  |     case TextureType::ARRAY_1D: | ||||||
|  |         return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; | ||||||
|  |     case TextureType::_2D: | ||||||
|  |         return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; | ||||||
|  |     case TextureType::ARRAY_2D: | ||||||
|  |         return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; | ||||||
|  |     case TextureType::_3D: | ||||||
|  |         return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; | ||||||
|  |     case TextureType::ARRAY_3D: | ||||||
|  |         throw NotImplementedException("3D array texture type"); | ||||||
|  |     case TextureType::CUBE: | ||||||
|  |         return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; | ||||||
|  |     case TextureType::ARRAY_CUBE: | ||||||
|  |         return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; | ||||||
|  |     } | ||||||
|  |     throw NotImplementedException("Invalid texture type {}", type); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||||||
|  |     const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; | ||||||
|  |     switch (type) { | ||||||
|  |     case TextureType::_1D: | ||||||
|  |         return v.F(reg); | ||||||
|  |     case TextureType::ARRAY_1D: | ||||||
|  |         return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); | ||||||
|  |     case TextureType::_2D: | ||||||
|  |         return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||||||
|  |     case TextureType::ARRAY_2D: | ||||||
|  |         return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); | ||||||
|  |     case TextureType::_3D: | ||||||
|  |         return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||||||
|  |     case TextureType::ARRAY_3D: | ||||||
|  |         throw NotImplementedException("3D array texture type"); | ||||||
|  |     case TextureType::CUBE: | ||||||
|  |         return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||||||
|  |     case TextureType::ARRAY_CUBE: | ||||||
|  |         return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); | ||||||
|  |     } | ||||||
|  |     throw NotImplementedException("Invalid texture type {}", type); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { | ||||||
|  |     switch (blod) { | ||||||
|  |     case Blod::None: | ||||||
|  |         return v.ir.Imm32(0.0f); | ||||||
|  |     case Blod::LZ: | ||||||
|  |         return v.ir.Imm32(0.0f); | ||||||
|  |     case Blod::LB: | ||||||
|  |     case Blod::LL: | ||||||
|  |     case Blod::LBA: | ||||||
|  |     case Blod::LLA: | ||||||
|  |         return v.F(reg++); | ||||||
|  |     case Blod::INVALIDBLOD4: | ||||||
|  |     case Blod::INVALIDBLOD5: | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|  |     throw NotImplementedException("Invalid blod {}", blod); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||||||
|  |     const IR::U32 value{v.X(reg++)}; | ||||||
|  |     switch (type) { | ||||||
|  |     case TextureType::_1D: | ||||||
|  |     case TextureType::ARRAY_1D: | ||||||
|  |         return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); | ||||||
|  |     case TextureType::_2D: | ||||||
|  |     case TextureType::ARRAY_2D: | ||||||
|  |         return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), | ||||||
|  |                                        v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); | ||||||
|  |     case TextureType::_3D: | ||||||
|  |     case TextureType::ARRAY_3D: | ||||||
|  |         return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), | ||||||
|  |                                        v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), | ||||||
|  |                                        v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); | ||||||
|  |     case TextureType::CUBE: | ||||||
|  |     case TextureType::ARRAY_CUBE: | ||||||
|  |         throw NotImplementedException("Illegal offset on CUBE sample"); | ||||||
|  |     } | ||||||
|  |     throw NotImplementedException("Invalid texture type {}", type); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool HasExplicitLod(Blod blod) { | ||||||
|  |     switch (blod) { | ||||||
|  |     case Blod::LL: | ||||||
|  |     case Blod::LLA: | ||||||
|  |     case Blod::LZ: | ||||||
|  |         return true; | ||||||
|  |     default: | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, | ||||||
|  |           std::optional<u32> cbuf_offset) { | ||||||
|  |     union { | ||||||
|  |         u64 raw; | ||||||
|  |         BitField<35, 1, u64> ndv; | ||||||
|  |         BitField<49, 1, u64> nodep; | ||||||
|  |         BitField<50, 1, u64> dc; | ||||||
|  |         BitField<51, 3, IR::Pred> sparse_pred; | ||||||
|  |         BitField<0, 8, IR::Reg> dest_reg; | ||||||
|  |         BitField<8, 8, IR::Reg> coord_reg; | ||||||
|  |         BitField<20, 8, IR::Reg> meta_reg; | ||||||
|  |         BitField<28, 3, TextureType> type; | ||||||
|  |         BitField<31, 4, u64> mask; | ||||||
|  |     } const tex{insn}; | ||||||
|  | 
 | ||||||
|  |     if (lc) { | ||||||
|  |         throw NotImplementedException("LC"); | ||||||
|  |     } | ||||||
|  |     const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; | ||||||
|  | 
 | ||||||
|  |     IR::Reg meta_reg{tex.meta_reg}; | ||||||
|  |     IR::Value handle; | ||||||
|  |     IR::Value offset; | ||||||
|  |     IR::F32 dref; | ||||||
|  |     IR::F32 lod_clamp; | ||||||
|  |     if (cbuf_offset) { | ||||||
|  |         handle = v.ir.Imm32(*cbuf_offset); | ||||||
|  |     } else { | ||||||
|  |         handle = v.X(meta_reg++); | ||||||
|  |     } | ||||||
|  |     const IR::F32 lod{MakeLod(v, meta_reg, blod)}; | ||||||
|  |     if (aoffi) { | ||||||
|  |         offset = MakeOffset(v, meta_reg, tex.type); | ||||||
|  |     } | ||||||
|  |     if (tex.dc != 0) { | ||||||
|  |         dref = v.F(meta_reg++); | ||||||
|  |     } | ||||||
|  |     IR::TextureInstInfo info{}; | ||||||
|  |     info.type.Assign(GetType(tex.type, tex.dc != 0)); | ||||||
|  |     info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); | ||||||
|  |     info.has_lod_clamp.Assign(lc ? 1 : 0); | ||||||
|  | 
 | ||||||
|  |     const IR::Value sample{[&]() -> IR::Value { | ||||||
|  |         if (tex.dc == 0) { | ||||||
|  |             if (HasExplicitLod(blod)) { | ||||||
|  |                 return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); | ||||||
|  |             } else { | ||||||
|  |                 return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         if (HasExplicitLod(blod)) { | ||||||
|  |             return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, | ||||||
|  |                                                    info); | ||||||
|  |         } else { | ||||||
|  |             return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, | ||||||
|  |                                                    info); | ||||||
|  |         } | ||||||
|  |     }()}; | ||||||
|  | 
 | ||||||
|  |     for (int element = 0; element < 4; ++element) { | ||||||
|  |         if (((tex.mask >> element) & 1) == 0) { | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         IR::F32 value; | ||||||
|  |         if (tex.dc != 0) { | ||||||
|  |             value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); | ||||||
|  |         } else { | ||||||
|  |             value = IR::F32{v.ir.CompositeExtract(sample, element)}; | ||||||
|  |         } | ||||||
|  |         v.F(tex.dest_reg + element, value); | ||||||
|  |     } | ||||||
|  |     if (tex.sparse_pred != IR::Pred::PT) { | ||||||
|  |         v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | } // Anonymous namespace
 | ||||||
|  | 
 | ||||||
|  | void TranslatorVisitor::TEX(u64 insn) { | ||||||
|  |     union { | ||||||
|  |         u64 raw; | ||||||
|  |         BitField<54, 1, u64> aoffi; | ||||||
|  |         BitField<55, 3, Blod> blod; | ||||||
|  |         BitField<58, 1, u64> lc; | ||||||
|  |         BitField<36, 13, u64> cbuf_offset; | ||||||
|  |     } const tex{insn}; | ||||||
|  | 
 | ||||||
|  |     Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void TranslatorVisitor::TEX_b(u64 insn) { | ||||||
|  |     union { | ||||||
|  |         u64 raw; | ||||||
|  |         BitField<36, 1, u64> aoffi; | ||||||
|  |         BitField<37, 3, Blod> blod; | ||||||
|  |         BitField<40, 1, u64> lc; | ||||||
|  |     } const tex{insn}; | ||||||
|  | 
 | ||||||
|  |     Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace Shader::Maxwell
 | ||||||
|  | @ -82,6 +82,25 @@ void VisitUsages(Info& info, IR::Inst& inst) { | ||||||
|             throw NotImplementedException("Constant buffer with non-immediate index"); |             throw NotImplementedException("Constant buffer with non-immediate index"); | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|  |     case IR::Opcode::BindlessImageSampleImplicitLod: | ||||||
|  |     case IR::Opcode::BindlessImageSampleExplicitLod: | ||||||
|  |     case IR::Opcode::BindlessImageSampleDrefImplicitLod: | ||||||
|  |     case IR::Opcode::BindlessImageSampleDrefExplicitLod: | ||||||
|  |     case IR::Opcode::BoundImageSampleImplicitLod: | ||||||
|  |     case IR::Opcode::BoundImageSampleExplicitLod: | ||||||
|  |     case IR::Opcode::BoundImageSampleDrefImplicitLod: | ||||||
|  |     case IR::Opcode::BoundImageSampleDrefExplicitLod: | ||||||
|  |     case IR::Opcode::ImageSampleImplicitLod: | ||||||
|  |     case IR::Opcode::ImageSampleExplicitLod: | ||||||
|  |     case IR::Opcode::ImageSampleDrefImplicitLod: | ||||||
|  |     case IR::Opcode::ImageSampleDrefExplicitLod: { | ||||||
|  |         const TextureType type{inst.Flags<IR::TextureInstInfo>().type}; | ||||||
|  |         info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || | ||||||
|  |                                 type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; | ||||||
|  |         info.uses_sparse_residency |= | ||||||
|  |             inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|     default: |     default: | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -226,6 +226,7 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value, | ||||||
|     } |     } | ||||||
|     // Reversed loops are more likely to find the right result
 |     // Reversed loops are more likely to find the right result
 | ||||||
|     for (size_t arg = inst->NumArgs(); arg--;) { |     for (size_t arg = inst->NumArgs(); arg--;) { | ||||||
|  |         IR::Block* inst_block{block}; | ||||||
|         if (inst->Opcode() == IR::Opcode::Phi) { |         if (inst->Opcode() == IR::Opcode::Phi) { | ||||||
|             // If we are going through a phi node, mark the current block as visited
 |             // If we are going through a phi node, mark the current block as visited
 | ||||||
|             visited.insert(block); |             visited.insert(block); | ||||||
|  | @ -235,15 +236,11 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value, | ||||||
|                 // Already visited, skip
 |                 // Already visited, skip
 | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             const std::optional storage_buffer{Track(phi_block, inst->Arg(arg), bias, visited)}; |             inst_block = phi_block; | ||||||
|             if (storage_buffer) { |         } | ||||||
|                 return *storage_buffer; |         const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)}; | ||||||
|             } |         if (storage_buffer) { | ||||||
|         } else { |             return *storage_buffer; | ||||||
|             const std::optional storage_buffer{Track(block, inst->Arg(arg), bias, visited)}; |  | ||||||
|             if (storage_buffer) { |  | ||||||
|                 return *storage_buffer; |  | ||||||
|             } |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     return std::nullopt; |     return std::nullopt; | ||||||
|  |  | ||||||
|  | @ -6,6 +6,7 @@ | ||||||
| 
 | 
 | ||||||
| #include <span> | #include <span> | ||||||
| 
 | 
 | ||||||
|  | #include "shader_recompiler/environment.h" | ||||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | #include "shader_recompiler/frontend/ir/basic_block.h" | ||||||
| #include "shader_recompiler/frontend/ir/function.h" | #include "shader_recompiler/frontend/ir/function.h" | ||||||
| #include "shader_recompiler/frontend/ir/program.h" | #include "shader_recompiler/frontend/ir/program.h" | ||||||
|  | @ -26,6 +27,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program); | ||||||
| void IdentityRemovalPass(IR::Function& function); | void IdentityRemovalPass(IR::Function& function); | ||||||
| void LowerFp16ToFp32(IR::Program& program); | void LowerFp16ToFp32(IR::Program& program); | ||||||
| void SsaRewritePass(std::span<IR::Block* const> post_order_blocks); | void SsaRewritePass(std::span<IR::Block* const> post_order_blocks); | ||||||
|  | void TexturePass(Environment& env, IR::Program& program); | ||||||
| void VerificationPass(const IR::Function& function); | void VerificationPass(const IR::Function& function); | ||||||
| 
 | 
 | ||||||
| } // namespace Shader::Optimization
 | } // namespace Shader::Optimization
 | ||||||
|  |  | ||||||
							
								
								
									
										199
									
								
								src/shader_recompiler/ir_opt/texture_pass.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										199
									
								
								src/shader_recompiler/ir_opt/texture_pass.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,199 @@ | ||||||
|  | // Copyright 2021 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include <optional> | ||||||
|  | 
 | ||||||
|  | #include <boost/container/flat_set.hpp> | ||||||
|  | #include <boost/container/small_vector.hpp> | ||||||
|  | 
 | ||||||
|  | #include "shader_recompiler/environment.h" | ||||||
|  | #include "shader_recompiler/frontend/ir/basic_block.h" | ||||||
|  | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||||||
|  | #include "shader_recompiler/ir_opt/passes.h" | ||||||
|  | #include "shader_recompiler/shader_info.h" | ||||||
|  | 
 | ||||||
|  | namespace Shader::Optimization { | ||||||
|  | namespace { | ||||||
|  | struct ConstBufferAddr { | ||||||
|  |     u32 index; | ||||||
|  |     u32 offset; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct TextureInst { | ||||||
|  |     ConstBufferAddr cbuf; | ||||||
|  |     IR::Inst* inst; | ||||||
|  |     IR::Block* block; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | using TextureInstVector = boost::container::small_vector<TextureInst, 24>; | ||||||
|  | 
 | ||||||
|  | using VisitedBlocks = boost::container::flat_set<IR::Block*, std::less<IR::Block*>, | ||||||
|  |                                                  boost::container::small_vector<IR::Block*, 2>>; | ||||||
|  | 
 | ||||||
|  | IR::Opcode IndexedInstruction(const IR::Inst& inst) { | ||||||
|  |     switch (inst.Opcode()) { | ||||||
|  |     case IR::Opcode::BindlessImageSampleImplicitLod: | ||||||
|  |     case IR::Opcode::BoundImageSampleImplicitLod: | ||||||
|  |         return IR::Opcode::ImageSampleImplicitLod; | ||||||
|  |     case IR::Opcode::BoundImageSampleExplicitLod: | ||||||
|  |     case IR::Opcode::BindlessImageSampleExplicitLod: | ||||||
|  |         return IR::Opcode::ImageSampleExplicitLod; | ||||||
|  |     case IR::Opcode::BoundImageSampleDrefImplicitLod: | ||||||
|  |     case IR::Opcode::BindlessImageSampleDrefImplicitLod: | ||||||
|  |         return IR::Opcode::ImageSampleDrefImplicitLod; | ||||||
|  |     case IR::Opcode::BoundImageSampleDrefExplicitLod: | ||||||
|  |     case IR::Opcode::BindlessImageSampleDrefExplicitLod: | ||||||
|  |         return IR::Opcode::ImageSampleDrefExplicitLod; | ||||||
|  |     default: | ||||||
|  |         return IR::Opcode::Void; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool IsBindless(const IR::Inst& inst) { | ||||||
|  |     switch (inst.Opcode()) { | ||||||
|  |     case IR::Opcode::BindlessImageSampleImplicitLod: | ||||||
|  |     case IR::Opcode::BindlessImageSampleExplicitLod: | ||||||
|  |     case IR::Opcode::BindlessImageSampleDrefImplicitLod: | ||||||
|  |     case IR::Opcode::BindlessImageSampleDrefExplicitLod: | ||||||
|  |         return true; | ||||||
|  |     case IR::Opcode::BoundImageSampleImplicitLod: | ||||||
|  |     case IR::Opcode::BoundImageSampleExplicitLod: | ||||||
|  |     case IR::Opcode::BoundImageSampleDrefImplicitLod: | ||||||
|  |     case IR::Opcode::BoundImageSampleDrefExplicitLod: | ||||||
|  |         return false; | ||||||
|  |     default: | ||||||
|  |         throw InvalidArgument("Invalid opcode {}", inst.Opcode()); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool IsTextureInstruction(const IR::Inst& inst) { | ||||||
|  |     return IndexedInstruction(inst) != IR::Opcode::Void; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | std::optional<ConstBufferAddr> Track(IR::Block* block, const IR::Value& value, | ||||||
|  |                                      VisitedBlocks& visited) { | ||||||
|  |     if (value.IsImmediate()) { | ||||||
|  |         // Immediates can't be a storage buffer
 | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     const IR::Inst* const inst{value.InstRecursive()}; | ||||||
|  |     if (inst->Opcode() == IR::Opcode::GetCbuf) { | ||||||
|  |         const IR::Value index{inst->Arg(0)}; | ||||||
|  |         const IR::Value offset{inst->Arg(1)}; | ||||||
|  |         if (!index.IsImmediate()) { | ||||||
|  |             // Reading a bindless texture from variable indices is valid
 | ||||||
|  |             // but not supported here at the moment
 | ||||||
|  |             return std::nullopt; | ||||||
|  |         } | ||||||
|  |         if (!offset.IsImmediate()) { | ||||||
|  |             // TODO: Support arrays of textures
 | ||||||
|  |             return std::nullopt; | ||||||
|  |         } | ||||||
|  |         return ConstBufferAddr{ | ||||||
|  |             .index{index.U32()}, | ||||||
|  |             .offset{offset.U32()}, | ||||||
|  |         }; | ||||||
|  |     } | ||||||
|  |     // Reversed loops are more likely to find the right result
 | ||||||
|  |     for (size_t arg = inst->NumArgs(); arg--;) { | ||||||
|  |         IR::Block* inst_block{block}; | ||||||
|  |         if (inst->Opcode() == IR::Opcode::Phi) { | ||||||
|  |             // If we are going through a phi node, mark the current block as visited
 | ||||||
|  |             visited.insert(block); | ||||||
|  |             // and skip already visited blocks to avoid looping forever
 | ||||||
|  |             IR::Block* const phi_block{inst->PhiBlock(arg)}; | ||||||
|  |             if (visited.contains(phi_block)) { | ||||||
|  |                 // Already visited, skip
 | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |             inst_block = phi_block; | ||||||
|  |         } | ||||||
|  |         const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), visited)}; | ||||||
|  |         if (storage_buffer) { | ||||||
|  |             return *storage_buffer; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     return std::nullopt; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { | ||||||
|  |     ConstBufferAddr addr; | ||||||
|  |     if (IsBindless(inst)) { | ||||||
|  |         VisitedBlocks visited; | ||||||
|  |         const std::optional<ConstBufferAddr> track_addr{Track(block, IR::Value{&inst}, visited)}; | ||||||
|  |         if (!track_addr) { | ||||||
|  |             throw NotImplementedException("Failed to track bindless texture constant buffer"); | ||||||
|  |         } | ||||||
|  |         addr = *track_addr; | ||||||
|  |     } else { | ||||||
|  |         addr = ConstBufferAddr{ | ||||||
|  |             .index{env.TextureBoundBuffer()}, | ||||||
|  |             .offset{inst.Arg(0).U32()}, | ||||||
|  |         }; | ||||||
|  |     } | ||||||
|  |     return TextureInst{ | ||||||
|  |         .cbuf{addr}, | ||||||
|  |         .inst{&inst}, | ||||||
|  |         .block{block}, | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | class Descriptors { | ||||||
|  | public: | ||||||
|  |     explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {} | ||||||
|  | 
 | ||||||
|  |     u32 Add(const TextureDescriptor& descriptor) { | ||||||
|  |         // TODO: Handle arrays
 | ||||||
|  |         auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) { | ||||||
|  |             return descriptor.cbuf_index == existing.cbuf_index && | ||||||
|  |                    descriptor.cbuf_offset == existing.cbuf_offset && | ||||||
|  |                    descriptor.type == existing.type; | ||||||
|  |         })}; | ||||||
|  |         if (it != descriptors.end()) { | ||||||
|  |             return static_cast<u32>(std::distance(descriptors.begin(), it)); | ||||||
|  |         } | ||||||
|  |         descriptors.push_back(descriptor); | ||||||
|  |         return static_cast<u32>(descriptors.size()) - 1; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     TextureDescriptors& descriptors; | ||||||
|  | }; | ||||||
|  | } // Anonymous namespace
 | ||||||
|  | 
 | ||||||
|  | void TexturePass(Environment& env, IR::Program& program) { | ||||||
|  |     TextureInstVector to_replace; | ||||||
|  |     for (IR::Function& function : program.functions) { | ||||||
|  |         for (IR::Block* const block : function.post_order_blocks) { | ||||||
|  |             for (IR::Inst& inst : block->Instructions()) { | ||||||
|  |                 if (!IsTextureInstruction(inst)) { | ||||||
|  |                     continue; | ||||||
|  |                 } | ||||||
|  |                 to_replace.push_back(MakeInst(env, block, inst)); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     // Sort instructions to visit textures by constant buffer index, then by offset
 | ||||||
|  |     std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) { | ||||||
|  |         return lhs.cbuf.offset < rhs.cbuf.offset; | ||||||
|  |     }); | ||||||
|  |     std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { | ||||||
|  |         return lhs.cbuf.index < rhs.cbuf.index; | ||||||
|  |     }); | ||||||
|  |     Descriptors descriptors{program.info.texture_descriptors}; | ||||||
|  |     for (TextureInst& texture_inst : to_replace) { | ||||||
|  |         // TODO: Handle arrays
 | ||||||
|  |         IR::Inst* const inst{texture_inst.inst}; | ||||||
|  |         const u32 index{descriptors.Add(TextureDescriptor{ | ||||||
|  |             .type{inst->Flags<IR::TextureInstInfo>().type}, | ||||||
|  |             .cbuf_index{texture_inst.cbuf.index}, | ||||||
|  |             .cbuf_offset{texture_inst.cbuf.offset}, | ||||||
|  |             .count{1}, | ||||||
|  |         })}; | ||||||
|  |         inst->ReplaceOpcode(IndexedInstruction(*inst)); | ||||||
|  |         inst->SetArg(0, IR::Value{index}); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace Shader::Optimization
 | ||||||
|  | @ -8,25 +8,51 @@ | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| 
 | 
 | ||||||
|  | #include <boost/container/small_vector.hpp> | ||||||
| #include <boost/container/static_vector.hpp> | #include <boost/container/static_vector.hpp> | ||||||
| 
 | 
 | ||||||
| namespace Shader { | namespace Shader { | ||||||
| 
 | 
 | ||||||
|  | enum class TextureType : u32 { | ||||||
|  |     Color1D, | ||||||
|  |     ColorArray1D, | ||||||
|  |     Color2D, | ||||||
|  |     ColorArray2D, | ||||||
|  |     Color3D, | ||||||
|  |     ColorCube, | ||||||
|  |     ColorArrayCube, | ||||||
|  |     Shadow1D, | ||||||
|  |     ShadowArray1D, | ||||||
|  |     Shadow2D, | ||||||
|  |     ShadowArray2D, | ||||||
|  |     Shadow3D, | ||||||
|  |     ShadowCube, | ||||||
|  |     ShadowArrayCube, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct TextureDescriptor { | ||||||
|  |     TextureType type; | ||||||
|  |     u32 cbuf_index; | ||||||
|  |     u32 cbuf_offset; | ||||||
|  |     u32 count; | ||||||
|  | }; | ||||||
|  | using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; | ||||||
|  | 
 | ||||||
|  | struct ConstantBufferDescriptor { | ||||||
|  |     u32 index; | ||||||
|  |     u32 count; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct StorageBufferDescriptor { | ||||||
|  |     u32 cbuf_index; | ||||||
|  |     u32 cbuf_offset; | ||||||
|  |     u32 count; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| struct Info { | struct Info { | ||||||
|     static constexpr size_t MAX_CBUFS{18}; |     static constexpr size_t MAX_CBUFS{18}; | ||||||
|     static constexpr size_t MAX_SSBOS{16}; |     static constexpr size_t MAX_SSBOS{16}; | ||||||
| 
 | 
 | ||||||
|     struct ConstantBufferDescriptor { |  | ||||||
|         u32 index; |  | ||||||
|         u32 count; |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     struct StorageBufferDescriptor { |  | ||||||
|         u32 cbuf_index; |  | ||||||
|         u32 cbuf_offset; |  | ||||||
|         u32 count; |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     bool uses_workgroup_id{}; |     bool uses_workgroup_id{}; | ||||||
|     bool uses_local_invocation_id{}; |     bool uses_local_invocation_id{}; | ||||||
|     bool uses_fp16{}; |     bool uses_fp16{}; | ||||||
|  | @ -35,12 +61,16 @@ struct Info { | ||||||
|     bool uses_fp16_denorms_preserve{}; |     bool uses_fp16_denorms_preserve{}; | ||||||
|     bool uses_fp32_denorms_flush{}; |     bool uses_fp32_denorms_flush{}; | ||||||
|     bool uses_fp32_denorms_preserve{}; |     bool uses_fp32_denorms_preserve{}; | ||||||
|  |     bool uses_image_1d{}; | ||||||
|  |     bool uses_sampled_1d{}; | ||||||
|  |     bool uses_sparse_residency{}; | ||||||
| 
 | 
 | ||||||
|     u32 constant_buffer_mask{}; |     u32 constant_buffer_mask{}; | ||||||
| 
 | 
 | ||||||
|     boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> |     boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> | ||||||
|         constant_buffer_descriptors; |         constant_buffer_descriptors; | ||||||
|     boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; |     boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; | ||||||
|  |     TextureDescriptors texture_descriptors; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Shader
 | } // namespace Shader
 | ||||||
|  |  | ||||||
|  | @ -40,6 +40,16 @@ vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Sh | ||||||
|         }); |         }); | ||||||
|         ++binding; |         ++binding; | ||||||
|     } |     } | ||||||
|  |     for (const auto& desc : info.texture_descriptors) { | ||||||
|  |         bindings.push_back({ | ||||||
|  |             .binding = binding, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||||
|  |             .pImmutableSamplers = nullptr, | ||||||
|  |         }); | ||||||
|  |         ++binding; | ||||||
|  |     } | ||||||
|     return device.GetLogical().CreateDescriptorSetLayout({ |     return device.GetLogical().CreateDescriptorSetLayout({ | ||||||
|         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, |         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||||||
|         .pNext = nullptr, |         .pNext = nullptr, | ||||||
|  | @ -79,6 +89,18 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( | ||||||
|         ++binding; |         ++binding; | ||||||
|         offset += sizeof(DescriptorUpdateEntry); |         offset += sizeof(DescriptorUpdateEntry); | ||||||
|     } |     } | ||||||
|  |     for (const auto& desc : info.texture_descriptors) { | ||||||
|  |         entries.push_back({ | ||||||
|  |             .dstBinding = binding, | ||||||
|  |             .dstArrayElement = 0, | ||||||
|  |             .descriptorCount = 1, | ||||||
|  |             .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, | ||||||
|  |             .offset = offset, | ||||||
|  |             .stride = sizeof(DescriptorUpdateEntry), | ||||||
|  |         }); | ||||||
|  |         ++binding; | ||||||
|  |         offset += sizeof(DescriptorUpdateEntry); | ||||||
|  |     } | ||||||
|     return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ |     return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ | ||||||
|         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, |         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, | ||||||
|         .pNext = nullptr, |         .pNext = nullptr, | ||||||
|  | @ -92,6 +114,44 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( | ||||||
|         .set = 0, |         .set = 0, | ||||||
|     }); |     }); | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | struct TextureHandle { | ||||||
|  |     explicit TextureHandle(u32 data, bool via_header_index) { | ||||||
|  |         const Tegra::Texture::TextureHandle handle{data}; | ||||||
|  |         image = handle.tic_id; | ||||||
|  |         sampler = via_header_index ? image : handle.tsc_id.Value(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u32 image; | ||||||
|  |     u32 sampler; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | VideoCommon::ImageViewType CastType(Shader::TextureType type) { | ||||||
|  |     switch (type) { | ||||||
|  |     case Shader::TextureType::Color1D: | ||||||
|  |     case Shader::TextureType::Shadow1D: | ||||||
|  |         return VideoCommon::ImageViewType::e1D; | ||||||
|  |     case Shader::TextureType::ColorArray1D: | ||||||
|  |     case Shader::TextureType::ShadowArray1D: | ||||||
|  |         return VideoCommon::ImageViewType::e1DArray; | ||||||
|  |     case Shader::TextureType::Color2D: | ||||||
|  |     case Shader::TextureType::Shadow2D: | ||||||
|  |         return VideoCommon::ImageViewType::e2D; | ||||||
|  |     case Shader::TextureType::ColorArray2D: | ||||||
|  |     case Shader::TextureType::ShadowArray2D: | ||||||
|  |         return VideoCommon::ImageViewType::e2DArray; | ||||||
|  |     case Shader::TextureType::Color3D: | ||||||
|  |     case Shader::TextureType::Shadow3D: | ||||||
|  |         return VideoCommon::ImageViewType::e3D; | ||||||
|  |     case Shader::TextureType::ColorCube: | ||||||
|  |     case Shader::TextureType::ShadowCube: | ||||||
|  |         return VideoCommon::ImageViewType::Cube; | ||||||
|  |     case Shader::TextureType::ColorArrayCube: | ||||||
|  |     case Shader::TextureType::ShadowArrayCube: | ||||||
|  |         return VideoCommon::ImageViewType::CubeArray; | ||||||
|  |     } | ||||||
|  |     UNREACHABLE_MSG("Invalid texture type {}", type); | ||||||
|  | } | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, | ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, | ||||||
|  | @ -143,6 +203,47 @@ void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { | ||||||
|     buffer_cache.BindHostComputeBuffers(); |     buffer_cache.BindHostComputeBuffers(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, | ||||||
|  |                                             Tegra::MemoryManager& gpu_memory, | ||||||
|  |                                             TextureCache& texture_cache) { | ||||||
|  |     texture_cache.SynchronizeComputeDescriptors(); | ||||||
|  | 
 | ||||||
|  |     static constexpr size_t max_elements = 64; | ||||||
|  |     std::array<ImageId, max_elements> image_view_ids; | ||||||
|  |     boost::container::static_vector<u32, max_elements> image_view_indices; | ||||||
|  |     boost::container::static_vector<VkSampler, max_elements> sampler_handles; | ||||||
|  | 
 | ||||||
|  |     const auto& launch_desc{kepler_compute.launch_description}; | ||||||
|  |     const auto& cbufs{launch_desc.const_buffer_config}; | ||||||
|  |     const bool via_header_index{launch_desc.linked_tsc}; | ||||||
|  |     for (const auto& desc : info.texture_descriptors) { | ||||||
|  |         const u32 cbuf_index{desc.cbuf_index}; | ||||||
|  |         const u32 cbuf_offset{desc.cbuf_offset}; | ||||||
|  |         ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); | ||||||
|  | 
 | ||||||
|  |         const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; | ||||||
|  |         const u32 raw_handle{gpu_memory.Read<u32>(addr)}; | ||||||
|  | 
 | ||||||
|  |         const TextureHandle handle(raw_handle, via_header_index); | ||||||
|  |         image_view_indices.push_back(handle.image); | ||||||
|  | 
 | ||||||
|  |         Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||||||
|  |         sampler_handles.push_back(sampler->Handle()); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||||||
|  |     texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||||||
|  | 
 | ||||||
|  |     size_t index{}; | ||||||
|  |     for (const auto& desc : info.texture_descriptors) { | ||||||
|  |         const VkSampler vk_sampler{sampler_handles[index]}; | ||||||
|  |         ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; | ||||||
|  |         const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; | ||||||
|  |         update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); | ||||||
|  |         ++index; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { | VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { | ||||||
|     const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; |     const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; | ||||||
|     update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); |     update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); | ||||||
|  |  | ||||||
|  | @ -6,9 +6,11 @@ | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "shader_recompiler/shader_info.h" | #include "shader_recompiler/shader_info.h" | ||||||
|  | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||||
| #include "video_core/renderer_vulkan/vk_pipeline.h" | #include "video_core/renderer_vulkan/vk_pipeline.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
| 
 | 
 | ||||||
|  | @ -30,6 +32,8 @@ public: | ||||||
|     ComputePipeline(const ComputePipeline&) = delete; |     ComputePipeline(const ComputePipeline&) = delete; | ||||||
| 
 | 
 | ||||||
|     void ConfigureBufferCache(BufferCache& buffer_cache); |     void ConfigureBufferCache(BufferCache& buffer_cache); | ||||||
|  |     void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, | ||||||
|  |                                Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); |     [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -76,6 +76,10 @@ public: | ||||||
|         return gpu_memory.Read<u64>(program_base + address); |         return gpu_memory.Read<u64>(program_base + address); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     u32 TextureBoundBuffer() override { | ||||||
|  |         return kepler_compute.regs.tex_cb_index; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     std::array<u32, 3> WorkgroupSize() override { |     std::array<u32, 3> WorkgroupSize() override { | ||||||
|         const auto& qmd{kepler_compute.launch_description}; |         const auto& qmd{kepler_compute.launch_description}; | ||||||
|         return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; |         return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; | ||||||
|  |  | ||||||
|  | @ -241,9 +241,10 @@ void RasterizerVulkan::DispatchCompute() { | ||||||
|     if (!pipeline) { |     if (!pipeline) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     std::scoped_lock lock{buffer_cache.mutex}; |     std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; | ||||||
|     update_descriptor_queue.Acquire(); |     update_descriptor_queue.Acquire(); | ||||||
|     pipeline->ConfigureBufferCache(buffer_cache); |     pipeline->ConfigureBufferCache(buffer_cache); | ||||||
|  |     pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache); | ||||||
|     const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; |     const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; | ||||||
| 
 | 
 | ||||||
|     const auto& qmd{kepler_compute.launch_description}; |     const auto& qmd{kepler_compute.launch_description}; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 ReinUsesLisp
						ReinUsesLisp