forked from eden-emu/eden
		
	Shader Recompiler: implement textureGrad 3D emulation constant propagation
This commit is contained in:
		
							parent
							
								
									1f584c14e7
								
							
						
					
					
						commit
						01858648d1
					
				
					 8 changed files with 261 additions and 11 deletions
				
			
		|  | @ -558,12 +558,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||||||
|                        const IR::Value& coord, const IR::Value& derivatives, |                        const IR::Value& coord, const IR::Value& derivatives, | ||||||
|                        const IR::Value& offset, const IR::Value& lod_clamp) { |                        const IR::Value& offset, const IR::Value& lod_clamp) { | ||||||
|     const auto info{inst.Flags<IR::TextureInstInfo>()}; |     const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||||||
|     ScopedRegister dpdx, dpdy; |     ScopedRegister dpdx, dpdy, coords; | ||||||
|     const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; |     const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | ||||||
|     if (multi_component) { |     if (multi_component) { | ||||||
|         // Allocate this early to avoid aliasing other registers
 |         // Allocate this early to avoid aliasing other registers
 | ||||||
|         dpdx = ScopedRegister{ctx.reg_alloc}; |         dpdx = ScopedRegister{ctx.reg_alloc}; | ||||||
|         dpdy = ScopedRegister{ctx.reg_alloc}; |         dpdy = ScopedRegister{ctx.reg_alloc}; | ||||||
|  |         if (info.num_derivates >= 3) { | ||||||
|  |             coords = ScopedRegister{ctx.reg_alloc}; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|     const auto sparse_inst{PrepareSparse(inst)}; |     const auto sparse_inst{PrepareSparse(inst)}; | ||||||
|     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; |     const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||||||
|  | @ -580,15 +583,27 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||||||
|                 "MOV.F {}.y,{}.w;", |                 "MOV.F {}.y,{}.w;", | ||||||
|                 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, |                 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, | ||||||
|                 dpdy.reg, derivatives_vec); |                 dpdy.reg, derivatives_vec); | ||||||
|  |         Register final_coord; | ||||||
|  |         if (info.num_derivates >= 3) { | ||||||
|  |             ctx.Add("MOV.F {}.z,{}.x;" | ||||||
|  |                     "MOV.F {}.z,{}.y;", | ||||||
|  |                     dpdx.reg, coord_vec, dpdy.reg, coord_vec); | ||||||
|  |             ctx.Add("MOV.F {}.x,0;" | ||||||
|  |                     "MOV.F {}.y,0;", | ||||||
|  |                     "MOV.F {}.z,0;", coords.reg, coords.reg, coords.reg); | ||||||
|  |             final_coord = coords.reg; | ||||||
|  |         } else { | ||||||
|  |             final_coord = coord_vec; | ||||||
|  |         } | ||||||
|         if (info.has_lod_clamp) { |         if (info.has_lod_clamp) { | ||||||
|             const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; |             const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; | ||||||
|             ctx.Add("MOV.F {}.w,{};" |             ctx.Add("MOV.F {}.w,{};" | ||||||
|                     "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", |                     "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", | ||||||
|                     dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, |                     dpdy.reg, lod_clamp_value, sparse_mod, ret, final_coord, dpdx.reg, dpdy.reg, | ||||||
|                     texture, type, offset_vec); |                     texture, type, offset_vec); | ||||||
|         } else { |         } else { | ||||||
|             ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, |             ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, final_coord, dpdx.reg, | ||||||
|                     texture, type, offset_vec); |                     dpdy.reg, texture, type, offset_vec); | ||||||
|         } |         } | ||||||
|     } else { |     } else { | ||||||
|         ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, |         ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, | ||||||
|  |  | ||||||
|  | @ -548,7 +548,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||||||
|     if (sparse_inst) { |     if (sparse_inst) { | ||||||
|         throw NotImplementedException("EmitImageGradient Sparse"); |         throw NotImplementedException("EmitImageGradient Sparse"); | ||||||
|     } |     } | ||||||
|     if (!offset.IsEmpty()) { |     if (!offset.IsEmpty() && info.num_derivates <= 2) { | ||||||
|         throw NotImplementedException("EmitImageGradient offset"); |         throw NotImplementedException("EmitImageGradient offset"); | ||||||
|     } |     } | ||||||
|     const auto texture{Texture(ctx, info, index)}; |     const auto texture{Texture(ctx, info, index)}; | ||||||
|  | @ -556,6 +556,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||||||
|     const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; |     const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | ||||||
|     const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; |     const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; | ||||||
|     if (multi_component) { |     if (multi_component) { | ||||||
|  |         if (info.num_derivates >= 3) { | ||||||
|  |             const auto offset_vec{ctx.var_alloc.Consume(offset)}; | ||||||
|  |             ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yz, {}.y));", texel, texture, | ||||||
|  |                     coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec); | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|         ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, |         ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, | ||||||
|                 derivatives_vec, derivatives_vec); |                 derivatives_vec, derivatives_vec); | ||||||
|     } else { |     } else { | ||||||
|  |  | ||||||
|  | @ -42,6 +42,7 @@ union TextureInstInfo { | ||||||
|     BitField<23, 2, u32> gather_component; |     BitField<23, 2, u32> gather_component; | ||||||
|     BitField<25, 2, u32> num_derivates; |     BitField<25, 2, u32> num_derivates; | ||||||
|     BitField<27, 3, ImageFormat> image_format; |     BitField<27, 3, ImageFormat> image_format; | ||||||
|  |     BitField<30, 1, u32> ndv_is_active; | ||||||
| }; | }; | ||||||
| static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); | static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -19,7 +19,7 @@ void TranslatorVisitor::FSWZADD(u64 insn) { | ||||||
|     } const fswzadd{insn}; |     } const fswzadd{insn}; | ||||||
| 
 | 
 | ||||||
|     if (fswzadd.ndv != 0) { |     if (fswzadd.ndv != 0) { | ||||||
|         throw NotImplementedException("FSWZADD NDV"); |         LOG_WARNING(Shader, "(STUBBED) FSWZADD - NDV mode"); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const IR::F32 src_a{GetFloatReg8(insn)}; |     const IR::F32 src_a{GetFloatReg8(insn)}; | ||||||
|  |  | ||||||
|  | @ -16,8 +16,10 @@ void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = fa | ||||||
|         BitField<12, 4, u64> mov32i_mask; |         BitField<12, 4, u64> mov32i_mask; | ||||||
|     } const mov{insn}; |     } const mov{insn}; | ||||||
| 
 | 
 | ||||||
|     if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { |     u64 mask = is_mov32i ? mov.mov32i_mask : mov.mask; | ||||||
|         throw NotImplementedException("Non-full move mask"); |     if (mask != 0xf && mask != 0x1) { | ||||||
|  |         LOG_WARNING(Shader, "(STUBBED) Masked Mov"); | ||||||
|  |         return; | ||||||
|     } |     } | ||||||
|     v.X(mov.dest_reg, src); |     v.X(mov.dest_reg, src); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -209,7 +209,7 @@ void TranslatorVisitor::R2B(u64) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void TranslatorVisitor::RAM(u64) { | void TranslatorVisitor::RAM(u64) { | ||||||
|     ThrowNotImplemented(Opcode::RAM); |     LOG_WARNING(Shader, "(STUBBED) RAM Instruction"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void TranslatorVisitor::RET(u64) { | void TranslatorVisitor::RET(u64) { | ||||||
|  | @ -221,7 +221,7 @@ void TranslatorVisitor::RTT(u64) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void TranslatorVisitor::SAM(u64) { | void TranslatorVisitor::SAM(u64) { | ||||||
|     ThrowNotImplemented(Opcode::SAM); |     LOG_WARNING(Shader, "(STUBBED) SAM Instruction"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void TranslatorVisitor::SETCRSPTR(u64) { | void TranslatorVisitor::SETCRSPTR(u64) { | ||||||
|  |  | ||||||
|  | @ -172,6 +172,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, | ||||||
|     info.is_depth.Assign(tex.dc != 0 ? 1 : 0); |     info.is_depth.Assign(tex.dc != 0 ? 1 : 0); | ||||||
|     info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); |     info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); | ||||||
|     info.has_lod_clamp.Assign(lc ? 1 : 0); |     info.has_lod_clamp.Assign(lc ? 1 : 0); | ||||||
|  |     info.ndv_is_active.Assign(tex.ndv != 0 ? 1 : 0); | ||||||
| 
 | 
 | ||||||
|     const IR::Value sample{[&]() -> IR::Value { |     const IR::Value sample{[&]() -> IR::Value { | ||||||
|         if (tex.dc == 0) { |         if (tex.dc == 0) { | ||||||
|  |  | ||||||
|  | @ -10,6 +10,7 @@ | ||||||
| #include "shader_recompiler/environment.h" | #include "shader_recompiler/environment.h" | ||||||
| #include "shader_recompiler/exception.h" | #include "shader_recompiler/exception.h" | ||||||
| #include "shader_recompiler/frontend/ir/ir_emitter.h" | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||||||
|  | #include "shader_recompiler/frontend/ir/modifiers.h" | ||||||
| #include "shader_recompiler/frontend/ir/value.h" | #include "shader_recompiler/frontend/ir/value.h" | ||||||
| #include "shader_recompiler/ir_opt/passes.h" | #include "shader_recompiler/ir_opt/passes.h" | ||||||
| 
 | 
 | ||||||
|  | @ -410,7 +411,49 @@ void FoldSelect(IR::Inst& inst) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void FoldFPAdd32(IR::Inst& inst) { | ||||||
|  |     if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a + b; })) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     const IR::Value lhs_value{inst.Arg(0)}; | ||||||
|  |     const IR::Value rhs_value{inst.Arg(1)}; | ||||||
|  |     const auto check_neutral = [](const IR::Value& one_operand) { | ||||||
|  |         return one_operand.IsImmediate() && std::abs(one_operand.F32()) == 0.0f; | ||||||
|  |     }; | ||||||
|  |     if (check_neutral(lhs_value)) { | ||||||
|  |         inst.ReplaceUsesWith(rhs_value); | ||||||
|  |     } | ||||||
|  |     if (check_neutral(rhs_value)) { | ||||||
|  |         inst.ReplaceUsesWith(lhs_value); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool FoldDerivateYFromCorrection(IR::Inst& inst) { | ||||||
|  |     const IR::Value lhs_value{inst.Arg(0)}; | ||||||
|  |     const IR::Value rhs_value{inst.Arg(1)}; | ||||||
|  |     IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | ||||||
|  |     IR::Inst* const rhs_op{rhs_value.InstRecursive()}; | ||||||
|  |     if (lhs_op->GetOpcode() == IR::Opcode::YDirection) { | ||||||
|  |         if (rhs_op->GetOpcode() != IR::Opcode::DPdyFine) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |         inst.ReplaceUsesWith(rhs_value); | ||||||
|  |         return true; | ||||||
|  |     } | ||||||
|  |     if (rhs_op->GetOpcode() != IR::Opcode::YDirection) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     if (lhs_op->GetOpcode() != IR::Opcode::DPdyFine) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     inst.ReplaceUsesWith(lhs_value); | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void FoldFPMul32(IR::Inst& inst) { | void FoldFPMul32(IR::Inst& inst) { | ||||||
|  |     if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a * b; })) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|     const auto control{inst.Flags<IR::FpControl>()}; |     const auto control{inst.Flags<IR::FpControl>()}; | ||||||
|     if (control.no_contraction) { |     if (control.no_contraction) { | ||||||
|         return; |         return; | ||||||
|  | @ -421,6 +464,9 @@ void FoldFPMul32(IR::Inst& inst) { | ||||||
|     if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { |     if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |     if (FoldDerivateYFromCorrection(inst)) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|     IR::Inst* const lhs_op{lhs_value.InstRecursive()}; |     IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | ||||||
|     IR::Inst* const rhs_op{rhs_value.InstRecursive()}; |     IR::Inst* const rhs_op{rhs_value.InstRecursive()}; | ||||||
|     if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || |     if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || | ||||||
|  | @ -622,8 +668,13 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | ||||||
|     } |     } | ||||||
|     const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; |     const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; | ||||||
|     if (value_2 != value_3) { |     if (value_2 != value_3) { | ||||||
|  |         if (!value_2.IsImmediate() || !value_3.IsImmediate()) { | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|  |         if (Common::BitCast<u32>(value_2.F32()) != value_3.U32()) { | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|     const IR::Value index{inst2->Arg(1)}; |     const IR::Value index{inst2->Arg(1)}; | ||||||
|     const IR::Value clamp{inst2->Arg(2)}; |     const IR::Value clamp{inst2->Arg(2)}; | ||||||
|     const IR::Value segmentation_mask{inst2->Arg(3)}; |     const IR::Value segmentation_mask{inst2->Arg(3)}; | ||||||
|  | @ -648,6 +699,169 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) { | ||||||
|  |     if (coord.IsImmediate()) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     const auto check_through_shuffle = [](IR::Value input, IR::Value& result) { | ||||||
|  |         const IR::Value value_1{GetThroughCast(input.Resolve(), IR::Opcode::BitCastF32U32)}; | ||||||
|  |         IR::Inst* const inst2{value_1.InstRecursive()}; | ||||||
|  |         if (inst2->GetOpcode() != IR::Opcode::ShuffleIndex) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |         const IR::Value index{inst2->Arg(1).Resolve()}; | ||||||
|  |         const IR::Value clamp{inst2->Arg(2).Resolve()}; | ||||||
|  |         const IR::Value segmentation_mask{inst2->Arg(3).Resolve()}; | ||||||
|  |         if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |         if (index.U32() != 3 && clamp.U32() != 3) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |         result = GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32); | ||||||
|  |         return true; | ||||||
|  |     }; | ||||||
|  |     IR::Inst* const inst = coord.InstRecursive(); | ||||||
|  |     if (inst->GetOpcode() != IR::Opcode::FSwizzleAdd) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     std::array<IR::Value, 3> temporary_values; | ||||||
|  |     IR::Value value_1 = inst->Arg(0).Resolve(); | ||||||
|  |     IR::Value value_2 = inst->Arg(1).Resolve(); | ||||||
|  |     IR::Value value_3 = inst->Arg(2).Resolve(); | ||||||
|  |     std::array<u32, 4> swizzles_mask_a{}; | ||||||
|  |     std::array<u32, 4> swizzles_mask_b{}; | ||||||
|  |     const auto resolve_mask = [](std::array<u32, 4>& mask_results, IR::Value mask) { | ||||||
|  |         u32 value = mask.U32(); | ||||||
|  |         for (size_t i = 0; i < 4; i++) { | ||||||
|  |             mask_results[i] = (value >> (i * 2)) & 0x3; | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |     resolve_mask(swizzles_mask_a, value_3); | ||||||
|  |     size_t coordinate_index = 0; | ||||||
|  |     const auto resolve_pending = [&](IR::Value resolve_v) { | ||||||
|  |         IR::Inst* const inst_r = resolve_v.InstRecursive(); | ||||||
|  |         if (inst_r->GetOpcode() != IR::Opcode::FSwizzleAdd) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |         if (!check_through_shuffle(inst_r->Arg(0).Resolve(), temporary_values[1])) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |         if (!check_through_shuffle(inst_r->Arg(1).Resolve(), temporary_values[2])) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |         resolve_mask(swizzles_mask_b, inst_r->Arg(2).Resolve()); | ||||||
|  |         return true; | ||||||
|  |     }; | ||||||
|  |     if (value_1.IsImmediate() || value_2.IsImmediate()) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     bool should_continue = false; | ||||||
|  |     if (resolve_pending(value_1)) { | ||||||
|  |         should_continue = check_through_shuffle(value_2, temporary_values[0]); | ||||||
|  |         coordinate_index = 0; | ||||||
|  |     } | ||||||
|  |     if (resolve_pending(value_2)) { | ||||||
|  |         should_continue = check_through_shuffle(value_1, temporary_values[0]); | ||||||
|  |         coordinate_index = 2; | ||||||
|  |     } | ||||||
|  |     if (!should_continue) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     // figure which is which
 | ||||||
|  |     size_t zero_mask_a = 0; | ||||||
|  |     size_t zero_mask_b = 0; | ||||||
|  |     for (size_t i = 0; i < 4; i++) { | ||||||
|  |         if (swizzles_mask_a[i] == 2 || swizzles_mask_b[i] == 2) { | ||||||
|  |             // last operand can be inversed, we cannot determine a result.
 | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |         zero_mask_a |= static_cast<size_t>(swizzles_mask_a[i] == 3 ? 1 : 0) << i; | ||||||
|  |         zero_mask_b |= static_cast<size_t>(swizzles_mask_b[i] == 3 ? 1 : 0) << i; | ||||||
|  |     } | ||||||
|  |     static constexpr size_t ddx_pattern = 0b1010; | ||||||
|  |     static constexpr size_t ddx_pattern_inv = ~ddx_pattern & 0b00001111; | ||||||
|  |     if (std::popcount(zero_mask_a) != 2) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     if (std::popcount(zero_mask_b) != 2) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     if (zero_mask_a == zero_mask_b) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     results[0] = temporary_values[coordinate_index]; | ||||||
|  | 
 | ||||||
|  |     if (coordinate_index == 0) { | ||||||
|  |         if (zero_mask_b == ddx_pattern || zero_mask_b == ddx_pattern_inv) { | ||||||
|  |             results[1] = temporary_values[1]; | ||||||
|  |             results[2] = temporary_values[2]; | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |         results[2] = temporary_values[1]; | ||||||
|  |         results[1] = temporary_values[2]; | ||||||
|  |     } else { | ||||||
|  |         const auto assign_result = [&results](IR::Value temporary_value, size_t mask) { | ||||||
|  |             if (mask == ddx_pattern || mask == ddx_pattern_inv) { | ||||||
|  |                 results[1] = temporary_value; | ||||||
|  |                 return; | ||||||
|  |             } | ||||||
|  |             results[2] = temporary_value; | ||||||
|  |         }; | ||||||
|  |         assign_result(temporary_values[1], zero_mask_b); | ||||||
|  |         assign_result(temporary_values[0], zero_mask_a); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { | ||||||
|  |     IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>(); | ||||||
|  |     auto orig_opcode = inst.GetOpcode(); | ||||||
|  |     if (info.ndv_is_active == 0) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     if (info.type != TextureType::Color3D) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     const IR::Value handle{inst.Arg(0)}; | ||||||
|  |     const IR::Value coords{inst.Arg(1)}; | ||||||
|  |     const IR::Value bias_lc{inst.Arg(2)}; | ||||||
|  |     const IR::Value offset{inst.Arg(3)}; | ||||||
|  |     if (!offset.IsImmediate()) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     IR::Inst* const inst2 = coords.InstRecursive(); | ||||||
|  |     std::array<std::array<IR::Value, 3>, 3> results_matrix; | ||||||
|  |     for (size_t i = 0; i < 3; i++) { | ||||||
|  |         if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) { | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     IR::F32 lod_clamp{}; | ||||||
|  |     if (info.has_lod_clamp != 0) { | ||||||
|  |         if (!bias_lc.IsImmediate()) { | ||||||
|  |             lod_clamp = IR::F32{bias_lc.InstRecursive()->Arg(1).Resolve()}; | ||||||
|  |         } else { | ||||||
|  |             lod_clamp = IR::F32{bias_lc}; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||||||
|  |     IR::Value new_coords = | ||||||
|  |         ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]); | ||||||
|  |     IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2], | ||||||
|  |                                                     results_matrix[1][1], results_matrix[1][2]); | ||||||
|  |     IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]); | ||||||
|  |     info.num_derivates.Assign(3); | ||||||
|  |     IR::Value new_gradient_instruction = | ||||||
|  |         ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info); | ||||||
|  |     IR::Inst* const new_inst = new_gradient_instruction.InstRecursive(); | ||||||
|  |     if (orig_opcode == IR::Opcode::ImageSampleImplicitLod) { | ||||||
|  |         new_inst->ReplaceOpcode(IR::Opcode::ImageGradient); | ||||||
|  |     } | ||||||
|  |     inst.ReplaceUsesWith(new_gradient_instruction); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { | void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { | ||||||
|     const IR::Value bank{inst.Arg(0)}; |     const IR::Value bank{inst.Arg(0)}; | ||||||
|     const IR::Value offset{inst.Arg(1)}; |     const IR::Value offset{inst.Arg(1)}; | ||||||
|  | @ -743,6 +957,12 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { | ||||||
|     case IR::Opcode::SelectF32: |     case IR::Opcode::SelectF32: | ||||||
|     case IR::Opcode::SelectF64: |     case IR::Opcode::SelectF64: | ||||||
|         return FoldSelect(inst); |         return FoldSelect(inst); | ||||||
|  |     case IR::Opcode::FPNeg32: | ||||||
|  |         FoldWhenAllImmediates(inst, [](f32 a) { return -a; }); | ||||||
|  |         return; | ||||||
|  |     case IR::Opcode::FPAdd32: | ||||||
|  |         FoldFPAdd32(inst); | ||||||
|  |         return; | ||||||
|     case IR::Opcode::FPMul32: |     case IR::Opcode::FPMul32: | ||||||
|         return FoldFPMul32(inst); |         return FoldFPMul32(inst); | ||||||
|     case IR::Opcode::LogicalAnd: |     case IR::Opcode::LogicalAnd: | ||||||
|  | @ -858,6 +1078,11 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { | ||||||
|             FoldDriverConstBuffer(env, block, inst, 1); |             FoldDriverConstBuffer(env, block, inst, 1); | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|  |     case IR::Opcode::BindlessImageSampleImplicitLod: | ||||||
|  |     case IR::Opcode::BoundImageSampleImplicitLod: | ||||||
|  |     case IR::Opcode::ImageSampleImplicitLod: | ||||||
|  |         FoldImageSampleImplicitLod(block, inst); | ||||||
|  |         break; | ||||||
|     default: |     default: | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow