forked from eden-emu/eden
		
	shader: Implement LOP and LOP3
This commit is contained in:
		
							parent
							
								
									382cba94ed
								
							
						
					
					
						commit
						980cafdc27
					
				
					 8 changed files with 227 additions and 31 deletions
				
			
		|  | @ -86,6 +86,8 @@ add_library(shader_recompiler STATIC | |||
|     frontend/maxwell/translate/impl/integer_short_multiply_add.cpp | ||||
|     frontend/maxwell/translate/impl/load_store_attribute.cpp | ||||
|     frontend/maxwell/translate/impl/load_store_memory.cpp | ||||
|     frontend/maxwell/translate/impl/logic_operation.cpp | ||||
|     frontend/maxwell/translate/impl/logic_operation_three_input.cpp | ||||
|     frontend/maxwell/translate/impl/move_predicate_to_register.cpp | ||||
|     frontend/maxwell/translate/impl/move_register.cpp | ||||
|     frontend/maxwell/translate/impl/move_special_register.cpp | ||||
|  |  | |||
|  | @ -178,8 +178,8 @@ INST(LOP_reg,      "LOP (reg)",      "0101 1100 0100 0---") | |||
| INST(LOP_cbuf,     "LOP (cbuf)",     "0100 1100 0100 0---") | ||||
| INST(LOP_imm,      "LOP (imm)",      "0011 100- 0100 0---") | ||||
| INST(LOP3_reg,     "LOP3 (reg)",     "0101 1011 1110 0---") | ||||
| INST(LOP3_cbuf,    "LOP3 (cbuf)",    "0011 11-- ---- ----") | ||||
| INST(LOP3_imm,     "LOP3 (imm)",     "0000 001- ---- ----") | ||||
| INST(LOP3_cbuf,    "LOP3 (cbuf)",    "0000 001- ---- ----") | ||||
| INST(LOP3_imm,     "LOP3 (imm)",     "0011 11-- ---- ----") | ||||
| INST(LOP32I,       "LOP32I",         "0000 01-- ---- ----") | ||||
| INST(MEMBAR,       "MEMBAR",         "1110 1111 1001 1---") | ||||
| INST(MOV_reg,      "MOV (reg)",      "0101 1100 1001 1---") | ||||
|  |  | |||
|  | @ -5,9 +5,8 @@ | |||
| #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||||
| 
 | ||||
| namespace Shader::Maxwell { | ||||
| [[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, | ||||
|                                     const IR::U32& operand_2, CompareOp compare_op, | ||||
|                                     bool is_signed) { | ||||
| IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||||
|                       CompareOp compare_op, bool is_signed) { | ||||
|     switch (compare_op) { | ||||
|     case CompareOp::False: | ||||
|         return ir.Imm1(false); | ||||
|  | @ -30,8 +29,8 @@ namespace Shader::Maxwell { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, | ||||
|                                       const IR::U1& predicate_2, BooleanOp bop) { | ||||
| IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, | ||||
|                         BooleanOp bop) { | ||||
|     switch (bop) { | ||||
|     case BooleanOp::AND: | ||||
|         return ir.LogicalAnd(predicate_1, predicate_2); | ||||
|  | @ -43,4 +42,20 @@ namespace Shader::Maxwell { | |||
|         throw NotImplementedException("Invalid bop {}", bop); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) { | ||||
|     switch (op) { | ||||
|     case PredicateOp::False: | ||||
|         return ir.Imm1(false); | ||||
|     case PredicateOp::True: | ||||
|         return ir.Imm1(true); | ||||
|     case PredicateOp::Zero: | ||||
|         return ir.IEqual(result, ir.Imm32(0)); | ||||
|     case PredicateOp::NonZero: | ||||
|         return ir.INotEqual(result, ir.Imm32(0)); | ||||
|     default: | ||||
|         throw NotImplementedException("Invalid Predicate operation {}", op); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Maxwell
 | ||||
|  |  | |||
|  | @ -13,4 +13,6 @@ namespace Shader::Maxwell { | |||
| 
 | ||||
| [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, | ||||
|                                       const IR::U1& predicate_2, BooleanOp bop); | ||||
| 
 | ||||
| [[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); | ||||
| } // namespace Shader::Maxwell
 | ||||
|  |  | |||
|  | @ -28,6 +28,13 @@ enum class BooleanOp : u64 { | |||
|     XOR, | ||||
| }; | ||||
| 
 | ||||
| enum class PredicateOp : u64 { | ||||
|     False, | ||||
|     True, | ||||
|     Zero, | ||||
|     NonZero, | ||||
| }; | ||||
| 
 | ||||
| class TranslatorVisitor { | ||||
| public: | ||||
|     explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} | ||||
|  |  | |||
|  | @ -0,0 +1,77 @@ | |||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
| 
 | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| enum class LogicalOp : u64 { | ||||
|     AND, | ||||
|     OR, | ||||
|     XOR, | ||||
|     PASS_B, | ||||
| }; | ||||
| 
 | ||||
| [[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1, | ||||
|                                        const IR::U32& operand_2, LogicalOp op) { | ||||
|     switch (op) { | ||||
|     case LogicalOp::AND: | ||||
|         return ir.BitwiseAnd(operand_1, operand_2); | ||||
|     case LogicalOp::OR: | ||||
|         return ir.BitwiseOr(operand_1, operand_2); | ||||
|     case LogicalOp::XOR: | ||||
|         return ir.BitwiseXor(operand_1, operand_2); | ||||
|     case LogicalOp::PASS_B: | ||||
|         return operand_2; | ||||
|     default: | ||||
|         throw NotImplementedException("Invalid Logical operation {}", op); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||||
|     union { | ||||
|         u64 insn; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> src_reg; | ||||
|         BitField<39, 1, u64> neg_a; | ||||
|         BitField<40, 1, u64> neg_b; | ||||
|         BitField<41, 2, LogicalOp> bit_op; | ||||
|         BitField<43, 1, u64> x; | ||||
|         BitField<44, 2, PredicateOp> pred_op; | ||||
|         BitField<48, 3, IR::Pred> pred; | ||||
|     } const lop{insn}; | ||||
| 
 | ||||
|     if (lop.x != 0) { | ||||
|         throw NotImplementedException("LOP X"); | ||||
|     } | ||||
|     IR::U32 op_a{v.X(lop.src_reg)}; | ||||
|     if (lop.neg_a != 0) { | ||||
|         op_a = v.ir.BitwiseNot(op_a); | ||||
|     } | ||||
|     if (lop.neg_b != 0) { | ||||
|         op_b = v.ir.BitwiseNot(op_b); | ||||
|     } | ||||
| 
 | ||||
|     const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, lop.bit_op)}; | ||||
|     const IR::U1 pred_result{PredicateOperation(v.ir, result, lop.pred_op)}; | ||||
|     v.X(lop.dest_reg, result); | ||||
|     v.ir.SetPred(lop.pred, pred_result); | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| void TranslatorVisitor::LOP_reg(u64 insn) { | ||||
|     LOP(*this, insn, GetReg20(insn)); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::LOP_cbuf(u64 insn) { | ||||
|     LOP(*this, insn, GetCbuf(insn)); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::LOP_imm(u64 insn) { | ||||
|     LOP(*this, insn, GetImm20(insn)); | ||||
| } | ||||
| } // namespace Shader::Maxwell
 | ||||
|  | @ -0,0 +1,117 @@ | |||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
| 
 | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| // https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
 | ||||
| // Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
 | ||||
| IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, | ||||
|                  u64 ttbl) { | ||||
|     IR::U32 r{ir.Imm32(0)}; | ||||
|     const IR::U32 not_a{ir.BitwiseNot(a)}; | ||||
|     const IR::U32 not_b{ir.BitwiseNot(b)}; | ||||
|     const IR::U32 not_c{ir.BitwiseNot(c)}; | ||||
|     if (ttbl & 0x01) { | ||||
|         // r |= ~a & ~b & ~c;
 | ||||
|         const auto lhs{ir.BitwiseAnd(not_a, not_b)}; | ||||
|         const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||||
|         r = ir.BitwiseOr(r, rhs); | ||||
|     } | ||||
|     if (ttbl & 0x02) { | ||||
|         // r |= ~a & ~b & c;
 | ||||
|         const auto lhs{ir.BitwiseAnd(not_a, not_b)}; | ||||
|         const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||||
|         r = ir.BitwiseOr(r, rhs); | ||||
|     } | ||||
|     if (ttbl & 0x04) { | ||||
|         // r |= ~a & b & ~c;
 | ||||
|         const auto lhs{ir.BitwiseAnd(not_a, b)}; | ||||
|         const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||||
|         r = ir.BitwiseOr(r, rhs); | ||||
|     } | ||||
|     if (ttbl & 0x08) { | ||||
|         // r |= ~a & b & c;
 | ||||
|         const auto lhs{ir.BitwiseAnd(not_a, b)}; | ||||
|         const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||||
|         r = ir.BitwiseOr(r, rhs); | ||||
|     } | ||||
|     if (ttbl & 0x10) { | ||||
|         // r |= a & ~b & ~c;
 | ||||
|         const auto lhs{ir.BitwiseAnd(a, not_b)}; | ||||
|         const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||||
|         r = ir.BitwiseOr(r, rhs); | ||||
|     } | ||||
|     if (ttbl & 0x20) { | ||||
|         // r |= a & ~b & c;
 | ||||
|         const auto lhs{ir.BitwiseAnd(a, not_b)}; | ||||
|         const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||||
|         r = ir.BitwiseOr(r, rhs); | ||||
|     } | ||||
|     if (ttbl & 0x40) { | ||||
|         // r |= a & b & ~c;
 | ||||
|         const auto lhs{ir.BitwiseAnd(a, b)}; | ||||
|         const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||||
|         r = ir.BitwiseOr(r, rhs); | ||||
|     } | ||||
|     if (ttbl & 0x80) { | ||||
|         // r |= a & b & c;
 | ||||
|         const auto lhs{ir.BitwiseAnd(a, b)}; | ||||
|         const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||||
|         r = ir.BitwiseOr(r, rhs); | ||||
|     } | ||||
|     return r; | ||||
| } | ||||
| 
 | ||||
| IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { | ||||
|     union { | ||||
|         u64 insn; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> src_reg; | ||||
|     } const lop3{insn}; | ||||
| 
 | ||||
|     const IR::U32 op_a{v.X(lop3.src_reg)}; | ||||
|     const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; | ||||
|     v.X(lop3.dest_reg, result); | ||||
|     return result; | ||||
| } | ||||
| 
 | ||||
| u64 GetLut48(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<48, 8, u64> lut; | ||||
|     } const lut{insn}; | ||||
|     return lut.lut; | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| void TranslatorVisitor::LOP3_reg(u64 insn) { | ||||
|     union { | ||||
|         u64 insn; | ||||
|         BitField<28, 8, u64> lut; | ||||
|         BitField<38, 1, u64> x; | ||||
|         BitField<36, 2, PredicateOp> pred_op; | ||||
|         BitField<48, 3, IR::Pred> pred; | ||||
|     } const lop3{insn}; | ||||
| 
 | ||||
|     if (lop3.x != 0) { | ||||
|         throw NotImplementedException("LOP3 X"); | ||||
|     } | ||||
|     const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)}; | ||||
|     const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)}; | ||||
|     ir.SetPred(lop3.pred, pred_result); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::LOP3_cbuf(u64 insn) { | ||||
|     LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn)); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::LOP3_imm(u64 insn) { | ||||
|     LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn)); | ||||
| } | ||||
| } // namespace Shader::Maxwell
 | ||||
|  | @ -493,30 +493,6 @@ void TranslatorVisitor::LONGJMP(u64) { | |||
|     ThrowNotImplemented(Opcode::LONGJMP); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::LOP_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::LOP_reg); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::LOP_cbuf(u64) { | ||||
|     ThrowNotImplemented(Opcode::LOP_cbuf); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::LOP_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::LOP_imm); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::LOP3_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::LOP3_reg); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::LOP3_cbuf(u64) { | ||||
|     ThrowNotImplemented(Opcode::LOP3_cbuf); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::LOP3_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::LOP3_imm); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::LOP32I(u64) { | ||||
|     ThrowNotImplemented(Opcode::LOP32I); | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 ameerj
						ameerj