forked from eden-emu/eden
		
	Refactor precise usage and add FMNMX, MUFU, FMUL32 and FADD332
This commit is contained in:
		
							parent
							
								
									6eb07feeea
								
							
						
					
					
						commit
						95576a248a
					
				
					 2 changed files with 37 additions and 74 deletions
				
			
		|  | @ -341,10 +341,10 @@ public: | |||
|      */ | ||||
|     void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, | ||||
|                             u64 dest_num_components, u64 value_num_components, | ||||
|                             bool is_saturated = false, u64 dest_elem = 0) { | ||||
|                             bool is_saturated = false, u64 dest_elem = 0, bool precise = false) { | ||||
| 
 | ||||
|         SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, | ||||
|                     dest_num_components, value_num_components, dest_elem); | ||||
|                     dest_num_components, value_num_components, dest_elem, precise); | ||||
|     } | ||||
| 
 | ||||
|     /**
 | ||||
|  | @ -368,7 +368,7 @@ public: | |||
|         const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; | ||||
| 
 | ||||
|         SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', | ||||
|                     dest_num_components, value_num_components, dest_elem); | ||||
|                     dest_num_components, value_num_components, dest_elem, false); | ||||
| 
 | ||||
|         if (sets_cc) { | ||||
|             const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; | ||||
|  | @ -416,7 +416,7 @@ public: | |||
|             } | ||||
|         }(); | ||||
| 
 | ||||
|         SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem); | ||||
|         SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false); | ||||
|     } | ||||
| 
 | ||||
|     /**
 | ||||
|  | @ -757,7 +757,8 @@ private: | |||
|      * @param dest_elem Optional, the destination element to use for the operation. | ||||
|      */ | ||||
|     void SetRegister(const Register& reg, u64 elem, const std::string& value, | ||||
|                      u64 dest_num_components, u64 value_num_components, u64 dest_elem) { | ||||
|                      u64 dest_num_components, u64 value_num_components, u64 dest_elem, | ||||
|                      bool precise) { | ||||
|         if (reg == Register::ZeroIndex) { | ||||
|             LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex"); | ||||
|             UNREACHABLE(); | ||||
|  | @ -774,7 +775,18 @@ private: | |||
|             src += GetSwizzle(elem); | ||||
|         } | ||||
| 
 | ||||
|         shader.AddLine(dest + " = " + src + ';'); | ||||
|         if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) { | ||||
|             shader.AddLine('{'); | ||||
|             ++shader.scope; | ||||
|             // This avoids optimizations of constant propagation and keeps the code as the original
 | ||||
|             // Sadly using the precise keyword causes "linking" errors on fragment shaders.
 | ||||
|             shader.AddLine("precise float tmp = " + src + ';'); | ||||
|             shader.AddLine(dest + " = tmp;"); | ||||
|             --shader.scope; | ||||
|             shader.AddLine('}'); | ||||
|         } else { | ||||
|             shader.AddLine(dest + " = " + src + ';'); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /// Build the GLSL register list.
 | ||||
|  | @ -1511,24 +1523,8 @@ private: | |||
| 
 | ||||
|                 op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); | ||||
| 
 | ||||
|                 shader.AddLine('{'); | ||||
|                 ++shader.scope; | ||||
| 
 | ||||
|                 // This avoids optimizations of constant propagation and keeps the code as the original
 | ||||
|                 // Sadly using the precise keyword causes "linking" errors on fragment shaders.
 | ||||
|                 if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { | ||||
|                     shader.AddLine("float tmp = " + op_a + " * " + op_b + ';'); | ||||
|                 } else { | ||||
|                     shader.AddLine("precise float tmp = " + op_a + " * " + op_b + ';'); | ||||
|                 } | ||||
| 
 | ||||
| 
 | ||||
|                 regs.SetRegisterToFloat(instr.gpr0, 0,  "tmp", 1, 1, | ||||
|                                         instr.alu.saturate_d); | ||||
| 
 | ||||
| 
 | ||||
|                 --shader.scope; | ||||
|                 shader.AddLine('}'); | ||||
|                 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, | ||||
|                                         instr.alu.saturate_d, 0, true); | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::FADD_C: | ||||
|  | @ -1537,24 +1533,8 @@ private: | |||
|                 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||||
|                 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||||
| 
 | ||||
|                 shader.AddLine('{'); | ||||
|                 ++shader.scope; | ||||
| 
 | ||||
|                 // This avoids optimizations of constant propagation and keeps the code as the original
 | ||||
|                 // Sadly using the precise keyword causes "linking" errors on fragment shaders.
 | ||||
|                 if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { | ||||
|                     shader.AddLine("float tmp = " + op_a + " + " + op_b + ';'); | ||||
|                 } else { | ||||
|                     shader.AddLine("precise float tmp = " + op_a + " + " + op_b + ';'); | ||||
|                 } | ||||
|                 regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1, | ||||
|                                         instr.alu.saturate_d); | ||||
| 
 | ||||
| 
 | ||||
|                 --shader.scope; | ||||
|                 shader.AddLine('}'); | ||||
| 
 | ||||
| 
 | ||||
|                 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, | ||||
|                                         instr.alu.saturate_d, 0, true); | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::MUFU: { | ||||
|  | @ -1562,31 +1542,31 @@ private: | |||
|                 switch (instr.sub_op) { | ||||
|                 case SubOp::Cos: | ||||
|                     regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, | ||||
|                                             instr.alu.saturate_d); | ||||
|                                             instr.alu.saturate_d, 0, true); | ||||
|                     break; | ||||
|                 case SubOp::Sin: | ||||
|                     regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, | ||||
|                                             instr.alu.saturate_d); | ||||
|                                             instr.alu.saturate_d, 0, true); | ||||
|                     break; | ||||
|                 case SubOp::Ex2: | ||||
|                     regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, | ||||
|                                             instr.alu.saturate_d); | ||||
|                                             instr.alu.saturate_d, 0, true); | ||||
|                     break; | ||||
|                 case SubOp::Lg2: | ||||
|                     regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, | ||||
|                                             instr.alu.saturate_d); | ||||
|                                             instr.alu.saturate_d, 0, true); | ||||
|                     break; | ||||
|                 case SubOp::Rcp: | ||||
|                     regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, | ||||
|                                             instr.alu.saturate_d); | ||||
|                                             instr.alu.saturate_d, 0, true); | ||||
|                     break; | ||||
|                 case SubOp::Rsq: | ||||
|                     regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, | ||||
|                                             instr.alu.saturate_d); | ||||
|                                             instr.alu.saturate_d, 0, true); | ||||
|                     break; | ||||
|                 case SubOp::Sqrt: | ||||
|                     regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, | ||||
|                                             instr.alu.saturate_d); | ||||
|                                             instr.alu.saturate_d, 0, true); | ||||
|                     break; | ||||
|                 default: | ||||
|                     LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", | ||||
|  | @ -1607,7 +1587,7 @@ private: | |||
|                 regs.SetRegisterToFloat(instr.gpr0, 0, | ||||
|                                         '(' + condition + ") ? min(" + parameters + ") : max(" + | ||||
|                                             parameters + ')', | ||||
|                                         1, 1); | ||||
|                                         1, 1, false, 0, true); | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::RRO_C: | ||||
|  | @ -1636,7 +1616,7 @@ private: | |||
|                 regs.SetRegisterToFloat(instr.gpr0, 0, | ||||
|                                         regs.GetRegisterAsFloat(instr.gpr8) + " * " + | ||||
|                                             GetImmediate32(instr), | ||||
|                                         1, 1, instr.fmul32.saturate); | ||||
|                                         1, 1, instr.fmul32.saturate, 0, true); | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::FADD32I: { | ||||
|  | @ -1659,7 +1639,7 @@ private: | |||
|                     op_b = "-(" + op_b + ')'; | ||||
|                 } | ||||
| 
 | ||||
|                 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1); | ||||
|                 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true); | ||||
|                 break; | ||||
|             } | ||||
|             } | ||||
|  | @ -2121,23 +2101,9 @@ private: | |||
|             } | ||||
|             } | ||||
| 
 | ||||
|             shader.AddLine('{'); | ||||
|             ++shader.scope; | ||||
|             regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', | ||||
|                                     1, 1, instr.alu.saturate_d, 0, true); | ||||
| 
 | ||||
|             // This avoids optimizations of constant propagation and keeps the code as the original
 | ||||
|             // Sadly using the precise keyword causes "linking" errors on fragment shaders.
 | ||||
|             if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { | ||||
|                 shader.AddLine("float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");"); | ||||
|             } else { | ||||
|                 shader.AddLine("precise float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");"); | ||||
|             } | ||||
| 
 | ||||
|             regs.SetRegisterToFloat(instr.gpr0, 0,  "tmp", 1, 1, | ||||
|                                     instr.alu.saturate_d); | ||||
| 
 | ||||
| 
 | ||||
|             --shader.scope; | ||||
|             shader.AddLine('}'); | ||||
|             break; | ||||
|         } | ||||
|         case OpCode::Type::Hfma2: { | ||||
|  |  | |||
|  | @ -15,8 +15,7 @@ static constexpr u32 PROGRAM_OFFSET{10}; | |||
| 
 | ||||
| ProgramResult GenerateVertexShader(const ShaderSetup& setup) { | ||||
|     std::string out = "#version 430 core\n"; | ||||
|     out += "#extension GL_ARB_separate_shader_objects : enable\n"; | ||||
|     out += "#extension GL_ARB_gpu_shader5 : enable\n\n"; | ||||
|     out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||||
|     out += Decompiler::GetCommonDeclarations(); | ||||
| 
 | ||||
|     out += R"( | ||||
|  | @ -84,8 +83,7 @@ void main() { | |||
| 
 | ||||
| ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { | ||||
|     std::string out = "#version 430 core\n"; | ||||
|     out += "#extension GL_ARB_separate_shader_objects : enable\n"; | ||||
|     out += "#extension GL_ARB_gpu_shader5 : enable\n\n"; | ||||
|     out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||||
|     out += Decompiler::GetCommonDeclarations(); | ||||
|     out += "bool exec_geometry();\n"; | ||||
| 
 | ||||
|  | @ -119,8 +117,7 @@ void main() { | |||
| 
 | ||||
| ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { | ||||
|     std::string out = "#version 430 core\n"; | ||||
|     out += "#extension GL_ARB_separate_shader_objects : enable\n"; | ||||
|     out += "#extension GL_ARB_gpu_shader5 : enable\n\n"; | ||||
|     out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||||
|     out += Decompiler::GetCommonDeclarations(); | ||||
|     out += "bool exec_fragment();\n"; | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 FernandoS27
						FernandoS27