forked from eden-emu/eden
		
	video_core/macro_jit_x64: Move impl class into cpp file
Keeps the implementation internal to the cpp file and also reduces the dependencies pulled in by the API-facing header. Notably, this fully internalizes all of the xbyak externals.
This commit is contained in:
		
							parent
							
								
									a3c81745b1
								
							
						
					
					
						commit
						6b873b72ae
					
				
					 2 changed files with 86 additions and 87 deletions
				
			
		|  | @ -2,9 +2,17 @@ | |||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <array> | ||||
| #include <bitset> | ||||
| #include <optional> | ||||
| 
 | ||||
| #include <xbyak/xbyak.h> | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/bit_field.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "common/microprofile.h" | ||||
| #include "common/x64/xbyak_abi.h" | ||||
| #include "common/x64/xbyak_util.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/macro/macro_interpreter.h" | ||||
|  | @ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 | |||
| MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); | ||||
| 
 | ||||
| namespace Tegra { | ||||
| namespace { | ||||
| constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; | ||||
| constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; | ||||
| constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; | ||||
| constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; | ||||
| constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; | ||||
| 
 | ||||
| static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | ||||
| const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | ||||
|     STATE, | ||||
|     RESULT, | ||||
|     PARAMETERS, | ||||
|  | @ -28,19 +37,73 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | |||
|     BRANCH_HOLDER, | ||||
| }); | ||||
| 
 | ||||
| MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) | ||||
|     : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} | ||||
| // Arbitrarily chosen based on current booting games.
 | ||||
| constexpr size_t MAX_CODE_SIZE = 0x10000; | ||||
| 
 | ||||
| std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { | ||||
|     return std::make_unique<MacroJITx64Impl>(maxwell3d, code); | ||||
| } | ||||
| class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro { | ||||
| public: | ||||
|     explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) | ||||
|         : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { | ||||
|         Compile(); | ||||
|     } | ||||
| 
 | ||||
| MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) | ||||
|     : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { | ||||
|     Compile(); | ||||
| } | ||||
|     void Execute(const std::vector<u32>& parameters, u32 method) override; | ||||
| 
 | ||||
| MacroJITx64Impl::~MacroJITx64Impl() = default; | ||||
|     void Compile_ALU(Macro::Opcode opcode); | ||||
|     void Compile_AddImmediate(Macro::Opcode opcode); | ||||
|     void Compile_ExtractInsert(Macro::Opcode opcode); | ||||
|     void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); | ||||
|     void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); | ||||
|     void Compile_Read(Macro::Opcode opcode); | ||||
|     void Compile_Branch(Macro::Opcode opcode); | ||||
| 
 | ||||
| private: | ||||
|     void Optimizer_ScanFlags(); | ||||
| 
 | ||||
|     void Compile(); | ||||
|     bool Compile_NextInstruction(); | ||||
| 
 | ||||
|     Xbyak::Reg32 Compile_FetchParameter(); | ||||
|     Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); | ||||
| 
 | ||||
|     void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); | ||||
|     void Compile_Send(Xbyak::Reg32 value); | ||||
| 
 | ||||
|     Macro::Opcode GetOpCode() const; | ||||
|     std::bitset<32> PersistentCallerSavedRegs() const; | ||||
| 
 | ||||
|     struct JITState { | ||||
|         Engines::Maxwell3D* maxwell3d{}; | ||||
|         std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; | ||||
|         u32 carry_flag{}; | ||||
|     }; | ||||
|     static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); | ||||
|     using ProgramType = void (*)(JITState*, const u32*); | ||||
| 
 | ||||
|     struct OptimizerState { | ||||
|         bool can_skip_carry{}; | ||||
|         bool has_delayed_pc{}; | ||||
|         bool zero_reg_skip{}; | ||||
|         bool skip_dummy_addimmediate{}; | ||||
|         bool optimize_for_method_move{}; | ||||
|         bool enable_asserts{}; | ||||
|     }; | ||||
|     OptimizerState optimizer{}; | ||||
| 
 | ||||
|     std::optional<Macro::Opcode> next_opcode{}; | ||||
|     ProgramType program{nullptr}; | ||||
| 
 | ||||
|     std::array<Xbyak::Label, MAX_CODE_SIZE> labels; | ||||
|     std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; | ||||
|     Xbyak::Label end_of_code{}; | ||||
| 
 | ||||
|     bool is_delay_slot{}; | ||||
|     u32 pc{}; | ||||
|     std::optional<u32> delayed_pc; | ||||
| 
 | ||||
|     const std::vector<u32>& code; | ||||
|     Engines::Maxwell3D& maxwell3d; | ||||
| }; | ||||
| 
 | ||||
| void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { | ||||
|     MICROPROFILE_SCOPE(MacroJitExecute); | ||||
|  | @ -307,11 +370,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { | |||
|     Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||||
| } | ||||
| 
 | ||||
| static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { | ||||
| void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { | ||||
|     maxwell3d->CallMethodFromMME(method_address.address, value); | ||||
| } | ||||
| 
 | ||||
| void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | ||||
| void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | ||||
|     Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||
|     mov(Common::X64::ABI_PARAM1, qword[STATE]); | ||||
|     mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); | ||||
|  | @ -338,7 +401,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | |||
|     L(dont_process); | ||||
| } | ||||
| 
 | ||||
| void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { | ||||
| void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { | ||||
|     ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); | ||||
|     const s32 jump_address = | ||||
|         static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); | ||||
|  | @ -392,7 +455,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { | |||
|     L(end); | ||||
| } | ||||
| 
 | ||||
| void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { | ||||
| void MacroJITx64Impl::Optimizer_ScanFlags() { | ||||
|     optimizer.can_skip_carry = true; | ||||
|     optimizer.has_delayed_pc = false; | ||||
|     for (auto raw_op : code) { | ||||
|  | @ -534,7 +597,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() { | |||
|     return true; | ||||
| } | ||||
| 
 | ||||
| Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { | ||||
| Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { | ||||
|     mov(eax, dword[PARAMETERS]); | ||||
|     add(PARAMETERS, sizeof(u32)); | ||||
|     return eax; | ||||
|  | @ -615,5 +678,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const { | |||
| std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { | ||||
|     return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) | ||||
|     : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} | ||||
| 
 | ||||
| std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { | ||||
|     return std::make_unique<MacroJITx64Impl>(maxwell3d, code); | ||||
| } | ||||
| } // namespace Tegra
 | ||||
|  |  | |||
|  | @ -4,12 +4,7 @@ | |||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <array> | ||||
| #include <bitset> | ||||
| #include <xbyak/xbyak.h> | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/x64/xbyak_abi.h" | ||||
| #include "video_core/macro/macro.h" | ||||
| 
 | ||||
| namespace Tegra { | ||||
|  | @ -18,9 +13,6 @@ namespace Engines { | |||
| class Maxwell3D; | ||||
| } | ||||
| 
 | ||||
| /// MAX_CODE_SIZE is arbitrarily chosen based on current booting games
 | ||||
| constexpr size_t MAX_CODE_SIZE = 0x10000; | ||||
| 
 | ||||
| class MacroJITx64 final : public MacroEngine { | ||||
| public: | ||||
|     explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); | ||||
|  | @ -32,67 +24,4 @@ private: | |||
|     Engines::Maxwell3D& maxwell3d; | ||||
| }; | ||||
| 
 | ||||
| class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { | ||||
| public: | ||||
|     explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); | ||||
|     ~MacroJITx64Impl(); | ||||
| 
 | ||||
|     void Execute(const std::vector<u32>& parameters, u32 method) override; | ||||
| 
 | ||||
|     void Compile_ALU(Macro::Opcode opcode); | ||||
|     void Compile_AddImmediate(Macro::Opcode opcode); | ||||
|     void Compile_ExtractInsert(Macro::Opcode opcode); | ||||
|     void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); | ||||
|     void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); | ||||
|     void Compile_Read(Macro::Opcode opcode); | ||||
|     void Compile_Branch(Macro::Opcode opcode); | ||||
| 
 | ||||
| private: | ||||
|     void Optimizer_ScanFlags(); | ||||
| 
 | ||||
|     void Compile(); | ||||
|     bool Compile_NextInstruction(); | ||||
| 
 | ||||
|     Xbyak::Reg32 Compile_FetchParameter(); | ||||
|     Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); | ||||
| 
 | ||||
|     void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); | ||||
|     void Compile_Send(Xbyak::Reg32 value); | ||||
| 
 | ||||
|     Macro::Opcode GetOpCode() const; | ||||
|     std::bitset<32> PersistentCallerSavedRegs() const; | ||||
| 
 | ||||
|     struct JITState { | ||||
|         Engines::Maxwell3D* maxwell3d{}; | ||||
|         std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; | ||||
|         u32 carry_flag{}; | ||||
|     }; | ||||
|     static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); | ||||
|     using ProgramType = void (*)(JITState*, const u32*); | ||||
| 
 | ||||
|     struct OptimizerState { | ||||
|         bool can_skip_carry{}; | ||||
|         bool has_delayed_pc{}; | ||||
|         bool zero_reg_skip{}; | ||||
|         bool skip_dummy_addimmediate{}; | ||||
|         bool optimize_for_method_move{}; | ||||
|         bool enable_asserts{}; | ||||
|     }; | ||||
|     OptimizerState optimizer{}; | ||||
| 
 | ||||
|     std::optional<Macro::Opcode> next_opcode{}; | ||||
|     ProgramType program{nullptr}; | ||||
| 
 | ||||
|     std::array<Xbyak::Label, MAX_CODE_SIZE> labels; | ||||
|     std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; | ||||
|     Xbyak::Label end_of_code{}; | ||||
| 
 | ||||
|     bool is_delay_slot{}; | ||||
|     u32 pc{}; | ||||
|     std::optional<u32> delayed_pc; | ||||
| 
 | ||||
|     const std::vector<u32>& code; | ||||
|     Engines::Maxwell3D& maxwell3d; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Tegra
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Lioncash
						Lioncash