forked from eden-emu/eden
		
	maxwell_3d: Restructure macro upload to use a single macro code memory.
- Fixes an issue where macros could be skipped.
- Fixes rendering of distant objects in Super Mario Odyssey.
This commit is contained in:
		
							parent
							
								
									f6d9e33742
								
							
						
					
					
						commit
						42b74a5e41
					
				
					 4 changed files with 55 additions and 27 deletions
				
			
		|  | @ -43,15 +43,17 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | ||||||
|     // Reset the current macro.
 |     // Reset the current macro.
 | ||||||
|     executing_macro = 0; |     executing_macro = 0; | ||||||
| 
 | 
 | ||||||
|     // The requested macro must have been uploaded already.
 |     // Lookup the macro offset
 | ||||||
|     auto macro_code = uploaded_macros.find(method); |     const u32 entry{(method - MacroRegistersStart) >> 1}; | ||||||
|     if (macro_code == uploaded_macros.end()) { |     const auto& search{macro_offsets.find(entry)}; | ||||||
|         LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method); |     if (search == macro_offsets.end()) { | ||||||
|  |         LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method); | ||||||
|  |         UNREACHABLE(); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // Execute the current macro.
 |     // Execute the current macro.
 | ||||||
|     macro_interpreter.Execute(macro_code->second, std::move(parameters)); |     macro_interpreter.Execute(search->second, std::move(parameters)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | ||||||
|  | @ -97,6 +99,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | ||||||
|         ProcessMacroUpload(value); |         ProcessMacroUpload(value); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  |     case MAXWELL3D_REG_INDEX(macros.bind): { | ||||||
|  |         ProcessMacroBind(value); | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): |     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): | ||||||
|     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): |     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): | ||||||
|     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): |     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): | ||||||
|  | @ -158,9 +164,13 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Maxwell3D::ProcessMacroUpload(u32 data) { | void Maxwell3D::ProcessMacroUpload(u32 data) { | ||||||
|     // Store the uploaded macro code to interpret them when they're called.
 |     ASSERT_MSG(regs.macros.upload_address < macro_memory.size(), | ||||||
|     auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart]; |                "upload_address exceeded macro_memory size!"); | ||||||
|     macro.push_back(data); |     macro_memory[regs.macros.upload_address++] = data; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void Maxwell3D::ProcessMacroBind(u32 data) { | ||||||
|  |     macro_offsets[regs.macros.entry] = data; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Maxwell3D::ProcessQueryGet() { | void Maxwell3D::ProcessQueryGet() { | ||||||
|  |  | ||||||
|  | @ -475,12 +475,13 @@ public: | ||||||
|                 INSERT_PADDING_WORDS(0x45); |                 INSERT_PADDING_WORDS(0x45); | ||||||
| 
 | 
 | ||||||
|                 struct { |                 struct { | ||||||
|                     INSERT_PADDING_WORDS(1); |                     u32 upload_address; | ||||||
|                     u32 data; |                     u32 data; | ||||||
|                     u32 entry; |                     u32 entry; | ||||||
|  |                     u32 bind; | ||||||
|                 } macros; |                 } macros; | ||||||
| 
 | 
 | ||||||
|                 INSERT_PADDING_WORDS(0x189); |                 INSERT_PADDING_WORDS(0x188); | ||||||
| 
 | 
 | ||||||
|                 u32 tfb_enabled; |                 u32 tfb_enabled; | ||||||
| 
 | 
 | ||||||
|  | @ -994,12 +995,25 @@ public: | ||||||
|     /// Returns the texture information for a specific texture in a specific shader stage.
 |     /// Returns the texture information for a specific texture in a specific shader stage.
 | ||||||
|     Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; |     Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; | ||||||
| 
 | 
 | ||||||
|  |     /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
 | ||||||
|  |     /// we've seen used.
 | ||||||
|  |     using MacroMemory = std::array<u32, 0x40000>; | ||||||
|  | 
 | ||||||
|  |     /// Gets a reference to macro memory.
 | ||||||
|  |     const MacroMemory& GetMacroMemory() const { | ||||||
|  |         return macro_memory; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     void InitializeRegisterDefaults(); |     void InitializeRegisterDefaults(); | ||||||
| 
 | 
 | ||||||
|     VideoCore::RasterizerInterface& rasterizer; |     VideoCore::RasterizerInterface& rasterizer; | ||||||
| 
 | 
 | ||||||
|     std::unordered_map<u32, std::vector<u32>> uploaded_macros; |     /// Start offsets of each macro in macro_memory
 | ||||||
|  |     std::unordered_map<u32, u32> macro_offsets; | ||||||
|  | 
 | ||||||
|  |     /// Memory for macro code
 | ||||||
|  |     MacroMemory macro_memory; | ||||||
| 
 | 
 | ||||||
|     /// Macro method that is currently being executed / being fed parameters.
 |     /// Macro method that is currently being executed / being fed parameters.
 | ||||||
|     u32 executing_macro = 0; |     u32 executing_macro = 0; | ||||||
|  | @ -1022,9 +1036,12 @@ private: | ||||||
|      */ |      */ | ||||||
|     void CallMacroMethod(u32 method, std::vector<u32> parameters); |     void CallMacroMethod(u32 method, std::vector<u32> parameters); | ||||||
| 
 | 
 | ||||||
|     /// Handles writes to the macro uploading registers.
 |     /// Handles writes to the macro uploading register.
 | ||||||
|     void ProcessMacroUpload(u32 data); |     void ProcessMacroUpload(u32 data); | ||||||
| 
 | 
 | ||||||
|  |     /// Handles writes to the macro bind register.
 | ||||||
|  |     void ProcessMacroBind(u32 data); | ||||||
|  | 
 | ||||||
|     /// Handles a write to the CLEAR_BUFFERS register.
 |     /// Handles a write to the CLEAR_BUFFERS register.
 | ||||||
|     void ProcessClearBuffers(); |     void ProcessClearBuffers(); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -11,7 +11,7 @@ namespace Tegra { | ||||||
| 
 | 
 | ||||||
| MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | ||||||
| 
 | 
 | ||||||
| void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) { | void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { | ||||||
|     Reset(); |     Reset(); | ||||||
|     registers[1] = parameters[0]; |     registers[1] = parameters[0]; | ||||||
|     this->parameters = std::move(parameters); |     this->parameters = std::move(parameters); | ||||||
|  | @ -19,7 +19,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa | ||||||
|     // Execute the code until we hit an exit condition.
 |     // Execute the code until we hit an exit condition.
 | ||||||
|     bool keep_executing = true; |     bool keep_executing = true; | ||||||
|     while (keep_executing) { |     while (keep_executing) { | ||||||
|         keep_executing = Step(code, false); |         keep_executing = Step(offset, false); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // Assert the macro used all the input parameters
 |     // Assert the macro used all the input parameters
 | ||||||
|  | @ -37,10 +37,10 @@ void MacroInterpreter::Reset() { | ||||||
|     next_parameter_index = 1; |     next_parameter_index = 1; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { | bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { | ||||||
|     u32 base_address = pc; |     u32 base_address = pc; | ||||||
| 
 | 
 | ||||||
|     Opcode opcode = GetOpcode(code); |     Opcode opcode = GetOpcode(offset); | ||||||
|     pc += 4; |     pc += 4; | ||||||
| 
 | 
 | ||||||
|     // Update the program counter if we were delayed
 |     // Update the program counter if we were delayed
 | ||||||
|  | @ -108,7 +108,7 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { | ||||||
| 
 | 
 | ||||||
|             delayed_pc = base_address + opcode.GetBranchTarget(); |             delayed_pc = base_address + opcode.GetBranchTarget(); | ||||||
|             // Execute one more instruction due to the delay slot.
 |             // Execute one more instruction due to the delay slot.
 | ||||||
|             return Step(code, true); |             return Step(offset, true); | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  | @ -121,17 +121,18 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { | ||||||
|         // Exit has a delay slot, execute the next instruction
 |         // Exit has a delay slot, execute the next instruction
 | ||||||
|         // Note: Executing an exit during a branch delay slot will cause the instruction at the
 |         // Note: Executing an exit during a branch delay slot will cause the instruction at the
 | ||||||
|         // branch target to be executed before exiting.
 |         // branch target to be executed before exiting.
 | ||||||
|         Step(code, true); |         Step(offset, true); | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const { | MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const { | ||||||
|  |     const auto& macro_memory{maxwell3d.GetMacroMemory()}; | ||||||
|     ASSERT((pc % sizeof(u32)) == 0); |     ASSERT((pc % sizeof(u32)) == 0); | ||||||
|     ASSERT(pc < code.size() * sizeof(u32)); |     ASSERT((pc + offset) < macro_memory.size() * sizeof(u32)); | ||||||
|     return {code[pc / sizeof(u32)]}; |     return {macro_memory[offset + pc / sizeof(u32)]}; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const { | u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const { | ||||||
|  |  | ||||||
|  | @ -22,10 +22,10 @@ public: | ||||||
| 
 | 
 | ||||||
|     /**
 |     /**
 | ||||||
|      * Executes the macro code with the specified input parameters. |      * Executes the macro code with the specified input parameters. | ||||||
|      * @param code The macro byte code to execute |      * @param offset Offset to start execution at. | ||||||
|      * @param parameters The parameters of the macro |      * @param parameters The parameters of the macro. | ||||||
|      */ |      */ | ||||||
|     void Execute(const std::vector<u32>& code, std::vector<u32> parameters); |     void Execute(u32 offset, std::vector<u32> parameters); | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     enum class Operation : u32 { |     enum class Operation : u32 { | ||||||
|  | @ -110,11 +110,11 @@ private: | ||||||
|     /**
 |     /**
 | ||||||
|      * Executes a single macro instruction located at the current program counter. Returns whether |      * Executes a single macro instruction located at the current program counter. Returns whether | ||||||
|      * the interpreter should keep running. |      * the interpreter should keep running. | ||||||
|      * @param code The macro code to execute. |      * @param offset Offset to start execution at. | ||||||
|      * @param is_delay_slot Whether the current step is being executed due to a delay slot in a |      * @param is_delay_slot Whether the current step is being executed due to a delay slot in a | ||||||
|      * previous instruction. |      * previous instruction. | ||||||
|      */ |      */ | ||||||
|     bool Step(const std::vector<u32>& code, bool is_delay_slot); |     bool Step(u32 offset, bool is_delay_slot); | ||||||
| 
 | 
 | ||||||
|     /// Calculates the result of an ALU operation. src_a OP src_b;
 |     /// Calculates the result of an ALU operation. src_a OP src_b;
 | ||||||
|     u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const; |     u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const; | ||||||
|  | @ -127,7 +127,7 @@ private: | ||||||
|     bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; |     bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; | ||||||
| 
 | 
 | ||||||
|     /// Reads an opcode at the current program counter location.
 |     /// Reads an opcode at the current program counter location.
 | ||||||
|     Opcode GetOpcode(const std::vector<u32>& code) const; |     Opcode GetOpcode(u32 offset) const; | ||||||
| 
 | 
 | ||||||
|     /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
 |     /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
 | ||||||
|     u32 GetRegister(u32 register_id) const; |     u32 GetRegister(u32 register_id) const; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei