forked from eden-emu/eden
		
	DMAPusher: Improve collection of non-executing methods
This commit is contained in:
		
							parent
							
								
									d2643a61c3
								
							
						
					
					
						commit
						4bf1ee5bdc
					
				
					 13 changed files with 181 additions and 2 deletions
				
			
		|  | @ -178,6 +178,11 @@ void DmaPusher::CallMethod(u32 argument) const { | |||
|         }); | ||||
|     } else { | ||||
|         auto subchannel = subchannels[dma_state.subchannel]; | ||||
|         if (!subchannel->execution_mask[dma_state.method]) [[likely]] { | ||||
|             subchannel->method_sink.emplace_back(dma_state.method, argument); | ||||
|             return; | ||||
|         } | ||||
|         subchannel->ConsumeSink(); | ||||
|         subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; | ||||
|         subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call); | ||||
|     } | ||||
|  | @ -189,6 +194,7 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { | |||
|                                dma_state.method_count); | ||||
|     } else { | ||||
|         auto subchannel = subchannels[dma_state.subchannel]; | ||||
|         subchannel->ConsumeSink(); | ||||
|         subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; | ||||
|         subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, | ||||
|                                     dma_state.method_count); | ||||
|  |  | |||
|  | @ -3,6 +3,10 @@ | |||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <bitset> | ||||
| #include <limits> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| namespace Tegra::Engines { | ||||
|  | @ -18,8 +22,25 @@ public: | |||
|     virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||
|                                  u32 methods_pending) = 0; | ||||
| 
 | ||||
|     void ConsumeSink() { | ||||
|         if (method_sink.empty()) { | ||||
|             return; | ||||
|         } | ||||
|         ConsumeSinkImpl(); | ||||
|     } | ||||
| 
 | ||||
|     std::bitset<std::numeric_limits<u16>::max()> execution_mask{}; | ||||
|     std::vector<std::pair<u32, u32>> method_sink{}; | ||||
|     bool current_dirty{}; | ||||
|     GPUVAddr current_dma_segment; | ||||
| 
 | ||||
| protected: | ||||
|     virtual void ConsumeSinkImpl() { | ||||
|         for (auto [method, value] : method_sink) { | ||||
|             CallMethod(method, value, true); | ||||
|         } | ||||
|         method_sink.clear(); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| } // namespace Tegra::Engines
 | ||||
|  |  | |||
|  | @ -25,6 +25,9 @@ Fermi2D::Fermi2D(MemoryManager& memory_manager_) { | |||
|     // Nvidia's OpenGL driver seems to assume these values
 | ||||
|     regs.src.depth = 1; | ||||
|     regs.dst.depth = 1; | ||||
| 
 | ||||
|     execution_mask.reset(); | ||||
|     execution_mask[FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1] = true; | ||||
| } | ||||
| 
 | ||||
| Fermi2D::~Fermi2D() = default; | ||||
|  | @ -49,6 +52,13 @@ void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void Fermi2D::ConsumeSinkImpl() { | ||||
|     for (auto [method, value] : method_sink) { | ||||
|         regs.reg_array[method] = value; | ||||
|     } | ||||
|     method_sink.clear(); | ||||
| } | ||||
| 
 | ||||
| void Fermi2D::Blit() { | ||||
|     MICROPROFILE_SCOPE(GPU_BlitEngine); | ||||
|     LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}", | ||||
|  |  | |||
|  | @ -309,6 +309,8 @@ private: | |||
|     /// Performs the copy from the source surface to the destination surface as configured in the
 | ||||
|     /// registers.
 | ||||
|     void Blit(); | ||||
| 
 | ||||
|     void ConsumeSinkImpl() override; | ||||
| }; | ||||
| 
 | ||||
| #define ASSERT_REG_POSITION(field_name, position)                                                  \ | ||||
|  |  | |||
|  | @ -14,7 +14,12 @@ | |||
| namespace Tegra::Engines { | ||||
| 
 | ||||
| KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_) | ||||
|     : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {} | ||||
|     : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} { | ||||
|     execution_mask.reset(); | ||||
|     execution_mask[KEPLER_COMPUTE_REG_INDEX(exec_upload)] = true; | ||||
|     execution_mask[KEPLER_COMPUTE_REG_INDEX(data_upload)] = true; | ||||
|     execution_mask[KEPLER_COMPUTE_REG_INDEX(launch)] = true; | ||||
| } | ||||
| 
 | ||||
| KeplerCompute::~KeplerCompute() = default; | ||||
| 
 | ||||
|  | @ -23,6 +28,13 @@ void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) | |||
|     upload_state.BindRasterizer(rasterizer); | ||||
| } | ||||
| 
 | ||||
| void KeplerCompute::ConsumeSinkImpl() { | ||||
|     for (auto [method, value] : method_sink) { | ||||
|         regs.reg_array[method] = value; | ||||
|     } | ||||
|     method_sink.clear(); | ||||
| } | ||||
| 
 | ||||
| void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||
|     ASSERT_MSG(method < Regs::NUM_REGS, | ||||
|                "Invalid KeplerCompute register, increase the size of the Regs structure"); | ||||
|  |  | |||
|  | @ -204,6 +204,8 @@ public: | |||
| private: | ||||
|     void ProcessLaunch(); | ||||
| 
 | ||||
|     void ConsumeSinkImpl() override; | ||||
| 
 | ||||
|     /// Retrieves information about a specific TIC entry from the TIC buffer.
 | ||||
|     Texture::TICEntry GetTICEntry(u32 tic_index) const; | ||||
| 
 | ||||
|  |  | |||
|  | @ -18,6 +18,17 @@ KeplerMemory::~KeplerMemory() = default; | |||
| 
 | ||||
| void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||||
|     upload_state.BindRasterizer(rasterizer_); | ||||
| 
 | ||||
|     execution_mask.reset(); | ||||
|     execution_mask[KEPLERMEMORY_REG_INDEX(exec)] = true; | ||||
|     execution_mask[KEPLERMEMORY_REG_INDEX(data)] = true; | ||||
| } | ||||
| 
 | ||||
| void KeplerMemory::ConsumeSinkImpl() { | ||||
|     for (auto [method, value] : method_sink) { | ||||
|         regs.reg_array[method] = value; | ||||
|     } | ||||
|     method_sink.clear(); | ||||
| } | ||||
| 
 | ||||
| void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||
|  |  | |||
|  | @ -73,6 +73,8 @@ public: | |||
|     } regs{}; | ||||
| 
 | ||||
| private: | ||||
|     void ConsumeSinkImpl() override; | ||||
| 
 | ||||
|     Core::System& system; | ||||
|     Upload::State upload_state; | ||||
| }; | ||||
|  |  | |||
|  | @ -4,6 +4,7 @@ | |||
| #include <cstring> | ||||
| #include <optional> | ||||
| #include "common/assert.h" | ||||
| #include "common/scope_exit.h" | ||||
| #include "common/settings.h" | ||||
| #include "core/core.h" | ||||
| #include "core/core_timing.h" | ||||
|  | @ -30,6 +31,10 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) | |||
|                                                                                 regs.upload} { | ||||
|     dirty.flags.flip(); | ||||
|     InitializeRegisterDefaults(); | ||||
|     execution_mask.reset(); | ||||
|     for (size_t i = 0; i < execution_mask.size(); i++) { | ||||
|         execution_mask[i] = IsMethodExecutable(static_cast<u32>(i)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| Maxwell3D::~Maxwell3D() = default; | ||||
|  | @ -123,6 +128,71 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
|     shadow_state = regs; | ||||
| } | ||||
| 
 | ||||
| bool Maxwell3D::IsMethodExecutable(u32 method) { | ||||
|     if (method >= MacroRegistersStart) { | ||||
|         return true; | ||||
|     } | ||||
|     switch (method) { | ||||
|     case MAXWELL3D_REG_INDEX(draw.end): | ||||
|     case MAXWELL3D_REG_INDEX(draw.begin): | ||||
|     case MAXWELL3D_REG_INDEX(vertex_buffer.first): | ||||
|     case MAXWELL3D_REG_INDEX(vertex_buffer.count): | ||||
|     case MAXWELL3D_REG_INDEX(index_buffer.first): | ||||
|     case MAXWELL3D_REG_INDEX(index_buffer.count): | ||||
|     case MAXWELL3D_REG_INDEX(draw_inline_index): | ||||
|     case MAXWELL3D_REG_INDEX(index_buffer32_subsequent): | ||||
|     case MAXWELL3D_REG_INDEX(index_buffer16_subsequent): | ||||
|     case MAXWELL3D_REG_INDEX(index_buffer8_subsequent): | ||||
|     case MAXWELL3D_REG_INDEX(index_buffer32_first): | ||||
|     case MAXWELL3D_REG_INDEX(index_buffer16_first): | ||||
|     case MAXWELL3D_REG_INDEX(index_buffer8_first): | ||||
|     case MAXWELL3D_REG_INDEX(inline_index_2x16.even): | ||||
|     case MAXWELL3D_REG_INDEX(inline_index_4x8.index0): | ||||
|     case MAXWELL3D_REG_INDEX(vertex_array_instance_first): | ||||
|     case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent): | ||||
|     case MAXWELL3D_REG_INDEX(wait_for_idle): | ||||
|     case MAXWELL3D_REG_INDEX(shadow_ram_control): | ||||
|     case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr): | ||||
|     case MAXWELL3D_REG_INDEX(load_mme.instruction): | ||||
|     case MAXWELL3D_REG_INDEX(load_mme.start_address): | ||||
|     case MAXWELL3D_REG_INDEX(falcon[4]): | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer): | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 1: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 2: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 3: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 4: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 5: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 6: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 7: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 8: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 9: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 10: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 11: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 12: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 13: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 14: | ||||
|     case MAXWELL3D_REG_INDEX(const_buffer.buffer) + 15: | ||||
|     case MAXWELL3D_REG_INDEX(bind_groups[0].raw_config): | ||||
|     case MAXWELL3D_REG_INDEX(bind_groups[1].raw_config): | ||||
|     case MAXWELL3D_REG_INDEX(bind_groups[2].raw_config): | ||||
|     case MAXWELL3D_REG_INDEX(bind_groups[3].raw_config): | ||||
|     case MAXWELL3D_REG_INDEX(bind_groups[4].raw_config): | ||||
|     case MAXWELL3D_REG_INDEX(topology_override): | ||||
|     case MAXWELL3D_REG_INDEX(clear_surface): | ||||
|     case MAXWELL3D_REG_INDEX(report_semaphore.query): | ||||
|     case MAXWELL3D_REG_INDEX(render_enable.mode): | ||||
|     case MAXWELL3D_REG_INDEX(clear_report_value): | ||||
|     case MAXWELL3D_REG_INDEX(sync_info): | ||||
|     case MAXWELL3D_REG_INDEX(launch_dma): | ||||
|     case MAXWELL3D_REG_INDEX(inline_data): | ||||
|     case MAXWELL3D_REG_INDEX(fragment_barrier): | ||||
|     case MAXWELL3D_REG_INDEX(tiled_cache_barrier): | ||||
|         return true; | ||||
|     default: | ||||
|         return false; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { | ||||
|     if (executing_macro == 0) { | ||||
|         // A macro call must begin by writing the macro method's register, not its argument.
 | ||||
|  | @ -141,6 +211,7 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool | |||
| 
 | ||||
|     // Call the macro when there are no more parameters in the command buffer
 | ||||
|     if (is_last_call) { | ||||
|         ConsumeSink(); | ||||
|         CallMacroMethod(executing_macro, macro_params); | ||||
|         macro_params.clear(); | ||||
|         macro_addresses.clear(); | ||||
|  | @ -214,6 +285,29 @@ u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { | |||
|     return argument; | ||||
| } | ||||
| 
 | ||||
| void Maxwell3D::ConsumeSinkImpl() { | ||||
|     SCOPE_EXIT({ method_sink.clear(); }); | ||||
|     const auto control = shadow_state.shadow_ram_control; | ||||
|     if (control == Regs::ShadowRamControl::Track || | ||||
|         control == Regs::ShadowRamControl::TrackWithFilter) { | ||||
| 
 | ||||
|         for (auto [method, value] : method_sink) { | ||||
|             shadow_state.reg_array[method] = value; | ||||
|             ProcessDirtyRegisters(method, value); | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
|     if (control == Regs::ShadowRamControl::Replay) { | ||||
|         for (auto [method, value] : method_sink) { | ||||
|             ProcessDirtyRegisters(method, shadow_state.reg_array[method]); | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
|     for (auto [method, value] : method_sink) { | ||||
|         ProcessDirtyRegisters(method, value); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) { | ||||
|     if (regs.reg_array[method] == argument) { | ||||
|         return; | ||||
|  |  | |||
|  | @ -3123,6 +3123,8 @@ private: | |||
| 
 | ||||
|     void ProcessDirtyRegisters(u32 method, u32 argument); | ||||
| 
 | ||||
|     void ConsumeSinkImpl() override; | ||||
| 
 | ||||
|     void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call); | ||||
| 
 | ||||
|     /// Retrieves information about a specific TIC entry from the TIC buffer.
 | ||||
|  | @ -3172,6 +3174,8 @@ private: | |||
| 
 | ||||
|     void RefreshParametersImpl(); | ||||
| 
 | ||||
|     bool IsMethodExecutable(u32 method); | ||||
| 
 | ||||
|     Core::System& system; | ||||
|     MemoryManager& memory_manager; | ||||
| 
 | ||||
|  |  | |||
|  | @ -21,7 +21,10 @@ namespace Tegra::Engines { | |||
| using namespace Texture; | ||||
| 
 | ||||
| MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) | ||||
|     : system{system_}, memory_manager{memory_manager_} {} | ||||
|     : system{system_}, memory_manager{memory_manager_} { | ||||
|     execution_mask.reset(); | ||||
|     execution_mask[offsetof(Regs, launch_dma) / sizeof(u32)] = true; | ||||
| } | ||||
| 
 | ||||
| MaxwellDMA::~MaxwellDMA() = default; | ||||
| 
 | ||||
|  | @ -29,6 +32,13 @@ void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | |||
|     rasterizer = rasterizer_; | ||||
| } | ||||
| 
 | ||||
| void MaxwellDMA::ConsumeSinkImpl() { | ||||
|     for (auto [method, value] : method_sink) { | ||||
|         regs.reg_array[method] = value; | ||||
|     } | ||||
|     method_sink.clear(); | ||||
| } | ||||
| 
 | ||||
| void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||
|     ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); | ||||
| 
 | ||||
|  |  | |||
|  | @ -231,6 +231,8 @@ private: | |||
| 
 | ||||
|     void ReleaseSemaphore(); | ||||
| 
 | ||||
|     void ConsumeSinkImpl() override; | ||||
| 
 | ||||
|     Core::System& system; | ||||
| 
 | ||||
|     MemoryManager& memory_manager; | ||||
|  |  | |||
|  | @ -126,6 +126,7 @@ private: | |||
| 
 | ||||
|         const u32 vertex_first = parameters[3]; | ||||
|         const u32 vertex_count = parameters[1]; | ||||
|          | ||||
| 
 | ||||
|         if (maxwell3d.AnyParametersDirty() && | ||||
|             maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { | ||||
|  | @ -135,6 +136,7 @@ private: | |||
| 
 | ||||
|         const u32 base_instance = parameters[4]; | ||||
|         if (extended) { | ||||
|             maxwell3d.regs.global_base_instance_index = base_instance; | ||||
|             maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||
|             maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); | ||||
|         } | ||||
|  | @ -144,6 +146,7 @@ private: | |||
|             vertex_first, vertex_count, base_instance, instance_count); | ||||
| 
 | ||||
|         if (extended) { | ||||
|             maxwell3d.regs.global_base_instance_index = 0; | ||||
|             maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||
|             maxwell3d.replace_table.clear(); | ||||
|         } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow