forked from eden-emu/eden
		
	video_core: Move command buffer loop.
This moves the hot loop into video_core. This refactoring should reduce the CPU overhead of calling ProcessCommandList.
This commit is contained in:
		
							parent
							
								
									f84b102dfb
								
							
						
					
					
						commit
						a5613f87ef
					
				
					 5 changed files with 86 additions and 79 deletions
				
			
		|  | @ -8,6 +8,7 @@ | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" | #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
|  | #include "video_core/command_processor.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| 
 | 
 | ||||||
|  | @ -134,17 +135,16 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp | ||||||
|     LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", |     LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", | ||||||
|                 params.address, params.num_entries, params.flags); |                 params.address, params.num_entries, params.flags); | ||||||
| 
 | 
 | ||||||
|     ASSERT_MSG(input.size() == |     ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + | ||||||
|                    sizeof(IoctlSubmitGpfifo) + params.num_entries * sizeof(IoctlGpfifoEntry), |                                    params.num_entries * sizeof(Tegra::CommandListHeader), | ||||||
|                "Incorrect input size"); |                "Incorrect input size"); | ||||||
| 
 | 
 | ||||||
|     std::vector<IoctlGpfifoEntry> entries(params.num_entries); |     std::vector<Tegra::CommandListHeader> entries(params.num_entries); | ||||||
|     std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], |     std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], | ||||||
|                 params.num_entries * sizeof(IoctlGpfifoEntry)); |                 params.num_entries * sizeof(Tegra::CommandListHeader)); | ||||||
|     for (auto entry : entries) { | 
 | ||||||
|         Tegra::GPUVAddr va_addr = entry.Address(); |     Core::System::GetInstance().GPU().ProcessCommandLists(entries); | ||||||
|         Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz); | 
 | ||||||
|     } |  | ||||||
|     params.fence_out.id = 0; |     params.fence_out.id = 0; | ||||||
|     params.fence_out.value = 0; |     params.fence_out.value = 0; | ||||||
|     std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); |     std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); | ||||||
|  | @ -160,14 +160,12 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output) | ||||||
|     LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", |     LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", | ||||||
|                 params.address, params.num_entries, params.flags); |                 params.address, params.num_entries, params.flags); | ||||||
| 
 | 
 | ||||||
|     std::vector<IoctlGpfifoEntry> entries(params.num_entries); |     std::vector<Tegra::CommandListHeader> entries(params.num_entries); | ||||||
|     Memory::ReadBlock(params.address, entries.data(), |     Memory::ReadBlock(params.address, entries.data(), | ||||||
|                       params.num_entries * sizeof(IoctlGpfifoEntry)); |                       params.num_entries * sizeof(Tegra::CommandListHeader)); | ||||||
|  | 
 | ||||||
|  |     Core::System::GetInstance().GPU().ProcessCommandLists(entries); | ||||||
| 
 | 
 | ||||||
|     for (auto entry : entries) { |  | ||||||
|         Tegra::GPUVAddr va_addr = entry.Address(); |  | ||||||
|         Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz); |  | ||||||
|     } |  | ||||||
|     params.fence_out.id = 0; |     params.fence_out.id = 0; | ||||||
|     params.fence_out.value = 0; |     params.fence_out.value = 0; | ||||||
|     std::memcpy(output.data(), &params, output.size()); |     std::memcpy(output.data(), &params, output.size()); | ||||||
|  |  | ||||||
|  | @ -10,7 +10,6 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/swap.h" | #include "common/swap.h" | ||||||
| #include "core/hle/service/nvdrv/devices/nvdevice.h" | #include "core/hle/service/nvdrv/devices/nvdevice.h" | ||||||
| #include "video_core/memory_manager.h" |  | ||||||
| 
 | 
 | ||||||
| namespace Service::Nvidia::Devices { | namespace Service::Nvidia::Devices { | ||||||
| 
 | 
 | ||||||
|  | @ -151,22 +150,6 @@ private: | ||||||
|     }; |     }; | ||||||
|     static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size"); |     static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size"); | ||||||
| 
 | 
 | ||||||
|     struct IoctlGpfifoEntry { |  | ||||||
|         u32_le entry0; // gpu_va_lo
 |  | ||||||
|         union { |  | ||||||
|             u32_le entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
 |  | ||||||
|             BitField<0, 8, u32_le> gpu_va_hi; |  | ||||||
|             BitField<8, 2, u32_le> unk1; |  | ||||||
|             BitField<10, 21, u32_le> sz; |  | ||||||
|             BitField<31, 1, u32_le> unk2; |  | ||||||
|         }; |  | ||||||
| 
 |  | ||||||
|         Tegra::GPUVAddr Address() const { |  | ||||||
|             return (static_cast<Tegra::GPUVAddr>(gpu_va_hi) << 32) | entry0; |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
|     static_assert(sizeof(IoctlGpfifoEntry) == 8, "IoctlGpfifoEntry is incorrect size"); |  | ||||||
| 
 |  | ||||||
|     struct IoctlSubmitGpfifo { |     struct IoctlSubmitGpfifo { | ||||||
|         u64_le address;     // pointer to gpfifo entry structs
 |         u64_le address;     // pointer to gpfifo entry structs
 | ||||||
|         u32_le num_entries; // number of fence objects being submitted
 |         u32_le num_entries; // number of fence objects being submitted
 | ||||||
|  |  | ||||||
|  | @ -69,7 +69,13 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPU::ProcessCommandList(GPUVAddr address, u32 size) { | MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192)); | ||||||
|  | 
 | ||||||
|  | void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { | ||||||
|  |     MICROPROFILE_SCOPE(ProcessCommandLists); | ||||||
|  |     for (auto entry : commands) { | ||||||
|  |         Tegra::GPUVAddr address = entry.Address(); | ||||||
|  |         u32 size = entry.sz; | ||||||
|         const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); |         const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); | ||||||
|         VAddr current_addr = *head_address; |         VAddr current_addr = *head_address; | ||||||
|         while (current_addr < *head_address + size * sizeof(CommandHeader)) { |         while (current_addr < *head_address + size * sizeof(CommandHeader)) { | ||||||
|  | @ -100,8 +106,8 @@ void GPU::ProcessCommandList(GPUVAddr address, u32 size) { | ||||||
|             case SubmissionMode::IncreaseOnce: { |             case SubmissionMode::IncreaseOnce: { | ||||||
|                 ASSERT(header.arg_count.Value() >= 1); |                 ASSERT(header.arg_count.Value() >= 1); | ||||||
| 
 | 
 | ||||||
|             // Use the original method for the first argument and then the next method for all other
 |                 // Use the original method for the first argument and then the next method for all
 | ||||||
|             // arguments.
 |                 // other arguments.
 | ||||||
|                 WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), |                 WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), | ||||||
|                          header.arg_count - 1); |                          header.arg_count - 1); | ||||||
|                 current_addr += sizeof(u32); |                 current_addr += sizeof(u32); | ||||||
|  | @ -123,5 +129,6 @@ void GPU::ProcessCommandList(GPUVAddr address, u32 size) { | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| } // namespace Tegra
 | } // namespace Tegra
 | ||||||
|  |  | ||||||
|  | @ -7,6 +7,7 @@ | ||||||
| #include <type_traits> | #include <type_traits> | ||||||
| #include "common/bit_field.h" | #include "common/bit_field.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "video_core/memory_manager.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| 
 | 
 | ||||||
|  | @ -19,6 +20,22 @@ enum class SubmissionMode : u32 { | ||||||
|     IncreaseOnce = 5 |     IncreaseOnce = 5 | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | struct CommandListHeader { | ||||||
|  |     u32 entry0; // gpu_va_lo
 | ||||||
|  |     union { | ||||||
|  |         u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
 | ||||||
|  |         BitField<0, 8, u32> gpu_va_hi; | ||||||
|  |         BitField<8, 2, u32> unk1; | ||||||
|  |         BitField<10, 21, u32> sz; | ||||||
|  |         BitField<31, 1, u32> unk2; | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     GPUVAddr Address() const { | ||||||
|  |         return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0; | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size"); | ||||||
|  | 
 | ||||||
| union CommandHeader { | union CommandHeader { | ||||||
|     u32 hex; |     u32 hex; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -6,6 +6,7 @@ | ||||||
| 
 | 
 | ||||||
| #include <array> | #include <array> | ||||||
| #include <memory> | #include <memory> | ||||||
|  | #include <vector> | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "core/hle/service/nvflinger/buffer_queue.h" | #include "core/hle/service/nvflinger/buffer_queue.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
|  | @ -67,6 +68,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format); | ||||||
| /// Returns the number of bytes per pixel of each depth format.
 | /// Returns the number of bytes per pixel of each depth format.
 | ||||||
| u32 DepthFormatBytesPerPixel(DepthFormat format); | u32 DepthFormatBytesPerPixel(DepthFormat format); | ||||||
| 
 | 
 | ||||||
|  | struct CommandListHeader; | ||||||
| class DebugContext; | class DebugContext; | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  | @ -115,7 +117,7 @@ public: | ||||||
|     ~GPU(); |     ~GPU(); | ||||||
| 
 | 
 | ||||||
|     /// Processes a command list stored at the specified address in GPU memory.
 |     /// Processes a command list stored at the specified address in GPU memory.
 | ||||||
|     void ProcessCommandList(GPUVAddr address, u32 size); |     void ProcessCommandLists(const std::vector<CommandListHeader>& commands); | ||||||
| 
 | 
 | ||||||
|     /// Returns a reference to the Maxwell3D GPU engine.
 |     /// Returns a reference to the Maxwell3D GPU engine.
 | ||||||
|     Engines::Maxwell3D& Maxwell3D(); |     Engines::Maxwell3D& Maxwell3D(); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Markus Wick
						Markus Wick