forked from eden-emu/eden
		
	hle service: nvdrv: nvhost_gpu: Update to use SyncpointManager and other improvements.
- Refactor so that SubmitGPFIFO and KickoffPB use shared functionality. - Implement add_wait and add_increment flags.
This commit is contained in:
		
							parent
							
								
									c6e1c46ac7
								
							
						
					
					
						commit
						e67b8678f8
					
				
					 3 changed files with 108 additions and 48 deletions
				
			
		|  | @ -7,14 +7,17 @@ | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" | #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" | ||||||
|  | #include "core/hle/service/nvdrv/syncpoint_manager.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| 
 | 
 | ||||||
| namespace Service::Nvidia::Devices { | namespace Service::Nvidia::Devices { | ||||||
| 
 | 
 | ||||||
| nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) | nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, | ||||||
|     : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} |                        SyncpointManager& syncpoint_manager) | ||||||
|  |     : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} {} | ||||||
|  | 
 | ||||||
| nvhost_gpu::~nvhost_gpu() = default; | nvhost_gpu::~nvhost_gpu() = default; | ||||||
| 
 | 
 | ||||||
| u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | ||||||
|  | @ -126,10 +129,9 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou | ||||||
|                 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, |                 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, | ||||||
|                 params.unk3); |                 params.unk3); | ||||||
| 
 | 
 | ||||||
|     auto& gpu = system.GPU(); |     params.fence_out.id = syncpoint_manager.AllocateSyncpoint(); | ||||||
|     params.fence_out.id = assigned_syncpoints; |     params.fence_out.value = syncpoint_manager.RefreshSyncpoint(params.fence_out.id); | ||||||
|     params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints); | 
 | ||||||
|     assigned_syncpoints++; |  | ||||||
|     std::memcpy(output.data(), &params, output.size()); |     std::memcpy(output.data(), &params, output.size()); | ||||||
|     return 0; |     return 0; | ||||||
| } | } | ||||||
|  | @ -145,37 +147,95 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector< | ||||||
|     return 0; |     return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) { | ||||||
|  |     return { | ||||||
|  |         Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, | ||||||
|  |                                   Tegra::SubmissionMode::Increasing), | ||||||
|  |         {fence.value}, | ||||||
|  |         Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, | ||||||
|  |                                   Tegra::SubmissionMode::Increasing), | ||||||
|  |         Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id), | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, u32 add_increment) { | ||||||
|  |     std::vector<Tegra::CommandHeader> result{ | ||||||
|  |         Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, | ||||||
|  |                                   Tegra::SubmissionMode::Increasing), | ||||||
|  |         {}}; | ||||||
|  | 
 | ||||||
|  |     for (u32 count = 0; count < add_increment; ++count) { | ||||||
|  |         result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, | ||||||
|  |                                                       Tegra::SubmissionMode::Increasing)); | ||||||
|  |         result.emplace_back( | ||||||
|  |             Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id)); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence, | ||||||
|  |                                                                           u32 add_increment) { | ||||||
|  |     std::vector<Tegra::CommandHeader> result{ | ||||||
|  |         Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1, | ||||||
|  |                                   Tegra::SubmissionMode::Increasing), | ||||||
|  |         {}}; | ||||||
|  |     const std::vector<Tegra::CommandHeader> increment{ | ||||||
|  |         BuildIncrementCommandList(fence, add_increment)}; | ||||||
|  | 
 | ||||||
|  |     result.insert(result.end(), increment.begin(), increment.end()); | ||||||
|  | 
 | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, | ||||||
|  |                                  Tegra::CommandList&& entries) { | ||||||
|  |     LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, | ||||||
|  |               params.num_entries, params.flags.raw); | ||||||
|  | 
 | ||||||
|  |     auto& gpu = system.GPU(); | ||||||
|  |     if (params.flags.add_wait.Value() && | ||||||
|  |         !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) { | ||||||
|  |         gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)}); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (params.flags.add_increment.Value() || params.flags.increment.Value()) { | ||||||
|  |         const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0; | ||||||
|  |         params.fence_out.value = syncpoint_manager.IncreaseSyncpoint( | ||||||
|  |             params.fence_out.id, params.AddIncrementValue() + increment_value); | ||||||
|  |     } else { | ||||||
|  |         params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     entries.RefreshIntegrityChecks(gpu); | ||||||
|  |     gpu.PushGPUEntries(std::move(entries)); | ||||||
|  | 
 | ||||||
|  |     if (params.flags.add_increment.Value()) { | ||||||
|  |         if (params.flags.suppress_wfi) { | ||||||
|  |             gpu.PushGPUEntries(Tegra::CommandList{ | ||||||
|  |                 BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())}); | ||||||
|  |         } else { | ||||||
|  |             gpu.PushGPUEntries(Tegra::CommandList{ | ||||||
|  |                 BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())}); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { | u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { | ||||||
|     if (input.size() < sizeof(IoctlSubmitGpfifo)) { |     if (input.size() < sizeof(IoctlSubmitGpfifo)) { | ||||||
|         UNIMPLEMENTED(); |         UNIMPLEMENTED(); | ||||||
|     } |     } | ||||||
|     IoctlSubmitGpfifo params{}; |     IoctlSubmitGpfifo params{}; | ||||||
|     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); |     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); | ||||||
|     LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, |  | ||||||
|               params.num_entries, params.flags.raw); |  | ||||||
| 
 |  | ||||||
|     ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + |  | ||||||
|                                    params.num_entries * sizeof(Tegra::CommandListHeader), |  | ||||||
|                "Incorrect input size"); |  | ||||||
| 
 | 
 | ||||||
|     Tegra::CommandList entries(params.num_entries); |     Tegra::CommandList entries(params.num_entries); | ||||||
|     std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], |     std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)], | ||||||
|                 params.num_entries * sizeof(Tegra::CommandListHeader)); |                 params.num_entries * sizeof(Tegra::CommandListHeader)); | ||||||
| 
 | 
 | ||||||
|     UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); |     return SubmitGPFIFOImpl(params, output, std::move(entries)); | ||||||
|     UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); |  | ||||||
| 
 |  | ||||||
|     auto& gpu = system.GPU(); |  | ||||||
|     u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); |  | ||||||
|     if (params.flags.increment.Value()) { |  | ||||||
|         params.fence_out.value += current_syncpoint_value; |  | ||||||
|     } else { |  | ||||||
|         params.fence_out.value = current_syncpoint_value; |  | ||||||
|     } |  | ||||||
|     gpu.PushGPUEntries(std::move(entries)); |  | ||||||
| 
 |  | ||||||
|     std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); |  | ||||||
|     return 0; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, | u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, | ||||||
|  | @ -185,31 +245,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, | ||||||
|     } |     } | ||||||
|     IoctlSubmitGpfifo params{}; |     IoctlSubmitGpfifo params{}; | ||||||
|     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); |     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); | ||||||
|     LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, |  | ||||||
|               params.num_entries, params.flags.raw); |  | ||||||
| 
 | 
 | ||||||
|     Tegra::CommandList entries(params.num_entries); |     Tegra::CommandList entries(params.num_entries); | ||||||
|     if (version == IoctlVersion::Version2) { |     if (version == IoctlVersion::Version2) { | ||||||
|         std::memcpy(entries.data(), input2.data(), |         std::memcpy(entries.command_lists.data(), input2.data(), | ||||||
|                     params.num_entries * sizeof(Tegra::CommandListHeader)); |                     params.num_entries * sizeof(Tegra::CommandListHeader)); | ||||||
|     } else { |     } else { | ||||||
|         system.Memory().ReadBlock(params.address, entries.data(), |         system.Memory().ReadBlock(params.address, entries.command_lists.data(), | ||||||
|                                   params.num_entries * sizeof(Tegra::CommandListHeader)); |                                   params.num_entries * sizeof(Tegra::CommandListHeader)); | ||||||
|     } |     } | ||||||
|     UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); |  | ||||||
|     UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); |  | ||||||
| 
 | 
 | ||||||
|     auto& gpu = system.GPU(); |     return SubmitGPFIFOImpl(params, output, std::move(entries)); | ||||||
|     u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); |  | ||||||
|     if (params.flags.increment.Value()) { |  | ||||||
|         params.fence_out.value += current_syncpoint_value; |  | ||||||
|     } else { |  | ||||||
|         params.fence_out.value = current_syncpoint_value; |  | ||||||
|     } |  | ||||||
|     gpu.PushGPUEntries(std::move(entries)); |  | ||||||
| 
 |  | ||||||
|     std::memcpy(output.data(), &params, output.size()); |  | ||||||
|     return 0; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { | u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { | ||||||
|  |  | ||||||
|  | @ -11,6 +11,11 @@ | ||||||
| #include "common/swap.h" | #include "common/swap.h" | ||||||
| #include "core/hle/service/nvdrv/devices/nvdevice.h" | #include "core/hle/service/nvdrv/devices/nvdevice.h" | ||||||
| #include "core/hle/service/nvdrv/nvdata.h" | #include "core/hle/service/nvdrv/nvdata.h" | ||||||
|  | #include "video_core/dma_pusher.h" | ||||||
|  | 
 | ||||||
|  | namespace Service::Nvidia { | ||||||
|  | class SyncpointManager; | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| namespace Service::Nvidia::Devices { | namespace Service::Nvidia::Devices { | ||||||
| 
 | 
 | ||||||
|  | @ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b); | ||||||
| 
 | 
 | ||||||
| class nvhost_gpu final : public nvdevice { | class nvhost_gpu final : public nvdevice { | ||||||
| public: | public: | ||||||
|     explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |     explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, | ||||||
|  |                         SyncpointManager& syncpoint_manager); | ||||||
|     ~nvhost_gpu() override; |     ~nvhost_gpu() override; | ||||||
| 
 | 
 | ||||||
|     u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, |     u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | ||||||
|  | @ -162,10 +168,15 @@ private: | ||||||
|             u32_le raw; |             u32_le raw; | ||||||
|             BitField<0, 1, u32_le> add_wait;      // append a wait sync_point to the list
 |             BitField<0, 1, u32_le> add_wait;      // append a wait sync_point to the list
 | ||||||
|             BitField<1, 1, u32_le> add_increment; // append an increment to the list
 |             BitField<1, 1, u32_le> add_increment; // append an increment to the list
 | ||||||
|             BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
 |             BitField<2, 1, u32_le> new_hw_format; // mostly ignored
 | ||||||
|  |             BitField<4, 1, u32_le> suppress_wfi;  // suppress wait for interrupt
 | ||||||
|             BitField<8, 1, u32_le> increment;     // increment the returned fence
 |             BitField<8, 1, u32_le> increment;     // increment the returned fence
 | ||||||
|         } flags; |         } flags; | ||||||
|         Fence fence_out; // returned new fence object for others to wait on
 |         Fence fence_out; // returned new fence object for others to wait on
 | ||||||
|  | 
 | ||||||
|  |         u32 AddIncrementValue() const { | ||||||
|  |             return flags.add_increment.Value() << 1; | ||||||
|  |         } | ||||||
|     }; |     }; | ||||||
|     static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), |     static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), | ||||||
|                   "IoctlSubmitGpfifo is incorrect size"); |                   "IoctlSubmitGpfifo is incorrect size"); | ||||||
|  | @ -190,6 +201,8 @@ private: | ||||||
|     u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output); |     u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output); | ||||||
|     u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output); |     u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output); | ||||||
|     u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output); |     u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output); | ||||||
|  |     u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, | ||||||
|  |                          Tegra::CommandList&& entries); | ||||||
|     u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output); |     u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output); | ||||||
|     u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, |     u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, | ||||||
|                   const std::vector<u8>& input2, IoctlVersion version); |                   const std::vector<u8>& input2, IoctlVersion version); | ||||||
|  | @ -198,7 +211,7 @@ private: | ||||||
|     u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); |     u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); | ||||||
| 
 | 
 | ||||||
|     std::shared_ptr<nvmap> nvmap_dev; |     std::shared_ptr<nvmap> nvmap_dev; | ||||||
|     u32 assigned_syncpoints{}; |     SyncpointManager& syncpoint_manager; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Service::Nvidia::Devices
 | } // namespace Service::Nvidia::Devices
 | ||||||
|  |  | ||||||
|  | @ -47,7 +47,8 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { | ||||||
|     } |     } | ||||||
|     auto nvmap_dev = std::make_shared<Devices::nvmap>(system); |     auto nvmap_dev = std::make_shared<Devices::nvmap>(system); | ||||||
|     devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev); |     devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev); | ||||||
|     devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev); |     devices["/dev/nvhost-gpu"] = | ||||||
|  |         std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager); | ||||||
|     devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system); |     devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system); | ||||||
|     devices["/dev/nvmap"] = nvmap_dev; |     devices["/dev/nvmap"] = nvmap_dev; | ||||||
|     devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); |     devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei