forked from eden-emu/eden
		
	Merge pull request #4869 from bunnei/improve-gpu-sync
Improvements to GPU synchronization & various refactoring
This commit is contained in:
		
						commit
						6bbbbe8f85
					
				
					 15 changed files with 451 additions and 120 deletions
				
			
		|  | @ -454,6 +454,8 @@ add_library(core STATIC | ||||||
|     hle/service/nvdrv/nvdrv.h |     hle/service/nvdrv/nvdrv.h | ||||||
|     hle/service/nvdrv/nvmemp.cpp |     hle/service/nvdrv/nvmemp.cpp | ||||||
|     hle/service/nvdrv/nvmemp.h |     hle/service/nvdrv/nvmemp.h | ||||||
|  |     hle/service/nvdrv/syncpoint_manager.cpp | ||||||
|  |     hle/service/nvdrv/syncpoint_manager.h | ||||||
|     hle/service/nvflinger/buffer_queue.cpp |     hle/service/nvflinger/buffer_queue.cpp | ||||||
|     hle/service/nvflinger/buffer_queue.h |     hle/service/nvflinger/buffer_queue.h | ||||||
|     hle/service/nvflinger/nvflinger.cpp |     hle/service/nvflinger/nvflinger.cpp | ||||||
|  |  | ||||||
|  | @ -179,16 +179,18 @@ struct System::Impl { | ||||||
|         arp_manager.ResetAll(); |         arp_manager.ResetAll(); | ||||||
| 
 | 
 | ||||||
|         telemetry_session = std::make_unique<Core::TelemetrySession>(); |         telemetry_session = std::make_unique<Core::TelemetrySession>(); | ||||||
|  | 
 | ||||||
|  |         gpu_core = VideoCore::CreateGPU(emu_window, system); | ||||||
|  |         if (!gpu_core) { | ||||||
|  |             return ResultStatus::ErrorVideoCore; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         service_manager = std::make_shared<Service::SM::ServiceManager>(kernel); |         service_manager = std::make_shared<Service::SM::ServiceManager>(kernel); | ||||||
| 
 | 
 | ||||||
|         Service::Init(service_manager, system); |         Service::Init(service_manager, system); | ||||||
|         GDBStub::DeferStart(); |         GDBStub::DeferStart(); | ||||||
| 
 | 
 | ||||||
|         interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); |         interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); | ||||||
|         gpu_core = VideoCore::CreateGPU(emu_window, system); |  | ||||||
|         if (!gpu_core) { |  | ||||||
|             return ResultStatus::ErrorVideoCore; |  | ||||||
|         } |  | ||||||
| 
 | 
 | ||||||
|         // Initialize time manager, which must happen after kernel is created
 |         // Initialize time manager, which must happen after kernel is created
 | ||||||
|         time_manager.Initialize(); |         time_manager.Initialize(); | ||||||
|  |  | ||||||
|  | @ -15,8 +15,9 @@ | ||||||
| 
 | 
 | ||||||
| namespace Service::Nvidia::Devices { | namespace Service::Nvidia::Devices { | ||||||
| 
 | 
 | ||||||
| nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface) | nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface, | ||||||
|     : nvdevice(system), events_interface{events_interface} {} |                          SyncpointManager& syncpoint_manager) | ||||||
|  |     : nvdevice(system), events_interface{events_interface}, syncpoint_manager{syncpoint_manager} {} | ||||||
| nvhost_ctrl::~nvhost_ctrl() = default; | nvhost_ctrl::~nvhost_ctrl() = default; | ||||||
| 
 | 
 | ||||||
| u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | ||||||
|  | @ -70,19 +71,33 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | ||||||
|         return NvResult::BadParameter; |         return NvResult::BadParameter; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     if (syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) { | ||||||
|  |         params.value = syncpoint_manager.GetSyncpointMin(params.syncpt_id); | ||||||
|  |         std::memcpy(output.data(), &params, sizeof(params)); | ||||||
|  |         return NvResult::Success; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (const auto new_value = syncpoint_manager.RefreshSyncpoint(params.syncpt_id); | ||||||
|  |         syncpoint_manager.IsSyncpointExpired(params.syncpt_id, params.threshold)) { | ||||||
|  |         params.value = new_value; | ||||||
|  |         std::memcpy(output.data(), &params, sizeof(params)); | ||||||
|  |         return NvResult::Success; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     auto event = events_interface.events[event_id]; |     auto event = events_interface.events[event_id]; | ||||||
|     auto& gpu = system.GPU(); |     auto& gpu = system.GPU(); | ||||||
|  | 
 | ||||||
|     // This is mostly to take into account unimplemented features. As synced
 |     // This is mostly to take into account unimplemented features. As synced
 | ||||||
|     // gpu is always synced.
 |     // gpu is always synced.
 | ||||||
|     if (!gpu.IsAsync()) { |     if (!gpu.IsAsync()) { | ||||||
|         event.writable->Signal(); |         event.event.writable->Signal(); | ||||||
|         return NvResult::Success; |         return NvResult::Success; | ||||||
|     } |     } | ||||||
|     auto lock = gpu.LockSync(); |     auto lock = gpu.LockSync(); | ||||||
|     const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); |     const u32 current_syncpoint_value = event.fence.value; | ||||||
|     const s32 diff = current_syncpoint_value - params.threshold; |     const s32 diff = current_syncpoint_value - params.threshold; | ||||||
|     if (diff >= 0) { |     if (diff >= 0) { | ||||||
|         event.writable->Signal(); |         event.event.writable->Signal(); | ||||||
|         params.value = current_syncpoint_value; |         params.value = current_syncpoint_value; | ||||||
|         std::memcpy(output.data(), &params, sizeof(params)); |         std::memcpy(output.data(), &params, sizeof(params)); | ||||||
|         return NvResult::Success; |         return NvResult::Success; | ||||||
|  | @ -109,7 +124,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | ||||||
|             params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; |             params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; | ||||||
|         } |         } | ||||||
|         params.value |= event_id; |         params.value |= event_id; | ||||||
|         event.writable->Clear(); |         event.event.writable->Clear(); | ||||||
|         gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); |         gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); | ||||||
|         if (!is_async && ctrl.fresh_call) { |         if (!is_async && ctrl.fresh_call) { | ||||||
|             ctrl.must_delay = true; |             ctrl.must_delay = true; | ||||||
|  | @ -157,15 +172,19 @@ u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vecto | ||||||
| u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) { | u32 nvhost_ctrl::IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output) { | ||||||
|     IocCtrlEventSignalParams params{}; |     IocCtrlEventSignalParams params{}; | ||||||
|     std::memcpy(&params, input.data(), sizeof(params)); |     std::memcpy(&params, input.data(), sizeof(params)); | ||||||
|  | 
 | ||||||
|     u32 event_id = params.event_id & 0x00FF; |     u32 event_id = params.event_id & 0x00FF; | ||||||
|     LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id); |     LOG_WARNING(Service_NVDRV, "cleared event wait on, event_id: {:X}", event_id); | ||||||
|  | 
 | ||||||
|     if (event_id >= MaxNvEvents) { |     if (event_id >= MaxNvEvents) { | ||||||
|         return NvResult::BadParameter; |         return NvResult::BadParameter; | ||||||
|     } |     } | ||||||
|     if (events_interface.status[event_id] == EventState::Waiting) { |     if (events_interface.status[event_id] == EventState::Waiting) { | ||||||
|         events_interface.LiberateEvent(event_id); |         events_interface.LiberateEvent(event_id); | ||||||
|         events_interface.events[event_id].writable->Signal(); |  | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     syncpoint_manager.RefreshSyncpoint(events_interface.events[event_id].fence.id); | ||||||
|  | 
 | ||||||
|     return NvResult::Success; |     return NvResult::Success; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -14,7 +14,8 @@ namespace Service::Nvidia::Devices { | ||||||
| 
 | 
 | ||||||
| class nvhost_ctrl final : public nvdevice { | class nvhost_ctrl final : public nvdevice { | ||||||
| public: | public: | ||||||
|     explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface); |     explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface, | ||||||
|  |                          SyncpointManager& syncpoint_manager); | ||||||
|     ~nvhost_ctrl() override; |     ~nvhost_ctrl() override; | ||||||
| 
 | 
 | ||||||
|     u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, |     u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | ||||||
|  | @ -145,6 +146,7 @@ private: | ||||||
|     u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); |     u32 IocCtrlClearEventWait(const std::vector<u8>& input, std::vector<u8>& output); | ||||||
| 
 | 
 | ||||||
|     EventInterface& events_interface; |     EventInterface& events_interface; | ||||||
|  |     SyncpointManager& syncpoint_manager; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Service::Nvidia::Devices
 | } // namespace Service::Nvidia::Devices
 | ||||||
|  |  | ||||||
|  | @ -7,14 +7,20 @@ | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" | #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" | ||||||
|  | #include "core/hle/service/nvdrv/syncpoint_manager.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| 
 | 
 | ||||||
| namespace Service::Nvidia::Devices { | namespace Service::Nvidia::Devices { | ||||||
| 
 | 
 | ||||||
| nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev) | nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, | ||||||
|     : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {} |                        SyncpointManager& syncpoint_manager) | ||||||
|  |     : nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager{syncpoint_manager} { | ||||||
|  |     channel_fence.id = syncpoint_manager.AllocateSyncpoint(); | ||||||
|  |     channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| nvhost_gpu::~nvhost_gpu() = default; | nvhost_gpu::~nvhost_gpu() = default; | ||||||
| 
 | 
 | ||||||
| u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | ||||||
|  | @ -126,10 +132,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou | ||||||
|                 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, |                 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, | ||||||
|                 params.unk3); |                 params.unk3); | ||||||
| 
 | 
 | ||||||
|     auto& gpu = system.GPU(); |     channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id); | ||||||
|     params.fence_out.id = assigned_syncpoints; | 
 | ||||||
|     params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints); |     params.fence_out = channel_fence; | ||||||
|     assigned_syncpoints++; | 
 | ||||||
|     std::memcpy(output.data(), &params, output.size()); |     std::memcpy(output.data(), &params, output.size()); | ||||||
|     return 0; |     return 0; | ||||||
| } | } | ||||||
|  | @ -145,37 +151,98 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector< | ||||||
|     return 0; |     return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) { | ||||||
|  |     return { | ||||||
|  |         Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, | ||||||
|  |                                   Tegra::SubmissionMode::Increasing), | ||||||
|  |         {fence.value}, | ||||||
|  |         Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, | ||||||
|  |                                   Tegra::SubmissionMode::Increasing), | ||||||
|  |         Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id), | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence, u32 add_increment) { | ||||||
|  |     std::vector<Tegra::CommandHeader> result{ | ||||||
|  |         Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceValue, 1, | ||||||
|  |                                   Tegra::SubmissionMode::Increasing), | ||||||
|  |         {}}; | ||||||
|  | 
 | ||||||
|  |     for (u32 count = 0; count < add_increment; ++count) { | ||||||
|  |         result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1, | ||||||
|  |                                                       Tegra::SubmissionMode::Increasing)); | ||||||
|  |         result.emplace_back( | ||||||
|  |             Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id)); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(Fence fence, | ||||||
|  |                                                                           u32 add_increment) { | ||||||
|  |     std::vector<Tegra::CommandHeader> result{ | ||||||
|  |         Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForInterrupt, 1, | ||||||
|  |                                   Tegra::SubmissionMode::Increasing), | ||||||
|  |         {}}; | ||||||
|  |     const std::vector<Tegra::CommandHeader> increment{ | ||||||
|  |         BuildIncrementCommandList(fence, add_increment)}; | ||||||
|  | 
 | ||||||
|  |     result.insert(result.end(), increment.begin(), increment.end()); | ||||||
|  | 
 | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | u32 nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, | ||||||
|  |                                  Tegra::CommandList&& entries) { | ||||||
|  |     LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, | ||||||
|  |               params.num_entries, params.flags.raw); | ||||||
|  | 
 | ||||||
|  |     auto& gpu = system.GPU(); | ||||||
|  | 
 | ||||||
|  |     params.fence_out.id = channel_fence.id; | ||||||
|  | 
 | ||||||
|  |     if (params.flags.add_wait.Value() && | ||||||
|  |         !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) { | ||||||
|  |         gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)}); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     if (params.flags.add_increment.Value() || params.flags.increment.Value()) { | ||||||
|  |         const u32 increment_value = params.flags.increment.Value() ? params.fence_out.value : 0; | ||||||
|  |         params.fence_out.value = syncpoint_manager.IncreaseSyncpoint( | ||||||
|  |             params.fence_out.id, params.AddIncrementValue() + increment_value); | ||||||
|  |     } else { | ||||||
|  |         params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     entries.RefreshIntegrityChecks(gpu); | ||||||
|  |     gpu.PushGPUEntries(std::move(entries)); | ||||||
|  | 
 | ||||||
|  |     if (params.flags.add_increment.Value()) { | ||||||
|  |         if (params.flags.suppress_wfi) { | ||||||
|  |             gpu.PushGPUEntries(Tegra::CommandList{ | ||||||
|  |                 BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())}); | ||||||
|  |         } else { | ||||||
|  |             gpu.PushGPUEntries(Tegra::CommandList{ | ||||||
|  |                 BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())}); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { | u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { | ||||||
|     if (input.size() < sizeof(IoctlSubmitGpfifo)) { |     if (input.size() < sizeof(IoctlSubmitGpfifo)) { | ||||||
|         UNIMPLEMENTED(); |         UNIMPLEMENTED(); | ||||||
|     } |     } | ||||||
|     IoctlSubmitGpfifo params{}; |     IoctlSubmitGpfifo params{}; | ||||||
|     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); |     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); | ||||||
|     LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, |  | ||||||
|               params.num_entries, params.flags.raw); |  | ||||||
| 
 |  | ||||||
|     ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + |  | ||||||
|                                    params.num_entries * sizeof(Tegra::CommandListHeader), |  | ||||||
|                "Incorrect input size"); |  | ||||||
| 
 | 
 | ||||||
|     Tegra::CommandList entries(params.num_entries); |     Tegra::CommandList entries(params.num_entries); | ||||||
|     std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], |     std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)], | ||||||
|                 params.num_entries * sizeof(Tegra::CommandListHeader)); |                 params.num_entries * sizeof(Tegra::CommandListHeader)); | ||||||
| 
 | 
 | ||||||
|     UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); |     return SubmitGPFIFOImpl(params, output, std::move(entries)); | ||||||
|     UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); |  | ||||||
| 
 |  | ||||||
|     auto& gpu = system.GPU(); |  | ||||||
|     u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); |  | ||||||
|     if (params.flags.increment.Value()) { |  | ||||||
|         params.fence_out.value += current_syncpoint_value; |  | ||||||
|     } else { |  | ||||||
|         params.fence_out.value = current_syncpoint_value; |  | ||||||
|     } |  | ||||||
|     gpu.PushGPUEntries(std::move(entries)); |  | ||||||
| 
 |  | ||||||
|     std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); |  | ||||||
|     return 0; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, | u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, | ||||||
|  | @ -185,31 +252,17 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, | ||||||
|     } |     } | ||||||
|     IoctlSubmitGpfifo params{}; |     IoctlSubmitGpfifo params{}; | ||||||
|     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); |     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); | ||||||
|     LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, |  | ||||||
|               params.num_entries, params.flags.raw); |  | ||||||
| 
 | 
 | ||||||
|     Tegra::CommandList entries(params.num_entries); |     Tegra::CommandList entries(params.num_entries); | ||||||
|     if (version == IoctlVersion::Version2) { |     if (version == IoctlVersion::Version2) { | ||||||
|         std::memcpy(entries.data(), input2.data(), |         std::memcpy(entries.command_lists.data(), input2.data(), | ||||||
|                     params.num_entries * sizeof(Tegra::CommandListHeader)); |                     params.num_entries * sizeof(Tegra::CommandListHeader)); | ||||||
|     } else { |     } else { | ||||||
|         system.Memory().ReadBlock(params.address, entries.data(), |         system.Memory().ReadBlock(params.address, entries.command_lists.data(), | ||||||
|                                   params.num_entries * sizeof(Tegra::CommandListHeader)); |                                   params.num_entries * sizeof(Tegra::CommandListHeader)); | ||||||
|     } |     } | ||||||
|     UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); |  | ||||||
|     UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); |  | ||||||
| 
 | 
 | ||||||
|     auto& gpu = system.GPU(); |     return SubmitGPFIFOImpl(params, output, std::move(entries)); | ||||||
|     u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); |  | ||||||
|     if (params.flags.increment.Value()) { |  | ||||||
|         params.fence_out.value += current_syncpoint_value; |  | ||||||
|     } else { |  | ||||||
|         params.fence_out.value = current_syncpoint_value; |  | ||||||
|     } |  | ||||||
|     gpu.PushGPUEntries(std::move(entries)); |  | ||||||
| 
 |  | ||||||
|     std::memcpy(output.data(), &params, output.size()); |  | ||||||
|     return 0; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { | u32 nvhost_gpu::GetWaitbase(const std::vector<u8>& input, std::vector<u8>& output) { | ||||||
|  |  | ||||||
|  | @ -11,6 +11,11 @@ | ||||||
| #include "common/swap.h" | #include "common/swap.h" | ||||||
| #include "core/hle/service/nvdrv/devices/nvdevice.h" | #include "core/hle/service/nvdrv/devices/nvdevice.h" | ||||||
| #include "core/hle/service/nvdrv/nvdata.h" | #include "core/hle/service/nvdrv/nvdata.h" | ||||||
|  | #include "video_core/dma_pusher.h" | ||||||
|  | 
 | ||||||
|  | namespace Service::Nvidia { | ||||||
|  | class SyncpointManager; | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| namespace Service::Nvidia::Devices { | namespace Service::Nvidia::Devices { | ||||||
| 
 | 
 | ||||||
|  | @ -21,7 +26,8 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b); | ||||||
| 
 | 
 | ||||||
| class nvhost_gpu final : public nvdevice { | class nvhost_gpu final : public nvdevice { | ||||||
| public: | public: | ||||||
|     explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev); |     explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev, | ||||||
|  |                         SyncpointManager& syncpoint_manager); | ||||||
|     ~nvhost_gpu() override; |     ~nvhost_gpu() override; | ||||||
| 
 | 
 | ||||||
|     u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, |     u32 ioctl(Ioctl command, const std::vector<u8>& input, const std::vector<u8>& input2, | ||||||
|  | @ -162,10 +168,15 @@ private: | ||||||
|             u32_le raw; |             u32_le raw; | ||||||
|             BitField<0, 1, u32_le> add_wait;      // append a wait sync_point to the list
 |             BitField<0, 1, u32_le> add_wait;      // append a wait sync_point to the list
 | ||||||
|             BitField<1, 1, u32_le> add_increment; // append an increment to the list
 |             BitField<1, 1, u32_le> add_increment; // append an increment to the list
 | ||||||
|             BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
 |             BitField<2, 1, u32_le> new_hw_format; // mostly ignored
 | ||||||
|  |             BitField<4, 1, u32_le> suppress_wfi;  // suppress wait for interrupt
 | ||||||
|             BitField<8, 1, u32_le> increment;     // increment the returned fence
 |             BitField<8, 1, u32_le> increment;     // increment the returned fence
 | ||||||
|         } flags; |         } flags; | ||||||
|         Fence fence_out; // returned new fence object for others to wait on
 |         Fence fence_out; // returned new fence object for others to wait on
 | ||||||
|  | 
 | ||||||
|  |         u32 AddIncrementValue() const { | ||||||
|  |             return flags.add_increment.Value() << 1; | ||||||
|  |         } | ||||||
|     }; |     }; | ||||||
|     static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), |     static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), | ||||||
|                   "IoctlSubmitGpfifo is incorrect size"); |                   "IoctlSubmitGpfifo is incorrect size"); | ||||||
|  | @ -190,6 +201,8 @@ private: | ||||||
|     u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output); |     u32 SetChannelPriority(const std::vector<u8>& input, std::vector<u8>& output); | ||||||
|     u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output); |     u32 AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& output); | ||||||
|     u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output); |     u32 AllocateObjectContext(const std::vector<u8>& input, std::vector<u8>& output); | ||||||
|  |     u32 SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, | ||||||
|  |                          Tegra::CommandList&& entries); | ||||||
|     u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output); |     u32 SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output); | ||||||
|     u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, |     u32 KickoffPB(const std::vector<u8>& input, std::vector<u8>& output, | ||||||
|                   const std::vector<u8>& input2, IoctlVersion version); |                   const std::vector<u8>& input2, IoctlVersion version); | ||||||
|  | @ -198,7 +211,8 @@ private: | ||||||
|     u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); |     u32 ChannelSetTimeslice(const std::vector<u8>& input, std::vector<u8>& output); | ||||||
| 
 | 
 | ||||||
|     std::shared_ptr<nvmap> nvmap_dev; |     std::shared_ptr<nvmap> nvmap_dev; | ||||||
|     u32 assigned_syncpoints{}; |     SyncpointManager& syncpoint_manager; | ||||||
|  |     Fence channel_fence; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Service::Nvidia::Devices
 | } // namespace Service::Nvidia::Devices
 | ||||||
|  |  | ||||||
|  | @ -21,6 +21,7 @@ | ||||||
| #include "core/hle/service/nvdrv/interface.h" | #include "core/hle/service/nvdrv/interface.h" | ||||||
| #include "core/hle/service/nvdrv/nvdrv.h" | #include "core/hle/service/nvdrv/nvdrv.h" | ||||||
| #include "core/hle/service/nvdrv/nvmemp.h" | #include "core/hle/service/nvdrv/nvmemp.h" | ||||||
|  | #include "core/hle/service/nvdrv/syncpoint_manager.h" | ||||||
| #include "core/hle/service/nvflinger/nvflinger.h" | #include "core/hle/service/nvflinger/nvflinger.h" | ||||||
| 
 | 
 | ||||||
| namespace Service::Nvidia { | namespace Service::Nvidia { | ||||||
|  | @ -36,21 +37,23 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger | ||||||
|     nvflinger.SetNVDrvInstance(module_); |     nvflinger.SetNVDrvInstance(module_); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Module::Module(Core::System& system) { | Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} { | ||||||
|     auto& kernel = system.Kernel(); |     auto& kernel = system.Kernel(); | ||||||
|     for (u32 i = 0; i < MaxNvEvents; i++) { |     for (u32 i = 0; i < MaxNvEvents; i++) { | ||||||
|         std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); |         std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); | ||||||
|         events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(kernel, event_label); |         events_interface.events[i] = {Kernel::WritableEvent::CreateEventPair(kernel, event_label)}; | ||||||
|         events_interface.status[i] = EventState::Free; |         events_interface.status[i] = EventState::Free; | ||||||
|         events_interface.registered[i] = false; |         events_interface.registered[i] = false; | ||||||
|     } |     } | ||||||
|     auto nvmap_dev = std::make_shared<Devices::nvmap>(system); |     auto nvmap_dev = std::make_shared<Devices::nvmap>(system); | ||||||
|     devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev); |     devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev); | ||||||
|     devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev); |     devices["/dev/nvhost-gpu"] = | ||||||
|  |         std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev, syncpoint_manager); | ||||||
|     devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system); |     devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system); | ||||||
|     devices["/dev/nvmap"] = nvmap_dev; |     devices["/dev/nvmap"] = nvmap_dev; | ||||||
|     devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); |     devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev); | ||||||
|     devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface); |     devices["/dev/nvhost-ctrl"] = | ||||||
|  |         std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager); | ||||||
|     devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); |     devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev); | ||||||
|     devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); |     devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system); | ||||||
|     devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); |     devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev); | ||||||
|  | @ -95,17 +98,17 @@ void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) { | ||||||
|         if (events_interface.assigned_syncpt[i] == syncpoint_id && |         if (events_interface.assigned_syncpt[i] == syncpoint_id && | ||||||
|             events_interface.assigned_value[i] == value) { |             events_interface.assigned_value[i] == value) { | ||||||
|             events_interface.LiberateEvent(i); |             events_interface.LiberateEvent(i); | ||||||
|             events_interface.events[i].writable->Signal(); |             events_interface.events[i].event.writable->Signal(); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const { | std::shared_ptr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const { | ||||||
|     return events_interface.events[event_id].readable; |     return events_interface.events[event_id].event.readable; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const { | std::shared_ptr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const { | ||||||
|     return events_interface.events[event_id].writable; |     return events_interface.events[event_id].event.writable; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace Service::Nvidia
 | } // namespace Service::Nvidia
 | ||||||
|  |  | ||||||
|  | @ -10,6 +10,7 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "core/hle/kernel/writable_event.h" | #include "core/hle/kernel/writable_event.h" | ||||||
| #include "core/hle/service/nvdrv/nvdata.h" | #include "core/hle/service/nvdrv/nvdata.h" | ||||||
|  | #include "core/hle/service/nvdrv/syncpoint_manager.h" | ||||||
| #include "core/hle/service/service.h" | #include "core/hle/service/service.h" | ||||||
| 
 | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
|  | @ -22,15 +23,23 @@ class NVFlinger; | ||||||
| 
 | 
 | ||||||
| namespace Service::Nvidia { | namespace Service::Nvidia { | ||||||
| 
 | 
 | ||||||
|  | class SyncpointManager; | ||||||
|  | 
 | ||||||
| namespace Devices { | namespace Devices { | ||||||
| class nvdevice; | class nvdevice; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /// Represents an Nvidia event
 | ||||||
|  | struct NvEvent { | ||||||
|  |     Kernel::EventPair event; | ||||||
|  |     Fence fence{}; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| struct EventInterface { | struct EventInterface { | ||||||
|     // Mask representing currently busy events
 |     // Mask representing currently busy events
 | ||||||
|     u64 events_mask{}; |     u64 events_mask{}; | ||||||
|     // Each kernel event associated to an NV event
 |     // Each kernel event associated to an NV event
 | ||||||
|     std::array<Kernel::EventPair, MaxNvEvents> events; |     std::array<NvEvent, MaxNvEvents> events; | ||||||
|     // The status of the current NVEvent
 |     // The status of the current NVEvent
 | ||||||
|     std::array<EventState, MaxNvEvents> status{}; |     std::array<EventState, MaxNvEvents> status{}; | ||||||
|     // Tells if an NVEvent is registered or not
 |     // Tells if an NVEvent is registered or not
 | ||||||
|  | @ -119,6 +128,9 @@ public: | ||||||
|     std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const; |     std::shared_ptr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|  |     /// Manages syncpoints on the host
 | ||||||
|  |     SyncpointManager syncpoint_manager; | ||||||
|  | 
 | ||||||
|     /// Id to use for the next open file descriptor.
 |     /// Id to use for the next open file descriptor.
 | ||||||
|     u32 next_fd = 1; |     u32 next_fd = 1; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										39
									
								
								src/core/hle/service/nvdrv/syncpoint_manager.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								src/core/hle/service/nvdrv/syncpoint_manager.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,39 @@ | ||||||
|  | // Copyright 2020 yuzu emulator team
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #include "common/assert.h" | ||||||
|  | #include "core/hle/service/nvdrv/syncpoint_manager.h" | ||||||
|  | #include "video_core/gpu.h" | ||||||
|  | 
 | ||||||
|  | namespace Service::Nvidia { | ||||||
|  | 
 | ||||||
|  | SyncpointManager::SyncpointManager(Tegra::GPU& gpu) : gpu{gpu} {} | ||||||
|  | 
 | ||||||
|  | SyncpointManager::~SyncpointManager() = default; | ||||||
|  | 
 | ||||||
|  | u32 SyncpointManager::RefreshSyncpoint(u32 syncpoint_id) { | ||||||
|  |     syncpoints[syncpoint_id].min = gpu.GetSyncpointValue(syncpoint_id); | ||||||
|  |     return GetSyncpointMin(syncpoint_id); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | u32 SyncpointManager::AllocateSyncpoint() { | ||||||
|  |     for (u32 syncpoint_id = 1; syncpoint_id < MaxSyncPoints; syncpoint_id++) { | ||||||
|  |         if (!syncpoints[syncpoint_id].is_allocated) { | ||||||
|  |             syncpoints[syncpoint_id].is_allocated = true; | ||||||
|  |             return syncpoint_id; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     UNREACHABLE_MSG("No more available syncpoints!"); | ||||||
|  |     return {}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | u32 SyncpointManager::IncreaseSyncpoint(u32 syncpoint_id, u32 value) { | ||||||
|  |     for (u32 index = 0; index < value; ++index) { | ||||||
|  |         syncpoints[syncpoint_id].max.fetch_add(1, std::memory_order_relaxed); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return GetSyncpointMax(syncpoint_id); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // namespace Service::Nvidia
 | ||||||
							
								
								
									
										85
									
								
								src/core/hle/service/nvdrv/syncpoint_manager.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										85
									
								
								src/core/hle/service/nvdrv/syncpoint_manager.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,85 @@ | ||||||
|  | // Copyright 2020 yuzu emulator team
 | ||||||
|  | // Licensed under GPLv2 or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <array> | ||||||
|  | #include <atomic> | ||||||
|  | 
 | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "core/hle/service/nvdrv/nvdata.h" | ||||||
|  | 
 | ||||||
|  | namespace Tegra { | ||||||
|  | class GPU; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | namespace Service::Nvidia { | ||||||
|  | 
 | ||||||
|  | class SyncpointManager final { | ||||||
|  | public: | ||||||
|  |     explicit SyncpointManager(Tegra::GPU& gpu); | ||||||
|  |     ~SyncpointManager(); | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Returns true if the specified syncpoint is expired for the given value. | ||||||
|  |      * @param syncpoint_id Syncpoint ID to check. | ||||||
|  |      * @param value Value to check against the specified syncpoint. | ||||||
|  |      * @returns True if the specified syncpoint is expired for the given value, otherwise False. | ||||||
|  |      */ | ||||||
|  |     bool IsSyncpointExpired(u32 syncpoint_id, u32 value) const { | ||||||
|  |         return (GetSyncpointMax(syncpoint_id) - value) >= (GetSyncpointMin(syncpoint_id) - value); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Gets the lower bound for the specified syncpoint. | ||||||
|  |      * @param syncpoint_id Syncpoint ID to get the lower bound for. | ||||||
|  |      * @returns The lower bound for the specified syncpoint. | ||||||
|  |      */ | ||||||
|  |     u32 GetSyncpointMin(u32 syncpoint_id) const { | ||||||
|  |         return syncpoints[syncpoint_id].min.load(std::memory_order_relaxed); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Gets the upper bound for the specified syncpoint. | ||||||
|  |      * @param syncpoint_id Syncpoint ID to get the upper bound for. | ||||||
|  |      * @returns The upper bound for the specified syncpoint. | ||||||
|  |      */ | ||||||
|  |     u32 GetSyncpointMax(u32 syncpoint_id) const { | ||||||
|  |         return syncpoints[syncpoint_id].max.load(std::memory_order_relaxed); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Refreshes the minimum value for the specified syncpoint. | ||||||
|  |      * @param syncpoint_id Syncpoint ID to be refreshed. | ||||||
|  |      * @returns The new syncpoint minimum value. | ||||||
|  |      */ | ||||||
|  |     u32 RefreshSyncpoint(u32 syncpoint_id); | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Allocates a new syncpoint. | ||||||
|  |      * @returns The syncpoint ID for the newly allocated syncpoint. | ||||||
|  |      */ | ||||||
|  |     u32 AllocateSyncpoint(); | ||||||
|  | 
 | ||||||
|  |     /**
 | ||||||
|  |      * Increases the maximum value for the specified syncpoint. | ||||||
|  |      * @param syncpoint_id Syncpoint ID to be increased. | ||||||
|  |      * @param value Value to increase the specified syncpoint by. | ||||||
|  |      * @returns The new syncpoint maximum value. | ||||||
|  |      */ | ||||||
|  |     u32 IncreaseSyncpoint(u32 syncpoint_id, u32 value); | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     struct Syncpoint { | ||||||
|  |         std::atomic<u32> min; | ||||||
|  |         std::atomic<u32> max; | ||||||
|  |         std::atomic<bool> is_allocated; | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     std::array<Syncpoint, MaxSyncPoints> syncpoints{}; | ||||||
|  | 
 | ||||||
|  |     Tegra::GPU& gpu; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | } // namespace Service::Nvidia
 | ||||||
|  | @ -242,6 +242,10 @@ void NVFlinger::Compose() { | ||||||
| 
 | 
 | ||||||
|         const auto& igbp_buffer = buffer->get().igbp_buffer; |         const auto& igbp_buffer = buffer->get().igbp_buffer; | ||||||
| 
 | 
 | ||||||
|  |         if (!system.IsPoweredOn()) { | ||||||
|  |             return; // We are likely shutting down
 | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         auto& gpu = system.GPU(); |         auto& gpu = system.GPU(); | ||||||
|         const auto& multi_fence = buffer->get().multi_fence; |         const auto& multi_fence = buffer->get().multi_fence; | ||||||
|         guard->unlock(); |         guard->unlock(); | ||||||
|  |  | ||||||
|  | @ -2,6 +2,7 @@ | ||||||
| // Licensed under GPLv2 or any later version
 | // Licensed under GPLv2 or any later version
 | ||||||
| // Refer to the license.txt file included.
 | // Refer to the license.txt file included.
 | ||||||
| 
 | 
 | ||||||
|  | #include "common/cityhash.h" | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
|  | @ -12,6 +13,20 @@ | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| 
 | 
 | ||||||
|  | void CommandList::RefreshIntegrityChecks(GPU& gpu) { | ||||||
|  |     command_list_hashes.resize(command_lists.size()); | ||||||
|  | 
 | ||||||
|  |     for (std::size_t index = 0; index < command_lists.size(); ++index) { | ||||||
|  |         const CommandListHeader command_list_header = command_lists[index]; | ||||||
|  |         std::vector<CommandHeader> command_headers(command_list_header.size); | ||||||
|  |         gpu.MemoryManager().ReadBlockUnsafe(command_list_header.addr, command_headers.data(), | ||||||
|  |                                             command_list_header.size * sizeof(u32)); | ||||||
|  |         command_list_hashes[index] = | ||||||
|  |             Common::CityHash64(reinterpret_cast<char*>(command_headers.data()), | ||||||
|  |                                command_list_header.size * sizeof(u32)); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} | DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} | ||||||
| 
 | 
 | ||||||
| DmaPusher::~DmaPusher() = default; | DmaPusher::~DmaPusher() = default; | ||||||
|  | @ -45,32 +60,51 @@ bool DmaPusher::Step() { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const CommandList& command_list{dma_pushbuffer.front()}; |     CommandList& command_list{dma_pushbuffer.front()}; | ||||||
|     ASSERT_OR_EXECUTE(!command_list.empty(), { | 
 | ||||||
|         // Somehow the command_list is empty, in order to avoid a crash
 |     ASSERT_OR_EXECUTE( | ||||||
|         // We ignore it and assume its size is 0.
 |         command_list.command_lists.size() || command_list.prefetch_command_list.size(), { | ||||||
|  |             // Somehow the command_list is empty, in order to avoid a crash
 | ||||||
|  |             // We ignore it and assume its size is 0.
 | ||||||
|  |             dma_pushbuffer.pop(); | ||||||
|  |             dma_pushbuffer_subindex = 0; | ||||||
|  |             return true; | ||||||
|  |         }); | ||||||
|  | 
 | ||||||
|  |     if (command_list.prefetch_command_list.size()) { | ||||||
|  |         // Prefetched command list from nvdrv, used for things like synchronization
 | ||||||
|  |         command_headers = std::move(command_list.prefetch_command_list); | ||||||
|         dma_pushbuffer.pop(); |         dma_pushbuffer.pop(); | ||||||
|         dma_pushbuffer_subindex = 0; |     } else { | ||||||
|         return true; |         const CommandListHeader command_list_header{ | ||||||
|     }); |             command_list.command_lists[dma_pushbuffer_subindex]}; | ||||||
|     const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; |         const u64 next_hash = command_list.command_list_hashes[dma_pushbuffer_subindex++]; | ||||||
|     const GPUVAddr dma_get = command_list_header.addr; |         const GPUVAddr dma_get = command_list_header.addr; | ||||||
| 
 | 
 | ||||||
|     if (dma_pushbuffer_subindex >= command_list.size()) { |         if (dma_pushbuffer_subindex >= command_list.command_lists.size()) { | ||||||
|         // We've gone through the current list, remove it from the queue
 |             // We've gone through the current list, remove it from the queue
 | ||||||
|         dma_pushbuffer.pop(); |             dma_pushbuffer.pop(); | ||||||
|         dma_pushbuffer_subindex = 0; |             dma_pushbuffer_subindex = 0; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (command_list_header.size == 0) { | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Push buffer non-empty, read a word
 | ||||||
|  |         command_headers.resize(command_list_header.size); | ||||||
|  |         gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), | ||||||
|  |                                             command_list_header.size * sizeof(u32)); | ||||||
|  | 
 | ||||||
|  |         // Integrity check
 | ||||||
|  |         const u64 new_hash = Common::CityHash64(reinterpret_cast<char*>(command_headers.data()), | ||||||
|  |                                                 command_list_header.size * sizeof(u32)); | ||||||
|  |         if (new_hash != next_hash) { | ||||||
|  |             LOG_CRITICAL(HW_GPU, "CommandList at addr=0x{:X} is corrupt, skipping!", dma_get); | ||||||
|  |             dma_pushbuffer.pop(); | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
|     if (command_list_header.size == 0) { |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Push buffer non-empty, read a word
 |  | ||||||
|     command_headers.resize(command_list_header.size); |  | ||||||
|     gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), |  | ||||||
|                                         command_list_header.size * sizeof(u32)); |  | ||||||
| 
 |  | ||||||
|     for (std::size_t index = 0; index < command_headers.size();) { |     for (std::size_t index = 0; index < command_headers.size();) { | ||||||
|         const CommandHeader& command_header = command_headers[index]; |         const CommandHeader& command_header = command_headers[index]; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -27,6 +27,31 @@ enum class SubmissionMode : u32 { | ||||||
|     IncreaseOnce = 5 |     IncreaseOnce = 5 | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
 | ||||||
|  | // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
 | ||||||
|  | // So the values you see in docs might be multiplied by 4.
 | ||||||
|  | enum class BufferMethods : u32 { | ||||||
|  |     BindObject = 0x0, | ||||||
|  |     Nop = 0x2, | ||||||
|  |     SemaphoreAddressHigh = 0x4, | ||||||
|  |     SemaphoreAddressLow = 0x5, | ||||||
|  |     SemaphoreSequence = 0x6, | ||||||
|  |     SemaphoreTrigger = 0x7, | ||||||
|  |     NotifyIntr = 0x8, | ||||||
|  |     WrcacheFlush = 0x9, | ||||||
|  |     Unk28 = 0xA, | ||||||
|  |     UnkCacheFlush = 0xB, | ||||||
|  |     RefCnt = 0x14, | ||||||
|  |     SemaphoreAcquire = 0x1A, | ||||||
|  |     SemaphoreRelease = 0x1B, | ||||||
|  |     FenceValue = 0x1C, | ||||||
|  |     FenceAction = 0x1D, | ||||||
|  |     WaitForInterrupt = 0x1E, | ||||||
|  |     Unk7c = 0x1F, | ||||||
|  |     Yield = 0x20, | ||||||
|  |     NonPullerMethods = 0x40, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| struct CommandListHeader { | struct CommandListHeader { | ||||||
|     union { |     union { | ||||||
|         u64 raw; |         u64 raw; | ||||||
|  | @ -49,9 +74,29 @@ union CommandHeader { | ||||||
| static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); | static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); | ||||||
| static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); | static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); | ||||||
| 
 | 
 | ||||||
|  | static constexpr CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, | ||||||
|  |                                                   SubmissionMode mode) { | ||||||
|  |     CommandHeader result{}; | ||||||
|  |     result.method.Assign(static_cast<u32>(method)); | ||||||
|  |     result.arg_count.Assign(arg_count); | ||||||
|  |     result.mode.Assign(mode); | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| class GPU; | class GPU; | ||||||
| 
 | 
 | ||||||
| using CommandList = std::vector<Tegra::CommandListHeader>; | struct CommandList final { | ||||||
|  |     CommandList() = default; | ||||||
|  |     explicit CommandList(std::size_t size) : command_lists(size) {} | ||||||
|  |     explicit CommandList(std::vector<Tegra::CommandHeader>&& prefetch_command_list) | ||||||
|  |         : prefetch_command_list{std::move(prefetch_command_list)} {} | ||||||
|  | 
 | ||||||
|  |     void RefreshIntegrityChecks(GPU& gpu); | ||||||
|  | 
 | ||||||
|  |     std::vector<Tegra::CommandListHeader> command_lists; | ||||||
|  |     std::vector<u64> command_list_hashes; | ||||||
|  |     std::vector<Tegra::CommandHeader> prefetch_command_list; | ||||||
|  | }; | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the |  * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the | ||||||
|  | @ -60,7 +105,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>; | ||||||
|  * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
 |  * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
 | ||||||
|  * details on this implementation. |  * details on this implementation. | ||||||
|  */ |  */ | ||||||
| class DmaPusher { | class DmaPusher final { | ||||||
| public: | public: | ||||||
|     explicit DmaPusher(Core::System& system, GPU& gpu); |     explicit DmaPusher(Core::System& system, GPU& gpu); | ||||||
|     ~DmaPusher(); |     ~DmaPusher(); | ||||||
|  |  | ||||||
|  | @ -194,30 +194,6 @@ void GPU::SyncGuestHost() { | ||||||
| void GPU::OnCommandListEnd() { | void GPU::OnCommandListEnd() { | ||||||
|     renderer->Rasterizer().ReleaseFences(); |     renderer->Rasterizer().ReleaseFences(); | ||||||
| } | } | ||||||
| // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
 |  | ||||||
| // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
 |  | ||||||
| // So the values you see in docs might be multiplied by 4.
 |  | ||||||
| enum class BufferMethods { |  | ||||||
|     BindObject = 0x0, |  | ||||||
|     Nop = 0x2, |  | ||||||
|     SemaphoreAddressHigh = 0x4, |  | ||||||
|     SemaphoreAddressLow = 0x5, |  | ||||||
|     SemaphoreSequence = 0x6, |  | ||||||
|     SemaphoreTrigger = 0x7, |  | ||||||
|     NotifyIntr = 0x8, |  | ||||||
|     WrcacheFlush = 0x9, |  | ||||||
|     Unk28 = 0xA, |  | ||||||
|     UnkCacheFlush = 0xB, |  | ||||||
|     RefCnt = 0x14, |  | ||||||
|     SemaphoreAcquire = 0x1A, |  | ||||||
|     SemaphoreRelease = 0x1B, |  | ||||||
|     FenceValue = 0x1C, |  | ||||||
|     FenceAction = 0x1D, |  | ||||||
|     Unk78 = 0x1E, |  | ||||||
|     Unk7c = 0x1F, |  | ||||||
|     Yield = 0x20, |  | ||||||
|     NonPullerMethods = 0x40, |  | ||||||
| }; |  | ||||||
| 
 | 
 | ||||||
| enum class GpuSemaphoreOperation { | enum class GpuSemaphoreOperation { | ||||||
|     AcquireEqual = 0x1, |     AcquireEqual = 0x1, | ||||||
|  | @ -277,7 +253,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { | ||||||
|     case BufferMethods::UnkCacheFlush: |     case BufferMethods::UnkCacheFlush: | ||||||
|     case BufferMethods::WrcacheFlush: |     case BufferMethods::WrcacheFlush: | ||||||
|     case BufferMethods::FenceValue: |     case BufferMethods::FenceValue: | ||||||
|  |         break; | ||||||
|     case BufferMethods::FenceAction: |     case BufferMethods::FenceAction: | ||||||
|  |         ProcessFenceActionMethod(); | ||||||
|  |         break; | ||||||
|  |     case BufferMethods::WaitForInterrupt: | ||||||
|  |         ProcessWaitForInterruptMethod(); | ||||||
|         break; |         break; | ||||||
|     case BufferMethods::SemaphoreTrigger: { |     case BufferMethods::SemaphoreTrigger: { | ||||||
|         ProcessSemaphoreTriggerMethod(); |         ProcessSemaphoreTriggerMethod(); | ||||||
|  | @ -391,6 +372,25 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void GPU::ProcessFenceActionMethod() { | ||||||
|  |     switch (regs.fence_action.op) { | ||||||
|  |     case FenceOperation::Acquire: | ||||||
|  |         WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); | ||||||
|  |         break; | ||||||
|  |     case FenceOperation::Increment: | ||||||
|  |         IncrementSyncPoint(regs.fence_action.syncpoint_id); | ||||||
|  |         break; | ||||||
|  |     default: | ||||||
|  |         UNIMPLEMENTED_MSG("Unimplemented operation {}", | ||||||
|  |                           static_cast<u32>(regs.fence_action.op.Value())); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void GPU::ProcessWaitForInterruptMethod() { | ||||||
|  |     // TODO(bunnei) ImplementMe
 | ||||||
|  |     LOG_WARNING(HW_GPU, "(STUBBED) called"); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void GPU::ProcessSemaphoreTriggerMethod() { | void GPU::ProcessSemaphoreTriggerMethod() { | ||||||
|     const auto semaphoreOperationMask = 0xF; |     const auto semaphoreOperationMask = 0xF; | ||||||
|     const auto op = |     const auto op = | ||||||
|  |  | ||||||
|  | @ -263,6 +263,24 @@ public: | ||||||
|         return use_nvdec; |         return use_nvdec; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     enum class FenceOperation : u32 { | ||||||
|  |         Acquire = 0, | ||||||
|  |         Increment = 1, | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|  |     union FenceAction { | ||||||
|  |         u32 raw; | ||||||
|  |         BitField<0, 1, FenceOperation> op; | ||||||
|  |         BitField<8, 24, u32> syncpoint_id; | ||||||
|  | 
 | ||||||
|  |         static constexpr CommandHeader Build(FenceOperation op, u32 syncpoint_id) { | ||||||
|  |             FenceAction result{}; | ||||||
|  |             result.op.Assign(op); | ||||||
|  |             result.syncpoint_id.Assign(syncpoint_id); | ||||||
|  |             return {result.raw}; | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
|     struct Regs { |     struct Regs { | ||||||
|         static constexpr size_t NUM_REGS = 0x40; |         static constexpr size_t NUM_REGS = 0x40; | ||||||
| 
 | 
 | ||||||
|  | @ -291,10 +309,7 @@ public: | ||||||
|                 u32 semaphore_acquire; |                 u32 semaphore_acquire; | ||||||
|                 u32 semaphore_release; |                 u32 semaphore_release; | ||||||
|                 u32 fence_value; |                 u32 fence_value; | ||||||
|                 union { |                 FenceAction fence_action; | ||||||
|                     BitField<4, 4, u32> operation; |  | ||||||
|                     BitField<8, 8, u32> id; |  | ||||||
|                 } fence_action; |  | ||||||
|                 INSERT_UNION_PADDING_WORDS(0xE2); |                 INSERT_UNION_PADDING_WORDS(0xE2); | ||||||
| 
 | 
 | ||||||
|                 // Puller state
 |                 // Puller state
 | ||||||
|  | @ -342,6 +357,8 @@ protected: | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     void ProcessBindMethod(const MethodCall& method_call); |     void ProcessBindMethod(const MethodCall& method_call); | ||||||
|  |     void ProcessFenceActionMethod(); | ||||||
|  |     void ProcessWaitForInterruptMethod(); | ||||||
|     void ProcessSemaphoreTriggerMethod(); |     void ProcessSemaphoreTriggerMethod(); | ||||||
|     void ProcessSemaphoreRelease(); |     void ProcessSemaphoreRelease(); | ||||||
|     void ProcessSemaphoreAcquire(); |     void ProcessSemaphoreAcquire(); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Chloe
						Chloe