forked from eden-emu/eden
		
	MacroHLE: Add MultidrawIndirect HLE Macro.
This commit is contained in:
		
							parent
							
								
									d6e0e7cfad
								
							
						
					
					
						commit
						f2f2784817
					
				
					 13 changed files with 169 additions and 47 deletions
				
			
		|  | @ -170,6 +170,9 @@ public: | |||
|     void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | ||||
|                                   bool is_written, bool is_image); | ||||
| 
 | ||||
|     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||||
|                                                        bool synchronize, bool mark_as_written); | ||||
| 
 | ||||
|     void FlushCachedWrites(); | ||||
| 
 | ||||
|     /// Return true when there are uncommitted buffers to be downloaded
 | ||||
|  | @ -790,6 +793,25 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add | |||
|     compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||||
|                                                                  bool synchronize, | ||||
|                                                                  bool mark_as_written) { | ||||
|     const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||
|     if (!cpu_addr) { | ||||
|         return {&slot_buffers[NULL_BUFFER_ID], 0}; | ||||
|     } | ||||
|     const BufferId buffer_id = FindBuffer(*cpu_addr, size); | ||||
|     Buffer& buffer = slot_buffers[buffer_id]; | ||||
|     if (synchronize) { | ||||
|         SynchronizeBuffer(buffer, *cpu_addr, size); | ||||
|     } | ||||
|     if (mark_as_written) { | ||||
|         MarkWrittenBuffer(buffer_id, *cpu_addr, size); | ||||
|     } | ||||
|     return {&buffer, buffer.Offset(*cpu_addr)}; | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void BufferCache<P>::FlushCachedWrites() { | ||||
|     for (const BufferId buffer_id : cached_write_buffer_ids) { | ||||
|  |  | |||
|  | @ -61,7 +61,7 @@ bool DmaPusher::Step() { | |||
|     } else { | ||||
|         const CommandListHeader command_list_header{ | ||||
|             command_list.command_lists[dma_pushbuffer_subindex++]}; | ||||
|         const GPUVAddr dma_get = command_list_header.addr; | ||||
|         dma_state.dma_get = command_list_header.addr; | ||||
| 
 | ||||
|         if (dma_pushbuffer_subindex >= command_list.command_lists.size()) { | ||||
|             // We've gone through the current list, remove it from the queue
 | ||||
|  | @ -75,11 +75,11 @@ bool DmaPusher::Step() { | |||
| 
 | ||||
|         // Push buffer non-empty, read a word
 | ||||
|         command_headers.resize_destructive(command_list_header.size); | ||||
|         if (Settings::IsGPULevelHigh()) { | ||||
|             memory_manager.ReadBlock(dma_get, command_headers.data(), | ||||
|         if (Settings::IsGPULevelExtreme()) { | ||||
|             memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), | ||||
|                                      command_list_header.size * sizeof(u32)); | ||||
|         } else { | ||||
|             memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(), | ||||
|             memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), | ||||
|                                            command_list_header.size * sizeof(u32)); | ||||
|         } | ||||
|         ProcessCommands(command_headers); | ||||
|  | @ -174,8 +174,10 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { | |||
|         puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, | ||||
|                                dma_state.method_count); | ||||
|     } else { | ||||
|         subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start, | ||||
|                                                            num_methods, dma_state.method_count); | ||||
|         auto subchannel = subchannels[dma_state.subchannel]; | ||||
|         subchannel->current_dma_segment = dma_state.dma_get; | ||||
|         subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, | ||||
|                                     dma_state.method_count); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -156,6 +156,7 @@ private: | |||
|         u32 subchannel;        ///< Current subchannel
 | ||||
|         u32 method_count;      ///< Current method count
 | ||||
|         u32 length_pending;    ///< Large NI command length pending
 | ||||
|         GPUVAddr dma_get;      ///< Currently read segment
 | ||||
|         bool non_incrementing; ///< Current command's NI flag
 | ||||
|         bool is_last_call; | ||||
|     }; | ||||
|  |  | |||
|  | @ -91,6 +91,16 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind | |||
|     ProcessDraw(true, num_instances); | ||||
| } | ||||
| 
 | ||||
| void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) { | ||||
|     const auto& regs{maxwell3d->regs}; | ||||
|     draw_state.topology = topology; | ||||
|     draw_state.index_buffer = regs.index_buffer; | ||||
|     draw_state.index_buffer.first = index_first; | ||||
|     draw_state.index_buffer.count = index_count; | ||||
| 
 | ||||
|     ProcessDrawIndirect(true); | ||||
| } | ||||
| 
 | ||||
| void DrawManager::SetInlineIndexBuffer(u32 index) { | ||||
|     draw_state.inline_index_draw_indexes.push_back(static_cast<u8>(index & 0x000000ff)); | ||||
|     draw_state.inline_index_draw_indexes.push_back(static_cast<u8>((index & 0x0000ff00) >> 8)); | ||||
|  | @ -198,4 +208,15 @@ void DrawManager::ProcessDraw(bool draw_indexed, u32 instance_count) { | |||
|         maxwell3d->rasterizer->Draw(draw_indexed, instance_count); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void DrawManager::ProcessDrawIndirect(bool draw_indexed) { | ||||
|     LOG_TRACE(HW_GPU, "called, topology={}, count={}", draw_state.topology, | ||||
|               draw_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count); | ||||
| 
 | ||||
|     UpdateTopology(); | ||||
| 
 | ||||
|     if (maxwell3d->ShouldExecute()) { | ||||
|         maxwell3d->rasterizer->DrawIndirect(draw_indexed); | ||||
|     } | ||||
| } | ||||
| } // namespace Tegra::Engines
 | ||||
|  |  | |||
|  | @ -32,6 +32,13 @@ public: | |||
|         std::vector<u8> inline_index_draw_indexes; | ||||
|     }; | ||||
| 
 | ||||
|     struct IndirectParams { | ||||
|         GPUVAddr start_address; | ||||
|         size_t buffer_size; | ||||
|         size_t max_draw_counts; | ||||
|         size_t stride; | ||||
|     }; | ||||
| 
 | ||||
|     explicit DrawManager(Maxwell3D* maxwell_3d); | ||||
| 
 | ||||
|     void ProcessMethodCall(u32 method, u32 argument); | ||||
|  | @ -46,10 +53,20 @@ public: | |||
|     void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, | ||||
|                    u32 base_instance, u32 num_instances); | ||||
| 
 | ||||
|     void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count); | ||||
| 
 | ||||
|     const State& GetDrawState() const { | ||||
|         return draw_state; | ||||
|     } | ||||
| 
 | ||||
|     IndirectParams& GetIndirectParams() { | ||||
|         return indirect_state; | ||||
|     } | ||||
| 
 | ||||
|     const IndirectParams& GetIndirectParams() const { | ||||
|         return indirect_state; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     void SetInlineIndexBuffer(u32 index); | ||||
| 
 | ||||
|  | @ -63,7 +80,10 @@ private: | |||
| 
 | ||||
|     void ProcessDraw(bool draw_indexed, u32 instance_count); | ||||
| 
 | ||||
|     void ProcessDrawIndirect(bool draw_indexed); | ||||
| 
 | ||||
|     Maxwell3D* maxwell3d{}; | ||||
|     State draw_state{}; | ||||
|     IndirectParams indirect_state{}; | ||||
| }; | ||||
| } // namespace Tegra::Engines
 | ||||
|  |  | |||
|  | @ -17,6 +17,8 @@ public: | |||
|     /// Write multiple values to the register identified by method.
 | ||||
|     virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||
|                                  u32 methods_pending) = 0; | ||||
| 
 | ||||
|     GPUVAddr current_dma_segment; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Tegra::Engines
 | ||||
|  |  | |||
|  | @ -53,42 +53,43 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& | |||
| 
 | ||||
| // Multidraw Indirect
 | ||||
| void HLE_3F5E74B9C9A50164(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { | ||||
|     SCOPE_EXIT({ | ||||
|         // Clean everything.
 | ||||
|         maxwell3d.regs.vertex_id_base = 0x0; | ||||
|         maxwell3d.CallMethod(0x8e3, 0x640, true); | ||||
|         maxwell3d.CallMethod(0x8e4, 0x0, true); | ||||
|         maxwell3d.CallMethod(0x8e5, 0x0, true); | ||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||
|     }); | ||||
|     const u32 start_indirect = parameters[0]; | ||||
|     const u32 end_indirect = parameters[1]; | ||||
|     if (start_indirect >= end_indirect) { | ||||
|         // Nothing to do.
 | ||||
|         return; | ||||
|     } | ||||
|     const u32 padding = parameters[3]; | ||||
|     const std::size_t max_draws = parameters[4]; | ||||
|     const auto topology = | ||||
|         static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); | ||||
|     const u32 padding = parameters[3]; // padding is in words
 | ||||
| 
 | ||||
|     // size of each indirect segment
 | ||||
|     const u32 indirect_words = 5 + padding; | ||||
|     const std::size_t first_draw = start_indirect; | ||||
|     const std::size_t effective_draws = end_indirect - start_indirect; | ||||
|     const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws); | ||||
| 
 | ||||
|     for (std::size_t index = first_draw; index < last_draw; index++) { | ||||
|     const u32 stride = indirect_words * sizeof(u32); | ||||
|     const GPUVAddr start_address = maxwell3d.current_dma_segment + 4 * sizeof(u32); | ||||
|     const std::size_t draw_count = end_indirect - start_indirect; | ||||
|     u32 lowest_first = std::numeric_limits<u32>::max(); | ||||
|     u32 highest_limit = std::numeric_limits<u32>::min(); | ||||
|     for (std::size_t index = 0; index < draw_count; index++) { | ||||
|         const std::size_t base = index * indirect_words + 5; | ||||
|         const u32 base_vertex = parameters[base + 3]; | ||||
|         const u32 base_instance = parameters[base + 4]; | ||||
|         maxwell3d.regs.vertex_id_base = base_vertex; | ||||
|         const u32 count = parameters[base]; | ||||
|         const u32 first_index = parameters[base + 2]; | ||||
|         lowest_first = std::min(lowest_first, first_index); | ||||
|         highest_limit = std::max(highest_limit, first_index + count); | ||||
|     } | ||||
| 
 | ||||
|     const u32 base_vertex = parameters[8]; | ||||
|     const u32 base_instance = parameters[9]; | ||||
|     maxwell3d.CallMethod(0x8e3, 0x640, true); | ||||
|     maxwell3d.CallMethod(0x8e4, base_vertex, true); | ||||
|     maxwell3d.CallMethod(0x8e5, base_instance, true); | ||||
|     auto& params = maxwell3d.draw_manager->GetIndirectParams(); | ||||
|     params.start_address = start_address; | ||||
|     params.buffer_size = sizeof(u32) + stride * draw_count; | ||||
|     params.max_draw_counts = draw_count; | ||||
|     params.stride = stride; | ||||
|     maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||
|         maxwell3d.draw_manager->DrawIndex( | ||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]), | ||||
|             parameters[base + 2], parameters[base], base_vertex, base_instance, | ||||
|             parameters[base + 1]); | ||||
|     } | ||||
|     maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit); | ||||
| } | ||||
| 
 | ||||
| // Multi-layer Clear
 | ||||
|  |  | |||
|  | @ -42,6 +42,9 @@ public: | |||
|     /// Dispatches a draw invocation
 | ||||
|     virtual void Draw(bool is_indexed, u32 instance_count) = 0; | ||||
| 
 | ||||
|     /// Dispatches an indirect draw invocation
 | ||||
|     virtual void DrawIndirect(bool is_indexed) {} | ||||
| 
 | ||||
|     /// Clear the current framebuffer
 | ||||
|     virtual void Clear(u32 layer_count) = 0; | ||||
| 
 | ||||
|  |  | |||
|  | @ -180,7 +180,8 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 
 | ||||
| RasterizerVulkan::~RasterizerVulkan() = default; | ||||
| 
 | ||||
| void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { | ||||
| template <typename Func> | ||||
| void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_Drawing); | ||||
| 
 | ||||
|     SCOPE_EXIT({ gpu.TickWork(); }); | ||||
|  | @ -201,6 +202,13 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { | |||
| 
 | ||||
|     UpdateDynamicStates(); | ||||
| 
 | ||||
|     draw_func(); | ||||
| 
 | ||||
|     EndTransformFeedback(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { | ||||
|     PrepareDraw(is_indexed, [this, is_indexed, instance_count] { | ||||
|         const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | ||||
|         const u32 num_instances{instance_count}; | ||||
|         const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)}; | ||||
|  | @ -214,7 +222,28 @@ void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { | |||
|                             draw_params.base_vertex, draw_params.base_instance); | ||||
|             } | ||||
|         }); | ||||
|     EndTransformFeedback(); | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::DrawIndirect(bool is_indexed) { | ||||
|     PrepareDraw(is_indexed, [this, is_indexed] { | ||||
|         const auto params = maxwell3d->draw_manager->GetIndirectParams(); | ||||
|         const auto [buffer, offset] = buffer_cache.ObtainBuffer( | ||||
|             params.start_address, static_cast<u32>(params.buffer_size), true, false); | ||||
|         scheduler.Record([buffer_obj = buffer->Handle(), offset, | ||||
|                           max_draw_counts = params.max_draw_counts, stride = params.stride, | ||||
|                           is_indexed](vk::CommandBuffer cmdbuf) { | ||||
|             if (is_indexed) { | ||||
|                 cmdbuf.DrawIndexedIndirectCount(buffer_obj, offset + 4ULL, buffer_obj, offset, | ||||
|                                                 static_cast<u32>(max_draw_counts), | ||||
|                                                 static_cast<u32>(stride)); | ||||
|             } else { | ||||
|                 cmdbuf.DrawIndirectCount(buffer_obj, offset + 4ULL, buffer_obj, offset, | ||||
|                                          static_cast<u32>(max_draw_counts), | ||||
|                                          static_cast<u32>(stride)); | ||||
|             } | ||||
|         }); | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::Clear(u32 layer_count) { | ||||
|  |  | |||
|  | @ -65,6 +65,7 @@ public: | |||
|     ~RasterizerVulkan() override; | ||||
| 
 | ||||
|     void Draw(bool is_indexed, u32 instance_count) override; | ||||
|     void DrawIndirect(bool is_indexed) override; | ||||
|     void Clear(u32 layer_count) override; | ||||
|     void DispatchCompute() override; | ||||
|     void ResetCounter(VideoCore::QueryType type) override; | ||||
|  | @ -114,6 +115,9 @@ private: | |||
| 
 | ||||
|     static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); | ||||
| 
 | ||||
|     template <typename Func> | ||||
|     void PrepareDraw(bool is_indexed, Func&&); | ||||
| 
 | ||||
|     void FlushWork(); | ||||
| 
 | ||||
|     void UpdateDynamicStates(); | ||||
|  |  | |||
|  | @ -350,7 +350,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
|             .sampleRateShading = true, | ||||
|             .dualSrcBlend = true, | ||||
|             .logicOp = true, | ||||
|             .multiDrawIndirect = false, | ||||
|             .multiDrawIndirect = true, | ||||
|             .drawIndirectFirstInstance = false, | ||||
|             .depthClamp = true, | ||||
|             .depthBiasClamp = true, | ||||
|  |  | |||
|  | @ -94,6 +94,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
|     X(vkCmdDispatch); | ||||
|     X(vkCmdDraw); | ||||
|     X(vkCmdDrawIndexed); | ||||
|     X(vkCmdDrawIndirectCount); | ||||
|     X(vkCmdDrawIndexedIndirectCount); | ||||
|     X(vkCmdEndQuery); | ||||
|     X(vkCmdEndRenderPass); | ||||
|     X(vkCmdEndTransformFeedbackEXT); | ||||
|  |  | |||
|  | @ -213,6 +213,8 @@ struct DeviceDispatch : InstanceDispatch { | |||
|     PFN_vkCmdDispatch vkCmdDispatch{}; | ||||
|     PFN_vkCmdDraw vkCmdDraw{}; | ||||
|     PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; | ||||
|     PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{}; | ||||
|     PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{}; | ||||
|     PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; | ||||
|     PFN_vkCmdEndQuery vkCmdEndQuery{}; | ||||
|     PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; | ||||
|  | @ -1019,6 +1021,19 @@ public: | |||
|                               first_instance); | ||||
|     } | ||||
| 
 | ||||
|     void DrawIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, VkBuffer count_buffer, | ||||
|                            VkDeviceSize count_offset, u32 draw_count, u32 stride) const noexcept { | ||||
|         dld->vkCmdDrawIndirectCount(handle, src_buffer, src_offset, count_buffer, count_offset, | ||||
|                                     draw_count, stride); | ||||
|     } | ||||
| 
 | ||||
|     void DrawIndexedIndirectCount(VkBuffer src_buffer, VkDeviceSize src_offset, | ||||
|                                   VkBuffer count_buffer, VkDeviceSize count_offset, u32 draw_count, | ||||
|                                   u32 stride) const noexcept { | ||||
|         dld->vkCmdDrawIndexedIndirectCount(handle, src_buffer, src_offset, count_buffer, | ||||
|                                            count_offset, draw_count, stride); | ||||
|     } | ||||
| 
 | ||||
|     void ClearAttachments(Span<VkClearAttachment> attachments, | ||||
|                           Span<VkClearRect> rects) const noexcept { | ||||
|         dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(), | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow