Revert Buffer cache changes and set up additional macros.
This commit is contained in:
		
							parent
							
								
									12a76465b9
								
							
						
					
					
						commit
						d2643a61c3
					
				
					 7 changed files with 179 additions and 128 deletions
				
			
		|  | @ -170,11 +170,6 @@ public: | ||||||
|     void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, |     void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | ||||||
|                                   bool is_written, bool is_image); |                                   bool is_written, bool is_image); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, |  | ||||||
|                                                        bool synchronize = true, |  | ||||||
|                                                        bool mark_as_written = false, |  | ||||||
|                                                        bool discard_downloads = false); |  | ||||||
| 
 |  | ||||||
|     void FlushCachedWrites(); |     void FlushCachedWrites(); | ||||||
| 
 | 
 | ||||||
|     /// Return true when there are uncommitted buffers to be downloaded
 |     /// Return true when there are uncommitted buffers to be downloaded
 | ||||||
|  | @ -354,8 +349,6 @@ private: | ||||||
| 
 | 
 | ||||||
|     bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); |     bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | ||||||
| 
 | 
 | ||||||
|     bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size); |  | ||||||
| 
 |  | ||||||
|     void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |     void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||||||
|                       std::span<BufferCopy> copies); |                       std::span<BufferCopy> copies); | ||||||
| 
 | 
 | ||||||
|  | @ -442,7 +435,6 @@ private: | ||||||
| 
 | 
 | ||||||
|     std::vector<BufferId> cached_write_buffer_ids; |     std::vector<BufferId> cached_write_buffer_ids; | ||||||
| 
 | 
 | ||||||
|     IntervalSet discarded_ranges; |  | ||||||
|     IntervalSet uncommitted_ranges; |     IntervalSet uncommitted_ranges; | ||||||
|     IntervalSet common_ranges; |     IntervalSet common_ranges; | ||||||
|     std::deque<IntervalSet> committed_ranges; |     std::deque<IntervalSet> committed_ranges; | ||||||
|  | @ -600,17 +592,13 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | ||||||
|     }}; |     }}; | ||||||
| 
 | 
 | ||||||
|     boost::container::small_vector<IntervalType, 4> tmp_intervals; |     boost::container::small_vector<IntervalType, 4> tmp_intervals; | ||||||
|     const bool is_high_accuracy = |  | ||||||
|         Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; |  | ||||||
|     auto mirror = [&](VAddr base_address, VAddr base_address_end) { |     auto mirror = [&](VAddr base_address, VAddr base_address_end) { | ||||||
|         const u64 size = base_address_end - base_address; |         const u64 size = base_address_end - base_address; | ||||||
|         const VAddr diff = base_address - *cpu_src_address; |         const VAddr diff = base_address - *cpu_src_address; | ||||||
|         const VAddr new_base_address = *cpu_dest_address + diff; |         const VAddr new_base_address = *cpu_dest_address + diff; | ||||||
|         const IntervalType add_interval{new_base_address, new_base_address + size}; |         const IntervalType add_interval{new_base_address, new_base_address + size}; | ||||||
|  |         uncommitted_ranges.add(add_interval); | ||||||
|         tmp_intervals.push_back(add_interval); |         tmp_intervals.push_back(add_interval); | ||||||
|         if (is_high_accuracy) { |  | ||||||
|             uncommitted_ranges.add(add_interval); |  | ||||||
|         } |  | ||||||
|     }; |     }; | ||||||
|     ForEachWrittenRange(*cpu_src_address, amount, mirror); |     ForEachWrittenRange(*cpu_src_address, amount, mirror); | ||||||
|     // This subtraction in this order is important for overlapping copies.
 |     // This subtraction in this order is important for overlapping copies.
 | ||||||
|  | @ -821,32 +809,6 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add | ||||||
|     compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); |     compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> |  | ||||||
| std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, |  | ||||||
|                                                                  bool synchronize, |  | ||||||
|                                                                  bool mark_as_written, |  | ||||||
|                                                                  bool discard_downloads) { |  | ||||||
|     const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |  | ||||||
|     if (!cpu_addr) { |  | ||||||
|         return {&slot_buffers[NULL_BUFFER_ID], 0}; |  | ||||||
|     } |  | ||||||
|     const BufferId buffer_id = FindBuffer(*cpu_addr, size); |  | ||||||
|     Buffer& buffer = slot_buffers[buffer_id]; |  | ||||||
|     if (synchronize) { |  | ||||||
|         // SynchronizeBuffer(buffer, *cpu_addr, size);
 |  | ||||||
|         SynchronizeBufferNoModified(buffer, *cpu_addr, size); |  | ||||||
|     } |  | ||||||
|     if (mark_as_written) { |  | ||||||
|         MarkWrittenBuffer(buffer_id, *cpu_addr, size); |  | ||||||
|     } |  | ||||||
|     if (discard_downloads) { |  | ||||||
|         IntervalType interval{*cpu_addr, size}; |  | ||||||
|         ClearDownload(interval); |  | ||||||
|         discarded_ranges.subtract(interval); |  | ||||||
|     } |  | ||||||
|     return {&buffer, buffer.Offset(*cpu_addr)}; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| template <class P> | template <class P> | ||||||
| void BufferCache<P>::FlushCachedWrites() { | void BufferCache<P>::FlushCachedWrites() { | ||||||
|     for (const BufferId buffer_id : cached_write_buffer_ids) { |     for (const BufferId buffer_id : cached_write_buffer_ids) { | ||||||
|  | @ -862,6 +824,10 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept { | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| void BufferCache<P>::AccumulateFlushes() { | void BufferCache<P>::AccumulateFlushes() { | ||||||
|  |     if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { | ||||||
|  |         uncommitted_ranges.clear(); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|     if (uncommitted_ranges.empty()) { |     if (uncommitted_ranges.empty()) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  | @ -877,14 +843,12 @@ template <class P> | ||||||
| void BufferCache<P>::CommitAsyncFlushesHigh() { | void BufferCache<P>::CommitAsyncFlushesHigh() { | ||||||
|     AccumulateFlushes(); |     AccumulateFlushes(); | ||||||
| 
 | 
 | ||||||
|     for (const auto& interval : discarded_ranges) { |  | ||||||
|         common_ranges.subtract(interval); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (committed_ranges.empty()) { |     if (committed_ranges.empty()) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     MICROPROFILE_SCOPE(GPU_DownloadMemory); |     MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||||||
|  |     const bool is_accuracy_normal = | ||||||
|  |         Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; | ||||||
| 
 | 
 | ||||||
|     auto it = committed_ranges.begin(); |     auto it = committed_ranges.begin(); | ||||||
|     while (it != committed_ranges.end()) { |     while (it != committed_ranges.end()) { | ||||||
|  | @ -909,6 +873,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | ||||||
|             ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |             ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | ||||||
|                 buffer.ForEachDownloadRangeAndClear( |                 buffer.ForEachDownloadRangeAndClear( | ||||||
|                     cpu_addr, size, [&](u64 range_offset, u64 range_size) { |                     cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||||||
|  |                         if (is_accuracy_normal) { | ||||||
|  |                             return; | ||||||
|  |                         } | ||||||
|                         const VAddr buffer_addr = buffer.CpuAddr(); |                         const VAddr buffer_addr = buffer.CpuAddr(); | ||||||
|                         const auto add_download = [&](VAddr start, VAddr end) { |                         const auto add_download = [&](VAddr start, VAddr end) { | ||||||
|                             const u64 new_offset = start - buffer_addr; |                             const u64 new_offset = start - buffer_addr; | ||||||
|  | @ -973,7 +940,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| void BufferCache<P>::CommitAsyncFlushes() { | void BufferCache<P>::CommitAsyncFlushes() { | ||||||
|     CommitAsyncFlushesHigh(); |     if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { | ||||||
|  |         CommitAsyncFlushesHigh(); | ||||||
|  |     } else { | ||||||
|  |         uncommitted_ranges.clear(); | ||||||
|  |         committed_ranges.clear(); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
|  | @ -1353,7 +1325,7 @@ void BufferCache<P>::UpdateIndexBuffer() { | ||||||
|     const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |     const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | ||||||
|     const auto& index_array = draw_state.index_buffer; |     const auto& index_array = draw_state.index_buffer; | ||||||
|     auto& flags = maxwell3d->dirty.flags; |     auto& flags = maxwell3d->dirty.flags; | ||||||
|     if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) { |     if (!flags[Dirty::IndexBuffer]) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     flags[Dirty::IndexBuffer] = false; |     flags[Dirty::IndexBuffer] = false; | ||||||
|  | @ -1574,11 +1546,7 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s | ||||||
|     if (!is_async) { |     if (!is_async) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     const bool is_high_accuracy = |     uncommitted_ranges.add(base_interval); | ||||||
|         Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; |  | ||||||
|     if (is_high_accuracy) { |  | ||||||
|         uncommitted_ranges.add(base_interval); |  | ||||||
|     } |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
|  | @ -1771,51 +1739,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s | ||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> |  | ||||||
| bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) { |  | ||||||
|     boost::container::small_vector<BufferCopy, 4> copies; |  | ||||||
|     u64 total_size_bytes = 0; |  | ||||||
|     u64 largest_copy = 0; |  | ||||||
|     IntervalSet found_sets{}; |  | ||||||
|     auto make_copies = [&] { |  | ||||||
|         for (auto& interval : found_sets) { |  | ||||||
|             const std::size_t sub_size = interval.upper() - interval.lower(); |  | ||||||
|             const VAddr cpu_addr = interval.lower(); |  | ||||||
|             copies.push_back(BufferCopy{ |  | ||||||
|                 .src_offset = total_size_bytes, |  | ||||||
|                 .dst_offset = cpu_addr - buffer.CpuAddr(), |  | ||||||
|                 .size = sub_size, |  | ||||||
|             }); |  | ||||||
|             total_size_bytes += sub_size; |  | ||||||
|             largest_copy = std::max(largest_copy, sub_size); |  | ||||||
|         } |  | ||||||
|         const std::span<BufferCopy> copies_span(copies.data(), copies.size()); |  | ||||||
|         UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); |  | ||||||
|     }; |  | ||||||
|     buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { |  | ||||||
|         const VAddr base_adr = buffer.CpuAddr() + range_offset; |  | ||||||
|         const VAddr end_adr = base_adr + range_size; |  | ||||||
|         const IntervalType add_interval{base_adr, end_adr}; |  | ||||||
|         found_sets.add(add_interval); |  | ||||||
|     }); |  | ||||||
|     if (found_sets.empty()) { |  | ||||||
|         return true; |  | ||||||
|     } |  | ||||||
|     const IntervalType search_interval{cpu_addr, cpu_addr + size}; |  | ||||||
|     auto it = common_ranges.lower_bound(search_interval); |  | ||||||
|     auto it_end = common_ranges.upper_bound(search_interval); |  | ||||||
|     if (it == common_ranges.end()) { |  | ||||||
|         make_copies(); |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     while (it != it_end) { |  | ||||||
|         found_sets.subtract(*it); |  | ||||||
|         it++; |  | ||||||
|     } |  | ||||||
|     make_copies(); |  | ||||||
|     return false; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| template <class P> | template <class P> | ||||||
| void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||||||
|                                   std::span<BufferCopy> copies) { |                                   std::span<BufferCopy> copies) { | ||||||
|  |  | ||||||
|  | @ -77,11 +77,20 @@ bool DmaPusher::Step() { | ||||||
|         command_headers.resize_destructive(command_list_header.size); |         command_headers.resize_destructive(command_list_header.size); | ||||||
|         constexpr u32 MacroRegistersStart = 0xE00; |         constexpr u32 MacroRegistersStart = 0xE00; | ||||||
|         if (dma_state.method < MacroRegistersStart) { |         if (dma_state.method < MacroRegistersStart) { | ||||||
|             memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), |             if (Settings::IsGPULevelHigh()) { | ||||||
|                                      command_list_header.size * sizeof(u32)); |                 memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), | ||||||
|  |                                          command_list_header.size * sizeof(u32)); | ||||||
|  |             } else { | ||||||
|  |                 memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), | ||||||
|  |                                                command_list_header.size * sizeof(u32)); | ||||||
|  |             } | ||||||
|         } else { |         } else { | ||||||
|             memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), |             const size_t copy_size = command_list_header.size * sizeof(u32); | ||||||
|                                            command_list_header.size * sizeof(u32)); |             if (subchannels[dma_state.subchannel]) { | ||||||
|  |                 subchannels[dma_state.subchannel]->current_dirty = | ||||||
|  |                     memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size); | ||||||
|  |             } | ||||||
|  |             memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size); | ||||||
|         } |         } | ||||||
|         ProcessCommands(command_headers); |         ProcessCommands(command_headers); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -18,6 +18,7 @@ public: | ||||||
|     virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |     virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | ||||||
|                                  u32 methods_pending) = 0; |                                  u32 methods_pending) = 0; | ||||||
| 
 | 
 | ||||||
|  |     bool current_dirty{}; | ||||||
|     GPUVAddr current_dma_segment; |     GPUVAddr current_dma_segment; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -4,6 +4,7 @@ | ||||||
| #include <cstring> | #include <cstring> | ||||||
| #include <optional> | #include <optional> | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
|  | #include "common/settings.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/core_timing.h" | #include "core/core_timing.h" | ||||||
| #include "video_core/dirty_flags.h" | #include "video_core/dirty_flags.h" | ||||||
|  | @ -14,6 +15,7 @@ | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/textures/texture.h" | #include "video_core/textures/texture.h" | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| namespace Tegra::Engines { | namespace Tegra::Engines { | ||||||
| 
 | 
 | ||||||
| using VideoCore::QueryType; | using VideoCore::QueryType; | ||||||
|  | @ -134,6 +136,8 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool | ||||||
|         macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); |         macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); | ||||||
|     } |     } | ||||||
|     macro_segments.emplace_back(current_dma_segment, amount); |     macro_segments.emplace_back(current_dma_segment, amount); | ||||||
|  |     current_macro_dirty |= current_dirty; | ||||||
|  |     current_dirty = false; | ||||||
| 
 | 
 | ||||||
|     // Call the macro when there are no more parameters in the command buffer
 |     // Call the macro when there are no more parameters in the command buffer
 | ||||||
|     if (is_last_call) { |     if (is_last_call) { | ||||||
|  | @ -141,10 +145,14 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool | ||||||
|         macro_params.clear(); |         macro_params.clear(); | ||||||
|         macro_addresses.clear(); |         macro_addresses.clear(); | ||||||
|         macro_segments.clear(); |         macro_segments.clear(); | ||||||
|  |         current_macro_dirty = false; | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Maxwell3D::RefreshParameters() { | void Maxwell3D::RefreshParametersImpl() { | ||||||
|  |     if (!Settings::IsGPULevelHigh()) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|     size_t current_index = 0; |     size_t current_index = 0; | ||||||
|     for (auto& segment : macro_segments) { |     for (auto& segment : macro_segments) { | ||||||
|         if (segment.first == 0) { |         if (segment.first == 0) { | ||||||
|  | @ -157,21 +165,6 @@ void Maxwell3D::RefreshParameters() { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool Maxwell3D::AnyParametersDirty() { |  | ||||||
|     size_t current_index = 0; |  | ||||||
|     for (auto& segment : macro_segments) { |  | ||||||
|         if (segment.first == 0) { |  | ||||||
|             current_index += segment.second; |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         if (memory_manager.IsMemoryDirty(segment.first, sizeof(u32) * segment.second)) { |  | ||||||
|             return true; |  | ||||||
|         } |  | ||||||
|         current_index += segment.second; |  | ||||||
|     } |  | ||||||
|     return false; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| u32 Maxwell3D::GetMaxCurrentVertices() { | u32 Maxwell3D::GetMaxCurrentVertices() { | ||||||
|     u32 num_vertices = 0; |     u32 num_vertices = 0; | ||||||
|     for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { |     for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { | ||||||
|  | @ -332,7 +325,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||||
| 
 | 
 | ||||||
|     const u32 argument = ProcessShadowRam(method, method_argument); |     const u32 argument = ProcessShadowRam(method, method_argument); | ||||||
|     ProcessDirtyRegisters(method, argument); |     ProcessDirtyRegisters(method, argument); | ||||||
| 
 |  | ||||||
|     ProcessMethodCall(method, argument, method_argument, is_last_call); |     ProcessMethodCall(method, argument, method_argument, is_last_call); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -272,6 +272,7 @@ public: | ||||||
|             }; |             }; | ||||||
| 
 | 
 | ||||||
|             union { |             union { | ||||||
|  |                 u32 raw; | ||||||
|                 BitField<0, 1, Mode> mode; |                 BitField<0, 1, Mode> mode; | ||||||
|                 BitField<4, 8, u32> pad; |                 BitField<4, 8, u32> pad; | ||||||
|             }; |             }; | ||||||
|  | @ -1217,10 +1218,12 @@ public: | ||||||
| 
 | 
 | ||||||
|         struct Window { |         struct Window { | ||||||
|             union { |             union { | ||||||
|  |                 u32 raw_1; | ||||||
|                 BitField<0, 16, u32> x_min; |                 BitField<0, 16, u32> x_min; | ||||||
|                 BitField<16, 16, u32> x_max; |                 BitField<16, 16, u32> x_max; | ||||||
|             }; |             }; | ||||||
|             union { |             union { | ||||||
|  |                 u32 raw_2; | ||||||
|                 BitField<0, 16, u32> y_min; |                 BitField<0, 16, u32> y_min; | ||||||
|                 BitField<16, 16, u32> y_max; |                 BitField<16, 16, u32> y_max; | ||||||
|             }; |             }; | ||||||
|  | @ -3090,9 +3093,16 @@ public: | ||||||
|         return macro_addresses[index]; |         return macro_addresses[index]; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void RefreshParameters(); |     void RefreshParameters() { | ||||||
|  |         if (!current_macro_dirty) { | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |         RefreshParametersImpl(); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     bool AnyParametersDirty(); |     bool AnyParametersDirty() { | ||||||
|  |         return current_macro_dirty; | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     u32 GetMaxCurrentVertices(); |     u32 GetMaxCurrentVertices(); | ||||||
| 
 | 
 | ||||||
|  | @ -3101,6 +3111,9 @@ public: | ||||||
|     /// Handles a write to the CLEAR_BUFFERS register.
 |     /// Handles a write to the CLEAR_BUFFERS register.
 | ||||||
|     void ProcessClearBuffers(u32 layer_count); |     void ProcessClearBuffers(u32 layer_count); | ||||||
| 
 | 
 | ||||||
|  |     /// Handles a write to the CB_BIND register.
 | ||||||
|  |     void ProcessCBBind(size_t stage_index); | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     void InitializeRegisterDefaults(); |     void InitializeRegisterDefaults(); | ||||||
| 
 | 
 | ||||||
|  | @ -3154,12 +3167,11 @@ private: | ||||||
|     void ProcessCBData(u32 value); |     void ProcessCBData(u32 value); | ||||||
|     void ProcessCBMultiData(const u32* start_base, u32 amount); |     void ProcessCBMultiData(const u32* start_base, u32 amount); | ||||||
| 
 | 
 | ||||||
|     /// Handles a write to the CB_BIND register.
 |  | ||||||
|     void ProcessCBBind(size_t stage_index); |  | ||||||
| 
 |  | ||||||
|     /// Returns a query's value or an empty object if the value will be deferred through a cache.
 |     /// Returns a query's value or an empty object if the value will be deferred through a cache.
 | ||||||
|     std::optional<u64> GetQueryResult(); |     std::optional<u64> GetQueryResult(); | ||||||
| 
 | 
 | ||||||
|  |     void RefreshParametersImpl(); | ||||||
|  | 
 | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|     MemoryManager& memory_manager; |     MemoryManager& memory_manager; | ||||||
| 
 | 
 | ||||||
|  | @ -3187,6 +3199,7 @@ private: | ||||||
|     bool draw_indexed{}; |     bool draw_indexed{}; | ||||||
|     std::vector<std::pair<GPUVAddr, size_t>> macro_segments; |     std::vector<std::pair<GPUVAddr, size_t>> macro_segments; | ||||||
|     std::vector<GPUVAddr> macro_addresses; |     std::vector<GPUVAddr> macro_addresses; | ||||||
|  |     bool current_macro_dirty{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| #define ASSERT_REG_POSITION(field_name, position)                                                  \ | #define ASSERT_REG_POSITION(field_name, position)                                                  \ | ||||||
|  |  | ||||||
|  | @ -12,6 +12,7 @@ | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/fs/fs.h" | #include "common/fs/fs.h" | ||||||
| #include "common/fs/path_util.h" | #include "common/fs/path_util.h" | ||||||
|  | #include "common/microprofile.h" | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/macro/macro.h" | #include "video_core/macro/macro.h" | ||||||
|  | @ -22,6 +23,8 @@ | ||||||
| #include "video_core/macro/macro_jit_x64.h" | #include "video_core/macro/macro_jit_x64.h" | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | MICROPROFILE_DEFINE(MacroHLE, "GPU", "Execute macro hle", MP_RGB(128, 192, 192)); | ||||||
|  | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| 
 | 
 | ||||||
| static void Dump(u64 hash, std::span<const u32> code) { | static void Dump(u64 hash, std::span<const u32> code) { | ||||||
|  | @ -60,6 +63,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { | ||||||
|     if (compiled_macro != macro_cache.end()) { |     if (compiled_macro != macro_cache.end()) { | ||||||
|         const auto& cache_info = compiled_macro->second; |         const auto& cache_info = compiled_macro->second; | ||||||
|         if (cache_info.has_hle_program) { |         if (cache_info.has_hle_program) { | ||||||
|  |             MICROPROFILE_SCOPE(MacroHLE); | ||||||
|             cache_info.hle_program->Execute(parameters, method); |             cache_info.hle_program->Execute(parameters, method); | ||||||
|         } else { |         } else { | ||||||
|             maxwell3d.RefreshParameters(); |             maxwell3d.RefreshParameters(); | ||||||
|  | @ -106,6 +110,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { | ||||||
|         if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { |         if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { | ||||||
|             cache_info.has_hle_program = true; |             cache_info.has_hle_program = true; | ||||||
|             cache_info.hle_program = std::move(hle_program); |             cache_info.hle_program = std::move(hle_program); | ||||||
|  |             MICROPROFILE_SCOPE(MacroHLE); | ||||||
|             cache_info.hle_program->Execute(parameters, method); |             cache_info.hle_program->Execute(parameters, method); | ||||||
|         } else { |         } else { | ||||||
|             maxwell3d.RefreshParameters(); |             maxwell3d.RefreshParameters(); | ||||||
|  |  | ||||||
|  | @ -86,7 +86,7 @@ public: | ||||||
| 
 | 
 | ||||||
|     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||||
|         auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); |         auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); | ||||||
|         if (!IsTopologySafe(topology)) { |         if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { | ||||||
|             Fallback(parameters); |             Fallback(parameters); | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|  | @ -117,8 +117,8 @@ private: | ||||||
|     void Fallback(const std::vector<u32>& parameters) { |     void Fallback(const std::vector<u32>& parameters) { | ||||||
|         SCOPE_EXIT({ |         SCOPE_EXIT({ | ||||||
|             if (extended) { |             if (extended) { | ||||||
|                 maxwell3d.CallMethod(0x8e3, 0x640, true); |                 maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||||
|                 maxwell3d.CallMethod(0x8e4, 0, true); |                 maxwell3d.replace_table.clear(); | ||||||
|             } |             } | ||||||
|         }); |         }); | ||||||
|         maxwell3d.RefreshParameters(); |         maxwell3d.RefreshParameters(); | ||||||
|  | @ -127,7 +127,8 @@ private: | ||||||
|         const u32 vertex_first = parameters[3]; |         const u32 vertex_first = parameters[3]; | ||||||
|         const u32 vertex_count = parameters[1]; |         const u32 vertex_count = parameters[1]; | ||||||
| 
 | 
 | ||||||
|         if (maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { |         if (maxwell3d.AnyParametersDirty() && | ||||||
|  |             maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { | ||||||
|             ASSERT_MSG(false, "Faulty draw!"); |             ASSERT_MSG(false, "Faulty draw!"); | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|  | @ -157,7 +158,7 @@ public: | ||||||
| 
 | 
 | ||||||
|     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||||
|         auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); |         auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); | ||||||
|         if (!IsTopologySafe(topology)) { |         if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { | ||||||
|             Fallback(parameters); |             Fallback(parameters); | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|  | @ -169,7 +170,11 @@ public: | ||||||
|         } |         } | ||||||
|         const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); |         const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); | ||||||
|         const u32 base_size = std::max<u32>(minimum_limit, estimate); |         const u32 base_size = std::max<u32>(minimum_limit, estimate); | ||||||
|         maxwell3d.regs.draw.topology.Assign(topology); |         const u32 element_base = parameters[4]; | ||||||
|  |         const u32 base_instance = parameters[5]; | ||||||
|  |         maxwell3d.regs.vertex_id_base = element_base; | ||||||
|  |         maxwell3d.regs.global_base_vertex_index = element_base; | ||||||
|  |         maxwell3d.regs.global_base_instance_index = base_instance; | ||||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||||
|         maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; |         maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||||
|         maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); |         maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | ||||||
|  | @ -186,6 +191,9 @@ public: | ||||||
|         maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); |         maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); | ||||||
|         maxwell3d.engine_state = Maxwell::EngineHint::None; |         maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||||
|         maxwell3d.replace_table.clear(); |         maxwell3d.replace_table.clear(); | ||||||
|  |         maxwell3d.regs.vertex_id_base = 0x0; | ||||||
|  |         maxwell3d.regs.global_base_vertex_index = 0x0; | ||||||
|  |         maxwell3d.regs.global_base_instance_index = 0x0; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|  | @ -195,6 +203,8 @@ private: | ||||||
|         const u32 element_base = parameters[4]; |         const u32 element_base = parameters[4]; | ||||||
|         const u32 base_instance = parameters[5]; |         const u32 base_instance = parameters[5]; | ||||||
|         maxwell3d.regs.vertex_id_base = element_base; |         maxwell3d.regs.vertex_id_base = element_base; | ||||||
|  |         maxwell3d.regs.global_base_vertex_index = element_base; | ||||||
|  |         maxwell3d.regs.global_base_instance_index = base_instance; | ||||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||||
|         maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; |         maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||||
|         maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); |         maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | ||||||
|  | @ -205,6 +215,8 @@ private: | ||||||
|             parameters[3], parameters[1], element_base, base_instance, instance_count); |             parameters[3], parameters[1], element_base, base_instance, instance_count); | ||||||
| 
 | 
 | ||||||
|         maxwell3d.regs.vertex_id_base = 0x0; |         maxwell3d.regs.vertex_id_base = 0x0; | ||||||
|  |         maxwell3d.regs.global_base_vertex_index = 0x0; | ||||||
|  |         maxwell3d.regs.global_base_instance_index = 0x0; | ||||||
|         maxwell3d.engine_state = Maxwell::EngineHint::None; |         maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||||
|         maxwell3d.replace_table.clear(); |         maxwell3d.replace_table.clear(); | ||||||
|     } |     } | ||||||
|  | @ -253,7 +265,6 @@ public: | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         maxwell3d.regs.draw.topology.Assign(topology); |  | ||||||
|         const u32 padding = parameters[3]; // padding is in words
 |         const u32 padding = parameters[3]; // padding is in words
 | ||||||
| 
 | 
 | ||||||
|         // size of each indirect segment
 |         // size of each indirect segment
 | ||||||
|  | @ -335,6 +346,83 @@ private: | ||||||
|     u32 minimum_limit{1 << 12}; |     u32 minimum_limit{1 << 12}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { | ||||||
|  | public: | ||||||
|  |     explicit HLE_C713C83D8F63CCF3(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||||||
|  | 
 | ||||||
|  |     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||||
|  |         maxwell3d.RefreshParameters(); | ||||||
|  |         const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; | ||||||
|  |         const u32 address = maxwell3d.regs.shadow_scratch[24]; | ||||||
|  |         auto& const_buffer = maxwell3d.regs.const_buffer; | ||||||
|  |         const_buffer.size = 0x7000; | ||||||
|  |         const_buffer.address_high = (address >> 24) & 0xFF; | ||||||
|  |         const_buffer.address_low = address << 8; | ||||||
|  |         const_buffer.offset = offset; | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | class HLE_D7333D26E0A93EDE final : public HLEMacroImpl { | ||||||
|  | public: | ||||||
|  |     explicit HLE_D7333D26E0A93EDE(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||||||
|  | 
 | ||||||
|  |     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||||
|  |         maxwell3d.RefreshParameters(); | ||||||
|  |         const size_t index = parameters[0]; | ||||||
|  |         const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; | ||||||
|  |         const u32 size = maxwell3d.regs.shadow_scratch[47 + index]; | ||||||
|  |         auto& const_buffer = maxwell3d.regs.const_buffer; | ||||||
|  |         const_buffer.size = size; | ||||||
|  |         const_buffer.address_high = (address >> 24) & 0xFF; | ||||||
|  |         const_buffer.address_low = address << 8; | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | class HLE_BindShader final : public HLEMacroImpl { | ||||||
|  | public: | ||||||
|  |     explicit HLE_BindShader(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||||||
|  | 
 | ||||||
|  |     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||||
|  |         maxwell3d.RefreshParameters(); | ||||||
|  |         auto& regs = maxwell3d.regs; | ||||||
|  |         const u32 index = parameters[0]; | ||||||
|  |         if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) { | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         regs.pipelines[index & 0xF].offset = parameters[2]; | ||||||
|  |         maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true; | ||||||
|  |         regs.shadow_scratch[28 + index] = parameters[1]; | ||||||
|  |         regs.shadow_scratch[34 + index] = parameters[2]; | ||||||
|  | 
 | ||||||
|  |         const u32 address = parameters[4]; | ||||||
|  |         auto& const_buffer = regs.const_buffer; | ||||||
|  |         const_buffer.size = 0x10000; | ||||||
|  |         const_buffer.address_high = (address >> 24) & 0xFF; | ||||||
|  |         const_buffer.address_low = address << 8; | ||||||
|  | 
 | ||||||
|  |         const size_t bind_group_id = parameters[3] & 0x7F; | ||||||
|  |         auto& bind_group = regs.bind_groups[bind_group_id]; | ||||||
|  |         bind_group.raw_config = 0x11; | ||||||
|  |         maxwell3d.ProcessCBBind(bind_group_id); | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | class HLE_SetRasterBoundingBox final : public HLEMacroImpl { | ||||||
|  | public: | ||||||
|  |     explicit HLE_SetRasterBoundingBox(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||||||
|  | 
 | ||||||
|  |     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||||
|  |         maxwell3d.RefreshParameters(); | ||||||
|  |         const u32 raster_mode = parameters[0]; | ||||||
|  |         auto& regs = maxwell3d.regs; | ||||||
|  |         const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable; | ||||||
|  |         const u32 scratch_data = maxwell3d.regs.shadow_scratch[52]; | ||||||
|  |         regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; | ||||||
|  |         regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { | HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { | ||||||
|  | @ -368,6 +456,26 @@ HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { | ||||||
|                          [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { |                          [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { | ||||||
|                              return std::make_unique<HLE_MultiLayerClear>(maxwell3d); |                              return std::make_unique<HLE_MultiLayerClear>(maxwell3d); | ||||||
|                          })); |                          })); | ||||||
|  |     builders.emplace(0xC713C83D8F63CCF3ULL, | ||||||
|  |                      std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>( | ||||||
|  |                          [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { | ||||||
|  |                              return std::make_unique<HLE_C713C83D8F63CCF3>(maxwell3d); | ||||||
|  |                          })); | ||||||
|  |     builders.emplace(0xD7333D26E0A93EDEULL, | ||||||
|  |                      std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>( | ||||||
|  |                          [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { | ||||||
|  |                              return std::make_unique<HLE_D7333D26E0A93EDE>(maxwell3d); | ||||||
|  |                          })); | ||||||
|  |     builders.emplace(0xEB29B2A09AA06D38ULL, | ||||||
|  |                      std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>( | ||||||
|  |                          [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { | ||||||
|  |                              return std::make_unique<HLE_BindShader>(maxwell3d); | ||||||
|  |                          })); | ||||||
|  |     builders.emplace(0xDB1341DBEB4C8AF7ULL, | ||||||
|  |                      std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>( | ||||||
|  |                          [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { | ||||||
|  |                              return std::make_unique<HLE_SetRasterBoundingBox>(maxwell3d); | ||||||
|  |                          })); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| HLEMacro::~HLEMacro() = default; | HLEMacro::~HLEMacro() = default; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow