forked from eden-emu/eden
		
	accelerateDMA: Fixes and feedback.
This commit is contained in:
		
							parent
							
								
									be1a3f7a0f
								
							
						
					
					
						commit
						bc19d28963
					
				
					 3 changed files with 62 additions and 88 deletions
				
			
		|  | @ -202,6 +202,36 @@ private: | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     template <typename Func> | ||||||
|  |     void ForEachWrittenRange(VAddr cpu_addr, u64 size, Func&& func) { | ||||||
|  |         const VAddr start_address = cpu_addr; | ||||||
|  |         const VAddr end_address = start_address + size; | ||||||
|  |         const VAddr search_base = | ||||||
|  |             static_cast<VAddr>(std::min<s64>(0LL, static_cast<s64>(start_address - size))); | ||||||
|  |         const IntervalType search_interval{search_base, search_base + 1}; | ||||||
|  |         auto it = common_ranges.lower_bound(search_interval); | ||||||
|  |         if (it == common_ranges.end()) { | ||||||
|  |             it = common_ranges.begin(); | ||||||
|  |         } | ||||||
|  |         for (; it != common_ranges.end(); it++) { | ||||||
|  |             VAddr inter_addr_end = it->upper(); | ||||||
|  |             VAddr inter_addr = it->lower(); | ||||||
|  |             if (inter_addr >= end_address) { | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             if (inter_addr_end <= start_address) { | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |             if (inter_addr_end > end_address) { | ||||||
|  |                 inter_addr_end = end_address; | ||||||
|  |             } | ||||||
|  |             if (inter_addr < start_address) { | ||||||
|  |                 inter_addr = start_address; | ||||||
|  |             } | ||||||
|  |             func(inter_addr, inter_addr_end); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     static bool IsRangeGranular(VAddr cpu_addr, size_t size) { |     static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | ||||||
|         return (cpu_addr & ~Core::Memory::PAGE_MASK) == |         return (cpu_addr & ~Core::Memory::PAGE_MASK) == | ||||||
|                ((cpu_addr + size) & ~Core::Memory::PAGE_MASK); |                ((cpu_addr + size) & ~Core::Memory::PAGE_MASK); | ||||||
|  | @ -441,12 +471,15 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | ||||||
|     } |     } | ||||||
|     const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount); |     const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount); | ||||||
|     const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount); |     const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount); | ||||||
|     if (!(source_dirty || dest_dirty)) { |     if (!source_dirty && !dest_dirty) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; |     const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; | ||||||
|     common_ranges.subtract(subtract_interval); |     uncommitted_ranges.subtract(subtract_interval); | ||||||
|  |     for (auto& interval_set : committed_ranges) { | ||||||
|  |         interval_set.subtract(subtract_interval); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     BufferId buffer_a; |     BufferId buffer_a; | ||||||
|     BufferId buffer_b; |     BufferId buffer_b; | ||||||
|  | @ -457,46 +490,28 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | ||||||
|     } while (has_deleted_buffers); |     } while (has_deleted_buffers); | ||||||
|     auto& src_buffer = slot_buffers[buffer_a]; |     auto& src_buffer = slot_buffers[buffer_a]; | ||||||
|     auto& dest_buffer = slot_buffers[buffer_b]; |     auto& dest_buffer = slot_buffers[buffer_b]; | ||||||
|     SynchronizeBuffer(src_buffer, *cpu_src_address, amount); |     SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount)); | ||||||
|     SynchronizeBuffer(dest_buffer, *cpu_dest_address, amount); |     SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast<u32>(amount)); | ||||||
|     std::array copies{BufferCopy{ |     std::array copies{BufferCopy{ | ||||||
|         .src_offset = src_buffer.Offset(*cpu_src_address), |         .src_offset = src_buffer.Offset(*cpu_src_address), | ||||||
|         .dst_offset = dest_buffer.Offset(*cpu_dest_address), |         .dst_offset = dest_buffer.Offset(*cpu_dest_address), | ||||||
|         .size = amount, |         .size = amount, | ||||||
|     }}; |     }}; | ||||||
| 
 | 
 | ||||||
|     auto mirror = [&](VAddr base_address, u64 size) { |     boost::container::small_vector<IntervalType, 4> tmp_intervals; | ||||||
|         VAddr diff = base_address - *cpu_src_address; |     auto mirror = [&](VAddr base_address, VAddr base_address_end) { | ||||||
|         VAddr new_base_address = *cpu_dest_address + diff; |         const u64 size = base_address_end - base_address; | ||||||
|  |         const VAddr diff = base_address - *cpu_src_address; | ||||||
|  |         const VAddr new_base_address = *cpu_dest_address + diff; | ||||||
|         const IntervalType add_interval{new_base_address, new_base_address + size}; |         const IntervalType add_interval{new_base_address, new_base_address + size}; | ||||||
|         common_ranges.add(add_interval); |         uncommitted_ranges.add(add_interval); | ||||||
|  |         tmp_intervals.push_back(add_interval); | ||||||
|     }; |     }; | ||||||
| 
 |     ForEachWrittenRange(*cpu_src_address, amount, mirror); | ||||||
|     const VAddr start_address = *cpu_src_address; |     // Subtract the destination range first, then re-add the mirrored intervals; this order is important for overlapping copies.
 | ||||||
|     const VAddr end_address = start_address + amount; |     common_ranges.subtract(subtract_interval); | ||||||
|     const IntervalType search_interval{start_address - amount, 1}; |     for (const IntervalType add_interval : tmp_intervals) { | ||||||
|     auto it = common_ranges.lower_bound(search_interval); |         common_ranges.add(add_interval); | ||||||
|     if (it == common_ranges.end()) { |  | ||||||
|         it = common_ranges.begin(); |  | ||||||
|     } |  | ||||||
|     while (it != common_ranges.end()) { |  | ||||||
|         VAddr inter_addr_end = it->upper(); |  | ||||||
|         VAddr inter_addr = it->lower(); |  | ||||||
|         if (inter_addr >= end_address) { |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         if (inter_addr_end <= start_address) { |  | ||||||
|             it++; |  | ||||||
|             continue; |  | ||||||
|         } |  | ||||||
|         if (inter_addr_end > end_address) { |  | ||||||
|             inter_addr_end = end_address; |  | ||||||
|         } |  | ||||||
|         if (inter_addr < start_address) { |  | ||||||
|             inter_addr = start_address; |  | ||||||
|         } |  | ||||||
|         mirror(inter_addr, inter_addr_end - inter_addr); |  | ||||||
|         it++; |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     runtime.CopyBuffer(dest_buffer, src_buffer, copies); |     runtime.CopyBuffer(dest_buffer, src_buffer, copies); | ||||||
|  | @ -695,30 +710,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | ||||||
| 
 | 
 | ||||||
|                         const VAddr start_address = buffer_addr + range_offset; |                         const VAddr start_address = buffer_addr + range_offset; | ||||||
|                         const VAddr end_address = start_address + range_size; |                         const VAddr end_address = start_address + range_size; | ||||||
|                         const IntervalType search_interval{cpu_addr, 1}; |                         ForEachWrittenRange(start_address, range_size, add_download); | ||||||
|                         auto it = common_ranges.lower_bound(search_interval); |  | ||||||
|                         if (it == common_ranges.end()) { |  | ||||||
|                             it = common_ranges.begin(); |  | ||||||
|                         } |  | ||||||
|                         while (it != common_ranges.end()) { |  | ||||||
|                             VAddr inter_addr_end = it->upper(); |  | ||||||
|                             VAddr inter_addr = it->lower(); |  | ||||||
|                             if (inter_addr >= end_address) { |  | ||||||
|                                 break; |  | ||||||
|                             } |  | ||||||
|                             if (inter_addr_end <= start_address) { |  | ||||||
|                                 it++; |  | ||||||
|                                 continue; |  | ||||||
|                             } |  | ||||||
|                             if (inter_addr_end > end_address) { |  | ||||||
|                                 inter_addr_end = end_address; |  | ||||||
|                             } |  | ||||||
|                             if (inter_addr < start_address) { |  | ||||||
|                                 inter_addr = start_address; |  | ||||||
|                             } |  | ||||||
|                             add_download(inter_addr, inter_addr_end); |  | ||||||
|                             it++; |  | ||||||
|                         } |  | ||||||
|                         const IntervalType subtract_interval{start_address, end_address}; |                         const IntervalType subtract_interval{start_address, end_address}; | ||||||
|                         common_ranges.subtract(subtract_interval); |                         common_ranges.subtract(subtract_interval); | ||||||
|                     }); |                     }); | ||||||
|  | @ -816,7 +808,9 @@ void BufferCache<P>::BindHostIndexBuffer() { | ||||||
|     const u32 size = index_buffer.size; |     const u32 size = index_buffer.size; | ||||||
|     SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); |     SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); | ||||||
|     if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { |     if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { | ||||||
|         runtime.BindIndexBuffer(buffer, offset, size); |         const u32 new_offset = offset + maxwell3d.regs.index_array.first * | ||||||
|  |                                             maxwell3d.regs.index_array.FormatSizeInBytes(); | ||||||
|  |         runtime.BindIndexBuffer(buffer, new_offset, size); | ||||||
|     } else { |     } else { | ||||||
|         runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format, |         runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format, | ||||||
|                                 maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count, |                                 maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count, | ||||||
|  | @ -1429,30 +1423,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | ||||||
| 
 | 
 | ||||||
|         const VAddr start_address = buffer_addr + range_offset; |         const VAddr start_address = buffer_addr + range_offset; | ||||||
|         const VAddr end_address = start_address + range_size; |         const VAddr end_address = start_address + range_size; | ||||||
|         const IntervalType search_interval{start_address - range_size, 1}; |         ForEachWrittenRange(start_address, range_size, add_download); | ||||||
|         auto it = common_ranges.lower_bound(search_interval); |  | ||||||
|         if (it == common_ranges.end()) { |  | ||||||
|             it = common_ranges.begin(); |  | ||||||
|         } |  | ||||||
|         while (it != common_ranges.end()) { |  | ||||||
|             VAddr inter_addr_end = it->upper(); |  | ||||||
|             VAddr inter_addr = it->lower(); |  | ||||||
|             if (inter_addr >= end_address) { |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|             if (inter_addr_end <= start_address) { |  | ||||||
|                 it++; |  | ||||||
|                 continue; |  | ||||||
|             } |  | ||||||
|             if (inter_addr_end > end_address) { |  | ||||||
|                 inter_addr_end = end_address; |  | ||||||
|             } |  | ||||||
|             if (inter_addr < start_address) { |  | ||||||
|                 inter_addr = start_address; |  | ||||||
|             } |  | ||||||
|             add_download(inter_addr, inter_addr_end); |  | ||||||
|             it++; |  | ||||||
|         } |  | ||||||
|         const IntervalType subtract_interval{start_address, end_address}; |         const IntervalType subtract_interval{start_address, end_address}; | ||||||
|         common_ranges.subtract(subtract_interval); |         common_ranges.subtract(subtract_interval); | ||||||
|     }); |     }); | ||||||
|  |  | ||||||
|  | @ -10,13 +10,15 @@ | ||||||
| #include <stop_token> | #include <stop_token> | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/engines/fermi_2d.h" | #include "video_core/engines/fermi_2d.h" | ||||||
| #include "video_core/engines/maxwell_dma.h" |  | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| #include "video_core/guest_driver.h" | #include "video_core/guest_driver.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| class MemoryManager; | class MemoryManager; | ||||||
|  | namespace Engines { | ||||||
|  | class AccelerateDMAInterface; | ||||||
| } | } | ||||||
|  | } // namespace Tegra
 | ||||||
| 
 | 
 | ||||||
| namespace VideoCore { | namespace VideoCore { | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -13,6 +13,7 @@ | ||||||
| #include <boost/container/static_vector.hpp> | #include <boost/container/static_vector.hpp> | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "video_core/engines/maxwell_dma.h" | ||||||
| #include "video_core/rasterizer_accelerated.h" | #include "video_core/rasterizer_accelerated.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/renderer_vulkan/blit_image.h" | #include "video_core/renderer_vulkan/blit_image.h" | ||||||
|  | @ -55,7 +56,7 @@ public: | ||||||
| 
 | 
 | ||||||
|     bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override; |     bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override; | ||||||
| 
 | 
 | ||||||
|   private: | private: | ||||||
|     BufferCache& buffer_cache; |     BufferCache& buffer_cache; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow