forked from eden-emu/eden
		
	video_core: Cache GPU internal writes.
This commit is contained in:
		
							parent
							
								
									b78328f19a
								
							
						
					
					
						commit
						6c7eb81f7d
					
				
					 10 changed files with 185 additions and 30 deletions
				
			
		|  | @ -85,6 +85,7 @@ add_library(video_core STATIC | ||||||
|     gpu.h |     gpu.h | ||||||
|     gpu_thread.cpp |     gpu_thread.cpp | ||||||
|     gpu_thread.h |     gpu_thread.h | ||||||
|  |     invalidation_accumulator.h | ||||||
|     memory_manager.cpp |     memory_manager.cpp | ||||||
|     memory_manager.h |     memory_manager.h | ||||||
|     precompiled_headers.h |     precompiled_headers.h | ||||||
|  |  | ||||||
|  | @ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) { | ||||||
|                                        regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, |                                        regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, | ||||||
|                                        x_elements, regs.line_count, regs.dest.BlockHeight(), |                                        x_elements, regs.line_count, regs.dest.BlockHeight(), | ||||||
|                                        regs.dest.BlockDepth(), regs.line_length_in); |                                        regs.dest.BlockDepth(), regs.line_length_in); | ||||||
|         memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); |         memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -485,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Maxwell3D::ProcessQueryGet() { | void Maxwell3D::ProcessQueryGet() { | ||||||
|     // TODO(Subv): Support the other query units.
 |  | ||||||
|     if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) { |  | ||||||
|         LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented"); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     switch (regs.report_semaphore.query.operation) { |     switch (regs.report_semaphore.query.operation) { | ||||||
|     case Regs::ReportSemaphore::Operation::Release: |     case Regs::ReportSemaphore::Operation::Release: | ||||||
|         if (regs.report_semaphore.query.short_query != 0) { |         if (regs.report_semaphore.query.short_query != 0) { | ||||||
|  | @ -649,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) { | ||||||
| 
 | 
 | ||||||
|     const GPUVAddr address{buffer_address + regs.const_buffer.offset}; |     const GPUVAddr address{buffer_address + regs.const_buffer.offset}; | ||||||
|     const size_t copy_size = amount * sizeof(u32); |     const size_t copy_size = amount * sizeof(u32); | ||||||
|     memory_manager.WriteBlock(address, start_base, copy_size); |     memory_manager.WriteBlockCached(address, start_base, copy_size); | ||||||
| 
 | 
 | ||||||
|     // Increment the current buffer position.
 |     // Increment the current buffer position.
 | ||||||
|     regs.const_buffer.offset += static_cast<u32>(copy_size); |     regs.const_buffer.offset += static_cast<u32>(copy_size); | ||||||
|  |  | ||||||
|  | @ -69,7 +69,7 @@ void MaxwellDMA::Launch() { | ||||||
|     if (launch.multi_line_enable) { |     if (launch.multi_line_enable) { | ||||||
|         const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; |         const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; | ||||||
|         const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; |         const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; | ||||||
| 
 |         memory_manager.FlushCaching(); | ||||||
|         if (!is_src_pitch && !is_dst_pitch) { |         if (!is_src_pitch && !is_dst_pitch) { | ||||||
|             // If both the source and the destination are in block layout, assert.
 |             // If both the source and the destination are in block layout, assert.
 | ||||||
|             CopyBlockLinearToBlockLinear(); |             CopyBlockLinearToBlockLinear(); | ||||||
|  | @ -104,6 +104,7 @@ void MaxwellDMA::Launch() { | ||||||
|                                             reinterpret_cast<u8*>(tmp_buffer.data()), |                                             reinterpret_cast<u8*>(tmp_buffer.data()), | ||||||
|                                             regs.line_length_in * sizeof(u32)); |                                             regs.line_length_in * sizeof(u32)); | ||||||
|         } else { |         } else { | ||||||
|  |             memory_manager.FlushCaching(); | ||||||
|             const auto convert_linear_2_blocklinear_addr = [](u64 address) { |             const auto convert_linear_2_blocklinear_addr = [](u64 address) { | ||||||
|                 return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | |                 return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | | ||||||
|                        ((address & 0x180) >> 1) | ((address & 0x20) << 3); |                        ((address & 0x180) >> 1) | ((address & 0x20) << 3); | ||||||
|  | @ -121,7 +122,7 @@ void MaxwellDMA::Launch() { | ||||||
|                     memory_manager.ReadBlockUnsafe( |                     memory_manager.ReadBlockUnsafe( | ||||||
|                         convert_linear_2_blocklinear_addr(regs.offset_in + offset), |                         convert_linear_2_blocklinear_addr(regs.offset_in + offset), | ||||||
|                         tmp_buffer.data(), tmp_buffer.size()); |                         tmp_buffer.data(), tmp_buffer.size()); | ||||||
|                     memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(), |                     memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), | ||||||
|                                               tmp_buffer.size()); |                                               tmp_buffer.size()); | ||||||
|                 } |                 } | ||||||
|             } else if (is_src_pitch && !is_dst_pitch) { |             } else if (is_src_pitch && !is_dst_pitch) { | ||||||
|  | @ -132,7 +133,7 @@ void MaxwellDMA::Launch() { | ||||||
|                 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |                 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | ||||||
|                     memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), |                     memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), | ||||||
|                                                    tmp_buffer.size()); |                                                    tmp_buffer.size()); | ||||||
|                     memory_manager.WriteBlock( |                     memory_manager.WriteBlockCached( | ||||||
|                         convert_linear_2_blocklinear_addr(regs.offset_out + offset), |                         convert_linear_2_blocklinear_addr(regs.offset_out + offset), | ||||||
|                         tmp_buffer.data(), tmp_buffer.size()); |                         tmp_buffer.data(), tmp_buffer.size()); | ||||||
|                 } |                 } | ||||||
|  | @ -141,7 +142,7 @@ void MaxwellDMA::Launch() { | ||||||
|                     std::vector<u8> tmp_buffer(regs.line_length_in); |                     std::vector<u8> tmp_buffer(regs.line_length_in); | ||||||
|                     memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), |                     memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), | ||||||
|                                                    regs.line_length_in); |                                                    regs.line_length_in); | ||||||
|                     memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), |                     memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), | ||||||
|                                               regs.line_length_in); |                                               regs.line_length_in); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  | @ -204,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | ||||||
|                      src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |                      src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | ||||||
|                      regs.pitch_out); |                      regs.pitch_out); | ||||||
| 
 | 
 | ||||||
|     memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void MaxwellDMA::CopyPitchToBlockLinear() { | void MaxwellDMA::CopyPitchToBlockLinear() { | ||||||
|  | @ -256,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | ||||||
|                    dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |                    dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | ||||||
|                    regs.pitch_in); |                    regs.pitch_in); | ||||||
| 
 | 
 | ||||||
|     memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void MaxwellDMA::FastCopyBlockLinearToPitch() { | void MaxwellDMA::FastCopyBlockLinearToPitch() { | ||||||
|  | @ -287,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { | ||||||
|                      regs.src_params.block_size.height, regs.src_params.block_size.depth, |                      regs.src_params.block_size.height, regs.src_params.block_size.depth, | ||||||
|                      regs.pitch_out); |                      regs.pitch_out); | ||||||
| 
 | 
 | ||||||
|     memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void MaxwellDMA::CopyBlockLinearToBlockLinear() { | void MaxwellDMA::CopyBlockLinearToBlockLinear() { | ||||||
|  | @ -347,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { | ||||||
|                    dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, |                    dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, | ||||||
|                    dst.block_size.height, dst.block_size.depth, pitch); |                    dst.block_size.height, dst.block_size.depth, pitch); | ||||||
| 
 | 
 | ||||||
|     memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void MaxwellDMA::ReleaseSemaphore() { | void MaxwellDMA::ReleaseSemaphore() { | ||||||
|  |  | ||||||
							
								
								
									
										78
									
								
								src/video_core/invalidation_accumulator.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								src/video_core/invalidation_accumulator.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,78 @@ | ||||||
|  | // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 | ||||||
|  | // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <vector> | ||||||
|  | 
 | ||||||
|  | #include "common/common_types.h" | ||||||
|  | 
 | ||||||
|  | namespace VideoCommon { | ||||||
|  | 
 | ||||||
|  | class InvalidationAccumulator { | ||||||
|  | public: | ||||||
|  |     InvalidationAccumulator() = default; | ||||||
|  |     ~InvalidationAccumulator() = default; | ||||||
|  | 
 | ||||||
|  |     void Add(GPUVAddr address, size_t size) { | ||||||
|  |         const auto reset_values = [&]() { | ||||||
|  |             if (has_collected) { | ||||||
|  |                 buffer.emplace_back(start_address, accumulated_size); | ||||||
|  |             } | ||||||
|  |             start_address = address; | ||||||
|  |             accumulated_size = size; | ||||||
|  |             last_collection = start_address + size; | ||||||
|  |         }; | ||||||
|  |         if (address >= start_address && address + size <= last_collection) [[likely]] { | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |         size = (address + size + atomicy_side_mask) & atomicy_mask - address; | ||||||
|  |         address = address & atomicy_mask; | ||||||
|  |         if (!has_collected) [[unlikely]] { | ||||||
|  |             reset_values(); | ||||||
|  |             has_collected = true; | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |         if (address != last_collection) [[unlikely]] { | ||||||
|  |             reset_values(); | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |         accumulated_size += size; | ||||||
|  |         last_collection += size; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void Clear() { | ||||||
|  |         buffer.clear(); | ||||||
|  |         start_address = 0; | ||||||
|  |         last_collection = 0; | ||||||
|  |         has_collected = false; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool AnyAccumulated() const { | ||||||
|  |         return has_collected; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     template <typename Func> | ||||||
|  |     void Callback(Func&& func) { | ||||||
|  |         if (!has_collected) { | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |         buffer.emplace_back(start_address, accumulated_size); | ||||||
|  |         for (auto& [address, size] : buffer) { | ||||||
|  |             func(address, size); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     static constexpr size_t atomicy_bits = 5; | ||||||
|  |     static constexpr size_t atomicy_size = 1ULL << atomicy_bits; | ||||||
|  |     static constexpr size_t atomicy_side_mask = atomicy_size - 1; | ||||||
|  |     static constexpr size_t atomicy_mask = ~atomicy_side_mask; | ||||||
|  |     GPUVAddr start_address{}; | ||||||
|  |     GPUVAddr last_collection{}; | ||||||
|  |     size_t accumulated_size{}; | ||||||
|  |     bool has_collected{}; | ||||||
|  |     std::vector<std::pair<VAddr, size_t>> buffer; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | } // namespace VideoCommon
 | ||||||
|  | @ -11,6 +11,7 @@ | ||||||
| #include "core/hle/kernel/k_page_table.h" | #include "core/hle/kernel/k_page_table.h" | ||||||
| #include "core/hle/kernel/k_process.h" | #include "core/hle/kernel/k_process.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
|  | #include "video_core/invalidation_accumulator.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/renderer_base.h" | #include "video_core/renderer_base.h" | ||||||
|  | @ -26,7 +27,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 | ||||||
|       entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, |       entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, | ||||||
|                                            page_bits != big_page_bits ? page_bits : 0}, |                                            page_bits != big_page_bits ? page_bits : 0}, | ||||||
|       kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( |       kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( | ||||||
|                                       1, std::memory_order_acq_rel)} { |                                       1, std::memory_order_acq_rel)}, | ||||||
|  |       accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} { | ||||||
|     address_space_size = 1ULL << address_space_bits; |     address_space_size = 1ULL << address_space_bits; | ||||||
|     page_size = 1ULL << page_bits; |     page_size = 1ULL << page_bits; | ||||||
|     page_mask = page_size - 1ULL; |     page_mask = page_size - 1ULL; | ||||||
|  | @ -185,15 +187,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | ||||||
|     if (size == 0) { |     if (size == 0) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); |     GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash); | ||||||
| 
 | 
 | ||||||
|     for (const auto& [map_addr, map_size] : submapped_ranges) { |     for (const auto& [map_addr, map_size] : page_stash) { | ||||||
|         // Flush and invalidate through the GPU interface, to be asynchronous if possible.
 |         rasterizer->UnmapMemory(map_addr, map_size); | ||||||
|         const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr); |  | ||||||
|         ASSERT(cpu_addr); |  | ||||||
| 
 |  | ||||||
|         rasterizer->UnmapMemory(*cpu_addr, map_size); |  | ||||||
|     } |     } | ||||||
|  |     page_stash.clear(); | ||||||
| 
 | 
 | ||||||
|     BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |     BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); | ||||||
|     PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |     PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); | ||||||
|  | @ -454,6 +453,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf | ||||||
|     WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); |     WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, | ||||||
|  |                                      std::size_t size) { | ||||||
|  |     WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); | ||||||
|  |     accumulator->Add(gpu_dest_addr, size); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, | void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, | ||||||
|                                 VideoCommon::CacheType which) const { |                                 VideoCommon::CacheType which) const { | ||||||
|     auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, |     auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, | ||||||
|  | @ -663,7 +668,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons | ||||||
| std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | ||||||
|     GPUVAddr gpu_addr, std::size_t size) const { |     GPUVAddr gpu_addr, std::size_t size) const { | ||||||
|     std::vector<std::pair<GPUVAddr, std::size_t>> result{}; |     std::vector<std::pair<GPUVAddr, std::size_t>> result{}; | ||||||
|     std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; |     GetSubmappedRangeImpl<true>(gpu_addr, size, result); | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template <bool is_gpu_address> | ||||||
|  | void MemoryManager::GetSubmappedRangeImpl( | ||||||
|  |     GPUVAddr gpu_addr, std::size_t size, | ||||||
|  |     std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& | ||||||
|  |         result) const { | ||||||
|  |     std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> | ||||||
|  |         last_segment{}; | ||||||
|     std::optional<VAddr> old_page_addr{}; |     std::optional<VAddr> old_page_addr{}; | ||||||
|     const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, |     const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, | ||||||
|                                                 [[maybe_unused]] std::size_t offset, |                                                 [[maybe_unused]] std::size_t offset, | ||||||
|  | @ -685,8 +700,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | ||||||
|         } |         } | ||||||
|         old_page_addr = {cpu_addr_base + copy_amount}; |         old_page_addr = {cpu_addr_base + copy_amount}; | ||||||
|         if (!last_segment) { |         if (!last_segment) { | ||||||
|  |             if constexpr (is_gpu_address) { | ||||||
|                 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; |                 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; | ||||||
|                 last_segment = {new_base_addr, copy_amount}; |                 last_segment = {new_base_addr, copy_amount}; | ||||||
|  |             } else { | ||||||
|  |                 last_segment = {cpu_addr_base, copy_amount}; | ||||||
|  |             } | ||||||
|         } else { |         } else { | ||||||
|             last_segment->second += copy_amount; |             last_segment->second += copy_amount; | ||||||
|         } |         } | ||||||
|  | @ -703,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | ||||||
|         } |         } | ||||||
|         old_page_addr = {cpu_addr_base + copy_amount}; |         old_page_addr = {cpu_addr_base + copy_amount}; | ||||||
|         if (!last_segment) { |         if (!last_segment) { | ||||||
|  |             if constexpr (is_gpu_address) { | ||||||
|                 const GPUVAddr new_base_addr = (page_index << page_bits) + offset; |                 const GPUVAddr new_base_addr = (page_index << page_bits) + offset; | ||||||
|                 last_segment = {new_base_addr, copy_amount}; |                 last_segment = {new_base_addr, copy_amount}; | ||||||
|  |             } else { | ||||||
|  |                 last_segment = {cpu_addr_base, copy_amount}; | ||||||
|  |             } | ||||||
|         } else { |         } else { | ||||||
|             last_segment->second += copy_amount; |             last_segment->second += copy_amount; | ||||||
|         } |         } | ||||||
|  | @ -715,7 +738,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | ||||||
|     }; |     }; | ||||||
|     MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages); |     MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages); | ||||||
|     split(0, 0, 0); |     split(0, 0, 0); | ||||||
|     return result; | } | ||||||
|  | 
 | ||||||
|  | void MemoryManager::FlushCaching() { | ||||||
|  |     if (!accumulator->AnyAccumulated()) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     accumulator->Callback([this](GPUVAddr addr, size_t size) { | ||||||
|  |         GetSubmappedRangeImpl<false>(addr, size, page_stash); | ||||||
|  |     }); | ||||||
|  |     rasterizer->InnerInvalidation(page_stash); | ||||||
|  |     page_stash.clear(); | ||||||
|  |     accumulator->Clear(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace Tegra
 | } // namespace Tegra
 | ||||||
|  |  | ||||||
|  | @ -19,6 +19,10 @@ namespace VideoCore { | ||||||
| class RasterizerInterface; | class RasterizerInterface; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | namespace VideoCommon { | ||||||
|  | class InvalidationAccumulator; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
| class DeviceMemory; | class DeviceMemory; | ||||||
| namespace Memory { | namespace Memory { | ||||||
|  | @ -80,6 +84,7 @@ public: | ||||||
|      */ |      */ | ||||||
|     void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; |     void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; | ||||||
|     void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); |     void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); | ||||||
|  |     void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); | ||||||
| 
 | 
 | ||||||
|     /**
 |     /**
 | ||||||
|      * Checks if a gpu region can be simply read with a pointer. |      * Checks if a gpu region can be simply read with a pointer. | ||||||
|  | @ -129,6 +134,8 @@ public: | ||||||
|     size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, |     size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, | ||||||
|                                size_t max_size = std::numeric_limits<size_t>::max()) const; |                                size_t max_size = std::numeric_limits<size_t>::max()) const; | ||||||
| 
 | 
 | ||||||
|  |     void FlushCaching(); | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> |     template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> | ||||||
|     inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, |     inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, | ||||||
|  | @ -154,6 +161,12 @@ private: | ||||||
|     inline bool IsBigPageContinous(size_t big_page_index) const; |     inline bool IsBigPageContinous(size_t big_page_index) const; | ||||||
|     inline void SetBigPageContinous(size_t big_page_index, bool value); |     inline void SetBigPageContinous(size_t big_page_index, bool value); | ||||||
| 
 | 
 | ||||||
|  |     template <bool is_gpu_address> | ||||||
|  |     void GetSubmappedRangeImpl( | ||||||
|  |         GPUVAddr gpu_addr, std::size_t size, | ||||||
|  |         std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& | ||||||
|  |             result) const; | ||||||
|  | 
 | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|     Core::Memory::Memory& memory; |     Core::Memory::Memory& memory; | ||||||
|     Core::DeviceMemory& device_memory; |     Core::DeviceMemory& device_memory; | ||||||
|  | @ -201,10 +214,12 @@ private: | ||||||
|     Common::VirtualBuffer<u32> big_page_table_cpu; |     Common::VirtualBuffer<u32> big_page_table_cpu; | ||||||
| 
 | 
 | ||||||
|     std::vector<u64> big_page_continous; |     std::vector<u64> big_page_continous; | ||||||
|  |     std::vector<std::pair<VAddr, std::size_t>> page_stash{}; | ||||||
| 
 | 
 | ||||||
|     constexpr static size_t continous_bits = 64; |     constexpr static size_t continous_bits = 64; | ||||||
| 
 | 
 | ||||||
|     const size_t unique_identifier; |     const size_t unique_identifier; | ||||||
|  |     std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; | ||||||
| 
 | 
 | ||||||
|     static std::atomic<size_t> unique_identifier_generator; |     static std::atomic<size_t> unique_identifier_generator; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -6,6 +6,7 @@ | ||||||
| #include <functional> | #include <functional> | ||||||
| #include <optional> | #include <optional> | ||||||
| #include <span> | #include <span> | ||||||
|  | #include <utility> | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/polyfill_thread.h" | #include "common/polyfill_thread.h" | ||||||
| #include "video_core/cache_types.h" | #include "video_core/cache_types.h" | ||||||
|  | @ -95,6 +96,12 @@ public: | ||||||
|     virtual void InvalidateRegion(VAddr addr, u64 size, |     virtual void InvalidateRegion(VAddr addr, u64 size, | ||||||
|                                   VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |                                   VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | ||||||
| 
 | 
 | ||||||
|  |     virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | ||||||
|  |         for (const auto [cpu_addr, size] : sequences) { | ||||||
|  |             InvalidateRegion(cpu_addr, size); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region are desync with guest
 |     /// Notify rasterizer that any caches of the specified region are desync with guest
 | ||||||
|     virtual void OnCPUWrite(VAddr addr, u64 size) = 0; |     virtual void OnCPUWrite(VAddr addr, u64 size) = 0; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -186,6 +186,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | ||||||
| 
 | 
 | ||||||
|     SCOPE_EXIT({ gpu.TickWork(); }); |     SCOPE_EXIT({ gpu.TickWork(); }); | ||||||
|     FlushWork(); |     FlushWork(); | ||||||
|  |     gpu_memory->FlushCaching(); | ||||||
| 
 | 
 | ||||||
|     query_cache.UpdateCounters(); |     query_cache.UpdateCounters(); | ||||||
| 
 | 
 | ||||||
|  | @ -393,6 +394,7 @@ void RasterizerVulkan::Clear(u32 layer_count) { | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::DispatchCompute() { | void RasterizerVulkan::DispatchCompute() { | ||||||
|     FlushWork(); |     FlushWork(); | ||||||
|  |     gpu_memory->FlushCaching(); | ||||||
| 
 | 
 | ||||||
|     ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; |     ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; | ||||||
|     if (!pipeline) { |     if (!pipeline) { | ||||||
|  | @ -481,6 +483,27 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | ||||||
|  |     { | ||||||
|  |         std::scoped_lock lock{texture_cache.mutex}; | ||||||
|  |         for (const auto [addr, size] : sequences) { | ||||||
|  |             texture_cache.WriteMemory(addr, size); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     { | ||||||
|  |         std::scoped_lock lock{buffer_cache.mutex}; | ||||||
|  |         for (const auto [addr, size] : sequences) { | ||||||
|  |             buffer_cache.WriteMemory(addr, size); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     { | ||||||
|  |         for (const auto [addr, size] : sequences) { | ||||||
|  |             query_cache.InvalidateRegion(addr, size); | ||||||
|  |             pipeline_cache.InvalidateRegion(addr, size); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | ||||||
|     if (addr == 0 || size == 0) { |     if (addr == 0 || size == 0) { | ||||||
|         return; |         return; | ||||||
|  |  | ||||||
|  | @ -79,6 +79,7 @@ public: | ||||||
|                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void InvalidateRegion(VAddr addr, u64 size, |     void InvalidateRegion(VAddr addr, u64 size, | ||||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|  |     void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; | ||||||
|     void OnCPUWrite(VAddr addr, u64 size) override; |     void OnCPUWrite(VAddr addr, u64 size) override; | ||||||
|     void InvalidateGPUCache() override; |     void InvalidateGPUCache() override; | ||||||
|     void UnmapMemory(VAddr addr, u64 size) override; |     void UnmapMemory(VAddr addr, u64 size) override; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow