forked from eden-emu/eden
		
	SMMU: Initial adaptation to video_core.
This commit is contained in:
		
							parent
							
								
									d0329a2c00
								
							
						
					
					
						commit
						9db159da71
					
				
					 79 changed files with 1262 additions and 1263 deletions
				
			
		|  | @ -9,6 +9,7 @@ | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/core_timing.h" | #include "core/core_timing.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
|  | #include "core/guest_memory.h" | ||||||
| 
 | 
 | ||||||
| #include "core/hle/kernel/k_process.h" | #include "core/hle/kernel/k_process.h" | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -10,6 +10,7 @@ | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "common/scratch_buffer.h" | #include "common/scratch_buffer.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
|  | #include "core/guest_memory.h" | ||||||
| 
 | 
 | ||||||
| namespace AudioCore::Renderer { | namespace AudioCore::Renderer { | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -37,6 +37,8 @@ add_library(core STATIC | ||||||
|     debugger/gdbstub_arch.h |     debugger/gdbstub_arch.h | ||||||
|     debugger/gdbstub.cpp |     debugger/gdbstub.cpp | ||||||
|     debugger/gdbstub.h |     debugger/gdbstub.h | ||||||
|  |     device_memory_manager.h | ||||||
|  |     device_memory_manager.inc | ||||||
|     device_memory.cpp |     device_memory.cpp | ||||||
|     device_memory.h |     device_memory.h | ||||||
|     file_sys/fssystem/fs_i_storage.h |     file_sys/fssystem/fs_i_storage.h | ||||||
|  |  | ||||||
|  | @ -651,7 +651,7 @@ size_t System::GetCurrentHostThreadID() const { | ||||||
|     return impl->kernel.GetCurrentHostThreadID(); |     return impl->kernel.GetCurrentHostThreadID(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { | void System::GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback) { | ||||||
|     return this->ApplicationProcess()->GatherGPUDirtyMemory(callback); |     return this->ApplicationProcess()->GatherGPUDirtyMemory(callback); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -224,7 +224,7 @@ public: | ||||||
|     /// Prepare the core emulation for a reschedule
 |     /// Prepare the core emulation for a reschedule
 | ||||||
|     void PrepareReschedule(u32 core_index); |     void PrepareReschedule(u32 core_index); | ||||||
| 
 | 
 | ||||||
|     void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); |     void GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] size_t GetCurrentHostThreadID() const; |     [[nodiscard]] size_t GetCurrentHostThreadID() const; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -3,10 +3,11 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
| #include <deque> |  | ||||||
| #include <memory> |  | ||||||
| #include <array> | #include <array> | ||||||
| #include <atomic> | #include <atomic> | ||||||
|  | #include <deque> | ||||||
|  | #include <memory> | ||||||
|  | #include <mutex> | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/virtual_buffer.h" | #include "common/virtual_buffer.h" | ||||||
|  | @ -48,26 +49,54 @@ public: | ||||||
|     template <typename T> |     template <typename T> | ||||||
|     const T* GetPointer(DAddr address) const; |     const T* GetPointer(DAddr address) const; | ||||||
| 
 | 
 | ||||||
|  |     DAddr GetAddressFromPAddr(PAddr address) const { | ||||||
|  |         DAddr subbits = static_cast<DAddr>(address & page_mask); | ||||||
|  |         return (static_cast<DAddr>(compressed_device_addr[(address >> page_bits)]) << page_bits) + subbits; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const { | ||||||
|  |         PAddr subbits = static_cast<PAddr>(address & page_mask); | ||||||
|  |         auto paddr = compressed_physical_ptr[(address >> page_bits)]; | ||||||
|  |         if (paddr == 0) { | ||||||
|  |             return 0; | ||||||
|  |         } | ||||||
|  |         return (static_cast<PAddr>(paddr - 1) << page_bits) + subbits; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     template <typename T> |     template <typename T> | ||||||
|     void Write(DAddr address, T value); |     void Write(DAddr address, T value); | ||||||
| 
 | 
 | ||||||
|     template <typename T> |     template <typename T> | ||||||
|     T Read(DAddr address) const; |     T Read(DAddr address) const; | ||||||
| 
 | 
 | ||||||
|  |     const u8* GetSpan(const DAddr src_addr, const std::size_t size) const { | ||||||
|  |         return nullptr; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u8* GetSpan(const DAddr src_addr, const std::size_t size) { | ||||||
|  |         return nullptr; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     void ReadBlock(DAddr address, void* dest_pointer, size_t size); |     void ReadBlock(DAddr address, void* dest_pointer, size_t size); | ||||||
|     void WriteBlock(DAddr address, void* src_pointer, size_t size); |     void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size); | ||||||
|  |     void WriteBlock(DAddr address, const void* src_pointer, size_t size); | ||||||
|  |     void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size); | ||||||
| 
 | 
 | ||||||
|     size_t RegisterProcess(Memory::Memory* memory); |     size_t RegisterProcess(Memory::Memory* memory); | ||||||
|     void UnregisterProcess(size_t id); |     void UnregisterProcess(size_t id); | ||||||
| 
 | 
 | ||||||
|     void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta); |     void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta); | ||||||
| 
 | 
 | ||||||
|  |     static constexpr size_t AS_BITS = Traits::device_virtual_bits; | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     static constexpr bool supports_pinning = Traits::supports_pinning; |     static constexpr bool supports_pinning = Traits::supports_pinning; | ||||||
|     static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; |     static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; | ||||||
|     static constexpr size_t device_as_size = 1ULL << device_virtual_bits; |     static constexpr size_t device_as_size = 1ULL << device_virtual_bits; | ||||||
|     static constexpr size_t physical_max_bits = 33; |     static constexpr size_t physical_max_bits = 33; | ||||||
|     static constexpr size_t page_bits = 12; |     static constexpr size_t page_bits = 12; | ||||||
|  |     static constexpr size_t page_size = 1ULL << page_bits; | ||||||
|  |     static constexpr size_t page_mask = page_size - 1ULL; | ||||||
|     static constexpr u32 physical_address_base = 1U << page_bits; |     static constexpr u32 physical_address_base = 1U << page_bits; | ||||||
| 
 | 
 | ||||||
|     template <typename T> |     template <typename T> | ||||||
|  | @ -136,11 +165,15 @@ private: | ||||||
|     private: |     private: | ||||||
|         std::array<std::atomic_uint16_t, subentries> values{}; |         std::array<std::atomic_uint16_t, subentries> values{}; | ||||||
|     }; |     }; | ||||||
|     static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), "CounterEntry should be 8 bytes!"); |     static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), | ||||||
|  |                   "CounterEntry should be 8 bytes!"); | ||||||
| 
 | 
 | ||||||
|     static constexpr size_t num_counter_entries = (1ULL << (device_virtual_bits - page_bits)) / subentries; |     static constexpr size_t num_counter_entries = | ||||||
|  |         (1ULL << (device_virtual_bits - page_bits)) / subentries; | ||||||
|     using CachedPages = std::array<CounterEntry, num_counter_entries>; |     using CachedPages = std::array<CounterEntry, num_counter_entries>; | ||||||
|     std::unique_ptr<CachedPages> cached_pages; |     std::unique_ptr<CachedPages> cached_pages; | ||||||
|  |     std::mutex counter_guard; | ||||||
|  |     std::mutex mapping_guard; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Core
 | } // namespace Core
 | ||||||
|  | @ -105,7 +105,8 @@ template <typename Traits> | ||||||
| DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_) | DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_) | ||||||
|     : physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())}, |     : physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())}, | ||||||
|       interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), |       interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), | ||||||
|       compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) { |       compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)), | ||||||
|  |       cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) { | ||||||
|     impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); |     impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); | ||||||
|     cached_pages = std::make_unique<CachedPages>(); |     cached_pages = std::make_unique<CachedPages>(); | ||||||
| } | } | ||||||
|  | @ -144,10 +145,10 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size | ||||||
|     Core::Memory::Memory* process_memory = registered_processes[process_id]; |     Core::Memory::Memory* process_memory = registered_processes[process_id]; | ||||||
|     size_t start_page_d = address >> Memory::YUZU_PAGEBITS; |     size_t start_page_d = address >> Memory::YUZU_PAGEBITS; | ||||||
|     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; |     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; | ||||||
|     std::atomic_thread_fence(std::memory_order_acquire); |     std::scoped_lock lk(mapping_guard); | ||||||
|     for (size_t i = 0; i < num_pages; i++) { |     for (size_t i = 0; i < num_pages; i++) { | ||||||
|         const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE; |         const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE; | ||||||
|         auto* ptr = process_memory->GetPointer(Common::ProcessAddress(new_vaddress)); |         auto* ptr = process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress)); | ||||||
|         if (ptr == nullptr) [[unlikely]] { |         if (ptr == nullptr) [[unlikely]] { | ||||||
|             compressed_physical_ptr[start_page_d + i] = 0; |             compressed_physical_ptr[start_page_d + i] = 0; | ||||||
|             continue; |             continue; | ||||||
|  | @ -157,14 +158,14 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size | ||||||
|         compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i); |         compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i); | ||||||
|         InsertCPUBacking(start_page_d + i, new_vaddress, process_id); |         InsertCPUBacking(start_page_d + i, new_vaddress, process_id); | ||||||
|     } |     } | ||||||
|     std::atomic_thread_fence(std::memory_order_release); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <typename Traits> | template <typename Traits> | ||||||
| void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) { | void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) { | ||||||
|     size_t start_page_d = address >> Memory::YUZU_PAGEBITS; |     size_t start_page_d = address >> Memory::YUZU_PAGEBITS; | ||||||
|     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; |     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; | ||||||
|     std::atomic_thread_fence(std::memory_order_acquire); |     interface->InvalidateRegion(address, size); | ||||||
|  |     std::scoped_lock lk(mapping_guard); | ||||||
|     for (size_t i = 0; i < num_pages; i++) { |     for (size_t i = 0; i < num_pages; i++) { | ||||||
|         auto phys_addr = compressed_physical_ptr[start_page_d + i]; |         auto phys_addr = compressed_physical_ptr[start_page_d + i]; | ||||||
|         compressed_physical_ptr[start_page_d + i] = 0; |         compressed_physical_ptr[start_page_d + i] = 0; | ||||||
|  | @ -173,7 +174,6 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) { | ||||||
|             compressed_device_addr[phys_addr - 1] = 0; |             compressed_device_addr[phys_addr - 1] = 0; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     std::atomic_thread_fence(std::memory_order_release); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <typename Traits> | template <typename Traits> | ||||||
|  | @ -256,6 +256,7 @@ void DeviceMemoryManager<Traits>::WalkBlock(DAddr addr, std::size_t size, auto o | ||||||
| 
 | 
 | ||||||
| template <typename Traits> | template <typename Traits> | ||||||
| void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) { | void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) { | ||||||
|  |     interface->FlushRegion(address, size); | ||||||
|     WalkBlock( |     WalkBlock( | ||||||
|         address, size, |         address, size, | ||||||
|         [&](size_t copy_amount, DAddr current_vaddr) { |         [&](size_t copy_amount, DAddr current_vaddr) { | ||||||
|  | @ -274,7 +275,7 @@ void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, s | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <typename Traits> | template <typename Traits> | ||||||
| void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, void* src_pointer, size_t size) { | void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, const void* src_pointer, size_t size) { | ||||||
|     WalkBlock( |     WalkBlock( | ||||||
|         address, size, |         address, size, | ||||||
|         [&](size_t copy_amount, DAddr current_vaddr) { |         [&](size_t copy_amount, DAddr current_vaddr) { | ||||||
|  | @ -287,7 +288,46 @@ void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, void* src_pointer, s | ||||||
|             std::memcpy(dst_ptr, src_pointer, copy_amount); |             std::memcpy(dst_ptr, src_pointer, copy_amount); | ||||||
|         }, |         }, | ||||||
|         [&](const std::size_t copy_amount) { |         [&](const std::size_t copy_amount) { | ||||||
|             src_pointer = static_cast<u8*>(src_pointer) + copy_amount; |             src_pointer = static_cast<const u8*>(src_pointer) + copy_amount; | ||||||
|  |         }); | ||||||
|  |     interface->InvalidateRegion(address, size); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template <typename Traits> | ||||||
|  | void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size) { | ||||||
|  |     WalkBlock( | ||||||
|  |         address, size, | ||||||
|  |         [&](size_t copy_amount, DAddr current_vaddr) { | ||||||
|  |             LOG_ERROR( | ||||||
|  |                 HW_Memory, | ||||||
|  |                 "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", | ||||||
|  |                 current_vaddr, address, size); | ||||||
|  |             std::memset(dest_pointer, 0, copy_amount); | ||||||
|  |         }, | ||||||
|  |         [&](size_t copy_amount, const u8* const src_ptr) { | ||||||
|  |             std::memcpy(dest_pointer, src_ptr, copy_amount); | ||||||
|  |         }, | ||||||
|  |         [&](const std::size_t copy_amount) { | ||||||
|  |             dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount; | ||||||
|  |         }); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template <typename Traits> | ||||||
|  | void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer, | ||||||
|  |                                                    size_t size) { | ||||||
|  |     WalkBlock( | ||||||
|  |         address, size, | ||||||
|  |         [&](size_t copy_amount, DAddr current_vaddr) { | ||||||
|  |             LOG_ERROR( | ||||||
|  |                 HW_Memory, | ||||||
|  |                 "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", | ||||||
|  |                 current_vaddr, address, size); | ||||||
|  |         }, | ||||||
|  |         [&](size_t copy_amount, u8* const dst_ptr) { | ||||||
|  |             std::memcpy(dst_ptr, src_pointer, copy_amount); | ||||||
|  |         }, | ||||||
|  |         [&](const std::size_t copy_amount) { | ||||||
|  |             src_pointer = static_cast<const u8*>(src_pointer) + copy_amount; | ||||||
|         }); |         }); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -313,6 +353,18 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(size_t id) { | ||||||
| 
 | 
 | ||||||
| template <typename Traits> | template <typename Traits> | ||||||
| void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) { | void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) { | ||||||
|  |     bool locked = false; | ||||||
|  |     auto lock = [&] { | ||||||
|  |         if (!locked) { | ||||||
|  |             counter_guard.lock(); | ||||||
|  |             locked = true; | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |     SCOPE_EXIT({ | ||||||
|  |         if (locked) { | ||||||
|  |             counter_guard.unlock(); | ||||||
|  |         } | ||||||
|  |     }); | ||||||
|     u64 uncache_begin = 0; |     u64 uncache_begin = 0; | ||||||
|     u64 cache_begin = 0; |     u64 cache_begin = 0; | ||||||
|     u64 uncache_bytes = 0; |     u64 uncache_bytes = 0; | ||||||
|  | @ -347,6 +399,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | ||||||
|             } |             } | ||||||
|             uncache_bytes += Memory::YUZU_PAGESIZE; |             uncache_bytes += Memory::YUZU_PAGESIZE; | ||||||
|         } else if (uncache_bytes > 0) { |         } else if (uncache_bytes > 0) { | ||||||
|  |             lock(); | ||||||
|             MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, |             MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, | ||||||
|                               uncache_bytes, false); |                               uncache_bytes, false); | ||||||
|             uncache_bytes = 0; |             uncache_bytes = 0; | ||||||
|  | @ -357,6 +410,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | ||||||
|             } |             } | ||||||
|             cache_bytes += Memory::YUZU_PAGESIZE; |             cache_bytes += Memory::YUZU_PAGESIZE; | ||||||
|         } else if (cache_bytes > 0) { |         } else if (cache_bytes > 0) { | ||||||
|  |             lock(); | ||||||
|             MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, |             MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, | ||||||
|                               true); |                               true); | ||||||
|             cache_bytes = 0; |             cache_bytes = 0; | ||||||
|  | @ -364,10 +418,12 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | ||||||
|         vpage++; |         vpage++; | ||||||
|     } |     } | ||||||
|     if (uncache_bytes > 0) { |     if (uncache_bytes > 0) { | ||||||
|  |         lock(); | ||||||
|         MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, |         MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, | ||||||
|                           false); |                           false); | ||||||
|     } |     } | ||||||
|     if (cache_bytes > 0) { |     if (cache_bytes > 0) { | ||||||
|  |         lock(); | ||||||
|         MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, |         MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, | ||||||
|                           true); |                           true); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -23,7 +23,7 @@ public: | ||||||
| 
 | 
 | ||||||
|     ~GPUDirtyMemoryManager() = default; |     ~GPUDirtyMemoryManager() = default; | ||||||
| 
 | 
 | ||||||
|     void Collect(VAddr address, size_t size) { |     void Collect(PAddr address, size_t size) { | ||||||
|         TransformAddress t = BuildTransform(address, size); |         TransformAddress t = BuildTransform(address, size); | ||||||
|         TransformAddress tmp, original; |         TransformAddress tmp, original; | ||||||
|         do { |         do { | ||||||
|  | @ -47,7 +47,7 @@ public: | ||||||
|                                                 std::memory_order_relaxed)); |                                                 std::memory_order_relaxed)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void Gather(std::function<void(VAddr, size_t)>& callback) { |     void Gather(std::function<void(PAddr, size_t)>& callback) { | ||||||
|         { |         { | ||||||
|             std::scoped_lock lk(guard); |             std::scoped_lock lk(guard); | ||||||
|             TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed); |             TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed); | ||||||
|  | @ -65,7 +65,7 @@ public: | ||||||
|                 mask = mask >> empty_bits; |                 mask = mask >> empty_bits; | ||||||
| 
 | 
 | ||||||
|                 const size_t continuous_bits = std::countr_one(mask); |                 const size_t continuous_bits = std::countr_one(mask); | ||||||
|                 callback((static_cast<VAddr>(transform.address) << page_bits) + offset, |                 callback((static_cast<PAddr>(transform.address) << page_bits) + offset, | ||||||
|                          continuous_bits << align_bits); |                          continuous_bits << align_bits); | ||||||
|                 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; |                 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; | ||||||
|                 offset += continuous_bits << align_bits; |                 offset += continuous_bits << align_bits; | ||||||
|  | @ -89,7 +89,7 @@ private: | ||||||
|     constexpr static size_t align_mask = align_size - 1; |     constexpr static size_t align_mask = align_size - 1; | ||||||
|     constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; |     constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; | ||||||
| 
 | 
 | ||||||
|     bool IsValid(VAddr address) { |     bool IsValid(PAddr address) { | ||||||
|         return address < (1ULL << 39); |         return address < (1ULL << 39); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -103,7 +103,7 @@ private: | ||||||
|         return mask; |         return mask; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     TransformAddress BuildTransform(VAddr address, size_t size) { |     TransformAddress BuildTransform(PAddr address, size_t size) { | ||||||
|         const size_t minor_address = address & page_mask; |         const size_t minor_address = address & page_mask; | ||||||
|         const size_t minor_bit = minor_address >> align_bits; |         const size_t minor_bit = minor_address >> align_bits; | ||||||
|         const size_t top_bit = (minor_address + size + align_mask) >> align_bits; |         const size_t top_bit = (minor_address + size + align_mask) >> align_bits; | ||||||
|  |  | ||||||
							
								
								
									
										218
									
								
								src/core/guest_memory.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										218
									
								
								src/core/guest_memory.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,218 @@ | ||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||||
|  | // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <iterator> | ||||||
|  | #include <memory> | ||||||
|  | #include <optional> | ||||||
|  | #include <span> | ||||||
|  | #include <vector> | ||||||
|  | 
 | ||||||
|  | #include "common/scratch_buffer.h" | ||||||
|  | #include "core/memory.h" | ||||||
|  | 
 | ||||||
|  | namespace Core::Memory { | ||||||
|  | 
 | ||||||
|  | enum GuestMemoryFlags : u32 { | ||||||
|  |     Read = 1 << 0, | ||||||
|  |     Write = 1 << 1, | ||||||
|  |     Safe = 1 << 2, | ||||||
|  |     Cached = 1 << 3, | ||||||
|  | 
 | ||||||
|  |     SafeRead = Read | Safe, | ||||||
|  |     SafeWrite = Write | Safe, | ||||||
|  |     SafeReadWrite = SafeRead | SafeWrite, | ||||||
|  |     SafeReadCachedWrite = SafeReadWrite | Cached, | ||||||
|  | 
 | ||||||
|  |     UnsafeRead = Read, | ||||||
|  |     UnsafeWrite = Write, | ||||||
|  |     UnsafeReadWrite = UnsafeRead | UnsafeWrite, | ||||||
|  |     UnsafeReadCachedWrite = UnsafeReadWrite | Cached, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | namespace { | ||||||
|  | template <typename M, typename T, GuestMemoryFlags FLAGS> | ||||||
|  | class GuestMemory { | ||||||
|  |     using iterator = T*; | ||||||
|  |     using const_iterator = const T*; | ||||||
|  |     using value_type = T; | ||||||
|  |     using element_type = T; | ||||||
|  |     using iterator_category = std::contiguous_iterator_tag; | ||||||
|  | 
 | ||||||
|  | public: | ||||||
|  |     GuestMemory() = delete; | ||||||
|  |     explicit GuestMemory(M& memory, u64 addr, std::size_t size, | ||||||
|  |                          Common::ScratchBuffer<T>* backup = nullptr) | ||||||
|  |         : m_memory{memory}, m_addr{addr}, m_size{size} { | ||||||
|  |         static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); | ||||||
|  |         if constexpr (FLAGS & GuestMemoryFlags::Read) { | ||||||
|  |             Read(addr, size, backup); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     ~GuestMemory() = default; | ||||||
|  | 
 | ||||||
|  |     T* data() noexcept { | ||||||
|  |         return m_data_span.data(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const T* data() const noexcept { | ||||||
|  |         return m_data_span.data(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     size_t size() const noexcept { | ||||||
|  |         return m_size; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     size_t size_bytes() const noexcept { | ||||||
|  |         return this->size() * sizeof(T); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     [[nodiscard]] T* begin() noexcept { | ||||||
|  |         return this->data(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     [[nodiscard]] const T* begin() const noexcept { | ||||||
|  |         return this->data(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     [[nodiscard]] T* end() noexcept { | ||||||
|  |         return this->data() + this->size(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     [[nodiscard]] const T* end() const noexcept { | ||||||
|  |         return this->data() + this->size(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     T& operator[](size_t index) noexcept { | ||||||
|  |         return m_data_span[index]; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const T& operator[](size_t index) const noexcept { | ||||||
|  |         return m_data_span[index]; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void SetAddressAndSize(u64 addr, std::size_t size) noexcept { | ||||||
|  |         m_addr = addr; | ||||||
|  |         m_size = size; | ||||||
|  |         m_addr_changed = true; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::span<T> Read(u64 addr, std::size_t size, | ||||||
|  |                       Common::ScratchBuffer<T>* backup = nullptr) noexcept { | ||||||
|  |         m_addr = addr; | ||||||
|  |         m_size = size; | ||||||
|  |         if (m_size == 0) { | ||||||
|  |             m_is_data_copy = true; | ||||||
|  |             return {}; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (this->TrySetSpan()) { | ||||||
|  |             if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||||
|  |                 m_memory.FlushRegion(m_addr, this->size_bytes()); | ||||||
|  |             } | ||||||
|  |         } else { | ||||||
|  |             if (backup) { | ||||||
|  |                 backup->resize_destructive(this->size()); | ||||||
|  |                 m_data_span = *backup; | ||||||
|  |             } else { | ||||||
|  |                 m_data_copy.resize(this->size()); | ||||||
|  |                 m_data_span = std::span(m_data_copy); | ||||||
|  |             } | ||||||
|  |             m_is_data_copy = true; | ||||||
|  |             m_span_valid = true; | ||||||
|  |             if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||||
|  |                 m_memory.ReadBlock(m_addr, this->data(), this->size_bytes()); | ||||||
|  |             } else { | ||||||
|  |                 m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes()); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         return m_data_span; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void Write(std::span<T> write_data) noexcept { | ||||||
|  |         if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||||||
|  |             m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes()); | ||||||
|  |         } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||||
|  |             m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes()); | ||||||
|  |         } else { | ||||||
|  |             m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes()); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool TrySetSpan() noexcept { | ||||||
|  |         if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) { | ||||||
|  |             m_data_span = {reinterpret_cast<T*>(ptr), this->size()}; | ||||||
|  |             m_span_valid = true; | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | protected: | ||||||
|  |     bool IsDataCopy() const noexcept { | ||||||
|  |         return m_is_data_copy; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool AddressChanged() const noexcept { | ||||||
|  |         return m_addr_changed; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     M& m_memory; | ||||||
|  |     u64 m_addr{}; | ||||||
|  |     size_t m_size{}; | ||||||
|  |     std::span<T> m_data_span{}; | ||||||
|  |     std::vector<T> m_data_copy{}; | ||||||
|  |     bool m_span_valid{false}; | ||||||
|  |     bool m_is_data_copy{false}; | ||||||
|  |     bool m_addr_changed{false}; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /// GuestMemory variant that finishes the access in its destructor: for FLAGS containing
 | ||||||
|  | /// Write it either writes the data back to guest memory or invalidates the region.
 | ||||||
|  | template <typename M, typename T, GuestMemoryFlags FLAGS> | ||||||
|  | class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> { | ||||||
|  | public: | ||||||
|  |     GuestMemoryScoped() = delete; | ||||||
|  | 
 | ||||||
|  |     /// Forwards every argument to the GuestMemory constructor. When FLAGS does not contain
 | ||||||
|  |     /// Read, it then tries TrySetSpan(); if that fails and `backup` is non-null, the span is
 | ||||||
|  |     /// pointed at `*backup` and the object is marked as holding a data copy.
 | ||||||
|  |     explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size, | ||||||
|  |                                Common::ScratchBuffer<T>* backup = nullptr) | ||||||
|  |         : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) { | ||||||
|  |         if constexpr (!(FLAGS & GuestMemoryFlags::Read)) { | ||||||
|  |             const bool has_direct_span = this->TrySetSpan(); | ||||||
|  |             if (!has_direct_span && backup != nullptr) { | ||||||
|  |                 this->m_data_span = *backup; | ||||||
|  |                 this->m_span_valid = true; | ||||||
|  |                 this->m_is_data_copy = true; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Does nothing unless FLAGS contains Write and the size is non-zero. Otherwise writes
 | ||||||
|  |     /// the data back when the address changed or the data is a copy; if neither holds and
 | ||||||
|  |     /// FLAGS contains Safe or Cached, invalidates the region instead.
 | ||||||
|  |     ~GuestMemoryScoped() { | ||||||
|  |         if constexpr (FLAGS & GuestMemoryFlags::Write) { | ||||||
|  |             if (this->size() == 0) [[unlikely]] { | ||||||
|  |                 return; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             const bool must_write_back = this->AddressChanged() || this->IsDataCopy(); | ||||||
|  |             if (!must_write_back) { | ||||||
|  |                 if constexpr ((FLAGS & GuestMemoryFlags::Safe) || | ||||||
|  |                               (FLAGS & GuestMemoryFlags::Cached)) { | ||||||
|  |                     this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes()); | ||||||
|  |                 } | ||||||
|  |                 return; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             ASSERT(this->m_span_valid); | ||||||
|  |             if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||||||
|  |                 this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes()); | ||||||
|  |             } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||||
|  |                 this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes()); | ||||||
|  |             } else { | ||||||
|  |                 this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes()); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | } // namespace
 | ||||||
|  | 
 | ||||||
|  | template <typename T, GuestMemoryFlags FLAGS> | ||||||
|  | using CpuGuestMemory = GuestMemory<Core::Memory::Memory, T, FLAGS>; | ||||||
|  | template <typename T, GuestMemoryFlags FLAGS> | ||||||
|  | using CpuGuestMemoryScoped = GuestMemoryScoped<Core::Memory::Memory, T, FLAGS>; | ||||||
|  | 
 | ||||||
|  | } // namespace Core::Memory
 | ||||||
|  | @ -22,19 +22,7 @@ | ||||||
| #include "core/hle/service/hle_ipc.h" | #include "core/hle/service/hle_ipc.h" | ||||||
| #include "core/hle/service/ipc_helpers.h" | #include "core/hle/service/ipc_helpers.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
| 
 | #include "core/guest_memory.h" | ||||||
| namespace { |  | ||||||
| static thread_local std::array read_buffer_data_a{ |  | ||||||
|     Common::ScratchBuffer<u8>(), |  | ||||||
|     Common::ScratchBuffer<u8>(), |  | ||||||
|     Common::ScratchBuffer<u8>(), |  | ||||||
| }; |  | ||||||
| static thread_local std::array read_buffer_data_x{ |  | ||||||
|     Common::ScratchBuffer<u8>(), |  | ||||||
|     Common::ScratchBuffer<u8>(), |  | ||||||
|     Common::ScratchBuffer<u8>(), |  | ||||||
| }; |  | ||||||
| } // Anonymous namespace
 |  | ||||||
| 
 | 
 | ||||||
| namespace Service { | namespace Service { | ||||||
| 
 | 
 | ||||||
|  | @ -343,48 +331,27 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const { | std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const { | ||||||
|     static thread_local std::array read_buffer_a{ |     Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); | ||||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), |  | ||||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), |  | ||||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), |  | ||||||
|     }; |  | ||||||
| 
 | 
 | ||||||
|     ASSERT_OR_EXECUTE_MSG( |     ASSERT_OR_EXECUTE_MSG( | ||||||
|         BufferDescriptorA().size() > buffer_index, { return {}; }, |         BufferDescriptorA().size() > buffer_index, { return {}; }, | ||||||
|         "BufferDescriptorA invalid buffer_index {}", buffer_index); |         "BufferDescriptorA invalid buffer_index {}", buffer_index); | ||||||
|     auto& read_buffer = read_buffer_a[buffer_index]; |     return gm.Read(BufferDescriptorA()[buffer_index].Address(), | ||||||
|     return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), |                    BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]); | ||||||
|                             BufferDescriptorA()[buffer_index].Size(), |  | ||||||
|                             &read_buffer_data_a[buffer_index]); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const { | std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const { | ||||||
|     static thread_local std::array read_buffer_x{ |     Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); | ||||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), |  | ||||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), |  | ||||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), |  | ||||||
|     }; |  | ||||||
| 
 | 
 | ||||||
|     ASSERT_OR_EXECUTE_MSG( |     ASSERT_OR_EXECUTE_MSG( | ||||||
|         BufferDescriptorX().size() > buffer_index, { return {}; }, |         BufferDescriptorX().size() > buffer_index, { return {}; }, | ||||||
|         "BufferDescriptorX invalid buffer_index {}", buffer_index); |         "BufferDescriptorX invalid buffer_index {}", buffer_index); | ||||||
|     auto& read_buffer = read_buffer_x[buffer_index]; |     return gm.Read(BufferDescriptorX()[buffer_index].Address(), | ||||||
|     return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), |                    BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]); | ||||||
|                             BufferDescriptorX()[buffer_index].Size(), |  | ||||||
|                             &read_buffer_data_x[buffer_index]); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { | std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { | ||||||
|     static thread_local std::array read_buffer_a{ |     Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); | ||||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), |  | ||||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), |  | ||||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), |  | ||||||
|     }; |  | ||||||
|     static thread_local std::array read_buffer_x{ |  | ||||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), |  | ||||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), |  | ||||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), |  | ||||||
|     }; |  | ||||||
| 
 | 
 | ||||||
|     const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && |     const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && | ||||||
|                            BufferDescriptorA()[buffer_index].Size()}; |                            BufferDescriptorA()[buffer_index].Size()}; | ||||||
|  | @ -401,18 +368,14 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) cons | ||||||
|         ASSERT_OR_EXECUTE_MSG( |         ASSERT_OR_EXECUTE_MSG( | ||||||
|             BufferDescriptorA().size() > buffer_index, { return {}; }, |             BufferDescriptorA().size() > buffer_index, { return {}; }, | ||||||
|             "BufferDescriptorA invalid buffer_index {}", buffer_index); |             "BufferDescriptorA invalid buffer_index {}", buffer_index); | ||||||
|         auto& read_buffer = read_buffer_a[buffer_index]; |         return gm.Read(BufferDescriptorA()[buffer_index].Address(), | ||||||
|         return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), |                        BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]); | ||||||
|                                 BufferDescriptorA()[buffer_index].Size(), |  | ||||||
|                                 &read_buffer_data_a[buffer_index]); |  | ||||||
|     } else { |     } else { | ||||||
|         ASSERT_OR_EXECUTE_MSG( |         ASSERT_OR_EXECUTE_MSG( | ||||||
|             BufferDescriptorX().size() > buffer_index, { return {}; }, |             BufferDescriptorX().size() > buffer_index, { return {}; }, | ||||||
|             "BufferDescriptorX invalid buffer_index {}", buffer_index); |             "BufferDescriptorX invalid buffer_index {}", buffer_index); | ||||||
|         auto& read_buffer = read_buffer_x[buffer_index]; |         return gm.Read(BufferDescriptorX()[buffer_index].Address(), | ||||||
|         return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), |                        BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]); | ||||||
|                                 BufferDescriptorX()[buffer_index].Size(), |  | ||||||
|                                 &read_buffer_data_x[buffer_index]); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -19,8 +19,6 @@ | ||||||
| #include "core/hle/ipc.h" | #include "core/hle/ipc.h" | ||||||
| #include "core/hle/kernel/k_handle_table.h" | #include "core/hle/kernel/k_handle_table.h" | ||||||
| #include "core/hle/kernel/svc_common.h" | #include "core/hle/kernel/svc_common.h" | ||||||
| #include "core/hle/kernel/k_auto_object.h" |  | ||||||
| #include "core/hle/kernel/k_handle_table.h" |  | ||||||
| 
 | 
 | ||||||
| union Result; | union Result; | ||||||
| 
 | 
 | ||||||
|  | @ -377,10 +375,6 @@ public: | ||||||
|         return nullptr; |         return nullptr; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Kernel::KScopedAutoObject<Kernel::KAutoObject> GetObjectFromHandle(u32 handle) { |  | ||||||
|         return GetClientHandleTable().GetObjectForIpc(handle, thread); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] std::shared_ptr<SessionRequestManager> GetManager() const { |     [[nodiscard]] std::shared_ptr<SessionRequestManager> GetManager() const { | ||||||
|         return manager.lock(); |         return manager.lock(); | ||||||
|     } |     } | ||||||
|  | @ -432,6 +426,9 @@ private: | ||||||
| 
 | 
 | ||||||
|     Kernel::KernelCore& kernel; |     Kernel::KernelCore& kernel; | ||||||
|     Core::Memory::Memory& memory; |     Core::Memory::Memory& memory; | ||||||
|  | 
 | ||||||
|  |     mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_a{}; | ||||||
|  |     mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_x{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Service
 | } // namespace Service
 | ||||||
|  |  | ||||||
|  | @ -2,6 +2,8 @@ | ||||||
| // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
 | // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
 | ||||||
| // SPDX-License-Identifier: GPL-3.0-or-later
 | // SPDX-License-Identifier: GPL-3.0-or-later
 | ||||||
| 
 | 
 | ||||||
|  | #include <functional> | ||||||
|  | 
 | ||||||
| #include "common/alignment.h" | #include "common/alignment.h" | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
|  | @ -18,6 +20,7 @@ NvMap::Handle::Handle(u64 size_, Id id_) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) { | NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) { | ||||||
|  |     std::scoped_lock lock(mutex); | ||||||
|     // Handles cannot be allocated twice
 |     // Handles cannot be allocated twice
 | ||||||
|     if (allocated) { |     if (allocated) { | ||||||
|         return NvResult::AccessDenied; |         return NvResult::AccessDenied; | ||||||
|  | @ -78,11 +81,9 @@ void NvMap::UnmapHandle(Handle& handle_description) { | ||||||
| 
 | 
 | ||||||
|     // Free and unmap the handle from the SMMU
 |     // Free and unmap the handle from the SMMU
 | ||||||
|     auto& smmu = host1x.MemoryManager(); |     auto& smmu = host1x.MemoryManager(); | ||||||
|     smmu.Unmap(static_cast<DAddr>(handle_description.pin_virt_address), |     smmu.Unmap(handle_description.d_address, handle_description.aligned_size); | ||||||
|                handle_description.aligned_size); |     smmu.Free(handle_description.d_address, static_cast<size_t>(handle_description.aligned_size)); | ||||||
|     smmu.Free(handle_description.pin_virt_address, |     handle_description.d_address = 0; | ||||||
|               static_cast<size_t>(handle_description.aligned_size)); |  | ||||||
|     handle_description.pin_virt_address = 0; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool NvMap::TryRemoveHandle(const Handle& handle_description) { | bool NvMap::TryRemoveHandle(const Handle& handle_description) { | ||||||
|  | @ -123,41 +124,16 @@ std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| VAddr NvMap::GetHandleAddress(Handle::Id handle) { | DAddr NvMap::GetHandleAddress(Handle::Id handle) { | ||||||
|     std::scoped_lock lock(handles_lock); |     std::scoped_lock lock(handles_lock); | ||||||
|     try { |     try { | ||||||
|         return handles.at(handle)->address; |         return handles.at(handle)->d_address; | ||||||
|     } catch (std::out_of_range&) { |     } catch (std::out_of_range&) { | ||||||
|         return 0; |         return 0; | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| NvResult NvMap::AllocateHandle(Handle::Id handle, Handle::Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress, size_t session_id) { | DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_area_pin) { | ||||||
|     auto handle_description{GetHandle(handle)}; |  | ||||||
|     if (!handle_description) [[unlikely]] { |  | ||||||
|         return NvResult::BadParameter; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     if (handle_description->allocated) [[unlikely]] { |  | ||||||
|         return NvResult::InsufficientMemory; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::scoped_lock lock(handle_description->mutex); |  | ||||||
|     NvResult result = handle_description->Alloc(pFlags, pAlign, pKind, pAddress); |  | ||||||
|     if (result != NvResult::Success) { |  | ||||||
|         return result; |  | ||||||
|     } |  | ||||||
|     auto& smmu = host1x.MemoryManager(); |  | ||||||
|     size_t total_size = static_cast<size_t>(handle_description->aligned_size); |  | ||||||
|     handle_description->d_address = smmu.Allocate(total_size); |  | ||||||
|     if (handle_description->d_address == 0) { |  | ||||||
|         return NvResult::InsufficientMemory; |  | ||||||
|     } |  | ||||||
|     smmu.Map(handle_description->d_address, handle_description->address, total_size, session_id); |  | ||||||
|     return NvResult::Success; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| u32 NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id) { |  | ||||||
|     auto handle_description{GetHandle(handle)}; |     auto handle_description{GetHandle(handle)}; | ||||||
|     if (!handle_description) [[unlikely]] { |     if (!handle_description) [[unlikely]] { | ||||||
|         return 0; |         return 0; | ||||||
|  | @ -176,35 +152,38 @@ u32 NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id) { | ||||||
|                 handle_description->unmap_queue_entry.reset(); |                 handle_description->unmap_queue_entry.reset(); | ||||||
| 
 | 
 | ||||||
|                 handle_description->pins++; |                 handle_description->pins++; | ||||||
|                 return handle_description->pin_virt_address; |                 return handle_description->d_address; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |         using namespace std::placeholders; | ||||||
|         // If not then allocate some space and map it
 |         // If not then allocate some space and map it
 | ||||||
|         DAddr address{}; |         DAddr address{}; | ||||||
|         auto& smmu = host1x.MemoryManager(); |         auto& smmu = host1x.MemoryManager(); | ||||||
|         while ((address = smmu.AllocatePinned( |         auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1); | ||||||
|                     static_cast<size_t>(handle_description->aligned_size))) == 0) { |                          //: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
 | ||||||
|  |         while ((address = allocate(static_cast<size_t>(handle_description->aligned_size))) == 0) { | ||||||
|             // Free handles until the allocation succeeds
 |             // Free handles until the allocation succeeds
 | ||||||
|             std::scoped_lock queueLock(unmap_queue_lock); |             std::scoped_lock queueLock(unmap_queue_lock); | ||||||
|             if (auto freeHandleDesc{unmap_queue.front()}) { |             if (auto freeHandleDesc{unmap_queue.front()}) { | ||||||
|                 // Handles in the unmap queue are guaranteed not to be pinned so don't bother
 |                 // Handles in the unmap queue are guaranteed not to be pinned so don't bother
 | ||||||
|                 // checking if they are before unmapping
 |                 // checking if they are before unmapping
 | ||||||
|                 std::scoped_lock freeLock(freeHandleDesc->mutex); |                 std::scoped_lock freeLock(freeHandleDesc->mutex); | ||||||
|                 if (handle_description->pin_virt_address) |                 if (handle_description->d_address) | ||||||
|                     UnmapHandle(*freeHandleDesc); |                     UnmapHandle(*freeHandleDesc); | ||||||
|             } else { |             } else { | ||||||
|                 LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); |                 LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |         handle_description->d_address = address; | ||||||
|  | 
 | ||||||
|         smmu.Map(address, handle_description->address, handle_description->aligned_size, |         smmu.Map(address, handle_description->address, handle_description->aligned_size, | ||||||
|                  session_id); |                  session_id); | ||||||
|         handle_description->pin_virt_address = static_cast<u32>(address); |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     handle_description->pins++; |     handle_description->pins++; | ||||||
|     return handle_description->pin_virt_address; |     return handle_description->d_address; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void NvMap::UnpinHandle(Handle::Id handle) { | void NvMap::UnpinHandle(Handle::Id handle) { | ||||||
|  | @ -255,15 +234,10 @@ std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool interna | ||||||
|                 LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!"); |                 LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!"); | ||||||
|             } else if (handle_description->dupes == 0) { |             } else if (handle_description->dupes == 0) { | ||||||
|                 // Force unmap the handle
 |                 // Force unmap the handle
 | ||||||
|                 if (handle_description->pin_virt_address) { |                 if (handle_description->d_address) { | ||||||
|                     std::scoped_lock queueLock(unmap_queue_lock); |                     std::scoped_lock queueLock(unmap_queue_lock); | ||||||
|                     UnmapHandle(*handle_description); |                     UnmapHandle(*handle_description); | ||||||
|                 } |                 } | ||||||
|                 if (handle_description->allocated) { |  | ||||||
|                     auto& smmu = host1x.MemoryManager(); |  | ||||||
|                     smmu.Free(handle_description->d_address, handle_description->aligned_size); |  | ||||||
|                     smmu.Unmap(handle_description->d_address, handle_description->aligned_size); |  | ||||||
|                 } |  | ||||||
| 
 | 
 | ||||||
|                 handle_description->pins = 0; |                 handle_description->pins = 0; | ||||||
|             } |             } | ||||||
|  |  | ||||||
|  | @ -48,7 +48,7 @@ public: | ||||||
|         using Id = u32; |         using Id = u32; | ||||||
|         Id id; //!< A globally unique identifier for this handle
 |         Id id; //!< A globally unique identifier for this handle
 | ||||||
| 
 | 
 | ||||||
|         s32 pins{}; |         s64 pins{}; | ||||||
|         u32 pin_virt_address{}; |         u32 pin_virt_address{}; | ||||||
|         std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{}; |         std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{}; | ||||||
| 
 | 
 | ||||||
|  | @ -63,15 +63,14 @@ public: | ||||||
| 
 | 
 | ||||||
|         VAddr address{};   //!< The memory location in the guest's AS that this handle corresponds to,
 |         VAddr address{};   //!< The memory location in the guest's AS that this handle corresponds to,
 | ||||||
|                            //!< this can also be in the nvdrv tmem
 |                            //!< this can also be in the nvdrv tmem
 | ||||||
|         DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
 |  | ||||||
|                            //!< this can also be in the nvdrv tmem
 |  | ||||||
|         bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC
 |         bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC
 | ||||||
|                                      //!< call
 |                                      //!< call
 | ||||||
| 
 | 
 | ||||||
|         u8 kind{};        //!< Used for memory compression
 |         u8 kind{};        //!< Used for memory compression
 | ||||||
|         bool allocated{}; //!< If the handle has been allocated with `Alloc`
 |         bool allocated{}; //!< If the handle has been allocated with `Alloc`
 | ||||||
| 
 | 
 | ||||||
|         u64 dma_map_addr{}; //! remove me after implementing pinning.
 |         DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
 | ||||||
|  |                            //!< this can also be in the nvdrv tmem
 | ||||||
| 
 | 
 | ||||||
|         Handle(u64 size, Id id); |         Handle(u64 size, Id id); | ||||||
| 
 | 
 | ||||||
|  | @ -119,7 +118,7 @@ public: | ||||||
| 
 | 
 | ||||||
|     std::shared_ptr<Handle> GetHandle(Handle::Id handle); |     std::shared_ptr<Handle> GetHandle(Handle::Id handle); | ||||||
| 
 | 
 | ||||||
|     VAddr GetHandleAddress(Handle::Id handle); |     DAddr GetHandleAddress(Handle::Id handle); | ||||||
| 
 | 
 | ||||||
|     /**
 |     /**
 | ||||||
|      * @brief Maps a handle into the SMMU address space |      * @brief Maps a handle into the SMMU address space | ||||||
|  | @ -127,15 +126,7 @@ public: | ||||||
|      * number of calls to `UnpinHandle` |      * number of calls to `UnpinHandle` | ||||||
|      * @return The SMMU virtual address that the handle has been mapped to |      * @return The SMMU virtual address that the handle has been mapped to | ||||||
|      */ |      */ | ||||||
|     u32 PinHandle(Handle::Id handle, size_t session_id); |     DAddr PinHandle(Handle::Id handle, size_t session_id, bool low_area_pin); | ||||||
| 
 |  | ||||||
|     /**
 |  | ||||||
|      * @brief Maps a handle into the SMMU address space |  | ||||||
|      * @note This operation is refcounted, the number of calls to this must eventually match the |  | ||||||
|      * number of calls to `UnpinHandle` |  | ||||||
|      * @return The SMMU virtual address that the handle has been mapped to |  | ||||||
|      */ |  | ||||||
|     NvResult AllocateHandle(Handle::Id handle, Handle::Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress, size_t session_id); |  | ||||||
| 
 | 
 | ||||||
|     /**
 |     /**
 | ||||||
|      * @brief When this has been called an equal number of times to `PinHandle` for the supplied |      * @brief When this has been called an equal number of times to `PinHandle` for the supplied | ||||||
|  |  | ||||||
|  | @ -42,7 +42,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form | ||||||
|                         u32 height, u32 stride, android::BufferTransformFlags transform, |                         u32 height, u32 stride, android::BufferTransformFlags transform, | ||||||
|                         const Common::Rectangle<int>& crop_rect, |                         const Common::Rectangle<int>& crop_rect, | ||||||
|                         std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { |                         std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { | ||||||
|     const VAddr addr = nvmap.GetHandleAddress(buffer_handle); |     const DAddr addr = nvmap.GetHandleAddress(buffer_handle); | ||||||
|     LOG_TRACE(Service, |     LOG_TRACE(Service, | ||||||
|               "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", |               "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", | ||||||
|               addr, offset, width, height, stride, format); |               addr, offset, width, height, stride, format); | ||||||
|  |  | ||||||
|  | @ -40,15 +40,15 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> i | ||||||
|         case 0x3: |         case 0x3: | ||||||
|             return WrapFixed(this, &nvhost_as_gpu::FreeSpace, input, output); |             return WrapFixed(this, &nvhost_as_gpu::FreeSpace, input, output); | ||||||
|         case 0x5: |         case 0x5: | ||||||
|             return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output); |             return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output, fd); | ||||||
|         case 0x6: |         case 0x6: | ||||||
|             return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output); |             return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output, fd); | ||||||
|         case 0x8: |         case 0x8: | ||||||
|             return WrapFixed(this, &nvhost_as_gpu::GetVARegions1, input, output); |             return WrapFixed(this, &nvhost_as_gpu::GetVARegions1, input, output); | ||||||
|         case 0x9: |         case 0x9: | ||||||
|             return WrapFixed(this, &nvhost_as_gpu::AllocAsEx, input, output); |             return WrapFixed(this, &nvhost_as_gpu::AllocAsEx, input, output); | ||||||
|         case 0x14: |         case 0x14: | ||||||
|             return WrapVariable(this, &nvhost_as_gpu::Remap, input, output); |             return WrapVariable(this, &nvhost_as_gpu::Remap, input, output, fd); | ||||||
|         default: |         default: | ||||||
|             break; |             break; | ||||||
|         } |         } | ||||||
|  | @ -86,8 +86,15 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i | ||||||
|     return NvResult::NotImplemented; |     return NvResult::NotImplemented; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void nvhost_as_gpu::OnOpen(size_t session_id, DeviceFD fd) {} | void nvhost_as_gpu::OnOpen(size_t session_id, DeviceFD fd) { | ||||||
| void nvhost_as_gpu::OnClose(DeviceFD fd) {} |     sessions[fd] = session_id; | ||||||
|  | } | ||||||
|  | void nvhost_as_gpu::OnClose(DeviceFD fd) { | ||||||
|  |     auto it = sessions.find(fd); | ||||||
|  |     if (it != sessions.end()) { | ||||||
|  |         sessions.erase(it); | ||||||
|  |     } | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) { | NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) { | ||||||
|     LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size); |     LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size); | ||||||
|  | @ -206,6 +213,8 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) { | ||||||
|                        static_cast<u32>(aligned_size >> page_size_bits)); |                        static_cast<u32>(aligned_size >> page_size_bits)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     nvmap.UnpinHandle(mapping->handle); | ||||||
|  | 
 | ||||||
|     // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
 |     // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
 | ||||||
|     // Only FreeSpace can unmap them fully
 |     // Only FreeSpace can unmap them fully
 | ||||||
|     if (mapping->sparse_alloc) { |     if (mapping->sparse_alloc) { | ||||||
|  | @ -259,7 +268,7 @@ NvResult nvhost_as_gpu::FreeSpace(IoctlFreeSpace& params) { | ||||||
|     return NvResult::Success; |     return NvResult::Success; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) { | NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries, DeviceFD fd) { | ||||||
|     LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", entries.size()); |     LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", entries.size()); | ||||||
| 
 | 
 | ||||||
|     if (!vm.initialised) { |     if (!vm.initialised) { | ||||||
|  | @ -293,19 +302,19 @@ NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) { | ||||||
|                 return NvResult::BadValue; |                 return NvResult::BadValue; | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             VAddr cpu_address{static_cast<VAddr>( |             DAddr base = nvmap.PinHandle(entry.handle, sessions[fd], false); | ||||||
|                 handle->address + |             DAddr device_address{static_cast<DAddr>( | ||||||
|                 (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; |                 base + (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; | ||||||
| 
 | 
 | ||||||
|             gmmu->Map(virtual_address, cpu_address, size, static_cast<Tegra::PTEKind>(entry.kind), |             gmmu->Map(virtual_address, device_address, size, | ||||||
|                       use_big_pages); |                       static_cast<Tegra::PTEKind>(entry.kind), use_big_pages); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return NvResult::Success; |     return NvResult::Success; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd) { | ||||||
|     LOG_DEBUG(Service_NVDRV, |     LOG_DEBUG(Service_NVDRV, | ||||||
|               "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}" |               "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}" | ||||||
|               ", offset={}", |               ", offset={}", | ||||||
|  | @ -331,9 +340,9 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; |             u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; | ||||||
|             VAddr cpu_address{mapping->ptr + params.buffer_offset}; |             VAddr device_address{mapping->ptr + params.buffer_offset}; | ||||||
| 
 | 
 | ||||||
|             gmmu->Map(gpu_address, cpu_address, params.mapping_size, |             gmmu->Map(gpu_address, device_address, params.mapping_size, | ||||||
|                       static_cast<Tegra::PTEKind>(params.kind), mapping->big_page); |                       static_cast<Tegra::PTEKind>(params.kind), mapping->big_page); | ||||||
| 
 | 
 | ||||||
|             return NvResult::Success; |             return NvResult::Success; | ||||||
|  | @ -349,7 +358,8 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | ||||||
|         return NvResult::BadValue; |         return NvResult::BadValue; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)}; |     DAddr device_address{static_cast<DAddr>(nvmap.PinHandle(params.handle, sessions[fd], false) + | ||||||
|  |                                             params.buffer_offset)}; | ||||||
|     u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; |     u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; | ||||||
| 
 | 
 | ||||||
|     bool big_page{[&]() { |     bool big_page{[&]() { | ||||||
|  | @ -373,15 +383,14 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         const bool use_big_pages = alloc->second.big_pages && big_page; |         const bool use_big_pages = alloc->second.big_pages && big_page; | ||||||
|         gmmu->Map(params.offset, cpu_address, size, static_cast<Tegra::PTEKind>(params.kind), |         gmmu->Map(params.offset, device_address, size, static_cast<Tegra::PTEKind>(params.kind), | ||||||
|                   use_big_pages); |                   use_big_pages); | ||||||
| 
 | 
 | ||||||
|         auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, |         auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size, | ||||||
|                                                use_big_pages, alloc->second.sparse)}; |                                                true, use_big_pages, alloc->second.sparse)}; | ||||||
|         alloc->second.mappings.push_back(mapping); |         alloc->second.mappings.push_back(mapping); | ||||||
|         mapping_map[params.offset] = mapping; |         mapping_map[params.offset] = mapping; | ||||||
|     } else { |     } else { | ||||||
| 
 |  | ||||||
|         auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; |         auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; | ||||||
|         u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; |         u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; | ||||||
|         u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; |         u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; | ||||||
|  | @ -394,18 +403,18 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | ||||||
|             return NvResult::InsufficientMemory; |             return NvResult::InsufficientMemory; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), |         gmmu->Map(params.offset, device_address, Common::AlignUp(size, page_size), | ||||||
|                   static_cast<Tegra::PTEKind>(params.kind), big_page); |                   static_cast<Tegra::PTEKind>(params.kind), big_page); | ||||||
| 
 | 
 | ||||||
|         auto mapping{ |         auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size, | ||||||
|             std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)}; |                                                false, big_page, false)}; | ||||||
|         mapping_map[params.offset] = mapping; |         mapping_map[params.offset] = mapping; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return NvResult::Success; |     return NvResult::Success; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) { | NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params, DeviceFD fd) { | ||||||
|     LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); |     LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); | ||||||
| 
 | 
 | ||||||
|     std::scoped_lock lock(mutex); |     std::scoped_lock lock(mutex); | ||||||
|  | @ -433,6 +442,8 @@ NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) { | ||||||
|             gmmu->Unmap(params.offset, mapping->size); |             gmmu->Unmap(params.offset, mapping->size); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |         nvmap.UnpinHandle(mapping->handle); | ||||||
|  | 
 | ||||||
|         mapping_map.erase(params.offset); |         mapping_map.erase(params.offset); | ||||||
|     } catch (const std::out_of_range&) { |     } catch (const std::out_of_range&) { | ||||||
|         LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); |         LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); | ||||||
|  |  | ||||||
|  | @ -141,9 +141,9 @@ private: | ||||||
| 
 | 
 | ||||||
|     NvResult AllocAsEx(IoctlAllocAsEx& params); |     NvResult AllocAsEx(IoctlAllocAsEx& params); | ||||||
|     NvResult AllocateSpace(IoctlAllocSpace& params); |     NvResult AllocateSpace(IoctlAllocSpace& params); | ||||||
|     NvResult Remap(std::span<IoctlRemapEntry> params); |     NvResult Remap(std::span<IoctlRemapEntry> params, DeviceFD fd); | ||||||
|     NvResult MapBufferEx(IoctlMapBufferEx& params); |     NvResult MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd); | ||||||
|     NvResult UnmapBuffer(IoctlUnmapBuffer& params); |     NvResult UnmapBuffer(IoctlUnmapBuffer& params, DeviceFD fd); | ||||||
|     NvResult FreeSpace(IoctlFreeSpace& params); |     NvResult FreeSpace(IoctlFreeSpace& params); | ||||||
|     NvResult BindChannel(IoctlBindChannel& params); |     NvResult BindChannel(IoctlBindChannel& params); | ||||||
| 
 | 
 | ||||||
|  | @ -159,16 +159,18 @@ private: | ||||||
|     NvCore::NvMap& nvmap; |     NvCore::NvMap& nvmap; | ||||||
| 
 | 
 | ||||||
|     struct Mapping { |     struct Mapping { | ||||||
|         VAddr ptr; |         NvCore::NvMap::Handle::Id handle; | ||||||
|  |         DAddr ptr; | ||||||
|         u64 offset; |         u64 offset; | ||||||
|         u64 size; |         u64 size; | ||||||
|         bool fixed; |         bool fixed; | ||||||
|         bool big_page; // Only valid if fixed == false
 |         bool big_page; // Only valid if fixed == false
 | ||||||
|         bool sparse_alloc; |         bool sparse_alloc; | ||||||
| 
 | 
 | ||||||
|         Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_) |         Mapping(NvCore::NvMap::Handle::Id handle_, DAddr ptr_, u64 offset_, u64 size_, bool fixed_, | ||||||
|             : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_), |                 bool big_page_, bool sparse_alloc_) | ||||||
|               sparse_alloc(sparse_alloc_) {} |             : handle(handle_), ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), | ||||||
|  |               big_page(big_page_), sparse_alloc(sparse_alloc_) {} | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     struct Allocation { |     struct Allocation { | ||||||
|  | @ -212,9 +214,7 @@ private: | ||||||
|         bool initialised{}; |         bool initialised{}; | ||||||
|     } vm; |     } vm; | ||||||
|     std::shared_ptr<Tegra::MemoryManager> gmmu; |     std::shared_ptr<Tegra::MemoryManager> gmmu; | ||||||
| 
 |     std::unordered_map<DeviceFD, size_t> sessions; | ||||||
|     // s32 channel{};
 |  | ||||||
|     // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
 |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Service::Nvidia::Devices
 | } // namespace Service::Nvidia::Devices
 | ||||||
|  |  | ||||||
|  | @ -95,6 +95,9 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De | ||||||
|     offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); |     offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); | ||||||
| 
 | 
 | ||||||
|     auto& gpu = system.GPU(); |     auto& gpu = system.GPU(); | ||||||
|  |     //auto& device_memory = system.Host1x().MemoryManager();
 | ||||||
|  |     auto* session = core.GetSession(sessions[fd]); | ||||||
|  | 
 | ||||||
|     if (gpu.UseNvdec()) { |     if (gpu.UseNvdec()) { | ||||||
|         for (std::size_t i = 0; i < syncpt_increments.size(); i++) { |         for (std::size_t i = 0; i < syncpt_increments.size(); i++) { | ||||||
|             const SyncptIncr& syncpt_incr = syncpt_increments[i]; |             const SyncptIncr& syncpt_incr = syncpt_increments[i]; | ||||||
|  | @ -106,7 +109,7 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De | ||||||
|         const auto object = nvmap.GetHandle(cmd_buffer.memory_id); |         const auto object = nvmap.GetHandle(cmd_buffer.memory_id); | ||||||
|         ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); |         ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); | ||||||
|         Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); |         Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); | ||||||
|         system.ApplicationMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), |         session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), | ||||||
|                                              cmdlist.size() * sizeof(u32)); |                                              cmdlist.size() * sizeof(u32)); | ||||||
|         gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); |         gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); | ||||||
|     } |     } | ||||||
|  | @ -136,7 +139,8 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) { | ||||||
| NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) { | NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) { | ||||||
|     const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); |     const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); | ||||||
|     for (size_t i = 0; i < num_entries; i++) { |     for (size_t i = 0; i < num_entries; i++) { | ||||||
|         entries[i].map_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd]); |         DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true); | ||||||
|  |         entries[i].map_address = static_cast<u32>(pin_address); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return NvResult::Success; |     return NvResult::Success; | ||||||
|  |  | ||||||
|  | @ -123,8 +123,8 @@ NvResult nvmap::IocAlloc(IocAllocParams& params, DeviceFD fd) { | ||||||
|         return NvResult::InsufficientMemory; |         return NvResult::InsufficientMemory; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const auto result = file.AllocateHandle(params.handle, params.flags, params.align, params.kind, |     const auto result = | ||||||
|                                             params.address, sessions[fd]); |         handle_description->Alloc(params.flags, params.align, params.kind, params.address); | ||||||
|     if (result != NvResult::Success) { |     if (result != NvResult::Success) { | ||||||
|         LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle); |         LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle); | ||||||
|         return result; |         return result; | ||||||
|  |  | ||||||
|  | @ -13,8 +13,6 @@ | ||||||
| #include "core/hle/service/nvdrv/nvdrv.h" | #include "core/hle/service/nvdrv/nvdrv.h" | ||||||
| #include "core/hle/service/nvdrv/nvdrv_interface.h" | #include "core/hle/service/nvdrv/nvdrv_interface.h" | ||||||
| 
 | 
 | ||||||
| #pragma optimize("", off) |  | ||||||
| 
 |  | ||||||
| namespace Service::Nvidia { | namespace Service::Nvidia { | ||||||
| 
 | 
 | ||||||
| void NVDRV::Open(HLERequestContext& ctx) { | void NVDRV::Open(HLERequestContext& ctx) { | ||||||
|  | @ -173,8 +171,8 @@ void NVDRV::Initialize(HLERequestContext& ctx) { | ||||||
|     [[maybe_unused]] const auto transfer_memory_size = rp.Pop<u32>(); |     [[maybe_unused]] const auto transfer_memory_size = rp.Pop<u32>(); | ||||||
| 
 | 
 | ||||||
|     auto& container = nvdrv->GetContainer(); |     auto& container = nvdrv->GetContainer(); | ||||||
|     auto process = ctx.GetObjectFromHandle(process_handle); |     auto process = ctx.GetObjectFromHandle<Kernel::KProcess>(process_handle); | ||||||
|     session_id = container.OpenSession(process->DynamicCast<Kernel::KProcess*>()); |     session_id = container.OpenSession(process.GetPointerUnsafe()); | ||||||
| 
 | 
 | ||||||
|     is_initialized = true; |     is_initialized = true; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -24,6 +24,8 @@ | ||||||
| #include "core/hle/kernel/k_process.h" | #include "core/hle/kernel/k_process.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
|  | #include "video_core/host1x/host1x.h" | ||||||
| #include "video_core/rasterizer_download_area.h" | #include "video_core/rasterizer_download_area.h" | ||||||
| 
 | 
 | ||||||
| namespace Core::Memory { | namespace Core::Memory { | ||||||
|  | @ -638,15 +640,16 @@ struct Memory::Impl { | ||||||
|                   base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); |                   base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); | ||||||
| 
 | 
 | ||||||
|         // During boot, current_page_table might not be set yet, in which case we need not flush
 |         // During boot, current_page_table might not be set yet, in which case we need not flush
 | ||||||
|         if (system.IsPoweredOn()) { |         /*if (system.IsPoweredOn()) {
 | ||||||
|             auto& gpu = system.GPU(); |             auto& gpu = system.GPU(); | ||||||
|             for (u64 i = 0; i < size; i++) { |             for (u64 i = 0; i < size; i++) { | ||||||
|                 const auto page = base + i; |                 const auto page = base + i; | ||||||
|                 if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) { |                 if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) { | ||||||
|  | 
 | ||||||
|                     gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE); |                     gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         }*/ | ||||||
| 
 | 
 | ||||||
|         const auto end = base + size; |         const auto end = base + size; | ||||||
|         ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", |         ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", | ||||||
|  | @ -811,10 +814,15 @@ struct Memory::Impl { | ||||||
|         return true; |         return true; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void HandleRasterizerDownload(VAddr address, size_t size) { |     void HandleRasterizerDownload(VAddr v_address, size_t size) { | ||||||
|  |         const auto* p = GetPointerImpl( | ||||||
|  |             v_address, []() {}, []() {}); | ||||||
|  |         auto& gpu_device_memory = system.Host1x().MemoryManager(); | ||||||
|  |         DAddr address = | ||||||
|  |             gpu_device_memory.GetAddressFromPAddr(system.DeviceMemory().GetRawPhysicalAddr(p)); | ||||||
|         const size_t core = system.GetCurrentHostThreadID(); |         const size_t core = system.GetCurrentHostThreadID(); | ||||||
|         auto& current_area = rasterizer_read_areas[core]; |         auto& current_area = rasterizer_read_areas[core]; | ||||||
|         const VAddr end_address = address + size; |         const DAddr end_address = address + size; | ||||||
|         if (current_area.start_address <= address && end_address <= current_area.end_address) |         if (current_area.start_address <= address && end_address <= current_area.end_address) | ||||||
|             [[likely]] { |             [[likely]] { | ||||||
|             return; |             return; | ||||||
|  | @ -822,7 +830,10 @@ struct Memory::Impl { | ||||||
|         current_area = system.GPU().OnCPURead(address, size); |         current_area = system.GPU().OnCPURead(address, size); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void HandleRasterizerWrite(VAddr address, size_t size) { |     void HandleRasterizerWrite(VAddr v_address, size_t size) { | ||||||
|  |         const auto* p = GetPointerImpl( | ||||||
|  |             v_address, []() {}, []() {}); | ||||||
|  |         PAddr address = system.DeviceMemory().GetRawPhysicalAddr(p); | ||||||
|         constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; |         constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; | ||||||
|         const size_t core = std::min(system.GetCurrentHostThreadID(), |         const size_t core = std::min(system.GetCurrentHostThreadID(), | ||||||
|                                      sys_core); // any other calls threads go to syscore.
 |                                      sys_core); // any other calls threads go to syscore.
 | ||||||
|  | @ -836,7 +847,7 @@ struct Memory::Impl { | ||||||
|             } |             } | ||||||
|         }); |         }); | ||||||
|         auto& current_area = rasterizer_write_areas[core]; |         auto& current_area = rasterizer_write_areas[core]; | ||||||
|         VAddr subaddress = address >> YUZU_PAGEBITS; |         PAddr subaddress = address >> YUZU_PAGEBITS; | ||||||
|         bool do_collection = current_area.last_address == subaddress; |         bool do_collection = current_area.last_address == subaddress; | ||||||
|         if (!do_collection) [[unlikely]] { |         if (!do_collection) [[unlikely]] { | ||||||
|             do_collection = system.GPU().OnCPUWrite(address, size); |             do_collection = system.GPU().OnCPUWrite(address, size); | ||||||
|  | @ -849,7 +860,7 @@ struct Memory::Impl { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     struct GPUDirtyState { |     struct GPUDirtyState { | ||||||
|         VAddr last_address; |         PAddr last_address; | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { |     void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { | ||||||
|  |  | ||||||
|  | @ -498,209 +498,4 @@ private: | ||||||
|     std::unique_ptr<Impl> impl; |     std::unique_ptr<Impl> impl; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| enum GuestMemoryFlags : u32 { |  | ||||||
|     Read = 1 << 0, |  | ||||||
|     Write = 1 << 1, |  | ||||||
|     Safe = 1 << 2, |  | ||||||
|     Cached = 1 << 3, |  | ||||||
| 
 |  | ||||||
|     SafeRead = Read | Safe, |  | ||||||
|     SafeWrite = Write | Safe, |  | ||||||
|     SafeReadWrite = SafeRead | SafeWrite, |  | ||||||
|     SafeReadCachedWrite = SafeReadWrite | Cached, |  | ||||||
| 
 |  | ||||||
|     UnsafeRead = Read, |  | ||||||
|     UnsafeWrite = Write, |  | ||||||
|     UnsafeReadWrite = UnsafeRead | UnsafeWrite, |  | ||||||
|     UnsafeReadCachedWrite = UnsafeReadWrite | Cached, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| namespace { |  | ||||||
| template <typename M, typename T, GuestMemoryFlags FLAGS> |  | ||||||
| class GuestMemory { |  | ||||||
|     using iterator = T*; |  | ||||||
|     using const_iterator = const T*; |  | ||||||
|     using value_type = T; |  | ||||||
|     using element_type = T; |  | ||||||
|     using iterator_category = std::contiguous_iterator_tag; |  | ||||||
| 
 |  | ||||||
| public: |  | ||||||
|     GuestMemory() = delete; |  | ||||||
|     explicit GuestMemory(M& memory, u64 addr, std::size_t size, |  | ||||||
|                          Common::ScratchBuffer<T>* backup = nullptr) |  | ||||||
|         : m_memory{memory}, m_addr{addr}, m_size{size} { |  | ||||||
|         static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); |  | ||||||
|         if constexpr (FLAGS & GuestMemoryFlags::Read) { |  | ||||||
|             Read(addr, size, backup); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     ~GuestMemory() = default; |  | ||||||
| 
 |  | ||||||
|     T* data() noexcept { |  | ||||||
|         return m_data_span.data(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const T* data() const noexcept { |  | ||||||
|         return m_data_span.data(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     size_t size() const noexcept { |  | ||||||
|         return m_size; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     size_t size_bytes() const noexcept { |  | ||||||
|         return this->size() * sizeof(T); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] T* begin() noexcept { |  | ||||||
|         return this->data(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const T* begin() const noexcept { |  | ||||||
|         return this->data(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] T* end() noexcept { |  | ||||||
|         return this->data() + this->size(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] const T* end() const noexcept { |  | ||||||
|         return this->data() + this->size(); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     T& operator[](size_t index) noexcept { |  | ||||||
|         return m_data_span[index]; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     const T& operator[](size_t index) const noexcept { |  | ||||||
|         return m_data_span[index]; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void SetAddressAndSize(u64 addr, std::size_t size) noexcept { |  | ||||||
|         m_addr = addr; |  | ||||||
|         m_size = size; |  | ||||||
|         m_addr_changed = true; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     std::span<T> Read(u64 addr, std::size_t size, |  | ||||||
|                       Common::ScratchBuffer<T>* backup = nullptr) noexcept { |  | ||||||
|         m_addr = addr; |  | ||||||
|         m_size = size; |  | ||||||
|         if (m_size == 0) { |  | ||||||
|             m_is_data_copy = true; |  | ||||||
|             return {}; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         if (this->TrySetSpan()) { |  | ||||||
|             if constexpr (FLAGS & GuestMemoryFlags::Safe) { |  | ||||||
|                 m_memory.FlushRegion(m_addr, this->size_bytes()); |  | ||||||
|             } |  | ||||||
|         } else { |  | ||||||
|             if (backup) { |  | ||||||
|                 backup->resize_destructive(this->size()); |  | ||||||
|                 m_data_span = *backup; |  | ||||||
|             } else { |  | ||||||
|                 m_data_copy.resize(this->size()); |  | ||||||
|                 m_data_span = std::span(m_data_copy); |  | ||||||
|             } |  | ||||||
|             m_is_data_copy = true; |  | ||||||
|             m_span_valid = true; |  | ||||||
|             if constexpr (FLAGS & GuestMemoryFlags::Safe) { |  | ||||||
|                 m_memory.ReadBlock(m_addr, this->data(), this->size_bytes()); |  | ||||||
|             } else { |  | ||||||
|                 m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes()); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         return m_data_span; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     void Write(std::span<T> write_data) noexcept { |  | ||||||
|         if constexpr (FLAGS & GuestMemoryFlags::Cached) { |  | ||||||
|             m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes()); |  | ||||||
|         } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { |  | ||||||
|             m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes()); |  | ||||||
|         } else { |  | ||||||
|             m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes()); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool TrySetSpan() noexcept { |  | ||||||
|         if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) { |  | ||||||
|             m_data_span = {reinterpret_cast<T*>(ptr), this->size()}; |  | ||||||
|             m_span_valid = true; |  | ||||||
|             return true; |  | ||||||
|         } |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
| protected: |  | ||||||
|     bool IsDataCopy() const noexcept { |  | ||||||
|         return m_is_data_copy; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     bool AddressChanged() const noexcept { |  | ||||||
|         return m_addr_changed; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     M& m_memory; |  | ||||||
|     u64 m_addr{}; |  | ||||||
|     size_t m_size{}; |  | ||||||
|     std::span<T> m_data_span{}; |  | ||||||
|     std::vector<T> m_data_copy{}; |  | ||||||
|     bool m_span_valid{false}; |  | ||||||
|     bool m_is_data_copy{false}; |  | ||||||
|     bool m_addr_changed{false}; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| template <typename M, typename T, GuestMemoryFlags FLAGS> |  | ||||||
| class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> { |  | ||||||
| public: |  | ||||||
|     GuestMemoryScoped() = delete; |  | ||||||
|     explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size, |  | ||||||
|                                Common::ScratchBuffer<T>* backup = nullptr) |  | ||||||
|         : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) { |  | ||||||
|         if constexpr (!(FLAGS & GuestMemoryFlags::Read)) { |  | ||||||
|             if (!this->TrySetSpan()) { |  | ||||||
|                 if (backup) { |  | ||||||
|                     this->m_data_span = *backup; |  | ||||||
|                     this->m_span_valid = true; |  | ||||||
|                     this->m_is_data_copy = true; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     ~GuestMemoryScoped() { |  | ||||||
|         if constexpr (FLAGS & GuestMemoryFlags::Write) { |  | ||||||
|             if (this->size() == 0) [[unlikely]] { |  | ||||||
|                 return; |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             if (this->AddressChanged() || this->IsDataCopy()) { |  | ||||||
|                 ASSERT(this->m_span_valid); |  | ||||||
|                 if constexpr (FLAGS & GuestMemoryFlags::Cached) { |  | ||||||
|                     this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes()); |  | ||||||
|                 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { |  | ||||||
|                     this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes()); |  | ||||||
|                 } else { |  | ||||||
|                     this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes()); |  | ||||||
|                 } |  | ||||||
|             } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) || |  | ||||||
|                                  (FLAGS & GuestMemoryFlags::Cached)) { |  | ||||||
|                 this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes()); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| } // namespace
 |  | ||||||
| 
 |  | ||||||
| template <typename T, GuestMemoryFlags FLAGS> |  | ||||||
| using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>; |  | ||||||
| template <typename T, GuestMemoryFlags FLAGS> |  | ||||||
| using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>; |  | ||||||
| template <typename T, GuestMemoryFlags FLAGS> |  | ||||||
| using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>; |  | ||||||
| template <typename T, GuestMemoryFlags FLAGS> |  | ||||||
| using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>; |  | ||||||
| } // namespace Core::Memory
 | } // namespace Core::Memory
 | ||||||
|  |  | ||||||
|  | @ -95,6 +95,7 @@ add_library(video_core STATIC | ||||||
|     gpu.h |     gpu.h | ||||||
|     gpu_thread.cpp |     gpu_thread.cpp | ||||||
|     gpu_thread.h |     gpu_thread.h | ||||||
|  |     guest_memory.h | ||||||
|     invalidation_accumulator.h |     invalidation_accumulator.h | ||||||
|     memory_manager.cpp |     memory_manager.cpp | ||||||
|     memory_manager.h |     memory_manager.h | ||||||
|  | @ -107,8 +108,6 @@ add_library(video_core STATIC | ||||||
|     query_cache/query_stream.h |     query_cache/query_stream.h | ||||||
|     query_cache/types.h |     query_cache/types.h | ||||||
|     query_cache.h |     query_cache.h | ||||||
|     rasterizer_accelerated.cpp |  | ||||||
|     rasterizer_accelerated.h |  | ||||||
|     rasterizer_interface.h |     rasterizer_interface.h | ||||||
|     renderer_base.cpp |     renderer_base.cpp | ||||||
|     renderer_base.h |     renderer_base.h | ||||||
|  |  | ||||||
|  | @ -33,13 +33,12 @@ struct NullBufferParams {}; | ||||||
|  * |  * | ||||||
|  * The buffer size and address is forcefully aligned to CPU page boundaries. |  * The buffer size and address is forcefully aligned to CPU page boundaries. | ||||||
|  */ |  */ | ||||||
| template <class RasterizerInterface> |  | ||||||
| class BufferBase { | class BufferBase { | ||||||
| public: | public: | ||||||
|     static constexpr u64 BASE_PAGE_BITS = 16; |     static constexpr u64 BASE_PAGE_BITS = 16; | ||||||
|     static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; |     static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; | ||||||
| 
 | 
 | ||||||
|     explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) |     explicit BufferBase(VAddr cpu_addr_, u64 size_bytes_) | ||||||
|         : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} |         : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} | ||||||
| 
 | 
 | ||||||
|     explicit BufferBase(NullBufferParams) {} |     explicit BufferBase(NullBufferParams) {} | ||||||
|  |  | ||||||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -32,7 +32,6 @@ | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "common/scope_exit.h" | #include "common/scope_exit.h" | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
| #include "core/memory.h" |  | ||||||
| #include "video_core/buffer_cache/buffer_base.h" | #include "video_core/buffer_cache/buffer_base.h" | ||||||
| #include "video_core/control/channel_state_cache.h" | #include "video_core/control/channel_state_cache.h" | ||||||
| #include "video_core/delayed_destruction_ring.h" | #include "video_core/delayed_destruction_ring.h" | ||||||
|  | @ -41,7 +40,6 @@ | ||||||
| #include "video_core/engines/kepler_compute.h" | #include "video_core/engines/kepler_compute.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/rasterizer_interface.h" |  | ||||||
| #include "video_core/surface.h" | #include "video_core/surface.h" | ||||||
| #include "video_core/texture_cache/slot_vector.h" | #include "video_core/texture_cache/slot_vector.h" | ||||||
| #include "video_core/texture_cache/types.h" | #include "video_core/texture_cache/types.h" | ||||||
|  | @ -94,7 +92,7 @@ static constexpr BufferId NULL_BUFFER_ID{0}; | ||||||
| static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | ||||||
| 
 | 
 | ||||||
| struct Binding { | struct Binding { | ||||||
|     VAddr cpu_addr{}; |     DAddr device_addr{}; | ||||||
|     u32 size{}; |     u32 size{}; | ||||||
|     BufferId buffer_id; |     BufferId buffer_id; | ||||||
| }; | }; | ||||||
|  | @ -104,7 +102,7 @@ struct TextureBufferBinding : Binding { | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static constexpr Binding NULL_BINDING{ | static constexpr Binding NULL_BINDING{ | ||||||
|     .cpu_addr = 0, |     .device_addr = 0, | ||||||
|     .size = 0, |     .size = 0, | ||||||
|     .buffer_id = NULL_BUFFER_ID, |     .buffer_id = NULL_BUFFER_ID, | ||||||
| }; | }; | ||||||
|  | @ -204,10 +202,10 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | ||||||
|     using Async_Buffer = typename P::Async_Buffer; |     using Async_Buffer = typename P::Async_Buffer; | ||||||
|     using MemoryTracker = typename P::MemoryTracker; |     using MemoryTracker = typename P::MemoryTracker; | ||||||
| 
 | 
 | ||||||
|     using IntervalCompare = std::less<VAddr>; |     using IntervalCompare = std::less<DAddr>; | ||||||
|     using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>; |     using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>; | ||||||
|     using IntervalAllocator = boost::fast_pool_allocator<VAddr>; |     using IntervalAllocator = boost::fast_pool_allocator<DAddr>; | ||||||
|     using IntervalSet = boost::icl::interval_set<VAddr>; |     using IntervalSet = boost::icl::interval_set<DAddr>; | ||||||
|     using IntervalType = typename IntervalSet::interval_type; |     using IntervalType = typename IntervalSet::interval_type; | ||||||
| 
 | 
 | ||||||
|     template <typename Type> |     template <typename Type> | ||||||
|  | @ -230,32 +228,31 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | ||||||
| 
 | 
 | ||||||
|     using OverlapCombine = counter_add_functor<int>; |     using OverlapCombine = counter_add_functor<int>; | ||||||
|     using OverlapSection = boost::icl::inter_section<int>; |     using OverlapSection = boost::icl::inter_section<int>; | ||||||
|     using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; |     using OverlapCounter = boost::icl::split_interval_map<DAddr, int>; | ||||||
| 
 | 
 | ||||||
|     struct OverlapResult { |     struct OverlapResult { | ||||||
|         boost::container::small_vector<BufferId, 16> ids; |         boost::container::small_vector<BufferId, 16> ids; | ||||||
|         VAddr begin; |         DAddr begin; | ||||||
|         VAddr end; |         DAddr end; | ||||||
|         bool has_stream_leap = false; |         bool has_stream_leap = false; | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
| public: | public: | ||||||
|     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, |     explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_); | ||||||
|                          Core::Memory::Memory& cpu_memory_, Runtime& runtime_); |  | ||||||
| 
 | 
 | ||||||
|     void TickFrame(); |     void TickFrame(); | ||||||
| 
 | 
 | ||||||
|     void WriteMemory(VAddr cpu_addr, u64 size); |     void WriteMemory(DAddr device_addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     void CachedWriteMemory(VAddr cpu_addr, u64 size); |     void CachedWriteMemory(DAddr device_addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     bool OnCPUWrite(VAddr cpu_addr, u64 size); |     bool OnCPUWrite(DAddr device_addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     void DownloadMemory(VAddr cpu_addr, u64 size); |     void DownloadMemory(DAddr device_addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); |     std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr device_addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); |     bool InlineMemory(DAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | ||||||
| 
 | 
 | ||||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); |     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | ||||||
| 
 | 
 | ||||||
|  | @ -300,7 +297,7 @@ public: | ||||||
|                                                        ObtainBufferSynchronize sync_info, |                                                        ObtainBufferSynchronize sync_info, | ||||||
|                                                        ObtainBufferOperation post_op); |                                                        ObtainBufferOperation post_op); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, |     [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(DAddr gpu_addr, u32 size, | ||||||
|                                                           ObtainBufferSynchronize sync_info, |                                                           ObtainBufferSynchronize sync_info, | ||||||
|                                                           ObtainBufferOperation post_op); |                                                           ObtainBufferOperation post_op); | ||||||
|     void FlushCachedWrites(); |     void FlushCachedWrites(); | ||||||
|  | @ -326,13 +323,13 @@ public: | ||||||
|     bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); |     bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | ||||||
| 
 | 
 | ||||||
|     /// Return true when a CPU region is modified from the GPU
 |     /// Return true when a CPU region is modified from the GPU
 | ||||||
|     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |     [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size); | ||||||
| 
 | 
 | ||||||
|     /// Return true when a region is registered on the cache
 |     /// Return true when a region is registered on the cache
 | ||||||
|     [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); |     [[nodiscard]] bool IsRegionRegistered(DAddr addr, size_t size); | ||||||
| 
 | 
 | ||||||
|     /// Return true when a CPU region is modified from the CPU
 |     /// Return true when a CPU region is modified from the CPU
 | ||||||
|     [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); |     [[nodiscard]] bool IsRegionCpuModified(DAddr addr, size_t size); | ||||||
| 
 | 
 | ||||||
|     void SetDrawIndirect( |     void SetDrawIndirect( | ||||||
|         const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { |         const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { | ||||||
|  | @ -366,9 +363,9 @@ private: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     template <typename Func> |     template <typename Func> | ||||||
|     void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { |     void ForEachBufferInRange(DAddr device_addr, u64 size, Func&& func) { | ||||||
|         const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE); |         const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE); | ||||||
|         for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) { |         for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) { | ||||||
|             const BufferId buffer_id = page_table[page]; |             const BufferId buffer_id = page_table[page]; | ||||||
|             if (!buffer_id) { |             if (!buffer_id) { | ||||||
|                 ++page; |                 ++page; | ||||||
|  | @ -377,15 +374,15 @@ private: | ||||||
|             Buffer& buffer = slot_buffers[buffer_id]; |             Buffer& buffer = slot_buffers[buffer_id]; | ||||||
|             func(buffer_id, buffer); |             func(buffer_id, buffer); | ||||||
| 
 | 
 | ||||||
|             const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); |             const DAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||||||
|             page = Common::DivCeil(end_addr, CACHING_PAGESIZE); |             page = Common::DivCeil(end_addr, CACHING_PAGESIZE); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     template <typename Func> |     template <typename Func> | ||||||
|     void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) { |     void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) { | ||||||
|         const VAddr start_address = cpu_addr; |         const DAddr start_address = device_addr; | ||||||
|         const VAddr end_address = start_address + size; |         const DAddr end_address = start_address + size; | ||||||
|         const IntervalType search_interval{start_address, end_address}; |         const IntervalType search_interval{start_address, end_address}; | ||||||
|         auto it = current_range.lower_bound(search_interval); |         auto it = current_range.lower_bound(search_interval); | ||||||
|         if (it == current_range.end()) { |         if (it == current_range.end()) { | ||||||
|  | @ -393,8 +390,8 @@ private: | ||||||
|         } |         } | ||||||
|         auto end_it = current_range.upper_bound(search_interval); |         auto end_it = current_range.upper_bound(search_interval); | ||||||
|         for (; it != end_it; it++) { |         for (; it != end_it; it++) { | ||||||
|             VAddr inter_addr_end = it->upper(); |             DAddr inter_addr_end = it->upper(); | ||||||
|             VAddr inter_addr = it->lower(); |             DAddr inter_addr = it->lower(); | ||||||
|             if (inter_addr_end > end_address) { |             if (inter_addr_end > end_address) { | ||||||
|                 inter_addr_end = end_address; |                 inter_addr_end = end_address; | ||||||
|             } |             } | ||||||
|  | @ -406,10 +403,10 @@ private: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     template <typename Func> |     template <typename Func> | ||||||
|     void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, |     void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size, | ||||||
|                                  Func&& func) { |                                  Func&& func) { | ||||||
|         const VAddr start_address = cpu_addr; |         const DAddr start_address = device_addr; | ||||||
|         const VAddr end_address = start_address + size; |         const DAddr end_address = start_address + size; | ||||||
|         const IntervalType search_interval{start_address, end_address}; |         const IntervalType search_interval{start_address, end_address}; | ||||||
|         auto it = current_range.lower_bound(search_interval); |         auto it = current_range.lower_bound(search_interval); | ||||||
|         if (it == current_range.end()) { |         if (it == current_range.end()) { | ||||||
|  | @ -418,8 +415,8 @@ private: | ||||||
|         auto end_it = current_range.upper_bound(search_interval); |         auto end_it = current_range.upper_bound(search_interval); | ||||||
|         for (; it != end_it; it++) { |         for (; it != end_it; it++) { | ||||||
|             auto& inter = it->first; |             auto& inter = it->first; | ||||||
|             VAddr inter_addr_end = inter.upper(); |             DAddr inter_addr_end = inter.upper(); | ||||||
|             VAddr inter_addr = inter.lower(); |             DAddr inter_addr = inter.lower(); | ||||||
|             if (inter_addr_end > end_address) { |             if (inter_addr_end > end_address) { | ||||||
|                 inter_addr_end = end_address; |                 inter_addr_end = end_address; | ||||||
|             } |             } | ||||||
|  | @ -451,9 +448,9 @@ private: | ||||||
|         } while (any_removals); |         } while (any_removals); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     static bool IsRangeGranular(VAddr cpu_addr, size_t size) { |     static bool IsRangeGranular(DAddr device_addr, size_t size) { | ||||||
|         return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == |         return (device_addr & ~Core::Memory::YUZU_PAGEMASK) == | ||||||
|                ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); |                ((device_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void RunGarbageCollector(); |     void RunGarbageCollector(); | ||||||
|  | @ -508,15 +505,15 @@ private: | ||||||
| 
 | 
 | ||||||
|     void UpdateComputeTextureBuffers(); |     void UpdateComputeTextureBuffers(); | ||||||
| 
 | 
 | ||||||
|     void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); |     void MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); |     [[nodiscard]] BufferId FindBuffer(DAddr device_addr, u32 size); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); |     [[nodiscard]] OverlapResult ResolveOverlaps(DAddr device_addr, u32 wanted_size); | ||||||
| 
 | 
 | ||||||
|     void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); |     void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); |     [[nodiscard]] BufferId CreateBuffer(DAddr device_addr, u32 wanted_size); | ||||||
| 
 | 
 | ||||||
|     void Register(BufferId buffer_id); |     void Register(BufferId buffer_id); | ||||||
| 
 | 
 | ||||||
|  | @ -527,7 +524,7 @@ private: | ||||||
| 
 | 
 | ||||||
|     void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; |     void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; | ||||||
| 
 | 
 | ||||||
|     bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); |     bool SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size); | ||||||
| 
 | 
 | ||||||
|     void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |     void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||||||
|                       std::span<BufferCopy> copies); |                       std::span<BufferCopy> copies); | ||||||
|  | @ -539,7 +536,7 @@ private: | ||||||
| 
 | 
 | ||||||
|     void DownloadBufferMemory(Buffer& buffer_id); |     void DownloadBufferMemory(Buffer& buffer_id); | ||||||
| 
 | 
 | ||||||
|     void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); |     void DownloadBufferMemory(Buffer& buffer_id, DAddr device_addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); |     void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); | ||||||
| 
 | 
 | ||||||
|  | @ -549,7 +546,7 @@ private: | ||||||
|     [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, |     [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | ||||||
|                                                                PixelFormat format); |                                                                PixelFormat format); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); |     [[nodiscard]] std::span<const u8> ImmediateBufferWithData(DAddr device_addr, size_t size); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); |     [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | ||||||
| 
 | 
 | ||||||
|  | @ -557,11 +554,10 @@ private: | ||||||
| 
 | 
 | ||||||
|     void ClearDownload(IntervalType subtract_interval); |     void ClearDownload(IntervalType subtract_interval); | ||||||
| 
 | 
 | ||||||
|     void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, |     void InlineMemoryImplementation(DAddr dest_address, size_t copy_size, | ||||||
|                                     std::span<const u8> inlined_buffer); |                                     std::span<const u8> inlined_buffer); | ||||||
| 
 | 
 | ||||||
|     VideoCore::RasterizerInterface& rasterizer; |     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||||
|     Core::Memory::Memory& cpu_memory; |  | ||||||
| 
 | 
 | ||||||
|     SlotVector<Buffer> slot_buffers; |     SlotVector<Buffer> slot_buffers; | ||||||
|     DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; |     DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | ||||||
|  | @ -598,7 +594,7 @@ private: | ||||||
|     u64 critical_memory = 0; |     u64 critical_memory = 0; | ||||||
|     BufferId inline_buffer_id; |     BufferId inline_buffer_id; | ||||||
| 
 | 
 | ||||||
|     std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; |     std::array<BufferId, ((1ULL << 34) >> CACHING_PAGEBITS)> page_table; | ||||||
|     Common::ScratchBuffer<u8> tmp_buffer; |     Common::ScratchBuffer<u8> tmp_buffer; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -17,19 +17,19 @@ | ||||||
| 
 | 
 | ||||||
| namespace VideoCommon { | namespace VideoCommon { | ||||||
| 
 | 
 | ||||||
| template <class RasterizerInterface> | template <typename DeviceTracker> | ||||||
| class MemoryTrackerBase { | class MemoryTrackerBase { | ||||||
|     static constexpr size_t MAX_CPU_PAGE_BITS = 39; |     static constexpr size_t MAX_CPU_PAGE_BITS = 34; | ||||||
|     static constexpr size_t HIGHER_PAGE_BITS = 22; |     static constexpr size_t HIGHER_PAGE_BITS = 22; | ||||||
|     static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; |     static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; | ||||||
|     static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; |     static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; | ||||||
|     static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); |     static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); | ||||||
|     static constexpr size_t MANAGER_POOL_SIZE = 32; |     static constexpr size_t MANAGER_POOL_SIZE = 32; | ||||||
|     static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; |     static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; | ||||||
|     using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>; |     using Manager = WordManager<DeviceTracker, WORDS_STACK_NEEDED>; | ||||||
| 
 | 
 | ||||||
| public: | public: | ||||||
|     MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {} |     MemoryTrackerBase(DeviceTracker& device_tracker_) : device_tracker{&device_tracker_} {} | ||||||
|     ~MemoryTrackerBase() = default; |     ~MemoryTrackerBase() = default; | ||||||
| 
 | 
 | ||||||
|     /// Returns the inclusive CPU modified range in a begin end pair
 |     /// Returns the inclusive CPU modified range in a begin end pair
 | ||||||
|  | @ -74,7 +74,7 @@ public: | ||||||
|             }); |             }); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Mark region as CPU modified, notifying the rasterizer about this change
 |     /// Mark region as CPU modified, notifying the device_tracker about this change
 | ||||||
|     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { |     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | ||||||
|         IteratePages<true>(dirty_cpu_addr, query_size, |         IteratePages<true>(dirty_cpu_addr, query_size, | ||||||
|                            [](Manager* manager, u64 offset, size_t size) { |                            [](Manager* manager, u64 offset, size_t size) { | ||||||
|  | @ -83,7 +83,7 @@ public: | ||||||
|                            }); |                            }); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Unmark region as CPU modified, notifying the rasterizer about this change
 |     /// Unmark region as CPU modified, notifying the device_tracker about this change
 | ||||||
|     void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { |     void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | ||||||
|         IteratePages<true>(dirty_cpu_addr, query_size, |         IteratePages<true>(dirty_cpu_addr, query_size, | ||||||
|                            [](Manager* manager, u64 offset, size_t size) { |                            [](Manager* manager, u64 offset, size_t size) { | ||||||
|  | @ -139,7 +139,7 @@ public: | ||||||
|             }); |             }); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Flushes cached CPU writes, and notify the rasterizer about the deltas
 |     /// Flushes cached CPU writes, and notify the device_tracker about the deltas
 | ||||||
|     void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { |     void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { | ||||||
|         IteratePages<false>(query_cpu_addr, query_size, |         IteratePages<false>(query_cpu_addr, query_size, | ||||||
|                             [](Manager* manager, [[maybe_unused]] u64 offset, |                             [](Manager* manager, [[maybe_unused]] u64 offset, | ||||||
|  | @ -280,7 +280,7 @@ private: | ||||||
|         manager_pool.emplace_back(); |         manager_pool.emplace_back(); | ||||||
|         auto& last_pool = manager_pool.back(); |         auto& last_pool = manager_pool.back(); | ||||||
|         for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { |         for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { | ||||||
|             new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE); |             new (&last_pool[i]) Manager(0, *device_tracker, HIGHER_PAGE_SIZE); | ||||||
|             free_managers.push_back(&last_pool[i]); |             free_managers.push_back(&last_pool[i]); | ||||||
|         } |         } | ||||||
|         return on_return(); |         return on_return(); | ||||||
|  | @ -293,7 +293,7 @@ private: | ||||||
| 
 | 
 | ||||||
|     std::unordered_set<u32> cached_pages; |     std::unordered_set<u32> cached_pages; | ||||||
| 
 | 
 | ||||||
|     RasterizerInterface* rasterizer = nullptr; |     DeviceTracker* device_tracker = nullptr; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace VideoCommon
 | } // namespace VideoCommon
 | ||||||
|  |  | ||||||
|  | @ -163,11 +163,11 @@ struct Words { | ||||||
|     WordsArray<stack_words> preflushable; |     WordsArray<stack_words> preflushable; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| template <class RasterizerInterface, size_t stack_words = 1> | template <class DeviceTracker, size_t stack_words = 1> | ||||||
| class WordManager { | class WordManager { | ||||||
| public: | public: | ||||||
|     explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes) |     explicit WordManager(VAddr cpu_addr_, DeviceTracker& tracker_, u64 size_bytes) | ||||||
|         : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {} |         : cpu_addr{cpu_addr_}, tracker{&tracker_}, words{size_bytes} {} | ||||||
| 
 | 
 | ||||||
|     explicit WordManager() = default; |     explicit WordManager() = default; | ||||||
| 
 | 
 | ||||||
|  | @ -279,7 +279,7 @@ public: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /**
 |     /**
 | ||||||
|      * Loop over each page in the given range, turn off those bits and notify the rasterizer if |      * Loop over each page in the given range, turn off those bits and notify the tracker if | ||||||
|      * needed. Call the given function on each turned off range. |      * needed. Call the given function on each turned off range. | ||||||
|      * |      * | ||||||
|      * @param query_cpu_range Base CPU address to loop over |      * @param query_cpu_range Base CPU address to loop over | ||||||
|  | @ -459,26 +459,26 @@ private: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /**
 |     /**
 | ||||||
|      * Notify rasterizer about changes in the CPU tracking state of a word in the buffer |      * Notify tracker about changes in the CPU tracking state of a word in the buffer | ||||||
|      * |      * | ||||||
|      * @param word_index   Index to the word to notify to the rasterizer |      * @param word_index   Index to the word to notify to the tracker | ||||||
|      * @param current_bits Current state of the word |      * @param current_bits Current state of the word | ||||||
|      * @param new_bits     New state of the word |      * @param new_bits     New state of the word | ||||||
|      * |      * | ||||||
|      * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages |      * @tparam add_to_tracker True when the tracker should start tracking the new pages | ||||||
|      */ |      */ | ||||||
|     template <bool add_to_rasterizer> |     template <bool add_to_tracker> | ||||||
|     void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { |     void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { | ||||||
|         u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; |         u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; | ||||||
|         VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; |         VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | ||||||
|         IteratePages(changed_bits, [&](size_t offset, size_t size) { |         IteratePages(changed_bits, [&](size_t offset, size_t size) { | ||||||
|             rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, |             tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, | ||||||
|                                                size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); |                                                size * BYTES_PER_PAGE, add_to_tracker ? 1 : -1); | ||||||
|         }); |         }); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     VAddr cpu_addr = 0; |     VAddr cpu_addr = 0; | ||||||
|     RasterizerInterface* rasterizer = nullptr; |     DeviceTracker* tracker = nullptr; | ||||||
|     Words<stack_words> words; |     Words<stack_words> words; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -5,10 +5,10 @@ | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/memory.h" |  | ||||||
| #include "video_core/dma_pusher.h" | #include "video_core/dma_pusher.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
|  | #include "video_core/guest_memory.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
|  | @ -85,15 +85,15 @@ bool DmaPusher::Step() { | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         const auto safe_process = [&] { |         const auto safe_process = [&] { | ||||||
|             Core::Memory::GpuGuestMemory<Tegra::CommandHeader, |             Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, | ||||||
|                                          Core::Memory::GuestMemoryFlags::SafeRead> |                                          Tegra::Memory::GuestMemoryFlags::SafeRead> | ||||||
|                 headers(memory_manager, dma_state.dma_get, command_list_header.size, |                 headers(memory_manager, dma_state.dma_get, command_list_header.size, | ||||||
|                         &command_headers); |                         &command_headers); | ||||||
|             ProcessCommands(headers); |             ProcessCommands(headers); | ||||||
|         }; |         }; | ||||||
|         const auto unsafe_process = [&] { |         const auto unsafe_process = [&] { | ||||||
|             Core::Memory::GpuGuestMemory<Tegra::CommandHeader, |             Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, | ||||||
|                                          Core::Memory::GuestMemoryFlags::UnsafeRead> |                                          Tegra::Memory::GuestMemoryFlags::UnsafeRead> | ||||||
|                 headers(memory_manager, dma_state.dma_get, command_list_header.size, |                 headers(memory_manager, dma_state.dma_get, command_list_header.size, | ||||||
|                         &command_headers); |                         &command_headers); | ||||||
|             ProcessCommands(headers); |             ProcessCommands(headers); | ||||||
|  |  | ||||||
|  | @ -5,8 +5,8 @@ | ||||||
| 
 | 
 | ||||||
| #include "common/algorithm.h" | #include "common/algorithm.h" | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "core/memory.h" |  | ||||||
| #include "video_core/engines/engine_upload.h" | #include "video_core/engines/engine_upload.h" | ||||||
|  | #include "video_core/guest_memory.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/textures/decoders.h" | #include "video_core/textures/decoders.h" | ||||||
|  | @ -68,7 +68,8 @@ void State::ProcessData(std::span<const u8> read_buffer) { | ||||||
|             true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, |             true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, | ||||||
|             regs.dest.BlockHeight(), regs.dest.BlockDepth()); |             regs.dest.BlockHeight(), regs.dest.BlockDepth()); | ||||||
| 
 | 
 | ||||||
|         Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> |         Tegra::Memory::GpuGuestMemoryScoped<u8, | ||||||
|  |                                             Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||||
|             tmp(memory_manager, address, dst_size, &tmp_buffer); |             tmp(memory_manager, address, dst_size, &tmp_buffer); | ||||||
| 
 | 
 | ||||||
|         Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, |         Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, | ||||||
|  |  | ||||||
|  | @ -11,6 +11,7 @@ | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/engines/maxwell_dma.h" | #include "video_core/engines/maxwell_dma.h" | ||||||
|  | #include "video_core/guest_memory.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/renderer_base.h" | #include "video_core/renderer_base.h" | ||||||
| #include "video_core/textures/decoders.h" | #include "video_core/textures/decoders.h" | ||||||
|  | @ -133,8 +134,8 @@ void MaxwellDMA::Launch() { | ||||||
|                 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |                 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | ||||||
|                 read_buffer.resize_destructive(16); |                 read_buffer.resize_destructive(16); | ||||||
|                 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |                 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | ||||||
|                     Core::Memory::GpuGuestMemoryScoped< |                     Tegra::Memory::GpuGuestMemoryScoped< | ||||||
|                         u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> |                         u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||||
|                         tmp_write_buffer(memory_manager, |                         tmp_write_buffer(memory_manager, | ||||||
|                                          convert_linear_2_blocklinear_addr(regs.offset_in + offset), |                                          convert_linear_2_blocklinear_addr(regs.offset_in + offset), | ||||||
|                                          16, &read_buffer); |                                          16, &read_buffer); | ||||||
|  | @ -146,16 +147,16 @@ void MaxwellDMA::Launch() { | ||||||
|                 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |                 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | ||||||
|                 read_buffer.resize_destructive(16); |                 read_buffer.resize_destructive(16); | ||||||
|                 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |                 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | ||||||
|                     Core::Memory::GpuGuestMemoryScoped< |                     Tegra::Memory::GpuGuestMemoryScoped< | ||||||
|                         u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> |                         u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||||
|                         tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); |                         tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); | ||||||
|                     tmp_write_buffer.SetAddressAndSize( |                     tmp_write_buffer.SetAddressAndSize( | ||||||
|                         convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); |                         convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); | ||||||
|                 } |                 } | ||||||
|             } else { |             } else { | ||||||
|                 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { |                 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { | ||||||
|                     Core::Memory::GpuGuestMemoryScoped< |                     Tegra::Memory::GpuGuestMemoryScoped< | ||||||
|                         u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> |                         u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||||
|                         tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, |                         tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, | ||||||
|                                          &read_buffer); |                                          &read_buffer); | ||||||
|                     tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); |                     tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); | ||||||
|  | @ -226,9 +227,9 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | ||||||
| 
 | 
 | ||||||
|     const size_t dst_size = dst_operand.pitch * regs.line_count; |     const size_t dst_size = dst_operand.pitch * regs.line_count; | ||||||
| 
 | 
 | ||||||
|     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( |     Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | ||||||
|         memory_manager, src_operand.address, src_size, &read_buffer); |         memory_manager, src_operand.address, src_size, &read_buffer); | ||||||
|     Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> |     Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> | ||||||
|         tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); |         tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); | ||||||
| 
 | 
 | ||||||
|     UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, |     UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, | ||||||
|  | @ -290,9 +291,9 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | ||||||
| 
 | 
 | ||||||
|     GPUVAddr src_addr = regs.offset_in; |     GPUVAddr src_addr = regs.offset_in; | ||||||
|     GPUVAddr dst_addr = regs.offset_out; |     GPUVAddr dst_addr = regs.offset_out; | ||||||
|     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( |     Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | ||||||
|         memory_manager, src_addr, src_size, &read_buffer); |         memory_manager, src_addr, src_size, &read_buffer); | ||||||
|     Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> |     Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> | ||||||
|         tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); |         tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); | ||||||
| 
 | 
 | ||||||
|     //  If the input is linear and the output is tiled, swizzle the input and copy it over.
 |     //  If the input is linear and the output is tiled, swizzle the input and copy it over.
 | ||||||
|  | @ -344,9 +345,9 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { | ||||||
| 
 | 
 | ||||||
|     intermediate_buffer.resize_destructive(mid_buffer_size); |     intermediate_buffer.resize_destructive(mid_buffer_size); | ||||||
| 
 | 
 | ||||||
|     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( |     Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | ||||||
|         memory_manager, regs.offset_in, src_size, &read_buffer); |         memory_manager, regs.offset_in, src_size, &read_buffer); | ||||||
|     Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> |     Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||||
|         tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); |         tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); | ||||||
| 
 | 
 | ||||||
|     UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, |     UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, | ||||||
|  |  | ||||||
|  | @ -11,6 +11,7 @@ | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/surface.h" | #include "video_core/surface.h" | ||||||
| #include "video_core/textures/decoders.h" | #include "video_core/textures/decoders.h" | ||||||
|  | #include "video_core/guest_memory.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| class MemoryManager; | class MemoryManager; | ||||||
|  | @ -160,7 +161,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | ||||||
|     const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); |     const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); | ||||||
|     const size_t src_size = get_surface_size(src, src_bytes_per_pixel); |     const size_t src_size = get_surface_size(src, src_bytes_per_pixel); | ||||||
| 
 | 
 | ||||||
|     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( |     Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( | ||||||
|         memory_manager, src.Address(), src_size, &impl->tmp_buffer); |         memory_manager, src.Address(), src_size, &impl->tmp_buffer); | ||||||
| 
 | 
 | ||||||
|     const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; |     const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; | ||||||
|  | @ -220,7 +221,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); |     const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); | ||||||
|     Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite> |     Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadWrite> | ||||||
|         tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); |         tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); | ||||||
| 
 | 
 | ||||||
|     if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { |     if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { | ||||||
|  |  | ||||||
|  | @ -14,7 +14,7 @@ namespace Tegra { | ||||||
|  * Struct describing framebuffer configuration |  * Struct describing framebuffer configuration | ||||||
|  */ |  */ | ||||||
| struct FramebufferConfig { | struct FramebufferConfig { | ||||||
|     VAddr address{}; |     DAddr address{}; | ||||||
|     u32 offset{}; |     u32 offset{}; | ||||||
|     u32 width{}; |     u32 width{}; | ||||||
|     u32 height{}; |     u32 height{}; | ||||||
|  |  | ||||||
|  | @ -34,6 +34,8 @@ | ||||||
| #include "video_core/renderer_base.h" | #include "video_core/renderer_base.h" | ||||||
| #include "video_core/shader_notify.h" | #include "video_core/shader_notify.h" | ||||||
| 
 | 
 | ||||||
|  | #pragma optimize("", off) | ||||||
|  | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| 
 | 
 | ||||||
| struct GPU::Impl { | struct GPU::Impl { | ||||||
|  | @ -95,8 +97,8 @@ struct GPU::Impl { | ||||||
| 
 | 
 | ||||||
|     /// Synchronizes CPU writes with Host GPU memory.
 |     /// Synchronizes CPU writes with Host GPU memory.
 | ||||||
|     void InvalidateGPUCache() { |     void InvalidateGPUCache() { | ||||||
|         std::function<void(VAddr, size_t)> callback_writes( |         std::function<void(PAddr, size_t)> callback_writes( | ||||||
|             [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); |             [this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); | ||||||
|         system.GatherGPUDirtyMemory(callback_writes); |         system.GatherGPUDirtyMemory(callback_writes); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -279,11 +281,11 @@ struct GPU::Impl { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 |     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||||
|     void FlushRegion(VAddr addr, u64 size) { |     void FlushRegion(DAddr addr, u64 size) { | ||||||
|         gpu_thread.FlushRegion(addr, size); |         gpu_thread.FlushRegion(addr, size); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     VideoCore::RasterizerDownloadArea OnCPURead(VAddr addr, u64 size) { |     VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) { | ||||||
|         auto raster_area = rasterizer->GetFlushArea(addr, size); |         auto raster_area = rasterizer->GetFlushArea(addr, size); | ||||||
|         if (raster_area.preemtive) { |         if (raster_area.preemtive) { | ||||||
|             return raster_area; |             return raster_area; | ||||||
|  | @ -299,16 +301,16 @@ struct GPU::Impl { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 |     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||||
|     void InvalidateRegion(VAddr addr, u64 size) { |     void InvalidateRegion(DAddr addr, u64 size) { | ||||||
|         gpu_thread.InvalidateRegion(addr, size); |         gpu_thread.InvalidateRegion(addr, size); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     bool OnCPUWrite(VAddr addr, u64 size) { |     bool OnCPUWrite(DAddr addr, u64 size) { | ||||||
|         return rasterizer->OnCPUWrite(addr, size); |         return rasterizer->OnCPUWrite(addr, size); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 |     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | ||||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) { |     void FlushAndInvalidateRegion(DAddr addr, u64 size) { | ||||||
|         gpu_thread.FlushAndInvalidateRegion(addr, size); |         gpu_thread.FlushAndInvalidateRegion(addr, size); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -437,7 +439,7 @@ void GPU::OnCommandListEnd() { | ||||||
|     impl->OnCommandListEnd(); |     impl->OnCommandListEnd(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| u64 GPU::RequestFlush(VAddr addr, std::size_t size) { | u64 GPU::RequestFlush(DAddr addr, std::size_t size) { | ||||||
|     return impl->RequestSyncOperation( |     return impl->RequestSyncOperation( | ||||||
|         [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); |         [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); | ||||||
| } | } | ||||||
|  | @ -557,23 +559,23 @@ void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||||||
|     impl->SwapBuffers(framebuffer); |     impl->SwapBuffers(framebuffer); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| VideoCore::RasterizerDownloadArea GPU::OnCPURead(VAddr addr, u64 size) { | VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) { | ||||||
|     return impl->OnCPURead(addr, size); |     return impl->OnCPURead(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPU::FlushRegion(VAddr addr, u64 size) { | void GPU::FlushRegion(DAddr addr, u64 size) { | ||||||
|     impl->FlushRegion(addr, size); |     impl->FlushRegion(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPU::InvalidateRegion(VAddr addr, u64 size) { | void GPU::InvalidateRegion(DAddr addr, u64 size) { | ||||||
|     impl->InvalidateRegion(addr, size); |     impl->InvalidateRegion(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool GPU::OnCPUWrite(VAddr addr, u64 size) { | bool GPU::OnCPUWrite(DAddr addr, u64 size) { | ||||||
|     return impl->OnCPUWrite(addr, size); |     return impl->OnCPUWrite(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | void GPU::FlushAndInvalidateRegion(DAddr addr, u64 size) { | ||||||
|     impl->FlushAndInvalidateRegion(addr, size); |     impl->FlushAndInvalidateRegion(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -158,7 +158,7 @@ public: | ||||||
|     void InitAddressSpace(Tegra::MemoryManager& memory_manager); |     void InitAddressSpace(Tegra::MemoryManager& memory_manager); | ||||||
| 
 | 
 | ||||||
|     /// Request a host GPU memory flush from the CPU.
 |     /// Request a host GPU memory flush from the CPU.
 | ||||||
|     [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); |     [[nodiscard]] u64 RequestFlush(DAddr addr, std::size_t size); | ||||||
| 
 | 
 | ||||||
|     /// Obtains current flush request fence id.
 |     /// Obtains current flush request fence id.
 | ||||||
|     [[nodiscard]] u64 CurrentSyncRequestFence() const; |     [[nodiscard]] u64 CurrentSyncRequestFence() const; | ||||||
|  | @ -242,20 +242,20 @@ public: | ||||||
|     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); |     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 |     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||||
|     [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(VAddr addr, u64 size); |     [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 |     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||||
|     void FlushRegion(VAddr addr, u64 size); |     void FlushRegion(DAddr addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 |     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||||
|     void InvalidateRegion(VAddr addr, u64 size); |     void InvalidateRegion(DAddr addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is
 |     /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is
 | ||||||
|     /// sensible, false otherwise
 |     /// sensible, false otherwise
 | ||||||
|     bool OnCPUWrite(VAddr addr, u64 size); |     bool OnCPUWrite(DAddr addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 |     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | ||||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size); |     void FlushAndInvalidateRegion(DAddr addr, u64 size); | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     struct Impl; |     struct Impl; | ||||||
|  |  | ||||||
|  | @ -82,7 +82,7 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||||||
|     PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); |     PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ThreadManager::FlushRegion(VAddr addr, u64 size) { | void ThreadManager::FlushRegion(DAddr addr, u64 size) { | ||||||
|     if (!is_async) { |     if (!is_async) { | ||||||
|         // Always flush with synchronous GPU mode
 |         // Always flush with synchronous GPU mode
 | ||||||
|         PushCommand(FlushRegionCommand(addr, size)); |         PushCommand(FlushRegionCommand(addr, size)); | ||||||
|  | @ -101,11 +101,11 @@ void ThreadManager::TickGPU() { | ||||||
|     PushCommand(GPUTickCommand()); |     PushCommand(GPUTickCommand()); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | void ThreadManager::InvalidateRegion(DAddr addr, u64 size) { | ||||||
|     rasterizer->OnCacheInvalidation(addr, size); |     rasterizer->OnCacheInvalidation(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size) { | ||||||
|     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
 |     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
 | ||||||
|     rasterizer->OnCacheInvalidation(addr, size); |     rasterizer->OnCacheInvalidation(addr, size); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -54,26 +54,26 @@ struct SwapBuffersCommand final { | ||||||
| 
 | 
 | ||||||
| /// Command to signal to the GPU thread to flush a region
 | /// Command to signal to the GPU thread to flush a region
 | ||||||
| struct FlushRegionCommand final { | struct FlushRegionCommand final { | ||||||
|     explicit constexpr FlushRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} |     explicit constexpr FlushRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {} | ||||||
| 
 | 
 | ||||||
|     VAddr addr; |     DAddr addr; | ||||||
|     u64 size; |     u64 size; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /// Command to signal to the GPU thread to invalidate a region
 | /// Command to signal to the GPU thread to invalidate a region
 | ||||||
| struct InvalidateRegionCommand final { | struct InvalidateRegionCommand final { | ||||||
|     explicit constexpr InvalidateRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} |     explicit constexpr InvalidateRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {} | ||||||
| 
 | 
 | ||||||
|     VAddr addr; |     DAddr addr; | ||||||
|     u64 size; |     u64 size; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /// Command to signal to the GPU thread to flush and invalidate a region
 | /// Command to signal to the GPU thread to flush and invalidate a region
 | ||||||
| struct FlushAndInvalidateRegionCommand final { | struct FlushAndInvalidateRegionCommand final { | ||||||
|     explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr_, u64 size_) |     explicit constexpr FlushAndInvalidateRegionCommand(DAddr addr_, u64 size_) | ||||||
|         : addr{addr_}, size{size_} {} |         : addr{addr_}, size{size_} {} | ||||||
| 
 | 
 | ||||||
|     VAddr addr; |     DAddr addr; | ||||||
|     u64 size; |     u64 size; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | @ -122,13 +122,13 @@ public: | ||||||
|     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); |     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 |     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||||
|     void FlushRegion(VAddr addr, u64 size); |     void FlushRegion(DAddr addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 |     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||||
|     void InvalidateRegion(VAddr addr, u64 size); |     void InvalidateRegion(DAddr addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 |     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | ||||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size); |     void FlushAndInvalidateRegion(DAddr addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     void TickGPU(); |     void TickGPU(); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										29
									
								
								src/video_core/guest_memory.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								src/video_core/guest_memory.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,29 @@ | ||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||||
|  | // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <iterator> | ||||||
|  | #include <memory> | ||||||
|  | #include <optional> | ||||||
|  | #include <span> | ||||||
|  | #include <vector> | ||||||
|  | 
 | ||||||
|  | #include "common/scratch_buffer.h" | ||||||
|  | #include "core/guest_memory.h" | ||||||
|  | #include "video_core/memory_manager.h" | ||||||
|  | 
 | ||||||
|  | namespace Tegra::Memory { | ||||||
|  | 
 | ||||||
|  | using GuestMemoryFlags = Core::Memory::GuestMemoryFlags; | ||||||
|  | 
 | ||||||
|  | template <typename T, GuestMemoryFlags FLAGS> | ||||||
|  | using DeviceGuestMemory = Core::Memory::GuestMemory<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>; | ||||||
|  | template <typename T, GuestMemoryFlags FLAGS> | ||||||
|  | using DeviceGuestMemoryScoped = Core::Memory::GuestMemoryScoped<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>; | ||||||
|  | template <typename T, GuestMemoryFlags FLAGS> | ||||||
|  | using GpuGuestMemory = Core::Memory::GuestMemory<Tegra::MemoryManager, T, FLAGS>; | ||||||
|  | template <typename T, GuestMemoryFlags FLAGS> | ||||||
|  | using GpuGuestMemoryScoped = Core::Memory::GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>; | ||||||
|  | 
 | ||||||
|  | } // namespace Tegra::Memory
 | ||||||
|  | @ -1,6 +1,8 @@ | ||||||
| // SPDX-FileCopyrightText: 2023 yuzu Emulator Project
 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project
 | ||||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 | // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||||
| 
 | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
| #include "core/device_memory_manager.h" | #include "core/device_memory_manager.h" | ||||||
| 
 | 
 | ||||||
| namespace VideoCore { | namespace VideoCore { | ||||||
|  | @ -12,8 +14,8 @@ namespace Tegra { | ||||||
| struct MaxwellDeviceMethods; | struct MaxwellDeviceMethods; | ||||||
| 
 | 
 | ||||||
| struct MaxwellDeviceTraits { | struct MaxwellDeviceTraits { | ||||||
|     static constexpr bool supports_pinning = true; |     static constexpr bool supports_pinning = false; | ||||||
|     static constexpr size_t device_virtual_bits = 34; |     static constexpr size_t device_virtual_bits = 32; | ||||||
|     using DeviceInterface = typename VideoCore::RasterizerInterface; |     using DeviceInterface = typename VideoCore::RasterizerInterface; | ||||||
|     using DeviceMethods = typename MaxwellDeviceMethods; |     using DeviceMethods = typename MaxwellDeviceMethods; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -7,22 +7,24 @@ | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/device_memory.h" |  | ||||||
| #include "core/hle/kernel/k_page_table.h" | #include "core/hle/kernel/k_page_table.h" | ||||||
| #include "core/hle/kernel/k_process.h" | #include "core/hle/kernel/k_process.h" | ||||||
|  | #include "video_core/guest_memory.h" | ||||||
|  | #include "video_core/host1x/host1x.h" | ||||||
| #include "video_core/invalidation_accumulator.h" | #include "video_core/invalidation_accumulator.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/renderer_base.h" | #include "video_core/renderer_base.h" | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| using Core::Memory::GuestMemoryFlags; | using Tegra::Memory::GuestMemoryFlags; | ||||||
| 
 | 
 | ||||||
| std::atomic<size_t> MemoryManager::unique_identifier_generator{}; | std::atomic<size_t> MemoryManager::unique_identifier_generator{}; | ||||||
| 
 | 
 | ||||||
| MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, | MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, | ||||||
|                              u64 page_bits_) |                              u64 page_bits_) | ||||||
|     : system{system_}, memory{system.ApplicationMemory()}, device_memory{system.DeviceMemory()}, |     : system{system_}, memory{system.Host1x().MemoryManager()}, | ||||||
|       address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, |       address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, | ||||||
|       entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, |       entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, | ||||||
|                                            page_bits != big_page_bits ? page_bits : 0}, |                                            page_bits != big_page_bits ? page_bits : 0}, | ||||||
|  | @ -42,7 +44,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 | ||||||
|     big_page_table_mask = big_page_table_size - 1; |     big_page_table_mask = big_page_table_size - 1; | ||||||
| 
 | 
 | ||||||
|     big_entries.resize(big_page_table_size / 32, 0); |     big_entries.resize(big_page_table_size / 32, 0); | ||||||
|     big_page_table_cpu.resize(big_page_table_size); |     big_page_table_dev.resize(big_page_table_size); | ||||||
|     big_page_continuous.resize(big_page_table_size / continuous_bits, 0); |     big_page_continuous.resize(big_page_table_size / continuous_bits, 0); | ||||||
|     entries.resize(page_table_size / 32, 0); |     entries.resize(page_table_size / 32, 0); | ||||||
| } | } | ||||||
|  | @ -100,7 +102,7 @@ inline void MemoryManager::SetBigPageContinuous(size_t big_page_index, bool valu | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <MemoryManager::EntryType entry_type> | template <MemoryManager::EntryType entry_type> | ||||||
| GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, | GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size, | ||||||
|                                     PTEKind kind) { |                                     PTEKind kind) { | ||||||
|     [[maybe_unused]] u64 remaining_size{size}; |     [[maybe_unused]] u64 remaining_size{size}; | ||||||
|     if constexpr (entry_type == EntryType::Mapped) { |     if constexpr (entry_type == EntryType::Mapped) { | ||||||
|  | @ -114,9 +116,9 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp | ||||||
|             rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); |             rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); | ||||||
|         } |         } | ||||||
|         if constexpr (entry_type == EntryType::Mapped) { |         if constexpr (entry_type == EntryType::Mapped) { | ||||||
|             const VAddr current_cpu_addr = cpu_addr + offset; |             const DAddr current_dev_addr = dev_addr + offset; | ||||||
|             const auto index = PageEntryIndex<false>(current_gpu_addr); |             const auto index = PageEntryIndex<false>(current_gpu_addr); | ||||||
|             const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits); |             const u32 sub_value = static_cast<u32>(current_dev_addr >> cpu_page_bits); | ||||||
|             page_table[index] = sub_value; |             page_table[index] = sub_value; | ||||||
|         } |         } | ||||||
|         remaining_size -= page_size; |         remaining_size -= page_size; | ||||||
|  | @ -126,7 +128,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <MemoryManager::EntryType entry_type> | template <MemoryManager::EntryType entry_type> | ||||||
| GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, | GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, | ||||||
|                                        size_t size, PTEKind kind) { |                                        size_t size, PTEKind kind) { | ||||||
|     [[maybe_unused]] u64 remaining_size{size}; |     [[maybe_unused]] u64 remaining_size{size}; | ||||||
|     for (u64 offset{}; offset < size; offset += big_page_size) { |     for (u64 offset{}; offset < size; offset += big_page_size) { | ||||||
|  | @ -137,20 +139,20 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr | ||||||
|             rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); |             rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); | ||||||
|         } |         } | ||||||
|         if constexpr (entry_type == EntryType::Mapped) { |         if constexpr (entry_type == EntryType::Mapped) { | ||||||
|             const VAddr current_cpu_addr = cpu_addr + offset; |             const DAddr current_dev_addr = dev_addr + offset; | ||||||
|             const auto index = PageEntryIndex<true>(current_gpu_addr); |             const auto index = PageEntryIndex<true>(current_gpu_addr); | ||||||
|             const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits); |             const u32 sub_value = static_cast<u32>(current_dev_addr >> cpu_page_bits); | ||||||
|             big_page_table_cpu[index] = sub_value; |             big_page_table_dev[index] = sub_value; | ||||||
|             const bool is_continuous = ([&] { |             const bool is_continuous = ([&] { | ||||||
|                 uintptr_t base_ptr{ |                 uintptr_t base_ptr{ | ||||||
|                     reinterpret_cast<uintptr_t>(memory.GetPointerSilent(current_cpu_addr))}; |                     reinterpret_cast<uintptr_t>(memory.GetPointer<u8>(current_dev_addr))}; | ||||||
|                 if (base_ptr == 0) { |                 if (base_ptr == 0) { | ||||||
|                     return false; |                     return false; | ||||||
|                 } |                 } | ||||||
|                 for (VAddr start_cpu = current_cpu_addr + page_size; |                 for (DAddr start_cpu = current_dev_addr + page_size; | ||||||
|                      start_cpu < current_cpu_addr + big_page_size; start_cpu += page_size) { |                      start_cpu < current_dev_addr + big_page_size; start_cpu += page_size) { | ||||||
|                     base_ptr += page_size; |                     base_ptr += page_size; | ||||||
|                     auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointerSilent(start_cpu)); |                     auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointer<u8>(start_cpu)); | ||||||
|                     if (next_ptr == 0 || base_ptr != next_ptr) { |                     if (next_ptr == 0 || base_ptr != next_ptr) { | ||||||
|                         return false; |                         return false; | ||||||
|                     } |                     } | ||||||
|  | @ -172,12 +174,12 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) | ||||||
|     rasterizer = rasterizer_; |     rasterizer = rasterizer_; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind, | GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, DAddr dev_addr, std::size_t size, PTEKind kind, | ||||||
|                             bool is_big_pages) { |                             bool is_big_pages) { | ||||||
|     if (is_big_pages) [[likely]] { |     if (is_big_pages) [[likely]] { | ||||||
|         return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind); |         return BigPageTableOp<EntryType::Mapped>(gpu_addr, dev_addr, size, kind); | ||||||
|     } |     } | ||||||
|     return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind); |     return PageTableOp<EntryType::Mapped>(gpu_addr, dev_addr, size, kind); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) { | GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) { | ||||||
|  | @ -202,7 +204,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | ||||||
|     PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |     PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { | std::optional<DAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { | ||||||
|     if (!IsWithinGPUAddressRange(gpu_addr)) [[unlikely]] { |     if (!IsWithinGPUAddressRange(gpu_addr)) [[unlikely]] { | ||||||
|         return std::nullopt; |         return std::nullopt; | ||||||
|     } |     } | ||||||
|  | @ -211,17 +213,17 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { | ||||||
|             return std::nullopt; |             return std::nullopt; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex<false>(gpu_addr)]) |         const DAddr dev_addr_base = static_cast<DAddr>(page_table[PageEntryIndex<false>(gpu_addr)]) | ||||||
|                                     << cpu_page_bits; |                                     << cpu_page_bits; | ||||||
|         return cpu_addr_base + (gpu_addr & page_mask); |         return dev_addr_base + (gpu_addr & page_mask); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const VAddr cpu_addr_base = |     const DAddr dev_addr_base = | ||||||
|         static_cast<VAddr>(big_page_table_cpu[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits; |         static_cast<DAddr>(big_page_table_dev[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits; | ||||||
|     return cpu_addr_base + (gpu_addr & big_page_mask); |     return dev_addr_base + (gpu_addr & big_page_mask); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { | std::optional<DAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { | ||||||
|     size_t page_index{addr >> page_bits}; |     size_t page_index{addr >> page_bits}; | ||||||
|     const size_t page_last{(addr + size + page_size - 1) >> page_bits}; |     const size_t page_last{(addr + size + page_size - 1) >> page_bits}; | ||||||
|     while (page_index < page_last) { |     while (page_index < page_last) { | ||||||
|  | @ -274,7 +276,7 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) { | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return memory.GetPointer(*address); |     return memory.GetPointer<u8>(*address); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { | const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { | ||||||
|  | @ -283,7 +285,7 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return memory.GetPointer(*address); |     return memory.GetPointer<u8>(*address); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining.
 | #ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining.
 | ||||||
|  | @ -367,25 +369,25 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: | ||||||
|         dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |         dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | ||||||
|     }; |     }; | ||||||
|     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||||
|         if constexpr (is_safe) { |         if constexpr (is_safe) { | ||||||
|             rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); |             rasterizer->FlushRegion(dev_addr_base, copy_amount, which); | ||||||
|         } |         } | ||||||
|         u8* physical = memory.GetPointer(cpu_addr_base); |         u8* physical = memory.GetPointer<u8>(dev_addr_base); | ||||||
|         std::memcpy(dest_buffer, physical, copy_amount); |         std::memcpy(dest_buffer, physical, copy_amount); | ||||||
|         dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |         dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | ||||||
|     }; |     }; | ||||||
|     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||||
|         if constexpr (is_safe) { |         if constexpr (is_safe) { | ||||||
|             rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); |             rasterizer->FlushRegion(dev_addr_base, copy_amount, which); | ||||||
|         } |         } | ||||||
|         if (!IsBigPageContinuous(page_index)) [[unlikely]] { |         if (!IsBigPageContinuous(page_index)) [[unlikely]] { | ||||||
|             memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); |             memory.ReadBlockUnsafe(dev_addr_base, dest_buffer, copy_amount); | ||||||
|         } else { |         } else { | ||||||
|             u8* physical = memory.GetPointer(cpu_addr_base); |             u8* physical = memory.GetPointer<u8>(dev_addr_base); | ||||||
|             std::memcpy(dest_buffer, physical, copy_amount); |             std::memcpy(dest_buffer, physical, copy_amount); | ||||||
|         } |         } | ||||||
|         dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |         dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | ||||||
|  | @ -416,25 +418,25 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe | ||||||
|         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; |         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | ||||||
|     }; |     }; | ||||||
|     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||||
|         if constexpr (is_safe) { |         if constexpr (is_safe) { | ||||||
|             rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); |             rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); | ||||||
|         } |         } | ||||||
|         u8* physical = memory.GetPointer(cpu_addr_base); |         u8* physical = memory.GetPointer<u8>(dev_addr_base); | ||||||
|         std::memcpy(physical, src_buffer, copy_amount); |         std::memcpy(physical, src_buffer, copy_amount); | ||||||
|         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; |         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | ||||||
|     }; |     }; | ||||||
|     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||||
|         if constexpr (is_safe) { |         if constexpr (is_safe) { | ||||||
|             rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); |             rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); | ||||||
|         } |         } | ||||||
|         if (!IsBigPageContinuous(page_index)) [[unlikely]] { |         if (!IsBigPageContinuous(page_index)) [[unlikely]] { | ||||||
|             memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount); |             memory.WriteBlockUnsafe(dev_addr_base, src_buffer, copy_amount); | ||||||
|         } else { |         } else { | ||||||
|             u8* physical = memory.GetPointer(cpu_addr_base); |             u8* physical = memory.GetPointer<u8>(dev_addr_base); | ||||||
|             std::memcpy(physical, src_buffer, copy_amount); |             std::memcpy(physical, src_buffer, copy_amount); | ||||||
|         } |         } | ||||||
|         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; |         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | ||||||
|  | @ -470,14 +472,14 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, | ||||||
|                           [[maybe_unused]] std::size_t copy_amount) {}; |                           [[maybe_unused]] std::size_t copy_amount) {}; | ||||||
| 
 | 
 | ||||||
|     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||||
|         rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); |         rasterizer->FlushRegion(dev_addr_base, copy_amount, which); | ||||||
|     }; |     }; | ||||||
|     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||||
|         rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); |         rasterizer->FlushRegion(dev_addr_base, copy_amount, which); | ||||||
|     }; |     }; | ||||||
|     auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, |     auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, | ||||||
|                                  std::size_t copy_amount) { |                                  std::size_t copy_amount) { | ||||||
|  | @ -495,15 +497,15 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size, | ||||||
|                           [[maybe_unused]] std::size_t copy_amount) { return false; }; |                           [[maybe_unused]] std::size_t copy_amount) { return false; }; | ||||||
| 
 | 
 | ||||||
|     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||||
|         result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); |         result |= rasterizer->MustFlushRegion(dev_addr_base, copy_amount, which); | ||||||
|         return result; |         return result; | ||||||
|     }; |     }; | ||||||
|     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||||
|         result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); |         result |= rasterizer->MustFlushRegion(dev_addr_base, copy_amount, which); | ||||||
|         return result; |         return result; | ||||||
|     }; |     }; | ||||||
|     auto check_short_pages = [&](std::size_t page_index, std::size_t offset, |     auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | ||||||
|  | @ -517,7 +519,7 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size, | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { | size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { | ||||||
|     std::optional<VAddr> old_page_addr{}; |     std::optional<DAddr> old_page_addr{}; | ||||||
|     size_t range_so_far = 0; |     size_t range_so_far = 0; | ||||||
|     bool result{false}; |     bool result{false}; | ||||||
|     auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, |     auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, | ||||||
|  | @ -526,24 +528,24 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { | ||||||
|         return true; |         return true; | ||||||
|     }; |     }; | ||||||
|     auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||||
|         if (old_page_addr && *old_page_addr != cpu_addr_base) { |         if (old_page_addr && *old_page_addr != dev_addr_base) { | ||||||
|             result = true; |             result = true; | ||||||
|             return true; |             return true; | ||||||
|         } |         } | ||||||
|         range_so_far += copy_amount; |         range_so_far += copy_amount; | ||||||
|         old_page_addr = {cpu_addr_base + copy_amount}; |         old_page_addr = {dev_addr_base + copy_amount}; | ||||||
|         return false; |         return false; | ||||||
|     }; |     }; | ||||||
|     auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||||
|         if (old_page_addr && *old_page_addr != cpu_addr_base) { |         if (old_page_addr && *old_page_addr != dev_addr_base) { | ||||||
|             return true; |             return true; | ||||||
|         } |         } | ||||||
|         range_so_far += copy_amount; |         range_so_far += copy_amount; | ||||||
|         old_page_addr = {cpu_addr_base + copy_amount}; |         old_page_addr = {dev_addr_base + copy_amount}; | ||||||
|         return false; |         return false; | ||||||
|     }; |     }; | ||||||
|     auto check_short_pages = [&](std::size_t page_index, std::size_t offset, |     auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | ||||||
|  | @ -568,14 +570,14 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, | ||||||
|                           [[maybe_unused]] std::size_t copy_amount) {}; |                           [[maybe_unused]] std::size_t copy_amount) {}; | ||||||
| 
 | 
 | ||||||
|     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||||
|         rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); |         rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); | ||||||
|     }; |     }; | ||||||
|     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||||
|         rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); |         rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); | ||||||
|     }; |     }; | ||||||
|     auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, |     auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, | ||||||
|                                       std::size_t copy_amount) { |                                       std::size_t copy_amount) { | ||||||
|  | @ -587,7 +589,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, | ||||||
| 
 | 
 | ||||||
| void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, | ||||||
|                               VideoCommon::CacheType which) { |                               VideoCommon::CacheType which) { | ||||||
|     Core::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( |     Tegra::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( | ||||||
|         *this, gpu_src_addr, size); |         *this, gpu_src_addr, size); | ||||||
|     data.SetAddressAndSize(gpu_dest_addr, size); |     data.SetAddressAndSize(gpu_dest_addr, size); | ||||||
|     FlushRegion(gpu_dest_addr, size, which); |     FlushRegion(gpu_dest_addr, size, which); | ||||||
|  | @ -611,7 +613,7 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const { | bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const { | ||||||
|     std::optional<VAddr> old_page_addr{}; |     std::optional<DAddr> old_page_addr{}; | ||||||
|     bool result{true}; |     bool result{true}; | ||||||
|     auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, |     auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, | ||||||
|                     std::size_t copy_amount) { |                     std::size_t copy_amount) { | ||||||
|  | @ -619,23 +621,23 @@ bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const | ||||||
|         return true; |         return true; | ||||||
|     }; |     }; | ||||||
|     auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||||
|         if (old_page_addr && *old_page_addr != cpu_addr_base) { |         if (old_page_addr && *old_page_addr != dev_addr_base) { | ||||||
|             result = false; |             result = false; | ||||||
|             return true; |             return true; | ||||||
|         } |         } | ||||||
|         old_page_addr = {cpu_addr_base + copy_amount}; |         old_page_addr = {dev_addr_base + copy_amount}; | ||||||
|         return false; |         return false; | ||||||
|     }; |     }; | ||||||
|     auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |     auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||||
|         if (old_page_addr && *old_page_addr != cpu_addr_base) { |         if (old_page_addr && *old_page_addr != dev_addr_base) { | ||||||
|             result = false; |             result = false; | ||||||
|             return true; |             return true; | ||||||
|         } |         } | ||||||
|         old_page_addr = {cpu_addr_base + copy_amount}; |         old_page_addr = {dev_addr_base + copy_amount}; | ||||||
|         return false; |         return false; | ||||||
|     }; |     }; | ||||||
|     auto check_short_pages = [&](std::size_t page_index, std::size_t offset, |     auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | ||||||
|  | @ -678,11 +680,11 @@ template <bool is_gpu_address> | ||||||
| void MemoryManager::GetSubmappedRangeImpl( | void MemoryManager::GetSubmappedRangeImpl( | ||||||
|     GPUVAddr gpu_addr, std::size_t size, |     GPUVAddr gpu_addr, std::size_t size, | ||||||
|     boost::container::small_vector< |     boost::container::small_vector< | ||||||
|         std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result) |         std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>, 32>& result) | ||||||
|     const { |     const { | ||||||
|     std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> |     std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>> | ||||||
|         last_segment{}; |         last_segment{}; | ||||||
|     std::optional<VAddr> old_page_addr{}; |     std::optional<DAddr> old_page_addr{}; | ||||||
|     const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, |     const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, | ||||||
|                                                 [[maybe_unused]] std::size_t offset, |                                                 [[maybe_unused]] std::size_t offset, | ||||||
|                                                 [[maybe_unused]] std::size_t copy_amount) { |                                                 [[maybe_unused]] std::size_t copy_amount) { | ||||||
|  | @ -694,20 +696,20 @@ void MemoryManager::GetSubmappedRangeImpl( | ||||||
|     const auto extend_size_big = [this, &split, &old_page_addr, |     const auto extend_size_big = [this, &split, &old_page_addr, | ||||||
|                                   &last_segment](std::size_t page_index, std::size_t offset, |                                   &last_segment](std::size_t page_index, std::size_t offset, | ||||||
|                                                  std::size_t copy_amount) { |                                                  std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||||
|         if (old_page_addr) { |         if (old_page_addr) { | ||||||
|             if (*old_page_addr != cpu_addr_base) { |             if (*old_page_addr != dev_addr_base) { | ||||||
|                 split(0, 0, 0); |                 split(0, 0, 0); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         old_page_addr = {cpu_addr_base + copy_amount}; |         old_page_addr = {dev_addr_base + copy_amount}; | ||||||
|         if (!last_segment) { |         if (!last_segment) { | ||||||
|             if constexpr (is_gpu_address) { |             if constexpr (is_gpu_address) { | ||||||
|                 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; |                 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; | ||||||
|                 last_segment = {new_base_addr, copy_amount}; |                 last_segment = {new_base_addr, copy_amount}; | ||||||
|             } else { |             } else { | ||||||
|                 last_segment = {cpu_addr_base, copy_amount}; |                 last_segment = {dev_addr_base, copy_amount}; | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             last_segment->second += copy_amount; |             last_segment->second += copy_amount; | ||||||
|  | @ -716,20 +718,20 @@ void MemoryManager::GetSubmappedRangeImpl( | ||||||
|     const auto extend_size_short = [this, &split, &old_page_addr, |     const auto extend_size_short = [this, &split, &old_page_addr, | ||||||
|                                     &last_segment](std::size_t page_index, std::size_t offset, |                                     &last_segment](std::size_t page_index, std::size_t offset, | ||||||
|                                                    std::size_t copy_amount) { |                                                    std::size_t copy_amount) { | ||||||
|         const VAddr cpu_addr_base = |         const DAddr dev_addr_base = | ||||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; |             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||||
|         if (old_page_addr) { |         if (old_page_addr) { | ||||||
|             if (*old_page_addr != cpu_addr_base) { |             if (*old_page_addr != dev_addr_base) { | ||||||
|                 split(0, 0, 0); |                 split(0, 0, 0); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         old_page_addr = {cpu_addr_base + copy_amount}; |         old_page_addr = {dev_addr_base + copy_amount}; | ||||||
|         if (!last_segment) { |         if (!last_segment) { | ||||||
|             if constexpr (is_gpu_address) { |             if constexpr (is_gpu_address) { | ||||||
|                 const GPUVAddr new_base_addr = (page_index << page_bits) + offset; |                 const GPUVAddr new_base_addr = (page_index << page_bits) + offset; | ||||||
|                 last_segment = {new_base_addr, copy_amount}; |                 last_segment = {new_base_addr, copy_amount}; | ||||||
|             } else { |             } else { | ||||||
|                 last_segment = {cpu_addr_base, copy_amount}; |                 last_segment = {dev_addr_base, copy_amount}; | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             last_segment->second += copy_amount; |             last_segment->second += copy_amount; | ||||||
|  | @ -756,9 +758,9 @@ void MemoryManager::FlushCaching() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { | const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { | ||||||
|     auto cpu_addr = GpuToCpuAddress(src_addr); |     auto dev_addr = GpuToCpuAddress(src_addr); | ||||||
|     if (cpu_addr) { |     if (dev_addr) { | ||||||
|         return memory.GetSpan(*cpu_addr, size); |         return memory.GetSpan(*dev_addr, size); | ||||||
|     } |     } | ||||||
|     return nullptr; |     return nullptr; | ||||||
| } | } | ||||||
|  | @ -767,9 +769,9 @@ u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) { | ||||||
|     if (!IsContinuousRange(src_addr, size)) { |     if (!IsContinuousRange(src_addr, size)) { | ||||||
|         return nullptr; |         return nullptr; | ||||||
|     } |     } | ||||||
|     auto cpu_addr = GpuToCpuAddress(src_addr); |     auto dev_addr = GpuToCpuAddress(src_addr); | ||||||
|     if (cpu_addr) { |     if (dev_addr) { | ||||||
|         return memory.GetSpan(*cpu_addr, size); |         return memory.GetSpan(*dev_addr, size); | ||||||
|     } |     } | ||||||
|     return nullptr; |     return nullptr; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -17,6 +17,7 @@ | ||||||
| #include "common/virtual_buffer.h" | #include "common/virtual_buffer.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
| #include "video_core/cache_types.h" | #include "video_core/cache_types.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| #include "video_core/pte_kind.h" | #include "video_core/pte_kind.h" | ||||||
| 
 | 
 | ||||||
| namespace VideoCore { | namespace VideoCore { | ||||||
|  | @ -28,10 +29,6 @@ class InvalidationAccumulator; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
| class DeviceMemory; |  | ||||||
| namespace Memory { |  | ||||||
| class Memory; |  | ||||||
| } // namespace Memory
 |  | ||||||
| class System; | class System; | ||||||
| } // namespace Core
 | } // namespace Core
 | ||||||
| 
 | 
 | ||||||
|  | @ -50,9 +47,9 @@ public: | ||||||
|     /// Binds a renderer to the memory manager.
 |     /// Binds a renderer to the memory manager.
 | ||||||
|     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); |     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; |     [[nodiscard]] std::optional<DAddr> GpuToCpuAddress(GPUVAddr addr) const; | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; |     [[nodiscard]] std::optional<DAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; | ||||||
| 
 | 
 | ||||||
|     template <typename T> |     template <typename T> | ||||||
|     [[nodiscard]] T Read(GPUVAddr addr) const; |     [[nodiscard]] T Read(GPUVAddr addr) const; | ||||||
|  | @ -110,7 +107,7 @@ public: | ||||||
|     [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; |     [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; | ||||||
| 
 | 
 | ||||||
|     /**
 |     /**
 | ||||||
|      * Checks if a gpu region is mapped by a single range of cpu addresses. |      * Checks if a gpu region is mapped by a single range of device addresses. | ||||||
|      */ |      */ | ||||||
|     [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const; |     [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const; | ||||||
| 
 | 
 | ||||||
|  | @ -120,14 +117,14 @@ public: | ||||||
|     [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; |     [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; | ||||||
| 
 | 
 | ||||||
|     /**
 |     /**
 | ||||||
|      * Returns a vector with all the subranges of cpu addresses mapped beneath. |      * Returns a vector with all the subranges of device addresses mapped beneath. | ||||||
|      * if the region is continuous, a single pair will be returned. If it's unmapped, an empty |      * if the region is continuous, a single pair will be returned. If it's unmapped, an empty | ||||||
|      * vector will be returned; |      * vector will be returned; | ||||||
|      */ |      */ | ||||||
|     boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( |     boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( | ||||||
|         GPUVAddr gpu_addr, std::size_t size) const; |         GPUVAddr gpu_addr, std::size_t size) const; | ||||||
| 
 | 
 | ||||||
|     GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, |     GPUVAddr Map(GPUVAddr gpu_addr, DAddr dev_addr, std::size_t size, | ||||||
|                  PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); |                  PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); | ||||||
|     GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); |     GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); | ||||||
|     void Unmap(GPUVAddr gpu_addr, std::size_t size); |     void Unmap(GPUVAddr gpu_addr, std::size_t size); | ||||||
|  | @ -186,12 +183,11 @@ private: | ||||||
|     void GetSubmappedRangeImpl( |     void GetSubmappedRangeImpl( | ||||||
|         GPUVAddr gpu_addr, std::size_t size, |         GPUVAddr gpu_addr, std::size_t size, | ||||||
|         boost::container::small_vector< |         boost::container::small_vector< | ||||||
|             std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& |             std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>, 32>& | ||||||
|             result) const; |             result) const; | ||||||
| 
 | 
 | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|     Core::Memory::Memory& memory; |     MaxwellDeviceMemoryManager& memory; | ||||||
|     Core::DeviceMemory& device_memory; |  | ||||||
| 
 | 
 | ||||||
|     const u64 address_space_bits; |     const u64 address_space_bits; | ||||||
|     const u64 page_bits; |     const u64 page_bits; | ||||||
|  | @ -218,11 +214,11 @@ private: | ||||||
|     std::vector<u64> big_entries; |     std::vector<u64> big_entries; | ||||||
| 
 | 
 | ||||||
|     template <EntryType entry_type> |     template <EntryType entry_type> | ||||||
|     GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, |     GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size, | ||||||
|                          PTEKind kind); |                          PTEKind kind); | ||||||
| 
 | 
 | ||||||
|     template <EntryType entry_type> |     template <EntryType entry_type> | ||||||
|     GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, |     GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size, | ||||||
|                             PTEKind kind); |                             PTEKind kind); | ||||||
| 
 | 
 | ||||||
|     template <bool is_big_page> |     template <bool is_big_page> | ||||||
|  | @ -233,11 +229,11 @@ private: | ||||||
| 
 | 
 | ||||||
|     Common::MultiLevelPageTable<u32> page_table; |     Common::MultiLevelPageTable<u32> page_table; | ||||||
|     Common::RangeMap<GPUVAddr, PTEKind> kind_map; |     Common::RangeMap<GPUVAddr, PTEKind> kind_map; | ||||||
|     Common::VirtualBuffer<u32> big_page_table_cpu; |     Common::VirtualBuffer<u32> big_page_table_dev; | ||||||
| 
 | 
 | ||||||
|     std::vector<u64> big_page_continuous; |     std::vector<u64> big_page_continuous; | ||||||
|     boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{}; |     boost::container::small_vector<std::pair<DAddr, std::size_t>, 32> page_stash{}; | ||||||
|     boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{}; |     boost::container::small_vector<std::pair<DAddr, std::size_t>, 32> page_stash2{}; | ||||||
| 
 | 
 | ||||||
|     mutable std::mutex guard; |     mutable std::mutex guard; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -21,6 +21,7 @@ | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
| #include "video_core/control/channel_state_cache.h" | #include "video_core/control/channel_state_cache.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/texture_cache/slot_vector.h" | #include "video_core/texture_cache/slot_vector.h" | ||||||
|  | @ -102,11 +103,12 @@ template <class QueryCache, class CachedQuery, class CounterStream, class HostCo | ||||||
| class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||||||
| public: | public: | ||||||
|     explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_, |     explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_, | ||||||
|                               Core::Memory::Memory& cpu_memory_) |                               Tegra::MaxwellDeviceMemoryManager& device_memory_) | ||||||
|         : rasterizer{rasterizer_}, |         : rasterizer{rasterizer_}, | ||||||
|           // Use reinterpret_cast instead of static_cast as workaround for
 |           // Use reinterpret_cast instead of static_cast as workaround for
 | ||||||
|           // UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
 |           // UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
 | ||||||
|           cpu_memory{cpu_memory_}, streams{{ |           device_memory{device_memory_}, | ||||||
|  |           streams{{ | ||||||
|               {CounterStream{reinterpret_cast<QueryCache&>(*this), |               {CounterStream{reinterpret_cast<QueryCache&>(*this), | ||||||
|                              VideoCore::QueryType::SamplesPassed}}, |                              VideoCore::QueryType::SamplesPassed}}, | ||||||
|               {CounterStream{reinterpret_cast<QueryCache&>(*this), |               {CounterStream{reinterpret_cast<QueryCache&>(*this), | ||||||
|  | @ -322,13 +324,14 @@ private: | ||||||
|             local_lock.unlock(); |             local_lock.unlock(); | ||||||
|             if (timestamp) { |             if (timestamp) { | ||||||
|                 u64 timestamp_value = *timestamp; |                 u64 timestamp_value = *timestamp; | ||||||
|                 cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64)); |                 device_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, | ||||||
|                 cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64)); |                                                sizeof(u64)); | ||||||
|  |                 device_memory.WriteBlockUnsafe(address, &value, sizeof(u64)); | ||||||
|                 rasterizer.InvalidateRegion(address, sizeof(u64) * 2, |                 rasterizer.InvalidateRegion(address, sizeof(u64) * 2, | ||||||
|                                             VideoCommon::CacheType::NoQueryCache); |                                             VideoCommon::CacheType::NoQueryCache); | ||||||
|             } else { |             } else { | ||||||
|                 u32 small_value = static_cast<u32>(value); |                 u32 small_value = static_cast<u32>(value); | ||||||
|                 cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32)); |                 device_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32)); | ||||||
|                 rasterizer.InvalidateRegion(address, sizeof(u32), |                 rasterizer.InvalidateRegion(address, sizeof(u32), | ||||||
|                                             VideoCommon::CacheType::NoQueryCache); |                                             VideoCommon::CacheType::NoQueryCache); | ||||||
|             } |             } | ||||||
|  | @ -342,7 +345,7 @@ private: | ||||||
|     SlotVector<AsyncJob> slot_async_jobs; |     SlotVector<AsyncJob> slot_async_jobs; | ||||||
| 
 | 
 | ||||||
|     VideoCore::RasterizerInterface& rasterizer; |     VideoCore::RasterizerInterface& rasterizer; | ||||||
|     Core::Memory::Memory& cpu_memory; |     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||||
| 
 | 
 | ||||||
|     mutable std::recursive_mutex mutex; |     mutable std::recursive_mutex mutex; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -23,7 +23,7 @@ DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) | ||||||
| 
 | 
 | ||||||
| class QueryBase { | class QueryBase { | ||||||
| public: | public: | ||||||
|     VAddr guest_address{}; |     DAddr guest_address{}; | ||||||
|     QueryFlagBits flags{}; |     QueryFlagBits flags{}; | ||||||
|     u64 value{}; |     u64 value{}; | ||||||
| 
 | 
 | ||||||
|  | @ -32,7 +32,7 @@ protected: | ||||||
|     QueryBase() = default; |     QueryBase() = default; | ||||||
| 
 | 
 | ||||||
|     // Parameterized constructor
 |     // Parameterized constructor
 | ||||||
|     QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) |     QueryBase(DAddr address, QueryFlagBits flags_, u64 value_) | ||||||
|         : guest_address(address), flags(flags_), value{value_} {} |         : guest_address(address), flags(flags_), value{value_} {} | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -18,6 +18,7 @@ | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/query_cache/bank_base.h" | #include "video_core/query_cache/bank_base.h" | ||||||
| #include "video_core/query_cache/query_base.h" | #include "video_core/query_cache/query_base.h" | ||||||
|  | @ -113,9 +114,10 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | ||||||
|     using RuntimeType = typename Traits::RuntimeType; |     using RuntimeType = typename Traits::RuntimeType; | ||||||
| 
 | 
 | ||||||
|     QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, |     QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, | ||||||
|                        Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) |                        Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_, | ||||||
|  |                        Tegra::GPU& gpu_) | ||||||
|         : owner{owner_}, rasterizer{rasterizer_}, |         : owner{owner_}, rasterizer{rasterizer_}, | ||||||
|           cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { |           device_memory{device_memory_}, runtime{runtime_}, gpu{gpu_} { | ||||||
|         streamer_mask = 0; |         streamer_mask = 0; | ||||||
|         for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { |         for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { | ||||||
|             streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); |             streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); | ||||||
|  | @ -158,7 +160,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | ||||||
| 
 | 
 | ||||||
|     QueryCacheBase<Traits>* owner; |     QueryCacheBase<Traits>* owner; | ||||||
|     VideoCore::RasterizerInterface& rasterizer; |     VideoCore::RasterizerInterface& rasterizer; | ||||||
|     Core::Memory::Memory& cpu_memory; |     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||||
|     RuntimeType& runtime; |     RuntimeType& runtime; | ||||||
|     Tegra::GPU& gpu; |     Tegra::GPU& gpu; | ||||||
|     std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; |     std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; | ||||||
|  | @ -171,10 +173,11 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | ||||||
| template <typename Traits> | template <typename Traits> | ||||||
| QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, | QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, | ||||||
|                                        VideoCore::RasterizerInterface& rasterizer_, |                                        VideoCore::RasterizerInterface& rasterizer_, | ||||||
|                                        Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) |                                        Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||||
|  |                                        RuntimeType& runtime_) | ||||||
|     : cached_queries{} { |     : cached_queries{} { | ||||||
|     impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( |     impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( | ||||||
|         this, rasterizer_, cpu_memory_, runtime_, gpu_); |         this, rasterizer_, device_memory_, runtime_, gpu_); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <typename Traits> | template <typename Traits> | ||||||
|  | @ -240,7 +243,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | ||||||
|     if (!cpu_addr_opt) [[unlikely]] { |     if (!cpu_addr_opt) [[unlikely]] { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     VAddr cpu_addr = *cpu_addr_opt; |     DAddr cpu_addr = *cpu_addr_opt; | ||||||
|     const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); |     const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); | ||||||
|     auto* query = streamer->GetQuery(new_query_id); |     auto* query = streamer->GetQuery(new_query_id); | ||||||
|     if (is_fence) { |     if (is_fence) { | ||||||
|  | @ -253,10 +256,9 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | ||||||
|         return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, |         return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, | ||||||
|                                         static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); |                                         static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); | ||||||
|     }; |     }; | ||||||
|     u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); |     u8* pointer = impl->device_memory.GetPointer<u8>(cpu_addr); | ||||||
|     u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); |     u8* pointer_timestamp = impl->device_memory.GetPointer<u8>(cpu_addr + 8); | ||||||
|     bool is_synced = !Settings::IsGPULevelHigh() && is_fence; |     bool is_synced = !Settings::IsGPULevelHigh() && is_fence; | ||||||
| 
 |  | ||||||
|     std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, |     std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, | ||||||
|                                      pointer, pointer_timestamp] { |                                      pointer, pointer_timestamp] { | ||||||
|         if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { |         if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { | ||||||
|  | @ -559,7 +561,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo | ||||||
|     } |     } | ||||||
|     if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && |     if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && | ||||||
|         False(query_base->flags & QueryFlagBits::IsGuestSynced)) { |         False(query_base->flags & QueryFlagBits::IsGuestSynced)) { | ||||||
|         auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); |         auto* ptr = impl->device_memory.GetPointer<u8>(query_base->guest_address); | ||||||
|         if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { |         if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { | ||||||
|             std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); |             std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); | ||||||
|             return false; |             return false; | ||||||
|  |  | ||||||
|  | @ -17,10 +17,7 @@ | ||||||
| #include "video_core/control/channel_state_cache.h" | #include "video_core/control/channel_state_cache.h" | ||||||
| #include "video_core/query_cache/query_base.h" | #include "video_core/query_cache/query_base.h" | ||||||
| #include "video_core/query_cache/types.h" | #include "video_core/query_cache/types.h" | ||||||
| 
 | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| namespace Core::Memory { |  | ||||||
| class Memory; |  | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| namespace VideoCore { | namespace VideoCore { | ||||||
| class RasterizerInterface; | class RasterizerInterface; | ||||||
|  | @ -53,7 +50,7 @@ public: | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, |     explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, | ||||||
|                             Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); |                             Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_); | ||||||
| 
 | 
 | ||||||
|     ~QueryCacheBase(); |     ~QueryCacheBase(); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,72 +0,0 @@ | ||||||
| // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 |  | ||||||
| 
 |  | ||||||
| #include <atomic> |  | ||||||
| 
 |  | ||||||
| #include "common/assert.h" |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "common/div_ceil.h" |  | ||||||
| #include "core/memory.h" |  | ||||||
| #include "video_core/rasterizer_accelerated.h" |  | ||||||
| 
 |  | ||||||
| namespace VideoCore { |  | ||||||
| 
 |  | ||||||
| using namespace Core::Memory; |  | ||||||
| 
 |  | ||||||
| RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) |  | ||||||
|     : cached_pages(std::make_unique<CachedPages>()), cpu_memory{cpu_memory_} {} |  | ||||||
| 
 |  | ||||||
| RasterizerAccelerated::~RasterizerAccelerated() = default; |  | ||||||
| 
 |  | ||||||
| void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { |  | ||||||
|     u64 uncache_begin = 0; |  | ||||||
|     u64 cache_begin = 0; |  | ||||||
|     u64 uncache_bytes = 0; |  | ||||||
|     u64 cache_bytes = 0; |  | ||||||
| 
 |  | ||||||
|     std::atomic_thread_fence(std::memory_order_acquire); |  | ||||||
|     const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); |  | ||||||
|     for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) { |  | ||||||
|         std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page); |  | ||||||
| 
 |  | ||||||
|         if (delta > 0) { |  | ||||||
|             ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!"); |  | ||||||
|         } else if (delta < 0) { |  | ||||||
|             ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); |  | ||||||
|         } else { |  | ||||||
|             ASSERT_MSG(false, "Delta must be non-zero!"); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // Adds or subtracts 1, as count is an unsigned 16-bit value
 |  | ||||||
|         count.fetch_add(static_cast<u16>(delta), std::memory_order_release); |  | ||||||
| 
 |  | ||||||
|         // Assume delta is either -1 or 1
 |  | ||||||
|         if (count.load(std::memory_order::relaxed) == 0) { |  | ||||||
|             if (uncache_bytes == 0) { |  | ||||||
|                 uncache_begin = page; |  | ||||||
|             } |  | ||||||
|             uncache_bytes += YUZU_PAGESIZE; |  | ||||||
|         } else if (uncache_bytes > 0) { |  | ||||||
|             cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, |  | ||||||
|                                                   false); |  | ||||||
|             uncache_bytes = 0; |  | ||||||
|         } |  | ||||||
|         if (count.load(std::memory_order::relaxed) == 1 && delta > 0) { |  | ||||||
|             if (cache_bytes == 0) { |  | ||||||
|                 cache_begin = page; |  | ||||||
|             } |  | ||||||
|             cache_bytes += YUZU_PAGESIZE; |  | ||||||
|         } else if (cache_bytes > 0) { |  | ||||||
|             cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); |  | ||||||
|             cache_bytes = 0; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     if (uncache_bytes > 0) { |  | ||||||
|         cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, false); |  | ||||||
|     } |  | ||||||
|     if (cache_bytes > 0) { |  | ||||||
|         cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCore
 |  | ||||||
|  | @ -1,49 +0,0 @@ | ||||||
| // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
 |  | ||||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| 
 |  | ||||||
| #include <array> |  | ||||||
| #include <atomic> |  | ||||||
| 
 |  | ||||||
| #include "common/common_types.h" |  | ||||||
| #include "video_core/rasterizer_interface.h" |  | ||||||
| 
 |  | ||||||
| namespace Core::Memory { |  | ||||||
| class Memory; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| namespace VideoCore { |  | ||||||
| 
 |  | ||||||
| /// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface.
 |  | ||||||
| class RasterizerAccelerated : public RasterizerInterface { |  | ||||||
| public: |  | ||||||
|     explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_); |  | ||||||
|     ~RasterizerAccelerated() override; |  | ||||||
| 
 |  | ||||||
|     void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; |  | ||||||
| 
 |  | ||||||
| private: |  | ||||||
|     class CacheEntry final { |  | ||||||
|     public: |  | ||||||
|         CacheEntry() = default; |  | ||||||
| 
 |  | ||||||
|         std::atomic_uint16_t& Count(std::size_t page) { |  | ||||||
|             return values[page & 3]; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         const std::atomic_uint16_t& Count(std::size_t page) const { |  | ||||||
|             return values[page & 3]; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|     private: |  | ||||||
|         std::array<std::atomic_uint16_t, 4> values{}; |  | ||||||
|     }; |  | ||||||
|     static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!"); |  | ||||||
| 
 |  | ||||||
|     using CachedPages = std::array<CacheEntry, 0x2000000>; |  | ||||||
|     std::unique_ptr<CachedPages> cached_pages; |  | ||||||
|     Core::Memory::Memory& cpu_memory; |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| } // namespace VideoCore
 |  | ||||||
|  | @ -86,35 +86,35 @@ public: | ||||||
|     virtual void FlushAll() = 0; |     virtual void FlushAll() = 0; | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 |     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||||
|     virtual void FlushRegion(VAddr addr, u64 size, |     virtual void FlushRegion(DAddr addr, u64 size, | ||||||
|                              VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |                              VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | ||||||
| 
 | 
 | ||||||
|     /// Check if the specified memory area requires flushing to CPU Memory.
 |     /// Check if the specified memory area requires flushing to CPU Memory.
 | ||||||
|     virtual bool MustFlushRegion(VAddr addr, u64 size, |     virtual bool MustFlushRegion(DAddr addr, u64 size, | ||||||
|                                  VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |                                  VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | ||||||
| 
 | 
 | ||||||
|     virtual RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) = 0; |     virtual RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) = 0; | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 |     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||||
|     virtual void InvalidateRegion(VAddr addr, u64 size, |     virtual void InvalidateRegion(DAddr addr, u64 size, | ||||||
|                                   VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |                                   VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | ||||||
| 
 | 
 | ||||||
|     virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { |     virtual void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) { | ||||||
|         for (const auto& [cpu_addr, size] : sequences) { |         for (const auto& [cpu_addr, size] : sequences) { | ||||||
|             InvalidateRegion(cpu_addr, size); |             InvalidateRegion(cpu_addr, size); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region are desync with guest
 |     /// Notify rasterizer that any caches of the specified region are desync with guest
 | ||||||
|     virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0; |     virtual void OnCacheInvalidation(PAddr addr, u64 size) = 0; | ||||||
| 
 | 
 | ||||||
|     virtual bool OnCPUWrite(VAddr addr, u64 size) = 0; |     virtual bool OnCPUWrite(PAddr addr, u64 size) = 0; | ||||||
| 
 | 
 | ||||||
|     /// Sync memory between guest and host.
 |     /// Sync memory between guest and host.
 | ||||||
|     virtual void InvalidateGPUCache() = 0; |     virtual void InvalidateGPUCache() = 0; | ||||||
| 
 | 
 | ||||||
|     /// Unmap memory range
 |     /// Unmap memory range
 | ||||||
|     virtual void UnmapMemory(VAddr addr, u64 size) = 0; |     virtual void UnmapMemory(DAddr addr, u64 size) = 0; | ||||||
| 
 | 
 | ||||||
|     /// Remap GPU memory range. This means underneath backing memory changed
 |     /// Remap GPU memory range. This means underneath backing memory changed
 | ||||||
|     virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0; |     virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0; | ||||||
|  | @ -122,7 +122,7 @@ public: | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 |     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||||
|     /// and invalidated
 |     /// and invalidated
 | ||||||
|     virtual void FlushAndInvalidateRegion( |     virtual void FlushAndInvalidateRegion( | ||||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |         DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | ||||||
| 
 | 
 | ||||||
|     /// Notify the host renderer to wait for previous primitive and compute operations.
 |     /// Notify the host renderer to wait for previous primitive and compute operations.
 | ||||||
|     virtual void WaitForIdle() = 0; |     virtual void WaitForIdle() = 0; | ||||||
|  | @ -157,13 +157,10 @@ public: | ||||||
| 
 | 
 | ||||||
|     /// Attempt to use a faster method to display the framebuffer to screen
 |     /// Attempt to use a faster method to display the framebuffer to screen
 | ||||||
|     [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, |     [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||||
|                                                  VAddr framebuffer_addr, u32 pixel_stride) { |                                                  DAddr framebuffer_addr, u32 pixel_stride) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Increase/decrease the number of object in pages touching the specified region
 |  | ||||||
|     virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} |  | ||||||
| 
 |  | ||||||
|     /// Initialize disk cached resources for the game being emulated
 |     /// Initialize disk cached resources for the game being emulated
 | ||||||
|     virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |     virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||||
|                                    const DiskResourceLoadCallback& callback) {} |                                    const DiskResourceLoadCallback& callback) {} | ||||||
|  |  | ||||||
|  | @ -19,8 +19,7 @@ bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) { | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu) | RasterizerNull::RasterizerNull(Tegra::GPU& gpu) : m_gpu{gpu} {} | ||||||
|     : RasterizerAccelerated(cpu_memory_), m_gpu{gpu} {} |  | ||||||
| RasterizerNull::~RasterizerNull() = default; | RasterizerNull::~RasterizerNull() = default; | ||||||
| 
 | 
 | ||||||
| void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} | void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} | ||||||
|  | @ -45,16 +44,16 @@ void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr | ||||||
|                                                u32 size) {} |                                                u32 size) {} | ||||||
| void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {} | void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {} | ||||||
| void RasterizerNull::FlushAll() {} | void RasterizerNull::FlushAll() {} | ||||||
| void RasterizerNull::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | void RasterizerNull::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType) {} | ||||||
| bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) { | bool RasterizerNull::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType) { | ||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
| void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | void RasterizerNull::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType) {} | ||||||
| bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) { | bool RasterizerNull::OnCPUWrite(PAddr addr, u64 size) { | ||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
| void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {} | void RasterizerNull::OnCacheInvalidation(PAddr addr, u64 size) {} | ||||||
| VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { | VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(PAddr addr, u64 size) { | ||||||
|     VideoCore::RasterizerDownloadArea new_area{ |     VideoCore::RasterizerDownloadArea new_area{ | ||||||
|         .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), |         .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), | ||||||
|         .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), |         .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), | ||||||
|  | @ -63,7 +62,7 @@ VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 s | ||||||
|     return new_area; |     return new_area; | ||||||
| } | } | ||||||
| void RasterizerNull::InvalidateGPUCache() {} | void RasterizerNull::InvalidateGPUCache() {} | ||||||
| void RasterizerNull::UnmapMemory(VAddr addr, u64 size) {} | void RasterizerNull::UnmapMemory(DAddr addr, u64 size) {} | ||||||
| void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {} | void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {} | ||||||
| void RasterizerNull::SignalFence(std::function<void()>&& func) { | void RasterizerNull::SignalFence(std::function<void()>&& func) { | ||||||
|     func(); |     func(); | ||||||
|  | @ -78,7 +77,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) { | ||||||
| } | } | ||||||
| void RasterizerNull::SignalReference() {} | void RasterizerNull::SignalReference() {} | ||||||
| void RasterizerNull::ReleaseFences(bool) {} | void RasterizerNull::ReleaseFences(bool) {} | ||||||
| void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | void RasterizerNull::FlushAndInvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType) {} | ||||||
| void RasterizerNull::WaitForIdle() {} | void RasterizerNull::WaitForIdle() {} | ||||||
| void RasterizerNull::FragmentBarrier() {} | void RasterizerNull::FragmentBarrier() {} | ||||||
| void RasterizerNull::TiledCacheBarrier() {} | void RasterizerNull::TiledCacheBarrier() {} | ||||||
|  | @ -95,7 +94,7 @@ bool RasterizerNull::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surfac | ||||||
| void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | ||||||
|                                               std::span<const u8> memory) {} |                                               std::span<const u8> memory) {} | ||||||
| bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, | bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||||
|                                        VAddr framebuffer_addr, u32 pixel_stride) { |                                        DAddr framebuffer_addr, u32 pixel_stride) { | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
| void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||||
|  |  | ||||||
|  | @ -6,7 +6,6 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/control/channel_state_cache.h" | #include "video_core/control/channel_state_cache.h" | ||||||
| #include "video_core/engines/maxwell_dma.h" | #include "video_core/engines/maxwell_dma.h" | ||||||
| #include "video_core/rasterizer_accelerated.h" |  | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| 
 | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
|  | @ -32,10 +31,10 @@ public: | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class RasterizerNull final : public VideoCore::RasterizerAccelerated, | class RasterizerNull final : public VideoCore::RasterizerInterface, | ||||||
|                              protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |                              protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||||||
| public: | public: | ||||||
|     explicit RasterizerNull(Core::Memory::Memory& cpu_memory, Tegra::GPU& gpu); |     explicit RasterizerNull(Tegra::GPU& gpu); | ||||||
|     ~RasterizerNull() override; |     ~RasterizerNull() override; | ||||||
| 
 | 
 | ||||||
|     void Draw(bool is_indexed, u32 instance_count) override; |     void Draw(bool is_indexed, u32 instance_count) override; | ||||||
|  | @ -48,17 +47,17 @@ public: | ||||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | ||||||
|     void FlushAll() override; |     void FlushAll() override; | ||||||
|     void FlushRegion(VAddr addr, u64 size, |     void FlushRegion(DAddr addr, u64 size, | ||||||
|                      VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                      VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     bool MustFlushRegion(VAddr addr, u64 size, |     bool MustFlushRegion(DAddr addr, u64 size, | ||||||
|                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void InvalidateRegion(VAddr addr, u64 size, |     void InvalidateRegion(DAddr addr, u64 size, | ||||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void OnCacheInvalidation(VAddr addr, u64 size) override; |     void OnCacheInvalidation(DAddr addr, u64 size) override; | ||||||
|     bool OnCPUWrite(VAddr addr, u64 size) override; |     bool OnCPUWrite(DAddr addr, u64 size) override; | ||||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; |     VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override; | ||||||
|     void InvalidateGPUCache() override; |     void InvalidateGPUCache() override; | ||||||
|     void UnmapMemory(VAddr addr, u64 size) override; |     void UnmapMemory(DAddr addr, u64 size) override; | ||||||
|     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | ||||||
|     void SignalFence(std::function<void()>&& func) override; |     void SignalFence(std::function<void()>&& func) override; | ||||||
|     void SyncOperation(std::function<void()>&& func) override; |     void SyncOperation(std::function<void()>&& func) override; | ||||||
|  | @ -66,7 +65,7 @@ public: | ||||||
|     void SignalReference() override; |     void SignalReference() override; | ||||||
|     void ReleaseFences(bool force) override; |     void ReleaseFences(bool force) override; | ||||||
|     void FlushAndInvalidateRegion( |     void FlushAndInvalidateRegion( | ||||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |         DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void WaitForIdle() override; |     void WaitForIdle() override; | ||||||
|     void FragmentBarrier() override; |     void FragmentBarrier() override; | ||||||
|     void TiledCacheBarrier() override; |     void TiledCacheBarrier() override; | ||||||
|  | @ -78,7 +77,7 @@ public: | ||||||
|     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | ||||||
|     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | ||||||
|                                   std::span<const u8> memory) override; |                                   std::span<const u8> memory) override; | ||||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, | ||||||
|                            u32 pixel_stride) override; |                            u32 pixel_stride) override; | ||||||
|     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||||
|                            const VideoCore::DiskResourceLoadCallback& callback) override; |                            const VideoCore::DiskResourceLoadCallback& callback) override; | ||||||
|  |  | ||||||
|  | @ -7,10 +7,9 @@ | ||||||
| 
 | 
 | ||||||
| namespace Null { | namespace Null { | ||||||
| 
 | 
 | ||||||
| RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, | RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, | ||||||
|                            Tegra::GPU& gpu, |  | ||||||
|                            std::unique_ptr<Core::Frontend::GraphicsContext> context_) |                            std::unique_ptr<Core::Frontend::GraphicsContext> context_) | ||||||
|     : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(cpu_memory, gpu) {} |     : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(gpu) {} | ||||||
| 
 | 
 | ||||||
| RendererNull::~RendererNull() = default; | RendererNull::~RendererNull() = default; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -13,8 +13,7 @@ namespace Null { | ||||||
| 
 | 
 | ||||||
| class RendererNull final : public VideoCore::RendererBase { | class RendererNull final : public VideoCore::RendererBase { | ||||||
| public: | public: | ||||||
|     explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, |     explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, | ||||||
|                           Tegra::GPU& gpu, |  | ||||||
|                           std::unique_ptr<Core::Frontend::GraphicsContext> context); |                           std::unique_ptr<Core::Frontend::GraphicsContext> context); | ||||||
|     ~RendererNull() override; |     ~RendererNull() override; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -47,11 +47,10 @@ constexpr std::array PROGRAM_LUT{ | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | ||||||
|     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} |     : VideoCommon::BufferBase(null_params) {} | ||||||
| 
 | 
 | ||||||
| Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | Buffer::Buffer(BufferCacheRuntime& runtime, DAddr cpu_addr_, u64 size_bytes_) | ||||||
|                VAddr cpu_addr_, u64 size_bytes_) |     : VideoCommon::BufferBase(cpu_addr_, size_bytes_) { | ||||||
|     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { |  | ||||||
|     buffer.Create(); |     buffer.Create(); | ||||||
|     if (runtime.device.HasDebuggingToolAttached()) { |     if (runtime.device.HasDebuggingToolAttached()) { | ||||||
|         const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); |         const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); | ||||||
|  |  | ||||||
|  | @ -10,7 +10,6 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/buffer_cache/buffer_cache_base.h" | #include "video_core/buffer_cache/buffer_cache_base.h" | ||||||
| #include "video_core/buffer_cache/memory_tracker_base.h" | #include "video_core/buffer_cache/memory_tracker_base.h" | ||||||
| #include "video_core/rasterizer_interface.h" |  | ||||||
| #include "video_core/renderer_opengl/gl_device.h" | #include "video_core/renderer_opengl/gl_device.h" | ||||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" | #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" | ||||||
|  | @ -19,9 +18,9 @@ namespace OpenGL { | ||||||
| 
 | 
 | ||||||
| class BufferCacheRuntime; | class BufferCacheRuntime; | ||||||
| 
 | 
 | ||||||
| class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { | class Buffer : public VideoCommon::BufferBase { | ||||||
| public: | public: | ||||||
|     explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr, |     explicit Buffer(BufferCacheRuntime&, DAddr cpu_addr, | ||||||
|                     u64 size_bytes); |                     u64 size_bytes); | ||||||
|     explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); |     explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); | ||||||
| 
 | 
 | ||||||
|  | @ -244,7 +243,7 @@ struct BufferCacheParams { | ||||||
|     using Runtime = OpenGL::BufferCacheRuntime; |     using Runtime = OpenGL::BufferCacheRuntime; | ||||||
|     using Buffer = OpenGL::Buffer; |     using Buffer = OpenGL::Buffer; | ||||||
|     using Async_Buffer = OpenGL::StagingBufferMap; |     using Async_Buffer = OpenGL::StagingBufferMap; | ||||||
|     using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; |     using MemoryTracker = VideoCommon::MemoryTrackerBase<Tegra::MaxwellDeviceMemoryManager>; | ||||||
| 
 | 
 | ||||||
|     static constexpr bool IS_OPENGL = true; |     static constexpr bool IS_OPENGL = true; | ||||||
|     static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; |     static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; | ||||||
|  |  | ||||||
|  | @ -35,8 +35,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { | ||||||
| 
 | 
 | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_) | ||||||
|     : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} { |     : QueryCacheLegacy(rasterizer_, device_memory_), gl_rasterizer{rasterizer_} { | ||||||
|     EnableCounters(); |     EnableCounters(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -11,6 +11,7 @@ | ||||||
| #include "video_core/query_cache.h" | #include "video_core/query_cache.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| 
 | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
| class System; | class System; | ||||||
|  | @ -28,7 +29,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | ||||||
| class QueryCache final | class QueryCache final | ||||||
|     : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> { |     : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> { | ||||||
| public: | public: | ||||||
|     explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); |     explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_); | ||||||
|     ~QueryCache(); |     ~QueryCache(); | ||||||
| 
 | 
 | ||||||
|     OGLQuery AllocateQuery(VideoCore::QueryType type); |     OGLQuery AllocateQuery(VideoCore::QueryType type); | ||||||
|  |  | ||||||
|  | @ -70,18 +70,18 @@ std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryTy | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||||
|                                    Core::Memory::Memory& cpu_memory_, const Device& device_, |                                    Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||||
|                                    ScreenInfo& screen_info_, ProgramManager& program_manager_, |                                    const Device& device_, ScreenInfo& screen_info_, | ||||||
|                                    StateTracker& state_tracker_) |                                    ProgramManager& program_manager_, StateTracker& state_tracker_) | ||||||
|     : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), |     : gpu(gpu_), device_memory(device_memory_), device(device_), screen_info(screen_info_), | ||||||
|       program_manager(program_manager_), state_tracker(state_tracker_), |       program_manager(program_manager_), state_tracker(state_tracker_), | ||||||
|       texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), |       texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), | ||||||
|       texture_cache(texture_cache_runtime, *this), |       texture_cache(texture_cache_runtime, device_memory_), | ||||||
|       buffer_cache_runtime(device, staging_buffer_pool), |       buffer_cache_runtime(device, staging_buffer_pool), | ||||||
|       buffer_cache(*this, cpu_memory_, buffer_cache_runtime), |       buffer_cache(device_memory_, buffer_cache_runtime), | ||||||
|       shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, |       shader_cache(device_memory_, emu_window_, device, texture_cache, buffer_cache, | ||||||
|                    state_tracker, gpu.ShaderNotify()), |                    program_manager, state_tracker, gpu.ShaderNotify()), | ||||||
|       query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache), |       query_cache(*this, device_memory_), accelerate_dma(buffer_cache, texture_cache), | ||||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), |       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | ||||||
|       blit_image(program_manager_) {} |       blit_image(program_manager_) {} | ||||||
| 
 | 
 | ||||||
|  | @ -475,7 +475,7 @@ void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) { | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::FlushAll() {} | void RasterizerOpenGL::FlushAll() {} | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | void RasterizerOpenGL::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { | ||||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); |     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||||
|     if (addr == 0 || size == 0) { |     if (addr == 0 || size == 0) { | ||||||
|         return; |         return; | ||||||
|  | @ -493,7 +493,7 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | bool RasterizerOpenGL::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { | ||||||
|     if ((True(which & VideoCommon::CacheType::BufferCache))) { |     if ((True(which & VideoCommon::CacheType::BufferCache))) { | ||||||
|         std::scoped_lock lock{buffer_cache.mutex}; |         std::scoped_lock lock{buffer_cache.mutex}; | ||||||
|         if (buffer_cache.IsRegionGpuModified(addr, size)) { |         if (buffer_cache.IsRegionGpuModified(addr, size)) { | ||||||
|  | @ -510,7 +510,7 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT | ||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64 size) { | VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(DAddr addr, u64 size) { | ||||||
|     { |     { | ||||||
|         std::scoped_lock lock{texture_cache.mutex}; |         std::scoped_lock lock{texture_cache.mutex}; | ||||||
|         auto area = texture_cache.GetFlushArea(addr, size); |         auto area = texture_cache.GetFlushArea(addr, size); | ||||||
|  | @ -533,7 +533,7 @@ VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64 | ||||||
|     return new_area; |     return new_area; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | void RasterizerOpenGL::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { | ||||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); |     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||||
|     if (addr == 0 || size == 0) { |     if (addr == 0 || size == 0) { | ||||||
|         return; |         return; | ||||||
|  | @ -554,8 +554,9 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | bool RasterizerOpenGL::OnCPUWrite(PAddr p_addr, u64 size) { | ||||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); |     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||||
|  |     const DAddr addr = device_memory.GetAddressFromPAddr(p_addr); | ||||||
|     if (addr == 0 || size == 0) { |     if (addr == 0 || size == 0) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  | @ -576,8 +577,9 @@ bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | ||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { | void RasterizerOpenGL::OnCacheInvalidation(PAddr p_addr, u64 size) { | ||||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); |     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||||
|  |     const DAddr addr = device_memory.GetAddressFromPAddr(p_addr); | ||||||
|     if (addr == 0 || size == 0) { |     if (addr == 0 || size == 0) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  | @ -596,7 +598,7 @@ void RasterizerOpenGL::InvalidateGPUCache() { | ||||||
|     gpu.InvalidateGPUCache(); |     gpu.InvalidateGPUCache(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | void RasterizerOpenGL::UnmapMemory(DAddr addr, u64 size) { | ||||||
|     { |     { | ||||||
|         std::scoped_lock lock{texture_cache.mutex}; |         std::scoped_lock lock{texture_cache.mutex}; | ||||||
|         texture_cache.UnmapMemory(addr, size); |         texture_cache.UnmapMemory(addr, size); | ||||||
|  | @ -635,7 +637,7 @@ void RasterizerOpenGL::ReleaseFences(bool force) { | ||||||
|     fence_manager.WaitPendingFences(force); |     fence_manager.WaitPendingFences(force); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, | void RasterizerOpenGL::FlushAndInvalidateRegion(DAddr addr, u64 size, | ||||||
|                                                 VideoCommon::CacheType which) { |                                                 VideoCommon::CacheType which) { | ||||||
|     if (Settings::IsGPULevelExtreme()) { |     if (Settings::IsGPULevelExtreme()) { | ||||||
|         FlushRegion(addr, size, which); |         FlushRegion(addr, size, which); | ||||||
|  | @ -739,7 +741,7 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||||
|                                          VAddr framebuffer_addr, u32 pixel_stride) { |                                          DAddr framebuffer_addr, u32 pixel_stride) { | ||||||
|     if (framebuffer_addr == 0) { |     if (framebuffer_addr == 0) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -14,7 +14,6 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/control/channel_state_cache.h" | #include "video_core/control/channel_state_cache.h" | ||||||
| #include "video_core/engines/maxwell_dma.h" | #include "video_core/engines/maxwell_dma.h" | ||||||
| #include "video_core/rasterizer_accelerated.h" |  | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/renderer_opengl/blit_image.h" | #include "video_core/renderer_opengl/blit_image.h" | ||||||
| #include "video_core/renderer_opengl/gl_buffer_cache.h" | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||||||
|  | @ -72,11 +71,11 @@ private: | ||||||
|     TextureCache& texture_cache; |     TextureCache& texture_cache; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class RasterizerOpenGL : public VideoCore::RasterizerAccelerated, | class RasterizerOpenGL : public VideoCore::RasterizerInterface, | ||||||
|                          protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |                          protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||||||
| public: | public: | ||||||
|     explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |     explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||||
|                               Core::Memory::Memory& cpu_memory_, const Device& device_, |                               Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_, | ||||||
|                               ScreenInfo& screen_info_, ProgramManager& program_manager_, |                               ScreenInfo& screen_info_, ProgramManager& program_manager_, | ||||||
|                               StateTracker& state_tracker_); |                               StateTracker& state_tracker_); | ||||||
|     ~RasterizerOpenGL() override; |     ~RasterizerOpenGL() override; | ||||||
|  | @ -92,17 +91,17 @@ public: | ||||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | ||||||
|     void FlushAll() override; |     void FlushAll() override; | ||||||
|     void FlushRegion(VAddr addr, u64 size, |     void FlushRegion(DAddr addr, u64 size, | ||||||
|                      VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                      VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     bool MustFlushRegion(VAddr addr, u64 size, |     bool MustFlushRegion(DAddr addr, u64 size, | ||||||
|                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; |     VideoCore::RasterizerDownloadArea GetFlushArea(PAddr addr, u64 size) override; | ||||||
|     void InvalidateRegion(VAddr addr, u64 size, |     void InvalidateRegion(DAddr addr, u64 size, | ||||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void OnCacheInvalidation(VAddr addr, u64 size) override; |     void OnCacheInvalidation(PAddr addr, u64 size) override; | ||||||
|     bool OnCPUWrite(VAddr addr, u64 size) override; |     bool OnCPUWrite(PAddr addr, u64 size) override; | ||||||
|     void InvalidateGPUCache() override; |     void InvalidateGPUCache() override; | ||||||
|     void UnmapMemory(VAddr addr, u64 size) override; |     void UnmapMemory(DAddr addr, u64 size) override; | ||||||
|     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | ||||||
|     void SignalFence(std::function<void()>&& func) override; |     void SignalFence(std::function<void()>&& func) override; | ||||||
|     void SyncOperation(std::function<void()>&& func) override; |     void SyncOperation(std::function<void()>&& func) override; | ||||||
|  | @ -110,7 +109,7 @@ public: | ||||||
|     void SignalReference() override; |     void SignalReference() override; | ||||||
|     void ReleaseFences(bool force = true) override; |     void ReleaseFences(bool force = true) override; | ||||||
|     void FlushAndInvalidateRegion( |     void FlushAndInvalidateRegion( | ||||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |         DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void WaitForIdle() override; |     void WaitForIdle() override; | ||||||
|     void FragmentBarrier() override; |     void FragmentBarrier() override; | ||||||
|     void TiledCacheBarrier() override; |     void TiledCacheBarrier() override; | ||||||
|  | @ -123,7 +122,7 @@ public: | ||||||
|     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | ||||||
|     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | ||||||
|                                   std::span<const u8> memory) override; |                                   std::span<const u8> memory) override; | ||||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, | ||||||
|                            u32 pixel_stride) override; |                            u32 pixel_stride) override; | ||||||
|     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||||
|                            const VideoCore::DiskResourceLoadCallback& callback) override; |                            const VideoCore::DiskResourceLoadCallback& callback) override; | ||||||
|  | @ -235,6 +234,7 @@ private: | ||||||
|                        VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); |                        VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); | ||||||
| 
 | 
 | ||||||
|     Tegra::GPU& gpu; |     Tegra::GPU& gpu; | ||||||
|  |     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||||
| 
 | 
 | ||||||
|     const Device& device; |     const Device& device; | ||||||
|     ScreenInfo& screen_info; |     ScreenInfo& screen_info; | ||||||
|  |  | ||||||
|  | @ -168,11 +168,12 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs | ||||||
| } | } | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||||
|                          const Device& device_, TextureCache& texture_cache_, |                          Core::Frontend::EmuWindow& emu_window_, const Device& device_, | ||||||
|                          BufferCache& buffer_cache_, ProgramManager& program_manager_, |                          TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||||||
|                          StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_) |                          ProgramManager& program_manager_, StateTracker& state_tracker_, | ||||||
|     : VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_}, |                          VideoCore::ShaderNotify& shader_notify_) | ||||||
|  |     : VideoCommon::ShaderCache{device_memory_}, emu_window{emu_window_}, device{device_}, | ||||||
|       texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, |       texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, | ||||||
|       state_tracker{state_tracker_}, shader_notify{shader_notify_}, |       state_tracker{state_tracker_}, shader_notify{shader_notify_}, | ||||||
|       use_asynchronous_shaders{device.UseAsynchronousShaders()}, |       use_asynchronous_shaders{device.UseAsynchronousShaders()}, | ||||||
|  |  | ||||||
|  | @ -17,7 +17,7 @@ | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| class MemoryManager; | class MemoryManager; | ||||||
| } | } // namespace Tegra
 | ||||||
| 
 | 
 | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
| 
 | 
 | ||||||
|  | @ -28,10 +28,11 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>; | ||||||
| 
 | 
 | ||||||
| class ShaderCache : public VideoCommon::ShaderCache { | class ShaderCache : public VideoCommon::ShaderCache { | ||||||
| public: | public: | ||||||
|     explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, |     explicit ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||||
|                          const Device& device_, TextureCache& texture_cache_, |                          Core::Frontend::EmuWindow& emu_window_, const Device& device_, | ||||||
|                          BufferCache& buffer_cache_, ProgramManager& program_manager_, |                          TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||||||
|                          StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_); |                          ProgramManager& program_manager_, StateTracker& state_tracker_, | ||||||
|  |                          VideoCore::ShaderNotify& shader_notify_); | ||||||
|     ~ShaderCache(); |     ~ShaderCache(); | ||||||
| 
 | 
 | ||||||
|     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||||
|  |  | ||||||
|  | @ -144,12 +144,13 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit | ||||||
| 
 | 
 | ||||||
| RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | ||||||
|                                Core::Frontend::EmuWindow& emu_window_, |                                Core::Frontend::EmuWindow& emu_window_, | ||||||
|                                Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, |                                Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, | ||||||
|                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) |                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) | ||||||
|     : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, |     : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, | ||||||
|       emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, device{emu_window_}, |       emu_window{emu_window_}, device_memory{device_memory_}, gpu{gpu_}, device{emu_window_}, | ||||||
|       state_tracker{}, program_manager{device}, |       state_tracker{}, program_manager{device}, | ||||||
|       rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { |       rasterizer(emu_window, gpu, device_memory, device, screen_info, program_manager, | ||||||
|  |                  state_tracker) { | ||||||
|     if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { |     if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { | ||||||
|         glEnable(GL_DEBUG_OUTPUT); |         glEnable(GL_DEBUG_OUTPUT); | ||||||
|         glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); |         glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | ||||||
|  | @ -242,7 +243,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf | ||||||
|     const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; |     const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; | ||||||
|     const u64 size_in_bytes{Tegra::Texture::CalculateSize( |     const u64 size_in_bytes{Tegra::Texture::CalculateSize( | ||||||
|         true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; |         true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; | ||||||
|     const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; |     const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)}; | ||||||
|     const std::span<const u8> input_data(host_ptr, size_in_bytes); |     const std::span<const u8> input_data(host_ptr, size_in_bytes); | ||||||
|     Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, |     Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, | ||||||
|                                      framebuffer.width, framebuffer.height, 1, block_height_log2, |                                      framebuffer.width, framebuffer.height, 1, block_height_log2, | ||||||
|  |  | ||||||
|  | @ -61,7 +61,7 @@ class RendererOpenGL final : public VideoCore::RendererBase { | ||||||
| public: | public: | ||||||
|     explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_, |     explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_, | ||||||
|                             Core::Frontend::EmuWindow& emu_window_, |                             Core::Frontend::EmuWindow& emu_window_, | ||||||
|                             Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, |                             Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, | ||||||
|                             std::unique_ptr<Core::Frontend::GraphicsContext> context_); |                             std::unique_ptr<Core::Frontend::GraphicsContext> context_); | ||||||
|     ~RendererOpenGL() override; |     ~RendererOpenGL() override; | ||||||
| 
 | 
 | ||||||
|  | @ -101,7 +101,7 @@ private: | ||||||
| 
 | 
 | ||||||
|     Core::TelemetrySession& telemetry_session; |     Core::TelemetrySession& telemetry_session; | ||||||
|     Core::Frontend::EmuWindow& emu_window; |     Core::Frontend::EmuWindow& emu_window; | ||||||
|     Core::Memory::Memory& cpu_memory; |     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||||
|     Tegra::GPU& gpu; |     Tegra::GPU& gpu; | ||||||
| 
 | 
 | ||||||
|     Device device; |     Device device; | ||||||
|  |  | ||||||
|  | @ -82,10 +82,10 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl | ||||||
| 
 | 
 | ||||||
| RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | ||||||
|                                Core::Frontend::EmuWindow& emu_window, |                                Core::Frontend::EmuWindow& emu_window, | ||||||
|                                Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, |                                Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, | ||||||
|                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) try |                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) try | ||||||
|     : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), |     : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), | ||||||
|       cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), |       device_memory(device_memory_), gpu(gpu_), library(OpenLibrary(context.get())), | ||||||
|       instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, |       instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | ||||||
|                               Settings::values.renderer_debug.GetValue())), |                               Settings::values.renderer_debug.GetValue())), | ||||||
|       debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance) |       debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance) | ||||||
|  | @ -97,9 +97,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | ||||||
|                 render_window.GetFramebufferLayout().height), |                 render_window.GetFramebufferLayout().height), | ||||||
|       present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, |       present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, | ||||||
|                       surface), |                       surface), | ||||||
|       blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager, |       blit_screen(device_memory, render_window, device, memory_allocator, swapchain, present_manager, | ||||||
|                   scheduler, screen_info), |                   scheduler, screen_info), | ||||||
|       rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, |       rasterizer(render_window, gpu, device_memory, screen_info, device, memory_allocator, | ||||||
|                  state_tracker, scheduler) { |                  state_tracker, scheduler) { | ||||||
|     if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { |     if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { | ||||||
|         turbo_mode.emplace(instance, dld); |         turbo_mode.emplace(instance, dld); | ||||||
|  | @ -128,7 +128,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||||||
|     screen_info.width = framebuffer->width; |     screen_info.width = framebuffer->width; | ||||||
|     screen_info.height = framebuffer->height; |     screen_info.height = framebuffer->height; | ||||||
| 
 | 
 | ||||||
|     const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; |     const DAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | ||||||
|     const bool use_accelerated = |     const bool use_accelerated = | ||||||
|         rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); |         rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | ||||||
|     RenderScreenshot(*framebuffer, use_accelerated); |     RenderScreenshot(*framebuffer, use_accelerated); | ||||||
|  |  | ||||||
|  | @ -20,6 +20,7 @@ | ||||||
| #include "video_core/vulkan_common/vulkan_device.h" | #include "video_core/vulkan_common/vulkan_device.h" | ||||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| 
 | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
| class TelemetrySession; | class TelemetrySession; | ||||||
|  | @ -42,7 +43,7 @@ class RendererVulkan final : public VideoCore::RendererBase { | ||||||
| public: | public: | ||||||
|     explicit RendererVulkan(Core::TelemetrySession& telemtry_session, |     explicit RendererVulkan(Core::TelemetrySession& telemtry_session, | ||||||
|                             Core::Frontend::EmuWindow& emu_window, |                             Core::Frontend::EmuWindow& emu_window, | ||||||
|                             Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, |                             Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, | ||||||
|                             std::unique_ptr<Core::Frontend::GraphicsContext> context_); |                             std::unique_ptr<Core::Frontend::GraphicsContext> context_); | ||||||
|     ~RendererVulkan() override; |     ~RendererVulkan() override; | ||||||
| 
 | 
 | ||||||
|  | @ -62,7 +63,7 @@ private: | ||||||
|     void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated); |     void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated); | ||||||
| 
 | 
 | ||||||
|     Core::TelemetrySession& telemetry_session; |     Core::TelemetrySession& telemetry_session; | ||||||
|     Core::Memory::Memory& cpu_memory; |     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||||
|     Tegra::GPU& gpu; |     Tegra::GPU& gpu; | ||||||
| 
 | 
 | ||||||
|     std::shared_ptr<Common::DynamicLibrary> library; |     std::shared_ptr<Common::DynamicLibrary> library; | ||||||
|  |  | ||||||
|  | @ -14,8 +14,8 @@ | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/frontend/emu_window.h" | #include "core/frontend/emu_window.h" | ||||||
| #include "core/memory.h" |  | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| #include "video_core/host_shaders/fxaa_frag_spv.h" | #include "video_core/host_shaders/fxaa_frag_spv.h" | ||||||
| #include "video_core/host_shaders/fxaa_vert_spv.h" | #include "video_core/host_shaders/fxaa_vert_spv.h" | ||||||
| #include "video_core/host_shaders/present_bicubic_frag_spv.h" | #include "video_core/host_shaders/present_bicubic_frag_spv.h" | ||||||
|  | @ -121,11 +121,12 @@ struct BlitScreen::BufferData { | ||||||
|     // Unaligned image data goes here
 |     // Unaligned image data goes here
 | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, | BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||||
|                        const Device& device_, MemoryAllocator& memory_allocator_, |                        Core::Frontend::EmuWindow& render_window_, const Device& device_, | ||||||
|                        Swapchain& swapchain_, PresentManager& present_manager_, |                        MemoryAllocator& memory_allocator_, Swapchain& swapchain_, | ||||||
|                        Scheduler& scheduler_, const ScreenInfo& screen_info_) |                        PresentManager& present_manager_, Scheduler& scheduler_, | ||||||
|     : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, |                        const ScreenInfo& screen_info_) | ||||||
|  |     : device_memory{device_memory_}, render_window{render_window_}, device{device_}, | ||||||
|       memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, |       memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, | ||||||
|       scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { |       scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { | ||||||
|     resource_ticks.resize(image_count); |     resource_ticks.resize(image_count); | ||||||
|  | @ -219,8 +220,8 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | ||||||
|     if (!use_accelerated) { |     if (!use_accelerated) { | ||||||
|         const u64 image_offset = GetRawImageOffset(framebuffer); |         const u64 image_offset = GetRawImageOffset(framebuffer); | ||||||
| 
 | 
 | ||||||
|         const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; |         const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset; | ||||||
|         const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); |         const u8* const host_ptr = device_memory.GetPointer<u8>(framebuffer_addr); | ||||||
| 
 | 
 | ||||||
|         // TODO(Rodrigo): Read this from HLE
 |         // TODO(Rodrigo): Read this from HLE
 | ||||||
|         constexpr u32 block_height_log2 = 4; |         constexpr u32 block_height_log2 = 4; | ||||||
|  |  | ||||||
|  | @ -8,15 +8,12 @@ | ||||||
| #include "core/frontend/framebuffer_layout.h" | #include "core/frontend/framebuffer_layout.h" | ||||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| 
 | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
| class System; | class System; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| namespace Core::Memory { |  | ||||||
| class Memory; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| namespace Core::Frontend { | namespace Core::Frontend { | ||||||
| class EmuWindow; | class EmuWindow; | ||||||
| } | } | ||||||
|  | @ -56,7 +53,7 @@ struct ScreenInfo { | ||||||
| 
 | 
 | ||||||
| class BlitScreen { | class BlitScreen { | ||||||
| public: | public: | ||||||
|     explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, |     explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, Core::Frontend::EmuWindow& render_window, | ||||||
|                         const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, |                         const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, | ||||||
|                         PresentManager& present_manager, Scheduler& scheduler, |                         PresentManager& present_manager, Scheduler& scheduler, | ||||||
|                         const ScreenInfo& screen_info); |                         const ScreenInfo& screen_info); | ||||||
|  | @ -109,7 +106,7 @@ private: | ||||||
|     u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; |     u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; | ||||||
|     u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; |     u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; | ||||||
| 
 | 
 | ||||||
|     Core::Memory::Memory& cpu_memory; |     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||||
|     Core::Frontend::EmuWindow& render_window; |     Core::Frontend::EmuWindow& render_window; | ||||||
|     const Device& device; |     const Device& device; | ||||||
|     MemoryAllocator& memory_allocator; |     MemoryAllocator& memory_allocator; | ||||||
|  |  | ||||||
|  | @ -79,7 +79,7 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params) | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params) | ||||||
|     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} { |     : VideoCommon::BufferBase(null_params), tracker{4096} { | ||||||
|     if (runtime.device.HasNullDescriptor()) { |     if (runtime.device.HasNullDescriptor()) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  | @ -88,11 +88,9 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_p | ||||||
|     is_null = true; |     is_null = true; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | Buffer::Buffer(BufferCacheRuntime& runtime, DAddr cpu_addr_, u64 size_bytes_) | ||||||
|                VAddr cpu_addr_, u64 size_bytes_) |     : VideoCommon::BufferBase(cpu_addr_, size_bytes_), device{&runtime.device}, | ||||||
|     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), |       buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, tracker{SizeBytes()} { | ||||||
|       device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, |  | ||||||
|       tracker{SizeBytes()} { |  | ||||||
|     if (runtime.device.HasDebuggingToolAttached()) { |     if (runtime.device.HasDebuggingToolAttached()) { | ||||||
|         buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); |         buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -23,11 +23,10 @@ struct HostVertexBinding; | ||||||
| 
 | 
 | ||||||
| class BufferCacheRuntime; | class BufferCacheRuntime; | ||||||
| 
 | 
 | ||||||
| class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { | class Buffer : public VideoCommon::BufferBase { | ||||||
| public: | public: | ||||||
|     explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); |     explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); | ||||||
|     explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, |     explicit Buffer(BufferCacheRuntime& runtime, VAddr cpu_addr_, u64 size_bytes_); | ||||||
|                     VAddr cpu_addr_, u64 size_bytes_); |  | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); |     [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); | ||||||
| 
 | 
 | ||||||
|  | @ -173,7 +172,7 @@ struct BufferCacheParams { | ||||||
|     using Runtime = Vulkan::BufferCacheRuntime; |     using Runtime = Vulkan::BufferCacheRuntime; | ||||||
|     using Buffer = Vulkan::Buffer; |     using Buffer = Vulkan::Buffer; | ||||||
|     using Async_Buffer = Vulkan::StagingBufferRef; |     using Async_Buffer = Vulkan::StagingBufferRef; | ||||||
|     using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; |     using MemoryTracker = VideoCommon::MemoryTrackerBase<Tegra::MaxwellDeviceMemoryManager>; | ||||||
| 
 | 
 | ||||||
|     static constexpr bool IS_OPENGL = false; |     static constexpr bool IS_OPENGL = false; | ||||||
|     static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; |     static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; | ||||||
|  |  | ||||||
|  | @ -30,7 +30,6 @@ | ||||||
| #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||||
| #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_rasterizer.h" |  | ||||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||||
| #include "video_core/renderer_vulkan/vk_shader_util.h" | #include "video_core/renderer_vulkan/vk_shader_util.h" | ||||||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||||
|  | @ -299,12 +298,12 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c | ||||||
|     return std::memcmp(&rhs, this, Size()) == 0; |     return std::memcmp(&rhs, this, Size()) == 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_, | PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_, | ||||||
|                              Scheduler& scheduler_, DescriptorPool& descriptor_pool_, |                              Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | ||||||
|                              GuestDescriptorQueue& guest_descriptor_queue_, |                              GuestDescriptorQueue& guest_descriptor_queue_, | ||||||
|                              RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, |                              RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, | ||||||
|                              TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) |                              TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) | ||||||
|     : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_}, |     : VideoCommon::ShaderCache{device_memory_}, device{device_}, scheduler{scheduler_}, | ||||||
|       descriptor_pool{descriptor_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, |       descriptor_pool{descriptor_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, | ||||||
|       render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, |       render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, | ||||||
|       texture_cache{texture_cache_}, shader_notify{shader_notify_}, |       texture_cache{texture_cache_}, shader_notify{shader_notify_}, | ||||||
|  |  | ||||||
|  | @ -26,6 +26,7 @@ | ||||||
| #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||||||
| #include "video_core/renderer_vulkan/vk_texture_cache.h" | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||||||
| #include "video_core/shader_cache.h" | #include "video_core/shader_cache.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| 
 | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
| class System; | class System; | ||||||
|  | @ -79,7 +80,6 @@ class ComputePipeline; | ||||||
| class DescriptorPool; | class DescriptorPool; | ||||||
| class Device; | class Device; | ||||||
| class PipelineStatistics; | class PipelineStatistics; | ||||||
| class RasterizerVulkan; |  | ||||||
| class RenderPassCache; | class RenderPassCache; | ||||||
| class Scheduler; | class Scheduler; | ||||||
| 
 | 
 | ||||||
|  | @ -99,7 +99,7 @@ struct ShaderPools { | ||||||
| 
 | 
 | ||||||
| class PipelineCache : public VideoCommon::ShaderCache { | class PipelineCache : public VideoCommon::ShaderCache { | ||||||
| public: | public: | ||||||
|     explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler, |     explicit PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device, Scheduler& scheduler, | ||||||
|                            DescriptorPool& descriptor_pool, |                            DescriptorPool& descriptor_pool, | ||||||
|                            GuestDescriptorQueue& guest_descriptor_queue, |                            GuestDescriptorQueue& guest_descriptor_queue, | ||||||
|                            RenderPassCache& render_pass_cache, BufferCache& buffer_cache, |                            RenderPassCache& render_pass_cache, BufferCache& buffer_cache, | ||||||
|  |  | ||||||
|  | @ -14,7 +14,9 @@ | ||||||
| #include "common/bit_util.h" | #include "common/bit_util.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
|  | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/engines/draw_manager.h" | #include "video_core/engines/draw_manager.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| #include "video_core/query_cache/query_cache.h" | #include "video_core/query_cache/query_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_compute_pass.h" | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||||||
|  | @ -102,7 +104,7 @@ private: | ||||||
| using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>; | using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>; | ||||||
| 
 | 
 | ||||||
| struct HostSyncValues { | struct HostSyncValues { | ||||||
|     VAddr address; |     DAddr address; | ||||||
|     size_t size; |     size_t size; | ||||||
|     size_t offset; |     size_t offset; | ||||||
| 
 | 
 | ||||||
|  | @ -317,7 +319,7 @@ public: | ||||||
|         pending_sync.clear(); |         pending_sync.clear(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, |     size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, | ||||||
|                         [[maybe_unused]] std::optional<u32> subreport) override { |                         [[maybe_unused]] std::optional<u32> subreport) override { | ||||||
|         PauseCounter(); |         PauseCounter(); | ||||||
|         auto index = BuildQuery(); |         auto index = BuildQuery(); | ||||||
|  | @ -738,7 +740,7 @@ public: | ||||||
|         pending_sync.clear(); |         pending_sync.clear(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, |     size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, | ||||||
|                         std::optional<u32> subreport_) override { |                         std::optional<u32> subreport_) override { | ||||||
|         auto index = BuildQuery(); |         auto index = BuildQuery(); | ||||||
|         auto* new_query = GetQuery(index); |         auto* new_query = GetQuery(index); | ||||||
|  | @ -769,9 +771,9 @@ public: | ||||||
|         return index; |         return index; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     std::optional<std::pair<VAddr, size_t>> GetLastQueryStream(size_t stream) { |     std::optional<std::pair<DAddr, size_t>> GetLastQueryStream(size_t stream) { | ||||||
|         if (last_queries[stream] != 0) { |         if (last_queries[stream] != 0) { | ||||||
|             std::pair<VAddr, size_t> result(last_queries[stream], last_queries_stride[stream]); |             std::pair<DAddr, size_t> result(last_queries[stream], last_queries_stride[stream]); | ||||||
|             return result; |             return result; | ||||||
|         } |         } | ||||||
|         return std::nullopt; |         return std::nullopt; | ||||||
|  | @ -974,7 +976,7 @@ private: | ||||||
|     size_t buffers_count{}; |     size_t buffers_count{}; | ||||||
|     std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; |     std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; | ||||||
|     std::array<VkDeviceSize, NUM_STREAMS> offsets{}; |     std::array<VkDeviceSize, NUM_STREAMS> offsets{}; | ||||||
|     std::array<VAddr, NUM_STREAMS> last_queries; |     std::array<DAddr, NUM_STREAMS> last_queries; | ||||||
|     std::array<size_t, NUM_STREAMS> last_queries_stride; |     std::array<size_t, NUM_STREAMS> last_queries_stride; | ||||||
|     Maxwell3D::Regs::PrimitiveTopology out_topology; |     Maxwell3D::Regs::PrimitiveTopology out_topology; | ||||||
|     u64 streams_mask; |     u64 streams_mask; | ||||||
|  | @ -987,7 +989,7 @@ public: | ||||||
|         : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {} |         : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {} | ||||||
| 
 | 
 | ||||||
|     // Parameterized constructor
 |     // Parameterized constructor
 | ||||||
|     PrimitivesQueryBase(bool has_timestamp, VAddr address) |     PrimitivesQueryBase(bool has_timestamp, DAddr address) | ||||||
|         : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) { |         : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) { | ||||||
|         if (has_timestamp) { |         if (has_timestamp) { | ||||||
|             flags |= VideoCommon::QueryFlagBits::HasTimestamp; |             flags |= VideoCommon::QueryFlagBits::HasTimestamp; | ||||||
|  | @ -995,7 +997,7 @@ public: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     u64 stride{}; |     u64 stride{}; | ||||||
|     VAddr dependant_address{}; |     DAddr dependant_address{}; | ||||||
|     Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; |     Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; | ||||||
|     size_t dependant_index{}; |     size_t dependant_index{}; | ||||||
|     bool dependant_manage{}; |     bool dependant_manage{}; | ||||||
|  | @ -1005,15 +1007,15 @@ class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<Primitive | ||||||
| public: | public: | ||||||
|     explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_, |     explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_, | ||||||
|                                          TFBCounterStreamer& tfb_streamer_, |                                          TFBCounterStreamer& tfb_streamer_, | ||||||
|                                          Core::Memory::Memory& cpu_memory_) |                                          Tegra::MaxwellDeviceMemoryManager& device_memory_) | ||||||
|         : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_}, |         : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_}, | ||||||
|           tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} { |           tfb_streamer{tfb_streamer_}, device_memory{device_memory_} { | ||||||
|         MakeDependent(&tfb_streamer); |         MakeDependent(&tfb_streamer); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     ~PrimitivesSucceededStreamer() = default; |     ~PrimitivesSucceededStreamer() = default; | ||||||
| 
 | 
 | ||||||
|     size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, |     size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, | ||||||
|                         std::optional<u32> subreport_) override { |                         std::optional<u32> subreport_) override { | ||||||
|         auto index = BuildQuery(); |         auto index = BuildQuery(); | ||||||
|         auto* new_query = GetQuery(index); |         auto* new_query = GetQuery(index); | ||||||
|  | @ -1063,6 +1065,8 @@ public: | ||||||
|                 } |                 } | ||||||
|             }); |             }); | ||||||
|         } |         } | ||||||
|  |         auto* ptr = device_memory.GetPointer<u8>(new_query->dependant_address); | ||||||
|  |         ASSERT(ptr != nullptr); | ||||||
| 
 | 
 | ||||||
|         new_query->dependant_manage = must_manage_dependance; |         new_query->dependant_manage = must_manage_dependance; | ||||||
|         pending_flush_queries.push_back(index); |         pending_flush_queries.push_back(index); | ||||||
|  | @ -1100,7 +1104,7 @@ public: | ||||||
|                 num_vertices = dependant_query->value / query->stride; |                 num_vertices = dependant_query->value / query->stride; | ||||||
|                 tfb_streamer.Free(query->dependant_index); |                 tfb_streamer.Free(query->dependant_index); | ||||||
|             } else { |             } else { | ||||||
|                 u8* pointer = cpu_memory.GetPointer(query->dependant_address); |                 u8* pointer = device_memory.GetPointer<u8>(query->dependant_address); | ||||||
|                 u32 result; |                 u32 result; | ||||||
|                 std::memcpy(&result, pointer, sizeof(u32)); |                 std::memcpy(&result, pointer, sizeof(u32)); | ||||||
|                 num_vertices = static_cast<u64>(result) / query->stride; |                 num_vertices = static_cast<u64>(result) / query->stride; | ||||||
|  | @ -1137,7 +1141,7 @@ public: | ||||||
| private: | private: | ||||||
|     QueryCacheRuntime& runtime; |     QueryCacheRuntime& runtime; | ||||||
|     TFBCounterStreamer& tfb_streamer; |     TFBCounterStreamer& tfb_streamer; | ||||||
|     Core::Memory::Memory& cpu_memory; |     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||||
| 
 | 
 | ||||||
|     // syncing queue
 |     // syncing queue
 | ||||||
|     std::vector<size_t> pending_sync; |     std::vector<size_t> pending_sync; | ||||||
|  | @ -1152,12 +1156,12 @@ private: | ||||||
| 
 | 
 | ||||||
| struct QueryCacheRuntimeImpl { | struct QueryCacheRuntimeImpl { | ||||||
|     QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_, |     QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_, | ||||||
|                           Core::Memory::Memory& cpu_memory_, Vulkan::BufferCache& buffer_cache_, |                           Tegra::MaxwellDeviceMemoryManager& device_memory_, Vulkan::BufferCache& buffer_cache_, | ||||||
|                           const Device& device_, const MemoryAllocator& memory_allocator_, |                           const Device& device_, const MemoryAllocator& memory_allocator_, | ||||||
|                           Scheduler& scheduler_, StagingBufferPool& staging_pool_, |                           Scheduler& scheduler_, StagingBufferPool& staging_pool_, | ||||||
|                           ComputePassDescriptorQueue& compute_pass_descriptor_queue, |                           ComputePassDescriptorQueue& compute_pass_descriptor_queue, | ||||||
|                           DescriptorPool& descriptor_pool) |                           DescriptorPool& descriptor_pool) | ||||||
|         : rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, |         : rasterizer{rasterizer_}, device_memory{device_memory_}, | ||||||
|           buffer_cache{buffer_cache_}, device{device_}, |           buffer_cache{buffer_cache_}, device{device_}, | ||||||
|           memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, |           memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, | ||||||
|           guest_streamer(0, runtime), |           guest_streamer(0, runtime), | ||||||
|  | @ -1168,7 +1172,7 @@ struct QueryCacheRuntimeImpl { | ||||||
|                        scheduler, memory_allocator, staging_pool), |                        scheduler, memory_allocator, staging_pool), | ||||||
|           primitives_succeeded_streamer( |           primitives_succeeded_streamer( | ||||||
|               static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, |               static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, | ||||||
|               cpu_memory_), |               device_memory_), | ||||||
|           primitives_needed_minus_succeeded_streamer( |           primitives_needed_minus_succeeded_streamer( | ||||||
|               static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u), |               static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u), | ||||||
|           hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} { |           hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} { | ||||||
|  | @ -1195,7 +1199,7 @@ struct QueryCacheRuntimeImpl { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     VideoCore::RasterizerInterface* rasterizer; |     VideoCore::RasterizerInterface* rasterizer; | ||||||
|     Core::Memory::Memory& cpu_memory; |     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||||
|     Vulkan::BufferCache& buffer_cache; |     Vulkan::BufferCache& buffer_cache; | ||||||
| 
 | 
 | ||||||
|     const Device& device; |     const Device& device; | ||||||
|  | @ -1210,7 +1214,7 @@ struct QueryCacheRuntimeImpl { | ||||||
|     PrimitivesSucceededStreamer primitives_succeeded_streamer; |     PrimitivesSucceededStreamer primitives_succeeded_streamer; | ||||||
|     VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_succeeded_streamer; |     VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_succeeded_streamer; | ||||||
| 
 | 
 | ||||||
|     std::vector<std::pair<VAddr, VAddr>> little_cache; |     std::vector<std::pair<DAddr, DAddr>> little_cache; | ||||||
|     std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; |     std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; | ||||||
|     std::vector<size_t> redirect_cache; |     std::vector<size_t> redirect_cache; | ||||||
|     std::vector<std::vector<VkBufferCopy>> copies_setup; |     std::vector<std::vector<VkBufferCopy>> copies_setup; | ||||||
|  | @ -1229,14 +1233,14 @@ struct QueryCacheRuntimeImpl { | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | ||||||
|                                      Core::Memory::Memory& cpu_memory_, |                                      Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||||
|                                      Vulkan::BufferCache& buffer_cache_, const Device& device_, |                                      Vulkan::BufferCache& buffer_cache_, const Device& device_, | ||||||
|                                      const MemoryAllocator& memory_allocator_, |                                      const MemoryAllocator& memory_allocator_, | ||||||
|                                      Scheduler& scheduler_, StagingBufferPool& staging_pool_, |                                      Scheduler& scheduler_, StagingBufferPool& staging_pool_, | ||||||
|                                      ComputePassDescriptorQueue& compute_pass_descriptor_queue, |                                      ComputePassDescriptorQueue& compute_pass_descriptor_queue, | ||||||
|                                      DescriptorPool& descriptor_pool) { |                                      DescriptorPool& descriptor_pool) { | ||||||
|     impl = std::make_unique<QueryCacheRuntimeImpl>( |     impl = std::make_unique<QueryCacheRuntimeImpl>( | ||||||
|         *this, rasterizer, cpu_memory_, buffer_cache_, device_, memory_allocator_, scheduler_, |         *this, rasterizer, device_memory_, buffer_cache_, device_, memory_allocator_, scheduler_, | ||||||
|         staging_pool_, compute_pass_descriptor_queue, descriptor_pool); |         staging_pool_, compute_pass_descriptor_queue, descriptor_pool); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1309,7 +1313,7 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo | ||||||
|     ResumeHostConditionalRendering(); |     ResumeHostConditionalRendering(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal) { | void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) { | ||||||
|     VkBuffer to_resolve; |     VkBuffer to_resolve; | ||||||
|     u32 to_resolve_offset; |     u32 to_resolve_offset; | ||||||
|     { |     { | ||||||
|  | @ -1350,11 +1354,11 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const auto check_in_bc = [&](VAddr address) { |     const auto check_in_bc = [&](DAddr address) { | ||||||
|         return impl->buffer_cache.IsRegionGpuModified(address, 8); |         return impl->buffer_cache.IsRegionGpuModified(address, 8); | ||||||
|     }; |     }; | ||||||
|     const auto check_value = [&](VAddr address) { |     const auto check_value = [&](DAddr address) { | ||||||
|         u8* ptr = impl->cpu_memory.GetPointer(address); |         u8* ptr = impl->device_memory.GetPointer<u8>(address); | ||||||
|         u64 value{}; |         u64 value{}; | ||||||
|         std::memcpy(&value, ptr, sizeof(value)); |         std::memcpy(&value, ptr, sizeof(value)); | ||||||
|         return value == 0; |         return value == 0; | ||||||
|  | @ -1477,8 +1481,8 @@ void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer ba | ||||||
|     for (auto& sync_val : values) { |     for (auto& sync_val : values) { | ||||||
|         total_size += sync_val.size; |         total_size += sync_val.size; | ||||||
|         bool found = false; |         bool found = false; | ||||||
|         VAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE); |         DAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE); | ||||||
|         VAddr base_end = base + Core::Memory::YUZU_PAGESIZE; |         DAddr base_end = base + Core::Memory::YUZU_PAGESIZE; | ||||||
|         for (size_t i = 0; i < impl->little_cache.size(); i++) { |         for (size_t i = 0; i < impl->little_cache.size(); i++) { | ||||||
|             const auto set_found = [&] { |             const auto set_found = [&] { | ||||||
|                 impl->redirect_cache.push_back(i); |                 impl->redirect_cache.push_back(i); | ||||||
|  |  | ||||||
|  | @ -27,7 +27,7 @@ struct QueryCacheRuntimeImpl; | ||||||
| class QueryCacheRuntime { | class QueryCacheRuntime { | ||||||
| public: | public: | ||||||
|     explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, |     explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | ||||||
|                                Core::Memory::Memory& cpu_memory_, |                                Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||||
|                                Vulkan::BufferCache& buffer_cache_, const Device& device_, |                                Vulkan::BufferCache& buffer_cache_, const Device& device_, | ||||||
|                                const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, |                                const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, | ||||||
|                                StagingBufferPool& staging_pool_, |                                StagingBufferPool& staging_pool_, | ||||||
|  | @ -61,7 +61,7 @@ public: | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); |     void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); | ||||||
|     void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal); |     void HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal); | ||||||
|     friend struct QueryCacheRuntimeImpl; |     friend struct QueryCacheRuntimeImpl; | ||||||
|     std::unique_ptr<QueryCacheRuntimeImpl> impl; |     std::unique_ptr<QueryCacheRuntimeImpl> impl; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -18,6 +18,7 @@ | ||||||
| #include "video_core/engines/draw_manager.h" | #include "video_core/engines/draw_manager.h" | ||||||
| #include "video_core/engines/kepler_compute.h" | #include "video_core/engines/kepler_compute.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| #include "video_core/renderer_vulkan/blit_image.h" | #include "video_core/renderer_vulkan/blit_image.h" | ||||||
| #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||||||
| #include "video_core/renderer_vulkan/maxwell_to_vk.h" | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||||||
|  | @ -37,6 +38,7 @@ | ||||||
| #include "video_core/vulkan_common/vulkan_device.h" | #include "video_core/vulkan_common/vulkan_device.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| namespace Vulkan { | namespace Vulkan { | ||||||
| 
 | 
 | ||||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||||
|  | @ -163,10 +165,11 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, | ||||||
| } // Anonymous namespace
 | } // Anonymous namespace
 | ||||||
| 
 | 
 | ||||||
| RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||||
|                                    Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, |                                    Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||||
|                                    const Device& device_, MemoryAllocator& memory_allocator_, |                                    ScreenInfo& screen_info_, const Device& device_, | ||||||
|                                    StateTracker& state_tracker_, Scheduler& scheduler_) |                                    MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, | ||||||
|     : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_}, |                                    Scheduler& scheduler_) | ||||||
|  |     : gpu{gpu_}, device_memory{device_memory_}, screen_info{screen_info_}, device{device_}, | ||||||
|       memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, |       memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, | ||||||
|       staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), |       staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), | ||||||
|       guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler), |       guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler), | ||||||
|  | @ -174,14 +177,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | ||||||
|       texture_cache_runtime{ |       texture_cache_runtime{ | ||||||
|           device,     scheduler,         memory_allocator, staging_pool, |           device,     scheduler,         memory_allocator, staging_pool, | ||||||
|           blit_image, render_pass_cache, descriptor_pool,  compute_pass_descriptor_queue}, |           blit_image, render_pass_cache, descriptor_pool,  compute_pass_descriptor_queue}, | ||||||
|       texture_cache(texture_cache_runtime, *this), |       texture_cache(texture_cache_runtime, device_memory), | ||||||
|       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | ||||||
|                            guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), |                            guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), | ||||||
|       buffer_cache(*this, cpu_memory_, buffer_cache_runtime), |       buffer_cache(device_memory, buffer_cache_runtime), | ||||||
|       query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler, |       query_cache_runtime(this, device_memory, buffer_cache, device, memory_allocator, scheduler, | ||||||
|                           staging_pool, compute_pass_descriptor_queue, descriptor_pool), |                           staging_pool, compute_pass_descriptor_queue, descriptor_pool), | ||||||
|       query_cache(gpu, *this, cpu_memory_, query_cache_runtime), |       query_cache(gpu, *this, device_memory, query_cache_runtime), | ||||||
|       pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, |       pipeline_cache(device_memory, device, scheduler, descriptor_pool, guest_descriptor_queue, | ||||||
|                      render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), |                      render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), | ||||||
|       accelerate_dma(buffer_cache, texture_cache, scheduler), |       accelerate_dma(buffer_cache, texture_cache, scheduler), | ||||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | ||||||
|  | @ -508,7 +511,7 @@ void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 in | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::FlushAll() {} | void RasterizerVulkan::FlushAll() {} | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | void RasterizerVulkan::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { | ||||||
|     if (addr == 0 || size == 0) { |     if (addr == 0 || size == 0) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  | @ -525,7 +528,7 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | bool RasterizerVulkan::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { | ||||||
|     if ((True(which & VideoCommon::CacheType::BufferCache))) { |     if ((True(which & VideoCommon::CacheType::BufferCache))) { | ||||||
|         std::scoped_lock lock{buffer_cache.mutex}; |         std::scoped_lock lock{buffer_cache.mutex}; | ||||||
|         if (buffer_cache.IsRegionGpuModified(addr, size)) { |         if (buffer_cache.IsRegionGpuModified(addr, size)) { | ||||||
|  | @ -542,7 +545,7 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT | ||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 size) { | VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(DAddr addr, u64 size) { | ||||||
|     { |     { | ||||||
|         std::scoped_lock lock{texture_cache.mutex}; |         std::scoped_lock lock{texture_cache.mutex}; | ||||||
|         auto area = texture_cache.GetFlushArea(addr, size); |         auto area = texture_cache.GetFlushArea(addr, size); | ||||||
|  | @ -558,7 +561,7 @@ VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 | ||||||
|     return new_area; |     return new_area; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | void RasterizerVulkan::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { | ||||||
|     if (addr == 0 || size == 0) { |     if (addr == 0 || size == 0) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  | @ -578,7 +581,7 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) { | ||||||
|     { |     { | ||||||
|         std::scoped_lock lock{texture_cache.mutex}; |         std::scoped_lock lock{texture_cache.mutex}; | ||||||
|         for (const auto& [addr, size] : sequences) { |         for (const auto& [addr, size] : sequences) { | ||||||
|  | @ -599,7 +602,8 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | bool RasterizerVulkan::OnCPUWrite(PAddr p_addr, u64 size) { | ||||||
|  |     const DAddr addr = device_memory.GetAddressFromPAddr(p_addr); | ||||||
|     if (addr == 0 || size == 0) { |     if (addr == 0 || size == 0) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  | @ -620,7 +624,8 @@ bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | ||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) { | void RasterizerVulkan::OnCacheInvalidation(PAddr p_addr, u64 size) { | ||||||
|  |     const DAddr addr = device_memory.GetAddressFromPAddr(p_addr); | ||||||
|     if (addr == 0 || size == 0) { |     if (addr == 0 || size == 0) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  | @ -640,7 +645,7 @@ void RasterizerVulkan::InvalidateGPUCache() { | ||||||
|     gpu.InvalidateGPUCache(); |     gpu.InvalidateGPUCache(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | void RasterizerVulkan::UnmapMemory(DAddr addr, u64 size) { | ||||||
|     { |     { | ||||||
|         std::scoped_lock lock{texture_cache.mutex}; |         std::scoped_lock lock{texture_cache.mutex}; | ||||||
|         texture_cache.UnmapMemory(addr, size); |         texture_cache.UnmapMemory(addr, size); | ||||||
|  | @ -679,7 +684,7 @@ void RasterizerVulkan::ReleaseFences(bool force) { | ||||||
|     fence_manager.WaitPendingFences(force); |     fence_manager.WaitPendingFences(force); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, | void RasterizerVulkan::FlushAndInvalidateRegion(DAddr addr, u64 size, | ||||||
|                                                 VideoCommon::CacheType which) { |                                                 VideoCommon::CacheType which) { | ||||||
|     if (Settings::IsGPULevelExtreme()) { |     if (Settings::IsGPULevelExtreme()) { | ||||||
|         FlushRegion(addr, size, which); |         FlushRegion(addr, size, which); | ||||||
|  | @ -782,7 +787,7 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||||
|                                          VAddr framebuffer_addr, u32 pixel_stride) { |                                          DAddr framebuffer_addr, u32 pixel_stride) { | ||||||
|     if (!framebuffer_addr) { |     if (!framebuffer_addr) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -12,7 +12,6 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/control/channel_state_cache.h" | #include "video_core/control/channel_state_cache.h" | ||||||
| #include "video_core/engines/maxwell_dma.h" | #include "video_core/engines/maxwell_dma.h" | ||||||
| #include "video_core/rasterizer_accelerated.h" |  | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/renderer_vulkan/blit_image.h" | #include "video_core/renderer_vulkan/blit_image.h" | ||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||||
|  | @ -25,6 +24,7 @@ | ||||||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| 
 | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
| class System; | class System; | ||||||
|  | @ -34,10 +34,14 @@ namespace Core::Frontend { | ||||||
| class EmuWindow; | class EmuWindow; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| namespace Tegra::Engines { | namespace Tegra { | ||||||
|  | 
 | ||||||
|  | namespace Engines { | ||||||
| class Maxwell3D; | class Maxwell3D; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | } // namespace Tegra
 | ||||||
|  | 
 | ||||||
| namespace Vulkan { | namespace Vulkan { | ||||||
| 
 | 
 | ||||||
| struct ScreenInfo; | struct ScreenInfo; | ||||||
|  | @ -70,13 +74,14 @@ private: | ||||||
|     Scheduler& scheduler; |     Scheduler& scheduler; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class RasterizerVulkan final : public VideoCore::RasterizerAccelerated, | class RasterizerVulkan final : public VideoCore::RasterizerInterface, | ||||||
|                                protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |                                protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||||||
| public: | public: | ||||||
|     explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |     explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||||
|                               Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, |                               Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||||
|                               const Device& device_, MemoryAllocator& memory_allocator_, |                               ScreenInfo& screen_info_, const Device& device_, | ||||||
|                               StateTracker& state_tracker_, Scheduler& scheduler_); |                               MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, | ||||||
|  |                               Scheduler& scheduler_); | ||||||
|     ~RasterizerVulkan() override; |     ~RasterizerVulkan() override; | ||||||
| 
 | 
 | ||||||
|     void Draw(bool is_indexed, u32 instance_count) override; |     void Draw(bool is_indexed, u32 instance_count) override; | ||||||
|  | @ -90,18 +95,18 @@ public: | ||||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; |     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | ||||||
|     void FlushAll() override; |     void FlushAll() override; | ||||||
|     void FlushRegion(VAddr addr, u64 size, |     void FlushRegion(DAddr addr, u64 size, | ||||||
|                      VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                      VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     bool MustFlushRegion(VAddr addr, u64 size, |     bool MustFlushRegion(DAddr addr, u64 size, | ||||||
|                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; |     VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override; | ||||||
|     void InvalidateRegion(VAddr addr, u64 size, |     void InvalidateRegion(DAddr addr, u64 size, | ||||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; |     void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) override; | ||||||
|     void OnCacheInvalidation(VAddr addr, u64 size) override; |     void OnCacheInvalidation(DAddr addr, u64 size) override; | ||||||
|     bool OnCPUWrite(VAddr addr, u64 size) override; |     bool OnCPUWrite(DAddr addr, u64 size) override; | ||||||
|     void InvalidateGPUCache() override; |     void InvalidateGPUCache() override; | ||||||
|     void UnmapMemory(VAddr addr, u64 size) override; |     void UnmapMemory(DAddr addr, u64 size) override; | ||||||
|     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | ||||||
|     void SignalFence(std::function<void()>&& func) override; |     void SignalFence(std::function<void()>&& func) override; | ||||||
|     void SyncOperation(std::function<void()>&& func) override; |     void SyncOperation(std::function<void()>&& func) override; | ||||||
|  | @ -109,7 +114,7 @@ public: | ||||||
|     void SignalReference() override; |     void SignalReference() override; | ||||||
|     void ReleaseFences(bool force = true) override; |     void ReleaseFences(bool force = true) override; | ||||||
|     void FlushAndInvalidateRegion( |     void FlushAndInvalidateRegion( | ||||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |         DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void WaitForIdle() override; |     void WaitForIdle() override; | ||||||
|     void FragmentBarrier() override; |     void FragmentBarrier() override; | ||||||
|     void TiledCacheBarrier() override; |     void TiledCacheBarrier() override; | ||||||
|  | @ -122,7 +127,7 @@ public: | ||||||
|     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | ||||||
|     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | ||||||
|                                   std::span<const u8> memory) override; |                                   std::span<const u8> memory) override; | ||||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, | ||||||
|                            u32 pixel_stride) override; |                            u32 pixel_stride) override; | ||||||
|     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||||
|                            const VideoCore::DiskResourceLoadCallback& callback) override; |                            const VideoCore::DiskResourceLoadCallback& callback) override; | ||||||
|  | @ -176,6 +181,7 @@ private: | ||||||
|     void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); |     void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); | ||||||
| 
 | 
 | ||||||
|     Tegra::GPU& gpu; |     Tegra::GPU& gpu; | ||||||
|  |     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||||
| 
 | 
 | ||||||
|     ScreenInfo& screen_info; |     ScreenInfo& screen_info; | ||||||
|     const Device& device; |     const Device& device; | ||||||
|  |  | ||||||
|  | @ -12,6 +12,7 @@ | ||||||
| #include "video_core/dirty_flags.h" | #include "video_core/dirty_flags.h" | ||||||
| #include "video_core/engines/kepler_compute.h" | #include "video_core/engines/kepler_compute.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/shader_cache.h" | #include "video_core/shader_cache.h" | ||||||
| #include "video_core/shader_environment.h" | #include "video_core/shader_environment.h" | ||||||
|  | @ -34,7 +35,7 @@ void ShaderCache::SyncGuestHost() { | ||||||
|     RemovePendingShaders(); |     RemovePendingShaders(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} | ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_) : device_memory{device_memory_} {} | ||||||
| 
 | 
 | ||||||
| bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { | bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { | ||||||
|     auto& dirty{maxwell3d->dirty.flags}; |     auto& dirty{maxwell3d->dirty.flags}; | ||||||
|  | @ -132,7 +133,7 @@ void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t | ||||||
| 
 | 
 | ||||||
|     storage.push_back(std::move(data)); |     storage.push_back(std::move(data)); | ||||||
| 
 | 
 | ||||||
|     rasterizer.UpdatePagesCachedCount(addr, size, 1); |     device_memory.UpdatePagesCachedCount(addr, size, 1); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { | void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { | ||||||
|  | @ -209,7 +210,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) { | ||||||
| 
 | 
 | ||||||
|     const VAddr addr = entry->addr_start; |     const VAddr addr = entry->addr_start; | ||||||
|     const size_t size = entry->addr_end - addr; |     const size_t size = entry->addr_end - addr; | ||||||
|     rasterizer.UpdatePagesCachedCount(addr, size, -1); |     device_memory.UpdatePagesCachedCount(addr, size, -1); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) { | void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) { | ||||||
|  |  | ||||||
|  | @ -16,6 +16,7 @@ | ||||||
| #include "video_core/control/channel_state_cache.h" | #include "video_core/control/channel_state_cache.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/shader_environment.h" | #include "video_core/shader_environment.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| class MemoryManager; | class MemoryManager; | ||||||
|  | @ -77,7 +78,7 @@ protected: | ||||||
|         } |         } | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_); |     explicit ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory); | ||||||
| 
 | 
 | ||||||
|     /// @brief Update the hashes and information of shader stages
 |     /// @brief Update the hashes and information of shader stages
 | ||||||
|     /// @param unique_hashes Shader hashes to store into when a stage is enabled
 |     /// @param unique_hashes Shader hashes to store into when a stage is enabled
 | ||||||
|  | @ -145,7 +146,7 @@ private: | ||||||
|     /// @brief Create a new shader entry and register it
 |     /// @brief Create a new shader entry and register it
 | ||||||
|     const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); |     const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); | ||||||
| 
 | 
 | ||||||
|     VideoCore::RasterizerInterface& rasterizer; |     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||||
| 
 | 
 | ||||||
|     mutable std::mutex lookup_mutex; |     mutable std::mutex lookup_mutex; | ||||||
|     std::mutex invalidation_mutex; |     std::mutex invalidation_mutex; | ||||||
|  |  | ||||||
|  | @ -8,10 +8,11 @@ | ||||||
| 
 | 
 | ||||||
| #include "common/alignment.h" | #include "common/alignment.h" | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
| #include "core/memory.h" |  | ||||||
| #include "video_core/control/channel_state.h" | #include "video_core/control/channel_state.h" | ||||||
| #include "video_core/dirty_flags.h" | #include "video_core/dirty_flags.h" | ||||||
| #include "video_core/engines/kepler_compute.h" | #include "video_core/engines/kepler_compute.h" | ||||||
|  | #include "video_core/guest_memory.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| #include "video_core/texture_cache/image_view_base.h" | #include "video_core/texture_cache/image_view_base.h" | ||||||
| #include "video_core/texture_cache/samples_helper.h" | #include "video_core/texture_cache/samples_helper.h" | ||||||
| #include "video_core/texture_cache/texture_cache_base.h" | #include "video_core/texture_cache/texture_cache_base.h" | ||||||
|  | @ -27,8 +28,8 @@ using VideoCore::Surface::SurfaceType; | ||||||
| using namespace Common::Literals; | using namespace Common::Literals; | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_) | TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_) | ||||||
|     : runtime{runtime_}, rasterizer{rasterizer_} { |     : runtime{runtime_}, device_memory{device_memory_} { | ||||||
|     // Configure null sampler
 |     // Configure null sampler
 | ||||||
|     TSCEntry sampler_descriptor{}; |     TSCEntry sampler_descriptor{}; | ||||||
|     sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); |     sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||||||
|  | @ -49,19 +50,19 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | ||||||
|     void(slot_samplers.insert(runtime, sampler_descriptor)); |     void(slot_samplers.insert(runtime, sampler_descriptor)); | ||||||
| 
 | 
 | ||||||
|     if constexpr (HAS_DEVICE_MEMORY_INFO) { |     if constexpr (HAS_DEVICE_MEMORY_INFO) { | ||||||
|         const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); |         const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); | ||||||
|         const s64 min_spacing_expected = device_memory - 1_GiB; |         const s64 min_spacing_expected = device_local_memory - 1_GiB; | ||||||
|         const s64 min_spacing_critical = device_memory - 512_MiB; |         const s64 min_spacing_critical = device_local_memory - 512_MiB; | ||||||
|         const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); |         const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD); | ||||||
|         const s64 min_vacancy_expected = (6 * mem_threshold) / 10; |         const s64 min_vacancy_expected = (6 * mem_threshold) / 10; | ||||||
|         const s64 min_vacancy_critical = (3 * mem_threshold) / 10; |         const s64 min_vacancy_critical = (3 * mem_threshold) / 10; | ||||||
|         expected_memory = static_cast<u64>( |         expected_memory = static_cast<u64>( | ||||||
|             std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), |             std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), | ||||||
|                      DEFAULT_EXPECTED_MEMORY)); |                      DEFAULT_EXPECTED_MEMORY)); | ||||||
|         critical_memory = static_cast<u64>( |         critical_memory = static_cast<u64>( | ||||||
|             std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), |             std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), | ||||||
|                      DEFAULT_CRITICAL_MEMORY)); |                      DEFAULT_CRITICAL_MEMORY)); | ||||||
|         minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2); |         minimum_memory = static_cast<u64>((device_local_memory - mem_threshold) / 2); | ||||||
|     } else { |     } else { | ||||||
|         expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; |         expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; | ||||||
|         critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; |         critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; | ||||||
|  | @ -513,7 +514,7 @@ FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) { | ||||||
|     ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { |     ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { | ||||||
|         if (True(image.flags & ImageFlagBits::CpuModified)) { |         if (True(image.flags & ImageFlagBits::CpuModified)) { | ||||||
|             return; |             return; | ||||||
|  | @ -526,7 +527,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) { | ||||||
|     boost::container::small_vector<ImageId, 16> images; |     boost::container::small_vector<ImageId, 16> images; | ||||||
|     ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { |     ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { | ||||||
|         if (!image.IsSafeDownload()) { |         if (!image.IsSafeDownload()) { | ||||||
|  | @ -553,7 +554,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(VAddr cpu_addr, | std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(DAddr cpu_addr, | ||||||
|                                                                                u64 size) { |                                                                                u64 size) { | ||||||
|     std::optional<VideoCore::RasterizerDownloadArea> area{}; |     std::optional<VideoCore::RasterizerDownloadArea> area{}; | ||||||
|     ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) { |     ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) { | ||||||
|  | @ -579,7 +580,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) { | ||||||
|     boost::container::small_vector<ImageId, 16> deleted_images; |     boost::container::small_vector<ImageId, 16> deleted_images; | ||||||
|     ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); |     ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); | ||||||
|     for (const ImageId id : deleted_images) { |     for (const ImageId id : deleted_images) { | ||||||
|  | @ -713,7 +714,7 @@ bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView( | typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView( | ||||||
|     const Tegra::FramebufferConfig& config, VAddr cpu_addr) { |     const Tegra::FramebufferConfig& config, DAddr cpu_addr) { | ||||||
|     // TODO: Properly implement this
 |     // TODO: Properly implement this
 | ||||||
|     const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS); |     const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS); | ||||||
|     if (it == page_table.end()) { |     if (it == page_table.end()) { | ||||||
|  | @ -940,7 +941,7 @@ bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcep | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | bool TextureCache<P>::IsRegionGpuModified(DAddr addr, size_t size) { | ||||||
|     bool is_modified = false; |     bool is_modified = false; | ||||||
|     ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { |     ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { | ||||||
|         if (False(image.flags & ImageFlagBits::GpuModified)) { |         if (False(image.flags & ImageFlagBits::GpuModified)) { | ||||||
|  | @ -1059,7 +1060,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( |     Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( | ||||||
|         *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); |         *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); | ||||||
| 
 | 
 | ||||||
|     if (True(image.flags & ImageFlagBits::Converted)) { |     if (True(image.flags & ImageFlagBits::Converted)) { | ||||||
|  | @ -1124,7 +1125,7 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a | ||||||
| template <class P> | template <class P> | ||||||
| ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||||||
|                                    RelaxedOptions options) { |                                    RelaxedOptions options) { | ||||||
|     std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |     std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||||
|     if (!cpu_addr) { |     if (!cpu_addr) { | ||||||
|         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); |         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | ||||||
|         if (!cpu_addr) { |         if (!cpu_addr) { | ||||||
|  | @ -1265,7 +1266,7 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) { | ||||||
| 
 | 
 | ||||||
|     static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; |     static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; | ||||||
|     local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); |     local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); | ||||||
|     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( |     Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( | ||||||
|         *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); |         *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); | ||||||
| 
 | 
 | ||||||
|     auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, |     auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, | ||||||
|  | @ -1339,14 +1340,14 @@ bool TextureCache<P>::ScaleDown(Image& image) { | ||||||
| template <class P> | template <class P> | ||||||
| ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||||||
|                                      RelaxedOptions options) { |                                      RelaxedOptions options) { | ||||||
|     std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |     std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||||
|     if (!cpu_addr) { |     if (!cpu_addr) { | ||||||
|         const auto size = CalculateGuestSizeInBytes(info); |         const auto size = CalculateGuestSizeInBytes(info); | ||||||
|         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); |         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); | ||||||
|         if (!cpu_addr) { |         if (!cpu_addr) { | ||||||
|             const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; |             const DAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; | ||||||
|             virtual_invalid_space += Common::AlignUp(size, 32); |             virtual_invalid_space += Common::AlignUp(size, 32); | ||||||
|             cpu_addr = std::optional<VAddr>(fake_addr); |             cpu_addr = std::optional<DAddr>(fake_addr); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); |     ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); | ||||||
|  | @ -1362,7 +1363,7 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { | ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) { | ||||||
|     ImageInfo new_info = info; |     ImageInfo new_info = info; | ||||||
|     const size_t size_bytes = CalculateGuestSizeInBytes(new_info); |     const size_t size_bytes = CalculateGuestSizeInBytes(new_info); | ||||||
|     const bool broken_views = runtime.HasBrokenTextureViewFormats(); |     const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||||||
|  | @ -1650,7 +1651,7 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) { | ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) { | ||||||
|     std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |     std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||||
|     if (!cpu_addr) { |     if (!cpu_addr) { | ||||||
|         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); |         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | ||||||
|         if (!cpu_addr) { |         if (!cpu_addr) { | ||||||
|  | @ -1780,7 +1781,7 @@ ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAdd | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| template <typename Func> | template <typename Func> | ||||||
| void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { | void TextureCache<P>::ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func) { | ||||||
|     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; |     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||||||
|     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; |     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||||||
|     boost::container::small_vector<ImageId, 32> images; |     boost::container::small_vector<ImageId, 32> images; | ||||||
|  | @ -1924,11 +1925,11 @@ void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, | ||||||
| template <class P> | template <class P> | ||||||
| template <typename Func> | template <typename Func> | ||||||
| void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | ||||||
|     using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; |     using FuncReturn = typename std::invoke_result<Func, GPUVAddr, DAddr, size_t>::type; | ||||||
|     static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; |     static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; | ||||||
|     const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); |     const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); | ||||||
|     for (const auto& [gpu_addr, size] : segments) { |     for (const auto& [gpu_addr, size] : segments) { | ||||||
|         std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |         std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||||
|         ASSERT(cpu_addr); |         ASSERT(cpu_addr); | ||||||
|         if constexpr (RETURNS_BOOL) { |         if constexpr (RETURNS_BOOL) { | ||||||
|             if (func(gpu_addr, *cpu_addr, size)) { |             if (func(gpu_addr, *cpu_addr, size)) { | ||||||
|  | @ -1980,7 +1981,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | ||||||
|     } |     } | ||||||
|     boost::container::small_vector<ImageViewId, 16> sparse_maps; |     boost::container::small_vector<ImageViewId, 16> sparse_maps; | ||||||
|     ForEachSparseSegment( |     ForEachSparseSegment( | ||||||
|         image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { |         image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) { | ||||||
|             auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); |             auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | ||||||
|             ForEachCPUPage(cpu_addr, size, |             ForEachCPUPage(cpu_addr, size, | ||||||
|                            [this, map_id](u64 page) { page_table[page].push_back(map_id); }); |                            [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||||||
|  | @ -2048,7 +2049,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | ||||||
|     auto& sparse_maps = it->second; |     auto& sparse_maps = it->second; | ||||||
|     for (auto& map_view_id : sparse_maps) { |     for (auto& map_view_id : sparse_maps) { | ||||||
|         const auto& map_range = slot_map_views[map_view_id]; |         const auto& map_range = slot_map_views[map_view_id]; | ||||||
|         const VAddr cpu_addr = map_range.cpu_addr; |         const DAddr cpu_addr = map_range.cpu_addr; | ||||||
|         const std::size_t size = map_range.size; |         const std::size_t size = map_range.size; | ||||||
|         ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { |         ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { | ||||||
|             const auto page_it = page_table.find(page); |             const auto page_it = page_table.find(page); | ||||||
|  | @ -2080,7 +2081,7 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | ||||||
|     ASSERT(False(image.flags & ImageFlagBits::Tracked)); |     ASSERT(False(image.flags & ImageFlagBits::Tracked)); | ||||||
|     image.flags |= ImageFlagBits::Tracked; |     image.flags |= ImageFlagBits::Tracked; | ||||||
|     if (False(image.flags & ImageFlagBits::Sparse)) { |     if (False(image.flags & ImageFlagBits::Sparse)) { | ||||||
|         rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); |         device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     if (True(image.flags & ImageFlagBits::Registered)) { |     if (True(image.flags & ImageFlagBits::Registered)) { | ||||||
|  | @ -2089,15 +2090,15 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | ||||||
|         auto& sparse_maps = it->second; |         auto& sparse_maps = it->second; | ||||||
|         for (auto& map_view_id : sparse_maps) { |         for (auto& map_view_id : sparse_maps) { | ||||||
|             const auto& map = slot_map_views[map_view_id]; |             const auto& map = slot_map_views[map_view_id]; | ||||||
|             const VAddr cpu_addr = map.cpu_addr; |             const DAddr cpu_addr = map.cpu_addr; | ||||||
|             const std::size_t size = map.size; |             const std::size_t size = map.size; | ||||||
|             rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); |             device_memory.UpdatePagesCachedCount(cpu_addr, size, 1); | ||||||
|         } |         } | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     ForEachSparseSegment(image, |     ForEachSparseSegment(image, | ||||||
|                          [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { |                          [this]([[maybe_unused]] GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) { | ||||||
|                              rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); |                              device_memory.UpdatePagesCachedCount(cpu_addr, size, 1); | ||||||
|                          }); |                          }); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -2106,7 +2107,7 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | ||||||
|     ASSERT(True(image.flags & ImageFlagBits::Tracked)); |     ASSERT(True(image.flags & ImageFlagBits::Tracked)); | ||||||
|     image.flags &= ~ImageFlagBits::Tracked; |     image.flags &= ~ImageFlagBits::Tracked; | ||||||
|     if (False(image.flags & ImageFlagBits::Sparse)) { |     if (False(image.flags & ImageFlagBits::Sparse)) { | ||||||
|         rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); |         device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     ASSERT(True(image.flags & ImageFlagBits::Registered)); |     ASSERT(True(image.flags & ImageFlagBits::Registered)); | ||||||
|  | @ -2115,9 +2116,9 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | ||||||
|     auto& sparse_maps = it->second; |     auto& sparse_maps = it->second; | ||||||
|     for (auto& map_view_id : sparse_maps) { |     for (auto& map_view_id : sparse_maps) { | ||||||
|         const auto& map = slot_map_views[map_view_id]; |         const auto& map = slot_map_views[map_view_id]; | ||||||
|         const VAddr cpu_addr = map.cpu_addr; |         const DAddr cpu_addr = map.cpu_addr; | ||||||
|         const std::size_t size = map.size; |         const std::size_t size = map.size; | ||||||
|         rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); |         device_memory.UpdatePagesCachedCount(cpu_addr, size, -1); | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -36,9 +36,11 @@ | ||||||
| #include "video_core/texture_cache/types.h" | #include "video_core/texture_cache/types.h" | ||||||
| #include "video_core/textures/texture.h" | #include "video_core/textures/texture.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra::Control { | namespace Tegra { | ||||||
|  | namespace Control { | ||||||
| struct ChannelState; | struct ChannelState; | ||||||
| } | } | ||||||
|  | } // namespace Tegra
 | ||||||
| 
 | 
 | ||||||
| namespace VideoCommon { | namespace VideoCommon { | ||||||
| 
 | 
 | ||||||
|  | @ -126,7 +128,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
| public: | public: | ||||||
|     explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&); |     explicit TextureCache(Runtime&, Tegra::MaxwellDeviceMemoryManager&); | ||||||
| 
 | 
 | ||||||
|     /// Notify the cache that a new frame has been queued
 |     /// Notify the cache that a new frame has been queued
 | ||||||
|     void TickFrame(); |     void TickFrame(); | ||||||
|  | @ -190,15 +192,15 @@ public: | ||||||
|     Framebuffer* GetFramebuffer(); |     Framebuffer* GetFramebuffer(); | ||||||
| 
 | 
 | ||||||
|     /// Mark images in a range as modified from the CPU
 |     /// Mark images in a range as modified from the CPU
 | ||||||
|     void WriteMemory(VAddr cpu_addr, size_t size); |     void WriteMemory(DAddr cpu_addr, size_t size); | ||||||
| 
 | 
 | ||||||
|     /// Download contents of host images to guest memory in a region
 |     /// Download contents of host images to guest memory in a region
 | ||||||
|     void DownloadMemory(VAddr cpu_addr, size_t size); |     void DownloadMemory(DAddr cpu_addr, size_t size); | ||||||
| 
 | 
 | ||||||
|     std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); |     std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr cpu_addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     /// Remove images in a region
 |     /// Remove images in a region
 | ||||||
|     void UnmapMemory(VAddr cpu_addr, size_t size); |     void UnmapMemory(DAddr cpu_addr, size_t size); | ||||||
| 
 | 
 | ||||||
|     /// Remove images in a region
 |     /// Remove images in a region
 | ||||||
|     void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); |     void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); | ||||||
|  | @ -210,7 +212,7 @@ public: | ||||||
| 
 | 
 | ||||||
|     /// Try to find a cached image view in the given CPU address
 |     /// Try to find a cached image view in the given CPU address
 | ||||||
|     [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config, |     [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config, | ||||||
|                                                          VAddr cpu_addr); |                                                          DAddr cpu_addr); | ||||||
| 
 | 
 | ||||||
|     /// Return true when there are uncommitted images to be downloaded
 |     /// Return true when there are uncommitted images to be downloaded
 | ||||||
|     [[nodiscard]] bool HasUncommittedFlushes() const noexcept; |     [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | ||||||
|  | @ -235,7 +237,7 @@ public: | ||||||
|                                  GPUVAddr address = 0, size_t size = 0); |                                  GPUVAddr address = 0, size_t size = 0); | ||||||
| 
 | 
 | ||||||
|     /// Return true when a CPU region is modified from the GPU
 |     /// Return true when a CPU region is modified from the GPU
 | ||||||
|     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |     [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] bool IsRescaling() const noexcept; |     [[nodiscard]] bool IsRescaling() const noexcept; | ||||||
| 
 | 
 | ||||||
|  | @ -252,7 +254,7 @@ public: | ||||||
| private: | private: | ||||||
|     /// Iterate over all page indices in a range
 |     /// Iterate over all page indices in a range
 | ||||||
|     template <typename Func> |     template <typename Func> | ||||||
|     static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { |     static void ForEachCPUPage(DAddr addr, size_t size, Func&& func) { | ||||||
|         static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; |         static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; | ||||||
|         const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS; |         const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS; | ||||||
|         for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) { |         for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) { | ||||||
|  | @ -326,7 +328,7 @@ private: | ||||||
| 
 | 
 | ||||||
|     /// Create a new image and join perfectly matching existing images
 |     /// Create a new image and join perfectly matching existing images
 | ||||||
|     /// Remove joined images from the cache
 |     /// Remove joined images from the cache
 | ||||||
|     [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); |     [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr); | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr); |     [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr); | ||||||
| 
 | 
 | ||||||
|  | @ -349,7 +351,7 @@ private: | ||||||
| 
 | 
 | ||||||
|     /// Iterates over all the images in a region calling func
 |     /// Iterates over all the images in a region calling func
 | ||||||
|     template <typename Func> |     template <typename Func> | ||||||
|     void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); |     void ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func); | ||||||
| 
 | 
 | ||||||
|     template <typename Func> |     template <typename Func> | ||||||
|     void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); |     void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); | ||||||
|  | @ -421,7 +423,7 @@ private: | ||||||
| 
 | 
 | ||||||
|     Runtime& runtime; |     Runtime& runtime; | ||||||
| 
 | 
 | ||||||
|     VideoCore::RasterizerInterface& rasterizer; |     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||||
|     std::deque<TextureCacheGPUMap> gpu_page_table_storage; |     std::deque<TextureCacheGPUMap> gpu_page_table_storage; | ||||||
| 
 | 
 | ||||||
|     RenderTargets render_targets; |     RenderTargets render_targets; | ||||||
|  | @ -432,7 +434,7 @@ private: | ||||||
|     std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; |     std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; | ||||||
|     std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; |     std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; | ||||||
| 
 | 
 | ||||||
|     VAddr virtual_invalid_space{}; |     DAddr virtual_invalid_space{}; | ||||||
| 
 | 
 | ||||||
|     bool has_deleted_images = false; |     bool has_deleted_images = false; | ||||||
|     bool is_rescaling = false; |     bool is_rescaling = false; | ||||||
|  |  | ||||||
|  | @ -23,6 +23,7 @@ | ||||||
| #include "core/memory.h" | #include "core/memory.h" | ||||||
| #include "video_core/compatible_formats.h" | #include "video_core/compatible_formats.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
|  | #include "video_core/guest_memory.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/surface.h" | #include "video_core/surface.h" | ||||||
| #include "video_core/texture_cache/decode_bc.h" | #include "video_core/texture_cache/decode_bc.h" | ||||||
|  | @ -552,7 +553,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | ||||||
|     for (s32 layer = 0; layer < info.resources.layers; ++layer) { |     for (s32 layer = 0; layer < info.resources.layers; ++layer) { | ||||||
|         const std::span<const u8> src = input.subspan(host_offset); |         const std::span<const u8> src = input.subspan(host_offset); | ||||||
|         { |         { | ||||||
|             Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> |             Tegra::Memory::GpuGuestMemoryScoped<u8, | ||||||
|  |                                                 Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite> | ||||||
|                 dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); |                 dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); | ||||||
| 
 | 
 | ||||||
|             SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, |             SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, | ||||||
|  |  | ||||||
|  | @ -6,6 +6,8 @@ | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
|  | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
|  | #include "video_core/host1x/host1x.h" | ||||||
| #include "video_core/renderer_base.h" | #include "video_core/renderer_base.h" | ||||||
| #include "video_core/renderer_null/renderer_null.h" | #include "video_core/renderer_null/renderer_null.h" | ||||||
| #include "video_core/renderer_opengl/renderer_opengl.h" | #include "video_core/renderer_opengl/renderer_opengl.h" | ||||||
|  | @ -18,18 +20,17 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer( | ||||||
|     Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, |     Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, | ||||||
|     std::unique_ptr<Core::Frontend::GraphicsContext> context) { |     std::unique_ptr<Core::Frontend::GraphicsContext> context) { | ||||||
|     auto& telemetry_session = system.TelemetrySession(); |     auto& telemetry_session = system.TelemetrySession(); | ||||||
|     auto& cpu_memory = system.ApplicationMemory(); |     auto& device_memory = system.Host1x().MemoryManager(); | ||||||
| 
 | 
 | ||||||
|     switch (Settings::values.renderer_backend.GetValue()) { |     switch (Settings::values.renderer_backend.GetValue()) { | ||||||
|     case Settings::RendererBackend::OpenGL: |     case Settings::RendererBackend::OpenGL: | ||||||
|         return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory, |         return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, | ||||||
|                                                         gpu, std::move(context)); |                                                         device_memory, gpu, std::move(context)); | ||||||
|     case Settings::RendererBackend::Vulkan: |     case Settings::RendererBackend::Vulkan: | ||||||
|         return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory, |         return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, | ||||||
|                                                         gpu, std::move(context)); |                                                         device_memory, gpu, std::move(context)); | ||||||
|     case Settings::RendererBackend::Null: |     case Settings::RendererBackend::Null: | ||||||
|         return std::make_unique<Null::RendererNull>(emu_window, cpu_memory, gpu, |         return std::make_unique<Null::RendererNull>(emu_window, gpu, std::move(context)); | ||||||
|                                                     std::move(context)); |  | ||||||
|     default: |     default: | ||||||
|         return nullptr; |         return nullptr; | ||||||
|     } |     } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow