forked from eden-emu/eden
		
	SMMU: Initial adaptation to video_core.
This commit is contained in:
		
							parent
							
								
									d0329a2c00
								
							
						
					
					
						commit
						9db159da71
					
				
					 79 changed files with 1262 additions and 1263 deletions
				
			
		|  | @ -9,6 +9,7 @@ | |||
| #include "core/core.h" | ||||
| #include "core/core_timing.h" | ||||
| #include "core/memory.h" | ||||
| #include "core/guest_memory.h" | ||||
| 
 | ||||
| #include "core/hle/kernel/k_process.h" | ||||
| 
 | ||||
|  |  | |||
|  | @ -10,6 +10,7 @@ | |||
| #include "common/logging/log.h" | ||||
| #include "common/scratch_buffer.h" | ||||
| #include "core/memory.h" | ||||
| #include "core/guest_memory.h" | ||||
| 
 | ||||
| namespace AudioCore::Renderer { | ||||
| 
 | ||||
|  |  | |||
|  | @ -37,6 +37,8 @@ add_library(core STATIC | |||
|     debugger/gdbstub_arch.h | ||||
|     debugger/gdbstub.cpp | ||||
|     debugger/gdbstub.h | ||||
|     device_memory_manager.h | ||||
|     device_memory_manager.inc | ||||
|     device_memory.cpp | ||||
|     device_memory.h | ||||
|     file_sys/fssystem/fs_i_storage.h | ||||
|  |  | |||
|  | @ -651,7 +651,7 @@ size_t System::GetCurrentHostThreadID() const { | |||
|     return impl->kernel.GetCurrentHostThreadID(); | ||||
| } | ||||
| 
 | ||||
| void System::GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback) { | ||||
| void System::GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback) { | ||||
|     return this->ApplicationProcess()->GatherGPUDirtyMemory(callback); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -224,7 +224,7 @@ public: | |||
|     /// Prepare the core emulation for a reschedule
 | ||||
|     void PrepareReschedule(u32 core_index); | ||||
| 
 | ||||
|     void GatherGPUDirtyMemory(std::function<void(VAddr, size_t)>& callback); | ||||
|     void GatherGPUDirtyMemory(std::function<void(PAddr, size_t)>& callback); | ||||
| 
 | ||||
|     [[nodiscard]] size_t GetCurrentHostThreadID() const; | ||||
| 
 | ||||
|  |  | |||
|  | @ -3,10 +3,11 @@ | |||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <deque> | ||||
| #include <memory> | ||||
| #include <array> | ||||
| #include <atomic> | ||||
| #include <deque> | ||||
| #include <memory> | ||||
| #include <mutex> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "common/virtual_buffer.h" | ||||
|  | @ -48,26 +49,54 @@ public: | |||
|     template <typename T> | ||||
|     const T* GetPointer(DAddr address) const; | ||||
| 
 | ||||
|     DAddr GetAddressFromPAddr(PAddr address) const { | ||||
|         DAddr subbits = static_cast<DAddr>(address & page_mask); | ||||
|         return (static_cast<DAddr>(compressed_device_addr[(address >> page_bits)]) << page_bits) + subbits; | ||||
|     } | ||||
| 
 | ||||
|     PAddr GetPhysicalRawAddressFromDAddr(DAddr address) const { | ||||
|         PAddr subbits = static_cast<PAddr>(address & page_mask); | ||||
|         auto paddr = compressed_physical_ptr[(address >> page_bits)]; | ||||
|         if (paddr == 0) { | ||||
|             return 0; | ||||
|         } | ||||
|         return (static_cast<PAddr>(paddr - 1) << page_bits) + subbits; | ||||
|     } | ||||
| 
 | ||||
|     template <typename T> | ||||
|     void Write(DAddr address, T value); | ||||
| 
 | ||||
|     template <typename T> | ||||
|     T Read(DAddr address) const; | ||||
| 
 | ||||
|     const u8* GetSpan(const DAddr src_addr, const std::size_t size) const { | ||||
|         return nullptr; | ||||
|     } | ||||
| 
 | ||||
|     u8* GetSpan(const DAddr src_addr, const std::size_t size) { | ||||
|         return nullptr; | ||||
|     } | ||||
| 
 | ||||
|     void ReadBlock(DAddr address, void* dest_pointer, size_t size); | ||||
|     void WriteBlock(DAddr address, void* src_pointer, size_t size); | ||||
|     void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size); | ||||
|     void WriteBlock(DAddr address, const void* src_pointer, size_t size); | ||||
|     void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size); | ||||
| 
 | ||||
|     size_t RegisterProcess(Memory::Memory* memory); | ||||
|     void UnregisterProcess(size_t id); | ||||
| 
 | ||||
|     void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta); | ||||
| 
 | ||||
|     static constexpr size_t AS_BITS = Traits::device_virtual_bits; | ||||
| 
 | ||||
| private: | ||||
|     static constexpr bool supports_pinning = Traits::supports_pinning; | ||||
|     static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; | ||||
|     static constexpr size_t device_as_size = 1ULL << device_virtual_bits; | ||||
|     static constexpr size_t physical_max_bits = 33; | ||||
|     static constexpr size_t page_bits = 12; | ||||
|     static constexpr size_t page_size = 1ULL << page_bits; | ||||
|     static constexpr size_t page_mask = page_size - 1ULL; | ||||
|     static constexpr u32 physical_address_base = 1U << page_bits; | ||||
| 
 | ||||
|     template <typename T> | ||||
|  | @ -136,11 +165,15 @@ private: | |||
|     private: | ||||
|         std::array<std::atomic_uint16_t, subentries> values{}; | ||||
|     }; | ||||
|     static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), "CounterEntry should be 8 bytes!"); | ||||
|     static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), | ||||
|                   "CounterEntry should be 8 bytes!"); | ||||
| 
 | ||||
|     static constexpr size_t num_counter_entries = (1ULL << (device_virtual_bits - page_bits)) / subentries; | ||||
|     static constexpr size_t num_counter_entries = | ||||
|         (1ULL << (device_virtual_bits - page_bits)) / subentries; | ||||
|     using CachedPages = std::array<CounterEntry, num_counter_entries>; | ||||
|     std::unique_ptr<CachedPages> cached_pages; | ||||
|     std::mutex counter_guard; | ||||
|     std::mutex mapping_guard; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Core
 | ||||
|  | @ -105,7 +105,8 @@ template <typename Traits> | |||
| DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memory_) | ||||
|     : physical_base{reinterpret_cast<const uintptr_t>(device_memory_.buffer.BackingBasePointer())}, | ||||
|       interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), | ||||
|       compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) { | ||||
|       compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)), | ||||
|       cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) { | ||||
|     impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); | ||||
|     cached_pages = std::make_unique<CachedPages>(); | ||||
| } | ||||
|  | @ -144,10 +145,10 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size | |||
|     Core::Memory::Memory* process_memory = registered_processes[process_id]; | ||||
|     size_t start_page_d = address >> Memory::YUZU_PAGEBITS; | ||||
|     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; | ||||
|     std::atomic_thread_fence(std::memory_order_acquire); | ||||
|     std::scoped_lock lk(mapping_guard); | ||||
|     for (size_t i = 0; i < num_pages; i++) { | ||||
|         const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE; | ||||
|         auto* ptr = process_memory->GetPointer(Common::ProcessAddress(new_vaddress)); | ||||
|         auto* ptr = process_memory->GetPointerSilent(Common::ProcessAddress(new_vaddress)); | ||||
|         if (ptr == nullptr) [[unlikely]] { | ||||
|             compressed_physical_ptr[start_page_d + i] = 0; | ||||
|             continue; | ||||
|  | @ -157,14 +158,14 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size | |||
|         compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i); | ||||
|         InsertCPUBacking(start_page_d + i, new_vaddress, process_id); | ||||
|     } | ||||
|     std::atomic_thread_fence(std::memory_order_release); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) { | ||||
|     size_t start_page_d = address >> Memory::YUZU_PAGEBITS; | ||||
|     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; | ||||
|     std::atomic_thread_fence(std::memory_order_acquire); | ||||
|     interface->InvalidateRegion(address, size); | ||||
|     std::scoped_lock lk(mapping_guard); | ||||
|     for (size_t i = 0; i < num_pages; i++) { | ||||
|         auto phys_addr = compressed_physical_ptr[start_page_d + i]; | ||||
|         compressed_physical_ptr[start_page_d + i] = 0; | ||||
|  | @ -173,7 +174,6 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) { | |||
|             compressed_device_addr[phys_addr - 1] = 0; | ||||
|         } | ||||
|     } | ||||
|     std::atomic_thread_fence(std::memory_order_release); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
|  | @ -256,6 +256,7 @@ void DeviceMemoryManager<Traits>::WalkBlock(DAddr addr, std::size_t size, auto o | |||
| 
 | ||||
| template <typename Traits> | ||||
| void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) { | ||||
|     interface->FlushRegion(address, size); | ||||
|     WalkBlock( | ||||
|         address, size, | ||||
|         [&](size_t copy_amount, DAddr current_vaddr) { | ||||
|  | @ -274,7 +275,7 @@ void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, s | |||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, void* src_pointer, size_t size) { | ||||
| void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, const void* src_pointer, size_t size) { | ||||
|     WalkBlock( | ||||
|         address, size, | ||||
|         [&](size_t copy_amount, DAddr current_vaddr) { | ||||
|  | @ -287,7 +288,46 @@ void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, void* src_pointer, s | |||
|             std::memcpy(dst_ptr, src_pointer, copy_amount); | ||||
|         }, | ||||
|         [&](const std::size_t copy_amount) { | ||||
|             src_pointer = static_cast<u8*>(src_pointer) + copy_amount; | ||||
|             src_pointer = static_cast<const u8*>(src_pointer) + copy_amount; | ||||
|         }); | ||||
|     interface->InvalidateRegion(address, size); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size) { | ||||
|     WalkBlock( | ||||
|         address, size, | ||||
|         [&](size_t copy_amount, DAddr current_vaddr) { | ||||
|             LOG_ERROR( | ||||
|                 HW_Memory, | ||||
|                 "Unmapped Device ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", | ||||
|                 current_vaddr, address, size); | ||||
|             std::memset(dest_pointer, 0, copy_amount); | ||||
|         }, | ||||
|         [&](size_t copy_amount, const u8* const src_ptr) { | ||||
|             std::memcpy(dest_pointer, src_ptr, copy_amount); | ||||
|         }, | ||||
|         [&](const std::size_t copy_amount) { | ||||
|             dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount; | ||||
|         }); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer, | ||||
|                                                    size_t size) { | ||||
|     WalkBlock( | ||||
|         address, size, | ||||
|         [&](size_t copy_amount, DAddr current_vaddr) { | ||||
|             LOG_ERROR( | ||||
|                 HW_Memory, | ||||
|                 "Unmapped Device WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", | ||||
|                 current_vaddr, address, size); | ||||
|         }, | ||||
|         [&](size_t copy_amount, u8* const dst_ptr) { | ||||
|             std::memcpy(dst_ptr, src_pointer, copy_amount); | ||||
|         }, | ||||
|         [&](const std::size_t copy_amount) { | ||||
|             src_pointer = static_cast<const u8*>(src_pointer) + copy_amount; | ||||
|         }); | ||||
| } | ||||
| 
 | ||||
|  | @ -313,6 +353,18 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(size_t id) { | |||
| 
 | ||||
| template <typename Traits> | ||||
| void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) { | ||||
|     bool locked = false; | ||||
|     auto lock = [&] { | ||||
|         if (!locked) { | ||||
|             counter_guard.lock(); | ||||
|             locked = true; | ||||
|         } | ||||
|     }; | ||||
|     SCOPE_EXIT({ | ||||
|         if (locked) { | ||||
|             counter_guard.unlock(); | ||||
|         } | ||||
|     }); | ||||
|     u64 uncache_begin = 0; | ||||
|     u64 cache_begin = 0; | ||||
|     u64 uncache_bytes = 0; | ||||
|  | @ -347,6 +399,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | |||
|             } | ||||
|             uncache_bytes += Memory::YUZU_PAGESIZE; | ||||
|         } else if (uncache_bytes > 0) { | ||||
|             lock(); | ||||
|             MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, | ||||
|                               uncache_bytes, false); | ||||
|             uncache_bytes = 0; | ||||
|  | @ -357,6 +410,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | |||
|             } | ||||
|             cache_bytes += Memory::YUZU_PAGESIZE; | ||||
|         } else if (cache_bytes > 0) { | ||||
|             lock(); | ||||
|             MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, | ||||
|                               true); | ||||
|             cache_bytes = 0; | ||||
|  | @ -364,10 +418,12 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | |||
|         vpage++; | ||||
|     } | ||||
|     if (uncache_bytes > 0) { | ||||
|         lock(); | ||||
|         MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, | ||||
|                           false); | ||||
|     } | ||||
|     if (cache_bytes > 0) { | ||||
|         lock(); | ||||
|         MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, | ||||
|                           true); | ||||
|     } | ||||
|  |  | |||
|  | @ -23,7 +23,7 @@ public: | |||
| 
 | ||||
|     ~GPUDirtyMemoryManager() = default; | ||||
| 
 | ||||
|     void Collect(VAddr address, size_t size) { | ||||
|     void Collect(PAddr address, size_t size) { | ||||
|         TransformAddress t = BuildTransform(address, size); | ||||
|         TransformAddress tmp, original; | ||||
|         do { | ||||
|  | @ -47,7 +47,7 @@ public: | |||
|                                                 std::memory_order_relaxed)); | ||||
|     } | ||||
| 
 | ||||
|     void Gather(std::function<void(VAddr, size_t)>& callback) { | ||||
|     void Gather(std::function<void(PAddr, size_t)>& callback) { | ||||
|         { | ||||
|             std::scoped_lock lk(guard); | ||||
|             TransformAddress t = current.exchange(default_transform, std::memory_order_relaxed); | ||||
|  | @ -65,7 +65,7 @@ public: | |||
|                 mask = mask >> empty_bits; | ||||
| 
 | ||||
|                 const size_t continuous_bits = std::countr_one(mask); | ||||
|                 callback((static_cast<VAddr>(transform.address) << page_bits) + offset, | ||||
|                 callback((static_cast<PAddr>(transform.address) << page_bits) + offset, | ||||
|                          continuous_bits << align_bits); | ||||
|                 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; | ||||
|                 offset += continuous_bits << align_bits; | ||||
|  | @ -89,7 +89,7 @@ private: | |||
|     constexpr static size_t align_mask = align_size - 1; | ||||
|     constexpr static TransformAddress default_transform = {.address = ~0U, .mask = 0U}; | ||||
| 
 | ||||
|     bool IsValid(VAddr address) { | ||||
|     bool IsValid(PAddr address) { | ||||
|         return address < (1ULL << 39); | ||||
|     } | ||||
| 
 | ||||
|  | @ -103,7 +103,7 @@ private: | |||
|         return mask; | ||||
|     } | ||||
| 
 | ||||
|     TransformAddress BuildTransform(VAddr address, size_t size) { | ||||
|     TransformAddress BuildTransform(PAddr address, size_t size) { | ||||
|         const size_t minor_address = address & page_mask; | ||||
|         const size_t minor_bit = minor_address >> align_bits; | ||||
|         const size_t top_bit = (minor_address + size + align_mask) >> align_bits; | ||||
|  |  | |||
							
								
								
									
										218
									
								
								src/core/guest_memory.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										218
									
								
								src/core/guest_memory.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,218 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <iterator> | ||||
| #include <memory> | ||||
| #include <optional> | ||||
| #include <span> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "common/scratch_buffer.h" | ||||
| #include "core/memory.h" | ||||
| 
 | ||||
| namespace Core::Memory { | ||||
| 
 | ||||
| enum GuestMemoryFlags : u32 { | ||||
|     Read = 1 << 0, | ||||
|     Write = 1 << 1, | ||||
|     Safe = 1 << 2, | ||||
|     Cached = 1 << 3, | ||||
| 
 | ||||
|     SafeRead = Read | Safe, | ||||
|     SafeWrite = Write | Safe, | ||||
|     SafeReadWrite = SafeRead | SafeWrite, | ||||
|     SafeReadCachedWrite = SafeReadWrite | Cached, | ||||
| 
 | ||||
|     UnsafeRead = Read, | ||||
|     UnsafeWrite = Write, | ||||
|     UnsafeReadWrite = UnsafeRead | UnsafeWrite, | ||||
|     UnsafeReadCachedWrite = UnsafeReadWrite | Cached, | ||||
| }; | ||||
| 
 | ||||
| namespace { | ||||
| template <typename M, typename T, GuestMemoryFlags FLAGS> | ||||
| class GuestMemory { | ||||
|     using iterator = T*; | ||||
|     using const_iterator = const T*; | ||||
|     using value_type = T; | ||||
|     using element_type = T; | ||||
|     using iterator_category = std::contiguous_iterator_tag; | ||||
| 
 | ||||
| public: | ||||
|     GuestMemory() = delete; | ||||
|     explicit GuestMemory(M& memory, u64 addr, std::size_t size, | ||||
|                          Common::ScratchBuffer<T>* backup = nullptr) | ||||
|         : m_memory{memory}, m_addr{addr}, m_size{size} { | ||||
|         static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); | ||||
|         if constexpr (FLAGS & GuestMemoryFlags::Read) { | ||||
|             Read(addr, size, backup); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     ~GuestMemory() = default; | ||||
| 
 | ||||
|     T* data() noexcept { | ||||
|         return m_data_span.data(); | ||||
|     } | ||||
| 
 | ||||
|     const T* data() const noexcept { | ||||
|         return m_data_span.data(); | ||||
|     } | ||||
| 
 | ||||
|     size_t size() const noexcept { | ||||
|         return m_size; | ||||
|     } | ||||
| 
 | ||||
|     size_t size_bytes() const noexcept { | ||||
|         return this->size() * sizeof(T); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] T* begin() noexcept { | ||||
|         return this->data(); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] const T* begin() const noexcept { | ||||
|         return this->data(); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] T* end() noexcept { | ||||
|         return this->data() + this->size(); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] const T* end() const noexcept { | ||||
|         return this->data() + this->size(); | ||||
|     } | ||||
| 
 | ||||
|     T& operator[](size_t index) noexcept { | ||||
|         return m_data_span[index]; | ||||
|     } | ||||
| 
 | ||||
|     const T& operator[](size_t index) const noexcept { | ||||
|         return m_data_span[index]; | ||||
|     } | ||||
| 
 | ||||
|     void SetAddressAndSize(u64 addr, std::size_t size) noexcept { | ||||
|         m_addr = addr; | ||||
|         m_size = size; | ||||
|         m_addr_changed = true; | ||||
|     } | ||||
| 
 | ||||
|     std::span<T> Read(u64 addr, std::size_t size, | ||||
|                       Common::ScratchBuffer<T>* backup = nullptr) noexcept { | ||||
|         m_addr = addr; | ||||
|         m_size = size; | ||||
|         if (m_size == 0) { | ||||
|             m_is_data_copy = true; | ||||
|             return {}; | ||||
|         } | ||||
| 
 | ||||
|         if (this->TrySetSpan()) { | ||||
|             if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||
|                 m_memory.FlushRegion(m_addr, this->size_bytes()); | ||||
|             } | ||||
|         } else { | ||||
|             if (backup) { | ||||
|                 backup->resize_destructive(this->size()); | ||||
|                 m_data_span = *backup; | ||||
|             } else { | ||||
|                 m_data_copy.resize(this->size()); | ||||
|                 m_data_span = std::span(m_data_copy); | ||||
|             } | ||||
|             m_is_data_copy = true; | ||||
|             m_span_valid = true; | ||||
|             if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||
|                 m_memory.ReadBlock(m_addr, this->data(), this->size_bytes()); | ||||
|             } else { | ||||
|                 m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes()); | ||||
|             } | ||||
|         } | ||||
|         return m_data_span; | ||||
|     } | ||||
| 
 | ||||
|     void Write(std::span<T> write_data) noexcept { | ||||
|         if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||||
|             m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes()); | ||||
|         } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||
|             m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes()); | ||||
|         } else { | ||||
|             m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes()); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     bool TrySetSpan() noexcept { | ||||
|         if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) { | ||||
|             m_data_span = {reinterpret_cast<T*>(ptr), this->size()}; | ||||
|             m_span_valid = true; | ||||
|             return true; | ||||
|         } | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
| protected: | ||||
|     bool IsDataCopy() const noexcept { | ||||
|         return m_is_data_copy; | ||||
|     } | ||||
| 
 | ||||
|     bool AddressChanged() const noexcept { | ||||
|         return m_addr_changed; | ||||
|     } | ||||
| 
 | ||||
|     M& m_memory; | ||||
|     u64 m_addr{}; | ||||
|     size_t m_size{}; | ||||
|     std::span<T> m_data_span{}; | ||||
|     std::vector<T> m_data_copy{}; | ||||
|     bool m_span_valid{false}; | ||||
|     bool m_is_data_copy{false}; | ||||
|     bool m_addr_changed{false}; | ||||
| }; | ||||
| 
 | ||||
| template <typename M, typename T, GuestMemoryFlags FLAGS> | ||||
| class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> { | ||||
| public: | ||||
|     GuestMemoryScoped() = delete; | ||||
|     explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size, | ||||
|                                Common::ScratchBuffer<T>* backup = nullptr) | ||||
|         : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) { | ||||
|         if constexpr (!(FLAGS & GuestMemoryFlags::Read)) { | ||||
|             if (!this->TrySetSpan()) { | ||||
|                 if (backup) { | ||||
|                     this->m_data_span = *backup; | ||||
|                     this->m_span_valid = true; | ||||
|                     this->m_is_data_copy = true; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     ~GuestMemoryScoped() { | ||||
|         if constexpr (FLAGS & GuestMemoryFlags::Write) { | ||||
|             if (this->size() == 0) [[unlikely]] { | ||||
|                 return; | ||||
|             } | ||||
| 
 | ||||
|             if (this->AddressChanged() || this->IsDataCopy()) { | ||||
|                 ASSERT(this->m_span_valid); | ||||
|                 if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||||
|                     this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes()); | ||||
|                 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||
|                     this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes()); | ||||
|                 } else { | ||||
|                     this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes()); | ||||
|                 } | ||||
|             } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) || (FLAGS & GuestMemoryFlags::Cached))  { | ||||
|                 this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes()); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| }; | ||||
| } // namespace
 | ||||
| 
 | ||||
| template <typename T, GuestMemoryFlags FLAGS> | ||||
| using CpuGuestMemory = GuestMemory<Core::Memory::Memory, T, FLAGS>; | ||||
| template <typename T, GuestMemoryFlags FLAGS> | ||||
| using CpuGuestMemoryScoped = GuestMemoryScoped<Core::Memory::Memory, T, FLAGS>; | ||||
| 
 | ||||
| } // namespace Core::Memory
 | ||||
|  | @ -22,19 +22,7 @@ | |||
| #include "core/hle/service/hle_ipc.h" | ||||
| #include "core/hle/service/ipc_helpers.h" | ||||
| #include "core/memory.h" | ||||
| 
 | ||||
| namespace { | ||||
| static thread_local std::array read_buffer_data_a{ | ||||
|     Common::ScratchBuffer<u8>(), | ||||
|     Common::ScratchBuffer<u8>(), | ||||
|     Common::ScratchBuffer<u8>(), | ||||
| }; | ||||
| static thread_local std::array read_buffer_data_x{ | ||||
|     Common::ScratchBuffer<u8>(), | ||||
|     Common::ScratchBuffer<u8>(), | ||||
|     Common::ScratchBuffer<u8>(), | ||||
| }; | ||||
| } // Anonymous namespace
 | ||||
| #include "core/guest_memory.h" | ||||
| 
 | ||||
| namespace Service { | ||||
| 
 | ||||
|  | @ -343,48 +331,27 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons | |||
| } | ||||
| 
 | ||||
| std::span<const u8> HLERequestContext::ReadBufferA(std::size_t buffer_index) const { | ||||
|     static thread_local std::array read_buffer_a{ | ||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||
|     }; | ||||
|     Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); | ||||
| 
 | ||||
|     ASSERT_OR_EXECUTE_MSG( | ||||
|         BufferDescriptorA().size() > buffer_index, { return {}; }, | ||||
|         "BufferDescriptorA invalid buffer_index {}", buffer_index); | ||||
|     auto& read_buffer = read_buffer_a[buffer_index]; | ||||
|     return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), | ||||
|                             BufferDescriptorA()[buffer_index].Size(), | ||||
|                             &read_buffer_data_a[buffer_index]); | ||||
|     return gm.Read(BufferDescriptorA()[buffer_index].Address(), | ||||
|                    BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]); | ||||
| } | ||||
| 
 | ||||
| std::span<const u8> HLERequestContext::ReadBufferX(std::size_t buffer_index) const { | ||||
|     static thread_local std::array read_buffer_x{ | ||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||
|     }; | ||||
|     Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); | ||||
| 
 | ||||
|     ASSERT_OR_EXECUTE_MSG( | ||||
|         BufferDescriptorX().size() > buffer_index, { return {}; }, | ||||
|         "BufferDescriptorX invalid buffer_index {}", buffer_index); | ||||
|     auto& read_buffer = read_buffer_x[buffer_index]; | ||||
|     return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), | ||||
|                             BufferDescriptorX()[buffer_index].Size(), | ||||
|                             &read_buffer_data_x[buffer_index]); | ||||
|     return gm.Read(BufferDescriptorX()[buffer_index].Address(), | ||||
|                    BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]); | ||||
| } | ||||
| 
 | ||||
| std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { | ||||
|     static thread_local std::array read_buffer_a{ | ||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||
|     }; | ||||
|     static thread_local std::array read_buffer_x{ | ||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||
|         Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), | ||||
|     }; | ||||
|     Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> gm(memory, 0, 0); | ||||
| 
 | ||||
|     const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && | ||||
|                            BufferDescriptorA()[buffer_index].Size()}; | ||||
|  | @ -401,18 +368,14 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) cons | |||
|         ASSERT_OR_EXECUTE_MSG( | ||||
|             BufferDescriptorA().size() > buffer_index, { return {}; }, | ||||
|             "BufferDescriptorA invalid buffer_index {}", buffer_index); | ||||
|         auto& read_buffer = read_buffer_a[buffer_index]; | ||||
|         return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), | ||||
|                                 BufferDescriptorA()[buffer_index].Size(), | ||||
|                                 &read_buffer_data_a[buffer_index]); | ||||
|         return gm.Read(BufferDescriptorA()[buffer_index].Address(), | ||||
|                        BufferDescriptorA()[buffer_index].Size(), &read_buffer_data_a[buffer_index]); | ||||
|     } else { | ||||
|         ASSERT_OR_EXECUTE_MSG( | ||||
|             BufferDescriptorX().size() > buffer_index, { return {}; }, | ||||
|             "BufferDescriptorX invalid buffer_index {}", buffer_index); | ||||
|         auto& read_buffer = read_buffer_x[buffer_index]; | ||||
|         return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), | ||||
|                                 BufferDescriptorX()[buffer_index].Size(), | ||||
|                                 &read_buffer_data_x[buffer_index]); | ||||
|         return gm.Read(BufferDescriptorX()[buffer_index].Address(), | ||||
|                        BufferDescriptorX()[buffer_index].Size(), &read_buffer_data_x[buffer_index]); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -19,8 +19,6 @@ | |||
| #include "core/hle/ipc.h" | ||||
| #include "core/hle/kernel/k_handle_table.h" | ||||
| #include "core/hle/kernel/svc_common.h" | ||||
| #include "core/hle/kernel/k_auto_object.h" | ||||
| #include "core/hle/kernel/k_handle_table.h" | ||||
| 
 | ||||
| union Result; | ||||
| 
 | ||||
|  | @ -377,10 +375,6 @@ public: | |||
|         return nullptr; | ||||
|     } | ||||
| 
 | ||||
|     Kernel::KScopedAutoObject<Kernel::KAutoObject> GetObjectFromHandle(u32 handle) { | ||||
|         return GetClientHandleTable().GetObjectForIpc(handle, thread); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] std::shared_ptr<SessionRequestManager> GetManager() const { | ||||
|         return manager.lock(); | ||||
|     } | ||||
|  | @ -432,6 +426,9 @@ private: | |||
| 
 | ||||
|     Kernel::KernelCore& kernel; | ||||
|     Core::Memory::Memory& memory; | ||||
| 
 | ||||
|     mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_a{}; | ||||
|     mutable std::array<Common::ScratchBuffer<u8>, 3> read_buffer_data_x{}; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Service
 | ||||
|  |  | |||
|  | @ -2,6 +2,8 @@ | |||
| // SPDX-FileCopyrightText: 2022 Skyline Team and Contributors
 | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later
 | ||||
| 
 | ||||
| #include <functional> | ||||
| 
 | ||||
| #include "common/alignment.h" | ||||
| #include "common/assert.h" | ||||
| #include "common/logging/log.h" | ||||
|  | @ -18,6 +20,7 @@ NvMap::Handle::Handle(u64 size_, Id id_) | |||
| } | ||||
| 
 | ||||
| NvResult NvMap::Handle::Alloc(Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress) { | ||||
|     std::scoped_lock lock(mutex); | ||||
|     // Handles cannot be allocated twice
 | ||||
|     if (allocated) { | ||||
|         return NvResult::AccessDenied; | ||||
|  | @ -78,11 +81,9 @@ void NvMap::UnmapHandle(Handle& handle_description) { | |||
| 
 | ||||
|     // Free and unmap the handle from the SMMU
 | ||||
|     auto& smmu = host1x.MemoryManager(); | ||||
|     smmu.Unmap(static_cast<DAddr>(handle_description.pin_virt_address), | ||||
|                handle_description.aligned_size); | ||||
|     smmu.Free(handle_description.pin_virt_address, | ||||
|               static_cast<size_t>(handle_description.aligned_size)); | ||||
|     handle_description.pin_virt_address = 0; | ||||
|     smmu.Unmap(handle_description.d_address, handle_description.aligned_size); | ||||
|     smmu.Free(handle_description.d_address, static_cast<size_t>(handle_description.aligned_size)); | ||||
|     handle_description.d_address = 0; | ||||
| } | ||||
| 
 | ||||
| bool NvMap::TryRemoveHandle(const Handle& handle_description) { | ||||
|  | @ -123,41 +124,16 @@ std::shared_ptr<NvMap::Handle> NvMap::GetHandle(Handle::Id handle) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| VAddr NvMap::GetHandleAddress(Handle::Id handle) { | ||||
| DAddr NvMap::GetHandleAddress(Handle::Id handle) { | ||||
|     std::scoped_lock lock(handles_lock); | ||||
|     try { | ||||
|         return handles.at(handle)->address; | ||||
|         return handles.at(handle)->d_address; | ||||
|     } catch (std::out_of_range&) { | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| NvResult NvMap::AllocateHandle(Handle::Id handle, Handle::Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress, size_t session_id) { | ||||
|     auto handle_description{GetHandle(handle)}; | ||||
|     if (!handle_description) [[unlikely]] { | ||||
|         return NvResult::BadParameter; | ||||
|     } | ||||
| 
 | ||||
|     if (handle_description->allocated) [[unlikely]] { | ||||
|         return NvResult::InsufficientMemory; | ||||
|     } | ||||
| 
 | ||||
|     std::scoped_lock lock(handle_description->mutex); | ||||
|     NvResult result = handle_description->Alloc(pFlags, pAlign, pKind, pAddress); | ||||
|     if (result != NvResult::Success) { | ||||
|         return result; | ||||
|     } | ||||
|     auto& smmu = host1x.MemoryManager(); | ||||
|     size_t total_size = static_cast<size_t>(handle_description->aligned_size); | ||||
|     handle_description->d_address = smmu.Allocate(total_size); | ||||
|     if (handle_description->d_address == 0) { | ||||
|         return NvResult::InsufficientMemory; | ||||
|     } | ||||
|     smmu.Map(handle_description->d_address, handle_description->address, total_size, session_id); | ||||
|     return NvResult::Success; | ||||
| } | ||||
| 
 | ||||
| u32 NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id) { | ||||
| DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_area_pin) { | ||||
|     auto handle_description{GetHandle(handle)}; | ||||
|     if (!handle_description) [[unlikely]] { | ||||
|         return 0; | ||||
|  | @ -176,35 +152,38 @@ u32 NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id) { | |||
|                 handle_description->unmap_queue_entry.reset(); | ||||
| 
 | ||||
|                 handle_description->pins++; | ||||
|                 return handle_description->pin_virt_address; | ||||
|                 return handle_description->d_address; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         using namespace std::placeholders; | ||||
|         // If not then allocate some space and map it
 | ||||
|         DAddr address{}; | ||||
|         auto& smmu = host1x.MemoryManager(); | ||||
|         while ((address = smmu.AllocatePinned( | ||||
|                     static_cast<size_t>(handle_description->aligned_size))) == 0) { | ||||
|         auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1); | ||||
|                          //: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
 | ||||
|         while ((address = allocate(static_cast<size_t>(handle_description->aligned_size))) == 0) { | ||||
|             // Free handles until the allocation succeeds
 | ||||
|             std::scoped_lock queueLock(unmap_queue_lock); | ||||
|             if (auto freeHandleDesc{unmap_queue.front()}) { | ||||
|                 // Handles in the unmap queue are guaranteed not to be pinned so don't bother
 | ||||
|                 // checking if they are before unmapping
 | ||||
|                 std::scoped_lock freeLock(freeHandleDesc->mutex); | ||||
|                 if (handle_description->pin_virt_address) | ||||
|                 if (handle_description->d_address) | ||||
|                     UnmapHandle(*freeHandleDesc); | ||||
|             } else { | ||||
|                 LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!"); | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         handle_description->d_address = address; | ||||
| 
 | ||||
|         smmu.Map(address, handle_description->address, handle_description->aligned_size, | ||||
|                  session_id); | ||||
|         handle_description->pin_virt_address = static_cast<u32>(address); | ||||
|     } | ||||
| 
 | ||||
|     handle_description->pins++; | ||||
|     return handle_description->pin_virt_address; | ||||
|     return handle_description->d_address; | ||||
| } | ||||
| 
 | ||||
| void NvMap::UnpinHandle(Handle::Id handle) { | ||||
|  | @ -255,15 +234,10 @@ std::optional<NvMap::FreeInfo> NvMap::FreeHandle(Handle::Id handle, bool interna | |||
|                 LOG_WARNING(Service_NVDRV, "User duplicate count imbalance detected!"); | ||||
|             } else if (handle_description->dupes == 0) { | ||||
|                 // Force unmap the handle
 | ||||
|                 if (handle_description->pin_virt_address) { | ||||
|                 if (handle_description->d_address) { | ||||
|                     std::scoped_lock queueLock(unmap_queue_lock); | ||||
|                     UnmapHandle(*handle_description); | ||||
|                 } | ||||
|                 if (handle_description->allocated) { | ||||
|                     auto& smmu = host1x.MemoryManager(); | ||||
|                     smmu.Free(handle_description->d_address, handle_description->aligned_size); | ||||
|                     smmu.Unmap(handle_description->d_address, handle_description->aligned_size); | ||||
|                 } | ||||
| 
 | ||||
|                 handle_description->pins = 0; | ||||
|             } | ||||
|  |  | |||
|  | @ -48,7 +48,7 @@ public: | |||
|         using Id = u32; | ||||
|         Id id; //!< A globally unique identifier for this handle
 | ||||
| 
 | ||||
|         s32 pins{}; | ||||
|         s64 pins{}; | ||||
|         u32 pin_virt_address{}; | ||||
|         std::optional<typename std::list<std::shared_ptr<Handle>>::iterator> unmap_queue_entry{}; | ||||
| 
 | ||||
|  | @ -63,15 +63,14 @@ public: | |||
| 
 | ||||
|         VAddr address{};   //!< The memory location in the guest's AS that this handle corresponds to,
 | ||||
|                            //!< this can also be in the nvdrv tmem
 | ||||
|         DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
 | ||||
|                            //!< this can also be in the nvdrv tmem
 | ||||
|         bool is_shared_mem_mapped{}; //!< If this nvmap has been mapped with the MapSharedMem IPC
 | ||||
|                                      //!< call
 | ||||
| 
 | ||||
|         u8 kind{};        //!< Used for memory compression
 | ||||
|         bool allocated{}; //!< If the handle has been allocated with `Alloc`
 | ||||
| 
 | ||||
|         u64 dma_map_addr{}; //! remove me after implementing pinning.
 | ||||
|         DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
 | ||||
|                            //!< this can also be in the nvdrv tmem
 | ||||
| 
 | ||||
|         Handle(u64 size, Id id); | ||||
| 
 | ||||
|  | @ -119,7 +118,7 @@ public: | |||
| 
 | ||||
|     std::shared_ptr<Handle> GetHandle(Handle::Id handle); | ||||
| 
 | ||||
|     VAddr GetHandleAddress(Handle::Id handle); | ||||
|     DAddr GetHandleAddress(Handle::Id handle); | ||||
| 
 | ||||
|     /**
 | ||||
|      * @brief Maps a handle into the SMMU address space | ||||
|  | @ -127,15 +126,7 @@ public: | |||
|      * number of calls to `UnpinHandle` | ||||
|      * @return The SMMU virtual address that the handle has been mapped to | ||||
|      */ | ||||
|     u32 PinHandle(Handle::Id handle, size_t session_id); | ||||
| 
 | ||||
|     /**
 | ||||
|      * @brief Maps a handle into the SMMU address space | ||||
|      * @note This operation is refcounted, the number of calls to this must eventually match the | ||||
|      * number of calls to `UnpinHandle` | ||||
|      * @return The SMMU virtual address that the handle has been mapped to | ||||
|      */ | ||||
|     NvResult AllocateHandle(Handle::Id handle, Handle::Flags pFlags, u32 pAlign, u8 pKind, u64 pAddress, size_t session_id); | ||||
|     DAddr PinHandle(Handle::Id handle, size_t session_id, bool low_area_pin); | ||||
| 
 | ||||
|     /**
 | ||||
|      * @brief When this has been called an equal number of times to `PinHandle` for the supplied | ||||
|  |  | |||
|  | @ -42,7 +42,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form | |||
|                         u32 height, u32 stride, android::BufferTransformFlags transform, | ||||
|                         const Common::Rectangle<int>& crop_rect, | ||||
|                         std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { | ||||
|     const VAddr addr = nvmap.GetHandleAddress(buffer_handle); | ||||
|     const DAddr addr = nvmap.GetHandleAddress(buffer_handle); | ||||
|     LOG_TRACE(Service, | ||||
|               "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", | ||||
|               addr, offset, width, height, stride, format); | ||||
|  |  | |||
|  | @ -40,15 +40,15 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> i | |||
|         case 0x3: | ||||
|             return WrapFixed(this, &nvhost_as_gpu::FreeSpace, input, output); | ||||
|         case 0x5: | ||||
|             return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output); | ||||
|             return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output, fd); | ||||
|         case 0x6: | ||||
|             return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output); | ||||
|             return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output, fd); | ||||
|         case 0x8: | ||||
|             return WrapFixed(this, &nvhost_as_gpu::GetVARegions1, input, output); | ||||
|         case 0x9: | ||||
|             return WrapFixed(this, &nvhost_as_gpu::AllocAsEx, input, output); | ||||
|         case 0x14: | ||||
|             return WrapVariable(this, &nvhost_as_gpu::Remap, input, output); | ||||
|             return WrapVariable(this, &nvhost_as_gpu::Remap, input, output, fd); | ||||
|         default: | ||||
|             break; | ||||
|         } | ||||
|  | @ -86,8 +86,15 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i | |||
|     return NvResult::NotImplemented; | ||||
| } | ||||
| 
 | ||||
| void nvhost_as_gpu::OnOpen(size_t session_id, DeviceFD fd) {} | ||||
| void nvhost_as_gpu::OnClose(DeviceFD fd) {} | ||||
| void nvhost_as_gpu::OnOpen(size_t session_id, DeviceFD fd) { | ||||
|     sessions[fd] = session_id; | ||||
| } | ||||
| void nvhost_as_gpu::OnClose(DeviceFD fd) { | ||||
|     auto it = sessions.find(fd); | ||||
|     if (it != sessions.end()) { | ||||
|         sessions.erase(it); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| NvResult nvhost_as_gpu::AllocAsEx(IoctlAllocAsEx& params) { | ||||
|     LOG_DEBUG(Service_NVDRV, "called, big_page_size=0x{:X}", params.big_page_size); | ||||
|  | @ -206,6 +213,8 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) { | |||
|                        static_cast<u32>(aligned_size >> page_size_bits)); | ||||
|     } | ||||
| 
 | ||||
|     nvmap.UnpinHandle(mapping->handle); | ||||
| 
 | ||||
|     // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
 | ||||
|     // Only FreeSpace can unmap them fully
 | ||||
|     if (mapping->sparse_alloc) { | ||||
|  | @ -259,7 +268,7 @@ NvResult nvhost_as_gpu::FreeSpace(IoctlFreeSpace& params) { | |||
|     return NvResult::Success; | ||||
| } | ||||
| 
 | ||||
| NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) { | ||||
| NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries, DeviceFD fd) { | ||||
|     LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", entries.size()); | ||||
| 
 | ||||
|     if (!vm.initialised) { | ||||
|  | @ -293,19 +302,19 @@ NvResult nvhost_as_gpu::Remap(std::span<IoctlRemapEntry> entries) { | |||
|                 return NvResult::BadValue; | ||||
|             } | ||||
| 
 | ||||
|             VAddr cpu_address{static_cast<VAddr>( | ||||
|                 handle->address + | ||||
|                 (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; | ||||
|             DAddr base = nvmap.PinHandle(entry.handle, sessions[fd], false); | ||||
|             DAddr device_address{static_cast<DAddr>( | ||||
|                 base + (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))}; | ||||
| 
 | ||||
|             gmmu->Map(virtual_address, cpu_address, size, static_cast<Tegra::PTEKind>(entry.kind), | ||||
|                       use_big_pages); | ||||
|             gmmu->Map(virtual_address, device_address, size, | ||||
|                       static_cast<Tegra::PTEKind>(entry.kind), use_big_pages); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return NvResult::Success; | ||||
| } | ||||
| 
 | ||||
| NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | ||||
| NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd) { | ||||
|     LOG_DEBUG(Service_NVDRV, | ||||
|               "called, flags={:X}, nvmap_handle={:X}, buffer_offset={}, mapping_size={}" | ||||
|               ", offset={}", | ||||
|  | @ -331,9 +340,9 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | |||
|             } | ||||
| 
 | ||||
|             u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)}; | ||||
|             VAddr cpu_address{mapping->ptr + params.buffer_offset}; | ||||
|             VAddr device_address{mapping->ptr + params.buffer_offset}; | ||||
| 
 | ||||
|             gmmu->Map(gpu_address, cpu_address, params.mapping_size, | ||||
|             gmmu->Map(gpu_address, device_address, params.mapping_size, | ||||
|                       static_cast<Tegra::PTEKind>(params.kind), mapping->big_page); | ||||
| 
 | ||||
|             return NvResult::Success; | ||||
|  | @ -349,7 +358,8 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | |||
|         return NvResult::BadValue; | ||||
|     } | ||||
| 
 | ||||
|     VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)}; | ||||
|     DAddr device_address{static_cast<DAddr>(nvmap.PinHandle(params.handle, sessions[fd], false) + | ||||
|                                             params.buffer_offset)}; | ||||
|     u64 size{params.mapping_size ? params.mapping_size : handle->orig_size}; | ||||
| 
 | ||||
|     bool big_page{[&]() { | ||||
|  | @ -373,15 +383,14 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | |||
|         } | ||||
| 
 | ||||
|         const bool use_big_pages = alloc->second.big_pages && big_page; | ||||
|         gmmu->Map(params.offset, cpu_address, size, static_cast<Tegra::PTEKind>(params.kind), | ||||
|         gmmu->Map(params.offset, device_address, size, static_cast<Tegra::PTEKind>(params.kind), | ||||
|                   use_big_pages); | ||||
| 
 | ||||
|         auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, | ||||
|                                                use_big_pages, alloc->second.sparse)}; | ||||
|         auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size, | ||||
|                                                true, use_big_pages, alloc->second.sparse)}; | ||||
|         alloc->second.mappings.push_back(mapping); | ||||
|         mapping_map[params.offset] = mapping; | ||||
|     } else { | ||||
| 
 | ||||
|         auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator}; | ||||
|         u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE}; | ||||
|         u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS}; | ||||
|  | @ -394,18 +403,18 @@ NvResult nvhost_as_gpu::MapBufferEx(IoctlMapBufferEx& params) { | |||
|             return NvResult::InsufficientMemory; | ||||
|         } | ||||
| 
 | ||||
|         gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), | ||||
|         gmmu->Map(params.offset, device_address, Common::AlignUp(size, page_size), | ||||
|                   static_cast<Tegra::PTEKind>(params.kind), big_page); | ||||
| 
 | ||||
|         auto mapping{ | ||||
|             std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)}; | ||||
|         auto mapping{std::make_shared<Mapping>(params.handle, device_address, params.offset, size, | ||||
|                                                false, big_page, false)}; | ||||
|         mapping_map[params.offset] = mapping; | ||||
|     } | ||||
| 
 | ||||
|     return NvResult::Success; | ||||
| } | ||||
| 
 | ||||
| NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) { | ||||
| NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params, DeviceFD fd) { | ||||
|     LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset); | ||||
| 
 | ||||
|     std::scoped_lock lock(mutex); | ||||
|  | @ -433,6 +442,8 @@ NvResult nvhost_as_gpu::UnmapBuffer(IoctlUnmapBuffer& params) { | |||
|             gmmu->Unmap(params.offset, mapping->size); | ||||
|         } | ||||
| 
 | ||||
|         nvmap.UnpinHandle(mapping->handle); | ||||
| 
 | ||||
|         mapping_map.erase(params.offset); | ||||
|     } catch (const std::out_of_range&) { | ||||
|         LOG_WARNING(Service_NVDRV, "Couldn't find region to unmap at 0x{:X}", params.offset); | ||||
|  |  | |||
|  | @ -141,9 +141,9 @@ private: | |||
| 
 | ||||
|     NvResult AllocAsEx(IoctlAllocAsEx& params); | ||||
|     NvResult AllocateSpace(IoctlAllocSpace& params); | ||||
|     NvResult Remap(std::span<IoctlRemapEntry> params); | ||||
|     NvResult MapBufferEx(IoctlMapBufferEx& params); | ||||
|     NvResult UnmapBuffer(IoctlUnmapBuffer& params); | ||||
|     NvResult Remap(std::span<IoctlRemapEntry> params, DeviceFD fd); | ||||
|     NvResult MapBufferEx(IoctlMapBufferEx& params, DeviceFD fd); | ||||
|     NvResult UnmapBuffer(IoctlUnmapBuffer& params, DeviceFD fd); | ||||
|     NvResult FreeSpace(IoctlFreeSpace& params); | ||||
|     NvResult BindChannel(IoctlBindChannel& params); | ||||
| 
 | ||||
|  | @ -159,16 +159,18 @@ private: | |||
|     NvCore::NvMap& nvmap; | ||||
| 
 | ||||
|     struct Mapping { | ||||
|         VAddr ptr; | ||||
|         NvCore::NvMap::Handle::Id handle; | ||||
|         DAddr ptr; | ||||
|         u64 offset; | ||||
|         u64 size; | ||||
|         bool fixed; | ||||
|         bool big_page; // Only valid if fixed == false
 | ||||
|         bool sparse_alloc; | ||||
| 
 | ||||
|         Mapping(VAddr ptr_, u64 offset_, u64 size_, bool fixed_, bool big_page_, bool sparse_alloc_) | ||||
|             : ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), big_page(big_page_), | ||||
|               sparse_alloc(sparse_alloc_) {} | ||||
|         Mapping(NvCore::NvMap::Handle::Id handle_, DAddr ptr_, u64 offset_, u64 size_, bool fixed_, | ||||
|                 bool big_page_, bool sparse_alloc_) | ||||
|             : handle(handle_), ptr(ptr_), offset(offset_), size(size_), fixed(fixed_), | ||||
|               big_page(big_page_), sparse_alloc(sparse_alloc_) {} | ||||
|     }; | ||||
| 
 | ||||
|     struct Allocation { | ||||
|  | @ -212,9 +214,7 @@ private: | |||
|         bool initialised{}; | ||||
|     } vm; | ||||
|     std::shared_ptr<Tegra::MemoryManager> gmmu; | ||||
| 
 | ||||
|     // s32 channel{};
 | ||||
|     // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
 | ||||
|     std::unordered_map<DeviceFD, size_t> sessions; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Service::Nvidia::Devices
 | ||||
|  |  | |||
|  | @ -95,6 +95,9 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De | |||
|     offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); | ||||
| 
 | ||||
|     auto& gpu = system.GPU(); | ||||
|     //auto& device_memory = system.Host1x().MemoryManager();
 | ||||
|     auto* session = core.GetSession(sessions[fd]); | ||||
| 
 | ||||
|     if (gpu.UseNvdec()) { | ||||
|         for (std::size_t i = 0; i < syncpt_increments.size(); i++) { | ||||
|             const SyncptIncr& syncpt_incr = syncpt_increments[i]; | ||||
|  | @ -106,7 +109,7 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De | |||
|         const auto object = nvmap.GetHandle(cmd_buffer.memory_id); | ||||
|         ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); | ||||
|         Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); | ||||
|         system.ApplicationMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), | ||||
|         session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), | ||||
|                                              cmdlist.size() * sizeof(u32)); | ||||
|         gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); | ||||
|     } | ||||
|  | @ -136,7 +139,8 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) { | |||
| NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) { | ||||
|     const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); | ||||
|     for (size_t i = 0; i < num_entries; i++) { | ||||
|         entries[i].map_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd]); | ||||
|         DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true); | ||||
|         entries[i].map_address = static_cast<u32>(pin_address); | ||||
|     } | ||||
| 
 | ||||
|     return NvResult::Success; | ||||
|  |  | |||
|  | @ -123,8 +123,8 @@ NvResult nvmap::IocAlloc(IocAllocParams& params, DeviceFD fd) { | |||
|         return NvResult::InsufficientMemory; | ||||
|     } | ||||
| 
 | ||||
|     const auto result = file.AllocateHandle(params.handle, params.flags, params.align, params.kind, | ||||
|                                             params.address, sessions[fd]); | ||||
|     const auto result = | ||||
|         handle_description->Alloc(params.flags, params.align, params.kind, params.address); | ||||
|     if (result != NvResult::Success) { | ||||
|         LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle); | ||||
|         return result; | ||||
|  |  | |||
|  | @ -13,8 +13,6 @@ | |||
| #include "core/hle/service/nvdrv/nvdrv.h" | ||||
| #include "core/hle/service/nvdrv/nvdrv_interface.h" | ||||
| 
 | ||||
| #pragma optimize("", off) | ||||
| 
 | ||||
| namespace Service::Nvidia { | ||||
| 
 | ||||
| void NVDRV::Open(HLERequestContext& ctx) { | ||||
|  | @ -173,8 +171,8 @@ void NVDRV::Initialize(HLERequestContext& ctx) { | |||
|     [[maybe_unused]] const auto transfer_memory_size = rp.Pop<u32>(); | ||||
| 
 | ||||
|     auto& container = nvdrv->GetContainer(); | ||||
|     auto process = ctx.GetObjectFromHandle(process_handle); | ||||
|     session_id = container.OpenSession(process->DynamicCast<Kernel::KProcess*>()); | ||||
|     auto process = ctx.GetObjectFromHandle<Kernel::KProcess>(process_handle); | ||||
|     session_id = container.OpenSession(process.GetPointerUnsafe()); | ||||
| 
 | ||||
|     is_initialized = true; | ||||
| } | ||||
|  |  | |||
|  | @ -24,6 +24,8 @@ | |||
| #include "core/hle/kernel/k_process.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| #include "video_core/host1x/host1x.h" | ||||
| #include "video_core/rasterizer_download_area.h" | ||||
| 
 | ||||
| namespace Core::Memory { | ||||
|  | @ -638,15 +640,16 @@ struct Memory::Impl { | |||
|                   base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); | ||||
| 
 | ||||
|         // During boot, current_page_table might not be set yet, in which case we need not flush
 | ||||
|         if (system.IsPoweredOn()) { | ||||
|         /*if (system.IsPoweredOn()) {
 | ||||
|             auto& gpu = system.GPU(); | ||||
|             for (u64 i = 0; i < size; i++) { | ||||
|                 const auto page = base + i; | ||||
|                 if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) { | ||||
| 
 | ||||
|                     gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         }*/ | ||||
| 
 | ||||
|         const auto end = base + size; | ||||
|         ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", | ||||
|  | @ -811,10 +814,15 @@ struct Memory::Impl { | |||
|         return true; | ||||
|     } | ||||
| 
 | ||||
|     void HandleRasterizerDownload(VAddr address, size_t size) { | ||||
|     void HandleRasterizerDownload(VAddr v_address, size_t size) { | ||||
|         const auto* p = GetPointerImpl( | ||||
|             v_address, []() {}, []() {}); | ||||
|         auto& gpu_device_memory = system.Host1x().MemoryManager(); | ||||
|         DAddr address = | ||||
|             gpu_device_memory.GetAddressFromPAddr(system.DeviceMemory().GetRawPhysicalAddr(p)); | ||||
|         const size_t core = system.GetCurrentHostThreadID(); | ||||
|         auto& current_area = rasterizer_read_areas[core]; | ||||
|         const VAddr end_address = address + size; | ||||
|         const DAddr end_address = address + size; | ||||
|         if (current_area.start_address <= address && end_address <= current_area.end_address) | ||||
|             [[likely]] { | ||||
|             return; | ||||
|  | @ -822,7 +830,10 @@ struct Memory::Impl { | |||
|         current_area = system.GPU().OnCPURead(address, size); | ||||
|     } | ||||
| 
 | ||||
|     void HandleRasterizerWrite(VAddr address, size_t size) { | ||||
|     void HandleRasterizerWrite(VAddr v_address, size_t size) { | ||||
|         const auto* p = GetPointerImpl( | ||||
|             v_address, []() {}, []() {}); | ||||
|         PAddr address = system.DeviceMemory().GetRawPhysicalAddr(p); | ||||
|         constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; | ||||
|         const size_t core = std::min(system.GetCurrentHostThreadID(), | ||||
|                                      sys_core); // any other calls threads go to syscore.
 | ||||
|  | @ -836,7 +847,7 @@ struct Memory::Impl { | |||
|             } | ||||
|         }); | ||||
|         auto& current_area = rasterizer_write_areas[core]; | ||||
|         VAddr subaddress = address >> YUZU_PAGEBITS; | ||||
|         PAddr subaddress = address >> YUZU_PAGEBITS; | ||||
|         bool do_collection = current_area.last_address == subaddress; | ||||
|         if (!do_collection) [[unlikely]] { | ||||
|             do_collection = system.GPU().OnCPUWrite(address, size); | ||||
|  | @ -849,7 +860,7 @@ struct Memory::Impl { | |||
|     } | ||||
| 
 | ||||
|     struct GPUDirtyState { | ||||
|         VAddr last_address; | ||||
|         PAddr last_address; | ||||
|     }; | ||||
| 
 | ||||
|     void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { | ||||
|  |  | |||
|  | @ -498,209 +498,4 @@ private: | |||
|     std::unique_ptr<Impl> impl; | ||||
| }; | ||||
| 
 | ||||
| enum GuestMemoryFlags : u32 { | ||||
|     Read = 1 << 0, | ||||
|     Write = 1 << 1, | ||||
|     Safe = 1 << 2, | ||||
|     Cached = 1 << 3, | ||||
| 
 | ||||
|     SafeRead = Read | Safe, | ||||
|     SafeWrite = Write | Safe, | ||||
|     SafeReadWrite = SafeRead | SafeWrite, | ||||
|     SafeReadCachedWrite = SafeReadWrite | Cached, | ||||
| 
 | ||||
|     UnsafeRead = Read, | ||||
|     UnsafeWrite = Write, | ||||
|     UnsafeReadWrite = UnsafeRead | UnsafeWrite, | ||||
|     UnsafeReadCachedWrite = UnsafeReadWrite | Cached, | ||||
| }; | ||||
| 
 | ||||
| namespace { | ||||
| template <typename M, typename T, GuestMemoryFlags FLAGS> | ||||
| class GuestMemory { | ||||
|     using iterator = T*; | ||||
|     using const_iterator = const T*; | ||||
|     using value_type = T; | ||||
|     using element_type = T; | ||||
|     using iterator_category = std::contiguous_iterator_tag; | ||||
| 
 | ||||
| public: | ||||
|     GuestMemory() = delete; | ||||
|     explicit GuestMemory(M& memory, u64 addr, std::size_t size, | ||||
|                          Common::ScratchBuffer<T>* backup = nullptr) | ||||
|         : m_memory{memory}, m_addr{addr}, m_size{size} { | ||||
|         static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); | ||||
|         if constexpr (FLAGS & GuestMemoryFlags::Read) { | ||||
|             Read(addr, size, backup); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     ~GuestMemory() = default; | ||||
| 
 | ||||
|     T* data() noexcept { | ||||
|         return m_data_span.data(); | ||||
|     } | ||||
| 
 | ||||
|     const T* data() const noexcept { | ||||
|         return m_data_span.data(); | ||||
|     } | ||||
| 
 | ||||
|     size_t size() const noexcept { | ||||
|         return m_size; | ||||
|     } | ||||
| 
 | ||||
|     size_t size_bytes() const noexcept { | ||||
|         return this->size() * sizeof(T); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] T* begin() noexcept { | ||||
|         return this->data(); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] const T* begin() const noexcept { | ||||
|         return this->data(); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] T* end() noexcept { | ||||
|         return this->data() + this->size(); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] const T* end() const noexcept { | ||||
|         return this->data() + this->size(); | ||||
|     } | ||||
| 
 | ||||
|     T& operator[](size_t index) noexcept { | ||||
|         return m_data_span[index]; | ||||
|     } | ||||
| 
 | ||||
|     const T& operator[](size_t index) const noexcept { | ||||
|         return m_data_span[index]; | ||||
|     } | ||||
| 
 | ||||
|     void SetAddressAndSize(u64 addr, std::size_t size) noexcept { | ||||
|         m_addr = addr; | ||||
|         m_size = size; | ||||
|         m_addr_changed = true; | ||||
|     } | ||||
| 
 | ||||
|     std::span<T> Read(u64 addr, std::size_t size, | ||||
|                       Common::ScratchBuffer<T>* backup = nullptr) noexcept { | ||||
|         m_addr = addr; | ||||
|         m_size = size; | ||||
|         if (m_size == 0) { | ||||
|             m_is_data_copy = true; | ||||
|             return {}; | ||||
|         } | ||||
| 
 | ||||
|         if (this->TrySetSpan()) { | ||||
|             if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||
|                 m_memory.FlushRegion(m_addr, this->size_bytes()); | ||||
|             } | ||||
|         } else { | ||||
|             if (backup) { | ||||
|                 backup->resize_destructive(this->size()); | ||||
|                 m_data_span = *backup; | ||||
|             } else { | ||||
|                 m_data_copy.resize(this->size()); | ||||
|                 m_data_span = std::span(m_data_copy); | ||||
|             } | ||||
|             m_is_data_copy = true; | ||||
|             m_span_valid = true; | ||||
|             if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||
|                 m_memory.ReadBlock(m_addr, this->data(), this->size_bytes()); | ||||
|             } else { | ||||
|                 m_memory.ReadBlockUnsafe(m_addr, this->data(), this->size_bytes()); | ||||
|             } | ||||
|         } | ||||
|         return m_data_span; | ||||
|     } | ||||
| 
 | ||||
|     void Write(std::span<T> write_data) noexcept { | ||||
|         if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||||
|             m_memory.WriteBlockCached(m_addr, write_data.data(), this->size_bytes()); | ||||
|         } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||
|             m_memory.WriteBlock(m_addr, write_data.data(), this->size_bytes()); | ||||
|         } else { | ||||
|             m_memory.WriteBlockUnsafe(m_addr, write_data.data(), this->size_bytes()); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     bool TrySetSpan() noexcept { | ||||
|         if (u8* ptr = m_memory.GetSpan(m_addr, this->size_bytes()); ptr) { | ||||
|             m_data_span = {reinterpret_cast<T*>(ptr), this->size()}; | ||||
|             m_span_valid = true; | ||||
|             return true; | ||||
|         } | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
| protected: | ||||
|     bool IsDataCopy() const noexcept { | ||||
|         return m_is_data_copy; | ||||
|     } | ||||
| 
 | ||||
|     bool AddressChanged() const noexcept { | ||||
|         return m_addr_changed; | ||||
|     } | ||||
| 
 | ||||
|     M& m_memory; | ||||
|     u64 m_addr{}; | ||||
|     size_t m_size{}; | ||||
|     std::span<T> m_data_span{}; | ||||
|     std::vector<T> m_data_copy{}; | ||||
|     bool m_span_valid{false}; | ||||
|     bool m_is_data_copy{false}; | ||||
|     bool m_addr_changed{false}; | ||||
| }; | ||||
| 
 | ||||
| template <typename M, typename T, GuestMemoryFlags FLAGS> | ||||
| class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> { | ||||
| public: | ||||
|     GuestMemoryScoped() = delete; | ||||
|     explicit GuestMemoryScoped(M& memory, u64 addr, std::size_t size, | ||||
|                                Common::ScratchBuffer<T>* backup = nullptr) | ||||
|         : GuestMemory<M, T, FLAGS>(memory, addr, size, backup) { | ||||
|         if constexpr (!(FLAGS & GuestMemoryFlags::Read)) { | ||||
|             if (!this->TrySetSpan()) { | ||||
|                 if (backup) { | ||||
|                     this->m_data_span = *backup; | ||||
|                     this->m_span_valid = true; | ||||
|                     this->m_is_data_copy = true; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     ~GuestMemoryScoped() { | ||||
|         if constexpr (FLAGS & GuestMemoryFlags::Write) { | ||||
|             if (this->size() == 0) [[unlikely]] { | ||||
|                 return; | ||||
|             } | ||||
| 
 | ||||
|             if (this->AddressChanged() || this->IsDataCopy()) { | ||||
|                 ASSERT(this->m_span_valid); | ||||
|                 if constexpr (FLAGS & GuestMemoryFlags::Cached) { | ||||
|                     this->m_memory.WriteBlockCached(this->m_addr, this->data(), this->size_bytes()); | ||||
|                 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { | ||||
|                     this->m_memory.WriteBlock(this->m_addr, this->data(), this->size_bytes()); | ||||
|                 } else { | ||||
|                     this->m_memory.WriteBlockUnsafe(this->m_addr, this->data(), this->size_bytes()); | ||||
|                 } | ||||
|             } else if constexpr ((FLAGS & GuestMemoryFlags::Safe) || | ||||
|                                  (FLAGS & GuestMemoryFlags::Cached)) { | ||||
|                 this->m_memory.InvalidateRegion(this->m_addr, this->size_bytes()); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| }; | ||||
| } // namespace
 | ||||
| 
 | ||||
| template <typename T, GuestMemoryFlags FLAGS> | ||||
| using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>; | ||||
| template <typename T, GuestMemoryFlags FLAGS> | ||||
| using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>; | ||||
| template <typename T, GuestMemoryFlags FLAGS> | ||||
| using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>; | ||||
| template <typename T, GuestMemoryFlags FLAGS> | ||||
| using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>; | ||||
| } // namespace Core::Memory
 | ||||
|  |  | |||
|  | @ -95,6 +95,7 @@ add_library(video_core STATIC | |||
|     gpu.h | ||||
|     gpu_thread.cpp | ||||
|     gpu_thread.h | ||||
|     guest_memory.h | ||||
|     invalidation_accumulator.h | ||||
|     memory_manager.cpp | ||||
|     memory_manager.h | ||||
|  | @ -107,8 +108,6 @@ add_library(video_core STATIC | |||
|     query_cache/query_stream.h | ||||
|     query_cache/types.h | ||||
|     query_cache.h | ||||
|     rasterizer_accelerated.cpp | ||||
|     rasterizer_accelerated.h | ||||
|     rasterizer_interface.h | ||||
|     renderer_base.cpp | ||||
|     renderer_base.h | ||||
|  |  | |||
|  | @ -33,13 +33,12 @@ struct NullBufferParams {}; | |||
|  * | ||||
|  * The buffer size and address is forcefully aligned to CPU page boundaries. | ||||
|  */ | ||||
| template <class RasterizerInterface> | ||||
| class BufferBase { | ||||
| public: | ||||
|     static constexpr u64 BASE_PAGE_BITS = 16; | ||||
|     static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; | ||||
| 
 | ||||
|     explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) | ||||
|     explicit BufferBase(VAddr cpu_addr_, u64 size_bytes_) | ||||
|         : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} | ||||
| 
 | ||||
|     explicit BufferBase(NullBufferParams) {} | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -32,7 +32,6 @@ | |||
| #include "common/microprofile.h" | ||||
| #include "common/scope_exit.h" | ||||
| #include "common/settings.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/buffer_cache/buffer_base.h" | ||||
| #include "video_core/control/channel_state_cache.h" | ||||
| #include "video_core/delayed_destruction_ring.h" | ||||
|  | @ -41,7 +40,6 @@ | |||
| #include "video_core/engines/kepler_compute.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/surface.h" | ||||
| #include "video_core/texture_cache/slot_vector.h" | ||||
| #include "video_core/texture_cache/types.h" | ||||
|  | @ -94,7 +92,7 @@ static constexpr BufferId NULL_BUFFER_ID{0}; | |||
| static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | ||||
| 
 | ||||
| struct Binding { | ||||
|     VAddr cpu_addr{}; | ||||
|     DAddr device_addr{}; | ||||
|     u32 size{}; | ||||
|     BufferId buffer_id; | ||||
| }; | ||||
|  | @ -104,7 +102,7 @@ struct TextureBufferBinding : Binding { | |||
| }; | ||||
| 
 | ||||
| static constexpr Binding NULL_BINDING{ | ||||
|     .cpu_addr = 0, | ||||
|     .device_addr = 0, | ||||
|     .size = 0, | ||||
|     .buffer_id = NULL_BUFFER_ID, | ||||
| }; | ||||
|  | @ -204,10 +202,10 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | |||
|     using Async_Buffer = typename P::Async_Buffer; | ||||
|     using MemoryTracker = typename P::MemoryTracker; | ||||
| 
 | ||||
|     using IntervalCompare = std::less<VAddr>; | ||||
|     using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>; | ||||
|     using IntervalAllocator = boost::fast_pool_allocator<VAddr>; | ||||
|     using IntervalSet = boost::icl::interval_set<VAddr>; | ||||
|     using IntervalCompare = std::less<DAddr>; | ||||
|     using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>; | ||||
|     using IntervalAllocator = boost::fast_pool_allocator<DAddr>; | ||||
|     using IntervalSet = boost::icl::interval_set<DAddr>; | ||||
|     using IntervalType = typename IntervalSet::interval_type; | ||||
| 
 | ||||
|     template <typename Type> | ||||
|  | @ -230,32 +228,31 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | |||
| 
 | ||||
|     using OverlapCombine = counter_add_functor<int>; | ||||
|     using OverlapSection = boost::icl::inter_section<int>; | ||||
|     using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; | ||||
|     using OverlapCounter = boost::icl::split_interval_map<DAddr, int>; | ||||
| 
 | ||||
|     struct OverlapResult { | ||||
|         boost::container::small_vector<BufferId, 16> ids; | ||||
|         VAddr begin; | ||||
|         VAddr end; | ||||
|         DAddr begin; | ||||
|         DAddr end; | ||||
|         bool has_stream_leap = false; | ||||
|     }; | ||||
| 
 | ||||
| public: | ||||
|     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||||
|                          Core::Memory::Memory& cpu_memory_, Runtime& runtime_); | ||||
|     explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_); | ||||
| 
 | ||||
|     void TickFrame(); | ||||
| 
 | ||||
|     void WriteMemory(VAddr cpu_addr, u64 size); | ||||
|     void WriteMemory(DAddr device_addr, u64 size); | ||||
| 
 | ||||
|     void CachedWriteMemory(VAddr cpu_addr, u64 size); | ||||
|     void CachedWriteMemory(DAddr device_addr, u64 size); | ||||
| 
 | ||||
|     bool OnCPUWrite(VAddr cpu_addr, u64 size); | ||||
|     bool OnCPUWrite(DAddr device_addr, u64 size); | ||||
| 
 | ||||
|     void DownloadMemory(VAddr cpu_addr, u64 size); | ||||
|     void DownloadMemory(DAddr device_addr, u64 size); | ||||
| 
 | ||||
|     std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); | ||||
|     std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr device_addr, u64 size); | ||||
| 
 | ||||
|     bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | ||||
|     bool InlineMemory(DAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | ||||
| 
 | ||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | ||||
| 
 | ||||
|  | @ -300,7 +297,7 @@ public: | |||
|                                                        ObtainBufferSynchronize sync_info, | ||||
|                                                        ObtainBufferOperation post_op); | ||||
| 
 | ||||
|     [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, | ||||
|     [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(DAddr gpu_addr, u32 size, | ||||
|                                                           ObtainBufferSynchronize sync_info, | ||||
|                                                           ObtainBufferOperation post_op); | ||||
|     void FlushCachedWrites(); | ||||
|  | @ -326,13 +323,13 @@ public: | |||
|     bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | ||||
| 
 | ||||
|     /// Return true when a CPU region is modified from the GPU
 | ||||
|     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||||
|     [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size); | ||||
| 
 | ||||
|     /// Return true when a region is registered on the cache
 | ||||
|     [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); | ||||
|     [[nodiscard]] bool IsRegionRegistered(DAddr addr, size_t size); | ||||
| 
 | ||||
|     /// Return true when a CPU region is modified from the CPU
 | ||||
|     [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | ||||
|     [[nodiscard]] bool IsRegionCpuModified(DAddr addr, size_t size); | ||||
| 
 | ||||
|     void SetDrawIndirect( | ||||
|         const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { | ||||
|  | @ -366,9 +363,9 @@ private: | |||
|     } | ||||
| 
 | ||||
|     template <typename Func> | ||||
|     void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { | ||||
|         const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE); | ||||
|         for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) { | ||||
|     void ForEachBufferInRange(DAddr device_addr, u64 size, Func&& func) { | ||||
|         const u64 page_end = Common::DivCeil(device_addr + size, CACHING_PAGESIZE); | ||||
|         for (u64 page = device_addr >> CACHING_PAGEBITS; page < page_end;) { | ||||
|             const BufferId buffer_id = page_table[page]; | ||||
|             if (!buffer_id) { | ||||
|                 ++page; | ||||
|  | @ -377,15 +374,15 @@ private: | |||
|             Buffer& buffer = slot_buffers[buffer_id]; | ||||
|             func(buffer_id, buffer); | ||||
| 
 | ||||
|             const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||||
|             const DAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||||
|             page = Common::DivCeil(end_addr, CACHING_PAGESIZE); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     template <typename Func> | ||||
|     void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) { | ||||
|         const VAddr start_address = cpu_addr; | ||||
|         const VAddr end_address = start_address + size; | ||||
|     void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) { | ||||
|         const DAddr start_address = device_addr; | ||||
|         const DAddr end_address = start_address + size; | ||||
|         const IntervalType search_interval{start_address, end_address}; | ||||
|         auto it = current_range.lower_bound(search_interval); | ||||
|         if (it == current_range.end()) { | ||||
|  | @ -393,8 +390,8 @@ private: | |||
|         } | ||||
|         auto end_it = current_range.upper_bound(search_interval); | ||||
|         for (; it != end_it; it++) { | ||||
|             VAddr inter_addr_end = it->upper(); | ||||
|             VAddr inter_addr = it->lower(); | ||||
|             DAddr inter_addr_end = it->upper(); | ||||
|             DAddr inter_addr = it->lower(); | ||||
|             if (inter_addr_end > end_address) { | ||||
|                 inter_addr_end = end_address; | ||||
|             } | ||||
|  | @ -406,10 +403,10 @@ private: | |||
|     } | ||||
| 
 | ||||
|     template <typename Func> | ||||
|     void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, | ||||
|     void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size, | ||||
|                                  Func&& func) { | ||||
|         const VAddr start_address = cpu_addr; | ||||
|         const VAddr end_address = start_address + size; | ||||
|         const DAddr start_address = device_addr; | ||||
|         const DAddr end_address = start_address + size; | ||||
|         const IntervalType search_interval{start_address, end_address}; | ||||
|         auto it = current_range.lower_bound(search_interval); | ||||
|         if (it == current_range.end()) { | ||||
|  | @ -418,8 +415,8 @@ private: | |||
|         auto end_it = current_range.upper_bound(search_interval); | ||||
|         for (; it != end_it; it++) { | ||||
|             auto& inter = it->first; | ||||
|             VAddr inter_addr_end = inter.upper(); | ||||
|             VAddr inter_addr = inter.lower(); | ||||
|             DAddr inter_addr_end = inter.upper(); | ||||
|             DAddr inter_addr = inter.lower(); | ||||
|             if (inter_addr_end > end_address) { | ||||
|                 inter_addr_end = end_address; | ||||
|             } | ||||
|  | @ -451,9 +448,9 @@ private: | |||
|         } while (any_removals); | ||||
|     } | ||||
| 
 | ||||
|     static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | ||||
|         return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == | ||||
|                ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | ||||
|     static bool IsRangeGranular(DAddr device_addr, size_t size) { | ||||
|         return (device_addr & ~Core::Memory::YUZU_PAGEMASK) == | ||||
|                ((device_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | ||||
|     } | ||||
| 
 | ||||
|     void RunGarbageCollector(); | ||||
|  | @ -508,15 +505,15 @@ private: | |||
| 
 | ||||
|     void UpdateComputeTextureBuffers(); | ||||
| 
 | ||||
|     void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | ||||
|     void MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size); | ||||
| 
 | ||||
|     [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | ||||
|     [[nodiscard]] BufferId FindBuffer(DAddr device_addr, u32 size); | ||||
| 
 | ||||
|     [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); | ||||
|     [[nodiscard]] OverlapResult ResolveOverlaps(DAddr device_addr, u32 wanted_size); | ||||
| 
 | ||||
|     void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); | ||||
| 
 | ||||
|     [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | ||||
|     [[nodiscard]] BufferId CreateBuffer(DAddr device_addr, u32 wanted_size); | ||||
| 
 | ||||
|     void Register(BufferId buffer_id); | ||||
| 
 | ||||
|  | @ -527,7 +524,7 @@ private: | |||
| 
 | ||||
|     void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; | ||||
| 
 | ||||
|     bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | ||||
|     bool SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size); | ||||
| 
 | ||||
|     void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||||
|                       std::span<BufferCopy> copies); | ||||
|  | @ -539,7 +536,7 @@ private: | |||
| 
 | ||||
|     void DownloadBufferMemory(Buffer& buffer_id); | ||||
| 
 | ||||
|     void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | ||||
|     void DownloadBufferMemory(Buffer& buffer_id, DAddr device_addr, u64 size); | ||||
| 
 | ||||
|     void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); | ||||
| 
 | ||||
|  | @ -549,7 +546,7 @@ private: | |||
|     [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | ||||
|                                                                PixelFormat format); | ||||
| 
 | ||||
|     [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | ||||
|     [[nodiscard]] std::span<const u8> ImmediateBufferWithData(DAddr device_addr, size_t size); | ||||
| 
 | ||||
|     [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | ||||
| 
 | ||||
|  | @ -557,11 +554,10 @@ private: | |||
| 
 | ||||
|     void ClearDownload(IntervalType subtract_interval); | ||||
| 
 | ||||
|     void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | ||||
|     void InlineMemoryImplementation(DAddr dest_address, size_t copy_size, | ||||
|                                     std::span<const u8> inlined_buffer); | ||||
| 
 | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
| 
 | ||||
|     SlotVector<Buffer> slot_buffers; | ||||
|     DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | ||||
|  | @ -598,7 +594,7 @@ private: | |||
|     u64 critical_memory = 0; | ||||
|     BufferId inline_buffer_id; | ||||
| 
 | ||||
|     std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; | ||||
|     std::array<BufferId, ((1ULL << 34) >> CACHING_PAGEBITS)> page_table; | ||||
|     Common::ScratchBuffer<u8> tmp_buffer; | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -17,19 +17,19 @@ | |||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
| template <class RasterizerInterface> | ||||
| template <typename DeviceTracker> | ||||
| class MemoryTrackerBase { | ||||
|     static constexpr size_t MAX_CPU_PAGE_BITS = 39; | ||||
|     static constexpr size_t MAX_CPU_PAGE_BITS = 34; | ||||
|     static constexpr size_t HIGHER_PAGE_BITS = 22; | ||||
|     static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; | ||||
|     static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; | ||||
|     static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); | ||||
|     static constexpr size_t MANAGER_POOL_SIZE = 32; | ||||
|     static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; | ||||
|     using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>; | ||||
|     using Manager = WordManager<DeviceTracker, WORDS_STACK_NEEDED>; | ||||
| 
 | ||||
| public: | ||||
|     MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {} | ||||
|     MemoryTrackerBase(DeviceTracker& device_tracker_) : device_tracker{&device_tracker_} {} | ||||
|     ~MemoryTrackerBase() = default; | ||||
| 
 | ||||
|     /// Returns the inclusive CPU modified range in a begin end pair
 | ||||
|  | @ -74,7 +74,7 @@ public: | |||
|             }); | ||||
|     } | ||||
| 
 | ||||
|     /// Mark region as CPU modified, notifying the rasterizer about this change
 | ||||
|     /// Mark region as CPU modified, notifying the device_tracker about this change
 | ||||
|     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | ||||
|         IteratePages<true>(dirty_cpu_addr, query_size, | ||||
|                            [](Manager* manager, u64 offset, size_t size) { | ||||
|  | @ -83,7 +83,7 @@ public: | |||
|                            }); | ||||
|     } | ||||
| 
 | ||||
|     /// Unmark region as CPU modified, notifying the rasterizer about this change
 | ||||
|     /// Unmark region as CPU modified, notifying the device_tracker about this change
 | ||||
|     void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | ||||
|         IteratePages<true>(dirty_cpu_addr, query_size, | ||||
|                            [](Manager* manager, u64 offset, size_t size) { | ||||
|  | @ -139,7 +139,7 @@ public: | |||
|             }); | ||||
|     } | ||||
| 
 | ||||
|     /// Flushes cached CPU writes, and notify the rasterizer about the deltas
 | ||||
|     /// Flushes cached CPU writes, and notify the device_tracker about the deltas
 | ||||
|     void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { | ||||
|         IteratePages<false>(query_cpu_addr, query_size, | ||||
|                             [](Manager* manager, [[maybe_unused]] u64 offset, | ||||
|  | @ -280,7 +280,7 @@ private: | |||
|         manager_pool.emplace_back(); | ||||
|         auto& last_pool = manager_pool.back(); | ||||
|         for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { | ||||
|             new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE); | ||||
|             new (&last_pool[i]) Manager(0, *device_tracker, HIGHER_PAGE_SIZE); | ||||
|             free_managers.push_back(&last_pool[i]); | ||||
|         } | ||||
|         return on_return(); | ||||
|  | @ -293,7 +293,7 @@ private: | |||
| 
 | ||||
|     std::unordered_set<u32> cached_pages; | ||||
| 
 | ||||
|     RasterizerInterface* rasterizer = nullptr; | ||||
|     DeviceTracker* device_tracker = nullptr; | ||||
| }; | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
|  |  | |||
|  | @ -163,11 +163,11 @@ struct Words { | |||
|     WordsArray<stack_words> preflushable; | ||||
| }; | ||||
| 
 | ||||
| template <class RasterizerInterface, size_t stack_words = 1> | ||||
| template <class DeviceTracker, size_t stack_words = 1> | ||||
| class WordManager { | ||||
| public: | ||||
|     explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes) | ||||
|         : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {} | ||||
|     explicit WordManager(VAddr cpu_addr_, DeviceTracker& tracker_, u64 size_bytes) | ||||
|         : cpu_addr{cpu_addr_}, tracker{&tracker_}, words{size_bytes} {} | ||||
| 
 | ||||
|     explicit WordManager() = default; | ||||
| 
 | ||||
|  | @ -279,7 +279,7 @@ public: | |||
|     } | ||||
| 
 | ||||
|     /**
 | ||||
|      * Loop over each page in the given range, turn off those bits and notify the rasterizer if | ||||
|      * Loop over each page in the given range, turn off those bits and notify the tracker if | ||||
|      * needed. Call the given function on each turned off range. | ||||
|      * | ||||
|      * @param query_cpu_range Base CPU address to loop over | ||||
|  | @ -459,26 +459,26 @@ private: | |||
|     } | ||||
| 
 | ||||
|     /**
 | ||||
|      * Notify rasterizer about changes in the CPU tracking state of a word in the buffer | ||||
|      * Notify tracker about changes in the CPU tracking state of a word in the buffer | ||||
|      * | ||||
|      * @param word_index   Index to the word to notify to the rasterizer | ||||
|      * @param word_index   Index to the word to notify to the tracker | ||||
|      * @param current_bits Current state of the word | ||||
|      * @param new_bits     New state of the word | ||||
|      * | ||||
|      * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | ||||
|      * @tparam add_to_tracker True when the tracker should start tracking the new pages | ||||
|      */ | ||||
|     template <bool add_to_rasterizer> | ||||
|     template <bool add_to_tracker> | ||||
|     void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { | ||||
|         u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | ||||
|         u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; | ||||
|         VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | ||||
|         IteratePages(changed_bits, [&](size_t offset, size_t size) { | ||||
|             rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, | ||||
|                                                size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); | ||||
|             tracker->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, | ||||
|                                                size * BYTES_PER_PAGE, add_to_tracker ? 1 : -1); | ||||
|         }); | ||||
|     } | ||||
| 
 | ||||
|     VAddr cpu_addr = 0; | ||||
|     RasterizerInterface* rasterizer = nullptr; | ||||
|     DeviceTracker* tracker = nullptr; | ||||
|     Words<stack_words> words; | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -5,10 +5,10 @@ | |||
| #include "common/microprofile.h" | ||||
| #include "common/settings.h" | ||||
| #include "core/core.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/dma_pusher.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/guest_memory.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| 
 | ||||
| namespace Tegra { | ||||
|  | @ -85,15 +85,15 @@ bool DmaPusher::Step() { | |||
|             } | ||||
|         } | ||||
|         const auto safe_process = [&] { | ||||
|             Core::Memory::GpuGuestMemory<Tegra::CommandHeader, | ||||
|                                          Core::Memory::GuestMemoryFlags::SafeRead> | ||||
|             Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, | ||||
|                                          Tegra::Memory::GuestMemoryFlags::SafeRead> | ||||
|                 headers(memory_manager, dma_state.dma_get, command_list_header.size, | ||||
|                         &command_headers); | ||||
|             ProcessCommands(headers); | ||||
|         }; | ||||
|         const auto unsafe_process = [&] { | ||||
|             Core::Memory::GpuGuestMemory<Tegra::CommandHeader, | ||||
|                                          Core::Memory::GuestMemoryFlags::UnsafeRead> | ||||
|             Tegra::Memory::GpuGuestMemory<Tegra::CommandHeader, | ||||
|                                          Tegra::Memory::GuestMemoryFlags::UnsafeRead> | ||||
|                 headers(memory_manager, dma_state.dma_get, command_list_header.size, | ||||
|                         &command_headers); | ||||
|             ProcessCommands(headers); | ||||
|  |  | |||
|  | @ -5,8 +5,8 @@ | |||
| 
 | ||||
| #include "common/algorithm.h" | ||||
| #include "common/assert.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/engines/engine_upload.h" | ||||
| #include "video_core/guest_memory.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/textures/decoders.h" | ||||
|  | @ -68,7 +68,8 @@ void State::ProcessData(std::span<const u8> read_buffer) { | |||
|             true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, | ||||
|             regs.dest.BlockHeight(), regs.dest.BlockDepth()); | ||||
| 
 | ||||
|         Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||
|         Tegra::Memory::GpuGuestMemoryScoped<u8, | ||||
|                                             Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||
|             tmp(memory_manager, address, dst_size, &tmp_buffer); | ||||
| 
 | ||||
|         Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, | ||||
|  |  | |||
|  | @ -11,6 +11,7 @@ | |||
| #include "core/memory.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/engines/maxwell_dma.h" | ||||
| #include "video_core/guest_memory.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/textures/decoders.h" | ||||
|  | @ -133,8 +134,8 @@ void MaxwellDMA::Launch() { | |||
|                 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | ||||
|                 read_buffer.resize_destructive(16); | ||||
|                 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | ||||
|                     Core::Memory::GpuGuestMemoryScoped< | ||||
|                         u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||
|                     Tegra::Memory::GpuGuestMemoryScoped< | ||||
|                         u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||
|                         tmp_write_buffer(memory_manager, | ||||
|                                          convert_linear_2_blocklinear_addr(regs.offset_in + offset), | ||||
|                                          16, &read_buffer); | ||||
|  | @ -146,16 +147,16 @@ void MaxwellDMA::Launch() { | |||
|                 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | ||||
|                 read_buffer.resize_destructive(16); | ||||
|                 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | ||||
|                     Core::Memory::GpuGuestMemoryScoped< | ||||
|                         u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||
|                     Tegra::Memory::GpuGuestMemoryScoped< | ||||
|                         u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||
|                         tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); | ||||
|                     tmp_write_buffer.SetAddressAndSize( | ||||
|                         convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); | ||||
|                 } | ||||
|             } else { | ||||
|                 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { | ||||
|                     Core::Memory::GpuGuestMemoryScoped< | ||||
|                         u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||
|                     Tegra::Memory::GpuGuestMemoryScoped< | ||||
|                         u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||
|                         tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, | ||||
|                                          &read_buffer); | ||||
|                     tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); | ||||
|  | @ -226,9 +227,9 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 
 | ||||
|     const size_t dst_size = dst_operand.pitch * regs.line_count; | ||||
| 
 | ||||
|     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | ||||
|     Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | ||||
|         memory_manager, src_operand.address, src_size, &read_buffer); | ||||
|     Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> | ||||
|     Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> | ||||
|         tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); | ||||
| 
 | ||||
|     UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, | ||||
|  | @ -290,9 +291,9 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 
 | ||||
|     GPUVAddr src_addr = regs.offset_in; | ||||
|     GPUVAddr dst_addr = regs.offset_out; | ||||
|     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | ||||
|     Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | ||||
|         memory_manager, src_addr, src_size, &read_buffer); | ||||
|     Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> | ||||
|     Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::UnsafeReadCachedWrite> | ||||
|         tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); | ||||
| 
 | ||||
|     //  If the input is linear and the output is tiled, swizzle the input and copy it over.
 | ||||
|  | @ -344,9 +345,9 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { | |||
| 
 | ||||
|     intermediate_buffer.resize_destructive(mid_buffer_size); | ||||
| 
 | ||||
|     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | ||||
|     Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( | ||||
|         memory_manager, regs.offset_in, src_size, &read_buffer); | ||||
|     Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||
|     Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||||
|         tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); | ||||
| 
 | ||||
|     UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, | ||||
|  |  | |||
|  | @ -11,6 +11,7 @@ | |||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/surface.h" | ||||
| #include "video_core/textures/decoders.h" | ||||
| #include "video_core/guest_memory.h" | ||||
| 
 | ||||
| namespace Tegra { | ||||
| class MemoryManager; | ||||
|  | @ -160,7 +161,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | |||
|     const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); | ||||
|     const size_t src_size = get_surface_size(src, src_bytes_per_pixel); | ||||
| 
 | ||||
|     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( | ||||
|     Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( | ||||
|         memory_manager, src.Address(), src_size, &impl->tmp_buffer); | ||||
| 
 | ||||
|     const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; | ||||
|  | @ -220,7 +221,7 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | |||
|     } | ||||
| 
 | ||||
|     const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); | ||||
|     Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite> | ||||
|     Tegra::Memory::GpuGuestMemoryScoped<u8, Tegra::Memory::GuestMemoryFlags::SafeReadWrite> | ||||
|         tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); | ||||
| 
 | ||||
|     if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { | ||||
|  |  | |||
|  | @ -14,7 +14,7 @@ namespace Tegra { | |||
|  * Struct describing framebuffer configuration | ||||
|  */ | ||||
| struct FramebufferConfig { | ||||
|     VAddr address{}; | ||||
|     DAddr address{}; | ||||
|     u32 offset{}; | ||||
|     u32 width{}; | ||||
|     u32 height{}; | ||||
|  |  | |||
|  | @ -34,6 +34,8 @@ | |||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/shader_notify.h" | ||||
| 
 | ||||
| #pragma optimize("", off) | ||||
| 
 | ||||
| namespace Tegra { | ||||
| 
 | ||||
| struct GPU::Impl { | ||||
|  | @ -95,8 +97,8 @@ struct GPU::Impl { | |||
| 
 | ||||
|     /// Synchronizes CPU writes with Host GPU memory.
 | ||||
|     void InvalidateGPUCache() { | ||||
|         std::function<void(VAddr, size_t)> callback_writes( | ||||
|             [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); | ||||
|         std::function<void(PAddr, size_t)> callback_writes( | ||||
|             [this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); | ||||
|         system.GatherGPUDirtyMemory(callback_writes); | ||||
|     } | ||||
| 
 | ||||
|  | @ -279,11 +281,11 @@ struct GPU::Impl { | |||
|     } | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||
|     void FlushRegion(VAddr addr, u64 size) { | ||||
|     void FlushRegion(DAddr addr, u64 size) { | ||||
|         gpu_thread.FlushRegion(addr, size); | ||||
|     } | ||||
| 
 | ||||
|     VideoCore::RasterizerDownloadArea OnCPURead(VAddr addr, u64 size) { | ||||
|     VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) { | ||||
|         auto raster_area = rasterizer->GetFlushArea(addr, size); | ||||
|         if (raster_area.preemtive) { | ||||
|             return raster_area; | ||||
|  | @ -299,16 +301,16 @@ struct GPU::Impl { | |||
|     } | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||
|     void InvalidateRegion(VAddr addr, u64 size) { | ||||
|     void InvalidateRegion(DAddr addr, u64 size) { | ||||
|         gpu_thread.InvalidateRegion(addr, size); | ||||
|     } | ||||
| 
 | ||||
|     bool OnCPUWrite(VAddr addr, u64 size) { | ||||
|     bool OnCPUWrite(DAddr addr, u64 size) { | ||||
|         return rasterizer->OnCPUWrite(addr, size); | ||||
|     } | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
|     void FlushAndInvalidateRegion(DAddr addr, u64 size) { | ||||
|         gpu_thread.FlushAndInvalidateRegion(addr, size); | ||||
|     } | ||||
| 
 | ||||
|  | @ -437,7 +439,7 @@ void GPU::OnCommandListEnd() { | |||
|     impl->OnCommandListEnd(); | ||||
| } | ||||
| 
 | ||||
| u64 GPU::RequestFlush(VAddr addr, std::size_t size) { | ||||
| u64 GPU::RequestFlush(DAddr addr, std::size_t size) { | ||||
|     return impl->RequestSyncOperation( | ||||
|         [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); | ||||
| } | ||||
|  | @ -557,23 +559,23 @@ void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
|     impl->SwapBuffers(framebuffer); | ||||
| } | ||||
| 
 | ||||
| VideoCore::RasterizerDownloadArea GPU::OnCPURead(VAddr addr, u64 size) { | ||||
| VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) { | ||||
|     return impl->OnCPURead(addr, size); | ||||
| } | ||||
| 
 | ||||
| void GPU::FlushRegion(VAddr addr, u64 size) { | ||||
| void GPU::FlushRegion(DAddr addr, u64 size) { | ||||
|     impl->FlushRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| void GPU::InvalidateRegion(VAddr addr, u64 size) { | ||||
| void GPU::InvalidateRegion(DAddr addr, u64 size) { | ||||
|     impl->InvalidateRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| bool GPU::OnCPUWrite(VAddr addr, u64 size) { | ||||
| bool GPU::OnCPUWrite(DAddr addr, u64 size) { | ||||
|     return impl->OnCPUWrite(addr, size); | ||||
| } | ||||
| 
 | ||||
| void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
| void GPU::FlushAndInvalidateRegion(DAddr addr, u64 size) { | ||||
|     impl->FlushAndInvalidateRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -158,7 +158,7 @@ public: | |||
|     void InitAddressSpace(Tegra::MemoryManager& memory_manager); | ||||
| 
 | ||||
|     /// Request a host GPU memory flush from the CPU.
 | ||||
|     [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); | ||||
|     [[nodiscard]] u64 RequestFlush(DAddr addr, std::size_t size); | ||||
| 
 | ||||
|     /// Obtains current flush request fence id.
 | ||||
|     [[nodiscard]] u64 CurrentSyncRequestFence() const; | ||||
|  | @ -242,20 +242,20 @@ public: | |||
|     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||
|     [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(VAddr addr, u64 size); | ||||
|     [[nodiscard]] VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size); | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||
|     void FlushRegion(VAddr addr, u64 size); | ||||
|     void FlushRegion(DAddr addr, u64 size); | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||
|     void InvalidateRegion(VAddr addr, u64 size); | ||||
|     void InvalidateRegion(DAddr addr, u64 size); | ||||
| 
 | ||||
|     /// Notify rasterizer that CPU is trying to write this area. It returns true if the area is
 | ||||
|     /// sensible, false otherwise
 | ||||
|     bool OnCPUWrite(VAddr addr, u64 size); | ||||
|     bool OnCPUWrite(DAddr addr, u64 size); | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size); | ||||
|     void FlushAndInvalidateRegion(DAddr addr, u64 size); | ||||
| 
 | ||||
| private: | ||||
|     struct Impl; | ||||
|  |  | |||
|  | @ -82,7 +82,7 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
|     PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::FlushRegion(VAddr addr, u64 size) { | ||||
| void ThreadManager::FlushRegion(DAddr addr, u64 size) { | ||||
|     if (!is_async) { | ||||
|         // Always flush with synchronous GPU mode
 | ||||
|         PushCommand(FlushRegionCommand(addr, size)); | ||||
|  | @ -101,11 +101,11 @@ void ThreadManager::TickGPU() { | |||
|     PushCommand(GPUTickCommand()); | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | ||||
| void ThreadManager::InvalidateRegion(DAddr addr, u64 size) { | ||||
|     rasterizer->OnCacheInvalidation(addr, size); | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
| void ThreadManager::FlushAndInvalidateRegion(DAddr addr, u64 size) { | ||||
|     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
 | ||||
|     rasterizer->OnCacheInvalidation(addr, size); | ||||
| } | ||||
|  |  | |||
|  | @ -54,26 +54,26 @@ struct SwapBuffersCommand final { | |||
| 
 | ||||
| /// Command to signal to the GPU thread to flush a region
 | ||||
| struct FlushRegionCommand final { | ||||
|     explicit constexpr FlushRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} | ||||
|     explicit constexpr FlushRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {} | ||||
| 
 | ||||
|     VAddr addr; | ||||
|     DAddr addr; | ||||
|     u64 size; | ||||
| }; | ||||
| 
 | ||||
| /// Command to signal to the GPU thread to invalidate a region
 | ||||
| struct InvalidateRegionCommand final { | ||||
|     explicit constexpr InvalidateRegionCommand(VAddr addr_, u64 size_) : addr{addr_}, size{size_} {} | ||||
|     explicit constexpr InvalidateRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {} | ||||
| 
 | ||||
|     VAddr addr; | ||||
|     DAddr addr; | ||||
|     u64 size; | ||||
| }; | ||||
| 
 | ||||
| /// Command to signal to the GPU thread to flush and invalidate a region
 | ||||
| struct FlushAndInvalidateRegionCommand final { | ||||
|     explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr_, u64 size_) | ||||
|     explicit constexpr FlushAndInvalidateRegionCommand(DAddr addr_, u64 size_) | ||||
|         : addr{addr_}, size{size_} {} | ||||
| 
 | ||||
|     VAddr addr; | ||||
|     DAddr addr; | ||||
|     u64 size; | ||||
| }; | ||||
| 
 | ||||
|  | @ -122,13 +122,13 @@ public: | |||
|     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||
|     void FlushRegion(VAddr addr, u64 size); | ||||
|     void FlushRegion(DAddr addr, u64 size); | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||
|     void InvalidateRegion(VAddr addr, u64 size); | ||||
|     void InvalidateRegion(DAddr addr, u64 size); | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size); | ||||
|     void FlushAndInvalidateRegion(DAddr addr, u64 size); | ||||
| 
 | ||||
|     void TickGPU(); | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										29
									
								
								src/video_core/guest_memory.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								src/video_core/guest_memory.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,29 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <iterator> | ||||
| #include <memory> | ||||
| #include <optional> | ||||
| #include <span> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "common/scratch_buffer.h" | ||||
| #include "core/guest_memory.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| 
 | ||||
| namespace Tegra::Memory { | ||||
| 
 | ||||
| using GuestMemoryFlags = Core::Memory::GuestMemoryFlags; | ||||
| 
 | ||||
| template <typename T, GuestMemoryFlags FLAGS> | ||||
| using DeviceGuestMemory = Core::Memory::GuestMemory<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>; | ||||
| template <typename T, GuestMemoryFlags FLAGS> | ||||
| using DeviceGuestMemoryScoped = Core::Memory::GuestMemoryScoped<Tegra::MaxwellDeviceMemoryManager, T, FLAGS>; | ||||
| template <typename T, GuestMemoryFlags FLAGS> | ||||
| using GpuGuestMemory = Core::Memory::GuestMemory<Tegra::MemoryManager, T, FLAGS>; | ||||
| template <typename T, GuestMemoryFlags FLAGS> | ||||
| using GpuGuestMemoryScoped = Core::Memory::GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>; | ||||
| 
 | ||||
| } // namespace Tegra::Memory
 | ||||
|  | @ -1,6 +1,8 @@ | |||
| // SPDX-FileCopyrightText: 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include "core/device_memory_manager.h" | ||||
| 
 | ||||
| namespace VideoCore { | ||||
|  | @ -12,8 +14,8 @@ namespace Tegra { | |||
| struct MaxwellDeviceMethods; | ||||
| 
 | ||||
| struct MaxwellDeviceTraits { | ||||
|     static constexpr bool supports_pinning = true; | ||||
|     static constexpr size_t device_virtual_bits = 34; | ||||
|     static constexpr bool supports_pinning = false; | ||||
|     static constexpr size_t device_virtual_bits = 32; | ||||
|     using DeviceInterface = typename VideoCore::RasterizerInterface; | ||||
|     using DeviceMethods = typename MaxwellDeviceMethods; | ||||
| }; | ||||
|  |  | |||
|  | @ -7,22 +7,24 @@ | |||
| #include "common/assert.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "core/core.h" | ||||
| #include "core/device_memory.h" | ||||
| #include "core/hle/kernel/k_page_table.h" | ||||
| #include "core/hle/kernel/k_process.h" | ||||
| #include "video_core/guest_memory.h" | ||||
| #include "video_core/host1x/host1x.h" | ||||
| #include "video_core/invalidation_accumulator.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_base.h" | ||||
| 
 | ||||
| 
 | ||||
| namespace Tegra { | ||||
| using Core::Memory::GuestMemoryFlags; | ||||
| using Tegra::Memory::GuestMemoryFlags; | ||||
| 
 | ||||
| std::atomic<size_t> MemoryManager::unique_identifier_generator{}; | ||||
| 
 | ||||
| MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, | ||||
|                              u64 page_bits_) | ||||
|     : system{system_}, memory{system.ApplicationMemory()}, device_memory{system.DeviceMemory()}, | ||||
|     : system{system_}, memory{system.Host1x().MemoryManager()}, | ||||
|       address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, | ||||
|       entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, | ||||
|                                            page_bits != big_page_bits ? page_bits : 0}, | ||||
|  | @ -42,7 +44,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 | |||
|     big_page_table_mask = big_page_table_size - 1; | ||||
| 
 | ||||
|     big_entries.resize(big_page_table_size / 32, 0); | ||||
|     big_page_table_cpu.resize(big_page_table_size); | ||||
|     big_page_table_dev.resize(big_page_table_size); | ||||
|     big_page_continuous.resize(big_page_table_size / continuous_bits, 0); | ||||
|     entries.resize(page_table_size / 32, 0); | ||||
| } | ||||
|  | @ -100,7 +102,7 @@ inline void MemoryManager::SetBigPageContinuous(size_t big_page_index, bool valu | |||
| } | ||||
| 
 | ||||
| template <MemoryManager::EntryType entry_type> | ||||
| GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, | ||||
| GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size, | ||||
|                                     PTEKind kind) { | ||||
|     [[maybe_unused]] u64 remaining_size{size}; | ||||
|     if constexpr (entry_type == EntryType::Mapped) { | ||||
|  | @ -114,9 +116,9 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp | |||
|             rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); | ||||
|         } | ||||
|         if constexpr (entry_type == EntryType::Mapped) { | ||||
|             const VAddr current_cpu_addr = cpu_addr + offset; | ||||
|             const DAddr current_dev_addr = dev_addr + offset; | ||||
|             const auto index = PageEntryIndex<false>(current_gpu_addr); | ||||
|             const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits); | ||||
|             const u32 sub_value = static_cast<u32>(current_dev_addr >> cpu_page_bits); | ||||
|             page_table[index] = sub_value; | ||||
|         } | ||||
|         remaining_size -= page_size; | ||||
|  | @ -126,7 +128,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp | |||
| } | ||||
| 
 | ||||
| template <MemoryManager::EntryType entry_type> | ||||
| GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, | ||||
| GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, | ||||
|                                        size_t size, PTEKind kind) { | ||||
|     [[maybe_unused]] u64 remaining_size{size}; | ||||
|     for (u64 offset{}; offset < size; offset += big_page_size) { | ||||
|  | @ -137,20 +139,20 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr | |||
|             rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); | ||||
|         } | ||||
|         if constexpr (entry_type == EntryType::Mapped) { | ||||
|             const VAddr current_cpu_addr = cpu_addr + offset; | ||||
|             const DAddr current_dev_addr = dev_addr + offset; | ||||
|             const auto index = PageEntryIndex<true>(current_gpu_addr); | ||||
|             const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits); | ||||
|             big_page_table_cpu[index] = sub_value; | ||||
|             const u32 sub_value = static_cast<u32>(current_dev_addr >> cpu_page_bits); | ||||
|             big_page_table_dev[index] = sub_value; | ||||
|             const bool is_continuous = ([&] { | ||||
|                 uintptr_t base_ptr{ | ||||
|                     reinterpret_cast<uintptr_t>(memory.GetPointerSilent(current_cpu_addr))}; | ||||
|                     reinterpret_cast<uintptr_t>(memory.GetPointer<u8>(current_dev_addr))}; | ||||
|                 if (base_ptr == 0) { | ||||
|                     return false; | ||||
|                 } | ||||
|                 for (VAddr start_cpu = current_cpu_addr + page_size; | ||||
|                      start_cpu < current_cpu_addr + big_page_size; start_cpu += page_size) { | ||||
|                 for (DAddr start_cpu = current_dev_addr + page_size; | ||||
|                      start_cpu < current_dev_addr + big_page_size; start_cpu += page_size) { | ||||
|                     base_ptr += page_size; | ||||
|                     auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointerSilent(start_cpu)); | ||||
|                     auto next_ptr = reinterpret_cast<uintptr_t>(memory.GetPointer<u8>(start_cpu)); | ||||
|                     if (next_ptr == 0 || base_ptr != next_ptr) { | ||||
|                         return false; | ||||
|                     } | ||||
|  | @ -172,12 +174,12 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) | |||
|     rasterizer = rasterizer_; | ||||
| } | ||||
| 
 | ||||
| GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind, | ||||
| GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, DAddr dev_addr, std::size_t size, PTEKind kind, | ||||
|                             bool is_big_pages) { | ||||
|     if (is_big_pages) [[likely]] { | ||||
|         return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind); | ||||
|         return BigPageTableOp<EntryType::Mapped>(gpu_addr, dev_addr, size, kind); | ||||
|     } | ||||
|     return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind); | ||||
|     return PageTableOp<EntryType::Mapped>(gpu_addr, dev_addr, size, kind); | ||||
| } | ||||
| 
 | ||||
| GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) { | ||||
|  | @ -202,7 +204,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | |||
|     PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); | ||||
| } | ||||
| 
 | ||||
| std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { | ||||
| std::optional<DAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { | ||||
|     if (!IsWithinGPUAddressRange(gpu_addr)) [[unlikely]] { | ||||
|         return std::nullopt; | ||||
|     } | ||||
|  | @ -211,17 +213,17 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { | |||
|             return std::nullopt; | ||||
|         } | ||||
| 
 | ||||
|         const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex<false>(gpu_addr)]) | ||||
|         const DAddr dev_addr_base = static_cast<DAddr>(page_table[PageEntryIndex<false>(gpu_addr)]) | ||||
|                                     << cpu_page_bits; | ||||
|         return cpu_addr_base + (gpu_addr & page_mask); | ||||
|         return dev_addr_base + (gpu_addr & page_mask); | ||||
|     } | ||||
| 
 | ||||
|     const VAddr cpu_addr_base = | ||||
|         static_cast<VAddr>(big_page_table_cpu[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits; | ||||
|     return cpu_addr_base + (gpu_addr & big_page_mask); | ||||
|     const DAddr dev_addr_base = | ||||
|         static_cast<DAddr>(big_page_table_dev[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits; | ||||
|     return dev_addr_base + (gpu_addr & big_page_mask); | ||||
| } | ||||
| 
 | ||||
| std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { | ||||
| std::optional<DAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { | ||||
|     size_t page_index{addr >> page_bits}; | ||||
|     const size_t page_last{(addr + size + page_size - 1) >> page_bits}; | ||||
|     while (page_index < page_last) { | ||||
|  | @ -274,7 +276,7 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) { | |||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     return memory.GetPointer(*address); | ||||
|     return memory.GetPointer<u8>(*address); | ||||
| } | ||||
| 
 | ||||
| const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { | ||||
|  | @ -283,7 +285,7 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const { | |||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     return memory.GetPointer(*address); | ||||
|     return memory.GetPointer<u8>(*address); | ||||
| } | ||||
| 
 | ||||
| #ifdef _MSC_VER // no need for gcc / clang but msvc's compiler is more conservative with inlining.
 | ||||
|  | @ -367,25 +369,25 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: | |||
|         dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | ||||
|     }; | ||||
|     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         if constexpr (is_safe) { | ||||
|             rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | ||||
|             rasterizer->FlushRegion(dev_addr_base, copy_amount, which); | ||||
|         } | ||||
|         u8* physical = memory.GetPointer(cpu_addr_base); | ||||
|         u8* physical = memory.GetPointer<u8>(dev_addr_base); | ||||
|         std::memcpy(dest_buffer, physical, copy_amount); | ||||
|         dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | ||||
|     }; | ||||
|     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||
|         if constexpr (is_safe) { | ||||
|             rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | ||||
|             rasterizer->FlushRegion(dev_addr_base, copy_amount, which); | ||||
|         } | ||||
|         if (!IsBigPageContinuous(page_index)) [[unlikely]] { | ||||
|             memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); | ||||
|             memory.ReadBlockUnsafe(dev_addr_base, dest_buffer, copy_amount); | ||||
|         } else { | ||||
|             u8* physical = memory.GetPointer(cpu_addr_base); | ||||
|             u8* physical = memory.GetPointer<u8>(dev_addr_base); | ||||
|             std::memcpy(dest_buffer, physical, copy_amount); | ||||
|         } | ||||
|         dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | ||||
|  | @ -416,25 +418,25 @@ void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffe | |||
|         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | ||||
|     }; | ||||
|     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         if constexpr (is_safe) { | ||||
|             rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); | ||||
|             rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); | ||||
|         } | ||||
|         u8* physical = memory.GetPointer(cpu_addr_base); | ||||
|         u8* physical = memory.GetPointer<u8>(dev_addr_base); | ||||
|         std::memcpy(physical, src_buffer, copy_amount); | ||||
|         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | ||||
|     }; | ||||
|     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||
|         if constexpr (is_safe) { | ||||
|             rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); | ||||
|             rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); | ||||
|         } | ||||
|         if (!IsBigPageContinuous(page_index)) [[unlikely]] { | ||||
|             memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount); | ||||
|             memory.WriteBlockUnsafe(dev_addr_base, src_buffer, copy_amount); | ||||
|         } else { | ||||
|             u8* physical = memory.GetPointer(cpu_addr_base); | ||||
|             u8* physical = memory.GetPointer<u8>(dev_addr_base); | ||||
|             std::memcpy(physical, src_buffer, copy_amount); | ||||
|         } | ||||
|         src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | ||||
|  | @ -470,14 +472,14 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, | |||
|                           [[maybe_unused]] std::size_t copy_amount) {}; | ||||
| 
 | ||||
|     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         rasterizer->FlushRegion(dev_addr_base, copy_amount, which); | ||||
|     }; | ||||
|     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | ||||
|         rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||
|         rasterizer->FlushRegion(dev_addr_base, copy_amount, which); | ||||
|     }; | ||||
|     auto flush_short_pages = [&](std::size_t page_index, std::size_t offset, | ||||
|                                  std::size_t copy_amount) { | ||||
|  | @ -495,15 +497,15 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size, | |||
|                           [[maybe_unused]] std::size_t copy_amount) { return false; }; | ||||
| 
 | ||||
|     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         result |= rasterizer->MustFlushRegion(dev_addr_base, copy_amount, which); | ||||
|         return result; | ||||
|     }; | ||||
|     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | ||||
|         result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount, which); | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||
|         result |= rasterizer->MustFlushRegion(dev_addr_base, copy_amount, which); | ||||
|         return result; | ||||
|     }; | ||||
|     auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | ||||
|  | @ -517,7 +519,7 @@ bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size, | |||
| } | ||||
| 
 | ||||
| size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { | ||||
|     std::optional<VAddr> old_page_addr{}; | ||||
|     std::optional<DAddr> old_page_addr{}; | ||||
|     size_t range_so_far = 0; | ||||
|     bool result{false}; | ||||
|     auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, | ||||
|  | @ -526,24 +528,24 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const { | |||
|         return true; | ||||
|     }; | ||||
|     auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         if (old_page_addr && *old_page_addr != cpu_addr_base) { | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         if (old_page_addr && *old_page_addr != dev_addr_base) { | ||||
|             result = true; | ||||
|             return true; | ||||
|         } | ||||
|         range_so_far += copy_amount; | ||||
|         old_page_addr = {cpu_addr_base + copy_amount}; | ||||
|         old_page_addr = {dev_addr_base + copy_amount}; | ||||
|         return false; | ||||
|     }; | ||||
|     auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | ||||
|         if (old_page_addr && *old_page_addr != cpu_addr_base) { | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||
|         if (old_page_addr && *old_page_addr != dev_addr_base) { | ||||
|             return true; | ||||
|         } | ||||
|         range_so_far += copy_amount; | ||||
|         old_page_addr = {cpu_addr_base + copy_amount}; | ||||
|         old_page_addr = {dev_addr_base + copy_amount}; | ||||
|         return false; | ||||
|     }; | ||||
|     auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | ||||
|  | @ -568,14 +570,14 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, | |||
|                           [[maybe_unused]] std::size_t copy_amount) {}; | ||||
| 
 | ||||
|     auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); | ||||
|     }; | ||||
|     auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | ||||
|         rasterizer->InvalidateRegion(cpu_addr_base, copy_amount, which); | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||
|         rasterizer->InvalidateRegion(dev_addr_base, copy_amount, which); | ||||
|     }; | ||||
|     auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, | ||||
|                                       std::size_t copy_amount) { | ||||
|  | @ -587,7 +589,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, | |||
| 
 | ||||
| void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, | ||||
|                               VideoCommon::CacheType which) { | ||||
|     Core::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( | ||||
|     Tegra::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( | ||||
|         *this, gpu_src_addr, size); | ||||
|     data.SetAddressAndSize(gpu_dest_addr, size); | ||||
|     FlushRegion(gpu_dest_addr, size, which); | ||||
|  | @ -611,7 +613,7 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { | |||
| } | ||||
| 
 | ||||
| bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const { | ||||
|     std::optional<VAddr> old_page_addr{}; | ||||
|     std::optional<DAddr> old_page_addr{}; | ||||
|     bool result{true}; | ||||
|     auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, | ||||
|                     std::size_t copy_amount) { | ||||
|  | @ -619,23 +621,23 @@ bool MemoryManager::IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const | |||
|         return true; | ||||
|     }; | ||||
|     auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         if (old_page_addr && *old_page_addr != cpu_addr_base) { | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         if (old_page_addr && *old_page_addr != dev_addr_base) { | ||||
|             result = false; | ||||
|             return true; | ||||
|         } | ||||
|         old_page_addr = {cpu_addr_base + copy_amount}; | ||||
|         old_page_addr = {dev_addr_base + copy_amount}; | ||||
|         return false; | ||||
|     }; | ||||
|     auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | ||||
|         if (old_page_addr && *old_page_addr != cpu_addr_base) { | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||
|         if (old_page_addr && *old_page_addr != dev_addr_base) { | ||||
|             result = false; | ||||
|             return true; | ||||
|         } | ||||
|         old_page_addr = {cpu_addr_base + copy_amount}; | ||||
|         old_page_addr = {dev_addr_base + copy_amount}; | ||||
|         return false; | ||||
|     }; | ||||
|     auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | ||||
|  | @ -678,11 +680,11 @@ template <bool is_gpu_address> | |||
| void MemoryManager::GetSubmappedRangeImpl( | ||||
|     GPUVAddr gpu_addr, std::size_t size, | ||||
|     boost::container::small_vector< | ||||
|         std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result) | ||||
|         std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>, 32>& result) | ||||
|     const { | ||||
|     std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> | ||||
|     std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>> | ||||
|         last_segment{}; | ||||
|     std::optional<VAddr> old_page_addr{}; | ||||
|     std::optional<DAddr> old_page_addr{}; | ||||
|     const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, | ||||
|                                                 [[maybe_unused]] std::size_t offset, | ||||
|                                                 [[maybe_unused]] std::size_t copy_amount) { | ||||
|  | @ -694,20 +696,20 @@ void MemoryManager::GetSubmappedRangeImpl( | |||
|     const auto extend_size_big = [this, &split, &old_page_addr, | ||||
|                                   &last_segment](std::size_t page_index, std::size_t offset, | ||||
|                                                  std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(big_page_table_dev[page_index]) << cpu_page_bits) + offset; | ||||
|         if (old_page_addr) { | ||||
|             if (*old_page_addr != cpu_addr_base) { | ||||
|             if (*old_page_addr != dev_addr_base) { | ||||
|                 split(0, 0, 0); | ||||
|             } | ||||
|         } | ||||
|         old_page_addr = {cpu_addr_base + copy_amount}; | ||||
|         old_page_addr = {dev_addr_base + copy_amount}; | ||||
|         if (!last_segment) { | ||||
|             if constexpr (is_gpu_address) { | ||||
|                 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; | ||||
|                 last_segment = {new_base_addr, copy_amount}; | ||||
|             } else { | ||||
|                 last_segment = {cpu_addr_base, copy_amount}; | ||||
|                 last_segment = {dev_addr_base, copy_amount}; | ||||
|             } | ||||
|         } else { | ||||
|             last_segment->second += copy_amount; | ||||
|  | @ -716,20 +718,20 @@ void MemoryManager::GetSubmappedRangeImpl( | |||
|     const auto extend_size_short = [this, &split, &old_page_addr, | ||||
|                                     &last_segment](std::size_t page_index, std::size_t offset, | ||||
|                                                    std::size_t copy_amount) { | ||||
|         const VAddr cpu_addr_base = | ||||
|             (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         const DAddr dev_addr_base = | ||||
|             (static_cast<DAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||||
|         if (old_page_addr) { | ||||
|             if (*old_page_addr != cpu_addr_base) { | ||||
|             if (*old_page_addr != dev_addr_base) { | ||||
|                 split(0, 0, 0); | ||||
|             } | ||||
|         } | ||||
|         old_page_addr = {cpu_addr_base + copy_amount}; | ||||
|         old_page_addr = {dev_addr_base + copy_amount}; | ||||
|         if (!last_segment) { | ||||
|             if constexpr (is_gpu_address) { | ||||
|                 const GPUVAddr new_base_addr = (page_index << page_bits) + offset; | ||||
|                 last_segment = {new_base_addr, copy_amount}; | ||||
|             } else { | ||||
|                 last_segment = {cpu_addr_base, copy_amount}; | ||||
|                 last_segment = {dev_addr_base, copy_amount}; | ||||
|             } | ||||
|         } else { | ||||
|             last_segment->second += copy_amount; | ||||
|  | @ -756,9 +758,9 @@ void MemoryManager::FlushCaching() { | |||
| } | ||||
| 
 | ||||
| const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { | ||||
|     auto cpu_addr = GpuToCpuAddress(src_addr); | ||||
|     if (cpu_addr) { | ||||
|         return memory.GetSpan(*cpu_addr, size); | ||||
|     auto dev_addr = GpuToCpuAddress(src_addr); | ||||
|     if (dev_addr) { | ||||
|         return memory.GetSpan(*dev_addr, size); | ||||
|     } | ||||
|     return nullptr; | ||||
| } | ||||
|  | @ -767,9 +769,9 @@ u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) { | |||
|     if (!IsContinuousRange(src_addr, size)) { | ||||
|         return nullptr; | ||||
|     } | ||||
|     auto cpu_addr = GpuToCpuAddress(src_addr); | ||||
|     if (cpu_addr) { | ||||
|         return memory.GetSpan(*cpu_addr, size); | ||||
|     auto dev_addr = GpuToCpuAddress(src_addr); | ||||
|     if (dev_addr) { | ||||
|         return memory.GetSpan(*dev_addr, size); | ||||
|     } | ||||
|     return nullptr; | ||||
| } | ||||
|  |  | |||
|  | @ -17,6 +17,7 @@ | |||
| #include "common/virtual_buffer.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/cache_types.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| #include "video_core/pte_kind.h" | ||||
| 
 | ||||
| namespace VideoCore { | ||||
|  | @ -28,10 +29,6 @@ class InvalidationAccumulator; | |||
| } | ||||
| 
 | ||||
| namespace Core { | ||||
| class DeviceMemory; | ||||
| namespace Memory { | ||||
| class Memory; | ||||
| } // namespace Memory
 | ||||
| class System; | ||||
| } // namespace Core
 | ||||
| 
 | ||||
|  | @ -50,9 +47,9 @@ public: | |||
|     /// Binds a renderer to the memory manager.
 | ||||
|     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||||
| 
 | ||||
|     [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; | ||||
|     [[nodiscard]] std::optional<DAddr> GpuToCpuAddress(GPUVAddr addr) const; | ||||
| 
 | ||||
|     [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; | ||||
|     [[nodiscard]] std::optional<DAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; | ||||
| 
 | ||||
|     template <typename T> | ||||
|     [[nodiscard]] T Read(GPUVAddr addr) const; | ||||
|  | @ -110,7 +107,7 @@ public: | |||
|     [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; | ||||
| 
 | ||||
|     /**
 | ||||
|      * Checks if a gpu region is mapped by a single range of cpu addresses. | ||||
|      * Checks if a gpu region is mapped by a single range of device addresses. | ||||
|      */ | ||||
|     [[nodiscard]] bool IsContinuousRange(GPUVAddr gpu_addr, std::size_t size) const; | ||||
| 
 | ||||
|  | @ -120,14 +117,14 @@ public: | |||
|     [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; | ||||
| 
 | ||||
|     /**
 | ||||
|      * Returns a vector with all the subranges of cpu addresses mapped beneath. | ||||
|      * Returns a vector with all the subranges of device addresses mapped beneath. | ||||
|      * If the region is continuous, a single pair will be returned. If it's unmapped, an empty | ||||
|      * vector will be returned. | ||||
|      */ | ||||
|     boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( | ||||
|         GPUVAddr gpu_addr, std::size_t size) const; | ||||
| 
 | ||||
|     GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, | ||||
|     GPUVAddr Map(GPUVAddr gpu_addr, DAddr dev_addr, std::size_t size, | ||||
|                  PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); | ||||
|     GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true); | ||||
|     void Unmap(GPUVAddr gpu_addr, std::size_t size); | ||||
|  | @ -186,12 +183,11 @@ private: | |||
|     void GetSubmappedRangeImpl( | ||||
|         GPUVAddr gpu_addr, std::size_t size, | ||||
|         boost::container::small_vector< | ||||
|             std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& | ||||
|             std::pair<std::conditional_t<is_gpu_address, GPUVAddr, DAddr>, std::size_t>, 32>& | ||||
|             result) const; | ||||
| 
 | ||||
|     Core::System& system; | ||||
|     Core::Memory::Memory& memory; | ||||
|     Core::DeviceMemory& device_memory; | ||||
|     MaxwellDeviceMemoryManager& memory; | ||||
| 
 | ||||
|     const u64 address_space_bits; | ||||
|     const u64 page_bits; | ||||
|  | @ -218,11 +214,11 @@ private: | |||
|     std::vector<u64> big_entries; | ||||
| 
 | ||||
|     template <EntryType entry_type> | ||||
|     GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, | ||||
|     GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size, | ||||
|                          PTEKind kind); | ||||
| 
 | ||||
|     template <EntryType entry_type> | ||||
|     GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size, | ||||
|     GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] DAddr dev_addr, size_t size, | ||||
|                             PTEKind kind); | ||||
| 
 | ||||
|     template <bool is_big_page> | ||||
|  | @ -233,11 +229,11 @@ private: | |||
| 
 | ||||
|     Common::MultiLevelPageTable<u32> page_table; | ||||
|     Common::RangeMap<GPUVAddr, PTEKind> kind_map; | ||||
|     Common::VirtualBuffer<u32> big_page_table_cpu; | ||||
|     Common::VirtualBuffer<u32> big_page_table_dev; | ||||
| 
 | ||||
|     std::vector<u64> big_page_continuous; | ||||
|     boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{}; | ||||
|     boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{}; | ||||
|     boost::container::small_vector<std::pair<DAddr, std::size_t>, 32> page_stash{}; | ||||
|     boost::container::small_vector<std::pair<DAddr, std::size_t>, 32> page_stash2{}; | ||||
| 
 | ||||
|     mutable std::mutex guard; | ||||
| 
 | ||||
|  |  | |||
|  | @ -21,6 +21,7 @@ | |||
| #include "core/memory.h" | ||||
| #include "video_core/control/channel_state_cache.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/texture_cache/slot_vector.h" | ||||
|  | @ -102,11 +103,12 @@ template <class QueryCache, class CachedQuery, class CounterStream, class HostCo | |||
| class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||||
| public: | ||||
|     explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_, | ||||
|                               Core::Memory::Memory& cpu_memory_) | ||||
|                               Tegra::MaxwellDeviceMemoryManager& device_memory_) | ||||
|         : rasterizer{rasterizer_}, | ||||
|           // Use reinterpret_cast instead of static_cast as workaround for
 | ||||
|           // UBSan bug (https://github.com/llvm/llvm-project/issues/59060)
 | ||||
|           cpu_memory{cpu_memory_}, streams{{ | ||||
|           device_memory{device_memory_}, | ||||
|           streams{{ | ||||
|               {CounterStream{reinterpret_cast<QueryCache&>(*this), | ||||
|                              VideoCore::QueryType::SamplesPassed}}, | ||||
|               {CounterStream{reinterpret_cast<QueryCache&>(*this), | ||||
|  | @ -322,13 +324,14 @@ private: | |||
|             local_lock.unlock(); | ||||
|             if (timestamp) { | ||||
|                 u64 timestamp_value = *timestamp; | ||||
|                 cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64)); | ||||
|                 cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64)); | ||||
|                 device_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, | ||||
|                                                sizeof(u64)); | ||||
|                 device_memory.WriteBlockUnsafe(address, &value, sizeof(u64)); | ||||
|                 rasterizer.InvalidateRegion(address, sizeof(u64) * 2, | ||||
|                                             VideoCommon::CacheType::NoQueryCache); | ||||
|             } else { | ||||
|                 u32 small_value = static_cast<u32>(value); | ||||
|                 cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32)); | ||||
|                 device_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32)); | ||||
|                 rasterizer.InvalidateRegion(address, sizeof(u32), | ||||
|                                             VideoCommon::CacheType::NoQueryCache); | ||||
|             } | ||||
|  | @ -342,7 +345,7 @@ private: | |||
|     SlotVector<AsyncJob> slot_async_jobs; | ||||
| 
 | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
| 
 | ||||
|     mutable std::recursive_mutex mutex; | ||||
| 
 | ||||
|  |  | |||
|  | @ -23,7 +23,7 @@ DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) | |||
| 
 | ||||
| class QueryBase { | ||||
| public: | ||||
|     VAddr guest_address{}; | ||||
|     DAddr guest_address{}; | ||||
|     QueryFlagBits flags{}; | ||||
|     u64 value{}; | ||||
| 
 | ||||
|  | @ -32,7 +32,7 @@ protected: | |||
|     QueryBase() = default; | ||||
| 
 | ||||
|     // Parameterized constructor
 | ||||
|     QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) | ||||
|     QueryBase(DAddr address, QueryFlagBits flags_, u64 value_) | ||||
|         : guest_address(address), flags(flags_), value{value_} {} | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -18,6 +18,7 @@ | |||
| #include "core/memory.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/query_cache/bank_base.h" | ||||
| #include "video_core/query_cache/query_base.h" | ||||
|  | @ -113,9 +114,10 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | |||
|     using RuntimeType = typename Traits::RuntimeType; | ||||
| 
 | ||||
|     QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, | ||||
|                        Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) | ||||
|                        Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_, | ||||
|                        Tegra::GPU& gpu_) | ||||
|         : owner{owner_}, rasterizer{rasterizer_}, | ||||
|           cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { | ||||
|           device_memory{device_memory_}, runtime{runtime_}, gpu{gpu_} { | ||||
|         streamer_mask = 0; | ||||
|         for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { | ||||
|             streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); | ||||
|  | @ -158,7 +160,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | |||
| 
 | ||||
|     QueryCacheBase<Traits>* owner; | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
|     RuntimeType& runtime; | ||||
|     Tegra::GPU& gpu; | ||||
|     std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; | ||||
|  | @ -171,10 +173,11 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | |||
| template <typename Traits> | ||||
| QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, | ||||
|                                        VideoCore::RasterizerInterface& rasterizer_, | ||||
|                                        Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) | ||||
|                                        Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||
|                                        RuntimeType& runtime_) | ||||
|     : cached_queries{} { | ||||
|     impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( | ||||
|         this, rasterizer_, cpu_memory_, runtime_, gpu_); | ||||
|         this, rasterizer_, device_memory_, runtime_, gpu_); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
|  | @ -240,7 +243,7 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | |||
|     if (!cpu_addr_opt) [[unlikely]] { | ||||
|         return; | ||||
|     } | ||||
|     VAddr cpu_addr = *cpu_addr_opt; | ||||
|     DAddr cpu_addr = *cpu_addr_opt; | ||||
|     const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); | ||||
|     auto* query = streamer->GetQuery(new_query_id); | ||||
|     if (is_fence) { | ||||
|  | @ -253,10 +256,9 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | |||
|         return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, | ||||
|                                         static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); | ||||
|     }; | ||||
|     u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); | ||||
|     u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); | ||||
|     u8* pointer = impl->device_memory.GetPointer<u8>(cpu_addr); | ||||
|     u8* pointer_timestamp = impl->device_memory.GetPointer<u8>(cpu_addr + 8); | ||||
|     bool is_synced = !Settings::IsGPULevelHigh() && is_fence; | ||||
| 
 | ||||
|     std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, | ||||
|                                      pointer, pointer_timestamp] { | ||||
|         if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { | ||||
|  | @ -559,7 +561,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo | |||
|     } | ||||
|     if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && | ||||
|         False(query_base->flags & QueryFlagBits::IsGuestSynced)) { | ||||
|         auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); | ||||
|         auto* ptr = impl->device_memory.GetPointer<u8>(query_base->guest_address); | ||||
|         if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { | ||||
|             std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); | ||||
|             return false; | ||||
|  |  | |||
|  | @ -17,10 +17,7 @@ | |||
| #include "video_core/control/channel_state_cache.h" | ||||
| #include "video_core/query_cache/query_base.h" | ||||
| #include "video_core/query_cache/types.h" | ||||
| 
 | ||||
| namespace Core::Memory { | ||||
| class Memory; | ||||
| } | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| 
 | ||||
| namespace VideoCore { | ||||
| class RasterizerInterface; | ||||
|  | @ -53,7 +50,7 @@ public: | |||
|     }; | ||||
| 
 | ||||
|     explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, | ||||
|                             Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); | ||||
|                             Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_); | ||||
| 
 | ||||
|     ~QueryCacheBase(); | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,72 +0,0 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||
| 
 | ||||
| #include <atomic> | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/div_ceil.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/rasterizer_accelerated.h" | ||||
| 
 | ||||
| namespace VideoCore { | ||||
| 
 | ||||
| using namespace Core::Memory; | ||||
| 
 | ||||
| RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) | ||||
|     : cached_pages(std::make_unique<CachedPages>()), cpu_memory{cpu_memory_} {} | ||||
| 
 | ||||
| RasterizerAccelerated::~RasterizerAccelerated() = default; | ||||
| 
 | ||||
| void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||||
|     u64 uncache_begin = 0; | ||||
|     u64 cache_begin = 0; | ||||
|     u64 uncache_bytes = 0; | ||||
|     u64 cache_bytes = 0; | ||||
| 
 | ||||
|     std::atomic_thread_fence(std::memory_order_acquire); | ||||
|     const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); | ||||
|     for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) { | ||||
|         std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page); | ||||
| 
 | ||||
|         if (delta > 0) { | ||||
|             ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!"); | ||||
|         } else if (delta < 0) { | ||||
|             ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); | ||||
|         } else { | ||||
|             ASSERT_MSG(false, "Delta must be non-zero!"); | ||||
|         } | ||||
| 
 | ||||
|         // Adds or subtracts 1: count is an unsigned 16-bit atomic, so casting a delta of -1 to u16 wraps around to a decrement
 | ||||
|         count.fetch_add(static_cast<u16>(delta), std::memory_order_release); | ||||
| 
 | ||||
|         // Assume delta is either -1 or 1. A count of 0 extends the pending run of pages to uncache; any other value flushes that run
 | ||||
|         if (count.load(std::memory_order::relaxed) == 0) { | ||||
|             if (uncache_bytes == 0) { | ||||
|                 uncache_begin = page; | ||||
|             } | ||||
|             uncache_bytes += YUZU_PAGESIZE; | ||||
|         } else if (uncache_bytes > 0) { | ||||
|             cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, | ||||
|                                                   false); | ||||
|             uncache_bytes = 0; | ||||
|         } | ||||
|         if (count.load(std::memory_order::relaxed) == 1 && delta > 0) { | ||||
|             if (cache_bytes == 0) { | ||||
|                 cache_begin = page; | ||||
|             } | ||||
|             cache_bytes += YUZU_PAGESIZE; | ||||
|         } else if (cache_bytes > 0) { | ||||
|             cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); | ||||
|             cache_bytes = 0; | ||||
|         } | ||||
|     } | ||||
|     if (uncache_bytes > 0) { | ||||
|         cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, false); | ||||
|     } | ||||
|     if (cache_bytes > 0) { | ||||
|         cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // namespace VideoCore
 | ||||
|  | @ -1,49 +0,0 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <array> | ||||
| #include <atomic> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| 
 | ||||
| namespace Core::Memory { | ||||
| class Memory; | ||||
| } | ||||
| 
 | ||||
| namespace VideoCore { | ||||
| 
 | ||||
| /// Implements the part of RasterizerInterface shared by GPU accelerated rasterizers: it overrides UpdatePagesCachedCount and owns the table of per-page 16-bit counters.
 | ||||
| class RasterizerAccelerated : public RasterizerInterface { | ||||
| public: | ||||
|     explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_); | ||||
|     ~RasterizerAccelerated() override; | ||||
| 
 | ||||
|     void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; | ||||
| 
 | ||||
| private: | ||||
|     class CacheEntry final { | ||||
|     public: | ||||
|         CacheEntry() = default; | ||||
| 
 | ||||
|         std::atomic_uint16_t& Count(std::size_t page) { | ||||
|             return values[page & 3]; | ||||
|         } | ||||
| 
 | ||||
|         const std::atomic_uint16_t& Count(std::size_t page) const { | ||||
|             return values[page & 3]; | ||||
|         } | ||||
| 
 | ||||
|     private: | ||||
|         std::array<std::atomic_uint16_t, 4> values{}; | ||||
|     }; | ||||
|     static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!"); | ||||
| 
 | ||||
|     using CachedPages = std::array<CacheEntry, 0x2000000>; | ||||
|     std::unique_ptr<CachedPages> cached_pages; | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
| }; | ||||
| 
 | ||||
| } // namespace VideoCore
 | ||||
|  | @ -86,35 +86,35 @@ public: | |||
|     virtual void FlushAll() = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||
|     virtual void FlushRegion(VAddr addr, u64 size, | ||||
|     virtual void FlushRegion(DAddr addr, u64 size, | ||||
|                              VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | ||||
| 
 | ||||
|     /// Check if the specified memory area requires flushing to CPU Memory.
 | ||||
|     virtual bool MustFlushRegion(VAddr addr, u64 size, | ||||
|     virtual bool MustFlushRegion(DAddr addr, u64 size, | ||||
|                                  VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | ||||
| 
 | ||||
|     virtual RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) = 0; | ||||
|     virtual RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||
|     virtual void InvalidateRegion(VAddr addr, u64 size, | ||||
|     virtual void InvalidateRegion(DAddr addr, u64 size, | ||||
|                                   VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | ||||
| 
 | ||||
|     virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | ||||
|     virtual void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) { | ||||
|         for (const auto& [cpu_addr, size] : sequences) { | ||||
|             InvalidateRegion(cpu_addr, size); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region are out of sync with the guest
 | ||||
|     virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0; | ||||
|     virtual void OnCacheInvalidation(PAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     virtual bool OnCPUWrite(VAddr addr, u64 size) = 0; | ||||
|     virtual bool OnCPUWrite(PAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Sync memory between guest and host.
 | ||||
|     virtual void InvalidateGPUCache() = 0; | ||||
| 
 | ||||
|     /// Unmap memory range
 | ||||
|     virtual void UnmapMemory(VAddr addr, u64 size) = 0; | ||||
|     virtual void UnmapMemory(DAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Remap GPU memory range. This means underneath backing memory changed
 | ||||
|     virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0; | ||||
|  | @ -122,7 +122,7 @@ public: | |||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||
|     /// and invalidated
 | ||||
|     virtual void FlushAndInvalidateRegion( | ||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | ||||
|         DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | ||||
| 
 | ||||
|     /// Notify the host renderer to wait for previous primitive and compute operations.
 | ||||
|     virtual void WaitForIdle() = 0; | ||||
|  | @ -157,13 +157,10 @@ public: | |||
| 
 | ||||
|     /// Attempt to use a faster method to display the framebuffer to screen
 | ||||
|     [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||
|                                                  VAddr framebuffer_addr, u32 pixel_stride) { | ||||
|                                                  DAddr framebuffer_addr, u32 pixel_stride) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     /// Increase/decrease the number of objects in pages touching the specified region
 | ||||
|     virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} | ||||
| 
 | ||||
|     /// Initialize disk cached resources for the game being emulated
 | ||||
|     virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||
|                                    const DiskResourceLoadCallback& callback) {} | ||||
|  |  | |||
|  | @ -19,8 +19,7 @@ bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) { | |||
|     return true; | ||||
| } | ||||
| 
 | ||||
| RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu) | ||||
|     : RasterizerAccelerated(cpu_memory_), m_gpu{gpu} {} | ||||
| RasterizerNull::RasterizerNull(Tegra::GPU& gpu) : m_gpu{gpu} {} | ||||
| RasterizerNull::~RasterizerNull() = default; | ||||
| 
 | ||||
| void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} | ||||
|  | @ -45,16 +44,16 @@ void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr | |||
|                                                u32 size) {} | ||||
| void RasterizerNull::DisableGraphicsUniformBuffer(size_t stage, u32 index) {} | ||||
| void RasterizerNull::FlushAll() {} | ||||
| void RasterizerNull::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | ||||
| bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType) { | ||||
| void RasterizerNull::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType) {} | ||||
| bool RasterizerNull::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType) { | ||||
|     return false; | ||||
| } | ||||
| void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | ||||
| bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) { | ||||
| void RasterizerNull::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType) {} | ||||
| bool RasterizerNull::OnCPUWrite(PAddr addr, u64 size) { | ||||
|     return false; | ||||
| } | ||||
| void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {} | ||||
| VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { | ||||
| void RasterizerNull::OnCacheInvalidation(PAddr addr, u64 size) {} | ||||
| VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(PAddr addr, u64 size) { | ||||
|     VideoCore::RasterizerDownloadArea new_area{ | ||||
|         .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), | ||||
|         .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE), | ||||
|  | @ -63,7 +62,7 @@ VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 s | |||
|     return new_area; | ||||
| } | ||||
| void RasterizerNull::InvalidateGPUCache() {} | ||||
| void RasterizerNull::UnmapMemory(VAddr addr, u64 size) {} | ||||
| void RasterizerNull::UnmapMemory(DAddr addr, u64 size) {} | ||||
| void RasterizerNull::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {} | ||||
| void RasterizerNull::SignalFence(std::function<void()>&& func) { | ||||
|     func(); | ||||
|  | @ -78,7 +77,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) { | |||
| } | ||||
| void RasterizerNull::SignalReference() {} | ||||
| void RasterizerNull::ReleaseFences(bool) {} | ||||
| void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | ||||
| void RasterizerNull::FlushAndInvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType) {} | ||||
| void RasterizerNull::WaitForIdle() {} | ||||
| void RasterizerNull::FragmentBarrier() {} | ||||
| void RasterizerNull::TiledCacheBarrier() {} | ||||
|  | @ -95,7 +94,7 @@ bool RasterizerNull::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surfac | |||
| void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | ||||
|                                               std::span<const u8> memory) {} | ||||
| bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||
|                                        VAddr framebuffer_addr, u32 pixel_stride) { | ||||
|                                        DAddr framebuffer_addr, u32 pixel_stride) { | ||||
|     return true; | ||||
| } | ||||
| void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||
|  |  | |||
|  | @ -6,7 +6,6 @@ | |||
| #include "common/common_types.h" | ||||
| #include "video_core/control/channel_state_cache.h" | ||||
| #include "video_core/engines/maxwell_dma.h" | ||||
| #include "video_core/rasterizer_accelerated.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| 
 | ||||
| namespace Core { | ||||
|  | @ -32,10 +31,10 @@ public: | |||
|     } | ||||
| }; | ||||
| 
 | ||||
| class RasterizerNull final : public VideoCore::RasterizerAccelerated, | ||||
| class RasterizerNull final : public VideoCore::RasterizerInterface, | ||||
|                              protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||||
| public: | ||||
|     explicit RasterizerNull(Core::Memory::Memory& cpu_memory, Tegra::GPU& gpu); | ||||
|     explicit RasterizerNull(Tegra::GPU& gpu); | ||||
|     ~RasterizerNull() override; | ||||
| 
 | ||||
|     void Draw(bool is_indexed, u32 instance_count) override; | ||||
|  | @ -48,17 +47,17 @@ public: | |||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | ||||
|     void FlushAll() override; | ||||
|     void FlushRegion(VAddr addr, u64 size, | ||||
|     void FlushRegion(DAddr addr, u64 size, | ||||
|                      VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     bool MustFlushRegion(VAddr addr, u64 size, | ||||
|     bool MustFlushRegion(DAddr addr, u64 size, | ||||
|                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void InvalidateRegion(VAddr addr, u64 size, | ||||
|     void InvalidateRegion(DAddr addr, u64 size, | ||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void OnCacheInvalidation(VAddr addr, u64 size) override; | ||||
|     bool OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | ||||
|     /// Takes a PAddr, as declared by RasterizerInterface::OnCacheInvalidation and used by the definition.
 | ||||
|     void OnCacheInvalidation(PAddr addr, u64 size) override; | ||||
|     /// Takes a PAddr, as declared by RasterizerInterface::OnCPUWrite and used by the definition.
 | ||||
|     bool OnCPUWrite(PAddr addr, u64 size) override; | ||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override; | ||||
|     void InvalidateGPUCache() override; | ||||
|     void UnmapMemory(VAddr addr, u64 size) override; | ||||
|     void UnmapMemory(DAddr addr, u64 size) override; | ||||
|     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | ||||
|     void SignalFence(std::function<void()>&& func) override; | ||||
|     void SyncOperation(std::function<void()>&& func) override; | ||||
|  | @ -66,7 +65,7 @@ public: | |||
|     void SignalReference() override; | ||||
|     void ReleaseFences(bool force) override; | ||||
|     void FlushAndInvalidateRegion( | ||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|         DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void WaitForIdle() override; | ||||
|     void FragmentBarrier() override; | ||||
|     void TiledCacheBarrier() override; | ||||
|  | @ -78,7 +77,7 @@ public: | |||
|     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | ||||
|     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | ||||
|                                   std::span<const u8> memory) override; | ||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, | ||||
|                            u32 pixel_stride) override; | ||||
|     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||
|                            const VideoCore::DiskResourceLoadCallback& callback) override; | ||||
|  |  | |||
|  | @ -7,10 +7,9 @@ | |||
| 
 | ||||
| namespace Null { | ||||
| 
 | ||||
| RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, | ||||
|                            Tegra::GPU& gpu, | ||||
| RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, | ||||
|                            std::unique_ptr<Core::Frontend::GraphicsContext> context_) | ||||
|     : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(cpu_memory, gpu) {} | ||||
|     : RendererBase(emu_window, std::move(context_)), m_gpu(gpu), m_rasterizer(gpu) {} | ||||
| 
 | ||||
| RendererNull::~RendererNull() = default; | ||||
| 
 | ||||
|  |  | |||
|  | @ -13,8 +13,7 @@ namespace Null { | |||
| 
 | ||||
| class RendererNull final : public VideoCore::RendererBase { | ||||
| public: | ||||
|     explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, | ||||
|                           Tegra::GPU& gpu, | ||||
|     explicit RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, | ||||
|                           std::unique_ptr<Core::Frontend::GraphicsContext> context); | ||||
|     ~RendererNull() override; | ||||
| 
 | ||||
|  |  | |||
|  | @ -47,11 +47,10 @@ constexpr std::array PROGRAM_LUT{ | |||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | ||||
|     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} | ||||
|     : VideoCommon::BufferBase(null_params) {} | ||||
| 
 | ||||
| Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | ||||
|                VAddr cpu_addr_, u64 size_bytes_) | ||||
|     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | ||||
| Buffer::Buffer(BufferCacheRuntime& runtime, DAddr cpu_addr_, u64 size_bytes_) | ||||
|     : VideoCommon::BufferBase(cpu_addr_, size_bytes_) { | ||||
|     buffer.Create(); | ||||
|     if (runtime.device.HasDebuggingToolAttached()) { | ||||
|         const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); | ||||
|  |  | |||
|  | @ -10,7 +10,6 @@ | |||
| #include "common/common_types.h" | ||||
| #include "video_core/buffer_cache/buffer_cache_base.h" | ||||
| #include "video_core/buffer_cache/memory_tracker_base.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_opengl/gl_device.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_staging_buffer_pool.h" | ||||
|  | @ -19,9 +18,9 @@ namespace OpenGL { | |||
| 
 | ||||
| class BufferCacheRuntime; | ||||
| 
 | ||||
| class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { | ||||
| class Buffer : public VideoCommon::BufferBase { | ||||
| public: | ||||
|     explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr, | ||||
|     explicit Buffer(BufferCacheRuntime&, DAddr cpu_addr, | ||||
|                     u64 size_bytes); | ||||
|     explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); | ||||
| 
 | ||||
|  | @ -244,7 +243,7 @@ struct BufferCacheParams { | |||
|     using Runtime = OpenGL::BufferCacheRuntime; | ||||
|     using Buffer = OpenGL::Buffer; | ||||
|     using Async_Buffer = OpenGL::StagingBufferMap; | ||||
|     using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | ||||
|     using MemoryTracker = VideoCommon::MemoryTrackerBase<Tegra::MaxwellDeviceMemoryManager>; | ||||
| 
 | ||||
|     static constexpr bool IS_OPENGL = true; | ||||
|     static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; | ||||
|  |  | |||
|  | @ -35,8 +35,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { | |||
| 
 | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) | ||||
|     : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} { | ||||
| QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_) | ||||
|     : QueryCacheLegacy(rasterizer_, device_memory_), gl_rasterizer{rasterizer_} { | ||||
|     EnableCounters(); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -11,6 +11,7 @@ | |||
| #include "video_core/query_cache.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| 
 | ||||
| namespace Core { | ||||
| class System; | ||||
|  | @ -28,7 +29,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | |||
| class QueryCache final | ||||
|     : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> { | ||||
| public: | ||||
|     explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); | ||||
|     explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_); | ||||
|     ~QueryCache(); | ||||
| 
 | ||||
|     OGLQuery AllocateQuery(VideoCore::QueryType type); | ||||
|  |  | |||
|  | @ -70,18 +70,18 @@ std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryTy | |||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||
|                                    Core::Memory::Memory& cpu_memory_, const Device& device_, | ||||
|                                    ScreenInfo& screen_info_, ProgramManager& program_manager_, | ||||
|                                    StateTracker& state_tracker_) | ||||
|     : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), | ||||
|                                    Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||
|                                    const Device& device_, ScreenInfo& screen_info_, | ||||
|                                    ProgramManager& program_manager_, StateTracker& state_tracker_) | ||||
|     : gpu(gpu_), device_memory(device_memory_), device(device_), screen_info(screen_info_), | ||||
|       program_manager(program_manager_), state_tracker(state_tracker_), | ||||
|       texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), | ||||
|       texture_cache(texture_cache_runtime, *this), | ||||
|       texture_cache(texture_cache_runtime, device_memory_), | ||||
|       buffer_cache_runtime(device, staging_buffer_pool), | ||||
|       buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | ||||
|       shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, | ||||
|                    state_tracker, gpu.ShaderNotify()), | ||||
|       query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache), | ||||
|       buffer_cache(device_memory_, buffer_cache_runtime), | ||||
|       shader_cache(device_memory_, emu_window_, device, texture_cache, buffer_cache, | ||||
|                    program_manager, state_tracker, gpu.ShaderNotify()), | ||||
|       query_cache(*this, device_memory_), accelerate_dma(buffer_cache, texture_cache), | ||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | ||||
|       blit_image(program_manager_) {} | ||||
| 
 | ||||
|  | @ -475,7 +475,7 @@ void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) { | |||
| 
 | ||||
| void RasterizerOpenGL::FlushAll() {} | ||||
| 
 | ||||
| void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | ||||
| void RasterizerOpenGL::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|  | @ -493,7 +493,7 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType | |||
|     } | ||||
| } | ||||
| 
 | ||||
| bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | ||||
| bool RasterizerOpenGL::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { | ||||
|     if ((True(which & VideoCommon::CacheType::BufferCache))) { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         if (buffer_cache.IsRegionGpuModified(addr, size)) { | ||||
|  | @ -510,7 +510,7 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT | |||
|     return false; | ||||
| } | ||||
| 
 | ||||
| VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64 size) { | ||||
| VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(DAddr addr, u64 size) { | ||||
|     { | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         auto area = texture_cache.GetFlushArea(addr, size); | ||||
|  | @ -533,7 +533,7 @@ VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(VAddr addr, u64 | |||
|     return new_area; | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | ||||
| void RasterizerOpenGL::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|  | @ -554,8 +554,9 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | |||
|     } | ||||
| } | ||||
| 
 | ||||
| bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | ||||
| bool RasterizerOpenGL::OnCPUWrite(PAddr p_addr, u64 size) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     const DAddr addr = device_memory.GetAddressFromPAddr(p_addr); | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return false; | ||||
|     } | ||||
|  | @ -576,8 +577,9 @@ bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
|     return false; | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { | ||||
| void RasterizerOpenGL::OnCacheInvalidation(PAddr p_addr, u64 size) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     const DAddr addr = device_memory.GetAddressFromPAddr(p_addr); | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|  | @ -596,7 +598,7 @@ void RasterizerOpenGL::InvalidateGPUCache() { | |||
|     gpu.InvalidateGPUCache(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | ||||
| void RasterizerOpenGL::UnmapMemory(DAddr addr, u64 size) { | ||||
|     { | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.UnmapMemory(addr, size); | ||||
|  | @ -635,7 +637,7 @@ void RasterizerOpenGL::ReleaseFences(bool force) { | |||
|     fence_manager.WaitPendingFences(force); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, | ||||
| void RasterizerOpenGL::FlushAndInvalidateRegion(DAddr addr, u64 size, | ||||
|                                                 VideoCommon::CacheType which) { | ||||
|     if (Settings::IsGPULevelExtreme()) { | ||||
|         FlushRegion(addr, size, which); | ||||
|  | @ -739,7 +741,7 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si | |||
| } | ||||
| 
 | ||||
| bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||
|                                          VAddr framebuffer_addr, u32 pixel_stride) { | ||||
|                                          DAddr framebuffer_addr, u32 pixel_stride) { | ||||
|     if (framebuffer_addr == 0) { | ||||
|         return false; | ||||
|     } | ||||
|  |  | |||
|  | @ -14,7 +14,6 @@ | |||
| #include "common/common_types.h" | ||||
| #include "video_core/control/channel_state_cache.h" | ||||
| #include "video_core/engines/maxwell_dma.h" | ||||
| #include "video_core/rasterizer_accelerated.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_opengl/blit_image.h" | ||||
| #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||||
|  | @ -72,11 +71,11 @@ private: | |||
|     TextureCache& texture_cache; | ||||
| }; | ||||
| 
 | ||||
| class RasterizerOpenGL : public VideoCore::RasterizerAccelerated, | ||||
| class RasterizerOpenGL : public VideoCore::RasterizerInterface, | ||||
|                          protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||||
| public: | ||||
|     explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||
|                               Core::Memory::Memory& cpu_memory_, const Device& device_, | ||||
|                               Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_, | ||||
|                               ScreenInfo& screen_info_, ProgramManager& program_manager_, | ||||
|                               StateTracker& state_tracker_); | ||||
|     ~RasterizerOpenGL() override; | ||||
|  | @ -92,17 +91,17 @@ public: | |||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | ||||
|     void FlushAll() override; | ||||
|     void FlushRegion(VAddr addr, u64 size, | ||||
|     void FlushRegion(DAddr addr, u64 size, | ||||
|                      VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     bool MustFlushRegion(VAddr addr, u64 size, | ||||
|     bool MustFlushRegion(DAddr addr, u64 size, | ||||
|                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(VAddr addr, u64 size, | ||||
|     /// Takes a DAddr, as declared by RasterizerInterface::GetFlushArea and used by the definition.
 | ||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(DAddr addr, u64 size, | ||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void OnCacheInvalidation(VAddr addr, u64 size) override; | ||||
|     bool OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void OnCacheInvalidation(PAddr addr, u64 size) override; | ||||
|     bool OnCPUWrite(PAddr addr, u64 size) override; | ||||
|     void InvalidateGPUCache() override; | ||||
|     void UnmapMemory(VAddr addr, u64 size) override; | ||||
|     void UnmapMemory(DAddr addr, u64 size) override; | ||||
|     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | ||||
|     void SignalFence(std::function<void()>&& func) override; | ||||
|     void SyncOperation(std::function<void()>&& func) override; | ||||
|  | @ -110,7 +109,7 @@ public: | |||
|     void SignalReference() override; | ||||
|     void ReleaseFences(bool force = true) override; | ||||
|     void FlushAndInvalidateRegion( | ||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|         DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void WaitForIdle() override; | ||||
|     void FragmentBarrier() override; | ||||
|     void TiledCacheBarrier() override; | ||||
|  | @ -123,7 +122,7 @@ public: | |||
|     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | ||||
|     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | ||||
|                                   std::span<const u8> memory) override; | ||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, | ||||
|                            u32 pixel_stride) override; | ||||
|     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||
|                            const VideoCore::DiskResourceLoadCallback& callback) override; | ||||
|  | @ -235,6 +234,7 @@ private: | |||
|                        VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); | ||||
| 
 | ||||
|     Tegra::GPU& gpu; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
| 
 | ||||
|     const Device& device; | ||||
|     ScreenInfo& screen_info; | ||||
|  |  | |||
|  | @ -168,11 +168,12 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs | |||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | ||||
|                          const Device& device_, TextureCache& texture_cache_, | ||||
|                          BufferCache& buffer_cache_, ProgramManager& program_manager_, | ||||
|                          StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_) | ||||
|     : VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_}, | ||||
| ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||
|                          Core::Frontend::EmuWindow& emu_window_, const Device& device_, | ||||
|                          TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||||
|                          ProgramManager& program_manager_, StateTracker& state_tracker_, | ||||
|                          VideoCore::ShaderNotify& shader_notify_) | ||||
|     : VideoCommon::ShaderCache{device_memory_}, emu_window{emu_window_}, device{device_}, | ||||
|       texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, | ||||
|       state_tracker{state_tracker_}, shader_notify{shader_notify_}, | ||||
|       use_asynchronous_shaders{device.UseAsynchronousShaders()}, | ||||
|  |  | |||
|  | @ -17,7 +17,7 @@ | |||
| 
 | ||||
| namespace Tegra { | ||||
| class MemoryManager; | ||||
| } | ||||
| } // namespace Tegra
 | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
|  | @ -28,10 +28,11 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>; | |||
| 
 | ||||
| class ShaderCache : public VideoCommon::ShaderCache { | ||||
| public: | ||||
|     explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | ||||
|                          const Device& device_, TextureCache& texture_cache_, | ||||
|                          BufferCache& buffer_cache_, ProgramManager& program_manager_, | ||||
|                          StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_); | ||||
|     explicit ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||
|                          Core::Frontend::EmuWindow& emu_window_, const Device& device_, | ||||
|                          TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||||
|                          ProgramManager& program_manager_, StateTracker& state_tracker_, | ||||
|                          VideoCore::ShaderNotify& shader_notify_); | ||||
|     ~ShaderCache(); | ||||
| 
 | ||||
|     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||
|  |  | |||
|  | @ -144,12 +144,13 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit | |||
| 
 | ||||
| RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | ||||
|                                Core::Frontend::EmuWindow& emu_window_, | ||||
|                                Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | ||||
|                                Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, | ||||
|                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) | ||||
|     : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, | ||||
|       emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, device{emu_window_}, | ||||
|       emu_window{emu_window_}, device_memory{device_memory_}, gpu{gpu_}, device{emu_window_}, | ||||
|       state_tracker{}, program_manager{device}, | ||||
|       rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { | ||||
|       rasterizer(emu_window, gpu, device_memory, device, screen_info, program_manager, | ||||
|                  state_tracker) { | ||||
|     if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { | ||||
|         glEnable(GL_DEBUG_OUTPUT); | ||||
|         glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | ||||
|  | @ -242,7 +243,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf | |||
|     const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; | ||||
|     const u64 size_in_bytes{Tegra::Texture::CalculateSize( | ||||
|         true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; | ||||
|     const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; | ||||
|     const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)}; | ||||
|     const std::span<const u8> input_data(host_ptr, size_in_bytes); | ||||
|     Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, | ||||
|                                      framebuffer.width, framebuffer.height, 1, block_height_log2, | ||||
|  |  | |||
|  | @ -61,7 +61,7 @@ class RendererOpenGL final : public VideoCore::RendererBase { | |||
| public: | ||||
|     explicit RendererOpenGL(Core::TelemetrySession& telemetry_session_, | ||||
|                             Core::Frontend::EmuWindow& emu_window_, | ||||
|                             Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | ||||
|                             Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, | ||||
|                             std::unique_ptr<Core::Frontend::GraphicsContext> context_); | ||||
|     ~RendererOpenGL() override; | ||||
| 
 | ||||
|  | @ -101,7 +101,7 @@ private: | |||
| 
 | ||||
|     Core::TelemetrySession& telemetry_session; | ||||
|     Core::Frontend::EmuWindow& emu_window; | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
|     Tegra::GPU& gpu; | ||||
| 
 | ||||
|     Device device; | ||||
|  |  | |||
|  | @ -82,10 +82,10 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl | |||
| 
 | ||||
| RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | ||||
|                                Core::Frontend::EmuWindow& emu_window, | ||||
|                                Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | ||||
|                                Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, | ||||
|                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) try | ||||
|     : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), | ||||
|       cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), | ||||
|       device_memory(device_memory_), gpu(gpu_), library(OpenLibrary(context.get())), | ||||
|       instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | ||||
|                               Settings::values.renderer_debug.GetValue())), | ||||
|       debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance) | ||||
|  | @ -97,9 +97,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | |||
|                 render_window.GetFramebufferLayout().height), | ||||
|       present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, | ||||
|                       surface), | ||||
|       blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager, | ||||
|       blit_screen(device_memory, render_window, device, memory_allocator, swapchain, present_manager, | ||||
|                   scheduler, screen_info), | ||||
|       rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, | ||||
|       rasterizer(render_window, gpu, device_memory, screen_info, device, memory_allocator, | ||||
|                  state_tracker, scheduler) { | ||||
|     if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { | ||||
|         turbo_mode.emplace(instance, dld); | ||||
|  | @ -128,7 +128,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
|     screen_info.width = framebuffer->width; | ||||
|     screen_info.height = framebuffer->height; | ||||
| 
 | ||||
|     const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | ||||
|     const DAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | ||||
|     const bool use_accelerated = | ||||
|         rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | ||||
|     RenderScreenshot(*framebuffer, use_accelerated); | ||||
|  |  | |||
|  | @ -20,6 +20,7 @@ | |||
| #include "video_core/vulkan_common/vulkan_device.h" | ||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| 
 | ||||
| namespace Core { | ||||
| class TelemetrySession; | ||||
|  | @ -42,7 +43,7 @@ class RendererVulkan final : public VideoCore::RendererBase { | |||
| public: | ||||
|     explicit RendererVulkan(Core::TelemetrySession& telemtry_session, | ||||
|                             Core::Frontend::EmuWindow& emu_window, | ||||
|                             Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | ||||
|                             Tegra::MaxwellDeviceMemoryManager& device_memory_, Tegra::GPU& gpu_, | ||||
|                             std::unique_ptr<Core::Frontend::GraphicsContext> context_); | ||||
|     ~RendererVulkan() override; | ||||
| 
 | ||||
|  | @ -62,7 +63,7 @@ private: | |||
|     void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated); | ||||
| 
 | ||||
|     Core::TelemetrySession& telemetry_session; | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
|     Tegra::GPU& gpu; | ||||
| 
 | ||||
|     std::shared_ptr<Common::DynamicLibrary> library; | ||||
|  |  | |||
|  | @ -14,8 +14,8 @@ | |||
| #include "common/settings.h" | ||||
| #include "core/core.h" | ||||
| #include "core/frontend/emu_window.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| #include "video_core/host_shaders/fxaa_frag_spv.h" | ||||
| #include "video_core/host_shaders/fxaa_vert_spv.h" | ||||
| #include "video_core/host_shaders/present_bicubic_frag_spv.h" | ||||
|  | @ -121,11 +121,12 @@ struct BlitScreen::BufferData { | |||
|     // Unaligned image data goes here
 | ||||
| }; | ||||
| 
 | ||||
| BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, | ||||
|                        const Device& device_, MemoryAllocator& memory_allocator_, | ||||
|                        Swapchain& swapchain_, PresentManager& present_manager_, | ||||
|                        Scheduler& scheduler_, const ScreenInfo& screen_info_) | ||||
|     : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, | ||||
| BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||
|                        Core::Frontend::EmuWindow& render_window_, const Device& device_, | ||||
|                        MemoryAllocator& memory_allocator_, Swapchain& swapchain_, | ||||
|                        PresentManager& present_manager_, Scheduler& scheduler_, | ||||
|                        const ScreenInfo& screen_info_) | ||||
|     : device_memory{device_memory_}, render_window{render_window_}, device{device_}, | ||||
|       memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, | ||||
|       scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { | ||||
|     resource_ticks.resize(image_count); | ||||
|  | @ -219,8 +220,8 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
|     if (!use_accelerated) { | ||||
|         const u64 image_offset = GetRawImageOffset(framebuffer); | ||||
| 
 | ||||
|         const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; | ||||
|         const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); | ||||
|         const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset; | ||||
|         const u8* const host_ptr = device_memory.GetPointer<u8>(framebuffer_addr); | ||||
| 
 | ||||
|         // TODO(Rodrigo): Read this from HLE
 | ||||
|         constexpr u32 block_height_log2 = 4; | ||||
|  |  | |||
|  | @ -8,15 +8,12 @@ | |||
| #include "core/frontend/framebuffer_layout.h" | ||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| 
 | ||||
| namespace Core { | ||||
| class System; | ||||
| } | ||||
| 
 | ||||
| namespace Core::Memory { | ||||
| class Memory; | ||||
| } | ||||
| 
 | ||||
| namespace Core::Frontend { | ||||
| class EmuWindow; | ||||
| } | ||||
|  | @ -56,7 +53,7 @@ struct ScreenInfo { | |||
| 
 | ||||
| class BlitScreen { | ||||
| public: | ||||
|     explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, | ||||
|     explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, Core::Frontend::EmuWindow& render_window, | ||||
|                         const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, | ||||
|                         PresentManager& present_manager, Scheduler& scheduler, | ||||
|                         const ScreenInfo& screen_info); | ||||
|  | @ -109,7 +106,7 @@ private: | |||
|     u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; | ||||
|     u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; | ||||
| 
 | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
|     Core::Frontend::EmuWindow& render_window; | ||||
|     const Device& device; | ||||
|     MemoryAllocator& memory_allocator; | ||||
|  |  | |||
|  | @ -79,7 +79,7 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo | |||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params) | ||||
|     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} { | ||||
|     : VideoCommon::BufferBase(null_params), tracker{4096} { | ||||
|     if (runtime.device.HasNullDescriptor()) { | ||||
|         return; | ||||
|     } | ||||
|  | @ -88,11 +88,9 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_p | |||
|     is_null = true; | ||||
| } | ||||
| 
 | ||||
| Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | ||||
|                VAddr cpu_addr_, u64 size_bytes_) | ||||
|     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), | ||||
|       device{&runtime.device}, buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, | ||||
|       tracker{SizeBytes()} { | ||||
| Buffer::Buffer(BufferCacheRuntime& runtime, DAddr cpu_addr_, u64 size_bytes_) | ||||
|     : VideoCommon::BufferBase(cpu_addr_, size_bytes_), device{&runtime.device}, | ||||
|       buffer{CreateBuffer(*device, runtime.memory_allocator, SizeBytes())}, tracker{SizeBytes()} { | ||||
|     if (runtime.device.HasDebuggingToolAttached()) { | ||||
|         buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); | ||||
|     } | ||||
|  |  | |||
|  | @ -23,11 +23,10 @@ struct HostVertexBinding; | |||
| 
 | ||||
| class BufferCacheRuntime; | ||||
| 
 | ||||
| class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { | ||||
| class Buffer : public VideoCommon::BufferBase { | ||||
| public: | ||||
|     explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); | ||||
|     explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | ||||
|                     VAddr cpu_addr_, u64 size_bytes_); | ||||
|     explicit Buffer(BufferCacheRuntime& runtime, VAddr cpu_addr_, u64 size_bytes_); | ||||
| 
 | ||||
|     [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); | ||||
| 
 | ||||
|  | @ -173,7 +172,7 @@ struct BufferCacheParams { | |||
|     using Runtime = Vulkan::BufferCacheRuntime; | ||||
|     using Buffer = Vulkan::Buffer; | ||||
|     using Async_Buffer = Vulkan::StagingBufferRef; | ||||
|     using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | ||||
|     using MemoryTracker = VideoCommon::MemoryTrackerBase<Tegra::MaxwellDeviceMemoryManager>; | ||||
| 
 | ||||
|     static constexpr bool IS_OPENGL = false; | ||||
|     static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; | ||||
|  |  | |||
|  | @ -30,7 +30,6 @@ | |||
| #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||
| #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||
| #include "video_core/renderer_vulkan/vk_shader_util.h" | ||||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||
|  | @ -299,12 +298,12 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c | |||
|     return std::memcmp(&rhs, this, Size()) == 0; | ||||
| } | ||||
| 
 | ||||
| PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_, | ||||
| PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_, | ||||
|                              Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | ||||
|                              GuestDescriptorQueue& guest_descriptor_queue_, | ||||
|                              RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, | ||||
|                              TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) | ||||
|     : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_}, | ||||
|     : VideoCommon::ShaderCache{device_memory_}, device{device_}, scheduler{scheduler_}, | ||||
|       descriptor_pool{descriptor_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, | ||||
|       render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, | ||||
|       texture_cache{texture_cache_}, shader_notify{shader_notify_}, | ||||
|  |  | |||
|  | @ -26,6 +26,7 @@ | |||
| #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||||
| #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||||
| #include "video_core/shader_cache.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| 
 | ||||
| namespace Core { | ||||
| class System; | ||||
|  | @ -79,7 +80,6 @@ class ComputePipeline; | |||
| class DescriptorPool; | ||||
| class Device; | ||||
| class PipelineStatistics; | ||||
| class RasterizerVulkan; | ||||
| class RenderPassCache; | ||||
| class Scheduler; | ||||
| 
 | ||||
|  | @ -99,7 +99,7 @@ struct ShaderPools { | |||
| 
 | ||||
| class PipelineCache : public VideoCommon::ShaderCache { | ||||
| public: | ||||
|     explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler, | ||||
|     explicit PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device, Scheduler& scheduler, | ||||
|                            DescriptorPool& descriptor_pool, | ||||
|                            GuestDescriptorQueue& guest_descriptor_queue, | ||||
|                            RenderPassCache& render_pass_cache, BufferCache& buffer_cache, | ||||
|  |  | |||
|  | @ -14,7 +14,9 @@ | |||
| #include "common/bit_util.h" | ||||
| #include "common/common_types.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/engines/draw_manager.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| #include "video_core/query_cache/query_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||||
|  | @ -102,7 +104,7 @@ private: | |||
| using BaseStreamer = VideoCommon::SimpleStreamer<VideoCommon::HostQueryBase>; | ||||
| 
 | ||||
| struct HostSyncValues { | ||||
|     VAddr address; | ||||
|     DAddr address; | ||||
|     size_t size; | ||||
|     size_t offset; | ||||
| 
 | ||||
|  | @ -317,7 +319,7 @@ public: | |||
|         pending_sync.clear(); | ||||
|     } | ||||
| 
 | ||||
|     size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | ||||
|     size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, | ||||
|                         [[maybe_unused]] std::optional<u32> subreport) override { | ||||
|         PauseCounter(); | ||||
|         auto index = BuildQuery(); | ||||
|  | @ -738,7 +740,7 @@ public: | |||
|         pending_sync.clear(); | ||||
|     } | ||||
| 
 | ||||
|     size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | ||||
|     size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, | ||||
|                         std::optional<u32> subreport_) override { | ||||
|         auto index = BuildQuery(); | ||||
|         auto* new_query = GetQuery(index); | ||||
|  | @ -769,9 +771,9 @@ public: | |||
|         return index; | ||||
|     } | ||||
| 
 | ||||
|     std::optional<std::pair<VAddr, size_t>> GetLastQueryStream(size_t stream) { | ||||
|     std::optional<std::pair<DAddr, size_t>> GetLastQueryStream(size_t stream) { | ||||
|         if (last_queries[stream] != 0) { | ||||
|             std::pair<VAddr, size_t> result(last_queries[stream], last_queries_stride[stream]); | ||||
|             std::pair<DAddr, size_t> result(last_queries[stream], last_queries_stride[stream]); | ||||
|             return result; | ||||
|         } | ||||
|         return std::nullopt; | ||||
|  | @ -974,7 +976,7 @@ private: | |||
|     size_t buffers_count{}; | ||||
|     std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; | ||||
|     std::array<VkDeviceSize, NUM_STREAMS> offsets{}; | ||||
|     std::array<VAddr, NUM_STREAMS> last_queries; | ||||
|     std::array<DAddr, NUM_STREAMS> last_queries; | ||||
|     std::array<size_t, NUM_STREAMS> last_queries_stride; | ||||
|     Maxwell3D::Regs::PrimitiveTopology out_topology; | ||||
|     u64 streams_mask; | ||||
|  | @ -987,7 +989,7 @@ public: | |||
|         : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {} | ||||
| 
 | ||||
|     // Parameterized constructor
 | ||||
|     PrimitivesQueryBase(bool has_timestamp, VAddr address) | ||||
|     PrimitivesQueryBase(bool has_timestamp, DAddr address) | ||||
|         : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) { | ||||
|         if (has_timestamp) { | ||||
|             flags |= VideoCommon::QueryFlagBits::HasTimestamp; | ||||
|  | @ -995,7 +997,7 @@ public: | |||
|     } | ||||
| 
 | ||||
|     u64 stride{}; | ||||
|     VAddr dependant_address{}; | ||||
|     DAddr dependant_address{}; | ||||
|     Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; | ||||
|     size_t dependant_index{}; | ||||
|     bool dependant_manage{}; | ||||
|  | @ -1005,15 +1007,15 @@ class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<Primitive | |||
| public: | ||||
|     explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_, | ||||
|                                          TFBCounterStreamer& tfb_streamer_, | ||||
|                                          Core::Memory::Memory& cpu_memory_) | ||||
|                                          Tegra::MaxwellDeviceMemoryManager& device_memory_) | ||||
|         : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_}, | ||||
|           tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} { | ||||
|           tfb_streamer{tfb_streamer_}, device_memory{device_memory_} { | ||||
|         MakeDependent(&tfb_streamer); | ||||
|     } | ||||
| 
 | ||||
|     ~PrimitivesSucceededStreamer() = default; | ||||
| 
 | ||||
|     size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | ||||
|     size_t WriteCounter(DAddr address, bool has_timestamp, u32 value, | ||||
|                         std::optional<u32> subreport_) override { | ||||
|         auto index = BuildQuery(); | ||||
|         auto* new_query = GetQuery(index); | ||||
|  | @ -1063,6 +1065,8 @@ public: | |||
|                 } | ||||
|             }); | ||||
|         } | ||||
|         auto* ptr = device_memory.GetPointer<u8>(new_query->dependant_address); | ||||
|         ASSERT(ptr != nullptr); | ||||
| 
 | ||||
|         new_query->dependant_manage = must_manage_dependance; | ||||
|         pending_flush_queries.push_back(index); | ||||
|  | @ -1100,7 +1104,7 @@ public: | |||
|                 num_vertices = dependant_query->value / query->stride; | ||||
|                 tfb_streamer.Free(query->dependant_index); | ||||
|             } else { | ||||
|                 u8* pointer = cpu_memory.GetPointer(query->dependant_address); | ||||
|                 u8* pointer = device_memory.GetPointer<u8>(query->dependant_address); | ||||
|                 u32 result; | ||||
|                 std::memcpy(&result, pointer, sizeof(u32)); | ||||
|                 num_vertices = static_cast<u64>(result) / query->stride; | ||||
|  | @ -1137,7 +1141,7 @@ public: | |||
| private: | ||||
|     QueryCacheRuntime& runtime; | ||||
|     TFBCounterStreamer& tfb_streamer; | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
| 
 | ||||
|     // syncing queue
 | ||||
|     std::vector<size_t> pending_sync; | ||||
|  | @ -1152,12 +1156,12 @@ private: | |||
| 
 | ||||
| struct QueryCacheRuntimeImpl { | ||||
|     QueryCacheRuntimeImpl(QueryCacheRuntime& runtime, VideoCore::RasterizerInterface* rasterizer_, | ||||
|                           Core::Memory::Memory& cpu_memory_, Vulkan::BufferCache& buffer_cache_, | ||||
|                           Tegra::MaxwellDeviceMemoryManager& device_memory_, Vulkan::BufferCache& buffer_cache_, | ||||
|                           const Device& device_, const MemoryAllocator& memory_allocator_, | ||||
|                           Scheduler& scheduler_, StagingBufferPool& staging_pool_, | ||||
|                           ComputePassDescriptorQueue& compute_pass_descriptor_queue, | ||||
|                           DescriptorPool& descriptor_pool) | ||||
|         : rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, | ||||
|         : rasterizer{rasterizer_}, device_memory{device_memory_}, | ||||
|           buffer_cache{buffer_cache_}, device{device_}, | ||||
|           memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, | ||||
|           guest_streamer(0, runtime), | ||||
|  | @ -1168,7 +1172,7 @@ struct QueryCacheRuntimeImpl { | |||
|                        scheduler, memory_allocator, staging_pool), | ||||
|           primitives_succeeded_streamer( | ||||
|               static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, | ||||
|               cpu_memory_), | ||||
|               device_memory_), | ||||
|           primitives_needed_minus_succeeded_streamer( | ||||
|               static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u), | ||||
|           hcr_setup{}, hcr_is_set{}, is_hcr_running{}, maxwell3d{} { | ||||
|  | @ -1195,7 +1199,7 @@ struct QueryCacheRuntimeImpl { | |||
|     } | ||||
| 
 | ||||
|     VideoCore::RasterizerInterface* rasterizer; | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
|     Vulkan::BufferCache& buffer_cache; | ||||
| 
 | ||||
|     const Device& device; | ||||
|  | @ -1210,7 +1214,7 @@ struct QueryCacheRuntimeImpl { | |||
|     PrimitivesSucceededStreamer primitives_succeeded_streamer; | ||||
|     VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_succeeded_streamer; | ||||
| 
 | ||||
|     std::vector<std::pair<VAddr, VAddr>> little_cache; | ||||
|     std::vector<std::pair<DAddr, DAddr>> little_cache; | ||||
|     std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; | ||||
|     std::vector<size_t> redirect_cache; | ||||
|     std::vector<std::vector<VkBufferCopy>> copies_setup; | ||||
|  | @ -1229,14 +1233,14 @@ struct QueryCacheRuntimeImpl { | |||
| }; | ||||
| 
 | ||||
| QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | ||||
|                                      Core::Memory::Memory& cpu_memory_, | ||||
|                                      Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||
|                                      Vulkan::BufferCache& buffer_cache_, const Device& device_, | ||||
|                                      const MemoryAllocator& memory_allocator_, | ||||
|                                      Scheduler& scheduler_, StagingBufferPool& staging_pool_, | ||||
|                                      ComputePassDescriptorQueue& compute_pass_descriptor_queue, | ||||
|                                      DescriptorPool& descriptor_pool) { | ||||
|     impl = std::make_unique<QueryCacheRuntimeImpl>( | ||||
|         *this, rasterizer, cpu_memory_, buffer_cache_, device_, memory_allocator_, scheduler_, | ||||
|         *this, rasterizer, device_memory_, buffer_cache_, device_, memory_allocator_, scheduler_, | ||||
|         staging_pool_, compute_pass_descriptor_queue, descriptor_pool); | ||||
| } | ||||
| 
 | ||||
|  | @ -1309,7 +1313,7 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo | |||
|     ResumeHostConditionalRendering(); | ||||
| } | ||||
| 
 | ||||
| void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal) { | ||||
| void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) { | ||||
|     VkBuffer to_resolve; | ||||
|     u32 to_resolve_offset; | ||||
|     { | ||||
|  | @ -1350,11 +1354,11 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku | |||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     const auto check_in_bc = [&](VAddr address) { | ||||
|     const auto check_in_bc = [&](DAddr address) { | ||||
|         return impl->buffer_cache.IsRegionGpuModified(address, 8); | ||||
|     }; | ||||
|     const auto check_value = [&](VAddr address) { | ||||
|         u8* ptr = impl->cpu_memory.GetPointer(address); | ||||
|     const auto check_value = [&](DAddr address) { | ||||
|         u8* ptr = impl->device_memory.GetPointer<u8>(address); | ||||
|         u64 value{}; | ||||
|         std::memcpy(&value, ptr, sizeof(value)); | ||||
|         return value == 0; | ||||
|  | @ -1477,8 +1481,8 @@ void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer ba | |||
|     for (auto& sync_val : values) { | ||||
|         total_size += sync_val.size; | ||||
|         bool found = false; | ||||
|         VAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE); | ||||
|         VAddr base_end = base + Core::Memory::YUZU_PAGESIZE; | ||||
|         DAddr base = Common::AlignDown(sync_val.address, Core::Memory::YUZU_PAGESIZE); | ||||
|         DAddr base_end = base + Core::Memory::YUZU_PAGESIZE; | ||||
|         for (size_t i = 0; i < impl->little_cache.size(); i++) { | ||||
|             const auto set_found = [&] { | ||||
|                 impl->redirect_cache.push_back(i); | ||||
|  |  | |||
|  | @ -27,7 +27,7 @@ struct QueryCacheRuntimeImpl; | |||
| class QueryCacheRuntime { | ||||
| public: | ||||
|     explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | ||||
|                                Core::Memory::Memory& cpu_memory_, | ||||
|                                Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||
|                                Vulkan::BufferCache& buffer_cache_, const Device& device_, | ||||
|                                const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, | ||||
|                                StagingBufferPool& staging_pool_, | ||||
|  | @ -61,7 +61,7 @@ public: | |||
| 
 | ||||
| private: | ||||
|     void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); | ||||
|     void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal); | ||||
|     void HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal); | ||||
|     friend struct QueryCacheRuntimeImpl; | ||||
|     std::unique_ptr<QueryCacheRuntimeImpl> impl; | ||||
| }; | ||||
|  |  | |||
|  | @ -18,6 +18,7 @@ | |||
| #include "video_core/engines/draw_manager.h" | ||||
| #include "video_core/engines/kepler_compute.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| #include "video_core/renderer_vulkan/blit_image.h" | ||||
| #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||||
| #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||||
|  | @ -37,6 +38,7 @@ | |||
| #include "video_core/vulkan_common/vulkan_device.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| 
 | ||||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
|  | @ -163,10 +165,11 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, | |||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||
|                                    Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, | ||||
|                                    const Device& device_, MemoryAllocator& memory_allocator_, | ||||
|                                    StateTracker& state_tracker_, Scheduler& scheduler_) | ||||
|     : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_}, | ||||
|                                    Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||
|                                    ScreenInfo& screen_info_, const Device& device_, | ||||
|                                    MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, | ||||
|                                    Scheduler& scheduler_) | ||||
|     : gpu{gpu_}, device_memory{device_memory_}, screen_info{screen_info_}, device{device_}, | ||||
|       memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, | ||||
|       staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), | ||||
|       guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler), | ||||
|  | @ -174,14 +177,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
|       texture_cache_runtime{ | ||||
|           device,     scheduler,         memory_allocator, staging_pool, | ||||
|           blit_image, render_pass_cache, descriptor_pool,  compute_pass_descriptor_queue}, | ||||
|       texture_cache(texture_cache_runtime, *this), | ||||
|       texture_cache(texture_cache_runtime, device_memory), | ||||
|       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | ||||
|                            guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), | ||||
|       buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | ||||
|       query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler, | ||||
|       buffer_cache(device_memory, buffer_cache_runtime), | ||||
|       query_cache_runtime(this, device_memory, buffer_cache, device, memory_allocator, scheduler, | ||||
|                           staging_pool, compute_pass_descriptor_queue, descriptor_pool), | ||||
|       query_cache(gpu, *this, cpu_memory_, query_cache_runtime), | ||||
|       pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, | ||||
|       query_cache(gpu, *this, device_memory, query_cache_runtime), | ||||
|       pipeline_cache(device_memory, device, scheduler, descriptor_pool, guest_descriptor_queue, | ||||
|                      render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), | ||||
|       accelerate_dma(buffer_cache, texture_cache, scheduler), | ||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | ||||
|  | @ -508,7 +511,7 @@ void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 in | |||
| 
 | ||||
| void RasterizerVulkan::FlushAll() {} | ||||
| 
 | ||||
| void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | ||||
| void RasterizerVulkan::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|  | @ -525,7 +528,7 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType | |||
|     } | ||||
| } | ||||
| 
 | ||||
| bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | ||||
| bool RasterizerVulkan::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { | ||||
|     if ((True(which & VideoCommon::CacheType::BufferCache))) { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         if (buffer_cache.IsRegionGpuModified(addr, size)) { | ||||
|  | @ -542,7 +545,7 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheT | |||
|     return false; | ||||
| } | ||||
| 
 | ||||
| VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 size) { | ||||
| VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(DAddr addr, u64 size) { | ||||
|     { | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         auto area = texture_cache.GetFlushArea(addr, size); | ||||
|  | @ -558,7 +561,7 @@ VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 | |||
|     return new_area; | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) { | ||||
| void RasterizerVulkan::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) { | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|  | @ -578,7 +581,7 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | ||||
| void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) { | ||||
|     { | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         for (const auto& [addr, size] : sequences) { | ||||
|  | @ -599,7 +602,8 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s | |||
|     } | ||||
| } | ||||
| 
 | ||||
| bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | ||||
| bool RasterizerVulkan::OnCPUWrite(PAddr p_addr, u64 size) { | ||||
|     const DAddr addr = device_memory.GetAddressFromPAddr(p_addr); | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return false; | ||||
|     } | ||||
|  | @ -620,7 +624,8 @@ bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
|     return false; | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) { | ||||
| void RasterizerVulkan::OnCacheInvalidation(PAddr p_addr, u64 size) { | ||||
|     const DAddr addr = device_memory.GetAddressFromPAddr(p_addr); | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|  | @ -640,7 +645,7 @@ void RasterizerVulkan::InvalidateGPUCache() { | |||
|     gpu.InvalidateGPUCache(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | ||||
| void RasterizerVulkan::UnmapMemory(DAddr addr, u64 size) { | ||||
|     { | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.UnmapMemory(addr, size); | ||||
|  | @ -679,7 +684,7 @@ void RasterizerVulkan::ReleaseFences(bool force) { | |||
|     fence_manager.WaitPendingFences(force); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, | ||||
| void RasterizerVulkan::FlushAndInvalidateRegion(DAddr addr, u64 size, | ||||
|                                                 VideoCommon::CacheType which) { | ||||
|     if (Settings::IsGPULevelExtreme()) { | ||||
|         FlushRegion(addr, size, which); | ||||
|  | @ -782,7 +787,7 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si | |||
| } | ||||
| 
 | ||||
| bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||
|                                          VAddr framebuffer_addr, u32 pixel_stride) { | ||||
|                                          DAddr framebuffer_addr, u32 pixel_stride) { | ||||
|     if (!framebuffer_addr) { | ||||
|         return false; | ||||
|     } | ||||
|  |  | |||
|  | @ -12,7 +12,6 @@ | |||
| #include "common/common_types.h" | ||||
| #include "video_core/control/channel_state_cache.h" | ||||
| #include "video_core/engines/maxwell_dma.h" | ||||
| #include "video_core/rasterizer_accelerated.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_vulkan/blit_image.h" | ||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||
|  | @ -25,6 +24,7 @@ | |||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| 
 | ||||
| namespace Core { | ||||
| class System; | ||||
|  | @ -34,10 +34,14 @@ namespace Core::Frontend { | |||
| class EmuWindow; | ||||
| } | ||||
| 
 | ||||
| namespace Tegra::Engines { | ||||
| namespace Tegra { | ||||
| 
 | ||||
| namespace Engines { | ||||
| class Maxwell3D; | ||||
| } | ||||
| 
 | ||||
| } // namespace Tegra
 | ||||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| struct ScreenInfo; | ||||
|  | @ -70,13 +74,14 @@ private: | |||
|     Scheduler& scheduler; | ||||
| }; | ||||
| 
 | ||||
| class RasterizerVulkan final : public VideoCore::RasterizerAccelerated, | ||||
| class RasterizerVulkan final : public VideoCore::RasterizerInterface, | ||||
|                                protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||||
| public: | ||||
|     explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||
|                               Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_, | ||||
|                               const Device& device_, MemoryAllocator& memory_allocator_, | ||||
|                               StateTracker& state_tracker_, Scheduler& scheduler_); | ||||
|                               Tegra::MaxwellDeviceMemoryManager& device_memory_, | ||||
|                               ScreenInfo& screen_info_, const Device& device_, | ||||
|                               MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, | ||||
|                               Scheduler& scheduler_); | ||||
|     ~RasterizerVulkan() override; | ||||
| 
 | ||||
|     void Draw(bool is_indexed, u32 instance_count) override; | ||||
|  | @ -90,18 +95,18 @@ public: | |||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | ||||
|     void FlushAll() override; | ||||
|     void FlushRegion(VAddr addr, u64 size, | ||||
|     void FlushRegion(DAddr addr, u64 size, | ||||
|                      VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     bool MustFlushRegion(VAddr addr, u64 size, | ||||
|     bool MustFlushRegion(DAddr addr, u64 size, | ||||
|                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(VAddr addr, u64 size, | ||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(DAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(DAddr addr, u64 size, | ||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; | ||||
|     void OnCacheInvalidation(VAddr addr, u64 size) override; | ||||
|     bool OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) override; | ||||
|     void OnCacheInvalidation(DAddr addr, u64 size) override; | ||||
|     bool OnCPUWrite(DAddr addr, u64 size) override; | ||||
|     void InvalidateGPUCache() override; | ||||
|     void UnmapMemory(VAddr addr, u64 size) override; | ||||
|     void UnmapMemory(DAddr addr, u64 size) override; | ||||
|     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | ||||
|     void SignalFence(std::function<void()>&& func) override; | ||||
|     void SyncOperation(std::function<void()>&& func) override; | ||||
|  | @ -109,7 +114,7 @@ public: | |||
|     void SignalReference() override; | ||||
|     void ReleaseFences(bool force = true) override; | ||||
|     void FlushAndInvalidateRegion( | ||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|         DAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void WaitForIdle() override; | ||||
|     void FragmentBarrier() override; | ||||
|     void TiledCacheBarrier() override; | ||||
|  | @ -122,7 +127,7 @@ public: | |||
|     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | ||||
|     void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | ||||
|                                   std::span<const u8> memory) override; | ||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, | ||||
|                            u32 pixel_stride) override; | ||||
|     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||
|                            const VideoCore::DiskResourceLoadCallback& callback) override; | ||||
|  | @ -176,6 +181,7 @@ private: | |||
|     void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); | ||||
| 
 | ||||
|     Tegra::GPU& gpu; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
| 
 | ||||
|     ScreenInfo& screen_info; | ||||
|     const Device& device; | ||||
|  |  | |||
|  | @ -12,6 +12,7 @@ | |||
| #include "video_core/dirty_flags.h" | ||||
| #include "video_core/engines/kepler_compute.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/shader_cache.h" | ||||
| #include "video_core/shader_environment.h" | ||||
|  | @ -34,7 +35,7 @@ void ShaderCache::SyncGuestHost() { | |||
|     RemovePendingShaders(); | ||||
| } | ||||
| 
 | ||||
| ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} | ||||
| ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_) : device_memory{device_memory_} {} | ||||
| 
 | ||||
| bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { | ||||
|     auto& dirty{maxwell3d->dirty.flags}; | ||||
|  | @ -132,7 +133,7 @@ void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t | |||
| 
 | ||||
|     storage.push_back(std::move(data)); | ||||
| 
 | ||||
|     rasterizer.UpdatePagesCachedCount(addr, size, 1); | ||||
|     device_memory.UpdatePagesCachedCount(addr, size, 1); | ||||
| } | ||||
| 
 | ||||
| void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { | ||||
|  | @ -209,7 +210,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) { | |||
| 
 | ||||
|     const VAddr addr = entry->addr_start; | ||||
|     const size_t size = entry->addr_end - addr; | ||||
|     rasterizer.UpdatePagesCachedCount(addr, size, -1); | ||||
|     device_memory.UpdatePagesCachedCount(addr, size, -1); | ||||
| } | ||||
| 
 | ||||
| void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) { | ||||
|  |  | |||
|  | @ -16,6 +16,7 @@ | |||
| #include "video_core/control/channel_state_cache.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/shader_environment.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| 
 | ||||
| namespace Tegra { | ||||
| class MemoryManager; | ||||
|  | @ -77,7 +78,7 @@ protected: | |||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_); | ||||
|     explicit ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory); | ||||
| 
 | ||||
|     /// @brief Update the hashes and information of shader stages
 | ||||
|     /// @param unique_hashes Shader hashes to store into when a stage is enabled
 | ||||
|  | @ -145,7 +146,7 @@ private: | |||
|     /// @brief Create a new shader entry and register it
 | ||||
|     const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); | ||||
| 
 | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
| 
 | ||||
|     mutable std::mutex lookup_mutex; | ||||
|     std::mutex invalidation_mutex; | ||||
|  |  | |||
|  | @ -8,10 +8,11 @@ | |||
| 
 | ||||
| #include "common/alignment.h" | ||||
| #include "common/settings.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/control/channel_state.h" | ||||
| #include "video_core/dirty_flags.h" | ||||
| #include "video_core/engines/kepler_compute.h" | ||||
| #include "video_core/guest_memory.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| #include "video_core/texture_cache/image_view_base.h" | ||||
| #include "video_core/texture_cache/samples_helper.h" | ||||
| #include "video_core/texture_cache/texture_cache_base.h" | ||||
|  | @ -27,8 +28,8 @@ using VideoCore::Surface::SurfaceType; | |||
| using namespace Common::Literals; | ||||
| 
 | ||||
| template <class P> | ||||
| TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_) | ||||
|     : runtime{runtime_}, rasterizer{rasterizer_} { | ||||
| TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_) | ||||
|     : runtime{runtime_}, device_memory{device_memory_} { | ||||
|     // Configure null sampler
 | ||||
|     TSCEntry sampler_descriptor{}; | ||||
|     sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||||
|  | @ -49,19 +50,19 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
|     void(slot_samplers.insert(runtime, sampler_descriptor)); | ||||
| 
 | ||||
|     if constexpr (HAS_DEVICE_MEMORY_INFO) { | ||||
|         const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); | ||||
|         const s64 min_spacing_expected = device_memory - 1_GiB; | ||||
|         const s64 min_spacing_critical = device_memory - 512_MiB; | ||||
|         const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); | ||||
|         const s64 device_local_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); | ||||
|         const s64 min_spacing_expected = device_local_memory - 1_GiB; | ||||
|         const s64 min_spacing_critical = device_local_memory - 512_MiB; | ||||
|         const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD); | ||||
|         const s64 min_vacancy_expected = (6 * mem_threshold) / 10; | ||||
|         const s64 min_vacancy_critical = (3 * mem_threshold) / 10; | ||||
|         expected_memory = static_cast<u64>( | ||||
|             std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), | ||||
|             std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), | ||||
|                      DEFAULT_EXPECTED_MEMORY)); | ||||
|         critical_memory = static_cast<u64>( | ||||
|             std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), | ||||
|             std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), | ||||
|                      DEFAULT_CRITICAL_MEMORY)); | ||||
|         minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2); | ||||
|         minimum_memory = static_cast<u64>((device_local_memory - mem_threshold) / 2); | ||||
|     } else { | ||||
|         expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; | ||||
|         critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; | ||||
|  | @ -513,7 +514,7 @@ FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { | |||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | ||||
| void TextureCache<P>::WriteMemory(DAddr cpu_addr, size_t size) { | ||||
|     ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { | ||||
|         if (True(image.flags & ImageFlagBits::CpuModified)) { | ||||
|             return; | ||||
|  | @ -526,7 +527,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | |||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | ||||
| void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) { | ||||
|     boost::container::small_vector<ImageId, 16> images; | ||||
|     ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { | ||||
|         if (!image.IsSafeDownload()) { | ||||
|  | @ -553,7 +554,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | |||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(VAddr cpu_addr, | ||||
| std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(DAddr cpu_addr, | ||||
|                                                                                u64 size) { | ||||
|     std::optional<VideoCore::RasterizerDownloadArea> area{}; | ||||
|     ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) { | ||||
|  | @ -579,7 +580,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V | |||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | ||||
| void TextureCache<P>::UnmapMemory(DAddr cpu_addr, size_t size) { | ||||
|     boost::container::small_vector<ImageId, 16> deleted_images; | ||||
|     ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); | ||||
|     for (const ImageId id : deleted_images) { | ||||
|  | @ -713,7 +714,7 @@ bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | |||
| 
 | ||||
| template <class P> | ||||
| typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView( | ||||
|     const Tegra::FramebufferConfig& config, VAddr cpu_addr) { | ||||
|     const Tegra::FramebufferConfig& config, DAddr cpu_addr) { | ||||
|     // TODO: Properly implement this
 | ||||
|     const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS); | ||||
|     if (it == page_table.end()) { | ||||
|  | @ -940,7 +941,7 @@ bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcep | |||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | ||||
| bool TextureCache<P>::IsRegionGpuModified(DAddr addr, size_t size) { | ||||
|     bool is_modified = false; | ||||
|     ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { | ||||
|         if (False(image.flags & ImageFlagBits::GpuModified)) { | ||||
|  | @ -1059,7 +1060,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) | |||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( | ||||
|     Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( | ||||
|         *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); | ||||
| 
 | ||||
|     if (True(image.flags & ImageFlagBits::Converted)) { | ||||
|  | @ -1124,7 +1125,7 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a | |||
| template <class P> | ||||
| ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||||
|                                    RelaxedOptions options) { | ||||
|     std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||
|     std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||
|     if (!cpu_addr) { | ||||
|         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | ||||
|         if (!cpu_addr) { | ||||
|  | @ -1265,7 +1266,7 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) { | |||
| 
 | ||||
|     static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; | ||||
|     local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); | ||||
|     Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( | ||||
|     Tegra::Memory::GpuGuestMemory<u8, Tegra::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( | ||||
|         *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); | ||||
| 
 | ||||
|     auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, | ||||
|  | @ -1339,14 +1340,14 @@ bool TextureCache<P>::ScaleDown(Image& image) { | |||
| template <class P> | ||||
| ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||||
|                                      RelaxedOptions options) { | ||||
|     std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||
|     std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||
|     if (!cpu_addr) { | ||||
|         const auto size = CalculateGuestSizeInBytes(info); | ||||
|         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); | ||||
|         if (!cpu_addr) { | ||||
|             const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; | ||||
|             const DAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; | ||||
|             virtual_invalid_space += Common::AlignUp(size, 32); | ||||
|             cpu_addr = std::optional<VAddr>(fake_addr); | ||||
|             cpu_addr = std::optional<DAddr>(fake_addr); | ||||
|         } | ||||
|     } | ||||
|     ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); | ||||
|  | @ -1362,7 +1363,7 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { | ||||
| ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) { | ||||
|     ImageInfo new_info = info; | ||||
|     const size_t size_bytes = CalculateGuestSizeInBytes(new_info); | ||||
|     const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||||
|  | @ -1650,7 +1651,7 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag | |||
| 
 | ||||
| template <class P> | ||||
| ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) { | ||||
|     std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||
|     std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||
|     if (!cpu_addr) { | ||||
|         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | ||||
|         if (!cpu_addr) { | ||||
|  | @ -1780,7 +1781,7 @@ ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAdd | |||
| 
 | ||||
| template <class P> | ||||
| template <typename Func> | ||||
| void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { | ||||
| void TextureCache<P>::ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func) { | ||||
|     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||||
|     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||||
|     boost::container::small_vector<ImageId, 32> images; | ||||
|  | @ -1924,11 +1925,11 @@ void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, | |||
| template <class P> | ||||
| template <typename Func> | ||||
| void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | ||||
|     using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; | ||||
|     using FuncReturn = typename std::invoke_result<Func, GPUVAddr, DAddr, size_t>::type; | ||||
|     static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; | ||||
|     const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); | ||||
|     for (const auto& [gpu_addr, size] : segments) { | ||||
|         std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||
|         std::optional<DAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||
|         ASSERT(cpu_addr); | ||||
|         if constexpr (RETURNS_BOOL) { | ||||
|             if (func(gpu_addr, *cpu_addr, size)) { | ||||
|  | @ -1980,7 +1981,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
|     } | ||||
|     boost::container::small_vector<ImageViewId, 16> sparse_maps; | ||||
|     ForEachSparseSegment( | ||||
|         image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||||
|         image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) { | ||||
|             auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | ||||
|             ForEachCPUPage(cpu_addr, size, | ||||
|                            [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||||
|  | @ -2048,7 +2049,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
|     auto& sparse_maps = it->second; | ||||
|     for (auto& map_view_id : sparse_maps) { | ||||
|         const auto& map_range = slot_map_views[map_view_id]; | ||||
|         const VAddr cpu_addr = map_range.cpu_addr; | ||||
|         const DAddr cpu_addr = map_range.cpu_addr; | ||||
|         const std::size_t size = map_range.size; | ||||
|         ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { | ||||
|             const auto page_it = page_table.find(page); | ||||
|  | @ -2080,7 +2081,7 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | |||
|     ASSERT(False(image.flags & ImageFlagBits::Tracked)); | ||||
|     image.flags |= ImageFlagBits::Tracked; | ||||
|     if (False(image.flags & ImageFlagBits::Sparse)) { | ||||
|         rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | ||||
|         device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | ||||
|         return; | ||||
|     } | ||||
|     if (True(image.flags & ImageFlagBits::Registered)) { | ||||
|  | @ -2089,15 +2090,15 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | |||
|         auto& sparse_maps = it->second; | ||||
|         for (auto& map_view_id : sparse_maps) { | ||||
|             const auto& map = slot_map_views[map_view_id]; | ||||
|             const VAddr cpu_addr = map.cpu_addr; | ||||
|             const DAddr cpu_addr = map.cpu_addr; | ||||
|             const std::size_t size = map.size; | ||||
|             rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||||
|             device_memory.UpdatePagesCachedCount(cpu_addr, size, 1); | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
|     ForEachSparseSegment(image, | ||||
|                          [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||||
|                              rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||||
|                          [this]([[maybe_unused]] GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) { | ||||
|                              device_memory.UpdatePagesCachedCount(cpu_addr, size, 1); | ||||
|                          }); | ||||
| } | ||||
| 
 | ||||
|  | @ -2106,7 +2107,7 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | |||
|     ASSERT(True(image.flags & ImageFlagBits::Tracked)); | ||||
|     image.flags &= ~ImageFlagBits::Tracked; | ||||
|     if (False(image.flags & ImageFlagBits::Sparse)) { | ||||
|         rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | ||||
|         device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | ||||
|         return; | ||||
|     } | ||||
|     ASSERT(True(image.flags & ImageFlagBits::Registered)); | ||||
|  | @ -2115,9 +2116,9 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | |||
|     auto& sparse_maps = it->second; | ||||
|     for (auto& map_view_id : sparse_maps) { | ||||
|         const auto& map = slot_map_views[map_view_id]; | ||||
|         const VAddr cpu_addr = map.cpu_addr; | ||||
|         const DAddr cpu_addr = map.cpu_addr; | ||||
|         const std::size_t size = map.size; | ||||
|         rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | ||||
|         device_memory.UpdatePagesCachedCount(cpu_addr, size, -1); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -36,9 +36,11 @@ | |||
| #include "video_core/texture_cache/types.h" | ||||
| #include "video_core/textures/texture.h" | ||||
| 
 | ||||
| namespace Tegra::Control { | ||||
| namespace Tegra { | ||||
| namespace Control { | ||||
| struct ChannelState; | ||||
| } | ||||
| } // namespace Tegra
 | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
|  | @ -126,7 +128,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI | |||
|     }; | ||||
| 
 | ||||
| public: | ||||
|     explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&); | ||||
|     explicit TextureCache(Runtime&, Tegra::MaxwellDeviceMemoryManager&); | ||||
| 
 | ||||
|     /// Notify the cache that a new frame has been queued
 | ||||
|     void TickFrame(); | ||||
|  | @ -190,15 +192,15 @@ public: | |||
|     Framebuffer* GetFramebuffer(); | ||||
| 
 | ||||
|     /// Mark images in a range as modified from the CPU
 | ||||
|     void WriteMemory(VAddr cpu_addr, size_t size); | ||||
|     void WriteMemory(DAddr cpu_addr, size_t size); | ||||
| 
 | ||||
|     /// Download contents of host images to guest memory in a region
 | ||||
|     void DownloadMemory(VAddr cpu_addr, size_t size); | ||||
|     void DownloadMemory(DAddr cpu_addr, size_t size); | ||||
| 
 | ||||
|     std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); | ||||
|     std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(DAddr cpu_addr, u64 size); | ||||
| 
 | ||||
|     /// Remove images in a region
 | ||||
|     void UnmapMemory(VAddr cpu_addr, size_t size); | ||||
|     void UnmapMemory(DAddr cpu_addr, size_t size); | ||||
| 
 | ||||
|     /// Remove images in a GPU virtual address region of the given address space
 | ||||
|     void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); | ||||
|  | @ -210,7 +212,7 @@ public: | |||
| 
 | ||||
|     /// Try to find a cached image view at the given CPU address
 | ||||
|     [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config, | ||||
|                                                          VAddr cpu_addr); | ||||
|                                                          DAddr cpu_addr); | ||||
| 
 | ||||
|     /// Return true when there are uncommitted images to be downloaded
 | ||||
|     [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | ||||
|  | @ -235,7 +237,7 @@ public: | |||
|                                  GPUVAddr address = 0, size_t size = 0); | ||||
| 
 | ||||
|     /// Return true when a CPU region is modified from the GPU
 | ||||
|     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||||
|     [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size); | ||||
| 
 | ||||
|     [[nodiscard]] bool IsRescaling() const noexcept; | ||||
| 
 | ||||
|  | @ -252,7 +254,7 @@ public: | |||
| private: | ||||
|     /// Iterate over all page indices in a range
 | ||||
|     template <typename Func> | ||||
|     static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { | ||||
|     static void ForEachCPUPage(DAddr addr, size_t size, Func&& func) { | ||||
|         static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; | ||||
|         const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS; | ||||
|         for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) { | ||||
|  | @ -326,7 +328,7 @@ private: | |||
| 
 | ||||
|     /// Create a new image and join perfectly matching existing images
 | ||||
|     /// Remove joined images from the cache
 | ||||
|     [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | ||||
|     [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr); | ||||
| 
 | ||||
|     [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr); | ||||
| 
 | ||||
|  | @ -349,7 +351,7 @@ private: | |||
| 
 | ||||
|     /// Iterates over all the images in a region calling func
 | ||||
|     template <typename Func> | ||||
|     void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); | ||||
|     void ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func); | ||||
| 
 | ||||
|     template <typename Func> | ||||
|     void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); | ||||
|  | @ -421,7 +423,7 @@ private: | |||
| 
 | ||||
|     Runtime& runtime; | ||||
| 
 | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     Tegra::MaxwellDeviceMemoryManager& device_memory; | ||||
|     std::deque<TextureCacheGPUMap> gpu_page_table_storage; | ||||
| 
 | ||||
|     RenderTargets render_targets; | ||||
|  | @ -432,7 +434,7 @@ private: | |||
|     std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; | ||||
|     std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; | ||||
| 
 | ||||
|     VAddr virtual_invalid_space{}; | ||||
|     DAddr virtual_invalid_space{}; | ||||
| 
 | ||||
|     bool has_deleted_images = false; | ||||
|     bool is_rescaling = false; | ||||
|  |  | |||
|  | @ -23,6 +23,7 @@ | |||
| #include "core/memory.h" | ||||
| #include "video_core/compatible_formats.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/guest_memory.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/surface.h" | ||||
| #include "video_core/texture_cache/decode_bc.h" | ||||
|  | @ -552,7 +553,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | |||
|     for (s32 layer = 0; layer < info.resources.layers; ++layer) { | ||||
|         const std::span<const u8> src = input.subspan(host_offset); | ||||
|         { | ||||
|             Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> | ||||
|             Tegra::Memory::GpuGuestMemoryScoped<u8, | ||||
|                                                 Tegra::Memory::GuestMemoryFlags::UnsafeReadWrite> | ||||
|                 dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); | ||||
| 
 | ||||
|             SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, | ||||
|  |  | |||
|  | @ -6,6 +6,8 @@ | |||
| #include "common/logging/log.h" | ||||
| #include "common/settings.h" | ||||
| #include "core/core.h" | ||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | ||||
| #include "video_core/host1x/host1x.h" | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/renderer_null/renderer_null.h" | ||||
| #include "video_core/renderer_opengl/renderer_opengl.h" | ||||
|  | @ -18,18 +20,17 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer( | |||
|     Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, | ||||
|     std::unique_ptr<Core::Frontend::GraphicsContext> context) { | ||||
|     auto& telemetry_session = system.TelemetrySession(); | ||||
|     auto& cpu_memory = system.ApplicationMemory(); | ||||
|     auto& device_memory = system.Host1x().MemoryManager(); | ||||
| 
 | ||||
|     switch (Settings::values.renderer_backend.GetValue()) { | ||||
|     case Settings::RendererBackend::OpenGL: | ||||
|         return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory, | ||||
|                                                         gpu, std::move(context)); | ||||
|         return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, | ||||
|                                                         device_memory, gpu, std::move(context)); | ||||
|     case Settings::RendererBackend::Vulkan: | ||||
|         return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory, | ||||
|                                                         gpu, std::move(context)); | ||||
|         return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, | ||||
|                                                         device_memory, gpu, std::move(context)); | ||||
|     case Settings::RendererBackend::Null: | ||||
|         return std::make_unique<Null::RendererNull>(emu_window, cpu_memory, gpu, | ||||
|                                                     std::move(context)); | ||||
|         return std::make_unique<Null::RendererNull>(emu_window, gpu, std::move(context)); | ||||
|     default: | ||||
|         return nullptr; | ||||
|     } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow