Merge pull request #3610 from FernandoS27/gpu-caches

Refactor all the GPU Caches to use VAddr for cache addressing

commit dcd672b1a6

32 changed files with 573 additions and 430 deletions
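The central change is the cache key type: CacheAddr was an integer derived from the host pointer backing a guest page (the ToCacheAddr(host_ptr) calls removed below), while VAddr is the guest's own virtual address, recovered from a GPU address via MemoryManager::GpuToCpuAddress. A minimal, self-contained sketch of the new lookup direction, with a toy page table standing in for the real MemoryManager (everything here is illustrative, not the shipped code):

// Sketch of why the key type changed. CacheAddr depended on where the
// emulator's allocator happened to place host memory; a VAddr key is
// the guest virtual address itself and can be compared directly against
// the addresses the CPU-side memory code passes to FlushRegion and
// InvalidateRegion.
#include <cstdint>
#include <map>
#include <optional>

using VAddr = std::uint64_t;    // guest CPU virtual address
using GPUVAddr = std::uint64_t; // guest GPU virtual address

constexpr std::uint64_t page_mask = 0xFFF; // toy 4 KiB pages

// Toy stand-in for the GPU page table: GPU page base -> CPU page base.
const std::map<GPUVAddr, VAddr> gpu_page_table{{0x1000, 0x8001000}};

// Assumed shape of MemoryManager::GpuToCpuAddress: walk the page table
// and rebase the offset; nullopt when the page is unmapped.
std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr) {
    const auto it = gpu_page_table.find(gpu_addr & ~page_mask);
    if (it == gpu_page_table.end()) {
        return std::nullopt;
    }
    return it->second + (gpu_addr & page_mask);
}

int main() {
    const auto cpu_addr = GpuToCpuAddress(0x1234);
    return (cpu_addr && *cpu_addr == 0x8001234) ? 0 : 1;
}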
@@ -242,7 +242,52 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().FlushRegion(current_vaddr, copy_amount);
                 std::memcpy(dest_buffer, host_ptr, copy_amount);
                 break;
             }
+            default:
+                UNREACHABLE();
+            }
+
+            page_index++;
+            page_offset = 0;
+            dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+            remaining_size -= copy_amount;
+        }
+    }
+
+    void ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
+                         const std::size_t size) {
+        const auto& page_table = process.VMManager().page_table;
+
+        std::size_t remaining_size = size;
+        std::size_t page_index = src_addr >> PAGE_BITS;
+        std::size_t page_offset = src_addr & PAGE_MASK;
+
+        while (remaining_size > 0) {
+            const std::size_t copy_amount =
+                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
+            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
+
+            switch (page_table.attributes[page_index]) {
+            case Common::PageType::Unmapped: {
+                LOG_ERROR(HW_Memory,
+                          "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                          current_vaddr, src_addr, size);
+                std::memset(dest_buffer, 0, copy_amount);
+                break;
+            }
+            case Common::PageType::Memory: {
+                DEBUG_ASSERT(page_table.pointers[page_index]);
+
+                const u8* const src_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
+                std::memcpy(dest_buffer, src_ptr, copy_amount);
+                break;
+            }
+            case Common::PageType::RasterizerCachedMemory: {
+                const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
+                std::memcpy(dest_buffer, host_ptr, copy_amount);
+                break;
+            }
@@ -261,6 +306,10 @@ struct Memory::Impl {
         ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
     }

+    void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
+        ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
+    }
+
     void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
                     const std::size_t size) {
         const auto& page_table = process.VMManager().page_table;
@@ -290,7 +339,50 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().InvalidateRegion(current_vaddr, copy_amount);
                 std::memcpy(host_ptr, src_buffer, copy_amount);
                 break;
             }
+            default:
+                UNREACHABLE();
+            }
+
+            page_index++;
+            page_offset = 0;
+            src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+            remaining_size -= copy_amount;
+        }
+    }
+
+    void WriteBlockUnsafe(const Kernel::Process& process, const VAddr dest_addr,
+                          const void* src_buffer, const std::size_t size) {
+        const auto& page_table = process.VMManager().page_table;
+        std::size_t remaining_size = size;
+        std::size_t page_index = dest_addr >> PAGE_BITS;
+        std::size_t page_offset = dest_addr & PAGE_MASK;
+
+        while (remaining_size > 0) {
+            const std::size_t copy_amount =
+                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
+            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
+
+            switch (page_table.attributes[page_index]) {
+            case Common::PageType::Unmapped: {
+                LOG_ERROR(HW_Memory,
+                          "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                          current_vaddr, dest_addr, size);
+                break;
+            }
+            case Common::PageType::Memory: {
+                DEBUG_ASSERT(page_table.pointers[page_index]);
+
+                u8* const dest_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
+                std::memcpy(dest_ptr, src_buffer, copy_amount);
+                break;
+            }
+            case Common::PageType::RasterizerCachedMemory: {
+                u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
+                std::memcpy(host_ptr, src_buffer, copy_amount);
+                break;
+            }
@@ -309,6 +401,10 @@ struct Memory::Impl {
         WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
     }

+    void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
+        WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
+    }
+
     void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
         const auto& page_table = process.VMManager().page_table;
         std::size_t remaining_size = size;
@@ -337,7 +433,7 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().InvalidateRegion(current_vaddr, copy_amount);
                 std::memset(host_ptr, 0, copy_amount);
                 break;
             }
@@ -384,7 +480,7 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().FlushRegion(current_vaddr, copy_amount);
                 WriteBlock(process, dest_addr, host_ptr, copy_amount);
                 break;
             }
@@ -545,7 +641,7 @@ struct Memory::Impl {
             break;
         case Common::PageType::RasterizerCachedMemory: {
             const u8* const host_ptr = GetPointerFromVMA(vaddr);
-            system.GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
+            system.GPU().FlushRegion(vaddr, sizeof(T));
             T value;
             std::memcpy(&value, host_ptr, sizeof(T));
             return value;
@@ -587,7 +683,7 @@ struct Memory::Impl {
             break;
         case Common::PageType::RasterizerCachedMemory: {
             u8* const host_ptr{GetPointerFromVMA(vaddr)};
-            system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+            system.GPU().InvalidateRegion(vaddr, sizeof(T));
             std::memcpy(host_ptr, &data, sizeof(T));
             break;
         }
@@ -696,6 +792,15 @@ void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_
     impl->ReadBlock(src_addr, dest_buffer, size);
 }

+void Memory::ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr,
+                             void* dest_buffer, const std::size_t size) {
+    impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
+}
+
+void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
+    impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
+}
+
 void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
                         std::size_t size) {
     impl->WriteBlock(process, dest_addr, src_buffer, size);
@@ -705,6 +810,16 @@ void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std
     impl->WriteBlock(dest_addr, src_buffer, size);
 }

+void Memory::WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr,
+                              const void* src_buffer, std::size_t size) {
+    impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
+}
+
+void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
+                              const std::size_t size) {
+    impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
+}
+
 void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) {
     impl->ZeroBlock(process, dest_addr, size);
 }
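The new ReadBlockUnsafe/WriteBlockUnsafe variants above copy bytes without notifying the GPU. A self-contained toy of the read side, with a single page and a flag standing in for PageType::RasterizerCachedMemory, to show the one behavioral difference (the flush); none of these names are the shipped code:

#include <cstdio>
#include <cstring>
#include <vector>

struct ToyMemory {
    std::vector<unsigned char> page = std::vector<unsigned char>(0x1000);
    bool rasterizer_cached = false;
    int gpu_flushes = 0;

    // Stand-in for system.GPU().FlushRegion(vaddr, size).
    void FlushGpuRegion() { ++gpu_flushes; }

    void ReadBlock(std::size_t offset, void* dst, std::size_t size) {
        if (rasterizer_cached) {
            FlushGpuRegion();  // make pending GPU writes visible first
        }
        std::memcpy(dst, page.data() + offset, size);
    }

    // Unsafe variant: identical copy, no flush. Correct only when the
    // caller already knows the range is coherent, e.g. the GPU caches
    // themselves, which manage flush/invalidate on their own.
    void ReadBlockUnsafe(std::size_t offset, void* dst, std::size_t size) {
        std::memcpy(dst, page.data() + offset, size);
    }
};

int main() {
    ToyMemory mem;
    mem.rasterizer_cached = true;
    unsigned char buf[16];
    mem.ReadBlock(0, buf, sizeof(buf));        // triggers a flush
    mem.ReadBlockUnsafe(0, buf, sizeof(buf));  // does not
    std::printf("flushes: %d\n", mem.gpu_flushes);  // prints 1
    return 0;
}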
========
@@ -294,6 +294,27 @@ public:
     void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
                    std::size_t size);

+    /**
+     * Reads a contiguous block of bytes from a specified process' address space.
+     * This unsafe version does not trigger GPU flushing.
+     *
+     * @param process     The process to read the data from.
+     * @param src_addr    The virtual address to begin reading from.
+     * @param dest_buffer The buffer to place the read bytes into.
+     * @param size        The amount of data to read, in bytes.
+     *
+     * @note If a size of 0 is specified, then this function reads nothing and
+     *       no attempts to access memory are made at all.
+     *
+     * @pre dest_buffer must be at least size bytes in length, otherwise a
+     *      buffer overrun will occur.
+     *
+     * @post The range [dest_buffer, size) contains the read bytes from the
+     *       process' address space.
+     */
+    void ReadBlockUnsafe(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
+                         std::size_t size);
+
     /**
      * Reads a contiguous block of bytes from the current process' address space.
      *
@@ -312,6 +333,25 @@ public:
      */
     void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);

+    /**
+     * Reads a contiguous block of bytes from the current process' address space.
+     * This unsafe version does not trigger GPU flushing.
+     *
+     * @param src_addr    The virtual address to begin reading from.
+     * @param dest_buffer The buffer to place the read bytes into.
+     * @param size        The amount of data to read, in bytes.
+     *
+     * @note If a size of 0 is specified, then this function reads nothing and
+     *       no attempts to access memory are made at all.
+     *
+     * @pre dest_buffer must be at least size bytes in length, otherwise a
+     *      buffer overrun will occur.
+     *
+     * @post The range [dest_buffer, size) contains the read bytes from the
+     *       current process' address space.
+     */
+    void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size);
+
     /**
      * Writes a range of bytes into a given process' address space at the specified
      * virtual address.
@@ -335,6 +375,26 @@ public:
     void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
                     std::size_t size);

+    /**
+     * Writes a range of bytes into a given process' address space at the specified
+     * virtual address.
+     * This unsafe version does not invalidate GPU Memory.
+     *
+     * @param process    The process to write data into the address space of.
+     * @param dest_addr  The destination virtual address to begin writing the data at.
+     * @param src_buffer The data to write into the process' address space.
+     * @param size       The size of the data to write, in bytes.
+     *
+     * @post The address range [dest_addr, size) in the process' address space
+     *       contains the data that was within src_buffer.
+     *
+     * @post If an attempt is made to write into an unmapped region of memory, the writes
+     *       will be ignored and an error will be logged.
+     *
+     */
+    void WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
+                          std::size_t size);
+
     /**
      * Writes a range of bytes into the current process' address space at the specified
      * virtual address.
@@ -356,6 +416,24 @@ public:
      */
     void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);

+    /**
+     * Writes a range of bytes into the current process' address space at the specified
+     * virtual address.
+     * This unsafe version does not invalidate GPU Memory.
+     *
+     * @param dest_addr  The destination virtual address to begin writing the data at.
+     * @param src_buffer The data to write into the current process' address space.
+     * @param size       The size of the data to write, in bytes.
+     *
+     * @post The address range [dest_addr, size) in the current process' address space
+     *       contains the data that was within src_buffer.
+     *
+     * @post If an attempt is made to write into an unmapped region of memory, the writes
+     *       will be ignored and an error will be logged.
+     *
+     */
+    void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);
+
     /**
      * Fills the specified address range within a process' address space with zeroes.
      *
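The unsafe writes documented above exist mainly for the GPU caches' own write-back path (see FlushMap in the buffer cache further down): the bytes being written came from the GPU, so the usual InvalidateRegion notification would be redundant work on data the cache already owns. A toy sketch of that asymmetry, with a counter standing in for the GPU notification:

#include <cstring>
#include <vector>

struct ToyGuestMemory {
    std::vector<unsigned char> guest = std::vector<unsigned char>(0x1000);
    int gpu_invalidations = 0;

    void WriteBlock(std::size_t off, const void* src, std::size_t n) {
        ++gpu_invalidations;  // stand-in for GPU().InvalidateRegion(...)
        std::memcpy(guest.data() + off, src, n);
    }
    void WriteBlockUnsafe(std::size_t off, const void* src, std::size_t n) {
        std::memcpy(guest.data() + off, src, n);  // no GPU notification
    }
};

int main() {
    ToyGuestMemory mem;
    const unsigned char download[16] = {};
    // Cache write-back: the data came FROM the GPU, so telling the GPU
    // to invalidate the same range again is exactly what we skip.
    mem.WriteBlockUnsafe(0, download, sizeof(download));
    return mem.gpu_invalidations == 0 ? 0 : 1;
}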
========
@@ -15,37 +15,29 @@ namespace VideoCommon {

 class BufferBlock {
 public:
-    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
-        return (cache_addr < end) && (cache_addr_end > start);
+    bool Overlaps(const VAddr start, const VAddr end) const {
+        return (cpu_addr < end) && (cpu_addr_end > start);
     }

-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
-        return cache_addr <= other_start && other_end <= cache_addr_end;
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
+        return cpu_addr <= other_start && other_end <= cpu_addr_end;
     }

-    u8* GetWritableHostPtr() const {
-        return FromCacheAddr(cache_addr);
+    std::size_t GetOffset(const VAddr in_addr) {
+        return static_cast<std::size_t>(in_addr - cpu_addr);
     }

-    u8* GetWritableHostPtr(std::size_t offset) const {
-        return FromCacheAddr(cache_addr + offset);
+    VAddr GetCpuAddr() const {
+        return cpu_addr;
     }

-    std::size_t GetOffset(const CacheAddr in_addr) {
-        return static_cast<std::size_t>(in_addr - cache_addr);
+    VAddr GetCpuAddrEnd() const {
+        return cpu_addr_end;
     }

-    CacheAddr GetCacheAddr() const {
-        return cache_addr;
-    }
-
-    CacheAddr GetCacheAddrEnd() const {
-        return cache_addr_end;
-    }
-
-    void SetCacheAddr(const CacheAddr new_addr) {
-        cache_addr = new_addr;
-        cache_addr_end = new_addr + size;
+    void SetCpuAddr(const VAddr new_addr) {
+        cpu_addr = new_addr;
+        cpu_addr_end = new_addr + size;
     }

     std::size_t GetSize() const {
@@ -61,14 +53,14 @@ public:
     }

 protected:
-    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
-        SetCacheAddr(cache_addr);
+    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
+        SetCpuAddr(cpu_addr);
     }
     ~BufferBlock() = default;

 private:
-    CacheAddr cache_addr{};
-    CacheAddr cache_addr_end{};
+    VAddr cpu_addr{};
+    VAddr cpu_addr_end{};
     std::size_t size{};
     u64 epoch{};
 };
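BufferBlock now does its interval arithmetic directly on guest addresses, and the host-pointer accessors are gone. A self-contained copy of the three predicates with a worked example (the hexadecimal values are arbitrary, not from the commit):

#include <cassert>
#include <cstddef>
#include <cstdint>

using VAddr = std::uint64_t;

struct Block {
    VAddr cpu_addr;
    VAddr cpu_addr_end;  // cpu_addr + size

    // Half-open overlap test: [cpu_addr, cpu_addr_end) vs [start, end).
    bool Overlaps(VAddr start, VAddr end) const {
        return (cpu_addr < end) && (cpu_addr_end > start);
    }
    bool IsInside(VAddr other_start, VAddr other_end) const {
        return cpu_addr <= other_start && other_end <= cpu_addr_end;
    }
    std::size_t GetOffset(VAddr in_addr) const {
        return static_cast<std::size_t>(in_addr - cpu_addr);
    }
};

int main() {
    const Block block{0x8000000, 0x8010000};        // 64 KiB block
    assert(block.Overlaps(0x800F000, 0x8011000));   // straddles the end
    assert(!block.Overlaps(0x8010000, 0x8011000));  // end is exclusive
    assert(block.IsInside(0x8004000, 0x8008000));
    assert(block.GetOffset(0x8004000) == 0x4000);   // byte offset in block
    return 0;
}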
========
@@ -19,6 +19,7 @@
 #include "common/alignment.h"
 #include "common/common_types.h"
 #include "core/core.h"
+#include "core/memory.h"
 #include "video_core/buffer_cache/buffer_block.h"
 #include "video_core/buffer_cache/map_interval.h"
 #include "video_core/memory_manager.h"
@@ -37,28 +38,45 @@ public:
                             bool is_written = false, bool use_fast_cbuf = false) {
         std::lock_guard lock{mutex};

-        auto& memory_manager = system.GPU().MemoryManager();
-        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-        if (!host_ptr) {
+        const std::optional<VAddr> cpu_addr_opt =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+
+        if (!cpu_addr_opt) {
             return {GetEmptyBuffer(size), 0};
         }
-        const auto cache_addr = ToCacheAddr(host_ptr);
+
+        VAddr cpu_addr = *cpu_addr_opt;

         // Cache management is a big overhead, so only cache entries with a given size.
         // TODO: Figure out which size is the best for given games.
         constexpr std::size_t max_stream_size = 0x800;
         if (use_fast_cbuf || size < max_stream_size) {
-            if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
+            if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
+                auto& memory_manager = system.GPU().MemoryManager();
                 if (use_fast_cbuf) {
-                    return ConstBufferUpload(host_ptr, size);
+                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+                        return ConstBufferUpload(host_ptr, size);
+                    } else {
+                        staging_buffer.resize(size);
+                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                        return ConstBufferUpload(staging_buffer.data(), size);
+                    }
                 } else {
-                    return StreamBufferUpload(host_ptr, size, alignment);
+                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+                        return StreamBufferUpload(host_ptr, size, alignment);
+                    } else {
+                        staging_buffer.resize(size);
+                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                        return StreamBufferUpload(staging_buffer.data(), size, alignment);
+                    }
                 }
             }
         }

-        auto block = GetBlock(cache_addr, size);
-        auto map = MapAddress(block, gpu_addr, cache_addr, size);
+        auto block = GetBlock(cpu_addr, size);
+        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
         if (is_written) {
             map->MarkAsModified(true, GetModifiedTicks());
             if (!map->IsWritten()) {
@@ -71,7 +89,7 @@ public:
             }
         }

-        const u64 offset = static_cast<u64>(block->GetOffset(cache_addr));
+        const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));

         return {ToHandle(block), offset};
     }
@@ -112,7 +130,7 @@ public:
     }

     /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(CacheAddr addr, std::size_t size) {
+    void FlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};

         std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -127,7 +145,7 @@ public:
     }

     /// Mark the specified region as being invalidated
-    void InvalidateRegion(CacheAddr addr, u64 size) {
+    void InvalidateRegion(VAddr addr, u64 size) {
         std::lock_guard lock{mutex};

         std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -152,7 +170,7 @@ protected:

     virtual void WriteBarrier() = 0;

-    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;
+    virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;

     virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
                                  const u8* data) = 0;
@@ -169,20 +187,17 @@ protected:

     /// Register an object into the cache
     void Register(const MapInterval& new_map, bool inherit_written = false) {
-        const CacheAddr cache_ptr = new_map->GetStart();
-        const std::optional<VAddr> cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
-        if (!cache_ptr || !cpu_addr) {
+        const VAddr cpu_addr = new_map->GetStart();
+        if (!cpu_addr) {
             LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
                          new_map->GetGpuAddress());
             return;
         }
         const std::size_t size = new_map->GetEnd() - new_map->GetStart();
-        new_map->SetCpuAddress(*cpu_addr);
         new_map->MarkAsRegistered(true);
         const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
         mapped_addresses.insert({interval, new_map});
-        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
+        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
         if (inherit_written) {
             MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
             new_map->MarkAsWritten(true);
@@ -192,7 +207,7 @@ protected:
     /// Unregisters an object from the cache
     void Unregister(MapInterval& map) {
         const std::size_t size = map->GetEnd() - map->GetStart();
-        rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
+        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
         map->MarkAsRegistered(false);
         if (map->IsWritten()) {
             UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -202,32 +217,39 @@ protected:
     }

 private:
-    MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
+    MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
         return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
     }

-    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
-                           const CacheAddr cache_addr, const std::size_t size) {
+    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
+                           const std::size_t size) {

-        std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
+        std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
         if (overlaps.empty()) {
-            const CacheAddr cache_addr_end = cache_addr + size;
-            MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
-            u8* host_ptr = FromCacheAddr(cache_addr);
-            UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr);
+            auto& memory_manager = system.GPU().MemoryManager();
+            const VAddr cpu_addr_end = cpu_addr + size;
+            MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
+            if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                u8* host_ptr = memory_manager.GetPointer(gpu_addr);
+                UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
+            } else {
+                staging_buffer.resize(size);
+                memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
+            }
             Register(new_map);
             return new_map;
         }

-        const CacheAddr cache_addr_end = cache_addr + size;
+        const VAddr cpu_addr_end = cpu_addr + size;
         if (overlaps.size() == 1) {
             MapInterval& current_map = overlaps[0];
-            if (current_map->IsInside(cache_addr, cache_addr_end)) {
+            if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
                 return current_map;
             }
         }
-        CacheAddr new_start = cache_addr;
-        CacheAddr new_end = cache_addr_end;
+        VAddr new_start = cpu_addr;
+        VAddr new_end = cpu_addr_end;
         bool write_inheritance = false;
         bool modified_inheritance = false;
         // Calculate new buffer parameters
@@ -237,7 +259,7 @@ private:
             write_inheritance |= overlap->IsWritten();
             modified_inheritance |= overlap->IsModified();
         }
-        GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
+        GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
         for (auto& overlap : overlaps) {
             Unregister(overlap);
         }
@@ -250,7 +272,7 @@ private:
         return new_map;
     }

-    void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
+    void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
                      std::vector<MapInterval>& overlaps) {
         const IntervalType base_interval{start, end};
         IntervalSet interval_set{};
@@ -262,13 +284,15 @@ private:
         for (auto& interval : interval_set) {
             std::size_t size = interval.upper() - interval.lower();
             if (size > 0) {
-                u8* host_ptr = FromCacheAddr(interval.lower());
-                UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr);
+                staging_buffer.resize(size);
+                system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+                UploadBlockData(block, block->GetOffset(interval.lower()), size,
+                                staging_buffer.data());
             }
         }
     }

-    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
+    std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
         if (size == 0) {
             return {};
         }
@@ -290,8 +314,9 @@ private:
     void FlushMap(MapInterval map) {
         std::size_t size = map->GetEnd() - map->GetStart();
         TBuffer block = blocks[map->GetStart() >> block_page_bits];
-        u8* host_ptr = FromCacheAddr(map->GetStart());
-        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr);
+        staging_buffer.resize(size);
+        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
+        system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
         map->MarkAsModified(false, 0);
     }

@@ -316,14 +341,14 @@ private:
     TBuffer EnlargeBlock(TBuffer buffer) {
         const std::size_t old_size = buffer->GetSize();
         const std::size_t new_size = old_size + block_page_size;
-        const CacheAddr cache_addr = buffer->GetCacheAddr();
-        TBuffer new_buffer = CreateBlock(cache_addr, new_size);
+        const VAddr cpu_addr = buffer->GetCpuAddr();
+        TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
         CopyBlock(buffer, new_buffer, 0, 0, old_size);
         buffer->SetEpoch(epoch);
         pending_destruction.push_back(buffer);
-        const CacheAddr cache_addr_end = cache_addr + new_size - 1;
-        u64 page_start = cache_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const VAddr cpu_addr_end = cpu_addr + new_size - 1;
+        u64 page_start = cpu_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             blocks[page_start] = new_buffer;
             ++page_start;
@@ -334,9 +359,9 @@ private:
     TBuffer MergeBlocks(TBuffer first, TBuffer second) {
         const std::size_t size_1 = first->GetSize();
         const std::size_t size_2 = second->GetSize();
-        const CacheAddr first_addr = first->GetCacheAddr();
-        const CacheAddr second_addr = second->GetCacheAddr();
-        const CacheAddr new_addr = std::min(first_addr, second_addr);
+        const VAddr first_addr = first->GetCpuAddr();
+        const VAddr second_addr = second->GetCpuAddr();
+        const VAddr new_addr = std::min(first_addr, second_addr);
         const std::size_t new_size = size_1 + size_2;
         TBuffer new_buffer = CreateBlock(new_addr, new_size);
         CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
@@ -345,9 +370,9 @@ private:
         second->SetEpoch(epoch);
         pending_destruction.push_back(first);
         pending_destruction.push_back(second);
-        const CacheAddr cache_addr_end = new_addr + new_size - 1;
+        const VAddr cpu_addr_end = new_addr + new_size - 1;
         u64 page_start = new_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             blocks[page_start] = new_buffer;
             ++page_start;
@@ -355,18 +380,18 @@ private:
         return new_buffer;
     }

-    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
+    TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
         TBuffer found{};
-        const CacheAddr cache_addr_end = cache_addr + size - 1;
-        u64 page_start = cache_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const VAddr cpu_addr_end = cpu_addr + size - 1;
+        u64 page_start = cpu_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             auto it = blocks.find(page_start);
             if (it == blocks.end()) {
                 if (found) {
                     found = EnlargeBlock(found);
                 } else {
-                    const CacheAddr start_addr = (page_start << block_page_bits);
+                    const VAddr start_addr = (page_start << block_page_bits);
                     found = CreateBlock(start_addr, block_page_size);
                     blocks[page_start] = found;
                 }
@@ -386,7 +411,7 @@ private:
         return found;
     }

-    void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+    void MarkRegionAsWritten(const VAddr start, const VAddr end) {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -400,7 +425,7 @@ private:
         }
     }

-    void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+    void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -416,7 +441,7 @@ private:
         }
     }

-    bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
+    bool IsRegionWritten(const VAddr start, const VAddr end) const {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -440,8 +465,8 @@ private:
     u64 buffer_offset = 0;
     u64 buffer_offset_base = 0;

-    using IntervalSet = boost::icl::interval_set<CacheAddr>;
-    using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
+    using IntervalSet = boost::icl::interval_set<VAddr>;
+    using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
     using IntervalType = typename IntervalCache::interval_type;
     IntervalCache mapped_addresses;

@@ -456,6 +481,8 @@ private:
     u64 epoch = 0;
     u64 modified_ticks = 0;

+    std::vector<u8> staging_buffer;
+
     std::recursive_mutex mutex;
 };

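The recurring pattern in this file: take the raw host pointer only when MemoryManager::IsGranularRange says the range maps to one contiguous piece, otherwise gather it into the new staging_buffer with ReadBlockUnsafe. A simplified, self-contained sketch of that gate (the real check walks the GPU page table first, and the GPU and CPU page sizes differ; this toy collapses all of that into one page size):

#include <cstdint>
#include <cstdio>
#include <vector>

constexpr std::uint64_t kPageSize = 0x1000; // toy page size
constexpr std::uint64_t kPageMask = kPageSize - 1;

// Same arithmetic as MemoryManager::IsGranularRange further down, with
// the page-table walk elided: a range is "granular" when it stays
// inside a single page.
bool IsGranularRange(std::uint64_t cpu_addr, std::size_t size) {
    return (cpu_addr & kPageMask) + size <= kPageSize;
}

// Pick the upload source: zero-copy host pointer on the fast path, a
// staging buffer gathered page by page otherwise (the gather itself,
// ReadBlockUnsafe in the real code, is stubbed out here).
const std::uint8_t* SourceForUpload(std::uint64_t cpu_addr, std::size_t size,
                                    const std::uint8_t* host_ptr,
                                    std::vector<std::uint8_t>& staging) {
    if (IsGranularRange(cpu_addr, size)) {
        return host_ptr;
    }
    staging.resize(size); // real code: memory_manager.ReadBlockUnsafe(...)
    return staging.data();
}

int main() {
    std::vector<std::uint8_t> staging;
    const std::uint8_t page[kPageSize] = {};
    std::printf("%d %d\n", IsGranularRange(0x0FF0, 0x10),  // 1: fits
                IsGranularRange(0x0FF0, 0x11));            // 0: crosses
    return SourceForUpload(0x0FF0, 0x10, page, staging) == page ? 0 : 1;
}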
========
@@ -11,7 +11,7 @@ namespace VideoCommon {

 class MapIntervalBase {
 public:
-    MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
+    MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
         : start{start}, end{end}, gpu_addr{gpu_addr} {}

     void SetCpuAddress(VAddr new_cpu_addr) {
@@ -26,7 +26,7 @@ public:
         return gpu_addr;
     }

-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
         return (start <= other_start && other_end <= end);
     }

@@ -46,11 +46,11 @@ public:
         return is_registered;
     }

-    CacheAddr GetStart() const {
+    VAddr GetStart() const {
         return start;
     }

-    CacheAddr GetEnd() const {
+    VAddr GetEnd() const {
         return end;
     }

@@ -76,8 +76,8 @@ public:
     }

 private:
-    CacheAddr start;
-    CacheAddr end;
+    VAddr start;
+    VAddr end;
     GPUVAddr gpu_addr;
     VAddr cpu_addr{};
     bool is_written{};
========
@@ -270,13 +270,13 @@ public:
     virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;

     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushRegion(VAddr addr, u64 size) = 0;

     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;

     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;

 protected:
     virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
========
@@ -30,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     gpu_thread.SwapBuffers(framebuffer);
 }

-void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
     gpu_thread.FlushRegion(addr, size);
 }

-void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
     gpu_thread.InvalidateRegion(addr, size);
 }

-void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     gpu_thread.FlushAndInvalidateRegion(addr, size);
 }

========
@@ -27,9 +27,9 @@ public:
     void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitIdle() const override;

 protected:
========
@@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     renderer->SwapBuffers(framebuffer);
 }

-void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
+void GPUSynch::FlushRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().FlushRegion(addr, size);
 }

-void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
+void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().InvalidateRegion(addr, size);
 }

-void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
 }

========
@@ -26,9 +26,9 @@ public:
     void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitIdle() const override {}

 protected:
========
@@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
 }

-void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
+void ThreadManager::FlushRegion(VAddr addr, u64 size) {
     PushCommand(FlushRegionCommand(addr, size));
 }

-void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
+void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
     system.Renderer().Rasterizer().InvalidateRegion(addr, size);
 }

-void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
     InvalidateRegion(addr, size);
 }
========
@@ -47,26 +47,26 @@ struct SwapBuffersCommand final {

 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
-    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}

-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };

 /// Command to signal to the GPU thread to invalidate a region
 struct InvalidateRegionCommand final {
-    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}

-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };

 /// Command to signal to the GPU thread to flush and invalidate a region
 struct FlushAndInvalidateRegionCommand final {
-    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
+    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
         : addr{addr}, size{size} {}

-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };

@@ -111,13 +111,13 @@ public:
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);

     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    void FlushRegion(CacheAddr addr, u64 size);
+    void FlushRegion(VAddr addr, u64 size);

     /// Notify rasterizer that any caches of the specified region should be invalidated
-    void InvalidateRegion(CacheAddr addr, u64 size);
+    void InvalidateRegion(VAddr addr, u64 size);

     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);

     // Wait until the gpu thread is idle.
     void WaitIdle() const;
========
|  | @ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { | |||
|     ASSERT((gpu_addr & page_mask) == 0); | ||||
| 
 | ||||
|     const u64 aligned_size{Common::AlignUp(size, page_size)}; | ||||
|     const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; | ||||
|     const auto cpu_addr = GpuToCpuAddress(gpu_addr); | ||||
|     ASSERT(cpu_addr); | ||||
| 
 | ||||
|     // Flush and invalidate through the GPU interface, to be asynchronous if possible.
 | ||||
|     system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); | ||||
|     system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size); | ||||
| 
 | ||||
|     UnmapRange(gpu_addr, aligned_size); | ||||
|     ASSERT(system.CurrentProcess() | ||||
|  | @ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const { | |||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     const u8* page_pointer{page_table.pointers[addr >> page_bits]}; | ||||
|     const u8* page_pointer{GetPointer(addr)}; | ||||
|     if (page_pointer) { | ||||
|         // NOTE: Avoid adding any extra logic to this fast-path block
 | ||||
|         T value; | ||||
|         std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T)); | ||||
|         std::memcpy(&value, page_pointer, sizeof(T)); | ||||
|         return value; | ||||
|     } | ||||
| 
 | ||||
|  | @ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) { | |||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     u8* page_pointer{page_table.pointers[addr >> page_bits]}; | ||||
|     u8* page_pointer{GetPointer(addr)}; | ||||
|     if (page_pointer) { | ||||
|         // NOTE: Avoid adding any extra logic to this fast-path block
 | ||||
|         std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T)); | ||||
|         std::memcpy(page_pointer, &data, sizeof(T)); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|  | @ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) { | |||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     u8* const page_pointer{page_table.pointers[addr >> page_bits]}; | ||||
|     if (page_pointer != nullptr) { | ||||
|         return page_pointer + (addr & page_mask); | ||||
|     auto& memory = system.Memory(); | ||||
| 
 | ||||
|     const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; | ||||
| 
 | ||||
|     if (page_addr != 0) { | ||||
|         return memory.GetPointer(page_addr + (addr & page_mask)); | ||||
|     } | ||||
| 
 | ||||
|     LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); | ||||
|  | @ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { | |||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     const u8* const page_pointer{page_table.pointers[addr >> page_bits]}; | ||||
|     if (page_pointer != nullptr) { | ||||
|         return page_pointer + (addr & page_mask); | ||||
|     const auto& memory = system.Memory(); | ||||
| 
 | ||||
|     const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; | ||||
| 
 | ||||
|     if (page_addr != 0) { | ||||
|         return memory.GetPointer(page_addr + (addr & page_mask)); | ||||
|     } | ||||
| 
 | ||||
|     LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); | ||||
|  | @ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s | |||
|     std::size_t page_index{src_addr >> page_bits}; | ||||
|     std::size_t page_offset{src_addr & page_mask}; | ||||
| 
 | ||||
|     auto& memory = system.Memory(); | ||||
| 
 | ||||
|     while (remaining_size > 0) { | ||||
|         const std::size_t copy_amount{ | ||||
|             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||||
| 
 | ||||
|         switch (page_table.attributes[page_index]) { | ||||
|         case Common::PageType::Memory: { | ||||
|             const u8* src_ptr{page_table.pointers[page_index] + page_offset}; | ||||
|             const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; | ||||
|             // Flush must happen on the rasterizer interface, such that memory is always synchronous
 | ||||
|             // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
 | ||||
|             rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); | ||||
|             std::memcpy(dest_buffer, src_ptr, copy_amount); | ||||
|             rasterizer.FlushRegion(src_addr, copy_amount); | ||||
|             memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); | ||||
|             break; | ||||
|         } | ||||
|         default: | ||||
|  | @ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, | |||
|     std::size_t page_index{src_addr >> page_bits}; | ||||
|     std::size_t page_offset{src_addr & page_mask}; | ||||
| 
 | ||||
|     auto& memory = system.Memory(); | ||||
| 
 | ||||
|     while (remaining_size > 0) { | ||||
|         const std::size_t copy_amount{ | ||||
|             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||||
|         const u8* page_pointer = page_table.pointers[page_index]; | ||||
|         if (page_pointer) { | ||||
|             const u8* src_ptr{page_pointer + page_offset}; | ||||
|             std::memcpy(dest_buffer, src_ptr, copy_amount); | ||||
|             const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; | ||||
|             memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); | ||||
|         } else { | ||||
|             std::memset(dest_buffer, 0, copy_amount); | ||||
|         } | ||||
|  | @ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const | |||
|     std::size_t page_index{dest_addr >> page_bits}; | ||||
|     std::size_t page_offset{dest_addr & page_mask}; | ||||
| 
 | ||||
|     auto& memory = system.Memory(); | ||||
| 
 | ||||
|     while (remaining_size > 0) { | ||||
|         const std::size_t copy_amount{ | ||||
|             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||||
| 
 | ||||
|         switch (page_table.attributes[page_index]) { | ||||
|         case Common::PageType::Memory: { | ||||
|             u8* dest_ptr{page_table.pointers[page_index] + page_offset}; | ||||
|             const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; | ||||
|             // Invalidate must happen on the rasterizer interface, such that memory is always
 | ||||
|             // synchronous when it is written (even when in asynchronous GPU mode).
 | ||||
|             rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); | ||||
|             std::memcpy(dest_ptr, src_buffer, copy_amount); | ||||
|             rasterizer.InvalidateRegion(dest_addr, copy_amount); | ||||
|             memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); | ||||
|             break; | ||||
|         } | ||||
|         default: | ||||
|  | @ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, | |||
|     std::size_t page_index{dest_addr >> page_bits}; | ||||
|     std::size_t page_offset{dest_addr & page_mask}; | ||||
| 
 | ||||
|     auto& memory = system.Memory(); | ||||
| 
 | ||||
|     while (remaining_size > 0) { | ||||
|         const std::size_t copy_amount{ | ||||
|             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||||
|         u8* page_pointer = page_table.pointers[page_index]; | ||||
|         if (page_pointer) { | ||||
|             u8* dest_ptr{page_pointer + page_offset}; | ||||
|             std::memcpy(dest_ptr, src_buffer, copy_amount); | ||||
|             const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; | ||||
|             memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); | ||||
|         } | ||||
|         page_index++; | ||||
|         page_offset = 0; | ||||
|  | @ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, | |||
| } | ||||
| 
 | ||||
| void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { | ||||
|     std::size_t remaining_size{size}; | ||||
|     std::size_t page_index{src_addr >> page_bits}; | ||||
|     std::size_t page_offset{src_addr & page_mask}; | ||||
| 
 | ||||
|     while (remaining_size > 0) { | ||||
|         const std::size_t copy_amount{ | ||||
|             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||||
| 
 | ||||
|         switch (page_table.attributes[page_index]) { | ||||
|         case Common::PageType::Memory: { | ||||
|             // Flush must happen on the rasterizer interface, such that memory is always synchronous
 | ||||
|             // when it is copied (even when in asynchronous GPU mode).
 | ||||
|             const u8* src_ptr{page_table.pointers[page_index] + page_offset}; | ||||
|             rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); | ||||
|             WriteBlock(dest_addr, src_ptr, copy_amount); | ||||
|             break; | ||||
|         } | ||||
|         default: | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
| 
 | ||||
|         page_index++; | ||||
|         page_offset = 0; | ||||
|         dest_addr += static_cast<VAddr>(copy_amount); | ||||
|         src_addr += static_cast<VAddr>(copy_amount); | ||||
|         remaining_size -= copy_amount; | ||||
|     } | ||||
|     std::vector<u8> tmp_buffer(size); | ||||
|     ReadBlock(src_addr, tmp_buffer.data(), size); | ||||
|     WriteBlock(dest_addr, tmp_buffer.data(), size); | ||||
| } | ||||
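CopyBlock no longer walks pages itself: it reads the source range into a staging vector and writes it back out, so the per-page flush and invalidate bookkeeping is inherited from ReadBlock/WriteBlock, and overlapping source/destination ranges now copy correctly. A minimal sketch of the overlap case, using a plain vector as a made-up stand-in for guest memory:

    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <vector>

    int main() {
        std::vector<std::uint8_t> guest(16);
        for (std::size_t i = 0; i < guest.size(); ++i) {
            guest[i] = static_cast<std::uint8_t>(i);
        }

        // Copy 8 bytes from offset 0 to offset 4: the ranges overlap, so staging
        // through a temporary buffer stays correct where a forward page-by-page
        // memcpy could re-read bytes it had already overwritten.
        const std::size_t size = 8, src = 0, dst = 4;
        std::vector<std::uint8_t> tmp_buffer(size);
        std::memcpy(tmp_buffer.data(), guest.data() + src, size); // ReadBlock
        std::memcpy(guest.data() + dst, tmp_buffer.data(), size); // WriteBlock
        for (const auto b : guest) {
            std::cout << static_cast<int>(b) << ' ';
        }
        std::cout << '\n'; // 0 1 2 3 0 1 2 3 4 5 6 7 12 13 14 15
    }

CopyBlockUnsafe below takes the same staging approach, just without the cache maintenance.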
| 
 | ||||
| void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { | ||||
|  | @ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const | |||
|     WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); | ||||
| } | ||||
| 
 | ||||
| bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { | ||||
|     const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; | ||||
|     const std::size_t page = (addr & Memory::PAGE_MASK) + size; | ||||
|     return page <= Memory::PAGE_SIZE; | ||||
| } | ||||
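IsGranularRange is a page-boundary test: a range is granular when its offset within the page plus its size still fits in one page, meaning the bytes are contiguous in host memory and reachable through a single pointer. A standalone sketch of the predicate, assuming the 4 KiB page size of the CPU address space:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    constexpr std::size_t PAGE_SIZE = 0x1000; // assumption: 4 KiB CPU pages
    constexpr std::uint64_t PAGE_MASK = PAGE_SIZE - 1;

    // True when [addr, addr + size) does not cross a page boundary.
    bool IsGranular(std::uint64_t addr, std::size_t size) {
        return (addr & PAGE_MASK) + size <= PAGE_SIZE;
    }

    int main() {
        std::cout << IsGranular(0x1000, 0x1000) << '\n'; // 1: exactly one full page
        std::cout << IsGranular(0x1800, 0x0900) << '\n'; // 0: spills into the next page
    }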
| 
 | ||||
| void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, | ||||
|                              VAddr backing_addr) { | ||||
|     LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, | ||||
|  |  | |||
|  | @ -97,6 +97,11 @@ public: | |||
|     void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); | ||||
|     void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); | ||||
| 
 | ||||
|     /**
 | ||||
|      * IsGranularRange checks whether a GPU memory region fits within a single host page, so it can be read directly through one pointer | ||||
|      */ | ||||
|     bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); | ||||
| 
 | ||||
| private: | ||||
|     using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; | ||||
|     using VMAHandle = VMAMap::const_iterator; | ||||
|  |  | |||
|  | @ -98,12 +98,12 @@ public: | |||
|                                                       static_cast<QueryCache&>(*this), | ||||
|                                                       VideoCore::QueryType::SamplesPassed}}} {} | ||||
| 
 | ||||
|     void InvalidateRegion(CacheAddr addr, std::size_t size) { | ||||
|     void InvalidateRegion(VAddr addr, std::size_t size) { | ||||
|         std::unique_lock lock{mutex}; | ||||
|         FlushAndRemoveRegion(addr, size); | ||||
|     } | ||||
| 
 | ||||
|     void FlushRegion(CacheAddr addr, std::size_t size) { | ||||
|     void FlushRegion(VAddr addr, std::size_t size) { | ||||
|         std::unique_lock lock{mutex}; | ||||
|         FlushAndRemoveRegion(addr, size); | ||||
|     } | ||||
|  | @ -117,14 +117,16 @@ public: | |||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { | ||||
|         std::unique_lock lock{mutex}; | ||||
|         auto& memory_manager = system.GPU().MemoryManager(); | ||||
|         const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||||
|         const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); | ||||
|         ASSERT(cpu_addr_opt); | ||||
|         VAddr cpu_addr = *cpu_addr_opt; | ||||
| 
 | ||||
|         CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); | ||||
|         CachedQuery* query = TryGet(cpu_addr); | ||||
|         if (!query) { | ||||
|             const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | ||||
|             ASSERT_OR_EXECUTE(cpu_addr, return;); | ||||
|             ASSERT_OR_EXECUTE(cpu_addr_opt, return;); | ||||
|             const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||||
| 
 | ||||
|             query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); | ||||
|             query = Register(type, cpu_addr, host_ptr, timestamp.has_value()); | ||||
|         } | ||||
| 
 | ||||
|         query->BindCounter(Stream(type).Current(), timestamp); | ||||
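Query now translates the GPU address up front with GpuToCpuAddress, which returns std::optional<VAddr>, so an unmapped address is an explicit miss rather than a null host pointer. A small sketch of that guard pattern; the translation function, its unmapped range, and the fixed offset are all made up for illustration:

    #include <cstdint>
    #include <iostream>
    #include <optional>

    using VAddr = std::uint64_t;
    using GPUVAddr = std::uint64_t;

    // Hypothetical stand-in for MemoryManager::GpuToCpuAddress.
    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr) {
        if (gpu_addr < 0x1000) {
            return std::nullopt; // pretend the first page is unmapped
        }
        return gpu_addr + 0x80000000ULL; // illustrative fixed offset
    }

    int main() {
        const std::optional<VAddr> cpu_addr = GpuToCpuAddress(0x2000);
        if (!cpu_addr) {
            return 0; // bail out, mirroring ASSERT_OR_EXECUTE(cpu_addr_opt, return;)
        }
        std::cout << std::hex << *cpu_addr << '\n';
    }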
|  | @ -173,11 +175,11 @@ protected: | |||
| 
 | ||||
| private: | ||||
|     /// Flushes a memory range to guest memory and removes it from the cache.
 | ||||
|     void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { | ||||
|     void FlushAndRemoveRegion(VAddr addr, std::size_t size) { | ||||
|         const u64 addr_begin = static_cast<u64>(addr); | ||||
|         const u64 addr_end = addr_begin + static_cast<u64>(size); | ||||
|         const auto in_range = [addr_begin, addr_end](CachedQuery& query) { | ||||
|             const u64 cache_begin = query.GetCacheAddr(); | ||||
|             const u64 cache_begin = query.GetCpuAddr(); | ||||
|             const u64 cache_end = cache_begin + query.SizeInBytes(); | ||||
|             return cache_begin < addr_end && addr_begin < cache_end; | ||||
|         }; | ||||
|  | @ -193,7 +195,7 @@ private: | |||
|                 if (!in_range(query)) { | ||||
|                     continue; | ||||
|                 } | ||||
|                 rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); | ||||
|                 rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); | ||||
|                 query.Flush(); | ||||
|             } | ||||
|             contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), | ||||
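The in_range lambda is the standard half-open interval intersection test: [a_begin, a_end) and [b_begin, b_end) overlap exactly when each range starts before the other ends. The same predicate reappears as SurfaceBase::Overlaps further down. A tiny sketch with made-up addresses:

    #include <cstdint>
    #include <iostream>

    bool Overlaps(std::uint64_t a_begin, std::uint64_t a_end,
                  std::uint64_t b_begin, std::uint64_t b_end) {
        return a_begin < b_end && b_begin < a_end;
    }

    int main() {
        std::cout << Overlaps(0x1000, 0x2000, 0x1FFF, 0x3000) << '\n'; // 1: one byte shared
        std::cout << Overlaps(0x1000, 0x2000, 0x2000, 0x3000) << '\n'; // 0: merely adjacent
    }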
|  | @ -204,22 +206,21 @@ private: | |||
|     /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
 | ||||
|     CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { | ||||
|         rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); | ||||
|         const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; | ||||
|         const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT; | ||||
|         return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, | ||||
|                                                   host_ptr); | ||||
|     } | ||||
| 
 | ||||
|     /// Tries to get a cached query. Returns nullptr on failure.
 | ||||
|     CachedQuery* TryGet(CacheAddr addr) { | ||||
|     CachedQuery* TryGet(VAddr addr) { | ||||
|         const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; | ||||
|         const auto it = cached_queries.find(page); | ||||
|         if (it == std::end(cached_queries)) { | ||||
|             return nullptr; | ||||
|         } | ||||
|         auto& contents = it->second; | ||||
|         const auto found = | ||||
|             std::find_if(std::begin(contents), std::end(contents), | ||||
|                          [addr](auto& query) { return query.GetCacheAddr() == addr; }); | ||||
|         const auto found = std::find_if(std::begin(contents), std::end(contents), | ||||
|                                         [addr](auto& query) { return query.GetCpuAddr() == addr; }); | ||||
|         return found != std::end(contents) ? &*found : nullptr; | ||||
|     } | ||||
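cached_queries buckets queries by the page of their CPU address, so TryGet only scans a single bucket for an exact match. A self-contained sketch of the two-level lookup; the 12-bit page shift and the Entry type are assumptions made for illustration:

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    constexpr std::uint64_t PAGE_SHIFT = 12; // assumption: 4 KiB pages

    struct Entry {
        std::uint64_t cpu_addr;
        std::string name;
    };

    std::unordered_map<std::uint64_t, std::vector<Entry>> buckets;

    const Entry* TryGet(std::uint64_t addr) {
        const auto it = buckets.find(addr >> PAGE_SHIFT);
        if (it == buckets.end()) {
            return nullptr;
        }
        for (const Entry& entry : it->second) {
            if (entry.cpu_addr == addr) { // exact-address match inside the bucket
                return &entry;
            }
        }
        return nullptr;
    }

    int main() {
        buckets[0x12345000 >> PAGE_SHIFT].push_back({0x12345000, "samples_passed"});
        const Entry* hit = TryGet(0x12345000);
        std::cout << (hit ? hit->name : "miss") << '\n';
    }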
| 
 | ||||
|  | @ -323,14 +324,10 @@ public: | |||
|         timestamp = timestamp_; | ||||
|     } | ||||
| 
 | ||||
|     VAddr CpuAddr() const noexcept { | ||||
|     VAddr GetCpuAddr() const noexcept { | ||||
|         return cpu_addr; | ||||
|     } | ||||
| 
 | ||||
|     CacheAddr GetCacheAddr() const noexcept { | ||||
|         return ToCacheAddr(host_ptr); | ||||
|     } | ||||
| 
 | ||||
|     u64 SizeInBytes() const noexcept { | ||||
|         return SizeInBytes(timestamp.has_value()); | ||||
|     } | ||||
|  |  | |||
|  | @ -18,22 +18,14 @@ | |||
| 
 | ||||
| class RasterizerCacheObject { | ||||
| public: | ||||
|     explicit RasterizerCacheObject(const u8* host_ptr) | ||||
|         : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {} | ||||
|     explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {} | ||||
| 
 | ||||
|     virtual ~RasterizerCacheObject(); | ||||
| 
 | ||||
|     CacheAddr GetCacheAddr() const { | ||||
|         return cache_addr; | ||||
|     VAddr GetCpuAddr() const { | ||||
|         return cpu_addr; | ||||
|     } | ||||
| 
 | ||||
|     const u8* GetHostPtr() const { | ||||
|         return host_ptr; | ||||
|     } | ||||
| 
 | ||||
|     /// Gets the address of the shader in guest memory, required for cache management
 | ||||
|     virtual VAddr GetCpuAddr() const = 0; | ||||
| 
 | ||||
|     /// Gets the size of the shader in guest memory, required for cache management
 | ||||
|     virtual std::size_t GetSizeInBytes() const = 0; | ||||
| 
 | ||||
|  | @ -68,8 +60,7 @@ private: | |||
|     bool is_registered{};      ///< Whether the object is currently registered with the cache
 | ||||
|     bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
 | ||||
|     u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
 | ||||
|     const u8* host_ptr{};      ///< Pointer to the memory backing this cached region
 | ||||
|     CacheAddr cache_addr{};    ///< Cache address memory, unique from emulated virtual address space
 | ||||
|     VAddr cpu_addr{};          ///< CPU virtual address of the guest memory backing this object
 | ||||
| }; | ||||
| 
 | ||||
| template <class T> | ||||
|  | @ -80,7 +71,7 @@ public: | |||
|     explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} | ||||
| 
 | ||||
|     /// Write any cached resources overlapping the specified region back to memory
 | ||||
|     void FlushRegion(CacheAddr addr, std::size_t size) { | ||||
|     void FlushRegion(VAddr addr, std::size_t size) { | ||||
|         std::lock_guard lock{mutex}; | ||||
| 
 | ||||
|         const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | ||||
|  | @ -90,7 +81,7 @@ public: | |||
|     } | ||||
| 
 | ||||
|     /// Mark the specified region as being invalidated
 | ||||
|     void InvalidateRegion(CacheAddr addr, u64 size) { | ||||
|     void InvalidateRegion(VAddr addr, u64 size) { | ||||
|         std::lock_guard lock{mutex}; | ||||
| 
 | ||||
|         const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | ||||
|  | @ -114,27 +105,20 @@ public: | |||
| 
 | ||||
| protected: | ||||
|     /// Tries to get an object from the cache with the specified CPU address
 | ||||
|     T TryGet(CacheAddr addr) const { | ||||
|     T TryGet(VAddr addr) const { | ||||
|         const auto iter = map_cache.find(addr); | ||||
|         if (iter != map_cache.end()) | ||||
|             return iter->second; | ||||
|         return nullptr; | ||||
|     } | ||||
| 
 | ||||
|     T TryGet(const void* addr) const { | ||||
|         const auto iter = map_cache.find(ToCacheAddr(addr)); | ||||
|         if (iter != map_cache.end()) | ||||
|             return iter->second; | ||||
|         return nullptr; | ||||
|     } | ||||
| 
 | ||||
|     /// Register an object into the cache
 | ||||
|     virtual void Register(const T& object) { | ||||
|         std::lock_guard lock{mutex}; | ||||
| 
 | ||||
|         object->SetIsRegistered(true); | ||||
|         interval_cache.add({GetInterval(object), ObjectSet{object}}); | ||||
|         map_cache.insert({object->GetCacheAddr(), object}); | ||||
|         map_cache.insert({object->GetCpuAddr(), object}); | ||||
|         rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); | ||||
|     } | ||||
| 
 | ||||
|  | @ -144,7 +128,7 @@ protected: | |||
| 
 | ||||
|         object->SetIsRegistered(false); | ||||
|         rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); | ||||
|         const CacheAddr addr = object->GetCacheAddr(); | ||||
|         const VAddr addr = object->GetCpuAddr(); | ||||
|         interval_cache.subtract({GetInterval(object), ObjectSet{object}}); | ||||
|         map_cache.erase(addr); | ||||
|     } | ||||
|  | @ -173,7 +157,7 @@ protected: | |||
| 
 | ||||
| private: | ||||
|     /// Returns a list of cached objects from the specified memory region, ordered by access time
 | ||||
|     std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { | ||||
|     std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { | ||||
|         if (size == 0) { | ||||
|             return {}; | ||||
|         } | ||||
|  | @ -197,13 +181,13 @@ private: | |||
|     } | ||||
| 
 | ||||
|     using ObjectSet = std::set<T>; | ||||
|     using ObjectCache = std::unordered_map<CacheAddr, T>; | ||||
|     using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; | ||||
|     using ObjectCache = std::unordered_map<VAddr, T>; | ||||
|     using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; | ||||
|     using ObjectInterval = typename IntervalCache::interval_type; | ||||
| 
 | ||||
|     static auto GetInterval(const T& object) { | ||||
|         return ObjectInterval::right_open(object->GetCacheAddr(), | ||||
|                                           object->GetCacheAddr() + object->GetSizeInBytes()); | ||||
|         return ObjectInterval::right_open(object->GetCpuAddr(), | ||||
|                                           object->GetCpuAddr() + object->GetSizeInBytes()); | ||||
|     } | ||||
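interval_cache is a boost::icl::interval_map keyed by VAddr: add inserts an object into the set attached to every sub-interval it covers, subtract removes it, and equal_range yields everything overlapping a queried range. A minimal sketch of that usage, with strings standing in for cache objects:

    #include <boost/icl/interval_map.hpp>
    #include <iostream>
    #include <set>
    #include <string>

    int main() {
        using Addr = unsigned long long;
        using Interval = boost::icl::interval<Addr>;
        boost::icl::interval_map<Addr, std::set<std::string>> cache;

        // Register two overlapping objects as right-open address intervals.
        cache.add(std::make_pair(Interval::right_open(0x1000, 0x2000),
                                 std::set<std::string>{"vertex_buffer"}));
        cache.add(std::make_pair(Interval::right_open(0x1800, 0x2800),
                                 std::set<std::string>{"texture"}));

        // Every object overlapping [0x1900, 0x1A00): both are reported.
        const auto range = cache.equal_range(Interval::right_open(0x1900, 0x1A00));
        for (auto it = range.first; it != range.second; ++it) {
            for (const std::string& name : it->second) {
                std::cout << name << '\n';
            }
        }
    }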
| 
 | ||||
|     ObjectCache map_cache; | ||||
|  |  | |||
|  | @ -53,14 +53,14 @@ public: | |||
|     virtual void FlushAll() = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||
|     virtual void FlushRegion(CacheAddr addr, u64 size) = 0; | ||||
|     virtual void FlushRegion(VAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||
|     virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; | ||||
|     virtual void InvalidateRegion(VAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||
|     /// and invalidated
 | ||||
|     virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | ||||
|     virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Notify the rasterizer to send all written commands to the host GPU.
 | ||||
|     virtual void FlushCommands() = 0; | ||||
|  |  | |||
|  | @ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 
 | ||||
| MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); | ||||
| 
 | ||||
| CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) | ||||
|     : VideoCommon::BufferBlock{cache_addr, size} { | ||||
| CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) | ||||
|     : VideoCommon::BufferBlock{cpu_addr, size} { | ||||
|     gl_buffer.Create(); | ||||
|     glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | ||||
| } | ||||
|  | @ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() { | |||
|     glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | ||||
| } | ||||
| 
 | ||||
| Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { | ||||
|     return std::make_shared<CachedBufferBlock>(cache_addr, size); | ||||
| Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | ||||
|     return std::make_shared<CachedBufferBlock>(cpu_addr, size); | ||||
| } | ||||
| 
 | ||||
| void OGLBufferCache::WriteBarrier() { | ||||
|  |  | |||
|  | @ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf | |||
| 
 | ||||
| class CachedBufferBlock : public VideoCommon::BufferBlock { | ||||
| public: | ||||
|     explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); | ||||
|     explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); | ||||
|     ~CachedBufferBlock(); | ||||
| 
 | ||||
|     const GLuint* GetHandle() const { | ||||
|  | @ -55,7 +55,7 @@ public: | |||
|     } | ||||
| 
 | ||||
| protected: | ||||
|     Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; | ||||
|     Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; | ||||
| 
 | ||||
|     void WriteBarrier() override; | ||||
| 
 | ||||
|  |  | |||
|  | @ -656,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
| 
 | ||||
| void RasterizerOpenGL::FlushAll() {} | ||||
| 
 | ||||
| void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | ||||
| void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     if (!addr || !size) { | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|     texture_cache.FlushRegion(addr, size); | ||||
|  | @ -666,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | |||
|     query_cache.FlushRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | ||||
| void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     if (!addr || !size) { | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|     texture_cache.InvalidateRegion(addr, size); | ||||
|  | @ -677,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | |||
|     query_cache.InvalidateRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | ||||
| void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
|     if (Settings::values.use_accurate_gpu_emulation) { | ||||
|         FlushRegion(addr, size); | ||||
|     } | ||||
|  | @ -716,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 
 | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
| 
 | ||||
|     const auto surface{ | ||||
|         texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))}; | ||||
|     const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; | ||||
|     if (!surface) { | ||||
|         return {}; | ||||
|     } | ||||
|  |  | |||
|  | @ -65,9 +65,9 @@ public: | |||
|     void ResetCounter(VideoCore::QueryType type) override; | ||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||||
|     void FlushAll() override; | ||||
|     void FlushRegion(CacheAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(CacheAddr addr, u64 size) override; | ||||
|     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | ||||
|     void FlushRegion(VAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(VAddr addr, u64 size) override; | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||||
|     void FlushCommands() override; | ||||
|     void TickFrame() override; | ||||
|     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||||
|  |  | |||
|  | @ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 
 | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, | ||||
| CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, | ||||
|                            std::shared_ptr<VideoCommon::Shader::Registry> registry, | ||||
|                            ShaderEntries entries, std::shared_ptr<OGLProgram> program) | ||||
|     : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, | ||||
|       cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} | ||||
|     : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, | ||||
|       size_in_bytes{size_in_bytes}, program{std::move(program)} {} | ||||
| 
 | ||||
| CachedShader::~CachedShader() = default; | ||||
| 
 | ||||
|  | @ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
|     entry.bindless_samplers = registry->GetBindlessSamplers(); | ||||
|     params.disk_cache.SaveEntry(std::move(entry)); | ||||
| 
 | ||||
|     return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | ||||
|                                                           size_in_bytes, std::move(registry), | ||||
|                                                           MakeEntries(ir), std::move(program))); | ||||
|     return std::shared_ptr<CachedShader>(new CachedShader( | ||||
|         params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); | ||||
| } | ||||
| 
 | ||||
| Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | ||||
|  | @ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog | |||
|     entry.bindless_samplers = registry->GetBindlessSamplers(); | ||||
|     params.disk_cache.SaveEntry(std::move(entry)); | ||||
| 
 | ||||
|     return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | ||||
|                                                           size_in_bytes, std::move(registry), | ||||
|                                                           MakeEntries(ir), std::move(program))); | ||||
|     return std::shared_ptr<CachedShader>(new CachedShader( | ||||
|         params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); | ||||
| } | ||||
| 
 | ||||
| Shader CachedShader::CreateFromCache(const ShaderParameters& params, | ||||
|                                      const PrecompiledShader& precompiled_shader, | ||||
|                                      std::size_t size_in_bytes) { | ||||
|     return std::shared_ptr<CachedShader>(new CachedShader( | ||||
|         params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, | ||||
|         precompiled_shader.entries, precompiled_shader.program)); | ||||
|     return std::shared_ptr<CachedShader>( | ||||
|         new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry, | ||||
|                          precompiled_shader.entries, precompiled_shader.program)); | ||||
| } | ||||
| 
 | ||||
| ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | ||||
|  | @ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
|     const GPUVAddr address{GetShaderAddress(system, program)}; | ||||
| 
 | ||||
|     // Look up shader in the cache based on address
 | ||||
|     const auto host_ptr{memory_manager.GetPointer(address)}; | ||||
|     Shader shader{TryGet(host_ptr)}; | ||||
|     const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; | ||||
|     Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr}; | ||||
|     if (shader) { | ||||
|         return last_shaders[static_cast<std::size_t>(program)] = shader; | ||||
|     } | ||||
| 
 | ||||
|     const auto host_ptr{memory_manager.GetPointer(address)}; | ||||
| 
 | ||||
|     // No shader found - create a new one
 | ||||
|     ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; | ||||
|     ProgramCode code_b; | ||||
|  | @ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 
 | ||||
|     const auto unique_identifier = GetUniqueIdentifier( | ||||
|         GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); | ||||
|     const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; | ||||
|     const ShaderParameters params{system,   disk_cache, device, | ||||
|                                   cpu_addr, host_ptr,   unique_identifier}; | ||||
| 
 | ||||
|     const ShaderParameters params{system,    disk_cache, device, | ||||
|                                   *cpu_addr, host_ptr,   unique_identifier}; | ||||
| 
 | ||||
|     const auto found = runtime_cache.find(unique_identifier); | ||||
|     if (found == runtime_cache.end()) { | ||||
|  | @ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 
 | ||||
| Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | ||||
|     auto& memory_manager{system.GPU().MemoryManager()}; | ||||
|     const auto host_ptr{memory_manager.GetPointer(code_addr)}; | ||||
|     auto kernel = TryGet(host_ptr); | ||||
|     const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; | ||||
| 
 | ||||
|     auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr; | ||||
|     if (kernel) { | ||||
|         return kernel; | ||||
|     } | ||||
| 
 | ||||
|     const auto host_ptr{memory_manager.GetPointer(code_addr)}; | ||||
|     // No kernel found, create a new one
 | ||||
|     auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | ||||
|     const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; | ||||
|     const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | ||||
|     const ShaderParameters params{system,   disk_cache, device, | ||||
|                                   cpu_addr, host_ptr,   unique_identifier}; | ||||
| 
 | ||||
|     const ShaderParameters params{system,    disk_cache, device, | ||||
|                                   *cpu_addr, host_ptr,   unique_identifier}; | ||||
| 
 | ||||
|     const auto found = runtime_cache.find(unique_identifier); | ||||
|     if (found == runtime_cache.end()) { | ||||
|  |  | |||
|  | @ -65,11 +65,6 @@ public: | |||
|     /// Gets the GL program handle for the shader
 | ||||
|     GLuint GetHandle() const; | ||||
| 
 | ||||
|     /// Returns the guest CPU address of the shader
 | ||||
|     VAddr GetCpuAddr() const override { | ||||
|         return cpu_addr; | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the size in bytes of the shader
 | ||||
|     std::size_t GetSizeInBytes() const override { | ||||
|         return size_in_bytes; | ||||
|  | @ -90,13 +85,12 @@ public: | |||
|                                   std::size_t size_in_bytes); | ||||
| 
 | ||||
| private: | ||||
|     explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, | ||||
|     explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, | ||||
|                           std::shared_ptr<VideoCommon::Shader::Registry> registry, | ||||
|                           ShaderEntries entries, std::shared_ptr<OGLProgram> program); | ||||
| 
 | ||||
|     std::shared_ptr<VideoCommon::Shader::Registry> registry; | ||||
|     ShaderEntries entries; | ||||
|     VAddr cpu_addr = 0; | ||||
|     std::size_t size_in_bytes = 0; | ||||
|     std::shared_ptr<OGLProgram> program; | ||||
| }; | ||||
|  |  | |||
|  | @ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { | |||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, | ||||
|                                      CacheAddr cache_addr, std::size_t size) | ||||
|     : VideoCommon::BufferBlock{cache_addr, size} { | ||||
|                                      VAddr cpu_addr, std::size_t size) | ||||
|     : VideoCommon::BufferBlock{cpu_addr, size} { | ||||
|     const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), | ||||
|                                          BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | | ||||
|                                              vk::BufferUsageFlagBits::eTransferDst, | ||||
|  | @ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S | |||
| 
 | ||||
| VKBufferCache::~VKBufferCache() = default; | ||||
| 
 | ||||
| Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { | ||||
|     return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); | ||||
| Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | ||||
|     return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size); | ||||
| } | ||||
| 
 | ||||
| const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { | ||||
|  |  | |||
|  | @ -30,7 +30,7 @@ class VKScheduler; | |||
| class CachedBufferBlock final : public VideoCommon::BufferBlock { | ||||
| public: | ||||
|     explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, | ||||
|                                CacheAddr cache_addr, std::size_t size); | ||||
|                                VAddr cpu_addr, std::size_t size); | ||||
|     ~CachedBufferBlock(); | ||||
| 
 | ||||
|     const vk::Buffer* GetHandle() const { | ||||
|  | @ -55,7 +55,7 @@ public: | |||
| protected: | ||||
|     void WriteBarrier() override {} | ||||
| 
 | ||||
|     Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; | ||||
|     Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; | ||||
| 
 | ||||
|     const vk::Buffer* ToHandle(const Buffer& buffer) override; | ||||
| 
 | ||||
|  |  | |||
|  | @ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries, | |||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, | ||||
|                            GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, | ||||
|                            ProgramCode program_code, u32 main_offset) | ||||
|     : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, | ||||
|       program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, | ||||
|       shader_ir{this->program_code, main_offset, compiler_settings, registry}, | ||||
|                            GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, | ||||
|                            u32 main_offset) | ||||
|     : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)}, | ||||
|       registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, | ||||
|                                                            compiler_settings, registry}, | ||||
|       entries{GenerateShaderEntries(shader_ir)} {} | ||||
| 
 | ||||
| CachedShader::~CachedShader() = default; | ||||
|  | @ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||
| 
 | ||||
|         auto& memory_manager{system.GPU().MemoryManager()}; | ||||
|         const GPUVAddr program_addr{GetShaderAddress(system, program)}; | ||||
|         const auto host_ptr{memory_manager.GetPointer(program_addr)}; | ||||
|         auto shader = TryGet(host_ptr); | ||||
|         const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | ||||
|         ASSERT(cpu_addr); | ||||
|         auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; | ||||
|         if (!shader) { | ||||
|             const auto host_ptr{memory_manager.GetPointer(program_addr)}; | ||||
| 
 | ||||
|             // No shader found - create a new one
 | ||||
|             constexpr u32 stage_offset = 10; | ||||
|             const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); | ||||
|             auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); | ||||
| 
 | ||||
|             const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | ||||
|             ASSERT(cpu_addr); | ||||
| 
 | ||||
|             shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, | ||||
|                                                     host_ptr, std::move(code), stage_offset); | ||||
|                                                     std::move(code), stage_offset); | ||||
|             Register(shader); | ||||
|         } | ||||
|         shaders[index] = std::move(shader); | ||||
|  | @ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 
 | ||||
|     auto& memory_manager = system.GPU().MemoryManager(); | ||||
|     const auto program_addr = key.shader; | ||||
|     const auto host_ptr = memory_manager.GetPointer(program_addr); | ||||
| 
 | ||||
|     auto shader = TryGet(host_ptr); | ||||
|     const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | ||||
|     ASSERT(cpu_addr); | ||||
| 
 | ||||
|     auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; | ||||
|     if (!shader) { | ||||
|         // No shader found - create a new one
 | ||||
|         const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | ||||
|         ASSERT(cpu_addr); | ||||
|         const auto host_ptr = memory_manager.GetPointer(program_addr); | ||||
| 
 | ||||
|         auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); | ||||
|         constexpr u32 kernel_main_offset = 0; | ||||
|         shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, | ||||
|                                                 program_addr, *cpu_addr, host_ptr, std::move(code), | ||||
|                                                 program_addr, *cpu_addr, std::move(code), | ||||
|                                                 kernel_main_offset); | ||||
|         Register(shader); | ||||
|     } | ||||
|  | @ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
|         } | ||||
| 
 | ||||
|         const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); | ||||
|         const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||||
|         const auto shader = TryGet(host_ptr); | ||||
|         const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | ||||
|         ASSERT(cpu_addr); | ||||
|         const auto shader = TryGet(*cpu_addr); | ||||
|         ASSERT(shader); | ||||
| 
 | ||||
|         const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
 | ||||
|  |  | |||
|  | @ -113,17 +113,13 @@ namespace Vulkan { | |||
| class CachedShader final : public RasterizerCacheObject { | ||||
| public: | ||||
|     explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, | ||||
|                           VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); | ||||
|                           VAddr cpu_addr, ProgramCode program_code, u32 main_offset); | ||||
|     ~CachedShader(); | ||||
| 
 | ||||
|     GPUVAddr GetGpuAddr() const { | ||||
|         return gpu_addr; | ||||
|     } | ||||
| 
 | ||||
|     VAddr GetCpuAddr() const override { | ||||
|         return cpu_addr; | ||||
|     } | ||||
| 
 | ||||
|     std::size_t GetSizeInBytes() const override { | ||||
|         return program_code.size() * sizeof(u64); | ||||
|     } | ||||
|  | @ -149,7 +145,6 @@ private: | |||
|                                                                  Tegra::Engines::ShaderType stage); | ||||
| 
 | ||||
|     GPUVAddr gpu_addr{}; | ||||
|     VAddr cpu_addr{}; | ||||
|     ProgramCode program_code; | ||||
|     VideoCommon::Shader::Registry registry; | ||||
|     VideoCommon::Shader::ShaderIR shader_ir; | ||||
|  |  | |||
|  | @ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
| 
 | ||||
| void RasterizerVulkan::FlushAll() {} | ||||
| 
 | ||||
| void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { | ||||
| void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|     texture_cache.FlushRegion(addr, size); | ||||
|     buffer_cache.FlushRegion(addr, size); | ||||
|     query_cache.FlushRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { | ||||
| void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | ||||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|     texture_cache.InvalidateRegion(addr, size); | ||||
|     pipeline_cache.InvalidateRegion(addr, size); | ||||
|     buffer_cache.InvalidateRegion(addr, size); | ||||
|     query_cache.InvalidateRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | ||||
| void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
|     FlushRegion(addr, size); | ||||
|     InvalidateRegion(addr, size); | ||||
| } | ||||
|  | @ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; | ||||
|     const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)}; | ||||
|     const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; | ||||
|     if (!surface) { | ||||
|         return false; | ||||
|     } | ||||
|  |  | |||
|  | @ -118,9 +118,9 @@ public: | |||
|     void ResetCounter(VideoCore::QueryType type) override; | ||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||||
|     void FlushAll() override; | ||||
|     void FlushRegion(CacheAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(CacheAddr addr, u64 size) override; | ||||
|     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | ||||
|     void FlushRegion(VAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(VAddr addr, u64 size) override; | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||||
|     void FlushCommands() override; | ||||
|     void TickFrame() override; | ||||
|     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||||
|  |  | |||
|  | @ -190,22 +190,11 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, | |||
|     MICROPROFILE_SCOPE(GPU_Load_Texture); | ||||
|     auto& staging_buffer = staging_cache.GetBuffer(0); | ||||
|     u8* host_ptr; | ||||
|     is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); | ||||
| 
 | ||||
|     // Handle continuity
 | ||||
|     if (is_continuous) { | ||||
|         // Use physical memory directly
 | ||||
|         host_ptr = memory_manager.GetPointer(gpu_addr); | ||||
|         if (!host_ptr) { | ||||
|             return; | ||||
|         } | ||||
|     } else { | ||||
|         // Use an extra temporary buffer
 | ||||
|         auto& tmp_buffer = staging_cache.GetBuffer(1); | ||||
|         tmp_buffer.resize(guest_memory_size); | ||||
|         host_ptr = tmp_buffer.data(); | ||||
|         memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||||
|     } | ||||
|     // Use an extra temporary buffer
 | ||||
|     auto& tmp_buffer = staging_cache.GetBuffer(1); | ||||
|     tmp_buffer.resize(guest_memory_size); | ||||
|     host_ptr = tmp_buffer.data(); | ||||
|     memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||||
| 
 | ||||
|     if (params.is_tiled) { | ||||
|         ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", | ||||
|  | @ -257,19 +246,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, | |||
|     auto& staging_buffer = staging_cache.GetBuffer(0); | ||||
|     u8* host_ptr; | ||||
| 
 | ||||
|     // Handle continuity
 | ||||
|     if (is_continuous) { | ||||
|         // Use physical memory directly
 | ||||
|         host_ptr = memory_manager.GetPointer(gpu_addr); | ||||
|         if (!host_ptr) { | ||||
|             return; | ||||
|         } | ||||
|     } else { | ||||
|         // Use an extra temporary buffer
 | ||||
|         auto& tmp_buffer = staging_cache.GetBuffer(1); | ||||
|         tmp_buffer.resize(guest_memory_size); | ||||
|         host_ptr = tmp_buffer.data(); | ||||
|     } | ||||
|     // Use an extra temporary buffer
 | ||||
|     auto& tmp_buffer = staging_cache.GetBuffer(1); | ||||
|     tmp_buffer.resize(guest_memory_size); | ||||
|     host_ptr = tmp_buffer.data(); | ||||
| 
 | ||||
|     if (params.is_tiled) { | ||||
|         ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); | ||||
|  | @ -300,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, | |||
|             } | ||||
|         } | ||||
|     } | ||||
|     if (!is_continuous) { | ||||
|         memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||||
|     } | ||||
|     memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||||
| } | ||||
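With the continuity fast path removed, LoadBuffer and FlushBuffer always stage through a temporary buffer and move it with the Unsafe block helpers, whether or not the GPU mapping happens to be physically contiguous. A reduced sketch of the staging round-trip; MemoryManagerStub is a made-up stand-in for Tegra::MemoryManager:

    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <vector>

    // Made-up stand-in for Tegra::MemoryManager, backed by one flat vector.
    struct MemoryManagerStub {
        std::vector<std::uint8_t> guest = std::vector<std::uint8_t>(0x1000, 0xAB);
        void ReadBlockUnsafe(std::uint64_t addr, void* dest, std::size_t size) const {
            std::memcpy(dest, guest.data() + addr, size);
        }
        void WriteBlockUnsafe(std::uint64_t addr, const void* src, std::size_t size) {
            std::memcpy(guest.data() + addr, src, size);
        }
    };

    int main() {
        MemoryManagerStub memory_manager;
        const std::uint64_t gpu_addr = 0x100;
        const std::size_t guest_memory_size = 0x40;

        // Load: stage the guest bytes, then deswizzle/convert out of tmp_buffer.
        std::vector<std::uint8_t> tmp_buffer(guest_memory_size);
        memory_manager.ReadBlockUnsafe(gpu_addr, tmp_buffer.data(), guest_memory_size);

        // Flush: mutate the staged copy, then write the whole range back.
        tmp_buffer[0] = 0xCD;
        memory_manager.WriteBlockUnsafe(gpu_addr, tmp_buffer.data(), guest_memory_size);
        std::cout << std::hex << static_cast<int>(memory_manager.guest[0x100]) << '\n'; // cd
    }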
| 
 | ||||
| } // namespace VideoCommon
 | ||||
|  |  | |||
|  | @ -68,8 +68,8 @@ public: | |||
|         return gpu_addr; | ||||
|     } | ||||
| 
 | ||||
|     bool Overlaps(const CacheAddr start, const CacheAddr end) const { | ||||
|         return (cache_addr < end) && (cache_addr_end > start); | ||||
|     bool Overlaps(const VAddr start, const VAddr end) const { | ||||
|         return (cpu_addr < end) && (cpu_addr_end > start); | ||||
|     } | ||||
| 
 | ||||
|     bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { | ||||
|  | @ -86,21 +86,13 @@ public: | |||
|         return cpu_addr; | ||||
|     } | ||||
| 
 | ||||
|     VAddr GetCpuAddrEnd() const { | ||||
|         return cpu_addr_end; | ||||
|     } | ||||
| 
 | ||||
|     void SetCpuAddr(const VAddr new_addr) { | ||||
|         cpu_addr = new_addr; | ||||
|     } | ||||
| 
 | ||||
|     CacheAddr GetCacheAddr() const { | ||||
|         return cache_addr; | ||||
|     } | ||||
| 
 | ||||
|     CacheAddr GetCacheAddrEnd() const { | ||||
|         return cache_addr_end; | ||||
|     } | ||||
| 
 | ||||
|     void SetCacheAddr(const CacheAddr new_addr) { | ||||
|         cache_addr = new_addr; | ||||
|         cache_addr_end = new_addr + guest_memory_size; | ||||
|         cpu_addr_end = new_addr + guest_memory_size; | ||||
|     } | ||||
| 
 | ||||
|     const SurfaceParams& GetSurfaceParams() const { | ||||
|  | @ -119,14 +111,6 @@ public: | |||
|         return mipmap_sizes[level]; | ||||
|     } | ||||
| 
 | ||||
|     void MarkAsContinuous(const bool is_continuous) { | ||||
|         this->is_continuous = is_continuous; | ||||
|     } | ||||
| 
 | ||||
|     bool IsContinuous() const { | ||||
|         return is_continuous; | ||||
|     } | ||||
| 
 | ||||
|     bool IsLinear() const { | ||||
|         return !params.is_tiled; | ||||
|     } | ||||
|  | @ -175,10 +159,8 @@ protected: | |||
|     std::size_t guest_memory_size; | ||||
|     std::size_t host_memory_size; | ||||
|     GPUVAddr gpu_addr{}; | ||||
|     CacheAddr cache_addr{}; | ||||
|     CacheAddr cache_addr_end{}; | ||||
|     VAddr cpu_addr{}; | ||||
|     bool is_continuous{}; | ||||
|     VAddr cpu_addr_end{}; | ||||
|     bool is_converted{}; | ||||
| 
 | ||||
|     std::vector<std::size_t> mipmap_sizes; | ||||
|  |  | |||
|  | @ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | |||
| 
 | ||||
| template <typename TSurface, typename TView> | ||||
| class TextureCache { | ||||
|     using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>; | ||||
|     using IntervalType = typename IntervalMap::interval_type; | ||||
| 
 | ||||
| public: | ||||
|     void InvalidateRegion(CacheAddr addr, std::size_t size) { | ||||
|     void InvalidateRegion(VAddr addr, std::size_t size) { | ||||
|         std::lock_guard lock{mutex}; | ||||
| 
 | ||||
|         for (const auto& surface : GetSurfacesInRegion(addr, size)) { | ||||
|  | @ -76,7 +74,7 @@ public: | |||
|         guard_samplers = new_guard; | ||||
|     } | ||||
| 
 | ||||
|     void FlushRegion(CacheAddr addr, std::size_t size) { | ||||
|     void FlushRegion(VAddr addr, std::size_t size) { | ||||
|         std::lock_guard lock{mutex}; | ||||
| 
 | ||||
|         auto surfaces = GetSurfacesInRegion(addr, size); | ||||
|  | @ -99,9 +97,9 @@ public: | |||
|             return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||||
|         } | ||||
| 
 | ||||
|         const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | ||||
|         const auto cache_addr{ToCacheAddr(host_ptr)}; | ||||
|         if (!cache_addr) { | ||||
|         const std::optional<VAddr> cpu_addr = | ||||
|             system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | ||||
|         if (!cpu_addr) { | ||||
|             return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||||
|         } | ||||
| 
 | ||||
|  | @ -110,7 +108,7 @@ public: | |||
|         } | ||||
| 
 | ||||
|         const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; | ||||
|         const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); | ||||
|         const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); | ||||
|         if (guard_samplers) { | ||||
|             sampled_textures.push_back(surface); | ||||
|         } | ||||
|  | @ -124,13 +122,13 @@ public: | |||
|         if (!gpu_addr) { | ||||
|             return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||||
|         } | ||||
|         const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | ||||
|         const auto cache_addr{ToCacheAddr(host_ptr)}; | ||||
|         if (!cache_addr) { | ||||
|         const std::optional<VAddr> cpu_addr = | ||||
|             system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | ||||
|         if (!cpu_addr) { | ||||
|             return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||||
|         } | ||||
|         const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; | ||||
|         const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); | ||||
|         const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); | ||||
|         if (guard_samplers) { | ||||
|             sampled_textures.push_back(surface); | ||||
|         } | ||||
|  | @ -159,14 +157,14 @@ public: | |||
|             SetEmptyDepthBuffer(); | ||||
|             return {}; | ||||
|         } | ||||
|         const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | ||||
|         const auto cache_addr{ToCacheAddr(host_ptr)}; | ||||
|         if (!cache_addr) { | ||||
|         const std::optional<VAddr> cpu_addr = | ||||
|             system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | ||||
|         if (!cpu_addr) { | ||||
|             SetEmptyDepthBuffer(); | ||||
|             return {}; | ||||
|         } | ||||
|         const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; | ||||
|         auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); | ||||
|         auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); | ||||
|         if (depth_buffer.target) | ||||
|             depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | ||||
|         depth_buffer.target = surface_view.first; | ||||
|  | @ -199,15 +197,15 @@ public: | |||
|             return {}; | ||||
|         } | ||||
| 
 | ||||
|         const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | ||||
|         const auto cache_addr{ToCacheAddr(host_ptr)}; | ||||
|         if (!cache_addr) { | ||||
|         const std::optional<VAddr> cpu_addr = | ||||
|             system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | ||||
|         if (!cpu_addr) { | ||||
|             SetEmptyColorBuffer(index); | ||||
|             return {}; | ||||
|         } | ||||
| 
 | ||||
|         auto surface_view = | ||||
|             GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index), | ||||
|             GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), | ||||
|                        preserve_contents, true); | ||||
|         if (render_targets[index].target) | ||||
|             render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | ||||
|  | @ -257,27 +255,26 @@ public: | |||
|         const GPUVAddr src_gpu_addr = src_config.Address(); | ||||
|         const GPUVAddr dst_gpu_addr = dst_config.Address(); | ||||
|         DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); | ||||
|         const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)}; | ||||
|         const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)}; | ||||
|         const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)}; | ||||
|         const auto src_cache_addr{ToCacheAddr(src_host_ptr)}; | ||||
|         const std::optional<VAddr> dst_cpu_addr = | ||||
|             system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); | ||||
|         const std::optional<VAddr> src_cpu_addr = | ||||
|             system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); | ||||
|         std::pair<TSurface, TView> dst_surface = | ||||
|             GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false); | ||||
|             GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); | ||||
|         std::pair<TSurface, TView> src_surface = | ||||
|             GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false); | ||||
|             GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false); | ||||
|         ImageBlit(src_surface.second, dst_surface.second, copy_config); | ||||
|         dst_surface.first->MarkAsModified(true, Tick()); | ||||
|     } | ||||
| 
 | ||||
|     TSurface TryFindFramebufferSurface(const u8* host_ptr) { | ||||
|         const CacheAddr cache_addr = ToCacheAddr(host_ptr); | ||||
|         if (!cache_addr) { | ||||
|     TSurface TryFindFramebufferSurface(VAddr addr) { | ||||
|         if (!addr) { | ||||
|             return nullptr; | ||||
|         } | ||||
|         const CacheAddr page = cache_addr >> registry_page_bits; | ||||
|         const VAddr page = addr >> registry_page_bits; | ||||
|         std::vector<TSurface>& list = registry[page]; | ||||
|         for (auto& surface : list) { | ||||
|             if (surface->GetCacheAddr() == cache_addr) { | ||||
|             if (surface->GetCpuAddr() == addr) { | ||||
|                 return surface; | ||||
|             } | ||||
|         } | ||||
|  | @ -338,18 +335,14 @@ protected: | |||
| 
 | ||||
|     void Register(TSurface surface) { | ||||
|         const GPUVAddr gpu_addr = surface->GetGpuAddr(); | ||||
|         const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); | ||||
|         const std::size_t size = surface->GetSizeInBytes(); | ||||
|         const std::optional<VAddr> cpu_addr = | ||||
|             system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | ||||
|         if (!cache_ptr || !cpu_addr) { | ||||
|         if (!cpu_addr) { | ||||
|             LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", | ||||
|                          gpu_addr); | ||||
|             return; | ||||
|         } | ||||
|         const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); | ||||
|         surface->MarkAsContinuous(continuous); | ||||
|         surface->SetCacheAddr(cache_ptr); | ||||
|         surface->SetCpuAddr(*cpu_addr); | ||||
|         RegisterInnerCache(surface); | ||||
|         surface->MarkAsRegistered(true); | ||||
|  | @ -634,7 +627,7 @@ private: | |||
|     std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, | ||||
|                                                                const SurfaceParams& params, | ||||
|                                                                const GPUVAddr gpu_addr, | ||||
|                                                                const CacheAddr cache_addr, | ||||
|                                                                const VAddr cpu_addr, | ||||
|                                                                bool preserve_contents) { | ||||
|         if (params.target == SurfaceTarget::Texture3D) { | ||||
|             bool failed = false; | ||||
|  | @ -659,7 +652,7 @@ private: | |||
|                     failed = true; | ||||
|                     break; | ||||
|                 } | ||||
|                 const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); | ||||
|                 const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); | ||||
|                 const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); | ||||
|                 modified |= surface->IsModified(); | ||||
|                 const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, | ||||
|  | @ -679,7 +672,7 @@ private: | |||
|         } else { | ||||
|             for (const auto& surface : overlaps) { | ||||
|                 if (!surface->MatchTarget(params.target)) { | ||||
|                     if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { | ||||
|                     if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { | ||||
|                         if (Settings::values.use_accurate_gpu_emulation) { | ||||
|                             return std::nullopt; | ||||
|                         } | ||||
|  | @ -688,7 +681,7 @@ private: | |||
|                     } | ||||
|                     return std::nullopt; | ||||
|                 } | ||||
|                 if (surface->GetCacheAddr() != cache_addr) { | ||||
|                 if (surface->GetCpuAddr() != cpu_addr) { | ||||
|                     continue; | ||||
|                 } | ||||
|                 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { | ||||
|  | @ -722,13 +715,13 @@ private: | |||
|      *                          left blank. | ||||
|      * @param is_render         Whether or not the surface is a render target. | ||||
|      **/ | ||||
|     std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, | ||||
|     std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, | ||||
|                                           const SurfaceParams& params, bool preserve_contents, | ||||
|                                           bool is_render) { | ||||
|         // Step 1
 | ||||
|         // Check Level 1 Cache for a fast structural match. If candidate surface
 | ||||
|         // matches at a certain level, we are pretty much done.
 | ||||
|         if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { | ||||
|         if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { | ||||
|             TSurface& current_surface = iter->second; | ||||
|             const auto topological_result = current_surface->MatchesTopology(params); | ||||
|             if (topological_result != MatchTopologyResult::FullMatch) { | ||||
|  | @ -755,7 +748,7 @@ private: | |||
|         // Step 2
 | ||||
|         // Obtain all possible overlaps in the memory region
 | ||||
|         const std::size_t candidate_size = params.GetGuestSizeInBytes(); | ||||
|         auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; | ||||
|         auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; | ||||
| 
 | ||||
|         // If none are found, we are done: we just create and load the surface.
 | ||||
|         if (overlaps.empty()) { | ||||
|  | @ -777,7 +770,7 @@ private: | |||
|         // Check if it's a 3D texture
 | ||||
|         if (params.block_depth > 0) { | ||||
|             auto surface = | ||||
|                 Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); | ||||
|                 Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); | ||||
|             if (surface) { | ||||
|                 return *surface; | ||||
|             } | ||||
|  | @ -852,16 +845,16 @@ private: | |||
|      * @param params   The parameters on the candidate surface. | ||||
|      **/ | ||||
|     Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { | ||||
|         const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | ||||
|         const auto cache_addr{ToCacheAddr(host_ptr)}; | ||||
|         const std::optional<VAddr> cpu_addr = | ||||
|             system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | ||||
| 
 | ||||
|         if (!cache_addr) { | ||||
|         if (!cpu_addr) { | ||||
|             Deduction result{}; | ||||
|             result.type = DeductionType::DeductionFailed; | ||||
|             return result; | ||||
|         } | ||||
| 
 | ||||
|         if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { | ||||
|         if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { | ||||
|             TSurface& current_surface = iter->second; | ||||
|             const auto topological_result = current_surface->MatchesTopology(params); | ||||
|             if (topological_result != MatchTopologyResult::FullMatch) { | ||||
|  | @ -880,7 +873,7 @@ private: | |||
|         } | ||||
| 
 | ||||
|         const std::size_t candidate_size = params.GetGuestSizeInBytes(); | ||||
|         auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; | ||||
|         auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; | ||||
| 
 | ||||
|         if (overlaps.empty()) { | ||||
|             Deduction result{}; | ||||
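The heart of the refactor is visible in this hunk: instead of converting the GPU address to a host pointer (ToCacheAddr(host_ptr)) and keying the caches on that, DeduceSurface now asks the GPU memory manager for the guest CPU address, which can legitimately be absent. A hedged sketch of that pattern, with an identity-mapped stub standing in for the real memory manager:

#include <cstdint>
#include <optional>

using GPUVAddr = std::uint64_t;
using VAddr = std::uint64_t;

// Stand-in for the GPU memory manager; the real GpuToCpuAddress walks the
// GPU page tables. Here, address 0 models an unmapped GPU address.
struct MemoryManagerStub {
    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr) const {
        if (gpu_addr == 0) {
            return std::nullopt;
        }
        return static_cast<VAddr>(gpu_addr); // identity mapping for the sketch
    }
};

bool DeduceSketch(const MemoryManagerStub& mm, GPUVAddr gpu_addr) {
    const std::optional<VAddr> cpu_addr = mm.GpuToCpuAddress(gpu_addr);
    if (!cpu_addr) {
        return false; // unmapped: fail the deduction instead of dereferencing
    }
    // ... continue using *cpu_addr as the cache key ...
    return true;
}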
|  | @ -1024,10 +1017,10 @@ private: | |||
|     } | ||||
| 
 | ||||
|     void RegisterInnerCache(TSurface& surface) { | ||||
|         const CacheAddr cache_addr = surface->GetCacheAddr(); | ||||
|         CacheAddr start = cache_addr >> registry_page_bits; | ||||
|         const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; | ||||
|         l1_cache[cache_addr] = surface; | ||||
|         const VAddr cpu_addr = surface->GetCpuAddr(); | ||||
|         VAddr start = cpu_addr >> registry_page_bits; | ||||
|         const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; | ||||
|         l1_cache[cpu_addr] = surface; | ||||
|         while (start <= end) { | ||||
|             registry[start].push_back(surface); | ||||
|             start++; | ||||
|  | @ -1035,10 +1028,10 @@ private: | |||
|     } | ||||
| 
 | ||||
|     void UnregisterInnerCache(TSurface& surface) { | ||||
|         const CacheAddr cache_addr = surface->GetCacheAddr(); | ||||
|         CacheAddr start = cache_addr >> registry_page_bits; | ||||
|         const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; | ||||
|         l1_cache.erase(cache_addr); | ||||
|         const VAddr cpu_addr = surface->GetCpuAddr(); | ||||
|         VAddr start = cpu_addr >> registry_page_bits; | ||||
|         const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; | ||||
|         l1_cache.erase(cpu_addr); | ||||
|         while (start <= end) { | ||||
|             auto& reg{registry[start]}; | ||||
|             reg.erase(std::find(reg.begin(), reg.end(), surface)); | ||||
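Register and unregister share the same bucket arithmetic: shift the first and last (inclusive) byte addresses right by registry_page_bits and visit every bucket in between. A compact sketch of that shared math follows, using a simplified surface record; the begin/end fields and helper names are assumptions, while the shifts mirror the diff.

#include <algorithm>
#include <cstdint>
#include <unordered_map>
#include <vector>

using VAddr = std::uint64_t;
constexpr std::uint64_t registry_page_bits = 20; // each bucket spans 1 MiB

struct SurfaceRec {
    VAddr begin; // first byte of the surface in guest memory
    VAddr end;   // one past the last byte
};

std::unordered_map<VAddr, std::vector<SurfaceRec*>> registry;

void RegisterSketch(SurfaceRec* s) {
    // (end - 1) keeps a surface ending exactly on a bucket boundary out of
    // the following bucket, matching the diff's GetCpuAddrEnd() - 1.
    for (VAddr page = s->begin >> registry_page_bits,
               last = (s->end - 1) >> registry_page_bits;
         page <= last; ++page) {
        registry[page].push_back(s);
    }
}

void UnregisterSketch(SurfaceRec* s) {
    for (VAddr page = s->begin >> registry_page_bits,
               last = (s->end - 1) >> registry_page_bits;
         page <= last; ++page) {
        auto& bucket = registry[page];
        bucket.erase(std::find(bucket.begin(), bucket.end(), s));
    }
}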
|  | @ -1046,18 +1039,18 @@ private: | |||
|         } | ||||
|     } | ||||
| 
 | ||||
|     std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { | ||||
|     std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { | ||||
|         if (size == 0) { | ||||
|             return {}; | ||||
|         } | ||||
|         const CacheAddr cache_addr_end = cache_addr + size; | ||||
|         CacheAddr start = cache_addr >> registry_page_bits; | ||||
|         const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; | ||||
|         const VAddr cpu_addr_end = cpu_addr + size; | ||||
|         VAddr start = cpu_addr >> registry_page_bits; | ||||
|         const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; | ||||
|         std::vector<TSurface> surfaces; | ||||
|         while (start <= end) { | ||||
|             std::vector<TSurface>& list = registry[start]; | ||||
|             for (auto& surface : list) { | ||||
|                 if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { | ||||
|                 if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { | ||||
|                     surface->MarkAsPicked(true); | ||||
|                     surfaces.push_back(surface); | ||||
|                 } | ||||
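GetSurfacesInRegion visits every bucket the queried range touches; because one surface may be registered in several buckets, the picked flag keeps it from being returned twice. A sketch of the same idea; note that the final unpicking loop is an assumption about code outside this hunk.

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

using VAddr = std::uint64_t;
constexpr std::uint64_t registry_page_bits = 20;

struct Rec {
    VAddr begin, end; // half-open [begin, end) range in guest memory
    bool picked = false;
    bool Overlaps(VAddr lo, VAddr hi) const { return begin < hi && lo < end; }
};

std::unordered_map<VAddr, std::vector<Rec*>> registry;

std::vector<Rec*> GetSurfacesInRegionSketch(VAddr cpu_addr, std::size_t size) {
    if (size == 0) {
        return {};
    }
    const VAddr end_addr = cpu_addr + size;
    std::vector<Rec*> found;
    for (VAddr page = cpu_addr >> registry_page_bits,
               last = (end_addr - 1) >> registry_page_bits;
         page <= last; ++page) {
        for (Rec* r : registry[page]) {
            if (!r->picked && r->Overlaps(cpu_addr, end_addr)) {
                r->picked = true; // a surface can sit in several buckets
                found.push_back(r);
            }
        }
    }
    for (Rec* r : found) {
        r->picked = false; // assumed cleanup; the real tail is outside the hunk
    }
    return found;
}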
|  | @ -1146,14 +1139,14 @@ private: | |||
|     // large in size. | ||||
|     static constexpr u64 registry_page_bits{20}; | ||||
|     static constexpr u64 registry_page_size{1 << registry_page_bits}; | ||||
|     std::unordered_map<CacheAddr, std::vector<TSurface>> registry; | ||||
|     std::unordered_map<VAddr, std::vector<TSurface>> registry; | ||||
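Worth spelling out the arithmetic: with registry_page_bits = 20, registry_page_size is 1 << 20 = 0x100000 bytes, so each registry bucket covers 1 MiB of guest virtual address space and most surfaces span only a handful of buckets.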
| 
 | ||||
|     static constexpr u32 DEPTH_RT = 8; | ||||
|     static constexpr u32 NO_RT = 0xFFFFFFFF; | ||||
| 
 | ||||
|     // The L1 Cache is used for fast texture lookup before checking the overlaps. | ||||
|     // This avoids calculating sizes and other properties. | ||||
|     std::unordered_map<CacheAddr, TSurface> l1_cache; | ||||
|     std::unordered_map<VAddr, TSurface> l1_cache; | ||||
| 
 | ||||
|     /// The surface reserve is a "backup" cache: this is where we put unique surfaces that have | ||||
|     /// previously been used. This is to prevent surfaces from being constantly created and | ||||