forked from eden-emu/eden
		
	SMMU: Implement backing CPU page protect/unprotect
This commit is contained in:
		
							parent
							
								
									7a9d1ad2f8
								
							
						
					
					
						commit
						c85d7ccd79
					
				
					 4 changed files with 141 additions and 6 deletions
				
			
		|  | @ -5,6 +5,8 @@ | ||||||
| 
 | 
 | ||||||
| #include <deque> | #include <deque> | ||||||
| #include <memory> | #include <memory> | ||||||
|  | #include <array> | ||||||
|  | #include <atomic> | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/virtual_buffer.h" | #include "common/virtual_buffer.h" | ||||||
|  | @ -23,6 +25,7 @@ struct DeviceMemoryManagerAllocator; | ||||||
| template <typename Traits> | template <typename Traits> | ||||||
| class DeviceMemoryManager { | class DeviceMemoryManager { | ||||||
|     using DeviceInterface = typename Traits::DeviceInterface; |     using DeviceInterface = typename Traits::DeviceInterface; | ||||||
|  |     using DeviceMethods = Traits::DeviceMethods; | ||||||
| 
 | 
 | ||||||
| public: | public: | ||||||
|     DeviceMemoryManager(const DeviceMemory& device_memory); |     DeviceMemoryManager(const DeviceMemory& device_memory); | ||||||
|  | @ -35,7 +38,7 @@ public: | ||||||
|     DAddr AllocatePinned(size_t size); |     DAddr AllocatePinned(size_t size); | ||||||
|     void Free(DAddr start, size_t size); |     void Free(DAddr start, size_t size); | ||||||
| 
 | 
 | ||||||
|     void Map(DAddr address, VAddr virtual_address, size_t size, size_t p_id); |     void Map(DAddr address, VAddr virtual_address, size_t size, size_t process_id); | ||||||
|     void Unmap(DAddr address, size_t size); |     void Unmap(DAddr address, size_t size); | ||||||
| 
 | 
 | ||||||
|     // Write / Read
 |     // Write / Read
 | ||||||
|  | @ -57,6 +60,8 @@ public: | ||||||
|     size_t RegisterProcess(Memory::Memory* memory); |     size_t RegisterProcess(Memory::Memory* memory); | ||||||
|     void UnregisterProcess(size_t id); |     void UnregisterProcess(size_t id); | ||||||
| 
 | 
 | ||||||
|  |     void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta); | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     static constexpr bool supports_pinning = Traits::supports_pinning; |     static constexpr bool supports_pinning = Traits::supports_pinning; | ||||||
|     static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; |     static constexpr size_t device_virtual_bits = Traits::device_virtual_bits; | ||||||
|  | @ -90,8 +95,52 @@ private: | ||||||
|     Common::VirtualBuffer<u32> compressed_physical_ptr; |     Common::VirtualBuffer<u32> compressed_physical_ptr; | ||||||
|     Common::VirtualBuffer<u32> compressed_device_addr; |     Common::VirtualBuffer<u32> compressed_device_addr; | ||||||
| 
 | 
 | ||||||
|  |     // Process memory interfaces
 | ||||||
|  | 
 | ||||||
|     std::deque<size_t> id_pool; |     std::deque<size_t> id_pool; | ||||||
|     std::deque<Memory::Memory*> registered_processes; |     std::deque<Memory::Memory*> registered_processes; | ||||||
|  | 
 | ||||||
|  |     // Memory protection management
 | ||||||
|  | 
 | ||||||
|  |     static constexpr size_t guest_max_as_bits = 39; | ||||||
|  |     static constexpr size_t guest_as_size = 1ULL << guest_max_as_bits; | ||||||
|  |     static constexpr size_t guest_mask = guest_as_size - 1ULL; | ||||||
|  |     static constexpr size_t process_id_start_bit = guest_max_as_bits; | ||||||
|  | 
 | ||||||
|  |     std::pair<size_t, VAddr> ExtractCPUBacking(size_t page_index) { | ||||||
|  |         auto content = cpu_backing_address[page_index]; | ||||||
|  |         const VAddr address = content & guest_mask; | ||||||
|  |         const size_t process_id = static_cast<size_t>(content >> process_id_start_bit); | ||||||
|  |         return std::make_pair(process_id, address); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void InsertCPUBacking(size_t page_index, VAddr address, size_t process_id) { | ||||||
|  |         cpu_backing_address[page_index] = address | (process_id << page_index); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     Common::VirtualBuffer<VAddr> cpu_backing_address; | ||||||
/// Number of per-page counters grouped into one CounterEntry.
static constexpr size_t subentries = 4;
/// Mask applied to a page index to pick a counter slot; this equals `page % subentries`
/// only while `subentries` is a power of two.
static constexpr size_t subentries_mask = subentries - 1;

/// Group of `subentries` atomic 16-bit counters addressed by the low bits of a page index.
class CounterEntry final {
public:
    CounterEntry() = default;

    /// Returns the mutable counter for `page`.
    std::atomic_uint16_t& Count(std::size_t page) {
        return values[SlotOf(page)];
    }

    /// Returns the read-only counter for `page`.
    const std::atomic_uint16_t& Count(std::size_t page) const {
        return values[SlotOf(page)];
    }

private:
    /// Maps a page index to its slot inside `values`.
    static constexpr std::size_t SlotOf(std::size_t page) {
        return page & subentries_mask;
    }

    std::array<std::atomic_uint16_t, subentries> values{};
};
|  |     static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), "CounterEntry should be 8 bytes!"); | ||||||
|  | 
 | ||||||
|  |     static constexpr size_t num_counter_entries = (1ULL << (device_virtual_bits - page_bits)) / subentries; | ||||||
|  |     using CachedPages = std::array<CounterEntry, num_counter_entries>; | ||||||
|  |     std::unique_ptr<CachedPages> cached_pages; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace Core
 | } // namespace Core
 | ||||||
|  | @ -2,12 +2,15 @@ | ||||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 | // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||||
| 
 | 
 | ||||||
| #include <atomic>
 | #include <atomic>
 | ||||||
|  | #include <limits>
 | ||||||
| #include <memory>
 | #include <memory>
 | ||||||
| #include <type_traits>
 | #include <type_traits>
 | ||||||
| 
 | 
 | ||||||
| #include "common/address_space.h"
 | #include "common/address_space.h"
 | ||||||
| #include "common/address_space.inc"
 | #include "common/address_space.inc"
 | ||||||
| #include "common/alignment.h"
 | #include "common/alignment.h"
 | ||||||
|  | #include "common/assert.h"
 | ||||||
|  | #include "common/div_ceil.h"
 | ||||||
| #include "common/scope_exit.h"
 | #include "common/scope_exit.h"
 | ||||||
| #include "core/device_memory.h"
 | #include "core/device_memory.h"
 | ||||||
| #include "core/device_memory_manager.h"
 | #include "core/device_memory_manager.h"
 | ||||||
|  | @ -51,7 +54,11 @@ struct DeviceMemoryManagerAllocator { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     DAddr AllocatePinned(size_t size) { |     DAddr AllocatePinned(size_t size) { | ||||||
|         return pin_allocator.Allocate(size); |         if constexpr (supports_pinning) { | ||||||
|  |             return pin_allocator.Allocate(size); | ||||||
|  |         } else { | ||||||
|  |             return DAddr{}; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void DoInRange(DAddr address, size_t size, auto pin_func, auto main_func) { |     void DoInRange(DAddr address, size_t size, auto pin_func, auto main_func) { | ||||||
|  | @ -100,6 +107,7 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo | ||||||
|       interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), |       interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS), | ||||||
|       compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) { |       compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) { | ||||||
|     impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); |     impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); | ||||||
|  |     cached_pages = std::make_unique<CachedPages>(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <typename Traits> | template <typename Traits> | ||||||
|  | @ -132,14 +140,14 @@ void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) { | ||||||
| 
 | 
 | ||||||
| template <typename Traits> | template <typename Traits> | ||||||
| void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size, | void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size, | ||||||
|                                       size_t p_id) { |                                       size_t process_id) { | ||||||
|     Core::Memory::Memory* process_memory = registered_processes[p_id]; |     Core::Memory::Memory* process_memory = registered_processes[process_id]; | ||||||
|     size_t start_page_d = address >> Memory::YUZU_PAGEBITS; |     size_t start_page_d = address >> Memory::YUZU_PAGEBITS; | ||||||
|     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; |     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; | ||||||
|     std::atomic_thread_fence(std::memory_order_acquire); |     std::atomic_thread_fence(std::memory_order_acquire); | ||||||
|     for (size_t i = 0; i < num_pages; i++) { |     for (size_t i = 0; i < num_pages; i++) { | ||||||
|         auto* ptr = process_memory->GetPointer( |         const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE; | ||||||
|             Common::ProcessAddress(virtual_address + i * Memory::YUZU_PAGESIZE)); |         auto* ptr = process_memory->GetPointer(Common::ProcessAddress(new_vaddress)); | ||||||
|         if (ptr == nullptr) [[unlikely]] { |         if (ptr == nullptr) [[unlikely]] { | ||||||
|             compressed_physical_ptr[start_page_d + i] = 0; |             compressed_physical_ptr[start_page_d + i] = 0; | ||||||
|             continue; |             continue; | ||||||
|  | @ -147,6 +155,7 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size | ||||||
|         auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U; |         auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U; | ||||||
|         compressed_physical_ptr[start_page_d + i] = phys_addr; |         compressed_physical_ptr[start_page_d + i] = phys_addr; | ||||||
|         compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i); |         compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i); | ||||||
|  |         InsertCPUBacking(start_page_d + i, new_vaddress, process_id); | ||||||
|     } |     } | ||||||
|     std::atomic_thread_fence(std::memory_order_release); |     std::atomic_thread_fence(std::memory_order_release); | ||||||
| } | } | ||||||
|  | @ -159,6 +168,7 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) { | ||||||
|     for (size_t i = 0; i < num_pages; i++) { |     for (size_t i = 0; i < num_pages; i++) { | ||||||
|         auto phys_addr = compressed_physical_ptr[start_page_d + i]; |         auto phys_addr = compressed_physical_ptr[start_page_d + i]; | ||||||
|         compressed_physical_ptr[start_page_d + i] = 0; |         compressed_physical_ptr[start_page_d + i] = 0; | ||||||
|  |         cpu_backing_address[start_page_d + i] = 0; | ||||||
|         if (phys_addr != 0) { |         if (phys_addr != 0) { | ||||||
|             compressed_device_addr[phys_addr - 1] = 0; |             compressed_device_addr[phys_addr - 1] = 0; | ||||||
|         } |         } | ||||||
|  | @ -301,4 +311,66 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(size_t id) { | ||||||
|     id_pool.push_front(id); |     id_pool.push_front(id); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | template <typename Traits> | ||||||
|  | void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) { | ||||||
|  |     u64 uncache_begin = 0; | ||||||
|  |     u64 cache_begin = 0; | ||||||
|  |     u64 uncache_bytes = 0; | ||||||
|  |     u64 cache_bytes = 0; | ||||||
|  |     const auto* MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching; | ||||||
|  | 
 | ||||||
|  |     std::atomic_thread_fence(std::memory_order_acquire); | ||||||
|  |     const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE); | ||||||
|  |     size_t page = addr >> Memory::YUZU_PAGEBITS; | ||||||
|  |     auto [process_id, base_vaddress] = ExtractCPUBacking(page); | ||||||
|  |     size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS; | ||||||
|  |     auto* memory_interface = registered_processes[process_id]; | ||||||
|  |     for (; page != page_end; ++page) { | ||||||
|  |         std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page); | ||||||
|  | 
 | ||||||
|  |         if (delta > 0) { | ||||||
|  |             ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u16>::max(), | ||||||
|  |                        "Count may overflow!"); | ||||||
|  |         } else if (delta < 0) { | ||||||
|  |             ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); | ||||||
|  |         } else { | ||||||
|  |             ASSERT_MSG(false, "Delta must be non-zero!"); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Adds or subtracts 1, as count is a unsigned 8-bit value
 | ||||||
|  |         count.fetch_add(static_cast<u16>(delta), std::memory_order_release); | ||||||
|  | 
 | ||||||
|  |         // Assume delta is either -1 or 1
 | ||||||
|  |         if (count.load(std::memory_order::relaxed) == 0) { | ||||||
|  |             if (uncache_bytes == 0) { | ||||||
|  |                 uncache_begin = vpage; | ||||||
|  |             } | ||||||
|  |             uncache_bytes += Memory::YUZU_PAGESIZE; | ||||||
|  |         } else if (uncache_bytes > 0) { | ||||||
|  |             MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, | ||||||
|  |                               uncache_bytes, false); | ||||||
|  |             uncache_bytes = 0; | ||||||
|  |         } | ||||||
|  |         if (count.load(std::memory_order::relaxed) == 1 && delta > 0) { | ||||||
|  |             if (cache_bytes == 0) { | ||||||
|  |                 cache_begin = vpage; | ||||||
|  |             } | ||||||
|  |             cache_bytes += Memory::YUZU_PAGESIZE; | ||||||
|  |         } else if (cache_bytes > 0) { | ||||||
|  |             MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, | ||||||
|  |                               true); | ||||||
|  |             cache_bytes = 0; | ||||||
|  |         } | ||||||
|  |         vpage++; | ||||||
|  |     } | ||||||
|  |     if (uncache_bytes > 0) { | ||||||
|  |         MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, | ||||||
|  |                           false); | ||||||
|  |     } | ||||||
|  |     if (cache_bytes > 0) { | ||||||
|  |         MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, | ||||||
|  |                           true); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Core
 | } // namespace Core
 | ||||||
|  | @ -5,6 +5,17 @@ | ||||||
| #include "video_core/host1x/gpu_device_memory_manager.h" | #include "video_core/host1x/gpu_device_memory_manager.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| 
 | 
 | ||||||
|  | namespace Tegra { | ||||||
|  | 
 | ||||||
|  | struct MaxwellDeviceMethods { | ||||||
|  |     static inline void MarkRegionCaching(Core::Memory::Memory* interface, VAddr address, | ||||||
|  |                                          size_t size, bool caching) { | ||||||
|  |         interface->RasterizerMarkRegionCached(address, size, caching); | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | } // namespace Tegra
 | ||||||
|  | 
 | ||||||
| template struct Core::DeviceMemoryManagerAllocator<Tegra::MaxwellDeviceTraits>; | template struct Core::DeviceMemoryManagerAllocator<Tegra::MaxwellDeviceTraits>; | ||||||
| template class Core::DeviceMemoryManager<Tegra::MaxwellDeviceTraits>; | template class Core::DeviceMemoryManager<Tegra::MaxwellDeviceTraits>; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -9,10 +9,13 @@ class RasterizerInterface; | ||||||
| 
 | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| 
 | 
 | ||||||
|  | struct MaxwellDeviceMethods; | ||||||
|  | 
 | ||||||
| struct MaxwellDeviceTraits { | struct MaxwellDeviceTraits { | ||||||
|     static constexpr bool supports_pinning = true; |     static constexpr bool supports_pinning = true; | ||||||
|     static constexpr size_t device_virtual_bits = 34; |     static constexpr size_t device_virtual_bits = 34; | ||||||
|     using DeviceInterface = typename VideoCore::RasterizerInterface; |     using DeviceInterface = typename VideoCore::RasterizerInterface; | ||||||
|  |     using DeviceMethods = typename MaxwellDeviceMethods; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>; | using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow