forked from eden-emu/eden
		
	VideoCore: Rework Garbage Collection.
This commit is contained in:
		
							parent
							
								
									c20ea89390
								
							
						
					
					
						commit
						d540d284b5
					
				
					 6 changed files with 213 additions and 101 deletions
				
			
		
							
								
								
									
										141
									
								
								src/common/lru_cache.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										141
									
								
								src/common/lru_cache.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,141 @@ | ||||||
|  | // Copyright 2021 yuzu Emulator Project
 | ||||||
|  | // Licensed under GPLv2+ or any later version
 | ||||||
|  | // Refer to the license.txt file included.
 | ||||||
|  | 
 | ||||||
|  | #pragma once | ||||||
|  | 
 | ||||||
|  | #include <deque> | ||||||
|  | #include <memory> | ||||||
|  | #include <type_traits> | ||||||
|  | 
 | ||||||
|  | #include "common/common_types.h" | ||||||
|  | 
 | ||||||
|  | namespace Common { | ||||||
|  | 
 | ||||||
/// Least-recently-used list of objects, ordered by the tick at which each entry was last used.
///
/// Entries live in a pool (`item_pool`) and are chained into a doubly linked list that runs from
/// the least recently used entry (`first_item`) to the most recently used one (`last_item`).
/// Ids returned by Insert are indices into the pool and are recycled after Free.
///
/// Traits must provide:
///  - `ObjectType`: payload stored per entry; it is copied in and passed to visitor callbacks.
///  - `TickType`:   integral timestamp type used to order entries.
///
/// NOTE: the list links are raw pointers into `item_pool`, so a copy of this object would keep
/// pointing into the pool of the object it was copied from. Do not copy instances.
template <class Traits>
class LeastRecentlyUsedCache {
    using ObjectType = typename Traits::ObjectType;
    using TickType = typename Traits::TickType;

    struct Item {
        ObjectType obj;
        TickType tick;
        Item* next{};
        Item* prev{};
    };

public:
    LeastRecentlyUsedCache() = default;
    ~LeastRecentlyUsedCache() = default;

    /// Registers `obj` as the most recently used entry, stamped with `tick`.
    /// @returns The id to pass to Touch and Free for this entry.
    size_t Insert(ObjectType obj, TickType tick) {
        const size_t new_id = build();
        Item& item = item_pool[new_id];
        item.obj = obj;
        item.tick = tick;
        attach(item);
        return new_id;
    }

    /// Marks entry `id` as used at `tick` and moves it to the most recently used position.
    /// Does nothing when the entry already carries a tick that is equal or newer.
    void Touch(size_t id, TickType tick) {
        Item& item = item_pool[id];
        if (item.tick >= tick) {
            return;
        }
        item.tick = tick;
        if (&item == last_item) {
            // Already at the tail, only the timestamp had to change.
            return;
        }
        detach(item);
        attach(item);
    }

    /// Removes entry `id` from the list and makes its id available for reuse by Insert.
    void Free(size_t id) {
        detach(item_pool[id]);
        free_items.push_back(id);
    }

    /// Visits entries from least to most recently used while their tick is not newer than `tick`.
    ///
    /// The comparison is done on the wrapped difference `tick - entry.tick` interpreted as a
    /// signed value, so a threshold that underflowed (e.g. `frame_tick - N` during the first N
    /// frames) selects nothing instead of selecting everything.
    ///
    /// @param tick Threshold; iteration stops at the first entry whose tick is newer.
    /// @param func Callable taking an ObjectType. If it returns exactly `bool`, returning `true`
    ///             stops the iteration; any other return type is ignored.
    ///
    /// The successor is read before `func` runs, so `func` may Free the entry it is visiting.
    template <typename Func>
    void ForEachItemBelow(TickType tick, Func&& func) {
        // invoke_result_t (not invoke_result) is required here: comparing the trait struct itself
        // against bool is always false and would silently disable early termination.
        static constexpr bool RETURNS_BOOL =
            std::is_same_v<std::invoke_result_t<Func, ObjectType&>, bool>;
        using SignedTick = std::make_signed_t<TickType>;

        Item* iterator = first_item;
        while (iterator) {
            if (static_cast<SignedTick>(tick - iterator->tick) < 0) {
                return;
            }
            Item* const next = iterator->next;
            if constexpr (RETURNS_BOOL) {
                if (func(iterator->obj)) {
                    return;
                }
            } else {
                func(iterator->obj);
            }
            iterator = next;
        }
    }

private:
    /// Obtains an unlinked pool slot, recycling a freed id when one is available.
    size_t build() {
        size_t item_id;
        if (free_items.empty()) {
            item_id = item_pool.size();
            item_pool.emplace_back();
        } else {
            item_id = free_items.front();
            free_items.pop_front();
        }
        Item& item = item_pool[item_id];
        item.next = nullptr;
        item.prev = nullptr;
        return item_id;
    }

    /// Appends `item` at the most recently used end of the list.
    void attach(Item& item) {
        item.prev = last_item;
        item.next = nullptr;
        if (last_item) {
            last_item->next = &item;
        } else {
            // Empty list: the new item is both head and tail.
            first_item = &item;
        }
        last_item = &item;
    }

    /// Unlinks `item` from the list, fixing up its neighbours and the head/tail pointers.
    void detach(Item& item) {
        if (item.prev) {
            item.prev->next = item.next;
        }
        if (item.next) {
            item.next->prev = item.prev;
        }
        if (&item == first_item) {
            first_item = item.next;
        }
        if (&item == last_item) {
            last_item = item.prev;
        }
        item.prev = nullptr;
        item.next = nullptr;
    }

    std::deque<Item> item_pool;    ///< Storage for all entries; deque keeps element addresses stable on growth
    std::deque<size_t> free_items; ///< Ids released by Free, waiting to be reused
    Item* first_item{};            ///< Least recently used entry, or nullptr when empty
    Item* last_item{};             ///< Most recently used entry, or nullptr when empty
};
|  | 
 | ||||||
|  | } // namespace Common
 | ||||||
|  | @ -261,16 +261,6 @@ public: | ||||||
|         stream_score += score; |         stream_score += score; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Sets the new frame tick
 |  | ||||||
|     void SetFrameTick(u64 new_frame_tick) noexcept { |  | ||||||
|         frame_tick = new_frame_tick; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     /// Returns the new frame tick
 |  | ||||||
|     [[nodiscard]] u64 FrameTick() const noexcept { |  | ||||||
|         return frame_tick; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     /// Returns the likeliness of this being a stream buffer
 |     /// Returns the likeliness of this being a stream buffer
 | ||||||
|     [[nodiscard]] int StreamScore() const noexcept { |     [[nodiscard]] int StreamScore() const noexcept { | ||||||
|         return stream_score; |         return stream_score; | ||||||
|  | @ -307,6 +297,8 @@ public: | ||||||
|         return words.size_bytes; |         return words.size_bytes; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     size_t lru_id; | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     template <Type type> |     template <Type type> | ||||||
|     u64* Array() noexcept { |     u64* Array() noexcept { | ||||||
|  | @ -603,7 +595,6 @@ private: | ||||||
|     RasterizerInterface* rasterizer = nullptr; |     RasterizerInterface* rasterizer = nullptr; | ||||||
|     VAddr cpu_addr = 0; |     VAddr cpu_addr = 0; | ||||||
|     Words words; |     Words words; | ||||||
|     u64 frame_tick = 0; |  | ||||||
|     BufferFlagBits flags{}; |     BufferFlagBits flags{}; | ||||||
|     int stream_score = 0; |     int stream_score = 0; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -20,6 +20,7 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/div_ceil.h" | #include "common/div_ceil.h" | ||||||
| #include "common/literals.h" | #include "common/literals.h" | ||||||
|  | #include "common/lru_cache.h" | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "common/scope_exit.h" | #include "common/scope_exit.h" | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
|  | @ -77,7 +78,7 @@ class BufferCache { | ||||||
| 
 | 
 | ||||||
|     static constexpr BufferId NULL_BUFFER_ID{0}; |     static constexpr BufferId NULL_BUFFER_ID{0}; | ||||||
| 
 | 
 | ||||||
|     static constexpr u64 EXPECTED_MEMORY = 512_MiB; |     static constexpr u64 EXPECTED_MEMORY = 256_MiB; | ||||||
|     static constexpr u64 CRITICAL_MEMORY = 1_GiB; |     static constexpr u64 CRITICAL_MEMORY = 1_GiB; | ||||||
| 
 | 
 | ||||||
|     using Maxwell = Tegra::Engines::Maxwell3D::Regs; |     using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||||
|  | @ -330,7 +331,7 @@ private: | ||||||
|     template <bool insert> |     template <bool insert> | ||||||
|     void ChangeRegister(BufferId buffer_id); |     void ChangeRegister(BufferId buffer_id); | ||||||
| 
 | 
 | ||||||
|     void TouchBuffer(Buffer& buffer) const noexcept; |     void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; | ||||||
| 
 | 
 | ||||||
|     bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); |     bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | ||||||
| 
 | 
 | ||||||
|  | @ -428,7 +429,11 @@ private: | ||||||
|     size_t immediate_buffer_capacity = 0; |     size_t immediate_buffer_capacity = 0; | ||||||
|     std::unique_ptr<u8[]> immediate_buffer_alloc; |     std::unique_ptr<u8[]> immediate_buffer_alloc; | ||||||
| 
 | 
 | ||||||
|     typename SlotVector<Buffer>::Iterator deletion_iterator; |     struct LRUItemParams { | ||||||
|  |         using ObjectType = BufferId; | ||||||
|  |         using TickType = u64; | ||||||
|  |     }; | ||||||
|  |     Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; | ||||||
|     u64 frame_tick = 0; |     u64 frame_tick = 0; | ||||||
|     u64 total_used_memory = 0; |     u64 total_used_memory = 0; | ||||||
| 
 | 
 | ||||||
|  | @ -445,7 +450,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||||||
|       kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { |       kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { | ||||||
|     // Ensure the first slot is used for the null buffer
 |     // Ensure the first slot is used for the null buffer
 | ||||||
|     void(slot_buffers.insert(runtime, NullBufferParams{})); |     void(slot_buffers.insert(runtime, NullBufferParams{})); | ||||||
|     deletion_iterator = slot_buffers.end(); |  | ||||||
|     common_ranges.clear(); |     common_ranges.clear(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -454,20 +458,17 @@ void BufferCache<P>::RunGarbageCollector() { | ||||||
|     const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; |     const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; | ||||||
|     const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; |     const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; | ||||||
|     int num_iterations = aggressive_gc ? 64 : 32; |     int num_iterations = aggressive_gc ? 64 : 32; | ||||||
|     for (; num_iterations > 0; --num_iterations) { |     const auto clean_up = [this, &num_iterations](BufferId buffer_id) { | ||||||
|         if (deletion_iterator == slot_buffers.end()) { |         if (num_iterations == 0) { | ||||||
|             deletion_iterator = slot_buffers.begin(); |             return true; | ||||||
|         } |         } | ||||||
|         ++deletion_iterator; |         --num_iterations; | ||||||
|         if (deletion_iterator == slot_buffers.end()) { |         auto& buffer = slot_buffers[buffer_id]; | ||||||
|             break; |         DownloadBufferMemory(buffer); | ||||||
|         } |         DeleteBuffer(buffer_id); | ||||||
|         const auto [buffer_id, buffer] = *deletion_iterator; |         return false; | ||||||
|         if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { |     }; | ||||||
|             DownloadBufferMemory(*buffer); |     lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); | ||||||
|             DeleteBuffer(buffer_id); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
|  | @ -954,7 +955,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { | ||||||
| template <class P> | template <class P> | ||||||
| void BufferCache<P>::BindHostIndexBuffer() { | void BufferCache<P>::BindHostIndexBuffer() { | ||||||
|     Buffer& buffer = slot_buffers[index_buffer.buffer_id]; |     Buffer& buffer = slot_buffers[index_buffer.buffer_id]; | ||||||
|     TouchBuffer(buffer); |     TouchBuffer(buffer, index_buffer.buffer_id); | ||||||
|     const u32 offset = buffer.Offset(index_buffer.cpu_addr); |     const u32 offset = buffer.Offset(index_buffer.cpu_addr); | ||||||
|     const u32 size = index_buffer.size; |     const u32 size = index_buffer.size; | ||||||
|     SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); |     SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); | ||||||
|  | @ -975,7 +976,7 @@ void BufferCache<P>::BindHostVertexBuffers() { | ||||||
|     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { |     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { | ||||||
|         const Binding& binding = vertex_buffers[index]; |         const Binding& binding = vertex_buffers[index]; | ||||||
|         Buffer& buffer = slot_buffers[binding.buffer_id]; |         Buffer& buffer = slot_buffers[binding.buffer_id]; | ||||||
|         TouchBuffer(buffer); |         TouchBuffer(buffer, binding.buffer_id); | ||||||
|         SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); |         SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | ||||||
|         if (!flags[Dirty::VertexBuffer0 + index]) { |         if (!flags[Dirty::VertexBuffer0 + index]) { | ||||||
|             continue; |             continue; | ||||||
|  | @ -1011,7 +1012,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | ||||||
|     const VAddr cpu_addr = binding.cpu_addr; |     const VAddr cpu_addr = binding.cpu_addr; | ||||||
|     const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); |     const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); | ||||||
|     Buffer& buffer = slot_buffers[binding.buffer_id]; |     Buffer& buffer = slot_buffers[binding.buffer_id]; | ||||||
|     TouchBuffer(buffer); |     TouchBuffer(buffer, binding.buffer_id); | ||||||
|     const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |     const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | ||||||
|                                  size <= uniform_buffer_skip_cache_size && |                                  size <= uniform_buffer_skip_cache_size && | ||||||
|                                  !buffer.IsRegionGpuModified(cpu_addr, size); |                                  !buffer.IsRegionGpuModified(cpu_addr, size); | ||||||
|  | @ -1083,7 +1084,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | ||||||
|     ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { |     ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { | ||||||
|         const Binding& binding = storage_buffers[stage][index]; |         const Binding& binding = storage_buffers[stage][index]; | ||||||
|         Buffer& buffer = slot_buffers[binding.buffer_id]; |         Buffer& buffer = slot_buffers[binding.buffer_id]; | ||||||
|         TouchBuffer(buffer); |         TouchBuffer(buffer, binding.buffer_id); | ||||||
|         const u32 size = binding.size; |         const u32 size = binding.size; | ||||||
|         SynchronizeBuffer(buffer, binding.cpu_addr, size); |         SynchronizeBuffer(buffer, binding.cpu_addr, size); | ||||||
| 
 | 
 | ||||||
|  | @ -1128,7 +1129,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { | ||||||
|     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { |     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { | ||||||
|         const Binding& binding = transform_feedback_buffers[index]; |         const Binding& binding = transform_feedback_buffers[index]; | ||||||
|         Buffer& buffer = slot_buffers[binding.buffer_id]; |         Buffer& buffer = slot_buffers[binding.buffer_id]; | ||||||
|         TouchBuffer(buffer); |         TouchBuffer(buffer, binding.buffer_id); | ||||||
|         const u32 size = binding.size; |         const u32 size = binding.size; | ||||||
|         SynchronizeBuffer(buffer, binding.cpu_addr, size); |         SynchronizeBuffer(buffer, binding.cpu_addr, size); | ||||||
| 
 | 
 | ||||||
|  | @ -1148,7 +1149,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { | ||||||
|     ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { |     ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { | ||||||
|         const Binding& binding = compute_uniform_buffers[index]; |         const Binding& binding = compute_uniform_buffers[index]; | ||||||
|         Buffer& buffer = slot_buffers[binding.buffer_id]; |         Buffer& buffer = slot_buffers[binding.buffer_id]; | ||||||
|         TouchBuffer(buffer); |         TouchBuffer(buffer, binding.buffer_id); | ||||||
|         const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); |         const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); | ||||||
|         SynchronizeBuffer(buffer, binding.cpu_addr, size); |         SynchronizeBuffer(buffer, binding.cpu_addr, size); | ||||||
| 
 | 
 | ||||||
|  | @ -1168,7 +1169,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { | ||||||
|     ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { |     ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | ||||||
|         const Binding& binding = compute_storage_buffers[index]; |         const Binding& binding = compute_storage_buffers[index]; | ||||||
|         Buffer& buffer = slot_buffers[binding.buffer_id]; |         Buffer& buffer = slot_buffers[binding.buffer_id]; | ||||||
|         TouchBuffer(buffer); |         TouchBuffer(buffer, binding.buffer_id); | ||||||
|         const u32 size = binding.size; |         const u32 size = binding.size; | ||||||
|         SynchronizeBuffer(buffer, binding.cpu_addr, size); |         SynchronizeBuffer(buffer, binding.cpu_addr, size); | ||||||
| 
 | 
 | ||||||
|  | @ -1513,11 +1514,11 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | ||||||
|     const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); |     const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | ||||||
|     const u32 size = static_cast<u32>(overlap.end - overlap.begin); |     const u32 size = static_cast<u32>(overlap.end - overlap.begin); | ||||||
|     const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); |     const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | ||||||
|     TouchBuffer(slot_buffers[new_buffer_id]); |  | ||||||
|     for (const BufferId overlap_id : overlap.ids) { |     for (const BufferId overlap_id : overlap.ids) { | ||||||
|         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); |         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); | ||||||
|     } |     } | ||||||
|     Register(new_buffer_id); |     Register(new_buffer_id); | ||||||
|  |     TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); | ||||||
|     return new_buffer_id; |     return new_buffer_id; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1534,12 +1535,14 @@ void BufferCache<P>::Unregister(BufferId buffer_id) { | ||||||
| template <class P> | template <class P> | ||||||
| template <bool insert> | template <bool insert> | ||||||
| void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | ||||||
|     const Buffer& buffer = slot_buffers[buffer_id]; |     Buffer& buffer = slot_buffers[buffer_id]; | ||||||
|     const auto size = buffer.SizeBytes(); |     const auto size = buffer.SizeBytes(); | ||||||
|     if (insert) { |     if (insert) { | ||||||
|         total_used_memory += Common::AlignUp(size, 1024); |         total_used_memory += Common::AlignUp(size, 1024); | ||||||
|  |         buffer.lru_id = lru_cache.Insert(buffer_id, frame_tick); | ||||||
|     } else { |     } else { | ||||||
|         total_used_memory -= Common::AlignUp(size, 1024); |         total_used_memory -= Common::AlignUp(size, 1024); | ||||||
|  |         lru_cache.Free(buffer.lru_id); | ||||||
|     } |     } | ||||||
|     const VAddr cpu_addr_begin = buffer.CpuAddr(); |     const VAddr cpu_addr_begin = buffer.CpuAddr(); | ||||||
|     const VAddr cpu_addr_end = cpu_addr_begin + size; |     const VAddr cpu_addr_end = cpu_addr_begin + size; | ||||||
|  | @ -1555,8 +1558,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
| void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { | void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { | ||||||
|     buffer.SetFrameTick(frame_tick); |     if (buffer_id != NULL_BUFFER_ID) { | ||||||
|  |         lru_cache.Touch(buffer.lru_id, frame_tick); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
|  |  | ||||||
|  | @ -80,7 +80,7 @@ struct ImageBase { | ||||||
|     VAddr cpu_addr_end = 0; |     VAddr cpu_addr_end = 0; | ||||||
| 
 | 
 | ||||||
|     u64 modification_tick = 0; |     u64 modification_tick = 0; | ||||||
|     u64 frame_tick = 0; |     size_t lru_index = ~0; | ||||||
| 
 | 
 | ||||||
|     std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; |     std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -43,8 +43,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | ||||||
|     void(slot_image_views.insert(runtime, NullImageParams{})); |     void(slot_image_views.insert(runtime, NullImageParams{})); | ||||||
|     void(slot_samplers.insert(runtime, sampler_descriptor)); |     void(slot_samplers.insert(runtime, sampler_descriptor)); | ||||||
| 
 | 
 | ||||||
|     deletion_iterator = slot_images.begin(); |  | ||||||
| 
 |  | ||||||
|     if constexpr (HAS_DEVICE_MEMORY_INFO) { |     if constexpr (HAS_DEVICE_MEMORY_INFO) { | ||||||
|         const auto device_memory = runtime.GetDeviceLocalMemory(); |         const auto device_memory = runtime.GetDeviceLocalMemory(); | ||||||
|         const u64 possible_expected_memory = (device_memory * 3) / 10; |         const u64 possible_expected_memory = (device_memory * 3) / 10; | ||||||
|  | @ -64,65 +62,33 @@ template <class P> | ||||||
| void TextureCache<P>::RunGarbageCollector() { | void TextureCache<P>::RunGarbageCollector() { | ||||||
|     const bool high_priority_mode = total_used_memory >= expected_memory; |     const bool high_priority_mode = total_used_memory >= expected_memory; | ||||||
|     const bool aggressive_mode = total_used_memory >= critical_memory; |     const bool aggressive_mode = total_used_memory >= critical_memory; | ||||||
|     const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; |     const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 50ULL : 100ULL; | ||||||
|     int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); |     size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 50 : 5); | ||||||
|     for (; num_iterations > 0; --num_iterations) { |     const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { | ||||||
|         if (deletion_iterator == slot_images.end()) { |         if (num_iterations == 0) { | ||||||
|             deletion_iterator = slot_images.begin(); |             return true; | ||||||
|             if (deletion_iterator == slot_images.end()) { |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|         } |         } | ||||||
|         auto [image_id, image_tmp] = *deletion_iterator; |         --num_iterations; | ||||||
|         Image* image = image_tmp; // fix clang error.
 |         auto& image = slot_images[image_id]; | ||||||
|         const bool is_alias = True(image->flags & ImageFlagBits::Alias); |         const bool must_download = image.IsSafeDownload(); | ||||||
|         const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); |         if (!high_priority_mode && must_download) { | ||||||
|         const bool must_download = image->IsSafeDownload(); |             return false; | ||||||
|         bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); |  | ||||||
|         const u64 ticks_needed = |  | ||||||
|             is_bad_overlap |  | ||||||
|                 ? ticks_to_destroy >> 4 |  | ||||||
|                 : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); |  | ||||||
|         should_care |= aggressive_mode; |  | ||||||
|         if (should_care && image->frame_tick + ticks_needed < frame_tick) { |  | ||||||
|             if (is_bad_overlap) { |  | ||||||
|                 const bool overlap_check = std::ranges::all_of( |  | ||||||
|                     image->overlapping_images, [&, image](const ImageId& overlap_id) { |  | ||||||
|                         auto& overlap = slot_images[overlap_id]; |  | ||||||
|                         return overlap.frame_tick >= image->frame_tick; |  | ||||||
|                     }); |  | ||||||
|                 if (!overlap_check) { |  | ||||||
|                     ++deletion_iterator; |  | ||||||
|                     continue; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|             if (!is_bad_overlap && must_download) { |  | ||||||
|                 const bool alias_check = std::ranges::none_of( |  | ||||||
|                     image->aliased_images, [&, image](const AliasedImage& alias) { |  | ||||||
|                         auto& alias_image = slot_images[alias.id]; |  | ||||||
|                         return (alias_image.frame_tick < image->frame_tick) || |  | ||||||
|                                (alias_image.modification_tick < image->modification_tick); |  | ||||||
|                     }); |  | ||||||
| 
 |  | ||||||
|                 if (alias_check) { |  | ||||||
|                     auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); |  | ||||||
|                     const auto copies = FullDownloadCopies(image->info); |  | ||||||
|                     image->DownloadMemory(map, copies); |  | ||||||
|                     runtime.Finish(); |  | ||||||
|                     SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|             if (True(image->flags & ImageFlagBits::Tracked)) { |  | ||||||
|                 UntrackImage(*image, image_id); |  | ||||||
|             } |  | ||||||
|             UnregisterImage(image_id); |  | ||||||
|             DeleteImage(image_id); |  | ||||||
|             if (is_bad_overlap) { |  | ||||||
|                 ++num_iterations; |  | ||||||
|             } |  | ||||||
|         } |         } | ||||||
|         ++deletion_iterator; |         if (must_download) { | ||||||
|     } |             auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); | ||||||
|  |             const auto copies = FullDownloadCopies(image.info); | ||||||
|  |             image.DownloadMemory(map, copies); | ||||||
|  |             runtime.Finish(); | ||||||
|  |             SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | ||||||
|  |         } | ||||||
|  |         if (True(image.flags & ImageFlagBits::Tracked)) { | ||||||
|  |             UntrackImage(image, image_id); | ||||||
|  |         } | ||||||
|  |         UnregisterImage(image_id); | ||||||
|  |         DeleteImage(image_id); | ||||||
|  |         return false; | ||||||
|  |     }; | ||||||
|  |     lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
|  | @ -1078,6 +1044,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | ||||||
|         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||||||
|     } |     } | ||||||
|     total_used_memory += Common::AlignUp(tentative_size, 1024); |     total_used_memory += Common::AlignUp(tentative_size, 1024); | ||||||
|  |     image.lru_index = lru_cache.Insert(image_id, frame_tick); | ||||||
|  | 
 | ||||||
|     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, |     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||||||
|                    [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); |                    [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); | ||||||
|     if (False(image.flags & ImageFlagBits::Sparse)) { |     if (False(image.flags & ImageFlagBits::Sparse)) { | ||||||
|  | @ -1115,6 +1083,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | ||||||
|         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |         tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||||||
|     } |     } | ||||||
|     total_used_memory -= Common::AlignUp(tentative_size, 1024); |     total_used_memory -= Common::AlignUp(tentative_size, 1024); | ||||||
|  |     lru_cache.Free(image.lru_index); | ||||||
|     const auto& clear_page_table = |     const auto& clear_page_table = | ||||||
|         [this, image_id]( |         [this, image_id]( | ||||||
|             u64 page, |             u64 page, | ||||||
|  | @ -1384,7 +1353,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool | ||||||
|     if (is_modification) { |     if (is_modification) { | ||||||
|         MarkModification(image); |         MarkModification(image); | ||||||
|     } |     } | ||||||
|     image.frame_tick = frame_tick; |     lru_cache.Touch(image.lru_index, frame_tick); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <class P> | template <class P> | ||||||
|  |  | ||||||
|  | @ -14,6 +14,7 @@ | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/literals.h" | #include "common/literals.h" | ||||||
|  | #include "common/lru_cache.h" | ||||||
| #include "video_core/compatible_formats.h" | #include "video_core/compatible_formats.h" | ||||||
| #include "video_core/delayed_destruction_ring.h" | #include "video_core/delayed_destruction_ring.h" | ||||||
| #include "video_core/engines/fermi_2d.h" | #include "video_core/engines/fermi_2d.h" | ||||||
|  | @ -370,6 +371,12 @@ private: | ||||||
|     std::vector<ImageId> uncommitted_downloads; |     std::vector<ImageId> uncommitted_downloads; | ||||||
|     std::queue<std::vector<ImageId>> committed_downloads; |     std::queue<std::vector<ImageId>> committed_downloads; | ||||||
| 
 | 
 | ||||||
|  |     struct LRUItemParams { | ||||||
|  |         using ObjectType = ImageId; | ||||||
|  |         using TickType = u64; | ||||||
|  |     }; | ||||||
|  |     Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; | ||||||
|  | 
 | ||||||
|     static constexpr size_t TICKS_TO_DESTROY = 6; |     static constexpr size_t TICKS_TO_DESTROY = 6; | ||||||
|     DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; |     DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; | ||||||
|     DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; |     DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; | ||||||
|  | @ -379,7 +386,6 @@ private: | ||||||
| 
 | 
 | ||||||
|     u64 modification_tick = 0; |     u64 modification_tick = 0; | ||||||
|     u64 frame_tick = 0; |     u64 frame_tick = 0; | ||||||
|     typename SlotVector<Image>::Iterator deletion_iterator; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace VideoCommon
 | } // namespace VideoCommon
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow