forked from eden-emu/eden
		
	Reaper: Change memory restrictions on TC depending on host memory on VK.
This commit is contained in:
		
							parent
							
								
									2fd0207e2e
								
							
						
					
					
						commit
						aa941d40cd
					
				
					 10 changed files with 90 additions and 41 deletions
				
			
		|  | @ -106,6 +106,8 @@ public: | |||
| 
 | ||||
|     void TickFrame(); | ||||
| 
 | ||||
|     void RunGarbageCollector(); | ||||
| 
 | ||||
|     void WriteMemory(VAddr cpu_addr, u64 size); | ||||
| 
 | ||||
|     void CachedWriteMemory(VAddr cpu_addr, u64 size); | ||||
|  | @ -350,29 +352,7 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void BufferCache<P>::TickFrame() { | ||||
|     const bool enabled_gc = Settings::values.use_caches_gc.GetValue(); | ||||
|     SCOPE_EXIT({ | ||||
|         ++frame_tick; | ||||
|         delayed_destruction_ring.Tick(); | ||||
|     }); | ||||
|     // Calculate hits and shots and move hit bits to the right
 | ||||
|     const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); | ||||
|     const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); | ||||
|     std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, | ||||
|                 uniform_cache_hits.begin() + 1); | ||||
|     std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, | ||||
|                 uniform_cache_shots.begin() + 1); | ||||
|     uniform_cache_hits[0] = 0; | ||||
|     uniform_cache_shots[0] = 0; | ||||
| 
 | ||||
|     const bool skip_preferred = hits * 256 < shots * 251; | ||||
|     uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | ||||
| 
 | ||||
|     const bool activate_gc = enabled_gc && total_used_memory >= EXPECTED_MEMORY; | ||||
|     if (!activate_gc) { | ||||
|         return; | ||||
|     } | ||||
| void BufferCache<P>::RunGarbageCollector() { | ||||
|     const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; | ||||
|     const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; | ||||
|     int num_iterations = aggressive_gc ? 64 : 32; | ||||
|  | @ -392,6 +372,28 @@ void BufferCache<P>::TickFrame() { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void BufferCache<P>::TickFrame() { | ||||
|     // Calculate hits and shots and move hit bits to the right
 | ||||
|     const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); | ||||
|     const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); | ||||
|     std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, | ||||
|                 uniform_cache_hits.begin() + 1); | ||||
|     std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, | ||||
|                 uniform_cache_shots.begin() + 1); | ||||
|     uniform_cache_hits[0] = 0; | ||||
|     uniform_cache_shots[0] = 0; | ||||
| 
 | ||||
|     const bool skip_preferred = hits * 256 < shots * 251; | ||||
|     uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | ||||
| 
 | ||||
|     if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) { | ||||
|         RunGarbageCollector(); | ||||
|     } | ||||
|     ++frame_tick; | ||||
|     delayed_destruction_ring.Tick(); | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { | ||||
|     ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | ||||
|  |  | |||
|  | @ -235,6 +235,7 @@ struct TextureCacheParams { | |||
|     static constexpr bool ENABLE_VALIDATION = true; | ||||
|     static constexpr bool FRAMEBUFFER_BLITS = true; | ||||
|     static constexpr bool HAS_EMULATED_COPIES = true; | ||||
|     static constexpr bool HAS_DEVICE_MEMORY_INFO = false; | ||||
| 
 | ||||
|     using Runtime = OpenGL::TextureCacheRuntime; | ||||
|     using Image = OpenGL::Image; | ||||
|  |  | |||
|  | @ -818,6 +818,10 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | |||
|     }); | ||||
| } | ||||
| 
 | ||||
| u64 TextureCacheRuntime::GetDeviceLocalMemory() const { | ||||
|     return device.GetDeviceLocalMemory(); | ||||
| } | ||||
| 
 | ||||
| Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, | ||||
|              VAddr cpu_addr_) | ||||
|     : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, | ||||
|  |  | |||
|  | @ -97,6 +97,8 @@ struct TextureCacheRuntime { | |||
|         // All known Vulkan drivers can natively handle BGR textures
 | ||||
|         return true; | ||||
|     } | ||||
| 
 | ||||
|     u64 GetDeviceLocalMemory() const; | ||||
| }; | ||||
| 
 | ||||
| class Image : public VideoCommon::ImageBase { | ||||
|  | @ -257,6 +259,7 @@ struct TextureCacheParams { | |||
|     static constexpr bool ENABLE_VALIDATION = true; | ||||
|     static constexpr bool FRAMEBUFFER_BLITS = false; | ||||
|     static constexpr bool HAS_EMULATED_COPIES = false; | ||||
|     static constexpr bool HAS_DEVICE_MEMORY_INFO = true; | ||||
| 
 | ||||
|     using Runtime = Vulkan::TextureCacheRuntime; | ||||
|     using Image = Vulkan::Image; | ||||
|  |  | |||
|  | @ -79,7 +79,7 @@ public: | |||
|         Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept | ||||
|             : slot_vector{slot_vector_}, id{id_} {} | ||||
| 
 | ||||
|         bool IsValid(const u64* bitset) noexcept { | ||||
|         bool IsValid(const u64* bitset) const noexcept { | ||||
|             return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; | ||||
|         } | ||||
| 
 | ||||
|  |  | |||
|  | @ -71,14 +71,16 @@ class TextureCache { | |||
|     static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; | ||||
|     /// True when some copies have to be emulated
 | ||||
|     static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; | ||||
|     /// True when the API can provide info about the memory of the device.
 | ||||
|     static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | ||||
| 
 | ||||
|     /// Image view ID for null descriptors
 | ||||
|     static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; | ||||
|     /// Sampler ID for bugged sampler ids
 | ||||
|     static constexpr SamplerId NULL_SAMPLER_ID{0}; | ||||
| 
 | ||||
|     static constexpr u64 EXPECTED_MEMORY = Common::Size_1_GB; | ||||
|     static constexpr u64 CRITICAL_MEMORY = Common::Size_2_GB; | ||||
|     static constexpr u64 DEFAULT_EXPECTED_MEMORY = Common::Size_1_GB; | ||||
|     static constexpr u64 DEFAULT_CRITICAL_MEMORY = Common::Size_2_GB; | ||||
| 
 | ||||
|     using Runtime = typename P::Runtime; | ||||
|     using Image = typename P::Image; | ||||
|  | @ -108,6 +110,9 @@ public: | |||
|     /// Notify the cache that a new frame has been queued
 | ||||
|     void TickFrame(); | ||||
| 
 | ||||
|     /// Runs the Garbage Collector.
 | ||||
|     void RunGarbageCollector(); | ||||
| 
 | ||||
|     /// Return a constant reference to the given image view id
 | ||||
|     [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; | ||||
| 
 | ||||
|  | @ -339,6 +344,8 @@ private: | |||
| 
 | ||||
|     bool has_deleted_images = false; | ||||
|     u64 total_used_memory = 0; | ||||
|     u64 expected_memory; | ||||
|     u64 critical_memory; | ||||
| 
 | ||||
|     SlotVector<Image> slot_images; | ||||
|     SlotVector<ImageView> slot_image_views; | ||||
|  | @ -382,21 +389,23 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
|     void(slot_samplers.insert(runtime, sampler_descriptor)); | ||||
| 
 | ||||
|     deletion_iterator = slot_images.begin(); | ||||
| 
 | ||||
|     if constexpr (HAS_DEVICE_MEMORY_INFO) { | ||||
|         const auto device_memory = runtime.GetDeviceLocalMemory(); | ||||
|         const u64 possible_expected_memory = (device_memory * 3) / 10; | ||||
|         const u64 possible_critical_memory = (device_memory * 6) / 10; | ||||
|         expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); | ||||
|         critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); | ||||
|     } else { | ||||
|         expected_memory = DEFAULT_EXPECTED_MEMORY; | ||||
|         critical_memory = DEFAULT_CRITICAL_MEMORY; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void TextureCache<P>::TickFrame() { | ||||
|     const bool enabled_gc = Settings::values.use_caches_gc.GetValue(); | ||||
|     if (!enabled_gc) { | ||||
|         // @Note(Blinkhawk): compile error with SCOPE_EXIT on msvc.
 | ||||
|         sentenced_images.Tick(); | ||||
|         sentenced_framebuffers.Tick(); | ||||
|         sentenced_image_view.Tick(); | ||||
|         ++frame_tick; | ||||
|         return; | ||||
|     } | ||||
|     const bool high_priority_mode = total_used_memory >= EXPECTED_MEMORY; | ||||
|     const bool aggressive_mode = total_used_memory >= CRITICAL_MEMORY; | ||||
| void TextureCache<P>::RunGarbageCollector() { | ||||
|     const bool high_priority_mode = total_used_memory >= expected_memory; | ||||
|     const bool aggressive_mode = total_used_memory >= critical_memory; | ||||
|     const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; | ||||
|     int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); | ||||
|     for (; num_iterations > 0; --num_iterations) { | ||||
|  | @ -451,11 +460,18 @@ void TextureCache<P>::TickFrame() { | |||
|             UnregisterImage(image_id); | ||||
|             DeleteImage(image_id); | ||||
|             if (is_bad_overlap) { | ||||
|                 num_iterations++; | ||||
|                 ++num_iterations; | ||||
|             } | ||||
|         } | ||||
|         ++deletion_iterator; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void TextureCache<P>::TickFrame() { | ||||
|     if (Settings::values.use_caches_gc.GetValue()) { | ||||
|         RunGarbageCollector(); | ||||
|     } | ||||
|     sentenced_images.Tick(); | ||||
|     sentenced_framebuffers.Tick(); | ||||
|     sentenced_image_view.Tick(); | ||||
|  |  | |||
|  | @ -408,6 +408,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
|     } | ||||
|     logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); | ||||
| 
 | ||||
|     CollectPhysicalMemoryInfo(); | ||||
|     CollectTelemetryParameters(); | ||||
|     CollectToolingInfo(); | ||||
| 
 | ||||
|  | @ -818,6 +819,19 @@ void Device::CollectTelemetryParameters() { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void Device::CollectPhysicalMemoryInfo() { | ||||
|     const auto mem_properties = physical.GetMemoryProperties(); | ||||
|     const std::size_t num_properties = mem_properties.memoryTypeCount; | ||||
|     device_access_memory = 0; | ||||
|     for (std::size_t element = 0; element < num_properties; element++) { | ||||
|         if ((mem_properties.memoryTypes[element].propertyFlags & | ||||
|              VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { | ||||
|             const std::size_t heap_index = mem_properties.memoryTypes[element].heapIndex; | ||||
|             device_access_memory += mem_properties.memoryHeaps[heap_index].size; | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void Device::CollectToolingInfo() { | ||||
|     if (!ext_tooling_info) { | ||||
|         return; | ||||
|  |  | |||
|  | @ -225,6 +225,10 @@ public: | |||
|         return use_asynchronous_shaders; | ||||
|     } | ||||
| 
 | ||||
|     u64 GetDeviceLocalMemory() const { | ||||
|         return device_access_memory; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     /// Checks if the physical device is suitable.
 | ||||
|     void CheckSuitability(bool requires_swapchain) const; | ||||
|  | @ -244,6 +248,9 @@ private: | |||
|     /// Collects information about attached tools.
 | ||||
|     void CollectToolingInfo(); | ||||
| 
 | ||||
|     /// Collects information about the device's local memory.
 | ||||
|     void CollectPhysicalMemoryInfo(); | ||||
| 
 | ||||
|     /// Returns a list of queue initialization descriptors.
 | ||||
|     std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; | ||||
| 
 | ||||
|  | @ -302,6 +309,8 @@ private: | |||
| 
 | ||||
|     /// Nsight Aftermath GPU crash tracker
 | ||||
|     std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker; | ||||
| 
 | ||||
|     u64 device_access_memory; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
|  |  | |||
|  | @ -109,7 +109,7 @@ | |||
|            <string>Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string> | ||||
|           </property> | ||||
|           <property name="text"> | ||||
|            <string>Enable GPU caches garbage collection (unsafe)</string> | ||||
|            <string>Enable GPU cache garbage collection (unsafe)</string> | ||||
|           </property> | ||||
|          </widget> | ||||
|         </item> | ||||
|  |  | |||
|  | @ -227,7 +227,7 @@ use_asynchronous_gpu_emulation = | |||
| # 0: Off, 1 (default): On | ||||
| use_vsync = | ||||
| 
 | ||||
| # Whether to use garbage collection or not. | ||||
| # Whether to use garbage collection or not for GPU caches. | ||||
| # 0 (default): Off, 1: On | ||||
| use_caches_gc = | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow