forked from eden-emu/eden
		
	Merge pull request #8987 from liamwhite/another-name-for-reinforcement-steel
vulkan: automatically use larger staging buffer sizes when possible
This commit is contained in:
		
						commit
						71fe9fd0f2
					
				
					 2 changed files with 60 additions and 27 deletions
				
			
		|  | @ -26,20 +26,39 @@ using namespace Common::Literals; | |||
| constexpr VkDeviceSize MAX_ALIGNMENT = 256; | ||||
| // Maximum size to put elements in the stream buffer
 | ||||
| constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; | ||||
| // Stream buffer size in bytes
 | ||||
| constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; | ||||
| constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; | ||||
| 
 | ||||
| constexpr VkMemoryPropertyFlags HOST_FLAGS = | ||||
|     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; | ||||
| constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; | ||||
| 
 | ||||
| bool IsStreamHeap(VkMemoryHeap heap) noexcept { | ||||
|     return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; | ||||
| static bool IsStreamHeap(VkMemoryHeap heap, size_t staging_buffer_size) noexcept { | ||||
|     return staging_buffer_size < (heap.size * 2) / 3; | ||||
| } | ||||
| 
 | ||||
| static bool HasLargeDeviceLocalHostVisibleMemory(const VkPhysicalDeviceMemoryProperties& props) { | ||||
|     const auto flags{VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT}; | ||||
| 
 | ||||
|     for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { | ||||
|         const auto& memory_type{props.memoryTypes[type_index]}; | ||||
| 
 | ||||
|         if ((memory_type.propertyFlags & flags) != flags) { | ||||
|             // Memory must be device local and host visible
 | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         const auto& heap{props.memoryHeaps[memory_type.heapIndex]}; | ||||
|         if (heap.size >= 7168_MiB) { | ||||
|             // This is the right type of memory
 | ||||
|             return true; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return false; | ||||
| } | ||||
| 
 | ||||
| std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, | ||||
|                                        VkMemoryPropertyFlags flags) noexcept { | ||||
|                                        VkMemoryPropertyFlags flags, | ||||
|                                        size_t staging_buffer_size) noexcept { | ||||
|     for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { | ||||
|         if (((type_mask >> type_index) & 1) == 0) { | ||||
|             // Memory type is incompatible
 | ||||
|  | @ -50,7 +69,7 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p | |||
|             // Memory type doesn't have the flags we want
 | ||||
|             continue; | ||||
|         } | ||||
|         if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { | ||||
|         if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex], staging_buffer_size)) { | ||||
|             // Memory heap is not suitable for streaming
 | ||||
|             continue; | ||||
|         } | ||||
|  | @ -61,17 +80,17 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p | |||
| } | ||||
| 
 | ||||
| u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, | ||||
|                         bool try_device_local) { | ||||
|                         bool try_device_local, size_t staging_buffer_size) { | ||||
|     std::optional<u32> type; | ||||
|     if (try_device_local) { | ||||
|         // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
 | ||||
|         type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); | ||||
|         type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS, staging_buffer_size); | ||||
|         if (type) { | ||||
|             return *type; | ||||
|         } | ||||
|     } | ||||
|     // Otherwise try without the DEVICE_LOCAL_BIT
 | ||||
|     type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); | ||||
|     type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS, staging_buffer_size); | ||||
|     if (type) { | ||||
|         return *type; | ||||
|     } | ||||
|  | @ -79,20 +98,32 @@ u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_ | |||
|     throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); | ||||
| } | ||||
| 
 | ||||
| size_t Region(size_t iterator) noexcept { | ||||
|     return iterator / REGION_SIZE; | ||||
| size_t Region(size_t iterator, size_t region_size) noexcept { | ||||
|     return iterator / region_size; | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, | ||||
|                                      Scheduler& scheduler_) | ||||
|     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { | ||||
| 
 | ||||
|     const auto memory_properties{device.GetPhysical().GetMemoryProperties().memoryProperties}; | ||||
|     if (HasLargeDeviceLocalHostVisibleMemory(memory_properties)) { | ||||
|         // Possible on many integrated and newer discrete cards
 | ||||
|         staging_buffer_size = 1_GiB; | ||||
|     } else { | ||||
|         // Well-supported default size used by most Vulkan PC games
 | ||||
|         staging_buffer_size = 256_MiB; | ||||
|     } | ||||
| 
 | ||||
|     region_size = staging_buffer_size / StagingBufferPool::NUM_SYNCS; | ||||
| 
 | ||||
|     const vk::Device& dev = device.GetLogical(); | ||||
|     stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ | ||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|         .flags = 0, | ||||
|         .size = STREAM_BUFFER_SIZE, | ||||
|         .size = staging_buffer_size, | ||||
|         .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||||
|                  VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | ||||
|         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||
|  | @ -117,19 +148,18 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem | |||
|         .image = nullptr, | ||||
|         .buffer = *stream_buffer, | ||||
|     }; | ||||
|     const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties; | ||||
|     VkMemoryAllocateInfo stream_memory_info{ | ||||
|         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | ||||
|         .pNext = make_dedicated ? &dedicated_info : nullptr, | ||||
|         .allocationSize = requirements.size, | ||||
|         .memoryTypeIndex = | ||||
|             FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true), | ||||
|         .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true, | ||||
|                                                staging_buffer_size), | ||||
|     }; | ||||
|     stream_memory = dev.TryAllocateMemory(stream_memory_info); | ||||
|     if (!stream_memory) { | ||||
|         LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory"); | ||||
|         stream_memory_info.memoryTypeIndex = | ||||
|             FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false); | ||||
|         stream_memory_info.memoryTypeIndex = FindMemoryTypeIndex( | ||||
|             memory_properties, requirements.memoryTypeBits, false, staging_buffer_size); | ||||
|         stream_memory = dev.AllocateMemory(stream_memory_info); | ||||
|     } | ||||
| 
 | ||||
|  | @ -137,7 +167,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem | |||
|         stream_memory.SetObjectNameEXT("Stream Buffer Memory"); | ||||
|     } | ||||
|     stream_buffer.BindMemory(*stream_memory, 0); | ||||
|     stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); | ||||
|     stream_pointer = stream_memory.Map(0, staging_buffer_size); | ||||
| } | ||||
| 
 | ||||
| StagingBufferPool::~StagingBufferPool() = default; | ||||
|  | @ -158,25 +188,25 @@ void StagingBufferPool::TickFrame() { | |||
| } | ||||
| 
 | ||||
| StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { | ||||
|     if (AreRegionsActive(Region(free_iterator) + 1, | ||||
|                          std::min(Region(iterator + size) + 1, NUM_SYNCS))) { | ||||
|     if (AreRegionsActive(Region(free_iterator, region_size) + 1, | ||||
|                          std::min(Region(iterator + size, region_size) + 1, NUM_SYNCS))) { | ||||
|         // Avoid waiting for the previous usages to be free
 | ||||
|         return GetStagingBuffer(size, MemoryUsage::Upload); | ||||
|     } | ||||
|     const u64 current_tick = scheduler.CurrentTick(); | ||||
|     std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator), | ||||
|               current_tick); | ||||
|     std::fill(sync_ticks.begin() + Region(used_iterator, region_size), | ||||
|               sync_ticks.begin() + Region(iterator, region_size), current_tick); | ||||
|     used_iterator = iterator; | ||||
|     free_iterator = std::max(free_iterator, iterator + size); | ||||
| 
 | ||||
|     if (iterator + size >= STREAM_BUFFER_SIZE) { | ||||
|         std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, | ||||
|                   current_tick); | ||||
|     if (iterator + size >= staging_buffer_size) { | ||||
|         std::fill(sync_ticks.begin() + Region(used_iterator, region_size), | ||||
|                   sync_ticks.begin() + NUM_SYNCS, current_tick); | ||||
|         used_iterator = 0; | ||||
|         iterator = 0; | ||||
|         free_iterator = size; | ||||
| 
 | ||||
|         if (AreRegionsActive(0, Region(size) + 1)) { | ||||
|         if (AreRegionsActive(0, Region(size, region_size) + 1)) { | ||||
|             // Avoid waiting for the previous usages to be free
 | ||||
|             return GetStagingBuffer(size, MemoryUsage::Upload); | ||||
|         } | ||||
|  |  | |||
|  | @ -93,6 +93,9 @@ private: | |||
|     size_t free_iterator = 0; | ||||
|     std::array<u64, NUM_SYNCS> sync_ticks{}; | ||||
| 
 | ||||
|     size_t staging_buffer_size = 0; | ||||
|     size_t region_size = 0; | ||||
| 
 | ||||
|     StagingBuffersCache device_local_cache; | ||||
|     StagingBuffersCache upload_cache; | ||||
|     StagingBuffersCache download_cache; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando S
						Fernando S