WIP: [vk] Fix Vulkan streaming ring alignment and flushes #2702
13 changed files with 342 additions and 26 deletions

@@ -9,6 +9,8 @@
 #include <algorithm>
 #include <memory>
 #include <numeric>
+#include <type_traits>
+#include <utility>

 #include "common/range_sets.inc"
 #include "video_core/buffer_cache/buffer_cache_base.h"
@@ -19,6 +21,43 @@ namespace VideoCommon {

 using Core::DEVICE_PAGESIZE;

+namespace staging_detail {
+template <typename T, typename = void>
+struct has_flush_range : std::false_type {};
+template <typename T>
+struct has_flush_range<
+    T, std::void_t<decltype(std::declval<T&>().FlushRange(size_t{}, size_t{}))>> : std::true_type {};
+
+template <typename T, typename = void>
+struct has_invalidate_range : std::false_type {};
+template <typename T>
+struct has_invalidate_range<
+    T, std::void_t<decltype(std::declval<T&>().InvalidateRange(size_t{}, size_t{}))>>
+    : std::true_type {};
+} // namespace staging_detail
+
+template <typename Ref>
+inline void StagingFlushRange(Ref& ref, size_t offset, size_t size) {
+    if constexpr (staging_detail::has_flush_range<Ref>::value) {
+        ref.FlushRange(offset, size);
+    } else {
+        (void)ref;
+        (void)offset;
+        (void)size;
+    }
+}
+
+template <typename Ref>
+inline void StagingInvalidateRange(Ref& ref, size_t offset, size_t size) {
+    if constexpr (staging_detail::has_invalidate_range<Ref>::value) {
+        ref.InvalidateRange(offset, size);
+    } else {
+        (void)ref;
+        (void)offset;
+        (void)size;
+    }
+}
+
 template <class P>
 BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_)
     : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
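Note: the staging_detail helpers above are a C++17 detection idiom. The shared cache code calls StagingFlushRange/StagingInvalidateRange on whatever staging-reference type the backend provides; when that type has no FlushRange/InvalidateRange members (as on the OpenGL backend) the call compiles to a no-op instead of failing to instantiate. A minimal self-contained sketch of the same idiom, with hypothetical VulkanRef/GLRef stand-ins:

    #include <cstddef>
    #include <type_traits>
    #include <utility>

    struct VulkanRef {
        void FlushRange(std::size_t, std::size_t) {} // would flush non-coherent memory
    };
    struct GLRef {}; // no FlushRange member at all

    template <typename T, typename = void>
    struct has_flush_range : std::false_type {};
    template <typename T>
    struct has_flush_range<T, std::void_t<decltype(std::declval<T&>().FlushRange(
                                  std::size_t{}, std::size_t{}))>> : std::true_type {};

    template <typename Ref>
    void StagingFlushRange(Ref& ref, std::size_t offset, std::size_t size) {
        if constexpr (has_flush_range<Ref>::value) {
            ref.FlushRange(offset, size); // Vulkan path
        } // otherwise: compile-time no-op (OpenGL path)
    }

    static_assert(has_flush_range<VulkanRef>::value);
    static_assert(!has_flush_range<GLRef>::value);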
@@ -633,6 +672,7 @@ void BufferCache<P>::PopAsyncBuffers() {
         u8* base = async_buffer->mapped_span.data();
         const size_t base_offset = async_buffer->offset;
         for (const auto& copy : downloads) {
+            StagingInvalidateRange(*async_buffer, copy.dst_offset, copy.size);
             const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
             const u64 dst_offset = copy.dst_offset - base_offset;
             const u8* read_mapped_memory = base + dst_offset;

@@ -696,6 +736,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
             {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
         std::memcpy(upload_staging.mapped_span.data(),
                     draw_state.inline_index_draw_indexes.data(), size);
+        StagingFlushRange(upload_staging, upload_staging.offset, size);
         runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
     } else {
         buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);

@@ -1519,7 +1560,7 @@ template <class P>
 void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
                                         [[maybe_unused]] u64 total_size_bytes,
                                         [[maybe_unused]] std::span<BufferCopy> copies) {
     if constexpr (USE_MEMORY_MAPS) {
         auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
         const std::span<u8> staging_pointer = upload_staging.mapped_span;
         for (BufferCopy& copy : copies) {

@@ -1530,6 +1571,7 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
             // Apply the staging offset
             copy.src_offset += upload_staging.offset;
         }
+        StagingFlushRange(upload_staging, upload_staging.offset, total_size_bytes);
         const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
         runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
     }

@@ -1572,6 +1614,7 @@ void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_
         }};
         u8* const src_pointer = upload_staging.mapped_span.data();
         std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
+        StagingFlushRange(upload_staging, upload_staging.offset, copy_size);
         const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
         runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
     } else {

@@ -1626,6 +1669,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64
         }
         runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
         runtime.Finish();
+        StagingInvalidateRange(download_staging, download_staging.offset, total_size_bytes);
         for (const BufferCopy& copy : copies) {
             const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset;
             // Undo the modified offset
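Note: the StagingFlushRange call after filling an upload staging buffer and the StagingInvalidateRange call after runtime.Finish() implement the standard Vulkan host-visibility rule for non-coherent memory: host writes must be flushed before the GPU reads them, and device writes must be invalidated before the host reads them, with both ranges widened to nonCoherentAtomSize. A sketch of the underlying calls (assumes a mapped, non-coherent allocation; device, memory and the offsets are the caller's):

    #include <vulkan/vulkan.h>

    void FlushCpuWrites(VkDevice device, VkDeviceMemory memory, VkDeviceSize offset,
                        VkDeviceSize size, VkDeviceSize atom /* nonCoherentAtomSize */) {
        const VkDeviceSize begin = (offset / atom) * atom;                   // align down
        const VkDeviceSize end = ((offset + size + atom - 1) / atom) * atom; // align up
        const VkMappedMemoryRange range{
            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = memory,
            .offset = begin,
            .size = end - begin,
        };
        vkFlushMappedMemoryRanges(device, 1, &range); // CPU writes -> visible to GPU
        // vkInvalidateMappedMemoryRanges(device, 1, &range) is the mirror operation,
        // making GPU writes visible to the CPU once the copy has finished.
    }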
@@ -166,7 +166,7 @@ try

     if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
         turbo_mode.emplace(instance, dld);
-        scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
+        scheduler.AddOnSubmit([this] { turbo_mode->QueueSubmitted(); });
     }

     Report();

@@ -176,7 +176,7 @@ try
 }

 RendererVulkan::~RendererVulkan() {
-    scheduler.RegisterOnSubmit([] {});
+    scheduler.RegisterOnSubmit(std::function<void()>{});
     void(device.GetLogical().WaitIdle());
 }

@@ -179,6 +179,11 @@ public:
         }();

         u8* staging_data = host_visible ? buffer.Mapped().data() : staging.mapped_span.data();
+#ifdef YUZU_DEBUG
+        if (!host_visible) {
+            ASSERT(staging.mapped_span.size() >= size_bytes);
+        }
+#endif
         const size_t quad_size = bytes_per_index * 6;

         for (u32 first = 0; first < num_first_offset_copies; ++first) {

@@ -189,6 +194,7 @@ public:
         }

         if (!host_visible) {
+            staging.FlushRange(staging.offset, static_cast<VkDeviceSize>(size_bytes));
             scheduler.RequestOutsideRenderPassOperationContext();
             scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
                               dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {

@@ -514,6 +520,10 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
         ReserveNullBuffer();
         vk_buffer = *null_buffer;
     }
+#ifdef YUZU_DEBUG
+    const size_t bytes_per_index = BytesPerIndex(vk_index_type);
+    ASSERT(bytes_per_index == 0 || (vk_offset % bytes_per_index) == 0);
+#endif
     scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
         cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
     });
@@ -127,26 +127,43 @@ public:
     std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
                                           [[maybe_unused]] u32 binding_index, u32 size) {
         const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
-        BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
+        BindBuffer(ref.buffer, ref.offset, static_cast<VkDeviceSize>(size));
+#ifdef YUZU_DEBUG
+        ASSERT(ref.mapped_span.size() >= size);
+        const VkDeviceSize ubo_align = device.GetUniformBufferAlignment();
+        ASSERT(ubo_align == 0 || (ref.offset % ubo_align) == 0);
+#endif
         return ref.mapped_span;
     }

-    void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
+    void BindUniformBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
+#ifdef YUZU_DEBUG
+        const VkDeviceSize ubo_align = device.GetUniformBufferAlignment();
+        ASSERT(ubo_align == 0 || (offset % ubo_align) == 0);
+#endif
         BindBuffer(buffer, offset, size);
     }

-    void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
+    void BindStorageBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size,
                            [[maybe_unused]] bool is_written) {
+#ifdef YUZU_DEBUG
+        const VkDeviceSize ssbo_align = device.GetStorageBufferAlignment();
+        ASSERT(ssbo_align == 0 || (offset % ssbo_align) == 0);
+#endif
         BindBuffer(buffer, offset, size);
     }

     void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
                            VideoCore::Surface::PixelFormat format) {
+#ifdef YUZU_DEBUG
+        const VkDeviceSize texel_align = device.GetTexelBufferAlignment();
+        ASSERT(texel_align == 0 || (offset % texel_align) == 0);
+#endif
         guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
     }

 private:
-    void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
+    void BindBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
         guest_descriptor_queue.AddBuffer(buffer, offset, size);
     }

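Note: the binding helpers now carry offsets and sizes as VkDeviceSize all the way to guest_descriptor_queue.AddBuffer, so a staging-ring offset no longer passes through static_cast<u32> where it could silently truncate, and debug builds assert that every offset honours the device's minUniformBufferOffsetAlignment / minStorageBufferOffsetAlignment / minTexelBufferOffsetAlignment limits. A tiny illustration of both points (values are made up):

    #include <cstdint>
    using VkDeviceSize = std::uint64_t; // matches Vulkan's typedef

    // Truncation hazard removed by the wider parameter type:
    static_assert(static_cast<std::uint32_t>(VkDeviceSize{0x1'0000'0100ULL}) == 0x100u);

    // The shape of the new debug checks:
    constexpr bool IsAligned(VkDeviceSize offset, VkDeviceSize align) {
        return align == 0 || (offset % align) == 0;
    }
    static_assert(IsAligned(512, 256) && !IsAligned(260, 256));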
@@ -850,10 +850,17 @@ public:
             pending_flush_sets.pop_front();
         }

+        const VkDeviceSize read_size =
+            static_cast<VkDeviceSize>(flushed_queries.size() * TFBQueryBank::QUERY_SIZE);
+        staging_ref.InvalidateRange(staging_ref.offset, read_size);
+
         size_t offset_base = staging_ref.offset;
         for (auto q : flushed_queries) {
             auto* query = GetQuery(q);
             u32 result = 0;
+#ifdef YUZU_DEBUG
+            ASSERT(staging_ref.mapped_span.size() >= offset_base + sizeof(u32));
+#endif
             std::memcpy(&result, staging_ref.mapped_span.data() + offset_base, sizeof(u32));
             query->value = static_cast<u64>(result);
             query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;

@@ -1567,10 +1574,14 @@ void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer ba
                     impl->little_cache[which_copy].first,
                 .size = values[i].size,
             });
+#ifdef YUZU_DEBUG
+            ASSERT(ref.mapped_span.size() >= accumulated_size + values[i].size);
+#endif
             std::memcpy(ref.mapped_span.data() + accumulated_size, &values[i].value,
                         values[i].size);
             accumulated_size += values[i].size;
         }
+        ref.FlushRange(ref.offset, static_cast<VkDeviceSize>(accumulated_size));
         src_buffer = ref.buffer;
     } else {
         for (size_t i = 0; i < values.size(); i++) {
@@ -15,6 +15,7 @@
 #include "video_core/renderer_vulkan/vk_command_pool.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/vulkan_common/vulkan_device.h"

@@ -233,8 +234,14 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
     upload_cmdbuf.End();
     cmdbuf.End();

-    if (on_submit) {
-        on_submit();
+    if (staging_buffer_pool) {
+        staging_buffer_pool->FlushStream();
+    }
+
+    for (const auto& callback : on_submit_callbacks) {
+        if (callback) {
+            callback();
+        }
     }

     std::scoped_lock lock{submit_mutex};
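Note: FlushStream runs once per submission, after both command buffers are ended and before the queue submit that follows, so the frame's writes into the non-coherent stream ring are made visible to the GPU with a single ranged flush instead of one flush per allocation; the callback loop that follows preserves the old on_submit behaviour for hooks such as turbo mode. A condensed view of the ordering (abbreviated from the hunk above):

    upload_cmdbuf.End();
    cmdbuf.End();
    if (staging_buffer_pool) {
        staging_buffer_pool->FlushStream(); // one ranged flush for the whole stream ring
    }
    for (const auto& callback : on_submit_callbacks) {
        if (callback) {
            callback();                     // e.g. TurboMode::QueueSubmitted()
        }
    }
    // ...queue submission happens after this point.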
@@ -10,6 +10,7 @@
 #include <thread>
 #include <utility>
 #include <queue>
+#include <vector>

 #include "common/alignment.h"
 #include "common/common_types.h"

@@ -29,6 +30,7 @@ class Device;
 class Framebuffer;
 class GraphicsPipeline;
 class StateTracker;
+class StagingBufferPool;

 struct QueryCacheParams;

@@ -73,9 +75,23 @@ public:
         query_cache = &query_cache_;
     }

-    // Registers a callback to perform on queue submission.
+    void SetStagingBufferPool(StagingBufferPool* pool) {
+        staging_buffer_pool = pool;
+    }
+
+    // Registers a callback to perform on queue submission, replacing existing callbacks.
     void RegisterOnSubmit(std::function<void()>&& func) {
-        on_submit = std::move(func);
+        on_submit_callbacks.clear();
+        if (func) {
+            on_submit_callbacks.emplace_back(std::move(func));
+        }
+    }
+
+    // Adds an additional callback to perform on queue submission.
+    void AddOnSubmit(std::function<void()>&& func) {
+        if (func) {
+            on_submit_callbacks.emplace_back(std::move(func));
+        }
     }

     /// Send work to a separate thread.

@@ -237,12 +253,13 @@ private:
     std::unique_ptr<CommandPool> command_pool;

     VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
+    StagingBufferPool* staging_buffer_pool = nullptr;

     vk::CommandBuffer current_cmdbuf;
     vk::CommandBuffer current_upload_cmdbuf;

     std::unique_ptr<CommandChunk> chunk;
-    std::function<void()> on_submit;
+    std::vector<std::function<void()>> on_submit_callbacks;

     State state;

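Note: the single on_submit hook becomes a vector of callbacks. RegisterOnSubmit keeps its replace-everything semantics (the renderer's destructor still uses it to drop all hooks before waiting for the device), while the new AddOnSubmit appends, so turbo mode and future hooks no longer overwrite one another. Hypothetical usage:

    scheduler.AddOnSubmit([this] { turbo_mode->QueueSubmitted(); }); // appended, coexists with others
    scheduler.RegisterOnSubmit(std::function<void()>{});             // clears every callback (teardown)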
@@ -5,6 +5,7 @@
 // SPDX-License-Identifier: GPL-3.0-or-later

 #include <algorithm>
+#include <memory>
 #include <utility>
 #include <vector>

@@ -30,6 +31,11 @@ constexpr VkDeviceSize MIN_STREAM_ALIGNMENT = 256;
 // Stream buffer size in bytes
 constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB;

+VkDeviceSize GetStreamAlignment(const Device& device) {
+    return (std::max)({device.GetUniformBufferAlignment(), device.GetStorageBufferAlignment(),
+                       device.GetTexelBufferAlignment(), MIN_STREAM_ALIGNMENT});
+}
+
 size_t GetStreamBufferSize(const Device& device, VkDeviceSize alignment) {
     VkDeviceSize size{0};
     if (device.HasDebuggingToolAttached()) {

@@ -63,8 +69,7 @@ size_t GetStreamBufferSize(const Device& device, VkDeviceSize alignment) {
 StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
                                      Scheduler& scheduler_)
     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
-      stream_alignment{std::max<VkDeviceSize>(device_.GetUniformBufferAlignment(),
-                                              MIN_STREAM_ALIGNMENT)},
+      stream_alignment{GetStreamAlignment(device_)},
       stream_buffer_size{GetStreamBufferSize(device_, stream_alignment)},
       region_size{stream_buffer_size / StagingBufferPool::NUM_SYNCS} {
     VkBufferCreateInfo stream_ci = {
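Note: the stream ring is suballocated for uniform, storage and texel buffer bindings alike, so its running offset has to satisfy the strictest of the three offset-alignment limits rather than only minUniformBufferOffsetAlignment as before; MIN_STREAM_ALIGNMENT (256) remains the floor. A compile-time sketch of the rule with made-up limit values:

    #include <algorithm>
    #include <cstdint>
    using VkDeviceSize = std::uint64_t;

    constexpr VkDeviceSize CombinedStreamAlignment(VkDeviceSize ubo, VkDeviceSize ssbo,
                                                   VkDeviceSize texel) {
        return std::max({ubo, ssbo, texel, VkDeviceSize{256}}); // 256 = MIN_STREAM_ALIGNMENT
    }
    static_assert(CombinedStreamAlignment(64, 16, 256) == 256);
    static_assert(CombinedStreamAlignment(256, 1024, 16) == 1024);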
@@ -87,9 +92,18 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
     }
     stream_pointer = stream_buffer.Mapped();
     ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
+    stream_is_coherent = stream_buffer.IsHostCoherent();
+    non_coherent_atom_size = std::max<VkDeviceSize>(device.GetNonCoherentAtomSize(),
+                                                    static_cast<VkDeviceSize>(1));
+    dirty_begin = stream_buffer_size;
+    dirty_end = 0;
+    stream_dirty = false;
+    scheduler.SetStagingBufferPool(this);
 }

-StagingBufferPool::~StagingBufferPool() = default;
+StagingBufferPool::~StagingBufferPool() {
+    scheduler.SetStagingBufferPool(nullptr);
+}

 StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
     if (!deferred && usage == MemoryUsage::Upload && size <= region_size) {

@@ -121,9 +135,10 @@ void StagingBufferPool::TickFrame() {
 StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
     const size_t alignment = static_cast<size_t>(stream_alignment);
     const size_t aligned_size = Common::AlignUp(size, alignment);
-    const bool wraps = iterator + size >= stream_buffer_size;
+    const size_t capacity = static_cast<size_t>(stream_buffer_size);
+    const bool wraps = iterator + aligned_size > capacity;
     const size_t new_iterator =
-        wraps ? aligned_size : Common::AlignUp(iterator + size, alignment);
+        wraps ? aligned_size : Common::AlignUp(iterator + aligned_size, alignment);
     const size_t begin_region = wraps ? 0 : Region(iterator);
     const size_t last_byte = new_iterator == 0 ? 0 : new_iterator - 1;
     const size_t end_region = (std::min)(Region(last_byte) + 1, NUM_SYNCS);
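Note: the ring now reserves the fully aligned size and only wraps when that aligned allocation would actually overrun the buffer (`>` instead of `>=` on the unaligned size). With the old test an allocation that ended exactly at the ring's end was wrapped needlessly, discarding the tail of the buffer and waiting on region 0 earlier than required. Worked example (illustrative constants):

    #include <cstddef>

    constexpr std::size_t capacity = 1024, alignment = 256;
    constexpr std::size_t iterator = 768, size = 256;
    constexpr std::size_t aligned_size = (size + alignment - 1) / alignment * alignment;
    static_assert(iterator + size >= capacity);            // old test: wraps, tail slot wasted
    static_assert(!(iterator + aligned_size > capacity));  // new test: serves [768, 1024) in place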
@@ -167,16 +182,69 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
         free_iterator = (std::max)(free_iterator, offset + aligned_size);
     }

+    TrackStreamWrite(static_cast<VkDeviceSize>(offset), static_cast<VkDeviceSize>(aligned_size));
+
     return StagingBufferRef{
         .buffer = *stream_buffer,
         .offset = static_cast<VkDeviceSize>(offset),
         .mapped_span = stream_pointer.subspan(offset, size),
-        .usage{},
-        .log2_level{},
-        .index{},
+        .usage = MemoryUsage::Upload,
+        .log2_level = 0,
+        .index = 0,
+        .owner = &stream_buffer,
+        .atom_size = non_coherent_atom_size,
+        .is_coherent = stream_is_coherent,
+        .is_stream_ring = true,
     };
 }

+void StagingBufferPool::TrackStreamWrite(VkDeviceSize offset, VkDeviceSize size) {
+    if (stream_is_coherent || size == 0) {
+        return;
+    }
+    const VkDeviceSize clamped_offset = (std::min)(offset, stream_buffer_size);
+    const VkDeviceSize clamped_end = (std::min)(clamped_offset + size, stream_buffer_size);
+    std::scoped_lock lock{stream_mutex};
+    if (!stream_dirty) {
+        dirty_begin = clamped_offset;
+        dirty_end = clamped_end;
+        stream_dirty = true;
+        return;
+    }
+    dirty_begin = (std::min)(dirty_begin, clamped_offset);
+    dirty_end = (std::max)(dirty_end, clamped_end);
+}
+
+void StagingBufferPool::FlushStream() {
+    if (stream_is_coherent) {
+        return;
+    }
+
+    VkDeviceSize flush_begin = 0;
+    VkDeviceSize flush_end = 0;
+    {
+        std::scoped_lock lock{stream_mutex};
+        if (!stream_dirty) {
+            return;
+        }
+        flush_begin = dirty_begin;
+        flush_end = dirty_end;
+        stream_dirty = false;
+        dirty_begin = stream_buffer_size;
+        dirty_end = 0;
+    }
+
+    if (flush_begin >= flush_end) {
+        return;
+    }
+
+    const VkDeviceSize atom = non_coherent_atom_size;
+    const VkDeviceSize aligned_begin = Common::AlignDown(flush_begin, atom);
+    const VkDeviceSize aligned_end = Common::AlignUp(flush_end, atom);
+    const VkDeviceSize flush_size = aligned_end - aligned_begin;
+    stream_buffer.FlushRange(aligned_begin, flush_size);
+}
+
 bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
     const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
     return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
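Note: TrackStreamWrite merges every ring allocation into one [dirty_begin, dirty_end) window under stream_mutex, and FlushStream consumes that window once per submission, rounding it out to the non-coherent atom size before the single FlushRange call. A self-contained sketch of the same bookkeeping (the returned range is what the real code hands to stream_buffer.FlushRange):

    #include <algorithm>
    #include <cstdint>
    #include <mutex>
    #include <utility>

    class DirtyWindow {
    public:
        DirtyWindow(std::uint64_t capacity, std::uint64_t atom)
            : capacity_{capacity}, atom_{atom}, begin_{capacity} {}

        void Track(std::uint64_t offset, std::uint64_t size) {
            std::scoped_lock lock{mutex_};
            begin_ = std::min(begin_, std::min(offset, capacity_));
            end_ = std::max(end_, std::min(offset + size, capacity_));
        }

        // Returns {aligned_offset, aligned_size} to flush, or {0, 0} when clean.
        std::pair<std::uint64_t, std::uint64_t> TakeFlushRange() {
            std::scoped_lock lock{mutex_};
            if (begin_ >= end_) {
                return {0, 0};
            }
            const std::uint64_t aligned_begin = begin_ / atom_ * atom_;
            const std::uint64_t aligned_end = (end_ + atom_ - 1) / atom_ * atom_;
            begin_ = capacity_; // reset to the "empty" state, as the pool does
            end_ = 0;
            return {aligned_begin, aligned_end - aligned_begin};
        }

    private:
        std::uint64_t capacity_;
        std::uint64_t atom_;
        std::uint64_t begin_;
        std::uint64_t end_ = 0;
        std::mutex mutex_;
    };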
@@ -238,15 +306,19 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage
         ++buffer_index;
         buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
     }
+    const bool is_coherent = buffer.IsHostCoherent();
     const std::span<u8> mapped_span = buffer.Mapped();
+    auto buffer_ptr = std::make_unique<vk::Buffer>(std::move(buffer));
     StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
-        .buffer = std::move(buffer),
+        .buffer = std::move(buffer_ptr),
         .mapped_span = mapped_span,
         .usage = usage,
         .log2_level = log2,
         .index = unique_ids++,
         .tick = deferred ? (std::numeric_limits<u64>::max)() : scheduler.CurrentTick(),
         .deferred = deferred,
+        .is_coherent = is_coherent,
+        .atom_size = is_coherent ? 1 : non_coherent_atom_size,
     });
     return entry.Ref();
 }
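Note: StagingBuffer::buffer becomes a std::unique_ptr<vk::Buffer> because the new StagingBufferRef::owner field stores a pointer to that buffer; keeping the vk::Buffer on the heap means the pointer survives reallocation of the entries vector, which would otherwise move every element and dangle previously handed-out refs. Minimal illustration:

    #include <memory>
    #include <vector>

    struct Buffer { int handle = 0; }; // stand-in for vk::Buffer

    int main() {
        std::vector<std::unique_ptr<Buffer>> entries;
        entries.push_back(std::make_unique<Buffer>());
        Buffer* owner = entries.back().get();          // what a StagingBufferRef would store
        entries.push_back(std::make_unique<Buffer>()); // may reallocate the vector...
        return owner->handle;                          // ...but 'owner' still points at the same object
    }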
@@ -7,9 +7,12 @@
 #pragma once

 #include <climits>
+#include <mutex>
+#include <memory>
 #include <vector>

 #include "common/common_types.h"
+#include "common/alignment.h"

 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -26,10 +29,54 @@ struct StagingBufferRef {
     MemoryUsage usage;
     u32 log2_level;
     u64 index;
+    const vk::Buffer* owner = nullptr;
+    VkDeviceSize atom_size = 1;
+    bool is_coherent = true;
+    bool is_stream_ring = false;
+
+    void FlushRange(VkDeviceSize range_offset, VkDeviceSize size) const {
+        if (!owner || is_coherent || size == 0 || is_stream_ring) {
+            return;
+        }
+        if (size == VK_WHOLE_SIZE) {
+            owner->FlushRange(range_offset, size);
+            return;
+        }
+        const VkDeviceSize atom = atom_size ? atom_size : 1;
+        const VkDeviceSize range_end = range_offset + size;
+        if (range_end < range_offset) {
+            owner->FlushRange(range_offset, size);
+            return;
+        }
+        const VkDeviceSize aligned_begin = Common::AlignDown(range_offset, atom);
+        const VkDeviceSize aligned_end = Common::AlignUp(range_end, atom);
+        owner->FlushRange(aligned_begin, aligned_end - aligned_begin);
+    }
+
+    void InvalidateRange(VkDeviceSize range_offset, VkDeviceSize size) const {
+        if (!owner || is_coherent || size == 0 || is_stream_ring) {
+            return;
+        }
+        if (size == VK_WHOLE_SIZE) {
+            owner->InvalidateRange(range_offset, size);
+            return;
+        }
+        const VkDeviceSize atom = atom_size ? atom_size : 1;
+        const VkDeviceSize range_end = range_offset + size;
+        if (range_end < range_offset) {
+            owner->InvalidateRange(range_offset, size);
+            return;
+        }
+        const VkDeviceSize aligned_begin = Common::AlignDown(range_offset, atom);
+        const VkDeviceSize aligned_end = Common::AlignUp(range_end, atom);
+        owner->InvalidateRange(aligned_begin, aligned_end - aligned_begin);
+    }
 };

 class StagingBufferPool {
 public:
+    friend class Scheduler;
+
     static constexpr size_t NUM_SYNCS = 16;

     explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
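Note: is_stream_ring makes the per-ref helpers a no-op for allocations carved out of the stream ring, whose writes are tracked and flushed in one batch at submit time (FlushStream); dedicated staging buffers flush or invalidate only the touched range, widened to the allocation's non-coherent atom size, while VK_WHOLE_SIZE or an overflowing range is passed through unchanged. Typical call sites look like this (illustrative only):

    // ref is a StagingBufferRef obtained from the pool:
    std::memcpy(ref.mapped_span.data(), src, bytes);
    ref.FlushRange(ref.offset, bytes);       // no-op if coherent or if ref.is_stream_ring
    // ...record the GPU copy, submit, runtime.Finish()...
    ref.InvalidateRange(ref.offset, bytes);  // before the CPU reads a download back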
@@ -52,22 +99,28 @@ private:
     };

     struct StagingBuffer {
-        vk::Buffer buffer;
+        std::unique_ptr<vk::Buffer> buffer;
         std::span<u8> mapped_span;
         MemoryUsage usage;
         u32 log2_level;
         u64 index;
         u64 tick = 0;
         bool deferred{};
+        bool is_coherent = true;
+        VkDeviceSize atom_size = 1;

         StagingBufferRef Ref() const noexcept {
             return {
-                .buffer = *buffer,
+                .buffer = buffer ? **buffer : VkBuffer{},
                 .offset = 0,
                 .mapped_span = mapped_span,
                 .usage = usage,
                 .log2_level = log2_level,
                 .index = index,
+                .owner = buffer.get(),
+                .atom_size = atom_size,
+                .is_coherent = is_coherent,
+                .is_stream_ring = false,
             };
         }
     };

@@ -83,6 +136,9 @@ private:

     StagingBufferRef GetStreamBuffer(size_t size);

+    void TrackStreamWrite(VkDeviceSize offset, VkDeviceSize size);
+    void FlushStream();
+
     bool AreRegionsActive(size_t region_begin, size_t region_end) const;

     StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage, bool deferred = false);

@@ -110,6 +166,12 @@ private:
     std::span<u8> stream_pointer;
     VkDeviceSize stream_buffer_size;
     VkDeviceSize region_size;
+    bool stream_is_coherent = true;
+    VkDeviceSize non_coherent_atom_size = 1;
+    VkDeviceSize dirty_begin = 0;
+    VkDeviceSize dirty_end = 0;
+    bool stream_dirty = false;
+    std::mutex stream_mutex;

     size_t iterator = 0;
     size_t used_iterator = 0;
@@ -7,6 +7,8 @@
 #pragma once

 #include <unordered_set>
+#include <type_traits>
+#include <utility>
 #include <boost/container/small_vector.hpp>

 #include "common/alignment.h"
@@ -30,6 +32,42 @@ using VideoCore::Surface::PixelFormat;
 using VideoCore::Surface::SurfaceType;
 using namespace Common::Literals;
+
+namespace staging_detail {
+template <typename T, typename = void>
+struct has_flush_range : std::false_type {};
+template <typename T>
+struct has_flush_range<
+    T, std::void_t<decltype(std::declval<T&>().FlushRange(size_t{}, size_t{}))>> : std::true_type {};
+template <typename T, typename = void>
+struct has_invalidate_range : std::false_type {};
+template <typename T>
+struct has_invalidate_range<
+    T, std::void_t<decltype(std::declval<T&>().InvalidateRange(size_t{}, size_t{}))>>
+    : std::true_type {};
+} // namespace staging_detail
+
+template <typename Ref>
+inline void StagingFlushRange(Ref& ref, size_t offset, size_t size) {
+    if constexpr (staging_detail::has_flush_range<Ref>::value) {
+        ref.FlushRange(offset, size);
+    } else {
+        (void)ref;
+        (void)offset;
+        (void)size;
+    }
+}
+
+template <typename Ref>
+inline void StagingInvalidateRange(Ref& ref, size_t offset, size_t size) {
+    if constexpr (staging_detail::has_invalidate_range<Ref>::value) {
+        ref.InvalidateRange(offset, size);
+    } else {
+        (void)ref;
+        (void)offset;
+        (void)size;
+    }
+}

 template <class P>
 TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_)
     : runtime{runtime_}, device_memory{device_memory_} {
@@ -111,6 +149,7 @@ void TextureCache<P>::RunGarbageCollector() {
             const auto copies = FullDownloadCopies(image.info);
             image.DownloadMemory(map, copies);
             runtime.Finish();
+            StagingInvalidateRange(map, map.offset, image.unswizzled_size_bytes);
             SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
                          swizzle_data_buffer);
         }

@@ -567,6 +606,7 @@ void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
         const auto copies = FullDownloadCopies(image.info);
         image.DownloadMemory(map, copies);
         runtime.Finish();
+        StagingInvalidateRange(map, map.offset, image.unswizzled_size_bytes);
         SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
                      swizzle_data_buffer);
     }

@@ -863,13 +903,17 @@ void TextureCache<P>::PopAsyncFlushes() {
             if (download_info.is_swizzle) {
                 const ImageBase& image = slot_images[download_info.object_id];
                 const auto copies = FullDownloadCopies(image.info);
-                download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
+                const size_t aligned_size =
+                    Common::AlignUp(image.unswizzled_size_bytes, static_cast<size_t>(64));
+                download_buffer.offset -= aligned_size;
+                StagingInvalidateRange(download_buffer, download_buffer.offset, aligned_size);
                 std::span<u8> download_span =
                     download_buffer.mapped_span.subspan(download_buffer.offset);
                 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
                              swizzle_data_buffer);
             } else {
                 const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id];
+                StagingInvalidateRange(download_buffer, download_buffer.offset, buffer_info.size);
                 std::span<u8> download_span =
                     download_buffer.mapped_span.subspan(download_buffer.offset);
                 gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(),

@@ -907,6 +951,7 @@ void TextureCache<P>::PopAsyncFlushes() {
         }
         // Wait for downloads to finish
         runtime.Finish();
+        StagingInvalidateRange(download_map, original_offset, total_size_bytes);
         download_map.offset = original_offset;
         std::span<u8> download_span = download_map.mapped_span;
         for (const PendingDownload& download_info : download_ids) {

@@ -1081,6 +1126,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
     if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
         gpu_memory->ReadBlock(gpu_addr, mapped_span.data(), mapped_span.size_bytes(),
                               VideoCommon::CacheType::NoTextureCache);
+        StagingFlushRange(staging, staging.offset, mapped_span.size_bytes());
         const auto uploads = FullUploadSwizzles(image.info);
         runtime.AccelerateImageUpload(image, staging, uploads);
         return;

@@ -1094,10 +1140,12 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
         auto copies =
             UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer);
         ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies);
+        StagingFlushRange(staging, staging.offset, mapped_span.size_bytes());
         image.UploadMemory(staging, copies);
     } else {
         const auto copies =
             UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span);
+        StagingFlushRange(staging, staging.offset, mapped_span.size_bytes());
         image.UploadMemory(staging, copies);
     }
 }

@@ -1329,6 +1377,7 @@ void TextureCache<P>::TickAsyncDecode() {
         auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
         std::memcpy(staging.mapped_span.data(), async_decode->decoded_data.data(),
                     async_decode->decoded_data.size());
+        StagingFlushRange(staging, staging.offset, async_decode->decoded_data.size());
         image.UploadMemory(staging, async_decode->copies);
         image.flags &= ~ImageFlagBits::IsDecoding;
         has_uploads = true;
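Note: the texture cache follows the same protocol as the buffer cache: StagingFlushRange after the CPU fills a staging span and before the GPU copy, StagingInvalidateRange after runtime.Finish() and before the CPU touches downloaded texels; on backends whose map type lacks these members the calls compile away through the staging_detail helpers. A compact sketch of the download ordering with stand-in types:

    #include <cstddef>
    #include <cstring>

    struct MockMap {
        unsigned char* data = nullptr;
        std::size_t offset = 0;
        void InvalidateRange(std::size_t, std::size_t) {} // vkInvalidateMappedMemoryRanges in the real code
    };

    inline void ReadBack(MockMap& map, unsigned char* dst, std::size_t size_bytes) {
        // 1. the GPU -> staging copy has already been submitted and waited on (runtime.Finish())
        map.InvalidateRange(map.offset, size_bytes);          // 2. make device writes CPU-visible
        std::memcpy(dst, map.data + map.offset, size_bytes);  // 3. only now read on the CPU
    }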
@@ -295,6 +295,16 @@ public:
         return properties.properties.limits.minStorageBufferOffsetAlignment;
     }

+    /// Returns texel buffer alignment requirement.
+    VkDeviceSize GetTexelBufferAlignment() const {
+        return properties.properties.limits.minTexelBufferOffsetAlignment;
+    }
+
+    /// Returns the non-coherent atom size for memory flushes.
+    VkDeviceSize GetNonCoherentAtomSize() const {
+        return properties.properties.limits.nonCoherentAtomSize;
+    }
+
     /// Returns the maximum range for storage buffers.
     VkDeviceSize GetMaxStorageBufferRange() const {
         return properties.properties.limits.maxStorageBufferRange;
@@ -499,14 +499,22 @@ void Image::Release() const noexcept {
 }

 void Buffer::Flush() const {
+    FlushRange(0, VK_WHOLE_SIZE);
+}
+
+void Buffer::FlushRange(VkDeviceSize offset, VkDeviceSize size) const {
     if (!is_coherent) {
-        vmaFlushAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
+        vmaFlushAllocation(allocator, allocation, offset, size);
     }
 }

 void Buffer::Invalidate() const {
+    InvalidateRange(0, VK_WHOLE_SIZE);
+}
+
+void Buffer::InvalidateRange(VkDeviceSize offset, VkDeviceSize size) const {
     if (!is_coherent) {
-        vmaInvalidateAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
+        vmaInvalidateAllocation(allocator, allocation, offset, size);
     }
 }

@@ -772,10 +772,19 @@ public:
         return !mapped.empty();
     }

+    /// Returns true if the buffer memory is host coherent.
+    bool IsHostCoherent() const noexcept {
+        return is_coherent;
+    }
+
     void Flush() const;

+    void FlushRange(VkDeviceSize offset, VkDeviceSize size) const;
+
     void Invalidate() const;

+    void InvalidateRange(VkDeviceSize offset, VkDeviceSize size) const;
+
     void SetObjectNameEXT(const char* name) const;

 private: