forked from eden-emu/eden
		
	Merge pull request #11225 from FernandoS27/no-laxatives-in-santas-cookies
Y.F.C: Rework the Query Cache.
This commit is contained in:
		
						commit
						8936ff8f89
					
				
					 45 changed files with 3571 additions and 384 deletions
				
			
		|  | @ -130,13 +130,17 @@ void LogSettings() { | |||
|     log_path("DataStorage_SDMCDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir)); | ||||
| } | ||||
| 
 | ||||
| void UpdateGPUAccuracy() { | ||||
|     values.current_gpu_accuracy = values.gpu_accuracy.GetValue(); | ||||
| } | ||||
| 
 | ||||
| bool IsGPULevelExtreme() { | ||||
|     return values.gpu_accuracy.GetValue() == GpuAccuracy::Extreme; | ||||
|     return values.current_gpu_accuracy == GpuAccuracy::Extreme; | ||||
| } | ||||
| 
 | ||||
| bool IsGPULevelHigh() { | ||||
|     return values.gpu_accuracy.GetValue() == GpuAccuracy::Extreme || | ||||
|            values.gpu_accuracy.GetValue() == GpuAccuracy::High; | ||||
|     return values.current_gpu_accuracy == GpuAccuracy::Extreme || | ||||
|            values.current_gpu_accuracy == GpuAccuracy::High; | ||||
| } | ||||
| 
 | ||||
| bool IsFastmemEnabled() { | ||||
|  |  | |||
|  | @ -307,6 +307,7 @@ struct Values { | |||
|                                                       Specialization::Default, | ||||
|                                                       true, | ||||
|                                                       true}; | ||||
|     GpuAccuracy current_gpu_accuracy{GpuAccuracy::High}; | ||||
|     SwitchableSetting<AnisotropyMode, true> max_anisotropy{ | ||||
|         linkage,          AnisotropyMode::Automatic, AnisotropyMode::Automatic, AnisotropyMode::X16, | ||||
|         "max_anisotropy", Category::RendererAdvanced}; | ||||
|  | @ -522,6 +523,7 @@ struct Values { | |||
| 
 | ||||
| extern Values values; | ||||
| 
 | ||||
| void UpdateGPUAccuracy(); | ||||
| bool IsGPULevelExtreme(); | ||||
| bool IsGPULevelHigh(); | ||||
| 
 | ||||
|  |  | |||
|  | @ -95,6 +95,12 @@ add_library(video_core STATIC | |||
|     memory_manager.h | ||||
|     precompiled_headers.h | ||||
|     pte_kind.h | ||||
|     query_cache/bank_base.h | ||||
|     query_cache/query_base.h | ||||
|     query_cache/query_cache_base.h | ||||
|     query_cache/query_cache.h | ||||
|     query_cache/query_stream.h | ||||
|     query_cache/types.h | ||||
|     query_cache.h | ||||
|     rasterizer_accelerated.cpp | ||||
|     rasterizer_accelerated.h | ||||
|  |  | |||
|  | @ -272,13 +272,19 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad | |||
|     if (!cpu_addr) { | ||||
|         return {&slot_buffers[NULL_BUFFER_ID], 0}; | ||||
|     } | ||||
|     const BufferId buffer_id = FindBuffer(*cpu_addr, size); | ||||
|     return ObtainCPUBuffer(*cpu_addr, size, sync_info, post_op); | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( | ||||
|     VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) { | ||||
|     const BufferId buffer_id = FindBuffer(cpu_addr, size); | ||||
|     Buffer& buffer = slot_buffers[buffer_id]; | ||||
| 
 | ||||
|     // synchronize op
 | ||||
|     switch (sync_info) { | ||||
|     case ObtainBufferSynchronize::FullSynchronize: | ||||
|         SynchronizeBuffer(buffer, *cpu_addr, size); | ||||
|         SynchronizeBuffer(buffer, cpu_addr, size); | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|  | @ -286,11 +292,11 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad | |||
| 
 | ||||
|     switch (post_op) { | ||||
|     case ObtainBufferOperation::MarkAsWritten: | ||||
|         MarkWrittenBuffer(buffer_id, *cpu_addr, size); | ||||
|         MarkWrittenBuffer(buffer_id, cpu_addr, size); | ||||
|         break; | ||||
|     case ObtainBufferOperation::DiscardWrite: { | ||||
|         VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64); | ||||
|         VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64); | ||||
|         VAddr cpu_addr_start = Common::AlignDown(cpu_addr, 64); | ||||
|         VAddr cpu_addr_end = Common::AlignUp(cpu_addr + size, 64); | ||||
|         IntervalType interval{cpu_addr_start, cpu_addr_end}; | ||||
|         ClearDownload(interval); | ||||
|         common_ranges.subtract(interval); | ||||
|  | @ -300,7 +306,7 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad | |||
|         break; | ||||
|     } | ||||
| 
 | ||||
|     return {&buffer, buffer.Offset(*cpu_addr)}; | ||||
|     return {&buffer, buffer.Offset(cpu_addr)}; | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
|  |  | |||
|  | @ -295,6 +295,10 @@ public: | |||
|     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||||
|                                                        ObtainBufferSynchronize sync_info, | ||||
|                                                        ObtainBufferOperation post_op); | ||||
| 
 | ||||
|     [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, | ||||
|                                                           ObtainBufferSynchronize sync_info, | ||||
|                                                           ObtainBufferOperation post_op); | ||||
|     void FlushCachedWrites(); | ||||
| 
 | ||||
|     /// Return true when there are uncommitted buffers to be downloaded
 | ||||
|  | @ -335,6 +339,14 @@ public: | |||
| 
 | ||||
|     [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); | ||||
| 
 | ||||
|     template <typename Func> | ||||
|     void BufferOperations(Func&& func) { | ||||
|         do { | ||||
|             channel_state->has_deleted_buffers = false; | ||||
|             func(); | ||||
|         } while (channel_state->has_deleted_buffers); | ||||
|     } | ||||
| 
 | ||||
|     std::recursive_mutex mutex; | ||||
|     Runtime& runtime; | ||||
| 
 | ||||
|  |  | |||
|  | @ -51,7 +51,7 @@ public: | |||
|     virtual void CreateChannel(Tegra::Control::ChannelState& channel); | ||||
| 
 | ||||
|     /// Bind a channel for execution.
 | ||||
|     void BindToChannel(s32 id); | ||||
|     virtual void BindToChannel(s32 id); | ||||
| 
 | ||||
|     /// Erase channel's state.
 | ||||
|     void EraseChannel(s32 id); | ||||
|  |  | |||
|  | @ -46,6 +46,7 @@ public: | |||
|     }; | ||||
| 
 | ||||
|     struct IndirectParams { | ||||
|         bool is_byte_count; | ||||
|         bool is_indexed; | ||||
|         bool include_count; | ||||
|         GPUVAddr count_start_address; | ||||
|  |  | |||
|  | @ -20,8 +20,6 @@ | |||
| 
 | ||||
| namespace Tegra::Engines { | ||||
| 
 | ||||
| using VideoCore::QueryType; | ||||
| 
 | ||||
| /// First register id that is actually a Macro call.
 | ||||
| constexpr u32 MacroRegistersStart = 0xE00; | ||||
| 
 | ||||
|  | @ -500,27 +498,21 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | |||
| } | ||||
| 
 | ||||
| void Maxwell3D::ProcessQueryGet() { | ||||
|     VideoCommon::QueryPropertiesFlags flags{}; | ||||
|     if (regs.report_semaphore.query.short_query == 0) { | ||||
|         flags |= VideoCommon::QueryPropertiesFlags::HasTimeout; | ||||
|     } | ||||
|     const GPUVAddr sequence_address{regs.report_semaphore.Address()}; | ||||
|     const VideoCommon::QueryType query_type = | ||||
|         static_cast<VideoCommon::QueryType>(regs.report_semaphore.query.report.Value()); | ||||
|     const u32 payload = regs.report_semaphore.payload; | ||||
|     const u32 subreport = regs.report_semaphore.query.sub_report; | ||||
|     switch (regs.report_semaphore.query.operation) { | ||||
|     case Regs::ReportSemaphore::Operation::Release: | ||||
|         if (regs.report_semaphore.query.short_query != 0) { | ||||
|             const GPUVAddr sequence_address{regs.report_semaphore.Address()}; | ||||
|             const u32 payload = regs.report_semaphore.payload; | ||||
|             std::function<void()> operation([this, sequence_address, payload] { | ||||
|                 memory_manager.Write<u32>(sequence_address, payload); | ||||
|             }); | ||||
|             rasterizer->SignalFence(std::move(operation)); | ||||
|         } else { | ||||
|             struct LongQueryResult { | ||||
|                 u64_le value; | ||||
|                 u64_le timestamp; | ||||
|             }; | ||||
|             const GPUVAddr sequence_address{regs.report_semaphore.Address()}; | ||||
|             const u32 payload = regs.report_semaphore.payload; | ||||
|             [this, sequence_address, payload] { | ||||
|                 memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks()); | ||||
|                 memory_manager.Write<u64>(sequence_address, payload); | ||||
|             }(); | ||||
|             flags |= VideoCommon::QueryPropertiesFlags::IsAFence; | ||||
|         } | ||||
|         rasterizer->Query(sequence_address, query_type, flags, payload, subreport); | ||||
|         break; | ||||
|     case Regs::ReportSemaphore::Operation::Acquire: | ||||
|         // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
 | ||||
|  | @ -528,11 +520,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
|         UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); | ||||
|         break; | ||||
|     case Regs::ReportSemaphore::Operation::ReportOnly: | ||||
|         if (const std::optional<u64> result = GetQueryResult()) { | ||||
|             // If the query returns an empty optional it means it's cached and deferred.
 | ||||
|             // In this case we have a non-empty result, so we stamp it immediately.
 | ||||
|             StampQueryResult(*result, regs.report_semaphore.query.short_query == 0); | ||||
|         } | ||||
|         rasterizer->Query(sequence_address, query_type, flags, payload, subreport); | ||||
|         break; | ||||
|     case Regs::ReportSemaphore::Operation::Trap: | ||||
|         UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); | ||||
|  | @ -544,6 +532,10 @@ void Maxwell3D::ProcessQueryGet() { | |||
| } | ||||
| 
 | ||||
| void Maxwell3D::ProcessQueryCondition() { | ||||
|     if (rasterizer->AccelerateConditionalRendering()) { | ||||
|         execute_on = true; | ||||
|         return; | ||||
|     } | ||||
|     const GPUVAddr condition_address{regs.render_enable.Address()}; | ||||
|     switch (regs.render_enable_override) { | ||||
|     case Regs::RenderEnable::Override::AlwaysRender: | ||||
|  | @ -553,10 +545,6 @@ void Maxwell3D::ProcessQueryCondition() { | |||
|         execute_on = false; | ||||
|         break; | ||||
|     case Regs::RenderEnable::Override::UseRenderEnable: { | ||||
|         if (rasterizer->AccelerateConditionalRendering()) { | ||||
|             execute_on = true; | ||||
|             return; | ||||
|         } | ||||
|         switch (regs.render_enable.mode) { | ||||
|         case Regs::RenderEnable::Mode::True: { | ||||
|             execute_on = true; | ||||
|  | @ -598,15 +586,9 @@ void Maxwell3D::ProcessQueryCondition() { | |||
| } | ||||
| 
 | ||||
| void Maxwell3D::ProcessCounterReset() { | ||||
| #if ANDROID | ||||
|     if (!Settings::IsGPULevelHigh()) { | ||||
|         // This is problematic on Android, disable on GPU Normal.
 | ||||
|         return; | ||||
|     } | ||||
| #endif | ||||
|     switch (regs.clear_report_value) { | ||||
|     case Regs::ClearReport::ZPassPixelCount: | ||||
|         rasterizer->ResetCounter(QueryType::SamplesPassed); | ||||
|         rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); | ||||
|         break; | ||||
|     default: | ||||
|         LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); | ||||
|  | @ -620,28 +602,6 @@ void Maxwell3D::ProcessSyncPoint() { | |||
|     rasterizer->SignalSyncPoint(sync_point); | ||||
| } | ||||
| 
 | ||||
| std::optional<u64> Maxwell3D::GetQueryResult() { | ||||
|     switch (regs.report_semaphore.query.report) { | ||||
|     case Regs::ReportSemaphore::Report::Payload: | ||||
|         return regs.report_semaphore.payload; | ||||
|     case Regs::ReportSemaphore::Report::ZPassPixelCount64: | ||||
| #if ANDROID | ||||
|         if (!Settings::IsGPULevelHigh()) { | ||||
|             // This is problematic on Android, disable on GPU Normal.
 | ||||
|             return 120; | ||||
|         } | ||||
| #endif | ||||
|         // Deferred.
 | ||||
|         rasterizer->Query(regs.report_semaphore.Address(), QueryType::SamplesPassed, | ||||
|                           system.GPU().GetTicks()); | ||||
|         return std::nullopt; | ||||
|     default: | ||||
|         LOG_DEBUG(HW_GPU, "Unimplemented query report type {}", | ||||
|                   regs.report_semaphore.query.report.Value()); | ||||
|         return 1; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void Maxwell3D::ProcessCBBind(size_t stage_index) { | ||||
|     // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader
 | ||||
|     // stage.
 | ||||
|  |  | |||
|  | @ -3182,9 +3182,6 @@ private: | |||
|     /// Handles writes to syncing register.
 | ||||
|     void ProcessSyncPoint(); | ||||
| 
 | ||||
|     /// Returns a query's value or an empty object if the value will be deferred through a cache.
 | ||||
|     std::optional<u64> GetQueryResult(); | ||||
| 
 | ||||
|     void RefreshParametersImpl(); | ||||
| 
 | ||||
|     bool IsMethodExecutable(u32 method); | ||||
|  |  | |||
|  | @ -362,21 +362,17 @@ void MaxwellDMA::ReleaseSemaphore() { | |||
|     const auto type = regs.launch_dma.semaphore_type; | ||||
|     const GPUVAddr address = regs.semaphore.address; | ||||
|     const u32 payload = regs.semaphore.payload; | ||||
|     VideoCommon::QueryPropertiesFlags flags{VideoCommon::QueryPropertiesFlags::IsAFence}; | ||||
|     switch (type) { | ||||
|     case LaunchDMA::SemaphoreType::NONE: | ||||
|         break; | ||||
|     case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { | ||||
|         std::function<void()> operation( | ||||
|             [this, address, payload] { memory_manager.Write<u32>(address, payload); }); | ||||
|         rasterizer->SignalFence(std::move(operation)); | ||||
|         rasterizer->Query(address, VideoCommon::QueryType::Payload, flags, payload, 0); | ||||
|         break; | ||||
|     } | ||||
|     case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { | ||||
|         std::function<void()> operation([this, address, payload] { | ||||
|             memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks()); | ||||
|             memory_manager.Write<u64>(address, payload); | ||||
|         }); | ||||
|         rasterizer->SignalFence(std::move(operation)); | ||||
|         rasterizer->Query(address, VideoCommon::QueryType::Payload, | ||||
|                           flags | VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); | ||||
|         break; | ||||
|     } | ||||
|     default: | ||||
|  |  | |||
|  | @ -82,10 +82,8 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
|     if (op == GpuSemaphoreOperation::WriteLong) { | ||||
|         const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | ||||
|         const u32 payload = regs.semaphore_sequence; | ||||
|         [this, sequence_address, payload] { | ||||
|             memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks()); | ||||
|             memory_manager.Write<u64>(sequence_address, payload); | ||||
|         }(); | ||||
|         rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, | ||||
|                           VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); | ||||
|     } else { | ||||
|         do { | ||||
|             const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | ||||
|  | @ -120,10 +118,8 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| void Puller::ProcessSemaphoreRelease() { | ||||
|     const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | ||||
|     const u32 payload = regs.semaphore_release; | ||||
|     std::function<void()> operation([this, sequence_address, payload] { | ||||
|         memory_manager.Write<u32>(sequence_address, payload); | ||||
|     }); | ||||
|     rasterizer->SignalFence(std::move(operation)); | ||||
|     rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, | ||||
|                       VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); | ||||
| } | ||||
| 
 | ||||
| void Puller::ProcessSemaphoreAcquire() { | ||||
|  | @ -132,7 +128,6 @@ void Puller::ProcessSemaphoreAcquire() { | |||
|     while (word != value) { | ||||
|         regs.acquire_active = true; | ||||
|         regs.acquire_value = value; | ||||
|         std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||||
|         rasterizer->ReleaseFences(); | ||||
|         word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); | ||||
|         // TODO(kemathe73) figure out how to do the acquire_timeout
 | ||||
|  |  | |||
|  | @ -55,6 +55,9 @@ public: | |||
| 
 | ||||
|     // Unlike other fences, this one doesn't
 | ||||
|     void SignalOrdering() { | ||||
|         if constexpr (!can_async_check) { | ||||
|             TryReleasePendingFences<false>(); | ||||
|         } | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.AccumulateFlushes(); | ||||
|     } | ||||
|  | @ -104,9 +107,25 @@ public: | |||
|         SignalFence(std::move(func)); | ||||
|     } | ||||
| 
 | ||||
|     void WaitPendingFences() { | ||||
|     void WaitPendingFences([[maybe_unused]] bool force) { | ||||
|         if constexpr (!can_async_check) { | ||||
|             TryReleasePendingFences<true>(); | ||||
|         } else { | ||||
|             if (!force) { | ||||
|                 return; | ||||
|             } | ||||
|             std::mutex wait_mutex; | ||||
|             std::condition_variable wait_cv; | ||||
|             std::atomic<bool> wait_finished{}; | ||||
|             std::function<void()> func([&] { | ||||
|                 std::scoped_lock lk(wait_mutex); | ||||
|                 wait_finished.store(true, std::memory_order_relaxed); | ||||
|                 wait_cv.notify_all(); | ||||
|             }); | ||||
|             SignalFence(std::move(func)); | ||||
|             std::unique_lock lk(wait_mutex); | ||||
|             wait_cv.wait( | ||||
|                 lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); }); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
|  | @ -102,7 +102,8 @@ struct GPU::Impl { | |||
| 
 | ||||
|     /// Signal the ending of command list.
 | ||||
|     void OnCommandListEnd() { | ||||
|         rasterizer->ReleaseFences(); | ||||
|         rasterizer->ReleaseFences(false); | ||||
|         Settings::UpdateGPUAccuracy(); | ||||
|     } | ||||
| 
 | ||||
|     /// Request a host GPU memory flush from the CPU.
 | ||||
|  | @ -220,6 +221,7 @@ struct GPU::Impl { | |||
|     /// This can be used to launch any necessary threads and register any necessary
 | ||||
|     /// core timing events.
 | ||||
|     void Start() { | ||||
|         Settings::UpdateGPUAccuracy(); | ||||
|         gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler); | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
|  | @ -41,6 +41,9 @@ set(SHADER_FILES | |||
|     pitch_unswizzle.comp | ||||
|     present_bicubic.frag | ||||
|     present_gaussian.frag | ||||
|     queries_prefix_scan_sum.comp | ||||
|     queries_prefix_scan_sum_nosubgroups.comp | ||||
|     resolve_conditional_render.comp | ||||
|     smaa_edge_detection.vert | ||||
|     smaa_edge_detection.frag | ||||
|     smaa_blending_weight_calculation.vert | ||||
|  | @ -70,6 +73,7 @@ if ("${GLSLANGVALIDATOR}" STREQUAL "GLSLANGVALIDATOR-NOTFOUND") | |||
| endif() | ||||
| 
 | ||||
| set(GLSL_FLAGS "") | ||||
| set(SPIR_V_VERSION "spirv1.3") | ||||
| set(QUIET_FLAG "--quiet") | ||||
| 
 | ||||
| set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) | ||||
|  | @ -123,7 +127,7 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) | |||
|             OUTPUT | ||||
|                 ${SPIRV_HEADER_FILE} | ||||
|             COMMAND | ||||
|                 ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} | ||||
|                 ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} --target-env ${SPIR_V_VERSION} | ||||
|             MAIN_DEPENDENCY | ||||
|                 ${SOURCE_FILE} | ||||
|         ) | ||||
|  |  | |||
							
								
								
									
										173
									
								
								src/video_core/host_shaders/queries_prefix_scan_sum.comp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										173
									
								
								src/video_core/host_shaders/queries_prefix_scan_sum.comp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,173 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later | ||||
| 
 | ||||
| #version 460 core | ||||
| 
 | ||||
| #extension GL_KHR_shader_subgroup_basic : require | ||||
| #extension GL_KHR_shader_subgroup_shuffle : require | ||||
| #extension GL_KHR_shader_subgroup_shuffle_relative : require | ||||
| #extension GL_KHR_shader_subgroup_arithmetic : require | ||||
| 
 | ||||
| #ifdef VULKAN | ||||
| 
 | ||||
| #define HAS_EXTENDED_TYPES 1 | ||||
| #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { | ||||
| #define END_PUSH_CONSTANTS }; | ||||
| #define UNIFORM(n) | ||||
| #define BINDING_INPUT_BUFFER 0 | ||||
| #define BINDING_OUTPUT_IMAGE 1 | ||||
| 
 | ||||
| #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||||
| 
 | ||||
| #extension GL_NV_gpu_shader5 : enable | ||||
| #ifdef GL_NV_gpu_shader5 | ||||
| #define HAS_EXTENDED_TYPES 1 | ||||
| #else | ||||
| #define HAS_EXTENDED_TYPES 0 | ||||
| #endif | ||||
| #define BEGIN_PUSH_CONSTANTS | ||||
| #define END_PUSH_CONSTANTS | ||||
| #define UNIFORM(n) layout(location = n) uniform | ||||
| #define BINDING_INPUT_BUFFER 0 | ||||
| #define BINDING_OUTPUT_IMAGE 0 | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| BEGIN_PUSH_CONSTANTS | ||||
| UNIFORM(0) uint min_accumulation_base; | ||||
| UNIFORM(1) uint max_accumulation_base; | ||||
| UNIFORM(2) uint accumulation_limit; | ||||
| UNIFORM(3) uint buffer_offset; | ||||
| END_PUSH_CONSTANTS | ||||
| 
 | ||||
| #define LOCAL_RESULTS 8 | ||||
| #define QUERIES_PER_INVOC 2048 | ||||
| 
 | ||||
| layout(local_size_x = QUERIES_PER_INVOC / LOCAL_RESULTS) in; | ||||
| 
 | ||||
| layout(std430, binding = 0) readonly buffer block1 { | ||||
|     uvec2 input_data[]; | ||||
| }; | ||||
| 
 | ||||
| layout(std430, binding = 1) coherent buffer block2 { | ||||
|     uvec2 output_data[]; | ||||
| }; | ||||
| 
 | ||||
| layout(std430, binding = 2) coherent buffer block3 { | ||||
|     uvec2 accumulated_data; | ||||
| }; | ||||
| 
 | ||||
| shared uvec2 shared_data[128]; | ||||
| 
 | ||||
| // 64-bit unsigned add emulated on a (low, high) pair of uints, for GPUs without uint64 support | ||||
| uvec2 AddUint64(uvec2 value_1, uvec2 value_2) { | ||||
|     uint overflow = 0; | ||||
|     // uaddCarry returns the low-word sum and reports the carry-out through overflow | ||||
|     const uint low_word = uaddCarry(value_1.x, value_2.x, overflow); | ||||
|     const uint high_word = value_1.y + value_2.y + overflow; | ||||
|     return uvec2(low_word, high_word); | ||||
| } | ||||
| 
 | ||||
| // Inclusive prefix sum across the subgroup (Hillis and Steele's algorithm) on emulated uint64 | ||||
| uvec2 subgroupInclusiveAddUint64(uvec2 value) { | ||||
|     uvec2 running_sum = value; | ||||
|     uint stride = 1; | ||||
|     while (stride < gl_SubgroupSize) { | ||||
|         // value currently held by invocation (gl_SubgroupInvocationID - stride) | ||||
|         uvec2 lower_lane_sum = subgroupShuffleUp(running_sum, stride); | ||||
|         // invocations below the stride have no partner and keep their sum unchanged | ||||
|         if (gl_SubgroupInvocationID >= stride) { | ||||
|             running_sum = AddUint64(running_sum, lower_lane_sum); | ||||
|         } | ||||
|         stride *= 2; | ||||
|     } | ||||
|     return running_sum; | ||||
| } | ||||
| 
 | ||||
| // Adds the carried accumulation to this invocation's prefix sums, writes them to the output | ||||
| // buffer and then updates the accumulation buffer for the next dispatch. | ||||
| // results: the LOCAL_RESULTS prefix sums computed by the calling invocation. | ||||
| void WriteResults(uvec2 results[LOCAL_RESULTS]) { | ||||
|     const uint current_id = gl_LocalInvocationID.x; | ||||
|     // NOTE(review): accumulated_data is read here and rewritten below by a single invocation; | ||||
|     // confirm no invocation can still be reading it when that write happens. | ||||
|     const uvec2 accum = accumulated_data; | ||||
|     for (uint i = 0; i < LOCAL_RESULTS; i++) { | ||||
|         // only entries below min_accumulation_base carry the previous accumulation | ||||
|         uvec2 base_data = current_id * LOCAL_RESULTS + i < min_accumulation_base ? accum : uvec2(0, 0); | ||||
|         // AddUint64 takes its arguments by value and returns the sum, so it must be stored back | ||||
|         // (the result used to be discarded, which left the accumulation out of the results). | ||||
|         results[i] = AddUint64(results[i], base_data); | ||||
|     } | ||||
|     for (uint i = 0; i < LOCAL_RESULTS; i++) { | ||||
|         output_data[buffer_offset + current_id * LOCAL_RESULTS + i] = results[i]; | ||||
|     } | ||||
|     // locate the invocation (base_id) and local slot (index) that hold entry accumulation_limit | ||||
|     uint index = accumulation_limit % LOCAL_RESULTS; | ||||
|     uint base_id = accumulation_limit / LOCAL_RESULTS; | ||||
|     if (min_accumulation_base >= accumulation_limit + 1) { | ||||
|         if (current_id == base_id) { | ||||
|             accumulated_data = results[index]; | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
|     // We have that ugly case in which the accumulation data is reset in the middle somewhere. | ||||
|     // The barrier makes the output_data writes above visible before one of them is read back. | ||||
|     barrier(); | ||||
|     groupMemoryBarrier(); | ||||
| 
 | ||||
|     if (current_id == base_id) { | ||||
|         uvec2 reset_value = output_data[max_accumulation_base - 1]; | ||||
|         // Calculate two complement / negate manually, so the add below is a subtraction | ||||
|         reset_value = AddUint64(uvec2(1,0), ~reset_value); | ||||
|         accumulated_data = AddUint64(results[index], reset_value); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void main() { | ||||
|     const uint subgroup_inv_id = gl_SubgroupInvocationID; | ||||
|     const uint subgroup_id = gl_SubgroupID + gl_WorkGroupID.x * gl_NumSubgroups; | ||||
|     const uint last_subgroup_id = subgroupMax(subgroup_inv_id); | ||||
|     const uint current_id = gl_LocalInvocationID.x; | ||||
|     const uint total_work = accumulation_limit; | ||||
|     const uint last_result_id = LOCAL_RESULTS - 1; | ||||
|     uvec2 data[LOCAL_RESULTS]; | ||||
|     for (uint i = 0; i < LOCAL_RESULTS; i++) { | ||||
|         data[i] = input_data[buffer_offset + current_id * LOCAL_RESULTS + i]; | ||||
|     } | ||||
|     uvec2 results[LOCAL_RESULTS]; | ||||
|     results[0] = data[0]; | ||||
|     for (uint i = 1; i < LOCAL_RESULTS; i++) { | ||||
|         results[i] = AddUint64(data[i], results[i - 1]); | ||||
|     } | ||||
|     // results[] now holds the inclusive prefix sums of this invocation's own inputs; sync the subgroup before combining across invocations | ||||
|     subgroupBarrier(); | ||||
|     subgroupMemoryBarrier(); | ||||
| 
 | ||||
|     // inclusive-scan the per-invocation totals (each invocation's last local result) across the subgroup | ||||
|     results[last_result_id] = subgroupInclusiveAddUint64(results[last_result_id]); | ||||
|     // fetch the scanned total of the invocation just below this one and add it to the other local results (the last one already includes it through the scan) | ||||
|     uvec2 result_behind = subgroupShuffleUp(results[last_result_id], 1); | ||||
|     if (subgroup_inv_id != 0) { | ||||
|         for (uint i = 1; i < LOCAL_RESULTS; i++) { | ||||
|             results[i - 1] = AddUint64(results[i - 1], result_behind); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // if all the work fits in a single subgroup (gl_SubgroupSize * LOCAL_RESULTS entries), just write down the results. | ||||
|     if (total_work <= gl_SubgroupSize * LOCAL_RESULTS) { // This condition is constant per dispatch. | ||||
|         WriteResults(results); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     // We now have more, so let's publish each subgroup's total through shared memory. | ||||
|     // Only the highest invocation of each subgroup writes: its scanned last result is the subgroup total. | ||||
|     if (subgroup_inv_id == last_subgroup_id) { | ||||
|         shared_data[subgroup_id] = results[last_result_id]; | ||||
|     } | ||||
|     // wait until every subgroup has stored its total | ||||
|     barrier(); | ||||
|     memoryBarrierShared(); | ||||
| 
 | ||||
|     // only if it's not the first subgroup | ||||
|     if (subgroup_id != 0) { | ||||
|         // each invocation reads the shared slot whose index equals its own invocation index | ||||
|         uvec2 tmp = shared_data[subgroup_inv_id]; | ||||
|         subgroupBarrier(); | ||||
|         subgroupMemoryBarrierShared(); | ||||
|         tmp = subgroupInclusiveAddUint64(tmp); | ||||
|         // take the inclusive sum of slots 0 .. subgroup_id - 1, i.e. the totals stored by the preceding subgroups | ||||
|         uvec2 shuffled_result = subgroupShuffle(tmp, subgroup_id - 1); | ||||
|         for (uint i = 0; i < LOCAL_RESULTS; i++) { | ||||
|             results[i] = AddUint64(results[i], shuffled_result); | ||||
|         } | ||||
|     } | ||||
|     WriteResults(results); | ||||
| } | ||||
|  | @ -0,0 +1,138 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2015 Graham Sellers, Richard Wright Jr. and Nicholas Haemel | ||||
| // SPDX-License-Identifier: MIT | ||||
| 
 | ||||
| // Code obtained from OpenGL SuperBible, Seventh Edition by Graham Sellers, Richard Wright Jr. and | ||||
| // Nicholas Haemel. Modified to suit needs. | ||||
| 
 | ||||
| #version 460 core | ||||
| 
 | ||||
| #ifdef VULKAN | ||||
| 
 | ||||
| #define HAS_EXTENDED_TYPES 1 | ||||
| #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { | ||||
| #define END_PUSH_CONSTANTS }; | ||||
| #define UNIFORM(n) | ||||
| #define BINDING_INPUT_BUFFER 0 | ||||
| #define BINDING_OUTPUT_IMAGE 1 | ||||
| 
 | ||||
| #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||||
| 
 | ||||
| #extension GL_NV_gpu_shader5 : enable | ||||
| #ifdef GL_NV_gpu_shader5 | ||||
| #define HAS_EXTENDED_TYPES 1 | ||||
| #else | ||||
| #define HAS_EXTENDED_TYPES 0 | ||||
| #endif | ||||
| #define BEGIN_PUSH_CONSTANTS | ||||
| #define END_PUSH_CONSTANTS | ||||
| #define UNIFORM(n) layout(location = n) uniform | ||||
| #define BINDING_INPUT_BUFFER 0 | ||||
| #define BINDING_OUTPUT_IMAGE 0 | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| BEGIN_PUSH_CONSTANTS | ||||
| UNIFORM(0) uint min_accumulation_base; | ||||
| UNIFORM(1) uint max_accumulation_base; | ||||
| UNIFORM(2) uint accumulation_limit; | ||||
| UNIFORM(3) uint buffer_offset; | ||||
| END_PUSH_CONSTANTS | ||||
| 
 | ||||
| #define LOCAL_RESULTS 4 | ||||
| #define QUERIES_PER_INVOC 2048 | ||||
| 
 | ||||
| layout(local_size_x = QUERIES_PER_INVOC / LOCAL_RESULTS) in; | ||||
| 
 | ||||
| layout(std430, binding = 0) readonly buffer block1 { | ||||
|     uvec2 input_data[gl_WorkGroupSize.x * LOCAL_RESULTS]; | ||||
| }; | ||||
| 
 | ||||
| layout(std430, binding = 1) writeonly coherent buffer block2 { | ||||
|     uvec2 output_data[gl_WorkGroupSize.x * LOCAL_RESULTS]; | ||||
| }; | ||||
| 
 | ||||
| layout(std430, binding = 2) coherent buffer block3 { | ||||
|     uvec2 accumulated_data; | ||||
| }; | ||||
| 
 | ||||
| shared uvec2 shared_data[gl_WorkGroupSize.x * LOCAL_RESULTS]; | ||||
| 
 | ||||
| uvec2 AddUint64(uvec2 value_1, uvec2 value_2) { | ||||
|     uint overflow = 0; | ||||
|     // uaddCarry returns the low-word sum and reports the carry-out through overflow | ||||
|     const uint low_word = uaddCarry(value_1.x, value_2.x, overflow); | ||||
|     const uint high_word = value_1.y + value_2.y + overflow; | ||||
|     return uvec2(low_word, high_word); | ||||
| } | ||||
| 
 | ||||
// Runs a shared-memory scan over LOCAL_RESULTS 64-bit values per invocation (packed as
// uvec2 low/high words), adds the running accumulation where requested, writes the result
// to output_data and finally lets invocation 0 update accumulated_data.
void main(void) {
    const uint id = gl_LocalInvocationID.x;
    // First element owned by this invocation inside the work group's window.
    const uint local_base = id * LOCAL_RESULTS;

    // Read the running accumulation once; it is only rewritten by invocation 0 at the very
    // end, so every element of this invocation sees the same snapshot.
    const uvec2 accum = accumulated_data;
    uvec2 base_value[LOCAL_RESULTS];
    for (uint i = 0; i < LOCAL_RESULTS; i++) {
        // Only entries below min_accumulation_base receive the previous accumulation.
        base_value[i] =
            (buffer_offset + local_base + i) < min_accumulation_base ? accum : uvec2(0);
    }

    const uint work_size = gl_WorkGroupSize.x;
    // The number of steps is the log base 2 of the work group size (which should be a
    // power of 2) plus the log base 2 of the per-invocation element count.
    const uint steps = uint(log2(work_size)) + uint(log2(LOCAL_RESULTS));

    // Each invocation is responsible for the content of LOCAL_RESULTS consecutive
    // elements of the shared array.
    for (uint i = 0; i < LOCAL_RESULTS; i++) {
        shared_data[local_base + i] = input_data[buffer_offset + local_base + i];
    }
    // Synchronize to make sure that everyone has initialized their elements of
    // shared_data[] with data loaded from the input array.
    barrier();
    memoryBarrierShared();

    for (uint step = 0; step < steps; step++) {
        // Calculate the read and write index in the shared array.
        const uint mask = (1u << step) - 1u;
        const uint rd_id = ((id >> step) << (step + 1)) + mask;
        const uint wr_id = rd_id + 1 + (id & mask);
        // Accumulate the read data into our element.
        shared_data[wr_id] = AddUint64(shared_data[rd_id], shared_data[wr_id]);
        // Synchronize again to make sure that everyone has caught up with us.
        barrier();
        memoryBarrierShared();
    }

    // Add the accumulation carried over from previous dispatches.
    for (uint i = 0; i < LOCAL_RESULTS; i++) {
        shared_data[local_base + i] = AddUint64(shared_data[local_base + i], base_value[i]);
    }
    barrier();
    memoryBarrierShared();

    // Finally write our data back to the output buffer.
    for (uint i = 0; i < LOCAL_RESULTS; i++) {
        output_data[buffer_offset + local_base + i] = shared_data[local_base + i];
    }

    // A single invocation publishes the accumulation for the next dispatch.
    if (id == 0) {
        if (min_accumulation_base >= accumulation_limit + 1) {
            accumulated_data = shared_data[accumulation_limit];
            return;
        }
        uvec2 reset_value = shared_data[max_accumulation_base - 1];
        uvec2 final_value = shared_data[accumulation_limit];
        // Two's complement negation (-x == ~x + 1), so the addition below subtracts.
        reset_value = AddUint64(uvec2(1, 0), ~reset_value);
        accumulated_data = AddUint64(final_value, reset_value);
    }
}
							
								
								
									
										20
									
								
								src/video_core/host_shaders/resolve_conditional_render.comp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								src/video_core/host_shaders/resolve_conditional_render.comp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,20 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later | ||||
| 
 | ||||
| #version 450 | ||||
| 
 | ||||
| layout(local_size_x = 1) in; | ||||
| 
 | ||||
| layout(std430, binding = 0) buffer Query { | ||||
|     uvec2 initial; | ||||
|     uvec2 unknown; | ||||
|     uvec2 current; | ||||
| }; | ||||
| 
 | ||||
| layout(std430, binding = 1) buffer Result { | ||||
|     uint result; | ||||
| }; | ||||
| 
 | ||||
// Stores 1 in `result` when `initial` and `current` match in both words, 0 otherwise.
void main() {
    const bool unchanged = (initial.x == current.x) && (initial.y == current.y);
    result = unchanged ? 1 : 0;
}
|  | @ -67,6 +67,7 @@ public: | |||
|         } | ||||
| 
 | ||||
|         auto& params = maxwell3d.draw_manager->GetIndirectParams(); | ||||
|         params.is_byte_count = false; | ||||
|         params.is_indexed = false; | ||||
|         params.include_count = false; | ||||
|         params.count_start_address = 0; | ||||
|  | @ -161,6 +162,7 @@ public: | |||
|                 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); | ||||
|         } | ||||
|         auto& params = maxwell3d.draw_manager->GetIndirectParams(); | ||||
|         params.is_byte_count = false; | ||||
|         params.is_indexed = true; | ||||
|         params.include_count = false; | ||||
|         params.count_start_address = 0; | ||||
|  | @ -256,6 +258,7 @@ public: | |||
|         const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); | ||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||
|         auto& params = maxwell3d.draw_manager->GetIndirectParams(); | ||||
|         params.is_byte_count = false; | ||||
|         params.is_indexed = true; | ||||
|         params.include_count = true; | ||||
|         params.count_start_address = maxwell3d.GetMacroAddress(4); | ||||
|  | @ -319,6 +322,47 @@ private: | |||
|     } | ||||
| }; | ||||
| 
 | ||||
| class HLE_DrawIndirectByteCount final : public HLEMacroImpl { | ||||
| public: | ||||
|     explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||||
| 
 | ||||
|     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||
|         auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0xFFFFU); | ||||
|         if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { | ||||
|             Fallback(parameters); | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         auto& params = maxwell3d.draw_manager->GetIndirectParams(); | ||||
|         params.is_byte_count = true; | ||||
|         params.is_indexed = false; | ||||
|         params.include_count = false; | ||||
|         params.count_start_address = 0; | ||||
|         params.indirect_start_address = maxwell3d.GetMacroAddress(2); | ||||
|         params.buffer_size = 4; | ||||
|         params.max_draw_counts = 1; | ||||
|         params.stride = parameters[1]; | ||||
|         maxwell3d.regs.draw.begin = parameters[0]; | ||||
|         maxwell3d.regs.draw_auto_stride = parameters[1]; | ||||
|         maxwell3d.regs.draw_auto_byte_count = parameters[2]; | ||||
| 
 | ||||
|         maxwell3d.draw_manager->DrawArrayIndirect(topology); | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     void Fallback(const std::vector<u32>& parameters) { | ||||
|         maxwell3d.RefreshParameters(); | ||||
| 
 | ||||
|         maxwell3d.regs.draw.begin = parameters[0]; | ||||
|         maxwell3d.regs.draw_auto_stride = parameters[1]; | ||||
|         maxwell3d.regs.draw_auto_byte_count = parameters[2]; | ||||
| 
 | ||||
|         maxwell3d.draw_manager->DrawArray( | ||||
|             maxwell3d.regs.draw.topology, 0, | ||||
|             maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { | ||||
| public: | ||||
|     explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||||
|  | @ -536,6 +580,11 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { | |||
|                          [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { | ||||
|                              return std::make_unique<HLE_TransformFeedbackSetup>(maxwell3d__); | ||||
|                          })); | ||||
|     builders.emplace(0xB5F74EDB717278ECULL, | ||||
|                      std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( | ||||
|                          [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { | ||||
|                              return std::make_unique<HLE_DrawIndirectByteCount>(maxwell3d__); | ||||
|                          })); | ||||
| } | ||||
| 
 | ||||
| HLEMacro::~HLEMacro() = default; | ||||
|  |  | |||
|  | @ -25,6 +25,13 @@ | |||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/texture_cache/slot_vector.h" | ||||
| 
 | ||||
| namespace VideoCore { | ||||
| /// Kinds of GPU queries; only samples-passed is listed here. | ||||
| enum class QueryType { | ||||
|     SamplesPassed, | ||||
| }; | ||||
| /// Number of enumerators in QueryType; keep in sync when adding entries. | ||||
| constexpr std::size_t NumQueryTypes = 1; | ||||
| } // namespace VideoCore
 | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
| using AsyncJobId = SlotId; | ||||
|  | @ -98,9 +105,9 @@ private: | |||
| }; | ||||
| 
 | ||||
| template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> | ||||
| class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||||
| class QueryCacheLegacy : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||||
| public: | ||||
|     explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_, | ||||
|     explicit QueryCacheLegacy(VideoCore::RasterizerInterface& rasterizer_, | ||||
|                               Core::Memory::Memory& cpu_memory_) | ||||
|         : rasterizer{rasterizer_}, | ||||
|           // Use reinterpret_cast instead of static_cast as workaround for
 | ||||
|  |  | |||
							
								
								
									
										104
									
								
								src/video_core/query_cache/bank_base.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								src/video_core/query_cache/bank_base.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,104 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <atomic> | ||||
| #include <deque> | ||||
| #include <utility> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
| class BankBase { | ||||
| protected: | ||||
|     const size_t base_bank_size{}; | ||||
|     size_t bank_size{}; | ||||
|     std::atomic<size_t> references{}; | ||||
|     size_t current_slot{}; | ||||
| 
 | ||||
| public: | ||||
|     explicit BankBase(size_t bank_size_) : base_bank_size{bank_size_}, bank_size(bank_size_) {} | ||||
| 
 | ||||
|     virtual ~BankBase() = default; | ||||
| 
 | ||||
|     virtual std::pair<bool, size_t> Reserve() { | ||||
|         if (IsClosed()) { | ||||
|             return {false, bank_size}; | ||||
|         } | ||||
|         const size_t result = current_slot++; | ||||
|         return {true, result}; | ||||
|     } | ||||
| 
 | ||||
|     virtual void Reset() { | ||||
|         current_slot = 0; | ||||
|         references = 0; | ||||
|         bank_size = base_bank_size; | ||||
|     } | ||||
| 
 | ||||
|     size_t Size() const { | ||||
|         return bank_size; | ||||
|     } | ||||
| 
 | ||||
|     void AddReference(size_t how_many = 1) { | ||||
|         references.fetch_add(how_many, std::memory_order_relaxed); | ||||
|     } | ||||
| 
 | ||||
|     void CloseReference(size_t how_many = 1) { | ||||
|         if (how_many > references.load(std::memory_order_relaxed)) { | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|         references.fetch_sub(how_many, std::memory_order_relaxed); | ||||
|     } | ||||
| 
 | ||||
|     void Close() { | ||||
|         bank_size = current_slot; | ||||
|     } | ||||
| 
 | ||||
|     bool IsClosed() const { | ||||
|         return current_slot >= bank_size; | ||||
|     } | ||||
| 
 | ||||
|     bool IsDead() const { | ||||
|         return IsClosed() && references == 0; | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
/// Pool of banks addressed by index. Banks are queued in creation order and the bank at
/// the front of the queue is recycled once it reports IsDead().
template <typename BankType>
class BankPool {
private:
    std::deque<BankType> bank_pool;  ///< Storage; indices stay valid since banks are never erased.
    std::deque<size_t> bank_indices; ///< Recycling queue of bank indices, oldest first.

public:
    BankPool() = default;
    ~BankPool() = default;

    /// Reserves a bank and returns its index.
    /// If the bank at the front of the queue is dead it is Reset() and reused; otherwise
    /// `builder(bank_pool, new_index)` is invoked and is expected to append a new bank.
    template <typename Func>
    size_t ReserveBank(Func&& builder) {
        if (!bank_indices.empty() && bank_pool[bank_indices.front()].IsDead()) {
            const size_t recycled_index = bank_indices.front();
            bank_indices.pop_front();
            bank_pool[recycled_index].Reset();
            // Re-queue the bank so it can be recycled again later; without this a bank
            // would be reused at most once and the pool would keep growing.
            bank_indices.push_back(recycled_index);
            return recycled_index;
        }
        const size_t new_index = bank_pool.size();
        builder(bank_pool, new_index);
        bank_indices.push_back(new_index);
        return new_index;
    }

    /// Get a reference to a bank using its index (no bounds checking).
    BankType& GetBank(size_t index) {
        return bank_pool[index];
    }

    /// Get the total number of banks in the pool.
    size_t BankCount() const {
        return bank_pool.size();
    }
};
| 
 | ||||
| } // namespace VideoCommon
 | ||||
							
								
								
									
										70
									
								
								src/video_core/query_cache/query_base.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								src/video_core/query_cache/query_base.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,70 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
| /// State flags of a query; individual bits can be combined. | ||||
| enum class QueryFlagBits : u32 { | ||||
|     HasTimestamp = 1 << 0,       ///< Indicates if this query has a timestamp.
 | ||||
|     IsFinalValueSynced = 1 << 1, ///< Indicates if the final value of the query has been resolved.
 | ||||
|     IsHostSynced = 1 << 2,       ///< Indicates if the query has been synced in the host.
 | ||||
|     IsGuestSynced = 1 << 3,      ///< Indicates if the query has been synced with the guest.
 | ||||
|     IsHostManaged = 1 << 4,      ///< Indicates if this query points to a host query.
 | ||||
|     IsRewritten = 1 << 5,        ///< Indicates if this query was rewritten by another query.
 | ||||
|     IsInvalidated = 1 << 6,      ///< Indicates the value of the query has been nullified.
 | ||||
|     IsOrphan = 1 << 7,           ///< Indicates the query has not been set by a guest query.
 | ||||
|     IsFence = 1 << 8,            ///< Indicates the query is a fence.
 | ||||
| }; | ||||
| DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) | ||||
| 
 | ||||
| class QueryBase { | ||||
| public: | ||||
|     VAddr guest_address{}; | ||||
|     QueryFlagBits flags{}; | ||||
|     u64 value{}; | ||||
| 
 | ||||
| protected: | ||||
|     // Default constructor
 | ||||
|     QueryBase() = default; | ||||
| 
 | ||||
|     // Parameterized constructor
 | ||||
|     QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) | ||||
|         : guest_address(address), flags(flags_), value{value_} {} | ||||
| }; | ||||
| 
 | ||||
| class GuestQuery : public QueryBase { | ||||
| public: | ||||
|     // Parameterized constructor
 | ||||
|     GuestQuery(bool isLong, VAddr address, u64 queryValue) | ||||
|         : QueryBase(address, QueryFlagBits::IsFinalValueSynced, queryValue) { | ||||
|         if (isLong) { | ||||
|             flags |= QueryFlagBits::HasTimestamp; | ||||
|         } | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| class HostQueryBase : public QueryBase { | ||||
| public: | ||||
|     // Default constructor
 | ||||
|     HostQueryBase() : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0) {} | ||||
| 
 | ||||
|     // Parameterized constructor
 | ||||
|     HostQueryBase(bool has_timestamp, VAddr address) | ||||
|         : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{}, | ||||
|           start_slot{}, size_slots{} { | ||||
|         if (has_timestamp) { | ||||
|             flags |= QueryFlagBits::HasTimestamp; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     u32 start_bank_id{}; | ||||
|     u32 size_banks{}; | ||||
|     size_t start_slot{}; | ||||
|     size_t size_slots{}; | ||||
| }; | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
							
								
								
									
										580
									
								
								src/video_core/query_cache/query_cache.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										580
									
								
								src/video_core/query_cache/query_cache.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,580 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <array> | ||||
| #include <deque> | ||||
| #include <memory> | ||||
| #include <mutex> | ||||
| #include <unordered_map> | ||||
| #include <utility> | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "common/scope_exit.h" | ||||
| #include "common/settings.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/query_cache/bank_base.h" | ||||
| #include "video_core/query_cache/query_base.h" | ||||
| #include "video_core/query_cache/query_cache_base.h" | ||||
| #include "video_core/query_cache/query_stream.h" | ||||
| #include "video_core/query_cache/types.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
| using Maxwell = Tegra::Engines::Maxwell3D; | ||||
| 
 | ||||
| /// One pending value write handed to the runtime's SyncValues: `value` goes to `address`. | ||||
| /// `size` is 8 for queries with a timestamp and 4 otherwise (presumably bytes - confirm). | ||||
| struct SyncValuesStruct { | ||||
|     VAddr address; | ||||
|     u64 value; | ||||
|     u64 size; | ||||
| 
 | ||||
|     // Compile-time trait; its consumer is not visible here (NOTE(review): confirm meaning). | ||||
|     static constexpr bool GeneratesBaseBuffer = true; | ||||
| }; | ||||
| 
 | ||||
| template <typename Traits> | ||||
| class GuestStreamer : public SimpleStreamer<GuestQuery> { | ||||
| public: | ||||
|     using RuntimeType = typename Traits::RuntimeType; | ||||
| 
 | ||||
|     GuestStreamer(size_t id_, RuntimeType& runtime_) | ||||
|         : SimpleStreamer<GuestQuery>(id_), runtime{runtime_} {} | ||||
| 
 | ||||
|     virtual ~GuestStreamer() = default; | ||||
| 
 | ||||
|     size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | ||||
|                         std::optional<u32> subreport = std::nullopt) override { | ||||
|         auto new_id = BuildQuery(has_timestamp, address, static_cast<u64>(value)); | ||||
|         pending_sync.push_back(new_id); | ||||
|         return new_id; | ||||
|     } | ||||
| 
 | ||||
|     bool HasPendingSync() const override { | ||||
|         return !pending_sync.empty(); | ||||
|     } | ||||
| 
 | ||||
|     void SyncWrites() override { | ||||
|         if (pending_sync.empty()) { | ||||
|             return; | ||||
|         } | ||||
|         std::vector<SyncValuesStruct> sync_values; | ||||
|         sync_values.reserve(pending_sync.size()); | ||||
|         for (size_t pending_id : pending_sync) { | ||||
|             auto& query = slot_queries[pending_id]; | ||||
|             if (True(query.flags & QueryFlagBits::IsRewritten) || | ||||
|                 True(query.flags & QueryFlagBits::IsInvalidated)) { | ||||
|                 continue; | ||||
|             } | ||||
|             query.flags |= QueryFlagBits::IsHostSynced; | ||||
|             sync_values.emplace_back(SyncValuesStruct{ | ||||
|                 .address = query.guest_address, | ||||
|                 .value = query.value, | ||||
|                 .size = static_cast<u64>(True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4)}); | ||||
|         } | ||||
|         pending_sync.clear(); | ||||
|         if (sync_values.size() > 0) { | ||||
|             runtime.template SyncValues<SyncValuesStruct>(sync_values); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     RuntimeType& runtime; | ||||
|     std::deque<size_t> pending_sync; | ||||
| }; | ||||
| 
 | ||||
| template <typename Traits> | ||||
| class StubStreamer : public GuestStreamer<Traits> { | ||||
| public: | ||||
|     using RuntimeType = typename Traits::RuntimeType; | ||||
| 
 | ||||
|     StubStreamer(size_t id_, RuntimeType& runtime_, u32 stub_value_) | ||||
|         : GuestStreamer<Traits>(id_, runtime_), stub_value{stub_value_} {} | ||||
| 
 | ||||
|     ~StubStreamer() override = default; | ||||
| 
 | ||||
|     size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, | ||||
|                         std::optional<u32> subreport = std::nullopt) override { | ||||
|         size_t new_id = | ||||
|             GuestStreamer<Traits>::WriteCounter(address, has_timestamp, stub_value, subreport); | ||||
|         return new_id; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     u32 stub_value; | ||||
| }; | ||||
| 
 | ||||
| template <typename Traits> | ||||
| struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | ||||
|     using RuntimeType = typename Traits::RuntimeType; | ||||
| 
 | ||||
|     QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, | ||||
|                        Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) | ||||
|         : owner{owner_}, rasterizer{rasterizer_}, | ||||
|           cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { | ||||
|         streamer_mask = 0; | ||||
|         for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { | ||||
|             streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); | ||||
|             if (streamers[i]) { | ||||
|                 streamer_mask |= 1ULL << streamers[i]->GetId(); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     template <typename Func> | ||||
|     void ForEachStreamerIn(u64 mask, Func&& func) { | ||||
|         static constexpr bool RETURNS_BOOL = | ||||
|             std::is_same_v<std::invoke_result<Func, StreamerInterface*>, bool>; | ||||
|         while (mask != 0) { | ||||
|             size_t position = std::countr_zero(mask); | ||||
|             mask &= ~(1ULL << position); | ||||
|             if constexpr (RETURNS_BOOL) { | ||||
|                 if (func(streamers[position])) { | ||||
|                     return; | ||||
|                 } | ||||
|             } else { | ||||
|                 func(streamers[position]); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     template <typename Func> | ||||
|     void ForEachStreamer(Func&& func) { | ||||
|         ForEachStreamerIn(streamer_mask, func); | ||||
|     } | ||||
| 
 | ||||
|     QueryBase* ObtainQuery(QueryCacheBase<Traits>::QueryLocation location) { | ||||
|         size_t which_stream = location.stream_id.Value(); | ||||
|         auto* streamer = streamers[which_stream]; | ||||
|         if (!streamer) { | ||||
|             return nullptr; | ||||
|         } | ||||
|         return streamer->GetQuery(location.query_id.Value()); | ||||
|     } | ||||
| 
 | ||||
|     QueryCacheBase<Traits>* owner; | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
|     RuntimeType& runtime; | ||||
|     Tegra::GPU& gpu; | ||||
|     std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; | ||||
|     u64 streamer_mask; | ||||
|     std::mutex flush_guard; | ||||
|     std::deque<u64> flushes_pending; | ||||
|     std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister; | ||||
| }; | ||||
| 
 | ||||
| template <typename Traits> | ||||
| QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, | ||||
|                                        VideoCore::RasterizerInterface& rasterizer_, | ||||
|                                        Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) | ||||
|     : cached_queries{} { | ||||
|     impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( | ||||
|         this, rasterizer_, cpu_memory_, runtime_, gpu_); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| QueryCacheBase<Traits>::~QueryCacheBase() = default; | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void QueryCacheBase<Traits>::CounterEnable(QueryType counter_type, bool is_enabled) { | ||||
|     size_t index = static_cast<size_t>(counter_type); | ||||
|     StreamerInterface* streamer = impl->streamers[index]; | ||||
|     if (!streamer) [[unlikely]] { | ||||
|         UNREACHABLE(); | ||||
|         return; | ||||
|     } | ||||
|     if (is_enabled) { | ||||
|         streamer->StartCounter(); | ||||
|     } else { | ||||
|         streamer->PauseCounter(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) { | ||||
|     size_t index = static_cast<size_t>(counter_type); | ||||
|     StreamerInterface* streamer = impl->streamers[index]; | ||||
|     if (!streamer) [[unlikely]] { | ||||
|         UNREACHABLE(); | ||||
|         return; | ||||
|     } | ||||
|     streamer->CloseCounter(); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) { | ||||
|     size_t index = static_cast<size_t>(counter_type); | ||||
|     StreamerInterface* streamer = impl->streamers[index]; | ||||
|     if (!streamer) [[unlikely]] { | ||||
|         UNIMPLEMENTED(); | ||||
|         return; | ||||
|     } | ||||
|     streamer->ResetCounter(); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void QueryCacheBase<Traits>::BindToChannel(s32 id) { | ||||
|     VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>::BindToChannel(id); | ||||
|     impl->runtime.Bind3DEngine(maxwell3d); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type, | ||||
|                                            QueryPropertiesFlags flags, u32 payload, u32 subreport) { | ||||
|     const bool has_timestamp = True(flags & QueryPropertiesFlags::HasTimeout); | ||||
|     const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); | ||||
|     size_t streamer_id = static_cast<size_t>(counter_type); | ||||
|     auto* streamer = impl->streamers[streamer_id]; | ||||
|     if (streamer == nullptr) [[unlikely]] { | ||||
|         counter_type = QueryType::Payload; | ||||
|         payload = 1U; | ||||
|         streamer_id = static_cast<size_t>(counter_type); | ||||
|         streamer = impl->streamers[streamer_id]; | ||||
|     } | ||||
|     auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); | ||||
|     if (!cpu_addr_opt) [[unlikely]] { | ||||
|         return; | ||||
|     } | ||||
|     VAddr cpu_addr = *cpu_addr_opt; | ||||
|     const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); | ||||
|     auto* query = streamer->GetQuery(new_query_id); | ||||
|     if (is_fence) { | ||||
|         query->flags |= QueryFlagBits::IsFence; | ||||
|     } | ||||
|     QueryLocation query_location{}; | ||||
|     query_location.stream_id.Assign(static_cast<u32>(streamer_id)); | ||||
|     query_location.query_id.Assign(static_cast<u32>(new_query_id)); | ||||
|     const auto gen_caching_indexing = [](VAddr cur_addr) { | ||||
|         return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, | ||||
|                                         static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); | ||||
|     }; | ||||
|     u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); | ||||
|     u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); | ||||
|     bool is_synced = !Settings::IsGPULevelHigh() && is_fence; | ||||
| 
 | ||||
|     std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, | ||||
|                                      pointer, pointer_timestamp] { | ||||
|         if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { | ||||
|             if (!is_synced) [[likely]] { | ||||
|                 impl->pending_unregister.push_back(query_location); | ||||
|             } | ||||
|             return; | ||||
|         } | ||||
|         if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { | ||||
|             UNREACHABLE(); | ||||
|             return; | ||||
|         } | ||||
|         query_base->value += streamer->GetAmmendValue(); | ||||
|         streamer->SetAccumulationValue(query_base->value); | ||||
|         if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { | ||||
|             u64 timestamp = impl->gpu.GetTicks(); | ||||
|             std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); | ||||
|             std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); | ||||
|         } else { | ||||
|             u32 value = static_cast<u32>(query_base->value); | ||||
|             std::memcpy(pointer, &value, sizeof(value)); | ||||
|         } | ||||
|         if (!is_synced) [[likely]] { | ||||
|             impl->pending_unregister.push_back(query_location); | ||||
|         } | ||||
|     }); | ||||
|     if (is_fence) { | ||||
|         impl->rasterizer.SignalFence(std::move(operation)); | ||||
|     } else { | ||||
|         if (!Settings::IsGPULevelHigh() && counter_type == QueryType::Payload) { | ||||
|             if (has_timestamp) { | ||||
|                 u64 timestamp = impl->gpu.GetTicks(); | ||||
|                 u64 value = static_cast<u64>(payload); | ||||
|                 std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); | ||||
|                 std::memcpy(pointer, &value, sizeof(value)); | ||||
|             } else { | ||||
|                 std::memcpy(pointer, &payload, sizeof(payload)); | ||||
|             } | ||||
|             streamer->Free(new_query_id); | ||||
|             return; | ||||
|         } | ||||
|         impl->rasterizer.SyncOperation(std::move(operation)); | ||||
|     } | ||||
|     if (is_synced) { | ||||
|         streamer->Free(new_query_id); | ||||
|         return; | ||||
|     } | ||||
|     auto [cont_addr, base] = gen_caching_indexing(cpu_addr); | ||||
|     { | ||||
|         std::scoped_lock lock(cache_mutex); | ||||
|         auto it1 = cached_queries.try_emplace(cont_addr); | ||||
|         auto& sub_container = it1.first->second; | ||||
|         auto it_current = sub_container.find(base); | ||||
|         if (it_current == sub_container.end()) { | ||||
|             sub_container.insert_or_assign(base, query_location); | ||||
|             return; | ||||
|         } | ||||
|         auto* old_query = impl->ObtainQuery(it_current->second); | ||||
|         old_query->flags |= QueryFlagBits::IsRewritten; | ||||
|         sub_container.insert_or_assign(base, query_location); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void QueryCacheBase<Traits>::UnregisterPending() { | ||||
|     const auto gen_caching_indexing = [](VAddr cur_addr) { | ||||
|         return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, | ||||
|                                         static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); | ||||
|     }; | ||||
|     std::scoped_lock lock(cache_mutex); | ||||
|     for (QueryLocation loc : impl->pending_unregister) { | ||||
|         const auto [streamer_id, query_id] = loc.unpack(); | ||||
|         auto* streamer = impl->streamers[streamer_id]; | ||||
|         if (!streamer) [[unlikely]] { | ||||
|             continue; | ||||
|         } | ||||
|         auto* query = streamer->GetQuery(query_id); | ||||
|         auto [cont_addr, base] = gen_caching_indexing(query->guest_address); | ||||
|         auto it1 = cached_queries.find(cont_addr); | ||||
|         if (it1 != cached_queries.end()) { | ||||
|             auto it2 = it1->second.find(base); | ||||
|             if (it2 != it1->second.end()) { | ||||
|                 if (it2->second.raw == loc.raw) { | ||||
|                     it1->second.erase(it2); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         streamer->Free(query_id); | ||||
|     } | ||||
|     impl->pending_unregister.clear(); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void QueryCacheBase<Traits>::NotifyWFI() { | ||||
|     bool should_sync = false; | ||||
|     impl->ForEachStreamer( | ||||
|         [&should_sync](StreamerInterface* streamer) { should_sync |= streamer->HasPendingSync(); }); | ||||
|     if (!should_sync) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->PresyncWrites(); }); | ||||
|     impl->runtime.Barriers(true); | ||||
|     impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->SyncWrites(); }); | ||||
|     impl->runtime.Barriers(false); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void QueryCacheBase<Traits>::NotifySegment(bool resume) { | ||||
|     if (resume) { | ||||
|         impl->runtime.ResumeHostConditionalRendering(); | ||||
|     } else { | ||||
|         CounterClose(VideoCommon::QueryType::ZPassPixelCount64); | ||||
|         CounterClose(VideoCommon::QueryType::StreamingByteCount); | ||||
|         impl->runtime.PauseHostConditionalRendering(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() { | ||||
|     bool qc_dirty = false; | ||||
|     const auto gen_lookup = [this, &qc_dirty](GPUVAddr address) -> VideoCommon::LookupData { | ||||
|         auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(address); | ||||
|         if (!cpu_addr_opt) [[unlikely]] { | ||||
|             return VideoCommon::LookupData{ | ||||
|                 .address = 0, | ||||
|                 .found_query = nullptr, | ||||
|             }; | ||||
|         } | ||||
|         VAddr cpu_addr = *cpu_addr_opt; | ||||
|         std::scoped_lock lock(cache_mutex); | ||||
|         auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS); | ||||
|         if (it1 == cached_queries.end()) { | ||||
|             return VideoCommon::LookupData{ | ||||
|                 .address = cpu_addr, | ||||
|                 .found_query = nullptr, | ||||
|             }; | ||||
|         } | ||||
|         auto& sub_container = it1->second; | ||||
|         auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK); | ||||
| 
 | ||||
|         if (it_current == sub_container.end()) { | ||||
|             auto it_current_2 = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4); | ||||
|             if (it_current_2 == sub_container.end()) { | ||||
|                 return VideoCommon::LookupData{ | ||||
|                     .address = cpu_addr, | ||||
|                     .found_query = nullptr, | ||||
|                 }; | ||||
|             } | ||||
|         } | ||||
|         auto* query = impl->ObtainQuery(it_current->second); | ||||
|         qc_dirty |= True(query->flags & QueryFlagBits::IsHostManaged) && | ||||
|                     False(query->flags & QueryFlagBits::IsGuestSynced); | ||||
|         return VideoCommon::LookupData{ | ||||
|             .address = cpu_addr, | ||||
|             .found_query = query, | ||||
|         }; | ||||
|     }; | ||||
| 
 | ||||
|     auto& regs = maxwell3d->regs; | ||||
|     if (regs.render_enable_override != Maxwell::Regs::RenderEnable::Override::UseRenderEnable) { | ||||
|         impl->runtime.EndHostConditionalRendering(); | ||||
|         return false; | ||||
|     } | ||||
|     const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode); | ||||
|     const GPUVAddr address = regs.render_enable.Address(); | ||||
|     switch (mode) { | ||||
|     case ComparisonMode::True: | ||||
|         impl->runtime.EndHostConditionalRendering(); | ||||
|         return false; | ||||
|     case ComparisonMode::False: | ||||
|         impl->runtime.EndHostConditionalRendering(); | ||||
|         return false; | ||||
|     case ComparisonMode::Conditional: { | ||||
|         VideoCommon::LookupData object_1{gen_lookup(address)}; | ||||
|         return impl->runtime.HostConditionalRenderingCompareValue(object_1, qc_dirty); | ||||
|     } | ||||
|     case ComparisonMode::IfEqual: { | ||||
|         VideoCommon::LookupData object_1{gen_lookup(address)}; | ||||
|         VideoCommon::LookupData object_2{gen_lookup(address + 16)}; | ||||
|         return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, | ||||
|                                                                    true); | ||||
|     } | ||||
|     case ComparisonMode::IfNotEqual: { | ||||
|         VideoCommon::LookupData object_1{gen_lookup(address)}; | ||||
|         VideoCommon::LookupData object_2{gen_lookup(address + 16)}; | ||||
|         return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, | ||||
|                                                                    false); | ||||
|     } | ||||
|     default: | ||||
|         return false; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // Async downloads
 | ||||
| template <typename Traits> | ||||
| void QueryCacheBase<Traits>::CommitAsyncFlushes() { | ||||
|     // Make sure to have the results synced in Host.
 | ||||
|     NotifyWFI(); | ||||
| 
 | ||||
|     u64 mask{}; | ||||
|     { | ||||
|         std::scoped_lock lk(impl->flush_guard); | ||||
|         impl->ForEachStreamer([&mask](StreamerInterface* streamer) { | ||||
|             bool local_result = streamer->HasUnsyncedQueries(); | ||||
|             if (local_result) { | ||||
|                 mask |= 1ULL << streamer->GetId(); | ||||
|             } | ||||
|         }); | ||||
|         impl->flushes_pending.push_back(mask); | ||||
|     } | ||||
|     std::function<void()> func([this] { UnregisterPending(); }); | ||||
|     impl->rasterizer.SyncOperation(std::move(func)); | ||||
|     if (mask == 0) { | ||||
|         return; | ||||
|     } | ||||
|     u64 ran_mask = ~mask; | ||||
|     while (mask) { | ||||
|         impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) { | ||||
|             u64 dep_mask = streamer->GetDependentMask(); | ||||
|             if ((dep_mask & ~ran_mask) != 0) { | ||||
|                 return; | ||||
|             } | ||||
|             u64 index = streamer->GetId(); | ||||
|             ran_mask |= (1ULL << index); | ||||
|             mask &= ~(1ULL << index); | ||||
|             streamer->PushUnsyncedQueries(); | ||||
|         }); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| bool QueryCacheBase<Traits>::HasUncommittedFlushes() const { | ||||
|     bool result = false; | ||||
|     impl->ForEachStreamer([&result](StreamerInterface* streamer) { | ||||
|         result |= streamer->HasUnsyncedQueries(); | ||||
|         return result; | ||||
|     }); | ||||
|     return result; | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| bool QueryCacheBase<Traits>::ShouldWaitAsyncFlushes() { | ||||
|     std::scoped_lock lk(impl->flush_guard); | ||||
|     return !impl->flushes_pending.empty() && impl->flushes_pending.front() != 0ULL; | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void QueryCacheBase<Traits>::PopAsyncFlushes() { | ||||
|     u64 mask; | ||||
|     { | ||||
|         std::scoped_lock lk(impl->flush_guard); | ||||
|         mask = impl->flushes_pending.front(); | ||||
|         impl->flushes_pending.pop_front(); | ||||
|     } | ||||
|     if (mask == 0) { | ||||
|         return; | ||||
|     } | ||||
|     u64 ran_mask = ~mask; | ||||
|     while (mask) { | ||||
|         impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) { | ||||
|             u64 dep_mask = streamer->GetDependenceMask(); | ||||
|             if ((dep_mask & ~ran_mask) != 0) { | ||||
|                 return; | ||||
|             } | ||||
|             u64 index = streamer->GetId(); | ||||
|             ran_mask |= (1ULL << index); | ||||
|             mask &= ~(1ULL << index); | ||||
|             streamer->PopUnsyncedQueries(); | ||||
|         }); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // Invalidation
 | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void QueryCacheBase<Traits>::InvalidateQuery(QueryCacheBase<Traits>::QueryLocation location) { | ||||
|     auto* query_base = impl->ObtainQuery(location); | ||||
|     if (!query_base) { | ||||
|         return; | ||||
|     } | ||||
|     query_base->flags |= QueryFlagBits::IsInvalidated; | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| bool QueryCacheBase<Traits>::IsQueryDirty(QueryCacheBase<Traits>::QueryLocation location) { | ||||
|     auto* query_base = impl->ObtainQuery(location); | ||||
|     if (!query_base) { | ||||
|         return false; | ||||
|     } | ||||
|     return True(query_base->flags & QueryFlagBits::IsHostManaged) && | ||||
|            False(query_base->flags & QueryFlagBits::IsGuestSynced); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLocation location) { | ||||
|     auto* query_base = impl->ObtainQuery(location); | ||||
|     if (!query_base) { | ||||
|         return false; | ||||
|     } | ||||
|     if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && | ||||
|         False(query_base->flags & QueryFlagBits::IsGuestSynced)) { | ||||
|         auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); | ||||
|         if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { | ||||
|             std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); | ||||
|             return false; | ||||
|         } | ||||
|         u32 value_l = static_cast<u32>(query_base->value); | ||||
|         std::memcpy(ptr, &value_l, sizeof(value_l)); | ||||
|         return false; | ||||
|     } | ||||
|     return True(query_base->flags & QueryFlagBits::IsHostManaged) && | ||||
|            False(query_base->flags & QueryFlagBits::IsGuestSynced); | ||||
| } | ||||
| 
 | ||||
| template <typename Traits> | ||||
| void QueryCacheBase<Traits>::RequestGuestHostSync() { | ||||
|     impl->rasterizer.ReleaseFences(); | ||||
| } | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
							
								
								
									
										181
									
								
								src/video_core/query_cache/query_cache_base.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										181
									
								
								src/video_core/query_cache/query_cache_base.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,181 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <functional> | ||||
| #include <mutex> | ||||
| #include <optional> | ||||
| #include <span> | ||||
| #include <unordered_map> | ||||
| #include <utility> | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/control/channel_state_cache.h" | ||||
| #include "video_core/query_cache/query_base.h" | ||||
| #include "video_core/query_cache/types.h" | ||||
| 
 | ||||
| namespace Core::Memory { | ||||
| class Memory; | ||||
| } | ||||
| 
 | ||||
| namespace VideoCore { | ||||
| class RasterizerInterface; | ||||
| } | ||||
| 
 | ||||
| namespace Tegra { | ||||
| class GPU; | ||||
| } | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
| struct LookupData { | ||||
|     VAddr address; | ||||
|     QueryBase* found_query; | ||||
| }; | ||||
| 
 | ||||
| template <typename Traits> | ||||
| class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||||
|     using RuntimeType = typename Traits::RuntimeType; | ||||
| 
 | ||||
| public: | ||||
|     union QueryLocation { | ||||
|         BitField<27, 5, u32> stream_id; | ||||
|         BitField<0, 27, u32> query_id; | ||||
|         u32 raw; | ||||
| 
 | ||||
|         std::pair<size_t, size_t> unpack() const { | ||||
|             return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())}; | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, | ||||
|                             Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); | ||||
| 
 | ||||
|     ~QueryCacheBase(); | ||||
| 
 | ||||
|     void InvalidateRegion(VAddr addr, std::size_t size) { | ||||
|         IterateCache<true>(addr, size, | ||||
|                            [this](QueryLocation location) { InvalidateQuery(location); }); | ||||
|     } | ||||
| 
 | ||||
|     void FlushRegion(VAddr addr, std::size_t size) { | ||||
|         bool result = false; | ||||
|         IterateCache<false>(addr, size, [this, &result](QueryLocation location) { | ||||
|             result |= SemiFlushQueryDirty(location); | ||||
|             return result; | ||||
|         }); | ||||
|         if (result) { | ||||
|             RequestGuestHostSync(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     static u64 BuildMask(std::span<const QueryType> types) { | ||||
|         u64 mask = 0; | ||||
|         for (auto query_type : types) { | ||||
|             mask |= 1ULL << (static_cast<u64>(query_type)); | ||||
|         } | ||||
|         return mask; | ||||
|     } | ||||
| 
 | ||||
|     /// Return true when a CPU region is modified from the GPU
 | ||||
|     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size) { | ||||
|         bool result = false; | ||||
|         IterateCache<false>(addr, size, [this, &result](QueryLocation location) { | ||||
|             result |= IsQueryDirty(location); | ||||
|             return result; | ||||
|         }); | ||||
|         return result; | ||||
|     } | ||||
| 
 | ||||
|     void CounterEnable(QueryType counter_type, bool is_enabled); | ||||
| 
 | ||||
|     void CounterReset(QueryType counter_type); | ||||
| 
 | ||||
|     void CounterClose(QueryType counter_type); | ||||
| 
 | ||||
|     void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags, | ||||
|                        u32 payload, u32 subreport); | ||||
| 
 | ||||
|     void NotifyWFI(); | ||||
| 
 | ||||
|     bool AccelerateHostConditionalRendering(); | ||||
| 
 | ||||
|     // Async downloads
 | ||||
|     void CommitAsyncFlushes(); | ||||
| 
 | ||||
|     bool HasUncommittedFlushes() const; | ||||
| 
 | ||||
|     bool ShouldWaitAsyncFlushes(); | ||||
| 
 | ||||
|     void PopAsyncFlushes(); | ||||
| 
 | ||||
|     void NotifySegment(bool resume); | ||||
| 
 | ||||
|     void BindToChannel(s32 id) override; | ||||
| 
 | ||||
| protected: | ||||
|     template <bool remove_from_cache, typename Func> | ||||
|     void IterateCache(VAddr addr, std::size_t size, Func&& func) { | ||||
|         static constexpr bool RETURNS_BOOL = | ||||
|             std::is_same_v<std::invoke_result<Func, QueryLocation>, bool>; | ||||
|         const u64 addr_begin = addr; | ||||
|         const u64 addr_end = addr_begin + size; | ||||
| 
 | ||||
|         const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS; | ||||
|         std::scoped_lock lock(cache_mutex); | ||||
|         for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) { | ||||
|             const u64 page_start = page << Core::Memory::YUZU_PAGEBITS; | ||||
|             const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) { | ||||
|                 const u64 cache_begin = page_start + query_location; | ||||
|                 const u64 cache_end = cache_begin + sizeof(u32); | ||||
|                 return cache_begin < addr_end && addr_begin < cache_end; | ||||
|             }; | ||||
|             const auto& it = cached_queries.find(page); | ||||
|             if (it == std::end(cached_queries)) { | ||||
|                 continue; | ||||
|             } | ||||
|             auto& contents = it->second; | ||||
|             for (auto& query : contents) { | ||||
|                 if (!in_range(query.first)) { | ||||
|                     continue; | ||||
|                 } | ||||
|                 if constexpr (RETURNS_BOOL) { | ||||
|                     if (func(query.second)) { | ||||
|                         return; | ||||
|                     } | ||||
|                 } else { | ||||
|                     func(query.second); | ||||
|                 } | ||||
|             } | ||||
|             if constexpr (remove_from_cache) { | ||||
|                 const auto in_range2 = [&](const std::pair<u32, QueryLocation>& pair) { | ||||
|                     return in_range(pair.first); | ||||
|                 }; | ||||
|                 std::erase_if(contents, in_range2); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     using ContentCache = std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>; | ||||
| 
 | ||||
|     void InvalidateQuery(QueryLocation location); | ||||
|     bool IsQueryDirty(QueryLocation location); | ||||
|     bool SemiFlushQueryDirty(QueryLocation location); | ||||
|     void RequestGuestHostSync(); | ||||
|     void UnregisterPending(); | ||||
| 
 | ||||
|     std::unordered_map<u64, std::unordered_map<u32, QueryLocation>> cached_queries; | ||||
|     std::mutex cache_mutex; | ||||
| 
 | ||||
|     struct QueryCacheBaseImpl; | ||||
|     friend struct QueryCacheBaseImpl; | ||||
|     friend RuntimeType; | ||||
| 
 | ||||
|     std::unique_ptr<QueryCacheBaseImpl> impl; | ||||
| }; | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
							
								
								
									
										149
									
								
								src/video_core/query_cache/query_stream.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										149
									
								
								src/video_core/query_cache/query_stream.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,149 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <deque> | ||||
| #include <optional> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/query_cache/bank_base.h" | ||||
| #include "video_core/query_cache/query_base.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
| class StreamerInterface { | ||||
| public: | ||||
|     explicit StreamerInterface(size_t id_) : id{id_}, dependence_mask{}, dependent_mask{} {} | ||||
|     virtual ~StreamerInterface() = default; | ||||
| 
 | ||||
|     virtual QueryBase* GetQuery(size_t id) = 0; | ||||
| 
 | ||||
|     virtual void StartCounter() { | ||||
|         /* Do Nothing */ | ||||
|     } | ||||
| 
 | ||||
|     virtual void PauseCounter() { | ||||
|         /* Do Nothing */ | ||||
|     } | ||||
| 
 | ||||
|     virtual void ResetCounter() { | ||||
|         /* Do Nothing */ | ||||
|     } | ||||
| 
 | ||||
|     virtual void CloseCounter() { | ||||
|         /* Do Nothing */ | ||||
|     } | ||||
| 
 | ||||
|     virtual bool HasPendingSync() const { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     virtual void PresyncWrites() { | ||||
|         /* Do Nothing */ | ||||
|     } | ||||
| 
 | ||||
|     virtual void SyncWrites() { | ||||
|         /* Do Nothing */ | ||||
|     } | ||||
| 
 | ||||
|     virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | ||||
|                                 std::optional<u32> subreport = std::nullopt) = 0; | ||||
| 
 | ||||
|     virtual bool HasUnsyncedQueries() const { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     virtual void PushUnsyncedQueries() { | ||||
|         /* Do Nothing */ | ||||
|     } | ||||
| 
 | ||||
|     virtual void PopUnsyncedQueries() { | ||||
|         /* Do Nothing */ | ||||
|     } | ||||
| 
 | ||||
|     virtual void Free(size_t query_id) = 0; | ||||
| 
 | ||||
|     size_t GetId() const { | ||||
|         return id; | ||||
|     } | ||||
| 
 | ||||
|     u64 GetDependenceMask() const { | ||||
|         return dependence_mask; | ||||
|     } | ||||
| 
 | ||||
|     u64 GetDependentMask() const { | ||||
|         return dependence_mask; | ||||
|     } | ||||
| 
 | ||||
|     u64 GetAmmendValue() const { | ||||
|         return ammend_value; | ||||
|     } | ||||
| 
 | ||||
|     void SetAccumulationValue(u64 new_value) { | ||||
|         acumulation_value = new_value; | ||||
|     } | ||||
| 
 | ||||
| protected: | ||||
|     void MakeDependent(StreamerInterface* depend_on) { | ||||
|         dependence_mask |= 1ULL << depend_on->id; | ||||
|         depend_on->dependent_mask |= 1ULL << id; | ||||
|     } | ||||
| 
 | ||||
|     const size_t id; | ||||
|     u64 dependence_mask; | ||||
|     u64 dependent_mask; | ||||
|     u64 ammend_value{}; | ||||
|     u64 acumulation_value{}; | ||||
| }; | ||||
| 
 | ||||
| template <typename QueryType> | ||||
| class SimpleStreamer : public StreamerInterface { | ||||
| public: | ||||
|     explicit SimpleStreamer(size_t id_) : StreamerInterface{id_} {} | ||||
|     virtual ~SimpleStreamer() = default; | ||||
| 
 | ||||
| protected: | ||||
|     virtual QueryType* GetQuery(size_t query_id) override { | ||||
|         if (query_id < slot_queries.size()) { | ||||
|             return &slot_queries[query_id]; | ||||
|         } | ||||
|         return nullptr; | ||||
|     } | ||||
| 
 | ||||
|     virtual void Free(size_t query_id) override { | ||||
|         std::scoped_lock lk(guard); | ||||
|         ReleaseQuery(query_id); | ||||
|     } | ||||
| 
 | ||||
|     template <typename... Args, typename = decltype(QueryType(std::declval<Args>()...))> | ||||
|     size_t BuildQuery(Args&&... args) { | ||||
|         std::scoped_lock lk(guard); | ||||
|         if (!old_queries.empty()) { | ||||
|             size_t new_id = old_queries.front(); | ||||
|             old_queries.pop_front(); | ||||
|             new (&slot_queries[new_id]) QueryType(std::forward<Args>(args)...); | ||||
|             return new_id; | ||||
|         } | ||||
|         size_t new_id = slot_queries.size(); | ||||
|         slot_queries.emplace_back(std::forward<Args>(args)...); | ||||
|         return new_id; | ||||
|     } | ||||
| 
 | ||||
|     void ReleaseQuery(size_t query_id) { | ||||
| 
 | ||||
|         if (query_id < slot_queries.size()) { | ||||
|             old_queries.push_back(query_id); | ||||
|             return; | ||||
|         } | ||||
|         UNREACHABLE(); | ||||
|     } | ||||
| 
 | ||||
|     std::mutex guard; | ||||
|     std::deque<QueryType> slot_queries; | ||||
|     std::deque<size_t> old_queries; | ||||
| }; | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
							
								
								
									
										74
									
								
								src/video_core/query_cache/types.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								src/video_core/query_cache/types.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,74 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
| enum class QueryPropertiesFlags : u32 { | ||||
|     HasTimeout = 1 << 0, | ||||
|     IsAFence = 1 << 1, | ||||
| }; | ||||
| DECLARE_ENUM_FLAG_OPERATORS(QueryPropertiesFlags) | ||||
| 
 | ||||
| // This should always be equivalent to maxwell3d Report Semaphore Reports
 | ||||
| enum class QueryType : u32 { | ||||
|     Payload = 0, // "None" in docs, but confirmed via hardware to return the payload
 | ||||
|     VerticesGenerated = 1, | ||||
|     ZPassPixelCount = 2, | ||||
|     PrimitivesGenerated = 3, | ||||
|     AlphaBetaClocks = 4, | ||||
|     VertexShaderInvocations = 5, | ||||
|     StreamingPrimitivesNeededMinusSucceeded = 6, | ||||
|     GeometryShaderInvocations = 7, | ||||
|     GeometryShaderPrimitivesGenerated = 9, | ||||
|     ZCullStats0 = 10, | ||||
|     StreamingPrimitivesSucceeded = 11, | ||||
|     ZCullStats1 = 12, | ||||
|     StreamingPrimitivesNeeded = 13, | ||||
|     ZCullStats2 = 14, | ||||
|     ClipperInvocations = 15, | ||||
|     ZCullStats3 = 16, | ||||
|     ClipperPrimitivesGenerated = 17, | ||||
|     VtgPrimitivesOut = 18, | ||||
|     PixelShaderInvocations = 19, | ||||
|     ZPassPixelCount64 = 21, | ||||
|     IEEECleanColorTarget = 24, | ||||
|     IEEECleanZetaTarget = 25, | ||||
|     StreamingByteCount = 26, | ||||
|     TessellationInitInvocations = 27, | ||||
|     BoundingRectangle = 28, | ||||
|     TessellationShaderInvocations = 29, | ||||
|     TotalStreamingPrimitivesNeededMinusSucceeded = 30, | ||||
|     TessellationShaderPrimitivesGenerated = 31, | ||||
|     // max.
 | ||||
|     MaxQueryTypes, | ||||
| }; | ||||
| 
 | ||||
| // Comparison modes for Host Conditional Rendering
 | ||||
| enum class ComparisonMode : u32 { | ||||
|     False = 0, | ||||
|     True = 1, | ||||
|     Conditional = 2, | ||||
|     IfEqual = 3, | ||||
|     IfNotEqual = 4, | ||||
|     MaxComparisonMode, | ||||
| }; | ||||
| 
 | ||||
| // Reduction ops.
 | ||||
| enum class ReductionOp : u32 { | ||||
|     RedAdd = 0, | ||||
|     RedMin = 1, | ||||
|     RedMax = 2, | ||||
|     RedInc = 3, | ||||
|     RedDec = 4, | ||||
|     RedAnd = 5, | ||||
|     RedOr = 6, | ||||
|     RedXor = 7, | ||||
|     MaxReductionOp, | ||||
| }; | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
|  | @ -12,6 +12,7 @@ | |||
| #include "video_core/cache_types.h" | ||||
| #include "video_core/engines/fermi_2d.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/query_cache/types.h" | ||||
| #include "video_core/rasterizer_download_area.h" | ||||
| 
 | ||||
| namespace Tegra { | ||||
|  | @ -26,11 +27,6 @@ struct ChannelState; | |||
| 
 | ||||
| namespace VideoCore { | ||||
| 
 | ||||
| enum class QueryType { | ||||
|     SamplesPassed, | ||||
| }; | ||||
| constexpr std::size_t NumQueryTypes = 1; | ||||
| 
 | ||||
| enum class LoadCallbackStage { | ||||
|     Prepare, | ||||
|     Build, | ||||
|  | @ -58,10 +54,11 @@ public: | |||
|     virtual void DispatchCompute() = 0; | ||||
| 
 | ||||
|     /// Resets the counter of a query
 | ||||
|     virtual void ResetCounter(QueryType type) = 0; | ||||
|     virtual void ResetCounter(VideoCommon::QueryType type) = 0; | ||||
| 
 | ||||
|     /// Records a GPU query and caches it
 | ||||
|     virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; | ||||
|     virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||
|                        VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0; | ||||
| 
 | ||||
|     /// Signal an uniform buffer binding
 | ||||
|     virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||
|  | @ -83,7 +80,7 @@ public: | |||
|     virtual void SignalReference() = 0; | ||||
| 
 | ||||
|     /// Release all pending fences.
 | ||||
|     virtual void ReleaseFences() = 0; | ||||
|     virtual void ReleaseFences(bool force = true) = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that all caches should be flushed to Switch memory
 | ||||
|     virtual void FlushAll() = 0; | ||||
|  |  | |||
|  | @ -26,16 +26,18 @@ void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {} | |||
| void RasterizerNull::DrawTexture() {} | ||||
| void RasterizerNull::Clear(u32 layer_count) {} | ||||
| void RasterizerNull::DispatchCompute() {} | ||||
| void RasterizerNull::ResetCounter(VideoCore::QueryType type) {} | ||||
| void RasterizerNull::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | ||||
|                            std::optional<u64> timestamp) { | ||||
| void RasterizerNull::ResetCounter(VideoCommon::QueryType type) {} | ||||
| void RasterizerNull::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||
|                            VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { | ||||
|     if (!gpu_memory) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     gpu_memory->Write(gpu_addr, u64{0}); | ||||
|     if (timestamp) { | ||||
|         gpu_memory->Write(gpu_addr + 8, *timestamp); | ||||
|     if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | ||||
|         u64 ticks = m_gpu.GetTicks(); | ||||
|         gpu_memory->Write<u64>(gpu_addr + 8, ticks); | ||||
|         gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload)); | ||||
|     } else { | ||||
|         gpu_memory->Write<u32>(gpu_addr, payload); | ||||
|     } | ||||
| } | ||||
| void RasterizerNull::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||
|  | @ -74,7 +76,7 @@ void RasterizerNull::SignalSyncPoint(u32 value) { | |||
|     syncpoint_manager.IncrementHost(value); | ||||
| } | ||||
| void RasterizerNull::SignalReference() {} | ||||
| void RasterizerNull::ReleaseFences() {} | ||||
| void RasterizerNull::ReleaseFences(bool) {} | ||||
| void RasterizerNull::FlushAndInvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | ||||
| void RasterizerNull::WaitForIdle() {} | ||||
| void RasterizerNull::FragmentBarrier() {} | ||||
|  |  | |||
|  | @ -42,8 +42,9 @@ public: | |||
|     void DrawTexture() override; | ||||
|     void Clear(u32 layer_count) override; | ||||
|     void DispatchCompute() override; | ||||
|     void ResetCounter(VideoCore::QueryType type) override; | ||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||||
|     void ResetCounter(VideoCommon::QueryType type) override; | ||||
|     void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||
|                VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; | ||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | ||||
|     void FlushAll() override; | ||||
|  | @ -63,7 +64,7 @@ public: | |||
|     void SyncOperation(std::function<void()>&& func) override; | ||||
|     void SignalSyncPoint(u32 value) override; | ||||
|     void SignalReference() override; | ||||
|     void ReleaseFences() override; | ||||
|     void ReleaseFences(bool force) override; | ||||
|     void FlushAndInvalidateRegion( | ||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void WaitForIdle() override; | ||||
|  |  | |||
|  | @ -27,7 +27,7 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { | |||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) | ||||
|     : QueryCacheBase(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} | ||||
|     : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} | ||||
| 
 | ||||
| QueryCache::~QueryCache() = default; | ||||
| 
 | ||||
|  |  | |||
|  | @ -26,7 +26,7 @@ class RasterizerOpenGL; | |||
| using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | ||||
| 
 | ||||
| class QueryCache final | ||||
|     : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { | ||||
|     : public VideoCommon::QueryCacheLegacy<QueryCache, CachedQuery, CounterStream, HostCounter> { | ||||
| public: | ||||
|     explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_); | ||||
|     ~QueryCache(); | ||||
|  |  | |||
|  | @ -396,13 +396,39 @@ void RasterizerOpenGL::DispatchCompute() { | |||
|     has_written_global_memory |= pipeline->WritesGlobalMemory(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | ||||
|     query_cache.ResetCounter(type); | ||||
| void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { | ||||
|     if (type == VideoCommon::QueryType::ZPassPixelCount64) { | ||||
|         query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | ||||
|                              std::optional<u64> timestamp) { | ||||
|     query_cache.Query(gpu_addr, type, timestamp); | ||||
| void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||
|                              VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { | ||||
|     if (type == VideoCommon::QueryType::ZPassPixelCount64) { | ||||
|         if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | ||||
|             query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); | ||||
|         } else { | ||||
|             query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt); | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
|     if (type != VideoCommon::QueryType::Payload) { | ||||
|         payload = 1u; | ||||
|     } | ||||
|     std::function<void()> func([this, gpu_addr, flags, memory_manager = gpu_memory, payload]() { | ||||
|         if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { | ||||
|             u64 ticks = gpu.GetTicks(); | ||||
|             memory_manager->Write<u64>(gpu_addr + 8, ticks); | ||||
|             memory_manager->Write<u64>(gpu_addr, static_cast<u64>(payload)); | ||||
|         } else { | ||||
|             memory_manager->Write<u32>(gpu_addr, payload); | ||||
|         } | ||||
|     }); | ||||
|     if (True(flags & VideoCommon::QueryPropertiesFlags::IsAFence)) { | ||||
|         SignalFence(std::move(func)); | ||||
|         return; | ||||
|     } | ||||
|     func(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||
|  | @ -573,8 +599,8 @@ void RasterizerOpenGL::SignalReference() { | |||
|     fence_manager.SignalOrdering(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::ReleaseFences() { | ||||
|     fence_manager.WaitPendingFences(); | ||||
| void RasterizerOpenGL::ReleaseFences(bool force) { | ||||
|     fence_manager.WaitPendingFences(force); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size, | ||||
|  |  | |||
|  | @ -86,8 +86,9 @@ public: | |||
|     void DrawTexture() override; | ||||
|     void Clear(u32 layer_count) override; | ||||
|     void DispatchCompute() override; | ||||
|     void ResetCounter(VideoCore::QueryType type) override; | ||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||||
|     void ResetCounter(VideoCommon::QueryType type) override; | ||||
|     void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||
|                VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; | ||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | ||||
|     void FlushAll() override; | ||||
|  | @ -107,7 +108,7 @@ public: | |||
|     void SyncOperation(std::function<void()>&& func) override; | ||||
|     void SignalSyncPoint(u32 value) override; | ||||
|     void SignalReference() override; | ||||
|     void ReleaseFences() override; | ||||
|     void ReleaseFences(bool force = true) override; | ||||
|     void FlushAndInvalidateRegion( | ||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void WaitForIdle() override; | ||||
|  |  | |||
|  | @ -61,6 +61,9 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo | |||
|     if (device.IsExtTransformFeedbackSupported()) { | ||||
|         flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | ||||
|     } | ||||
|     if (device.IsExtConditionalRendering()) { | ||||
|         flags |= VK_BUFFER_USAGE_CONDITIONAL_RENDERING_BIT_EXT; | ||||
|     } | ||||
|     const VkBufferCreateInfo buffer_ci = { | ||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|  |  | |||
|  | @ -12,6 +12,9 @@ | |||
| #include "common/common_types.h" | ||||
| #include "common/div_ceil.h" | ||||
| #include "video_core/host_shaders/astc_decoder_comp_spv.h" | ||||
| #include "video_core/host_shaders/queries_prefix_scan_sum_comp_spv.h" | ||||
| #include "video_core/host_shaders/queries_prefix_scan_sum_nosubgroups_comp_spv.h" | ||||
| #include "video_core/host_shaders/resolve_conditional_render_comp_spv.h" | ||||
| #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | ||||
| #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | ||||
| #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||||
|  | @ -57,6 +60,30 @@ constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SE | |||
|     }, | ||||
| }}; | ||||
| 
 | ||||
| constexpr std::array<VkDescriptorSetLayoutBinding, 3> QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS{{ | ||||
|     { | ||||
|         .binding = 0, | ||||
|         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||
|         .descriptorCount = 1, | ||||
|         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||
|         .pImmutableSamplers = nullptr, | ||||
|     }, | ||||
|     { | ||||
|         .binding = 1, | ||||
|         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||
|         .descriptorCount = 1, | ||||
|         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||
|         .pImmutableSamplers = nullptr, | ||||
|     }, | ||||
|     { | ||||
|         .binding = 2, | ||||
|         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||
|         .descriptorCount = 1, | ||||
|         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||
|         .pImmutableSamplers = nullptr, | ||||
|     }, | ||||
| }}; | ||||
| 
 | ||||
| constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ | ||||
|     .uniform_buffers = 0, | ||||
|     .storage_buffers = 2, | ||||
|  | @ -67,6 +94,16 @@ constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ | |||
|     .score = 2, | ||||
| }; | ||||
| 
 | ||||
| constexpr DescriptorBankInfo QUERIES_SCAN_BANK_INFO{ | ||||
|     .uniform_buffers = 0, | ||||
|     .storage_buffers = 3, | ||||
|     .texture_buffers = 0, | ||||
|     .image_buffers = 0, | ||||
|     .textures = 0, | ||||
|     .images = 0, | ||||
|     .score = 3, | ||||
| }; | ||||
| 
 | ||||
| constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{ | ||||
|     { | ||||
|         .binding = ASTC_BINDING_INPUT_BUFFER, | ||||
|  | @ -103,6 +140,15 @@ constexpr VkDescriptorUpdateTemplateEntry INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLAT | |||
|     .stride = sizeof(DescriptorUpdateEntry), | ||||
| }; | ||||
| 
 | ||||
| constexpr VkDescriptorUpdateTemplateEntry QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE{ | ||||
|     .dstBinding = 0, | ||||
|     .dstArrayElement = 0, | ||||
|     .descriptorCount = 3, | ||||
|     .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||
|     .offset = 0, | ||||
|     .stride = sizeof(DescriptorUpdateEntry), | ||||
| }; | ||||
| 
 | ||||
| constexpr std::array<VkDescriptorUpdateTemplateEntry, ASTC_NUM_BINDINGS> | ||||
|     ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{ | ||||
|         { | ||||
|  | @ -131,13 +177,21 @@ struct AstcPushConstants { | |||
|     u32 block_height; | ||||
|     u32 block_height_mask; | ||||
| }; | ||||
| 
 | ||||
| struct QueriesPrefixScanPushConstants { | ||||
|     u32 min_accumulation_base; | ||||
|     u32 max_accumulation_base; | ||||
|     u32 accumulation_limit; | ||||
|     u32 buffer_offset; | ||||
| }; | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, | ||||
|                          vk::Span<VkDescriptorSetLayoutBinding> bindings, | ||||
|                          vk::Span<VkDescriptorUpdateTemplateEntry> templates, | ||||
|                          const DescriptorBankInfo& bank_info, | ||||
|                          vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code) | ||||
|                          vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code, | ||||
|                          std::optional<u32> optional_subgroup_size) | ||||
|     : device{device_} { | ||||
|     descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ | ||||
|         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||||
|  | @ -178,13 +232,19 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, | |||
|         .pCode = code.data(), | ||||
|     }); | ||||
|     device.SaveShader(code); | ||||
|     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | ||||
|         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, | ||||
|         .pNext = nullptr, | ||||
|         .requiredSubgroupSize = optional_subgroup_size ? *optional_subgroup_size : 32U, | ||||
|     }; | ||||
|     bool use_setup_size = device.IsExtSubgroupSizeControlSupported() && optional_subgroup_size; | ||||
|     pipeline = device.GetLogical().CreateComputePipeline({ | ||||
|         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|         .flags = 0, | ||||
|         .stage{ | ||||
|             .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||||
|             .pNext = nullptr, | ||||
|             .pNext = use_setup_size ? &subgroup_size_ci : nullptr, | ||||
|             .flags = 0, | ||||
|             .stage = VK_SHADER_STAGE_COMPUTE_BIT, | ||||
|             .module = *module, | ||||
|  | @ -302,6 +362,123 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
|     return {staging.buffer, staging.offset}; | ||||
| } | ||||
| 
 | ||||
| ConditionalRenderingResolvePass::ConditionalRenderingResolvePass( | ||||
|     const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | ||||
|     ComputePassDescriptorQueue& compute_pass_descriptor_queue_) | ||||
|     : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, | ||||
|                   INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr, | ||||
|                   RESOLVE_CONDITIONAL_RENDER_COMP_SPV), | ||||
|       scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} | ||||
| 
 | ||||
| void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer, | ||||
|                                               u32 src_offset, bool compare_to_zero) { | ||||
|     const size_t compare_size = compare_to_zero ? 8 : 24; | ||||
| 
 | ||||
|     compute_pass_descriptor_queue.Acquire(); | ||||
|     compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size); | ||||
|     compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32)); | ||||
|     const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; | ||||
| 
 | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { | ||||
|         static constexpr VkMemoryBarrier read_barrier{ | ||||
|             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||
|             .pNext = nullptr, | ||||
|             .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||||
|             .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||||
|         }; | ||||
|         static constexpr VkMemoryBarrier write_barrier{ | ||||
|             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||
|             .pNext = nullptr, | ||||
|             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||||
|             .dstAccessMask = VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, | ||||
|         }; | ||||
|         const VkDescriptorSet set = descriptor_allocator.Commit(); | ||||
|         device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); | ||||
| 
 | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||||
|                                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier); | ||||
|         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||||
|         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); | ||||
|         cmdbuf.Dispatch(1, 1, 1); | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||||
|                                VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, write_barrier); | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| QueriesPrefixScanPass::QueriesPrefixScanPass( | ||||
|     const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | ||||
|     ComputePassDescriptorQueue& compute_pass_descriptor_queue_) | ||||
|     : ComputePass( | ||||
|           device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS, | ||||
|           QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO, | ||||
|           COMPUTE_PUSH_CONSTANT_RANGE<sizeof(QueriesPrefixScanPushConstants)>, | ||||
|           device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT) && | ||||
|                   device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) && | ||||
|                   device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT) && | ||||
|                   device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) | ||||
|               ? std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_COMP_SPV) | ||||
|               : std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_NOSUBGROUPS_COMP_SPV)), | ||||
|       scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} | ||||
| 
 | ||||
| void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer, | ||||
|                                 VkBuffer src_buffer, size_t number_of_sums, | ||||
|                                 size_t min_accumulation_limit, size_t max_accumulation_limit) { | ||||
|     size_t current_runs = number_of_sums; | ||||
|     size_t offset = 0; | ||||
|     while (current_runs != 0) { | ||||
|         static constexpr size_t DISPATCH_SIZE = 2048U; | ||||
|         size_t runs_to_do = std::min<size_t>(current_runs, DISPATCH_SIZE); | ||||
|         current_runs -= runs_to_do; | ||||
|         compute_pass_descriptor_queue.Acquire(); | ||||
|         compute_pass_descriptor_queue.AddBuffer(src_buffer, 0, number_of_sums * sizeof(u64)); | ||||
|         compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, number_of_sums * sizeof(u64)); | ||||
|         compute_pass_descriptor_queue.AddBuffer(accumulation_buffer, 0, sizeof(u64)); | ||||
|         const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; | ||||
|         size_t used_offset = offset; | ||||
|         offset += runs_to_do; | ||||
| 
 | ||||
|         scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|         scheduler.Record([this, descriptor_data, min_accumulation_limit, max_accumulation_limit, | ||||
|                           runs_to_do, used_offset](vk::CommandBuffer cmdbuf) { | ||||
|             static constexpr VkMemoryBarrier read_barrier{ | ||||
|                 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||
|                 .pNext = nullptr, | ||||
|                 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|                 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||||
|             }; | ||||
|             static constexpr VkMemoryBarrier write_barrier{ | ||||
|                 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||
|                 .pNext = nullptr, | ||||
|                 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||||
|                 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | | ||||
|                                  VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | | ||||
|                                  VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT | | ||||
|                                  VK_ACCESS_UNIFORM_READ_BIT | | ||||
|                                  VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, | ||||
|             }; | ||||
|             const QueriesPrefixScanPushConstants uniforms{ | ||||
|                 .min_accumulation_base = static_cast<u32>(min_accumulation_limit), | ||||
|                 .max_accumulation_base = static_cast<u32>(max_accumulation_limit), | ||||
|                 .accumulation_limit = static_cast<u32>(runs_to_do - 1), | ||||
|                 .buffer_offset = static_cast<u32>(used_offset), | ||||
|             }; | ||||
|             const VkDescriptorSet set = descriptor_allocator.Commit(); | ||||
|             device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); | ||||
| 
 | ||||
|             cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||||
|                                    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier); | ||||
|             cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||||
|             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); | ||||
|             cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); | ||||
|             cmdbuf.Dispatch(1, 1, 1); | ||||
|             cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||||
|                                    VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, | ||||
|                                    write_barrier); | ||||
|         }); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | ||||
|                                  DescriptorPool& descriptor_pool_, | ||||
|                                  StagingBufferPool& staging_buffer_pool_, | ||||
|  |  | |||
|  | @ -3,6 +3,7 @@ | |||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <optional> | ||||
| #include <span> | ||||
| #include <utility> | ||||
| 
 | ||||
|  | @ -31,7 +32,8 @@ public: | |||
|                          vk::Span<VkDescriptorSetLayoutBinding> bindings, | ||||
|                          vk::Span<VkDescriptorUpdateTemplateEntry> templates, | ||||
|                          const DescriptorBankInfo& bank_info, | ||||
|                          vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); | ||||
|                          vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code, | ||||
|                          std::optional<u32> optional_subgroup_size = std::nullopt); | ||||
|     ~ComputePass(); | ||||
| 
 | ||||
| protected: | ||||
|  | @ -82,6 +84,33 @@ private: | |||
|     ComputePassDescriptorQueue& compute_pass_descriptor_queue; | ||||
| }; | ||||
| 
 | ||||
| class ConditionalRenderingResolvePass final : public ComputePass { | ||||
| public: | ||||
|     explicit ConditionalRenderingResolvePass( | ||||
|         const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | ||||
|         ComputePassDescriptorQueue& compute_pass_descriptor_queue_); | ||||
| 
 | ||||
|     void Resolve(VkBuffer dst_buffer, VkBuffer src_buffer, u32 src_offset, bool compare_to_zero); | ||||
| 
 | ||||
| private: | ||||
|     Scheduler& scheduler; | ||||
|     ComputePassDescriptorQueue& compute_pass_descriptor_queue; | ||||
| }; | ||||
| 
 | ||||
| class QueriesPrefixScanPass final : public ComputePass { | ||||
| public: | ||||
|     explicit QueriesPrefixScanPass(const Device& device_, Scheduler& scheduler_, | ||||
|                                    DescriptorPool& descriptor_pool_, | ||||
|                                    ComputePassDescriptorQueue& compute_pass_descriptor_queue_); | ||||
| 
 | ||||
|     void Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer, VkBuffer src_buffer, | ||||
|              size_t number_of_sums, size_t min_accumulation_limit, size_t max_accumulation_limit); | ||||
| 
 | ||||
| private: | ||||
|     Scheduler& scheduler; | ||||
|     ComputePassDescriptorQueue& compute_pass_descriptor_queue; | ||||
| }; | ||||
| 
 | ||||
| class ASTCDecoderPass final : public ComputePass { | ||||
| public: | ||||
|     explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | ||||
|  |  | |||
|  | @ -7,6 +7,7 @@ | |||
| 
 | ||||
| #include "video_core/fence_manager.h" | ||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_query_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||||
| 
 | ||||
| namespace Core { | ||||
|  | @ -20,7 +21,6 @@ class RasterizerInterface; | |||
| namespace Vulkan { | ||||
| 
 | ||||
| class Device; | ||||
| class QueryCache; | ||||
| class Scheduler; | ||||
| 
 | ||||
| class InnerFence : public VideoCommon::FenceBase { | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -1,101 +1,75 @@ | |||
| // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later
 | ||||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 | ||||
| // SPDX-License-Identifier: GPL-3.0-or-later
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <cstddef> | ||||
| #include <memory> | ||||
| #include <utility> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/query_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_resource_pool.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| #include "video_core/query_cache/query_cache_base.h" | ||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||
| 
 | ||||
| namespace VideoCore { | ||||
| class RasterizerInterface; | ||||
| } | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| class StreamerInterface; | ||||
| } | ||||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| class CachedQuery; | ||||
| class Device; | ||||
| class HostCounter; | ||||
| class QueryCache; | ||||
| class Scheduler; | ||||
| class StagingBufferPool; | ||||
| 
 | ||||
| using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | ||||
| struct QueryCacheRuntimeImpl; | ||||
| 
 | ||||
| class QueryPool final : public ResourcePool { | ||||
| class QueryCacheRuntime { | ||||
| public: | ||||
|     explicit QueryPool(const Device& device, Scheduler& scheduler, VideoCore::QueryType type); | ||||
|     ~QueryPool() override; | ||||
|     explicit QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, | ||||
|                                Core::Memory::Memory& cpu_memory_, | ||||
|                                Vulkan::BufferCache& buffer_cache_, const Device& device_, | ||||
|                                const MemoryAllocator& memory_allocator_, Scheduler& scheduler_, | ||||
|                                StagingBufferPool& staging_pool_, | ||||
|                                ComputePassDescriptorQueue& compute_pass_descriptor_queue, | ||||
|                                DescriptorPool& descriptor_pool); | ||||
|     ~QueryCacheRuntime(); | ||||
| 
 | ||||
|     std::pair<VkQueryPool, u32> Commit(); | ||||
|     template <typename SyncValuesType> | ||||
|     void SyncValues(std::span<SyncValuesType> values, VkBuffer base_src_buffer = nullptr); | ||||
| 
 | ||||
|     void Reserve(std::pair<VkQueryPool, u32> query); | ||||
|     void Barriers(bool is_prebarrier); | ||||
| 
 | ||||
| protected: | ||||
|     void Allocate(std::size_t begin, std::size_t end) override; | ||||
|     void EndHostConditionalRendering(); | ||||
| 
 | ||||
|     void PauseHostConditionalRendering(); | ||||
| 
 | ||||
|     void ResumeHostConditionalRendering(); | ||||
| 
 | ||||
|     bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty); | ||||
| 
 | ||||
|     bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, | ||||
|                                                VideoCommon::LookupData object_2, bool qc_dirty, | ||||
|                                                bool equal_check); | ||||
| 
 | ||||
|     VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type); | ||||
| 
 | ||||
|     void Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d); | ||||
| 
 | ||||
|     template <typename Func> | ||||
|     void View3DRegs(Func&& func); | ||||
| 
 | ||||
| private: | ||||
|     static constexpr std::size_t GROW_STEP = 512; | ||||
| 
 | ||||
|     const Device& device; | ||||
|     const VideoCore::QueryType type; | ||||
| 
 | ||||
|     std::vector<vk::QueryPool> pools; | ||||
|     std::vector<bool> usage; | ||||
|     void HostConditionalRenderingCompareValueImpl(VideoCommon::LookupData object, bool is_equal); | ||||
|     void HostConditionalRenderingCompareBCImpl(VAddr address, bool is_equal); | ||||
|     friend struct QueryCacheRuntimeImpl; | ||||
|     std::unique_ptr<QueryCacheRuntimeImpl> impl; | ||||
| }; | ||||
| 
 | ||||
| class QueryCache final | ||||
|     : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { | ||||
| public: | ||||
|     explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, | ||||
|                         Core::Memory::Memory& cpu_memory_, const Device& device_, | ||||
|                         Scheduler& scheduler_); | ||||
|     ~QueryCache(); | ||||
| 
 | ||||
|     std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); | ||||
| 
 | ||||
|     void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); | ||||
| 
 | ||||
|     const Device& GetDevice() const noexcept { | ||||
|         return device; | ||||
|     } | ||||
| 
 | ||||
|     Scheduler& GetScheduler() const noexcept { | ||||
|         return scheduler; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     const Device& device; | ||||
|     Scheduler& scheduler; | ||||
|     std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; | ||||
| struct QueryCacheParams { | ||||
|     using RuntimeType = typename Vulkan::QueryCacheRuntime; | ||||
| }; | ||||
| 
 | ||||
| class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { | ||||
| public: | ||||
|     explicit HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> dependency_, | ||||
|                          VideoCore::QueryType type_); | ||||
|     ~HostCounter(); | ||||
| 
 | ||||
|     void EndQuery(); | ||||
| 
 | ||||
| private: | ||||
|     u64 BlockingQuery(bool async = false) const override; | ||||
| 
 | ||||
|     QueryCache& cache; | ||||
|     const VideoCore::QueryType type; | ||||
|     const std::pair<VkQueryPool, u32> query; | ||||
|     const u64 tick; | ||||
| }; | ||||
| 
 | ||||
| class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { | ||||
| public: | ||||
|     explicit CachedQuery(QueryCache&, VideoCore::QueryType, VAddr cpu_addr_, u8* host_ptr_) | ||||
|         : CachedQueryBase{cpu_addr_, host_ptr_} {} | ||||
| }; | ||||
| using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>; | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
|  |  | |||
|  | @ -24,6 +24,7 @@ | |||
| #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||
| #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_query_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||
| #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||||
|  | @ -170,9 +171,11 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
|       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | ||||
|                            guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool), | ||||
|       buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | ||||
|       query_cache_runtime(this, cpu_memory_, buffer_cache, device, memory_allocator, scheduler, | ||||
|                           staging_pool, compute_pass_descriptor_queue, descriptor_pool), | ||||
|       query_cache(gpu, *this, cpu_memory_, query_cache_runtime), | ||||
|       pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue, | ||||
|                      render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), | ||||
|       query_cache{*this, cpu_memory_, device, scheduler}, | ||||
|       accelerate_dma(buffer_cache, texture_cache, scheduler), | ||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | ||||
|       wfi_event(device.GetLogical().CreateEvent()) { | ||||
|  | @ -189,14 +192,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
|     FlushWork(); | ||||
|     gpu_memory->FlushCaching(); | ||||
| 
 | ||||
| #if ANDROID | ||||
|     if (Settings::IsGPULevelHigh()) { | ||||
|         // This is problematic on Android, disable on GPU Normal.
 | ||||
|         query_cache.UpdateCounters(); | ||||
|     } | ||||
| #else | ||||
|     query_cache.UpdateCounters(); | ||||
| #endif | ||||
|     query_cache.NotifySegment(true); | ||||
| 
 | ||||
|     GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; | ||||
|     if (!pipeline) { | ||||
|  | @ -207,13 +203,12 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
|     pipeline->SetEngine(maxwell3d, gpu_memory); | ||||
|     pipeline->Configure(is_indexed); | ||||
| 
 | ||||
|     BeginTransformFeedback(); | ||||
| 
 | ||||
|     UpdateDynamicStates(); | ||||
| 
 | ||||
|     HandleTransformFeedback(); | ||||
|     query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | ||||
|                               maxwell3d->regs.zpass_pixel_count_enable); | ||||
|     draw_func(); | ||||
| 
 | ||||
|     EndTransformFeedback(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { | ||||
|  | @ -241,6 +236,14 @@ void RasterizerVulkan::DrawIndirect() { | |||
|         const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer(); | ||||
|         const auto& buffer = indirect_buffer.first; | ||||
|         const auto& offset = indirect_buffer.second; | ||||
|         if (params.is_byte_count) { | ||||
|             scheduler.Record([buffer_obj = buffer->Handle(), offset, | ||||
|                               stride = params.stride](vk::CommandBuffer cmdbuf) { | ||||
|                 cmdbuf.DrawIndirectByteCountEXT(1, 0, buffer_obj, offset, 0, | ||||
|                                                 static_cast<u32>(stride)); | ||||
|             }); | ||||
|             return; | ||||
|         } | ||||
|         if (params.include_count) { | ||||
|             const auto count = buffer_cache.GetDrawIndirectCount(); | ||||
|             const auto& draw_buffer = count.first; | ||||
|  | @ -280,20 +283,15 @@ void RasterizerVulkan::DrawTexture() { | |||
|     SCOPE_EXIT({ gpu.TickWork(); }); | ||||
|     FlushWork(); | ||||
| 
 | ||||
| #if ANDROID | ||||
|     if (Settings::IsGPULevelHigh()) { | ||||
|         // This is problematic on Android, disable on GPU Normal.
 | ||||
|         query_cache.UpdateCounters(); | ||||
|     } | ||||
| #else | ||||
|     query_cache.UpdateCounters(); | ||||
| #endif | ||||
|     query_cache.NotifySegment(true); | ||||
| 
 | ||||
|     texture_cache.SynchronizeGraphicsDescriptors(); | ||||
|     texture_cache.UpdateRenderTargets(false); | ||||
| 
 | ||||
|     UpdateDynamicStates(); | ||||
| 
 | ||||
|     query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | ||||
|                               maxwell3d->regs.zpass_pixel_count_enable); | ||||
|     const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); | ||||
|     const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); | ||||
|     const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); | ||||
|  | @ -316,14 +314,9 @@ void RasterizerVulkan::Clear(u32 layer_count) { | |||
|     FlushWork(); | ||||
|     gpu_memory->FlushCaching(); | ||||
| 
 | ||||
| #if ANDROID | ||||
|     if (Settings::IsGPULevelHigh()) { | ||||
|         // This is problematic on Android, disable on GPU Normal.
 | ||||
|         query_cache.UpdateCounters(); | ||||
|     } | ||||
| #else | ||||
|     query_cache.UpdateCounters(); | ||||
| #endif | ||||
|     query_cache.NotifySegment(true); | ||||
|     query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, | ||||
|                               maxwell3d->regs.zpass_pixel_count_enable); | ||||
| 
 | ||||
|     auto& regs = maxwell3d->regs; | ||||
|     const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || | ||||
|  | @ -482,13 +475,13 @@ void RasterizerVulkan::DispatchCompute() { | |||
|     scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | ||||
|     query_cache.ResetCounter(type); | ||||
| void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { | ||||
|     query_cache.CounterReset(type); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | ||||
|                              std::optional<u64> timestamp) { | ||||
|     query_cache.Query(gpu_addr, type, timestamp); | ||||
| void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||
|                              VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { | ||||
|     query_cache.CounterReport(gpu_addr, type, flags, payload, subreport); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||
|  | @ -669,8 +662,8 @@ void RasterizerVulkan::SignalReference() { | |||
|     fence_manager.SignalReference(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::ReleaseFences() { | ||||
|     fence_manager.WaitPendingFences(); | ||||
| void RasterizerVulkan::ReleaseFences(bool force) { | ||||
|     fence_manager.WaitPendingFences(force); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size, | ||||
|  | @ -694,6 +687,8 @@ void RasterizerVulkan::WaitForIdle() { | |||
|         flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; | ||||
|     } | ||||
| 
 | ||||
|     query_cache.NotifyWFI(); | ||||
| 
 | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) { | ||||
|         cmdbuf.SetEvent(event, flags); | ||||
|  | @ -737,19 +732,7 @@ void RasterizerVulkan::TickFrame() { | |||
| 
 | ||||
| bool RasterizerVulkan::AccelerateConditionalRendering() { | ||||
|     gpu_memory->FlushCaching(); | ||||
|     if (Settings::IsGPULevelHigh()) { | ||||
|         // TODO(Blinkhawk): Reimplement Host conditional rendering.
 | ||||
|         return false; | ||||
|     } | ||||
|     // Medium / Low Hack: stub any checks on queries written into the buffer cache.
 | ||||
|     const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()}; | ||||
|     Maxwell::ReportSemaphore::Compare cmp; | ||||
|     if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp), | ||||
|                                   VideoCommon::CacheType::BufferCache | | ||||
|                                       VideoCommon::CacheType::QueryCache)) { | ||||
|         return true; | ||||
|     } | ||||
|     return false; | ||||
|     return query_cache.AccelerateHostConditionalRendering(); | ||||
| } | ||||
| 
 | ||||
| bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | ||||
|  | @ -795,6 +778,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
|     if (!image_view) { | ||||
|         return false; | ||||
|     } | ||||
|     query_cache.NotifySegment(false); | ||||
|     screen_info.image = image_view->ImageHandle(); | ||||
|     screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); | ||||
|     screen_info.width = image_view->size.width; | ||||
|  | @ -933,31 +917,18 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::BeginTransformFeedback() { | ||||
| void RasterizerVulkan::HandleTransformFeedback() { | ||||
|     const auto& regs = maxwell3d->regs; | ||||
|     if (regs.transform_feedback_enabled == 0) { | ||||
|         return; | ||||
|     } | ||||
|     if (!device.IsExtTransformFeedbackSupported()) { | ||||
|         LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); | ||||
|         return; | ||||
|     } | ||||
|     query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, | ||||
|                               regs.transform_feedback_enabled); | ||||
|     if (regs.transform_feedback_enabled != 0) { | ||||
|         UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || | ||||
|                          regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); | ||||
|     scheduler.Record( | ||||
|         [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | ||||
|     } | ||||
| 
 | ||||
| void RasterizerVulkan::EndTransformFeedback() { | ||||
|     const auto& regs = maxwell3d->regs; | ||||
|     if (regs.transform_feedback_enabled == 0) { | ||||
|         return; | ||||
|     } | ||||
|     if (!device.IsExtTransformFeedbackSupported()) { | ||||
|         return; | ||||
|     } | ||||
|     scheduler.Record( | ||||
|         [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | ||||
|  |  | |||
|  | @ -84,8 +84,9 @@ public: | |||
|     void DrawTexture() override; | ||||
|     void Clear(u32 layer_count) override; | ||||
|     void DispatchCompute() override; | ||||
|     void ResetCounter(VideoCore::QueryType type) override; | ||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||||
|     void ResetCounter(VideoCommon::QueryType type) override; | ||||
|     void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||
|                VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; | ||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||
|     void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; | ||||
|     void FlushAll() override; | ||||
|  | @ -106,7 +107,7 @@ public: | |||
|     void SyncOperation(std::function<void()>&& func) override; | ||||
|     void SignalSyncPoint(u32 value) override; | ||||
|     void SignalReference() override; | ||||
|     void ReleaseFences() override; | ||||
|     void ReleaseFences(bool force = true) override; | ||||
|     void FlushAndInvalidateRegion( | ||||
|         VAddr addr, u64 size, VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||
|     void WaitForIdle() override; | ||||
|  | @ -146,9 +147,7 @@ private: | |||
| 
 | ||||
|     void UpdateDynamicStates(); | ||||
| 
 | ||||
|     void BeginTransformFeedback(); | ||||
| 
 | ||||
|     void EndTransformFeedback(); | ||||
|     void HandleTransformFeedback(); | ||||
| 
 | ||||
|     void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | ||||
|     void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | ||||
|  | @ -195,8 +194,9 @@ private: | |||
|     TextureCache texture_cache; | ||||
|     BufferCacheRuntime buffer_cache_runtime; | ||||
|     BufferCache buffer_cache; | ||||
|     PipelineCache pipeline_cache; | ||||
|     QueryCacheRuntime query_cache_runtime; | ||||
|     QueryCache query_cache; | ||||
|     PipelineCache pipeline_cache; | ||||
|     AccelerateDMA accelerate_dma; | ||||
|     FenceManager fence_manager; | ||||
| 
 | ||||
|  |  | |||
|  | @ -243,10 +243,10 @@ void Scheduler::AllocateNewContext() { | |||
| #if ANDROID | ||||
|         if (Settings::IsGPULevelHigh()) { | ||||
|             // This is problematic on Android, disable on GPU Normal.
 | ||||
|             query_cache->UpdateCounters(); | ||||
|             query_cache->NotifySegment(true); | ||||
|         } | ||||
| #else | ||||
|         query_cache->UpdateCounters(); | ||||
|         query_cache->NotifySegment(true); | ||||
| #endif | ||||
|     } | ||||
| } | ||||
|  | @ -261,11 +261,12 @@ void Scheduler::EndPendingOperations() { | |||
| #if ANDROID | ||||
|     if (Settings::IsGPULevelHigh()) { | ||||
|         // This is problematic on Android, disable on GPU Normal.
 | ||||
|         query_cache->DisableStreams(); | ||||
|         // query_cache->DisableStreams();
 | ||||
|     } | ||||
| #else | ||||
|     query_cache->DisableStreams(); | ||||
|     // query_cache->DisableStreams();
 | ||||
| #endif | ||||
|     query_cache->NotifySegment(false); | ||||
|     EndRenderPass(); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -17,6 +17,11 @@ | |||
| #include "video_core/renderer_vulkan/vk_master_semaphore.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| template <typename Trait> | ||||
| class QueryCacheBase; | ||||
| } | ||||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| class CommandPool; | ||||
|  | @ -24,7 +29,8 @@ class Device; | |||
| class Framebuffer; | ||||
| class GraphicsPipeline; | ||||
| class StateTracker; | ||||
| class QueryCache; | ||||
| 
 | ||||
| struct QueryCacheParams; | ||||
| 
 | ||||
| /// The scheduler abstracts command buffer and fence management with an interface that's able to do
 | ||||
| /// OpenGL-like operations on Vulkan command buffers.
 | ||||
|  | @ -63,7 +69,7 @@ public: | |||
|     void InvalidateState(); | ||||
| 
 | ||||
|     /// Assigns the query cache.
 | ||||
|     void SetQueryCache(QueryCache& query_cache_) { | ||||
|     void SetQueryCache(VideoCommon::QueryCacheBase<QueryCacheParams>& query_cache_) { | ||||
|         query_cache = &query_cache_; | ||||
|     } | ||||
| 
 | ||||
|  | @ -219,7 +225,7 @@ private: | |||
|     std::unique_ptr<MasterSemaphore> master_semaphore; | ||||
|     std::unique_ptr<CommandPool> command_pool; | ||||
| 
 | ||||
|     QueryCache* query_cache = nullptr; | ||||
|     VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr; | ||||
| 
 | ||||
|     vk::CommandBuffer current_cmdbuf; | ||||
| 
 | ||||
|  |  | |||
|  | @ -61,6 +61,7 @@ VK_DEFINE_HANDLE(VmaAllocator) | |||
| 
 | ||||
| // Define miscellaneous extensions which may be used by the implementation here.
 | ||||
| #define FOR_EACH_VK_EXTENSION(EXTENSION)                                                           \ | ||||
|     EXTENSION(EXT, CONDITIONAL_RENDERING, conditional_rendering)                                   \ | ||||
|     EXTENSION(EXT, CONSERVATIVE_RASTERIZATION, conservative_rasterization)                         \ | ||||
|     EXTENSION(EXT, DEPTH_RANGE_UNRESTRICTED, depth_range_unrestricted)                             \ | ||||
|     EXTENSION(EXT, MEMORY_BUDGET, memory_budget)                                                   \ | ||||
|  | @ -93,6 +94,7 @@ VK_DEFINE_HANDLE(VmaAllocator) | |||
| 
 | ||||
| // Define extensions where the absence of the extension may result in a degraded experience.
 | ||||
| #define FOR_EACH_VK_RECOMMENDED_EXTENSION(EXTENSION_NAME)                                          \ | ||||
|     EXTENSION_NAME(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME)                                    \ | ||||
|     EXTENSION_NAME(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)                               \ | ||||
|     EXTENSION_NAME(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME)                                 \ | ||||
|     EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME)                                   \ | ||||
|  | @ -541,6 +543,10 @@ public: | |||
|         return extensions.shader_atomic_int64; | ||||
|     } | ||||
| 
 | ||||
|     /// Reports the conditional_rendering flag stored in `extensions`.
 | ||||
|     bool IsExtConditionalRendering() const { | ||||
|         const bool has_conditional_rendering = extensions.conditional_rendering; | ||||
|         return has_conditional_rendering; | ||||
|     } | ||||
| 
 | ||||
|     bool HasTimelineSemaphore() const; | ||||
| 
 | ||||
|     /// Returns the minimum supported version of SPIR-V.
 | ||||
|  |  | |||
|  | @ -75,6 +75,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
|     X(vkBeginCommandBuffer); | ||||
|     X(vkBindBufferMemory); | ||||
|     X(vkBindImageMemory); | ||||
|     X(vkCmdBeginConditionalRenderingEXT); | ||||
|     X(vkCmdBeginQuery); | ||||
|     X(vkCmdBeginRenderPass); | ||||
|     X(vkCmdBeginTransformFeedbackEXT); | ||||
|  | @ -91,6 +92,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
|     X(vkCmdCopyBufferToImage); | ||||
|     X(vkCmdCopyImage); | ||||
|     X(vkCmdCopyImageToBuffer); | ||||
|     X(vkCmdCopyQueryPoolResults); | ||||
|     X(vkCmdDispatch); | ||||
|     X(vkCmdDispatchIndirect); | ||||
|     X(vkCmdDraw); | ||||
|  | @ -99,6 +101,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
|     X(vkCmdDrawIndexedIndirect); | ||||
|     X(vkCmdDrawIndirectCount); | ||||
|     X(vkCmdDrawIndexedIndirectCount); | ||||
|     X(vkCmdDrawIndirectByteCountEXT); | ||||
|     X(vkCmdEndConditionalRenderingEXT); | ||||
|     X(vkCmdEndQuery); | ||||
|     X(vkCmdEndRenderPass); | ||||
|     X(vkCmdEndTransformFeedbackEXT); | ||||
|  |  | |||
|  | @ -185,6 +185,7 @@ struct DeviceDispatch : InstanceDispatch { | |||
|     PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; | ||||
|     PFN_vkBindBufferMemory vkBindBufferMemory{}; | ||||
|     PFN_vkBindImageMemory vkBindImageMemory{}; | ||||
|     PFN_vkCmdBeginConditionalRenderingEXT vkCmdBeginConditionalRenderingEXT{}; | ||||
|     PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; | ||||
|     PFN_vkCmdBeginQuery vkCmdBeginQuery{}; | ||||
|     PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; | ||||
|  | @ -202,6 +203,7 @@ struct DeviceDispatch : InstanceDispatch { | |||
|     PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage{}; | ||||
|     PFN_vkCmdCopyImage vkCmdCopyImage{}; | ||||
|     PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{}; | ||||
|     PFN_vkCmdCopyQueryPoolResults vkCmdCopyQueryPoolResults{}; | ||||
|     PFN_vkCmdDispatch vkCmdDispatch{}; | ||||
|     PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{}; | ||||
|     PFN_vkCmdDraw vkCmdDraw{}; | ||||
|  | @ -210,6 +212,8 @@ struct DeviceDispatch : InstanceDispatch { | |||
|     PFN_vkCmdDrawIndexedIndirect vkCmdDrawIndexedIndirect{}; | ||||
|     PFN_vkCmdDrawIndirectCount vkCmdDrawIndirectCount{}; | ||||
|     PFN_vkCmdDrawIndexedIndirectCount vkCmdDrawIndexedIndirectCount{}; | ||||
|     PFN_vkCmdDrawIndirectByteCountEXT vkCmdDrawIndirectByteCountEXT{}; | ||||
|     PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT{}; | ||||
|     PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; | ||||
|     PFN_vkCmdEndQuery vkCmdEndQuery{}; | ||||
|     PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; | ||||
|  | @ -1182,6 +1186,13 @@ public: | |||
|                                            count_offset, draw_count, stride); | ||||
|     } | ||||
| 
 | ||||
|     /// Calls vkCmdDrawIndirectByteCountEXT through `dld`, passing `handle` first and then
 | ||||
|     /// every argument unchanged, in declaration order.
 | ||||
|     /// Qualified const noexcept to match the other command wrappers in this class
 | ||||
|     /// (e.g. CopyQueryPoolResults); the call only reads `dld` and `handle`.
 | ||||
|     void DrawIndirectByteCountEXT(u32 instance_count, u32 first_instance, VkBuffer counter_buffer, | ||||
|                                   VkDeviceSize counter_buffer_offset, u32 counter_offset, | ||||
|                                   u32 stride) const noexcept { | ||||
|         dld->vkCmdDrawIndirectByteCountEXT(handle, instance_count, first_instance, counter_buffer, | ||||
|                                            counter_buffer_offset, counter_offset, stride); | ||||
|     } | ||||
| 
 | ||||
|     void ClearAttachments(Span<VkClearAttachment> attachments, | ||||
|                           Span<VkClearRect> rects) const noexcept { | ||||
|         dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(), | ||||
|  | @ -1270,6 +1281,13 @@ public: | |||
|                                     regions.data()); | ||||
|     } | ||||
| 
 | ||||
|     /// Calls vkCmdCopyQueryPoolResults through `dld`, passing `handle` first and then
 | ||||
|     /// every argument unchanged, in declaration order.
 | ||||
|     void CopyQueryPoolResults(VkQueryPool query_pool, u32 first_query, u32 query_count, | ||||
|                               VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize stride, | ||||
|                               VkQueryResultFlags flags) const noexcept { | ||||
|         dld->vkCmdCopyQueryPoolResults(handle, | ||||
|                                        query_pool, first_query, query_count, | ||||
|                                        dst_buffer, dst_offset, stride, | ||||
|                                        flags); | ||||
|     } | ||||
| 
 | ||||
|     void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, | ||||
|                     u32 data) const noexcept { | ||||
|         dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data); | ||||
|  | @ -1448,6 +1466,15 @@ public: | |||
|                                           counter_buffers, counter_buffer_offsets); | ||||
|     } | ||||
| 
 | ||||
|     /// Calls vkCmdBeginConditionalRenderingEXT through `dld` with `handle` and the
 | ||||
|     /// address of the caller-provided `info` structure.
 | ||||
|     void BeginConditionalRenderingEXT( | ||||
|         const VkConditionalRenderingBeginInfoEXT& info) const noexcept { | ||||
|         const VkConditionalRenderingBeginInfoEXT* const begin_info = &info; | ||||
|         dld->vkCmdBeginConditionalRenderingEXT(handle, begin_info); | ||||
|     } | ||||
| 
 | ||||
|     /// Calls vkCmdEndConditionalRenderingEXT through `dld` with `handle` as its only argument.
 | ||||
|     void EndConditionalRenderingEXT() const noexcept { | ||||
|         const auto end_conditional_rendering = dld->vkCmdEndConditionalRenderingEXT; | ||||
|         end_conditional_rendering(handle); | ||||
|     } | ||||
| 
 | ||||
|     void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept { | ||||
|         const VkDebugUtilsLabelEXT label_info{ | ||||
|             .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 liamwhite
						liamwhite