forked from eden-emu/eden
		
	Merge pull request #5741 from ReinUsesLisp/new-bufcache
video_core: Reimplement the buffer cache
This commit is contained in:
		
						commit
						d557d12c9a
					
				
					 96 changed files with 3425 additions and 3118 deletions
				
			
		|  | @ -471,3 +471,79 @@ TEST_CASE("BufferBase: Unaligned page region query") { | |||
|     REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000)); | ||||
|     REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1)); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("BufferBase: Cached write") { | ||||
|     RasterizerInterface rasterizer; | ||||
|     BufferBase buffer(rasterizer, c, WORD); | ||||
|     buffer.UnmarkRegionAsCpuModified(c, WORD); | ||||
|     buffer.CachedCpuWrite(c + PAGE, PAGE); | ||||
|     REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||||
|     buffer.FlushCachedWrites(); | ||||
|     REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||||
|     buffer.MarkRegionAsCpuModified(c, WORD); | ||||
|     REQUIRE(rasterizer.Count() == 0); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("BufferBase: Multiple cached write") { | ||||
|     RasterizerInterface rasterizer; | ||||
|     BufferBase buffer(rasterizer, c, WORD); | ||||
|     buffer.UnmarkRegionAsCpuModified(c, WORD); | ||||
|     buffer.CachedCpuWrite(c + PAGE, PAGE); | ||||
|     buffer.CachedCpuWrite(c + PAGE * 3, PAGE); | ||||
|     REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||||
|     REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||||
|     buffer.FlushCachedWrites(); | ||||
|     REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||||
|     REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||||
|     buffer.MarkRegionAsCpuModified(c, WORD); | ||||
|     REQUIRE(rasterizer.Count() == 0); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("BufferBase: Cached write unmarked") { | ||||
|     RasterizerInterface rasterizer; | ||||
|     BufferBase buffer(rasterizer, c, WORD); | ||||
|     buffer.UnmarkRegionAsCpuModified(c, WORD); | ||||
|     buffer.CachedCpuWrite(c + PAGE, PAGE); | ||||
|     buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||||
|     REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||||
|     buffer.FlushCachedWrites(); | ||||
|     REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||||
|     buffer.MarkRegionAsCpuModified(c, WORD); | ||||
|     REQUIRE(rasterizer.Count() == 0); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("BufferBase: Cached write iterated") { | ||||
|     RasterizerInterface rasterizer; | ||||
|     BufferBase buffer(rasterizer, c, WORD); | ||||
|     buffer.UnmarkRegionAsCpuModified(c, WORD); | ||||
|     buffer.CachedCpuWrite(c + PAGE, PAGE); | ||||
|     int num = 0; | ||||
|     buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||||
|     REQUIRE(num == 0); | ||||
|     REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||||
|     buffer.FlushCachedWrites(); | ||||
|     REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||||
|     buffer.MarkRegionAsCpuModified(c, WORD); | ||||
|     REQUIRE(rasterizer.Count() == 0); | ||||
| } | ||||
| 
 | ||||
| TEST_CASE("BufferBase: Cached write downloads") { | ||||
|     RasterizerInterface rasterizer; | ||||
|     BufferBase buffer(rasterizer, c, WORD); | ||||
|     buffer.UnmarkRegionAsCpuModified(c, WORD); | ||||
|     REQUIRE(rasterizer.Count() == 64); | ||||
|     buffer.CachedCpuWrite(c + PAGE, PAGE); | ||||
|     REQUIRE(rasterizer.Count() == 63); | ||||
|     buffer.MarkRegionAsGpuModified(c + PAGE, PAGE); | ||||
|     int num = 0; | ||||
|     buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||||
|     buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||||
|     REQUIRE(num == 0); | ||||
|     REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||||
|     REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); | ||||
|     buffer.FlushCachedWrites(); | ||||
|     REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||||
|     REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); | ||||
|     buffer.MarkRegionAsCpuModified(c, WORD); | ||||
|     REQUIRE(rasterizer.Count() == 0); | ||||
| } | ||||
|  |  | |||
|  | @ -2,10 +2,8 @@ add_subdirectory(host_shaders) | |||
| 
 | ||||
| add_library(video_core STATIC | ||||
|     buffer_cache/buffer_base.h | ||||
|     buffer_cache/buffer_block.h | ||||
|     buffer_cache/buffer_cache.cpp | ||||
|     buffer_cache/buffer_cache.h | ||||
|     buffer_cache/map_interval.cpp | ||||
|     buffer_cache/map_interval.h | ||||
|     cdma_pusher.cpp | ||||
|     cdma_pusher.h | ||||
|     command_classes/codecs/codec.cpp | ||||
|  | @ -152,8 +150,6 @@ add_library(video_core STATIC | |||
|     renderer_vulkan/vk_staging_buffer_pool.h | ||||
|     renderer_vulkan/vk_state_tracker.cpp | ||||
|     renderer_vulkan/vk_state_tracker.h | ||||
|     renderer_vulkan/vk_stream_buffer.cpp | ||||
|     renderer_vulkan/vk_stream_buffer.h | ||||
|     renderer_vulkan/vk_swapchain.cpp | ||||
|     renderer_vulkan/vk_swapchain.h | ||||
|     renderer_vulkan/vk_texture_cache.cpp | ||||
|  |  | |||
|  | @ -19,6 +19,7 @@ namespace VideoCommon { | |||
| 
 | ||||
| enum class BufferFlagBits { | ||||
|     Picked = 1 << 0, | ||||
|     CachedWrites = 1 << 1, | ||||
| }; | ||||
| DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) | ||||
| 
 | ||||
|  | @ -40,7 +41,7 @@ class BufferBase { | |||
|     static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; | ||||
| 
 | ||||
|     /// Vector tracking modified pages tightly packed with small vector optimization
 | ||||
|     union WrittenWords { | ||||
|     union WordsArray { | ||||
|         /// Returns the pointer to the words state
 | ||||
|         [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { | ||||
|             return is_short ? &stack : heap; | ||||
|  | @ -55,49 +56,59 @@ class BufferBase { | |||
|         u64* heap;     ///< Not-small buffers pointer to the storage
 | ||||
|     }; | ||||
| 
 | ||||
|     struct GpuCpuWords { | ||||
|         explicit GpuCpuWords() = default; | ||||
|         explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} { | ||||
|     struct Words { | ||||
|         explicit Words() = default; | ||||
|         explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { | ||||
|             if (IsShort()) { | ||||
|                 cpu.stack = ~u64{0}; | ||||
|                 gpu.stack = 0; | ||||
|                 cached_cpu.stack = 0; | ||||
|                 untracked.stack = ~u64{0}; | ||||
|             } else { | ||||
|                 // Share allocation between CPU and GPU pages and set their default values
 | ||||
|                 const size_t num_words = NumWords(); | ||||
|                 u64* const alloc = new u64[num_words * 2]; | ||||
|                 u64* const alloc = new u64[num_words * 4]; | ||||
|                 cpu.heap = alloc; | ||||
|                 gpu.heap = alloc + num_words; | ||||
|                 cached_cpu.heap = alloc + num_words * 2; | ||||
|                 untracked.heap = alloc + num_words * 3; | ||||
|                 std::fill_n(cpu.heap, num_words, ~u64{0}); | ||||
|                 std::fill_n(gpu.heap, num_words, 0); | ||||
|                 std::fill_n(cached_cpu.heap, num_words, 0); | ||||
|                 std::fill_n(untracked.heap, num_words, ~u64{0}); | ||||
|             } | ||||
|             // Clean up trailing bits
 | ||||
|             const u64 last_local_page = | ||||
|                 Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE); | ||||
|             const u64 last_word_size = size_bytes % BYTES_PER_WORD; | ||||
|             const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); | ||||
|             const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; | ||||
|             u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1]; | ||||
|             last_word = (last_word << shift) >> shift; | ||||
|             const u64 last_word = (~u64{0} << shift) >> shift; | ||||
|             cpu.Pointer(IsShort())[NumWords() - 1] = last_word; | ||||
|             untracked.Pointer(IsShort())[NumWords() - 1] = last_word; | ||||
|         } | ||||
| 
 | ||||
|         ~GpuCpuWords() { | ||||
|         ~Words() { | ||||
|             Release(); | ||||
|         } | ||||
| 
 | ||||
|         GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept { | ||||
|         Words& operator=(Words&& rhs) noexcept { | ||||
|             Release(); | ||||
|             size_bytes = rhs.size_bytes; | ||||
|             cpu = rhs.cpu; | ||||
|             gpu = rhs.gpu; | ||||
|             cached_cpu = rhs.cached_cpu; | ||||
|             untracked = rhs.untracked; | ||||
|             rhs.cpu.heap = nullptr; | ||||
|             return *this; | ||||
|         } | ||||
| 
 | ||||
|         GpuCpuWords(GpuCpuWords&& rhs) noexcept | ||||
|             : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} { | ||||
|         Words(Words&& rhs) noexcept | ||||
|             : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu}, | ||||
|               cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} { | ||||
|             rhs.cpu.heap = nullptr; | ||||
|         } | ||||
| 
 | ||||
|         GpuCpuWords& operator=(const GpuCpuWords&) = delete; | ||||
|         GpuCpuWords(const GpuCpuWords&) = delete; | ||||
|         Words& operator=(const Words&) = delete; | ||||
|         Words(const Words&) = delete; | ||||
| 
 | ||||
|         /// Returns true when the buffer fits in the small vector optimization
 | ||||
|         [[nodiscard]] bool IsShort() const noexcept { | ||||
|  | @ -118,8 +129,17 @@ class BufferBase { | |||
|         } | ||||
| 
 | ||||
|         u64 size_bytes = 0; | ||||
|         WrittenWords cpu; | ||||
|         WrittenWords gpu; | ||||
|         WordsArray cpu; | ||||
|         WordsArray gpu; | ||||
|         WordsArray cached_cpu; | ||||
|         WordsArray untracked; | ||||
|     }; | ||||
| 
 | ||||
|     enum class Type { | ||||
|         CPU, | ||||
|         GPU, | ||||
|         CachedCPU, | ||||
|         Untracked, | ||||
|     }; | ||||
| 
 | ||||
| public: | ||||
|  | @ -132,68 +152,93 @@ public: | |||
|     BufferBase& operator=(const BufferBase&) = delete; | ||||
|     BufferBase(const BufferBase&) = delete; | ||||
| 
 | ||||
|     BufferBase& operator=(BufferBase&&) = default; | ||||
|     BufferBase(BufferBase&&) = default; | ||||
| 
 | ||||
|     /// Returns the inclusive CPU modified range in a begin end pair
 | ||||
|     [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, | ||||
|                                                         u64 query_size) const noexcept { | ||||
|         const u64 offset = query_cpu_addr - cpu_addr; | ||||
|         return ModifiedRegion<false>(offset, query_size); | ||||
|         return ModifiedRegion<Type::CPU>(offset, query_size); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the inclusive GPU modified range in a begin end pair
 | ||||
|     [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, | ||||
|                                                         u64 query_size) const noexcept { | ||||
|         const u64 offset = query_cpu_addr - cpu_addr; | ||||
|         return ModifiedRegion<true>(offset, query_size); | ||||
|         return ModifiedRegion<Type::GPU>(offset, query_size); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns true if a region has been modified from the CPU
 | ||||
|     [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { | ||||
|         const u64 offset = query_cpu_addr - cpu_addr; | ||||
|         return IsRegionModified<false>(offset, query_size); | ||||
|         return IsRegionModified<Type::CPU>(offset, query_size); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns true if a region has been modified from the GPU
 | ||||
|     [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { | ||||
|         const u64 offset = query_cpu_addr - cpu_addr; | ||||
|         return IsRegionModified<true>(offset, query_size); | ||||
|         return IsRegionModified<Type::GPU>(offset, query_size); | ||||
|     } | ||||
| 
 | ||||
|     /// Mark region as CPU modified, notifying the rasterizer about this change
 | ||||
|     void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { | ||||
|         ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size); | ||||
|         ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size); | ||||
|     } | ||||
| 
 | ||||
|     /// Unmark region as CPU modified, notifying the rasterizer about this change
 | ||||
|     void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { | ||||
|         ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size); | ||||
|         ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size); | ||||
|     } | ||||
| 
 | ||||
|     /// Mark region as modified from the host GPU
 | ||||
|     void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { | ||||
|         ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size); | ||||
|         ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size); | ||||
|     } | ||||
| 
 | ||||
|     /// Unmark region as modified from the host GPU
 | ||||
|     void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { | ||||
|         ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size); | ||||
|         ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size); | ||||
|     } | ||||
| 
 | ||||
|     /// Mark region as modified from the CPU
 | ||||
|     /// but don't mark it as modified until FlushCachedWrites is called.
 | ||||
|     void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) { | ||||
|         flags |= BufferFlagBits::CachedWrites; | ||||
|         ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size); | ||||
|     } | ||||
| 
 | ||||
|     /// Flushes cached CPU writes and notifies the rasterizer about the deltas
 | ||||
|     void FlushCachedWrites() noexcept { | ||||
|         flags &= ~BufferFlagBits::CachedWrites; | ||||
|         const u64 num_words = NumWords(); | ||||
|         const u64* const cached_words = Array<Type::CachedCPU>(); | ||||
|         u64* const untracked_words = Array<Type::Untracked>(); | ||||
|         u64* const cpu_words = Array<Type::CPU>(); | ||||
|         for (u64 word_index = 0; word_index < num_words; ++word_index) { | ||||
|             const u64 cached_bits = cached_words[word_index]; | ||||
|             NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits); | ||||
|             untracked_words[word_index] |= cached_bits; | ||||
|             cpu_words[word_index] |= cached_bits; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
 | ||||
|     template <typename Func> | ||||
|     void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { | ||||
|         ForEachModifiedRange<false, true>(query_cpu_range, size, func); | ||||
|         ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func); | ||||
|     } | ||||
| 
 | ||||
|     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
 | ||||
|     template <typename Func> | ||||
|     void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { | ||||
|         ForEachModifiedRange<true, false>(query_cpu_range, size, func); | ||||
|         ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func); | ||||
|     } | ||||
| 
 | ||||
|     /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
 | ||||
|     template <typename Func> | ||||
|     void ForEachDownloadRange(Func&& func) { | ||||
|         ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func); | ||||
|         ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func); | ||||
|     } | ||||
| 
 | ||||
|     /// Mark buffer as picked
 | ||||
|  | @ -206,6 +251,16 @@ public: | |||
|         flags &= ~BufferFlagBits::Picked; | ||||
|     } | ||||
| 
 | ||||
|     /// Increases the likeliness of this being a stream buffer
 | ||||
|     void IncreaseStreamScore(int score) noexcept { | ||||
|         stream_score += score; | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the likeliness of this being a stream buffer
 | ||||
|     [[nodiscard]] int StreamScore() const noexcept { | ||||
|         return stream_score; | ||||
|     } | ||||
| 
 | ||||
|     /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
 | ||||
|     [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { | ||||
|         return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); | ||||
|  | @ -216,6 +271,11 @@ public: | |||
|         return True(flags & BufferFlagBits::Picked); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns true when the buffer has pending cached writes
 | ||||
|     [[nodiscard]] bool HasCachedWrites() const noexcept { | ||||
|         return True(flags & BufferFlagBits::CachedWrites); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the base CPU address of the buffer
 | ||||
|     [[nodiscard]] VAddr CpuAddr() const noexcept { | ||||
|         return cpu_addr; | ||||
|  | @ -233,26 +293,48 @@ public: | |||
|     } | ||||
| 
 | ||||
| private: | ||||
|     template <Type type> | ||||
|     u64* Array() noexcept { | ||||
|         if constexpr (type == Type::CPU) { | ||||
|             return words.cpu.Pointer(IsShort()); | ||||
|         } else if constexpr (type == Type::GPU) { | ||||
|             return words.gpu.Pointer(IsShort()); | ||||
|         } else if constexpr (type == Type::CachedCPU) { | ||||
|             return words.cached_cpu.Pointer(IsShort()); | ||||
|         } else if constexpr (type == Type::Untracked) { | ||||
|             return words.untracked.Pointer(IsShort()); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     template <Type type> | ||||
|     const u64* Array() const noexcept { | ||||
|         if constexpr (type == Type::CPU) { | ||||
|             return words.cpu.Pointer(IsShort()); | ||||
|         } else if constexpr (type == Type::GPU) { | ||||
|             return words.gpu.Pointer(IsShort()); | ||||
|         } else if constexpr (type == Type::CachedCPU) { | ||||
|             return words.cached_cpu.Pointer(IsShort()); | ||||
|         } else if constexpr (type == Type::Untracked) { | ||||
|             return words.untracked.Pointer(IsShort()); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /**
 | ||||
|      * Change the state of a range of pages | ||||
|      * | ||||
|      * @param written_words Pages to be marked or unmarked as modified | ||||
|      * @param dirty_addr    Base address to mark or unmark as modified | ||||
|      * @param size          Size in bytes to mark or unmark as modified | ||||
|      * | ||||
|      * @tparam enable            True when the bits will be set to one, false for zero | ||||
|      * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes | ||||
|      */ | ||||
|     template <bool enable, bool notify_rasterizer> | ||||
|     void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr, | ||||
|                            s64 size) noexcept(!notify_rasterizer) { | ||||
|     template <Type type, bool enable> | ||||
|     void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) { | ||||
|         const s64 difference = dirty_addr - cpu_addr; | ||||
|         const u64 offset = std::max<s64>(difference, 0); | ||||
|         size += std::min<s64>(difference, 0); | ||||
|         if (offset >= SizeBytes() || size < 0) { | ||||
|             return; | ||||
|         } | ||||
|         u64* const state_words = written_words.Pointer(IsShort()); | ||||
|         u64* const untracked_words = Array<Type::Untracked>(); | ||||
|         u64* const state_words = Array<type>(); | ||||
|         const u64 offset_end = std::min(offset + size, SizeBytes()); | ||||
|         const u64 begin_page_index = offset / BYTES_PER_PAGE; | ||||
|         const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; | ||||
|  | @ -268,13 +350,19 @@ private: | |||
|             u64 bits = ~u64{0}; | ||||
|             bits = (bits >> right_offset) << right_offset; | ||||
|             bits = (bits << left_offset) >> left_offset; | ||||
|             if constexpr (notify_rasterizer) { | ||||
|                 NotifyRasterizer<!enable>(word_index, state_words[word_index], bits); | ||||
|             if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||||
|                 NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits); | ||||
|             } | ||||
|             if constexpr (enable) { | ||||
|                 state_words[word_index] |= bits; | ||||
|                 if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||||
|                     untracked_words[word_index] |= bits; | ||||
|                 } | ||||
|             } else { | ||||
|                 state_words[word_index] &= ~bits; | ||||
|                 if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||||
|                     untracked_words[word_index] &= ~bits; | ||||
|                 } | ||||
|             } | ||||
|             page_index = 0; | ||||
|             ++word_index; | ||||
|  | @ -291,7 +379,7 @@ private: | |||
|      * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | ||||
|      */ | ||||
|     template <bool add_to_rasterizer> | ||||
|     void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) { | ||||
|     void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { | ||||
|         u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | ||||
|         VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | ||||
|         while (changed_bits != 0) { | ||||
|  | @ -315,21 +403,20 @@ private: | |||
|      * @param query_cpu_range Base CPU address to loop over | ||||
|      * @param size            Size in bytes of the CPU range to loop over | ||||
|      * @param func            Function to call for each turned off region | ||||
|      * | ||||
|      * @tparam gpu               True for host GPU pages, false for CPU pages | ||||
|      * @tparam notify_rasterizer True when the rasterizer should be notified about state changes | ||||
|      */ | ||||
|     template <bool gpu, bool notify_rasterizer, typename Func> | ||||
|     template <Type type, typename Func> | ||||
|     void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { | ||||
|         static_assert(type != Type::Untracked); | ||||
| 
 | ||||
|         const s64 difference = query_cpu_range - cpu_addr; | ||||
|         const u64 query_begin = std::max<s64>(difference, 0); | ||||
|         size += std::min<s64>(difference, 0); | ||||
|         if (query_begin >= SizeBytes() || size < 0) { | ||||
|             return; | ||||
|         } | ||||
|         const u64* const cpu_words = words.cpu.Pointer(IsShort()); | ||||
|         u64* const untracked_words = Array<Type::Untracked>(); | ||||
|         u64* const state_words = Array<type>(); | ||||
|         const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); | ||||
|         u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); | ||||
|         u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; | ||||
|         u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); | ||||
| 
 | ||||
|  | @ -345,7 +432,8 @@ private: | |||
|         const u64 word_index_end = std::distance(state_words, last_modified_word); | ||||
| 
 | ||||
|         const unsigned local_page_begin = std::countr_zero(*first_modified_word); | ||||
|         const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]); | ||||
|         const unsigned local_page_end = | ||||
|             static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]); | ||||
|         const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; | ||||
|         const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; | ||||
|         const u64 query_page_begin = query_begin / BYTES_PER_PAGE; | ||||
|  | @ -371,11 +459,13 @@ private: | |||
|             const u64 current_word = state_words[word_index] & bits; | ||||
|             state_words[word_index] &= ~bits; | ||||
| 
 | ||||
|             // Exclude CPU modified pages when visiting GPU pages
 | ||||
|             const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0); | ||||
|             if constexpr (notify_rasterizer) { | ||||
|                 NotifyRasterizer<true>(word_index, word, ~u64{0}); | ||||
|             if constexpr (type == Type::CPU) { | ||||
|                 const u64 current_bits = untracked_words[word_index] & bits; | ||||
|                 untracked_words[word_index] &= ~bits; | ||||
|                 NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); | ||||
|             } | ||||
|             // Exclude CPU modified pages when visiting GPU pages
 | ||||
|             const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); | ||||
|             u64 page = page_begin; | ||||
|             page_begin = 0; | ||||
| 
 | ||||
|  | @ -416,17 +506,20 @@ private: | |||
|      * @param offset Offset in bytes from the start of the buffer | ||||
|      * @param size   Size in bytes of the region to query for modifications | ||||
|      */ | ||||
|     template <bool gpu> | ||||
|     template <Type type> | ||||
|     [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { | ||||
|         const u64* const cpu_words = words.cpu.Pointer(IsShort()); | ||||
|         const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); | ||||
|         static_assert(type != Type::Untracked); | ||||
| 
 | ||||
|         const u64* const untracked_words = Array<Type::Untracked>(); | ||||
|         const u64* const state_words = Array<type>(); | ||||
|         const u64 num_query_words = size / BYTES_PER_WORD + 1; | ||||
|         const u64 word_begin = offset / BYTES_PER_WORD; | ||||
|         const u64 word_end = std::min(word_begin + num_query_words, NumWords()); | ||||
|         const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); | ||||
|         u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; | ||||
|         for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { | ||||
|             const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); | ||||
|             const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; | ||||
|             const u64 word = state_words[word_index] & ~off_word; | ||||
|             if (word == 0) { | ||||
|                 continue; | ||||
|             } | ||||
|  | @ -445,13 +538,13 @@ private: | |||
|      * | ||||
|      * @param offset Offset in bytes from the start of the buffer | ||||
|      * @param size   Size in bytes of the region to query for modifications | ||||
|      * | ||||
|      * @tparam gpu True to query GPU modified pages, false for CPU pages | ||||
|      */ | ||||
|     template <bool gpu> | ||||
|     template <Type type> | ||||
|     [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { | ||||
|         const u64* const cpu_words = words.cpu.Pointer(IsShort()); | ||||
|         const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); | ||||
|         static_assert(type != Type::Untracked); | ||||
| 
 | ||||
|         const u64* const untracked_words = Array<Type::Untracked>(); | ||||
|         const u64* const state_words = Array<type>(); | ||||
|         const u64 num_query_words = size / BYTES_PER_WORD + 1; | ||||
|         const u64 word_begin = offset / BYTES_PER_WORD; | ||||
|         const u64 word_end = std::min(word_begin + num_query_words, NumWords()); | ||||
|  | @ -460,7 +553,8 @@ private: | |||
|         u64 begin = std::numeric_limits<u64>::max(); | ||||
|         u64 end = 0; | ||||
|         for (u64 word_index = word_begin; word_index < word_end; ++word_index) { | ||||
|             const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); | ||||
|             const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; | ||||
|             const u64 word = state_words[word_index] & ~off_word; | ||||
|             if (word == 0) { | ||||
|                 continue; | ||||
|             } | ||||
|  | @ -488,8 +582,9 @@ private: | |||
| 
 | ||||
|     RasterizerInterface* rasterizer = nullptr; | ||||
|     VAddr cpu_addr = 0; | ||||
|     GpuCpuWords words; | ||||
|     Words words; | ||||
|     BufferFlagBits flags{}; | ||||
|     int stream_score = 0; | ||||
| }; | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
|  |  | |||
|  | @ -1,62 +0,0 @@ | |||
| // Copyright 2019 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
| class BufferBlock { | ||||
| public: | ||||
|     [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const { | ||||
|         return (cpu_addr < end) && (cpu_addr_end > start); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const { | ||||
|         return cpu_addr <= other_start && other_end <= cpu_addr_end; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] std::size_t Offset(VAddr in_addr) const { | ||||
|         return static_cast<std::size_t>(in_addr - cpu_addr); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] VAddr CpuAddr() const { | ||||
|         return cpu_addr; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] VAddr CpuAddrEnd() const { | ||||
|         return cpu_addr_end; | ||||
|     } | ||||
| 
 | ||||
|     void SetCpuAddr(VAddr new_addr) { | ||||
|         cpu_addr = new_addr; | ||||
|         cpu_addr_end = new_addr + size; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] std::size_t Size() const { | ||||
|         return size; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] u64 Epoch() const { | ||||
|         return epoch; | ||||
|     } | ||||
| 
 | ||||
|     void SetEpoch(u64 new_epoch) { | ||||
|         epoch = new_epoch; | ||||
|     } | ||||
| 
 | ||||
| protected: | ||||
|     explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} { | ||||
|         SetCpuAddr(cpu_addr_); | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     VAddr cpu_addr{}; | ||||
|     VAddr cpu_addr_end{}; | ||||
|     std::size_t size{}; | ||||
|     u64 epoch{}; | ||||
| }; | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
							
								
								
									
										13
									
								
								src/video_core/buffer_cache/buffer_cache.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								src/video_core/buffer_cache/buffer_cache.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,13 @@ | |||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "common/microprofile.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| // Microprofile entries in the "GPU" group, one per label below, all sharing the same colour.
 | ||||
| MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128)); | ||||
| MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128)); | ||||
| MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128)); | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -1,33 +0,0 @@ | |||
| // Copyright 2020 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <array> | ||||
| #include <cstddef> | ||||
| #include <memory> | ||||
| 
 | ||||
| #include "video_core/buffer_cache/map_interval.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| /// Seeds the free list with a pointer to every interval slot of the member first_chunk.
 | ||||
| MapIntervalAllocator::MapIntervalAllocator() { | ||||
|     FillFreeList(first_chunk); | ||||
| } | ||||
| /// Defaulted out of line; later chunks are owned through the unique_ptr chain rooted at first_chunk.next.
 | ||||
| MapIntervalAllocator::~MapIntervalAllocator() = default; | ||||
| /// Attaches a new chunk at the tail link, adds its slots to the free list and advances new_chunk to its next link.
 | ||||
| void MapIntervalAllocator::AllocateNewChunk() { | ||||
|     *new_chunk = std::make_unique<Chunk>(); | ||||
|     FillFreeList(**new_chunk); | ||||
|     new_chunk = &(*new_chunk)->next; | ||||
| } | ||||
| /// Appends a pointer to each element of chunk.data to free_list, in reverse so data[0] ends up at the back.
 | ||||
| void MapIntervalAllocator::FillFreeList(Chunk& chunk) { | ||||
|     const std::size_t old_size = free_list.size(); | ||||
|     free_list.resize(old_size + chunk.data.size()); | ||||
|     std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size, | ||||
|                    [](MapInterval& interval) { return &interval; }); | ||||
| } | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
|  | @ -1,93 +0,0 @@ | |||
| // Copyright 2019 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <array> | ||||
| #include <cstddef> | ||||
| #include <memory> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include <boost/intrusive/set_hook.hpp> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/gpu.h" | ||||
| 
 | ||||
| namespace VideoCommon { | ||||
| /// A [start, end) CPU address interval with an associated GPU address, a tick stamp and state flags.
 | ||||
| struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> { | ||||
|     MapInterval() = default; | ||||
|     /// Implicit conversion from a start address; all other fields keep their defaults.
 | ||||
|     /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {} | ||||
|     /// Builds an interval from explicit CPU bounds and a GPU address.
 | ||||
|     explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept | ||||
|         : start{start_}, end{end_}, gpu_addr{gpu_addr_} {} | ||||
|     /// True when other_start and other_end both lie within [start, end] of this interval.
 | ||||
|     bool IsInside(VAddr other_start, VAddr other_end) const noexcept { | ||||
|         return start <= other_start && other_end <= end; | ||||
|     } | ||||
|     /// True when the two ranges intersect; ranges that merely touch at an end do not count.
 | ||||
|     bool Overlaps(VAddr other_start, VAddr other_end) const noexcept { | ||||
|         return start < other_end && other_start < end; | ||||
|     } | ||||
|     /// Sets the modified flag and records the tick value passed with it.
 | ||||
|     void MarkAsModified(bool is_modified_, u64 ticks_) noexcept { | ||||
|         is_modified = is_modified_; | ||||
|         ticks = ticks_; | ||||
|     } | ||||
|     // NOTE(review): a member hook is declared in addition to the set_base_hook base - confirm both are needed.
 | ||||
|     boost::intrusive::set_member_hook<> member_hook_; | ||||
|     VAddr start = 0; | ||||
|     VAddr end = 0; | ||||
|     GPUVAddr gpu_addr = 0; | ||||
|     u64 ticks = 0; | ||||
|     bool is_written = false; | ||||
|     bool is_modified = false; | ||||
|     bool is_registered = false; | ||||
|     bool is_memory_marked = false; | ||||
|     bool is_sync_pending = false; | ||||
| }; | ||||
| /// Orders intervals by start address only; end and every other field are ignored.
 | ||||
| struct MapIntervalCompare { | ||||
|     constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept { | ||||
|         return lhs.start < rhs.start; | ||||
|     } | ||||
| }; | ||||
| /// Pool of MapInterval slots kept in fixed-size chunks and handed out through a free list of raw pointers.
 | ||||
| class MapIntervalAllocator { | ||||
| public: | ||||
|     MapIntervalAllocator(); | ||||
|     ~MapIntervalAllocator(); | ||||
|     /// Pops a slot off the free list, adding a new chunk first when the list is empty; the slot is not reset.
 | ||||
|     MapInterval* Allocate() { | ||||
|         if (free_list.empty()) { | ||||
|             AllocateNewChunk(); | ||||
|         } | ||||
|         MapInterval* const interval = free_list.back(); | ||||
|         free_list.pop_back(); | ||||
|         return interval; | ||||
|     } | ||||
|     /// Puts the slot back on the free list; the pointed-to interval is neither destroyed nor cleared.
 | ||||
|     void Release(MapInterval* interval) { | ||||
|         free_list.push_back(interval); | ||||
|     } | ||||
|     // Chunks form a singly linked list: each owns the next chunk and an array of 0x8000 interval slots.
 | ||||
| private: | ||||
|     struct Chunk { | ||||
|         std::unique_ptr<Chunk> next; | ||||
|         std::array<MapInterval, 0x8000> data; | ||||
|     }; | ||||
|     /// Appends a new chunk to the chain (defined out of line).
 | ||||
|     void AllocateNewChunk(); | ||||
|     /// Adds pointers to all slots of chunk to the free list (defined out of line).
 | ||||
|     void FillFreeList(Chunk& chunk); | ||||
|     /// Slots currently available to Allocate().
 | ||||
|     std::vector<MapInterval*> free_list; | ||||
|     /// First chunk, held by value inside the allocator.
 | ||||
|     Chunk first_chunk; | ||||
|     /// Points at the link where the next chunk will be attached; starts at first_chunk.next.
 | ||||
|     std::unique_ptr<Chunk>* new_chunk = &first_chunk.next; | ||||
| }; | ||||
| 
 | ||||
| } // namespace VideoCommon
 | ||||
|  | @ -110,12 +110,10 @@ void Vic::Execute() { | |||
|                                            converted_frame_buffer.get(), block_height, 0, 0); | ||||
| 
 | ||||
|             gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); | ||||
|             gpu.Maxwell3D().OnMemoryWrite(); | ||||
|         } else { | ||||
|             // send pitch linear frame
 | ||||
|             gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, | ||||
|                                            linear_size); | ||||
|             gpu.Maxwell3D().OnMemoryWrite(); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|  | @ -163,7 +161,6 @@ void Vic::Execute() { | |||
|         } | ||||
|         gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(), | ||||
|                                        chroma_buffer.size()); | ||||
|         gpu.Maxwell3D().OnMemoryWrite(); | ||||
|         break; | ||||
|     } | ||||
|     default: | ||||
|  |  | |||
|  | @ -12,13 +12,30 @@ | |||
| #define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32))) | ||||
| 
 | ||||
| namespace VideoCommon::Dirty { | ||||
| 
 | ||||
| namespace { | ||||
| using Tegra::Engines::Maxwell3D; | ||||
| 
 | ||||
| void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { | ||||
| /// Tags the registers of every vertex array so that writes raise that array's flag and VertexBuffers.
 | ||||
| void SetupDirtyVertexBuffers(Maxwell3D::DirtyState::Tables& tables) { | ||||
|     // Only the first num_array words of each vertex_array entry are tagged.
 | ||||
|     static constexpr std::size_t num_array = 3; | ||||
|     for (std::size_t i = 0; i < Maxwell3D::Regs::NumVertexArrays; ++i) { | ||||
|         const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); | ||||
|         const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]); | ||||
| 
 | ||||
|         FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers); | ||||
|         // Fill a single limit entry: NUM(vertex_array_limit) is the word count of the whole array and,
 | ||||
|         // starting from entry i, would also tag registers located past the end of the array.
 | ||||
|         FillBlock(tables, limit_offset, NUM(vertex_array_limit[0]), VertexBuffer0 + i, VertexBuffers); | ||||
|     } | ||||
| } | ||||
| /// Tags every index_array register with the IndexBuffer flag in the first table.
 | ||||
| void SetupIndexBuffer(Maxwell3D::DirtyState::Tables& tables) { | ||||
|     FillBlock(tables[0], OFF(index_array), NUM(index_array), IndexBuffer); | ||||
| } | ||||
| /// Tags the tic and tsc register blocks with the Descriptors flag in the first table.
 | ||||
| void SetupDirtyDescriptors(Maxwell3D::DirtyState::Tables& tables) { | ||||
|     FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors); | ||||
|     FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors); | ||||
| } | ||||
| 
 | ||||
| void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) { | ||||
|     static constexpr std::size_t num_per_rt = NUM(rt[0]); | ||||
|     static constexpr std::size_t begin = OFF(rt); | ||||
|     static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; | ||||
|  | @ -41,5 +58,13 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl | |||
|         FillBlock(table, OFF(zeta), NUM(zeta), flag); | ||||
|     } | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| /// Installs the vertex buffer, index buffer, descriptor and render target dirty tables, in that order.
 | ||||
| void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { | ||||
|     SetupDirtyVertexBuffers(tables); | ||||
|     SetupIndexBuffer(tables); | ||||
|     SetupDirtyDescriptors(tables); | ||||
|     SetupDirtyRenderTargets(tables); | ||||
| } | ||||
| 
 | ||||
| } // namespace VideoCommon::Dirty
 | ||||
|  |  | |||
|  | @ -30,6 +30,12 @@ enum : u8 { | |||
|     ColorBuffer7, | ||||
|     ZetaBuffer, | ||||
| 
 | ||||
|     VertexBuffers, | ||||
|     VertexBuffer0, | ||||
|     VertexBuffer31 = VertexBuffer0 + 31, | ||||
| 
 | ||||
|     IndexBuffer, | ||||
| 
 | ||||
|     LastCommonEntry, | ||||
| }; | ||||
| 
 | ||||
|  | @ -47,6 +53,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_ | |||
|     FillBlock(tables[1], begin, num, index_b); | ||||
| } | ||||
| 
 | ||||
| void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables); | ||||
| void SetupDirtyFlags(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables); | ||||
| 
 | ||||
| } // namespace VideoCommon::Dirty
 | ||||
|  |  | |||
|  | @ -23,8 +23,6 @@ void DmaPusher::DispatchCalls() { | |||
|     MICROPROFILE_SCOPE(DispatchCalls); | ||||
| 
 | ||||
|     gpu.SyncGuestHost(); | ||||
|     // On entering GPU code, assume all memory may be touched by the ARM core.
 | ||||
|     gpu.Maxwell3D().OnMemoryWrite(); | ||||
| 
 | ||||
|     dma_pushbuffer_subindex = 0; | ||||
| 
 | ||||
|  |  | |||
|  | @ -18,8 +18,8 @@ Fermi2D::Fermi2D() { | |||
| 
 | ||||
| Fermi2D::~Fermi2D() = default; | ||||
| 
 | ||||
| void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { | ||||
|     rasterizer = &rasterizer_; | ||||
| void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||||
|     rasterizer = rasterizer_; | ||||
| } | ||||
| 
 | ||||
| void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||
|  |  | |||
|  | @ -38,7 +38,7 @@ public: | |||
|     ~Fermi2D(); | ||||
| 
 | ||||
|     /// Binds a rasterizer to this engine.
 | ||||
|     void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); | ||||
|     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||||
| 
 | ||||
|     /// Write the value to the register identified by method.
 | ||||
|     void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; | ||||
|  |  | |||
|  | @ -21,8 +21,8 @@ KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manage | |||
| 
 | ||||
| KeplerCompute::~KeplerCompute() = default; | ||||
| 
 | ||||
| void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { | ||||
|     rasterizer = &rasterizer_; | ||||
| void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||||
|     rasterizer = rasterizer_; | ||||
| } | ||||
| 
 | ||||
| void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||
|  | @ -39,7 +39,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal | |||
|     case KEPLER_COMPUTE_REG_INDEX(data_upload): { | ||||
|         upload_state.ProcessData(method_argument, is_last_call); | ||||
|         if (is_last_call) { | ||||
|             system.GPU().Maxwell3D().OnMemoryWrite(); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|  |  | |||
|  | @ -46,7 +46,7 @@ public: | |||
|     ~KeplerCompute(); | ||||
| 
 | ||||
|     /// Binds a rasterizer to this engine.
 | ||||
|     void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); | ||||
|     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||||
| 
 | ||||
|     static constexpr std::size_t NumConstBuffers = 8; | ||||
| 
 | ||||
|  |  | |||
|  | @ -33,7 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call | |||
|     case KEPLERMEMORY_REG_INDEX(data): { | ||||
|         upload_state.ProcessData(method_argument, is_last_call); | ||||
|         if (is_last_call) { | ||||
|             system.GPU().Maxwell3D().OnMemoryWrite(); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|  |  | |||
|  | @ -30,8 +30,8 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) | |||
| 
 | ||||
| Maxwell3D::~Maxwell3D() = default; | ||||
| 
 | ||||
| void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { | ||||
|     rasterizer = &rasterizer_; | ||||
| void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||||
|     rasterizer = rasterizer_; | ||||
| } | ||||
| 
 | ||||
| void Maxwell3D::InitializeRegisterDefaults() { | ||||
|  | @ -223,7 +223,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume | |||
|     case MAXWELL3D_REG_INDEX(data_upload): | ||||
|         upload_state.ProcessData(argument, is_last_call); | ||||
|         if (is_last_call) { | ||||
|             OnMemoryWrite(); | ||||
|         } | ||||
|         return; | ||||
|     case MAXWELL3D_REG_INDEX(fragment_barrier): | ||||
|  | @ -570,17 +569,18 @@ std::optional<u64> Maxwell3D::GetQueryResult() { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void Maxwell3D::ProcessCBBind(std::size_t stage_index) { | ||||
| void Maxwell3D::ProcessCBBind(size_t stage_index) { | ||||
|     // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
 | ||||
|     auto& shader = state.shader_stages[stage_index]; | ||||
|     auto& bind_data = regs.cb_bind[stage_index]; | ||||
| 
 | ||||
|     ASSERT(bind_data.index < Regs::MaxConstBuffers); | ||||
|     auto& buffer = shader.const_buffers[bind_data.index]; | ||||
| 
 | ||||
|     const auto& bind_data = regs.cb_bind[stage_index]; | ||||
|     auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.index]; | ||||
|     buffer.enabled = bind_data.valid.Value() != 0; | ||||
|     buffer.address = regs.const_buffer.BufferAddress(); | ||||
|     buffer.size = regs.const_buffer.cb_size; | ||||
| 
 | ||||
|     const bool is_enabled = bind_data.valid.Value() != 0; | ||||
|     const GPUVAddr gpu_addr = is_enabled ? regs.const_buffer.BufferAddress() : 0; | ||||
|     const u32 size = is_enabled ? regs.const_buffer.cb_size : 0; | ||||
|     rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size); | ||||
| } | ||||
| 
 | ||||
| void Maxwell3D::ProcessCBData(u32 value) { | ||||
|  | @ -635,7 +635,6 @@ void Maxwell3D::FinishCBData() { | |||
| 
 | ||||
|     const u32 id = cb_data_state.id; | ||||
|     memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); | ||||
|     OnMemoryWrite(); | ||||
| 
 | ||||
|     cb_data_state.id = null_cb_data; | ||||
|     cb_data_state.current = null_cb_data; | ||||
|  |  | |||
|  | @ -55,7 +55,7 @@ public: | |||
|     ~Maxwell3D(); | ||||
| 
 | ||||
|     /// Binds a rasterizer to this engine.
 | ||||
|     void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); | ||||
|     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||||
| 
 | ||||
|     /// Register structure of the Maxwell3D engine.
 | ||||
|     /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
 | ||||
|  | @ -1314,8 +1314,7 @@ public: | |||
| 
 | ||||
|                     GPUVAddr LimitAddress() const { | ||||
|                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) | | ||||
|                                                      limit_low) + | ||||
|                                1; | ||||
|                                                      limit_low); | ||||
|                     } | ||||
|                 } vertex_array_limit[NumVertexArrays]; | ||||
| 
 | ||||
|  | @ -1403,6 +1402,7 @@ public: | |||
|         }; | ||||
| 
 | ||||
|         std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; | ||||
| 
 | ||||
|         u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering.
 | ||||
|     }; | ||||
| 
 | ||||
|  | @ -1452,11 +1452,6 @@ public: | |||
|         return *rasterizer; | ||||
|     } | ||||
| 
 | ||||
|     /// Notify a memory write has happened.
 | ||||
|     void OnMemoryWrite() { | ||||
|         dirty.flags |= dirty.on_write_stores; | ||||
|     } | ||||
| 
 | ||||
|     enum class MMEDrawMode : u32 { | ||||
|         Undefined, | ||||
|         Array, | ||||
|  | @ -1478,7 +1473,6 @@ public: | |||
|         using Tables = std::array<Table, 2>; | ||||
| 
 | ||||
|         Flags flags; | ||||
|         Flags on_write_stores; | ||||
|         Tables tables{}; | ||||
|     } dirty; | ||||
| 
 | ||||
|  | @ -1541,7 +1535,7 @@ private: | |||
|     void FinishCBData(); | ||||
| 
 | ||||
|     /// Handles a write to the CB_BIND register.
 | ||||
|     void ProcessCBBind(std::size_t stage_index); | ||||
|     void ProcessCBBind(size_t stage_index); | ||||
| 
 | ||||
|     /// Handles a write to the VERTEX_END_GL register, triggering a draw.
 | ||||
|     void DrawArrays(); | ||||
|  |  | |||
|  | @ -60,9 +60,6 @@ void MaxwellDMA::Launch() { | |||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     // All copies here update the main memory, so mark all rasterizer states as invalid.
 | ||||
|     system.GPU().Maxwell3D().OnMemoryWrite(); | ||||
| 
 | ||||
|     if (is_src_pitch && is_dst_pitch) { | ||||
|         CopyPitchToPitch(); | ||||
|     } else { | ||||
|  |  | |||
|  | @ -143,22 +143,26 @@ private: | |||
|     } | ||||
|     /// With both cache mutexes held, reports whether the texture, buffer or query cache asks to wait for async flushes.
 | ||||
|     bool ShouldWait() const { | ||||
|         std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
|         return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() || | ||||
|                query_cache.ShouldWaitAsyncFlushes(); | ||||
|     } | ||||
|     /// With both cache mutexes held, reports whether the texture, buffer or query cache has uncommitted flushes.
 | ||||
|     bool ShouldFlush() const { | ||||
|         std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
|         return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() || | ||||
|                query_cache.HasUncommittedFlushes(); | ||||
|     } | ||||
|     /// With both cache mutexes held, pops async flushes from the texture, buffer and query caches in that order.
 | ||||
|     void PopAsyncFlushes() { | ||||
|         std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
|         texture_cache.PopAsyncFlushes(); | ||||
|         buffer_cache.PopAsyncFlushes(); | ||||
|         query_cache.PopAsyncFlushes(); | ||||
|     } | ||||
| 
 | ||||
|     void CommitAsyncFlushes() { | ||||
|         std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
|         texture_cache.CommitAsyncFlushes(); | ||||
|         buffer_cache.CommitAsyncFlushes(); | ||||
|         query_cache.CommitAsyncFlushes(); | ||||
|  |  | |||
|  | @ -44,8 +44,8 @@ GPU::~GPU() = default; | |||
| 
 | ||||
| void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { | ||||
|     renderer = std::move(renderer_); | ||||
|     rasterizer = renderer->ReadRasterizer(); | ||||
| 
 | ||||
|     VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer(); | ||||
|     memory_manager->BindRasterizer(rasterizer); | ||||
|     maxwell_3d->BindRasterizer(rasterizer); | ||||
|     fermi_2d->BindRasterizer(rasterizer); | ||||
|  | @ -171,7 +171,7 @@ void GPU::TickWork() { | |||
|         const std::size_t size = request.size; | ||||
|         flush_requests.pop_front(); | ||||
|         flush_request_mutex.unlock(); | ||||
|         renderer->Rasterizer().FlushRegion(addr, size); | ||||
|         rasterizer->FlushRegion(addr, size); | ||||
|         current_flush_fence.store(fence); | ||||
|         flush_request_mutex.lock(); | ||||
|     } | ||||
|  | @ -193,11 +193,11 @@ u64 GPU::GetTicks() const { | |||
| } | ||||
| 
 | ||||
| void GPU::FlushCommands() { | ||||
|     renderer->Rasterizer().FlushCommands(); | ||||
|     rasterizer->FlushCommands(); | ||||
| } | ||||
| 
 | ||||
| void GPU::SyncGuestHost() { | ||||
|     renderer->Rasterizer().SyncGuestHost(); | ||||
|     rasterizer->SyncGuestHost(); | ||||
| } | ||||
| 
 | ||||
| enum class GpuSemaphoreOperation { | ||||
|  |  | |||
|  | @ -366,6 +366,7 @@ protected: | |||
|     std::unique_ptr<Tegra::DmaPusher> dma_pusher; | ||||
|     std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; | ||||
|     std::unique_ptr<VideoCore::RendererBase> renderer; | ||||
|     VideoCore::RasterizerInterface* rasterizer = nullptr; | ||||
|     const bool use_nvdec; | ||||
| 
 | ||||
| private: | ||||
|  |  | |||
|  | @ -38,6 +38,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||
|     } | ||||
| 
 | ||||
|     auto current_context = context.Acquire(); | ||||
|     VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer(); | ||||
| 
 | ||||
|     CommandDataContainer next; | ||||
|     while (state.is_running) { | ||||
|  | @ -52,13 +53,13 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||
|         } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { | ||||
|             renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); | ||||
|         } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { | ||||
|             renderer.Rasterizer().ReleaseFences(); | ||||
|             rasterizer->ReleaseFences(); | ||||
|         } else if (std::holds_alternative<GPUTickCommand>(next.data)) { | ||||
|             system.GPU().TickWork(); | ||||
|         } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { | ||||
|             renderer.Rasterizer().FlushRegion(flush->addr, flush->size); | ||||
|             rasterizer->FlushRegion(flush->addr, flush->size); | ||||
|         } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { | ||||
|             renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size); | ||||
|             rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); | ||||
|         } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { | ||||
|             return; | ||||
|         } else { | ||||
|  | @ -84,6 +85,7 @@ ThreadManager::~ThreadManager() { | |||
| void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | ||||
|                                 Core::Frontend::GraphicsContext& context, | ||||
|                                 Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) { | ||||
|     rasterizer = renderer.ReadRasterizer(); | ||||
|     thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), | ||||
|                          std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher)); | ||||
| } | ||||
|  | @ -129,12 +131,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { | |||
| } | ||||
| 
 | ||||
| void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | ||||
|     system.Renderer().Rasterizer().OnCPUWrite(addr, size); | ||||
|     rasterizer->OnCPUWrite(addr, size); | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
|     // Skip the flush in async mode, as FlushAndInvalidateRegion is not used for anything too important
 | ||||
|     system.Renderer().Rasterizer().OnCPUWrite(addr, size); | ||||
|     rasterizer->OnCPUWrite(addr, size); | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::WaitIdle() const { | ||||
|  |  | |||
|  | @ -27,6 +27,7 @@ class System; | |||
| } // namespace Core
 | ||||
| 
 | ||||
| namespace VideoCore { | ||||
| class RasterizerInterface; | ||||
| class RendererBase; | ||||
| } // namespace VideoCore
 | ||||
| 
 | ||||
|  | @ -151,11 +152,12 @@ private: | |||
|     /// Pushes a command to be executed by the GPU thread
 | ||||
|     u64 PushCommand(CommandData&& command_data); | ||||
| 
 | ||||
|     SynchState state; | ||||
|     Core::System& system; | ||||
|     std::thread thread; | ||||
|     std::thread::id thread_id; | ||||
|     const bool is_async; | ||||
|     VideoCore::RasterizerInterface* rasterizer = nullptr; | ||||
| 
 | ||||
|     SynchState state; | ||||
|     std::thread thread; | ||||
| }; | ||||
| 
 | ||||
| } // namespace VideoCommon::GPUThread
 | ||||
|  |  | |||
|  | @ -12,7 +12,6 @@ set(SHADER_FILES | |||
|     vulkan_blit_depth_stencil.frag | ||||
|     vulkan_present.frag | ||||
|     vulkan_present.vert | ||||
|     vulkan_quad_array.comp | ||||
|     vulkan_quad_indexed.comp | ||||
|     vulkan_uint8.comp | ||||
| ) | ||||
|  |  | |||
|  | @ -1,28 +0,0 @@ | |||
| // Copyright 2019 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
| 
 | ||||
| #version 460 core | ||||
| 
 | ||||
| layout (local_size_x = 1024) in; | ||||
| 
 | ||||
| layout (std430, set = 0, binding = 0) buffer OutputBuffer { | ||||
|     uint output_indexes[]; | ||||
| }; | ||||
| 
 | ||||
| layout (push_constant) uniform PushConstants { | ||||
|     uint first; | ||||
| }; | ||||
| // One invocation per quad: writes the 6 indices of its two triangles, or nothing when past the output buffer.
 | ||||
| void main() { | ||||
|     uint primitive = gl_GlobalInvocationID.x; | ||||
|     if (primitive * 6 >= output_indexes.length()) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); | ||||
|     for (uint vertex = 0; vertex < 6; ++vertex) { | ||||
|         uint index = first + primitive * 4 + quad_map[vertex]; | ||||
|         output_indexes[primitive * 6 + vertex] = index; | ||||
|     } | ||||
| } | ||||
|  | @ -16,9 +16,16 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { | |||
|     uint16_t output_indexes[]; | ||||
| }; | ||||
| // Reads input index `id` as a uint and maps the value 0xFF to 0xFFFF; every other value passes through.
 | ||||
| uint AssembleIndex(uint id) { | ||||
|     // Most primitive restart indices are 0xFF | ||||
|     // Hardcode this to 0xFF for now | ||||
|     uint index = uint(input_indexes[id]); | ||||
|     return index == 0xFF ? 0xFFFF : index; | ||||
| } | ||||
| 
 | ||||
| void main() { | ||||
|     uint id = gl_GlobalInvocationID.x; | ||||
|     if (id < input_indexes.length()) { | ||||
|         output_indexes[id] = uint16_t(input_indexes[id]); | ||||
|         output_indexes[id] = uint16_t(AssembleIndex(id)); | ||||
|     } | ||||
| } | ||||
|  |  | |||
|  | @ -21,8 +21,8 @@ MemoryManager::MemoryManager(Core::System& system_) | |||
| 
 | ||||
| MemoryManager::~MemoryManager() = default; | ||||
| 
 | ||||
| void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { | ||||
|     rasterizer = &rasterizer_; | ||||
| void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||||
|     rasterizer = rasterizer_; | ||||
| } | ||||
| 
 | ||||
| GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { | ||||
|  |  | |||
|  | @ -72,7 +72,7 @@ public: | |||
|     ~MemoryManager(); | ||||
| 
 | ||||
|     /// Binds a rasterizer to the memory manager.
 | ||||
|     void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); | ||||
|     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||||
| 
 | ||||
|     [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; | ||||
| 
 | ||||
|  | @ -157,6 +157,8 @@ private: | |||
| 
 | ||||
|     using MapRange = std::pair<GPUVAddr, size_t>; | ||||
|     std::vector<MapRange> map_ranges; | ||||
| 
 | ||||
|     std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Tegra
 | ||||
|  |  | |||
|  | @ -7,6 +7,7 @@ | |||
| #include <atomic> | ||||
| #include <functional> | ||||
| #include <optional> | ||||
| #include <span> | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/engines/fermi_2d.h" | ||||
| #include "video_core/gpu.h" | ||||
|  | @ -49,6 +50,10 @@ public: | |||
|     /// Records a GPU query and caches it
 | ||||
|     virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; | ||||
| 
 | ||||
|     /// Signal a uniform buffer binding
 | ||||
|     virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||
|                                            u32 size) = 0; | ||||
| 
 | ||||
|     /// Signal a GPU based semaphore as a fence
 | ||||
|     virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; | ||||
| 
 | ||||
|  |  | |||
|  | @ -37,15 +37,11 @@ public: | |||
|                           std::unique_ptr<Core::Frontend::GraphicsContext> context); | ||||
|     virtual ~RendererBase(); | ||||
| 
 | ||||
|     /// Initialize the renderer
 | ||||
|     [[nodiscard]] virtual bool Init() = 0; | ||||
| 
 | ||||
|     /// Shutdown the renderer
 | ||||
|     virtual void ShutDown() = 0; | ||||
| 
 | ||||
|     /// Finalize rendering the guest frame and draw into the presentation texture
 | ||||
|     virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; | ||||
| 
 | ||||
|     [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; | ||||
| 
 | ||||
|     // Getter/setter functions:
 | ||||
|     // ------------------------
 | ||||
| 
 | ||||
|  | @ -57,14 +53,6 @@ public: | |||
|         return m_current_frame; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] RasterizerInterface& Rasterizer() { | ||||
|         return *rasterizer; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] const RasterizerInterface& Rasterizer() const { | ||||
|         return *rasterizer; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] Core::Frontend::GraphicsContext& Context() { | ||||
|         return *context; | ||||
|     } | ||||
|  | @ -98,7 +86,6 @@ public: | |||
| 
 | ||||
| protected: | ||||
|     Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
 | ||||
|     std::unique_ptr<RasterizerInterface> rasterizer; | ||||
|     std::unique_ptr<Core::Frontend::GraphicsContext> context; | ||||
|     f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
 | ||||
|     int m_current_frame = 0;  ///< Current frame, should be set by the renderer
 | ||||
|  |  | |||
|  | @ -2,98 +2,208 @@ | |||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <memory> | ||||
| #include <span> | ||||
| 
 | ||||
| #include <glad/glad.h> | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/microprofile.h" | ||||
| #include "video_core/buffer_cache/buffer_cache.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_device.h" | ||||
| #include "video_core/renderer_opengl/gl_rasterizer.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| namespace { | ||||
| struct BindlessSSBO { | ||||
|     GLuint64EXT address; | ||||
|     GLsizei length; | ||||
|     GLsizei padding; | ||||
| }; | ||||
| static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4); | ||||
| 
 | ||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
| constexpr std::array PROGRAM_LUT{ | ||||
|     GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, | ||||
|     GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | ||||
| }; | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); | ||||
| Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | ||||
|     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} | ||||
| 
 | ||||
| Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_) | ||||
|     : BufferBlock{cpu_addr_, size_} { | ||||
|     gl_buffer.Create(); | ||||
|     glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW); | ||||
|     if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) { | ||||
|         glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); | ||||
|         glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); | ||||
| Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | ||||
|                VAddr cpu_addr_, u64 size_bytes_) | ||||
|     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | ||||
|     buffer.Create(); | ||||
|     const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); | ||||
|     glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); | ||||
|     glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); | ||||
| 
 | ||||
|     if (runtime.has_unified_vertex_buffers) { | ||||
|         glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| Buffer::~Buffer() = default; | ||||
| 
 | ||||
| void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { | ||||
|     glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), | ||||
|                          static_cast<GLsizeiptr>(data_size), data); | ||||
| void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept { | ||||
|     glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | ||||
|                          static_cast<GLsizeiptr>(data.size_bytes()), data.data()); | ||||
| } | ||||
| 
 | ||||
| void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | ||||
|     const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size); | ||||
|     const GLintptr gl_offset = static_cast<GLintptr>(offset); | ||||
|     if (read_buffer.handle == 0) { | ||||
|         read_buffer.Create(); | ||||
|         glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr, | ||||
|                           GL_STREAM_READ); | ||||
|     } | ||||
|     glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | ||||
|     glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size); | ||||
|     glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data); | ||||
| void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept { | ||||
|     glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | ||||
|                             static_cast<GLsizeiptr>(data.size_bytes()), data.data()); | ||||
| } | ||||
| 
 | ||||
| void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | ||||
|                       std::size_t copy_size) { | ||||
|     glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), | ||||
|                              static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size)); | ||||
| } | ||||
| 
 | ||||
| OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||||
|                                Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | ||||
|                                const Device& device_, OGLStreamBuffer& stream_buffer_, | ||||
|                                StateTracker& state_tracker) | ||||
|     : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} { | ||||
|     if (!device.HasFastBufferSubData()) { | ||||
| void Buffer::MakeResident(GLenum access) noexcept { | ||||
|     // Abuse GLenum's order to exit early
 | ||||
|     // GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE
 | ||||
|     if (access <= current_residency_access || buffer.handle == 0) { | ||||
|         return; | ||||
|     } | ||||
|     if (std::exchange(current_residency_access, access) != GL_NONE) { | ||||
|         // If the buffer is already resident, remove its residency before promoting it
 | ||||
|         glMakeNamedBufferNonResidentNV(buffer.handle); | ||||
|     } | ||||
|     glMakeNamedBufferResidentNV(buffer.handle, access); | ||||
| } | ||||
| 
 | ||||
|     static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); | ||||
|     glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | ||||
|     for (const GLuint cbuf : cbufs) { | ||||
|         glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); | ||||
| BufferCacheRuntime::BufferCacheRuntime(const Device& device_) | ||||
|     : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, | ||||
|       use_assembly_shaders{device.UseAssemblyShaders()}, | ||||
|       has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, | ||||
|       stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { | ||||
|     GLint gl_max_attributes; | ||||
|     glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); | ||||
|     max_attributes = static_cast<u32>(gl_max_attributes); | ||||
|     for (auto& stage_uniforms : fast_uniforms) { | ||||
|         for (OGLBuffer& buffer : stage_uniforms) { | ||||
|             buffer.Create(); | ||||
|             glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); | ||||
|         } | ||||
|     } | ||||
|     for (auto& stage_uniforms : copy_uniforms) { | ||||
|         for (OGLBuffer& buffer : stage_uniforms) { | ||||
|             buffer.Create(); | ||||
|             glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | ||||
|         } | ||||
|     } | ||||
|     for (OGLBuffer& buffer : copy_compute_uniforms) { | ||||
|         buffer.Create(); | ||||
|         glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| OGLBufferCache::~OGLBufferCache() { | ||||
|     glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | ||||
| void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | ||||
|                                     std::span<const VideoCommon::BufferCopy> copies) { | ||||
|     for (const VideoCommon::BufferCopy& copy : copies) { | ||||
|         glCopyNamedBufferSubData( | ||||
|             src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset), | ||||
|             static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | ||||
|     return std::make_shared<Buffer>(device, cpu_addr, size); | ||||
| void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { | ||||
|     if (has_unified_vertex_buffers) { | ||||
|         buffer.MakeResident(GL_READ_ONLY); | ||||
|         glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, | ||||
|                                static_cast<GLsizeiptr>(size)); | ||||
|     } else { | ||||
|         glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); | ||||
|         index_buffer_offset = offset; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { | ||||
|     return {0, 0, 0}; | ||||
| void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, | ||||
|                                           u32 stride) { | ||||
|     if (index >= max_attributes) { | ||||
|         return; | ||||
|     } | ||||
|     if (has_unified_vertex_buffers) { | ||||
|         buffer.MakeResident(GL_READ_ONLY); | ||||
|         glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride)); | ||||
|         glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index, | ||||
|                                buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size)); | ||||
|     } else { | ||||
|         glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), | ||||
|                            static_cast<GLsizei>(stride)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, | ||||
|                                                              std::size_t size) { | ||||
|     DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); | ||||
|     const GLuint cbuf = cbufs[cbuf_cursor++]; | ||||
| void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, | ||||
|                                            u32 offset, u32 size) { | ||||
|     if (use_assembly_shaders) { | ||||
|         GLuint handle; | ||||
|         if (offset != 0) { | ||||
|             handle = copy_uniforms[stage][binding_index].handle; | ||||
|             glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size); | ||||
|         } else { | ||||
|             handle = buffer.Handle(); | ||||
|         } | ||||
|         glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, | ||||
|                             static_cast<GLsizeiptr>(size)); | ||||
|     } else { | ||||
|         const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||||
|         const GLuint binding = base_binding + binding_index; | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), | ||||
|                           static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|     glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); | ||||
|     return {cbuf, 0, 0}; | ||||
| void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, | ||||
|                                                   u32 size) { | ||||
|     if (use_assembly_shaders) { | ||||
|         GLuint handle; | ||||
|         if (offset != 0) { | ||||
|             handle = copy_compute_uniforms[binding_index].handle; | ||||
|             glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size); | ||||
|         } else { | ||||
|             handle = buffer.Handle(); | ||||
|         } | ||||
|         glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0, | ||||
|                             static_cast<GLsizeiptr>(size)); | ||||
|     } else { | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(), | ||||
|                           static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, | ||||
|                                            u32 offset, u32 size, bool is_written) { | ||||
|     if (use_assembly_shaders) { | ||||
|         const BindlessSSBO ssbo{ | ||||
|             .address = buffer.HostGpuAddr() + offset, | ||||
|             .length = static_cast<GLsizei>(size), | ||||
|             .padding = 0, | ||||
|         }; | ||||
|         buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); | ||||
|         glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, | ||||
|                                         reinterpret_cast<const GLuint*>(&ssbo)); | ||||
|     } else { | ||||
|         const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; | ||||
|         const GLuint binding = base_binding + binding_index; | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), | ||||
|                           static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, | ||||
|                                                   u32 size, bool is_written) { | ||||
|     if (use_assembly_shaders) { | ||||
|         const BindlessSSBO ssbo{ | ||||
|             .address = buffer.HostGpuAddr() + offset, | ||||
|             .length = static_cast<GLsizei>(size), | ||||
|             .padding = 0, | ||||
|         }; | ||||
|         buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); | ||||
|         glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, | ||||
|                                         reinterpret_cast<const GLuint*>(&ssbo)); | ||||
|     } else if (size == 0) { | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); | ||||
|     } else { | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), | ||||
|                           static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, | ||||
|                                                      u32 size) { | ||||
|     glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(), | ||||
|                       static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||||
| } | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -5,79 +5,157 @@ | |||
| #pragma once | ||||
| 
 | ||||
| #include <array> | ||||
| #include <memory> | ||||
| #include <span> | ||||
| 
 | ||||
| #include "common/alignment.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/dynamic_library.h" | ||||
| #include "video_core/buffer_cache/buffer_cache.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_opengl/gl_device.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||||
| 
 | ||||
| namespace Core { | ||||
| class System; | ||||
| } | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| class Device; | ||||
| class OGLStreamBuffer; | ||||
| class RasterizerOpenGL; | ||||
| class StateTracker; | ||||
| class BufferCacheRuntime; | ||||
| 
 | ||||
| class Buffer : public VideoCommon::BufferBlock { | ||||
| class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { | ||||
| public: | ||||
|     explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_); | ||||
|     ~Buffer(); | ||||
|     explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr, | ||||
|                     u64 size_bytes); | ||||
|     explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); | ||||
| 
 | ||||
|     void Upload(std::size_t offset, std::size_t data_size, const u8* data); | ||||
|     void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept; | ||||
| 
 | ||||
|     void Download(std::size_t offset, std::size_t data_size, u8* data); | ||||
|     void ImmediateDownload(size_t offset, std::span<u8> data) noexcept; | ||||
| 
 | ||||
|     void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | ||||
|                   std::size_t copy_size); | ||||
|     void MakeResident(GLenum access) noexcept; | ||||
| 
 | ||||
|     GLuint Handle() const noexcept { | ||||
|         return gl_buffer.handle; | ||||
|     [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { | ||||
|         return address; | ||||
|     } | ||||
| 
 | ||||
|     u64 Address() const noexcept { | ||||
|         return gpu_address; | ||||
|     [[nodiscard]] GLuint Handle() const noexcept { | ||||
|         return buffer.handle; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     OGLBuffer gl_buffer; | ||||
|     OGLBuffer read_buffer; | ||||
|     u64 gpu_address = 0; | ||||
|     GLuint64EXT address = 0; | ||||
|     OGLBuffer buffer; | ||||
|     GLenum current_residency_access = GL_NONE; | ||||
| }; | ||||
| 
 | ||||
| using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; | ||||
| class OGLBufferCache final : public GenericBufferCache { | ||||
| class BufferCacheRuntime { | ||||
|     friend Buffer; | ||||
| 
 | ||||
| public: | ||||
|     explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, | ||||
|                             Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, | ||||
|                             const Device& device, OGLStreamBuffer& stream_buffer, | ||||
|                             StateTracker& state_tracker); | ||||
|     ~OGLBufferCache(); | ||||
|     static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); | ||||
| 
 | ||||
|     BufferInfo GetEmptyBuffer(std::size_t) override; | ||||
|     explicit BufferCacheRuntime(const Device& device_); | ||||
| 
 | ||||
|     void Acquire() noexcept { | ||||
|         cbuf_cursor = 0; | ||||
|     void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | ||||
|                     std::span<const VideoCommon::BufferCopy> copies); | ||||
| 
 | ||||
|     void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); | ||||
| 
 | ||||
|     void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride); | ||||
| 
 | ||||
|     void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size); | ||||
| 
 | ||||
|     void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size); | ||||
| 
 | ||||
|     void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size, | ||||
|                            bool is_written); | ||||
| 
 | ||||
|     void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size, | ||||
|                                   bool is_written); | ||||
| 
 | ||||
|     void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); | ||||
| 
 | ||||
|     void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { | ||||
|         if (use_assembly_shaders) { | ||||
|             const GLuint handle = fast_uniforms[stage][binding_index].handle; | ||||
|             const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); | ||||
|             glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); | ||||
|         } else { | ||||
|             const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||||
|             const GLuint binding = base_binding + binding_index; | ||||
|             glBindBufferRange(GL_UNIFORM_BUFFER, binding, | ||||
|                               fast_uniforms[stage][binding_index].handle, 0, | ||||
|                               static_cast<GLsizeiptr>(size)); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| protected: | ||||
|     std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; | ||||
|     void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) { | ||||
|         if (use_assembly_shaders) { | ||||
|             glProgramBufferParametersIuivNV( | ||||
|                 PABO_LUT[stage], binding_index, 0, | ||||
|                 static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)), | ||||
|                 reinterpret_cast<const GLuint*>(data.data())); | ||||
|         } else { | ||||
|             glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0, | ||||
|                                  static_cast<GLsizeiptr>(data.size_bytes()), data.data()); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; | ||||
|     std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { | ||||
|         const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); | ||||
|         const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||||
|         const GLuint binding = base_binding + binding_index; | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), | ||||
|                           static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||||
|         return mapped_span; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] const GLvoid* IndexOffset() const noexcept { | ||||
|         return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset)); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] bool HasFastBufferSubData() const noexcept { | ||||
|         return has_fast_buffer_sub_data; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | ||||
|                                              Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; | ||||
|     static constexpr std::array PABO_LUT{ | ||||
|         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,          GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | ||||
|         GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | ||||
|         GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, | ||||
|     }; | ||||
| 
 | ||||
|     const Device& device; | ||||
| 
 | ||||
|     std::size_t cbuf_cursor = 0; | ||||
|     std::array<GLuint, NUM_CBUFS> cbufs{}; | ||||
|     bool has_fast_buffer_sub_data = false; | ||||
|     bool use_assembly_shaders = false; | ||||
|     bool has_unified_vertex_buffers = false; | ||||
| 
 | ||||
|     u32 max_attributes = 0; | ||||
| 
 | ||||
|     std::optional<StreamBuffer> stream_buffer; | ||||
| 
 | ||||
|     std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, | ||||
|                VideoCommon::NUM_STAGES> | ||||
|         fast_uniforms; | ||||
|     std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, | ||||
|                VideoCommon::NUM_STAGES> | ||||
|         copy_uniforms; | ||||
|     std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms; | ||||
| 
 | ||||
|     u32 index_buffer_offset = 0; | ||||
| }; | ||||
| 
 | ||||
| struct BufferCacheParams { | ||||
|     using Runtime = OpenGL::BufferCacheRuntime; | ||||
|     using Buffer = OpenGL::Buffer; | ||||
| 
 | ||||
|     static constexpr bool IS_OPENGL = true; | ||||
|     static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; | ||||
|     static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; | ||||
|     static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; | ||||
|     static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; | ||||
|     static constexpr bool USE_MEMORY_MAPS = false; | ||||
| }; | ||||
| 
 | ||||
| using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -21,9 +21,7 @@ | |||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
| // One uniform block is reserved for emulation purposes
 | ||||
| constexpr u32 ReservedUniformBlocks = 1; | ||||
| 
 | ||||
|  | @ -197,11 +195,13 @@ bool IsASTCSupported() { | |||
|     const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | ||||
|     return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); | ||||
| } | ||||
| 
 | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| Device::Device() | ||||
|     : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { | ||||
| Device::Device() { | ||||
|     if (!GLAD_GL_VERSION_4_6) { | ||||
|         LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available"); | ||||
|         throw std::runtime_error{"Insufficient version"}; | ||||
|     } | ||||
|     const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); | ||||
|     const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); | ||||
|     const std::vector extensions = GetExtensions(); | ||||
|  | @ -217,6 +217,9 @@ Device::Device() | |||
|             "Beta driver 443.24 is known to have issues. There might be performance issues."); | ||||
|         disable_fast_buffer_sub_data = true; | ||||
|     } | ||||
| 
 | ||||
|     max_uniform_buffers = BuildMaxUniformBuffers(); | ||||
|     base_bindings = BuildBaseBindings(); | ||||
|     uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | ||||
|     shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | ||||
|     max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | ||||
|  |  | |||
|  | @ -10,11 +10,9 @@ | |||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| static constexpr u32 EmulationUniformBlockBinding = 0; | ||||
| 
 | ||||
| class Device final { | ||||
| class Device { | ||||
| public: | ||||
|     struct BaseBindings final { | ||||
|     struct BaseBindings { | ||||
|         u32 uniform_buffer{}; | ||||
|         u32 shader_storage_buffer{}; | ||||
|         u32 sampler{}; | ||||
|  |  | |||
|  | @ -47,7 +47,7 @@ void GLInnerFence::Wait() { | |||
| 
 | ||||
| FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, | ||||
|                                        Tegra::GPU& gpu_, TextureCache& texture_cache_, | ||||
|                                        OGLBufferCache& buffer_cache_, QueryCache& query_cache_) | ||||
|                                        BufferCache& buffer_cache_, QueryCache& query_cache_) | ||||
|     : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} | ||||
| 
 | ||||
| Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { | ||||
|  |  | |||
|  | @ -32,14 +32,13 @@ private: | |||
| }; | ||||
| 
 | ||||
| using Fence = std::shared_ptr<GLInnerFence>; | ||||
| using GenericFenceManager = | ||||
|     VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>; | ||||
| using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>; | ||||
| 
 | ||||
| class FenceManagerOpenGL final : public GenericFenceManager { | ||||
| public: | ||||
|     explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | ||||
|                                 TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, | ||||
|                                 QueryCache& query_cache_); | ||||
|     explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, | ||||
|                                 TextureCache& texture_cache, BufferCache& buffer_cache, | ||||
|                                 QueryCache& query_cache); | ||||
| 
 | ||||
| protected: | ||||
|     Fence CreateFence(u32 value, bool is_stubbed) override; | ||||
|  |  | |||
|  | @ -44,28 +44,14 @@ using VideoCore::Surface::PixelFormat; | |||
| using VideoCore::Surface::SurfaceTarget; | ||||
| using VideoCore::Surface::SurfaceType; | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | ||||
| MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100)); | ||||
| MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); | ||||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
| constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18; | ||||
| constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = | ||||
|     NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; | ||||
| constexpr size_t TOTAL_CONST_BUFFER_BYTES = | ||||
|     NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; | ||||
| 
 | ||||
| constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; | ||||
| constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; | ||||
| 
 | ||||
| struct TextureHandle { | ||||
|     constexpr TextureHandle(u32 data, bool via_header_index) { | ||||
|  | @ -101,20 +87,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const | |||
|     return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | ||||
| } | ||||
| 
 | ||||
| std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | ||||
|                                const ConstBufferEntry& entry) { | ||||
|     if (!entry.IsIndirect()) { | ||||
|         return entry.GetSize(); | ||||
|     } | ||||
|     if (buffer.size > Maxwell::MaxConstBufferSize) { | ||||
|         LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, | ||||
|                     Maxwell::MaxConstBufferSize); | ||||
|         return Maxwell::MaxConstBufferSize; | ||||
|     } | ||||
| 
 | ||||
|     return buffer.size; | ||||
| } | ||||
| 
 | ||||
| /// Translates hardware transform feedback indices
 | ||||
| /// @param location Hardware location
 | ||||
| /// @return Pair of ARB_transform_feedback3 token stream first and third arguments
 | ||||
|  | @ -147,14 +119,6 @@ void oglEnable(GLenum cap, bool state) { | |||
|     (state ? glEnable : glDisable)(cap); | ||||
| } | ||||
| 
 | ||||
| void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) { | ||||
|     if (num_ssbos == 0) { | ||||
|         return; | ||||
|     } | ||||
|     glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos), | ||||
|                                     reinterpret_cast<const GLuint*>(ssbos)); | ||||
| } | ||||
| 
 | ||||
| ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||||
|     if (entry.is_buffer) { | ||||
|         return ImageViewType::Buffer; | ||||
|  | @ -201,44 +165,28 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
|     : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), | ||||
|       kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), | ||||
|       screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), | ||||
|       stream_buffer(device, state_tracker), | ||||
|       texture_cache_runtime(device, program_manager, state_tracker), | ||||
|       texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | ||||
|       buffer_cache_runtime(device), | ||||
|       buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | ||||
|       shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | ||||
|       query_cache(*this, maxwell3d, gpu_memory), | ||||
|       buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker), | ||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | ||||
|       async_shaders(emu_window_) { | ||||
|     unified_uniform_buffer.Create(); | ||||
|     glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); | ||||
| 
 | ||||
|     if (device.UseAssemblyShaders()) { | ||||
|         glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | ||||
|         for (const GLuint cbuf : staging_cbufs) { | ||||
|             glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize), | ||||
|                                  nullptr, 0); | ||||
|         } | ||||
|     } | ||||
|     if (device.UseAsynchronousShaders()) { | ||||
|         async_shaders.AllocateWorkers(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| RasterizerOpenGL::~RasterizerOpenGL() { | ||||
|     if (device.UseAssemblyShaders()) { | ||||
|         glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | ||||
|     } | ||||
| } | ||||
| RasterizerOpenGL::~RasterizerOpenGL() = default; | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupVertexFormat() { | ||||
| void RasterizerOpenGL::SyncVertexFormats() { | ||||
|     auto& flags = maxwell3d.dirty.flags; | ||||
|     if (!flags[Dirty::VertexFormats]) { | ||||
|         return; | ||||
|     } | ||||
|     flags[Dirty::VertexFormats] = false; | ||||
| 
 | ||||
|     MICROPROFILE_SCOPE(OpenGL_VAO); | ||||
| 
 | ||||
|     // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
 | ||||
|     // the first 16 vertex attributes always, as we don't know which ones are actually used until
 | ||||
|     // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
 | ||||
|  | @ -274,55 +222,7 @@ void RasterizerOpenGL::SetupVertexFormat() { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupVertexBuffer() { | ||||
|     auto& flags = maxwell3d.dirty.flags; | ||||
|     if (!flags[Dirty::VertexBuffers]) { | ||||
|         return; | ||||
|     } | ||||
|     flags[Dirty::VertexBuffers] = false; | ||||
| 
 | ||||
|     MICROPROFILE_SCOPE(OpenGL_VB); | ||||
| 
 | ||||
|     const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); | ||||
| 
 | ||||
|     // Upload all guest vertex arrays sequentially to our buffer
 | ||||
|     const auto& regs = maxwell3d.regs; | ||||
|     for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { | ||||
|         if (!flags[Dirty::VertexBuffer0 + index]) { | ||||
|             continue; | ||||
|         } | ||||
|         flags[Dirty::VertexBuffer0 + index] = false; | ||||
| 
 | ||||
|         const auto& vertex_array = regs.vertex_array[index]; | ||||
|         if (!vertex_array.IsEnabled()) { | ||||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         const GPUVAddr start = vertex_array.StartAddress(); | ||||
|         const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | ||||
|         ASSERT(end >= start); | ||||
| 
 | ||||
|         const GLuint gl_index = static_cast<GLuint>(index); | ||||
|         const u64 size = end - start; | ||||
|         if (size == 0) { | ||||
|             glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); | ||||
|             if (use_unified_memory) { | ||||
|                 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0); | ||||
|             } | ||||
|             continue; | ||||
|         } | ||||
|         const auto info = buffer_cache.UploadMemory(start, size); | ||||
|         if (use_unified_memory) { | ||||
|             glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); | ||||
|             glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, | ||||
|                                    info.address + info.offset, size); | ||||
|         } else { | ||||
|             glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupVertexInstances() { | ||||
| void RasterizerOpenGL::SyncVertexInstances() { | ||||
|     auto& flags = maxwell3d.dirty.flags; | ||||
|     if (!flags[Dirty::VertexInstances]) { | ||||
|         return; | ||||
|  | @ -343,17 +243,7 @@ void RasterizerOpenGL::SetupVertexInstances() { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| GLintptr RasterizerOpenGL::SetupIndexBuffer() { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Index); | ||||
|     const auto& regs = maxwell3d.regs; | ||||
|     const std::size_t size = CalculateIndexBufferSize(); | ||||
|     const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); | ||||
|     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); | ||||
|     return info.offset; | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupShaders() { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Shader); | ||||
| void RasterizerOpenGL::SetupShaders(bool is_indexed) { | ||||
|     u32 clip_distances = 0; | ||||
| 
 | ||||
|     std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; | ||||
|  | @ -410,11 +300,19 @@ void RasterizerOpenGL::SetupShaders() { | |||
|         const size_t stage = index == 0 ? 0 : index - 1; | ||||
|         shaders[stage] = shader; | ||||
| 
 | ||||
|         SetupDrawConstBuffers(stage, shader); | ||||
|         SetupDrawGlobalMemory(stage, shader); | ||||
|         SetupDrawTextures(shader, stage); | ||||
|         SetupDrawImages(shader, stage); | ||||
| 
 | ||||
|         buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); | ||||
| 
 | ||||
|         buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||||
|         u32 ssbo_index = 0; | ||||
|         for (const auto& buffer : shader->GetEntries().global_memory_entries) { | ||||
|             buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||||
|                                                    buffer.cbuf_offset, buffer.is_written); | ||||
|             ++ssbo_index; | ||||
|         } | ||||
| 
 | ||||
|         // Workaround for Intel drivers.
 | ||||
|         // When a clip distance is enabled but not set in the shader it crops parts of the screen
 | ||||
|         // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
 | ||||
|  | @ -430,44 +328,27 @@ void RasterizerOpenGL::SetupShaders() { | |||
|     SyncClipEnabled(clip_distances); | ||||
|     maxwell3d.dirty.flags[Dirty::Shaders] = false; | ||||
| 
 | ||||
|     buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||||
| 
 | ||||
|     const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||||
|     texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||||
| 
 | ||||
|     buffer_cache.BindHostGeometryBuffers(is_indexed); | ||||
| 
 | ||||
|     size_t image_view_index = 0; | ||||
|     size_t texture_index = 0; | ||||
|     size_t image_index = 0; | ||||
|     for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||||
|         const Shader* const shader = shaders[stage]; | ||||
|         if (shader) { | ||||
|             const auto base = device.GetBaseBindings(stage); | ||||
|         if (!shader) { | ||||
|             continue; | ||||
|         } | ||||
|         buffer_cache.BindHostStageBuffers(stage); | ||||
|         const auto& base = device.GetBaseBindings(stage); | ||||
|         BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, | ||||
|                      texture_index, image_index); | ||||
|     } | ||||
| } | ||||
| } | ||||
| 
 | ||||
| std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | ||||
|     const auto& regs = maxwell3d.regs; | ||||
| 
 | ||||
|     std::size_t size = 0; | ||||
|     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||||
|         if (!regs.vertex_array[index].IsEnabled()) | ||||
|             continue; | ||||
| 
 | ||||
|         const GPUVAddr start = regs.vertex_array[index].StartAddress(); | ||||
|         const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | ||||
| 
 | ||||
|         size += end - start; | ||||
|         ASSERT(end >= start); | ||||
|     } | ||||
| 
 | ||||
|     return size; | ||||
| } | ||||
| 
 | ||||
| std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { | ||||
|     return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * | ||||
|            static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, | ||||
|                                          const VideoCore::DiskResourceLoadCallback& callback) { | ||||
|  | @ -475,6 +356,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s | |||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::Clear() { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Clears); | ||||
|     if (!maxwell3d.ShouldExecute()) { | ||||
|         return; | ||||
|     } | ||||
|  | @ -525,11 +407,9 @@ void RasterizerOpenGL::Clear() { | |||
|     } | ||||
|     UNIMPLEMENTED_IF(regs.clear_flags.viewport); | ||||
| 
 | ||||
|     { | ||||
|         auto lock = texture_cache.AcquireLock(); | ||||
|     std::scoped_lock lock{texture_cache.mutex}; | ||||
|     texture_cache.UpdateRenderTargets(true); | ||||
|     state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||||
|     } | ||||
| 
 | ||||
|     if (use_color) { | ||||
|         glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); | ||||
|  | @ -541,7 +421,6 @@ void RasterizerOpenGL::Clear() { | |||
|     } else if (use_stencil) { | ||||
|         glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); | ||||
|     } | ||||
| 
 | ||||
|     ++num_queued_commands; | ||||
| } | ||||
| 
 | ||||
|  | @ -550,75 +429,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 
 | ||||
|     query_cache.UpdateCounters(); | ||||
| 
 | ||||
|     SyncViewport(); | ||||
|     SyncRasterizeEnable(); | ||||
|     SyncPolygonModes(); | ||||
|     SyncColorMask(); | ||||
|     SyncFragmentColorClampState(); | ||||
|     SyncMultiSampleState(); | ||||
|     SyncDepthTestState(); | ||||
|     SyncDepthClamp(); | ||||
|     SyncStencilTestState(); | ||||
|     SyncBlendState(); | ||||
|     SyncLogicOpState(); | ||||
|     SyncCullMode(); | ||||
|     SyncPrimitiveRestart(); | ||||
|     SyncScissorTest(); | ||||
|     SyncPointState(); | ||||
|     SyncLineState(); | ||||
|     SyncPolygonOffset(); | ||||
|     SyncAlphaTest(); | ||||
|     SyncFramebufferSRGB(); | ||||
| 
 | ||||
|     buffer_cache.Acquire(); | ||||
|     current_cbuf = 0; | ||||
| 
 | ||||
|     std::size_t buffer_size = CalculateVertexArraysSize(); | ||||
| 
 | ||||
|     // Add space for index buffer
 | ||||
|     if (is_indexed) { | ||||
|         buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); | ||||
|     } | ||||
| 
 | ||||
|     // Uniform space for the 5 shader stages
 | ||||
|     buffer_size = | ||||
|         Common::AlignUp<std::size_t>(buffer_size, 4) + | ||||
|         (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage; | ||||
| 
 | ||||
|     // Add space for at least 18 constant buffers
 | ||||
|     buffer_size += Maxwell::MaxConstBuffers * | ||||
|                    (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||||
| 
 | ||||
|     // Prepare the vertex array.
 | ||||
|     buffer_cache.Map(buffer_size); | ||||
| 
 | ||||
|     // Prepare vertex array format.
 | ||||
|     SetupVertexFormat(); | ||||
| 
 | ||||
|     // Upload vertex and index data.
 | ||||
|     SetupVertexBuffer(); | ||||
|     SetupVertexInstances(); | ||||
|     GLintptr index_buffer_offset = 0; | ||||
|     if (is_indexed) { | ||||
|         index_buffer_offset = SetupIndexBuffer(); | ||||
|     } | ||||
| 
 | ||||
|     // Setup emulation uniform buffer.
 | ||||
|     if (!device.UseAssemblyShaders()) { | ||||
|         MaxwellUniformData ubo; | ||||
|         ubo.SetFromRegs(maxwell3d); | ||||
|         const auto info = | ||||
|             buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, | ||||
|                           static_cast<GLsizeiptr>(sizeof(ubo))); | ||||
|     } | ||||
|     SyncState(); | ||||
| 
 | ||||
|     // Setup shaders and their used resources.
 | ||||
|     auto lock = texture_cache.AcquireLock(); | ||||
|     SetupShaders(); | ||||
|     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
|     SetupShaders(is_indexed); | ||||
| 
 | ||||
|     // Signal the buffer cache that we are not going to upload more things.
 | ||||
|     buffer_cache.Unmap(); | ||||
|     texture_cache.UpdateRenderTargets(false); | ||||
|     state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||||
|     program_manager.BindGraphicsPipeline(); | ||||
|  | @ -632,7 +448,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
|     if (is_indexed) { | ||||
|         const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base); | ||||
|         const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count); | ||||
|         const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset); | ||||
|         const GLvoid* const offset = buffer_cache_runtime.IndexOffset(); | ||||
|         const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format); | ||||
|         if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { | ||||
|             glDrawElements(primitive_mode, num_vertices, format, offset); | ||||
|  | @ -672,22 +488,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | ||||
|     buffer_cache.Acquire(); | ||||
|     current_cbuf = 0; | ||||
| 
 | ||||
|     Shader* const kernel = shader_cache.GetComputeKernel(code_addr); | ||||
| 
 | ||||
|     auto lock = texture_cache.AcquireLock(); | ||||
|     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
|     BindComputeTextures(kernel); | ||||
| 
 | ||||
|     const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * | ||||
|                                (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||||
|     buffer_cache.Map(buffer_size); | ||||
| 
 | ||||
|     SetupComputeConstBuffers(kernel); | ||||
|     SetupComputeGlobalMemory(kernel); | ||||
| 
 | ||||
|     buffer_cache.Unmap(); | ||||
|     const auto& entries = kernel->GetEntries(); | ||||
|     buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); | ||||
|     buffer_cache.UnbindComputeStorageBuffers(); | ||||
|     u32 ssbo_index = 0; | ||||
|     for (const auto& buffer : entries.global_memory_entries) { | ||||
|         buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, | ||||
|                                               buffer.is_written); | ||||
|         ++ssbo_index; | ||||
|     } | ||||
|     buffer_cache.UpdateComputeBuffers(); | ||||
|     buffer_cache.BindHostComputeBuffers(); | ||||
| 
 | ||||
|     const auto& launch_desc = kepler_compute.launch_description; | ||||
|     glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | ||||
|  | @ -703,6 +519,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
|     query_cache.Query(gpu_addr, type, timestamp); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||
|                                                  u32 size) { | ||||
|     std::scoped_lock lock{buffer_cache.mutex}; | ||||
|     buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::FlushAll() {} | ||||
| 
 | ||||
| void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | ||||
|  | @ -711,19 +533,23 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | |||
|         return; | ||||
|     } | ||||
|     { | ||||
|         auto lock = texture_cache.AcquireLock(); | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.DownloadMemory(addr, size); | ||||
|     } | ||||
|     buffer_cache.FlushRegion(addr, size); | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.DownloadMemory(addr, size); | ||||
|     } | ||||
|     query_cache.FlushRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { | ||||
|     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
|     if (!Settings::IsGPULevelHigh()) { | ||||
|         return buffer_cache.MustFlushRegion(addr, size); | ||||
|         return buffer_cache.IsRegionGpuModified(addr, size); | ||||
|     } | ||||
|     return texture_cache.IsRegionGpuModified(addr, size) || | ||||
|            buffer_cache.MustFlushRegion(addr, size); | ||||
|            buffer_cache.IsRegionGpuModified(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | ||||
|  | @ -732,11 +558,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | |||
|         return; | ||||
|     } | ||||
|     { | ||||
|         auto lock = texture_cache.AcquireLock(); | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.WriteMemory(addr, size); | ||||
|     } | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.WriteMemory(addr, size); | ||||
|     } | ||||
|     shader_cache.InvalidateRegion(addr, size); | ||||
|     buffer_cache.InvalidateRegion(addr, size); | ||||
|     query_cache.InvalidateRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
|  | @ -745,26 +574,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|     shader_cache.OnCPUWrite(addr, size); | ||||
|     { | ||||
|         auto lock = texture_cache.AcquireLock(); | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.WriteMemory(addr, size); | ||||
|     } | ||||
|     shader_cache.OnCPUWrite(addr, size); | ||||
|     buffer_cache.OnCPUWrite(addr, size); | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.CachedWriteMemory(addr, size); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SyncGuestHost() { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     buffer_cache.SyncGuestHost(); | ||||
|     shader_cache.SyncGuestHost(); | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.FlushCachedWrites(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | ||||
|     { | ||||
|         auto lock = texture_cache.AcquireLock(); | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.UnmapMemory(addr, size); | ||||
|     } | ||||
|     buffer_cache.OnCPUWrite(addr, size); | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.WriteMemory(addr, size); | ||||
|     } | ||||
|     shader_cache.OnCPUWrite(addr, size); | ||||
| } | ||||
| 
 | ||||
|  | @ -799,14 +637,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | |||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::WaitForIdle() { | ||||
|     // Place a barrier on everything that is not framebuffer related.
 | ||||
|     // This is related to another flag that is not currently implemented.
 | ||||
|     glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT | | ||||
|                     GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | | ||||
|                     GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | | ||||
|                     GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT | | ||||
|                     GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT | | ||||
|                     GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); | ||||
|     glMemoryBarrier(GL_ALL_BARRIER_BITS); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::FragmentBarrier() { | ||||
|  | @ -831,18 +662,21 @@ void RasterizerOpenGL::TickFrame() { | |||
|     num_queued_commands = 0; | ||||
| 
 | ||||
|     fence_manager.TickFrame(); | ||||
|     buffer_cache.TickFrame(); | ||||
|     { | ||||
|         auto lock = texture_cache.AcquireLock(); | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.TickFrame(); | ||||
|     } | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.TickFrame(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | ||||
|                                              const Tegra::Engines::Fermi2D::Surface& dst, | ||||
|                                              const Tegra::Engines::Fermi2D::Config& copy_config) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Blits); | ||||
|     auto lock = texture_cache.AcquireLock(); | ||||
|     std::scoped_lock lock{texture_cache.mutex}; | ||||
|     texture_cache.BlitImage(dst, src, copy_config); | ||||
|     return true; | ||||
| } | ||||
|  | @ -854,7 +688,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
|     } | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
| 
 | ||||
|     auto lock = texture_cache.AcquireLock(); | ||||
|     std::scoped_lock lock{texture_cache.mutex}; | ||||
|     ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; | ||||
|     if (!image_view) { | ||||
|         return false; | ||||
|  | @ -921,166 +755,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { | ||||
|     static constexpr std::array PARAMETER_LUT{ | ||||
|         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,          GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | ||||
|         GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | ||||
|         GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, | ||||
|     }; | ||||
|     MICROPROFILE_SCOPE(OpenGL_UBO); | ||||
|     const auto& stages = maxwell3d.state.shader_stages; | ||||
|     const auto& shader_stage = stages[stage_index]; | ||||
|     const auto& entries = shader->GetEntries(); | ||||
|     const bool use_unified = entries.use_unified_uniforms; | ||||
|     const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE; | ||||
| 
 | ||||
|     const auto base_bindings = device.GetBaseBindings(stage_index); | ||||
|     u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer; | ||||
|     for (const auto& entry : entries.const_buffers) { | ||||
|         const u32 index = entry.GetIndex(); | ||||
|         const auto& buffer = shader_stage.const_buffers[index]; | ||||
|         SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified, | ||||
|                          base_unified_offset + index * Maxwell::MaxConstBufferSize); | ||||
|         ++binding; | ||||
|     } | ||||
|     if (use_unified) { | ||||
|         const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer + | ||||
|                                            entries.global_memory_entries.size()); | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, | ||||
|                           base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_UBO); | ||||
|     const auto& launch_desc = kepler_compute.launch_description; | ||||
|     const auto& entries = kernel->GetEntries(); | ||||
|     const bool use_unified = entries.use_unified_uniforms; | ||||
| 
 | ||||
|     u32 binding = 0; | ||||
|     for (const auto& entry : entries.const_buffers) { | ||||
|         const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||||
|         const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | ||||
|         Tegra::Engines::ConstBufferInfo buffer; | ||||
|         buffer.address = config.Address(); | ||||
|         buffer.size = config.size; | ||||
|         buffer.enabled = mask[entry.GetIndex()]; | ||||
|         SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry, | ||||
|                          use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize); | ||||
|         ++binding; | ||||
|     } | ||||
|     if (use_unified) { | ||||
|         const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size()); | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0, | ||||
|                           NUM_CONST_BUFFERS_BYTES_PER_STAGE); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, | ||||
|                                         const Tegra::Engines::ConstBufferInfo& buffer, | ||||
|                                         const ConstBufferEntry& entry, bool use_unified, | ||||
|                                         std::size_t unified_offset) { | ||||
|     if (!buffer.enabled) { | ||||
|         // Set values to zero to unbind buffers
 | ||||
|         if (device.UseAssemblyShaders()) { | ||||
|             glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); | ||||
|         } else { | ||||
|             glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float)); | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
 | ||||
|     // UBO alignment requirements.
 | ||||
|     const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); | ||||
| 
 | ||||
|     const bool fast_upload = !use_unified && device.HasFastBufferSubData(); | ||||
| 
 | ||||
|     const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); | ||||
|     const GPUVAddr gpu_addr = buffer.address; | ||||
|     auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); | ||||
| 
 | ||||
|     if (device.UseAssemblyShaders()) { | ||||
|         UNIMPLEMENTED_IF(use_unified); | ||||
|         if (info.offset != 0) { | ||||
|             const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; | ||||
|             glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); | ||||
|             info.handle = staging_cbuf; | ||||
|             info.offset = 0; | ||||
|         } | ||||
|         glBindBufferRangeNV(stage, binding, info.handle, info.offset, size); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (use_unified) { | ||||
|         glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, | ||||
|                                  unified_offset, size); | ||||
|     } else { | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { | ||||
|     static constexpr std::array TARGET_LUT = { | ||||
|         GL_VERTEX_PROGRAM_NV,   GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, | ||||
|         GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | ||||
|     }; | ||||
|     const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; | ||||
|     const auto& entries{shader->GetEntries().global_memory_entries}; | ||||
| 
 | ||||
|     std::array<BindlessSSBO, 32> ssbos; | ||||
|     ASSERT(entries.size() < ssbos.size()); | ||||
| 
 | ||||
|     const bool assembly_shaders = device.UseAssemblyShaders(); | ||||
|     u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; | ||||
|     for (const auto& entry : entries) { | ||||
|         const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; | ||||
|         const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; | ||||
|         const u32 size{gpu_memory.Read<u32>(addr + 8)}; | ||||
|         SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); | ||||
|         ++binding; | ||||
|     } | ||||
|     if (assembly_shaders) { | ||||
|         UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size()); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { | ||||
|     const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; | ||||
|     const auto& entries{kernel->GetEntries().global_memory_entries}; | ||||
| 
 | ||||
|     std::array<BindlessSSBO, 32> ssbos; | ||||
|     ASSERT(entries.size() < ssbos.size()); | ||||
| 
 | ||||
|     u32 binding = 0; | ||||
|     for (const auto& entry : entries) { | ||||
|         const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; | ||||
|         const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; | ||||
|         const u32 size{gpu_memory.Read<u32>(addr + 8)}; | ||||
|         SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); | ||||
|         ++binding; | ||||
|     } | ||||
|     if (device.UseAssemblyShaders()) { | ||||
|         UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size()); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, | ||||
|                                          GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) { | ||||
|     const size_t alignment{device.GetShaderStorageBufferAlignment()}; | ||||
|     const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); | ||||
|     if (device.UseAssemblyShaders()) { | ||||
|         *ssbo = BindlessSSBO{ | ||||
|             .address = static_cast<GLuint64EXT>(info.address + info.offset), | ||||
|             .length = static_cast<GLsizei>(size), | ||||
|             .padding = 0, | ||||
|         }; | ||||
|     } else { | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, | ||||
|                           static_cast<GLsizeiptr>(size)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { | ||||
|     const bool via_header_index = | ||||
|         maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||||
|  | @ -1128,6 +802,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SyncState() { | ||||
|     SyncViewport(); | ||||
|     SyncRasterizeEnable(); | ||||
|     SyncPolygonModes(); | ||||
|     SyncColorMask(); | ||||
|     SyncFragmentColorClampState(); | ||||
|     SyncMultiSampleState(); | ||||
|     SyncDepthTestState(); | ||||
|     SyncDepthClamp(); | ||||
|     SyncStencilTestState(); | ||||
|     SyncBlendState(); | ||||
|     SyncLogicOpState(); | ||||
|     SyncCullMode(); | ||||
|     SyncPrimitiveRestart(); | ||||
|     SyncScissorTest(); | ||||
|     SyncPointState(); | ||||
|     SyncLineState(); | ||||
|     SyncPolygonOffset(); | ||||
|     SyncAlphaTest(); | ||||
|     SyncFramebufferSRGB(); | ||||
|     SyncVertexFormats(); | ||||
|     SyncVertexInstances(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SyncViewport() { | ||||
|     auto& flags = maxwell3d.dirty.flags; | ||||
|     const auto& regs = maxwell3d.regs; | ||||
|  | @ -1163,9 +861,11 @@ void RasterizerOpenGL::SyncViewport() { | |||
|         if (regs.screen_y_control.y_negate != 0) { | ||||
|             flip_y = !flip_y; | ||||
|         } | ||||
|         glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT, | ||||
|                       regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE | ||||
|                                                                        : GL_NEGATIVE_ONE_TO_ONE); | ||||
|         const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne; | ||||
|         const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT; | ||||
|         const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE; | ||||
|         state_tracker.ClipControl(origin, depth); | ||||
|         state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0); | ||||
|     } | ||||
| 
 | ||||
|     if (dirty_viewport) { | ||||
|  | @ -1649,36 +1349,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | |||
|     if (regs.tfb_enabled == 0) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     if (device.UseAssemblyShaders()) { | ||||
|         SyncTransformFeedback(); | ||||
|     } | ||||
| 
 | ||||
|     UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | ||||
|                      regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | ||||
|                      regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | ||||
| 
 | ||||
|     for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||||
|         const auto& binding = regs.tfb_bindings[index]; | ||||
|         if (!binding.buffer_enable) { | ||||
|             if (enabled_transform_feedback_buffers[index]) { | ||||
|                 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0, | ||||
|                                   0); | ||||
|             } | ||||
|             enabled_transform_feedback_buffers[index] = false; | ||||
|             continue; | ||||
|         } | ||||
|         enabled_transform_feedback_buffers[index] = true; | ||||
| 
 | ||||
|         auto& tfb_buffer = transform_feedback_buffers[index]; | ||||
|         tfb_buffer.Create(); | ||||
| 
 | ||||
|         const GLuint handle = tfb_buffer.handle; | ||||
|         const std::size_t size = binding.buffer_size; | ||||
|         glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY); | ||||
|         glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0, | ||||
|                           static_cast<GLsizeiptr>(size)); | ||||
|     } | ||||
|     UNIMPLEMENTED_IF(primitive_mode != GL_POINTS); | ||||
| 
 | ||||
|     // We may have to call BeginTransformFeedbackNV here since they seem to call different
 | ||||
|     // implementations on Nvidia's driver (the pointer is different) but we are using
 | ||||
|  | @ -1692,23 +1369,7 @@ void RasterizerOpenGL::EndTransformFeedback() { | |||
|     if (regs.tfb_enabled == 0) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     glEndTransformFeedback(); | ||||
| 
 | ||||
|     for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||||
|         const auto& binding = regs.tfb_bindings[index]; | ||||
|         if (!binding.buffer_enable) { | ||||
|             continue; | ||||
|         } | ||||
|         UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||||
| 
 | ||||
|         const GLuint handle = transform_feedback_buffers[index].handle; | ||||
|         const GPUVAddr gpu_addr = binding.Address(); | ||||
|         const std::size_t size = binding.buffer_size; | ||||
|         const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||||
|         glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, | ||||
|                                  static_cast<GLsizeiptr>(size)); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -30,7 +30,6 @@ | |||
| #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_state_tracker.h" | ||||
| #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||||
| #include "video_core/renderer_opengl/gl_texture_cache.h" | ||||
| #include "video_core/shader/async_shaders.h" | ||||
| #include "video_core/textures/texture.h" | ||||
|  | @ -72,6 +71,7 @@ public: | |||
|     void DispatchCompute(GPUVAddr code_addr) override; | ||||
|     void ResetCounter(VideoCore::QueryType type) override; | ||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||
|     void FlushAll() override; | ||||
|     void FlushRegion(VAddr addr, u64 size) override; | ||||
|     bool MustFlushRegion(VAddr addr, u64 size) override; | ||||
|  | @ -119,27 +119,6 @@ private: | |||
|     void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, | ||||
|                       size_t& image_view_index, size_t& texture_index, size_t& image_index); | ||||
| 
 | ||||
|     /// Configures the current constbuffers to use for the draw command.
 | ||||
|     void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); | ||||
| 
 | ||||
|     /// Configures the current constbuffers to use for the kernel invocation.
 | ||||
|     void SetupComputeConstBuffers(Shader* kernel); | ||||
| 
 | ||||
|     /// Configures a constant buffer.
 | ||||
|     void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | ||||
|                           const ConstBufferEntry& entry, bool use_unified, | ||||
|                           std::size_t unified_offset); | ||||
| 
 | ||||
|     /// Configures the current global memory entries to use for the draw command.
 | ||||
|     void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader); | ||||
| 
 | ||||
|     /// Configures the current global memory entries to use for the kernel invocation.
 | ||||
|     void SetupComputeGlobalMemory(Shader* kernel); | ||||
| 
 | ||||
|     /// Configures a global memory buffer.
 | ||||
|     void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | ||||
|                            size_t size, BindlessSSBO* ssbo); | ||||
| 
 | ||||
|     /// Configures the current textures to use for the draw command.
 | ||||
|     void SetupDrawTextures(const Shader* shader, size_t stage_index); | ||||
| 
 | ||||
|  | @ -152,6 +131,9 @@ private: | |||
|     /// Configures images in a compute shader.
 | ||||
|     void SetupComputeImages(const Shader* shader); | ||||
| 
 | ||||
|     /// Syncs state to match the guest's state
 | ||||
|     void SyncState(); | ||||
| 
 | ||||
|     /// Syncs the viewport and depth range to match the guest state
 | ||||
|     void SyncViewport(); | ||||
| 
 | ||||
|  | @ -215,6 +197,12 @@ private: | |||
|     /// Syncs the framebuffer sRGB state to match the guest state
 | ||||
|     void SyncFramebufferSRGB(); | ||||
| 
 | ||||
|     /// Syncs vertex formats to match the guest state
 | ||||
|     void SyncVertexFormats(); | ||||
| 
 | ||||
|     /// Syncs vertex instances to match the guest state
 | ||||
|     void SyncVertexInstances(); | ||||
| 
 | ||||
|     /// Syncs transform feedback state to match guest state
 | ||||
|     /// @note Only valid on assembly shaders
 | ||||
|     void SyncTransformFeedback(); | ||||
|  | @ -225,19 +213,7 @@ private: | |||
|     /// End a transform feedback
 | ||||
|     void EndTransformFeedback(); | ||||
| 
 | ||||
|     std::size_t CalculateVertexArraysSize() const; | ||||
| 
 | ||||
|     std::size_t CalculateIndexBufferSize() const; | ||||
| 
 | ||||
|     /// Updates the current vertex format
 | ||||
|     void SetupVertexFormat(); | ||||
| 
 | ||||
|     void SetupVertexBuffer(); | ||||
|     void SetupVertexInstances(); | ||||
| 
 | ||||
|     GLintptr SetupIndexBuffer(); | ||||
| 
 | ||||
|     void SetupShaders(); | ||||
|     void SetupShaders(bool is_indexed); | ||||
| 
 | ||||
|     Tegra::GPU& gpu; | ||||
|     Tegra::Engines::Maxwell3D& maxwell3d; | ||||
|  | @ -249,12 +225,12 @@ private: | |||
|     ProgramManager& program_manager; | ||||
|     StateTracker& state_tracker; | ||||
| 
 | ||||
|     OGLStreamBuffer stream_buffer; | ||||
|     TextureCacheRuntime texture_cache_runtime; | ||||
|     TextureCache texture_cache; | ||||
|     BufferCacheRuntime buffer_cache_runtime; | ||||
|     BufferCache buffer_cache; | ||||
|     ShaderCacheOpenGL shader_cache; | ||||
|     QueryCache query_cache; | ||||
|     OGLBufferCache buffer_cache; | ||||
|     FenceManagerOpenGL fence_manager; | ||||
| 
 | ||||
|     VideoCommon::Shader::AsyncShaders async_shaders; | ||||
|  | @ -262,20 +238,8 @@ private: | |||
|     boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; | ||||
|     std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | ||||
|     boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; | ||||
|     std::array<GLuint, MAX_TEXTURES> texture_handles; | ||||
|     std::array<GLuint, MAX_IMAGES> image_handles; | ||||
| 
 | ||||
|     std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||||
|         transform_feedback_buffers; | ||||
|     std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||||
|         enabled_transform_feedback_buffers; | ||||
| 
 | ||||
|     static constexpr std::size_t NUM_CONSTANT_BUFFERS = | ||||
|         Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | ||||
|         Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; | ||||
|     std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; | ||||
|     std::size_t current_cbuf = 0; | ||||
|     OGLBuffer unified_uniform_buffer; | ||||
|     std::array<GLuint, MAX_TEXTURES> texture_handles{}; | ||||
|     std::array<GLuint, MAX_IMAGES> image_handles{}; | ||||
| 
 | ||||
|     /// Number of commands queued to the OpenGL driver. Reset on flush.
 | ||||
|     std::size_t num_queued_commands = 0; | ||||
|  |  | |||
|  | @ -171,12 +171,6 @@ void OGLBuffer::Release() { | |||
|     handle = 0; | ||||
| } | ||||
| 
 | ||||
| void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) { | ||||
|     ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; }); | ||||
| 
 | ||||
|     glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY); | ||||
| } | ||||
| 
 | ||||
| void OGLSync::Create() { | ||||
|     if (handle != 0) | ||||
|         return; | ||||
|  |  | |||
|  | @ -234,9 +234,6 @@ public: | |||
|     /// Deletes the internal OpenGL resource
 | ||||
|     void Release(); | ||||
| 
 | ||||
|     // Converts the buffer into a stream copy buffer with a fixed size
 | ||||
|     void MakeStreamCopy(std::size_t buffer_size); | ||||
| 
 | ||||
|     GLuint handle = 0; | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -64,7 +64,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument> | |||
| constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); | ||||
| constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); | ||||
| 
 | ||||
| constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt | ||||
| constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt | ||||
| #define ftou floatBitsToUint | ||||
| #define itof intBitsToFloat | ||||
| #define utof uintBitsToFloat | ||||
|  | @ -77,10 +77,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ | |||
| 
 | ||||
| const float fswzadd_modifiers_a[] = float[4](-1.0f,  1.0f, -1.0f,  0.0f ); | ||||
| const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f,  1.0f, -1.0f ); | ||||
| 
 | ||||
| layout (std140, binding = {}) uniform vs_config {{ | ||||
|     float y_direction; | ||||
| }}; | ||||
| )"; | ||||
| 
 | ||||
| class ShaderWriter final { | ||||
|  | @ -402,13 +398,6 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
|     return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | ||||
| } | ||||
| 
 | ||||
| bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) { | ||||
|     const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size()); | ||||
|     // We waste one UBO for emulation
 | ||||
|     const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1; | ||||
|     return num_ubos > num_available_ubos; | ||||
| } | ||||
| 
 | ||||
| struct GenericVaryingDescription { | ||||
|     std::string name; | ||||
|     u8 first_element = 0; | ||||
|  | @ -420,9 +409,8 @@ public: | |||
|     explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, | ||||
|                             ShaderType stage_, std::string_view identifier_, | ||||
|                             std::string_view suffix_) | ||||
|         : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_}, | ||||
|           suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{ | ||||
|                                                        UseUnifiedUniforms(device_, ir_, stage_)} { | ||||
|         : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, | ||||
|           identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { | ||||
|         if (stage != ShaderType::Compute) { | ||||
|             transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||||
|         } | ||||
|  | @ -516,7 +504,8 @@ private: | |||
|         if (!identifier.empty()) { | ||||
|             code.AddLine("// {}", identifier); | ||||
|         } | ||||
|         code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core"); | ||||
|         const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); | ||||
|         code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core"); | ||||
|         code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); | ||||
|         if (device.HasShaderBallot()) { | ||||
|             code.AddLine("#extension GL_ARB_shader_ballot : require"); | ||||
|  | @ -542,7 +531,7 @@ private: | |||
| 
 | ||||
|         code.AddNewLine(); | ||||
| 
 | ||||
|         code.AddLine(CommonDeclarations, EmulationUniformBlockBinding); | ||||
|         code.AddLine(COMMON_DECLARATIONS); | ||||
|     } | ||||
| 
 | ||||
|     void DeclareVertex() { | ||||
|  | @ -865,17 +854,6 @@ private: | |||
|     } | ||||
| 
 | ||||
|     void DeclareConstantBuffers() { | ||||
|         if (use_unified_uniforms) { | ||||
|             const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer + | ||||
|                                 static_cast<u32>(ir.GetGlobalMemory().size()); | ||||
|             code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{", | ||||
|                          binding); | ||||
|             code.AddLine("    uint cbufs[];"); | ||||
|             code.AddLine("}};"); | ||||
|             code.AddNewLine(); | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         u32 binding = device.GetBaseBindings(stage).uniform_buffer; | ||||
|         for (const auto& [index, info] : ir.GetConstantBuffers()) { | ||||
|             const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); | ||||
|  | @ -1081,29 +1059,17 @@ private: | |||
| 
 | ||||
|         if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | ||||
|             const Node offset = cbuf->GetOffset(); | ||||
|             const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS; | ||||
| 
 | ||||
|             if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||||
|                 // Direct access
 | ||||
|                 const u32 offset_imm = immediate->GetValue(); | ||||
|                 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); | ||||
|                 if (use_unified_uniforms) { | ||||
|                     return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4), | ||||
|                             Type::Uint}; | ||||
|                 } else { | ||||
|                 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), | ||||
|                                     offset_imm / (4 * 4), (offset_imm / 4) % 4), | ||||
|                         Type::Uint}; | ||||
|             } | ||||
|             } | ||||
| 
 | ||||
|             // Indirect access
 | ||||
|             if (use_unified_uniforms) { | ||||
|                 return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset, | ||||
|                                     Visit(offset).AsUint()), | ||||
|                         Type::Uint}; | ||||
|             } | ||||
| 
 | ||||
|             const std::string final_offset = code.GenerateTemporary(); | ||||
|             code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); | ||||
| 
 | ||||
|  | @ -2293,7 +2259,6 @@ private: | |||
|                 } | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         if (header.ps.omap.depth) { | ||||
|             // The depth output is always 2 registers after the last color output, and current_reg
 | ||||
|             // already contains one past the last color register.
 | ||||
|  | @ -2337,7 +2302,8 @@ private: | |||
|     } | ||||
| 
 | ||||
|     Expression YNegate(Operation operation) { | ||||
|         return {"y_direction", Type::Float}; | ||||
|         // Y_NEGATE is mapped to this uniform value
 | ||||
|         return {"gl_FrontMaterial.ambient.a", Type::Float}; | ||||
|     } | ||||
| 
 | ||||
|     template <u32 element> | ||||
|  | @ -2787,7 +2753,6 @@ private: | |||
|     const std::string_view identifier; | ||||
|     const std::string_view suffix; | ||||
|     const Header header; | ||||
|     const bool use_unified_uniforms; | ||||
|     std::unordered_map<u8, VaryingTFB> transform_feedback; | ||||
| 
 | ||||
|     ShaderWriter code; | ||||
|  | @ -3003,8 +2968,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s | |||
|     for (std::size_t i = 0; i < std::size(clip_distances); ++i) { | ||||
|         entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; | ||||
|     } | ||||
|     for (const auto& buffer : entries.const_buffers) { | ||||
|         entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); | ||||
|     } | ||||
|     entries.shader_length = ir.GetLength(); | ||||
|     entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage); | ||||
|     return entries; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -55,7 +55,7 @@ struct ShaderEntries { | |||
|     std::vector<ImageEntry> images; | ||||
|     std::size_t shader_length{}; | ||||
|     u32 clip_distances{}; | ||||
|     bool use_unified_uniforms{}; | ||||
|     u32 enabled_uniform_buffers{}; | ||||
| }; | ||||
| 
 | ||||
| ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||||
|  |  | |||
|  | @ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) { | |||
|     FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks); | ||||
| } | ||||
| 
 | ||||
| void SetupDirtyVertexArrays(Tables& tables) { | ||||
|     static constexpr std::size_t num_array = 3; | ||||
| void SetupDirtyVertexInstances(Tables& tables) { | ||||
|     static constexpr std::size_t instance_base_offset = 3; | ||||
|     for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { | ||||
|         const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); | ||||
|         const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]); | ||||
| 
 | ||||
|         FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers); | ||||
|         FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers); | ||||
| 
 | ||||
|         const std::size_t instance_array_offset = array_offset + instance_base_offset; | ||||
|         tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i); | ||||
|         tables[1][instance_array_offset] = VertexInstances; | ||||
|  | @ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) { | |||
| StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} { | ||||
|     auto& dirty = gpu.Maxwell3D().dirty; | ||||
|     auto& tables = dirty.tables; | ||||
|     SetupDirtyRenderTargets(tables); | ||||
|     SetupDirtyFlags(tables); | ||||
|     SetupDirtyColorMasks(tables); | ||||
|     SetupDirtyViewports(tables); | ||||
|     SetupDirtyScissors(tables); | ||||
|     SetupDirtyVertexArrays(tables); | ||||
|     SetupDirtyVertexInstances(tables); | ||||
|     SetupDirtyVertexFormat(tables); | ||||
|     SetupDirtyShaders(tables); | ||||
|     SetupDirtyPolygonModes(tables); | ||||
|  | @ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} | |||
|     SetupDirtyClipControl(tables); | ||||
|     SetupDirtyDepthClampEnabled(tables); | ||||
|     SetupDirtyMisc(tables); | ||||
| 
 | ||||
|     auto& store = dirty.on_write_stores; | ||||
|     store[VertexBuffers] = true; | ||||
|     for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { | ||||
|         store[VertexBuffer0 + i] = true; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void StateTracker::InvalidateStreamBuffer() { | ||||
|     flags[Dirty::VertexBuffers] = true; | ||||
|     for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { | ||||
|         flags[index] = true; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -28,10 +28,6 @@ enum : u8 { | |||
|     VertexFormat0, | ||||
|     VertexFormat31 = VertexFormat0 + 31, | ||||
| 
 | ||||
|     VertexBuffers, | ||||
|     VertexBuffer0, | ||||
|     VertexBuffer31 = VertexBuffer0 + 31, | ||||
| 
 | ||||
|     VertexInstances, | ||||
|     VertexInstance0, | ||||
|     VertexInstance31 = VertexInstance0 + 31, | ||||
|  | @ -92,8 +88,6 @@ class StateTracker { | |||
| public: | ||||
|     explicit StateTracker(Tegra::GPU& gpu); | ||||
| 
 | ||||
|     void InvalidateStreamBuffer(); | ||||
| 
 | ||||
|     void BindIndexBuffer(GLuint new_index_buffer) { | ||||
|         if (index_buffer == new_index_buffer) { | ||||
|             return; | ||||
|  | @ -110,13 +104,32 @@ public: | |||
|         glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); | ||||
|     } | ||||
| 
 | ||||
|     void ClipControl(GLenum new_origin, GLenum new_depth) { | ||||
|         if (new_origin == origin && new_depth == depth) { | ||||
|             return; | ||||
|         } | ||||
|         origin = new_origin; | ||||
|         depth = new_depth; | ||||
|         glClipControl(origin, depth); | ||||
|     } | ||||
| 
 | ||||
|     void SetYNegate(bool new_y_negate) { | ||||
|         if (new_y_negate == y_negate) { | ||||
|             return; | ||||
|         } | ||||
|         // Y_NEGATE is mapped to gl_FrontMaterial.ambient.a
 | ||||
|         y_negate = new_y_negate; | ||||
|         const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f}; | ||||
|         glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data()); | ||||
|     } | ||||
| 
 | ||||
|     void NotifyScreenDrawVertexArray() { | ||||
|         flags[OpenGL::Dirty::VertexFormats] = true; | ||||
|         flags[OpenGL::Dirty::VertexFormat0 + 0] = true; | ||||
|         flags[OpenGL::Dirty::VertexFormat0 + 1] = true; | ||||
| 
 | ||||
|         flags[OpenGL::Dirty::VertexBuffers] = true; | ||||
|         flags[OpenGL::Dirty::VertexBuffer0] = true; | ||||
|         flags[VideoCommon::Dirty::VertexBuffers] = true; | ||||
|         flags[VideoCommon::Dirty::VertexBuffer0] = true; | ||||
| 
 | ||||
|         flags[OpenGL::Dirty::VertexInstances] = true; | ||||
|         flags[OpenGL::Dirty::VertexInstance0 + 0] = true; | ||||
|  | @ -202,6 +215,9 @@ private: | |||
| 
 | ||||
|     GLuint framebuffer = 0; | ||||
|     GLuint index_buffer = 0; | ||||
|     GLenum origin = GL_LOWER_LEFT; | ||||
|     GLenum depth = GL_NEGATIVE_ONE_TO_ONE; | ||||
|     bool y_negate = false; | ||||
| }; | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -1,70 +1,64 @@ | |||
| // Copyright 2018 Citra Emulator Project
 | ||||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <tuple> | ||||
| #include <vector> | ||||
| #include <array> | ||||
| #include <memory> | ||||
| #include <span> | ||||
| 
 | ||||
| #include <glad/glad.h> | ||||
| 
 | ||||
| #include "common/alignment.h" | ||||
| #include "common/assert.h" | ||||
| #include "common/microprofile.h" | ||||
| #include "video_core/renderer_opengl/gl_device.h" | ||||
| #include "video_core/renderer_opengl/gl_state_tracker.h" | ||||
| #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||||
| 
 | ||||
| MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | ||||
|                     MP_RGB(128, 128, 192)); | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_) | ||||
|     : state_tracker{state_tracker_} { | ||||
|     gl_buffer.Create(); | ||||
| 
 | ||||
|     static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; | ||||
|     glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags); | ||||
|     mapped_ptr = static_cast<u8*>( | ||||
|         glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); | ||||
| 
 | ||||
|     if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { | ||||
|         glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); | ||||
|         glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); | ||||
| StreamBuffer::StreamBuffer() { | ||||
|     static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; | ||||
|     buffer.Create(); | ||||
|     glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); | ||||
|     glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); | ||||
|     mapped_pointer = | ||||
|         static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); | ||||
|     for (OGLSync& sync : fences) { | ||||
|         sync.Create(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| OGLStreamBuffer::~OGLStreamBuffer() { | ||||
|     glUnmapNamedBuffer(gl_buffer.handle); | ||||
|     gl_buffer.Release(); | ||||
| std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { | ||||
|     ASSERT(size < REGION_SIZE); | ||||
|     for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; | ||||
|          ++region) { | ||||
|         fences[region].Create(); | ||||
|     } | ||||
|     used_iterator = iterator; | ||||
| 
 | ||||
| std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { | ||||
|     ASSERT(size <= BUFFER_SIZE); | ||||
|     ASSERT(alignment <= BUFFER_SIZE); | ||||
|     mapped_size = size; | ||||
| 
 | ||||
|     if (alignment > 0) { | ||||
|         buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); | ||||
|     for (size_t region = Region(free_iterator) + 1, | ||||
|                 region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); | ||||
|          region < region_end; ++region) { | ||||
|         glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); | ||||
|         fences[region].Release(); | ||||
|     } | ||||
| 
 | ||||
|     if (buffer_pos + size > BUFFER_SIZE) { | ||||
|         MICROPROFILE_SCOPE(OpenGL_StreamBuffer); | ||||
|         glInvalidateBufferData(gl_buffer.handle); | ||||
|         state_tracker.InvalidateStreamBuffer(); | ||||
| 
 | ||||
|         buffer_pos = 0; | ||||
|     if (iterator + size > free_iterator) { | ||||
|         free_iterator = iterator + size; | ||||
|     } | ||||
| 
 | ||||
|     return std::make_pair(mapped_ptr + buffer_pos, buffer_pos); | ||||
|     if (iterator + size > STREAM_BUFFER_SIZE) { | ||||
|         for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { | ||||
|             fences[region].Create(); | ||||
|         } | ||||
|         used_iterator = 0; | ||||
|         iterator = 0; | ||||
|         free_iterator = size; | ||||
| 
 | ||||
| void OGLStreamBuffer::Unmap(GLsizeiptr size) { | ||||
|     ASSERT(size <= mapped_size); | ||||
| 
 | ||||
|     if (size > 0) { | ||||
|         glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size); | ||||
|         for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { | ||||
|             glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); | ||||
|             fences[region].Release(); | ||||
|         } | ||||
| 
 | ||||
|     buffer_pos += size; | ||||
|     } | ||||
|     const size_t offset = iterator; | ||||
|     iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); | ||||
|     return {std::span(mapped_pointer + offset, size), offset}; | ||||
| } | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -1,9 +1,12 @@ | |||
| // Copyright 2018 Citra Emulator Project
 | ||||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <array> | ||||
| #include <memory> | ||||
| #include <span> | ||||
| #include <utility> | ||||
| 
 | ||||
| #include <glad/glad.h> | ||||
|  | @ -13,48 +16,35 @@ | |||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| class Device; | ||||
| class StateTracker; | ||||
| class StreamBuffer { | ||||
|     static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024; | ||||
|     static constexpr size_t NUM_SYNCS = 16; | ||||
|     static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS; | ||||
|     static constexpr size_t MAX_ALIGNMENT = 256; | ||||
|     static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0); | ||||
|     static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0); | ||||
|     static_assert(REGION_SIZE % MAX_ALIGNMENT == 0); | ||||
| 
 | ||||
| class OGLStreamBuffer : private NonCopyable { | ||||
| public: | ||||
|     explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_); | ||||
|     ~OGLStreamBuffer(); | ||||
|     explicit StreamBuffer(); | ||||
| 
 | ||||
|     /*
 | ||||
|      * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes | ||||
|      * and the optional alignment requirement. | ||||
|      * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. | ||||
|      * The return values are the pointer to the new chunk, and the offset within the buffer. | ||||
|      * The actual used size must be specified on unmapping the chunk. | ||||
|      */ | ||||
|     std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0); | ||||
|     [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept; | ||||
| 
 | ||||
|     void Unmap(GLsizeiptr size); | ||||
| 
 | ||||
// Accessor: returns the raw GL handle held by the gl_buffer member.
|     GLuint Handle() const { | ||||
|         return gl_buffer.handle; | ||||
|     } | ||||
| 
 | ||||
// Accessor: returns the gpu_address member (declared with an initial value of 0).
// NOTE(review): where gpu_address gets assigned is not visible in this view.
|     u64 Address() const { | ||||
|         return gpu_address; | ||||
|     } | ||||
| 
 | ||||
|     GLsizeiptr Size() const noexcept { | ||||
|         return BUFFER_SIZE; | ||||
|     [[nodiscard]] GLuint Handle() const noexcept { | ||||
|         return buffer.handle; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024; | ||||
// Maps a byte offset to the index of the REGION_SIZE-sized slice that contains it.
// REGION_SIZE is STREAM_BUFFER_SIZE / NUM_SYNCS, so offsets inside the stream buffer
// yield an index in [0, NUM_SYNCS).
// NOTE(review): presumably this index selects an entry of `fences` - confirm at call sites.
|     [[nodiscard]] static size_t Region(size_t offset) noexcept { | ||||
      // Integer division: every offset within the same slice produces the same index.
|         return offset / REGION_SIZE; | ||||
|     } | ||||
| 
 | ||||
|     StateTracker& state_tracker; | ||||
| 
 | ||||
|     OGLBuffer gl_buffer; | ||||
| 
 | ||||
|     GLuint64EXT gpu_address = 0; | ||||
|     GLintptr buffer_pos = 0; | ||||
|     GLsizeiptr mapped_size = 0; | ||||
|     u8* mapped_ptr = nullptr; | ||||
|     size_t iterator = 0; | ||||
|     size_t used_iterator = 0; | ||||
|     size_t free_iterator = 0; | ||||
|     u8* mapped_pointer = nullptr; | ||||
|     OGLBuffer buffer; | ||||
|     std::array<OGLSync, NUM_SYNCS> fences; | ||||
| }; | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { | |||
| 
 | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_) | ||||
|     : span(map, size), sync{sync_}, handle{handle_} {} | ||||
| 
 | ||||
| ImageBufferMap::~ImageBufferMap() { | ||||
|     if (sync) { | ||||
|         sync->Create(); | ||||
|  | @ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() { | |||
|     glFinish(); | ||||
| } | ||||
| 
 | ||||
| ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { | ||||
| ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { | ||||
|     return upload_buffers.RequestMap(size, true); | ||||
| } | ||||
| 
 | ||||
| ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { | ||||
| ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { | ||||
|     return download_buffers.RequestMap(size, false); | ||||
| } | ||||
| 
 | ||||
|  | @ -553,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, | |||
| } | ||||
| 
 | ||||
| void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, | ||||
|                                                 size_t buffer_offset, | ||||
|                                                 std::span<const SwizzleParameters> swizzles) { | ||||
|     switch (image.info.type) { | ||||
|     case ImageType::e2D: | ||||
|         return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); | ||||
|         return util_shaders.BlockLinearUpload2D(image, map, swizzles); | ||||
|     case ImageType::e3D: | ||||
|         return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); | ||||
|         return util_shaders.BlockLinearUpload3D(image, map, swizzles); | ||||
|     case ImageType::Linear: | ||||
|         return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); | ||||
|         return util_shaders.PitchUpload(image, map, swizzles); | ||||
|     default: | ||||
|         UNREACHABLE(); | ||||
|         break; | ||||
|  | @ -596,7 +592,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_ | |||
|                                                                bool insert_fence) { | ||||
|     const size_t index = RequestBuffer(requested_size); | ||||
|     OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; | ||||
|     return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); | ||||
|     return ImageBufferMap{ | ||||
|         .mapped_span = std::span(maps[index], requested_size), | ||||
|         .sync = sync, | ||||
|         .buffer = buffers[index].handle, | ||||
|     }; | ||||
| } | ||||
| 
 | ||||
| size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { | ||||
|  | @ -709,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | ||||
| void Image::UploadMemory(const ImageBufferMap& map, | ||||
|                          std::span<const VideoCommon::BufferImageCopy> copies) { | ||||
|     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); | ||||
|     glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); | ||||
|     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); | ||||
|     glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); | ||||
| 
 | ||||
|     glPixelStorei(GL_UNPACK_ALIGNMENT, 1); | ||||
| 
 | ||||
|  | @ -728,23 +728,23 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | |||
|             current_image_height = copy.buffer_image_height; | ||||
|             glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); | ||||
|         } | ||||
|         CopyBufferToImage(copy, buffer_offset); | ||||
|         CopyBufferToImage(copy, map.offset); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | ||||
| void Image::UploadMemory(const ImageBufferMap& map, | ||||
|                          std::span<const VideoCommon::BufferCopy> copies) { | ||||
|     for (const VideoCommon::BufferCopy& copy : copies) { | ||||
|         glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset, | ||||
|         glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, | ||||
|                                  copy.dst_offset, copy.size); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, | ||||
| void Image::DownloadMemory(ImageBufferMap& map, | ||||
|                            std::span<const VideoCommon::BufferImageCopy> copies) { | ||||
|     glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
 | ||||
| 
 | ||||
|     glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); | ||||
|     glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); | ||||
|     glPixelStorei(GL_PACK_ALIGNMENT, 1); | ||||
| 
 | ||||
|     u32 current_row_length = std::numeric_limits<u32>::max(); | ||||
|  | @ -759,7 +759,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, | |||
|             current_image_height = copy.buffer_image_height; | ||||
|             glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); | ||||
|         } | ||||
|         CopyImageToBuffer(copy, buffer_offset); | ||||
|         CopyImageToBuffer(copy, map.offset); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -31,23 +31,13 @@ using VideoCommon::NUM_RT; | |||
| using VideoCommon::Offset2D; | ||||
| using VideoCommon::RenderTargets; | ||||
| 
 | ||||
| class ImageBufferMap { | ||||
| public: | ||||
|     explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync); | ||||
| struct ImageBufferMap { | ||||
|     ~ImageBufferMap(); | ||||
| 
 | ||||
|     GLuint Handle() const noexcept { | ||||
|         return handle; | ||||
|     } | ||||
| 
 | ||||
|     std::span<u8> Span() const noexcept { | ||||
|         return span; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     std::span<u8> span; | ||||
|     std::span<u8> mapped_span; | ||||
|     size_t offset = 0; | ||||
|     OGLSync* sync; | ||||
|     GLuint handle; | ||||
|     GLuint buffer; | ||||
| }; | ||||
| 
 | ||||
| struct FormatProperties { | ||||
|  | @ -69,9 +59,9 @@ public: | |||
| 
 | ||||
|     void Finish(); | ||||
| 
 | ||||
|     ImageBufferMap MapUploadBuffer(size_t size); | ||||
|     ImageBufferMap UploadStagingBuffer(size_t size); | ||||
| 
 | ||||
|     ImageBufferMap MapDownloadBuffer(size_t size); | ||||
|     ImageBufferMap DownloadStagingBuffer(size_t size); | ||||
| 
 | ||||
|     void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||||
| 
 | ||||
|  | @ -89,7 +79,7 @@ public: | |||
|                          Tegra::Engines::Fermi2D::Filter filter, | ||||
|                          Tegra::Engines::Fermi2D::Operation operation); | ||||
| 
 | ||||
|     void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||||
|     void AccelerateImageUpload(Image& image, const ImageBufferMap& map, | ||||
|                                std::span<const VideoCommon::SwizzleParameters> swizzles); | ||||
| 
 | ||||
|     void InsertUploadMemoryBarrier(); | ||||
|  | @ -148,14 +138,12 @@ public: | |||
|     explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | ||||
|                    VAddr cpu_addr); | ||||
| 
 | ||||
|     void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | ||||
|     void UploadMemory(const ImageBufferMap& map, | ||||
|                       std::span<const VideoCommon::BufferImageCopy> copies); | ||||
| 
 | ||||
|     void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | ||||
|                       std::span<const VideoCommon::BufferCopy> copies); | ||||
|     void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies); | ||||
| 
 | ||||
|     void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, | ||||
|                         std::span<const VideoCommon::BufferImageCopy> copies); | ||||
|     void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); | ||||
| 
 | ||||
|     GLuint Handle() const noexcept { | ||||
|         return texture.handle; | ||||
|  |  | |||
|  | @ -29,9 +29,7 @@ | |||
| #include "video_core/textures/decoders.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
| constexpr GLint PositionLocation = 0; | ||||
| constexpr GLint TexCoordLocation = 1; | ||||
| constexpr GLint ModelViewMatrixLocation = 0; | ||||
|  | @ -124,7 +122,6 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit | |||
|         break; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | ||||
|  | @ -132,7 +129,17 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | |||
|                                Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | ||||
|                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) | ||||
|     : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, | ||||
|       emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {} | ||||
|       emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, | ||||
|       program_manager{device}, | ||||
|       rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { | ||||
|     if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { | ||||
|         glEnable(GL_DEBUG_OUTPUT); | ||||
|         glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | ||||
|         glDebugMessageCallback(DebugHandler, nullptr); | ||||
|     } | ||||
|     AddTelemetryFields(); | ||||
|     InitOpenGLObjects(); | ||||
| } | ||||
| 
 | ||||
| RendererOpenGL::~RendererOpenGL() = default; | ||||
| 
 | ||||
|  | @ -148,7 +155,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 
 | ||||
|     ++m_current_frame; | ||||
| 
 | ||||
|     rasterizer->TickFrame(); | ||||
|     rasterizer.TickFrame(); | ||||
| 
 | ||||
|     context->SwapBuffers(); | ||||
|     render_window.OnFrameDisplayed(); | ||||
|  | @ -179,7 +186,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf | |||
|     framebuffer_crop_rect = framebuffer.crop_rect; | ||||
| 
 | ||||
|     const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; | ||||
|     if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { | ||||
|     if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|  | @ -267,6 +274,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
|     // Enable unified vertex attributes and query vertex buffer address when the driver supports it
 | ||||
|     if (device.HasVertexBufferUnifiedMemory()) { | ||||
|         glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); | ||||
|         glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); | ||||
| 
 | ||||
|         glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); | ||||
|         glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, | ||||
|  | @ -289,14 +297,6 @@ void RendererOpenGL::AddTelemetryFields() { | |||
|     telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); | ||||
| } | ||||
| 
 | ||||
// Creates the RasterizerOpenGL instance on first use.
// Calling it again is harmless: when `rasterizer` is already set the function returns
// without touching it.
| void RendererOpenGL::CreateRasterizer() { | ||||
|     if (rasterizer) { | ||||
          // A rasterizer already exists; keep it.
|         return; | ||||
|     } | ||||
      // The rasterizer is built from renderer-owned state passed in as constructor arguments.
|     rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device, | ||||
|                                                     screen_info, program_manager, state_tracker); | ||||
| } | ||||
| 
 | ||||
| void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | ||||
|                                                  const Tegra::FramebufferConfig& framebuffer) { | ||||
|     texture.width = framebuffer.width; | ||||
|  | @ -407,6 +407,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 
 | ||||
|     program_manager.BindHostPipeline(pipeline.handle); | ||||
| 
 | ||||
|     state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); | ||||
|     glEnable(GL_CULL_FACE); | ||||
|     if (screen_info.display_srgb) { | ||||
|         glEnable(GL_FRAMEBUFFER_SRGB); | ||||
|  | @ -425,7 +426,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
|     glCullFace(GL_BACK); | ||||
|     glFrontFace(GL_CW); | ||||
|     glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); | ||||
|     glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); | ||||
|     glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), | ||||
|                        static_cast<GLfloat>(layout.height)); | ||||
|     glDepthRangeIndexed(0, 0.0, 0.0); | ||||
|  | @ -497,25 +497,4 @@ void RendererOpenGL::RenderScreenshot() { | |||
|     renderer_settings.screenshot_requested = false; | ||||
| } | ||||
| 
 | ||||
// Performs renderer start-up.
// Returns false when GLAD_GL_VERSION_4_6 is not set (before any GL objects or the
// rasterizer are created); returns true once InitOpenGLObjects() and CreateRasterizer()
// have run.
| bool RendererOpenGL::Init() { | ||||
      // Install the debug message callback only when the renderer_debug setting is on
      // and GLAD reports KHR_debug.
|     if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { | ||||
|         glEnable(GL_DEBUG_OUTPUT); | ||||
|         glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | ||||
|         glDebugMessageCallback(DebugHandler, nullptr); | ||||
|     } | ||||
| 
 | ||||
      // Telemetry is recorded before the version check, so it is also reported when
      // initialization is rejected below.
|     AddTelemetryFields(); | ||||
| 
 | ||||
      // Bail out when the OpenGL 4.6 flag is not set.
|     if (!GLAD_GL_VERSION_4_6) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     InitOpenGLObjects(); | ||||
|     CreateRasterizer(); | ||||
| 
 | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
// Empty body: this override performs no work.
| void RendererOpenGL::ShutDown() {} | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -10,6 +10,7 @@ | |||
| #include "common/math_util.h" | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/renderer_opengl/gl_device.h" | ||||
| #include "video_core/renderer_opengl/gl_rasterizer.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_state_tracker.h" | ||||
|  | @ -63,18 +64,18 @@ public: | |||
|                             std::unique_ptr<Core::Frontend::GraphicsContext> context_); | ||||
|     ~RendererOpenGL() override; | ||||
| 
 | ||||
|     bool Init() override; | ||||
|     void ShutDown() override; | ||||
|     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | ||||
| 
 | ||||
// Returns the address of the `rasterizer` member as a VideoCore::RasterizerInterface*.
// NOTE(review): the pointer refers to a member of this renderer, so it presumably must
// not be used after the renderer is destroyed - confirm against callers.
|     VideoCore::RasterizerInterface* ReadRasterizer() override { | ||||
|         return &rasterizer; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     /// Initializes the OpenGL state and creates persistent objects.
 | ||||
|     void InitOpenGLObjects(); | ||||
| 
 | ||||
|     void AddTelemetryFields(); | ||||
| 
 | ||||
|     void CreateRasterizer(); | ||||
| 
 | ||||
|     void ConfigureFramebufferTexture(TextureInfo& texture, | ||||
|                                      const Tegra::FramebufferConfig& framebuffer); | ||||
| 
 | ||||
|  | @ -98,8 +99,10 @@ private: | |||
|     Core::Memory::Memory& cpu_memory; | ||||
|     Tegra::GPU& gpu; | ||||
| 
 | ||||
|     const Device device; | ||||
|     StateTracker state_tracker{gpu}; | ||||
|     Device device; | ||||
|     StateTracker state_tracker; | ||||
|     ProgramManager program_manager; | ||||
|     RasterizerOpenGL rasterizer; | ||||
| 
 | ||||
|     // OpenGL object IDs
 | ||||
|     OGLSampler present_sampler; | ||||
|  | @ -115,9 +118,6 @@ private: | |||
|     /// Display information for Switch screen
 | ||||
|     ScreenInfo screen_info; | ||||
| 
 | ||||
|     /// Global dummy shader pipeline
 | ||||
|     ProgramManager program_manager; | ||||
| 
 | ||||
|     /// OpenGL framebuffer data
 | ||||
|     std::vector<u8> gl_framebuffer_data; | ||||
| 
 | ||||
|  |  | |||
|  | @ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) | |||
| 
 | ||||
| UtilShaders::~UtilShaders() = default; | ||||
| 
 | ||||
| void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||||
| void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | ||||
|                                       std::span<const SwizzleParameters> swizzles) { | ||||
|     static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; | ||||
|     static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; | ||||
|  | @ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s | |||
|     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||||
| 
 | ||||
|     program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); | ||||
|     glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | ||||
|     glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | ||||
|     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||||
| 
 | ||||
|     const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | ||||
|     for (const SwizzleParameters& swizzle : swizzles) { | ||||
|         const Extent3D num_tiles = swizzle.num_tiles; | ||||
|         const size_t input_offset = swizzle.buffer_offset + buffer_offset; | ||||
|         const size_t input_offset = swizzle.buffer_offset + map.offset; | ||||
| 
 | ||||
|         const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | ||||
|         const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | ||||
|  | @ -91,8 +91,8 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s | |||
|         glUniform1ui(5, params.x_shift); | ||||
|         glUniform1ui(6, params.block_height); | ||||
|         glUniform1ui(7, params.block_height_mask); | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | ||||
|                           input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, | ||||
|                           image.guest_size_bytes - swizzle.buffer_offset); | ||||
|         glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, | ||||
|                            GL_WRITE_ONLY, store_format); | ||||
|         glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); | ||||
|  | @ -100,7 +100,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s | |||
|     program_manager.RestoreGuestCompute(); | ||||
| } | ||||
| 
 | ||||
| void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||||
| void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, | ||||
|                                       std::span<const SwizzleParameters> swizzles) { | ||||
|     static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; | ||||
| 
 | ||||
|  | @ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s | |||
|     static constexpr GLuint BINDING_INPUT_BUFFER = 1; | ||||
|     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||||
| 
 | ||||
|     glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | ||||
|     glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | ||||
|     program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); | ||||
|     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||||
| 
 | ||||
|     const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | ||||
|     for (const SwizzleParameters& swizzle : swizzles) { | ||||
|         const Extent3D num_tiles = swizzle.num_tiles; | ||||
|         const size_t input_offset = swizzle.buffer_offset + buffer_offset; | ||||
|         const size_t input_offset = swizzle.buffer_offset + map.offset; | ||||
| 
 | ||||
|         const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | ||||
|         const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | ||||
|  | @ -132,8 +132,8 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s | |||
|         glUniform1ui(7, params.block_height_mask); | ||||
|         glUniform1ui(8, params.block_depth); | ||||
|         glUniform1ui(9, params.block_depth_mask); | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | ||||
|                           input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, | ||||
|                           image.guest_size_bytes - swizzle.buffer_offset); | ||||
|         glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, | ||||
|                            GL_WRITE_ONLY, store_format); | ||||
|         glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); | ||||
|  | @ -141,7 +141,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s | |||
|     program_manager.RestoreGuestCompute(); | ||||
| } | ||||
| 
 | ||||
| void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||||
| void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, | ||||
|                               std::span<const SwizzleParameters> swizzles) { | ||||
|     static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; | ||||
|     static constexpr GLuint BINDING_INPUT_BUFFER = 0; | ||||
|  | @ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu | |||
|                          "Non-power of two images are not implemented"); | ||||
| 
 | ||||
|     program_manager.BindHostCompute(pitch_unswizzle_program.handle); | ||||
|     glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | ||||
|     glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | ||||
|     glUniform2ui(LOC_ORIGIN, 0, 0); | ||||
|     glUniform2i(LOC_DESTINATION, 0, 0); | ||||
|     glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); | ||||
|  | @ -167,13 +167,13 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu | |||
|     glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); | ||||
|     for (const SwizzleParameters& swizzle : swizzles) { | ||||
|         const Extent3D num_tiles = swizzle.num_tiles; | ||||
|         const size_t input_offset = swizzle.buffer_offset + buffer_offset; | ||||
|         const size_t input_offset = swizzle.buffer_offset + map.offset; | ||||
| 
 | ||||
|         const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | ||||
|         const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | ||||
| 
 | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | ||||
|                           input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, | ||||
|                           image.guest_size_bytes - swizzle.buffer_offset); | ||||
|         glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); | ||||
|     } | ||||
|     program_manager.RestoreGuestCompute(); | ||||
|  |  | |||
|  | @ -15,21 +15,22 @@ | |||
| namespace OpenGL { | ||||
| 
 | ||||
| class Image; | ||||
| class ImageBufferMap; | ||||
| class ProgramManager; | ||||
| 
 | ||||
| struct ImageBufferMap; | ||||
| 
 | ||||
| class UtilShaders { | ||||
| public: | ||||
|     explicit UtilShaders(ProgramManager& program_manager); | ||||
|     ~UtilShaders(); | ||||
| 
 | ||||
|     void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||||
|     void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | ||||
|                              std::span<const VideoCommon::SwizzleParameters> swizzles); | ||||
| 
 | ||||
|     void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||||
|     void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, | ||||
|                              std::span<const VideoCommon::SwizzleParameters> swizzles); | ||||
| 
 | ||||
|     void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, | ||||
|     void PitchUpload(Image& image, const ImageBufferMap& map, | ||||
|                      std::span<const VideoCommon::SwizzleParameters> swizzles); | ||||
| 
 | ||||
|     void CopyBC4(Image& dst_image, Image& src_image, | ||||
|  |  | |||
|  | @ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { | |||
|     return {}; | ||||
| } | ||||
| 
 | ||||
| VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) { | ||||
| VkIndexType IndexFormat(Maxwell::IndexFormat index_format) { | ||||
|     switch (index_format) { | ||||
|     case Maxwell::IndexFormat::UnsignedByte: | ||||
|         if (!device.IsExtIndexTypeUint8Supported()) { | ||||
|             UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device"); | ||||
|             return VK_INDEX_TYPE_UINT16; | ||||
|         } | ||||
|         return VK_INDEX_TYPE_UINT8_EXT; | ||||
|     case Maxwell::IndexFormat::UnsignedShort: | ||||
|         return VK_INDEX_TYPE_UINT16; | ||||
|  |  | |||
|  | @ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib | |||
| 
 | ||||
| VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); | ||||
| 
 | ||||
| VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format); | ||||
| VkIndexType IndexFormat(Maxwell::IndexFormat index_format); | ||||
| 
 | ||||
| VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); | ||||
| 
 | ||||
|  |  | |||
|  | @ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext | |||
|     return separated_extensions; | ||||
| } | ||||
| 
 | ||||
// Builds the Device for the physical device selected by the vulkan_device setting.
//   instance - instance whose physical devices are enumerated
//   dld      - instance dispatch table handed to the physical-device wrapper and to Device
//   surface  - surface forwarded unchanged to the Device constructor
// Throws vk::Exception(VK_ERROR_INITIALIZATION_FAILED) when the configured index is
// negative or not smaller than the number of enumerated devices.
| Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, | ||||
|                     VkSurfaceKHR surface) { | ||||
|     const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); | ||||
|     const s32 device_index = Settings::values.vulkan_device.GetValue(); | ||||
      // Range-check the index before using it to subscript `devices`; an empty device
      // list is rejected by this same check.
|     if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { | ||||
|         LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); | ||||
|         throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||||
|     } | ||||
      // Pair the selected raw handle with the dispatch table, then construct the Device.
|     const vk::PhysicalDevice physical_device(devices[device_index], dld); | ||||
|     return Device(*instance, physical_device, surface, dld); | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | ||||
|                                Core::Frontend::EmuWindow& emu_window, | ||||
|                                Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | ||||
|                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) | ||||
|     : RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_}, | ||||
|       cpu_memory{cpu_memory_}, gpu{gpu_} {} | ||||
|                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) try | ||||
|     : RendererBase(emu_window, std::move(context_)), | ||||
|       telemetry_session(telemetry_session_), | ||||
|       cpu_memory(cpu_memory_), | ||||
|       gpu(gpu_), | ||||
|       library(OpenLibrary()), | ||||
|       instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | ||||
|                               true, Settings::values.renderer_debug)), | ||||
|       debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), | ||||
|       surface(CreateSurface(instance, render_window)), | ||||
|       device(CreateDevice(instance, dld, *surface)), | ||||
|       memory_allocator(device, false), | ||||
|       state_tracker(gpu), | ||||
|       scheduler(device, state_tracker), | ||||
|       swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, | ||||
|                 render_window.GetFramebufferLayout().height, false), | ||||
|       blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, | ||||
|                   screen_info), | ||||
|       rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device, | ||||
|                  memory_allocator, state_tracker, scheduler) { | ||||
|     Report(); | ||||
| } catch (const vk::Exception& exception) { | ||||
|     LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); | ||||
|     throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())}; | ||||
| } | ||||
| 
 | ||||
| RendererVulkan::~RendererVulkan() { | ||||
|     ShutDown(); | ||||
|     void(device.GetLogical().WaitIdle()); | ||||
| } | ||||
| 
 | ||||
| void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||||
|  | @ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
|     if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { | ||||
|         const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | ||||
|         const bool use_accelerated = | ||||
|             rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | ||||
|             rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | ||||
|         const bool is_srgb = use_accelerated && screen_info.is_srgb; | ||||
|         if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) { | ||||
|             swapchain->Create(layout.width, layout.height, is_srgb); | ||||
|             blit_screen->Recreate(); | ||||
|         if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { | ||||
|             swapchain.Create(layout.width, layout.height, is_srgb); | ||||
|             blit_screen.Recreate(); | ||||
|         } | ||||
| 
 | ||||
|         scheduler->WaitWorker(); | ||||
|         scheduler.WaitWorker(); | ||||
| 
 | ||||
|         swapchain->AcquireNextImage(); | ||||
|         const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated); | ||||
|         swapchain.AcquireNextImage(); | ||||
|         const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); | ||||
| 
 | ||||
|         scheduler->Flush(render_semaphore); | ||||
|         scheduler.Flush(render_semaphore); | ||||
| 
 | ||||
|         if (swapchain->Present(render_semaphore)) { | ||||
|             blit_screen->Recreate(); | ||||
|         if (swapchain.Present(render_semaphore)) { | ||||
|             blit_screen.Recreate(); | ||||
|         } | ||||
| 
 | ||||
|         rasterizer->TickFrame(); | ||||
|         rasterizer.TickFrame(); | ||||
|     } | ||||
| 
 | ||||
|     render_window.OnFrameDisplayed(); | ||||
| } | ||||
| 
 | ||||
| bool RendererVulkan::Init() try { | ||||
|     library = OpenLibrary(); | ||||
|     instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | ||||
|                               true, Settings::values.renderer_debug); | ||||
|     if (Settings::values.renderer_debug) { | ||||
|         debug_callback = CreateDebugCallback(instance); | ||||
|     } | ||||
|     surface = CreateSurface(instance, render_window); | ||||
| 
 | ||||
|     InitializeDevice(); | ||||
|     Report(); | ||||
| 
 | ||||
|     memory_allocator = std::make_unique<MemoryAllocator>(*device); | ||||
| 
 | ||||
|     state_tracker = std::make_unique<StateTracker>(gpu); | ||||
| 
 | ||||
|     scheduler = std::make_unique<VKScheduler>(*device, *state_tracker); | ||||
| 
 | ||||
|     const auto& framebuffer = render_window.GetFramebufferLayout(); | ||||
|     swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler); | ||||
|     swapchain->Create(framebuffer.width, framebuffer.height, false); | ||||
| 
 | ||||
|     rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(), | ||||
|                                                     cpu_memory, screen_info, *device, | ||||
|                                                     *memory_allocator, *state_tracker, *scheduler); | ||||
| 
 | ||||
|     blit_screen = | ||||
|         std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device, | ||||
|                                        *memory_allocator, *swapchain, *scheduler, screen_info); | ||||
|     return true; | ||||
| 
 | ||||
| } catch (const vk::Exception& exception) { | ||||
|     LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); | ||||
|     return false; | ||||
| } | ||||
| 
 | ||||
| void RendererVulkan::ShutDown() { | ||||
|     if (!device) { | ||||
|         return; | ||||
|     } | ||||
|     if (const auto& dev = device->GetLogical()) { | ||||
|         dev.WaitIdle(); | ||||
|     } | ||||
|     rasterizer.reset(); | ||||
|     blit_screen.reset(); | ||||
|     scheduler.reset(); | ||||
|     swapchain.reset(); | ||||
|     memory_allocator.reset(); | ||||
|     device.reset(); | ||||
| } | ||||
| 
 | ||||
| void RendererVulkan::InitializeDevice() { | ||||
|     const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); | ||||
|     const s32 device_index = Settings::values.vulkan_device.GetValue(); | ||||
|     if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { | ||||
|         LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); | ||||
|         throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||||
|     } | ||||
|     const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld); | ||||
|     device = std::make_unique<Device>(*instance, physical_device, *surface, dld); | ||||
| } | ||||
| 
 | ||||
| void RendererVulkan::Report() const { | ||||
|     const std::string vendor_name{device->GetVendorName()}; | ||||
|     const std::string model_name{device->GetModelName()}; | ||||
|     const std::string driver_version = GetDriverVersion(*device); | ||||
|     const std::string vendor_name{device.GetVendorName()}; | ||||
|     const std::string model_name{device.GetModelName()}; | ||||
|     const std::string driver_version = GetDriverVersion(device); | ||||
|     const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); | ||||
| 
 | ||||
|     const std::string api_version = GetReadableVersion(device->ApiVersion()); | ||||
|     const std::string api_version = GetReadableVersion(device.ApiVersion()); | ||||
| 
 | ||||
|     const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions()); | ||||
|     const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions()); | ||||
| 
 | ||||
|     LOG_INFO(Render_Vulkan, "Driver: {}", driver_name); | ||||
|     LOG_INFO(Render_Vulkan, "Device: {}", model_name); | ||||
|  | @ -209,21 +179,4 @@ void RendererVulkan::Report() const { | |||
|     telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); | ||||
| } | ||||
| 
 | ||||
| std::vector<std::string> RendererVulkan::EnumerateDevices() try { | ||||
|     vk::InstanceDispatch dld; | ||||
|     const Common::DynamicLibrary library = OpenLibrary(); | ||||
|     const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0); | ||||
|     const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); | ||||
|     std::vector<std::string> names; | ||||
|     names.reserve(physical_devices.size()); | ||||
|     for (const VkPhysicalDevice device : physical_devices) { | ||||
|         names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName); | ||||
|     } | ||||
|     return names; | ||||
| 
 | ||||
| } catch (const vk::Exception& exception) { | ||||
|     LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what()); | ||||
|     return {}; | ||||
| } | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
|  |  | |||
|  | @ -9,8 +9,14 @@ | |||
| #include <vector> | ||||
| 
 | ||||
| #include "common/dynamic_library.h" | ||||
| 
 | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/renderer_vulkan/vk_blit_screen.h" | ||||
| #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||
| #include "video_core/renderer_vulkan/vk_state_tracker.h" | ||||
| #include "video_core/renderer_vulkan/vk_swapchain.h" | ||||
| #include "video_core/vulkan_common/vulkan_device.h" | ||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| 
 | ||||
| namespace Core { | ||||
|  | @ -27,20 +33,6 @@ class GPU; | |||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| class Device; | ||||
| class StateTracker; | ||||
| class MemoryAllocator; | ||||
| class VKBlitScreen; | ||||
| class VKSwapchain; | ||||
| class VKScheduler; | ||||
| 
 | ||||
| struct VKScreenInfo { | ||||
|     VkImageView image_view{}; | ||||
|     u32 width{}; | ||||
|     u32 height{}; | ||||
|     bool is_srgb{}; | ||||
| }; | ||||
| 
 | ||||
| class RendererVulkan final : public VideoCore::RendererBase { | ||||
| public: | ||||
|     explicit RendererVulkan(Core::TelemetrySession& telemtry_session, | ||||
|  | @ -49,15 +41,13 @@ public: | |||
|                             std::unique_ptr<Core::Frontend::GraphicsContext> context_); | ||||
|     ~RendererVulkan() override; | ||||
| 
 | ||||
|     bool Init() override; | ||||
|     void ShutDown() override; | ||||
|     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | ||||
| 
 | ||||
|     static std::vector<std::string> EnumerateDevices(); | ||||
|     VideoCore::RasterizerInterface* ReadRasterizer() override { | ||||
|         return &rasterizer; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     void InitializeDevice(); | ||||
| 
 | ||||
|     void Report() const; | ||||
| 
 | ||||
|     Core::TelemetrySession& telemetry_session; | ||||
|  | @ -68,18 +58,18 @@ private: | |||
|     vk::InstanceDispatch dld; | ||||
| 
 | ||||
|     vk::Instance instance; | ||||
| 
 | ||||
|     vk::DebugUtilsMessenger debug_callback; | ||||
|     vk::SurfaceKHR surface; | ||||
| 
 | ||||
|     VKScreenInfo screen_info; | ||||
| 
 | ||||
|     vk::DebugUtilsMessenger debug_callback; | ||||
|     std::unique_ptr<Device> device; | ||||
|     std::unique_ptr<MemoryAllocator> memory_allocator; | ||||
|     std::unique_ptr<StateTracker> state_tracker; | ||||
|     std::unique_ptr<VKScheduler> scheduler; | ||||
|     std::unique_ptr<VKSwapchain> swapchain; | ||||
|     std::unique_ptr<VKBlitScreen> blit_screen; | ||||
|     Device device; | ||||
|     MemoryAllocator memory_allocator; | ||||
|     StateTracker state_tracker; | ||||
|     VKScheduler scheduler; | ||||
|     VKSwapchain swapchain; | ||||
|     VKBlitScreen blit_screen; | ||||
|     RasterizerVulkan rasterizer; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
|  |  | |||
|  | @ -18,7 +18,6 @@ | |||
| #include "video_core/gpu.h" | ||||
| #include "video_core/host_shaders/vulkan_present_frag_spv.h" | ||||
| #include "video_core/host_shaders/vulkan_present_vert_spv.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_vulkan/renderer_vulkan.h" | ||||
| #include "video_core/renderer_vulkan/vk_blit_screen.h" | ||||
| #include "video_core/renderer_vulkan/vk_master_semaphore.h" | ||||
|  | @ -113,13 +112,12 @@ struct VKBlitScreen::BufferData { | |||
| }; | ||||
| 
 | ||||
| VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, | ||||
|                            Core::Frontend::EmuWindow& render_window_, | ||||
|                            VideoCore::RasterizerInterface& rasterizer_, const Device& device_, | ||||
|                            Core::Frontend::EmuWindow& render_window_, const Device& device_, | ||||
|                            MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_, | ||||
|                            VKScheduler& scheduler_, const VKScreenInfo& screen_info_) | ||||
|     : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, | ||||
|       device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_}, | ||||
|       scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { | ||||
|     : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, | ||||
|       memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_}, | ||||
|       image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { | ||||
|     resource_ticks.resize(image_count); | ||||
| 
 | ||||
|     CreateStaticResources(); | ||||
|  | @ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
|     SetUniformData(data, framebuffer); | ||||
|     SetVertexData(data, framebuffer); | ||||
| 
 | ||||
|     const std::span<u8> map = buffer_commit.Map(); | ||||
|     std::memcpy(map.data(), &data, sizeof(data)); | ||||
|     const std::span<u8> mapped_span = buffer_commit.Map(); | ||||
|     std::memcpy(mapped_span.data(), &data, sizeof(data)); | ||||
| 
 | ||||
|     if (!use_accelerated) { | ||||
|         const u64 image_offset = GetRawImageOffset(framebuffer, image_index); | ||||
|  | @ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
|         const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; | ||||
|         const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); | ||||
|         const size_t size_bytes = GetSizeInBytes(framebuffer); | ||||
|         rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes); | ||||
| 
 | ||||
|         // TODO(Rodrigo): Read this from HLE
 | ||||
|         constexpr u32 block_height_log2 = 4; | ||||
|         const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); | ||||
|         Tegra::Texture::UnswizzleTexture( | ||||
|             map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, | ||||
|             framebuffer.width, framebuffer.height, 1, block_height_log2, 0); | ||||
|             mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), | ||||
|             bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); | ||||
| 
 | ||||
|         const VkBufferImageCopy copy{ | ||||
|             .bufferOffset = image_offset, | ||||
|  | @ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
|         cmdbuf.Draw(4, 1, 0, 0); | ||||
|         cmdbuf.EndRenderPass(); | ||||
|     }); | ||||
| 
 | ||||
|     return *semaphores[image_index]; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -38,12 +38,18 @@ class RasterizerVulkan; | |||
| class VKScheduler; | ||||
| class VKSwapchain; | ||||
| 
 | ||||
| class VKBlitScreen final { | ||||
| struct VKScreenInfo { | ||||
|     VkImageView image_view{}; | ||||
|     u32 width{}; | ||||
|     u32 height{}; | ||||
|     bool is_srgb{}; | ||||
| }; | ||||
| 
 | ||||
| class VKBlitScreen { | ||||
| public: | ||||
|     explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, | ||||
|                           Core::Frontend::EmuWindow& render_window, | ||||
|                           VideoCore::RasterizerInterface& rasterizer, const Device& device, | ||||
|                           MemoryAllocator& memory_allocator, VKSwapchain& swapchain, | ||||
|                           Core::Frontend::EmuWindow& render_window, const Device& device, | ||||
|                           MemoryAllocator& memory_manager, VKSwapchain& swapchain, | ||||
|                           VKScheduler& scheduler, const VKScreenInfo& screen_info); | ||||
|     ~VKBlitScreen(); | ||||
| 
 | ||||
|  | @ -84,7 +90,6 @@ private: | |||
| 
 | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
|     Core::Frontend::EmuWindow& render_window; | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     const Device& device; | ||||
|     MemoryAllocator& memory_allocator; | ||||
|     VKSwapchain& swapchain; | ||||
|  |  | |||
|  | @ -3,188 +3,308 @@ | |||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <array> | ||||
| #include <cstring> | ||||
| #include <memory> | ||||
| #include <span> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "core/core.h" | ||||
| #include "video_core/buffer_cache/buffer_cache.h" | ||||
| #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||
| #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||||
| #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||
| #include "video_core/vulkan_common/vulkan_device.h" | ||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| namespace { | ||||
| VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) { | ||||
|     return VkBufferCopy{ | ||||
|         .srcOffset = copy.src_offset, | ||||
|         .dstOffset = copy.dst_offset, | ||||
|         .size = copy.size, | ||||
|     }; | ||||
| } | ||||
| 
 | ||||
| constexpr VkBufferUsageFlags BUFFER_USAGE = | ||||
|     VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | ||||
|     VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; | ||||
| VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) { | ||||
|     if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) { | ||||
|         return VK_INDEX_TYPE_UINT8_EXT; | ||||
|     } | ||||
|     if (num_elements <= 0xffff) { | ||||
|         return VK_INDEX_TYPE_UINT16; | ||||
|     } | ||||
|     return VK_INDEX_TYPE_UINT32; | ||||
| } | ||||
| 
 | ||||
| constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE = | ||||
|     VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | | ||||
|     VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | | ||||
|     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; | ||||
| 
 | ||||
| constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = | ||||
|     VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | | ||||
|     VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; | ||||
| 
 | ||||
| constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = | ||||
|     VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; | ||||
| size_t BytesPerIndex(VkIndexType index_type) { | ||||
|     switch (index_type) { | ||||
|     case VK_INDEX_TYPE_UINT8_EXT: | ||||
|         return 1; | ||||
|     case VK_INDEX_TYPE_UINT16: | ||||
|         return 2; | ||||
|     case VK_INDEX_TYPE_UINT32: | ||||
|         return 4; | ||||
|     default: | ||||
|         UNREACHABLE_MSG("Invalid index type={}", index_type); | ||||
|         return 1; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| template <typename T> | ||||
| std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { | ||||
|     std::array<T, 6> indices{0, 1, 2, 0, 2, 3}; | ||||
|     std::ranges::transform(indices, indices.begin(), | ||||
|                            [quad, first](u32 index) { return first + index + quad * 4; }); | ||||
|     return indices; | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_, | ||||
|                StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) | ||||
|     : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ | ||||
|                                                                                  staging_pool_} { | ||||
|     buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||||
| Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | ||||
|     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} | ||||
| 
 | ||||
| Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | ||||
|                VAddr cpu_addr_, u64 size_bytes_) | ||||
|     : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | ||||
|     buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|         .flags = 0, | ||||
|         .size = static_cast<VkDeviceSize>(size_), | ||||
|         .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||||
|         .size = SizeBytes(), | ||||
|         .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | ||||
|                  VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | ||||
|                  VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||||
|                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | ||||
|                  VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, | ||||
|         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||
|         .queueFamilyIndexCount = 0, | ||||
|         .pQueueFamilyIndices = nullptr, | ||||
|     }); | ||||
|     commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||||
|     if (runtime.device.HasDebuggingToolAttached()) { | ||||
|         buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); | ||||
|     } | ||||
|     commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||||
| } | ||||
| 
 | ||||
| Buffer::~Buffer() = default; | ||||
| BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, | ||||
|                                        VKScheduler& scheduler_, StagingBufferPool& staging_pool_, | ||||
|                                        VKUpdateDescriptorQueue& update_descriptor_queue_, | ||||
|                                        VKDescriptorPool& descriptor_pool) | ||||
|     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||||
|       staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, | ||||
|       uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||||
|       quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {} | ||||
| 
 | ||||
| void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { | ||||
|     const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload); | ||||
|     std::memcpy(staging.mapped_span.data(), data, data_size); | ||||
| StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) { | ||||
|     return staging_pool.Request(size, MemoryUsage::Upload); | ||||
| } | ||||
| 
 | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
| StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { | ||||
|     return staging_pool.Request(size, MemoryUsage::Download); | ||||
| } | ||||
| 
 | ||||
|     const VkBuffer handle = Handle(); | ||||
|     scheduler.Record([staging = staging.buffer, handle, offset, data_size, | ||||
|                       &device = device](vk::CommandBuffer cmdbuf) { | ||||
|         const VkBufferMemoryBarrier read_barrier{ | ||||
|             .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||||
| void BufferCacheRuntime::Finish() { | ||||
|     scheduler.Finish(); | ||||
| } | ||||
| 
 | ||||
| void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, | ||||
|                                     std::span<const VideoCommon::BufferCopy> copies) { | ||||
|     static constexpr VkMemoryBarrier READ_BARRIER{ | ||||
|         .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||
|         .pNext = nullptr, | ||||
|             .srcAccessMask = | ||||
|                 VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | | ||||
|                 VK_ACCESS_HOST_WRITE_BIT | | ||||
|                 (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), | ||||
|             .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||||
|             .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|             .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|             .buffer = handle, | ||||
|             .offset = offset, | ||||
|             .size = data_size, | ||||
|         .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||||
|         .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|     }; | ||||
|     static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||||
|         .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||
|         .pNext = nullptr, | ||||
|         .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|         .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||||
|     }; | ||||
|     // Measuring a popular game, this number never exceeds the specified size once data is warmed up
 | ||||
|     boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); | ||||
|     std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||||
|                                0, READ_BARRIER); | ||||
|         cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||||
|                                0, WRITE_BARRIER); | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, | ||||
|                                          u32 base_vertex, u32 num_indices, VkBuffer buffer, | ||||
|                                          u32 offset, [[maybe_unused]] u32 size) { | ||||
|     VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format); | ||||
|     VkDeviceSize vk_offset = offset; | ||||
|     VkBuffer vk_buffer = buffer; | ||||
|     if (topology == PrimitiveTopology::Quads) { | ||||
|         vk_index_type = VK_INDEX_TYPE_UINT32; | ||||
|         std::tie(vk_buffer, vk_offset) = | ||||
|             quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset); | ||||
|     } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { | ||||
|         vk_index_type = VK_INDEX_TYPE_UINT16; | ||||
|         std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); | ||||
|     } | ||||
|     if (vk_buffer == VK_NULL_HANDLE) { | ||||
|         // Vulkan doesn't support null index buffers. Replace it with our own null buffer.
 | ||||
|         ReserveNullIndexBuffer(); | ||||
|         vk_buffer = *null_index_buffer; | ||||
|     } | ||||
|     scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) { | ||||
|         cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type); | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) { | ||||
|     ReserveQuadArrayLUT(first + count, true); | ||||
| 
 | ||||
|     // The LUT has the indices 0, 1, 2, and 3 copied as an array
 | ||||
|     // To apply these 'first' offsets we can apply an offset based on the modulus.
 | ||||
|     const VkIndexType index_type = quad_array_lut_index_type; | ||||
|     const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4); | ||||
|     const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type); | ||||
|     scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) { | ||||
|         cmdbuf.BindIndexBuffer(buffer, offset, index_type); | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, | ||||
|                                           u32 stride) { | ||||
|     if (device.IsExtExtendedDynamicStateSupported()) { | ||||
|         scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) { | ||||
|             const VkDeviceSize vk_offset = offset; | ||||
|             const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE; | ||||
|             const VkDeviceSize vk_stride = stride; | ||||
|             cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); | ||||
|         }); | ||||
|     } else { | ||||
|         scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { | ||||
|             cmdbuf.BindVertexBuffer(index, buffer, offset); | ||||
|         }); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, | ||||
|                                                      u32 size) { | ||||
|     if (!device.IsExtTransformFeedbackSupported()) { | ||||
|         // Already logged in the rasterizer
 | ||||
|         return; | ||||
|     } | ||||
|     scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) { | ||||
|         const VkDeviceSize vk_offset = offset; | ||||
|         const VkDeviceSize vk_size = size; | ||||
|         cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size); | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) { | ||||
|     update_descriptor_queue.AddBuffer(buffer, offset, size); | ||||
| } | ||||
| 
 | ||||
| void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) { | ||||
|     if (num_indices <= current_num_indices) { | ||||
|         return; | ||||
|     } | ||||
|     if (wait_for_idle) { | ||||
|         scheduler.Finish(); | ||||
|     } | ||||
|     current_num_indices = num_indices; | ||||
|     quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices); | ||||
| 
 | ||||
|     const u32 num_quads = num_indices / 4; | ||||
|     const u32 num_triangle_indices = num_quads * 6; | ||||
|     const u32 num_first_offset_copies = 4; | ||||
|     const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type); | ||||
|     const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; | ||||
|     quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|         .flags = 0, | ||||
|         .size = size_bytes, | ||||
|         .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||||
|         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||
|         .queueFamilyIndexCount = 0, | ||||
|         .pQueueFamilyIndices = nullptr, | ||||
|     }); | ||||
|     if (device.HasDebuggingToolAttached()) { | ||||
|         quad_array_lut.SetObjectNameEXT("Quad LUT"); | ||||
|     } | ||||
|     quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal); | ||||
| 
 | ||||
|     const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); | ||||
|     u8* staging_data = staging.mapped_span.data(); | ||||
|     const size_t quad_size = bytes_per_index * 6; | ||||
|     for (u32 first = 0; first < num_first_offset_copies; ++first) { | ||||
|         for (u32 quad = 0; quad < num_quads; ++quad) { | ||||
|             switch (quad_array_lut_index_type) { | ||||
|             case VK_INDEX_TYPE_UINT8_EXT: | ||||
|                 std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size); | ||||
|                 break; | ||||
|             case VK_INDEX_TYPE_UINT16: | ||||
|                 std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size); | ||||
|                 break; | ||||
|             case VK_INDEX_TYPE_UINT32: | ||||
|                 std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size); | ||||
|                 break; | ||||
|             default: | ||||
|                 UNREACHABLE(); | ||||
|                 break; | ||||
|             } | ||||
|             staging_data += quad_size; | ||||
|         } | ||||
|     } | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, | ||||
|                       dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) { | ||||
|         const VkBufferCopy copy{ | ||||
|             .srcOffset = src_offset, | ||||
|             .dstOffset = 0, | ||||
|             .size = size_bytes, | ||||
|         }; | ||||
|         const VkBufferMemoryBarrier write_barrier{ | ||||
|             .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||||
|             .pNext = nullptr, | ||||
|             .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|             .dstAccessMask = UPLOAD_ACCESS_BARRIERS, | ||||
|             .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, | ||||
|             .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|             .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|             .buffer = handle, | ||||
|             .offset = offset, | ||||
|             .size = data_size, | ||||
|             .buffer = dst_buffer, | ||||
|             .offset = 0, | ||||
|             .size = size_bytes, | ||||
|         }; | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||||
|                                0, read_barrier); | ||||
|         cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, | ||||
|                                write_barrier); | ||||
|         cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, | ||||
|                                0, write_barrier); | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
| void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { | ||||
|     auto staging = staging_pool.Request(data_size, MemoryUsage::Download); | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
| 
 | ||||
|     const VkBuffer handle = Handle(); | ||||
|     scheduler.Record( | ||||
|         [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) { | ||||
|             const VkBufferMemoryBarrier barrier{ | ||||
|                 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||||
| void BufferCacheRuntime::ReserveNullIndexBuffer() { | ||||
|     if (null_index_buffer) { | ||||
|         return; | ||||
|     } | ||||
|     null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|                 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||||
|                 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||||
|                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|                 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|                 .buffer = handle, | ||||
|                 .offset = offset, | ||||
|                 .size = data_size, | ||||
|             }; | ||||
| 
 | ||||
|             cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | | ||||
|                                        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | | ||||
|                                        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||||
|                                    VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); | ||||
|             cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size}); | ||||
|         .flags = 0, | ||||
|         .size = 4, | ||||
|         .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||||
|         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||
|         .queueFamilyIndexCount = 0, | ||||
|         .pQueueFamilyIndices = nullptr, | ||||
|     }); | ||||
|     scheduler.Finish(); | ||||
| 
 | ||||
|     std::memcpy(data, staging.mapped_span.data(), data_size); | ||||
|     if (device.HasDebuggingToolAttached()) { | ||||
|         null_index_buffer.SetObjectNameEXT("Null index buffer"); | ||||
|     } | ||||
|     null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal); | ||||
| 
 | ||||
| void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | ||||
|                       std::size_t copy_size) { | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
| 
 | ||||
|     const VkBuffer dst_buffer = Handle(); | ||||
|     scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, | ||||
|                       copy_size](vk::CommandBuffer cmdbuf) { | ||||
|         cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size}); | ||||
| 
 | ||||
|         std::array<VkBufferMemoryBarrier, 2> barriers; | ||||
|         barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||||
|         barriers[0].pNext = nullptr; | ||||
|         barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; | ||||
|         barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||||
|         barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||||
|         barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||||
|         barriers[0].buffer = src_buffer; | ||||
|         barriers[0].offset = src_offset; | ||||
|         barriers[0].size = copy_size; | ||||
|         barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||||
|         barriers[1].pNext = nullptr; | ||||
|         barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | ||||
|         barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS; | ||||
|         barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||||
|         barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||||
|         barriers[1].buffer = dst_buffer; | ||||
|         barriers[1].offset = dst_offset; | ||||
|         barriers[1].size = copy_size; | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, | ||||
|                                barriers, {}); | ||||
|     scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) { | ||||
|         cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0); | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
// Constructs the Vulkan buffer cache.
// The rasterizer, GPU memory manager, CPU memory and stream buffer are forwarded to the
// VideoCommon::BufferCache base; the device, memory allocator, scheduler and staging pool
// are kept as members for use by this class.
| VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||||
|                              Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | ||||
|                              const Device& device_, MemoryAllocator& memory_allocator_, | ||||
|                              VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, | ||||
|                              StagingBufferPool& staging_pool_) | ||||
|     : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_, | ||||
|                                                                  cpu_memory_, stream_buffer_}, | ||||
|       device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||||
|       staging_pool{staging_pool_} {} | ||||
| 
 | ||||
| VKBufferCache::~VKBufferCache() = default; | ||||
| 
 | ||||
// Creates a new shared Buffer for the given CPU address and size, handing it this cache's
// device, memory allocator, scheduler and staging pool.
| std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | ||||
|     return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr, | ||||
|                                     size); | ||||
| } | ||||
| 
 | ||||
// Returns a buffer whose first `size` bytes are filled with zeros.
// `size` is raised to at least 4 bytes before the request. The buffer comes from the staging
// pool with DeviceLocal usage and the fill is recorded on the scheduler outside of a render
// pass, so it executes when the recorded commands run, not at call time.
// NOTE(review): the two trailing zeros in the returned BufferInfo are presumably offset and
// address; BufferInfo is declared elsewhere - confirm.
| VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { | ||||
      // Clamp to a minimum of 4 bytes.
|     size = std::max(size, std::size_t(4)); | ||||
|     const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal); | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
      // Capture the raw handle and size by value; the lambda is executed later.
|     scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) { | ||||
|         cmdbuf.FillBuffer(buffer, 0, size, 0); | ||||
|     }); | ||||
|     return {empty.buffer, 0, 0}; | ||||
| } | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
|  |  | |||
|  | @ -4,69 +4,124 @@ | |||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <memory> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/buffer_cache/buffer_cache.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||||
| #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||||
| #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| class Device; | ||||
| class VKDescriptorPool; | ||||
| class VKScheduler; | ||||
| class VKUpdateDescriptorQueue; | ||||
| 
 | ||||
| class Buffer final : public VideoCommon::BufferBlock { | ||||
| class BufferCacheRuntime; | ||||
| 
 | ||||
| class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { | ||||
| public: | ||||
|     explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler, | ||||
|                     StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); | ||||
|     ~Buffer(); | ||||
|     explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); | ||||
|     explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | ||||
|                     VAddr cpu_addr_, u64 size_bytes_); | ||||
| 
 | ||||
|     void Upload(std::size_t offset, std::size_t data_size, const u8* data); | ||||
| 
 | ||||
|     void Download(std::size_t offset, std::size_t data_size, u8* data); | ||||
| 
 | ||||
|     void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | ||||
|                   std::size_t copy_size); | ||||
| 
 | ||||
|     VkBuffer Handle() const { | ||||
|     [[nodiscard]] VkBuffer Handle() const noexcept { | ||||
|         return *buffer; | ||||
|     } | ||||
| 
 | ||||
|     u64 Address() const { | ||||
|         return 0; | ||||
|     operator VkBuffer() const noexcept { | ||||
|         return *buffer; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     const Device& device; | ||||
|     VKScheduler& scheduler; | ||||
|     StagingBufferPool& staging_pool; | ||||
| 
 | ||||
|     vk::Buffer buffer; | ||||
|     MemoryCommit commit; | ||||
| }; | ||||
| 
 | ||||
| class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { | ||||
| class BufferCacheRuntime { | ||||
|     friend Buffer; | ||||
| 
 | ||||
|     using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology; | ||||
|     using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat; | ||||
| 
 | ||||
| public: | ||||
|     explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, | ||||
|                            Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, | ||||
|                            const Device& device, MemoryAllocator& memory_allocator, | ||||
|                            VKScheduler& scheduler, VKStreamBuffer& stream_buffer, | ||||
|                            StagingBufferPool& staging_pool); | ||||
|     ~VKBufferCache(); | ||||
|     explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, | ||||
|                                 VKScheduler& scheduler_, StagingBufferPool& staging_pool_, | ||||
|                                 VKUpdateDescriptorQueue& update_descriptor_queue_, | ||||
|                                 VKDescriptorPool& descriptor_pool); | ||||
| 
 | ||||
|     BufferInfo GetEmptyBuffer(std::size_t size) override; | ||||
|     void Finish(); | ||||
| 
 | ||||
| protected: | ||||
|     std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; | ||||
|     [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | ||||
| 
 | ||||
|     [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | ||||
| 
 | ||||
|     void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, | ||||
|                     std::span<const VideoCommon::BufferCopy> copies); | ||||
| 
 | ||||
|     void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices, | ||||
|                          u32 base_vertex, VkBuffer buffer, u32 offset, u32 size); | ||||
| 
 | ||||
|     void BindQuadArrayIndexBuffer(u32 first, u32 count); | ||||
| 
 | ||||
|     void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); | ||||
| 
 | ||||
|     void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); | ||||
| 
 | ||||
|     std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage, | ||||
|                                           [[maybe_unused]] u32 binding_index, u32 size) { | ||||
|         const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload); | ||||
|         BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size); | ||||
|         return ref.mapped_span; | ||||
|     } | ||||
| 
 | ||||
|     void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) { | ||||
|         BindBuffer(buffer, offset, size); | ||||
|     } | ||||
| 
 | ||||
|     void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size, | ||||
|                            [[maybe_unused]] bool is_written) { | ||||
|         BindBuffer(buffer, offset, size); | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     void BindBuffer(VkBuffer buffer, u32 offset, u32 size); | ||||
| 
 | ||||
|     void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle); | ||||
| 
 | ||||
|     void ReserveNullIndexBuffer(); | ||||
| 
 | ||||
|     const Device& device; | ||||
|     MemoryAllocator& memory_allocator; | ||||
|     VKScheduler& scheduler; | ||||
|     StagingBufferPool& staging_pool; | ||||
|     VKUpdateDescriptorQueue& update_descriptor_queue; | ||||
| 
 | ||||
|     vk::Buffer quad_array_lut; | ||||
|     MemoryCommit quad_array_lut_commit; | ||||
|     VkIndexType quad_array_lut_index_type{}; | ||||
|     u32 current_num_indices = 0; | ||||
| 
 | ||||
|     vk::Buffer null_index_buffer; | ||||
|     MemoryCommit null_index_buffer_commit; | ||||
| 
 | ||||
|     Uint8Pass uint8_pass; | ||||
|     QuadIndexedPass quad_index_pass; | ||||
| }; | ||||
| 
 | ||||
// Compile-time parameter bundle for the Vulkan backend: names the runtime and buffer types
// and a set of constant boolean flags. It is the template argument of the
// VideoCommon::BufferCache alias declared right after this struct.
// NOTE(review): the precise effect of each flag is decided by VideoCommon::BufferCache, which
// is not visible here; the names suggest backend capability switches - confirm there.
| struct BufferCacheParams { | ||||
|     using Runtime = Vulkan::BufferCacheRuntime; | ||||
|     using Buffer = Vulkan::Buffer; | ||||
| 
 | ||||
|     static constexpr bool IS_OPENGL = false; | ||||
|     static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; | ||||
|     static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; | ||||
|     static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; | ||||
|     static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; | ||||
|     static constexpr bool USE_MEMORY_MAPS = true; | ||||
| }; | ||||
| 
 | ||||
| using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
|  |  | |||
|  | @ -10,7 +10,7 @@ | |||
| #include "common/alignment.h" | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" | ||||
| #include "common/div_ceil.h" | ||||
| #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | ||||
| #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | ||||
| #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||||
|  | @ -22,30 +22,7 @@ | |||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
// Describes the descriptor set layout of the quad array compute pass: a single storage
// buffer at binding 0, visible to the compute stage, with no immutable samplers.
| VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { | ||||
|     return { | ||||
|         .binding = 0, | ||||
|         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||
|         .descriptorCount = 1, | ||||
|         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||
|         .pImmutableSamplers = nullptr, | ||||
|     }; | ||||
| } | ||||
| 
 | ||||
// Describes the descriptor update template entry matching the quad array pass layout:
// one storage buffer descriptor written to binding 0, array element 0. The source data is
// read from offset 0 with a stride of one DescriptorUpdateEntry.
| VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { | ||||
|     return { | ||||
|         .dstBinding = 0, | ||||
|         .dstArrayElement = 0, | ||||
|         .descriptorCount = 1, | ||||
|         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||||
|         .offset = 0, | ||||
|         .stride = sizeof(DescriptorUpdateEntry), | ||||
|     }; | ||||
| } | ||||
| 
 | ||||
| VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | ||||
|     return { | ||||
|         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||||
|  | @ -162,55 +139,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( | |||
|     return set; | ||||
| } | ||||
| 
 | ||||
// Constructs the quad array compute pass.
// The VKComputePass base receives the device, descriptor pool, the single storage buffer
// layout binding and update template entry, a push constant range sized for one u32, and
// the VULKAN_QUAD_ARRAY_COMP_SPV shader code. The scheduler, staging buffer pool and update
// descriptor queue references are stored for use by Assemble.
| QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_, | ||||
|                              VKDescriptorPool& descriptor_pool_, | ||||
|                              StagingBufferPool& staging_buffer_pool_, | ||||
|                              VKUpdateDescriptorQueue& update_descriptor_queue_) | ||||
|     : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), | ||||
|                     BuildQuadArrayPassDescriptorUpdateTemplateEntry(), | ||||
|                     BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV), | ||||
|       scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | ||||
|       update_descriptor_queue{update_descriptor_queue_} {} | ||||
| 
 | ||||
| QuadArrayPass::~QuadArrayPass() = default; | ||||
| 
 | ||||
// Records a compute dispatch that fills a buffer with indices for drawing quads as triangles.
// num_vertices: number of quad vertices; asserted to be a multiple of 4.
// first: value passed to the shader as its only push constant.
// Returns the output buffer handle paired with offset 0.
// The output holds (num_vertices / 4) * 6 entries of sizeof(u32) bytes each. Work is recorded
// on the scheduler, so the buffer contents are only valid once those commands execute.
| std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { | ||||
      // Six output indices per quad (four input vertices).
|     const u32 num_triangle_vertices = (num_vertices / 4) * 6; | ||||
|     const std::size_t staging_size = num_triangle_vertices * sizeof(u32); | ||||
|     const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | ||||
| 
 | ||||
      // Bind the output buffer as the pass's single storage buffer descriptor.
|     update_descriptor_queue.Acquire(); | ||||
|     update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); | ||||
|     const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | ||||
| 
 | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
| 
 | ||||
      // Note: the assertion runs after the size above was already derived from num_vertices.
|     ASSERT(num_vertices % 4 == 0); | ||||
|     const u32 num_quads = num_vertices / 4; | ||||
|     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, | ||||
|                       num_quads, first, set](vk::CommandBuffer cmdbuf) { | ||||
|         constexpr u32 dispatch_size = 1024; | ||||
|         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | ||||
|         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | ||||
|         cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first); | ||||
          // One workgroup per 1024 quads, rounded up.
|         cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1); | ||||
| 
 | ||||
          // Order the compute shader's writes before vertex input stage reads of the
          // generated range.
|         VkBufferMemoryBarrier barrier; | ||||
|         barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||||
|         barrier.pNext = nullptr; | ||||
|         barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||||
|         barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||||
|         barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||||
|         barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||||
|         barrier.buffer = buffer; | ||||
|         barrier.offset = 0; | ||||
|         barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32); | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||||
|                                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {}); | ||||
|     }); | ||||
|     return {staging_ref.buffer, 0}; | ||||
| } | ||||
| 
 | ||||
| Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, | ||||
|                      VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, | ||||
|                      VKUpdateDescriptorQueue& update_descriptor_queue_) | ||||
|  | @ -221,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, | |||
| 
 | ||||
| Uint8Pass::~Uint8Pass() = default; | ||||
| 
 | ||||
| std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, | ||||
|                                              u64 src_offset) { | ||||
| std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, | ||||
|                                                       u32 src_offset) { | ||||
|     const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); | ||||
|     const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | ||||
|     const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | ||||
| 
 | ||||
|     update_descriptor_queue.Acquire(); | ||||
|     update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); | ||||
|     update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); | ||||
|     update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); | ||||
|     const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | ||||
| 
 | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, | ||||
|     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, | ||||
|                       num_vertices](vk::CommandBuffer cmdbuf) { | ||||
|         constexpr u32 dispatch_size = 1024; | ||||
|         static constexpr u32 DISPATCH_SIZE = 1024; | ||||
|         static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||||
|             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||
|             .pNext = nullptr, | ||||
|             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||||
|             .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | ||||
|         }; | ||||
|         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | ||||
|         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | ||||
|         cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); | ||||
| 
 | ||||
|         VkBufferMemoryBarrier barrier; | ||||
|         barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||||
|         barrier.pNext = nullptr; | ||||
|         barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||||
|         barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||||
|         barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||||
|         barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||||
|         barrier.buffer = buffer; | ||||
|         barrier.offset = 0; | ||||
|         barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16)); | ||||
|         cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||||
|                                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | ||||
|                                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); | ||||
|     }); | ||||
|     return {staging_ref.buffer, 0}; | ||||
|     return {staging.buffer, staging.offset}; | ||||
| } | ||||
| 
 | ||||
| QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | ||||
|  | @ -267,9 +190,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | |||
| 
 | ||||
| QuadIndexedPass::~QuadIndexedPass() = default; | ||||
| 
 | ||||
| std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( | ||||
| std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | ||||
|     Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, | ||||
|     VkBuffer src_buffer, u64 src_offset) { | ||||
|     VkBuffer src_buffer, u32 src_offset) { | ||||
|     const u32 index_shift = [index_format] { | ||||
|         switch (index_format) { | ||||
|         case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: | ||||
|  | @ -286,38 +209,33 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( | |||
|     const u32 num_tri_vertices = (num_vertices / 4) * 6; | ||||
| 
 | ||||
|     const std::size_t staging_size = num_tri_vertices * sizeof(u32); | ||||
|     const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | ||||
|     const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | ||||
| 
 | ||||
|     update_descriptor_queue.Acquire(); | ||||
|     update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); | ||||
|     update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); | ||||
|     update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); | ||||
|     const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | ||||
| 
 | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, | ||||
|     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, | ||||
|                       num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { | ||||
|         static constexpr u32 dispatch_size = 1024; | ||||
|         static constexpr u32 DISPATCH_SIZE = 1024; | ||||
|         static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||||
|             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||
|             .pNext = nullptr, | ||||
|             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||||
|             .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | ||||
|         }; | ||||
|         const std::array push_constants = {base_vertex, index_shift}; | ||||
|         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | ||||
|         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | ||||
|         cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), | ||||
|                              &push_constants); | ||||
|         cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); | ||||
| 
 | ||||
|         VkBufferMemoryBarrier barrier; | ||||
|         barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||||
|         barrier.pNext = nullptr; | ||||
|         barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||||
|         barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||||
|         barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||||
|         barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||||
|         barrier.buffer = buffer; | ||||
|         barrier.offset = 0; | ||||
|         barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32)); | ||||
|         cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||||
|                                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | ||||
|                                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); | ||||
|     }); | ||||
|     return {staging_ref.buffer, 0}; | ||||
|     return {staging.buffer, staging.offset}; | ||||
| } | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
|  |  | |||
|  | @ -41,22 +41,6 @@ private: | |||
|     vk::ShaderModule module; | ||||
| }; | ||||
| 
 | ||||
// Compute pass derived from VKComputePass that assembles data for quad array draws.
// It holds references to the scheduler, the staging buffer pool and the update descriptor
// queue; it does not own them.
| class QuadArrayPass final : public VKComputePass { | ||||
| public: | ||||
|     explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_, | ||||
|                            VKDescriptorPool& descriptor_pool_, | ||||
|                            StagingBufferPool& staging_buffer_pool_, | ||||
|                            VKUpdateDescriptorQueue& update_descriptor_queue_); | ||||
|     ~QuadArrayPass(); | ||||
| 
 | ||||
      // Returns a buffer handle and a byte offset for the assembled data.
      // NOTE(review): presumably num_vertices counts quad vertices and `first` is the first
      // vertex of the draw - confirm against the definition and the compute shader.
|     std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first); | ||||
| 
 | ||||
| private: | ||||
|     VKScheduler& scheduler; | ||||
|     StagingBufferPool& staging_buffer_pool; | ||||
|     VKUpdateDescriptorQueue& update_descriptor_queue; | ||||
| }; | ||||
| 
 | ||||
| class Uint8Pass final : public VKComputePass { | ||||
| public: | ||||
|     explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, | ||||
|  | @ -64,7 +48,10 @@ public: | |||
|                        VKUpdateDescriptorQueue& update_descriptor_queue_); | ||||
|     ~Uint8Pass(); | ||||
| 
 | ||||
|     std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); | ||||
|     /// Assemble uint8 indices into an uint16 index buffer
 | ||||
|     /// Returns a pair with the staging buffer, and the offset where the assembled data is
 | ||||
|     std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer, | ||||
|                                                u32 src_offset); | ||||
| 
 | ||||
| private: | ||||
|     VKScheduler& scheduler; | ||||
|  | @ -80,9 +67,9 @@ public: | |||
|                              VKUpdateDescriptorQueue& update_descriptor_queue_); | ||||
|     ~QuadIndexedPass(); | ||||
| 
 | ||||
|     std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, | ||||
|                                       u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, | ||||
|                                       u64 src_offset); | ||||
|     std::pair<VkBuffer, VkDeviceSize> Assemble( | ||||
|         Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, | ||||
|         u32 base_vertex, VkBuffer src_buffer, u32 src_offset); | ||||
| 
 | ||||
| private: | ||||
|     VKScheduler& scheduler; | ||||
|  |  | |||
|  | @ -45,8 +45,8 @@ void InnerFence::Wait() { | |||
| } | ||||
| 
 | ||||
| VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | ||||
|                                Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, | ||||
|                                VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, | ||||
|                                TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||||
|                                VKQueryCache& query_cache_, const Device& device_, | ||||
|                                VKScheduler& scheduler_) | ||||
|     : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, | ||||
|       scheduler{scheduler_} {} | ||||
|  |  | |||
|  | @ -22,7 +22,6 @@ class RasterizerInterface; | |||
| namespace Vulkan { | ||||
| 
 | ||||
| class Device; | ||||
| class VKBufferCache; | ||||
| class VKQueryCache; | ||||
| class VKScheduler; | ||||
| 
 | ||||
|  | @ -45,14 +44,14 @@ private: | |||
| using Fence = std::shared_ptr<InnerFence>; | ||||
| 
 | ||||
| using GenericFenceManager = | ||||
|     VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>; | ||||
|     VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>; | ||||
| 
 | ||||
| class VKFenceManager final : public GenericFenceManager { | ||||
| public: | ||||
|     explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | ||||
|                             Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, | ||||
|                             VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, | ||||
|                             VKScheduler& scheduler_); | ||||
|     explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, | ||||
|                             TextureCache& texture_cache, BufferCache& buffer_cache, | ||||
|                             VKQueryCache& query_cache, const Device& device, | ||||
|                             VKScheduler& scheduler); | ||||
| 
 | ||||
| protected: | ||||
|     Fence CreateFence(u32 value, bool is_stubbed) override; | ||||
|  |  | |||
|  | @ -8,8 +8,6 @@ | |||
| #include <mutex> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include <boost/container/static_vector.hpp> | ||||
| 
 | ||||
| #include "common/alignment.h" | ||||
| #include "common/assert.h" | ||||
| #include "common/logging/log.h" | ||||
|  | @ -24,7 +22,6 @@ | |||
| #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||||
| #include "video_core/renderer_vulkan/renderer_vulkan.h" | ||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||||
| #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||
| #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||||
|  | @ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25 | |||
| MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); | ||||
| MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); | ||||
| MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); | ||||
| MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128)); | ||||
| MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128)); | ||||
| MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128)); | ||||
| MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128)); | ||||
| MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128)); | ||||
| MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128)); | ||||
| MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); | ||||
| 
 | ||||
| namespace { | ||||
| struct DrawParams { | ||||
|     u32 base_instance; | ||||
|     u32 num_instances; | ||||
|     u32 base_vertex; | ||||
|     u32 num_vertices; | ||||
|     bool is_indexed; | ||||
| }; | ||||
| 
 | ||||
| constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); | ||||
| 
 | ||||
|  | @ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in | |||
|     const float width = src.scale_x * 2.0f; | ||||
|     const float height = src.scale_y * 2.0f; | ||||
|     const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; | ||||
| 
 | ||||
|     VkViewport viewport{ | ||||
|         .x = src.translate_x - src.scale_x, | ||||
|         .y = src.translate_y - src.scale_y, | ||||
|  | @ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in | |||
|         .minDepth = src.translate_z - src.scale_z * reduce_z, | ||||
|         .maxDepth = src.translate_z + src.scale_z, | ||||
|     }; | ||||
| 
 | ||||
|     if (!device.IsExtDepthRangeUnrestrictedSupported()) { | ||||
|         viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); | ||||
|         viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); | ||||
|     } | ||||
| 
 | ||||
|     return viewport; | ||||
| } | ||||
| 
 | ||||
|  | @ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const | |||
|     return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | ||||
| } | ||||
| 
 | ||||
| template <size_t N> | ||||
| std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { | ||||
|     std::array<VkDeviceSize, N> expanded; | ||||
|     std::copy(strides.begin(), strides.end(), expanded.begin()); | ||||
|     return expanded; | ||||
| } | ||||
| 
 | ||||
| ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||||
|     if (entry.is_buffer) { | ||||
|         return ImageViewType::e2D; | ||||
|  | @ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca | |||
|     } | ||||
| } | ||||
| 
 | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| class BufferBindings final { | ||||
| public: | ||||
|     void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) { | ||||
|         vertex.buffers[vertex.num_buffers] = buffer; | ||||
|         vertex.offsets[vertex.num_buffers] = offset; | ||||
|         vertex.sizes[vertex.num_buffers] = size; | ||||
|         vertex.strides[vertex.num_buffers] = static_cast<u16>(stride); | ||||
|         ++vertex.num_buffers; | ||||
|     } | ||||
| 
 | ||||
|     void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) { | ||||
|         index.buffer = buffer; | ||||
|         index.offset = offset; | ||||
|         index.type = type; | ||||
|     } | ||||
| 
 | ||||
|     void Bind(const Device& device, VKScheduler& scheduler) const { | ||||
|         // Use this large switch case to avoid dispatching more memory in the record lambda than
 | ||||
|         // what we need. It looks horrible, but it's the best we can do on standard C++.
 | ||||
|         switch (vertex.num_buffers) { | ||||
|         case 0: | ||||
|             return BindStatic<0>(device, scheduler); | ||||
|         case 1: | ||||
|             return BindStatic<1>(device, scheduler); | ||||
|         case 2: | ||||
|             return BindStatic<2>(device, scheduler); | ||||
|         case 3: | ||||
|             return BindStatic<3>(device, scheduler); | ||||
|         case 4: | ||||
|             return BindStatic<4>(device, scheduler); | ||||
|         case 5: | ||||
|             return BindStatic<5>(device, scheduler); | ||||
|         case 6: | ||||
|             return BindStatic<6>(device, scheduler); | ||||
|         case 7: | ||||
|             return BindStatic<7>(device, scheduler); | ||||
|         case 8: | ||||
|             return BindStatic<8>(device, scheduler); | ||||
|         case 9: | ||||
|             return BindStatic<9>(device, scheduler); | ||||
|         case 10: | ||||
|             return BindStatic<10>(device, scheduler); | ||||
|         case 11: | ||||
|             return BindStatic<11>(device, scheduler); | ||||
|         case 12: | ||||
|             return BindStatic<12>(device, scheduler); | ||||
|         case 13: | ||||
|             return BindStatic<13>(device, scheduler); | ||||
|         case 14: | ||||
|             return BindStatic<14>(device, scheduler); | ||||
|         case 15: | ||||
|             return BindStatic<15>(device, scheduler); | ||||
|         case 16: | ||||
|             return BindStatic<16>(device, scheduler); | ||||
|         case 17: | ||||
|             return BindStatic<17>(device, scheduler); | ||||
|         case 18: | ||||
|             return BindStatic<18>(device, scheduler); | ||||
|         case 19: | ||||
|             return BindStatic<19>(device, scheduler); | ||||
|         case 20: | ||||
|             return BindStatic<20>(device, scheduler); | ||||
|         case 21: | ||||
|             return BindStatic<21>(device, scheduler); | ||||
|         case 22: | ||||
|             return BindStatic<22>(device, scheduler); | ||||
|         case 23: | ||||
|             return BindStatic<23>(device, scheduler); | ||||
|         case 24: | ||||
|             return BindStatic<24>(device, scheduler); | ||||
|         case 25: | ||||
|             return BindStatic<25>(device, scheduler); | ||||
|         case 26: | ||||
|             return BindStatic<26>(device, scheduler); | ||||
|         case 27: | ||||
|             return BindStatic<27>(device, scheduler); | ||||
|         case 28: | ||||
|             return BindStatic<28>(device, scheduler); | ||||
|         case 29: | ||||
|             return BindStatic<29>(device, scheduler); | ||||
|         case 30: | ||||
|             return BindStatic<30>(device, scheduler); | ||||
|         case 31: | ||||
|             return BindStatic<31>(device, scheduler); | ||||
|         case 32: | ||||
|             return BindStatic<32>(device, scheduler); | ||||
|         } | ||||
|         UNREACHABLE(); | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
 | ||||
|     struct { | ||||
|         size_t num_buffers = 0; | ||||
|         std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; | ||||
|         std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; | ||||
|         std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; | ||||
|         std::array<u16, Maxwell::NumVertexArrays> strides; | ||||
|     } vertex; | ||||
| 
 | ||||
|     struct { | ||||
|         VkBuffer buffer = nullptr; | ||||
|         VkDeviceSize offset; | ||||
|         VkIndexType type; | ||||
|     } index; | ||||
| 
 | ||||
|     template <size_t N> | ||||
|     void BindStatic(const Device& device, VKScheduler& scheduler) const { | ||||
|         if (device.IsExtExtendedDynamicStateSupported()) { | ||||
|             if (index.buffer) { | ||||
|                 BindStatic<N, true, true>(scheduler); | ||||
|             } else { | ||||
|                 BindStatic<N, false, true>(scheduler); | ||||
|             } | ||||
|         } else { | ||||
|             if (index.buffer) { | ||||
|                 BindStatic<N, true, false>(scheduler); | ||||
|             } else { | ||||
|                 BindStatic<N, false, false>(scheduler); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     template <size_t N, bool is_indexed, bool has_extended_dynamic_state> | ||||
|     void BindStatic(VKScheduler& scheduler) const { | ||||
|         static_assert(N <= Maxwell::NumVertexArrays); | ||||
|         if constexpr (N == 0) { | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         std::array<VkBuffer, N> buffers; | ||||
|         std::array<VkDeviceSize, N> offsets; | ||||
|         std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin()); | ||||
|         std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); | ||||
| 
 | ||||
|         if constexpr (has_extended_dynamic_state) { | ||||
|             // With extended dynamic states we can specify the length and stride of a vertex buffer
 | ||||
|             std::array<VkDeviceSize, N> sizes; | ||||
|             std::array<u16, N> strides; | ||||
|             std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin()); | ||||
|             std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin()); | ||||
| 
 | ||||
|             if constexpr (is_indexed) { | ||||
|                 scheduler.Record( | ||||
|                     [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) { | ||||
|                         cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); | ||||
|                         cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), | ||||
|                                                      offsets.data(), sizes.data(), | ||||
|                                                      ExpandStrides(strides).data()); | ||||
|                     }); | ||||
|             } else { | ||||
|                 scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) { | ||||
|                     cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), | ||||
|                                                  offsets.data(), sizes.data(), | ||||
|                                                  ExpandStrides(strides).data()); | ||||
|                 }); | ||||
|             } | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         if constexpr (is_indexed) { | ||||
|             // Indexed draw
 | ||||
|             scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) { | ||||
|                 cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); | ||||
|                 cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); | ||||
|             }); | ||||
|         } else { | ||||
|             // Array draw
 | ||||
|             scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) { | ||||
|                 cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); | ||||
|             }); | ||||
|         } | ||||
|     } | ||||
| DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, | ||||
|                           bool is_indexed) { | ||||
|     DrawParams params{ | ||||
|         .base_instance = regs.vb_base_instance, | ||||
|         .num_instances = is_instanced ? num_instances : 1, | ||||
|         .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, | ||||
|         .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, | ||||
|         .is_indexed = is_indexed, | ||||
|     }; | ||||
| 
 | ||||
| void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { | ||||
|     if (is_indexed) { | ||||
|         cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance); | ||||
|     } else { | ||||
|         cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance); | ||||
|     if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { | ||||
|         // 6 triangle vertices per quad, base vertex is part of the index
 | ||||
|         // See BindQuadArrayIndexBuffer for more details
 | ||||
|         params.num_vertices = (params.num_vertices / 4) * 6; | ||||
|         params.base_vertex = 0; | ||||
|         params.is_indexed = true; | ||||
|     } | ||||
|     return params; | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||
|                                    Tegra::MemoryManager& gpu_memory_, | ||||
|  | @ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
|     : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, | ||||
|       gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, | ||||
|       screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, | ||||
|       state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), | ||||
|       state_tracker{state_tracker_}, scheduler{scheduler_}, | ||||
|       staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), | ||||
|       update_descriptor_queue(device, scheduler), | ||||
|       blit_image(device, scheduler, state_tracker, descriptor_pool), | ||||
|       quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||||
|       quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||||
|       uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||||
|       texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, | ||||
|       texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | ||||
|       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | ||||
|                            update_descriptor_queue, descriptor_pool), | ||||
|       buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | ||||
|       pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | ||||
|                      descriptor_pool, update_descriptor_queue), | ||||
|       buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler, | ||||
|                    stream_buffer, staging_pool), | ||||
|       query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, | ||||
|       fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler), | ||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | ||||
|       wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { | ||||
|     scheduler.SetQueryCache(query_cache); | ||||
|     if (device.UseAsynchronousShaders()) { | ||||
|  | @ -449,22 +270,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
|     GraphicsPipelineCacheKey key; | ||||
|     key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); | ||||
| 
 | ||||
|     buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); | ||||
|     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
| 
 | ||||
|     BufferBindings buffer_bindings; | ||||
|     const DrawParameters draw_params = | ||||
|         SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); | ||||
| 
 | ||||
|     auto lock = texture_cache.AcquireLock(); | ||||
|     texture_cache.SynchronizeGraphicsDescriptors(); | ||||
| 
 | ||||
|     texture_cache.UpdateRenderTargets(false); | ||||
| 
 | ||||
|     const auto shaders = pipeline_cache.GetShaders(); | ||||
|     key.shaders = GetShaderAddresses(shaders); | ||||
|     SetupShaderDescriptors(shaders); | ||||
| 
 | ||||
|     buffer_cache.Unmap(); | ||||
|     SetupShaderDescriptors(shaders, is_indexed); | ||||
| 
 | ||||
|     const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); | ||||
|     key.renderpass = framebuffer->RenderPass(); | ||||
|  | @ -476,22 +289,29 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     buffer_bindings.Bind(device, scheduler); | ||||
| 
 | ||||
|     BeginTransformFeedback(); | ||||
| 
 | ||||
|     scheduler.RequestRenderpass(framebuffer); | ||||
|     scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | ||||
|     UpdateDynamicStates(); | ||||
| 
 | ||||
|     const auto pipeline_layout = pipeline->GetLayout(); | ||||
|     const auto descriptor_set = pipeline->CommitDescriptorSet(); | ||||
|     const auto& regs = maxwell3d.regs; | ||||
|     const u32 num_instances = maxwell3d.mme_draw.instance_count; | ||||
|     const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); | ||||
|     const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); | ||||
|     const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); | ||||
|     scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { | ||||
|         if (descriptor_set) { | ||||
|             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, | ||||
|                                       DESCRIPTOR_SET, descriptor_set, {}); | ||||
|                                       DESCRIPTOR_SET, descriptor_set, nullptr); | ||||
|         } | ||||
|         if (draw_params.is_indexed) { | ||||
|             cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, | ||||
|                                draw_params.base_vertex, draw_params.base_instance); | ||||
|         } else { | ||||
|             cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, | ||||
|                         draw_params.base_vertex, draw_params.base_instance); | ||||
|         } | ||||
|         draw_params.Draw(cmdbuf); | ||||
|     }); | ||||
| 
 | ||||
|     EndTransformFeedback(); | ||||
|  | @ -515,7 +335,7 @@ void RasterizerVulkan::Clear() { | |||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     auto lock = texture_cache.AcquireLock(); | ||||
|     std::scoped_lock lock{texture_cache.mutex}; | ||||
|     texture_cache.UpdateRenderTargets(true); | ||||
|     const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); | ||||
|     const VkExtent2D render_area = framebuffer->RenderArea(); | ||||
|  | @ -559,7 +379,6 @@ void RasterizerVulkan::Clear() { | |||
|     if (use_stencil) { | ||||
|         aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; | ||||
|     } | ||||
| 
 | ||||
|     scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, | ||||
|                       clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { | ||||
|         VkClearAttachment attachment; | ||||
|  | @ -580,8 +399,7 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
|     auto& pipeline = pipeline_cache.GetComputePipeline({ | ||||
|         .shader = code_addr, | ||||
|         .shared_memory_size = launch_desc.shared_alloc, | ||||
|         .workgroup_size = | ||||
|             { | ||||
|         .workgroup_size{ | ||||
|             launch_desc.block_dim_x, | ||||
|             launch_desc.block_dim_y, | ||||
|             launch_desc.block_dim_z, | ||||
|  | @ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
|     image_view_indices.clear(); | ||||
|     sampler_handles.clear(); | ||||
| 
 | ||||
|     auto lock = texture_cache.AcquireLock(); | ||||
|     texture_cache.SynchronizeComputeDescriptors(); | ||||
|     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
| 
 | ||||
|     const auto& entries = pipeline.GetEntries(); | ||||
|     buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); | ||||
|     buffer_cache.UnbindComputeStorageBuffers(); | ||||
|     u32 ssbo_index = 0; | ||||
|     for (const auto& buffer : entries.global_buffers) { | ||||
|         buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, | ||||
|                                               buffer.is_written); | ||||
|         ++ssbo_index; | ||||
|     } | ||||
|     buffer_cache.UpdateComputeBuffers(); | ||||
| 
 | ||||
|     texture_cache.SynchronizeComputeDescriptors(); | ||||
| 
 | ||||
|     SetupComputeUniformTexels(entries); | ||||
|     SetupComputeTextures(entries); | ||||
|     SetupComputeStorageTexels(entries); | ||||
|  | @ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
|     const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||||
|     texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||||
| 
 | ||||
|     buffer_cache.Map(CalculateComputeStreamBufferSize()); | ||||
| 
 | ||||
|     update_descriptor_queue.Acquire(); | ||||
| 
 | ||||
|     SetupComputeConstBuffers(entries); | ||||
|     SetupComputeGlobalBuffers(entries); | ||||
|     buffer_cache.BindHostComputeBuffers(); | ||||
| 
 | ||||
|     ImageViewId* image_view_id_ptr = image_view_ids.data(); | ||||
|     VkSampler* sampler_ptr = sampler_handles.data(); | ||||
|     PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | ||||
|                          sampler_ptr); | ||||
| 
 | ||||
|     buffer_cache.Unmap(); | ||||
| 
 | ||||
|     const VkPipeline pipeline_handle = pipeline.GetHandle(); | ||||
|     const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); | ||||
|     const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); | ||||
|  | @ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
|     query_cache.Query(gpu_addr, type, timestamp); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||
|                                                  u32 size) { | ||||
|     buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::FlushAll() {} | ||||
| 
 | ||||
| void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | ||||
|  | @ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | |||
|         return; | ||||
|     } | ||||
|     { | ||||
|         auto lock = texture_cache.AcquireLock(); | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.DownloadMemory(addr, size); | ||||
|     } | ||||
|     buffer_cache.FlushRegion(addr, size); | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.DownloadMemory(addr, size); | ||||
|     } | ||||
|     query_cache.FlushRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
| bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { | ||||
|     std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; | ||||
|     if (!Settings::IsGPULevelHigh()) { | ||||
|         return buffer_cache.MustFlushRegion(addr, size); | ||||
|         return buffer_cache.IsRegionGpuModified(addr, size); | ||||
|     } | ||||
|     return texture_cache.IsRegionGpuModified(addr, size) || | ||||
|            buffer_cache.MustFlushRegion(addr, size); | ||||
|            buffer_cache.IsRegionGpuModified(addr, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | ||||
|  | @ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | |||
|         return; | ||||
|     } | ||||
|     { | ||||
|         auto lock = texture_cache.AcquireLock(); | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.WriteMemory(addr, size); | ||||
|     } | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.WriteMemory(addr, size); | ||||
|     } | ||||
|     pipeline_cache.InvalidateRegion(addr, size); | ||||
|     buffer_cache.InvalidateRegion(addr, size); | ||||
|     query_cache.InvalidateRegion(addr, size); | ||||
| } | ||||
| 
 | ||||
|  | @ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
|     if (addr == 0 || size == 0) { | ||||
|         return; | ||||
|     } | ||||
|     pipeline_cache.OnCPUWrite(addr, size); | ||||
|     { | ||||
|         auto lock = texture_cache.AcquireLock(); | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.WriteMemory(addr, size); | ||||
|     } | ||||
|     pipeline_cache.OnCPUWrite(addr, size); | ||||
|     buffer_cache.OnCPUWrite(addr, size); | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.CachedWriteMemory(addr, size); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SyncGuestHost() { | ||||
|     buffer_cache.SyncGuestHost(); | ||||
|     pipeline_cache.SyncGuestHost(); | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.FlushCachedWrites(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | ||||
|     { | ||||
|         auto lock = texture_cache.AcquireLock(); | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.UnmapMemory(addr, size); | ||||
|     } | ||||
|     buffer_cache.OnCPUWrite(addr, size); | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.WriteMemory(addr, size); | ||||
|     } | ||||
|     pipeline_cache.OnCPUWrite(addr, size); | ||||
| } | ||||
| 
 | ||||
|  | @ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() { | |||
|     draw_counter = 0; | ||||
|     update_descriptor_queue.TickFrame(); | ||||
|     fence_manager.TickFrame(); | ||||
|     buffer_cache.TickFrame(); | ||||
|     staging_pool.TickFrame(); | ||||
|     { | ||||
|         auto lock = texture_cache.AcquireLock(); | ||||
|         std::scoped_lock lock{texture_cache.mutex}; | ||||
|         texture_cache.TickFrame(); | ||||
|     } | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|         buffer_cache.TickFrame(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | ||||
|                                              const Tegra::Engines::Fermi2D::Surface& dst, | ||||
|                                              const Tegra::Engines::Fermi2D::Config& copy_config) { | ||||
|     auto lock = texture_cache.AcquireLock(); | ||||
|     std::scoped_lock lock{texture_cache.mutex}; | ||||
|     texture_cache.BlitImage(dst, src, copy_config); | ||||
|     return true; | ||||
| } | ||||
|  | @ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
|     if (!framebuffer_addr) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     auto lock = texture_cache.AcquireLock(); | ||||
|     std::scoped_lock lock{texture_cache.mutex}; | ||||
|     ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); | ||||
|     if (!image_view) { | ||||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); | ||||
|     screen_info.width = image_view->size.width; | ||||
|     screen_info.height = image_view->size.height; | ||||
|  | @ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() { | |||
|     draw_counter = 0; | ||||
| } | ||||
| 
 | ||||
| RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, | ||||
|                                                                  BufferBindings& buffer_bindings, | ||||
|                                                                  bool is_indexed, | ||||
|                                                                  bool is_instanced) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_Geometry); | ||||
| 
 | ||||
|     const auto& regs = maxwell3d.regs; | ||||
| 
 | ||||
|     SetupVertexArrays(buffer_bindings); | ||||
| 
 | ||||
|     const u32 base_instance = regs.vb_base_instance; | ||||
|     const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1; | ||||
|     const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; | ||||
|     const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count; | ||||
| 
 | ||||
|     DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed}; | ||||
|     SetupIndexBuffer(buffer_bindings, params, is_indexed); | ||||
| 
 | ||||
|     return params; | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupShaderDescriptors( | ||||
|     const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { | ||||
|     const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { | ||||
|     image_view_indices.clear(); | ||||
|     sampler_handles.clear(); | ||||
|     for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||||
|  | @ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors( | |||
|         if (!shader) { | ||||
|             continue; | ||||
|         } | ||||
|         const auto& entries = shader->GetEntries(); | ||||
|         const ShaderEntries& entries = shader->GetEntries(); | ||||
|         SetupGraphicsUniformTexels(entries, stage); | ||||
|         SetupGraphicsTextures(entries, stage); | ||||
|         SetupGraphicsStorageTexels(entries, stage); | ||||
|         SetupGraphicsImages(entries, stage); | ||||
| 
 | ||||
|         buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); | ||||
|         buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||||
|         u32 ssbo_index = 0; | ||||
|         for (const auto& buffer : entries.global_buffers) { | ||||
|             buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||||
|                                                    buffer.cbuf_offset, buffer.is_written); | ||||
|             ++ssbo_index; | ||||
|         } | ||||
|     } | ||||
|     const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||||
|     buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||||
|     texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||||
| 
 | ||||
|     buffer_cache.BindHostGeometryBuffers(is_indexed); | ||||
| 
 | ||||
|     update_descriptor_queue.Acquire(); | ||||
| 
 | ||||
|     ImageViewId* image_view_id_ptr = image_view_ids.data(); | ||||
|  | @ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors( | |||
|         if (!shader) { | ||||
|             continue; | ||||
|         } | ||||
|         const auto& entries = shader->GetEntries(); | ||||
|         SetupGraphicsConstBuffers(entries, stage); | ||||
|         SetupGraphicsGlobalBuffers(entries, stage); | ||||
|         PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | ||||
|                              sampler_ptr); | ||||
|         buffer_cache.BindHostStageBuffers(stage); | ||||
|         PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, | ||||
|                              image_view_id_ptr, sampler_ptr); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | @ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() { | |||
|         LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | ||||
|                      regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | ||||
|                      regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | ||||
| 
 | ||||
|     UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); | ||||
|     UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable); | ||||
|     UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable); | ||||
| 
 | ||||
|     const auto& binding = regs.tfb_bindings[0]; | ||||
|     UNIMPLEMENTED_IF(binding.buffer_enable == 0); | ||||
|     UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||||
| 
 | ||||
|     const GPUVAddr gpu_addr = binding.Address(); | ||||
|     const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size); | ||||
|     const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||||
| 
 | ||||
|     scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) { | ||||
|         cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); | ||||
|         cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); | ||||
|     }); | ||||
|     scheduler.Record( | ||||
|         [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::EndTransformFeedback() { | ||||
|  | @ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() { | |||
|     if (!device.IsExtTransformFeedbackSupported()) { | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     scheduler.Record( | ||||
|         [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { | ||||
|     const auto& regs = maxwell3d.regs; | ||||
| 
 | ||||
|     for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||||
|         const auto& vertex_array = regs.vertex_array[index]; | ||||
|         if (!vertex_array.IsEnabled()) { | ||||
|             continue; | ||||
|         } | ||||
|         const GPUVAddr start{vertex_array.StartAddress()}; | ||||
|         const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||||
| 
 | ||||
|         ASSERT(end >= start); | ||||
|         const size_t size = end - start; | ||||
|         if (size == 0) { | ||||
|             buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); | ||||
|             continue; | ||||
|         } | ||||
|         const auto info = buffer_cache.UploadMemory(start, size); | ||||
|         buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, | ||||
|                                         bool is_indexed) { | ||||
|     if (params.num_vertices == 0) { | ||||
|         return; | ||||
|     } | ||||
|     const auto& regs = maxwell3d.regs; | ||||
|     switch (regs.draw.topology) { | ||||
|     case Maxwell::PrimitiveTopology::Quads: { | ||||
|         if (!params.is_indexed) { | ||||
|             const auto [buffer, offset] = | ||||
|                 quad_array_pass.Assemble(params.num_vertices, params.base_vertex); | ||||
|             buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); | ||||
|             params.base_vertex = 0; | ||||
|             params.num_vertices = params.num_vertices * 6 / 4; | ||||
|             params.is_indexed = true; | ||||
|             break; | ||||
|         } | ||||
|         const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | ||||
|         const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | ||||
|         VkBuffer buffer = info.handle; | ||||
|         u64 offset = info.offset; | ||||
|         std::tie(buffer, offset) = quad_indexed_pass.Assemble( | ||||
|             regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); | ||||
| 
 | ||||
|         buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); | ||||
|         params.num_vertices = (params.num_vertices / 4) * 6; | ||||
|         params.base_vertex = 0; | ||||
|         break; | ||||
|     } | ||||
|     default: { | ||||
|         if (!is_indexed) { | ||||
|             break; | ||||
|         } | ||||
|         const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | ||||
|         const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | ||||
|         VkBuffer buffer = info.handle; | ||||
|         u64 offset = info.offset; | ||||
| 
 | ||||
|         auto format = regs.index_array.format; | ||||
|         const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; | ||||
|         if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { | ||||
|             std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset); | ||||
|             format = Maxwell::IndexFormat::UnsignedShort; | ||||
|         } | ||||
| 
 | ||||
|         buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format)); | ||||
|         break; | ||||
|     } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||||
|     const auto& shader_stage = maxwell3d.state.shader_stages[stage]; | ||||
|     for (const auto& entry : entries.const_buffers) { | ||||
|         SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||||
|     const auto& cbufs{maxwell3d.state.shader_stages[stage]}; | ||||
| 
 | ||||
|     for (const auto& entry : entries.global_buffers) { | ||||
|         const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); | ||||
|         SetupGlobalBuffer(entry, addr); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_Textures); | ||||
|     const auto& regs = maxwell3d.regs; | ||||
|     const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||||
|     for (const auto& entry : entries.uniform_texels) { | ||||
|  | @ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, | |||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_Textures); | ||||
|     const auto& regs = maxwell3d.regs; | ||||
|     const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||||
|     for (const auto& entry : entries.samplers) { | ||||
|  | @ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_ | |||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_Textures); | ||||
|     const auto& regs = maxwell3d.regs; | ||||
|     const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||||
|     for (const auto& entry : entries.storage_texels) { | ||||
|  | @ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, | |||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_Images); | ||||
|     const auto& regs = maxwell3d.regs; | ||||
|     const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||||
|     for (const auto& entry : entries.images) { | ||||
|  | @ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||||
|     const auto& launch_desc = kepler_compute.launch_description; | ||||
|     for (const auto& entry : entries.const_buffers) { | ||||
|         const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||||
|         const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | ||||
|         const Tegra::Engines::ConstBufferInfo info{ | ||||
|             .address = config.Address(), | ||||
|             .size = config.size, | ||||
|             .enabled = mask[entry.GetIndex()], | ||||
|         }; | ||||
|         SetupConstBuffer(entry, info); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||||
|     const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; | ||||
|     for (const auto& entry : entries.global_buffers) { | ||||
|         const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||||
|         SetupGlobalBuffer(entry, addr); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_Textures); | ||||
|     const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||||
|     for (const auto& entry : entries.uniform_texels) { | ||||
|         const TextureHandle handle = | ||||
|  | @ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | |||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_Textures); | ||||
|     const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||||
|     for (const auto& entry : entries.samplers) { | ||||
|         for (size_t index = 0; index < entry.size; ++index) { | ||||
|  | @ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | |||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_Textures); | ||||
|     const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||||
|     for (const auto& entry : entries.storage_texels) { | ||||
|         const TextureHandle handle = | ||||
|  | @ -1149,7 +845,6 @@ void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | |||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | ||||
|     MICROPROFILE_SCOPE(Vulkan_Images); | ||||
|     const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||||
|     for (const auto& entry : entries.images) { | ||||
|         const TextureHandle handle = | ||||
|  | @ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, | ||||
|                                         const Tegra::Engines::ConstBufferInfo& buffer) { | ||||
|     if (!buffer.enabled) { | ||||
|         // Set values to zero to unbind buffers
 | ||||
|         update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); | ||||
|         return; | ||||
|     } | ||||
|     // Align the size to avoid bad std140 interactions
 | ||||
|     const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); | ||||
|     ASSERT(size <= MaxConstbufferSize); | ||||
| 
 | ||||
|     const u64 alignment = device.GetUniformBufferAlignment(); | ||||
|     const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment); | ||||
|     update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { | ||||
|     const u64 actual_addr = gpu_memory.Read<u64>(address); | ||||
|     const u32 size = gpu_memory.Read<u32>(address + 8); | ||||
| 
 | ||||
|     if (size == 0) { | ||||
|         // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
 | ||||
|         // because Vulkan doesn't like empty buffers.
 | ||||
|         // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
 | ||||
|         // default buffer.
 | ||||
|         static constexpr size_t dummy_size = 4; | ||||
|         const auto info = buffer_cache.GetEmptyBuffer(dummy_size); | ||||
|         update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     const auto info = buffer_cache.UploadMemory( | ||||
|         actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); | ||||
|     update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | ||||
|     if (!state_tracker.TouchViewports()) { | ||||
|         return; | ||||
|  | @ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg | |||
|         GetViewportState(device, regs, 8),  GetViewportState(device, regs, 9), | ||||
|         GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), | ||||
|         GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), | ||||
|         GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; | ||||
|         GetViewportState(device, regs, 14), GetViewportState(device, regs, 15), | ||||
|     }; | ||||
|     scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); | ||||
| } | ||||
| 
 | ||||
|  | @ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs | |||
|     if (!state_tracker.TouchScissors()) { | ||||
|         return; | ||||
|     } | ||||
|     const std::array scissors = { | ||||
|     const std::array scissors{ | ||||
|         GetScissorState(regs, 0),  GetScissorState(regs, 1),  GetScissorState(regs, 2), | ||||
|         GetScissorState(regs, 3),  GetScissorState(regs, 4),  GetScissorState(regs, 5), | ||||
|         GetScissorState(regs, 6),  GetScissorState(regs, 7),  GetScissorState(regs, 8), | ||||
|         GetScissorState(regs, 9),  GetScissorState(regs, 10), GetScissorState(regs, 11), | ||||
|         GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), | ||||
|         GetScissorState(regs, 15)}; | ||||
|         GetScissorState(regs, 15), | ||||
|     }; | ||||
|     scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); | ||||
| } | ||||
| 
 | ||||
|  | @ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& | |||
|     }); | ||||
| } | ||||
| 
 | ||||
| size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { | ||||
|     size_t size = CalculateVertexArraysSize(); | ||||
|     if (is_indexed) { | ||||
|         size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); | ||||
|     } | ||||
|     size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | ||||
|     return size; | ||||
| } | ||||
| 
 | ||||
| size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { | ||||
|     return Tegra::Engines::KeplerCompute::NumConstBuffers * | ||||
|            (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||||
| } | ||||
| 
 | ||||
| size_t RasterizerVulkan::CalculateVertexArraysSize() const { | ||||
|     const auto& regs = maxwell3d.regs; | ||||
| 
 | ||||
|     size_t size = 0; | ||||
|     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||||
|         // This implementation assumes that all attributes are used in the shader.
 | ||||
|         const GPUVAddr start{regs.vertex_array[index].StartAddress()}; | ||||
|         const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||||
|         DEBUG_ASSERT(end >= start); | ||||
| 
 | ||||
|         size += (end - start) * regs.vertex_array[index].enable; | ||||
|     } | ||||
|     return size; | ||||
| } | ||||
| 
 | ||||
| size_t RasterizerVulkan::CalculateIndexBufferSize() const { | ||||
|     return static_cast<size_t>(maxwell3d.regs.index_array.count) * | ||||
|            static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); | ||||
| } | ||||
| 
 | ||||
| size_t RasterizerVulkan::CalculateConstBufferSize( | ||||
|     const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { | ||||
|     if (entry.IsIndirect()) { | ||||
|         // Buffer is accessed indirectly, so upload the entire thing
 | ||||
|         return buffer.size; | ||||
|     } else { | ||||
|         // Buffer is accessed directly, upload just what we use
 | ||||
|         return entry.GetSize(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| VkBuffer RasterizerVulkan::DefaultBuffer() { | ||||
|     if (default_buffer) { | ||||
|         return *default_buffer; | ||||
|     } | ||||
|     default_buffer = device.GetLogical().CreateBuffer({ | ||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|         .flags = 0, | ||||
|         .size = DEFAULT_BUFFER_SIZE, | ||||
|         .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | | ||||
|                  VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, | ||||
|         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||
|         .queueFamilyIndexCount = 0, | ||||
|         .pQueueFamilyIndices = nullptr, | ||||
|     }); | ||||
|     default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal); | ||||
| 
 | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) { | ||||
|         cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0); | ||||
|     }); | ||||
|     return *default_buffer; | ||||
| } | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
|  |  | |||
|  | @ -18,14 +18,12 @@ | |||
| #include "video_core/renderer_vulkan/blit_image.h" | ||||
| #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||||
| #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||
| #include "video_core/renderer_vulkan/vk_fence_manager.h" | ||||
| #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_query_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||
| #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||||
| #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||||
| #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||
| #include "video_core/shader/async_shaders.h" | ||||
|  | @ -49,7 +47,6 @@ namespace Vulkan { | |||
| struct VKScreenInfo; | ||||
| 
 | ||||
| class StateTracker; | ||||
| class BufferBindings; | ||||
| 
 | ||||
| class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { | ||||
| public: | ||||
|  | @ -65,6 +62,7 @@ public: | |||
|     void DispatchCompute(GPUVAddr code_addr) override; | ||||
|     void ResetCounter(VideoCore::QueryType type) override; | ||||
|     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||||
|     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||||
|     void FlushAll() override; | ||||
|     void FlushRegion(VAddr addr, u64 size) override; | ||||
|     bool MustFlushRegion(VAddr addr, u64 size) override; | ||||
|  | @ -107,24 +105,11 @@ private: | |||
| 
 | ||||
|     static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); | ||||
| 
 | ||||
|     struct DrawParameters { | ||||
|         void Draw(vk::CommandBuffer cmdbuf) const; | ||||
| 
 | ||||
|         u32 base_instance = 0; | ||||
|         u32 num_instances = 0; | ||||
|         u32 base_vertex = 0; | ||||
|         u32 num_vertices = 0; | ||||
|         bool is_indexed = 0; | ||||
|     }; | ||||
| 
 | ||||
|     void FlushWork(); | ||||
| 
 | ||||
|     /// Setups geometry buffers and state.
 | ||||
|     DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, | ||||
|                                  bool is_indexed, bool is_instanced); | ||||
| 
 | ||||
|     /// Setup descriptors in the graphics pipeline.
 | ||||
|     void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); | ||||
|     void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, | ||||
|                                 bool is_indexed); | ||||
| 
 | ||||
|     void UpdateDynamicStates(); | ||||
| 
 | ||||
|  | @ -132,16 +117,6 @@ private: | |||
| 
 | ||||
|     void EndTransformFeedback(); | ||||
| 
 | ||||
|     void SetupVertexArrays(BufferBindings& buffer_bindings); | ||||
| 
 | ||||
|     void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); | ||||
| 
 | ||||
|     /// Setup constant buffers in the graphics pipeline.
 | ||||
|     void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage); | ||||
| 
 | ||||
|     /// Setup global buffers in the graphics pipeline.
 | ||||
|     void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); | ||||
| 
 | ||||
|     /// Setup uniform texels in the graphics pipeline.
 | ||||
|     void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); | ||||
| 
 | ||||
|  | @ -154,12 +129,6 @@ private: | |||
|     /// Setup images in the graphics pipeline.
 | ||||
|     void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | ||||
| 
 | ||||
|     /// Setup constant buffers in the compute pipeline.
 | ||||
|     void SetupComputeConstBuffers(const ShaderEntries& entries); | ||||
| 
 | ||||
|     /// Setup global buffers in the compute pipeline.
 | ||||
|     void SetupComputeGlobalBuffers(const ShaderEntries& entries); | ||||
| 
 | ||||
|     /// Setup texel buffers in the compute pipeline.
 | ||||
|     void SetupComputeUniformTexels(const ShaderEntries& entries); | ||||
| 
 | ||||
|  | @ -172,11 +141,6 @@ private: | |||
|     /// Setup images in the compute pipeline.
 | ||||
|     void SetupComputeImages(const ShaderEntries& entries); | ||||
| 
 | ||||
|     void SetupConstBuffer(const ConstBufferEntry& entry, | ||||
|                           const Tegra::Engines::ConstBufferInfo& buffer); | ||||
| 
 | ||||
|     void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); | ||||
| 
 | ||||
|     void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | ||||
|     void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | ||||
|     void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); | ||||
|  | @ -193,19 +157,6 @@ private: | |||
|     void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); | ||||
|     void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); | ||||
| 
 | ||||
|     size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; | ||||
| 
 | ||||
|     size_t CalculateComputeStreamBufferSize() const; | ||||
| 
 | ||||
|     size_t CalculateVertexArraysSize() const; | ||||
| 
 | ||||
|     size_t CalculateIndexBufferSize() const; | ||||
| 
 | ||||
|     size_t CalculateConstBufferSize(const ConstBufferEntry& entry, | ||||
|                                     const Tegra::Engines::ConstBufferInfo& buffer) const; | ||||
| 
 | ||||
|     VkBuffer DefaultBuffer(); | ||||
| 
 | ||||
|     Tegra::GPU& gpu; | ||||
|     Tegra::MemoryManager& gpu_memory; | ||||
|     Tegra::Engines::Maxwell3D& maxwell3d; | ||||
|  | @ -217,24 +168,19 @@ private: | |||
|     StateTracker& state_tracker; | ||||
|     VKScheduler& scheduler; | ||||
| 
 | ||||
|     VKStreamBuffer stream_buffer; | ||||
|     StagingBufferPool staging_pool; | ||||
|     VKDescriptorPool descriptor_pool; | ||||
|     VKUpdateDescriptorQueue update_descriptor_queue; | ||||
|     BlitImageHelper blit_image; | ||||
|     QuadArrayPass quad_array_pass; | ||||
|     QuadIndexedPass quad_indexed_pass; | ||||
|     Uint8Pass uint8_pass; | ||||
| 
 | ||||
|     TextureCacheRuntime texture_cache_runtime; | ||||
|     TextureCache texture_cache; | ||||
|     BufferCacheRuntime buffer_cache_runtime; | ||||
|     BufferCache buffer_cache; | ||||
|     VKPipelineCache pipeline_cache; | ||||
|     VKBufferCache buffer_cache; | ||||
|     VKQueryCache query_cache; | ||||
|     VKFenceManager fence_manager; | ||||
| 
 | ||||
|     vk::Buffer default_buffer; | ||||
|     MemoryCommit default_buffer_commit; | ||||
|     vk::Event wfi_event; | ||||
|     VideoCommon::Shader::AsyncShaders async_shaders; | ||||
| 
 | ||||
|  |  | |||
|  | @ -52,18 +52,6 @@ VKScheduler::~VKScheduler() { | |||
|     worker_thread.join(); | ||||
| } | ||||
| 
 | ||||
| u64 VKScheduler::CurrentTick() const noexcept { | ||||
|     return master_semaphore->CurrentTick(); | ||||
| } | ||||
| 
 | ||||
| bool VKScheduler::IsFree(u64 tick) const noexcept { | ||||
|     return master_semaphore->IsFree(tick); | ||||
| } | ||||
| 
 | ||||
| void VKScheduler::Wait(u64 tick) { | ||||
|     master_semaphore->Wait(tick); | ||||
| } | ||||
| 
 | ||||
| void VKScheduler::Flush(VkSemaphore semaphore) { | ||||
|     SubmitExecution(semaphore); | ||||
|     AllocateNewContext(); | ||||
|  | @ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() { | |||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | | ||||
|                                    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | | ||||
|                                    VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | ||||
|                                VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, | ||||
|                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr, | ||||
|                                vk::Span(barriers.data(), num_images)); | ||||
|     }); | ||||
|     state.renderpass = nullptr; | ||||
|  |  | |||
|  | @ -14,6 +14,7 @@ | |||
| #include "common/alignment.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/threadsafe_queue.h" | ||||
| #include "video_core/renderer_vulkan/vk_master_semaphore.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| 
 | ||||
| namespace Vulkan { | ||||
|  | @ -21,7 +22,6 @@ namespace Vulkan { | |||
| class CommandPool; | ||||
| class Device; | ||||
| class Framebuffer; | ||||
| class MasterSemaphore; | ||||
| class StateTracker; | ||||
| class VKQueryCache; | ||||
| 
 | ||||
|  | @ -32,15 +32,6 @@ public: | |||
|     explicit VKScheduler(const Device& device, StateTracker& state_tracker); | ||||
|     ~VKScheduler(); | ||||
| 
 | ||||
|     /// Returns the current command buffer tick.
 | ||||
|     [[nodiscard]] u64 CurrentTick() const noexcept; | ||||
| 
 | ||||
|     /// Returns true when a tick has been triggered by the GPU.
 | ||||
|     [[nodiscard]] bool IsFree(u64 tick) const noexcept; | ||||
| 
 | ||||
|     /// Waits for the given tick to trigger on the GPU.
 | ||||
|     void Wait(u64 tick); | ||||
| 
 | ||||
|     /// Sends the current execution context to the GPU.
 | ||||
|     void Flush(VkSemaphore semaphore = nullptr); | ||||
| 
 | ||||
|  | @ -82,6 +73,21 @@ public: | |||
|         (void)chunk->Record(command); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the current command buffer tick.
 | ||||
|     [[nodiscard]] u64 CurrentTick() const noexcept { | ||||
|         return master_semaphore->CurrentTick(); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns true when a tick has been triggered by the GPU.
 | ||||
|     [[nodiscard]] bool IsFree(u64 tick) const noexcept { | ||||
|         return master_semaphore->IsFree(tick); | ||||
|     } | ||||
| 
 | ||||
|     /// Waits for the given tick to trigger on the GPU.
 | ||||
|     void Wait(u64 tick) { | ||||
|         master_semaphore->Wait(tick); | ||||
|     } | ||||
| 
 | ||||
|     /// Returns the master timeline semaphore.
 | ||||
|     [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { | ||||
|         return *master_semaphore; | ||||
|  |  | |||
|  | @ -3106,7 +3106,11 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
|         entries.const_buffers.emplace_back(cbuf.second, cbuf.first); | ||||
|     } | ||||
|     for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||||
|         entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_written); | ||||
|         entries.global_buffers.emplace_back(GlobalBufferEntry{ | ||||
|             .cbuf_index = base.cbuf_index, | ||||
|             .cbuf_offset = base.cbuf_offset, | ||||
|             .is_written = usage.is_written, | ||||
|         }); | ||||
|     } | ||||
|     for (const auto& sampler : ir.GetSamplers()) { | ||||
|         if (sampler.is_buffer) { | ||||
|  | @ -3127,6 +3131,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
|             entries.attributes.insert(GetGenericAttributeLocation(attribute)); | ||||
|         } | ||||
|     } | ||||
|     for (const auto& buffer : entries.const_buffers) { | ||||
|         entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); | ||||
|     } | ||||
|     entries.clip_distances = ir.GetClipDistances(); | ||||
|     entries.shader_length = ir.GetLength(); | ||||
|     entries.uses_warps = ir.UsesWarps(); | ||||
|  |  | |||
|  | @ -39,24 +39,7 @@ private: | |||
|     u32 index{}; | ||||
| }; | ||||
| 
 | ||||
| class GlobalBufferEntry { | ||||
| public: | ||||
|     constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_) | ||||
|         : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {} | ||||
| 
 | ||||
|     constexpr u32 GetCbufIndex() const { | ||||
|         return cbuf_index; | ||||
|     } | ||||
| 
 | ||||
|     constexpr u32 GetCbufOffset() const { | ||||
|         return cbuf_offset; | ||||
|     } | ||||
| 
 | ||||
|     constexpr bool IsWritten() const { | ||||
|         return is_written; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
| struct GlobalBufferEntry { | ||||
|     u32 cbuf_index{}; | ||||
|     u32 cbuf_offset{}; | ||||
|     bool is_written{}; | ||||
|  | @ -78,6 +61,7 @@ struct ShaderEntries { | |||
|     std::set<u32> attributes; | ||||
|     std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | ||||
|     std::size_t shader_length{}; | ||||
|     u32 enabled_uniform_buffers{}; | ||||
|     bool uses_warps{}; | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -8,6 +8,7 @@ | |||
| 
 | ||||
| #include <fmt/format.h> | ||||
| 
 | ||||
| #include "common/alignment.h" | ||||
| #include "common/assert.h" | ||||
| #include "common/bit_util.h" | ||||
| #include "common/common_types.h" | ||||
|  | @ -17,18 +18,119 @@ | |||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| 
 | ||||
| namespace Vulkan { | ||||
| namespace { | ||||
| // Maximum potential alignment of a Vulkan buffer
 | ||||
| constexpr VkDeviceSize MAX_ALIGNMENT = 256; | ||||
| // Maximum size to put elements in the stream buffer
 | ||||
| constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024; | ||||
| // Stream buffer size in bytes
 | ||||
| constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | ||||
| constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; | ||||
| 
 | ||||
| constexpr VkMemoryPropertyFlags HOST_FLAGS = | ||||
|     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; | ||||
| constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; | ||||
| 
 | ||||
| /// Returns true when STREAM_BUFFER_SIZE is smaller than two thirds of the heap size.
 | ||||
| bool IsStreamHeap(VkMemoryHeap heap) noexcept { | ||||
|     const VkDeviceSize two_thirds_of_heap = (heap.size * 2) / 3; | ||||
|     return two_thirds_of_heap > STREAM_BUFFER_SIZE; | ||||
| } | ||||
| 
 | ||||
| /// Searches the device memory types for one the stream buffer can use.
 | ||||
| /// A type qualifies when its bit is set in type_mask, it has every bit of flags, and its heap
 | ||||
| /// passes IsStreamHeap. Returns the first qualifying index, or std::nullopt when none does.
 | ||||
| std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, | ||||
|                                        VkMemoryPropertyFlags flags) noexcept { | ||||
|     for (u32 index = 0; index < props.memoryTypeCount; ++index) { | ||||
|         const VkMemoryType& candidate = props.memoryTypes[index]; | ||||
|         const bool is_compatible = ((type_mask >> index) & 1) != 0; | ||||
|         const bool has_flags = (candidate.propertyFlags & flags) == flags; | ||||
|         // The heap is only inspected once the type itself has been accepted
 | ||||
|         if (is_compatible && has_flags && | ||||
|             IsStreamHeap(props.memoryHeaps[candidate.heapIndex])) { | ||||
|             return index; | ||||
|         } | ||||
|     } | ||||
|     return std::nullopt; | ||||
| } | ||||
| 
 | ||||
| /// Picks the memory type index used to allocate the stream buffer.
 | ||||
| /// Throws vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY) when no memory type qualifies.
 | ||||
| u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) { | ||||
|     // Ordered by preference: first a DEVICE_LOCAL_BIT type (Nvidia and AMD have a dedicated heap
 | ||||
|     // for this), then the same request without the DEVICE_LOCAL_BIT
 | ||||
|     const VkMemoryPropertyFlags candidates[]{STREAM_FLAGS, HOST_FLAGS}; | ||||
|     for (const VkMemoryPropertyFlags flags : candidates) { | ||||
|         if (const std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, flags)) { | ||||
|             return *type; | ||||
|         } | ||||
|     } | ||||
|     // This should never happen, and in case it does, signal it as an out of memory situation
 | ||||
|     throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); | ||||
| } | ||||
| 
 | ||||
| /// Maps a byte offset inside the stream buffer to the index of the region that contains it.
 | ||||
| size_t Region(size_t iterator) noexcept { | ||||
|     const VkDeviceSize region_index = iterator / REGION_SIZE; | ||||
|     return static_cast<size_t>(region_index); | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, | ||||
|                                      VKScheduler& scheduler_) | ||||
|     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {} | ||||
|     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { | ||||
|     const vk::Device& dev = device.GetLogical(); | ||||
|     stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ | ||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|         .flags = 0, | ||||
|         .size = STREAM_BUFFER_SIZE, | ||||
|         .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||||
|                  VK_BUFFER_USAGE_INDEX_BUFFER_BIT, | ||||
|         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||
|         .queueFamilyIndexCount = 0, | ||||
|         .pQueueFamilyIndices = nullptr, | ||||
|     }); | ||||
|     if (device.HasDebuggingToolAttached()) { | ||||
|         stream_buffer.SetObjectNameEXT("Stream Buffer"); | ||||
|     } | ||||
|     VkMemoryDedicatedRequirements dedicated_reqs{ | ||||
|         .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, | ||||
|         .pNext = nullptr, | ||||
|         .prefersDedicatedAllocation = VK_FALSE, | ||||
|         .requiresDedicatedAllocation = VK_FALSE, | ||||
|     }; | ||||
|     const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs); | ||||
|     const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE || | ||||
|                                 dedicated_reqs.requiresDedicatedAllocation == VK_TRUE; | ||||
|     const VkMemoryDedicatedAllocateInfo dedicated_info{ | ||||
|         .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|         .image = nullptr, | ||||
|         .buffer = *stream_buffer, | ||||
|     }; | ||||
|     const auto memory_properties = device.GetPhysical().GetMemoryProperties(); | ||||
|     stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{ | ||||
|         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | ||||
|         .pNext = make_dedicated ? &dedicated_info : nullptr, | ||||
|         .allocationSize = requirements.size, | ||||
|         .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits), | ||||
|     }); | ||||
|     if (device.HasDebuggingToolAttached()) { | ||||
|         stream_memory.SetObjectNameEXT("Stream Buffer Memory"); | ||||
|     } | ||||
|     stream_buffer.BindMemory(*stream_memory, 0); | ||||
|     stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); | ||||
| } | ||||
| 
 | ||||
| StagingBufferPool::~StagingBufferPool() = default; | ||||
| 
 | ||||
| StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { | ||||
|     if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { | ||||
|         return *ref; | ||||
|     if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { | ||||
|         return GetStreamBuffer(size); | ||||
|     } | ||||
|     return CreateStagingBuffer(size, usage); | ||||
|     return GetStagingBuffer(size, usage); | ||||
| } | ||||
| 
 | ||||
| void StagingBufferPool::TickFrame() { | ||||
|  | @ -39,6 +141,51 @@ void StagingBufferPool::TickFrame() { | |||
|     ReleaseCache(MemoryUsage::Download); | ||||
| } | ||||
| 
 | ||||
| /// Carves `size` bytes for an upload out of the mapped stream buffer.
 | ||||
| /// The buffer is divided into NUM_SYNCS regions and sync_ticks stores one scheduler tick per
 | ||||
| /// region. When a region the request would touch is still in use, the request is served by
 | ||||
| /// GetStagingBuffer instead of waiting for that region to become free.
 | ||||
| StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { | ||||
|     if (AreRegionsActive(Region(free_iterator) + 1, | ||||
|                          std::min(Region(iterator + size) + 1, NUM_SYNCS))) { | ||||
|         // Avoid waiting for the previous usages to be free
 | ||||
|         return GetStagingBuffer(size, MemoryUsage::Upload); | ||||
|     } | ||||
|     const u64 current_tick = scheduler.CurrentTick(); | ||||
|     // Tag the regions between used_iterator and iterator with the current tick
 | ||||
|     std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator), | ||||
|               current_tick); | ||||
|     used_iterator = iterator; | ||||
|     free_iterator = std::max(free_iterator, iterator + size); | ||||
| 
 | ||||
|     // Wrap around when the request reaches the end of the buffer. A request ending exactly at
 | ||||
|     // STREAM_BUFFER_SIZE has to wrap too: otherwise iterator and free_iterator stay equal to
 | ||||
|     // STREAM_BUFFER_SIZE, Region() returns NUM_SYNCS for them, and the next call would hand
 | ||||
|     // AreRegionsActive a range that begins past the end of sync_ticks.
 | ||||
|     if (iterator + size >= STREAM_BUFFER_SIZE) { | ||||
|         std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, | ||||
|                   current_tick); | ||||
|         used_iterator = 0; | ||||
|         iterator = 0; | ||||
|         free_iterator = size; | ||||
| 
 | ||||
|         if (AreRegionsActive(0, Region(size) + 1)) { | ||||
|             // Avoid waiting for the previous usages to be free
 | ||||
|             return GetStagingBuffer(size, MemoryUsage::Upload); | ||||
|         } | ||||
|     } | ||||
|     const size_t offset = iterator; | ||||
|     // Keep the next sub-allocation aligned to MAX_ALIGNMENT
 | ||||
|     iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); | ||||
|     return StagingBufferRef{ | ||||
|         .buffer = *stream_buffer, | ||||
|         .offset = static_cast<VkDeviceSize>(offset), | ||||
|         .mapped_span = std::span<u8>(stream_pointer + offset, size), | ||||
|     }; | ||||
| } | ||||
| 
 | ||||
| /// Returns true when any region in [region_begin, region_end) holds a tick the scheduler does
 | ||||
| /// not report as free yet. The end is clamped to NUM_SYNCS and an empty or inverted range
 | ||||
| /// reports no active regions, so no iterator outside sync_ticks is ever formed.
 | ||||
| bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const { | ||||
|     const size_t clamped_end = std::min(region_end, NUM_SYNCS); | ||||
|     if (region_begin >= clamped_end) { | ||||
|         return false; | ||||
|     } | ||||
|     return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + clamped_end, | ||||
|                        [this](u64 sync_tick) { return !scheduler.IsFree(sync_tick); }); | ||||
| } | ||||
| 
 | ||||
| /// Returns a staging buffer of at least `size` bytes for the given usage, reusing a reserved
 | ||||
| /// buffer when TryGetReservedBuffer finds one and creating a new one otherwise.
 | ||||
| StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) { | ||||
|     const std::optional<StagingBufferRef> reserved = TryGetReservedBuffer(size, usage); | ||||
|     if (!reserved.has_value()) { | ||||
|         return CreateStagingBuffer(size, usage); | ||||
|     } | ||||
|     return *reserved; | ||||
| } | ||||
| 
 | ||||
| std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, | ||||
|                                                                         MemoryUsage usage) { | ||||
|     StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; | ||||
|  |  | |||
|  | @ -19,11 +19,14 @@ class VKScheduler; | |||
| 
 | ||||
| /// Reference to a staging allocation: the Vulkan buffer handle, the byte offset of the
 | ||||
| /// allocation inside that buffer, and the host span mapped to it.
 | ||||
| struct StagingBufferRef { | ||||
|     VkBuffer buffer; | ||||
|     VkDeviceSize offset; | ||||
|     std::span<u8> mapped_span; | ||||
| }; | ||||
| 
 | ||||
| class StagingBufferPool { | ||||
| public: | ||||
|     static constexpr size_t NUM_SYNCS = 16; | ||||
| 
 | ||||
|     explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, | ||||
|                                VKScheduler& scheduler); | ||||
|     ~StagingBufferPool(); | ||||
|  | @ -33,6 +36,11 @@ public: | |||
|     void TickFrame(); | ||||
| 
 | ||||
| private: | ||||
|     /// Pairs an upper bound with a tick. NOTE(review): no use of this type is visible in this
 | ||||
|     /// chunk - confirm whether it is still needed.
 | ||||
|     struct StreamBufferCommit { | ||||
|         size_t upper_bound; | ||||
|         u64 tick; | ||||
|     }; | ||||
| 
 | ||||
|     struct StagingBuffer { | ||||
|         vk::Buffer buffer; | ||||
|         MemoryCommit commit; | ||||
|  | @ -42,6 +50,7 @@ private: | |||
|         StagingBufferRef Ref() const noexcept { | ||||
|             return { | ||||
|                 .buffer = *buffer, | ||||
|                 .offset = 0, | ||||
|                 .mapped_span = mapped_span, | ||||
|             }; | ||||
|         } | ||||
|  | @ -56,6 +65,12 @@ private: | |||
|     static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; | ||||
|     using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; | ||||
| 
 | ||||
|     StagingBufferRef GetStreamBuffer(size_t size); | ||||
| 
 | ||||
|     bool AreRegionsActive(size_t region_begin, size_t region_end) const; | ||||
| 
 | ||||
|     StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage); | ||||
| 
 | ||||
|     std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); | ||||
| 
 | ||||
|     StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); | ||||
|  | @ -70,6 +85,15 @@ private: | |||
|     MemoryAllocator& memory_allocator; | ||||
|     VKScheduler& scheduler; | ||||
| 
 | ||||
|     vk::Buffer stream_buffer; | ||||
|     vk::DeviceMemory stream_memory; | ||||
|     u8* stream_pointer = nullptr; | ||||
| 
 | ||||
|     size_t iterator = 0; | ||||
|     size_t used_iterator = 0; | ||||
|     size_t free_iterator = 0; | ||||
|     std::array<u64, NUM_SYNCS> sync_ticks{}; | ||||
| 
 | ||||
|     StagingBuffersCache device_local_cache; | ||||
|     StagingBuffersCache upload_cache; | ||||
|     StagingBuffersCache download_cache; | ||||
|  |  | |||
|  | @ -30,15 +30,18 @@ using Table = Maxwell3D::DirtyState::Table; | |||
| using Flags = Maxwell3D::DirtyState::Flags; | ||||
| 
 | ||||
| Flags MakeInvalidationFlags() { | ||||
|     static constexpr std::array INVALIDATION_FLAGS{ | ||||
|     static constexpr int INVALIDATION_FLAGS[]{ | ||||
|         Viewports,         Scissors,  DepthBias,         BlendConstants,    DepthBounds, | ||||
|         StencilProperties, CullMode,  DepthBoundsEnable, DepthTestEnable,   DepthWriteEnable, | ||||
|         DepthCompareOp,    FrontFace, StencilOp,         StencilTestEnable, | ||||
|         DepthCompareOp,    FrontFace, StencilOp,         StencilTestEnable, VertexBuffers, | ||||
|     }; | ||||
|     Flags flags{}; | ||||
|     for (const int flag : INVALIDATION_FLAGS) { | ||||
|         flags[flag] = true; | ||||
|     } | ||||
|     for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { | ||||
|         flags[index] = true; | ||||
|     } | ||||
|     return flags; | ||||
| } | ||||
| 
 | ||||
|  | @ -130,7 +133,7 @@ void SetupDirtyStencilTestEnable(Tables& tables) { | |||
| StateTracker::StateTracker(Tegra::GPU& gpu) | ||||
|     : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { | ||||
|     auto& tables = gpu.Maxwell3D().dirty.tables; | ||||
|     SetupDirtyRenderTargets(tables); | ||||
|     SetupDirtyFlags(tables); | ||||
|     SetupDirtyViewports(tables); | ||||
|     SetupDirtyScissors(tables); | ||||
|     SetupDirtyDepthBias(tables); | ||||
|  |  | |||
|  | @ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi | |||
| 
 | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_) | ||||
|     : surface{surface_}, device{device_}, scheduler{scheduler_} {} | ||||
| VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_, | ||||
|                          u32 width, u32 height, bool srgb) | ||||
|     : surface{surface_}, device{device_}, scheduler{scheduler_} { | ||||
|     Create(width, height, srgb); | ||||
| } | ||||
| 
 | ||||
| VKSwapchain::~VKSwapchain() = default; | ||||
| 
 | ||||
|  |  | |||
|  | @ -20,7 +20,8 @@ class VKScheduler; | |||
| 
 | ||||
| class VKSwapchain { | ||||
| public: | ||||
|     explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler); | ||||
|     explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler, | ||||
|                          u32 width, u32 height, bool srgb); | ||||
|     ~VKSwapchain(); | ||||
| 
 | ||||
|     /// Creates (or recreates) the swapchain with a given size.
 | ||||
|  |  | |||
|  | @ -426,21 +426,23 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, | ||||
|                        VkImageAspectFlags aspect_mask, bool is_initialized, | ||||
|                        std::span<const VkBufferImageCopy> copies) { | ||||
|     static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | | ||||
|                                                   VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||||
|     static constexpr VkAccessFlags WRITE_ACCESS_FLAGS = | ||||
|         VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||||
|         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; | ||||
|     static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT | | ||||
|                                                        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||||
|                                                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; | ||||
|     const VkImageMemoryBarrier read_barrier{ | ||||
|         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||
|         .pNext = nullptr, | ||||
|         .srcAccessMask = ACCESS_FLAGS, | ||||
|         .srcAccessMask = WRITE_ACCESS_FLAGS, | ||||
|         .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|         .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, | ||||
|         .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||||
|         .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|         .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|         .image = image, | ||||
|         .subresourceRange = | ||||
|             { | ||||
|         .subresourceRange{ | ||||
|             .aspectMask = aspect_mask, | ||||
|             .baseMipLevel = 0, | ||||
|             .levelCount = VK_REMAINING_MIP_LEVELS, | ||||
|  | @ -452,14 +454,13 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im | |||
|         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||
|         .pNext = nullptr, | ||||
|         .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|         .dstAccessMask = ACCESS_FLAGS, | ||||
|         .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS, | ||||
|         .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||||
|         .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||
|         .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|         .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|         .image = image, | ||||
|         .subresourceRange = | ||||
|             { | ||||
|         .subresourceRange{ | ||||
|             .aspectMask = aspect_mask, | ||||
|             .baseMipLevel = 0, | ||||
|             .levelCount = VK_REMAINING_MIP_LEVELS, | ||||
|  | @ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() { | |||
|     scheduler.Finish(); | ||||
| } | ||||
| 
 | ||||
| ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { | ||||
|     const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload); | ||||
|     return { | ||||
|         .handle = staging_ref.buffer, | ||||
|         .span = staging_ref.mapped_span, | ||||
|     }; | ||||
| StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) { | ||||
|     return staging_buffer_pool.Request(size, MemoryUsage::Upload); | ||||
| } | ||||
| 
 | ||||
| ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { | ||||
|     const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download); | ||||
|     return { | ||||
|         .handle = staging_ref.buffer, | ||||
|         .span = staging_ref.mapped_span, | ||||
|     }; | ||||
| StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { | ||||
|     return staging_buffer_pool.Request(size, MemoryUsage::Download); | ||||
| } | ||||
| 
 | ||||
| void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | ||||
|  | @ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | |||
|                 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||||
|                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||||
|                                  VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|                 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|                 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||||
|                 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||
|                 .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||
|                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|  | @ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | |||
|             VkImageMemoryBarrier{ | ||||
|                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||
|                 .pNext = nullptr, | ||||
|                 .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | | ||||
|                                  VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||||
|                                  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||||
|                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||||
|                 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||||
|                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||||
|                                  VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|                                  VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|                 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|                 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||
|                 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||||
|  | @ -828,12 +818,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | ||||
|                          std::span<const BufferImageCopy> copies) { | ||||
| void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { | ||||
|     // TODO: Move this to another API
 | ||||
|     scheduler->RequestOutsideRenderPassOperationContext(); | ||||
|     std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); | ||||
|     const VkBuffer src_buffer = map.handle; | ||||
|     std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); | ||||
|     const VkBuffer src_buffer = map.buffer; | ||||
|     const VkImage vk_image = *image; | ||||
|     const VkImageAspectFlags vk_aspect_mask = aspect_mask; | ||||
|     const bool is_initialized = std::exchange(initialized, true); | ||||
|  | @ -843,12 +832,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | |||
|     }); | ||||
| } | ||||
| 
 | ||||
| void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | ||||
| void Image::UploadMemory(const StagingBufferRef& map, | ||||
|                          std::span<const VideoCommon::BufferCopy> copies) { | ||||
|     // TODO: Move this to another API
 | ||||
|     scheduler->RequestOutsideRenderPassOperationContext(); | ||||
|     std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); | ||||
|     const VkBuffer src_buffer = map.handle; | ||||
|     std::vector vk_copies = TransformBufferCopies(copies, map.offset); | ||||
|     const VkBuffer src_buffer = map.buffer; | ||||
|     const VkBuffer dst_buffer = *buffer; | ||||
|     scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { | ||||
|         // TODO: Barriers
 | ||||
|  | @ -856,13 +845,57 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | |||
|     }); | ||||
| } | ||||
| 
 | ||||
| void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, | ||||
|                            std::span<const BufferImageCopy> copies) { | ||||
|     std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); | ||||
|     scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask, | ||||
| void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { | ||||
|     std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); | ||||
|     scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, | ||||
|                        vk_copies](vk::CommandBuffer cmdbuf) { | ||||
|         // TODO: Barriers
 | ||||
|         cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); | ||||
|         const VkImageMemoryBarrier read_barrier{ | ||||
|             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||
|             .pNext = nullptr, | ||||
|             .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||||
|             .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||||
|             .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||
|             .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||||
|             .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|             .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|             .image = image, | ||||
|             .subresourceRange{ | ||||
|                 .aspectMask = aspect_mask, | ||||
|                 .baseMipLevel = 0, | ||||
|                 .levelCount = VK_REMAINING_MIP_LEVELS, | ||||
|                 .baseArrayLayer = 0, | ||||
|                 .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||||
|             }, | ||||
|         }; | ||||
|         const VkImageMemoryBarrier image_write_barrier{ | ||||
|             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||
|             .pNext = nullptr, | ||||
|             .srcAccessMask = 0, | ||||
|             .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||||
|             .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||||
|             .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||
|             .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|             .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|             .image = image, | ||||
|             .subresourceRange{ | ||||
|                 .aspectMask = aspect_mask, | ||||
|                 .baseMipLevel = 0, | ||||
|                 .levelCount = VK_REMAINING_MIP_LEVELS, | ||||
|                 .baseArrayLayer = 0, | ||||
|                 .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||||
|             }, | ||||
|         }; | ||||
|         const VkMemoryBarrier memory_write_barrier{ | ||||
|             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||
|             .pNext = nullptr, | ||||
|             .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||||
|             .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||||
|         }; | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||||
|                                0, read_barrier); | ||||
|         cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies); | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||||
|                                0, memory_write_barrier, nullptr, image_write_barrier); | ||||
|     }); | ||||
| } | ||||
| 
 | ||||
|  | @ -1127,7 +1160,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
|         .pAttachments = attachments.data(), | ||||
|         .width = key.size.width, | ||||
|         .height = key.size.height, | ||||
|         .layers = static_cast<u32>(num_layers), | ||||
|         .layers = static_cast<u32>(std::max(num_layers, 1)), | ||||
|     }); | ||||
|     if (runtime.device.HasDebuggingToolAttached()) { | ||||
|         framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); | ||||
|  |  | |||
|  | @ -7,6 +7,7 @@ | |||
| #include <compare> | ||||
| #include <span> | ||||
| 
 | ||||
| #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||||
| #include "video_core/texture_cache/texture_cache.h" | ||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
|  | @ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> { | |||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| struct ImageBufferMap { | ||||
|     [[nodiscard]] VkBuffer Handle() const noexcept { | ||||
|         return handle; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] std::span<u8> Span() const noexcept { | ||||
|         return span; | ||||
|     } | ||||
| 
 | ||||
|     VkBuffer handle; | ||||
|     std::span<u8> span; | ||||
| }; | ||||
| 
 | ||||
| struct TextureCacheRuntime { | ||||
|     const Device& device; | ||||
|     VKScheduler& scheduler; | ||||
|  | @ -76,9 +64,9 @@ struct TextureCacheRuntime { | |||
| 
 | ||||
|     void Finish(); | ||||
| 
 | ||||
|     [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); | ||||
|     [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | ||||
| 
 | ||||
|     [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size); | ||||
|     [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | ||||
| 
 | ||||
|     void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | ||||
|                    const std::array<Offset2D, 2>& dst_region, | ||||
|  | @ -94,7 +82,7 @@ struct TextureCacheRuntime { | |||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, | ||||
|     void AccelerateImageUpload(Image&, const StagingBufferRef&, | ||||
|                                std::span<const VideoCommon::SwizzleParameters>) { | ||||
|         UNREACHABLE(); | ||||
|     } | ||||
|  | @ -112,13 +100,12 @@ public: | |||
|     explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | ||||
|                    VAddr cpu_addr); | ||||
| 
 | ||||
|     void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | ||||
|     void UploadMemory(const StagingBufferRef& map, | ||||
|                       std::span<const VideoCommon::BufferImageCopy> copies); | ||||
| 
 | ||||
|     void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | ||||
|                       std::span<const VideoCommon::BufferCopy> copies); | ||||
|     void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies); | ||||
| 
 | ||||
|     void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, | ||||
|     void DownloadMemory(const StagingBufferRef& map, | ||||
|                         std::span<const VideoCommon::BufferImageCopy> copies); | ||||
| 
 | ||||
|     [[nodiscard]] VkImage Handle() const noexcept { | ||||
|  |  | |||
|  | @ -9,16 +9,7 @@ | |||
| #include <shared_mutex> | ||||
| #include <thread> | ||||
| 
 | ||||
| // This header includes both Vulkan and OpenGL headers, this has to be fixed
 | ||||
| // Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues.
 | ||||
| // Forcefully include glad early and undefine macros
 | ||||
| #include <glad/glad.h> | ||||
| #ifdef CreateEvent | ||||
| #undef CreateEvent | ||||
| #endif | ||||
| #ifdef CreateSemaphore | ||||
| #undef CreateSemaphore | ||||
| #endif | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/renderer_opengl/gl_device.h" | ||||
|  |  | |||
|  | @ -76,6 +76,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
|             case SystemVariable::InvocationId: | ||||
|                 return Operation(OperationCode::InvocationId); | ||||
|             case SystemVariable::Ydirection: | ||||
|                 uses_y_negate = true; | ||||
|                 return Operation(OperationCode::YNegate); | ||||
|             case SystemVariable::InvocationInfo: | ||||
|                 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); | ||||
|  |  | |||
|  | @ -139,6 +139,10 @@ public: | |||
|         return uses_legacy_varyings; | ||||
|     } | ||||
| 
 | ||||
|     bool UsesYNegate() const { | ||||
          // Reports the uses_y_negate flag; in the visible decoder it is raised when
          // SystemVariable::Ydirection is decoded into an OperationCode::YNegate node.
|         return uses_y_negate; | ||||
|     } | ||||
| 
 | ||||
|     bool UsesWarps() const { | ||||
|         return uses_warps; | ||||
|     } | ||||
|  | @ -465,6 +469,7 @@ private: | |||
|     bool uses_instance_id{}; | ||||
|     bool uses_vertex_id{}; | ||||
|     bool uses_legacy_varyings{}; | ||||
|     bool uses_y_negate{}; | ||||
|     bool uses_warps{}; | ||||
|     bool uses_indexed_samplers{}; | ||||
| 
 | ||||
|  |  | |||
|  | @ -103,9 +103,6 @@ public: | |||
|     /// Notify the cache that a new frame has been queued
 | ||||
|     void TickFrame(); | ||||
| 
 | ||||
|     /// Return an unique mutually exclusive lock for the cache
 | ||||
|     [[nodiscard]] std::unique_lock<std::mutex> AcquireLock(); | ||||
| 
 | ||||
|     /// Return a constant reference to the given image view id
 | ||||
|     [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; | ||||
| 
 | ||||
|  | @ -179,6 +176,8 @@ public: | |||
|     /// Return true when a CPU region is modified from the GPU
 | ||||
|     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||||
| 
 | ||||
|     std::mutex mutex; | ||||
| 
 | ||||
| private: | ||||
|     /// Iterate over all page indices in a range
 | ||||
|     template <typename Func> | ||||
|  | @ -212,8 +211,8 @@ private: | |||
|     void RefreshContents(Image& image); | ||||
| 
 | ||||
|     /// Upload data from guest to an image
 | ||||
|     template <typename MapBuffer> | ||||
|     void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); | ||||
|     template <typename StagingBuffer> | ||||
|     void UploadImageContents(Image& image, StagingBuffer& staging_buffer); | ||||
| 
 | ||||
|     /// Find or create an image view from a guest descriptor
 | ||||
|     [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); | ||||
|  | @ -325,8 +324,6 @@ private: | |||
| 
 | ||||
|     RenderTargets render_targets; | ||||
| 
 | ||||
|     std::mutex mutex; | ||||
| 
 | ||||
|     std::unordered_map<TICEntry, ImageViewId> image_views; | ||||
|     std::unordered_map<TSCEntry, SamplerId> samplers; | ||||
|     std::unordered_map<RenderTargets, FramebufferId> framebuffers; | ||||
|  | @ -385,11 +382,6 @@ void TextureCache<P>::TickFrame() { | |||
|     ++frame_tick; | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() { | ||||
|     return std::unique_lock{mutex}; | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { | ||||
|     return slot_image_views[id]; | ||||
|  | @ -598,11 +590,11 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | |||
|     }); | ||||
|     for (const ImageId image_id : images) { | ||||
|         Image& image = slot_images[image_id]; | ||||
|         auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); | ||||
|         auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); | ||||
|         const auto copies = FullDownloadCopies(image.info); | ||||
|         image.DownloadMemory(map, 0, copies); | ||||
|         image.DownloadMemory(map, copies); | ||||
|         runtime.Finish(); | ||||
|         SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); | ||||
|         SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | @ -757,25 +749,25 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
|     for (const ImageId image_id : download_ids) { | ||||
|         total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | ||||
|     } | ||||
|     auto download_map = runtime.MapDownloadBuffer(total_size_bytes); | ||||
|     size_t buffer_offset = 0; | ||||
|     auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | ||||
|     const size_t original_offset = download_map.offset; | ||||
|     for (const ImageId image_id : download_ids) { | ||||
|         Image& image = slot_images[image_id]; | ||||
|         const auto copies = FullDownloadCopies(image.info); | ||||
|         image.DownloadMemory(download_map, buffer_offset, copies); | ||||
|         buffer_offset += image.unswizzled_size_bytes; | ||||
|         image.DownloadMemory(download_map, copies); | ||||
|         download_map.offset += image.unswizzled_size_bytes; | ||||
|     } | ||||
|     // Wait for downloads to finish
 | ||||
|     runtime.Finish(); | ||||
| 
 | ||||
|     buffer_offset = 0; | ||||
|     const std::span<u8> download_span = download_map.Span(); | ||||
|     download_map.offset = original_offset; | ||||
|     std::span<u8> download_span = download_map.mapped_span; | ||||
|     for (const ImageId image_id : download_ids) { | ||||
|         const ImageBase& image = slot_images[image_id]; | ||||
|         const auto copies = FullDownloadCopies(image.info); | ||||
|         const std::span<u8> image_download_span = download_span.subspan(buffer_offset); | ||||
|         SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); | ||||
|         buffer_offset += image.unswizzled_size_bytes; | ||||
|         SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); | ||||
|         download_map.offset += image.unswizzled_size_bytes; | ||||
|         download_span = download_span.subspan(image.unswizzled_size_bytes); | ||||
|     } | ||||
|     committed_downloads.pop(); | ||||
| } | ||||
|  | @ -806,32 +798,32 @@ void TextureCache<P>::RefreshContents(Image& image) { | |||
|         LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); | ||||
|         return; | ||||
|     } | ||||
|     auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); | ||||
|     UploadImageContents(image, map, 0); | ||||
|     auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); | ||||
|     UploadImageContents(image, staging); | ||||
|     runtime.InsertUploadMemoryBarrier(); | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| template <typename MapBuffer> | ||||
| void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { | ||||
|     const std::span<u8> mapped_span = map.Span().subspan(buffer_offset); | ||||
| template <typename StagingBuffer> | ||||
| void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) { | ||||
|     const std::span<u8> mapped_span = staging.mapped_span; | ||||
|     const GPUVAddr gpu_addr = image.gpu_addr; | ||||
| 
 | ||||
|     if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | ||||
|         gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | ||||
|         const auto uploads = FullUploadSwizzles(image.info); | ||||
|         runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); | ||||
|         runtime.AccelerateImageUpload(image, staging, uploads); | ||||
|     } else if (True(image.flags & ImageFlagBits::Converted)) { | ||||
|         std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | ||||
|         auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | ||||
|         ConvertImage(unswizzled_data, image.info, mapped_span, copies); | ||||
|         image.UploadMemory(map, buffer_offset, copies); | ||||
|         image.UploadMemory(staging, copies); | ||||
|     } else if (image.info.type == ImageType::Buffer) { | ||||
|         const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; | ||||
|         image.UploadMemory(map, buffer_offset, copies); | ||||
|         image.UploadMemory(staging, copies); | ||||
|     } else { | ||||
|         const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | ||||
|         image.UploadMemory(map, buffer_offset, copies); | ||||
|         image.UploadMemory(staging, copies); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -38,19 +38,18 @@ namespace VideoCore { | |||
| 
 | ||||
| std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { | ||||
|     const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); | ||||
|     std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>( | ||||
|         system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec); | ||||
| 
 | ||||
|     const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | ||||
|     auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec); | ||||
|     auto context = emu_window.CreateSharedContext(); | ||||
|     const auto scope = context->Acquire(); | ||||
| 
 | ||||
|     auto scope = context->Acquire(); | ||||
|     try { | ||||
|         auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context)); | ||||
|     if (!renderer->Init()) { | ||||
|         return nullptr; | ||||
|     } | ||||
| 
 | ||||
|         gpu->BindRenderer(std::move(renderer)); | ||||
|         return gpu; | ||||
|     } catch (const std::runtime_error& exception) { | ||||
|         LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what()); | ||||
|         return nullptr; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| u16 GetResolutionScaleFactor(const RendererBase& renderer) { | ||||
|  |  | |||
|  | @ -18,27 +18,22 @@ | |||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
| namespace Alternatives { | ||||
| 
 | ||||
| constexpr std::array Depth24UnormS8_UINT{ | ||||
| constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{ | ||||
|     VK_FORMAT_D32_SFLOAT_S8_UINT, | ||||
|     VK_FORMAT_D16_UNORM_S8_UINT, | ||||
|     VkFormat{}, | ||||
|     VK_FORMAT_UNDEFINED, | ||||
| }; | ||||
| 
 | ||||
| constexpr std::array Depth16UnormS8_UINT{ | ||||
| constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{ | ||||
|     VK_FORMAT_D24_UNORM_S8_UINT, | ||||
|     VK_FORMAT_D32_SFLOAT_S8_UINT, | ||||
|     VkFormat{}, | ||||
|     VK_FORMAT_UNDEFINED, | ||||
| }; | ||||
| 
 | ||||
| } // namespace Alternatives
 | ||||
| 
 | ||||
| constexpr std::array REQUIRED_EXTENSIONS{ | ||||
|     VK_KHR_SWAPCHAIN_EXTENSION_NAME, | ||||
|     VK_KHR_MAINTENANCE1_EXTENSION_NAME, | ||||
|     VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, | ||||
|     VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, | ||||
|  | @ -52,6 +47,12 @@ constexpr std::array REQUIRED_EXTENSIONS{ | |||
|     VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, | ||||
|     VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, | ||||
|     VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, | ||||
| #ifdef _WIN32 | ||||
|     VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, | ||||
| #endif | ||||
| #ifdef __linux__ | ||||
|     VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, | ||||
| #endif | ||||
| }; | ||||
| 
 | ||||
| template <typename T> | ||||
|  | @ -63,9 +64,9 @@ void SetNext(void**& next, T& data) { | |||
| constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { | ||||
|     switch (format) { | ||||
|     case VK_FORMAT_D24_UNORM_S8_UINT: | ||||
|         return Alternatives::Depth24UnormS8_UINT.data(); | ||||
|         return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data(); | ||||
|     case VK_FORMAT_D16_UNORM_S8_UINT: | ||||
|         return Alternatives::Depth16UnormS8_UINT.data(); | ||||
|         return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data(); | ||||
|     default: | ||||
|         return nullptr; | ||||
|     } | ||||
|  | @ -195,23 +196,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
|                const vk::InstanceDispatch& dld_) | ||||
|     : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, | ||||
|       format_properties{GetFormatProperties(physical)} { | ||||
|     CheckSuitability(); | ||||
|     CheckSuitability(surface != nullptr); | ||||
|     SetupFamilies(surface); | ||||
|     SetupFeatures(); | ||||
| 
 | ||||
|     const auto queue_cis = GetDeviceQueueCreateInfos(); | ||||
|     const std::vector extensions = LoadExtensions(); | ||||
|     const std::vector extensions = LoadExtensions(surface != nullptr); | ||||
| 
 | ||||
|     VkPhysicalDeviceFeatures2 features2{ | ||||
|         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, | ||||
|         .pNext = nullptr, | ||||
|         .features{}, | ||||
|     }; | ||||
|     const void* first_next = &features2; | ||||
|     void** next = &features2.pNext; | ||||
| 
 | ||||
|     features2.features = { | ||||
|         .robustBufferAccess = false, | ||||
|         .features{ | ||||
|             .robustBufferAccess = true, | ||||
|             .fullDrawIndexUint32 = false, | ||||
|             .imageCubeArray = true, | ||||
|             .independentBlend = true, | ||||
|  | @ -266,7 +262,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
|             .sparseResidencyAliased = false, | ||||
|             .variableMultisampleRate = false, | ||||
|             .inheritedQueries = false, | ||||
|         }, | ||||
|     }; | ||||
|     const void* first_next = &features2; | ||||
|     void** next = &features2.pNext; | ||||
| 
 | ||||
|     VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ | ||||
|         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, | ||||
|         .pNext = nullptr, | ||||
|  | @ -384,7 +384,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
|         robustness2 = { | ||||
|             .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, | ||||
|             .pNext = nullptr, | ||||
|             .robustBufferAccess2 = false, | ||||
|             .robustBufferAccess2 = true, | ||||
|             .robustImageAccess2 = true, | ||||
|             .nullDescriptor = true, | ||||
|         }; | ||||
|  | @ -535,16 +535,18 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want | |||
|     return (supported_usage & wanted_usage) == wanted_usage; | ||||
| } | ||||
| 
 | ||||
| void Device::CheckSuitability() const { | ||||
| void Device::CheckSuitability(bool requires_swapchain) const { | ||||
|     std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; | ||||
|     bool has_swapchain = false; | ||||
|     for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { | ||||
|         for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { | ||||
|         const std::string_view name{property.extensionName}; | ||||
|         for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { | ||||
|             if (available_extensions[i]) { | ||||
|                 continue; | ||||
|             } | ||||
|             const std::string_view name{property.extensionName}; | ||||
|             available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; | ||||
|         } | ||||
|         has_swapchain = has_swapchain || name == VK_KHR_SWAPCHAIN_EXTENSION_NAME; | ||||
|     } | ||||
|     for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { | ||||
|         if (available_extensions[i]) { | ||||
|  | @ -553,6 +555,11 @@ void Device::CheckSuitability() const { | |||
|         LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); | ||||
|         throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); | ||||
|     } | ||||
|     if (requires_swapchain && !has_swapchain) { | ||||
|         LOG_ERROR(Render_Vulkan, "Missing required extension: VK_KHR_swapchain"); | ||||
|         throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); | ||||
|     } | ||||
| 
 | ||||
|     struct LimitTuple { | ||||
|         u32 minimum; | ||||
|         u32 value; | ||||
|  | @ -574,7 +581,9 @@ void Device::CheckSuitability() const { | |||
|     } | ||||
|     const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; | ||||
|     const std::array feature_report{ | ||||
|         std::make_pair(features.robustBufferAccess, "robustBufferAccess"), | ||||
|         std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), | ||||
|         std::make_pair(features.robustBufferAccess, "robustBufferAccess"), | ||||
|         std::make_pair(features.imageCubeArray, "imageCubeArray"), | ||||
|         std::make_pair(features.independentBlend, "independentBlend"), | ||||
|         std::make_pair(features.depthClamp, "depthClamp"), | ||||
|  | @ -599,10 +608,13 @@ void Device::CheckSuitability() const { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| std::vector<const char*> Device::LoadExtensions() { | ||||
| std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | ||||
|     std::vector<const char*> extensions; | ||||
|     extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); | ||||
|     extensions.reserve(8 + REQUIRED_EXTENSIONS.size()); | ||||
|     extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); | ||||
|     if (requires_surface) { | ||||
|         extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | ||||
|     } | ||||
| 
 | ||||
|     bool has_khr_shader_float16_int8{}; | ||||
|     bool has_ext_subgroup_size_control{}; | ||||
|  | @ -743,7 +755,8 @@ std::vector<const char*> Device::LoadExtensions() { | |||
|         robustness2.pNext = nullptr; | ||||
|         features.pNext = &robustness2; | ||||
|         physical.GetFeatures2KHR(features); | ||||
|         if (robustness2.nullDescriptor && robustness2.robustImageAccess2) { | ||||
|         if (robustness2.nullDescriptor && robustness2.robustBufferAccess2 && | ||||
|             robustness2.robustImageAccess2) { | ||||
|             extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); | ||||
|             ext_robustness2 = true; | ||||
|         } | ||||
|  |  | |||
|  | @ -23,7 +23,7 @@ enum class FormatType { Linear, Optimal, Buffer }; | |||
| const u32 GuestWarpSize = 32; | ||||
| 
 | ||||
| /// Handles data specific to a physical device.
 | ||||
| class Device final { | ||||
| class Device { | ||||
| public: | ||||
|     explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, | ||||
|                     const vk::InstanceDispatch& dld); | ||||
|  | @ -227,10 +227,10 @@ public: | |||
| 
 | ||||
| private: | ||||
|     /// Checks if the physical device is suitable.
 | ||||
|     void CheckSuitability() const; | ||||
|     void CheckSuitability(bool requires_swapchain) const; | ||||
| 
 | ||||
|     /// Loads extensions into a vector and stores available ones in this object.
 | ||||
|     std::vector<const char*> LoadExtensions(); | ||||
|     std::vector<const char*> LoadExtensions(bool requires_surface); | ||||
| 
 | ||||
|     /// Sets up queue families.
 | ||||
|     void SetupFamilies(VkSurfaceKHR surface); | ||||
|  |  | |||
|  | @ -3,6 +3,7 @@ | |||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <future> | ||||
| #include <optional> | ||||
| #include <span> | ||||
| #include <utility> | ||||
|  | @ -140,7 +141,10 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD | |||
|                   VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version)); | ||||
|         throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); | ||||
|     } | ||||
|     vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld); | ||||
|     vk::Instance instance = | ||||
|         std::async([&] { | ||||
|             return vk::Instance::Create(required_version, layers, extensions, dld); | ||||
|         }).get(); | ||||
|     if (!vk::Load(*instance, dld)) { | ||||
|         LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); | ||||
|         throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||||
|  |  | |||
|  | @ -7,6 +7,8 @@ | |||
| #include <optional> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include <glad/glad.h> | ||||
| 
 | ||||
| #include "common/alignment.h" | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
|  | @ -55,10 +57,24 @@ struct Range { | |||
| 
 | ||||
| class MemoryAllocation { | ||||
| public: | ||||
|     explicit MemoryAllocation(const Device& device_, vk::DeviceMemory memory_, | ||||
|                               VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type) | ||||
|         : device{device_}, memory{std::move(memory_)}, allocation_size{allocation_size_}, | ||||
|           property_flags{properties}, shifted_memory_type{1U << type} {} | ||||
|     explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties, | ||||
|                               u64 allocation_size_, u32 type) | ||||
|         : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties}, | ||||
|           shifted_memory_type{1U << type} {} | ||||
| 
 | ||||
| #if defined(_WIN32) || defined(__linux__) | ||||
|     ~MemoryAllocation() { | ||||
          // owning_opengl_handle stays zero unless ExportOpenGLHandle() created a memory
          // object, so allocations that were never exported make no OpenGL call here.
          // NOTE(review): presumably the OpenGL context that created the object must be
          // current when this runs — confirm against the owning renderer.
|         if (owning_opengl_handle != 0) { | ||||
|             glDeleteMemoryObjectsEXT(1, &owning_opengl_handle); | ||||
|         } | ||||
|     } | ||||
| #endif | ||||
| 
 | ||||
|     MemoryAllocation& operator=(const MemoryAllocation&) = delete; | ||||
|     MemoryAllocation(const MemoryAllocation&) = delete; | ||||
| 
 | ||||
|     MemoryAllocation& operator=(MemoryAllocation&&) = delete; | ||||
|     MemoryAllocation(MemoryAllocation&&) = delete; | ||||
| 
 | ||||
|     [[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) { | ||||
|         const std::optional<u64> alloc = FindFreeRegion(size, alignment); | ||||
|  | @ -88,6 +104,31 @@ public: | |||
|         return memory_mapped_span; | ||||
|     } | ||||
| 
 | ||||
| #ifdef _WIN32 | ||||
      // Returns the OpenGL memory object backing this allocation, creating it on first use.
      // The object imports the whole Vulkan allocation (allocation_size bytes) and is cached
      // in owning_opengl_handle, so later calls return the same name without re-importing.
      // Windows variant: the import goes through an opaque Win32 handle.
|     [[nodiscard]] u32 ExportOpenGLHandle() { | ||||
|         if (!owning_opengl_handle) { | ||||
|             glCreateMemoryObjectsEXT(1, &owning_opengl_handle); | ||||
|             glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size, | ||||
|                                          GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, | ||||
|                                          memory.GetMemoryWin32HandleKHR()); | ||||
|         } | ||||
|         return owning_opengl_handle; | ||||
|     } | ||||
| #elif __linux__ | ||||
      // Linux variant: same lazy creation and caching, importing through an opaque
      // file descriptor obtained from the Vulkan memory object.
|     [[nodiscard]] u32 ExportOpenGLHandle() { | ||||
|         if (!owning_opengl_handle) { | ||||
|             glCreateMemoryObjectsEXT(1, &owning_opengl_handle); | ||||
|             glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT, | ||||
|                                 memory.GetMemoryFdKHR()); | ||||
|         } | ||||
|         return owning_opengl_handle; | ||||
|     } | ||||
| #else | ||||
      // Other platforms have no export path here; callers always receive 0.
|     [[nodiscard]] u32 ExportOpenGLHandle() { | ||||
|         return 0; | ||||
|     } | ||||
| #endif | ||||
| 
 | ||||
|     /// Returns whether this allocation is compatible with the arguments.
 | ||||
|     [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const { | ||||
|         return (flags & property_flags) && (type_mask & shifted_memory_type) != 0; | ||||
|  | @ -118,13 +159,15 @@ private: | |||
|         return candidate; | ||||
|     } | ||||
| 
 | ||||
|     const Device& device;                       ///< Vulkan device.
 | ||||
|     const vk::DeviceMemory memory;              ///< Vulkan memory allocation handler.
 | ||||
|     const u64 allocation_size;                  ///< Size of this allocation.
 | ||||
|     const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
 | ||||
|     const u32 shifted_memory_type;              ///< Shifted Vulkan memory type.
 | ||||
|     std::vector<Range> commits;                 ///< All commit ranges done from this allocation.
 | ||||
|     std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
 | ||||
| #if defined(_WIN32) || defined(__linux__) | ||||
|     u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle. Zero until exported.
 | ||||
| #endif | ||||
| }; | ||||
| 
 | ||||
| MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, | ||||
|  | @ -156,14 +199,19 @@ std::span<u8> MemoryCommit::Map() { | |||
|     return span; | ||||
| } | ||||
| 
 | ||||
| u32 MemoryCommit::ExportOpenGLHandle() const { | ||||
      // Forwards to the backing allocation and returns whatever handle it reports.
      // NOTE(review): allocation is dereferenced unchecked here, while Release() null-checks
      // it — confirm this is never called on a commit without an allocation.
|     return allocation->ExportOpenGLHandle(); | ||||
| } | ||||
| 
 | ||||
| void MemoryCommit::Release() { | ||||
      // Does nothing when this commit has no allocation; otherwise asks the allocation
      // to free the range identified by this commit's begin offset.
|     if (allocation) { | ||||
|         allocation->Free(begin); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| MemoryAllocator::MemoryAllocator(const Device& device_) | ||||
|     : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} | ||||
| MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_) | ||||
|     : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()}, | ||||
|       export_allocations{export_allocations_} {} | ||||
| 
 | ||||
| MemoryAllocator::~MemoryAllocator() = default; | ||||
| 
 | ||||
|  | @ -196,14 +244,24 @@ MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) | |||
| 
 | ||||
| void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { | ||||
|     const u32 type = FindType(flags, type_mask).value(); | ||||
|     const VkExportMemoryAllocateInfo export_allocate_info{ | ||||
|         .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, | ||||
|         .pNext = nullptr, | ||||
| #ifdef _WIN32 | ||||
|         .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, | ||||
| #elif __linux__ | ||||
|         .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, | ||||
| #else | ||||
|         .handleTypes = 0, | ||||
| #endif | ||||
|     }; | ||||
|     vk::DeviceMemory memory = device.GetLogical().AllocateMemory({ | ||||
|         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|         .pNext = export_allocations ? &export_allocate_info : nullptr, | ||||
|         .allocationSize = size, | ||||
|         .memoryTypeIndex = type, | ||||
|     }); | ||||
|     allocations.push_back( | ||||
|         std::make_unique<MemoryAllocation>(device, std::move(memory), flags, size, type)); | ||||
|     allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type)); | ||||
| } | ||||
| 
 | ||||
| std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, | ||||
|  |  | |||
|  | @ -43,6 +43,9 @@ public: | |||
|     /// It will map the backing allocation if it hasn't been mapped before.
 | ||||
|     std::span<u8> Map(); | ||||
| 
 | ||||
|     /// Returns a non-owning OpenGL handle, creating one if it doesn't exist.
 | ||||
|     u32 ExportOpenGLHandle() const; | ||||
| 
 | ||||
|     /// Returns the Vulkan memory handler.
 | ||||
|     VkDeviceMemory Memory() const { | ||||
|         return memory; | ||||
|  | @ -67,7 +70,15 @@ private: | |||
| /// Allocates and releases memory allocations on demand.
 | ||||
| class MemoryAllocator { | ||||
| public: | ||||
|     explicit MemoryAllocator(const Device& device_); | ||||
|     /**
 | ||||
|      * Construct memory allocator | ||||
|      * | ||||
|      * @param device_             Device to allocate from | ||||
|      * @param export_allocations_ True when allocations have to be exported | ||||
|      * | ||||
|      * @throw vk::Exception on failure | ||||
|      */ | ||||
|     explicit MemoryAllocator(const Device& device_, bool export_allocations_); | ||||
|     ~MemoryAllocator(); | ||||
| 
 | ||||
|     MemoryAllocator& operator=(const MemoryAllocator&) = delete; | ||||
|  | @ -108,6 +119,7 @@ private: | |||
| 
 | ||||
|     const Device& device;                              ///< Device handle.
 | ||||
|     const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
 | ||||
|     const bool export_allocations; ///< True when memory allocations have to be exported.
 | ||||
|     std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
 | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -168,11 +168,15 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
|     X(vkFreeCommandBuffers); | ||||
|     X(vkFreeDescriptorSets); | ||||
|     X(vkFreeMemory); | ||||
|     X(vkGetBufferMemoryRequirements); | ||||
|     X(vkGetBufferMemoryRequirements2); | ||||
|     X(vkGetDeviceQueue); | ||||
|     X(vkGetEventStatus); | ||||
|     X(vkGetFenceStatus); | ||||
|     X(vkGetImageMemoryRequirements); | ||||
|     X(vkGetMemoryFdKHR); | ||||
| #ifdef _WIN32 | ||||
|     X(vkGetMemoryWin32HandleKHR); | ||||
| #endif | ||||
|     X(vkGetQueryPoolResults); | ||||
|     X(vkGetSemaphoreCounterValueKHR); | ||||
|     X(vkMapMemory); | ||||
|  | @ -505,6 +509,32 @@ void ImageView::SetObjectNameEXT(const char* name) const { | |||
|     SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); | ||||
| } | ||||
| 
 | ||||
| int DeviceMemory::GetMemoryFdKHR() const { | ||||
      // Requests an opaque-FD export of this memory object through vkGetMemoryFdKHR.
|     const VkMemoryGetFdInfoKHR get_fd_info{ | ||||
|         .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, | ||||
|         .pNext = nullptr, | ||||
|         .memory = handle, | ||||
|         .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, | ||||
|     }; | ||||
      // fd is written only by the call below; the call's VkResult is passed through Check().
|     int fd; | ||||
|     Check(dld->vkGetMemoryFdKHR(owner, &get_fd_info, &fd)); | ||||
      // NOTE(review): presumably ownership of the descriptor passes to the caller (or to the
      // API it is handed to) — confirm before reusing or closing it.
|     return fd; | ||||
| } | ||||
| 
 | ||||
| #ifdef _WIN32 | ||||
  // Windows-only counterpart of GetMemoryFdKHR: requests an opaque Win32 handle for this
  // memory object through vkGetMemoryWin32HandleKHR.
| HANDLE DeviceMemory::GetMemoryWin32HandleKHR() const { | ||||
|     const VkMemoryGetWin32HandleInfoKHR get_win32_handle_info{ | ||||
|         .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, | ||||
|         .pNext = nullptr, | ||||
|         .memory = handle, | ||||
|         .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR, | ||||
|     }; | ||||
      // win32_handle is written only by the call below; its VkResult goes through Check().
|     HANDLE win32_handle; | ||||
|     Check(dld->vkGetMemoryWin32HandleKHR(owner, &get_win32_handle_info, &win32_handle)); | ||||
|     return win32_handle; | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| void DeviceMemory::SetObjectNameEXT(const char* name) const { | ||||
      // Names this object as a VK_OBJECT_TYPE_DEVICE_MEMORY via the shared SetObjectName helper.
|     SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); | ||||
| } | ||||
|  | @ -756,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const { | |||
|     return DeviceMemory(memory, handle, *dld); | ||||
| } | ||||
| 
 | ||||
| VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { | ||||
|     VkMemoryRequirements requirements; | ||||
|     dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); | ||||
|     return requirements; | ||||
| VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer, | ||||
|                                                          void* pnext) const noexcept { | ||||
|     const VkBufferMemoryRequirementsInfo2 info{ | ||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, | ||||
|         .pNext = nullptr, | ||||
|         .buffer = buffer, | ||||
|     }; | ||||
|     VkMemoryRequirements2 requirements{ | ||||
|         .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, | ||||
|         .pNext = pnext, | ||||
|         .memoryRequirements{}, | ||||
|     }; | ||||
|     dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements); | ||||
|     return requirements.memoryRequirements; | ||||
| } | ||||
| 
 | ||||
| VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { | ||||
|  |  | |||
|  | @ -15,8 +15,19 @@ | |||
| #include <vector> | ||||
| 
 | ||||
| #define VK_NO_PROTOTYPES | ||||
| #ifdef _WIN32 | ||||
| #define VK_USE_PLATFORM_WIN32_KHR | ||||
| #endif | ||||
| #include <vulkan/vulkan.h> | ||||
| 
 | ||||
| // Sanitize macros
 | ||||
| #ifdef CreateEvent | ||||
| #undef CreateEvent | ||||
| #endif | ||||
| #ifdef CreateSemaphore | ||||
| #undef CreateSemaphore | ||||
| #endif | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| #ifdef _MSC_VER | ||||
|  | @ -174,7 +185,7 @@ struct InstanceDispatch { | |||
| }; | ||||
| 
 | ||||
| /// Table holding Vulkan device function pointers.
 | ||||
| struct DeviceDispatch : public InstanceDispatch { | ||||
| struct DeviceDispatch : InstanceDispatch { | ||||
|     PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{}; | ||||
|     PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{}; | ||||
|     PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{}; | ||||
|  | @ -272,11 +283,15 @@ struct DeviceDispatch : public InstanceDispatch { | |||
|     PFN_vkFreeCommandBuffers vkFreeCommandBuffers{}; | ||||
|     PFN_vkFreeDescriptorSets vkFreeDescriptorSets{}; | ||||
|     PFN_vkFreeMemory vkFreeMemory{}; | ||||
|     PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{}; | ||||
|     PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{}; | ||||
|     PFN_vkGetDeviceQueue vkGetDeviceQueue{}; | ||||
|     PFN_vkGetEventStatus vkGetEventStatus{}; | ||||
|     PFN_vkGetFenceStatus vkGetFenceStatus{}; | ||||
|     PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{}; | ||||
|     PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{}; | ||||
| #ifdef _WIN32 | ||||
|     PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{}; | ||||
| #endif | ||||
|     PFN_vkGetQueryPoolResults vkGetQueryPoolResults{}; | ||||
|     PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{}; | ||||
|     PFN_vkMapMemory vkMapMemory{}; | ||||
|  | @ -344,6 +359,9 @@ public: | |||
|     /// Construct an empty handle.
 | ||||
|     Handle() = default; | ||||
| 
 | ||||
|     /// Construct an empty handle.
 | ||||
|     Handle(std::nullptr_t) {} | ||||
| 
 | ||||
|     /// Copying Vulkan objects is not supported and will never be.
 | ||||
|     Handle(const Handle&) = delete; | ||||
|     Handle& operator=(const Handle&) = delete; | ||||
|  | @ -659,6 +677,12 @@ class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { | |||
|     using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; | ||||
| 
 | ||||
| public: | ||||
|     int GetMemoryFdKHR() const; | ||||
| 
 | ||||
| #ifdef _WIN32 | ||||
|     HANDLE GetMemoryWin32HandleKHR() const; | ||||
| #endif | ||||
| 
 | ||||
|     /// Set object name.
 | ||||
|     void SetObjectNameEXT(const char* name) const; | ||||
| 
 | ||||
|  | @ -847,7 +871,8 @@ public: | |||
| 
 | ||||
|     DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; | ||||
| 
 | ||||
|     VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; | ||||
|     VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer, | ||||
|                                                      void* pnext = nullptr) const noexcept; | ||||
| 
 | ||||
|     VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; | ||||
| 
 | ||||
|  | @ -1031,6 +1056,12 @@ public: | |||
|         PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {}); | ||||
|     } | ||||
| 
 | ||||
|     void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, | ||||
|                          VkDependencyFlags dependency_flags, | ||||
|                          const VkMemoryBarrier& memory_barrier) const noexcept { | ||||
|         PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, memory_barrier, {}, {}); | ||||
|     } | ||||
| 
 | ||||
|     void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, | ||||
|                          VkDependencyFlags dependency_flags, | ||||
|                          const VkBufferMemoryBarrier& buffer_barrier) const noexcept { | ||||
|  |  | |||
|  | @ -64,7 +64,7 @@ void EmuThread::run() { | |||
| 
 | ||||
|     emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); | ||||
| 
 | ||||
|     system.Renderer().Rasterizer().LoadDiskResources( | ||||
|     system.Renderer().ReadRasterizer()->LoadDiskResources( | ||||
|         system.CurrentProcess()->GetTitleID(), stop_run, | ||||
|         [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { | ||||
|             emit LoadProgress(stage, value, total); | ||||
|  |  | |||
|  | @ -782,14 +782,14 @@ void Config::ReadRendererValues() { | |||
|     ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100); | ||||
|     ReadSettingGlobal(Settings::values.use_disk_shader_cache, | ||||
|                       QStringLiteral("use_disk_shader_cache"), true); | ||||
|     ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 0); | ||||
|     ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 1); | ||||
|     ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation, | ||||
|                       QStringLiteral("use_asynchronous_gpu_emulation"), true); | ||||
|     ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"), | ||||
|                       true); | ||||
|     ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); | ||||
|     ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), | ||||
|                       true); | ||||
|                       false); | ||||
|     ReadSettingGlobal(Settings::values.use_asynchronous_shaders, | ||||
|                       QStringLiteral("use_asynchronous_shaders"), false); | ||||
|     ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), | ||||
|  | @ -1351,14 +1351,14 @@ void Config::SaveRendererValues() { | |||
|                        Settings::values.use_disk_shader_cache, true); | ||||
|     WriteSettingGlobal(QStringLiteral("gpu_accuracy"), | ||||
|                        static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)), | ||||
|                        Settings::values.gpu_accuracy.UsingGlobal(), 0); | ||||
|                        Settings::values.gpu_accuracy.UsingGlobal(), 1); | ||||
|     WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"), | ||||
|                        Settings::values.use_asynchronous_gpu_emulation, true); | ||||
|     WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation, | ||||
|                        true); | ||||
|     WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); | ||||
|     WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), | ||||
|                        Settings::values.use_assembly_shaders, true); | ||||
|                        Settings::values.use_assembly_shaders, false); | ||||
|     WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"), | ||||
|                        Settings::values.use_asynchronous_shaders, false); | ||||
|     WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, | ||||
|  |  | |||
|  | @ -2,6 +2,9 @@ | |||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| // Include this early to include Vulkan headers how we want to
 | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
| 
 | ||||
| #include <QColorDialog> | ||||
| #include <QComboBox> | ||||
| #include <QVulkanInstance> | ||||
|  | @ -11,7 +14,8 @@ | |||
| #include "core/core.h" | ||||
| #include "core/settings.h" | ||||
| #include "ui_configure_graphics.h" | ||||
| #include "video_core/renderer_vulkan/renderer_vulkan.h" | ||||
| #include "video_core/vulkan_common/vulkan_instance.h" | ||||
| #include "video_core/vulkan_common/vulkan_library.h" | ||||
| #include "yuzu/configuration/configuration_shared.h" | ||||
| #include "yuzu/configuration/configure_graphics.h" | ||||
| 
 | ||||
|  | @ -212,11 +216,23 @@ void ConfigureGraphics::UpdateDeviceComboBox() { | |||
|     ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); | ||||
| } | ||||
| 
 | ||||
| void ConfigureGraphics::RetrieveVulkanDevices() { | ||||
| void ConfigureGraphics::RetrieveVulkanDevices() try { | ||||
|     using namespace Vulkan; | ||||
| 
 | ||||
|     vk::InstanceDispatch dld; | ||||
|     const Common::DynamicLibrary library = OpenLibrary(); | ||||
|     const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0); | ||||
|     const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); | ||||
| 
 | ||||
|     vulkan_devices.clear(); | ||||
|     for (const auto& name : Vulkan::RendererVulkan::EnumerateDevices()) { | ||||
|     vulkan_devices.reserve(physical_devices.size()); | ||||
|     for (const VkPhysicalDevice device : physical_devices) { | ||||
|         const char* const name = vk::PhysicalDevice(device, dld).GetProperties().deviceName; | ||||
|         vulkan_devices.push_back(QString::fromStdString(name)); | ||||
|     } | ||||
| 
 | ||||
| } catch (const Vulkan::vk::Exception& exception) { | ||||
|     LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what()); | ||||
| } | ||||
| 
 | ||||
| Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { | ||||
|  |  | |||
|  | @ -388,7 +388,7 @@ void Config::ReadValues() { | |||
|         static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100))); | ||||
|     Settings::values.use_disk_shader_cache.SetValue( | ||||
|         sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false)); | ||||
|     const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); | ||||
|     const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 1); | ||||
|     Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level)); | ||||
|     Settings::values.use_asynchronous_gpu_emulation.SetValue( | ||||
|         sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true)); | ||||
|  |  | |||
|  | @ -215,7 +215,7 @@ int main(int argc, char** argv) { | |||
|     // Core is loaded, start the GPU (makes the GPU contexts current to this thread)
 | ||||
|     system.GPU().Start(); | ||||
| 
 | ||||
|     system.Renderer().Rasterizer().LoadDiskResources( | ||||
|     system.Renderer().ReadRasterizer()->LoadDiskResources( | ||||
|         system.CurrentProcess()->GetTitleID(), false, | ||||
|         [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei